gcc/config/i386/i386.c
1 /* Subroutines used for code generation on IA-32.
2 Copyright (C) 1988-2015 Free Software Foundation, Inc.
4 This file is part of GCC.
6 GCC is free software; you can redistribute it and/or modify
7 it under the terms of the GNU General Public License as published by
8 the Free Software Foundation; either version 3, or (at your option)
9 any later version.
11 GCC is distributed in the hope that it will be useful,
12 but WITHOUT ANY WARRANTY; without even the implied warranty of
13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 GNU General Public License for more details.
16 You should have received a copy of the GNU General Public License
17 along with GCC; see the file COPYING3. If not see
18 <http://www.gnu.org/licenses/>. */
20 #include "config.h"
21 #include "system.h"
22 #include "coretypes.h"
23 #include "tm.h"
24 #include "rtl.h"
25 #include "alias.h"
26 #include "symtab.h"
27 #include "tree.h"
28 #include "fold-const.h"
29 #include "stringpool.h"
30 #include "attribs.h"
31 #include "calls.h"
32 #include "stor-layout.h"
33 #include "varasm.h"
34 #include "tm_p.h"
35 #include "regs.h"
36 #include "hard-reg-set.h"
37 #include "insn-config.h"
38 #include "conditions.h"
39 #include "output.h"
40 #include "insn-codes.h"
41 #include "insn-attr.h"
42 #include "flags.h"
43 #include "except.h"
44 #include "function.h"
45 #include "recog.h"
46 #include "expmed.h"
47 #include "dojump.h"
48 #include "explow.h"
49 #include "emit-rtl.h"
50 #include "stmt.h"
51 #include "expr.h"
52 #include "optabs.h"
53 #include "diagnostic-core.h"
54 #include "toplev.h"
55 #include "predict.h"
56 #include "dominance.h"
57 #include "cfg.h"
58 #include "cfgrtl.h"
59 #include "cfganal.h"
60 #include "lcm.h"
61 #include "cfgbuild.h"
62 #include "cfgcleanup.h"
63 #include "basic-block.h"
64 #include "target.h"
65 #include "common/common-target.h"
66 #include "langhooks.h"
67 #include "reload.h"
68 #include "plugin-api.h"
69 #include "ipa-ref.h"
70 #include "cgraph.h"
71 #include "tree-ssa-alias.h"
72 #include "internal-fn.h"
73 #include "gimple-fold.h"
74 #include "tree-eh.h"
75 #include "gimple-expr.h"
76 #include "gimple.h"
77 #include "gimplify.h"
78 #include "cfgloop.h"
79 #include "dwarf2.h"
80 #include "df.h"
81 #include "tm-constrs.h"
82 #include "params.h"
83 #include "cselib.h"
84 #include "debug.h"
85 #include "sched-int.h"
86 #include "sbitmap.h"
87 #include "fibheap.h"
88 #include "opts.h"
89 #include "diagnostic.h"
90 #include "dumpfile.h"
91 #include "tree-pass.h"
92 #include "context.h"
93 #include "pass_manager.h"
94 #include "target-globals.h"
95 #include "tree-vectorizer.h"
96 #include "shrink-wrap.h"
97 #include "builtins.h"
98 #include "rtl-iter.h"
99 #include "tree-iterator.h"
100 #include "tree-chkp.h"
101 #include "rtl-chkp.h"
103 #include "target-def.h"
105 static rtx legitimize_dllimport_symbol (rtx, bool);
106 static rtx legitimize_pe_coff_extern_decl (rtx, bool);
107 static rtx legitimize_pe_coff_symbol (rtx, bool);
109 #ifndef CHECK_STACK_LIMIT
110 #define CHECK_STACK_LIMIT (-1)
111 #endif
113 /* Return index of given mode in mult and division cost tables. */
114 #define MODE_INDEX(mode) \
115 ((mode) == QImode ? 0 \
116 : (mode) == HImode ? 1 \
117 : (mode) == SImode ? 2 \
118 : (mode) == DImode ? 3 \
119 : 4)
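/* Editorial sketch, not part of the original source: how MODE_INDEX is meant
   to be used.  The multiply and divide entries in processor_costs are
   five-element arrays (QI, HI, SI, DI, other), and MODE_INDEX maps a machine
   mode onto the matching slot; the field names mult_init/divide below are
   assumed to match the processor_costs declaration in i386.h.  */
#if 0
static int
example_mult_cost (const struct processor_costs *cost, machine_mode mode)
{
  /* For SImode this evaluates to cost->mult_init[2].  */
  return cost->mult_init[MODE_INDEX (mode)];
}
#endif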
121 /* Processor costs (relative to an add) */
122 /* We assume COSTS_N_INSNS is defined as (N)*4 and an addition is 2 bytes. */
123 #define COSTS_N_BYTES(N) ((N) * 2)
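/* Editorial note, not part of the original source: with COSTS_N_INSNS (N)
   defined as (N) * 4 in rtl.h, a one-instruction add costs 4 units on the
   speed scale, and the two-byte add assumed above costs COSTS_N_BYTES (2)
   == 4 units on the size scale, so the two scales agree on the cost of an
   add and the size tables below stay comparable to the speed tables.  */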
125 #define DUMMY_STRINGOP_ALGS {libcall, {{-1, libcall, false}}}
127 static stringop_algs ix86_size_memcpy[2] = {
128 {rep_prefix_1_byte, {{-1, rep_prefix_1_byte, false}}},
129 {rep_prefix_1_byte, {{-1, rep_prefix_1_byte, false}}}};
130 static stringop_algs ix86_size_memset[2] = {
131 {rep_prefix_1_byte, {{-1, rep_prefix_1_byte, false}}},
132 {rep_prefix_1_byte, {{-1, rep_prefix_1_byte, false}}}};
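/* Editorial sketch, not part of the original source: how a stringop_algs
   table is read.  Each processor provides two descriptors -- by convention
   slot 0 for 32-bit and slot 1 for 64-bit code -- and each descriptor is an
   unknown-size fallback followed by {max_size, algorithm, noalign} entries;
   the first entry whose max_size covers the block (with -1 as the catch-all)
   names the algorithm to use.  The loop below only illustrates that reading
   and is not the selection logic the back end actually uses.  */
#if 0
static stringop_alg
example_pick_alg (const stringop_algs *algs, unsigned HOST_WIDE_INT size)
{
  for (int i = 0; i < MAX_STRINGOP_ALGS; i++)
    if (algs->size[i].max == -1
        || size <= (unsigned HOST_WIDE_INT) algs->size[i].max)
      return algs->size[i].alg;
  return algs->unknown_size;
}
#endif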
134 const
135 struct processor_costs ix86_size_cost = {/* costs for tuning for size */
136 COSTS_N_BYTES (2), /* cost of an add instruction */
137 COSTS_N_BYTES (3), /* cost of a lea instruction */
138 COSTS_N_BYTES (2), /* variable shift costs */
139 COSTS_N_BYTES (3), /* constant shift costs */
140 {COSTS_N_BYTES (3), /* cost of starting multiply for QI */
141 COSTS_N_BYTES (3), /* HI */
142 COSTS_N_BYTES (3), /* SI */
143 COSTS_N_BYTES (3), /* DI */
144 COSTS_N_BYTES (5)}, /* other */
145 0, /* cost of multiply per each bit set */
146 {COSTS_N_BYTES (3), /* cost of a divide/mod for QI */
147 COSTS_N_BYTES (3), /* HI */
148 COSTS_N_BYTES (3), /* SI */
149 COSTS_N_BYTES (3), /* DI */
150 COSTS_N_BYTES (5)}, /* other */
151 COSTS_N_BYTES (3), /* cost of movsx */
152 COSTS_N_BYTES (3), /* cost of movzx */
153 0, /* "large" insn */
154 2, /* MOVE_RATIO */
155 2, /* cost for loading QImode using movzbl */
156 {2, 2, 2}, /* cost of loading integer registers
157 in QImode, HImode and SImode.
158 Relative to reg-reg move (2). */
159 {2, 2, 2}, /* cost of storing integer registers */
160 2, /* cost of reg,reg fld/fst */
161 {2, 2, 2}, /* cost of loading fp registers
162 in SFmode, DFmode and XFmode */
163 {2, 2, 2}, /* cost of storing fp registers
164 in SFmode, DFmode and XFmode */
165 3, /* cost of moving MMX register */
166 {3, 3}, /* cost of loading MMX registers
167 in SImode and DImode */
168 {3, 3}, /* cost of storing MMX registers
169 in SImode and DImode */
170 3, /* cost of moving SSE register */
171 {3, 3, 3}, /* cost of loading SSE registers
172 in SImode, DImode and TImode */
173 {3, 3, 3}, /* cost of storing SSE registers
174 in SImode, DImode and TImode */
175 3, /* MMX or SSE register to integer */
176 0, /* size of l1 cache */
177 0, /* size of l2 cache */
178 0, /* size of prefetch block */
179 0, /* number of parallel prefetches */
180 2, /* Branch cost */
181 COSTS_N_BYTES (2), /* cost of FADD and FSUB insns. */
182 COSTS_N_BYTES (2), /* cost of FMUL instruction. */
183 COSTS_N_BYTES (2), /* cost of FDIV instruction. */
184 COSTS_N_BYTES (2), /* cost of FABS instruction. */
185 COSTS_N_BYTES (2), /* cost of FCHS instruction. */
186 COSTS_N_BYTES (2), /* cost of FSQRT instruction. */
187 ix86_size_memcpy,
188 ix86_size_memset,
189 1, /* scalar_stmt_cost. */
190 1, /* scalar load_cost. */
191 1, /* scalar_store_cost. */
192 1, /* vec_stmt_cost. */
193 1, /* vec_to_scalar_cost. */
194 1, /* scalar_to_vec_cost. */
195 1, /* vec_align_load_cost. */
196 1, /* vec_unalign_load_cost. */
197 1, /* vec_store_cost. */
198 1, /* cond_taken_branch_cost. */
199 1, /* cond_not_taken_branch_cost. */
200 };
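/* Editorial sketch, not part of the original source: how these tables are
   consumed.  ix86_size_cost above is the table used when optimizing for
   size; the back end keeps a pointer to the table chosen for the current
   tuning (ix86_cost, declared in i386.h) and hooks such as ix86_rtx_costs
   read individual fields from it, as the illustrative helper below does.  */
#if 0
static int
example_lea_vs_add_cost (void)
{
  /* Positive when a lea is considered more expensive than an add for the
     active tuning.  */
  return ix86_cost->lea - ix86_cost->add;
}
#endif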
202 /* Processor costs (relative to an add) */
203 static stringop_algs i386_memcpy[2] = {
204 {rep_prefix_1_byte, {{-1, rep_prefix_1_byte, false}}},
205 DUMMY_STRINGOP_ALGS};
206 static stringop_algs i386_memset[2] = {
207 {rep_prefix_1_byte, {{-1, rep_prefix_1_byte, false}}},
208 DUMMY_STRINGOP_ALGS};
210 static const
211 struct processor_costs i386_cost = { /* 386 specific costs */
212 COSTS_N_INSNS (1), /* cost of an add instruction */
213 COSTS_N_INSNS (1), /* cost of a lea instruction */
214 COSTS_N_INSNS (3), /* variable shift costs */
215 COSTS_N_INSNS (2), /* constant shift costs */
216 {COSTS_N_INSNS (6), /* cost of starting multiply for QI */
217 COSTS_N_INSNS (6), /* HI */
218 COSTS_N_INSNS (6), /* SI */
219 COSTS_N_INSNS (6), /* DI */
220 COSTS_N_INSNS (6)}, /* other */
221 COSTS_N_INSNS (1), /* cost of multiply per each bit set */
222 {COSTS_N_INSNS (23), /* cost of a divide/mod for QI */
223 COSTS_N_INSNS (23), /* HI */
224 COSTS_N_INSNS (23), /* SI */
225 COSTS_N_INSNS (23), /* DI */
226 COSTS_N_INSNS (23)}, /* other */
227 COSTS_N_INSNS (3), /* cost of movsx */
228 COSTS_N_INSNS (2), /* cost of movzx */
229 15, /* "large" insn */
230 3, /* MOVE_RATIO */
231 4, /* cost for loading QImode using movzbl */
232 {2, 4, 2}, /* cost of loading integer registers
233 in QImode, HImode and SImode.
234 Relative to reg-reg move (2). */
235 {2, 4, 2}, /* cost of storing integer registers */
236 2, /* cost of reg,reg fld/fst */
237 {8, 8, 8}, /* cost of loading fp registers
238 in SFmode, DFmode and XFmode */
239 {8, 8, 8}, /* cost of storing fp registers
240 in SFmode, DFmode and XFmode */
241 2, /* cost of moving MMX register */
242 {4, 8}, /* cost of loading MMX registers
243 in SImode and DImode */
244 {4, 8}, /* cost of storing MMX registers
245 in SImode and DImode */
246 2, /* cost of moving SSE register */
247 {4, 8, 16}, /* cost of loading SSE registers
248 in SImode, DImode and TImode */
249 {4, 8, 16}, /* cost of storing SSE registers
250 in SImode, DImode and TImode */
251 3, /* MMX or SSE register to integer */
252 0, /* size of l1 cache */
253 0, /* size of l2 cache */
254 0, /* size of prefetch block */
255 0, /* number of parallel prefetches */
256 1, /* Branch cost */
257 COSTS_N_INSNS (23), /* cost of FADD and FSUB insns. */
258 COSTS_N_INSNS (27), /* cost of FMUL instruction. */
259 COSTS_N_INSNS (88), /* cost of FDIV instruction. */
260 COSTS_N_INSNS (22), /* cost of FABS instruction. */
261 COSTS_N_INSNS (24), /* cost of FCHS instruction. */
262 COSTS_N_INSNS (122), /* cost of FSQRT instruction. */
263 i386_memcpy,
264 i386_memset,
265 1, /* scalar_stmt_cost. */
266 1, /* scalar load_cost. */
267 1, /* scalar_store_cost. */
268 1, /* vec_stmt_cost. */
269 1, /* vec_to_scalar_cost. */
270 1, /* scalar_to_vec_cost. */
271 1, /* vec_align_load_cost. */
272 2, /* vec_unalign_load_cost. */
273 1, /* vec_store_cost. */
274 3, /* cond_taken_branch_cost. */
275 1, /* cond_not_taken_branch_cost. */
276 };
278 static stringop_algs i486_memcpy[2] = {
279 {rep_prefix_4_byte, {{-1, rep_prefix_4_byte, false}}},
280 DUMMY_STRINGOP_ALGS};
281 static stringop_algs i486_memset[2] = {
282 {rep_prefix_4_byte, {{-1, rep_prefix_4_byte, false}}},
283 DUMMY_STRINGOP_ALGS};
285 static const
286 struct processor_costs i486_cost = { /* 486 specific costs */
287 COSTS_N_INSNS (1), /* cost of an add instruction */
288 COSTS_N_INSNS (1), /* cost of a lea instruction */
289 COSTS_N_INSNS (3), /* variable shift costs */
290 COSTS_N_INSNS (2), /* constant shift costs */
291 {COSTS_N_INSNS (12), /* cost of starting multiply for QI */
292 COSTS_N_INSNS (12), /* HI */
293 COSTS_N_INSNS (12), /* SI */
294 COSTS_N_INSNS (12), /* DI */
295 COSTS_N_INSNS (12)}, /* other */
296 1, /* cost of multiply per each bit set */
297 {COSTS_N_INSNS (40), /* cost of a divide/mod for QI */
298 COSTS_N_INSNS (40), /* HI */
299 COSTS_N_INSNS (40), /* SI */
300 COSTS_N_INSNS (40), /* DI */
301 COSTS_N_INSNS (40)}, /* other */
302 COSTS_N_INSNS (3), /* cost of movsx */
303 COSTS_N_INSNS (2), /* cost of movzx */
304 15, /* "large" insn */
305 3, /* MOVE_RATIO */
306 4, /* cost for loading QImode using movzbl */
307 {2, 4, 2}, /* cost of loading integer registers
308 in QImode, HImode and SImode.
309 Relative to reg-reg move (2). */
310 {2, 4, 2}, /* cost of storing integer registers */
311 2, /* cost of reg,reg fld/fst */
312 {8, 8, 8}, /* cost of loading fp registers
313 in SFmode, DFmode and XFmode */
314 {8, 8, 8}, /* cost of storing fp registers
315 in SFmode, DFmode and XFmode */
316 2, /* cost of moving MMX register */
317 {4, 8}, /* cost of loading MMX registers
318 in SImode and DImode */
319 {4, 8}, /* cost of storing MMX registers
320 in SImode and DImode */
321 2, /* cost of moving SSE register */
322 {4, 8, 16}, /* cost of loading SSE registers
323 in SImode, DImode and TImode */
324 {4, 8, 16}, /* cost of storing SSE registers
325 in SImode, DImode and TImode */
326 3, /* MMX or SSE register to integer */
327 4, /* size of l1 cache. 486 has 8kB cache
328 shared for code and data, so 4kB is
329 not really precise. */
330 4, /* size of l2 cache */
331 0, /* size of prefetch block */
332 0, /* number of parallel prefetches */
333 1, /* Branch cost */
334 COSTS_N_INSNS (8), /* cost of FADD and FSUB insns. */
335 COSTS_N_INSNS (16), /* cost of FMUL instruction. */
336 COSTS_N_INSNS (73), /* cost of FDIV instruction. */
337 COSTS_N_INSNS (3), /* cost of FABS instruction. */
338 COSTS_N_INSNS (3), /* cost of FCHS instruction. */
339 COSTS_N_INSNS (83), /* cost of FSQRT instruction. */
340 i486_memcpy,
341 i486_memset,
342 1, /* scalar_stmt_cost. */
343 1, /* scalar load_cost. */
344 1, /* scalar_store_cost. */
345 1, /* vec_stmt_cost. */
346 1, /* vec_to_scalar_cost. */
347 1, /* scalar_to_vec_cost. */
348 1, /* vec_align_load_cost. */
349 2, /* vec_unalign_load_cost. */
350 1, /* vec_store_cost. */
351 3, /* cond_taken_branch_cost. */
352 1, /* cond_not_taken_branch_cost. */
353 };
355 static stringop_algs pentium_memcpy[2] = {
356 {libcall, {{256, rep_prefix_4_byte, false}, {-1, libcall, false}}},
357 DUMMY_STRINGOP_ALGS};
358 static stringop_algs pentium_memset[2] = {
359 {libcall, {{-1, rep_prefix_4_byte, false}}},
360 DUMMY_STRINGOP_ALGS};
362 static const
363 struct processor_costs pentium_cost = {
364 COSTS_N_INSNS (1), /* cost of an add instruction */
365 COSTS_N_INSNS (1), /* cost of a lea instruction */
366 COSTS_N_INSNS (4), /* variable shift costs */
367 COSTS_N_INSNS (1), /* constant shift costs */
368 {COSTS_N_INSNS (11), /* cost of starting multiply for QI */
369 COSTS_N_INSNS (11), /* HI */
370 COSTS_N_INSNS (11), /* SI */
371 COSTS_N_INSNS (11), /* DI */
372 COSTS_N_INSNS (11)}, /* other */
373 0, /* cost of multiply per each bit set */
374 {COSTS_N_INSNS (25), /* cost of a divide/mod for QI */
375 COSTS_N_INSNS (25), /* HI */
376 COSTS_N_INSNS (25), /* SI */
377 COSTS_N_INSNS (25), /* DI */
378 COSTS_N_INSNS (25)}, /* other */
379 COSTS_N_INSNS (3), /* cost of movsx */
380 COSTS_N_INSNS (2), /* cost of movzx */
381 8, /* "large" insn */
382 6, /* MOVE_RATIO */
383 6, /* cost for loading QImode using movzbl */
384 {2, 4, 2}, /* cost of loading integer registers
385 in QImode, HImode and SImode.
386 Relative to reg-reg move (2). */
387 {2, 4, 2}, /* cost of storing integer registers */
388 2, /* cost of reg,reg fld/fst */
389 {2, 2, 6}, /* cost of loading fp registers
390 in SFmode, DFmode and XFmode */
391 {4, 4, 6}, /* cost of storing fp registers
392 in SFmode, DFmode and XFmode */
393 8, /* cost of moving MMX register */
394 {8, 8}, /* cost of loading MMX registers
395 in SImode and DImode */
396 {8, 8}, /* cost of storing MMX registers
397 in SImode and DImode */
398 2, /* cost of moving SSE register */
399 {4, 8, 16}, /* cost of loading SSE registers
400 in SImode, DImode and TImode */
401 {4, 8, 16}, /* cost of storing SSE registers
402 in SImode, DImode and TImode */
403 3, /* MMX or SSE register to integer */
404 8, /* size of l1 cache. */
405 8, /* size of l2 cache */
406 0, /* size of prefetch block */
407 0, /* number of parallel prefetches */
408 2, /* Branch cost */
409 COSTS_N_INSNS (3), /* cost of FADD and FSUB insns. */
410 COSTS_N_INSNS (3), /* cost of FMUL instruction. */
411 COSTS_N_INSNS (39), /* cost of FDIV instruction. */
412 COSTS_N_INSNS (1), /* cost of FABS instruction. */
413 COSTS_N_INSNS (1), /* cost of FCHS instruction. */
414 COSTS_N_INSNS (70), /* cost of FSQRT instruction. */
415 pentium_memcpy,
416 pentium_memset,
417 1, /* scalar_stmt_cost. */
418 1, /* scalar load_cost. */
419 1, /* scalar_store_cost. */
420 1, /* vec_stmt_cost. */
421 1, /* vec_to_scalar_cost. */
422 1, /* scalar_to_vec_cost. */
423 1, /* vec_align_load_cost. */
424 2, /* vec_unalign_load_cost. */
425 1, /* vec_store_cost. */
426 3, /* cond_taken_branch_cost. */
427 1, /* cond_not_taken_branch_cost. */
428 };
430 /* PentiumPro has optimized rep instructions for blocks aligned by 8 bytes
431 (we ensure the alignment). For small blocks an inline loop is still a
432 noticeable win, for bigger blocks either rep movsl or rep movsb is the
433 way to go. Rep movsb apparently has a more expensive startup time in the
434 CPU, but after 4K the difference is down in the noise. */
435 static stringop_algs pentiumpro_memcpy[2] = {
436 {rep_prefix_4_byte, {{128, loop, false}, {1024, unrolled_loop, false},
437 {8192, rep_prefix_4_byte, false},
438 {-1, rep_prefix_1_byte, false}}},
439 DUMMY_STRINGOP_ALGS};
440 static stringop_algs pentiumpro_memset[2] = {
441 {rep_prefix_4_byte, {{1024, unrolled_loop, false},
442 {8192, rep_prefix_4_byte, false},
443 {-1, libcall, false}}},
444 DUMMY_STRINGOP_ALGS};
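/* Editorial note, not part of the original source: reading the 32-bit
   pentiumpro_memcpy descriptor above with the convention sketched earlier,
   copies of at most 128 bytes use an inline loop, up to 1024 bytes an
   unrolled loop, up to 8192 bytes rep movsl (rep_prefix_4_byte), and anything
   larger rep movsb (rep_prefix_1_byte); when the block size is unknown at
   compile time the leading rep_prefix_4_byte entry is the fallback.  The
   64-bit slot is DUMMY_STRINGOP_ALGS since a PentiumPro never runs 64-bit
   code.  */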
445 static const
446 struct processor_costs pentiumpro_cost = {
447 COSTS_N_INSNS (1), /* cost of an add instruction */
448 COSTS_N_INSNS (1), /* cost of a lea instruction */
449 COSTS_N_INSNS (1), /* variable shift costs */
450 COSTS_N_INSNS (1), /* constant shift costs */
451 {COSTS_N_INSNS (4), /* cost of starting multiply for QI */
452 COSTS_N_INSNS (4), /* HI */
453 COSTS_N_INSNS (4), /* SI */
454 COSTS_N_INSNS (4), /* DI */
455 COSTS_N_INSNS (4)}, /* other */
456 0, /* cost of multiply per each bit set */
457 {COSTS_N_INSNS (17), /* cost of a divide/mod for QI */
458 COSTS_N_INSNS (17), /* HI */
459 COSTS_N_INSNS (17), /* SI */
460 COSTS_N_INSNS (17), /* DI */
461 COSTS_N_INSNS (17)}, /* other */
462 COSTS_N_INSNS (1), /* cost of movsx */
463 COSTS_N_INSNS (1), /* cost of movzx */
464 8, /* "large" insn */
465 6, /* MOVE_RATIO */
466 2, /* cost for loading QImode using movzbl */
467 {4, 4, 4}, /* cost of loading integer registers
468 in QImode, HImode and SImode.
469 Relative to reg-reg move (2). */
470 {2, 2, 2}, /* cost of storing integer registers */
471 2, /* cost of reg,reg fld/fst */
472 {2, 2, 6}, /* cost of loading fp registers
473 in SFmode, DFmode and XFmode */
474 {4, 4, 6}, /* cost of storing fp registers
475 in SFmode, DFmode and XFmode */
476 2, /* cost of moving MMX register */
477 {2, 2}, /* cost of loading MMX registers
478 in SImode and DImode */
479 {2, 2}, /* cost of storing MMX registers
480 in SImode and DImode */
481 2, /* cost of moving SSE register */
482 {2, 2, 8}, /* cost of loading SSE registers
483 in SImode, DImode and TImode */
484 {2, 2, 8}, /* cost of storing SSE registers
485 in SImode, DImode and TImode */
486 3, /* MMX or SSE register to integer */
487 8, /* size of l1 cache. */
488 256, /* size of l2 cache */
489 32, /* size of prefetch block */
490 6, /* number of parallel prefetches */
491 2, /* Branch cost */
492 COSTS_N_INSNS (3), /* cost of FADD and FSUB insns. */
493 COSTS_N_INSNS (5), /* cost of FMUL instruction. */
494 COSTS_N_INSNS (56), /* cost of FDIV instruction. */
495 COSTS_N_INSNS (2), /* cost of FABS instruction. */
496 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
497 COSTS_N_INSNS (56), /* cost of FSQRT instruction. */
498 pentiumpro_memcpy,
499 pentiumpro_memset,
500 1, /* scalar_stmt_cost. */
501 1, /* scalar load_cost. */
502 1, /* scalar_store_cost. */
503 1, /* vec_stmt_cost. */
504 1, /* vec_to_scalar_cost. */
505 1, /* scalar_to_vec_cost. */
506 1, /* vec_align_load_cost. */
507 2, /* vec_unalign_load_cost. */
508 1, /* vec_store_cost. */
509 3, /* cond_taken_branch_cost. */
510 1, /* cond_not_taken_branch_cost. */
511 };
513 static stringop_algs geode_memcpy[2] = {
514 {libcall, {{256, rep_prefix_4_byte, false}, {-1, libcall, false}}},
515 DUMMY_STRINGOP_ALGS};
516 static stringop_algs geode_memset[2] = {
517 {libcall, {{256, rep_prefix_4_byte, false}, {-1, libcall, false}}},
518 DUMMY_STRINGOP_ALGS};
519 static const
520 struct processor_costs geode_cost = {
521 COSTS_N_INSNS (1), /* cost of an add instruction */
522 COSTS_N_INSNS (1), /* cost of a lea instruction */
523 COSTS_N_INSNS (2), /* variable shift costs */
524 COSTS_N_INSNS (1), /* constant shift costs */
525 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
526 COSTS_N_INSNS (4), /* HI */
527 COSTS_N_INSNS (7), /* SI */
528 COSTS_N_INSNS (7), /* DI */
529 COSTS_N_INSNS (7)}, /* other */
530 0, /* cost of multiply per each bit set */
531 {COSTS_N_INSNS (15), /* cost of a divide/mod for QI */
532 COSTS_N_INSNS (23), /* HI */
533 COSTS_N_INSNS (39), /* SI */
534 COSTS_N_INSNS (39), /* DI */
535 COSTS_N_INSNS (39)}, /* other */
536 COSTS_N_INSNS (1), /* cost of movsx */
537 COSTS_N_INSNS (1), /* cost of movzx */
538 8, /* "large" insn */
539 4, /* MOVE_RATIO */
540 1, /* cost for loading QImode using movzbl */
541 {1, 1, 1}, /* cost of loading integer registers
542 in QImode, HImode and SImode.
543 Relative to reg-reg move (2). */
544 {1, 1, 1}, /* cost of storing integer registers */
545 1, /* cost of reg,reg fld/fst */
546 {1, 1, 1}, /* cost of loading fp registers
547 in SFmode, DFmode and XFmode */
548 {4, 6, 6}, /* cost of storing fp registers
549 in SFmode, DFmode and XFmode */
551 1, /* cost of moving MMX register */
552 {1, 1}, /* cost of loading MMX registers
553 in SImode and DImode */
554 {1, 1}, /* cost of storing MMX registers
555 in SImode and DImode */
556 1, /* cost of moving SSE register */
557 {1, 1, 1}, /* cost of loading SSE registers
558 in SImode, DImode and TImode */
559 {1, 1, 1}, /* cost of storing SSE registers
560 in SImode, DImode and TImode */
561 1, /* MMX or SSE register to integer */
562 64, /* size of l1 cache. */
563 128, /* size of l2 cache. */
564 32, /* size of prefetch block */
565 1, /* number of parallel prefetches */
566 1, /* Branch cost */
567 COSTS_N_INSNS (6), /* cost of FADD and FSUB insns. */
568 COSTS_N_INSNS (11), /* cost of FMUL instruction. */
569 COSTS_N_INSNS (47), /* cost of FDIV instruction. */
570 COSTS_N_INSNS (1), /* cost of FABS instruction. */
571 COSTS_N_INSNS (1), /* cost of FCHS instruction. */
572 COSTS_N_INSNS (54), /* cost of FSQRT instruction. */
573 geode_memcpy,
574 geode_memset,
575 1, /* scalar_stmt_cost. */
576 1, /* scalar load_cost. */
577 1, /* scalar_store_cost. */
578 1, /* vec_stmt_cost. */
579 1, /* vec_to_scalar_cost. */
580 1, /* scalar_to_vec_cost. */
581 1, /* vec_align_load_cost. */
582 2, /* vec_unalign_load_cost. */
583 1, /* vec_store_cost. */
584 3, /* cond_taken_branch_cost. */
585 1, /* cond_not_taken_branch_cost. */
586 };
588 static stringop_algs k6_memcpy[2] = {
589 {libcall, {{256, rep_prefix_4_byte, false}, {-1, libcall, false}}},
590 DUMMY_STRINGOP_ALGS};
591 static stringop_algs k6_memset[2] = {
592 {libcall, {{256, rep_prefix_4_byte, false}, {-1, libcall, false}}},
593 DUMMY_STRINGOP_ALGS};
594 static const
595 struct processor_costs k6_cost = {
596 COSTS_N_INSNS (1), /* cost of an add instruction */
597 COSTS_N_INSNS (2), /* cost of a lea instruction */
598 COSTS_N_INSNS (1), /* variable shift costs */
599 COSTS_N_INSNS (1), /* constant shift costs */
600 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
601 COSTS_N_INSNS (3), /* HI */
602 COSTS_N_INSNS (3), /* SI */
603 COSTS_N_INSNS (3), /* DI */
604 COSTS_N_INSNS (3)}, /* other */
605 0, /* cost of multiply per each bit set */
606 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
607 COSTS_N_INSNS (18), /* HI */
608 COSTS_N_INSNS (18), /* SI */
609 COSTS_N_INSNS (18), /* DI */
610 COSTS_N_INSNS (18)}, /* other */
611 COSTS_N_INSNS (2), /* cost of movsx */
612 COSTS_N_INSNS (2), /* cost of movzx */
613 8, /* "large" insn */
614 4, /* MOVE_RATIO */
615 3, /* cost for loading QImode using movzbl */
616 {4, 5, 4}, /* cost of loading integer registers
617 in QImode, HImode and SImode.
618 Relative to reg-reg move (2). */
619 {2, 3, 2}, /* cost of storing integer registers */
620 4, /* cost of reg,reg fld/fst */
621 {6, 6, 6}, /* cost of loading fp registers
622 in SFmode, DFmode and XFmode */
623 {4, 4, 4}, /* cost of storing fp registers
624 in SFmode, DFmode and XFmode */
625 2, /* cost of moving MMX register */
626 {2, 2}, /* cost of loading MMX registers
627 in SImode and DImode */
628 {2, 2}, /* cost of storing MMX registers
629 in SImode and DImode */
630 2, /* cost of moving SSE register */
631 {2, 2, 8}, /* cost of loading SSE registers
632 in SImode, DImode and TImode */
633 {2, 2, 8}, /* cost of storing SSE registers
634 in SImode, DImode and TImode */
635 6, /* MMX or SSE register to integer */
636 32, /* size of l1 cache. */
637 32, /* size of l2 cache. Some models
638 have integrated l2 cache, but
639 optimizing for k6 is not important
640 enough to worry about that. */
641 32, /* size of prefetch block */
642 1, /* number of parallel prefetches */
643 1, /* Branch cost */
644 COSTS_N_INSNS (2), /* cost of FADD and FSUB insns. */
645 COSTS_N_INSNS (2), /* cost of FMUL instruction. */
646 COSTS_N_INSNS (56), /* cost of FDIV instruction. */
647 COSTS_N_INSNS (2), /* cost of FABS instruction. */
648 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
649 COSTS_N_INSNS (56), /* cost of FSQRT instruction. */
650 k6_memcpy,
651 k6_memset,
652 1, /* scalar_stmt_cost. */
653 1, /* scalar load_cost. */
654 1, /* scalar_store_cost. */
655 1, /* vec_stmt_cost. */
656 1, /* vec_to_scalar_cost. */
657 1, /* scalar_to_vec_cost. */
658 1, /* vec_align_load_cost. */
659 2, /* vec_unalign_load_cost. */
660 1, /* vec_store_cost. */
661 3, /* cond_taken_branch_cost. */
662 1, /* cond_not_taken_branch_cost. */
663 };
665 /* For some reason, Athlon deals better with the REP prefix (relative to
666 loops) than K8 does. Alignment becomes important after 8 bytes for memcpy
667 and 128 bytes for memset. */
668 static stringop_algs athlon_memcpy[2] = {
669 {libcall, {{2048, rep_prefix_4_byte, false}, {-1, libcall, false}}},
670 DUMMY_STRINGOP_ALGS};
671 static stringop_algs athlon_memset[2] = {
672 {libcall, {{2048, rep_prefix_4_byte, false}, {-1, libcall, false}}},
673 DUMMY_STRINGOP_ALGS};
674 static const
675 struct processor_costs athlon_cost = {
676 COSTS_N_INSNS (1), /* cost of an add instruction */
677 COSTS_N_INSNS (2), /* cost of a lea instruction */
678 COSTS_N_INSNS (1), /* variable shift costs */
679 COSTS_N_INSNS (1), /* constant shift costs */
680 {COSTS_N_INSNS (5), /* cost of starting multiply for QI */
681 COSTS_N_INSNS (5), /* HI */
682 COSTS_N_INSNS (5), /* SI */
683 COSTS_N_INSNS (5), /* DI */
684 COSTS_N_INSNS (5)}, /* other */
685 0, /* cost of multiply per each bit set */
686 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
687 COSTS_N_INSNS (26), /* HI */
688 COSTS_N_INSNS (42), /* SI */
689 COSTS_N_INSNS (74), /* DI */
690 COSTS_N_INSNS (74)}, /* other */
691 COSTS_N_INSNS (1), /* cost of movsx */
692 COSTS_N_INSNS (1), /* cost of movzx */
693 8, /* "large" insn */
694 9, /* MOVE_RATIO */
695 4, /* cost for loading QImode using movzbl */
696 {3, 4, 3}, /* cost of loading integer registers
697 in QImode, HImode and SImode.
698 Relative to reg-reg move (2). */
699 {3, 4, 3}, /* cost of storing integer registers */
700 4, /* cost of reg,reg fld/fst */
701 {4, 4, 12}, /* cost of loading fp registers
702 in SFmode, DFmode and XFmode */
703 {6, 6, 8}, /* cost of storing fp registers
704 in SFmode, DFmode and XFmode */
705 2, /* cost of moving MMX register */
706 {4, 4}, /* cost of loading MMX registers
707 in SImode and DImode */
708 {4, 4}, /* cost of storing MMX registers
709 in SImode and DImode */
710 2, /* cost of moving SSE register */
711 {4, 4, 6}, /* cost of loading SSE registers
712 in SImode, DImode and TImode */
713 {4, 4, 5}, /* cost of storing SSE registers
714 in SImode, DImode and TImode */
715 5, /* MMX or SSE register to integer */
716 64, /* size of l1 cache. */
717 256, /* size of l2 cache. */
718 64, /* size of prefetch block */
719 6, /* number of parallel prefetches */
720 5, /* Branch cost */
721 COSTS_N_INSNS (4), /* cost of FADD and FSUB insns. */
722 COSTS_N_INSNS (4), /* cost of FMUL instruction. */
723 COSTS_N_INSNS (24), /* cost of FDIV instruction. */
724 COSTS_N_INSNS (2), /* cost of FABS instruction. */
725 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
726 COSTS_N_INSNS (35), /* cost of FSQRT instruction. */
727 athlon_memcpy,
728 athlon_memset,
729 1, /* scalar_stmt_cost. */
730 1, /* scalar load_cost. */
731 1, /* scalar_store_cost. */
732 1, /* vec_stmt_cost. */
733 1, /* vec_to_scalar_cost. */
734 1, /* scalar_to_vec_cost. */
735 1, /* vec_align_load_cost. */
736 2, /* vec_unalign_load_cost. */
737 1, /* vec_store_cost. */
738 3, /* cond_taken_branch_cost. */
739 1, /* cond_not_taken_branch_cost. */
740 };
742 /* K8 has an optimized REP instruction for medium-sized blocks, but for very
743 small blocks it is better to use a loop. For large blocks, libcall can
744 do nontemporal accesses and beat inline considerably. */
745 static stringop_algs k8_memcpy[2] = {
746 {libcall, {{6, loop, false}, {14, unrolled_loop, false},
747 {-1, rep_prefix_4_byte, false}}},
748 {libcall, {{16, loop, false}, {8192, rep_prefix_8_byte, false},
749 {-1, libcall, false}}}};
750 static stringop_algs k8_memset[2] = {
751 {libcall, {{8, loop, false}, {24, unrolled_loop, false},
752 {2048, rep_prefix_4_byte, false}, {-1, libcall, false}}},
753 {libcall, {{48, unrolled_loop, false},
754 {8192, rep_prefix_8_byte, false}, {-1, libcall, false}}}};
755 static const
756 struct processor_costs k8_cost = {
757 COSTS_N_INSNS (1), /* cost of an add instruction */
758 COSTS_N_INSNS (2), /* cost of a lea instruction */
759 COSTS_N_INSNS (1), /* variable shift costs */
760 COSTS_N_INSNS (1), /* constant shift costs */
761 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
762 COSTS_N_INSNS (4), /* HI */
763 COSTS_N_INSNS (3), /* SI */
764 COSTS_N_INSNS (4), /* DI */
765 COSTS_N_INSNS (5)}, /* other */
766 0, /* cost of multiply per each bit set */
767 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
768 COSTS_N_INSNS (26), /* HI */
769 COSTS_N_INSNS (42), /* SI */
770 COSTS_N_INSNS (74), /* DI */
771 COSTS_N_INSNS (74)}, /* other */
772 COSTS_N_INSNS (1), /* cost of movsx */
773 COSTS_N_INSNS (1), /* cost of movzx */
774 8, /* "large" insn */
775 9, /* MOVE_RATIO */
776 4, /* cost for loading QImode using movzbl */
777 {3, 4, 3}, /* cost of loading integer registers
778 in QImode, HImode and SImode.
779 Relative to reg-reg move (2). */
780 {3, 4, 3}, /* cost of storing integer registers */
781 4, /* cost of reg,reg fld/fst */
782 {4, 4, 12}, /* cost of loading fp registers
783 in SFmode, DFmode and XFmode */
784 {6, 6, 8}, /* cost of storing fp registers
785 in SFmode, DFmode and XFmode */
786 2, /* cost of moving MMX register */
787 {3, 3}, /* cost of loading MMX registers
788 in SImode and DImode */
789 {4, 4}, /* cost of storing MMX registers
790 in SImode and DImode */
791 2, /* cost of moving SSE register */
792 {4, 3, 6}, /* cost of loading SSE registers
793 in SImode, DImode and TImode */
794 {4, 4, 5}, /* cost of storing SSE registers
795 in SImode, DImode and TImode */
796 5, /* MMX or SSE register to integer */
797 64, /* size of l1 cache. */
798 512, /* size of l2 cache. */
799 64, /* size of prefetch block */
800 /* New AMD processors never drop prefetches; if they cannot be performed
801 immediately, they are queued. We set number of simultaneous prefetches
802 to a large constant to reflect this (it probably is not a good idea not
803 to limit number of prefetches at all, as their execution also takes some
804 time). */
805 100, /* number of parallel prefetches */
806 3, /* Branch cost */
807 COSTS_N_INSNS (4), /* cost of FADD and FSUB insns. */
808 COSTS_N_INSNS (4), /* cost of FMUL instruction. */
809 COSTS_N_INSNS (19), /* cost of FDIV instruction. */
810 COSTS_N_INSNS (2), /* cost of FABS instruction. */
811 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
812 COSTS_N_INSNS (35), /* cost of FSQRT instruction. */
814 k8_memcpy,
815 k8_memset,
816 4, /* scalar_stmt_cost. */
817 2, /* scalar load_cost. */
818 2, /* scalar_store_cost. */
819 5, /* vec_stmt_cost. */
820 0, /* vec_to_scalar_cost. */
821 2, /* scalar_to_vec_cost. */
822 2, /* vec_align_load_cost. */
823 3, /* vec_unalign_load_cost. */
824 3, /* vec_store_cost. */
825 3, /* cond_taken_branch_cost. */
826 2, /* cond_not_taken_branch_cost. */
827 };
829 /* AMDFAM10 has an optimized REP instruction for medium-sized blocks, but for
830 very small blocks it is better to use a loop. For large blocks, libcall can
831 do nontemporal accesses and beat inline considerably. */
832 static stringop_algs amdfam10_memcpy[2] = {
833 {libcall, {{6, loop, false}, {14, unrolled_loop, false},
834 {-1, rep_prefix_4_byte, false}}},
835 {libcall, {{16, loop, false}, {8192, rep_prefix_8_byte, false},
836 {-1, libcall, false}}}};
837 static stringop_algs amdfam10_memset[2] = {
838 {libcall, {{8, loop, false}, {24, unrolled_loop, false},
839 {2048, rep_prefix_4_byte, false}, {-1, libcall, false}}},
840 {libcall, {{48, unrolled_loop, false}, {8192, rep_prefix_8_byte, false},
841 {-1, libcall, false}}}};
842 struct processor_costs amdfam10_cost = {
843 COSTS_N_INSNS (1), /* cost of an add instruction */
844 COSTS_N_INSNS (2), /* cost of a lea instruction */
845 COSTS_N_INSNS (1), /* variable shift costs */
846 COSTS_N_INSNS (1), /* constant shift costs */
847 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
848 COSTS_N_INSNS (4), /* HI */
849 COSTS_N_INSNS (3), /* SI */
850 COSTS_N_INSNS (4), /* DI */
851 COSTS_N_INSNS (5)}, /* other */
852 0, /* cost of multiply per each bit set */
853 {COSTS_N_INSNS (19), /* cost of a divide/mod for QI */
854 COSTS_N_INSNS (35), /* HI */
855 COSTS_N_INSNS (51), /* SI */
856 COSTS_N_INSNS (83), /* DI */
857 COSTS_N_INSNS (83)}, /* other */
858 COSTS_N_INSNS (1), /* cost of movsx */
859 COSTS_N_INSNS (1), /* cost of movzx */
860 8, /* "large" insn */
861 9, /* MOVE_RATIO */
862 4, /* cost for loading QImode using movzbl */
863 {3, 4, 3}, /* cost of loading integer registers
864 in QImode, HImode and SImode.
865 Relative to reg-reg move (2). */
866 {3, 4, 3}, /* cost of storing integer registers */
867 4, /* cost of reg,reg fld/fst */
868 {4, 4, 12}, /* cost of loading fp registers
869 in SFmode, DFmode and XFmode */
870 {6, 6, 8}, /* cost of storing fp registers
871 in SFmode, DFmode and XFmode */
872 2, /* cost of moving MMX register */
873 {3, 3}, /* cost of loading MMX registers
874 in SImode and DImode */
875 {4, 4}, /* cost of storing MMX registers
876 in SImode and DImode */
877 2, /* cost of moving SSE register */
878 {4, 4, 3}, /* cost of loading SSE registers
879 in SImode, DImode and TImode */
880 {4, 4, 5}, /* cost of storing SSE registers
881 in SImode, DImode and TImode */
882 3, /* MMX or SSE register to integer */
883 /* On K8:
884 MOVD reg64, xmmreg Double FSTORE 4
885 MOVD reg32, xmmreg Double FSTORE 4
886 On AMDFAM10:
887 MOVD reg64, xmmreg Double FADD 3
888 1/1 1/1
889 MOVD reg32, xmmreg Double FADD 3
890 1/1 1/1 */
891 64, /* size of l1 cache. */
892 512, /* size of l2 cache. */
893 64, /* size of prefetch block */
894 /* New AMD processors never drop prefetches; if they cannot be performed
895 immediately, they are queued. We set number of simultaneous prefetches
896 to a large constant to reflect this (it probably is not a good idea not
897 to limit number of prefetches at all, as their execution also takes some
898 time). */
899 100, /* number of parallel prefetches */
900 2, /* Branch cost */
901 COSTS_N_INSNS (4), /* cost of FADD and FSUB insns. */
902 COSTS_N_INSNS (4), /* cost of FMUL instruction. */
903 COSTS_N_INSNS (19), /* cost of FDIV instruction. */
904 COSTS_N_INSNS (2), /* cost of FABS instruction. */
905 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
906 COSTS_N_INSNS (35), /* cost of FSQRT instruction. */
908 amdfam10_memcpy,
909 amdfam10_memset,
910 4, /* scalar_stmt_cost. */
911 2, /* scalar load_cost. */
912 2, /* scalar_store_cost. */
913 6, /* vec_stmt_cost. */
914 0, /* vec_to_scalar_cost. */
915 2, /* scalar_to_vec_cost. */
916 2, /* vec_align_load_cost. */
917 2, /* vec_unalign_load_cost. */
918 2, /* vec_store_cost. */
919 2, /* cond_taken_branch_cost. */
920 1, /* cond_not_taken_branch_cost. */
921 };
923 /* BDVER1 has an optimized REP instruction for medium-sized blocks, but for
924 very small blocks it is better to use a loop. For large blocks, libcall
925 can do nontemporal accesses and beat inline considerably. */
926 static stringop_algs bdver1_memcpy[2] = {
927 {libcall, {{6, loop, false}, {14, unrolled_loop, false},
928 {-1, rep_prefix_4_byte, false}}},
929 {libcall, {{16, loop, false}, {8192, rep_prefix_8_byte, false},
930 {-1, libcall, false}}}};
931 static stringop_algs bdver1_memset[2] = {
932 {libcall, {{8, loop, false}, {24, unrolled_loop, false},
933 {2048, rep_prefix_4_byte, false}, {-1, libcall, false}}},
934 {libcall, {{48, unrolled_loop, false}, {8192, rep_prefix_8_byte, false},
935 {-1, libcall, false}}}};
937 const struct processor_costs bdver1_cost = {
938 COSTS_N_INSNS (1), /* cost of an add instruction */
939 COSTS_N_INSNS (1), /* cost of a lea instruction */
940 COSTS_N_INSNS (1), /* variable shift costs */
941 COSTS_N_INSNS (1), /* constant shift costs */
942 {COSTS_N_INSNS (4), /* cost of starting multiply for QI */
943 COSTS_N_INSNS (4), /* HI */
944 COSTS_N_INSNS (4), /* SI */
945 COSTS_N_INSNS (6), /* DI */
946 COSTS_N_INSNS (6)}, /* other */
947 0, /* cost of multiply per each bit set */
948 {COSTS_N_INSNS (19), /* cost of a divide/mod for QI */
949 COSTS_N_INSNS (35), /* HI */
950 COSTS_N_INSNS (51), /* SI */
951 COSTS_N_INSNS (83), /* DI */
952 COSTS_N_INSNS (83)}, /* other */
953 COSTS_N_INSNS (1), /* cost of movsx */
954 COSTS_N_INSNS (1), /* cost of movzx */
955 8, /* "large" insn */
956 9, /* MOVE_RATIO */
957 4, /* cost for loading QImode using movzbl */
958 {5, 5, 4}, /* cost of loading integer registers
959 in QImode, HImode and SImode.
960 Relative to reg-reg move (2). */
961 {4, 4, 4}, /* cost of storing integer registers */
962 2, /* cost of reg,reg fld/fst */
963 {5, 5, 12}, /* cost of loading fp registers
964 in SFmode, DFmode and XFmode */
965 {4, 4, 8}, /* cost of storing fp registers
966 in SFmode, DFmode and XFmode */
967 2, /* cost of moving MMX register */
968 {4, 4}, /* cost of loading MMX registers
969 in SImode and DImode */
970 {4, 4}, /* cost of storing MMX registers
971 in SImode and DImode */
972 2, /* cost of moving SSE register */
973 {4, 4, 4}, /* cost of loading SSE registers
974 in SImode, DImode and TImode */
975 {4, 4, 4}, /* cost of storing SSE registers
976 in SImode, DImode and TImode */
977 2, /* MMX or SSE register to integer */
978 /* On K8:
979 MOVD reg64, xmmreg Double FSTORE 4
980 MOVD reg32, xmmreg Double FSTORE 4
981 On AMDFAM10:
982 MOVD reg64, xmmreg Double FADD 3
983 1/1 1/1
984 MOVD reg32, xmmreg Double FADD 3
985 1/1 1/1 */
986 16, /* size of l1 cache. */
987 2048, /* size of l2 cache. */
988 64, /* size of prefetch block */
989 /* New AMD processors never drop prefetches; if they cannot be performed
990 immediately, they are queued. We set number of simultaneous prefetches
991 to a large constant to reflect this (it probably is not a good idea not
992 to limit number of prefetches at all, as their execution also takes some
993 time). */
994 100, /* number of parallel prefetches */
995 2, /* Branch cost */
996 COSTS_N_INSNS (6), /* cost of FADD and FSUB insns. */
997 COSTS_N_INSNS (6), /* cost of FMUL instruction. */
998 COSTS_N_INSNS (42), /* cost of FDIV instruction. */
999 COSTS_N_INSNS (2), /* cost of FABS instruction. */
1000 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
1001 COSTS_N_INSNS (52), /* cost of FSQRT instruction. */
1003 bdver1_memcpy,
1004 bdver1_memset,
1005 6, /* scalar_stmt_cost. */
1006 4, /* scalar load_cost. */
1007 4, /* scalar_store_cost. */
1008 6, /* vec_stmt_cost. */
1009 0, /* vec_to_scalar_cost. */
1010 2, /* scalar_to_vec_cost. */
1011 4, /* vec_align_load_cost. */
1012 4, /* vec_unalign_load_cost. */
1013 4, /* vec_store_cost. */
1014 4, /* cond_taken_branch_cost. */
1015 2, /* cond_not_taken_branch_cost. */
1016 };
1018 /* BDVER2 has an optimized REP instruction for medium-sized blocks, but for
1019 very small blocks it is better to use a loop. For large blocks, libcall
1020 can do nontemporal accesses and beat inline considerably. */
1022 static stringop_algs bdver2_memcpy[2] = {
1023 {libcall, {{6, loop, false}, {14, unrolled_loop, false},
1024 {-1, rep_prefix_4_byte, false}}},
1025 {libcall, {{16, loop, false}, {8192, rep_prefix_8_byte, false},
1026 {-1, libcall, false}}}};
1027 static stringop_algs bdver2_memset[2] = {
1028 {libcall, {{8, loop, false}, {24, unrolled_loop, false},
1029 {2048, rep_prefix_4_byte, false}, {-1, libcall, false}}},
1030 {libcall, {{48, unrolled_loop, false}, {8192, rep_prefix_8_byte, false},
1031 {-1, libcall, false}}}};
1033 const struct processor_costs bdver2_cost = {
1034 COSTS_N_INSNS (1), /* cost of an add instruction */
1035 COSTS_N_INSNS (1), /* cost of a lea instruction */
1036 COSTS_N_INSNS (1), /* variable shift costs */
1037 COSTS_N_INSNS (1), /* constant shift costs */
1038 {COSTS_N_INSNS (4), /* cost of starting multiply for QI */
1039 COSTS_N_INSNS (4), /* HI */
1040 COSTS_N_INSNS (4), /* SI */
1041 COSTS_N_INSNS (6), /* DI */
1042 COSTS_N_INSNS (6)}, /* other */
1043 0, /* cost of multiply per each bit set */
1044 {COSTS_N_INSNS (19), /* cost of a divide/mod for QI */
1045 COSTS_N_INSNS (35), /* HI */
1046 COSTS_N_INSNS (51), /* SI */
1047 COSTS_N_INSNS (83), /* DI */
1048 COSTS_N_INSNS (83)}, /* other */
1049 COSTS_N_INSNS (1), /* cost of movsx */
1050 COSTS_N_INSNS (1), /* cost of movzx */
1051 8, /* "large" insn */
1052 9, /* MOVE_RATIO */
1053 4, /* cost for loading QImode using movzbl */
1054 {5, 5, 4}, /* cost of loading integer registers
1055 in QImode, HImode and SImode.
1056 Relative to reg-reg move (2). */
1057 {4, 4, 4}, /* cost of storing integer registers */
1058 2, /* cost of reg,reg fld/fst */
1059 {5, 5, 12}, /* cost of loading fp registers
1060 in SFmode, DFmode and XFmode */
1061 {4, 4, 8}, /* cost of storing fp registers
1062 in SFmode, DFmode and XFmode */
1063 2, /* cost of moving MMX register */
1064 {4, 4}, /* cost of loading MMX registers
1065 in SImode and DImode */
1066 {4, 4}, /* cost of storing MMX registers
1067 in SImode and DImode */
1068 2, /* cost of moving SSE register */
1069 {4, 4, 4}, /* cost of loading SSE registers
1070 in SImode, DImode and TImode */
1071 {4, 4, 4}, /* cost of storing SSE registers
1072 in SImode, DImode and TImode */
1073 2, /* MMX or SSE register to integer */
1074 /* On K8:
1075 MOVD reg64, xmmreg Double FSTORE 4
1076 MOVD reg32, xmmreg Double FSTORE 4
1077 On AMDFAM10:
1078 MOVD reg64, xmmreg Double FADD 3
1079 1/1 1/1
1080 MOVD reg32, xmmreg Double FADD 3
1081 1/1 1/1 */
1082 16, /* size of l1 cache. */
1083 2048, /* size of l2 cache. */
1084 64, /* size of prefetch block */
1085 /* New AMD processors never drop prefetches; if they cannot be performed
1086 immediately, they are queued. We set number of simultaneous prefetches
1087 to a large constant to reflect this (it probably is not a good idea not
1088 to limit number of prefetches at all, as their execution also takes some
1089 time). */
1090 100, /* number of parallel prefetches */
1091 2, /* Branch cost */
1092 COSTS_N_INSNS (6), /* cost of FADD and FSUB insns. */
1093 COSTS_N_INSNS (6), /* cost of FMUL instruction. */
1094 COSTS_N_INSNS (42), /* cost of FDIV instruction. */
1095 COSTS_N_INSNS (2), /* cost of FABS instruction. */
1096 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
1097 COSTS_N_INSNS (52), /* cost of FSQRT instruction. */
1099 bdver2_memcpy,
1100 bdver2_memset,
1101 6, /* scalar_stmt_cost. */
1102 4, /* scalar load_cost. */
1103 4, /* scalar_store_cost. */
1104 6, /* vec_stmt_cost. */
1105 0, /* vec_to_scalar_cost. */
1106 2, /* scalar_to_vec_cost. */
1107 4, /* vec_align_load_cost. */
1108 4, /* vec_unalign_load_cost. */
1109 4, /* vec_store_cost. */
1110 4, /* cond_taken_branch_cost. */
1111 2, /* cond_not_taken_branch_cost. */
1112 };
1115 /* BDVER3 has an optimized REP instruction for medium-sized blocks, but for
1116 very small blocks it is better to use a loop. For large blocks, libcall
1117 can do nontemporal accesses and beat inline considerably. */
1118 static stringop_algs bdver3_memcpy[2] = {
1119 {libcall, {{6, loop, false}, {14, unrolled_loop, false},
1120 {-1, rep_prefix_4_byte, false}}},
1121 {libcall, {{16, loop, false}, {8192, rep_prefix_8_byte, false},
1122 {-1, libcall, false}}}};
1123 static stringop_algs bdver3_memset[2] = {
1124 {libcall, {{8, loop, false}, {24, unrolled_loop, false},
1125 {2048, rep_prefix_4_byte, false}, {-1, libcall, false}}},
1126 {libcall, {{48, unrolled_loop, false}, {8192, rep_prefix_8_byte, false},
1127 {-1, libcall, false}}}};
1128 struct processor_costs bdver3_cost = {
1129 COSTS_N_INSNS (1), /* cost of an add instruction */
1130 COSTS_N_INSNS (1), /* cost of a lea instruction */
1131 COSTS_N_INSNS (1), /* variable shift costs */
1132 COSTS_N_INSNS (1), /* constant shift costs */
1133 {COSTS_N_INSNS (4), /* cost of starting multiply for QI */
1134 COSTS_N_INSNS (4), /* HI */
1135 COSTS_N_INSNS (4), /* SI */
1136 COSTS_N_INSNS (6), /* DI */
1137 COSTS_N_INSNS (6)}, /* other */
1138 0, /* cost of multiply per each bit set */
1139 {COSTS_N_INSNS (19), /* cost of a divide/mod for QI */
1140 COSTS_N_INSNS (35), /* HI */
1141 COSTS_N_INSNS (51), /* SI */
1142 COSTS_N_INSNS (83), /* DI */
1143 COSTS_N_INSNS (83)}, /* other */
1144 COSTS_N_INSNS (1), /* cost of movsx */
1145 COSTS_N_INSNS (1), /* cost of movzx */
1146 8, /* "large" insn */
1147 9, /* MOVE_RATIO */
1148 4, /* cost for loading QImode using movzbl */
1149 {5, 5, 4}, /* cost of loading integer registers
1150 in QImode, HImode and SImode.
1151 Relative to reg-reg move (2). */
1152 {4, 4, 4}, /* cost of storing integer registers */
1153 2, /* cost of reg,reg fld/fst */
1154 {5, 5, 12}, /* cost of loading fp registers
1155 in SFmode, DFmode and XFmode */
1156 {4, 4, 8}, /* cost of storing fp registers
1157 in SFmode, DFmode and XFmode */
1158 2, /* cost of moving MMX register */
1159 {4, 4}, /* cost of loading MMX registers
1160 in SImode and DImode */
1161 {4, 4}, /* cost of storing MMX registers
1162 in SImode and DImode */
1163 2, /* cost of moving SSE register */
1164 {4, 4, 4}, /* cost of loading SSE registers
1165 in SImode, DImode and TImode */
1166 {4, 4, 4}, /* cost of storing SSE registers
1167 in SImode, DImode and TImode */
1168 2, /* MMX or SSE register to integer */
1169 16, /* size of l1 cache. */
1170 2048, /* size of l2 cache. */
1171 64, /* size of prefetch block */
1172 /* New AMD processors never drop prefetches; if they cannot be performed
1173 immediately, they are queued. We set number of simultaneous prefetches
1174 to a large constant to reflect this (it probably is not a good idea not
1175 to limit number of prefetches at all, as their execution also takes some
1176 time). */
1177 100, /* number of parallel prefetches */
1178 2, /* Branch cost */
1179 COSTS_N_INSNS (6), /* cost of FADD and FSUB insns. */
1180 COSTS_N_INSNS (6), /* cost of FMUL instruction. */
1181 COSTS_N_INSNS (42), /* cost of FDIV instruction. */
1182 COSTS_N_INSNS (2), /* cost of FABS instruction. */
1183 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
1184 COSTS_N_INSNS (52), /* cost of FSQRT instruction. */
1186 bdver3_memcpy,
1187 bdver3_memset,
1188 6, /* scalar_stmt_cost. */
1189 4, /* scalar load_cost. */
1190 4, /* scalar_store_cost. */
1191 6, /* vec_stmt_cost. */
1192 0, /* vec_to_scalar_cost. */
1193 2, /* scalar_to_vec_cost. */
1194 4, /* vec_align_load_cost. */
1195 4, /* vec_unalign_load_cost. */
1196 4, /* vec_store_cost. */
1197 4, /* cond_taken_branch_cost. */
1198 2, /* cond_not_taken_branch_cost. */
1199 };
1201 /* BDVER4 has an optimized REP instruction for medium-sized blocks, but for
1202 very small blocks it is better to use a loop. For large blocks, libcall
1203 can do nontemporal accesses and beat inline considerably. */
1204 static stringop_algs bdver4_memcpy[2] = {
1205 {libcall, {{6, loop, false}, {14, unrolled_loop, false},
1206 {-1, rep_prefix_4_byte, false}}},
1207 {libcall, {{16, loop, false}, {8192, rep_prefix_8_byte, false},
1208 {-1, libcall, false}}}};
1209 static stringop_algs bdver4_memset[2] = {
1210 {libcall, {{8, loop, false}, {24, unrolled_loop, false},
1211 {2048, rep_prefix_4_byte, false}, {-1, libcall, false}}},
1212 {libcall, {{48, unrolled_loop, false}, {8192, rep_prefix_8_byte, false},
1213 {-1, libcall, false}}}};
1214 struct processor_costs bdver4_cost = {
1215 COSTS_N_INSNS (1), /* cost of an add instruction */
1216 COSTS_N_INSNS (1), /* cost of a lea instruction */
1217 COSTS_N_INSNS (1), /* variable shift costs */
1218 COSTS_N_INSNS (1), /* constant shift costs */
1219 {COSTS_N_INSNS (4), /* cost of starting multiply for QI */
1220 COSTS_N_INSNS (4), /* HI */
1221 COSTS_N_INSNS (4), /* SI */
1222 COSTS_N_INSNS (6), /* DI */
1223 COSTS_N_INSNS (6)}, /* other */
1224 0, /* cost of multiply per each bit set */
1225 {COSTS_N_INSNS (19), /* cost of a divide/mod for QI */
1226 COSTS_N_INSNS (35), /* HI */
1227 COSTS_N_INSNS (51), /* SI */
1228 COSTS_N_INSNS (83), /* DI */
1229 COSTS_N_INSNS (83)}, /* other */
1230 COSTS_N_INSNS (1), /* cost of movsx */
1231 COSTS_N_INSNS (1), /* cost of movzx */
1232 8, /* "large" insn */
1233 9, /* MOVE_RATIO */
1234 4, /* cost for loading QImode using movzbl */
1235 {5, 5, 4}, /* cost of loading integer registers
1236 in QImode, HImode and SImode.
1237 Relative to reg-reg move (2). */
1238 {4, 4, 4}, /* cost of storing integer registers */
1239 2, /* cost of reg,reg fld/fst */
1240 {5, 5, 12}, /* cost of loading fp registers
1241 in SFmode, DFmode and XFmode */
1242 {4, 4, 8}, /* cost of storing fp registers
1243 in SFmode, DFmode and XFmode */
1244 2, /* cost of moving MMX register */
1245 {4, 4}, /* cost of loading MMX registers
1246 in SImode and DImode */
1247 {4, 4}, /* cost of storing MMX registers
1248 in SImode and DImode */
1249 2, /* cost of moving SSE register */
1250 {4, 4, 4}, /* cost of loading SSE registers
1251 in SImode, DImode and TImode */
1252 {4, 4, 4}, /* cost of storing SSE registers
1253 in SImode, DImode and TImode */
1254 2, /* MMX or SSE register to integer */
1255 16, /* size of l1 cache. */
1256 2048, /* size of l2 cache. */
1257 64, /* size of prefetch block */
1258 /* New AMD processors never drop prefetches; if they cannot be performed
1259 immediately, they are queued. We set number of simultaneous prefetches
1260 to a large constant to reflect this (it probably is not a good idea not
1261 to limit number of prefetches at all, as their execution also takes some
1262 time). */
1263 100, /* number of parallel prefetches */
1264 2, /* Branch cost */
1265 COSTS_N_INSNS (6), /* cost of FADD and FSUB insns. */
1266 COSTS_N_INSNS (6), /* cost of FMUL instruction. */
1267 COSTS_N_INSNS (42), /* cost of FDIV instruction. */
1268 COSTS_N_INSNS (2), /* cost of FABS instruction. */
1269 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
1270 COSTS_N_INSNS (52), /* cost of FSQRT instruction. */
1272 bdver4_memcpy,
1273 bdver4_memset,
1274 6, /* scalar_stmt_cost. */
1275 4, /* scalar load_cost. */
1276 4, /* scalar_store_cost. */
1277 6, /* vec_stmt_cost. */
1278 0, /* vec_to_scalar_cost. */
1279 2, /* scalar_to_vec_cost. */
1280 4, /* vec_align_load_cost. */
1281 4, /* vec_unalign_load_cost. */
1282 4, /* vec_store_cost. */
1283 4, /* cond_taken_branch_cost. */
1284 2, /* cond_not_taken_branch_cost. */
1285 };
1287 /* BTVER1 has an optimized REP instruction for medium-sized blocks, but for
1288 very small blocks it is better to use a loop. For large blocks, libcall can
1289 do nontemporal accesses and beat inline considerably. */
1290 static stringop_algs btver1_memcpy[2] = {
1291 {libcall, {{6, loop, false}, {14, unrolled_loop, false},
1292 {-1, rep_prefix_4_byte, false}}},
1293 {libcall, {{16, loop, false}, {8192, rep_prefix_8_byte, false},
1294 {-1, libcall, false}}}};
1295 static stringop_algs btver1_memset[2] = {
1296 {libcall, {{8, loop, false}, {24, unrolled_loop, false},
1297 {2048, rep_prefix_4_byte, false}, {-1, libcall, false}}},
1298 {libcall, {{48, unrolled_loop, false}, {8192, rep_prefix_8_byte, false},
1299 {-1, libcall, false}}}};
1300 const struct processor_costs btver1_cost = {
1301 COSTS_N_INSNS (1), /* cost of an add instruction */
1302 COSTS_N_INSNS (2), /* cost of a lea instruction */
1303 COSTS_N_INSNS (1), /* variable shift costs */
1304 COSTS_N_INSNS (1), /* constant shift costs */
1305 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
1306 COSTS_N_INSNS (4), /* HI */
1307 COSTS_N_INSNS (3), /* SI */
1308 COSTS_N_INSNS (4), /* DI */
1309 COSTS_N_INSNS (5)}, /* other */
1310 0, /* cost of multiply per each bit set */
1311 {COSTS_N_INSNS (19), /* cost of a divide/mod for QI */
1312 COSTS_N_INSNS (35), /* HI */
1313 COSTS_N_INSNS (51), /* SI */
1314 COSTS_N_INSNS (83), /* DI */
1315 COSTS_N_INSNS (83)}, /* other */
1316 COSTS_N_INSNS (1), /* cost of movsx */
1317 COSTS_N_INSNS (1), /* cost of movzx */
1318 8, /* "large" insn */
1319 9, /* MOVE_RATIO */
1320 4, /* cost for loading QImode using movzbl */
1321 {3, 4, 3}, /* cost of loading integer registers
1322 in QImode, HImode and SImode.
1323 Relative to reg-reg move (2). */
1324 {3, 4, 3}, /* cost of storing integer registers */
1325 4, /* cost of reg,reg fld/fst */
1326 {4, 4, 12}, /* cost of loading fp registers
1327 in SFmode, DFmode and XFmode */
1328 {6, 6, 8}, /* cost of storing fp registers
1329 in SFmode, DFmode and XFmode */
1330 2, /* cost of moving MMX register */
1331 {3, 3}, /* cost of loading MMX registers
1332 in SImode and DImode */
1333 {4, 4}, /* cost of storing MMX registers
1334 in SImode and DImode */
1335 2, /* cost of moving SSE register */
1336 {4, 4, 3}, /* cost of loading SSE registers
1337 in SImode, DImode and TImode */
1338 {4, 4, 5}, /* cost of storing SSE registers
1339 in SImode, DImode and TImode */
1340 3, /* MMX or SSE register to integer */
1341 /* On K8:
1342 MOVD reg64, xmmreg Double FSTORE 4
1343 MOVD reg32, xmmreg Double FSTORE 4
1344 On AMDFAM10:
1345 MOVD reg64, xmmreg Double FADD 3
1346 1/1 1/1
1347 MOVD reg32, xmmreg Double FADD 3
1348 1/1 1/1 */
1349 32, /* size of l1 cache. */
1350 512, /* size of l2 cache. */
1351 64, /* size of prefetch block */
1352 100, /* number of parallel prefetches */
1353 2, /* Branch cost */
1354 COSTS_N_INSNS (4), /* cost of FADD and FSUB insns. */
1355 COSTS_N_INSNS (4), /* cost of FMUL instruction. */
1356 COSTS_N_INSNS (19), /* cost of FDIV instruction. */
1357 COSTS_N_INSNS (2), /* cost of FABS instruction. */
1358 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
1359 COSTS_N_INSNS (35), /* cost of FSQRT instruction. */
1361 btver1_memcpy,
1362 btver1_memset,
1363 4, /* scalar_stmt_cost. */
1364 2, /* scalar load_cost. */
1365 2, /* scalar_store_cost. */
1366 6, /* vec_stmt_cost. */
1367 0, /* vec_to_scalar_cost. */
1368 2, /* scalar_to_vec_cost. */
1369 2, /* vec_align_load_cost. */
1370 2, /* vec_unalign_load_cost. */
1371 2, /* vec_store_cost. */
1372 2, /* cond_taken_branch_cost. */
1373 1, /* cond_not_taken_branch_cost. */
1374 };
1376 static stringop_algs btver2_memcpy[2] = {
1377 {libcall, {{6, loop, false}, {14, unrolled_loop, false},
1378 {-1, rep_prefix_4_byte, false}}},
1379 {libcall, {{16, loop, false}, {8192, rep_prefix_8_byte, false},
1380 {-1, libcall, false}}}};
1381 static stringop_algs btver2_memset[2] = {
1382 {libcall, {{8, loop, false}, {24, unrolled_loop, false},
1383 {2048, rep_prefix_4_byte, false}, {-1, libcall, false}}},
1384 {libcall, {{48, unrolled_loop, false}, {8192, rep_prefix_8_byte, false},
1385 {-1, libcall, false}}}};
1386 const struct processor_costs btver2_cost = {
1387 COSTS_N_INSNS (1), /* cost of an add instruction */
1388 COSTS_N_INSNS (2), /* cost of a lea instruction */
1389 COSTS_N_INSNS (1), /* variable shift costs */
1390 COSTS_N_INSNS (1), /* constant shift costs */
1391 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
1392 COSTS_N_INSNS (4), /* HI */
1393 COSTS_N_INSNS (3), /* SI */
1394 COSTS_N_INSNS (4), /* DI */
1395 COSTS_N_INSNS (5)}, /* other */
1396 0, /* cost of multiply per each bit set */
1397 {COSTS_N_INSNS (19), /* cost of a divide/mod for QI */
1398 COSTS_N_INSNS (35), /* HI */
1399 COSTS_N_INSNS (51), /* SI */
1400 COSTS_N_INSNS (83), /* DI */
1401 COSTS_N_INSNS (83)}, /* other */
1402 COSTS_N_INSNS (1), /* cost of movsx */
1403 COSTS_N_INSNS (1), /* cost of movzx */
1404 8, /* "large" insn */
1405 9, /* MOVE_RATIO */
1406 4, /* cost for loading QImode using movzbl */
1407 {3, 4, 3}, /* cost of loading integer registers
1408 in QImode, HImode and SImode.
1409 Relative to reg-reg move (2). */
1410 {3, 4, 3}, /* cost of storing integer registers */
1411 4, /* cost of reg,reg fld/fst */
1412 {4, 4, 12}, /* cost of loading fp registers
1413 in SFmode, DFmode and XFmode */
1414 {6, 6, 8}, /* cost of storing fp registers
1415 in SFmode, DFmode and XFmode */
1416 2, /* cost of moving MMX register */
1417 {3, 3}, /* cost of loading MMX registers
1418 in SImode and DImode */
1419 {4, 4}, /* cost of storing MMX registers
1420 in SImode and DImode */
1421 2, /* cost of moving SSE register */
1422 {4, 4, 3}, /* cost of loading SSE registers
1423 in SImode, DImode and TImode */
1424 {4, 4, 5}, /* cost of storing SSE registers
1425 in SImode, DImode and TImode */
1426 3, /* MMX or SSE register to integer */
1427 /* On K8:
1428 MOVD reg64, xmmreg Double FSTORE 4
1429 MOVD reg32, xmmreg Double FSTORE 4
1430 On AMDFAM10:
1431 MOVD reg64, xmmreg Double FADD 3
1432 1/1 1/1
1433 MOVD reg32, xmmreg Double FADD 3
1434 1/1 1/1 */
1435 32, /* size of l1 cache. */
1436 2048, /* size of l2 cache. */
1437 64, /* size of prefetch block */
1438 100, /* number of parallel prefetches */
1439 2, /* Branch cost */
1440 COSTS_N_INSNS (4), /* cost of FADD and FSUB insns. */
1441 COSTS_N_INSNS (4), /* cost of FMUL instruction. */
1442 COSTS_N_INSNS (19), /* cost of FDIV instruction. */
1443 COSTS_N_INSNS (2), /* cost of FABS instruction. */
1444 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
1445 COSTS_N_INSNS (35), /* cost of FSQRT instruction. */
1446 btver2_memcpy,
1447 btver2_memset,
1448 4, /* scalar_stmt_cost. */
1449 2, /* scalar load_cost. */
1450 2, /* scalar_store_cost. */
1451 6, /* vec_stmt_cost. */
1452 0, /* vec_to_scalar_cost. */
1453 2, /* scalar_to_vec_cost. */
1454 2, /* vec_align_load_cost. */
1455 2, /* vec_unalign_load_cost. */
1456 2, /* vec_store_cost. */
1457 2, /* cond_taken_branch_cost. */
1458 1, /* cond_not_taken_branch_cost. */
1461 static stringop_algs pentium4_memcpy[2] = {
1462 {libcall, {{12, loop_1_byte, false}, {-1, rep_prefix_4_byte, false}}},
1463 DUMMY_STRINGOP_ALGS};
1464 static stringop_algs pentium4_memset[2] = {
1465 {libcall, {{6, loop_1_byte, false}, {48, loop, false},
1466 {20480, rep_prefix_4_byte, false}, {-1, libcall, false}}},
1467 DUMMY_STRINGOP_ALGS};
1469 static const
1470 struct processor_costs pentium4_cost = {
1471 COSTS_N_INSNS (1), /* cost of an add instruction */
1472 COSTS_N_INSNS (3), /* cost of a lea instruction */
1473 COSTS_N_INSNS (4), /* variable shift costs */
1474 COSTS_N_INSNS (4), /* constant shift costs */
1475 {COSTS_N_INSNS (15), /* cost of starting multiply for QI */
1476 COSTS_N_INSNS (15), /* HI */
1477 COSTS_N_INSNS (15), /* SI */
1478 COSTS_N_INSNS (15), /* DI */
1479 COSTS_N_INSNS (15)}, /* other */
1480 0, /* cost of multiply per each bit set */
1481 {COSTS_N_INSNS (56), /* cost of a divide/mod for QI */
1482 COSTS_N_INSNS (56), /* HI */
1483 COSTS_N_INSNS (56), /* SI */
1484 COSTS_N_INSNS (56), /* DI */
1485 COSTS_N_INSNS (56)}, /* other */
1486 COSTS_N_INSNS (1), /* cost of movsx */
1487 COSTS_N_INSNS (1), /* cost of movzx */
1488 16, /* "large" insn */
1489 6, /* MOVE_RATIO */
1490 2, /* cost for loading QImode using movzbl */
1491 {4, 5, 4}, /* cost of loading integer registers
1492 in QImode, HImode and SImode.
1493 Relative to reg-reg move (2). */
1494 {2, 3, 2}, /* cost of storing integer registers */
1495 2, /* cost of reg,reg fld/fst */
1496 {2, 2, 6}, /* cost of loading fp registers
1497 in SFmode, DFmode and XFmode */
1498 {4, 4, 6}, /* cost of storing fp registers
1499 in SFmode, DFmode and XFmode */
1500 2, /* cost of moving MMX register */
1501 {2, 2}, /* cost of loading MMX registers
1502 in SImode and DImode */
1503 {2, 2}, /* cost of storing MMX registers
1504 in SImode and DImode */
1505 12, /* cost of moving SSE register */
1506 {12, 12, 12}, /* cost of loading SSE registers
1507 in SImode, DImode and TImode */
1508 {2, 2, 8}, /* cost of storing SSE registers
1509 in SImode, DImode and TImode */
1510 10, /* MMX or SSE register to integer */
1511 8, /* size of l1 cache. */
1512 256, /* size of l2 cache. */
1513 64, /* size of prefetch block */
1514 6, /* number of parallel prefetches */
1515 2, /* Branch cost */
1516 COSTS_N_INSNS (5), /* cost of FADD and FSUB insns. */
1517 COSTS_N_INSNS (7), /* cost of FMUL instruction. */
1518 COSTS_N_INSNS (43), /* cost of FDIV instruction. */
1519 COSTS_N_INSNS (2), /* cost of FABS instruction. */
1520 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
1521 COSTS_N_INSNS (43), /* cost of FSQRT instruction. */
1522 pentium4_memcpy,
1523 pentium4_memset,
1524 1, /* scalar_stmt_cost. */
1525 1, /* scalar load_cost. */
1526 1, /* scalar_store_cost. */
1527 1, /* vec_stmt_cost. */
1528 1, /* vec_to_scalar_cost. */
1529 1, /* scalar_to_vec_cost. */
1530 1, /* vec_align_load_cost. */
1531 2, /* vec_unalign_load_cost. */
1532 1, /* vec_store_cost. */
1533 3, /* cond_taken_branch_cost. */
1534 1, /* cond_not_taken_branch_cost. */
1537 static stringop_algs nocona_memcpy[2] = {
1538 {libcall, {{12, loop_1_byte, false}, {-1, rep_prefix_4_byte, false}}},
1539 {libcall, {{32, loop, false}, {20000, rep_prefix_8_byte, false},
1540 {100000, unrolled_loop, false}, {-1, libcall, false}}}};
1542 static stringop_algs nocona_memset[2] = {
1543 {libcall, {{6, loop_1_byte, false}, {48, loop, false},
1544 {20480, rep_prefix_4_byte, false}, {-1, libcall, false}}},
1545 {libcall, {{24, loop, false}, {64, unrolled_loop, false},
1546 {8192, rep_prefix_8_byte, false}, {-1, libcall, false}}}};
1548 static const
1549 struct processor_costs nocona_cost = {
1550 COSTS_N_INSNS (1), /* cost of an add instruction */
1551 COSTS_N_INSNS (1), /* cost of a lea instruction */
1552 COSTS_N_INSNS (1), /* variable shift costs */
1553 COSTS_N_INSNS (1), /* constant shift costs */
1554 {COSTS_N_INSNS (10), /* cost of starting multiply for QI */
1555 COSTS_N_INSNS (10), /* HI */
1556 COSTS_N_INSNS (10), /* SI */
1557 COSTS_N_INSNS (10), /* DI */
1558 COSTS_N_INSNS (10)}, /* other */
1559 0, /* cost of multiply per each bit set */
1560 {COSTS_N_INSNS (66), /* cost of a divide/mod for QI */
1561 COSTS_N_INSNS (66), /* HI */
1562 COSTS_N_INSNS (66), /* SI */
1563 COSTS_N_INSNS (66), /* DI */
1564 COSTS_N_INSNS (66)}, /* other */
1565 COSTS_N_INSNS (1), /* cost of movsx */
1566 COSTS_N_INSNS (1), /* cost of movzx */
1567 16, /* "large" insn */
1568 17, /* MOVE_RATIO */
1569 4, /* cost for loading QImode using movzbl */
1570 {4, 4, 4}, /* cost of loading integer registers
1571 in QImode, HImode and SImode.
1572 Relative to reg-reg move (2). */
1573 {4, 4, 4}, /* cost of storing integer registers */
1574 3, /* cost of reg,reg fld/fst */
1575 {12, 12, 12}, /* cost of loading fp registers
1576 in SFmode, DFmode and XFmode */
1577 {4, 4, 4}, /* cost of storing fp registers
1578 in SFmode, DFmode and XFmode */
1579 6, /* cost of moving MMX register */
1580 {12, 12}, /* cost of loading MMX registers
1581 in SImode and DImode */
1582 {12, 12}, /* cost of storing MMX registers
1583 in SImode and DImode */
1584 6, /* cost of moving SSE register */
1585 {12, 12, 12}, /* cost of loading SSE registers
1586 in SImode, DImode and TImode */
1587 {12, 12, 12}, /* cost of storing SSE registers
1588 in SImode, DImode and TImode */
1589 8, /* MMX or SSE register to integer */
1590 8, /* size of l1 cache. */
1591 1024, /* size of l2 cache. */
1592 64, /* size of prefetch block */
1593 8, /* number of parallel prefetches */
1594 1, /* Branch cost */
1595 COSTS_N_INSNS (6), /* cost of FADD and FSUB insns. */
1596 COSTS_N_INSNS (8), /* cost of FMUL instruction. */
1597 COSTS_N_INSNS (40), /* cost of FDIV instruction. */
1598 COSTS_N_INSNS (3), /* cost of FABS instruction. */
1599 COSTS_N_INSNS (3), /* cost of FCHS instruction. */
1600 COSTS_N_INSNS (44), /* cost of FSQRT instruction. */
1601 nocona_memcpy,
1602 nocona_memset,
1603 1, /* scalar_stmt_cost. */
1604 1, /* scalar load_cost. */
1605 1, /* scalar_store_cost. */
1606 1, /* vec_stmt_cost. */
1607 1, /* vec_to_scalar_cost. */
1608 1, /* scalar_to_vec_cost. */
1609 1, /* vec_align_load_cost. */
1610 2, /* vec_unalign_load_cost. */
1611 1, /* vec_store_cost. */
1612 3, /* cond_taken_branch_cost. */
1613 1, /* cond_not_taken_branch_cost. */
1616 static stringop_algs atom_memcpy[2] = {
1617 {libcall, {{11, loop, false}, {-1, rep_prefix_4_byte, false}}},
1618 {libcall, {{32, loop, false}, {64, rep_prefix_4_byte, false},
1619 {8192, rep_prefix_8_byte, false}, {-1, libcall, false}}}};
1620 static stringop_algs atom_memset[2] = {
1621 {libcall, {{8, loop, false}, {15, unrolled_loop, false},
1622 {2048, rep_prefix_4_byte, false}, {-1, libcall, false}}},
1623 {libcall, {{24, loop, false}, {32, unrolled_loop, false},
1624 {8192, rep_prefix_8_byte, false}, {-1, libcall, false}}}};
1625 static const
1626 struct processor_costs atom_cost = {
1627 COSTS_N_INSNS (1), /* cost of an add instruction */
1628 COSTS_N_INSNS (1) + 1, /* cost of a lea instruction */
1629 COSTS_N_INSNS (1), /* variable shift costs */
1630 COSTS_N_INSNS (1), /* constant shift costs */
1631 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
1632 COSTS_N_INSNS (4), /* HI */
1633 COSTS_N_INSNS (3), /* SI */
1634 COSTS_N_INSNS (4), /* DI */
1635 COSTS_N_INSNS (2)}, /* other */
1636 0, /* cost of multiply per each bit set */
1637 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
1638 COSTS_N_INSNS (26), /* HI */
1639 COSTS_N_INSNS (42), /* SI */
1640 COSTS_N_INSNS (74), /* DI */
1641 COSTS_N_INSNS (74)}, /* other */
1642 COSTS_N_INSNS (1), /* cost of movsx */
1643 COSTS_N_INSNS (1), /* cost of movzx */
1644 8, /* "large" insn */
1645 17, /* MOVE_RATIO */
1646 4, /* cost for loading QImode using movzbl */
1647 {4, 4, 4}, /* cost of loading integer registers
1648 in QImode, HImode and SImode.
1649 Relative to reg-reg move (2). */
1650 {4, 4, 4}, /* cost of storing integer registers */
1651 4, /* cost of reg,reg fld/fst */
1652 {12, 12, 12}, /* cost of loading fp registers
1653 in SFmode, DFmode and XFmode */
1654 {6, 6, 8}, /* cost of storing fp registers
1655 in SFmode, DFmode and XFmode */
1656 2, /* cost of moving MMX register */
1657 {8, 8}, /* cost of loading MMX registers
1658 in SImode and DImode */
1659 {8, 8}, /* cost of storing MMX registers
1660 in SImode and DImode */
1661 2, /* cost of moving SSE register */
1662 {8, 8, 8}, /* cost of loading SSE registers
1663 in SImode, DImode and TImode */
1664 {8, 8, 8}, /* cost of storing SSE registers
1665 in SImode, DImode and TImode */
1666 5, /* MMX or SSE register to integer */
1667 32, /* size of l1 cache. */
1668 256, /* size of l2 cache. */
1669 64, /* size of prefetch block */
1670 6, /* number of parallel prefetches */
1671 3, /* Branch cost */
1672 COSTS_N_INSNS (8), /* cost of FADD and FSUB insns. */
1673 COSTS_N_INSNS (8), /* cost of FMUL instruction. */
1674 COSTS_N_INSNS (20), /* cost of FDIV instruction. */
1675 COSTS_N_INSNS (8), /* cost of FABS instruction. */
1676 COSTS_N_INSNS (8), /* cost of FCHS instruction. */
1677 COSTS_N_INSNS (40), /* cost of FSQRT instruction. */
1678 atom_memcpy,
1679 atom_memset,
1680 1, /* scalar_stmt_cost. */
1681 1, /* scalar load_cost. */
1682 1, /* scalar_store_cost. */
1683 1, /* vec_stmt_cost. */
1684 1, /* vec_to_scalar_cost. */
1685 1, /* scalar_to_vec_cost. */
1686 1, /* vec_align_load_cost. */
1687 2, /* vec_unalign_load_cost. */
1688 1, /* vec_store_cost. */
1689 3, /* cond_taken_branch_cost. */
1690 1, /* cond_not_taken_branch_cost. */
1693 static stringop_algs slm_memcpy[2] = {
1694 {libcall, {{11, loop, false}, {-1, rep_prefix_4_byte, false}}},
1695 {libcall, {{32, loop, false}, {64, rep_prefix_4_byte, false},
1696 {8192, rep_prefix_8_byte, false}, {-1, libcall, false}}}};
1697 static stringop_algs slm_memset[2] = {
1698 {libcall, {{8, loop, false}, {15, unrolled_loop, false},
1699 {2048, rep_prefix_4_byte, false}, {-1, libcall, false}}},
1700 {libcall, {{24, loop, false}, {32, unrolled_loop, false},
1701 {8192, rep_prefix_8_byte, false}, {-1, libcall, false}}}};
1702 static const
1703 struct processor_costs slm_cost = {
1704 COSTS_N_INSNS (1), /* cost of an add instruction */
1705 COSTS_N_INSNS (1) + 1, /* cost of a lea instruction */
1706 COSTS_N_INSNS (1), /* variable shift costs */
1707 COSTS_N_INSNS (1), /* constant shift costs */
1708 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
1709 COSTS_N_INSNS (3), /* HI */
1710 COSTS_N_INSNS (3), /* SI */
1711 COSTS_N_INSNS (4), /* DI */
1712 COSTS_N_INSNS (2)}, /* other */
1713 0, /* cost of multiply per each bit set */
1714 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
1715 COSTS_N_INSNS (26), /* HI */
1716 COSTS_N_INSNS (42), /* SI */
1717 COSTS_N_INSNS (74), /* DI */
1718 COSTS_N_INSNS (74)}, /* other */
1719 COSTS_N_INSNS (1), /* cost of movsx */
1720 COSTS_N_INSNS (1), /* cost of movzx */
1721 8, /* "large" insn */
1722 17, /* MOVE_RATIO */
1723 4, /* cost for loading QImode using movzbl */
1724 {4, 4, 4}, /* cost of loading integer registers
1725 in QImode, HImode and SImode.
1726 Relative to reg-reg move (2). */
1727 {4, 4, 4}, /* cost of storing integer registers */
1728 4, /* cost of reg,reg fld/fst */
1729 {12, 12, 12}, /* cost of loading fp registers
1730 in SFmode, DFmode and XFmode */
1731 {6, 6, 8}, /* cost of storing fp registers
1732 in SFmode, DFmode and XFmode */
1733 2, /* cost of moving MMX register */
1734 {8, 8}, /* cost of loading MMX registers
1735 in SImode and DImode */
1736 {8, 8}, /* cost of storing MMX registers
1737 in SImode and DImode */
1738 2, /* cost of moving SSE register */
1739 {8, 8, 8}, /* cost of loading SSE registers
1740 in SImode, DImode and TImode */
1741 {8, 8, 8}, /* cost of storing SSE registers
1742 in SImode, DImode and TImode */
1743 5, /* MMX or SSE register to integer */
1744 32, /* size of l1 cache. */
1745 256, /* size of l2 cache. */
1746 64, /* size of prefetch block */
1747 6, /* number of parallel prefetches */
1748 3, /* Branch cost */
1749 COSTS_N_INSNS (8), /* cost of FADD and FSUB insns. */
1750 COSTS_N_INSNS (8), /* cost of FMUL instruction. */
1751 COSTS_N_INSNS (20), /* cost of FDIV instruction. */
1752 COSTS_N_INSNS (8), /* cost of FABS instruction. */
1753 COSTS_N_INSNS (8), /* cost of FCHS instruction. */
1754 COSTS_N_INSNS (40), /* cost of FSQRT instruction. */
1755 slm_memcpy,
1756 slm_memset,
1757 1, /* scalar_stmt_cost. */
1758 1, /* scalar load_cost. */
1759 1, /* scalar_store_cost. */
1760 1, /* vec_stmt_cost. */
1761 4, /* vec_to_scalar_cost. */
1762 1, /* scalar_to_vec_cost. */
1763 1, /* vec_align_load_cost. */
1764 2, /* vec_unalign_load_cost. */
1765 1, /* vec_store_cost. */
1766 3, /* cond_taken_branch_cost. */
1767 1, /* cond_not_taken_branch_cost. */
1770 static stringop_algs intel_memcpy[2] = {
1771 {libcall, {{11, loop, false}, {-1, rep_prefix_4_byte, false}}},
1772 {libcall, {{32, loop, false}, {64, rep_prefix_4_byte, false},
1773 {8192, rep_prefix_8_byte, false}, {-1, libcall, false}}}};
1774 static stringop_algs intel_memset[2] = {
1775 {libcall, {{8, loop, false}, {15, unrolled_loop, false},
1776 {2048, rep_prefix_4_byte, false}, {-1, libcall, false}}},
1777 {libcall, {{24, loop, false}, {32, unrolled_loop, false},
1778 {8192, rep_prefix_8_byte, false}, {-1, libcall, false}}}};
1779 static const
1780 struct processor_costs intel_cost = {
1781 COSTS_N_INSNS (1), /* cost of an add instruction */
1782 COSTS_N_INSNS (1) + 1, /* cost of a lea instruction */
1783 COSTS_N_INSNS (1), /* variable shift costs */
1784 COSTS_N_INSNS (1), /* constant shift costs */
1785 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
1786 COSTS_N_INSNS (3), /* HI */
1787 COSTS_N_INSNS (3), /* SI */
1788 COSTS_N_INSNS (4), /* DI */
1789 COSTS_N_INSNS (2)}, /* other */
1790 0, /* cost of multiply per each bit set */
1791 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
1792 COSTS_N_INSNS (26), /* HI */
1793 COSTS_N_INSNS (42), /* SI */
1794 COSTS_N_INSNS (74), /* DI */
1795 COSTS_N_INSNS (74)}, /* other */
1796 COSTS_N_INSNS (1), /* cost of movsx */
1797 COSTS_N_INSNS (1), /* cost of movzx */
1798 8, /* "large" insn */
1799 17, /* MOVE_RATIO */
1800 4, /* cost for loading QImode using movzbl */
1801 {4, 4, 4}, /* cost of loading integer registers
1802 in QImode, HImode and SImode.
1803 Relative to reg-reg move (2). */
1804 {4, 4, 4}, /* cost of storing integer registers */
1805 4, /* cost of reg,reg fld/fst */
1806 {12, 12, 12}, /* cost of loading fp registers
1807 in SFmode, DFmode and XFmode */
1808 {6, 6, 8}, /* cost of storing fp registers
1809 in SFmode, DFmode and XFmode */
1810 2, /* cost of moving MMX register */
1811 {8, 8}, /* cost of loading MMX registers
1812 in SImode and DImode */
1813 {8, 8}, /* cost of storing MMX registers
1814 in SImode and DImode */
1815 2, /* cost of moving SSE register */
1816 {8, 8, 8}, /* cost of loading SSE registers
1817 in SImode, DImode and TImode */
1818 {8, 8, 8}, /* cost of storing SSE registers
1819 in SImode, DImode and TImode */
1820 5, /* MMX or SSE register to integer */
1821 32, /* size of l1 cache. */
1822 256, /* size of l2 cache. */
1823 64, /* size of prefetch block */
1824 6, /* number of parallel prefetches */
1825 3, /* Branch cost */
1826 COSTS_N_INSNS (8), /* cost of FADD and FSUB insns. */
1827 COSTS_N_INSNS (8), /* cost of FMUL instruction. */
1828 COSTS_N_INSNS (20), /* cost of FDIV instruction. */
1829 COSTS_N_INSNS (8), /* cost of FABS instruction. */
1830 COSTS_N_INSNS (8), /* cost of FCHS instruction. */
1831 COSTS_N_INSNS (40), /* cost of FSQRT instruction. */
1832 intel_memcpy,
1833 intel_memset,
1834 1, /* scalar_stmt_cost. */
1835 1, /* scalar load_cost. */
1836 1, /* scalar_store_cost. */
1837 1, /* vec_stmt_cost. */
1838 4, /* vec_to_scalar_cost. */
1839 1, /* scalar_to_vec_cost. */
1840 1, /* vec_align_load_cost. */
1841 2, /* vec_unalign_load_cost. */
1842 1, /* vec_store_cost. */
1843 3, /* cond_taken_branch_cost. */
1844 1, /* cond_not_taken_branch_cost. */
1847 /* Generic should produce code tuned for Core-i7 (and newer chips)
1848 and btver1 (and newer chips). */
1850 static stringop_algs generic_memcpy[2] = {
1851 {libcall, {{32, loop, false}, {8192, rep_prefix_4_byte, false},
1852 {-1, libcall, false}}},
1853 {libcall, {{32, loop, false}, {8192, rep_prefix_8_byte, false},
1854 {-1, libcall, false}}}};
1855 static stringop_algs generic_memset[2] = {
1856 {libcall, {{32, loop, false}, {8192, rep_prefix_4_byte, false},
1857 {-1, libcall, false}}},
1858 {libcall, {{32, loop, false}, {8192, rep_prefix_8_byte, false},
1859 {-1, libcall, false}}}};
1860 static const
1861 struct processor_costs generic_cost = {
1862 COSTS_N_INSNS (1), /* cost of an add instruction */
1863 /* On all chips taken into consideration lea takes 2 cycles or more. With
1864 this cost, however, our current implementation of synth_mult results in
1865 the use of unnecessary temporary registers, causing regressions on several
1866 SPECfp benchmarks. */
1867 COSTS_N_INSNS (1) + 1, /* cost of a lea instruction */
1868 COSTS_N_INSNS (1), /* variable shift costs */
1869 COSTS_N_INSNS (1), /* constant shift costs */
1870 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
1871 COSTS_N_INSNS (4), /* HI */
1872 COSTS_N_INSNS (3), /* SI */
1873 COSTS_N_INSNS (4), /* DI */
1874 COSTS_N_INSNS (2)}, /* other */
1875 0, /* cost of multiply per each bit set */
1876 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
1877 COSTS_N_INSNS (26), /* HI */
1878 COSTS_N_INSNS (42), /* SI */
1879 COSTS_N_INSNS (74), /* DI */
1880 COSTS_N_INSNS (74)}, /* other */
1881 COSTS_N_INSNS (1), /* cost of movsx */
1882 COSTS_N_INSNS (1), /* cost of movzx */
1883 8, /* "large" insn */
1884 17, /* MOVE_RATIO */
1885 4, /* cost for loading QImode using movzbl */
1886 {4, 4, 4}, /* cost of loading integer registers
1887 in QImode, HImode and SImode.
1888 Relative to reg-reg move (2). */
1889 {4, 4, 4}, /* cost of storing integer registers */
1890 4, /* cost of reg,reg fld/fst */
1891 {12, 12, 12}, /* cost of loading fp registers
1892 in SFmode, DFmode and XFmode */
1893 {6, 6, 8}, /* cost of storing fp registers
1894 in SFmode, DFmode and XFmode */
1895 2, /* cost of moving MMX register */
1896 {8, 8}, /* cost of loading MMX registers
1897 in SImode and DImode */
1898 {8, 8}, /* cost of storing MMX registers
1899 in SImode and DImode */
1900 2, /* cost of moving SSE register */
1901 {8, 8, 8}, /* cost of loading SSE registers
1902 in SImode, DImode and TImode */
1903 {8, 8, 8}, /* cost of storing SSE registers
1904 in SImode, DImode and TImode */
1905 5, /* MMX or SSE register to integer */
1906 32, /* size of l1 cache. */
1907 512, /* size of l2 cache. */
1908 64, /* size of prefetch block */
1909 6, /* number of parallel prefetches */
1910 /* Benchmarks show large regressions on the K8 sixtrack benchmark when this
1911 value is increased to the perhaps more appropriate value of 5. */
1912 3, /* Branch cost */
1913 COSTS_N_INSNS (8), /* cost of FADD and FSUB insns. */
1914 COSTS_N_INSNS (8), /* cost of FMUL instruction. */
1915 COSTS_N_INSNS (20), /* cost of FDIV instruction. */
1916 COSTS_N_INSNS (8), /* cost of FABS instruction. */
1917 COSTS_N_INSNS (8), /* cost of FCHS instruction. */
1918 COSTS_N_INSNS (40), /* cost of FSQRT instruction. */
1919 generic_memcpy,
1920 generic_memset,
1921 1, /* scalar_stmt_cost. */
1922 1, /* scalar load_cost. */
1923 1, /* scalar_store_cost. */
1924 1, /* vec_stmt_cost. */
1925 1, /* vec_to_scalar_cost. */
1926 1, /* scalar_to_vec_cost. */
1927 1, /* vec_align_load_cost. */
1928 2, /* vec_unalign_load_cost. */
1929 1, /* vec_store_cost. */
1930 3, /* cond_taken_branch_cost. */
1931 1, /* cond_not_taken_branch_cost. */
1934 /* core_cost should produce code tuned for the Core family of CPUs. */
1935 static stringop_algs core_memcpy[2] = {
1936 {libcall, {{1024, rep_prefix_4_byte, true}, {-1, libcall, false}}},
1937 {libcall, {{24, loop, true}, {128, rep_prefix_8_byte, true},
1938 {-1, libcall, false}}}};
1939 static stringop_algs core_memset[2] = {
1940 {libcall, {{6, loop_1_byte, true},
1941 {24, loop, true},
1942 {8192, rep_prefix_4_byte, true},
1943 {-1, libcall, false}}},
1944 {libcall, {{24, loop, true}, {512, rep_prefix_8_byte, true},
1945 {-1, libcall, false}}}};
1947 static const
1948 struct processor_costs core_cost = {
1949 COSTS_N_INSNS (1), /* cost of an add instruction */
1950 /* On all chips taken into consideration lea takes 2 cycles or more. With
1951 this cost, however, our current implementation of synth_mult results in
1952 the use of unnecessary temporary registers, causing regressions on several
1953 SPECfp benchmarks. */
1954 COSTS_N_INSNS (1) + 1, /* cost of a lea instruction */
1955 COSTS_N_INSNS (1), /* variable shift costs */
1956 COSTS_N_INSNS (1), /* constant shift costs */
1957 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
1958 COSTS_N_INSNS (4), /* HI */
1959 COSTS_N_INSNS (3), /* SI */
1960 COSTS_N_INSNS (4), /* DI */
1961 COSTS_N_INSNS (2)}, /* other */
1962 0, /* cost of multiply per each bit set */
1963 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
1964 COSTS_N_INSNS (26), /* HI */
1965 COSTS_N_INSNS (42), /* SI */
1966 COSTS_N_INSNS (74), /* DI */
1967 COSTS_N_INSNS (74)}, /* other */
1968 COSTS_N_INSNS (1), /* cost of movsx */
1969 COSTS_N_INSNS (1), /* cost of movzx */
1970 8, /* "large" insn */
1971 17, /* MOVE_RATIO */
1972 4, /* cost for loading QImode using movzbl */
1973 {4, 4, 4}, /* cost of loading integer registers
1974 in QImode, HImode and SImode.
1975 Relative to reg-reg move (2). */
1976 {4, 4, 4}, /* cost of storing integer registers */
1977 4, /* cost of reg,reg fld/fst */
1978 {12, 12, 12}, /* cost of loading fp registers
1979 in SFmode, DFmode and XFmode */
1980 {6, 6, 8}, /* cost of storing fp registers
1981 in SFmode, DFmode and XFmode */
1982 2, /* cost of moving MMX register */
1983 {8, 8}, /* cost of loading MMX registers
1984 in SImode and DImode */
1985 {8, 8}, /* cost of storing MMX registers
1986 in SImode and DImode */
1987 2, /* cost of moving SSE register */
1988 {8, 8, 8}, /* cost of loading SSE registers
1989 in SImode, DImode and TImode */
1990 {8, 8, 8}, /* cost of storing SSE registers
1991 in SImode, DImode and TImode */
1992 5, /* MMX or SSE register to integer */
1993 64, /* size of l1 cache. */
1994 512, /* size of l2 cache. */
1995 64, /* size of prefetch block */
1996 6, /* number of parallel prefetches */
1997 /* FIXME: perhaps a more appropriate value is 5. */
1998 3, /* Branch cost */
1999 COSTS_N_INSNS (8), /* cost of FADD and FSUB insns. */
2000 COSTS_N_INSNS (8), /* cost of FMUL instruction. */
2001 COSTS_N_INSNS (20), /* cost of FDIV instruction. */
2002 COSTS_N_INSNS (8), /* cost of FABS instruction. */
2003 COSTS_N_INSNS (8), /* cost of FCHS instruction. */
2004 COSTS_N_INSNS (40), /* cost of FSQRT instruction. */
2005 core_memcpy,
2006 core_memset,
2007 1, /* scalar_stmt_cost. */
2008 1, /* scalar load_cost. */
2009 1, /* scalar_store_cost. */
2010 1, /* vec_stmt_cost. */
2011 1, /* vec_to_scalar_cost. */
2012 1, /* scalar_to_vec_cost. */
2013 1, /* vec_align_load_cost. */
2014 2, /* vec_unalign_load_cost. */
2015 1, /* vec_store_cost. */
2016 3, /* cond_taken_branch_cost. */
2017 1, /* cond_not_taken_branch_cost. */
2021 /* Set by -mtune. */
2022 const struct processor_costs *ix86_tune_cost = &pentium_cost;
2024 /* Set by -mtune or -Os. */
2025 const struct processor_costs *ix86_cost = &pentium_cost;
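/* In other words: ix86_tune_cost always tracks the -mtune selection, while
   ix86_cost may be redirected to a size-oriented cost table when optimizing
   for size.  Both start out pointing at pentium_cost until the options are
   processed.  */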
2027 /* Processor feature/optimization bitmasks. */
2028 #define m_386 (1<<PROCESSOR_I386)
2029 #define m_486 (1<<PROCESSOR_I486)
2030 #define m_PENT (1<<PROCESSOR_PENTIUM)
2031 #define m_PPRO (1<<PROCESSOR_PENTIUMPRO)
2032 #define m_PENT4 (1<<PROCESSOR_PENTIUM4)
2033 #define m_NOCONA (1<<PROCESSOR_NOCONA)
2034 #define m_P4_NOCONA (m_PENT4 | m_NOCONA)
2035 #define m_CORE2 (1<<PROCESSOR_CORE2)
2036 #define m_NEHALEM (1<<PROCESSOR_NEHALEM)
2037 #define m_SANDYBRIDGE (1<<PROCESSOR_SANDYBRIDGE)
2038 #define m_HASWELL (1<<PROCESSOR_HASWELL)
2039 #define m_CORE_ALL (m_CORE2 | m_NEHALEM | m_SANDYBRIDGE | m_HASWELL)
2040 #define m_BONNELL (1<<PROCESSOR_BONNELL)
2041 #define m_SILVERMONT (1<<PROCESSOR_SILVERMONT)
2042 #define m_KNL (1<<PROCESSOR_KNL)
2043 #define m_INTEL (1<<PROCESSOR_INTEL)
2045 #define m_GEODE (1<<PROCESSOR_GEODE)
2046 #define m_K6 (1<<PROCESSOR_K6)
2047 #define m_K6_GEODE (m_K6 | m_GEODE)
2048 #define m_K8 (1<<PROCESSOR_K8)
2049 #define m_ATHLON (1<<PROCESSOR_ATHLON)
2050 #define m_ATHLON_K8 (m_K8 | m_ATHLON)
2051 #define m_AMDFAM10 (1<<PROCESSOR_AMDFAM10)
2052 #define m_BDVER1 (1<<PROCESSOR_BDVER1)
2053 #define m_BDVER2 (1<<PROCESSOR_BDVER2)
2054 #define m_BDVER3 (1<<PROCESSOR_BDVER3)
2055 #define m_BDVER4 (1<<PROCESSOR_BDVER4)
2056 #define m_BTVER1 (1<<PROCESSOR_BTVER1)
2057 #define m_BTVER2 (1<<PROCESSOR_BTVER2)
2058 #define m_BDVER (m_BDVER1 | m_BDVER2 | m_BDVER3 | m_BDVER4)
2059 #define m_BTVER (m_BTVER1 | m_BTVER2)
2060 #define m_AMD_MULTIPLE (m_ATHLON_K8 | m_AMDFAM10 | m_BDVER | m_BTVER)
2062 #define m_GENERIC (1<<PROCESSOR_GENERIC)
2064 const char* ix86_tune_feature_names[X86_TUNE_LAST] = {
2065 #undef DEF_TUNE
2066 #define DEF_TUNE(tune, name, selector) name,
2067 #include "x86-tune.def"
2068 #undef DEF_TUNE
2071 /* Feature tests against the various tunings. */
2072 unsigned char ix86_tune_features[X86_TUNE_LAST];
2074 /* Feature tests against the various tunings used to create ix86_tune_features
2075 based on the processor mask. */
2076 static unsigned int initial_ix86_tune_features[X86_TUNE_LAST] = {
2077 #undef DEF_TUNE
2078 #define DEF_TUNE(tune, name, selector) selector,
2079 #include "x86-tune.def"
2080 #undef DEF_TUNE
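/* The two tables above are built from the same x86-tune.def entries: each
   DEF_TUNE (tune, name, selector) line contributes its NAME string to
   ix86_tune_feature_names and its SELECTOR mask (composed from the m_*
   bitmasks above) to initial_ix86_tune_features, e.g. (illustrative entry)
   DEF_TUNE (X86_TUNE_SCHEDULE, "schedule", m_PENT | m_CORE_ALL | m_GENERIC),
   so the two arrays stay indexed consistently by the X86_TUNE_* values.  */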
2083 /* Feature tests against the various architecture variations. */
2084 unsigned char ix86_arch_features[X86_ARCH_LAST];
2086 /* Feature tests against the various architecture variations, used to create
2087 ix86_arch_features based on the processor mask. */
2088 static unsigned int initial_ix86_arch_features[X86_ARCH_LAST] = {
2089 /* X86_ARCH_CMOV: Conditional move was added for pentiumpro. */
2090 ~(m_386 | m_486 | m_PENT | m_K6),
2092 /* X86_ARCH_CMPXCHG: Compare and exchange was added for 80486. */
2093 ~m_386,
2095 /* X86_ARCH_CMPXCHG8B: Compare and exchange 8 bytes was added for pentium. */
2096 ~(m_386 | m_486),
2098 /* X86_ARCH_XADD: Exchange and add was added for 80486. */
2099 ~m_386,
2101 /* X86_ARCH_BSWAP: Byteswap was added for 80486. */
2102 ~m_386,
2105 /* In case the average insn count for a single function invocation is
2106 lower than this constant, emit fast (but longer) prologue and
2107 epilogue code. */
2108 #define FAST_PROLOGUE_INSN_COUNT 20
2110 /* Names for 8 (low), 8 (high), and 16-bit registers, respectively. */
2111 static const char *const qi_reg_name[] = QI_REGISTER_NAMES;
2112 static const char *const qi_high_reg_name[] = QI_HIGH_REGISTER_NAMES;
2113 static const char *const hi_reg_name[] = HI_REGISTER_NAMES;
2115 /* Array of the smallest class containing reg number REGNO, indexed by
2116 REGNO. Used by REGNO_REG_CLASS in i386.h. */
2118 enum reg_class const regclass_map[FIRST_PSEUDO_REGISTER] =
2120 /* ax, dx, cx, bx */
2121 AREG, DREG, CREG, BREG,
2122 /* si, di, bp, sp */
2123 SIREG, DIREG, NON_Q_REGS, NON_Q_REGS,
2124 /* FP registers */
2125 FP_TOP_REG, FP_SECOND_REG, FLOAT_REGS, FLOAT_REGS,
2126 FLOAT_REGS, FLOAT_REGS, FLOAT_REGS, FLOAT_REGS,
2127 /* arg pointer */
2128 NON_Q_REGS,
2129 /* flags, fpsr, fpcr, frame */
2130 NO_REGS, NO_REGS, NO_REGS, NON_Q_REGS,
2131 /* SSE registers */
2132 SSE_FIRST_REG, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS,
2133 SSE_REGS, SSE_REGS,
2134 /* MMX registers */
2135 MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS,
2136 MMX_REGS, MMX_REGS,
2137 /* REX registers */
2138 NON_Q_REGS, NON_Q_REGS, NON_Q_REGS, NON_Q_REGS,
2139 NON_Q_REGS, NON_Q_REGS, NON_Q_REGS, NON_Q_REGS,
2140 /* SSE REX registers */
2141 SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS,
2142 SSE_REGS, SSE_REGS,
2143 /* AVX-512 SSE registers */
2144 EVEX_SSE_REGS, EVEX_SSE_REGS, EVEX_SSE_REGS, EVEX_SSE_REGS,
2145 EVEX_SSE_REGS, EVEX_SSE_REGS, EVEX_SSE_REGS, EVEX_SSE_REGS,
2146 EVEX_SSE_REGS, EVEX_SSE_REGS, EVEX_SSE_REGS, EVEX_SSE_REGS,
2147 EVEX_SSE_REGS, EVEX_SSE_REGS, EVEX_SSE_REGS, EVEX_SSE_REGS,
2148 /* Mask registers. */
2149 MASK_REGS, MASK_EVEX_REGS, MASK_EVEX_REGS, MASK_EVEX_REGS,
2150 MASK_EVEX_REGS, MASK_EVEX_REGS, MASK_EVEX_REGS, MASK_EVEX_REGS,
2151 /* MPX bound registers */
2152 BND_REGS, BND_REGS, BND_REGS, BND_REGS,
2155 /* The "default" register map used in 32bit mode. */
2157 int const dbx_register_map[FIRST_PSEUDO_REGISTER] =
2159 0, 2, 1, 3, 6, 7, 4, 5, /* general regs */
2160 12, 13, 14, 15, 16, 17, 18, 19, /* fp regs */
2161 -1, -1, -1, -1, -1, /* arg, flags, fpsr, fpcr, frame */
2162 21, 22, 23, 24, 25, 26, 27, 28, /* SSE */
2163 29, 30, 31, 32, 33, 34, 35, 36, /* MMX */
2164 -1, -1, -1, -1, -1, -1, -1, -1, /* extended integer registers */
2165 -1, -1, -1, -1, -1, -1, -1, -1, /* extended SSE registers */
2166 -1, -1, -1, -1, -1, -1, -1, -1, /* AVX-512 registers 16-23*/
2167 -1, -1, -1, -1, -1, -1, -1, -1, /* AVX-512 registers 24-31*/
2168 93, 94, 95, 96, 97, 98, 99, 100, /* Mask registers */
2169 101, 102, 103, 104, /* bound registers */
2172 /* The "default" register map used in 64bit mode. */
2174 int const dbx64_register_map[FIRST_PSEUDO_REGISTER] =
2176 0, 1, 2, 3, 4, 5, 6, 7, /* general regs */
2177 33, 34, 35, 36, 37, 38, 39, 40, /* fp regs */
2178 -1, -1, -1, -1, -1, /* arg, flags, fpsr, fpcr, frame */
2179 17, 18, 19, 20, 21, 22, 23, 24, /* SSE */
2180 41, 42, 43, 44, 45, 46, 47, 48, /* MMX */
2181 8,9,10,11,12,13,14,15, /* extended integer registers */
2182 25, 26, 27, 28, 29, 30, 31, 32, /* extended SSE registers */
2183 67, 68, 69, 70, 71, 72, 73, 74, /* AVX-512 registers 16-23 */
2184 75, 76, 77, 78, 79, 80, 81, 82, /* AVX-512 registers 24-31 */
2185 118, 119, 120, 121, 122, 123, 124, 125, /* Mask registers */
2186 126, 127, 128, 129, /* bound registers */
2189 /* Define the register numbers to be used in Dwarf debugging information.
2190 The SVR4 reference port C compiler uses the following register numbers
2191 in its Dwarf output code:
2192 0 for %eax (gcc regno = 0)
2193 1 for %ecx (gcc regno = 2)
2194 2 for %edx (gcc regno = 1)
2195 3 for %ebx (gcc regno = 3)
2196 4 for %esp (gcc regno = 7)
2197 5 for %ebp (gcc regno = 6)
2198 6 for %esi (gcc regno = 4)
2199 7 for %edi (gcc regno = 5)
2200 The following three DWARF register numbers are never generated by
2201 the SVR4 C compiler or by the GNU compilers, but SDB on x86/svr4
2202 believes these numbers have these meanings.
2203 8 for %eip (no gcc equivalent)
2204 9 for %eflags (gcc regno = 17)
2205 10 for %trapno (no gcc equivalent)
2206 It is not at all clear how we should number the FP stack registers
2207 for the x86 architecture. If the version of SDB on x86/svr4 were
2208 a bit less brain dead with respect to floating-point then we would
2209 have a precedent to follow with respect to DWARF register numbers
2210 for x86 FP registers, but the SDB on x86/svr4 is so completely
2211 broken with respect to FP registers that it is hardly worth thinking
2212 of it as something to strive for compatibility with.
2213 The version of x86/svr4 SDB I have at the moment does (partially)
2214 seem to believe that DWARF register number 11 is associated with
2215 the x86 register %st(0), but that's about all. Higher DWARF
2216 register numbers don't seem to be associated with anything in
2217 particular, and even for DWARF regno 11, SDB only seems to under-
2218 stand that it should say that a variable lives in %st(0) (when
2219 asked via an `=' command) if we said it was in DWARF regno 11,
2220 but SDB still prints garbage when asked for the value of the
2221 variable in question (via a `/' command).
2222 (Also note that the labels SDB prints for various FP stack regs
2223 when doing an `x' command are all wrong.)
2224 Note that these problems generally don't affect the native SVR4
2225 C compiler because it doesn't allow the use of -O with -g and
2226 because when it is *not* optimizing, it allocates a memory
2227 location for each floating-point variable, and the memory
2228 location is what gets described in the DWARF AT_location
2229 attribute for the variable in question.
2230 Regardless of the severe mental illness of the x86/svr4 SDB, we
2231 do something sensible here and we use the following DWARF
2232 register numbers. Note that these are all stack-top-relative
2233 numbers.
2234 11 for %st(0) (gcc regno = 8)
2235 12 for %st(1) (gcc regno = 9)
2236 13 for %st(2) (gcc regno = 10)
2237 14 for %st(3) (gcc regno = 11)
2238 15 for %st(4) (gcc regno = 12)
2239 16 for %st(5) (gcc regno = 13)
2240 17 for %st(6) (gcc regno = 14)
2241 18 for %st(7) (gcc regno = 15)
2243 int const svr4_dbx_register_map[FIRST_PSEUDO_REGISTER] =
2245 0, 2, 1, 3, 6, 7, 5, 4, /* general regs */
2246 11, 12, 13, 14, 15, 16, 17, 18, /* fp regs */
2247 -1, 9, -1, -1, -1, /* arg, flags, fpsr, fpcr, frame */
2248 21, 22, 23, 24, 25, 26, 27, 28, /* SSE registers */
2249 29, 30, 31, 32, 33, 34, 35, 36, /* MMX registers */
2250 -1, -1, -1, -1, -1, -1, -1, -1, /* extended integer registers */
2251 -1, -1, -1, -1, -1, -1, -1, -1, /* extended SSE registers */
2252 -1, -1, -1, -1, -1, -1, -1, -1, /* AVX-512 registers 16-23*/
2253 -1, -1, -1, -1, -1, -1, -1, -1, /* AVX-512 registers 24-31*/
2254 93, 94, 95, 96, 97, 98, 99, 100, /* Mask registers */
2255 101, 102, 103, 104, /* bound registers */
2258 /* Define parameter passing and return registers. */
2260 static int const x86_64_int_parameter_registers[6] =
2262 DI_REG, SI_REG, DX_REG, CX_REG, R8_REG, R9_REG
2265 static int const x86_64_ms_abi_int_parameter_registers[4] =
2267 CX_REG, DX_REG, R8_REG, R9_REG
2270 static int const x86_64_int_return_registers[4] =
2272 AX_REG, DX_REG, DI_REG, SI_REG
2275 /* Additional registers that are clobbered by SYSV calls. */
2277 int const x86_64_ms_sysv_extra_clobbered_registers[12] =
2279 SI_REG, DI_REG,
2280 XMM6_REG, XMM7_REG,
2281 XMM8_REG, XMM9_REG, XMM10_REG, XMM11_REG,
2282 XMM12_REG, XMM13_REG, XMM14_REG, XMM15_REG
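/* These are the registers the 64-bit MS ABI treats as call-saved but the
   SysV ABI does not, so an MS-ABI function must assume they are clobbered
   across a call into SysV code.  */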
2285 /* Define the structure for the machine field in struct function. */
2287 struct GTY(()) stack_local_entry {
2288 unsigned short mode;
2289 unsigned short n;
2290 rtx rtl;
2291 struct stack_local_entry *next;
2294 /* Structure describing stack frame layout.
2295 Stack grows downward:
2297 [arguments]
2298 <- ARG_POINTER
2299 saved pc
2301 saved static chain if ix86_static_chain_on_stack
2303 saved frame pointer if frame_pointer_needed
2304 <- HARD_FRAME_POINTER
2305 [saved regs]
2306 <- regs_save_offset
2307 [padding0]
2309 [saved SSE regs]
2310 <- sse_regs_save_offset
2311 [padding1] |
2312 | <- FRAME_POINTER
2313 [va_arg registers] |
2315 [frame] |
2317 [padding2] | = to_allocate
2318 <- STACK_POINTER
2320 struct ix86_frame
2322 int nsseregs;
2323 int nregs;
2324 int va_arg_size;
2325 int red_zone_size;
2326 int outgoing_arguments_size;
2328 /* The offsets relative to ARG_POINTER. */
2329 HOST_WIDE_INT frame_pointer_offset;
2330 HOST_WIDE_INT hard_frame_pointer_offset;
2331 HOST_WIDE_INT stack_pointer_offset;
2332 HOST_WIDE_INT hfp_save_offset;
2333 HOST_WIDE_INT reg_save_offset;
2334 HOST_WIDE_INT sse_reg_save_offset;
2336 /* When save_regs_using_mov is set, emit prologue using
2337 move instead of push instructions. */
2338 bool save_regs_using_mov;
2341 /* Which cpu are we scheduling for. */
2342 enum attr_cpu ix86_schedule;
2344 /* Which cpu are we optimizing for. */
2345 enum processor_type ix86_tune;
2347 /* Which instruction set architecture to use. */
2348 enum processor_type ix86_arch;
2350 /* True if processor has SSE prefetch instruction. */
2351 unsigned char x86_prefetch_sse;
2353 /* -mstackrealign option */
2354 static const char ix86_force_align_arg_pointer_string[]
2355 = "force_align_arg_pointer";
2357 static rtx (*ix86_gen_leave) (void);
2358 static rtx (*ix86_gen_add3) (rtx, rtx, rtx);
2359 static rtx (*ix86_gen_sub3) (rtx, rtx, rtx);
2360 static rtx (*ix86_gen_sub3_carry) (rtx, rtx, rtx, rtx, rtx);
2361 static rtx (*ix86_gen_one_cmpl2) (rtx, rtx);
2362 static rtx (*ix86_gen_monitor) (rtx, rtx, rtx);
2363 static rtx (*ix86_gen_monitorx) (rtx, rtx, rtx);
2364 static rtx (*ix86_gen_andsp) (rtx, rtx, rtx);
2365 static rtx (*ix86_gen_allocate_stack_worker) (rtx, rtx);
2366 static rtx (*ix86_gen_adjust_stack_and_probe) (rtx, rtx, rtx);
2367 static rtx (*ix86_gen_probe_stack_range) (rtx, rtx, rtx);
2368 static rtx (*ix86_gen_tls_global_dynamic_64) (rtx, rtx, rtx);
2369 static rtx (*ix86_gen_tls_local_dynamic_base_64) (rtx, rtx);
2371 /* Preferred alignment for stack boundary in bits. */
2372 unsigned int ix86_preferred_stack_boundary;
2374 /* Alignment for incoming stack boundary in bits specified on the
2375 command line. */
2376 static unsigned int ix86_user_incoming_stack_boundary;
2378 /* Default alignment for incoming stack boundary in bits. */
2379 static unsigned int ix86_default_incoming_stack_boundary;
2381 /* Alignment for incoming stack boundary in bits. */
2382 unsigned int ix86_incoming_stack_boundary;
2384 /* Calling abi specific va_list type nodes. */
2385 static GTY(()) tree sysv_va_list_type_node;
2386 static GTY(()) tree ms_va_list_type_node;
2388 /* Prefix built by ASM_GENERATE_INTERNAL_LABEL. */
2389 char internal_label_prefix[16];
2390 int internal_label_prefix_len;
2392 /* Fence to use after loop using movnt. */
2393 tree x86_mfence;
2395 /* Register class used for passing a given 64-bit part of the argument.
2396 These represent classes as documented by the psABI, with the exception
2397 of the SSESF and SSEDF classes, which are basically the SSE class, except
2398 that gcc uses an SFmode or DFmode move instead of DImode to avoid reformatting penalties.
2400 Similarly we play games with INTEGERSI_CLASS to use cheaper SImode moves
2401 whenever possible (upper half does contain padding). */
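/* For illustration (details are in the argument-classification code
   elsewhere in this file): a double is classified as X86_64_SSEDF_CLASS, a
   32-bit int as X86_64_INTEGERSI_CLASS, and an aggregate such as
   struct { double d; int i; } splits into one SSE eightbyte and one integer
   eightbyte; anything that cannot be passed in registers ends up in
   X86_64_MEMORY_CLASS.  */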
2402 enum x86_64_reg_class
2404 X86_64_NO_CLASS,
2405 X86_64_INTEGER_CLASS,
2406 X86_64_INTEGERSI_CLASS,
2407 X86_64_SSE_CLASS,
2408 X86_64_SSESF_CLASS,
2409 X86_64_SSEDF_CLASS,
2410 X86_64_SSEUP_CLASS,
2411 X86_64_X87_CLASS,
2412 X86_64_X87UP_CLASS,
2413 X86_64_COMPLEX_X87_CLASS,
2414 X86_64_MEMORY_CLASS
2417 #define MAX_CLASSES 8
2419 /* Table of constants used by fldpi, fldln2, etc.... */
2420 static REAL_VALUE_TYPE ext_80387_constants_table [5];
2421 static bool ext_80387_constants_init = 0;
2424 static struct machine_function * ix86_init_machine_status (void);
2425 static rtx ix86_function_value (const_tree, const_tree, bool);
2426 static bool ix86_function_value_regno_p (const unsigned int);
2427 static unsigned int ix86_function_arg_boundary (machine_mode,
2428 const_tree);
2429 static rtx ix86_static_chain (const_tree, bool);
2430 static int ix86_function_regparm (const_tree, const_tree);
2431 static void ix86_compute_frame_layout (struct ix86_frame *);
2432 static bool ix86_expand_vector_init_one_nonzero (bool, machine_mode,
2433 rtx, rtx, int);
2434 static void ix86_add_new_builtins (HOST_WIDE_INT);
2435 static tree ix86_canonical_va_list_type (tree);
2436 static void predict_jump (int);
2437 static unsigned int split_stack_prologue_scratch_regno (void);
2438 static bool i386_asm_output_addr_const_extra (FILE *, rtx);
2440 enum ix86_function_specific_strings
2442 IX86_FUNCTION_SPECIFIC_ARCH,
2443 IX86_FUNCTION_SPECIFIC_TUNE,
2444 IX86_FUNCTION_SPECIFIC_MAX
2447 static char *ix86_target_string (HOST_WIDE_INT, int, const char *,
2448 const char *, enum fpmath_unit, bool);
2449 static void ix86_function_specific_save (struct cl_target_option *,
2450 struct gcc_options *opts);
2451 static void ix86_function_specific_restore (struct gcc_options *opts,
2452 struct cl_target_option *);
2453 static void ix86_function_specific_post_stream_in (struct cl_target_option *);
2454 static void ix86_function_specific_print (FILE *, int,
2455 struct cl_target_option *);
2456 static bool ix86_valid_target_attribute_p (tree, tree, tree, int);
2457 static bool ix86_valid_target_attribute_inner_p (tree, char *[],
2458 struct gcc_options *,
2459 struct gcc_options *,
2460 struct gcc_options *);
2461 static bool ix86_can_inline_p (tree, tree);
2462 static void ix86_set_current_function (tree);
2463 static unsigned int ix86_minimum_incoming_stack_boundary (bool);
2465 static enum calling_abi ix86_function_abi (const_tree);
2468 #ifndef SUBTARGET32_DEFAULT_CPU
2469 #define SUBTARGET32_DEFAULT_CPU "i386"
2470 #endif
2472 /* Whether -mtune= or -march= were specified */
2473 static int ix86_tune_defaulted;
2474 static int ix86_arch_specified;
2476 /* Vectorization library interface and handlers. */
2477 static tree (*ix86_veclib_handler) (enum built_in_function, tree, tree);
2479 static tree ix86_veclibabi_svml (enum built_in_function, tree, tree);
2480 static tree ix86_veclibabi_acml (enum built_in_function, tree, tree);
2482 /* Processor target table, indexed by processor number */
2483 struct ptt
2485 const char *const name; /* processor name */
2486 const struct processor_costs *cost; /* Processor costs */
2487 const int align_loop; /* Default alignments. */
2488 const int align_loop_max_skip;
2489 const int align_jump;
2490 const int align_jump_max_skip;
2491 const int align_func;
2494 /* This table must be in sync with enum processor_type in i386.h. */
2495 static const struct ptt processor_target_table[PROCESSOR_max] =
2497 {"generic", &generic_cost, 16, 10, 16, 10, 16},
2498 {"i386", &i386_cost, 4, 3, 4, 3, 4},
2499 {"i486", &i486_cost, 16, 15, 16, 15, 16},
2500 {"pentium", &pentium_cost, 16, 7, 16, 7, 16},
2501 {"pentiumpro", &pentiumpro_cost, 16, 15, 16, 10, 16},
2502 {"pentium4", &pentium4_cost, 0, 0, 0, 0, 0},
2503 {"nocona", &nocona_cost, 0, 0, 0, 0, 0},
2504 {"core2", &core_cost, 16, 10, 16, 10, 16},
2505 {"nehalem", &core_cost, 16, 10, 16, 10, 16},
2506 {"sandybridge", &core_cost, 16, 10, 16, 10, 16},
2507 {"haswell", &core_cost, 16, 10, 16, 10, 16},
2508 {"bonnell", &atom_cost, 16, 15, 16, 7, 16},
2509 {"silvermont", &slm_cost, 16, 15, 16, 7, 16},
2510 {"knl", &slm_cost, 16, 15, 16, 7, 16},
2511 {"intel", &intel_cost, 16, 15, 16, 7, 16},
2512 {"geode", &geode_cost, 0, 0, 0, 0, 0},
2513 {"k6", &k6_cost, 32, 7, 32, 7, 32},
2514 {"athlon", &athlon_cost, 16, 7, 16, 7, 16},
2515 {"k8", &k8_cost, 16, 7, 16, 7, 16},
2516 {"amdfam10", &amdfam10_cost, 32, 24, 32, 7, 32},
2517 {"bdver1", &bdver1_cost, 16, 10, 16, 7, 11},
2518 {"bdver2", &bdver2_cost, 16, 10, 16, 7, 11},
2519 {"bdver3", &bdver3_cost, 16, 10, 16, 7, 11},
2520 {"bdver4", &bdver4_cost, 16, 10, 16, 7, 11},
2521 {"btver1", &btver1_cost, 16, 10, 16, 7, 11},
2522 {"btver2", &btver2_cost, 16, 10, 16, 7, 11}
2525 static unsigned int
2526 rest_of_handle_insert_vzeroupper (void)
2528 int i;
2530 /* vzeroupper instructions are inserted immediately after reload to
2531 account for possible spills from 256bit registers. The pass
2532 reuses mode switching infrastructure by re-running mode insertion
2533 pass, so disable entities that have already been processed. */
2534 for (i = 0; i < MAX_386_ENTITIES; i++)
2535 ix86_optimize_mode_switching[i] = 0;
2537 ix86_optimize_mode_switching[AVX_U128] = 1;
2539 /* Call optimize_mode_switching. */
2540 g->get_passes ()->execute_pass_mode_switching ();
2541 return 0;
2544 namespace {
2546 const pass_data pass_data_insert_vzeroupper =
2548 RTL_PASS, /* type */
2549 "vzeroupper", /* name */
2550 OPTGROUP_NONE, /* optinfo_flags */
2551 TV_NONE, /* tv_id */
2552 0, /* properties_required */
2553 0, /* properties_provided */
2554 0, /* properties_destroyed */
2555 0, /* todo_flags_start */
2556 TODO_df_finish, /* todo_flags_finish */
2559 class pass_insert_vzeroupper : public rtl_opt_pass
2561 public:
2562 pass_insert_vzeroupper(gcc::context *ctxt)
2563 : rtl_opt_pass(pass_data_insert_vzeroupper, ctxt)
2566 /* opt_pass methods: */
2567 virtual bool gate (function *)
2569 return TARGET_AVX && !TARGET_AVX512F
2570 && TARGET_VZEROUPPER && flag_expensive_optimizations
2571 && !optimize_size;
2574 virtual unsigned int execute (function *)
2576 return rest_of_handle_insert_vzeroupper ();
2579 }; // class pass_insert_vzeroupper
2581 } // anon namespace
2583 rtl_opt_pass *
2584 make_pass_insert_vzeroupper (gcc::context *ctxt)
2586 return new pass_insert_vzeroupper (ctxt);
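/* The pass object created here is expected to be registered from the option
   override code in this file so that it runs right after reload, which is
   what the comment in rest_of_handle_insert_vzeroupper above relies on.  */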
2589 /* Return true if a red-zone is in use. */
2591 static inline bool
2592 ix86_using_red_zone (void)
2594 return TARGET_RED_ZONE && !TARGET_64BIT_MS_ABI;
2597 /* Return a string that documents the current -m options. The caller is
2598 responsible for freeing the string. */
2600 static char *
2601 ix86_target_string (HOST_WIDE_INT isa, int flags, const char *arch,
2602 const char *tune, enum fpmath_unit fpmath,
2603 bool add_nl_p)
2605 struct ix86_target_opts
2607 const char *option; /* option string */
2608 HOST_WIDE_INT mask; /* isa mask options */
2611 /* This table is ordered so that options like -msse4.2 that imply
2612 preceding options are matched first. */
2613 static struct ix86_target_opts isa_opts[] =
2615 { "-mfma4", OPTION_MASK_ISA_FMA4 },
2616 { "-mfma", OPTION_MASK_ISA_FMA },
2617 { "-mxop", OPTION_MASK_ISA_XOP },
2618 { "-mlwp", OPTION_MASK_ISA_LWP },
2619 { "-mavx512f", OPTION_MASK_ISA_AVX512F },
2620 { "-mavx512er", OPTION_MASK_ISA_AVX512ER },
2621 { "-mavx512cd", OPTION_MASK_ISA_AVX512CD },
2622 { "-mavx512pf", OPTION_MASK_ISA_AVX512PF },
2623 { "-mavx512dq", OPTION_MASK_ISA_AVX512DQ },
2624 { "-mavx512bw", OPTION_MASK_ISA_AVX512BW },
2625 { "-mavx512vl", OPTION_MASK_ISA_AVX512VL },
2626 { "-mavx512ifma", OPTION_MASK_ISA_AVX512IFMA },
2627 { "-mavx512vbmi", OPTION_MASK_ISA_AVX512VBMI },
2628 { "-msse4a", OPTION_MASK_ISA_SSE4A },
2629 { "-msse4.2", OPTION_MASK_ISA_SSE4_2 },
2630 { "-msse4.1", OPTION_MASK_ISA_SSE4_1 },
2631 { "-mssse3", OPTION_MASK_ISA_SSSE3 },
2632 { "-msse3", OPTION_MASK_ISA_SSE3 },
2633 { "-msse2", OPTION_MASK_ISA_SSE2 },
2634 { "-msse", OPTION_MASK_ISA_SSE },
2635 { "-m3dnow", OPTION_MASK_ISA_3DNOW },
2636 { "-m3dnowa", OPTION_MASK_ISA_3DNOW_A },
2637 { "-mmmx", OPTION_MASK_ISA_MMX },
2638 { "-mabm", OPTION_MASK_ISA_ABM },
2639 { "-mbmi", OPTION_MASK_ISA_BMI },
2640 { "-mbmi2", OPTION_MASK_ISA_BMI2 },
2641 { "-mlzcnt", OPTION_MASK_ISA_LZCNT },
2642 { "-mhle", OPTION_MASK_ISA_HLE },
2643 { "-mfxsr", OPTION_MASK_ISA_FXSR },
2644 { "-mrdseed", OPTION_MASK_ISA_RDSEED },
2645 { "-mprfchw", OPTION_MASK_ISA_PRFCHW },
2646 { "-madx", OPTION_MASK_ISA_ADX },
2647 { "-mtbm", OPTION_MASK_ISA_TBM },
2648 { "-mpopcnt", OPTION_MASK_ISA_POPCNT },
2649 { "-mmovbe", OPTION_MASK_ISA_MOVBE },
2650 { "-mcrc32", OPTION_MASK_ISA_CRC32 },
2651 { "-maes", OPTION_MASK_ISA_AES },
2652 { "-msha", OPTION_MASK_ISA_SHA },
2653 { "-mpclmul", OPTION_MASK_ISA_PCLMUL },
2654 { "-mfsgsbase", OPTION_MASK_ISA_FSGSBASE },
2655 { "-mrdrnd", OPTION_MASK_ISA_RDRND },
2656 { "-mf16c", OPTION_MASK_ISA_F16C },
2657 { "-mrtm", OPTION_MASK_ISA_RTM },
2658 { "-mxsave", OPTION_MASK_ISA_XSAVE },
2659 { "-mxsaveopt", OPTION_MASK_ISA_XSAVEOPT },
2660 { "-mprefetchwt1", OPTION_MASK_ISA_PREFETCHWT1 },
2661 { "-mclflushopt", OPTION_MASK_ISA_CLFLUSHOPT },
2662 { "-mxsavec", OPTION_MASK_ISA_XSAVEC },
2663 { "-mxsaves", OPTION_MASK_ISA_XSAVES },
2664 { "-mmpx", OPTION_MASK_ISA_MPX },
2665 { "-mclwb", OPTION_MASK_ISA_CLWB },
2666 { "-mpcommit", OPTION_MASK_ISA_PCOMMIT },
2667 { "-mmwaitx", OPTION_MASK_ISA_MWAITX },
2670 /* Flag options. */
2671 static struct ix86_target_opts flag_opts[] =
2673 { "-m128bit-long-double", MASK_128BIT_LONG_DOUBLE },
2674 { "-mlong-double-128", MASK_LONG_DOUBLE_128 },
2675 { "-mlong-double-64", MASK_LONG_DOUBLE_64 },
2676 { "-m80387", MASK_80387 },
2677 { "-maccumulate-outgoing-args", MASK_ACCUMULATE_OUTGOING_ARGS },
2678 { "-malign-double", MASK_ALIGN_DOUBLE },
2679 { "-mcld", MASK_CLD },
2680 { "-mfp-ret-in-387", MASK_FLOAT_RETURNS },
2681 { "-mieee-fp", MASK_IEEE_FP },
2682 { "-minline-all-stringops", MASK_INLINE_ALL_STRINGOPS },
2683 { "-minline-stringops-dynamically", MASK_INLINE_STRINGOPS_DYNAMICALLY },
2684 { "-mms-bitfields", MASK_MS_BITFIELD_LAYOUT },
2685 { "-mno-align-stringops", MASK_NO_ALIGN_STRINGOPS },
2686 { "-mno-fancy-math-387", MASK_NO_FANCY_MATH_387 },
2687 { "-mno-push-args", MASK_NO_PUSH_ARGS },
2688 { "-mno-red-zone", MASK_NO_RED_ZONE },
2689 { "-momit-leaf-frame-pointer", MASK_OMIT_LEAF_FRAME_POINTER },
2690 { "-mrecip", MASK_RECIP },
2691 { "-mrtd", MASK_RTD },
2692 { "-msseregparm", MASK_SSEREGPARM },
2693 { "-mstack-arg-probe", MASK_STACK_PROBE },
2694 { "-mtls-direct-seg-refs", MASK_TLS_DIRECT_SEG_REFS },
2695 { "-mvect8-ret-in-mem", MASK_VECT8_RETURNS },
2696 { "-m8bit-idiv", MASK_USE_8BIT_IDIV },
2697 { "-mvzeroupper", MASK_VZEROUPPER },
2698 { "-mavx256-split-unaligned-load", MASK_AVX256_SPLIT_UNALIGNED_LOAD},
2699 { "-mavx256-split-unaligned-store", MASK_AVX256_SPLIT_UNALIGNED_STORE},
2700 { "-mprefer-avx128", MASK_PREFER_AVX128},
2703 const char *opts[ARRAY_SIZE (isa_opts) + ARRAY_SIZE (flag_opts) + 6][2];
2705 char isa_other[40];
2706 char target_other[40];
2707 unsigned num = 0;
2708 unsigned i, j;
2709 char *ret;
2710 char *ptr;
2711 size_t len;
2712 size_t line_len;
2713 size_t sep_len;
2714 const char *abi;
2716 memset (opts, '\0', sizeof (opts));
2718 /* Add -march= option. */
2719 if (arch)
2721 opts[num][0] = "-march=";
2722 opts[num++][1] = arch;
2725 /* Add -mtune= option. */
2726 if (tune)
2728 opts[num][0] = "-mtune=";
2729 opts[num++][1] = tune;
2732 /* Add -m32/-m64/-mx32. */
2733 if ((isa & OPTION_MASK_ISA_64BIT) != 0)
2735 if ((isa & OPTION_MASK_ABI_64) != 0)
2736 abi = "-m64";
2737 else
2738 abi = "-mx32";
2739 isa &= ~ (OPTION_MASK_ISA_64BIT
2740 | OPTION_MASK_ABI_64
2741 | OPTION_MASK_ABI_X32);
2743 else
2744 abi = "-m32";
2745 opts[num++][0] = abi;
2747 /* Pick out the options in isa options. */
2748 for (i = 0; i < ARRAY_SIZE (isa_opts); i++)
2750 if ((isa & isa_opts[i].mask) != 0)
2752 opts[num++][0] = isa_opts[i].option;
2753 isa &= ~ isa_opts[i].mask;
2757 if (isa && add_nl_p)
2759 opts[num++][0] = isa_other;
2760 sprintf (isa_other, "(other isa: %#" HOST_WIDE_INT_PRINT "x)",
2761 isa);
2764 /* Add flag options. */
2765 for (i = 0; i < ARRAY_SIZE (flag_opts); i++)
2767 if ((flags & flag_opts[i].mask) != 0)
2769 opts[num++][0] = flag_opts[i].option;
2770 flags &= ~ flag_opts[i].mask;
2774 if (flags && add_nl_p)
2776 opts[num++][0] = target_other;
2777 sprintf (target_other, "(other flags: %#x)", flags);
2780 /* Add -fpmath= option. */
2781 if (fpmath)
2783 opts[num][0] = "-mfpmath=";
2784 switch ((int) fpmath)
2786 case FPMATH_387:
2787 opts[num++][1] = "387";
2788 break;
2790 case FPMATH_SSE:
2791 opts[num++][1] = "sse";
2792 break;
2794 case FPMATH_387 | FPMATH_SSE:
2795 opts[num++][1] = "sse+387";
2796 break;
2798 default:
2799 gcc_unreachable ();
2803 /* Any options? */
2804 if (num == 0)
2805 return NULL;
2807 gcc_assert (num < ARRAY_SIZE (opts));
2809 /* Size the string. */
2810 len = 0;
2811 sep_len = (add_nl_p) ? 3 : 1;
2812 for (i = 0; i < num; i++)
2814 len += sep_len;
2815 for (j = 0; j < 2; j++)
2816 if (opts[i][j])
2817 len += strlen (opts[i][j]);
2820 /* Build the string. */
2821 ret = ptr = (char *) xmalloc (len);
2822 line_len = 0;
2824 for (i = 0; i < num; i++)
2826 size_t len2[2];
2828 for (j = 0; j < 2; j++)
2829 len2[j] = (opts[i][j]) ? strlen (opts[i][j]) : 0;
2831 if (i != 0)
2833 *ptr++ = ' ';
2834 line_len++;
2836 if (add_nl_p && line_len + len2[0] + len2[1] > 70)
2838 *ptr++ = '\\';
2839 *ptr++ = '\n';
2840 line_len = 0;
2844 for (j = 0; j < 2; j++)
2845 if (opts[i][j])
2847 memcpy (ptr, opts[i][j], len2[j]);
2848 ptr += len2[j];
2849 line_len += len2[j];
2853 *ptr = '\0';
2854 gcc_assert (ret + len >= ptr);
2856 return ret;
2859 /* Return true if profiling code should be emitted before the
2860 prologue, and false otherwise.
2861 Note: for x86 with "hotfix", a sorry () is issued. */
2862 static bool
2863 ix86_profile_before_prologue (void)
2865 return flag_fentry != 0;
2868 /* Function that is callable from the debugger to print the current
2869 options. */
2870 void ATTRIBUTE_UNUSED
2871 ix86_debug_options (void)
2873 char *opts = ix86_target_string (ix86_isa_flags, target_flags,
2874 ix86_arch_string, ix86_tune_string,
2875 ix86_fpmath, true);
2877 if (opts)
2879 fprintf (stderr, "%s\n\n", opts);
2880 free (opts);
2882 else
2883 fputs ("<no options>\n\n", stderr);
2885 return;
2888 static const char *stringop_alg_names[] = {
2889 #define DEF_ENUM
2890 #define DEF_ALG(alg, name) #name,
2891 #include "stringop.def"
2892 #undef DEF_ENUM
2893 #undef DEF_ALG
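/* stringop.def provides one DEF_ALG (alg, name) entry per stringop_alg
   enumerator, so the array above maps each algorithm (loop, unrolled_loop,
   rep_prefix_4_byte, rep_prefix_8_byte, vector_loop, libcall, ...) to the
   spelling accepted by the strategy parser below.  */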
2896 /* Parse parameter string passed to -mmemcpy-strategy= or -mmemset-strategy=.
2897 The string has the following form (or is a comma-separated list of such entries):
2899 strategy_alg:max_size:[align|noalign]
2901 where the full size range for the strategy is either [0, max_size] or
2902 [min_size, max_size], in which min_size is the max_size + 1 of the
2903 preceding range. The last size range must have max_size == -1.
2905 Examples:
2908 -mmemcpy-strategy=libcall:-1:noalign
2910 this is equivalent to (for known size memcpy) -mstringop-strategy=libcall
2914 -mmemset-strategy=rep_8byte:16:noalign,vector_loop:2048:align,libcall:-1:noalign
2916 This is to tell the compiler to use the following strategy for memset
2917 1) when the expected size is between [1, 16], use rep_8byte strategy;
2918 2) when the size is between [17, 2048], use vector_loop;
2919 3) when the size is > 2048, use libcall. */
2921 struct stringop_size_range
2923 int max;
2924 stringop_alg alg;
2925 bool noalign;
2928 static void
2929 ix86_parse_stringop_strategy_string (char *strategy_str, bool is_memset)
2931 const struct stringop_algs *default_algs;
2932 stringop_size_range input_ranges[MAX_STRINGOP_ALGS];
2933 char *curr_range_str, *next_range_str;
2934 int i = 0, n = 0;
2936 if (is_memset)
2937 default_algs = &ix86_cost->memset[TARGET_64BIT != 0];
2938 else
2939 default_algs = &ix86_cost->memcpy[TARGET_64BIT != 0];
2941 curr_range_str = strategy_str;
2945 int maxs;
2946 char alg_name[128];
2947 char align[16];
2948 next_range_str = strchr (curr_range_str, ',');
2949 if (next_range_str)
2950 *next_range_str++ = '\0';
2952 if (3 != sscanf (curr_range_str, "%20[^:]:%d:%10s",
2953 alg_name, &maxs, align))
2955 error ("wrong arg %s to option %s", curr_range_str,
2956 is_memset ? "-mmemset_strategy=" : "-mmemcpy_strategy=");
2957 return;
2960 if (n > 0 && (maxs < (input_ranges[n - 1].max + 1) && maxs != -1))
2962 error ("size ranges of option %s should be increasing",
2963 is_memset ? "-mmemset_strategy=" : "-mmemcpy_strategy=");
2964 return;
2967 for (i = 0; i < last_alg; i++)
2968 if (!strcmp (alg_name, stringop_alg_names[i]))
2969 break;
2971 if (i == last_alg)
2973 error ("wrong stringop strategy name %s specified for option %s",
2974 alg_name,
2975 is_memset ? "-mmemset_strategy=" : "-mmemcpy_strategy=");
2976 return;
2979 if ((stringop_alg) i == rep_prefix_8_byte
2980 && !TARGET_64BIT)
2982 /* rep; movq isn't available in 32-bit code. */
2983 error ("stringop strategy name %s specified for option %s "
2984 "not supported for 32-bit code",
2985 alg_name,
2986 is_memset ? "-mmemset_strategy=" : "-mmemcpy_strategy=");
2987 return;
2990 input_ranges[n].max = maxs;
2991 input_ranges[n].alg = (stringop_alg) i;
2992 if (!strcmp (align, "align"))
2993 input_ranges[n].noalign = false;
2994 else if (!strcmp (align, "noalign"))
2995 input_ranges[n].noalign = true;
2996 else
2998 error ("unknown alignment %s specified for option %s",
2999 align, is_memset ? "-mmemset_strategy=" : "-mmemcpy_strategy=");
3000 return;
3002 n++;
3003 curr_range_str = next_range_str;
3005 while (curr_range_str);
3007 if (input_ranges[n - 1].max != -1)
3009 error ("the max value for the last size range should be -1"
3010 " for option %s",
3011 is_memset ? "-mmemset_strategy=" : "-mmemcpy_strategy=");
3012 return;
3015 if (n > MAX_STRINGOP_ALGS)
3017 error ("too many size ranges specified in option %s",
3018 is_memset ? "-mmemset_strategy=" : "-mmemcpy_strategy=");
3019 return;
3022 /* Now override the default algs array. */
3023 for (i = 0; i < n; i++)
3025 *const_cast<int *>(&default_algs->size[i].max) = input_ranges[i].max;
3026 *const_cast<stringop_alg *>(&default_algs->size[i].alg)
3027 = input_ranges[i].alg;
3028 *const_cast<int *>(&default_algs->size[i].noalign)
3029 = input_ranges[i].noalign;
3034 /* Parse the -mtune-ctrl= option. When DUMP is true,
3035 print the features that are explicitly set. */
3037 static void
3038 parse_mtune_ctrl_str (bool dump)
3040 if (!ix86_tune_ctrl_string)
3041 return;
3043 char *next_feature_string = NULL;
3044 char *curr_feature_string = xstrdup (ix86_tune_ctrl_string);
3045 char *orig = curr_feature_string;
3046 int i;
3049 bool clear = false;
3051 next_feature_string = strchr (curr_feature_string, ',');
3052 if (next_feature_string)
3053 *next_feature_string++ = '\0';
3054 if (*curr_feature_string == '^')
3056 curr_feature_string++;
3057 clear = true;
3059 for (i = 0; i < X86_TUNE_LAST; i++)
3061 if (!strcmp (curr_feature_string, ix86_tune_feature_names[i]))
3063 ix86_tune_features[i] = !clear;
3064 if (dump)
3065 fprintf (stderr, "Explicitly %s feature %s\n",
3066 clear ? "clear" : "set", ix86_tune_feature_names[i]);
3067 break;
3070 if (i == X86_TUNE_LAST)
3071 error ("Unknown parameter to option -mtune-ctrl: %s",
3072 clear ? curr_feature_string - 1 : curr_feature_string);
3073 curr_feature_string = next_feature_string;
3075 while (curr_feature_string);
3076 free (orig);
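
/* Illustrative sketch (editorial, not part of i386.c): the -mtune-ctrl=
   syntax handled above is a comma-separated list of feature names, each
   optionally prefixed with '^' to clear the feature instead of setting it.
   The name demo_parse_tune_ctrl and the sample string are hypothetical.  */

#include <stdio.h>
#include <string.h>

static void
demo_parse_tune_ctrl (char *str)
{
  char *cur = str, *next;

  while (cur)
    {
      int clear = 0;

      next = strchr (cur, ',');
      if (next)
        *next++ = '\0';
      if (*cur == '^')
        {
          cur++;
          clear = 1;
        }
      printf ("feature %s -> %s\n", cur, clear ? "off" : "on");
      cur = next;
    }
}

int
main (void)
{
  char buf[] = "use_leave,^avx256_unaligned_load_optimal";
  demo_parse_tune_ctrl (buf);
  return 0;
}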
3079 /* Helper function to set ix86_tune_features. IX86_TUNE is the
3080 processor type. */
3082 static void
3083 set_ix86_tune_features (enum processor_type ix86_tune, bool dump)
3085 unsigned int ix86_tune_mask = 1u << ix86_tune;
3086 int i;
3088 for (i = 0; i < X86_TUNE_LAST; ++i)
3090 if (ix86_tune_no_default)
3091 ix86_tune_features[i] = 0;
3092 else
3093 ix86_tune_features[i] = !!(initial_ix86_tune_features[i] & ix86_tune_mask);
3096 if (dump)
3098 fprintf (stderr, "List of x86 specific tuning parameter names:\n");
3099 for (i = 0; i < X86_TUNE_LAST; i++)
3100 fprintf (stderr, "%s : %s\n", ix86_tune_feature_names[i],
3101 ix86_tune_features[i] ? "on" : "off");
3104 parse_mtune_ctrl_str (dump);
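
/* Illustrative sketch (editorial, not part of i386.c): each entry of
   initial_ix86_tune_features is a bitmask indexed by processor type, so the
   membership test above reduces to a single AND with (1u << ix86_tune).
   The demo enum and masks below are hypothetical.  */

#include <stdio.h>

enum demo_cpu { DEMO_CPU_A, DEMO_CPU_B, DEMO_CPU_C };

int
main (void)
{
  /* A feature enabled only for DEMO_CPU_A and DEMO_CPU_C.  */
  unsigned int feature_mask = (1u << DEMO_CPU_A) | (1u << DEMO_CPU_C);
  unsigned int tune_mask = 1u << DEMO_CPU_B;

  printf ("enabled for DEMO_CPU_B: %d\n", (feature_mask & tune_mask) != 0);
  return 0;
}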
3108 /* Default align_* from the processor table. */
3110 static void
3111 ix86_default_align (struct gcc_options *opts)
3113 if (opts->x_align_loops == 0)
3115 opts->x_align_loops = processor_target_table[ix86_tune].align_loop;
3116 align_loops_max_skip = processor_target_table[ix86_tune].align_loop_max_skip;
3118 if (opts->x_align_jumps == 0)
3120 opts->x_align_jumps = processor_target_table[ix86_tune].align_jump;
3121 align_jumps_max_skip = processor_target_table[ix86_tune].align_jump_max_skip;
3123 if (opts->x_align_functions == 0)
3125 opts->x_align_functions = processor_target_table[ix86_tune].align_func;
3129 /* Implement TARGET_OVERRIDE_OPTIONS_AFTER_CHANGE hook. */
3131 static void
3132 ix86_override_options_after_change (void)
3134 ix86_default_align (&global_options);
3137 /* Override various settings based on options. If MAIN_ARGS_P, the
3138 options are from the command line, otherwise they are from
3139 attributes. */
3141 static void
3142 ix86_option_override_internal (bool main_args_p,
3143 struct gcc_options *opts,
3144 struct gcc_options *opts_set)
3146 int i;
3147 unsigned int ix86_arch_mask;
3148 const bool ix86_tune_specified = (opts->x_ix86_tune_string != NULL);
3149 const char *prefix;
3150 const char *suffix;
3151 const char *sw;
3153 #define PTA_3DNOW (HOST_WIDE_INT_1 << 0)
3154 #define PTA_3DNOW_A (HOST_WIDE_INT_1 << 1)
3155 #define PTA_64BIT (HOST_WIDE_INT_1 << 2)
3156 #define PTA_ABM (HOST_WIDE_INT_1 << 3)
3157 #define PTA_AES (HOST_WIDE_INT_1 << 4)
3158 #define PTA_AVX (HOST_WIDE_INT_1 << 5)
3159 #define PTA_BMI (HOST_WIDE_INT_1 << 6)
3160 #define PTA_CX16 (HOST_WIDE_INT_1 << 7)
3161 #define PTA_F16C (HOST_WIDE_INT_1 << 8)
3162 #define PTA_FMA (HOST_WIDE_INT_1 << 9)
3163 #define PTA_FMA4 (HOST_WIDE_INT_1 << 10)
3164 #define PTA_FSGSBASE (HOST_WIDE_INT_1 << 11)
3165 #define PTA_LWP (HOST_WIDE_INT_1 << 12)
3166 #define PTA_LZCNT (HOST_WIDE_INT_1 << 13)
3167 #define PTA_MMX (HOST_WIDE_INT_1 << 14)
3168 #define PTA_MOVBE (HOST_WIDE_INT_1 << 15)
3169 #define PTA_NO_SAHF (HOST_WIDE_INT_1 << 16)
3170 #define PTA_PCLMUL (HOST_WIDE_INT_1 << 17)
3171 #define PTA_POPCNT (HOST_WIDE_INT_1 << 18)
3172 #define PTA_PREFETCH_SSE (HOST_WIDE_INT_1 << 19)
3173 #define PTA_RDRND (HOST_WIDE_INT_1 << 20)
3174 #define PTA_SSE (HOST_WIDE_INT_1 << 21)
3175 #define PTA_SSE2 (HOST_WIDE_INT_1 << 22)
3176 #define PTA_SSE3 (HOST_WIDE_INT_1 << 23)
3177 #define PTA_SSE4_1 (HOST_WIDE_INT_1 << 24)
3178 #define PTA_SSE4_2 (HOST_WIDE_INT_1 << 25)
3179 #define PTA_SSE4A (HOST_WIDE_INT_1 << 26)
3180 #define PTA_SSSE3 (HOST_WIDE_INT_1 << 27)
3181 #define PTA_TBM (HOST_WIDE_INT_1 << 28)
3182 #define PTA_XOP (HOST_WIDE_INT_1 << 29)
3183 #define PTA_AVX2 (HOST_WIDE_INT_1 << 30)
3184 #define PTA_BMI2 (HOST_WIDE_INT_1 << 31)
3185 #define PTA_RTM (HOST_WIDE_INT_1 << 32)
3186 #define PTA_HLE (HOST_WIDE_INT_1 << 33)
3187 #define PTA_PRFCHW (HOST_WIDE_INT_1 << 34)
3188 #define PTA_RDSEED (HOST_WIDE_INT_1 << 35)
3189 #define PTA_ADX (HOST_WIDE_INT_1 << 36)
3190 #define PTA_FXSR (HOST_WIDE_INT_1 << 37)
3191 #define PTA_XSAVE (HOST_WIDE_INT_1 << 38)
3192 #define PTA_XSAVEOPT (HOST_WIDE_INT_1 << 39)
3193 #define PTA_AVX512F (HOST_WIDE_INT_1 << 40)
3194 #define PTA_AVX512ER (HOST_WIDE_INT_1 << 41)
3195 #define PTA_AVX512PF (HOST_WIDE_INT_1 << 42)
3196 #define PTA_AVX512CD (HOST_WIDE_INT_1 << 43)
3197 #define PTA_MPX (HOST_WIDE_INT_1 << 44)
3198 #define PTA_SHA (HOST_WIDE_INT_1 << 45)
3199 #define PTA_PREFETCHWT1 (HOST_WIDE_INT_1 << 46)
3200 #define PTA_CLFLUSHOPT (HOST_WIDE_INT_1 << 47)
3201 #define PTA_XSAVEC (HOST_WIDE_INT_1 << 48)
3202 #define PTA_XSAVES (HOST_WIDE_INT_1 << 49)
3203 #define PTA_AVX512DQ (HOST_WIDE_INT_1 << 50)
3204 #define PTA_AVX512BW (HOST_WIDE_INT_1 << 51)
3205 #define PTA_AVX512VL (HOST_WIDE_INT_1 << 52)
3206 #define PTA_AVX512IFMA (HOST_WIDE_INT_1 << 53)
3207 #define PTA_AVX512VBMI (HOST_WIDE_INT_1 << 54)
3208 #define PTA_CLWB (HOST_WIDE_INT_1 << 55)
3209 #define PTA_PCOMMIT (HOST_WIDE_INT_1 << 56)
3210 #define PTA_MWAITX (HOST_WIDE_INT_1 << 57)
3212 #define PTA_CORE2 \
3213 (PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3 | PTA_SSSE3 \
3214 | PTA_CX16 | PTA_FXSR)
3215 #define PTA_NEHALEM \
3216 (PTA_CORE2 | PTA_SSE4_1 | PTA_SSE4_2 | PTA_POPCNT)
3217 #define PTA_WESTMERE \
3218 (PTA_NEHALEM | PTA_AES | PTA_PCLMUL)
3219 #define PTA_SANDYBRIDGE \
3220 (PTA_WESTMERE | PTA_AVX | PTA_XSAVE | PTA_XSAVEOPT)
3221 #define PTA_IVYBRIDGE \
3222 (PTA_SANDYBRIDGE | PTA_FSGSBASE | PTA_RDRND | PTA_F16C)
3223 #define PTA_HASWELL \
3224 (PTA_IVYBRIDGE | PTA_AVX2 | PTA_BMI | PTA_BMI2 | PTA_LZCNT \
3225 | PTA_FMA | PTA_MOVBE | PTA_HLE)
3226 #define PTA_BROADWELL \
3227 (PTA_HASWELL | PTA_ADX | PTA_PRFCHW | PTA_RDSEED)
3228 #define PTA_KNL \
3229 (PTA_BROADWELL | PTA_AVX512PF | PTA_AVX512ER | PTA_AVX512F | PTA_AVX512CD)
3230 #define PTA_BONNELL \
3231 (PTA_CORE2 | PTA_MOVBE)
3232 #define PTA_SILVERMONT \
3233 (PTA_WESTMERE | PTA_MOVBE)
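
/* Illustrative sketch (editorial, not part of i386.c): the PTA_* composites
   above nest, so e.g. PTA_HASWELL transitively contains all of PTA_CORE2's
   bits, and membership is a plain bitwise test.  The DEMO_* flags below are
   hypothetical stand-ins.  */

#include <stdio.h>

#define DEMO_SSE    (1u << 0)
#define DEMO_SSE2   (1u << 1)
#define DEMO_AVX    (1u << 2)
#define DEMO_BASE   (DEMO_SSE | DEMO_SSE2)   /* plays the role of PTA_CORE2 */
#define DEMO_NEWER  (DEMO_BASE | DEMO_AVX)   /* plays the role of PTA_HASWELL */

int
main (void)
{
  printf ("DEMO_NEWER includes DEMO_SSE2: %d\n",
          (DEMO_NEWER & DEMO_SSE2) != 0);
  return 0;
}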
3235 /* If this reaches 64, we need to widen the struct pta flags field below. */
3237 static struct pta
3239 const char *const name; /* processor name or nickname. */
3240 const enum processor_type processor;
3241 const enum attr_cpu schedule;
3242 const unsigned HOST_WIDE_INT flags;
3244 const processor_alias_table[] =
3246 {"i386", PROCESSOR_I386, CPU_NONE, 0},
3247 {"i486", PROCESSOR_I486, CPU_NONE, 0},
3248 {"i586", PROCESSOR_PENTIUM, CPU_PENTIUM, 0},
3249 {"pentium", PROCESSOR_PENTIUM, CPU_PENTIUM, 0},
3250 {"pentium-mmx", PROCESSOR_PENTIUM, CPU_PENTIUM, PTA_MMX},
3251 {"winchip-c6", PROCESSOR_I486, CPU_NONE, PTA_MMX},
3252 {"winchip2", PROCESSOR_I486, CPU_NONE, PTA_MMX | PTA_3DNOW | PTA_PRFCHW},
3253 {"c3", PROCESSOR_I486, CPU_NONE, PTA_MMX | PTA_3DNOW | PTA_PRFCHW},
3254 {"c3-2", PROCESSOR_PENTIUMPRO, CPU_PENTIUMPRO,
3255 PTA_MMX | PTA_SSE | PTA_FXSR},
3256 {"i686", PROCESSOR_PENTIUMPRO, CPU_PENTIUMPRO, 0},
3257 {"pentiumpro", PROCESSOR_PENTIUMPRO, CPU_PENTIUMPRO, 0},
3258 {"pentium2", PROCESSOR_PENTIUMPRO, CPU_PENTIUMPRO, PTA_MMX | PTA_FXSR},
3259 {"pentium3", PROCESSOR_PENTIUMPRO, CPU_PENTIUMPRO,
3260 PTA_MMX | PTA_SSE | PTA_FXSR},
3261 {"pentium3m", PROCESSOR_PENTIUMPRO, CPU_PENTIUMPRO,
3262 PTA_MMX | PTA_SSE | PTA_FXSR},
3263 {"pentium-m", PROCESSOR_PENTIUMPRO, CPU_PENTIUMPRO,
3264 PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_FXSR},
3265 {"pentium4", PROCESSOR_PENTIUM4, CPU_NONE,
3266 PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_FXSR},
3267 {"pentium4m", PROCESSOR_PENTIUM4, CPU_NONE,
3268 PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_FXSR},
3269 {"prescott", PROCESSOR_NOCONA, CPU_NONE,
3270 PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3 | PTA_FXSR},
3271 {"nocona", PROCESSOR_NOCONA, CPU_NONE,
3272 PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3
3273 | PTA_CX16 | PTA_NO_SAHF | PTA_FXSR},
3274 {"core2", PROCESSOR_CORE2, CPU_CORE2, PTA_CORE2},
3275 {"nehalem", PROCESSOR_NEHALEM, CPU_NEHALEM, PTA_NEHALEM},
3276 {"corei7", PROCESSOR_NEHALEM, CPU_NEHALEM, PTA_NEHALEM},
3277 {"westmere", PROCESSOR_NEHALEM, CPU_NEHALEM, PTA_WESTMERE},
3278 {"sandybridge", PROCESSOR_SANDYBRIDGE, CPU_NEHALEM,
3279 PTA_SANDYBRIDGE},
3280 {"corei7-avx", PROCESSOR_SANDYBRIDGE, CPU_NEHALEM,
3281 PTA_SANDYBRIDGE},
3282 {"ivybridge", PROCESSOR_SANDYBRIDGE, CPU_NEHALEM,
3283 PTA_IVYBRIDGE},
3284 {"core-avx-i", PROCESSOR_SANDYBRIDGE, CPU_NEHALEM,
3285 PTA_IVYBRIDGE},
3286 {"haswell", PROCESSOR_HASWELL, CPU_NEHALEM, PTA_HASWELL},
3287 {"core-avx2", PROCESSOR_HASWELL, CPU_NEHALEM, PTA_HASWELL},
3288 {"broadwell", PROCESSOR_HASWELL, CPU_NEHALEM, PTA_BROADWELL},
3289 {"bonnell", PROCESSOR_BONNELL, CPU_ATOM, PTA_BONNELL},
3290 {"atom", PROCESSOR_BONNELL, CPU_ATOM, PTA_BONNELL},
3291 {"silvermont", PROCESSOR_SILVERMONT, CPU_SLM, PTA_SILVERMONT},
3292 {"slm", PROCESSOR_SILVERMONT, CPU_SLM, PTA_SILVERMONT},
3293 {"knl", PROCESSOR_KNL, CPU_KNL, PTA_KNL},
3294 {"intel", PROCESSOR_INTEL, CPU_SLM, PTA_NEHALEM},
3295 {"geode", PROCESSOR_GEODE, CPU_GEODE,
3296 PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_PREFETCH_SSE | PTA_PRFCHW},
3297 {"k6", PROCESSOR_K6, CPU_K6, PTA_MMX},
3298 {"k6-2", PROCESSOR_K6, CPU_K6, PTA_MMX | PTA_3DNOW | PTA_PRFCHW},
3299 {"k6-3", PROCESSOR_K6, CPU_K6, PTA_MMX | PTA_3DNOW | PTA_PRFCHW},
3300 {"athlon", PROCESSOR_ATHLON, CPU_ATHLON,
3301 PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_PREFETCH_SSE | PTA_PRFCHW},
3302 {"athlon-tbird", PROCESSOR_ATHLON, CPU_ATHLON,
3303 PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_PREFETCH_SSE | PTA_PRFCHW},
3304 {"athlon-4", PROCESSOR_ATHLON, CPU_ATHLON,
3305 PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE | PTA_PRFCHW | PTA_FXSR},
3306 {"athlon-xp", PROCESSOR_ATHLON, CPU_ATHLON,
3307 PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE | PTA_PRFCHW | PTA_FXSR},
3308 {"athlon-mp", PROCESSOR_ATHLON, CPU_ATHLON,
3309 PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE | PTA_PRFCHW | PTA_FXSR},
3310 {"x86-64", PROCESSOR_K8, CPU_K8,
3311 PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_NO_SAHF | PTA_FXSR},
3312 {"k8", PROCESSOR_K8, CPU_K8,
3313 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
3314 | PTA_SSE2 | PTA_NO_SAHF | PTA_PRFCHW | PTA_FXSR},
3315 {"k8-sse3", PROCESSOR_K8, CPU_K8,
3316 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
3317 | PTA_SSE2 | PTA_SSE3 | PTA_NO_SAHF | PTA_PRFCHW | PTA_FXSR},
3318 {"opteron", PROCESSOR_K8, CPU_K8,
3319 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
3320 | PTA_SSE2 | PTA_NO_SAHF | PTA_PRFCHW | PTA_FXSR},
3321 {"opteron-sse3", PROCESSOR_K8, CPU_K8,
3322 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
3323 | PTA_SSE2 | PTA_SSE3 | PTA_NO_SAHF | PTA_PRFCHW | PTA_FXSR},
3324 {"athlon64", PROCESSOR_K8, CPU_K8,
3325 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
3326 | PTA_SSE2 | PTA_NO_SAHF | PTA_PRFCHW | PTA_FXSR},
3327 {"athlon64-sse3", PROCESSOR_K8, CPU_K8,
3328 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
3329 | PTA_SSE2 | PTA_SSE3 | PTA_NO_SAHF | PTA_PRFCHW | PTA_FXSR},
3330 {"athlon-fx", PROCESSOR_K8, CPU_K8,
3331 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
3332 | PTA_SSE2 | PTA_NO_SAHF | PTA_PRFCHW | PTA_FXSR},
3333 {"amdfam10", PROCESSOR_AMDFAM10, CPU_AMDFAM10,
3334 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE | PTA_SSE2
3335 | PTA_SSE3 | PTA_SSE4A | PTA_CX16 | PTA_ABM | PTA_PRFCHW | PTA_FXSR},
3336 {"barcelona", PROCESSOR_AMDFAM10, CPU_AMDFAM10,
3337 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE | PTA_SSE2
3338 | PTA_SSE3 | PTA_SSE4A | PTA_CX16 | PTA_ABM | PTA_PRFCHW | PTA_FXSR},
3339 {"bdver1", PROCESSOR_BDVER1, CPU_BDVER1,
3340 PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3
3341 | PTA_SSE4A | PTA_CX16 | PTA_ABM | PTA_SSSE3 | PTA_SSE4_1
3342 | PTA_SSE4_2 | PTA_AES | PTA_PCLMUL | PTA_AVX | PTA_FMA4
3343 | PTA_XOP | PTA_LWP | PTA_PRFCHW | PTA_FXSR | PTA_XSAVE},
3344 {"bdver2", PROCESSOR_BDVER2, CPU_BDVER2,
3345 PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3
3346 | PTA_SSE4A | PTA_CX16 | PTA_ABM | PTA_SSSE3 | PTA_SSE4_1
3347 | PTA_SSE4_2 | PTA_AES | PTA_PCLMUL | PTA_AVX | PTA_FMA4
3348 | PTA_XOP | PTA_LWP | PTA_BMI | PTA_TBM | PTA_F16C
3349 | PTA_FMA | PTA_PRFCHW | PTA_FXSR | PTA_XSAVE},
3350 {"bdver3", PROCESSOR_BDVER3, CPU_BDVER3,
3351 PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3
3352 | PTA_SSE4A | PTA_CX16 | PTA_ABM | PTA_SSSE3 | PTA_SSE4_1
3353 | PTA_SSE4_2 | PTA_AES | PTA_PCLMUL | PTA_AVX | PTA_FMA4
3354 | PTA_XOP | PTA_LWP | PTA_BMI | PTA_TBM | PTA_F16C
3355 | PTA_FMA | PTA_PRFCHW | PTA_FXSR | PTA_XSAVE
3356 | PTA_XSAVEOPT | PTA_FSGSBASE},
3357 {"bdver4", PROCESSOR_BDVER4, CPU_BDVER4,
3358 PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3
3359 | PTA_SSE4A | PTA_CX16 | PTA_ABM | PTA_SSSE3 | PTA_SSE4_1
3360 | PTA_SSE4_2 | PTA_AES | PTA_PCLMUL | PTA_AVX | PTA_AVX2
3361 | PTA_FMA4 | PTA_XOP | PTA_LWP | PTA_BMI | PTA_BMI2
3362 | PTA_TBM | PTA_F16C | PTA_FMA | PTA_PRFCHW | PTA_FXSR
3363 | PTA_XSAVE | PTA_XSAVEOPT | PTA_FSGSBASE | PTA_RDRND
3364 | PTA_MOVBE | PTA_MWAITX},
3365 {"btver1", PROCESSOR_BTVER1, CPU_GENERIC,
3366 PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3
3367 | PTA_SSSE3 | PTA_SSE4A | PTA_ABM | PTA_CX16 | PTA_PRFCHW
3368 | PTA_FXSR | PTA_XSAVE},
3369 {"btver2", PROCESSOR_BTVER2, CPU_BTVER2,
3370 PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3
3371 | PTA_SSSE3 | PTA_SSE4A | PTA_ABM | PTA_CX16 | PTA_SSE4_1
3372 | PTA_SSE4_2 | PTA_AES | PTA_PCLMUL | PTA_AVX
3373 | PTA_BMI | PTA_F16C | PTA_MOVBE | PTA_PRFCHW
3374 | PTA_FXSR | PTA_XSAVE | PTA_XSAVEOPT},
3376 {"generic", PROCESSOR_GENERIC, CPU_GENERIC,
3377 PTA_64BIT
3378 | PTA_HLE /* flags are only used for -march switch. */ },
3381 /* -mrecip options. */
3382 static struct
3384 const char *string; /* option name */
3385 unsigned int mask; /* mask bits to set */
3387 const recip_options[] =
3389 { "all", RECIP_MASK_ALL },
3390 { "none", RECIP_MASK_NONE },
3391 { "div", RECIP_MASK_DIV },
3392 { "sqrt", RECIP_MASK_SQRT },
3393 { "vec-div", RECIP_MASK_VEC_DIV },
3394 { "vec-sqrt", RECIP_MASK_VEC_SQRT },
3397 int const pta_size = ARRAY_SIZE (processor_alias_table);
3399 /* Set up prefix/suffix so the error messages refer to either the command-line
3400 argument or the attribute(target). */
3401 if (main_args_p)
3403 prefix = "-m";
3404 suffix = "";
3405 sw = "switch";
3407 else
3409 prefix = "option(\"";
3410 suffix = "\")";
3411 sw = "attribute";
3414 /* Turn off both OPTION_MASK_ABI_64 and OPTION_MASK_ABI_X32 if
3415 TARGET_64BIT_DEFAULT is true and TARGET_64BIT is false. */
3416 if (TARGET_64BIT_DEFAULT && !TARGET_64BIT_P (opts->x_ix86_isa_flags))
3417 opts->x_ix86_isa_flags &= ~(OPTION_MASK_ABI_64 | OPTION_MASK_ABI_X32);
3418 #ifdef TARGET_BI_ARCH
3419 else
3421 #if TARGET_BI_ARCH == 1
3422 /* When TARGET_BI_ARCH == 1, by default, OPTION_MASK_ABI_64
3423 is on and OPTION_MASK_ABI_X32 is off. We turn off
3424 OPTION_MASK_ABI_64 if OPTION_MASK_ABI_X32 is turned on by
3425 -mx32. */
3426 if (TARGET_X32_P (opts->x_ix86_isa_flags))
3427 opts->x_ix86_isa_flags &= ~OPTION_MASK_ABI_64;
3428 #else
3429 /* When TARGET_BI_ARCH == 2, by default, OPTION_MASK_ABI_X32 is
3430 on and OPTION_MASK_ABI_64 is off. We turn off
3431 OPTION_MASK_ABI_X32 if OPTION_MASK_ABI_64 is turned on by
3432 -m64 or OPTION_MASK_CODE16 is turned on by -m16. */
3433 if (TARGET_LP64_P (opts->x_ix86_isa_flags)
3434 || TARGET_16BIT_P (opts->x_ix86_isa_flags))
3435 opts->x_ix86_isa_flags &= ~OPTION_MASK_ABI_X32;
3436 #endif
3438 #endif
3440 if (TARGET_X32_P (opts->x_ix86_isa_flags))
3442 /* Always turn on OPTION_MASK_ISA_64BIT and turn off
3443 OPTION_MASK_ABI_64 for TARGET_X32. */
3444 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_64BIT;
3445 opts->x_ix86_isa_flags &= ~OPTION_MASK_ABI_64;
3447 else if (TARGET_16BIT_P (opts->x_ix86_isa_flags))
3448 opts->x_ix86_isa_flags &= ~(OPTION_MASK_ISA_64BIT
3449 | OPTION_MASK_ABI_X32
3450 | OPTION_MASK_ABI_64);
3451 else if (TARGET_LP64_P (opts->x_ix86_isa_flags))
3453 /* Always turn on OPTION_MASK_ISA_64BIT and turn off
3454 OPTION_MASK_ABI_X32 for TARGET_LP64. */
3455 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_64BIT;
3456 opts->x_ix86_isa_flags &= ~OPTION_MASK_ABI_X32;
3459 #ifdef SUBTARGET_OVERRIDE_OPTIONS
3460 SUBTARGET_OVERRIDE_OPTIONS;
3461 #endif
3463 #ifdef SUBSUBTARGET_OVERRIDE_OPTIONS
3464 SUBSUBTARGET_OVERRIDE_OPTIONS;
3465 #endif
3467 /* -fPIC is the default for 64-bit Mach-O (Darwin). */
3468 if (TARGET_MACHO && TARGET_64BIT_P (opts->x_ix86_isa_flags))
3469 opts->x_flag_pic = 2;
3471 /* Need to check -mtune=generic first. */
3472 if (opts->x_ix86_tune_string)
3474 /* As special support for cross compilers we read -mtune=native
3475 as -mtune=generic. With native compilers we won't see the
3476 -mtune=native, as it was changed by the driver. */
3477 if (!strcmp (opts->x_ix86_tune_string, "native"))
3479 opts->x_ix86_tune_string = "generic";
3481 else if (!strcmp (opts->x_ix86_tune_string, "x86-64"))
3482 warning (OPT_Wdeprecated, "%stune=x86-64%s is deprecated; use "
3483 "%stune=k8%s or %stune=generic%s instead as appropriate",
3484 prefix, suffix, prefix, suffix, prefix, suffix);
3486 else
3488 if (opts->x_ix86_arch_string)
3489 opts->x_ix86_tune_string = opts->x_ix86_arch_string;
3490 if (!opts->x_ix86_tune_string)
3492 opts->x_ix86_tune_string
3493 = processor_target_table[TARGET_CPU_DEFAULT].name;
3494 ix86_tune_defaulted = 1;
3497 /* opts->x_ix86_tune_string is set to opts->x_ix86_arch_string
3498 or defaulted. We need to use a sensible tune option. */
3499 if (!strcmp (opts->x_ix86_tune_string, "x86-64"))
3501 opts->x_ix86_tune_string = "generic";
3505 if (opts->x_ix86_stringop_alg == rep_prefix_8_byte
3506 && !TARGET_64BIT_P (opts->x_ix86_isa_flags))
3508 /* rep; movq isn't available in 32-bit code. */
3509 error ("-mstringop-strategy=rep_8byte not supported for 32-bit code");
3510 opts->x_ix86_stringop_alg = no_stringop;
3513 if (!opts->x_ix86_arch_string)
3514 opts->x_ix86_arch_string
3515 = TARGET_64BIT_P (opts->x_ix86_isa_flags)
3516 ? "x86-64" : SUBTARGET32_DEFAULT_CPU;
3517 else
3518 ix86_arch_specified = 1;
3520 if (opts_set->x_ix86_pmode)
3522 if ((TARGET_LP64_P (opts->x_ix86_isa_flags)
3523 && opts->x_ix86_pmode == PMODE_SI)
3524 || (!TARGET_64BIT_P (opts->x_ix86_isa_flags)
3525 && opts->x_ix86_pmode == PMODE_DI))
3526 error ("address mode %qs not supported in the %s bit mode",
3527 TARGET_64BIT_P (opts->x_ix86_isa_flags) ? "short" : "long",
3528 TARGET_64BIT_P (opts->x_ix86_isa_flags) ? "64" : "32");
3530 else
3531 opts->x_ix86_pmode = TARGET_LP64_P (opts->x_ix86_isa_flags)
3532 ? PMODE_DI : PMODE_SI;
3534 if (!opts_set->x_ix86_abi)
3535 opts->x_ix86_abi = DEFAULT_ABI;
3537 /* For targets using the ms ABI, enable ms-extensions if not
3538 explicitly turned off. For non-ms ABIs we turn this
3539 option off. */
3540 if (!opts_set->x_flag_ms_extensions)
3541 opts->x_flag_ms_extensions = (MS_ABI == DEFAULT_ABI);
3543 if (opts_set->x_ix86_cmodel)
3545 switch (opts->x_ix86_cmodel)
3547 case CM_SMALL:
3548 case CM_SMALL_PIC:
3549 if (opts->x_flag_pic)
3550 opts->x_ix86_cmodel = CM_SMALL_PIC;
3551 if (!TARGET_64BIT_P (opts->x_ix86_isa_flags))
3552 error ("code model %qs not supported in the %s bit mode",
3553 "small", "32");
3554 break;
3556 case CM_MEDIUM:
3557 case CM_MEDIUM_PIC:
3558 if (opts->x_flag_pic)
3559 opts->x_ix86_cmodel = CM_MEDIUM_PIC;
3560 if (!TARGET_64BIT_P (opts->x_ix86_isa_flags))
3561 error ("code model %qs not supported in the %s bit mode",
3562 "medium", "32");
3563 else if (TARGET_X32_P (opts->x_ix86_isa_flags))
3564 error ("code model %qs not supported in x32 mode",
3565 "medium");
3566 break;
3568 case CM_LARGE:
3569 case CM_LARGE_PIC:
3570 if (opts->x_flag_pic)
3571 opts->x_ix86_cmodel = CM_LARGE_PIC;
3572 if (!TARGET_64BIT_P (opts->x_ix86_isa_flags))
3573 error ("code model %qs not supported in the %s bit mode",
3574 "large", "32");
3575 else if (TARGET_X32_P (opts->x_ix86_isa_flags))
3576 error ("code model %qs not supported in x32 mode",
3577 "large");
3578 break;
3580 case CM_32:
3581 if (opts->x_flag_pic)
3582 error ("code model %s does not support PIC mode", "32");
3583 if (TARGET_64BIT_P (opts->x_ix86_isa_flags))
3584 error ("code model %qs not supported in the %s bit mode",
3585 "32", "64");
3586 break;
3588 case CM_KERNEL:
3589 if (opts->x_flag_pic)
3591 error ("code model %s does not support PIC mode", "kernel");
3592 opts->x_ix86_cmodel = CM_32;
3594 if (!TARGET_64BIT_P (opts->x_ix86_isa_flags))
3595 error ("code model %qs not supported in the %s bit mode",
3596 "kernel", "32");
3597 break;
3599 default:
3600 gcc_unreachable ();
3603 else
3605 /* For TARGET_64BIT and MS_ABI, force pic on, in order to enable the
3606 use of rip-relative addressing. This eliminates fixups that
3607 would otherwise be needed if this object is to be placed in a
3608 DLL, and is essentially just as efficient as direct addressing. */
3609 if (TARGET_64BIT_P (opts->x_ix86_isa_flags)
3610 && (TARGET_RDOS || TARGET_PECOFF))
3611 opts->x_ix86_cmodel = CM_MEDIUM_PIC, opts->x_flag_pic = 1;
3612 else if (TARGET_64BIT_P (opts->x_ix86_isa_flags))
3613 opts->x_ix86_cmodel = opts->x_flag_pic ? CM_SMALL_PIC : CM_SMALL;
3614 else
3615 opts->x_ix86_cmodel = CM_32;
3617 if (TARGET_MACHO && opts->x_ix86_asm_dialect == ASM_INTEL)
3619 error ("-masm=intel not supported in this configuration");
3620 opts->x_ix86_asm_dialect = ASM_ATT;
3622 if ((TARGET_64BIT_P (opts->x_ix86_isa_flags) != 0)
3623 != ((opts->x_ix86_isa_flags & OPTION_MASK_ISA_64BIT) != 0))
3624 sorry ("%i-bit mode not compiled in",
3625 (opts->x_ix86_isa_flags & OPTION_MASK_ISA_64BIT) ? 64 : 32);
3627 for (i = 0; i < pta_size; i++)
3628 if (! strcmp (opts->x_ix86_arch_string, processor_alias_table[i].name))
3630 ix86_schedule = processor_alias_table[i].schedule;
3631 ix86_arch = processor_alias_table[i].processor;
3632 /* Default cpu tuning to the architecture. */
3633 ix86_tune = ix86_arch;
3635 if (TARGET_64BIT_P (opts->x_ix86_isa_flags)
3636 && !(processor_alias_table[i].flags & PTA_64BIT))
3637 error ("CPU you selected does not support x86-64 "
3638 "instruction set");
3640 if (processor_alias_table[i].flags & PTA_MMX
3641 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_MMX))
3642 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_MMX;
3643 if (processor_alias_table[i].flags & PTA_3DNOW
3644 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_3DNOW))
3645 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_3DNOW;
3646 if (processor_alias_table[i].flags & PTA_3DNOW_A
3647 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_3DNOW_A))
3648 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_3DNOW_A;
3649 if (processor_alias_table[i].flags & PTA_SSE
3650 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE))
3651 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_SSE;
3652 if (processor_alias_table[i].flags & PTA_SSE2
3653 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE2))
3654 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_SSE2;
3655 if (processor_alias_table[i].flags & PTA_SSE3
3656 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE3))
3657 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_SSE3;
3658 if (processor_alias_table[i].flags & PTA_SSSE3
3659 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_SSSE3))
3660 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_SSSE3;
3661 if (processor_alias_table[i].flags & PTA_SSE4_1
3662 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE4_1))
3663 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_SSE4_1;
3664 if (processor_alias_table[i].flags & PTA_SSE4_2
3665 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE4_2))
3666 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_SSE4_2;
3667 if (processor_alias_table[i].flags & PTA_AVX
3668 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_AVX))
3669 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_AVX;
3670 if (processor_alias_table[i].flags & PTA_AVX2
3671 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_AVX2))
3672 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_AVX2;
3673 if (processor_alias_table[i].flags & PTA_FMA
3674 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_FMA))
3675 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_FMA;
3676 if (processor_alias_table[i].flags & PTA_SSE4A
3677 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE4A))
3678 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_SSE4A;
3679 if (processor_alias_table[i].flags & PTA_FMA4
3680 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_FMA4))
3681 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_FMA4;
3682 if (processor_alias_table[i].flags & PTA_XOP
3683 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_XOP))
3684 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_XOP;
3685 if (processor_alias_table[i].flags & PTA_LWP
3686 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_LWP))
3687 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_LWP;
3688 if (processor_alias_table[i].flags & PTA_ABM
3689 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_ABM))
3690 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_ABM;
3691 if (processor_alias_table[i].flags & PTA_BMI
3692 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_BMI))
3693 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_BMI;
3694 if (processor_alias_table[i].flags & (PTA_LZCNT | PTA_ABM)
3695 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_LZCNT))
3696 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_LZCNT;
3697 if (processor_alias_table[i].flags & PTA_TBM
3698 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_TBM))
3699 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_TBM;
3700 if (processor_alias_table[i].flags & PTA_BMI2
3701 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_BMI2))
3702 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_BMI2;
3703 if (processor_alias_table[i].flags & PTA_CX16
3704 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_CX16))
3705 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_CX16;
3706 if (processor_alias_table[i].flags & (PTA_POPCNT | PTA_ABM)
3707 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_POPCNT))
3708 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_POPCNT;
3709 if (!(TARGET_64BIT_P (opts->x_ix86_isa_flags)
3710 && (processor_alias_table[i].flags & PTA_NO_SAHF))
3711 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_SAHF))
3712 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_SAHF;
3713 if (processor_alias_table[i].flags & PTA_MOVBE
3714 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_MOVBE))
3715 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_MOVBE;
3716 if (processor_alias_table[i].flags & PTA_AES
3717 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_AES))
3718 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_AES;
3719 if (processor_alias_table[i].flags & PTA_SHA
3720 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_SHA))
3721 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_SHA;
3722 if (processor_alias_table[i].flags & PTA_PCLMUL
3723 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_PCLMUL))
3724 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_PCLMUL;
3725 if (processor_alias_table[i].flags & PTA_FSGSBASE
3726 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_FSGSBASE))
3727 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_FSGSBASE;
3728 if (processor_alias_table[i].flags & PTA_RDRND
3729 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_RDRND))
3730 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_RDRND;
3731 if (processor_alias_table[i].flags & PTA_F16C
3732 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_F16C))
3733 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_F16C;
3734 if (processor_alias_table[i].flags & PTA_RTM
3735 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_RTM))
3736 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_RTM;
3737 if (processor_alias_table[i].flags & PTA_HLE
3738 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_HLE))
3739 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_HLE;
3740 if (processor_alias_table[i].flags & PTA_PRFCHW
3741 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_PRFCHW))
3742 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_PRFCHW;
3743 if (processor_alias_table[i].flags & PTA_RDSEED
3744 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_RDSEED))
3745 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_RDSEED;
3746 if (processor_alias_table[i].flags & PTA_ADX
3747 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_ADX))
3748 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_ADX;
3749 if (processor_alias_table[i].flags & PTA_FXSR
3750 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_FXSR))
3751 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_FXSR;
3752 if (processor_alias_table[i].flags & PTA_XSAVE
3753 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_XSAVE))
3754 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_XSAVE;
3755 if (processor_alias_table[i].flags & PTA_XSAVEOPT
3756 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_XSAVEOPT))
3757 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_XSAVEOPT;
3758 if (processor_alias_table[i].flags & PTA_AVX512F
3759 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_AVX512F))
3760 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_AVX512F;
3761 if (processor_alias_table[i].flags & PTA_AVX512ER
3762 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_AVX512ER))
3763 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_AVX512ER;
3764 if (processor_alias_table[i].flags & PTA_AVX512PF
3765 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_AVX512PF))
3766 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_AVX512PF;
3767 if (processor_alias_table[i].flags & PTA_AVX512CD
3768 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_AVX512CD))
3769 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_AVX512CD;
3770 if (processor_alias_table[i].flags & PTA_PREFETCHWT1
3771 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_PREFETCHWT1))
3772 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_PREFETCHWT1;
3773 if (processor_alias_table[i].flags & PTA_PCOMMIT
3774 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_PCOMMIT))
3775 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_PCOMMIT;
3776 if (processor_alias_table[i].flags & PTA_CLWB
3777 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_CLWB))
3778 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_CLWB;
3779 if (processor_alias_table[i].flags & PTA_CLFLUSHOPT
3780 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_CLFLUSHOPT))
3781 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_CLFLUSHOPT;
3782 if (processor_alias_table[i].flags & PTA_XSAVEC
3783 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_XSAVEC))
3784 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_XSAVEC;
3785 if (processor_alias_table[i].flags & PTA_XSAVES
3786 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_XSAVES))
3787 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_XSAVES;
3788 if (processor_alias_table[i].flags & PTA_AVX512DQ
3789 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_AVX512DQ))
3790 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_AVX512DQ;
3791 if (processor_alias_table[i].flags & PTA_AVX512BW
3792 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_AVX512BW))
3793 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_AVX512BW;
3794 if (processor_alias_table[i].flags & PTA_AVX512VL
3795 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_AVX512VL))
3796 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_AVX512VL;
3797 if (processor_alias_table[i].flags & PTA_MPX
3798 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_MPX))
3799 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_MPX;
3800 if (processor_alias_table[i].flags & PTA_AVX512VBMI
3801 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_AVX512VBMI))
3802 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_AVX512VBMI;
3803 if (processor_alias_table[i].flags & PTA_AVX512IFMA
3804 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_AVX512IFMA))
3805 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_AVX512IFMA;
3806 if (processor_alias_table[i].flags & (PTA_PREFETCH_SSE | PTA_SSE))
3807 x86_prefetch_sse = true;
3808 if (processor_alias_table[i].flags & PTA_MWAITX
3809 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_MWAITX))
3810 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_MWAITX;
3812 break;
3815 if (TARGET_X32 && (opts->x_ix86_isa_flags & OPTION_MASK_ISA_MPX))
3816 error ("Intel MPX does not support x32");
3818 if (TARGET_X32 && (ix86_isa_flags & OPTION_MASK_ISA_MPX))
3819 error ("Intel MPX does not support x32");
3821 if (!strcmp (opts->x_ix86_arch_string, "generic"))
3822 error ("generic CPU can be used only for %stune=%s %s",
3823 prefix, suffix, sw);
3824 else if (!strcmp (opts->x_ix86_arch_string, "intel"))
3825 error ("intel CPU can be used only for %stune=%s %s",
3826 prefix, suffix, sw);
3827 else if (i == pta_size)
3828 error ("bad value (%s) for %sarch=%s %s",
3829 opts->x_ix86_arch_string, prefix, suffix, sw);
3831 ix86_arch_mask = 1u << ix86_arch;
3832 for (i = 0; i < X86_ARCH_LAST; ++i)
3833 ix86_arch_features[i] = !!(initial_ix86_arch_features[i] & ix86_arch_mask);
3835 for (i = 0; i < pta_size; i++)
3836 if (! strcmp (opts->x_ix86_tune_string, processor_alias_table[i].name))
3838 ix86_schedule = processor_alias_table[i].schedule;
3839 ix86_tune = processor_alias_table[i].processor;
3840 if (TARGET_64BIT_P (opts->x_ix86_isa_flags))
3842 if (!(processor_alias_table[i].flags & PTA_64BIT))
3844 if (ix86_tune_defaulted)
3846 opts->x_ix86_tune_string = "x86-64";
3847 for (i = 0; i < pta_size; i++)
3848 if (! strcmp (opts->x_ix86_tune_string,
3849 processor_alias_table[i].name))
3850 break;
3851 ix86_schedule = processor_alias_table[i].schedule;
3852 ix86_tune = processor_alias_table[i].processor;
3854 else
3855 error ("CPU you selected does not support x86-64 "
3856 "instruction set");
3859 /* Intel CPUs have always interpreted SSE prefetch instructions as
3860 NOPs; so, we can enable SSE prefetch instructions even when
3861 -mtune (rather than -march) points us to a processor that has them.
3862 However, the VIA C3 gives a SIGILL, so we only do that for i686 and
3863 higher processors. */
3864 if (TARGET_CMOV
3865 && (processor_alias_table[i].flags & (PTA_PREFETCH_SSE | PTA_SSE)))
3866 x86_prefetch_sse = true;
3867 break;
3870 if (ix86_tune_specified && i == pta_size)
3871 error ("bad value (%s) for %stune=%s %s",
3872 opts->x_ix86_tune_string, prefix, suffix, sw);
3874 set_ix86_tune_features (ix86_tune, opts->x_ix86_dump_tunes);
3876 #ifndef USE_IX86_FRAME_POINTER
3877 #define USE_IX86_FRAME_POINTER 0
3878 #endif
3880 #ifndef USE_X86_64_FRAME_POINTER
3881 #define USE_X86_64_FRAME_POINTER 0
3882 #endif
3884 /* Set the default values for switches whose default depends on TARGET_64BIT
3885 in case they weren't overwritten by command line options. */
3886 if (TARGET_64BIT_P (opts->x_ix86_isa_flags))
3888 if (opts->x_optimize >= 1 && !opts_set->x_flag_omit_frame_pointer)
3889 opts->x_flag_omit_frame_pointer = !USE_X86_64_FRAME_POINTER;
3890 if (opts->x_flag_asynchronous_unwind_tables
3891 && !opts_set->x_flag_unwind_tables
3892 && TARGET_64BIT_MS_ABI)
3893 opts->x_flag_unwind_tables = 1;
3894 if (opts->x_flag_asynchronous_unwind_tables == 2)
3895 opts->x_flag_unwind_tables
3896 = opts->x_flag_asynchronous_unwind_tables = 1;
3897 if (opts->x_flag_pcc_struct_return == 2)
3898 opts->x_flag_pcc_struct_return = 0;
3900 else
3902 if (opts->x_optimize >= 1 && !opts_set->x_flag_omit_frame_pointer)
3903 opts->x_flag_omit_frame_pointer
3904 = !(USE_IX86_FRAME_POINTER || opts->x_optimize_size);
3905 if (opts->x_flag_asynchronous_unwind_tables == 2)
3906 opts->x_flag_asynchronous_unwind_tables = !USE_IX86_FRAME_POINTER;
3907 if (opts->x_flag_pcc_struct_return == 2)
3908 opts->x_flag_pcc_struct_return = DEFAULT_PCC_STRUCT_RETURN;
3911 ix86_tune_cost = processor_target_table[ix86_tune].cost;
3912 /* TODO: ix86_cost should be chosen at instruction or function granularity,
3913 so that for cold code we use size_cost even in !optimize_size compilation. */
3914 if (opts->x_optimize_size)
3915 ix86_cost = &ix86_size_cost;
3916 else
3917 ix86_cost = ix86_tune_cost;
3919 /* Arrange to set up i386_stack_locals for all functions. */
3920 init_machine_status = ix86_init_machine_status;
3922 /* Validate -mregparm= value. */
3923 if (opts_set->x_ix86_regparm)
3925 if (TARGET_64BIT_P (opts->x_ix86_isa_flags))
3926 warning (0, "-mregparm is ignored in 64-bit mode");
3927 if (opts->x_ix86_regparm > REGPARM_MAX)
3929 error ("-mregparm=%d is not between 0 and %d",
3930 opts->x_ix86_regparm, REGPARM_MAX);
3931 opts->x_ix86_regparm = 0;
3934 if (TARGET_64BIT_P (opts->x_ix86_isa_flags))
3935 opts->x_ix86_regparm = REGPARM_MAX;
3937 /* Default align_* from the processor table. */
3938 ix86_default_align (opts);
3940 /* Provide default for -mbranch-cost= value. */
3941 if (!opts_set->x_ix86_branch_cost)
3942 opts->x_ix86_branch_cost = ix86_tune_cost->branch_cost;
3944 if (TARGET_64BIT_P (opts->x_ix86_isa_flags))
3946 opts->x_target_flags
3947 |= TARGET_SUBTARGET64_DEFAULT & ~opts_set->x_target_flags;
3949 /* Enable the SSE and MMX builtins by default. Do allow the user to
3950 explicitly disable any of these. In particular, disabling SSE and
3951 MMX for kernel code is extremely useful. */
3952 if (!ix86_arch_specified)
3953 opts->x_ix86_isa_flags
3954 |= ((OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_MMX
3955 | TARGET_SUBTARGET64_ISA_DEFAULT)
3956 & ~opts->x_ix86_isa_flags_explicit);
3958 if (TARGET_RTD_P (opts->x_target_flags))
3959 warning (0, "%srtd%s is ignored in 64bit mode", prefix, suffix);
3961 else
3963 opts->x_target_flags
3964 |= TARGET_SUBTARGET32_DEFAULT & ~opts_set->x_target_flags;
3966 if (!ix86_arch_specified)
3967 opts->x_ix86_isa_flags
3968 |= TARGET_SUBTARGET32_ISA_DEFAULT & ~opts->x_ix86_isa_flags_explicit;
3970 /* The i386 ABI does not specify a red zone. It still makes sense to use it
3971 when the programmer takes care to keep the stack from being destroyed. */
3972 if (!(opts_set->x_target_flags & MASK_NO_RED_ZONE))
3973 opts->x_target_flags |= MASK_NO_RED_ZONE;
3976 /* Keep nonleaf frame pointers. */
3977 if (opts->x_flag_omit_frame_pointer)
3978 opts->x_target_flags &= ~MASK_OMIT_LEAF_FRAME_POINTER;
3979 else if (TARGET_OMIT_LEAF_FRAME_POINTER_P (opts->x_target_flags))
3980 opts->x_flag_omit_frame_pointer = 1;
3982 /* If we're doing fast math, we don't care about comparison order
3983 wrt NaNs. This lets us use a shorter comparison sequence. */
3984 if (opts->x_flag_finite_math_only)
3985 opts->x_target_flags &= ~MASK_IEEE_FP;
3987 /* If the architecture always has an FPU, turn off NO_FANCY_MATH_387,
3988 since the insns won't need emulation. */
3989 if (ix86_tune_features [X86_TUNE_ALWAYS_FANCY_MATH_387])
3990 opts->x_target_flags &= ~MASK_NO_FANCY_MATH_387;
3992 /* Likewise, if the target doesn't have a 387, or we've specified
3993 software floating point, don't use 387 inline intrinsics. */
3994 if (!TARGET_80387_P (opts->x_target_flags))
3995 opts->x_target_flags |= MASK_NO_FANCY_MATH_387;
3997 /* Turn on MMX builtins for -msse. */
3998 if (TARGET_SSE_P (opts->x_ix86_isa_flags))
3999 opts->x_ix86_isa_flags
4000 |= OPTION_MASK_ISA_MMX & ~opts->x_ix86_isa_flags_explicit;
4002 /* Enable SSE prefetch. */
4003 if (TARGET_SSE_P (opts->x_ix86_isa_flags)
4004 || (TARGET_PRFCHW && !TARGET_3DNOW_P (opts->x_ix86_isa_flags)))
4005 x86_prefetch_sse = true;
4007 /* Enable prefetch{,w} instructions for -m3dnow and -mprefetchwt1. */
4008 if (TARGET_3DNOW_P (opts->x_ix86_isa_flags)
4009 || TARGET_PREFETCHWT1_P (opts->x_ix86_isa_flags))
4010 opts->x_ix86_isa_flags
4011 |= OPTION_MASK_ISA_PRFCHW & ~opts->x_ix86_isa_flags_explicit;
4013 /* Enable popcnt instruction for -msse4.2 or -mabm. */
4014 if (TARGET_SSE4_2_P (opts->x_ix86_isa_flags)
4015 || TARGET_ABM_P (opts->x_ix86_isa_flags))
4016 opts->x_ix86_isa_flags
4017 |= OPTION_MASK_ISA_POPCNT & ~opts->x_ix86_isa_flags_explicit;
4019 /* Enable lzcnt instruction for -mabm. */
4020 if (TARGET_ABM_P(opts->x_ix86_isa_flags))
4021 opts->x_ix86_isa_flags
4022 |= OPTION_MASK_ISA_LZCNT & ~opts->x_ix86_isa_flags_explicit;
4024 /* Validate -mpreferred-stack-boundary= value or default it to
4025 PREFERRED_STACK_BOUNDARY_DEFAULT. */
4026 ix86_preferred_stack_boundary = PREFERRED_STACK_BOUNDARY_DEFAULT;
4027 if (opts_set->x_ix86_preferred_stack_boundary_arg)
4029 int min = (TARGET_64BIT_P (opts->x_ix86_isa_flags)
4030 ? (TARGET_SSE_P (opts->x_ix86_isa_flags) ? 4 : 3) : 2);
4031 int max = (TARGET_SEH ? 4 : 12);
4033 if (opts->x_ix86_preferred_stack_boundary_arg < min
4034 || opts->x_ix86_preferred_stack_boundary_arg > max)
4036 if (min == max)
4037 error ("-mpreferred-stack-boundary is not supported "
4038 "for this target");
4039 else
4040 error ("-mpreferred-stack-boundary=%d is not between %d and %d",
4041 opts->x_ix86_preferred_stack_boundary_arg, min, max);
4043 else
4044 ix86_preferred_stack_boundary
4045 = (1 << opts->x_ix86_preferred_stack_boundary_arg) * BITS_PER_UNIT;
4048 /* Set the default value for -mstackrealign. */
4049 if (opts->x_ix86_force_align_arg_pointer == -1)
4050 opts->x_ix86_force_align_arg_pointer = STACK_REALIGN_DEFAULT;
4052 ix86_default_incoming_stack_boundary = PREFERRED_STACK_BOUNDARY;
4054 /* Validate -mincoming-stack-boundary= value or default it to
4055 MIN_STACK_BOUNDARY/PREFERRED_STACK_BOUNDARY. */
4056 ix86_incoming_stack_boundary = ix86_default_incoming_stack_boundary;
4057 if (opts_set->x_ix86_incoming_stack_boundary_arg)
4059 if (opts->x_ix86_incoming_stack_boundary_arg
4060 < (TARGET_64BIT_P (opts->x_ix86_isa_flags) ? 4 : 2)
4061 || opts->x_ix86_incoming_stack_boundary_arg > 12)
4062 error ("-mincoming-stack-boundary=%d is not between %d and 12",
4063 opts->x_ix86_incoming_stack_boundary_arg,
4064 TARGET_64BIT_P (opts->x_ix86_isa_flags) ? 4 : 2);
4065 else
4067 ix86_user_incoming_stack_boundary
4068 = (1 << opts->x_ix86_incoming_stack_boundary_arg) * BITS_PER_UNIT;
4069 ix86_incoming_stack_boundary
4070 = ix86_user_incoming_stack_boundary;
4074 #ifndef NO_PROFILE_COUNTERS
4075 if (flag_nop_mcount)
4076 error ("-mnop-mcount is not compatible with this target");
4077 #endif
4078 if (flag_nop_mcount && flag_pic)
4079 error ("-mnop-mcount is not implemented for -fPIC");
4081 /* Accept -msseregparm only if at least SSE support is enabled. */
4082 if (TARGET_SSEREGPARM_P (opts->x_target_flags)
4083 && ! TARGET_SSE_P (opts->x_ix86_isa_flags))
4084 error ("%ssseregparm%s used without SSE enabled", prefix, suffix);
4086 if (opts_set->x_ix86_fpmath)
4088 if (opts->x_ix86_fpmath & FPMATH_SSE)
4090 if (!TARGET_SSE_P (opts->x_ix86_isa_flags))
4092 warning (0, "SSE instruction set disabled, using 387 arithmetics");
4093 opts->x_ix86_fpmath = FPMATH_387;
4095 else if ((opts->x_ix86_fpmath & FPMATH_387)
4096 && !TARGET_80387_P (opts->x_target_flags))
4098 warning (0, "387 instruction set disabled, using SSE arithmetics");
4099 opts->x_ix86_fpmath = FPMATH_SSE;
4103 /* For all chips supporting SSE2, -mfpmath=sse performs better than
4104 -mfpmath=387. The latter is nevertheless the default on many targets, since the
4105 extra 80-bit precision of temporaries is considered to be part of the ABI.
4106 Override the default at least for -ffast-math.
4107 TODO: -mfpmath=both seems to produce equally performing code with slightly
4108 smaller binaries. It is however not clear whether register allocation is
4109 ready for this setting.
4110 Also, -mfpmath=387 is overall a lot more compact (about 4-5%) than SSE
4111 codegen. We may switch to 387 with -ffast-math for size-optimized
4112 functions. */
4113 else if (fast_math_flags_set_p (&global_options)
4114 && TARGET_SSE2_P (opts->x_ix86_isa_flags))
4115 opts->x_ix86_fpmath = FPMATH_SSE;
4116 else
4117 opts->x_ix86_fpmath = TARGET_FPMATH_DEFAULT_P (opts->x_ix86_isa_flags);
4119 /* If the i387 is disabled, then do not return values in it. */
4120 if (!TARGET_80387_P (opts->x_target_flags))
4121 opts->x_target_flags &= ~MASK_FLOAT_RETURNS;
4123 /* Use external vectorized library in vectorizing intrinsics. */
4124 if (opts_set->x_ix86_veclibabi_type)
4125 switch (opts->x_ix86_veclibabi_type)
4127 case ix86_veclibabi_type_svml:
4128 ix86_veclib_handler = ix86_veclibabi_svml;
4129 break;
4131 case ix86_veclibabi_type_acml:
4132 ix86_veclib_handler = ix86_veclibabi_acml;
4133 break;
4135 default:
4136 gcc_unreachable ();
4139 if (ix86_tune_features [X86_TUNE_ACCUMULATE_OUTGOING_ARGS]
4140 && !(opts_set->x_target_flags & MASK_ACCUMULATE_OUTGOING_ARGS))
4141 opts->x_target_flags |= MASK_ACCUMULATE_OUTGOING_ARGS;
4143 /* If stack probes are required, the space used for large function
4144 arguments on the stack must also be probed, so enable
4145 -maccumulate-outgoing-args so this happens in the prologue. */
4146 if (TARGET_STACK_PROBE_P (opts->x_target_flags)
4147 && !(opts->x_target_flags & MASK_ACCUMULATE_OUTGOING_ARGS))
4149 if (opts_set->x_target_flags & MASK_ACCUMULATE_OUTGOING_ARGS)
4150 warning (0, "stack probing requires %saccumulate-outgoing-args%s "
4151 "for correctness", prefix, suffix);
4152 opts->x_target_flags |= MASK_ACCUMULATE_OUTGOING_ARGS;
4155 /* Figure out what ASM_GENERATE_INTERNAL_LABEL builds as a prefix. */
4157 char *p;
4158 ASM_GENERATE_INTERNAL_LABEL (internal_label_prefix, "LX", 0);
4159 p = strchr (internal_label_prefix, 'X');
4160 internal_label_prefix_len = p - internal_label_prefix;
4161 *p = '\0';
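
/* Illustrative sketch (editorial, not part of i386.c): how the prefix length
   is recovered from a generated label.  The actual text comes from
   ASM_GENERATE_INTERNAL_LABEL and is target-dependent; ".LX0" below is only a
   hypothetical stand-in.  */

#include <stdio.h>
#include <string.h>

int
main (void)
{
  char label[16] = ".LX0";           /* hypothetical generated label */
  char *p = strchr (label, 'X');
  size_t prefix_len = (size_t) (p - label);

  *p = '\0';                         /* keep just the prefix */
  printf ("prefix \"%s\", length %zu\n", label, prefix_len);
  return 0;
}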
4164 /* When no scheduling description is available, disable the scheduler pass
4165 so it won't slow down compilation and make x87 code slower. */
4166 if (!TARGET_SCHEDULE)
4167 opts->x_flag_schedule_insns_after_reload = opts->x_flag_schedule_insns = 0;
4169 maybe_set_param_value (PARAM_SIMULTANEOUS_PREFETCHES,
4170 ix86_tune_cost->simultaneous_prefetches,
4171 opts->x_param_values,
4172 opts_set->x_param_values);
4173 maybe_set_param_value (PARAM_L1_CACHE_LINE_SIZE,
4174 ix86_tune_cost->prefetch_block,
4175 opts->x_param_values,
4176 opts_set->x_param_values);
4177 maybe_set_param_value (PARAM_L1_CACHE_SIZE,
4178 ix86_tune_cost->l1_cache_size,
4179 opts->x_param_values,
4180 opts_set->x_param_values);
4181 maybe_set_param_value (PARAM_L2_CACHE_SIZE,
4182 ix86_tune_cost->l2_cache_size,
4183 opts->x_param_values,
4184 opts_set->x_param_values);
4186 /* Enable software prefetching at -O3 for CPUs where prefetching is helpful. */
4187 if (opts->x_flag_prefetch_loop_arrays < 0
4188 && HAVE_prefetch
4189 && (opts->x_optimize >= 3 || opts->x_flag_profile_use)
4190 && !opts->x_optimize_size
4191 && TARGET_SOFTWARE_PREFETCHING_BENEFICIAL)
4192 opts->x_flag_prefetch_loop_arrays = 1;
4194 /* If using typedef char *va_list, signal that __builtin_va_start (&ap, 0)
4195 can be optimized to ap = __builtin_next_arg (0). */
4196 if (!TARGET_64BIT_P (opts->x_ix86_isa_flags) && !opts->x_flag_split_stack)
4197 targetm.expand_builtin_va_start = NULL;
4199 if (TARGET_64BIT_P (opts->x_ix86_isa_flags))
4201 ix86_gen_leave = gen_leave_rex64;
4202 if (Pmode == DImode)
4204 ix86_gen_tls_global_dynamic_64 = gen_tls_global_dynamic_64_di;
4205 ix86_gen_tls_local_dynamic_base_64
4206 = gen_tls_local_dynamic_base_64_di;
4208 else
4210 ix86_gen_tls_global_dynamic_64 = gen_tls_global_dynamic_64_si;
4211 ix86_gen_tls_local_dynamic_base_64
4212 = gen_tls_local_dynamic_base_64_si;
4215 else
4216 ix86_gen_leave = gen_leave;
4218 if (Pmode == DImode)
4220 ix86_gen_add3 = gen_adddi3;
4221 ix86_gen_sub3 = gen_subdi3;
4222 ix86_gen_sub3_carry = gen_subdi3_carry;
4223 ix86_gen_one_cmpl2 = gen_one_cmpldi2;
4224 ix86_gen_andsp = gen_anddi3;
4225 ix86_gen_allocate_stack_worker = gen_allocate_stack_worker_probe_di;
4226 ix86_gen_adjust_stack_and_probe = gen_adjust_stack_and_probedi;
4227 ix86_gen_probe_stack_range = gen_probe_stack_rangedi;
4228 ix86_gen_monitor = gen_sse3_monitor_di;
4229 ix86_gen_monitorx = gen_monitorx_di;
4231 else
4233 ix86_gen_add3 = gen_addsi3;
4234 ix86_gen_sub3 = gen_subsi3;
4235 ix86_gen_sub3_carry = gen_subsi3_carry;
4236 ix86_gen_one_cmpl2 = gen_one_cmplsi2;
4237 ix86_gen_andsp = gen_andsi3;
4238 ix86_gen_allocate_stack_worker = gen_allocate_stack_worker_probe_si;
4239 ix86_gen_adjust_stack_and_probe = gen_adjust_stack_and_probesi;
4240 ix86_gen_probe_stack_range = gen_probe_stack_rangesi;
4241 ix86_gen_monitor = gen_sse3_monitor_si;
4242 ix86_gen_monitorx = gen_monitorx_si;
4245 #ifdef USE_IX86_CLD
4246 /* Use -mcld by default for 32-bit code if configured with --enable-cld. */
4247 if (!TARGET_64BIT_P (opts->x_ix86_isa_flags))
4248 opts->x_target_flags |= MASK_CLD & ~opts_set->x_target_flags;
4249 #endif
4251 if (!TARGET_64BIT_P (opts->x_ix86_isa_flags) && opts->x_flag_pic)
4253 if (opts->x_flag_fentry > 0)
4254 sorry ("-mfentry isn%'t supported for 32-bit in combination "
4255 "with -fpic");
4256 opts->x_flag_fentry = 0;
4258 else if (TARGET_SEH)
4260 if (opts->x_flag_fentry == 0)
4261 sorry ("-mno-fentry isn%'t compatible with SEH");
4262 opts->x_flag_fentry = 1;
4264 else if (opts->x_flag_fentry < 0)
4266 #if defined(PROFILE_BEFORE_PROLOGUE)
4267 opts->x_flag_fentry = 1;
4268 #else
4269 opts->x_flag_fentry = 0;
4270 #endif
4273 if (!(opts_set->x_target_flags & MASK_VZEROUPPER))
4274 opts->x_target_flags |= MASK_VZEROUPPER;
4275 if (!ix86_tune_features[X86_TUNE_AVX256_UNALIGNED_LOAD_OPTIMAL]
4276 && !(opts_set->x_target_flags & MASK_AVX256_SPLIT_UNALIGNED_LOAD))
4277 opts->x_target_flags |= MASK_AVX256_SPLIT_UNALIGNED_LOAD;
4278 if (!ix86_tune_features[X86_TUNE_AVX256_UNALIGNED_STORE_OPTIMAL]
4279 && !(opts_set->x_target_flags & MASK_AVX256_SPLIT_UNALIGNED_STORE))
4280 opts->x_target_flags |= MASK_AVX256_SPLIT_UNALIGNED_STORE;
4281 /* Enable 128-bit AVX instruction generation
4282 for the auto-vectorizer. */
4283 if (TARGET_AVX128_OPTIMAL
4284 && !(opts_set->x_target_flags & MASK_PREFER_AVX128))
4285 opts->x_target_flags |= MASK_PREFER_AVX128;
4287 if (opts->x_ix86_recip_name)
4289 char *p = ASTRDUP (opts->x_ix86_recip_name);
4290 char *q;
4291 unsigned int mask, i;
4292 bool invert;
4294 while ((q = strtok (p, ",")) != NULL)
4296 p = NULL;
4297 if (*q == '!')
4299 invert = true;
4300 q++;
4302 else
4303 invert = false;
4305 if (!strcmp (q, "default"))
4306 mask = RECIP_MASK_ALL;
4307 else
4309 for (i = 0; i < ARRAY_SIZE (recip_options); i++)
4310 if (!strcmp (q, recip_options[i].string))
4312 mask = recip_options[i].mask;
4313 break;
4316 if (i == ARRAY_SIZE (recip_options))
4318 error ("unknown option for -mrecip=%s", q);
4319 invert = false;
4320 mask = RECIP_MASK_NONE;
4324 opts->x_recip_mask_explicit |= mask;
4325 if (invert)
4326 opts->x_recip_mask &= ~mask;
4327 else
4328 opts->x_recip_mask |= mask;
4332 if (TARGET_RECIP_P (opts->x_target_flags))
4333 opts->x_recip_mask |= RECIP_MASK_ALL & ~opts->x_recip_mask_explicit;
4334 else if (opts_set->x_target_flags & MASK_RECIP)
4335 opts->x_recip_mask &= ~(RECIP_MASK_ALL & ~opts->x_recip_mask_explicit);
4337 /* Default long double to 64-bit for 32-bit Bionic and to __float128
4338 for 64-bit Bionic. */
4339 if (TARGET_HAS_BIONIC
4340 && !(opts_set->x_target_flags
4341 & (MASK_LONG_DOUBLE_64 | MASK_LONG_DOUBLE_128)))
4342 opts->x_target_flags |= (TARGET_64BIT
4343 ? MASK_LONG_DOUBLE_128
4344 : MASK_LONG_DOUBLE_64);
4346 /* Only one of them can be active. */
4347 gcc_assert ((opts->x_target_flags & MASK_LONG_DOUBLE_64) == 0
4348 || (opts->x_target_flags & MASK_LONG_DOUBLE_128) == 0);
4350 /* Save the initial options in case the user does function specific
4351 options. */
4352 if (main_args_p)
4353 target_option_default_node = target_option_current_node
4354 = build_target_option_node (opts);
4356 /* Handle stack protector */
4357 if (!opts_set->x_ix86_stack_protector_guard)
4358 opts->x_ix86_stack_protector_guard
4359 = TARGET_HAS_BIONIC ? SSP_GLOBAL : SSP_TLS;
4361 /* Handle -mmemcpy-strategy= and -mmemset-strategy= */
4362 if (opts->x_ix86_tune_memcpy_strategy)
4364 char *str = xstrdup (opts->x_ix86_tune_memcpy_strategy);
4365 ix86_parse_stringop_strategy_string (str, false);
4366 free (str);
4369 if (opts->x_ix86_tune_memset_strategy)
4371 char *str = xstrdup (opts->x_ix86_tune_memset_strategy);
4372 ix86_parse_stringop_strategy_string (str, true);
4373 free (str);
4377 /* Implement the TARGET_OPTION_OVERRIDE hook. */
4379 static void
4380 ix86_option_override (void)
4382 opt_pass *pass_insert_vzeroupper = make_pass_insert_vzeroupper (g);
4383 struct register_pass_info insert_vzeroupper_info
4384 = { pass_insert_vzeroupper, "reload",
4385 1, PASS_POS_INSERT_AFTER
4388 ix86_option_override_internal (true, &global_options, &global_options_set);
4391 /* This needs to be done at start up. It's convenient to do it here. */
4392 register_pass (&insert_vzeroupper_info);
4395 /* Implement the TARGET_OFFLOAD_OPTIONS hook. */
4396 static char *
4397 ix86_offload_options (void)
4399 if (TARGET_LP64)
4400 return xstrdup ("-foffload-abi=lp64");
4401 return xstrdup ("-foffload-abi=ilp32");
4404 /* Update register usage after having seen the compiler flags. */
4406 static void
4407 ix86_conditional_register_usage (void)
4409 int i, c_mask;
4411 /* For 32-bit targets, squash the REX registers. */
4412 if (! TARGET_64BIT)
4414 for (i = FIRST_REX_INT_REG; i <= LAST_REX_INT_REG; i++)
4415 fixed_regs[i] = call_used_regs[i] = 1, reg_names[i] = "";
4416 for (i = FIRST_REX_SSE_REG; i <= LAST_REX_SSE_REG; i++)
4417 fixed_regs[i] = call_used_regs[i] = 1, reg_names[i] = "";
4418 for (i = FIRST_EXT_REX_SSE_REG; i <= LAST_EXT_REX_SSE_REG; i++)
4419 fixed_regs[i] = call_used_regs[i] = 1, reg_names[i] = "";
4422 /* See the definition of CALL_USED_REGISTERS in i386.h. */
4423 c_mask = (TARGET_64BIT_MS_ABI ? (1 << 3)
4424 : TARGET_64BIT ? (1 << 2)
4425 : (1 << 1));
4427 CLEAR_HARD_REG_SET (reg_class_contents[(int)CLOBBERED_REGS]);
4429 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
4431 /* Set/reset conditionally defined registers from
4432 CALL_USED_REGISTERS initializer. */
4433 if (call_used_regs[i] > 1)
4434 call_used_regs[i] = !!(call_used_regs[i] & c_mask);
4436 /* Calculate registers of CLOBBERED_REGS register set
4437 as call used registers from GENERAL_REGS register set. */
4438 if (TEST_HARD_REG_BIT (reg_class_contents[(int)GENERAL_REGS], i)
4439 && call_used_regs[i])
4440 SET_HARD_REG_BIT (reg_class_contents[(int)CLOBBERED_REGS], i);
4443 /* If MMX is disabled, squash the registers. */
4444 if (! TARGET_MMX)
4445 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
4446 if (TEST_HARD_REG_BIT (reg_class_contents[(int)MMX_REGS], i))
4447 fixed_regs[i] = call_used_regs[i] = 1, reg_names[i] = "";
4449 /* If SSE is disabled, squash the registers. */
4450 if (! TARGET_SSE)
4451 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
4452 if (TEST_HARD_REG_BIT (reg_class_contents[(int)SSE_REGS], i))
4453 fixed_regs[i] = call_used_regs[i] = 1, reg_names[i] = "";
4455 /* If the FPU is disabled, squash the registers. */
4456 if (! (TARGET_80387 || TARGET_FLOAT_RETURNS_IN_80387))
4457 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
4458 if (TEST_HARD_REG_BIT (reg_class_contents[(int)FLOAT_REGS], i))
4459 fixed_regs[i] = call_used_regs[i] = 1, reg_names[i] = "";
4461 /* If AVX512F is disabled, squash the registers. */
4462 if (! TARGET_AVX512F)
4464 for (i = FIRST_EXT_REX_SSE_REG; i <= LAST_EXT_REX_SSE_REG; i++)
4465 fixed_regs[i] = call_used_regs[i] = 1, reg_names[i] = "";
4467 for (i = FIRST_MASK_REG; i <= LAST_MASK_REG; i++)
4468 fixed_regs[i] = call_used_regs[i] = 1, reg_names[i] = "";
4471 /* If MPX is disabled, squash the registers. */
4472 if (! TARGET_MPX)
4473 for (i = FIRST_BND_REG; i <= LAST_BND_REG; i++)
4474 fixed_regs[i] = call_used_regs[i] = 1, reg_names[i] = "";
4478 /* Save the current options */
4480 static void
4481 ix86_function_specific_save (struct cl_target_option *ptr,
4482 struct gcc_options *opts)
4484 ptr->arch = ix86_arch;
4485 ptr->schedule = ix86_schedule;
4486 ptr->prefetch_sse = x86_prefetch_sse;
4487 ptr->tune = ix86_tune;
4488 ptr->branch_cost = ix86_branch_cost;
4489 ptr->tune_defaulted = ix86_tune_defaulted;
4490 ptr->arch_specified = ix86_arch_specified;
4491 ptr->x_ix86_isa_flags_explicit = opts->x_ix86_isa_flags_explicit;
4492 ptr->x_ix86_target_flags_explicit = opts->x_ix86_target_flags_explicit;
4493 ptr->x_recip_mask_explicit = opts->x_recip_mask_explicit;
4494 ptr->x_ix86_arch_string = opts->x_ix86_arch_string;
4495 ptr->x_ix86_tune_string = opts->x_ix86_tune_string;
4496 ptr->x_ix86_cmodel = opts->x_ix86_cmodel;
4497 ptr->x_ix86_abi = opts->x_ix86_abi;
4498 ptr->x_ix86_asm_dialect = opts->x_ix86_asm_dialect;
4499 ptr->x_ix86_branch_cost = opts->x_ix86_branch_cost;
4500 ptr->x_ix86_dump_tunes = opts->x_ix86_dump_tunes;
4501 ptr->x_ix86_force_align_arg_pointer = opts->x_ix86_force_align_arg_pointer;
4502 ptr->x_ix86_force_drap = opts->x_ix86_force_drap;
4503 ptr->x_ix86_incoming_stack_boundary_arg = opts->x_ix86_incoming_stack_boundary_arg;
4504 ptr->x_ix86_pmode = opts->x_ix86_pmode;
4505 ptr->x_ix86_preferred_stack_boundary_arg = opts->x_ix86_preferred_stack_boundary_arg;
4506 ptr->x_ix86_recip_name = opts->x_ix86_recip_name;
4507 ptr->x_ix86_regparm = opts->x_ix86_regparm;
4508 ptr->x_ix86_section_threshold = opts->x_ix86_section_threshold;
4509 ptr->x_ix86_sse2avx = opts->x_ix86_sse2avx;
4510 ptr->x_ix86_stack_protector_guard = opts->x_ix86_stack_protector_guard;
4511 ptr->x_ix86_stringop_alg = opts->x_ix86_stringop_alg;
4512 ptr->x_ix86_tls_dialect = opts->x_ix86_tls_dialect;
4513 ptr->x_ix86_tune_ctrl_string = opts->x_ix86_tune_ctrl_string;
4514 ptr->x_ix86_tune_memcpy_strategy = opts->x_ix86_tune_memcpy_strategy;
4515 ptr->x_ix86_tune_memset_strategy = opts->x_ix86_tune_memset_strategy;
4516 ptr->x_ix86_tune_no_default = opts->x_ix86_tune_no_default;
4517 ptr->x_ix86_veclibabi_type = opts->x_ix86_veclibabi_type;
4519 /* The fields are char but the variables are not; make sure the
4520 values fit in the fields. */
4521 gcc_assert (ptr->arch == ix86_arch);
4522 gcc_assert (ptr->schedule == ix86_schedule);
4523 gcc_assert (ptr->tune == ix86_tune);
4524 gcc_assert (ptr->branch_cost == ix86_branch_cost);
4527 /* Restore the current options */
4529 static void
4530 ix86_function_specific_restore (struct gcc_options *opts,
4531 struct cl_target_option *ptr)
4533 enum processor_type old_tune = ix86_tune;
4534 enum processor_type old_arch = ix86_arch;
4535 unsigned int ix86_arch_mask;
4536 int i;
4538 /* We don't change -fPIC. */
4539 opts->x_flag_pic = flag_pic;
4541 ix86_arch = (enum processor_type) ptr->arch;
4542 ix86_schedule = (enum attr_cpu) ptr->schedule;
4543 ix86_tune = (enum processor_type) ptr->tune;
4544 x86_prefetch_sse = ptr->prefetch_sse;
4545 opts->x_ix86_branch_cost = ptr->branch_cost;
4546 ix86_tune_defaulted = ptr->tune_defaulted;
4547 ix86_arch_specified = ptr->arch_specified;
4548 opts->x_ix86_isa_flags_explicit = ptr->x_ix86_isa_flags_explicit;
4549 opts->x_ix86_target_flags_explicit = ptr->x_ix86_target_flags_explicit;
4550 opts->x_recip_mask_explicit = ptr->x_recip_mask_explicit;
4551 opts->x_ix86_arch_string = ptr->x_ix86_arch_string;
4552 opts->x_ix86_tune_string = ptr->x_ix86_tune_string;
4553 opts->x_ix86_cmodel = ptr->x_ix86_cmodel;
4554 opts->x_ix86_abi = ptr->x_ix86_abi;
4555 opts->x_ix86_asm_dialect = ptr->x_ix86_asm_dialect;
4556 opts->x_ix86_branch_cost = ptr->x_ix86_branch_cost;
4557 opts->x_ix86_dump_tunes = ptr->x_ix86_dump_tunes;
4558 opts->x_ix86_force_align_arg_pointer = ptr->x_ix86_force_align_arg_pointer;
4559 opts->x_ix86_force_drap = ptr->x_ix86_force_drap;
4560 opts->x_ix86_incoming_stack_boundary_arg = ptr->x_ix86_incoming_stack_boundary_arg;
4561 opts->x_ix86_pmode = ptr->x_ix86_pmode;
4562 opts->x_ix86_preferred_stack_boundary_arg = ptr->x_ix86_preferred_stack_boundary_arg;
4563 opts->x_ix86_recip_name = ptr->x_ix86_recip_name;
4564 opts->x_ix86_regparm = ptr->x_ix86_regparm;
4565 opts->x_ix86_section_threshold = ptr->x_ix86_section_threshold;
4566 opts->x_ix86_sse2avx = ptr->x_ix86_sse2avx;
4567 opts->x_ix86_stack_protector_guard = ptr->x_ix86_stack_protector_guard;
4568 opts->x_ix86_stringop_alg = ptr->x_ix86_stringop_alg;
4569 opts->x_ix86_tls_dialect = ptr->x_ix86_tls_dialect;
4570 opts->x_ix86_tune_ctrl_string = ptr->x_ix86_tune_ctrl_string;
4571 opts->x_ix86_tune_memcpy_strategy = ptr->x_ix86_tune_memcpy_strategy;
4572 opts->x_ix86_tune_memset_strategy = ptr->x_ix86_tune_memset_strategy;
4573 opts->x_ix86_tune_no_default = ptr->x_ix86_tune_no_default;
4574 opts->x_ix86_veclibabi_type = ptr->x_ix86_veclibabi_type;
4575 ix86_tune_cost = processor_target_table[ix86_tune].cost;
4576 /* TODO: ix86_cost should be chosen at instruction or function granularity
4577 so that for cold code we use size_cost even in !optimize_size compilation. */
4578 if (opts->x_optimize_size)
4579 ix86_cost = &ix86_size_cost;
4580 else
4581 ix86_cost = ix86_tune_cost;
4583 /* Recreate the arch feature tests if the arch changed */
4584 if (old_arch != ix86_arch)
4586 ix86_arch_mask = 1u << ix86_arch;
4587 for (i = 0; i < X86_ARCH_LAST; ++i)
4588 ix86_arch_features[i]
4589 = !!(initial_ix86_arch_features[i] & ix86_arch_mask);
4592 /* Recreate the tune optimization tests */
4593 if (old_tune != ix86_tune)
4594 set_ix86_tune_features (ix86_tune, false);
4597 /* Adjust target options after streaming them in. This is mainly about
4598 reconciling them with global options. */
4600 static void
4601 ix86_function_specific_post_stream_in (struct cl_target_option *ptr)
4603 /* flag_pic is a global option, but ix86_cmodel is a target saved option
4604 partly computed from flag_pic. If flag_pic is on, adjust x_ix86_cmodel
4605 for PIC, or error out. */
4606 if (flag_pic)
4607 switch (ptr->x_ix86_cmodel)
4609 case CM_SMALL:
4610 ptr->x_ix86_cmodel = CM_SMALL_PIC;
4611 break;
4613 case CM_MEDIUM:
4614 ptr->x_ix86_cmodel = CM_MEDIUM_PIC;
4615 break;
4617 case CM_LARGE:
4618 ptr->x_ix86_cmodel = CM_LARGE_PIC;
4619 break;
4621 case CM_KERNEL:
4622 error ("code model %s does not support PIC mode", "kernel");
4623 break;
4625 default:
4626 break;
4628 else
4629 switch (ptr->x_ix86_cmodel)
4631 case CM_SMALL_PIC:
4632 ptr->x_ix86_cmodel = CM_SMALL;
4633 break;
4635 case CM_MEDIUM_PIC:
4636 ptr->x_ix86_cmodel = CM_MEDIUM;
4637 break;
4639 case CM_LARGE_PIC:
4640 ptr->x_ix86_cmodel = CM_LARGE;
4641 break;
4643 default:
4644 break;
4648 /* Print the current options */
4650 static void
4651 ix86_function_specific_print (FILE *file, int indent,
4652 struct cl_target_option *ptr)
4654 char *target_string
4655 = ix86_target_string (ptr->x_ix86_isa_flags, ptr->x_target_flags,
4656 NULL, NULL, ptr->x_ix86_fpmath, false);
4658 gcc_assert (ptr->arch < PROCESSOR_max);
4659 fprintf (file, "%*sarch = %d (%s)\n",
4660 indent, "",
4661 ptr->arch, processor_target_table[ptr->arch].name);
4663 gcc_assert (ptr->tune < PROCESSOR_max);
4664 fprintf (file, "%*stune = %d (%s)\n",
4665 indent, "",
4666 ptr->tune, processor_target_table[ptr->tune].name);
4668 fprintf (file, "%*sbranch_cost = %d\n", indent, "", ptr->branch_cost);
4670 if (target_string)
4672 fprintf (file, "%*s%s\n", indent, "", target_string);
4673 free (target_string);
4678 /* Inner function to process the attribute((target(...))), take an argument and
4679 set the current options from the argument. If we have a list, recursively go
4680 over the list. */
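/* Illustrative example of the strings this parser accepts (the ISA names,
   the "no-" prefix and the "arch="/"tune="/"fpmath=" forms all come from the
   attrs[] table and the parsing loop below; the declaration itself is
   hypothetical):

     int foo (int *p)
       __attribute__ ((target ("avx2,no-sse4a,arch=haswell,fpmath=sse")));

   Each comma-separated token is looked up in attrs[] and either toggles an
   ISA flag, sets a target flag mask, or records a string/enum option.  */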
4682 static bool
4683 ix86_valid_target_attribute_inner_p (tree args, char *p_strings[],
4684 struct gcc_options *opts,
4685 struct gcc_options *opts_set,
4686 struct gcc_options *enum_opts_set)
4688 char *next_optstr;
4689 bool ret = true;
4691 #define IX86_ATTR_ISA(S,O) { S, sizeof (S)-1, ix86_opt_isa, O, 0 }
4692 #define IX86_ATTR_STR(S,O) { S, sizeof (S)-1, ix86_opt_str, O, 0 }
4693 #define IX86_ATTR_ENUM(S,O) { S, sizeof (S)-1, ix86_opt_enum, O, 0 }
4694 #define IX86_ATTR_YES(S,O,M) { S, sizeof (S)-1, ix86_opt_yes, O, M }
4695 #define IX86_ATTR_NO(S,O,M) { S, sizeof (S)-1, ix86_opt_no, O, M }
4697 enum ix86_opt_type
4699 ix86_opt_unknown,
4700 ix86_opt_yes,
4701 ix86_opt_no,
4702 ix86_opt_str,
4703 ix86_opt_enum,
4704 ix86_opt_isa
4707 static const struct
4709 const char *string;
4710 size_t len;
4711 enum ix86_opt_type type;
4712 int opt;
4713 int mask;
4714 } attrs[] = {
4715 /* isa options */
4716 IX86_ATTR_ISA ("3dnow", OPT_m3dnow),
4717 IX86_ATTR_ISA ("abm", OPT_mabm),
4718 IX86_ATTR_ISA ("bmi", OPT_mbmi),
4719 IX86_ATTR_ISA ("bmi2", OPT_mbmi2),
4720 IX86_ATTR_ISA ("lzcnt", OPT_mlzcnt),
4721 IX86_ATTR_ISA ("tbm", OPT_mtbm),
4722 IX86_ATTR_ISA ("aes", OPT_maes),
4723 IX86_ATTR_ISA ("sha", OPT_msha),
4724 IX86_ATTR_ISA ("avx", OPT_mavx),
4725 IX86_ATTR_ISA ("avx2", OPT_mavx2),
4726 IX86_ATTR_ISA ("avx512f", OPT_mavx512f),
4727 IX86_ATTR_ISA ("avx512pf", OPT_mavx512pf),
4728 IX86_ATTR_ISA ("avx512er", OPT_mavx512er),
4729 IX86_ATTR_ISA ("avx512cd", OPT_mavx512cd),
4730 IX86_ATTR_ISA ("avx512dq", OPT_mavx512dq),
4731 IX86_ATTR_ISA ("avx512bw", OPT_mavx512bw),
4732 IX86_ATTR_ISA ("avx512vl", OPT_mavx512vl),
4733 IX86_ATTR_ISA ("mmx", OPT_mmmx),
4734 IX86_ATTR_ISA ("pclmul", OPT_mpclmul),
4735 IX86_ATTR_ISA ("popcnt", OPT_mpopcnt),
4736 IX86_ATTR_ISA ("sse", OPT_msse),
4737 IX86_ATTR_ISA ("sse2", OPT_msse2),
4738 IX86_ATTR_ISA ("sse3", OPT_msse3),
4739 IX86_ATTR_ISA ("sse4", OPT_msse4),
4740 IX86_ATTR_ISA ("sse4.1", OPT_msse4_1),
4741 IX86_ATTR_ISA ("sse4.2", OPT_msse4_2),
4742 IX86_ATTR_ISA ("sse4a", OPT_msse4a),
4743 IX86_ATTR_ISA ("ssse3", OPT_mssse3),
4744 IX86_ATTR_ISA ("fma4", OPT_mfma4),
4745 IX86_ATTR_ISA ("fma", OPT_mfma),
4746 IX86_ATTR_ISA ("xop", OPT_mxop),
4747 IX86_ATTR_ISA ("lwp", OPT_mlwp),
4748 IX86_ATTR_ISA ("fsgsbase", OPT_mfsgsbase),
4749 IX86_ATTR_ISA ("rdrnd", OPT_mrdrnd),
4750 IX86_ATTR_ISA ("f16c", OPT_mf16c),
4751 IX86_ATTR_ISA ("rtm", OPT_mrtm),
4752 IX86_ATTR_ISA ("hle", OPT_mhle),
4753 IX86_ATTR_ISA ("prfchw", OPT_mprfchw),
4754 IX86_ATTR_ISA ("rdseed", OPT_mrdseed),
4755 IX86_ATTR_ISA ("adx", OPT_madx),
4756 IX86_ATTR_ISA ("fxsr", OPT_mfxsr),
4757 IX86_ATTR_ISA ("xsave", OPT_mxsave),
4758 IX86_ATTR_ISA ("xsaveopt", OPT_mxsaveopt),
4759 IX86_ATTR_ISA ("prefetchwt1", OPT_mprefetchwt1),
4760 IX86_ATTR_ISA ("clflushopt", OPT_mclflushopt),
4761 IX86_ATTR_ISA ("xsavec", OPT_mxsavec),
4762 IX86_ATTR_ISA ("xsaves", OPT_mxsaves),
4763 IX86_ATTR_ISA ("avx512vbmi", OPT_mavx512vbmi),
4764 IX86_ATTR_ISA ("avx512ifma", OPT_mavx512ifma),
4765 IX86_ATTR_ISA ("clwb", OPT_mclwb),
4766 IX86_ATTR_ISA ("pcommit", OPT_mpcommit),
4767 IX86_ATTR_ISA ("mwaitx", OPT_mmwaitx),
4769 /* enum options */
4770 IX86_ATTR_ENUM ("fpmath=", OPT_mfpmath_),
4772 /* string options */
4773 IX86_ATTR_STR ("arch=", IX86_FUNCTION_SPECIFIC_ARCH),
4774 IX86_ATTR_STR ("tune=", IX86_FUNCTION_SPECIFIC_TUNE),
4776 /* flag options */
4777 IX86_ATTR_YES ("cld",
4778 OPT_mcld,
4779 MASK_CLD),
4781 IX86_ATTR_NO ("fancy-math-387",
4782 OPT_mfancy_math_387,
4783 MASK_NO_FANCY_MATH_387),
4785 IX86_ATTR_YES ("ieee-fp",
4786 OPT_mieee_fp,
4787 MASK_IEEE_FP),
4789 IX86_ATTR_YES ("inline-all-stringops",
4790 OPT_minline_all_stringops,
4791 MASK_INLINE_ALL_STRINGOPS),
4793 IX86_ATTR_YES ("inline-stringops-dynamically",
4794 OPT_minline_stringops_dynamically,
4795 MASK_INLINE_STRINGOPS_DYNAMICALLY),
4797 IX86_ATTR_NO ("align-stringops",
4798 OPT_mno_align_stringops,
4799 MASK_NO_ALIGN_STRINGOPS),
4801 IX86_ATTR_YES ("recip",
4802 OPT_mrecip,
4803 MASK_RECIP),
4807 /* If this is a list, recurse to get the options. */
4808 if (TREE_CODE (args) == TREE_LIST)
4810 bool ret = true;
4812 for (; args; args = TREE_CHAIN (args))
4813 if (TREE_VALUE (args)
4814 && !ix86_valid_target_attribute_inner_p (TREE_VALUE (args),
4815 p_strings, opts, opts_set,
4816 enum_opts_set))
4817 ret = false;
4819 return ret;
4822 else if (TREE_CODE (args) != STRING_CST)
4824 error ("attribute %<target%> argument not a string");
4825 return false;
4828 /* Handle multiple arguments separated by commas. */
4829 next_optstr = ASTRDUP (TREE_STRING_POINTER (args));
4831 while (next_optstr && *next_optstr != '\0')
4833 char *p = next_optstr;
4834 char *orig_p = p;
4835 char *comma = strchr (next_optstr, ',');
4836 const char *opt_string;
4837 size_t len, opt_len;
4838 int opt;
4839 bool opt_set_p;
4840 char ch;
4841 unsigned i;
4842 enum ix86_opt_type type = ix86_opt_unknown;
4843 int mask = 0;
4845 if (comma)
4847 *comma = '\0';
4848 len = comma - next_optstr;
4849 next_optstr = comma + 1;
4851 else
4853 len = strlen (p);
4854 next_optstr = NULL;
4857 /* Recognize no-xxx. */
4858 if (len > 3 && p[0] == 'n' && p[1] == 'o' && p[2] == '-')
4860 opt_set_p = false;
4861 p += 3;
4862 len -= 3;
4864 else
4865 opt_set_p = true;
4867 /* Find the option. */
4868 ch = *p;
4869 opt = N_OPTS;
4870 for (i = 0; i < ARRAY_SIZE (attrs); i++)
4872 type = attrs[i].type;
4873 opt_len = attrs[i].len;
4874 if (ch == attrs[i].string[0]
4875 && ((type != ix86_opt_str && type != ix86_opt_enum)
4876 ? len == opt_len
4877 : len > opt_len)
4878 && memcmp (p, attrs[i].string, opt_len) == 0)
4880 opt = attrs[i].opt;
4881 mask = attrs[i].mask;
4882 opt_string = attrs[i].string;
4883 break;
4887 /* Process the option. */
4888 if (opt == N_OPTS)
4890 error ("attribute(target(\"%s\")) is unknown", orig_p);
4891 ret = false;
4894 else if (type == ix86_opt_isa)
4896 struct cl_decoded_option decoded;
4898 generate_option (opt, NULL, opt_set_p, CL_TARGET, &decoded);
4899 ix86_handle_option (opts, opts_set,
4900 &decoded, input_location);
4903 else if (type == ix86_opt_yes || type == ix86_opt_no)
4905 if (type == ix86_opt_no)
4906 opt_set_p = !opt_set_p;
4908 if (opt_set_p)
4909 opts->x_target_flags |= mask;
4910 else
4911 opts->x_target_flags &= ~mask;
4914 else if (type == ix86_opt_str)
4916 if (p_strings[opt])
4918 error ("option(\"%s\") was already specified", opt_string);
4919 ret = false;
4921 else
4922 p_strings[opt] = xstrdup (p + opt_len);
4925 else if (type == ix86_opt_enum)
4927 bool arg_ok;
4928 int value;
4930 arg_ok = opt_enum_arg_to_value (opt, p + opt_len, &value, CL_TARGET);
4931 if (arg_ok)
4932 set_option (opts, enum_opts_set, opt, value,
4933 p + opt_len, DK_UNSPECIFIED, input_location,
4934 global_dc);
4935 else
4937 error ("attribute(target(\"%s\")) is unknown", orig_p);
4938 ret = false;
4942 else
4943 gcc_unreachable ();
4946 return ret;
4949 /* Return a TARGET_OPTION_NODE tree of the target options listed or NULL. */
4951 tree
4952 ix86_valid_target_attribute_tree (tree args,
4953 struct gcc_options *opts,
4954 struct gcc_options *opts_set)
4956 const char *orig_arch_string = opts->x_ix86_arch_string;
4957 const char *orig_tune_string = opts->x_ix86_tune_string;
4958 enum fpmath_unit orig_fpmath_set = opts_set->x_ix86_fpmath;
4959 int orig_tune_defaulted = ix86_tune_defaulted;
4960 int orig_arch_specified = ix86_arch_specified;
4961 char *option_strings[IX86_FUNCTION_SPECIFIC_MAX] = { NULL, NULL };
4962 tree t = NULL_TREE;
4963 int i;
4964 struct cl_target_option *def
4965 = TREE_TARGET_OPTION (target_option_default_node);
4966 struct gcc_options enum_opts_set;
4968 memset (&enum_opts_set, 0, sizeof (enum_opts_set));
4970 /* Process each of the options on the chain. */
4971 if (! ix86_valid_target_attribute_inner_p (args, option_strings, opts,
4972 opts_set, &enum_opts_set))
4973 return error_mark_node;
4975 /* If the changed options are different from the default, rerun
4976 ix86_option_override_internal, and then save the options away.
4977 The string options are attribute options, and will be undone
4978 when we copy the save structure. */
4979 if (opts->x_ix86_isa_flags != def->x_ix86_isa_flags
4980 || opts->x_target_flags != def->x_target_flags
4981 || option_strings[IX86_FUNCTION_SPECIFIC_ARCH]
4982 || option_strings[IX86_FUNCTION_SPECIFIC_TUNE]
4983 || enum_opts_set.x_ix86_fpmath)
4985 /* If we are using the default tune= or arch=, undo the string assigned,
4986 and use the default. */
4987 if (option_strings[IX86_FUNCTION_SPECIFIC_ARCH])
4988 opts->x_ix86_arch_string = option_strings[IX86_FUNCTION_SPECIFIC_ARCH];
4989 else if (!orig_arch_specified)
4990 opts->x_ix86_arch_string = NULL;
4992 if (option_strings[IX86_FUNCTION_SPECIFIC_TUNE])
4993 opts->x_ix86_tune_string = option_strings[IX86_FUNCTION_SPECIFIC_TUNE];
4994 else if (orig_tune_defaulted)
4995 opts->x_ix86_tune_string = NULL;
4997 /* If fpmath= is not set, and we now have sse2 on 32-bit, use it. */
4998 if (enum_opts_set.x_ix86_fpmath)
4999 opts_set->x_ix86_fpmath = (enum fpmath_unit) 1;
5000 else if (!TARGET_64BIT_P (opts->x_ix86_isa_flags)
5001 && TARGET_SSE_P (opts->x_ix86_isa_flags))
5003 opts->x_ix86_fpmath = (enum fpmath_unit) (FPMATH_SSE | FPMATH_387);
5004 opts_set->x_ix86_fpmath = (enum fpmath_unit) 1;
5007 /* Do any overrides, such as arch=xxx, or tune=xxx support. */
5008 ix86_option_override_internal (false, opts, opts_set);
5010 /* Add any builtin functions with the new isa if any. */
5011 ix86_add_new_builtins (opts->x_ix86_isa_flags);
5013 /* Save the current options unless we are validating options for
5014 #pragma. */
5015 t = build_target_option_node (opts);
5017 opts->x_ix86_arch_string = orig_arch_string;
5018 opts->x_ix86_tune_string = orig_tune_string;
5019 opts_set->x_ix86_fpmath = orig_fpmath_set;
5021 /* Free up memory allocated to hold the strings */
5022 for (i = 0; i < IX86_FUNCTION_SPECIFIC_MAX; i++)
5023 free (option_strings[i]);
5026 return t;
5029 /* Hook to validate attribute((target("string"))). */
5031 static bool
5032 ix86_valid_target_attribute_p (tree fndecl,
5033 tree ARG_UNUSED (name),
5034 tree args,
5035 int ARG_UNUSED (flags))
5037 struct gcc_options func_options;
5038 tree new_target, new_optimize;
5039 bool ret = true;
5041 /* attribute((target("default"))) does nothing, beyond
5042 affecting multi-versioning. */
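/* For instance (function multi-versioning, a C++ front-end feature), a
   hypothetical pair of declarations such as

     int dispatchable (void) __attribute__ ((target ("default")));
     int dispatchable (void) __attribute__ ((target ("avx2")));

   reaches this hook; the "default" variant is accepted here without
   changing any target options.  */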
5043 if (TREE_VALUE (args)
5044 && TREE_CODE (TREE_VALUE (args)) == STRING_CST
5045 && TREE_CHAIN (args) == NULL_TREE
5046 && strcmp (TREE_STRING_POINTER (TREE_VALUE (args)), "default") == 0)
5047 return true;
5049 tree old_optimize = build_optimization_node (&global_options);
5051 /* Get the optimization options of the current function. */
5052 tree func_optimize = DECL_FUNCTION_SPECIFIC_OPTIMIZATION (fndecl);
5054 if (!func_optimize)
5055 func_optimize = old_optimize;
5057 /* Init func_options. */
5058 memset (&func_options, 0, sizeof (func_options));
5059 init_options_struct (&func_options, NULL);
5060 lang_hooks.init_options_struct (&func_options);
5062 cl_optimization_restore (&func_options,
5063 TREE_OPTIMIZATION (func_optimize));
5065 /* Initialize func_options to the default before its target options can
5066 be set. */
5067 cl_target_option_restore (&func_options,
5068 TREE_TARGET_OPTION (target_option_default_node));
5070 new_target = ix86_valid_target_attribute_tree (args, &func_options,
5071 &global_options_set);
5073 new_optimize = build_optimization_node (&func_options);
5075 if (new_target == error_mark_node)
5076 ret = false;
5078 else if (fndecl && new_target)
5080 DECL_FUNCTION_SPECIFIC_TARGET (fndecl) = new_target;
5082 if (old_optimize != new_optimize)
5083 DECL_FUNCTION_SPECIFIC_OPTIMIZATION (fndecl) = new_optimize;
5086 return ret;
5090 /* Hook to determine if one function can safely inline another. */
5092 static bool
5093 ix86_can_inline_p (tree caller, tree callee)
5095 bool ret = false;
5096 tree caller_tree = DECL_FUNCTION_SPECIFIC_TARGET (caller);
5097 tree callee_tree = DECL_FUNCTION_SPECIFIC_TARGET (callee);
5099 /* If callee has no option attributes, then it is ok to inline. */
5100 if (!callee_tree)
5101 ret = true;
5103 /* If caller has no option attributes, but callee does then it is not ok to
5104 inline. */
5105 else if (!caller_tree)
5106 ret = false;
5108 else
5110 struct cl_target_option *caller_opts = TREE_TARGET_OPTION (caller_tree);
5111 struct cl_target_option *callee_opts = TREE_TARGET_OPTION (callee_tree);
5113 /* Callee's isa options should be a subset of the caller's, i.e. an SSE4 function
5114 can inline an SSE2 function but an SSE2 function can't inline an SSE4
5115 function. */
5116 if ((caller_opts->x_ix86_isa_flags & callee_opts->x_ix86_isa_flags)
5117 != callee_opts->x_ix86_isa_flags)
5118 ret = false;
5120 /* See if we have the same non-isa options. */
5121 else if (caller_opts->x_target_flags != callee_opts->x_target_flags)
5122 ret = false;
5124 /* See if arch, tune, etc. are the same. */
5125 else if (caller_opts->arch != callee_opts->arch)
5126 ret = false;
5128 else if (caller_opts->tune != callee_opts->tune)
5129 ret = false;
5131 else if (caller_opts->x_ix86_fpmath != callee_opts->x_ix86_fpmath)
5132 ret = false;
5134 else if (caller_opts->branch_cost != callee_opts->branch_cost)
5135 ret = false;
5137 else
5138 ret = true;
5141 return ret;
5145 /* Remember the last target of ix86_set_current_function. */
5146 static GTY(()) tree ix86_previous_fndecl;
5148 /* Set targets globals to the default (or current #pragma GCC target
5149 if active). Invalidate ix86_previous_fndecl cache. */
5151 void
5152 ix86_reset_previous_fndecl (void)
5154 tree new_tree = target_option_current_node;
5155 cl_target_option_restore (&global_options, TREE_TARGET_OPTION (new_tree));
5156 if (TREE_TARGET_GLOBALS (new_tree))
5157 restore_target_globals (TREE_TARGET_GLOBALS (new_tree));
5158 else if (new_tree == target_option_default_node)
5159 restore_target_globals (&default_target_globals);
5160 else
5161 TREE_TARGET_GLOBALS (new_tree) = save_target_globals_default_opts ();
5162 ix86_previous_fndecl = NULL_TREE;
5165 /* Establish appropriate back-end context for processing the function
5166 FNDECL. The argument might be NULL to indicate processing at top
5167 level, outside of any function scope. */
5168 static void
5169 ix86_set_current_function (tree fndecl)
5171 /* Only change the context if the function changes. This hook is called
5172 several times in the course of compiling a function, and we don't want to
5173 slow things down too much or call target_reinit when it isn't safe. */
5174 if (fndecl == ix86_previous_fndecl)
5175 return;
5177 tree old_tree;
5178 if (ix86_previous_fndecl == NULL_TREE)
5179 old_tree = target_option_current_node;
5180 else if (DECL_FUNCTION_SPECIFIC_TARGET (ix86_previous_fndecl))
5181 old_tree = DECL_FUNCTION_SPECIFIC_TARGET (ix86_previous_fndecl);
5182 else
5183 old_tree = target_option_default_node;
5185 if (fndecl == NULL_TREE)
5187 if (old_tree != target_option_current_node)
5188 ix86_reset_previous_fndecl ();
5189 return;
5192 tree new_tree = DECL_FUNCTION_SPECIFIC_TARGET (fndecl);
5193 if (new_tree == NULL_TREE)
5194 new_tree = target_option_default_node;
5196 if (old_tree != new_tree)
5198 cl_target_option_restore (&global_options, TREE_TARGET_OPTION (new_tree));
5199 if (TREE_TARGET_GLOBALS (new_tree))
5200 restore_target_globals (TREE_TARGET_GLOBALS (new_tree));
5201 else if (new_tree == target_option_default_node)
5202 restore_target_globals (&default_target_globals);
5203 else
5204 TREE_TARGET_GLOBALS (new_tree) = save_target_globals_default_opts ();
5206 ix86_previous_fndecl = fndecl;
5210 /* Return true if this goes in large data/bss. */
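/* Only relevant for -mcmodel=medium: objects larger than
   ix86_section_threshold bytes (see the -mlarge-data-threshold= option)
   end up in the .ldata/.lbss sections handled below.  */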
5212 static bool
5213 ix86_in_large_data_p (tree exp)
5215 if (ix86_cmodel != CM_MEDIUM && ix86_cmodel != CM_MEDIUM_PIC)
5216 return false;
5218 /* Functions are never large data. */
5219 if (TREE_CODE (exp) == FUNCTION_DECL)
5220 return false;
5222 /* Automatic variables are never large data. */
5223 if (TREE_CODE (exp) == VAR_DECL && !is_global_var (exp))
5224 return false;
5226 if (TREE_CODE (exp) == VAR_DECL && DECL_SECTION_NAME (exp))
5228 const char *section = DECL_SECTION_NAME (exp);
5229 if (strcmp (section, ".ldata") == 0
5230 || strcmp (section, ".lbss") == 0)
5231 return true;
5232 return false;
5234 else
5236 HOST_WIDE_INT size = int_size_in_bytes (TREE_TYPE (exp));
5238 /* If this is an incomplete type with size 0, then we can't put it
5239 in data because it might be too big when completed. Also,
5240 int_size_in_bytes returns -1 if the size can vary or is larger than
5241 an integer, in which case it is also safer to assume that it goes in
5242 large data. */
5243 if (size <= 0 || size > ix86_section_threshold)
5244 return true;
5247 return false;
5250 /* Switch to the appropriate section for output of DECL.
5251 DECL is either a `VAR_DECL' node or a constant of some sort.
5252 RELOC indicates whether forming the initial value of DECL requires
5253 link-time relocations. */
5255 ATTRIBUTE_UNUSED static section *
5256 x86_64_elf_select_section (tree decl, int reloc,
5257 unsigned HOST_WIDE_INT align)
5259 if (ix86_in_large_data_p (decl))
5261 const char *sname = NULL;
5262 unsigned int flags = SECTION_WRITE;
5263 switch (categorize_decl_for_section (decl, reloc))
5265 case SECCAT_DATA:
5266 sname = ".ldata";
5267 break;
5268 case SECCAT_DATA_REL:
5269 sname = ".ldata.rel";
5270 break;
5271 case SECCAT_DATA_REL_LOCAL:
5272 sname = ".ldata.rel.local";
5273 break;
5274 case SECCAT_DATA_REL_RO:
5275 sname = ".ldata.rel.ro";
5276 break;
5277 case SECCAT_DATA_REL_RO_LOCAL:
5278 sname = ".ldata.rel.ro.local";
5279 break;
5280 case SECCAT_BSS:
5281 sname = ".lbss";
5282 flags |= SECTION_BSS;
5283 break;
5284 case SECCAT_RODATA:
5285 case SECCAT_RODATA_MERGE_STR:
5286 case SECCAT_RODATA_MERGE_STR_INIT:
5287 case SECCAT_RODATA_MERGE_CONST:
5288 sname = ".lrodata";
5289 flags = 0;
5290 break;
5291 case SECCAT_SRODATA:
5292 case SECCAT_SDATA:
5293 case SECCAT_SBSS:
5294 gcc_unreachable ();
5295 case SECCAT_TEXT:
5296 case SECCAT_TDATA:
5297 case SECCAT_TBSS:
5298 /* We don't split these for the medium model. Place them into
5299 default sections and hope for the best. */
5300 break;
5302 if (sname)
5304 /* We might get called with string constants, but get_named_section
5305 doesn't like them as they are not DECLs. Also, we need to set
5306 flags in that case. */
5307 if (!DECL_P (decl))
5308 return get_section (sname, flags, NULL);
5309 return get_named_section (decl, sname, reloc);
5312 return default_elf_select_section (decl, reloc, align);
5315 /* Select a set of attributes for section NAME based on the properties
5316 of DECL and whether or not RELOC indicates that DECL's initializer
5317 might contain runtime relocations. */
5319 static unsigned int ATTRIBUTE_UNUSED
5320 x86_64_elf_section_type_flags (tree decl, const char *name, int reloc)
5322 unsigned int flags = default_section_type_flags (decl, name, reloc);
5324 if (decl == NULL_TREE
5325 && (strcmp (name, ".ldata.rel.ro") == 0
5326 || strcmp (name, ".ldata.rel.ro.local") == 0))
5327 flags |= SECTION_RELRO;
5329 if (strcmp (name, ".lbss") == 0
5330 || strncmp (name, ".lbss.", 5) == 0
5331 || strncmp (name, ".gnu.linkonce.lb.", 16) == 0)
5332 flags |= SECTION_BSS;
5334 return flags;
5337 /* Build up a unique section name, expressed as a
5338 STRING_CST node, and assign it to DECL_SECTION_NAME (decl).
5339 RELOC indicates whether the initial value of EXP requires
5340 link-time relocations. */
5342 static void ATTRIBUTE_UNUSED
5343 x86_64_elf_unique_section (tree decl, int reloc)
5345 if (ix86_in_large_data_p (decl))
5347 const char *prefix = NULL;
5348 /* We only need to use .gnu.linkonce if we don't have COMDAT groups. */
5349 bool one_only = DECL_COMDAT_GROUP (decl) && !HAVE_COMDAT_GROUP;
5351 switch (categorize_decl_for_section (decl, reloc))
5353 case SECCAT_DATA:
5354 case SECCAT_DATA_REL:
5355 case SECCAT_DATA_REL_LOCAL:
5356 case SECCAT_DATA_REL_RO:
5357 case SECCAT_DATA_REL_RO_LOCAL:
5358 prefix = one_only ? ".ld" : ".ldata";
5359 break;
5360 case SECCAT_BSS:
5361 prefix = one_only ? ".lb" : ".lbss";
5362 break;
5363 case SECCAT_RODATA:
5364 case SECCAT_RODATA_MERGE_STR:
5365 case SECCAT_RODATA_MERGE_STR_INIT:
5366 case SECCAT_RODATA_MERGE_CONST:
5367 prefix = one_only ? ".lr" : ".lrodata";
5368 break;
5369 case SECCAT_SRODATA:
5370 case SECCAT_SDATA:
5371 case SECCAT_SBSS:
5372 gcc_unreachable ();
5373 case SECCAT_TEXT:
5374 case SECCAT_TDATA:
5375 case SECCAT_TBSS:
5376 /* We don't split these for the medium model. Place them into
5377 default sections and hope for the best. */
5378 break;
5380 if (prefix)
5382 const char *name, *linkonce;
5383 char *string;
5385 name = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (decl));
5386 name = targetm.strip_name_encoding (name);
5388 /* If we're using one_only, then there needs to be a .gnu.linkonce
5389 prefix to the section name. */
5390 linkonce = one_only ? ".gnu.linkonce" : "";
5392 string = ACONCAT ((linkonce, prefix, ".", name, NULL));
5394 set_decl_section_name (decl, string);
5395 return;
5398 default_unique_section (decl, reloc);
5401 #ifdef COMMON_ASM_OP
5402 /* This says how to output assembler code to declare an
5403 uninitialized external linkage data object.
5405 For medium model x86-64 we need to use the .largecomm directive for
5406 large objects. */
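/* A minimal sketch of the output this produces, assuming a hypothetical
   object "big_table" of 1048576 bytes aligned to 32 bytes under
   -mcmodel=medium:

	.largecomm	big_table,1048576,32

   Smaller objects fall back to the normal COMMON_ASM_OP form.  */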
5407 void
5408 x86_elf_aligned_common (FILE *file,
5409 const char *name, unsigned HOST_WIDE_INT size,
5410 int align)
5412 if ((ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_MEDIUM_PIC)
5413 && size > (unsigned int)ix86_section_threshold)
5414 fputs ("\t.largecomm\t", file);
5415 else
5416 fputs (COMMON_ASM_OP, file);
5417 assemble_name (file, name);
5418 fprintf (file, "," HOST_WIDE_INT_PRINT_UNSIGNED ",%u\n",
5419 size, align / BITS_PER_UNIT);
5421 #endif
5423 /* Utility function for targets to use in implementing
5424 ASM_OUTPUT_ALIGNED_BSS. */
5426 void
5427 x86_output_aligned_bss (FILE *file, tree decl, const char *name,
5428 unsigned HOST_WIDE_INT size, int align)
5430 if ((ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_MEDIUM_PIC)
5431 && size > (unsigned int)ix86_section_threshold)
5432 switch_to_section (get_named_section (decl, ".lbss", 0));
5433 else
5434 switch_to_section (bss_section);
5435 ASM_OUTPUT_ALIGN (file, floor_log2 (align / BITS_PER_UNIT));
5436 #ifdef ASM_DECLARE_OBJECT_NAME
5437 last_assemble_variable_decl = decl;
5438 ASM_DECLARE_OBJECT_NAME (file, name, decl);
5439 #else
5440 /* Standard thing is just output label for the object. */
5441 ASM_OUTPUT_LABEL (file, name);
5442 #endif /* ASM_DECLARE_OBJECT_NAME */
5443 ASM_OUTPUT_SKIP (file, size ? size : 1);
5446 /* Decide whether we must probe the stack before any space allocation
5447 on this target. It's essentially TARGET_STACK_PROBE except when
5448 -fstack-check causes the stack to be already probed differently. */
5450 bool
5451 ix86_target_stack_probe (void)
5453 /* Do not probe the stack twice if static stack checking is enabled. */
5454 if (flag_stack_check == STATIC_BUILTIN_STACK_CHECK)
5455 return false;
5457 return TARGET_STACK_PROBE;
5460 /* Decide whether we can make a sibling call to a function. DECL is the
5461 declaration of the function being targeted by the call and EXP is the
5462 CALL_EXPR representing the call. */
5464 static bool
5465 ix86_function_ok_for_sibcall (tree decl, tree exp)
5467 tree type, decl_or_type;
5468 rtx a, b;
5470 /* If we are generating position-independent code, we cannot sibcall
5471 optimize direct calls to global functions, as the PLT requires
5472 %ebx be live. (Darwin does not have a PLT.) */
5473 if (!TARGET_MACHO
5474 && !TARGET_64BIT
5475 && flag_pic
5476 && flag_plt
5477 && decl && !targetm.binds_local_p (decl))
5478 return false;
5480 /* If we need to align the outgoing stack, then sibcalling would
5481 unalign the stack, which may break the called function. */
5482 if (ix86_minimum_incoming_stack_boundary (true)
5483 < PREFERRED_STACK_BOUNDARY)
5484 return false;
5486 if (decl)
5488 decl_or_type = decl;
5489 type = TREE_TYPE (decl);
5491 else
5493 /* We're looking at the CALL_EXPR, we need the type of the function. */
5494 type = CALL_EXPR_FN (exp); /* pointer expression */
5495 type = TREE_TYPE (type); /* pointer type */
5496 type = TREE_TYPE (type); /* function type */
5497 decl_or_type = type;
5500 /* Check that the return value locations are the same. Like
5501 if we are returning floats on the 80387 register stack, we cannot
5502 make a sibcall from a function that doesn't return a float to a
5503 function that does or, conversely, from a function that does return
5504 a float to a function that doesn't; the necessary stack adjustment
5505 would not be executed. This is also the place we notice
5506 differences in the return value ABI. Note that it is ok for one
5507 of the functions to have void return type as long as the return
5508 value of the other is passed in a register. */
5509 a = ix86_function_value (TREE_TYPE (exp), decl_or_type, false);
5510 b = ix86_function_value (TREE_TYPE (DECL_RESULT (cfun->decl)),
5511 cfun->decl, false);
5512 if (STACK_REG_P (a) || STACK_REG_P (b))
5514 if (!rtx_equal_p (a, b))
5515 return false;
5517 else if (VOID_TYPE_P (TREE_TYPE (DECL_RESULT (cfun->decl))))
5519 else if (!rtx_equal_p (a, b))
5520 return false;
5522 if (TARGET_64BIT)
5524 /* The SYSV ABI has more call-clobbered registers;
5525 disallow sibcalls from MS to SYSV. */
5526 if (cfun->machine->call_abi == MS_ABI
5527 && ix86_function_type_abi (type) == SYSV_ABI)
5528 return false;
5530 else
5532 /* If this call is indirect, we'll need to be able to use a
5533 call-clobbered register for the address of the target function.
5534 Make sure that all such registers are not used for passing
5535 parameters. Note that DLLIMPORT functions are indirect. */
5536 if (!decl
5537 || (TARGET_DLLIMPORT_DECL_ATTRIBUTES && DECL_DLLIMPORT_P (decl)))
5539 if (ix86_function_regparm (type, NULL) >= 3)
5541 /* ??? Need to count the actual number of registers to be used,
5542 not the possible number of registers. Fix later. */
5543 return false;
5548 /* Otherwise okay. That also includes certain types of indirect calls. */
5549 return true;
5552 /* Handle "cdecl", "stdcall", "fastcall", "regparm", "thiscall",
5553 and "sseregparm" calling convention attributes;
5554 arguments as in struct attribute_spec.handler. */
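/* Illustrative (hypothetical) uses of the attributes handled here:

     int __attribute__ ((stdcall)) win_cb (int, int);
     int __attribute__ ((fastcall)) fast_cb (int, int);
     int __attribute__ ((regparm (2))) reg_cb (int, int);

   The checks below reject the combinations that make no sense, e.g.
   fastcall together with regparm.  */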
5556 static tree
5557 ix86_handle_cconv_attribute (tree *node, tree name,
5558 tree args,
5559 int,
5560 bool *no_add_attrs)
5562 if (TREE_CODE (*node) != FUNCTION_TYPE
5563 && TREE_CODE (*node) != METHOD_TYPE
5564 && TREE_CODE (*node) != FIELD_DECL
5565 && TREE_CODE (*node) != TYPE_DECL)
5567 warning (OPT_Wattributes, "%qE attribute only applies to functions",
5568 name);
5569 *no_add_attrs = true;
5570 return NULL_TREE;
5573 /* Can combine regparm with all attributes but fastcall, and thiscall. */
5574 if (is_attribute_p ("regparm", name))
5576 tree cst;
5578 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node)))
5580 error ("fastcall and regparm attributes are not compatible");
5583 if (lookup_attribute ("thiscall", TYPE_ATTRIBUTES (*node)))
5585 error ("regparam and thiscall attributes are not compatible");
5588 cst = TREE_VALUE (args);
5589 if (TREE_CODE (cst) != INTEGER_CST)
5591 warning (OPT_Wattributes,
5592 "%qE attribute requires an integer constant argument",
5593 name);
5594 *no_add_attrs = true;
5596 else if (compare_tree_int (cst, REGPARM_MAX) > 0)
5598 warning (OPT_Wattributes, "argument to %qE attribute larger than %d",
5599 name, REGPARM_MAX);
5600 *no_add_attrs = true;
5603 return NULL_TREE;
5606 if (TARGET_64BIT)
5608 /* Do not warn when emulating the MS ABI. */
5609 if ((TREE_CODE (*node) != FUNCTION_TYPE
5610 && TREE_CODE (*node) != METHOD_TYPE)
5611 || ix86_function_type_abi (*node) != MS_ABI)
5612 warning (OPT_Wattributes, "%qE attribute ignored",
5613 name);
5614 *no_add_attrs = true;
5615 return NULL_TREE;
5618 /* Can combine fastcall with stdcall (redundant) and sseregparm. */
5619 if (is_attribute_p ("fastcall", name))
5621 if (lookup_attribute ("cdecl", TYPE_ATTRIBUTES (*node)))
5623 error ("fastcall and cdecl attributes are not compatible");
5625 if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (*node)))
5627 error ("fastcall and stdcall attributes are not compatible");
5629 if (lookup_attribute ("regparm", TYPE_ATTRIBUTES (*node)))
5631 error ("fastcall and regparm attributes are not compatible");
5633 if (lookup_attribute ("thiscall", TYPE_ATTRIBUTES (*node)))
5635 error ("fastcall and thiscall attributes are not compatible");
5639 /* Can combine stdcall with fastcall (redundant), regparm and
5640 sseregparm. */
5641 else if (is_attribute_p ("stdcall", name))
5643 if (lookup_attribute ("cdecl", TYPE_ATTRIBUTES (*node)))
5645 error ("stdcall and cdecl attributes are not compatible");
5647 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node)))
5649 error ("stdcall and fastcall attributes are not compatible");
5651 if (lookup_attribute ("thiscall", TYPE_ATTRIBUTES (*node)))
5653 error ("stdcall and thiscall attributes are not compatible");
5657 /* Can combine cdecl with regparm and sseregparm. */
5658 else if (is_attribute_p ("cdecl", name))
5660 if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (*node)))
5662 error ("stdcall and cdecl attributes are not compatible");
5664 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node)))
5666 error ("fastcall and cdecl attributes are not compatible");
5668 if (lookup_attribute ("thiscall", TYPE_ATTRIBUTES (*node)))
5670 error ("cdecl and thiscall attributes are not compatible");
5673 else if (is_attribute_p ("thiscall", name))
5675 if (TREE_CODE (*node) != METHOD_TYPE && pedantic)
5676 warning (OPT_Wattributes, "%qE attribute is used for non-class method",
5677 name);
5678 if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (*node)))
5680 error ("stdcall and thiscall attributes are not compatible");
5682 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node)))
5684 error ("fastcall and thiscall attributes are not compatible");
5686 if (lookup_attribute ("cdecl", TYPE_ATTRIBUTES (*node)))
5688 error ("cdecl and thiscall attributes are not compatible");
5692 /* Can combine sseregparm with all attributes. */
5694 return NULL_TREE;
5697 /* The transactional memory builtins are implicitly regparm or fastcall
5698 depending on the ABI. Override the generic do-nothing attribute that
5699 these builtins were declared with, and replace it with one of the two
5700 attributes that we expect elsewhere. */
5702 static tree
5703 ix86_handle_tm_regparm_attribute (tree *node, tree, tree,
5704 int flags, bool *no_add_attrs)
5706 tree alt;
5708 /* In no case do we want to add the placeholder attribute. */
5709 *no_add_attrs = true;
5711 /* The 64-bit ABI is unchanged for transactional memory. */
5712 if (TARGET_64BIT)
5713 return NULL_TREE;
5715 /* ??? Is there a better way to validate 32-bit windows? We have
5716 cfun->machine->call_abi, but that seems to be set only for 64-bit. */
5717 if (CHECK_STACK_LIMIT > 0)
5718 alt = tree_cons (get_identifier ("fastcall"), NULL, NULL);
5719 else
5721 alt = tree_cons (NULL, build_int_cst (NULL, 2), NULL);
5722 alt = tree_cons (get_identifier ("regparm"), alt, NULL);
5724 decl_attributes (node, alt, flags);
5726 return NULL_TREE;
5729 /* This function determines from TYPE the calling-convention. */
5731 unsigned int
5732 ix86_get_callcvt (const_tree type)
5734 unsigned int ret = 0;
5735 bool is_stdarg;
5736 tree attrs;
5738 if (TARGET_64BIT)
5739 return IX86_CALLCVT_CDECL;
5741 attrs = TYPE_ATTRIBUTES (type);
5742 if (attrs != NULL_TREE)
5744 if (lookup_attribute ("cdecl", attrs))
5745 ret |= IX86_CALLCVT_CDECL;
5746 else if (lookup_attribute ("stdcall", attrs))
5747 ret |= IX86_CALLCVT_STDCALL;
5748 else if (lookup_attribute ("fastcall", attrs))
5749 ret |= IX86_CALLCVT_FASTCALL;
5750 else if (lookup_attribute ("thiscall", attrs))
5751 ret |= IX86_CALLCVT_THISCALL;
5753 /* Regparm isn't allowed for thiscall and fastcall. */
5754 if ((ret & (IX86_CALLCVT_THISCALL | IX86_CALLCVT_FASTCALL)) == 0)
5756 if (lookup_attribute ("regparm", attrs))
5757 ret |= IX86_CALLCVT_REGPARM;
5758 if (lookup_attribute ("sseregparm", attrs))
5759 ret |= IX86_CALLCVT_SSEREGPARM;
5762 if (IX86_BASE_CALLCVT(ret) != 0)
5763 return ret;
5766 is_stdarg = stdarg_p (type);
5767 if (TARGET_RTD && !is_stdarg)
5768 return IX86_CALLCVT_STDCALL | ret;
5770 if (ret != 0
5771 || is_stdarg
5772 || TREE_CODE (type) != METHOD_TYPE
5773 || ix86_function_type_abi (type) != MS_ABI)
5774 return IX86_CALLCVT_CDECL | ret;
5776 return IX86_CALLCVT_THISCALL;
5779 /* Return 0 if the attributes for two types are incompatible, 1 if they
5780 are compatible, and 2 if they are nearly compatible (which causes a
5781 warning to be generated). */
5783 static int
5784 ix86_comp_type_attributes (const_tree type1, const_tree type2)
5786 unsigned int ccvt1, ccvt2;
5788 if (TREE_CODE (type1) != FUNCTION_TYPE
5789 && TREE_CODE (type1) != METHOD_TYPE)
5790 return 1;
5792 ccvt1 = ix86_get_callcvt (type1);
5793 ccvt2 = ix86_get_callcvt (type2);
5794 if (ccvt1 != ccvt2)
5795 return 0;
5796 if (ix86_function_regparm (type1, NULL)
5797 != ix86_function_regparm (type2, NULL))
5798 return 0;
5800 return 1;
5803 /* Return the regparm value for a function with the indicated TYPE and DECL.
5804 DECL may be NULL when calling function indirectly
5805 or considering a libcall. */
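/* Roughly: with regparm (3) the first three integer arguments travel in
   registers (EAX, EDX and ECX on ia32) instead of on the stack; fastcall
   implies two such registers and thiscall one, which is what the constants
   returned below reflect.  */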
5807 static int
5808 ix86_function_regparm (const_tree type, const_tree decl)
5810 tree attr;
5811 int regparm;
5812 unsigned int ccvt;
5814 if (TARGET_64BIT)
5815 return (ix86_function_type_abi (type) == SYSV_ABI
5816 ? X86_64_REGPARM_MAX : X86_64_MS_REGPARM_MAX);
5817 ccvt = ix86_get_callcvt (type);
5818 regparm = ix86_regparm;
5820 if ((ccvt & IX86_CALLCVT_REGPARM) != 0)
5822 attr = lookup_attribute ("regparm", TYPE_ATTRIBUTES (type));
5823 if (attr)
5825 regparm = TREE_INT_CST_LOW (TREE_VALUE (TREE_VALUE (attr)));
5826 return regparm;
5829 else if ((ccvt & IX86_CALLCVT_FASTCALL) != 0)
5830 return 2;
5831 else if ((ccvt & IX86_CALLCVT_THISCALL) != 0)
5832 return 1;
5834 /* Use register calling convention for local functions when possible. */
5835 if (decl
5836 && TREE_CODE (decl) == FUNCTION_DECL)
5838 cgraph_node *target = cgraph_node::get (decl);
5839 if (target)
5840 target = target->function_symbol ();
5842 /* Caller and callee must agree on the calling convention, so
5843 checking just the caller's optimize flag here would mean that with
5844 __attribute__((optimize (...))) the caller could use the regparm convention
5845 and the callee not, or vice versa. Instead look at whether the callee
5846 is optimized or not. */
5847 if (target && opt_for_fn (target->decl, optimize)
5848 && !(profile_flag && !flag_fentry))
5850 cgraph_local_info *i = &target->local;
5851 if (i && i->local && i->can_change_signature)
5853 int local_regparm, globals = 0, regno;
5855 /* Make sure no regparm register is taken by a
5856 fixed register variable. */
5857 for (local_regparm = 0; local_regparm < REGPARM_MAX;
5858 local_regparm++)
5859 if (fixed_regs[local_regparm])
5860 break;
5862 /* We don't want to use regparm(3) for nested functions as
5863 these use a static chain pointer in the third argument. */
5864 if (local_regparm == 3 && DECL_STATIC_CHAIN (target->decl))
5865 local_regparm = 2;
5867 /* Save a register for the split stack. */
5868 if (local_regparm == 3 && flag_split_stack)
5869 local_regparm = 2;
5871 /* Each fixed register usage increases register pressure,
5872 so fewer registers should be used for argument passing.
5873 This functionality can be overridden by an explicit
5874 regparm value. */
5875 for (regno = AX_REG; regno <= DI_REG; regno++)
5876 if (fixed_regs[regno])
5877 globals++;
5879 local_regparm
5880 = globals < local_regparm ? local_regparm - globals : 0;
5882 if (local_regparm > regparm)
5883 regparm = local_regparm;
5888 return regparm;
5891 /* Return 1 or 2, if we can pass up to SSE_REGPARM_MAX SFmode (1) and
5892 DFmode (2) arguments in SSE registers for a function with the
5893 indicated TYPE and DECL. DECL may be NULL when calling a function
5894 indirectly or considering a libcall. Return -1 if any FP parameter
5895 should be rejected by error. This is used in situations where we imply the
5896 SSE calling convention but the function is called from another function
5897 with SSE disabled. Otherwise return 0. */
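/* Illustrative (hypothetical) use of the attribute checked here:

     double __attribute__ ((sseregparm)) dot2 (double x, double y);

   With SSE enabled, the float/double arguments of such a function are
   passed in SSE registers rather than on the stack.  */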
5899 static int
5900 ix86_function_sseregparm (const_tree type, const_tree decl, bool warn)
5902 gcc_assert (!TARGET_64BIT);
5904 /* Use SSE registers to pass SFmode and DFmode arguments if requested
5905 by the sseregparm attribute. */
5906 if (TARGET_SSEREGPARM
5907 || (type && lookup_attribute ("sseregparm", TYPE_ATTRIBUTES (type))))
5909 if (!TARGET_SSE)
5911 if (warn)
5913 if (decl)
5914 error ("calling %qD with attribute sseregparm without "
5915 "SSE/SSE2 enabled", decl);
5916 else
5917 error ("calling %qT with attribute sseregparm without "
5918 "SSE/SSE2 enabled", type);
5920 return 0;
5923 return 2;
5926 if (!decl)
5927 return 0;
5929 cgraph_node *target = cgraph_node::get (decl);
5930 if (target)
5931 target = target->function_symbol ();
5933 /* For local functions, pass up to SSE_REGPARM_MAX SFmode
5934 (and DFmode for SSE2) arguments in SSE registers. */
5935 if (target
5936 /* TARGET_SSE_MATH */
5937 && (target_opts_for_fn (target->decl)->x_ix86_fpmath & FPMATH_SSE)
5938 && opt_for_fn (target->decl, optimize)
5939 && !(profile_flag && !flag_fentry))
5941 cgraph_local_info *i = &target->local;
5942 if (i && i->local && i->can_change_signature)
5944 /* Refuse to produce wrong code when a local function with SSE enabled
5945 is called from an SSE-disabled function.
5946 FIXME: We need a way to detect these cases across ltrans partitions
5947 and avoid using SSE calling conventions on local functions called
5948 from functions with SSE disabled. For now at least delay the
5949 warning until we know we are going to produce wrong code.
5950 See PR66047. */
5951 if (!TARGET_SSE && warn)
5952 return -1;
5953 return TARGET_SSE2_P (target_opts_for_fn (target->decl)
5954 ->x_ix86_isa_flags) ? 2 : 1;
5958 return 0;
5961 /* Return true if EAX is live at the start of the function. Used by
5962 ix86_expand_prologue to determine if we need special help before
5963 calling allocate_stack_worker. */
5965 static bool
5966 ix86_eax_live_at_start_p (void)
5968 /* Cheat. Don't bother working forward from ix86_function_regparm
5969 to the function type to whether an actual argument is located in
5970 eax. Instead just look at cfg info, which is still close enough
5971 to correct at this point. This gives false positives for broken
5972 functions that might use uninitialized data that happens to be
5973 allocated in eax, but who cares? */
5974 return REGNO_REG_SET_P (df_get_live_out (ENTRY_BLOCK_PTR_FOR_FN (cfun)), 0);
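/* Return true if the hidden aggregate-return pointer should be left for the
   caller to pop rather than popped by the callee.  On 32-bit targets an
   explicit callee_pop_aggregate_return (N) attribute decides: N == 0 keeps
   the pointer for the caller, N == 1 lets the callee pop it, e.g.

     struct S bar (void) __attribute__ ((callee_pop_aggregate_return (1)));

   (illustrative declaration).  Without the attribute, 32-bit MS-ABI keeps
   the pointer and other cases follow KEEP_AGGREGATE_RETURN_POINTER.  */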
5977 static bool
5978 ix86_keep_aggregate_return_pointer (tree fntype)
5980 tree attr;
5982 if (!TARGET_64BIT)
5984 attr = lookup_attribute ("callee_pop_aggregate_return",
5985 TYPE_ATTRIBUTES (fntype));
5986 if (attr)
5987 return (TREE_INT_CST_LOW (TREE_VALUE (TREE_VALUE (attr))) == 0);
5989 /* For 32-bit MS-ABI the default is to keep aggregate
5990 return pointer. */
5991 if (ix86_function_type_abi (fntype) == MS_ABI)
5992 return true;
5994 return KEEP_AGGREGATE_RETURN_POINTER != 0;
5997 /* Value is the number of bytes of arguments automatically
5998 popped when returning from a subroutine call.
5999 FUNDECL is the declaration node of the function (as a tree),
6000 FUNTYPE is the data type of the function (as a tree),
6001 or for a library call it is an identifier node for the subroutine name.
6002 SIZE is the number of bytes of arguments passed on the stack.
6004 On the 80386, the RTD insn may be used to pop them if the number
6005 of args is fixed, but if the number is variable then the caller
6006 must pop them all. RTD can't be used for library calls now
6007 because the library is compiled with the Unix compiler.
6008 Use of RTD is a selectable option, since it is incompatible with
6009 standard Unix calling sequences. If the option is not selected,
6010 the caller must always pop the args.
6012 The attribute stdcall is equivalent to RTD on a per module basis. */
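/* For example, a 32-bit stdcall function taking two int arguments (8 bytes
   on the stack) returns with "ret $8": this hook reports 8 so the callee
   pops its own arguments, while a vararg or plain cdecl function reports 0
   and the caller pops.  */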
6014 static int
6015 ix86_return_pops_args (tree fundecl, tree funtype, int size)
6017 unsigned int ccvt;
6019 /* None of the 64-bit ABIs pop arguments. */
6020 if (TARGET_64BIT)
6021 return 0;
6023 ccvt = ix86_get_callcvt (funtype);
6025 if ((ccvt & (IX86_CALLCVT_STDCALL | IX86_CALLCVT_FASTCALL
6026 | IX86_CALLCVT_THISCALL)) != 0
6027 && ! stdarg_p (funtype))
6028 return size;
6030 /* Lose any fake structure return argument if it is passed on the stack. */
6031 if (aggregate_value_p (TREE_TYPE (funtype), fundecl)
6032 && !ix86_keep_aggregate_return_pointer (funtype))
6034 int nregs = ix86_function_regparm (funtype, fundecl);
6035 if (nregs == 0)
6036 return GET_MODE_SIZE (Pmode);
6039 return 0;
6042 /* Implement the TARGET_LEGITIMATE_COMBINED_INSN hook. */
6044 static bool
6045 ix86_legitimate_combined_insn (rtx_insn *insn)
6047 /* Check operand constraints in case hard registers were propagated
6048 into the insn pattern. This check prevents the combine pass from
6049 generating insn patterns with invalid hard register operands.
6050 These invalid insns can eventually confuse reload into erroring out
6051 with a spill failure. See also PRs 46829 and 46843. */
6052 if ((INSN_CODE (insn) = recog (PATTERN (insn), insn, 0)) >= 0)
6054 int i;
6056 extract_insn (insn);
6057 preprocess_constraints (insn);
6059 int n_operands = recog_data.n_operands;
6060 int n_alternatives = recog_data.n_alternatives;
6061 for (i = 0; i < n_operands; i++)
6063 rtx op = recog_data.operand[i];
6064 machine_mode mode = GET_MODE (op);
6065 const operand_alternative *op_alt;
6066 int offset = 0;
6067 bool win;
6068 int j;
6070 /* For pre-AVX disallow unaligned loads/stores where the
6071 instructions don't support it. */
6072 if (!TARGET_AVX
6073 && VECTOR_MODE_P (GET_MODE (op))
6074 && misaligned_operand (op, GET_MODE (op)))
6076 int min_align = get_attr_ssememalign (insn);
6077 if (min_align == 0)
6078 return false;
6081 /* A unary operator may be accepted by the predicate, but it
6082 is irrelevant for matching constraints. */
6083 if (UNARY_P (op))
6084 op = XEXP (op, 0);
6086 if (GET_CODE (op) == SUBREG)
6088 if (REG_P (SUBREG_REG (op))
6089 && REGNO (SUBREG_REG (op)) < FIRST_PSEUDO_REGISTER)
6090 offset = subreg_regno_offset (REGNO (SUBREG_REG (op)),
6091 GET_MODE (SUBREG_REG (op)),
6092 SUBREG_BYTE (op),
6093 GET_MODE (op));
6094 op = SUBREG_REG (op);
6097 if (!(REG_P (op) && HARD_REGISTER_P (op)))
6098 continue;
6100 op_alt = recog_op_alt;
6102 /* Operand has no constraints, anything is OK. */
6103 win = !n_alternatives;
6105 alternative_mask preferred = get_preferred_alternatives (insn);
6106 for (j = 0; j < n_alternatives; j++, op_alt += n_operands)
6108 if (!TEST_BIT (preferred, j))
6109 continue;
6110 if (op_alt[i].anything_ok
6111 || (op_alt[i].matches != -1
6112 && operands_match_p
6113 (recog_data.operand[i],
6114 recog_data.operand[op_alt[i].matches]))
6115 || reg_fits_class_p (op, op_alt[i].cl, offset, mode))
6117 win = true;
6118 break;
6122 if (!win)
6123 return false;
6127 return true;
6130 /* Implement the TARGET_ASAN_SHADOW_OFFSET hook. */
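/* Rough sketch of how AddressSanitizer instrumentation uses this value
   (the standard scale-3 shadow mapping):

     shadow_addr = (addr >> 3) + ix86_asan_shadow_offset ()

   so e.g. LP64 Linux uses 0x7fff8000 and 32-bit uses 1 << 29.  */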
6132 static unsigned HOST_WIDE_INT
6133 ix86_asan_shadow_offset (void)
6135 return TARGET_LP64 ? (TARGET_MACHO ? (HOST_WIDE_INT_1 << 44)
6136 : HOST_WIDE_INT_C (0x7fff8000))
6137 : (HOST_WIDE_INT_1 << 29);
6140 /* Argument support functions. */
6142 /* Return true when register may be used to pass function parameters. */
6143 bool
6144 ix86_function_arg_regno_p (int regno)
6146 int i;
6147 enum calling_abi call_abi;
6148 const int *parm_regs;
6150 if (TARGET_MPX && BND_REGNO_P (regno))
6151 return true;
6153 if (!TARGET_64BIT)
6155 if (TARGET_MACHO)
6156 return (regno < REGPARM_MAX
6157 || (TARGET_SSE && SSE_REGNO_P (regno) && !fixed_regs[regno]));
6158 else
6159 return (regno < REGPARM_MAX
6160 || (TARGET_MMX && MMX_REGNO_P (regno)
6161 && (regno < FIRST_MMX_REG + MMX_REGPARM_MAX))
6162 || (TARGET_SSE && SSE_REGNO_P (regno)
6163 && (regno < FIRST_SSE_REG + SSE_REGPARM_MAX)));
6166 if (TARGET_SSE && SSE_REGNO_P (regno)
6167 && (regno < FIRST_SSE_REG + SSE_REGPARM_MAX))
6168 return true;
6170 /* TODO: The function should depend on current function ABI but
6171 builtins.c would need updating then. Therefore we use the
6172 default ABI. */
6173 call_abi = ix86_cfun_abi ();
6175 /* RAX is used as hidden argument to va_arg functions. */
6176 if (call_abi == SYSV_ABI && regno == AX_REG)
6177 return true;
6179 if (call_abi == MS_ABI)
6180 parm_regs = x86_64_ms_abi_int_parameter_registers;
6181 else
6182 parm_regs = x86_64_int_parameter_registers;
6184 for (i = 0; i < (call_abi == MS_ABI
6185 ? X86_64_MS_REGPARM_MAX : X86_64_REGPARM_MAX); i++)
6186 if (regno == parm_regs[i])
6187 return true;
6188 return false;
6191 /* Return true if we do not know how to pass TYPE solely in registers. */
6193 static bool
6194 ix86_must_pass_in_stack (machine_mode mode, const_tree type)
6196 if (must_pass_in_stack_var_size_or_pad (mode, type))
6197 return true;
6199 /* For 32-bit, we want TImode aggregates to go on the stack. But watch out!
6200 The layout_type routine is crafty and tries to trick us into passing
6201 currently unsupported vector types on the stack by using TImode. */
6202 return (!TARGET_64BIT && mode == TImode
6203 && type && TREE_CODE (type) != VECTOR_TYPE);
6206 /* Return the size, in bytes, of the area reserved for arguments passed
6207 in registers for the function represented by FNDECL, depending on the
6208 ABI used. */
6209 int
6210 ix86_reg_parm_stack_space (const_tree fndecl)
6212 enum calling_abi call_abi = SYSV_ABI;
6213 if (fndecl != NULL_TREE && TREE_CODE (fndecl) == FUNCTION_DECL)
6214 call_abi = ix86_function_abi (fndecl);
6215 else
6216 call_abi = ix86_function_type_abi (fndecl);
6217 if (TARGET_64BIT && call_abi == MS_ABI)
6218 return 32;
6219 return 0;
6222 /* Returns value SYSV_ABI, MS_ABI dependent on fntype, specifying the
6223 call abi used. */
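/* Illustrative (hypothetical) declarations using the attributes examined
   below:

     void wincall (void *) __attribute__ ((ms_abi));
     void nixcall (void *) __attribute__ ((sysv_abi));

   These override ix86_abi on a per-function-type basis, except that X32
   rejects ms_abi.  */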
6224 enum calling_abi
6225 ix86_function_type_abi (const_tree fntype)
6227 if (fntype != NULL_TREE && TYPE_ATTRIBUTES (fntype) != NULL_TREE)
6229 enum calling_abi abi = ix86_abi;
6230 if (abi == SYSV_ABI)
6232 if (lookup_attribute ("ms_abi", TYPE_ATTRIBUTES (fntype)))
6234 if (TARGET_X32)
6236 static bool warned = false;
6237 if (!warned)
6239 error ("X32 does not support ms_abi attribute");
6240 warned = true;
6243 abi = MS_ABI;
6246 else if (lookup_attribute ("sysv_abi", TYPE_ATTRIBUTES (fntype)))
6247 abi = SYSV_ABI;
6248 return abi;
6250 return ix86_abi;
6253 /* We add this as a workaround in order to use libc_has_function
6254 hook in i386.md. */
6255 bool
6256 ix86_libc_has_function (enum function_class fn_class)
6258 return targetm.libc_has_function (fn_class);
6261 static bool
6262 ix86_function_ms_hook_prologue (const_tree fn)
6264 if (fn && lookup_attribute ("ms_hook_prologue", DECL_ATTRIBUTES (fn)))
6266 if (decl_function_context (fn) != NULL_TREE)
6267 error_at (DECL_SOURCE_LOCATION (fn),
6268 "ms_hook_prologue is not compatible with nested function");
6269 else
6270 return true;
6272 return false;
6275 static enum calling_abi
6276 ix86_function_abi (const_tree fndecl)
6278 if (! fndecl)
6279 return ix86_abi;
6280 return ix86_function_type_abi (TREE_TYPE (fndecl));
6283 /* Returns value SYSV_ABI, MS_ABI dependent on cfun, specifying the
6284 call abi used. */
6285 enum calling_abi
6286 ix86_cfun_abi (void)
6288 if (! cfun)
6289 return ix86_abi;
6290 return cfun->machine->call_abi;
6293 /* Write the extra assembler code needed to declare a function properly. */
6295 void
6296 ix86_asm_output_function_label (FILE *asm_out_file, const char *fname,
6297 tree decl)
6299 bool is_ms_hook = ix86_function_ms_hook_prologue (decl);
6301 if (is_ms_hook)
6303 int i, filler_count = (TARGET_64BIT ? 32 : 16);
6304 unsigned int filler_cc = 0xcccccccc;
6306 for (i = 0; i < filler_count; i += 4)
6307 fprintf (asm_out_file, ASM_LONG " %#x\n", filler_cc);
6310 #ifdef SUBTARGET_ASM_UNWIND_INIT
6311 SUBTARGET_ASM_UNWIND_INIT (asm_out_file);
6312 #endif
6314 ASM_OUTPUT_LABEL (asm_out_file, fname);
6316 /* Output magic byte marker, if hot-patch attribute is set. */
6317 if (is_ms_hook)
6319 if (TARGET_64BIT)
6321 /* leaq [%rsp + 0], %rsp */
6322 asm_fprintf (asm_out_file, ASM_BYTE
6323 "0x48, 0x8d, 0xa4, 0x24, 0x00, 0x00, 0x00, 0x00\n");
6325 else
6327 /* movl.s %edi, %edi
6328 push %ebp
6329 movl.s %esp, %ebp */
6330 asm_fprintf (asm_out_file, ASM_BYTE
6331 "0x8b, 0xff, 0x55, 0x8b, 0xec\n");
6336 /* regclass.c */
6337 extern void init_regs (void);
6339 /* Implementation of the call abi switching target hook. Specific to FNDECL,
6340 the appropriate call register set is selected. See also
6341 ix86_conditional_register_usage for more details. */
6342 void
6343 ix86_call_abi_override (const_tree fndecl)
6345 if (fndecl == NULL_TREE)
6346 cfun->machine->call_abi = ix86_abi;
6347 else
6348 cfun->machine->call_abi = ix86_function_type_abi (TREE_TYPE (fndecl));
6351 /* 64-bit MS and SYSV ABIs have different sets of call-used registers. Avoid
6352 expensive re-initialization of init_regs each time we switch function context
6353 since this is needed only during RTL expansion. */
6354 static void
6355 ix86_maybe_switch_abi (void)
6357 if (TARGET_64BIT &&
6358 call_used_regs[SI_REG] == (cfun->machine->call_abi == MS_ABI))
6359 reinit_regs ();
6362 /* Return true if a pseudo register should be created and used to hold
6363 the GOT address for PIC code. */
6364 bool
6365 ix86_use_pseudo_pic_reg (void)
6367 if ((TARGET_64BIT
6368 && (ix86_cmodel == CM_SMALL_PIC
6369 || TARGET_PECOFF))
6370 || !flag_pic)
6371 return false;
6372 return true;
6375 /* Initialize large model PIC register. */
6377 static void
6378 ix86_init_large_pic_reg (unsigned int tmp_regno)
6380 rtx_code_label *label;
6381 rtx tmp_reg;
6383 gcc_assert (Pmode == DImode);
6384 label = gen_label_rtx ();
6385 emit_label (label);
6386 LABEL_PRESERVE_P (label) = 1;
6387 tmp_reg = gen_rtx_REG (Pmode, tmp_regno);
6388 gcc_assert (REGNO (pic_offset_table_rtx) != tmp_regno);
6389 emit_insn (gen_set_rip_rex64 (pic_offset_table_rtx,
6390 label));
6391 emit_insn (gen_set_got_offset_rex64 (tmp_reg, label));
6392 emit_insn (ix86_gen_add3 (pic_offset_table_rtx,
6393 pic_offset_table_rtx, tmp_reg));
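/* Illustrative sketch of the sequence emitted above for the large PIC
   code model; the register names are placeholders, not what register
   allocation will necessarily pick:

	.LPIC:
		lea	.LPIC(%rip), %pic			; gen_set_rip_rex64
		movabs	$_GLOBAL_OFFSET_TABLE_-.LPIC, %tmp	; gen_set_got_offset_rex64
		add	%tmp, %pic				; ix86_gen_add3

   Here %pic stands for the pseudo PIC register and %tmp for the scratch
   register named by TMP_REGNO (R11 in the caller below).  */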
6396 /* Create and initialize PIC register if required. */
6397 static void
6398 ix86_init_pic_reg (void)
6400 edge entry_edge;
6401 rtx_insn *seq;
6403 if (!ix86_use_pseudo_pic_reg ())
6404 return;
6406 start_sequence ();
6408 if (TARGET_64BIT)
6410 if (ix86_cmodel == CM_LARGE_PIC)
6411 ix86_init_large_pic_reg (R11_REG);
6412 else
6413 emit_insn (gen_set_got_rex64 (pic_offset_table_rtx));
6415 else
6417 /* If there is a future mcount call in the function, it is more profitable
6418 to emit SET_GOT into the ABI-defined REAL_PIC_OFFSET_TABLE_REGNUM. */
6419 rtx reg = crtl->profile
6420 ? gen_rtx_REG (Pmode, REAL_PIC_OFFSET_TABLE_REGNUM)
6421 : pic_offset_table_rtx;
6422 rtx_insn *insn = emit_insn (gen_set_got (reg));
6423 RTX_FRAME_RELATED_P (insn) = 1;
6424 if (crtl->profile)
6425 emit_move_insn (pic_offset_table_rtx, reg);
6426 add_reg_note (insn, REG_CFA_FLUSH_QUEUE, NULL_RTX);
6429 seq = get_insns ();
6430 end_sequence ();
6432 entry_edge = single_succ_edge (ENTRY_BLOCK_PTR_FOR_FN (cfun));
6433 insert_insn_on_edge (seq, entry_edge);
6434 commit_one_edge_insertion (entry_edge);
6437 /* Initialize a variable CUM of type CUMULATIVE_ARGS
6438 for a call to a function whose data type is FNTYPE.
6439 For a library call, FNTYPE is 0. */
6441 void
6442 init_cumulative_args (CUMULATIVE_ARGS *cum, /* Argument info to initialize */
6443 tree fntype, /* tree ptr for function decl */
6444 rtx libname, /* SYMBOL_REF of library name or 0 */
6445 tree fndecl,
6446 int caller)
6448 struct cgraph_local_info *i = NULL;
6449 struct cgraph_node *target = NULL;
6451 memset (cum, 0, sizeof (*cum));
6453 if (fndecl)
6455 target = cgraph_node::get (fndecl);
6456 if (target)
6458 target = target->function_symbol ();
6459 i = cgraph_node::local_info (target->decl);
6460 cum->call_abi = ix86_function_abi (target->decl);
6462 else
6463 cum->call_abi = ix86_function_abi (fndecl);
6465 else
6466 cum->call_abi = ix86_function_type_abi (fntype);
6468 cum->caller = caller;
6470 /* Set up the number of registers to use for passing arguments. */
6471 cum->nregs = ix86_regparm;
6472 if (TARGET_64BIT)
6474 cum->nregs = (cum->call_abi == SYSV_ABI
6475 ? X86_64_REGPARM_MAX
6476 : X86_64_MS_REGPARM_MAX);
6478 if (TARGET_SSE)
6480 cum->sse_nregs = SSE_REGPARM_MAX;
6481 if (TARGET_64BIT)
6483 cum->sse_nregs = (cum->call_abi == SYSV_ABI
6484 ? X86_64_SSE_REGPARM_MAX
6485 : X86_64_MS_SSE_REGPARM_MAX);
6488 if (TARGET_MMX)
6489 cum->mmx_nregs = MMX_REGPARM_MAX;
6490 cum->warn_avx512f = true;
6491 cum->warn_avx = true;
6492 cum->warn_sse = true;
6493 cum->warn_mmx = true;
6495 /* Because the type might mismatch between caller and callee, we need to
6496 use the actual type of the function for local calls.
6497 FIXME: cgraph_analyze can be told to actually record whether a function uses
6498 va_start, so for local functions maybe_vaarg can be made more aggressive,
6499 helping K&R code.
6500 FIXME: once the type system is fixed, we won't need this code anymore. */
6501 if (i && i->local && i->can_change_signature)
6502 fntype = TREE_TYPE (target->decl);
6503 cum->stdarg = stdarg_p (fntype);
6504 cum->maybe_vaarg = (fntype
6505 ? (!prototype_p (fntype) || stdarg_p (fntype))
6506 : !libname);
6508 cum->bnd_regno = FIRST_BND_REG;
6509 cum->bnds_in_bt = 0;
6510 cum->force_bnd_pass = 0;
6511 cum->decl = fndecl;
6513 if (!TARGET_64BIT)
6515 /* If there are variable arguments, then we won't pass anything
6516 in registers in 32-bit mode. */
6517 if (stdarg_p (fntype))
6519 cum->nregs = 0;
6520 cum->sse_nregs = 0;
6521 cum->mmx_nregs = 0;
6522 cum->warn_avx512f = false;
6523 cum->warn_avx = false;
6524 cum->warn_sse = false;
6525 cum->warn_mmx = false;
6526 return;
6529 /* Use ecx and edx registers if function has fastcall attribute,
6530 else look for regparm information. */
6531 if (fntype)
6533 unsigned int ccvt = ix86_get_callcvt (fntype);
6534 if ((ccvt & IX86_CALLCVT_THISCALL) != 0)
6536 cum->nregs = 1;
6537 cum->fastcall = 1; /* Same first register as in fastcall. */
6539 else if ((ccvt & IX86_CALLCVT_FASTCALL) != 0)
6541 cum->nregs = 2;
6542 cum->fastcall = 1;
6544 else
6545 cum->nregs = ix86_function_regparm (fntype, fndecl);
6548 /* Set up the number of SSE registers used for passing SFmode
6549 and DFmode arguments. Warn for mismatching ABI. */
6550 cum->float_in_sse = ix86_function_sseregparm (fntype, fndecl, true);
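/* Illustrative example of the initialization above, assuming a 32-bit
   target and no regparm attribute or stdarg usage (the declaration is
   hypothetical):

	void __attribute__((fastcall)) f (int a, int b, int c);

   The fastcall branch leaves cum->nregs == 2 and cum->fastcall == 1, so A
   and B are later assigned to ECX and EDX by function_arg_32 and C is
   pushed on the stack.  */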
6554 /* Return the "natural" mode for TYPE. In most cases, this is just TYPE_MODE.
6555 But in the case of vector types, it is some vector mode.
6557 When we have only some of our vector isa extensions enabled, then there
6558 are some modes for which vector_mode_supported_p is false. For these
6559 modes, the generic vector support in gcc will choose some non-vector mode
6560 in order to implement the type. By computing the natural mode, we'll
6561 select the proper ABI location for the operand and not depend on whatever
6562 the middle-end decides to do with these vector types.
6564 The middle-end can't deal with vector types > 16 bytes. In this
6565 case, we return the original mode and warn about the ABI change if CUM isn't
6566 NULL.
6568 If IN_RETURN is true, warn about the ABI change if the vector mode isn't
6569 available for the function return value. */
6571 static machine_mode
6572 type_natural_mode (const_tree type, const CUMULATIVE_ARGS *cum,
6573 bool in_return)
6575 machine_mode mode = TYPE_MODE (type);
6577 if (TREE_CODE (type) == VECTOR_TYPE && !VECTOR_MODE_P (mode))
6579 HOST_WIDE_INT size = int_size_in_bytes (type);
6580 if ((size == 8 || size == 16 || size == 32 || size == 64)
6581 /* ??? Generic code allows us to create width 1 vectors. Ignore. */
6582 && TYPE_VECTOR_SUBPARTS (type) > 1)
6584 machine_mode innermode = TYPE_MODE (TREE_TYPE (type));
6586 if (TREE_CODE (TREE_TYPE (type)) == REAL_TYPE)
6587 mode = MIN_MODE_VECTOR_FLOAT;
6588 else
6589 mode = MIN_MODE_VECTOR_INT;
6591 /* Get the mode which has this inner mode and number of units. */
6592 for (; mode != VOIDmode; mode = GET_MODE_WIDER_MODE (mode))
6593 if (GET_MODE_NUNITS (mode) == TYPE_VECTOR_SUBPARTS (type)
6594 && GET_MODE_INNER (mode) == innermode)
6596 if (size == 64 && !TARGET_AVX512F)
6598 static bool warnedavx512f;
6599 static bool warnedavx512f_ret;
6601 if (cum && cum->warn_avx512f && !warnedavx512f)
6603 if (warning (OPT_Wpsabi, "AVX512F vector argument "
6604 "without AVX512F enabled changes the ABI"))
6605 warnedavx512f = true;
6607 else if (in_return && !warnedavx512f_ret)
6609 if (warning (OPT_Wpsabi, "AVX512F vector return "
6610 "without AVX512F enabled changes the ABI"))
6611 warnedavx512f_ret = true;
6614 return TYPE_MODE (type);
6616 else if (size == 32 && !TARGET_AVX)
6618 static bool warnedavx;
6619 static bool warnedavx_ret;
6621 if (cum && cum->warn_avx && !warnedavx)
6623 if (warning (OPT_Wpsabi, "AVX vector argument "
6624 "without AVX enabled changes the ABI"))
6625 warnedavx = true;
6627 else if (in_return && !warnedavx_ret)
6629 if (warning (OPT_Wpsabi, "AVX vector return "
6630 "without AVX enabled changes the ABI"))
6631 warnedavx_ret = true;
6634 return TYPE_MODE (type);
6636 else if (((size == 8 && TARGET_64BIT) || size == 16)
6637 && !TARGET_SSE)
6639 static bool warnedsse;
6640 static bool warnedsse_ret;
6642 if (cum && cum->warn_sse && !warnedsse)
6644 if (warning (OPT_Wpsabi, "SSE vector argument "
6645 "without SSE enabled changes the ABI"))
6646 warnedsse = true;
6648 else if (!TARGET_64BIT && in_return && !warnedsse_ret)
6650 if (warning (OPT_Wpsabi, "SSE vector return "
6651 "without SSE enabled changes the ABI"))
6652 warnedsse_ret = true;
6655 else if ((size == 8 && !TARGET_64BIT) && !TARGET_MMX)
6657 static bool warnedmmx;
6658 static bool warnedmmx_ret;
6660 if (cum && cum->warn_mmx && !warnedmmx)
6662 if (warning (OPT_Wpsabi, "MMX vector argument "
6663 "without MMX enabled changes the ABI"))
6664 warnedmmx = true;
6666 else if (in_return && !warnedmmx_ret)
6668 if (warning (OPT_Wpsabi, "MMX vector return "
6669 "without MMX enabled changes the ABI"))
6670 warnedmmx_ret = true;
6673 return mode;
6676 gcc_unreachable ();
6680 return mode;
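/* Illustrative example, assuming the hypothetical typedef

	typedef int v8si __attribute__((vector_size (32)));

   With -mavx the TYPE_MODE is already V8SImode and is returned unchanged;
   without AVX the loop above still finds V8SImode, but TARGET_AVX is false,
   so a -Wpsabi warning is issued ("AVX vector argument without AVX enabled
   changes the ABI") and the non-vector TYPE_MODE chosen by the middle-end
   is returned instead.  */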
6683 /* We want to pass a value in REGNO whose "natural" mode is MODE. However,
6684 this may not agree with the mode that the type system has chosen for the
6685 register, which is ORIG_MODE. If ORIG_MODE is not BLKmode, then we can
6686 go ahead and use it. Otherwise we have to build a PARALLEL instead. */
6688 static rtx
6689 gen_reg_or_parallel (machine_mode mode, machine_mode orig_mode,
6690 unsigned int regno)
6692 rtx tmp;
6694 if (orig_mode != BLKmode)
6695 tmp = gen_rtx_REG (orig_mode, regno);
6696 else
6698 tmp = gen_rtx_REG (mode, regno);
6699 tmp = gen_rtx_EXPR_LIST (VOIDmode, tmp, const0_rtx);
6700 tmp = gen_rtx_PARALLEL (orig_mode, gen_rtvec (1, tmp));
6703 return tmp;
6706 /* x86-64 register passing implementation. See x86-64 ABI for details. Goal
6707 of this code is to classify each 8 bytes of the incoming argument by the register
6708 class and assign registers accordingly. */
6710 /* Return the union class of CLASS1 and CLASS2.
6711 See the x86-64 PS ABI for details. */
6713 static enum x86_64_reg_class
6714 merge_classes (enum x86_64_reg_class class1, enum x86_64_reg_class class2)
6716 /* Rule #1: If both classes are equal, this is the resulting class. */
6717 if (class1 == class2)
6718 return class1;
6720 /* Rule #2: If one of the classes is NO_CLASS, the resulting class is
6721 the other class. */
6722 if (class1 == X86_64_NO_CLASS)
6723 return class2;
6724 if (class2 == X86_64_NO_CLASS)
6725 return class1;
6727 /* Rule #3: If one of the classes is MEMORY, the result is MEMORY. */
6728 if (class1 == X86_64_MEMORY_CLASS || class2 == X86_64_MEMORY_CLASS)
6729 return X86_64_MEMORY_CLASS;
6731 /* Rule #4: If one of the classes is INTEGER, the result is INTEGER. */
6732 if ((class1 == X86_64_INTEGERSI_CLASS && class2 == X86_64_SSESF_CLASS)
6733 || (class2 == X86_64_INTEGERSI_CLASS && class1 == X86_64_SSESF_CLASS))
6734 return X86_64_INTEGERSI_CLASS;
6735 if (class1 == X86_64_INTEGER_CLASS || class1 == X86_64_INTEGERSI_CLASS
6736 || class2 == X86_64_INTEGER_CLASS || class2 == X86_64_INTEGERSI_CLASS)
6737 return X86_64_INTEGER_CLASS;
6739 /* Rule #5: If one of the classes is X87, X87UP, or COMPLEX_X87 class,
6740 MEMORY is used. */
6741 if (class1 == X86_64_X87_CLASS
6742 || class1 == X86_64_X87UP_CLASS
6743 || class1 == X86_64_COMPLEX_X87_CLASS
6744 || class2 == X86_64_X87_CLASS
6745 || class2 == X86_64_X87UP_CLASS
6746 || class2 == X86_64_COMPLEX_X87_CLASS)
6747 return X86_64_MEMORY_CLASS;
6749 /* Rule #6: Otherwise class SSE is used. */
6750 return X86_64_SSE_CLASS;
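/* Illustrative example of the rules above, using a hypothetical type:

	struct s { int i; float f; };

   occupies a single 8-byte word; I contributes X86_64_INTEGERSI_CLASS and
   F, at bit offset 32, contributes X86_64_SSE_CLASS.  Rule #4 merges them
   into X86_64_INTEGER_CLASS, so the whole struct travels in one
   general-purpose register.  */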
6753 /* Classify the argument of type TYPE and mode MODE.
6754 CLASSES will be filled by the register class used to pass each word
6755 of the operand. The number of words is returned. In case the parameter
6756 should be passed in memory, 0 is returned. As a special case for
6757 zero-sized containers, classes[0] will be NO_CLASS and 1 is returned.
6759 BIT_OFFSET is used internally for handling records and specifies the
6760 offset in bits, modulo 512, to avoid overflow cases.
6762 See the x86-64 PS ABI for details. */
6765 static int
6766 classify_argument (machine_mode mode, const_tree type,
6767 enum x86_64_reg_class classes[MAX_CLASSES], int bit_offset)
6769 HOST_WIDE_INT bytes =
6770 (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
6771 int words
6772 = (bytes + (bit_offset % 64) / 8 + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
6774 /* Variable sized entities are always passed/returned in memory. */
6775 if (bytes < 0)
6776 return 0;
6778 if (mode != VOIDmode
6779 && targetm.calls.must_pass_in_stack (mode, type))
6780 return 0;
6782 if (type && AGGREGATE_TYPE_P (type))
6784 int i;
6785 tree field;
6786 enum x86_64_reg_class subclasses[MAX_CLASSES];
6788 /* On x86-64 we pass structures larger than 64 bytes on the stack. */
6789 if (bytes > 64)
6790 return 0;
6792 for (i = 0; i < words; i++)
6793 classes[i] = X86_64_NO_CLASS;
6795 /* Zero-sized arrays or structures are NO_CLASS. We return 0 to
6796 signal memory class, so handle this as a special case. */
6797 if (!words)
6799 classes[0] = X86_64_NO_CLASS;
6800 return 1;
6803 /* Classify each field of record and merge classes. */
6804 switch (TREE_CODE (type))
6806 case RECORD_TYPE:
6807 /* And now merge the fields of structure. */
6808 for (field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
6810 if (TREE_CODE (field) == FIELD_DECL)
6812 int num;
6814 if (TREE_TYPE (field) == error_mark_node)
6815 continue;
6817 /* Bitfields are always classified as integer. Handle them
6818 early, since later code would consider them to be
6819 misaligned integers. */
6820 if (DECL_BIT_FIELD (field))
6822 for (i = (int_bit_position (field)
6823 + (bit_offset % 64)) / 8 / 8;
6824 i < ((int_bit_position (field) + (bit_offset % 64))
6825 + tree_to_shwi (DECL_SIZE (field))
6826 + 63) / 8 / 8; i++)
6827 classes[i] =
6828 merge_classes (X86_64_INTEGER_CLASS,
6829 classes[i]);
6831 else
6833 int pos;
6835 type = TREE_TYPE (field);
6837 /* Flexible array member is ignored. */
6838 if (TYPE_MODE (type) == BLKmode
6839 && TREE_CODE (type) == ARRAY_TYPE
6840 && TYPE_SIZE (type) == NULL_TREE
6841 && TYPE_DOMAIN (type) != NULL_TREE
6842 && (TYPE_MAX_VALUE (TYPE_DOMAIN (type))
6843 == NULL_TREE))
6845 static bool warned;
6847 if (!warned && warn_psabi)
6849 warned = true;
6850 inform (input_location,
6851 "the ABI of passing struct with"
6852 " a flexible array member has"
6853 " changed in GCC 4.4");
6855 continue;
6857 num = classify_argument (TYPE_MODE (type), type,
6858 subclasses,
6859 (int_bit_position (field)
6860 + bit_offset) % 512);
6861 if (!num)
6862 return 0;
6863 pos = (int_bit_position (field)
6864 + (bit_offset % 64)) / 8 / 8;
6865 for (i = 0; i < num && (i + pos) < words; i++)
6866 classes[i + pos] =
6867 merge_classes (subclasses[i], classes[i + pos]);
6871 break;
6873 case ARRAY_TYPE:
6874 /* Arrays are handled as small records. */
6876 int num;
6877 num = classify_argument (TYPE_MODE (TREE_TYPE (type)),
6878 TREE_TYPE (type), subclasses, bit_offset);
6879 if (!num)
6880 return 0;
6882 /* The partial classes are now full classes. */
6883 if (subclasses[0] == X86_64_SSESF_CLASS && bytes != 4)
6884 subclasses[0] = X86_64_SSE_CLASS;
6885 if (subclasses[0] == X86_64_INTEGERSI_CLASS
6886 && !((bit_offset % 64) == 0 && bytes == 4))
6887 subclasses[0] = X86_64_INTEGER_CLASS;
6889 for (i = 0; i < words; i++)
6890 classes[i] = subclasses[i % num];
6892 break;
6894 case UNION_TYPE:
6895 case QUAL_UNION_TYPE:
6896 /* Unions are similar to RECORD_TYPE but offset is always 0. */
6898 for (field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
6900 if (TREE_CODE (field) == FIELD_DECL)
6902 int num;
6904 if (TREE_TYPE (field) == error_mark_node)
6905 continue;
6907 num = classify_argument (TYPE_MODE (TREE_TYPE (field)),
6908 TREE_TYPE (field), subclasses,
6909 bit_offset);
6910 if (!num)
6911 return 0;
6912 for (i = 0; i < num && i < words; i++)
6913 classes[i] = merge_classes (subclasses[i], classes[i]);
6916 break;
6918 default:
6919 gcc_unreachable ();
6922 if (words > 2)
6924 /* When size > 16 bytes, if the first one isn't
6925 X86_64_SSE_CLASS or any other ones aren't
6926 X86_64_SSEUP_CLASS, everything should be passed in
6927 memory. */
6928 if (classes[0] != X86_64_SSE_CLASS)
6929 return 0;
6931 for (i = 1; i < words; i++)
6932 if (classes[i] != X86_64_SSEUP_CLASS)
6933 return 0;
6936 /* Final merger cleanup. */
6937 for (i = 0; i < words; i++)
6939 /* If one class is MEMORY, everything should be passed in
6940 memory. */
6941 if (classes[i] == X86_64_MEMORY_CLASS)
6942 return 0;
6944 /* The X86_64_SSEUP_CLASS should be always preceded by
6945 X86_64_SSE_CLASS or X86_64_SSEUP_CLASS. */
6946 if (classes[i] == X86_64_SSEUP_CLASS
6947 && classes[i - 1] != X86_64_SSE_CLASS
6948 && classes[i - 1] != X86_64_SSEUP_CLASS)
6950 /* The first one should never be X86_64_SSEUP_CLASS. */
6951 gcc_assert (i != 0);
6952 classes[i] = X86_64_SSE_CLASS;
6955 /* If X86_64_X87UP_CLASS isn't preceded by X86_64_X87_CLASS,
6956 everything should be passed in memory. */
6957 if (classes[i] == X86_64_X87UP_CLASS
6958 && (classes[i - 1] != X86_64_X87_CLASS))
6960 static bool warned;
6962 /* The first one should never be X86_64_X87UP_CLASS. */
6963 gcc_assert (i != 0);
6964 if (!warned && warn_psabi)
6966 warned = true;
6967 inform (input_location,
6968 "the ABI of passing union with long double"
6969 " has changed in GCC 4.4");
6971 return 0;
6974 return words;
6977 /* Compute alignment needed. We align all types to natural boundaries with
6978 exception of XFmode that is aligned to 64bits. */
6979 if (mode != VOIDmode && mode != BLKmode)
6981 int mode_alignment = GET_MODE_BITSIZE (mode);
6983 if (mode == XFmode)
6984 mode_alignment = 128;
6985 else if (mode == XCmode)
6986 mode_alignment = 256;
6987 if (COMPLEX_MODE_P (mode))
6988 mode_alignment /= 2;
6989 /* Misaligned fields are always returned in memory. */
6990 if (bit_offset % mode_alignment)
6991 return 0;
6994 /* for V1xx modes, just use the base mode */
6995 if (VECTOR_MODE_P (mode) && mode != V1DImode && mode != V1TImode
6996 && GET_MODE_SIZE (GET_MODE_INNER (mode)) == bytes)
6997 mode = GET_MODE_INNER (mode);
6999 /* Classification of atomic types. */
7000 switch (mode)
7002 case SDmode:
7003 case DDmode:
7004 classes[0] = X86_64_SSE_CLASS;
7005 return 1;
7006 case TDmode:
7007 classes[0] = X86_64_SSE_CLASS;
7008 classes[1] = X86_64_SSEUP_CLASS;
7009 return 2;
7010 case DImode:
7011 case SImode:
7012 case HImode:
7013 case QImode:
7014 case CSImode:
7015 case CHImode:
7016 case CQImode:
7018 int size = bit_offset + (int) GET_MODE_BITSIZE (mode);
7020 /* Analyze last 128 bits only. */
7021 size = (size - 1) & 0x7f;
7023 if (size < 32)
7025 classes[0] = X86_64_INTEGERSI_CLASS;
7026 return 1;
7028 else if (size < 64)
7030 classes[0] = X86_64_INTEGER_CLASS;
7031 return 1;
7033 else if (size < 64+32)
7035 classes[0] = X86_64_INTEGER_CLASS;
7036 classes[1] = X86_64_INTEGERSI_CLASS;
7037 return 2;
7039 else if (size < 64+64)
7041 classes[0] = classes[1] = X86_64_INTEGER_CLASS;
7042 return 2;
7044 else
7045 gcc_unreachable ();
7047 case CDImode:
7048 case TImode:
7049 classes[0] = classes[1] = X86_64_INTEGER_CLASS;
7050 return 2;
7051 case COImode:
7052 case OImode:
7053 /* OImode shouldn't be used directly. */
7054 gcc_unreachable ();
7055 case CTImode:
7056 return 0;
7057 case SFmode:
7058 if (!(bit_offset % 64))
7059 classes[0] = X86_64_SSESF_CLASS;
7060 else
7061 classes[0] = X86_64_SSE_CLASS;
7062 return 1;
7063 case DFmode:
7064 classes[0] = X86_64_SSEDF_CLASS;
7065 return 1;
7066 case XFmode:
7067 classes[0] = X86_64_X87_CLASS;
7068 classes[1] = X86_64_X87UP_CLASS;
7069 return 2;
7070 case TFmode:
7071 classes[0] = X86_64_SSE_CLASS;
7072 classes[1] = X86_64_SSEUP_CLASS;
7073 return 2;
7074 case SCmode:
7075 classes[0] = X86_64_SSE_CLASS;
7076 if (!(bit_offset % 64))
7077 return 1;
7078 else
7080 static bool warned;
7082 if (!warned && warn_psabi)
7084 warned = true;
7085 inform (input_location,
7086 "the ABI of passing structure with complex float"
7087 " member has changed in GCC 4.4");
7089 classes[1] = X86_64_SSESF_CLASS;
7090 return 2;
7092 case DCmode:
7093 classes[0] = X86_64_SSEDF_CLASS;
7094 classes[1] = X86_64_SSEDF_CLASS;
7095 return 2;
7096 case XCmode:
7097 classes[0] = X86_64_COMPLEX_X87_CLASS;
7098 return 1;
7099 case TCmode:
7100 /* This mode is larger than 16 bytes. */
7101 return 0;
7102 case V8SFmode:
7103 case V8SImode:
7104 case V32QImode:
7105 case V16HImode:
7106 case V4DFmode:
7107 case V4DImode:
7108 classes[0] = X86_64_SSE_CLASS;
7109 classes[1] = X86_64_SSEUP_CLASS;
7110 classes[2] = X86_64_SSEUP_CLASS;
7111 classes[3] = X86_64_SSEUP_CLASS;
7112 return 4;
7113 case V8DFmode:
7114 case V16SFmode:
7115 case V8DImode:
7116 case V16SImode:
7117 case V32HImode:
7118 case V64QImode:
7119 classes[0] = X86_64_SSE_CLASS;
7120 classes[1] = X86_64_SSEUP_CLASS;
7121 classes[2] = X86_64_SSEUP_CLASS;
7122 classes[3] = X86_64_SSEUP_CLASS;
7123 classes[4] = X86_64_SSEUP_CLASS;
7124 classes[5] = X86_64_SSEUP_CLASS;
7125 classes[6] = X86_64_SSEUP_CLASS;
7126 classes[7] = X86_64_SSEUP_CLASS;
7127 return 8;
7128 case V4SFmode:
7129 case V4SImode:
7130 case V16QImode:
7131 case V8HImode:
7132 case V2DFmode:
7133 case V2DImode:
7134 classes[0] = X86_64_SSE_CLASS;
7135 classes[1] = X86_64_SSEUP_CLASS;
7136 return 2;
7137 case V1TImode:
7138 case V1DImode:
7139 case V2SFmode:
7140 case V2SImode:
7141 case V4HImode:
7142 case V8QImode:
7143 classes[0] = X86_64_SSE_CLASS;
7144 return 1;
7145 case BLKmode:
7146 case VOIDmode:
7147 return 0;
7148 default:
7149 gcc_assert (VECTOR_MODE_P (mode));
7151 if (bytes > 16)
7152 return 0;
7154 gcc_assert (GET_MODE_CLASS (GET_MODE_INNER (mode)) == MODE_INT);
7156 if (bit_offset + GET_MODE_BITSIZE (mode) <= 32)
7157 classes[0] = X86_64_INTEGERSI_CLASS;
7158 else
7159 classes[0] = X86_64_INTEGER_CLASS;
7160 classes[1] = X86_64_INTEGER_CLASS;
7161 return 1 + (bytes > 8);
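/* Illustrative walk-through, assuming the 64-bit SYSV ABI and the
   hypothetical type

	struct p { double d; long l; };

   The struct is 16 bytes, i.e. two words.  D classifies as
   X86_64_SSEDF_CLASS and L, at bit offset 64, as X86_64_INTEGER_CLASS, so
   classify_argument returns 2 with classes[0] == X86_64_SSEDF_CLASS and
   classes[1] == X86_64_INTEGER_CLASS; construct_container below then splits
   the struct between an SSE register and a general-purpose register.  */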
7165 /* Examine the argument and set the number of registers required in each
7166 class. Return true iff the parameter should be passed in memory. */
7168 static bool
7169 examine_argument (machine_mode mode, const_tree type, int in_return,
7170 int *int_nregs, int *sse_nregs)
7172 enum x86_64_reg_class regclass[MAX_CLASSES];
7173 int n = classify_argument (mode, type, regclass, 0);
7175 *int_nregs = 0;
7176 *sse_nregs = 0;
7178 if (!n)
7179 return true;
7180 for (n--; n >= 0; n--)
7181 switch (regclass[n])
7183 case X86_64_INTEGER_CLASS:
7184 case X86_64_INTEGERSI_CLASS:
7185 (*int_nregs)++;
7186 break;
7187 case X86_64_SSE_CLASS:
7188 case X86_64_SSESF_CLASS:
7189 case X86_64_SSEDF_CLASS:
7190 (*sse_nregs)++;
7191 break;
7192 case X86_64_NO_CLASS:
7193 case X86_64_SSEUP_CLASS:
7194 break;
7195 case X86_64_X87_CLASS:
7196 case X86_64_X87UP_CLASS:
7197 case X86_64_COMPLEX_X87_CLASS:
7198 if (!in_return)
7199 return true;
7200 break;
7201 case X86_64_MEMORY_CLASS:
7202 gcc_unreachable ();
7205 return false;
7208 /* Construct container for the argument used by GCC interface. See
7209 FUNCTION_ARG for the detailed description. */
7211 static rtx
7212 construct_container (machine_mode mode, machine_mode orig_mode,
7213 const_tree type, int in_return, int nintregs, int nsseregs,
7214 const int *intreg, int sse_regno)
7216 /* The following variables hold the static issued_error state. */
7217 static bool issued_sse_arg_error;
7218 static bool issued_sse_ret_error;
7219 static bool issued_x87_ret_error;
7221 machine_mode tmpmode;
7222 int bytes =
7223 (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
7224 enum x86_64_reg_class regclass[MAX_CLASSES];
7225 int n;
7226 int i;
7227 int nexps = 0;
7228 int needed_sseregs, needed_intregs;
7229 rtx exp[MAX_CLASSES];
7230 rtx ret;
7232 n = classify_argument (mode, type, regclass, 0);
7233 if (!n)
7234 return NULL;
7235 if (examine_argument (mode, type, in_return, &needed_intregs,
7236 &needed_sseregs))
7237 return NULL;
7238 if (needed_intregs > nintregs || needed_sseregs > nsseregs)
7239 return NULL;
7241 /* We allowed the user to turn off SSE for kernel mode. Don't crash if
7242 some less clueful developer tries to use floating-point anyway. */
7243 if (needed_sseregs && !TARGET_SSE)
7245 if (in_return)
7247 if (!issued_sse_ret_error)
7249 error ("SSE register return with SSE disabled");
7250 issued_sse_ret_error = true;
7253 else if (!issued_sse_arg_error)
7255 error ("SSE register argument with SSE disabled");
7256 issued_sse_arg_error = true;
7258 return NULL;
7261 /* Likewise, error if the ABI requires us to return values in the
7262 x87 registers and the user specified -mno-80387. */
7263 if (!TARGET_FLOAT_RETURNS_IN_80387 && in_return)
7264 for (i = 0; i < n; i++)
7265 if (regclass[i] == X86_64_X87_CLASS
7266 || regclass[i] == X86_64_X87UP_CLASS
7267 || regclass[i] == X86_64_COMPLEX_X87_CLASS)
7269 if (!issued_x87_ret_error)
7271 error ("x87 register return with x87 disabled");
7272 issued_x87_ret_error = true;
7274 return NULL;
7277 /* First construct simple cases. Avoid SCmode, since we want to use
7278 single register to pass this type. */
7279 if (n == 1 && mode != SCmode)
7280 switch (regclass[0])
7282 case X86_64_INTEGER_CLASS:
7283 case X86_64_INTEGERSI_CLASS:
7284 return gen_rtx_REG (mode, intreg[0]);
7285 case X86_64_SSE_CLASS:
7286 case X86_64_SSESF_CLASS:
7287 case X86_64_SSEDF_CLASS:
7288 if (mode != BLKmode)
7289 return gen_reg_or_parallel (mode, orig_mode,
7290 SSE_REGNO (sse_regno));
7291 break;
7292 case X86_64_X87_CLASS:
7293 case X86_64_COMPLEX_X87_CLASS:
7294 return gen_rtx_REG (mode, FIRST_STACK_REG);
7295 case X86_64_NO_CLASS:
7296 /* Zero sized array, struct or class. */
7297 return NULL;
7298 default:
7299 gcc_unreachable ();
7301 if (n == 2
7302 && regclass[0] == X86_64_SSE_CLASS
7303 && regclass[1] == X86_64_SSEUP_CLASS
7304 && mode != BLKmode)
7305 return gen_reg_or_parallel (mode, orig_mode,
7306 SSE_REGNO (sse_regno));
7307 if (n == 4
7308 && regclass[0] == X86_64_SSE_CLASS
7309 && regclass[1] == X86_64_SSEUP_CLASS
7310 && regclass[2] == X86_64_SSEUP_CLASS
7311 && regclass[3] == X86_64_SSEUP_CLASS
7312 && mode != BLKmode)
7313 return gen_reg_or_parallel (mode, orig_mode,
7314 SSE_REGNO (sse_regno));
7315 if (n == 8
7316 && regclass[0] == X86_64_SSE_CLASS
7317 && regclass[1] == X86_64_SSEUP_CLASS
7318 && regclass[2] == X86_64_SSEUP_CLASS
7319 && regclass[3] == X86_64_SSEUP_CLASS
7320 && regclass[4] == X86_64_SSEUP_CLASS
7321 && regclass[5] == X86_64_SSEUP_CLASS
7322 && regclass[6] == X86_64_SSEUP_CLASS
7323 && regclass[7] == X86_64_SSEUP_CLASS
7324 && mode != BLKmode)
7325 return gen_reg_or_parallel (mode, orig_mode,
7326 SSE_REGNO (sse_regno));
7327 if (n == 2
7328 && regclass[0] == X86_64_X87_CLASS
7329 && regclass[1] == X86_64_X87UP_CLASS)
7330 return gen_rtx_REG (XFmode, FIRST_STACK_REG);
7332 if (n == 2
7333 && regclass[0] == X86_64_INTEGER_CLASS
7334 && regclass[1] == X86_64_INTEGER_CLASS
7335 && (mode == CDImode || mode == TImode)
7336 && intreg[0] + 1 == intreg[1])
7337 return gen_rtx_REG (mode, intreg[0]);
7339 /* Otherwise figure out the entries of the PARALLEL. */
7340 for (i = 0; i < n; i++)
7342 int pos;
7344 switch (regclass[i])
7346 case X86_64_NO_CLASS:
7347 break;
7348 case X86_64_INTEGER_CLASS:
7349 case X86_64_INTEGERSI_CLASS:
7350 /* Merge TImodes on aligned occasions here too. */
7351 if (i * 8 + 8 > bytes)
7352 tmpmode
7353 = mode_for_size ((bytes - i * 8) * BITS_PER_UNIT, MODE_INT, 0);
7354 else if (regclass[i] == X86_64_INTEGERSI_CLASS)
7355 tmpmode = SImode;
7356 else
7357 tmpmode = DImode;
7358 /* We've requested 24 bytes for which we
7359 don't have a mode. Use DImode. */
7360 if (tmpmode == BLKmode)
7361 tmpmode = DImode;
7362 exp [nexps++]
7363 = gen_rtx_EXPR_LIST (VOIDmode,
7364 gen_rtx_REG (tmpmode, *intreg),
7365 GEN_INT (i*8));
7366 intreg++;
7367 break;
7368 case X86_64_SSESF_CLASS:
7369 exp [nexps++]
7370 = gen_rtx_EXPR_LIST (VOIDmode,
7371 gen_rtx_REG (SFmode,
7372 SSE_REGNO (sse_regno)),
7373 GEN_INT (i*8));
7374 sse_regno++;
7375 break;
7376 case X86_64_SSEDF_CLASS:
7377 exp [nexps++]
7378 = gen_rtx_EXPR_LIST (VOIDmode,
7379 gen_rtx_REG (DFmode,
7380 SSE_REGNO (sse_regno)),
7381 GEN_INT (i*8));
7382 sse_regno++;
7383 break;
7384 case X86_64_SSE_CLASS:
7385 pos = i;
7386 switch (n)
7388 case 1:
7389 tmpmode = DImode;
7390 break;
7391 case 2:
7392 if (i == 0 && regclass[1] == X86_64_SSEUP_CLASS)
7394 tmpmode = TImode;
7395 i++;
7397 else
7398 tmpmode = DImode;
7399 break;
7400 case 4:
7401 gcc_assert (i == 0
7402 && regclass[1] == X86_64_SSEUP_CLASS
7403 && regclass[2] == X86_64_SSEUP_CLASS
7404 && regclass[3] == X86_64_SSEUP_CLASS);
7405 tmpmode = OImode;
7406 i += 3;
7407 break;
7408 case 8:
7409 gcc_assert (i == 0
7410 && regclass[1] == X86_64_SSEUP_CLASS
7411 && regclass[2] == X86_64_SSEUP_CLASS
7412 && regclass[3] == X86_64_SSEUP_CLASS
7413 && regclass[4] == X86_64_SSEUP_CLASS
7414 && regclass[5] == X86_64_SSEUP_CLASS
7415 && regclass[6] == X86_64_SSEUP_CLASS
7416 && regclass[7] == X86_64_SSEUP_CLASS);
7417 tmpmode = XImode;
7418 i += 7;
7419 break;
7420 default:
7421 gcc_unreachable ();
7423 exp [nexps++]
7424 = gen_rtx_EXPR_LIST (VOIDmode,
7425 gen_rtx_REG (tmpmode,
7426 SSE_REGNO (sse_regno)),
7427 GEN_INT (pos*8));
7428 sse_regno++;
7429 break;
7430 default:
7431 gcc_unreachable ();
7435 /* Empty aligned struct, union or class. */
7436 if (nexps == 0)
7437 return NULL;
7439 ret = gen_rtx_PARALLEL (mode, rtvec_alloc (nexps));
7440 for (i = 0; i < nexps; i++)
7441 XVECEXP (ret, 0, i) = exp [i];
7442 return ret;
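/* Illustrative sketch for the struct from the classify_argument example
   above, passed as the first argument (register choices assume nothing
   else has consumed them):

	struct p { double d; long l; };

   The PARALLEL built here looks roughly like

	(parallel [(expr_list (reg:DF xmm0) (const_int 0))
		   (expr_list (reg:DI di) (const_int 8))])

   so bytes 0-7 travel in %xmm0 and bytes 8-15 in %rdi.  */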
7445 /* Update the data in CUM to advance over an argument of mode MODE
7446 and data type TYPE. (TYPE is null for libcalls where that information
7447 may not be available.)
7449 Return the number of integer registers advanced over. */
7451 static int
7452 function_arg_advance_32 (CUMULATIVE_ARGS *cum, machine_mode mode,
7453 const_tree type, HOST_WIDE_INT bytes,
7454 HOST_WIDE_INT words)
7456 int res = 0;
7457 bool error_p = false;
7459 switch (mode)
7461 default:
7462 break;
7464 case BLKmode:
7465 if (bytes < 0)
7466 break;
7467 /* FALLTHRU */
7469 case DImode:
7470 case SImode:
7471 case HImode:
7472 case QImode:
7473 cum->words += words;
7474 cum->nregs -= words;
7475 cum->regno += words;
7476 if (cum->nregs >= 0)
7477 res = words;
7478 if (cum->nregs <= 0)
7480 cum->nregs = 0;
7481 cum->regno = 0;
7483 break;
7485 case OImode:
7486 /* OImode shouldn't be used directly. */
7487 gcc_unreachable ();
7489 case DFmode:
7490 if (cum->float_in_sse == -1)
7491 error_p = 1;
7492 if (cum->float_in_sse < 2)
7493 break;
7494 case SFmode:
7495 if (cum->float_in_sse == -1)
7496 error_p = 1;
7497 if (cum->float_in_sse < 1)
7498 break;
7499 /* FALLTHRU */
7501 case V8SFmode:
7502 case V8SImode:
7503 case V64QImode:
7504 case V32HImode:
7505 case V16SImode:
7506 case V8DImode:
7507 case V16SFmode:
7508 case V8DFmode:
7509 case V32QImode:
7510 case V16HImode:
7511 case V4DFmode:
7512 case V4DImode:
7513 case TImode:
7514 case V16QImode:
7515 case V8HImode:
7516 case V4SImode:
7517 case V2DImode:
7518 case V4SFmode:
7519 case V2DFmode:
7520 if (!type || !AGGREGATE_TYPE_P (type))
7522 cum->sse_words += words;
7523 cum->sse_nregs -= 1;
7524 cum->sse_regno += 1;
7525 if (cum->sse_nregs <= 0)
7527 cum->sse_nregs = 0;
7528 cum->sse_regno = 0;
7531 break;
7533 case V8QImode:
7534 case V4HImode:
7535 case V2SImode:
7536 case V2SFmode:
7537 case V1TImode:
7538 case V1DImode:
7539 if (!type || !AGGREGATE_TYPE_P (type))
7541 cum->mmx_words += words;
7542 cum->mmx_nregs -= 1;
7543 cum->mmx_regno += 1;
7544 if (cum->mmx_nregs <= 0)
7546 cum->mmx_nregs = 0;
7547 cum->mmx_regno = 0;
7550 break;
7552 if (error_p)
7554 cum->float_in_sse = 0;
7555 error ("calling %qD with SSE calling convention without "
7556 "SSE/SSE2 enabled", cum->decl);
7557 sorry ("this is a GCC bug that can be worked around by adding "
7558 "attribute used to function called");
7561 return res;
7564 static int
7565 function_arg_advance_64 (CUMULATIVE_ARGS *cum, machine_mode mode,
7566 const_tree type, HOST_WIDE_INT words, bool named)
7568 int int_nregs, sse_nregs;
7570 /* Unnamed 512-bit and 256-bit vector mode parameters are passed on the stack. */
7571 if (!named && (VALID_AVX512F_REG_MODE (mode)
7572 || VALID_AVX256_REG_MODE (mode)))
7573 return 0;
7575 if (!examine_argument (mode, type, 0, &int_nregs, &sse_nregs)
7576 && sse_nregs <= cum->sse_nregs && int_nregs <= cum->nregs)
7578 cum->nregs -= int_nregs;
7579 cum->sse_nregs -= sse_nregs;
7580 cum->regno += int_nregs;
7581 cum->sse_regno += sse_nregs;
7582 return int_nregs;
7584 else
7586 int align = ix86_function_arg_boundary (mode, type) / BITS_PER_WORD;
7587 cum->words = (cum->words + align - 1) & ~(align - 1);
7588 cum->words += words;
7589 return 0;
7593 static int
7594 function_arg_advance_ms_64 (CUMULATIVE_ARGS *cum, HOST_WIDE_INT bytes,
7595 HOST_WIDE_INT words)
7597 /* Otherwise, this should be passed indirectly. */
7598 gcc_assert (bytes == 1 || bytes == 2 || bytes == 4 || bytes == 8);
7600 cum->words += words;
7601 if (cum->nregs > 0)
7603 cum->nregs -= 1;
7604 cum->regno += 1;
7605 return 1;
7607 return 0;
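/* Illustrative note on the MS ABI behaviour above: every parameter,
   integer or floating point, consumes one of the four register slots.
   For the hypothetical declaration

	void f (int a, double b, int c, double d, int e);

   A and C go in ECX and R8D, B and D in XMM1 and XMM3, and E, being the
   fifth parameter, goes on the stack even though RDX and R9 carry nothing
   for this call.  */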
7610 /* Update the data in CUM to advance over an argument of mode MODE and
7611 data type TYPE. (TYPE is null for libcalls where that information
7612 may not be available.) */
7614 static void
7615 ix86_function_arg_advance (cumulative_args_t cum_v, machine_mode mode,
7616 const_tree type, bool named)
7618 CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
7619 HOST_WIDE_INT bytes, words;
7620 int nregs;
7622 if (mode == BLKmode)
7623 bytes = int_size_in_bytes (type);
7624 else
7625 bytes = GET_MODE_SIZE (mode);
7626 words = (bytes + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
7628 if (type)
7629 mode = type_natural_mode (type, NULL, false);
7631 if ((type && POINTER_BOUNDS_TYPE_P (type))
7632 || POINTER_BOUNDS_MODE_P (mode))
7634 /* If we pass bounds in BT then just update remained bounds count. */
7635 if (cum->bnds_in_bt)
7637 cum->bnds_in_bt--;
7638 return;
7641 /* Update remained number of bounds to force. */
7642 if (cum->force_bnd_pass)
7643 cum->force_bnd_pass--;
7645 cum->bnd_regno++;
7647 return;
7650 /* The first arg not going to Bounds Tables resets this counter. */
7651 cum->bnds_in_bt = 0;
7652 /* For unnamed args we always pass bounds to avoid a bounds mismatch when
7653 the passed and received types do not match. If bounds do not follow an
7654 unnamed arg, still pretend the required number of bounds were passed. */
7655 if (cum->force_bnd_pass)
7657 cum->bnd_regno += cum->force_bnd_pass;
7658 cum->force_bnd_pass = 0;
7661 if (TARGET_64BIT)
7663 enum calling_abi call_abi = cum ? cum->call_abi : ix86_abi;
7665 if (call_abi == MS_ABI)
7666 nregs = function_arg_advance_ms_64 (cum, bytes, words);
7667 else
7668 nregs = function_arg_advance_64 (cum, mode, type, words, named);
7670 else
7671 nregs = function_arg_advance_32 (cum, mode, type, bytes, words);
7673 /* For stdarg we expect bounds to be passed for each value passed
7674 in register. */
7675 if (cum->stdarg)
7676 cum->force_bnd_pass = nregs;
7677 /* For pointers passed in memory we expect bounds passed in Bounds
7678 Table. */
7679 if (!nregs)
7680 cum->bnds_in_bt = chkp_type_bounds_count (type);
7683 /* Define where to put the arguments to a function.
7684 Value is zero to push the argument on the stack,
7685 or a hard register in which to store the argument.
7687 MODE is the argument's machine mode.
7688 TYPE is the data type of the argument (as a tree).
7689 This is null for libcalls where that information may
7690 not be available.
7691 CUM is a variable of type CUMULATIVE_ARGS which gives info about
7692 the preceding args and about the function being called.
7693 NAMED is nonzero if this argument is a named parameter
7694 (otherwise it is an extra parameter matching an ellipsis). */
7696 static rtx
7697 function_arg_32 (CUMULATIVE_ARGS *cum, machine_mode mode,
7698 machine_mode orig_mode, const_tree type,
7699 HOST_WIDE_INT bytes, HOST_WIDE_INT words)
7701 bool error_p = false;
7702 /* Avoid the AL settings for the Unix64 ABI. */
7703 if (mode == VOIDmode)
7704 return constm1_rtx;
7706 switch (mode)
7708 default:
7709 break;
7711 case BLKmode:
7712 if (bytes < 0)
7713 break;
7714 /* FALLTHRU */
7715 case DImode:
7716 case SImode:
7717 case HImode:
7718 case QImode:
7719 if (words <= cum->nregs)
7721 int regno = cum->regno;
7723 /* Fastcall allocates the first two DWORD (SImode) or
7724 smaller arguments to ECX and EDX if the argument isn't an
7725 aggregate type. */
7726 if (cum->fastcall)
7728 if (mode == BLKmode
7729 || mode == DImode
7730 || (type && AGGREGATE_TYPE_P (type)))
7731 break;
7733 /* ECX not EAX is the first allocated register. */
7734 if (regno == AX_REG)
7735 regno = CX_REG;
7737 return gen_rtx_REG (mode, regno);
7739 break;
7741 case DFmode:
7742 if (cum->float_in_sse == -1)
7743 error_p = 1;
7744 if (cum->float_in_sse < 2)
7745 break;
7746 case SFmode:
7747 if (cum->float_in_sse == -1)
7748 error_p = 1;
7749 if (cum->float_in_sse < 1)
7750 break;
7751 /* FALLTHRU */
7752 case TImode:
7753 /* In 32bit, we pass TImode in xmm registers. */
7754 case V16QImode:
7755 case V8HImode:
7756 case V4SImode:
7757 case V2DImode:
7758 case V4SFmode:
7759 case V2DFmode:
7760 if (!type || !AGGREGATE_TYPE_P (type))
7762 if (cum->sse_nregs)
7763 return gen_reg_or_parallel (mode, orig_mode,
7764 cum->sse_regno + FIRST_SSE_REG);
7766 break;
7768 case OImode:
7769 case XImode:
7770 /* OImode and XImode shouldn't be used directly. */
7771 gcc_unreachable ();
7773 case V64QImode:
7774 case V32HImode:
7775 case V16SImode:
7776 case V8DImode:
7777 case V16SFmode:
7778 case V8DFmode:
7779 case V8SFmode:
7780 case V8SImode:
7781 case V32QImode:
7782 case V16HImode:
7783 case V4DFmode:
7784 case V4DImode:
7785 if (!type || !AGGREGATE_TYPE_P (type))
7787 if (cum->sse_nregs)
7788 return gen_reg_or_parallel (mode, orig_mode,
7789 cum->sse_regno + FIRST_SSE_REG);
7791 break;
7793 case V8QImode:
7794 case V4HImode:
7795 case V2SImode:
7796 case V2SFmode:
7797 case V1TImode:
7798 case V1DImode:
7799 if (!type || !AGGREGATE_TYPE_P (type))
7801 if (cum->mmx_nregs)
7802 return gen_reg_or_parallel (mode, orig_mode,
7803 cum->mmx_regno + FIRST_MMX_REG);
7805 break;
7807 if (error_p)
7809 cum->float_in_sse = 0;
7810 error ("calling %qD with SSE calling convention without "
7811 "SSE/SSE2 enabled", cum->decl);
7812 sorry ("this is a GCC bug that can be worked around by adding "
7813 "attribute used to function called");
7816 return NULL_RTX;
7819 static rtx
7820 function_arg_64 (const CUMULATIVE_ARGS *cum, machine_mode mode,
7821 machine_mode orig_mode, const_tree type, bool named)
7823 /* Handle a hidden AL argument containing number of registers
7824 for varargs x86-64 functions. */
7825 if (mode == VOIDmode)
7826 return GEN_INT (cum->maybe_vaarg
7827 ? (cum->sse_nregs < 0
7828 ? X86_64_SSE_REGPARM_MAX
7829 : cum->sse_regno)
7830 : -1);
7832 switch (mode)
7834 default:
7835 break;
7837 case V8SFmode:
7838 case V8SImode:
7839 case V32QImode:
7840 case V16HImode:
7841 case V4DFmode:
7842 case V4DImode:
7843 case V16SFmode:
7844 case V16SImode:
7845 case V64QImode:
7846 case V32HImode:
7847 case V8DFmode:
7848 case V8DImode:
7849 /* Unnamed 256-bit and 512-bit vector mode parameters are passed on the stack. */
7850 if (!named)
7851 return NULL;
7852 break;
7855 return construct_container (mode, orig_mode, type, 0, cum->nregs,
7856 cum->sse_nregs,
7857 &x86_64_int_parameter_registers [cum->regno],
7858 cum->sse_regno);
7861 static rtx
7862 function_arg_ms_64 (const CUMULATIVE_ARGS *cum, machine_mode mode,
7863 machine_mode orig_mode, bool named,
7864 HOST_WIDE_INT bytes)
7866 unsigned int regno;
7868 /* We need to add a clobber for MS_ABI->SYSV ABI calls in expand_call.
7869 We use the value -2 to specify that the current function call is MS ABI. */
7870 if (mode == VOIDmode)
7871 return GEN_INT (-2);
7873 /* If we've run out of registers, it goes on the stack. */
7874 if (cum->nregs == 0)
7875 return NULL_RTX;
7877 regno = x86_64_ms_abi_int_parameter_registers[cum->regno];
7879 /* Only floating point modes are passed in anything but integer regs. */
7880 if (TARGET_SSE && (mode == SFmode || mode == DFmode))
7882 if (named)
7883 regno = cum->regno + FIRST_SSE_REG;
7884 else
7886 rtx t1, t2;
7888 /* Unnamed floating parameters are passed in both the
7889 SSE and integer registers. */
7890 t1 = gen_rtx_REG (mode, cum->regno + FIRST_SSE_REG);
7891 t2 = gen_rtx_REG (mode, regno);
7892 t1 = gen_rtx_EXPR_LIST (VOIDmode, t1, const0_rtx);
7893 t2 = gen_rtx_EXPR_LIST (VOIDmode, t2, const0_rtx);
7894 return gen_rtx_PARALLEL (mode, gen_rtvec (2, t1, t2));
7897 /* Handle aggregate types passed in registers. */
7898 if (orig_mode == BLKmode)
7900 if (bytes > 0 && bytes <= 8)
7901 mode = (bytes > 4 ? DImode : SImode);
7902 if (mode == BLKmode)
7903 mode = DImode;
7906 return gen_reg_or_parallel (mode, orig_mode, regno);
7909 /* Return where to put the arguments to a function.
7910 Return zero to push the argument on the stack, or a hard register in which to store the argument.
7912 MODE is the argument's machine mode. TYPE is the data type of the
7913 argument. It is null for libcalls where that information may not be
7914 available. CUM gives information about the preceding args and about
7915 the function being called. NAMED is nonzero if this argument is a
7916 named parameter (otherwise it is an extra parameter matching an
7917 ellipsis). */
7919 static rtx
7920 ix86_function_arg (cumulative_args_t cum_v, machine_mode omode,
7921 const_tree type, bool named)
7923 CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
7924 machine_mode mode = omode;
7925 HOST_WIDE_INT bytes, words;
7926 rtx arg;
7928 /* All pointer bounds arguments are handled separately here. */
7929 if ((type && POINTER_BOUNDS_TYPE_P (type))
7930 || POINTER_BOUNDS_MODE_P (mode))
7932 /* Return NULL if bounds are forced to go in Bounds Table. */
7933 if (cum->bnds_in_bt)
7934 arg = NULL;
7935 /* Return the next available bound reg if any. */
7936 else if (cum->bnd_regno <= LAST_BND_REG)
7937 arg = gen_rtx_REG (BNDmode, cum->bnd_regno);
7938 /* Return the next special slot number otherwise. */
7939 else
7940 arg = GEN_INT (cum->bnd_regno - LAST_BND_REG - 1);
7942 return arg;
7945 if (mode == BLKmode)
7946 bytes = int_size_in_bytes (type);
7947 else
7948 bytes = GET_MODE_SIZE (mode);
7949 words = (bytes + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
7951 /* To simplify the code below, represent vector types with a vector mode
7952 even if MMX/SSE are not active. */
7953 if (type && TREE_CODE (type) == VECTOR_TYPE)
7954 mode = type_natural_mode (type, cum, false);
7956 if (TARGET_64BIT)
7958 enum calling_abi call_abi = cum ? cum->call_abi : ix86_abi;
7960 if (call_abi == MS_ABI)
7961 arg = function_arg_ms_64 (cum, mode, omode, named, bytes);
7962 else
7963 arg = function_arg_64 (cum, mode, omode, type, named);
7965 else
7966 arg = function_arg_32 (cum, mode, omode, type, bytes, words);
7968 return arg;
7971 /* A C expression that indicates when an argument must be passed by
7972 reference. If nonzero for an argument, a copy of that argument is
7973 made in memory and a pointer to the argument is passed instead of
7974 the argument itself. The pointer is passed in whatever way is
7975 appropriate for passing a pointer to that type. */
7977 static bool
7978 ix86_pass_by_reference (cumulative_args_t cum_v, machine_mode mode,
7979 const_tree type, bool)
7981 CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
7983 /* Bounds are never passed by reference. */
7984 if ((type && POINTER_BOUNDS_TYPE_P (type))
7985 || POINTER_BOUNDS_MODE_P (mode))
7986 return false;
7988 if (TARGET_64BIT)
7990 enum calling_abi call_abi = cum ? cum->call_abi : ix86_abi;
7992 /* See Windows x64 Software Convention. */
7993 if (call_abi == MS_ABI)
7995 HOST_WIDE_INT msize = GET_MODE_SIZE (mode);
7997 if (type)
7999 /* Arrays are passed by reference. */
8000 if (TREE_CODE (type) == ARRAY_TYPE)
8001 return true;
8003 if (RECORD_OR_UNION_TYPE_P (type))
8005 /* Structs/unions of sizes other than 8, 16, 32, or 64 bits
8006 are passed by reference. */
8007 msize = int_size_in_bytes (type);
8011 /* __m128 is passed by reference. */
8012 return msize != 1 && msize != 2 && msize != 4 && msize != 8;
8014 else if (type && int_size_in_bytes (type) == -1)
8015 return true;
8018 return false;
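/* Illustrative example of the MS ABI rule above, using hypothetical types:

	struct s8  { int a, b; };	(8 bytes)
	struct s12 { int a, b, c; };	(12 bytes)

   S8 is passed by value in a single integer register slot, while S12 is
   passed by reference because its size is not 1, 2, 4 or 8 bytes.  */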
8021 /* Return true when TYPE should be 128bit aligned for 32bit argument
8022 passing ABI. XXX: This function is obsolete and is only used for
8023 checking psABI compatibility with previous versions of GCC. */
8025 static bool
8026 ix86_compat_aligned_value_p (const_tree type)
8028 machine_mode mode = TYPE_MODE (type);
8029 if (((TARGET_SSE && SSE_REG_MODE_P (mode))
8030 || mode == TDmode
8031 || mode == TFmode
8032 || mode == TCmode)
8033 && (!TYPE_USER_ALIGN (type) || TYPE_ALIGN (type) > 128))
8034 return true;
8035 if (TYPE_ALIGN (type) < 128)
8036 return false;
8038 if (AGGREGATE_TYPE_P (type))
8040 /* Walk the aggregates recursively. */
8041 switch (TREE_CODE (type))
8043 case RECORD_TYPE:
8044 case UNION_TYPE:
8045 case QUAL_UNION_TYPE:
8047 tree field;
8049 /* Walk all the structure fields. */
8050 for (field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
8052 if (TREE_CODE (field) == FIELD_DECL
8053 && ix86_compat_aligned_value_p (TREE_TYPE (field)))
8054 return true;
8056 break;
8059 case ARRAY_TYPE:
8060 /* Just for use if some languages pass arrays by value. */
8061 if (ix86_compat_aligned_value_p (TREE_TYPE (type)))
8062 return true;
8063 break;
8065 default:
8066 gcc_unreachable ();
8069 return false;
8072 /* Return the alignment boundary for MODE and TYPE with alignment ALIGN.
8073 XXX: This function is obsolete and is only used for checking psABI
8074 compatibility with previous versions of GCC. */
8076 static unsigned int
8077 ix86_compat_function_arg_boundary (machine_mode mode,
8078 const_tree type, unsigned int align)
8080 /* In 32bit, only _Decimal128 and __float128 are aligned to their
8081 natural boundaries. */
8082 if (!TARGET_64BIT && mode != TDmode && mode != TFmode)
8084 /* i386 ABI defines all arguments to be 4 byte aligned. We have to
8085 make an exception for SSE modes since these require 128bit
8086 alignment.
8088 The handling here differs from field_alignment. ICC aligns MMX
8089 arguments to 4 byte boundaries, while structure fields are aligned
8090 to 8 byte boundaries. */
8091 if (!type)
8093 if (!(TARGET_SSE && SSE_REG_MODE_P (mode)))
8094 align = PARM_BOUNDARY;
8096 else
8098 if (!ix86_compat_aligned_value_p (type))
8099 align = PARM_BOUNDARY;
8102 if (align > BIGGEST_ALIGNMENT)
8103 align = BIGGEST_ALIGNMENT;
8104 return align;
8107 /* Return true when TYPE should be 128bit aligned for 32bit argument
8108 passing ABI. */
8110 static bool
8111 ix86_contains_aligned_value_p (const_tree type)
8113 machine_mode mode = TYPE_MODE (type);
8115 if (mode == XFmode || mode == XCmode)
8116 return false;
8118 if (TYPE_ALIGN (type) < 128)
8119 return false;
8121 if (AGGREGATE_TYPE_P (type))
8123 /* Walk the aggregates recursively. */
8124 switch (TREE_CODE (type))
8126 case RECORD_TYPE:
8127 case UNION_TYPE:
8128 case QUAL_UNION_TYPE:
8130 tree field;
8132 /* Walk all the structure fields. */
8133 for (field = TYPE_FIELDS (type);
8134 field;
8135 field = DECL_CHAIN (field))
8137 if (TREE_CODE (field) == FIELD_DECL
8138 && ix86_contains_aligned_value_p (TREE_TYPE (field)))
8139 return true;
8141 break;
8144 case ARRAY_TYPE:
8145 /* Just for use if some languages pass arrays by value. */
8146 if (ix86_contains_aligned_value_p (TREE_TYPE (type)))
8147 return true;
8148 break;
8150 default:
8151 gcc_unreachable ();
8154 else
8155 return TYPE_ALIGN (type) >= 128;
8157 return false;
8160 /* Gives the alignment boundary, in bits, of an argument with the
8161 specified mode and type. */
8163 static unsigned int
8164 ix86_function_arg_boundary (machine_mode mode, const_tree type)
8166 unsigned int align;
8167 if (type)
8169 /* Since the main variant type is used for the call, convert TYPE
8170 to its main variant. */
8171 type = TYPE_MAIN_VARIANT (type);
8172 align = TYPE_ALIGN (type);
8174 else
8175 align = GET_MODE_ALIGNMENT (mode);
8176 if (align < PARM_BOUNDARY)
8177 align = PARM_BOUNDARY;
8178 else
8180 static bool warned;
8181 unsigned int saved_align = align;
8183 if (!TARGET_64BIT)
8185 /* i386 ABI defines XFmode arguments to be 4 byte aligned. */
8186 if (!type)
8188 if (mode == XFmode || mode == XCmode)
8189 align = PARM_BOUNDARY;
8191 else if (!ix86_contains_aligned_value_p (type))
8192 align = PARM_BOUNDARY;
8194 if (align < 128)
8195 align = PARM_BOUNDARY;
8198 if (warn_psabi
8199 && !warned
8200 && align != ix86_compat_function_arg_boundary (mode, type,
8201 saved_align))
8203 warned = true;
8204 inform (input_location,
8205 "The ABI for passing parameters with %d-byte"
8206 " alignment has changed in GCC 4.6",
8207 align / BITS_PER_UNIT);
8211 return align;
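/* Illustrative example, assuming a 32-bit target with SSE enabled: a
   __m128 argument reports a 128-bit boundary here, while a plain double
   falls back to PARM_BOUNDARY (32 bits), matching the traditional i386
   convention of pushing doubles into 4-byte aligned stack slots.  */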
8214 /* Return true if N is a possible register number of function value. */
8216 static bool
8217 ix86_function_value_regno_p (const unsigned int regno)
8219 switch (regno)
8221 case AX_REG:
8222 return true;
8223 case DX_REG:
8224 return (!TARGET_64BIT || ix86_cfun_abi () != MS_ABI);
8225 case DI_REG:
8226 case SI_REG:
8227 return TARGET_64BIT && ix86_cfun_abi () != MS_ABI;
8229 case BND0_REG:
8230 case BND1_REG:
8231 return chkp_function_instrumented_p (current_function_decl);
8233 /* Complex values are returned in %st(0)/%st(1) pair. */
8234 case ST0_REG:
8235 case ST1_REG:
8236 /* TODO: The function should depend on current function ABI but
8237 builtins.c would need updating then. Therefore we use the
8238 default ABI. */
8239 if (TARGET_64BIT && ix86_cfun_abi () == MS_ABI)
8240 return false;
8241 return TARGET_FLOAT_RETURNS_IN_80387;
8243 /* Complex values are returned in %xmm0/%xmm1 pair. */
8244 case XMM0_REG:
8245 case XMM1_REG:
8246 return TARGET_SSE;
8248 case MM0_REG:
8249 if (TARGET_MACHO || TARGET_64BIT)
8250 return false;
8251 return TARGET_MMX;
8254 return false;
8257 /* Define how to find the value returned by a function.
8258 VALTYPE is the data type of the value (as a tree).
8259 If the precise function being called is known, FUNC is its FUNCTION_DECL;
8260 otherwise, FUNC is 0. */
8262 static rtx
8263 function_value_32 (machine_mode orig_mode, machine_mode mode,
8264 const_tree fntype, const_tree fn)
8266 unsigned int regno;
8268 /* 8-byte vector modes in %mm0. See ix86_return_in_memory for where
8269 we normally prevent this case when mmx is not available. However
8270 some ABIs may require the result to be returned like DImode. */
8271 if (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 8)
8272 regno = FIRST_MMX_REG;
8274 /* 16-byte vector modes in %xmm0. See ix86_return_in_memory for where
8275 we prevent this case when sse is not available. However some ABIs
8276 may require the result to be returned like integer TImode. */
8277 else if (mode == TImode
8278 || (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 16))
8279 regno = FIRST_SSE_REG;
8281 /* 32-byte vector modes in %ymm0. */
8282 else if (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 32)
8283 regno = FIRST_SSE_REG;
8285 /* 64-byte vector modes in %zmm0. */
8286 else if (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 64)
8287 regno = FIRST_SSE_REG;
8289 /* Floating point return values in %st(0) (unless -mno-fp-ret-in-387). */
8290 else if (X87_FLOAT_MODE_P (mode) && TARGET_FLOAT_RETURNS_IN_80387)
8291 regno = FIRST_FLOAT_REG;
8292 else
8293 /* Most things go in %eax. */
8294 regno = AX_REG;
8296 /* Override FP return register with %xmm0 for local functions when
8297 SSE math is enabled or for functions with sseregparm attribute. */
8298 if ((fn || fntype) && (mode == SFmode || mode == DFmode))
8300 int sse_level = ix86_function_sseregparm (fntype, fn, false);
8301 if (sse_level == -1)
8303 error ("calling %qD with SSE calling convention without "
8304 "SSE/SSE2 enabled", fn);
8305 sorry ("this is a GCC bug that can be worked around by adding "
8306 "attribute used to function called");
8308 else if ((sse_level >= 1 && mode == SFmode)
8309 || (sse_level == 2 && mode == DFmode))
8310 regno = FIRST_SSE_REG;
8313 /* OImode shouldn't be used directly. */
8314 gcc_assert (mode != OImode);
8316 return gen_rtx_REG (orig_mode, regno);
8319 static rtx
8320 function_value_64 (machine_mode orig_mode, machine_mode mode,
8321 const_tree valtype)
8323 rtx ret;
8325 /* Handle libcalls, which don't provide a type node. */
8326 if (valtype == NULL)
8328 unsigned int regno;
8330 switch (mode)
8332 case SFmode:
8333 case SCmode:
8334 case DFmode:
8335 case DCmode:
8336 case TFmode:
8337 case SDmode:
8338 case DDmode:
8339 case TDmode:
8340 regno = FIRST_SSE_REG;
8341 break;
8342 case XFmode:
8343 case XCmode:
8344 regno = FIRST_FLOAT_REG;
8345 break;
8346 case TCmode:
8347 return NULL;
8348 default:
8349 regno = AX_REG;
8352 return gen_rtx_REG (mode, regno);
8354 else if (POINTER_TYPE_P (valtype))
8356 /* Pointers are always returned in word_mode. */
8357 mode = word_mode;
8360 ret = construct_container (mode, orig_mode, valtype, 1,
8361 X86_64_REGPARM_MAX, X86_64_SSE_REGPARM_MAX,
8362 x86_64_int_return_registers, 0);
8364 /* For zero-sized structures, construct_container returns NULL, but we
8365 need to keep the rest of the compiler happy by returning a meaningful value. */
8366 if (!ret)
8367 ret = gen_rtx_REG (orig_mode, AX_REG);
8369 return ret;
8372 static rtx
8373 function_value_ms_64 (machine_mode orig_mode, machine_mode mode,
8374 const_tree valtype)
8376 unsigned int regno = AX_REG;
8378 if (TARGET_SSE)
8380 switch (GET_MODE_SIZE (mode))
8382 case 16:
8383 if (valtype != NULL_TREE
8384 && !VECTOR_INTEGER_TYPE_P (valtype)
8386 && !INTEGRAL_TYPE_P (valtype)
8387 && !VECTOR_FLOAT_TYPE_P (valtype))
8388 break;
8389 if ((SCALAR_INT_MODE_P (mode) || VECTOR_MODE_P (mode))
8390 && !COMPLEX_MODE_P (mode))
8391 regno = FIRST_SSE_REG;
8392 break;
8393 case 8:
8394 case 4:
8395 if (mode == SFmode || mode == DFmode)
8396 regno = FIRST_SSE_REG;
8397 break;
8398 default:
8399 break;
8402 return gen_rtx_REG (orig_mode, regno);
8405 static rtx
8406 ix86_function_value_1 (const_tree valtype, const_tree fntype_or_decl,
8407 machine_mode orig_mode, machine_mode mode)
8409 const_tree fn, fntype;
8411 fn = NULL_TREE;
8412 if (fntype_or_decl && DECL_P (fntype_or_decl))
8413 fn = fntype_or_decl;
8414 fntype = fn ? TREE_TYPE (fn) : fntype_or_decl;
8416 if ((valtype && POINTER_BOUNDS_TYPE_P (valtype))
8417 || POINTER_BOUNDS_MODE_P (mode))
8418 return gen_rtx_REG (BNDmode, FIRST_BND_REG);
8419 else if (TARGET_64BIT && ix86_function_type_abi (fntype) == MS_ABI)
8420 return function_value_ms_64 (orig_mode, mode, valtype);
8421 else if (TARGET_64BIT)
8422 return function_value_64 (orig_mode, mode, valtype);
8423 else
8424 return function_value_32 (orig_mode, mode, fntype, fn);
8427 static rtx
8428 ix86_function_value (const_tree valtype, const_tree fntype_or_decl, bool)
8430 machine_mode mode, orig_mode;
8432 orig_mode = TYPE_MODE (valtype);
8433 mode = type_natural_mode (valtype, NULL, true);
8434 return ix86_function_value_1 (valtype, fntype_or_decl, orig_mode, mode);
8437 /* Return an RTX representing a place where a function returns
8438 or receives pointer bounds, or NULL if no bounds are returned.
8440 VALTYPE is a data type of a value returned by the function.
8442 FN_DECL_OR_TYPE is a tree node representing FUNCTION_DECL
8443 or FUNCTION_TYPE of the function.
8445 If OUTGOING is false, return a place in which the caller will
8446 see the return value. Otherwise, return a place where a
8447 function returns a value. */
8449 static rtx
8450 ix86_function_value_bounds (const_tree valtype,
8451 const_tree fntype_or_decl ATTRIBUTE_UNUSED,
8452 bool outgoing ATTRIBUTE_UNUSED)
8454 rtx res = NULL_RTX;
8456 if (BOUNDED_TYPE_P (valtype))
8457 res = gen_rtx_REG (BNDmode, FIRST_BND_REG);
8458 else if (chkp_type_has_pointer (valtype))
8460 bitmap slots;
8461 rtx bounds[2];
8462 bitmap_iterator bi;
8463 unsigned i, bnd_no = 0;
8465 bitmap_obstack_initialize (NULL);
8466 slots = BITMAP_ALLOC (NULL);
8467 chkp_find_bound_slots (valtype, slots);
8469 EXECUTE_IF_SET_IN_BITMAP (slots, 0, i, bi)
8471 rtx reg = gen_rtx_REG (BNDmode, FIRST_BND_REG + bnd_no);
8472 rtx offs = GEN_INT (i * POINTER_SIZE / BITS_PER_UNIT);
8473 gcc_assert (bnd_no < 2);
8474 bounds[bnd_no++] = gen_rtx_EXPR_LIST (VOIDmode, reg, offs);
8477 res = gen_rtx_PARALLEL (VOIDmode, gen_rtvec_v (bnd_no, bounds));
8479 BITMAP_FREE (slots);
8480 bitmap_obstack_release (NULL);
8482 else
8483 res = NULL_RTX;
8485 return res;
8488 /* Pointer function arguments and return values are promoted to
8489 word_mode. */
8491 static machine_mode
8492 ix86_promote_function_mode (const_tree type, machine_mode mode,
8493 int *punsignedp, const_tree fntype,
8494 int for_return)
8496 if (type != NULL_TREE && POINTER_TYPE_P (type))
8498 *punsignedp = POINTERS_EXTEND_UNSIGNED;
8499 return word_mode;
8501 return default_promote_function_mode (type, mode, punsignedp, fntype,
8502 for_return);
8505 /* Return true if a structure, union or array with MODE containing FIELD
8506 should be accessed using BLKmode. */
8508 static bool
8509 ix86_member_type_forces_blk (const_tree field, machine_mode mode)
8511 /* Union with XFmode must be in BLKmode. */
8512 return (mode == XFmode
8513 && (TREE_CODE (DECL_FIELD_CONTEXT (field)) == UNION_TYPE
8514 || TREE_CODE (DECL_FIELD_CONTEXT (field)) == QUAL_UNION_TYPE));
8518 ix86_libcall_value (machine_mode mode)
8520 return ix86_function_value_1 (NULL, NULL, mode, mode);
8523 /* Return true iff type is returned in memory. */
8525 static bool
8526 ix86_return_in_memory (const_tree type, const_tree fntype ATTRIBUTE_UNUSED)
8528 #ifdef SUBTARGET_RETURN_IN_MEMORY
8529 return SUBTARGET_RETURN_IN_MEMORY (type, fntype);
8530 #else
8531 const machine_mode mode = type_natural_mode (type, NULL, true);
8532 HOST_WIDE_INT size;
8534 if (POINTER_BOUNDS_TYPE_P (type))
8535 return false;
8537 if (TARGET_64BIT)
8539 if (ix86_function_type_abi (fntype) == MS_ABI)
8541 size = int_size_in_bytes (type);
8543 /* __m128 is returned in xmm0. */
8544 if ((!type || VECTOR_INTEGER_TYPE_P (type)
8545 || INTEGRAL_TYPE_P (type)
8546 || VECTOR_FLOAT_TYPE_P (type))
8547 && (SCALAR_INT_MODE_P (mode) || VECTOR_MODE_P (mode))
8548 && !COMPLEX_MODE_P (mode)
8549 && (GET_MODE_SIZE (mode) == 16 || size == 16))
8550 return false;
8552 /* Otherwise, the size must be exactly 1, 2, 4 or 8 bytes. */
8553 return size != 1 && size != 2 && size != 4 && size != 8;
8555 else
8557 int needed_intregs, needed_sseregs;
8559 return examine_argument (mode, type, 1,
8560 &needed_intregs, &needed_sseregs);
8563 else
8565 if (mode == BLKmode)
8566 return true;
8568 size = int_size_in_bytes (type);
8570 if (MS_AGGREGATE_RETURN && AGGREGATE_TYPE_P (type) && size <= 8)
8571 return false;
8573 if (VECTOR_MODE_P (mode) || mode == TImode)
8575 /* User-created vectors small enough to fit in EAX. */
8576 if (size < 8)
8577 return false;
8579 /* Unless the ABI prescribes otherwise,
8580 MMX/3dNow values are returned in MM0 if available. */
8582 if (size == 8)
8583 return TARGET_VECT8_RETURNS || !TARGET_MMX;
8585 /* SSE values are returned in XMM0 if available. */
8586 if (size == 16)
8587 return !TARGET_SSE;
8589 /* AVX values are returned in YMM0 if available. */
8590 if (size == 32)
8591 return !TARGET_AVX;
8593 /* AVX512F values are returned in ZMM0 if available. */
8594 if (size == 64)
8595 return !TARGET_AVX512F;
8598 if (mode == XFmode)
8599 return false;
8601 if (size > 12)
8602 return true;
8604 /* OImode shouldn't be used directly. */
8605 gcc_assert (mode != OImode);
8607 return false;
8609 #endif
8613 /* Create the va_list data type. */
8615 /* Returns the calling-convention-specific va_list data type.
8616 The argument ABI can be DEFAULT_ABI, MS_ABI, or SYSV_ABI. */
8618 static tree
8619 ix86_build_builtin_va_list_abi (enum calling_abi abi)
8621 tree f_gpr, f_fpr, f_ovf, f_sav, record, type_decl;
8623 /* For i386 we use a plain pointer to the argument area. */
8624 if (!TARGET_64BIT || abi == MS_ABI)
8625 return build_pointer_type (char_type_node);
8627 record = lang_hooks.types.make_type (RECORD_TYPE);
8628 type_decl = build_decl (BUILTINS_LOCATION,
8629 TYPE_DECL, get_identifier ("__va_list_tag"), record);
8631 f_gpr = build_decl (BUILTINS_LOCATION,
8632 FIELD_DECL, get_identifier ("gp_offset"),
8633 unsigned_type_node);
8634 f_fpr = build_decl (BUILTINS_LOCATION,
8635 FIELD_DECL, get_identifier ("fp_offset"),
8636 unsigned_type_node);
8637 f_ovf = build_decl (BUILTINS_LOCATION,
8638 FIELD_DECL, get_identifier ("overflow_arg_area"),
8639 ptr_type_node);
8640 f_sav = build_decl (BUILTINS_LOCATION,
8641 FIELD_DECL, get_identifier ("reg_save_area"),
8642 ptr_type_node);
8644 va_list_gpr_counter_field = f_gpr;
8645 va_list_fpr_counter_field = f_fpr;
8647 DECL_FIELD_CONTEXT (f_gpr) = record;
8648 DECL_FIELD_CONTEXT (f_fpr) = record;
8649 DECL_FIELD_CONTEXT (f_ovf) = record;
8650 DECL_FIELD_CONTEXT (f_sav) = record;
8652 TYPE_STUB_DECL (record) = type_decl;
8653 TYPE_NAME (record) = type_decl;
8654 TYPE_FIELDS (record) = f_gpr;
8655 DECL_CHAIN (f_gpr) = f_fpr;
8656 DECL_CHAIN (f_fpr) = f_ovf;
8657 DECL_CHAIN (f_ovf) = f_sav;
8659 layout_type (record);
8661 /* The correct type is an array type of one element. */
8662 return build_array_type (record, build_index_type (size_zero_node));
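/* For reference, the record built above corresponds roughly to the
   following C declaration from the x86-64 SysV psABI (an illustrative
   sketch only, not something the compiler itself uses):

       typedef struct __va_list_tag
       {
         unsigned int gp_offset;    -- byte offset into reg_save_area of
                                       the next GP register argument
         unsigned int fp_offset;    -- byte offset into reg_save_area of
                                       the next SSE register argument
         void *overflow_arg_area;   -- next stack-passed argument
         void *reg_save_area;       -- register save area set up by the
                                       function prologue
       } __builtin_va_list[1];  */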
8665 /* Set up the builtin va_list data type and, for 64-bit, the additional
8666 calling-convention-specific va_list data types. */
8668 static tree
8669 ix86_build_builtin_va_list (void)
8671 tree ret = ix86_build_builtin_va_list_abi (ix86_abi);
8673 /* Initialize ABI-specific va_list builtin types. */
8674 if (TARGET_64BIT)
8676 tree t;
8677 if (ix86_abi == MS_ABI)
8679 t = ix86_build_builtin_va_list_abi (SYSV_ABI);
8680 if (TREE_CODE (t) != RECORD_TYPE)
8681 t = build_variant_type_copy (t);
8682 sysv_va_list_type_node = t;
8684 else
8686 t = ret;
8687 if (TREE_CODE (t) != RECORD_TYPE)
8688 t = build_variant_type_copy (t);
8689 sysv_va_list_type_node = t;
8691 if (ix86_abi != MS_ABI)
8693 t = ix86_build_builtin_va_list_abi (MS_ABI);
8694 if (TREE_CODE (t) != RECORD_TYPE)
8695 t = build_variant_type_copy (t);
8696 ms_va_list_type_node = t;
8698 else
8700 t = ret;
8701 if (TREE_CODE (t) != RECORD_TYPE)
8702 t = build_variant_type_copy (t);
8703 ms_va_list_type_node = t;
8707 return ret;
8710 /* Worker function for TARGET_SETUP_INCOMING_VARARGS. */
8712 static void
8713 setup_incoming_varargs_64 (CUMULATIVE_ARGS *cum)
8715 rtx save_area, mem;
8716 alias_set_type set;
8717 int i, max;
8719 /* GPR size of varargs save area. */
8720 if (cfun->va_list_gpr_size)
8721 ix86_varargs_gpr_size = X86_64_REGPARM_MAX * UNITS_PER_WORD;
8722 else
8723 ix86_varargs_gpr_size = 0;
8725 /* FPR size of varargs save area. We don't need it if we don't pass
8726 anything in SSE registers. */
8727 if (TARGET_SSE && cfun->va_list_fpr_size)
8728 ix86_varargs_fpr_size = X86_64_SSE_REGPARM_MAX * 16;
8729 else
8730 ix86_varargs_fpr_size = 0;
8732 if (! ix86_varargs_gpr_size && ! ix86_varargs_fpr_size)
8733 return;
8735 save_area = frame_pointer_rtx;
8736 set = get_varargs_alias_set ();
8738 max = cum->regno + cfun->va_list_gpr_size / UNITS_PER_WORD;
8739 if (max > X86_64_REGPARM_MAX)
8740 max = X86_64_REGPARM_MAX;
8742 for (i = cum->regno; i < max; i++)
8744 mem = gen_rtx_MEM (word_mode,
8745 plus_constant (Pmode, save_area, i * UNITS_PER_WORD));
8746 MEM_NOTRAP_P (mem) = 1;
8747 set_mem_alias_set (mem, set);
8748 emit_move_insn (mem,
8749 gen_rtx_REG (word_mode,
8750 x86_64_int_parameter_registers[i]));
8753 if (ix86_varargs_fpr_size)
8755 machine_mode smode;
8756 rtx_code_label *label;
8757 rtx test;
8759 /* Now emit code to save SSE registers. The AX parameter contains number
8760 of SSE parameter registers used to call this function, though all we
8761 actually check here is the zero/non-zero status. */
8763 label = gen_label_rtx ();
8764 test = gen_rtx_EQ (VOIDmode, gen_rtx_REG (QImode, AX_REG), const0_rtx);
8765 emit_jump_insn (gen_cbranchqi4 (test, XEXP (test, 0), XEXP (test, 1),
8766 label));
8768 /* ??? If !TARGET_SSE_TYPELESS_STORES, would we perform better if
8769 we used movdqa (i.e. TImode) instead? Perhaps even better would
8770 be if we could determine the real mode of the data, via a hook
8771 into pass_stdarg. Ignore all that for now. */
8772 smode = V4SFmode;
8773 if (crtl->stack_alignment_needed < GET_MODE_ALIGNMENT (smode))
8774 crtl->stack_alignment_needed = GET_MODE_ALIGNMENT (smode);
8776 max = cum->sse_regno + cfun->va_list_fpr_size / 16;
8777 if (max > X86_64_SSE_REGPARM_MAX)
8778 max = X86_64_SSE_REGPARM_MAX;
8780 for (i = cum->sse_regno; i < max; ++i)
8782 mem = plus_constant (Pmode, save_area,
8783 i * 16 + ix86_varargs_gpr_size);
8784 mem = gen_rtx_MEM (smode, mem);
8785 MEM_NOTRAP_P (mem) = 1;
8786 set_mem_alias_set (mem, set);
8787 set_mem_align (mem, GET_MODE_ALIGNMENT (smode));
8789 emit_move_insn (mem, gen_rtx_REG (smode, SSE_REGNO (i)));
8792 emit_label (label);
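/* Illustrative layout of the varargs register save area built above,
   assuming all six GP and all eight SSE parameter registers are saved
   (gp_offset and fp_offset set up in ix86_va_start index into this block):

       save_area +   0 ... +  47 : rdi, rsi, rdx, rcx, r8, r9 (8 bytes each)
       save_area +  48 ... + 175 : xmm0 ... xmm7 (16 bytes each)  */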
8796 static void
8797 setup_incoming_varargs_ms_64 (CUMULATIVE_ARGS *cum)
8799 alias_set_type set = get_varargs_alias_set ();
8800 int i;
8802 /* Reset to zero, as there might have been a SysV va_arg used
8803 before. */
8804 ix86_varargs_gpr_size = 0;
8805 ix86_varargs_fpr_size = 0;
8807 for (i = cum->regno; i < X86_64_MS_REGPARM_MAX; i++)
8809 rtx reg, mem;
8811 mem = gen_rtx_MEM (Pmode,
8812 plus_constant (Pmode, virtual_incoming_args_rtx,
8813 i * UNITS_PER_WORD));
8814 MEM_NOTRAP_P (mem) = 1;
8815 set_mem_alias_set (mem, set);
8817 reg = gen_rtx_REG (Pmode, x86_64_ms_abi_int_parameter_registers[i]);
8818 emit_move_insn (mem, reg);
8822 static void
8823 ix86_setup_incoming_varargs (cumulative_args_t cum_v, machine_mode mode,
8824 tree type, int *, int no_rtl)
8826 CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
8827 CUMULATIVE_ARGS next_cum;
8828 tree fntype;
8830 /* This argument doesn't appear to be used anymore, which is good,
8831 because the old code here didn't suppress rtl generation. */
8832 gcc_assert (!no_rtl);
8834 if (!TARGET_64BIT)
8835 return;
8837 fntype = TREE_TYPE (current_function_decl);
8839 /* For varargs, we do not want to skip the dummy va_dcl argument.
8840 For stdargs, we do want to skip the last named argument. */
8841 next_cum = *cum;
8842 if (stdarg_p (fntype))
8843 ix86_function_arg_advance (pack_cumulative_args (&next_cum), mode, type,
8844 true);
8846 if (cum->call_abi == MS_ABI)
8847 setup_incoming_varargs_ms_64 (&next_cum);
8848 else
8849 setup_incoming_varargs_64 (&next_cum);
8852 static void
8853 ix86_setup_incoming_vararg_bounds (cumulative_args_t cum_v,
8854 enum machine_mode mode,
8855 tree type,
8856 int *pretend_size ATTRIBUTE_UNUSED,
8857 int no_rtl)
8859 CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
8860 CUMULATIVE_ARGS next_cum;
8861 tree fntype;
8862 rtx save_area;
8863 int bnd_reg, i, max;
8865 gcc_assert (!no_rtl);
8867 /* Do nothing if we use a plain pointer to the argument area. */
8868 if (!TARGET_64BIT || cum->call_abi == MS_ABI)
8869 return;
8871 fntype = TREE_TYPE (current_function_decl);
8873 /* For varargs, we do not want to skip the dummy va_dcl argument.
8874 For stdargs, we do want to skip the last named argument. */
8875 next_cum = *cum;
8876 if (stdarg_p (fntype))
8877 ix86_function_arg_advance (pack_cumulative_args (&next_cum), mode, type,
8878 true);
8879 save_area = frame_pointer_rtx;
8881 max = cum->regno + cfun->va_list_gpr_size / UNITS_PER_WORD;
8882 if (max > X86_64_REGPARM_MAX)
8883 max = X86_64_REGPARM_MAX;
8885 bnd_reg = cum->bnd_regno + cum->force_bnd_pass;
8886 if (chkp_function_instrumented_p (current_function_decl))
8887 for (i = cum->regno; i < max; i++)
8889 rtx addr = plus_constant (Pmode, save_area, i * UNITS_PER_WORD);
8890 rtx reg = gen_rtx_REG (DImode,
8891 x86_64_int_parameter_registers[i]);
8892 rtx ptr = reg;
8893 rtx bounds;
8895 if (bnd_reg <= LAST_BND_REG)
8896 bounds = gen_rtx_REG (BNDmode, bnd_reg);
8897 else
8899 rtx ldx_addr =
8900 plus_constant (Pmode, arg_pointer_rtx,
8901 (LAST_BND_REG - bnd_reg) * GET_MODE_SIZE (Pmode));
8902 bounds = gen_reg_rtx (BNDmode);
8903 emit_insn (BNDmode == BND64mode
8904 ? gen_bnd64_ldx (bounds, ldx_addr, ptr)
8905 : gen_bnd32_ldx (bounds, ldx_addr, ptr));
8908 emit_insn (BNDmode == BND64mode
8909 ? gen_bnd64_stx (addr, ptr, bounds)
8910 : gen_bnd32_stx (addr, ptr, bounds));
8912 bnd_reg++;
8917 /* Return true if TYPE is a va_list implemented as a plain char pointer. */
8919 static bool
8920 is_va_list_char_pointer (tree type)
8922 tree canonic;
8924 /* For 32-bit it is always true. */
8925 if (!TARGET_64BIT)
8926 return true;
8927 canonic = ix86_canonical_va_list_type (type);
8928 return (canonic == ms_va_list_type_node
8929 || (ix86_abi == MS_ABI && canonic == va_list_type_node));
8932 /* Implement va_start. */
8934 static void
8935 ix86_va_start (tree valist, rtx nextarg)
8937 HOST_WIDE_INT words, n_gpr, n_fpr;
8938 tree f_gpr, f_fpr, f_ovf, f_sav;
8939 tree gpr, fpr, ovf, sav, t;
8940 tree type;
8941 rtx ovf_rtx;
8943 if (flag_split_stack
8944 && cfun->machine->split_stack_varargs_pointer == NULL_RTX)
8946 unsigned int scratch_regno;
8948 /* When we are splitting the stack, we can't refer to the stack
8949 arguments using internal_arg_pointer, because they may be on
8950 the old stack. The split stack prologue will arrange to
8951 leave a pointer to the old stack arguments in a scratch
8952 register, which we here copy to a pseudo-register. The split
8953 stack prologue can't set the pseudo-register directly because
8954 it (the prologue) runs before any registers have been saved. */
8956 scratch_regno = split_stack_prologue_scratch_regno ();
8957 if (scratch_regno != INVALID_REGNUM)
8959 rtx reg;
8960 rtx_insn *seq;
8962 reg = gen_reg_rtx (Pmode);
8963 cfun->machine->split_stack_varargs_pointer = reg;
8965 start_sequence ();
8966 emit_move_insn (reg, gen_rtx_REG (Pmode, scratch_regno));
8967 seq = get_insns ();
8968 end_sequence ();
8970 push_topmost_sequence ();
8971 emit_insn_after (seq, entry_of_function ());
8972 pop_topmost_sequence ();
8976 /* Only the 64-bit target needs something special. */
8977 if (!TARGET_64BIT || is_va_list_char_pointer (TREE_TYPE (valist)))
8979 if (cfun->machine->split_stack_varargs_pointer == NULL_RTX)
8980 std_expand_builtin_va_start (valist, nextarg);
8981 else
8983 rtx va_r, next;
8985 va_r = expand_expr (valist, NULL_RTX, VOIDmode, EXPAND_WRITE);
8986 next = expand_binop (ptr_mode, add_optab,
8987 cfun->machine->split_stack_varargs_pointer,
8988 crtl->args.arg_offset_rtx,
8989 NULL_RTX, 0, OPTAB_LIB_WIDEN);
8990 convert_move (va_r, next, 0);
8992 /* Store zero bounds for va_list. */
8993 if (chkp_function_instrumented_p (current_function_decl))
8994 chkp_expand_bounds_reset_for_mem (valist,
8995 make_tree (TREE_TYPE (valist),
8996 next));
8999 return;
9002 f_gpr = TYPE_FIELDS (TREE_TYPE (sysv_va_list_type_node));
9003 f_fpr = DECL_CHAIN (f_gpr);
9004 f_ovf = DECL_CHAIN (f_fpr);
9005 f_sav = DECL_CHAIN (f_ovf);
9007 valist = build_simple_mem_ref (valist);
9008 TREE_TYPE (valist) = TREE_TYPE (sysv_va_list_type_node);
9009 /* The following should be folded into the MEM_REF offset. */
9010 gpr = build3 (COMPONENT_REF, TREE_TYPE (f_gpr), unshare_expr (valist),
9011 f_gpr, NULL_TREE);
9012 fpr = build3 (COMPONENT_REF, TREE_TYPE (f_fpr), unshare_expr (valist),
9013 f_fpr, NULL_TREE);
9014 ovf = build3 (COMPONENT_REF, TREE_TYPE (f_ovf), unshare_expr (valist),
9015 f_ovf, NULL_TREE);
9016 sav = build3 (COMPONENT_REF, TREE_TYPE (f_sav), unshare_expr (valist),
9017 f_sav, NULL_TREE);
9019 /* Count number of gp and fp argument registers used. */
9020 words = crtl->args.info.words;
9021 n_gpr = crtl->args.info.regno;
9022 n_fpr = crtl->args.info.sse_regno;
9024 if (cfun->va_list_gpr_size)
9026 type = TREE_TYPE (gpr);
9027 t = build2 (MODIFY_EXPR, type,
9028 gpr, build_int_cst (type, n_gpr * 8));
9029 TREE_SIDE_EFFECTS (t) = 1;
9030 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
9033 if (TARGET_SSE && cfun->va_list_fpr_size)
9035 type = TREE_TYPE (fpr);
9036 t = build2 (MODIFY_EXPR, type, fpr,
9037 build_int_cst (type, n_fpr * 16 + 8*X86_64_REGPARM_MAX));
9038 TREE_SIDE_EFFECTS (t) = 1;
9039 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
9042 /* Find the overflow area. */
9043 type = TREE_TYPE (ovf);
9044 if (cfun->machine->split_stack_varargs_pointer == NULL_RTX)
9045 ovf_rtx = crtl->args.internal_arg_pointer;
9046 else
9047 ovf_rtx = cfun->machine->split_stack_varargs_pointer;
9048 t = make_tree (type, ovf_rtx);
9049 if (words != 0)
9050 t = fold_build_pointer_plus_hwi (t, words * UNITS_PER_WORD);
9052 /* Store zero bounds for overflow area pointer. */
9053 if (chkp_function_instrumented_p (current_function_decl))
9054 chkp_expand_bounds_reset_for_mem (ovf, t);
9056 t = build2 (MODIFY_EXPR, type, ovf, t);
9057 TREE_SIDE_EFFECTS (t) = 1;
9058 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
9060 if (ix86_varargs_gpr_size || ix86_varargs_fpr_size)
9062 /* Find the register save area.
9063 The function prologue saves it right above the stack frame. */
9064 type = TREE_TYPE (sav);
9065 t = make_tree (type, frame_pointer_rtx);
9066 if (!ix86_varargs_gpr_size)
9067 t = fold_build_pointer_plus_hwi (t, -8 * X86_64_REGPARM_MAX);
9069 /* Store zero bounds for save area pointer. */
9070 if (chkp_function_instrumented_p (current_function_decl))
9071 chkp_expand_bounds_reset_for_mem (sav, t);
9073 t = build2 (MODIFY_EXPR, type, sav, t);
9074 TREE_SIDE_EFFECTS (t) = 1;
9075 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
9079 /* Implement va_arg. */
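/* A rough sketch of the logic the expansion below produces for a value
   that fits in one general-purpose register (illustrative pseudo-C of
   the SysV x86-64 convention, not the GIMPLE actually emitted):

       if (ap->gp_offset >= 6 * 8)              -- all GP slots used
         {
           addr = ap->overflow_arg_area;        -- aligned first if required
           ap->overflow_arg_area = addr + 8;
         }
       else
         {
           addr = ap->reg_save_area + ap->gp_offset;
           ap->gp_offset += 8;
         }
       return *(type *) addr;  */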
9081 static tree
9082 ix86_gimplify_va_arg (tree valist, tree type, gimple_seq *pre_p,
9083 gimple_seq *post_p)
9085 static const int intreg[6] = { 0, 1, 2, 3, 4, 5 };
9086 tree f_gpr, f_fpr, f_ovf, f_sav;
9087 tree gpr, fpr, ovf, sav, t;
9088 int size, rsize;
9089 tree lab_false, lab_over = NULL_TREE;
9090 tree addr, t2;
9091 rtx container;
9092 int indirect_p = 0;
9093 tree ptrtype;
9094 machine_mode nat_mode;
9095 unsigned int arg_boundary;
9097 /* Only the 64-bit target needs something special. */
9098 if (!TARGET_64BIT || is_va_list_char_pointer (TREE_TYPE (valist)))
9099 return std_gimplify_va_arg_expr (valist, type, pre_p, post_p);
9101 f_gpr = TYPE_FIELDS (TREE_TYPE (sysv_va_list_type_node));
9102 f_fpr = DECL_CHAIN (f_gpr);
9103 f_ovf = DECL_CHAIN (f_fpr);
9104 f_sav = DECL_CHAIN (f_ovf);
9106 gpr = build3 (COMPONENT_REF, TREE_TYPE (f_gpr),
9107 valist, f_gpr, NULL_TREE);
9109 fpr = build3 (COMPONENT_REF, TREE_TYPE (f_fpr), valist, f_fpr, NULL_TREE);
9110 ovf = build3 (COMPONENT_REF, TREE_TYPE (f_ovf), valist, f_ovf, NULL_TREE);
9111 sav = build3 (COMPONENT_REF, TREE_TYPE (f_sav), valist, f_sav, NULL_TREE);
9113 indirect_p = pass_by_reference (NULL, TYPE_MODE (type), type, false);
9114 if (indirect_p)
9115 type = build_pointer_type (type);
9116 size = int_size_in_bytes (type);
9117 rsize = (size + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
9119 nat_mode = type_natural_mode (type, NULL, false);
9120 switch (nat_mode)
9122 case V8SFmode:
9123 case V8SImode:
9124 case V32QImode:
9125 case V16HImode:
9126 case V4DFmode:
9127 case V4DImode:
9128 case V16SFmode:
9129 case V16SImode:
9130 case V64QImode:
9131 case V32HImode:
9132 case V8DFmode:
9133 case V8DImode:
9134 /* Unnamed 256- and 512-bit vector mode parameters are passed on the stack. */
9135 if (!TARGET_64BIT_MS_ABI)
9137 container = NULL;
9138 break;
9141 default:
9142 container = construct_container (nat_mode, TYPE_MODE (type),
9143 type, 0, X86_64_REGPARM_MAX,
9144 X86_64_SSE_REGPARM_MAX, intreg,
9146 break;
9149 /* Pull the value out of the saved registers. */
9151 addr = create_tmp_var (ptr_type_node, "addr");
9153 if (container)
9155 int needed_intregs, needed_sseregs;
9156 bool need_temp;
9157 tree int_addr, sse_addr;
9159 lab_false = create_artificial_label (UNKNOWN_LOCATION);
9160 lab_over = create_artificial_label (UNKNOWN_LOCATION);
9162 examine_argument (nat_mode, type, 0, &needed_intregs, &needed_sseregs);
9164 need_temp = (!REG_P (container)
9165 && ((needed_intregs && TYPE_ALIGN (type) > 64)
9166 || TYPE_ALIGN (type) > 128));
9168 /* If we are passing a structure, verify that it occupies a consecutive
9169 block in the register save area. If not, we need to do moves. */
9170 if (!need_temp && !REG_P (container))
9172 /* Verify that all registers are strictly consecutive. */
9173 if (SSE_REGNO_P (REGNO (XEXP (XVECEXP (container, 0, 0), 0))))
9175 int i;
9177 for (i = 0; i < XVECLEN (container, 0) && !need_temp; i++)
9179 rtx slot = XVECEXP (container, 0, i);
9180 if (REGNO (XEXP (slot, 0)) != FIRST_SSE_REG + (unsigned int) i
9181 || INTVAL (XEXP (slot, 1)) != i * 16)
9182 need_temp = true;
9185 else
9187 int i;
9189 for (i = 0; i < XVECLEN (container, 0) && !need_temp; i++)
9191 rtx slot = XVECEXP (container, 0, i);
9192 if (REGNO (XEXP (slot, 0)) != (unsigned int) i
9193 || INTVAL (XEXP (slot, 1)) != i * 8)
9194 need_temp = true;
9198 if (!need_temp)
9200 int_addr = addr;
9201 sse_addr = addr;
9203 else
9205 int_addr = create_tmp_var (ptr_type_node, "int_addr");
9206 sse_addr = create_tmp_var (ptr_type_node, "sse_addr");
9209 /* First ensure that we fit completely in registers. */
9210 if (needed_intregs)
9212 t = build_int_cst (TREE_TYPE (gpr),
9213 (X86_64_REGPARM_MAX - needed_intregs + 1) * 8);
9214 t = build2 (GE_EXPR, boolean_type_node, gpr, t);
9215 t2 = build1 (GOTO_EXPR, void_type_node, lab_false);
9216 t = build3 (COND_EXPR, void_type_node, t, t2, NULL_TREE);
9217 gimplify_and_add (t, pre_p);
9219 if (needed_sseregs)
9221 t = build_int_cst (TREE_TYPE (fpr),
9222 (X86_64_SSE_REGPARM_MAX - needed_sseregs + 1) * 16
9223 + X86_64_REGPARM_MAX * 8);
9224 t = build2 (GE_EXPR, boolean_type_node, fpr, t);
9225 t2 = build1 (GOTO_EXPR, void_type_node, lab_false);
9226 t = build3 (COND_EXPR, void_type_node, t, t2, NULL_TREE);
9227 gimplify_and_add (t, pre_p);
9230 /* Compute index to start of area used for integer regs. */
9231 if (needed_intregs)
9233 /* int_addr = gpr + sav; */
9234 t = fold_build_pointer_plus (sav, gpr);
9235 gimplify_assign (int_addr, t, pre_p);
9237 if (needed_sseregs)
9239 /* sse_addr = fpr + sav; */
9240 t = fold_build_pointer_plus (sav, fpr);
9241 gimplify_assign (sse_addr, t, pre_p);
9243 if (need_temp)
9245 int i, prev_size = 0;
9246 tree temp = create_tmp_var (type, "va_arg_tmp");
9248 /* addr = &temp; */
9249 t = build1 (ADDR_EXPR, build_pointer_type (type), temp);
9250 gimplify_assign (addr, t, pre_p);
9252 for (i = 0; i < XVECLEN (container, 0); i++)
9254 rtx slot = XVECEXP (container, 0, i);
9255 rtx reg = XEXP (slot, 0);
9256 machine_mode mode = GET_MODE (reg);
9257 tree piece_type;
9258 tree addr_type;
9259 tree daddr_type;
9260 tree src_addr, src;
9261 int src_offset;
9262 tree dest_addr, dest;
9263 int cur_size = GET_MODE_SIZE (mode);
9265 gcc_assert (prev_size <= INTVAL (XEXP (slot, 1)));
9266 prev_size = INTVAL (XEXP (slot, 1));
9267 if (prev_size + cur_size > size)
9269 cur_size = size - prev_size;
9270 mode = mode_for_size (cur_size * BITS_PER_UNIT, MODE_INT, 1);
9271 if (mode == BLKmode)
9272 mode = QImode;
9274 piece_type = lang_hooks.types.type_for_mode (mode, 1);
9275 if (mode == GET_MODE (reg))
9276 addr_type = build_pointer_type (piece_type);
9277 else
9278 addr_type = build_pointer_type_for_mode (piece_type, ptr_mode,
9279 true);
9280 daddr_type = build_pointer_type_for_mode (piece_type, ptr_mode,
9281 true);
9283 if (SSE_REGNO_P (REGNO (reg)))
9285 src_addr = sse_addr;
9286 src_offset = (REGNO (reg) - FIRST_SSE_REG) * 16;
9288 else
9290 src_addr = int_addr;
9291 src_offset = REGNO (reg) * 8;
9293 src_addr = fold_convert (addr_type, src_addr);
9294 src_addr = fold_build_pointer_plus_hwi (src_addr, src_offset);
9296 dest_addr = fold_convert (daddr_type, addr);
9297 dest_addr = fold_build_pointer_plus_hwi (dest_addr, prev_size);
9298 if (cur_size == GET_MODE_SIZE (mode))
9300 src = build_va_arg_indirect_ref (src_addr);
9301 dest = build_va_arg_indirect_ref (dest_addr);
9303 gimplify_assign (dest, src, pre_p);
9305 else
9307 tree copy
9308 = build_call_expr (builtin_decl_implicit (BUILT_IN_MEMCPY),
9309 3, dest_addr, src_addr,
9310 size_int (cur_size));
9311 gimplify_and_add (copy, pre_p);
9313 prev_size += cur_size;
9317 if (needed_intregs)
9319 t = build2 (PLUS_EXPR, TREE_TYPE (gpr), gpr,
9320 build_int_cst (TREE_TYPE (gpr), needed_intregs * 8));
9321 gimplify_assign (gpr, t, pre_p);
9324 if (needed_sseregs)
9326 t = build2 (PLUS_EXPR, TREE_TYPE (fpr), fpr,
9327 build_int_cst (TREE_TYPE (fpr), needed_sseregs * 16));
9328 gimplify_assign (unshare_expr (fpr), t, pre_p);
9331 gimple_seq_add_stmt (pre_p, gimple_build_goto (lab_over));
9333 gimple_seq_add_stmt (pre_p, gimple_build_label (lab_false));
9336 /* ... otherwise out of the overflow area. */
9338 /* When the caller aligns a parameter on the stack, a requested
9339 alignment beyond MAX_SUPPORTED_STACK_ALIGNMENT is clamped to
9340 MAX_SUPPORTED_STACK_ALIGNMENT. Match the callee with the
9341 caller here. */
9342 arg_boundary = ix86_function_arg_boundary (VOIDmode, type);
9343 if ((unsigned int) arg_boundary > MAX_SUPPORTED_STACK_ALIGNMENT)
9344 arg_boundary = MAX_SUPPORTED_STACK_ALIGNMENT;
9346 /* Care for on-stack alignment if needed. */
9347 if (arg_boundary <= 64 || size == 0)
9348 t = ovf;
9349 else
9351 HOST_WIDE_INT align = arg_boundary / 8;
9352 t = fold_build_pointer_plus_hwi (ovf, align - 1);
9353 t = build2 (BIT_AND_EXPR, TREE_TYPE (t), t,
9354 build_int_cst (TREE_TYPE (t), -align));
9357 gimplify_expr (&t, pre_p, NULL, is_gimple_val, fb_rvalue);
9358 gimplify_assign (addr, t, pre_p);
9360 t = fold_build_pointer_plus_hwi (t, rsize * UNITS_PER_WORD);
9361 gimplify_assign (unshare_expr (ovf), t, pre_p);
9363 if (container)
9364 gimple_seq_add_stmt (pre_p, gimple_build_label (lab_over));
9366 ptrtype = build_pointer_type_for_mode (type, ptr_mode, true);
9367 addr = fold_convert (ptrtype, addr);
9369 if (indirect_p)
9370 addr = build_va_arg_indirect_ref (addr);
9371 return build_va_arg_indirect_ref (addr);
9374 /* Return true if OPNUM's MEM should be matched
9375 in movabs* patterns. */
9377 bool
9378 ix86_check_movabs (rtx insn, int opnum)
9380 rtx set, mem;
9382 set = PATTERN (insn);
9383 if (GET_CODE (set) == PARALLEL)
9384 set = XVECEXP (set, 0, 0);
9385 gcc_assert (GET_CODE (set) == SET);
9386 mem = XEXP (set, opnum);
9387 while (GET_CODE (mem) == SUBREG)
9388 mem = SUBREG_REG (mem);
9389 gcc_assert (MEM_P (mem));
9390 return volatile_ok || !MEM_VOLATILE_P (mem);
9393 /* Initialize the table of extra 80387 mathematical constants. */
9395 static void
9396 init_ext_80387_constants (void)
9398 static const char * cst[5] =
9400 "0.3010299956639811952256464283594894482", /* 0: fldlg2 */
9401 "0.6931471805599453094286904741849753009", /* 1: fldln2 */
9402 "1.4426950408889634073876517827983434472", /* 2: fldl2e */
9403 "3.3219280948873623478083405569094566090", /* 3: fldl2t */
9404 "3.1415926535897932385128089594061862044", /* 4: fldpi */
9406 int i;
9408 for (i = 0; i < 5; i++)
9410 real_from_string (&ext_80387_constants_table[i], cst[i]);
9411 /* Ensure each constant is rounded to XFmode precision. */
9412 real_convert (&ext_80387_constants_table[i],
9413 XFmode, &ext_80387_constants_table[i]);
9416 ext_80387_constants_init = 1;
9419 /* Return non-zero if the constant is something that
9420 can be loaded with a special instruction. */
9423 standard_80387_constant_p (rtx x)
9425 machine_mode mode = GET_MODE (x);
9427 REAL_VALUE_TYPE r;
9429 if (!(CONST_DOUBLE_P (x) && X87_FLOAT_MODE_P (mode)))
9430 return -1;
9432 if (x == CONST0_RTX (mode))
9433 return 1;
9434 if (x == CONST1_RTX (mode))
9435 return 2;
9437 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
9439 /* For XFmode constants, try to find a special 80387 instruction when
9440 optimizing for size or on those CPUs that benefit from them. */
9441 if (mode == XFmode
9442 && (optimize_function_for_size_p (cfun) || TARGET_EXT_80387_CONSTANTS))
9444 int i;
9446 if (! ext_80387_constants_init)
9447 init_ext_80387_constants ();
9449 for (i = 0; i < 5; i++)
9450 if (real_identical (&r, &ext_80387_constants_table[i]))
9451 return i + 3;
9454 /* A load of the constant -0.0 or -1.0 will be split into an
9455 fldz;fchs or fld1;fchs sequence. */
9456 if (real_isnegzero (&r))
9457 return 8;
9458 if (real_identical (&r, &dconstm1))
9459 return 9;
9461 return 0;
9464 /* Return the opcode of the special instruction to be used to load
9465 the constant X. */
9467 const char *
9468 standard_80387_constant_opcode (rtx x)
9470 switch (standard_80387_constant_p (x))
9472 case 1:
9473 return "fldz";
9474 case 2:
9475 return "fld1";
9476 case 3:
9477 return "fldlg2";
9478 case 4:
9479 return "fldln2";
9480 case 5:
9481 return "fldl2e";
9482 case 6:
9483 return "fldl2t";
9484 case 7:
9485 return "fldpi";
9486 case 8:
9487 case 9:
9488 return "#";
9489 default:
9490 gcc_unreachable ();
9494 /* Return the CONST_DOUBLE representing the 80387 constant that is
9495 loaded by the specified special instruction. The argument IDX
9496 matches the return value from standard_80387_constant_p. */
9499 standard_80387_constant_rtx (int idx)
9501 int i;
9503 if (! ext_80387_constants_init)
9504 init_ext_80387_constants ();
9506 switch (idx)
9508 case 3:
9509 case 4:
9510 case 5:
9511 case 6:
9512 case 7:
9513 i = idx - 3;
9514 break;
9516 default:
9517 gcc_unreachable ();
9520 return CONST_DOUBLE_FROM_REAL_VALUE (ext_80387_constants_table[i],
9521 XFmode);
9524 /* Return 1 if X is all 0s and 2 if X is all 1s
9525 in a supported SSE/AVX vector mode. */
9528 standard_sse_constant_p (rtx x)
9530 machine_mode mode;
9532 if (!TARGET_SSE)
9533 return 0;
9535 mode = GET_MODE (x);
9537 if (x == const0_rtx || x == CONST0_RTX (mode))
9538 return 1;
9539 if (vector_all_ones_operand (x, mode))
9540 switch (mode)
9542 case V16QImode:
9543 case V8HImode:
9544 case V4SImode:
9545 case V2DImode:
9546 if (TARGET_SSE2)
9547 return 2;
9548 case V32QImode:
9549 case V16HImode:
9550 case V8SImode:
9551 case V4DImode:
9552 if (TARGET_AVX2)
9553 return 2;
9554 case V64QImode:
9555 case V32HImode:
9556 case V16SImode:
9557 case V8DImode:
9558 if (TARGET_AVX512F)
9559 return 2;
9560 default:
9561 break;
9564 return 0;
9567 /* Return the opcode of the special instruction to be used to load
9568 the constant X. */
9570 const char *
9571 standard_sse_constant_opcode (rtx_insn *insn, rtx x)
9573 switch (standard_sse_constant_p (x))
9575 case 1:
9576 switch (get_attr_mode (insn))
9578 case MODE_XI:
9579 return "vpxord\t%g0, %g0, %g0";
9580 case MODE_V16SF:
9581 return TARGET_AVX512DQ ? "vxorps\t%g0, %g0, %g0"
9582 : "vpxord\t%g0, %g0, %g0";
9583 case MODE_V8DF:
9584 return TARGET_AVX512DQ ? "vxorpd\t%g0, %g0, %g0"
9585 : "vpxorq\t%g0, %g0, %g0";
9586 case MODE_TI:
9587 return TARGET_AVX512VL ? "vpxord\t%t0, %t0, %t0"
9588 : "%vpxor\t%0, %d0";
9589 case MODE_V2DF:
9590 return "%vxorpd\t%0, %d0";
9591 case MODE_V4SF:
9592 return "%vxorps\t%0, %d0";
9594 case MODE_OI:
9595 return TARGET_AVX512VL ? "vpxord\t%x0, %x0, %x0"
9596 : "vpxor\t%x0, %x0, %x0";
9597 case MODE_V4DF:
9598 return "vxorpd\t%x0, %x0, %x0";
9599 case MODE_V8SF:
9600 return "vxorps\t%x0, %x0, %x0";
9602 default:
9603 break;
9606 case 2:
9607 if (TARGET_AVX512VL
9608 || get_attr_mode (insn) == MODE_XI
9609 || get_attr_mode (insn) == MODE_V8DF
9610 || get_attr_mode (insn) == MODE_V16SF)
9611 return "vpternlogd\t{$0xFF, %g0, %g0, %g0|%g0, %g0, %g0, 0xFF}";
9612 if (TARGET_AVX)
9613 return "vpcmpeqd\t%0, %0, %0";
9614 else
9615 return "pcmpeqd\t%0, %0";
9617 default:
9618 break;
9620 gcc_unreachable ();
9623 /* Return true if OP contains a symbol reference. */
9625 bool
9626 symbolic_reference_mentioned_p (rtx op)
9628 const char *fmt;
9629 int i;
9631 if (GET_CODE (op) == SYMBOL_REF || GET_CODE (op) == LABEL_REF)
9632 return true;
9634 fmt = GET_RTX_FORMAT (GET_CODE (op));
9635 for (i = GET_RTX_LENGTH (GET_CODE (op)) - 1; i >= 0; i--)
9637 if (fmt[i] == 'E')
9639 int j;
9641 for (j = XVECLEN (op, i) - 1; j >= 0; j--)
9642 if (symbolic_reference_mentioned_p (XVECEXP (op, i, j)))
9643 return true;
9646 else if (fmt[i] == 'e' && symbolic_reference_mentioned_p (XEXP (op, i)))
9647 return true;
9650 return false;
9653 /* Return true if it is appropriate to emit `ret' instructions in the
9654 body of a function. Do this only if the epilogue is simple, needing a
9655 couple of insns. Prior to reloading, we can't tell how many registers
9656 must be saved, so return false then. Return false if there is no frame
9657 marker to de-allocate. */
9659 bool
9660 ix86_can_use_return_insn_p (void)
9662 struct ix86_frame frame;
9664 if (! reload_completed || frame_pointer_needed)
9665 return 0;
9667 /* Don't allow more than 32k pop, since that's all we can do
9668 with one instruction. */
9669 if (crtl->args.pops_args && crtl->args.size >= 32768)
9670 return 0;
9672 ix86_compute_frame_layout (&frame);
9673 return (frame.stack_pointer_offset == UNITS_PER_WORD
9674 && (frame.nregs + frame.nsseregs) == 0);
9677 /* Value should be nonzero if functions must have frame pointers.
9678 Zero means the frame pointer need not be set up (and parms may
9679 be accessed via the stack pointer) in functions that seem suitable. */
9681 static bool
9682 ix86_frame_pointer_required (void)
9684 /* If we accessed previous frames, then the generated code expects
9685 to be able to access the saved ebp value in our frame. */
9686 if (cfun->machine->accesses_prev_frame)
9687 return true;
9689 /* Several x86 OSes need a frame pointer for other reasons,
9690 usually pertaining to setjmp. */
9691 if (SUBTARGET_FRAME_POINTER_REQUIRED)
9692 return true;
9694 /* For older 32-bit runtimes setjmp requires a valid frame pointer. */
9695 if (TARGET_32BIT_MS_ABI && cfun->calls_setjmp)
9696 return true;
9698 /* For Win64 SEH, very large frames need a frame pointer, as the maximum
9699 stack allocation is 4GB. */
9700 if (TARGET_64BIT_MS_ABI && get_frame_size () > SEH_MAX_FRAME_SIZE)
9701 return true;
9703 /* In ix86_option_override_internal, TARGET_OMIT_LEAF_FRAME_POINTER
9704 turns off the frame pointer by default. Turn it back on now if
9705 we've not got a leaf function. */
9706 if (TARGET_OMIT_LEAF_FRAME_POINTER
9707 && (!crtl->is_leaf
9708 || ix86_current_function_calls_tls_descriptor))
9709 return true;
9711 if (crtl->profile && !flag_fentry)
9712 return true;
9714 return false;
9717 /* Record that the current function accesses previous call frames. */
9719 void
9720 ix86_setup_frame_addresses (void)
9722 cfun->machine->accesses_prev_frame = 1;
9725 #ifndef USE_HIDDEN_LINKONCE
9726 # if defined(HAVE_GAS_HIDDEN) && (SUPPORTS_ONE_ONLY - 0)
9727 # define USE_HIDDEN_LINKONCE 1
9728 # else
9729 # define USE_HIDDEN_LINKONCE 0
9730 # endif
9731 #endif
9733 static int pic_labels_used;
9735 /* Fills in the label name that should be used for a pc thunk for
9736 the given register. */
9738 static void
9739 get_pc_thunk_name (char name[32], unsigned int regno)
9741 gcc_assert (!TARGET_64BIT);
9743 if (USE_HIDDEN_LINKONCE)
9744 sprintf (name, "__x86.get_pc_thunk.%s", reg_names[regno]);
9745 else
9746 ASM_GENERATE_INTERNAL_LABEL (name, "LPR", regno);
9750 /* This function generates the -fpic pc thunks: each thunk loads its
9751 register with the return address of the caller and then returns. */
9753 static void
9754 ix86_code_end (void)
9756 rtx xops[2];
9757 int regno;
9759 for (regno = AX_REG; regno <= SP_REG; regno++)
9761 char name[32];
9762 tree decl;
9764 if (!(pic_labels_used & (1 << regno)))
9765 continue;
9767 get_pc_thunk_name (name, regno);
9769 decl = build_decl (BUILTINS_LOCATION, FUNCTION_DECL,
9770 get_identifier (name),
9771 build_function_type_list (void_type_node, NULL_TREE));
9772 DECL_RESULT (decl) = build_decl (BUILTINS_LOCATION, RESULT_DECL,
9773 NULL_TREE, void_type_node);
9774 TREE_PUBLIC (decl) = 1;
9775 TREE_STATIC (decl) = 1;
9776 DECL_IGNORED_P (decl) = 1;
9778 #if TARGET_MACHO
9779 if (TARGET_MACHO)
9781 switch_to_section (darwin_sections[text_coal_section]);
9782 fputs ("\t.weak_definition\t", asm_out_file);
9783 assemble_name (asm_out_file, name);
9784 fputs ("\n\t.private_extern\t", asm_out_file);
9785 assemble_name (asm_out_file, name);
9786 putc ('\n', asm_out_file);
9787 ASM_OUTPUT_LABEL (asm_out_file, name);
9788 DECL_WEAK (decl) = 1;
9790 else
9791 #endif
9792 if (USE_HIDDEN_LINKONCE)
9794 cgraph_node::create (decl)->set_comdat_group (DECL_ASSEMBLER_NAME (decl));
9796 targetm.asm_out.unique_section (decl, 0);
9797 switch_to_section (get_named_section (decl, NULL, 0));
9799 targetm.asm_out.globalize_label (asm_out_file, name);
9800 fputs ("\t.hidden\t", asm_out_file);
9801 assemble_name (asm_out_file, name);
9802 putc ('\n', asm_out_file);
9803 ASM_DECLARE_FUNCTION_NAME (asm_out_file, name, decl);
9805 else
9807 switch_to_section (text_section);
9808 ASM_OUTPUT_LABEL (asm_out_file, name);
9811 DECL_INITIAL (decl) = make_node (BLOCK);
9812 current_function_decl = decl;
9813 init_function_start (decl);
9814 first_function_block_is_cold = false;
9815 /* Make sure unwind info is emitted for the thunk if needed. */
9816 final_start_function (emit_barrier (), asm_out_file, 1);
9818 /* Pad stack IP move with 4 instructions (two NOPs count
9819 as one instruction). */
9820 if (TARGET_PAD_SHORT_FUNCTION)
9822 int i = 8;
9824 while (i--)
9825 fputs ("\tnop\n", asm_out_file);
9828 xops[0] = gen_rtx_REG (Pmode, regno);
9829 xops[1] = gen_rtx_MEM (Pmode, stack_pointer_rtx);
9830 output_asm_insn ("mov%z0\t{%1, %0|%0, %1}", xops);
9831 output_asm_insn ("%!ret", NULL);
9832 final_end_function ();
9833 init_insn_lengths ();
9834 free_after_compilation (cfun);
9835 set_cfun (NULL);
9836 current_function_decl = NULL;
9839 if (flag_split_stack)
9840 file_end_indicate_split_stack ();
9843 /* Emit code for the SET_GOT patterns. */
9845 const char *
9846 output_set_got (rtx dest, rtx label)
9848 rtx xops[3];
9850 xops[0] = dest;
9852 if (TARGET_VXWORKS_RTP && flag_pic)
9854 /* Load (*VXWORKS_GOTT_BASE) into the PIC register. */
9855 xops[2] = gen_rtx_MEM (Pmode,
9856 gen_rtx_SYMBOL_REF (Pmode, VXWORKS_GOTT_BASE));
9857 output_asm_insn ("mov{l}\t{%2, %0|%0, %2}", xops);
9859 /* Load (*VXWORKS_GOTT_BASE)[VXWORKS_GOTT_INDEX] into the PIC register.
9860 Use %P and a local symbol in order to print VXWORKS_GOTT_INDEX as
9861 an unadorned address. */
9862 xops[2] = gen_rtx_SYMBOL_REF (Pmode, VXWORKS_GOTT_INDEX);
9863 SYMBOL_REF_FLAGS (xops[2]) |= SYMBOL_FLAG_LOCAL;
9864 output_asm_insn ("mov{l}\t{%P2(%0), %0|%0, DWORD PTR %P2[%0]}", xops);
9865 return "";
9868 xops[1] = gen_rtx_SYMBOL_REF (Pmode, GOT_SYMBOL_NAME);
9870 if (!flag_pic)
9872 if (TARGET_MACHO)
9873 /* We don't need a pic base, we're not producing pic. */
9874 gcc_unreachable ();
9876 xops[2] = gen_rtx_LABEL_REF (Pmode, label ? label : gen_label_rtx ());
9877 output_asm_insn ("mov%z0\t{%2, %0|%0, %2}", xops);
9878 targetm.asm_out.internal_label (asm_out_file, "L",
9879 CODE_LABEL_NUMBER (XEXP (xops[2], 0)));
9881 else
9883 char name[32];
9884 get_pc_thunk_name (name, REGNO (dest));
9885 pic_labels_used |= 1 << REGNO (dest);
9887 xops[2] = gen_rtx_SYMBOL_REF (Pmode, ggc_strdup (name));
9888 xops[2] = gen_rtx_MEM (QImode, xops[2]);
9889 output_asm_insn ("%!call\t%X2", xops);
9891 #if TARGET_MACHO
9892 /* Output the Mach-O "canonical" pic base label name ("Lxx$pb") here.
9893 This is what will be referenced by the Mach-O PIC subsystem. */
9894 if (machopic_should_output_picbase_label () || !label)
9895 ASM_OUTPUT_LABEL (asm_out_file, MACHOPIC_FUNCTION_BASE_NAME);
9897 /* When we are restoring the pic base at the site of a nonlocal label,
9898 and we decided to emit the pic base above, we will still output a
9899 local label used for calculating the correction offset (even though
9900 the offset will be 0 in that case). */
9901 if (label)
9902 targetm.asm_out.internal_label (asm_out_file, "L",
9903 CODE_LABEL_NUMBER (label));
9904 #endif
9907 if (!TARGET_MACHO)
9908 output_asm_insn ("add%z0\t{%1, %0|%0, %1}", xops);
9910 return "";
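/* For the common 32-bit PIC case the routines above cooperate to produce
   a sequence along these lines (illustrative AT&T syntax, using %ebx as
   the PIC register):

       call    __x86.get_pc_thunk.bx
       addl    $_GLOBAL_OFFSET_TABLE_, %ebx

   where the thunk emitted by ix86_code_end is simply

       __x86.get_pc_thunk.bx:
               movl    (%esp), %ebx
               ret                                                      */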
9915 /* Generate a "push" pattern for input ARG. */
9915 static rtx
9916 gen_push (rtx arg)
9918 struct machine_function *m = cfun->machine;
9920 if (m->fs.cfa_reg == stack_pointer_rtx)
9921 m->fs.cfa_offset += UNITS_PER_WORD;
9922 m->fs.sp_offset += UNITS_PER_WORD;
9924 if (REG_P (arg) && GET_MODE (arg) != word_mode)
9925 arg = gen_rtx_REG (word_mode, REGNO (arg));
9927 return gen_rtx_SET (gen_rtx_MEM (word_mode,
9928 gen_rtx_PRE_DEC (Pmode,
9929 stack_pointer_rtx)),
9930 arg);
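/* E.g. gen_push (gen_rtx_REG (DImode, BX_REG)) on a 64-bit target yields,
   illustratively:
       (set (mem:DI (pre_dec:DI (reg:DI sp))) (reg:DI bx))  */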
9933 /* Generate a "pop" pattern for input ARG. */
9935 static rtx
9936 gen_pop (rtx arg)
9938 if (REG_P (arg) && GET_MODE (arg) != word_mode)
9939 arg = gen_rtx_REG (word_mode, REGNO (arg));
9941 return gen_rtx_SET (arg,
9942 gen_rtx_MEM (word_mode,
9943 gen_rtx_POST_INC (Pmode,
9944 stack_pointer_rtx)));
9947 /* Return >= 0 if there is an unused call-clobbered register available
9948 for the entire function. */
9950 static unsigned int
9951 ix86_select_alt_pic_regnum (void)
9953 if (ix86_use_pseudo_pic_reg ())
9954 return INVALID_REGNUM;
9956 if (crtl->is_leaf
9957 && !crtl->profile
9958 && !ix86_current_function_calls_tls_descriptor)
9960 int i, drap;
9961 /* Can't use the same register for both PIC and DRAP. */
9962 if (crtl->drap_reg)
9963 drap = REGNO (crtl->drap_reg);
9964 else
9965 drap = -1;
9966 for (i = 2; i >= 0; --i)
9967 if (i != drap && !df_regs_ever_live_p (i))
9968 return i;
9971 return INVALID_REGNUM;
9974 /* Return TRUE if we need to save REGNO. */
9976 static bool
9977 ix86_save_reg (unsigned int regno, bool maybe_eh_return)
9979 if (regno == REAL_PIC_OFFSET_TABLE_REGNUM
9980 && pic_offset_table_rtx)
9982 if (ix86_use_pseudo_pic_reg ())
9984 /* REAL_PIC_OFFSET_TABLE_REGNUM used by call to
9985 _mcount in prologue. */
9986 if (!TARGET_64BIT && flag_pic && crtl->profile)
9987 return true;
9989 else if (df_regs_ever_live_p (REAL_PIC_OFFSET_TABLE_REGNUM)
9990 || crtl->profile
9991 || crtl->calls_eh_return
9992 || crtl->uses_const_pool
9993 || cfun->has_nonlocal_label)
9994 return ix86_select_alt_pic_regnum () == INVALID_REGNUM;
9997 if (crtl->calls_eh_return && maybe_eh_return)
9999 unsigned i;
10000 for (i = 0; ; i++)
10002 unsigned test = EH_RETURN_DATA_REGNO (i);
10003 if (test == INVALID_REGNUM)
10004 break;
10005 if (test == regno)
10006 return true;
10010 if (crtl->drap_reg
10011 && regno == REGNO (crtl->drap_reg)
10012 && !cfun->machine->no_drap_save_restore)
10013 return true;
10015 return (df_regs_ever_live_p (regno)
10016 && !call_used_regs[regno]
10017 && !fixed_regs[regno]
10018 && (regno != HARD_FRAME_POINTER_REGNUM || !frame_pointer_needed));
10021 /* Return the number of saved general purpose registers. */
10023 static int
10024 ix86_nsaved_regs (void)
10026 int nregs = 0;
10027 int regno;
10029 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
10030 if (!SSE_REGNO_P (regno) && ix86_save_reg (regno, true))
10031 nregs ++;
10032 return nregs;
10035 /* Return the number of saved SSE registers. */
10037 static int
10038 ix86_nsaved_sseregs (void)
10040 int nregs = 0;
10041 int regno;
10043 if (!TARGET_64BIT_MS_ABI)
10044 return 0;
10045 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
10046 if (SSE_REGNO_P (regno) && ix86_save_reg (regno, true))
10047 nregs ++;
10048 return nregs;
10051 /* Given FROM and TO register numbers, say whether this elimination is
10052 allowed. If stack alignment is needed, we can only replace argument
10053 pointer with hard frame pointer, or replace frame pointer with stack
10054 pointer. Otherwise, frame pointer elimination is automatically
10055 handled and all other eliminations are valid. */
10057 static bool
10058 ix86_can_eliminate (const int from, const int to)
10060 if (stack_realign_fp)
10061 return ((from == ARG_POINTER_REGNUM
10062 && to == HARD_FRAME_POINTER_REGNUM)
10063 || (from == FRAME_POINTER_REGNUM
10064 && to == STACK_POINTER_REGNUM));
10065 else
10066 return to == STACK_POINTER_REGNUM ? !frame_pointer_needed : true;
10069 /* Return the offset between two registers, one to be eliminated, and the other
10070 its replacement, at the start of a routine. */
10072 HOST_WIDE_INT
10073 ix86_initial_elimination_offset (int from, int to)
10075 struct ix86_frame frame;
10076 ix86_compute_frame_layout (&frame);
10078 if (from == ARG_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM)
10079 return frame.hard_frame_pointer_offset;
10080 else if (from == FRAME_POINTER_REGNUM
10081 && to == HARD_FRAME_POINTER_REGNUM)
10082 return frame.hard_frame_pointer_offset - frame.frame_pointer_offset;
10083 else
10085 gcc_assert (to == STACK_POINTER_REGNUM);
10087 if (from == ARG_POINTER_REGNUM)
10088 return frame.stack_pointer_offset;
10090 gcc_assert (from == FRAME_POINTER_REGNUM);
10091 return frame.stack_pointer_offset - frame.frame_pointer_offset;
10095 /* In a dynamically-aligned function, we can't know the offset from
10096 stack pointer to frame pointer, so we must ensure that setjmp
10097 eliminates fp against the hard fp (%ebp) rather than trying to
10098 index from %esp up to the top of the frame across a gap that is
10099 of unknown (at compile-time) size. */
10100 static rtx
10101 ix86_builtin_setjmp_frame_value (void)
10103 return stack_realign_fp ? hard_frame_pointer_rtx : virtual_stack_vars_rtx;
10106 /* When using -fsplit-stack, the allocation routines set a field in
10107 the TCB to the bottom of the stack plus this much space, measured
10108 in bytes. */
10110 #define SPLIT_STACK_AVAILABLE 256
10112 /* Fill structure ix86_frame about frame of currently computed function. */
10114 static void
10115 ix86_compute_frame_layout (struct ix86_frame *frame)
10117 unsigned HOST_WIDE_INT stack_alignment_needed;
10118 HOST_WIDE_INT offset;
10119 unsigned HOST_WIDE_INT preferred_alignment;
10120 HOST_WIDE_INT size = get_frame_size ();
10121 HOST_WIDE_INT to_allocate;
10123 frame->nregs = ix86_nsaved_regs ();
10124 frame->nsseregs = ix86_nsaved_sseregs ();
10126 /* The 64-bit MS ABI seems to require stack alignment to always be 16, except
10127 for function prologues and leaf functions. */
10128 if ((TARGET_64BIT_MS_ABI && crtl->preferred_stack_boundary < 128)
10129 && (!crtl->is_leaf || cfun->calls_alloca != 0
10130 || ix86_current_function_calls_tls_descriptor))
10132 crtl->preferred_stack_boundary = 128;
10133 crtl->stack_alignment_needed = 128;
10135 /* preferred_stack_boundary is never updated for calls
10136 expanded from a TLS descriptor. Update it here. We don't update it in
10137 the expand stage because, according to the comments before
10138 ix86_current_function_calls_tls_descriptor, TLS calls may be optimized
10139 away. */
10140 else if (ix86_current_function_calls_tls_descriptor
10141 && crtl->preferred_stack_boundary < PREFERRED_STACK_BOUNDARY)
10143 crtl->preferred_stack_boundary = PREFERRED_STACK_BOUNDARY;
10144 if (crtl->stack_alignment_needed < PREFERRED_STACK_BOUNDARY)
10145 crtl->stack_alignment_needed = PREFERRED_STACK_BOUNDARY;
10148 stack_alignment_needed = crtl->stack_alignment_needed / BITS_PER_UNIT;
10149 preferred_alignment = crtl->preferred_stack_boundary / BITS_PER_UNIT;
10151 gcc_assert (!size || stack_alignment_needed);
10152 gcc_assert (preferred_alignment >= STACK_BOUNDARY / BITS_PER_UNIT);
10153 gcc_assert (preferred_alignment <= stack_alignment_needed);
10155 /* For SEH we have to limit the amount of code movement into the prologue.
10156 At present we do this via a BLOCKAGE, at which point there's very little
10157 scheduling that can be done, which means that there's very little point
10158 in doing anything except PUSHs. */
10159 if (TARGET_SEH)
10160 cfun->machine->use_fast_prologue_epilogue = false;
10162 /* During reload iteration the number of registers saved can change.
10163 Recompute the value as needed. Do not recompute when the number of
10164 registers didn't change, as reload does multiple calls to the function
10165 and does not expect the decision to change within a single iteration. */
10166 else if (!optimize_bb_for_size_p (ENTRY_BLOCK_PTR_FOR_FN (cfun))
10167 && cfun->machine->use_fast_prologue_epilogue_nregs != frame->nregs)
10169 int count = frame->nregs;
10170 struct cgraph_node *node = cgraph_node::get (current_function_decl);
10172 cfun->machine->use_fast_prologue_epilogue_nregs = count;
10174 /* The fast prologue uses moves instead of pushes to save registers. This
10175 is significantly longer, but also executes faster, as modern hardware
10176 can execute the moves in parallel but can't do so for push/pop.
10178 Be careful about choosing which prologue to emit: when the function takes
10179 many instructions to execute we may use the slow version, as well as when
10180 the function is known to be outside a hot spot (this is known with
10181 feedback only). Weight the size of the function by the number of registers
10182 to save, as it is cheap to use one or two push instructions but very
10183 slow to use many of them. */
10184 if (count)
10185 count = (count - 1) * FAST_PROLOGUE_INSN_COUNT;
10186 if (node->frequency < NODE_FREQUENCY_NORMAL
10187 || (flag_branch_probabilities
10188 && node->frequency < NODE_FREQUENCY_HOT))
10189 cfun->machine->use_fast_prologue_epilogue = false;
10190 else
10191 cfun->machine->use_fast_prologue_epilogue
10192 = !expensive_function_p (count);
10195 frame->save_regs_using_mov
10196 = (TARGET_PROLOGUE_USING_MOVE && cfun->machine->use_fast_prologue_epilogue
10197 /* If static stack checking is enabled and done with probes,
10198 the registers need to be saved before allocating the frame. */
10199 && flag_stack_check != STATIC_BUILTIN_STACK_CHECK);
10201 /* Skip return address. */
10202 offset = UNITS_PER_WORD;
10204 /* Skip pushed static chain. */
10205 if (ix86_static_chain_on_stack)
10206 offset += UNITS_PER_WORD;
10208 /* Skip saved base pointer. */
10209 if (frame_pointer_needed)
10210 offset += UNITS_PER_WORD;
10211 frame->hfp_save_offset = offset;
10213 /* The traditional frame pointer location is at the top of the frame. */
10214 frame->hard_frame_pointer_offset = offset;
10216 /* Register save area */
10217 offset += frame->nregs * UNITS_PER_WORD;
10218 frame->reg_save_offset = offset;
10220 /* On SEH target, registers are pushed just before the frame pointer
10221 location. */
10222 if (TARGET_SEH)
10223 frame->hard_frame_pointer_offset = offset;
10225 /* Align and set SSE register save area. */
10226 if (frame->nsseregs)
10228 /* The only ABI that has saved SSE registers (Win64) also has a
10229 16-byte aligned default stack, and thus we don't need to be
10230 within the re-aligned local stack frame to save them. */
10231 gcc_assert (INCOMING_STACK_BOUNDARY >= 128);
10232 offset = (offset + 16 - 1) & -16;
10233 offset += frame->nsseregs * 16;
10235 frame->sse_reg_save_offset = offset;
10237 /* The re-aligned stack starts here. Values before this point are not
10238 directly comparable with values below this point. In order to make
10239 sure that no value happens to be the same before and after, force
10240 the alignment computation below to add a non-zero value. */
10241 if (stack_realign_fp)
10242 offset = (offset + stack_alignment_needed) & -stack_alignment_needed;
10244 /* Va-arg area */
10245 frame->va_arg_size = ix86_varargs_gpr_size + ix86_varargs_fpr_size;
10246 offset += frame->va_arg_size;
10248 /* Align start of frame for local function. */
10249 if (stack_realign_fp
10250 || offset != frame->sse_reg_save_offset
10251 || size != 0
10252 || !crtl->is_leaf
10253 || cfun->calls_alloca
10254 || ix86_current_function_calls_tls_descriptor)
10255 offset = (offset + stack_alignment_needed - 1) & -stack_alignment_needed;
10257 /* Frame pointer points here. */
10258 frame->frame_pointer_offset = offset;
10260 offset += size;
10262 /* Add outgoing arguments area. Can be skipped if we eliminated
10263 all the function calls as dead code.
10264 Skipping is however impossible when function calls alloca. Alloca
10265 expander assumes that last crtl->outgoing_args_size
10266 of stack frame are unused. */
10267 if (ACCUMULATE_OUTGOING_ARGS
10268 && (!crtl->is_leaf || cfun->calls_alloca
10269 || ix86_current_function_calls_tls_descriptor))
10271 offset += crtl->outgoing_args_size;
10272 frame->outgoing_arguments_size = crtl->outgoing_args_size;
10274 else
10275 frame->outgoing_arguments_size = 0;
10277 /* Align stack boundary. Only needed if we're calling another function
10278 or using alloca. */
10279 if (!crtl->is_leaf || cfun->calls_alloca
10280 || ix86_current_function_calls_tls_descriptor)
10281 offset = (offset + preferred_alignment - 1) & -preferred_alignment;
10283 /* We've reached end of stack frame. */
10284 frame->stack_pointer_offset = offset;
10286 /* Size prologue needs to allocate. */
10287 to_allocate = offset - frame->sse_reg_save_offset;
10289 if ((!to_allocate && frame->nregs <= 1)
10290 || (TARGET_64BIT && to_allocate >= (HOST_WIDE_INT) 0x80000000))
10291 frame->save_regs_using_mov = false;
10293 if (ix86_using_red_zone ()
10294 && crtl->sp_is_unchanging
10295 && crtl->is_leaf
10296 && !ix86_current_function_calls_tls_descriptor)
10298 frame->red_zone_size = to_allocate;
10299 if (frame->save_regs_using_mov)
10300 frame->red_zone_size += frame->nregs * UNITS_PER_WORD;
10301 if (frame->red_zone_size > RED_ZONE_SIZE - RED_ZONE_RESERVE)
10302 frame->red_zone_size = RED_ZONE_SIZE - RED_ZONE_RESERVE;
10304 else
10305 frame->red_zone_size = 0;
10306 frame->stack_pointer_offset -= frame->red_zone_size;
10308 /* The SEH frame pointer location is near the bottom of the frame.
10309 This is enforced by the fact that the difference between the
10310 stack pointer and the frame pointer is limited to 240 bytes in
10311 the unwind data structure. */
10312 if (TARGET_SEH)
10314 HOST_WIDE_INT diff;
10316 /* If we can leave the frame pointer where it is, do so. Also, returns
10317 the establisher frame for __builtin_frame_address (0). */
10318 diff = frame->stack_pointer_offset - frame->hard_frame_pointer_offset;
10319 if (diff <= SEH_MAX_FRAME_SIZE
10320 && (diff > 240 || (diff & 15) != 0)
10321 && !crtl->accesses_prior_frames)
10323 /* Ideally we'd determine what portion of the local stack frame
10324 (within the constraint of the lowest 240) is most heavily used.
10325 But without that complication, simply bias the frame pointer
10326 by 128 bytes so as to maximize the amount of the local stack
10327 frame that is addressable with 8-bit offsets. */
10328 frame->hard_frame_pointer_offset = frame->stack_pointer_offset - 128;
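/* To summarize the computation above, the frame is laid out from the
   incoming stack pointer downward roughly as follows (an illustrative
   sketch; several areas may be empty for a given function):

       return address
       pushed static chain           (if ix86_static_chain_on_stack)
       saved frame pointer           (if frame_pointer_needed)
       GP register save area         (frame->nregs words)
       SSE register save area        (frame->nsseregs * 16, 16-aligned)
       va_arg register save area
       local variables               (start at frame->frame_pointer_offset)
       outgoing argument area
       red zone                      (leaf functions only; subtracted from
                                      frame->stack_pointer_offset)        */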
10333 /* This is semi-inlined memory_address_length, but simplified
10334 since we know that we're always dealing with reg+offset, and
10335 to avoid having to create and discard all that rtl. */
10337 static inline int
10338 choose_baseaddr_len (unsigned int regno, HOST_WIDE_INT offset)
10340 int len = 4;
10342 if (offset == 0)
10344 /* EBP and R13 cannot be encoded without an offset. */
10345 len = (regno == BP_REG || regno == R13_REG);
10347 else if (IN_RANGE (offset, -128, 127))
10348 len = 1;
10350 /* ESP and R12 must be encoded with a SIB byte. */
10351 if (regno == SP_REG || regno == R12_REG)
10352 len++;
10354 return len;
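/* Illustrative address-size contributions computed above (x86-64,
   assuming the usual ModRM/SIB encoding rules):

       0(%rbx)      -> 0 bytes   (no displacement needed)
       0(%rbp)      -> 1 byte    (rbp/r13 always need a displacement)
       8(%rsp)      -> 2 bytes   (disp8 plus the mandatory SIB byte)
       1024(%rax)   -> 4 bytes   (disp32)                             */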
10357 /* Return an RTX that points to CFA_OFFSET within the stack frame.
10358 The valid base registers are taken from CFUN->MACHINE->FS. */
10360 static rtx
10361 choose_baseaddr (HOST_WIDE_INT cfa_offset)
10363 const struct machine_function *m = cfun->machine;
10364 rtx base_reg = NULL;
10365 HOST_WIDE_INT base_offset = 0;
10367 if (m->use_fast_prologue_epilogue)
10369 /* Choose the base register most likely to allow the most scheduling
10370 opportunities. Generally FP is valid throughout the function,
10371 while DRAP must be reloaded within the epilogue. But choose either
10372 over the SP due to increased encoding size. */
10374 if (m->fs.fp_valid)
10376 base_reg = hard_frame_pointer_rtx;
10377 base_offset = m->fs.fp_offset - cfa_offset;
10379 else if (m->fs.drap_valid)
10381 base_reg = crtl->drap_reg;
10382 base_offset = 0 - cfa_offset;
10384 else if (m->fs.sp_valid)
10386 base_reg = stack_pointer_rtx;
10387 base_offset = m->fs.sp_offset - cfa_offset;
10390 else
10392 HOST_WIDE_INT toffset;
10393 int len = 16, tlen;
10395 /* Choose the base register with the smallest address encoding.
10396 With a tie, choose FP > DRAP > SP. */
10397 if (m->fs.sp_valid)
10399 base_reg = stack_pointer_rtx;
10400 base_offset = m->fs.sp_offset - cfa_offset;
10401 len = choose_baseaddr_len (STACK_POINTER_REGNUM, base_offset);
10403 if (m->fs.drap_valid)
10405 toffset = 0 - cfa_offset;
10406 tlen = choose_baseaddr_len (REGNO (crtl->drap_reg), toffset);
10407 if (tlen <= len)
10409 base_reg = crtl->drap_reg;
10410 base_offset = toffset;
10411 len = tlen;
10414 if (m->fs.fp_valid)
10416 toffset = m->fs.fp_offset - cfa_offset;
10417 tlen = choose_baseaddr_len (HARD_FRAME_POINTER_REGNUM, toffset);
10418 if (tlen <= len)
10420 base_reg = hard_frame_pointer_rtx;
10421 base_offset = toffset;
10422 len = tlen;
10426 gcc_assert (base_reg != NULL);
10428 return plus_constant (Pmode, base_reg, base_offset);
10431 /* Emit code to save registers in the prologue. */
10433 static void
10434 ix86_emit_save_regs (void)
10436 unsigned int regno;
10437 rtx_insn *insn;
10439 for (regno = FIRST_PSEUDO_REGISTER - 1; regno-- > 0; )
10440 if (!SSE_REGNO_P (regno) && ix86_save_reg (regno, true))
10442 insn = emit_insn (gen_push (gen_rtx_REG (word_mode, regno)));
10443 RTX_FRAME_RELATED_P (insn) = 1;
10447 /* Emit a single register save at CFA - CFA_OFFSET. */
10449 static void
10450 ix86_emit_save_reg_using_mov (machine_mode mode, unsigned int regno,
10451 HOST_WIDE_INT cfa_offset)
10453 struct machine_function *m = cfun->machine;
10454 rtx reg = gen_rtx_REG (mode, regno);
10455 rtx mem, addr, base, insn;
10457 addr = choose_baseaddr (cfa_offset);
10458 mem = gen_frame_mem (mode, addr);
10460 /* For SSE saves, we need to indicate the 128-bit alignment. */
10461 set_mem_align (mem, GET_MODE_ALIGNMENT (mode));
10463 insn = emit_move_insn (mem, reg);
10464 RTX_FRAME_RELATED_P (insn) = 1;
10466 base = addr;
10467 if (GET_CODE (base) == PLUS)
10468 base = XEXP (base, 0);
10469 gcc_checking_assert (REG_P (base));
10471 /* When saving registers into a re-aligned local stack frame, avoid
10472 any tricky guessing by dwarf2out. */
10473 if (m->fs.realigned)
10475 gcc_checking_assert (stack_realign_drap);
10477 if (regno == REGNO (crtl->drap_reg))
10479 /* A bit of a hack. We force the DRAP register to be saved in
10480 the re-aligned stack frame, which provides us with a copy
10481 of the CFA that will last past the prologue. Install it. */
10482 gcc_checking_assert (cfun->machine->fs.fp_valid);
10483 addr = plus_constant (Pmode, hard_frame_pointer_rtx,
10484 cfun->machine->fs.fp_offset - cfa_offset);
10485 mem = gen_rtx_MEM (mode, addr);
10486 add_reg_note (insn, REG_CFA_DEF_CFA, mem);
10488 else
10490 /* The frame pointer is a stable reference within the
10491 aligned frame. Use it. */
10492 gcc_checking_assert (cfun->machine->fs.fp_valid);
10493 addr = plus_constant (Pmode, hard_frame_pointer_rtx,
10494 cfun->machine->fs.fp_offset - cfa_offset);
10495 mem = gen_rtx_MEM (mode, addr);
10496 add_reg_note (insn, REG_CFA_EXPRESSION, gen_rtx_SET (mem, reg));
10500 /* The memory may not be relative to the current CFA register,
10501 which means that we may need to generate a new pattern for
10502 use by the unwind info. */
10503 else if (base != m->fs.cfa_reg)
10505 addr = plus_constant (Pmode, m->fs.cfa_reg,
10506 m->fs.cfa_offset - cfa_offset);
10507 mem = gen_rtx_MEM (mode, addr);
10508 add_reg_note (insn, REG_CFA_OFFSET, gen_rtx_SET (mem, reg));
10512 /* Emit code to save registers using MOV insns.
10513 First register is stored at CFA - CFA_OFFSET. */
10514 static void
10515 ix86_emit_save_regs_using_mov (HOST_WIDE_INT cfa_offset)
10517 unsigned int regno;
10519 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
10520 if (!SSE_REGNO_P (regno) && ix86_save_reg (regno, true))
10522 ix86_emit_save_reg_using_mov (word_mode, regno, cfa_offset);
10523 cfa_offset -= UNITS_PER_WORD;
10527 /* Emit code to save SSE registers using MOV insns.
10528 First register is stored at CFA - CFA_OFFSET. */
10529 static void
10530 ix86_emit_save_sse_regs_using_mov (HOST_WIDE_INT cfa_offset)
10532 unsigned int regno;
10534 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
10535 if (SSE_REGNO_P (regno) && ix86_save_reg (regno, true))
10537 ix86_emit_save_reg_using_mov (V4SFmode, regno, cfa_offset);
10538 cfa_offset -= 16;
10542 static GTY(()) rtx queued_cfa_restores;
10544 /* Add a REG_CFA_RESTORE REG note to INSN, or queue it until the next stack
10545 manipulation insn. The value is on the stack at CFA - CFA_OFFSET.
10546 Don't add the note if the previously saved value will be left untouched
10547 within the stack red-zone till return, as unwinders can find the same value
10548 in the register and on the stack. */
10550 static void
10551 ix86_add_cfa_restore_note (rtx_insn *insn, rtx reg, HOST_WIDE_INT cfa_offset)
10553 if (!crtl->shrink_wrapped
10554 && cfa_offset <= cfun->machine->fs.red_zone_offset)
10555 return;
10557 if (insn)
10559 add_reg_note (insn, REG_CFA_RESTORE, reg);
10560 RTX_FRAME_RELATED_P (insn) = 1;
10562 else
10563 queued_cfa_restores
10564 = alloc_reg_note (REG_CFA_RESTORE, reg, queued_cfa_restores);
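/* Any queued notes are attached later by ix86_add_queued_cfa_restore_notes,
   typically from pro_epilogue_adjust_stack once the next stack
   manipulation insn has been emitted.  */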
10567 /* Add queued REG_CFA_RESTORE notes if any to INSN. */
10569 static void
10570 ix86_add_queued_cfa_restore_notes (rtx insn)
10572 rtx last;
10573 if (!queued_cfa_restores)
10574 return;
10575 for (last = queued_cfa_restores; XEXP (last, 1); last = XEXP (last, 1))
10577 XEXP (last, 1) = REG_NOTES (insn);
10578 REG_NOTES (insn) = queued_cfa_restores;
10579 queued_cfa_restores = NULL_RTX;
10580 RTX_FRAME_RELATED_P (insn) = 1;
10583 /* Expand prologue or epilogue stack adjustment.
10584 The pattern exists to put a dependency on all ebp-based memory accesses.
10585 STYLE should be negative if instructions should be marked as frame related,
10586 zero if the %r11 register is live and cannot be freely used, and positive
10587 otherwise. */
10589 static void
10590 pro_epilogue_adjust_stack (rtx dest, rtx src, rtx offset,
10591 int style, bool set_cfa)
10593 struct machine_function *m = cfun->machine;
10594 rtx insn;
10595 bool add_frame_related_expr = false;
10597 if (Pmode == SImode)
10598 insn = gen_pro_epilogue_adjust_stack_si_add (dest, src, offset);
10599 else if (x86_64_immediate_operand (offset, DImode))
10600 insn = gen_pro_epilogue_adjust_stack_di_add (dest, src, offset);
10601 else
10603 rtx tmp;
10604 /* r11 is used by indirect sibcall return as well: it is set before the
10605 epilogue and used after it. */
10606 if (style)
10607 tmp = gen_rtx_REG (DImode, R11_REG);
10608 else
10610 gcc_assert (src != hard_frame_pointer_rtx
10611 && dest != hard_frame_pointer_rtx);
10612 tmp = hard_frame_pointer_rtx;
10614 insn = emit_insn (gen_rtx_SET (tmp, offset));
10615 if (style < 0)
10616 add_frame_related_expr = true;
10618 insn = gen_pro_epilogue_adjust_stack_di_add (dest, src, tmp);
10621 insn = emit_insn (insn);
10622 if (style >= 0)
10623 ix86_add_queued_cfa_restore_notes (insn);
10625 if (set_cfa)
10627 rtx r;
10629 gcc_assert (m->fs.cfa_reg == src);
10630 m->fs.cfa_offset += INTVAL (offset);
10631 m->fs.cfa_reg = dest;
10633 r = gen_rtx_PLUS (Pmode, src, offset);
10634 r = gen_rtx_SET (dest, r);
10635 add_reg_note (insn, REG_CFA_ADJUST_CFA, r);
10636 RTX_FRAME_RELATED_P (insn) = 1;
10638 else if (style < 0)
10640 RTX_FRAME_RELATED_P (insn) = 1;
10641 if (add_frame_related_expr)
10643 rtx r = gen_rtx_PLUS (Pmode, src, offset);
10644 r = gen_rtx_SET (dest, r);
10645 add_reg_note (insn, REG_FRAME_RELATED_EXPR, r);
10649 if (dest == stack_pointer_rtx)
10651 HOST_WIDE_INT ooffset = m->fs.sp_offset;
10652 bool valid = m->fs.sp_valid;
10654 if (src == hard_frame_pointer_rtx)
10656 valid = m->fs.fp_valid;
10657 ooffset = m->fs.fp_offset;
10659 else if (src == crtl->drap_reg)
10661 valid = m->fs.drap_valid;
10662 ooffset = 0;
10664 else
10666 /* Otherwise there are two possibilities: SP itself, which we set
10667 up as the default above, or EH_RETURN_STACKADJ_RTX, which is
10668 taken care of by hand along the eh_return path. */
10669 gcc_checking_assert (src == stack_pointer_rtx
10670 || offset == const0_rtx);
10673 m->fs.sp_offset = ooffset - INTVAL (offset);
10674 m->fs.sp_valid = valid;
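/* A typical prologue use is

     pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
                                GEN_INT (-allocate), -1,
                                m->fs.cfa_reg == stack_pointer_rtx);

   which allocates ALLOCATE bytes, marks the insn as frame related and
   updates the tracked SP/CFA state accordingly.  */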
10678 /* Find an available register to be used as the dynamic realign argument
10679 pointer register. Such a register will be written in the prologue and
10680 used at the beginning of the body, so it must not be
10681 1. a parameter-passing register.
10682 2. the GOT pointer.
10683 We reuse the static-chain register if it is available. Otherwise, we
10684 use DI for i386 and R13 for x86-64. We chose R13 since it has a
10685 shorter encoding.
10687 Return: the regno of the chosen register. */
10689 static unsigned int
10690 find_drap_reg (void)
10692 tree decl = cfun->decl;
10694 if (TARGET_64BIT)
10696 /* Use R13 for a nested function or a function that needs a static chain.
10697 Since a function with a tail call may use any caller-saved
10698 register in the epilogue, DRAP must not use a caller-saved
10699 register in that case. */
10700 if (DECL_STATIC_CHAIN (decl) || crtl->tail_call_emit)
10701 return R13_REG;
10703 return R10_REG;
10705 else
10707 /* Use DI for a nested function or a function that needs a static chain.
10708 Since a function with a tail call may use any caller-saved
10709 register in the epilogue, DRAP must not use a caller-saved
10710 register in that case. */
10711 if (DECL_STATIC_CHAIN (decl) || crtl->tail_call_emit)
10712 return DI_REG;
10714 /* Reuse static chain register if it isn't used for parameter
10715 passing. */
10716 if (ix86_function_regparm (TREE_TYPE (decl), decl) <= 2)
10718 unsigned int ccvt = ix86_get_callcvt (TREE_TYPE (decl));
10719 if ((ccvt & (IX86_CALLCVT_FASTCALL | IX86_CALLCVT_THISCALL)) == 0)
10720 return CX_REG;
10722 return DI_REG;
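/* For example, a 32-bit fastcall function cannot reuse ECX (it carries
   arguments under that convention), so the code above ends up choosing
   DI_REG for it.  */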
10726 /* Return minimum incoming stack alignment. */
10728 static unsigned int
10729 ix86_minimum_incoming_stack_boundary (bool sibcall)
10731 unsigned int incoming_stack_boundary;
10733 /* Prefer the one specified at command line. */
10734 if (ix86_user_incoming_stack_boundary)
10735 incoming_stack_boundary = ix86_user_incoming_stack_boundary;
10736 /* In 32-bit mode, use MIN_STACK_BOUNDARY for the incoming stack boundary
10737 if -mstackrealign is used, this is not a sibcall check, and the
10738 estimated stack alignment is 128 bits. */
10739 else if (!sibcall
10740 && !TARGET_64BIT
10741 && ix86_force_align_arg_pointer
10742 && crtl->stack_alignment_estimated == 128)
10743 incoming_stack_boundary = MIN_STACK_BOUNDARY;
10744 else
10745 incoming_stack_boundary = ix86_default_incoming_stack_boundary;
10747 /* Incoming stack alignment can be changed on individual functions
10748 via force_align_arg_pointer attribute. We use the smallest
10749 incoming stack boundary. */
10750 if (incoming_stack_boundary > MIN_STACK_BOUNDARY
10751 && lookup_attribute (ix86_force_align_arg_pointer_string,
10752 TYPE_ATTRIBUTES (TREE_TYPE (current_function_decl))))
10753 incoming_stack_boundary = MIN_STACK_BOUNDARY;
10755 /* The incoming stack frame has to be aligned at least at
10756 parm_stack_boundary. */
10757 if (incoming_stack_boundary < crtl->parm_stack_boundary)
10758 incoming_stack_boundary = crtl->parm_stack_boundary;
10760 /* The stack at the entry of main is aligned by the runtime. We use the
10761 smallest incoming stack boundary. */
10762 if (incoming_stack_boundary > MAIN_STACK_BOUNDARY
10763 && DECL_NAME (current_function_decl)
10764 && MAIN_NAME_P (DECL_NAME (current_function_decl))
10765 && DECL_FILE_SCOPE_P (current_function_decl))
10766 incoming_stack_boundary = MAIN_STACK_BOUNDARY;
10768 return incoming_stack_boundary;
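/* For example, if the default incoming boundary is 128 bits but the
   function carries the force_align_arg_pointer attribute, the result
   drops to MIN_STACK_BOUNDARY and is then raised back to
   parm_stack_boundary if that happens to be larger.  */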
10771 /* Update incoming stack boundary and estimated stack alignment. */
10773 static void
10774 ix86_update_stack_boundary (void)
10776 ix86_incoming_stack_boundary
10777 = ix86_minimum_incoming_stack_boundary (false);
10779 /* x86_64 varargs needs 16-byte stack alignment for the register save
10780 area. */
10781 if (TARGET_64BIT
10782 && cfun->stdarg
10783 && crtl->stack_alignment_estimated < 128)
10784 crtl->stack_alignment_estimated = 128;
10787 /* Handle the TARGET_GET_DRAP_RTX hook. Return NULL if no DRAP is
10788 needed or an rtx for DRAP otherwise. */
10790 static rtx
10791 ix86_get_drap_rtx (void)
10793 if (ix86_force_drap || !ACCUMULATE_OUTGOING_ARGS)
10794 crtl->need_drap = true;
10796 if (stack_realign_drap)
10798 /* Assign DRAP to vDRAP and return vDRAP. */
10799 unsigned int regno = find_drap_reg ();
10800 rtx drap_vreg;
10801 rtx arg_ptr;
10802 rtx_insn *seq, *insn;
10804 arg_ptr = gen_rtx_REG (Pmode, regno);
10805 crtl->drap_reg = arg_ptr;
10807 start_sequence ();
10808 drap_vreg = copy_to_reg (arg_ptr);
10809 seq = get_insns ();
10810 end_sequence ();
10812 insn = emit_insn_before (seq, NEXT_INSN (entry_of_function ()));
10813 if (!optimize)
10815 add_reg_note (insn, REG_CFA_SET_VDRAP, drap_vreg);
10816 RTX_FRAME_RELATED_P (insn) = 1;
10818 return drap_vreg;
10820 else
10821 return NULL;
10824 /* Handle the TARGET_INTERNAL_ARG_POINTER hook. */
10826 static rtx
10827 ix86_internal_arg_pointer (void)
10829 return virtual_incoming_args_rtx;
10832 struct scratch_reg {
10833 rtx reg;
10834 bool saved;
10837 /* Return a short-lived scratch register for use on function entry.
10838 In 32-bit mode, it is valid only after the registers are saved
10839 in the prologue. This register must be released by means of
10840 release_scratch_register_on_entry once it is dead. */
10842 static void
10843 get_scratch_register_on_entry (struct scratch_reg *sr)
10845 int regno;
10847 sr->saved = false;
10849 if (TARGET_64BIT)
10851 /* We always use R11 in 64-bit mode. */
10852 regno = R11_REG;
10854 else
10856 tree decl = current_function_decl, fntype = TREE_TYPE (decl);
10857 bool fastcall_p
10858 = lookup_attribute ("fastcall", TYPE_ATTRIBUTES (fntype)) != NULL_TREE;
10859 bool thiscall_p
10860 = lookup_attribute ("thiscall", TYPE_ATTRIBUTES (fntype)) != NULL_TREE;
10861 bool static_chain_p = DECL_STATIC_CHAIN (decl);
10862 int regparm = ix86_function_regparm (fntype, decl);
10863 int drap_regno
10864 = crtl->drap_reg ? REGNO (crtl->drap_reg) : INVALID_REGNUM;
10866 /* 'fastcall' sets regparm to 2, uses ecx/edx for arguments and eax
10867 for the static chain register. */
10868 if ((regparm < 1 || (fastcall_p && !static_chain_p))
10869 && drap_regno != AX_REG)
10870 regno = AX_REG;
10871 /* 'thiscall' sets regparm to 1, uses ecx for arguments and edx
10872 for the static chain register. */
10873 else if (thiscall_p && !static_chain_p && drap_regno != AX_REG)
10874 regno = AX_REG;
10875 else if (regparm < 2 && !thiscall_p && drap_regno != DX_REG)
10876 regno = DX_REG;
10877 /* ecx is the static chain register. */
10878 else if (regparm < 3 && !fastcall_p && !thiscall_p
10879 && !static_chain_p
10880 && drap_regno != CX_REG)
10881 regno = CX_REG;
10882 else if (ix86_save_reg (BX_REG, true))
10883 regno = BX_REG;
10884 /* esi is the static chain register. */
10885 else if (!(regparm == 3 && static_chain_p)
10886 && ix86_save_reg (SI_REG, true))
10887 regno = SI_REG;
10888 else if (ix86_save_reg (DI_REG, true))
10889 regno = DI_REG;
10890 else
10892 regno = (drap_regno == AX_REG ? DX_REG : AX_REG);
10893 sr->saved = true;
10897 sr->reg = gen_rtx_REG (Pmode, regno);
10898 if (sr->saved)
10900 rtx_insn *insn = emit_insn (gen_push (sr->reg));
10901 RTX_FRAME_RELATED_P (insn) = 1;
10905 /* Release a scratch register obtained from the preceding function. */
10907 static void
10908 release_scratch_register_on_entry (struct scratch_reg *sr)
10910 if (sr->saved)
10912 struct machine_function *m = cfun->machine;
10913 rtx x, insn = emit_insn (gen_pop (sr->reg));
10915 /* The RTX_FRAME_RELATED_P mechanism doesn't know about pop. */
10916 RTX_FRAME_RELATED_P (insn) = 1;
10917 x = gen_rtx_PLUS (Pmode, stack_pointer_rtx, GEN_INT (UNITS_PER_WORD));
10918 x = gen_rtx_SET (stack_pointer_rtx, x);
10919 add_reg_note (insn, REG_FRAME_RELATED_EXPR, x);
10920 m->fs.sp_offset -= UNITS_PER_WORD;
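/* The two routines above are used as a pair, e.g.

     struct scratch_reg sr;
     get_scratch_register_on_entry (&sr);
     ... emit code that clobbers sr.reg ...
     release_scratch_register_on_entry (&sr);

   as done by the stack probing functions below.  */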
10924 #define PROBE_INTERVAL (1 << STACK_CHECK_PROBE_INTERVAL_EXP)
10926 /* Emit code to adjust the stack pointer by SIZE bytes while probing it. */
10928 static void
10929 ix86_adjust_stack_and_probe (const HOST_WIDE_INT size)
10931 /* We skip the probe for the first interval + a small dope of 4 words and
10932 probe that many bytes past the specified size to maintain a protection
10933 area at the bottom of the stack. */
10934 const int dope = 4 * UNITS_PER_WORD;
10935 rtx size_rtx = GEN_INT (size), last;
10937 /* See if we have a constant small number of probes to generate. If so,
10938 that's the easy case. The run-time loop is made up of 11 insns in the
10939 generic case while the compile-time loop is made up of 3+2*(n-1) insns
10940 for n # of intervals. */
10941 if (size <= 5 * PROBE_INTERVAL)
10943 HOST_WIDE_INT i, adjust;
10944 bool first_probe = true;
10946 /* Adjust SP and probe at PROBE_INTERVAL + N * PROBE_INTERVAL for
10947 values of N from 1 until it exceeds SIZE. If only one probe is
10948 needed, this will not generate any code. Then adjust and probe
10949 to PROBE_INTERVAL + SIZE. */
10950 for (i = PROBE_INTERVAL; i < size; i += PROBE_INTERVAL)
10952 if (first_probe)
10954 adjust = 2 * PROBE_INTERVAL + dope;
10955 first_probe = false;
10957 else
10958 adjust = PROBE_INTERVAL;
10960 emit_insn (gen_rtx_SET (stack_pointer_rtx,
10961 plus_constant (Pmode, stack_pointer_rtx,
10962 -adjust)));
10963 emit_stack_probe (stack_pointer_rtx);
10966 if (first_probe)
10967 adjust = size + PROBE_INTERVAL + dope;
10968 else
10969 adjust = size + PROBE_INTERVAL - i;
10971 emit_insn (gen_rtx_SET (stack_pointer_rtx,
10972 plus_constant (Pmode, stack_pointer_rtx,
10973 -adjust)));
10974 emit_stack_probe (stack_pointer_rtx);
10976 /* Adjust back to account for the additional first interval. */
10977 last = emit_insn (gen_rtx_SET (stack_pointer_rtx,
10978 plus_constant (Pmode, stack_pointer_rtx,
10979 PROBE_INTERVAL + dope)));
10982 /* Otherwise, do the same as above, but in a loop. Note that we must be
10983 extra careful with variables wrapping around because we might be at
10984 the very top (or the very bottom) of the address space and we have
10985 to be able to handle this case properly; in particular, we use an
10986 equality test for the loop condition. */
10987 else
10989 HOST_WIDE_INT rounded_size;
10990 struct scratch_reg sr;
10992 get_scratch_register_on_entry (&sr);
10995 /* Step 1: round SIZE to the previous multiple of the interval. */
10997 rounded_size = size & -PROBE_INTERVAL;
11000 /* Step 2: compute initial and final value of the loop counter. */
11002 /* SP = SP_0 + PROBE_INTERVAL. */
11003 emit_insn (gen_rtx_SET (stack_pointer_rtx,
11004 plus_constant (Pmode, stack_pointer_rtx,
11005 - (PROBE_INTERVAL + dope))));
11007 /* LAST_ADDR = SP_0 + PROBE_INTERVAL + ROUNDED_SIZE. */
11008 emit_move_insn (sr.reg, GEN_INT (-rounded_size));
11009 emit_insn (gen_rtx_SET (sr.reg,
11010 gen_rtx_PLUS (Pmode, sr.reg,
11011 stack_pointer_rtx)));
11014 /* Step 3: the loop
11016 while (SP != LAST_ADDR)
11018 SP = SP + PROBE_INTERVAL
11019 probe at SP
11022 adjusts SP and probes to PROBE_INTERVAL + N * PROBE_INTERVAL for
11023 values of N from 1 until it is equal to ROUNDED_SIZE. */
11025 emit_insn (ix86_gen_adjust_stack_and_probe (sr.reg, sr.reg, size_rtx));
11028 /* Step 4: adjust SP and probe at PROBE_INTERVAL + SIZE if we cannot
11029 assert at compile-time that SIZE is equal to ROUNDED_SIZE. */
11031 if (size != rounded_size)
11033 emit_insn (gen_rtx_SET (stack_pointer_rtx,
11034 plus_constant (Pmode, stack_pointer_rtx,
11035 rounded_size - size)));
11036 emit_stack_probe (stack_pointer_rtx);
11039 /* Adjust back to account for the additional first interval. */
11040 last = emit_insn (gen_rtx_SET (stack_pointer_rtx,
11041 plus_constant (Pmode, stack_pointer_rtx,
11042 PROBE_INTERVAL + dope)));
11044 release_scratch_register_on_entry (&sr);
11047 gcc_assert (cfun->machine->fs.cfa_reg != stack_pointer_rtx);
11049 /* Even if the stack pointer isn't the CFA register, we need to correctly
11050 describe the adjustments made to it, in particular differentiate the
11051 frame-related ones from the frame-unrelated ones. */
11052 if (size > 0)
11054 rtx expr = gen_rtx_SEQUENCE (VOIDmode, rtvec_alloc (2));
11055 XVECEXP (expr, 0, 0)
11056 = gen_rtx_SET (stack_pointer_rtx,
11057 plus_constant (Pmode, stack_pointer_rtx, -size));
11058 XVECEXP (expr, 0, 1)
11059 = gen_rtx_SET (stack_pointer_rtx,
11060 plus_constant (Pmode, stack_pointer_rtx,
11061 PROBE_INTERVAL + dope + size));
11062 add_reg_note (last, REG_FRAME_RELATED_EXPR, expr);
11063 RTX_FRAME_RELATED_P (last) = 1;
11065 cfun->machine->fs.sp_offset += size;
11068 /* Make sure nothing is scheduled before we are done. */
11069 emit_insn (gen_blockage ());
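/* As a worked example, assuming PROBE_INTERVAL is 4096 and
   UNITS_PER_WORD is 8 (so dope == 32), a SIZE of 10000 takes the
   unrolled branch above: SP is decremented by 8224, 4096 and 1808,
   with a probe after each step, and finally incremented by 4128,
   for a net adjustment of exactly -10000 bytes.  */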
11072 /* Adjust the stack pointer up to REG while probing it. */
11074 const char *
11075 output_adjust_stack_and_probe (rtx reg)
11077 static int labelno = 0;
11078 char loop_lab[32], end_lab[32];
11079 rtx xops[2];
11081 ASM_GENERATE_INTERNAL_LABEL (loop_lab, "LPSRL", labelno);
11082 ASM_GENERATE_INTERNAL_LABEL (end_lab, "LPSRE", labelno++);
11084 ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, loop_lab);
11086 /* Jump to END_LAB if SP == LAST_ADDR. */
11087 xops[0] = stack_pointer_rtx;
11088 xops[1] = reg;
11089 output_asm_insn ("cmp%z0\t{%1, %0|%0, %1}", xops);
11090 fputs ("\tje\t", asm_out_file);
11091 assemble_name_raw (asm_out_file, end_lab);
11092 fputc ('\n', asm_out_file);
11094 /* SP = SP + PROBE_INTERVAL. */
11095 xops[1] = GEN_INT (PROBE_INTERVAL);
11096 output_asm_insn ("sub%z0\t{%1, %0|%0, %1}", xops);
11098 /* Probe at SP. */
11099 xops[1] = const0_rtx;
11100 output_asm_insn ("or%z0\t{%1, (%0)|DWORD PTR [%0], %1}", xops);
11102 fprintf (asm_out_file, "\tjmp\t");
11103 assemble_name_raw (asm_out_file, loop_lab);
11104 fputc ('\n', asm_out_file);
11106 ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, end_lab);
11108 return "";
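/* In 64-bit code, assuming a 4096-byte probe interval and %r11 as the
   scratch register, the emitted sequence is roughly (AT&T syntax):

     .LPSRL0: cmpq %r11, %rsp
              je .LPSRE0
              subq $4096, %rsp
              orq $0, (%rsp)
              jmp .LPSRL0
     .LPSRE0:

   i.e. the stack pointer is moved and the new page touched one
   interval at a time until the limit register is reached.  */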
11111 /* Emit code to probe a range of stack addresses from FIRST to FIRST+SIZE,
11112 inclusive. These are offsets from the current stack pointer. */
11114 static void
11115 ix86_emit_probe_stack_range (HOST_WIDE_INT first, HOST_WIDE_INT size)
11117 /* See if we have a constant small number of probes to generate. If so,
11118 that's the easy case. The run-time loop is made up of 7 insns in the
11119 generic case while the compile-time loop is made up of n insns for n #
11120 of intervals. */
11121 if (size <= 7 * PROBE_INTERVAL)
11123 HOST_WIDE_INT i;
11125 /* Probe at FIRST + N * PROBE_INTERVAL for values of N from 1 until
11126 it exceeds SIZE. If only one probe is needed, this will not
11127 generate any code. Then probe at FIRST + SIZE. */
11128 for (i = PROBE_INTERVAL; i < size; i += PROBE_INTERVAL)
11129 emit_stack_probe (plus_constant (Pmode, stack_pointer_rtx,
11130 -(first + i)));
11132 emit_stack_probe (plus_constant (Pmode, stack_pointer_rtx,
11133 -(first + size)));
11136 /* Otherwise, do the same as above, but in a loop. Note that we must be
11137 extra careful with variables wrapping around because we might be at
11138 the very top (or the very bottom) of the address space and we have
11139 to be able to handle this case properly; in particular, we use an
11140 equality test for the loop condition. */
11141 else
11143 HOST_WIDE_INT rounded_size, last;
11144 struct scratch_reg sr;
11146 get_scratch_register_on_entry (&sr);
11149 /* Step 1: round SIZE to the previous multiple of the interval. */
11151 rounded_size = size & -PROBE_INTERVAL;
11154 /* Step 2: compute initial and final value of the loop counter. */
11156 /* TEST_OFFSET = FIRST. */
11157 emit_move_insn (sr.reg, GEN_INT (-first));
11159 /* LAST_OFFSET = FIRST + ROUNDED_SIZE. */
11160 last = first + rounded_size;
11163 /* Step 3: the loop
11165 while (TEST_ADDR != LAST_ADDR)
11167 TEST_ADDR = TEST_ADDR + PROBE_INTERVAL
11168 probe at TEST_ADDR
11171 probes at FIRST + N * PROBE_INTERVAL for values of N from 1
11172 until it is equal to ROUNDED_SIZE. */
11174 emit_insn (ix86_gen_probe_stack_range (sr.reg, sr.reg, GEN_INT (-last)));
11177 /* Step 4: probe at FIRST + SIZE if we cannot assert at compile-time
11178 that SIZE is equal to ROUNDED_SIZE. */
11180 if (size != rounded_size)
11181 emit_stack_probe (plus_constant (Pmode,
11182 gen_rtx_PLUS (Pmode,
11183 stack_pointer_rtx,
11184 sr.reg),
11185 rounded_size - size));
11187 release_scratch_register_on_entry (&sr);
11190 /* Make sure nothing is scheduled before we are done. */
11191 emit_insn (gen_blockage ());
11194 /* Probe a range of stack addresses from REG to END, inclusive. These are
11195 offsets from the current stack pointer. */
11197 const char *
11198 output_probe_stack_range (rtx reg, rtx end)
11200 static int labelno = 0;
11201 char loop_lab[32], end_lab[32];
11202 rtx xops[3];
11204 ASM_GENERATE_INTERNAL_LABEL (loop_lab, "LPSRL", labelno);
11205 ASM_GENERATE_INTERNAL_LABEL (end_lab, "LPSRE", labelno++);
11207 ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, loop_lab);
11209 /* Jump to END_LAB if TEST_ADDR == LAST_ADDR. */
11210 xops[0] = reg;
11211 xops[1] = end;
11212 output_asm_insn ("cmp%z0\t{%1, %0|%0, %1}", xops);
11213 fputs ("\tje\t", asm_out_file);
11214 assemble_name_raw (asm_out_file, end_lab);
11215 fputc ('\n', asm_out_file);
11217 /* TEST_ADDR = TEST_ADDR + PROBE_INTERVAL. */
11218 xops[1] = GEN_INT (PROBE_INTERVAL);
11219 output_asm_insn ("sub%z0\t{%1, %0|%0, %1}", xops);
11221 /* Probe at TEST_ADDR. */
11222 xops[0] = stack_pointer_rtx;
11223 xops[1] = reg;
11224 xops[2] = const0_rtx;
11225 output_asm_insn ("or%z0\t{%2, (%0,%1)|DWORD PTR [%0+%1], %2}", xops);
11227 fprintf (asm_out_file, "\tjmp\t");
11228 assemble_name_raw (asm_out_file, loop_lab);
11229 fputc ('\n', asm_out_file);
11231 ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, end_lab);
11233 return "";
11236 /* Finalize stack_realign_needed flag, which will guide prologue/epilogue
11237 to be generated in correct form. */
11238 static void
11239 ix86_finalize_stack_realign_flags (void)
11241 /* Check if stack realignment is really needed after reload, and
11242 store the result in cfun. */
11243 unsigned int incoming_stack_boundary
11244 = (crtl->parm_stack_boundary > ix86_incoming_stack_boundary
11245 ? crtl->parm_stack_boundary : ix86_incoming_stack_boundary);
11246 unsigned int stack_realign = (incoming_stack_boundary
11247 < (crtl->is_leaf
11248 ? crtl->max_used_stack_slot_alignment
11249 : crtl->stack_alignment_needed));
11251 if (crtl->stack_realign_finalized)
11253 /* After stack_realign_needed is finalized, we can no longer
11254 change it. */
11255 gcc_assert (crtl->stack_realign_needed == stack_realign);
11256 return;
11259 /* If the only reason for frame_pointer_needed is that we conservatively
11260 assumed stack realignment might be needed, but in the end nothing that
11261 needed the stack alignment had been spilled, clear frame_pointer_needed
11262 and say we don't need stack realignment. */
11263 if (stack_realign
11264 && frame_pointer_needed
11265 && crtl->is_leaf
11266 && flag_omit_frame_pointer
11267 && crtl->sp_is_unchanging
11268 && !ix86_current_function_calls_tls_descriptor
11269 && !crtl->accesses_prior_frames
11270 && !cfun->calls_alloca
11271 && !crtl->calls_eh_return
11272 && !(flag_stack_check && STACK_CHECK_MOVING_SP)
11273 && !ix86_frame_pointer_required ()
11274 && get_frame_size () == 0
11275 && ix86_nsaved_sseregs () == 0
11276 && ix86_varargs_gpr_size + ix86_varargs_fpr_size == 0)
11278 HARD_REG_SET set_up_by_prologue, prologue_used;
11279 basic_block bb;
11281 CLEAR_HARD_REG_SET (prologue_used);
11282 CLEAR_HARD_REG_SET (set_up_by_prologue);
11283 add_to_hard_reg_set (&set_up_by_prologue, Pmode, STACK_POINTER_REGNUM);
11284 add_to_hard_reg_set (&set_up_by_prologue, Pmode, ARG_POINTER_REGNUM);
11285 add_to_hard_reg_set (&set_up_by_prologue, Pmode,
11286 HARD_FRAME_POINTER_REGNUM);
11287 FOR_EACH_BB_FN (bb, cfun)
11289 rtx_insn *insn;
11290 FOR_BB_INSNS (bb, insn)
11291 if (NONDEBUG_INSN_P (insn)
11292 && requires_stack_frame_p (insn, prologue_used,
11293 set_up_by_prologue))
11295 crtl->stack_realign_needed = stack_realign;
11296 crtl->stack_realign_finalized = true;
11297 return;
11301 /* If drap has been set, but it actually isn't live at the start
11302 of the function, there is no reason to set it up. */
11303 if (crtl->drap_reg)
11305 basic_block bb = ENTRY_BLOCK_PTR_FOR_FN (cfun)->next_bb;
11306 if (! REGNO_REG_SET_P (DF_LR_IN (bb), REGNO (crtl->drap_reg)))
11308 crtl->drap_reg = NULL_RTX;
11309 crtl->need_drap = false;
11312 else
11313 cfun->machine->no_drap_save_restore = true;
11315 frame_pointer_needed = false;
11316 stack_realign = false;
11317 crtl->max_used_stack_slot_alignment = incoming_stack_boundary;
11318 crtl->stack_alignment_needed = incoming_stack_boundary;
11319 crtl->stack_alignment_estimated = incoming_stack_boundary;
11320 if (crtl->preferred_stack_boundary > incoming_stack_boundary)
11321 crtl->preferred_stack_boundary = incoming_stack_boundary;
11322 df_finish_pass (true);
11323 df_scan_alloc (NULL);
11324 df_scan_blocks ();
11325 df_compute_regs_ever_live (true);
11326 df_analyze ();
11329 crtl->stack_realign_needed = stack_realign;
11330 crtl->stack_realign_finalized = true;
11333 /* Delete SET_GOT right after entry block if it is allocated to reg. */
11335 static void
11336 ix86_elim_entry_set_got (rtx reg)
11338 basic_block bb = ENTRY_BLOCK_PTR_FOR_FN (cfun)->next_bb;
11339 rtx_insn *c_insn = BB_HEAD (bb);
11340 if (!NONDEBUG_INSN_P (c_insn))
11341 c_insn = next_nonnote_nondebug_insn (c_insn);
11342 if (c_insn && NONJUMP_INSN_P (c_insn))
11344 rtx pat = PATTERN (c_insn);
11345 if (GET_CODE (pat) == PARALLEL)
11347 rtx vec = XVECEXP (pat, 0, 0);
11348 if (GET_CODE (vec) == SET
11349 && XINT (XEXP (vec, 1), 1) == UNSPEC_SET_GOT
11350 && REGNO (XEXP (vec, 0)) == REGNO (reg))
11351 delete_insn (c_insn);
11356 /* Expand the prologue into a bunch of separate insns. */
11358 void
11359 ix86_expand_prologue (void)
11361 struct machine_function *m = cfun->machine;
11362 rtx insn, t;
11363 struct ix86_frame frame;
11364 HOST_WIDE_INT allocate;
11365 bool int_registers_saved;
11366 bool sse_registers_saved;
11368 ix86_finalize_stack_realign_flags ();
11370 /* DRAP should not coexist with stack_realign_fp */
11371 gcc_assert (!(crtl->drap_reg && stack_realign_fp));
11373 memset (&m->fs, 0, sizeof (m->fs));
11375 /* Initialize CFA state for before the prologue. */
11376 m->fs.cfa_reg = stack_pointer_rtx;
11377 m->fs.cfa_offset = INCOMING_FRAME_SP_OFFSET;
11379 /* Track SP offset to the CFA. We continue tracking this after we've
11380 swapped the CFA register away from SP. In the case of re-alignment
11381 this is fudged; we're interested in offsets within the local frame. */
11382 m->fs.sp_offset = INCOMING_FRAME_SP_OFFSET;
11383 m->fs.sp_valid = true;
11385 ix86_compute_frame_layout (&frame);
11387 if (!TARGET_64BIT && ix86_function_ms_hook_prologue (current_function_decl))
11389 /* We should have already generated an error for any use of
11390 ms_hook on a nested function. */
11391 gcc_checking_assert (!ix86_static_chain_on_stack);
11393 /* Check if profiling is active and whether we shall use the
11394 profiling-before-prologue variant. If so, sorry. */
11395 if (crtl->profile && flag_fentry != 0)
11396 sorry ("ms_hook_prologue attribute isn%'t compatible "
11397 "with -mfentry for 32-bit");
11399 /* In ix86_asm_output_function_label we emitted:
11400 8b ff movl.s %edi,%edi
11401 55 push %ebp
11402 8b ec movl.s %esp,%ebp
11404 This matches the hookable function prologue in Win32 API
11405 functions in Microsoft Windows XP Service Pack 2 and newer.
11406 Wine uses this to enable Windows apps to hook the Win32 API
11407 functions provided by Wine.
11409 What that means is that we've already set up the frame pointer. */
11411 if (frame_pointer_needed
11412 && !(crtl->drap_reg && crtl->stack_realign_needed))
11414 rtx push, mov;
11416 /* We've decided to use the frame pointer already set up.
11417 Describe this to the unwinder by pretending that both
11418 push and mov insns happen right here.
11420 Putting the unwind info here at the end of the ms_hook
11421 is done so that we can make absolutely certain we get
11422 the required byte sequence at the start of the function,
11423 rather than relying on an assembler that can produce
11424 the exact encoding required.
11426 However, it does mean (in the unpatched case) that we have
11427 a 1-insn window where the asynchronous unwind info is
11428 incorrect. On the other hand, if we placed the unwind info at
11429 its correct location we would have incorrect unwind info
11430 in the patched case. This is probably all moot since
11431 I don't expect Wine generates dwarf2 unwind info for the
11432 system libraries that use this feature. */
11434 insn = emit_insn (gen_blockage ());
11436 push = gen_push (hard_frame_pointer_rtx);
11437 mov = gen_rtx_SET (hard_frame_pointer_rtx,
11438 stack_pointer_rtx);
11439 RTX_FRAME_RELATED_P (push) = 1;
11440 RTX_FRAME_RELATED_P (mov) = 1;
11442 RTX_FRAME_RELATED_P (insn) = 1;
11443 add_reg_note (insn, REG_FRAME_RELATED_EXPR,
11444 gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, push, mov)));
11446 /* Note that gen_push incremented m->fs.cfa_offset, even
11447 though we didn't emit the push insn here. */
11448 m->fs.cfa_reg = hard_frame_pointer_rtx;
11449 m->fs.fp_offset = m->fs.cfa_offset;
11450 m->fs.fp_valid = true;
11452 else
11454 /* The frame pointer is not needed so pop %ebp again.
11455 This leaves us with a pristine state. */
11456 emit_insn (gen_pop (hard_frame_pointer_rtx));
11460 /* The first insn of a function that accepts its static chain on the
11461 stack is to push the register that would be filled in by a direct
11462 call. This insn will be skipped by the trampoline. */
11463 else if (ix86_static_chain_on_stack)
11465 insn = emit_insn (gen_push (ix86_static_chain (cfun->decl, false)));
11466 emit_insn (gen_blockage ());
11468 /* We don't want to interpret this push insn as a register save,
11469 only as a stack adjustment. The real copy of the register as
11470 a save will be done later, if needed. */
11471 t = plus_constant (Pmode, stack_pointer_rtx, -UNITS_PER_WORD);
11472 t = gen_rtx_SET (stack_pointer_rtx, t);
11473 add_reg_note (insn, REG_CFA_ADJUST_CFA, t);
11474 RTX_FRAME_RELATED_P (insn) = 1;
11477 /* Emit prologue code to adjust stack alignment and set up DRAP, in case
11478 DRAP is needed and stack realignment is really needed after reload. */
11479 if (stack_realign_drap)
11481 int align_bytes = crtl->stack_alignment_needed / BITS_PER_UNIT;
11483 /* Only need to push the parameter pointer reg if it is a call-preserved register. */
11484 if (!call_used_regs[REGNO (crtl->drap_reg)])
11486 /* Push arg pointer reg */
11487 insn = emit_insn (gen_push (crtl->drap_reg));
11488 RTX_FRAME_RELATED_P (insn) = 1;
11491 /* Grab the argument pointer. */
11492 t = plus_constant (Pmode, stack_pointer_rtx, m->fs.sp_offset);
11493 insn = emit_insn (gen_rtx_SET (crtl->drap_reg, t));
11494 RTX_FRAME_RELATED_P (insn) = 1;
11495 m->fs.cfa_reg = crtl->drap_reg;
11496 m->fs.cfa_offset = 0;
11498 /* Align the stack. */
11499 insn = emit_insn (ix86_gen_andsp (stack_pointer_rtx,
11500 stack_pointer_rtx,
11501 GEN_INT (-align_bytes)));
11502 RTX_FRAME_RELATED_P (insn) = 1;
11504 /* Replicate the return address on the stack so that return
11505 address can be reached via (argp - 1) slot. This is needed
11506 to implement macro RETURN_ADDR_RTX and intrinsic function
11507 expand_builtin_return_addr etc. */
11508 t = plus_constant (Pmode, crtl->drap_reg, -UNITS_PER_WORD);
11509 t = gen_frame_mem (word_mode, t);
11510 insn = emit_insn (gen_push (t));
11511 RTX_FRAME_RELATED_P (insn) = 1;
11513 /* For the purposes of frame and register save area addressing,
11514 we've started over with a new frame. */
11515 m->fs.sp_offset = INCOMING_FRAME_SP_OFFSET;
11516 m->fs.realigned = true;
11519 int_registers_saved = (frame.nregs == 0);
11520 sse_registers_saved = (frame.nsseregs == 0);
11522 if (frame_pointer_needed && !m->fs.fp_valid)
11524 /* Note: AT&T enter does NOT have reversed args. Enter is probably
11525 slower on all targets. Also sdb doesn't like it. */
11526 insn = emit_insn (gen_push (hard_frame_pointer_rtx));
11527 RTX_FRAME_RELATED_P (insn) = 1;
11529 /* Push registers now, before setting the frame pointer
11530 on SEH target. */
11531 if (!int_registers_saved
11532 && TARGET_SEH
11533 && !frame.save_regs_using_mov)
11535 ix86_emit_save_regs ();
11536 int_registers_saved = true;
11537 gcc_assert (m->fs.sp_offset == frame.reg_save_offset);
11540 if (m->fs.sp_offset == frame.hard_frame_pointer_offset)
11542 insn = emit_move_insn (hard_frame_pointer_rtx, stack_pointer_rtx);
11543 RTX_FRAME_RELATED_P (insn) = 1;
11545 if (m->fs.cfa_reg == stack_pointer_rtx)
11546 m->fs.cfa_reg = hard_frame_pointer_rtx;
11547 m->fs.fp_offset = m->fs.sp_offset;
11548 m->fs.fp_valid = true;
11552 if (!int_registers_saved)
11554 /* If saving registers via PUSH, do so now. */
11555 if (!frame.save_regs_using_mov)
11557 ix86_emit_save_regs ();
11558 int_registers_saved = true;
11559 gcc_assert (m->fs.sp_offset == frame.reg_save_offset);
11562 /* When using the red zone we may start register saving before allocating
11563 the stack frame, saving one cycle of the prologue. However, avoid
11564 doing this if we have to probe the stack; at least on x86_64 the
11565 stack probe can turn into a call that clobbers a red zone location. */
11566 else if (ix86_using_red_zone ()
11567 && (! TARGET_STACK_PROBE
11568 || frame.stack_pointer_offset < CHECK_STACK_LIMIT))
11570 ix86_emit_save_regs_using_mov (frame.reg_save_offset);
11571 int_registers_saved = true;
11575 if (stack_realign_fp)
11577 int align_bytes = crtl->stack_alignment_needed / BITS_PER_UNIT;
11578 gcc_assert (align_bytes > MIN_STACK_BOUNDARY / BITS_PER_UNIT);
11580 /* The computation of the size of the re-aligned stack frame means
11581 that we must allocate the size of the register save area before
11582 performing the actual alignment. Otherwise we cannot guarantee
11583 that there's enough storage above the realignment point. */
11584 if (m->fs.sp_offset != frame.sse_reg_save_offset)
11585 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
11586 GEN_INT (m->fs.sp_offset
11587 - frame.sse_reg_save_offset),
11588 -1, false);
11590 /* Align the stack. */
11591 insn = emit_insn (ix86_gen_andsp (stack_pointer_rtx,
11592 stack_pointer_rtx,
11593 GEN_INT (-align_bytes)));
11595 /* For the purposes of register save area addressing, the stack
11596 pointer is no longer valid. As for the value of sp_offset,
11597 see ix86_compute_frame_layout, which we need to match in order
11598 to pass verification of stack_pointer_offset at the end. */
11599 m->fs.sp_offset = (m->fs.sp_offset + align_bytes) & -align_bytes;
11600 m->fs.sp_valid = false;
11603 allocate = frame.stack_pointer_offset - m->fs.sp_offset;
11605 if (flag_stack_usage_info)
11607 /* We start to count from ARG_POINTER. */
11608 HOST_WIDE_INT stack_size = frame.stack_pointer_offset;
11610 /* If it was realigned, take into account the fake frame. */
11611 if (stack_realign_drap)
11613 if (ix86_static_chain_on_stack)
11614 stack_size += UNITS_PER_WORD;
11616 if (!call_used_regs[REGNO (crtl->drap_reg)])
11617 stack_size += UNITS_PER_WORD;
11619 /* This over-estimates by 1 minimal-stack-alignment-unit but
11620 mitigates that by counting in the new return address slot. */
11621 current_function_dynamic_stack_size
11622 += crtl->stack_alignment_needed / BITS_PER_UNIT;
11625 current_function_static_stack_size = stack_size;
11628 /* On SEH target with very large frame size, allocate an area to save
11629 SSE registers (as the very large allocation won't be described). */
11630 if (TARGET_SEH
11631 && frame.stack_pointer_offset > SEH_MAX_FRAME_SIZE
11632 && !sse_registers_saved)
11634 HOST_WIDE_INT sse_size =
11635 frame.sse_reg_save_offset - frame.reg_save_offset;
11637 gcc_assert (int_registers_saved);
11639 /* No need to do stack checking as the area will be immediately
11640 written. */
11641 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
11642 GEN_INT (-sse_size), -1,
11643 m->fs.cfa_reg == stack_pointer_rtx);
11644 allocate -= sse_size;
11645 ix86_emit_save_sse_regs_using_mov (frame.sse_reg_save_offset);
11646 sse_registers_saved = true;
11649 /* The stack has already been decremented by the instruction calling us
11650 so probe if the size is non-negative to preserve the protection area. */
11651 if (allocate >= 0 && flag_stack_check == STATIC_BUILTIN_STACK_CHECK)
11653 /* We expect the registers to be saved when probes are used. */
11654 gcc_assert (int_registers_saved);
11656 if (STACK_CHECK_MOVING_SP)
11658 if (!(crtl->is_leaf && !cfun->calls_alloca
11659 && allocate <= PROBE_INTERVAL))
11661 ix86_adjust_stack_and_probe (allocate);
11662 allocate = 0;
11665 else
11667 HOST_WIDE_INT size = allocate;
11669 if (TARGET_64BIT && size >= (HOST_WIDE_INT) 0x80000000)
11670 size = 0x80000000 - STACK_CHECK_PROTECT - 1;
11672 if (TARGET_STACK_PROBE)
11674 if (crtl->is_leaf && !cfun->calls_alloca)
11676 if (size > PROBE_INTERVAL)
11677 ix86_emit_probe_stack_range (0, size);
11679 else
11680 ix86_emit_probe_stack_range (0, size + STACK_CHECK_PROTECT);
11682 else
11684 if (crtl->is_leaf && !cfun->calls_alloca)
11686 if (size > PROBE_INTERVAL && size > STACK_CHECK_PROTECT)
11687 ix86_emit_probe_stack_range (STACK_CHECK_PROTECT,
11688 size - STACK_CHECK_PROTECT);
11690 else
11691 ix86_emit_probe_stack_range (STACK_CHECK_PROTECT, size);
11696 if (allocate == 0)
11698 else if (!ix86_target_stack_probe ()
11699 || frame.stack_pointer_offset < CHECK_STACK_LIMIT)
11701 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
11702 GEN_INT (-allocate), -1,
11703 m->fs.cfa_reg == stack_pointer_rtx);
11705 else
11707 rtx eax = gen_rtx_REG (Pmode, AX_REG);
11708 rtx r10 = NULL;
11709 rtx (*adjust_stack_insn)(rtx, rtx, rtx);
11710 const bool sp_is_cfa_reg = (m->fs.cfa_reg == stack_pointer_rtx);
11711 bool eax_live = ix86_eax_live_at_start_p ();
11712 bool r10_live = false;
11714 if (TARGET_64BIT)
11715 r10_live = (DECL_STATIC_CHAIN (current_function_decl) != 0);
11717 if (eax_live)
11719 insn = emit_insn (gen_push (eax));
11720 allocate -= UNITS_PER_WORD;
11721 /* Note that SEH directives need to continue tracking the stack
11722 pointer even after the frame pointer has been set up. */
11723 if (sp_is_cfa_reg || TARGET_SEH)
11725 if (sp_is_cfa_reg)
11726 m->fs.cfa_offset += UNITS_PER_WORD;
11727 RTX_FRAME_RELATED_P (insn) = 1;
11728 add_reg_note (insn, REG_FRAME_RELATED_EXPR,
11729 gen_rtx_SET (stack_pointer_rtx,
11730 plus_constant (Pmode, stack_pointer_rtx,
11731 -UNITS_PER_WORD)));
11735 if (r10_live)
11737 r10 = gen_rtx_REG (Pmode, R10_REG);
11738 insn = emit_insn (gen_push (r10));
11739 allocate -= UNITS_PER_WORD;
11740 if (sp_is_cfa_reg || TARGET_SEH)
11742 if (sp_is_cfa_reg)
11743 m->fs.cfa_offset += UNITS_PER_WORD;
11744 RTX_FRAME_RELATED_P (insn) = 1;
11745 add_reg_note (insn, REG_FRAME_RELATED_EXPR,
11746 gen_rtx_SET (stack_pointer_rtx,
11747 plus_constant (Pmode, stack_pointer_rtx,
11748 -UNITS_PER_WORD)));
11752 emit_move_insn (eax, GEN_INT (allocate));
11753 emit_insn (ix86_gen_allocate_stack_worker (eax, eax));
11755 /* Use the fact that AX still contains ALLOCATE. */
11756 adjust_stack_insn = (Pmode == DImode
11757 ? gen_pro_epilogue_adjust_stack_di_sub
11758 : gen_pro_epilogue_adjust_stack_si_sub);
11760 insn = emit_insn (adjust_stack_insn (stack_pointer_rtx,
11761 stack_pointer_rtx, eax));
11763 if (sp_is_cfa_reg || TARGET_SEH)
11765 if (sp_is_cfa_reg)
11766 m->fs.cfa_offset += allocate;
11767 RTX_FRAME_RELATED_P (insn) = 1;
11768 add_reg_note (insn, REG_FRAME_RELATED_EXPR,
11769 gen_rtx_SET (stack_pointer_rtx,
11770 plus_constant (Pmode, stack_pointer_rtx,
11771 -allocate)));
11773 m->fs.sp_offset += allocate;
11775 /* Use stack_pointer_rtx for relative addressing so that code
11776 works for realigned stack, too. */
11777 if (r10_live && eax_live)
11779 t = gen_rtx_PLUS (Pmode, stack_pointer_rtx, eax);
11780 emit_move_insn (gen_rtx_REG (word_mode, R10_REG),
11781 gen_frame_mem (word_mode, t));
11782 t = plus_constant (Pmode, t, UNITS_PER_WORD);
11783 emit_move_insn (gen_rtx_REG (word_mode, AX_REG),
11784 gen_frame_mem (word_mode, t));
11786 else if (eax_live || r10_live)
11788 t = gen_rtx_PLUS (Pmode, stack_pointer_rtx, eax);
11789 emit_move_insn (gen_rtx_REG (word_mode,
11790 (eax_live ? AX_REG : R10_REG)),
11791 gen_frame_mem (word_mode, t));
11794 gcc_assert (m->fs.sp_offset == frame.stack_pointer_offset);
11796 /* If we haven't already set up the frame pointer, do so now. */
11797 if (frame_pointer_needed && !m->fs.fp_valid)
11799 insn = ix86_gen_add3 (hard_frame_pointer_rtx, stack_pointer_rtx,
11800 GEN_INT (frame.stack_pointer_offset
11801 - frame.hard_frame_pointer_offset));
11802 insn = emit_insn (insn);
11803 RTX_FRAME_RELATED_P (insn) = 1;
11804 add_reg_note (insn, REG_CFA_ADJUST_CFA, NULL);
11806 if (m->fs.cfa_reg == stack_pointer_rtx)
11807 m->fs.cfa_reg = hard_frame_pointer_rtx;
11808 m->fs.fp_offset = frame.hard_frame_pointer_offset;
11809 m->fs.fp_valid = true;
11812 if (!int_registers_saved)
11813 ix86_emit_save_regs_using_mov (frame.reg_save_offset);
11814 if (!sse_registers_saved)
11815 ix86_emit_save_sse_regs_using_mov (frame.sse_reg_save_offset);
11817 /* For mcount profiling in 32-bit PIC mode we need to emit SET_GOT
11818 in the prologue. */
11819 if (!TARGET_64BIT && pic_offset_table_rtx && crtl->profile && !flag_fentry)
11821 rtx pic = gen_rtx_REG (Pmode, REAL_PIC_OFFSET_TABLE_REGNUM);
11822 insn = emit_insn (gen_set_got (pic));
11823 RTX_FRAME_RELATED_P (insn) = 1;
11824 add_reg_note (insn, REG_CFA_FLUSH_QUEUE, NULL_RTX);
11825 emit_insn (gen_prologue_use (pic));
11826 /* Delete the SET_GOT insn if it was already emitted and allocated to
11827 REAL_PIC_OFFSET_TABLE_REGNUM. */
11828 ix86_elim_entry_set_got (pic);
11831 if (crtl->drap_reg && !crtl->stack_realign_needed)
11833 /* vDRAP is set up, but after reload it turns out stack realignment
11834 isn't necessary; here we emit prologue code to set up DRAP
11835 without the stack realignment adjustment. */
11836 t = choose_baseaddr (0);
11837 emit_insn (gen_rtx_SET (crtl->drap_reg, t));
11840 /* Prevent instructions from being scheduled into the register save push
11841 sequence when access to the red-zone area is done through the frame pointer.
11842 The offset between the frame pointer and the stack pointer is calculated
11843 relative to the value of the stack pointer at the end of the function
11844 prologue, and moving instructions that access the red-zone area via the frame
11845 pointer inside the push sequence violates this assumption. */
11846 if (frame_pointer_needed && frame.red_zone_size)
11847 emit_insn (gen_memory_blockage ());
11849 /* Emit cld instruction if stringops are used in the function. */
11850 if (TARGET_CLD && ix86_current_function_needs_cld)
11851 emit_insn (gen_cld ());
11853 /* SEH requires that the prologue end within 256 bytes of the start of
11854 the function. Prevent instruction schedules that would extend that.
11855 Further, prevent alloca modifications to the stack pointer from being
11856 combined with prologue modifications. */
11857 if (TARGET_SEH)
11858 emit_insn (gen_prologue_use (stack_pointer_rtx));
11861 /* Emit code to restore REG using a POP insn. */
11863 static void
11864 ix86_emit_restore_reg_using_pop (rtx reg)
11866 struct machine_function *m = cfun->machine;
11867 rtx_insn *insn = emit_insn (gen_pop (reg));
11869 ix86_add_cfa_restore_note (insn, reg, m->fs.sp_offset);
11870 m->fs.sp_offset -= UNITS_PER_WORD;
11872 if (m->fs.cfa_reg == crtl->drap_reg
11873 && REGNO (reg) == REGNO (crtl->drap_reg))
11875 /* Previously we'd represented the CFA as an expression
11876 like *(%ebp - 8). We've just popped that value from
11877 the stack, which means we need to reset the CFA to
11878 the drap register. This will remain until we restore
11879 the stack pointer. */
11880 add_reg_note (insn, REG_CFA_DEF_CFA, reg);
11881 RTX_FRAME_RELATED_P (insn) = 1;
11883 /* This means that the DRAP register is valid for addressing too. */
11884 m->fs.drap_valid = true;
11885 return;
11888 if (m->fs.cfa_reg == stack_pointer_rtx)
11890 rtx x = plus_constant (Pmode, stack_pointer_rtx, UNITS_PER_WORD);
11891 x = gen_rtx_SET (stack_pointer_rtx, x);
11892 add_reg_note (insn, REG_CFA_ADJUST_CFA, x);
11893 RTX_FRAME_RELATED_P (insn) = 1;
11895 m->fs.cfa_offset -= UNITS_PER_WORD;
11898 /* When the frame pointer is the CFA, and we pop it, we are
11899 swapping back to the stack pointer as the CFA. This happens
11900 for stack frames that don't allocate other data, so we assume
11901 the stack pointer is now pointing at the return address, i.e.
11902 the function entry state, which makes the offset 1 word. */
11903 if (reg == hard_frame_pointer_rtx)
11905 m->fs.fp_valid = false;
11906 if (m->fs.cfa_reg == hard_frame_pointer_rtx)
11908 m->fs.cfa_reg = stack_pointer_rtx;
11909 m->fs.cfa_offset -= UNITS_PER_WORD;
11911 add_reg_note (insn, REG_CFA_DEF_CFA,
11912 gen_rtx_PLUS (Pmode, stack_pointer_rtx,
11913 GEN_INT (m->fs.cfa_offset)));
11914 RTX_FRAME_RELATED_P (insn) = 1;
11919 /* Emit code to restore saved registers using POP insns. */
11921 static void
11922 ix86_emit_restore_regs_using_pop (void)
11924 unsigned int regno;
11926 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
11927 if (!SSE_REGNO_P (regno) && ix86_save_reg (regno, false))
11928 ix86_emit_restore_reg_using_pop (gen_rtx_REG (word_mode, regno));
11931 /* Emit code and notes for the LEAVE instruction. */
11933 static void
11934 ix86_emit_leave (void)
11936 struct machine_function *m = cfun->machine;
11937 rtx_insn *insn = emit_insn (ix86_gen_leave ());
11939 ix86_add_queued_cfa_restore_notes (insn);
11941 gcc_assert (m->fs.fp_valid);
11942 m->fs.sp_valid = true;
11943 m->fs.sp_offset = m->fs.fp_offset - UNITS_PER_WORD;
11944 m->fs.fp_valid = false;
11946 if (m->fs.cfa_reg == hard_frame_pointer_rtx)
11948 m->fs.cfa_reg = stack_pointer_rtx;
11949 m->fs.cfa_offset = m->fs.sp_offset;
11951 add_reg_note (insn, REG_CFA_DEF_CFA,
11952 plus_constant (Pmode, stack_pointer_rtx,
11953 m->fs.sp_offset));
11954 RTX_FRAME_RELATED_P (insn) = 1;
11956 ix86_add_cfa_restore_note (insn, hard_frame_pointer_rtx,
11957 m->fs.fp_offset);
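/* LEAVE is equivalent to "movq %rbp, %rsp; popq %rbp" (or the 32-bit
   counterpart), which is why the stack pointer becomes valid again at
   fp_offset - UNITS_PER_WORD while the frame pointer ceases to be
   valid.  */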
11960 /* Emit code to restore saved registers using MOV insns.
11961 First register is restored from CFA - CFA_OFFSET. */
11962 static void
11963 ix86_emit_restore_regs_using_mov (HOST_WIDE_INT cfa_offset,
11964 bool maybe_eh_return)
11966 struct machine_function *m = cfun->machine;
11967 unsigned int regno;
11969 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
11970 if (!SSE_REGNO_P (regno) && ix86_save_reg (regno, maybe_eh_return))
11972 rtx reg = gen_rtx_REG (word_mode, regno);
11973 rtx mem;
11974 rtx_insn *insn;
11976 mem = choose_baseaddr (cfa_offset);
11977 mem = gen_frame_mem (word_mode, mem);
11978 insn = emit_move_insn (reg, mem);
11980 if (m->fs.cfa_reg == crtl->drap_reg && regno == REGNO (crtl->drap_reg))
11982 /* Previously we'd represented the CFA as an expression
11983 like *(%ebp - 8). We've just popped that value from
11984 the stack, which means we need to reset the CFA to
11985 the drap register. This will remain until we restore
11986 the stack pointer. */
11987 add_reg_note (insn, REG_CFA_DEF_CFA, reg);
11988 RTX_FRAME_RELATED_P (insn) = 1;
11990 /* This means that the DRAP register is valid for addressing. */
11991 m->fs.drap_valid = true;
11993 else
11994 ix86_add_cfa_restore_note (NULL, reg, cfa_offset);
11996 cfa_offset -= UNITS_PER_WORD;
12000 /* Emit code to restore saved SSE registers using MOV insns.
12001 First register is restored from CFA - CFA_OFFSET. */
12002 static void
12003 ix86_emit_restore_sse_regs_using_mov (HOST_WIDE_INT cfa_offset,
12004 bool maybe_eh_return)
12006 unsigned int regno;
12008 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
12009 if (SSE_REGNO_P (regno) && ix86_save_reg (regno, maybe_eh_return))
12011 rtx reg = gen_rtx_REG (V4SFmode, regno);
12012 rtx mem;
12014 mem = choose_baseaddr (cfa_offset);
12015 mem = gen_rtx_MEM (V4SFmode, mem);
12016 set_mem_align (mem, 128);
12017 emit_move_insn (reg, mem);
12019 ix86_add_cfa_restore_note (NULL, reg, cfa_offset);
12021 cfa_offset -= 16;
12025 /* Restore function stack, frame, and registers. */
12027 void
12028 ix86_expand_epilogue (int style)
12030 struct machine_function *m = cfun->machine;
12031 struct machine_frame_state frame_state_save = m->fs;
12032 struct ix86_frame frame;
12033 bool restore_regs_via_mov;
12034 bool using_drap;
12036 ix86_finalize_stack_realign_flags ();
12037 ix86_compute_frame_layout (&frame);
12039 m->fs.sp_valid = (!frame_pointer_needed
12040 || (crtl->sp_is_unchanging
12041 && !stack_realign_fp));
12042 gcc_assert (!m->fs.sp_valid
12043 || m->fs.sp_offset == frame.stack_pointer_offset);
12045 /* The FP must be valid if the frame pointer is present. */
12046 gcc_assert (frame_pointer_needed == m->fs.fp_valid);
12047 gcc_assert (!m->fs.fp_valid
12048 || m->fs.fp_offset == frame.hard_frame_pointer_offset);
12050 /* We must have *some* valid pointer to the stack frame. */
12051 gcc_assert (m->fs.sp_valid || m->fs.fp_valid);
12053 /* The DRAP is never valid at this point. */
12054 gcc_assert (!m->fs.drap_valid);
12056 /* See the comment about red zone and frame
12057 pointer usage in ix86_expand_prologue. */
12058 if (frame_pointer_needed && frame.red_zone_size)
12059 emit_insn (gen_memory_blockage ());
12061 using_drap = crtl->drap_reg && crtl->stack_realign_needed;
12062 gcc_assert (!using_drap || m->fs.cfa_reg == crtl->drap_reg);
12064 /* Determine the CFA offset of the end of the red-zone. */
12065 m->fs.red_zone_offset = 0;
12066 if (ix86_using_red_zone () && crtl->args.pops_args < 65536)
12068 /* The red-zone begins below the return address. */
12069 m->fs.red_zone_offset = RED_ZONE_SIZE + UNITS_PER_WORD;
12071 /* When the register save area is in the aligned portion of
12072 the stack, determine the maximum runtime displacement that
12073 matches up with the aligned frame. */
12074 if (stack_realign_drap)
12075 m->fs.red_zone_offset -= (crtl->stack_alignment_needed / BITS_PER_UNIT
12076 + UNITS_PER_WORD);
12079 /* Special care must be taken for the normal return case of a function
12080 using eh_return: the eax and edx registers are marked as saved, but
12081 not restored along this path. Adjust the save location to match. */
12082 if (crtl->calls_eh_return && style != 2)
12083 frame.reg_save_offset -= 2 * UNITS_PER_WORD;
12085 /* EH_RETURN requires the use of moves to function properly. */
12086 if (crtl->calls_eh_return)
12087 restore_regs_via_mov = true;
12088 /* SEH requires the use of pops to identify the epilogue. */
12089 else if (TARGET_SEH)
12090 restore_regs_via_mov = false;
12091 /* If we're only restoring one register and sp is not valid, then
12092 use a move instruction to restore the register, since it's
12093 less work than reloading sp and popping the register. */
12094 else if (!m->fs.sp_valid && frame.nregs <= 1)
12095 restore_regs_via_mov = true;
12096 else if (TARGET_EPILOGUE_USING_MOVE
12097 && cfun->machine->use_fast_prologue_epilogue
12098 && (frame.nregs > 1
12099 || m->fs.sp_offset != frame.reg_save_offset))
12100 restore_regs_via_mov = true;
12101 else if (frame_pointer_needed
12102 && !frame.nregs
12103 && m->fs.sp_offset != frame.reg_save_offset)
12104 restore_regs_via_mov = true;
12105 else if (frame_pointer_needed
12106 && TARGET_USE_LEAVE
12107 && cfun->machine->use_fast_prologue_epilogue
12108 && frame.nregs == 1)
12109 restore_regs_via_mov = true;
12110 else
12111 restore_regs_via_mov = false;
12113 if (restore_regs_via_mov || frame.nsseregs)
12115 /* Ensure that the entire register save area is addressable via
12116 the stack pointer, if we will restore via sp. */
12117 if (TARGET_64BIT
12118 && m->fs.sp_offset > 0x7fffffff
12119 && !(m->fs.fp_valid || m->fs.drap_valid)
12120 && (frame.nsseregs + frame.nregs) != 0)
12122 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
12123 GEN_INT (m->fs.sp_offset
12124 - frame.sse_reg_save_offset),
12125 style,
12126 m->fs.cfa_reg == stack_pointer_rtx);
12130 /* If there are any SSE registers to restore, then we have to do it
12131 via moves, since there's obviously no pop for SSE regs. */
12132 if (frame.nsseregs)
12133 ix86_emit_restore_sse_regs_using_mov (frame.sse_reg_save_offset,
12134 style == 2);
12136 if (restore_regs_via_mov)
12138 rtx t;
12140 if (frame.nregs)
12141 ix86_emit_restore_regs_using_mov (frame.reg_save_offset, style == 2);
12143 /* eh_return epilogues need %ecx added to the stack pointer. */
12144 if (style == 2)
12146 rtx sa = EH_RETURN_STACKADJ_RTX;
12147 rtx_insn *insn;
12149 /* Stack align doesn't work with eh_return. */
12150 gcc_assert (!stack_realign_drap);
12151 /* Neither do regparm nested functions. */
12152 gcc_assert (!ix86_static_chain_on_stack);
12154 if (frame_pointer_needed)
12156 t = gen_rtx_PLUS (Pmode, hard_frame_pointer_rtx, sa);
12157 t = plus_constant (Pmode, t, m->fs.fp_offset - UNITS_PER_WORD);
12158 emit_insn (gen_rtx_SET (sa, t));
12160 t = gen_frame_mem (Pmode, hard_frame_pointer_rtx);
12161 insn = emit_move_insn (hard_frame_pointer_rtx, t);
12163 /* Note that we use SA as a temporary CFA, as the return
12164 address is at the proper place relative to it. We
12165 pretend this happens at the FP restore insn because
12166 prior to this insn the FP would be stored at the wrong
12167 offset relative to SA, and after this insn we have no
12168 other reasonable register to use for the CFA. We don't
12169 bother resetting the CFA to the SP for the duration of
12170 the return insn. */
12171 add_reg_note (insn, REG_CFA_DEF_CFA,
12172 plus_constant (Pmode, sa, UNITS_PER_WORD));
12173 ix86_add_queued_cfa_restore_notes (insn);
12174 add_reg_note (insn, REG_CFA_RESTORE, hard_frame_pointer_rtx);
12175 RTX_FRAME_RELATED_P (insn) = 1;
12177 m->fs.cfa_reg = sa;
12178 m->fs.cfa_offset = UNITS_PER_WORD;
12179 m->fs.fp_valid = false;
12181 pro_epilogue_adjust_stack (stack_pointer_rtx, sa,
12182 const0_rtx, style, false);
12184 else
12186 t = gen_rtx_PLUS (Pmode, stack_pointer_rtx, sa);
12187 t = plus_constant (Pmode, t, m->fs.sp_offset - UNITS_PER_WORD);
12188 insn = emit_insn (gen_rtx_SET (stack_pointer_rtx, t));
12189 ix86_add_queued_cfa_restore_notes (insn);
12191 gcc_assert (m->fs.cfa_reg == stack_pointer_rtx);
12192 if (m->fs.cfa_offset != UNITS_PER_WORD)
12194 m->fs.cfa_offset = UNITS_PER_WORD;
12195 add_reg_note (insn, REG_CFA_DEF_CFA,
12196 plus_constant (Pmode, stack_pointer_rtx,
12197 UNITS_PER_WORD));
12198 RTX_FRAME_RELATED_P (insn) = 1;
12201 m->fs.sp_offset = UNITS_PER_WORD;
12202 m->fs.sp_valid = true;
12205 else
12207 /* SEH requires that the function end with (1) a stack adjustment
12208 if necessary, (2) a sequence of pops, and (3) a return or
12209 jump instruction. Prevent insns from the function body from
12210 being scheduled into this sequence. */
12211 if (TARGET_SEH)
12213 /* Prevent a catch region from being adjacent to the standard
12214 epilogue sequence. Unfortunately neither crtl->uses_eh_lsda nor
12215 several other flags that would be interesting to test are
12216 set up yet. */
12217 if (flag_non_call_exceptions)
12218 emit_insn (gen_nops (const1_rtx));
12219 else
12220 emit_insn (gen_blockage ());
12223 /* The first step is to deallocate the stack frame so that we can
12224 pop the registers. Also do it on SEH targets for very large
12225 frames, as the emitted instructions aren't allowed by the ABI in
12226 epilogues. */
12227 if (!m->fs.sp_valid
12228 || (TARGET_SEH
12229 && (m->fs.sp_offset - frame.reg_save_offset
12230 >= SEH_MAX_FRAME_SIZE)))
12232 pro_epilogue_adjust_stack (stack_pointer_rtx, hard_frame_pointer_rtx,
12233 GEN_INT (m->fs.fp_offset
12234 - frame.reg_save_offset),
12235 style, false);
12237 else if (m->fs.sp_offset != frame.reg_save_offset)
12239 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
12240 GEN_INT (m->fs.sp_offset
12241 - frame.reg_save_offset),
12242 style,
12243 m->fs.cfa_reg == stack_pointer_rtx);
12246 ix86_emit_restore_regs_using_pop ();
12249 /* If we used a frame pointer and haven't already got rid of it,
12250 then do so now. */
12251 if (m->fs.fp_valid)
12253 /* If the stack pointer is valid and pointing at the frame
12254 pointer store address, then we only need a pop. */
12255 if (m->fs.sp_valid && m->fs.sp_offset == frame.hfp_save_offset)
12256 ix86_emit_restore_reg_using_pop (hard_frame_pointer_rtx);
12257 /* Leave results in shorter dependency chains on CPUs that are
12258 able to grok it fast. */
12259 else if (TARGET_USE_LEAVE
12260 || optimize_bb_for_size_p (EXIT_BLOCK_PTR_FOR_FN (cfun))
12261 || !cfun->machine->use_fast_prologue_epilogue)
12262 ix86_emit_leave ();
12263 else
12265 pro_epilogue_adjust_stack (stack_pointer_rtx,
12266 hard_frame_pointer_rtx,
12267 const0_rtx, style, !using_drap);
12268 ix86_emit_restore_reg_using_pop (hard_frame_pointer_rtx);
12272 if (using_drap)
12274 int param_ptr_offset = UNITS_PER_WORD;
12275 rtx_insn *insn;
12277 gcc_assert (stack_realign_drap);
12279 if (ix86_static_chain_on_stack)
12280 param_ptr_offset += UNITS_PER_WORD;
12281 if (!call_used_regs[REGNO (crtl->drap_reg)])
12282 param_ptr_offset += UNITS_PER_WORD;
12284 insn = emit_insn (gen_rtx_SET
12285 (stack_pointer_rtx,
12286 gen_rtx_PLUS (Pmode,
12287 crtl->drap_reg,
12288 GEN_INT (-param_ptr_offset))));
12289 m->fs.cfa_reg = stack_pointer_rtx;
12290 m->fs.cfa_offset = param_ptr_offset;
12291 m->fs.sp_offset = param_ptr_offset;
12292 m->fs.realigned = false;
12294 add_reg_note (insn, REG_CFA_DEF_CFA,
12295 gen_rtx_PLUS (Pmode, stack_pointer_rtx,
12296 GEN_INT (param_ptr_offset)));
12297 RTX_FRAME_RELATED_P (insn) = 1;
12299 if (!call_used_regs[REGNO (crtl->drap_reg)])
12300 ix86_emit_restore_reg_using_pop (crtl->drap_reg);
12303 /* At this point the stack pointer must be valid, and we must have
12304 restored all of the registers. We may not have deallocated the
12305 entire stack frame. We've delayed this until now because it may
12306 be possible to merge the local stack deallocation with the
12307 deallocation forced by ix86_static_chain_on_stack. */
12308 gcc_assert (m->fs.sp_valid);
12309 gcc_assert (!m->fs.fp_valid);
12310 gcc_assert (!m->fs.realigned);
12311 if (m->fs.sp_offset != UNITS_PER_WORD)
12313 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
12314 GEN_INT (m->fs.sp_offset - UNITS_PER_WORD),
12315 style, true);
12317 else
12318 ix86_add_queued_cfa_restore_notes (get_last_insn ());
12320 /* Sibcall epilogues don't want a return instruction. */
12321 if (style == 0)
12323 m->fs = frame_state_save;
12324 return;
12327 if (crtl->args.pops_args && crtl->args.size)
12329 rtx popc = GEN_INT (crtl->args.pops_args);
12331 /* i386 can only pop 64K bytes. If asked to pop more, pop return
12332 address, do explicit add, and jump indirectly to the caller. */
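/* Illustrative note (not part of the original source): "ret imm16" encodes
   the pop amount as a 16-bit immediate, so a value >= 65536 bytes cannot be
   expressed directly.  The fallback emitted below is roughly:

       popl  %ecx          # return address into a scratch register
       addl  $pops, %esp   # explicit adjustment by crtl->args.pops_args
       jmp   *%ecx         # indirect return to the caller  */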
12334 if (crtl->args.pops_args >= 65536)
12336 rtx ecx = gen_rtx_REG (SImode, CX_REG);
12337 rtx_insn *insn;
12339 /* There is no "pascal" calling convention in any 64bit ABI. */
12340 gcc_assert (!TARGET_64BIT);
12342 insn = emit_insn (gen_pop (ecx));
12343 m->fs.cfa_offset -= UNITS_PER_WORD;
12344 m->fs.sp_offset -= UNITS_PER_WORD;
12346 rtx x = plus_constant (Pmode, stack_pointer_rtx, UNITS_PER_WORD);
12347 x = gen_rtx_SET (stack_pointer_rtx, x);
12348 add_reg_note (insn, REG_CFA_ADJUST_CFA, x);
12349 add_reg_note (insn, REG_CFA_REGISTER, gen_rtx_SET (ecx, pc_rtx));
12350 RTX_FRAME_RELATED_P (insn) = 1;
12352 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
12353 popc, -1, true);
12354 emit_jump_insn (gen_simple_return_indirect_internal (ecx));
12356 else
12357 emit_jump_insn (gen_simple_return_pop_internal (popc));
12359 else
12360 emit_jump_insn (gen_simple_return_internal ());
12362 /* Restore the state back to the state from the prologue,
12363 so that it's correct for the next epilogue. */
12364 m->fs = frame_state_save;
12367 /* Reset from the function's potential modifications. */
12369 static void
12370 ix86_output_function_epilogue (FILE *file ATTRIBUTE_UNUSED, HOST_WIDE_INT)
12372 if (pic_offset_table_rtx
12373 && !ix86_use_pseudo_pic_reg ())
12374 SET_REGNO (pic_offset_table_rtx, REAL_PIC_OFFSET_TABLE_REGNUM);
12375 #if TARGET_MACHO
12376 /* Mach-O doesn't support labels at the end of objects, so if
12377 it looks like we might want one, insert a NOP. */
12379 rtx_insn *insn = get_last_insn ();
12380 rtx_insn *deleted_debug_label = NULL;
12381 while (insn
12382 && NOTE_P (insn)
12383 && NOTE_KIND (insn) != NOTE_INSN_DELETED_LABEL)
12385 /* Don't insert a nop for NOTE_INSN_DELETED_DEBUG_LABEL
12386 notes only, instead set their CODE_LABEL_NUMBER to -1,
12387 otherwise there would be code generation differences
12388 in between -g and -g0. */
12389 if (NOTE_P (insn) && NOTE_KIND (insn) == NOTE_INSN_DELETED_DEBUG_LABEL)
12390 deleted_debug_label = insn;
12391 insn = PREV_INSN (insn);
12393 if (insn
12394 && (LABEL_P (insn)
12395 || (NOTE_P (insn)
12396 && NOTE_KIND (insn) == NOTE_INSN_DELETED_LABEL)))
12397 fputs ("\tnop\n", file);
12398 else if (deleted_debug_label)
12399 for (insn = deleted_debug_label; insn; insn = NEXT_INSN (insn))
12400 if (NOTE_KIND (insn) == NOTE_INSN_DELETED_DEBUG_LABEL)
12401 CODE_LABEL_NUMBER (insn) = -1;
12403 #endif
12407 /* Return a scratch register to use in the split stack prologue. The
12408 split stack prologue is used for -fsplit-stack. It consists of the
12409 first instructions in the function, even before the regular prologue.
12410 The scratch register can be any caller-saved register which is not
12411 used for parameters or for the static chain. */
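/* Summary sketch (added for clarity, derived from the code below; the
   function itself remains authoritative):

       TARGET_64BIT           -> %r11
       32-bit, fastcall       -> %eax (static-chain/nested unsupported)
       32-bit, thiscall       -> %edx, or %eax if a static chain is used
       32-bit, regparm < 3    -> %ecx, or %edx if a static chain is used
                                 (regparm >= 2 with a static chain unsupported)
       32-bit, regparm == 3   -> unsupported, returns INVALID_REGNUM  */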
12413 static unsigned int
12414 split_stack_prologue_scratch_regno (void)
12416 if (TARGET_64BIT)
12417 return R11_REG;
12418 else
12420 bool is_fastcall, is_thiscall;
12421 int regparm;
12423 is_fastcall = (lookup_attribute ("fastcall",
12424 TYPE_ATTRIBUTES (TREE_TYPE (cfun->decl)))
12425 != NULL);
12426 is_thiscall = (lookup_attribute ("thiscall",
12427 TYPE_ATTRIBUTES (TREE_TYPE (cfun->decl)))
12428 != NULL);
12429 regparm = ix86_function_regparm (TREE_TYPE (cfun->decl), cfun->decl);
12431 if (is_fastcall)
12433 if (DECL_STATIC_CHAIN (cfun->decl))
12435 sorry ("-fsplit-stack does not support fastcall with "
12436 "nested function");
12437 return INVALID_REGNUM;
12439 return AX_REG;
12441 else if (is_thiscall)
12443 if (!DECL_STATIC_CHAIN (cfun->decl))
12444 return DX_REG;
12445 return AX_REG;
12447 else if (regparm < 3)
12449 if (!DECL_STATIC_CHAIN (cfun->decl))
12450 return CX_REG;
12451 else
12453 if (regparm >= 2)
12455 sorry ("-fsplit-stack does not support 2 register "
12456 "parameters for a nested function");
12457 return INVALID_REGNUM;
12459 return DX_REG;
12462 else
12464 /* FIXME: We could make this work by pushing a register
12465 around the addition and comparison. */
12466 sorry ("-fsplit-stack does not support 3 register parameters");
12467 return INVALID_REGNUM;
12472 /* A SYMBOL_REF for the function which allocates new stack space for
12473 -fsplit-stack. */
12475 static GTY(()) rtx split_stack_fn;
12477 /* A SYMBOL_REF for the more stack function when using the large
12478 model. */
12480 static GTY(()) rtx split_stack_fn_large;
12482 /* Handle -fsplit-stack. These are the first instructions in the
12483 function, even before the regular prologue. */
12485 void
12486 ix86_expand_split_stack_prologue (void)
12488 struct ix86_frame frame;
12489 HOST_WIDE_INT allocate;
12490 unsigned HOST_WIDE_INT args_size;
12491 rtx_code_label *label;
12492 rtx limit, current, jump_insn, allocate_rtx, call_insn, call_fusage;
12493 rtx scratch_reg = NULL_RTX;
12494 rtx_code_label *varargs_label = NULL;
12495 rtx fn;
12497 gcc_assert (flag_split_stack && reload_completed);
12499 ix86_finalize_stack_realign_flags ();
12500 ix86_compute_frame_layout (&frame);
12501 allocate = frame.stack_pointer_offset - INCOMING_FRAME_SP_OFFSET;
12503 /* This is the label we will branch to if we have enough stack
12504 space. We expect the basic block reordering pass to reverse this
12505 branch if optimizing, so that we branch in the unlikely case. */
12506 label = gen_label_rtx ();
12508 /* We need to compare the stack pointer minus the frame size with
12509 the stack boundary in the TCB. The stack boundary always gives
12510 us SPLIT_STACK_AVAILABLE bytes, so if we need less than that we
12511 can compare directly. Otherwise we need to do an addition. */
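/* A rough sketch of the check expanded below (added for illustration;
   "tcb_stack_limit" and "enough_stack" are illustrative names only):

       scratch = %sp - allocate;       // only if allocate >= SPLIT_STACK_AVAILABLE
       if ((unsigned) scratch >= tcb_stack_limit)   // GEU branch, very likely taken
         goto enough_stack;                         // "label" below
       call __morestack (allocate, args_size);      // then continue at "label"

   The TCB limit is read through the UNSPEC_STACK_CHECK address built just
   below.  */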
12513 limit = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const0_rtx),
12514 UNSPEC_STACK_CHECK);
12515 limit = gen_rtx_CONST (Pmode, limit);
12516 limit = gen_rtx_MEM (Pmode, limit);
12517 if (allocate < SPLIT_STACK_AVAILABLE)
12518 current = stack_pointer_rtx;
12519 else
12521 unsigned int scratch_regno;
12522 rtx offset;
12524 /* We need a scratch register to hold the stack pointer minus
12525 the required frame size. Since this is the very start of the
12526 function, the scratch register can be any caller-saved
12527 register which is not used for parameters. */
12528 offset = GEN_INT (- allocate);
12529 scratch_regno = split_stack_prologue_scratch_regno ();
12530 if (scratch_regno == INVALID_REGNUM)
12531 return;
12532 scratch_reg = gen_rtx_REG (Pmode, scratch_regno);
12533 if (!TARGET_64BIT || x86_64_immediate_operand (offset, Pmode))
12535 /* We don't use ix86_gen_add3 in this case because it will
12536 want to split to lea, but when not optimizing the insn
12537 will not be split after this point. */
12538 emit_insn (gen_rtx_SET (scratch_reg,
12539 gen_rtx_PLUS (Pmode, stack_pointer_rtx,
12540 offset)));
12542 else
12544 emit_move_insn (scratch_reg, offset);
12545 emit_insn (ix86_gen_add3 (scratch_reg, scratch_reg,
12546 stack_pointer_rtx));
12548 current = scratch_reg;
12551 ix86_expand_branch (GEU, current, limit, label);
12552 jump_insn = get_last_insn ();
12553 JUMP_LABEL (jump_insn) = label;
12555 /* Mark the jump as very likely to be taken. */
12556 add_int_reg_note (jump_insn, REG_BR_PROB,
12557 REG_BR_PROB_BASE - REG_BR_PROB_BASE / 100);
12559 if (split_stack_fn == NULL_RTX)
12561 split_stack_fn = gen_rtx_SYMBOL_REF (Pmode, "__morestack");
12562 SYMBOL_REF_FLAGS (split_stack_fn) |= SYMBOL_FLAG_LOCAL;
12564 fn = split_stack_fn;
12566 /* Get more stack space. We pass in the desired stack space and the
12567 size of the arguments to copy to the new stack. In 32-bit mode
12568 we push the parameters; __morestack will return on a new stack
12569 anyhow. In 64-bit mode we pass the parameters in r10 and
12570 r11. */
12571 allocate_rtx = GEN_INT (allocate);
12572 args_size = crtl->args.size >= 0 ? crtl->args.size : 0;
12573 call_fusage = NULL_RTX;
12574 if (TARGET_64BIT)
12576 rtx reg10, reg11;
12578 reg10 = gen_rtx_REG (Pmode, R10_REG);
12579 reg11 = gen_rtx_REG (Pmode, R11_REG);
12581 /* If this function uses a static chain, it will be in %r10.
12582 Preserve it across the call to __morestack. */
12583 if (DECL_STATIC_CHAIN (cfun->decl))
12585 rtx rax;
12587 rax = gen_rtx_REG (word_mode, AX_REG);
12588 emit_move_insn (rax, gen_rtx_REG (word_mode, R10_REG));
12589 use_reg (&call_fusage, rax);
12592 if ((ix86_cmodel == CM_LARGE || ix86_cmodel == CM_LARGE_PIC)
12593 && !TARGET_PECOFF)
12595 HOST_WIDE_INT argval;
12597 gcc_assert (Pmode == DImode);
12598 /* When using the large model we need to load the address
12599 into a register, and we've run out of registers. So we
12600 switch to a different calling convention, and we call a
12601 different function: __morestack_large_model. We pass the
12602 argument size in the upper 32 bits of r10 and pass the
12603 frame size in the lower 32 bits. */
12604 gcc_assert ((allocate & (HOST_WIDE_INT) 0xffffffff) == allocate);
12605 gcc_assert ((args_size & 0xffffffff) == args_size);
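/* Worked example (added for illustration): with args_size == 0x20 and
   allocate == 0x1000, the packed value computed below is

       argval = (0x20 << 32) + 0x1000 == 0x0000002000001000

   i.e. the argument size lands in the upper 32 bits of %r10 and the frame
   size in the lower 32 bits, matching the asserts above.  */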
12607 if (split_stack_fn_large == NULL_RTX)
12609 split_stack_fn_large =
12610 gen_rtx_SYMBOL_REF (Pmode, "__morestack_large_model");
12611 SYMBOL_REF_FLAGS (split_stack_fn_large) |= SYMBOL_FLAG_LOCAL;
12613 if (ix86_cmodel == CM_LARGE_PIC)
12615 rtx_code_label *label;
12616 rtx x;
12618 label = gen_label_rtx ();
12619 emit_label (label);
12620 LABEL_PRESERVE_P (label) = 1;
12621 emit_insn (gen_set_rip_rex64 (reg10, label));
12622 emit_insn (gen_set_got_offset_rex64 (reg11, label));
12623 emit_insn (ix86_gen_add3 (reg10, reg10, reg11));
12624 x = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, split_stack_fn_large),
12625 UNSPEC_GOT);
12626 x = gen_rtx_CONST (Pmode, x);
12627 emit_move_insn (reg11, x);
12628 x = gen_rtx_PLUS (Pmode, reg10, reg11);
12629 x = gen_const_mem (Pmode, x);
12630 emit_move_insn (reg11, x);
12632 else
12633 emit_move_insn (reg11, split_stack_fn_large);
12635 fn = reg11;
12637 argval = ((args_size << 16) << 16) + allocate;
12638 emit_move_insn (reg10, GEN_INT (argval));
12640 else
12642 emit_move_insn (reg10, allocate_rtx);
12643 emit_move_insn (reg11, GEN_INT (args_size));
12644 use_reg (&call_fusage, reg11);
12647 use_reg (&call_fusage, reg10);
12649 else
12651 emit_insn (gen_push (GEN_INT (args_size)));
12652 emit_insn (gen_push (allocate_rtx));
12654 call_insn = ix86_expand_call (NULL_RTX, gen_rtx_MEM (QImode, fn),
12655 GEN_INT (UNITS_PER_WORD), constm1_rtx,
12656 NULL_RTX, false);
12657 add_function_usage_to (call_insn, call_fusage);
12659 /* In order to make call/return prediction work right, we now need
12660 to execute a return instruction. See
12661 libgcc/config/i386/morestack.S for the details on how this works.
12663 For flow purposes gcc must not see this as a return
12664 instruction--we need control flow to continue at the subsequent
12665 label. Therefore, we use an unspec. */
12666 gcc_assert (crtl->args.pops_args < 65536);
12667 emit_insn (gen_split_stack_return (GEN_INT (crtl->args.pops_args)));
12669 /* If we are in 64-bit mode and this function uses a static chain,
12670 we saved %r10 in %rax before calling __morestack. */
12671 if (TARGET_64BIT && DECL_STATIC_CHAIN (cfun->decl))
12672 emit_move_insn (gen_rtx_REG (word_mode, R10_REG),
12673 gen_rtx_REG (word_mode, AX_REG));
12675 /* If this function calls va_start, we need to store a pointer to
12676 the arguments on the old stack, because they may not have been
12677 all copied to the new stack. At this point the old stack can be
12678 found at the frame pointer value used by __morestack, because
12679 __morestack has set that up before calling back to us. Here we
12680 store that pointer in a scratch register, and in
12681 ix86_expand_prologue we store the scratch register in a stack
12682 slot. */
12683 if (cfun->machine->split_stack_varargs_pointer != NULL_RTX)
12685 unsigned int scratch_regno;
12686 rtx frame_reg;
12687 int words;
12689 scratch_regno = split_stack_prologue_scratch_regno ();
12690 scratch_reg = gen_rtx_REG (Pmode, scratch_regno);
12691 frame_reg = gen_rtx_REG (Pmode, BP_REG);
12693 /* 64-bit:
12694 fp -> old fp value
12695 return address within this function
12696 return address of caller of this function
12697 stack arguments
12698 So we add three words to get to the stack arguments.
12700 32-bit:
12701 fp -> old fp value
12702 return address within this function
12703 first argument to __morestack
12704 second argument to __morestack
12705 return address of caller of this function
12706 stack arguments
12707 So we add five words to get to the stack arguments.
12709 words = TARGET_64BIT ? 3 : 5;
12710 emit_insn (gen_rtx_SET (scratch_reg,
12711 gen_rtx_PLUS (Pmode, frame_reg,
12712 GEN_INT (words * UNITS_PER_WORD))));
12714 varargs_label = gen_label_rtx ();
12715 emit_jump_insn (gen_jump (varargs_label));
12716 JUMP_LABEL (get_last_insn ()) = varargs_label;
12718 emit_barrier ();
12721 emit_label (label);
12722 LABEL_NUSES (label) = 1;
12724 /* If this function calls va_start, we now have to set the scratch
12725 register for the case where we do not call __morestack. In this
12726 case we need to set it based on the stack pointer. */
12727 if (cfun->machine->split_stack_varargs_pointer != NULL_RTX)
12729 emit_insn (gen_rtx_SET (scratch_reg,
12730 gen_rtx_PLUS (Pmode, stack_pointer_rtx,
12731 GEN_INT (UNITS_PER_WORD))));
12733 emit_label (varargs_label);
12734 LABEL_NUSES (varargs_label) = 1;
12738 /* We may have to tell the dataflow pass that the split stack prologue
12739 is initializing a scratch register. */
12741 static void
12742 ix86_live_on_entry (bitmap regs)
12744 if (cfun->machine->split_stack_varargs_pointer != NULL_RTX)
12746 gcc_assert (flag_split_stack);
12747 bitmap_set_bit (regs, split_stack_prologue_scratch_regno ());
12751 /* Extract the parts of an RTL expression that is a valid memory address
12752 for an instruction. Return 0 if the structure of the address is
12753 grossly off. Return -1 if the address contains ASHIFT, so it is not
12754 strictly valid, but still used for computing length of lea instruction. */
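/* Example (added for illustration): an address such as

       (plus (plus (mult (reg B) (const_int 4)) (reg A)) (const_int 8))

   decomposes into base = A, index = B, scale = 4, disp = 8 and returns 1.
   A top-level ASHIFT address, e.g. (ashift (reg B) (const_int 2)), yields
   index = B, scale = 4 but returns -1, since it is only acceptable for
   computing lea instruction lengths.  */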
12757 ix86_decompose_address (rtx addr, struct ix86_address *out)
12759 rtx base = NULL_RTX, index = NULL_RTX, disp = NULL_RTX;
12760 rtx base_reg, index_reg;
12761 HOST_WIDE_INT scale = 1;
12762 rtx scale_rtx = NULL_RTX;
12763 rtx tmp;
12764 int retval = 1;
12765 enum ix86_address_seg seg = SEG_DEFAULT;
12767 /* Allow zero-extended SImode addresses,
12768 they will be emitted with addr32 prefix. */
12769 if (TARGET_64BIT && GET_MODE (addr) == DImode)
12771 if (GET_CODE (addr) == ZERO_EXTEND
12772 && GET_MODE (XEXP (addr, 0)) == SImode)
12774 addr = XEXP (addr, 0);
12775 if (CONST_INT_P (addr))
12776 return 0;
12778 else if (GET_CODE (addr) == AND
12779 && const_32bit_mask (XEXP (addr, 1), DImode))
12781 addr = simplify_gen_subreg (SImode, XEXP (addr, 0), DImode, 0);
12782 if (addr == NULL_RTX)
12783 return 0;
12785 if (CONST_INT_P (addr))
12786 return 0;
12790 /* Allow SImode subregs of DImode addresses,
12791 they will be emitted with addr32 prefix. */
12792 if (TARGET_64BIT && GET_MODE (addr) == SImode)
12794 if (GET_CODE (addr) == SUBREG
12795 && GET_MODE (SUBREG_REG (addr)) == DImode)
12797 addr = SUBREG_REG (addr);
12798 if (CONST_INT_P (addr))
12799 return 0;
12803 if (REG_P (addr))
12804 base = addr;
12805 else if (GET_CODE (addr) == SUBREG)
12807 if (REG_P (SUBREG_REG (addr)))
12808 base = addr;
12809 else
12810 return 0;
12812 else if (GET_CODE (addr) == PLUS)
12814 rtx addends[4], op;
12815 int n = 0, i;
12817 op = addr;
12820 if (n >= 4)
12821 return 0;
12822 addends[n++] = XEXP (op, 1);
12823 op = XEXP (op, 0);
12825 while (GET_CODE (op) == PLUS);
12826 if (n >= 4)
12827 return 0;
12828 addends[n] = op;
12830 for (i = n; i >= 0; --i)
12832 op = addends[i];
12833 switch (GET_CODE (op))
12835 case MULT:
12836 if (index)
12837 return 0;
12838 index = XEXP (op, 0);
12839 scale_rtx = XEXP (op, 1);
12840 break;
12842 case ASHIFT:
12843 if (index)
12844 return 0;
12845 index = XEXP (op, 0);
12846 tmp = XEXP (op, 1);
12847 if (!CONST_INT_P (tmp))
12848 return 0;
12849 scale = INTVAL (tmp);
12850 if ((unsigned HOST_WIDE_INT) scale > 3)
12851 return 0;
12852 scale = 1 << scale;
12853 break;
12855 case ZERO_EXTEND:
12856 op = XEXP (op, 0);
12857 if (GET_CODE (op) != UNSPEC)
12858 return 0;
12859 /* FALLTHRU */
12861 case UNSPEC:
12862 if (XINT (op, 1) == UNSPEC_TP
12863 && TARGET_TLS_DIRECT_SEG_REFS
12864 && seg == SEG_DEFAULT)
12865 seg = DEFAULT_TLS_SEG_REG;
12866 else
12867 return 0;
12868 break;
12870 case SUBREG:
12871 if (!REG_P (SUBREG_REG (op)))
12872 return 0;
12873 /* FALLTHRU */
12875 case REG:
12876 if (!base)
12877 base = op;
12878 else if (!index)
12879 index = op;
12880 else
12881 return 0;
12882 break;
12884 case CONST:
12885 case CONST_INT:
12886 case SYMBOL_REF:
12887 case LABEL_REF:
12888 if (disp)
12889 return 0;
12890 disp = op;
12891 break;
12893 default:
12894 return 0;
12898 else if (GET_CODE (addr) == MULT)
12900 index = XEXP (addr, 0); /* index*scale */
12901 scale_rtx = XEXP (addr, 1);
12903 else if (GET_CODE (addr) == ASHIFT)
12905 /* We're called for lea too, which implements ashift on occasion. */
12906 index = XEXP (addr, 0);
12907 tmp = XEXP (addr, 1);
12908 if (!CONST_INT_P (tmp))
12909 return 0;
12910 scale = INTVAL (tmp);
12911 if ((unsigned HOST_WIDE_INT) scale > 3)
12912 return 0;
12913 scale = 1 << scale;
12914 retval = -1;
12916 else
12917 disp = addr; /* displacement */
12919 if (index)
12921 if (REG_P (index))
12923 else if (GET_CODE (index) == SUBREG
12924 && REG_P (SUBREG_REG (index)))
12926 else
12927 return 0;
12930 /* Extract the integral value of scale. */
12931 if (scale_rtx)
12933 if (!CONST_INT_P (scale_rtx))
12934 return 0;
12935 scale = INTVAL (scale_rtx);
12938 base_reg = base && GET_CODE (base) == SUBREG ? SUBREG_REG (base) : base;
12939 index_reg = index && GET_CODE (index) == SUBREG ? SUBREG_REG (index) : index;
12941 /* Avoid useless 0 displacement. */
12942 if (disp == const0_rtx && (base || index))
12943 disp = NULL_RTX;
12945 /* Allow arg pointer and stack pointer as index if there is no scaling. */
12946 if (base_reg && index_reg && scale == 1
12947 && (index_reg == arg_pointer_rtx
12948 || index_reg == frame_pointer_rtx
12949 || (REG_P (index_reg) && REGNO (index_reg) == STACK_POINTER_REGNUM)))
12951 std::swap (base, index);
12952 std::swap (base_reg, index_reg);
12955 /* Special case: %ebp cannot be encoded as a base without a displacement.
12956 Similarly %r13. */
12957 if (!disp
12958 && base_reg
12959 && (base_reg == hard_frame_pointer_rtx
12960 || base_reg == frame_pointer_rtx
12961 || base_reg == arg_pointer_rtx
12962 || (REG_P (base_reg)
12963 && (REGNO (base_reg) == HARD_FRAME_POINTER_REGNUM
12964 || REGNO (base_reg) == R13_REG))))
12965 disp = const0_rtx;
12967 /* Special case: on K6, [%esi] makes the instruction vector decoded.
12968 Avoid this by transforming to [%esi+0].
12969 Reload calls address legitimization without cfun defined, so we need
12970 to test cfun for being non-NULL. */
12971 if (TARGET_K6 && cfun && optimize_function_for_speed_p (cfun)
12972 && base_reg && !index_reg && !disp
12973 && REG_P (base_reg) && REGNO (base_reg) == SI_REG)
12974 disp = const0_rtx;
12976 /* Special case: encode reg+reg instead of reg*2. */
12977 if (!base && index && scale == 2)
12978 base = index, base_reg = index_reg, scale = 1;
12980 /* Special case: scaling cannot be encoded without base or displacement. */
12981 if (!base && !disp && index && scale != 1)
12982 disp = const0_rtx;
12984 out->base = base;
12985 out->index = index;
12986 out->disp = disp;
12987 out->scale = scale;
12988 out->seg = seg;
12990 return retval;
12993 /* Return cost of the memory address x.
12994 For i386, it is better to use a complex address than let gcc copy
12995 the address into a reg and make a new pseudo. But not if the address
12996 requires two regs - that would mean more pseudos with longer
12997 lifetimes. */
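/* Rough examples (added for illustration): the cost below starts at 1 and
   is incremented once for each base/index that is still a pseudo register
   (the PIC register is exempt), plus a penalty of 10 for the K6 cases
   listed further down.  So, roughly:

       (symbol_ref "x")                                        -> cost 1
       (plus (reg pseudo1) (const_int 8))                      -> cost 2
       (plus (reg pseudo1) (mult (reg pseudo2) (const_int 4))) -> cost 3  */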
12998 static int
12999 ix86_address_cost (rtx x, machine_mode, addr_space_t, bool)
13001 struct ix86_address parts;
13002 int cost = 1;
13003 int ok = ix86_decompose_address (x, &parts);
13005 gcc_assert (ok);
13007 if (parts.base && GET_CODE (parts.base) == SUBREG)
13008 parts.base = SUBREG_REG (parts.base);
13009 if (parts.index && GET_CODE (parts.index) == SUBREG)
13010 parts.index = SUBREG_REG (parts.index);
13012 /* Attempt to minimize number of registers in the address by increasing
13013 address cost for each used register. We don't increase address cost
13014 for "pic_offset_table_rtx". When a memop with "pic_offset_table_rtx"
13015 is not invariant itself, it most likely means that base or index is not
13016 invariant. Therefore only "pic_offset_table_rtx" could be hoisted out,
13017 which is not profitable for x86. */
13018 if (parts.base
13019 && (!REG_P (parts.base) || REGNO (parts.base) >= FIRST_PSEUDO_REGISTER)
13020 && (current_pass->type == GIMPLE_PASS
13021 || !pic_offset_table_rtx
13022 || !REG_P (parts.base)
13023 || REGNO (pic_offset_table_rtx) != REGNO (parts.base)))
13024 cost++;
13026 if (parts.index
13027 && (!REG_P (parts.index) || REGNO (parts.index) >= FIRST_PSEUDO_REGISTER)
13028 && (current_pass->type == GIMPLE_PASS
13029 || !pic_offset_table_rtx
13030 || !REG_P (parts.index)
13031 || REGNO (pic_offset_table_rtx) != REGNO (parts.index)))
13032 cost++;
13034 /* AMD-K6 doesn't like addresses with ModR/M set to 00_xxx_100b,
13035 since its predecode logic can't detect the length of instructions
13036 and they degenerate to vector decoding. Increase the cost of such
13037 addresses here. The penalty is at least 2 cycles. It may be worthwhile
13038 to split such addresses or even refuse such addresses at all.
13040 Following addressing modes are affected:
13041 [base+scale*index]
13042 [scale*index+disp]
13043 [base+index]
13045 The first and last case may be avoidable by explicitly coding the zero in
13046 the memory address, but I don't have an AMD-K6 machine handy to check this
13047 theory. */
13049 if (TARGET_K6
13050 && ((!parts.disp && parts.base && parts.index && parts.scale != 1)
13051 || (parts.disp && !parts.base && parts.index && parts.scale != 1)
13052 || (!parts.disp && parts.base && parts.index && parts.scale == 1)))
13053 cost += 10;
13055 return cost;
13058 /* Allow {LABEL | SYMBOL}_REF - SYMBOL_REF-FOR-PICBASE for Mach-O as
13059 this is used to form addresses to local data when -fPIC is in
13060 use. */
13062 static bool
13063 darwin_local_data_pic (rtx disp)
13065 return (GET_CODE (disp) == UNSPEC
13066 && XINT (disp, 1) == UNSPEC_MACHOPIC_OFFSET);
13069 /* Determine if a given RTX is a valid constant. We already know this
13070 satisfies CONSTANT_P. */
13072 static bool
13073 ix86_legitimate_constant_p (machine_mode, rtx x)
13075 /* Pointer bounds constants are not valid. */
13076 if (POINTER_BOUNDS_MODE_P (GET_MODE (x)))
13077 return false;
13079 switch (GET_CODE (x))
13081 case CONST:
13082 x = XEXP (x, 0);
13084 if (GET_CODE (x) == PLUS)
13086 if (!CONST_INT_P (XEXP (x, 1)))
13087 return false;
13088 x = XEXP (x, 0);
13091 if (TARGET_MACHO && darwin_local_data_pic (x))
13092 return true;
13094 /* Only some unspecs are valid as "constants". */
13095 if (GET_CODE (x) == UNSPEC)
13096 switch (XINT (x, 1))
13098 case UNSPEC_GOT:
13099 case UNSPEC_GOTOFF:
13100 case UNSPEC_PLTOFF:
13101 return TARGET_64BIT;
13102 case UNSPEC_TPOFF:
13103 case UNSPEC_NTPOFF:
13104 x = XVECEXP (x, 0, 0);
13105 return (GET_CODE (x) == SYMBOL_REF
13106 && SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_EXEC);
13107 case UNSPEC_DTPOFF:
13108 x = XVECEXP (x, 0, 0);
13109 return (GET_CODE (x) == SYMBOL_REF
13110 && SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_DYNAMIC);
13111 default:
13112 return false;
13115 /* We must have drilled down to a symbol. */
13116 if (GET_CODE (x) == LABEL_REF)
13117 return true;
13118 if (GET_CODE (x) != SYMBOL_REF)
13119 return false;
13120 /* FALLTHRU */
13122 case SYMBOL_REF:
13123 /* TLS symbols are never valid. */
13124 if (SYMBOL_REF_TLS_MODEL (x))
13125 return false;
13127 /* DLLIMPORT symbols are never valid. */
13128 if (TARGET_DLLIMPORT_DECL_ATTRIBUTES
13129 && SYMBOL_REF_DLLIMPORT_P (x))
13130 return false;
13132 #if TARGET_MACHO
13133 /* mdynamic-no-pic */
13134 if (MACHO_DYNAMIC_NO_PIC_P)
13135 return machopic_symbol_defined_p (x);
13136 #endif
13137 break;
13139 case CONST_WIDE_INT:
13140 if (!TARGET_64BIT && !standard_sse_constant_p (x))
13141 return false;
13142 break;
13144 case CONST_VECTOR:
13145 if (!standard_sse_constant_p (x))
13146 return false;
13148 default:
13149 break;
13152 /* Otherwise we handle everything else in the move patterns. */
13153 return true;
13156 /* Determine if it's legal to put X into the constant pool. This
13157 is not possible for the address of thread-local symbols, which
13158 is checked above. */
13160 static bool
13161 ix86_cannot_force_const_mem (machine_mode mode, rtx x)
13163 /* We can always put integral constants and vectors in memory. */
13164 switch (GET_CODE (x))
13166 case CONST_INT:
13167 case CONST_WIDE_INT:
13168 case CONST_DOUBLE:
13169 case CONST_VECTOR:
13170 return false;
13172 default:
13173 break;
13175 return !ix86_legitimate_constant_p (mode, x);
13178 /* Nonzero if the symbol is marked as dllimport, or as stub-variable,
13179 otherwise zero. */
13181 static bool
13182 is_imported_p (rtx x)
13184 if (!TARGET_DLLIMPORT_DECL_ATTRIBUTES
13185 || GET_CODE (x) != SYMBOL_REF)
13186 return false;
13188 return SYMBOL_REF_DLLIMPORT_P (x) || SYMBOL_REF_STUBVAR_P (x);
13192 /* Nonzero if the constant value X is a legitimate general operand
13193 when generating PIC code. It is given that flag_pic is on and
13194 that X satisfies CONSTANT_P. */
13196 bool
13197 legitimate_pic_operand_p (rtx x)
13199 rtx inner;
13201 switch (GET_CODE (x))
13203 case CONST:
13204 inner = XEXP (x, 0);
13205 if (GET_CODE (inner) == PLUS
13206 && CONST_INT_P (XEXP (inner, 1)))
13207 inner = XEXP (inner, 0);
13209 /* Only some unspecs are valid as "constants". */
13210 if (GET_CODE (inner) == UNSPEC)
13211 switch (XINT (inner, 1))
13213 case UNSPEC_GOT:
13214 case UNSPEC_GOTOFF:
13215 case UNSPEC_PLTOFF:
13216 return TARGET_64BIT;
13217 case UNSPEC_TPOFF:
13218 x = XVECEXP (inner, 0, 0);
13219 return (GET_CODE (x) == SYMBOL_REF
13220 && SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_EXEC);
13221 case UNSPEC_MACHOPIC_OFFSET:
13222 return legitimate_pic_address_disp_p (x);
13223 default:
13224 return false;
13226 /* FALLTHRU */
13228 case SYMBOL_REF:
13229 case LABEL_REF:
13230 return legitimate_pic_address_disp_p (x);
13232 default:
13233 return true;
13237 /* Determine if a given CONST RTX is a valid memory displacement
13238 in PIC mode. */
13240 bool
13241 legitimate_pic_address_disp_p (rtx disp)
13243 bool saw_plus;
13245 /* In 64bit mode we can allow direct addresses of symbols and labels
13246 when they are not dynamic symbols. */
13247 if (TARGET_64BIT)
13249 rtx op0 = disp, op1;
13251 switch (GET_CODE (disp))
13253 case LABEL_REF:
13254 return true;
13256 case CONST:
13257 if (GET_CODE (XEXP (disp, 0)) != PLUS)
13258 break;
13259 op0 = XEXP (XEXP (disp, 0), 0);
13260 op1 = XEXP (XEXP (disp, 0), 1);
13261 if (!CONST_INT_P (op1)
13262 || INTVAL (op1) >= 16*1024*1024
13263 || INTVAL (op1) < -16*1024*1024)
13264 break;
13265 if (GET_CODE (op0) == LABEL_REF)
13266 return true;
13267 if (GET_CODE (op0) == CONST
13268 && GET_CODE (XEXP (op0, 0)) == UNSPEC
13269 && XINT (XEXP (op0, 0), 1) == UNSPEC_PCREL)
13270 return true;
13271 if (GET_CODE (op0) == UNSPEC
13272 && XINT (op0, 1) == UNSPEC_PCREL)
13273 return true;
13274 if (GET_CODE (op0) != SYMBOL_REF)
13275 break;
13276 /* FALLTHRU */
13278 case SYMBOL_REF:
13279 /* TLS references should always be enclosed in UNSPEC.
13280 A dllimported symbol always needs to be resolved. */
13281 if (SYMBOL_REF_TLS_MODEL (op0)
13282 || (TARGET_DLLIMPORT_DECL_ATTRIBUTES && SYMBOL_REF_DLLIMPORT_P (op0)))
13283 return false;
13285 if (TARGET_PECOFF)
13287 if (is_imported_p (op0))
13288 return true;
13290 if (SYMBOL_REF_FAR_ADDR_P (op0)
13291 || !SYMBOL_REF_LOCAL_P (op0))
13292 break;
13294 /* Function symbols need to be resolved only for
13295 the large model.
13296 For the small model we don't need to resolve anything
13297 here. */
13298 if ((ix86_cmodel != CM_LARGE_PIC
13299 && SYMBOL_REF_FUNCTION_P (op0))
13300 || ix86_cmodel == CM_SMALL_PIC)
13301 return true;
13302 /* Non-external symbols don't need to be resolved for
13303 the large and medium models. */
13304 if ((ix86_cmodel == CM_LARGE_PIC
13305 || ix86_cmodel == CM_MEDIUM_PIC)
13306 && !SYMBOL_REF_EXTERNAL_P (op0))
13307 return true;
13309 else if (!SYMBOL_REF_FAR_ADDR_P (op0)
13310 && (SYMBOL_REF_LOCAL_P (op0)
13311 || (HAVE_LD_PIE_COPYRELOC
13312 && flag_pie
13313 && !SYMBOL_REF_WEAK (op0)
13314 && !SYMBOL_REF_FUNCTION_P (op0)))
13315 && ix86_cmodel != CM_LARGE_PIC)
13316 return true;
13317 break;
13319 default:
13320 break;
13323 if (GET_CODE (disp) != CONST)
13324 return false;
13325 disp = XEXP (disp, 0);
13327 if (TARGET_64BIT)
13329 /* It is unsafe to allow PLUS expressions. This limits the allowed
13330 distance of GOT tables. We should not need these anyway. */
13331 if (GET_CODE (disp) != UNSPEC
13332 || (XINT (disp, 1) != UNSPEC_GOTPCREL
13333 && XINT (disp, 1) != UNSPEC_GOTOFF
13334 && XINT (disp, 1) != UNSPEC_PCREL
13335 && XINT (disp, 1) != UNSPEC_PLTOFF))
13336 return false;
13338 if (GET_CODE (XVECEXP (disp, 0, 0)) != SYMBOL_REF
13339 && GET_CODE (XVECEXP (disp, 0, 0)) != LABEL_REF)
13340 return false;
13341 return true;
13344 saw_plus = false;
13345 if (GET_CODE (disp) == PLUS)
13347 if (!CONST_INT_P (XEXP (disp, 1)))
13348 return false;
13349 disp = XEXP (disp, 0);
13350 saw_plus = true;
13353 if (TARGET_MACHO && darwin_local_data_pic (disp))
13354 return true;
13356 if (GET_CODE (disp) != UNSPEC)
13357 return false;
13359 switch (XINT (disp, 1))
13361 case UNSPEC_GOT:
13362 if (saw_plus)
13363 return false;
13364 /* We need to check for both symbols and labels because VxWorks loads
13365 text labels with @GOT rather than @GOTOFF. See gotoff_operand for
13366 details. */
13367 return (GET_CODE (XVECEXP (disp, 0, 0)) == SYMBOL_REF
13368 || GET_CODE (XVECEXP (disp, 0, 0)) == LABEL_REF);
13369 case UNSPEC_GOTOFF:
13370 /* Refuse GOTOFF in 64bit mode since it is always 64bit when used.
13371 While the ABI also specifies a 32bit relocation, we don't produce it
13372 in the small PIC model at all. */
13373 if ((GET_CODE (XVECEXP (disp, 0, 0)) == SYMBOL_REF
13374 || GET_CODE (XVECEXP (disp, 0, 0)) == LABEL_REF)
13375 && !TARGET_64BIT)
13376 return !TARGET_PECOFF && gotoff_operand (XVECEXP (disp, 0, 0), Pmode);
13377 return false;
13378 case UNSPEC_GOTTPOFF:
13379 case UNSPEC_GOTNTPOFF:
13380 case UNSPEC_INDNTPOFF:
13381 if (saw_plus)
13382 return false;
13383 disp = XVECEXP (disp, 0, 0);
13384 return (GET_CODE (disp) == SYMBOL_REF
13385 && SYMBOL_REF_TLS_MODEL (disp) == TLS_MODEL_INITIAL_EXEC);
13386 case UNSPEC_NTPOFF:
13387 disp = XVECEXP (disp, 0, 0);
13388 return (GET_CODE (disp) == SYMBOL_REF
13389 && SYMBOL_REF_TLS_MODEL (disp) == TLS_MODEL_LOCAL_EXEC);
13390 case UNSPEC_DTPOFF:
13391 disp = XVECEXP (disp, 0, 0);
13392 return (GET_CODE (disp) == SYMBOL_REF
13393 && SYMBOL_REF_TLS_MODEL (disp) == TLS_MODEL_LOCAL_DYNAMIC);
13396 return false;
13399 /* Determine if op is a suitable RTX for an address register.
13400 Return naked register if a register or a register subreg is
13401 found, otherwise return NULL_RTX. */
13403 static rtx
13404 ix86_validate_address_register (rtx op)
13406 machine_mode mode = GET_MODE (op);
13408 /* Only SImode or DImode registers can form the address. */
13409 if (mode != SImode && mode != DImode)
13410 return NULL_RTX;
13412 if (REG_P (op))
13413 return op;
13414 else if (GET_CODE (op) == SUBREG)
13416 rtx reg = SUBREG_REG (op);
13418 if (!REG_P (reg))
13419 return NULL_RTX;
13421 mode = GET_MODE (reg);
13423 /* Don't allow SUBREGs that span more than a word. It can
13424 lead to spill failures when the register is one word out
13425 of a two word structure. */
13426 if (GET_MODE_SIZE (mode) > UNITS_PER_WORD)
13427 return NULL_RTX;
13429 /* Allow only SUBREGs of non-eliminable hard registers. */
13430 if (register_no_elim_operand (reg, mode))
13431 return reg;
13434 /* Op is not a register. */
13435 return NULL_RTX;
13438 /* Recognizes RTL expressions that are valid memory addresses for an
13439 instruction. The MODE argument is the machine mode for the MEM
13440 expression that wants to use this address.
13442 It only recognizes addresses in canonical form. LEGITIMIZE_ADDRESS should
13443 convert common non-canonical forms to canonical form so that they will
13444 be recognized. */
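/* Illustrative summary (not part of the original source): the canonical
   form accepted below is

       seg: base + index*scale + disp

   where base and index must be SImode/DImode registers (or SUBREGs of
   non-eliminable hard registers) of the same mode, scale must be 1, 2, 4
   or 8, and disp must be a (possibly PIC/TLS-wrapped) constant.  For
   instance
       (plus (reg %rbx) (plus (mult (reg %rcx) (const_int 8)) (const_int 16)))
   is acceptable, while a scale of 3 or a base and index of different
   modes is rejected.  */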
13446 static bool
13447 ix86_legitimate_address_p (machine_mode, rtx addr, bool strict)
13449 struct ix86_address parts;
13450 rtx base, index, disp;
13451 HOST_WIDE_INT scale;
13452 enum ix86_address_seg seg;
13454 if (ix86_decompose_address (addr, &parts) <= 0)
13455 /* Decomposition failed. */
13456 return false;
13458 base = parts.base;
13459 index = parts.index;
13460 disp = parts.disp;
13461 scale = parts.scale;
13462 seg = parts.seg;
13464 /* Validate base register. */
13465 if (base)
13467 rtx reg = ix86_validate_address_register (base);
13469 if (reg == NULL_RTX)
13470 return false;
13472 if ((strict && ! REG_OK_FOR_BASE_STRICT_P (reg))
13473 || (! strict && ! REG_OK_FOR_BASE_NONSTRICT_P (reg)))
13474 /* Base is not valid. */
13475 return false;
13478 /* Validate index register. */
13479 if (index)
13481 rtx reg = ix86_validate_address_register (index);
13483 if (reg == NULL_RTX)
13484 return false;
13486 if ((strict && ! REG_OK_FOR_INDEX_STRICT_P (reg))
13487 || (! strict && ! REG_OK_FOR_INDEX_NONSTRICT_P (reg)))
13488 /* Index is not valid. */
13489 return false;
13492 /* Index and base should have the same mode. */
13493 if (base && index
13494 && GET_MODE (base) != GET_MODE (index))
13495 return false;
13497 /* Address override works only on the (%reg) part of %fs:(%reg). */
13498 if (seg != SEG_DEFAULT
13499 && ((base && GET_MODE (base) != word_mode)
13500 || (index && GET_MODE (index) != word_mode)))
13501 return false;
13503 /* Validate scale factor. */
13504 if (scale != 1)
13506 if (!index)
13507 /* Scale without index. */
13508 return false;
13510 if (scale != 2 && scale != 4 && scale != 8)
13511 /* Scale is not a valid multiplier. */
13512 return false;
13515 /* Validate displacement. */
13516 if (disp)
13518 if (GET_CODE (disp) == CONST
13519 && GET_CODE (XEXP (disp, 0)) == UNSPEC
13520 && XINT (XEXP (disp, 0), 1) != UNSPEC_MACHOPIC_OFFSET)
13521 switch (XINT (XEXP (disp, 0), 1))
13523 /* Refuse GOTOFF and GOT in 64bit mode since it is always 64bit when
13524 used. While the ABI also specifies 32bit relocations, we don't produce
13525 them at all and use IP-relative addressing instead. */
13526 case UNSPEC_GOT:
13527 case UNSPEC_GOTOFF:
13528 gcc_assert (flag_pic);
13529 if (!TARGET_64BIT)
13530 goto is_legitimate_pic;
13532 /* 64bit address unspec. */
13533 return false;
13535 case UNSPEC_GOTPCREL:
13536 case UNSPEC_PCREL:
13537 gcc_assert (flag_pic);
13538 goto is_legitimate_pic;
13540 case UNSPEC_GOTTPOFF:
13541 case UNSPEC_GOTNTPOFF:
13542 case UNSPEC_INDNTPOFF:
13543 case UNSPEC_NTPOFF:
13544 case UNSPEC_DTPOFF:
13545 break;
13547 case UNSPEC_STACK_CHECK:
13548 gcc_assert (flag_split_stack);
13549 break;
13551 default:
13552 /* Invalid address unspec. */
13553 return false;
13556 else if (SYMBOLIC_CONST (disp)
13557 && (flag_pic
13558 || (TARGET_MACHO
13559 #if TARGET_MACHO
13560 && MACHOPIC_INDIRECT
13561 && !machopic_operand_p (disp)
13562 #endif
13566 is_legitimate_pic:
13567 if (TARGET_64BIT && (index || base))
13569 /* foo@dtpoff(%rX) is ok. */
13570 if (GET_CODE (disp) != CONST
13571 || GET_CODE (XEXP (disp, 0)) != PLUS
13572 || GET_CODE (XEXP (XEXP (disp, 0), 0)) != UNSPEC
13573 || !CONST_INT_P (XEXP (XEXP (disp, 0), 1))
13574 || (XINT (XEXP (XEXP (disp, 0), 0), 1) != UNSPEC_DTPOFF
13575 && XINT (XEXP (XEXP (disp, 0), 0), 1) != UNSPEC_NTPOFF))
13576 /* Non-constant pic memory reference. */
13577 return false;
13579 else if ((!TARGET_MACHO || flag_pic)
13580 && ! legitimate_pic_address_disp_p (disp))
13581 /* Displacement is an invalid pic construct. */
13582 return false;
13583 #if TARGET_MACHO
13584 else if (MACHO_DYNAMIC_NO_PIC_P
13585 && !ix86_legitimate_constant_p (Pmode, disp))
13586 /* displacement must be referenced via non_lazy_pointer */
13587 return false;
13588 #endif
13590 /* This code used to verify that a symbolic pic displacement
13591 includes the pic_offset_table_rtx register.
13593 While this is a good idea, unfortunately these constructs may
13594 be created by "adds using lea" optimization for incorrect
13595 code like:
13597 int a;
13598 int foo(int i)
13600 return *(&a+i);
13603 This code is nonsensical, but results in addressing the
13604 GOT table with a pic_offset_table_rtx base. We can't
13605 just refuse it easily, since it gets matched by the
13606 "addsi3" pattern, which later gets split to lea when
13607 the output register differs from the input. While this
13608 could be handled by a separate addsi pattern for this case
13609 that never results in lea, disabling this test seems to be
13610 the easier and correct fix for the crash. */
13612 else if (GET_CODE (disp) != LABEL_REF
13613 && !CONST_INT_P (disp)
13614 && (GET_CODE (disp) != CONST
13615 || !ix86_legitimate_constant_p (Pmode, disp))
13616 && (GET_CODE (disp) != SYMBOL_REF
13617 || !ix86_legitimate_constant_p (Pmode, disp)))
13618 /* Displacement is not constant. */
13619 return false;
13620 else if (TARGET_64BIT
13621 && !x86_64_immediate_operand (disp, VOIDmode))
13622 /* Displacement is out of range. */
13623 return false;
13624 /* In x32 mode, constant addresses are sign extended to 64bit, so
13625 we have to prevent addresses from 0x80000000 to 0xffffffff. */
13626 else if (TARGET_X32 && !(index || base)
13627 && CONST_INT_P (disp)
13628 && val_signbit_known_set_p (SImode, INTVAL (disp)))
13629 return false;
13632 /* Everything looks valid. */
13633 return true;
13636 /* Determine if a given RTX is a valid constant address. */
13638 bool
13639 constant_address_p (rtx x)
13641 return CONSTANT_P (x) && ix86_legitimate_address_p (Pmode, x, 1);
13644 /* Return a unique alias set for the GOT. */
13646 static alias_set_type
13647 ix86_GOT_alias_set (void)
13649 static alias_set_type set = -1;
13650 if (set == -1)
13651 set = new_alias_set ();
13652 return set;
13655 /* Return a legitimate reference for ORIG (an address) using the
13656 register REG. If REG is 0, a new pseudo is generated.
13658 There are two types of references that must be handled:
13660 1. Global data references must load the address from the GOT, via
13661 the PIC reg. An insn is emitted to do this load, and the reg is
13662 returned.
13664 2. Static data references, constant pool addresses, and code labels
13665 compute the address as an offset from the GOT, whose base is in
13666 the PIC reg. Static data objects have SYMBOL_FLAG_LOCAL set to
13667 differentiate them from global data objects. The returned
13668 address is the PIC reg + an unspec constant.
13670 TARGET_LEGITIMATE_ADDRESS_P rejects symbolic references unless the PIC
13671 reg also appears in the address. */
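/* Rough examples of the results (added for illustration, 32-bit case):

     local/static data:  (plus pic_offset_table_rtx
                               (const (unspec [symbol] UNSPEC_GOTOFF)))
     global data:        (mem (plus pic_offset_table_rtx
                                    (const (unspec [symbol] UNSPEC_GOT))))

   The second form performs the load from the GOT; the first is just an
   offset from the PIC base.  */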
13673 static rtx
13674 legitimize_pic_address (rtx orig, rtx reg)
13676 rtx addr = orig;
13677 rtx new_rtx = orig;
13679 #if TARGET_MACHO
13680 if (TARGET_MACHO && !TARGET_64BIT)
13682 if (reg == 0)
13683 reg = gen_reg_rtx (Pmode);
13684 /* Use the generic Mach-O PIC machinery. */
13685 return machopic_legitimize_pic_address (orig, GET_MODE (orig), reg);
13687 #endif
13689 if (TARGET_64BIT && TARGET_DLLIMPORT_DECL_ATTRIBUTES)
13691 rtx tmp = legitimize_pe_coff_symbol (addr, true);
13692 if (tmp)
13693 return tmp;
13696 if (TARGET_64BIT && legitimate_pic_address_disp_p (addr))
13697 new_rtx = addr;
13698 else if (TARGET_64BIT && !TARGET_PECOFF
13699 && ix86_cmodel != CM_SMALL_PIC && gotoff_operand (addr, Pmode))
13701 rtx tmpreg;
13702 /* This symbol may be referenced via a displacement from the PIC
13703 base address (@GOTOFF). */
13705 if (GET_CODE (addr) == CONST)
13706 addr = XEXP (addr, 0);
13707 if (GET_CODE (addr) == PLUS)
13709 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, XEXP (addr, 0)),
13710 UNSPEC_GOTOFF);
13711 new_rtx = gen_rtx_PLUS (Pmode, new_rtx, XEXP (addr, 1));
13713 else
13714 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTOFF);
13715 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
13716 if (!reg)
13717 tmpreg = gen_reg_rtx (Pmode);
13718 else
13719 tmpreg = reg;
13720 emit_move_insn (tmpreg, new_rtx);
13722 if (reg != 0)
13724 new_rtx = expand_simple_binop (Pmode, PLUS, reg, pic_offset_table_rtx,
13725 tmpreg, 1, OPTAB_DIRECT);
13726 new_rtx = reg;
13728 else
13729 new_rtx = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, tmpreg);
13731 else if (!TARGET_64BIT && !TARGET_PECOFF && gotoff_operand (addr, Pmode))
13733 /* This symbol may be referenced via a displacement from the PIC
13734 base address (@GOTOFF). */
13736 if (GET_CODE (addr) == CONST)
13737 addr = XEXP (addr, 0);
13738 if (GET_CODE (addr) == PLUS)
13740 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, XEXP (addr, 0)),
13741 UNSPEC_GOTOFF);
13742 new_rtx = gen_rtx_PLUS (Pmode, new_rtx, XEXP (addr, 1));
13744 else
13745 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTOFF);
13746 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
13747 new_rtx = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new_rtx);
13749 if (reg != 0)
13751 emit_move_insn (reg, new_rtx);
13752 new_rtx = reg;
13755 else if ((GET_CODE (addr) == SYMBOL_REF && SYMBOL_REF_TLS_MODEL (addr) == 0)
13756 /* We can't use @GOTOFF for text labels on VxWorks;
13757 see gotoff_operand. */
13758 || (TARGET_VXWORKS_RTP && GET_CODE (addr) == LABEL_REF))
13760 rtx tmp = legitimize_pe_coff_symbol (addr, true);
13761 if (tmp)
13762 return tmp;
13764 /* For x64 PE-COFF there is no GOT table. So we use address
13765 directly. */
13766 if (TARGET_64BIT && TARGET_PECOFF)
13768 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_PCREL);
13769 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
13771 if (reg == 0)
13772 reg = gen_reg_rtx (Pmode);
13773 emit_move_insn (reg, new_rtx);
13774 new_rtx = reg;
13776 else if (TARGET_64BIT && ix86_cmodel != CM_LARGE_PIC)
13778 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTPCREL);
13779 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
13780 new_rtx = gen_const_mem (Pmode, new_rtx);
13781 set_mem_alias_set (new_rtx, ix86_GOT_alias_set ());
13783 if (reg == 0)
13784 reg = gen_reg_rtx (Pmode);
13785 /* Use gen_movsi directly, otherwise the address is loaded
13786 into a register for CSE. We don't want to CSE these addresses;
13787 instead we CSE addresses from the GOT table, so skip this. */
13788 emit_insn (gen_movsi (reg, new_rtx));
13789 new_rtx = reg;
13791 else
13793 /* This symbol must be referenced via a load from the
13794 Global Offset Table (@GOT). */
13796 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOT);
13797 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
13798 if (TARGET_64BIT)
13799 new_rtx = force_reg (Pmode, new_rtx);
13800 new_rtx = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new_rtx);
13801 new_rtx = gen_const_mem (Pmode, new_rtx);
13802 set_mem_alias_set (new_rtx, ix86_GOT_alias_set ());
13804 if (reg == 0)
13805 reg = gen_reg_rtx (Pmode);
13806 emit_move_insn (reg, new_rtx);
13807 new_rtx = reg;
13810 else
13812 if (CONST_INT_P (addr)
13813 && !x86_64_immediate_operand (addr, VOIDmode))
13815 if (reg)
13817 emit_move_insn (reg, addr);
13818 new_rtx = reg;
13820 else
13821 new_rtx = force_reg (Pmode, addr);
13823 else if (GET_CODE (addr) == CONST)
13825 addr = XEXP (addr, 0);
13827 /* We must match stuff we generate before. Assume the only
13828 unspecs that can get here are ours. Not that we could do
13829 anything with them anyway.... */
13830 if (GET_CODE (addr) == UNSPEC
13831 || (GET_CODE (addr) == PLUS
13832 && GET_CODE (XEXP (addr, 0)) == UNSPEC))
13833 return orig;
13834 gcc_assert (GET_CODE (addr) == PLUS);
13836 if (GET_CODE (addr) == PLUS)
13838 rtx op0 = XEXP (addr, 0), op1 = XEXP (addr, 1);
13840 /* Check first to see if this is a constant offset from a @GOTOFF
13841 symbol reference. */
13842 if (!TARGET_PECOFF && gotoff_operand (op0, Pmode)
13843 && CONST_INT_P (op1))
13845 if (!TARGET_64BIT)
13847 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, op0),
13848 UNSPEC_GOTOFF);
13849 new_rtx = gen_rtx_PLUS (Pmode, new_rtx, op1);
13850 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
13851 new_rtx = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new_rtx);
13853 if (reg != 0)
13855 emit_move_insn (reg, new_rtx);
13856 new_rtx = reg;
13859 else
13861 if (INTVAL (op1) < -16*1024*1024
13862 || INTVAL (op1) >= 16*1024*1024)
13864 if (!x86_64_immediate_operand (op1, Pmode))
13865 op1 = force_reg (Pmode, op1);
13866 new_rtx = gen_rtx_PLUS (Pmode, force_reg (Pmode, op0), op1);
13870 else
13872 rtx base = legitimize_pic_address (op0, reg);
13873 machine_mode mode = GET_MODE (base);
13874 new_rtx
13875 = legitimize_pic_address (op1, base == reg ? NULL_RTX : reg);
13877 if (CONST_INT_P (new_rtx))
13879 if (INTVAL (new_rtx) < -16*1024*1024
13880 || INTVAL (new_rtx) >= 16*1024*1024)
13882 if (!x86_64_immediate_operand (new_rtx, mode))
13883 new_rtx = force_reg (mode, new_rtx);
13884 new_rtx
13885 = gen_rtx_PLUS (mode, force_reg (mode, base), new_rtx);
13887 else
13888 new_rtx = plus_constant (mode, base, INTVAL (new_rtx));
13890 else
13892 /* For %rip addressing, we have to use just disp32, not
13893 base or index. */
13894 if (TARGET_64BIT
13895 && (GET_CODE (base) == SYMBOL_REF
13896 || GET_CODE (base) == LABEL_REF))
13897 base = force_reg (mode, base);
13898 if (GET_CODE (new_rtx) == PLUS
13899 && CONSTANT_P (XEXP (new_rtx, 1)))
13901 base = gen_rtx_PLUS (mode, base, XEXP (new_rtx, 0));
13902 new_rtx = XEXP (new_rtx, 1);
13904 new_rtx = gen_rtx_PLUS (mode, base, new_rtx);
13909 return new_rtx;
13912 /* Load the thread pointer. If TO_REG is true, force it into a register. */
13914 static rtx
13915 get_thread_pointer (machine_mode tp_mode, bool to_reg)
13917 rtx tp = gen_rtx_UNSPEC (ptr_mode, gen_rtvec (1, const0_rtx), UNSPEC_TP);
13919 if (GET_MODE (tp) != tp_mode)
13921 gcc_assert (GET_MODE (tp) == SImode);
13922 gcc_assert (tp_mode == DImode);
13924 tp = gen_rtx_ZERO_EXTEND (tp_mode, tp);
13927 if (to_reg)
13928 tp = copy_to_mode_reg (tp_mode, tp);
13930 return tp;
13933 /* Construct the SYMBOL_REF for the tls_get_addr function. */
13935 static GTY(()) rtx ix86_tls_symbol;
13937 static rtx
13938 ix86_tls_get_addr (void)
13940 if (!ix86_tls_symbol)
13942 const char *sym
13943 = ((TARGET_ANY_GNU_TLS && !TARGET_64BIT)
13944 ? "___tls_get_addr" : "__tls_get_addr");
13946 ix86_tls_symbol = gen_rtx_SYMBOL_REF (Pmode, sym);
13949 if (ix86_cmodel == CM_LARGE_PIC && !TARGET_PECOFF)
13951 rtx unspec = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, ix86_tls_symbol),
13952 UNSPEC_PLTOFF);
13953 return gen_rtx_PLUS (Pmode, pic_offset_table_rtx,
13954 gen_rtx_CONST (Pmode, unspec));
13957 return ix86_tls_symbol;
13960 /* Construct the SYMBOL_REF for the _TLS_MODULE_BASE_ symbol. */
13962 static GTY(()) rtx ix86_tls_module_base_symbol;
13965 ix86_tls_module_base (void)
13967 if (!ix86_tls_module_base_symbol)
13969 ix86_tls_module_base_symbol
13970 = gen_rtx_SYMBOL_REF (Pmode, "_TLS_MODULE_BASE_");
13972 SYMBOL_REF_FLAGS (ix86_tls_module_base_symbol)
13973 |= TLS_MODEL_GLOBAL_DYNAMIC << SYMBOL_FLAG_TLS_SHIFT;
13976 return ix86_tls_module_base_symbol;
13979 /* A subroutine of ix86_legitimize_address and ix86_expand_move. FOR_MOV is
13980 false if we expect this to be used for a memory address and true if
13981 we expect to load the address into a register. */
13983 static rtx
13984 legitimize_tls_address (rtx x, enum tls_model model, bool for_mov)
13986 rtx dest, base, off;
13987 rtx pic = NULL_RTX, tp = NULL_RTX;
13988 machine_mode tp_mode = Pmode;
13989 int type;
13991 /* Fall back to the global dynamic model if the tool chain cannot
13992 support local dynamic. */
13993 if (TARGET_SUN_TLS && !TARGET_64BIT
13994 && !HAVE_AS_IX86_TLSLDMPLT && !HAVE_AS_IX86_TLSLDM
13995 && model == TLS_MODEL_LOCAL_DYNAMIC)
13996 model = TLS_MODEL_GLOBAL_DYNAMIC;
13998 switch (model)
14000 case TLS_MODEL_GLOBAL_DYNAMIC:
14001 dest = gen_reg_rtx (Pmode);
14003 if (!TARGET_64BIT)
14005 if (flag_pic && !TARGET_PECOFF)
14006 pic = pic_offset_table_rtx;
14007 else
14009 pic = gen_reg_rtx (Pmode);
14010 emit_insn (gen_set_got (pic));
14014 if (TARGET_GNU2_TLS)
14016 if (TARGET_64BIT)
14017 emit_insn (gen_tls_dynamic_gnu2_64 (dest, x));
14018 else
14019 emit_insn (gen_tls_dynamic_gnu2_32 (dest, x, pic));
14021 tp = get_thread_pointer (Pmode, true);
14022 dest = force_reg (Pmode, gen_rtx_PLUS (Pmode, tp, dest));
14024 if (GET_MODE (x) != Pmode)
14025 x = gen_rtx_ZERO_EXTEND (Pmode, x);
14027 set_unique_reg_note (get_last_insn (), REG_EQUAL, x);
14029 else
14031 rtx caddr = ix86_tls_get_addr ();
14033 if (TARGET_64BIT)
14035 rtx rax = gen_rtx_REG (Pmode, AX_REG);
14036 rtx_insn *insns;
14038 start_sequence ();
14039 emit_call_insn
14040 (ix86_gen_tls_global_dynamic_64 (rax, x, caddr));
14041 insns = get_insns ();
14042 end_sequence ();
14044 if (GET_MODE (x) != Pmode)
14045 x = gen_rtx_ZERO_EXTEND (Pmode, x);
14047 RTL_CONST_CALL_P (insns) = 1;
14048 emit_libcall_block (insns, dest, rax, x);
14050 else
14051 emit_insn (gen_tls_global_dynamic_32 (dest, x, pic, caddr));
14053 break;
14055 case TLS_MODEL_LOCAL_DYNAMIC:
14056 base = gen_reg_rtx (Pmode);
14058 if (!TARGET_64BIT)
14060 if (flag_pic)
14061 pic = pic_offset_table_rtx;
14062 else
14064 pic = gen_reg_rtx (Pmode);
14065 emit_insn (gen_set_got (pic));
14069 if (TARGET_GNU2_TLS)
14071 rtx tmp = ix86_tls_module_base ();
14073 if (TARGET_64BIT)
14074 emit_insn (gen_tls_dynamic_gnu2_64 (base, tmp));
14075 else
14076 emit_insn (gen_tls_dynamic_gnu2_32 (base, tmp, pic));
14078 tp = get_thread_pointer (Pmode, true);
14079 set_unique_reg_note (get_last_insn (), REG_EQUAL,
14080 gen_rtx_MINUS (Pmode, tmp, tp));
14082 else
14084 rtx caddr = ix86_tls_get_addr ();
14086 if (TARGET_64BIT)
14088 rtx rax = gen_rtx_REG (Pmode, AX_REG);
14089 rtx_insn *insns;
14090 rtx eqv;
14092 start_sequence ();
14093 emit_call_insn
14094 (ix86_gen_tls_local_dynamic_base_64 (rax, caddr));
14095 insns = get_insns ();
14096 end_sequence ();
14098 /* Attach a unique REG_EQUAL, to allow the RTL optimizers to
14099 share the LD_BASE result with other LD model accesses. */
14100 eqv = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const0_rtx),
14101 UNSPEC_TLS_LD_BASE);
14103 RTL_CONST_CALL_P (insns) = 1;
14104 emit_libcall_block (insns, base, rax, eqv);
14106 else
14107 emit_insn (gen_tls_local_dynamic_base_32 (base, pic, caddr));
14110 off = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x), UNSPEC_DTPOFF);
14111 off = gen_rtx_CONST (Pmode, off);
14113 dest = force_reg (Pmode, gen_rtx_PLUS (Pmode, base, off));
14115 if (TARGET_GNU2_TLS)
14117 dest = force_reg (Pmode, gen_rtx_PLUS (Pmode, dest, tp));
14119 if (GET_MODE (x) != Pmode)
14120 x = gen_rtx_ZERO_EXTEND (Pmode, x);
14122 set_unique_reg_note (get_last_insn (), REG_EQUAL, x);
14124 break;
14126 case TLS_MODEL_INITIAL_EXEC:
14127 if (TARGET_64BIT)
14129 if (TARGET_SUN_TLS && !TARGET_X32)
14131 /* The Sun linker took the AMD64 TLS spec literally
14132 and can only handle %rax as destination of the
14133 initial executable code sequence. */
14135 dest = gen_reg_rtx (DImode);
14136 emit_insn (gen_tls_initial_exec_64_sun (dest, x));
14137 return dest;
14140 /* Generate DImode references to avoid %fs:(%reg32)
14141 problems and linker IE->LE relaxation bug. */
14142 tp_mode = DImode;
14143 pic = NULL;
14144 type = UNSPEC_GOTNTPOFF;
14146 else if (flag_pic)
14148 pic = pic_offset_table_rtx;
14149 type = TARGET_ANY_GNU_TLS ? UNSPEC_GOTNTPOFF : UNSPEC_GOTTPOFF;
14151 else if (!TARGET_ANY_GNU_TLS)
14153 pic = gen_reg_rtx (Pmode);
14154 emit_insn (gen_set_got (pic));
14155 type = UNSPEC_GOTTPOFF;
14157 else
14159 pic = NULL;
14160 type = UNSPEC_INDNTPOFF;
14163 off = gen_rtx_UNSPEC (tp_mode, gen_rtvec (1, x), type);
14164 off = gen_rtx_CONST (tp_mode, off);
14165 if (pic)
14166 off = gen_rtx_PLUS (tp_mode, pic, off);
14167 off = gen_const_mem (tp_mode, off);
14168 set_mem_alias_set (off, ix86_GOT_alias_set ());
14170 if (TARGET_64BIT || TARGET_ANY_GNU_TLS)
14172 base = get_thread_pointer (tp_mode,
14173 for_mov || !TARGET_TLS_DIRECT_SEG_REFS);
14174 off = force_reg (tp_mode, off);
14175 return gen_rtx_PLUS (tp_mode, base, off);
14177 else
14179 base = get_thread_pointer (Pmode, true);
14180 dest = gen_reg_rtx (Pmode);
14181 emit_insn (ix86_gen_sub3 (dest, base, off));
14183 break;
14185 case TLS_MODEL_LOCAL_EXEC:
14186 off = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x),
14187 (TARGET_64BIT || TARGET_ANY_GNU_TLS)
14188 ? UNSPEC_NTPOFF : UNSPEC_TPOFF);
14189 off = gen_rtx_CONST (Pmode, off);
14191 if (TARGET_64BIT || TARGET_ANY_GNU_TLS)
14193 base = get_thread_pointer (Pmode,
14194 for_mov || !TARGET_TLS_DIRECT_SEG_REFS);
14195 return gen_rtx_PLUS (Pmode, base, off);
14197 else
14199 base = get_thread_pointer (Pmode, true);
14200 dest = gen_reg_rtx (Pmode);
14201 emit_insn (ix86_gen_sub3 (dest, base, off));
14203 break;
14205 default:
14206 gcc_unreachable ();
14209 return dest;
14212 /* Create or return the unique __imp_DECL dllimport symbol corresponding
14213 to symbol DECL if BEIMPORT is true. Otherwise create or return the
14214 unique refptr-DECL symbol corresponding to symbol DECL. */
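/* Naming example (added for illustration): for a dllimport decl "foo" with
   an empty user_label_prefix the synthesized symbol is "*__imp_foo"; for a
   refptr-style reference it would be "*.refptr.foo".  The leading '*'
   suppresses further user-label prefixing.  */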
14216 struct dllimport_hasher : ggc_cache_ptr_hash<tree_map>
14218 static inline hashval_t hash (tree_map *m) { return m->hash; }
14219 static inline bool
14220 equal (tree_map *a, tree_map *b)
14222 return a->base.from == b->base.from;
14225 static int
14226 keep_cache_entry (tree_map *&m)
14228 return ggc_marked_p (m->base.from);
14232 static GTY((cache)) hash_table<dllimport_hasher> *dllimport_map;
14234 static tree
14235 get_dllimport_decl (tree decl, bool beimport)
14237 struct tree_map *h, in;
14238 const char *name;
14239 const char *prefix;
14240 size_t namelen, prefixlen;
14241 char *imp_name;
14242 tree to;
14243 rtx rtl;
14245 if (!dllimport_map)
14246 dllimport_map = hash_table<dllimport_hasher>::create_ggc (512);
14248 in.hash = htab_hash_pointer (decl);
14249 in.base.from = decl;
14250 tree_map **loc = dllimport_map->find_slot_with_hash (&in, in.hash, INSERT);
14251 h = *loc;
14252 if (h)
14253 return h->to;
14255 *loc = h = ggc_alloc<tree_map> ();
14256 h->hash = in.hash;
14257 h->base.from = decl;
14258 h->to = to = build_decl (DECL_SOURCE_LOCATION (decl),
14259 VAR_DECL, NULL, ptr_type_node);
14260 DECL_ARTIFICIAL (to) = 1;
14261 DECL_IGNORED_P (to) = 1;
14262 DECL_EXTERNAL (to) = 1;
14263 TREE_READONLY (to) = 1;
14265 name = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (decl));
14266 name = targetm.strip_name_encoding (name);
14267 if (beimport)
14268 prefix = name[0] == FASTCALL_PREFIX || user_label_prefix[0] == 0
14269 ? "*__imp_" : "*__imp__";
14270 else
14271 prefix = user_label_prefix[0] == 0 ? "*.refptr." : "*refptr.";
14272 namelen = strlen (name);
14273 prefixlen = strlen (prefix);
14274 imp_name = (char *) alloca (namelen + prefixlen + 1);
14275 memcpy (imp_name, prefix, prefixlen);
14276 memcpy (imp_name + prefixlen, name, namelen + 1);
14278 name = ggc_alloc_string (imp_name, namelen + prefixlen);
14279 rtl = gen_rtx_SYMBOL_REF (Pmode, name);
14280 SET_SYMBOL_REF_DECL (rtl, to);
14281 SYMBOL_REF_FLAGS (rtl) = SYMBOL_FLAG_LOCAL | SYMBOL_FLAG_STUBVAR;
14282 if (!beimport)
14284 SYMBOL_REF_FLAGS (rtl) |= SYMBOL_FLAG_EXTERNAL;
14285 #ifdef SUB_TARGET_RECORD_STUB
14286 SUB_TARGET_RECORD_STUB (name);
14287 #endif
14290 rtl = gen_const_mem (Pmode, rtl);
14291 set_mem_alias_set (rtl, ix86_GOT_alias_set ());
14293 SET_DECL_RTL (to, rtl);
14294 SET_DECL_ASSEMBLER_NAME (to, get_identifier (name));
14296 return to;
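/* For illustration, using a hypothetical dllimport'ed declaration foo: on a
   64-bit PE target (empty user label prefix) the stub symbol built above is
   "__imp_foo", and the returned decl expands to a load through the import
   table, e.g.

	movq	__imp_foo(%rip), %rax

   On 32-bit targets with a "_" user label prefix the symbol becomes
   "__imp__foo" instead, as selected by the prefix logic above.  */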
14299 /* Expand SYMBOL into its corresponding far-addressed symbol.
14300 WANT_REG is true if we require the result be a register. */
14302 static rtx
14303 legitimize_pe_coff_extern_decl (rtx symbol, bool want_reg)
14305 tree imp_decl;
14306 rtx x;
14308 gcc_assert (SYMBOL_REF_DECL (symbol));
14309 imp_decl = get_dllimport_decl (SYMBOL_REF_DECL (symbol), false);
14311 x = DECL_RTL (imp_decl);
14312 if (want_reg)
14313 x = force_reg (Pmode, x);
14314 return x;
14317 /* Expand SYMBOL into its corresponding dllimport symbol. WANT_REG is
14318 true if we require the result be a register. */
14320 static rtx
14321 legitimize_dllimport_symbol (rtx symbol, bool want_reg)
14323 tree imp_decl;
14324 rtx x;
14326 gcc_assert (SYMBOL_REF_DECL (symbol));
14327 imp_decl = get_dllimport_decl (SYMBOL_REF_DECL (symbol), true);
14329 x = DECL_RTL (imp_decl);
14330 if (want_reg)
14331 x = force_reg (Pmode, x);
14332 return x;
14335 /* Expand SYMBOL into its corresponding dllimport or refptr symbol. WANT_REG
14336 is true if we require the result be a register. */
14338 static rtx
14339 legitimize_pe_coff_symbol (rtx addr, bool inreg)
14341 if (!TARGET_PECOFF)
14342 return NULL_RTX;
14344 if (TARGET_DLLIMPORT_DECL_ATTRIBUTES)
14346 if (GET_CODE (addr) == SYMBOL_REF && SYMBOL_REF_DLLIMPORT_P (addr))
14347 return legitimize_dllimport_symbol (addr, inreg);
14348 if (GET_CODE (addr) == CONST
14349 && GET_CODE (XEXP (addr, 0)) == PLUS
14350 && GET_CODE (XEXP (XEXP (addr, 0), 0)) == SYMBOL_REF
14351 && SYMBOL_REF_DLLIMPORT_P (XEXP (XEXP (addr, 0), 0)))
14353 rtx t = legitimize_dllimport_symbol (XEXP (XEXP (addr, 0), 0), inreg);
14354 return gen_rtx_PLUS (Pmode, t, XEXP (XEXP (addr, 0), 1));
14358 if (ix86_cmodel != CM_LARGE_PIC && ix86_cmodel != CM_MEDIUM_PIC)
14359 return NULL_RTX;
14360 if (GET_CODE (addr) == SYMBOL_REF
14361 && !is_imported_p (addr)
14362 && SYMBOL_REF_EXTERNAL_P (addr)
14363 && SYMBOL_REF_DECL (addr))
14364 return legitimize_pe_coff_extern_decl (addr, inreg);
14366 if (GET_CODE (addr) == CONST
14367 && GET_CODE (XEXP (addr, 0)) == PLUS
14368 && GET_CODE (XEXP (XEXP (addr, 0), 0)) == SYMBOL_REF
14369 && !is_imported_p (XEXP (XEXP (addr, 0), 0))
14370 && SYMBOL_REF_EXTERNAL_P (XEXP (XEXP (addr, 0), 0))
14371 && SYMBOL_REF_DECL (XEXP (XEXP (addr, 0), 0)))
14373 rtx t = legitimize_pe_coff_extern_decl (XEXP (XEXP (addr, 0), 0), inreg);
14374 return gen_rtx_PLUS (Pmode, t, XEXP (XEXP (addr, 0), 1));
14376 return NULL_RTX;
14379 /* Try machine-dependent ways of modifying an illegitimate address
14380 to be legitimate. If we find one, return the new, valid address.
14381 This macro is used in only one place: `memory_address' in explow.c.
14383 OLDX is the address as it was before break_out_memory_refs was called.
14384 In some cases it is useful to look at this to decide what needs to be done.
14386 It is always safe for this macro to do nothing. It exists to recognize
14387 opportunities to optimize the output.
14389 For the 80386, we handle X+REG by loading X into a register R and
14390 using R+REG. R will go in a general reg and indexing will be used.
14391 However, if REG is a broken-out memory address or multiplication,
14392 nothing needs to be done because REG can certainly go in a general reg.
14394 When -fpic is used, special handling is needed for symbolic references.
14395 See comments by legitimize_pic_address in i386.c for details. */
14397 static rtx
14398 ix86_legitimize_address (rtx x, rtx, machine_mode mode)
14400 bool changed = false;
14401 unsigned log;
14403 log = GET_CODE (x) == SYMBOL_REF ? SYMBOL_REF_TLS_MODEL (x) : 0;
14404 if (log)
14405 return legitimize_tls_address (x, (enum tls_model) log, false);
14406 if (GET_CODE (x) == CONST
14407 && GET_CODE (XEXP (x, 0)) == PLUS
14408 && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF
14409 && (log = SYMBOL_REF_TLS_MODEL (XEXP (XEXP (x, 0), 0))))
14411 rtx t = legitimize_tls_address (XEXP (XEXP (x, 0), 0),
14412 (enum tls_model) log, false);
14413 return gen_rtx_PLUS (Pmode, t, XEXP (XEXP (x, 0), 1));
14416 if (TARGET_DLLIMPORT_DECL_ATTRIBUTES)
14418 rtx tmp = legitimize_pe_coff_symbol (x, true);
14419 if (tmp)
14420 return tmp;
14423 if (flag_pic && SYMBOLIC_CONST (x))
14424 return legitimize_pic_address (x, 0);
14426 #if TARGET_MACHO
14427 if (MACHO_DYNAMIC_NO_PIC_P && SYMBOLIC_CONST (x))
14428 return machopic_indirect_data_reference (x, 0);
14429 #endif
14431 /* Canonicalize shifts by 0, 1, 2, 3 into multiply. */
14432 if (GET_CODE (x) == ASHIFT
14433 && CONST_INT_P (XEXP (x, 1))
14434 && (unsigned HOST_WIDE_INT) INTVAL (XEXP (x, 1)) < 4)
14436 changed = true;
14437 log = INTVAL (XEXP (x, 1));
14438 x = gen_rtx_MULT (Pmode, force_reg (Pmode, XEXP (x, 0)),
14439 GEN_INT (1 << log));
14442 if (GET_CODE (x) == PLUS)
14444 /* Canonicalize shifts by 0, 1, 2, 3 into multiply. */
14446 if (GET_CODE (XEXP (x, 0)) == ASHIFT
14447 && CONST_INT_P (XEXP (XEXP (x, 0), 1))
14448 && (unsigned HOST_WIDE_INT) INTVAL (XEXP (XEXP (x, 0), 1)) < 4)
14450 changed = true;
14451 log = INTVAL (XEXP (XEXP (x, 0), 1));
14452 XEXP (x, 0) = gen_rtx_MULT (Pmode,
14453 force_reg (Pmode, XEXP (XEXP (x, 0), 0)),
14454 GEN_INT (1 << log));
14457 if (GET_CODE (XEXP (x, 1)) == ASHIFT
14458 && CONST_INT_P (XEXP (XEXP (x, 1), 1))
14459 && (unsigned HOST_WIDE_INT) INTVAL (XEXP (XEXP (x, 1), 1)) < 4)
14461 changed = true;
14462 log = INTVAL (XEXP (XEXP (x, 1), 1));
14463 XEXP (x, 1) = gen_rtx_MULT (Pmode,
14464 force_reg (Pmode, XEXP (XEXP (x, 1), 0)),
14465 GEN_INT (1 << log));
14468 /* Put multiply first if it isn't already. */
14469 if (GET_CODE (XEXP (x, 1)) == MULT)
14471 std::swap (XEXP (x, 0), XEXP (x, 1));
14472 changed = true;
14475 /* Canonicalize (plus (mult (reg) (const)) (plus (reg) (const)))
14476 into (plus (plus (mult (reg) (const)) (reg)) (const)). This can be
14477 created by virtual register instantiation, register elimination, and
14478 similar optimizations. */
14479 if (GET_CODE (XEXP (x, 0)) == MULT && GET_CODE (XEXP (x, 1)) == PLUS)
14481 changed = true;
14482 x = gen_rtx_PLUS (Pmode,
14483 gen_rtx_PLUS (Pmode, XEXP (x, 0),
14484 XEXP (XEXP (x, 1), 0)),
14485 XEXP (XEXP (x, 1), 1));
14488 /* Canonicalize
14489 (plus (plus (mult (reg) (const)) (plus (reg) (const))) const)
14490 into (plus (plus (mult (reg) (const)) (reg)) (const)). */
14491 else if (GET_CODE (x) == PLUS && GET_CODE (XEXP (x, 0)) == PLUS
14492 && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
14493 && GET_CODE (XEXP (XEXP (x, 0), 1)) == PLUS
14494 && CONSTANT_P (XEXP (x, 1)))
14496 rtx constant;
14497 rtx other = NULL_RTX;
14499 if (CONST_INT_P (XEXP (x, 1)))
14501 constant = XEXP (x, 1);
14502 other = XEXP (XEXP (XEXP (x, 0), 1), 1);
14504 else if (CONST_INT_P (XEXP (XEXP (XEXP (x, 0), 1), 1)))
14506 constant = XEXP (XEXP (XEXP (x, 0), 1), 1);
14507 other = XEXP (x, 1);
14509 else
14510 constant = 0;
14512 if (constant)
14514 changed = true;
14515 x = gen_rtx_PLUS (Pmode,
14516 gen_rtx_PLUS (Pmode, XEXP (XEXP (x, 0), 0),
14517 XEXP (XEXP (XEXP (x, 0), 1), 0)),
14518 plus_constant (Pmode, other,
14519 INTVAL (constant)));
14523 if (changed && ix86_legitimate_address_p (mode, x, false))
14524 return x;
14526 if (GET_CODE (XEXP (x, 0)) == MULT)
14528 changed = true;
14529 XEXP (x, 0) = copy_addr_to_reg (XEXP (x, 0));
14532 if (GET_CODE (XEXP (x, 1)) == MULT)
14534 changed = true;
14535 XEXP (x, 1) = copy_addr_to_reg (XEXP (x, 1));
14538 if (changed
14539 && REG_P (XEXP (x, 1))
14540 && REG_P (XEXP (x, 0)))
14541 return x;
14543 if (flag_pic && SYMBOLIC_CONST (XEXP (x, 1)))
14545 changed = true;
14546 x = legitimize_pic_address (x, 0);
14549 if (changed && ix86_legitimate_address_p (mode, x, false))
14550 return x;
14552 if (REG_P (XEXP (x, 0)))
14554 rtx temp = gen_reg_rtx (Pmode);
14555 rtx val = force_operand (XEXP (x, 1), temp);
14556 if (val != temp)
14558 val = convert_to_mode (Pmode, val, 1);
14559 emit_move_insn (temp, val);
14562 XEXP (x, 1) = temp;
14563 return x;
14566 else if (REG_P (XEXP (x, 1)))
14568 rtx temp = gen_reg_rtx (Pmode);
14569 rtx val = force_operand (XEXP (x, 0), temp);
14570 if (val != temp)
14572 val = convert_to_mode (Pmode, val, 1);
14573 emit_move_insn (temp, val);
14576 XEXP (x, 0) = temp;
14577 return x;
14581 return x;
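/* A concrete sketch of the canonicalizations above, using made-up pseudo
   registers r1/r2 for illustration: an address of the form

	(plus (ashift (reg r1) (const_int 2)) (reg r2))

   is rewritten into

	(plus (mult (reg r1) (const_int 4)) (reg r2))

   which matches the base + index*scale form accepted by
   ix86_legitimate_address_p and is emitted as, e.g., (%r2,%r1,4).  */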
14584 /* Print an integer constant expression in assembler syntax. Addition
14585 and subtraction are the only arithmetic that may appear in these
14586 expressions. FILE is the stdio stream to write to, X is the rtx, and
14587 CODE is the operand print code from the output string. */
14589 static void
14590 output_pic_addr_const (FILE *file, rtx x, int code)
14592 char buf[256];
14594 switch (GET_CODE (x))
14596 case PC:
14597 gcc_assert (flag_pic);
14598 putc ('.', file);
14599 break;
14601 case SYMBOL_REF:
14602 if (TARGET_64BIT || ! TARGET_MACHO_BRANCH_ISLANDS)
14603 output_addr_const (file, x);
14604 else
14606 const char *name = XSTR (x, 0);
14608 /* Mark the decl as referenced so that cgraph will
14609 output the function. */
14610 if (SYMBOL_REF_DECL (x))
14611 mark_decl_referenced (SYMBOL_REF_DECL (x));
14613 #if TARGET_MACHO
14614 if (MACHOPIC_INDIRECT
14615 && machopic_classify_symbol (x) == MACHOPIC_UNDEFINED_FUNCTION)
14616 name = machopic_indirection_name (x, /*stub_p=*/true);
14617 #endif
14618 assemble_name (file, name);
14620 if (!TARGET_MACHO && !(TARGET_64BIT && TARGET_PECOFF)
14621 && code == 'P' && ! SYMBOL_REF_LOCAL_P (x))
14622 fputs ("@PLT", file);
14623 break;
14625 case LABEL_REF:
14626 x = XEXP (x, 0);
14627 /* FALLTHRU */
14628 case CODE_LABEL:
14629 ASM_GENERATE_INTERNAL_LABEL (buf, "L", CODE_LABEL_NUMBER (x));
14630 assemble_name (asm_out_file, buf);
14631 break;
14633 case CONST_INT:
14634 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
14635 break;
14637 case CONST:
14638 /* This used to output parentheses around the expression,
14639 but that does not work on the 386 (either ATT or BSD assembler). */
14640 output_pic_addr_const (file, XEXP (x, 0), code);
14641 break;
14643 case CONST_DOUBLE:
14644 /* We can't handle floating point constants;
14645 TARGET_PRINT_OPERAND must handle them. */
14646 output_operand_lossage ("floating constant misused");
14647 break;
14649 case PLUS:
14650 /* Some assemblers need integer constants to appear first. */
14651 if (CONST_INT_P (XEXP (x, 0)))
14653 output_pic_addr_const (file, XEXP (x, 0), code);
14654 putc ('+', file);
14655 output_pic_addr_const (file, XEXP (x, 1), code);
14657 else
14659 gcc_assert (CONST_INT_P (XEXP (x, 1)));
14660 output_pic_addr_const (file, XEXP (x, 1), code);
14661 putc ('+', file);
14662 output_pic_addr_const (file, XEXP (x, 0), code);
14664 break;
14666 case MINUS:
14667 if (!TARGET_MACHO)
14668 putc (ASSEMBLER_DIALECT == ASM_INTEL ? '(' : '[', file);
14669 output_pic_addr_const (file, XEXP (x, 0), code);
14670 putc ('-', file);
14671 output_pic_addr_const (file, XEXP (x, 1), code);
14672 if (!TARGET_MACHO)
14673 putc (ASSEMBLER_DIALECT == ASM_INTEL ? ')' : ']', file);
14674 break;
14676 case UNSPEC:
14677 if (XINT (x, 1) == UNSPEC_STACK_CHECK)
14679 bool f = i386_asm_output_addr_const_extra (file, x);
14680 gcc_assert (f);
14681 break;
14684 gcc_assert (XVECLEN (x, 0) == 1);
14685 output_pic_addr_const (file, XVECEXP (x, 0, 0), code);
14686 switch (XINT (x, 1))
14688 case UNSPEC_GOT:
14689 fputs ("@GOT", file);
14690 break;
14691 case UNSPEC_GOTOFF:
14692 fputs ("@GOTOFF", file);
14693 break;
14694 case UNSPEC_PLTOFF:
14695 fputs ("@PLTOFF", file);
14696 break;
14697 case UNSPEC_PCREL:
14698 fputs (ASSEMBLER_DIALECT == ASM_ATT ?
14699 "(%rip)" : "[rip]", file);
14700 break;
14701 case UNSPEC_GOTPCREL:
14702 fputs (ASSEMBLER_DIALECT == ASM_ATT ?
14703 "@GOTPCREL(%rip)" : "@GOTPCREL[rip]", file);
14704 break;
14705 case UNSPEC_GOTTPOFF:
14706 /* FIXME: This might be @TPOFF in Sun ld too. */
14707 fputs ("@gottpoff", file);
14708 break;
14709 case UNSPEC_TPOFF:
14710 fputs ("@tpoff", file);
14711 break;
14712 case UNSPEC_NTPOFF:
14713 if (TARGET_64BIT)
14714 fputs ("@tpoff", file);
14715 else
14716 fputs ("@ntpoff", file);
14717 break;
14718 case UNSPEC_DTPOFF:
14719 fputs ("@dtpoff", file);
14720 break;
14721 case UNSPEC_GOTNTPOFF:
14722 if (TARGET_64BIT)
14723 fputs (ASSEMBLER_DIALECT == ASM_ATT ?
14724 "@gottpoff(%rip)": "@gottpoff[rip]", file);
14725 else
14726 fputs ("@gotntpoff", file);
14727 break;
14728 case UNSPEC_INDNTPOFF:
14729 fputs ("@indntpoff", file);
14730 break;
14731 #if TARGET_MACHO
14732 case UNSPEC_MACHOPIC_OFFSET:
14733 putc ('-', file);
14734 machopic_output_function_base_name (file);
14735 break;
14736 #endif
14737 default:
14738 output_operand_lossage ("invalid UNSPEC as operand");
14739 break;
14741 break;
14743 default:
14744 output_operand_lossage ("invalid expression as operand");
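/* Typical output of the routine above, for illustration with a hypothetical
   function foo: a SYMBOL_REF printed with code 'P' that is not bound locally
   yields "foo@PLT", while (const (unspec [(symbol_ref "foo")] UNSPEC_GOTOFF))
   prints as "foo@GOTOFF", ready to be combined with the PIC register by the
   surrounding address syntax.  */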
14748 /* This is called from dwarf2out.c via TARGET_ASM_OUTPUT_DWARF_DTPREL.
14749 We need to emit DTP-relative relocations. */
14751 static void ATTRIBUTE_UNUSED
14752 i386_output_dwarf_dtprel (FILE *file, int size, rtx x)
14754 fputs (ASM_LONG, file);
14755 output_addr_const (file, x);
14756 fputs ("@dtpoff", file);
14757 switch (size)
14759 case 4:
14760 break;
14761 case 8:
14762 fputs (", 0", file);
14763 break;
14764 default:
14765 gcc_unreachable ();
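/* For example, a 4-byte DTP-relative reference to a hypothetical TLS
   variable tls_var comes out as

	.long	tls_var@dtpoff

   and for SIZE == 8 a zero upper word is appended:

	.long	tls_var@dtpoff, 0
*/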
14769 /* Return true if X is a representation of the PIC register. This copes
14770 with calls from ix86_find_base_term, where the register might have
14771 been replaced by a cselib value. */
14773 static bool
14774 ix86_pic_register_p (rtx x)
14776 if (GET_CODE (x) == VALUE && CSELIB_VAL_PTR (x))
14777 return (pic_offset_table_rtx
14778 && rtx_equal_for_cselib_p (x, pic_offset_table_rtx));
14779 else if (!REG_P (x))
14780 return false;
14781 else if (pic_offset_table_rtx)
14783 if (REGNO (x) == REGNO (pic_offset_table_rtx))
14784 return true;
14785 if (HARD_REGISTER_P (x)
14786 && !HARD_REGISTER_P (pic_offset_table_rtx)
14787 && ORIGINAL_REGNO (x) == REGNO (pic_offset_table_rtx))
14788 return true;
14789 return false;
14791 else
14792 return REGNO (x) == PIC_OFFSET_TABLE_REGNUM;
14795 /* Helper function for ix86_delegitimize_address.
14796 Attempt to delegitimize TLS local-exec accesses. */
14798 static rtx
14799 ix86_delegitimize_tls_address (rtx orig_x)
14801 rtx x = orig_x, unspec;
14802 struct ix86_address addr;
14804 if (!TARGET_TLS_DIRECT_SEG_REFS)
14805 return orig_x;
14806 if (MEM_P (x))
14807 x = XEXP (x, 0);
14808 if (GET_CODE (x) != PLUS || GET_MODE (x) != Pmode)
14809 return orig_x;
14810 if (ix86_decompose_address (x, &addr) == 0
14811 || addr.seg != DEFAULT_TLS_SEG_REG
14812 || addr.disp == NULL_RTX
14813 || GET_CODE (addr.disp) != CONST)
14814 return orig_x;
14815 unspec = XEXP (addr.disp, 0);
14816 if (GET_CODE (unspec) == PLUS && CONST_INT_P (XEXP (unspec, 1)))
14817 unspec = XEXP (unspec, 0);
14818 if (GET_CODE (unspec) != UNSPEC || XINT (unspec, 1) != UNSPEC_NTPOFF)
14819 return orig_x;
14820 x = XVECEXP (unspec, 0, 0);
14821 gcc_assert (GET_CODE (x) == SYMBOL_REF);
14822 if (unspec != XEXP (addr.disp, 0))
14823 x = gen_rtx_PLUS (Pmode, x, XEXP (XEXP (addr.disp, 0), 1));
14824 if (addr.index)
14826 rtx idx = addr.index;
14827 if (addr.scale != 1)
14828 idx = gen_rtx_MULT (Pmode, idx, GEN_INT (addr.scale));
14829 x = gen_rtx_PLUS (Pmode, idx, x);
14831 if (addr.base)
14832 x = gen_rtx_PLUS (Pmode, addr.base, x);
14833 if (MEM_P (orig_x))
14834 x = replace_equiv_address_nv (orig_x, x);
14835 return x;
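/* Sketch of the transformation above, using a hypothetical TLS variable x:
   a local-exec address of the form

	(plus (reg) (const (unspec [(symbol_ref "x")] UNSPEC_NTPOFF)))

   accessed through the %fs/%gs thread segment is rewritten back into an
   address based on the bare (symbol_ref "x") (plus the original base and
   scaled index, if any), which is what the debug machinery expects.  */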
14838 /* In the name of slightly smaller debug output, and to cater to
14839 general assembler lossage, recognize PIC+GOTOFF and turn it back
14840 into a direct symbol reference.
14842 On Darwin, this is necessary to avoid a crash, because Darwin
14843 has a different PIC label for each routine but the DWARF debugging
14844 information is not associated with any particular routine, so it's
14845 necessary to remove references to the PIC label from RTL stored by
14846 the DWARF output code. */
14848 static rtx
14849 ix86_delegitimize_address (rtx x)
14851 rtx orig_x = delegitimize_mem_from_attrs (x);
14852 /* addend is NULL or some rtx if x is something+GOTOFF where
14853 something doesn't include the PIC register. */
14854 rtx addend = NULL_RTX;
14855 /* reg_addend is NULL or a multiple of some register. */
14856 rtx reg_addend = NULL_RTX;
14857 /* const_addend is NULL or a const_int. */
14858 rtx const_addend = NULL_RTX;
14859 /* This is the result, or NULL. */
14860 rtx result = NULL_RTX;
14862 x = orig_x;
14864 if (MEM_P (x))
14865 x = XEXP (x, 0);
14867 if (TARGET_64BIT)
14869 if (GET_CODE (x) == CONST
14870 && GET_CODE (XEXP (x, 0)) == PLUS
14871 && GET_MODE (XEXP (x, 0)) == Pmode
14872 && CONST_INT_P (XEXP (XEXP (x, 0), 1))
14873 && GET_CODE (XEXP (XEXP (x, 0), 0)) == UNSPEC
14874 && XINT (XEXP (XEXP (x, 0), 0), 1) == UNSPEC_PCREL)
14876 rtx x2 = XVECEXP (XEXP (XEXP (x, 0), 0), 0, 0);
14877 x = gen_rtx_PLUS (Pmode, XEXP (XEXP (x, 0), 1), x2);
14878 if (MEM_P (orig_x))
14879 x = replace_equiv_address_nv (orig_x, x);
14880 return x;
14883 if (GET_CODE (x) == CONST
14884 && GET_CODE (XEXP (x, 0)) == UNSPEC
14885 && (XINT (XEXP (x, 0), 1) == UNSPEC_GOTPCREL
14886 || XINT (XEXP (x, 0), 1) == UNSPEC_PCREL)
14887 && (MEM_P (orig_x) || XINT (XEXP (x, 0), 1) == UNSPEC_PCREL))
14889 x = XVECEXP (XEXP (x, 0), 0, 0);
14890 if (GET_MODE (orig_x) != GET_MODE (x) && MEM_P (orig_x))
14892 x = simplify_gen_subreg (GET_MODE (orig_x), x,
14893 GET_MODE (x), 0);
14894 if (x == NULL_RTX)
14895 return orig_x;
14897 return x;
14900 if (ix86_cmodel != CM_MEDIUM_PIC && ix86_cmodel != CM_LARGE_PIC)
14901 return ix86_delegitimize_tls_address (orig_x);
14903 /* Fall thru into the code shared with -m32 for -mcmodel=large -fpic
14904 and -mcmodel=medium -fpic. */
14907 if (GET_CODE (x) != PLUS
14908 || GET_CODE (XEXP (x, 1)) != CONST)
14909 return ix86_delegitimize_tls_address (orig_x);
14911 if (ix86_pic_register_p (XEXP (x, 0)))
14912 /* %ebx + GOT/GOTOFF */
14914 else if (GET_CODE (XEXP (x, 0)) == PLUS)
14916 /* %ebx + %reg * scale + GOT/GOTOFF */
14917 reg_addend = XEXP (x, 0);
14918 if (ix86_pic_register_p (XEXP (reg_addend, 0)))
14919 reg_addend = XEXP (reg_addend, 1);
14920 else if (ix86_pic_register_p (XEXP (reg_addend, 1)))
14921 reg_addend = XEXP (reg_addend, 0);
14922 else
14924 reg_addend = NULL_RTX;
14925 addend = XEXP (x, 0);
14928 else
14929 addend = XEXP (x, 0);
14931 x = XEXP (XEXP (x, 1), 0);
14932 if (GET_CODE (x) == PLUS
14933 && CONST_INT_P (XEXP (x, 1)))
14935 const_addend = XEXP (x, 1);
14936 x = XEXP (x, 0);
14939 if (GET_CODE (x) == UNSPEC
14940 && ((XINT (x, 1) == UNSPEC_GOT && MEM_P (orig_x) && !addend)
14941 || (XINT (x, 1) == UNSPEC_GOTOFF && !MEM_P (orig_x))
14942 || (XINT (x, 1) == UNSPEC_PLTOFF && ix86_cmodel == CM_LARGE_PIC
14943 && !MEM_P (orig_x) && !addend)))
14944 result = XVECEXP (x, 0, 0);
14946 if (!TARGET_64BIT && TARGET_MACHO && darwin_local_data_pic (x)
14947 && !MEM_P (orig_x))
14948 result = XVECEXP (x, 0, 0);
14950 if (! result)
14951 return ix86_delegitimize_tls_address (orig_x);
14953 if (const_addend)
14954 result = gen_rtx_CONST (Pmode, gen_rtx_PLUS (Pmode, result, const_addend));
14955 if (reg_addend)
14956 result = gen_rtx_PLUS (Pmode, reg_addend, result);
14957 if (addend)
14959 /* If the rest of original X doesn't involve the PIC register, add
14960 addend and subtract pic_offset_table_rtx. This can happen e.g.
14961 for code like:
14962 leal (%ebx, %ecx, 4), %ecx
14964 movl foo@GOTOFF(%ecx), %edx
14965 in which case we return (%ecx - %ebx) + foo
14966 or (%ecx - _GLOBAL_OFFSET_TABLE_) + foo if pseudo_pic_reg
14967 and reload has completed. */
14968 if (pic_offset_table_rtx
14969 && (!reload_completed || !ix86_use_pseudo_pic_reg ()))
14970 result = gen_rtx_PLUS (Pmode, gen_rtx_MINUS (Pmode, copy_rtx (addend),
14971 pic_offset_table_rtx),
14972 result);
14973 else if (pic_offset_table_rtx && !TARGET_MACHO && !TARGET_VXWORKS_RTP)
14975 rtx tmp = gen_rtx_SYMBOL_REF (Pmode, GOT_SYMBOL_NAME);
14976 tmp = gen_rtx_MINUS (Pmode, copy_rtx (addend), tmp);
14977 result = gen_rtx_PLUS (Pmode, tmp, result);
14979 else
14980 return orig_x;
14982 if (GET_MODE (orig_x) != Pmode && MEM_P (orig_x))
14984 result = simplify_gen_subreg (GET_MODE (orig_x), result, Pmode, 0);
14985 if (result == NULL_RTX)
14986 return orig_x;
14988 return result;
14991 /* If X is a machine specific address (i.e. a symbol or label being
14992 referenced as a displacement from the GOT implemented using an
14993 UNSPEC), then return the base term. Otherwise return X. */
14996 ix86_find_base_term (rtx x)
14998 rtx term;
15000 if (TARGET_64BIT)
15002 if (GET_CODE (x) != CONST)
15003 return x;
15004 term = XEXP (x, 0);
15005 if (GET_CODE (term) == PLUS
15006 && CONST_INT_P (XEXP (term, 1)))
15007 term = XEXP (term, 0);
15008 if (GET_CODE (term) != UNSPEC
15009 || (XINT (term, 1) != UNSPEC_GOTPCREL
15010 && XINT (term, 1) != UNSPEC_PCREL))
15011 return x;
15013 return XVECEXP (term, 0, 0);
15016 return ix86_delegitimize_address (x);
15019 static void
15020 put_condition_code (enum rtx_code code, machine_mode mode, bool reverse,
15021 bool fp, FILE *file)
15023 const char *suffix;
15025 if (mode == CCFPmode || mode == CCFPUmode)
15027 code = ix86_fp_compare_code_to_integer (code);
15028 mode = CCmode;
15030 if (reverse)
15031 code = reverse_condition (code);
15033 switch (code)
15035 case EQ:
15036 switch (mode)
15038 case CCAmode:
15039 suffix = "a";
15040 break;
15041 case CCCmode:
15042 suffix = "c";
15043 break;
15044 case CCOmode:
15045 suffix = "o";
15046 break;
15047 case CCPmode:
15048 suffix = "p";
15049 break;
15050 case CCSmode:
15051 suffix = "s";
15052 break;
15053 default:
15054 suffix = "e";
15055 break;
15057 break;
15058 case NE:
15059 switch (mode)
15061 case CCAmode:
15062 suffix = "na";
15063 break;
15064 case CCCmode:
15065 suffix = "nc";
15066 break;
15067 case CCOmode:
15068 suffix = "no";
15069 break;
15070 case CCPmode:
15071 suffix = "np";
15072 break;
15073 case CCSmode:
15074 suffix = "ns";
15075 break;
15076 default:
15077 suffix = "ne";
15078 break;
15080 break;
15081 case GT:
15082 gcc_assert (mode == CCmode || mode == CCNOmode || mode == CCGCmode);
15083 suffix = "g";
15084 break;
15085 case GTU:
15086 /* ??? Use "nbe" instead of "a" for fcmov lossage on some assemblers.
15087 Those same assemblers have the same but opposite lossage on cmov. */
15088 if (mode == CCmode)
15089 suffix = fp ? "nbe" : "a";
15090 else
15091 gcc_unreachable ();
15092 break;
15093 case LT:
15094 switch (mode)
15096 case CCNOmode:
15097 case CCGOCmode:
15098 suffix = "s";
15099 break;
15101 case CCmode:
15102 case CCGCmode:
15103 suffix = "l";
15104 break;
15106 default:
15107 gcc_unreachable ();
15109 break;
15110 case LTU:
15111 if (mode == CCmode)
15112 suffix = "b";
15113 else if (mode == CCCmode)
15114 suffix = fp ? "b" : "c";
15115 else
15116 gcc_unreachable ();
15117 break;
15118 case GE:
15119 switch (mode)
15121 case CCNOmode:
15122 case CCGOCmode:
15123 suffix = "ns";
15124 break;
15126 case CCmode:
15127 case CCGCmode:
15128 suffix = "ge";
15129 break;
15131 default:
15132 gcc_unreachable ();
15134 break;
15135 case GEU:
15136 if (mode == CCmode)
15137 suffix = "nb";
15138 else if (mode == CCCmode)
15139 suffix = fp ? "nb" : "nc";
15140 else
15141 gcc_unreachable ();
15142 break;
15143 case LE:
15144 gcc_assert (mode == CCmode || mode == CCGCmode || mode == CCNOmode);
15145 suffix = "le";
15146 break;
15147 case LEU:
15148 if (mode == CCmode)
15149 suffix = "be";
15150 else
15151 gcc_unreachable ();
15152 break;
15153 case UNORDERED:
15154 suffix = fp ? "u" : "p";
15155 break;
15156 case ORDERED:
15157 suffix = fp ? "nu" : "np";
15158 break;
15159 default:
15160 gcc_unreachable ();
15162 fputs (suffix, file);
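/* Example mappings from the table above, for illustration: (GT, CCGCmode)
   yields the suffix "g" (so the caller emits "jg", "setg" or "cmovg"),
   (LT, CCGOCmode) yields "s" (a plain sign test), and with REVERSE set an
   EQ is turned into NE and prints "ne".  */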
15165 /* Print the name of register X to FILE based on its machine mode and number.
15166 If CODE is 'w', pretend the mode is HImode.
15167 If CODE is 'b', pretend the mode is QImode.
15168 If CODE is 'k', pretend the mode is SImode.
15169 If CODE is 'q', pretend the mode is DImode.
15170 If CODE is 'x', pretend the mode is V4SFmode.
15171 If CODE is 't', pretend the mode is V8SFmode.
15172 If CODE is 'g', pretend the mode is V16SFmode.
15173 If CODE is 'h', pretend the reg is the 'high' byte register.
15174 If CODE is 'y', print "st(0)" instead of "st", if the reg is stack op.
15175 If CODE is 'd', duplicate the operand for AVX instruction.
15178 void
15179 print_reg (rtx x, int code, FILE *file)
15181 const char *reg;
15182 int msize;
15183 unsigned int regno;
15184 bool duplicated;
15186 if (ASSEMBLER_DIALECT == ASM_ATT)
15187 putc ('%', file);
15189 if (x == pc_rtx)
15191 gcc_assert (TARGET_64BIT);
15192 fputs ("rip", file);
15193 return;
15196 if (code == 'y' && STACK_TOP_P (x))
15198 fputs ("st(0)", file);
15199 return;
15202 if (code == 'w')
15203 msize = 2;
15204 else if (code == 'b')
15205 msize = 1;
15206 else if (code == 'k')
15207 msize = 4;
15208 else if (code == 'q')
15209 msize = 8;
15210 else if (code == 'h')
15211 msize = 0;
15212 else if (code == 'x')
15213 msize = 16;
15214 else if (code == 't')
15215 msize = 32;
15216 else if (code == 'g')
15217 msize = 64;
15218 else
15219 msize = GET_MODE_SIZE (GET_MODE (x));
15221 regno = true_regnum (x);
15223 gcc_assert (regno != ARG_POINTER_REGNUM
15224 && regno != FRAME_POINTER_REGNUM
15225 && regno != FLAGS_REG
15226 && regno != FPSR_REG
15227 && regno != FPCR_REG);
15229 duplicated = code == 'd' && TARGET_AVX;
15231 switch (msize)
15233 case 8:
15234 case 4:
15235 if (LEGACY_INT_REGNO_P (regno))
15236 putc (msize == 8 && TARGET_64BIT ? 'r' : 'e', file);
15237 case 16:
15238 case 12:
15239 case 2:
15240 normal:
15241 reg = hi_reg_name[regno];
15242 break;
15243 case 1:
15244 if (regno >= ARRAY_SIZE (qi_reg_name))
15245 goto normal;
15246 reg = qi_reg_name[regno];
15247 break;
15248 case 0:
15249 if (regno >= ARRAY_SIZE (qi_high_reg_name))
15250 goto normal;
15251 reg = qi_high_reg_name[regno];
15252 break;
15253 case 32:
15254 case 64:
15255 if (SSE_REGNO_P (regno))
15257 gcc_assert (!duplicated);
15258 putc (msize == 32 ? 'y' : 'z', file);
15259 reg = hi_reg_name[regno] + 1;
15260 break;
15262 goto normal;
15263 default:
15264 gcc_unreachable ();
15267 fputs (reg, file);
15269 /* Irritatingly, AMD extended registers use a
15270 different naming convention: "r%d[bwd]" */
15271 if (REX_INT_REGNO_P (regno))
15273 gcc_assert (TARGET_64BIT);
15274 switch (msize)
15276 case 0:
15277 error ("extended registers have no high halves");
15278 break;
15279 case 1:
15280 putc ('b', file);
15281 break;
15282 case 2:
15283 putc ('w', file);
15284 break;
15285 case 4:
15286 putc ('d', file);
15287 break;
15288 case 8:
15289 /* no suffix */
15290 break;
15291 default:
15292 error ("unsupported operand size for extended register");
15293 break;
15295 return;
15298 if (duplicated)
15300 if (ASSEMBLER_DIALECT == ASM_ATT)
15301 fprintf (file, ", %%%s", reg);
15302 else
15303 fprintf (file, ", %s", reg);
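/* A few illustrative outputs of print_reg: for the SImode hard register
   %eax, code 'w' prints "ax", code 'b' prints "al", code 'h' prints "ah"
   and code 'q' prints "rax" (64-bit only); for an extended register such as
   %r10 accessed with a 4-byte size, the AMD-style name "r10d" is produced
   by the suffix logic above.  */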
15307 /* Meaning of CODE:
15308 L,W,B,Q,S,T -- print the opcode suffix for specified size of operand.
15309 C -- print opcode suffix for set/cmov insn.
15310 c -- like C, but print reversed condition
15311 F,f -- likewise, but for floating-point.
15312 O -- if HAVE_AS_IX86_CMOV_SUN_SYNTAX, expand to "w.", "l." or "q.",
15313 otherwise nothing
15314 R -- print embedded rounding and sae.
15315 r -- print only sae.
15316 z -- print the opcode suffix for the size of the current operand.
15317 Z -- likewise, with special suffixes for x87 instructions.
15318 * -- print a star (in certain assembler syntax)
15319 A -- print an absolute memory reference.
15320 E -- print address with DImode register names if TARGET_64BIT.
15321 w -- print the operand as if it's a "word" (HImode) even if it isn't.
15322 s -- print a shift double count, followed by the assembler's argument
15323 delimiter.
15324 b -- print the QImode name of the register for the indicated operand.
15325 %b0 would print %al if operands[0] is reg 0.
15326 w -- likewise, print the HImode name of the register.
15327 k -- likewise, print the SImode name of the register.
15328 q -- likewise, print the DImode name of the register.
15329 x -- likewise, print the V4SFmode name of the register.
15330 t -- likewise, print the V8SFmode name of the register.
15331 g -- likewise, print the V16SFmode name of the register.
15332 h -- print the QImode name for a "high" register, either ah, bh, ch or dh.
15333 y -- print "st(0)" instead of "st" as a register.
15334 d -- print duplicated register operand for AVX instruction.
15335 D -- print condition for SSE cmp instruction.
15336 P -- if PIC, print an @PLT suffix.
15337 p -- print raw symbol name.
15338 X -- don't print any sort of PIC '@' suffix for a symbol.
15339 & -- print some in-use local-dynamic symbol name.
15340 H -- print a memory address offset by 8; used for sse high-parts
15341 Y -- print condition for XOP pcom* instruction.
15342 + -- print a branch hint as 'cs' or 'ds' prefix
15343 ; -- print a semicolon (after prefixes due to bug in older gas).
15344 ~ -- print "i" if TARGET_AVX2, "f" otherwise.
15345 @ -- print a segment register of thread base pointer load
15346 ^ -- print addr32 prefix if TARGET_64BIT and Pmode != word_mode
15347 ! -- print MPX prefix for jxx/call/ret instructions if required.
15350 void
15351 ix86_print_operand (FILE *file, rtx x, int code)
15353 if (code)
15355 switch (code)
15357 case 'A':
15358 switch (ASSEMBLER_DIALECT)
15360 case ASM_ATT:
15361 putc ('*', file);
15362 break;
15364 case ASM_INTEL:
15365 /* Intel syntax. For absolute addresses, registers should not
15366 be surrounded by braces. */
15367 if (!REG_P (x))
15369 putc ('[', file);
15370 ix86_print_operand (file, x, 0);
15371 putc (']', file);
15372 return;
15374 break;
15376 default:
15377 gcc_unreachable ();
15380 ix86_print_operand (file, x, 0);
15381 return;
15383 case 'E':
15384 /* Wrap address in an UNSPEC to declare special handling. */
15385 if (TARGET_64BIT)
15386 x = gen_rtx_UNSPEC (DImode, gen_rtvec (1, x), UNSPEC_LEA_ADDR);
15388 output_address (x);
15389 return;
15391 case 'L':
15392 if (ASSEMBLER_DIALECT == ASM_ATT)
15393 putc ('l', file);
15394 return;
15396 case 'W':
15397 if (ASSEMBLER_DIALECT == ASM_ATT)
15398 putc ('w', file);
15399 return;
15401 case 'B':
15402 if (ASSEMBLER_DIALECT == ASM_ATT)
15403 putc ('b', file);
15404 return;
15406 case 'Q':
15407 if (ASSEMBLER_DIALECT == ASM_ATT)
15408 putc ('l', file);
15409 return;
15411 case 'S':
15412 if (ASSEMBLER_DIALECT == ASM_ATT)
15413 putc ('s', file);
15414 return;
15416 case 'T':
15417 if (ASSEMBLER_DIALECT == ASM_ATT)
15418 putc ('t', file);
15419 return;
15421 case 'O':
15422 #ifdef HAVE_AS_IX86_CMOV_SUN_SYNTAX
15423 if (ASSEMBLER_DIALECT != ASM_ATT)
15424 return;
15426 switch (GET_MODE_SIZE (GET_MODE (x)))
15428 case 2:
15429 putc ('w', file);
15430 break;
15432 case 4:
15433 putc ('l', file);
15434 break;
15436 case 8:
15437 putc ('q', file);
15438 break;
15440 default:
15441 output_operand_lossage
15442 ("invalid operand size for operand code 'O'");
15443 return;
15446 putc ('.', file);
15447 #endif
15448 return;
15450 case 'z':
15451 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_INT)
15453 /* Opcodes don't get size suffixes if using Intel opcodes. */
15454 if (ASSEMBLER_DIALECT == ASM_INTEL)
15455 return;
15457 switch (GET_MODE_SIZE (GET_MODE (x)))
15459 case 1:
15460 putc ('b', file);
15461 return;
15463 case 2:
15464 putc ('w', file);
15465 return;
15467 case 4:
15468 putc ('l', file);
15469 return;
15471 case 8:
15472 putc ('q', file);
15473 return;
15475 default:
15476 output_operand_lossage
15477 ("invalid operand size for operand code 'z'");
15478 return;
15482 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_FLOAT)
15483 warning
15484 (0, "non-integer operand used with operand code 'z'");
15485 /* FALLTHRU */
15487 case 'Z':
15488 /* 387 opcodes don't get size suffixes if using Intel opcodes. */
15489 if (ASSEMBLER_DIALECT == ASM_INTEL)
15490 return;
15492 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_INT)
15494 switch (GET_MODE_SIZE (GET_MODE (x)))
15496 case 2:
15497 #ifdef HAVE_AS_IX86_FILDS
15498 putc ('s', file);
15499 #endif
15500 return;
15502 case 4:
15503 putc ('l', file);
15504 return;
15506 case 8:
15507 #ifdef HAVE_AS_IX86_FILDQ
15508 putc ('q', file);
15509 #else
15510 fputs ("ll", file);
15511 #endif
15512 return;
15514 default:
15515 break;
15518 else if (GET_MODE_CLASS (GET_MODE (x)) == MODE_FLOAT)
15520 /* 387 opcodes don't get size suffixes
15521 if the operands are registers. */
15522 if (STACK_REG_P (x))
15523 return;
15525 switch (GET_MODE_SIZE (GET_MODE (x)))
15527 case 4:
15528 putc ('s', file);
15529 return;
15531 case 8:
15532 putc ('l', file);
15533 return;
15535 case 12:
15536 case 16:
15537 putc ('t', file);
15538 return;
15540 default:
15541 break;
15544 else
15546 output_operand_lossage
15547 ("invalid operand type used with operand code 'Z'");
15548 return;
15551 output_operand_lossage
15552 ("invalid operand size for operand code 'Z'");
15553 return;
15555 case 'd':
15556 case 'b':
15557 case 'w':
15558 case 'k':
15559 case 'q':
15560 case 'h':
15561 case 't':
15562 case 'g':
15563 case 'y':
15564 case 'x':
15565 case 'X':
15566 case 'P':
15567 case 'p':
15568 break;
15570 case 's':
15571 if (CONST_INT_P (x) || ! SHIFT_DOUBLE_OMITS_COUNT)
15573 ix86_print_operand (file, x, 0);
15574 fputs (", ", file);
15576 return;
15578 case 'Y':
15579 switch (GET_CODE (x))
15581 case NE:
15582 fputs ("neq", file);
15583 break;
15584 case EQ:
15585 fputs ("eq", file);
15586 break;
15587 case GE:
15588 case GEU:
15589 fputs (INTEGRAL_MODE_P (GET_MODE (x)) ? "ge" : "unlt", file);
15590 break;
15591 case GT:
15592 case GTU:
15593 fputs (INTEGRAL_MODE_P (GET_MODE (x)) ? "gt" : "unle", file);
15594 break;
15595 case LE:
15596 case LEU:
15597 fputs ("le", file);
15598 break;
15599 case LT:
15600 case LTU:
15601 fputs ("lt", file);
15602 break;
15603 case UNORDERED:
15604 fputs ("unord", file);
15605 break;
15606 case ORDERED:
15607 fputs ("ord", file);
15608 break;
15609 case UNEQ:
15610 fputs ("ueq", file);
15611 break;
15612 case UNGE:
15613 fputs ("nlt", file);
15614 break;
15615 case UNGT:
15616 fputs ("nle", file);
15617 break;
15618 case UNLE:
15619 fputs ("ule", file);
15620 break;
15621 case UNLT:
15622 fputs ("ult", file);
15623 break;
15624 case LTGT:
15625 fputs ("une", file);
15626 break;
15627 default:
15628 output_operand_lossage ("operand is not a condition code, "
15629 "invalid operand code 'Y'");
15630 return;
15632 return;
15634 case 'D':
15635 /* A little bit of brain damage here: the SSE compare instructions
15636 use completely different names for the comparisons than the
15637 fp conditional moves do. */
15638 switch (GET_CODE (x))
15640 case UNEQ:
15641 if (TARGET_AVX)
15643 fputs ("eq_us", file);
15644 break;
15646 case EQ:
15647 fputs ("eq", file);
15648 break;
15649 case UNLT:
15650 if (TARGET_AVX)
15652 fputs ("nge", file);
15653 break;
15655 case LT:
15656 fputs ("lt", file);
15657 break;
15658 case UNLE:
15659 if (TARGET_AVX)
15661 fputs ("ngt", file);
15662 break;
15664 case LE:
15665 fputs ("le", file);
15666 break;
15667 case UNORDERED:
15668 fputs ("unord", file);
15669 break;
15670 case LTGT:
15671 if (TARGET_AVX)
15673 fputs ("neq_oq", file);
15674 break;
15676 case NE:
15677 fputs ("neq", file);
15678 break;
15679 case GE:
15680 if (TARGET_AVX)
15682 fputs ("ge", file);
15683 break;
15685 case UNGE:
15686 fputs ("nlt", file);
15687 break;
15688 case GT:
15689 if (TARGET_AVX)
15691 fputs ("gt", file);
15692 break;
15694 case UNGT:
15695 fputs ("nle", file);
15696 break;
15697 case ORDERED:
15698 fputs ("ord", file);
15699 break;
15700 default:
15701 output_operand_lossage ("operand is not a condition code, "
15702 "invalid operand code 'D'");
15703 return;
15705 return;
15707 case 'F':
15708 case 'f':
15709 #ifdef HAVE_AS_IX86_CMOV_SUN_SYNTAX
15710 if (ASSEMBLER_DIALECT == ASM_ATT)
15711 putc ('.', file);
15712 #endif
15714 case 'C':
15715 case 'c':
15716 if (!COMPARISON_P (x))
15718 output_operand_lossage ("operand is not a condition code, "
15719 "invalid operand code '%c'", code);
15720 return;
15722 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)),
15723 code == 'c' || code == 'f',
15724 code == 'F' || code == 'f',
15725 file);
15726 return;
15728 case 'H':
15729 if (!offsettable_memref_p (x))
15731 output_operand_lossage ("operand is not an offsettable memory "
15732 "reference, invalid operand code 'H'");
15733 return;
15735 /* It doesn't actually matter what mode we use here, as we're
15736 only going to use this for printing. */
15737 x = adjust_address_nv (x, DImode, 8);
15738 /* Output 'qword ptr' for intel assembler dialect. */
15739 if (ASSEMBLER_DIALECT == ASM_INTEL)
15740 code = 'q';
15741 break;
15743 case 'K':
15744 gcc_assert (CONST_INT_P (x));
15746 if (INTVAL (x) & IX86_HLE_ACQUIRE)
15747 #ifdef HAVE_AS_IX86_HLE
15748 fputs ("xacquire ", file);
15749 #else
15750 fputs ("\n" ASM_BYTE "0xf2\n\t", file);
15751 #endif
15752 else if (INTVAL (x) & IX86_HLE_RELEASE)
15753 #ifdef HAVE_AS_IX86_HLE
15754 fputs ("xrelease ", file);
15755 #else
15756 fputs ("\n" ASM_BYTE "0xf3\n\t", file);
15757 #endif
15758 /* We do not want to print the value of the operand. */
15759 return;
15761 case 'N':
15762 if (x == const0_rtx || x == CONST0_RTX (GET_MODE (x)))
15763 fputs ("{z}", file);
15764 return;
15766 case 'r':
15767 gcc_assert (CONST_INT_P (x));
15768 gcc_assert (INTVAL (x) == ROUND_SAE);
15770 if (ASSEMBLER_DIALECT == ASM_INTEL)
15771 fputs (", ", file);
15773 fputs ("{sae}", file);
15775 if (ASSEMBLER_DIALECT == ASM_ATT)
15776 fputs (", ", file);
15778 return;
15780 case 'R':
15781 gcc_assert (CONST_INT_P (x));
15783 if (ASSEMBLER_DIALECT == ASM_INTEL)
15784 fputs (", ", file);
15786 switch (INTVAL (x))
15788 case ROUND_NEAREST_INT | ROUND_SAE:
15789 fputs ("{rn-sae}", file);
15790 break;
15791 case ROUND_NEG_INF | ROUND_SAE:
15792 fputs ("{rd-sae}", file);
15793 break;
15794 case ROUND_POS_INF | ROUND_SAE:
15795 fputs ("{ru-sae}", file);
15796 break;
15797 case ROUND_ZERO | ROUND_SAE:
15798 fputs ("{rz-sae}", file);
15799 break;
15800 default:
15801 gcc_unreachable ();
15804 if (ASSEMBLER_DIALECT == ASM_ATT)
15805 fputs (", ", file);
15807 return;
15809 case '*':
15810 if (ASSEMBLER_DIALECT == ASM_ATT)
15811 putc ('*', file);
15812 return;
15814 case '&':
15816 const char *name = get_some_local_dynamic_name ();
15817 if (name == NULL)
15818 output_operand_lossage ("'%%&' used without any "
15819 "local dynamic TLS references");
15820 else
15821 assemble_name (file, name);
15822 return;
15825 case '+':
15827 rtx x;
15829 if (!optimize
15830 || optimize_function_for_size_p (cfun)
15831 || !TARGET_BRANCH_PREDICTION_HINTS)
15832 return;
15834 x = find_reg_note (current_output_insn, REG_BR_PROB, 0);
15835 if (x)
15837 int pred_val = XINT (x, 0);
15839 if (pred_val < REG_BR_PROB_BASE * 45 / 100
15840 || pred_val > REG_BR_PROB_BASE * 55 / 100)
15842 bool taken = pred_val > REG_BR_PROB_BASE / 2;
15843 bool cputaken
15844 = final_forward_branch_p (current_output_insn) == 0;
15846 /* Emit hints only when the default branch prediction
15847 heuristics would fail. */
15848 if (taken != cputaken)
15850 /* We use 3e (DS) prefix for taken branches and
15851 2e (CS) prefix for not taken branches. */
15852 if (taken)
15853 fputs ("ds ; ", file);
15854 else
15855 fputs ("cs ; ", file);
15859 return;
15862 case ';':
15863 #ifndef HAVE_AS_IX86_REP_LOCK_PREFIX
15864 putc (';', file);
15865 #endif
15866 return;
15868 case '@':
15869 if (ASSEMBLER_DIALECT == ASM_ATT)
15870 putc ('%', file);
15872 /* The kernel uses a different segment register for performance
15873 reasons; a system call would not have to trash the userspace
15874 segment register, which would be expensive. */
15875 if (TARGET_64BIT && ix86_cmodel != CM_KERNEL)
15876 fputs ("fs", file);
15877 else
15878 fputs ("gs", file);
15879 return;
15881 case '~':
15882 putc (TARGET_AVX2 ? 'i' : 'f', file);
15883 return;
15885 case '^':
15886 if (TARGET_64BIT && Pmode != word_mode)
15887 fputs ("addr32 ", file);
15888 return;
15890 case '!':
15891 if (ix86_bnd_prefixed_insn_p (current_output_insn))
15892 fputs ("bnd ", file);
15893 return;
15895 default:
15896 output_operand_lossage ("invalid operand code '%c'", code);
15900 if (REG_P (x))
15901 print_reg (x, code, file);
15903 else if (MEM_P (x))
15905 /* No `byte ptr' prefix for call instructions or BLKmode operands. */
15906 if (ASSEMBLER_DIALECT == ASM_INTEL && code != 'X' && code != 'P'
15907 && GET_MODE (x) != BLKmode)
15909 const char * size;
15910 switch (GET_MODE_SIZE (GET_MODE (x)))
15912 case 1: size = "BYTE"; break;
15913 case 2: size = "WORD"; break;
15914 case 4: size = "DWORD"; break;
15915 case 8: size = "QWORD"; break;
15916 case 12: size = "TBYTE"; break;
15917 case 16:
15918 if (GET_MODE (x) == XFmode)
15919 size = "TBYTE";
15920 else
15921 size = "XMMWORD";
15922 break;
15923 case 32: size = "YMMWORD"; break;
15924 case 64: size = "ZMMWORD"; break;
15925 default:
15926 gcc_unreachable ();
15929 /* Check for explicit size override (codes 'b', 'w', 'k',
15930 'q' and 'x') */
15931 if (code == 'b')
15932 size = "BYTE";
15933 else if (code == 'w')
15934 size = "WORD";
15935 else if (code == 'k')
15936 size = "DWORD";
15937 else if (code == 'q')
15938 size = "QWORD";
15939 else if (code == 'x')
15940 size = "XMMWORD";
15942 fputs (size, file);
15943 fputs (" PTR ", file);
15946 x = XEXP (x, 0);
15947 /* Avoid (%rip) for call operands. */
15948 if (CONSTANT_ADDRESS_P (x) && code == 'P'
15949 && !CONST_INT_P (x))
15950 output_addr_const (file, x);
15951 else if (this_is_asm_operands && ! address_operand (x, VOIDmode))
15952 output_operand_lossage ("invalid constraints for operand");
15953 else
15954 output_address (x);
15957 else if (CONST_DOUBLE_P (x) && GET_MODE (x) == SFmode)
15959 REAL_VALUE_TYPE r;
15960 long l;
15962 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
15963 REAL_VALUE_TO_TARGET_SINGLE (r, l);
15965 if (ASSEMBLER_DIALECT == ASM_ATT)
15966 putc ('$', file);
15967 /* Sign extend 32bit SFmode immediate to 8 bytes. */
15968 if (code == 'q')
15969 fprintf (file, "0x%08" HOST_LONG_LONG_FORMAT "x",
15970 (unsigned long long) (int) l);
15971 else
15972 fprintf (file, "0x%08x", (unsigned int) l);
15975 else if (CONST_DOUBLE_P (x) && GET_MODE (x) == DFmode)
15977 REAL_VALUE_TYPE r;
15978 long l[2];
15980 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
15981 REAL_VALUE_TO_TARGET_DOUBLE (r, l);
15983 if (ASSEMBLER_DIALECT == ASM_ATT)
15984 putc ('$', file);
15985 fprintf (file, "0x%lx%08lx", l[1] & 0xffffffff, l[0] & 0xffffffff);
15988 /* These float cases don't actually occur as immediate operands. */
15989 else if (CONST_DOUBLE_P (x) && GET_MODE (x) == XFmode)
15991 char dstr[30];
15993 real_to_decimal (dstr, CONST_DOUBLE_REAL_VALUE (x), sizeof (dstr), 0, 1);
15994 fputs (dstr, file);
15997 else
15999 /* We have patterns that allow zero sets of memory, for instance.
16000 In 64-bit mode, we should probably support all 8-byte vectors,
16001 since we can in fact encode that into an immediate. */
16002 if (GET_CODE (x) == CONST_VECTOR)
16004 gcc_assert (x == CONST0_RTX (GET_MODE (x)));
16005 x = const0_rtx;
16008 if (code != 'P' && code != 'p')
16010 if (CONST_INT_P (x))
16012 if (ASSEMBLER_DIALECT == ASM_ATT)
16013 putc ('$', file);
16015 else if (GET_CODE (x) == CONST || GET_CODE (x) == SYMBOL_REF
16016 || GET_CODE (x) == LABEL_REF)
16018 if (ASSEMBLER_DIALECT == ASM_ATT)
16019 putc ('$', file);
16020 else
16021 fputs ("OFFSET FLAT:", file);
16024 if (CONST_INT_P (x))
16025 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
16026 else if (flag_pic || MACHOPIC_INDIRECT)
16027 output_pic_addr_const (file, x, code);
16028 else
16029 output_addr_const (file, x);
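/* For illustration, a typical way the operand codes documented above are
   used in an insn template (a made-up template, not taken from i386.md):
   "mov%z0\t{%1, %0|%0, %1}" prints the size suffix of operand 0 ('b', 'w',
   'l' or 'q') in AT&T syntax and swaps the operand order between the AT&T
   and Intel dialects, giving e.g. "movl %eax, %edx" vs. "mov edx, eax" for
   a pair of SImode registers.  */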
16033 static bool
16034 ix86_print_operand_punct_valid_p (unsigned char code)
16036 return (code == '@' || code == '*' || code == '+' || code == '&'
16037 || code == ';' || code == '~' || code == '^' || code == '!');
16040 /* Print a memory operand whose address is ADDR. */
16042 static void
16043 ix86_print_operand_address (FILE *file, rtx addr)
16045 struct ix86_address parts;
16046 rtx base, index, disp;
16047 int scale;
16048 int ok;
16049 bool vsib = false;
16050 int code = 0;
16052 if (GET_CODE (addr) == UNSPEC && XINT (addr, 1) == UNSPEC_VSIBADDR)
16054 ok = ix86_decompose_address (XVECEXP (addr, 0, 0), &parts);
16055 gcc_assert (parts.index == NULL_RTX);
16056 parts.index = XVECEXP (addr, 0, 1);
16057 parts.scale = INTVAL (XVECEXP (addr, 0, 2));
16058 addr = XVECEXP (addr, 0, 0);
16059 vsib = true;
16061 else if (GET_CODE (addr) == UNSPEC && XINT (addr, 1) == UNSPEC_LEA_ADDR)
16063 gcc_assert (TARGET_64BIT);
16064 ok = ix86_decompose_address (XVECEXP (addr, 0, 0), &parts);
16065 code = 'q';
16067 else if (GET_CODE (addr) == UNSPEC && XINT (addr, 1) == UNSPEC_BNDMK_ADDR)
16069 ok = ix86_decompose_address (XVECEXP (addr, 0, 1), &parts);
16070 gcc_assert (parts.base == NULL_RTX || parts.index == NULL_RTX);
16071 if (parts.base != NULL_RTX)
16073 parts.index = parts.base;
16074 parts.scale = 1;
16076 parts.base = XVECEXP (addr, 0, 0);
16077 addr = XVECEXP (addr, 0, 0);
16079 else if (GET_CODE (addr) == UNSPEC && XINT (addr, 1) == UNSPEC_BNDLDX_ADDR)
16081 ok = ix86_decompose_address (XVECEXP (addr, 0, 0), &parts);
16082 gcc_assert (parts.index == NULL_RTX);
16083 parts.index = XVECEXP (addr, 0, 1);
16084 addr = XVECEXP (addr, 0, 0);
16086 else
16087 ok = ix86_decompose_address (addr, &parts);
16089 gcc_assert (ok);
16091 base = parts.base;
16092 index = parts.index;
16093 disp = parts.disp;
16094 scale = parts.scale;
16096 switch (parts.seg)
16098 case SEG_DEFAULT:
16099 break;
16100 case SEG_FS:
16101 case SEG_GS:
16102 if (ASSEMBLER_DIALECT == ASM_ATT)
16103 putc ('%', file);
16104 fputs ((parts.seg == SEG_FS ? "fs:" : "gs:"), file);
16105 break;
16106 default:
16107 gcc_unreachable ();
16110 /* Use one byte shorter RIP relative addressing for 64bit mode. */
16111 if (TARGET_64BIT && !base && !index)
16113 rtx symbol = disp;
16115 if (GET_CODE (disp) == CONST
16116 && GET_CODE (XEXP (disp, 0)) == PLUS
16117 && CONST_INT_P (XEXP (XEXP (disp, 0), 1)))
16118 symbol = XEXP (XEXP (disp, 0), 0);
16120 if (GET_CODE (symbol) == LABEL_REF
16121 || (GET_CODE (symbol) == SYMBOL_REF
16122 && SYMBOL_REF_TLS_MODEL (symbol) == 0))
16123 base = pc_rtx;
16125 if (!base && !index)
16127 /* Displacement only requires special attention. */
16129 if (CONST_INT_P (disp))
16131 if (ASSEMBLER_DIALECT == ASM_INTEL && parts.seg == SEG_DEFAULT)
16132 fputs ("ds:", file);
16133 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (disp));
16135 else if (flag_pic)
16136 output_pic_addr_const (file, disp, 0);
16137 else
16138 output_addr_const (file, disp);
16140 else
16142 /* Print SImode register names to force addr32 prefix. */
16143 if (SImode_address_operand (addr, VOIDmode))
16145 #ifdef ENABLE_CHECKING
16146 gcc_assert (TARGET_64BIT);
16147 switch (GET_CODE (addr))
16149 case SUBREG:
16150 gcc_assert (GET_MODE (addr) == SImode);
16151 gcc_assert (GET_MODE (SUBREG_REG (addr)) == DImode);
16152 break;
16153 case ZERO_EXTEND:
16154 case AND:
16155 gcc_assert (GET_MODE (addr) == DImode);
16156 break;
16157 default:
16158 gcc_unreachable ();
16160 #endif
16161 gcc_assert (!code);
16162 code = 'k';
16164 else if (code == 0
16165 && TARGET_X32
16166 && disp
16167 && CONST_INT_P (disp)
16168 && INTVAL (disp) < -16*1024*1024)
16170 /* X32 runs in 64-bit mode, where displacement, DISP, in
16171 address DISP(%r64), is encoded as 32-bit immediate sign-
16172 extended from 32-bit to 64-bit. For -0x40000300(%r64),
16173 address is %r64 + 0xffffffffbffffd00. When %r64 <
16174 0x40000300, like 0x37ffe064, address is 0xfffffffff7ffdd64,
16175 which is invalid for x32. The correct address is %r64
16176 - 0x40000300 == 0xf7ffdd64. To properly encode
16177 -0x40000300(%r64) for x32, we zero-extend negative
16178 displacement by forcing addr32 prefix which truncates
16179 0xfffffffff7ffdd64 to 0xf7ffdd64. In theory, we should
16180 zero-extend all negative displacements, including -1(%rsp).
16181 However, for small negative displacements, sign-extension
16182 won't cause overflow. We only zero-extend negative
16183 displacements if they are < -16*1024*1024, which is also used
16184 to check legitimate address displacements for PIC. */
16185 code = 'k';
16188 if (ASSEMBLER_DIALECT == ASM_ATT)
16190 if (disp)
16192 if (flag_pic)
16193 output_pic_addr_const (file, disp, 0);
16194 else if (GET_CODE (disp) == LABEL_REF)
16195 output_asm_label (disp);
16196 else
16197 output_addr_const (file, disp);
16200 putc ('(', file);
16201 if (base)
16202 print_reg (base, code, file);
16203 if (index)
16205 putc (',', file);
16206 print_reg (index, vsib ? 0 : code, file);
16207 if (scale != 1 || vsib)
16208 fprintf (file, ",%d", scale);
16210 putc (')', file);
16212 else
16214 rtx offset = NULL_RTX;
16216 if (disp)
16218 /* Pull out the offset of a symbol; print any symbol itself. */
16219 if (GET_CODE (disp) == CONST
16220 && GET_CODE (XEXP (disp, 0)) == PLUS
16221 && CONST_INT_P (XEXP (XEXP (disp, 0), 1)))
16223 offset = XEXP (XEXP (disp, 0), 1);
16224 disp = gen_rtx_CONST (VOIDmode,
16225 XEXP (XEXP (disp, 0), 0));
16228 if (flag_pic)
16229 output_pic_addr_const (file, disp, 0);
16230 else if (GET_CODE (disp) == LABEL_REF)
16231 output_asm_label (disp);
16232 else if (CONST_INT_P (disp))
16233 offset = disp;
16234 else
16235 output_addr_const (file, disp);
16238 putc ('[', file);
16239 if (base)
16241 print_reg (base, code, file);
16242 if (offset)
16244 if (INTVAL (offset) >= 0)
16245 putc ('+', file);
16246 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (offset));
16249 else if (offset)
16250 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (offset));
16251 else
16252 putc ('0', file);
16254 if (index)
16256 putc ('+', file);
16257 print_reg (index, vsib ? 0 : code, file);
16258 if (scale != 1 || vsib)
16259 fprintf (file, "*%d", scale);
16261 putc (']', file);
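/* Example of the two dialects produced above, for an address with base %ebx,
   index %ecx, scale 4 and displacement 12:

	AT&T:	12(%ebx,%ecx,4)
	Intel:	[ebx+12+ecx*4]

   In 64-bit mode a bare symbolic displacement gets pc_rtx as its base so
   that a RIP-relative form such as "foo(%rip)" is printed instead.  */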
16266 /* Implementation of TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA. */
16268 static bool
16269 i386_asm_output_addr_const_extra (FILE *file, rtx x)
16271 rtx op;
16273 if (GET_CODE (x) != UNSPEC)
16274 return false;
16276 op = XVECEXP (x, 0, 0);
16277 switch (XINT (x, 1))
16279 case UNSPEC_GOTTPOFF:
16280 output_addr_const (file, op);
16281 /* FIXME: This might be @TPOFF in Sun ld. */
16282 fputs ("@gottpoff", file);
16283 break;
16284 case UNSPEC_TPOFF:
16285 output_addr_const (file, op);
16286 fputs ("@tpoff", file);
16287 break;
16288 case UNSPEC_NTPOFF:
16289 output_addr_const (file, op);
16290 if (TARGET_64BIT)
16291 fputs ("@tpoff", file);
16292 else
16293 fputs ("@ntpoff", file);
16294 break;
16295 case UNSPEC_DTPOFF:
16296 output_addr_const (file, op);
16297 fputs ("@dtpoff", file);
16298 break;
16299 case UNSPEC_GOTNTPOFF:
16300 output_addr_const (file, op);
16301 if (TARGET_64BIT)
16302 fputs (ASSEMBLER_DIALECT == ASM_ATT ?
16303 "@gottpoff(%rip)" : "@gottpoff[rip]", file);
16304 else
16305 fputs ("@gotntpoff", file);
16306 break;
16307 case UNSPEC_INDNTPOFF:
16308 output_addr_const (file, op);
16309 fputs ("@indntpoff", file);
16310 break;
16311 #if TARGET_MACHO
16312 case UNSPEC_MACHOPIC_OFFSET:
16313 output_addr_const (file, op);
16314 putc ('-', file);
16315 machopic_output_function_base_name (file);
16316 break;
16317 #endif
16319 case UNSPEC_STACK_CHECK:
16321 int offset;
16323 gcc_assert (flag_split_stack);
16325 #ifdef TARGET_THREAD_SPLIT_STACK_OFFSET
16326 offset = TARGET_THREAD_SPLIT_STACK_OFFSET;
16327 #else
16328 gcc_unreachable ();
16329 #endif
16331 fprintf (file, "%s:%d", TARGET_64BIT ? "%fs" : "%gs", offset);
16333 break;
16335 default:
16336 return false;
16339 return true;
16342 /* Split one or more double-mode RTL references into pairs of half-mode
16343 references. The RTL can be REG, offsettable MEM, integer constant, or
16344 CONST_DOUBLE. "operands" is a pointer to an array of double-mode RTLs to
16345 split and "num" is its length. lo_half and hi_half are output arrays
16346 that parallel "operands". */
16348 void
16349 split_double_mode (machine_mode mode, rtx operands[],
16350 int num, rtx lo_half[], rtx hi_half[])
16352 machine_mode half_mode;
16353 unsigned int byte;
16355 switch (mode)
16357 case TImode:
16358 half_mode = DImode;
16359 break;
16360 case DImode:
16361 half_mode = SImode;
16362 break;
16363 default:
16364 gcc_unreachable ();
16367 byte = GET_MODE_SIZE (half_mode);
16369 while (num--)
16371 rtx op = operands[num];
16373 /* simplify_subreg refuses to split volatile memory addresses,
16374 but we still have to handle it. */
16375 if (MEM_P (op))
16377 lo_half[num] = adjust_address (op, half_mode, 0);
16378 hi_half[num] = adjust_address (op, half_mode, byte);
16380 else
16382 lo_half[num] = simplify_gen_subreg (half_mode, op,
16383 GET_MODE (op) == VOIDmode
16384 ? mode : GET_MODE (op), 0);
16385 hi_half[num] = simplify_gen_subreg (half_mode, op,
16386 GET_MODE (op) == VOIDmode
16387 ? mode : GET_MODE (op), byte);
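/* For illustration, on a 32-bit target a DImode register and a DImode memory
   operand split into SImode halves at byte offsets 0 and 4 (little endian):

	(reg:DI 100)          -> (subreg:SI (reg:DI 100) 0)
	                         and (subreg:SI (reg:DI 100) 4)
	(mem:DI (reg:SI 101)) -> (mem:SI (reg:SI 101))
	                         and (mem:SI (plus:SI (reg:SI 101) (const_int 4)))

   The register numbers 100 and 101 are hypothetical pseudos.  */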
16392 /* Output code to perform a 387 binary operation in INSN, one of PLUS,
16393 MINUS, MULT or DIV. OPERANDS are the insn operands, where operands[3]
16394 is the expression of the binary operation. The output may either be
16395 emitted here, or returned to the caller, like all output_* functions.
16397 There is no guarantee that the operands are the same mode, as they
16398 might be within FLOAT or FLOAT_EXTEND expressions. */
16400 #ifndef SYSV386_COMPAT
16401 /* Set to 1 for compatibility with brain-damaged assemblers. No-one
16402 wants to fix the assemblers because that causes incompatibility
16403 with gcc. No-one wants to fix gcc because that causes
16404 incompatibility with assemblers... You can use the option of
16405 -DSYSV386_COMPAT=0 if you recompile both gcc and gas this way. */
16406 #define SYSV386_COMPAT 1
16407 #endif
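/* Illustration of the SSE branch below: for an SFmode addition the template
   built in BUF is

	"vaddss\t{%2, %1, %0|%0, %1, %2}"	with AVX enabled, or
	"addss\t{%2, %0|%0, %2}"		without AVX (two-operand form);

   the x87 paths instead start from "fadd"/"fiadd" and append the
   operand-order variants documented in the comments further down.  */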
16409 const char *
16410 output_387_binary_op (rtx insn, rtx *operands)
16412 static char buf[40];
16413 const char *p;
16414 const char *ssep;
16415 int is_sse = SSE_REG_P (operands[0]) || SSE_REG_P (operands[1]) || SSE_REG_P (operands[2]);
16417 #ifdef ENABLE_CHECKING
16418 /* Even if we do not want to check the inputs, this documents the input
16419 constraints, which helps in understanding the following code. */
16420 if (STACK_REG_P (operands[0])
16421 && ((REG_P (operands[1])
16422 && REGNO (operands[0]) == REGNO (operands[1])
16423 && (STACK_REG_P (operands[2]) || MEM_P (operands[2])))
16424 || (REG_P (operands[2])
16425 && REGNO (operands[0]) == REGNO (operands[2])
16426 && (STACK_REG_P (operands[1]) || MEM_P (operands[1]))))
16427 && (STACK_TOP_P (operands[1]) || STACK_TOP_P (operands[2])))
16428 ; /* ok */
16429 else
16430 gcc_assert (is_sse);
16431 #endif
16433 switch (GET_CODE (operands[3]))
16435 case PLUS:
16436 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
16437 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
16438 p = "fiadd";
16439 else
16440 p = "fadd";
16441 ssep = "vadd";
16442 break;
16444 case MINUS:
16445 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
16446 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
16447 p = "fisub";
16448 else
16449 p = "fsub";
16450 ssep = "vsub";
16451 break;
16453 case MULT:
16454 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
16455 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
16456 p = "fimul";
16457 else
16458 p = "fmul";
16459 ssep = "vmul";
16460 break;
16462 case DIV:
16463 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
16464 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
16465 p = "fidiv";
16466 else
16467 p = "fdiv";
16468 ssep = "vdiv";
16469 break;
16471 default:
16472 gcc_unreachable ();
16475 if (is_sse)
16477 if (TARGET_AVX)
16479 strcpy (buf, ssep);
16480 if (GET_MODE (operands[0]) == SFmode)
16481 strcat (buf, "ss\t{%2, %1, %0|%0, %1, %2}");
16482 else
16483 strcat (buf, "sd\t{%2, %1, %0|%0, %1, %2}");
16485 else
16487 strcpy (buf, ssep + 1);
16488 if (GET_MODE (operands[0]) == SFmode)
16489 strcat (buf, "ss\t{%2, %0|%0, %2}");
16490 else
16491 strcat (buf, "sd\t{%2, %0|%0, %2}");
16493 return buf;
16495 strcpy (buf, p);
16497 switch (GET_CODE (operands[3]))
16499 case MULT:
16500 case PLUS:
16501 if (REG_P (operands[2]) && REGNO (operands[0]) == REGNO (operands[2]))
16502 std::swap (operands[1], operands[2]);
16504 /* We now know operands[0] == operands[1]. */
16506 if (MEM_P (operands[2]))
16508 p = "%Z2\t%2";
16509 break;
16512 if (find_regno_note (insn, REG_DEAD, REGNO (operands[2])))
16514 if (STACK_TOP_P (operands[0]))
16515 /* How is it that we are storing to a dead operand[2]?
16516 Well, presumably operands[1] is dead too. We can't
16517 store the result to st(0) as st(0) gets popped on this
16518 instruction. Instead store to operands[2] (which I
16519 think has to be st(1)). st(1) will be popped later.
16520 gcc <= 2.8.1 didn't have this check and generated
16521 assembly code that the Unixware assembler rejected. */
16522 p = "p\t{%0, %2|%2, %0}"; /* st(1) = st(0) op st(1); pop */
16523 else
16524 p = "p\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0); pop */
16525 break;
16528 if (STACK_TOP_P (operands[0]))
16529 p = "\t{%y2, %0|%0, %y2}"; /* st(0) = st(0) op st(r2) */
16530 else
16531 p = "\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0) */
16532 break;
16534 case MINUS:
16535 case DIV:
16536 if (MEM_P (operands[1]))
16538 p = "r%Z1\t%1";
16539 break;
16542 if (MEM_P (operands[2]))
16544 p = "%Z2\t%2";
16545 break;
16548 if (find_regno_note (insn, REG_DEAD, REGNO (operands[2])))
16550 #if SYSV386_COMPAT
16551 /* The SystemV/386 SVR3.2 assembler, and probably all AT&T
16552 derived assemblers, confusingly reverse the direction of
16553 the operation for fsub{r} and fdiv{r} when the
16554 destination register is not st(0). The Intel assembler
16555 doesn't have this brain damage. Read !SYSV386_COMPAT to
16556 figure out what the hardware really does. */
16557 if (STACK_TOP_P (operands[0]))
16558 p = "{p\t%0, %2|rp\t%2, %0}";
16559 else
16560 p = "{rp\t%2, %0|p\t%0, %2}";
16561 #else
16562 if (STACK_TOP_P (operands[0]))
16563 /* As above for fmul/fadd, we can't store to st(0). */
16564 p = "rp\t{%0, %2|%2, %0}"; /* st(1) = st(0) op st(1); pop */
16565 else
16566 p = "p\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0); pop */
16567 #endif
16568 break;
16571 if (find_regno_note (insn, REG_DEAD, REGNO (operands[1])))
16573 #if SYSV386_COMPAT
16574 if (STACK_TOP_P (operands[0]))
16575 p = "{rp\t%0, %1|p\t%1, %0}";
16576 else
16577 p = "{p\t%1, %0|rp\t%0, %1}";
16578 #else
16579 if (STACK_TOP_P (operands[0]))
16580 p = "p\t{%0, %1|%1, %0}"; /* st(1) = st(1) op st(0); pop */
16581 else
16582 p = "rp\t{%1, %0|%0, %1}"; /* st(r2) = st(0) op st(r2); pop */
16583 #endif
16584 break;
16587 if (STACK_TOP_P (operands[0]))
16589 if (STACK_TOP_P (operands[1]))
16590 p = "\t{%y2, %0|%0, %y2}"; /* st(0) = st(0) op st(r2) */
16591 else
16592 p = "r\t{%y1, %0|%0, %y1}"; /* st(0) = st(r1) op st(0) */
16593 break;
16595 else if (STACK_TOP_P (operands[1]))
16597 #if SYSV386_COMPAT
16598 p = "{\t%1, %0|r\t%0, %1}";
16599 #else
16600 p = "r\t{%1, %0|%0, %1}"; /* st(r2) = st(0) op st(r2) */
16601 #endif
16603 else
16605 #if SYSV386_COMPAT
16606 p = "{r\t%2, %0|\t%0, %2}";
16607 #else
16608 p = "\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0) */
16609 #endif
16611 break;
16613 default:
16614 gcc_unreachable ();
16617 strcat (buf, p);
16618 return buf;
16621 /* Check if a 256bit AVX register is referenced inside of EXP. */
16623 static bool
16624 ix86_check_avx256_register (const_rtx exp)
16626 if (GET_CODE (exp) == SUBREG)
16627 exp = SUBREG_REG (exp);
16629 return (REG_P (exp)
16630 && VALID_AVX256_REG_OR_OI_MODE (GET_MODE (exp)));
16633 /* Return needed mode for entity in optimize_mode_switching pass. */
16635 static int
16636 ix86_avx_u128_mode_needed (rtx_insn *insn)
16638 if (CALL_P (insn))
16640 rtx link;
16642 /* Needed mode is set to AVX_U128_CLEAN if there are
16643 no 256bit modes used in function arguments. */
16644 for (link = CALL_INSN_FUNCTION_USAGE (insn);
16645 link;
16646 link = XEXP (link, 1))
16648 if (GET_CODE (XEXP (link, 0)) == USE)
16650 rtx arg = XEXP (XEXP (link, 0), 0);
16652 if (ix86_check_avx256_register (arg))
16653 return AVX_U128_DIRTY;
16657 return AVX_U128_CLEAN;
16660 /* Require DIRTY mode if a 256bit AVX register is referenced. The hardware
16661 changes state only when a 256bit register is written to, but we need
16662 to prevent the compiler from moving the optimal insertion point above
16663 an eventual read from a 256bit register. */
16664 subrtx_iterator::array_type array;
16665 FOR_EACH_SUBRTX (iter, array, PATTERN (insn), NONCONST)
16666 if (ix86_check_avx256_register (*iter))
16667 return AVX_U128_DIRTY;
16669 return AVX_U128_ANY;
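/* Illustrative note: the AVX_U128 entity drives automatic vzeroupper
   insertion.  If a function has dirtied the upper halves of the %ymm
   registers and then makes a call that passes and returns only scalar
   or 128-bit values, the needed mode at the call is CLEAN, so mode
   switching inserts something like

     vaddps  %ymm1, %ymm2, %ymm0
     vzeroupper
     call    foo

   to avoid AVX/SSE transition penalties in the callee.  */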
16672 /* Return mode that i387 must be switched into
16673 prior to the execution of insn. */
16675 static int
16676 ix86_i387_mode_needed (int entity, rtx_insn *insn)
16678 enum attr_i387_cw mode;
16680 /* The mode UNINITIALIZED is used to store the control word after a
16681 function call or ASM pattern. The mode ANY specifies that the function
16682 has no requirements on the control word and makes no changes in the
16683 bits we are interested in. */
16685 if (CALL_P (insn)
16686 || (NONJUMP_INSN_P (insn)
16687 && (asm_noperands (PATTERN (insn)) >= 0
16688 || GET_CODE (PATTERN (insn)) == ASM_INPUT)))
16689 return I387_CW_UNINITIALIZED;
16691 if (recog_memoized (insn) < 0)
16692 return I387_CW_ANY;
16694 mode = get_attr_i387_cw (insn);
16696 switch (entity)
16698 case I387_TRUNC:
16699 if (mode == I387_CW_TRUNC)
16700 return mode;
16701 break;
16703 case I387_FLOOR:
16704 if (mode == I387_CW_FLOOR)
16705 return mode;
16706 break;
16708 case I387_CEIL:
16709 if (mode == I387_CW_CEIL)
16710 return mode;
16711 break;
16713 case I387_MASK_PM:
16714 if (mode == I387_CW_MASK_PM)
16715 return mode;
16716 break;
16718 default:
16719 gcc_unreachable ();
16722 return I387_CW_ANY;
16725 /* Return mode that entity must be switched into
16726 prior to the execution of insn. */
16728 static int
16729 ix86_mode_needed (int entity, rtx_insn *insn)
16731 switch (entity)
16733 case AVX_U128:
16734 return ix86_avx_u128_mode_needed (insn);
16735 case I387_TRUNC:
16736 case I387_FLOOR:
16737 case I387_CEIL:
16738 case I387_MASK_PM:
16739 return ix86_i387_mode_needed (entity, insn);
16740 default:
16741 gcc_unreachable ();
16743 return 0;
16746 /* Check if a 256bit AVX register is referenced in stores. */
16748 static void
16749 ix86_check_avx256_stores (rtx dest, const_rtx, void *data)
16751 if (ix86_check_avx256_register (dest))
16753 bool *used = (bool *) data;
16754 *used = true;
16758 /* Calculate mode of upper 128bit AVX registers after the insn. */
16760 static int
16761 ix86_avx_u128_mode_after (int mode, rtx_insn *insn)
16763 rtx pat = PATTERN (insn);
16765 if (vzeroupper_operation (pat, VOIDmode)
16766 || vzeroall_operation (pat, VOIDmode))
16767 return AVX_U128_CLEAN;
16769 /* We know that the state is clean after a CALL insn if no 256bit
16770 register is used for the function return value. */
16771 if (CALL_P (insn))
16773 bool avx_reg256_found = false;
16774 note_stores (pat, ix86_check_avx256_stores, &avx_reg256_found);
16776 return avx_reg256_found ? AVX_U128_DIRTY : AVX_U128_CLEAN;
16779 /* Otherwise, return the current mode. Remember that if the insn
16780 references AVX 256bit registers, the mode was already changed
16781 to DIRTY by MODE_NEEDED. */
16782 return mode;
16785 /* Return the mode that an insn results in. */
16787 static int
16788 ix86_mode_after (int entity, int mode, rtx_insn *insn)
16790 switch (entity)
16792 case AVX_U128:
16793 return ix86_avx_u128_mode_after (mode, insn);
16794 case I387_TRUNC:
16795 case I387_FLOOR:
16796 case I387_CEIL:
16797 case I387_MASK_PM:
16798 return mode;
16799 default:
16800 gcc_unreachable ();
16804 static int
16805 ix86_avx_u128_mode_entry (void)
16807 tree arg;
16809 /* Entry mode is set to AVX_U128_DIRTY if there are
16810 256bit modes used in function arguments. */
16811 for (arg = DECL_ARGUMENTS (current_function_decl); arg;
16812 arg = TREE_CHAIN (arg))
16814 rtx incoming = DECL_INCOMING_RTL (arg);
16816 if (incoming && ix86_check_avx256_register (incoming))
16817 return AVX_U128_DIRTY;
16820 return AVX_U128_CLEAN;
16823 /* Return a mode that ENTITY is assumed to be
16824 switched to at function entry. */
16826 static int
16827 ix86_mode_entry (int entity)
16829 switch (entity)
16831 case AVX_U128:
16832 return ix86_avx_u128_mode_entry ();
16833 case I387_TRUNC:
16834 case I387_FLOOR:
16835 case I387_CEIL:
16836 case I387_MASK_PM:
16837 return I387_CW_ANY;
16838 default:
16839 gcc_unreachable ();
16843 static int
16844 ix86_avx_u128_mode_exit (void)
16846 rtx reg = crtl->return_rtx;
16848 /* Exit mode is set to AVX_U128_DIRTY if there are
16849 256bit modes used in the function return register. */
16850 if (reg && ix86_check_avx256_register (reg))
16851 return AVX_U128_DIRTY;
16853 return AVX_U128_CLEAN;
16856 /* Return a mode that ENTITY is assumed to be
16857 switched to at function exit. */
16859 static int
16860 ix86_mode_exit (int entity)
16862 switch (entity)
16864 case AVX_U128:
16865 return ix86_avx_u128_mode_exit ();
16866 case I387_TRUNC:
16867 case I387_FLOOR:
16868 case I387_CEIL:
16869 case I387_MASK_PM:
16870 return I387_CW_ANY;
16871 default:
16872 gcc_unreachable ();
16876 static int
16877 ix86_mode_priority (int, int n)
16879 return n;
16882 /* Output code to initialize the control word copies used by the trunc?f?i
16883 and rounding patterns. MODE is the I387_CW_* mode whose control
16884 word copy should be initialized. */
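/* Background for the bit twiddling below: bits 10-11 of the x87 control
   word select the rounding mode and bit 5 masks the precision exception:

     RC = 00  round to nearest (even)   hardware default
     RC = 01  round down (toward -inf)  0x0400  -> I387_CW_FLOOR
     RC = 10  round up (toward +inf)    0x0800  -> I387_CW_CEIL
     RC = 11  round toward zero         0x0c00  -> I387_CW_TRUNC
     PM       0x0020                            -> I387_CW_MASK_PM

   The code below either ORs the wanted bits into a saved copy of the
   control word or writes the 2-bit RC field directly with an insv
   pattern.  */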
16886 static void
16887 emit_i387_cw_initialization (int mode)
16889 rtx stored_mode = assign_386_stack_local (HImode, SLOT_CW_STORED);
16890 rtx new_mode;
16892 enum ix86_stack_slot slot;
16894 rtx reg = gen_reg_rtx (HImode);
16896 emit_insn (gen_x86_fnstcw_1 (stored_mode));
16897 emit_move_insn (reg, copy_rtx (stored_mode));
16899 if (TARGET_64BIT || TARGET_PARTIAL_REG_STALL
16900 || optimize_insn_for_size_p ())
16902 switch (mode)
16904 case I387_CW_TRUNC:
16905 /* round toward zero (truncate) */
16906 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0c00)));
16907 slot = SLOT_CW_TRUNC;
16908 break;
16910 case I387_CW_FLOOR:
16911 /* round down toward -oo */
16912 emit_insn (gen_andhi3 (reg, reg, GEN_INT (~0x0c00)));
16913 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0400)));
16914 slot = SLOT_CW_FLOOR;
16915 break;
16917 case I387_CW_CEIL:
16918 /* round up toward +oo */
16919 emit_insn (gen_andhi3 (reg, reg, GEN_INT (~0x0c00)));
16920 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0800)));
16921 slot = SLOT_CW_CEIL;
16922 break;
16924 case I387_CW_MASK_PM:
16925 /* mask precision exception for nearbyint() */
16926 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0020)));
16927 slot = SLOT_CW_MASK_PM;
16928 break;
16930 default:
16931 gcc_unreachable ();
16934 else
16936 switch (mode)
16938 case I387_CW_TRUNC:
16939 /* round toward zero (truncate) */
16940 emit_insn (gen_movsi_insv_1 (reg, GEN_INT (0xc)));
16941 slot = SLOT_CW_TRUNC;
16942 break;
16944 case I387_CW_FLOOR:
16945 /* round down toward -oo */
16946 emit_insn (gen_movsi_insv_1 (reg, GEN_INT (0x4)));
16947 slot = SLOT_CW_FLOOR;
16948 break;
16950 case I387_CW_CEIL:
16951 /* round up toward +oo */
16952 emit_insn (gen_movsi_insv_1 (reg, GEN_INT (0x8)));
16953 slot = SLOT_CW_CEIL;
16954 break;
16956 case I387_CW_MASK_PM:
16957 /* mask precision exception for nearbyint() */
16958 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0020)));
16959 slot = SLOT_CW_MASK_PM;
16960 break;
16962 default:
16963 gcc_unreachable ();
16967 gcc_assert (slot < MAX_386_STACK_LOCALS);
16969 new_mode = assign_386_stack_local (HImode, slot);
16970 emit_move_insn (new_mode, reg);
16973 /* Emit vzeroupper. */
16975 void
16976 ix86_avx_emit_vzeroupper (HARD_REG_SET regs_live)
16978 int i;
16980 /* Cancel automatic vzeroupper insertion if there are
16981 live call-saved SSE registers at the insertion point. */
16983 for (i = FIRST_SSE_REG; i <= LAST_SSE_REG; i++)
16984 if (TEST_HARD_REG_BIT (regs_live, i) && !call_used_regs[i])
16985 return;
16987 if (TARGET_64BIT)
16988 for (i = FIRST_REX_SSE_REG; i <= LAST_REX_SSE_REG; i++)
16989 if (TEST_HARD_REG_BIT (regs_live, i) && !call_used_regs[i])
16990 return;
16992 emit_insn (gen_avx_vzeroupper ());
16997 /* Generate one or more insns to set ENTITY to MODE. REGS_LIVE
16998 is the set of hard registers live at the point where the insn(s)
16999 are to be inserted. */
17001 static void
17002 ix86_emit_mode_set (int entity, int mode, int prev_mode ATTRIBUTE_UNUSED,
17003 HARD_REG_SET regs_live)
17005 switch (entity)
17007 case AVX_U128:
17008 if (mode == AVX_U128_CLEAN)
17009 ix86_avx_emit_vzeroupper (regs_live);
17010 break;
17011 case I387_TRUNC:
17012 case I387_FLOOR:
17013 case I387_CEIL:
17014 case I387_MASK_PM:
17015 if (mode != I387_CW_ANY
17016 && mode != I387_CW_UNINITIALIZED)
17017 emit_i387_cw_initialization (mode);
17018 break;
17019 default:
17020 gcc_unreachable ();
17024 /* Output code for INSN to convert a float to a signed int. OPERANDS
17025 are the insn operands. The output may be [HSD]Imode and the input
17026 operand may be [SDX]Fmode. */
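/* For example (illustrative, AT&T syntax), a DImode conversion without
   SSE3 fisttp typically ends up as

     fldcw   %3        # load the round-toward-zero control word
     fistpll %0        # store the 64-bit integer and pop
     fldcw   %2        # restore the original control word

   preceded by "fld %y1" when the value has to survive the pop.  */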
17028 const char *
17029 output_fix_trunc (rtx_insn *insn, rtx *operands, bool fisttp)
17031 int stack_top_dies = find_regno_note (insn, REG_DEAD, FIRST_STACK_REG) != 0;
17032 int dimode_p = GET_MODE (operands[0]) == DImode;
17033 int round_mode = get_attr_i387_cw (insn);
17035 /* Jump through a hoop or two for DImode, since the hardware has no
17036 non-popping instruction. We used to do this a different way, but
17037 that was somewhat fragile and broke with post-reload splitters. */
17038 if ((dimode_p || fisttp) && !stack_top_dies)
17039 output_asm_insn ("fld\t%y1", operands);
17041 gcc_assert (STACK_TOP_P (operands[1]));
17042 gcc_assert (MEM_P (operands[0]));
17043 gcc_assert (GET_MODE (operands[1]) != TFmode);
17045 if (fisttp)
17046 output_asm_insn ("fisttp%Z0\t%0", operands);
17047 else
17049 if (round_mode != I387_CW_ANY)
17050 output_asm_insn ("fldcw\t%3", operands);
17051 if (stack_top_dies || dimode_p)
17052 output_asm_insn ("fistp%Z0\t%0", operands);
17053 else
17054 output_asm_insn ("fist%Z0\t%0", operands);
17055 if (round_mode != I387_CW_ANY)
17056 output_asm_insn ("fldcw\t%2", operands);
17059 return "";
17062 /* Output code for x87 ffreep insn. The OPNO argument, which may only
17063 have the values zero or one, indicates the ffreep insn's operand
17064 from the OPERANDS array. */
17066 static const char *
17067 output_387_ffreep (rtx *operands ATTRIBUTE_UNUSED, int opno)
17069 if (TARGET_USE_FFREEP)
17070 #ifdef HAVE_AS_IX86_FFREEP
17071 return opno ? "ffreep\t%y1" : "ffreep\t%y0";
17072 #else
17074 static char retval[32];
17075 int regno = REGNO (operands[opno]);
17077 gcc_assert (STACK_REGNO_P (regno));
17079 regno -= FIRST_STACK_REG;
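/* The .word emitted below is the raw encoding of "ffreep %st(regno)":
   opcode DF C0+i.  E.g. regno == 1 produces ASM_SHORT "0xc1df", which
   the assembler stores little-endian as the bytes 0xdf 0xc1.  */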
17081 snprintf (retval, sizeof (retval), ASM_SHORT "0xc%ddf", regno);
17082 return retval;
17084 #endif
17086 return opno ? "fstp\t%y1" : "fstp\t%y0";
17090 /* Output code for INSN to compare OPERANDS. EFLAGS_P is 1 when fcomi
17091 should be used. UNORDERED_P is true when fucom should be used. */
17093 const char *
17094 output_fp_compare (rtx insn, rtx *operands, bool eflags_p, bool unordered_p)
17096 int stack_top_dies;
17097 rtx cmp_op0, cmp_op1;
17098 int is_sse = SSE_REG_P (operands[0]) || SSE_REG_P (operands[1]);
17100 if (eflags_p)
17102 cmp_op0 = operands[0];
17103 cmp_op1 = operands[1];
17105 else
17107 cmp_op0 = operands[1];
17108 cmp_op1 = operands[2];
17111 if (is_sse)
17113 if (GET_MODE (operands[0]) == SFmode)
17114 if (unordered_p)
17115 return "%vucomiss\t{%1, %0|%0, %1}";
17116 else
17117 return "%vcomiss\t{%1, %0|%0, %1}";
17118 else
17119 if (unordered_p)
17120 return "%vucomisd\t{%1, %0|%0, %1}";
17121 else
17122 return "%vcomisd\t{%1, %0|%0, %1}";
17125 gcc_assert (STACK_TOP_P (cmp_op0));
17127 stack_top_dies = find_regno_note (insn, REG_DEAD, FIRST_STACK_REG) != 0;
17129 if (cmp_op1 == CONST0_RTX (GET_MODE (cmp_op1)))
17131 if (stack_top_dies)
17133 output_asm_insn ("ftst\n\tfnstsw\t%0", operands);
17134 return output_387_ffreep (operands, 1);
17136 else
17137 return "ftst\n\tfnstsw\t%0";
17140 if (STACK_REG_P (cmp_op1)
17141 && stack_top_dies
17142 && find_regno_note (insn, REG_DEAD, REGNO (cmp_op1))
17143 && REGNO (cmp_op1) != FIRST_STACK_REG)
17145 /* If both the top of the 387 stack and the other operand (also a
17146 stack register) die, then this must be a `fcompp' float
17147 compare. */
17149 if (eflags_p)
17151 /* There is no double popping fcomi variant. Fortunately,
17152 eflags is immune from the fstp's cc clobbering. */
17153 if (unordered_p)
17154 output_asm_insn ("fucomip\t{%y1, %0|%0, %y1}", operands);
17155 else
17156 output_asm_insn ("fcomip\t{%y1, %0|%0, %y1}", operands);
17157 return output_387_ffreep (operands, 0);
17159 else
17161 if (unordered_p)
17162 return "fucompp\n\tfnstsw\t%0";
17163 else
17164 return "fcompp\n\tfnstsw\t%0";
17167 else
17169 /* Encoded here as eflags_p | intmode | unordered_p | stack_top_dies. */
17171 static const char * const alt[16] =
17173 "fcom%Z2\t%y2\n\tfnstsw\t%0",
17174 "fcomp%Z2\t%y2\n\tfnstsw\t%0",
17175 "fucom%Z2\t%y2\n\tfnstsw\t%0",
17176 "fucomp%Z2\t%y2\n\tfnstsw\t%0",
17178 "ficom%Z2\t%y2\n\tfnstsw\t%0",
17179 "ficomp%Z2\t%y2\n\tfnstsw\t%0",
17180 NULL,
17181 NULL,
17183 "fcomi\t{%y1, %0|%0, %y1}",
17184 "fcomip\t{%y1, %0|%0, %y1}",
17185 "fucomi\t{%y1, %0|%0, %y1}",
17186 "fucomip\t{%y1, %0|%0, %y1}",
17188 NULL,
17189 NULL,
17190 NULL,
17191 NULL
17194 int mask;
17195 const char *ret;
17197 mask = eflags_p << 3;
17198 mask |= (GET_MODE_CLASS (GET_MODE (cmp_op1)) == MODE_INT) << 2;
17199 mask |= unordered_p << 1;
17200 mask |= stack_top_dies;
17202 gcc_assert (mask < 16);
17203 ret = alt[mask];
17204 gcc_assert (ret);
17206 return ret;
17210 void
17211 ix86_output_addr_vec_elt (FILE *file, int value)
17213 const char *directive = ASM_LONG;
17215 #ifdef ASM_QUAD
17216 if (TARGET_LP64)
17217 directive = ASM_QUAD;
17218 #else
17219 gcc_assert (!TARGET_64BIT);
17220 #endif
17222 fprintf (file, "%s%s%d\n", directive, LPREFIX, value);
17225 void
17226 ix86_output_addr_diff_elt (FILE *file, int value, int rel)
17228 const char *directive = ASM_LONG;
17230 #ifdef ASM_QUAD
17231 if (TARGET_64BIT && CASE_VECTOR_MODE == DImode)
17232 directive = ASM_QUAD;
17233 #else
17234 gcc_assert (!TARGET_64BIT);
17235 #endif
17236 /* We can't use @GOTOFF for text labels on VxWorks; see gotoff_operand. */
17237 if (TARGET_64BIT || TARGET_VXWORKS_RTP)
17238 fprintf (file, "%s%s%d-%s%d\n",
17239 directive, LPREFIX, value, LPREFIX, rel);
17240 else if (HAVE_AS_GOTOFF_IN_DATA)
17241 fprintf (file, ASM_LONG "%s%d@GOTOFF\n", LPREFIX, value);
17242 #if TARGET_MACHO
17243 else if (TARGET_MACHO)
17245 fprintf (file, ASM_LONG "%s%d-", LPREFIX, value);
17246 machopic_output_function_base_name (file);
17247 putc ('\n', file);
17249 #endif
17250 else
17251 asm_fprintf (file, ASM_LONG "%U%s+[.-%s%d]\n",
17252 GOT_SYMBOL_NAME, LPREFIX, value);
17255 /* Generate either "mov $0, reg" or "xor reg, reg", as appropriate
17256 for the target. */
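/* For instance, clearing %eax normally becomes

     xorl %eax, %eax        # 2 bytes, clobbers the flags

   rather than "movl $0, %eax" (5 bytes); the mov form is used only when
   TARGET_USE_MOV0 is set and we are optimizing for speed, because it
   leaves the flags register intact.  */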
17258 void
17259 ix86_expand_clear (rtx dest)
17261 rtx tmp;
17263 /* We play register width games, which are only valid after reload. */
17264 gcc_assert (reload_completed);
17266 /* Avoid HImode and its attendant prefix byte. */
17267 if (GET_MODE_SIZE (GET_MODE (dest)) < 4)
17268 dest = gen_rtx_REG (SImode, REGNO (dest));
17269 tmp = gen_rtx_SET (dest, const0_rtx);
17271 if (!TARGET_USE_MOV0 || optimize_insn_for_size_p ())
17273 rtx clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
17274 tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, tmp, clob));
17277 emit_insn (tmp);
17280 /* X is an unchanging MEM. If it is a constant pool reference, return
17281 the constant pool rtx, else NULL. */
17284 maybe_get_pool_constant (rtx x)
17286 x = ix86_delegitimize_address (XEXP (x, 0));
17288 if (GET_CODE (x) == SYMBOL_REF && CONSTANT_POOL_ADDRESS_P (x))
17289 return get_pool_constant (x);
17291 return NULL_RTX;
17294 void
17295 ix86_expand_move (machine_mode mode, rtx operands[])
17297 rtx op0, op1;
17298 enum tls_model model;
17300 op0 = operands[0];
17301 op1 = operands[1];
17303 if (GET_CODE (op1) == SYMBOL_REF)
17305 rtx tmp;
17307 model = SYMBOL_REF_TLS_MODEL (op1);
17308 if (model)
17310 op1 = legitimize_tls_address (op1, model, true);
17311 op1 = force_operand (op1, op0);
17312 if (op1 == op0)
17313 return;
17314 op1 = convert_to_mode (mode, op1, 1);
17316 else if ((tmp = legitimize_pe_coff_symbol (op1, false)) != NULL_RTX)
17317 op1 = tmp;
17319 else if (GET_CODE (op1) == CONST
17320 && GET_CODE (XEXP (op1, 0)) == PLUS
17321 && GET_CODE (XEXP (XEXP (op1, 0), 0)) == SYMBOL_REF)
17323 rtx addend = XEXP (XEXP (op1, 0), 1);
17324 rtx symbol = XEXP (XEXP (op1, 0), 0);
17325 rtx tmp;
17327 model = SYMBOL_REF_TLS_MODEL (symbol);
17328 if (model)
17329 tmp = legitimize_tls_address (symbol, model, true);
17330 else
17331 tmp = legitimize_pe_coff_symbol (symbol, true);
17333 if (tmp)
17335 tmp = force_operand (tmp, NULL);
17336 tmp = expand_simple_binop (Pmode, PLUS, tmp, addend,
17337 op0, 1, OPTAB_DIRECT);
17338 if (tmp == op0)
17339 return;
17340 op1 = convert_to_mode (mode, tmp, 1);
17344 if ((flag_pic || MACHOPIC_INDIRECT)
17345 && symbolic_operand (op1, mode))
17347 if (TARGET_MACHO && !TARGET_64BIT)
17349 #if TARGET_MACHO
17350 /* dynamic-no-pic */
17351 if (MACHOPIC_INDIRECT)
17353 rtx temp = (op0 && REG_P (op0) && mode == Pmode)
17354 ? op0 : gen_reg_rtx (Pmode);
17355 op1 = machopic_indirect_data_reference (op1, temp);
17356 if (MACHOPIC_PURE)
17357 op1 = machopic_legitimize_pic_address (op1, mode,
17358 temp == op1 ? 0 : temp);
17360 if (op0 != op1 && GET_CODE (op0) != MEM)
17362 rtx insn = gen_rtx_SET (op0, op1);
17363 emit_insn (insn);
17364 return;
17366 if (GET_CODE (op0) == MEM)
17367 op1 = force_reg (Pmode, op1);
17368 else
17370 rtx temp = op0;
17371 if (GET_CODE (temp) != REG)
17372 temp = gen_reg_rtx (Pmode);
17373 temp = legitimize_pic_address (op1, temp);
17374 if (temp == op0)
17375 return;
17376 op1 = temp;
17378 /* dynamic-no-pic */
17379 #endif
17381 else
17383 if (MEM_P (op0))
17384 op1 = force_reg (mode, op1);
17385 else if (!(TARGET_64BIT && x86_64_movabs_operand (op1, DImode)))
17387 rtx reg = can_create_pseudo_p () ? NULL_RTX : op0;
17388 op1 = legitimize_pic_address (op1, reg);
17389 if (op0 == op1)
17390 return;
17391 op1 = convert_to_mode (mode, op1, 1);
17395 else
17397 if (MEM_P (op0)
17398 && (PUSH_ROUNDING (GET_MODE_SIZE (mode)) != GET_MODE_SIZE (mode)
17399 || !push_operand (op0, mode))
17400 && MEM_P (op1))
17401 op1 = force_reg (mode, op1);
17403 if (push_operand (op0, mode)
17404 && ! general_no_elim_operand (op1, mode))
17405 op1 = copy_to_mode_reg (mode, op1);
17407 /* Force large constants in 64bit compilation into a register
17408 to get them CSEed. */
17409 if (can_create_pseudo_p ()
17410 && (mode == DImode) && TARGET_64BIT
17411 && immediate_operand (op1, mode)
17412 && !x86_64_zext_immediate_operand (op1, VOIDmode)
17413 && !register_operand (op0, mode)
17414 && optimize)
17415 op1 = copy_to_mode_reg (mode, op1);
17417 if (can_create_pseudo_p ()
17418 && CONST_DOUBLE_P (op1))
17420 /* If we are loading a floating point constant to a register,
17421 force the value to memory now, since we'll get better code
17422 out of the back end. */
17424 op1 = validize_mem (force_const_mem (mode, op1));
17425 if (!register_operand (op0, mode))
17427 rtx temp = gen_reg_rtx (mode);
17428 emit_insn (gen_rtx_SET (temp, op1));
17429 emit_move_insn (op0, temp);
17430 return;
17435 emit_insn (gen_rtx_SET (op0, op1));
17438 void
17439 ix86_expand_vector_move (machine_mode mode, rtx operands[])
17441 rtx op0 = operands[0], op1 = operands[1];
17442 unsigned int align = GET_MODE_ALIGNMENT (mode);
17444 if (push_operand (op0, VOIDmode))
17445 op0 = emit_move_resolve_push (mode, op0);
17447 /* Force constants other than zero into memory. We do not know how
17448 the instructions used to build constants modify the upper 64 bits
17449 of the register; once we have that information we may be able
17450 to handle some of them more efficiently. */
17451 if (can_create_pseudo_p ()
17452 && register_operand (op0, mode)
17453 && (CONSTANT_P (op1)
17454 || (GET_CODE (op1) == SUBREG
17455 && CONSTANT_P (SUBREG_REG (op1))))
17456 && !standard_sse_constant_p (op1))
17457 op1 = validize_mem (force_const_mem (mode, op1));
17459 /* We need to check memory alignment for SSE modes since attributes
17460 can make operands unaligned. */
17461 if (can_create_pseudo_p ()
17462 && SSE_REG_MODE_P (mode)
17463 && ((MEM_P (op0) && (MEM_ALIGN (op0) < align))
17464 || (MEM_P (op1) && (MEM_ALIGN (op1) < align))))
17466 rtx tmp[2];
17468 /* ix86_expand_vector_move_misalign() does not like constants ... */
17469 if (CONSTANT_P (op1)
17470 || (GET_CODE (op1) == SUBREG
17471 && CONSTANT_P (SUBREG_REG (op1))))
17472 op1 = validize_mem (force_const_mem (mode, op1));
17474 /* ... nor both arguments in memory. */
17475 if (!register_operand (op0, mode)
17476 && !register_operand (op1, mode))
17477 op1 = force_reg (mode, op1);
17479 tmp[0] = op0; tmp[1] = op1;
17480 ix86_expand_vector_move_misalign (mode, tmp);
17481 return;
17484 /* Make operand1 a register if it isn't already. */
17485 if (can_create_pseudo_p ()
17486 && !register_operand (op0, mode)
17487 && !register_operand (op1, mode))
17489 emit_move_insn (op0, force_reg (GET_MODE (op0), op1));
17490 return;
17493 emit_insn (gen_rtx_SET (op0, op1));
17496 /* Split 32-byte AVX unaligned load and store if needed. */
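/* Illustrative expansion: on targets that prefer split unaligned 256-bit
   accesses, a misaligned 32-byte load or store becomes two 16-byte
   halves, roughly

     vmovups      (%rax), %xmm0
     vinsertf128  $1, 16(%rax), %ymm0, %ymm0     # load

     vextractf128 $0, %ymm0, (%rax)
     vextractf128 $1, %ymm0, 16(%rax)            # store

   while other targets keep a single unaligned vmovups/vmovdqu.  */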
17498 static void
17499 ix86_avx256_split_vector_move_misalign (rtx op0, rtx op1)
17501 rtx m;
17502 rtx (*extract) (rtx, rtx, rtx);
17503 rtx (*load_unaligned) (rtx, rtx);
17504 rtx (*store_unaligned) (rtx, rtx);
17505 machine_mode mode;
17507 switch (GET_MODE (op0))
17509 default:
17510 gcc_unreachable ();
17511 case V32QImode:
17512 extract = gen_avx_vextractf128v32qi;
17513 load_unaligned = gen_avx_loaddquv32qi;
17514 store_unaligned = gen_avx_storedquv32qi;
17515 mode = V16QImode;
17516 break;
17517 case V8SFmode:
17518 extract = gen_avx_vextractf128v8sf;
17519 load_unaligned = gen_avx_loadups256;
17520 store_unaligned = gen_avx_storeups256;
17521 mode = V4SFmode;
17522 break;
17523 case V4DFmode:
17524 extract = gen_avx_vextractf128v4df;
17525 load_unaligned = gen_avx_loadupd256;
17526 store_unaligned = gen_avx_storeupd256;
17527 mode = V2DFmode;
17528 break;
17531 if (MEM_P (op1))
17533 if (TARGET_AVX256_SPLIT_UNALIGNED_LOAD
17534 && optimize_insn_for_speed_p ())
17536 rtx r = gen_reg_rtx (mode);
17537 m = adjust_address (op1, mode, 0);
17538 emit_move_insn (r, m);
17539 m = adjust_address (op1, mode, 16);
17540 r = gen_rtx_VEC_CONCAT (GET_MODE (op0), r, m);
17541 emit_move_insn (op0, r);
17543 /* Normal *mov<mode>_internal pattern will handle
17544 unaligned loads just fine if misaligned_operand
17545 is true, and without the UNSPEC it can be combined
17546 with arithmetic instructions. */
17547 else if (misaligned_operand (op1, GET_MODE (op1)))
17548 emit_insn (gen_rtx_SET (op0, op1));
17549 else
17550 emit_insn (load_unaligned (op0, op1));
17552 else if (MEM_P (op0))
17554 if (TARGET_AVX256_SPLIT_UNALIGNED_STORE
17555 && optimize_insn_for_speed_p ())
17557 m = adjust_address (op0, mode, 0);
17558 emit_insn (extract (m, op1, const0_rtx));
17559 m = adjust_address (op0, mode, 16);
17560 emit_insn (extract (m, op1, const1_rtx));
17562 else
17563 emit_insn (store_unaligned (op0, op1));
17565 else
17566 gcc_unreachable ();
17569 /* Implement the movmisalign patterns for SSE. Non-SSE modes go
17570 straight to ix86_expand_vector_move. */
17571 /* Code generation for scalar reg-reg moves of single and double precision data:
17572 if (x86_sse_partial_reg_dependency == true || x86_sse_split_regs == true)
17573 movaps reg, reg
17574 else
17575 movss reg, reg
17576 if (x86_sse_partial_reg_dependency == true)
17577 movapd reg, reg
17578 else
17579 movsd reg, reg
17581 Code generation for scalar loads of double precision data:
17582 if (x86_sse_split_regs == true)
17583 movlpd mem, reg (gas syntax)
17584 else
17585 movsd mem, reg
17587 Code generation for unaligned packed loads of single precision data
17588 (x86_sse_unaligned_move_optimal overrides x86_sse_partial_reg_dependency):
17589 if (x86_sse_unaligned_move_optimal)
17590 movups mem, reg
17592 if (x86_sse_partial_reg_dependency == true)
17594 xorps reg, reg
17595 movlps mem, reg
17596 movhps mem+8, reg
17598 else
17600 movlps mem, reg
17601 movhps mem+8, reg
17604 Code generation for unaligned packed loads of double precision data
17605 (x86_sse_unaligned_move_optimal overrides x86_sse_split_regs):
17606 if (x86_sse_unaligned_move_optimal)
17607 movupd mem, reg
17609 if (x86_sse_split_regs == true)
17611 movlpd mem, reg
17612 movhpd mem+8, reg
17614 else
17616 movsd mem, reg
17617 movhpd mem+8, reg
17621 void
17622 ix86_expand_vector_move_misalign (machine_mode mode, rtx operands[])
17624 rtx op0, op1, orig_op0 = NULL_RTX, m;
17625 rtx (*load_unaligned) (rtx, rtx);
17626 rtx (*store_unaligned) (rtx, rtx);
17628 op0 = operands[0];
17629 op1 = operands[1];
17631 if (GET_MODE_SIZE (mode) == 64)
17633 switch (GET_MODE_CLASS (mode))
17635 case MODE_VECTOR_INT:
17636 case MODE_INT:
17637 if (GET_MODE (op0) != V16SImode)
17639 if (!MEM_P (op0))
17641 orig_op0 = op0;
17642 op0 = gen_reg_rtx (V16SImode);
17644 else
17645 op0 = gen_lowpart (V16SImode, op0);
17647 op1 = gen_lowpart (V16SImode, op1);
17648 /* FALLTHRU */
17650 case MODE_VECTOR_FLOAT:
17651 switch (GET_MODE (op0))
17653 default:
17654 gcc_unreachable ();
17655 case V16SImode:
17656 load_unaligned = gen_avx512f_loaddquv16si;
17657 store_unaligned = gen_avx512f_storedquv16si;
17658 break;
17659 case V16SFmode:
17660 load_unaligned = gen_avx512f_loadups512;
17661 store_unaligned = gen_avx512f_storeups512;
17662 break;
17663 case V8DFmode:
17664 load_unaligned = gen_avx512f_loadupd512;
17665 store_unaligned = gen_avx512f_storeupd512;
17666 break;
17669 if (MEM_P (op1))
17670 emit_insn (load_unaligned (op0, op1));
17671 else if (MEM_P (op0))
17672 emit_insn (store_unaligned (op0, op1));
17673 else
17674 gcc_unreachable ();
17675 if (orig_op0)
17676 emit_move_insn (orig_op0, gen_lowpart (GET_MODE (orig_op0), op0));
17677 break;
17679 default:
17680 gcc_unreachable ();
17683 return;
17686 if (TARGET_AVX
17687 && GET_MODE_SIZE (mode) == 32)
17689 switch (GET_MODE_CLASS (mode))
17691 case MODE_VECTOR_INT:
17692 case MODE_INT:
17693 if (GET_MODE (op0) != V32QImode)
17695 if (!MEM_P (op0))
17697 orig_op0 = op0;
17698 op0 = gen_reg_rtx (V32QImode);
17700 else
17701 op0 = gen_lowpart (V32QImode, op0);
17703 op1 = gen_lowpart (V32QImode, op1);
17704 /* FALLTHRU */
17706 case MODE_VECTOR_FLOAT:
17707 ix86_avx256_split_vector_move_misalign (op0, op1);
17708 if (orig_op0)
17709 emit_move_insn (orig_op0, gen_lowpart (GET_MODE (orig_op0), op0));
17710 break;
17712 default:
17713 gcc_unreachable ();
17716 return;
17719 if (MEM_P (op1))
17721 /* Normal *mov<mode>_internal pattern will handle
17722 unaligned loads just fine if misaligned_operand
17723 is true, and without the UNSPEC it can be combined
17724 with arithmetic instructions. */
17725 if (TARGET_AVX
17726 && (GET_MODE_CLASS (mode) == MODE_VECTOR_INT
17727 || GET_MODE_CLASS (mode) == MODE_VECTOR_FLOAT)
17728 && misaligned_operand (op1, GET_MODE (op1)))
17729 emit_insn (gen_rtx_SET (op0, op1));
17730 /* ??? If we have typed data, then it would appear that using
17731 movdqu is the only way to get unaligned data loaded with
17732 integer type. */
17733 else if (TARGET_SSE2 && GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
17735 if (GET_MODE (op0) != V16QImode)
17737 orig_op0 = op0;
17738 op0 = gen_reg_rtx (V16QImode);
17740 op1 = gen_lowpart (V16QImode, op1);
17741 /* We will eventually emit movups based on insn attributes. */
17742 emit_insn (gen_sse2_loaddquv16qi (op0, op1));
17743 if (orig_op0)
17744 emit_move_insn (orig_op0, gen_lowpart (GET_MODE (orig_op0), op0));
17746 else if (TARGET_SSE2 && mode == V2DFmode)
17748 rtx zero;
17750 if (TARGET_AVX
17751 || TARGET_SSE_UNALIGNED_LOAD_OPTIMAL
17752 || TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL
17753 || optimize_insn_for_size_p ())
17755 /* We will eventually emit movups based on insn attributes. */
17756 emit_insn (gen_sse2_loadupd (op0, op1));
17757 return;
17760 /* When SSE registers are split into halves, we can avoid
17761 writing to the top half twice. */
17762 if (TARGET_SSE_SPLIT_REGS)
17764 emit_clobber (op0);
17765 zero = op0;
17767 else
17769 /* ??? Not sure about the best option for the Intel chips.
17770 The following would seem to satisfy; the register is
17771 entirely cleared, breaking the dependency chain. We
17772 then store to the upper half, with a dependency depth
17773 of one. A rumor has it that Intel recommends two movsd
17774 followed by an unpacklpd, but this is unconfirmed. And
17775 given that the dependency depth of the unpacklpd would
17776 still be one, I'm not sure why this would be better. */
17777 zero = CONST0_RTX (V2DFmode);
17780 m = adjust_address (op1, DFmode, 0);
17781 emit_insn (gen_sse2_loadlpd (op0, zero, m));
17782 m = adjust_address (op1, DFmode, 8);
17783 emit_insn (gen_sse2_loadhpd (op0, op0, m));
17785 else
17787 rtx t;
17789 if (TARGET_AVX
17790 || TARGET_SSE_UNALIGNED_LOAD_OPTIMAL
17791 || TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL
17792 || optimize_insn_for_size_p ())
17794 if (GET_MODE (op0) != V4SFmode)
17796 orig_op0 = op0;
17797 op0 = gen_reg_rtx (V4SFmode);
17799 op1 = gen_lowpart (V4SFmode, op1);
17800 emit_insn (gen_sse_loadups (op0, op1));
17801 if (orig_op0)
17802 emit_move_insn (orig_op0,
17803 gen_lowpart (GET_MODE (orig_op0), op0));
17804 return;
17807 if (mode != V4SFmode)
17808 t = gen_reg_rtx (V4SFmode);
17809 else
17810 t = op0;
17812 if (TARGET_SSE_PARTIAL_REG_DEPENDENCY)
17813 emit_move_insn (t, CONST0_RTX (V4SFmode));
17814 else
17815 emit_clobber (t);
17817 m = adjust_address (op1, V2SFmode, 0);
17818 emit_insn (gen_sse_loadlps (t, t, m));
17819 m = adjust_address (op1, V2SFmode, 8);
17820 emit_insn (gen_sse_loadhps (t, t, m));
17821 if (mode != V4SFmode)
17822 emit_move_insn (op0, gen_lowpart (mode, t));
17825 else if (MEM_P (op0))
17827 if (TARGET_SSE2 && GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
17829 op0 = gen_lowpart (V16QImode, op0);
17830 op1 = gen_lowpart (V16QImode, op1);
17831 /* We will eventually emit movups based on insn attributes. */
17832 emit_insn (gen_sse2_storedquv16qi (op0, op1));
17834 else if (TARGET_SSE2 && mode == V2DFmode)
17836 if (TARGET_AVX
17837 || TARGET_SSE_UNALIGNED_STORE_OPTIMAL
17838 || TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL
17839 || optimize_insn_for_size_p ())
17840 /* We will eventually emit movups based on insn attributes. */
17841 emit_insn (gen_sse2_storeupd (op0, op1));
17842 else
17844 m = adjust_address (op0, DFmode, 0);
17845 emit_insn (gen_sse2_storelpd (m, op1));
17846 m = adjust_address (op0, DFmode, 8);
17847 emit_insn (gen_sse2_storehpd (m, op1));
17850 else
17852 if (mode != V4SFmode)
17853 op1 = gen_lowpart (V4SFmode, op1);
17855 if (TARGET_AVX
17856 || TARGET_SSE_UNALIGNED_STORE_OPTIMAL
17857 || TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL
17858 || optimize_insn_for_size_p ())
17860 op0 = gen_lowpart (V4SFmode, op0);
17861 emit_insn (gen_sse_storeups (op0, op1));
17863 else
17865 m = adjust_address (op0, V2SFmode, 0);
17866 emit_insn (gen_sse_storelps (m, op1));
17867 m = adjust_address (op0, V2SFmode, 8);
17868 emit_insn (gen_sse_storehps (m, op1));
17872 else
17873 gcc_unreachable ();
17876 /* Helper function of ix86_fixup_binary_operands to canonicalize
17877 operand order. Returns true if the operands should be swapped. */
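/* A hypothetical example of the priorities checked below: for
   "dst = src1 + src2" with src1 a constant and src2 equal to dst, we
   return true so that after the swap dst matches the first source and
   the constant ends up second, which is what the two-address insn
   patterns expect.  */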
17879 static bool
17880 ix86_swap_binary_operands_p (enum rtx_code code, machine_mode mode,
17881 rtx operands[])
17883 rtx dst = operands[0];
17884 rtx src1 = operands[1];
17885 rtx src2 = operands[2];
17887 /* If the operation is not commutative, we can't do anything. */
17888 if (GET_RTX_CLASS (code) != RTX_COMM_ARITH)
17889 return false;
17891 /* Highest priority is that src1 should match dst. */
17892 if (rtx_equal_p (dst, src1))
17893 return false;
17894 if (rtx_equal_p (dst, src2))
17895 return true;
17897 /* Next highest priority is that immediate constants come second. */
17898 if (immediate_operand (src2, mode))
17899 return false;
17900 if (immediate_operand (src1, mode))
17901 return true;
17903 /* Lowest priority is that memory references should come second. */
17904 if (MEM_P (src2))
17905 return false;
17906 if (MEM_P (src1))
17907 return true;
17909 return false;
17913 /* Fix up OPERANDS to satisfy ix86_binary_operator_ok. Return the
17914 destination to use for the operation. If different from the true
17915 destination in operands[0], a copy operation will be required. */
17918 ix86_fixup_binary_operands (enum rtx_code code, machine_mode mode,
17919 rtx operands[])
17921 rtx dst = operands[0];
17922 rtx src1 = operands[1];
17923 rtx src2 = operands[2];
17925 /* Canonicalize operand order. */
17926 if (ix86_swap_binary_operands_p (code, mode, operands))
17928 /* It is invalid to swap operands of different modes. */
17929 gcc_assert (GET_MODE (src1) == GET_MODE (src2));
17931 std::swap (src1, src2);
17934 /* Both source operands cannot be in memory. */
17935 if (MEM_P (src1) && MEM_P (src2))
17937 /* Optimization: Only read from memory once. */
17938 if (rtx_equal_p (src1, src2))
17940 src2 = force_reg (mode, src2);
17941 src1 = src2;
17943 else if (rtx_equal_p (dst, src1))
17944 src2 = force_reg (mode, src2);
17945 else
17946 src1 = force_reg (mode, src1);
17949 /* If the destination is memory, and we do not have matching source
17950 operands, do things in registers. */
17951 if (MEM_P (dst) && !rtx_equal_p (dst, src1))
17952 dst = gen_reg_rtx (mode);
17954 /* Source 1 cannot be a constant. */
17955 if (CONSTANT_P (src1))
17956 src1 = force_reg (mode, src1);
17958 /* Source 1 cannot be a non-matching memory. */
17959 if (MEM_P (src1) && !rtx_equal_p (dst, src1))
17960 src1 = force_reg (mode, src1);
17962 /* Improve address combine. */
17963 if (code == PLUS
17964 && GET_MODE_CLASS (mode) == MODE_INT
17965 && MEM_P (src2))
17966 src2 = force_reg (mode, src2);
17968 operands[1] = src1;
17969 operands[2] = src2;
17970 return dst;
17973 /* Similarly, but assume that the destination has already been
17974 set up properly. */
17976 void
17977 ix86_fixup_binary_operands_no_copy (enum rtx_code code,
17978 machine_mode mode, rtx operands[])
17980 rtx dst = ix86_fixup_binary_operands (code, mode, operands);
17981 gcc_assert (dst == operands[0]);
17984 /* Attempt to expand a binary operator. Make the expansion closer to the
17985 actual machine than just general_operand, which would allow 3 separate
17986 memory references (one output, two inputs) in a single insn. */
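/* Sketch of the result: for most codes the expansion is a PARALLEL of
   the arithmetic SET and a (clobber (reg:CC FLAGS_REG)); the one
   exception below is a post-reload PLUS whose destination does not
   match the first source, which is emitted as a bare SET so that it
   can be matched as an LEA and is not split again later.  */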
17988 void
17989 ix86_expand_binary_operator (enum rtx_code code, machine_mode mode,
17990 rtx operands[])
17992 rtx src1, src2, dst, op, clob;
17994 dst = ix86_fixup_binary_operands (code, mode, operands);
17995 src1 = operands[1];
17996 src2 = operands[2];
17998 /* Emit the instruction. */
18000 op = gen_rtx_SET (dst, gen_rtx_fmt_ee (code, mode, src1, src2));
18002 if (reload_completed
18003 && code == PLUS
18004 && !rtx_equal_p (dst, src1))
18006 /* This is going to be an LEA; avoid splitting it later. */
18007 emit_insn (op);
18009 else
18011 clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
18012 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, op, clob)));
18015 /* Fix up the destination if needed. */
18016 if (dst != operands[0])
18017 emit_move_insn (operands[0], dst);
18020 /* Expand vector logical operation CODE (AND, IOR, XOR) in MODE with
18021 the given OPERANDS. */
18023 void
18024 ix86_expand_vector_logical_operator (enum rtx_code code, machine_mode mode,
18025 rtx operands[])
18027 rtx op1 = NULL_RTX, op2 = NULL_RTX;
18028 if (GET_CODE (operands[1]) == SUBREG)
18030 op1 = operands[1];
18031 op2 = operands[2];
18033 else if (GET_CODE (operands[2]) == SUBREG)
18035 op1 = operands[2];
18036 op2 = operands[1];
18038 /* Optimize (__m128i) d | (__m128i) e and similar code
18039 when d and e are float vectors into a float vector logical
18040 insn. In C/C++, without using intrinsics there is no other way
18041 to express a vector logical operation on float vectors than
18042 to cast them temporarily to integer vectors. */
18043 if (op1
18044 && !TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL
18045 && ((GET_CODE (op2) == SUBREG || GET_CODE (op2) == CONST_VECTOR))
18046 && GET_MODE_CLASS (GET_MODE (SUBREG_REG (op1))) == MODE_VECTOR_FLOAT
18047 && GET_MODE_SIZE (GET_MODE (SUBREG_REG (op1))) == GET_MODE_SIZE (mode)
18048 && SUBREG_BYTE (op1) == 0
18049 && (GET_CODE (op2) == CONST_VECTOR
18050 || (GET_MODE (SUBREG_REG (op1)) == GET_MODE (SUBREG_REG (op2))
18051 && SUBREG_BYTE (op2) == 0))
18052 && can_create_pseudo_p ())
18054 rtx dst;
18055 switch (GET_MODE (SUBREG_REG (op1)))
18057 case V4SFmode:
18058 case V8SFmode:
18059 case V16SFmode:
18060 case V2DFmode:
18061 case V4DFmode:
18062 case V8DFmode:
18063 dst = gen_reg_rtx (GET_MODE (SUBREG_REG (op1)));
18064 if (GET_CODE (op2) == CONST_VECTOR)
18066 op2 = gen_lowpart (GET_MODE (dst), op2);
18067 op2 = force_reg (GET_MODE (dst), op2);
18069 else
18071 op1 = operands[1];
18072 op2 = SUBREG_REG (operands[2]);
18073 if (!nonimmediate_operand (op2, GET_MODE (dst)))
18074 op2 = force_reg (GET_MODE (dst), op2);
18076 op1 = SUBREG_REG (op1);
18077 if (!nonimmediate_operand (op1, GET_MODE (dst)))
18078 op1 = force_reg (GET_MODE (dst), op1);
18079 emit_insn (gen_rtx_SET (dst,
18080 gen_rtx_fmt_ee (code, GET_MODE (dst),
18081 op1, op2)));
18082 emit_move_insn (operands[0], gen_lowpart (mode, dst));
18083 return;
18084 default:
18085 break;
18088 if (!nonimmediate_operand (operands[1], mode))
18089 operands[1] = force_reg (mode, operands[1]);
18090 if (!nonimmediate_operand (operands[2], mode))
18091 operands[2] = force_reg (mode, operands[2]);
18092 ix86_fixup_binary_operands_no_copy (code, mode, operands);
18093 emit_insn (gen_rtx_SET (operands[0],
18094 gen_rtx_fmt_ee (code, mode, operands[1],
18095 operands[2])));
18098 /* Return TRUE or FALSE depending on whether the binary operator meets the
18099 appropriate constraints. */
18101 bool
18102 ix86_binary_operator_ok (enum rtx_code code, machine_mode mode,
18103 rtx operands[3])
18105 rtx dst = operands[0];
18106 rtx src1 = operands[1];
18107 rtx src2 = operands[2];
18109 /* Both source operands cannot be in memory. */
18110 if (MEM_P (src1) && MEM_P (src2))
18111 return false;
18113 /* Canonicalize operand order for commutative operators. */
18114 if (ix86_swap_binary_operands_p (code, mode, operands))
18115 std::swap (src1, src2);
18117 /* If the destination is memory, we must have a matching source operand. */
18118 if (MEM_P (dst) && !rtx_equal_p (dst, src1))
18119 return false;
18121 /* Source 1 cannot be a constant. */
18122 if (CONSTANT_P (src1))
18123 return false;
18125 /* Source 1 cannot be a non-matching memory. */
18126 if (MEM_P (src1) && !rtx_equal_p (dst, src1))
18127 /* Support "andhi/andsi/anddi" as a zero-extending move. */
18128 return (code == AND
18129 && (mode == HImode
18130 || mode == SImode
18131 || (TARGET_64BIT && mode == DImode))
18132 && satisfies_constraint_L (src2));
18134 return true;
18137 /* Attempt to expand a unary operator. Make the expansion closer to the
18138 actual machine than just general_operand, which would allow 2 separate
18139 memory references (one output, one input) in a single insn. */
18141 void
18142 ix86_expand_unary_operator (enum rtx_code code, machine_mode mode,
18143 rtx operands[])
18145 bool matching_memory = false;
18146 rtx src, dst, op, clob;
18148 dst = operands[0];
18149 src = operands[1];
18151 /* If the destination is memory, and we do not have matching source
18152 operands, do things in registers. */
18153 if (MEM_P (dst))
18155 if (rtx_equal_p (dst, src))
18156 matching_memory = true;
18157 else
18158 dst = gen_reg_rtx (mode);
18161 /* When source operand is memory, destination must match. */
18162 if (MEM_P (src) && !matching_memory)
18163 src = force_reg (mode, src);
18165 /* Emit the instruction. */
18167 op = gen_rtx_SET (dst, gen_rtx_fmt_e (code, mode, src));
18169 if (code == NOT)
18170 emit_insn (op);
18171 else
18173 clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
18174 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, op, clob)));
18177 /* Fix up the destination if needed. */
18178 if (dst != operands[0])
18179 emit_move_insn (operands[0], dst);
18182 /* Split 32bit/64bit divmod with 8bit unsigned divmod if dividend and
18183 divisor are within the range [0-255]. */
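/* Rough shape of the emitted code (illustrative; the register and label
   names are invented for the example, the real operands come from the
   expander):

         orl     %divisor, %scratch    # scratch = dividend | divisor
         testl   $-256, %scratch
         je      .Lqimode               # both values fit in 8 bits
         ...                            # ordinary 32/64-bit div or idiv
         jmp     .Ldone
     .Lqimode:
         divb    ...                    # AL = quotient, AH = remainder
     .Ldone:
*/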
18185 void
18186 ix86_split_idivmod (machine_mode mode, rtx operands[],
18187 bool signed_p)
18189 rtx_code_label *end_label, *qimode_label;
18190 rtx insn, div, mod;
18191 rtx scratch, tmp0, tmp1, tmp2;
18192 rtx (*gen_divmod4_1) (rtx, rtx, rtx, rtx);
18193 rtx (*gen_zero_extend) (rtx, rtx);
18194 rtx (*gen_test_ccno_1) (rtx, rtx);
18196 switch (mode)
18198 case SImode:
18199 gen_divmod4_1 = signed_p ? gen_divmodsi4_1 : gen_udivmodsi4_1;
18200 gen_test_ccno_1 = gen_testsi_ccno_1;
18201 gen_zero_extend = gen_zero_extendqisi2;
18202 break;
18203 case DImode:
18204 gen_divmod4_1 = signed_p ? gen_divmoddi4_1 : gen_udivmoddi4_1;
18205 gen_test_ccno_1 = gen_testdi_ccno_1;
18206 gen_zero_extend = gen_zero_extendqidi2;
18207 break;
18208 default:
18209 gcc_unreachable ();
18212 end_label = gen_label_rtx ();
18213 qimode_label = gen_label_rtx ();
18215 scratch = gen_reg_rtx (mode);
18217 /* Use 8bit unsigned divmod if the dividend and divisor are within
18218 the range [0-255]. */
18219 emit_move_insn (scratch, operands[2]);
18220 scratch = expand_simple_binop (mode, IOR, scratch, operands[3],
18221 scratch, 1, OPTAB_DIRECT);
18222 emit_insn (gen_test_ccno_1 (scratch, GEN_INT (-0x100)));
18223 tmp0 = gen_rtx_REG (CCNOmode, FLAGS_REG);
18224 tmp0 = gen_rtx_EQ (VOIDmode, tmp0, const0_rtx);
18225 tmp0 = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp0,
18226 gen_rtx_LABEL_REF (VOIDmode, qimode_label),
18227 pc_rtx);
18228 insn = emit_jump_insn (gen_rtx_SET (pc_rtx, tmp0));
18229 predict_jump (REG_BR_PROB_BASE * 50 / 100);
18230 JUMP_LABEL (insn) = qimode_label;
18232 /* Generate the original signed/unsigned divmod. */
18233 div = gen_divmod4_1 (operands[0], operands[1],
18234 operands[2], operands[3]);
18235 emit_insn (div);
18237 /* Branch to the end. */
18238 emit_jump_insn (gen_jump (end_label));
18239 emit_barrier ();
18241 /* Generate 8bit unsigned divide. */
18242 emit_label (qimode_label);
18243 /* Don't use operands[0] for the result of the 8bit divide since not all
18244 registers support QImode ZERO_EXTRACT. */
18245 tmp0 = simplify_gen_subreg (HImode, scratch, mode, 0);
18246 tmp1 = simplify_gen_subreg (HImode, operands[2], mode, 0);
18247 tmp2 = simplify_gen_subreg (QImode, operands[3], mode, 0);
18248 emit_insn (gen_udivmodhiqi3 (tmp0, tmp1, tmp2));
18250 if (signed_p)
18252 div = gen_rtx_DIV (SImode, operands[2], operands[3]);
18253 mod = gen_rtx_MOD (SImode, operands[2], operands[3]);
18255 else
18257 div = gen_rtx_UDIV (SImode, operands[2], operands[3]);
18258 mod = gen_rtx_UMOD (SImode, operands[2], operands[3]);
18261 /* Extract remainder from AH. */
18262 tmp1 = gen_rtx_ZERO_EXTRACT (mode, tmp0, GEN_INT (8), GEN_INT (8));
18263 if (REG_P (operands[1]))
18264 insn = emit_move_insn (operands[1], tmp1);
18265 else
18267 /* Need a new scratch register since the old one holds the result
18268 of the 8bit divide. */
18269 scratch = gen_reg_rtx (mode);
18270 emit_move_insn (scratch, tmp1);
18271 insn = emit_move_insn (operands[1], scratch);
18273 set_unique_reg_note (insn, REG_EQUAL, mod);
18275 /* Zero extend quotient from AL. */
18276 tmp1 = gen_lowpart (QImode, tmp0);
18277 insn = emit_insn (gen_zero_extend (operands[0], tmp1));
18278 set_unique_reg_note (insn, REG_EQUAL, div);
18280 emit_label (end_label);
18283 #define LEA_MAX_STALL (3)
18284 #define LEA_SEARCH_THRESHOLD (LEA_MAX_STALL << 1)
18286 /* Increase the given DISTANCE in half-cycles according to
18287 dependencies between the PREV and NEXT instructions.
18288 Add 1 half-cycle if there is no dependency and
18289 go to the next cycle if there is some dependency. */
18291 static unsigned int
18292 increase_distance (rtx_insn *prev, rtx_insn *next, unsigned int distance)
18294 df_ref def, use;
18296 if (!prev || !next)
18297 return distance + (distance & 1) + 2;
18299 if (!DF_INSN_USES (next) || !DF_INSN_DEFS (prev))
18300 return distance + 1;
18302 FOR_EACH_INSN_USE (use, next)
18303 FOR_EACH_INSN_DEF (def, prev)
18304 if (!DF_REF_IS_ARTIFICIAL (def)
18305 && DF_REF_REGNO (use) == DF_REF_REGNO (def))
18306 return distance + (distance & 1) + 2;
18308 return distance + 1;
18311 /* Function checks if instruction INSN defines register number
18312 REGNO1 or REGNO2. */
18314 static bool
18315 insn_defines_reg (unsigned int regno1, unsigned int regno2,
18316 rtx_insn *insn)
18318 df_ref def;
18320 FOR_EACH_INSN_DEF (def, insn)
18321 if (DF_REF_REG_DEF_P (def)
18322 && !DF_REF_IS_ARTIFICIAL (def)
18323 && (regno1 == DF_REF_REGNO (def)
18324 || regno2 == DF_REF_REGNO (def)))
18325 return true;
18327 return false;
18330 /* Function checks if instruction INSN uses register number
18331 REGNO as a part of address expression. */
18333 static bool
18334 insn_uses_reg_mem (unsigned int regno, rtx insn)
18336 df_ref use;
18338 FOR_EACH_INSN_USE (use, insn)
18339 if (DF_REF_REG_MEM_P (use) && regno == DF_REF_REGNO (use))
18340 return true;
18342 return false;
18345 /* Search backward for a non-AGU definition of register number REGNO1
18346 or register number REGNO2 in the basic block, starting from instruction
18347 START up to the head of the basic block or instruction INSN.
18349 Set *FOUND to true if a definition was found and to false
18350 otherwise.
18352 The distance in half-cycles between START and the found instruction
18353 or the head of the BB is added to DISTANCE and returned. */
18355 static int
18356 distance_non_agu_define_in_bb (unsigned int regno1, unsigned int regno2,
18357 rtx_insn *insn, int distance,
18358 rtx_insn *start, bool *found)
18360 basic_block bb = start ? BLOCK_FOR_INSN (start) : NULL;
18361 rtx_insn *prev = start;
18362 rtx_insn *next = NULL;
18364 *found = false;
18366 while (prev
18367 && prev != insn
18368 && distance < LEA_SEARCH_THRESHOLD)
18370 if (NONDEBUG_INSN_P (prev) && NONJUMP_INSN_P (prev))
18372 distance = increase_distance (prev, next, distance);
18373 if (insn_defines_reg (regno1, regno2, prev))
18375 if (recog_memoized (prev) < 0
18376 || get_attr_type (prev) != TYPE_LEA)
18378 *found = true;
18379 return distance;
18383 next = prev;
18385 if (prev == BB_HEAD (bb))
18386 break;
18388 prev = PREV_INSN (prev);
18391 return distance;
18394 /* Search backward for a non-agu definition of register number REGNO1
18395 or register number REGNO2 in INSN's basic block until we
18396 1. Pass LEA_SEARCH_THRESHOLD instructions, or
18397 2. Reach the boundary of a neighbouring BB, or
18398 3. Reach an agu definition.
18399 Returns the distance between the non-agu definition point and INSN.
18400 If there is no definition point, returns -1. */
18402 static int
18403 distance_non_agu_define (unsigned int regno1, unsigned int regno2,
18404 rtx_insn *insn)
18406 basic_block bb = BLOCK_FOR_INSN (insn);
18407 int distance = 0;
18408 bool found = false;
18410 if (insn != BB_HEAD (bb))
18411 distance = distance_non_agu_define_in_bb (regno1, regno2, insn,
18412 distance, PREV_INSN (insn),
18413 &found);
18415 if (!found && distance < LEA_SEARCH_THRESHOLD)
18417 edge e;
18418 edge_iterator ei;
18419 bool simple_loop = false;
18421 FOR_EACH_EDGE (e, ei, bb->preds)
18422 if (e->src == bb)
18424 simple_loop = true;
18425 break;
18428 if (simple_loop)
18429 distance = distance_non_agu_define_in_bb (regno1, regno2,
18430 insn, distance,
18431 BB_END (bb), &found);
18432 else
18434 int shortest_dist = -1;
18435 bool found_in_bb = false;
18437 FOR_EACH_EDGE (e, ei, bb->preds)
18439 int bb_dist
18440 = distance_non_agu_define_in_bb (regno1, regno2,
18441 insn, distance,
18442 BB_END (e->src),
18443 &found_in_bb);
18444 if (found_in_bb)
18446 if (shortest_dist < 0)
18447 shortest_dist = bb_dist;
18448 else if (bb_dist > 0)
18449 shortest_dist = MIN (bb_dist, shortest_dist);
18451 found = true;
18455 distance = shortest_dist;
18459 /* get_attr_type may modify recog data. We want to make sure
18460 that recog data is valid for instruction INSN, on which
18461 distance_non_agu_define is called. INSN is unchanged here. */
18462 extract_insn_cached (insn);
18464 if (!found)
18465 return -1;
18467 return distance >> 1;
18470 /* Return the distance in half-cycles between INSN and the next
18471 insn that uses register number REGNO in a memory address, added
18472 to DISTANCE. Return -1 if REGNO is redefined first.
18474 Set *FOUND to true if a register use was found and to
18475 false otherwise.
18476 Set *REDEFINED to true if a register redefinition was
18477 found and to false otherwise. */
18479 static int
18480 distance_agu_use_in_bb (unsigned int regno,
18481 rtx_insn *insn, int distance, rtx_insn *start,
18482 bool *found, bool *redefined)
18484 basic_block bb = NULL;
18485 rtx_insn *next = start;
18486 rtx_insn *prev = NULL;
18488 *found = false;
18489 *redefined = false;
18491 if (start != NULL_RTX)
18493 bb = BLOCK_FOR_INSN (start);
18494 if (start != BB_HEAD (bb))
18495 /* If insn and start belong to the same bb, set prev to insn,
18496 so the call to increase_distance will increase the distance
18497 between insns by 1. */
18498 prev = insn;
18501 while (next
18502 && next != insn
18503 && distance < LEA_SEARCH_THRESHOLD)
18505 if (NONDEBUG_INSN_P (next) && NONJUMP_INSN_P (next))
18507 distance = increase_distance(prev, next, distance);
18508 if (insn_uses_reg_mem (regno, next))
18510 /* Return DISTANCE if OP0 is used in memory
18511 address in NEXT. */
18512 *found = true;
18513 return distance;
18516 if (insn_defines_reg (regno, INVALID_REGNUM, next))
18518 /* Return -1 if OP0 is set in NEXT. */
18519 *redefined = true;
18520 return -1;
18523 prev = next;
18526 if (next == BB_END (bb))
18527 break;
18529 next = NEXT_INSN (next);
18532 return distance;
18535 /* Return the distance between INSN and the next insn that uses
18536 register number REGNO0 in a memory address. Return -1 if no such
18537 use is found within LEA_SEARCH_THRESHOLD or REGNO0 is set. */
18539 static int
18540 distance_agu_use (unsigned int regno0, rtx_insn *insn)
18542 basic_block bb = BLOCK_FOR_INSN (insn);
18543 int distance = 0;
18544 bool found = false;
18545 bool redefined = false;
18547 if (insn != BB_END (bb))
18548 distance = distance_agu_use_in_bb (regno0, insn, distance,
18549 NEXT_INSN (insn),
18550 &found, &redefined);
18552 if (!found && !redefined && distance < LEA_SEARCH_THRESHOLD)
18554 edge e;
18555 edge_iterator ei;
18556 bool simple_loop = false;
18558 FOR_EACH_EDGE (e, ei, bb->succs)
18559 if (e->dest == bb)
18561 simple_loop = true;
18562 break;
18565 if (simple_loop)
18566 distance = distance_agu_use_in_bb (regno0, insn,
18567 distance, BB_HEAD (bb),
18568 &found, &redefined);
18569 else
18571 int shortest_dist = -1;
18572 bool found_in_bb = false;
18573 bool redefined_in_bb = false;
18575 FOR_EACH_EDGE (e, ei, bb->succs)
18577 int bb_dist
18578 = distance_agu_use_in_bb (regno0, insn,
18579 distance, BB_HEAD (e->dest),
18580 &found_in_bb, &redefined_in_bb);
18581 if (found_in_bb)
18583 if (shortest_dist < 0)
18584 shortest_dist = bb_dist;
18585 else if (bb_dist > 0)
18586 shortest_dist = MIN (bb_dist, shortest_dist);
18588 found = true;
18592 distance = shortest_dist;
18596 if (!found || redefined)
18597 return -1;
18599 return distance >> 1;
18602 /* Define this macro to tune LEA priority vs ADD; it takes effect when
18603 there is a choice between LEA and ADD:
18604 Negative value: ADD is preferred over LEA
18605 Zero: Neutral
18606 Positive value: LEA is preferred over ADD. */
18607 #define IX86_LEA_PRIORITY 0
18609 /* Return true if using lea INSN has a performance advantage
18610 over a sequence of instructions. The instruction sequence has
18611 SPLIT_COST cycles higher latency than the lea latency. */
18613 static bool
18614 ix86_lea_outperforms (rtx_insn *insn, unsigned int regno0, unsigned int regno1,
18615 unsigned int regno2, int split_cost, bool has_scale)
18617 int dist_define, dist_use;
18619 /* For Silvermont, if we are using a 2-source or 3-source LEA for a
18620 non-destructive destination, or because we want the
18621 ability to use SCALE, the use of LEA is justified. */
18622 if (TARGET_SILVERMONT || TARGET_INTEL)
18624 if (has_scale)
18625 return true;
18626 if (split_cost < 1)
18627 return false;
18628 if (regno0 == regno1 || regno0 == regno2)
18629 return false;
18630 return true;
18633 dist_define = distance_non_agu_define (regno1, regno2, insn);
18634 dist_use = distance_agu_use (regno0, insn);
18636 if (dist_define < 0 || dist_define >= LEA_MAX_STALL)
18638 /* If there is no non-AGU operand definition, no AGU
18639 operand usage and the split cost is 0, then both the lea
18640 and non-lea variants have the same priority. Currently
18641 we prefer lea for 64-bit code and non-lea for 32-bit
18642 code. */
18643 if (dist_use < 0 && split_cost == 0)
18644 return TARGET_64BIT || IX86_LEA_PRIORITY;
18645 else
18646 return true;
18649 /* With a longer definition distance, lea is more preferable.
18650 Here we adjust it to take into account the splitting cost and
18651 lea priority. */
18652 dist_define += split_cost + IX86_LEA_PRIORITY;
18654 /* If there is no use in a memory address then we just check
18655 that the split cost exceeds the AGU stall. */
18656 if (dist_use < 0)
18657 return dist_define > LEA_MAX_STALL;
18659 /* If this insn has both a backward non-AGU dependence and a forward
18660 AGU dependence, the one with the shorter distance takes effect. */
18661 return dist_define >= dist_use;
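/* Illustrative example for ix86_lea_outperforms (hypothetical figures,
   assuming the definition distance stays below LEA_MAX_STALL): if
   distance_non_agu_define returns 2, distance_agu_use returns 4 and
   split_cost is 1, then with IX86_LEA_PRIORITY == 0 we compare
   2 + 1 + 0 == 3 against 4; since 3 < 4 the function returns false and
   the caller splits the lea into ALU instructions instead.  */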
18664 /* Return true if it is legal to clobber flags by INSN and
18665 false otherwise. */
18667 static bool
18668 ix86_ok_to_clobber_flags (rtx_insn *insn)
18670 basic_block bb = BLOCK_FOR_INSN (insn);
18671 df_ref use;
18672 bitmap live;
18674 while (insn)
18676 if (NONDEBUG_INSN_P (insn))
18678 FOR_EACH_INSN_USE (use, insn)
18679 if (DF_REF_REG_USE_P (use) && DF_REF_REGNO (use) == FLAGS_REG)
18680 return false;
18682 if (insn_defines_reg (FLAGS_REG, INVALID_REGNUM, insn))
18683 return true;
18686 if (insn == BB_END (bb))
18687 break;
18689 insn = NEXT_INSN (insn);
18692 live = df_get_live_out(bb);
18693 return !REGNO_REG_SET_P (live, FLAGS_REG);
18696 /* Return true if we need to split op0 = op1 + op2 into a sequence of
18697 move and add to avoid AGU stalls. */
18699 bool
18700 ix86_avoid_lea_for_add (rtx_insn *insn, rtx operands[])
18702 unsigned int regno0, regno1, regno2;
18704 /* Check if we need to optimize. */
18705 if (!TARGET_OPT_AGU || optimize_function_for_size_p (cfun))
18706 return false;
18708 /* Check it is correct to split here. */
18709 if (!ix86_ok_to_clobber_flags(insn))
18710 return false;
18712 regno0 = true_regnum (operands[0]);
18713 regno1 = true_regnum (operands[1]);
18714 regno2 = true_regnum (operands[2]);
18716 /* We only need to split adds with a non-destructive
18717 destination operand. */
18718 if (regno0 == regno1 || regno0 == regno2)
18719 return false;
18720 else
18721 return !ix86_lea_outperforms (insn, regno0, regno1, regno2, 1, false);
18724 /* Return true if we should emit lea instruction instead of mov
18725 instruction. */
18727 bool
18728 ix86_use_lea_for_mov (rtx_insn *insn, rtx operands[])
18730 unsigned int regno0, regno1;
18732 /* Check if we need to optimize. */
18733 if (!TARGET_OPT_AGU || optimize_function_for_size_p (cfun))
18734 return false;
18736 /* Use lea for reg to reg moves only. */
18737 if (!REG_P (operands[0]) || !REG_P (operands[1]))
18738 return false;
18740 regno0 = true_regnum (operands[0]);
18741 regno1 = true_regnum (operands[1]);
18743 return ix86_lea_outperforms (insn, regno0, regno1, INVALID_REGNUM, 0, false);
18746 /* Return true if we need to split lea into a sequence of
18747 instructions to avoid AGU stalls. */
18749 bool
18750 ix86_avoid_lea_for_addr (rtx_insn *insn, rtx operands[])
18752 unsigned int regno0, regno1, regno2;
18753 int split_cost;
18754 struct ix86_address parts;
18755 int ok;
18757 /* Check we need to optimize. */
18758 if (!TARGET_AVOID_LEA_FOR_ADDR || optimize_function_for_size_p (cfun))
18759 return false;
18761 /* The "at least two components" test below might not catch simple
18762 move or zero extension insns if parts.base is non-NULL and parts.disp
18763 is const0_rtx as the only components in the address, e.g. if the
18764 register is %rbp or %r13. As this test is much cheaper and moves or
18765 zero extensions are the common case, do this check first. */
18766 if (REG_P (operands[1])
18767 || (SImode_address_operand (operands[1], VOIDmode)
18768 && REG_P (XEXP (operands[1], 0))))
18769 return false;
18771 /* Check if it is OK to split here. */
18772 if (!ix86_ok_to_clobber_flags (insn))
18773 return false;
18775 ok = ix86_decompose_address (operands[1], &parts);
18776 gcc_assert (ok);
18778 /* There should be at least two components in the address. */
18779 if ((parts.base != NULL_RTX) + (parts.index != NULL_RTX)
18780 + (parts.disp != NULL_RTX) + (parts.scale > 1) < 2)
18781 return false;
18783 /* We should not split into add if a non-legitimate pic
18784 operand is used as the displacement. */
18785 if (parts.disp && flag_pic && !LEGITIMATE_PIC_OPERAND_P (parts.disp))
18786 return false;
18788 regno0 = true_regnum (operands[0]) ;
18789 regno1 = INVALID_REGNUM;
18790 regno2 = INVALID_REGNUM;
18792 if (parts.base)
18793 regno1 = true_regnum (parts.base);
18794 if (parts.index)
18795 regno2 = true_regnum (parts.index);
18797 split_cost = 0;
18799 /* Compute how many cycles we will add to the execution time
18800 if we split the lea into a sequence of instructions. */
18801 if (parts.base || parts.index)
18803 /* Have to use a mov instruction if the non-destructive
18804 destination form is used. */
18805 if (regno1 != regno0 && regno2 != regno0)
18806 split_cost += 1;
18808 /* Have to add index to base if both exist. */
18809 if (parts.base && parts.index)
18810 split_cost += 1;
18812 /* Have to use shift and adds if scale is 2 or greater. */
18813 if (parts.scale > 1)
18815 if (regno0 != regno1)
18816 split_cost += 1;
18817 else if (regno2 == regno0)
18818 split_cost += 4;
18819 else
18820 split_cost += parts.scale;
18823 /* Have to use add instruction with immediate if
18824 disp is non zero. */
18825 if (parts.disp && parts.disp != const0_rtx)
18826 split_cost += 1;
18828 /* Subtract the price of lea. */
18829 split_cost -= 1;
18832 return !ix86_lea_outperforms (insn, regno0, regno1, regno2, split_cost,
18833 parts.scale > 1);
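/* A worked example of the split_cost computation in
   ix86_avoid_lea_for_addr (illustrative only): for
   "lea 8(%rbx,%rcx), %rax" we have a base, an index, scale 1 and a
   nonzero displacement, with the destination distinct from both sources.
   The split sequence would be "mov %rbx, %rax; add %rcx, %rax;
   add $8, %rax", so split_cost = 1 (mov) + 1 (add index) + 1 (disp)
   - 1 (price of the lea itself) = 2, which is then weighed by
   ix86_lea_outperforms.  */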
18836 /* Emit x86 binary operand CODE in mode MODE, where the first operand
18837 matches destination. RTX includes clobber of FLAGS_REG. */
18839 static void
18840 ix86_emit_binop (enum rtx_code code, machine_mode mode,
18841 rtx dst, rtx src)
18843 rtx op, clob;
18845 op = gen_rtx_SET (dst, gen_rtx_fmt_ee (code, mode, dst, src));
18846 clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
18848 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, op, clob)));
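/* For example, ix86_emit_binop (PLUS, DImode, dst, src) emits RTL of
   the shape (parallel [(set dst (plus:DI dst src))
                        (clobber (reg:CC flags))]),
   i.e. a two-address ALU operation that clobbers the flags register.  */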
18851 /* Return true if the definition of regno1 is nearer to the insn than that of regno2. */
18853 static bool
18854 find_nearest_reg_def (rtx_insn *insn, int regno1, int regno2)
18856 rtx_insn *prev = insn;
18857 rtx_insn *start = BB_HEAD (BLOCK_FOR_INSN (insn));
18859 if (insn == start)
18860 return false;
18861 while (prev && prev != start)
18863 if (!INSN_P (prev) || !NONDEBUG_INSN_P (prev))
18865 prev = PREV_INSN (prev);
18866 continue;
18868 if (insn_defines_reg (regno1, INVALID_REGNUM, prev))
18869 return true;
18870 else if (insn_defines_reg (regno2, INVALID_REGNUM, prev))
18871 return false;
18872 prev = PREV_INSN (prev);
18875 /* None of the regs is defined in the bb. */
18876 return false;
18879 /* Split lea instructions into a sequence of instructions
18880 which are executed on ALU to avoid AGU stalls.
18881 It is assumed that it is allowed to clobber flags register
18882 at lea position. */
18884 void
18885 ix86_split_lea_for_addr (rtx_insn *insn, rtx operands[], machine_mode mode)
18887 unsigned int regno0, regno1, regno2;
18888 struct ix86_address parts;
18889 rtx target, tmp;
18890 int ok, adds;
18892 ok = ix86_decompose_address (operands[1], &parts);
18893 gcc_assert (ok);
18895 target = gen_lowpart (mode, operands[0]);
18897 regno0 = true_regnum (target);
18898 regno1 = INVALID_REGNUM;
18899 regno2 = INVALID_REGNUM;
18901 if (parts.base)
18903 parts.base = gen_lowpart (mode, parts.base);
18904 regno1 = true_regnum (parts.base);
18907 if (parts.index)
18909 parts.index = gen_lowpart (mode, parts.index);
18910 regno2 = true_regnum (parts.index);
18913 if (parts.disp)
18914 parts.disp = gen_lowpart (mode, parts.disp);
18916 if (parts.scale > 1)
18918 /* Case r1 = r1 + ... */
18919 if (regno1 == regno0)
18921 /* If we have the case r1 = r1 + C * r2 then we
18922 would have to use multiplication, which is very
18923 expensive. Assume the cost model is wrong if we
18924 see such a case here. */
18925 gcc_assert (regno2 != regno0);
18927 for (adds = parts.scale; adds > 0; adds--)
18928 ix86_emit_binop (PLUS, mode, target, parts.index);
18930 else
18932 /* r1 = r2 + r3 * C case. Need to move r3 into r1. */
18933 if (regno0 != regno2)
18934 emit_insn (gen_rtx_SET (target, parts.index));
18936 /* Use shift for scaling. */
18937 ix86_emit_binop (ASHIFT, mode, target,
18938 GEN_INT (exact_log2 (parts.scale)));
18940 if (parts.base)
18941 ix86_emit_binop (PLUS, mode, target, parts.base);
18943 if (parts.disp && parts.disp != const0_rtx)
18944 ix86_emit_binop (PLUS, mode, target, parts.disp);
18947 else if (!parts.base && !parts.index)
18949 gcc_assert(parts.disp);
18950 emit_insn (gen_rtx_SET (target, parts.disp));
18952 else
18954 if (!parts.base)
18956 if (regno0 != regno2)
18957 emit_insn (gen_rtx_SET (target, parts.index));
18959 else if (!parts.index)
18961 if (regno0 != regno1)
18962 emit_insn (gen_rtx_SET (target, parts.base));
18964 else
18966 if (regno0 == regno1)
18967 tmp = parts.index;
18968 else if (regno0 == regno2)
18969 tmp = parts.base;
18970 else
18972 rtx tmp1;
18974 /* Find better operand for SET instruction, depending
18975 on which definition is farther from the insn. */
18976 if (find_nearest_reg_def (insn, regno1, regno2))
18977 tmp = parts.index, tmp1 = parts.base;
18978 else
18979 tmp = parts.base, tmp1 = parts.index;
18981 emit_insn (gen_rtx_SET (target, tmp));
18983 if (parts.disp && parts.disp != const0_rtx)
18984 ix86_emit_binop (PLUS, mode, target, parts.disp);
18986 ix86_emit_binop (PLUS, mode, target, tmp1);
18987 return;
18990 ix86_emit_binop (PLUS, mode, target, tmp);
18993 if (parts.disp && parts.disp != const0_rtx)
18994 ix86_emit_binop (PLUS, mode, target, parts.disp);
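/* Illustrative result of ix86_split_lea_for_addr: for
   "lea 4(%rbx,%rcx,8), %rax" (base %rbx, index %rcx, scale 8, disp 4,
   destination distinct from both sources) the emitted sequence is
   roughly
       mov  %rcx, %rax
       sal  $3, %rax
       add  %rbx, %rax
       add  $4, %rax
   i.e. move the index, shift by log2 of the scale, then add the base
   and the displacement.  */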
18998 /* Return true if it is ok to optimize an ADD operation to an LEA
18999 operation to avoid flag register consumption. For most processors,
19000 ADD is faster than LEA. For processors like BONNELL, if the
19001 destination register of the LEA holds an actual address that will be
19002 used soon, LEA is better; otherwise ADD is better. */
19004 bool
19005 ix86_lea_for_add_ok (rtx_insn *insn, rtx operands[])
19007 unsigned int regno0 = true_regnum (operands[0]);
19008 unsigned int regno1 = true_regnum (operands[1]);
19009 unsigned int regno2 = true_regnum (operands[2]);
19011 /* If a = b + c, (a!=b && a!=c), must use lea form. */
19012 if (regno0 != regno1 && regno0 != regno2)
19013 return true;
19015 if (!TARGET_OPT_AGU || optimize_function_for_size_p (cfun))
19016 return false;
19018 return ix86_lea_outperforms (insn, regno0, regno1, regno2, 0, false);
19021 /* Return true if destination reg of SET_BODY is shift count of
19022 USE_BODY. */
19024 static bool
19025 ix86_dep_by_shift_count_body (const_rtx set_body, const_rtx use_body)
19027 rtx set_dest;
19028 rtx shift_rtx;
19029 int i;
19031 /* Retrieve destination of SET_BODY. */
19032 switch (GET_CODE (set_body))
19034 case SET:
19035 set_dest = SET_DEST (set_body);
19036 if (!set_dest || !REG_P (set_dest))
19037 return false;
19038 break;
19039 case PARALLEL:
19040 for (i = XVECLEN (set_body, 0) - 1; i >= 0; i--)
19041 if (ix86_dep_by_shift_count_body (XVECEXP (set_body, 0, i),
19042 use_body))
19043 return true;
19044 default:
19045 return false;
19046 break;
19049 /* Retrieve shift count of USE_BODY. */
19050 switch (GET_CODE (use_body))
19052 case SET:
19053 shift_rtx = XEXP (use_body, 1);
19054 break;
19055 case PARALLEL:
19056 for (i = XVECLEN (use_body, 0) - 1; i >= 0; i--)
19057 if (ix86_dep_by_shift_count_body (set_body,
19058 XVECEXP (use_body, 0, i)))
19059 return true;
19060 default:
19061 return false;
19062 break;
19065 if (shift_rtx
19066 && (GET_CODE (shift_rtx) == ASHIFT
19067 || GET_CODE (shift_rtx) == LSHIFTRT
19068 || GET_CODE (shift_rtx) == ASHIFTRT
19069 || GET_CODE (shift_rtx) == ROTATE
19070 || GET_CODE (shift_rtx) == ROTATERT))
19072 rtx shift_count = XEXP (shift_rtx, 1);
19074 /* Return true if shift count is dest of SET_BODY. */
19075 if (REG_P (shift_count))
19077 /* Add this check since it can be invoked before register
19078 allocation in the pre-reload scheduler. */
19079 if (reload_completed
19080 && true_regnum (set_dest) == true_regnum (shift_count))
19081 return true;
19082 else if (REGNO(set_dest) == REGNO(shift_count))
19083 return true;
19087 return false;
19090 /* Return true if destination reg of SET_INSN is shift count of
19091 USE_INSN. */
19093 bool
19094 ix86_dep_by_shift_count (const_rtx set_insn, const_rtx use_insn)
19096 return ix86_dep_by_shift_count_body (PATTERN (set_insn),
19097 PATTERN (use_insn));
19100 /* Return TRUE or FALSE depending on whether the unary operator meets the
19101 appropriate constraints. */
19103 bool
19104 ix86_unary_operator_ok (enum rtx_code,
19105 machine_mode,
19106 rtx operands[2])
19108 /* If one of operands is memory, source and destination must match. */
19109 if ((MEM_P (operands[0])
19110 || MEM_P (operands[1]))
19111 && ! rtx_equal_p (operands[0], operands[1]))
19112 return false;
19113 return true;
19116 /* Return TRUE if the operands to a vec_interleave_{high,low}v2df
19117 are ok, keeping in mind the possible movddup alternative. */
19119 bool
19120 ix86_vec_interleave_v2df_operator_ok (rtx operands[3], bool high)
19122 if (MEM_P (operands[0]))
19123 return rtx_equal_p (operands[0], operands[1 + high]);
19124 if (MEM_P (operands[1]) && MEM_P (operands[2]))
19125 return TARGET_SSE3 && rtx_equal_p (operands[1], operands[2]);
19126 return true;
19129 /* Post-reload splitter for converting an SF or DFmode value in an
19130 SSE register into an unsigned SImode. */
19132 void
19133 ix86_split_convert_uns_si_sse (rtx operands[])
19135 machine_mode vecmode;
19136 rtx value, large, zero_or_two31, input, two31, x;
19138 large = operands[1];
19139 zero_or_two31 = operands[2];
19140 input = operands[3];
19141 two31 = operands[4];
19142 vecmode = GET_MODE (large);
19143 value = gen_rtx_REG (vecmode, REGNO (operands[0]));
19145 /* Load up the value into the low element. We must ensure that the other
19146 elements are valid floats -- zero is the easiest such value. */
19147 if (MEM_P (input))
19149 if (vecmode == V4SFmode)
19150 emit_insn (gen_vec_setv4sf_0 (value, CONST0_RTX (V4SFmode), input));
19151 else
19152 emit_insn (gen_sse2_loadlpd (value, CONST0_RTX (V2DFmode), input));
19154 else
19156 input = gen_rtx_REG (vecmode, REGNO (input));
19157 emit_move_insn (value, CONST0_RTX (vecmode));
19158 if (vecmode == V4SFmode)
19159 emit_insn (gen_sse_movss (value, value, input));
19160 else
19161 emit_insn (gen_sse2_movsd (value, value, input));
19164 emit_move_insn (large, two31);
19165 emit_move_insn (zero_or_two31, MEM_P (two31) ? large : two31);
19167 x = gen_rtx_fmt_ee (LE, vecmode, large, value);
19168 emit_insn (gen_rtx_SET (large, x));
19170 x = gen_rtx_AND (vecmode, zero_or_two31, large);
19171 emit_insn (gen_rtx_SET (zero_or_two31, x));
19173 x = gen_rtx_MINUS (vecmode, value, zero_or_two31);
19174 emit_insn (gen_rtx_SET (value, x));
19176 large = gen_rtx_REG (V4SImode, REGNO (large));
19177 emit_insn (gen_ashlv4si3 (large, large, GEN_INT (31)));
19179 x = gen_rtx_REG (V4SImode, REGNO (value));
19180 if (vecmode == V4SFmode)
19181 emit_insn (gen_fix_truncv4sfv4si2 (x, value));
19182 else
19183 emit_insn (gen_sse2_cvttpd2dq (x, value));
19184 value = x;
19186 emit_insn (gen_xorv4si3 (value, value, large));
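/* Sketch of the arithmetic performed by ix86_split_convert_uns_si_sse,
   for a scalar x in [0, 2**32): if x < 2**31 the mask is zero, so we
   simply truncate; otherwise we subtract 2**31 (bringing the value into
   signed SImode range), truncate, and XOR the result with 0x80000000 to
   restore the high bit.  E.g. for x == 3000000000.0:
   3000000000 - 2147483648 = 852516352, and
   852516352 ^ 0x80000000 = 3000000000.  */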
19189 /* Convert an unsigned DImode value into a DFmode, using only SSE.
19190 Expects the 64-bit DImode to be supplied in a pair of integral
19191 registers. Requires SSE2; will use SSE3 if available. For x86_32,
19192 -mfpmath=sse, !optimize_size only. */
19194 void
19195 ix86_expand_convert_uns_didf_sse (rtx target, rtx input)
19197 REAL_VALUE_TYPE bias_lo_rvt, bias_hi_rvt;
19198 rtx int_xmm, fp_xmm;
19199 rtx biases, exponents;
19200 rtx x;
19202 int_xmm = gen_reg_rtx (V4SImode);
19203 if (TARGET_INTER_UNIT_MOVES_TO_VEC)
19204 emit_insn (gen_movdi_to_sse (int_xmm, input));
19205 else if (TARGET_SSE_SPLIT_REGS)
19207 emit_clobber (int_xmm);
19208 emit_move_insn (gen_lowpart (DImode, int_xmm), input);
19210 else
19212 x = gen_reg_rtx (V2DImode);
19213 ix86_expand_vector_init_one_nonzero (false, V2DImode, x, input, 0);
19214 emit_move_insn (int_xmm, gen_lowpart (V4SImode, x));
19217 x = gen_rtx_CONST_VECTOR (V4SImode,
19218 gen_rtvec (4, GEN_INT (0x43300000UL),
19219 GEN_INT (0x45300000UL),
19220 const0_rtx, const0_rtx));
19221 exponents = validize_mem (force_const_mem (V4SImode, x));
19223 /* int_xmm = {0x45300000UL, fp_xmm/hi, 0x43300000, fp_xmm/lo } */
19224 emit_insn (gen_vec_interleave_lowv4si (int_xmm, int_xmm, exponents));
19226 /* Concatenating (juxtaposing) (0x43300000UL ## fp_value_low_xmm)
19227 yields a valid DF value equal to (0x1.0p52 + double(fp_value_lo_xmm)).
19228 Similarly (0x45300000UL ## fp_value_hi_xmm) yields
19229 (0x1.0p84 + double(fp_value_hi_xmm)).
19230 Note these exponents differ by 32. */
19232 fp_xmm = copy_to_mode_reg (V2DFmode, gen_lowpart (V2DFmode, int_xmm));
19234 /* Subtract off those 0x1.0p52 and 0x1.0p84 biases, to produce values
19235 in [0,2**32-1] and [0]+[2**32,2**64-1] respectively. */
19236 real_ldexp (&bias_lo_rvt, &dconst1, 52);
19237 real_ldexp (&bias_hi_rvt, &dconst1, 84);
19238 biases = const_double_from_real_value (bias_lo_rvt, DFmode);
19239 x = const_double_from_real_value (bias_hi_rvt, DFmode);
19240 biases = gen_rtx_CONST_VECTOR (V2DFmode, gen_rtvec (2, biases, x));
19241 biases = validize_mem (force_const_mem (V2DFmode, biases));
19242 emit_insn (gen_subv2df3 (fp_xmm, fp_xmm, biases));
19244 /* Add the upper and lower DFmode values together. */
19245 if (TARGET_SSE3)
19246 emit_insn (gen_sse3_haddv2df3 (fp_xmm, fp_xmm, fp_xmm));
19247 else
19249 x = copy_to_mode_reg (V2DFmode, fp_xmm);
19250 emit_insn (gen_vec_interleave_highv2df (fp_xmm, fp_xmm, fp_xmm));
19251 emit_insn (gen_addv2df3 (fp_xmm, fp_xmm, x));
19254 ix86_expand_vector_extract (false, target, fp_xmm, 0);
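/* Worked example of the bias trick in ix86_expand_convert_uns_didf_sse:
   for the 64-bit input u == hi * 2**32 + lo, pairing lo with exponent
   word 0x43300000 yields the double 0x1.0p52 + lo, and pairing hi with
   0x45300000 yields 0x1.0p84 + hi * 2**32.  After subtracting the
   0x1.0p52 and 0x1.0p84 biases, the final addition recovers
   lo + hi * 2**32 == u, with a single rounding for values that need
   more than 53 bits.  */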
19257 /* Not used, but eases macroization of patterns. */
19258 void
19259 ix86_expand_convert_uns_sixf_sse (rtx, rtx)
19261 gcc_unreachable ();
19264 /* Convert an unsigned SImode value into a DFmode. Only currently used
19265 for SSE, but applicable anywhere. */
19267 void
19268 ix86_expand_convert_uns_sidf_sse (rtx target, rtx input)
19270 REAL_VALUE_TYPE TWO31r;
19271 rtx x, fp;
19273 x = expand_simple_binop (SImode, PLUS, input, GEN_INT (-2147483647 - 1),
19274 NULL, 1, OPTAB_DIRECT);
19276 fp = gen_reg_rtx (DFmode);
19277 emit_insn (gen_floatsidf2 (fp, x));
19279 real_ldexp (&TWO31r, &dconst1, 31);
19280 x = const_double_from_real_value (TWO31r, DFmode);
19282 x = expand_simple_binop (DFmode, PLUS, fp, x, target, 0, OPTAB_DIRECT);
19283 if (x != target)
19284 emit_move_insn (target, x);
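/* The conversion in ix86_expand_convert_uns_sidf_sse is, in effect
   (illustrative C):
     double r = (double) (int) (u - 0x80000000u) + 2147483648.0;
   e.g. for u == 4294967295 the biased value is 2147483647, which
   converts exactly, and adding 2**31 back gives 4294967295.0.  */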
19287 /* Convert a signed DImode value into a DFmode. Only used for SSE in
19288 32-bit mode; otherwise we have a direct convert instruction. */
19290 void
19291 ix86_expand_convert_sign_didf_sse (rtx target, rtx input)
19293 REAL_VALUE_TYPE TWO32r;
19294 rtx fp_lo, fp_hi, x;
19296 fp_lo = gen_reg_rtx (DFmode);
19297 fp_hi = gen_reg_rtx (DFmode);
19299 emit_insn (gen_floatsidf2 (fp_hi, gen_highpart (SImode, input)));
19301 real_ldexp (&TWO32r, &dconst1, 32);
19302 x = const_double_from_real_value (TWO32r, DFmode);
19303 fp_hi = expand_simple_binop (DFmode, MULT, fp_hi, x, fp_hi, 0, OPTAB_DIRECT);
19305 ix86_expand_convert_uns_sidf_sse (fp_lo, gen_lowpart (SImode, input));
19307 x = expand_simple_binop (DFmode, PLUS, fp_hi, fp_lo, target,
19308 0, OPTAB_DIRECT);
19309 if (x != target)
19310 emit_move_insn (target, x);
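/* Illustrative arithmetic for ix86_expand_convert_sign_didf_sse:
   the result is (double) hi * 0x1.0p32 + (double) (unsigned) lo.
   For input -1 (hi == -1, lo == 0xffffffff) this gives
   -4294967296.0 + 4294967295.0 == -1.0.  */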
19313 /* Convert an unsigned SImode value into a SFmode, using only SSE.
19314 For x86_32, -mfpmath=sse, !optimize_size only. */
19315 void
19316 ix86_expand_convert_uns_sisf_sse (rtx target, rtx input)
19318 REAL_VALUE_TYPE ONE16r;
19319 rtx fp_hi, fp_lo, int_hi, int_lo, x;
19321 real_ldexp (&ONE16r, &dconst1, 16);
19322 x = const_double_from_real_value (ONE16r, SFmode);
19323 int_lo = expand_simple_binop (SImode, AND, input, GEN_INT(0xffff),
19324 NULL, 0, OPTAB_DIRECT);
19325 int_hi = expand_simple_binop (SImode, LSHIFTRT, input, GEN_INT(16),
19326 NULL, 0, OPTAB_DIRECT);
19327 fp_hi = gen_reg_rtx (SFmode);
19328 fp_lo = gen_reg_rtx (SFmode);
19329 emit_insn (gen_floatsisf2 (fp_hi, int_hi));
19330 emit_insn (gen_floatsisf2 (fp_lo, int_lo));
19331 fp_hi = expand_simple_binop (SFmode, MULT, fp_hi, x, fp_hi,
19332 0, OPTAB_DIRECT);
19333 fp_hi = expand_simple_binop (SFmode, PLUS, fp_hi, fp_lo, target,
19334 0, OPTAB_DIRECT);
19335 if (!rtx_equal_p (target, fp_hi))
19336 emit_move_insn (target, fp_hi);
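/* Sketch of ix86_expand_convert_uns_sisf_sse: the input is split into
   16-bit halves and recombined as (float) hi16 * 65536.0f
   + (float) lo16, so only the final addition rounds.  E.g. for
   u == 0x00012345: hi16 == 1, lo16 == 0x2345 == 9029, and
   1.0f * 65536.0f + 9029.0f == 74565.0f exactly.  */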
19339 /* floatunsv{4,8}siv{4,8}sf2 expander. Expand code to convert
19340 a vector of unsigned ints VAL to vector of floats TARGET. */
19342 void
19343 ix86_expand_vector_convert_uns_vsivsf (rtx target, rtx val)
19345 rtx tmp[8];
19346 REAL_VALUE_TYPE TWO16r;
19347 machine_mode intmode = GET_MODE (val);
19348 machine_mode fltmode = GET_MODE (target);
19349 rtx (*cvt) (rtx, rtx);
19351 if (intmode == V4SImode)
19352 cvt = gen_floatv4siv4sf2;
19353 else
19354 cvt = gen_floatv8siv8sf2;
19355 tmp[0] = ix86_build_const_vector (intmode, 1, GEN_INT (0xffff));
19356 tmp[0] = force_reg (intmode, tmp[0]);
19357 tmp[1] = expand_simple_binop (intmode, AND, val, tmp[0], NULL_RTX, 1,
19358 OPTAB_DIRECT);
19359 tmp[2] = expand_simple_binop (intmode, LSHIFTRT, val, GEN_INT (16),
19360 NULL_RTX, 1, OPTAB_DIRECT);
19361 tmp[3] = gen_reg_rtx (fltmode);
19362 emit_insn (cvt (tmp[3], tmp[1]));
19363 tmp[4] = gen_reg_rtx (fltmode);
19364 emit_insn (cvt (tmp[4], tmp[2]));
19365 real_ldexp (&TWO16r, &dconst1, 16);
19366 tmp[5] = const_double_from_real_value (TWO16r, SFmode);
19367 tmp[5] = force_reg (fltmode, ix86_build_const_vector (fltmode, 1, tmp[5]));
19368 tmp[6] = expand_simple_binop (fltmode, MULT, tmp[4], tmp[5], NULL_RTX, 1,
19369 OPTAB_DIRECT);
19370 tmp[7] = expand_simple_binop (fltmode, PLUS, tmp[3], tmp[6], target, 1,
19371 OPTAB_DIRECT);
19372 if (tmp[7] != target)
19373 emit_move_insn (target, tmp[7]);
19376 /* Adjust a V*SFmode/V*DFmode value VAL so that *sfix_trunc* resp. fix_trunc*
19377 pattern can be used on it instead of *ufix_trunc* resp. fixuns_trunc*.
19378 This is done by doing just signed conversion if < 0x1p31, and otherwise by
19379 subtracting 0x1p31 first and xoring in 0x80000000 from *XORP afterwards. */
19382 ix86_expand_adjust_ufix_to_sfix_si (rtx val, rtx *xorp)
19384 REAL_VALUE_TYPE TWO31r;
19385 rtx two31r, tmp[4];
19386 machine_mode mode = GET_MODE (val);
19387 machine_mode scalarmode = GET_MODE_INNER (mode);
19388 machine_mode intmode = GET_MODE_SIZE (mode) == 32 ? V8SImode : V4SImode;
19389 rtx (*cmp) (rtx, rtx, rtx, rtx);
19390 int i;
19392 for (i = 0; i < 3; i++)
19393 tmp[i] = gen_reg_rtx (mode);
19394 real_ldexp (&TWO31r, &dconst1, 31);
19395 two31r = const_double_from_real_value (TWO31r, scalarmode);
19396 two31r = ix86_build_const_vector (mode, 1, two31r);
19397 two31r = force_reg (mode, two31r);
19398 switch (mode)
19400 case V8SFmode: cmp = gen_avx_maskcmpv8sf3; break;
19401 case V4SFmode: cmp = gen_sse_maskcmpv4sf3; break;
19402 case V4DFmode: cmp = gen_avx_maskcmpv4df3; break;
19403 case V2DFmode: cmp = gen_sse2_maskcmpv2df3; break;
19404 default: gcc_unreachable ();
19406 tmp[3] = gen_rtx_LE (mode, two31r, val);
19407 emit_insn (cmp (tmp[0], two31r, val, tmp[3]));
19408 tmp[1] = expand_simple_binop (mode, AND, tmp[0], two31r, tmp[1],
19409 0, OPTAB_DIRECT);
19410 if (intmode == V4SImode || TARGET_AVX2)
19411 *xorp = expand_simple_binop (intmode, ASHIFT,
19412 gen_lowpart (intmode, tmp[0]),
19413 GEN_INT (31), NULL_RTX, 0,
19414 OPTAB_DIRECT);
19415 else
19417 rtx two31 = GEN_INT (HOST_WIDE_INT_1U << 31);
19418 two31 = ix86_build_const_vector (intmode, 1, two31);
19419 *xorp = expand_simple_binop (intmode, AND,
19420 gen_lowpart (intmode, tmp[0]),
19421 two31, NULL_RTX, 0,
19422 OPTAB_DIRECT);
19424 return expand_simple_binop (mode, MINUS, val, tmp[1], tmp[2],
19425 0, OPTAB_DIRECT);
19428 /* A subroutine of ix86_build_signbit_mask. If VECT is true,
19429 then replicate the value for all elements of the vector
19430 register. */
19433 ix86_build_const_vector (machine_mode mode, bool vect, rtx value)
19435 int i, n_elt;
19436 rtvec v;
19437 machine_mode scalar_mode;
19439 switch (mode)
19441 case V64QImode:
19442 case V32QImode:
19443 case V16QImode:
19444 case V32HImode:
19445 case V16HImode:
19446 case V8HImode:
19447 case V16SImode:
19448 case V8SImode:
19449 case V4SImode:
19450 case V8DImode:
19451 case V4DImode:
19452 case V2DImode:
19453 gcc_assert (vect);
19454 case V16SFmode:
19455 case V8SFmode:
19456 case V4SFmode:
19457 case V8DFmode:
19458 case V4DFmode:
19459 case V2DFmode:
19460 n_elt = GET_MODE_NUNITS (mode);
19461 v = rtvec_alloc (n_elt);
19462 scalar_mode = GET_MODE_INNER (mode);
19464 RTVEC_ELT (v, 0) = value;
19466 for (i = 1; i < n_elt; ++i)
19467 RTVEC_ELT (v, i) = vect ? value : CONST0_RTX (scalar_mode);
19469 return gen_rtx_CONST_VECTOR (mode, v);
19471 default:
19472 gcc_unreachable ();
19476 /* A subroutine of ix86_expand_fp_absneg_operator, copysign expanders
19477 and ix86_expand_int_vcond. Create a mask for the sign bit in MODE
19478 for an SSE register. If VECT is true, then replicate the mask for
19479 all elements of the vector register. If INVERT is true, then create
19480 a mask excluding the sign bit. */
19483 ix86_build_signbit_mask (machine_mode mode, bool vect, bool invert)
19485 machine_mode vec_mode, imode;
19486 wide_int w;
19487 rtx mask, v;
19489 switch (mode)
19491 case V16SImode:
19492 case V16SFmode:
19493 case V8SImode:
19494 case V4SImode:
19495 case V8SFmode:
19496 case V4SFmode:
19497 vec_mode = mode;
19498 mode = GET_MODE_INNER (mode);
19499 imode = SImode;
19500 break;
19502 case V8DImode:
19503 case V4DImode:
19504 case V2DImode:
19505 case V8DFmode:
19506 case V4DFmode:
19507 case V2DFmode:
19508 vec_mode = mode;
19509 mode = GET_MODE_INNER (mode);
19510 imode = DImode;
19511 break;
19513 case TImode:
19514 case TFmode:
19515 vec_mode = VOIDmode;
19516 imode = TImode;
19517 break;
19519 default:
19520 gcc_unreachable ();
19523 w = wi::set_bit_in_zero (GET_MODE_BITSIZE (mode) - 1,
19524 GET_MODE_BITSIZE (mode));
19525 if (invert)
19526 w = wi::bit_not (w);
19528 /* Force this value into the low part of a fp vector constant. */
19529 mask = immed_wide_int_const (w, imode);
19530 mask = gen_lowpart (mode, mask);
19532 if (vec_mode == VOIDmode)
19533 return force_reg (mode, mask);
19535 v = ix86_build_const_vector (vec_mode, vect, mask);
19536 return force_reg (vec_mode, v);
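/* For example, ix86_build_signbit_mask (V4SFmode, true, false) yields
   the vector constant { 0x80000000, 0x80000000, 0x80000000, 0x80000000 }
   viewed as SFmode elements, while passing invert == true yields
   0x7fffffff in each element instead.  */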
19539 /* Generate code for floating point ABS or NEG. */
19541 void
19542 ix86_expand_fp_absneg_operator (enum rtx_code code, machine_mode mode,
19543 rtx operands[])
19545 rtx mask, set, dst, src;
19546 bool use_sse = false;
19547 bool vector_mode = VECTOR_MODE_P (mode);
19548 machine_mode vmode = mode;
19550 if (vector_mode)
19551 use_sse = true;
19552 else if (mode == TFmode)
19553 use_sse = true;
19554 else if (TARGET_SSE_MATH)
19556 use_sse = SSE_FLOAT_MODE_P (mode);
19557 if (mode == SFmode)
19558 vmode = V4SFmode;
19559 else if (mode == DFmode)
19560 vmode = V2DFmode;
19563 /* NEG and ABS performed with SSE use bitwise mask operations.
19564 Create the appropriate mask now. */
19565 if (use_sse)
19566 mask = ix86_build_signbit_mask (vmode, vector_mode, code == ABS);
19567 else
19568 mask = NULL_RTX;
19570 dst = operands[0];
19571 src = operands[1];
19573 set = gen_rtx_fmt_e (code, mode, src);
19574 set = gen_rtx_SET (dst, set);
19576 if (mask)
19578 rtx use, clob;
19579 rtvec par;
19581 use = gen_rtx_USE (VOIDmode, mask);
19582 if (vector_mode)
19583 par = gen_rtvec (2, set, use);
19584 else
19586 clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
19587 par = gen_rtvec (3, set, use, clob);
19589 emit_insn (gen_rtx_PARALLEL (VOIDmode, par));
19591 else
19592 emit_insn (set);
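/* In other words (illustrative): with SSE masks, ABS ends up as an AND
   with the inverted sign-bit mask (clearing bit 31 of each element) and
   NEG as an XOR with the sign-bit mask (flipping bit 31); without a
   mask we simply emit the plain (abs) or (neg) set.  */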
19595 /* Expand a copysign operation. Special case operand 0 being a constant. */
19597 void
19598 ix86_expand_copysign (rtx operands[])
19600 machine_mode mode, vmode;
19601 rtx dest, op0, op1, mask, nmask;
19603 dest = operands[0];
19604 op0 = operands[1];
19605 op1 = operands[2];
19607 mode = GET_MODE (dest);
19609 if (mode == SFmode)
19610 vmode = V4SFmode;
19611 else if (mode == DFmode)
19612 vmode = V2DFmode;
19613 else
19614 vmode = mode;
19616 if (CONST_DOUBLE_P (op0))
19618 rtx (*copysign_insn)(rtx, rtx, rtx, rtx);
19620 if (real_isneg (CONST_DOUBLE_REAL_VALUE (op0)))
19621 op0 = simplify_unary_operation (ABS, mode, op0, mode);
19623 if (mode == SFmode || mode == DFmode)
19625 if (op0 == CONST0_RTX (mode))
19626 op0 = CONST0_RTX (vmode);
19627 else
19629 rtx v = ix86_build_const_vector (vmode, false, op0);
19631 op0 = force_reg (vmode, v);
19634 else if (op0 != CONST0_RTX (mode))
19635 op0 = force_reg (mode, op0);
19637 mask = ix86_build_signbit_mask (vmode, 0, 0);
19639 if (mode == SFmode)
19640 copysign_insn = gen_copysignsf3_const;
19641 else if (mode == DFmode)
19642 copysign_insn = gen_copysigndf3_const;
19643 else
19644 copysign_insn = gen_copysigntf3_const;
19646 emit_insn (copysign_insn (dest, op0, op1, mask));
19648 else
19650 rtx (*copysign_insn)(rtx, rtx, rtx, rtx, rtx, rtx);
19652 nmask = ix86_build_signbit_mask (vmode, 0, 1);
19653 mask = ix86_build_signbit_mask (vmode, 0, 0);
19655 if (mode == SFmode)
19656 copysign_insn = gen_copysignsf3_var;
19657 else if (mode == DFmode)
19658 copysign_insn = gen_copysigndf3_var;
19659 else
19660 copysign_insn = gen_copysigntf3_var;
19662 emit_insn (copysign_insn (dest, NULL_RTX, op0, op1, nmask, mask));
19666 /* Deconstruct a copysign operation into bit masks. Operand 0 is known to
19667 be a constant, and so has already been expanded into a vector constant. */
19669 void
19670 ix86_split_copysign_const (rtx operands[])
19672 machine_mode mode, vmode;
19673 rtx dest, op0, mask, x;
19675 dest = operands[0];
19676 op0 = operands[1];
19677 mask = operands[3];
19679 mode = GET_MODE (dest);
19680 vmode = GET_MODE (mask);
19682 dest = simplify_gen_subreg (vmode, dest, mode, 0);
19683 x = gen_rtx_AND (vmode, dest, mask);
19684 emit_insn (gen_rtx_SET (dest, x));
19686 if (op0 != CONST0_RTX (vmode))
19688 x = gen_rtx_IOR (vmode, dest, op0);
19689 emit_insn (gen_rtx_SET (dest, x));
19693 /* Deconstruct a copysign operation into bit masks. Operand 0 is variable,
19694 so we have to do two masks. */
19696 void
19697 ix86_split_copysign_var (rtx operands[])
19699 machine_mode mode, vmode;
19700 rtx dest, scratch, op0, op1, mask, nmask, x;
19702 dest = operands[0];
19703 scratch = operands[1];
19704 op0 = operands[2];
19705 op1 = operands[3];
19706 nmask = operands[4];
19707 mask = operands[5];
19709 mode = GET_MODE (dest);
19710 vmode = GET_MODE (mask);
19712 if (rtx_equal_p (op0, op1))
19714 /* Shouldn't happen often (it's useless, obviously), but when it does
19715 we'd generate incorrect code if we continue below. */
19716 emit_move_insn (dest, op0);
19717 return;
19720 if (REG_P (mask) && REGNO (dest) == REGNO (mask)) /* alternative 0 */
19722 gcc_assert (REGNO (op1) == REGNO (scratch));
19724 x = gen_rtx_AND (vmode, scratch, mask);
19725 emit_insn (gen_rtx_SET (scratch, x));
19727 dest = mask;
19728 op0 = simplify_gen_subreg (vmode, op0, mode, 0);
19729 x = gen_rtx_NOT (vmode, dest);
19730 x = gen_rtx_AND (vmode, x, op0);
19731 emit_insn (gen_rtx_SET (dest, x));
19733 else
19735 if (REGNO (op1) == REGNO (scratch)) /* alternative 1,3 */
19737 x = gen_rtx_AND (vmode, scratch, mask);
19739 else /* alternative 2,4 */
19741 gcc_assert (REGNO (mask) == REGNO (scratch));
19742 op1 = simplify_gen_subreg (vmode, op1, mode, 0);
19743 x = gen_rtx_AND (vmode, scratch, op1);
19745 emit_insn (gen_rtx_SET (scratch, x));
19747 if (REGNO (op0) == REGNO (dest)) /* alternative 1,2 */
19749 dest = simplify_gen_subreg (vmode, op0, mode, 0);
19750 x = gen_rtx_AND (vmode, dest, nmask);
19752 else /* alternative 3,4 */
19754 gcc_assert (REGNO (nmask) == REGNO (dest));
19755 dest = nmask;
19756 op0 = simplify_gen_subreg (vmode, op0, mode, 0);
19757 x = gen_rtx_AND (vmode, dest, op0);
19759 emit_insn (gen_rtx_SET (dest, x));
19762 x = gen_rtx_IOR (vmode, dest, scratch);
19763 emit_insn (gen_rtx_SET (dest, x));
19766 /* Return TRUE or FALSE depending on whether the first SET in INSN
19767 has source and destination with matching CC modes, and that the
19768 CC mode is at least as constrained as REQ_MODE. */
19770 bool
19771 ix86_match_ccmode (rtx insn, machine_mode req_mode)
19773 rtx set;
19774 machine_mode set_mode;
19776 set = PATTERN (insn);
19777 if (GET_CODE (set) == PARALLEL)
19778 set = XVECEXP (set, 0, 0);
19779 gcc_assert (GET_CODE (set) == SET);
19780 gcc_assert (GET_CODE (SET_SRC (set)) == COMPARE);
19782 set_mode = GET_MODE (SET_DEST (set));
19783 switch (set_mode)
19785 case CCNOmode:
19786 if (req_mode != CCNOmode
19787 && (req_mode != CCmode
19788 || XEXP (SET_SRC (set), 1) != const0_rtx))
19789 return false;
19790 break;
19791 case CCmode:
19792 if (req_mode == CCGCmode)
19793 return false;
19794 /* FALLTHRU */
19795 case CCGCmode:
19796 if (req_mode == CCGOCmode || req_mode == CCNOmode)
19797 return false;
19798 /* FALLTHRU */
19799 case CCGOCmode:
19800 if (req_mode == CCZmode)
19801 return false;
19802 /* FALLTHRU */
19803 case CCZmode:
19804 break;
19806 case CCAmode:
19807 case CCCmode:
19808 case CCOmode:
19809 case CCPmode:
19810 case CCSmode:
19811 if (set_mode != req_mode)
19812 return false;
19813 break;
19815 default:
19816 gcc_unreachable ();
19819 return GET_MODE (SET_SRC (set)) == set_mode;
19822 /* Generate insn patterns to do an integer compare of OPERANDS. */
19824 static rtx
19825 ix86_expand_int_compare (enum rtx_code code, rtx op0, rtx op1)
19827 machine_mode cmpmode;
19828 rtx tmp, flags;
19830 cmpmode = SELECT_CC_MODE (code, op0, op1);
19831 flags = gen_rtx_REG (cmpmode, FLAGS_REG);
19833 /* This is very simple, but making the interface the same as in the
19834 FP case makes the rest of the code easier. */
19835 tmp = gen_rtx_COMPARE (cmpmode, op0, op1);
19836 emit_insn (gen_rtx_SET (flags, tmp));
19838 /* Return the test that should be put into the flags user, i.e.
19839 the bcc, scc, or cmov instruction. */
19840 return gen_rtx_fmt_ee (code, VOIDmode, flags, const0_rtx);
19843 /* Figure out whether to use ordered or unordered fp comparisons.
19844 Return the appropriate mode to use. */
19846 machine_mode
19847 ix86_fp_compare_mode (enum rtx_code)
19849 /* ??? In order to make all comparisons reversible, we do all comparisons
19850 non-trapping when compiling for IEEE. Once gcc is able to distinguish
19851 all forms of trapping and non-trapping comparisons, we can make inequality
19852 comparisons trapping again, since that results in better code when using
19853 FCOM based compares. */
19854 return TARGET_IEEE_FP ? CCFPUmode : CCFPmode;
19857 machine_mode
19858 ix86_cc_mode (enum rtx_code code, rtx op0, rtx op1)
19860 machine_mode mode = GET_MODE (op0);
19862 if (SCALAR_FLOAT_MODE_P (mode))
19864 gcc_assert (!DECIMAL_FLOAT_MODE_P (mode));
19865 return ix86_fp_compare_mode (code);
19868 switch (code)
19870 /* Only zero flag is needed. */
19871 case EQ: /* ZF=0 */
19872 case NE: /* ZF!=0 */
19873 return CCZmode;
19874 /* Codes needing carry flag. */
19875 case GEU: /* CF=0 */
19876 case LTU: /* CF=1 */
19877 /* Detect overflow checks. They need just the carry flag. */
19878 if (GET_CODE (op0) == PLUS
19879 && rtx_equal_p (op1, XEXP (op0, 0)))
19880 return CCCmode;
19881 else
19882 return CCmode;
19883 case GTU: /* CF=0 & ZF=0 */
19884 case LEU: /* CF=1 | ZF=1 */
19885 return CCmode;
19886 /* Codes possibly doable only with sign flag when
19887 comparing against zero. */
19888 case GE: /* SF=OF or SF=0 */
19889 case LT: /* SF<>OF or SF=1 */
19890 if (op1 == const0_rtx)
19891 return CCGOCmode;
19892 else
19893 /* For other cases Carry flag is not required. */
19894 return CCGCmode;
19895 /* Codes doable only with the sign flag when comparing
19896 against zero, but we lack a jump instruction for them,
19897 so we need to use relational tests against overflow,
19898 which thus needs to be zero. */
19899 case GT: /* ZF=0 & SF=OF */
19900 case LE: /* ZF=1 | SF<>OF */
19901 if (op1 == const0_rtx)
19902 return CCNOmode;
19903 else
19904 return CCGCmode;
19905 /* The strcmp pattern does (use flags), and combine may ask us for a
19906 proper mode. */
19907 case USE:
19908 return CCmode;
19909 default:
19910 gcc_unreachable ();
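/* Example of the overflow-check detection in ix86_cc_mode: for code
   like "if (a + b < a)" with unsigned a and b, the comparison is
   (ltu (plus a b) a), so op0 is a PLUS whose first operand equals op1;
   only the carry flag is needed and CCCmode is chosen.  */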
19914 /* Return the fixed registers used for condition codes. */
19916 static bool
19917 ix86_fixed_condition_code_regs (unsigned int *p1, unsigned int *p2)
19919 *p1 = FLAGS_REG;
19920 *p2 = FPSR_REG;
19921 return true;
19924 /* If two condition code modes are compatible, return a condition code
19925 mode which is compatible with both. Otherwise, return
19926 VOIDmode. */
19928 static machine_mode
19929 ix86_cc_modes_compatible (machine_mode m1, machine_mode m2)
19931 if (m1 == m2)
19932 return m1;
19934 if (GET_MODE_CLASS (m1) != MODE_CC || GET_MODE_CLASS (m2) != MODE_CC)
19935 return VOIDmode;
19937 if ((m1 == CCGCmode && m2 == CCGOCmode)
19938 || (m1 == CCGOCmode && m2 == CCGCmode))
19939 return CCGCmode;
19941 if (m1 == CCZmode && (m2 == CCGCmode || m2 == CCGOCmode))
19942 return m2;
19943 else if (m2 == CCZmode && (m1 == CCGCmode || m1 == CCGOCmode))
19944 return m1;
19946 switch (m1)
19948 default:
19949 gcc_unreachable ();
19951 case CCmode:
19952 case CCGCmode:
19953 case CCGOCmode:
19954 case CCNOmode:
19955 case CCAmode:
19956 case CCCmode:
19957 case CCOmode:
19958 case CCPmode:
19959 case CCSmode:
19960 case CCZmode:
19961 switch (m2)
19963 default:
19964 return VOIDmode;
19966 case CCmode:
19967 case CCGCmode:
19968 case CCGOCmode:
19969 case CCNOmode:
19970 case CCAmode:
19971 case CCCmode:
19972 case CCOmode:
19973 case CCPmode:
19974 case CCSmode:
19975 case CCZmode:
19976 return CCmode;
19979 case CCFPmode:
19980 case CCFPUmode:
19981 /* These are only compatible with themselves, which we already
19982 checked above. */
19983 return VOIDmode;
19988 /* Return a comparison we can do that is equivalent to
19989 swap_condition (code), apart possibly from orderedness.
19990 But never change orderedness if TARGET_IEEE_FP, returning
19991 UNKNOWN in that case if necessary. */
19993 static enum rtx_code
19994 ix86_fp_swap_condition (enum rtx_code code)
19996 switch (code)
19998 case GT: /* GTU - CF=0 & ZF=0 */
19999 return TARGET_IEEE_FP ? UNKNOWN : UNLT;
20000 case GE: /* GEU - CF=0 */
20001 return TARGET_IEEE_FP ? UNKNOWN : UNLE;
20002 case UNLT: /* LTU - CF=1 */
20003 return TARGET_IEEE_FP ? UNKNOWN : GT;
20004 case UNLE: /* LEU - CF=1 | ZF=1 */
20005 return TARGET_IEEE_FP ? UNKNOWN : GE;
20006 default:
20007 return swap_condition (code);
20011 /* Return the cost of comparison CODE using the best strategy for performance.
20012 All of the following functions use the number of instructions as the cost metric.
20013 In the future this should be tweaked to compute bytes for optimize_size and
20014 take into account the performance of various instructions on various CPUs. */
20016 static int
20017 ix86_fp_comparison_cost (enum rtx_code code)
20019 int arith_cost;
20021 /* The cost of code using bit-twiddling on %ah. */
20022 switch (code)
20024 case UNLE:
20025 case UNLT:
20026 case LTGT:
20027 case GT:
20028 case GE:
20029 case UNORDERED:
20030 case ORDERED:
20031 case UNEQ:
20032 arith_cost = 4;
20033 break;
20034 case LT:
20035 case NE:
20036 case EQ:
20037 case UNGE:
20038 arith_cost = TARGET_IEEE_FP ? 5 : 4;
20039 break;
20040 case LE:
20041 case UNGT:
20042 arith_cost = TARGET_IEEE_FP ? 6 : 4;
20043 break;
20044 default:
20045 gcc_unreachable ();
20048 switch (ix86_fp_comparison_strategy (code))
20050 case IX86_FPCMP_COMI:
20051 return arith_cost > 4 ? 3 : 2;
20052 case IX86_FPCMP_SAHF:
20053 return arith_cost > 4 ? 4 : 3;
20054 default:
20055 return arith_cost;
20059 /* Return the strategy to use for floating-point. We assume that fcomi is always
20060 preferable where available, since that is also true when looking at size
20061 (2 bytes, vs. 3 for fnstsw+sahf and at least 5 for fnstsw+test). */
20063 enum ix86_fpcmp_strategy
20064 ix86_fp_comparison_strategy (enum rtx_code)
20066 /* Do fcomi/sahf based test when profitable. */
20068 if (TARGET_CMOVE)
20069 return IX86_FPCMP_COMI;
20071 if (TARGET_SAHF && (TARGET_USE_SAHF || optimize_insn_for_size_p ()))
20072 return IX86_FPCMP_SAHF;
20074 return IX86_FPCMP_ARITH;
20077 /* Swap, force into registers, or otherwise massage the two operands
20078 to a fp comparison. The operands are updated in place; the new
20079 comparison code is returned. */
20081 static enum rtx_code
20082 ix86_prepare_fp_compare_args (enum rtx_code code, rtx *pop0, rtx *pop1)
20084 machine_mode fpcmp_mode = ix86_fp_compare_mode (code);
20085 rtx op0 = *pop0, op1 = *pop1;
20086 machine_mode op_mode = GET_MODE (op0);
20087 int is_sse = TARGET_SSE_MATH && SSE_FLOAT_MODE_P (op_mode);
20089 /* All of the unordered compare instructions only work on registers.
20090 The same is true of the fcomi compare instructions. The XFmode
20091 compare instructions require registers except when comparing
20092 against zero or when converting operand 1 from fixed point to
20093 floating point. */
20095 if (!is_sse
20096 && (fpcmp_mode == CCFPUmode
20097 || (op_mode == XFmode
20098 && ! (standard_80387_constant_p (op0) == 1
20099 || standard_80387_constant_p (op1) == 1)
20100 && GET_CODE (op1) != FLOAT)
20101 || ix86_fp_comparison_strategy (code) == IX86_FPCMP_COMI))
20103 op0 = force_reg (op_mode, op0);
20104 op1 = force_reg (op_mode, op1);
20106 else
20108 /* %%% We only allow op1 in memory; op0 must be st(0). So swap
20109 things around if they appear profitable, otherwise force op0
20110 into a register. */
20112 if (standard_80387_constant_p (op0) == 0
20113 || (MEM_P (op0)
20114 && ! (standard_80387_constant_p (op1) == 0
20115 || MEM_P (op1))))
20117 enum rtx_code new_code = ix86_fp_swap_condition (code);
20118 if (new_code != UNKNOWN)
20120 std::swap (op0, op1);
20121 code = new_code;
20125 if (!REG_P (op0))
20126 op0 = force_reg (op_mode, op0);
20128 if (CONSTANT_P (op1))
20130 int tmp = standard_80387_constant_p (op1);
20131 if (tmp == 0)
20132 op1 = validize_mem (force_const_mem (op_mode, op1));
20133 else if (tmp == 1)
20135 if (TARGET_CMOVE)
20136 op1 = force_reg (op_mode, op1);
20138 else
20139 op1 = force_reg (op_mode, op1);
20143 /* Try to rearrange the comparison to make it cheaper. */
20144 if (ix86_fp_comparison_cost (code)
20145 > ix86_fp_comparison_cost (swap_condition (code))
20146 && (REG_P (op1) || can_create_pseudo_p ()))
20148 std::swap (op0, op1);
20149 code = swap_condition (code);
20150 if (!REG_P (op0))
20151 op0 = force_reg (op_mode, op0);
20154 *pop0 = op0;
20155 *pop1 = op1;
20156 return code;
20159 /* Convert comparison codes we use to represent FP comparison to integer
20160 code that will result in proper branch. Return UNKNOWN if no such code
20161 is available. */
20163 enum rtx_code
20164 ix86_fp_compare_code_to_integer (enum rtx_code code)
20166 switch (code)
20168 case GT:
20169 return GTU;
20170 case GE:
20171 return GEU;
20172 case ORDERED:
20173 case UNORDERED:
20174 return code;
20175 break;
20176 case UNEQ:
20177 return EQ;
20178 break;
20179 case UNLT:
20180 return LTU;
20181 break;
20182 case UNLE:
20183 return LEU;
20184 break;
20185 case LTGT:
20186 return NE;
20187 break;
20188 default:
20189 return UNKNOWN;
20193 /* Generate insn patterns to do a floating point compare of OPERANDS. */
20195 static rtx
20196 ix86_expand_fp_compare (enum rtx_code code, rtx op0, rtx op1, rtx scratch)
20198 machine_mode fpcmp_mode, intcmp_mode;
20199 rtx tmp, tmp2;
20201 fpcmp_mode = ix86_fp_compare_mode (code);
20202 code = ix86_prepare_fp_compare_args (code, &op0, &op1);
20204 /* Do fcomi/sahf based test when profitable. */
20205 switch (ix86_fp_comparison_strategy (code))
20207 case IX86_FPCMP_COMI:
20208 intcmp_mode = fpcmp_mode;
20209 tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
20210 tmp = gen_rtx_SET (gen_rtx_REG (fpcmp_mode, FLAGS_REG), tmp);
20211 emit_insn (tmp);
20212 break;
20214 case IX86_FPCMP_SAHF:
20215 intcmp_mode = fpcmp_mode;
20216 tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
20217 tmp = gen_rtx_SET (gen_rtx_REG (fpcmp_mode, FLAGS_REG), tmp);
20219 if (!scratch)
20220 scratch = gen_reg_rtx (HImode);
20221 tmp2 = gen_rtx_CLOBBER (VOIDmode, scratch);
20222 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, tmp, tmp2)));
20223 break;
20225 case IX86_FPCMP_ARITH:
20226 /* Sadness wrt reg-stack pops killing fpsr -- gotta get fnstsw first. */
20227 tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
20228 tmp2 = gen_rtx_UNSPEC (HImode, gen_rtvec (1, tmp), UNSPEC_FNSTSW);
20229 if (!scratch)
20230 scratch = gen_reg_rtx (HImode);
20231 emit_insn (gen_rtx_SET (scratch, tmp2));
20233 /* In the unordered case, we have to check C2 for NaN's, which
20234 doesn't happen to work out to anything nice combination-wise.
20235 So do some bit twiddling on the value we've got in AH to come
20236 up with an appropriate set of condition codes. */
20238 intcmp_mode = CCNOmode;
20239 switch (code)
20241 case GT:
20242 case UNGT:
20243 if (code == GT || !TARGET_IEEE_FP)
20245 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x45)));
20246 code = EQ;
20248 else
20250 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
20251 emit_insn (gen_addqi_ext_1 (scratch, scratch, constm1_rtx));
20252 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x44)));
20253 intcmp_mode = CCmode;
20254 code = GEU;
20256 break;
20257 case LT:
20258 case UNLT:
20259 if (code == LT && TARGET_IEEE_FP)
20261 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
20262 emit_insn (gen_cmpqi_ext_3 (scratch, const1_rtx));
20263 intcmp_mode = CCmode;
20264 code = EQ;
20266 else
20268 emit_insn (gen_testqi_ext_ccno_0 (scratch, const1_rtx));
20269 code = NE;
20271 break;
20272 case GE:
20273 case UNGE:
20274 if (code == GE || !TARGET_IEEE_FP)
20276 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x05)));
20277 code = EQ;
20279 else
20281 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
20282 emit_insn (gen_xorqi_cc_ext_1 (scratch, scratch, const1_rtx));
20283 code = NE;
20285 break;
20286 case LE:
20287 case UNLE:
20288 if (code == LE && TARGET_IEEE_FP)
20290 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
20291 emit_insn (gen_addqi_ext_1 (scratch, scratch, constm1_rtx));
20292 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x40)));
20293 intcmp_mode = CCmode;
20294 code = LTU;
20296 else
20298 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x45)));
20299 code = NE;
20301 break;
20302 case EQ:
20303 case UNEQ:
20304 if (code == EQ && TARGET_IEEE_FP)
20306 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
20307 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x40)));
20308 intcmp_mode = CCmode;
20309 code = EQ;
20311 else
20313 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x40)));
20314 code = NE;
20316 break;
20317 case NE:
20318 case LTGT:
20319 if (code == NE && TARGET_IEEE_FP)
20321 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
20322 emit_insn (gen_xorqi_cc_ext_1 (scratch, scratch,
20323 GEN_INT (0x40)));
20324 code = NE;
20326 else
20328 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x40)));
20329 code = EQ;
20331 break;
20333 case UNORDERED:
20334 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x04)));
20335 code = NE;
20336 break;
20337 case ORDERED:
20338 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x04)));
20339 code = EQ;
20340 break;
20342 default:
20343 gcc_unreachable ();
20345 break;
20347 default:
20348 gcc_unreachable();
20351 /* Return the test that should be put into the flags user, i.e.
20352 the bcc, scc, or cmov instruction. */
20353 return gen_rtx_fmt_ee (code, VOIDmode,
20354 gen_rtx_REG (intcmp_mode, FLAGS_REG),
20355 const0_rtx);
20358 static rtx
20359 ix86_expand_compare (enum rtx_code code, rtx op0, rtx op1)
20361 rtx ret;
20363 if (GET_MODE_CLASS (GET_MODE (op0)) == MODE_CC)
20364 ret = gen_rtx_fmt_ee (code, VOIDmode, op0, op1);
20366 else if (SCALAR_FLOAT_MODE_P (GET_MODE (op0)))
20368 gcc_assert (!DECIMAL_FLOAT_MODE_P (GET_MODE (op0)));
20369 ret = ix86_expand_fp_compare (code, op0, op1, NULL_RTX);
20371 else
20372 ret = ix86_expand_int_compare (code, op0, op1);
20374 return ret;
20377 void
20378 ix86_expand_branch (enum rtx_code code, rtx op0, rtx op1, rtx label)
20380 machine_mode mode = GET_MODE (op0);
20381 rtx tmp;
20383 switch (mode)
20385 case SFmode:
20386 case DFmode:
20387 case XFmode:
20388 case QImode:
20389 case HImode:
20390 case SImode:
20391 simple:
20392 tmp = ix86_expand_compare (code, op0, op1);
20393 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
20394 gen_rtx_LABEL_REF (VOIDmode, label),
20395 pc_rtx);
20396 emit_jump_insn (gen_rtx_SET (pc_rtx, tmp));
20397 return;
20399 case DImode:
20400 if (TARGET_64BIT)
20401 goto simple;
20402 case TImode:
20403 /* Expand DImode branch into multiple compare+branch. */
20405 rtx lo[2], hi[2];
20406 rtx_code_label *label2;
20407 enum rtx_code code1, code2, code3;
20408 machine_mode submode;
20410 if (CONSTANT_P (op0) && !CONSTANT_P (op1))
20412 std::swap (op0, op1);
20413 code = swap_condition (code);
20416 split_double_mode (mode, &op0, 1, lo+0, hi+0);
20417 split_double_mode (mode, &op1, 1, lo+1, hi+1);
20419 submode = mode == DImode ? SImode : DImode;
20421 /* When comparing for equality, we can use (hi0^hi1)|(lo0^lo1) to
20422 avoid two branches. This costs one extra insn, so disable when
20423 optimizing for size. */
20425 if ((code == EQ || code == NE)
20426 && (!optimize_insn_for_size_p ()
20427 || hi[1] == const0_rtx || lo[1] == const0_rtx))
20429 rtx xor0, xor1;
20431 xor1 = hi[0];
20432 if (hi[1] != const0_rtx)
20433 xor1 = expand_binop (submode, xor_optab, xor1, hi[1],
20434 NULL_RTX, 0, OPTAB_WIDEN);
20436 xor0 = lo[0];
20437 if (lo[1] != const0_rtx)
20438 xor0 = expand_binop (submode, xor_optab, xor0, lo[1],
20439 NULL_RTX, 0, OPTAB_WIDEN);
20441 tmp = expand_binop (submode, ior_optab, xor1, xor0,
20442 NULL_RTX, 0, OPTAB_WIDEN);
20444 ix86_expand_branch (code, tmp, const0_rtx, label);
20445 return;
20448 /* Otherwise, if we are doing less-than or greater-or-equal-than,
20449 op1 is a constant and the low word is zero, then we can just
20450 examine the high word. Similarly for low word -1 and
20451 less-or-equal-than or greater-than. */
20453 if (CONST_INT_P (hi[1]))
20454 switch (code)
20456 case LT: case LTU: case GE: case GEU:
20457 if (lo[1] == const0_rtx)
20459 ix86_expand_branch (code, hi[0], hi[1], label);
20460 return;
20462 break;
20463 case LE: case LEU: case GT: case GTU:
20464 if (lo[1] == constm1_rtx)
20466 ix86_expand_branch (code, hi[0], hi[1], label);
20467 return;
20469 break;
20470 default:
20471 break;
20474 /* Otherwise, we need two or three jumps. */
20476 label2 = gen_label_rtx ();
20478 code1 = code;
20479 code2 = swap_condition (code);
20480 code3 = unsigned_condition (code);
20482 switch (code)
20484 case LT: case GT: case LTU: case GTU:
20485 break;
20487 case LE: code1 = LT; code2 = GT; break;
20488 case GE: code1 = GT; code2 = LT; break;
20489 case LEU: code1 = LTU; code2 = GTU; break;
20490 case GEU: code1 = GTU; code2 = LTU; break;
20492 case EQ: code1 = UNKNOWN; code2 = NE; break;
20493 case NE: code2 = UNKNOWN; break;
20495 default:
20496 gcc_unreachable ();
20500 * a < b =>
20501 * if (hi(a) < hi(b)) goto true;
20502 * if (hi(a) > hi(b)) goto false;
20503 * if (lo(a) < lo(b)) goto true;
20504 * false:
20507 if (code1 != UNKNOWN)
20508 ix86_expand_branch (code1, hi[0], hi[1], label);
20509 if (code2 != UNKNOWN)
20510 ix86_expand_branch (code2, hi[0], hi[1], label2);
20512 ix86_expand_branch (code3, lo[0], lo[1], label);
20514 if (code2 != UNKNOWN)
20515 emit_label (label2);
20516 return;
20519 default:
20520 gcc_assert (GET_MODE_CLASS (GET_MODE (op0)) == MODE_CC);
20521 goto simple;
20525 /* Split branch based on floating point condition. */
20526 void
20527 ix86_split_fp_branch (enum rtx_code code, rtx op1, rtx op2,
20528 rtx target1, rtx target2, rtx tmp)
20530 rtx condition;
20531 rtx i;
20533 if (target2 != pc_rtx)
20535 std::swap (target1, target2);
20536 code = reverse_condition_maybe_unordered (code);
20539 condition = ix86_expand_fp_compare (code, op1, op2,
20540 tmp);
20542 i = emit_jump_insn (gen_rtx_SET
20543 (pc_rtx,
20544 gen_rtx_IF_THEN_ELSE (VOIDmode,
20545 condition, target1, target2)));
20546 if (split_branch_probability >= 0)
20547 add_int_reg_note (i, REG_BR_PROB, split_branch_probability);
20550 void
20551 ix86_expand_setcc (rtx dest, enum rtx_code code, rtx op0, rtx op1)
20553 rtx ret;
20555 gcc_assert (GET_MODE (dest) == QImode);
20557 ret = ix86_expand_compare (code, op0, op1);
20558 PUT_MODE (ret, QImode);
20559 emit_insn (gen_rtx_SET (dest, ret));
20562 /* Expand a comparison setting or clearing the carry flag. Return true when
20563 successful and set *POP for the operation. */
20564 static bool
20565 ix86_expand_carry_flag_compare (enum rtx_code code, rtx op0, rtx op1, rtx *pop)
20567 machine_mode mode =
20568 GET_MODE (op0) != VOIDmode ? GET_MODE (op0) : GET_MODE (op1);
20570 /* Do not handle double-mode compares that go through special path. */
20571 if (mode == (TARGET_64BIT ? TImode : DImode))
20572 return false;
20574 if (SCALAR_FLOAT_MODE_P (mode))
20576 rtx compare_op;
20577 rtx_insn *compare_seq;
20579 gcc_assert (!DECIMAL_FLOAT_MODE_P (mode));
20581 /* Shortcut: the following common codes never translate
20582 into carry flag compares. */
20583 if (code == EQ || code == NE || code == UNEQ || code == LTGT
20584 || code == ORDERED || code == UNORDERED)
20585 return false;
20587 /* These comparisons require zero flag; swap operands so they won't. */
20588 if ((code == GT || code == UNLE || code == LE || code == UNGT)
20589 && !TARGET_IEEE_FP)
20591 std::swap (op0, op1);
20592 code = swap_condition (code);
20595 /* Try to expand the comparison and verify that we end up with
20596 a carry-flag-based comparison. This fails to be true only when
20597 we decide to expand the comparison using arithmetic, which is not
20598 a common scenario. */
20599 start_sequence ();
20600 compare_op = ix86_expand_fp_compare (code, op0, op1, NULL_RTX);
20601 compare_seq = get_insns ();
20602 end_sequence ();
20604 if (GET_MODE (XEXP (compare_op, 0)) == CCFPmode
20605 || GET_MODE (XEXP (compare_op, 0)) == CCFPUmode)
20606 code = ix86_fp_compare_code_to_integer (GET_CODE (compare_op));
20607 else
20608 code = GET_CODE (compare_op);
20610 if (code != LTU && code != GEU)
20611 return false;
20613 emit_insn (compare_seq);
20614 *pop = compare_op;
20615 return true;
20618 if (!INTEGRAL_MODE_P (mode))
20619 return false;
20621 switch (code)
20623 case LTU:
20624 case GEU:
20625 break;
20627 /* Convert a==0 into (unsigned)a<1. */
20628 case EQ:
20629 case NE:
20630 if (op1 != const0_rtx)
20631 return false;
20632 op1 = const1_rtx;
20633 code = (code == EQ ? LTU : GEU);
20634 break;
20636 /* Convert a>b into b<a or a>=b+1. */
20637 case GTU:
20638 case LEU:
20639 if (CONST_INT_P (op1))
20641 op1 = gen_int_mode (INTVAL (op1) + 1, GET_MODE (op0));
20642 /* Bail out on overflow. We could still swap the operands, but that
20643 would force loading the constant into a register. */
20644 if (op1 == const0_rtx
20645 || !x86_64_immediate_operand (op1, GET_MODE (op1)))
20646 return false;
20647 code = (code == GTU ? GEU : LTU);
20649 else
20651 std::swap (op0, op1);
20652 code = (code == GTU ? LTU : GEU);
20654 break;
20656 /* Convert a>=0 into (unsigned)a<0x80000000. */
20657 case LT:
20658 case GE:
20659 if (mode == DImode || op1 != const0_rtx)
20660 return false;
20661 op1 = gen_int_mode (1 << (GET_MODE_BITSIZE (mode) - 1), mode);
20662 code = (code == LT ? GEU : LTU);
20663 break;
20664 case LE:
20665 case GT:
20666 if (mode == DImode || op1 != constm1_rtx)
20667 return false;
20668 op1 = gen_int_mode (1 << (GET_MODE_BITSIZE (mode) - 1), mode);
20669 code = (code == LE ? GEU : LTU);
20670 break;
20672 default:
20673 return false;
20675 /* Swapping operands may cause a constant to appear as the first operand. */
20676 if (!nonimmediate_operand (op0, VOIDmode))
20678 if (!can_create_pseudo_p ())
20679 return false;
20680 op0 = force_reg (mode, op0);
20682 *pop = ix86_expand_compare (code, op0, op1);
20683 gcc_assert (GET_CODE (*pop) == LTU || GET_CODE (*pop) == GEU);
20684 return true;
20687 bool
20688 ix86_expand_int_movcc (rtx operands[])
20690 enum rtx_code code = GET_CODE (operands[1]), compare_code;
20691 rtx_insn *compare_seq;
20692 rtx compare_op;
20693 machine_mode mode = GET_MODE (operands[0]);
20694 bool sign_bit_compare_p = false;
20695 rtx op0 = XEXP (operands[1], 0);
20696 rtx op1 = XEXP (operands[1], 1);
20698 if (GET_MODE (op0) == TImode
20699 || (GET_MODE (op0) == DImode
20700 && !TARGET_64BIT))
20701 return false;
20703 start_sequence ();
20704 compare_op = ix86_expand_compare (code, op0, op1);
20705 compare_seq = get_insns ();
20706 end_sequence ();
20708 compare_code = GET_CODE (compare_op);
20710 if ((op1 == const0_rtx && (code == GE || code == LT))
20711 || (op1 == constm1_rtx && (code == GT || code == LE)))
20712 sign_bit_compare_p = true;
20714 /* Don't attempt mode expansion here -- if we had to expand 5 or 6
20715 HImode insns, we'd be swallowed in word prefix ops. */
20717 if ((mode != HImode || TARGET_FAST_PREFIX)
20718 && (mode != (TARGET_64BIT ? TImode : DImode))
20719 && CONST_INT_P (operands[2])
20720 && CONST_INT_P (operands[3]))
20722 rtx out = operands[0];
20723 HOST_WIDE_INT ct = INTVAL (operands[2]);
20724 HOST_WIDE_INT cf = INTVAL (operands[3]);
20725 HOST_WIDE_INT diff;
20727 diff = ct - cf;
20728 /* Sign bit compares are better done using shifts than by using
20729 sbb. */
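/* For instance, assuming a 32-bit operand, x < 0 ? -1 : 0 is just
   "sarl $31, x": the arithmetic shift replicates the sign bit, so no
   compare or sbb is needed at all.  */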
20730 if (sign_bit_compare_p
20731 || ix86_expand_carry_flag_compare (code, op0, op1, &compare_op))
20733 /* Detect overlap between destination and compare sources. */
20734 rtx tmp = out;
20736 if (!sign_bit_compare_p)
20738 rtx flags;
20739 bool fpcmp = false;
20741 compare_code = GET_CODE (compare_op);
20743 flags = XEXP (compare_op, 0);
20745 if (GET_MODE (flags) == CCFPmode
20746 || GET_MODE (flags) == CCFPUmode)
20748 fpcmp = true;
20749 compare_code
20750 = ix86_fp_compare_code_to_integer (compare_code);
20753 /* To simplify the rest of the code, restrict to the GEU case. */
20754 if (compare_code == LTU)
20756 std::swap (ct, cf);
20757 compare_code = reverse_condition (compare_code);
20758 code = reverse_condition (code);
20760 else
20762 if (fpcmp)
20763 PUT_CODE (compare_op,
20764 reverse_condition_maybe_unordered
20765 (GET_CODE (compare_op)));
20766 else
20767 PUT_CODE (compare_op,
20768 reverse_condition (GET_CODE (compare_op)));
20770 diff = ct - cf;
20772 if (reg_overlap_mentioned_p (out, op0)
20773 || reg_overlap_mentioned_p (out, op1))
20774 tmp = gen_reg_rtx (mode);
20776 if (mode == DImode)
20777 emit_insn (gen_x86_movdicc_0_m1 (tmp, flags, compare_op));
20778 else
20779 emit_insn (gen_x86_movsicc_0_m1 (gen_lowpart (SImode, tmp),
20780 flags, compare_op));
20782 else
20784 if (code == GT || code == GE)
20785 code = reverse_condition (code);
20786 else
20788 std::swap (ct, cf);
20789 diff = ct - cf;
20791 tmp = emit_store_flag (tmp, code, op0, op1, VOIDmode, 0, -1);
20794 if (diff == 1)
20797 * cmpl op0,op1
20798 * sbbl dest,dest
20799 * [addl dest, ct]
20801 * Size 5 - 8.
20803 if (ct)
20804 tmp = expand_simple_binop (mode, PLUS,
20805 tmp, GEN_INT (ct),
20806 copy_rtx (tmp), 1, OPTAB_DIRECT);
20808 else if (cf == -1)
20811 * cmpl op0,op1
20812 * sbbl dest,dest
20813 * orl $ct, dest
20815 * Size 8.
20817 tmp = expand_simple_binop (mode, IOR,
20818 tmp, GEN_INT (ct),
20819 copy_rtx (tmp), 1, OPTAB_DIRECT);
20821 else if (diff == -1 && ct)
20824 * cmpl op0,op1
20825 * sbbl dest,dest
20826 * notl dest
20827 * [addl dest, cf]
20829 * Size 8 - 11.
20831 tmp = expand_simple_unop (mode, NOT, tmp, copy_rtx (tmp), 1);
20832 if (cf)
20833 tmp = expand_simple_binop (mode, PLUS,
20834 copy_rtx (tmp), GEN_INT (cf),
20835 copy_rtx (tmp), 1, OPTAB_DIRECT);
20837 else
20840 * cmpl op0,op1
20841 * sbbl dest,dest
20842 * [notl dest]
20843 * andl cf - ct, dest
20844 * [addl dest, ct]
20846 * Size 8 - 11.
20849 if (cf == 0)
20851 cf = ct;
20852 ct = 0;
20853 tmp = expand_simple_unop (mode, NOT, tmp, copy_rtx (tmp), 1);
20856 tmp = expand_simple_binop (mode, AND,
20857 copy_rtx (tmp),
20858 gen_int_mode (cf - ct, mode),
20859 copy_rtx (tmp), 1, OPTAB_DIRECT);
20860 if (ct)
20861 tmp = expand_simple_binop (mode, PLUS,
20862 copy_rtx (tmp), GEN_INT (ct),
20863 copy_rtx (tmp), 1, OPTAB_DIRECT);
20866 if (!rtx_equal_p (tmp, out))
20867 emit_move_insn (copy_rtx (out), copy_rtx (tmp));
20869 return true;
20872 if (diff < 0)
20874 machine_mode cmp_mode = GET_MODE (op0);
20875 enum rtx_code new_code;
20877 if (SCALAR_FLOAT_MODE_P (cmp_mode))
20879 gcc_assert (!DECIMAL_FLOAT_MODE_P (cmp_mode));
20881 /* We may be reversing an unordered compare to a normal compare, which
20882 is not valid in general (we may convert a non-trapping condition
20883 to a trapping one); however, on i386 we currently emit all
20884 comparisons unordered. */
20885 new_code = reverse_condition_maybe_unordered (code);
20887 else
20888 new_code = ix86_reverse_condition (code, cmp_mode);
20889 if (new_code != UNKNOWN)
20891 std::swap (ct, cf);
20892 diff = -diff;
20893 code = new_code;
20897 compare_code = UNKNOWN;
20898 if (GET_MODE_CLASS (GET_MODE (op0)) == MODE_INT
20899 && CONST_INT_P (op1))
20901 if (op1 == const0_rtx
20902 && (code == LT || code == GE))
20903 compare_code = code;
20904 else if (op1 == constm1_rtx)
20906 if (code == LE)
20907 compare_code = LT;
20908 else if (code == GT)
20909 compare_code = GE;
20913 /* Optimize dest = (op0 < 0) ? -1 : cf. */
20914 if (compare_code != UNKNOWN
20915 && GET_MODE (op0) == GET_MODE (out)
20916 && (cf == -1 || ct == -1))
20918 /* If the lea code below could be used, only optimize
20919 if it results in a 2-insn sequence. */
20921 if (! (diff == 1 || diff == 2 || diff == 4 || diff == 8
20922 || diff == 3 || diff == 5 || diff == 9)
20923 || (compare_code == LT && ct == -1)
20924 || (compare_code == GE && cf == -1))
20927 * notl op1 (if necessary)
20928 * sarl $31, op1
20929 * orl cf, op1
20931 if (ct != -1)
20933 cf = ct;
20934 ct = -1;
20935 code = reverse_condition (code);
20938 out = emit_store_flag (out, code, op0, op1, VOIDmode, 0, -1);
20940 out = expand_simple_binop (mode, IOR,
20941 out, GEN_INT (cf),
20942 out, 1, OPTAB_DIRECT);
20943 if (out != operands[0])
20944 emit_move_insn (operands[0], out);
20946 return true;
20951 if ((diff == 1 || diff == 2 || diff == 4 || diff == 8
20952 || diff == 3 || diff == 5 || diff == 9)
20953 && ((mode != QImode && mode != HImode) || !TARGET_PARTIAL_REG_STALL)
20954 && (mode != DImode
20955 || x86_64_immediate_operand (GEN_INT (cf), VOIDmode)))
20958 * xorl dest,dest
20959 * cmpl op1,op2
20960 * setcc dest
20961 * lea cf(dest*(ct-cf)),dest
20963 * Size 14.
20965 * This also catches the degenerate setcc-only case.
20968 rtx tmp;
20969 int nops;
20971 out = emit_store_flag (out, code, op0, op1, VOIDmode, 0, 1);
20973 nops = 0;
20974 /* On x86_64 the lea instruction operates on Pmode, so we need
20975 to get the arithmetic done in the proper mode to match. */
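/* For example (a sketch, not the emitted RTL): with ct - cf == 5 the
   whole select collapses to a single lea, roughly
   "lea cf(dest,dest,4), dest", applied to the 0/1 setcc result,
   assuming an SImode destination.  */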
20976 if (diff == 1)
20977 tmp = copy_rtx (out);
20978 else
20980 rtx out1;
20981 out1 = copy_rtx (out);
20982 tmp = gen_rtx_MULT (mode, out1, GEN_INT (diff & ~1));
20983 nops++;
20984 if (diff & 1)
20986 tmp = gen_rtx_PLUS (mode, tmp, out1);
20987 nops++;
20990 if (cf != 0)
20992 tmp = gen_rtx_PLUS (mode, tmp, GEN_INT (cf));
20993 nops++;
20995 if (!rtx_equal_p (tmp, out))
20997 if (nops == 1)
20998 out = force_operand (tmp, copy_rtx (out));
20999 else
21000 emit_insn (gen_rtx_SET (copy_rtx (out), copy_rtx (tmp)));
21002 if (!rtx_equal_p (out, operands[0]))
21003 emit_move_insn (operands[0], copy_rtx (out));
21005 return true;
21009 * General case: Jumpful:
21010 * xorl dest,dest cmpl op1, op2
21011 * cmpl op1, op2 movl ct, dest
21012 * setcc dest jcc 1f
21013 * decl dest movl cf, dest
21014 * andl (cf-ct),dest 1:
21015 * addl ct,dest
21017 * Size 20. Size 14.
21019 * This is reasonably steep, but branch mispredict costs are
21020 * high on modern cpus, so consider failing only if optimizing
21021 * for space.
21024 if ((!TARGET_CMOVE || (mode == QImode && TARGET_PARTIAL_REG_STALL))
21025 && BRANCH_COST (optimize_insn_for_speed_p (),
21026 false) >= 2)
21028 if (cf == 0)
21030 machine_mode cmp_mode = GET_MODE (op0);
21031 enum rtx_code new_code;
21033 if (SCALAR_FLOAT_MODE_P (cmp_mode))
21035 gcc_assert (!DECIMAL_FLOAT_MODE_P (cmp_mode));
21037 /* We may be reversing an unordered compare to a normal compare,
21038 which is not valid in general (we may convert a non-trapping
21039 condition to a trapping one); however, on i386 we currently
21040 emit all comparisons unordered. */
21041 new_code = reverse_condition_maybe_unordered (code);
21043 else
21045 new_code = ix86_reverse_condition (code, cmp_mode);
21046 if (compare_code != UNKNOWN && new_code != UNKNOWN)
21047 compare_code = reverse_condition (compare_code);
21050 if (new_code != UNKNOWN)
21052 cf = ct;
21053 ct = 0;
21054 code = new_code;
21058 if (compare_code != UNKNOWN)
21060 /* notl op1 (if needed)
21061 sarl $31, op1
21062 andl (cf-ct), op1
21063 addl ct, op1
21065 For x < 0 (resp. x <= -1) there will be no notl,
21066 so if possible swap the constants to get rid of the
21067 complement.
21068 True/false will be -1/0 while the code below (store flag
21069 followed by decrement) gives 0/-1, so the constants need
21070 to be exchanged once more. */
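/* Put differently: with the 0/-1 mask M produced by the store flag,
   (M & (cf - ct)) + ct yields cf when M is all ones and ct when M is
   zero, which is what the AND and PLUS below compute.  */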
21072 if (compare_code == GE || !cf)
21074 code = reverse_condition (code);
21075 compare_code = LT;
21077 else
21078 std::swap (ct, cf);
21080 out = emit_store_flag (out, code, op0, op1, VOIDmode, 0, -1);
21082 else
21084 out = emit_store_flag (out, code, op0, op1, VOIDmode, 0, 1);
21086 out = expand_simple_binop (mode, PLUS, copy_rtx (out),
21087 constm1_rtx,
21088 copy_rtx (out), 1, OPTAB_DIRECT);
21091 out = expand_simple_binop (mode, AND, copy_rtx (out),
21092 gen_int_mode (cf - ct, mode),
21093 copy_rtx (out), 1, OPTAB_DIRECT);
21094 if (ct)
21095 out = expand_simple_binop (mode, PLUS, copy_rtx (out), GEN_INT (ct),
21096 copy_rtx (out), 1, OPTAB_DIRECT);
21097 if (!rtx_equal_p (out, operands[0]))
21098 emit_move_insn (operands[0], copy_rtx (out));
21100 return true;
21104 if (!TARGET_CMOVE || (mode == QImode && TARGET_PARTIAL_REG_STALL))
21106 /* Try a few things more with specific constants and a variable. */
21108 optab op;
21109 rtx var, orig_out, out, tmp;
21111 if (BRANCH_COST (optimize_insn_for_speed_p (), false) <= 2)
21112 return false;
21114 /* If one of the two operands is an interesting constant, load the
21115 matching 0/-1 constant and mask the variable in with a logical operation. */
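/* E.g. dest = cond ? 0 : var is computed as (cond ? 0 : -1) & var,
   and dest = cond ? -1 : var as (cond ? -1 : 0) | var; the recursive
   call below produces the 0/-1 value.  */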
21117 if (CONST_INT_P (operands[2]))
21119 var = operands[3];
21120 if (INTVAL (operands[2]) == 0 && operands[3] != constm1_rtx)
21121 operands[3] = constm1_rtx, op = and_optab;
21122 else if (INTVAL (operands[2]) == -1 && operands[3] != const0_rtx)
21123 operands[3] = const0_rtx, op = ior_optab;
21124 else
21125 return false;
21127 else if (CONST_INT_P (operands[3]))
21129 var = operands[2];
21130 if (INTVAL (operands[3]) == 0 && operands[2] != constm1_rtx)
21131 operands[2] = constm1_rtx, op = and_optab;
21132 else if (INTVAL (operands[3]) == -1 && operands[3] != const0_rtx)
21133 operands[2] = const0_rtx, op = ior_optab;
21134 else
21135 return false;
21137 else
21138 return false;
21140 orig_out = operands[0];
21141 tmp = gen_reg_rtx (mode);
21142 operands[0] = tmp;
21144 /* Recurse to get the constant loaded. */
21145 if (ix86_expand_int_movcc (operands) == 0)
21146 return false;
21148 /* Mask in the interesting variable. */
21149 out = expand_binop (mode, op, var, tmp, orig_out, 0,
21150 OPTAB_WIDEN);
21151 if (!rtx_equal_p (out, orig_out))
21152 emit_move_insn (copy_rtx (orig_out), copy_rtx (out));
21154 return true;
21158 * For comparison with above,
21160 * movl cf,dest
21161 * movl ct,tmp
21162 * cmpl op1,op2
21163 * cmovcc tmp,dest
21165 * Size 15.
21168 if (! nonimmediate_operand (operands[2], mode))
21169 operands[2] = force_reg (mode, operands[2]);
21170 if (! nonimmediate_operand (operands[3], mode))
21171 operands[3] = force_reg (mode, operands[3]);
21173 if (! register_operand (operands[2], VOIDmode)
21174 && (mode == QImode
21175 || ! register_operand (operands[3], VOIDmode)))
21176 operands[2] = force_reg (mode, operands[2]);
21178 if (mode == QImode
21179 && ! register_operand (operands[3], VOIDmode))
21180 operands[3] = force_reg (mode, operands[3]);
21182 emit_insn (compare_seq);
21183 emit_insn (gen_rtx_SET (operands[0],
21184 gen_rtx_IF_THEN_ELSE (mode,
21185 compare_op, operands[2],
21186 operands[3])));
21187 return true;
21190 /* Swap, force into registers, or otherwise massage the two operands
21191 to an sse comparison with a mask result. Thus we differ a bit from
21192 ix86_prepare_fp_compare_args which expects to produce a flags result.
21194 The DEST operand exists to help determine whether to commute commutative
21195 operators. The POP0/POP1 operands are updated in place. The new
21196 comparison code is returned, or UNKNOWN if not implementable. */
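/* For example, before AVX a GT compare has no direct cmpps encoding,
   so it is rewritten as LT with the operands swapped, which does.  */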
21198 static enum rtx_code
21199 ix86_prepare_sse_fp_compare_args (rtx dest, enum rtx_code code,
21200 rtx *pop0, rtx *pop1)
21202 switch (code)
21204 case LTGT:
21205 case UNEQ:
21206 /* AVX supports all the needed comparisons. */
21207 if (TARGET_AVX)
21208 break;
21209 /* We have no LTGT as an operator. We could implement it with
21210 NE & ORDERED, but this requires an extra temporary. It's
21211 not clear that it's worth it. */
21212 return UNKNOWN;
21214 case LT:
21215 case LE:
21216 case UNGT:
21217 case UNGE:
21218 /* These are supported directly. */
21219 break;
21221 case EQ:
21222 case NE:
21223 case UNORDERED:
21224 case ORDERED:
21225 /* AVX has 3 operand comparisons, no need to swap anything. */
21226 if (TARGET_AVX)
21227 break;
21228 /* For commutative operators, try to canonicalize the destination
21229 operand to be first in the comparison - this helps reload to
21230 avoid extra moves. */
21231 if (!dest || !rtx_equal_p (dest, *pop1))
21232 break;
21233 /* FALLTHRU */
21235 case GE:
21236 case GT:
21237 case UNLE:
21238 case UNLT:
21239 /* These are not supported directly before AVX, and furthermore
21240 ix86_expand_sse_fp_minmax only optimizes LT/UNGE. Swap the
21241 comparison operands to transform into something that is
21242 supported. */
21243 std::swap (*pop0, *pop1);
21244 code = swap_condition (code);
21245 break;
21247 default:
21248 gcc_unreachable ();
21251 return code;
21254 /* Detect conditional moves that exactly match min/max operational
21255 semantics. Note that this is IEEE safe, as long as we don't
21256 interchange the operands.
21258 Returns FALSE if this conditional move doesn't match a MIN/MAX,
21259 and TRUE if the operation is successful and instructions are emitted. */
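/* E.g. dest = (a < b ? a : b) matches the SSE min pattern and
   dest = (a < b ? b : a) the max pattern; anything else is rejected.  */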
21261 static bool
21262 ix86_expand_sse_fp_minmax (rtx dest, enum rtx_code code, rtx cmp_op0,
21263 rtx cmp_op1, rtx if_true, rtx if_false)
21265 machine_mode mode;
21266 bool is_min;
21267 rtx tmp;
21269 if (code == LT)
21271 else if (code == UNGE)
21272 std::swap (if_true, if_false);
21273 else
21274 return false;
21276 if (rtx_equal_p (cmp_op0, if_true) && rtx_equal_p (cmp_op1, if_false))
21277 is_min = true;
21278 else if (rtx_equal_p (cmp_op1, if_true) && rtx_equal_p (cmp_op0, if_false))
21279 is_min = false;
21280 else
21281 return false;
21283 mode = GET_MODE (dest);
21285 /* We want to check HONOR_NANS and HONOR_SIGNED_ZEROS here,
21286 but MODE may be a vector mode and thus not appropriate. */
21287 if (!flag_finite_math_only || !flag_unsafe_math_optimizations)
21289 int u = is_min ? UNSPEC_IEEE_MIN : UNSPEC_IEEE_MAX;
21290 rtvec v;
21292 if_true = force_reg (mode, if_true);
21293 v = gen_rtvec (2, if_true, if_false);
21294 tmp = gen_rtx_UNSPEC (mode, v, u);
21296 else
21298 code = is_min ? SMIN : SMAX;
21299 tmp = gen_rtx_fmt_ee (code, mode, if_true, if_false);
21302 emit_insn (gen_rtx_SET (dest, tmp));
21303 return true;
21306 /* Expand an sse vector comparison. Return the register with the result. */
21308 static rtx
21309 ix86_expand_sse_cmp (rtx dest, enum rtx_code code, rtx cmp_op0, rtx cmp_op1,
21310 rtx op_true, rtx op_false)
21312 machine_mode mode = GET_MODE (dest);
21313 machine_mode cmp_ops_mode = GET_MODE (cmp_op0);
21315 /* In the general case the result of a comparison can differ from the operands' type. */
21316 machine_mode cmp_mode;
21318 /* In AVX512F the result of comparison is an integer mask. */
21319 bool maskcmp = false;
21320 rtx x;
21322 if (GET_MODE_SIZE (cmp_ops_mode) == 64)
21324 cmp_mode = mode_for_size (GET_MODE_NUNITS (cmp_ops_mode), MODE_INT, 0);
21325 gcc_assert (cmp_mode != BLKmode);
21327 maskcmp = true;
21329 else
21330 cmp_mode = cmp_ops_mode;
21333 cmp_op0 = force_reg (cmp_ops_mode, cmp_op0);
21334 if (!nonimmediate_operand (cmp_op1, cmp_ops_mode))
21335 cmp_op1 = force_reg (cmp_ops_mode, cmp_op1);
21337 if (optimize
21338 || reg_overlap_mentioned_p (dest, op_true)
21339 || reg_overlap_mentioned_p (dest, op_false))
21340 dest = gen_reg_rtx (maskcmp ? cmp_mode : mode);
21342 /* Compare patterns for int modes are unspec in AVX512F only. */
21343 if (maskcmp && (code == GT || code == EQ))
21345 rtx (*gen)(rtx, rtx, rtx);
21347 switch (cmp_ops_mode)
21349 case V64QImode:
21350 gcc_assert (TARGET_AVX512BW);
21351 gen = code == GT ? gen_avx512bw_gtv64qi3 : gen_avx512bw_eqv64qi3_1;
21352 break;
21353 case V32HImode:
21354 gcc_assert (TARGET_AVX512BW);
21355 gen = code == GT ? gen_avx512bw_gtv32hi3 : gen_avx512bw_eqv32hi3_1;
21356 break;
21357 case V16SImode:
21358 gen = code == GT ? gen_avx512f_gtv16si3 : gen_avx512f_eqv16si3_1;
21359 break;
21360 case V8DImode:
21361 gen = code == GT ? gen_avx512f_gtv8di3 : gen_avx512f_eqv8di3_1;
21362 break;
21363 default:
21364 gen = NULL;
21367 if (gen)
21369 emit_insn (gen (dest, cmp_op0, cmp_op1));
21370 return dest;
21373 x = gen_rtx_fmt_ee (code, cmp_mode, cmp_op0, cmp_op1);
21375 if (cmp_mode != mode && !maskcmp)
21377 x = force_reg (cmp_ops_mode, x);
21378 convert_move (dest, x, false);
21380 else
21381 emit_insn (gen_rtx_SET (dest, x));
21383 return dest;
21386 /* Expand DEST = CMP ? OP_TRUE : OP_FALSE into a sequence of logical
21387 operations. This is used for both scalar and vector conditional moves. */
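/* When no blend instruction applies, the fallback at the end computes
   dest = (cmp & op_true) | (~cmp & op_false), relying on cmp being an
   all-ones / all-zeros mask per element.  */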
21389 static void
21390 ix86_expand_sse_movcc (rtx dest, rtx cmp, rtx op_true, rtx op_false)
21392 machine_mode mode = GET_MODE (dest);
21393 machine_mode cmpmode = GET_MODE (cmp);
21395 /* In AVX512F the result of comparison is an integer mask. */
21396 bool maskcmp = (mode != cmpmode && TARGET_AVX512F);
21398 rtx t2, t3, x;
21400 if (vector_all_ones_operand (op_true, mode)
21401 && rtx_equal_p (op_false, CONST0_RTX (mode))
21402 && !maskcmp)
21404 emit_insn (gen_rtx_SET (dest, cmp));
21406 else if (op_false == CONST0_RTX (mode)
21407 && !maskcmp)
21409 op_true = force_reg (mode, op_true);
21410 x = gen_rtx_AND (mode, cmp, op_true);
21411 emit_insn (gen_rtx_SET (dest, x));
21413 else if (op_true == CONST0_RTX (mode)
21414 && !maskcmp)
21416 op_false = force_reg (mode, op_false);
21417 x = gen_rtx_NOT (mode, cmp);
21418 x = gen_rtx_AND (mode, x, op_false);
21419 emit_insn (gen_rtx_SET (dest, x));
21421 else if (INTEGRAL_MODE_P (mode) && op_true == CONSTM1_RTX (mode)
21422 && !maskcmp)
21424 op_false = force_reg (mode, op_false);
21425 x = gen_rtx_IOR (mode, cmp, op_false);
21426 emit_insn (gen_rtx_SET (dest, x));
21428 else if (TARGET_XOP
21429 && !maskcmp)
21431 op_true = force_reg (mode, op_true);
21433 if (!nonimmediate_operand (op_false, mode))
21434 op_false = force_reg (mode, op_false);
21436 emit_insn (gen_rtx_SET (dest, gen_rtx_IF_THEN_ELSE (mode, cmp,
21437 op_true,
21438 op_false)));
21440 else
21442 rtx (*gen) (rtx, rtx, rtx, rtx) = NULL;
21443 rtx d = dest;
21445 if (!nonimmediate_operand (op_true, mode))
21446 op_true = force_reg (mode, op_true);
21448 op_false = force_reg (mode, op_false);
21450 switch (mode)
21452 case V4SFmode:
21453 if (TARGET_SSE4_1)
21454 gen = gen_sse4_1_blendvps;
21455 break;
21456 case V2DFmode:
21457 if (TARGET_SSE4_1)
21458 gen = gen_sse4_1_blendvpd;
21459 break;
21460 case V16QImode:
21461 case V8HImode:
21462 case V4SImode:
21463 case V2DImode:
21464 if (TARGET_SSE4_1)
21466 gen = gen_sse4_1_pblendvb;
21467 if (mode != V16QImode)
21468 d = gen_reg_rtx (V16QImode);
21469 op_false = gen_lowpart (V16QImode, op_false);
21470 op_true = gen_lowpart (V16QImode, op_true);
21471 cmp = gen_lowpart (V16QImode, cmp);
21473 break;
21474 case V8SFmode:
21475 if (TARGET_AVX)
21476 gen = gen_avx_blendvps256;
21477 break;
21478 case V4DFmode:
21479 if (TARGET_AVX)
21480 gen = gen_avx_blendvpd256;
21481 break;
21482 case V32QImode:
21483 case V16HImode:
21484 case V8SImode:
21485 case V4DImode:
21486 if (TARGET_AVX2)
21488 gen = gen_avx2_pblendvb;
21489 if (mode != V32QImode)
21490 d = gen_reg_rtx (V32QImode);
21491 op_false = gen_lowpart (V32QImode, op_false);
21492 op_true = gen_lowpart (V32QImode, op_true);
21493 cmp = gen_lowpart (V32QImode, cmp);
21495 break;
21497 case V64QImode:
21498 gen = gen_avx512bw_blendmv64qi;
21499 break;
21500 case V32HImode:
21501 gen = gen_avx512bw_blendmv32hi;
21502 break;
21503 case V16SImode:
21504 gen = gen_avx512f_blendmv16si;
21505 break;
21506 case V8DImode:
21507 gen = gen_avx512f_blendmv8di;
21508 break;
21509 case V8DFmode:
21510 gen = gen_avx512f_blendmv8df;
21511 break;
21512 case V16SFmode:
21513 gen = gen_avx512f_blendmv16sf;
21514 break;
21516 default:
21517 break;
21520 if (gen != NULL)
21522 emit_insn (gen (d, op_false, op_true, cmp));
21523 if (d != dest)
21524 emit_move_insn (dest, gen_lowpart (GET_MODE (dest), d));
21526 else
21528 op_true = force_reg (mode, op_true);
21530 t2 = gen_reg_rtx (mode);
21531 if (optimize)
21532 t3 = gen_reg_rtx (mode);
21533 else
21534 t3 = dest;
21536 x = gen_rtx_AND (mode, op_true, cmp);
21537 emit_insn (gen_rtx_SET (t2, x));
21539 x = gen_rtx_NOT (mode, cmp);
21540 x = gen_rtx_AND (mode, x, op_false);
21541 emit_insn (gen_rtx_SET (t3, x));
21543 x = gen_rtx_IOR (mode, t3, t2);
21544 emit_insn (gen_rtx_SET (dest, x));
21549 /* Expand a floating-point conditional move. Return true if successful. */
21551 bool
21552 ix86_expand_fp_movcc (rtx operands[])
21554 machine_mode mode = GET_MODE (operands[0]);
21555 enum rtx_code code = GET_CODE (operands[1]);
21556 rtx tmp, compare_op;
21557 rtx op0 = XEXP (operands[1], 0);
21558 rtx op1 = XEXP (operands[1], 1);
21560 if (TARGET_SSE_MATH && SSE_FLOAT_MODE_P (mode))
21562 machine_mode cmode;
21564 /* Since we have no cmove for SSE registers, don't force bad register
21565 allocation just to gain access to it.  Deny movcc when the
21566 comparison mode doesn't match the move mode. */
21567 cmode = GET_MODE (op0);
21568 if (cmode == VOIDmode)
21569 cmode = GET_MODE (op1);
21570 if (cmode != mode)
21571 return false;
21573 code = ix86_prepare_sse_fp_compare_args (operands[0], code, &op0, &op1);
21574 if (code == UNKNOWN)
21575 return false;
21577 if (ix86_expand_sse_fp_minmax (operands[0], code, op0, op1,
21578 operands[2], operands[3]))
21579 return true;
21581 tmp = ix86_expand_sse_cmp (operands[0], code, op0, op1,
21582 operands[2], operands[3]);
21583 ix86_expand_sse_movcc (operands[0], tmp, operands[2], operands[3]);
21584 return true;
21587 if (GET_MODE (op0) == TImode
21588 || (GET_MODE (op0) == DImode
21589 && !TARGET_64BIT))
21590 return false;
21592 /* The floating point conditional move instructions don't directly
21593 support conditions resulting from a signed integer comparison. */
21595 compare_op = ix86_expand_compare (code, op0, op1);
21596 if (!fcmov_comparison_operator (compare_op, VOIDmode))
21598 tmp = gen_reg_rtx (QImode);
21599 ix86_expand_setcc (tmp, code, op0, op1);
21601 compare_op = ix86_expand_compare (NE, tmp, const0_rtx);
21604 emit_insn (gen_rtx_SET (operands[0],
21605 gen_rtx_IF_THEN_ELSE (mode, compare_op,
21606 operands[2], operands[3])));
21608 return true;
21611 /* Expand a floating-point vector conditional move; a vcond operation
21612 rather than a movcc operation. */
21614 bool
21615 ix86_expand_fp_vcond (rtx operands[])
21617 enum rtx_code code = GET_CODE (operands[3]);
21618 rtx cmp;
21620 code = ix86_prepare_sse_fp_compare_args (operands[0], code,
21621 &operands[4], &operands[5]);
21622 if (code == UNKNOWN)
21624 rtx temp;
21625 switch (GET_CODE (operands[3]))
21627 case LTGT:
21628 temp = ix86_expand_sse_cmp (operands[0], ORDERED, operands[4],
21629 operands[5], operands[0], operands[0]);
21630 cmp = ix86_expand_sse_cmp (operands[0], NE, operands[4],
21631 operands[5], operands[1], operands[2]);
21632 code = AND;
21633 break;
21634 case UNEQ:
21635 temp = ix86_expand_sse_cmp (operands[0], UNORDERED, operands[4],
21636 operands[5], operands[0], operands[0]);
21637 cmp = ix86_expand_sse_cmp (operands[0], EQ, operands[4],
21638 operands[5], operands[1], operands[2]);
21639 code = IOR;
21640 break;
21641 default:
21642 gcc_unreachable ();
21644 cmp = expand_simple_binop (GET_MODE (cmp), code, temp, cmp, cmp, 1,
21645 OPTAB_DIRECT);
21646 ix86_expand_sse_movcc (operands[0], cmp, operands[1], operands[2]);
21647 return true;
21650 if (ix86_expand_sse_fp_minmax (operands[0], code, operands[4],
21651 operands[5], operands[1], operands[2]))
21652 return true;
21654 cmp = ix86_expand_sse_cmp (operands[0], code, operands[4], operands[5],
21655 operands[1], operands[2]);
21656 ix86_expand_sse_movcc (operands[0], cmp, operands[1], operands[2]);
21657 return true;
21660 /* Expand a signed/unsigned integral vector conditional move. */
21662 bool
21663 ix86_expand_int_vcond (rtx operands[])
21665 machine_mode data_mode = GET_MODE (operands[0]);
21666 machine_mode mode = GET_MODE (operands[4]);
21667 enum rtx_code code = GET_CODE (operands[3]);
21668 bool negate = false;
21669 rtx x, cop0, cop1;
21671 cop0 = operands[4];
21672 cop1 = operands[5];
21674 /* Try to optimize x < 0 ? -1 : 0 into (signed) x >> 31
21675 and x < 0 ? 1 : 0 into (unsigned) x >> 31. */
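/* (The ">> 31" above assumes 32-bit elements; in general the shift
   count is the element width minus one, as computed below.)  */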
21676 if ((code == LT || code == GE)
21677 && data_mode == mode
21678 && cop1 == CONST0_RTX (mode)
21679 && operands[1 + (code == LT)] == CONST0_RTX (data_mode)
21680 && GET_MODE_SIZE (GET_MODE_INNER (data_mode)) > 1
21681 && GET_MODE_SIZE (GET_MODE_INNER (data_mode)) <= 8
21682 && (GET_MODE_SIZE (data_mode) == 16
21683 || (TARGET_AVX2 && GET_MODE_SIZE (data_mode) == 32)))
21685 rtx negop = operands[2 - (code == LT)];
21686 int shift = GET_MODE_BITSIZE (GET_MODE_INNER (data_mode)) - 1;
21687 if (negop == CONST1_RTX (data_mode))
21689 rtx res = expand_simple_binop (mode, LSHIFTRT, cop0, GEN_INT (shift),
21690 operands[0], 1, OPTAB_DIRECT);
21691 if (res != operands[0])
21692 emit_move_insn (operands[0], res);
21693 return true;
21695 else if (GET_MODE_INNER (data_mode) != DImode
21696 && vector_all_ones_operand (negop, data_mode))
21698 rtx res = expand_simple_binop (mode, ASHIFTRT, cop0, GEN_INT (shift),
21699 operands[0], 0, OPTAB_DIRECT);
21700 if (res != operands[0])
21701 emit_move_insn (operands[0], res);
21702 return true;
21706 if (!nonimmediate_operand (cop1, mode))
21707 cop1 = force_reg (mode, cop1);
21708 if (!general_operand (operands[1], data_mode))
21709 operands[1] = force_reg (data_mode, operands[1]);
21710 if (!general_operand (operands[2], data_mode))
21711 operands[2] = force_reg (data_mode, operands[2]);
21713 /* XOP supports all of the comparisons on all 128-bit vector int types. */
21714 if (TARGET_XOP
21715 && (mode == V16QImode || mode == V8HImode
21716 || mode == V4SImode || mode == V2DImode))
21718 else
21720 /* Canonicalize the comparison to EQ, GT, GTU. */
21721 switch (code)
21723 case EQ:
21724 case GT:
21725 case GTU:
21726 break;
21728 case NE:
21729 case LE:
21730 case LEU:
21731 code = reverse_condition (code);
21732 negate = true;
21733 break;
21735 case GE:
21736 case GEU:
21737 code = reverse_condition (code);
21738 negate = true;
21739 /* FALLTHRU */
21741 case LT:
21742 case LTU:
21743 std::swap (cop0, cop1);
21744 code = swap_condition (code);
21745 break;
21747 default:
21748 gcc_unreachable ();
21751 /* Only SSE4.1/SSE4.2 supports V2DImode. */
21752 if (mode == V2DImode)
21754 switch (code)
21756 case EQ:
21757 /* SSE4.1 supports EQ. */
21758 if (!TARGET_SSE4_1)
21759 return false;
21760 break;
21762 case GT:
21763 case GTU:
21764 /* SSE4.2 supports GT/GTU. */
21765 if (!TARGET_SSE4_2)
21766 return false;
21767 break;
21769 default:
21770 gcc_unreachable ();
21774 /* Unsigned parallel compare is not supported by the hardware.
21775 Play some tricks to turn this into a signed comparison
21776 against 0. */
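/* Roughly: for dword/qword elements both operands are biased by the
   sign-bit mask (subtracting INT_MIN flips the sign bit), after which
   a signed GT gives the unsigned ordering; for byte/word elements a
   saturating subtraction is used, since a >u b iff (a -us b) != 0.  */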
21777 if (code == GTU)
21779 cop0 = force_reg (mode, cop0);
21781 switch (mode)
21783 case V16SImode:
21784 case V8DImode:
21785 case V8SImode:
21786 case V4DImode:
21787 case V4SImode:
21788 case V2DImode:
21790 rtx t1, t2, mask;
21791 rtx (*gen_sub3) (rtx, rtx, rtx);
21793 switch (mode)
21795 case V16SImode: gen_sub3 = gen_subv16si3; break;
21796 case V8DImode: gen_sub3 = gen_subv8di3; break;
21797 case V8SImode: gen_sub3 = gen_subv8si3; break;
21798 case V4DImode: gen_sub3 = gen_subv4di3; break;
21799 case V4SImode: gen_sub3 = gen_subv4si3; break;
21800 case V2DImode: gen_sub3 = gen_subv2di3; break;
21801 default:
21802 gcc_unreachable ();
21804 /* Subtract (-(INT MAX) - 1) from both operands to make
21805 them signed. */
21806 mask = ix86_build_signbit_mask (mode, true, false);
21807 t1 = gen_reg_rtx (mode);
21808 emit_insn (gen_sub3 (t1, cop0, mask));
21810 t2 = gen_reg_rtx (mode);
21811 emit_insn (gen_sub3 (t2, cop1, mask));
21813 cop0 = t1;
21814 cop1 = t2;
21815 code = GT;
21817 break;
21819 case V64QImode:
21820 case V32HImode:
21821 case V32QImode:
21822 case V16HImode:
21823 case V16QImode:
21824 case V8HImode:
21825 /* Perform a parallel unsigned saturating subtraction. */
21826 x = gen_reg_rtx (mode);
21827 emit_insn (gen_rtx_SET (x, gen_rtx_US_MINUS (mode, cop0, cop1)));
21829 cop0 = x;
21830 cop1 = CONST0_RTX (mode);
21831 code = EQ;
21832 negate = !negate;
21833 break;
21835 default:
21836 gcc_unreachable ();
21841 /* Allow the comparison to be done in one mode, but the movcc to
21842 happen in another mode. */
21843 if (data_mode == mode)
21845 x = ix86_expand_sse_cmp (operands[0], code, cop0, cop1,
21846 operands[1+negate], operands[2-negate]);
21848 else
21850 gcc_assert (GET_MODE_SIZE (data_mode) == GET_MODE_SIZE (mode));
21851 x = ix86_expand_sse_cmp (gen_reg_rtx (mode), code, cop0, cop1,
21852 operands[1+negate], operands[2-negate]);
21853 if (GET_MODE (x) == mode)
21854 x = gen_lowpart (data_mode, x);
21857 ix86_expand_sse_movcc (operands[0], x, operands[1+negate],
21858 operands[2-negate]);
21859 return true;
21862 /* AVX512F does support 64-byte integer vector operations,
21863 thus the longest vector we are faced with is V64QImode. */
21864 #define MAX_VECT_LEN 64
21866 struct expand_vec_perm_d
21868 rtx target, op0, op1;
21869 unsigned char perm[MAX_VECT_LEN];
21870 machine_mode vmode;
21871 unsigned char nelt;
21872 bool one_operand_p;
21873 bool testing_p;
21876 static bool
21877 ix86_expand_vec_perm_vpermi2 (rtx target, rtx op0, rtx mask, rtx op1,
21878 struct expand_vec_perm_d *d)
21880 /* ix86_expand_vec_perm_vpermi2 is called from both the const and non-const
21881 expanders, so the args are either in d, or in op0, op1, etc. */
21882 machine_mode mode = GET_MODE (d ? d->op0 : op0);
21883 machine_mode maskmode = mode;
21884 rtx (*gen) (rtx, rtx, rtx, rtx) = NULL;
21886 switch (mode)
21888 case V8HImode:
21889 if (TARGET_AVX512VL && TARGET_AVX512BW)
21890 gen = gen_avx512vl_vpermi2varv8hi3;
21891 break;
21892 case V16HImode:
21893 if (TARGET_AVX512VL && TARGET_AVX512BW)
21894 gen = gen_avx512vl_vpermi2varv16hi3;
21895 break;
21896 case V64QImode:
21897 if (TARGET_AVX512VBMI)
21898 gen = gen_avx512bw_vpermi2varv64qi3;
21899 break;
21900 case V32HImode:
21901 if (TARGET_AVX512BW)
21902 gen = gen_avx512bw_vpermi2varv32hi3;
21903 break;
21904 case V4SImode:
21905 if (TARGET_AVX512VL)
21906 gen = gen_avx512vl_vpermi2varv4si3;
21907 break;
21908 case V8SImode:
21909 if (TARGET_AVX512VL)
21910 gen = gen_avx512vl_vpermi2varv8si3;
21911 break;
21912 case V16SImode:
21913 if (TARGET_AVX512F)
21914 gen = gen_avx512f_vpermi2varv16si3;
21915 break;
21916 case V4SFmode:
21917 if (TARGET_AVX512VL)
21919 gen = gen_avx512vl_vpermi2varv4sf3;
21920 maskmode = V4SImode;
21922 break;
21923 case V8SFmode:
21924 if (TARGET_AVX512VL)
21926 gen = gen_avx512vl_vpermi2varv8sf3;
21927 maskmode = V8SImode;
21929 break;
21930 case V16SFmode:
21931 if (TARGET_AVX512F)
21933 gen = gen_avx512f_vpermi2varv16sf3;
21934 maskmode = V16SImode;
21936 break;
21937 case V2DImode:
21938 if (TARGET_AVX512VL)
21939 gen = gen_avx512vl_vpermi2varv2di3;
21940 break;
21941 case V4DImode:
21942 if (TARGET_AVX512VL)
21943 gen = gen_avx512vl_vpermi2varv4di3;
21944 break;
21945 case V8DImode:
21946 if (TARGET_AVX512F)
21947 gen = gen_avx512f_vpermi2varv8di3;
21948 break;
21949 case V2DFmode:
21950 if (TARGET_AVX512VL)
21952 gen = gen_avx512vl_vpermi2varv2df3;
21953 maskmode = V2DImode;
21955 break;
21956 case V4DFmode:
21957 if (TARGET_AVX512VL)
21959 gen = gen_avx512vl_vpermi2varv4df3;
21960 maskmode = V4DImode;
21962 break;
21963 case V8DFmode:
21964 if (TARGET_AVX512F)
21966 gen = gen_avx512f_vpermi2varv8df3;
21967 maskmode = V8DImode;
21969 break;
21970 default:
21971 break;
21974 if (gen == NULL)
21975 return false;
21977 /* ix86_expand_vec_perm_vpermi2 is called from both the const and non-const
21978 expanders, so the args are either in d, or in op0, op1, etc. */
21979 if (d)
21981 rtx vec[64];
21982 target = d->target;
21983 op0 = d->op0;
21984 op1 = d->op1;
21985 for (int i = 0; i < d->nelt; ++i)
21986 vec[i] = GEN_INT (d->perm[i]);
21987 mask = gen_rtx_CONST_VECTOR (maskmode, gen_rtvec_v (d->nelt, vec));
21990 emit_insn (gen (target, op0, force_reg (maskmode, mask), op1));
21991 return true;
21994 /* Expand a variable vector permutation. */
21996 void
21997 ix86_expand_vec_perm (rtx operands[])
21999 rtx target = operands[0];
22000 rtx op0 = operands[1];
22001 rtx op1 = operands[2];
22002 rtx mask = operands[3];
22003 rtx t1, t2, t3, t4, t5, t6, t7, t8, vt, vt2, vec[32];
22004 machine_mode mode = GET_MODE (op0);
22005 machine_mode maskmode = GET_MODE (mask);
22006 int w, e, i;
22007 bool one_operand_shuffle = rtx_equal_p (op0, op1);
22009 /* Number of elements in the vector. */
22010 w = GET_MODE_NUNITS (mode);
22011 e = GET_MODE_UNIT_SIZE (mode);
22012 gcc_assert (w <= 64);
22014 if (ix86_expand_vec_perm_vpermi2 (target, op0, mask, op1, NULL))
22015 return;
22017 if (TARGET_AVX2)
22019 if (mode == V4DImode || mode == V4DFmode || mode == V16HImode)
22021 /* Unfortunately, the VPERMQ and VPERMPD instructions only support
22022 a constant shuffle operand.  With a tiny bit of effort we can
22023 use VPERMD instead.  A re-interpretation stall for V4DFmode is
22024 unfortunate but there's no avoiding it.
22025 Similarly, for V16HImode we don't have instructions for variable
22026 shuffling, while for V32QImode we can, after preparing suitable
22027 masks, use vpshufb; vpshufb; vpermq; vpor. */
22029 if (mode == V16HImode)
22031 maskmode = mode = V32QImode;
22032 w = 32;
22033 e = 1;
22035 else
22037 maskmode = mode = V8SImode;
22038 w = 8;
22039 e = 4;
22041 t1 = gen_reg_rtx (maskmode);
22043 /* Replicate the low bits of the V4DImode mask into V8SImode:
22044 mask = { A B C D }
22045 t1 = { A A B B C C D D }. */
22046 for (i = 0; i < w / 2; ++i)
22047 vec[i*2 + 1] = vec[i*2] = GEN_INT (i * 2);
22048 vt = gen_rtx_CONST_VECTOR (maskmode, gen_rtvec_v (w, vec));
22049 vt = force_reg (maskmode, vt);
22050 mask = gen_lowpart (maskmode, mask);
22051 if (maskmode == V8SImode)
22052 emit_insn (gen_avx2_permvarv8si (t1, mask, vt));
22053 else
22054 emit_insn (gen_avx2_pshufbv32qi3 (t1, mask, vt));
22056 /* Multiply the shuffle indices by two. */
22057 t1 = expand_simple_binop (maskmode, PLUS, t1, t1, t1, 1,
22058 OPTAB_DIRECT);
22060 /* Add one to the odd shuffle indices:
22061 t1 = { A*2, A*2+1, B*2, B*2+1, ... }. */
22062 for (i = 0; i < w / 2; ++i)
22064 vec[i * 2] = const0_rtx;
22065 vec[i * 2 + 1] = const1_rtx;
22067 vt = gen_rtx_CONST_VECTOR (maskmode, gen_rtvec_v (w, vec));
22068 vt = validize_mem (force_const_mem (maskmode, vt));
22069 t1 = expand_simple_binop (maskmode, PLUS, t1, vt, t1, 1,
22070 OPTAB_DIRECT);
22072 /* Continue as if V8SImode (resp. V32QImode) was used initially. */
22073 operands[3] = mask = t1;
22074 target = gen_reg_rtx (mode);
22075 op0 = gen_lowpart (mode, op0);
22076 op1 = gen_lowpart (mode, op1);
22079 switch (mode)
22081 case V8SImode:
22082 /* The VPERMD and VPERMPS instructions already properly ignore
22083 the high bits of the shuffle elements. No need for us to
22084 perform an AND ourselves. */
22085 if (one_operand_shuffle)
22087 emit_insn (gen_avx2_permvarv8si (target, op0, mask));
22088 if (target != operands[0])
22089 emit_move_insn (operands[0],
22090 gen_lowpart (GET_MODE (operands[0]), target));
22092 else
22094 t1 = gen_reg_rtx (V8SImode);
22095 t2 = gen_reg_rtx (V8SImode);
22096 emit_insn (gen_avx2_permvarv8si (t1, op0, mask));
22097 emit_insn (gen_avx2_permvarv8si (t2, op1, mask));
22098 goto merge_two;
22100 return;
22102 case V8SFmode:
22103 mask = gen_lowpart (V8SImode, mask);
22104 if (one_operand_shuffle)
22105 emit_insn (gen_avx2_permvarv8sf (target, op0, mask));
22106 else
22108 t1 = gen_reg_rtx (V8SFmode);
22109 t2 = gen_reg_rtx (V8SFmode);
22110 emit_insn (gen_avx2_permvarv8sf (t1, op0, mask));
22111 emit_insn (gen_avx2_permvarv8sf (t2, op1, mask));
22112 goto merge_two;
22114 return;
22116 case V4SImode:
22117 /* By combining the two 128-bit input vectors into one 256-bit
22118 input vector, we can use VPERMD and VPERMPS for the full
22119 two-operand shuffle. */
22120 t1 = gen_reg_rtx (V8SImode);
22121 t2 = gen_reg_rtx (V8SImode);
22122 emit_insn (gen_avx_vec_concatv8si (t1, op0, op1));
22123 emit_insn (gen_avx_vec_concatv8si (t2, mask, mask));
22124 emit_insn (gen_avx2_permvarv8si (t1, t1, t2));
22125 emit_insn (gen_avx_vextractf128v8si (target, t1, const0_rtx));
22126 return;
22128 case V4SFmode:
22129 t1 = gen_reg_rtx (V8SFmode);
22130 t2 = gen_reg_rtx (V8SImode);
22131 mask = gen_lowpart (V4SImode, mask);
22132 emit_insn (gen_avx_vec_concatv8sf (t1, op0, op1));
22133 emit_insn (gen_avx_vec_concatv8si (t2, mask, mask));
22134 emit_insn (gen_avx2_permvarv8sf (t1, t1, t2));
22135 emit_insn (gen_avx_vextractf128v8sf (target, t1, const0_rtx));
22136 return;
22138 case V32QImode:
22139 t1 = gen_reg_rtx (V32QImode);
22140 t2 = gen_reg_rtx (V32QImode);
22141 t3 = gen_reg_rtx (V32QImode);
22142 vt2 = GEN_INT (-128);
22143 for (i = 0; i < 32; i++)
22144 vec[i] = vt2;
22145 vt = gen_rtx_CONST_VECTOR (V32QImode, gen_rtvec_v (32, vec));
22146 vt = force_reg (V32QImode, vt);
22147 for (i = 0; i < 32; i++)
22148 vec[i] = i < 16 ? vt2 : const0_rtx;
22149 vt2 = gen_rtx_CONST_VECTOR (V32QImode, gen_rtvec_v (32, vec));
22150 vt2 = force_reg (V32QImode, vt2);
22151 /* From mask create two adjusted masks, which contain the same
22152 bits as mask in the low 7 bits of each vector element.
22153 The first mask will have the most significant bit clear
22154 if it requests element from the same 128-bit lane
22155 and MSB set if it requests element from the other 128-bit lane.
22156 The second mask will have the opposite values of the MSB,
22157 and additionally will have its 128-bit lanes swapped.
22158 E.g. { 07 12 1e 09 ... | 17 19 05 1f ... } mask vector will have
22159 t1 { 07 92 9e 09 ... | 17 19 85 1f ... } and
22160 t3 { 97 99 05 9f ... | 87 12 1e 89 ... } where each ...
22161 stands for other 12 bytes. */
22162 /* The bit that says whether an element is from the same lane or the other
22163 lane is bit 4, so shift it up by 3 to the MSB position. */
22164 t5 = gen_reg_rtx (V4DImode);
22165 emit_insn (gen_ashlv4di3 (t5, gen_lowpart (V4DImode, mask),
22166 GEN_INT (3)));
22167 /* Clear MSB bits from the mask just in case it had them set. */
22168 emit_insn (gen_avx2_andnotv32qi3 (t2, vt, mask));
22169 /* After this t1 will have MSB set for elements from other lane. */
22170 emit_insn (gen_xorv32qi3 (t1, gen_lowpart (V32QImode, t5), vt2));
22171 /* Clear bits other than MSB. */
22172 emit_insn (gen_andv32qi3 (t1, t1, vt));
22173 /* Or in the lower bits from mask into t3. */
22174 emit_insn (gen_iorv32qi3 (t3, t1, t2));
22175 /* And invert MSB bits in t1, so MSB is set for elements from the same
22176 lane. */
22177 emit_insn (gen_xorv32qi3 (t1, t1, vt));
22178 /* Swap 128-bit lanes in t3. */
22179 t6 = gen_reg_rtx (V4DImode);
22180 emit_insn (gen_avx2_permv4di_1 (t6, gen_lowpart (V4DImode, t3),
22181 const2_rtx, GEN_INT (3),
22182 const0_rtx, const1_rtx));
22183 /* And or in the lower bits from mask into t1. */
22184 emit_insn (gen_iorv32qi3 (t1, t1, t2));
22185 if (one_operand_shuffle)
22187 /* Each of these shuffles will put 0s in places where an
22188 element from the other 128-bit lane is needed; otherwise it
22189 will shuffle in the requested value. */
22190 emit_insn (gen_avx2_pshufbv32qi3 (t3, op0,
22191 gen_lowpart (V32QImode, t6)));
22192 emit_insn (gen_avx2_pshufbv32qi3 (t1, op0, t1));
22193 /* For t3 the 128-bit lanes are swapped again. */
22194 t7 = gen_reg_rtx (V4DImode);
22195 emit_insn (gen_avx2_permv4di_1 (t7, gen_lowpart (V4DImode, t3),
22196 const2_rtx, GEN_INT (3),
22197 const0_rtx, const1_rtx));
22198 /* And ORing both together leads to the result. */
22199 emit_insn (gen_iorv32qi3 (target, t1,
22200 gen_lowpart (V32QImode, t7)));
22201 if (target != operands[0])
22202 emit_move_insn (operands[0],
22203 gen_lowpart (GET_MODE (operands[0]), target));
22204 return;
22207 t4 = gen_reg_rtx (V32QImode);
22208 /* Similar to the above one_operand_shuffle code,
22209 just repeated twice, once for each operand.  The merge_two:
22210 code will merge the two results together. */
22211 emit_insn (gen_avx2_pshufbv32qi3 (t4, op0,
22212 gen_lowpart (V32QImode, t6)));
22213 emit_insn (gen_avx2_pshufbv32qi3 (t3, op1,
22214 gen_lowpart (V32QImode, t6)));
22215 emit_insn (gen_avx2_pshufbv32qi3 (t2, op0, t1));
22216 emit_insn (gen_avx2_pshufbv32qi3 (t1, op1, t1));
22217 t7 = gen_reg_rtx (V4DImode);
22218 emit_insn (gen_avx2_permv4di_1 (t7, gen_lowpart (V4DImode, t4),
22219 const2_rtx, GEN_INT (3),
22220 const0_rtx, const1_rtx));
22221 t8 = gen_reg_rtx (V4DImode);
22222 emit_insn (gen_avx2_permv4di_1 (t8, gen_lowpart (V4DImode, t3),
22223 const2_rtx, GEN_INT (3),
22224 const0_rtx, const1_rtx));
22225 emit_insn (gen_iorv32qi3 (t4, t2, gen_lowpart (V32QImode, t7)));
22226 emit_insn (gen_iorv32qi3 (t3, t1, gen_lowpart (V32QImode, t8)));
22227 t1 = t4;
22228 t2 = t3;
22229 goto merge_two;
22231 default:
22232 gcc_assert (GET_MODE_SIZE (mode) <= 16);
22233 break;
22237 if (TARGET_XOP)
22239 /* The XOP VPPERM insn supports three inputs. By ignoring the
22240 one_operand_shuffle special case, we avoid creating another
22241 set of constant vectors in memory. */
22242 one_operand_shuffle = false;
22244 /* mask = mask & {2*w-1, ...} */
22245 vt = GEN_INT (2*w - 1);
22247 else
22249 /* mask = mask & {w-1, ...} */
22250 vt = GEN_INT (w - 1);
22253 for (i = 0; i < w; i++)
22254 vec[i] = vt;
22255 vt = gen_rtx_CONST_VECTOR (maskmode, gen_rtvec_v (w, vec));
22256 mask = expand_simple_binop (maskmode, AND, mask, vt,
22257 NULL_RTX, 0, OPTAB_DIRECT);
22259 /* For non-QImode operations, convert the word permutation control
22260 into a byte permutation control. */
22261 if (mode != V16QImode)
22263 mask = expand_simple_binop (maskmode, ASHIFT, mask,
22264 GEN_INT (exact_log2 (e)),
22265 NULL_RTX, 0, OPTAB_DIRECT);
22267 /* Convert mask to vector of chars. */
22268 mask = force_reg (V16QImode, gen_lowpart (V16QImode, mask));
22270 /* Replicate each of the input bytes into byte positions:
22271 (v2di) --> {0,0,0,0,0,0,0,0, 8,8,8,8,8,8,8,8}
22272 (v4si) --> {0,0,0,0, 4,4,4,4, 8,8,8,8, 12,12,12,12}
22273 (v8hi) --> {0,0, 2,2, 4,4, 6,6, ...}. */
22274 for (i = 0; i < 16; ++i)
22275 vec[i] = GEN_INT (i/e * e);
22276 vt = gen_rtx_CONST_VECTOR (V16QImode, gen_rtvec_v (16, vec));
22277 vt = validize_mem (force_const_mem (V16QImode, vt));
22278 if (TARGET_XOP)
22279 emit_insn (gen_xop_pperm (mask, mask, mask, vt));
22280 else
22281 emit_insn (gen_ssse3_pshufbv16qi3 (mask, mask, vt));
22283 /* Convert it into the byte positions by doing
22284 mask = mask + {0,1,..,16/w, 0,1,..,16/w, ...} */
22285 for (i = 0; i < 16; ++i)
22286 vec[i] = GEN_INT (i % e);
22287 vt = gen_rtx_CONST_VECTOR (V16QImode, gen_rtvec_v (16, vec));
22288 vt = validize_mem (force_const_mem (V16QImode, vt));
22289 emit_insn (gen_addv16qi3 (mask, mask, vt));
22292 /* The actual shuffle operations all operate on V16QImode. */
22293 op0 = gen_lowpart (V16QImode, op0);
22294 op1 = gen_lowpart (V16QImode, op1);
22296 if (TARGET_XOP)
22298 if (GET_MODE (target) != V16QImode)
22299 target = gen_reg_rtx (V16QImode);
22300 emit_insn (gen_xop_pperm (target, op0, op1, mask));
22301 if (target != operands[0])
22302 emit_move_insn (operands[0],
22303 gen_lowpart (GET_MODE (operands[0]), target));
22305 else if (one_operand_shuffle)
22307 if (GET_MODE (target) != V16QImode)
22308 target = gen_reg_rtx (V16QImode);
22309 emit_insn (gen_ssse3_pshufbv16qi3 (target, op0, mask));
22310 if (target != operands[0])
22311 emit_move_insn (operands[0],
22312 gen_lowpart (GET_MODE (operands[0]), target));
22314 else
22316 rtx xops[6];
22317 bool ok;
22319 /* Shuffle the two input vectors independently. */
22320 t1 = gen_reg_rtx (V16QImode);
22321 t2 = gen_reg_rtx (V16QImode);
22322 emit_insn (gen_ssse3_pshufbv16qi3 (t1, op0, mask));
22323 emit_insn (gen_ssse3_pshufbv16qi3 (t2, op1, mask));
22325 merge_two:
22326 /* Then merge them together. The key is whether any given control
22327 element contained a bit set that indicates the second word. */
22328 mask = operands[3];
22329 vt = GEN_INT (w);
22330 if (maskmode == V2DImode && !TARGET_SSE4_1)
22332 /* Without SSE4.1, we don't have V2DImode EQ.  Perform one
22333 more shuffle to convert the V2DI input mask into a V4SI
22334 input mask, at which point the masking done by
22335 ix86_expand_int_vcond will work as desired. */
22336 rtx t3 = gen_reg_rtx (V4SImode);
22337 emit_insn (gen_sse2_pshufd_1 (t3, gen_lowpart (V4SImode, mask),
22338 const0_rtx, const0_rtx,
22339 const2_rtx, const2_rtx));
22340 mask = t3;
22341 maskmode = V4SImode;
22342 e = w = 4;
22345 for (i = 0; i < w; i++)
22346 vec[i] = vt;
22347 vt = gen_rtx_CONST_VECTOR (maskmode, gen_rtvec_v (w, vec));
22348 vt = force_reg (maskmode, vt);
22349 mask = expand_simple_binop (maskmode, AND, mask, vt,
22350 NULL_RTX, 0, OPTAB_DIRECT);
22352 if (GET_MODE (target) != mode)
22353 target = gen_reg_rtx (mode);
22354 xops[0] = target;
22355 xops[1] = gen_lowpart (mode, t2);
22356 xops[2] = gen_lowpart (mode, t1);
22357 xops[3] = gen_rtx_EQ (maskmode, mask, vt);
22358 xops[4] = mask;
22359 xops[5] = vt;
22360 ok = ix86_expand_int_vcond (xops);
22361 gcc_assert (ok);
22362 if (target != operands[0])
22363 emit_move_insn (operands[0],
22364 gen_lowpart (GET_MODE (operands[0]), target));
22368 /* Unpack SRC into the next wider integer vector type.  UNSIGNED_P is
22369 true if we should do zero extension, else sign extension. HIGH_P is
22370 true if we want the N/2 high elements, else the low elements. */
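/* For example, unpacking a V16QImode source produces a V8HImode
   result built from either its low or its high eight bytes, zero- or
   sign-extended as requested.  */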
22372 void
22373 ix86_expand_sse_unpack (rtx dest, rtx src, bool unsigned_p, bool high_p)
22375 machine_mode imode = GET_MODE (src);
22376 rtx tmp;
22378 if (TARGET_SSE4_1)
22380 rtx (*unpack)(rtx, rtx);
22381 rtx (*extract)(rtx, rtx) = NULL;
22382 machine_mode halfmode = BLKmode;
22384 switch (imode)
22386 case V64QImode:
22387 if (unsigned_p)
22388 unpack = gen_avx512bw_zero_extendv32qiv32hi2;
22389 else
22390 unpack = gen_avx512bw_sign_extendv32qiv32hi2;
22391 halfmode = V32QImode;
22392 extract
22393 = high_p ? gen_vec_extract_hi_v64qi : gen_vec_extract_lo_v64qi;
22394 break;
22395 case V32QImode:
22396 if (unsigned_p)
22397 unpack = gen_avx2_zero_extendv16qiv16hi2;
22398 else
22399 unpack = gen_avx2_sign_extendv16qiv16hi2;
22400 halfmode = V16QImode;
22401 extract
22402 = high_p ? gen_vec_extract_hi_v32qi : gen_vec_extract_lo_v32qi;
22403 break;
22404 case V32HImode:
22405 if (unsigned_p)
22406 unpack = gen_avx512f_zero_extendv16hiv16si2;
22407 else
22408 unpack = gen_avx512f_sign_extendv16hiv16si2;
22409 halfmode = V16HImode;
22410 extract
22411 = high_p ? gen_vec_extract_hi_v32hi : gen_vec_extract_lo_v32hi;
22412 break;
22413 case V16HImode:
22414 if (unsigned_p)
22415 unpack = gen_avx2_zero_extendv8hiv8si2;
22416 else
22417 unpack = gen_avx2_sign_extendv8hiv8si2;
22418 halfmode = V8HImode;
22419 extract
22420 = high_p ? gen_vec_extract_hi_v16hi : gen_vec_extract_lo_v16hi;
22421 break;
22422 case V16SImode:
22423 if (unsigned_p)
22424 unpack = gen_avx512f_zero_extendv8siv8di2;
22425 else
22426 unpack = gen_avx512f_sign_extendv8siv8di2;
22427 halfmode = V8SImode;
22428 extract
22429 = high_p ? gen_vec_extract_hi_v16si : gen_vec_extract_lo_v16si;
22430 break;
22431 case V8SImode:
22432 if (unsigned_p)
22433 unpack = gen_avx2_zero_extendv4siv4di2;
22434 else
22435 unpack = gen_avx2_sign_extendv4siv4di2;
22436 halfmode = V4SImode;
22437 extract
22438 = high_p ? gen_vec_extract_hi_v8si : gen_vec_extract_lo_v8si;
22439 break;
22440 case V16QImode:
22441 if (unsigned_p)
22442 unpack = gen_sse4_1_zero_extendv8qiv8hi2;
22443 else
22444 unpack = gen_sse4_1_sign_extendv8qiv8hi2;
22445 break;
22446 case V8HImode:
22447 if (unsigned_p)
22448 unpack = gen_sse4_1_zero_extendv4hiv4si2;
22449 else
22450 unpack = gen_sse4_1_sign_extendv4hiv4si2;
22451 break;
22452 case V4SImode:
22453 if (unsigned_p)
22454 unpack = gen_sse4_1_zero_extendv2siv2di2;
22455 else
22456 unpack = gen_sse4_1_sign_extendv2siv2di2;
22457 break;
22458 default:
22459 gcc_unreachable ();
22462 if (GET_MODE_SIZE (imode) >= 32)
22464 tmp = gen_reg_rtx (halfmode);
22465 emit_insn (extract (tmp, src));
22467 else if (high_p)
22469 /* Shift higher 8 bytes to lower 8 bytes. */
22470 tmp = gen_reg_rtx (V1TImode);
22471 emit_insn (gen_sse2_lshrv1ti3 (tmp, gen_lowpart (V1TImode, src),
22472 GEN_INT (64)));
22473 tmp = gen_lowpart (imode, tmp);
22475 else
22476 tmp = src;
22478 emit_insn (unpack (dest, tmp));
22480 else
22482 rtx (*unpack)(rtx, rtx, rtx);
22484 switch (imode)
22486 case V16QImode:
22487 if (high_p)
22488 unpack = gen_vec_interleave_highv16qi;
22489 else
22490 unpack = gen_vec_interleave_lowv16qi;
22491 break;
22492 case V8HImode:
22493 if (high_p)
22494 unpack = gen_vec_interleave_highv8hi;
22495 else
22496 unpack = gen_vec_interleave_lowv8hi;
22497 break;
22498 case V4SImode:
22499 if (high_p)
22500 unpack = gen_vec_interleave_highv4si;
22501 else
22502 unpack = gen_vec_interleave_lowv4si;
22503 break;
22504 default:
22505 gcc_unreachable ();
22508 if (unsigned_p)
22509 tmp = force_reg (imode, CONST0_RTX (imode));
22510 else
22511 tmp = ix86_expand_sse_cmp (gen_reg_rtx (imode), GT, CONST0_RTX (imode),
22512 src, pc_rtx, pc_rtx);
22514 rtx tmp2 = gen_reg_rtx (imode);
22515 emit_insn (unpack (tmp2, src, tmp));
22516 emit_move_insn (dest, gen_lowpart (GET_MODE (dest), tmp2));
22520 /* Expand conditional increment or decrement using adc/sbb instructions.
22521 The default case, using setcc followed by a conditional move, can be
22522 done by generic code. */
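/* For instance, x + (a < b ? 1 : 0) with an unsigned compare can be
   emitted as a cmp followed by "adc $0", with no setcc or jump.  */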
22523 bool
22524 ix86_expand_int_addcc (rtx operands[])
22526 enum rtx_code code = GET_CODE (operands[1]);
22527 rtx flags;
22528 rtx (*insn)(rtx, rtx, rtx, rtx, rtx);
22529 rtx compare_op;
22530 rtx val = const0_rtx;
22531 bool fpcmp = false;
22532 machine_mode mode;
22533 rtx op0 = XEXP (operands[1], 0);
22534 rtx op1 = XEXP (operands[1], 1);
22536 if (operands[3] != const1_rtx
22537 && operands[3] != constm1_rtx)
22538 return false;
22539 if (!ix86_expand_carry_flag_compare (code, op0, op1, &compare_op))
22540 return false;
22541 code = GET_CODE (compare_op);
22543 flags = XEXP (compare_op, 0);
22545 if (GET_MODE (flags) == CCFPmode
22546 || GET_MODE (flags) == CCFPUmode)
22548 fpcmp = true;
22549 code = ix86_fp_compare_code_to_integer (code);
22552 if (code != LTU)
22554 val = constm1_rtx;
22555 if (fpcmp)
22556 PUT_CODE (compare_op,
22557 reverse_condition_maybe_unordered
22558 (GET_CODE (compare_op)));
22559 else
22560 PUT_CODE (compare_op, reverse_condition (GET_CODE (compare_op)));
22563 mode = GET_MODE (operands[0]);
22565 /* Construct either adc or sbb insn. */
22566 if ((code == LTU) == (operands[3] == constm1_rtx))
22568 switch (mode)
22570 case QImode:
22571 insn = gen_subqi3_carry;
22572 break;
22573 case HImode:
22574 insn = gen_subhi3_carry;
22575 break;
22576 case SImode:
22577 insn = gen_subsi3_carry;
22578 break;
22579 case DImode:
22580 insn = gen_subdi3_carry;
22581 break;
22582 default:
22583 gcc_unreachable ();
22586 else
22588 switch (mode)
22590 case QImode:
22591 insn = gen_addqi3_carry;
22592 break;
22593 case HImode:
22594 insn = gen_addhi3_carry;
22595 break;
22596 case SImode:
22597 insn = gen_addsi3_carry;
22598 break;
22599 case DImode:
22600 insn = gen_adddi3_carry;
22601 break;
22602 default:
22603 gcc_unreachable ();
22606 emit_insn (insn (operands[0], operands[2], val, flags, compare_op));
22608 return true;
22612 /* Split operands 0 and 1 into half-mode parts.  Similar to split_double_mode,
22613 but works for floating point parameters and non-offsettable memories.
22614 For pushes, it returns just stack offsets; the values will be saved
22615 in the right order.  At most four parts are generated. */
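/* E.g. on a 32-bit target a DFmode value is split into two SImode
   parts and an XFmode value into three; on a 64-bit target a TFmode
   value is split into two DImode parts.  */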
22617 static int
22618 ix86_split_to_parts (rtx operand, rtx *parts, machine_mode mode)
22620 int size;
22622 if (!TARGET_64BIT)
22623 size = mode==XFmode ? 3 : GET_MODE_SIZE (mode) / 4;
22624 else
22625 size = (GET_MODE_SIZE (mode) + 4) / 8;
22627 gcc_assert (!REG_P (operand) || !MMX_REGNO_P (REGNO (operand)));
22628 gcc_assert (size >= 2 && size <= 4);
22630 /* Optimize constant pool references to immediates.  This is used by fp
22631 moves, which force all constants to memory to allow combining. */
22632 if (MEM_P (operand) && MEM_READONLY_P (operand))
22634 rtx tmp = maybe_get_pool_constant (operand);
22635 if (tmp)
22636 operand = tmp;
22639 if (MEM_P (operand) && !offsettable_memref_p (operand))
22641 /* The only non-offsettable memories we handle are pushes. */
22642 int ok = push_operand (operand, VOIDmode);
22644 gcc_assert (ok);
22646 operand = copy_rtx (operand);
22647 PUT_MODE (operand, word_mode);
22648 parts[0] = parts[1] = parts[2] = parts[3] = operand;
22649 return size;
22652 if (GET_CODE (operand) == CONST_VECTOR)
22654 machine_mode imode = int_mode_for_mode (mode);
22655 /* Caution: if we looked through a constant pool memory above,
22656 the operand may actually have a different mode now. That's
22657 ok, since we want to pun this all the way back to an integer. */
22658 operand = simplify_subreg (imode, operand, GET_MODE (operand), 0);
22659 gcc_assert (operand != NULL);
22660 mode = imode;
22663 if (!TARGET_64BIT)
22665 if (mode == DImode)
22666 split_double_mode (mode, &operand, 1, &parts[0], &parts[1]);
22667 else
22669 int i;
22671 if (REG_P (operand))
22673 gcc_assert (reload_completed);
22674 for (i = 0; i < size; i++)
22675 parts[i] = gen_rtx_REG (SImode, REGNO (operand) + i);
22677 else if (offsettable_memref_p (operand))
22679 operand = adjust_address (operand, SImode, 0);
22680 parts[0] = operand;
22681 for (i = 1; i < size; i++)
22682 parts[i] = adjust_address (operand, SImode, 4 * i);
22684 else if (CONST_DOUBLE_P (operand))
22686 REAL_VALUE_TYPE r;
22687 long l[4];
22689 REAL_VALUE_FROM_CONST_DOUBLE (r, operand);
22690 switch (mode)
22692 case TFmode:
22693 real_to_target (l, &r, mode);
22694 parts[3] = gen_int_mode (l[3], SImode);
22695 parts[2] = gen_int_mode (l[2], SImode);
22696 break;
22697 case XFmode:
22698 /* We can't use REAL_VALUE_TO_TARGET_LONG_DOUBLE since
22699 long double may not be 80-bit. */
22700 real_to_target (l, &r, mode);
22701 parts[2] = gen_int_mode (l[2], SImode);
22702 break;
22703 case DFmode:
22704 REAL_VALUE_TO_TARGET_DOUBLE (r, l);
22705 break;
22706 default:
22707 gcc_unreachable ();
22709 parts[1] = gen_int_mode (l[1], SImode);
22710 parts[0] = gen_int_mode (l[0], SImode);
22712 else
22713 gcc_unreachable ();
22716 else
22718 if (mode == TImode)
22719 split_double_mode (mode, &operand, 1, &parts[0], &parts[1]);
22720 if (mode == XFmode || mode == TFmode)
22722 machine_mode upper_mode = mode==XFmode ? SImode : DImode;
22723 if (REG_P (operand))
22725 gcc_assert (reload_completed);
22726 parts[0] = gen_rtx_REG (DImode, REGNO (operand) + 0);
22727 parts[1] = gen_rtx_REG (upper_mode, REGNO (operand) + 1);
22729 else if (offsettable_memref_p (operand))
22731 operand = adjust_address (operand, DImode, 0);
22732 parts[0] = operand;
22733 parts[1] = adjust_address (operand, upper_mode, 8);
22735 else if (CONST_DOUBLE_P (operand))
22737 REAL_VALUE_TYPE r;
22738 long l[4];
22740 REAL_VALUE_FROM_CONST_DOUBLE (r, operand);
22741 real_to_target (l, &r, mode);
22743 /* real_to_target puts 32-bit pieces in each long. */
22744 parts[0] =
22745 gen_int_mode
22746 ((l[0] & (HOST_WIDE_INT) 0xffffffff)
22747 | ((l[1] & (HOST_WIDE_INT) 0xffffffff) << 32),
22748 DImode);
22750 if (upper_mode == SImode)
22751 parts[1] = gen_int_mode (l[2], SImode);
22752 else
22753 parts[1] =
22754 gen_int_mode
22755 ((l[2] & (HOST_WIDE_INT) 0xffffffff)
22756 | ((l[3] & (HOST_WIDE_INT) 0xffffffff) << 32),
22757 DImode);
22759 else
22760 gcc_unreachable ();
22764 return size;
22767 /* Emit insns to perform a move or push of DI, DF, XF, and TF values.
22768 The operands array is also used as scratch: operands 2-5 receive the
22769 destination parts and operands 6-9 the corresponding source parts,
22770 in the order in which the moves are emitted.  */
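/* As a rough illustration (register names are examples only): on a 32-bit
   target a DImode move from the stack into a register pair is split into
   two SImode moves, e.g.
       movl  4(%esp), %eax
       movl  8(%esp), %edx
   and the ordering logic below makes sure an address register that also
   serves as a destination part is overwritten last.  */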
22772 void
22773 ix86_split_long_move (rtx operands[])
22775 rtx part[2][4];
22776 int nparts, i, j;
22777 int push = 0;
22778 int collisions = 0;
22779 machine_mode mode = GET_MODE (operands[0]);
22780 bool collisionparts[4];
22782 /* The DFmode expanders may ask us to move double.
22783 For a 64-bit target this is a single move.  By hiding that fact
22784 here we simplify the i386.md splitters.  */
22785 if (TARGET_64BIT && GET_MODE_SIZE (GET_MODE (operands[0])) == 8)
22787 /* Optimize constant pool reference to immediates. This is used by
22788 fp moves, that force all constants to memory to allow combining. */
22790 if (MEM_P (operands[1])
22791 && GET_CODE (XEXP (operands[1], 0)) == SYMBOL_REF
22792 && CONSTANT_POOL_ADDRESS_P (XEXP (operands[1], 0)))
22793 operands[1] = get_pool_constant (XEXP (operands[1], 0));
22794 if (push_operand (operands[0], VOIDmode))
22796 operands[0] = copy_rtx (operands[0]);
22797 PUT_MODE (operands[0], word_mode);
22799 else
22800 operands[0] = gen_lowpart (DImode, operands[0]);
22801 operands[1] = gen_lowpart (DImode, operands[1]);
22802 emit_move_insn (operands[0], operands[1]);
22803 return;
22806 /* The only non-offsettable memory we handle is push. */
22807 if (push_operand (operands[0], VOIDmode))
22808 push = 1;
22809 else
22810 gcc_assert (!MEM_P (operands[0])
22811 || offsettable_memref_p (operands[0]));
22813 nparts = ix86_split_to_parts (operands[1], part[1], GET_MODE (operands[0]));
22814 ix86_split_to_parts (operands[0], part[0], GET_MODE (operands[0]));
22816 /* When emitting a push, take care of source operands on the stack.  */
22817 if (push && MEM_P (operands[1])
22818 && reg_overlap_mentioned_p (stack_pointer_rtx, operands[1]))
22820 rtx src_base = XEXP (part[1][nparts - 1], 0);
22822 /* Compensate for the stack decrement by 4. */
22823 if (!TARGET_64BIT && nparts == 3
22824 && mode == XFmode && TARGET_128BIT_LONG_DOUBLE)
22825 src_base = plus_constant (Pmode, src_base, 4);
22827 /* src_base refers to the stack pointer and is
22828 automatically decreased by emitted push. */
22829 for (i = 0; i < nparts; i++)
22830 part[1][i] = change_address (part[1][i],
22831 GET_MODE (part[1][i]), src_base);
22834 /* We need to do the copy in the right order in case an address register
22835 of the source overlaps the destination.  */
22836 if (REG_P (part[0][0]) && MEM_P (part[1][0]))
22838 rtx tmp;
22840 for (i = 0; i < nparts; i++)
22842 collisionparts[i]
22843 = reg_overlap_mentioned_p (part[0][i], XEXP (part[1][0], 0));
22844 if (collisionparts[i])
22845 collisions++;
22848 /* Collision in the middle part can be handled by reordering. */
22849 if (collisions == 1 && nparts == 3 && collisionparts [1])
22851 std::swap (part[0][1], part[0][2]);
22852 std::swap (part[1][1], part[1][2]);
22854 else if (collisions == 1
22855 && nparts == 4
22856 && (collisionparts [1] || collisionparts [2]))
22858 if (collisionparts [1])
22860 std::swap (part[0][1], part[0][2]);
22861 std::swap (part[1][1], part[1][2]);
22863 else
22865 std::swap (part[0][2], part[0][3]);
22866 std::swap (part[1][2], part[1][3]);
22870 /* If there are more collisions, we can't handle it by reordering.
22871 Do an lea to the last part and use only one colliding move. */
22872 else if (collisions > 1)
22874 rtx base, addr, tls_base = NULL_RTX;
22876 collisions = 1;
22878 base = part[0][nparts - 1];
22880 /* Handle the case when the last part isn't valid for lea.
22881 Happens in 64-bit mode storing the 12-byte XFmode. */
22882 if (GET_MODE (base) != Pmode)
22883 base = gen_rtx_REG (Pmode, REGNO (base));
22885 addr = XEXP (part[1][0], 0);
22886 if (TARGET_TLS_DIRECT_SEG_REFS)
22888 struct ix86_address parts;
22889 int ok = ix86_decompose_address (addr, &parts);
22890 gcc_assert (ok);
22891 if (parts.seg == DEFAULT_TLS_SEG_REG)
22893 /* It is not valid to use %gs: or %fs: in
22894 lea though, so we need to remove it from the
22895 address used for lea and add it to each individual
22896 memory load instead.  */
22897 addr = copy_rtx (addr);
22898 rtx *x = &addr;
22899 while (GET_CODE (*x) == PLUS)
22901 for (i = 0; i < 2; i++)
22903 rtx u = XEXP (*x, i);
22904 if (GET_CODE (u) == ZERO_EXTEND)
22905 u = XEXP (u, 0);
22906 if (GET_CODE (u) == UNSPEC
22907 && XINT (u, 1) == UNSPEC_TP)
22909 tls_base = XEXP (*x, i);
22910 *x = XEXP (*x, 1 - i);
22911 break;
22914 if (tls_base)
22915 break;
22916 x = &XEXP (*x, 0);
22918 gcc_assert (tls_base);
22921 emit_insn (gen_rtx_SET (base, addr));
22922 if (tls_base)
22923 base = gen_rtx_PLUS (GET_MODE (base), base, tls_base);
22924 part[1][0] = replace_equiv_address (part[1][0], base);
22925 for (i = 1; i < nparts; i++)
22927 if (tls_base)
22928 base = copy_rtx (base);
22929 tmp = plus_constant (Pmode, base, UNITS_PER_WORD * i);
22930 part[1][i] = replace_equiv_address (part[1][i], tmp);
22935 if (push)
22937 if (!TARGET_64BIT)
22939 if (nparts == 3)
22941 if (TARGET_128BIT_LONG_DOUBLE && mode == XFmode)
22942 emit_insn (ix86_gen_add3 (stack_pointer_rtx,
22943 stack_pointer_rtx, GEN_INT (-4)));
22944 emit_move_insn (part[0][2], part[1][2]);
22946 else if (nparts == 4)
22948 emit_move_insn (part[0][3], part[1][3]);
22949 emit_move_insn (part[0][2], part[1][2]);
22952 else
22954 /* In 64-bit mode there is no 32-bit push available.  If the operand is a
22955 register, that is OK - we will just use the larger counterpart.  We also
22956 retype memory - these come from an attempt to avoid a REX prefix when
22957 moving the second half of a TFmode value.  */
22958 if (GET_MODE (part[1][1]) == SImode)
22960 switch (GET_CODE (part[1][1]))
22962 case MEM:
22963 part[1][1] = adjust_address (part[1][1], DImode, 0);
22964 break;
22966 case REG:
22967 part[1][1] = gen_rtx_REG (DImode, REGNO (part[1][1]));
22968 break;
22970 default:
22971 gcc_unreachable ();
22974 if (GET_MODE (part[1][0]) == SImode)
22975 part[1][0] = part[1][1];
22978 emit_move_insn (part[0][1], part[1][1]);
22979 emit_move_insn (part[0][0], part[1][0]);
22980 return;
22983 /* Choose the correct order so we do not overwrite the source before it is copied.  */
22984 if ((REG_P (part[0][0])
22985 && REG_P (part[1][1])
22986 && (REGNO (part[0][0]) == REGNO (part[1][1])
22987 || (nparts == 3
22988 && REGNO (part[0][0]) == REGNO (part[1][2]))
22989 || (nparts == 4
22990 && REGNO (part[0][0]) == REGNO (part[1][3]))))
22991 || (collisions > 0
22992 && reg_overlap_mentioned_p (part[0][0], XEXP (part[1][0], 0))))
22994 for (i = 0, j = nparts - 1; i < nparts; i++, j--)
22996 operands[2 + i] = part[0][j];
22997 operands[6 + i] = part[1][j];
23000 else
23002 for (i = 0; i < nparts; i++)
23004 operands[2 + i] = part[0][i];
23005 operands[6 + i] = part[1][i];
23009 /* If optimizing for size, attempt to locally unCSE nonzero constants. */
23010 if (optimize_insn_for_size_p ())
23012 for (j = 0; j < nparts - 1; j++)
23013 if (CONST_INT_P (operands[6 + j])
23014 && operands[6 + j] != const0_rtx
23015 && REG_P (operands[2 + j]))
23016 for (i = j; i < nparts - 1; i++)
23017 if (CONST_INT_P (operands[7 + i])
23018 && INTVAL (operands[7 + i]) == INTVAL (operands[6 + j]))
23019 operands[7 + i] = operands[2 + j];
23022 for (i = 0; i < nparts; i++)
23023 emit_move_insn (operands[2 + i], operands[6 + i]);
23025 return;
23028 /* Helper function of ix86_split_ashl used to generate an SImode/DImode
23029 left shift by a constant, either using a single shift or
23030 a sequence of add instructions. */
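/* For example (an illustrative sketch, assuming the cost table makes adds
   cheaper): a left shift of a 32-bit part by 2 may come out as
       addl  %eax, %eax
       addl  %eax, %eax
   instead of
       sall  $2, %eax
   depending on ix86_cost->add versus ix86_cost->shift_const and on whether
   we are optimizing for size.  */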
23032 static void
23033 ix86_expand_ashl_const (rtx operand, int count, machine_mode mode)
23035 rtx (*insn)(rtx, rtx, rtx);
23037 if (count == 1
23038 || (count * ix86_cost->add <= ix86_cost->shift_const
23039 && !optimize_insn_for_size_p ()))
23041 insn = mode == DImode ? gen_addsi3 : gen_adddi3;
23042 while (count-- > 0)
23043 emit_insn (insn (operand, operand, operand));
23045 else
23047 insn = mode == DImode ? gen_ashlsi3 : gen_ashldi3;
23048 emit_insn (insn (operand, operand, GEN_INT (count)));
23052 void
23053 ix86_split_ashl (rtx *operands, rtx scratch, machine_mode mode)
23055 rtx (*gen_ashl3)(rtx, rtx, rtx);
23056 rtx (*gen_shld)(rtx, rtx, rtx);
23057 int half_width = GET_MODE_BITSIZE (mode) >> 1;
23059 rtx low[2], high[2];
23060 int count;
23062 if (CONST_INT_P (operands[2]))
23064 split_double_mode (mode, operands, 2, low, high);
23065 count = INTVAL (operands[2]) & (GET_MODE_BITSIZE (mode) - 1);
23067 if (count >= half_width)
23069 emit_move_insn (high[0], low[1]);
23070 emit_move_insn (low[0], const0_rtx);
23072 if (count > half_width)
23073 ix86_expand_ashl_const (high[0], count - half_width, mode);
23075 else
23077 gen_shld = mode == DImode ? gen_x86_shld : gen_x86_64_shld;
23079 if (!rtx_equal_p (operands[0], operands[1]))
23080 emit_move_insn (operands[0], operands[1]);
23082 emit_insn (gen_shld (high[0], low[0], GEN_INT (count)));
23083 ix86_expand_ashl_const (low[0], count, mode);
23085 return;
23088 split_double_mode (mode, operands, 1, low, high);
23090 gen_ashl3 = mode == DImode ? gen_ashlsi3 : gen_ashldi3;
23092 if (operands[1] == const1_rtx)
23094 /* Assuming we've chosen QImode-capable registers, 1 << N
23095 can be done with two 32/64-bit shifts, no branches, no cmoves.  */
23096 if (ANY_QI_REG_P (low[0]) && ANY_QI_REG_P (high[0]))
23098 rtx s, d, flags = gen_rtx_REG (CCZmode, FLAGS_REG);
23100 ix86_expand_clear (low[0]);
23101 ix86_expand_clear (high[0]);
23102 emit_insn (gen_testqi_ccz_1 (operands[2], GEN_INT (half_width)));
23104 d = gen_lowpart (QImode, low[0]);
23105 d = gen_rtx_STRICT_LOW_PART (VOIDmode, d);
23106 s = gen_rtx_EQ (QImode, flags, const0_rtx);
23107 emit_insn (gen_rtx_SET (d, s));
23109 d = gen_lowpart (QImode, high[0]);
23110 d = gen_rtx_STRICT_LOW_PART (VOIDmode, d);
23111 s = gen_rtx_NE (QImode, flags, const0_rtx);
23112 emit_insn (gen_rtx_SET (d, s));
23115 /* Otherwise, we can get the same results by manually performing
23116 a bit extract operation on bit 5/6, and then performing the two
23117 shifts. The two methods of getting 0/1 into low/high are exactly
23118 the same size. Avoiding the shift in the bit extract case helps
23119 pentium4 a bit; no one else seems to care much either way. */
23120 else
23122 machine_mode half_mode;
23123 rtx (*gen_lshr3)(rtx, rtx, rtx);
23124 rtx (*gen_and3)(rtx, rtx, rtx);
23125 rtx (*gen_xor3)(rtx, rtx, rtx);
23126 HOST_WIDE_INT bits;
23127 rtx x;
23129 if (mode == DImode)
23131 half_mode = SImode;
23132 gen_lshr3 = gen_lshrsi3;
23133 gen_and3 = gen_andsi3;
23134 gen_xor3 = gen_xorsi3;
23135 bits = 5;
23137 else
23139 half_mode = DImode;
23140 gen_lshr3 = gen_lshrdi3;
23141 gen_and3 = gen_anddi3;
23142 gen_xor3 = gen_xordi3;
23143 bits = 6;
23146 if (TARGET_PARTIAL_REG_STALL && !optimize_insn_for_size_p ())
23147 x = gen_rtx_ZERO_EXTEND (half_mode, operands[2]);
23148 else
23149 x = gen_lowpart (half_mode, operands[2]);
23150 emit_insn (gen_rtx_SET (high[0], x));
23152 emit_insn (gen_lshr3 (high[0], high[0], GEN_INT (bits)));
23153 emit_insn (gen_and3 (high[0], high[0], const1_rtx));
23154 emit_move_insn (low[0], high[0]);
23155 emit_insn (gen_xor3 (low[0], low[0], const1_rtx));
23158 emit_insn (gen_ashl3 (low[0], low[0], operands[2]));
23159 emit_insn (gen_ashl3 (high[0], high[0], operands[2]));
23160 return;
23163 if (operands[1] == constm1_rtx)
23165 /* For -1 << N, we can avoid the shld instruction, because we
23166 know that we're shifting 0...31/63 ones into a -1. */
23167 emit_move_insn (low[0], constm1_rtx);
23168 if (optimize_insn_for_size_p ())
23169 emit_move_insn (high[0], low[0]);
23170 else
23171 emit_move_insn (high[0], constm1_rtx);
23173 else
23175 gen_shld = mode == DImode ? gen_x86_shld : gen_x86_64_shld;
23177 if (!rtx_equal_p (operands[0], operands[1]))
23178 emit_move_insn (operands[0], operands[1]);
23180 split_double_mode (mode, operands, 1, low, high);
23181 emit_insn (gen_shld (high[0], low[0], operands[2]));
23184 emit_insn (gen_ashl3 (low[0], low[0], operands[2]));
23186 if (TARGET_CMOVE && scratch)
23188 rtx (*gen_x86_shift_adj_1)(rtx, rtx, rtx, rtx)
23189 = mode == DImode ? gen_x86_shiftsi_adj_1 : gen_x86_shiftdi_adj_1;
23191 ix86_expand_clear (scratch);
23192 emit_insn (gen_x86_shift_adj_1 (high[0], low[0], operands[2], scratch));
23194 else
23196 rtx (*gen_x86_shift_adj_2)(rtx, rtx, rtx)
23197 = mode == DImode ? gen_x86_shiftsi_adj_2 : gen_x86_shiftdi_adj_2;
23199 emit_insn (gen_x86_shift_adj_2 (high[0], low[0], operands[2]));
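/* Summarizing the variable-count path above as an illustrative sketch
   (32-bit DImode case, example registers):
       shldl %cl, %eax, %edx        high:low shifted left by count & 31
       sall  %cl, %eax
   followed by the x86_shift*_adj pattern, which tests bit 5 of the count
   and, when it is set, moves the low part into the high part and zeroes
   the low part (with cmov when available, otherwise with a branch).  */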
23203 void
23204 ix86_split_ashr (rtx *operands, rtx scratch, machine_mode mode)
23206 rtx (*gen_ashr3)(rtx, rtx, rtx)
23207 = mode == DImode ? gen_ashrsi3 : gen_ashrdi3;
23208 rtx (*gen_shrd)(rtx, rtx, rtx);
23209 int half_width = GET_MODE_BITSIZE (mode) >> 1;
23211 rtx low[2], high[2];
23212 int count;
23214 if (CONST_INT_P (operands[2]))
23216 split_double_mode (mode, operands, 2, low, high);
23217 count = INTVAL (operands[2]) & (GET_MODE_BITSIZE (mode) - 1);
23219 if (count == GET_MODE_BITSIZE (mode) - 1)
23221 emit_move_insn (high[0], high[1]);
23222 emit_insn (gen_ashr3 (high[0], high[0],
23223 GEN_INT (half_width - 1)));
23224 emit_move_insn (low[0], high[0]);
23227 else if (count >= half_width)
23229 emit_move_insn (low[0], high[1]);
23230 emit_move_insn (high[0], low[0]);
23231 emit_insn (gen_ashr3 (high[0], high[0],
23232 GEN_INT (half_width - 1)));
23234 if (count > half_width)
23235 emit_insn (gen_ashr3 (low[0], low[0],
23236 GEN_INT (count - half_width)));
23238 else
23240 gen_shrd = mode == DImode ? gen_x86_shrd : gen_x86_64_shrd;
23242 if (!rtx_equal_p (operands[0], operands[1]))
23243 emit_move_insn (operands[0], operands[1]);
23245 emit_insn (gen_shrd (low[0], high[0], GEN_INT (count)));
23246 emit_insn (gen_ashr3 (high[0], high[0], GEN_INT (count)));
23249 else
23251 gen_shrd = mode == DImode ? gen_x86_shrd : gen_x86_64_shrd;
23253 if (!rtx_equal_p (operands[0], operands[1]))
23254 emit_move_insn (operands[0], operands[1]);
23256 split_double_mode (mode, operands, 1, low, high);
23258 emit_insn (gen_shrd (low[0], high[0], operands[2]));
23259 emit_insn (gen_ashr3 (high[0], high[0], operands[2]));
23261 if (TARGET_CMOVE && scratch)
23263 rtx (*gen_x86_shift_adj_1)(rtx, rtx, rtx, rtx)
23264 = mode == DImode ? gen_x86_shiftsi_adj_1 : gen_x86_shiftdi_adj_1;
23266 emit_move_insn (scratch, high[0]);
23267 emit_insn (gen_ashr3 (scratch, scratch,
23268 GEN_INT (half_width - 1)));
23269 emit_insn (gen_x86_shift_adj_1 (low[0], high[0], operands[2],
23270 scratch));
23272 else
23274 rtx (*gen_x86_shift_adj_3)(rtx, rtx, rtx)
23275 = mode == DImode ? gen_x86_shiftsi_adj_3 : gen_x86_shiftdi_adj_3;
23277 emit_insn (gen_x86_shift_adj_3 (low[0], high[0], operands[2]));
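/* Likewise, the variable-count arithmetic right shift above is roughly
   (illustrative registers):
       shrdl %cl, %edx, %eax        low = high:low >> (count & 31)
       sarl  %cl, %edx
   plus an adjustment for counts of 32 and more, where the low part takes
   the shifted high part and the high part is filled with sign copies.  */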
23282 void
23283 ix86_split_lshr (rtx *operands, rtx scratch, machine_mode mode)
23285 rtx (*gen_lshr3)(rtx, rtx, rtx)
23286 = mode == DImode ? gen_lshrsi3 : gen_lshrdi3;
23287 rtx (*gen_shrd)(rtx, rtx, rtx);
23288 int half_width = GET_MODE_BITSIZE (mode) >> 1;
23290 rtx low[2], high[2];
23291 int count;
23293 if (CONST_INT_P (operands[2]))
23295 split_double_mode (mode, operands, 2, low, high);
23296 count = INTVAL (operands[2]) & (GET_MODE_BITSIZE (mode) - 1);
23298 if (count >= half_width)
23300 emit_move_insn (low[0], high[1]);
23301 ix86_expand_clear (high[0]);
23303 if (count > half_width)
23304 emit_insn (gen_lshr3 (low[0], low[0],
23305 GEN_INT (count - half_width)));
23307 else
23309 gen_shrd = mode == DImode ? gen_x86_shrd : gen_x86_64_shrd;
23311 if (!rtx_equal_p (operands[0], operands[1]))
23312 emit_move_insn (operands[0], operands[1]);
23314 emit_insn (gen_shrd (low[0], high[0], GEN_INT (count)));
23315 emit_insn (gen_lshr3 (high[0], high[0], GEN_INT (count)));
23318 else
23320 gen_shrd = mode == DImode ? gen_x86_shrd : gen_x86_64_shrd;
23322 if (!rtx_equal_p (operands[0], operands[1]))
23323 emit_move_insn (operands[0], operands[1]);
23325 split_double_mode (mode, operands, 1, low, high);
23327 emit_insn (gen_shrd (low[0], high[0], operands[2]));
23328 emit_insn (gen_lshr3 (high[0], high[0], operands[2]));
23330 if (TARGET_CMOVE && scratch)
23332 rtx (*gen_x86_shift_adj_1)(rtx, rtx, rtx, rtx)
23333 = mode == DImode ? gen_x86_shiftsi_adj_1 : gen_x86_shiftdi_adj_1;
23335 ix86_expand_clear (scratch);
23336 emit_insn (gen_x86_shift_adj_1 (low[0], high[0], operands[2],
23337 scratch));
23339 else
23341 rtx (*gen_x86_shift_adj_2)(rtx, rtx, rtx)
23342 = mode == DImode ? gen_x86_shiftsi_adj_2 : gen_x86_shiftdi_adj_2;
23344 emit_insn (gen_x86_shift_adj_2 (low[0], high[0], operands[2]));
23349 /* Predict the just-emitted jump instruction to be taken with probability PROB.  */
23350 static void
23351 predict_jump (int prob)
23353 rtx insn = get_last_insn ();
23354 gcc_assert (JUMP_P (insn));
23355 add_int_reg_note (insn, REG_BR_PROB, prob);
23358 /* Helper function for the string operations below.  Test the bits of
23359 VARIABLE selected by VALUE; if they are all clear, jump to the returned label.  */
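/* Illustratively, ix86_expand_aligntest (count, 4, ...) masks COUNT with 4
   and jumps to the returned label when the bit is clear - after combine this
   typically becomes a "testl $4, %ecx; je .Lskip" pair - so the code placed
   before the label runs only when that bit of COUNT is set (the register
   name is just an example).  */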
23360 static rtx_code_label *
23361 ix86_expand_aligntest (rtx variable, int value, bool epilogue)
23363 rtx_code_label *label = gen_label_rtx ();
23364 rtx tmpcount = gen_reg_rtx (GET_MODE (variable));
23365 if (GET_MODE (variable) == DImode)
23366 emit_insn (gen_anddi3 (tmpcount, variable, GEN_INT (value)));
23367 else
23368 emit_insn (gen_andsi3 (tmpcount, variable, GEN_INT (value)));
23369 emit_cmp_and_jump_insns (tmpcount, const0_rtx, EQ, 0, GET_MODE (variable),
23370 1, label);
23371 if (epilogue)
23372 predict_jump (REG_BR_PROB_BASE * 50 / 100);
23373 else
23374 predict_jump (REG_BR_PROB_BASE * 90 / 100);
23375 return label;
23378 /* Decrease COUNTREG by VALUE.  */
23379 static void
23380 ix86_adjust_counter (rtx countreg, HOST_WIDE_INT value)
23382 rtx (*gen_add)(rtx, rtx, rtx)
23383 = GET_MODE (countreg) == DImode ? gen_adddi3 : gen_addsi3;
23385 emit_insn (gen_add (countreg, countreg, GEN_INT (-value)));
23388 /* Zero extend possibly SImode EXP to Pmode register. */
23390 ix86_zero_extend_to_Pmode (rtx exp)
23392 return force_reg (Pmode, convert_to_mode (Pmode, exp, 1));
23395 /* Divide COUNTREG by SCALE. */
23396 static rtx
23397 scale_counter (rtx countreg, int scale)
23399 rtx sc;
23401 if (scale == 1)
23402 return countreg;
23403 if (CONST_INT_P (countreg))
23404 return GEN_INT (INTVAL (countreg) / scale);
23405 gcc_assert (REG_P (countreg));
23407 sc = expand_simple_binop (GET_MODE (countreg), LSHIFTRT, countreg,
23408 GEN_INT (exact_log2 (scale)),
23409 NULL, 1, OPTAB_DIRECT);
23410 return sc;
23413 /* Return mode for the memcpy/memset loop counter. Prefer SImode over
23414 DImode for constant loop counts. */
23416 static machine_mode
23417 counter_mode (rtx count_exp)
23419 if (GET_MODE (count_exp) != VOIDmode)
23420 return GET_MODE (count_exp);
23421 if (!CONST_INT_P (count_exp))
23422 return Pmode;
23423 if (TARGET_64BIT && (INTVAL (count_exp) & ~0xffffffff))
23424 return DImode;
23425 return SImode;
23428 /* Copy the address to a Pmode register. This is used for x32 to
23429 truncate DImode TLS address to a SImode register. */
23431 static rtx
23432 ix86_copy_addr_to_reg (rtx addr)
23434 rtx reg;
23435 if (GET_MODE (addr) == Pmode || GET_MODE (addr) == VOIDmode)
23437 reg = copy_addr_to_reg (addr);
23438 REG_POINTER (reg) = 1;
23439 return reg;
23441 else
23443 gcc_assert (GET_MODE (addr) == DImode && Pmode == SImode);
23444 reg = copy_to_mode_reg (DImode, addr);
23445 REG_POINTER (reg) = 1;
23446 return gen_rtx_SUBREG (SImode, reg, 0);
23450 /* When ISSETMEM is FALSE, output a simple loop that copies memory from SRCPTR
23451 to DESTPTR in chunks of MODE, unrolled UNROLL times; the overall size is COUNT
23452 bytes.  When ISSETMEM is TRUE, output the equivalent loop that sets
23453 memory to VALUE (which is expected to be in MODE).
23455 The size is rounded down to a whole number of the chunk size moved at once.
23456 SRCMEM and DESTMEM provide the MEM rtxes used for proper aliasing info.  */
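/* A rough sketch of the control flow emitted below (branch probabilities and
   the optional initial zero-size test omitted):

       size = count & -(GET_MODE_SIZE (mode) * unroll);
       iter = 0;
     top:
       copy (or store VALUE into) UNROLL chunks of MODE at offset iter;
       iter += GET_MODE_SIZE (mode) * unroll;
       if (iter < size) goto top;
       destptr += iter;  if (!issetmem) srcptr += iter;  */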
23459 static void
23460 expand_set_or_movmem_via_loop (rtx destmem, rtx srcmem,
23461 rtx destptr, rtx srcptr, rtx value,
23462 rtx count, machine_mode mode, int unroll,
23463 int expected_size, bool issetmem)
23465 rtx_code_label *out_label, *top_label;
23466 rtx iter, tmp;
23467 machine_mode iter_mode = counter_mode (count);
23468 int piece_size_n = GET_MODE_SIZE (mode) * unroll;
23469 rtx piece_size = GEN_INT (piece_size_n);
23470 rtx piece_size_mask = GEN_INT (~((GET_MODE_SIZE (mode) * unroll) - 1));
23471 rtx size;
23472 int i;
23474 top_label = gen_label_rtx ();
23475 out_label = gen_label_rtx ();
23476 iter = gen_reg_rtx (iter_mode);
23478 size = expand_simple_binop (iter_mode, AND, count, piece_size_mask,
23479 NULL, 1, OPTAB_DIRECT);
23480 /* Those two should combine. */
23481 if (piece_size == const1_rtx)
23483 emit_cmp_and_jump_insns (size, const0_rtx, EQ, NULL_RTX, iter_mode,
23484 true, out_label);
23485 predict_jump (REG_BR_PROB_BASE * 10 / 100);
23487 emit_move_insn (iter, const0_rtx);
23489 emit_label (top_label);
23491 tmp = convert_modes (Pmode, iter_mode, iter, true);
23493 /* This assert could be relaxed - in this case we'll need to compute
23494 smallest power of two, containing in PIECE_SIZE_N and pass it to
23495 offset_address. */
23496 gcc_assert ((piece_size_n & (piece_size_n - 1)) == 0);
23497 destmem = offset_address (destmem, tmp, piece_size_n);
23498 destmem = adjust_address (destmem, mode, 0);
23500 if (!issetmem)
23502 srcmem = offset_address (srcmem, copy_rtx (tmp), piece_size_n);
23503 srcmem = adjust_address (srcmem, mode, 0);
23505 /* When unrolling for chips that reorder memory reads and writes,
23506 we can save registers by using a single temporary.
23507 Also using 4 temporaries is overkill in 32-bit mode.  */
23508 if (!TARGET_64BIT && 0)
23510 for (i = 0; i < unroll; i++)
23512 if (i)
23514 destmem =
23515 adjust_address (copy_rtx (destmem), mode, GET_MODE_SIZE (mode));
23516 srcmem =
23517 adjust_address (copy_rtx (srcmem), mode, GET_MODE_SIZE (mode));
23519 emit_move_insn (destmem, srcmem);
23522 else
23524 rtx tmpreg[4];
23525 gcc_assert (unroll <= 4);
23526 for (i = 0; i < unroll; i++)
23528 tmpreg[i] = gen_reg_rtx (mode);
23529 if (i)
23531 srcmem =
23532 adjust_address (copy_rtx (srcmem), mode, GET_MODE_SIZE (mode));
23534 emit_move_insn (tmpreg[i], srcmem);
23536 for (i = 0; i < unroll; i++)
23538 if (i)
23540 destmem =
23541 adjust_address (copy_rtx (destmem), mode, GET_MODE_SIZE (mode));
23543 emit_move_insn (destmem, tmpreg[i]);
23547 else
23548 for (i = 0; i < unroll; i++)
23550 if (i)
23551 destmem =
23552 adjust_address (copy_rtx (destmem), mode, GET_MODE_SIZE (mode));
23553 emit_move_insn (destmem, value);
23556 tmp = expand_simple_binop (iter_mode, PLUS, iter, piece_size, iter,
23557 true, OPTAB_LIB_WIDEN);
23558 if (tmp != iter)
23559 emit_move_insn (iter, tmp);
23561 emit_cmp_and_jump_insns (iter, size, LT, NULL_RTX, iter_mode,
23562 true, top_label);
23563 if (expected_size != -1)
23565 expected_size /= GET_MODE_SIZE (mode) * unroll;
23566 if (expected_size == 0)
23567 predict_jump (0);
23568 else if (expected_size > REG_BR_PROB_BASE)
23569 predict_jump (REG_BR_PROB_BASE - 1);
23570 else
23571 predict_jump (REG_BR_PROB_BASE - (REG_BR_PROB_BASE + expected_size / 2) / expected_size);
23573 else
23574 predict_jump (REG_BR_PROB_BASE * 80 / 100);
23575 iter = ix86_zero_extend_to_Pmode (iter);
23576 tmp = expand_simple_binop (Pmode, PLUS, destptr, iter, destptr,
23577 true, OPTAB_LIB_WIDEN);
23578 if (tmp != destptr)
23579 emit_move_insn (destptr, tmp);
23580 if (!issetmem)
23582 tmp = expand_simple_binop (Pmode, PLUS, srcptr, iter, srcptr,
23583 true, OPTAB_LIB_WIDEN);
23584 if (tmp != srcptr)
23585 emit_move_insn (srcptr, tmp);
23587 emit_label (out_label);
23590 /* Output "rep; mov" or "rep; stos" instruction depending on ISSETMEM argument.
23591 When ISSETMEM is true, arguments SRCMEM and SRCPTR are ignored.
23592 When ISSETMEM is false, arguments VALUE and ORIG_VALUE are ignored.
23593 For the setmem case, VALUE is ORIG_VALUE promoted to a wider size.
23594 ORIG_VALUE is the original value passed to memset to fill the memory with.
23595 Other arguments have the same meaning as for the previous function.  */
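/* Illustratively (a sketch, not the exact RTL built below): a memcpy whose
   count is a known multiple of 4 ends up as something like
       movl  $count/4, %ecx
       rep movsl
   and a zero memset as "rep stosl"; when the count is not a multiple of the
   chunk size the QImode variants (movsb/stosb) are used instead, and the
   DESTEXP/SRCEXP expressions describe the final pointer values required by
   the rep-string insn patterns.  */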
23597 static void
23598 expand_set_or_movmem_via_rep (rtx destmem, rtx srcmem,
23599 rtx destptr, rtx srcptr, rtx value, rtx orig_value,
23600 rtx count,
23601 machine_mode mode, bool issetmem)
23603 rtx destexp;
23604 rtx srcexp;
23605 rtx countreg;
23606 HOST_WIDE_INT rounded_count;
23608 /* If possible, it is shorter to use rep movs.
23609 TODO: Maybe it is better to move this logic to decide_alg. */
23610 if (mode == QImode && CONST_INT_P (count) && !(INTVAL (count) & 3)
23611 && (!issetmem || orig_value == const0_rtx))
23612 mode = SImode;
23614 if (destptr != XEXP (destmem, 0) || GET_MODE (destmem) != BLKmode)
23615 destmem = adjust_automodify_address_nv (destmem, BLKmode, destptr, 0);
23617 countreg = ix86_zero_extend_to_Pmode (scale_counter (count,
23618 GET_MODE_SIZE (mode)));
23619 if (mode != QImode)
23621 destexp = gen_rtx_ASHIFT (Pmode, countreg,
23622 GEN_INT (exact_log2 (GET_MODE_SIZE (mode))));
23623 destexp = gen_rtx_PLUS (Pmode, destexp, destptr);
23625 else
23626 destexp = gen_rtx_PLUS (Pmode, destptr, countreg);
23627 if ((!issetmem || orig_value == const0_rtx) && CONST_INT_P (count))
23629 rounded_count = (INTVAL (count)
23630 & ~((HOST_WIDE_INT) GET_MODE_SIZE (mode) - 1));
23631 destmem = shallow_copy_rtx (destmem);
23632 set_mem_size (destmem, rounded_count);
23634 else if (MEM_SIZE_KNOWN_P (destmem))
23635 clear_mem_size (destmem);
23637 if (issetmem)
23639 value = force_reg (mode, gen_lowpart (mode, value));
23640 emit_insn (gen_rep_stos (destptr, countreg, destmem, value, destexp));
23642 else
23644 if (srcptr != XEXP (srcmem, 0) || GET_MODE (srcmem) != BLKmode)
23645 srcmem = adjust_automodify_address_nv (srcmem, BLKmode, srcptr, 0);
23646 if (mode != QImode)
23648 srcexp = gen_rtx_ASHIFT (Pmode, countreg,
23649 GEN_INT (exact_log2 (GET_MODE_SIZE (mode))));
23650 srcexp = gen_rtx_PLUS (Pmode, srcexp, srcptr);
23652 else
23653 srcexp = gen_rtx_PLUS (Pmode, srcptr, countreg);
23654 if (CONST_INT_P (count))
23656 rounded_count = (INTVAL (count)
23657 & ~((HOST_WIDE_INT) GET_MODE_SIZE (mode) - 1));
23658 srcmem = shallow_copy_rtx (srcmem);
23659 set_mem_size (srcmem, rounded_count);
23661 else
23663 if (MEM_SIZE_KNOWN_P (srcmem))
23664 clear_mem_size (srcmem);
23666 emit_insn (gen_rep_mov (destptr, destmem, srcptr, srcmem, countreg,
23667 destexp, srcexp));
23671 /* This function emits moves to copy SIZE_TO_MOVE bytes from SRCMEM to
23672 DESTMEM.
23673 SRCMEM is passed by pointer and is updated on return.
23674 The return value is the updated DESTMEM.  */
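/* For example (a hedged sketch): with SIZE_TO_MOVE == 8 on a 64-bit target
   the widest usable integer mode is DImode, so one DImode load into a fresh
   temporary and one DImode store are emitted and both pointers advance by 8;
   for SIZE_TO_MOVE == 16 with SSE available a 16-byte vector mode (V2DImode
   on a 64-bit target) is tried before falling back to word-sized pieces.  */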
23675 static rtx
23676 emit_memmov (rtx destmem, rtx *srcmem, rtx destptr, rtx srcptr,
23677 HOST_WIDE_INT size_to_move)
23679 rtx dst = destmem, src = *srcmem, adjust, tempreg;
23680 enum insn_code code;
23681 machine_mode move_mode;
23682 int piece_size, i;
23684 /* Find the widest mode in which we could perform moves.
23685 Start with the biggest power of 2 not larger than SIZE_TO_MOVE and halve
23686 it until a move of that size is supported.  */
23687 piece_size = 1 << floor_log2 (size_to_move);
23688 move_mode = mode_for_size (piece_size * BITS_PER_UNIT, MODE_INT, 0);
23689 code = optab_handler (mov_optab, move_mode);
23690 while (code == CODE_FOR_nothing && piece_size > 1)
23692 piece_size >>= 1;
23693 move_mode = mode_for_size (piece_size * BITS_PER_UNIT, MODE_INT, 0);
23694 code = optab_handler (mov_optab, move_mode);
23697 /* Find the corresponding vector mode with the same size as MOVE_MODE.
23698 MOVE_MODE is an integer mode at the moment (SI, DI, TI, etc.). */
23699 if (GET_MODE_SIZE (move_mode) > GET_MODE_SIZE (word_mode))
23701 int nunits = GET_MODE_SIZE (move_mode) / GET_MODE_SIZE (word_mode);
23702 move_mode = mode_for_vector (word_mode, nunits);
23703 code = optab_handler (mov_optab, move_mode);
23704 if (code == CODE_FOR_nothing)
23706 move_mode = word_mode;
23707 piece_size = GET_MODE_SIZE (move_mode);
23708 code = optab_handler (mov_optab, move_mode);
23711 gcc_assert (code != CODE_FOR_nothing);
23713 dst = adjust_automodify_address_nv (dst, move_mode, destptr, 0);
23714 src = adjust_automodify_address_nv (src, move_mode, srcptr, 0);
23716 /* Emit moves. We'll need SIZE_TO_MOVE/PIECE_SIZES moves. */
23717 gcc_assert (size_to_move % piece_size == 0);
23718 adjust = GEN_INT (piece_size);
23719 for (i = 0; i < size_to_move; i += piece_size)
23721 /* We move from memory to memory, so we'll need to do it via
23722 a temporary register. */
23723 tempreg = gen_reg_rtx (move_mode);
23724 emit_insn (GEN_FCN (code) (tempreg, src));
23725 emit_insn (GEN_FCN (code) (dst, tempreg));
23727 emit_move_insn (destptr,
23728 gen_rtx_PLUS (Pmode, copy_rtx (destptr), adjust));
23729 emit_move_insn (srcptr,
23730 gen_rtx_PLUS (Pmode, copy_rtx (srcptr), adjust));
23732 dst = adjust_automodify_address_nv (dst, move_mode, destptr,
23733 piece_size);
23734 src = adjust_automodify_address_nv (src, move_mode, srcptr,
23735 piece_size);
23738 /* Update DST and SRC rtx. */
23739 *srcmem = src;
23740 return dst;
23743 /* Output code to copy at most count & (max_size - 1) bytes from SRC to DEST. */
23744 static void
23745 expand_movmem_epilogue (rtx destmem, rtx srcmem,
23746 rtx destptr, rtx srcptr, rtx count, int max_size)
23748 rtx src, dest;
23749 if (CONST_INT_P (count))
23751 HOST_WIDE_INT countval = INTVAL (count);
23752 HOST_WIDE_INT epilogue_size = countval % max_size;
23753 int i;
23755 /* For now MAX_SIZE should be a power of 2. This assert could be
23756 relaxed, but it'll require a bit more complicated epilogue
23757 expanding. */
23758 gcc_assert ((max_size & (max_size - 1)) == 0);
23759 for (i = max_size; i >= 1; i >>= 1)
23761 if (epilogue_size & i)
23762 destmem = emit_memmov (destmem, &srcmem, destptr, srcptr, i);
23764 return;
23766 if (max_size > 8)
23768 count = expand_simple_binop (GET_MODE (count), AND, count, GEN_INT (max_size - 1),
23769 count, 1, OPTAB_DIRECT);
23770 expand_set_or_movmem_via_loop (destmem, srcmem, destptr, srcptr, NULL,
23771 count, QImode, 1, 4, false);
23772 return;
23775 /* When single-instruction string ops are available, we can cheaply advance
23776 the dest and src pointers.  Otherwise we save code size by maintaining an
23777 offset (zero is readily available from the preceding rep operation) and by using x86 addressing modes.  */
23779 if (TARGET_SINGLE_STRINGOP)
23781 if (max_size > 4)
23783 rtx_code_label *label = ix86_expand_aligntest (count, 4, true);
23784 src = change_address (srcmem, SImode, srcptr);
23785 dest = change_address (destmem, SImode, destptr);
23786 emit_insn (gen_strmov (destptr, dest, srcptr, src));
23787 emit_label (label);
23788 LABEL_NUSES (label) = 1;
23790 if (max_size > 2)
23792 rtx_code_label *label = ix86_expand_aligntest (count, 2, true);
23793 src = change_address (srcmem, HImode, srcptr);
23794 dest = change_address (destmem, HImode, destptr);
23795 emit_insn (gen_strmov (destptr, dest, srcptr, src));
23796 emit_label (label);
23797 LABEL_NUSES (label) = 1;
23799 if (max_size > 1)
23801 rtx_code_label *label = ix86_expand_aligntest (count, 1, true);
23802 src = change_address (srcmem, QImode, srcptr);
23803 dest = change_address (destmem, QImode, destptr);
23804 emit_insn (gen_strmov (destptr, dest, srcptr, src));
23805 emit_label (label);
23806 LABEL_NUSES (label) = 1;
23809 else
23811 rtx offset = force_reg (Pmode, const0_rtx);
23812 rtx tmp;
23814 if (max_size > 4)
23816 rtx_code_label *label = ix86_expand_aligntest (count, 4, true);
23817 src = change_address (srcmem, SImode, srcptr);
23818 dest = change_address (destmem, SImode, destptr);
23819 emit_move_insn (dest, src);
23820 tmp = expand_simple_binop (Pmode, PLUS, offset, GEN_INT (4), NULL,
23821 true, OPTAB_LIB_WIDEN);
23822 if (tmp != offset)
23823 emit_move_insn (offset, tmp);
23824 emit_label (label);
23825 LABEL_NUSES (label) = 1;
23827 if (max_size > 2)
23829 rtx_code_label *label = ix86_expand_aligntest (count, 2, true);
23830 tmp = gen_rtx_PLUS (Pmode, srcptr, offset);
23831 src = change_address (srcmem, HImode, tmp);
23832 tmp = gen_rtx_PLUS (Pmode, destptr, offset);
23833 dest = change_address (destmem, HImode, tmp);
23834 emit_move_insn (dest, src);
23835 tmp = expand_simple_binop (Pmode, PLUS, offset, GEN_INT (2), tmp,
23836 true, OPTAB_LIB_WIDEN);
23837 if (tmp != offset)
23838 emit_move_insn (offset, tmp);
23839 emit_label (label);
23840 LABEL_NUSES (label) = 1;
23842 if (max_size > 1)
23844 rtx_code_label *label = ix86_expand_aligntest (count, 1, true);
23845 tmp = gen_rtx_PLUS (Pmode, srcptr, offset);
23846 src = change_address (srcmem, QImode, tmp);
23847 tmp = gen_rtx_PLUS (Pmode, destptr, offset);
23848 dest = change_address (destmem, QImode, tmp);
23849 emit_move_insn (dest, src);
23850 emit_label (label);
23851 LABEL_NUSES (label) = 1;
23856 /* This function emits moves to fill SIZE_TO_MOVE bytes starting from DESTMEM
23857 with value PROMOTED_VAL.
23858 DESTPTR is advanced as the stores are emitted.
23859 The return value is the updated DST.  */
23860 static rtx
23861 emit_memset (rtx destmem, rtx destptr, rtx promoted_val,
23862 HOST_WIDE_INT size_to_move)
23864 rtx dst = destmem, adjust;
23865 enum insn_code code;
23866 machine_mode move_mode;
23867 int piece_size, i;
23869 /* Choose the move mode: use the mode of PROMOTED_VAL, narrowing it
23870 when SIZE_TO_MOVE is smaller than the size of that mode.  */
23872 move_mode = GET_MODE (promoted_val);
23873 if (move_mode == VOIDmode)
23874 move_mode = QImode;
23875 if (size_to_move < GET_MODE_SIZE (move_mode))
23877 move_mode = mode_for_size (size_to_move * BITS_PER_UNIT, MODE_INT, 0);
23878 promoted_val = gen_lowpart (move_mode, promoted_val);
23880 piece_size = GET_MODE_SIZE (move_mode);
23881 code = optab_handler (mov_optab, move_mode);
23882 gcc_assert (code != CODE_FOR_nothing && promoted_val != NULL_RTX);
23884 dst = adjust_automodify_address_nv (dst, move_mode, destptr, 0);
23886 /* Emit moves. We'll need SIZE_TO_MOVE/PIECE_SIZES moves. */
23887 gcc_assert (size_to_move % piece_size == 0);
23888 adjust = GEN_INT (piece_size);
23889 for (i = 0; i < size_to_move; i += piece_size)
23891 if (piece_size <= GET_MODE_SIZE (word_mode))
23893 emit_insn (gen_strset (destptr, dst, promoted_val));
23894 dst = adjust_automodify_address_nv (dst, move_mode, destptr,
23895 piece_size);
23896 continue;
23899 emit_insn (GEN_FCN (code) (dst, promoted_val));
23901 emit_move_insn (destptr,
23902 gen_rtx_PLUS (Pmode, copy_rtx (destptr), adjust));
23904 dst = adjust_automodify_address_nv (dst, move_mode, destptr,
23905 piece_size);
23908 /* Update DST rtx. */
23909 return dst;
23911 /* Output code to set at most count & (max_size - 1) bytes starting at DEST.  */
23912 static void
23913 expand_setmem_epilogue_via_loop (rtx destmem, rtx destptr, rtx value,
23914 rtx count, int max_size)
23916 count =
23917 expand_simple_binop (counter_mode (count), AND, count,
23918 GEN_INT (max_size - 1), count, 1, OPTAB_DIRECT);
23919 expand_set_or_movmem_via_loop (destmem, NULL, destptr, NULL,
23920 gen_lowpart (QImode, value), count, QImode,
23921 1, max_size / 2, true);
23924 /* Output code to set at most count & (max_size - 1) bytes starting at DEST.  */
23925 static void
23926 expand_setmem_epilogue (rtx destmem, rtx destptr, rtx value, rtx vec_value,
23927 rtx count, int max_size)
23929 rtx dest;
23931 if (CONST_INT_P (count))
23933 HOST_WIDE_INT countval = INTVAL (count);
23934 HOST_WIDE_INT epilogue_size = countval % max_size;
23935 int i;
23937 /* For now MAX_SIZE should be a power of 2. This assert could be
23938 relaxed, but it'll require a bit more complicated epilogue
23939 expanding. */
23940 gcc_assert ((max_size & (max_size - 1)) == 0);
23941 for (i = max_size; i >= 1; i >>= 1)
23943 if (epilogue_size & i)
23945 if (vec_value && i > GET_MODE_SIZE (GET_MODE (value)))
23946 destmem = emit_memset (destmem, destptr, vec_value, i);
23947 else
23948 destmem = emit_memset (destmem, destptr, value, i);
23951 return;
23953 if (max_size > 32)
23955 expand_setmem_epilogue_via_loop (destmem, destptr, value, count, max_size);
23956 return;
23958 if (max_size > 16)
23960 rtx_code_label *label = ix86_expand_aligntest (count, 16, true);
23961 if (TARGET_64BIT)
23963 dest = change_address (destmem, DImode, destptr);
23964 emit_insn (gen_strset (destptr, dest, value));
23965 dest = adjust_automodify_address_nv (dest, DImode, destptr, 8);
23966 emit_insn (gen_strset (destptr, dest, value));
23968 else
23970 dest = change_address (destmem, SImode, destptr);
23971 emit_insn (gen_strset (destptr, dest, value));
23972 dest = adjust_automodify_address_nv (dest, SImode, destptr, 4);
23973 emit_insn (gen_strset (destptr, dest, value));
23974 dest = adjust_automodify_address_nv (dest, SImode, destptr, 8);
23975 emit_insn (gen_strset (destptr, dest, value));
23976 dest = adjust_automodify_address_nv (dest, SImode, destptr, 12);
23977 emit_insn (gen_strset (destptr, dest, value));
23979 emit_label (label);
23980 LABEL_NUSES (label) = 1;
23982 if (max_size > 8)
23984 rtx_code_label *label = ix86_expand_aligntest (count, 8, true);
23985 if (TARGET_64BIT)
23987 dest = change_address (destmem, DImode, destptr);
23988 emit_insn (gen_strset (destptr, dest, value));
23990 else
23992 dest = change_address (destmem, SImode, destptr);
23993 emit_insn (gen_strset (destptr, dest, value));
23994 dest = adjust_automodify_address_nv (dest, SImode, destptr, 4);
23995 emit_insn (gen_strset (destptr, dest, value));
23997 emit_label (label);
23998 LABEL_NUSES (label) = 1;
24000 if (max_size > 4)
24002 rtx_code_label *label = ix86_expand_aligntest (count, 4, true);
24003 dest = change_address (destmem, SImode, destptr);
24004 emit_insn (gen_strset (destptr, dest, gen_lowpart (SImode, value)));
24005 emit_label (label);
24006 LABEL_NUSES (label) = 1;
24008 if (max_size > 2)
24010 rtx_code_label *label = ix86_expand_aligntest (count, 2, true);
24011 dest = change_address (destmem, HImode, destptr);
24012 emit_insn (gen_strset (destptr, dest, gen_lowpart (HImode, value)));
24013 emit_label (label);
24014 LABEL_NUSES (label) = 1;
24016 if (max_size > 1)
24018 rtx_code_label *label = ix86_expand_aligntest (count, 1, true);
24019 dest = change_address (destmem, QImode, destptr);
24020 emit_insn (gen_strset (destptr, dest, gen_lowpart (QImode, value)));
24021 emit_label (label);
24022 LABEL_NUSES (label) = 1;
24026 /* Depending on ISSETMEM, copy enough from SRCMEM to DESTMEM or store enough
24027 into DESTMEM to align it to DESIRED_ALIGNMENT.  The original alignment is ALIGN.
24028 Depending on ISSETMEM, either arguments SRCMEM/SRCPTR or VALUE/VEC_VALUE are
24029 ignored.
24030 Return value is updated DESTMEM. */
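/* A hedged example of what the loop below produces: with ALIGN == 1 and
   DESIRED_ALIGNMENT == 8 it emits three guarded steps - copy or store 1 byte
   if bit 0 of DESTPTR is set, 2 bytes if bit 1 is set, 4 bytes if bit 2 is
   set - each step adjusting COUNT and raising the recorded alignment of
   DESTMEM, so the main loop afterwards starts on an 8-byte boundary.  */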
24031 static rtx
24032 expand_set_or_movmem_prologue (rtx destmem, rtx srcmem,
24033 rtx destptr, rtx srcptr, rtx value,
24034 rtx vec_value, rtx count, int align,
24035 int desired_alignment, bool issetmem)
24037 int i;
24038 for (i = 1; i < desired_alignment; i <<= 1)
24040 if (align <= i)
24042 rtx_code_label *label = ix86_expand_aligntest (destptr, i, false);
24043 if (issetmem)
24045 if (vec_value && i > GET_MODE_SIZE (GET_MODE (value)))
24046 destmem = emit_memset (destmem, destptr, vec_value, i);
24047 else
24048 destmem = emit_memset (destmem, destptr, value, i);
24050 else
24051 destmem = emit_memmov (destmem, &srcmem, destptr, srcptr, i);
24052 ix86_adjust_counter (count, i);
24053 emit_label (label);
24054 LABEL_NUSES (label) = 1;
24055 set_mem_align (destmem, i * 2 * BITS_PER_UNIT);
24058 return destmem;
24061 /* Test if COUNT & SIZE is nonzero and if so, expand a movmem
24062 or setmem sequence that is valid for SIZE..2*SIZE-1 bytes
24063 and jump to DONE_LABEL. */
24064 static void
24065 expand_small_movmem_or_setmem (rtx destmem, rtx srcmem,
24066 rtx destptr, rtx srcptr,
24067 rtx value, rtx vec_value,
24068 rtx count, int size,
24069 rtx done_label, bool issetmem)
24071 rtx_code_label *label = ix86_expand_aligntest (count, size, false);
24072 machine_mode mode = mode_for_size (size * BITS_PER_UNIT, MODE_INT, 1);
24073 rtx modesize;
24074 int n;
24076 /* If we do not have vector value to copy, we must reduce size. */
24077 if (issetmem)
24079 if (!vec_value)
24081 if (GET_MODE (value) == VOIDmode && size > 8)
24082 mode = Pmode;
24083 else if (GET_MODE_SIZE (mode) > GET_MODE_SIZE (GET_MODE (value)))
24084 mode = GET_MODE (value);
24086 else
24087 mode = GET_MODE (vec_value), value = vec_value;
24089 else
24091 /* Choose appropriate vector mode. */
24092 if (size >= 32)
24093 mode = TARGET_AVX ? V32QImode : TARGET_SSE ? V16QImode : DImode;
24094 else if (size >= 16)
24095 mode = TARGET_SSE ? V16QImode : DImode;
24096 srcmem = change_address (srcmem, mode, srcptr);
24098 destmem = change_address (destmem, mode, destptr);
24099 modesize = GEN_INT (GET_MODE_SIZE (mode));
24100 gcc_assert (GET_MODE_SIZE (mode) <= size);
24101 for (n = 0; n * GET_MODE_SIZE (mode) < size; n++)
24103 if (issetmem)
24104 emit_move_insn (destmem, gen_lowpart (mode, value));
24105 else
24107 emit_move_insn (destmem, srcmem);
24108 srcmem = offset_address (srcmem, modesize, GET_MODE_SIZE (mode));
24110 destmem = offset_address (destmem, modesize, GET_MODE_SIZE (mode));
24113 destmem = offset_address (destmem, count, 1);
24114 destmem = offset_address (destmem, GEN_INT (-2 * size),
24115 GET_MODE_SIZE (mode));
24116 if (!issetmem)
24118 srcmem = offset_address (srcmem, count, 1);
24119 srcmem = offset_address (srcmem, GEN_INT (-2 * size),
24120 GET_MODE_SIZE (mode));
24122 for (n = 0; n * GET_MODE_SIZE (mode) < size; n++)
24124 if (issetmem)
24125 emit_move_insn (destmem, gen_lowpart (mode, value));
24126 else
24128 emit_move_insn (destmem, srcmem);
24129 srcmem = offset_address (srcmem, modesize, GET_MODE_SIZE (mode));
24131 destmem = offset_address (destmem, modesize, GET_MODE_SIZE (mode));
24133 emit_jump_insn (gen_jump (done_label));
24134 emit_barrier ();
24136 emit_label (label);
24137 LABEL_NUSES (label) = 1;
24140 /* Handle a small memcpy (up to SIZE, which is supposed to be a small power of 2)
24141 and get ready for the main memcpy loop by copying the initial DESIRED_ALIGN-ALIGN
24142 bytes and the last SIZE bytes, adjusting DESTPTR/SRCPTR/COUNT so that we can
24143 proceed with a loop copying SIZE bytes at once.  Do moves in MODE.
24144 DONE_LABEL is a label after the whole copying sequence. The label is created
24145 on demand if *DONE_LABEL is NULL.
24146 MIN_SIZE is the minimal size of the copied block.  This value gets adjusted
24147 for the new bounds after the initial copies.
24149 DESTMEM/SRCMEM are memory expressions pointing to the copied block,
24150 DESTPTR/SRCPTR are pointers to the block.  DYNAMIC_CHECK indicates whether
24151 we will dispatch to a library call for large blocks.
24153 In pseudocode we do:
24155 if (COUNT < SIZE)
24157 Assume that SIZE is 4. Bigger sizes are handled analogously
24158 if (COUNT & 4)
24160 copy 4 bytes from SRCPTR to DESTPTR
24161 copy 4 bytes from SRCPTR + COUNT - 4 to DESTPTR + COUNT - 4
24162 goto done_label
24164 if (!COUNT)
24165 goto done_label;
24166 copy 1 byte from SRCPTR to DESTPTR
24167 if (COUNT & 2)
24169 copy 2 bytes from SRCPTR to DESTPTR
24170 copy 2 bytes from SRCPTR + COUNT - 2 to DESTPTR + COUNT - 2
24173 else
24175 copy at least DESIRED_ALIGN-ALIGN bytes from SRCPTR to DESTPTR
24176 copy SIZE bytes from SRCPTR + COUNT - SIZE to DESTPTR + COUNT -SIZE
24178 OLD_DESTPTR = DESTPTR;
24179 Align DESTPTR up to DESIRED_ALIGN
24180 SRCPTR += DESTPTR - OLD_DESTPTR
24181 COUNT -= DESTPTR - OLD_DESTPTR
24182 if (DYNAMIC_CHECK)
24183 Round COUNT down to multiple of SIZE
24184 << optional caller supplied zero size guard is here >>
24185 << optional caller supplied dynamic check is here >>
24186 << caller supplied main copy loop is here >>
24188 done_label:
24190 static void
24191 expand_set_or_movmem_prologue_epilogue_by_misaligned_moves (rtx destmem, rtx srcmem,
24192 rtx *destptr, rtx *srcptr,
24193 machine_mode mode,
24194 rtx value, rtx vec_value,
24195 rtx *count,
24196 rtx_code_label **done_label,
24197 int size,
24198 int desired_align,
24199 int align,
24200 unsigned HOST_WIDE_INT *min_size,
24201 bool dynamic_check,
24202 bool issetmem)
24204 rtx_code_label *loop_label = NULL, *label;
24205 int n;
24206 rtx modesize;
24207 int prolog_size = 0;
24208 rtx mode_value;
24210 /* Choose the proper value to copy.  */
24211 if (issetmem && VECTOR_MODE_P (mode))
24212 mode_value = vec_value;
24213 else
24214 mode_value = value;
24215 gcc_assert (GET_MODE_SIZE (mode) <= size);
24217 /* See if block is big or small, handle small blocks. */
24218 if (!CONST_INT_P (*count) && *min_size < (unsigned HOST_WIDE_INT)size)
24220 int size2 = size;
24221 loop_label = gen_label_rtx ();
24223 if (!*done_label)
24224 *done_label = gen_label_rtx ();
24226 emit_cmp_and_jump_insns (*count, GEN_INT (size2), GE, 0, GET_MODE (*count),
24227 1, loop_label);
24228 size2 >>= 1;
24230 /* Handle sizes > 3. */
24231 for (;size2 > 2; size2 >>= 1)
24232 expand_small_movmem_or_setmem (destmem, srcmem,
24233 *destptr, *srcptr,
24234 value, vec_value,
24235 *count,
24236 size2, *done_label, issetmem);
24237 /* Nothing to copy? Jump to DONE_LABEL if so */
24238 emit_cmp_and_jump_insns (*count, const0_rtx, EQ, 0, GET_MODE (*count),
24239 1, *done_label);
24241 /* Do a byte copy. */
24242 destmem = change_address (destmem, QImode, *destptr);
24243 if (issetmem)
24244 emit_move_insn (destmem, gen_lowpart (QImode, value));
24245 else
24247 srcmem = change_address (srcmem, QImode, *srcptr);
24248 emit_move_insn (destmem, srcmem);
24251 /* Handle sizes 2 and 3. */
24252 label = ix86_expand_aligntest (*count, 2, false);
24253 destmem = change_address (destmem, HImode, *destptr);
24254 destmem = offset_address (destmem, *count, 1);
24255 destmem = offset_address (destmem, GEN_INT (-2), 2);
24256 if (issetmem)
24257 emit_move_insn (destmem, gen_lowpart (HImode, value));
24258 else
24260 srcmem = change_address (srcmem, HImode, *srcptr);
24261 srcmem = offset_address (srcmem, *count, 1);
24262 srcmem = offset_address (srcmem, GEN_INT (-2), 2);
24263 emit_move_insn (destmem, srcmem);
24266 emit_label (label);
24267 LABEL_NUSES (label) = 1;
24268 emit_jump_insn (gen_jump (*done_label));
24269 emit_barrier ();
24271 else
24272 gcc_assert (*min_size >= (unsigned HOST_WIDE_INT)size
24273 || UINTVAL (*count) >= (unsigned HOST_WIDE_INT)size);
24275 /* Start memcpy for COUNT >= SIZE. */
24276 if (loop_label)
24278 emit_label (loop_label);
24279 LABEL_NUSES (loop_label) = 1;
24282 /* Copy first desired_align bytes. */
24283 if (!issetmem)
24284 srcmem = change_address (srcmem, mode, *srcptr);
24285 destmem = change_address (destmem, mode, *destptr);
24286 modesize = GEN_INT (GET_MODE_SIZE (mode));
24287 for (n = 0; prolog_size < desired_align - align; n++)
24289 if (issetmem)
24290 emit_move_insn (destmem, mode_value);
24291 else
24293 emit_move_insn (destmem, srcmem);
24294 srcmem = offset_address (srcmem, modesize, GET_MODE_SIZE (mode));
24296 destmem = offset_address (destmem, modesize, GET_MODE_SIZE (mode));
24297 prolog_size += GET_MODE_SIZE (mode);
24301 /* Copy last SIZE bytes. */
24302 destmem = offset_address (destmem, *count, 1);
24303 destmem = offset_address (destmem,
24304 GEN_INT (-size - prolog_size),
24306 if (issetmem)
24307 emit_move_insn (destmem, mode_value);
24308 else
24310 srcmem = offset_address (srcmem, *count, 1);
24311 srcmem = offset_address (srcmem,
24312 GEN_INT (-size - prolog_size),
24314 emit_move_insn (destmem, srcmem);
24316 for (n = 1; n * GET_MODE_SIZE (mode) < size; n++)
24318 destmem = offset_address (destmem, modesize, 1);
24319 if (issetmem)
24320 emit_move_insn (destmem, mode_value);
24321 else
24323 srcmem = offset_address (srcmem, modesize, 1);
24324 emit_move_insn (destmem, srcmem);
24328 /* Align destination. */
24329 if (desired_align > 1 && desired_align > align)
24331 rtx saveddest = *destptr;
24333 gcc_assert (desired_align <= size);
24334 /* Align destptr up, place it to new register. */
24335 *destptr = expand_simple_binop (GET_MODE (*destptr), PLUS, *destptr,
24336 GEN_INT (prolog_size),
24337 NULL_RTX, 1, OPTAB_DIRECT);
24338 if (REG_P (*destptr) && REG_P (saveddest) && REG_POINTER (saveddest))
24339 REG_POINTER (*destptr) = 1;
24340 *destptr = expand_simple_binop (GET_MODE (*destptr), AND, *destptr,
24341 GEN_INT (-desired_align),
24342 *destptr, 1, OPTAB_DIRECT);
24343 /* See how many bytes we skipped. */
24344 saveddest = expand_simple_binop (GET_MODE (*destptr), MINUS, saveddest,
24345 *destptr,
24346 saveddest, 1, OPTAB_DIRECT);
24347 /* Adjust srcptr and count. */
24348 if (!issetmem)
24349 *srcptr = expand_simple_binop (GET_MODE (*srcptr), MINUS, *srcptr,
24350 saveddest, *srcptr, 1, OPTAB_DIRECT);
24351 *count = expand_simple_binop (GET_MODE (*count), PLUS, *count,
24352 saveddest, *count, 1, OPTAB_DIRECT);
24353 /* We copied at most size + prolog_size. */
24354 if (*min_size > (unsigned HOST_WIDE_INT)(size + prolog_size))
24355 *min_size = (*min_size - size) & ~(unsigned HOST_WIDE_INT)(size - 1);
24356 else
24357 *min_size = 0;
24359 /* Our loops always round down the block size, but for dispatch to a library
24360 call we need the precise value.  */
24361 if (dynamic_check)
24362 *count = expand_simple_binop (GET_MODE (*count), AND, *count,
24363 GEN_INT (-size), *count, 1, OPTAB_DIRECT);
24365 else
24367 gcc_assert (prolog_size == 0);
24368 /* Decrease count, so we won't end up copying last word twice. */
24369 if (!CONST_INT_P (*count))
24370 *count = expand_simple_binop (GET_MODE (*count), PLUS, *count,
24371 constm1_rtx, *count, 1, OPTAB_DIRECT);
24372 else
24373 *count = GEN_INT ((UINTVAL (*count) - 1) & ~(unsigned HOST_WIDE_INT)(size - 1));
24374 if (*min_size)
24375 *min_size = (*min_size - 1) & ~(unsigned HOST_WIDE_INT)(size - 1);
24380 /* This function is like the previous one, except here we know how many bytes
24381 need to be copied. That allows us to update alignment not only of DST, which
24382 is returned, but also of SRC, which is passed as a pointer for that
24383 reason. */
24384 static rtx
24385 expand_set_or_movmem_constant_prologue (rtx dst, rtx *srcp, rtx destreg,
24386 rtx srcreg, rtx value, rtx vec_value,
24387 int desired_align, int align_bytes,
24388 bool issetmem)
24390 rtx src = NULL;
24391 rtx orig_dst = dst;
24392 rtx orig_src = NULL;
24393 int piece_size = 1;
24394 int copied_bytes = 0;
24396 if (!issetmem)
24398 gcc_assert (srcp != NULL);
24399 src = *srcp;
24400 orig_src = src;
24403 for (piece_size = 1;
24404 piece_size <= desired_align && copied_bytes < align_bytes;
24405 piece_size <<= 1)
24407 if (align_bytes & piece_size)
24409 if (issetmem)
24411 if (vec_value && piece_size > GET_MODE_SIZE (GET_MODE (value)))
24412 dst = emit_memset (dst, destreg, vec_value, piece_size);
24413 else
24414 dst = emit_memset (dst, destreg, value, piece_size);
24416 else
24417 dst = emit_memmov (dst, &src, destreg, srcreg, piece_size);
24418 copied_bytes += piece_size;
24421 if (MEM_ALIGN (dst) < (unsigned int) desired_align * BITS_PER_UNIT)
24422 set_mem_align (dst, desired_align * BITS_PER_UNIT);
24423 if (MEM_SIZE_KNOWN_P (orig_dst))
24424 set_mem_size (dst, MEM_SIZE (orig_dst) - align_bytes);
24426 if (!issetmem)
24428 int src_align_bytes = get_mem_align_offset (src, desired_align
24429 * BITS_PER_UNIT);
24430 if (src_align_bytes >= 0)
24431 src_align_bytes = desired_align - src_align_bytes;
24432 if (src_align_bytes >= 0)
24434 unsigned int src_align;
24435 for (src_align = desired_align; src_align >= 2; src_align >>= 1)
24437 if ((src_align_bytes & (src_align - 1))
24438 == (align_bytes & (src_align - 1)))
24439 break;
24441 if (src_align > (unsigned int) desired_align)
24442 src_align = desired_align;
24443 if (MEM_ALIGN (src) < src_align * BITS_PER_UNIT)
24444 set_mem_align (src, src_align * BITS_PER_UNIT);
24446 if (MEM_SIZE_KNOWN_P (orig_src))
24447 set_mem_size (src, MEM_SIZE (orig_src) - align_bytes);
24448 *srcp = src;
24451 return dst;
24454 /* Return true if ALG can be used in current context.
24455 Assume we expand memset if MEMSET is true. */
24456 static bool
24457 alg_usable_p (enum stringop_alg alg, bool memset)
24459 if (alg == no_stringop)
24460 return false;
24461 if (alg == vector_loop)
24462 return TARGET_SSE || TARGET_AVX;
24463 /* Algorithms using the rep prefix want at least edi and ecx;
24464 additionally, memset wants eax and memcpy wants esi. Don't
24465 consider such algorithms if the user has appropriated those
24466 registers for their own purposes. */
24467 if (alg == rep_prefix_1_byte
24468 || alg == rep_prefix_4_byte
24469 || alg == rep_prefix_8_byte)
24470 return !(fixed_regs[CX_REG] || fixed_regs[DI_REG]
24471 || (memset ? fixed_regs[AX_REG] : fixed_regs[SI_REG]));
24472 return true;
24475 /* Given COUNT and EXPECTED_SIZE, decide on codegen of string operation. */
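/* Informally, the order of decisions below is: honor a usable user-specified
   strategy (ix86_stringop_alg); when optimizing for size pick the smallest
   rep-prefix variant; use a one-byte loop for very tiny expected sizes;
   otherwise consult the per-CPU cost table, and for unknown or oversized
   blocks fall back to the table's unknown-size entry, a recursive guess when
   forced to inline, or a library call.  */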
24476 static enum stringop_alg
24477 decide_alg (HOST_WIDE_INT count, HOST_WIDE_INT expected_size,
24478 unsigned HOST_WIDE_INT min_size, unsigned HOST_WIDE_INT max_size,
24479 bool memset, bool zero_memset, int *dynamic_check, bool *noalign)
24481 const struct stringop_algs * algs;
24482 bool optimize_for_speed;
24483 int max = 0;
24484 const struct processor_costs *cost;
24485 int i;
24486 bool any_alg_usable_p = false;
24488 *noalign = false;
24489 *dynamic_check = -1;
24491 /* Even if the string operation call is cold, we still might spend a lot
24492 of time processing large blocks. */
24493 if (optimize_function_for_size_p (cfun)
24494 || (optimize_insn_for_size_p ()
24495 && (max_size < 256
24496 || (expected_size != -1 && expected_size < 256))))
24497 optimize_for_speed = false;
24498 else
24499 optimize_for_speed = true;
24501 cost = optimize_for_speed ? ix86_cost : &ix86_size_cost;
24502 if (memset)
24503 algs = &cost->memset[TARGET_64BIT != 0];
24504 else
24505 algs = &cost->memcpy[TARGET_64BIT != 0];
24507 /* See maximal size for user defined algorithm. */
24508 for (i = 0; i < MAX_STRINGOP_ALGS; i++)
24510 enum stringop_alg candidate = algs->size[i].alg;
24511 bool usable = alg_usable_p (candidate, memset);
24512 any_alg_usable_p |= usable;
24514 if (candidate != libcall && candidate && usable)
24515 max = algs->size[i].max;
24518 /* If the expected size is not known but the max size is small enough
24519 that the inline version is a win, set the expected size into
24520 the range.  */
24521 if (((max > 1 && (unsigned HOST_WIDE_INT) max >= max_size) || max == -1)
24522 && expected_size == -1)
24523 expected_size = min_size / 2 + max_size / 2;
24525 /* If the user specified the algorithm, honor it if possible.  */
24526 if (ix86_stringop_alg != no_stringop
24527 && alg_usable_p (ix86_stringop_alg, memset))
24528 return ix86_stringop_alg;
24529 /* rep; movq or rep; movl is the smallest variant. */
24530 else if (!optimize_for_speed)
24532 *noalign = true;
24533 if (!count || (count & 3) || (memset && !zero_memset))
24534 return alg_usable_p (rep_prefix_1_byte, memset)
24535 ? rep_prefix_1_byte : loop_1_byte;
24536 else
24537 return alg_usable_p (rep_prefix_4_byte, memset)
24538 ? rep_prefix_4_byte : loop;
24540 /* Very tiny blocks are best handled via the loop; REP is expensive to
24541 set up.  */
24542 else if (expected_size != -1 && expected_size < 4)
24543 return loop_1_byte;
24544 else if (expected_size != -1)
24546 enum stringop_alg alg = libcall;
24547 bool alg_noalign = false;
24548 for (i = 0; i < MAX_STRINGOP_ALGS; i++)
24550 /* We get here if the algorithms that were not libcall-based
24551 were rep-prefix based and we are unable to use rep prefixes
24552 based on global register usage. Break out of the loop and
24553 use the heuristic below. */
24554 if (algs->size[i].max == 0)
24555 break;
24556 if (algs->size[i].max >= expected_size || algs->size[i].max == -1)
24558 enum stringop_alg candidate = algs->size[i].alg;
24560 if (candidate != libcall && alg_usable_p (candidate, memset))
24562 alg = candidate;
24563 alg_noalign = algs->size[i].noalign;
24565 /* Honor TARGET_INLINE_ALL_STRINGOPS by picking
24566 last non-libcall inline algorithm. */
24567 if (TARGET_INLINE_ALL_STRINGOPS)
24569 /* When the current size is best to be copied by a libcall,
24570 but we are still forced to inline, run the heuristic below
24571 that will pick code for medium sized blocks. */
24572 if (alg != libcall)
24574 *noalign = alg_noalign;
24575 return alg;
24577 else if (!any_alg_usable_p)
24578 break;
24580 else if (alg_usable_p (candidate, memset))
24582 *noalign = algs->size[i].noalign;
24583 return candidate;
24588 /* When asked to inline the call anyway, try to pick a meaningful choice.
24589 We look for the maximal size of block that is faster to copy by hand and
24590 take blocks of at most that size, guessing that the average size will
24591 be roughly half of the block.
24593 If this turns out to be bad, we might simply specify the preferred
24594 choice in ix86_costs. */
24595 if ((TARGET_INLINE_ALL_STRINGOPS || TARGET_INLINE_STRINGOPS_DYNAMICALLY)
24596 && (algs->unknown_size == libcall
24597 || !alg_usable_p (algs->unknown_size, memset)))
24599 enum stringop_alg alg;
24601 /* If there aren't any usable algorithms, then recursing on
24602 smaller sizes isn't going to find anything. Just return the
24603 simple byte-at-a-time copy loop. */
24604 if (!any_alg_usable_p)
24606 /* Pick something reasonable. */
24607 if (TARGET_INLINE_STRINGOPS_DYNAMICALLY)
24608 *dynamic_check = 128;
24609 return loop_1_byte;
24611 if (max <= 0)
24612 max = 4096;
24613 alg = decide_alg (count, max / 2, min_size, max_size, memset,
24614 zero_memset, dynamic_check, noalign);
24615 gcc_assert (*dynamic_check == -1);
24616 if (TARGET_INLINE_STRINGOPS_DYNAMICALLY)
24617 *dynamic_check = max;
24618 else
24619 gcc_assert (alg != libcall);
24620 return alg;
24622 return (alg_usable_p (algs->unknown_size, memset)
24623 ? algs->unknown_size : libcall);
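/* Illustrative sketch (hypothetical data, not one of GCC's real cost
   tables): a stringop_algs entry of the kind decide_alg consults above
   could look like

     static const struct stringop_algs example_memcpy_algs =
       {libcall,                              algorithm for unknown sizes
        {{256, rep_prefix_4_byte, false},     blocks of up to 256 bytes
         {-1, libcall, false}}};              everything larger

   With such an entry a 100-byte memcpy of known size is expanded inline
   as "rep movsl", while a 4 kB copy or a copy of unknown size falls back
   to the library call, unless -minline-all-stringops forces the
   heuristic above to pick an inline variant anyway.  */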
24626 /* Decide on alignment. We know that the operand is already aligned to ALIGN
24627 (ALIGN can be based on profile feedback and thus it is not 100% guaranteed). */
24628 static int
24629 decide_alignment (int align,
24630 enum stringop_alg alg,
24631 int expected_size,
24632 machine_mode move_mode)
24634 int desired_align = 0;
24636 gcc_assert (alg != no_stringop);
24638 if (alg == libcall)
24639 return 0;
24640 if (move_mode == VOIDmode)
24641 return 0;
24643 desired_align = GET_MODE_SIZE (move_mode);
24644 /* PentiumPro has special logic triggering for 8 byte aligned blocks,
24645 copying a whole cacheline at once. */
24646 if (TARGET_PENTIUMPRO
24647 && (alg == rep_prefix_4_byte || alg == rep_prefix_1_byte))
24648 desired_align = 8;
24650 if (optimize_size)
24651 desired_align = 1;
24652 if (desired_align < align)
24653 desired_align = align;
24654 if (expected_size != -1 && expected_size < 4)
24655 desired_align = align;
24657 return desired_align;
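/* For example (illustrative only): with ALG == rep_prefix_4_byte and
   MOVE_MODE == SImode the desired alignment is normally 4; when tuning
   for PentiumPro it is bumped to 8 to hit the fast cacheline path noted
   above, and with -Os it falls back to the incoming ALIGN so that no
   alignment prologue is emitted.  */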
24661 /* Helper function for memset.  For QImode value 0xXY produce
24662 0xXYXYXYXY of the width specified by MODE.  This is essentially
24663 a * 0x01010101, but we can do slightly better than
24664 synth_mult by unwinding the sequence by hand on CPUs with
24665 a slow multiply. */
24666 static rtx
24667 promote_duplicated_reg (machine_mode mode, rtx val)
24669 machine_mode valmode = GET_MODE (val);
24670 rtx tmp;
24671 int nops = mode == DImode ? 3 : 2;
24673 gcc_assert (mode == SImode || mode == DImode || val == const0_rtx);
24674 if (val == const0_rtx)
24675 return copy_to_mode_reg (mode, CONST0_RTX (mode));
24676 if (CONST_INT_P (val))
24678 HOST_WIDE_INT v = INTVAL (val) & 255;
24680 v |= v << 8;
24681 v |= v << 16;
24682 if (mode == DImode)
24683 v |= (v << 16) << 16;
24684 return copy_to_mode_reg (mode, gen_int_mode (v, mode));
24687 if (valmode == VOIDmode)
24688 valmode = QImode;
24689 if (valmode != QImode)
24690 val = gen_lowpart (QImode, val);
24691 if (mode == QImode)
24692 return val;
24693 if (!TARGET_PARTIAL_REG_STALL)
24694 nops--;
24695 if (ix86_cost->mult_init[mode == DImode ? 3 : 2]
24696 + ix86_cost->mult_bit * (mode == DImode ? 8 : 4)
24697 <= (ix86_cost->shift_const + ix86_cost->add) * nops
24698 + (COSTS_N_INSNS (TARGET_PARTIAL_REG_STALL == 0)))
24700 rtx reg = convert_modes (mode, QImode, val, true);
24701 tmp = promote_duplicated_reg (mode, const1_rtx);
24702 return expand_simple_binop (mode, MULT, reg, tmp, NULL, 1,
24703 OPTAB_DIRECT);
24705 else
24707 rtx reg = convert_modes (mode, QImode, val, true);
24709 if (!TARGET_PARTIAL_REG_STALL)
24710 if (mode == SImode)
24711 emit_insn (gen_movsi_insv_1 (reg, reg));
24712 else
24713 emit_insn (gen_movdi_insv_1 (reg, reg));
24714 else
24716 tmp = expand_simple_binop (mode, ASHIFT, reg, GEN_INT (8),
24717 NULL, 1, OPTAB_DIRECT);
24718 reg =
24719 expand_simple_binop (mode, IOR, reg, tmp, reg, 1, OPTAB_DIRECT);
24721 tmp = expand_simple_binop (mode, ASHIFT, reg, GEN_INT (16),
24722 NULL, 1, OPTAB_DIRECT);
24723 reg = expand_simple_binop (mode, IOR, reg, tmp, reg, 1, OPTAB_DIRECT);
24724 if (mode == SImode)
24725 return reg;
24726 tmp = expand_simple_binop (mode, ASHIFT, reg, GEN_INT (32),
24727 NULL, 1, OPTAB_DIRECT);
24728 reg = expand_simple_binop (mode, IOR, reg, tmp, reg, 1, OPTAB_DIRECT);
24729 return reg;
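/* Illustrative sketch (not part of the compiler) of the shift-and-or
   sequence emitted above for a non-constant byte in SImode:

     static unsigned int
     dup_byte_si (unsigned char b)
     {
       unsigned int v = b;         v is 0x000000XY
       v |= v << 8;                v is 0x0000XYXY
       v |= v << 16;               v is 0xXYXYXYXY
       return v;
     }

   The DImode variant adds one more "v |= v << 32" step, and the insv
   path used when partial register writes do not stall performs the
   first step as a single byte-insert instruction.  */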
24733 /* Duplicate value VAL using promote_duplicated_reg into the maximal size that will
24734 be needed by the main loop copying SIZE_NEEDED chunks and by the prologue raising
24735 alignment from ALIGN to DESIRED_ALIGN. */
24736 static rtx
24737 promote_duplicated_reg_to_size (rtx val, int size_needed, int desired_align,
24738 int align)
24740 rtx promoted_val;
24742 if (TARGET_64BIT
24743 && (size_needed > 4 || (desired_align > align && desired_align > 4)))
24744 promoted_val = promote_duplicated_reg (DImode, val);
24745 else if (size_needed > 2 || (desired_align > align && desired_align > 2))
24746 promoted_val = promote_duplicated_reg (SImode, val);
24747 else if (size_needed > 1 || (desired_align > align && desired_align > 1))
24748 promoted_val = promote_duplicated_reg (HImode, val);
24749 else
24750 promoted_val = val;
24752 return promoted_val;
24755 /* Expand string move (memcpy) or store (memset) operation. Use i386 string
24756 operations when profitable. The code depends upon architecture, block size
24757 and alignment, but always has one of the following overall structures:
24759 Aligned move sequence:
24761 1) Prologue guard: a conditional that jumps to the epilogue for small
24762 blocks that can be handled by the epilogue alone. This is faster
24763 but also needed for correctness, since the prologue assumes the block
24764 is larger than the desired alignment.
24766 Optional dynamic check for size and libcall for large
24767 blocks is emitted here too, with -minline-stringops-dynamically.
24769 2) Prologue: copy first few bytes in order to get destination
24770 aligned to DESIRED_ALIGN. It is emitted only when ALIGN is less
24771 than DESIRED_ALIGN and up to DESIRED_ALIGN - ALIGN bytes can be
24772 copied. We emit either a jump tree on power of two sized
24773 blocks, or a byte loop.
24775 3) Main body: the copying loop itself, copying in SIZE_NEEDED chunks
24776 with specified algorithm.
24778 4) Epilogue: code copying tail of the block that is too small to be
24779 handled by main body (or up to size guarded by prologue guard).
24781 Misaligned move sequence
24783 1) misaligned move prologue/epilogue containing:
24784 a) Prologue handling small memory blocks and jumping to done_label
24785 (skipped if blocks are known to be large enough)
24786 b) Single move copying the first DESIRED_ALIGN-ALIGN bytes if alignment is
24787 needed, done by a single possibly misaligned move
24788 (skipped if alignment is not needed)
24789 c) Copy of last SIZE_NEEDED bytes by possibly misaligned moves
24791 2) Zero size guard dispatching to done_label, if needed
24793 3) dispatch to library call, if needed,
24795 4) Main body: the copying loop itself, copying in SIZE_NEEDED chunks
24796 with specified algorithm. */
24797 bool
24798 ix86_expand_set_or_movmem (rtx dst, rtx src, rtx count_exp, rtx val_exp,
24799 rtx align_exp, rtx expected_align_exp,
24800 rtx expected_size_exp, rtx min_size_exp,
24801 rtx max_size_exp, rtx probable_max_size_exp,
24802 bool issetmem)
24804 rtx destreg;
24805 rtx srcreg = NULL;
24806 rtx_code_label *label = NULL;
24807 rtx tmp;
24808 rtx_code_label *jump_around_label = NULL;
24809 HOST_WIDE_INT align = 1;
24810 unsigned HOST_WIDE_INT count = 0;
24811 HOST_WIDE_INT expected_size = -1;
24812 int size_needed = 0, epilogue_size_needed;
24813 int desired_align = 0, align_bytes = 0;
24814 enum stringop_alg alg;
24815 rtx promoted_val = NULL;
24816 rtx vec_promoted_val = NULL;
24817 bool force_loopy_epilogue = false;
24818 int dynamic_check;
24819 bool need_zero_guard = false;
24820 bool noalign;
24821 machine_mode move_mode = VOIDmode;
24822 int unroll_factor = 1;
24823 /* TODO: Once value ranges are available, fill in proper data. */
24824 unsigned HOST_WIDE_INT min_size = 0;
24825 unsigned HOST_WIDE_INT max_size = -1;
24826 unsigned HOST_WIDE_INT probable_max_size = -1;
24827 bool misaligned_prologue_used = false;
24829 if (CONST_INT_P (align_exp))
24830 align = INTVAL (align_exp);
24831 /* i386 can do misaligned access at a reasonably small extra cost. */
24832 if (CONST_INT_P (expected_align_exp)
24833 && INTVAL (expected_align_exp) > align)
24834 align = INTVAL (expected_align_exp);
24835 /* ALIGN is the minimum of destination and source alignment, but we care here
24836 just about destination alignment. */
24837 else if (!issetmem
24838 && MEM_ALIGN (dst) > (unsigned HOST_WIDE_INT) align * BITS_PER_UNIT)
24839 align = MEM_ALIGN (dst) / BITS_PER_UNIT;
24841 if (CONST_INT_P (count_exp))
24843 min_size = max_size = probable_max_size = count = expected_size
24844 = INTVAL (count_exp);
24845 /* When COUNT is 0, there is nothing to do. */
24846 if (!count)
24847 return true;
24849 else
24851 if (min_size_exp)
24852 min_size = INTVAL (min_size_exp);
24853 if (max_size_exp)
24854 max_size = INTVAL (max_size_exp);
24855 if (probable_max_size_exp)
24856 probable_max_size = INTVAL (probable_max_size_exp);
24857 if (CONST_INT_P (expected_size_exp))
24858 expected_size = INTVAL (expected_size_exp);
24861 /* Make sure we don't need to care about overflow later on. */
24862 if (count > (HOST_WIDE_INT_1U << 30))
24863 return false;
24865 /* Step 0: Decide on preferred algorithm, desired alignment and
24866 size of chunks to be copied by main loop. */
24867 alg = decide_alg (count, expected_size, min_size, probable_max_size,
24868 issetmem,
24869 issetmem && val_exp == const0_rtx,
24870 &dynamic_check, &noalign);
24871 if (alg == libcall)
24872 return false;
24873 gcc_assert (alg != no_stringop);
24875 /* For now the vector version of memset is generated only for memory zeroing, as
24876 creating the promoted vector value is very cheap in this case. */
24877 if (issetmem && alg == vector_loop && val_exp != const0_rtx)
24878 alg = unrolled_loop;
24880 if (!count)
24881 count_exp = copy_to_mode_reg (GET_MODE (count_exp), count_exp);
24882 destreg = ix86_copy_addr_to_reg (XEXP (dst, 0));
24883 if (!issetmem)
24884 srcreg = ix86_copy_addr_to_reg (XEXP (src, 0));
24886 unroll_factor = 1;
24887 move_mode = word_mode;
24888 switch (alg)
24890 case libcall:
24891 case no_stringop:
24892 case last_alg:
24893 gcc_unreachable ();
24894 case loop_1_byte:
24895 need_zero_guard = true;
24896 move_mode = QImode;
24897 break;
24898 case loop:
24899 need_zero_guard = true;
24900 break;
24901 case unrolled_loop:
24902 need_zero_guard = true;
24903 unroll_factor = (TARGET_64BIT ? 4 : 2);
24904 break;
24905 case vector_loop:
24906 need_zero_guard = true;
24907 unroll_factor = 4;
24908 /* Find the widest supported mode. */
24909 move_mode = word_mode;
24910 while (optab_handler (mov_optab, GET_MODE_WIDER_MODE (move_mode))
24911 != CODE_FOR_nothing)
24912 move_mode = GET_MODE_WIDER_MODE (move_mode);
24914 /* Find the corresponding vector mode with the same size as MOVE_MODE.
24915 MOVE_MODE is an integer mode at the moment (SI, DI, TI, etc.). */
24916 if (GET_MODE_SIZE (move_mode) > GET_MODE_SIZE (word_mode))
24918 int nunits = GET_MODE_SIZE (move_mode) / GET_MODE_SIZE (word_mode);
24919 move_mode = mode_for_vector (word_mode, nunits);
24920 if (optab_handler (mov_optab, move_mode) == CODE_FOR_nothing)
24921 move_mode = word_mode;
24923 gcc_assert (optab_handler (mov_optab, move_mode) != CODE_FOR_nothing);
24924 break;
24925 case rep_prefix_8_byte:
24926 move_mode = DImode;
24927 break;
24928 case rep_prefix_4_byte:
24929 move_mode = SImode;
24930 break;
24931 case rep_prefix_1_byte:
24932 move_mode = QImode;
24933 break;
24935 size_needed = GET_MODE_SIZE (move_mode) * unroll_factor;
24936 epilogue_size_needed = size_needed;
24938 desired_align = decide_alignment (align, alg, expected_size, move_mode);
24939 if (!TARGET_ALIGN_STRINGOPS || noalign)
24940 align = desired_align;
24942 /* Step 1: Prologue guard. */
24944 /* Alignment code needs count to be in register. */
24945 if (CONST_INT_P (count_exp) && desired_align > align)
24947 if (INTVAL (count_exp) > desired_align
24948 && INTVAL (count_exp) > size_needed)
24950 align_bytes
24951 = get_mem_align_offset (dst, desired_align * BITS_PER_UNIT);
24952 if (align_bytes <= 0)
24953 align_bytes = 0;
24954 else
24955 align_bytes = desired_align - align_bytes;
24957 if (align_bytes == 0)
24958 count_exp = force_reg (counter_mode (count_exp), count_exp);
24960 gcc_assert (desired_align >= 1 && align >= 1);
24962 /* Misaligned move sequences handle both prologue and epilogue at once.
24963 Default code generation results in smaller code for large alignments
24964 and also avoids redundant work when sizes are known precisely. */
24965 misaligned_prologue_used
24966 = (TARGET_MISALIGNED_MOVE_STRING_PRO_EPILOGUES
24967 && MAX (desired_align, epilogue_size_needed) <= 32
24968 && desired_align <= epilogue_size_needed
24969 && ((desired_align > align && !align_bytes)
24970 || (!count && epilogue_size_needed > 1)));
24972 /* Do the cheap promotion to allow better CSE across the
24973 main loop and epilogue (i.e. one load of the big constant in
24974 front of all the code).
24975 For now the misaligned move sequences do not have a fast path
24976 without broadcasting. */
24977 if (issetmem && ((CONST_INT_P (val_exp) || misaligned_prologue_used)))
24979 if (alg == vector_loop)
24981 gcc_assert (val_exp == const0_rtx);
24982 vec_promoted_val = promote_duplicated_reg (move_mode, val_exp);
24983 promoted_val = promote_duplicated_reg_to_size (val_exp,
24984 GET_MODE_SIZE (word_mode),
24985 desired_align, align);
24987 else
24989 promoted_val = promote_duplicated_reg_to_size (val_exp, size_needed,
24990 desired_align, align);
24993 /* Misaligned move sequences handle both prologues and epilogues at once.
24994 Default code generation results in smaller code for large alignments and
24995 also avoids redundant work when sizes are known precisely. */
24996 if (misaligned_prologue_used)
24998 /* The misaligned move prologue handles small blocks by itself. */
24999 expand_set_or_movmem_prologue_epilogue_by_misaligned_moves
25000 (dst, src, &destreg, &srcreg,
25001 move_mode, promoted_val, vec_promoted_val,
25002 &count_exp,
25003 &jump_around_label,
25004 desired_align < align
25005 ? MAX (desired_align, epilogue_size_needed) : epilogue_size_needed,
25006 desired_align, align, &min_size, dynamic_check, issetmem);
25007 if (!issetmem)
25008 src = change_address (src, BLKmode, srcreg);
25009 dst = change_address (dst, BLKmode, destreg);
25010 set_mem_align (dst, desired_align * BITS_PER_UNIT);
25011 epilogue_size_needed = 0;
25012 if (need_zero_guard && !min_size)
25014 /* It is possible that we copied enough so the main loop will not
25015 execute. */
25016 gcc_assert (size_needed > 1);
25017 if (jump_around_label == NULL_RTX)
25018 jump_around_label = gen_label_rtx ();
25019 emit_cmp_and_jump_insns (count_exp,
25020 GEN_INT (size_needed),
25021 LTU, 0, counter_mode (count_exp), 1, jump_around_label);
25022 if (expected_size == -1
25023 || expected_size < (desired_align - align) / 2 + size_needed)
25024 predict_jump (REG_BR_PROB_BASE * 20 / 100);
25025 else
25026 predict_jump (REG_BR_PROB_BASE * 60 / 100);
25029 /* Ensure that alignment prologue won't copy past end of block. */
25030 else if (size_needed > 1 || (desired_align > 1 && desired_align > align))
25032 epilogue_size_needed = MAX (size_needed - 1, desired_align - align);
25033 /* Epilogue always copies COUNT_EXP & EPILOGUE_SIZE_NEEDED bytes.
25034 Make sure it is power of 2. */
25035 epilogue_size_needed = 1 << (floor_log2 (epilogue_size_needed) + 1);
25037 /* To improve performance of small blocks, we jump around the VAL
25038 promotion. This means that if the promoted VAL is not constant,
25039 we might not use it in the epilogue and have to use the byte
25040 loop variant. */
25041 if (issetmem && epilogue_size_needed > 2 && !promoted_val)
25042 force_loopy_epilogue = true;
25043 if ((count && count < (unsigned HOST_WIDE_INT) epilogue_size_needed)
25044 || max_size < (unsigned HOST_WIDE_INT) epilogue_size_needed)
25046 /* If main algorithm works on QImode, no epilogue is needed.
25047 For small sizes just don't align anything. */
25048 if (size_needed == 1)
25049 desired_align = align;
25050 else
25051 goto epilogue;
25053 else if (!count
25054 && min_size < (unsigned HOST_WIDE_INT) epilogue_size_needed)
25056 label = gen_label_rtx ();
25057 emit_cmp_and_jump_insns (count_exp,
25058 GEN_INT (epilogue_size_needed),
25059 LTU, 0, counter_mode (count_exp), 1, label);
25060 if (expected_size == -1 || expected_size < epilogue_size_needed)
25061 predict_jump (REG_BR_PROB_BASE * 60 / 100);
25062 else
25063 predict_jump (REG_BR_PROB_BASE * 20 / 100);
25067 /* Emit code to decide at runtime whether a library call or inline code should be
25068 used. */
25069 if (dynamic_check != -1)
25071 if (!issetmem && CONST_INT_P (count_exp))
25073 if (UINTVAL (count_exp) >= (unsigned HOST_WIDE_INT)dynamic_check)
25075 emit_block_move_via_libcall (dst, src, count_exp, false);
25076 count_exp = const0_rtx;
25077 goto epilogue;
25080 else
25082 rtx_code_label *hot_label = gen_label_rtx ();
25083 if (jump_around_label == NULL_RTX)
25084 jump_around_label = gen_label_rtx ();
25085 emit_cmp_and_jump_insns (count_exp, GEN_INT (dynamic_check - 1),
25086 LEU, 0, counter_mode (count_exp),
25087 1, hot_label);
25088 predict_jump (REG_BR_PROB_BASE * 90 / 100);
25089 if (issetmem)
25090 set_storage_via_libcall (dst, count_exp, val_exp, false);
25091 else
25092 emit_block_move_via_libcall (dst, src, count_exp, false);
25093 emit_jump (jump_around_label);
25094 emit_label (hot_label);
25098 /* Step 2: Alignment prologue. */
25099 /* Do the expensive promotion once we branched off the small blocks. */
25100 if (issetmem && !promoted_val)
25101 promoted_val = promote_duplicated_reg_to_size (val_exp, size_needed,
25102 desired_align, align);
25104 if (desired_align > align && !misaligned_prologue_used)
25106 if (align_bytes == 0)
25108 /* Except for the first move in the prologue, we no longer know
25109 the constant offset in aliasing info. It doesn't seem worth
25110 the pain to maintain it for the first move, so throw away
25111 the info early. */
25112 dst = change_address (dst, BLKmode, destreg);
25113 if (!issetmem)
25114 src = change_address (src, BLKmode, srcreg);
25115 dst = expand_set_or_movmem_prologue (dst, src, destreg, srcreg,
25116 promoted_val, vec_promoted_val,
25117 count_exp, align, desired_align,
25118 issetmem);
25119 /* At most desired_align - align bytes are copied. */
25120 if (min_size < (unsigned)(desired_align - align))
25121 min_size = 0;
25122 else
25123 min_size -= desired_align - align;
25125 else
25127 /* If we know how many bytes need to be stored before dst is
25128 sufficiently aligned, maintain aliasing info accurately. */
25129 dst = expand_set_or_movmem_constant_prologue (dst, &src, destreg,
25130 srcreg,
25131 promoted_val,
25132 vec_promoted_val,
25133 desired_align,
25134 align_bytes,
25135 issetmem);
25137 count_exp = plus_constant (counter_mode (count_exp),
25138 count_exp, -align_bytes);
25139 count -= align_bytes;
25140 min_size -= align_bytes;
25141 max_size -= align_bytes;
25143 if (need_zero_guard
25144 && !min_size
25145 && (count < (unsigned HOST_WIDE_INT) size_needed
25146 || (align_bytes == 0
25147 && count < ((unsigned HOST_WIDE_INT) size_needed
25148 + desired_align - align))))
25150 /* It is possible that we copied enough so the main loop will not
25151 execute. */
25152 gcc_assert (size_needed > 1);
25153 if (label == NULL_RTX)
25154 label = gen_label_rtx ();
25155 emit_cmp_and_jump_insns (count_exp,
25156 GEN_INT (size_needed),
25157 LTU, 0, counter_mode (count_exp), 1, label);
25158 if (expected_size == -1
25159 || expected_size < (desired_align - align) / 2 + size_needed)
25160 predict_jump (REG_BR_PROB_BASE * 20 / 100);
25161 else
25162 predict_jump (REG_BR_PROB_BASE * 60 / 100);
25165 if (label && size_needed == 1)
25167 emit_label (label);
25168 LABEL_NUSES (label) = 1;
25169 label = NULL;
25170 epilogue_size_needed = 1;
25171 if (issetmem)
25172 promoted_val = val_exp;
25174 else if (label == NULL_RTX && !misaligned_prologue_used)
25175 epilogue_size_needed = size_needed;
25177 /* Step 3: Main loop. */
25179 switch (alg)
25181 case libcall:
25182 case no_stringop:
25183 case last_alg:
25184 gcc_unreachable ();
25185 case loop_1_byte:
25186 case loop:
25187 case unrolled_loop:
25188 expand_set_or_movmem_via_loop (dst, src, destreg, srcreg, promoted_val,
25189 count_exp, move_mode, unroll_factor,
25190 expected_size, issetmem);
25191 break;
25192 case vector_loop:
25193 expand_set_or_movmem_via_loop (dst, src, destreg, srcreg,
25194 vec_promoted_val, count_exp, move_mode,
25195 unroll_factor, expected_size, issetmem);
25196 break;
25197 case rep_prefix_8_byte:
25198 case rep_prefix_4_byte:
25199 case rep_prefix_1_byte:
25200 expand_set_or_movmem_via_rep (dst, src, destreg, srcreg, promoted_val,
25201 val_exp, count_exp, move_mode, issetmem);
25202 break;
25204 /* Adjust properly the offset of src and dest memory for aliasing. */
25205 if (CONST_INT_P (count_exp))
25207 if (!issetmem)
25208 src = adjust_automodify_address_nv (src, BLKmode, srcreg,
25209 (count / size_needed) * size_needed);
25210 dst = adjust_automodify_address_nv (dst, BLKmode, destreg,
25211 (count / size_needed) * size_needed);
25213 else
25215 if (!issetmem)
25216 src = change_address (src, BLKmode, srcreg);
25217 dst = change_address (dst, BLKmode, destreg);
25220 /* Step 4: Epilogue to copy the remaining bytes. */
25221 epilogue:
25222 if (label)
25224 /* When the main loop is done, COUNT_EXP might hold the original count,
25225 while we want to copy only COUNT_EXP & (SIZE_NEEDED - 1) bytes.
25226 Epilogue code will actually copy COUNT_EXP & (EPILOGUE_SIZE_NEEDED - 1)
25227 bytes. Compensate if needed. */
25229 if (size_needed < epilogue_size_needed)
25231 tmp =
25232 expand_simple_binop (counter_mode (count_exp), AND, count_exp,
25233 GEN_INT (size_needed - 1), count_exp, 1,
25234 OPTAB_DIRECT);
25235 if (tmp != count_exp)
25236 emit_move_insn (count_exp, tmp);
25238 emit_label (label);
25239 LABEL_NUSES (label) = 1;
25242 if (count_exp != const0_rtx && epilogue_size_needed > 1)
25244 if (force_loopy_epilogue)
25245 expand_setmem_epilogue_via_loop (dst, destreg, val_exp, count_exp,
25246 epilogue_size_needed);
25247 else
25249 if (issetmem)
25250 expand_setmem_epilogue (dst, destreg, promoted_val,
25251 vec_promoted_val, count_exp,
25252 epilogue_size_needed);
25253 else
25254 expand_movmem_epilogue (dst, src, destreg, srcreg, count_exp,
25255 epilogue_size_needed);
25258 if (jump_around_label)
25259 emit_label (jump_around_label);
25260 return true;
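/* Illustrative shape (schematic pseudo-assembly, not literal compiler
   output) of the aligned sequence described above, for a memset of
   runtime size N expanded with rep_prefix_4_byte and the promoted fill
   value already in %eax:

        cmp     $epilogue_size, N            1) prologue guard
        jb      .Lepilogue
        ...byte stores until dst is 4-byte aligned...      2) prologue
        mov     adjusted_N / 4, %ecx
        rep stosl                            3) main body
     .Lepilogue:
        ...store the remaining N & 3 bytes...              4) epilogue
*/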
25264 /* Expand the appropriate insns for doing strlen if not just doing
25265 repnz; scasb
25267 out = result, initialized with the start address
25268 align_rtx = alignment of the address.
25269 scratch = scratch register, initialized with the start address when
25270 not aligned, otherwise undefined
25272 This is just the body. It needs the initializations mentioned above and
25273 some address computing at the end. These things are done in i386.md. */
25275 static void
25276 ix86_expand_strlensi_unroll_1 (rtx out, rtx src, rtx align_rtx)
25278 int align;
25279 rtx tmp;
25280 rtx_code_label *align_2_label = NULL;
25281 rtx_code_label *align_3_label = NULL;
25282 rtx_code_label *align_4_label = gen_label_rtx ();
25283 rtx_code_label *end_0_label = gen_label_rtx ();
25284 rtx mem;
25285 rtx tmpreg = gen_reg_rtx (SImode);
25286 rtx scratch = gen_reg_rtx (SImode);
25287 rtx cmp;
25289 align = 0;
25290 if (CONST_INT_P (align_rtx))
25291 align = INTVAL (align_rtx);
25293 /* Loop to check 1..3 bytes for null to get an aligned pointer. */
25295 /* Is there a known alignment and is it less than 4? */
25296 if (align < 4)
25298 rtx scratch1 = gen_reg_rtx (Pmode);
25299 emit_move_insn (scratch1, out);
25300 /* Is there a known alignment and is it not 2? */
25301 if (align != 2)
25303 align_3_label = gen_label_rtx (); /* Label when aligned to 3-byte */
25304 align_2_label = gen_label_rtx (); /* Label when aligned to 2-byte */
25306 /* Leave just the 3 lower bits. */
25307 align_rtx = expand_binop (Pmode, and_optab, scratch1, GEN_INT (3),
25308 NULL_RTX, 0, OPTAB_WIDEN);
25310 emit_cmp_and_jump_insns (align_rtx, const0_rtx, EQ, NULL,
25311 Pmode, 1, align_4_label);
25312 emit_cmp_and_jump_insns (align_rtx, const2_rtx, EQ, NULL,
25313 Pmode, 1, align_2_label);
25314 emit_cmp_and_jump_insns (align_rtx, const2_rtx, GTU, NULL,
25315 Pmode, 1, align_3_label);
25317 else
25319 /* Since the alignment is 2, we have to check 2 or 0 bytes;
25320 check if it is aligned to 4 bytes. */
25322 align_rtx = expand_binop (Pmode, and_optab, scratch1, const2_rtx,
25323 NULL_RTX, 0, OPTAB_WIDEN);
25325 emit_cmp_and_jump_insns (align_rtx, const0_rtx, EQ, NULL,
25326 Pmode, 1, align_4_label);
25329 mem = change_address (src, QImode, out);
25331 /* Now compare the bytes. */
25333 /* Compare the first n unaligned byte on a byte per byte basis. */
25334 emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL,
25335 QImode, 1, end_0_label);
25337 /* Increment the address. */
25338 emit_insn (ix86_gen_add3 (out, out, const1_rtx));
25340 /* Not needed with an alignment of 2 */
25341 if (align != 2)
25343 emit_label (align_2_label);
25345 emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL, QImode, 1,
25346 end_0_label);
25348 emit_insn (ix86_gen_add3 (out, out, const1_rtx));
25350 emit_label (align_3_label);
25353 emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL, QImode, 1,
25354 end_0_label);
25356 emit_insn (ix86_gen_add3 (out, out, const1_rtx));
25359 /* Generate a loop to check 4 bytes at a time. It is not a good idea to
25360 align this loop: doing so only makes programs bigger and does not help
25361 speed. */
25362 emit_label (align_4_label);
25364 mem = change_address (src, SImode, out);
25365 emit_move_insn (scratch, mem);
25366 emit_insn (ix86_gen_add3 (out, out, GEN_INT (4)));
25368 /* This formula yields a nonzero result iff one of the bytes is zero.
25369 This saves three branches inside the loop and many cycles. */
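/* Worked example (illustrative): for a loaded word of 0x12003456 the
   computation below gives

     (0x12003456 + 0xfefefeff) & ~0x12003456 & 0x80808080
       = 0x10ff3355 & 0xedffcba9 & 0x80808080
       = 0x00800000

   A word with no zero byte always yields 0, and the lowest zero byte
   always sets its high bit, which the code after the loop locates.  */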
25371 emit_insn (gen_addsi3 (tmpreg, scratch, GEN_INT (-0x01010101)));
25372 emit_insn (gen_one_cmplsi2 (scratch, scratch));
25373 emit_insn (gen_andsi3 (tmpreg, tmpreg, scratch));
25374 emit_insn (gen_andsi3 (tmpreg, tmpreg,
25375 gen_int_mode (0x80808080, SImode)));
25376 emit_cmp_and_jump_insns (tmpreg, const0_rtx, EQ, 0, SImode, 1,
25377 align_4_label);
25379 if (TARGET_CMOVE)
25381 rtx reg = gen_reg_rtx (SImode);
25382 rtx reg2 = gen_reg_rtx (Pmode);
25383 emit_move_insn (reg, tmpreg);
25384 emit_insn (gen_lshrsi3 (reg, reg, GEN_INT (16)));
25386 /* If zero is not in the first two bytes, move two bytes forward. */
25387 emit_insn (gen_testsi_ccno_1 (tmpreg, GEN_INT (0x8080)));
25388 tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
25389 tmp = gen_rtx_EQ (VOIDmode, tmp, const0_rtx);
25390 emit_insn (gen_rtx_SET (tmpreg,
25391 gen_rtx_IF_THEN_ELSE (SImode, tmp,
25392 reg,
25393 tmpreg)));
25394 /* Emit lea manually to avoid clobbering of flags. */
25395 emit_insn (gen_rtx_SET (reg2, gen_rtx_PLUS (Pmode, out, const2_rtx)));
25397 tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
25398 tmp = gen_rtx_EQ (VOIDmode, tmp, const0_rtx);
25399 emit_insn (gen_rtx_SET (out,
25400 gen_rtx_IF_THEN_ELSE (Pmode, tmp,
25401 reg2,
25402 out)));
25404 else
25406 rtx_code_label *end_2_label = gen_label_rtx ();
25407 /* Is zero in the first two bytes? */
25409 emit_insn (gen_testsi_ccno_1 (tmpreg, GEN_INT (0x8080)));
25410 tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
25411 tmp = gen_rtx_NE (VOIDmode, tmp, const0_rtx);
25412 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
25413 gen_rtx_LABEL_REF (VOIDmode, end_2_label),
25414 pc_rtx);
25415 tmp = emit_jump_insn (gen_rtx_SET (pc_rtx, tmp));
25416 JUMP_LABEL (tmp) = end_2_label;
25418 /* Not in the first two. Move two bytes forward. */
25419 emit_insn (gen_lshrsi3 (tmpreg, tmpreg, GEN_INT (16)));
25420 emit_insn (ix86_gen_add3 (out, out, const2_rtx));
25422 emit_label (end_2_label);
25426 /* Avoid branch in fixing the byte. */
25427 tmpreg = gen_lowpart (QImode, tmpreg);
25428 emit_insn (gen_addqi3_cc (tmpreg, tmpreg, tmpreg));
25429 tmp = gen_rtx_REG (CCmode, FLAGS_REG);
25430 cmp = gen_rtx_LTU (VOIDmode, tmp, const0_rtx);
25431 emit_insn (ix86_gen_sub3_carry (out, out, GEN_INT (3), tmp, cmp));
25433 emit_label (end_0_label);
25436 /* Expand strlen. */
25438 bool
25439 ix86_expand_strlen (rtx out, rtx src, rtx eoschar, rtx align)
25441 rtx addr, scratch1, scratch2, scratch3, scratch4;
25443 /* The generic case of the strlen expander is long. Avoid
25444 expanding it unless TARGET_INLINE_ALL_STRINGOPS. */
25446 if (TARGET_UNROLL_STRLEN && eoschar == const0_rtx && optimize > 1
25447 && !TARGET_INLINE_ALL_STRINGOPS
25448 && !optimize_insn_for_size_p ()
25449 && (!CONST_INT_P (align) || INTVAL (align) < 4))
25450 return false;
25452 addr = force_reg (Pmode, XEXP (src, 0));
25453 scratch1 = gen_reg_rtx (Pmode);
25455 if (TARGET_UNROLL_STRLEN && eoschar == const0_rtx && optimize > 1
25456 && !optimize_insn_for_size_p ())
25458 /* Well it seems that some optimizer does not combine a call like
25459 foo(strlen(bar), strlen(bar));
25460 when the move and the subtraction are done here. It does calculate
25461 the length just once when these instructions are done inside of
25462 output_strlen_unroll(). But I think since &bar[strlen(bar)] is
25463 often used and I use one fewer register for the lifetime of
25464 output_strlen_unroll() this is better. */
25466 emit_move_insn (out, addr);
25468 ix86_expand_strlensi_unroll_1 (out, src, align);
25470 /* strlensi_unroll_1 returns the address of the zero at the end of
25471 the string, like memchr(), so compute the length by subtracting
25472 the start address. */
25473 emit_insn (ix86_gen_sub3 (out, out, addr));
25475 else
25477 rtx unspec;
25479 /* Can't use this if the user has appropriated eax, ecx, or edi. */
25480 if (fixed_regs[AX_REG] || fixed_regs[CX_REG] || fixed_regs[DI_REG])
25481 return false;
25483 scratch2 = gen_reg_rtx (Pmode);
25484 scratch3 = gen_reg_rtx (Pmode);
25485 scratch4 = force_reg (Pmode, constm1_rtx);
25487 emit_move_insn (scratch3, addr);
25488 eoschar = force_reg (QImode, eoschar);
25490 src = replace_equiv_address_nv (src, scratch3);
25492 /* If .md starts supporting :P, this can be done in .md. */
25493 unspec = gen_rtx_UNSPEC (Pmode, gen_rtvec (4, src, eoschar, align,
25494 scratch4), UNSPEC_SCAS);
25495 emit_insn (gen_strlenqi_1 (scratch1, scratch3, unspec));
25496 emit_insn (ix86_gen_one_cmpl2 (scratch2, scratch1));
25497 emit_insn (ix86_gen_add3 (out, scratch2, constm1_rtx));
25499 return true;
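/* Worked example (illustrative) of the repnz scasb arithmetic above:
   for a 5-character string the count register starts at -1; scanning
   the 5 characters plus the terminating zero decrements it six times
   to -7, the one's complement gives 6, and adding -1 yields the
   length 5.  */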
25502 /* For a given symbol (function), construct code to compute the address of its PLT
25503 entry in the large x86-64 PIC model. */
25504 static rtx
25505 construct_plt_address (rtx symbol)
25507 rtx tmp, unspec;
25509 gcc_assert (GET_CODE (symbol) == SYMBOL_REF);
25510 gcc_assert (ix86_cmodel == CM_LARGE_PIC && !TARGET_PECOFF);
25511 gcc_assert (Pmode == DImode);
25513 tmp = gen_reg_rtx (Pmode);
25514 unspec = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, symbol), UNSPEC_PLTOFF);
25516 emit_move_insn (tmp, gen_rtx_CONST (Pmode, unspec));
25517 emit_insn (ix86_gen_add3 (tmp, tmp, pic_offset_table_rtx));
25518 return tmp;
25522 ix86_expand_call (rtx retval, rtx fnaddr, rtx callarg1,
25523 rtx callarg2,
25524 rtx pop, bool sibcall)
25526 rtx vec[3];
25527 rtx use = NULL, call;
25528 unsigned int vec_len = 0;
25530 if (pop == const0_rtx)
25531 pop = NULL;
25532 gcc_assert (!TARGET_64BIT || !pop);
25534 if (TARGET_MACHO && !TARGET_64BIT)
25536 #if TARGET_MACHO
25537 if (flag_pic && GET_CODE (XEXP (fnaddr, 0)) == SYMBOL_REF)
25538 fnaddr = machopic_indirect_call_target (fnaddr);
25539 #endif
25541 else
25543 /* Static functions and indirect calls don't need the pic register. Also,
25544 check if PLT was explicitly avoided via no-plt or "noplt" attribute, making
25545 it an indirect call. */
25546 if (flag_pic
25547 && (!TARGET_64BIT
25548 || (ix86_cmodel == CM_LARGE_PIC
25549 && DEFAULT_ABI != MS_ABI))
25550 && GET_CODE (XEXP (fnaddr, 0)) == SYMBOL_REF
25551 && !SYMBOL_REF_LOCAL_P (XEXP (fnaddr, 0))
25552 && flag_plt
25553 && (SYMBOL_REF_DECL ((XEXP (fnaddr, 0))) == NULL_TREE
25554 || !lookup_attribute ("noplt",
25555 DECL_ATTRIBUTES (SYMBOL_REF_DECL (XEXP (fnaddr, 0))))))
25557 use_reg (&use, gen_rtx_REG (Pmode, REAL_PIC_OFFSET_TABLE_REGNUM));
25558 if (ix86_use_pseudo_pic_reg ())
25559 emit_move_insn (gen_rtx_REG (Pmode, REAL_PIC_OFFSET_TABLE_REGNUM),
25560 pic_offset_table_rtx);
25564 /* Skip setting up RAX register for -mskip-rax-setup when there are no
25565 parameters passed in vector registers. */
25566 if (TARGET_64BIT
25567 && (INTVAL (callarg2) > 0
25568 || (INTVAL (callarg2) == 0
25569 && (TARGET_SSE || !flag_skip_rax_setup))))
25571 rtx al = gen_rtx_REG (QImode, AX_REG);
25572 emit_move_insn (al, callarg2);
25573 use_reg (&use, al);
25576 if (ix86_cmodel == CM_LARGE_PIC
25577 && !TARGET_PECOFF
25578 && MEM_P (fnaddr)
25579 && GET_CODE (XEXP (fnaddr, 0)) == SYMBOL_REF
25580 && !local_symbolic_operand (XEXP (fnaddr, 0), VOIDmode))
25581 fnaddr = gen_rtx_MEM (QImode, construct_plt_address (XEXP (fnaddr, 0)));
25582 else if (sibcall
25583 ? !sibcall_insn_operand (XEXP (fnaddr, 0), word_mode)
25584 : !call_insn_operand (XEXP (fnaddr, 0), word_mode))
25586 fnaddr = convert_to_mode (word_mode, XEXP (fnaddr, 0), 1);
25587 fnaddr = gen_rtx_MEM (QImode, copy_to_mode_reg (word_mode, fnaddr));
25590 call = gen_rtx_CALL (VOIDmode, fnaddr, callarg1);
25592 if (retval)
25594 /* We should add bounds as a destination register in case
25595 a pointer with bounds may be returned. */
25596 if (TARGET_MPX && SCALAR_INT_MODE_P (GET_MODE (retval)))
25598 rtx b0 = gen_rtx_REG (BND64mode, FIRST_BND_REG);
25599 rtx b1 = gen_rtx_REG (BND64mode, FIRST_BND_REG + 1);
25600 if (GET_CODE (retval) == PARALLEL)
25602 b0 = gen_rtx_EXPR_LIST (VOIDmode, b0, const0_rtx);
25603 b1 = gen_rtx_EXPR_LIST (VOIDmode, b1, const0_rtx);
25604 rtx par = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, b0, b1));
25605 retval = chkp_join_splitted_slot (retval, par);
25607 else
25609 retval = gen_rtx_PARALLEL (VOIDmode,
25610 gen_rtvec (3, retval, b0, b1));
25611 chkp_put_regs_to_expr_list (retval);
25615 call = gen_rtx_SET (retval, call);
25617 vec[vec_len++] = call;
25619 if (pop)
25621 pop = gen_rtx_PLUS (Pmode, stack_pointer_rtx, pop);
25622 pop = gen_rtx_SET (stack_pointer_rtx, pop);
25623 vec[vec_len++] = pop;
25626 if (TARGET_64BIT_MS_ABI
25627 && (!callarg2 || INTVAL (callarg2) != -2))
25629 int const cregs_size
25630 = ARRAY_SIZE (x86_64_ms_sysv_extra_clobbered_registers);
25631 int i;
25633 for (i = 0; i < cregs_size; i++)
25635 int regno = x86_64_ms_sysv_extra_clobbered_registers[i];
25636 machine_mode mode = SSE_REGNO_P (regno) ? TImode : DImode;
25638 clobber_reg (&use, gen_rtx_REG (mode, regno));
25642 if (vec_len > 1)
25643 call = gen_rtx_PARALLEL (VOIDmode, gen_rtvec_v (vec_len, vec));
25644 call = emit_call_insn (call);
25645 if (use)
25646 CALL_INSN_FUNCTION_USAGE (call) = use;
25648 return call;
25651 /* Return true if the function being called was marked with attribute "noplt"
25652 or using -fno-plt and we are compiling for non-PIC and x86_64. We need to
25653 handle the non-PIC case in the backend because there is no easy interface
25654 for the front-end to force non-PLT calls to use the GOT. This is currently
25655 used only with 64-bit ELF targets to call the function marked "noplt"
25656 indirectly. */
25658 static bool
25659 ix86_nopic_noplt_attribute_p (rtx call_op)
25661 if (flag_pic || ix86_cmodel == CM_LARGE
25662 || !TARGET_64BIT || TARGET_MACHO || TARGET_SEH || TARGET_PECOFF
25663 || SYMBOL_REF_LOCAL_P (call_op))
25664 return false;
25666 tree symbol_decl = SYMBOL_REF_DECL (call_op);
25668 if (!flag_plt
25669 || (symbol_decl != NULL_TREE
25670 && lookup_attribute ("noplt", DECL_ATTRIBUTES (symbol_decl))))
25671 return true;
25673 return false;
25676 /* Output the assembly for a call instruction. */
25678 const char *
25679 ix86_output_call_insn (rtx_insn *insn, rtx call_op)
25681 bool direct_p = constant_call_address_operand (call_op, VOIDmode);
25682 bool seh_nop_p = false;
25683 const char *xasm;
25685 if (SIBLING_CALL_P (insn))
25687 if (direct_p && ix86_nopic_noplt_attribute_p (call_op))
25688 xasm = "%!jmp\t*%p0@GOTPCREL(%%rip)";
25689 else if (direct_p)
25690 xasm = "%!jmp\t%P0";
25691 /* SEH epilogue detection requires the indirect branch case
25692 to include REX.W. */
25693 else if (TARGET_SEH)
25694 xasm = "%!rex.W jmp %A0";
25695 else
25696 xasm = "%!jmp\t%A0";
25698 output_asm_insn (xasm, &call_op);
25699 return "";
25702 /* SEH unwinding can require an extra nop to be emitted in several
25703 circumstances. Determine if we have one of those. */
25704 if (TARGET_SEH)
25706 rtx_insn *i;
25708 for (i = NEXT_INSN (insn); i ; i = NEXT_INSN (i))
25710 /* If we get to another real insn, we don't need the nop. */
25711 if (INSN_P (i))
25712 break;
25714 /* If we get to the epilogue note, prevent a catch region from
25715 being adjacent to the standard epilogue sequence. If non-
25716 call-exceptions, we'll have done this during epilogue emission. */
25717 if (NOTE_P (i) && NOTE_KIND (i) == NOTE_INSN_EPILOGUE_BEG
25718 && !flag_non_call_exceptions
25719 && !can_throw_internal (insn))
25721 seh_nop_p = true;
25722 break;
25726 /* If we didn't find a real insn following the call, prevent the
25727 unwinder from looking into the next function. */
25728 if (i == NULL)
25729 seh_nop_p = true;
25732 if (direct_p && ix86_nopic_noplt_attribute_p (call_op))
25733 xasm = "%!call\t*%p0@GOTPCREL(%%rip)";
25734 else if (direct_p)
25735 xasm = "%!call\t%P0";
25736 else
25737 xasm = "%!call\t%A0";
25739 output_asm_insn (xasm, &call_op);
25741 if (seh_nop_p)
25742 return "nop";
25744 return "";
25747 /* Clear stack slot assignments remembered from previous functions.
25748 This is called from INIT_EXPANDERS once before RTL is emitted for each
25749 function. */
25751 static struct machine_function *
25752 ix86_init_machine_status (void)
25754 struct machine_function *f;
25756 f = ggc_cleared_alloc<machine_function> ();
25757 f->use_fast_prologue_epilogue_nregs = -1;
25758 f->call_abi = ix86_abi;
25760 return f;
25763 /* Return a MEM corresponding to a stack slot with mode MODE.
25764 Allocate a new slot if necessary.
25766 The RTL for a function can have several slots available: N is
25767 which slot to use. */
25770 assign_386_stack_local (machine_mode mode, enum ix86_stack_slot n)
25772 struct stack_local_entry *s;
25774 gcc_assert (n < MAX_386_STACK_LOCALS);
25776 for (s = ix86_stack_locals; s; s = s->next)
25777 if (s->mode == mode && s->n == n)
25778 return validize_mem (copy_rtx (s->rtl));
25780 s = ggc_alloc<stack_local_entry> ();
25781 s->n = n;
25782 s->mode = mode;
25783 s->rtl = assign_stack_local (mode, GET_MODE_SIZE (mode), 0);
25785 s->next = ix86_stack_locals;
25786 ix86_stack_locals = s;
25787 return validize_mem (copy_rtx (s->rtl));
25790 static void
25791 ix86_instantiate_decls (void)
25793 struct stack_local_entry *s;
25795 for (s = ix86_stack_locals; s; s = s->next)
25796 if (s->rtl != NULL_RTX)
25797 instantiate_decl_rtl (s->rtl);
25800 /* Check whether x86 address PARTS is a pc-relative address. */
25802 static bool
25803 rip_relative_addr_p (struct ix86_address *parts)
25805 rtx base, index, disp;
25807 base = parts->base;
25808 index = parts->index;
25809 disp = parts->disp;
25811 if (disp && !base && !index)
25813 if (TARGET_64BIT)
25815 rtx symbol = disp;
25817 if (GET_CODE (disp) == CONST)
25818 symbol = XEXP (disp, 0);
25819 if (GET_CODE (symbol) == PLUS
25820 && CONST_INT_P (XEXP (symbol, 1)))
25821 symbol = XEXP (symbol, 0);
25823 if (GET_CODE (symbol) == LABEL_REF
25824 || (GET_CODE (symbol) == SYMBOL_REF
25825 && SYMBOL_REF_TLS_MODEL (symbol) == 0)
25826 || (GET_CODE (symbol) == UNSPEC
25827 && (XINT (symbol, 1) == UNSPEC_GOTPCREL
25828 || XINT (symbol, 1) == UNSPEC_PCREL
25829 || XINT (symbol, 1) == UNSPEC_GOTNTPOFF)))
25830 return true;
25833 return false;
25836 /* Calculate the length of the memory address in the instruction encoding.
25837 Includes addr32 prefix, does not include the one-byte modrm, opcode,
25838 or other prefixes. We never generate addr32 prefix for LEA insn. */
25841 memory_address_length (rtx addr, bool lea)
25843 struct ix86_address parts;
25844 rtx base, index, disp;
25845 int len;
25846 int ok;
25848 if (GET_CODE (addr) == PRE_DEC
25849 || GET_CODE (addr) == POST_INC
25850 || GET_CODE (addr) == PRE_MODIFY
25851 || GET_CODE (addr) == POST_MODIFY)
25852 return 0;
25854 ok = ix86_decompose_address (addr, &parts);
25855 gcc_assert (ok);
25857 len = (parts.seg == SEG_DEFAULT) ? 0 : 1;
25859 /* If this is not LEA instruction, add the length of addr32 prefix. */
25860 if (TARGET_64BIT && !lea
25861 && (SImode_address_operand (addr, VOIDmode)
25862 || (parts.base && GET_MODE (parts.base) == SImode)
25863 || (parts.index && GET_MODE (parts.index) == SImode)))
25864 len++;
25866 base = parts.base;
25867 index = parts.index;
25868 disp = parts.disp;
25870 if (base && GET_CODE (base) == SUBREG)
25871 base = SUBREG_REG (base);
25872 if (index && GET_CODE (index) == SUBREG)
25873 index = SUBREG_REG (index);
25875 gcc_assert (base == NULL_RTX || REG_P (base));
25876 gcc_assert (index == NULL_RTX || REG_P (index));
25878 /* Rule of thumb:
25879 - esp as the base always wants an index,
25880 - ebp as the base always wants a displacement,
25881 - r12 as the base always wants an index,
25882 - r13 as the base always wants a displacement. */
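/* For instance (illustrative): "mov (%ecx), %eax" needs only the one-byte
   modrm, so no extra length is counted; "mov (%esp), %eax" needs a SIB
   byte (+1); "mov (%ebp), %eax" must be encoded as 0(%ebp) with a one-byte
   displacement (+1); and in 64-bit code r12 and r13 behave like esp and
   ebp respectively.  */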
25884 /* Register Indirect. */
25885 if (base && !index && !disp)
25887 /* esp (for its index) and ebp (for its displacement) need
25888 the two-byte modrm form. Similarly for r12 and r13 in 64-bit
25889 code. */
25890 if (base == arg_pointer_rtx
25891 || base == frame_pointer_rtx
25892 || REGNO (base) == SP_REG
25893 || REGNO (base) == BP_REG
25894 || REGNO (base) == R12_REG
25895 || REGNO (base) == R13_REG)
25896 len++;
25899 /* Direct Addressing. In 64-bit mode mod 00 r/m 5
25900 is not disp32, but disp32(%rip), so for disp32
25901 SIB byte is needed, unless print_operand_address
25902 optimizes it into disp32(%rip) or (%rip) is implied
25903 by UNSPEC. */
25904 else if (disp && !base && !index)
25906 len += 4;
25907 if (rip_relative_addr_p (&parts))
25908 len++;
25910 else
25912 /* Find the length of the displacement constant. */
25913 if (disp)
25915 if (base && satisfies_constraint_K (disp))
25916 len += 1;
25917 else
25918 len += 4;
25920 /* ebp always wants a displacement. Similarly r13. */
25921 else if (base && (REGNO (base) == BP_REG || REGNO (base) == R13_REG))
25922 len++;
25924 /* An index requires the two-byte modrm form.... */
25925 if (index
25926 /* ...like esp (or r12), which always wants an index. */
25927 || base == arg_pointer_rtx
25928 || base == frame_pointer_rtx
25929 || (base && (REGNO (base) == SP_REG || REGNO (base) == R12_REG)))
25930 len++;
25933 return len;
25936 /* Compute default value for "length_immediate" attribute. When SHORTFORM
25937 is set, expect that the insn has an 8-bit immediate alternative. */
25939 ix86_attr_length_immediate_default (rtx_insn *insn, bool shortform)
25941 int len = 0;
25942 int i;
25943 extract_insn_cached (insn);
25944 for (i = recog_data.n_operands - 1; i >= 0; --i)
25945 if (CONSTANT_P (recog_data.operand[i]))
25947 enum attr_mode mode = get_attr_mode (insn);
25949 gcc_assert (!len);
25950 if (shortform && CONST_INT_P (recog_data.operand[i]))
25952 HOST_WIDE_INT ival = INTVAL (recog_data.operand[i]);
25953 switch (mode)
25955 case MODE_QI:
25956 len = 1;
25957 continue;
25958 case MODE_HI:
25959 ival = trunc_int_for_mode (ival, HImode);
25960 break;
25961 case MODE_SI:
25962 ival = trunc_int_for_mode (ival, SImode);
25963 break;
25964 default:
25965 break;
25967 if (IN_RANGE (ival, -128, 127))
25969 len = 1;
25970 continue;
25973 switch (mode)
25975 case MODE_QI:
25976 len = 1;
25977 break;
25978 case MODE_HI:
25979 len = 2;
25980 break;
25981 case MODE_SI:
25982 len = 4;
25983 break;
25984 /* Immediates for DImode instructions are encoded
25985 as 32-bit sign-extended values. */
25986 case MODE_DI:
25987 len = 4;
25988 break;
25989 default:
25990 fatal_insn ("unknown insn mode", insn);
25993 return len;
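/* For example (illustrative): "addl $100000, %eax" carries a 4-byte
   immediate, so length_immediate is 4, while "addl $3, %eax" fits the
   sign-extended 8-bit immediate form covered by SHORTFORM, so only
   1 byte is counted.  */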
25996 /* Compute default value for "length_address" attribute. */
25998 ix86_attr_length_address_default (rtx_insn *insn)
26000 int i;
26002 if (get_attr_type (insn) == TYPE_LEA)
26004 rtx set = PATTERN (insn), addr;
26006 if (GET_CODE (set) == PARALLEL)
26007 set = XVECEXP (set, 0, 0);
26009 gcc_assert (GET_CODE (set) == SET);
26011 addr = SET_SRC (set);
26013 return memory_address_length (addr, true);
26016 extract_insn_cached (insn);
26017 for (i = recog_data.n_operands - 1; i >= 0; --i)
26018 if (MEM_P (recog_data.operand[i]))
26020 constrain_operands_cached (insn, reload_completed);
26021 if (which_alternative != -1)
26023 const char *constraints = recog_data.constraints[i];
26024 int alt = which_alternative;
26026 while (*constraints == '=' || *constraints == '+')
26027 constraints++;
26028 while (alt-- > 0)
26029 while (*constraints++ != ',')
26031 /* Skip ignored operands. */
26032 if (*constraints == 'X')
26033 continue;
26035 return memory_address_length (XEXP (recog_data.operand[i], 0), false);
26037 return 0;
26040 /* Compute default value for "length_vex" attribute. It includes
26041 2 or 3 byte VEX prefix and 1 opcode byte. */
26044 ix86_attr_length_vex_default (rtx_insn *insn, bool has_0f_opcode,
26045 bool has_vex_w)
26047 int i;
26049 /* Only the 0f opcode map can use the 2-byte VEX prefix, and the VEX W bit
26050 requires the 3-byte VEX prefix. */
26051 if (!has_0f_opcode || has_vex_w)
26052 return 3 + 1;
26054 /* We can always use 2 byte VEX prefix in 32bit. */
26055 if (!TARGET_64BIT)
26056 return 2 + 1;
26058 extract_insn_cached (insn);
26060 for (i = recog_data.n_operands - 1; i >= 0; --i)
26061 if (REG_P (recog_data.operand[i]))
26063 /* REX.W bit uses 3 byte VEX prefix. */
26064 if (GET_MODE (recog_data.operand[i]) == DImode
26065 && GENERAL_REG_P (recog_data.operand[i]))
26066 return 3 + 1;
26068 else
26070 /* REX.X or REX.B bits use 3 byte VEX prefix. */
26071 if (MEM_P (recog_data.operand[i])
26072 && x86_extended_reg_mentioned_p (recog_data.operand[i]))
26073 return 3 + 1;
26076 return 2 + 1;
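/* For example (illustrative): "vaddps %xmm2, %xmm1, %xmm0" can use the
   two-byte VEX prefix, giving length_vex 2 + 1 = 3, while a 64-bit
   general register operand, VEX.W, or an extended register used as the
   base or index of a memory operand forces the three-byte prefix and
   length_vex 3 + 1 = 4.  */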
26079 /* Return the maximum number of instructions a cpu can issue. */
26081 static int
26082 ix86_issue_rate (void)
26084 switch (ix86_tune)
26086 case PROCESSOR_PENTIUM:
26087 case PROCESSOR_BONNELL:
26088 case PROCESSOR_SILVERMONT:
26089 case PROCESSOR_KNL:
26090 case PROCESSOR_INTEL:
26091 case PROCESSOR_K6:
26092 case PROCESSOR_BTVER2:
26093 case PROCESSOR_PENTIUM4:
26094 case PROCESSOR_NOCONA:
26095 return 2;
26097 case PROCESSOR_PENTIUMPRO:
26098 case PROCESSOR_ATHLON:
26099 case PROCESSOR_K8:
26100 case PROCESSOR_AMDFAM10:
26101 case PROCESSOR_GENERIC:
26102 case PROCESSOR_BTVER1:
26103 return 3;
26105 case PROCESSOR_BDVER1:
26106 case PROCESSOR_BDVER2:
26107 case PROCESSOR_BDVER3:
26108 case PROCESSOR_BDVER4:
26109 case PROCESSOR_CORE2:
26110 case PROCESSOR_NEHALEM:
26111 case PROCESSOR_SANDYBRIDGE:
26112 case PROCESSOR_HASWELL:
26113 return 4;
26115 default:
26116 return 1;
26120 /* A subroutine of ix86_adjust_cost -- return TRUE iff INSN reads flags set
26121 by DEP_INSN and nothing else set by DEP_INSN. */
26123 static bool
26124 ix86_flags_dependent (rtx_insn *insn, rtx_insn *dep_insn, enum attr_type insn_type)
26126 rtx set, set2;
26128 /* Simplify the test for uninteresting insns. */
26129 if (insn_type != TYPE_SETCC
26130 && insn_type != TYPE_ICMOV
26131 && insn_type != TYPE_FCMOV
26132 && insn_type != TYPE_IBR)
26133 return false;
26135 if ((set = single_set (dep_insn)) != 0)
26137 set = SET_DEST (set);
26138 set2 = NULL_RTX;
26140 else if (GET_CODE (PATTERN (dep_insn)) == PARALLEL
26141 && XVECLEN (PATTERN (dep_insn), 0) == 2
26142 && GET_CODE (XVECEXP (PATTERN (dep_insn), 0, 0)) == SET
26143 && GET_CODE (XVECEXP (PATTERN (dep_insn), 0, 1)) == SET)
26145 set = SET_DEST (XVECEXP (PATTERN (dep_insn), 0, 0));
26146 set2 = SET_DEST (XVECEXP (PATTERN (dep_insn), 0, 1));
26148 else
26149 return false;
26151 if (!REG_P (set) || REGNO (set) != FLAGS_REG)
26152 return false;
26154 /* This test is true if the dependent insn reads the flags but
26155 not any other potentially set register. */
26156 if (!reg_overlap_mentioned_p (set, PATTERN (insn)))
26157 return false;
26159 if (set2 && reg_overlap_mentioned_p (set2, PATTERN (insn)))
26160 return false;
26162 return true;
26165 /* Return true iff USE_INSN has a memory address with operands set by
26166 SET_INSN. */
26168 bool
26169 ix86_agi_dependent (rtx_insn *set_insn, rtx_insn *use_insn)
26171 int i;
26172 extract_insn_cached (use_insn);
26173 for (i = recog_data.n_operands - 1; i >= 0; --i)
26174 if (MEM_P (recog_data.operand[i]))
26176 rtx addr = XEXP (recog_data.operand[i], 0);
26177 return modified_in_p (addr, set_insn) != 0;
26179 return false;
26182 /* Helper function for exact_store_load_dependency.
26183 Return true if addr is found in insn. */
26184 static bool
26185 exact_dependency_1 (rtx addr, rtx insn)
26187 enum rtx_code code;
26188 const char *format_ptr;
26189 int i, j;
26191 code = GET_CODE (insn);
26192 switch (code)
26194 case MEM:
26195 if (rtx_equal_p (addr, insn))
26196 return true;
26197 break;
26198 case REG:
26199 CASE_CONST_ANY:
26200 case SYMBOL_REF:
26201 case CODE_LABEL:
26202 case PC:
26203 case CC0:
26204 case EXPR_LIST:
26205 return false;
26206 default:
26207 break;
26210 format_ptr = GET_RTX_FORMAT (code);
26211 for (i = 0; i < GET_RTX_LENGTH (code); i++)
26213 switch (*format_ptr++)
26215 case 'e':
26216 if (exact_dependency_1 (addr, XEXP (insn, i)))
26217 return true;
26218 break;
26219 case 'E':
26220 for (j = 0; j < XVECLEN (insn, i); j++)
26221 if (exact_dependency_1 (addr, XVECEXP (insn, i, j)))
26222 return true;
26223 break;
26226 return false;
26229 /* Return true if there exists exact dependency for store & load, i.e.
26230 the same memory address is used in them. */
26231 static bool
26232 exact_store_load_dependency (rtx_insn *store, rtx_insn *load)
26234 rtx set1, set2;
26236 set1 = single_set (store);
26237 if (!set1)
26238 return false;
26239 if (!MEM_P (SET_DEST (set1)))
26240 return false;
26241 set2 = single_set (load);
26242 if (!set2)
26243 return false;
26244 if (exact_dependency_1 (SET_DEST (set1), SET_SRC (set2)))
26245 return true;
26246 return false;
26249 static int
26250 ix86_adjust_cost (rtx_insn *insn, rtx link, rtx_insn *dep_insn, int cost)
26252 enum attr_type insn_type, dep_insn_type;
26253 enum attr_memory memory;
26254 rtx set, set2;
26255 int dep_insn_code_number;
26257 /* Anti and output dependencies have zero cost on all CPUs. */
26258 if (REG_NOTE_KIND (link) != 0)
26259 return 0;
26261 dep_insn_code_number = recog_memoized (dep_insn);
26263 /* If we can't recognize the insns, we can't really do anything. */
26264 if (dep_insn_code_number < 0 || recog_memoized (insn) < 0)
26265 return cost;
26267 insn_type = get_attr_type (insn);
26268 dep_insn_type = get_attr_type (dep_insn);
26270 switch (ix86_tune)
26272 case PROCESSOR_PENTIUM:
26273 /* Address Generation Interlock adds a cycle of latency. */
26274 if (insn_type == TYPE_LEA)
26276 rtx addr = PATTERN (insn);
26278 if (GET_CODE (addr) == PARALLEL)
26279 addr = XVECEXP (addr, 0, 0);
26281 gcc_assert (GET_CODE (addr) == SET);
26283 addr = SET_SRC (addr);
26284 if (modified_in_p (addr, dep_insn))
26285 cost += 1;
26287 else if (ix86_agi_dependent (dep_insn, insn))
26288 cost += 1;
26290 /* ??? Compares pair with jump/setcc. */
26291 if (ix86_flags_dependent (insn, dep_insn, insn_type))
26292 cost = 0;
26294 /* Floating point stores require value to be ready one cycle earlier. */
26295 if (insn_type == TYPE_FMOV
26296 && get_attr_memory (insn) == MEMORY_STORE
26297 && !ix86_agi_dependent (dep_insn, insn))
26298 cost += 1;
26299 break;
26301 case PROCESSOR_PENTIUMPRO:
26302 /* INT->FP conversion is expensive. */
26303 if (get_attr_fp_int_src (dep_insn))
26304 cost += 5;
26306 /* There is one cycle extra latency between an FP op and a store. */
26307 if (insn_type == TYPE_FMOV
26308 && (set = single_set (dep_insn)) != NULL_RTX
26309 && (set2 = single_set (insn)) != NULL_RTX
26310 && rtx_equal_p (SET_DEST (set), SET_SRC (set2))
26311 && MEM_P (SET_DEST (set2)))
26312 cost += 1;
26314 memory = get_attr_memory (insn);
26316 /* Show ability of reorder buffer to hide latency of load by executing
26317 in parallel with previous instruction in case
26318 previous instruction is not needed to compute the address. */
26319 if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
26320 && !ix86_agi_dependent (dep_insn, insn))
26322 /* Claim moves to take one cycle, as the core can issue one load
26323 at a time and the next load can start a cycle later. */
26324 if (dep_insn_type == TYPE_IMOV
26325 || dep_insn_type == TYPE_FMOV)
26326 cost = 1;
26327 else if (cost > 1)
26328 cost--;
26330 break;
26332 case PROCESSOR_K6:
26333 /* The esp dependency is resolved before
26334 the instruction is really finished. */
26335 if ((insn_type == TYPE_PUSH || insn_type == TYPE_POP)
26336 && (dep_insn_type == TYPE_PUSH || dep_insn_type == TYPE_POP))
26337 return 1;
26339 /* INT->FP conversion is expensive. */
26340 if (get_attr_fp_int_src (dep_insn))
26341 cost += 5;
26343 memory = get_attr_memory (insn);
26345 /* Show ability of reorder buffer to hide latency of load by executing
26346 in parallel with previous instruction in case
26347 previous instruction is not needed to compute the address. */
26348 if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
26349 && !ix86_agi_dependent (dep_insn, insn))
26351 /* Claim moves to take one cycle, as the core can issue one load
26352 at a time and the next load can start a cycle later. */
26353 if (dep_insn_type == TYPE_IMOV
26354 || dep_insn_type == TYPE_FMOV)
26355 cost = 1;
26356 else if (cost > 2)
26357 cost -= 2;
26358 else
26359 cost = 1;
26361 break;
26363 case PROCESSOR_AMDFAM10:
26364 case PROCESSOR_BDVER1:
26365 case PROCESSOR_BDVER2:
26366 case PROCESSOR_BDVER3:
26367 case PROCESSOR_BDVER4:
26368 case PROCESSOR_BTVER1:
26369 case PROCESSOR_BTVER2:
26370 case PROCESSOR_GENERIC:
26371 /* The stack engine allows push and pop instructions to execute in parallel. */
26372 if ((insn_type == TYPE_PUSH || insn_type == TYPE_POP)
26373 && (dep_insn_type == TYPE_PUSH || dep_insn_type == TYPE_POP))
26374 return 0;
26375 /* FALLTHRU */
26377 case PROCESSOR_ATHLON:
26378 case PROCESSOR_K8:
26379 memory = get_attr_memory (insn);
26381 /* Show ability of reorder buffer to hide latency of load by executing
26382 in parallel with previous instruction in case
26383 previous instruction is not needed to compute the address. */
26384 if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
26385 && !ix86_agi_dependent (dep_insn, insn))
26387 enum attr_unit unit = get_attr_unit (insn);
26388 int loadcost = 3;
26390 /* Because of the difference between the length of integer and
26391 floating unit pipeline preparation stages, the memory operands
26392 for floating point are cheaper.
26394 ??? For Athlon the difference is most probably 2. */
26395 if (unit == UNIT_INTEGER || unit == UNIT_UNKNOWN)
26396 loadcost = 3;
26397 else
26398 loadcost = TARGET_ATHLON ? 2 : 0;
26400 if (cost >= loadcost)
26401 cost -= loadcost;
26402 else
26403 cost = 0;
26405 break;
26407 case PROCESSOR_CORE2:
26408 case PROCESSOR_NEHALEM:
26409 case PROCESSOR_SANDYBRIDGE:
26410 case PROCESSOR_HASWELL:
26411 /* The stack engine allows push and pop instructions to execute in parallel. */
26412 if ((insn_type == TYPE_PUSH || insn_type == TYPE_POP)
26413 && (dep_insn_type == TYPE_PUSH || dep_insn_type == TYPE_POP))
26414 return 0;
26416 memory = get_attr_memory (insn);
26418 /* Show ability of reorder buffer to hide latency of load by executing
26419 in parallel with previous instruction in case
26420 previous instruction is not needed to compute the address. */
26421 if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
26422 && !ix86_agi_dependent (dep_insn, insn))
26424 if (cost >= 4)
26425 cost -= 4;
26426 else
26427 cost = 0;
26429 break;
26431 case PROCESSOR_SILVERMONT:
26432 case PROCESSOR_KNL:
26433 case PROCESSOR_INTEL:
26434 if (!reload_completed)
26435 return cost;
26437 /* Increase cost of integer loads. */
26438 memory = get_attr_memory (dep_insn);
26439 if (memory == MEMORY_LOAD || memory == MEMORY_BOTH)
26441 enum attr_unit unit = get_attr_unit (dep_insn);
26442 if (unit == UNIT_INTEGER && cost == 1)
26444 if (memory == MEMORY_LOAD)
26445 cost = 3;
26446 else
26448 /* Increase cost of ld/st for short int types only
26449 because of store forwarding issue. */
26450 rtx set = single_set (dep_insn);
26451 if (set && (GET_MODE (SET_DEST (set)) == QImode
26452 || GET_MODE (SET_DEST (set)) == HImode))
26454 /* Increase cost of store/load insn if exact
26455 dependence exists and it is load insn. */
26456 enum attr_memory insn_memory = get_attr_memory (insn);
26457 if (insn_memory == MEMORY_LOAD
26458 && exact_store_load_dependency (dep_insn, insn))
26459 cost = 3;
26465 default:
26466 break;
26469 return cost;
26472 /* How many alternative schedules to try. This should be as wide as the
26473 scheduling freedom in the DFA, but no wider. Making this value too
26474 large results in extra work for the scheduler. */
26476 static int
26477 ia32_multipass_dfa_lookahead (void)
26479 switch (ix86_tune)
26481 case PROCESSOR_PENTIUM:
26482 return 2;
26484 case PROCESSOR_PENTIUMPRO:
26485 case PROCESSOR_K6:
26486 return 1;
26488 case PROCESSOR_BDVER1:
26489 case PROCESSOR_BDVER2:
26490 case PROCESSOR_BDVER3:
26491 case PROCESSOR_BDVER4:
26492 /* We use lookahead value 4 for BD both before and after reload
26493 schedules. Plan is to have value 8 included for O3. */
26494 return 4;
26496 case PROCESSOR_CORE2:
26497 case PROCESSOR_NEHALEM:
26498 case PROCESSOR_SANDYBRIDGE:
26499 case PROCESSOR_HASWELL:
26500 case PROCESSOR_BONNELL:
26501 case PROCESSOR_SILVERMONT:
26502 case PROCESSOR_KNL:
26503 case PROCESSOR_INTEL:
26504 /* Generally, we want haifa-sched:max_issue() to look ahead as far as the
26505 number of instructions that can be executed in a cycle, i.e.,
26506 issue_rate. I wonder why tuning for many CPUs does not do this. */
26507 if (reload_completed)
26508 return ix86_issue_rate ();
26509 /* Don't use lookahead for pre-reload schedule to save compile time. */
26510 return 0;
26512 default:
26513 return 0;
26517 /* Return true if target platform supports macro-fusion. */
26519 static bool
26520 ix86_macro_fusion_p ()
26522 return TARGET_FUSE_CMP_AND_BRANCH;
26525 /* Check whether the current microarchitecture supports macro fusion
26526 for insn pair "CONDGEN + CONDJMP". Refer to
26527 "Intel Architectures Optimization Reference Manual". */
26529 static bool
26530 ix86_macro_fusion_pair_p (rtx_insn *condgen, rtx_insn *condjmp)
26532 rtx src, dest;
26533 enum rtx_code ccode;
26534 rtx compare_set = NULL_RTX, test_if, cond;
26535 rtx alu_set = NULL_RTX, addr = NULL_RTX;
26537 if (!any_condjump_p (condjmp))
26538 return false;
26540 if (get_attr_type (condgen) != TYPE_TEST
26541 && get_attr_type (condgen) != TYPE_ICMP
26542 && get_attr_type (condgen) != TYPE_INCDEC
26543 && get_attr_type (condgen) != TYPE_ALU)
26544 return false;
26546 compare_set = single_set (condgen);
26547 if (compare_set == NULL_RTX
26548 && !TARGET_FUSE_ALU_AND_BRANCH)
26549 return false;
26551 if (compare_set == NULL_RTX)
26553 int i;
26554 rtx pat = PATTERN (condgen);
26555 for (i = 0; i < XVECLEN (pat, 0); i++)
26556 if (GET_CODE (XVECEXP (pat, 0, i)) == SET)
26558 rtx set_src = SET_SRC (XVECEXP (pat, 0, i));
26559 if (GET_CODE (set_src) == COMPARE)
26560 compare_set = XVECEXP (pat, 0, i);
26561 else
26562 alu_set = XVECEXP (pat, 0, i);
26565 if (compare_set == NULL_RTX)
26566 return false;
26567 src = SET_SRC (compare_set);
26568 if (GET_CODE (src) != COMPARE)
26569 return false;
26571 /* Macro-fusion for cmp/test MEM-IMM + conditional jmp is not
26572 supported. */
26573 if ((MEM_P (XEXP (src, 0))
26574 && CONST_INT_P (XEXP (src, 1)))
26575 || (MEM_P (XEXP (src, 1))
26576 && CONST_INT_P (XEXP (src, 0))))
26577 return false;
26579 /* No fusion for RIP-relative address. */
26580 if (MEM_P (XEXP (src, 0)))
26581 addr = XEXP (XEXP (src, 0), 0);
26582 else if (MEM_P (XEXP (src, 1)))
26583 addr = XEXP (XEXP (src, 1), 0);
26585 if (addr) {
26586 ix86_address parts;
26587 int ok = ix86_decompose_address (addr, &parts);
26588 gcc_assert (ok);
26590 if (rip_relative_addr_p (&parts))
26591 return false;
26594 test_if = SET_SRC (pc_set (condjmp));
26595 cond = XEXP (test_if, 0);
26596 ccode = GET_CODE (cond);
26597 /* Check whether the conditional jump uses the Sign or Overflow flags. */
26598 if (!TARGET_FUSE_CMP_AND_BRANCH_SOFLAGS
26599 && (ccode == GE
26600 || ccode == GT
26601 || ccode == LE
26602 || ccode == LT))
26603 return false;
26605 /* Return true for TYPE_TEST and TYPE_ICMP. */
26606 if (get_attr_type (condgen) == TYPE_TEST
26607 || get_attr_type (condgen) == TYPE_ICMP)
26608 return true;
26610 /* The following handles the macro-fusion case for alu + jmp. */
26611 if (!TARGET_FUSE_ALU_AND_BRANCH || !alu_set)
26612 return false;
26614 /* No fusion for alu op with memory destination operand. */
26615 dest = SET_DEST (alu_set);
26616 if (MEM_P (dest))
26617 return false;
26619 /* Macro-fusion for inc/dec + unsigned conditional jump is not
26620 supported. */
26621 if (get_attr_type (condgen) == TYPE_INCDEC
26622 && (ccode == GEU
26623 || ccode == GTU
26624 || ccode == LEU
26625 || ccode == LTU))
26626 return false;
26628 return true;
26631 /* Try to reorder ready list to take advantage of Atom pipelined IMUL
26632 execution. It is applied if
26633 (1) an IMUL instruction is on the top of the list;
26634 (2) the ready list contains exactly one producer of an independent
26635 IMUL instruction.
26636 Return index of IMUL producer if it was found and -1 otherwise. */
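/* For illustration (hypothetical registers): if the ready list, top shown
   last, is
     { ..., MOV r3 <- x, ..., IMUL r1 <- r1 * r2 }
   and the MOV is the sole producer of another, independent IMUL r4 <- r3 * r5,
   the caller moves the MOV to the top so the second IMUL becomes ready right
   after the first and the multiplies can pipeline.  */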
26637 static int
26638 do_reorder_for_imul (rtx_insn **ready, int n_ready)
26640 rtx_insn *insn;
26641 rtx set, insn1, insn2;
26642 sd_iterator_def sd_it;
26643 dep_t dep;
26644 int index = -1;
26645 int i;
26647 if (!TARGET_BONNELL)
26648 return index;
26650 /* Check that IMUL instruction is on the top of ready list. */
26651 insn = ready[n_ready - 1];
26652 set = single_set (insn);
26653 if (!set)
26654 return index;
26655 if (!(GET_CODE (SET_SRC (set)) == MULT
26656 && GET_MODE (SET_SRC (set)) == SImode))
26657 return index;
26659 /* Search for producer of independent IMUL instruction. */
26660 for (i = n_ready - 2; i >= 0; i--)
26662 insn = ready[i];
26663 if (!NONDEBUG_INSN_P (insn))
26664 continue;
26665 /* Skip IMUL instruction. */
26666 insn2 = PATTERN (insn);
26667 if (GET_CODE (insn2) == PARALLEL)
26668 insn2 = XVECEXP (insn2, 0, 0);
26669 if (GET_CODE (insn2) == SET
26670 && GET_CODE (SET_SRC (insn2)) == MULT
26671 && GET_MODE (SET_SRC (insn2)) == SImode)
26672 continue;
26674 FOR_EACH_DEP (insn, SD_LIST_FORW, sd_it, dep)
26676 rtx con;
26677 con = DEP_CON (dep);
26678 if (!NONDEBUG_INSN_P (con))
26679 continue;
26680 insn1 = PATTERN (con);
26681 if (GET_CODE (insn1) == PARALLEL)
26682 insn1 = XVECEXP (insn1, 0, 0);
26684 if (GET_CODE (insn1) == SET
26685 && GET_CODE (SET_SRC (insn1)) == MULT
26686 && GET_MODE (SET_SRC (insn1)) == SImode)
26688 sd_iterator_def sd_it1;
26689 dep_t dep1;
26690 /* Check if there is no other dependee for IMUL. */
26691 index = i;
26692 FOR_EACH_DEP (con, SD_LIST_BACK, sd_it1, dep1)
26694 rtx pro;
26695 pro = DEP_PRO (dep1);
26696 if (!NONDEBUG_INSN_P (pro))
26697 continue;
26698 if (pro != insn)
26699 index = -1;
26701 if (index >= 0)
26702 break;
26705 if (index >= 0)
26706 break;
26708 return index;
26711 /* Try to find the best candidate on the top of ready list if two insns
26712 have the same priority - candidate is best if its dependees were
26713 scheduled earlier. Applied for Silvermont only.
26714 Return true if top 2 insns must be interchanged. */
26715 static bool
26716 swap_top_of_ready_list (rtx_insn **ready, int n_ready)
26718 rtx_insn *top = ready[n_ready - 1];
26719 rtx_insn *next = ready[n_ready - 2];
26720 rtx set;
26721 sd_iterator_def sd_it;
26722 dep_t dep;
26723 int clock1 = -1;
26724 int clock2 = -1;
26725 #define INSN_TICK(INSN) (HID (INSN)->tick)
26727 if (!TARGET_SILVERMONT && !TARGET_INTEL)
26728 return false;
26730 if (!NONDEBUG_INSN_P (top))
26731 return false;
26732 if (!NONJUMP_INSN_P (top))
26733 return false;
26734 if (!NONDEBUG_INSN_P (next))
26735 return false;
26736 if (!NONJUMP_INSN_P (next))
26737 return false;
26738 set = single_set (top);
26739 if (!set)
26740 return false;
26741 set = single_set (next);
26742 if (!set)
26743 return false;
26745 if (INSN_PRIORITY_KNOWN (top) && INSN_PRIORITY_KNOWN (next))
26747 if (INSN_PRIORITY (top) != INSN_PRIORITY (next))
26748 return false;
26749 /* Determine the winner more precisely. */
26750 FOR_EACH_DEP (top, SD_LIST_RES_BACK, sd_it, dep)
26752 rtx pro;
26753 pro = DEP_PRO (dep);
26754 if (!NONDEBUG_INSN_P (pro))
26755 continue;
26756 if (INSN_TICK (pro) > clock1)
26757 clock1 = INSN_TICK (pro);
26759 FOR_EACH_DEP (next, SD_LIST_RES_BACK, sd_it, dep)
26761 rtx pro;
26762 pro = DEP_PRO (dep);
26763 if (!NONDEBUG_INSN_P (pro))
26764 continue;
26765 if (INSN_TICK (pro) > clock2)
26766 clock2 = INSN_TICK (pro);
26769 if (clock1 == clock2)
26771 /* Determine the winner - a load must win. */
26772 enum attr_memory memory1, memory2;
26773 memory1 = get_attr_memory (top);
26774 memory2 = get_attr_memory (next);
26775 if (memory2 == MEMORY_LOAD && memory1 != MEMORY_LOAD)
26776 return true;
26778 return (bool) (clock2 < clock1);
26780 return false;
26781 #undef INSN_TICK
26784 /* Perform possible reordering of the ready list for Atom/Silvermont only.
26785 Return issue rate. */
26786 static int
26787 ix86_sched_reorder (FILE *dump, int sched_verbose, rtx_insn **ready,
26788 int *pn_ready, int clock_var)
26790 int issue_rate = -1;
26791 int n_ready = *pn_ready;
26792 int i;
26793 rtx_insn *insn;
26794 int index = -1;
26796 /* Set up issue rate. */
26797 issue_rate = ix86_issue_rate ();
26799 /* Do reordering for BONNELL/SILVERMONT only. */
26800 if (!TARGET_BONNELL && !TARGET_SILVERMONT && !TARGET_INTEL)
26801 return issue_rate;
26803 /* Nothing to do if ready list contains only 1 instruction. */
26804 if (n_ready <= 1)
26805 return issue_rate;
26807 /* Do reordering for the post-reload scheduler only. */
26808 if (!reload_completed)
26809 return issue_rate;
26811 if ((index = do_reorder_for_imul (ready, n_ready)) >= 0)
26813 if (sched_verbose > 1)
26814 fprintf (dump, ";;\tatom sched_reorder: put %d insn on top\n",
26815 INSN_UID (ready[index]));
26817 /* Put IMUL producer (ready[index]) at the top of ready list. */
26818 insn = ready[index];
26819 for (i = index; i < n_ready - 1; i++)
26820 ready[i] = ready[i + 1];
26821 ready[n_ready - 1] = insn;
26822 return issue_rate;
26825 /* Skip selective scheduling since HID is not populated in it. */
26826 if (clock_var != 0
26827 && !sel_sched_p ()
26828 && swap_top_of_ready_list (ready, n_ready))
26830 if (sched_verbose > 1)
26831 fprintf (dump, ";;\tslm sched_reorder: swap %d and %d insns\n",
26832 INSN_UID (ready[n_ready - 1]), INSN_UID (ready[n_ready - 2]));
26833 /* Swap 2 top elements of ready list. */
26834 insn = ready[n_ready - 1];
26835 ready[n_ready - 1] = ready[n_ready - 2];
26836 ready[n_ready - 2] = insn;
26838 return issue_rate;
26841 static bool
26842 ix86_class_likely_spilled_p (reg_class_t);
26844 /* Return true if the lhs of INSN is a HW function argument register and
26845 set is_spilled to true if it is a likely-spilled HW register. */
26846 static bool
26847 insn_is_function_arg (rtx insn, bool* is_spilled)
26849 rtx dst;
26851 if (!NONDEBUG_INSN_P (insn))
26852 return false;
26853 /* Call instructions are not movable; ignore them. */
26854 if (CALL_P (insn))
26855 return false;
26856 insn = PATTERN (insn);
26857 if (GET_CODE (insn) == PARALLEL)
26858 insn = XVECEXP (insn, 0, 0);
26859 if (GET_CODE (insn) != SET)
26860 return false;
26861 dst = SET_DEST (insn);
26862 if (REG_P (dst) && HARD_REGISTER_P (dst)
26863 && ix86_function_arg_regno_p (REGNO (dst)))
26865 /* Is it likely spilled HW register? */
26866 if (!TEST_HARD_REG_BIT (fixed_reg_set, REGNO (dst))
26867 && ix86_class_likely_spilled_p (REGNO_REG_CLASS (REGNO (dst))))
26868 *is_spilled = true;
26869 return true;
26871 return false;
26874 /* Add output dependencies for a chain of adjacent function arguments, but
26875 only if there is a move to a likely-spilled HW register. Return the first
26876 argument if at least one dependence was added or NULL otherwise. */
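/* For illustration (a hypothetical 32-bit regparm argument setup):
     mov ..., %eax
     mov ..., %edx
     mov ..., %ecx
     call foo
   The moves set likely-spilled argument registers; the output dependencies
   added below chain such moves together as part of restricting their motion
   before the call (see avoid_func_arg_motion).  */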
26877 static rtx_insn *
26878 add_parameter_dependencies (rtx_insn *call, rtx_insn *head)
26880 rtx_insn *insn;
26881 rtx_insn *last = call;
26882 rtx_insn *first_arg = NULL;
26883 bool is_spilled = false;
26885 head = PREV_INSN (head);
26887 /* Find the argument-passing instruction nearest to the call. */
26888 while (true)
26890 last = PREV_INSN (last);
26891 if (last == head)
26892 return NULL;
26893 if (!NONDEBUG_INSN_P (last))
26894 continue;
26895 if (insn_is_function_arg (last, &is_spilled))
26896 break;
26897 return NULL;
26900 first_arg = last;
26901 while (true)
26903 insn = PREV_INSN (last);
26904 if (!INSN_P (insn))
26905 break;
26906 if (insn == head)
26907 break;
26908 if (!NONDEBUG_INSN_P (insn))
26910 last = insn;
26911 continue;
26913 if (insn_is_function_arg (insn, &is_spilled))
26915 /* Add an output dependence between two function arguments if the chain
26916 of output arguments contains likely-spilled HW registers. */
26917 if (is_spilled)
26918 add_dependence (first_arg, insn, REG_DEP_OUTPUT);
26919 first_arg = last = insn;
26921 else
26922 break;
26924 if (!is_spilled)
26925 return NULL;
26926 return first_arg;
26929 /* Add output or anti dependency from insn to first_arg to restrict its code
26930 motion. */
26931 static void
26932 avoid_func_arg_motion (rtx_insn *first_arg, rtx_insn *insn)
26934 rtx set;
26935 rtx tmp;
26937 /* Add anti dependencies for bounds stores. */
26938 if (INSN_P (insn)
26939 && GET_CODE (PATTERN (insn)) == PARALLEL
26940 && GET_CODE (XVECEXP (PATTERN (insn), 0, 0)) == UNSPEC
26941 && XINT (XVECEXP (PATTERN (insn), 0, 0), 1) == UNSPEC_BNDSTX)
26943 add_dependence (first_arg, insn, REG_DEP_ANTI);
26944 return;
26947 set = single_set (insn);
26948 if (!set)
26949 return;
26950 tmp = SET_DEST (set);
26951 if (REG_P (tmp))
26953 /* Add output dependency to the first function argument. */
26954 add_dependence (first_arg, insn, REG_DEP_OUTPUT);
26955 return;
26957 /* Add anti dependency. */
26958 add_dependence (first_arg, insn, REG_DEP_ANTI);
26961 /* Avoid cross-block motion of a function argument by adding a dependency
26962 from the first non-jump instruction in bb. */
26963 static void
26964 add_dependee_for_func_arg (rtx_insn *arg, basic_block bb)
26966 rtx_insn *insn = BB_END (bb);
26968 while (insn)
26970 if (NONDEBUG_INSN_P (insn) && NONJUMP_INSN_P (insn))
26972 rtx set = single_set (insn);
26973 if (set)
26975 avoid_func_arg_motion (arg, insn);
26976 return;
26979 if (insn == BB_HEAD (bb))
26980 return;
26981 insn = PREV_INSN (insn);
26985 /* Hook for pre-reload schedule - avoid motion of function arguments
26986 passed in likely spilled HW registers. */
26987 static void
26988 ix86_dependencies_evaluation_hook (rtx_insn *head, rtx_insn *tail)
26990 rtx_insn *insn;
26991 rtx_insn *first_arg = NULL;
26992 if (reload_completed)
26993 return;
26994 while (head != tail && DEBUG_INSN_P (head))
26995 head = NEXT_INSN (head);
26996 for (insn = tail; insn != head; insn = PREV_INSN (insn))
26997 if (INSN_P (insn) && CALL_P (insn))
26999 first_arg = add_parameter_dependencies (insn, head);
27000 if (first_arg)
27002 /* Add a dependee for the first argument to predecessors, but only if the
27003 region contains more than one block. */
27004 basic_block bb = BLOCK_FOR_INSN (insn);
27005 int rgn = CONTAINING_RGN (bb->index);
27006 int nr_blks = RGN_NR_BLOCKS (rgn);
27007 /* Skip trivial regions and region head blocks that can have
27008 predecessors outside of region. */
27009 if (nr_blks > 1 && BLOCK_TO_BB (bb->index) != 0)
27011 edge e;
27012 edge_iterator ei;
27014 /* Regions are SCCs with the exception of selective
27015 scheduling with pipelining of outer blocks enabled.
27016 So also check that immediate predecessors of a non-head
27017 block are in the same region. */
27018 FOR_EACH_EDGE (e, ei, bb->preds)
27020 /* Avoid creating loop-carried dependencies by using the
27021 topological ordering of the region. */
27022 if (rgn == CONTAINING_RGN (e->src->index)
27023 && BLOCK_TO_BB (bb->index) > BLOCK_TO_BB (e->src->index))
27024 add_dependee_for_func_arg (first_arg, e->src);
27027 insn = first_arg;
27028 if (insn == head)
27029 break;
27032 else if (first_arg)
27033 avoid_func_arg_motion (first_arg, insn);
27036 /* Hook for pre-reload schedule - set priority of moves from likely spilled
27037 HW registers to maximum, to schedule them as soon as possible. These are
27038 moves from function argument registers at the top of the function entry
27039 and moves from function return value registers after call. */
27040 static int
27041 ix86_adjust_priority (rtx_insn *insn, int priority)
27043 rtx set;
27045 if (reload_completed)
27046 return priority;
27048 if (!NONDEBUG_INSN_P (insn))
27049 return priority;
27051 set = single_set (insn);
27052 if (set)
27054 rtx tmp = SET_SRC (set);
27055 if (REG_P (tmp)
27056 && HARD_REGISTER_P (tmp)
27057 && !TEST_HARD_REG_BIT (fixed_reg_set, REGNO (tmp))
27058 && ix86_class_likely_spilled_p (REGNO_REG_CLASS (REGNO (tmp))))
27059 return current_sched_info->sched_max_insns_priority;
27062 return priority;
27065 /* Model decoder of Core 2/i7.
27066 Below hooks for multipass scheduling (see haifa-sched.c:max_issue)
27067 track the instruction fetch block boundaries and make sure that long
27068 (9+ bytes) instructions are assigned to D0. */
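/* For illustration, with the parameter values set in ix86_sched_init_global
   (8-byte secondary decoder limit, 16-byte ifetch block, at most 6 insns per
   block): a 9-byte insn is only accepted as the first insn of a cycle, and
   once 12 bytes of the ifetch block have been consumed a 5-byte insn no
   longer fits and is filtered out of ready_try for that cycle.  */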
27070 /* Maximum length of an insn that can be handled by
27071 a secondary decoder unit. '8' for Core 2/i7. */
27072 static int core2i7_secondary_decoder_max_insn_size;
27074 /* Ifetch block size, i.e., number of bytes decoder reads per cycle.
27075 '16' for Core 2/i7. */
27076 static int core2i7_ifetch_block_size;
27078 /* Maximum number of instructions decoder can handle per cycle.
27079 '6' for Core 2/i7. */
27080 static int core2i7_ifetch_block_max_insns;
27082 typedef struct ix86_first_cycle_multipass_data_ *
27083 ix86_first_cycle_multipass_data_t;
27084 typedef const struct ix86_first_cycle_multipass_data_ *
27085 const_ix86_first_cycle_multipass_data_t;
27087 /* A variable to store target state across calls to max_issue within
27088 one cycle. */
27089 static struct ix86_first_cycle_multipass_data_ _ix86_first_cycle_multipass_data,
27090 *ix86_first_cycle_multipass_data = &_ix86_first_cycle_multipass_data;
27092 /* Initialize DATA. */
27093 static void
27094 core2i7_first_cycle_multipass_init (void *_data)
27096 ix86_first_cycle_multipass_data_t data
27097 = (ix86_first_cycle_multipass_data_t) _data;
27099 data->ifetch_block_len = 0;
27100 data->ifetch_block_n_insns = 0;
27101 data->ready_try_change = NULL;
27102 data->ready_try_change_size = 0;
27105 /* Advancing the cycle; reset ifetch block counts. */
27106 static void
27107 core2i7_dfa_post_advance_cycle (void)
27109 ix86_first_cycle_multipass_data_t data = ix86_first_cycle_multipass_data;
27111 gcc_assert (data->ifetch_block_n_insns <= core2i7_ifetch_block_max_insns);
27113 data->ifetch_block_len = 0;
27114 data->ifetch_block_n_insns = 0;
27117 static int min_insn_size (rtx_insn *);
27119 /* Filter out insns from ready_try that the core will not be able to issue
27120 on current cycle due to decoder. */
27121 static void
27122 core2i7_first_cycle_multipass_filter_ready_try
27123 (const_ix86_first_cycle_multipass_data_t data,
27124 signed char *ready_try, int n_ready, bool first_cycle_insn_p)
27126 while (n_ready--)
27128 rtx_insn *insn;
27129 int insn_size;
27131 if (ready_try[n_ready])
27132 continue;
27134 insn = get_ready_element (n_ready);
27135 insn_size = min_insn_size (insn);
27137 if (/* If this is too long an insn for a secondary decoder ... */
27138 (!first_cycle_insn_p
27139 && insn_size > core2i7_secondary_decoder_max_insn_size)
27140 /* ... or it would not fit into the ifetch block ... */
27141 || data->ifetch_block_len + insn_size > core2i7_ifetch_block_size
27142 /* ... or the decoder is full already ... */
27143 || data->ifetch_block_n_insns + 1 > core2i7_ifetch_block_max_insns)
27144 /* ... mask the insn out. */
27146 ready_try[n_ready] = 1;
27148 if (data->ready_try_change)
27149 bitmap_set_bit (data->ready_try_change, n_ready);
27154 /* Prepare for a new round of multipass lookahead scheduling. */
27155 static void
27156 core2i7_first_cycle_multipass_begin (void *_data,
27157 signed char *ready_try, int n_ready,
27158 bool first_cycle_insn_p)
27160 ix86_first_cycle_multipass_data_t data
27161 = (ix86_first_cycle_multipass_data_t) _data;
27162 const_ix86_first_cycle_multipass_data_t prev_data
27163 = ix86_first_cycle_multipass_data;
27165 /* Restore the state from the end of the previous round. */
27166 data->ifetch_block_len = prev_data->ifetch_block_len;
27167 data->ifetch_block_n_insns = prev_data->ifetch_block_n_insns;
27169 /* Filter instructions that cannot be issued on current cycle due to
27170 decoder restrictions. */
27171 core2i7_first_cycle_multipass_filter_ready_try (data, ready_try, n_ready,
27172 first_cycle_insn_p);
27175 /* INSN is being issued in current solution. Account for its impact on
27176 the decoder model. */
27177 static void
27178 core2i7_first_cycle_multipass_issue (void *_data,
27179 signed char *ready_try, int n_ready,
27180 rtx_insn *insn, const void *_prev_data)
27182 ix86_first_cycle_multipass_data_t data
27183 = (ix86_first_cycle_multipass_data_t) _data;
27184 const_ix86_first_cycle_multipass_data_t prev_data
27185 = (const_ix86_first_cycle_multipass_data_t) _prev_data;
27187 int insn_size = min_insn_size (insn);
27189 data->ifetch_block_len = prev_data->ifetch_block_len + insn_size;
27190 data->ifetch_block_n_insns = prev_data->ifetch_block_n_insns + 1;
27191 gcc_assert (data->ifetch_block_len <= core2i7_ifetch_block_size
27192 && data->ifetch_block_n_insns <= core2i7_ifetch_block_max_insns);
27194 /* Allocate or resize the bitmap for storing INSN's effect on ready_try. */
27195 if (!data->ready_try_change)
27197 data->ready_try_change = sbitmap_alloc (n_ready);
27198 data->ready_try_change_size = n_ready;
27200 else if (data->ready_try_change_size < n_ready)
27202 data->ready_try_change = sbitmap_resize (data->ready_try_change,
27203 n_ready, 0);
27204 data->ready_try_change_size = n_ready;
27206 bitmap_clear (data->ready_try_change);
27208 /* Filter out insns from ready_try that the core will not be able to issue
27209 on current cycle due to decoder. */
27210 core2i7_first_cycle_multipass_filter_ready_try (data, ready_try, n_ready,
27211 false);
27214 /* Revert the effect on ready_try. */
27215 static void
27216 core2i7_first_cycle_multipass_backtrack (const void *_data,
27217 signed char *ready_try,
27218 int n_ready ATTRIBUTE_UNUSED)
27220 const_ix86_first_cycle_multipass_data_t data
27221 = (const_ix86_first_cycle_multipass_data_t) _data;
27222 unsigned int i = 0;
27223 sbitmap_iterator sbi;
27225 gcc_assert (bitmap_last_set_bit (data->ready_try_change) < n_ready);
27226 EXECUTE_IF_SET_IN_BITMAP (data->ready_try_change, 0, i, sbi)
27228 ready_try[i] = 0;
27232 /* Save the result of multipass lookahead scheduling for the next round. */
27233 static void
27234 core2i7_first_cycle_multipass_end (const void *_data)
27236 const_ix86_first_cycle_multipass_data_t data
27237 = (const_ix86_first_cycle_multipass_data_t) _data;
27238 ix86_first_cycle_multipass_data_t next_data
27239 = ix86_first_cycle_multipass_data;
27241 if (data != NULL)
27243 next_data->ifetch_block_len = data->ifetch_block_len;
27244 next_data->ifetch_block_n_insns = data->ifetch_block_n_insns;
27248 /* Deallocate target data. */
27249 static void
27250 core2i7_first_cycle_multipass_fini (void *_data)
27252 ix86_first_cycle_multipass_data_t data
27253 = (ix86_first_cycle_multipass_data_t) _data;
27255 if (data->ready_try_change)
27257 sbitmap_free (data->ready_try_change);
27258 data->ready_try_change = NULL;
27259 data->ready_try_change_size = 0;
27263 /* Prepare for scheduling pass. */
27264 static void
27265 ix86_sched_init_global (FILE *, int, int)
27267 /* Install scheduling hooks for current CPU. Some of these hooks are used
27268 in time-critical parts of the scheduler, so we only set them up when
27269 they are actually used. */
27270 switch (ix86_tune)
27272 case PROCESSOR_CORE2:
27273 case PROCESSOR_NEHALEM:
27274 case PROCESSOR_SANDYBRIDGE:
27275 case PROCESSOR_HASWELL:
27276 /* Do not perform multipass scheduling for pre-reload schedule
27277 to save compile time. */
27278 if (reload_completed)
27280 targetm.sched.dfa_post_advance_cycle
27281 = core2i7_dfa_post_advance_cycle;
27282 targetm.sched.first_cycle_multipass_init
27283 = core2i7_first_cycle_multipass_init;
27284 targetm.sched.first_cycle_multipass_begin
27285 = core2i7_first_cycle_multipass_begin;
27286 targetm.sched.first_cycle_multipass_issue
27287 = core2i7_first_cycle_multipass_issue;
27288 targetm.sched.first_cycle_multipass_backtrack
27289 = core2i7_first_cycle_multipass_backtrack;
27290 targetm.sched.first_cycle_multipass_end
27291 = core2i7_first_cycle_multipass_end;
27292 targetm.sched.first_cycle_multipass_fini
27293 = core2i7_first_cycle_multipass_fini;
27295 /* Set decoder parameters. */
27296 core2i7_secondary_decoder_max_insn_size = 8;
27297 core2i7_ifetch_block_size = 16;
27298 core2i7_ifetch_block_max_insns = 6;
27299 break;
27301 /* ... Fall through ... */
27302 default:
27303 targetm.sched.dfa_post_advance_cycle = NULL;
27304 targetm.sched.first_cycle_multipass_init = NULL;
27305 targetm.sched.first_cycle_multipass_begin = NULL;
27306 targetm.sched.first_cycle_multipass_issue = NULL;
27307 targetm.sched.first_cycle_multipass_backtrack = NULL;
27308 targetm.sched.first_cycle_multipass_end = NULL;
27309 targetm.sched.first_cycle_multipass_fini = NULL;
27310 break;
27315 /* Compute the alignment given to a constant that is being placed in memory.
27316 EXP is the constant and ALIGN is the alignment that the object would
27317 ordinarily have.
27318 The value of this function is used instead of that alignment to align
27319 the object. */
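/* For example, a DFmode (double) constant whose ordinary alignment is below
   64 bits is returned with 64-bit alignment, a 128-bit-mode constant with
   128-bit alignment, and a long string constant with word alignment when not
   optimizing for size.  */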
27322 ix86_constant_alignment (tree exp, int align)
27324 if (TREE_CODE (exp) == REAL_CST || TREE_CODE (exp) == VECTOR_CST
27325 || TREE_CODE (exp) == INTEGER_CST)
27327 if (TYPE_MODE (TREE_TYPE (exp)) == DFmode && align < 64)
27328 return 64;
27329 else if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (exp))) && align < 128)
27330 return 128;
27332 else if (!optimize_size && TREE_CODE (exp) == STRING_CST
27333 && TREE_STRING_LENGTH (exp) >= 31 && align < BITS_PER_WORD)
27334 return BITS_PER_WORD;
27336 return align;
27339 /* Compute the alignment for a static variable.
27340 TYPE is the data type, and ALIGN is the alignment that
27341 the object would ordinarily have. The value of this function is used
27342 instead of that alignment to align the object. */
27345 ix86_data_alignment (tree type, int align, bool opt)
27347 /* GCC 4.8 and earlier used to incorrectly assume this alignment even
27348 for symbols from other compilation units or symbols that don't need
27349 to bind locally. In order to preserve some ABI compatibility with
27350 those compilers, ensure we don't decrease alignment from what we
27351 used to assume. */
27353 int max_align_compat = MIN (256, MAX_OFILE_ALIGNMENT);
27355 /* A data structure, equal to or greater than the size of a cache line
27356 (64 bytes in the Pentium 4 and other recent Intel processors, including
27357 processors based on Intel Core microarchitecture) should be aligned
27358 so that its base address is a multiple of the cache line size. */
27360 int max_align
27361 = MIN ((unsigned) ix86_tune_cost->prefetch_block * 8, MAX_OFILE_ALIGNMENT);
27363 if (max_align < BITS_PER_WORD)
27364 max_align = BITS_PER_WORD;
27366 switch (ix86_align_data_type)
27368 case ix86_align_data_type_abi: opt = false; break;
27369 case ix86_align_data_type_compat: max_align = BITS_PER_WORD; break;
27370 case ix86_align_data_type_cacheline: break;
27373 if (opt
27374 && AGGREGATE_TYPE_P (type)
27375 && TYPE_SIZE (type)
27376 && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST)
27378 if (wi::geu_p (TYPE_SIZE (type), max_align_compat)
27379 && align < max_align_compat)
27380 align = max_align_compat;
27381 if (wi::geu_p (TYPE_SIZE (type), max_align)
27382 && align < max_align)
27383 align = max_align;
27386 /* The x86-64 ABI requires arrays greater than 16 bytes to be aligned
27387 to a 16-byte boundary. */
27388 if (TARGET_64BIT)
27390 if ((opt ? AGGREGATE_TYPE_P (type) : TREE_CODE (type) == ARRAY_TYPE)
27391 && TYPE_SIZE (type)
27392 && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
27393 && wi::geu_p (TYPE_SIZE (type), 128)
27394 && align < 128)
27395 return 128;
27398 if (!opt)
27399 return align;
27401 if (TREE_CODE (type) == ARRAY_TYPE)
27403 if (TYPE_MODE (TREE_TYPE (type)) == DFmode && align < 64)
27404 return 64;
27405 if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (type))) && align < 128)
27406 return 128;
27408 else if (TREE_CODE (type) == COMPLEX_TYPE)
27411 if (TYPE_MODE (type) == DCmode && align < 64)
27412 return 64;
27413 if ((TYPE_MODE (type) == XCmode
27414 || TYPE_MODE (type) == TCmode) && align < 128)
27415 return 128;
27417 else if ((TREE_CODE (type) == RECORD_TYPE
27418 || TREE_CODE (type) == UNION_TYPE
27419 || TREE_CODE (type) == QUAL_UNION_TYPE)
27420 && TYPE_FIELDS (type))
27422 if (DECL_MODE (TYPE_FIELDS (type)) == DFmode && align < 64)
27423 return 64;
27424 if (ALIGN_MODE_128 (DECL_MODE (TYPE_FIELDS (type))) && align < 128)
27425 return 128;
27427 else if (TREE_CODE (type) == REAL_TYPE || TREE_CODE (type) == VECTOR_TYPE
27428 || TREE_CODE (type) == INTEGER_TYPE)
27430 if (TYPE_MODE (type) == DFmode && align < 64)
27431 return 64;
27432 if (ALIGN_MODE_128 (TYPE_MODE (type)) && align < 128)
27433 return 128;
27436 return align;
27439 /* Compute the alignment for a local variable or a stack slot. EXP is
27440 the data type or decl itself, MODE is the widest mode available and
27441 ALIGN is the alignment that the object would ordinarily have. The
27442 value of this macro is used instead of that alignment to align the
27443 object. */
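/* For example, in 32-bit code with -mpreferred-stack-boundary=2 a local
   long long is given 32-bit rather than 64-bit alignment, while on x86-64
   with SSE a sufficiently large local aggregate is raised to 128-bit
   alignment so aligned SSE accesses can be used.  */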
27445 unsigned int
27446 ix86_local_alignment (tree exp, machine_mode mode,
27447 unsigned int align)
27449 tree type, decl;
27451 if (exp && DECL_P (exp))
27453 type = TREE_TYPE (exp);
27454 decl = exp;
27456 else
27458 type = exp;
27459 decl = NULL;
27462 /* Don't do dynamic stack realignment for long long objects with
27463 -mpreferred-stack-boundary=2. */
27464 if (!TARGET_64BIT
27465 && align == 64
27466 && ix86_preferred_stack_boundary < 64
27467 && (mode == DImode || (type && TYPE_MODE (type) == DImode))
27468 && (!type || !TYPE_USER_ALIGN (type))
27469 && (!decl || !DECL_USER_ALIGN (decl)))
27470 align = 32;
27472 /* If TYPE is NULL, we are allocating a stack slot for caller-save
27473 register in MODE. We will return the largest alignment of XF
27474 and DF. */
27475 if (!type)
27477 if (mode == XFmode && align < GET_MODE_ALIGNMENT (DFmode))
27478 align = GET_MODE_ALIGNMENT (DFmode);
27479 return align;
27482 /* The x86-64 ABI requires arrays greater than 16 bytes to be aligned
27483 to a 16-byte boundary. The exact wording is:
27485 An array uses the same alignment as its elements, except that a local or
27486 global array variable of length at least 16 bytes or
27487 a C99 variable-length array variable always has alignment of at least 16 bytes.
27489 This was added to allow use of aligned SSE instructions on arrays. This
27490 rule is meant for static storage (where the compiler cannot do the analysis
27491 by itself). We follow it for automatic variables only when convenient.
27492 We fully control everything in the function being compiled, and functions
27493 from other units cannot rely on the alignment.
27495 Exclude the va_list type. It is the common case of a local array where
27496 we cannot benefit from the alignment.
27498 TODO: Probably one should optimize for size only when var is not escaping. */
27499 if (TARGET_64BIT && optimize_function_for_speed_p (cfun)
27500 && TARGET_SSE)
27502 if (AGGREGATE_TYPE_P (type)
27503 && (va_list_type_node == NULL_TREE
27504 || (TYPE_MAIN_VARIANT (type)
27505 != TYPE_MAIN_VARIANT (va_list_type_node)))
27506 && TYPE_SIZE (type)
27507 && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
27508 && wi::geu_p (TYPE_SIZE (type), 16)
27509 && align < 128)
27510 return 128;
27512 if (TREE_CODE (type) == ARRAY_TYPE)
27514 if (TYPE_MODE (TREE_TYPE (type)) == DFmode && align < 64)
27515 return 64;
27516 if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (type))) && align < 128)
27517 return 128;
27519 else if (TREE_CODE (type) == COMPLEX_TYPE)
27521 if (TYPE_MODE (type) == DCmode && align < 64)
27522 return 64;
27523 if ((TYPE_MODE (type) == XCmode
27524 || TYPE_MODE (type) == TCmode) && align < 128)
27525 return 128;
27527 else if ((TREE_CODE (type) == RECORD_TYPE
27528 || TREE_CODE (type) == UNION_TYPE
27529 || TREE_CODE (type) == QUAL_UNION_TYPE)
27530 && TYPE_FIELDS (type))
27532 if (DECL_MODE (TYPE_FIELDS (type)) == DFmode && align < 64)
27533 return 64;
27534 if (ALIGN_MODE_128 (DECL_MODE (TYPE_FIELDS (type))) && align < 128)
27535 return 128;
27537 else if (TREE_CODE (type) == REAL_TYPE || TREE_CODE (type) == VECTOR_TYPE
27538 || TREE_CODE (type) == INTEGER_TYPE)
27541 if (TYPE_MODE (type) == DFmode && align < 64)
27542 return 64;
27543 if (ALIGN_MODE_128 (TYPE_MODE (type)) && align < 128)
27544 return 128;
27546 return align;
27549 /* Compute the minimum required alignment for dynamic stack realignment
27550 purposes for a local variable, parameter or a stack slot. EXP is
27551 the data type or decl itself, MODE is its mode and ALIGN is the
27552 alignment that the object would ordinarily have. */
27554 unsigned int
27555 ix86_minimum_alignment (tree exp, machine_mode mode,
27556 unsigned int align)
27558 tree type, decl;
27560 if (exp && DECL_P (exp))
27562 type = TREE_TYPE (exp);
27563 decl = exp;
27565 else
27567 type = exp;
27568 decl = NULL;
27571 if (TARGET_64BIT || align != 64 || ix86_preferred_stack_boundary >= 64)
27572 return align;
27574 /* Don't do dynamic stack realignment for long long objects with
27575 -mpreferred-stack-boundary=2. */
27576 if ((mode == DImode || (type && TYPE_MODE (type) == DImode))
27577 && (!type || !TYPE_USER_ALIGN (type))
27578 && (!decl || !DECL_USER_ALIGN (decl)))
27579 return 32;
27581 return align;
27584 /* Find a location for the static chain incoming to a nested function.
27585 This is a register, unless all free registers are used by arguments. */
27587 static rtx
27588 ix86_static_chain (const_tree fndecl_or_type, bool incoming_p)
27590 unsigned regno;
27592 /* While this function won't be called by the middle-end when a static
27593 chain isn't needed, it's also used throughout the backend so it's
27594 easiest to keep this check centralized. */
27595 if (DECL_P (fndecl_or_type) && !DECL_STATIC_CHAIN (fndecl_or_type))
27596 return NULL;
27598 if (TARGET_64BIT)
27600 /* We always use R10 in 64-bit mode. */
27601 regno = R10_REG;
27603 else
27605 const_tree fntype, fndecl;
27606 unsigned int ccvt;
27608 /* By default in 32-bit mode we use ECX to pass the static chain. */
27609 regno = CX_REG;
27611 if (TREE_CODE (fndecl_or_type) == FUNCTION_DECL)
27613 fntype = TREE_TYPE (fndecl_or_type);
27614 fndecl = fndecl_or_type;
27616 else
27618 fntype = fndecl_or_type;
27619 fndecl = NULL;
27622 ccvt = ix86_get_callcvt (fntype);
27623 if ((ccvt & IX86_CALLCVT_FASTCALL) != 0)
27625 /* Fastcall functions use ecx/edx for arguments, which leaves
27626 us with EAX for the static chain.
27627 Thiscall functions use ecx for arguments, which also
27628 leaves us with EAX for the static chain. */
27629 regno = AX_REG;
27631 else if ((ccvt & IX86_CALLCVT_THISCALL) != 0)
27633 /* Thiscall functions use ecx for arguments, which leaves
27634 us with EAX and EDX for the static chain.
27635 We use EAX for ABI compatibility. */
27636 regno = AX_REG;
27638 else if (ix86_function_regparm (fntype, fndecl) == 3)
27640 /* For regparm 3, we have no free call-clobbered registers in
27641 which to store the static chain. In order to implement this,
27642 we have the trampoline push the static chain to the stack.
27643 However, we can't push a value below the return address when
27644 we call the nested function directly, so we have to use an
27645 alternate entry point. For this we use ESI, and have the
27646 alternate entry point push ESI, so that things appear the
27647 same once we're executing the nested function. */
27648 if (incoming_p)
27650 if (fndecl == current_function_decl)
27651 ix86_static_chain_on_stack = true;
27652 return gen_frame_mem (SImode,
27653 plus_constant (Pmode,
27654 arg_pointer_rtx, -8));
27656 regno = SI_REG;
27660 return gen_rtx_REG (Pmode, regno);
27663 /* Emit RTL insns to initialize the variable parts of a trampoline.
27664 FNDECL is the decl of the target address; M_TRAMP is a MEM for
27665 the trampoline, and CHAIN_VALUE is an RTX for the static chain
27666 to be passed to the target function. */
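/* For illustration, the 64-bit trampoline emitted below is, at most:
     49 bb <imm64>    movabs $fnaddr, %r11   (41 bb <imm32>, i.e. movl, when a
     49 ba <imm64>    movabs $chain,  %r10    zero-extended 32-bit value fits)
     49 ff e3 90      jmp *%r11 ; nop
   In 32-bit mode the static chain value is either loaded with b8/b9 <imm32>
   (mov into %eax/%ecx) or pushed with 68 <imm32>, followed by an e9 <rel32>
   jump towards the target function.  */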
27668 static void
27669 ix86_trampoline_init (rtx m_tramp, tree fndecl, rtx chain_value)
27671 rtx mem, fnaddr;
27672 int opcode;
27673 int offset = 0;
27675 fnaddr = XEXP (DECL_RTL (fndecl), 0);
27677 if (TARGET_64BIT)
27679 int size;
27681 /* Load the function address to r11. Try to load address using
27682 the shorter movl instead of movabs. We may want to support
27683 movq for kernel mode, but kernel does not use trampolines at
27684 the moment. FNADDR is a 32-bit address and may not be in
27685 DImode when ptr_mode == SImode. Always use movl in this
27686 case. */
27687 if (ptr_mode == SImode
27688 || x86_64_zext_immediate_operand (fnaddr, VOIDmode))
27690 fnaddr = copy_addr_to_reg (fnaddr);
27692 mem = adjust_address (m_tramp, HImode, offset);
27693 emit_move_insn (mem, gen_int_mode (0xbb41, HImode));
27695 mem = adjust_address (m_tramp, SImode, offset + 2);
27696 emit_move_insn (mem, gen_lowpart (SImode, fnaddr));
27697 offset += 6;
27699 else
27701 mem = adjust_address (m_tramp, HImode, offset);
27702 emit_move_insn (mem, gen_int_mode (0xbb49, HImode));
27704 mem = adjust_address (m_tramp, DImode, offset + 2);
27705 emit_move_insn (mem, fnaddr);
27706 offset += 10;
27709 /* Load static chain using movabs to r10. Use the shorter movl
27710 instead of movabs when ptr_mode == SImode. */
27711 if (ptr_mode == SImode)
27713 opcode = 0xba41;
27714 size = 6;
27716 else
27718 opcode = 0xba49;
27719 size = 10;
27722 mem = adjust_address (m_tramp, HImode, offset);
27723 emit_move_insn (mem, gen_int_mode (opcode, HImode));
27725 mem = adjust_address (m_tramp, ptr_mode, offset + 2);
27726 emit_move_insn (mem, chain_value);
27727 offset += size;
27729 /* Jump to r11; the last (unused) byte is a nop, only there to
27730 pad the write out to a single 32-bit store. */
27731 mem = adjust_address (m_tramp, SImode, offset);
27732 emit_move_insn (mem, gen_int_mode (0x90e3ff49, SImode));
27733 offset += 4;
27735 else
27737 rtx disp, chain;
27739 /* Depending on the static chain location, either load a register
27740 with a constant, or push the constant to the stack. All of the
27741 instructions are the same size. */
27742 chain = ix86_static_chain (fndecl, true);
27743 if (REG_P (chain))
27745 switch (REGNO (chain))
27747 case AX_REG:
27748 opcode = 0xb8; break;
27749 case CX_REG:
27750 opcode = 0xb9; break;
27751 default:
27752 gcc_unreachable ();
27755 else
27756 opcode = 0x68;
27758 mem = adjust_address (m_tramp, QImode, offset);
27759 emit_move_insn (mem, gen_int_mode (opcode, QImode));
27761 mem = adjust_address (m_tramp, SImode, offset + 1);
27762 emit_move_insn (mem, chain_value);
27763 offset += 5;
27765 mem = adjust_address (m_tramp, QImode, offset);
27766 emit_move_insn (mem, gen_int_mode (0xe9, QImode));
27768 mem = adjust_address (m_tramp, SImode, offset + 1);
27770 /* Compute offset from the end of the jmp to the target function.
27771 In the case in which the trampoline stores the static chain on
27772 the stack, we need to skip the first insn which pushes the
27773 (call-saved) register static chain; this push is 1 byte. */
27774 offset += 5;
27775 disp = expand_binop (SImode, sub_optab, fnaddr,
27776 plus_constant (Pmode, XEXP (m_tramp, 0),
27777 offset - (MEM_P (chain) ? 1 : 0)),
27778 NULL_RTX, 1, OPTAB_DIRECT);
27779 emit_move_insn (mem, disp);
27782 gcc_assert (offset <= TRAMPOLINE_SIZE);
27784 #ifdef HAVE_ENABLE_EXECUTE_STACK
27785 #ifdef CHECK_EXECUTE_STACK_ENABLED
27786 if (CHECK_EXECUTE_STACK_ENABLED)
27787 #endif
27788 emit_library_call (gen_rtx_SYMBOL_REF (Pmode, "__enable_execute_stack"),
27789 LCT_NORMAL, VOIDmode, 1, XEXP (m_tramp, 0), Pmode);
27790 #endif
27793 /* The following file contains several enumerations and data structures
27794 built from the definitions in i386-builtin-types.def. */
27796 #include "i386-builtin-types.inc"
27798 /* Table for the ix86 builtin non-function types. */
27799 static GTY(()) tree ix86_builtin_type_tab[(int) IX86_BT_LAST_CPTR + 1];
27801 /* Retrieve an element from the above table, building some of
27802 the types lazily. */
27804 static tree
27805 ix86_get_builtin_type (enum ix86_builtin_type tcode)
27807 unsigned int index;
27808 tree type, itype;
27810 gcc_assert ((unsigned)tcode < ARRAY_SIZE(ix86_builtin_type_tab));
27812 type = ix86_builtin_type_tab[(int) tcode];
27813 if (type != NULL)
27814 return type;
27816 gcc_assert (tcode > IX86_BT_LAST_PRIM);
27817 if (tcode <= IX86_BT_LAST_VECT)
27819 machine_mode mode;
27821 index = tcode - IX86_BT_LAST_PRIM - 1;
27822 itype = ix86_get_builtin_type (ix86_builtin_type_vect_base[index]);
27823 mode = ix86_builtin_type_vect_mode[index];
27825 type = build_vector_type_for_mode (itype, mode);
27827 else
27829 int quals;
27831 index = tcode - IX86_BT_LAST_VECT - 1;
27832 if (tcode <= IX86_BT_LAST_PTR)
27833 quals = TYPE_UNQUALIFIED;
27834 else
27835 quals = TYPE_QUAL_CONST;
27837 itype = ix86_get_builtin_type (ix86_builtin_type_ptr_base[index]);
27838 if (quals != TYPE_UNQUALIFIED)
27839 itype = build_qualified_type (itype, quals);
27841 type = build_pointer_type (itype);
27844 ix86_builtin_type_tab[(int) tcode] = type;
27845 return type;
27848 /* Table for the ix86 builtin function types. */
27849 static GTY(()) tree ix86_builtin_func_type_tab[(int) IX86_BT_LAST_ALIAS + 1];
27851 /* Retrieve an element from the above table, building some of
27852 the types lazily. */
27854 static tree
27855 ix86_get_builtin_func_type (enum ix86_builtin_func_type tcode)
27857 tree type;
27859 gcc_assert ((unsigned)tcode < ARRAY_SIZE (ix86_builtin_func_type_tab));
27861 type = ix86_builtin_func_type_tab[(int) tcode];
27862 if (type != NULL)
27863 return type;
27865 if (tcode <= IX86_BT_LAST_FUNC)
27867 unsigned start = ix86_builtin_func_start[(int) tcode];
27868 unsigned after = ix86_builtin_func_start[(int) tcode + 1];
27869 tree rtype, atype, args = void_list_node;
27870 unsigned i;
27872 rtype = ix86_get_builtin_type (ix86_builtin_func_args[start]);
27873 for (i = after - 1; i > start; --i)
27875 atype = ix86_get_builtin_type (ix86_builtin_func_args[i]);
27876 args = tree_cons (NULL, atype, args);
27879 type = build_function_type (rtype, args);
27881 else
27883 unsigned index = tcode - IX86_BT_LAST_FUNC - 1;
27884 enum ix86_builtin_func_type icode;
27886 icode = ix86_builtin_func_alias_base[index];
27887 type = ix86_get_builtin_func_type (icode);
27890 ix86_builtin_func_type_tab[(int) tcode] = type;
27891 return type;
27895 /* Codes for all the SSE/MMX builtins. */
27896 enum ix86_builtins
27898 IX86_BUILTIN_ADDPS,
27899 IX86_BUILTIN_ADDSS,
27900 IX86_BUILTIN_DIVPS,
27901 IX86_BUILTIN_DIVSS,
27902 IX86_BUILTIN_MULPS,
27903 IX86_BUILTIN_MULSS,
27904 IX86_BUILTIN_SUBPS,
27905 IX86_BUILTIN_SUBSS,
27907 IX86_BUILTIN_CMPEQPS,
27908 IX86_BUILTIN_CMPLTPS,
27909 IX86_BUILTIN_CMPLEPS,
27910 IX86_BUILTIN_CMPGTPS,
27911 IX86_BUILTIN_CMPGEPS,
27912 IX86_BUILTIN_CMPNEQPS,
27913 IX86_BUILTIN_CMPNLTPS,
27914 IX86_BUILTIN_CMPNLEPS,
27915 IX86_BUILTIN_CMPNGTPS,
27916 IX86_BUILTIN_CMPNGEPS,
27917 IX86_BUILTIN_CMPORDPS,
27918 IX86_BUILTIN_CMPUNORDPS,
27919 IX86_BUILTIN_CMPEQSS,
27920 IX86_BUILTIN_CMPLTSS,
27921 IX86_BUILTIN_CMPLESS,
27922 IX86_BUILTIN_CMPNEQSS,
27923 IX86_BUILTIN_CMPNLTSS,
27924 IX86_BUILTIN_CMPNLESS,
27925 IX86_BUILTIN_CMPORDSS,
27926 IX86_BUILTIN_CMPUNORDSS,
27928 IX86_BUILTIN_COMIEQSS,
27929 IX86_BUILTIN_COMILTSS,
27930 IX86_BUILTIN_COMILESS,
27931 IX86_BUILTIN_COMIGTSS,
27932 IX86_BUILTIN_COMIGESS,
27933 IX86_BUILTIN_COMINEQSS,
27934 IX86_BUILTIN_UCOMIEQSS,
27935 IX86_BUILTIN_UCOMILTSS,
27936 IX86_BUILTIN_UCOMILESS,
27937 IX86_BUILTIN_UCOMIGTSS,
27938 IX86_BUILTIN_UCOMIGESS,
27939 IX86_BUILTIN_UCOMINEQSS,
27941 IX86_BUILTIN_CVTPI2PS,
27942 IX86_BUILTIN_CVTPS2PI,
27943 IX86_BUILTIN_CVTSI2SS,
27944 IX86_BUILTIN_CVTSI642SS,
27945 IX86_BUILTIN_CVTSS2SI,
27946 IX86_BUILTIN_CVTSS2SI64,
27947 IX86_BUILTIN_CVTTPS2PI,
27948 IX86_BUILTIN_CVTTSS2SI,
27949 IX86_BUILTIN_CVTTSS2SI64,
27951 IX86_BUILTIN_MAXPS,
27952 IX86_BUILTIN_MAXSS,
27953 IX86_BUILTIN_MINPS,
27954 IX86_BUILTIN_MINSS,
27956 IX86_BUILTIN_LOADUPS,
27957 IX86_BUILTIN_STOREUPS,
27958 IX86_BUILTIN_MOVSS,
27960 IX86_BUILTIN_MOVHLPS,
27961 IX86_BUILTIN_MOVLHPS,
27962 IX86_BUILTIN_LOADHPS,
27963 IX86_BUILTIN_LOADLPS,
27964 IX86_BUILTIN_STOREHPS,
27965 IX86_BUILTIN_STORELPS,
27967 IX86_BUILTIN_MASKMOVQ,
27968 IX86_BUILTIN_MOVMSKPS,
27969 IX86_BUILTIN_PMOVMSKB,
27971 IX86_BUILTIN_MOVNTPS,
27972 IX86_BUILTIN_MOVNTQ,
27974 IX86_BUILTIN_LOADDQU,
27975 IX86_BUILTIN_STOREDQU,
27977 IX86_BUILTIN_PACKSSWB,
27978 IX86_BUILTIN_PACKSSDW,
27979 IX86_BUILTIN_PACKUSWB,
27981 IX86_BUILTIN_PADDB,
27982 IX86_BUILTIN_PADDW,
27983 IX86_BUILTIN_PADDD,
27984 IX86_BUILTIN_PADDQ,
27985 IX86_BUILTIN_PADDSB,
27986 IX86_BUILTIN_PADDSW,
27987 IX86_BUILTIN_PADDUSB,
27988 IX86_BUILTIN_PADDUSW,
27989 IX86_BUILTIN_PSUBB,
27990 IX86_BUILTIN_PSUBW,
27991 IX86_BUILTIN_PSUBD,
27992 IX86_BUILTIN_PSUBQ,
27993 IX86_BUILTIN_PSUBSB,
27994 IX86_BUILTIN_PSUBSW,
27995 IX86_BUILTIN_PSUBUSB,
27996 IX86_BUILTIN_PSUBUSW,
27998 IX86_BUILTIN_PAND,
27999 IX86_BUILTIN_PANDN,
28000 IX86_BUILTIN_POR,
28001 IX86_BUILTIN_PXOR,
28003 IX86_BUILTIN_PAVGB,
28004 IX86_BUILTIN_PAVGW,
28006 IX86_BUILTIN_PCMPEQB,
28007 IX86_BUILTIN_PCMPEQW,
28008 IX86_BUILTIN_PCMPEQD,
28009 IX86_BUILTIN_PCMPGTB,
28010 IX86_BUILTIN_PCMPGTW,
28011 IX86_BUILTIN_PCMPGTD,
28013 IX86_BUILTIN_PMADDWD,
28015 IX86_BUILTIN_PMAXSW,
28016 IX86_BUILTIN_PMAXUB,
28017 IX86_BUILTIN_PMINSW,
28018 IX86_BUILTIN_PMINUB,
28020 IX86_BUILTIN_PMULHUW,
28021 IX86_BUILTIN_PMULHW,
28022 IX86_BUILTIN_PMULLW,
28024 IX86_BUILTIN_PSADBW,
28025 IX86_BUILTIN_PSHUFW,
28027 IX86_BUILTIN_PSLLW,
28028 IX86_BUILTIN_PSLLD,
28029 IX86_BUILTIN_PSLLQ,
28030 IX86_BUILTIN_PSRAW,
28031 IX86_BUILTIN_PSRAD,
28032 IX86_BUILTIN_PSRLW,
28033 IX86_BUILTIN_PSRLD,
28034 IX86_BUILTIN_PSRLQ,
28035 IX86_BUILTIN_PSLLWI,
28036 IX86_BUILTIN_PSLLDI,
28037 IX86_BUILTIN_PSLLQI,
28038 IX86_BUILTIN_PSRAWI,
28039 IX86_BUILTIN_PSRADI,
28040 IX86_BUILTIN_PSRLWI,
28041 IX86_BUILTIN_PSRLDI,
28042 IX86_BUILTIN_PSRLQI,
28044 IX86_BUILTIN_PUNPCKHBW,
28045 IX86_BUILTIN_PUNPCKHWD,
28046 IX86_BUILTIN_PUNPCKHDQ,
28047 IX86_BUILTIN_PUNPCKLBW,
28048 IX86_BUILTIN_PUNPCKLWD,
28049 IX86_BUILTIN_PUNPCKLDQ,
28051 IX86_BUILTIN_SHUFPS,
28053 IX86_BUILTIN_RCPPS,
28054 IX86_BUILTIN_RCPSS,
28055 IX86_BUILTIN_RSQRTPS,
28056 IX86_BUILTIN_RSQRTPS_NR,
28057 IX86_BUILTIN_RSQRTSS,
28058 IX86_BUILTIN_RSQRTF,
28059 IX86_BUILTIN_SQRTPS,
28060 IX86_BUILTIN_SQRTPS_NR,
28061 IX86_BUILTIN_SQRTSS,
28063 IX86_BUILTIN_UNPCKHPS,
28064 IX86_BUILTIN_UNPCKLPS,
28066 IX86_BUILTIN_ANDPS,
28067 IX86_BUILTIN_ANDNPS,
28068 IX86_BUILTIN_ORPS,
28069 IX86_BUILTIN_XORPS,
28071 IX86_BUILTIN_EMMS,
28072 IX86_BUILTIN_LDMXCSR,
28073 IX86_BUILTIN_STMXCSR,
28074 IX86_BUILTIN_SFENCE,
28076 IX86_BUILTIN_FXSAVE,
28077 IX86_BUILTIN_FXRSTOR,
28078 IX86_BUILTIN_FXSAVE64,
28079 IX86_BUILTIN_FXRSTOR64,
28081 IX86_BUILTIN_XSAVE,
28082 IX86_BUILTIN_XRSTOR,
28083 IX86_BUILTIN_XSAVE64,
28084 IX86_BUILTIN_XRSTOR64,
28086 IX86_BUILTIN_XSAVEOPT,
28087 IX86_BUILTIN_XSAVEOPT64,
28089 IX86_BUILTIN_XSAVEC,
28090 IX86_BUILTIN_XSAVEC64,
28092 IX86_BUILTIN_XSAVES,
28093 IX86_BUILTIN_XRSTORS,
28094 IX86_BUILTIN_XSAVES64,
28095 IX86_BUILTIN_XRSTORS64,
28097 /* 3DNow! Original */
28098 IX86_BUILTIN_FEMMS,
28099 IX86_BUILTIN_PAVGUSB,
28100 IX86_BUILTIN_PF2ID,
28101 IX86_BUILTIN_PFACC,
28102 IX86_BUILTIN_PFADD,
28103 IX86_BUILTIN_PFCMPEQ,
28104 IX86_BUILTIN_PFCMPGE,
28105 IX86_BUILTIN_PFCMPGT,
28106 IX86_BUILTIN_PFMAX,
28107 IX86_BUILTIN_PFMIN,
28108 IX86_BUILTIN_PFMUL,
28109 IX86_BUILTIN_PFRCP,
28110 IX86_BUILTIN_PFRCPIT1,
28111 IX86_BUILTIN_PFRCPIT2,
28112 IX86_BUILTIN_PFRSQIT1,
28113 IX86_BUILTIN_PFRSQRT,
28114 IX86_BUILTIN_PFSUB,
28115 IX86_BUILTIN_PFSUBR,
28116 IX86_BUILTIN_PI2FD,
28117 IX86_BUILTIN_PMULHRW,
28119 /* 3DNow! Athlon Extensions */
28120 IX86_BUILTIN_PF2IW,
28121 IX86_BUILTIN_PFNACC,
28122 IX86_BUILTIN_PFPNACC,
28123 IX86_BUILTIN_PI2FW,
28124 IX86_BUILTIN_PSWAPDSI,
28125 IX86_BUILTIN_PSWAPDSF,
28127 /* SSE2 */
28128 IX86_BUILTIN_ADDPD,
28129 IX86_BUILTIN_ADDSD,
28130 IX86_BUILTIN_DIVPD,
28131 IX86_BUILTIN_DIVSD,
28132 IX86_BUILTIN_MULPD,
28133 IX86_BUILTIN_MULSD,
28134 IX86_BUILTIN_SUBPD,
28135 IX86_BUILTIN_SUBSD,
28137 IX86_BUILTIN_CMPEQPD,
28138 IX86_BUILTIN_CMPLTPD,
28139 IX86_BUILTIN_CMPLEPD,
28140 IX86_BUILTIN_CMPGTPD,
28141 IX86_BUILTIN_CMPGEPD,
28142 IX86_BUILTIN_CMPNEQPD,
28143 IX86_BUILTIN_CMPNLTPD,
28144 IX86_BUILTIN_CMPNLEPD,
28145 IX86_BUILTIN_CMPNGTPD,
28146 IX86_BUILTIN_CMPNGEPD,
28147 IX86_BUILTIN_CMPORDPD,
28148 IX86_BUILTIN_CMPUNORDPD,
28149 IX86_BUILTIN_CMPEQSD,
28150 IX86_BUILTIN_CMPLTSD,
28151 IX86_BUILTIN_CMPLESD,
28152 IX86_BUILTIN_CMPNEQSD,
28153 IX86_BUILTIN_CMPNLTSD,
28154 IX86_BUILTIN_CMPNLESD,
28155 IX86_BUILTIN_CMPORDSD,
28156 IX86_BUILTIN_CMPUNORDSD,
28158 IX86_BUILTIN_COMIEQSD,
28159 IX86_BUILTIN_COMILTSD,
28160 IX86_BUILTIN_COMILESD,
28161 IX86_BUILTIN_COMIGTSD,
28162 IX86_BUILTIN_COMIGESD,
28163 IX86_BUILTIN_COMINEQSD,
28164 IX86_BUILTIN_UCOMIEQSD,
28165 IX86_BUILTIN_UCOMILTSD,
28166 IX86_BUILTIN_UCOMILESD,
28167 IX86_BUILTIN_UCOMIGTSD,
28168 IX86_BUILTIN_UCOMIGESD,
28169 IX86_BUILTIN_UCOMINEQSD,
28171 IX86_BUILTIN_MAXPD,
28172 IX86_BUILTIN_MAXSD,
28173 IX86_BUILTIN_MINPD,
28174 IX86_BUILTIN_MINSD,
28176 IX86_BUILTIN_ANDPD,
28177 IX86_BUILTIN_ANDNPD,
28178 IX86_BUILTIN_ORPD,
28179 IX86_BUILTIN_XORPD,
28181 IX86_BUILTIN_SQRTPD,
28182 IX86_BUILTIN_SQRTSD,
28184 IX86_BUILTIN_UNPCKHPD,
28185 IX86_BUILTIN_UNPCKLPD,
28187 IX86_BUILTIN_SHUFPD,
28189 IX86_BUILTIN_LOADUPD,
28190 IX86_BUILTIN_STOREUPD,
28191 IX86_BUILTIN_MOVSD,
28193 IX86_BUILTIN_LOADHPD,
28194 IX86_BUILTIN_LOADLPD,
28196 IX86_BUILTIN_CVTDQ2PD,
28197 IX86_BUILTIN_CVTDQ2PS,
28199 IX86_BUILTIN_CVTPD2DQ,
28200 IX86_BUILTIN_CVTPD2PI,
28201 IX86_BUILTIN_CVTPD2PS,
28202 IX86_BUILTIN_CVTTPD2DQ,
28203 IX86_BUILTIN_CVTTPD2PI,
28205 IX86_BUILTIN_CVTPI2PD,
28206 IX86_BUILTIN_CVTSI2SD,
28207 IX86_BUILTIN_CVTSI642SD,
28209 IX86_BUILTIN_CVTSD2SI,
28210 IX86_BUILTIN_CVTSD2SI64,
28211 IX86_BUILTIN_CVTSD2SS,
28212 IX86_BUILTIN_CVTSS2SD,
28213 IX86_BUILTIN_CVTTSD2SI,
28214 IX86_BUILTIN_CVTTSD2SI64,
28216 IX86_BUILTIN_CVTPS2DQ,
28217 IX86_BUILTIN_CVTPS2PD,
28218 IX86_BUILTIN_CVTTPS2DQ,
28220 IX86_BUILTIN_MOVNTI,
28221 IX86_BUILTIN_MOVNTI64,
28222 IX86_BUILTIN_MOVNTPD,
28223 IX86_BUILTIN_MOVNTDQ,
28225 IX86_BUILTIN_MOVQ128,
28227 /* SSE2 MMX */
28228 IX86_BUILTIN_MASKMOVDQU,
28229 IX86_BUILTIN_MOVMSKPD,
28230 IX86_BUILTIN_PMOVMSKB128,
28232 IX86_BUILTIN_PACKSSWB128,
28233 IX86_BUILTIN_PACKSSDW128,
28234 IX86_BUILTIN_PACKUSWB128,
28236 IX86_BUILTIN_PADDB128,
28237 IX86_BUILTIN_PADDW128,
28238 IX86_BUILTIN_PADDD128,
28239 IX86_BUILTIN_PADDQ128,
28240 IX86_BUILTIN_PADDSB128,
28241 IX86_BUILTIN_PADDSW128,
28242 IX86_BUILTIN_PADDUSB128,
28243 IX86_BUILTIN_PADDUSW128,
28244 IX86_BUILTIN_PSUBB128,
28245 IX86_BUILTIN_PSUBW128,
28246 IX86_BUILTIN_PSUBD128,
28247 IX86_BUILTIN_PSUBQ128,
28248 IX86_BUILTIN_PSUBSB128,
28249 IX86_BUILTIN_PSUBSW128,
28250 IX86_BUILTIN_PSUBUSB128,
28251 IX86_BUILTIN_PSUBUSW128,
28253 IX86_BUILTIN_PAND128,
28254 IX86_BUILTIN_PANDN128,
28255 IX86_BUILTIN_POR128,
28256 IX86_BUILTIN_PXOR128,
28258 IX86_BUILTIN_PAVGB128,
28259 IX86_BUILTIN_PAVGW128,
28261 IX86_BUILTIN_PCMPEQB128,
28262 IX86_BUILTIN_PCMPEQW128,
28263 IX86_BUILTIN_PCMPEQD128,
28264 IX86_BUILTIN_PCMPGTB128,
28265 IX86_BUILTIN_PCMPGTW128,
28266 IX86_BUILTIN_PCMPGTD128,
28268 IX86_BUILTIN_PMADDWD128,
28270 IX86_BUILTIN_PMAXSW128,
28271 IX86_BUILTIN_PMAXUB128,
28272 IX86_BUILTIN_PMINSW128,
28273 IX86_BUILTIN_PMINUB128,
28275 IX86_BUILTIN_PMULUDQ,
28276 IX86_BUILTIN_PMULUDQ128,
28277 IX86_BUILTIN_PMULHUW128,
28278 IX86_BUILTIN_PMULHW128,
28279 IX86_BUILTIN_PMULLW128,
28281 IX86_BUILTIN_PSADBW128,
28282 IX86_BUILTIN_PSHUFHW,
28283 IX86_BUILTIN_PSHUFLW,
28284 IX86_BUILTIN_PSHUFD,
28286 IX86_BUILTIN_PSLLDQI128,
28287 IX86_BUILTIN_PSLLWI128,
28288 IX86_BUILTIN_PSLLDI128,
28289 IX86_BUILTIN_PSLLQI128,
28290 IX86_BUILTIN_PSRAWI128,
28291 IX86_BUILTIN_PSRADI128,
28292 IX86_BUILTIN_PSRLDQI128,
28293 IX86_BUILTIN_PSRLWI128,
28294 IX86_BUILTIN_PSRLDI128,
28295 IX86_BUILTIN_PSRLQI128,
28297 IX86_BUILTIN_PSLLDQ128,
28298 IX86_BUILTIN_PSLLW128,
28299 IX86_BUILTIN_PSLLD128,
28300 IX86_BUILTIN_PSLLQ128,
28301 IX86_BUILTIN_PSRAW128,
28302 IX86_BUILTIN_PSRAD128,
28303 IX86_BUILTIN_PSRLW128,
28304 IX86_BUILTIN_PSRLD128,
28305 IX86_BUILTIN_PSRLQ128,
28307 IX86_BUILTIN_PUNPCKHBW128,
28308 IX86_BUILTIN_PUNPCKHWD128,
28309 IX86_BUILTIN_PUNPCKHDQ128,
28310 IX86_BUILTIN_PUNPCKHQDQ128,
28311 IX86_BUILTIN_PUNPCKLBW128,
28312 IX86_BUILTIN_PUNPCKLWD128,
28313 IX86_BUILTIN_PUNPCKLDQ128,
28314 IX86_BUILTIN_PUNPCKLQDQ128,
28316 IX86_BUILTIN_CLFLUSH,
28317 IX86_BUILTIN_MFENCE,
28318 IX86_BUILTIN_LFENCE,
28319 IX86_BUILTIN_PAUSE,
28321 IX86_BUILTIN_FNSTENV,
28322 IX86_BUILTIN_FLDENV,
28323 IX86_BUILTIN_FNSTSW,
28324 IX86_BUILTIN_FNCLEX,
28326 IX86_BUILTIN_BSRSI,
28327 IX86_BUILTIN_BSRDI,
28328 IX86_BUILTIN_RDPMC,
28329 IX86_BUILTIN_RDTSC,
28330 IX86_BUILTIN_RDTSCP,
28331 IX86_BUILTIN_ROLQI,
28332 IX86_BUILTIN_ROLHI,
28333 IX86_BUILTIN_RORQI,
28334 IX86_BUILTIN_RORHI,
28336 /* SSE3. */
28337 IX86_BUILTIN_ADDSUBPS,
28338 IX86_BUILTIN_HADDPS,
28339 IX86_BUILTIN_HSUBPS,
28340 IX86_BUILTIN_MOVSHDUP,
28341 IX86_BUILTIN_MOVSLDUP,
28342 IX86_BUILTIN_ADDSUBPD,
28343 IX86_BUILTIN_HADDPD,
28344 IX86_BUILTIN_HSUBPD,
28345 IX86_BUILTIN_LDDQU,
28347 IX86_BUILTIN_MONITOR,
28348 IX86_BUILTIN_MWAIT,
28350 /* SSSE3. */
28351 IX86_BUILTIN_PHADDW,
28352 IX86_BUILTIN_PHADDD,
28353 IX86_BUILTIN_PHADDSW,
28354 IX86_BUILTIN_PHSUBW,
28355 IX86_BUILTIN_PHSUBD,
28356 IX86_BUILTIN_PHSUBSW,
28357 IX86_BUILTIN_PMADDUBSW,
28358 IX86_BUILTIN_PMULHRSW,
28359 IX86_BUILTIN_PSHUFB,
28360 IX86_BUILTIN_PSIGNB,
28361 IX86_BUILTIN_PSIGNW,
28362 IX86_BUILTIN_PSIGND,
28363 IX86_BUILTIN_PALIGNR,
28364 IX86_BUILTIN_PABSB,
28365 IX86_BUILTIN_PABSW,
28366 IX86_BUILTIN_PABSD,
28368 IX86_BUILTIN_PHADDW128,
28369 IX86_BUILTIN_PHADDD128,
28370 IX86_BUILTIN_PHADDSW128,
28371 IX86_BUILTIN_PHSUBW128,
28372 IX86_BUILTIN_PHSUBD128,
28373 IX86_BUILTIN_PHSUBSW128,
28374 IX86_BUILTIN_PMADDUBSW128,
28375 IX86_BUILTIN_PMULHRSW128,
28376 IX86_BUILTIN_PSHUFB128,
28377 IX86_BUILTIN_PSIGNB128,
28378 IX86_BUILTIN_PSIGNW128,
28379 IX86_BUILTIN_PSIGND128,
28380 IX86_BUILTIN_PALIGNR128,
28381 IX86_BUILTIN_PABSB128,
28382 IX86_BUILTIN_PABSW128,
28383 IX86_BUILTIN_PABSD128,
28385 /* AMDFAM10 - SSE4A New Instructions. */
28386 IX86_BUILTIN_MOVNTSD,
28387 IX86_BUILTIN_MOVNTSS,
28388 IX86_BUILTIN_EXTRQI,
28389 IX86_BUILTIN_EXTRQ,
28390 IX86_BUILTIN_INSERTQI,
28391 IX86_BUILTIN_INSERTQ,
28393 /* SSE4.1. */
28394 IX86_BUILTIN_BLENDPD,
28395 IX86_BUILTIN_BLENDPS,
28396 IX86_BUILTIN_BLENDVPD,
28397 IX86_BUILTIN_BLENDVPS,
28398 IX86_BUILTIN_PBLENDVB128,
28399 IX86_BUILTIN_PBLENDW128,
28401 IX86_BUILTIN_DPPD,
28402 IX86_BUILTIN_DPPS,
28404 IX86_BUILTIN_INSERTPS128,
28406 IX86_BUILTIN_MOVNTDQA,
28407 IX86_BUILTIN_MPSADBW128,
28408 IX86_BUILTIN_PACKUSDW128,
28409 IX86_BUILTIN_PCMPEQQ,
28410 IX86_BUILTIN_PHMINPOSUW128,
28412 IX86_BUILTIN_PMAXSB128,
28413 IX86_BUILTIN_PMAXSD128,
28414 IX86_BUILTIN_PMAXUD128,
28415 IX86_BUILTIN_PMAXUW128,
28417 IX86_BUILTIN_PMINSB128,
28418 IX86_BUILTIN_PMINSD128,
28419 IX86_BUILTIN_PMINUD128,
28420 IX86_BUILTIN_PMINUW128,
28422 IX86_BUILTIN_PMOVSXBW128,
28423 IX86_BUILTIN_PMOVSXBD128,
28424 IX86_BUILTIN_PMOVSXBQ128,
28425 IX86_BUILTIN_PMOVSXWD128,
28426 IX86_BUILTIN_PMOVSXWQ128,
28427 IX86_BUILTIN_PMOVSXDQ128,
28429 IX86_BUILTIN_PMOVZXBW128,
28430 IX86_BUILTIN_PMOVZXBD128,
28431 IX86_BUILTIN_PMOVZXBQ128,
28432 IX86_BUILTIN_PMOVZXWD128,
28433 IX86_BUILTIN_PMOVZXWQ128,
28434 IX86_BUILTIN_PMOVZXDQ128,
28436 IX86_BUILTIN_PMULDQ128,
28437 IX86_BUILTIN_PMULLD128,
28439 IX86_BUILTIN_ROUNDSD,
28440 IX86_BUILTIN_ROUNDSS,
28442 IX86_BUILTIN_ROUNDPD,
28443 IX86_BUILTIN_ROUNDPS,
28445 IX86_BUILTIN_FLOORPD,
28446 IX86_BUILTIN_CEILPD,
28447 IX86_BUILTIN_TRUNCPD,
28448 IX86_BUILTIN_RINTPD,
28449 IX86_BUILTIN_ROUNDPD_AZ,
28451 IX86_BUILTIN_FLOORPD_VEC_PACK_SFIX,
28452 IX86_BUILTIN_CEILPD_VEC_PACK_SFIX,
28453 IX86_BUILTIN_ROUNDPD_AZ_VEC_PACK_SFIX,
28455 IX86_BUILTIN_FLOORPS,
28456 IX86_BUILTIN_CEILPS,
28457 IX86_BUILTIN_TRUNCPS,
28458 IX86_BUILTIN_RINTPS,
28459 IX86_BUILTIN_ROUNDPS_AZ,
28461 IX86_BUILTIN_FLOORPS_SFIX,
28462 IX86_BUILTIN_CEILPS_SFIX,
28463 IX86_BUILTIN_ROUNDPS_AZ_SFIX,
28465 IX86_BUILTIN_PTESTZ,
28466 IX86_BUILTIN_PTESTC,
28467 IX86_BUILTIN_PTESTNZC,
28469 IX86_BUILTIN_VEC_INIT_V2SI,
28470 IX86_BUILTIN_VEC_INIT_V4HI,
28471 IX86_BUILTIN_VEC_INIT_V8QI,
28472 IX86_BUILTIN_VEC_EXT_V2DF,
28473 IX86_BUILTIN_VEC_EXT_V2DI,
28474 IX86_BUILTIN_VEC_EXT_V4SF,
28475 IX86_BUILTIN_VEC_EXT_V4SI,
28476 IX86_BUILTIN_VEC_EXT_V8HI,
28477 IX86_BUILTIN_VEC_EXT_V2SI,
28478 IX86_BUILTIN_VEC_EXT_V4HI,
28479 IX86_BUILTIN_VEC_EXT_V16QI,
28480 IX86_BUILTIN_VEC_SET_V2DI,
28481 IX86_BUILTIN_VEC_SET_V4SF,
28482 IX86_BUILTIN_VEC_SET_V4SI,
28483 IX86_BUILTIN_VEC_SET_V8HI,
28484 IX86_BUILTIN_VEC_SET_V4HI,
28485 IX86_BUILTIN_VEC_SET_V16QI,
28487 IX86_BUILTIN_VEC_PACK_SFIX,
28488 IX86_BUILTIN_VEC_PACK_SFIX256,
28490 /* SSE4.2. */
28491 IX86_BUILTIN_CRC32QI,
28492 IX86_BUILTIN_CRC32HI,
28493 IX86_BUILTIN_CRC32SI,
28494 IX86_BUILTIN_CRC32DI,
28496 IX86_BUILTIN_PCMPESTRI128,
28497 IX86_BUILTIN_PCMPESTRM128,
28498 IX86_BUILTIN_PCMPESTRA128,
28499 IX86_BUILTIN_PCMPESTRC128,
28500 IX86_BUILTIN_PCMPESTRO128,
28501 IX86_BUILTIN_PCMPESTRS128,
28502 IX86_BUILTIN_PCMPESTRZ128,
28503 IX86_BUILTIN_PCMPISTRI128,
28504 IX86_BUILTIN_PCMPISTRM128,
28505 IX86_BUILTIN_PCMPISTRA128,
28506 IX86_BUILTIN_PCMPISTRC128,
28507 IX86_BUILTIN_PCMPISTRO128,
28508 IX86_BUILTIN_PCMPISTRS128,
28509 IX86_BUILTIN_PCMPISTRZ128,
28511 IX86_BUILTIN_PCMPGTQ,
28513 /* AES instructions */
28514 IX86_BUILTIN_AESENC128,
28515 IX86_BUILTIN_AESENCLAST128,
28516 IX86_BUILTIN_AESDEC128,
28517 IX86_BUILTIN_AESDECLAST128,
28518 IX86_BUILTIN_AESIMC128,
28519 IX86_BUILTIN_AESKEYGENASSIST128,
28521 /* PCLMUL instruction */
28522 IX86_BUILTIN_PCLMULQDQ128,
28524 /* AVX */
28525 IX86_BUILTIN_ADDPD256,
28526 IX86_BUILTIN_ADDPS256,
28527 IX86_BUILTIN_ADDSUBPD256,
28528 IX86_BUILTIN_ADDSUBPS256,
28529 IX86_BUILTIN_ANDPD256,
28530 IX86_BUILTIN_ANDPS256,
28531 IX86_BUILTIN_ANDNPD256,
28532 IX86_BUILTIN_ANDNPS256,
28533 IX86_BUILTIN_BLENDPD256,
28534 IX86_BUILTIN_BLENDPS256,
28535 IX86_BUILTIN_BLENDVPD256,
28536 IX86_BUILTIN_BLENDVPS256,
28537 IX86_BUILTIN_DIVPD256,
28538 IX86_BUILTIN_DIVPS256,
28539 IX86_BUILTIN_DPPS256,
28540 IX86_BUILTIN_HADDPD256,
28541 IX86_BUILTIN_HADDPS256,
28542 IX86_BUILTIN_HSUBPD256,
28543 IX86_BUILTIN_HSUBPS256,
28544 IX86_BUILTIN_MAXPD256,
28545 IX86_BUILTIN_MAXPS256,
28546 IX86_BUILTIN_MINPD256,
28547 IX86_BUILTIN_MINPS256,
28548 IX86_BUILTIN_MULPD256,
28549 IX86_BUILTIN_MULPS256,
28550 IX86_BUILTIN_ORPD256,
28551 IX86_BUILTIN_ORPS256,
28552 IX86_BUILTIN_SHUFPD256,
28553 IX86_BUILTIN_SHUFPS256,
28554 IX86_BUILTIN_SUBPD256,
28555 IX86_BUILTIN_SUBPS256,
28556 IX86_BUILTIN_XORPD256,
28557 IX86_BUILTIN_XORPS256,
28558 IX86_BUILTIN_CMPSD,
28559 IX86_BUILTIN_CMPSS,
28560 IX86_BUILTIN_CMPPD,
28561 IX86_BUILTIN_CMPPS,
28562 IX86_BUILTIN_CMPPD256,
28563 IX86_BUILTIN_CMPPS256,
28564 IX86_BUILTIN_CVTDQ2PD256,
28565 IX86_BUILTIN_CVTDQ2PS256,
28566 IX86_BUILTIN_CVTPD2PS256,
28567 IX86_BUILTIN_CVTPS2DQ256,
28568 IX86_BUILTIN_CVTPS2PD256,
28569 IX86_BUILTIN_CVTTPD2DQ256,
28570 IX86_BUILTIN_CVTPD2DQ256,
28571 IX86_BUILTIN_CVTTPS2DQ256,
28572 IX86_BUILTIN_EXTRACTF128PD256,
28573 IX86_BUILTIN_EXTRACTF128PS256,
28574 IX86_BUILTIN_EXTRACTF128SI256,
28575 IX86_BUILTIN_VZEROALL,
28576 IX86_BUILTIN_VZEROUPPER,
28577 IX86_BUILTIN_VPERMILVARPD,
28578 IX86_BUILTIN_VPERMILVARPS,
28579 IX86_BUILTIN_VPERMILVARPD256,
28580 IX86_BUILTIN_VPERMILVARPS256,
28581 IX86_BUILTIN_VPERMILPD,
28582 IX86_BUILTIN_VPERMILPS,
28583 IX86_BUILTIN_VPERMILPD256,
28584 IX86_BUILTIN_VPERMILPS256,
28585 IX86_BUILTIN_VPERMIL2PD,
28586 IX86_BUILTIN_VPERMIL2PS,
28587 IX86_BUILTIN_VPERMIL2PD256,
28588 IX86_BUILTIN_VPERMIL2PS256,
28589 IX86_BUILTIN_VPERM2F128PD256,
28590 IX86_BUILTIN_VPERM2F128PS256,
28591 IX86_BUILTIN_VPERM2F128SI256,
28592 IX86_BUILTIN_VBROADCASTSS,
28593 IX86_BUILTIN_VBROADCASTSD256,
28594 IX86_BUILTIN_VBROADCASTSS256,
28595 IX86_BUILTIN_VBROADCASTPD256,
28596 IX86_BUILTIN_VBROADCASTPS256,
28597 IX86_BUILTIN_VINSERTF128PD256,
28598 IX86_BUILTIN_VINSERTF128PS256,
28599 IX86_BUILTIN_VINSERTF128SI256,
28600 IX86_BUILTIN_LOADUPD256,
28601 IX86_BUILTIN_LOADUPS256,
28602 IX86_BUILTIN_STOREUPD256,
28603 IX86_BUILTIN_STOREUPS256,
28604 IX86_BUILTIN_LDDQU256,
28605 IX86_BUILTIN_MOVNTDQ256,
28606 IX86_BUILTIN_MOVNTPD256,
28607 IX86_BUILTIN_MOVNTPS256,
28608 IX86_BUILTIN_LOADDQU256,
28609 IX86_BUILTIN_STOREDQU256,
28610 IX86_BUILTIN_MASKLOADPD,
28611 IX86_BUILTIN_MASKLOADPS,
28612 IX86_BUILTIN_MASKSTOREPD,
28613 IX86_BUILTIN_MASKSTOREPS,
28614 IX86_BUILTIN_MASKLOADPD256,
28615 IX86_BUILTIN_MASKLOADPS256,
28616 IX86_BUILTIN_MASKSTOREPD256,
28617 IX86_BUILTIN_MASKSTOREPS256,
28618 IX86_BUILTIN_MOVSHDUP256,
28619 IX86_BUILTIN_MOVSLDUP256,
28620 IX86_BUILTIN_MOVDDUP256,
28622 IX86_BUILTIN_SQRTPD256,
28623 IX86_BUILTIN_SQRTPS256,
28624 IX86_BUILTIN_SQRTPS_NR256,
28625 IX86_BUILTIN_RSQRTPS256,
28626 IX86_BUILTIN_RSQRTPS_NR256,
28628 IX86_BUILTIN_RCPPS256,
28630 IX86_BUILTIN_ROUNDPD256,
28631 IX86_BUILTIN_ROUNDPS256,
28633 IX86_BUILTIN_FLOORPD256,
28634 IX86_BUILTIN_CEILPD256,
28635 IX86_BUILTIN_TRUNCPD256,
28636 IX86_BUILTIN_RINTPD256,
28637 IX86_BUILTIN_ROUNDPD_AZ256,
28639 IX86_BUILTIN_FLOORPD_VEC_PACK_SFIX256,
28640 IX86_BUILTIN_CEILPD_VEC_PACK_SFIX256,
28641 IX86_BUILTIN_ROUNDPD_AZ_VEC_PACK_SFIX256,
28643 IX86_BUILTIN_FLOORPS256,
28644 IX86_BUILTIN_CEILPS256,
28645 IX86_BUILTIN_TRUNCPS256,
28646 IX86_BUILTIN_RINTPS256,
28647 IX86_BUILTIN_ROUNDPS_AZ256,
28649 IX86_BUILTIN_FLOORPS_SFIX256,
28650 IX86_BUILTIN_CEILPS_SFIX256,
28651 IX86_BUILTIN_ROUNDPS_AZ_SFIX256,
28653 IX86_BUILTIN_UNPCKHPD256,
28654 IX86_BUILTIN_UNPCKLPD256,
28655 IX86_BUILTIN_UNPCKHPS256,
28656 IX86_BUILTIN_UNPCKLPS256,
28658 IX86_BUILTIN_SI256_SI,
28659 IX86_BUILTIN_PS256_PS,
28660 IX86_BUILTIN_PD256_PD,
28661 IX86_BUILTIN_SI_SI256,
28662 IX86_BUILTIN_PS_PS256,
28663 IX86_BUILTIN_PD_PD256,
28665 IX86_BUILTIN_VTESTZPD,
28666 IX86_BUILTIN_VTESTCPD,
28667 IX86_BUILTIN_VTESTNZCPD,
28668 IX86_BUILTIN_VTESTZPS,
28669 IX86_BUILTIN_VTESTCPS,
28670 IX86_BUILTIN_VTESTNZCPS,
28671 IX86_BUILTIN_VTESTZPD256,
28672 IX86_BUILTIN_VTESTCPD256,
28673 IX86_BUILTIN_VTESTNZCPD256,
28674 IX86_BUILTIN_VTESTZPS256,
28675 IX86_BUILTIN_VTESTCPS256,
28676 IX86_BUILTIN_VTESTNZCPS256,
28677 IX86_BUILTIN_PTESTZ256,
28678 IX86_BUILTIN_PTESTC256,
28679 IX86_BUILTIN_PTESTNZC256,
28681 IX86_BUILTIN_MOVMSKPD256,
28682 IX86_BUILTIN_MOVMSKPS256,
28684 /* AVX2 */
28685 IX86_BUILTIN_MPSADBW256,
28686 IX86_BUILTIN_PABSB256,
28687 IX86_BUILTIN_PABSW256,
28688 IX86_BUILTIN_PABSD256,
28689 IX86_BUILTIN_PACKSSDW256,
28690 IX86_BUILTIN_PACKSSWB256,
28691 IX86_BUILTIN_PACKUSDW256,
28692 IX86_BUILTIN_PACKUSWB256,
28693 IX86_BUILTIN_PADDB256,
28694 IX86_BUILTIN_PADDW256,
28695 IX86_BUILTIN_PADDD256,
28696 IX86_BUILTIN_PADDQ256,
28697 IX86_BUILTIN_PADDSB256,
28698 IX86_BUILTIN_PADDSW256,
28699 IX86_BUILTIN_PADDUSB256,
28700 IX86_BUILTIN_PADDUSW256,
28701 IX86_BUILTIN_PALIGNR256,
28702 IX86_BUILTIN_AND256I,
28703 IX86_BUILTIN_ANDNOT256I,
28704 IX86_BUILTIN_PAVGB256,
28705 IX86_BUILTIN_PAVGW256,
28706 IX86_BUILTIN_PBLENDVB256,
28707 IX86_BUILTIN_PBLENDVW256,
28708 IX86_BUILTIN_PCMPEQB256,
28709 IX86_BUILTIN_PCMPEQW256,
28710 IX86_BUILTIN_PCMPEQD256,
28711 IX86_BUILTIN_PCMPEQQ256,
28712 IX86_BUILTIN_PCMPGTB256,
28713 IX86_BUILTIN_PCMPGTW256,
28714 IX86_BUILTIN_PCMPGTD256,
28715 IX86_BUILTIN_PCMPGTQ256,
28716 IX86_BUILTIN_PHADDW256,
28717 IX86_BUILTIN_PHADDD256,
28718 IX86_BUILTIN_PHADDSW256,
28719 IX86_BUILTIN_PHSUBW256,
28720 IX86_BUILTIN_PHSUBD256,
28721 IX86_BUILTIN_PHSUBSW256,
28722 IX86_BUILTIN_PMADDUBSW256,
28723 IX86_BUILTIN_PMADDWD256,
28724 IX86_BUILTIN_PMAXSB256,
28725 IX86_BUILTIN_PMAXSW256,
28726 IX86_BUILTIN_PMAXSD256,
28727 IX86_BUILTIN_PMAXUB256,
28728 IX86_BUILTIN_PMAXUW256,
28729 IX86_BUILTIN_PMAXUD256,
28730 IX86_BUILTIN_PMINSB256,
28731 IX86_BUILTIN_PMINSW256,
28732 IX86_BUILTIN_PMINSD256,
28733 IX86_BUILTIN_PMINUB256,
28734 IX86_BUILTIN_PMINUW256,
28735 IX86_BUILTIN_PMINUD256,
28736 IX86_BUILTIN_PMOVMSKB256,
28737 IX86_BUILTIN_PMOVSXBW256,
28738 IX86_BUILTIN_PMOVSXBD256,
28739 IX86_BUILTIN_PMOVSXBQ256,
28740 IX86_BUILTIN_PMOVSXWD256,
28741 IX86_BUILTIN_PMOVSXWQ256,
28742 IX86_BUILTIN_PMOVSXDQ256,
28743 IX86_BUILTIN_PMOVZXBW256,
28744 IX86_BUILTIN_PMOVZXBD256,
28745 IX86_BUILTIN_PMOVZXBQ256,
28746 IX86_BUILTIN_PMOVZXWD256,
28747 IX86_BUILTIN_PMOVZXWQ256,
28748 IX86_BUILTIN_PMOVZXDQ256,
28749 IX86_BUILTIN_PMULDQ256,
28750 IX86_BUILTIN_PMULHRSW256,
28751 IX86_BUILTIN_PMULHUW256,
28752 IX86_BUILTIN_PMULHW256,
28753 IX86_BUILTIN_PMULLW256,
28754 IX86_BUILTIN_PMULLD256,
28755 IX86_BUILTIN_PMULUDQ256,
28756 IX86_BUILTIN_POR256,
28757 IX86_BUILTIN_PSADBW256,
28758 IX86_BUILTIN_PSHUFB256,
28759 IX86_BUILTIN_PSHUFD256,
28760 IX86_BUILTIN_PSHUFHW256,
28761 IX86_BUILTIN_PSHUFLW256,
28762 IX86_BUILTIN_PSIGNB256,
28763 IX86_BUILTIN_PSIGNW256,
28764 IX86_BUILTIN_PSIGND256,
28765 IX86_BUILTIN_PSLLDQI256,
28766 IX86_BUILTIN_PSLLWI256,
28767 IX86_BUILTIN_PSLLW256,
28768 IX86_BUILTIN_PSLLDI256,
28769 IX86_BUILTIN_PSLLD256,
28770 IX86_BUILTIN_PSLLQI256,
28771 IX86_BUILTIN_PSLLQ256,
28772 IX86_BUILTIN_PSRAWI256,
28773 IX86_BUILTIN_PSRAW256,
28774 IX86_BUILTIN_PSRADI256,
28775 IX86_BUILTIN_PSRAD256,
28776 IX86_BUILTIN_PSRLDQI256,
28777 IX86_BUILTIN_PSRLWI256,
28778 IX86_BUILTIN_PSRLW256,
28779 IX86_BUILTIN_PSRLDI256,
28780 IX86_BUILTIN_PSRLD256,
28781 IX86_BUILTIN_PSRLQI256,
28782 IX86_BUILTIN_PSRLQ256,
28783 IX86_BUILTIN_PSUBB256,
28784 IX86_BUILTIN_PSUBW256,
28785 IX86_BUILTIN_PSUBD256,
28786 IX86_BUILTIN_PSUBQ256,
28787 IX86_BUILTIN_PSUBSB256,
28788 IX86_BUILTIN_PSUBSW256,
28789 IX86_BUILTIN_PSUBUSB256,
28790 IX86_BUILTIN_PSUBUSW256,
28791 IX86_BUILTIN_PUNPCKHBW256,
28792 IX86_BUILTIN_PUNPCKHWD256,
28793 IX86_BUILTIN_PUNPCKHDQ256,
28794 IX86_BUILTIN_PUNPCKHQDQ256,
28795 IX86_BUILTIN_PUNPCKLBW256,
28796 IX86_BUILTIN_PUNPCKLWD256,
28797 IX86_BUILTIN_PUNPCKLDQ256,
28798 IX86_BUILTIN_PUNPCKLQDQ256,
28799 IX86_BUILTIN_PXOR256,
28800 IX86_BUILTIN_MOVNTDQA256,
28801 IX86_BUILTIN_VBROADCASTSS_PS,
28802 IX86_BUILTIN_VBROADCASTSS_PS256,
28803 IX86_BUILTIN_VBROADCASTSD_PD256,
28804 IX86_BUILTIN_VBROADCASTSI256,
28805 IX86_BUILTIN_PBLENDD256,
28806 IX86_BUILTIN_PBLENDD128,
28807 IX86_BUILTIN_PBROADCASTB256,
28808 IX86_BUILTIN_PBROADCASTW256,
28809 IX86_BUILTIN_PBROADCASTD256,
28810 IX86_BUILTIN_PBROADCASTQ256,
28811 IX86_BUILTIN_PBROADCASTB128,
28812 IX86_BUILTIN_PBROADCASTW128,
28813 IX86_BUILTIN_PBROADCASTD128,
28814 IX86_BUILTIN_PBROADCASTQ128,
28815 IX86_BUILTIN_VPERMVARSI256,
28816 IX86_BUILTIN_VPERMDF256,
28817 IX86_BUILTIN_VPERMVARSF256,
28818 IX86_BUILTIN_VPERMDI256,
28819 IX86_BUILTIN_VPERMTI256,
28820 IX86_BUILTIN_VEXTRACT128I256,
28821 IX86_BUILTIN_VINSERT128I256,
28822 IX86_BUILTIN_MASKLOADD,
28823 IX86_BUILTIN_MASKLOADQ,
28824 IX86_BUILTIN_MASKLOADD256,
28825 IX86_BUILTIN_MASKLOADQ256,
28826 IX86_BUILTIN_MASKSTORED,
28827 IX86_BUILTIN_MASKSTOREQ,
28828 IX86_BUILTIN_MASKSTORED256,
28829 IX86_BUILTIN_MASKSTOREQ256,
28830 IX86_BUILTIN_PSLLVV4DI,
28831 IX86_BUILTIN_PSLLVV2DI,
28832 IX86_BUILTIN_PSLLVV8SI,
28833 IX86_BUILTIN_PSLLVV4SI,
28834 IX86_BUILTIN_PSRAVV8SI,
28835 IX86_BUILTIN_PSRAVV4SI,
28836 IX86_BUILTIN_PSRLVV4DI,
28837 IX86_BUILTIN_PSRLVV2DI,
28838 IX86_BUILTIN_PSRLVV8SI,
28839 IX86_BUILTIN_PSRLVV4SI,
28841 IX86_BUILTIN_GATHERSIV2DF,
28842 IX86_BUILTIN_GATHERSIV4DF,
28843 IX86_BUILTIN_GATHERDIV2DF,
28844 IX86_BUILTIN_GATHERDIV4DF,
28845 IX86_BUILTIN_GATHERSIV4SF,
28846 IX86_BUILTIN_GATHERSIV8SF,
28847 IX86_BUILTIN_GATHERDIV4SF,
28848 IX86_BUILTIN_GATHERDIV8SF,
28849 IX86_BUILTIN_GATHERSIV2DI,
28850 IX86_BUILTIN_GATHERSIV4DI,
28851 IX86_BUILTIN_GATHERDIV2DI,
28852 IX86_BUILTIN_GATHERDIV4DI,
28853 IX86_BUILTIN_GATHERSIV4SI,
28854 IX86_BUILTIN_GATHERSIV8SI,
28855 IX86_BUILTIN_GATHERDIV4SI,
28856 IX86_BUILTIN_GATHERDIV8SI,
28858 /* AVX512F */
28859 IX86_BUILTIN_SI512_SI256,
28860 IX86_BUILTIN_PD512_PD256,
28861 IX86_BUILTIN_PS512_PS256,
28862 IX86_BUILTIN_SI512_SI,
28863 IX86_BUILTIN_PD512_PD,
28864 IX86_BUILTIN_PS512_PS,
28865 IX86_BUILTIN_ADDPD512,
28866 IX86_BUILTIN_ADDPS512,
28867 IX86_BUILTIN_ADDSD_ROUND,
28868 IX86_BUILTIN_ADDSS_ROUND,
28869 IX86_BUILTIN_ALIGND512,
28870 IX86_BUILTIN_ALIGNQ512,
28871 IX86_BUILTIN_BLENDMD512,
28872 IX86_BUILTIN_BLENDMPD512,
28873 IX86_BUILTIN_BLENDMPS512,
28874 IX86_BUILTIN_BLENDMQ512,
28875 IX86_BUILTIN_BROADCASTF32X4_512,
28876 IX86_BUILTIN_BROADCASTF64X4_512,
28877 IX86_BUILTIN_BROADCASTI32X4_512,
28878 IX86_BUILTIN_BROADCASTI64X4_512,
28879 IX86_BUILTIN_BROADCASTSD512,
28880 IX86_BUILTIN_BROADCASTSS512,
28881 IX86_BUILTIN_CMPD512,
28882 IX86_BUILTIN_CMPPD512,
28883 IX86_BUILTIN_CMPPS512,
28884 IX86_BUILTIN_CMPQ512,
28885 IX86_BUILTIN_CMPSD_MASK,
28886 IX86_BUILTIN_CMPSS_MASK,
28887 IX86_BUILTIN_COMIDF,
28888 IX86_BUILTIN_COMISF,
28889 IX86_BUILTIN_COMPRESSPD512,
28890 IX86_BUILTIN_COMPRESSPDSTORE512,
28891 IX86_BUILTIN_COMPRESSPS512,
28892 IX86_BUILTIN_COMPRESSPSSTORE512,
28893 IX86_BUILTIN_CVTDQ2PD512,
28894 IX86_BUILTIN_CVTDQ2PS512,
28895 IX86_BUILTIN_CVTPD2DQ512,
28896 IX86_BUILTIN_CVTPD2PS512,
28897 IX86_BUILTIN_CVTPD2UDQ512,
28898 IX86_BUILTIN_CVTPH2PS512,
28899 IX86_BUILTIN_CVTPS2DQ512,
28900 IX86_BUILTIN_CVTPS2PD512,
28901 IX86_BUILTIN_CVTPS2PH512,
28902 IX86_BUILTIN_CVTPS2UDQ512,
28903 IX86_BUILTIN_CVTSD2SS_ROUND,
28904 IX86_BUILTIN_CVTSI2SD64,
28905 IX86_BUILTIN_CVTSI2SS32,
28906 IX86_BUILTIN_CVTSI2SS64,
28907 IX86_BUILTIN_CVTSS2SD_ROUND,
28908 IX86_BUILTIN_CVTTPD2DQ512,
28909 IX86_BUILTIN_CVTTPD2UDQ512,
28910 IX86_BUILTIN_CVTTPS2DQ512,
28911 IX86_BUILTIN_CVTTPS2UDQ512,
28912 IX86_BUILTIN_CVTUDQ2PD512,
28913 IX86_BUILTIN_CVTUDQ2PS512,
28914 IX86_BUILTIN_CVTUSI2SD32,
28915 IX86_BUILTIN_CVTUSI2SD64,
28916 IX86_BUILTIN_CVTUSI2SS32,
28917 IX86_BUILTIN_CVTUSI2SS64,
28918 IX86_BUILTIN_DIVPD512,
28919 IX86_BUILTIN_DIVPS512,
28920 IX86_BUILTIN_DIVSD_ROUND,
28921 IX86_BUILTIN_DIVSS_ROUND,
28922 IX86_BUILTIN_EXPANDPD512,
28923 IX86_BUILTIN_EXPANDPD512Z,
28924 IX86_BUILTIN_EXPANDPDLOAD512,
28925 IX86_BUILTIN_EXPANDPDLOAD512Z,
28926 IX86_BUILTIN_EXPANDPS512,
28927 IX86_BUILTIN_EXPANDPS512Z,
28928 IX86_BUILTIN_EXPANDPSLOAD512,
28929 IX86_BUILTIN_EXPANDPSLOAD512Z,
28930 IX86_BUILTIN_EXTRACTF32X4,
28931 IX86_BUILTIN_EXTRACTF64X4,
28932 IX86_BUILTIN_EXTRACTI32X4,
28933 IX86_BUILTIN_EXTRACTI64X4,
28934 IX86_BUILTIN_FIXUPIMMPD512_MASK,
28935 IX86_BUILTIN_FIXUPIMMPD512_MASKZ,
28936 IX86_BUILTIN_FIXUPIMMPS512_MASK,
28937 IX86_BUILTIN_FIXUPIMMPS512_MASKZ,
28938 IX86_BUILTIN_FIXUPIMMSD128_MASK,
28939 IX86_BUILTIN_FIXUPIMMSD128_MASKZ,
28940 IX86_BUILTIN_FIXUPIMMSS128_MASK,
28941 IX86_BUILTIN_FIXUPIMMSS128_MASKZ,
28942 IX86_BUILTIN_GETEXPPD512,
28943 IX86_BUILTIN_GETEXPPS512,
28944 IX86_BUILTIN_GETEXPSD128,
28945 IX86_BUILTIN_GETEXPSS128,
28946 IX86_BUILTIN_GETMANTPD512,
28947 IX86_BUILTIN_GETMANTPS512,
28948 IX86_BUILTIN_GETMANTSD128,
28949 IX86_BUILTIN_GETMANTSS128,
28950 IX86_BUILTIN_INSERTF32X4,
28951 IX86_BUILTIN_INSERTF64X4,
28952 IX86_BUILTIN_INSERTI32X4,
28953 IX86_BUILTIN_INSERTI64X4,
28954 IX86_BUILTIN_LOADAPD512,
28955 IX86_BUILTIN_LOADAPS512,
28956 IX86_BUILTIN_LOADDQUDI512,
28957 IX86_BUILTIN_LOADDQUSI512,
28958 IX86_BUILTIN_LOADUPD512,
28959 IX86_BUILTIN_LOADUPS512,
28960 IX86_BUILTIN_MAXPD512,
28961 IX86_BUILTIN_MAXPS512,
28962 IX86_BUILTIN_MAXSD_ROUND,
28963 IX86_BUILTIN_MAXSS_ROUND,
28964 IX86_BUILTIN_MINPD512,
28965 IX86_BUILTIN_MINPS512,
28966 IX86_BUILTIN_MINSD_ROUND,
28967 IX86_BUILTIN_MINSS_ROUND,
28968 IX86_BUILTIN_MOVAPD512,
28969 IX86_BUILTIN_MOVAPS512,
28970 IX86_BUILTIN_MOVDDUP512,
28971 IX86_BUILTIN_MOVDQA32LOAD512,
28972 IX86_BUILTIN_MOVDQA32STORE512,
28973 IX86_BUILTIN_MOVDQA32_512,
28974 IX86_BUILTIN_MOVDQA64LOAD512,
28975 IX86_BUILTIN_MOVDQA64STORE512,
28976 IX86_BUILTIN_MOVDQA64_512,
28977 IX86_BUILTIN_MOVNTDQ512,
28978 IX86_BUILTIN_MOVNTDQA512,
28979 IX86_BUILTIN_MOVNTPD512,
28980 IX86_BUILTIN_MOVNTPS512,
28981 IX86_BUILTIN_MOVSHDUP512,
28982 IX86_BUILTIN_MOVSLDUP512,
28983 IX86_BUILTIN_MULPD512,
28984 IX86_BUILTIN_MULPS512,
28985 IX86_BUILTIN_MULSD_ROUND,
28986 IX86_BUILTIN_MULSS_ROUND,
28987 IX86_BUILTIN_PABSD512,
28988 IX86_BUILTIN_PABSQ512,
28989 IX86_BUILTIN_PADDD512,
28990 IX86_BUILTIN_PADDQ512,
28991 IX86_BUILTIN_PANDD512,
28992 IX86_BUILTIN_PANDND512,
28993 IX86_BUILTIN_PANDNQ512,
28994 IX86_BUILTIN_PANDQ512,
28995 IX86_BUILTIN_PBROADCASTD512,
28996 IX86_BUILTIN_PBROADCASTD512_GPR,
28997 IX86_BUILTIN_PBROADCASTMB512,
28998 IX86_BUILTIN_PBROADCASTMW512,
28999 IX86_BUILTIN_PBROADCASTQ512,
29000 IX86_BUILTIN_PBROADCASTQ512_GPR,
29001 IX86_BUILTIN_PCMPEQD512_MASK,
29002 IX86_BUILTIN_PCMPEQQ512_MASK,
29003 IX86_BUILTIN_PCMPGTD512_MASK,
29004 IX86_BUILTIN_PCMPGTQ512_MASK,
29005 IX86_BUILTIN_PCOMPRESSD512,
29006 IX86_BUILTIN_PCOMPRESSDSTORE512,
29007 IX86_BUILTIN_PCOMPRESSQ512,
29008 IX86_BUILTIN_PCOMPRESSQSTORE512,
29009 IX86_BUILTIN_PEXPANDD512,
29010 IX86_BUILTIN_PEXPANDD512Z,
29011 IX86_BUILTIN_PEXPANDDLOAD512,
29012 IX86_BUILTIN_PEXPANDDLOAD512Z,
29013 IX86_BUILTIN_PEXPANDQ512,
29014 IX86_BUILTIN_PEXPANDQ512Z,
29015 IX86_BUILTIN_PEXPANDQLOAD512,
29016 IX86_BUILTIN_PEXPANDQLOAD512Z,
29017 IX86_BUILTIN_PMAXSD512,
29018 IX86_BUILTIN_PMAXSQ512,
29019 IX86_BUILTIN_PMAXUD512,
29020 IX86_BUILTIN_PMAXUQ512,
29021 IX86_BUILTIN_PMINSD512,
29022 IX86_BUILTIN_PMINSQ512,
29023 IX86_BUILTIN_PMINUD512,
29024 IX86_BUILTIN_PMINUQ512,
29025 IX86_BUILTIN_PMOVDB512,
29026 IX86_BUILTIN_PMOVDB512_MEM,
29027 IX86_BUILTIN_PMOVDW512,
29028 IX86_BUILTIN_PMOVDW512_MEM,
29029 IX86_BUILTIN_PMOVQB512,
29030 IX86_BUILTIN_PMOVQB512_MEM,
29031 IX86_BUILTIN_PMOVQD512,
29032 IX86_BUILTIN_PMOVQD512_MEM,
29033 IX86_BUILTIN_PMOVQW512,
29034 IX86_BUILTIN_PMOVQW512_MEM,
29035 IX86_BUILTIN_PMOVSDB512,
29036 IX86_BUILTIN_PMOVSDB512_MEM,
29037 IX86_BUILTIN_PMOVSDW512,
29038 IX86_BUILTIN_PMOVSDW512_MEM,
29039 IX86_BUILTIN_PMOVSQB512,
29040 IX86_BUILTIN_PMOVSQB512_MEM,
29041 IX86_BUILTIN_PMOVSQD512,
29042 IX86_BUILTIN_PMOVSQD512_MEM,
29043 IX86_BUILTIN_PMOVSQW512,
29044 IX86_BUILTIN_PMOVSQW512_MEM,
29045 IX86_BUILTIN_PMOVSXBD512,
29046 IX86_BUILTIN_PMOVSXBQ512,
29047 IX86_BUILTIN_PMOVSXDQ512,
29048 IX86_BUILTIN_PMOVSXWD512,
29049 IX86_BUILTIN_PMOVSXWQ512,
29050 IX86_BUILTIN_PMOVUSDB512,
29051 IX86_BUILTIN_PMOVUSDB512_MEM,
29052 IX86_BUILTIN_PMOVUSDW512,
29053 IX86_BUILTIN_PMOVUSDW512_MEM,
29054 IX86_BUILTIN_PMOVUSQB512,
29055 IX86_BUILTIN_PMOVUSQB512_MEM,
29056 IX86_BUILTIN_PMOVUSQD512,
29057 IX86_BUILTIN_PMOVUSQD512_MEM,
29058 IX86_BUILTIN_PMOVUSQW512,
29059 IX86_BUILTIN_PMOVUSQW512_MEM,
29060 IX86_BUILTIN_PMOVZXBD512,
29061 IX86_BUILTIN_PMOVZXBQ512,
29062 IX86_BUILTIN_PMOVZXDQ512,
29063 IX86_BUILTIN_PMOVZXWD512,
29064 IX86_BUILTIN_PMOVZXWQ512,
29065 IX86_BUILTIN_PMULDQ512,
29066 IX86_BUILTIN_PMULLD512,
29067 IX86_BUILTIN_PMULUDQ512,
29068 IX86_BUILTIN_PORD512,
29069 IX86_BUILTIN_PORQ512,
29070 IX86_BUILTIN_PROLD512,
29071 IX86_BUILTIN_PROLQ512,
29072 IX86_BUILTIN_PROLVD512,
29073 IX86_BUILTIN_PROLVQ512,
29074 IX86_BUILTIN_PRORD512,
29075 IX86_BUILTIN_PRORQ512,
29076 IX86_BUILTIN_PRORVD512,
29077 IX86_BUILTIN_PRORVQ512,
29078 IX86_BUILTIN_PSHUFD512,
29079 IX86_BUILTIN_PSLLD512,
29080 IX86_BUILTIN_PSLLDI512,
29081 IX86_BUILTIN_PSLLQ512,
29082 IX86_BUILTIN_PSLLQI512,
29083 IX86_BUILTIN_PSLLVV16SI,
29084 IX86_BUILTIN_PSLLVV8DI,
29085 IX86_BUILTIN_PSRAD512,
29086 IX86_BUILTIN_PSRADI512,
29087 IX86_BUILTIN_PSRAQ512,
29088 IX86_BUILTIN_PSRAQI512,
29089 IX86_BUILTIN_PSRAVV16SI,
29090 IX86_BUILTIN_PSRAVV8DI,
29091 IX86_BUILTIN_PSRLD512,
29092 IX86_BUILTIN_PSRLDI512,
29093 IX86_BUILTIN_PSRLQ512,
29094 IX86_BUILTIN_PSRLQI512,
29095 IX86_BUILTIN_PSRLVV16SI,
29096 IX86_BUILTIN_PSRLVV8DI,
29097 IX86_BUILTIN_PSUBD512,
29098 IX86_BUILTIN_PSUBQ512,
29099 IX86_BUILTIN_PTESTMD512,
29100 IX86_BUILTIN_PTESTMQ512,
29101 IX86_BUILTIN_PTESTNMD512,
29102 IX86_BUILTIN_PTESTNMQ512,
29103 IX86_BUILTIN_PUNPCKHDQ512,
29104 IX86_BUILTIN_PUNPCKHQDQ512,
29105 IX86_BUILTIN_PUNPCKLDQ512,
29106 IX86_BUILTIN_PUNPCKLQDQ512,
29107 IX86_BUILTIN_PXORD512,
29108 IX86_BUILTIN_PXORQ512,
29109 IX86_BUILTIN_RCP14PD512,
29110 IX86_BUILTIN_RCP14PS512,
29111 IX86_BUILTIN_RCP14SD,
29112 IX86_BUILTIN_RCP14SS,
29113 IX86_BUILTIN_RNDSCALEPD,
29114 IX86_BUILTIN_RNDSCALEPS,
29115 IX86_BUILTIN_RNDSCALESD,
29116 IX86_BUILTIN_RNDSCALESS,
29117 IX86_BUILTIN_RSQRT14PD512,
29118 IX86_BUILTIN_RSQRT14PS512,
29119 IX86_BUILTIN_RSQRT14SD,
29120 IX86_BUILTIN_RSQRT14SS,
29121 IX86_BUILTIN_SCALEFPD512,
29122 IX86_BUILTIN_SCALEFPS512,
29123 IX86_BUILTIN_SCALEFSD,
29124 IX86_BUILTIN_SCALEFSS,
29125 IX86_BUILTIN_SHUFPD512,
29126 IX86_BUILTIN_SHUFPS512,
29127 IX86_BUILTIN_SHUF_F32x4,
29128 IX86_BUILTIN_SHUF_F64x2,
29129 IX86_BUILTIN_SHUF_I32x4,
29130 IX86_BUILTIN_SHUF_I64x2,
29131 IX86_BUILTIN_SQRTPD512,
29132 IX86_BUILTIN_SQRTPD512_MASK,
29133 IX86_BUILTIN_SQRTPS512_MASK,
29134 IX86_BUILTIN_SQRTPS_NR512,
29135 IX86_BUILTIN_SQRTSD_ROUND,
29136 IX86_BUILTIN_SQRTSS_ROUND,
29137 IX86_BUILTIN_STOREAPD512,
29138 IX86_BUILTIN_STOREAPS512,
29139 IX86_BUILTIN_STOREDQUDI512,
29140 IX86_BUILTIN_STOREDQUSI512,
29141 IX86_BUILTIN_STOREUPD512,
29142 IX86_BUILTIN_STOREUPS512,
29143 IX86_BUILTIN_SUBPD512,
29144 IX86_BUILTIN_SUBPS512,
29145 IX86_BUILTIN_SUBSD_ROUND,
29146 IX86_BUILTIN_SUBSS_ROUND,
29147 IX86_BUILTIN_UCMPD512,
29148 IX86_BUILTIN_UCMPQ512,
29149 IX86_BUILTIN_UNPCKHPD512,
29150 IX86_BUILTIN_UNPCKHPS512,
29151 IX86_BUILTIN_UNPCKLPD512,
29152 IX86_BUILTIN_UNPCKLPS512,
29153 IX86_BUILTIN_VCVTSD2SI32,
29154 IX86_BUILTIN_VCVTSD2SI64,
29155 IX86_BUILTIN_VCVTSD2USI32,
29156 IX86_BUILTIN_VCVTSD2USI64,
29157 IX86_BUILTIN_VCVTSS2SI32,
29158 IX86_BUILTIN_VCVTSS2SI64,
29159 IX86_BUILTIN_VCVTSS2USI32,
29160 IX86_BUILTIN_VCVTSS2USI64,
29161 IX86_BUILTIN_VCVTTSD2SI32,
29162 IX86_BUILTIN_VCVTTSD2SI64,
29163 IX86_BUILTIN_VCVTTSD2USI32,
29164 IX86_BUILTIN_VCVTTSD2USI64,
29165 IX86_BUILTIN_VCVTTSS2SI32,
29166 IX86_BUILTIN_VCVTTSS2SI64,
29167 IX86_BUILTIN_VCVTTSS2USI32,
29168 IX86_BUILTIN_VCVTTSS2USI64,
29169 IX86_BUILTIN_VFMADDPD512_MASK,
29170 IX86_BUILTIN_VFMADDPD512_MASK3,
29171 IX86_BUILTIN_VFMADDPD512_MASKZ,
29172 IX86_BUILTIN_VFMADDPS512_MASK,
29173 IX86_BUILTIN_VFMADDPS512_MASK3,
29174 IX86_BUILTIN_VFMADDPS512_MASKZ,
29175 IX86_BUILTIN_VFMADDSD3_ROUND,
29176 IX86_BUILTIN_VFMADDSS3_ROUND,
29177 IX86_BUILTIN_VFMADDSUBPD512_MASK,
29178 IX86_BUILTIN_VFMADDSUBPD512_MASK3,
29179 IX86_BUILTIN_VFMADDSUBPD512_MASKZ,
29180 IX86_BUILTIN_VFMADDSUBPS512_MASK,
29181 IX86_BUILTIN_VFMADDSUBPS512_MASK3,
29182 IX86_BUILTIN_VFMADDSUBPS512_MASKZ,
29183 IX86_BUILTIN_VFMSUBADDPD512_MASK3,
29184 IX86_BUILTIN_VFMSUBADDPS512_MASK3,
29185 IX86_BUILTIN_VFMSUBPD512_MASK3,
29186 IX86_BUILTIN_VFMSUBPS512_MASK3,
29187 IX86_BUILTIN_VFMSUBSD3_MASK3,
29188 IX86_BUILTIN_VFMSUBSS3_MASK3,
29189 IX86_BUILTIN_VFNMADDPD512_MASK,
29190 IX86_BUILTIN_VFNMADDPS512_MASK,
29191 IX86_BUILTIN_VFNMSUBPD512_MASK,
29192 IX86_BUILTIN_VFNMSUBPD512_MASK3,
29193 IX86_BUILTIN_VFNMSUBPS512_MASK,
29194 IX86_BUILTIN_VFNMSUBPS512_MASK3,
29195 IX86_BUILTIN_VPCLZCNTD512,
29196 IX86_BUILTIN_VPCLZCNTQ512,
29197 IX86_BUILTIN_VPCONFLICTD512,
29198 IX86_BUILTIN_VPCONFLICTQ512,
29199 IX86_BUILTIN_VPERMDF512,
29200 IX86_BUILTIN_VPERMDI512,
29201 IX86_BUILTIN_VPERMI2VARD512,
29202 IX86_BUILTIN_VPERMI2VARPD512,
29203 IX86_BUILTIN_VPERMI2VARPS512,
29204 IX86_BUILTIN_VPERMI2VARQ512,
29205 IX86_BUILTIN_VPERMILPD512,
29206 IX86_BUILTIN_VPERMILPS512,
29207 IX86_BUILTIN_VPERMILVARPD512,
29208 IX86_BUILTIN_VPERMILVARPS512,
29209 IX86_BUILTIN_VPERMT2VARD512,
29210 IX86_BUILTIN_VPERMT2VARD512_MASKZ,
29211 IX86_BUILTIN_VPERMT2VARPD512,
29212 IX86_BUILTIN_VPERMT2VARPD512_MASKZ,
29213 IX86_BUILTIN_VPERMT2VARPS512,
29214 IX86_BUILTIN_VPERMT2VARPS512_MASKZ,
29215 IX86_BUILTIN_VPERMT2VARQ512,
29216 IX86_BUILTIN_VPERMT2VARQ512_MASKZ,
29217 IX86_BUILTIN_VPERMVARDF512,
29218 IX86_BUILTIN_VPERMVARDI512,
29219 IX86_BUILTIN_VPERMVARSF512,
29220 IX86_BUILTIN_VPERMVARSI512,
29221 IX86_BUILTIN_VTERNLOGD512_MASK,
29222 IX86_BUILTIN_VTERNLOGD512_MASKZ,
29223 IX86_BUILTIN_VTERNLOGQ512_MASK,
29224 IX86_BUILTIN_VTERNLOGQ512_MASKZ,
29226 /* Mask arithmetic operations */
29227 IX86_BUILTIN_KAND16,
29228 IX86_BUILTIN_KANDN16,
29229 IX86_BUILTIN_KNOT16,
29230 IX86_BUILTIN_KOR16,
29231 IX86_BUILTIN_KORTESTC16,
29232 IX86_BUILTIN_KORTESTZ16,
29233 IX86_BUILTIN_KUNPCKBW,
29234 IX86_BUILTIN_KXNOR16,
29235 IX86_BUILTIN_KXOR16,
29236 IX86_BUILTIN_KMOV16,
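 /* The 16-bit mask codes above operate on the AVX-512F k (mask)
    registers and presumably back the __mmask16 intrinsics such as
    _mm512_kand and _mm512_kortestz.  */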
29238 /* AVX512VL. */
29239 IX86_BUILTIN_PMOVUSQD256_MEM,
29240 IX86_BUILTIN_PMOVUSQD128_MEM,
29241 IX86_BUILTIN_PMOVSQD256_MEM,
29242 IX86_BUILTIN_PMOVSQD128_MEM,
29243 IX86_BUILTIN_PMOVQD256_MEM,
29244 IX86_BUILTIN_PMOVQD128_MEM,
29245 IX86_BUILTIN_PMOVUSQW256_MEM,
29246 IX86_BUILTIN_PMOVUSQW128_MEM,
29247 IX86_BUILTIN_PMOVSQW256_MEM,
29248 IX86_BUILTIN_PMOVSQW128_MEM,
29249 IX86_BUILTIN_PMOVQW256_MEM,
29250 IX86_BUILTIN_PMOVQW128_MEM,
29251 IX86_BUILTIN_PMOVUSQB256_MEM,
29252 IX86_BUILTIN_PMOVUSQB128_MEM,
29253 IX86_BUILTIN_PMOVSQB256_MEM,
29254 IX86_BUILTIN_PMOVSQB128_MEM,
29255 IX86_BUILTIN_PMOVQB256_MEM,
29256 IX86_BUILTIN_PMOVQB128_MEM,
29257 IX86_BUILTIN_PMOVUSDW256_MEM,
29258 IX86_BUILTIN_PMOVUSDW128_MEM,
29259 IX86_BUILTIN_PMOVSDW256_MEM,
29260 IX86_BUILTIN_PMOVSDW128_MEM,
29261 IX86_BUILTIN_PMOVDW256_MEM,
29262 IX86_BUILTIN_PMOVDW128_MEM,
29263 IX86_BUILTIN_PMOVUSDB256_MEM,
29264 IX86_BUILTIN_PMOVUSDB128_MEM,
29265 IX86_BUILTIN_PMOVSDB256_MEM,
29266 IX86_BUILTIN_PMOVSDB128_MEM,
29267 IX86_BUILTIN_PMOVDB256_MEM,
29268 IX86_BUILTIN_PMOVDB128_MEM,
29269 IX86_BUILTIN_MOVDQA64LOAD256_MASK,
29270 IX86_BUILTIN_MOVDQA64LOAD128_MASK,
29271 IX86_BUILTIN_MOVDQA32LOAD256_MASK,
29272 IX86_BUILTIN_MOVDQA32LOAD128_MASK,
29273 IX86_BUILTIN_MOVDQA64STORE256_MASK,
29274 IX86_BUILTIN_MOVDQA64STORE128_MASK,
29275 IX86_BUILTIN_MOVDQA32STORE256_MASK,
29276 IX86_BUILTIN_MOVDQA32STORE128_MASK,
29277 IX86_BUILTIN_LOADAPD256_MASK,
29278 IX86_BUILTIN_LOADAPD128_MASK,
29279 IX86_BUILTIN_LOADAPS256_MASK,
29280 IX86_BUILTIN_LOADAPS128_MASK,
29281 IX86_BUILTIN_STOREAPD256_MASK,
29282 IX86_BUILTIN_STOREAPD128_MASK,
29283 IX86_BUILTIN_STOREAPS256_MASK,
29284 IX86_BUILTIN_STOREAPS128_MASK,
29285 IX86_BUILTIN_LOADUPD256_MASK,
29286 IX86_BUILTIN_LOADUPD128_MASK,
29287 IX86_BUILTIN_LOADUPS256_MASK,
29288 IX86_BUILTIN_LOADUPS128_MASK,
29289 IX86_BUILTIN_STOREUPD256_MASK,
29290 IX86_BUILTIN_STOREUPD128_MASK,
29291 IX86_BUILTIN_STOREUPS256_MASK,
29292 IX86_BUILTIN_STOREUPS128_MASK,
29293 IX86_BUILTIN_LOADDQUDI256_MASK,
29294 IX86_BUILTIN_LOADDQUDI128_MASK,
29295 IX86_BUILTIN_LOADDQUSI256_MASK,
29296 IX86_BUILTIN_LOADDQUSI128_MASK,
29297 IX86_BUILTIN_LOADDQUHI256_MASK,
29298 IX86_BUILTIN_LOADDQUHI128_MASK,
29299 IX86_BUILTIN_LOADDQUQI256_MASK,
29300 IX86_BUILTIN_LOADDQUQI128_MASK,
29301 IX86_BUILTIN_STOREDQUDI256_MASK,
29302 IX86_BUILTIN_STOREDQUDI128_MASK,
29303 IX86_BUILTIN_STOREDQUSI256_MASK,
29304 IX86_BUILTIN_STOREDQUSI128_MASK,
29305 IX86_BUILTIN_STOREDQUHI256_MASK,
29306 IX86_BUILTIN_STOREDQUHI128_MASK,
29307 IX86_BUILTIN_STOREDQUQI256_MASK,
29308 IX86_BUILTIN_STOREDQUQI128_MASK,
29309 IX86_BUILTIN_COMPRESSPDSTORE256,
29310 IX86_BUILTIN_COMPRESSPDSTORE128,
29311 IX86_BUILTIN_COMPRESSPSSTORE256,
29312 IX86_BUILTIN_COMPRESSPSSTORE128,
29313 IX86_BUILTIN_PCOMPRESSQSTORE256,
29314 IX86_BUILTIN_PCOMPRESSQSTORE128,
29315 IX86_BUILTIN_PCOMPRESSDSTORE256,
29316 IX86_BUILTIN_PCOMPRESSDSTORE128,
29317 IX86_BUILTIN_EXPANDPDLOAD256,
29318 IX86_BUILTIN_EXPANDPDLOAD128,
29319 IX86_BUILTIN_EXPANDPSLOAD256,
29320 IX86_BUILTIN_EXPANDPSLOAD128,
29321 IX86_BUILTIN_PEXPANDQLOAD256,
29322 IX86_BUILTIN_PEXPANDQLOAD128,
29323 IX86_BUILTIN_PEXPANDDLOAD256,
29324 IX86_BUILTIN_PEXPANDDLOAD128,
29325 IX86_BUILTIN_EXPANDPDLOAD256Z,
29326 IX86_BUILTIN_EXPANDPDLOAD128Z,
29327 IX86_BUILTIN_EXPANDPSLOAD256Z,
29328 IX86_BUILTIN_EXPANDPSLOAD128Z,
29329 IX86_BUILTIN_PEXPANDQLOAD256Z,
29330 IX86_BUILTIN_PEXPANDQLOAD128Z,
29331 IX86_BUILTIN_PEXPANDDLOAD256Z,
29332 IX86_BUILTIN_PEXPANDDLOAD128Z,
29333 IX86_BUILTIN_PALIGNR256_MASK,
29334 IX86_BUILTIN_PALIGNR128_MASK,
29335 IX86_BUILTIN_MOVDQA64_256_MASK,
29336 IX86_BUILTIN_MOVDQA64_128_MASK,
29337 IX86_BUILTIN_MOVDQA32_256_MASK,
29338 IX86_BUILTIN_MOVDQA32_128_MASK,
29339 IX86_BUILTIN_MOVAPD256_MASK,
29340 IX86_BUILTIN_MOVAPD128_MASK,
29341 IX86_BUILTIN_MOVAPS256_MASK,
29342 IX86_BUILTIN_MOVAPS128_MASK,
29343 IX86_BUILTIN_MOVDQUHI256_MASK,
29344 IX86_BUILTIN_MOVDQUHI128_MASK,
29345 IX86_BUILTIN_MOVDQUQI256_MASK,
29346 IX86_BUILTIN_MOVDQUQI128_MASK,
29347 IX86_BUILTIN_MINPS128_MASK,
29348 IX86_BUILTIN_MAXPS128_MASK,
29349 IX86_BUILTIN_MINPD128_MASK,
29350 IX86_BUILTIN_MAXPD128_MASK,
29351 IX86_BUILTIN_MAXPD256_MASK,
29352 IX86_BUILTIN_MAXPS256_MASK,
29353 IX86_BUILTIN_MINPD256_MASK,
29354 IX86_BUILTIN_MINPS256_MASK,
29355 IX86_BUILTIN_MULPS128_MASK,
29356 IX86_BUILTIN_DIVPS128_MASK,
29357 IX86_BUILTIN_MULPD128_MASK,
29358 IX86_BUILTIN_DIVPD128_MASK,
29359 IX86_BUILTIN_DIVPD256_MASK,
29360 IX86_BUILTIN_DIVPS256_MASK,
29361 IX86_BUILTIN_MULPD256_MASK,
29362 IX86_BUILTIN_MULPS256_MASK,
29363 IX86_BUILTIN_ADDPD128_MASK,
29364 IX86_BUILTIN_ADDPD256_MASK,
29365 IX86_BUILTIN_ADDPS128_MASK,
29366 IX86_BUILTIN_ADDPS256_MASK,
29367 IX86_BUILTIN_SUBPD128_MASK,
29368 IX86_BUILTIN_SUBPD256_MASK,
29369 IX86_BUILTIN_SUBPS128_MASK,
29370 IX86_BUILTIN_SUBPS256_MASK,
29371 IX86_BUILTIN_XORPD256_MASK,
29372 IX86_BUILTIN_XORPD128_MASK,
29373 IX86_BUILTIN_XORPS256_MASK,
29374 IX86_BUILTIN_XORPS128_MASK,
29375 IX86_BUILTIN_ORPD256_MASK,
29376 IX86_BUILTIN_ORPD128_MASK,
29377 IX86_BUILTIN_ORPS256_MASK,
29378 IX86_BUILTIN_ORPS128_MASK,
29379 IX86_BUILTIN_BROADCASTF32x2_256,
29380 IX86_BUILTIN_BROADCASTI32x2_256,
29381 IX86_BUILTIN_BROADCASTI32x2_128,
29382 IX86_BUILTIN_BROADCASTF64X2_256,
29383 IX86_BUILTIN_BROADCASTI64X2_256,
29384 IX86_BUILTIN_BROADCASTF32X4_256,
29385 IX86_BUILTIN_BROADCASTI32X4_256,
29386 IX86_BUILTIN_EXTRACTF32X4_256,
29387 IX86_BUILTIN_EXTRACTI32X4_256,
29388 IX86_BUILTIN_DBPSADBW256,
29389 IX86_BUILTIN_DBPSADBW128,
29390 IX86_BUILTIN_CVTTPD2QQ256,
29391 IX86_BUILTIN_CVTTPD2QQ128,
29392 IX86_BUILTIN_CVTTPD2UQQ256,
29393 IX86_BUILTIN_CVTTPD2UQQ128,
29394 IX86_BUILTIN_CVTPD2QQ256,
29395 IX86_BUILTIN_CVTPD2QQ128,
29396 IX86_BUILTIN_CVTPD2UQQ256,
29397 IX86_BUILTIN_CVTPD2UQQ128,
29398 IX86_BUILTIN_CVTPD2UDQ256_MASK,
29399 IX86_BUILTIN_CVTPD2UDQ128_MASK,
29400 IX86_BUILTIN_CVTTPS2QQ256,
29401 IX86_BUILTIN_CVTTPS2QQ128,
29402 IX86_BUILTIN_CVTTPS2UQQ256,
29403 IX86_BUILTIN_CVTTPS2UQQ128,
29404 IX86_BUILTIN_CVTTPS2DQ256_MASK,
29405 IX86_BUILTIN_CVTTPS2DQ128_MASK,
29406 IX86_BUILTIN_CVTTPS2UDQ256,
29407 IX86_BUILTIN_CVTTPS2UDQ128,
29408 IX86_BUILTIN_CVTTPD2DQ256_MASK,
29409 IX86_BUILTIN_CVTTPD2DQ128_MASK,
29410 IX86_BUILTIN_CVTTPD2UDQ256_MASK,
29411 IX86_BUILTIN_CVTTPD2UDQ128_MASK,
29412 IX86_BUILTIN_CVTPD2DQ256_MASK,
29413 IX86_BUILTIN_CVTPD2DQ128_MASK,
29414 IX86_BUILTIN_CVTDQ2PD256_MASK,
29415 IX86_BUILTIN_CVTDQ2PD128_MASK,
29416 IX86_BUILTIN_CVTUDQ2PD256_MASK,
29417 IX86_BUILTIN_CVTUDQ2PD128_MASK,
29418 IX86_BUILTIN_CVTDQ2PS256_MASK,
29419 IX86_BUILTIN_CVTDQ2PS128_MASK,
29420 IX86_BUILTIN_CVTUDQ2PS256_MASK,
29421 IX86_BUILTIN_CVTUDQ2PS128_MASK,
29422 IX86_BUILTIN_CVTPS2PD256_MASK,
29423 IX86_BUILTIN_CVTPS2PD128_MASK,
29424 IX86_BUILTIN_PBROADCASTB256_MASK,
29425 IX86_BUILTIN_PBROADCASTB256_GPR_MASK,
29426 IX86_BUILTIN_PBROADCASTB128_MASK,
29427 IX86_BUILTIN_PBROADCASTB128_GPR_MASK,
29428 IX86_BUILTIN_PBROADCASTW256_MASK,
29429 IX86_BUILTIN_PBROADCASTW256_GPR_MASK,
29430 IX86_BUILTIN_PBROADCASTW128_MASK,
29431 IX86_BUILTIN_PBROADCASTW128_GPR_MASK,
29432 IX86_BUILTIN_PBROADCASTD256_MASK,
29433 IX86_BUILTIN_PBROADCASTD256_GPR_MASK,
29434 IX86_BUILTIN_PBROADCASTD128_MASK,
29435 IX86_BUILTIN_PBROADCASTD128_GPR_MASK,
29436 IX86_BUILTIN_PBROADCASTQ256_MASK,
29437 IX86_BUILTIN_PBROADCASTQ256_GPR_MASK,
29438 IX86_BUILTIN_PBROADCASTQ128_MASK,
29439 IX86_BUILTIN_PBROADCASTQ128_GPR_MASK,
29440 IX86_BUILTIN_BROADCASTSS256,
29441 IX86_BUILTIN_BROADCASTSS128,
29442 IX86_BUILTIN_BROADCASTSD256,
29443 IX86_BUILTIN_EXTRACTF64X2_256,
29444 IX86_BUILTIN_EXTRACTI64X2_256,
29445 IX86_BUILTIN_INSERTF32X4_256,
29446 IX86_BUILTIN_INSERTI32X4_256,
29447 IX86_BUILTIN_PMOVSXBW256_MASK,
29448 IX86_BUILTIN_PMOVSXBW128_MASK,
29449 IX86_BUILTIN_PMOVSXBD256_MASK,
29450 IX86_BUILTIN_PMOVSXBD128_MASK,
29451 IX86_BUILTIN_PMOVSXBQ256_MASK,
29452 IX86_BUILTIN_PMOVSXBQ128_MASK,
29453 IX86_BUILTIN_PMOVSXWD256_MASK,
29454 IX86_BUILTIN_PMOVSXWD128_MASK,
29455 IX86_BUILTIN_PMOVSXWQ256_MASK,
29456 IX86_BUILTIN_PMOVSXWQ128_MASK,
29457 IX86_BUILTIN_PMOVSXDQ256_MASK,
29458 IX86_BUILTIN_PMOVSXDQ128_MASK,
29459 IX86_BUILTIN_PMOVZXBW256_MASK,
29460 IX86_BUILTIN_PMOVZXBW128_MASK,
29461 IX86_BUILTIN_PMOVZXBD256_MASK,
29462 IX86_BUILTIN_PMOVZXBD128_MASK,
29463 IX86_BUILTIN_PMOVZXBQ256_MASK,
29464 IX86_BUILTIN_PMOVZXBQ128_MASK,
29465 IX86_BUILTIN_PMOVZXWD256_MASK,
29466 IX86_BUILTIN_PMOVZXWD128_MASK,
29467 IX86_BUILTIN_PMOVZXWQ256_MASK,
29468 IX86_BUILTIN_PMOVZXWQ128_MASK,
29469 IX86_BUILTIN_PMOVZXDQ256_MASK,
29470 IX86_BUILTIN_PMOVZXDQ128_MASK,
29471 IX86_BUILTIN_REDUCEPD256_MASK,
29472 IX86_BUILTIN_REDUCEPD128_MASK,
29473 IX86_BUILTIN_REDUCEPS256_MASK,
29474 IX86_BUILTIN_REDUCEPS128_MASK,
29475 IX86_BUILTIN_REDUCESD_MASK,
29476 IX86_BUILTIN_REDUCESS_MASK,
29477 IX86_BUILTIN_VPERMVARHI256_MASK,
29478 IX86_BUILTIN_VPERMVARHI128_MASK,
29479 IX86_BUILTIN_VPERMT2VARHI256,
29480 IX86_BUILTIN_VPERMT2VARHI256_MASKZ,
29481 IX86_BUILTIN_VPERMT2VARHI128,
29482 IX86_BUILTIN_VPERMT2VARHI128_MASKZ,
29483 IX86_BUILTIN_VPERMI2VARHI256,
29484 IX86_BUILTIN_VPERMI2VARHI128,
29485 IX86_BUILTIN_RCP14PD256,
29486 IX86_BUILTIN_RCP14PD128,
29487 IX86_BUILTIN_RCP14PS256,
29488 IX86_BUILTIN_RCP14PS128,
29489 IX86_BUILTIN_RSQRT14PD256_MASK,
29490 IX86_BUILTIN_RSQRT14PD128_MASK,
29491 IX86_BUILTIN_RSQRT14PS256_MASK,
29492 IX86_BUILTIN_RSQRT14PS128_MASK,
29493 IX86_BUILTIN_SQRTPD256_MASK,
29494 IX86_BUILTIN_SQRTPD128_MASK,
29495 IX86_BUILTIN_SQRTPS256_MASK,
29496 IX86_BUILTIN_SQRTPS128_MASK,
29497 IX86_BUILTIN_PADDB128_MASK,
29498 IX86_BUILTIN_PADDW128_MASK,
29499 IX86_BUILTIN_PADDD128_MASK,
29500 IX86_BUILTIN_PADDQ128_MASK,
29501 IX86_BUILTIN_PSUBB128_MASK,
29502 IX86_BUILTIN_PSUBW128_MASK,
29503 IX86_BUILTIN_PSUBD128_MASK,
29504 IX86_BUILTIN_PSUBQ128_MASK,
29505 IX86_BUILTIN_PADDSB128_MASK,
29506 IX86_BUILTIN_PADDSW128_MASK,
29507 IX86_BUILTIN_PSUBSB128_MASK,
29508 IX86_BUILTIN_PSUBSW128_MASK,
29509 IX86_BUILTIN_PADDUSB128_MASK,
29510 IX86_BUILTIN_PADDUSW128_MASK,
29511 IX86_BUILTIN_PSUBUSB128_MASK,
29512 IX86_BUILTIN_PSUBUSW128_MASK,
29513 IX86_BUILTIN_PADDB256_MASK,
29514 IX86_BUILTIN_PADDW256_MASK,
29515 IX86_BUILTIN_PADDD256_MASK,
29516 IX86_BUILTIN_PADDQ256_MASK,
29517 IX86_BUILTIN_PADDSB256_MASK,
29518 IX86_BUILTIN_PADDSW256_MASK,
29519 IX86_BUILTIN_PADDUSB256_MASK,
29520 IX86_BUILTIN_PADDUSW256_MASK,
29521 IX86_BUILTIN_PSUBB256_MASK,
29522 IX86_BUILTIN_PSUBW256_MASK,
29523 IX86_BUILTIN_PSUBD256_MASK,
29524 IX86_BUILTIN_PSUBQ256_MASK,
29525 IX86_BUILTIN_PSUBSB256_MASK,
29526 IX86_BUILTIN_PSUBSW256_MASK,
29527 IX86_BUILTIN_PSUBUSB256_MASK,
29528 IX86_BUILTIN_PSUBUSW256_MASK,
29529 IX86_BUILTIN_SHUF_F64x2_256,
29530 IX86_BUILTIN_SHUF_I64x2_256,
29531 IX86_BUILTIN_SHUF_I32x4_256,
29532 IX86_BUILTIN_SHUF_F32x4_256,
29533 IX86_BUILTIN_PMOVWB128,
29534 IX86_BUILTIN_PMOVWB256,
29535 IX86_BUILTIN_PMOVSWB128,
29536 IX86_BUILTIN_PMOVSWB256,
29537 IX86_BUILTIN_PMOVUSWB128,
29538 IX86_BUILTIN_PMOVUSWB256,
29539 IX86_BUILTIN_PMOVDB128,
29540 IX86_BUILTIN_PMOVDB256,
29541 IX86_BUILTIN_PMOVSDB128,
29542 IX86_BUILTIN_PMOVSDB256,
29543 IX86_BUILTIN_PMOVUSDB128,
29544 IX86_BUILTIN_PMOVUSDB256,
29545 IX86_BUILTIN_PMOVDW128,
29546 IX86_BUILTIN_PMOVDW256,
29547 IX86_BUILTIN_PMOVSDW128,
29548 IX86_BUILTIN_PMOVSDW256,
29549 IX86_BUILTIN_PMOVUSDW128,
29550 IX86_BUILTIN_PMOVUSDW256,
29551 IX86_BUILTIN_PMOVQB128,
29552 IX86_BUILTIN_PMOVQB256,
29553 IX86_BUILTIN_PMOVSQB128,
29554 IX86_BUILTIN_PMOVSQB256,
29555 IX86_BUILTIN_PMOVUSQB128,
29556 IX86_BUILTIN_PMOVUSQB256,
29557 IX86_BUILTIN_PMOVQW128,
29558 IX86_BUILTIN_PMOVQW256,
29559 IX86_BUILTIN_PMOVSQW128,
29560 IX86_BUILTIN_PMOVSQW256,
29561 IX86_BUILTIN_PMOVUSQW128,
29562 IX86_BUILTIN_PMOVUSQW256,
29563 IX86_BUILTIN_PMOVQD128,
29564 IX86_BUILTIN_PMOVQD256,
29565 IX86_BUILTIN_PMOVSQD128,
29566 IX86_BUILTIN_PMOVSQD256,
29567 IX86_BUILTIN_PMOVUSQD128,
29568 IX86_BUILTIN_PMOVUSQD256,
29569 IX86_BUILTIN_RANGEPD256,
29570 IX86_BUILTIN_RANGEPD128,
29571 IX86_BUILTIN_RANGEPS256,
29572 IX86_BUILTIN_RANGEPS128,
29573 IX86_BUILTIN_GETEXPPS256,
29574 IX86_BUILTIN_GETEXPPD256,
29575 IX86_BUILTIN_GETEXPPS128,
29576 IX86_BUILTIN_GETEXPPD128,
29577 IX86_BUILTIN_FIXUPIMMPD256_MASK,
29578 IX86_BUILTIN_FIXUPIMMPD256_MASKZ,
29579 IX86_BUILTIN_FIXUPIMMPS256_MASK,
29580 IX86_BUILTIN_FIXUPIMMPS256_MASKZ,
29581 IX86_BUILTIN_FIXUPIMMPD128_MASK,
29582 IX86_BUILTIN_FIXUPIMMPD128_MASKZ,
29583 IX86_BUILTIN_FIXUPIMMPS128_MASK,
29584 IX86_BUILTIN_FIXUPIMMPS128_MASKZ,
29585 IX86_BUILTIN_PABSQ256,
29586 IX86_BUILTIN_PABSQ128,
29587 IX86_BUILTIN_PABSD256_MASK,
29588 IX86_BUILTIN_PABSD128_MASK,
29589 IX86_BUILTIN_PMULHRSW256_MASK,
29590 IX86_BUILTIN_PMULHRSW128_MASK,
29591 IX86_BUILTIN_PMULHUW128_MASK,
29592 IX86_BUILTIN_PMULHUW256_MASK,
29593 IX86_BUILTIN_PMULHW256_MASK,
29594 IX86_BUILTIN_PMULHW128_MASK,
29595 IX86_BUILTIN_PMULLW256_MASK,
29596 IX86_BUILTIN_PMULLW128_MASK,
29597 IX86_BUILTIN_PMULLQ256,
29598 IX86_BUILTIN_PMULLQ128,
29599 IX86_BUILTIN_ANDPD256_MASK,
29600 IX86_BUILTIN_ANDPD128_MASK,
29601 IX86_BUILTIN_ANDPS256_MASK,
29602 IX86_BUILTIN_ANDPS128_MASK,
29603 IX86_BUILTIN_ANDNPD256_MASK,
29604 IX86_BUILTIN_ANDNPD128_MASK,
29605 IX86_BUILTIN_ANDNPS256_MASK,
29606 IX86_BUILTIN_ANDNPS128_MASK,
29607 IX86_BUILTIN_PSLLWI128_MASK,
29608 IX86_BUILTIN_PSLLDI128_MASK,
29609 IX86_BUILTIN_PSLLQI128_MASK,
29610 IX86_BUILTIN_PSLLW128_MASK,
29611 IX86_BUILTIN_PSLLD128_MASK,
29612 IX86_BUILTIN_PSLLQ128_MASK,
29613 IX86_BUILTIN_PSLLWI256_MASK,
29614 IX86_BUILTIN_PSLLW256_MASK,
29615 IX86_BUILTIN_PSLLDI256_MASK,
29616 IX86_BUILTIN_PSLLD256_MASK,
29617 IX86_BUILTIN_PSLLQI256_MASK,
29618 IX86_BUILTIN_PSLLQ256_MASK,
29619 IX86_BUILTIN_PSRADI128_MASK,
29620 IX86_BUILTIN_PSRAD128_MASK,
29621 IX86_BUILTIN_PSRADI256_MASK,
29622 IX86_BUILTIN_PSRAD256_MASK,
29623 IX86_BUILTIN_PSRAQI128_MASK,
29624 IX86_BUILTIN_PSRAQ128_MASK,
29625 IX86_BUILTIN_PSRAQI256_MASK,
29626 IX86_BUILTIN_PSRAQ256_MASK,
29627 IX86_BUILTIN_PANDD256,
29628 IX86_BUILTIN_PANDD128,
29629 IX86_BUILTIN_PSRLDI128_MASK,
29630 IX86_BUILTIN_PSRLD128_MASK,
29631 IX86_BUILTIN_PSRLDI256_MASK,
29632 IX86_BUILTIN_PSRLD256_MASK,
29633 IX86_BUILTIN_PSRLQI128_MASK,
29634 IX86_BUILTIN_PSRLQ128_MASK,
29635 IX86_BUILTIN_PSRLQI256_MASK,
29636 IX86_BUILTIN_PSRLQ256_MASK,
29637 IX86_BUILTIN_PANDQ256,
29638 IX86_BUILTIN_PANDQ128,
29639 IX86_BUILTIN_PANDND256,
29640 IX86_BUILTIN_PANDND128,
29641 IX86_BUILTIN_PANDNQ256,
29642 IX86_BUILTIN_PANDNQ128,
29643 IX86_BUILTIN_PORD256,
29644 IX86_BUILTIN_PORD128,
29645 IX86_BUILTIN_PORQ256,
29646 IX86_BUILTIN_PORQ128,
29647 IX86_BUILTIN_PXORD256,
29648 IX86_BUILTIN_PXORD128,
29649 IX86_BUILTIN_PXORQ256,
29650 IX86_BUILTIN_PXORQ128,
29651 IX86_BUILTIN_PACKSSWB256_MASK,
29652 IX86_BUILTIN_PACKSSWB128_MASK,
29653 IX86_BUILTIN_PACKUSWB256_MASK,
29654 IX86_BUILTIN_PACKUSWB128_MASK,
29655 IX86_BUILTIN_RNDSCALEPS256,
29656 IX86_BUILTIN_RNDSCALEPD256,
29657 IX86_BUILTIN_RNDSCALEPS128,
29658 IX86_BUILTIN_RNDSCALEPD128,
29659 IX86_BUILTIN_VTERNLOGQ256_MASK,
29660 IX86_BUILTIN_VTERNLOGQ256_MASKZ,
29661 IX86_BUILTIN_VTERNLOGD256_MASK,
29662 IX86_BUILTIN_VTERNLOGD256_MASKZ,
29663 IX86_BUILTIN_VTERNLOGQ128_MASK,
29664 IX86_BUILTIN_VTERNLOGQ128_MASKZ,
29665 IX86_BUILTIN_VTERNLOGD128_MASK,
29666 IX86_BUILTIN_VTERNLOGD128_MASKZ,
29667 IX86_BUILTIN_SCALEFPD256,
29668 IX86_BUILTIN_SCALEFPS256,
29669 IX86_BUILTIN_SCALEFPD128,
29670 IX86_BUILTIN_SCALEFPS128,
29671 IX86_BUILTIN_VFMADDPD256_MASK,
29672 IX86_BUILTIN_VFMADDPD256_MASK3,
29673 IX86_BUILTIN_VFMADDPD256_MASKZ,
29674 IX86_BUILTIN_VFMADDPD128_MASK,
29675 IX86_BUILTIN_VFMADDPD128_MASK3,
29676 IX86_BUILTIN_VFMADDPD128_MASKZ,
29677 IX86_BUILTIN_VFMADDPS256_MASK,
29678 IX86_BUILTIN_VFMADDPS256_MASK3,
29679 IX86_BUILTIN_VFMADDPS256_MASKZ,
29680 IX86_BUILTIN_VFMADDPS128_MASK,
29681 IX86_BUILTIN_VFMADDPS128_MASK3,
29682 IX86_BUILTIN_VFMADDPS128_MASKZ,
29683 IX86_BUILTIN_VFMSUBPD256_MASK3,
29684 IX86_BUILTIN_VFMSUBPD128_MASK3,
29685 IX86_BUILTIN_VFMSUBPS256_MASK3,
29686 IX86_BUILTIN_VFMSUBPS128_MASK3,
29687 IX86_BUILTIN_VFNMADDPD256_MASK,
29688 IX86_BUILTIN_VFNMADDPD128_MASK,
29689 IX86_BUILTIN_VFNMADDPS256_MASK,
29690 IX86_BUILTIN_VFNMADDPS128_MASK,
29691 IX86_BUILTIN_VFNMSUBPD256_MASK,
29692 IX86_BUILTIN_VFNMSUBPD256_MASK3,
29693 IX86_BUILTIN_VFNMSUBPD128_MASK,
29694 IX86_BUILTIN_VFNMSUBPD128_MASK3,
29695 IX86_BUILTIN_VFNMSUBPS256_MASK,
29696 IX86_BUILTIN_VFNMSUBPS256_MASK3,
29697 IX86_BUILTIN_VFNMSUBPS128_MASK,
29698 IX86_BUILTIN_VFNMSUBPS128_MASK3,
29699 IX86_BUILTIN_VFMADDSUBPD256_MASK,
29700 IX86_BUILTIN_VFMADDSUBPD256_MASK3,
29701 IX86_BUILTIN_VFMADDSUBPD256_MASKZ,
29702 IX86_BUILTIN_VFMADDSUBPD128_MASK,
29703 IX86_BUILTIN_VFMADDSUBPD128_MASK3,
29704 IX86_BUILTIN_VFMADDSUBPD128_MASKZ,
29705 IX86_BUILTIN_VFMADDSUBPS256_MASK,
29706 IX86_BUILTIN_VFMADDSUBPS256_MASK3,
29707 IX86_BUILTIN_VFMADDSUBPS256_MASKZ,
29708 IX86_BUILTIN_VFMADDSUBPS128_MASK,
29709 IX86_BUILTIN_VFMADDSUBPS128_MASK3,
29710 IX86_BUILTIN_VFMADDSUBPS128_MASKZ,
29711 IX86_BUILTIN_VFMSUBADDPD256_MASK3,
29712 IX86_BUILTIN_VFMSUBADDPD128_MASK3,
29713 IX86_BUILTIN_VFMSUBADDPS256_MASK3,
29714 IX86_BUILTIN_VFMSUBADDPS128_MASK3,
29715 IX86_BUILTIN_INSERTF64X2_256,
29716 IX86_BUILTIN_INSERTI64X2_256,
29717 IX86_BUILTIN_PSRAVV16HI,
29718 IX86_BUILTIN_PSRAVV8HI,
29719 IX86_BUILTIN_PMADDUBSW256_MASK,
29720 IX86_BUILTIN_PMADDUBSW128_MASK,
29721 IX86_BUILTIN_PMADDWD256_MASK,
29722 IX86_BUILTIN_PMADDWD128_MASK,
29723 IX86_BUILTIN_PSRLVV16HI,
29724 IX86_BUILTIN_PSRLVV8HI,
29725 IX86_BUILTIN_CVTPS2DQ256_MASK,
29726 IX86_BUILTIN_CVTPS2DQ128_MASK,
29727 IX86_BUILTIN_CVTPS2UDQ256,
29728 IX86_BUILTIN_CVTPS2UDQ128,
29729 IX86_BUILTIN_CVTPS2QQ256,
29730 IX86_BUILTIN_CVTPS2QQ128,
29731 IX86_BUILTIN_CVTPS2UQQ256,
29732 IX86_BUILTIN_CVTPS2UQQ128,
29733 IX86_BUILTIN_GETMANTPS256,
29734 IX86_BUILTIN_GETMANTPS128,
29735 IX86_BUILTIN_GETMANTPD256,
29736 IX86_BUILTIN_GETMANTPD128,
29737 IX86_BUILTIN_MOVDDUP256_MASK,
29738 IX86_BUILTIN_MOVDDUP128_MASK,
29739 IX86_BUILTIN_MOVSHDUP256_MASK,
29740 IX86_BUILTIN_MOVSHDUP128_MASK,
29741 IX86_BUILTIN_MOVSLDUP256_MASK,
29742 IX86_BUILTIN_MOVSLDUP128_MASK,
29743 IX86_BUILTIN_CVTQQ2PS256,
29744 IX86_BUILTIN_CVTQQ2PS128,
29745 IX86_BUILTIN_CVTUQQ2PS256,
29746 IX86_BUILTIN_CVTUQQ2PS128,
29747 IX86_BUILTIN_CVTQQ2PD256,
29748 IX86_BUILTIN_CVTQQ2PD128,
29749 IX86_BUILTIN_CVTUQQ2PD256,
29750 IX86_BUILTIN_CVTUQQ2PD128,
29751 IX86_BUILTIN_VPERMT2VARQ256,
29752 IX86_BUILTIN_VPERMT2VARQ256_MASKZ,
29753 IX86_BUILTIN_VPERMT2VARD256,
29754 IX86_BUILTIN_VPERMT2VARD256_MASKZ,
29755 IX86_BUILTIN_VPERMI2VARQ256,
29756 IX86_BUILTIN_VPERMI2VARD256,
29757 IX86_BUILTIN_VPERMT2VARPD256,
29758 IX86_BUILTIN_VPERMT2VARPD256_MASKZ,
29759 IX86_BUILTIN_VPERMT2VARPS256,
29760 IX86_BUILTIN_VPERMT2VARPS256_MASKZ,
29761 IX86_BUILTIN_VPERMI2VARPD256,
29762 IX86_BUILTIN_VPERMI2VARPS256,
29763 IX86_BUILTIN_VPERMT2VARQ128,
29764 IX86_BUILTIN_VPERMT2VARQ128_MASKZ,
29765 IX86_BUILTIN_VPERMT2VARD128,
29766 IX86_BUILTIN_VPERMT2VARD128_MASKZ,
29767 IX86_BUILTIN_VPERMI2VARQ128,
29768 IX86_BUILTIN_VPERMI2VARD128,
29769 IX86_BUILTIN_VPERMT2VARPD128,
29770 IX86_BUILTIN_VPERMT2VARPD128_MASKZ,
29771 IX86_BUILTIN_VPERMT2VARPS128,
29772 IX86_BUILTIN_VPERMT2VARPS128_MASKZ,
29773 IX86_BUILTIN_VPERMI2VARPD128,
29774 IX86_BUILTIN_VPERMI2VARPS128,
29775 IX86_BUILTIN_PSHUFB256_MASK,
29776 IX86_BUILTIN_PSHUFB128_MASK,
29777 IX86_BUILTIN_PSHUFHW256_MASK,
29778 IX86_BUILTIN_PSHUFHW128_MASK,
29779 IX86_BUILTIN_PSHUFLW256_MASK,
29780 IX86_BUILTIN_PSHUFLW128_MASK,
29781 IX86_BUILTIN_PSHUFD256_MASK,
29782 IX86_BUILTIN_PSHUFD128_MASK,
29783 IX86_BUILTIN_SHUFPD256_MASK,
29784 IX86_BUILTIN_SHUFPD128_MASK,
29785 IX86_BUILTIN_SHUFPS256_MASK,
29786 IX86_BUILTIN_SHUFPS128_MASK,
29787 IX86_BUILTIN_PROLVQ256,
29788 IX86_BUILTIN_PROLVQ128,
29789 IX86_BUILTIN_PROLQ256,
29790 IX86_BUILTIN_PROLQ128,
29791 IX86_BUILTIN_PRORVQ256,
29792 IX86_BUILTIN_PRORVQ128,
29793 IX86_BUILTIN_PRORQ256,
29794 IX86_BUILTIN_PRORQ128,
29795 IX86_BUILTIN_PSRAVQ128,
29796 IX86_BUILTIN_PSRAVQ256,
29797 IX86_BUILTIN_PSLLVV4DI_MASK,
29798 IX86_BUILTIN_PSLLVV2DI_MASK,
29799 IX86_BUILTIN_PSLLVV8SI_MASK,
29800 IX86_BUILTIN_PSLLVV4SI_MASK,
29801 IX86_BUILTIN_PSRAVV8SI_MASK,
29802 IX86_BUILTIN_PSRAVV4SI_MASK,
29803 IX86_BUILTIN_PSRLVV4DI_MASK,
29804 IX86_BUILTIN_PSRLVV2DI_MASK,
29805 IX86_BUILTIN_PSRLVV8SI_MASK,
29806 IX86_BUILTIN_PSRLVV4SI_MASK,
29807 IX86_BUILTIN_PSRAWI256_MASK,
29808 IX86_BUILTIN_PSRAW256_MASK,
29809 IX86_BUILTIN_PSRAWI128_MASK,
29810 IX86_BUILTIN_PSRAW128_MASK,
29811 IX86_BUILTIN_PSRLWI256_MASK,
29812 IX86_BUILTIN_PSRLW256_MASK,
29813 IX86_BUILTIN_PSRLWI128_MASK,
29814 IX86_BUILTIN_PSRLW128_MASK,
29815 IX86_BUILTIN_PRORVD256,
29816 IX86_BUILTIN_PROLVD256,
29817 IX86_BUILTIN_PRORD256,
29818 IX86_BUILTIN_PROLD256,
29819 IX86_BUILTIN_PRORVD128,
29820 IX86_BUILTIN_PROLVD128,
29821 IX86_BUILTIN_PRORD128,
29822 IX86_BUILTIN_PROLD128,
29823 IX86_BUILTIN_FPCLASSPD256,
29824 IX86_BUILTIN_FPCLASSPD128,
29825 IX86_BUILTIN_FPCLASSSD,
29826 IX86_BUILTIN_FPCLASSPS256,
29827 IX86_BUILTIN_FPCLASSPS128,
29828 IX86_BUILTIN_FPCLASSSS,
29829 IX86_BUILTIN_CVTB2MASK128,
29830 IX86_BUILTIN_CVTB2MASK256,
29831 IX86_BUILTIN_CVTW2MASK128,
29832 IX86_BUILTIN_CVTW2MASK256,
29833 IX86_BUILTIN_CVTD2MASK128,
29834 IX86_BUILTIN_CVTD2MASK256,
29835 IX86_BUILTIN_CVTQ2MASK128,
29836 IX86_BUILTIN_CVTQ2MASK256,
29837 IX86_BUILTIN_CVTMASK2B128,
29838 IX86_BUILTIN_CVTMASK2B256,
29839 IX86_BUILTIN_CVTMASK2W128,
29840 IX86_BUILTIN_CVTMASK2W256,
29841 IX86_BUILTIN_CVTMASK2D128,
29842 IX86_BUILTIN_CVTMASK2D256,
29843 IX86_BUILTIN_CVTMASK2Q128,
29844 IX86_BUILTIN_CVTMASK2Q256,
29845 IX86_BUILTIN_PCMPEQB128_MASK,
29846 IX86_BUILTIN_PCMPEQB256_MASK,
29847 IX86_BUILTIN_PCMPEQW128_MASK,
29848 IX86_BUILTIN_PCMPEQW256_MASK,
29849 IX86_BUILTIN_PCMPEQD128_MASK,
29850 IX86_BUILTIN_PCMPEQD256_MASK,
29851 IX86_BUILTIN_PCMPEQQ128_MASK,
29852 IX86_BUILTIN_PCMPEQQ256_MASK,
29853 IX86_BUILTIN_PCMPGTB128_MASK,
29854 IX86_BUILTIN_PCMPGTB256_MASK,
29855 IX86_BUILTIN_PCMPGTW128_MASK,
29856 IX86_BUILTIN_PCMPGTW256_MASK,
29857 IX86_BUILTIN_PCMPGTD128_MASK,
29858 IX86_BUILTIN_PCMPGTD256_MASK,
29859 IX86_BUILTIN_PCMPGTQ128_MASK,
29860 IX86_BUILTIN_PCMPGTQ256_MASK,
29861 IX86_BUILTIN_PTESTMB128,
29862 IX86_BUILTIN_PTESTMB256,
29863 IX86_BUILTIN_PTESTMW128,
29864 IX86_BUILTIN_PTESTMW256,
29865 IX86_BUILTIN_PTESTMD128,
29866 IX86_BUILTIN_PTESTMD256,
29867 IX86_BUILTIN_PTESTMQ128,
29868 IX86_BUILTIN_PTESTMQ256,
29869 IX86_BUILTIN_PTESTNMB128,
29870 IX86_BUILTIN_PTESTNMB256,
29871 IX86_BUILTIN_PTESTNMW128,
29872 IX86_BUILTIN_PTESTNMW256,
29873 IX86_BUILTIN_PTESTNMD128,
29874 IX86_BUILTIN_PTESTNMD256,
29875 IX86_BUILTIN_PTESTNMQ128,
29876 IX86_BUILTIN_PTESTNMQ256,
29877 IX86_BUILTIN_PBROADCASTMB128,
29878 IX86_BUILTIN_PBROADCASTMB256,
29879 IX86_BUILTIN_PBROADCASTMW128,
29880 IX86_BUILTIN_PBROADCASTMW256,
29881 IX86_BUILTIN_COMPRESSPD256,
29882 IX86_BUILTIN_COMPRESSPD128,
29883 IX86_BUILTIN_COMPRESSPS256,
29884 IX86_BUILTIN_COMPRESSPS128,
29885 IX86_BUILTIN_PCOMPRESSQ256,
29886 IX86_BUILTIN_PCOMPRESSQ128,
29887 IX86_BUILTIN_PCOMPRESSD256,
29888 IX86_BUILTIN_PCOMPRESSD128,
29889 IX86_BUILTIN_EXPANDPD256,
29890 IX86_BUILTIN_EXPANDPD128,
29891 IX86_BUILTIN_EXPANDPS256,
29892 IX86_BUILTIN_EXPANDPS128,
29893 IX86_BUILTIN_PEXPANDQ256,
29894 IX86_BUILTIN_PEXPANDQ128,
29895 IX86_BUILTIN_PEXPANDD256,
29896 IX86_BUILTIN_PEXPANDD128,
29897 IX86_BUILTIN_EXPANDPD256Z,
29898 IX86_BUILTIN_EXPANDPD128Z,
29899 IX86_BUILTIN_EXPANDPS256Z,
29900 IX86_BUILTIN_EXPANDPS128Z,
29901 IX86_BUILTIN_PEXPANDQ256Z,
29902 IX86_BUILTIN_PEXPANDQ128Z,
29903 IX86_BUILTIN_PEXPANDD256Z,
29904 IX86_BUILTIN_PEXPANDD128Z,
29905 IX86_BUILTIN_PMAXSD256_MASK,
29906 IX86_BUILTIN_PMINSD256_MASK,
29907 IX86_BUILTIN_PMAXUD256_MASK,
29908 IX86_BUILTIN_PMINUD256_MASK,
29909 IX86_BUILTIN_PMAXSD128_MASK,
29910 IX86_BUILTIN_PMINSD128_MASK,
29911 IX86_BUILTIN_PMAXUD128_MASK,
29912 IX86_BUILTIN_PMINUD128_MASK,
29913 IX86_BUILTIN_PMAXSQ256_MASK,
29914 IX86_BUILTIN_PMINSQ256_MASK,
29915 IX86_BUILTIN_PMAXUQ256_MASK,
29916 IX86_BUILTIN_PMINUQ256_MASK,
29917 IX86_BUILTIN_PMAXSQ128_MASK,
29918 IX86_BUILTIN_PMINSQ128_MASK,
29919 IX86_BUILTIN_PMAXUQ128_MASK,
29920 IX86_BUILTIN_PMINUQ128_MASK,
29921 IX86_BUILTIN_PMINSB256_MASK,
29922 IX86_BUILTIN_PMINUB256_MASK,
29923 IX86_BUILTIN_PMAXSB256_MASK,
29924 IX86_BUILTIN_PMAXUB256_MASK,
29925 IX86_BUILTIN_PMINSB128_MASK,
29926 IX86_BUILTIN_PMINUB128_MASK,
29927 IX86_BUILTIN_PMAXSB128_MASK,
29928 IX86_BUILTIN_PMAXUB128_MASK,
29929 IX86_BUILTIN_PMINSW256_MASK,
29930 IX86_BUILTIN_PMINUW256_MASK,
29931 IX86_BUILTIN_PMAXSW256_MASK,
29932 IX86_BUILTIN_PMAXUW256_MASK,
29933 IX86_BUILTIN_PMINSW128_MASK,
29934 IX86_BUILTIN_PMINUW128_MASK,
29935 IX86_BUILTIN_PMAXSW128_MASK,
29936 IX86_BUILTIN_PMAXUW128_MASK,
29937 IX86_BUILTIN_VPCONFLICTQ256,
29938 IX86_BUILTIN_VPCONFLICTD256,
29939 IX86_BUILTIN_VPCLZCNTQ256,
29940 IX86_BUILTIN_VPCLZCNTD256,
29941 IX86_BUILTIN_UNPCKHPD256_MASK,
29942 IX86_BUILTIN_UNPCKHPD128_MASK,
29943 IX86_BUILTIN_UNPCKHPS256_MASK,
29944 IX86_BUILTIN_UNPCKHPS128_MASK,
29945 IX86_BUILTIN_UNPCKLPD256_MASK,
29946 IX86_BUILTIN_UNPCKLPD128_MASK,
29947 IX86_BUILTIN_UNPCKLPS256_MASK,
29948 IX86_BUILTIN_VPCONFLICTQ128,
29949 IX86_BUILTIN_VPCONFLICTD128,
29950 IX86_BUILTIN_VPCLZCNTQ128,
29951 IX86_BUILTIN_VPCLZCNTD128,
29952 IX86_BUILTIN_UNPCKLPS128_MASK,
29953 IX86_BUILTIN_ALIGND256,
29954 IX86_BUILTIN_ALIGNQ256,
29955 IX86_BUILTIN_ALIGND128,
29956 IX86_BUILTIN_ALIGNQ128,
29957 IX86_BUILTIN_CVTPS2PH256_MASK,
29958 IX86_BUILTIN_CVTPS2PH_MASK,
29959 IX86_BUILTIN_CVTPH2PS_MASK,
29960 IX86_BUILTIN_CVTPH2PS256_MASK,
29961 IX86_BUILTIN_PUNPCKHDQ128_MASK,
29962 IX86_BUILTIN_PUNPCKHDQ256_MASK,
29963 IX86_BUILTIN_PUNPCKHQDQ128_MASK,
29964 IX86_BUILTIN_PUNPCKHQDQ256_MASK,
29965 IX86_BUILTIN_PUNPCKLDQ128_MASK,
29966 IX86_BUILTIN_PUNPCKLDQ256_MASK,
29967 IX86_BUILTIN_PUNPCKLQDQ128_MASK,
29968 IX86_BUILTIN_PUNPCKLQDQ256_MASK,
29969 IX86_BUILTIN_PUNPCKHBW128_MASK,
29970 IX86_BUILTIN_PUNPCKHBW256_MASK,
29971 IX86_BUILTIN_PUNPCKHWD128_MASK,
29972 IX86_BUILTIN_PUNPCKHWD256_MASK,
29973 IX86_BUILTIN_PUNPCKLBW128_MASK,
29974 IX86_BUILTIN_PUNPCKLBW256_MASK,
29975 IX86_BUILTIN_PUNPCKLWD128_MASK,
29976 IX86_BUILTIN_PUNPCKLWD256_MASK,
29977 IX86_BUILTIN_PSLLVV16HI,
29978 IX86_BUILTIN_PSLLVV8HI,
29979 IX86_BUILTIN_PACKSSDW256_MASK,
29980 IX86_BUILTIN_PACKSSDW128_MASK,
29981 IX86_BUILTIN_PACKUSDW256_MASK,
29982 IX86_BUILTIN_PACKUSDW128_MASK,
29983 IX86_BUILTIN_PAVGB256_MASK,
29984 IX86_BUILTIN_PAVGW256_MASK,
29985 IX86_BUILTIN_PAVGB128_MASK,
29986 IX86_BUILTIN_PAVGW128_MASK,
29987 IX86_BUILTIN_VPERMVARSF256_MASK,
29988 IX86_BUILTIN_VPERMVARDF256_MASK,
29989 IX86_BUILTIN_VPERMDF256_MASK,
29990 IX86_BUILTIN_PABSB256_MASK,
29991 IX86_BUILTIN_PABSB128_MASK,
29992 IX86_BUILTIN_PABSW256_MASK,
29993 IX86_BUILTIN_PABSW128_MASK,
29994 IX86_BUILTIN_VPERMILVARPD_MASK,
29995 IX86_BUILTIN_VPERMILVARPS_MASK,
29996 IX86_BUILTIN_VPERMILVARPD256_MASK,
29997 IX86_BUILTIN_VPERMILVARPS256_MASK,
29998 IX86_BUILTIN_VPERMILPD_MASK,
29999 IX86_BUILTIN_VPERMILPS_MASK,
30000 IX86_BUILTIN_VPERMILPD256_MASK,
30001 IX86_BUILTIN_VPERMILPS256_MASK,
30002 IX86_BUILTIN_BLENDMQ256,
30003 IX86_BUILTIN_BLENDMD256,
30004 IX86_BUILTIN_BLENDMPD256,
30005 IX86_BUILTIN_BLENDMPS256,
30006 IX86_BUILTIN_BLENDMQ128,
30007 IX86_BUILTIN_BLENDMD128,
30008 IX86_BUILTIN_BLENDMPD128,
30009 IX86_BUILTIN_BLENDMPS128,
30010 IX86_BUILTIN_BLENDMW256,
30011 IX86_BUILTIN_BLENDMB256,
30012 IX86_BUILTIN_BLENDMW128,
30013 IX86_BUILTIN_BLENDMB128,
30014 IX86_BUILTIN_PMULLD256_MASK,
30015 IX86_BUILTIN_PMULLD128_MASK,
30016 IX86_BUILTIN_PMULUDQ256_MASK,
30017 IX86_BUILTIN_PMULDQ256_MASK,
30018 IX86_BUILTIN_PMULDQ128_MASK,
30019 IX86_BUILTIN_PMULUDQ128_MASK,
30020 IX86_BUILTIN_CVTPD2PS256_MASK,
30021 IX86_BUILTIN_CVTPD2PS_MASK,
30022 IX86_BUILTIN_VPERMVARSI256_MASK,
30023 IX86_BUILTIN_VPERMVARDI256_MASK,
30024 IX86_BUILTIN_VPERMDI256_MASK,
30025 IX86_BUILTIN_CMPQ256,
30026 IX86_BUILTIN_CMPD256,
30027 IX86_BUILTIN_UCMPQ256,
30028 IX86_BUILTIN_UCMPD256,
30029 IX86_BUILTIN_CMPB256,
30030 IX86_BUILTIN_CMPW256,
30031 IX86_BUILTIN_UCMPB256,
30032 IX86_BUILTIN_UCMPW256,
30033 IX86_BUILTIN_CMPPD256_MASK,
30034 IX86_BUILTIN_CMPPS256_MASK,
30035 IX86_BUILTIN_CMPQ128,
30036 IX86_BUILTIN_CMPD128,
30037 IX86_BUILTIN_UCMPQ128,
30038 IX86_BUILTIN_UCMPD128,
30039 IX86_BUILTIN_CMPB128,
30040 IX86_BUILTIN_CMPW128,
30041 IX86_BUILTIN_UCMPB128,
30042 IX86_BUILTIN_UCMPW128,
30043 IX86_BUILTIN_CMPPD128_MASK,
30044 IX86_BUILTIN_CMPPS128_MASK,
30046 IX86_BUILTIN_GATHER3SIV8SF,
30047 IX86_BUILTIN_GATHER3SIV4SF,
30048 IX86_BUILTIN_GATHER3SIV4DF,
30049 IX86_BUILTIN_GATHER3SIV2DF,
30050 IX86_BUILTIN_GATHER3DIV8SF,
30051 IX86_BUILTIN_GATHER3DIV4SF,
30052 IX86_BUILTIN_GATHER3DIV4DF,
30053 IX86_BUILTIN_GATHER3DIV2DF,
30054 IX86_BUILTIN_GATHER3SIV8SI,
30055 IX86_BUILTIN_GATHER3SIV4SI,
30056 IX86_BUILTIN_GATHER3SIV4DI,
30057 IX86_BUILTIN_GATHER3SIV2DI,
30058 IX86_BUILTIN_GATHER3DIV8SI,
30059 IX86_BUILTIN_GATHER3DIV4SI,
30060 IX86_BUILTIN_GATHER3DIV4DI,
30061 IX86_BUILTIN_GATHER3DIV2DI,
30062 IX86_BUILTIN_SCATTERSIV8SF,
30063 IX86_BUILTIN_SCATTERSIV4SF,
30064 IX86_BUILTIN_SCATTERSIV4DF,
30065 IX86_BUILTIN_SCATTERSIV2DF,
30066 IX86_BUILTIN_SCATTERDIV8SF,
30067 IX86_BUILTIN_SCATTERDIV4SF,
30068 IX86_BUILTIN_SCATTERDIV4DF,
30069 IX86_BUILTIN_SCATTERDIV2DF,
30070 IX86_BUILTIN_SCATTERSIV8SI,
30071 IX86_BUILTIN_SCATTERSIV4SI,
30072 IX86_BUILTIN_SCATTERSIV4DI,
30073 IX86_BUILTIN_SCATTERSIV2DI,
30074 IX86_BUILTIN_SCATTERDIV8SI,
30075 IX86_BUILTIN_SCATTERDIV4SI,
30076 IX86_BUILTIN_SCATTERDIV4DI,
30077 IX86_BUILTIN_SCATTERDIV2DI,
30079 /* AVX512DQ. */
30080 IX86_BUILTIN_RANGESD128,
30081 IX86_BUILTIN_RANGESS128,
30082 IX86_BUILTIN_KUNPCKWD,
30083 IX86_BUILTIN_KUNPCKDQ,
30084 IX86_BUILTIN_BROADCASTF32x2_512,
30085 IX86_BUILTIN_BROADCASTI32x2_512,
30086 IX86_BUILTIN_BROADCASTF64X2_512,
30087 IX86_BUILTIN_BROADCASTI64X2_512,
30088 IX86_BUILTIN_BROADCASTF32X8_512,
30089 IX86_BUILTIN_BROADCASTI32X8_512,
30090 IX86_BUILTIN_EXTRACTF64X2_512,
30091 IX86_BUILTIN_EXTRACTF32X8,
30092 IX86_BUILTIN_EXTRACTI64X2_512,
30093 IX86_BUILTIN_EXTRACTI32X8,
30094 IX86_BUILTIN_REDUCEPD512_MASK,
30095 IX86_BUILTIN_REDUCEPS512_MASK,
30096 IX86_BUILTIN_PMULLQ512,
30097 IX86_BUILTIN_XORPD512,
30098 IX86_BUILTIN_XORPS512,
30099 IX86_BUILTIN_ORPD512,
30100 IX86_BUILTIN_ORPS512,
30101 IX86_BUILTIN_ANDPD512,
30102 IX86_BUILTIN_ANDPS512,
30103 IX86_BUILTIN_ANDNPD512,
30104 IX86_BUILTIN_ANDNPS512,
30105 IX86_BUILTIN_INSERTF32X8,
30106 IX86_BUILTIN_INSERTI32X8,
30107 IX86_BUILTIN_INSERTF64X2_512,
30108 IX86_BUILTIN_INSERTI64X2_512,
30109 IX86_BUILTIN_FPCLASSPD512,
30110 IX86_BUILTIN_FPCLASSPS512,
30111 IX86_BUILTIN_CVTD2MASK512,
30112 IX86_BUILTIN_CVTQ2MASK512,
30113 IX86_BUILTIN_CVTMASK2D512,
30114 IX86_BUILTIN_CVTMASK2Q512,
30115 IX86_BUILTIN_CVTPD2QQ512,
30116 IX86_BUILTIN_CVTPS2QQ512,
30117 IX86_BUILTIN_CVTPD2UQQ512,
30118 IX86_BUILTIN_CVTPS2UQQ512,
30119 IX86_BUILTIN_CVTQQ2PS512,
30120 IX86_BUILTIN_CVTUQQ2PS512,
30121 IX86_BUILTIN_CVTQQ2PD512,
30122 IX86_BUILTIN_CVTUQQ2PD512,
30123 IX86_BUILTIN_CVTTPS2QQ512,
30124 IX86_BUILTIN_CVTTPS2UQQ512,
30125 IX86_BUILTIN_CVTTPD2QQ512,
30126 IX86_BUILTIN_CVTTPD2UQQ512,
30127 IX86_BUILTIN_RANGEPS512,
30128 IX86_BUILTIN_RANGEPD512,
30130 /* AVX512BW. */
30131 IX86_BUILTIN_PACKUSDW512,
30132 IX86_BUILTIN_PACKSSDW512,
30133 IX86_BUILTIN_LOADDQUHI512_MASK,
30134 IX86_BUILTIN_LOADDQUQI512_MASK,
30135 IX86_BUILTIN_PSLLDQ512,
30136 IX86_BUILTIN_PSRLDQ512,
30137 IX86_BUILTIN_STOREDQUHI512_MASK,
30138 IX86_BUILTIN_STOREDQUQI512_MASK,
30139 IX86_BUILTIN_PALIGNR512,
30140 IX86_BUILTIN_PALIGNR512_MASK,
30141 IX86_BUILTIN_MOVDQUHI512_MASK,
30142 IX86_BUILTIN_MOVDQUQI512_MASK,
30143 IX86_BUILTIN_PSADBW512,
30144 IX86_BUILTIN_DBPSADBW512,
30145 IX86_BUILTIN_PBROADCASTB512,
30146 IX86_BUILTIN_PBROADCASTB512_GPR,
30147 IX86_BUILTIN_PBROADCASTW512,
30148 IX86_BUILTIN_PBROADCASTW512_GPR,
30149 IX86_BUILTIN_PMOVSXBW512_MASK,
30150 IX86_BUILTIN_PMOVZXBW512_MASK,
30151 IX86_BUILTIN_VPERMVARHI512_MASK,
30152 IX86_BUILTIN_VPERMT2VARHI512,
30153 IX86_BUILTIN_VPERMT2VARHI512_MASKZ,
30154 IX86_BUILTIN_VPERMI2VARHI512,
30155 IX86_BUILTIN_PAVGB512,
30156 IX86_BUILTIN_PAVGW512,
30157 IX86_BUILTIN_PADDB512,
30158 IX86_BUILTIN_PSUBB512,
30159 IX86_BUILTIN_PSUBSB512,
30160 IX86_BUILTIN_PADDSB512,
30161 IX86_BUILTIN_PSUBUSB512,
30162 IX86_BUILTIN_PADDUSB512,
30163 IX86_BUILTIN_PSUBW512,
30164 IX86_BUILTIN_PADDW512,
30165 IX86_BUILTIN_PSUBSW512,
30166 IX86_BUILTIN_PADDSW512,
30167 IX86_BUILTIN_PSUBUSW512,
30168 IX86_BUILTIN_PADDUSW512,
30169 IX86_BUILTIN_PMAXUW512,
30170 IX86_BUILTIN_PMAXSW512,
30171 IX86_BUILTIN_PMINUW512,
30172 IX86_BUILTIN_PMINSW512,
30173 IX86_BUILTIN_PMAXUB512,
30174 IX86_BUILTIN_PMAXSB512,
30175 IX86_BUILTIN_PMINUB512,
30176 IX86_BUILTIN_PMINSB512,
30177 IX86_BUILTIN_PMOVWB512,
30178 IX86_BUILTIN_PMOVSWB512,
30179 IX86_BUILTIN_PMOVUSWB512,
30180 IX86_BUILTIN_PMULHRSW512_MASK,
30181 IX86_BUILTIN_PMULHUW512_MASK,
30182 IX86_BUILTIN_PMULHW512_MASK,
30183 IX86_BUILTIN_PMULLW512_MASK,
30184 IX86_BUILTIN_PSLLWI512_MASK,
30185 IX86_BUILTIN_PSLLW512_MASK,
30186 IX86_BUILTIN_PACKSSWB512,
30187 IX86_BUILTIN_PACKUSWB512,
30188 IX86_BUILTIN_PSRAVV32HI,
30189 IX86_BUILTIN_PMADDUBSW512_MASK,
30190 IX86_BUILTIN_PMADDWD512_MASK,
30191 IX86_BUILTIN_PSRLVV32HI,
30192 IX86_BUILTIN_PUNPCKHBW512,
30193 IX86_BUILTIN_PUNPCKHWD512,
30194 IX86_BUILTIN_PUNPCKLBW512,
30195 IX86_BUILTIN_PUNPCKLWD512,
30196 IX86_BUILTIN_PSHUFB512,
30197 IX86_BUILTIN_PSHUFHW512,
30198 IX86_BUILTIN_PSHUFLW512,
30199 IX86_BUILTIN_PSRAWI512,
30200 IX86_BUILTIN_PSRAW512,
30201 IX86_BUILTIN_PSRLWI512,
30202 IX86_BUILTIN_PSRLW512,
30203 IX86_BUILTIN_CVTB2MASK512,
30204 IX86_BUILTIN_CVTW2MASK512,
30205 IX86_BUILTIN_CVTMASK2B512,
30206 IX86_BUILTIN_CVTMASK2W512,
30207 IX86_BUILTIN_PCMPEQB512_MASK,
30208 IX86_BUILTIN_PCMPEQW512_MASK,
30209 IX86_BUILTIN_PCMPGTB512_MASK,
30210 IX86_BUILTIN_PCMPGTW512_MASK,
30211 IX86_BUILTIN_PTESTMB512,
30212 IX86_BUILTIN_PTESTMW512,
30213 IX86_BUILTIN_PTESTNMB512,
30214 IX86_BUILTIN_PTESTNMW512,
30215 IX86_BUILTIN_PSLLVV32HI,
30216 IX86_BUILTIN_PABSB512,
30217 IX86_BUILTIN_PABSW512,
30218 IX86_BUILTIN_BLENDMW512,
30219 IX86_BUILTIN_BLENDMB512,
30220 IX86_BUILTIN_CMPB512,
30221 IX86_BUILTIN_CMPW512,
30222 IX86_BUILTIN_UCMPB512,
30223 IX86_BUILTIN_UCMPW512,
30225 /* Alternate 4 and 8 element gather/scatter for the vectorizer
30226 where all operands are 32-byte or 64-byte wide respectively. */
30227 IX86_BUILTIN_GATHERALTSIV4DF,
30228 IX86_BUILTIN_GATHERALTDIV8SF,
30229 IX86_BUILTIN_GATHERALTSIV4DI,
30230 IX86_BUILTIN_GATHERALTDIV8SI,
30231 IX86_BUILTIN_GATHER3ALTDIV16SF,
30232 IX86_BUILTIN_GATHER3ALTDIV16SI,
30233 IX86_BUILTIN_GATHER3ALTSIV4DF,
30234 IX86_BUILTIN_GATHER3ALTDIV8SF,
30235 IX86_BUILTIN_GATHER3ALTSIV4DI,
30236 IX86_BUILTIN_GATHER3ALTDIV8SI,
30237 IX86_BUILTIN_GATHER3ALTSIV8DF,
30238 IX86_BUILTIN_GATHER3ALTSIV8DI,
30239 IX86_BUILTIN_GATHER3DIV16SF,
30240 IX86_BUILTIN_GATHER3DIV16SI,
30241 IX86_BUILTIN_GATHER3DIV8DF,
30242 IX86_BUILTIN_GATHER3DIV8DI,
30243 IX86_BUILTIN_GATHER3SIV16SF,
30244 IX86_BUILTIN_GATHER3SIV16SI,
30245 IX86_BUILTIN_GATHER3SIV8DF,
30246 IX86_BUILTIN_GATHER3SIV8DI,
30247 IX86_BUILTIN_SCATTERDIV16SF,
30248 IX86_BUILTIN_SCATTERDIV16SI,
30249 IX86_BUILTIN_SCATTERDIV8DF,
30250 IX86_BUILTIN_SCATTERDIV8DI,
30251 IX86_BUILTIN_SCATTERSIV16SF,
30252 IX86_BUILTIN_SCATTERSIV16SI,
30253 IX86_BUILTIN_SCATTERSIV8DF,
30254 IX86_BUILTIN_SCATTERSIV8DI,
30256 /* AVX512PF */
30257 IX86_BUILTIN_GATHERPFQPD,
30258 IX86_BUILTIN_GATHERPFDPS,
30259 IX86_BUILTIN_GATHERPFDPD,
30260 IX86_BUILTIN_GATHERPFQPS,
30261 IX86_BUILTIN_SCATTERPFDPD,
30262 IX86_BUILTIN_SCATTERPFDPS,
30263 IX86_BUILTIN_SCATTERPFQPD,
30264 IX86_BUILTIN_SCATTERPFQPS,
30266 /* AVX-512ER */
30267 IX86_BUILTIN_EXP2PD_MASK,
30268 IX86_BUILTIN_EXP2PS_MASK,
30269 IX86_BUILTIN_EXP2PS,
30270 IX86_BUILTIN_RCP28PD,
30271 IX86_BUILTIN_RCP28PS,
30272 IX86_BUILTIN_RCP28SD,
30273 IX86_BUILTIN_RCP28SS,
30274 IX86_BUILTIN_RSQRT28PD,
30275 IX86_BUILTIN_RSQRT28PS,
30276 IX86_BUILTIN_RSQRT28SD,
30277 IX86_BUILTIN_RSQRT28SS,
30279 /* AVX-512IFMA */
30280 IX86_BUILTIN_VPMADD52LUQ512,
30281 IX86_BUILTIN_VPMADD52HUQ512,
30282 IX86_BUILTIN_VPMADD52LUQ256,
30283 IX86_BUILTIN_VPMADD52HUQ256,
30284 IX86_BUILTIN_VPMADD52LUQ128,
30285 IX86_BUILTIN_VPMADD52HUQ128,
30286 IX86_BUILTIN_VPMADD52LUQ512_MASKZ,
30287 IX86_BUILTIN_VPMADD52HUQ512_MASKZ,
30288 IX86_BUILTIN_VPMADD52LUQ256_MASKZ,
30289 IX86_BUILTIN_VPMADD52HUQ256_MASKZ,
30290 IX86_BUILTIN_VPMADD52LUQ128_MASKZ,
30291 IX86_BUILTIN_VPMADD52HUQ128_MASKZ,
30293 /* AVX-512VBMI */
30294 IX86_BUILTIN_VPMULTISHIFTQB512,
30295 IX86_BUILTIN_VPMULTISHIFTQB256,
30296 IX86_BUILTIN_VPMULTISHIFTQB128,
30297 IX86_BUILTIN_VPERMVARQI512_MASK,
30298 IX86_BUILTIN_VPERMT2VARQI512,
30299 IX86_BUILTIN_VPERMT2VARQI512_MASKZ,
30300 IX86_BUILTIN_VPERMI2VARQI512,
30301 IX86_BUILTIN_VPERMVARQI256_MASK,
30302 IX86_BUILTIN_VPERMVARQI128_MASK,
30303 IX86_BUILTIN_VPERMT2VARQI256,
30304 IX86_BUILTIN_VPERMT2VARQI256_MASKZ,
30305 IX86_BUILTIN_VPERMT2VARQI128,
30306 IX86_BUILTIN_VPERMT2VARQI128_MASKZ,
30307 IX86_BUILTIN_VPERMI2VARQI256,
30308 IX86_BUILTIN_VPERMI2VARQI128,
30310 /* SHA builtins. */
30311 IX86_BUILTIN_SHA1MSG1,
30312 IX86_BUILTIN_SHA1MSG2,
30313 IX86_BUILTIN_SHA1NEXTE,
30314 IX86_BUILTIN_SHA1RNDS4,
30315 IX86_BUILTIN_SHA256MSG1,
30316 IX86_BUILTIN_SHA256MSG2,
30317 IX86_BUILTIN_SHA256RNDS2,
30319 /* CLWB instructions. */
30320 IX86_BUILTIN_CLWB,
30322 /* PCOMMIT instructions. */
30323 IX86_BUILTIN_PCOMMIT,
30325 /* CLFLUSHOPT instructions. */
30326 IX86_BUILTIN_CLFLUSHOPT,
30328 /* TFmode support builtins. */
30329 IX86_BUILTIN_INFQ,
30330 IX86_BUILTIN_HUGE_VALQ,
30331 IX86_BUILTIN_FABSQ,
30332 IX86_BUILTIN_COPYSIGNQ,
30334 /* Vectorizer support builtins. */
30335 IX86_BUILTIN_CEILPD_VEC_PACK_SFIX512,
30336 IX86_BUILTIN_CPYSGNPS,
30337 IX86_BUILTIN_CPYSGNPD,
30338 IX86_BUILTIN_CPYSGNPS256,
30339 IX86_BUILTIN_CPYSGNPS512,
30340 IX86_BUILTIN_CPYSGNPD256,
30341 IX86_BUILTIN_CPYSGNPD512,
30342 IX86_BUILTIN_FLOORPD_VEC_PACK_SFIX512,
30343 IX86_BUILTIN_ROUNDPD_AZ_VEC_PACK_SFIX512,
30346 /* FMA4 instructions. */
30347 IX86_BUILTIN_VFMADDSS,
30348 IX86_BUILTIN_VFMADDSD,
30349 IX86_BUILTIN_VFMADDPS,
30350 IX86_BUILTIN_VFMADDPD,
30351 IX86_BUILTIN_VFMADDPS256,
30352 IX86_BUILTIN_VFMADDPD256,
30353 IX86_BUILTIN_VFMADDSUBPS,
30354 IX86_BUILTIN_VFMADDSUBPD,
30355 IX86_BUILTIN_VFMADDSUBPS256,
30356 IX86_BUILTIN_VFMADDSUBPD256,
30358 /* FMA3 instructions. */
30359 IX86_BUILTIN_VFMADDSS3,
30360 IX86_BUILTIN_VFMADDSD3,
30362 /* XOP instructions. */
30363 IX86_BUILTIN_VPCMOV,
30364 IX86_BUILTIN_VPCMOV_V2DI,
30365 IX86_BUILTIN_VPCMOV_V4SI,
30366 IX86_BUILTIN_VPCMOV_V8HI,
30367 IX86_BUILTIN_VPCMOV_V16QI,
30368 IX86_BUILTIN_VPCMOV_V4SF,
30369 IX86_BUILTIN_VPCMOV_V2DF,
30370 IX86_BUILTIN_VPCMOV256,
30371 IX86_BUILTIN_VPCMOV_V4DI256,
30372 IX86_BUILTIN_VPCMOV_V8SI256,
30373 IX86_BUILTIN_VPCMOV_V16HI256,
30374 IX86_BUILTIN_VPCMOV_V32QI256,
30375 IX86_BUILTIN_VPCMOV_V8SF256,
30376 IX86_BUILTIN_VPCMOV_V4DF256,
30378 IX86_BUILTIN_VPPERM,
30380 IX86_BUILTIN_VPMACSSWW,
30381 IX86_BUILTIN_VPMACSWW,
30382 IX86_BUILTIN_VPMACSSWD,
30383 IX86_BUILTIN_VPMACSWD,
30384 IX86_BUILTIN_VPMACSSDD,
30385 IX86_BUILTIN_VPMACSDD,
30386 IX86_BUILTIN_VPMACSSDQL,
30387 IX86_BUILTIN_VPMACSSDQH,
30388 IX86_BUILTIN_VPMACSDQL,
30389 IX86_BUILTIN_VPMACSDQH,
30390 IX86_BUILTIN_VPMADCSSWD,
30391 IX86_BUILTIN_VPMADCSWD,
30393 IX86_BUILTIN_VPHADDBW,
30394 IX86_BUILTIN_VPHADDBD,
30395 IX86_BUILTIN_VPHADDBQ,
30396 IX86_BUILTIN_VPHADDWD,
30397 IX86_BUILTIN_VPHADDWQ,
30398 IX86_BUILTIN_VPHADDDQ,
30399 IX86_BUILTIN_VPHADDUBW,
30400 IX86_BUILTIN_VPHADDUBD,
30401 IX86_BUILTIN_VPHADDUBQ,
30402 IX86_BUILTIN_VPHADDUWD,
30403 IX86_BUILTIN_VPHADDUWQ,
30404 IX86_BUILTIN_VPHADDUDQ,
30405 IX86_BUILTIN_VPHSUBBW,
30406 IX86_BUILTIN_VPHSUBWD,
30407 IX86_BUILTIN_VPHSUBDQ,
30409 IX86_BUILTIN_VPROTB,
30410 IX86_BUILTIN_VPROTW,
30411 IX86_BUILTIN_VPROTD,
30412 IX86_BUILTIN_VPROTQ,
30413 IX86_BUILTIN_VPROTB_IMM,
30414 IX86_BUILTIN_VPROTW_IMM,
30415 IX86_BUILTIN_VPROTD_IMM,
30416 IX86_BUILTIN_VPROTQ_IMM,
30418 IX86_BUILTIN_VPSHLB,
30419 IX86_BUILTIN_VPSHLW,
30420 IX86_BUILTIN_VPSHLD,
30421 IX86_BUILTIN_VPSHLQ,
30422 IX86_BUILTIN_VPSHAB,
30423 IX86_BUILTIN_VPSHAW,
30424 IX86_BUILTIN_VPSHAD,
30425 IX86_BUILTIN_VPSHAQ,
30427 IX86_BUILTIN_VFRCZSS,
30428 IX86_BUILTIN_VFRCZSD,
30429 IX86_BUILTIN_VFRCZPS,
30430 IX86_BUILTIN_VFRCZPD,
30431 IX86_BUILTIN_VFRCZPS256,
30432 IX86_BUILTIN_VFRCZPD256,
30434 IX86_BUILTIN_VPCOMEQUB,
30435 IX86_BUILTIN_VPCOMNEUB,
30436 IX86_BUILTIN_VPCOMLTUB,
30437 IX86_BUILTIN_VPCOMLEUB,
30438 IX86_BUILTIN_VPCOMGTUB,
30439 IX86_BUILTIN_VPCOMGEUB,
30440 IX86_BUILTIN_VPCOMFALSEUB,
30441 IX86_BUILTIN_VPCOMTRUEUB,
30443 IX86_BUILTIN_VPCOMEQUW,
30444 IX86_BUILTIN_VPCOMNEUW,
30445 IX86_BUILTIN_VPCOMLTUW,
30446 IX86_BUILTIN_VPCOMLEUW,
30447 IX86_BUILTIN_VPCOMGTUW,
30448 IX86_BUILTIN_VPCOMGEUW,
30449 IX86_BUILTIN_VPCOMFALSEUW,
30450 IX86_BUILTIN_VPCOMTRUEUW,
30452 IX86_BUILTIN_VPCOMEQUD,
30453 IX86_BUILTIN_VPCOMNEUD,
30454 IX86_BUILTIN_VPCOMLTUD,
30455 IX86_BUILTIN_VPCOMLEUD,
30456 IX86_BUILTIN_VPCOMGTUD,
30457 IX86_BUILTIN_VPCOMGEUD,
30458 IX86_BUILTIN_VPCOMFALSEUD,
30459 IX86_BUILTIN_VPCOMTRUEUD,
30461 IX86_BUILTIN_VPCOMEQUQ,
30462 IX86_BUILTIN_VPCOMNEUQ,
30463 IX86_BUILTIN_VPCOMLTUQ,
30464 IX86_BUILTIN_VPCOMLEUQ,
30465 IX86_BUILTIN_VPCOMGTUQ,
30466 IX86_BUILTIN_VPCOMGEUQ,
30467 IX86_BUILTIN_VPCOMFALSEUQ,
30468 IX86_BUILTIN_VPCOMTRUEUQ,
30470 IX86_BUILTIN_VPCOMEQB,
30471 IX86_BUILTIN_VPCOMNEB,
30472 IX86_BUILTIN_VPCOMLTB,
30473 IX86_BUILTIN_VPCOMLEB,
30474 IX86_BUILTIN_VPCOMGTB,
30475 IX86_BUILTIN_VPCOMGEB,
30476 IX86_BUILTIN_VPCOMFALSEB,
30477 IX86_BUILTIN_VPCOMTRUEB,
30479 IX86_BUILTIN_VPCOMEQW,
30480 IX86_BUILTIN_VPCOMNEW,
30481 IX86_BUILTIN_VPCOMLTW,
30482 IX86_BUILTIN_VPCOMLEW,
30483 IX86_BUILTIN_VPCOMGTW,
30484 IX86_BUILTIN_VPCOMGEW,
30485 IX86_BUILTIN_VPCOMFALSEW,
30486 IX86_BUILTIN_VPCOMTRUEW,
30488 IX86_BUILTIN_VPCOMEQD,
30489 IX86_BUILTIN_VPCOMNED,
30490 IX86_BUILTIN_VPCOMLTD,
30491 IX86_BUILTIN_VPCOMLED,
30492 IX86_BUILTIN_VPCOMGTD,
30493 IX86_BUILTIN_VPCOMGED,
30494 IX86_BUILTIN_VPCOMFALSED,
30495 IX86_BUILTIN_VPCOMTRUED,
30497 IX86_BUILTIN_VPCOMEQQ,
30498 IX86_BUILTIN_VPCOMNEQ,
30499 IX86_BUILTIN_VPCOMLTQ,
30500 IX86_BUILTIN_VPCOMLEQ,
30501 IX86_BUILTIN_VPCOMGTQ,
30502 IX86_BUILTIN_VPCOMGEQ,
30503 IX86_BUILTIN_VPCOMFALSEQ,
30504 IX86_BUILTIN_VPCOMTRUEQ,
30506 /* LWP instructions. */
30507 IX86_BUILTIN_LLWPCB,
30508 IX86_BUILTIN_SLWPCB,
30509 IX86_BUILTIN_LWPVAL32,
30510 IX86_BUILTIN_LWPVAL64,
30511 IX86_BUILTIN_LWPINS32,
30512 IX86_BUILTIN_LWPINS64,
30514 IX86_BUILTIN_CLZS,
30516 /* RTM */
30517 IX86_BUILTIN_XBEGIN,
30518 IX86_BUILTIN_XEND,
30519 IX86_BUILTIN_XABORT,
30520 IX86_BUILTIN_XTEST,
30522 /* MPX */
30523 IX86_BUILTIN_BNDMK,
30524 IX86_BUILTIN_BNDSTX,
30525 IX86_BUILTIN_BNDLDX,
30526 IX86_BUILTIN_BNDCL,
30527 IX86_BUILTIN_BNDCU,
30528 IX86_BUILTIN_BNDRET,
30529 IX86_BUILTIN_BNDNARROW,
30530 IX86_BUILTIN_BNDINT,
30531 IX86_BUILTIN_SIZEOF,
30532 IX86_BUILTIN_BNDLOWER,
30533 IX86_BUILTIN_BNDUPPER,
30535 /* BMI instructions. */
30536 IX86_BUILTIN_BEXTR32,
30537 IX86_BUILTIN_BEXTR64,
30538 IX86_BUILTIN_CTZS,
30540 /* TBM instructions. */
30541 IX86_BUILTIN_BEXTRI32,
30542 IX86_BUILTIN_BEXTRI64,
30544 /* BMI2 instructions. */
30545 IX86_BUILTIN_BZHI32,
30546 IX86_BUILTIN_BZHI64,
30547 IX86_BUILTIN_PDEP32,
30548 IX86_BUILTIN_PDEP64,
30549 IX86_BUILTIN_PEXT32,
30550 IX86_BUILTIN_PEXT64,
30552 /* ADX instructions. */
30553 IX86_BUILTIN_ADDCARRYX32,
30554 IX86_BUILTIN_ADDCARRYX64,
30556 /* SBB instructions. */
30557 IX86_BUILTIN_SBB32,
30558 IX86_BUILTIN_SBB64,
30560 /* FSGSBASE instructions. */
30561 IX86_BUILTIN_RDFSBASE32,
30562 IX86_BUILTIN_RDFSBASE64,
30563 IX86_BUILTIN_RDGSBASE32,
30564 IX86_BUILTIN_RDGSBASE64,
30565 IX86_BUILTIN_WRFSBASE32,
30566 IX86_BUILTIN_WRFSBASE64,
30567 IX86_BUILTIN_WRGSBASE32,
30568 IX86_BUILTIN_WRGSBASE64,
30570 /* RDRND instructions. */
30571 IX86_BUILTIN_RDRAND16_STEP,
30572 IX86_BUILTIN_RDRAND32_STEP,
30573 IX86_BUILTIN_RDRAND64_STEP,
30575 /* RDSEED instructions. */
30576 IX86_BUILTIN_RDSEED16_STEP,
30577 IX86_BUILTIN_RDSEED32_STEP,
30578 IX86_BUILTIN_RDSEED64_STEP,
30580 /* F16C instructions. */
30581 IX86_BUILTIN_CVTPH2PS,
30582 IX86_BUILTIN_CVTPH2PS256,
30583 IX86_BUILTIN_CVTPS2PH,
30584 IX86_BUILTIN_CVTPS2PH256,
30586 /* MONITORX and MWAITX instructions. */
30587 IX86_BUILTIN_MONITORX,
30588 IX86_BUILTIN_MWAITX,
30590 /* CFString built-in for darwin */
30591 IX86_BUILTIN_CFSTRING,
30593 /* Builtins to get CPU type and supported features. */
30594 IX86_BUILTIN_CPU_INIT,
30595 IX86_BUILTIN_CPU_IS,
30596 IX86_BUILTIN_CPU_SUPPORTS,
30598 /* Read/write FLAGS register built-ins. */
30599 IX86_BUILTIN_READ_FLAGS,
30600 IX86_BUILTIN_WRITE_FLAGS,
30602 IX86_BUILTIN_MAX
30603 };
30605 /* Table for the ix86 builtin decls. */
30606 static GTY(()) tree ix86_builtins[(int) IX86_BUILTIN_MAX];
30608 /* Table of all of the builtin functions that are possible with different ISAs
30609 but are waiting to be built until a function is declared to use that
30610 ISA. */
30611 struct builtin_isa {
30612   const char *name;                  /* function name */
30613   enum ix86_builtin_func_type tcode; /* type to use in the declaration */
30614   HOST_WIDE_INT isa;                 /* isa_flags this builtin is defined for */
30615   bool const_p;                      /* true if the declaration is constant */
30616   bool leaf_p;                       /* true if the declaration has leaf attribute */
30617   bool nothrow_p;                    /* true if the declaration has nothrow attribute */
30618   bool set_and_not_built_p;
30619 };
30621 static struct builtin_isa ix86_builtins_isa[(int) IX86_BUILTIN_MAX];
30623 /* Bits that can still enable any inclusion of a builtin. */
30624 static HOST_WIDE_INT deferred_isa_values = 0;
30626 /* Add an ix86 target builtin function with CODE, NAME and TYPE. Save the MASK
30627 of which isa_flags to use in the ix86_builtins_isa array. Stores the
30628 function decl in the ix86_builtins array. Returns the function decl or
30629 NULL_TREE, if the builtin was not added.
30631 If the front end has a special hook for builtin functions, delay adding
30632 builtin functions that aren't in the current ISA until the ISA is changed
30633 with function specific optimization. Doing so can save about 300K for the
30634 default compiler. When the builtin is expanded, check at that time whether
30635 it is valid.
30637 If the front end doesn't have a special hook, record all builtins, even if
30638 they aren't in the current ISA, in case the user uses
30639 function specific options for a different ISA, so that we don't get scope
30640 errors if a builtin is added in the middle of a function scope. */
30642 static inline tree
30643 def_builtin (HOST_WIDE_INT mask, const char *name,
30644              enum ix86_builtin_func_type tcode,
30645              enum ix86_builtins code)
30646 {
30647   tree decl = NULL_TREE;
30649   if (!(mask & OPTION_MASK_ISA_64BIT) || TARGET_64BIT)
30650     {
30651       ix86_builtins_isa[(int) code].isa = mask;
30653       mask &= ~OPTION_MASK_ISA_64BIT;
30654       if (mask == 0
30655           || (mask & ix86_isa_flags) != 0
30656           || (lang_hooks.builtin_function
30657               == lang_hooks.builtin_function_ext_scope))
30659         {
30660           tree type = ix86_get_builtin_func_type (tcode);
30661           decl = add_builtin_function (name, type, code, BUILT_IN_MD,
30662                                        NULL, NULL_TREE);
30663           ix86_builtins[(int) code] = decl;
30664           ix86_builtins_isa[(int) code].set_and_not_built_p = false;
30665         }
30666       else
30667         {
30668           /* Just a MASK where set_and_not_built_p == true can potentially
30669              include a builtin.  */
30670           deferred_isa_values |= mask;
30671           ix86_builtins[(int) code] = NULL_TREE;
30672           ix86_builtins_isa[(int) code].tcode = tcode;
30673           ix86_builtins_isa[(int) code].name = name;
30674           ix86_builtins_isa[(int) code].leaf_p = false;
30675           ix86_builtins_isa[(int) code].nothrow_p = false;
30676           ix86_builtins_isa[(int) code].const_p = false;
30677           ix86_builtins_isa[(int) code].set_and_not_built_p = true;
30678         }
30679     }
30681   return decl;
30682 }
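/* Editor's illustrative sketch, not part of the original source: a builtin is
   registered by passing the ISA mask it requires, its user-visible name, an
   ix86_builtin_func_type and its enum ix86_builtins code.  Most registrations
   in this file happen by iterating the bdesc_* tables defined further down;
   written out by hand, the RTM xbegin entry would amount to

     def_builtin (OPTION_MASK_ISA_RTM, "__builtin_ia32_xbegin",
                  UNSIGNED_FTYPE_VOID, IX86_BUILTIN_XBEGIN);

   The def_builtin_const wrapper just below additionally sets TREE_READONLY on
   the decl, so calls to such builtins behave as "const" functions and can be
   CSE'd.  */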
30684 /* Like def_builtin, but also marks the function decl "const". */
30686 static inline tree
30687 def_builtin_const (HOST_WIDE_INT mask, const char *name,
30688                    enum ix86_builtin_func_type tcode, enum ix86_builtins code)
30689 {
30690   tree decl = def_builtin (mask, name, tcode, code);
30691   if (decl)
30692     TREE_READONLY (decl) = 1;
30693   else
30694     ix86_builtins_isa[(int) code].const_p = true;
30696   return decl;
30697 }
30699 /* Add any new builtin functions for a given ISA that may not have been
30700 declared. This saves a bit of space compared to adding all of the
30701 declarations to the tree, even if we didn't use them. */
30703 static void
30704 ix86_add_new_builtins (HOST_WIDE_INT isa)
30705 {
30706   if ((isa & deferred_isa_values) == 0)
30707     return;
30709   /* Bits in ISA value can be removed from potential isa values.  */
30710   deferred_isa_values &= ~isa;
30712   int i;
30713   tree saved_current_target_pragma = current_target_pragma;
30714   current_target_pragma = NULL_TREE;
30716   for (i = 0; i < (int)IX86_BUILTIN_MAX; i++)
30717     {
30718       if ((ix86_builtins_isa[i].isa & isa) != 0
30719           && ix86_builtins_isa[i].set_and_not_built_p)
30720         {
30721           tree decl, type;
30723           /* Don't define the builtin again. */
30724           ix86_builtins_isa[i].set_and_not_built_p = false;
30726           type = ix86_get_builtin_func_type (ix86_builtins_isa[i].tcode);
30727           decl = add_builtin_function_ext_scope (ix86_builtins_isa[i].name,
30728                                                  type, i, BUILT_IN_MD, NULL,
30729                                                  NULL_TREE);
30731           ix86_builtins[i] = decl;
30732           if (ix86_builtins_isa[i].const_p)
30733             TREE_READONLY (decl) = 1;
30734           if (ix86_builtins_isa[i].leaf_p)
30735             DECL_ATTRIBUTES (decl) = build_tree_list (get_identifier ("leaf"),
30736                                                       NULL_TREE);
30737           if (ix86_builtins_isa[i].nothrow_p)
30738             TREE_NOTHROW (decl) = 1;
30739         }
30740     }
30742   current_target_pragma = saved_current_target_pragma;
30743 }
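/* Editor's illustrative sketch, not part of the original source: the deferral
   handled by def_builtin and ix86_add_new_builtins is what makes per-function
   ISA selection work without declaring every builtin up front.  Depending on
   the front end, the AVX2 builtins may only be declared once a function opts
   into that ISA, so user code such as

     #include <immintrin.h>

     __attribute__ ((target ("avx2")))
     __m256i add32 (__m256i a, __m256i b)
     {
       return _mm256_add_epi32 (a, b);   // expands through an AVX2 builtin
     }

   compiles even when AVX2 is not enabled globally: switching the ISA for the
   target attribute (or #pragma GCC target) is the point at which the deferred
   builtins for that ISA get declared.  */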
30745 /* Bits for builtin_description.flag. */
30747 /* Set when we don't support the comparison natively, and should
30748 swap_comparison in order to support it. */
30749 #define BUILTIN_DESC_SWAP_OPERANDS 1
30751 struct builtin_description
30752 {
30753   const HOST_WIDE_INT mask;
30754   const enum insn_code icode;
30755   const char *const name;
30756   const enum ix86_builtins code;
30757   const enum rtx_code comparison;
30758   const int flag;
30759 };
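/* Editor's annotation, not part of the original source: how to read one entry
   of the bdesc_* tables that follow, using the first bdesc_comi entry as the
   example (field names as in struct builtin_description above):

     { OPTION_MASK_ISA_SSE,        // mask: ISA flags the builtin requires
       CODE_FOR_sse_comi,          // icode: insn pattern used to expand it
       "__builtin_ia32_comieq",    // name: user-visible builtin name
       IX86_BUILTIN_COMIEQSS,      // code: enum ix86_builtins value
       UNEQ,                       // comparison: rtx comparison code
       0 },                        // flag: e.g. BUILTIN_DESC_SWAP_OPERANDS

   Other tables reuse the flag field for different data: bdesc_pcmpestr and
   bdesc_pcmpistr store a CC mode there, while bdesc_special_args stores the
   (int)-cast ix86_builtin_func_type of the builtin.  */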
30761 static const struct builtin_description bdesc_comi[] =
30762 {
30763 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comieq", IX86_BUILTIN_COMIEQSS, UNEQ, 0 },
30764 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comilt", IX86_BUILTIN_COMILTSS, UNLT, 0 },
30765 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comile", IX86_BUILTIN_COMILESS, UNLE, 0 },
30766 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comigt", IX86_BUILTIN_COMIGTSS, GT, 0 },
30767 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comige", IX86_BUILTIN_COMIGESS, GE, 0 },
30768 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comineq", IX86_BUILTIN_COMINEQSS, LTGT, 0 },
30769 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomieq", IX86_BUILTIN_UCOMIEQSS, UNEQ, 0 },
30770 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomilt", IX86_BUILTIN_UCOMILTSS, UNLT, 0 },
30771 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomile", IX86_BUILTIN_UCOMILESS, UNLE, 0 },
30772 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomigt", IX86_BUILTIN_UCOMIGTSS, GT, 0 },
30773 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomige", IX86_BUILTIN_UCOMIGESS, GE, 0 },
30774 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomineq", IX86_BUILTIN_UCOMINEQSS, LTGT, 0 },
30775 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdeq", IX86_BUILTIN_COMIEQSD, UNEQ, 0 },
30776 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdlt", IX86_BUILTIN_COMILTSD, UNLT, 0 },
30777 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdle", IX86_BUILTIN_COMILESD, UNLE, 0 },
30778 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdgt", IX86_BUILTIN_COMIGTSD, GT, 0 },
30779 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdge", IX86_BUILTIN_COMIGESD, GE, 0 },
30780 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdneq", IX86_BUILTIN_COMINEQSD, LTGT, 0 },
30781 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdeq", IX86_BUILTIN_UCOMIEQSD, UNEQ, 0 },
30782 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdlt", IX86_BUILTIN_UCOMILTSD, UNLT, 0 },
30783 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdle", IX86_BUILTIN_UCOMILESD, UNLE, 0 },
30784 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdgt", IX86_BUILTIN_UCOMIGTSD, GT, 0 },
30785 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdge", IX86_BUILTIN_UCOMIGESD, GE, 0 },
30786 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdneq", IX86_BUILTIN_UCOMINEQSD, LTGT, 0 },
30787 };
30789 static const struct builtin_description bdesc_pcmpestr[] =
30790 {
30791 /* SSE4.2 */
30792 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestri128", IX86_BUILTIN_PCMPESTRI128, UNKNOWN, 0 },
30793 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestrm128", IX86_BUILTIN_PCMPESTRM128, UNKNOWN, 0 },
30794 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestria128", IX86_BUILTIN_PCMPESTRA128, UNKNOWN, (int) CCAmode },
30795 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestric128", IX86_BUILTIN_PCMPESTRC128, UNKNOWN, (int) CCCmode },
30796 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestrio128", IX86_BUILTIN_PCMPESTRO128, UNKNOWN, (int) CCOmode },
30797 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestris128", IX86_BUILTIN_PCMPESTRS128, UNKNOWN, (int) CCSmode },
30798 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestriz128", IX86_BUILTIN_PCMPESTRZ128, UNKNOWN, (int) CCZmode },
30799 };
30801 static const struct builtin_description bdesc_pcmpistr[] =
30802 {
30803 /* SSE4.2 */
30804 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistri128", IX86_BUILTIN_PCMPISTRI128, UNKNOWN, 0 },
30805 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistrm128", IX86_BUILTIN_PCMPISTRM128, UNKNOWN, 0 },
30806 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistria128", IX86_BUILTIN_PCMPISTRA128, UNKNOWN, (int) CCAmode },
30807 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistric128", IX86_BUILTIN_PCMPISTRC128, UNKNOWN, (int) CCCmode },
30808 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistrio128", IX86_BUILTIN_PCMPISTRO128, UNKNOWN, (int) CCOmode },
30809 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistris128", IX86_BUILTIN_PCMPISTRS128, UNKNOWN, (int) CCSmode },
30810 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistriz128", IX86_BUILTIN_PCMPISTRZ128, UNKNOWN, (int) CCZmode },
30811 };
30813 /* Special builtins with variable number of arguments. */
30814 static const struct builtin_description bdesc_special_args[] =
30815 {
30816 { ~OPTION_MASK_ISA_64BIT, CODE_FOR_nothing, "__builtin_ia32_rdtsc", IX86_BUILTIN_RDTSC, UNKNOWN, (int) UINT64_FTYPE_VOID },
30817 { ~OPTION_MASK_ISA_64BIT, CODE_FOR_nothing, "__builtin_ia32_rdtscp", IX86_BUILTIN_RDTSCP, UNKNOWN, (int) UINT64_FTYPE_PUNSIGNED },
30818 { ~OPTION_MASK_ISA_64BIT, CODE_FOR_pause, "__builtin_ia32_pause", IX86_BUILTIN_PAUSE, UNKNOWN, (int) VOID_FTYPE_VOID },
30820 /* 80387 (for use internally for atomic compound assignment). */
30821 { 0, CODE_FOR_fnstenv, "__builtin_ia32_fnstenv", IX86_BUILTIN_FNSTENV, UNKNOWN, (int) VOID_FTYPE_PVOID },
30822 { 0, CODE_FOR_fldenv, "__builtin_ia32_fldenv", IX86_BUILTIN_FLDENV, UNKNOWN, (int) VOID_FTYPE_PCVOID },
30823 { 0, CODE_FOR_fnstsw, "__builtin_ia32_fnstsw", IX86_BUILTIN_FNSTSW, UNKNOWN, (int) USHORT_FTYPE_VOID },
30824 { 0, CODE_FOR_fnclex, "__builtin_ia32_fnclex", IX86_BUILTIN_FNCLEX, UNKNOWN, (int) VOID_FTYPE_VOID },
30826 /* MMX */
30827 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_emms, "__builtin_ia32_emms", IX86_BUILTIN_EMMS, UNKNOWN, (int) VOID_FTYPE_VOID },
30829 /* 3DNow! */
30830 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_femms, "__builtin_ia32_femms", IX86_BUILTIN_FEMMS, UNKNOWN, (int) VOID_FTYPE_VOID },
30832 /* FXSR, XSAVE, XSAVEOPT, XSAVEC and XSAVES. */
30833 { OPTION_MASK_ISA_FXSR, CODE_FOR_nothing, "__builtin_ia32_fxsave", IX86_BUILTIN_FXSAVE, UNKNOWN, (int) VOID_FTYPE_PVOID },
30834 { OPTION_MASK_ISA_FXSR, CODE_FOR_nothing, "__builtin_ia32_fxrstor", IX86_BUILTIN_FXRSTOR, UNKNOWN, (int) VOID_FTYPE_PVOID },
30835 { OPTION_MASK_ISA_XSAVE, CODE_FOR_nothing, "__builtin_ia32_xsave", IX86_BUILTIN_XSAVE, UNKNOWN, (int) VOID_FTYPE_PVOID_INT64 },
30836 { OPTION_MASK_ISA_XSAVE, CODE_FOR_nothing, "__builtin_ia32_xrstor", IX86_BUILTIN_XRSTOR, UNKNOWN, (int) VOID_FTYPE_PVOID_INT64 },
30837 { OPTION_MASK_ISA_XSAVEOPT, CODE_FOR_nothing, "__builtin_ia32_xsaveopt", IX86_BUILTIN_XSAVEOPT, UNKNOWN, (int) VOID_FTYPE_PVOID_INT64 },
30838 { OPTION_MASK_ISA_XSAVES, CODE_FOR_nothing, "__builtin_ia32_xsaves", IX86_BUILTIN_XSAVES, UNKNOWN, (int) VOID_FTYPE_PVOID_INT64 },
30839 { OPTION_MASK_ISA_XSAVES, CODE_FOR_nothing, "__builtin_ia32_xrstors", IX86_BUILTIN_XRSTORS, UNKNOWN, (int) VOID_FTYPE_PVOID_INT64 },
30840 { OPTION_MASK_ISA_XSAVEC, CODE_FOR_nothing, "__builtin_ia32_xsavec", IX86_BUILTIN_XSAVEC, UNKNOWN, (int) VOID_FTYPE_PVOID_INT64 },
30842 { OPTION_MASK_ISA_FXSR | OPTION_MASK_ISA_64BIT, CODE_FOR_nothing, "__builtin_ia32_fxsave64", IX86_BUILTIN_FXSAVE64, UNKNOWN, (int) VOID_FTYPE_PVOID },
30843 { OPTION_MASK_ISA_FXSR | OPTION_MASK_ISA_64BIT, CODE_FOR_nothing, "__builtin_ia32_fxrstor64", IX86_BUILTIN_FXRSTOR64, UNKNOWN, (int) VOID_FTYPE_PVOID },
30844 { OPTION_MASK_ISA_XSAVE | OPTION_MASK_ISA_64BIT, CODE_FOR_nothing, "__builtin_ia32_xsave64", IX86_BUILTIN_XSAVE64, UNKNOWN, (int) VOID_FTYPE_PVOID_INT64 },
30845 { OPTION_MASK_ISA_XSAVE | OPTION_MASK_ISA_64BIT, CODE_FOR_nothing, "__builtin_ia32_xrstor64", IX86_BUILTIN_XRSTOR64, UNKNOWN, (int) VOID_FTYPE_PVOID_INT64 },
30846 { OPTION_MASK_ISA_XSAVEOPT | OPTION_MASK_ISA_64BIT, CODE_FOR_nothing, "__builtin_ia32_xsaveopt64", IX86_BUILTIN_XSAVEOPT64, UNKNOWN, (int) VOID_FTYPE_PVOID_INT64 },
30847 { OPTION_MASK_ISA_XSAVES | OPTION_MASK_ISA_64BIT, CODE_FOR_nothing, "__builtin_ia32_xsaves64", IX86_BUILTIN_XSAVES64, UNKNOWN, (int) VOID_FTYPE_PVOID_INT64 },
30848 { OPTION_MASK_ISA_XSAVES | OPTION_MASK_ISA_64BIT, CODE_FOR_nothing, "__builtin_ia32_xrstors64", IX86_BUILTIN_XRSTORS64, UNKNOWN, (int) VOID_FTYPE_PVOID_INT64 },
30849 { OPTION_MASK_ISA_XSAVEC | OPTION_MASK_ISA_64BIT, CODE_FOR_nothing, "__builtin_ia32_xsavec64", IX86_BUILTIN_XSAVEC64, UNKNOWN, (int) VOID_FTYPE_PVOID_INT64 },
30851 /* SSE */
30852 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_storeups, "__builtin_ia32_storeups", IX86_BUILTIN_STOREUPS, UNKNOWN, (int) VOID_FTYPE_PFLOAT_V4SF },
30853 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_movntv4sf, "__builtin_ia32_movntps", IX86_BUILTIN_MOVNTPS, UNKNOWN, (int) VOID_FTYPE_PFLOAT_V4SF },
30854 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_loadups, "__builtin_ia32_loadups", IX86_BUILTIN_LOADUPS, UNKNOWN, (int) V4SF_FTYPE_PCFLOAT },
30856 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_loadhps_exp, "__builtin_ia32_loadhps", IX86_BUILTIN_LOADHPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_PCV2SF },
30857 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_loadlps_exp, "__builtin_ia32_loadlps", IX86_BUILTIN_LOADLPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_PCV2SF },
30858 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_storehps, "__builtin_ia32_storehps", IX86_BUILTIN_STOREHPS, UNKNOWN, (int) VOID_FTYPE_PV2SF_V4SF },
30859 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_storelps, "__builtin_ia32_storelps", IX86_BUILTIN_STORELPS, UNKNOWN, (int) VOID_FTYPE_PV2SF_V4SF },
30861 /* SSE or 3DNow!A */
30862 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_sse_sfence, "__builtin_ia32_sfence", IX86_BUILTIN_SFENCE, UNKNOWN, (int) VOID_FTYPE_VOID },
30863 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_sse_movntq, "__builtin_ia32_movntq", IX86_BUILTIN_MOVNTQ, UNKNOWN, (int) VOID_FTYPE_PULONGLONG_ULONGLONG },
30865 /* SSE2 */
30866 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_lfence, "__builtin_ia32_lfence", IX86_BUILTIN_LFENCE, UNKNOWN, (int) VOID_FTYPE_VOID },
30867 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_mfence, 0, IX86_BUILTIN_MFENCE, UNKNOWN, (int) VOID_FTYPE_VOID },
30868 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_storeupd, "__builtin_ia32_storeupd", IX86_BUILTIN_STOREUPD, UNKNOWN, (int) VOID_FTYPE_PDOUBLE_V2DF },
30869 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_storedquv16qi, "__builtin_ia32_storedqu", IX86_BUILTIN_STOREDQU, UNKNOWN, (int) VOID_FTYPE_PCHAR_V16QI },
30870 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movntv2df, "__builtin_ia32_movntpd", IX86_BUILTIN_MOVNTPD, UNKNOWN, (int) VOID_FTYPE_PDOUBLE_V2DF },
30871 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movntv2di, "__builtin_ia32_movntdq", IX86_BUILTIN_MOVNTDQ, UNKNOWN, (int) VOID_FTYPE_PV2DI_V2DI },
30872 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movntisi, "__builtin_ia32_movnti", IX86_BUILTIN_MOVNTI, UNKNOWN, (int) VOID_FTYPE_PINT_INT },
30873 { OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_64BIT, CODE_FOR_sse2_movntidi, "__builtin_ia32_movnti64", IX86_BUILTIN_MOVNTI64, UNKNOWN, (int) VOID_FTYPE_PLONGLONG_LONGLONG },
30874 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_loadupd, "__builtin_ia32_loadupd", IX86_BUILTIN_LOADUPD, UNKNOWN, (int) V2DF_FTYPE_PCDOUBLE },
30875 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_loaddquv16qi, "__builtin_ia32_loaddqu", IX86_BUILTIN_LOADDQU, UNKNOWN, (int) V16QI_FTYPE_PCCHAR },
30877 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_loadhpd_exp, "__builtin_ia32_loadhpd", IX86_BUILTIN_LOADHPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_PCDOUBLE },
30878 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_loadlpd_exp, "__builtin_ia32_loadlpd", IX86_BUILTIN_LOADLPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_PCDOUBLE },
30880 /* SSE3 */
30881 { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_lddqu, "__builtin_ia32_lddqu", IX86_BUILTIN_LDDQU, UNKNOWN, (int) V16QI_FTYPE_PCCHAR },
30883 /* SSE4.1 */
30884 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_movntdqa, "__builtin_ia32_movntdqa", IX86_BUILTIN_MOVNTDQA, UNKNOWN, (int) V2DI_FTYPE_PV2DI },
30886 /* SSE4A */
30887 { OPTION_MASK_ISA_SSE4A, CODE_FOR_sse4a_vmmovntv2df, "__builtin_ia32_movntsd", IX86_BUILTIN_MOVNTSD, UNKNOWN, (int) VOID_FTYPE_PDOUBLE_V2DF },
30888 { OPTION_MASK_ISA_SSE4A, CODE_FOR_sse4a_vmmovntv4sf, "__builtin_ia32_movntss", IX86_BUILTIN_MOVNTSS, UNKNOWN, (int) VOID_FTYPE_PFLOAT_V4SF },
30890 /* AVX */
30891 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vzeroall, "__builtin_ia32_vzeroall", IX86_BUILTIN_VZEROALL, UNKNOWN, (int) VOID_FTYPE_VOID },
30892 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vzeroupper, "__builtin_ia32_vzeroupper", IX86_BUILTIN_VZEROUPPER, UNKNOWN, (int) VOID_FTYPE_VOID },
30894 { OPTION_MASK_ISA_AVX, CODE_FOR_vec_dupv4sf, "__builtin_ia32_vbroadcastss", IX86_BUILTIN_VBROADCASTSS, UNKNOWN, (int) V4SF_FTYPE_PCFLOAT },
30895 { OPTION_MASK_ISA_AVX, CODE_FOR_vec_dupv4df, "__builtin_ia32_vbroadcastsd256", IX86_BUILTIN_VBROADCASTSD256, UNKNOWN, (int) V4DF_FTYPE_PCDOUBLE },
30896 { OPTION_MASK_ISA_AVX, CODE_FOR_vec_dupv8sf, "__builtin_ia32_vbroadcastss256", IX86_BUILTIN_VBROADCASTSS256, UNKNOWN, (int) V8SF_FTYPE_PCFLOAT },
30897 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vbroadcastf128_v4df, "__builtin_ia32_vbroadcastf128_pd256", IX86_BUILTIN_VBROADCASTPD256, UNKNOWN, (int) V4DF_FTYPE_PCV2DF },
30898 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vbroadcastf128_v8sf, "__builtin_ia32_vbroadcastf128_ps256", IX86_BUILTIN_VBROADCASTPS256, UNKNOWN, (int) V8SF_FTYPE_PCV4SF },
30900 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_loadupd256, "__builtin_ia32_loadupd256", IX86_BUILTIN_LOADUPD256, UNKNOWN, (int) V4DF_FTYPE_PCDOUBLE },
30901 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_loadups256, "__builtin_ia32_loadups256", IX86_BUILTIN_LOADUPS256, UNKNOWN, (int) V8SF_FTYPE_PCFLOAT },
30902 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_storeupd256, "__builtin_ia32_storeupd256", IX86_BUILTIN_STOREUPD256, UNKNOWN, (int) VOID_FTYPE_PDOUBLE_V4DF },
30903 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_storeups256, "__builtin_ia32_storeups256", IX86_BUILTIN_STOREUPS256, UNKNOWN, (int) VOID_FTYPE_PFLOAT_V8SF },
30904 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_loaddquv32qi, "__builtin_ia32_loaddqu256", IX86_BUILTIN_LOADDQU256, UNKNOWN, (int) V32QI_FTYPE_PCCHAR },
30905 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_storedquv32qi, "__builtin_ia32_storedqu256", IX86_BUILTIN_STOREDQU256, UNKNOWN, (int) VOID_FTYPE_PCHAR_V32QI },
30906 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_lddqu256, "__builtin_ia32_lddqu256", IX86_BUILTIN_LDDQU256, UNKNOWN, (int) V32QI_FTYPE_PCCHAR },
30908 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movntv4di, "__builtin_ia32_movntdq256", IX86_BUILTIN_MOVNTDQ256, UNKNOWN, (int) VOID_FTYPE_PV4DI_V4DI },
30909 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movntv4df, "__builtin_ia32_movntpd256", IX86_BUILTIN_MOVNTPD256, UNKNOWN, (int) VOID_FTYPE_PDOUBLE_V4DF },
30910 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movntv8sf, "__builtin_ia32_movntps256", IX86_BUILTIN_MOVNTPS256, UNKNOWN, (int) VOID_FTYPE_PFLOAT_V8SF },
30912 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskloadpd, "__builtin_ia32_maskloadpd", IX86_BUILTIN_MASKLOADPD, UNKNOWN, (int) V2DF_FTYPE_PCV2DF_V2DI },
30913 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskloadps, "__builtin_ia32_maskloadps", IX86_BUILTIN_MASKLOADPS, UNKNOWN, (int) V4SF_FTYPE_PCV4SF_V4SI },
30914 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskloadpd256, "__builtin_ia32_maskloadpd256", IX86_BUILTIN_MASKLOADPD256, UNKNOWN, (int) V4DF_FTYPE_PCV4DF_V4DI },
30915 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskloadps256, "__builtin_ia32_maskloadps256", IX86_BUILTIN_MASKLOADPS256, UNKNOWN, (int) V8SF_FTYPE_PCV8SF_V8SI },
30916 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskstorepd, "__builtin_ia32_maskstorepd", IX86_BUILTIN_MASKSTOREPD, UNKNOWN, (int) VOID_FTYPE_PV2DF_V2DI_V2DF },
30917 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskstoreps, "__builtin_ia32_maskstoreps", IX86_BUILTIN_MASKSTOREPS, UNKNOWN, (int) VOID_FTYPE_PV4SF_V4SI_V4SF },
30918 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskstorepd256, "__builtin_ia32_maskstorepd256", IX86_BUILTIN_MASKSTOREPD256, UNKNOWN, (int) VOID_FTYPE_PV4DF_V4DI_V4DF },
30919 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskstoreps256, "__builtin_ia32_maskstoreps256", IX86_BUILTIN_MASKSTOREPS256, UNKNOWN, (int) VOID_FTYPE_PV8SF_V8SI_V8SF },
30921 /* AVX2 */
30922 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_movntdqa, "__builtin_ia32_movntdqa256", IX86_BUILTIN_MOVNTDQA256, UNKNOWN, (int) V4DI_FTYPE_PV4DI },
30923 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_maskloadd, "__builtin_ia32_maskloadd", IX86_BUILTIN_MASKLOADD, UNKNOWN, (int) V4SI_FTYPE_PCV4SI_V4SI },
30924 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_maskloadq, "__builtin_ia32_maskloadq", IX86_BUILTIN_MASKLOADQ, UNKNOWN, (int) V2DI_FTYPE_PCV2DI_V2DI },
30925 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_maskloadd256, "__builtin_ia32_maskloadd256", IX86_BUILTIN_MASKLOADD256, UNKNOWN, (int) V8SI_FTYPE_PCV8SI_V8SI },
30926 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_maskloadq256, "__builtin_ia32_maskloadq256", IX86_BUILTIN_MASKLOADQ256, UNKNOWN, (int) V4DI_FTYPE_PCV4DI_V4DI },
30927 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_maskstored, "__builtin_ia32_maskstored", IX86_BUILTIN_MASKSTORED, UNKNOWN, (int) VOID_FTYPE_PV4SI_V4SI_V4SI },
30928 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_maskstoreq, "__builtin_ia32_maskstoreq", IX86_BUILTIN_MASKSTOREQ, UNKNOWN, (int) VOID_FTYPE_PV2DI_V2DI_V2DI },
30929 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_maskstored256, "__builtin_ia32_maskstored256", IX86_BUILTIN_MASKSTORED256, UNKNOWN, (int) VOID_FTYPE_PV8SI_V8SI_V8SI },
30930 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_maskstoreq256, "__builtin_ia32_maskstoreq256", IX86_BUILTIN_MASKSTOREQ256, UNKNOWN, (int) VOID_FTYPE_PV4DI_V4DI_V4DI },
30932 /* AVX512F */
30933 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_compressstorev16sf_mask, "__builtin_ia32_compressstoresf512_mask", IX86_BUILTIN_COMPRESSPSSTORE512, UNKNOWN, (int) VOID_FTYPE_PV16SF_V16SF_HI },
30934 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_compressstorev16si_mask, "__builtin_ia32_compressstoresi512_mask", IX86_BUILTIN_PCOMPRESSDSTORE512, UNKNOWN, (int) VOID_FTYPE_PV16SI_V16SI_HI },
30935 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_compressstorev8df_mask, "__builtin_ia32_compressstoredf512_mask", IX86_BUILTIN_COMPRESSPDSTORE512, UNKNOWN, (int) VOID_FTYPE_PV8DF_V8DF_QI },
30936 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_compressstorev8di_mask, "__builtin_ia32_compressstoredi512_mask", IX86_BUILTIN_PCOMPRESSQSTORE512, UNKNOWN, (int) VOID_FTYPE_PV8DI_V8DI_QI },
30937 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_expandv16sf_mask, "__builtin_ia32_expandloadsf512_mask", IX86_BUILTIN_EXPANDPSLOAD512, UNKNOWN, (int) V16SF_FTYPE_PCV16SF_V16SF_HI },
30938 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_expandv16sf_maskz, "__builtin_ia32_expandloadsf512_maskz", IX86_BUILTIN_EXPANDPSLOAD512Z, UNKNOWN, (int) V16SF_FTYPE_PCV16SF_V16SF_HI },
30939 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_expandv16si_mask, "__builtin_ia32_expandloadsi512_mask", IX86_BUILTIN_PEXPANDDLOAD512, UNKNOWN, (int) V16SI_FTYPE_PCV16SI_V16SI_HI },
30940 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_expandv16si_maskz, "__builtin_ia32_expandloadsi512_maskz", IX86_BUILTIN_PEXPANDDLOAD512Z, UNKNOWN, (int) V16SI_FTYPE_PCV16SI_V16SI_HI },
30941 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_expandv8df_mask, "__builtin_ia32_expandloaddf512_mask", IX86_BUILTIN_EXPANDPDLOAD512, UNKNOWN, (int) V8DF_FTYPE_PCV8DF_V8DF_QI },
30942 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_expandv8df_maskz, "__builtin_ia32_expandloaddf512_maskz", IX86_BUILTIN_EXPANDPDLOAD512Z, UNKNOWN, (int) V8DF_FTYPE_PCV8DF_V8DF_QI },
30943 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_expandv8di_mask, "__builtin_ia32_expandloaddi512_mask", IX86_BUILTIN_PEXPANDQLOAD512, UNKNOWN, (int) V8DI_FTYPE_PCV8DI_V8DI_QI },
30944 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_expandv8di_maskz, "__builtin_ia32_expandloaddi512_maskz", IX86_BUILTIN_PEXPANDQLOAD512Z, UNKNOWN, (int) V8DI_FTYPE_PCV8DI_V8DI_QI },
30945 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_loaddquv16si_mask, "__builtin_ia32_loaddqusi512_mask", IX86_BUILTIN_LOADDQUSI512, UNKNOWN, (int) V16SI_FTYPE_PCV16SI_V16SI_HI },
30946 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_loaddquv8di_mask, "__builtin_ia32_loaddqudi512_mask", IX86_BUILTIN_LOADDQUDI512, UNKNOWN, (int) V8DI_FTYPE_PCV8DI_V8DI_QI },
30947 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_loadupd512_mask, "__builtin_ia32_loadupd512_mask", IX86_BUILTIN_LOADUPD512, UNKNOWN, (int) V8DF_FTYPE_PCV8DF_V8DF_QI },
30948 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_loadups512_mask, "__builtin_ia32_loadups512_mask", IX86_BUILTIN_LOADUPS512, UNKNOWN, (int) V16SF_FTYPE_PCV16SF_V16SF_HI },
30949 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_loadv16sf_mask, "__builtin_ia32_loadaps512_mask", IX86_BUILTIN_LOADAPS512, UNKNOWN, (int) V16SF_FTYPE_PCV16SF_V16SF_HI },
30950 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_loadv16si_mask, "__builtin_ia32_movdqa32load512_mask", IX86_BUILTIN_MOVDQA32LOAD512, UNKNOWN, (int) V16SI_FTYPE_PCV16SI_V16SI_HI },
30951 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_loadv8df_mask, "__builtin_ia32_loadapd512_mask", IX86_BUILTIN_LOADAPD512, UNKNOWN, (int) V8DF_FTYPE_PCV8DF_V8DF_QI },
30952 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_loadv8di_mask, "__builtin_ia32_movdqa64load512_mask", IX86_BUILTIN_MOVDQA64LOAD512, UNKNOWN, (int) V8DI_FTYPE_PCV8DI_V8DI_QI },
30953 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_movntv16sf, "__builtin_ia32_movntps512", IX86_BUILTIN_MOVNTPS512, UNKNOWN, (int) VOID_FTYPE_PFLOAT_V16SF },
30954 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_movntv8df, "__builtin_ia32_movntpd512", IX86_BUILTIN_MOVNTPD512, UNKNOWN, (int) VOID_FTYPE_PDOUBLE_V8DF },
30955 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_movntv8di, "__builtin_ia32_movntdq512", IX86_BUILTIN_MOVNTDQ512, UNKNOWN, (int) VOID_FTYPE_PV8DI_V8DI },
30956 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_movntdqa, "__builtin_ia32_movntdqa512", IX86_BUILTIN_MOVNTDQA512, UNKNOWN, (int) V8DI_FTYPE_PV8DI },
30957 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_storedquv16si_mask, "__builtin_ia32_storedqusi512_mask", IX86_BUILTIN_STOREDQUSI512, UNKNOWN, (int) VOID_FTYPE_PV16SI_V16SI_HI },
30958 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_storedquv8di_mask, "__builtin_ia32_storedqudi512_mask", IX86_BUILTIN_STOREDQUDI512, UNKNOWN, (int) VOID_FTYPE_PV8DI_V8DI_QI },
30959 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_storeupd512_mask, "__builtin_ia32_storeupd512_mask", IX86_BUILTIN_STOREUPD512, UNKNOWN, (int) VOID_FTYPE_PV8DF_V8DF_QI },
30960 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_us_truncatev8div8si2_mask_store, "__builtin_ia32_pmovusqd512mem_mask", IX86_BUILTIN_PMOVUSQD512_MEM, UNKNOWN, (int) VOID_FTYPE_PV8SI_V8DI_QI },
30961 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_ss_truncatev8div8si2_mask_store, "__builtin_ia32_pmovsqd512mem_mask", IX86_BUILTIN_PMOVSQD512_MEM, UNKNOWN, (int) VOID_FTYPE_PV8SI_V8DI_QI },
30962 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_truncatev8div8si2_mask_store, "__builtin_ia32_pmovqd512mem_mask", IX86_BUILTIN_PMOVQD512_MEM, UNKNOWN, (int) VOID_FTYPE_PV8SI_V8DI_QI },
30963 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_us_truncatev8div8hi2_mask_store, "__builtin_ia32_pmovusqw512mem_mask", IX86_BUILTIN_PMOVUSQW512_MEM, UNKNOWN, (int) VOID_FTYPE_PV8HI_V8DI_QI },
30964 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_ss_truncatev8div8hi2_mask_store, "__builtin_ia32_pmovsqw512mem_mask", IX86_BUILTIN_PMOVSQW512_MEM, UNKNOWN, (int) VOID_FTYPE_PV8HI_V8DI_QI },
30965 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_truncatev8div8hi2_mask_store, "__builtin_ia32_pmovqw512mem_mask", IX86_BUILTIN_PMOVQW512_MEM, UNKNOWN, (int) VOID_FTYPE_PV8HI_V8DI_QI },
30966 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_us_truncatev16siv16hi2_mask_store, "__builtin_ia32_pmovusdw512mem_mask", IX86_BUILTIN_PMOVUSDW512_MEM, UNKNOWN, (int) VOID_FTYPE_PV16HI_V16SI_HI },
30967 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_ss_truncatev16siv16hi2_mask_store, "__builtin_ia32_pmovsdw512mem_mask", IX86_BUILTIN_PMOVSDW512_MEM, UNKNOWN, (int) VOID_FTYPE_PV16HI_V16SI_HI },
30968 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_truncatev16siv16hi2_mask_store, "__builtin_ia32_pmovdw512mem_mask", IX86_BUILTIN_PMOVDW512_MEM, UNKNOWN, (int) VOID_FTYPE_PV16HI_V16SI_HI },
30969 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_truncatev8div16qi2_mask_store, "__builtin_ia32_pmovqb512mem_mask", IX86_BUILTIN_PMOVQB512_MEM, UNKNOWN, (int) VOID_FTYPE_PV16QI_V8DI_QI },
30970 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_us_truncatev8div16qi2_mask_store, "__builtin_ia32_pmovusqb512mem_mask", IX86_BUILTIN_PMOVUSQB512_MEM, UNKNOWN, (int) VOID_FTYPE_PV16QI_V8DI_QI },
30971 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_ss_truncatev8div16qi2_mask_store, "__builtin_ia32_pmovsqb512mem_mask", IX86_BUILTIN_PMOVSQB512_MEM, UNKNOWN, (int) VOID_FTYPE_PV16QI_V8DI_QI },
30972 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_us_truncatev16siv16qi2_mask_store, "__builtin_ia32_pmovusdb512mem_mask", IX86_BUILTIN_PMOVUSDB512_MEM, UNKNOWN, (int) VOID_FTYPE_PV16QI_V16SI_HI },
30973 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_ss_truncatev16siv16qi2_mask_store, "__builtin_ia32_pmovsdb512mem_mask", IX86_BUILTIN_PMOVSDB512_MEM, UNKNOWN, (int) VOID_FTYPE_PV16QI_V16SI_HI },
30974 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_truncatev16siv16qi2_mask_store, "__builtin_ia32_pmovdb512mem_mask", IX86_BUILTIN_PMOVDB512_MEM, UNKNOWN, (int) VOID_FTYPE_PV16QI_V16SI_HI },
30975 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_storeups512_mask, "__builtin_ia32_storeups512_mask", IX86_BUILTIN_STOREUPS512, UNKNOWN, (int) VOID_FTYPE_PV16SF_V16SF_HI },
30976 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_storev16sf_mask, "__builtin_ia32_storeaps512_mask", IX86_BUILTIN_STOREAPS512, UNKNOWN, (int) VOID_FTYPE_PV16SF_V16SF_HI },
30977 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_storev16si_mask, "__builtin_ia32_movdqa32store512_mask", IX86_BUILTIN_MOVDQA32STORE512, UNKNOWN, (int) VOID_FTYPE_PV16SI_V16SI_HI },
30978 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_storev8df_mask, "__builtin_ia32_storeapd512_mask", IX86_BUILTIN_STOREAPD512, UNKNOWN, (int) VOID_FTYPE_PV8DF_V8DF_QI },
30979 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_storev8di_mask, "__builtin_ia32_movdqa64store512_mask", IX86_BUILTIN_MOVDQA64STORE512, UNKNOWN, (int) VOID_FTYPE_PV8DI_V8DI_QI },
30981 { OPTION_MASK_ISA_LWP, CODE_FOR_lwp_llwpcb, "__builtin_ia32_llwpcb", IX86_BUILTIN_LLWPCB, UNKNOWN, (int) VOID_FTYPE_PVOID },
30982 { OPTION_MASK_ISA_LWP, CODE_FOR_lwp_slwpcb, "__builtin_ia32_slwpcb", IX86_BUILTIN_SLWPCB, UNKNOWN, (int) PVOID_FTYPE_VOID },
30983 { OPTION_MASK_ISA_LWP, CODE_FOR_lwp_lwpvalsi3, "__builtin_ia32_lwpval32", IX86_BUILTIN_LWPVAL32, UNKNOWN, (int) VOID_FTYPE_UINT_UINT_UINT },
30984 { OPTION_MASK_ISA_LWP, CODE_FOR_lwp_lwpvaldi3, "__builtin_ia32_lwpval64", IX86_BUILTIN_LWPVAL64, UNKNOWN, (int) VOID_FTYPE_UINT64_UINT_UINT },
30985 { OPTION_MASK_ISA_LWP, CODE_FOR_lwp_lwpinssi3, "__builtin_ia32_lwpins32", IX86_BUILTIN_LWPINS32, UNKNOWN, (int) UCHAR_FTYPE_UINT_UINT_UINT },
30986 { OPTION_MASK_ISA_LWP, CODE_FOR_lwp_lwpinsdi3, "__builtin_ia32_lwpins64", IX86_BUILTIN_LWPINS64, UNKNOWN, (int) UCHAR_FTYPE_UINT64_UINT_UINT },
30988 /* FSGSBASE */
30989 { OPTION_MASK_ISA_FSGSBASE | OPTION_MASK_ISA_64BIT, CODE_FOR_rdfsbasesi, "__builtin_ia32_rdfsbase32", IX86_BUILTIN_RDFSBASE32, UNKNOWN, (int) UNSIGNED_FTYPE_VOID },
30990 { OPTION_MASK_ISA_FSGSBASE | OPTION_MASK_ISA_64BIT, CODE_FOR_rdfsbasedi, "__builtin_ia32_rdfsbase64", IX86_BUILTIN_RDFSBASE64, UNKNOWN, (int) UINT64_FTYPE_VOID },
30991 { OPTION_MASK_ISA_FSGSBASE | OPTION_MASK_ISA_64BIT, CODE_FOR_rdgsbasesi, "__builtin_ia32_rdgsbase32", IX86_BUILTIN_RDGSBASE32, UNKNOWN, (int) UNSIGNED_FTYPE_VOID },
30992 { OPTION_MASK_ISA_FSGSBASE | OPTION_MASK_ISA_64BIT, CODE_FOR_rdgsbasedi, "__builtin_ia32_rdgsbase64", IX86_BUILTIN_RDGSBASE64, UNKNOWN, (int) UINT64_FTYPE_VOID },
30993 { OPTION_MASK_ISA_FSGSBASE | OPTION_MASK_ISA_64BIT, CODE_FOR_wrfsbasesi, "__builtin_ia32_wrfsbase32", IX86_BUILTIN_WRFSBASE32, UNKNOWN, (int) VOID_FTYPE_UNSIGNED },
30994 { OPTION_MASK_ISA_FSGSBASE | OPTION_MASK_ISA_64BIT, CODE_FOR_wrfsbasedi, "__builtin_ia32_wrfsbase64", IX86_BUILTIN_WRFSBASE64, UNKNOWN, (int) VOID_FTYPE_UINT64 },
30995 { OPTION_MASK_ISA_FSGSBASE | OPTION_MASK_ISA_64BIT, CODE_FOR_wrgsbasesi, "__builtin_ia32_wrgsbase32", IX86_BUILTIN_WRGSBASE32, UNKNOWN, (int) VOID_FTYPE_UNSIGNED },
30996 { OPTION_MASK_ISA_FSGSBASE | OPTION_MASK_ISA_64BIT, CODE_FOR_wrgsbasedi, "__builtin_ia32_wrgsbase64", IX86_BUILTIN_WRGSBASE64, UNKNOWN, (int) VOID_FTYPE_UINT64 },
30998 /* RTM */
30999 { OPTION_MASK_ISA_RTM, CODE_FOR_xbegin, "__builtin_ia32_xbegin", IX86_BUILTIN_XBEGIN, UNKNOWN, (int) UNSIGNED_FTYPE_VOID },
31000 { OPTION_MASK_ISA_RTM, CODE_FOR_xend, "__builtin_ia32_xend", IX86_BUILTIN_XEND, UNKNOWN, (int) VOID_FTYPE_VOID },
31001 { OPTION_MASK_ISA_RTM, CODE_FOR_xtest, "__builtin_ia32_xtest", IX86_BUILTIN_XTEST, UNKNOWN, (int) INT_FTYPE_VOID },
31003 /* AVX512BW */
31004 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_loaddquv32hi_mask, "__builtin_ia32_loaddquhi512_mask", IX86_BUILTIN_LOADDQUHI512_MASK, UNKNOWN, (int) V32HI_FTYPE_PCV32HI_V32HI_SI },
31005 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512f_loaddquv64qi_mask, "__builtin_ia32_loaddquqi512_mask", IX86_BUILTIN_LOADDQUQI512_MASK, UNKNOWN, (int) V64QI_FTYPE_PCV64QI_V64QI_DI },
31006 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_storedquv32hi_mask, "__builtin_ia32_storedquhi512_mask", IX86_BUILTIN_STOREDQUHI512_MASK, UNKNOWN, (int) VOID_FTYPE_PV32HI_V32HI_SI },
31007 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_storedquv64qi_mask, "__builtin_ia32_storedquqi512_mask", IX86_BUILTIN_STOREDQUQI512_MASK, UNKNOWN, (int) VOID_FTYPE_PV64QI_V64QI_DI },
31009 /* AVX512VL */
31010 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_loaddquv16hi_mask, "__builtin_ia32_loaddquhi256_mask", IX86_BUILTIN_LOADDQUHI256_MASK, UNKNOWN, (int) V16HI_FTYPE_PCV16HI_V16HI_HI },
31011 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_loaddquv8hi_mask, "__builtin_ia32_loaddquhi128_mask", IX86_BUILTIN_LOADDQUHI128_MASK, UNKNOWN, (int) V8HI_FTYPE_PCV8HI_V8HI_QI },
31012 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_loaddquv32qi_mask, "__builtin_ia32_loaddquqi256_mask", IX86_BUILTIN_LOADDQUQI256_MASK, UNKNOWN, (int) V32QI_FTYPE_PCV32QI_V32QI_SI },
31013 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse2_loaddquv16qi_mask, "__builtin_ia32_loaddquqi128_mask", IX86_BUILTIN_LOADDQUQI128_MASK, UNKNOWN, (int) V16QI_FTYPE_PCV16QI_V16QI_HI },
31014 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_loadv4di_mask, "__builtin_ia32_movdqa64load256_mask", IX86_BUILTIN_MOVDQA64LOAD256_MASK, UNKNOWN, (int) V4DI_FTYPE_PCV4DI_V4DI_QI },
31015 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_loadv2di_mask, "__builtin_ia32_movdqa64load128_mask", IX86_BUILTIN_MOVDQA64LOAD128_MASK, UNKNOWN, (int) V2DI_FTYPE_PCV2DI_V2DI_QI },
31016 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_loadv8si_mask, "__builtin_ia32_movdqa32load256_mask", IX86_BUILTIN_MOVDQA32LOAD256_MASK, UNKNOWN, (int) V8SI_FTYPE_PCV8SI_V8SI_QI },
31017 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_loadv4si_mask, "__builtin_ia32_movdqa32load128_mask", IX86_BUILTIN_MOVDQA32LOAD128_MASK, UNKNOWN, (int) V4SI_FTYPE_PCV4SI_V4SI_QI },
31018 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_storev4di_mask, "__builtin_ia32_movdqa64store256_mask", IX86_BUILTIN_MOVDQA64STORE256_MASK, UNKNOWN, (int) VOID_FTYPE_PV4DI_V4DI_QI },
31019 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_storev2di_mask, "__builtin_ia32_movdqa64store128_mask", IX86_BUILTIN_MOVDQA64STORE128_MASK, UNKNOWN, (int) VOID_FTYPE_PV2DI_V2DI_QI },
31020 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_storev8si_mask, "__builtin_ia32_movdqa32store256_mask", IX86_BUILTIN_MOVDQA32STORE256_MASK, UNKNOWN, (int) VOID_FTYPE_PV8SI_V8SI_QI },
31021 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_storev4si_mask, "__builtin_ia32_movdqa32store128_mask", IX86_BUILTIN_MOVDQA32STORE128_MASK, UNKNOWN, (int) VOID_FTYPE_PV4SI_V4SI_QI },
31022 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_loadv4df_mask, "__builtin_ia32_loadapd256_mask", IX86_BUILTIN_LOADAPD256_MASK, UNKNOWN, (int) V4DF_FTYPE_PCV4DF_V4DF_QI },
31023 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_loadv2df_mask, "__builtin_ia32_loadapd128_mask", IX86_BUILTIN_LOADAPD128_MASK, UNKNOWN, (int) V2DF_FTYPE_PCV2DF_V2DF_QI },
31024 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_loadv8sf_mask, "__builtin_ia32_loadaps256_mask", IX86_BUILTIN_LOADAPS256_MASK, UNKNOWN, (int) V8SF_FTYPE_PCV8SF_V8SF_QI },
31025 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_loadv4sf_mask, "__builtin_ia32_loadaps128_mask", IX86_BUILTIN_LOADAPS128_MASK, UNKNOWN, (int) V4SF_FTYPE_PCV4SF_V4SF_QI },
31026 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_storev4df_mask, "__builtin_ia32_storeapd256_mask", IX86_BUILTIN_STOREAPD256_MASK, UNKNOWN, (int) VOID_FTYPE_PV4DF_V4DF_QI },
31027 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_storev2df_mask, "__builtin_ia32_storeapd128_mask", IX86_BUILTIN_STOREAPD128_MASK, UNKNOWN, (int) VOID_FTYPE_PV2DF_V2DF_QI },
31028 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_storev8sf_mask, "__builtin_ia32_storeaps256_mask", IX86_BUILTIN_STOREAPS256_MASK, UNKNOWN, (int) VOID_FTYPE_PV8SF_V8SF_QI },
31029 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_storev4sf_mask, "__builtin_ia32_storeaps128_mask", IX86_BUILTIN_STOREAPS128_MASK, UNKNOWN, (int) VOID_FTYPE_PV4SF_V4SF_QI },
31030 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_loadupd256_mask, "__builtin_ia32_loadupd256_mask", IX86_BUILTIN_LOADUPD256_MASK, UNKNOWN, (int) V4DF_FTYPE_PCV4DF_V4DF_QI },
31031 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse2_loadupd_mask, "__builtin_ia32_loadupd128_mask", IX86_BUILTIN_LOADUPD128_MASK, UNKNOWN, (int) V2DF_FTYPE_PCV2DF_V2DF_QI },
31032 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_loadups256_mask, "__builtin_ia32_loadups256_mask", IX86_BUILTIN_LOADUPS256_MASK, UNKNOWN, (int) V8SF_FTYPE_PCV8SF_V8SF_QI },
31033 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse_loadups_mask, "__builtin_ia32_loadups128_mask", IX86_BUILTIN_LOADUPS128_MASK, UNKNOWN, (int) V4SF_FTYPE_PCV4SF_V4SF_QI },
31034 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_storeupd256_mask, "__builtin_ia32_storeupd256_mask", IX86_BUILTIN_STOREUPD256_MASK, UNKNOWN, (int) VOID_FTYPE_PV4DF_V4DF_QI },
31035 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_storeupd_mask, "__builtin_ia32_storeupd128_mask", IX86_BUILTIN_STOREUPD128_MASK, UNKNOWN, (int) VOID_FTYPE_PV2DF_V2DF_QI },
31036 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_storeups256_mask, "__builtin_ia32_storeups256_mask", IX86_BUILTIN_STOREUPS256_MASK, UNKNOWN, (int) VOID_FTYPE_PV8SF_V8SF_QI },
31037 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_storeups_mask, "__builtin_ia32_storeups128_mask", IX86_BUILTIN_STOREUPS128_MASK, UNKNOWN, (int) VOID_FTYPE_PV4SF_V4SF_QI },
31038 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_loaddquv4di_mask, "__builtin_ia32_loaddqudi256_mask", IX86_BUILTIN_LOADDQUDI256_MASK, UNKNOWN, (int) V4DI_FTYPE_PCV4DI_V4DI_QI },
31039 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_loaddquv2di_mask, "__builtin_ia32_loaddqudi128_mask", IX86_BUILTIN_LOADDQUDI128_MASK, UNKNOWN, (int) V2DI_FTYPE_PCV2DI_V2DI_QI },
31040 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_loaddquv8si_mask, "__builtin_ia32_loaddqusi256_mask", IX86_BUILTIN_LOADDQUSI256_MASK, UNKNOWN, (int) V8SI_FTYPE_PCV8SI_V8SI_QI },
31041 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse2_loaddquv4si_mask, "__builtin_ia32_loaddqusi128_mask", IX86_BUILTIN_LOADDQUSI128_MASK, UNKNOWN, (int) V4SI_FTYPE_PCV4SI_V4SI_QI },
31042 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_storedquv4di_mask, "__builtin_ia32_storedqudi256_mask", IX86_BUILTIN_STOREDQUDI256_MASK, UNKNOWN, (int) VOID_FTYPE_PV4DI_V4DI_QI },
31043 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_storedquv2di_mask, "__builtin_ia32_storedqudi128_mask", IX86_BUILTIN_STOREDQUDI128_MASK, UNKNOWN, (int) VOID_FTYPE_PV2DI_V2DI_QI },
31044 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_storedquv8si_mask, "__builtin_ia32_storedqusi256_mask", IX86_BUILTIN_STOREDQUSI256_MASK, UNKNOWN, (int) VOID_FTYPE_PV8SI_V8SI_QI },
31045 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_storedquv4si_mask, "__builtin_ia32_storedqusi128_mask", IX86_BUILTIN_STOREDQUSI128_MASK, UNKNOWN, (int) VOID_FTYPE_PV4SI_V4SI_QI },
31046 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_storedquv16hi_mask, "__builtin_ia32_storedquhi256_mask", IX86_BUILTIN_STOREDQUHI256_MASK, UNKNOWN, (int) VOID_FTYPE_PV16HI_V16HI_HI },
31047 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_storedquv8hi_mask, "__builtin_ia32_storedquhi128_mask", IX86_BUILTIN_STOREDQUHI128_MASK, UNKNOWN, (int) VOID_FTYPE_PV8HI_V8HI_QI },
31048 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_storedquv32qi_mask, "__builtin_ia32_storedquqi256_mask", IX86_BUILTIN_STOREDQUQI256_MASK, UNKNOWN, (int) VOID_FTYPE_PV32QI_V32QI_SI },
31049 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_storedquv16qi_mask, "__builtin_ia32_storedquqi128_mask", IX86_BUILTIN_STOREDQUQI128_MASK, UNKNOWN, (int) VOID_FTYPE_PV16QI_V16QI_HI },
31050 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_compressstorev4df_mask, "__builtin_ia32_compressstoredf256_mask", IX86_BUILTIN_COMPRESSPDSTORE256, UNKNOWN, (int) VOID_FTYPE_PV4DF_V4DF_QI },
31051 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_compressstorev2df_mask, "__builtin_ia32_compressstoredf128_mask", IX86_BUILTIN_COMPRESSPDSTORE128, UNKNOWN, (int) VOID_FTYPE_PV2DF_V2DF_QI },
31052 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_compressstorev8sf_mask, "__builtin_ia32_compressstoresf256_mask", IX86_BUILTIN_COMPRESSPSSTORE256, UNKNOWN, (int) VOID_FTYPE_PV8SF_V8SF_QI },
31053 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_compressstorev4sf_mask, "__builtin_ia32_compressstoresf128_mask", IX86_BUILTIN_COMPRESSPSSTORE128, UNKNOWN, (int) VOID_FTYPE_PV4SF_V4SF_QI },
31054 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_compressstorev4di_mask, "__builtin_ia32_compressstoredi256_mask", IX86_BUILTIN_PCOMPRESSQSTORE256, UNKNOWN, (int) VOID_FTYPE_PV4DI_V4DI_QI },
31055 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_compressstorev2di_mask, "__builtin_ia32_compressstoredi128_mask", IX86_BUILTIN_PCOMPRESSQSTORE128, UNKNOWN, (int) VOID_FTYPE_PV2DI_V2DI_QI },
31056 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_compressstorev8si_mask, "__builtin_ia32_compressstoresi256_mask", IX86_BUILTIN_PCOMPRESSDSTORE256, UNKNOWN, (int) VOID_FTYPE_PV8SI_V8SI_QI },
31057 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_compressstorev4si_mask, "__builtin_ia32_compressstoresi128_mask", IX86_BUILTIN_PCOMPRESSDSTORE128, UNKNOWN, (int) VOID_FTYPE_PV4SI_V4SI_QI },
31058 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv4df_mask, "__builtin_ia32_expandloaddf256_mask", IX86_BUILTIN_EXPANDPDLOAD256, UNKNOWN, (int) V4DF_FTYPE_PCV4DF_V4DF_QI },
31059 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv2df_mask, "__builtin_ia32_expandloaddf128_mask", IX86_BUILTIN_EXPANDPDLOAD128, UNKNOWN, (int) V2DF_FTYPE_PCV2DF_V2DF_QI },
31060 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv8sf_mask, "__builtin_ia32_expandloadsf256_mask", IX86_BUILTIN_EXPANDPSLOAD256, UNKNOWN, (int) V8SF_FTYPE_PCV8SF_V8SF_QI },
31061 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv4sf_mask, "__builtin_ia32_expandloadsf128_mask", IX86_BUILTIN_EXPANDPSLOAD128, UNKNOWN, (int) V4SF_FTYPE_PCV4SF_V4SF_QI },
31062 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv4di_mask, "__builtin_ia32_expandloaddi256_mask", IX86_BUILTIN_PEXPANDQLOAD256, UNKNOWN, (int) V4DI_FTYPE_PCV4DI_V4DI_QI },
31063 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv2di_mask, "__builtin_ia32_expandloaddi128_mask", IX86_BUILTIN_PEXPANDQLOAD128, UNKNOWN, (int) V2DI_FTYPE_PCV2DI_V2DI_QI },
31064 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv8si_mask, "__builtin_ia32_expandloadsi256_mask", IX86_BUILTIN_PEXPANDDLOAD256, UNKNOWN, (int) V8SI_FTYPE_PCV8SI_V8SI_QI },
31065 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv4si_mask, "__builtin_ia32_expandloadsi128_mask", IX86_BUILTIN_PEXPANDDLOAD128, UNKNOWN, (int) V4SI_FTYPE_PCV4SI_V4SI_QI },
31066 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv4df_maskz, "__builtin_ia32_expandloaddf256_maskz", IX86_BUILTIN_EXPANDPDLOAD256Z, UNKNOWN, (int) V4DF_FTYPE_PCV4DF_V4DF_QI },
31067 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv2df_maskz, "__builtin_ia32_expandloaddf128_maskz", IX86_BUILTIN_EXPANDPDLOAD128Z, UNKNOWN, (int) V2DF_FTYPE_PCV2DF_V2DF_QI },
31068 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv8sf_maskz, "__builtin_ia32_expandloadsf256_maskz", IX86_BUILTIN_EXPANDPSLOAD256Z, UNKNOWN, (int) V8SF_FTYPE_PCV8SF_V8SF_QI },
31069 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv4sf_maskz, "__builtin_ia32_expandloadsf128_maskz", IX86_BUILTIN_EXPANDPSLOAD128Z, UNKNOWN, (int) V4SF_FTYPE_PCV4SF_V4SF_QI },
31070 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv4di_maskz, "__builtin_ia32_expandloaddi256_maskz", IX86_BUILTIN_PEXPANDQLOAD256Z, UNKNOWN, (int) V4DI_FTYPE_PCV4DI_V4DI_QI },
31071 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv2di_maskz, "__builtin_ia32_expandloaddi128_maskz", IX86_BUILTIN_PEXPANDQLOAD128Z, UNKNOWN, (int) V2DI_FTYPE_PCV2DI_V2DI_QI },
31072 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv8si_maskz, "__builtin_ia32_expandloadsi256_maskz", IX86_BUILTIN_PEXPANDDLOAD256Z, UNKNOWN, (int) V8SI_FTYPE_PCV8SI_V8SI_QI },
31073 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv4si_maskz, "__builtin_ia32_expandloadsi128_maskz", IX86_BUILTIN_PEXPANDDLOAD128Z, UNKNOWN, (int) V4SI_FTYPE_PCV4SI_V4SI_QI },
31074 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_truncatev4div4si2_mask_store, "__builtin_ia32_pmovqd256mem_mask", IX86_BUILTIN_PMOVQD256_MEM, UNKNOWN, (int) VOID_FTYPE_PV4SI_V4DI_QI },
31075 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_truncatev2div2si2_mask_store, "__builtin_ia32_pmovqd128mem_mask", IX86_BUILTIN_PMOVQD128_MEM, UNKNOWN, (int) VOID_FTYPE_PV4SI_V2DI_QI },
31076 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ss_truncatev4div4si2_mask_store, "__builtin_ia32_pmovsqd256mem_mask", IX86_BUILTIN_PMOVSQD256_MEM, UNKNOWN, (int) VOID_FTYPE_PV4SI_V4DI_QI },
31077 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ss_truncatev2div2si2_mask_store, "__builtin_ia32_pmovsqd128mem_mask", IX86_BUILTIN_PMOVSQD128_MEM, UNKNOWN, (int) VOID_FTYPE_PV4SI_V2DI_QI },
31078 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_us_truncatev4div4si2_mask_store, "__builtin_ia32_pmovusqd256mem_mask", IX86_BUILTIN_PMOVUSQD256_MEM, UNKNOWN, (int) VOID_FTYPE_PV4SI_V4DI_QI },
31079 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_us_truncatev2div2si2_mask_store, "__builtin_ia32_pmovusqd128mem_mask", IX86_BUILTIN_PMOVUSQD128_MEM, UNKNOWN, (int) VOID_FTYPE_PV4SI_V2DI_QI },
31080 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_truncatev4div4hi2_mask_store, "__builtin_ia32_pmovqw256mem_mask", IX86_BUILTIN_PMOVQW256_MEM, UNKNOWN, (int) VOID_FTYPE_PV8HI_V4DI_QI },
31081 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_truncatev2div2hi2_mask_store, "__builtin_ia32_pmovqw128mem_mask", IX86_BUILTIN_PMOVQW128_MEM, UNKNOWN, (int) VOID_FTYPE_PV8HI_V2DI_QI },
31082 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ss_truncatev4div4hi2_mask_store, "__builtin_ia32_pmovsqw256mem_mask", IX86_BUILTIN_PMOVSQW256_MEM, UNKNOWN, (int) VOID_FTYPE_PV8HI_V4DI_QI },
31083 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ss_truncatev2div2hi2_mask_store, "__builtin_ia32_pmovsqw128mem_mask", IX86_BUILTIN_PMOVSQW128_MEM, UNKNOWN, (int) VOID_FTYPE_PV8HI_V2DI_QI },
31084 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_us_truncatev4div4hi2_mask_store, "__builtin_ia32_pmovusqw256mem_mask", IX86_BUILTIN_PMOVUSQW256_MEM, UNKNOWN, (int) VOID_FTYPE_PV8HI_V4DI_QI },
31085 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_us_truncatev2div2hi2_mask_store, "__builtin_ia32_pmovusqw128mem_mask", IX86_BUILTIN_PMOVUSQW128_MEM, UNKNOWN, (int) VOID_FTYPE_PV8HI_V2DI_QI },
31086 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_truncatev4div4qi2_mask_store, "__builtin_ia32_pmovqb256mem_mask", IX86_BUILTIN_PMOVQB256_MEM, UNKNOWN, (int) VOID_FTYPE_PV16QI_V4DI_QI },
31087 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_truncatev2div2qi2_mask_store, "__builtin_ia32_pmovqb128mem_mask", IX86_BUILTIN_PMOVQB128_MEM, UNKNOWN, (int) VOID_FTYPE_PV16QI_V2DI_QI },
31088 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ss_truncatev4div4qi2_mask_store, "__builtin_ia32_pmovsqb256mem_mask", IX86_BUILTIN_PMOVSQB256_MEM, UNKNOWN, (int) VOID_FTYPE_PV16QI_V4DI_QI },
31089 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ss_truncatev2div2qi2_mask_store, "__builtin_ia32_pmovsqb128mem_mask", IX86_BUILTIN_PMOVSQB128_MEM, UNKNOWN, (int) VOID_FTYPE_PV16QI_V2DI_QI },
31090 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_us_truncatev4div4qi2_mask_store, "__builtin_ia32_pmovusqb256mem_mask", IX86_BUILTIN_PMOVUSQB256_MEM, UNKNOWN, (int) VOID_FTYPE_PV16QI_V4DI_QI },
31091 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_us_truncatev2div2qi2_mask_store, "__builtin_ia32_pmovusqb128mem_mask", IX86_BUILTIN_PMOVUSQB128_MEM, UNKNOWN, (int) VOID_FTYPE_PV16QI_V2DI_QI },
31092 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_truncatev8siv8qi2_mask_store, "__builtin_ia32_pmovdb256mem_mask", IX86_BUILTIN_PMOVDB256_MEM, UNKNOWN, (int) VOID_FTYPE_PV16QI_V8SI_QI },
31093 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_truncatev4siv4qi2_mask_store, "__builtin_ia32_pmovdb128mem_mask", IX86_BUILTIN_PMOVDB128_MEM, UNKNOWN, (int) VOID_FTYPE_PV16QI_V4SI_QI },
31094 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ss_truncatev8siv8qi2_mask_store, "__builtin_ia32_pmovsdb256mem_mask", IX86_BUILTIN_PMOVSDB256_MEM, UNKNOWN, (int) VOID_FTYPE_PV16QI_V8SI_QI },
31095 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ss_truncatev4siv4qi2_mask_store, "__builtin_ia32_pmovsdb128mem_mask", IX86_BUILTIN_PMOVSDB128_MEM, UNKNOWN, (int) VOID_FTYPE_PV16QI_V4SI_QI },
31096 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_us_truncatev8siv8qi2_mask_store, "__builtin_ia32_pmovusdb256mem_mask", IX86_BUILTIN_PMOVUSDB256_MEM, UNKNOWN, (int) VOID_FTYPE_PV16QI_V8SI_QI },
31097 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_us_truncatev4siv4qi2_mask_store, "__builtin_ia32_pmovusdb128mem_mask", IX86_BUILTIN_PMOVUSDB128_MEM, UNKNOWN, (int) VOID_FTYPE_PV16QI_V4SI_QI },
31098 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_truncatev8siv8hi2_mask_store, "__builtin_ia32_pmovdw256mem_mask", IX86_BUILTIN_PMOVDW256_MEM, UNKNOWN, (int) VOID_FTYPE_PV8HI_V8SI_QI },
31099 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_truncatev4siv4hi2_mask_store, "__builtin_ia32_pmovdw128mem_mask", IX86_BUILTIN_PMOVDW128_MEM, UNKNOWN, (int) VOID_FTYPE_PV8HI_V4SI_QI },
31100 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ss_truncatev8siv8hi2_mask_store, "__builtin_ia32_pmovsdw256mem_mask", IX86_BUILTIN_PMOVSDW256_MEM, UNKNOWN, (int) VOID_FTYPE_PV8HI_V8SI_QI },
31101 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ss_truncatev4siv4hi2_mask_store, "__builtin_ia32_pmovsdw128mem_mask", IX86_BUILTIN_PMOVSDW128_MEM, UNKNOWN, (int) VOID_FTYPE_PV8HI_V4SI_QI },
31102 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_us_truncatev8siv8hi2_mask_store, "__builtin_ia32_pmovusdw256mem_mask", IX86_BUILTIN_PMOVUSDW256_MEM, UNKNOWN, (int) VOID_FTYPE_PV8HI_V8SI_QI },
31103 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_us_truncatev4siv4hi2_mask_store, "__builtin_ia32_pmovusdw128mem_mask", IX86_BUILTIN_PMOVUSDW128_MEM, UNKNOWN, (int) VOID_FTYPE_PV8HI_V4SI_QI },
31105 /* PCOMMIT. */
31106 { OPTION_MASK_ISA_PCOMMIT, CODE_FOR_pcommit, "__builtin_ia32_pcommit", IX86_BUILTIN_PCOMMIT, UNKNOWN, (int) VOID_FTYPE_VOID },
31107 };
31109 /* Builtins with variable number of arguments. */
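/* A brief usage sketch, added for illustration and assuming GCC's stock
   <xmmintrin.h> wrapper (the function name add_four_floats is hypothetical):
   each entry in the table that follows pairs an ISA option mask, an insn
   pattern (CODE_FOR_*), the builtin's name, its IX86_BUILTIN_* code, an
   optional RTX comparison code, and a function-type code.  For example, the
   "__builtin_ia32_addps" entry below ties CODE_FOR_addv4sf3 to
   V4SF_FTYPE_V4SF_V4SF; user code normally reaches it through _mm_add_ps,
   which <xmmintrin.h> expands to that builtin:

	#include <xmmintrin.h>

	__m128
	add_four_floats (__m128 a, __m128 b)
	{
	  return _mm_add_ps (a, b);
	}

   This needs -msse when compiling for 32-bit x86; SSE is enabled by default
   for x86-64.  */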
31110 static const struct builtin_description bdesc_args[] =
31111 {
31112 { ~OPTION_MASK_ISA_64BIT, CODE_FOR_bsr, "__builtin_ia32_bsrsi", IX86_BUILTIN_BSRSI, UNKNOWN, (int) INT_FTYPE_INT },
31113 { OPTION_MASK_ISA_64BIT, CODE_FOR_bsr_rex64, "__builtin_ia32_bsrdi", IX86_BUILTIN_BSRDI, UNKNOWN, (int) INT64_FTYPE_INT64 },
31114 { ~OPTION_MASK_ISA_64BIT, CODE_FOR_nothing, "__builtin_ia32_rdpmc", IX86_BUILTIN_RDPMC, UNKNOWN, (int) UINT64_FTYPE_INT },
31115 { ~OPTION_MASK_ISA_64BIT, CODE_FOR_rotlqi3, "__builtin_ia32_rolqi", IX86_BUILTIN_ROLQI, UNKNOWN, (int) UINT8_FTYPE_UINT8_INT },
31116 { ~OPTION_MASK_ISA_64BIT, CODE_FOR_rotlhi3, "__builtin_ia32_rolhi", IX86_BUILTIN_ROLHI, UNKNOWN, (int) UINT16_FTYPE_UINT16_INT },
31117 { ~OPTION_MASK_ISA_64BIT, CODE_FOR_rotrqi3, "__builtin_ia32_rorqi", IX86_BUILTIN_RORQI, UNKNOWN, (int) UINT8_FTYPE_UINT8_INT },
31118 { ~OPTION_MASK_ISA_64BIT, CODE_FOR_rotrhi3, "__builtin_ia32_rorhi", IX86_BUILTIN_RORHI, UNKNOWN, (int) UINT16_FTYPE_UINT16_INT },
31120 /* MMX */
31121 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_addv8qi3, "__builtin_ia32_paddb", IX86_BUILTIN_PADDB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
31122 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_addv4hi3, "__builtin_ia32_paddw", IX86_BUILTIN_PADDW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
31123 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_addv2si3, "__builtin_ia32_paddd", IX86_BUILTIN_PADDD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
31124 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_subv8qi3, "__builtin_ia32_psubb", IX86_BUILTIN_PSUBB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
31125 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_subv4hi3, "__builtin_ia32_psubw", IX86_BUILTIN_PSUBW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
31126 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_subv2si3, "__builtin_ia32_psubd", IX86_BUILTIN_PSUBD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
31128 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ssaddv8qi3, "__builtin_ia32_paddsb", IX86_BUILTIN_PADDSB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
31129 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ssaddv4hi3, "__builtin_ia32_paddsw", IX86_BUILTIN_PADDSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
31130 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_sssubv8qi3, "__builtin_ia32_psubsb", IX86_BUILTIN_PSUBSB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
31131 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_sssubv4hi3, "__builtin_ia32_psubsw", IX86_BUILTIN_PSUBSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
31132 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_usaddv8qi3, "__builtin_ia32_paddusb", IX86_BUILTIN_PADDUSB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
31133 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_usaddv4hi3, "__builtin_ia32_paddusw", IX86_BUILTIN_PADDUSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
31134 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ussubv8qi3, "__builtin_ia32_psubusb", IX86_BUILTIN_PSUBUSB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
31135 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ussubv4hi3, "__builtin_ia32_psubusw", IX86_BUILTIN_PSUBUSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
31137 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_mulv4hi3, "__builtin_ia32_pmullw", IX86_BUILTIN_PMULLW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
31138 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_smulv4hi3_highpart, "__builtin_ia32_pmulhw", IX86_BUILTIN_PMULHW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
31140 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_andv2si3, "__builtin_ia32_pand", IX86_BUILTIN_PAND, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
31141 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_andnotv2si3, "__builtin_ia32_pandn", IX86_BUILTIN_PANDN, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
31142 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_iorv2si3, "__builtin_ia32_por", IX86_BUILTIN_POR, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
31143 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_xorv2si3, "__builtin_ia32_pxor", IX86_BUILTIN_PXOR, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
31145 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_eqv8qi3, "__builtin_ia32_pcmpeqb", IX86_BUILTIN_PCMPEQB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
31146 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_eqv4hi3, "__builtin_ia32_pcmpeqw", IX86_BUILTIN_PCMPEQW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
31147 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_eqv2si3, "__builtin_ia32_pcmpeqd", IX86_BUILTIN_PCMPEQD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
31148 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_gtv8qi3, "__builtin_ia32_pcmpgtb", IX86_BUILTIN_PCMPGTB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
31149 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_gtv4hi3, "__builtin_ia32_pcmpgtw", IX86_BUILTIN_PCMPGTW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
31150 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_gtv2si3, "__builtin_ia32_pcmpgtd", IX86_BUILTIN_PCMPGTD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
31152 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_punpckhbw, "__builtin_ia32_punpckhbw", IX86_BUILTIN_PUNPCKHBW, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
31153 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_punpckhwd, "__builtin_ia32_punpckhwd", IX86_BUILTIN_PUNPCKHWD, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
31154 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_punpckhdq, "__builtin_ia32_punpckhdq", IX86_BUILTIN_PUNPCKHDQ, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
31155 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_punpcklbw, "__builtin_ia32_punpcklbw", IX86_BUILTIN_PUNPCKLBW, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
31156 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_punpcklwd, "__builtin_ia32_punpcklwd", IX86_BUILTIN_PUNPCKLWD, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI},
31157 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_punpckldq, "__builtin_ia32_punpckldq", IX86_BUILTIN_PUNPCKLDQ, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI},
31159 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_packsswb, "__builtin_ia32_packsswb", IX86_BUILTIN_PACKSSWB, UNKNOWN, (int) V8QI_FTYPE_V4HI_V4HI },
31160 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_packssdw, "__builtin_ia32_packssdw", IX86_BUILTIN_PACKSSDW, UNKNOWN, (int) V4HI_FTYPE_V2SI_V2SI },
31161 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_packuswb, "__builtin_ia32_packuswb", IX86_BUILTIN_PACKUSWB, UNKNOWN, (int) V8QI_FTYPE_V4HI_V4HI },
31163 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_pmaddwd, "__builtin_ia32_pmaddwd", IX86_BUILTIN_PMADDWD, UNKNOWN, (int) V2SI_FTYPE_V4HI_V4HI },
31165 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashlv4hi3, "__builtin_ia32_psllwi", IX86_BUILTIN_PSLLWI, UNKNOWN, (int) V4HI_FTYPE_V4HI_SI_COUNT },
31166 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashlv2si3, "__builtin_ia32_pslldi", IX86_BUILTIN_PSLLDI, UNKNOWN, (int) V2SI_FTYPE_V2SI_SI_COUNT },
31167 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashlv1di3, "__builtin_ia32_psllqi", IX86_BUILTIN_PSLLQI, UNKNOWN, (int) V1DI_FTYPE_V1DI_SI_COUNT },
31168 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashlv4hi3, "__builtin_ia32_psllw", IX86_BUILTIN_PSLLW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI_COUNT },
31169 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashlv2si3, "__builtin_ia32_pslld", IX86_BUILTIN_PSLLD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI_COUNT },
31170 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashlv1di3, "__builtin_ia32_psllq", IX86_BUILTIN_PSLLQ, UNKNOWN, (int) V1DI_FTYPE_V1DI_V1DI_COUNT },
31172 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_lshrv4hi3, "__builtin_ia32_psrlwi", IX86_BUILTIN_PSRLWI, UNKNOWN, (int) V4HI_FTYPE_V4HI_SI_COUNT },
31173 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_lshrv2si3, "__builtin_ia32_psrldi", IX86_BUILTIN_PSRLDI, UNKNOWN, (int) V2SI_FTYPE_V2SI_SI_COUNT },
31174 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_lshrv1di3, "__builtin_ia32_psrlqi", IX86_BUILTIN_PSRLQI, UNKNOWN, (int) V1DI_FTYPE_V1DI_SI_COUNT },
31175 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_lshrv4hi3, "__builtin_ia32_psrlw", IX86_BUILTIN_PSRLW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI_COUNT },
31176 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_lshrv2si3, "__builtin_ia32_psrld", IX86_BUILTIN_PSRLD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI_COUNT },
31177 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_lshrv1di3, "__builtin_ia32_psrlq", IX86_BUILTIN_PSRLQ, UNKNOWN, (int) V1DI_FTYPE_V1DI_V1DI_COUNT },
31179 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashrv4hi3, "__builtin_ia32_psrawi", IX86_BUILTIN_PSRAWI, UNKNOWN, (int) V4HI_FTYPE_V4HI_SI_COUNT },
31180 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashrv2si3, "__builtin_ia32_psradi", IX86_BUILTIN_PSRADI, UNKNOWN, (int) V2SI_FTYPE_V2SI_SI_COUNT },
31181 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashrv4hi3, "__builtin_ia32_psraw", IX86_BUILTIN_PSRAW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI_COUNT },
31182 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashrv2si3, "__builtin_ia32_psrad", IX86_BUILTIN_PSRAD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI_COUNT },
31184 /* 3DNow! */
31185 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_pf2id, "__builtin_ia32_pf2id", IX86_BUILTIN_PF2ID, UNKNOWN, (int) V2SI_FTYPE_V2SF },
31186 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_floatv2si2, "__builtin_ia32_pi2fd", IX86_BUILTIN_PI2FD, UNKNOWN, (int) V2SF_FTYPE_V2SI },
31187 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_rcpv2sf2, "__builtin_ia32_pfrcp", IX86_BUILTIN_PFRCP, UNKNOWN, (int) V2SF_FTYPE_V2SF },
31188 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_rsqrtv2sf2, "__builtin_ia32_pfrsqrt", IX86_BUILTIN_PFRSQRT, UNKNOWN, (int) V2SF_FTYPE_V2SF },
31190 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_uavgv8qi3, "__builtin_ia32_pavgusb", IX86_BUILTIN_PAVGUSB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
31191 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_haddv2sf3, "__builtin_ia32_pfacc", IX86_BUILTIN_PFACC, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
31192 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_addv2sf3, "__builtin_ia32_pfadd", IX86_BUILTIN_PFADD, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
31193 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_eqv2sf3, "__builtin_ia32_pfcmpeq", IX86_BUILTIN_PFCMPEQ, UNKNOWN, (int) V2SI_FTYPE_V2SF_V2SF },
31194 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_gev2sf3, "__builtin_ia32_pfcmpge", IX86_BUILTIN_PFCMPGE, UNKNOWN, (int) V2SI_FTYPE_V2SF_V2SF },
31195 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_gtv2sf3, "__builtin_ia32_pfcmpgt", IX86_BUILTIN_PFCMPGT, UNKNOWN, (int) V2SI_FTYPE_V2SF_V2SF },
31196 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_smaxv2sf3, "__builtin_ia32_pfmax", IX86_BUILTIN_PFMAX, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
31197 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_sminv2sf3, "__builtin_ia32_pfmin", IX86_BUILTIN_PFMIN, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
31198 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_mulv2sf3, "__builtin_ia32_pfmul", IX86_BUILTIN_PFMUL, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
31199 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_rcpit1v2sf3, "__builtin_ia32_pfrcpit1", IX86_BUILTIN_PFRCPIT1, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
31200 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_rcpit2v2sf3, "__builtin_ia32_pfrcpit2", IX86_BUILTIN_PFRCPIT2, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
31201 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_rsqit1v2sf3, "__builtin_ia32_pfrsqit1", IX86_BUILTIN_PFRSQIT1, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
31202 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_subv2sf3, "__builtin_ia32_pfsub", IX86_BUILTIN_PFSUB, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
31203 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_subrv2sf3, "__builtin_ia32_pfsubr", IX86_BUILTIN_PFSUBR, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
31204 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_pmulhrwv4hi3, "__builtin_ia32_pmulhrw", IX86_BUILTIN_PMULHRW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
31206 /* 3DNow!A */
31207 { OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_pf2iw, "__builtin_ia32_pf2iw", IX86_BUILTIN_PF2IW, UNKNOWN, (int) V2SI_FTYPE_V2SF },
31208 { OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_pi2fw, "__builtin_ia32_pi2fw", IX86_BUILTIN_PI2FW, UNKNOWN, (int) V2SF_FTYPE_V2SI },
31209 { OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_pswapdv2si2, "__builtin_ia32_pswapdsi", IX86_BUILTIN_PSWAPDSI, UNKNOWN, (int) V2SI_FTYPE_V2SI },
31210 { OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_pswapdv2sf2, "__builtin_ia32_pswapdsf", IX86_BUILTIN_PSWAPDSF, UNKNOWN, (int) V2SF_FTYPE_V2SF },
31211 { OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_hsubv2sf3, "__builtin_ia32_pfnacc", IX86_BUILTIN_PFNACC, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
31212 { OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_addsubv2sf3, "__builtin_ia32_pfpnacc", IX86_BUILTIN_PFPNACC, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
31214 /* SSE */
31215 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_movmskps, "__builtin_ia32_movmskps", IX86_BUILTIN_MOVMSKPS, UNKNOWN, (int) INT_FTYPE_V4SF },
31216 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_sqrtv4sf2, "__builtin_ia32_sqrtps", IX86_BUILTIN_SQRTPS, UNKNOWN, (int) V4SF_FTYPE_V4SF },
31217 { OPTION_MASK_ISA_SSE, CODE_FOR_sqrtv4sf2, "__builtin_ia32_sqrtps_nr", IX86_BUILTIN_SQRTPS_NR, UNKNOWN, (int) V4SF_FTYPE_V4SF },
31218 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_rsqrtv4sf2, "__builtin_ia32_rsqrtps", IX86_BUILTIN_RSQRTPS, UNKNOWN, (int) V4SF_FTYPE_V4SF },
31219 { OPTION_MASK_ISA_SSE, CODE_FOR_rsqrtv4sf2, "__builtin_ia32_rsqrtps_nr", IX86_BUILTIN_RSQRTPS_NR, UNKNOWN, (int) V4SF_FTYPE_V4SF },
31220 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_rcpv4sf2, "__builtin_ia32_rcpps", IX86_BUILTIN_RCPPS, UNKNOWN, (int) V4SF_FTYPE_V4SF },
31221 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_cvtps2pi, "__builtin_ia32_cvtps2pi", IX86_BUILTIN_CVTPS2PI, UNKNOWN, (int) V2SI_FTYPE_V4SF },
31222 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_cvtss2si, "__builtin_ia32_cvtss2si", IX86_BUILTIN_CVTSS2SI, UNKNOWN, (int) INT_FTYPE_V4SF },
31223 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_64BIT, CODE_FOR_sse_cvtss2siq, "__builtin_ia32_cvtss2si64", IX86_BUILTIN_CVTSS2SI64, UNKNOWN, (int) INT64_FTYPE_V4SF },
31224 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_cvttps2pi, "__builtin_ia32_cvttps2pi", IX86_BUILTIN_CVTTPS2PI, UNKNOWN, (int) V2SI_FTYPE_V4SF },
31225 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_cvttss2si, "__builtin_ia32_cvttss2si", IX86_BUILTIN_CVTTSS2SI, UNKNOWN, (int) INT_FTYPE_V4SF },
31226 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_64BIT, CODE_FOR_sse_cvttss2siq, "__builtin_ia32_cvttss2si64", IX86_BUILTIN_CVTTSS2SI64, UNKNOWN, (int) INT64_FTYPE_V4SF },
31228 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_shufps, "__builtin_ia32_shufps", IX86_BUILTIN_SHUFPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
31230 { OPTION_MASK_ISA_SSE, CODE_FOR_addv4sf3, "__builtin_ia32_addps", IX86_BUILTIN_ADDPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
31231 { OPTION_MASK_ISA_SSE, CODE_FOR_subv4sf3, "__builtin_ia32_subps", IX86_BUILTIN_SUBPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
31232 { OPTION_MASK_ISA_SSE, CODE_FOR_mulv4sf3, "__builtin_ia32_mulps", IX86_BUILTIN_MULPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
31233 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_divv4sf3, "__builtin_ia32_divps", IX86_BUILTIN_DIVPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
31234 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmaddv4sf3, "__builtin_ia32_addss", IX86_BUILTIN_ADDSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
31235 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmsubv4sf3, "__builtin_ia32_subss", IX86_BUILTIN_SUBSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
31236 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmulv4sf3, "__builtin_ia32_mulss", IX86_BUILTIN_MULSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
31237 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmdivv4sf3, "__builtin_ia32_divss", IX86_BUILTIN_DIVSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
31239 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpeqps", IX86_BUILTIN_CMPEQPS, EQ, (int) V4SF_FTYPE_V4SF_V4SF },
31240 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpltps", IX86_BUILTIN_CMPLTPS, LT, (int) V4SF_FTYPE_V4SF_V4SF },
31241 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpleps", IX86_BUILTIN_CMPLEPS, LE, (int) V4SF_FTYPE_V4SF_V4SF },
31242 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpgtps", IX86_BUILTIN_CMPGTPS, LT, (int) V4SF_FTYPE_V4SF_V4SF_SWAP },
31243 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpgeps", IX86_BUILTIN_CMPGEPS, LE, (int) V4SF_FTYPE_V4SF_V4SF_SWAP },
31244 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpunordps", IX86_BUILTIN_CMPUNORDPS, UNORDERED, (int) V4SF_FTYPE_V4SF_V4SF },
31245 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpneqps", IX86_BUILTIN_CMPNEQPS, NE, (int) V4SF_FTYPE_V4SF_V4SF },
31246 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpnltps", IX86_BUILTIN_CMPNLTPS, UNGE, (int) V4SF_FTYPE_V4SF_V4SF },
31247 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpnleps", IX86_BUILTIN_CMPNLEPS, UNGT, (int) V4SF_FTYPE_V4SF_V4SF },
31248 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpngtps", IX86_BUILTIN_CMPNGTPS, UNGE, (int) V4SF_FTYPE_V4SF_V4SF_SWAP },
31249 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpngeps", IX86_BUILTIN_CMPNGEPS, UNGT, (int) V4SF_FTYPE_V4SF_V4SF_SWAP},
31250 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpordps", IX86_BUILTIN_CMPORDPS, ORDERED, (int) V4SF_FTYPE_V4SF_V4SF },
31251 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpeqss", IX86_BUILTIN_CMPEQSS, EQ, (int) V4SF_FTYPE_V4SF_V4SF },
31252 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpltss", IX86_BUILTIN_CMPLTSS, LT, (int) V4SF_FTYPE_V4SF_V4SF },
31253 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpless", IX86_BUILTIN_CMPLESS, LE, (int) V4SF_FTYPE_V4SF_V4SF },
31254 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpunordss", IX86_BUILTIN_CMPUNORDSS, UNORDERED, (int) V4SF_FTYPE_V4SF_V4SF },
31255 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpneqss", IX86_BUILTIN_CMPNEQSS, NE, (int) V4SF_FTYPE_V4SF_V4SF },
31256 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpnltss", IX86_BUILTIN_CMPNLTSS, UNGE, (int) V4SF_FTYPE_V4SF_V4SF },
31257 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpnless", IX86_BUILTIN_CMPNLESS, UNGT, (int) V4SF_FTYPE_V4SF_V4SF },
31258 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpordss", IX86_BUILTIN_CMPORDSS, ORDERED, (int) V4SF_FTYPE_V4SF_V4SF },
31260 { OPTION_MASK_ISA_SSE, CODE_FOR_sminv4sf3, "__builtin_ia32_minps", IX86_BUILTIN_MINPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
31261 { OPTION_MASK_ISA_SSE, CODE_FOR_smaxv4sf3, "__builtin_ia32_maxps", IX86_BUILTIN_MAXPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
31262 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmsminv4sf3, "__builtin_ia32_minss", IX86_BUILTIN_MINSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
31263 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmsmaxv4sf3, "__builtin_ia32_maxss", IX86_BUILTIN_MAXSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
31265 { OPTION_MASK_ISA_SSE, CODE_FOR_andv4sf3, "__builtin_ia32_andps", IX86_BUILTIN_ANDPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
31266 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_andnotv4sf3, "__builtin_ia32_andnps", IX86_BUILTIN_ANDNPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
31267 { OPTION_MASK_ISA_SSE, CODE_FOR_iorv4sf3, "__builtin_ia32_orps", IX86_BUILTIN_ORPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
31268 { OPTION_MASK_ISA_SSE, CODE_FOR_xorv4sf3, "__builtin_ia32_xorps", IX86_BUILTIN_XORPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
31270 { OPTION_MASK_ISA_SSE, CODE_FOR_copysignv4sf3, "__builtin_ia32_copysignps", IX86_BUILTIN_CPYSGNPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
31272 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_movss, "__builtin_ia32_movss", IX86_BUILTIN_MOVSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
31273 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_movhlps_exp, "__builtin_ia32_movhlps", IX86_BUILTIN_MOVHLPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
31274 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_movlhps_exp, "__builtin_ia32_movlhps", IX86_BUILTIN_MOVLHPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
31275 { OPTION_MASK_ISA_SSE, CODE_FOR_vec_interleave_highv4sf, "__builtin_ia32_unpckhps", IX86_BUILTIN_UNPCKHPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
31276 { OPTION_MASK_ISA_SSE, CODE_FOR_vec_interleave_lowv4sf, "__builtin_ia32_unpcklps", IX86_BUILTIN_UNPCKLPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
31278 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_cvtpi2ps, "__builtin_ia32_cvtpi2ps", IX86_BUILTIN_CVTPI2PS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V2SI },
31279 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_cvtsi2ss, "__builtin_ia32_cvtsi2ss", IX86_BUILTIN_CVTSI2SS, UNKNOWN, (int) V4SF_FTYPE_V4SF_SI },
31280 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_64BIT, CODE_FOR_sse_cvtsi2ssq, "__builtin_ia32_cvtsi642ss", IX86_BUILTIN_CVTSI642SS, UNKNOWN, V4SF_FTYPE_V4SF_DI },
31282 { OPTION_MASK_ISA_SSE, CODE_FOR_rsqrtsf2, "__builtin_ia32_rsqrtf", IX86_BUILTIN_RSQRTF, UNKNOWN, (int) FLOAT_FTYPE_FLOAT },
31284 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmsqrtv4sf2, "__builtin_ia32_sqrtss", IX86_BUILTIN_SQRTSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_VEC_MERGE },
31285 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmrsqrtv4sf2, "__builtin_ia32_rsqrtss", IX86_BUILTIN_RSQRTSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_VEC_MERGE },
31286 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmrcpv4sf2, "__builtin_ia32_rcpss", IX86_BUILTIN_RCPSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_VEC_MERGE },
31288 { OPTION_MASK_ISA_SSE, CODE_FOR_abstf2, 0, IX86_BUILTIN_FABSQ, UNKNOWN, (int) FLOAT128_FTYPE_FLOAT128 },
31289 { OPTION_MASK_ISA_SSE, CODE_FOR_copysigntf3, 0, IX86_BUILTIN_COPYSIGNQ, UNKNOWN, (int) FLOAT128_FTYPE_FLOAT128_FLOAT128 },
31291 /* SSE MMX or 3Dnow!A */
31292 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_uavgv8qi3, "__builtin_ia32_pavgb", IX86_BUILTIN_PAVGB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
31293 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_uavgv4hi3, "__builtin_ia32_pavgw", IX86_BUILTIN_PAVGW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
31294 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_umulv4hi3_highpart, "__builtin_ia32_pmulhuw", IX86_BUILTIN_PMULHUW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
31296 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_umaxv8qi3, "__builtin_ia32_pmaxub", IX86_BUILTIN_PMAXUB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
31297 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_smaxv4hi3, "__builtin_ia32_pmaxsw", IX86_BUILTIN_PMAXSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
31298 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_uminv8qi3, "__builtin_ia32_pminub", IX86_BUILTIN_PMINUB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
31299 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_sminv4hi3, "__builtin_ia32_pminsw", IX86_BUILTIN_PMINSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
31301 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_psadbw, "__builtin_ia32_psadbw", IX86_BUILTIN_PSADBW, UNKNOWN, (int) V1DI_FTYPE_V8QI_V8QI },
31302 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_pmovmskb, "__builtin_ia32_pmovmskb", IX86_BUILTIN_PMOVMSKB, UNKNOWN, (int) INT_FTYPE_V8QI },
31304 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_pshufw, "__builtin_ia32_pshufw", IX86_BUILTIN_PSHUFW, UNKNOWN, (int) V4HI_FTYPE_V4HI_INT },
31306 /* SSE2 */
31307 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_shufpd, "__builtin_ia32_shufpd", IX86_BUILTIN_SHUFPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
31309 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movmskpd, "__builtin_ia32_movmskpd", IX86_BUILTIN_MOVMSKPD, UNKNOWN, (int) INT_FTYPE_V2DF },
31310 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_pmovmskb, "__builtin_ia32_pmovmskb128", IX86_BUILTIN_PMOVMSKB128, UNKNOWN, (int) INT_FTYPE_V16QI },
31311 { OPTION_MASK_ISA_SSE2, CODE_FOR_sqrtv2df2, "__builtin_ia32_sqrtpd", IX86_BUILTIN_SQRTPD, UNKNOWN, (int) V2DF_FTYPE_V2DF },
31312 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtdq2pd, "__builtin_ia32_cvtdq2pd", IX86_BUILTIN_CVTDQ2PD, UNKNOWN, (int) V2DF_FTYPE_V4SI },
31313 { OPTION_MASK_ISA_SSE2, CODE_FOR_floatv4siv4sf2, "__builtin_ia32_cvtdq2ps", IX86_BUILTIN_CVTDQ2PS, UNKNOWN, (int) V4SF_FTYPE_V4SI },
31315 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtpd2dq, "__builtin_ia32_cvtpd2dq", IX86_BUILTIN_CVTPD2DQ, UNKNOWN, (int) V4SI_FTYPE_V2DF },
31316 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtpd2pi, "__builtin_ia32_cvtpd2pi", IX86_BUILTIN_CVTPD2PI, UNKNOWN, (int) V2SI_FTYPE_V2DF },
31317 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtpd2ps, "__builtin_ia32_cvtpd2ps", IX86_BUILTIN_CVTPD2PS, UNKNOWN, (int) V4SF_FTYPE_V2DF },
31318 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvttpd2dq, "__builtin_ia32_cvttpd2dq", IX86_BUILTIN_CVTTPD2DQ, UNKNOWN, (int) V4SI_FTYPE_V2DF },
31319 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvttpd2pi, "__builtin_ia32_cvttpd2pi", IX86_BUILTIN_CVTTPD2PI, UNKNOWN, (int) V2SI_FTYPE_V2DF },
31321 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtpi2pd, "__builtin_ia32_cvtpi2pd", IX86_BUILTIN_CVTPI2PD, UNKNOWN, (int) V2DF_FTYPE_V2SI },
31323 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtsd2si, "__builtin_ia32_cvtsd2si", IX86_BUILTIN_CVTSD2SI, UNKNOWN, (int) INT_FTYPE_V2DF },
31324 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvttsd2si, "__builtin_ia32_cvttsd2si", IX86_BUILTIN_CVTTSD2SI, UNKNOWN, (int) INT_FTYPE_V2DF },
31325 { OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_64BIT, CODE_FOR_sse2_cvtsd2siq, "__builtin_ia32_cvtsd2si64", IX86_BUILTIN_CVTSD2SI64, UNKNOWN, (int) INT64_FTYPE_V2DF },
31326 { OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_64BIT, CODE_FOR_sse2_cvttsd2siq, "__builtin_ia32_cvttsd2si64", IX86_BUILTIN_CVTTSD2SI64, UNKNOWN, (int) INT64_FTYPE_V2DF },
31328 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_fix_notruncv4sfv4si, "__builtin_ia32_cvtps2dq", IX86_BUILTIN_CVTPS2DQ, UNKNOWN, (int) V4SI_FTYPE_V4SF },
31329 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtps2pd, "__builtin_ia32_cvtps2pd", IX86_BUILTIN_CVTPS2PD, UNKNOWN, (int) V2DF_FTYPE_V4SF },
31330 { OPTION_MASK_ISA_SSE2, CODE_FOR_fix_truncv4sfv4si2, "__builtin_ia32_cvttps2dq", IX86_BUILTIN_CVTTPS2DQ, UNKNOWN, (int) V4SI_FTYPE_V4SF },
31332 { OPTION_MASK_ISA_SSE2, CODE_FOR_addv2df3, "__builtin_ia32_addpd", IX86_BUILTIN_ADDPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
31333 { OPTION_MASK_ISA_SSE2, CODE_FOR_subv2df3, "__builtin_ia32_subpd", IX86_BUILTIN_SUBPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
31334 { OPTION_MASK_ISA_SSE2, CODE_FOR_mulv2df3, "__builtin_ia32_mulpd", IX86_BUILTIN_MULPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
31335 { OPTION_MASK_ISA_SSE2, CODE_FOR_divv2df3, "__builtin_ia32_divpd", IX86_BUILTIN_DIVPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
31336 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmaddv2df3, "__builtin_ia32_addsd", IX86_BUILTIN_ADDSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
31337 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmsubv2df3, "__builtin_ia32_subsd", IX86_BUILTIN_SUBSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
31338 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmulv2df3, "__builtin_ia32_mulsd", IX86_BUILTIN_MULSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
31339 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmdivv2df3, "__builtin_ia32_divsd", IX86_BUILTIN_DIVSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
31341 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpeqpd", IX86_BUILTIN_CMPEQPD, EQ, (int) V2DF_FTYPE_V2DF_V2DF },
31342 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpltpd", IX86_BUILTIN_CMPLTPD, LT, (int) V2DF_FTYPE_V2DF_V2DF },
31343 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmplepd", IX86_BUILTIN_CMPLEPD, LE, (int) V2DF_FTYPE_V2DF_V2DF },
31344 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpgtpd", IX86_BUILTIN_CMPGTPD, LT, (int) V2DF_FTYPE_V2DF_V2DF_SWAP },
31345 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpgepd", IX86_BUILTIN_CMPGEPD, LE, (int) V2DF_FTYPE_V2DF_V2DF_SWAP},
31346 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpunordpd", IX86_BUILTIN_CMPUNORDPD, UNORDERED, (int) V2DF_FTYPE_V2DF_V2DF },
31347 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpneqpd", IX86_BUILTIN_CMPNEQPD, NE, (int) V2DF_FTYPE_V2DF_V2DF },
31348 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpnltpd", IX86_BUILTIN_CMPNLTPD, UNGE, (int) V2DF_FTYPE_V2DF_V2DF },
31349 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpnlepd", IX86_BUILTIN_CMPNLEPD, UNGT, (int) V2DF_FTYPE_V2DF_V2DF },
31350 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpngtpd", IX86_BUILTIN_CMPNGTPD, UNGE, (int) V2DF_FTYPE_V2DF_V2DF_SWAP },
31351 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpngepd", IX86_BUILTIN_CMPNGEPD, UNGT, (int) V2DF_FTYPE_V2DF_V2DF_SWAP },
31352 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpordpd", IX86_BUILTIN_CMPORDPD, ORDERED, (int) V2DF_FTYPE_V2DF_V2DF },
31353 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpeqsd", IX86_BUILTIN_CMPEQSD, EQ, (int) V2DF_FTYPE_V2DF_V2DF },
31354 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpltsd", IX86_BUILTIN_CMPLTSD, LT, (int) V2DF_FTYPE_V2DF_V2DF },
31355 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmplesd", IX86_BUILTIN_CMPLESD, LE, (int) V2DF_FTYPE_V2DF_V2DF },
31356 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpunordsd", IX86_BUILTIN_CMPUNORDSD, UNORDERED, (int) V2DF_FTYPE_V2DF_V2DF },
31357 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpneqsd", IX86_BUILTIN_CMPNEQSD, NE, (int) V2DF_FTYPE_V2DF_V2DF },
31358 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpnltsd", IX86_BUILTIN_CMPNLTSD, UNGE, (int) V2DF_FTYPE_V2DF_V2DF },
31359 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpnlesd", IX86_BUILTIN_CMPNLESD, UNGT, (int) V2DF_FTYPE_V2DF_V2DF },
31360 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpordsd", IX86_BUILTIN_CMPORDSD, ORDERED, (int) V2DF_FTYPE_V2DF_V2DF },
31362 { OPTION_MASK_ISA_SSE2, CODE_FOR_sminv2df3, "__builtin_ia32_minpd", IX86_BUILTIN_MINPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
31363 { OPTION_MASK_ISA_SSE2, CODE_FOR_smaxv2df3, "__builtin_ia32_maxpd", IX86_BUILTIN_MAXPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
31364 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmsminv2df3, "__builtin_ia32_minsd", IX86_BUILTIN_MINSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
31365 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmsmaxv2df3, "__builtin_ia32_maxsd", IX86_BUILTIN_MAXSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
31367 { OPTION_MASK_ISA_SSE2, CODE_FOR_andv2df3, "__builtin_ia32_andpd", IX86_BUILTIN_ANDPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
31368 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_andnotv2df3, "__builtin_ia32_andnpd", IX86_BUILTIN_ANDNPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
31369 { OPTION_MASK_ISA_SSE2, CODE_FOR_iorv2df3, "__builtin_ia32_orpd", IX86_BUILTIN_ORPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
31370 { OPTION_MASK_ISA_SSE2, CODE_FOR_xorv2df3, "__builtin_ia32_xorpd", IX86_BUILTIN_XORPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
31372 { OPTION_MASK_ISA_SSE2, CODE_FOR_copysignv2df3, "__builtin_ia32_copysignpd", IX86_BUILTIN_CPYSGNPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
31374 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movsd, "__builtin_ia32_movsd", IX86_BUILTIN_MOVSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
31375 { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_highv2df, "__builtin_ia32_unpckhpd", IX86_BUILTIN_UNPCKHPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
31376 { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_lowv2df, "__builtin_ia32_unpcklpd", IX86_BUILTIN_UNPCKLPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
31378 { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_pack_sfix_v2df, "__builtin_ia32_vec_pack_sfix", IX86_BUILTIN_VEC_PACK_SFIX, UNKNOWN, (int) V4SI_FTYPE_V2DF_V2DF },
31380 { OPTION_MASK_ISA_SSE2, CODE_FOR_addv16qi3, "__builtin_ia32_paddb128", IX86_BUILTIN_PADDB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
31381 { OPTION_MASK_ISA_SSE2, CODE_FOR_addv8hi3, "__builtin_ia32_paddw128", IX86_BUILTIN_PADDW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
31382 { OPTION_MASK_ISA_SSE2, CODE_FOR_addv4si3, "__builtin_ia32_paddd128", IX86_BUILTIN_PADDD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
31383 { OPTION_MASK_ISA_SSE2, CODE_FOR_addv2di3, "__builtin_ia32_paddq128", IX86_BUILTIN_PADDQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
31384 { OPTION_MASK_ISA_SSE2, CODE_FOR_subv16qi3, "__builtin_ia32_psubb128", IX86_BUILTIN_PSUBB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
31385 { OPTION_MASK_ISA_SSE2, CODE_FOR_subv8hi3, "__builtin_ia32_psubw128", IX86_BUILTIN_PSUBW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
31386 { OPTION_MASK_ISA_SSE2, CODE_FOR_subv4si3, "__builtin_ia32_psubd128", IX86_BUILTIN_PSUBD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
31387 { OPTION_MASK_ISA_SSE2, CODE_FOR_subv2di3, "__builtin_ia32_psubq128", IX86_BUILTIN_PSUBQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
31389 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ssaddv16qi3, "__builtin_ia32_paddsb128", IX86_BUILTIN_PADDSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
31390 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ssaddv8hi3, "__builtin_ia32_paddsw128", IX86_BUILTIN_PADDSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
31391 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_sssubv16qi3, "__builtin_ia32_psubsb128", IX86_BUILTIN_PSUBSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
31392 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_sssubv8hi3, "__builtin_ia32_psubsw128", IX86_BUILTIN_PSUBSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
31393 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_usaddv16qi3, "__builtin_ia32_paddusb128", IX86_BUILTIN_PADDUSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
31394 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_usaddv8hi3, "__builtin_ia32_paddusw128", IX86_BUILTIN_PADDUSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
31395 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ussubv16qi3, "__builtin_ia32_psubusb128", IX86_BUILTIN_PSUBUSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
31396 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ussubv8hi3, "__builtin_ia32_psubusw128", IX86_BUILTIN_PSUBUSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
31398 { OPTION_MASK_ISA_SSE2, CODE_FOR_mulv8hi3, "__builtin_ia32_pmullw128", IX86_BUILTIN_PMULLW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
31399 { OPTION_MASK_ISA_SSE2, CODE_FOR_smulv8hi3_highpart, "__builtin_ia32_pmulhw128", IX86_BUILTIN_PMULHW128, UNKNOWN,(int) V8HI_FTYPE_V8HI_V8HI },
31401 { OPTION_MASK_ISA_SSE2, CODE_FOR_andv2di3, "__builtin_ia32_pand128", IX86_BUILTIN_PAND128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
31402 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_andnotv2di3, "__builtin_ia32_pandn128", IX86_BUILTIN_PANDN128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
31403 { OPTION_MASK_ISA_SSE2, CODE_FOR_iorv2di3, "__builtin_ia32_por128", IX86_BUILTIN_POR128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
31404 { OPTION_MASK_ISA_SSE2, CODE_FOR_xorv2di3, "__builtin_ia32_pxor128", IX86_BUILTIN_PXOR128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
31406 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_uavgv16qi3, "__builtin_ia32_pavgb128", IX86_BUILTIN_PAVGB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
31407 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_uavgv8hi3, "__builtin_ia32_pavgw128", IX86_BUILTIN_PAVGW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
31409 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_eqv16qi3, "__builtin_ia32_pcmpeqb128", IX86_BUILTIN_PCMPEQB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
31410 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_eqv8hi3, "__builtin_ia32_pcmpeqw128", IX86_BUILTIN_PCMPEQW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
31411 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_eqv4si3, "__builtin_ia32_pcmpeqd128", IX86_BUILTIN_PCMPEQD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
31412 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_gtv16qi3, "__builtin_ia32_pcmpgtb128", IX86_BUILTIN_PCMPGTB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
31413 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_gtv8hi3, "__builtin_ia32_pcmpgtw128", IX86_BUILTIN_PCMPGTW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
31414 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_gtv4si3, "__builtin_ia32_pcmpgtd128", IX86_BUILTIN_PCMPGTD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
31416 { OPTION_MASK_ISA_SSE2, CODE_FOR_umaxv16qi3, "__builtin_ia32_pmaxub128", IX86_BUILTIN_PMAXUB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
31417 { OPTION_MASK_ISA_SSE2, CODE_FOR_smaxv8hi3, "__builtin_ia32_pmaxsw128", IX86_BUILTIN_PMAXSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
31418 { OPTION_MASK_ISA_SSE2, CODE_FOR_uminv16qi3, "__builtin_ia32_pminub128", IX86_BUILTIN_PMINUB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
31419 { OPTION_MASK_ISA_SSE2, CODE_FOR_sminv8hi3, "__builtin_ia32_pminsw128", IX86_BUILTIN_PMINSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
31421 { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_highv16qi, "__builtin_ia32_punpckhbw128", IX86_BUILTIN_PUNPCKHBW128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
31422 { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_highv8hi, "__builtin_ia32_punpckhwd128", IX86_BUILTIN_PUNPCKHWD128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
31423 { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_highv4si, "__builtin_ia32_punpckhdq128", IX86_BUILTIN_PUNPCKHDQ128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
31424 { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_highv2di, "__builtin_ia32_punpckhqdq128", IX86_BUILTIN_PUNPCKHQDQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
31425 { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_lowv16qi, "__builtin_ia32_punpcklbw128", IX86_BUILTIN_PUNPCKLBW128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
31426 { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_lowv8hi, "__builtin_ia32_punpcklwd128", IX86_BUILTIN_PUNPCKLWD128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
31427 { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_lowv4si, "__builtin_ia32_punpckldq128", IX86_BUILTIN_PUNPCKLDQ128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
31428 { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_lowv2di, "__builtin_ia32_punpcklqdq128", IX86_BUILTIN_PUNPCKLQDQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
31430 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_packsswb, "__builtin_ia32_packsswb128", IX86_BUILTIN_PACKSSWB128, UNKNOWN, (int) V16QI_FTYPE_V8HI_V8HI },
31431 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_packssdw, "__builtin_ia32_packssdw128", IX86_BUILTIN_PACKSSDW128, UNKNOWN, (int) V8HI_FTYPE_V4SI_V4SI },
31432 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_packuswb, "__builtin_ia32_packuswb128", IX86_BUILTIN_PACKUSWB128, UNKNOWN, (int) V16QI_FTYPE_V8HI_V8HI },
31434 { OPTION_MASK_ISA_SSE2, CODE_FOR_umulv8hi3_highpart, "__builtin_ia32_pmulhuw128", IX86_BUILTIN_PMULHUW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
31435 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_psadbw, "__builtin_ia32_psadbw128", IX86_BUILTIN_PSADBW128, UNKNOWN, (int) V2DI_FTYPE_V16QI_V16QI },
31437 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_umulv1siv1di3, "__builtin_ia32_pmuludq", IX86_BUILTIN_PMULUDQ, UNKNOWN, (int) V1DI_FTYPE_V2SI_V2SI },
31438 { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_widen_umult_even_v4si, "__builtin_ia32_pmuludq128", IX86_BUILTIN_PMULUDQ128, UNKNOWN, (int) V2DI_FTYPE_V4SI_V4SI },
31440 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_pmaddwd, "__builtin_ia32_pmaddwd128", IX86_BUILTIN_PMADDWD128, UNKNOWN, (int) V4SI_FTYPE_V8HI_V8HI },
31442 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtsi2sd, "__builtin_ia32_cvtsi2sd", IX86_BUILTIN_CVTSI2SD, UNKNOWN, (int) V2DF_FTYPE_V2DF_SI },
31443 { OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_64BIT, CODE_FOR_sse2_cvtsi2sdq, "__builtin_ia32_cvtsi642sd", IX86_BUILTIN_CVTSI642SD, UNKNOWN, (int) V2DF_FTYPE_V2DF_DI },
31444 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtsd2ss, "__builtin_ia32_cvtsd2ss", IX86_BUILTIN_CVTSD2SS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V2DF },
31445 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtss2sd, "__builtin_ia32_cvtss2sd", IX86_BUILTIN_CVTSS2SD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V4SF },
31447 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ashlv1ti3, "__builtin_ia32_pslldqi128", IX86_BUILTIN_PSLLDQI128, UNKNOWN, (int) V2DI_FTYPE_V2DI_INT_CONVERT },
31448 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashlv8hi3, "__builtin_ia32_psllwi128", IX86_BUILTIN_PSLLWI128, UNKNOWN, (int) V8HI_FTYPE_V8HI_SI_COUNT },
31449 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashlv4si3, "__builtin_ia32_pslldi128", IX86_BUILTIN_PSLLDI128, UNKNOWN, (int) V4SI_FTYPE_V4SI_SI_COUNT },
31450 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashlv2di3, "__builtin_ia32_psllqi128", IX86_BUILTIN_PSLLQI128, UNKNOWN, (int) V2DI_FTYPE_V2DI_SI_COUNT },
31451 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashlv8hi3, "__builtin_ia32_psllw128", IX86_BUILTIN_PSLLW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_COUNT },
31452 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashlv4si3, "__builtin_ia32_pslld128", IX86_BUILTIN_PSLLD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_COUNT },
31453 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashlv2di3, "__builtin_ia32_psllq128", IX86_BUILTIN_PSLLQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_COUNT },
31455 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_lshrv1ti3, "__builtin_ia32_psrldqi128", IX86_BUILTIN_PSRLDQI128, UNKNOWN, (int) V2DI_FTYPE_V2DI_INT_CONVERT },
31456 { OPTION_MASK_ISA_SSE2, CODE_FOR_lshrv8hi3, "__builtin_ia32_psrlwi128", IX86_BUILTIN_PSRLWI128, UNKNOWN, (int) V8HI_FTYPE_V8HI_SI_COUNT },
31457 { OPTION_MASK_ISA_SSE2, CODE_FOR_lshrv4si3, "__builtin_ia32_psrldi128", IX86_BUILTIN_PSRLDI128, UNKNOWN, (int) V4SI_FTYPE_V4SI_SI_COUNT },
31458 { OPTION_MASK_ISA_SSE2, CODE_FOR_lshrv2di3, "__builtin_ia32_psrlqi128", IX86_BUILTIN_PSRLQI128, UNKNOWN, (int) V2DI_FTYPE_V2DI_SI_COUNT },
31459 { OPTION_MASK_ISA_SSE2, CODE_FOR_lshrv8hi3, "__builtin_ia32_psrlw128", IX86_BUILTIN_PSRLW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_COUNT },
31460 { OPTION_MASK_ISA_SSE2, CODE_FOR_lshrv4si3, "__builtin_ia32_psrld128", IX86_BUILTIN_PSRLD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_COUNT },
31461 { OPTION_MASK_ISA_SSE2, CODE_FOR_lshrv2di3, "__builtin_ia32_psrlq128", IX86_BUILTIN_PSRLQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_COUNT },
31463 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashrv8hi3, "__builtin_ia32_psrawi128", IX86_BUILTIN_PSRAWI128, UNKNOWN, (int) V8HI_FTYPE_V8HI_SI_COUNT },
31464 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashrv4si3, "__builtin_ia32_psradi128", IX86_BUILTIN_PSRADI128, UNKNOWN, (int) V4SI_FTYPE_V4SI_SI_COUNT },
31465 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashrv8hi3, "__builtin_ia32_psraw128", IX86_BUILTIN_PSRAW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_COUNT },
31466 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashrv4si3, "__builtin_ia32_psrad128", IX86_BUILTIN_PSRAD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_COUNT },
31468 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_pshufd, "__builtin_ia32_pshufd", IX86_BUILTIN_PSHUFD, UNKNOWN, (int) V4SI_FTYPE_V4SI_INT },
31469 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_pshuflw, "__builtin_ia32_pshuflw", IX86_BUILTIN_PSHUFLW, UNKNOWN, (int) V8HI_FTYPE_V8HI_INT },
31470 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_pshufhw, "__builtin_ia32_pshufhw", IX86_BUILTIN_PSHUFHW, UNKNOWN, (int) V8HI_FTYPE_V8HI_INT },
31472 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmsqrtv2df2, "__builtin_ia32_sqrtsd", IX86_BUILTIN_SQRTSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_VEC_MERGE },
31474 { OPTION_MASK_ISA_SSE, CODE_FOR_sse2_movq128, "__builtin_ia32_movq128", IX86_BUILTIN_MOVQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI },
31476 /* SSE2 MMX */
31477 { OPTION_MASK_ISA_SSE2, CODE_FOR_mmx_addv1di3, "__builtin_ia32_paddq", IX86_BUILTIN_PADDQ, UNKNOWN, (int) V1DI_FTYPE_V1DI_V1DI },
31478 { OPTION_MASK_ISA_SSE2, CODE_FOR_mmx_subv1di3, "__builtin_ia32_psubq", IX86_BUILTIN_PSUBQ, UNKNOWN, (int) V1DI_FTYPE_V1DI_V1DI },
31480 /* SSE3 */
31481 { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_movshdup, "__builtin_ia32_movshdup", IX86_BUILTIN_MOVSHDUP, UNKNOWN, (int) V4SF_FTYPE_V4SF},
31482 { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_movsldup, "__builtin_ia32_movsldup", IX86_BUILTIN_MOVSLDUP, UNKNOWN, (int) V4SF_FTYPE_V4SF },
31484 { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_addsubv4sf3, "__builtin_ia32_addsubps", IX86_BUILTIN_ADDSUBPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
31485 { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_addsubv2df3, "__builtin_ia32_addsubpd", IX86_BUILTIN_ADDSUBPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
31486 { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_haddv4sf3, "__builtin_ia32_haddps", IX86_BUILTIN_HADDPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
31487 { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_haddv2df3, "__builtin_ia32_haddpd", IX86_BUILTIN_HADDPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
31488 { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_hsubv4sf3, "__builtin_ia32_hsubps", IX86_BUILTIN_HSUBPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
31489 { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_hsubv2df3, "__builtin_ia32_hsubpd", IX86_BUILTIN_HSUBPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
31491 /* SSSE3 */
31492 { OPTION_MASK_ISA_SSSE3, CODE_FOR_absv16qi2, "__builtin_ia32_pabsb128", IX86_BUILTIN_PABSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI },
31493 { OPTION_MASK_ISA_SSSE3, CODE_FOR_absv8qi2, "__builtin_ia32_pabsb", IX86_BUILTIN_PABSB, UNKNOWN, (int) V8QI_FTYPE_V8QI },
31494 { OPTION_MASK_ISA_SSSE3, CODE_FOR_absv8hi2, "__builtin_ia32_pabsw128", IX86_BUILTIN_PABSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI },
31495 { OPTION_MASK_ISA_SSSE3, CODE_FOR_absv4hi2, "__builtin_ia32_pabsw", IX86_BUILTIN_PABSW, UNKNOWN, (int) V4HI_FTYPE_V4HI },
31496 { OPTION_MASK_ISA_SSSE3, CODE_FOR_absv4si2, "__builtin_ia32_pabsd128", IX86_BUILTIN_PABSD128, UNKNOWN, (int) V4SI_FTYPE_V4SI },
31497 { OPTION_MASK_ISA_SSSE3, CODE_FOR_absv2si2, "__builtin_ia32_pabsd", IX86_BUILTIN_PABSD, UNKNOWN, (int) V2SI_FTYPE_V2SI },
31499 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phaddwv8hi3, "__builtin_ia32_phaddw128", IX86_BUILTIN_PHADDW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
31500 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phaddwv4hi3, "__builtin_ia32_phaddw", IX86_BUILTIN_PHADDW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
31501 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phadddv4si3, "__builtin_ia32_phaddd128", IX86_BUILTIN_PHADDD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
31502 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phadddv2si3, "__builtin_ia32_phaddd", IX86_BUILTIN_PHADDD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
31503 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phaddswv8hi3, "__builtin_ia32_phaddsw128", IX86_BUILTIN_PHADDSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
31504 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phaddswv4hi3, "__builtin_ia32_phaddsw", IX86_BUILTIN_PHADDSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
31505 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phsubwv8hi3, "__builtin_ia32_phsubw128", IX86_BUILTIN_PHSUBW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
31506 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phsubwv4hi3, "__builtin_ia32_phsubw", IX86_BUILTIN_PHSUBW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
31507 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phsubdv4si3, "__builtin_ia32_phsubd128", IX86_BUILTIN_PHSUBD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
31508 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phsubdv2si3, "__builtin_ia32_phsubd", IX86_BUILTIN_PHSUBD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
31509 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phsubswv8hi3, "__builtin_ia32_phsubsw128", IX86_BUILTIN_PHSUBSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
31510 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phsubswv4hi3, "__builtin_ia32_phsubsw", IX86_BUILTIN_PHSUBSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
31511 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_pmaddubsw128, "__builtin_ia32_pmaddubsw128", IX86_BUILTIN_PMADDUBSW128, UNKNOWN, (int) V8HI_FTYPE_V16QI_V16QI },
31512 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_pmaddubsw, "__builtin_ia32_pmaddubsw", IX86_BUILTIN_PMADDUBSW, UNKNOWN, (int) V4HI_FTYPE_V8QI_V8QI },
31513 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_pmulhrswv8hi3, "__builtin_ia32_pmulhrsw128", IX86_BUILTIN_PMULHRSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
31514 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_pmulhrswv4hi3, "__builtin_ia32_pmulhrsw", IX86_BUILTIN_PMULHRSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
31515 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_pshufbv16qi3, "__builtin_ia32_pshufb128", IX86_BUILTIN_PSHUFB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
31516 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_pshufbv8qi3, "__builtin_ia32_pshufb", IX86_BUILTIN_PSHUFB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
31517 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_psignv16qi3, "__builtin_ia32_psignb128", IX86_BUILTIN_PSIGNB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
31518 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_psignv8qi3, "__builtin_ia32_psignb", IX86_BUILTIN_PSIGNB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
31519 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_psignv8hi3, "__builtin_ia32_psignw128", IX86_BUILTIN_PSIGNW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
31520 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_psignv4hi3, "__builtin_ia32_psignw", IX86_BUILTIN_PSIGNW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
31521 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_psignv4si3, "__builtin_ia32_psignd128", IX86_BUILTIN_PSIGND128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
31522 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_psignv2si3, "__builtin_ia32_psignd", IX86_BUILTIN_PSIGND, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
31524 /* SSSE3. */
31525 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_palignrti, "__builtin_ia32_palignr128", IX86_BUILTIN_PALIGNR128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_INT_CONVERT },
31526 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_palignrdi, "__builtin_ia32_palignr", IX86_BUILTIN_PALIGNR, UNKNOWN, (int) V1DI_FTYPE_V1DI_V1DI_INT_CONVERT },
31528 /* SSE4.1 */
31529 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_blendpd, "__builtin_ia32_blendpd", IX86_BUILTIN_BLENDPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
31530 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_blendps, "__builtin_ia32_blendps", IX86_BUILTIN_BLENDPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
31531 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_blendvpd, "__builtin_ia32_blendvpd", IX86_BUILTIN_BLENDVPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF },
31532 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_blendvps, "__builtin_ia32_blendvps", IX86_BUILTIN_BLENDVPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF },
31533 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_dppd, "__builtin_ia32_dppd", IX86_BUILTIN_DPPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
31534 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_dpps, "__builtin_ia32_dpps", IX86_BUILTIN_DPPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
31535 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_insertps, "__builtin_ia32_insertps128", IX86_BUILTIN_INSERTPS128, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
31536 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_mpsadbw, "__builtin_ia32_mpsadbw128", IX86_BUILTIN_MPSADBW128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_INT },
31537 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_pblendvb, "__builtin_ia32_pblendvb128", IX86_BUILTIN_PBLENDVB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_V16QI },
31538 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_pblendw, "__builtin_ia32_pblendw128", IX86_BUILTIN_PBLENDW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_INT },
31540 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_sign_extendv8qiv8hi2, "__builtin_ia32_pmovsxbw128", IX86_BUILTIN_PMOVSXBW128, UNKNOWN, (int) V8HI_FTYPE_V16QI },
31541 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_sign_extendv4qiv4si2, "__builtin_ia32_pmovsxbd128", IX86_BUILTIN_PMOVSXBD128, UNKNOWN, (int) V4SI_FTYPE_V16QI },
31542 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_sign_extendv2qiv2di2, "__builtin_ia32_pmovsxbq128", IX86_BUILTIN_PMOVSXBQ128, UNKNOWN, (int) V2DI_FTYPE_V16QI },
31543 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_sign_extendv4hiv4si2, "__builtin_ia32_pmovsxwd128", IX86_BUILTIN_PMOVSXWD128, UNKNOWN, (int) V4SI_FTYPE_V8HI },
31544 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_sign_extendv2hiv2di2, "__builtin_ia32_pmovsxwq128", IX86_BUILTIN_PMOVSXWQ128, UNKNOWN, (int) V2DI_FTYPE_V8HI },
31545 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_sign_extendv2siv2di2, "__builtin_ia32_pmovsxdq128", IX86_BUILTIN_PMOVSXDQ128, UNKNOWN, (int) V2DI_FTYPE_V4SI },
31546 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_zero_extendv8qiv8hi2, "__builtin_ia32_pmovzxbw128", IX86_BUILTIN_PMOVZXBW128, UNKNOWN, (int) V8HI_FTYPE_V16QI },
31547 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_zero_extendv4qiv4si2, "__builtin_ia32_pmovzxbd128", IX86_BUILTIN_PMOVZXBD128, UNKNOWN, (int) V4SI_FTYPE_V16QI },
31548 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_zero_extendv2qiv2di2, "__builtin_ia32_pmovzxbq128", IX86_BUILTIN_PMOVZXBQ128, UNKNOWN, (int) V2DI_FTYPE_V16QI },
31549 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_zero_extendv4hiv4si2, "__builtin_ia32_pmovzxwd128", IX86_BUILTIN_PMOVZXWD128, UNKNOWN, (int) V4SI_FTYPE_V8HI },
31550 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_zero_extendv2hiv2di2, "__builtin_ia32_pmovzxwq128", IX86_BUILTIN_PMOVZXWQ128, UNKNOWN, (int) V2DI_FTYPE_V8HI },
31551 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_zero_extendv2siv2di2, "__builtin_ia32_pmovzxdq128", IX86_BUILTIN_PMOVZXDQ128, UNKNOWN, (int) V2DI_FTYPE_V4SI },
31552 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_phminposuw, "__builtin_ia32_phminposuw128", IX86_BUILTIN_PHMINPOSUW128, UNKNOWN, (int) V8HI_FTYPE_V8HI },
31554 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_packusdw, "__builtin_ia32_packusdw128", IX86_BUILTIN_PACKUSDW128, UNKNOWN, (int) V8HI_FTYPE_V4SI_V4SI },
31555 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_eqv2di3, "__builtin_ia32_pcmpeqq", IX86_BUILTIN_PCMPEQQ, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
31556 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_smaxv16qi3, "__builtin_ia32_pmaxsb128", IX86_BUILTIN_PMAXSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
31557 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_smaxv4si3, "__builtin_ia32_pmaxsd128", IX86_BUILTIN_PMAXSD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
31558 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_umaxv4si3, "__builtin_ia32_pmaxud128", IX86_BUILTIN_PMAXUD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
31559 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_umaxv8hi3, "__builtin_ia32_pmaxuw128", IX86_BUILTIN_PMAXUW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
31560 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sminv16qi3, "__builtin_ia32_pminsb128", IX86_BUILTIN_PMINSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
31561 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sminv4si3, "__builtin_ia32_pminsd128", IX86_BUILTIN_PMINSD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
31562 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_uminv4si3, "__builtin_ia32_pminud128", IX86_BUILTIN_PMINUD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
31563 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_uminv8hi3, "__builtin_ia32_pminuw128", IX86_BUILTIN_PMINUW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
31564 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_mulv2siv2di3, "__builtin_ia32_pmuldq128", IX86_BUILTIN_PMULDQ128, UNKNOWN, (int) V2DI_FTYPE_V4SI_V4SI },
31565 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_mulv4si3, "__builtin_ia32_pmulld128", IX86_BUILTIN_PMULLD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
31567 /* SSE4.1 rounding and ptest builtins */
31568 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundpd, "__builtin_ia32_roundpd", IX86_BUILTIN_ROUNDPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_INT },
31569 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundps, "__builtin_ia32_roundps", IX86_BUILTIN_ROUNDPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_INT },
31570 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundsd, "__builtin_ia32_roundsd", IX86_BUILTIN_ROUNDSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
31571 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundss, "__builtin_ia32_roundss", IX86_BUILTIN_ROUNDSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
31573 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundpd, "__builtin_ia32_floorpd", IX86_BUILTIN_FLOORPD, (enum rtx_code) ROUND_FLOOR, (int) V2DF_FTYPE_V2DF_ROUND },
31574 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundpd, "__builtin_ia32_ceilpd", IX86_BUILTIN_CEILPD, (enum rtx_code) ROUND_CEIL, (int) V2DF_FTYPE_V2DF_ROUND },
31575 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundpd, "__builtin_ia32_truncpd", IX86_BUILTIN_TRUNCPD, (enum rtx_code) ROUND_TRUNC, (int) V2DF_FTYPE_V2DF_ROUND },
31576 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundpd, "__builtin_ia32_rintpd", IX86_BUILTIN_RINTPD, (enum rtx_code) ROUND_MXCSR, (int) V2DF_FTYPE_V2DF_ROUND },
31578 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundpd_vec_pack_sfix, "__builtin_ia32_floorpd_vec_pack_sfix", IX86_BUILTIN_FLOORPD_VEC_PACK_SFIX, (enum rtx_code) ROUND_FLOOR, (int) V4SI_FTYPE_V2DF_V2DF_ROUND },
31579 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundpd_vec_pack_sfix, "__builtin_ia32_ceilpd_vec_pack_sfix", IX86_BUILTIN_CEILPD_VEC_PACK_SFIX, (enum rtx_code) ROUND_CEIL, (int) V4SI_FTYPE_V2DF_V2DF_ROUND },
31581 { OPTION_MASK_ISA_ROUND, CODE_FOR_roundv2df2, "__builtin_ia32_roundpd_az", IX86_BUILTIN_ROUNDPD_AZ, UNKNOWN, (int) V2DF_FTYPE_V2DF },
31582 { OPTION_MASK_ISA_ROUND, CODE_FOR_roundv2df2_vec_pack_sfix, "__builtin_ia32_roundpd_az_vec_pack_sfix", IX86_BUILTIN_ROUNDPD_AZ_VEC_PACK_SFIX, UNKNOWN, (int) V4SI_FTYPE_V2DF_V2DF },
31584 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundps, "__builtin_ia32_floorps", IX86_BUILTIN_FLOORPS, (enum rtx_code) ROUND_FLOOR, (int) V4SF_FTYPE_V4SF_ROUND },
31585 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundps, "__builtin_ia32_ceilps", IX86_BUILTIN_CEILPS, (enum rtx_code) ROUND_CEIL, (int) V4SF_FTYPE_V4SF_ROUND },
31586 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundps, "__builtin_ia32_truncps", IX86_BUILTIN_TRUNCPS, (enum rtx_code) ROUND_TRUNC, (int) V4SF_FTYPE_V4SF_ROUND },
31587 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundps, "__builtin_ia32_rintps", IX86_BUILTIN_RINTPS, (enum rtx_code) ROUND_MXCSR, (int) V4SF_FTYPE_V4SF_ROUND },
31589 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundps_sfix, "__builtin_ia32_floorps_sfix", IX86_BUILTIN_FLOORPS_SFIX, (enum rtx_code) ROUND_FLOOR, (int) V4SI_FTYPE_V4SF_ROUND },
31590 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundps_sfix, "__builtin_ia32_ceilps_sfix", IX86_BUILTIN_CEILPS_SFIX, (enum rtx_code) ROUND_CEIL, (int) V4SI_FTYPE_V4SF_ROUND },
31592 { OPTION_MASK_ISA_ROUND, CODE_FOR_roundv4sf2, "__builtin_ia32_roundps_az", IX86_BUILTIN_ROUNDPS_AZ, UNKNOWN, (int) V4SF_FTYPE_V4SF },
31593 { OPTION_MASK_ISA_ROUND, CODE_FOR_roundv4sf2_sfix, "__builtin_ia32_roundps_az_sfix", IX86_BUILTIN_ROUNDPS_AZ_SFIX, UNKNOWN, (int) V4SI_FTYPE_V4SF },
31595 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_ptest, "__builtin_ia32_ptestz128", IX86_BUILTIN_PTESTZ, EQ, (int) INT_FTYPE_V2DI_V2DI_PTEST },
31596 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_ptest, "__builtin_ia32_ptestc128", IX86_BUILTIN_PTESTC, LTU, (int) INT_FTYPE_V2DI_V2DI_PTEST },
31597 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_ptest, "__builtin_ia32_ptestnzc128", IX86_BUILTIN_PTESTNZC, GTU, (int) INT_FTYPE_V2DI_V2DI_PTEST },
31599 /* SSE4.2 */
31600 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_gtv2di3, "__builtin_ia32_pcmpgtq", IX86_BUILTIN_PCMPGTQ, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
31601 { OPTION_MASK_ISA_SSE4_2 | OPTION_MASK_ISA_CRC32, CODE_FOR_sse4_2_crc32qi, "__builtin_ia32_crc32qi", IX86_BUILTIN_CRC32QI, UNKNOWN, (int) UINT_FTYPE_UINT_UCHAR },
31602 { OPTION_MASK_ISA_SSE4_2 | OPTION_MASK_ISA_CRC32, CODE_FOR_sse4_2_crc32hi, "__builtin_ia32_crc32hi", IX86_BUILTIN_CRC32HI, UNKNOWN, (int) UINT_FTYPE_UINT_USHORT },
31603 { OPTION_MASK_ISA_SSE4_2 | OPTION_MASK_ISA_CRC32, CODE_FOR_sse4_2_crc32si, "__builtin_ia32_crc32si", IX86_BUILTIN_CRC32SI, UNKNOWN, (int) UINT_FTYPE_UINT_UINT },
31604 { OPTION_MASK_ISA_SSE4_2 | OPTION_MASK_ISA_CRC32 | OPTION_MASK_ISA_64BIT, CODE_FOR_sse4_2_crc32di, "__builtin_ia32_crc32di", IX86_BUILTIN_CRC32DI, UNKNOWN, (int) UINT64_FTYPE_UINT64_UINT64 },
31606 /* SSE4A */
31607 { OPTION_MASK_ISA_SSE4A, CODE_FOR_sse4a_extrqi, "__builtin_ia32_extrqi", IX86_BUILTIN_EXTRQI, UNKNOWN, (int) V2DI_FTYPE_V2DI_UINT_UINT },
31608 { OPTION_MASK_ISA_SSE4A, CODE_FOR_sse4a_extrq, "__builtin_ia32_extrq", IX86_BUILTIN_EXTRQ, UNKNOWN, (int) V2DI_FTYPE_V2DI_V16QI },
31609 { OPTION_MASK_ISA_SSE4A, CODE_FOR_sse4a_insertqi, "__builtin_ia32_insertqi", IX86_BUILTIN_INSERTQI, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_UINT_UINT },
31610 { OPTION_MASK_ISA_SSE4A, CODE_FOR_sse4a_insertq, "__builtin_ia32_insertq", IX86_BUILTIN_INSERTQ, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
31612 /* AES */
31613 { OPTION_MASK_ISA_SSE2, CODE_FOR_aeskeygenassist, 0, IX86_BUILTIN_AESKEYGENASSIST128, UNKNOWN, (int) V2DI_FTYPE_V2DI_INT },
31614 { OPTION_MASK_ISA_SSE2, CODE_FOR_aesimc, 0, IX86_BUILTIN_AESIMC128, UNKNOWN, (int) V2DI_FTYPE_V2DI },
31616 { OPTION_MASK_ISA_SSE2, CODE_FOR_aesenc, 0, IX86_BUILTIN_AESENC128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
31617 { OPTION_MASK_ISA_SSE2, CODE_FOR_aesenclast, 0, IX86_BUILTIN_AESENCLAST128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
31618 { OPTION_MASK_ISA_SSE2, CODE_FOR_aesdec, 0, IX86_BUILTIN_AESDEC128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
31619 { OPTION_MASK_ISA_SSE2, CODE_FOR_aesdeclast, 0, IX86_BUILTIN_AESDECLAST128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
31621 /* PCLMUL */
31622 { OPTION_MASK_ISA_SSE2, CODE_FOR_pclmulqdq, 0, IX86_BUILTIN_PCLMULQDQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_INT },
31624 /* AVX */
31625 { OPTION_MASK_ISA_AVX, CODE_FOR_addv4df3, "__builtin_ia32_addpd256", IX86_BUILTIN_ADDPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
31626 { OPTION_MASK_ISA_AVX, CODE_FOR_addv8sf3, "__builtin_ia32_addps256", IX86_BUILTIN_ADDPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
31627 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_addsubv4df3, "__builtin_ia32_addsubpd256", IX86_BUILTIN_ADDSUBPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
31628 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_addsubv8sf3, "__builtin_ia32_addsubps256", IX86_BUILTIN_ADDSUBPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
31629 { OPTION_MASK_ISA_AVX, CODE_FOR_andv4df3, "__builtin_ia32_andpd256", IX86_BUILTIN_ANDPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
31630 { OPTION_MASK_ISA_AVX, CODE_FOR_andv8sf3, "__builtin_ia32_andps256", IX86_BUILTIN_ANDPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
31631 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_andnotv4df3, "__builtin_ia32_andnpd256", IX86_BUILTIN_ANDNPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
31632 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_andnotv8sf3, "__builtin_ia32_andnps256", IX86_BUILTIN_ANDNPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
31633 { OPTION_MASK_ISA_AVX, CODE_FOR_divv4df3, "__builtin_ia32_divpd256", IX86_BUILTIN_DIVPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
31634 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_divv8sf3, "__builtin_ia32_divps256", IX86_BUILTIN_DIVPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
31635 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_haddv4df3, "__builtin_ia32_haddpd256", IX86_BUILTIN_HADDPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
31636 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_hsubv8sf3, "__builtin_ia32_hsubps256", IX86_BUILTIN_HSUBPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
31637 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_hsubv4df3, "__builtin_ia32_hsubpd256", IX86_BUILTIN_HSUBPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
31638 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_haddv8sf3, "__builtin_ia32_haddps256", IX86_BUILTIN_HADDPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
31639 { OPTION_MASK_ISA_AVX, CODE_FOR_smaxv4df3, "__builtin_ia32_maxpd256", IX86_BUILTIN_MAXPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
31640 { OPTION_MASK_ISA_AVX, CODE_FOR_smaxv8sf3, "__builtin_ia32_maxps256", IX86_BUILTIN_MAXPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
31641 { OPTION_MASK_ISA_AVX, CODE_FOR_sminv4df3, "__builtin_ia32_minpd256", IX86_BUILTIN_MINPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
31642 { OPTION_MASK_ISA_AVX, CODE_FOR_sminv8sf3, "__builtin_ia32_minps256", IX86_BUILTIN_MINPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
31643 { OPTION_MASK_ISA_AVX, CODE_FOR_mulv4df3, "__builtin_ia32_mulpd256", IX86_BUILTIN_MULPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
31644 { OPTION_MASK_ISA_AVX, CODE_FOR_mulv8sf3, "__builtin_ia32_mulps256", IX86_BUILTIN_MULPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
31645 { OPTION_MASK_ISA_AVX, CODE_FOR_iorv4df3, "__builtin_ia32_orpd256", IX86_BUILTIN_ORPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
31646 { OPTION_MASK_ISA_AVX, CODE_FOR_iorv8sf3, "__builtin_ia32_orps256", IX86_BUILTIN_ORPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
31647 { OPTION_MASK_ISA_AVX, CODE_FOR_subv4df3, "__builtin_ia32_subpd256", IX86_BUILTIN_SUBPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
31648 { OPTION_MASK_ISA_AVX, CODE_FOR_subv8sf3, "__builtin_ia32_subps256", IX86_BUILTIN_SUBPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
31649 { OPTION_MASK_ISA_AVX, CODE_FOR_xorv4df3, "__builtin_ia32_xorpd256", IX86_BUILTIN_XORPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
31650 { OPTION_MASK_ISA_AVX, CODE_FOR_xorv8sf3, "__builtin_ia32_xorps256", IX86_BUILTIN_XORPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
31652 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilvarv2df3, "__builtin_ia32_vpermilvarpd", IX86_BUILTIN_VPERMILVARPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DI },
31653 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilvarv4sf3, "__builtin_ia32_vpermilvarps", IX86_BUILTIN_VPERMILVARPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SI },
31654 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilvarv4df3, "__builtin_ia32_vpermilvarpd256", IX86_BUILTIN_VPERMILVARPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DI },
31655 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilvarv8sf3, "__builtin_ia32_vpermilvarps256", IX86_BUILTIN_VPERMILVARPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SI },
31657 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_blendpd256, "__builtin_ia32_blendpd256", IX86_BUILTIN_BLENDPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_INT },
31658 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_blendps256, "__builtin_ia32_blendps256", IX86_BUILTIN_BLENDPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_INT },
31659 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_blendvpd256, "__builtin_ia32_blendvpd256", IX86_BUILTIN_BLENDVPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DF },
31660 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_blendvps256, "__builtin_ia32_blendvps256", IX86_BUILTIN_BLENDVPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SF },
31661 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_dpps256, "__builtin_ia32_dpps256", IX86_BUILTIN_DPPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_INT },
31662 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_shufpd256, "__builtin_ia32_shufpd256", IX86_BUILTIN_SHUFPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_INT },
31663 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_shufps256, "__builtin_ia32_shufps256", IX86_BUILTIN_SHUFPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_INT },
31664 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vmcmpv2df3, "__builtin_ia32_cmpsd", IX86_BUILTIN_CMPSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
31665 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vmcmpv4sf3, "__builtin_ia32_cmpss", IX86_BUILTIN_CMPSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
31666 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cmpv2df3, "__builtin_ia32_cmppd", IX86_BUILTIN_CMPPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
31667 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cmpv4sf3, "__builtin_ia32_cmpps", IX86_BUILTIN_CMPPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
31668 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cmpv4df3, "__builtin_ia32_cmppd256", IX86_BUILTIN_CMPPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_INT },
31669 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cmpv8sf3, "__builtin_ia32_cmpps256", IX86_BUILTIN_CMPPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_INT },
31670 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vextractf128v4df, "__builtin_ia32_vextractf128_pd256", IX86_BUILTIN_EXTRACTF128PD256, UNKNOWN, (int) V2DF_FTYPE_V4DF_INT },
31671 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vextractf128v8sf, "__builtin_ia32_vextractf128_ps256", IX86_BUILTIN_EXTRACTF128PS256, UNKNOWN, (int) V4SF_FTYPE_V8SF_INT },
31672 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vextractf128v8si, "__builtin_ia32_vextractf128_si256", IX86_BUILTIN_EXTRACTF128SI256, UNKNOWN, (int) V4SI_FTYPE_V8SI_INT },
31673 { OPTION_MASK_ISA_AVX, CODE_FOR_floatv4siv4df2, "__builtin_ia32_cvtdq2pd256", IX86_BUILTIN_CVTDQ2PD256, UNKNOWN, (int) V4DF_FTYPE_V4SI },
31674 { OPTION_MASK_ISA_AVX, CODE_FOR_floatv8siv8sf2, "__builtin_ia32_cvtdq2ps256", IX86_BUILTIN_CVTDQ2PS256, UNKNOWN, (int) V8SF_FTYPE_V8SI },
31675 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cvtpd2ps256, "__builtin_ia32_cvtpd2ps256", IX86_BUILTIN_CVTPD2PS256, UNKNOWN, (int) V4SF_FTYPE_V4DF },
31676 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_fix_notruncv8sfv8si, "__builtin_ia32_cvtps2dq256", IX86_BUILTIN_CVTPS2DQ256, UNKNOWN, (int) V8SI_FTYPE_V8SF },
31677 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cvtps2pd256, "__builtin_ia32_cvtps2pd256", IX86_BUILTIN_CVTPS2PD256, UNKNOWN, (int) V4DF_FTYPE_V4SF },
31678 { OPTION_MASK_ISA_AVX, CODE_FOR_fix_truncv4dfv4si2, "__builtin_ia32_cvttpd2dq256", IX86_BUILTIN_CVTTPD2DQ256, UNKNOWN, (int) V4SI_FTYPE_V4DF },
31679 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cvtpd2dq256, "__builtin_ia32_cvtpd2dq256", IX86_BUILTIN_CVTPD2DQ256, UNKNOWN, (int) V4SI_FTYPE_V4DF },
31680 { OPTION_MASK_ISA_AVX, CODE_FOR_fix_truncv8sfv8si2, "__builtin_ia32_cvttps2dq256", IX86_BUILTIN_CVTTPS2DQ256, UNKNOWN, (int) V8SI_FTYPE_V8SF },
31681 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vperm2f128v4df3, "__builtin_ia32_vperm2f128_pd256", IX86_BUILTIN_VPERM2F128PD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_INT },
31682 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vperm2f128v8sf3, "__builtin_ia32_vperm2f128_ps256", IX86_BUILTIN_VPERM2F128PS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_INT },
31683 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vperm2f128v8si3, "__builtin_ia32_vperm2f128_si256", IX86_BUILTIN_VPERM2F128SI256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_INT },
31684 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilv2df, "__builtin_ia32_vpermilpd", IX86_BUILTIN_VPERMILPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_INT },
31685 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilv4sf, "__builtin_ia32_vpermilps", IX86_BUILTIN_VPERMILPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_INT },
31686 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilv4df, "__builtin_ia32_vpermilpd256", IX86_BUILTIN_VPERMILPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_INT },
31687 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilv8sf, "__builtin_ia32_vpermilps256", IX86_BUILTIN_VPERMILPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_INT },
31688 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vinsertf128v4df, "__builtin_ia32_vinsertf128_pd256", IX86_BUILTIN_VINSERTF128PD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V2DF_INT },
31689 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vinsertf128v8sf, "__builtin_ia32_vinsertf128_ps256", IX86_BUILTIN_VINSERTF128PS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V4SF_INT },
31690 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vinsertf128v8si, "__builtin_ia32_vinsertf128_si256", IX86_BUILTIN_VINSERTF128SI256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V4SI_INT },
31692 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movshdup256, "__builtin_ia32_movshdup256", IX86_BUILTIN_MOVSHDUP256, UNKNOWN, (int) V8SF_FTYPE_V8SF },
31693 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movsldup256, "__builtin_ia32_movsldup256", IX86_BUILTIN_MOVSLDUP256, UNKNOWN, (int) V8SF_FTYPE_V8SF },
31694 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movddup256, "__builtin_ia32_movddup256", IX86_BUILTIN_MOVDDUP256, UNKNOWN, (int) V4DF_FTYPE_V4DF },
31696 { OPTION_MASK_ISA_AVX, CODE_FOR_sqrtv4df2, "__builtin_ia32_sqrtpd256", IX86_BUILTIN_SQRTPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF },
31697 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_sqrtv8sf2, "__builtin_ia32_sqrtps256", IX86_BUILTIN_SQRTPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF },
31698 { OPTION_MASK_ISA_AVX, CODE_FOR_sqrtv8sf2, "__builtin_ia32_sqrtps_nr256", IX86_BUILTIN_SQRTPS_NR256, UNKNOWN, (int) V8SF_FTYPE_V8SF },
31699 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_rsqrtv8sf2, "__builtin_ia32_rsqrtps256", IX86_BUILTIN_RSQRTPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF },
31700 { OPTION_MASK_ISA_AVX, CODE_FOR_rsqrtv8sf2, "__builtin_ia32_rsqrtps_nr256", IX86_BUILTIN_RSQRTPS_NR256, UNKNOWN, (int) V8SF_FTYPE_V8SF },
31702 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_rcpv8sf2, "__builtin_ia32_rcpps256", IX86_BUILTIN_RCPPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF },
31704 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundpd256, "__builtin_ia32_roundpd256", IX86_BUILTIN_ROUNDPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_INT },
31705 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundps256, "__builtin_ia32_roundps256", IX86_BUILTIN_ROUNDPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_INT },
31707 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundpd256, "__builtin_ia32_floorpd256", IX86_BUILTIN_FLOORPD256, (enum rtx_code) ROUND_FLOOR, (int) V4DF_FTYPE_V4DF_ROUND },
31708 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundpd256, "__builtin_ia32_ceilpd256", IX86_BUILTIN_CEILPD256, (enum rtx_code) ROUND_CEIL, (int) V4DF_FTYPE_V4DF_ROUND },
31709 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundpd256, "__builtin_ia32_truncpd256", IX86_BUILTIN_TRUNCPD256, (enum rtx_code) ROUND_TRUNC, (int) V4DF_FTYPE_V4DF_ROUND },
31710 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundpd256, "__builtin_ia32_rintpd256", IX86_BUILTIN_RINTPD256, (enum rtx_code) ROUND_MXCSR, (int) V4DF_FTYPE_V4DF_ROUND },
31712 { OPTION_MASK_ISA_AVX, CODE_FOR_roundv4df2, "__builtin_ia32_roundpd_az256", IX86_BUILTIN_ROUNDPD_AZ256, UNKNOWN, (int) V4DF_FTYPE_V4DF },
31713 { OPTION_MASK_ISA_AVX, CODE_FOR_roundv4df2_vec_pack_sfix, "__builtin_ia32_roundpd_az_vec_pack_sfix256", IX86_BUILTIN_ROUNDPD_AZ_VEC_PACK_SFIX256, UNKNOWN, (int) V8SI_FTYPE_V4DF_V4DF },
31715 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundpd_vec_pack_sfix256, "__builtin_ia32_floorpd_vec_pack_sfix256", IX86_BUILTIN_FLOORPD_VEC_PACK_SFIX256, (enum rtx_code) ROUND_FLOOR, (int) V8SI_FTYPE_V4DF_V4DF_ROUND },
31716 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundpd_vec_pack_sfix256, "__builtin_ia32_ceilpd_vec_pack_sfix256", IX86_BUILTIN_CEILPD_VEC_PACK_SFIX256, (enum rtx_code) ROUND_CEIL, (int) V8SI_FTYPE_V4DF_V4DF_ROUND },
31718 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundps256, "__builtin_ia32_floorps256", IX86_BUILTIN_FLOORPS256, (enum rtx_code) ROUND_FLOOR, (int) V8SF_FTYPE_V8SF_ROUND },
31719 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundps256, "__builtin_ia32_ceilps256", IX86_BUILTIN_CEILPS256, (enum rtx_code) ROUND_CEIL, (int) V8SF_FTYPE_V8SF_ROUND },
31720 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundps256, "__builtin_ia32_truncps256", IX86_BUILTIN_TRUNCPS256, (enum rtx_code) ROUND_TRUNC, (int) V8SF_FTYPE_V8SF_ROUND },
31721 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundps256, "__builtin_ia32_rintps256", IX86_BUILTIN_RINTPS256, (enum rtx_code) ROUND_MXCSR, (int) V8SF_FTYPE_V8SF_ROUND },
31723 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundps_sfix256, "__builtin_ia32_floorps_sfix256", IX86_BUILTIN_FLOORPS_SFIX256, (enum rtx_code) ROUND_FLOOR, (int) V8SI_FTYPE_V8SF_ROUND },
31724 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundps_sfix256, "__builtin_ia32_ceilps_sfix256", IX86_BUILTIN_CEILPS_SFIX256, (enum rtx_code) ROUND_CEIL, (int) V8SI_FTYPE_V8SF_ROUND },
31726 { OPTION_MASK_ISA_AVX, CODE_FOR_roundv8sf2, "__builtin_ia32_roundps_az256", IX86_BUILTIN_ROUNDPS_AZ256, UNKNOWN, (int) V8SF_FTYPE_V8SF },
31727 { OPTION_MASK_ISA_AVX, CODE_FOR_roundv8sf2_sfix, "__builtin_ia32_roundps_az_sfix256", IX86_BUILTIN_ROUNDPS_AZ_SFIX256, UNKNOWN, (int) V8SI_FTYPE_V8SF },
31729 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_unpckhpd256, "__builtin_ia32_unpckhpd256", IX86_BUILTIN_UNPCKHPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
31730 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_unpcklpd256, "__builtin_ia32_unpcklpd256", IX86_BUILTIN_UNPCKLPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
31731 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_unpckhps256, "__builtin_ia32_unpckhps256", IX86_BUILTIN_UNPCKHPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
31732 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_unpcklps256, "__builtin_ia32_unpcklps256", IX86_BUILTIN_UNPCKLPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
31734 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_si256_si, "__builtin_ia32_si256_si", IX86_BUILTIN_SI256_SI, UNKNOWN, (int) V8SI_FTYPE_V4SI },
31735 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_ps256_ps, "__builtin_ia32_ps256_ps", IX86_BUILTIN_PS256_PS, UNKNOWN, (int) V8SF_FTYPE_V4SF },
31736 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_pd256_pd, "__builtin_ia32_pd256_pd", IX86_BUILTIN_PD256_PD, UNKNOWN, (int) V4DF_FTYPE_V2DF },
31737 { OPTION_MASK_ISA_AVX, CODE_FOR_vec_extract_lo_v8si, "__builtin_ia32_si_si256", IX86_BUILTIN_SI_SI256, UNKNOWN, (int) V4SI_FTYPE_V8SI },
31738 { OPTION_MASK_ISA_AVX, CODE_FOR_vec_extract_lo_v8sf, "__builtin_ia32_ps_ps256", IX86_BUILTIN_PS_PS256, UNKNOWN, (int) V4SF_FTYPE_V8SF },
31739 { OPTION_MASK_ISA_AVX, CODE_FOR_vec_extract_lo_v4df, "__builtin_ia32_pd_pd256", IX86_BUILTIN_PD_PD256, UNKNOWN, (int) V2DF_FTYPE_V4DF },
31741 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestpd, "__builtin_ia32_vtestzpd", IX86_BUILTIN_VTESTZPD, EQ, (int) INT_FTYPE_V2DF_V2DF_PTEST },
31742 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestpd, "__builtin_ia32_vtestcpd", IX86_BUILTIN_VTESTCPD, LTU, (int) INT_FTYPE_V2DF_V2DF_PTEST },
31743 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestpd, "__builtin_ia32_vtestnzcpd", IX86_BUILTIN_VTESTNZCPD, GTU, (int) INT_FTYPE_V2DF_V2DF_PTEST },
31744 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestps, "__builtin_ia32_vtestzps", IX86_BUILTIN_VTESTZPS, EQ, (int) INT_FTYPE_V4SF_V4SF_PTEST },
31745 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestps, "__builtin_ia32_vtestcps", IX86_BUILTIN_VTESTCPS, LTU, (int) INT_FTYPE_V4SF_V4SF_PTEST },
31746 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestps, "__builtin_ia32_vtestnzcps", IX86_BUILTIN_VTESTNZCPS, GTU, (int) INT_FTYPE_V4SF_V4SF_PTEST },
31747 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestpd256, "__builtin_ia32_vtestzpd256", IX86_BUILTIN_VTESTZPD256, EQ, (int) INT_FTYPE_V4DF_V4DF_PTEST },
31748 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestpd256, "__builtin_ia32_vtestcpd256", IX86_BUILTIN_VTESTCPD256, LTU, (int) INT_FTYPE_V4DF_V4DF_PTEST },
31749 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestpd256, "__builtin_ia32_vtestnzcpd256", IX86_BUILTIN_VTESTNZCPD256, GTU, (int) INT_FTYPE_V4DF_V4DF_PTEST },
31750 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestps256, "__builtin_ia32_vtestzps256", IX86_BUILTIN_VTESTZPS256, EQ, (int) INT_FTYPE_V8SF_V8SF_PTEST },
31751 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestps256, "__builtin_ia32_vtestcps256", IX86_BUILTIN_VTESTCPS256, LTU, (int) INT_FTYPE_V8SF_V8SF_PTEST },
31752 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestps256, "__builtin_ia32_vtestnzcps256", IX86_BUILTIN_VTESTNZCPS256, GTU, (int) INT_FTYPE_V8SF_V8SF_PTEST },
31753 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_ptest256, "__builtin_ia32_ptestz256", IX86_BUILTIN_PTESTZ256, EQ, (int) INT_FTYPE_V4DI_V4DI_PTEST },
31754 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_ptest256, "__builtin_ia32_ptestc256", IX86_BUILTIN_PTESTC256, LTU, (int) INT_FTYPE_V4DI_V4DI_PTEST },
31755 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_ptest256, "__builtin_ia32_ptestnzc256", IX86_BUILTIN_PTESTNZC256, GTU, (int) INT_FTYPE_V4DI_V4DI_PTEST },
31757 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movmskpd256, "__builtin_ia32_movmskpd256", IX86_BUILTIN_MOVMSKPD256, UNKNOWN, (int) INT_FTYPE_V4DF },
31758 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movmskps256, "__builtin_ia32_movmskps256", IX86_BUILTIN_MOVMSKPS256, UNKNOWN, (int) INT_FTYPE_V8SF },
31760 { OPTION_MASK_ISA_AVX, CODE_FOR_copysignv8sf3, "__builtin_ia32_copysignps256", IX86_BUILTIN_CPYSGNPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
31761 { OPTION_MASK_ISA_AVX, CODE_FOR_copysignv4df3, "__builtin_ia32_copysignpd256", IX86_BUILTIN_CPYSGNPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
31763 { OPTION_MASK_ISA_AVX, CODE_FOR_vec_pack_sfix_v4df, "__builtin_ia32_vec_pack_sfix256", IX86_BUILTIN_VEC_PACK_SFIX256, UNKNOWN, (int) V8SI_FTYPE_V4DF_V4DF },
31765 /* AVX2 */
31766 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_mpsadbw, "__builtin_ia32_mpsadbw256", IX86_BUILTIN_MPSADBW256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI_INT },
31767 { OPTION_MASK_ISA_AVX2, CODE_FOR_absv32qi2, "__builtin_ia32_pabsb256", IX86_BUILTIN_PABSB256, UNKNOWN, (int) V32QI_FTYPE_V32QI },
31768 { OPTION_MASK_ISA_AVX2, CODE_FOR_absv16hi2, "__builtin_ia32_pabsw256", IX86_BUILTIN_PABSW256, UNKNOWN, (int) V16HI_FTYPE_V16HI },
31769 { OPTION_MASK_ISA_AVX2, CODE_FOR_absv8si2, "__builtin_ia32_pabsd256", IX86_BUILTIN_PABSD256, UNKNOWN, (int) V8SI_FTYPE_V8SI },
31770 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_packssdw, "__builtin_ia32_packssdw256", IX86_BUILTIN_PACKSSDW256, UNKNOWN, (int) V16HI_FTYPE_V8SI_V8SI },
31771 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_packsswb, "__builtin_ia32_packsswb256", IX86_BUILTIN_PACKSSWB256, UNKNOWN, (int) V32QI_FTYPE_V16HI_V16HI },
31772 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_packusdw, "__builtin_ia32_packusdw256", IX86_BUILTIN_PACKUSDW256, UNKNOWN, (int) V16HI_FTYPE_V8SI_V8SI },
31773 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_packuswb, "__builtin_ia32_packuswb256", IX86_BUILTIN_PACKUSWB256, UNKNOWN, (int) V32QI_FTYPE_V16HI_V16HI },
31774 { OPTION_MASK_ISA_AVX2, CODE_FOR_addv32qi3, "__builtin_ia32_paddb256", IX86_BUILTIN_PADDB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
31775 { OPTION_MASK_ISA_AVX2, CODE_FOR_addv16hi3, "__builtin_ia32_paddw256", IX86_BUILTIN_PADDW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
31776 { OPTION_MASK_ISA_AVX2, CODE_FOR_addv8si3, "__builtin_ia32_paddd256", IX86_BUILTIN_PADDD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
31777 { OPTION_MASK_ISA_AVX2, CODE_FOR_addv4di3, "__builtin_ia32_paddq256", IX86_BUILTIN_PADDQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI },
31778 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_ssaddv32qi3, "__builtin_ia32_paddsb256", IX86_BUILTIN_PADDSB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
31779 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_ssaddv16hi3, "__builtin_ia32_paddsw256", IX86_BUILTIN_PADDSW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
31780 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_usaddv32qi3, "__builtin_ia32_paddusb256", IX86_BUILTIN_PADDUSB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
31781 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_usaddv16hi3, "__builtin_ia32_paddusw256", IX86_BUILTIN_PADDUSW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
31782 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_palignrv2ti, "__builtin_ia32_palignr256", IX86_BUILTIN_PALIGNR256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_INT_CONVERT },
31783 { OPTION_MASK_ISA_AVX2, CODE_FOR_andv4di3, "__builtin_ia32_andsi256", IX86_BUILTIN_AND256I, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI },
31784 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_andnotv4di3, "__builtin_ia32_andnotsi256", IX86_BUILTIN_ANDNOT256I, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI },
31785 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_uavgv32qi3, "__builtin_ia32_pavgb256", IX86_BUILTIN_PAVGB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
31786 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_uavgv16hi3, "__builtin_ia32_pavgw256", IX86_BUILTIN_PAVGW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
31787 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pblendvb, "__builtin_ia32_pblendvb256", IX86_BUILTIN_PBLENDVB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI_V32QI },
31788 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pblendw, "__builtin_ia32_pblendw256", IX86_BUILTIN_PBLENDVW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_INT },
31789 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_eqv32qi3, "__builtin_ia32_pcmpeqb256", IX86_BUILTIN_PCMPEQB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
31790 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_eqv16hi3, "__builtin_ia32_pcmpeqw256", IX86_BUILTIN_PCMPEQW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
31791 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_eqv8si3, "__builtin_ia32_pcmpeqd256", IX86_BUILTIN_PCMPEQD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
31792 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_eqv4di3, "__builtin_ia32_pcmpeqq256", IX86_BUILTIN_PCMPEQQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI },
31793 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_gtv32qi3, "__builtin_ia32_pcmpgtb256", IX86_BUILTIN_PCMPGTB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
31794 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_gtv16hi3, "__builtin_ia32_pcmpgtw256", IX86_BUILTIN_PCMPGTW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
31795 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_gtv8si3, "__builtin_ia32_pcmpgtd256", IX86_BUILTIN_PCMPGTD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
31796 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_gtv4di3, "__builtin_ia32_pcmpgtq256", IX86_BUILTIN_PCMPGTQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI },
31797 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_phaddwv16hi3, "__builtin_ia32_phaddw256", IX86_BUILTIN_PHADDW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
31798 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_phadddv8si3, "__builtin_ia32_phaddd256", IX86_BUILTIN_PHADDD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
31799 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_phaddswv16hi3, "__builtin_ia32_phaddsw256", IX86_BUILTIN_PHADDSW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
31800 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_phsubwv16hi3, "__builtin_ia32_phsubw256", IX86_BUILTIN_PHSUBW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
31801 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_phsubdv8si3, "__builtin_ia32_phsubd256", IX86_BUILTIN_PHSUBD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
31802 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_phsubswv16hi3, "__builtin_ia32_phsubsw256", IX86_BUILTIN_PHSUBSW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
31803 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pmaddubsw256, "__builtin_ia32_pmaddubsw256", IX86_BUILTIN_PMADDUBSW256, UNKNOWN, (int) V16HI_FTYPE_V32QI_V32QI },
31804 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pmaddwd, "__builtin_ia32_pmaddwd256", IX86_BUILTIN_PMADDWD256, UNKNOWN, (int) V8SI_FTYPE_V16HI_V16HI },
31805 { OPTION_MASK_ISA_AVX2, CODE_FOR_smaxv32qi3, "__builtin_ia32_pmaxsb256", IX86_BUILTIN_PMAXSB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
31806 { OPTION_MASK_ISA_AVX2, CODE_FOR_smaxv16hi3, "__builtin_ia32_pmaxsw256", IX86_BUILTIN_PMAXSW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
31807 { OPTION_MASK_ISA_AVX2, CODE_FOR_smaxv8si3 , "__builtin_ia32_pmaxsd256", IX86_BUILTIN_PMAXSD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
31808 { OPTION_MASK_ISA_AVX2, CODE_FOR_umaxv32qi3, "__builtin_ia32_pmaxub256", IX86_BUILTIN_PMAXUB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
31809 { OPTION_MASK_ISA_AVX2, CODE_FOR_umaxv16hi3, "__builtin_ia32_pmaxuw256", IX86_BUILTIN_PMAXUW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
31810 { OPTION_MASK_ISA_AVX2, CODE_FOR_umaxv8si3 , "__builtin_ia32_pmaxud256", IX86_BUILTIN_PMAXUD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
31811 { OPTION_MASK_ISA_AVX2, CODE_FOR_sminv32qi3, "__builtin_ia32_pminsb256", IX86_BUILTIN_PMINSB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
31812 { OPTION_MASK_ISA_AVX2, CODE_FOR_sminv16hi3, "__builtin_ia32_pminsw256", IX86_BUILTIN_PMINSW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
31813 { OPTION_MASK_ISA_AVX2, CODE_FOR_sminv8si3 , "__builtin_ia32_pminsd256", IX86_BUILTIN_PMINSD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
31814 { OPTION_MASK_ISA_AVX2, CODE_FOR_uminv32qi3, "__builtin_ia32_pminub256", IX86_BUILTIN_PMINUB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
31815 { OPTION_MASK_ISA_AVX2, CODE_FOR_uminv16hi3, "__builtin_ia32_pminuw256", IX86_BUILTIN_PMINUW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
31816 { OPTION_MASK_ISA_AVX2, CODE_FOR_uminv8si3 , "__builtin_ia32_pminud256", IX86_BUILTIN_PMINUD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
31817 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pmovmskb, "__builtin_ia32_pmovmskb256", IX86_BUILTIN_PMOVMSKB256, UNKNOWN, (int) INT_FTYPE_V32QI },
31818 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_sign_extendv16qiv16hi2, "__builtin_ia32_pmovsxbw256", IX86_BUILTIN_PMOVSXBW256, UNKNOWN, (int) V16HI_FTYPE_V16QI },
31819 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_sign_extendv8qiv8si2 , "__builtin_ia32_pmovsxbd256", IX86_BUILTIN_PMOVSXBD256, UNKNOWN, (int) V8SI_FTYPE_V16QI },
31820 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_sign_extendv4qiv4di2 , "__builtin_ia32_pmovsxbq256", IX86_BUILTIN_PMOVSXBQ256, UNKNOWN, (int) V4DI_FTYPE_V16QI },
31821 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_sign_extendv8hiv8si2 , "__builtin_ia32_pmovsxwd256", IX86_BUILTIN_PMOVSXWD256, UNKNOWN, (int) V8SI_FTYPE_V8HI },
31822 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_sign_extendv4hiv4di2 , "__builtin_ia32_pmovsxwq256", IX86_BUILTIN_PMOVSXWQ256, UNKNOWN, (int) V4DI_FTYPE_V8HI },
31823 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_sign_extendv4siv4di2 , "__builtin_ia32_pmovsxdq256", IX86_BUILTIN_PMOVSXDQ256, UNKNOWN, (int) V4DI_FTYPE_V4SI },
31824 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_zero_extendv16qiv16hi2, "__builtin_ia32_pmovzxbw256", IX86_BUILTIN_PMOVZXBW256, UNKNOWN, (int) V16HI_FTYPE_V16QI },
31825 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_zero_extendv8qiv8si2 , "__builtin_ia32_pmovzxbd256", IX86_BUILTIN_PMOVZXBD256, UNKNOWN, (int) V8SI_FTYPE_V16QI },
31826 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_zero_extendv4qiv4di2 , "__builtin_ia32_pmovzxbq256", IX86_BUILTIN_PMOVZXBQ256, UNKNOWN, (int) V4DI_FTYPE_V16QI },
31827 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_zero_extendv8hiv8si2 , "__builtin_ia32_pmovzxwd256", IX86_BUILTIN_PMOVZXWD256, UNKNOWN, (int) V8SI_FTYPE_V8HI },
31828 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_zero_extendv4hiv4di2 , "__builtin_ia32_pmovzxwq256", IX86_BUILTIN_PMOVZXWQ256, UNKNOWN, (int) V4DI_FTYPE_V8HI },
31829 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_zero_extendv4siv4di2 , "__builtin_ia32_pmovzxdq256", IX86_BUILTIN_PMOVZXDQ256, UNKNOWN, (int) V4DI_FTYPE_V4SI },
31830 { OPTION_MASK_ISA_AVX2, CODE_FOR_vec_widen_smult_even_v8si, "__builtin_ia32_pmuldq256", IX86_BUILTIN_PMULDQ256, UNKNOWN, (int) V4DI_FTYPE_V8SI_V8SI },
31831 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pmulhrswv16hi3 , "__builtin_ia32_pmulhrsw256", IX86_BUILTIN_PMULHRSW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
31832 { OPTION_MASK_ISA_AVX2, CODE_FOR_umulv16hi3_highpart, "__builtin_ia32_pmulhuw256" , IX86_BUILTIN_PMULHUW256 , UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
31833 { OPTION_MASK_ISA_AVX2, CODE_FOR_smulv16hi3_highpart, "__builtin_ia32_pmulhw256" , IX86_BUILTIN_PMULHW256 , UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
31834 { OPTION_MASK_ISA_AVX2, CODE_FOR_mulv16hi3, "__builtin_ia32_pmullw256" , IX86_BUILTIN_PMULLW256 , UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
31835 { OPTION_MASK_ISA_AVX2, CODE_FOR_mulv8si3, "__builtin_ia32_pmulld256" , IX86_BUILTIN_PMULLD256 , UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
31836 { OPTION_MASK_ISA_AVX2, CODE_FOR_vec_widen_umult_even_v8si, "__builtin_ia32_pmuludq256", IX86_BUILTIN_PMULUDQ256, UNKNOWN, (int) V4DI_FTYPE_V8SI_V8SI },
31837 { OPTION_MASK_ISA_AVX2, CODE_FOR_iorv4di3, "__builtin_ia32_por256", IX86_BUILTIN_POR256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI },
31838 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_psadbw, "__builtin_ia32_psadbw256", IX86_BUILTIN_PSADBW256, UNKNOWN, (int) V16HI_FTYPE_V32QI_V32QI },
31839 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pshufbv32qi3, "__builtin_ia32_pshufb256", IX86_BUILTIN_PSHUFB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
31840 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pshufdv3, "__builtin_ia32_pshufd256", IX86_BUILTIN_PSHUFD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_INT },
31841 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pshufhwv3, "__builtin_ia32_pshufhw256", IX86_BUILTIN_PSHUFHW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_INT },
31842 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pshuflwv3, "__builtin_ia32_pshuflw256", IX86_BUILTIN_PSHUFLW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_INT },
31843 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_psignv32qi3, "__builtin_ia32_psignb256", IX86_BUILTIN_PSIGNB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
31844 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_psignv16hi3, "__builtin_ia32_psignw256", IX86_BUILTIN_PSIGNW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
31845 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_psignv8si3 , "__builtin_ia32_psignd256", IX86_BUILTIN_PSIGND256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
31846 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_ashlv2ti3, "__builtin_ia32_pslldqi256", IX86_BUILTIN_PSLLDQI256, UNKNOWN, (int) V4DI_FTYPE_V4DI_INT_CONVERT },
31847 { OPTION_MASK_ISA_AVX2, CODE_FOR_ashlv16hi3, "__builtin_ia32_psllwi256", IX86_BUILTIN_PSLLWI256 , UNKNOWN, (int) V16HI_FTYPE_V16HI_SI_COUNT },
31848 { OPTION_MASK_ISA_AVX2, CODE_FOR_ashlv16hi3, "__builtin_ia32_psllw256", IX86_BUILTIN_PSLLW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V8HI_COUNT },
31849 { OPTION_MASK_ISA_AVX2, CODE_FOR_ashlv8si3, "__builtin_ia32_pslldi256", IX86_BUILTIN_PSLLDI256, UNKNOWN, (int) V8SI_FTYPE_V8SI_SI_COUNT },
31850 { OPTION_MASK_ISA_AVX2, CODE_FOR_ashlv8si3, "__builtin_ia32_pslld256", IX86_BUILTIN_PSLLD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V4SI_COUNT },
31851 { OPTION_MASK_ISA_AVX2, CODE_FOR_ashlv4di3, "__builtin_ia32_psllqi256", IX86_BUILTIN_PSLLQI256, UNKNOWN, (int) V4DI_FTYPE_V4DI_INT_COUNT },
31852 { OPTION_MASK_ISA_AVX2, CODE_FOR_ashlv4di3, "__builtin_ia32_psllq256", IX86_BUILTIN_PSLLQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V2DI_COUNT },
31853 { OPTION_MASK_ISA_AVX2, CODE_FOR_ashrv16hi3, "__builtin_ia32_psrawi256", IX86_BUILTIN_PSRAWI256, UNKNOWN, (int) V16HI_FTYPE_V16HI_SI_COUNT },
31854 { OPTION_MASK_ISA_AVX2, CODE_FOR_ashrv16hi3, "__builtin_ia32_psraw256", IX86_BUILTIN_PSRAW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V8HI_COUNT },
31855 { OPTION_MASK_ISA_AVX2, CODE_FOR_ashrv8si3, "__builtin_ia32_psradi256", IX86_BUILTIN_PSRADI256, UNKNOWN, (int) V8SI_FTYPE_V8SI_SI_COUNT },
31856 { OPTION_MASK_ISA_AVX2, CODE_FOR_ashrv8si3, "__builtin_ia32_psrad256", IX86_BUILTIN_PSRAD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V4SI_COUNT },
31857 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_lshrv2ti3, "__builtin_ia32_psrldqi256", IX86_BUILTIN_PSRLDQI256, UNKNOWN, (int) V4DI_FTYPE_V4DI_INT_CONVERT },
31858 { OPTION_MASK_ISA_AVX2, CODE_FOR_lshrv16hi3, "__builtin_ia32_psrlwi256", IX86_BUILTIN_PSRLWI256 , UNKNOWN, (int) V16HI_FTYPE_V16HI_SI_COUNT },
31859 { OPTION_MASK_ISA_AVX2, CODE_FOR_lshrv16hi3, "__builtin_ia32_psrlw256", IX86_BUILTIN_PSRLW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V8HI_COUNT },
31860 { OPTION_MASK_ISA_AVX2, CODE_FOR_lshrv8si3, "__builtin_ia32_psrldi256", IX86_BUILTIN_PSRLDI256, UNKNOWN, (int) V8SI_FTYPE_V8SI_SI_COUNT },
31861 { OPTION_MASK_ISA_AVX2, CODE_FOR_lshrv8si3, "__builtin_ia32_psrld256", IX86_BUILTIN_PSRLD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V4SI_COUNT },
31862 { OPTION_MASK_ISA_AVX2, CODE_FOR_lshrv4di3, "__builtin_ia32_psrlqi256", IX86_BUILTIN_PSRLQI256, UNKNOWN, (int) V4DI_FTYPE_V4DI_INT_COUNT },
31863 { OPTION_MASK_ISA_AVX2, CODE_FOR_lshrv4di3, "__builtin_ia32_psrlq256", IX86_BUILTIN_PSRLQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V2DI_COUNT },
31864 { OPTION_MASK_ISA_AVX2, CODE_FOR_subv32qi3, "__builtin_ia32_psubb256", IX86_BUILTIN_PSUBB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
31865 { OPTION_MASK_ISA_AVX2, CODE_FOR_subv16hi3, "__builtin_ia32_psubw256", IX86_BUILTIN_PSUBW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
31866 { OPTION_MASK_ISA_AVX2, CODE_FOR_subv8si3, "__builtin_ia32_psubd256", IX86_BUILTIN_PSUBD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
31867 { OPTION_MASK_ISA_AVX2, CODE_FOR_subv4di3, "__builtin_ia32_psubq256", IX86_BUILTIN_PSUBQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI },
31868 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_sssubv32qi3, "__builtin_ia32_psubsb256", IX86_BUILTIN_PSUBSB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
31869 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_sssubv16hi3, "__builtin_ia32_psubsw256", IX86_BUILTIN_PSUBSW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
31870 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_ussubv32qi3, "__builtin_ia32_psubusb256", IX86_BUILTIN_PSUBUSB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
31871 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_ussubv16hi3, "__builtin_ia32_psubusw256", IX86_BUILTIN_PSUBUSW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
31872 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_interleave_highv32qi, "__builtin_ia32_punpckhbw256", IX86_BUILTIN_PUNPCKHBW256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
31873 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_interleave_highv16hi, "__builtin_ia32_punpckhwd256", IX86_BUILTIN_PUNPCKHWD256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
31874 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_interleave_highv8si, "__builtin_ia32_punpckhdq256", IX86_BUILTIN_PUNPCKHDQ256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
31875 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_interleave_highv4di, "__builtin_ia32_punpckhqdq256", IX86_BUILTIN_PUNPCKHQDQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI },
31876 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_interleave_lowv32qi, "__builtin_ia32_punpcklbw256", IX86_BUILTIN_PUNPCKLBW256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
31877 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_interleave_lowv16hi, "__builtin_ia32_punpcklwd256", IX86_BUILTIN_PUNPCKLWD256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
31878 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_interleave_lowv8si, "__builtin_ia32_punpckldq256", IX86_BUILTIN_PUNPCKLDQ256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
31879 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_interleave_lowv4di, "__builtin_ia32_punpcklqdq256", IX86_BUILTIN_PUNPCKLQDQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI },
31880 { OPTION_MASK_ISA_AVX2, CODE_FOR_xorv4di3, "__builtin_ia32_pxor256", IX86_BUILTIN_PXOR256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI },
31881 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_vec_dupv4sf, "__builtin_ia32_vbroadcastss_ps", IX86_BUILTIN_VBROADCASTSS_PS, UNKNOWN, (int) V4SF_FTYPE_V4SF },
31882 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_vec_dupv8sf, "__builtin_ia32_vbroadcastss_ps256", IX86_BUILTIN_VBROADCASTSS_PS256, UNKNOWN, (int) V8SF_FTYPE_V4SF },
31883 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_vec_dupv4df, "__builtin_ia32_vbroadcastsd_pd256", IX86_BUILTIN_VBROADCASTSD_PD256, UNKNOWN, (int) V4DF_FTYPE_V2DF },
31884 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_vbroadcasti128_v4di, "__builtin_ia32_vbroadcastsi256", IX86_BUILTIN_VBROADCASTSI256, UNKNOWN, (int) V4DI_FTYPE_V2DI },
31885 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pblenddv4si, "__builtin_ia32_pblendd128", IX86_BUILTIN_PBLENDD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_INT },
31886 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pblenddv8si, "__builtin_ia32_pblendd256", IX86_BUILTIN_PBLENDD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_INT },
31887 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pbroadcastv32qi, "__builtin_ia32_pbroadcastb256", IX86_BUILTIN_PBROADCASTB256, UNKNOWN, (int) V32QI_FTYPE_V16QI },
31888 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pbroadcastv16hi, "__builtin_ia32_pbroadcastw256", IX86_BUILTIN_PBROADCASTW256, UNKNOWN, (int) V16HI_FTYPE_V8HI },
31889 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pbroadcastv8si, "__builtin_ia32_pbroadcastd256", IX86_BUILTIN_PBROADCASTD256, UNKNOWN, (int) V8SI_FTYPE_V4SI },
31890 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pbroadcastv4di, "__builtin_ia32_pbroadcastq256", IX86_BUILTIN_PBROADCASTQ256, UNKNOWN, (int) V4DI_FTYPE_V2DI },
31891 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pbroadcastv16qi, "__builtin_ia32_pbroadcastb128", IX86_BUILTIN_PBROADCASTB128, UNKNOWN, (int) V16QI_FTYPE_V16QI },
31892 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pbroadcastv8hi, "__builtin_ia32_pbroadcastw128", IX86_BUILTIN_PBROADCASTW128, UNKNOWN, (int) V8HI_FTYPE_V8HI },
31893 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pbroadcastv4si, "__builtin_ia32_pbroadcastd128", IX86_BUILTIN_PBROADCASTD128, UNKNOWN, (int) V4SI_FTYPE_V4SI },
31894 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pbroadcastv2di, "__builtin_ia32_pbroadcastq128", IX86_BUILTIN_PBROADCASTQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI },
31895 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_permvarv8si, "__builtin_ia32_permvarsi256", IX86_BUILTIN_VPERMVARSI256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
31896 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_permvarv8sf, "__builtin_ia32_permvarsf256", IX86_BUILTIN_VPERMVARSF256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SI },
31897 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_permv4df, "__builtin_ia32_permdf256", IX86_BUILTIN_VPERMDF256, UNKNOWN, (int) V4DF_FTYPE_V4DF_INT },
31898 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_permv4di, "__builtin_ia32_permdi256", IX86_BUILTIN_VPERMDI256, UNKNOWN, (int) V4DI_FTYPE_V4DI_INT },
31899 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_permv2ti, "__builtin_ia32_permti256", IX86_BUILTIN_VPERMTI256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_INT },
31900 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx_vextractf128v4di, "__builtin_ia32_extract128i256", IX86_BUILTIN_VEXTRACT128I256, UNKNOWN, (int) V2DI_FTYPE_V4DI_INT },
31901 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx_vinsertf128v4di, "__builtin_ia32_insert128i256", IX86_BUILTIN_VINSERT128I256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V2DI_INT },
31902 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_ashlvv4di, "__builtin_ia32_psllv4di", IX86_BUILTIN_PSLLVV4DI, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI },
31903 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_ashlvv2di, "__builtin_ia32_psllv2di", IX86_BUILTIN_PSLLVV2DI, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
31904 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_ashlvv8si, "__builtin_ia32_psllv8si", IX86_BUILTIN_PSLLVV8SI, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
31905 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_ashlvv4si, "__builtin_ia32_psllv4si", IX86_BUILTIN_PSLLVV4SI, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
31906 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_ashrvv8si, "__builtin_ia32_psrav8si", IX86_BUILTIN_PSRAVV8SI, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
31907 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_ashrvv4si, "__builtin_ia32_psrav4si", IX86_BUILTIN_PSRAVV4SI, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
31908 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_lshrvv4di, "__builtin_ia32_psrlv4di", IX86_BUILTIN_PSRLVV4DI, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI },
31909 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_lshrvv2di, "__builtin_ia32_psrlv2di", IX86_BUILTIN_PSRLVV2DI, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
31910 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_lshrvv8si, "__builtin_ia32_psrlv8si", IX86_BUILTIN_PSRLVV8SI, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
31911 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_lshrvv4si, "__builtin_ia32_psrlv4si", IX86_BUILTIN_PSRLVV4SI, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
31913 { OPTION_MASK_ISA_LZCNT, CODE_FOR_clzhi2_lzcnt, "__builtin_clzs", IX86_BUILTIN_CLZS, UNKNOWN, (int) UINT16_FTYPE_UINT16 },
31915 /* BMI */
31916 { OPTION_MASK_ISA_BMI, CODE_FOR_bmi_bextr_si, "__builtin_ia32_bextr_u32", IX86_BUILTIN_BEXTR32, UNKNOWN, (int) UINT_FTYPE_UINT_UINT },
31917 { OPTION_MASK_ISA_BMI, CODE_FOR_bmi_bextr_di, "__builtin_ia32_bextr_u64", IX86_BUILTIN_BEXTR64, UNKNOWN, (int) UINT64_FTYPE_UINT64_UINT64 },
31918 { OPTION_MASK_ISA_BMI, CODE_FOR_ctzhi2, "__builtin_ctzs", IX86_BUILTIN_CTZS, UNKNOWN, (int) UINT16_FTYPE_UINT16 },
31920 /* TBM */
31921 { OPTION_MASK_ISA_TBM, CODE_FOR_tbm_bextri_si, "__builtin_ia32_bextri_u32", IX86_BUILTIN_BEXTRI32, UNKNOWN, (int) UINT_FTYPE_UINT_UINT },
31922 { OPTION_MASK_ISA_TBM, CODE_FOR_tbm_bextri_di, "__builtin_ia32_bextri_u64", IX86_BUILTIN_BEXTRI64, UNKNOWN, (int) UINT64_FTYPE_UINT64_UINT64 },
31924 /* F16C */
31925 { OPTION_MASK_ISA_F16C, CODE_FOR_vcvtph2ps, "__builtin_ia32_vcvtph2ps", IX86_BUILTIN_CVTPH2PS, UNKNOWN, (int) V4SF_FTYPE_V8HI },
31926 { OPTION_MASK_ISA_F16C, CODE_FOR_vcvtph2ps256, "__builtin_ia32_vcvtph2ps256", IX86_BUILTIN_CVTPH2PS256, UNKNOWN, (int) V8SF_FTYPE_V8HI },
31927 { OPTION_MASK_ISA_F16C, CODE_FOR_vcvtps2ph, "__builtin_ia32_vcvtps2ph", IX86_BUILTIN_CVTPS2PH, UNKNOWN, (int) V8HI_FTYPE_V4SF_INT },
31928 { OPTION_MASK_ISA_F16C, CODE_FOR_vcvtps2ph256, "__builtin_ia32_vcvtps2ph256", IX86_BUILTIN_CVTPS2PH256, UNKNOWN, (int) V8HI_FTYPE_V8SF_INT },
31930 /* BMI2 */
31931 { OPTION_MASK_ISA_BMI2, CODE_FOR_bmi2_bzhi_si3, "__builtin_ia32_bzhi_si", IX86_BUILTIN_BZHI32, UNKNOWN, (int) UINT_FTYPE_UINT_UINT },
31932 { OPTION_MASK_ISA_BMI2, CODE_FOR_bmi2_bzhi_di3, "__builtin_ia32_bzhi_di", IX86_BUILTIN_BZHI64, UNKNOWN, (int) UINT64_FTYPE_UINT64_UINT64 },
31933 { OPTION_MASK_ISA_BMI2, CODE_FOR_bmi2_pdep_si3, "__builtin_ia32_pdep_si", IX86_BUILTIN_PDEP32, UNKNOWN, (int) UINT_FTYPE_UINT_UINT },
31934 { OPTION_MASK_ISA_BMI2, CODE_FOR_bmi2_pdep_di3, "__builtin_ia32_pdep_di", IX86_BUILTIN_PDEP64, UNKNOWN, (int) UINT64_FTYPE_UINT64_UINT64 },
31935 { OPTION_MASK_ISA_BMI2, CODE_FOR_bmi2_pext_si3, "__builtin_ia32_pext_si", IX86_BUILTIN_PEXT32, UNKNOWN, (int) UINT_FTYPE_UINT_UINT },
31936 { OPTION_MASK_ISA_BMI2, CODE_FOR_bmi2_pext_di3, "__builtin_ia32_pext_di", IX86_BUILTIN_PEXT64, UNKNOWN, (int) UINT64_FTYPE_UINT64_UINT64 },
31938 /* AVX512F */
31939 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_si512_256si, "__builtin_ia32_si512_256si", IX86_BUILTIN_SI512_SI256, UNKNOWN, (int) V16SI_FTYPE_V8SI },
31940 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_ps512_256ps, "__builtin_ia32_ps512_256ps", IX86_BUILTIN_PS512_PS256, UNKNOWN, (int) V16SF_FTYPE_V8SF },
31941 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_pd512_256pd, "__builtin_ia32_pd512_256pd", IX86_BUILTIN_PD512_PD256, UNKNOWN, (int) V8DF_FTYPE_V4DF },
31942 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_si512_si, "__builtin_ia32_si512_si", IX86_BUILTIN_SI512_SI, UNKNOWN, (int) V16SI_FTYPE_V4SI },
31943 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_ps512_ps, "__builtin_ia32_ps512_ps", IX86_BUILTIN_PS512_PS, UNKNOWN, (int) V16SF_FTYPE_V4SF },
31944 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_pd512_pd, "__builtin_ia32_pd512_pd", IX86_BUILTIN_PD512_PD, UNKNOWN, (int) V8DF_FTYPE_V2DF },
31945 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_alignv16si_mask, "__builtin_ia32_alignd512_mask", IX86_BUILTIN_ALIGND512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_INT_V16SI_HI },
31946 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_alignv8di_mask, "__builtin_ia32_alignq512_mask", IX86_BUILTIN_ALIGNQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_INT_V8DI_QI },
31947 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_blendmv16si, "__builtin_ia32_blendmd_512_mask", IX86_BUILTIN_BLENDMD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_HI },
31948 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_blendmv8df, "__builtin_ia32_blendmpd_512_mask", IX86_BUILTIN_BLENDMPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_QI },
31949 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_blendmv16sf, "__builtin_ia32_blendmps_512_mask", IX86_BUILTIN_BLENDMPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_HI },
31950 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_blendmv8di, "__builtin_ia32_blendmq_512_mask", IX86_BUILTIN_BLENDMQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_QI },
31951 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_broadcastv16sf_mask, "__builtin_ia32_broadcastf32x4_512", IX86_BUILTIN_BROADCASTF32X4_512, UNKNOWN, (int) V16SF_FTYPE_V4SF_V16SF_HI },
31952 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_broadcastv8df_mask, "__builtin_ia32_broadcastf64x4_512", IX86_BUILTIN_BROADCASTF64X4_512, UNKNOWN, (int) V8DF_FTYPE_V4DF_V8DF_QI },
31953 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_broadcastv16si_mask, "__builtin_ia32_broadcasti32x4_512", IX86_BUILTIN_BROADCASTI32X4_512, UNKNOWN, (int) V16SI_FTYPE_V4SI_V16SI_HI },
31954 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_broadcastv8di_mask, "__builtin_ia32_broadcasti64x4_512", IX86_BUILTIN_BROADCASTI64X4_512, UNKNOWN, (int) V8DI_FTYPE_V4DI_V8DI_QI },
31955 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vec_dupv8df_mask, "__builtin_ia32_broadcastsd512", IX86_BUILTIN_BROADCASTSD512, UNKNOWN, (int) V8DF_FTYPE_V2DF_V8DF_QI },
31956 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vec_dupv16sf_mask, "__builtin_ia32_broadcastss512", IX86_BUILTIN_BROADCASTSS512, UNKNOWN, (int) V16SF_FTYPE_V4SF_V16SF_HI },
31957 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_cmpv16si3_mask, "__builtin_ia32_cmpd512_mask", IX86_BUILTIN_CMPD512, UNKNOWN, (int) HI_FTYPE_V16SI_V16SI_INT_HI },
31958 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_cmpv8di3_mask, "__builtin_ia32_cmpq512_mask", IX86_BUILTIN_CMPQ512, UNKNOWN, (int) QI_FTYPE_V8DI_V8DI_INT_QI },
31959 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_compressv8df_mask, "__builtin_ia32_compressdf512_mask", IX86_BUILTIN_COMPRESSPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_QI },
31960 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_compressv16sf_mask, "__builtin_ia32_compresssf512_mask", IX86_BUILTIN_COMPRESSPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_HI },
31961 { OPTION_MASK_ISA_AVX512F, CODE_FOR_floatv8siv8df2_mask, "__builtin_ia32_cvtdq2pd512_mask", IX86_BUILTIN_CVTDQ2PD512, UNKNOWN, (int) V8DF_FTYPE_V8SI_V8DF_QI },
31962 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vcvtps2ph512_mask, "__builtin_ia32_vcvtps2ph512_mask", IX86_BUILTIN_CVTPS2PH512, UNKNOWN, (int) V16HI_FTYPE_V16SF_INT_V16HI_HI },
31963 { OPTION_MASK_ISA_AVX512F, CODE_FOR_ufloatv8siv8df2_mask, "__builtin_ia32_cvtudq2pd512_mask", IX86_BUILTIN_CVTUDQ2PD512, UNKNOWN, (int) V8DF_FTYPE_V8SI_V8DF_QI },
31964 { OPTION_MASK_ISA_AVX512F, CODE_FOR_cvtusi2sd32, "__builtin_ia32_cvtusi2sd32", IX86_BUILTIN_CVTUSI2SD32, UNKNOWN, (int) V2DF_FTYPE_V2DF_UINT },
31965 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_expandv8df_mask, "__builtin_ia32_expanddf512_mask", IX86_BUILTIN_EXPANDPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_QI },
31966 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_expandv8df_maskz, "__builtin_ia32_expanddf512_maskz", IX86_BUILTIN_EXPANDPD512Z, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_QI },
31967 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_expandv16sf_mask, "__builtin_ia32_expandsf512_mask", IX86_BUILTIN_EXPANDPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_HI },
31968 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_expandv16sf_maskz, "__builtin_ia32_expandsf512_maskz", IX86_BUILTIN_EXPANDPS512Z, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_HI },
31969 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vextractf32x4_mask, "__builtin_ia32_extractf32x4_mask", IX86_BUILTIN_EXTRACTF32X4, UNKNOWN, (int) V4SF_FTYPE_V16SF_INT_V4SF_QI },
31970 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vextractf64x4_mask, "__builtin_ia32_extractf64x4_mask", IX86_BUILTIN_EXTRACTF64X4, UNKNOWN, (int) V4DF_FTYPE_V8DF_INT_V4DF_QI },
31971 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vextracti32x4_mask, "__builtin_ia32_extracti32x4_mask", IX86_BUILTIN_EXTRACTI32X4, UNKNOWN, (int) V4SI_FTYPE_V16SI_INT_V4SI_QI },
31972 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vextracti64x4_mask, "__builtin_ia32_extracti64x4_mask", IX86_BUILTIN_EXTRACTI64X4, UNKNOWN, (int) V4DI_FTYPE_V8DI_INT_V4DI_QI },
31973 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vinsertf32x4_mask, "__builtin_ia32_insertf32x4_mask", IX86_BUILTIN_INSERTF32X4, UNKNOWN, (int) V16SF_FTYPE_V16SF_V4SF_INT_V16SF_HI },
31974 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vinsertf64x4_mask, "__builtin_ia32_insertf64x4_mask", IX86_BUILTIN_INSERTF64X4, UNKNOWN, (int) V8DF_FTYPE_V8DF_V4DF_INT_V8DF_QI },
31975 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vinserti32x4_mask, "__builtin_ia32_inserti32x4_mask", IX86_BUILTIN_INSERTI32X4, UNKNOWN, (int) V16SI_FTYPE_V16SI_V4SI_INT_V16SI_HI },
31976 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vinserti64x4_mask, "__builtin_ia32_inserti64x4_mask", IX86_BUILTIN_INSERTI64X4, UNKNOWN, (int) V8DI_FTYPE_V8DI_V4DI_INT_V8DI_QI },
31977 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_loadv8df_mask, "__builtin_ia32_movapd512_mask", IX86_BUILTIN_MOVAPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_QI },
31978 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_loadv16sf_mask, "__builtin_ia32_movaps512_mask", IX86_BUILTIN_MOVAPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_HI },
31979 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_movddup512_mask, "__builtin_ia32_movddup512_mask", IX86_BUILTIN_MOVDDUP512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_QI },
31980 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_loadv16si_mask, "__builtin_ia32_movdqa32_512_mask", IX86_BUILTIN_MOVDQA32_512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_HI },
31981 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_loadv8di_mask, "__builtin_ia32_movdqa64_512_mask", IX86_BUILTIN_MOVDQA64_512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_QI },
31982 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_movshdup512_mask, "__builtin_ia32_movshdup512_mask", IX86_BUILTIN_MOVSHDUP512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_HI },
31983 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_movsldup512_mask, "__builtin_ia32_movsldup512_mask", IX86_BUILTIN_MOVSLDUP512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_HI },
31984 { OPTION_MASK_ISA_AVX512F, CODE_FOR_absv16si2_mask, "__builtin_ia32_pabsd512_mask", IX86_BUILTIN_PABSD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_HI },
31985 { OPTION_MASK_ISA_AVX512F, CODE_FOR_absv8di2_mask, "__builtin_ia32_pabsq512_mask", IX86_BUILTIN_PABSQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_QI },
31986 { OPTION_MASK_ISA_AVX512F, CODE_FOR_addv16si3_mask, "__builtin_ia32_paddd512_mask", IX86_BUILTIN_PADDD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_HI },
31987 { OPTION_MASK_ISA_AVX512F, CODE_FOR_addv8di3_mask, "__builtin_ia32_paddq512_mask", IX86_BUILTIN_PADDQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
31988 { OPTION_MASK_ISA_AVX512F, CODE_FOR_andv16si3_mask, "__builtin_ia32_pandd512_mask", IX86_BUILTIN_PANDD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_HI },
31989 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_andnotv16si3_mask, "__builtin_ia32_pandnd512_mask", IX86_BUILTIN_PANDND512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_HI },
31990 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_andnotv8di3_mask, "__builtin_ia32_pandnq512_mask", IX86_BUILTIN_PANDNQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
31991 { OPTION_MASK_ISA_AVX512F, CODE_FOR_andv8di3_mask, "__builtin_ia32_pandq512_mask", IX86_BUILTIN_PANDQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
31992 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vec_dupv16si_mask, "__builtin_ia32_pbroadcastd512", IX86_BUILTIN_PBROADCASTD512, UNKNOWN, (int) V16SI_FTYPE_V4SI_V16SI_HI },
31993 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vec_dup_gprv16si_mask, "__builtin_ia32_pbroadcastd512_gpr_mask", IX86_BUILTIN_PBROADCASTD512_GPR, UNKNOWN, (int) V16SI_FTYPE_SI_V16SI_HI },
31994 { OPTION_MASK_ISA_AVX512CD, CODE_FOR_avx512cd_maskb_vec_dupv8di, "__builtin_ia32_broadcastmb512", IX86_BUILTIN_PBROADCASTMB512, UNKNOWN, (int) V8DI_FTYPE_QI },
31995 { OPTION_MASK_ISA_AVX512CD, CODE_FOR_avx512cd_maskw_vec_dupv16si, "__builtin_ia32_broadcastmw512", IX86_BUILTIN_PBROADCASTMW512, UNKNOWN, (int) V16SI_FTYPE_HI },
31996 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vec_dupv8di_mask, "__builtin_ia32_pbroadcastq512", IX86_BUILTIN_PBROADCASTQ512, UNKNOWN, (int) V8DI_FTYPE_V2DI_V8DI_QI },
31997 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vec_dup_gprv8di_mask, "__builtin_ia32_pbroadcastq512_gpr_mask", IX86_BUILTIN_PBROADCASTQ512_GPR, UNKNOWN, (int) V8DI_FTYPE_DI_V8DI_QI },
31998 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_eqv16si3_mask, "__builtin_ia32_pcmpeqd512_mask", IX86_BUILTIN_PCMPEQD512_MASK, UNKNOWN, (int) HI_FTYPE_V16SI_V16SI_HI },
31999 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_eqv8di3_mask, "__builtin_ia32_pcmpeqq512_mask", IX86_BUILTIN_PCMPEQQ512_MASK, UNKNOWN, (int) QI_FTYPE_V8DI_V8DI_QI },
32000 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_gtv16si3_mask, "__builtin_ia32_pcmpgtd512_mask", IX86_BUILTIN_PCMPGTD512_MASK, UNKNOWN, (int) HI_FTYPE_V16SI_V16SI_HI },
32001 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_gtv8di3_mask, "__builtin_ia32_pcmpgtq512_mask", IX86_BUILTIN_PCMPGTQ512_MASK, UNKNOWN, (int) QI_FTYPE_V8DI_V8DI_QI },
32002 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_compressv16si_mask, "__builtin_ia32_compresssi512_mask", IX86_BUILTIN_PCOMPRESSD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_HI },
32003 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_compressv8di_mask, "__builtin_ia32_compressdi512_mask", IX86_BUILTIN_PCOMPRESSQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_QI },
32004 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_expandv16si_mask, "__builtin_ia32_expandsi512_mask", IX86_BUILTIN_PEXPANDD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_HI },
32005 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_expandv16si_maskz, "__builtin_ia32_expandsi512_maskz", IX86_BUILTIN_PEXPANDD512Z, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_HI },
32006 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_expandv8di_mask, "__builtin_ia32_expanddi512_mask", IX86_BUILTIN_PEXPANDQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_QI },
32007 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_expandv8di_maskz, "__builtin_ia32_expanddi512_maskz", IX86_BUILTIN_PEXPANDQ512Z, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_QI },
32008 { OPTION_MASK_ISA_AVX512F, CODE_FOR_smaxv16si3_mask, "__builtin_ia32_pmaxsd512_mask", IX86_BUILTIN_PMAXSD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_HI },
32009 { OPTION_MASK_ISA_AVX512F, CODE_FOR_smaxv8di3_mask, "__builtin_ia32_pmaxsq512_mask", IX86_BUILTIN_PMAXSQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
32010 { OPTION_MASK_ISA_AVX512F, CODE_FOR_umaxv16si3_mask, "__builtin_ia32_pmaxud512_mask", IX86_BUILTIN_PMAXUD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_HI },
32011 { OPTION_MASK_ISA_AVX512F, CODE_FOR_umaxv8di3_mask, "__builtin_ia32_pmaxuq512_mask", IX86_BUILTIN_PMAXUQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
32012 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sminv16si3_mask, "__builtin_ia32_pminsd512_mask", IX86_BUILTIN_PMINSD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_HI },
32013 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sminv8di3_mask, "__builtin_ia32_pminsq512_mask", IX86_BUILTIN_PMINSQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
32014 { OPTION_MASK_ISA_AVX512F, CODE_FOR_uminv16si3_mask, "__builtin_ia32_pminud512_mask", IX86_BUILTIN_PMINUD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_HI },
32015 { OPTION_MASK_ISA_AVX512F, CODE_FOR_uminv8di3_mask, "__builtin_ia32_pminuq512_mask", IX86_BUILTIN_PMINUQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
32016 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_truncatev16siv16qi2_mask, "__builtin_ia32_pmovdb512_mask", IX86_BUILTIN_PMOVDB512, UNKNOWN, (int) V16QI_FTYPE_V16SI_V16QI_HI },
32017 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_truncatev16siv16hi2_mask, "__builtin_ia32_pmovdw512_mask", IX86_BUILTIN_PMOVDW512, UNKNOWN, (int) V16HI_FTYPE_V16SI_V16HI_HI },
32018 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_truncatev8div16qi2_mask, "__builtin_ia32_pmovqb512_mask", IX86_BUILTIN_PMOVQB512, UNKNOWN, (int) V16QI_FTYPE_V8DI_V16QI_QI },
32019 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_truncatev8div8si2_mask, "__builtin_ia32_pmovqd512_mask", IX86_BUILTIN_PMOVQD512, UNKNOWN, (int) V8SI_FTYPE_V8DI_V8SI_QI },
32020 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_truncatev8div8hi2_mask, "__builtin_ia32_pmovqw512_mask", IX86_BUILTIN_PMOVQW512, UNKNOWN, (int) V8HI_FTYPE_V8DI_V8HI_QI },
32021 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_ss_truncatev16siv16qi2_mask, "__builtin_ia32_pmovsdb512_mask", IX86_BUILTIN_PMOVSDB512, UNKNOWN, (int) V16QI_FTYPE_V16SI_V16QI_HI },
32022 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_ss_truncatev16siv16hi2_mask, "__builtin_ia32_pmovsdw512_mask", IX86_BUILTIN_PMOVSDW512, UNKNOWN, (int) V16HI_FTYPE_V16SI_V16HI_HI },
32023 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_ss_truncatev8div16qi2_mask, "__builtin_ia32_pmovsqb512_mask", IX86_BUILTIN_PMOVSQB512, UNKNOWN, (int) V16QI_FTYPE_V8DI_V16QI_QI },
32024 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_ss_truncatev8div8si2_mask, "__builtin_ia32_pmovsqd512_mask", IX86_BUILTIN_PMOVSQD512, UNKNOWN, (int) V8SI_FTYPE_V8DI_V8SI_QI },
32025 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_ss_truncatev8div8hi2_mask, "__builtin_ia32_pmovsqw512_mask", IX86_BUILTIN_PMOVSQW512, UNKNOWN, (int) V8HI_FTYPE_V8DI_V8HI_QI },
32026 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_sign_extendv16qiv16si2_mask, "__builtin_ia32_pmovsxbd512_mask", IX86_BUILTIN_PMOVSXBD512, UNKNOWN, (int) V16SI_FTYPE_V16QI_V16SI_HI },
32027 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_sign_extendv8qiv8di2_mask, "__builtin_ia32_pmovsxbq512_mask", IX86_BUILTIN_PMOVSXBQ512, UNKNOWN, (int) V8DI_FTYPE_V16QI_V8DI_QI },
32028 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_sign_extendv8siv8di2_mask, "__builtin_ia32_pmovsxdq512_mask", IX86_BUILTIN_PMOVSXDQ512, UNKNOWN, (int) V8DI_FTYPE_V8SI_V8DI_QI },
32029 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_sign_extendv16hiv16si2_mask, "__builtin_ia32_pmovsxwd512_mask", IX86_BUILTIN_PMOVSXWD512, UNKNOWN, (int) V16SI_FTYPE_V16HI_V16SI_HI },
32030 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_sign_extendv8hiv8di2_mask, "__builtin_ia32_pmovsxwq512_mask", IX86_BUILTIN_PMOVSXWQ512, UNKNOWN, (int) V8DI_FTYPE_V8HI_V8DI_QI },
32031 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_us_truncatev16siv16qi2_mask, "__builtin_ia32_pmovusdb512_mask", IX86_BUILTIN_PMOVUSDB512, UNKNOWN, (int) V16QI_FTYPE_V16SI_V16QI_HI },
32032 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_us_truncatev16siv16hi2_mask, "__builtin_ia32_pmovusdw512_mask", IX86_BUILTIN_PMOVUSDW512, UNKNOWN, (int) V16HI_FTYPE_V16SI_V16HI_HI },
32033 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_us_truncatev8div16qi2_mask, "__builtin_ia32_pmovusqb512_mask", IX86_BUILTIN_PMOVUSQB512, UNKNOWN, (int) V16QI_FTYPE_V8DI_V16QI_QI },
32034 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_us_truncatev8div8si2_mask, "__builtin_ia32_pmovusqd512_mask", IX86_BUILTIN_PMOVUSQD512, UNKNOWN, (int) V8SI_FTYPE_V8DI_V8SI_QI },
32035 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_us_truncatev8div8hi2_mask, "__builtin_ia32_pmovusqw512_mask", IX86_BUILTIN_PMOVUSQW512, UNKNOWN, (int) V8HI_FTYPE_V8DI_V8HI_QI },
32036 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_zero_extendv16qiv16si2_mask, "__builtin_ia32_pmovzxbd512_mask", IX86_BUILTIN_PMOVZXBD512, UNKNOWN, (int) V16SI_FTYPE_V16QI_V16SI_HI },
32037 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_zero_extendv8qiv8di2_mask, "__builtin_ia32_pmovzxbq512_mask", IX86_BUILTIN_PMOVZXBQ512, UNKNOWN, (int) V8DI_FTYPE_V16QI_V8DI_QI },
32038 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_zero_extendv8siv8di2_mask, "__builtin_ia32_pmovzxdq512_mask", IX86_BUILTIN_PMOVZXDQ512, UNKNOWN, (int) V8DI_FTYPE_V8SI_V8DI_QI },
32039 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_zero_extendv16hiv16si2_mask, "__builtin_ia32_pmovzxwd512_mask", IX86_BUILTIN_PMOVZXWD512, UNKNOWN, (int) V16SI_FTYPE_V16HI_V16SI_HI },
32040 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_zero_extendv8hiv8di2_mask, "__builtin_ia32_pmovzxwq512_mask", IX86_BUILTIN_PMOVZXWQ512, UNKNOWN, (int) V8DI_FTYPE_V8HI_V8DI_QI },
32041 { OPTION_MASK_ISA_AVX512F, CODE_FOR_vec_widen_smult_even_v16si_mask, "__builtin_ia32_pmuldq512_mask", IX86_BUILTIN_PMULDQ512, UNKNOWN, (int) V8DI_FTYPE_V16SI_V16SI_V8DI_QI },
32042 { OPTION_MASK_ISA_AVX512F, CODE_FOR_mulv16si3_mask, "__builtin_ia32_pmulld512_mask", IX86_BUILTIN_PMULLD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_HI },
32043 { OPTION_MASK_ISA_AVX512F, CODE_FOR_vec_widen_umult_even_v16si_mask, "__builtin_ia32_pmuludq512_mask", IX86_BUILTIN_PMULUDQ512, UNKNOWN, (int) V8DI_FTYPE_V16SI_V16SI_V8DI_QI },
32044 { OPTION_MASK_ISA_AVX512F, CODE_FOR_iorv16si3_mask, "__builtin_ia32_pord512_mask", IX86_BUILTIN_PORD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_HI },
32045 { OPTION_MASK_ISA_AVX512F, CODE_FOR_iorv8di3_mask, "__builtin_ia32_porq512_mask", IX86_BUILTIN_PORQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
32046 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_rolv16si_mask, "__builtin_ia32_prold512_mask", IX86_BUILTIN_PROLD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_INT_V16SI_HI },
32047 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_rolv8di_mask, "__builtin_ia32_prolq512_mask", IX86_BUILTIN_PROLQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_INT_V8DI_QI },
32048 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_rolvv16si_mask, "__builtin_ia32_prolvd512_mask", IX86_BUILTIN_PROLVD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_HI },
32049 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_rolvv8di_mask, "__builtin_ia32_prolvq512_mask", IX86_BUILTIN_PROLVQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
32050 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_rorv16si_mask, "__builtin_ia32_prord512_mask", IX86_BUILTIN_PRORD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_INT_V16SI_HI },
32051 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_rorv8di_mask, "__builtin_ia32_prorq512_mask", IX86_BUILTIN_PRORQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_INT_V8DI_QI },
32052 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_rorvv16si_mask, "__builtin_ia32_prorvd512_mask", IX86_BUILTIN_PRORVD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_HI },
32053 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_rorvv8di_mask, "__builtin_ia32_prorvq512_mask", IX86_BUILTIN_PRORVQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
32054 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_pshufdv3_mask, "__builtin_ia32_pshufd512_mask", IX86_BUILTIN_PSHUFD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_INT_V16SI_HI },
32055 { OPTION_MASK_ISA_AVX512F, CODE_FOR_ashlv16si3_mask, "__builtin_ia32_pslld512_mask", IX86_BUILTIN_PSLLD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V4SI_V16SI_HI },
32056 { OPTION_MASK_ISA_AVX512F, CODE_FOR_ashlv16si3_mask, "__builtin_ia32_pslldi512_mask", IX86_BUILTIN_PSLLDI512, UNKNOWN, (int) V16SI_FTYPE_V16SI_INT_V16SI_HI },
32057 { OPTION_MASK_ISA_AVX512F, CODE_FOR_ashlv8di3_mask, "__builtin_ia32_psllq512_mask", IX86_BUILTIN_PSLLQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V2DI_V8DI_QI },
32058 { OPTION_MASK_ISA_AVX512F, CODE_FOR_ashlv8di3_mask, "__builtin_ia32_psllqi512_mask", IX86_BUILTIN_PSLLQI512, UNKNOWN, (int) V8DI_FTYPE_V8DI_INT_V8DI_QI },
32059 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_ashlvv16si_mask, "__builtin_ia32_psllv16si_mask", IX86_BUILTIN_PSLLVV16SI, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_HI },
32060 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_ashlvv8di_mask, "__builtin_ia32_psllv8di_mask", IX86_BUILTIN_PSLLVV8DI, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
32061 { OPTION_MASK_ISA_AVX512F, CODE_FOR_ashrv16si3_mask, "__builtin_ia32_psrad512_mask", IX86_BUILTIN_PSRAD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V4SI_V16SI_HI },
32062 { OPTION_MASK_ISA_AVX512F, CODE_FOR_ashrv16si3_mask, "__builtin_ia32_psradi512_mask", IX86_BUILTIN_PSRADI512, UNKNOWN, (int) V16SI_FTYPE_V16SI_INT_V16SI_HI },
32063 { OPTION_MASK_ISA_AVX512F, CODE_FOR_ashrv8di3_mask, "__builtin_ia32_psraq512_mask", IX86_BUILTIN_PSRAQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V2DI_V8DI_QI },
32064 { OPTION_MASK_ISA_AVX512F, CODE_FOR_ashrv8di3_mask, "__builtin_ia32_psraqi512_mask", IX86_BUILTIN_PSRAQI512, UNKNOWN, (int) V8DI_FTYPE_V8DI_INT_V8DI_QI },
32065 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_ashrvv16si_mask, "__builtin_ia32_psrav16si_mask", IX86_BUILTIN_PSRAVV16SI, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_HI },
32066 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_ashrvv8di_mask, "__builtin_ia32_psrav8di_mask", IX86_BUILTIN_PSRAVV8DI, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
32067 { OPTION_MASK_ISA_AVX512F, CODE_FOR_lshrv16si3_mask, "__builtin_ia32_psrld512_mask", IX86_BUILTIN_PSRLD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V4SI_V16SI_HI },
32068 { OPTION_MASK_ISA_AVX512F, CODE_FOR_lshrv16si3_mask, "__builtin_ia32_psrldi512_mask", IX86_BUILTIN_PSRLDI512, UNKNOWN, (int) V16SI_FTYPE_V16SI_INT_V16SI_HI },
32069 { OPTION_MASK_ISA_AVX512F, CODE_FOR_lshrv8di3_mask, "__builtin_ia32_psrlq512_mask", IX86_BUILTIN_PSRLQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V2DI_V8DI_QI },
32070 { OPTION_MASK_ISA_AVX512F, CODE_FOR_lshrv8di3_mask, "__builtin_ia32_psrlqi512_mask", IX86_BUILTIN_PSRLQI512, UNKNOWN, (int) V8DI_FTYPE_V8DI_INT_V8DI_QI },
32071 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_lshrvv16si_mask, "__builtin_ia32_psrlv16si_mask", IX86_BUILTIN_PSRLVV16SI, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_HI },
32072 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_lshrvv8di_mask, "__builtin_ia32_psrlv8di_mask", IX86_BUILTIN_PSRLVV8DI, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
32073 { OPTION_MASK_ISA_AVX512F, CODE_FOR_subv16si3_mask, "__builtin_ia32_psubd512_mask", IX86_BUILTIN_PSUBD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_HI },
32074 { OPTION_MASK_ISA_AVX512F, CODE_FOR_subv8di3_mask, "__builtin_ia32_psubq512_mask", IX86_BUILTIN_PSUBQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
32075 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_testmv16si3_mask, "__builtin_ia32_ptestmd512", IX86_BUILTIN_PTESTMD512, UNKNOWN, (int) HI_FTYPE_V16SI_V16SI_HI },
32076 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_testmv8di3_mask, "__builtin_ia32_ptestmq512", IX86_BUILTIN_PTESTMQ512, UNKNOWN, (int) QI_FTYPE_V8DI_V8DI_QI },
32077 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_testnmv16si3_mask, "__builtin_ia32_ptestnmd512", IX86_BUILTIN_PTESTNMD512, UNKNOWN, (int) HI_FTYPE_V16SI_V16SI_HI },
32078 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_testnmv8di3_mask, "__builtin_ia32_ptestnmq512", IX86_BUILTIN_PTESTNMQ512, UNKNOWN, (int) QI_FTYPE_V8DI_V8DI_QI },
32079 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_interleave_highv16si_mask, "__builtin_ia32_punpckhdq512_mask", IX86_BUILTIN_PUNPCKHDQ512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_HI },
32080 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_interleave_highv8di_mask, "__builtin_ia32_punpckhqdq512_mask", IX86_BUILTIN_PUNPCKHQDQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
32081 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_interleave_lowv16si_mask, "__builtin_ia32_punpckldq512_mask", IX86_BUILTIN_PUNPCKLDQ512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_HI },
32082 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_interleave_lowv8di_mask, "__builtin_ia32_punpcklqdq512_mask", IX86_BUILTIN_PUNPCKLQDQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
32083 { OPTION_MASK_ISA_AVX512F, CODE_FOR_xorv16si3_mask, "__builtin_ia32_pxord512_mask", IX86_BUILTIN_PXORD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_HI },
32084 { OPTION_MASK_ISA_AVX512F, CODE_FOR_xorv8di3_mask, "__builtin_ia32_pxorq512_mask", IX86_BUILTIN_PXORQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
32085 { OPTION_MASK_ISA_AVX512F, CODE_FOR_rcp14v8df_mask, "__builtin_ia32_rcp14pd512_mask", IX86_BUILTIN_RCP14PD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_QI },
32086 { OPTION_MASK_ISA_AVX512F, CODE_FOR_rcp14v16sf_mask, "__builtin_ia32_rcp14ps512_mask", IX86_BUILTIN_RCP14PS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_HI },
32087 { OPTION_MASK_ISA_AVX512F, CODE_FOR_srcp14v2df, "__builtin_ia32_rcp14sd", IX86_BUILTIN_RCP14SD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
32088 { OPTION_MASK_ISA_AVX512F, CODE_FOR_srcp14v4sf, "__builtin_ia32_rcp14ss", IX86_BUILTIN_RCP14SS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
32089 { OPTION_MASK_ISA_AVX512F, CODE_FOR_rsqrt14v8df_mask, "__builtin_ia32_rsqrt14pd512_mask", IX86_BUILTIN_RSQRT14PD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_QI },
32090 { OPTION_MASK_ISA_AVX512F, CODE_FOR_rsqrt14v16sf_mask, "__builtin_ia32_rsqrt14ps512_mask", IX86_BUILTIN_RSQRT14PS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_HI },
32091 { OPTION_MASK_ISA_AVX512F, CODE_FOR_rsqrt14v2df, "__builtin_ia32_rsqrt14sd", IX86_BUILTIN_RSQRT14SD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
32092 { OPTION_MASK_ISA_AVX512F, CODE_FOR_rsqrt14v4sf, "__builtin_ia32_rsqrt14ss", IX86_BUILTIN_RSQRT14SS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
32093 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_shufpd512_mask, "__builtin_ia32_shufpd512_mask", IX86_BUILTIN_SHUFPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_INT_V8DF_QI },
32094 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_shufps512_mask, "__builtin_ia32_shufps512_mask", IX86_BUILTIN_SHUFPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_INT_V16SF_HI },
32095 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_shuf_f32x4_mask, "__builtin_ia32_shuf_f32x4_mask", IX86_BUILTIN_SHUF_F32x4, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_INT_V16SF_HI },
32096 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_shuf_f64x2_mask, "__builtin_ia32_shuf_f64x2_mask", IX86_BUILTIN_SHUF_F64x2, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_INT_V8DF_QI },
32097 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_shuf_i32x4_mask, "__builtin_ia32_shuf_i32x4_mask", IX86_BUILTIN_SHUF_I32x4, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_INT_V16SI_HI },
32098 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_shuf_i64x2_mask, "__builtin_ia32_shuf_i64x2_mask", IX86_BUILTIN_SHUF_I64x2, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_INT_V8DI_QI },
32099 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_ucmpv16si3_mask, "__builtin_ia32_ucmpd512_mask", IX86_BUILTIN_UCMPD512, UNKNOWN, (int) HI_FTYPE_V16SI_V16SI_INT_HI },
32100 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_ucmpv8di3_mask, "__builtin_ia32_ucmpq512_mask", IX86_BUILTIN_UCMPQ512, UNKNOWN, (int) QI_FTYPE_V8DI_V8DI_INT_QI },
32101 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_unpckhpd512_mask, "__builtin_ia32_unpckhpd512_mask", IX86_BUILTIN_UNPCKHPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI },
32102 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_unpckhps512_mask, "__builtin_ia32_unpckhps512_mask", IX86_BUILTIN_UNPCKHPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI },
32103 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_unpcklpd512_mask, "__builtin_ia32_unpcklpd512_mask", IX86_BUILTIN_UNPCKLPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI },
32104 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_unpcklps512_mask, "__builtin_ia32_unpcklps512_mask", IX86_BUILTIN_UNPCKLPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI },
32105 { OPTION_MASK_ISA_AVX512CD, CODE_FOR_clzv16si2_mask, "__builtin_ia32_vplzcntd_512_mask", IX86_BUILTIN_VPCLZCNTD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_HI },
32106 { OPTION_MASK_ISA_AVX512CD, CODE_FOR_clzv8di2_mask, "__builtin_ia32_vplzcntq_512_mask", IX86_BUILTIN_VPCLZCNTQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_QI },
32107 { OPTION_MASK_ISA_AVX512CD, CODE_FOR_conflictv16si_mask, "__builtin_ia32_vpconflictsi_512_mask", IX86_BUILTIN_VPCONFLICTD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_HI },
32108 { OPTION_MASK_ISA_AVX512CD, CODE_FOR_conflictv8di_mask, "__builtin_ia32_vpconflictdi_512_mask", IX86_BUILTIN_VPCONFLICTQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_QI },
32109 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_permv8df_mask, "__builtin_ia32_permdf512_mask", IX86_BUILTIN_VPERMDF512, UNKNOWN, (int) V8DF_FTYPE_V8DF_INT_V8DF_QI },
32110 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_permv8di_mask, "__builtin_ia32_permdi512_mask", IX86_BUILTIN_VPERMDI512, UNKNOWN, (int) V8DI_FTYPE_V8DI_INT_V8DI_QI },
32111 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vpermi2varv16si3_mask, "__builtin_ia32_vpermi2vard512_mask", IX86_BUILTIN_VPERMI2VARD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_HI },
32112 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vpermi2varv8df3_mask, "__builtin_ia32_vpermi2varpd512_mask", IX86_BUILTIN_VPERMI2VARPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DI_V8DF_QI },
32113 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vpermi2varv16sf3_mask, "__builtin_ia32_vpermi2varps512_mask", IX86_BUILTIN_VPERMI2VARPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SI_V16SF_HI },
32114 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vpermi2varv8di3_mask, "__builtin_ia32_vpermi2varq512_mask", IX86_BUILTIN_VPERMI2VARQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
32115 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vpermilv8df_mask, "__builtin_ia32_vpermilpd512_mask", IX86_BUILTIN_VPERMILPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_INT_V8DF_QI },
32116 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vpermilv16sf_mask, "__builtin_ia32_vpermilps512_mask", IX86_BUILTIN_VPERMILPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_INT_V16SF_HI },
32117 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vpermilvarv8df3_mask, "__builtin_ia32_vpermilvarpd512_mask", IX86_BUILTIN_VPERMILVARPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DI_V8DF_QI },
32118 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vpermilvarv16sf3_mask, "__builtin_ia32_vpermilvarps512_mask", IX86_BUILTIN_VPERMILVARPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SI_V16SF_HI },
32119 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vpermt2varv16si3_mask, "__builtin_ia32_vpermt2vard512_mask", IX86_BUILTIN_VPERMT2VARD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_HI },
32120 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vpermt2varv16si3_maskz, "__builtin_ia32_vpermt2vard512_maskz", IX86_BUILTIN_VPERMT2VARD512_MASKZ, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_HI },
32121 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vpermt2varv8df3_mask, "__builtin_ia32_vpermt2varpd512_mask", IX86_BUILTIN_VPERMT2VARPD512, UNKNOWN, (int) V8DF_FTYPE_V8DI_V8DF_V8DF_QI },
32122 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vpermt2varv8df3_maskz, "__builtin_ia32_vpermt2varpd512_maskz", IX86_BUILTIN_VPERMT2VARPD512_MASKZ, UNKNOWN, (int) V8DF_FTYPE_V8DI_V8DF_V8DF_QI },
32123 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vpermt2varv16sf3_mask, "__builtin_ia32_vpermt2varps512_mask", IX86_BUILTIN_VPERMT2VARPS512, UNKNOWN, (int) V16SF_FTYPE_V16SI_V16SF_V16SF_HI },
32124 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vpermt2varv16sf3_maskz, "__builtin_ia32_vpermt2varps512_maskz", IX86_BUILTIN_VPERMT2VARPS512_MASKZ, UNKNOWN, (int) V16SF_FTYPE_V16SI_V16SF_V16SF_HI },
32125 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vpermt2varv8di3_mask, "__builtin_ia32_vpermt2varq512_mask", IX86_BUILTIN_VPERMT2VARQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
32126 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vpermt2varv8di3_maskz, "__builtin_ia32_vpermt2varq512_maskz", IX86_BUILTIN_VPERMT2VARQ512_MASKZ, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
32127 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_permvarv8df_mask, "__builtin_ia32_permvardf512_mask", IX86_BUILTIN_VPERMVARDF512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DI_V8DF_QI },
32128 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_permvarv8di_mask, "__builtin_ia32_permvardi512_mask", IX86_BUILTIN_VPERMVARDI512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
32129 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_permvarv16sf_mask, "__builtin_ia32_permvarsf512_mask", IX86_BUILTIN_VPERMVARSF512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SI_V16SF_HI },
32130 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_permvarv16si_mask, "__builtin_ia32_permvarsi512_mask", IX86_BUILTIN_VPERMVARSI512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_HI },
32131 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vternlogv16si_mask, "__builtin_ia32_pternlogd512_mask", IX86_BUILTIN_VTERNLOGD512_MASK, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_INT_HI },
32132 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vternlogv16si_maskz, "__builtin_ia32_pternlogd512_maskz", IX86_BUILTIN_VTERNLOGD512_MASKZ, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_INT_HI },
32133 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vternlogv8di_mask, "__builtin_ia32_pternlogq512_mask", IX86_BUILTIN_VTERNLOGQ512_MASK, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_INT_QI },
32134 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vternlogv8di_maskz, "__builtin_ia32_pternlogq512_maskz", IX86_BUILTIN_VTERNLOGQ512_MASKZ, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_INT_QI },
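/* Note on the _mask entries above: beyond the normal source operands,
   each takes a pass-through vector plus a mask whose mode (QI/HI) matches
   the element count, which is why e.g. paddd512_mask is typed
   V16SI_FTYPE_V16SI_V16SI_V16SI_HI.  A hedged sketch of how the public
   intrinsics map onto that shape, assuming <immintrin.h> and -mavx512f:

     __m512i r = _mm512_mask_add_epi32 (passthru, k, a, b);

   which the avx512fintrin.h wrapper forwards as
   __builtin_ia32_paddd512_mask (a, b, passthru, k).  */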
32136 { OPTION_MASK_ISA_AVX512F, CODE_FOR_copysignv16sf3, "__builtin_ia32_copysignps512", IX86_BUILTIN_CPYSGNPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF },
32137 { OPTION_MASK_ISA_AVX512F, CODE_FOR_copysignv8df3, "__builtin_ia32_copysignpd512", IX86_BUILTIN_CPYSGNPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF },
32138 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_sqrtv8df2, "__builtin_ia32_sqrtpd512", IX86_BUILTIN_SQRTPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF },
32139 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sqrtv16sf2, "__builtin_ia32_sqrtps512", IX86_BUILTIN_SQRTPS_NR512, UNKNOWN, (int) V16SF_FTYPE_V16SF },
32140 { OPTION_MASK_ISA_AVX512ER, CODE_FOR_avx512er_exp2v16sf, "__builtin_ia32_exp2ps", IX86_BUILTIN_EXP2PS, UNKNOWN, (int) V16SF_FTYPE_V16SF },
32141 { OPTION_MASK_ISA_AVX512F, CODE_FOR_roundv8df2_vec_pack_sfix, "__builtin_ia32_roundpd_az_vec_pack_sfix512", IX86_BUILTIN_ROUNDPD_AZ_VEC_PACK_SFIX512, UNKNOWN, (int) V16SI_FTYPE_V8DF_V8DF },
32142 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_roundpd_vec_pack_sfix512, "__builtin_ia32_floorpd_vec_pack_sfix512", IX86_BUILTIN_FLOORPD_VEC_PACK_SFIX512, (enum rtx_code) ROUND_FLOOR, (int) V16SI_FTYPE_V8DF_V8DF_ROUND },
32143 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_roundpd_vec_pack_sfix512, "__builtin_ia32_ceilpd_vec_pack_sfix512", IX86_BUILTIN_CEILPD_VEC_PACK_SFIX512, (enum rtx_code) ROUND_CEIL, (int) V16SI_FTYPE_V8DF_V8DF_ROUND },
32145 /* Mask arithmetic operations */
32146 { OPTION_MASK_ISA_AVX512F, CODE_FOR_andhi3, "__builtin_ia32_kandhi", IX86_BUILTIN_KAND16, UNKNOWN, (int) HI_FTYPE_HI_HI },
32147 { OPTION_MASK_ISA_AVX512F, CODE_FOR_kandnhi, "__builtin_ia32_kandnhi", IX86_BUILTIN_KANDN16, UNKNOWN, (int) HI_FTYPE_HI_HI },
32148 { OPTION_MASK_ISA_AVX512F, CODE_FOR_one_cmplhi2, "__builtin_ia32_knothi", IX86_BUILTIN_KNOT16, UNKNOWN, (int) HI_FTYPE_HI },
32149 { OPTION_MASK_ISA_AVX512F, CODE_FOR_iorhi3, "__builtin_ia32_korhi", IX86_BUILTIN_KOR16, UNKNOWN, (int) HI_FTYPE_HI_HI },
32150 { OPTION_MASK_ISA_AVX512F, CODE_FOR_kortestchi, "__builtin_ia32_kortestchi", IX86_BUILTIN_KORTESTC16, UNKNOWN, (int) HI_FTYPE_HI_HI },
32151 { OPTION_MASK_ISA_AVX512F, CODE_FOR_kortestzhi, "__builtin_ia32_kortestzhi", IX86_BUILTIN_KORTESTZ16, UNKNOWN, (int) HI_FTYPE_HI_HI },
32152 { OPTION_MASK_ISA_AVX512F, CODE_FOR_kunpckhi, "__builtin_ia32_kunpckhi", IX86_BUILTIN_KUNPCKBW, UNKNOWN, (int) HI_FTYPE_HI_HI },
32153 { OPTION_MASK_ISA_AVX512F, CODE_FOR_kxnorhi, "__builtin_ia32_kxnorhi", IX86_BUILTIN_KXNOR16, UNKNOWN, (int) HI_FTYPE_HI_HI },
32154 { OPTION_MASK_ISA_AVX512F, CODE_FOR_xorhi3, "__builtin_ia32_kxorhi", IX86_BUILTIN_KXOR16, UNKNOWN, (int) HI_FTYPE_HI_HI },
32155 { OPTION_MASK_ISA_AVX512F, CODE_FOR_kmovw, "__builtin_ia32_kmov16", IX86_BUILTIN_KMOV16, UNKNOWN, (int) HI_FTYPE_HI },
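/* The mask-arithmetic descriptors above all operate on HImode mask values
   and back the __mmask16 intrinsics in avx512fintrin.h (_mm512_kand,
   _mm512_kor, _mm512_knot, _mm512_kxor, _mm512_kxnor, _mm512_kunpackb,
   _mm512_kortestc, _mm512_kortestz, _mm512_kmov).  A hedged sketch,
   assuming <immintrin.h> and -mavx512f:

     __mmask16 both = _mm512_kand (k1, k2);

   which reduces to __builtin_ia32_kandhi and the andhi3 pattern.  */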
32157 /* SHA */
32158 { OPTION_MASK_ISA_SSE2, CODE_FOR_sha1msg1, 0, IX86_BUILTIN_SHA1MSG1, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
32159 { OPTION_MASK_ISA_SSE2, CODE_FOR_sha1msg2, 0, IX86_BUILTIN_SHA1MSG2, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
32160 { OPTION_MASK_ISA_SSE2, CODE_FOR_sha1nexte, 0, IX86_BUILTIN_SHA1NEXTE, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
32161 { OPTION_MASK_ISA_SSE2, CODE_FOR_sha1rnds4, 0, IX86_BUILTIN_SHA1RNDS4, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_INT },
32162 { OPTION_MASK_ISA_SSE2, CODE_FOR_sha256msg1, 0, IX86_BUILTIN_SHA256MSG1, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
32163 { OPTION_MASK_ISA_SSE2, CODE_FOR_sha256msg2, 0, IX86_BUILTIN_SHA256MSG2, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
32164 { OPTION_MASK_ISA_SSE2, CODE_FOR_sha256rnds2, 0, IX86_BUILTIN_SHA256RNDS2, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI },
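/* The SHA descriptors above leave the name slot zero, so the generic
   registration loop presumably skips them and the __builtin_ia32_sha*
   names are registered separately; they surface as the shaintrin.h
   intrinsics (_mm_sha1msg1_epu32, _mm_sha1rnds4_epu32,
   _mm_sha256rnds2_epu32, ...).  A hedged sketch, assuming <immintrin.h>
   and -msha:

     __m128i e = _mm_sha256rnds2_epu32 (state0, state1, msg);

   which maps onto CODE_FOR_sha256rnds2 via this table.  */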
32166 /* AVX512VL. */
32167 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_palignrv32qi_mask, "__builtin_ia32_palignr256_mask", IX86_BUILTIN_PALIGNR256_MASK, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_INT_V4DI_SI_CONVERT },
32168 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_ssse3_palignrv16qi_mask, "__builtin_ia32_palignr128_mask", IX86_BUILTIN_PALIGNR128_MASK, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_INT_V2DI_HI_CONVERT },
32169 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_loadv4di_mask, "__builtin_ia32_movdqa64_256_mask", IX86_BUILTIN_MOVDQA64_256_MASK, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_QI },
32170 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_loadv2di_mask, "__builtin_ia32_movdqa64_128_mask", IX86_BUILTIN_MOVDQA64_128_MASK, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_QI },
32171 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_loadv8si_mask, "__builtin_ia32_movdqa32_256_mask", IX86_BUILTIN_MOVDQA32_256_MASK, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_QI },
32172 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_loadv4si_mask, "__builtin_ia32_movdqa32_128_mask", IX86_BUILTIN_MOVDQA32_128_MASK, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_QI },
32173 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_loadv4df_mask, "__builtin_ia32_movapd256_mask", IX86_BUILTIN_MOVAPD256_MASK, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_QI },
32174 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_loadv2df_mask, "__builtin_ia32_movapd128_mask", IX86_BUILTIN_MOVAPD128_MASK, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_QI },
32175 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_loadv8sf_mask, "__builtin_ia32_movaps256_mask", IX86_BUILTIN_MOVAPS256_MASK, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_QI },
32176 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_loadv4sf_mask, "__builtin_ia32_movaps128_mask", IX86_BUILTIN_MOVAPS128_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_QI },
32177 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_loaddquv16hi_mask, "__builtin_ia32_movdquhi256_mask", IX86_BUILTIN_MOVDQUHI256_MASK, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_HI },
32178 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_loaddquv8hi_mask, "__builtin_ia32_movdquhi128_mask", IX86_BUILTIN_MOVDQUHI128_MASK, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_QI },
32179 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_loaddquv32qi_mask, "__builtin_ia32_movdquqi256_mask", IX86_BUILTIN_MOVDQUQI256_MASK, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI_SI },
32180 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse2_loaddquv16qi_mask, "__builtin_ia32_movdquqi128_mask", IX86_BUILTIN_MOVDQUQI128_MASK, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_HI },
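/* The AVX512VL entries mirror the 512-bit descriptors at 128/256-bit
   widths, with the mask mode (QI/HI/SI) again sized to the element
   count.  A hedged sketch, assuming <immintrin.h> and -mavx512vl:

     __m256i r = _mm256_mask_mov_epi64 (passthru, k, a);

   which the avx512vlintrin.h wrapper forwards as
   __builtin_ia32_movdqa64_256_mask (a, passthru, k).  */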
32181 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sminv4sf3_mask, "__builtin_ia32_minps_mask", IX86_BUILTIN_MINPS128_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF_QI },
32182 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_smaxv4sf3_mask, "__builtin_ia32_maxps_mask", IX86_BUILTIN_MAXPS128_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF_QI },
32183 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sminv2df3_mask, "__builtin_ia32_minpd_mask", IX86_BUILTIN_MINPD128_MASK, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF_QI },
32184 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_smaxv2df3_mask, "__builtin_ia32_maxpd_mask", IX86_BUILTIN_MAXPD128_MASK, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF_QI },
32185 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_smaxv4df3_mask, "__builtin_ia32_maxpd256_mask", IX86_BUILTIN_MAXPD256_MASK, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DF_QI },
32186 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_smaxv8sf3_mask, "__builtin_ia32_maxps256_mask", IX86_BUILTIN_MAXPS256_MASK, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SF_QI },
32187 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sminv4df3_mask, "__builtin_ia32_minpd256_mask", IX86_BUILTIN_MINPD256_MASK, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DF_QI },
32188 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sminv8sf3_mask, "__builtin_ia32_minps256_mask", IX86_BUILTIN_MINPS256_MASK, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SF_QI },
32189 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_mulv4sf3_mask, "__builtin_ia32_mulps_mask", IX86_BUILTIN_MULPS128_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF_QI },
32190 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse_divv4sf3_mask, "__builtin_ia32_divps_mask", IX86_BUILTIN_DIVPS128_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF_QI },
32191 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_mulv2df3_mask, "__builtin_ia32_mulpd_mask", IX86_BUILTIN_MULPD128_MASK, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF_QI },
32192 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse2_divv2df3_mask, "__builtin_ia32_divpd_mask", IX86_BUILTIN_DIVPD128_MASK, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF_QI },
32193 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_divv4df3_mask, "__builtin_ia32_divpd256_mask", IX86_BUILTIN_DIVPD256_MASK, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DF_QI },
32194 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_divv8sf3_mask, "__builtin_ia32_divps256_mask", IX86_BUILTIN_DIVPS256_MASK, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SF_QI },
32195 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_mulv4df3_mask, "__builtin_ia32_mulpd256_mask", IX86_BUILTIN_MULPD256_MASK, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DF_QI },
32196 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_mulv8sf3_mask, "__builtin_ia32_mulps256_mask", IX86_BUILTIN_MULPS256_MASK, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SF_QI },
32197 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_addv2df3_mask, "__builtin_ia32_addpd128_mask", IX86_BUILTIN_ADDPD128_MASK, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF_QI },
32198 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_addv4df3_mask, "__builtin_ia32_addpd256_mask", IX86_BUILTIN_ADDPD256_MASK, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DF_QI },
32199 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_addv4sf3_mask, "__builtin_ia32_addps128_mask", IX86_BUILTIN_ADDPS128_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF_QI },
32200 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_addv8sf3_mask, "__builtin_ia32_addps256_mask", IX86_BUILTIN_ADDPS256_MASK, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SF_QI },
32201 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_subv2df3_mask, "__builtin_ia32_subpd128_mask", IX86_BUILTIN_SUBPD128_MASK, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF_QI },
32202 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_subv4df3_mask, "__builtin_ia32_subpd256_mask", IX86_BUILTIN_SUBPD256_MASK, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DF_QI },
32203 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_subv4sf3_mask, "__builtin_ia32_subps128_mask", IX86_BUILTIN_SUBPS128_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF_QI },
32204 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_subv8sf3_mask, "__builtin_ia32_subps256_mask", IX86_BUILTIN_SUBPS256_MASK, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SF_QI },
32205 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_xorv4df3_mask, "__builtin_ia32_xorpd256_mask", IX86_BUILTIN_XORPD256_MASK, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DF_QI },
32206 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_xorv2df3_mask, "__builtin_ia32_xorpd128_mask", IX86_BUILTIN_XORPD128_MASK, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF_QI },
32207 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_xorv8sf3_mask, "__builtin_ia32_xorps256_mask", IX86_BUILTIN_XORPS256_MASK, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SF_QI },
32208 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_xorv4sf3_mask, "__builtin_ia32_xorps128_mask", IX86_BUILTIN_XORPS128_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF_QI },
32209 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_iorv4df3_mask, "__builtin_ia32_orpd256_mask", IX86_BUILTIN_ORPD256_MASK, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DF_QI },
32210 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_iorv2df3_mask, "__builtin_ia32_orpd128_mask", IX86_BUILTIN_ORPD128_MASK, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF_QI },
32211 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_iorv8sf3_mask, "__builtin_ia32_orps256_mask", IX86_BUILTIN_ORPS256_MASK, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SF_QI },
32212 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_iorv4sf3_mask, "__builtin_ia32_orps128_mask", IX86_BUILTIN_ORPS128_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF_QI },
32213 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512dq_broadcastv8sf_mask, "__builtin_ia32_broadcastf32x2_256_mask", IX86_BUILTIN_BROADCASTF32x2_256, UNKNOWN, (int) V8SF_FTYPE_V4SF_V8SF_QI },
32214 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512dq_broadcastv8si_mask, "__builtin_ia32_broadcasti32x2_256_mask", IX86_BUILTIN_BROADCASTI32x2_256, UNKNOWN, (int) V8SI_FTYPE_V4SI_V8SI_QI },
32215 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512dq_broadcastv4si_mask, "__builtin_ia32_broadcasti32x2_128_mask", IX86_BUILTIN_BROADCASTI32x2_128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_QI },
32216 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512dq_broadcastv4df_mask_1, "__builtin_ia32_broadcastf64x2_256_mask", IX86_BUILTIN_BROADCASTF64X2_256, UNKNOWN, (int) V4DF_FTYPE_V2DF_V4DF_QI },
32217 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512dq_broadcastv4di_mask_1, "__builtin_ia32_broadcasti64x2_256_mask", IX86_BUILTIN_BROADCASTI64X2_256, UNKNOWN, (int) V4DI_FTYPE_V2DI_V4DI_QI },
32218 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_broadcastv8sf_mask_1, "__builtin_ia32_broadcastf32x4_256_mask", IX86_BUILTIN_BROADCASTF32X4_256, UNKNOWN, (int) V8SF_FTYPE_V4SF_V8SF_QI },
32219 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_broadcastv8si_mask_1, "__builtin_ia32_broadcasti32x4_256_mask", IX86_BUILTIN_BROADCASTI32X4_256, UNKNOWN, (int) V8SI_FTYPE_V4SI_V8SI_QI },
32220 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vextractf128v8sf, "__builtin_ia32_extractf32x4_256_mask", IX86_BUILTIN_EXTRACTF32X4_256, UNKNOWN, (int) V4SF_FTYPE_V8SF_INT_V4SF_QI },
32221 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vextractf128v8si, "__builtin_ia32_extracti32x4_256_mask", IX86_BUILTIN_EXTRACTI32X4_256, UNKNOWN, (int) V4SI_FTYPE_V8SI_INT_V4SI_QI },
32222 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512bw_dbpsadbwv16hi_mask, "__builtin_ia32_dbpsadbw256_mask", IX86_BUILTIN_DBPSADBW256, UNKNOWN, (int) V16HI_FTYPE_V32QI_V32QI_INT_V16HI_HI },
32223 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512bw_dbpsadbwv8hi_mask, "__builtin_ia32_dbpsadbw128_mask", IX86_BUILTIN_DBPSADBW128, UNKNOWN, (int) V8HI_FTYPE_V16QI_V16QI_INT_V8HI_QI },
32224 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_fix_truncv4dfv4di2_mask, "__builtin_ia32_cvttpd2qq256_mask", IX86_BUILTIN_CVTTPD2QQ256, UNKNOWN, (int) V4DI_FTYPE_V4DF_V4DI_QI },
32225 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_fix_truncv2dfv2di2_mask, "__builtin_ia32_cvttpd2qq128_mask", IX86_BUILTIN_CVTTPD2QQ128, UNKNOWN, (int) V2DI_FTYPE_V2DF_V2DI_QI },
32226 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_ufix_truncv4dfv4di2_mask, "__builtin_ia32_cvttpd2uqq256_mask", IX86_BUILTIN_CVTTPD2UQQ256, UNKNOWN, (int) V4DI_FTYPE_V4DF_V4DI_QI },
32227 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_ufix_truncv2dfv2di2_mask, "__builtin_ia32_cvttpd2uqq128_mask", IX86_BUILTIN_CVTTPD2UQQ128, UNKNOWN, (int) V2DI_FTYPE_V2DF_V2DI_QI },
32228 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_fix_notruncv4dfv4di2_mask, "__builtin_ia32_cvtpd2qq256_mask", IX86_BUILTIN_CVTPD2QQ256, UNKNOWN, (int) V4DI_FTYPE_V4DF_V4DI_QI },
32229 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_fix_notruncv2dfv2di2_mask, "__builtin_ia32_cvtpd2qq128_mask", IX86_BUILTIN_CVTPD2QQ128, UNKNOWN, (int) V2DI_FTYPE_V2DF_V2DI_QI },
32230 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_ufix_notruncv4dfv4di2_mask, "__builtin_ia32_cvtpd2uqq256_mask", IX86_BUILTIN_CVTPD2UQQ256, UNKNOWN, (int) V4DI_FTYPE_V4DF_V4DI_QI },
32231 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_ufix_notruncv2dfv2di2_mask, "__builtin_ia32_cvtpd2uqq128_mask", IX86_BUILTIN_CVTPD2UQQ128, UNKNOWN, (int) V2DI_FTYPE_V2DF_V2DI_QI },
32232 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_ufix_notruncv4dfv4si2_mask, "__builtin_ia32_cvtpd2udq256_mask", IX86_BUILTIN_CVTPD2UDQ256_MASK, UNKNOWN, (int) V4SI_FTYPE_V4DF_V4SI_QI },
32233 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_ufix_notruncv2dfv2si2_mask, "__builtin_ia32_cvtpd2udq128_mask", IX86_BUILTIN_CVTPD2UDQ128_MASK, UNKNOWN, (int) V4SI_FTYPE_V2DF_V4SI_QI },
32234 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_fix_truncv4sfv4di2_mask, "__builtin_ia32_cvttps2qq256_mask", IX86_BUILTIN_CVTTPS2QQ256, UNKNOWN, (int) V4DI_FTYPE_V4SF_V4DI_QI },
32235 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_fix_truncv2sfv2di2_mask, "__builtin_ia32_cvttps2qq128_mask", IX86_BUILTIN_CVTTPS2QQ128, UNKNOWN, (int) V2DI_FTYPE_V4SF_V2DI_QI },
32236 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_ufix_truncv4sfv4di2_mask, "__builtin_ia32_cvttps2uqq256_mask", IX86_BUILTIN_CVTTPS2UQQ256, UNKNOWN, (int) V4DI_FTYPE_V4SF_V4DI_QI },
32237 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_ufix_truncv2sfv2di2_mask, "__builtin_ia32_cvttps2uqq128_mask", IX86_BUILTIN_CVTTPS2UQQ128, UNKNOWN, (int) V2DI_FTYPE_V4SF_V2DI_QI },
32238 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_fix_truncv8sfv8si2_mask, "__builtin_ia32_cvttps2dq256_mask", IX86_BUILTIN_CVTTPS2DQ256_MASK, UNKNOWN, (int) V8SI_FTYPE_V8SF_V8SI_QI },
32239 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_fix_truncv4sfv4si2_mask, "__builtin_ia32_cvttps2dq128_mask", IX86_BUILTIN_CVTTPS2DQ128_MASK, UNKNOWN, (int) V4SI_FTYPE_V4SF_V4SI_QI },
32240 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_ufix_truncv8sfv8si2_mask, "__builtin_ia32_cvttps2udq256_mask", IX86_BUILTIN_CVTTPS2UDQ256, UNKNOWN, (int) V8SI_FTYPE_V8SF_V8SI_QI },
32241 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_ufix_truncv4sfv4si2_mask, "__builtin_ia32_cvttps2udq128_mask", IX86_BUILTIN_CVTTPS2UDQ128, UNKNOWN, (int) V4SI_FTYPE_V4SF_V4SI_QI },
32242 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_fix_truncv4dfv4si2_mask, "__builtin_ia32_cvttpd2dq256_mask", IX86_BUILTIN_CVTTPD2DQ256_MASK, UNKNOWN, (int) V4SI_FTYPE_V4DF_V4SI_QI },
32243 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse2_cvttpd2dq_mask, "__builtin_ia32_cvttpd2dq128_mask", IX86_BUILTIN_CVTTPD2DQ128_MASK, UNKNOWN, (int) V4SI_FTYPE_V2DF_V4SI_QI },
32244 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_ufix_truncv4dfv4si2_mask, "__builtin_ia32_cvttpd2udq256_mask", IX86_BUILTIN_CVTTPD2UDQ256_MASK, UNKNOWN, (int) V4SI_FTYPE_V4DF_V4SI_QI },
32245 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_ufix_truncv2dfv2si2_mask, "__builtin_ia32_cvttpd2udq128_mask", IX86_BUILTIN_CVTTPD2UDQ128_MASK, UNKNOWN, (int) V4SI_FTYPE_V2DF_V4SI_QI },
32246 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_cvtpd2dq256_mask, "__builtin_ia32_cvtpd2dq256_mask", IX86_BUILTIN_CVTPD2DQ256_MASK, UNKNOWN, (int) V4SI_FTYPE_V4DF_V4SI_QI },
32247 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse2_cvtpd2dq_mask, "__builtin_ia32_cvtpd2dq128_mask", IX86_BUILTIN_CVTPD2DQ128_MASK, UNKNOWN, (int) V4SI_FTYPE_V2DF_V4SI_QI },
32248 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_floatv4siv4df2_mask, "__builtin_ia32_cvtdq2pd256_mask", IX86_BUILTIN_CVTDQ2PD256_MASK, UNKNOWN, (int) V4DF_FTYPE_V4SI_V4DF_QI },
32249 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse2_cvtdq2pd_mask, "__builtin_ia32_cvtdq2pd128_mask", IX86_BUILTIN_CVTDQ2PD128_MASK, UNKNOWN, (int) V2DF_FTYPE_V4SI_V2DF_QI },
32250 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_ufloatv4siv4df2_mask, "__builtin_ia32_cvtudq2pd256_mask", IX86_BUILTIN_CVTUDQ2PD256_MASK, UNKNOWN, (int) V4DF_FTYPE_V4SI_V4DF_QI },
32251 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_ufloatv2siv2df2_mask, "__builtin_ia32_cvtudq2pd128_mask", IX86_BUILTIN_CVTUDQ2PD128_MASK, UNKNOWN, (int) V2DF_FTYPE_V4SI_V2DF_QI },
32252 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_floatv8siv8sf2_mask, "__builtin_ia32_cvtdq2ps256_mask", IX86_BUILTIN_CVTDQ2PS256_MASK, UNKNOWN, (int) V8SF_FTYPE_V8SI_V8SF_QI },
32253 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_floatv4siv4sf2_mask, "__builtin_ia32_cvtdq2ps128_mask", IX86_BUILTIN_CVTDQ2PS128_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SI_V4SF_QI },
32254 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_ufloatv8siv8sf2_mask, "__builtin_ia32_cvtudq2ps256_mask", IX86_BUILTIN_CVTUDQ2PS256_MASK, UNKNOWN, (int) V8SF_FTYPE_V8SI_V8SF_QI },
32255 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_ufloatv4siv4sf2_mask, "__builtin_ia32_cvtudq2ps128_mask", IX86_BUILTIN_CVTUDQ2PS128_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SI_V4SF_QI },
32256 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_cvtps2pd256_mask, "__builtin_ia32_cvtps2pd256_mask", IX86_BUILTIN_CVTPS2PD256_MASK, UNKNOWN, (int) V4DF_FTYPE_V4SF_V4DF_QI },
32257 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse2_cvtps2pd_mask, "__builtin_ia32_cvtps2pd128_mask", IX86_BUILTIN_CVTPS2PD128_MASK, UNKNOWN, (int) V2DF_FTYPE_V4SF_V2DF_QI },
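/* The conversion descriptors above cover the masked forms of the usual
   SSE/AVX float<->integer and float<->double conversions at 128/256-bit
   widths.  A hedged sketch, assuming <immintrin.h> and -mavx512vl:

     __m256d d = _mm256_mask_cvtps_pd (passthru, k, f128);

   which is expected to reduce to __builtin_ia32_cvtps2pd256_mask from
   this table.  */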
32258 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vec_dupv32qi_mask, "__builtin_ia32_pbroadcastb256_mask", IX86_BUILTIN_PBROADCASTB256_MASK, UNKNOWN, (int) V32QI_FTYPE_V16QI_V32QI_SI },
32259 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vec_dup_gprv32qi_mask, "__builtin_ia32_pbroadcastb256_gpr_mask", IX86_BUILTIN_PBROADCASTB256_GPR_MASK, UNKNOWN, (int) V32QI_FTYPE_QI_V32QI_SI },
32260 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vec_dupv16qi_mask, "__builtin_ia32_pbroadcastb128_mask", IX86_BUILTIN_PBROADCASTB128_MASK, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_HI },
32261 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vec_dup_gprv16qi_mask, "__builtin_ia32_pbroadcastb128_gpr_mask", IX86_BUILTIN_PBROADCASTB128_GPR_MASK, UNKNOWN, (int) V16QI_FTYPE_QI_V16QI_HI },
32262 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vec_dupv16hi_mask, "__builtin_ia32_pbroadcastw256_mask", IX86_BUILTIN_PBROADCASTW256_MASK, UNKNOWN, (int) V16HI_FTYPE_V8HI_V16HI_HI },
32263 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vec_dup_gprv16hi_mask, "__builtin_ia32_pbroadcastw256_gpr_mask", IX86_BUILTIN_PBROADCASTW256_GPR_MASK, UNKNOWN, (int) V16HI_FTYPE_HI_V16HI_HI },
32264 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vec_dupv8hi_mask, "__builtin_ia32_pbroadcastw128_mask", IX86_BUILTIN_PBROADCASTW128_MASK, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_QI },
32265 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vec_dup_gprv8hi_mask, "__builtin_ia32_pbroadcastw128_gpr_mask", IX86_BUILTIN_PBROADCASTW128_GPR_MASK, UNKNOWN, (int) V8HI_FTYPE_HI_V8HI_QI },
32266 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vec_dupv8si_mask, "__builtin_ia32_pbroadcastd256_mask", IX86_BUILTIN_PBROADCASTD256_MASK, UNKNOWN, (int) V8SI_FTYPE_V4SI_V8SI_QI },
32267 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vec_dup_gprv8si_mask, "__builtin_ia32_pbroadcastd256_gpr_mask", IX86_BUILTIN_PBROADCASTD256_GPR_MASK, UNKNOWN, (int) V8SI_FTYPE_SI_V8SI_QI },
32268 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vec_dupv4si_mask, "__builtin_ia32_pbroadcastd128_mask", IX86_BUILTIN_PBROADCASTD128_MASK, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_QI },
32269 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vec_dup_gprv4si_mask, "__builtin_ia32_pbroadcastd128_gpr_mask", IX86_BUILTIN_PBROADCASTD128_GPR_MASK, UNKNOWN, (int) V4SI_FTYPE_SI_V4SI_QI },
32270 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vec_dupv4di_mask, "__builtin_ia32_pbroadcastq256_mask", IX86_BUILTIN_PBROADCASTQ256_MASK, UNKNOWN, (int) V4DI_FTYPE_V2DI_V4DI_QI },
32271 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vec_dup_gprv4di_mask, "__builtin_ia32_pbroadcastq256_gpr_mask", IX86_BUILTIN_PBROADCASTQ256_GPR_MASK, UNKNOWN, (int) V4DI_FTYPE_DI_V4DI_QI },
32272 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vec_dupv2di_mask, "__builtin_ia32_pbroadcastq128_mask", IX86_BUILTIN_PBROADCASTQ128_MASK, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_QI },
32273 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vec_dup_gprv2di_mask, "__builtin_ia32_pbroadcastq128_gpr_mask", IX86_BUILTIN_PBROADCASTQ128_GPR_MASK, UNKNOWN, (int) V2DI_FTYPE_DI_V2DI_QI },
32274 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vec_dupv8sf_mask, "__builtin_ia32_broadcastss256_mask", IX86_BUILTIN_BROADCASTSS256, UNKNOWN, (int) V8SF_FTYPE_V4SF_V8SF_QI },
32275 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vec_dupv4sf_mask, "__builtin_ia32_broadcastss128_mask", IX86_BUILTIN_BROADCASTSS128, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_QI },
32276 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vec_dupv4df_mask, "__builtin_ia32_broadcastsd256_mask", IX86_BUILTIN_BROADCASTSD256, UNKNOWN, (int) V4DF_FTYPE_V2DF_V4DF_QI },
32277 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vextractf128v4df, "__builtin_ia32_extractf64x2_256_mask", IX86_BUILTIN_EXTRACTF64X2_256, UNKNOWN, (int) V2DF_FTYPE_V4DF_INT_V2DF_QI },
32278 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vextractf128v4di, "__builtin_ia32_extracti64x2_256_mask", IX86_BUILTIN_EXTRACTI64X2_256, UNKNOWN, (int) V2DI_FTYPE_V4DI_INT_V2DI_QI },
32279 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vinsertv8sf, "__builtin_ia32_insertf32x4_256_mask", IX86_BUILTIN_INSERTF32X4_256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V4SF_INT_V8SF_QI },
32280 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vinsertv8si, "__builtin_ia32_inserti32x4_256_mask", IX86_BUILTIN_INSERTI32X4_256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V4SI_INT_V8SI_QI },
32281 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_sign_extendv16qiv16hi2_mask, "__builtin_ia32_pmovsxbw256_mask", IX86_BUILTIN_PMOVSXBW256_MASK, UNKNOWN, (int) V16HI_FTYPE_V16QI_V16HI_HI },
32282 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse4_1_sign_extendv8qiv8hi2_mask, "__builtin_ia32_pmovsxbw128_mask", IX86_BUILTIN_PMOVSXBW128_MASK, UNKNOWN, (int) V8HI_FTYPE_V16QI_V8HI_QI },
32283 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_sign_extendv8qiv8si2_mask, "__builtin_ia32_pmovsxbd256_mask", IX86_BUILTIN_PMOVSXBD256_MASK, UNKNOWN, (int) V8SI_FTYPE_V16QI_V8SI_QI },
32284 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse4_1_sign_extendv4qiv4si2_mask, "__builtin_ia32_pmovsxbd128_mask", IX86_BUILTIN_PMOVSXBD128_MASK, UNKNOWN, (int) V4SI_FTYPE_V16QI_V4SI_QI },
32285 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_sign_extendv4qiv4di2_mask, "__builtin_ia32_pmovsxbq256_mask", IX86_BUILTIN_PMOVSXBQ256_MASK, UNKNOWN, (int) V4DI_FTYPE_V16QI_V4DI_QI },
32286 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse4_1_sign_extendv2qiv2di2_mask, "__builtin_ia32_pmovsxbq128_mask", IX86_BUILTIN_PMOVSXBQ128_MASK, UNKNOWN, (int) V2DI_FTYPE_V16QI_V2DI_QI },
32287 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_sign_extendv8hiv8si2_mask, "__builtin_ia32_pmovsxwd256_mask", IX86_BUILTIN_PMOVSXWD256_MASK, UNKNOWN, (int) V8SI_FTYPE_V8HI_V8SI_QI },
32288 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse4_1_sign_extendv4hiv4si2_mask, "__builtin_ia32_pmovsxwd128_mask", IX86_BUILTIN_PMOVSXWD128_MASK, UNKNOWN, (int) V4SI_FTYPE_V8HI_V4SI_QI },
32289 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_sign_extendv4hiv4di2_mask, "__builtin_ia32_pmovsxwq256_mask", IX86_BUILTIN_PMOVSXWQ256_MASK, UNKNOWN, (int) V4DI_FTYPE_V8HI_V4DI_QI },
32290 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse4_1_sign_extendv2hiv2di2_mask, "__builtin_ia32_pmovsxwq128_mask", IX86_BUILTIN_PMOVSXWQ128_MASK, UNKNOWN, (int) V2DI_FTYPE_V8HI_V2DI_QI },
32291 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_sign_extendv4siv4di2_mask, "__builtin_ia32_pmovsxdq256_mask", IX86_BUILTIN_PMOVSXDQ256_MASK, UNKNOWN, (int) V4DI_FTYPE_V4SI_V4DI_QI },
32292 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse4_1_sign_extendv2siv2di2_mask, "__builtin_ia32_pmovsxdq128_mask", IX86_BUILTIN_PMOVSXDQ128_MASK, UNKNOWN, (int) V2DI_FTYPE_V4SI_V2DI_QI },
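/* Masked zero extension (vpmovzx) builtins.  */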
32293 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_zero_extendv16qiv16hi2_mask, "__builtin_ia32_pmovzxbw256_mask", IX86_BUILTIN_PMOVZXBW256_MASK, UNKNOWN, (int) V16HI_FTYPE_V16QI_V16HI_HI },
32294 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse4_1_zero_extendv8qiv8hi2_mask, "__builtin_ia32_pmovzxbw128_mask", IX86_BUILTIN_PMOVZXBW128_MASK, UNKNOWN, (int) V8HI_FTYPE_V16QI_V8HI_QI },
32295 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_zero_extendv8qiv8si2_mask, "__builtin_ia32_pmovzxbd256_mask", IX86_BUILTIN_PMOVZXBD256_MASK, UNKNOWN, (int) V8SI_FTYPE_V16QI_V8SI_QI },
32296 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse4_1_zero_extendv4qiv4si2_mask, "__builtin_ia32_pmovzxbd128_mask", IX86_BUILTIN_PMOVZXBD128_MASK, UNKNOWN, (int) V4SI_FTYPE_V16QI_V4SI_QI },
32297 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_zero_extendv4qiv4di2_mask, "__builtin_ia32_pmovzxbq256_mask", IX86_BUILTIN_PMOVZXBQ256_MASK, UNKNOWN, (int) V4DI_FTYPE_V16QI_V4DI_QI },
32298 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse4_1_zero_extendv2qiv2di2_mask, "__builtin_ia32_pmovzxbq128_mask", IX86_BUILTIN_PMOVZXBQ128_MASK, UNKNOWN, (int) V2DI_FTYPE_V16QI_V2DI_QI },
32299 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_zero_extendv8hiv8si2_mask, "__builtin_ia32_pmovzxwd256_mask", IX86_BUILTIN_PMOVZXWD256_MASK, UNKNOWN, (int) V8SI_FTYPE_V8HI_V8SI_QI },
32300 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse4_1_zero_extendv4hiv4si2_mask, "__builtin_ia32_pmovzxwd128_mask", IX86_BUILTIN_PMOVZXWD128_MASK, UNKNOWN, (int) V4SI_FTYPE_V8HI_V4SI_QI },
32301 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_zero_extendv4hiv4di2_mask, "__builtin_ia32_pmovzxwq256_mask", IX86_BUILTIN_PMOVZXWQ256_MASK, UNKNOWN, (int) V4DI_FTYPE_V8HI_V4DI_QI },
32302 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse4_1_zero_extendv2hiv2di2_mask, "__builtin_ia32_pmovzxwq128_mask", IX86_BUILTIN_PMOVZXWQ128_MASK, UNKNOWN, (int) V2DI_FTYPE_V8HI_V2DI_QI },
32303 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_zero_extendv4siv4di2_mask, "__builtin_ia32_pmovzxdq256_mask", IX86_BUILTIN_PMOVZXDQ256_MASK, UNKNOWN, (int) V4DI_FTYPE_V4SI_V4DI_QI },
32304 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse4_1_zero_extendv2siv2di2_mask, "__builtin_ia32_pmovzxdq128_mask", IX86_BUILTIN_PMOVZXDQ128_MASK, UNKNOWN, (int) V2DI_FTYPE_V4SI_V2DI_QI },
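/* AVX-512DQ vreduce builtins, packed 128/256-bit and scalar forms.  */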
32305 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_reducepv4df_mask, "__builtin_ia32_reducepd256_mask", IX86_BUILTIN_REDUCEPD256_MASK, UNKNOWN, (int) V4DF_FTYPE_V4DF_INT_V4DF_QI },
32306 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_reducepv2df_mask, "__builtin_ia32_reducepd128_mask", IX86_BUILTIN_REDUCEPD128_MASK, UNKNOWN, (int) V2DF_FTYPE_V2DF_INT_V2DF_QI },
32307 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_reducepv8sf_mask, "__builtin_ia32_reduceps256_mask", IX86_BUILTIN_REDUCEPS256_MASK, UNKNOWN, (int) V8SF_FTYPE_V8SF_INT_V8SF_QI },
32308 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_reducepv4sf_mask, "__builtin_ia32_reduceps128_mask", IX86_BUILTIN_REDUCEPS128_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SF_INT_V4SF_QI },
32309 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_reducesv2df, "__builtin_ia32_reducesd", IX86_BUILTIN_REDUCESD_MASK, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
32310 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_reducesv4sf, "__builtin_ia32_reducess", IX86_BUILTIN_REDUCESS_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
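/* AVX-512BW/VL variable permutes on 16-bit elements.  */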
32311 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_permvarv16hi_mask, "__builtin_ia32_permvarhi256_mask", IX86_BUILTIN_VPERMVARHI256_MASK, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_V16HI_HI },
32312 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_permvarv8hi_mask, "__builtin_ia32_permvarhi128_mask", IX86_BUILTIN_VPERMVARHI128_MASK, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_V8HI_QI },
32313 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermt2varv16hi3_mask, "__builtin_ia32_vpermt2varhi256_mask", IX86_BUILTIN_VPERMT2VARHI256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_V16HI_HI },
32314 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermt2varv16hi3_maskz, "__builtin_ia32_vpermt2varhi256_maskz", IX86_BUILTIN_VPERMT2VARHI256_MASKZ, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_V16HI_HI },
32315 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermt2varv8hi3_mask, "__builtin_ia32_vpermt2varhi128_mask", IX86_BUILTIN_VPERMT2VARHI128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_V8HI_QI },
32316 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermt2varv8hi3_maskz, "__builtin_ia32_vpermt2varhi128_maskz", IX86_BUILTIN_VPERMT2VARHI128_MASKZ, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_V8HI_QI },
32317 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermi2varv16hi3_mask, "__builtin_ia32_vpermi2varhi256_mask", IX86_BUILTIN_VPERMI2VARHI256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_V16HI_HI },
32318 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermi2varv8hi3_mask, "__builtin_ia32_vpermi2varhi128_mask", IX86_BUILTIN_VPERMI2VARHI128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_V8HI_QI },
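/* Reciprocal and reciprocal-square-root approximations (rcp14/rsqrt14) and masked square roots.  */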
32319 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_rcp14v4df_mask, "__builtin_ia32_rcp14pd256_mask", IX86_BUILTIN_RCP14PD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_QI },
32320 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_rcp14v2df_mask, "__builtin_ia32_rcp14pd128_mask", IX86_BUILTIN_RCP14PD128, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_QI },
32321 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_rcp14v8sf_mask, "__builtin_ia32_rcp14ps256_mask", IX86_BUILTIN_RCP14PS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_QI },
32322 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_rcp14v4sf_mask, "__builtin_ia32_rcp14ps128_mask", IX86_BUILTIN_RCP14PS128, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_QI },
32323 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_rsqrt14v4df_mask, "__builtin_ia32_rsqrt14pd256_mask", IX86_BUILTIN_RSQRT14PD256_MASK, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_QI },
32324 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_rsqrt14v2df_mask, "__builtin_ia32_rsqrt14pd128_mask", IX86_BUILTIN_RSQRT14PD128_MASK, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_QI },
32325 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_rsqrt14v8sf_mask, "__builtin_ia32_rsqrt14ps256_mask", IX86_BUILTIN_RSQRT14PS256_MASK, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_QI },
32326 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_rsqrt14v4sf_mask, "__builtin_ia32_rsqrt14ps128_mask", IX86_BUILTIN_RSQRT14PS128_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_QI },
32327 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_sqrtv4df2_mask, "__builtin_ia32_sqrtpd256_mask", IX86_BUILTIN_SQRTPD256_MASK, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_QI },
32328 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse2_sqrtv2df2_mask, "__builtin_ia32_sqrtpd128_mask", IX86_BUILTIN_SQRTPD128_MASK, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_QI },
32329 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_sqrtv8sf2_mask, "__builtin_ia32_sqrtps256_mask", IX86_BUILTIN_SQRTPS256_MASK, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_QI },
32330 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse_sqrtv4sf2_mask, "__builtin_ia32_sqrtps128_mask", IX86_BUILTIN_SQRTPS128_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_QI },
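/* Masked integer add/sub builtins, plain and saturating, 128/256-bit.
   For illustration only (a sketch, not the installed header text): the
   wrappers in avx512vlintrin.h reach builtins such as
   __builtin_ia32_paddd256_mask roughly like this, passing the pass-through
   source and the write mask as the trailing arguments:

     __m256i
     _mm256_mask_add_epi32 (__m256i __W, __mmask8 __U, __m256i __A, __m256i __B)
     {
       return (__m256i) __builtin_ia32_paddd256_mask ((__v8si) __A,
                                                      (__v8si) __B,
                                                      (__v8si) __W,
                                                      (__mmask8) __U);
     }  */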
32331 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_addv16qi3_mask, "__builtin_ia32_paddb128_mask", IX86_BUILTIN_PADDB128_MASK, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_V16QI_HI },
32332 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_addv8hi3_mask, "__builtin_ia32_paddw128_mask", IX86_BUILTIN_PADDW128_MASK, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_V8HI_QI },
32333 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_addv4si3_mask, "__builtin_ia32_paddd128_mask", IX86_BUILTIN_PADDD128_MASK, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI_QI },
32334 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_addv2di3_mask, "__builtin_ia32_paddq128_mask", IX86_BUILTIN_PADDQ128_MASK, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI_QI },
32335 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_subv16qi3_mask, "__builtin_ia32_psubb128_mask", IX86_BUILTIN_PSUBB128_MASK, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_V16QI_HI },
32336 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_subv8hi3_mask, "__builtin_ia32_psubw128_mask", IX86_BUILTIN_PSUBW128_MASK, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_V8HI_QI },
32337 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_subv4si3_mask, "__builtin_ia32_psubd128_mask", IX86_BUILTIN_PSUBD128_MASK, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI_QI },
32338 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_subv2di3_mask, "__builtin_ia32_psubq128_mask", IX86_BUILTIN_PSUBQ128_MASK, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI_QI },
32339 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse2_ssaddv16qi3_mask, "__builtin_ia32_paddsb128_mask", IX86_BUILTIN_PADDSB128_MASK, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_V16QI_HI },
32340 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse2_ssaddv8hi3_mask, "__builtin_ia32_paddsw128_mask", IX86_BUILTIN_PADDSW128_MASK, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_V8HI_QI },
32341 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse2_sssubv16qi3_mask, "__builtin_ia32_psubsb128_mask", IX86_BUILTIN_PSUBSB128_MASK, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_V16QI_HI },
32342 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse2_sssubv8hi3_mask, "__builtin_ia32_psubsw128_mask", IX86_BUILTIN_PSUBSW128_MASK, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_V8HI_QI },
32343 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse2_usaddv16qi3_mask, "__builtin_ia32_paddusb128_mask", IX86_BUILTIN_PADDUSB128_MASK, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_V16QI_HI },
32344 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse2_usaddv8hi3_mask, "__builtin_ia32_paddusw128_mask", IX86_BUILTIN_PADDUSW128_MASK, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_V8HI_QI },
32345 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse2_ussubv16qi3_mask, "__builtin_ia32_psubusb128_mask", IX86_BUILTIN_PSUBUSB128_MASK, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_V16QI_HI },
32346 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse2_ussubv8hi3_mask, "__builtin_ia32_psubusw128_mask", IX86_BUILTIN_PSUBUSW128_MASK, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_V8HI_QI },
32347 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_addv32qi3_mask, "__builtin_ia32_paddb256_mask", IX86_BUILTIN_PADDB256_MASK, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI_V32QI_SI },
32348 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_addv16hi3_mask, "__builtin_ia32_paddw256_mask", IX86_BUILTIN_PADDW256_MASK, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_V16HI_HI },
32349 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_addv8si3_mask, "__builtin_ia32_paddd256_mask", IX86_BUILTIN_PADDD256_MASK, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_V8SI_QI },
32350 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_addv4di3_mask, "__builtin_ia32_paddq256_mask", IX86_BUILTIN_PADDQ256_MASK, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_V4DI_QI },
32351 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_ssaddv32qi3_mask, "__builtin_ia32_paddsb256_mask", IX86_BUILTIN_PADDSB256_MASK, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI_V32QI_SI },
32352 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_ssaddv16hi3_mask, "__builtin_ia32_paddsw256_mask", IX86_BUILTIN_PADDSW256_MASK, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_V16HI_HI },
32353 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_usaddv32qi3_mask, "__builtin_ia32_paddusb256_mask", IX86_BUILTIN_PADDUSB256_MASK, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI_V32QI_SI },
32354 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_usaddv16hi3_mask, "__builtin_ia32_paddusw256_mask", IX86_BUILTIN_PADDUSW256_MASK, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_V16HI_HI },
32355 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_subv32qi3_mask, "__builtin_ia32_psubb256_mask", IX86_BUILTIN_PSUBB256_MASK, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI_V32QI_SI },
32356 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_subv16hi3_mask, "__builtin_ia32_psubw256_mask", IX86_BUILTIN_PSUBW256_MASK, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_V16HI_HI },
32357 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_subv8si3_mask, "__builtin_ia32_psubd256_mask", IX86_BUILTIN_PSUBD256_MASK, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_V8SI_QI },
32358 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_subv4di3_mask, "__builtin_ia32_psubq256_mask", IX86_BUILTIN_PSUBQ256_MASK, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_V4DI_QI },
32359 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_sssubv32qi3_mask, "__builtin_ia32_psubsb256_mask", IX86_BUILTIN_PSUBSB256_MASK, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI_V32QI_SI },
32360 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_sssubv16hi3_mask, "__builtin_ia32_psubsw256_mask", IX86_BUILTIN_PSUBSW256_MASK, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_V16HI_HI },
32361 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_ussubv32qi3_mask, "__builtin_ia32_psubusb256_mask", IX86_BUILTIN_PSUBUSB256_MASK, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI_V32QI_SI },
32362 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_ussubv16hi3_mask, "__builtin_ia32_psubusw256_mask", IX86_BUILTIN_PSUBUSW256_MASK, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_V16HI_HI },
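/* 128-bit-lane shuffles (shuf_f64x2 / shuf_i64x2 / shuf_i32x4 / shuf_f32x4).  */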
32363 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512dq_shuf_f64x2_mask, "__builtin_ia32_shuf_f64x2_256_mask", IX86_BUILTIN_SHUF_F64x2_256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_INT_V4DF_QI },
32364 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512dq_shuf_i64x2_mask, "__builtin_ia32_shuf_i64x2_256_mask", IX86_BUILTIN_SHUF_I64x2_256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_INT_V4DI_QI },
32365 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_shuf_i32x4_mask, "__builtin_ia32_shuf_i32x4_256_mask", IX86_BUILTIN_SHUF_I32x4_256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_INT_V8SI_QI },
32366 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_shuf_f32x4_mask, "__builtin_ia32_shuf_f32x4_256_mask", IX86_BUILTIN_SHUF_F32x4_256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_INT_V8SF_QI },
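/* Masked truncating down-conversions (vpmov, vpmovs, vpmovus).  */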
32367 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_truncatev8hiv8qi2_mask, "__builtin_ia32_pmovwb128_mask", IX86_BUILTIN_PMOVWB128, UNKNOWN, (int) V16QI_FTYPE_V8HI_V16QI_QI },
32368 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_truncatev16hiv16qi2_mask, "__builtin_ia32_pmovwb256_mask", IX86_BUILTIN_PMOVWB256, UNKNOWN, (int) V16QI_FTYPE_V16HI_V16QI_HI },
32369 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ss_truncatev8hiv8qi2_mask, "__builtin_ia32_pmovswb128_mask", IX86_BUILTIN_PMOVSWB128, UNKNOWN, (int) V16QI_FTYPE_V8HI_V16QI_QI },
32370 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ss_truncatev16hiv16qi2_mask, "__builtin_ia32_pmovswb256_mask", IX86_BUILTIN_PMOVSWB256, UNKNOWN, (int) V16QI_FTYPE_V16HI_V16QI_HI },
32371 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_us_truncatev8hiv8qi2_mask, "__builtin_ia32_pmovuswb128_mask", IX86_BUILTIN_PMOVUSWB128, UNKNOWN, (int) V16QI_FTYPE_V8HI_V16QI_QI },
32372 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_us_truncatev16hiv16qi2_mask, "__builtin_ia32_pmovuswb256_mask", IX86_BUILTIN_PMOVUSWB256, UNKNOWN, (int) V16QI_FTYPE_V16HI_V16QI_HI },
32373 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_truncatev4siv4qi2_mask, "__builtin_ia32_pmovdb128_mask", IX86_BUILTIN_PMOVDB128, UNKNOWN, (int) V16QI_FTYPE_V4SI_V16QI_QI },
32374 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_truncatev8siv8qi2_mask, "__builtin_ia32_pmovdb256_mask", IX86_BUILTIN_PMOVDB256, UNKNOWN, (int) V16QI_FTYPE_V8SI_V16QI_QI },
32375 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ss_truncatev4siv4qi2_mask, "__builtin_ia32_pmovsdb128_mask", IX86_BUILTIN_PMOVSDB128, UNKNOWN, (int) V16QI_FTYPE_V4SI_V16QI_QI },
32376 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ss_truncatev8siv8qi2_mask, "__builtin_ia32_pmovsdb256_mask", IX86_BUILTIN_PMOVSDB256, UNKNOWN, (int) V16QI_FTYPE_V8SI_V16QI_QI },
32377 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_us_truncatev4siv4qi2_mask, "__builtin_ia32_pmovusdb128_mask", IX86_BUILTIN_PMOVUSDB128, UNKNOWN, (int) V16QI_FTYPE_V4SI_V16QI_QI },
32378 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_us_truncatev8siv8qi2_mask, "__builtin_ia32_pmovusdb256_mask", IX86_BUILTIN_PMOVUSDB256, UNKNOWN, (int) V16QI_FTYPE_V8SI_V16QI_QI },
32379 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_truncatev4siv4hi2_mask, "__builtin_ia32_pmovdw128_mask", IX86_BUILTIN_PMOVDW128, UNKNOWN, (int) V8HI_FTYPE_V4SI_V8HI_QI },
32380 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_truncatev8siv8hi2_mask, "__builtin_ia32_pmovdw256_mask", IX86_BUILTIN_PMOVDW256, UNKNOWN, (int) V8HI_FTYPE_V8SI_V8HI_QI },
32381 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ss_truncatev4siv4hi2_mask, "__builtin_ia32_pmovsdw128_mask", IX86_BUILTIN_PMOVSDW128, UNKNOWN, (int) V8HI_FTYPE_V4SI_V8HI_QI },
32382 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ss_truncatev8siv8hi2_mask, "__builtin_ia32_pmovsdw256_mask", IX86_BUILTIN_PMOVSDW256, UNKNOWN, (int) V8HI_FTYPE_V8SI_V8HI_QI },
32383 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_us_truncatev4siv4hi2_mask, "__builtin_ia32_pmovusdw128_mask", IX86_BUILTIN_PMOVUSDW128, UNKNOWN, (int) V8HI_FTYPE_V4SI_V8HI_QI },
32384 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_us_truncatev8siv8hi2_mask, "__builtin_ia32_pmovusdw256_mask", IX86_BUILTIN_PMOVUSDW256, UNKNOWN, (int) V8HI_FTYPE_V8SI_V8HI_QI },
32385 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_truncatev2div2qi2_mask, "__builtin_ia32_pmovqb128_mask", IX86_BUILTIN_PMOVQB128, UNKNOWN, (int) V16QI_FTYPE_V2DI_V16QI_QI },
32386 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_truncatev4div4qi2_mask, "__builtin_ia32_pmovqb256_mask", IX86_BUILTIN_PMOVQB256, UNKNOWN, (int) V16QI_FTYPE_V4DI_V16QI_QI },
32387 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ss_truncatev2div2qi2_mask, "__builtin_ia32_pmovsqb128_mask", IX86_BUILTIN_PMOVSQB128, UNKNOWN, (int) V16QI_FTYPE_V2DI_V16QI_QI },
32388 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ss_truncatev4div4qi2_mask, "__builtin_ia32_pmovsqb256_mask", IX86_BUILTIN_PMOVSQB256, UNKNOWN, (int) V16QI_FTYPE_V4DI_V16QI_QI },
32389 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_us_truncatev2div2qi2_mask, "__builtin_ia32_pmovusqb128_mask", IX86_BUILTIN_PMOVUSQB128, UNKNOWN, (int) V16QI_FTYPE_V2DI_V16QI_QI },
32390 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_us_truncatev4div4qi2_mask, "__builtin_ia32_pmovusqb256_mask", IX86_BUILTIN_PMOVUSQB256, UNKNOWN, (int) V16QI_FTYPE_V4DI_V16QI_QI },
32391 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_truncatev2div2hi2_mask, "__builtin_ia32_pmovqw128_mask", IX86_BUILTIN_PMOVQW128, UNKNOWN, (int) V8HI_FTYPE_V2DI_V8HI_QI },
32392 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_truncatev4div4hi2_mask, "__builtin_ia32_pmovqw256_mask", IX86_BUILTIN_PMOVQW256, UNKNOWN, (int) V8HI_FTYPE_V4DI_V8HI_QI },
32393 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ss_truncatev2div2hi2_mask, "__builtin_ia32_pmovsqw128_mask", IX86_BUILTIN_PMOVSQW128, UNKNOWN, (int) V8HI_FTYPE_V2DI_V8HI_QI },
32394 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ss_truncatev4div4hi2_mask, "__builtin_ia32_pmovsqw256_mask", IX86_BUILTIN_PMOVSQW256, UNKNOWN, (int) V8HI_FTYPE_V4DI_V8HI_QI },
32395 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_us_truncatev2div2hi2_mask, "__builtin_ia32_pmovusqw128_mask", IX86_BUILTIN_PMOVUSQW128, UNKNOWN, (int) V8HI_FTYPE_V2DI_V8HI_QI },
32396 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_us_truncatev4div4hi2_mask, "__builtin_ia32_pmovusqw256_mask", IX86_BUILTIN_PMOVUSQW256, UNKNOWN, (int) V8HI_FTYPE_V4DI_V8HI_QI },
32397 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_truncatev2div2si2_mask, "__builtin_ia32_pmovqd128_mask", IX86_BUILTIN_PMOVQD128, UNKNOWN, (int) V4SI_FTYPE_V2DI_V4SI_QI },
32398 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_truncatev4div4si2_mask, "__builtin_ia32_pmovqd256_mask", IX86_BUILTIN_PMOVQD256, UNKNOWN, (int) V4SI_FTYPE_V4DI_V4SI_QI },
32399 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ss_truncatev2div2si2_mask, "__builtin_ia32_pmovsqd128_mask", IX86_BUILTIN_PMOVSQD128, UNKNOWN, (int) V4SI_FTYPE_V2DI_V4SI_QI },
32400 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ss_truncatev4div4si2_mask, "__builtin_ia32_pmovsqd256_mask", IX86_BUILTIN_PMOVSQD256, UNKNOWN, (int) V4SI_FTYPE_V4DI_V4SI_QI },
32401 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_us_truncatev2div2si2_mask, "__builtin_ia32_pmovusqd128_mask", IX86_BUILTIN_PMOVUSQD128, UNKNOWN, (int) V4SI_FTYPE_V2DI_V4SI_QI },
32402 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_us_truncatev4div4si2_mask, "__builtin_ia32_pmovusqd256_mask", IX86_BUILTIN_PMOVUSQD256, UNKNOWN, (int) V4SI_FTYPE_V4DI_V4SI_QI },
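/* vrange (AVX-512DQ) and vgetexp builtins.  */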
32403 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512dq_rangepv4df_mask, "__builtin_ia32_rangepd256_mask", IX86_BUILTIN_RANGEPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_INT_V4DF_QI },
32404 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512dq_rangepv2df_mask, "__builtin_ia32_rangepd128_mask", IX86_BUILTIN_RANGEPD128, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT_V2DF_QI },
32405 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512dq_rangepv8sf_mask, "__builtin_ia32_rangeps256_mask", IX86_BUILTIN_RANGEPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_INT_V8SF_QI },
32406 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512dq_rangepv4sf_mask, "__builtin_ia32_rangeps128_mask", IX86_BUILTIN_RANGEPS128, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT_V4SF_QI },
32407 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_getexpv8sf_mask, "__builtin_ia32_getexpps256_mask", IX86_BUILTIN_GETEXPPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_QI },
32408 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_getexpv4df_mask, "__builtin_ia32_getexppd256_mask", IX86_BUILTIN_GETEXPPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_QI },
32409 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_getexpv4sf_mask, "__builtin_ia32_getexpps128_mask", IX86_BUILTIN_GETEXPPS128, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_QI },
32410 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_getexpv2df_mask, "__builtin_ia32_getexppd128_mask", IX86_BUILTIN_GETEXPPD128, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_QI },
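/* Masked vfixupimm builtins, normal and zero-masked forms.  */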
32411 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fixupimmv4df_mask, "__builtin_ia32_fixupimmpd256_mask", IX86_BUILTIN_FIXUPIMMPD256_MASK, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DI_INT_QI },
32412 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fixupimmv4df_maskz, "__builtin_ia32_fixupimmpd256_maskz", IX86_BUILTIN_FIXUPIMMPD256_MASKZ, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DI_INT_QI },
32413 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fixupimmv8sf_mask, "__builtin_ia32_fixupimmps256_mask", IX86_BUILTIN_FIXUPIMMPS256_MASK, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SI_INT_QI },
32414 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fixupimmv8sf_maskz, "__builtin_ia32_fixupimmps256_maskz", IX86_BUILTIN_FIXUPIMMPS256_MASKZ, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SI_INT_QI },
32415 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fixupimmv2df_mask, "__builtin_ia32_fixupimmpd128_mask", IX86_BUILTIN_FIXUPIMMPD128_MASK, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DI_INT_QI },
32416 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fixupimmv2df_maskz, "__builtin_ia32_fixupimmpd128_maskz", IX86_BUILTIN_FIXUPIMMPD128_MASKZ, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DI_INT_QI },
32417 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fixupimmv4sf_mask, "__builtin_ia32_fixupimmps128_mask", IX86_BUILTIN_FIXUPIMMPS128_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SI_INT_QI },
32418 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fixupimmv4sf_maskz, "__builtin_ia32_fixupimmps128_maskz", IX86_BUILTIN_FIXUPIMMPS128_MASKZ, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SI_INT_QI },
32419 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_absv4di2_mask, "__builtin_ia32_pabsq256_mask", IX86_BUILTIN_PABSQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_QI },
32420 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_absv2di2_mask, "__builtin_ia32_pabsq128_mask", IX86_BUILTIN_PABSQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_QI },
32421 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_absv8si2_mask, "__builtin_ia32_pabsd256_mask", IX86_BUILTIN_PABSD256_MASK, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_QI },
32422 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_absv4si2_mask, "__builtin_ia32_pabsd128_mask", IX86_BUILTIN_PABSD128_MASK, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_QI },
32423 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_pmulhrswv16hi3_mask, "__builtin_ia32_pmulhrsw256_mask", IX86_BUILTIN_PMULHRSW256_MASK, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_V16HI_HI },
32424 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_ssse3_pmulhrswv8hi3_mask, "__builtin_ia32_pmulhrsw128_mask", IX86_BUILTIN_PMULHRSW128_MASK, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_V8HI_QI },
32425 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_umulv8hi3_highpart_mask, "__builtin_ia32_pmulhuw128_mask", IX86_BUILTIN_PMULHUW128_MASK, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_V8HI_QI },
32426 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_umulv16hi3_highpart_mask, "__builtin_ia32_pmulhuw256_mask", IX86_BUILTIN_PMULHUW256_MASK, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_V16HI_HI },
32427 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_smulv16hi3_highpart_mask, "__builtin_ia32_pmulhw256_mask", IX86_BUILTIN_PMULHW256_MASK, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_V16HI_HI },
32428 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_smulv8hi3_highpart_mask, "__builtin_ia32_pmulhw128_mask", IX86_BUILTIN_PMULHW128_MASK, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_V8HI_QI },
32429 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_mulv16hi3_mask, "__builtin_ia32_pmullw256_mask", IX86_BUILTIN_PMULLW256_MASK, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_V16HI_HI },
32430 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_mulv8hi3_mask, "__builtin_ia32_pmullw128_mask", IX86_BUILTIN_PMULLW128_MASK, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_V8HI_QI },
32431 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512dq_mulv4di3_mask, "__builtin_ia32_pmullq256_mask", IX86_BUILTIN_PMULLQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_V4DI_QI },
32432 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512dq_mulv2di3_mask, "__builtin_ia32_pmullq128_mask", IX86_BUILTIN_PMULLQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI_QI },
32433 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_andv4df3_mask, "__builtin_ia32_andpd256_mask", IX86_BUILTIN_ANDPD256_MASK, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DF_QI },
32434 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_andv2df3_mask, "__builtin_ia32_andpd128_mask", IX86_BUILTIN_ANDPD128_MASK, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF_QI },
32435 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_andv8sf3_mask, "__builtin_ia32_andps256_mask", IX86_BUILTIN_ANDPS256_MASK, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SF_QI },
32436 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_andv4sf3_mask, "__builtin_ia32_andps128_mask", IX86_BUILTIN_ANDPS128_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF_QI },
32437 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_andnotv4df3_mask, "__builtin_ia32_andnpd256_mask", IX86_BUILTIN_ANDNPD256_MASK, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DF_QI },
32438 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse2_andnotv2df3_mask, "__builtin_ia32_andnpd128_mask", IX86_BUILTIN_ANDNPD128_MASK, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF_QI },
32439 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_andnotv8sf3_mask, "__builtin_ia32_andnps256_mask", IX86_BUILTIN_ANDNPS256_MASK, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SF_QI },
32440 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse_andnotv4sf3_mask, "__builtin_ia32_andnps128_mask", IX86_BUILTIN_ANDNPS128_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF_QI },
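/* Masked shift builtins (shift-by-immediate and shift-by-count forms), interleaved with the 32/64-bit bitwise pand/pandn/por/pxor builtins.  */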
32441 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_ashlv8hi3_mask, "__builtin_ia32_psllwi128_mask", IX86_BUILTIN_PSLLWI128_MASK, UNKNOWN, (int) V8HI_FTYPE_V8HI_INT_V8HI_QI },
32442 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_ashlv4si3_mask, "__builtin_ia32_pslldi128_mask", IX86_BUILTIN_PSLLDI128_MASK, UNKNOWN, (int) V4SI_FTYPE_V4SI_INT_V4SI_QI },
32443 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_ashlv2di3_mask, "__builtin_ia32_psllqi128_mask", IX86_BUILTIN_PSLLQI128_MASK, UNKNOWN, (int) V2DI_FTYPE_V2DI_INT_V2DI_QI },
32444 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_ashlv8hi3_mask, "__builtin_ia32_psllw128_mask", IX86_BUILTIN_PSLLW128_MASK, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_V8HI_QI },
32445 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_ashlv4si3_mask, "__builtin_ia32_pslld128_mask", IX86_BUILTIN_PSLLD128_MASK, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI_QI },
32446 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_ashlv2di3_mask, "__builtin_ia32_psllq128_mask", IX86_BUILTIN_PSLLQ128_MASK, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI_QI },
32447 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_ashlv16hi3_mask, "__builtin_ia32_psllwi256_mask", IX86_BUILTIN_PSLLWI256_MASK, UNKNOWN, (int) V16HI_FTYPE_V16HI_INT_V16HI_HI },
32448 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_ashlv16hi3_mask, "__builtin_ia32_psllw256_mask", IX86_BUILTIN_PSLLW256_MASK, UNKNOWN, (int) V16HI_FTYPE_V16HI_V8HI_V16HI_HI },
32449 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_ashlv8si3_mask, "__builtin_ia32_pslldi256_mask", IX86_BUILTIN_PSLLDI256_MASK, UNKNOWN, (int) V8SI_FTYPE_V8SI_INT_V8SI_QI },
32450 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_ashlv8si3_mask, "__builtin_ia32_pslld256_mask", IX86_BUILTIN_PSLLD256_MASK, UNKNOWN, (int) V8SI_FTYPE_V8SI_V4SI_V8SI_QI },
32451 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_ashlv4di3_mask, "__builtin_ia32_psllqi256_mask", IX86_BUILTIN_PSLLQI256_MASK, UNKNOWN, (int) V4DI_FTYPE_V4DI_INT_V4DI_QI },
32452 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_ashlv4di3_mask, "__builtin_ia32_psllq256_mask", IX86_BUILTIN_PSLLQ256_MASK, UNKNOWN, (int) V4DI_FTYPE_V4DI_V2DI_V4DI_QI },
32453 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_ashrv4si3_mask, "__builtin_ia32_psradi128_mask", IX86_BUILTIN_PSRADI128_MASK, UNKNOWN, (int) V4SI_FTYPE_V4SI_INT_V4SI_QI },
32454 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_ashrv4si3_mask, "__builtin_ia32_psrad128_mask", IX86_BUILTIN_PSRAD128_MASK, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI_QI },
32455 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_ashrv8si3_mask, "__builtin_ia32_psradi256_mask", IX86_BUILTIN_PSRADI256_MASK, UNKNOWN, (int) V8SI_FTYPE_V8SI_INT_V8SI_QI },
32456 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_ashrv8si3_mask, "__builtin_ia32_psrad256_mask", IX86_BUILTIN_PSRAD256_MASK, UNKNOWN, (int) V8SI_FTYPE_V8SI_V4SI_V8SI_QI },
32457 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_ashrv2di3_mask, "__builtin_ia32_psraqi128_mask", IX86_BUILTIN_PSRAQI128_MASK, UNKNOWN, (int) V2DI_FTYPE_V2DI_INT_V2DI_QI },
32458 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_ashrv2di3_mask, "__builtin_ia32_psraq128_mask", IX86_BUILTIN_PSRAQ128_MASK, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI_QI },
32459 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_ashrv4di3_mask, "__builtin_ia32_psraqi256_mask", IX86_BUILTIN_PSRAQI256_MASK, UNKNOWN, (int) V4DI_FTYPE_V4DI_INT_V4DI_QI },
32460 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_ashrv4di3_mask, "__builtin_ia32_psraq256_mask", IX86_BUILTIN_PSRAQ256_MASK, UNKNOWN, (int) V4DI_FTYPE_V4DI_V2DI_V4DI_QI },
32461 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_andv8si3_mask, "__builtin_ia32_pandd256_mask", IX86_BUILTIN_PANDD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_V8SI_QI },
32462 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_andv4si3_mask, "__builtin_ia32_pandd128_mask", IX86_BUILTIN_PANDD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI_QI },
32463 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_lshrv4si3_mask, "__builtin_ia32_psrldi128_mask", IX86_BUILTIN_PSRLDI128_MASK, UNKNOWN, (int) V4SI_FTYPE_V4SI_INT_V4SI_QI },
32464 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_lshrv4si3_mask, "__builtin_ia32_psrld128_mask", IX86_BUILTIN_PSRLD128_MASK, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI_QI },
32465 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_lshrv8si3_mask, "__builtin_ia32_psrldi256_mask", IX86_BUILTIN_PSRLDI256_MASK, UNKNOWN, (int) V8SI_FTYPE_V8SI_INT_V8SI_QI },
32466 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_lshrv8si3_mask, "__builtin_ia32_psrld256_mask", IX86_BUILTIN_PSRLD256_MASK, UNKNOWN, (int) V8SI_FTYPE_V8SI_V4SI_V8SI_QI },
32467 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_lshrv2di3_mask, "__builtin_ia32_psrlqi128_mask", IX86_BUILTIN_PSRLQI128_MASK, UNKNOWN, (int) V2DI_FTYPE_V2DI_INT_V2DI_QI },
32468 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_lshrv2di3_mask, "__builtin_ia32_psrlq128_mask", IX86_BUILTIN_PSRLQ128_MASK, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI_QI },
32469 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_lshrv4di3_mask, "__builtin_ia32_psrlqi256_mask", IX86_BUILTIN_PSRLQI256_MASK, UNKNOWN, (int) V4DI_FTYPE_V4DI_INT_V4DI_QI },
32470 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_lshrv4di3_mask, "__builtin_ia32_psrlq256_mask", IX86_BUILTIN_PSRLQ256_MASK, UNKNOWN, (int) V4DI_FTYPE_V4DI_V2DI_V4DI_QI },
32471 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_andv4di3_mask, "__builtin_ia32_pandq256_mask", IX86_BUILTIN_PANDQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_V4DI_QI },
32472 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_andv2di3_mask, "__builtin_ia32_pandq128_mask", IX86_BUILTIN_PANDQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI_QI },
32473 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_andnotv8si3_mask, "__builtin_ia32_pandnd256_mask", IX86_BUILTIN_PANDND256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_V8SI_QI },
32474 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse2_andnotv4si3_mask, "__builtin_ia32_pandnd128_mask", IX86_BUILTIN_PANDND128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI_QI },
32475 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_andnotv4di3_mask, "__builtin_ia32_pandnq256_mask", IX86_BUILTIN_PANDNQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_V4DI_QI },
32476 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse2_andnotv2di3_mask, "__builtin_ia32_pandnq128_mask", IX86_BUILTIN_PANDNQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI_QI },
32477 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_iorv8si3_mask, "__builtin_ia32_pord256_mask", IX86_BUILTIN_PORD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_V8SI_QI },
32478 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_iorv4si3_mask, "__builtin_ia32_pord128_mask", IX86_BUILTIN_PORD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI_QI },
32479 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_iorv4di3_mask, "__builtin_ia32_porq256_mask", IX86_BUILTIN_PORQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_V4DI_QI },
32480 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_iorv2di3_mask, "__builtin_ia32_porq128_mask", IX86_BUILTIN_PORQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI_QI },
32481 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_xorv8si3_mask, "__builtin_ia32_pxord256_mask", IX86_BUILTIN_PXORD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_V8SI_QI },
32482 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_xorv4si3_mask, "__builtin_ia32_pxord128_mask", IX86_BUILTIN_PXORD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI_QI },
32483 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_xorv4di3_mask, "__builtin_ia32_pxorq256_mask", IX86_BUILTIN_PXORQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_V4DI_QI },
32484 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_xorv2di3_mask, "__builtin_ia32_pxorq128_mask", IX86_BUILTIN_PXORQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI_QI },
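/* Pack-with-saturation (packsswb/packuswb) builtins.  */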
32485 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_packsswb_mask, "__builtin_ia32_packsswb256_mask", IX86_BUILTIN_PACKSSWB256_MASK, UNKNOWN, (int) V32QI_FTYPE_V16HI_V16HI_V32QI_SI },
32486 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse2_packsswb_mask, "__builtin_ia32_packsswb128_mask", IX86_BUILTIN_PACKSSWB128_MASK, UNKNOWN, (int) V16QI_FTYPE_V8HI_V8HI_V16QI_HI },
32487 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_packuswb_mask, "__builtin_ia32_packuswb256_mask", IX86_BUILTIN_PACKUSWB256_MASK, UNKNOWN, (int) V32QI_FTYPE_V16HI_V16HI_V32QI_SI },
32488 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse2_packuswb_mask, "__builtin_ia32_packuswb128_mask", IX86_BUILTIN_PACKUSWB128_MASK, UNKNOWN, (int) V16QI_FTYPE_V8HI_V8HI_V16QI_HI },
32489 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_rndscalev8sf_mask, "__builtin_ia32_rndscaleps_256_mask", IX86_BUILTIN_RNDSCALEPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_INT_V8SF_QI },
32490 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_rndscalev4df_mask, "__builtin_ia32_rndscalepd_256_mask", IX86_BUILTIN_RNDSCALEPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_INT_V4DF_QI },
32491 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_rndscalev4sf_mask, "__builtin_ia32_rndscaleps_128_mask", IX86_BUILTIN_RNDSCALEPS128, UNKNOWN, (int) V4SF_FTYPE_V4SF_INT_V4SF_QI },
32492 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_rndscalev2df_mask, "__builtin_ia32_rndscalepd_128_mask", IX86_BUILTIN_RNDSCALEPD128, UNKNOWN, (int) V2DF_FTYPE_V2DF_INT_V2DF_QI },
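/* Ternary logic (vpternlog) builtins, normal and zero-masked forms.  */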
32493 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vternlogv4di_mask, "__builtin_ia32_pternlogq256_mask", IX86_BUILTIN_VTERNLOGQ256_MASK, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_V4DI_INT_QI },
32494 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vternlogv4di_maskz, "__builtin_ia32_pternlogq256_maskz", IX86_BUILTIN_VTERNLOGQ256_MASKZ, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_V4DI_INT_QI },
32495 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vternlogv8si_mask, "__builtin_ia32_pternlogd256_mask", IX86_BUILTIN_VTERNLOGD256_MASK, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_V8SI_INT_QI },
32496 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vternlogv8si_maskz, "__builtin_ia32_pternlogd256_maskz", IX86_BUILTIN_VTERNLOGD256_MASKZ, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_V8SI_INT_QI },
32497 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vternlogv2di_mask, "__builtin_ia32_pternlogq128_mask", IX86_BUILTIN_VTERNLOGQ128_MASK, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI_INT_QI },
32498 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vternlogv2di_maskz, "__builtin_ia32_pternlogq128_maskz", IX86_BUILTIN_VTERNLOGQ128_MASKZ, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI_INT_QI },
32499 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vternlogv4si_mask, "__builtin_ia32_pternlogd128_mask", IX86_BUILTIN_VTERNLOGD128_MASK, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI_INT_QI },
32500 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vternlogv4si_maskz, "__builtin_ia32_pternlogd128_maskz", IX86_BUILTIN_VTERNLOGD128_MASKZ, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI_INT_QI },
32501 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_scalefv4df_mask, "__builtin_ia32_scalefpd256_mask", IX86_BUILTIN_SCALEFPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DF_QI },
32502 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_scalefv8sf_mask, "__builtin_ia32_scalefps256_mask", IX86_BUILTIN_SCALEFPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SF_QI },
32503 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_scalefv2df_mask, "__builtin_ia32_scalefpd128_mask", IX86_BUILTIN_SCALEFPD128, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF_QI },
32504 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_scalefv4sf_mask, "__builtin_ia32_scalefps128_mask", IX86_BUILTIN_SCALEFPS128, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF_QI },
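/* Masked FMA builtins.  The _mask, _mask3 and _maskz variants differ only
   in how the write mask merges: _mask keeps masked-off elements from the
   first source operand, _mask3 keeps them from the third (addend) operand,
   and _maskz zeroes them.  */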
32505 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmadd_v4df_mask, "__builtin_ia32_vfmaddpd256_mask", IX86_BUILTIN_VFMADDPD256_MASK, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DF_QI },
32506 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmadd_v4df_mask3, "__builtin_ia32_vfmaddpd256_mask3", IX86_BUILTIN_VFMADDPD256_MASK3, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DF_QI },
32507 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmadd_v4df_maskz, "__builtin_ia32_vfmaddpd256_maskz", IX86_BUILTIN_VFMADDPD256_MASKZ, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DF_QI },
32508 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmadd_v2df_mask, "__builtin_ia32_vfmaddpd128_mask", IX86_BUILTIN_VFMADDPD128_MASK, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF_QI },
32509 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmadd_v2df_mask3, "__builtin_ia32_vfmaddpd128_mask3", IX86_BUILTIN_VFMADDPD128_MASK3, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF_QI },
32510 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmadd_v2df_maskz, "__builtin_ia32_vfmaddpd128_maskz", IX86_BUILTIN_VFMADDPD128_MASKZ, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF_QI },
32511 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmadd_v8sf_mask, "__builtin_ia32_vfmaddps256_mask", IX86_BUILTIN_VFMADDPS256_MASK, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SF_QI },
32512 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmadd_v8sf_mask3, "__builtin_ia32_vfmaddps256_mask3", IX86_BUILTIN_VFMADDPS256_MASK3, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SF_QI },
32513 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmadd_v8sf_maskz, "__builtin_ia32_vfmaddps256_maskz", IX86_BUILTIN_VFMADDPS256_MASKZ, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SF_QI },
32514 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmadd_v4sf_mask, "__builtin_ia32_vfmaddps128_mask", IX86_BUILTIN_VFMADDPS128_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF_QI },
32515 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmadd_v4sf_mask3, "__builtin_ia32_vfmaddps128_mask3", IX86_BUILTIN_VFMADDPS128_MASK3, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF_QI },
32516 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmadd_v4sf_maskz, "__builtin_ia32_vfmaddps128_maskz", IX86_BUILTIN_VFMADDPS128_MASKZ, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF_QI },
32517 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmsub_v4df_mask3, "__builtin_ia32_vfmsubpd256_mask3", IX86_BUILTIN_VFMSUBPD256_MASK3, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DF_QI },
32518 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmsub_v2df_mask3, "__builtin_ia32_vfmsubpd128_mask3", IX86_BUILTIN_VFMSUBPD128_MASK3, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF_QI },
32519 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmsub_v8sf_mask3, "__builtin_ia32_vfmsubps256_mask3", IX86_BUILTIN_VFMSUBPS256_MASK3, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SF_QI },
32520 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmsub_v4sf_mask3, "__builtin_ia32_vfmsubps128_mask3", IX86_BUILTIN_VFMSUBPS128_MASK3, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF_QI },
32521 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fnmadd_v4df_mask, "__builtin_ia32_vfnmaddpd256_mask", IX86_BUILTIN_VFNMADDPD256_MASK, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DF_QI },
32522 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fnmadd_v2df_mask, "__builtin_ia32_vfnmaddpd128_mask", IX86_BUILTIN_VFNMADDPD128_MASK, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF_QI },
32523 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fnmadd_v8sf_mask, "__builtin_ia32_vfnmaddps256_mask", IX86_BUILTIN_VFNMADDPS256_MASK, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SF_QI },
32524 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fnmadd_v4sf_mask, "__builtin_ia32_vfnmaddps128_mask", IX86_BUILTIN_VFNMADDPS128_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF_QI },
32525 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fnmsub_v4df_mask, "__builtin_ia32_vfnmsubpd256_mask", IX86_BUILTIN_VFNMSUBPD256_MASK, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DF_QI },
32526 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fnmsub_v4df_mask3, "__builtin_ia32_vfnmsubpd256_mask3", IX86_BUILTIN_VFNMSUBPD256_MASK3, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DF_QI },
32527 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fnmsub_v2df_mask, "__builtin_ia32_vfnmsubpd128_mask", IX86_BUILTIN_VFNMSUBPD128_MASK, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF_QI },
32528 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fnmsub_v2df_mask3, "__builtin_ia32_vfnmsubpd128_mask3", IX86_BUILTIN_VFNMSUBPD128_MASK3, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF_QI },
32529 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fnmsub_v8sf_mask, "__builtin_ia32_vfnmsubps256_mask", IX86_BUILTIN_VFNMSUBPS256_MASK, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SF_QI },
32530 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fnmsub_v8sf_mask3, "__builtin_ia32_vfnmsubps256_mask3", IX86_BUILTIN_VFNMSUBPS256_MASK3, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SF_QI },
32531 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fnmsub_v4sf_mask, "__builtin_ia32_vfnmsubps128_mask", IX86_BUILTIN_VFNMSUBPS128_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF_QI },
32532 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fnmsub_v4sf_mask3, "__builtin_ia32_vfnmsubps128_mask3", IX86_BUILTIN_VFNMSUBPS128_MASK3, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF_QI },
32533 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmaddsub_v4df_mask, "__builtin_ia32_vfmaddsubpd256_mask", IX86_BUILTIN_VFMADDSUBPD256_MASK, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DF_QI },
32534 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmaddsub_v4df_mask3, "__builtin_ia32_vfmaddsubpd256_mask3", IX86_BUILTIN_VFMADDSUBPD256_MASK3, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DF_QI },
32535 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmaddsub_v4df_maskz, "__builtin_ia32_vfmaddsubpd256_maskz", IX86_BUILTIN_VFMADDSUBPD256_MASKZ, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DF_QI },
32536 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmaddsub_v2df_mask, "__builtin_ia32_vfmaddsubpd128_mask", IX86_BUILTIN_VFMADDSUBPD128_MASK, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF_QI },
32537 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmaddsub_v2df_mask3, "__builtin_ia32_vfmaddsubpd128_mask3", IX86_BUILTIN_VFMADDSUBPD128_MASK3, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF_QI },
32538 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmaddsub_v2df_maskz, "__builtin_ia32_vfmaddsubpd128_maskz", IX86_BUILTIN_VFMADDSUBPD128_MASKZ, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF_QI },
32539 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmaddsub_v8sf_mask, "__builtin_ia32_vfmaddsubps256_mask", IX86_BUILTIN_VFMADDSUBPS256_MASK, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SF_QI },
32540 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmaddsub_v8sf_mask3, "__builtin_ia32_vfmaddsubps256_mask3", IX86_BUILTIN_VFMADDSUBPS256_MASK3, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SF_QI },
32541 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmaddsub_v8sf_maskz, "__builtin_ia32_vfmaddsubps256_maskz", IX86_BUILTIN_VFMADDSUBPS256_MASKZ, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SF_QI },
32542 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmaddsub_v4sf_mask, "__builtin_ia32_vfmaddsubps128_mask", IX86_BUILTIN_VFMADDSUBPS128_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF_QI },
32543 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmaddsub_v4sf_mask3, "__builtin_ia32_vfmaddsubps128_mask3", IX86_BUILTIN_VFMADDSUBPS128_MASK3, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF_QI },
32544 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmaddsub_v4sf_maskz, "__builtin_ia32_vfmaddsubps128_maskz", IX86_BUILTIN_VFMADDSUBPS128_MASKZ, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF_QI },
32545 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmsubadd_v4df_mask3, "__builtin_ia32_vfmsubaddpd256_mask3", IX86_BUILTIN_VFMSUBADDPD256_MASK3, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DF_QI },
32546 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmsubadd_v2df_mask3, "__builtin_ia32_vfmsubaddpd128_mask3", IX86_BUILTIN_VFMSUBADDPD128_MASK3, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF_QI },
32547 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmsubadd_v8sf_mask3, "__builtin_ia32_vfmsubaddps256_mask3", IX86_BUILTIN_VFMSUBADDPS256_MASK3, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SF_QI },
32548 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmsubadd_v4sf_mask3, "__builtin_ia32_vfmsubaddps128_mask3", IX86_BUILTIN_VFMSUBADDPS128_MASK3, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF_QI },
32549 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vinsertv4df, "__builtin_ia32_insertf64x2_256_mask", IX86_BUILTIN_INSERTF64X2_256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V2DF_INT_V4DF_QI },
32550 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vinsertv4di, "__builtin_ia32_inserti64x2_256_mask", IX86_BUILTIN_INSERTI64X2_256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V2DI_INT_V4DI_QI },
32551 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ashrvv16hi_mask, "__builtin_ia32_psrav16hi_mask", IX86_BUILTIN_PSRAVV16HI, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_V16HI_HI },
32552 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ashrvv8hi_mask, "__builtin_ia32_psrav8hi_mask", IX86_BUILTIN_PSRAVV8HI, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_V8HI_QI },
32553 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512bw_pmaddubsw512v16hi_mask, "__builtin_ia32_pmaddubsw256_mask", IX86_BUILTIN_PMADDUBSW256_MASK, UNKNOWN, (int) V16HI_FTYPE_V32QI_V32QI_V16HI_HI },
32554 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512bw_pmaddubsw512v8hi_mask, "__builtin_ia32_pmaddubsw128_mask", IX86_BUILTIN_PMADDUBSW128_MASK, UNKNOWN, (int) V8HI_FTYPE_V16QI_V16QI_V8HI_QI },
32555 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512bw_pmaddwd512v16hi_mask, "__builtin_ia32_pmaddwd256_mask", IX86_BUILTIN_PMADDWD256_MASK, UNKNOWN, (int) V8SI_FTYPE_V16HI_V16HI_V8SI_QI },
32556 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512bw_pmaddwd512v8hi_mask, "__builtin_ia32_pmaddwd128_mask", IX86_BUILTIN_PMADDWD128_MASK, UNKNOWN, (int) V4SI_FTYPE_V8HI_V8HI_V4SI_QI },
32557 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_lshrvv16hi_mask, "__builtin_ia32_psrlv16hi_mask", IX86_BUILTIN_PSRLVV16HI, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_V16HI_HI },
32558 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_lshrvv8hi_mask, "__builtin_ia32_psrlv8hi_mask", IX86_BUILTIN_PSRLVV8HI, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_V8HI_QI },
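/* Masked conversions from packed single float to signed/unsigned 32-bit and 64-bit integers.  */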
32559 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_fix_notruncv8sfv8si_mask, "__builtin_ia32_cvtps2dq256_mask", IX86_BUILTIN_CVTPS2DQ256_MASK, UNKNOWN, (int) V8SI_FTYPE_V8SF_V8SI_QI },
32560 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse2_fix_notruncv4sfv4si_mask, "__builtin_ia32_cvtps2dq128_mask", IX86_BUILTIN_CVTPS2DQ128_MASK, UNKNOWN, (int) V4SI_FTYPE_V4SF_V4SI_QI },
32561 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ufix_notruncv8sfv8si_mask, "__builtin_ia32_cvtps2udq256_mask", IX86_BUILTIN_CVTPS2UDQ256, UNKNOWN, (int) V8SI_FTYPE_V8SF_V8SI_QI },
32562 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ufix_notruncv4sfv4si_mask, "__builtin_ia32_cvtps2udq128_mask", IX86_BUILTIN_CVTPS2UDQ128, UNKNOWN, (int) V4SI_FTYPE_V4SF_V4SI_QI },
32563 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512dq_cvtps2qqv4di_mask, "__builtin_ia32_cvtps2qq256_mask", IX86_BUILTIN_CVTPS2QQ256, UNKNOWN, (int) V4DI_FTYPE_V4SF_V4DI_QI },
32564 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512dq_cvtps2qqv2di_mask, "__builtin_ia32_cvtps2qq128_mask", IX86_BUILTIN_CVTPS2QQ128, UNKNOWN, (int) V2DI_FTYPE_V4SF_V2DI_QI },
32565 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512dq_cvtps2uqqv4di_mask, "__builtin_ia32_cvtps2uqq256_mask", IX86_BUILTIN_CVTPS2UQQ256, UNKNOWN, (int) V4DI_FTYPE_V4SF_V4DI_QI },
32566 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512dq_cvtps2uqqv2di_mask, "__builtin_ia32_cvtps2uqq128_mask", IX86_BUILTIN_CVTPS2UQQ128, UNKNOWN, (int) V2DI_FTYPE_V4SF_V2DI_QI },
32567 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_getmantv8sf_mask, "__builtin_ia32_getmantps256_mask", IX86_BUILTIN_GETMANTPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_INT_V8SF_QI },
32568 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_getmantv4sf_mask, "__builtin_ia32_getmantps128_mask", IX86_BUILTIN_GETMANTPS128, UNKNOWN, (int) V4SF_FTYPE_V4SF_INT_V4SF_QI },
32569 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_getmantv4df_mask, "__builtin_ia32_getmantpd256_mask", IX86_BUILTIN_GETMANTPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_INT_V4DF_QI },
32570 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_getmantv2df_mask, "__builtin_ia32_getmantpd128_mask", IX86_BUILTIN_GETMANTPD128, UNKNOWN, (int) V2DF_FTYPE_V2DF_INT_V2DF_QI },
32571 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_movddup256_mask, "__builtin_ia32_movddup256_mask", IX86_BUILTIN_MOVDDUP256_MASK, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_QI },
32572 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_vec_dupv2df_mask, "__builtin_ia32_movddup128_mask", IX86_BUILTIN_MOVDDUP128_MASK, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_QI },
32573 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_movshdup256_mask, "__builtin_ia32_movshdup256_mask", IX86_BUILTIN_MOVSHDUP256_MASK, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_QI },
32574 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse3_movshdup_mask, "__builtin_ia32_movshdup128_mask", IX86_BUILTIN_MOVSHDUP128_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_QI },
32575 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_movsldup256_mask, "__builtin_ia32_movsldup256_mask", IX86_BUILTIN_MOVSLDUP256_MASK, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_QI },
32576 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse3_movsldup_mask, "__builtin_ia32_movsldup128_mask", IX86_BUILTIN_MOVSLDUP128_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_QI },
32577 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_floatv4div4sf2_mask, "__builtin_ia32_cvtqq2ps256_mask", IX86_BUILTIN_CVTQQ2PS256, UNKNOWN, (int) V4SF_FTYPE_V4DI_V4SF_QI },
32578 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_floatv2div2sf2_mask, "__builtin_ia32_cvtqq2ps128_mask", IX86_BUILTIN_CVTQQ2PS128, UNKNOWN, (int) V4SF_FTYPE_V2DI_V4SF_QI },
32579 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_ufloatv4div4sf2_mask, "__builtin_ia32_cvtuqq2ps256_mask", IX86_BUILTIN_CVTUQQ2PS256, UNKNOWN, (int) V4SF_FTYPE_V4DI_V4SF_QI },
32580 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_ufloatv2div2sf2_mask, "__builtin_ia32_cvtuqq2ps128_mask", IX86_BUILTIN_CVTUQQ2PS128, UNKNOWN, (int) V4SF_FTYPE_V2DI_V4SF_QI },
32581 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_floatv4div4df2_mask, "__builtin_ia32_cvtqq2pd256_mask", IX86_BUILTIN_CVTQQ2PD256, UNKNOWN, (int) V4DF_FTYPE_V4DI_V4DF_QI },
32582 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_floatv2div2df2_mask, "__builtin_ia32_cvtqq2pd128_mask", IX86_BUILTIN_CVTQQ2PD128, UNKNOWN, (int) V2DF_FTYPE_V2DI_V2DF_QI },
32583 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_ufloatv4div4df2_mask, "__builtin_ia32_cvtuqq2pd256_mask", IX86_BUILTIN_CVTUQQ2PD256, UNKNOWN, (int) V4DF_FTYPE_V4DI_V4DF_QI },
32584 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_ufloatv2div2df2_mask, "__builtin_ia32_cvtuqq2pd128_mask", IX86_BUILTIN_CVTUQQ2PD128, UNKNOWN, (int) V2DF_FTYPE_V2DI_V2DF_QI },
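/* Masked two-table variable permutes (vpermt2var / vpermi2var) on 32-bit and 64-bit elements.  */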
32585 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermt2varv4di3_mask, "__builtin_ia32_vpermt2varq256_mask", IX86_BUILTIN_VPERMT2VARQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_V4DI_QI },
32586 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermt2varv4di3_maskz, "__builtin_ia32_vpermt2varq256_maskz", IX86_BUILTIN_VPERMT2VARQ256_MASKZ, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_V4DI_QI },
32587 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermt2varv8si3_mask, "__builtin_ia32_vpermt2vard256_mask", IX86_BUILTIN_VPERMT2VARD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_V8SI_QI },
32588 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermt2varv8si3_maskz, "__builtin_ia32_vpermt2vard256_maskz", IX86_BUILTIN_VPERMT2VARD256_MASKZ, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_V8SI_QI },
32589 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermi2varv4di3_mask, "__builtin_ia32_vpermi2varq256_mask", IX86_BUILTIN_VPERMI2VARQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_V4DI_QI },
32590 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermi2varv8si3_mask, "__builtin_ia32_vpermi2vard256_mask", IX86_BUILTIN_VPERMI2VARD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_V8SI_QI },
32591 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermt2varv4df3_mask, "__builtin_ia32_vpermt2varpd256_mask", IX86_BUILTIN_VPERMT2VARPD256, UNKNOWN, (int) V4DF_FTYPE_V4DI_V4DF_V4DF_QI },
32592 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermt2varv4df3_maskz, "__builtin_ia32_vpermt2varpd256_maskz", IX86_BUILTIN_VPERMT2VARPD256_MASKZ, UNKNOWN, (int) V4DF_FTYPE_V4DI_V4DF_V4DF_QI },
32593 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermt2varv8sf3_mask, "__builtin_ia32_vpermt2varps256_mask", IX86_BUILTIN_VPERMT2VARPS256, UNKNOWN, (int) V8SF_FTYPE_V8SI_V8SF_V8SF_QI },
32594 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermt2varv8sf3_maskz, "__builtin_ia32_vpermt2varps256_maskz", IX86_BUILTIN_VPERMT2VARPS256_MASKZ, UNKNOWN, (int) V8SF_FTYPE_V8SI_V8SF_V8SF_QI },
32595 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermi2varv4df3_mask, "__builtin_ia32_vpermi2varpd256_mask", IX86_BUILTIN_VPERMI2VARPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DI_V4DF_QI },
32596 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermi2varv8sf3_mask, "__builtin_ia32_vpermi2varps256_mask", IX86_BUILTIN_VPERMI2VARPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SI_V8SF_QI },
32597 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermt2varv2di3_mask, "__builtin_ia32_vpermt2varq128_mask", IX86_BUILTIN_VPERMT2VARQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI_QI },
32598 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermt2varv2di3_maskz, "__builtin_ia32_vpermt2varq128_maskz", IX86_BUILTIN_VPERMT2VARQ128_MASKZ, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI_QI },
32599 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermt2varv4si3_mask, "__builtin_ia32_vpermt2vard128_mask", IX86_BUILTIN_VPERMT2VARD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI_QI },
32600 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermt2varv4si3_maskz, "__builtin_ia32_vpermt2vard128_maskz", IX86_BUILTIN_VPERMT2VARD128_MASKZ, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI_QI },
32601 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermi2varv2di3_mask, "__builtin_ia32_vpermi2varq128_mask", IX86_BUILTIN_VPERMI2VARQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI_QI },
32602 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermi2varv4si3_mask, "__builtin_ia32_vpermi2vard128_mask", IX86_BUILTIN_VPERMI2VARD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI_QI },
32603 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermt2varv2df3_mask, "__builtin_ia32_vpermt2varpd128_mask", IX86_BUILTIN_VPERMT2VARPD128, UNKNOWN, (int) V2DF_FTYPE_V2DI_V2DF_V2DF_QI },
32604 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermt2varv2df3_maskz, "__builtin_ia32_vpermt2varpd128_maskz", IX86_BUILTIN_VPERMT2VARPD128_MASKZ, UNKNOWN, (int) V2DF_FTYPE_V2DI_V2DF_V2DF_QI },
32605 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermt2varv4sf3_mask, "__builtin_ia32_vpermt2varps128_mask", IX86_BUILTIN_VPERMT2VARPS128, UNKNOWN, (int) V4SF_FTYPE_V4SI_V4SF_V4SF_QI },
32606 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermt2varv4sf3_maskz, "__builtin_ia32_vpermt2varps128_maskz", IX86_BUILTIN_VPERMT2VARPS128_MASKZ, UNKNOWN, (int) V4SF_FTYPE_V4SI_V4SF_V4SF_QI },
32607 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermi2varv2df3_mask, "__builtin_ia32_vpermi2varpd128_mask", IX86_BUILTIN_VPERMI2VARPD128, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DI_V2DF_QI },
32608 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermi2varv4sf3_mask, "__builtin_ia32_vpermi2varps128_mask", IX86_BUILTIN_VPERMI2VARPS128, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SI_V4SF_QI },
32609 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_pshufbv32qi3_mask, "__builtin_ia32_pshufb256_mask", IX86_BUILTIN_PSHUFB256_MASK, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI_V32QI_SI },
32610 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_ssse3_pshufbv16qi3_mask, "__builtin_ia32_pshufb128_mask", IX86_BUILTIN_PSHUFB128_MASK, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_V16QI_HI },
32611 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_pshufhwv3_mask, "__builtin_ia32_pshufhw256_mask", IX86_BUILTIN_PSHUFHW256_MASK, UNKNOWN, (int) V16HI_FTYPE_V16HI_INT_V16HI_HI },
32612 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_pshufhw_mask, "__builtin_ia32_pshufhw128_mask", IX86_BUILTIN_PSHUFHW128_MASK, UNKNOWN, (int) V8HI_FTYPE_V8HI_INT_V8HI_QI },
32613 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_pshuflwv3_mask, "__builtin_ia32_pshuflw256_mask", IX86_BUILTIN_PSHUFLW256_MASK, UNKNOWN, (int) V16HI_FTYPE_V16HI_INT_V16HI_HI },
32614 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_pshuflw_mask, "__builtin_ia32_pshuflw128_mask", IX86_BUILTIN_PSHUFLW128_MASK, UNKNOWN, (int) V8HI_FTYPE_V8HI_INT_V8HI_QI },
32615 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_pshufdv3_mask, "__builtin_ia32_pshufd256_mask", IX86_BUILTIN_PSHUFD256_MASK, UNKNOWN, (int) V8SI_FTYPE_V8SI_INT_V8SI_QI },
32616 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_pshufd_mask, "__builtin_ia32_pshufd128_mask", IX86_BUILTIN_PSHUFD128_MASK, UNKNOWN, (int) V4SI_FTYPE_V4SI_INT_V4SI_QI },
32617 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_shufpd256_mask, "__builtin_ia32_shufpd256_mask", IX86_BUILTIN_SHUFPD256_MASK, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_INT_V4DF_QI },
32618 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse2_shufpd_mask, "__builtin_ia32_shufpd128_mask", IX86_BUILTIN_SHUFPD128_MASK, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT_V2DF_QI },
32619 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_shufps256_mask, "__builtin_ia32_shufps256_mask", IX86_BUILTIN_SHUFPS256_MASK, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_INT_V8SF_QI },
32620 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse_shufps_mask, "__builtin_ia32_shufps128_mask", IX86_BUILTIN_SHUFPS128_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT_V4SF_QI },
32621 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_rolvv4di_mask, "__builtin_ia32_prolvq256_mask", IX86_BUILTIN_PROLVQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_V4DI_QI },
32622 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_rolvv2di_mask, "__builtin_ia32_prolvq128_mask", IX86_BUILTIN_PROLVQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI_QI },
32623 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_rolv4di_mask, "__builtin_ia32_prolq256_mask", IX86_BUILTIN_PROLQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_INT_V4DI_QI },
32624 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_rolv2di_mask, "__builtin_ia32_prolq128_mask", IX86_BUILTIN_PROLQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_INT_V2DI_QI },
32625 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_rorvv4di_mask, "__builtin_ia32_prorvq256_mask", IX86_BUILTIN_PRORVQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_V4DI_QI },
32626 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_rorvv2di_mask, "__builtin_ia32_prorvq128_mask", IX86_BUILTIN_PRORVQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI_QI },
32627 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_rorv4di_mask, "__builtin_ia32_prorq256_mask", IX86_BUILTIN_PRORQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_INT_V4DI_QI },
32628 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_rorv2di_mask, "__builtin_ia32_prorq128_mask", IX86_BUILTIN_PRORQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_INT_V2DI_QI },
32629 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_ashrvv2di_mask, "__builtin_ia32_psravq128_mask", IX86_BUILTIN_PSRAVQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI_QI },
32630 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_ashrvv4di_mask, "__builtin_ia32_psravq256_mask", IX86_BUILTIN_PSRAVQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_V4DI_QI },
32631 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_ashlvv4di_mask, "__builtin_ia32_psllv4di_mask", IX86_BUILTIN_PSLLVV4DI_MASK, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_V4DI_QI },
32632 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_ashlvv2di_mask, "__builtin_ia32_psllv2di_mask", IX86_BUILTIN_PSLLVV2DI_MASK, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI_QI },
32633 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_ashlvv8si_mask, "__builtin_ia32_psllv8si_mask", IX86_BUILTIN_PSLLVV8SI_MASK, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_V8SI_QI },
32634 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_ashlvv4si_mask, "__builtin_ia32_psllv4si_mask", IX86_BUILTIN_PSLLVV4SI_MASK, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI_QI },
32635 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_ashrvv8si_mask, "__builtin_ia32_psrav8si_mask", IX86_BUILTIN_PSRAVV8SI_MASK, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_V8SI_QI },
32636 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_ashrvv4si_mask, "__builtin_ia32_psrav4si_mask", IX86_BUILTIN_PSRAVV4SI_MASK, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI_QI },
32637 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_lshrvv4di_mask, "__builtin_ia32_psrlv4di_mask", IX86_BUILTIN_PSRLVV4DI_MASK, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_V4DI_QI },
32638 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_lshrvv2di_mask, "__builtin_ia32_psrlv2di_mask", IX86_BUILTIN_PSRLVV2DI_MASK, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI_QI },
32639 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_lshrvv8si_mask, "__builtin_ia32_psrlv8si_mask", IX86_BUILTIN_PSRLVV8SI_MASK, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_V8SI_QI },
32640 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_lshrvv4si_mask, "__builtin_ia32_psrlv4si_mask", IX86_BUILTIN_PSRLVV4SI_MASK, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI_QI },
32641 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_ashrv16hi3_mask, "__builtin_ia32_psrawi256_mask", IX86_BUILTIN_PSRAWI256_MASK, UNKNOWN, (int) V16HI_FTYPE_V16HI_INT_V16HI_HI },
32642 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_ashrv16hi3_mask, "__builtin_ia32_psraw256_mask", IX86_BUILTIN_PSRAW256_MASK, UNKNOWN, (int) V16HI_FTYPE_V16HI_V8HI_V16HI_HI },
32643 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_ashrv8hi3_mask, "__builtin_ia32_psrawi128_mask", IX86_BUILTIN_PSRAWI128_MASK, UNKNOWN, (int) V8HI_FTYPE_V8HI_INT_V8HI_QI },
32644 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_ashrv8hi3_mask, "__builtin_ia32_psraw128_mask", IX86_BUILTIN_PSRAW128_MASK, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_V8HI_QI },
32645 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_lshrv16hi3_mask, "__builtin_ia32_psrlwi256_mask", IX86_BUILTIN_PSRLWI256_MASK, UNKNOWN, (int) V16HI_FTYPE_V16HI_INT_V16HI_HI },
32646 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_lshrv16hi3_mask, "__builtin_ia32_psrlw256_mask", IX86_BUILTIN_PSRLW256_MASK, UNKNOWN, (int) V16HI_FTYPE_V16HI_V8HI_V16HI_HI },
32647 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_lshrv8hi3_mask, "__builtin_ia32_psrlwi128_mask", IX86_BUILTIN_PSRLWI128_MASK, UNKNOWN, (int) V8HI_FTYPE_V8HI_INT_V8HI_QI },
32648 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_lshrv8hi3_mask, "__builtin_ia32_psrlw128_mask", IX86_BUILTIN_PSRLW128_MASK, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_V8HI_QI },
32649 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_rorvv8si_mask, "__builtin_ia32_prorvd256_mask", IX86_BUILTIN_PRORVD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_V8SI_QI },
32650 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_rolvv8si_mask, "__builtin_ia32_prolvd256_mask", IX86_BUILTIN_PROLVD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_V8SI_QI },
32651 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_rorv8si_mask, "__builtin_ia32_prord256_mask", IX86_BUILTIN_PRORD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_INT_V8SI_QI },
32652 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_rolv8si_mask, "__builtin_ia32_prold256_mask", IX86_BUILTIN_PROLD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_INT_V8SI_QI },
32653 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_rorvv4si_mask, "__builtin_ia32_prorvd128_mask", IX86_BUILTIN_PRORVD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI_QI },
32654 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_rolvv4si_mask, "__builtin_ia32_prolvd128_mask", IX86_BUILTIN_PROLVD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI_QI },
32655 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_rorv4si_mask, "__builtin_ia32_prord128_mask", IX86_BUILTIN_PRORD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_INT_V4SI_QI },
32656 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_rolv4si_mask, "__builtin_ia32_prold128_mask", IX86_BUILTIN_PROLD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_INT_V4SI_QI },
32657 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512dq_fpclassv4df_mask, "__builtin_ia32_fpclasspd256_mask", IX86_BUILTIN_FPCLASSPD256, UNKNOWN, (int) QI_FTYPE_V4DF_INT_QI },
32658 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512dq_fpclassv2df_mask, "__builtin_ia32_fpclasspd128_mask", IX86_BUILTIN_FPCLASSPD128, UNKNOWN, (int) QI_FTYPE_V2DF_INT_QI },
32659 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_avx512dq_vmfpclassv2df, "__builtin_ia32_fpclasssd", IX86_BUILTIN_FPCLASSSD, UNKNOWN, (int) QI_FTYPE_V2DF_INT },
32660 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512dq_fpclassv8sf_mask, "__builtin_ia32_fpclassps256_mask", IX86_BUILTIN_FPCLASSPS256, UNKNOWN, (int) QI_FTYPE_V8SF_INT_QI },
32661 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512dq_fpclassv4sf_mask, "__builtin_ia32_fpclassps128_mask", IX86_BUILTIN_FPCLASSPS128, UNKNOWN, (int) QI_FTYPE_V4SF_INT_QI },
32662 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_avx512dq_vmfpclassv4sf, "__builtin_ia32_fpclassss", IX86_BUILTIN_FPCLASSSS, UNKNOWN, (int) QI_FTYPE_V4SF_INT },
32663 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_cvtb2maskv16qi, "__builtin_ia32_cvtb2mask128", IX86_BUILTIN_CVTB2MASK128, UNKNOWN, (int) HI_FTYPE_V16QI },
32664 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_cvtb2maskv32qi, "__builtin_ia32_cvtb2mask256", IX86_BUILTIN_CVTB2MASK256, UNKNOWN, (int) SI_FTYPE_V32QI },
32665 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_cvtw2maskv8hi, "__builtin_ia32_cvtw2mask128", IX86_BUILTIN_CVTW2MASK128, UNKNOWN, (int) QI_FTYPE_V8HI },
32666 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_cvtw2maskv16hi, "__builtin_ia32_cvtw2mask256", IX86_BUILTIN_CVTW2MASK256, UNKNOWN, (int) HI_FTYPE_V16HI },
32667 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_cvtd2maskv4si, "__builtin_ia32_cvtd2mask128", IX86_BUILTIN_CVTD2MASK128, UNKNOWN, (int) QI_FTYPE_V4SI },
32668 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_cvtd2maskv8si, "__builtin_ia32_cvtd2mask256", IX86_BUILTIN_CVTD2MASK256, UNKNOWN, (int) QI_FTYPE_V8SI },
32669 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_cvtq2maskv2di, "__builtin_ia32_cvtq2mask128", IX86_BUILTIN_CVTQ2MASK128, UNKNOWN, (int) QI_FTYPE_V2DI },
32670 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_cvtq2maskv4di, "__builtin_ia32_cvtq2mask256", IX86_BUILTIN_CVTQ2MASK256, UNKNOWN, (int) QI_FTYPE_V4DI },
32671 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_cvtmask2bv16qi, "__builtin_ia32_cvtmask2b128", IX86_BUILTIN_CVTMASK2B128, UNKNOWN, (int) V16QI_FTYPE_HI },
32672 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_cvtmask2bv32qi, "__builtin_ia32_cvtmask2b256", IX86_BUILTIN_CVTMASK2B256, UNKNOWN, (int) V32QI_FTYPE_SI },
32673 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_cvtmask2wv8hi, "__builtin_ia32_cvtmask2w128", IX86_BUILTIN_CVTMASK2W128, UNKNOWN, (int) V8HI_FTYPE_QI },
32674 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_cvtmask2wv16hi, "__builtin_ia32_cvtmask2w256", IX86_BUILTIN_CVTMASK2W256, UNKNOWN, (int) V16HI_FTYPE_HI },
32675 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_cvtmask2dv4si, "__builtin_ia32_cvtmask2d128", IX86_BUILTIN_CVTMASK2D128, UNKNOWN, (int) V4SI_FTYPE_QI },
32676 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_cvtmask2dv8si, "__builtin_ia32_cvtmask2d256", IX86_BUILTIN_CVTMASK2D256, UNKNOWN, (int) V8SI_FTYPE_QI },
32677 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_cvtmask2qv2di, "__builtin_ia32_cvtmask2q128", IX86_BUILTIN_CVTMASK2Q128, UNKNOWN, (int) V2DI_FTYPE_QI },
32678 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_cvtmask2qv4di, "__builtin_ia32_cvtmask2q256", IX86_BUILTIN_CVTMASK2Q256, UNKNOWN, (int) V4DI_FTYPE_QI },
32679 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_eqv16qi3_mask, "__builtin_ia32_pcmpeqb128_mask", IX86_BUILTIN_PCMPEQB128_MASK, UNKNOWN, (int) HI_FTYPE_V16QI_V16QI_HI },
32680 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_eqv32qi3_mask, "__builtin_ia32_pcmpeqb256_mask", IX86_BUILTIN_PCMPEQB256_MASK, UNKNOWN, (int) SI_FTYPE_V32QI_V32QI_SI },
32681 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_eqv8hi3_mask, "__builtin_ia32_pcmpeqw128_mask", IX86_BUILTIN_PCMPEQW128_MASK, UNKNOWN, (int) QI_FTYPE_V8HI_V8HI_QI },
32682 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_eqv16hi3_mask, "__builtin_ia32_pcmpeqw256_mask", IX86_BUILTIN_PCMPEQW256_MASK, UNKNOWN, (int) HI_FTYPE_V16HI_V16HI_HI },
32683 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_eqv4si3_mask, "__builtin_ia32_pcmpeqd128_mask", IX86_BUILTIN_PCMPEQD128_MASK, UNKNOWN, (int) QI_FTYPE_V4SI_V4SI_QI },
32684 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_eqv8si3_mask, "__builtin_ia32_pcmpeqd256_mask", IX86_BUILTIN_PCMPEQD256_MASK, UNKNOWN, (int) QI_FTYPE_V8SI_V8SI_QI },
32685 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_eqv2di3_mask, "__builtin_ia32_pcmpeqq128_mask", IX86_BUILTIN_PCMPEQQ128_MASK, UNKNOWN, (int) QI_FTYPE_V2DI_V2DI_QI },
32686 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_eqv4di3_mask, "__builtin_ia32_pcmpeqq256_mask", IX86_BUILTIN_PCMPEQQ256_MASK, UNKNOWN, (int) QI_FTYPE_V4DI_V4DI_QI },
32687 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_gtv16qi3_mask, "__builtin_ia32_pcmpgtb128_mask", IX86_BUILTIN_PCMPGTB128_MASK, UNKNOWN, (int) HI_FTYPE_V16QI_V16QI_HI },
32688 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_gtv32qi3_mask, "__builtin_ia32_pcmpgtb256_mask", IX86_BUILTIN_PCMPGTB256_MASK, UNKNOWN, (int) SI_FTYPE_V32QI_V32QI_SI },
32689 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_gtv8hi3_mask, "__builtin_ia32_pcmpgtw128_mask", IX86_BUILTIN_PCMPGTW128_MASK, UNKNOWN, (int) QI_FTYPE_V8HI_V8HI_QI },
32690 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_gtv16hi3_mask, "__builtin_ia32_pcmpgtw256_mask", IX86_BUILTIN_PCMPGTW256_MASK, UNKNOWN, (int) HI_FTYPE_V16HI_V16HI_HI },
32691 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_gtv4si3_mask, "__builtin_ia32_pcmpgtd128_mask", IX86_BUILTIN_PCMPGTD128_MASK, UNKNOWN, (int) QI_FTYPE_V4SI_V4SI_QI },
32692 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_gtv8si3_mask, "__builtin_ia32_pcmpgtd256_mask", IX86_BUILTIN_PCMPGTD256_MASK, UNKNOWN, (int) QI_FTYPE_V8SI_V8SI_QI },
32693 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_gtv2di3_mask, "__builtin_ia32_pcmpgtq128_mask", IX86_BUILTIN_PCMPGTQ128_MASK, UNKNOWN, (int) QI_FTYPE_V2DI_V2DI_QI },
32694 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_gtv4di3_mask, "__builtin_ia32_pcmpgtq256_mask", IX86_BUILTIN_PCMPGTQ256_MASK, UNKNOWN, (int) QI_FTYPE_V4DI_V4DI_QI },
32695 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_testmv16qi3_mask, "__builtin_ia32_ptestmb128", IX86_BUILTIN_PTESTMB128, UNKNOWN, (int) HI_FTYPE_V16QI_V16QI_HI },
32696 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_testmv32qi3_mask, "__builtin_ia32_ptestmb256", IX86_BUILTIN_PTESTMB256, UNKNOWN, (int) SI_FTYPE_V32QI_V32QI_SI },
32697 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_testmv8hi3_mask, "__builtin_ia32_ptestmw128", IX86_BUILTIN_PTESTMW128, UNKNOWN, (int) QI_FTYPE_V8HI_V8HI_QI },
32698 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_testmv16hi3_mask, "__builtin_ia32_ptestmw256", IX86_BUILTIN_PTESTMW256, UNKNOWN, (int) HI_FTYPE_V16HI_V16HI_HI },
32699 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_testmv4si3_mask, "__builtin_ia32_ptestmd128", IX86_BUILTIN_PTESTMD128, UNKNOWN, (int) QI_FTYPE_V4SI_V4SI_QI },
32700 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_testmv8si3_mask, "__builtin_ia32_ptestmd256", IX86_BUILTIN_PTESTMD256, UNKNOWN, (int) QI_FTYPE_V8SI_V8SI_QI },
32701 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_testmv2di3_mask, "__builtin_ia32_ptestmq128", IX86_BUILTIN_PTESTMQ128, UNKNOWN, (int) QI_FTYPE_V2DI_V2DI_QI },
32702 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_testmv4di3_mask, "__builtin_ia32_ptestmq256", IX86_BUILTIN_PTESTMQ256, UNKNOWN, (int) QI_FTYPE_V4DI_V4DI_QI },
32703 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_testnmv16qi3_mask, "__builtin_ia32_ptestnmb128", IX86_BUILTIN_PTESTNMB128, UNKNOWN, (int) HI_FTYPE_V16QI_V16QI_HI },
32704 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_testnmv32qi3_mask, "__builtin_ia32_ptestnmb256", IX86_BUILTIN_PTESTNMB256, UNKNOWN, (int) SI_FTYPE_V32QI_V32QI_SI },
32705 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_testnmv8hi3_mask, "__builtin_ia32_ptestnmw128", IX86_BUILTIN_PTESTNMW128, UNKNOWN, (int) QI_FTYPE_V8HI_V8HI_QI },
32706 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_testnmv16hi3_mask, "__builtin_ia32_ptestnmw256", IX86_BUILTIN_PTESTNMW256, UNKNOWN, (int) HI_FTYPE_V16HI_V16HI_HI },
32707 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_testnmv4si3_mask, "__builtin_ia32_ptestnmd128", IX86_BUILTIN_PTESTNMD128, UNKNOWN, (int) QI_FTYPE_V4SI_V4SI_QI },
32708 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_testnmv8si3_mask, "__builtin_ia32_ptestnmd256", IX86_BUILTIN_PTESTNMD256, UNKNOWN, (int) QI_FTYPE_V8SI_V8SI_QI },
32709 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_testnmv2di3_mask, "__builtin_ia32_ptestnmq128", IX86_BUILTIN_PTESTNMQ128, UNKNOWN, (int) QI_FTYPE_V2DI_V2DI_QI },
32710 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_testnmv4di3_mask, "__builtin_ia32_ptestnmq256", IX86_BUILTIN_PTESTNMQ256, UNKNOWN, (int) QI_FTYPE_V4DI_V4DI_QI },
32711 { OPTION_MASK_ISA_AVX512VL | OPTION_MASK_ISA_AVX512CD, CODE_FOR_avx512cd_maskb_vec_dupv2di, "__builtin_ia32_broadcastmb128", IX86_BUILTIN_PBROADCASTMB128, UNKNOWN, (int) V2DI_FTYPE_QI },
32712 { OPTION_MASK_ISA_AVX512VL | OPTION_MASK_ISA_AVX512CD, CODE_FOR_avx512cd_maskb_vec_dupv4di, "__builtin_ia32_broadcastmb256", IX86_BUILTIN_PBROADCASTMB256, UNKNOWN, (int) V4DI_FTYPE_QI },
32713 { OPTION_MASK_ISA_AVX512VL | OPTION_MASK_ISA_AVX512CD, CODE_FOR_avx512cd_maskw_vec_dupv4si, "__builtin_ia32_broadcastmw128", IX86_BUILTIN_PBROADCASTMW128, UNKNOWN, (int) V4SI_FTYPE_HI },
32714 { OPTION_MASK_ISA_AVX512VL | OPTION_MASK_ISA_AVX512CD, CODE_FOR_avx512cd_maskw_vec_dupv8si, "__builtin_ia32_broadcastmw256", IX86_BUILTIN_PBROADCASTMW256, UNKNOWN, (int) V8SI_FTYPE_HI },
32715 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_compressv4df_mask, "__builtin_ia32_compressdf256_mask", IX86_BUILTIN_COMPRESSPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_QI },
32716 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_compressv2df_mask, "__builtin_ia32_compressdf128_mask", IX86_BUILTIN_COMPRESSPD128, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_QI },
32717 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_compressv8sf_mask, "__builtin_ia32_compresssf256_mask", IX86_BUILTIN_COMPRESSPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_QI },
32718 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_compressv4sf_mask, "__builtin_ia32_compresssf128_mask", IX86_BUILTIN_COMPRESSPS128, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_QI },
32719 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_compressv4di_mask, "__builtin_ia32_compressdi256_mask", IX86_BUILTIN_PCOMPRESSQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_QI },
32720 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_compressv2di_mask, "__builtin_ia32_compressdi128_mask", IX86_BUILTIN_PCOMPRESSQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_QI },
32721 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_compressv8si_mask, "__builtin_ia32_compresssi256_mask", IX86_BUILTIN_PCOMPRESSD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_QI },
32722 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_compressv4si_mask, "__builtin_ia32_compresssi128_mask", IX86_BUILTIN_PCOMPRESSD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_QI },
32723 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv4df_mask, "__builtin_ia32_expanddf256_mask", IX86_BUILTIN_EXPANDPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_QI },
32724 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv2df_mask, "__builtin_ia32_expanddf128_mask", IX86_BUILTIN_EXPANDPD128, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_QI },
32725 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv8sf_mask, "__builtin_ia32_expandsf256_mask", IX86_BUILTIN_EXPANDPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_QI },
32726 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv4sf_mask, "__builtin_ia32_expandsf128_mask", IX86_BUILTIN_EXPANDPS128, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_QI },
32727 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv4di_mask, "__builtin_ia32_expanddi256_mask", IX86_BUILTIN_PEXPANDQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_QI },
32728 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv2di_mask, "__builtin_ia32_expanddi128_mask", IX86_BUILTIN_PEXPANDQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_QI },
32729 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv8si_mask, "__builtin_ia32_expandsi256_mask", IX86_BUILTIN_PEXPANDD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_QI },
32730 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv4si_mask, "__builtin_ia32_expandsi128_mask", IX86_BUILTIN_PEXPANDD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_QI },
32731 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv4df_maskz, "__builtin_ia32_expanddf256_maskz", IX86_BUILTIN_EXPANDPD256Z, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_QI },
32732 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv2df_maskz, "__builtin_ia32_expanddf128_maskz", IX86_BUILTIN_EXPANDPD128Z, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_QI },
32733 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv8sf_maskz, "__builtin_ia32_expandsf256_maskz", IX86_BUILTIN_EXPANDPS256Z, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_QI },
32734 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv4sf_maskz, "__builtin_ia32_expandsf128_maskz", IX86_BUILTIN_EXPANDPS128Z, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_QI },
32735 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv4di_maskz, "__builtin_ia32_expanddi256_maskz", IX86_BUILTIN_PEXPANDQ256Z, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_QI },
32736 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv2di_maskz, "__builtin_ia32_expanddi128_maskz", IX86_BUILTIN_PEXPANDQ128Z, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_QI },
32737 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv8si_maskz, "__builtin_ia32_expandsi256_maskz", IX86_BUILTIN_PEXPANDD256Z, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_QI },
32738 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv4si_maskz, "__builtin_ia32_expandsi128_maskz", IX86_BUILTIN_PEXPANDD128Z, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_QI },
32739 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_smaxv8si3_mask, "__builtin_ia32_pmaxsd256_mask", IX86_BUILTIN_PMAXSD256_MASK, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_V8SI_QI },
32740 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sminv8si3_mask, "__builtin_ia32_pminsd256_mask", IX86_BUILTIN_PMINSD256_MASK, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_V8SI_QI },
32741 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_umaxv8si3_mask, "__builtin_ia32_pmaxud256_mask", IX86_BUILTIN_PMAXUD256_MASK, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_V8SI_QI },
32742 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_uminv8si3_mask, "__builtin_ia32_pminud256_mask", IX86_BUILTIN_PMINUD256_MASK, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_V8SI_QI },
32743 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_smaxv4si3_mask, "__builtin_ia32_pmaxsd128_mask", IX86_BUILTIN_PMAXSD128_MASK, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI_QI },
32744 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sminv4si3_mask, "__builtin_ia32_pminsd128_mask", IX86_BUILTIN_PMINSD128_MASK, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI_QI },
32745 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_umaxv4si3_mask, "__builtin_ia32_pmaxud128_mask", IX86_BUILTIN_PMAXUD128_MASK, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI_QI },
32746 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_uminv4si3_mask, "__builtin_ia32_pminud128_mask", IX86_BUILTIN_PMINUD128_MASK, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI_QI },
32747 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_smaxv4di3_mask, "__builtin_ia32_pmaxsq256_mask", IX86_BUILTIN_PMAXSQ256_MASK, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_V4DI_QI },
32748 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sminv4di3_mask, "__builtin_ia32_pminsq256_mask", IX86_BUILTIN_PMINSQ256_MASK, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_V4DI_QI },
32749 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_umaxv4di3_mask, "__builtin_ia32_pmaxuq256_mask", IX86_BUILTIN_PMAXUQ256_MASK, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_V4DI_QI },
32750 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_uminv4di3_mask, "__builtin_ia32_pminuq256_mask", IX86_BUILTIN_PMINUQ256_MASK, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_V4DI_QI },
32751 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_smaxv2di3_mask, "__builtin_ia32_pmaxsq128_mask", IX86_BUILTIN_PMAXSQ128_MASK, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI_QI },
32752 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sminv2di3_mask, "__builtin_ia32_pminsq128_mask", IX86_BUILTIN_PMINSQ128_MASK, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI_QI },
32753 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_umaxv2di3_mask, "__builtin_ia32_pmaxuq128_mask", IX86_BUILTIN_PMAXUQ128_MASK, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI_QI },
32754 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_uminv2di3_mask, "__builtin_ia32_pminuq128_mask", IX86_BUILTIN_PMINUQ128_MASK, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI_QI },
32755 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_sminv32qi3_mask, "__builtin_ia32_pminsb256_mask", IX86_BUILTIN_PMINSB256_MASK, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI_V32QI_SI },
32756 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_uminv32qi3_mask, "__builtin_ia32_pminub256_mask", IX86_BUILTIN_PMINUB256_MASK, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI_V32QI_SI },
32757 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_smaxv32qi3_mask, "__builtin_ia32_pmaxsb256_mask", IX86_BUILTIN_PMAXSB256_MASK, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI_V32QI_SI },
32758 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_umaxv32qi3_mask, "__builtin_ia32_pmaxub256_mask", IX86_BUILTIN_PMAXUB256_MASK, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI_V32QI_SI },
32759 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_sminv16qi3_mask, "__builtin_ia32_pminsb128_mask", IX86_BUILTIN_PMINSB128_MASK, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_V16QI_HI },
32760 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_uminv16qi3_mask, "__builtin_ia32_pminub128_mask", IX86_BUILTIN_PMINUB128_MASK, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_V16QI_HI },
32761 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_smaxv16qi3_mask, "__builtin_ia32_pmaxsb128_mask", IX86_BUILTIN_PMAXSB128_MASK, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_V16QI_HI },
32762 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_umaxv16qi3_mask, "__builtin_ia32_pmaxub128_mask", IX86_BUILTIN_PMAXUB128_MASK, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_V16QI_HI },
32763 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_sminv16hi3_mask, "__builtin_ia32_pminsw256_mask", IX86_BUILTIN_PMINSW256_MASK, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_V16HI_HI },
32764 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_uminv16hi3_mask, "__builtin_ia32_pminuw256_mask", IX86_BUILTIN_PMINUW256_MASK, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_V16HI_HI },
32765 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_smaxv16hi3_mask, "__builtin_ia32_pmaxsw256_mask", IX86_BUILTIN_PMAXSW256_MASK, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_V16HI_HI },
32766 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_umaxv16hi3_mask, "__builtin_ia32_pmaxuw256_mask", IX86_BUILTIN_PMAXUW256_MASK, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_V16HI_HI },
32767 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_sminv8hi3_mask, "__builtin_ia32_pminsw128_mask", IX86_BUILTIN_PMINSW128_MASK, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_V8HI_QI },
32768 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_uminv8hi3_mask, "__builtin_ia32_pminuw128_mask", IX86_BUILTIN_PMINUW128_MASK, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_V8HI_QI },
32769 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_smaxv8hi3_mask, "__builtin_ia32_pmaxsw128_mask", IX86_BUILTIN_PMAXSW128_MASK, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_V8HI_QI },
32770 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_umaxv8hi3_mask, "__builtin_ia32_pmaxuw128_mask", IX86_BUILTIN_PMAXUW128_MASK, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_V8HI_QI },
32771 { OPTION_MASK_ISA_AVX512CD | OPTION_MASK_ISA_AVX512VL, CODE_FOR_conflictv4di_mask, "__builtin_ia32_vpconflictdi_256_mask", IX86_BUILTIN_VPCONFLICTQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_QI },
32772 { OPTION_MASK_ISA_AVX512CD | OPTION_MASK_ISA_AVX512VL, CODE_FOR_conflictv8si_mask, "__builtin_ia32_vpconflictsi_256_mask", IX86_BUILTIN_VPCONFLICTD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_QI },
32773 { OPTION_MASK_ISA_AVX512CD | OPTION_MASK_ISA_AVX512VL, CODE_FOR_clzv4di2_mask, "__builtin_ia32_vplzcntq_256_mask", IX86_BUILTIN_VPCLZCNTQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_QI },
32774 { OPTION_MASK_ISA_AVX512CD | OPTION_MASK_ISA_AVX512VL, CODE_FOR_clzv8si2_mask, "__builtin_ia32_vplzcntd_256_mask", IX86_BUILTIN_VPCLZCNTD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_QI },
32775 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_unpckhpd256_mask, "__builtin_ia32_unpckhpd256_mask", IX86_BUILTIN_UNPCKHPD256_MASK, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DF_QI },
32776 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_unpckhpd128_mask, "__builtin_ia32_unpckhpd128_mask", IX86_BUILTIN_UNPCKHPD128_MASK, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF_QI },
32777 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_unpckhps256_mask, "__builtin_ia32_unpckhps256_mask", IX86_BUILTIN_UNPCKHPS256_MASK, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SF_QI },
32778 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_vec_interleave_highv4sf_mask, "__builtin_ia32_unpckhps128_mask", IX86_BUILTIN_UNPCKHPS128_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF_QI },
32779 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_unpcklpd256_mask, "__builtin_ia32_unpcklpd256_mask", IX86_BUILTIN_UNPCKLPD256_MASK, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DF_QI },
32780 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_unpcklpd128_mask, "__builtin_ia32_unpcklpd128_mask", IX86_BUILTIN_UNPCKLPD128_MASK, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF_QI },
32781 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_unpcklps256_mask, "__builtin_ia32_unpcklps256_mask", IX86_BUILTIN_UNPCKLPS256_MASK, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SF_QI },
32782 { OPTION_MASK_ISA_AVX512CD | OPTION_MASK_ISA_AVX512VL, CODE_FOR_conflictv2di_mask, "__builtin_ia32_vpconflictdi_128_mask", IX86_BUILTIN_VPCONFLICTQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_QI },
32783 { OPTION_MASK_ISA_AVX512CD | OPTION_MASK_ISA_AVX512VL, CODE_FOR_conflictv4si_mask, "__builtin_ia32_vpconflictsi_128_mask", IX86_BUILTIN_VPCONFLICTD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_QI },
32784 { OPTION_MASK_ISA_AVX512CD | OPTION_MASK_ISA_AVX512VL, CODE_FOR_clzv2di2_mask, "__builtin_ia32_vplzcntq_128_mask", IX86_BUILTIN_VPCLZCNTQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_QI },
32785 { OPTION_MASK_ISA_AVX512CD | OPTION_MASK_ISA_AVX512VL, CODE_FOR_clzv4si2_mask, "__builtin_ia32_vplzcntd_128_mask", IX86_BUILTIN_VPCLZCNTD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_QI },
32786 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_unpcklps128_mask, "__builtin_ia32_unpcklps128_mask", IX86_BUILTIN_UNPCKLPS128_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF_QI },
32787 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_alignv8si_mask, "__builtin_ia32_alignd256_mask", IX86_BUILTIN_ALIGND256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_INT_V8SI_QI },
32788 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_alignv4di_mask, "__builtin_ia32_alignq256_mask", IX86_BUILTIN_ALIGNQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_INT_V4DI_QI },
32789 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_alignv4si_mask, "__builtin_ia32_alignd128_mask", IX86_BUILTIN_ALIGND128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_INT_V4SI_QI },
32790 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_alignv2di_mask, "__builtin_ia32_alignq128_mask", IX86_BUILTIN_ALIGNQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_INT_V2DI_QI },
32791 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_vcvtps2ph256_mask, "__builtin_ia32_vcvtps2ph256_mask", IX86_BUILTIN_CVTPS2PH256_MASK, UNKNOWN, (int) V8HI_FTYPE_V8SF_INT_V8HI_QI },
32792 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_vcvtps2ph_mask, "__builtin_ia32_vcvtps2ph_mask", IX86_BUILTIN_CVTPS2PH_MASK, UNKNOWN, (int) V8HI_FTYPE_V4SF_INT_V8HI_QI },
32793 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_vcvtph2ps_mask, "__builtin_ia32_vcvtph2ps_mask", IX86_BUILTIN_CVTPH2PS_MASK, UNKNOWN, (int) V4SF_FTYPE_V8HI_V4SF_QI },
32794 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_vcvtph2ps256_mask, "__builtin_ia32_vcvtph2ps256_mask", IX86_BUILTIN_CVTPH2PS256_MASK, UNKNOWN, (int) V8SF_FTYPE_V8HI_V8SF_QI },
32795 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_vec_interleave_highv4si_mask, "__builtin_ia32_punpckhdq128_mask", IX86_BUILTIN_PUNPCKHDQ128_MASK, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI_QI },
32796 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_interleave_highv8si_mask, "__builtin_ia32_punpckhdq256_mask", IX86_BUILTIN_PUNPCKHDQ256_MASK, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_V8SI_QI },
32797 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_vec_interleave_highv2di_mask, "__builtin_ia32_punpckhqdq128_mask", IX86_BUILTIN_PUNPCKHQDQ128_MASK, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI_QI },
32798 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_interleave_highv4di_mask, "__builtin_ia32_punpckhqdq256_mask", IX86_BUILTIN_PUNPCKHQDQ256_MASK, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_V4DI_QI },
32799 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_vec_interleave_lowv4si_mask, "__builtin_ia32_punpckldq128_mask", IX86_BUILTIN_PUNPCKLDQ128_MASK, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI_QI },
32800 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_interleave_lowv8si_mask, "__builtin_ia32_punpckldq256_mask", IX86_BUILTIN_PUNPCKLDQ256_MASK, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_V8SI_QI },
32801 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_vec_interleave_lowv2di_mask, "__builtin_ia32_punpcklqdq128_mask", IX86_BUILTIN_PUNPCKLQDQ128_MASK, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI_QI },
32802 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_interleave_lowv4di_mask, "__builtin_ia32_punpcklqdq256_mask", IX86_BUILTIN_PUNPCKLQDQ256_MASK, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_V4DI_QI },
32803 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_vec_interleave_highv16qi_mask, "__builtin_ia32_punpckhbw128_mask", IX86_BUILTIN_PUNPCKHBW128_MASK, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_V16QI_HI },
32804 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_interleave_highv32qi_mask, "__builtin_ia32_punpckhbw256_mask", IX86_BUILTIN_PUNPCKHBW256_MASK, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI_V32QI_SI },
32805 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_vec_interleave_highv8hi_mask, "__builtin_ia32_punpckhwd128_mask", IX86_BUILTIN_PUNPCKHWD128_MASK, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_V8HI_QI },
32806 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_interleave_highv16hi_mask, "__builtin_ia32_punpckhwd256_mask", IX86_BUILTIN_PUNPCKHWD256_MASK, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_V16HI_HI },
32807 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_vec_interleave_lowv16qi_mask, "__builtin_ia32_punpcklbw128_mask", IX86_BUILTIN_PUNPCKLBW128_MASK, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_V16QI_HI },
32808 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_interleave_lowv32qi_mask, "__builtin_ia32_punpcklbw256_mask", IX86_BUILTIN_PUNPCKLBW256_MASK, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI_V32QI_SI },
32809 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_vec_interleave_lowv8hi_mask, "__builtin_ia32_punpcklwd128_mask", IX86_BUILTIN_PUNPCKLWD128_MASK, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_V8HI_QI },
32810 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_interleave_lowv16hi_mask, "__builtin_ia32_punpcklwd256_mask", IX86_BUILTIN_PUNPCKLWD256_MASK, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_V16HI_HI },
32811 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ashlvv16hi_mask, "__builtin_ia32_psllv16hi_mask", IX86_BUILTIN_PSLLVV16HI, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_V16HI_HI },
32812 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ashlvv8hi_mask, "__builtin_ia32_psllv8hi_mask", IX86_BUILTIN_PSLLVV8HI, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_V8HI_QI },
32813 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_packssdw_mask, "__builtin_ia32_packssdw256_mask", IX86_BUILTIN_PACKSSDW256_MASK, UNKNOWN, (int) V16HI_FTYPE_V8SI_V8SI_V16HI_HI },
32814 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse2_packssdw_mask, "__builtin_ia32_packssdw128_mask", IX86_BUILTIN_PACKSSDW128_MASK, UNKNOWN, (int) V8HI_FTYPE_V4SI_V4SI_V8HI_QI },
32815 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_packusdw_mask, "__builtin_ia32_packusdw256_mask", IX86_BUILTIN_PACKUSDW256_MASK, UNKNOWN, (int) V16HI_FTYPE_V8SI_V8SI_V16HI_HI },
32816 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse4_1_packusdw_mask, "__builtin_ia32_packusdw128_mask", IX86_BUILTIN_PACKUSDW128_MASK, UNKNOWN, (int) V8HI_FTYPE_V4SI_V4SI_V8HI_QI },
32817 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_uavgv32qi3_mask, "__builtin_ia32_pavgb256_mask", IX86_BUILTIN_PAVGB256_MASK, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI_V32QI_SI },
32818 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_uavgv16hi3_mask, "__builtin_ia32_pavgw256_mask", IX86_BUILTIN_PAVGW256_MASK, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_V16HI_HI },
32819 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse2_uavgv16qi3_mask, "__builtin_ia32_pavgb128_mask", IX86_BUILTIN_PAVGB128_MASK, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_V16QI_HI },
32820 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse2_uavgv8hi3_mask, "__builtin_ia32_pavgw128_mask", IX86_BUILTIN_PAVGW128_MASK, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_V8HI_QI },
32821 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_permvarv8sf_mask, "__builtin_ia32_permvarsf256_mask", IX86_BUILTIN_VPERMVARSF256_MASK, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SI_V8SF_QI },
32822 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_permvarv4df_mask, "__builtin_ia32_permvardf256_mask", IX86_BUILTIN_VPERMVARDF256_MASK, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DI_V4DF_QI },
32823 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_permv4df_mask, "__builtin_ia32_permdf256_mask", IX86_BUILTIN_VPERMDF256_MASK, UNKNOWN, (int) V4DF_FTYPE_V4DF_INT_V4DF_QI },
32824 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_absv32qi2_mask, "__builtin_ia32_pabsb256_mask", IX86_BUILTIN_PABSB256_MASK, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI_SI },
32825 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_absv16qi2_mask, "__builtin_ia32_pabsb128_mask", IX86_BUILTIN_PABSB128_MASK, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_HI },
32826 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_absv16hi2_mask, "__builtin_ia32_pabsw256_mask", IX86_BUILTIN_PABSW256_MASK, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_HI },
32827 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_absv8hi2_mask, "__builtin_ia32_pabsw128_mask", IX86_BUILTIN_PABSW128_MASK, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_QI },
32828 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_vpermilvarv2df3_mask, "__builtin_ia32_vpermilvarpd_mask", IX86_BUILTIN_VPERMILVARPD_MASK, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DI_V2DF_QI },
32829 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_vpermilvarv4sf3_mask, "__builtin_ia32_vpermilvarps_mask", IX86_BUILTIN_VPERMILVARPS_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SI_V4SF_QI },
32830 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_vpermilvarv4df3_mask, "__builtin_ia32_vpermilvarpd256_mask", IX86_BUILTIN_VPERMILVARPD256_MASK, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DI_V4DF_QI },
32831 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_vpermilvarv8sf3_mask, "__builtin_ia32_vpermilvarps256_mask", IX86_BUILTIN_VPERMILVARPS256_MASK, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SI_V8SF_QI },
32832 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_vpermilv2df_mask, "__builtin_ia32_vpermilpd_mask", IX86_BUILTIN_VPERMILPD_MASK, UNKNOWN, (int) V2DF_FTYPE_V2DF_INT_V2DF_QI },
32833 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_vpermilv4sf_mask, "__builtin_ia32_vpermilps_mask", IX86_BUILTIN_VPERMILPS_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SF_INT_V4SF_QI },
32834 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_vpermilv4df_mask, "__builtin_ia32_vpermilpd256_mask", IX86_BUILTIN_VPERMILPD256_MASK, UNKNOWN, (int) V4DF_FTYPE_V4DF_INT_V4DF_QI },
32835 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_vpermilv8sf_mask, "__builtin_ia32_vpermilps256_mask", IX86_BUILTIN_VPERMILPS256_MASK, UNKNOWN, (int) V8SF_FTYPE_V8SF_INT_V8SF_QI },
32836 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_blendmv4di, "__builtin_ia32_blendmq_256_mask", IX86_BUILTIN_BLENDMQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_QI },
32837 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_blendmv8si, "__builtin_ia32_blendmd_256_mask", IX86_BUILTIN_BLENDMD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_QI },
32838 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_blendmv4df, "__builtin_ia32_blendmpd_256_mask", IX86_BUILTIN_BLENDMPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_QI },
32839 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_blendmv8sf, "__builtin_ia32_blendmps_256_mask", IX86_BUILTIN_BLENDMPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_QI },
32840 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_blendmv2di, "__builtin_ia32_blendmq_128_mask", IX86_BUILTIN_BLENDMQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_QI },
32841 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_blendmv4si, "__builtin_ia32_blendmd_128_mask", IX86_BUILTIN_BLENDMD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_QI },
32842 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_blendmv2df, "__builtin_ia32_blendmpd_128_mask", IX86_BUILTIN_BLENDMPD128, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_QI },
32843 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_blendmv4sf, "__builtin_ia32_blendmps_128_mask", IX86_BUILTIN_BLENDMPS128, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_QI },
32844 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_blendmv16hi, "__builtin_ia32_blendmw_256_mask", IX86_BUILTIN_BLENDMW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_HI },
32845 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_blendmv32qi, "__builtin_ia32_blendmb_256_mask", IX86_BUILTIN_BLENDMB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI_SI },
32846 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_blendmv8hi, "__builtin_ia32_blendmw_128_mask", IX86_BUILTIN_BLENDMW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_QI },
32847 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_blendmv16qi, "__builtin_ia32_blendmb_128_mask", IX86_BUILTIN_BLENDMB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_HI },
32848 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_mulv8si3_mask, "__builtin_ia32_pmulld256_mask", IX86_BUILTIN_PMULLD256_MASK, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_V8SI_QI },
32849 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_mulv4si3_mask, "__builtin_ia32_pmulld128_mask", IX86_BUILTIN_PMULLD128_MASK, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI_QI },
32850 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_vec_widen_umult_even_v8si_mask, "__builtin_ia32_pmuludq256_mask", IX86_BUILTIN_PMULUDQ256_MASK, UNKNOWN, (int) V4DI_FTYPE_V8SI_V8SI_V4DI_QI },
32851 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_vec_widen_smult_even_v8si_mask, "__builtin_ia32_pmuldq256_mask", IX86_BUILTIN_PMULDQ256_MASK, UNKNOWN, (int) V4DI_FTYPE_V8SI_V8SI_V4DI_QI },
32852 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse4_1_mulv2siv2di3_mask, "__builtin_ia32_pmuldq128_mask", IX86_BUILTIN_PMULDQ128_MASK, UNKNOWN, (int) V2DI_FTYPE_V4SI_V4SI_V2DI_QI },
32853 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_vec_widen_umult_even_v4si_mask, "__builtin_ia32_pmuludq128_mask", IX86_BUILTIN_PMULUDQ128_MASK, UNKNOWN, (int) V2DI_FTYPE_V4SI_V4SI_V2DI_QI },
32854 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_cvtpd2ps256_mask, "__builtin_ia32_cvtpd2ps256_mask", IX86_BUILTIN_CVTPD2PS256_MASK, UNKNOWN, (int) V4SF_FTYPE_V4DF_V4SF_QI },
32855 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse2_cvtpd2ps_mask, "__builtin_ia32_cvtpd2ps_mask", IX86_BUILTIN_CVTPD2PS_MASK, UNKNOWN, (int) V4SF_FTYPE_V2DF_V4SF_QI },
32856 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_permvarv8si_mask, "__builtin_ia32_permvarsi256_mask", IX86_BUILTIN_VPERMVARSI256_MASK, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_V8SI_QI },
32857 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_permvarv4di_mask, "__builtin_ia32_permvardi256_mask", IX86_BUILTIN_VPERMVARDI256_MASK, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_V4DI_QI },
32858 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_permv4di_mask, "__builtin_ia32_permdi256_mask", IX86_BUILTIN_VPERMDI256_MASK, UNKNOWN, (int) V4DI_FTYPE_V4DI_INT_V4DI_QI },
32859 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_cmpv4di3_mask, "__builtin_ia32_cmpq256_mask", IX86_BUILTIN_CMPQ256, UNKNOWN, (int) QI_FTYPE_V4DI_V4DI_INT_QI },
32860 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_cmpv8si3_mask, "__builtin_ia32_cmpd256_mask", IX86_BUILTIN_CMPD256, UNKNOWN, (int) QI_FTYPE_V8SI_V8SI_INT_QI },
32861 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ucmpv4di3_mask, "__builtin_ia32_ucmpq256_mask", IX86_BUILTIN_UCMPQ256, UNKNOWN, (int) QI_FTYPE_V4DI_V4DI_INT_QI },
32862 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ucmpv8si3_mask, "__builtin_ia32_ucmpd256_mask", IX86_BUILTIN_UCMPD256, UNKNOWN, (int) QI_FTYPE_V8SI_V8SI_INT_QI },
32863 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_cmpv32qi3_mask, "__builtin_ia32_cmpb256_mask", IX86_BUILTIN_CMPB256, UNKNOWN, (int) SI_FTYPE_V32QI_V32QI_INT_SI },
32864 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_cmpv16hi3_mask, "__builtin_ia32_cmpw256_mask", IX86_BUILTIN_CMPW256, UNKNOWN, (int) HI_FTYPE_V16HI_V16HI_INT_HI },
32865 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ucmpv32qi3_mask, "__builtin_ia32_ucmpb256_mask", IX86_BUILTIN_UCMPB256, UNKNOWN, (int) SI_FTYPE_V32QI_V32QI_INT_SI },
32866 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ucmpv16hi3_mask, "__builtin_ia32_ucmpw256_mask", IX86_BUILTIN_UCMPW256, UNKNOWN, (int) HI_FTYPE_V16HI_V16HI_INT_HI },
32867 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_cmpv4df3_mask, "__builtin_ia32_cmppd256_mask", IX86_BUILTIN_CMPPD256_MASK, UNKNOWN, (int) QI_FTYPE_V4DF_V4DF_INT_QI },
32868 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_cmpv8sf3_mask, "__builtin_ia32_cmpps256_mask", IX86_BUILTIN_CMPPS256_MASK, UNKNOWN, (int) QI_FTYPE_V8SF_V8SF_INT_QI },
32869 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_cmpv2di3_mask, "__builtin_ia32_cmpq128_mask", IX86_BUILTIN_CMPQ128, UNKNOWN, (int) QI_FTYPE_V2DI_V2DI_INT_QI },
32870 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_cmpv4si3_mask, "__builtin_ia32_cmpd128_mask", IX86_BUILTIN_CMPD128, UNKNOWN, (int) QI_FTYPE_V4SI_V4SI_INT_QI },
32871 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ucmpv2di3_mask, "__builtin_ia32_ucmpq128_mask", IX86_BUILTIN_UCMPQ128, UNKNOWN, (int) QI_FTYPE_V2DI_V2DI_INT_QI },
32872 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ucmpv4si3_mask, "__builtin_ia32_ucmpd128_mask", IX86_BUILTIN_UCMPD128, UNKNOWN, (int) QI_FTYPE_V4SI_V4SI_INT_QI },
32873 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_cmpv16qi3_mask, "__builtin_ia32_cmpb128_mask", IX86_BUILTIN_CMPB128, UNKNOWN, (int) HI_FTYPE_V16QI_V16QI_INT_HI },
32874 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_cmpv8hi3_mask, "__builtin_ia32_cmpw128_mask", IX86_BUILTIN_CMPW128, UNKNOWN, (int) QI_FTYPE_V8HI_V8HI_INT_QI },
32875 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ucmpv16qi3_mask, "__builtin_ia32_ucmpb128_mask", IX86_BUILTIN_UCMPB128, UNKNOWN, (int) HI_FTYPE_V16QI_V16QI_INT_HI },
32876 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ucmpv8hi3_mask, "__builtin_ia32_ucmpw128_mask", IX86_BUILTIN_UCMPW128, UNKNOWN, (int) QI_FTYPE_V8HI_V8HI_INT_QI },
32877 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_cmpv2df3_mask, "__builtin_ia32_cmppd128_mask", IX86_BUILTIN_CMPPD128_MASK, UNKNOWN, (int) QI_FTYPE_V2DF_V2DF_INT_QI },
32878 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_cmpv4sf3_mask, "__builtin_ia32_cmpps128_mask", IX86_BUILTIN_CMPPS128_MASK, UNKNOWN, (int) QI_FTYPE_V4SF_V4SF_INT_QI },
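/* Annotation (not part of the original table): each initializer row above
   follows the builtin_description layout used throughout this file:
   { ISA option mask, insn_code of the expander pattern, builtin name,
     ix86_builtins enum value, rtx comparison code (UNKNOWN when unused),
     function prototype enum cast to int }.
   For the masked AVX512VL entries the prototype encodes the operand order
   result-type_FTYPE_sources..._passthrough_maskmode; e.g.
   V8SI_FTYPE_V8SI_V8SI_V8SI_QI is an 8 x int result from two 8 x int
   sources, merged with an 8 x int pass-through operand under a QImode mask.
   A rough, illustrative sketch (not a verbatim quote) of how rows like these
   are turned into builtins elsewhere in this file:

     const struct builtin_description *d;
     size_t i;
     for (i = 0, d = bdesc_args; i < ARRAY_SIZE (bdesc_args); i++, d++)
       {
         if (d->name == 0)
           continue;
         def_builtin_const (d->mask, d->name,
                            (enum ix86_builtin_func_type) d->flag, d->code);
       }
*/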
32880 /* AVX512DQ. */
32881 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_avx512dq_broadcastv16sf_mask, "__builtin_ia32_broadcastf32x2_512_mask", IX86_BUILTIN_BROADCASTF32x2_512, UNKNOWN, (int) V16SF_FTYPE_V4SF_V16SF_HI },
32882 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_avx512dq_broadcastv16si_mask, "__builtin_ia32_broadcasti32x2_512_mask", IX86_BUILTIN_BROADCASTI32x2_512, UNKNOWN, (int) V16SI_FTYPE_V4SI_V16SI_HI },
32883 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_avx512dq_broadcastv8df_mask_1, "__builtin_ia32_broadcastf64x2_512_mask", IX86_BUILTIN_BROADCASTF64X2_512, UNKNOWN, (int) V8DF_FTYPE_V2DF_V8DF_QI },
32884 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_avx512dq_broadcastv8di_mask_1, "__builtin_ia32_broadcasti64x2_512_mask", IX86_BUILTIN_BROADCASTI64X2_512, UNKNOWN, (int) V8DI_FTYPE_V2DI_V8DI_QI },
32885 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_avx512dq_broadcastv16sf_mask_1, "__builtin_ia32_broadcastf32x8_512_mask", IX86_BUILTIN_BROADCASTF32X8_512, UNKNOWN, (int) V16SF_FTYPE_V8SF_V16SF_HI },
32886 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_avx512dq_broadcastv16si_mask_1, "__builtin_ia32_broadcasti32x8_512_mask", IX86_BUILTIN_BROADCASTI32X8_512, UNKNOWN, (int) V16SI_FTYPE_V8SI_V16SI_HI },
32887 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_avx512dq_vextractf64x2_mask, "__builtin_ia32_extractf64x2_512_mask", IX86_BUILTIN_EXTRACTF64X2_512, UNKNOWN, (int) V2DF_FTYPE_V8DF_INT_V2DF_QI },
32888 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_avx512dq_vextractf32x8_mask, "__builtin_ia32_extractf32x8_mask", IX86_BUILTIN_EXTRACTF32X8, UNKNOWN, (int) V8SF_FTYPE_V16SF_INT_V8SF_QI },
32889 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_avx512dq_vextracti64x2_mask, "__builtin_ia32_extracti64x2_512_mask", IX86_BUILTIN_EXTRACTI64X2_512, UNKNOWN, (int) V2DI_FTYPE_V8DI_INT_V2DI_QI },
32890 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_avx512dq_vextracti32x8_mask, "__builtin_ia32_extracti32x8_mask", IX86_BUILTIN_EXTRACTI32X8, UNKNOWN, (int) V8SI_FTYPE_V16SI_INT_V8SI_QI },
32891 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_reducepv8df_mask, "__builtin_ia32_reducepd512_mask", IX86_BUILTIN_REDUCEPD512_MASK, UNKNOWN, (int) V8DF_FTYPE_V8DF_INT_V8DF_QI },
32892 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_reducepv16sf_mask, "__builtin_ia32_reduceps512_mask", IX86_BUILTIN_REDUCEPS512_MASK, UNKNOWN, (int) V16SF_FTYPE_V16SF_INT_V16SF_HI },
32893 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_avx512dq_mulv8di3_mask, "__builtin_ia32_pmullq512_mask", IX86_BUILTIN_PMULLQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
32894 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_xorv8df3_mask, "__builtin_ia32_xorpd512_mask", IX86_BUILTIN_XORPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI },
32895 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_xorv16sf3_mask, "__builtin_ia32_xorps512_mask", IX86_BUILTIN_XORPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI },
32896 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_iorv8df3_mask, "__builtin_ia32_orpd512_mask", IX86_BUILTIN_ORPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI },
32897 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_iorv16sf3_mask, "__builtin_ia32_orps512_mask", IX86_BUILTIN_ORPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI },
32898 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_andv8df3_mask, "__builtin_ia32_andpd512_mask", IX86_BUILTIN_ANDPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI },
32899 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_andv16sf3_mask, "__builtin_ia32_andps512_mask", IX86_BUILTIN_ANDPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI },
32900 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_avx512f_andnotv8df3_mask, "__builtin_ia32_andnpd512_mask", IX86_BUILTIN_ANDNPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI},
32901 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_avx512f_andnotv16sf3_mask, "__builtin_ia32_andnps512_mask", IX86_BUILTIN_ANDNPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI },
32902 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_avx512dq_vinsertf32x8_mask, "__builtin_ia32_insertf32x8_mask", IX86_BUILTIN_INSERTF32X8, UNKNOWN, (int) V16SF_FTYPE_V16SF_V8SF_INT_V16SF_HI },
32903 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_avx512dq_vinserti32x8_mask, "__builtin_ia32_inserti32x8_mask", IX86_BUILTIN_INSERTI32X8, UNKNOWN, (int) V16SI_FTYPE_V16SI_V8SI_INT_V16SI_HI },
32904 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_avx512dq_vinsertf64x2_mask, "__builtin_ia32_insertf64x2_512_mask", IX86_BUILTIN_INSERTF64X2_512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V2DF_INT_V8DF_QI },
32905 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_avx512dq_vinserti64x2_mask, "__builtin_ia32_inserti64x2_512_mask", IX86_BUILTIN_INSERTI64X2_512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V2DI_INT_V8DI_QI },
32906 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_avx512dq_fpclassv8df_mask, "__builtin_ia32_fpclasspd512_mask", IX86_BUILTIN_FPCLASSPD512, UNKNOWN, (int) QI_FTYPE_V8DF_INT_QI },
32907 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_avx512dq_fpclassv16sf_mask, "__builtin_ia32_fpclassps512_mask", IX86_BUILTIN_FPCLASSPS512, UNKNOWN, (int) HI_FTYPE_V16SF_INT_HI },
32908 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_avx512f_cvtd2maskv16si, "__builtin_ia32_cvtd2mask512", IX86_BUILTIN_CVTD2MASK512, UNKNOWN, (int) HI_FTYPE_V16SI },
32909 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_avx512f_cvtq2maskv8di, "__builtin_ia32_cvtq2mask512", IX86_BUILTIN_CVTQ2MASK512, UNKNOWN, (int) QI_FTYPE_V8DI },
32910 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_avx512f_cvtmask2dv16si, "__builtin_ia32_cvtmask2d512", IX86_BUILTIN_CVTMASK2D512, UNKNOWN, (int) V16SI_FTYPE_HI },
32911 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_avx512f_cvtmask2qv8di, "__builtin_ia32_cvtmask2q512", IX86_BUILTIN_CVTMASK2Q512, UNKNOWN, (int) V8DI_FTYPE_QI },
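/* Illustrative note, not part of the generated table: each entry above
   pairs an ISA option mask and an insn pattern with the builtin's name,
   its IX86_BUILTIN_* code, an rtx comparison code (UNKNOWN when unused)
   and its function-type code.  For the masked AVX512 builtins the value
   operands come first, then the merge source and the write mask, so a
   V8DF_FTYPE_V8DF_V8DF_V8DF_QI entry such as __builtin_ia32_andpd512_mask
   is used roughly as

     __m512d r = __builtin_ia32_andpd512_mask (a, b, src, m);

   which is how the _mm512_mask_and_pd wrapper in avx512dqintrin.h spells
   it; treat the snippet as a hedged sketch of that convention.  */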
32913 /* AVX512BW. */
32914 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_kunpcksi, "__builtin_ia32_kunpcksi", IX86_BUILTIN_KUNPCKWD, UNKNOWN, (int) SI_FTYPE_SI_SI },
32915 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_kunpckdi, "__builtin_ia32_kunpckdi", IX86_BUILTIN_KUNPCKDQ, UNKNOWN, (int) DI_FTYPE_DI_DI },
32916 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_packusdw_mask, "__builtin_ia32_packusdw512_mask", IX86_BUILTIN_PACKUSDW512, UNKNOWN, (int) V32HI_FTYPE_V16SI_V16SI_V32HI_SI },
32917 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_ashlv4ti3, "__builtin_ia32_pslldq512", IX86_BUILTIN_PSLLDQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_INT_CONVERT },
32918 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_lshrv4ti3, "__builtin_ia32_psrldq512", IX86_BUILTIN_PSRLDQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_INT_CONVERT },
32919 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_packssdw_mask, "__builtin_ia32_packssdw512_mask", IX86_BUILTIN_PACKSSDW512, UNKNOWN, (int) V32HI_FTYPE_V16SI_V16SI_V32HI_SI },
32920 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_palignrv4ti, "__builtin_ia32_palignr512", IX86_BUILTIN_PALIGNR512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_INT_CONVERT },
32921 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_palignrv64qi_mask, "__builtin_ia32_palignr512_mask", IX86_BUILTIN_PALIGNR512_MASK, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_INT_V8DI_DI_CONVERT },
32922 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_loaddquv32hi_mask, "__builtin_ia32_movdquhi512_mask", IX86_BUILTIN_MOVDQUHI512_MASK, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_SI },
32923 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512f_loaddquv64qi_mask, "__builtin_ia32_movdquqi512_mask", IX86_BUILTIN_MOVDQUQI512_MASK, UNKNOWN, (int) V64QI_FTYPE_V64QI_V64QI_DI },
32924 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512f_psadbw, "__builtin_ia32_psadbw512", IX86_BUILTIN_PSADBW512, UNKNOWN, (int) V8DI_FTYPE_V64QI_V64QI },
32925 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_dbpsadbwv32hi_mask, "__builtin_ia32_dbpsadbw512_mask", IX86_BUILTIN_DBPSADBW512, UNKNOWN, (int) V32HI_FTYPE_V64QI_V64QI_INT_V32HI_SI },
32926 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_vec_dupv64qi_mask, "__builtin_ia32_pbroadcastb512_mask", IX86_BUILTIN_PBROADCASTB512, UNKNOWN, (int) V64QI_FTYPE_V16QI_V64QI_DI },
32927 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_vec_dup_gprv64qi_mask, "__builtin_ia32_pbroadcastb512_gpr_mask", IX86_BUILTIN_PBROADCASTB512_GPR, UNKNOWN, (int) V64QI_FTYPE_QI_V64QI_DI },
32928 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_vec_dupv32hi_mask, "__builtin_ia32_pbroadcastw512_mask", IX86_BUILTIN_PBROADCASTW512, UNKNOWN, (int) V32HI_FTYPE_V8HI_V32HI_SI },
32929 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_vec_dup_gprv32hi_mask, "__builtin_ia32_pbroadcastw512_gpr_mask", IX86_BUILTIN_PBROADCASTW512_GPR, UNKNOWN, (int) V32HI_FTYPE_HI_V32HI_SI },
32930 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_sign_extendv32qiv32hi2_mask, "__builtin_ia32_pmovsxbw512_mask", IX86_BUILTIN_PMOVSXBW512_MASK, UNKNOWN, (int) V32HI_FTYPE_V32QI_V32HI_SI },
32931 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_zero_extendv32qiv32hi2_mask, "__builtin_ia32_pmovzxbw512_mask", IX86_BUILTIN_PMOVZXBW512_MASK, UNKNOWN, (int) V32HI_FTYPE_V32QI_V32HI_SI },
32932 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_permvarv32hi_mask, "__builtin_ia32_permvarhi512_mask", IX86_BUILTIN_VPERMVARHI512_MASK, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI_SI },
32933 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_vpermt2varv32hi3_mask, "__builtin_ia32_vpermt2varhi512_mask", IX86_BUILTIN_VPERMT2VARHI512, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI_SI },
32934 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_vpermt2varv32hi3_maskz, "__builtin_ia32_vpermt2varhi512_maskz", IX86_BUILTIN_VPERMT2VARHI512_MASKZ, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI_SI },
32935 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_vpermi2varv32hi3_mask, "__builtin_ia32_vpermi2varhi512_mask", IX86_BUILTIN_VPERMI2VARHI512, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI_SI },
32936 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_uavgv64qi3_mask, "__builtin_ia32_pavgb512_mask", IX86_BUILTIN_PAVGB512, UNKNOWN, (int) V64QI_FTYPE_V64QI_V64QI_V64QI_DI },
32937 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_uavgv32hi3_mask, "__builtin_ia32_pavgw512_mask", IX86_BUILTIN_PAVGW512, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI_SI },
32938 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_addv64qi3_mask, "__builtin_ia32_paddb512_mask", IX86_BUILTIN_PADDB512, UNKNOWN, (int) V64QI_FTYPE_V64QI_V64QI_V64QI_DI },
32939 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_subv64qi3_mask, "__builtin_ia32_psubb512_mask", IX86_BUILTIN_PSUBB512, UNKNOWN, (int) V64QI_FTYPE_V64QI_V64QI_V64QI_DI },
32940 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_sssubv64qi3_mask, "__builtin_ia32_psubsb512_mask", IX86_BUILTIN_PSUBSB512, UNKNOWN, (int) V64QI_FTYPE_V64QI_V64QI_V64QI_DI },
32941 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_ssaddv64qi3_mask, "__builtin_ia32_paddsb512_mask", IX86_BUILTIN_PADDSB512, UNKNOWN, (int) V64QI_FTYPE_V64QI_V64QI_V64QI_DI },
32942 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_ussubv64qi3_mask, "__builtin_ia32_psubusb512_mask", IX86_BUILTIN_PSUBUSB512, UNKNOWN, (int) V64QI_FTYPE_V64QI_V64QI_V64QI_DI },
32943 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_usaddv64qi3_mask, "__builtin_ia32_paddusb512_mask", IX86_BUILTIN_PADDUSB512, UNKNOWN, (int) V64QI_FTYPE_V64QI_V64QI_V64QI_DI },
32944 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_subv32hi3_mask, "__builtin_ia32_psubw512_mask", IX86_BUILTIN_PSUBW512, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI_SI },
32945 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_addv32hi3_mask, "__builtin_ia32_paddw512_mask", IX86_BUILTIN_PADDW512, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI_SI },
32946 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_sssubv32hi3_mask, "__builtin_ia32_psubsw512_mask", IX86_BUILTIN_PSUBSW512, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI_SI },
32947 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_ssaddv32hi3_mask, "__builtin_ia32_paddsw512_mask", IX86_BUILTIN_PADDSW512, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI_SI },
32948 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_ussubv32hi3_mask, "__builtin_ia32_psubusw512_mask", IX86_BUILTIN_PSUBUSW512, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI_SI },
32949 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_usaddv32hi3_mask, "__builtin_ia32_paddusw512_mask", IX86_BUILTIN_PADDUSW512, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI_SI },
32950 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_umaxv32hi3_mask, "__builtin_ia32_pmaxuw512_mask", IX86_BUILTIN_PMAXUW512, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI_SI },
32951 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_smaxv32hi3_mask, "__builtin_ia32_pmaxsw512_mask", IX86_BUILTIN_PMAXSW512, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI_SI },
32952 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_uminv32hi3_mask, "__builtin_ia32_pminuw512_mask", IX86_BUILTIN_PMINUW512, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI_SI },
32953 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_sminv32hi3_mask, "__builtin_ia32_pminsw512_mask", IX86_BUILTIN_PMINSW512, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI_SI },
32954 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_umaxv64qi3_mask, "__builtin_ia32_pmaxub512_mask", IX86_BUILTIN_PMAXUB512, UNKNOWN, (int) V64QI_FTYPE_V64QI_V64QI_V64QI_DI },
32955 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_smaxv64qi3_mask, "__builtin_ia32_pmaxsb512_mask", IX86_BUILTIN_PMAXSB512, UNKNOWN, (int) V64QI_FTYPE_V64QI_V64QI_V64QI_DI },
32956 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_uminv64qi3_mask, "__builtin_ia32_pminub512_mask", IX86_BUILTIN_PMINUB512, UNKNOWN, (int) V64QI_FTYPE_V64QI_V64QI_V64QI_DI },
32957 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_sminv64qi3_mask, "__builtin_ia32_pminsb512_mask", IX86_BUILTIN_PMINSB512, UNKNOWN, (int) V64QI_FTYPE_V64QI_V64QI_V64QI_DI },
32958 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_truncatev32hiv32qi2_mask, "__builtin_ia32_pmovwb512_mask", IX86_BUILTIN_PMOVWB512, UNKNOWN, (int) V32QI_FTYPE_V32HI_V32QI_SI },
32959 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_ss_truncatev32hiv32qi2_mask, "__builtin_ia32_pmovswb512_mask", IX86_BUILTIN_PMOVSWB512, UNKNOWN, (int) V32QI_FTYPE_V32HI_V32QI_SI },
32960 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_us_truncatev32hiv32qi2_mask, "__builtin_ia32_pmovuswb512_mask", IX86_BUILTIN_PMOVUSWB512, UNKNOWN, (int) V32QI_FTYPE_V32HI_V32QI_SI },
32961 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_umulhrswv32hi3_mask, "__builtin_ia32_pmulhrsw512_mask", IX86_BUILTIN_PMULHRSW512_MASK, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI_SI },
32962 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_umulv32hi3_highpart_mask, "__builtin_ia32_pmulhuw512_mask", IX86_BUILTIN_PMULHUW512_MASK, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI_SI },
32963 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_smulv32hi3_highpart_mask, "__builtin_ia32_pmulhw512_mask", IX86_BUILTIN_PMULHW512_MASK, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI_SI },
32964 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_mulv32hi3_mask, "__builtin_ia32_pmullw512_mask", IX86_BUILTIN_PMULLW512_MASK, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI_SI },
32965 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_ashlv32hi3_mask, "__builtin_ia32_psllwi512_mask", IX86_BUILTIN_PSLLWI512_MASK, UNKNOWN, (int) V32HI_FTYPE_V32HI_INT_V32HI_SI },
32966 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_ashlv32hi3_mask, "__builtin_ia32_psllw512_mask", IX86_BUILTIN_PSLLW512_MASK, UNKNOWN, (int) V32HI_FTYPE_V32HI_V8HI_V32HI_SI },
32967 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_packsswb_mask, "__builtin_ia32_packsswb512_mask", IX86_BUILTIN_PACKSSWB512, UNKNOWN, (int) V64QI_FTYPE_V32HI_V32HI_V64QI_DI },
32968 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_packuswb_mask, "__builtin_ia32_packuswb512_mask", IX86_BUILTIN_PACKUSWB512, UNKNOWN, (int) V64QI_FTYPE_V32HI_V32HI_V64QI_DI },
32969 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_ashrvv32hi_mask, "__builtin_ia32_psrav32hi_mask", IX86_BUILTIN_PSRAVV32HI, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI_SI },
32970 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_pmaddubsw512v32hi_mask, "__builtin_ia32_pmaddubsw512_mask", IX86_BUILTIN_PMADDUBSW512_MASK, UNKNOWN, (int) V32HI_FTYPE_V64QI_V64QI_V32HI_SI },
32971 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_pmaddwd512v32hi_mask, "__builtin_ia32_pmaddwd512_mask", IX86_BUILTIN_PMADDWD512_MASK, UNKNOWN, (int) V16SI_FTYPE_V32HI_V32HI_V16SI_HI },
32972 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_lshrvv32hi_mask, "__builtin_ia32_psrlv32hi_mask", IX86_BUILTIN_PSRLVV32HI, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI_SI },
32973 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_interleave_highv64qi_mask, "__builtin_ia32_punpckhbw512_mask", IX86_BUILTIN_PUNPCKHBW512, UNKNOWN, (int) V64QI_FTYPE_V64QI_V64QI_V64QI_DI },
32974 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_interleave_highv32hi_mask, "__builtin_ia32_punpckhwd512_mask", IX86_BUILTIN_PUNPCKHWD512, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI_SI },
32975 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_interleave_lowv64qi_mask, "__builtin_ia32_punpcklbw512_mask", IX86_BUILTIN_PUNPCKLBW512, UNKNOWN, (int) V64QI_FTYPE_V64QI_V64QI_V64QI_DI },
32976 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_interleave_lowv32hi_mask, "__builtin_ia32_punpcklwd512_mask", IX86_BUILTIN_PUNPCKLWD512, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI_SI },
32977 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_pshufbv64qi3_mask, "__builtin_ia32_pshufb512_mask", IX86_BUILTIN_PSHUFB512, UNKNOWN, (int) V64QI_FTYPE_V64QI_V64QI_V64QI_DI },
32978 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_pshufhwv32hi_mask, "__builtin_ia32_pshufhw512_mask", IX86_BUILTIN_PSHUFHW512, UNKNOWN, (int) V32HI_FTYPE_V32HI_INT_V32HI_SI },
32979 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_pshuflwv32hi_mask, "__builtin_ia32_pshuflw512_mask", IX86_BUILTIN_PSHUFLW512, UNKNOWN, (int) V32HI_FTYPE_V32HI_INT_V32HI_SI },
32980 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_ashrv32hi3_mask, "__builtin_ia32_psrawi512_mask", IX86_BUILTIN_PSRAWI512, UNKNOWN, (int) V32HI_FTYPE_V32HI_INT_V32HI_SI },
32981 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_ashrv32hi3_mask, "__builtin_ia32_psraw512_mask", IX86_BUILTIN_PSRAW512, UNKNOWN, (int) V32HI_FTYPE_V32HI_V8HI_V32HI_SI },
32982 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_lshrv32hi3_mask, "__builtin_ia32_psrlwi512_mask", IX86_BUILTIN_PSRLWI512, UNKNOWN, (int) V32HI_FTYPE_V32HI_INT_V32HI_SI },
32983 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_lshrv32hi3_mask, "__builtin_ia32_psrlw512_mask", IX86_BUILTIN_PSRLW512, UNKNOWN, (int) V32HI_FTYPE_V32HI_V8HI_V32HI_SI },
32984 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_cvtb2maskv64qi, "__builtin_ia32_cvtb2mask512", IX86_BUILTIN_CVTB2MASK512, UNKNOWN, (int) DI_FTYPE_V64QI },
32985 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_cvtw2maskv32hi, "__builtin_ia32_cvtw2mask512", IX86_BUILTIN_CVTW2MASK512, UNKNOWN, (int) SI_FTYPE_V32HI },
32986 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_cvtmask2bv64qi, "__builtin_ia32_cvtmask2b512", IX86_BUILTIN_CVTMASK2B512, UNKNOWN, (int) V64QI_FTYPE_DI },
32987 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_cvtmask2wv32hi, "__builtin_ia32_cvtmask2w512", IX86_BUILTIN_CVTMASK2W512, UNKNOWN, (int) V32HI_FTYPE_SI },
32988 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_eqv64qi3_mask, "__builtin_ia32_pcmpeqb512_mask", IX86_BUILTIN_PCMPEQB512_MASK, UNKNOWN, (int) DI_FTYPE_V64QI_V64QI_DI },
32989 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_eqv32hi3_mask, "__builtin_ia32_pcmpeqw512_mask", IX86_BUILTIN_PCMPEQW512_MASK, UNKNOWN, (int) SI_FTYPE_V32HI_V32HI_SI },
32990 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_gtv64qi3_mask, "__builtin_ia32_pcmpgtb512_mask", IX86_BUILTIN_PCMPGTB512_MASK, UNKNOWN, (int) DI_FTYPE_V64QI_V64QI_DI },
32991 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_gtv32hi3_mask, "__builtin_ia32_pcmpgtw512_mask", IX86_BUILTIN_PCMPGTW512_MASK, UNKNOWN, (int) SI_FTYPE_V32HI_V32HI_SI },
32992 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_testmv64qi3_mask, "__builtin_ia32_ptestmb512", IX86_BUILTIN_PTESTMB512, UNKNOWN, (int) DI_FTYPE_V64QI_V64QI_DI },
32993 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_testmv32hi3_mask, "__builtin_ia32_ptestmw512", IX86_BUILTIN_PTESTMW512, UNKNOWN, (int) SI_FTYPE_V32HI_V32HI_SI },
32994 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_testnmv64qi3_mask, "__builtin_ia32_ptestnmb512", IX86_BUILTIN_PTESTNMB512, UNKNOWN, (int) DI_FTYPE_V64QI_V64QI_DI },
32995 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_testnmv32hi3_mask, "__builtin_ia32_ptestnmw512", IX86_BUILTIN_PTESTNMW512, UNKNOWN, (int) SI_FTYPE_V32HI_V32HI_SI },
32996 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_ashlvv32hi_mask, "__builtin_ia32_psllv32hi_mask", IX86_BUILTIN_PSLLVV32HI, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI_SI },
32997 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_absv64qi2_mask, "__builtin_ia32_pabsb512_mask", IX86_BUILTIN_PABSB512, UNKNOWN, (int) V64QI_FTYPE_V64QI_V64QI_DI },
32998 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_absv32hi2_mask, "__builtin_ia32_pabsw512_mask", IX86_BUILTIN_PABSW512, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_SI },
32999 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_blendmv32hi, "__builtin_ia32_blendmw_512_mask", IX86_BUILTIN_BLENDMW512, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_SI },
33000 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_blendmv64qi, "__builtin_ia32_blendmb_512_mask", IX86_BUILTIN_BLENDMB512, UNKNOWN, (int) V64QI_FTYPE_V64QI_V64QI_DI },
33001 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_cmpv64qi3_mask, "__builtin_ia32_cmpb512_mask", IX86_BUILTIN_CMPB512, UNKNOWN, (int) DI_FTYPE_V64QI_V64QI_INT_DI },
33002 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_cmpv32hi3_mask, "__builtin_ia32_cmpw512_mask", IX86_BUILTIN_CMPW512, UNKNOWN, (int) SI_FTYPE_V32HI_V32HI_INT_SI },
33003 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_ucmpv64qi3_mask, "__builtin_ia32_ucmpb512_mask", IX86_BUILTIN_UCMPB512, UNKNOWN, (int) DI_FTYPE_V64QI_V64QI_INT_DI },
33004 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_ucmpv32hi3_mask, "__builtin_ia32_ucmpw512_mask", IX86_BUILTIN_UCMPW512, UNKNOWN, (int) SI_FTYPE_V32HI_V32HI_INT_SI },
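/* Illustrative note: the AVX512BW entries above work on 32 x HImode or
   64 x QImode elements, so their write masks are full SImode/DImode
   values (__mmask32/__mmask64 at the intrinsic level) rather than the
   QImode/HImode masks used by the other 512-bit builtins.  A hedged
   sketch, following the V64QI_FTYPE_V64QI_V64QI_V64QI_DI prototype and
   the _mm512_mask_add_epi8 wrapper in avx512bwintrin.h:

     __m512i r = __builtin_ia32_paddb512_mask (a, b, src, (__mmask64) m);  */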
33006 /* AVX512IFMA */
33007 { OPTION_MASK_ISA_AVX512IFMA, CODE_FOR_vpamdd52luqv8di_mask, "__builtin_ia32_vpmadd52luq512_mask", IX86_BUILTIN_VPMADD52LUQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
33008 { OPTION_MASK_ISA_AVX512IFMA, CODE_FOR_vpamdd52luqv8di_maskz, "__builtin_ia32_vpmadd52luq512_maskz", IX86_BUILTIN_VPMADD52LUQ512_MASKZ, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
33009 { OPTION_MASK_ISA_AVX512IFMA, CODE_FOR_vpamdd52huqv8di_mask, "__builtin_ia32_vpmadd52huq512_mask", IX86_BUILTIN_VPMADD52HUQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
33010 { OPTION_MASK_ISA_AVX512IFMA, CODE_FOR_vpamdd52huqv8di_maskz, "__builtin_ia32_vpmadd52huq512_maskz", IX86_BUILTIN_VPMADD52HUQ512_MASKZ, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
33011 { OPTION_MASK_ISA_AVX512IFMA | OPTION_MASK_ISA_AVX512VL, CODE_FOR_vpamdd52luqv4di_mask, "__builtin_ia32_vpmadd52luq256_mask", IX86_BUILTIN_VPMADD52LUQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_V4DI_QI },
33012 { OPTION_MASK_ISA_AVX512IFMA | OPTION_MASK_ISA_AVX512VL, CODE_FOR_vpamdd52luqv4di_maskz, "__builtin_ia32_vpmadd52luq256_maskz", IX86_BUILTIN_VPMADD52LUQ256_MASKZ, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_V4DI_QI },
33013 { OPTION_MASK_ISA_AVX512IFMA | OPTION_MASK_ISA_AVX512VL, CODE_FOR_vpamdd52huqv4di_mask, "__builtin_ia32_vpmadd52huq256_mask", IX86_BUILTIN_VPMADD52HUQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_V4DI_QI },
33014 { OPTION_MASK_ISA_AVX512IFMA | OPTION_MASK_ISA_AVX512VL, CODE_FOR_vpamdd52huqv4di_maskz, "__builtin_ia32_vpmadd52huq256_maskz", IX86_BUILTIN_VPMADD52HUQ256_MASKZ, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_V4DI_QI },
33015 { OPTION_MASK_ISA_AVX512IFMA | OPTION_MASK_ISA_AVX512VL, CODE_FOR_vpamdd52luqv2di_mask, "__builtin_ia32_vpmadd52luq128_mask", IX86_BUILTIN_VPMADD52LUQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI_QI },
33016 { OPTION_MASK_ISA_AVX512IFMA | OPTION_MASK_ISA_AVX512VL, CODE_FOR_vpamdd52luqv2di_maskz, "__builtin_ia32_vpmadd52luq128_maskz", IX86_BUILTIN_VPMADD52LUQ128_MASKZ, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI_QI },
33017 { OPTION_MASK_ISA_AVX512IFMA | OPTION_MASK_ISA_AVX512VL, CODE_FOR_vpamdd52huqv2di_mask, "__builtin_ia32_vpmadd52huq128_mask", IX86_BUILTIN_VPMADD52HUQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI_QI },
33018 { OPTION_MASK_ISA_AVX512IFMA | OPTION_MASK_ISA_AVX512VL, CODE_FOR_vpamdd52huqv2di_maskz, "__builtin_ia32_vpmadd52huq128_maskz", IX86_BUILTIN_VPMADD52HUQ128_MASKZ, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI_QI },
33020 /* AVX512VBMI */
33021 { OPTION_MASK_ISA_AVX512VBMI, CODE_FOR_vpmultishiftqbv64qi_mask, "__builtin_ia32_vpmultishiftqb512_mask", IX86_BUILTIN_VPMULTISHIFTQB512, UNKNOWN, (int) V64QI_FTYPE_V64QI_V64QI_V64QI_DI },
33022 { OPTION_MASK_ISA_AVX512VBMI | OPTION_MASK_ISA_AVX512VL, CODE_FOR_vpmultishiftqbv32qi_mask, "__builtin_ia32_vpmultishiftqb256_mask", IX86_BUILTIN_VPMULTISHIFTQB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI_V32QI_SI },
33023 { OPTION_MASK_ISA_AVX512VBMI | OPTION_MASK_ISA_AVX512VL, CODE_FOR_vpmultishiftqbv16qi_mask, "__builtin_ia32_vpmultishiftqb128_mask", IX86_BUILTIN_VPMULTISHIFTQB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_V16QI_HI },
33024 { OPTION_MASK_ISA_AVX512VBMI, CODE_FOR_avx512bw_permvarv64qi_mask, "__builtin_ia32_permvarqi512_mask", IX86_BUILTIN_VPERMVARQI512_MASK, UNKNOWN, (int) V64QI_FTYPE_V64QI_V64QI_V64QI_DI },
33025 { OPTION_MASK_ISA_AVX512VBMI, CODE_FOR_avx512bw_vpermt2varv64qi3_mask, "__builtin_ia32_vpermt2varqi512_mask", IX86_BUILTIN_VPERMT2VARQI512, UNKNOWN, (int) V64QI_FTYPE_V64QI_V64QI_V64QI_DI },
33026 { OPTION_MASK_ISA_AVX512VBMI, CODE_FOR_avx512bw_vpermt2varv64qi3_maskz, "__builtin_ia32_vpermt2varqi512_maskz", IX86_BUILTIN_VPERMT2VARQI512_MASKZ, UNKNOWN, (int) V64QI_FTYPE_V64QI_V64QI_V64QI_DI },
33027 { OPTION_MASK_ISA_AVX512VBMI, CODE_FOR_avx512bw_vpermi2varv64qi3_mask, "__builtin_ia32_vpermi2varqi512_mask", IX86_BUILTIN_VPERMI2VARQI512, UNKNOWN, (int) V64QI_FTYPE_V64QI_V64QI_V64QI_DI },
33028 { OPTION_MASK_ISA_AVX512VBMI | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_permvarv32qi_mask, "__builtin_ia32_permvarqi256_mask", IX86_BUILTIN_VPERMVARQI256_MASK, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI_V32QI_SI },
33029 { OPTION_MASK_ISA_AVX512VBMI | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_permvarv16qi_mask, "__builtin_ia32_permvarqi128_mask", IX86_BUILTIN_VPERMVARQI128_MASK, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_V16QI_HI },
33030 { OPTION_MASK_ISA_AVX512VBMI | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermt2varv32qi3_mask, "__builtin_ia32_vpermt2varqi256_mask", IX86_BUILTIN_VPERMT2VARQI256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI_V32QI_SI },
33031 { OPTION_MASK_ISA_AVX512VBMI | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermt2varv32qi3_maskz, "__builtin_ia32_vpermt2varqi256_maskz", IX86_BUILTIN_VPERMT2VARQI256_MASKZ, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI_V32QI_SI },
33032 { OPTION_MASK_ISA_AVX512VBMI | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermt2varv16qi3_mask, "__builtin_ia32_vpermt2varqi128_mask", IX86_BUILTIN_VPERMT2VARQI128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_V16QI_HI },
33033 { OPTION_MASK_ISA_AVX512VBMI | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermt2varv16qi3_maskz, "__builtin_ia32_vpermt2varqi128_maskz", IX86_BUILTIN_VPERMT2VARQI128_MASKZ, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_V16QI_HI },
33034 { OPTION_MASK_ISA_AVX512VBMI | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermi2varv32qi3_mask, "__builtin_ia32_vpermi2varqi256_mask", IX86_BUILTIN_VPERMI2VARQI256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI_V32QI_SI },
33035 { OPTION_MASK_ISA_AVX512VBMI | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermi2varv16qi3_mask, "__builtin_ia32_vpermi2varqi128_mask", IX86_BUILTIN_VPERMI2VARQI128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_V16QI_HI },
33036 };
33038 /* Builtins with rounding support. */
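/* Illustrative note: every prototype in this table ends in _INT because
   these builtins take a trailing SAE/rounding-mode immediate on top of
   their normal operands.  A hedged sketch, assuming the avx512fintrin.h
   spelling used by _mm512_mask_add_round_pd:

     __m512d r = __builtin_ia32_addpd512_mask (a, b, src, m,
                                               _MM_FROUND_TO_NEAREST_INT
                                               | _MM_FROUND_NO_EXC);  */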
33039 static const struct builtin_description bdesc_round_args[] =
33040 {
33041 /* AVX512F */
33042 { OPTION_MASK_ISA_AVX512F, CODE_FOR_addv8df3_mask_round, "__builtin_ia32_addpd512_mask", IX86_BUILTIN_ADDPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI_INT },
33043 { OPTION_MASK_ISA_AVX512F, CODE_FOR_addv16sf3_mask_round, "__builtin_ia32_addps512_mask", IX86_BUILTIN_ADDPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT },
33044 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse2_vmaddv2df3_round, "__builtin_ia32_addsd_round", IX86_BUILTIN_ADDSD_ROUND, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
33045 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse_vmaddv4sf3_round, "__builtin_ia32_addss_round", IX86_BUILTIN_ADDSS_ROUND, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
33046 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_cmpv8df3_mask_round, "__builtin_ia32_cmppd512_mask", IX86_BUILTIN_CMPPD512, UNKNOWN, (int) QI_FTYPE_V8DF_V8DF_INT_QI_INT },
33047 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_cmpv16sf3_mask_round, "__builtin_ia32_cmpps512_mask", IX86_BUILTIN_CMPPS512, UNKNOWN, (int) HI_FTYPE_V16SF_V16SF_INT_HI_INT },
33048 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vmcmpv2df3_mask_round, "__builtin_ia32_cmpsd_mask", IX86_BUILTIN_CMPSD_MASK, UNKNOWN, (int) QI_FTYPE_V2DF_V2DF_INT_QI_INT },
33049 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vmcmpv4sf3_mask_round, "__builtin_ia32_cmpss_mask", IX86_BUILTIN_CMPSS_MASK, UNKNOWN, (int) QI_FTYPE_V4SF_V4SF_INT_QI_INT },
33050 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse2_comi_round, "__builtin_ia32_vcomisd", IX86_BUILTIN_COMIDF, UNKNOWN, (int) INT_FTYPE_V2DF_V2DF_INT_INT },
33051 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse_comi_round, "__builtin_ia32_vcomiss", IX86_BUILTIN_COMISF, UNKNOWN, (int) INT_FTYPE_V4SF_V4SF_INT_INT },
33052 { OPTION_MASK_ISA_AVX512F, CODE_FOR_floatv16siv16sf2_mask_round, "__builtin_ia32_cvtdq2ps512_mask", IX86_BUILTIN_CVTDQ2PS512, UNKNOWN, (int) V16SF_FTYPE_V16SI_V16SF_HI_INT },
33053 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_cvtpd2dq512_mask_round, "__builtin_ia32_cvtpd2dq512_mask", IX86_BUILTIN_CVTPD2DQ512, UNKNOWN, (int) V8SI_FTYPE_V8DF_V8SI_QI_INT },
33054 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_cvtpd2ps512_mask_round, "__builtin_ia32_cvtpd2ps512_mask", IX86_BUILTIN_CVTPD2PS512, UNKNOWN, (int) V8SF_FTYPE_V8DF_V8SF_QI_INT },
33055 { OPTION_MASK_ISA_AVX512F, CODE_FOR_ufix_notruncv8dfv8si2_mask_round, "__builtin_ia32_cvtpd2udq512_mask", IX86_BUILTIN_CVTPD2UDQ512, UNKNOWN, (int) V8SI_FTYPE_V8DF_V8SI_QI_INT },
33056 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vcvtph2ps512_mask_round, "__builtin_ia32_vcvtph2ps512_mask", IX86_BUILTIN_CVTPH2PS512, UNKNOWN, (int) V16SF_FTYPE_V16HI_V16SF_HI_INT },
33057 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fix_notruncv16sfv16si_mask_round, "__builtin_ia32_cvtps2dq512_mask", IX86_BUILTIN_CVTPS2DQ512, UNKNOWN, (int) V16SI_FTYPE_V16SF_V16SI_HI_INT },
33058 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_cvtps2pd512_mask_round, "__builtin_ia32_cvtps2pd512_mask", IX86_BUILTIN_CVTPS2PD512, UNKNOWN, (int) V8DF_FTYPE_V8SF_V8DF_QI_INT },
33059 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_ufix_notruncv16sfv16si_mask_round, "__builtin_ia32_cvtps2udq512_mask", IX86_BUILTIN_CVTPS2UDQ512, UNKNOWN, (int) V16SI_FTYPE_V16SF_V16SI_HI_INT },
33060 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse2_cvtsd2ss_round, "__builtin_ia32_cvtsd2ss_round", IX86_BUILTIN_CVTSD2SS_ROUND, UNKNOWN, (int) V4SF_FTYPE_V4SF_V2DF_INT },
33061 { OPTION_MASK_ISA_AVX512F | OPTION_MASK_ISA_64BIT, CODE_FOR_sse2_cvtsi2sdq_round, "__builtin_ia32_cvtsi2sd64", IX86_BUILTIN_CVTSI2SD64, UNKNOWN, (int) V2DF_FTYPE_V2DF_INT64_INT },
33062 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse_cvtsi2ss_round, "__builtin_ia32_cvtsi2ss32", IX86_BUILTIN_CVTSI2SS32, UNKNOWN, (int) V4SF_FTYPE_V4SF_INT_INT },
33063 { OPTION_MASK_ISA_AVX512F | OPTION_MASK_ISA_64BIT, CODE_FOR_sse_cvtsi2ssq_round, "__builtin_ia32_cvtsi2ss64", IX86_BUILTIN_CVTSI2SS64, UNKNOWN, (int) V4SF_FTYPE_V4SF_INT64_INT },
33064 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse2_cvtss2sd_round, "__builtin_ia32_cvtss2sd_round", IX86_BUILTIN_CVTSS2SD_ROUND, UNKNOWN, (int) V2DF_FTYPE_V2DF_V4SF_INT },
33065 { OPTION_MASK_ISA_AVX512F, CODE_FOR_fix_truncv8dfv8si2_mask_round, "__builtin_ia32_cvttpd2dq512_mask", IX86_BUILTIN_CVTTPD2DQ512, UNKNOWN, (int) V8SI_FTYPE_V8DF_V8SI_QI_INT },
33066 { OPTION_MASK_ISA_AVX512F, CODE_FOR_ufix_truncv8dfv8si2_mask_round, "__builtin_ia32_cvttpd2udq512_mask", IX86_BUILTIN_CVTTPD2UDQ512, UNKNOWN, (int) V8SI_FTYPE_V8DF_V8SI_QI_INT },
33067 { OPTION_MASK_ISA_AVX512F, CODE_FOR_fix_truncv16sfv16si2_mask_round, "__builtin_ia32_cvttps2dq512_mask", IX86_BUILTIN_CVTTPS2DQ512, UNKNOWN, (int) V16SI_FTYPE_V16SF_V16SI_HI_INT },
33068 { OPTION_MASK_ISA_AVX512F, CODE_FOR_ufix_truncv16sfv16si2_mask_round, "__builtin_ia32_cvttps2udq512_mask", IX86_BUILTIN_CVTTPS2UDQ512, UNKNOWN, (int) V16SI_FTYPE_V16SF_V16SI_HI_INT },
33069 { OPTION_MASK_ISA_AVX512F, CODE_FOR_ufloatv16siv16sf2_mask_round, "__builtin_ia32_cvtudq2ps512_mask", IX86_BUILTIN_CVTUDQ2PS512, UNKNOWN, (int) V16SF_FTYPE_V16SI_V16SF_HI_INT },
33070 { OPTION_MASK_ISA_AVX512F | OPTION_MASK_ISA_64BIT, CODE_FOR_cvtusi2sd64_round, "__builtin_ia32_cvtusi2sd64", IX86_BUILTIN_CVTUSI2SD64, UNKNOWN, (int) V2DF_FTYPE_V2DF_UINT64_INT },
33071 { OPTION_MASK_ISA_AVX512F, CODE_FOR_cvtusi2ss32_round, "__builtin_ia32_cvtusi2ss32", IX86_BUILTIN_CVTUSI2SS32, UNKNOWN, (int) V4SF_FTYPE_V4SF_UINT_INT },
33072 { OPTION_MASK_ISA_AVX512F | OPTION_MASK_ISA_64BIT, CODE_FOR_cvtusi2ss64_round, "__builtin_ia32_cvtusi2ss64", IX86_BUILTIN_CVTUSI2SS64, UNKNOWN, (int) V4SF_FTYPE_V4SF_UINT64_INT },
33073 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_divv8df3_mask_round, "__builtin_ia32_divpd512_mask", IX86_BUILTIN_DIVPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI_INT },
33074 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_divv16sf3_mask_round, "__builtin_ia32_divps512_mask", IX86_BUILTIN_DIVPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT },
33075 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse2_vmdivv2df3_round, "__builtin_ia32_divsd_round", IX86_BUILTIN_DIVSD_ROUND, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
33076 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse_vmdivv4sf3_round, "__builtin_ia32_divss_round", IX86_BUILTIN_DIVSS_ROUND, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
33077 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fixupimmv8df_mask_round, "__builtin_ia32_fixupimmpd512_mask", IX86_BUILTIN_FIXUPIMMPD512_MASK, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DI_INT_QI_INT },
33078 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fixupimmv8df_maskz_round, "__builtin_ia32_fixupimmpd512_maskz", IX86_BUILTIN_FIXUPIMMPD512_MASKZ, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DI_INT_QI_INT },
33079 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fixupimmv16sf_mask_round, "__builtin_ia32_fixupimmps512_mask", IX86_BUILTIN_FIXUPIMMPS512_MASK, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SI_INT_HI_INT },
33080 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fixupimmv16sf_maskz_round, "__builtin_ia32_fixupimmps512_maskz", IX86_BUILTIN_FIXUPIMMPS512_MASKZ, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SI_INT_HI_INT },
33081 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_sfixupimmv2df_mask_round, "__builtin_ia32_fixupimmsd_mask", IX86_BUILTIN_FIXUPIMMSD128_MASK, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DI_INT_QI_INT },
33082 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_sfixupimmv2df_maskz_round, "__builtin_ia32_fixupimmsd_maskz", IX86_BUILTIN_FIXUPIMMSD128_MASKZ, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DI_INT_QI_INT },
33083 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_sfixupimmv4sf_mask_round, "__builtin_ia32_fixupimmss_mask", IX86_BUILTIN_FIXUPIMMSS128_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SI_INT_QI_INT },
33084 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_sfixupimmv4sf_maskz_round, "__builtin_ia32_fixupimmss_maskz", IX86_BUILTIN_FIXUPIMMSS128_MASKZ, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SI_INT_QI_INT },
33085 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_getexpv8df_mask_round, "__builtin_ia32_getexppd512_mask", IX86_BUILTIN_GETEXPPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_QI_INT },
33086 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_getexpv16sf_mask_round, "__builtin_ia32_getexpps512_mask", IX86_BUILTIN_GETEXPPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_HI_INT },
33087 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_sgetexpv2df_round, "__builtin_ia32_getexpsd128_round", IX86_BUILTIN_GETEXPSD128, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
33088 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_sgetexpv4sf_round, "__builtin_ia32_getexpss128_round", IX86_BUILTIN_GETEXPSS128, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
33089 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_getmantv8df_mask_round, "__builtin_ia32_getmantpd512_mask", IX86_BUILTIN_GETMANTPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_INT_V8DF_QI_INT },
33090 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_getmantv16sf_mask_round, "__builtin_ia32_getmantps512_mask", IX86_BUILTIN_GETMANTPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_INT_V16SF_HI_INT },
33091 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vgetmantv2df_round, "__builtin_ia32_getmantsd_round", IX86_BUILTIN_GETMANTSD128, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT_INT },
33092 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vgetmantv4sf_round, "__builtin_ia32_getmantss_round", IX86_BUILTIN_GETMANTSS128, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT_INT },
33093 { OPTION_MASK_ISA_AVX512F, CODE_FOR_smaxv8df3_mask_round, "__builtin_ia32_maxpd512_mask", IX86_BUILTIN_MAXPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI_INT },
33094 { OPTION_MASK_ISA_AVX512F, CODE_FOR_smaxv16sf3_mask_round, "__builtin_ia32_maxps512_mask", IX86_BUILTIN_MAXPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT },
33095 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse2_vmsmaxv2df3_round, "__builtin_ia32_maxsd_round", IX86_BUILTIN_MAXSD_ROUND, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
33096 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse_vmsmaxv4sf3_round, "__builtin_ia32_maxss_round", IX86_BUILTIN_MAXSS_ROUND, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
33097 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sminv8df3_mask_round, "__builtin_ia32_minpd512_mask", IX86_BUILTIN_MINPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI_INT },
33098 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sminv16sf3_mask_round, "__builtin_ia32_minps512_mask", IX86_BUILTIN_MINPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT },
33099 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse2_vmsminv2df3_round, "__builtin_ia32_minsd_round", IX86_BUILTIN_MINSD_ROUND, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
33100 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse_vmsminv4sf3_round, "__builtin_ia32_minss_round", IX86_BUILTIN_MINSS_ROUND, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
33101 { OPTION_MASK_ISA_AVX512F, CODE_FOR_mulv8df3_mask_round, "__builtin_ia32_mulpd512_mask", IX86_BUILTIN_MULPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI_INT },
33102 { OPTION_MASK_ISA_AVX512F, CODE_FOR_mulv16sf3_mask_round, "__builtin_ia32_mulps512_mask", IX86_BUILTIN_MULPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT },
33103 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse2_vmmulv2df3_round, "__builtin_ia32_mulsd_round", IX86_BUILTIN_MULSD_ROUND, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
33104 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse_vmmulv4sf3_round, "__builtin_ia32_mulss_round", IX86_BUILTIN_MULSS_ROUND, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
33105 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_rndscalev8df_mask_round, "__builtin_ia32_rndscalepd_mask", IX86_BUILTIN_RNDSCALEPD, UNKNOWN, (int) V8DF_FTYPE_V8DF_INT_V8DF_QI_INT },
33106 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_rndscalev16sf_mask_round, "__builtin_ia32_rndscaleps_mask", IX86_BUILTIN_RNDSCALEPS, UNKNOWN, (int) V16SF_FTYPE_V16SF_INT_V16SF_HI_INT },
33107 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_rndscalev2df_round, "__builtin_ia32_rndscalesd_round", IX86_BUILTIN_RNDSCALESD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT_INT },
33108 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_rndscalev4sf_round, "__builtin_ia32_rndscaless_round", IX86_BUILTIN_RNDSCALESS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT_INT },
33109 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_scalefv8df_mask_round, "__builtin_ia32_scalefpd512_mask", IX86_BUILTIN_SCALEFPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI_INT },
33110 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_scalefv16sf_mask_round, "__builtin_ia32_scalefps512_mask", IX86_BUILTIN_SCALEFPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT },
33111 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vmscalefv2df_round, "__builtin_ia32_scalefsd_round", IX86_BUILTIN_SCALEFSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
33112 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vmscalefv4sf_round, "__builtin_ia32_scalefss_round", IX86_BUILTIN_SCALEFSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
33113 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_sqrtv8df2_mask_round, "__builtin_ia32_sqrtpd512_mask", IX86_BUILTIN_SQRTPD512_MASK, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_QI_INT },
33114 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_sqrtv16sf2_mask_round, "__builtin_ia32_sqrtps512_mask", IX86_BUILTIN_SQRTPS512_MASK, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_HI_INT },
33115 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse2_vmsqrtv2df2_round, "__builtin_ia32_sqrtsd_round", IX86_BUILTIN_SQRTSD_ROUND, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
33116 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse_vmsqrtv4sf2_round, "__builtin_ia32_sqrtss_round", IX86_BUILTIN_SQRTSS_ROUND, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
33117 { OPTION_MASK_ISA_AVX512F, CODE_FOR_subv8df3_mask_round, "__builtin_ia32_subpd512_mask", IX86_BUILTIN_SUBPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI_INT },
33118 { OPTION_MASK_ISA_AVX512F, CODE_FOR_subv16sf3_mask_round, "__builtin_ia32_subps512_mask", IX86_BUILTIN_SUBPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT },
33119 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse2_vmsubv2df3_round, "__builtin_ia32_subsd_round", IX86_BUILTIN_SUBSD_ROUND, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
33120 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse_vmsubv4sf3_round, "__builtin_ia32_subss_round", IX86_BUILTIN_SUBSS_ROUND, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
33121 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse2_cvtsd2si_round, "__builtin_ia32_vcvtsd2si32", IX86_BUILTIN_VCVTSD2SI32, UNKNOWN, (int) INT_FTYPE_V2DF_INT },
33122 { OPTION_MASK_ISA_AVX512F | OPTION_MASK_ISA_64BIT, CODE_FOR_sse2_cvtsd2siq_round, "__builtin_ia32_vcvtsd2si64", IX86_BUILTIN_VCVTSD2SI64, UNKNOWN, (int) INT64_FTYPE_V2DF_INT },
33123 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vcvtsd2usi_round, "__builtin_ia32_vcvtsd2usi32", IX86_BUILTIN_VCVTSD2USI32, UNKNOWN, (int) UINT_FTYPE_V2DF_INT },
33124 { OPTION_MASK_ISA_AVX512F | OPTION_MASK_ISA_64BIT, CODE_FOR_avx512f_vcvtsd2usiq_round, "__builtin_ia32_vcvtsd2usi64", IX86_BUILTIN_VCVTSD2USI64, UNKNOWN, (int) UINT64_FTYPE_V2DF_INT },
33125 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse_cvtss2si_round, "__builtin_ia32_vcvtss2si32", IX86_BUILTIN_VCVTSS2SI32, UNKNOWN, (int) INT_FTYPE_V4SF_INT },
33126 { OPTION_MASK_ISA_AVX512F | OPTION_MASK_ISA_64BIT, CODE_FOR_sse_cvtss2siq_round, "__builtin_ia32_vcvtss2si64", IX86_BUILTIN_VCVTSS2SI64, UNKNOWN, (int) INT64_FTYPE_V4SF_INT },
33127 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vcvtss2usi_round, "__builtin_ia32_vcvtss2usi32", IX86_BUILTIN_VCVTSS2USI32, UNKNOWN, (int) UINT_FTYPE_V4SF_INT },
33128 { OPTION_MASK_ISA_AVX512F | OPTION_MASK_ISA_64BIT, CODE_FOR_avx512f_vcvtss2usiq_round, "__builtin_ia32_vcvtss2usi64", IX86_BUILTIN_VCVTSS2USI64, UNKNOWN, (int) UINT64_FTYPE_V4SF_INT },
33129 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse2_cvttsd2si_round, "__builtin_ia32_vcvttsd2si32", IX86_BUILTIN_VCVTTSD2SI32, UNKNOWN, (int) INT_FTYPE_V2DF_INT },
33130 { OPTION_MASK_ISA_AVX512F | OPTION_MASK_ISA_64BIT, CODE_FOR_sse2_cvttsd2siq_round, "__builtin_ia32_vcvttsd2si64", IX86_BUILTIN_VCVTTSD2SI64, UNKNOWN, (int) INT64_FTYPE_V2DF_INT },
33131 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vcvttsd2usi_round, "__builtin_ia32_vcvttsd2usi32", IX86_BUILTIN_VCVTTSD2USI32, UNKNOWN, (int) UINT_FTYPE_V2DF_INT },
33132 { OPTION_MASK_ISA_AVX512F | OPTION_MASK_ISA_64BIT, CODE_FOR_avx512f_vcvttsd2usiq_round, "__builtin_ia32_vcvttsd2usi64", IX86_BUILTIN_VCVTTSD2USI64, UNKNOWN, (int) UINT64_FTYPE_V2DF_INT },
33133 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse_cvttss2si_round, "__builtin_ia32_vcvttss2si32", IX86_BUILTIN_VCVTTSS2SI32, UNKNOWN, (int) INT_FTYPE_V4SF_INT },
33134 { OPTION_MASK_ISA_AVX512F | OPTION_MASK_ISA_64BIT, CODE_FOR_sse_cvttss2siq_round, "__builtin_ia32_vcvttss2si64", IX86_BUILTIN_VCVTTSS2SI64, UNKNOWN, (int) INT64_FTYPE_V4SF_INT },
33135 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vcvttss2usi_round, "__builtin_ia32_vcvttss2usi32", IX86_BUILTIN_VCVTTSS2USI32, UNKNOWN, (int) UINT_FTYPE_V4SF_INT },
33136 { OPTION_MASK_ISA_AVX512F | OPTION_MASK_ISA_64BIT, CODE_FOR_avx512f_vcvttss2usiq_round, "__builtin_ia32_vcvttss2usi64", IX86_BUILTIN_VCVTTSS2USI64, UNKNOWN, (int) UINT64_FTYPE_V4SF_INT },
33137 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fmadd_v8df_mask_round, "__builtin_ia32_vfmaddpd512_mask", IX86_BUILTIN_VFMADDPD512_MASK, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI_INT },
33138 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fmadd_v8df_mask3_round, "__builtin_ia32_vfmaddpd512_mask3", IX86_BUILTIN_VFMADDPD512_MASK3, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI_INT },
33139 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fmadd_v8df_maskz_round, "__builtin_ia32_vfmaddpd512_maskz", IX86_BUILTIN_VFMADDPD512_MASKZ, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI_INT },
33140 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fmadd_v16sf_mask_round, "__builtin_ia32_vfmaddps512_mask", IX86_BUILTIN_VFMADDPS512_MASK, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT },
33141 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fmadd_v16sf_mask3_round, "__builtin_ia32_vfmaddps512_mask3", IX86_BUILTIN_VFMADDPS512_MASK3, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT },
33142 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fmadd_v16sf_maskz_round, "__builtin_ia32_vfmaddps512_maskz", IX86_BUILTIN_VFMADDPS512_MASKZ, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT },
33143 { OPTION_MASK_ISA_AVX512F, CODE_FOR_fmai_vmfmadd_v2df_round, "__builtin_ia32_vfmaddsd3_round", IX86_BUILTIN_VFMADDSD3_ROUND, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF_INT },
33144 { OPTION_MASK_ISA_AVX512F, CODE_FOR_fmai_vmfmadd_v4sf_round, "__builtin_ia32_vfmaddss3_round", IX86_BUILTIN_VFMADDSS3_ROUND, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF_INT },
33145 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fmaddsub_v8df_mask_round, "__builtin_ia32_vfmaddsubpd512_mask", IX86_BUILTIN_VFMADDSUBPD512_MASK, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI_INT },
33146 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fmaddsub_v8df_mask3_round, "__builtin_ia32_vfmaddsubpd512_mask3", IX86_BUILTIN_VFMADDSUBPD512_MASK3, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI_INT },
33147 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fmaddsub_v8df_maskz_round, "__builtin_ia32_vfmaddsubpd512_maskz", IX86_BUILTIN_VFMADDSUBPD512_MASKZ, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI_INT },
33148 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fmaddsub_v16sf_mask_round, "__builtin_ia32_vfmaddsubps512_mask", IX86_BUILTIN_VFMADDSUBPS512_MASK, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT },
33149 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fmaddsub_v16sf_mask3_round, "__builtin_ia32_vfmaddsubps512_mask3", IX86_BUILTIN_VFMADDSUBPS512_MASK3, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT },
33150 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fmaddsub_v16sf_maskz_round, "__builtin_ia32_vfmaddsubps512_maskz", IX86_BUILTIN_VFMADDSUBPS512_MASKZ, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT },
33151 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fmsubadd_v8df_mask3_round, "__builtin_ia32_vfmsubaddpd512_mask3", IX86_BUILTIN_VFMSUBADDPD512_MASK3, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI_INT },
33152 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fmsubadd_v16sf_mask3_round, "__builtin_ia32_vfmsubaddps512_mask3", IX86_BUILTIN_VFMSUBADDPS512_MASK3, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT },
33153 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fmsub_v8df_mask3_round, "__builtin_ia32_vfmsubpd512_mask3", IX86_BUILTIN_VFMSUBPD512_MASK3, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI_INT },
33154 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fmsub_v16sf_mask3_round, "__builtin_ia32_vfmsubps512_mask3", IX86_BUILTIN_VFMSUBPS512_MASK3, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT },
33155 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fnmadd_v8df_mask_round, "__builtin_ia32_vfnmaddpd512_mask", IX86_BUILTIN_VFNMADDPD512_MASK, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI_INT },
33156 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fnmadd_v16sf_mask_round, "__builtin_ia32_vfnmaddps512_mask", IX86_BUILTIN_VFNMADDPS512_MASK, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT },
33157 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fnmsub_v8df_mask_round, "__builtin_ia32_vfnmsubpd512_mask", IX86_BUILTIN_VFNMSUBPD512_MASK, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI_INT },
33158 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fnmsub_v8df_mask3_round, "__builtin_ia32_vfnmsubpd512_mask3", IX86_BUILTIN_VFNMSUBPD512_MASK3, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI_INT },
33159 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fnmsub_v16sf_mask_round, "__builtin_ia32_vfnmsubps512_mask", IX86_BUILTIN_VFNMSUBPS512_MASK, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT },
33160 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fnmsub_v16sf_mask3_round, "__builtin_ia32_vfnmsubps512_mask3", IX86_BUILTIN_VFNMSUBPS512_MASK3, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT },
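/* Illustrative note: the scalar *_round entries above (e.g.
   __builtin_ia32_addsd_round, prototype V2DF_FTYPE_V2DF_V2DF_INT) carry
   only the rounding immediate and no mask operand; a hedged sketch of the
   avx512fintrin.h spelling is

     __m128d r = _mm_add_round_sd (a, b, _MM_FROUND_TO_ZERO
                                         | _MM_FROUND_NO_EXC);  */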
33162 /* AVX512ER */
33163 { OPTION_MASK_ISA_AVX512ER, CODE_FOR_avx512er_exp2v8df_mask_round, "__builtin_ia32_exp2pd_mask", IX86_BUILTIN_EXP2PD_MASK, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_QI_INT },
33164 { OPTION_MASK_ISA_AVX512ER, CODE_FOR_avx512er_exp2v16sf_mask_round, "__builtin_ia32_exp2ps_mask", IX86_BUILTIN_EXP2PS_MASK, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_HI_INT },
33165 { OPTION_MASK_ISA_AVX512ER, CODE_FOR_avx512er_rcp28v8df_mask_round, "__builtin_ia32_rcp28pd_mask", IX86_BUILTIN_RCP28PD, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_QI_INT },
33166 { OPTION_MASK_ISA_AVX512ER, CODE_FOR_avx512er_rcp28v16sf_mask_round, "__builtin_ia32_rcp28ps_mask", IX86_BUILTIN_RCP28PS, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_HI_INT },
33167 { OPTION_MASK_ISA_AVX512ER, CODE_FOR_avx512er_vmrcp28v2df_round, "__builtin_ia32_rcp28sd_round", IX86_BUILTIN_RCP28SD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
33168 { OPTION_MASK_ISA_AVX512ER, CODE_FOR_avx512er_vmrcp28v4sf_round, "__builtin_ia32_rcp28ss_round", IX86_BUILTIN_RCP28SS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
33169 { OPTION_MASK_ISA_AVX512ER, CODE_FOR_avx512er_rsqrt28v8df_mask_round, "__builtin_ia32_rsqrt28pd_mask", IX86_BUILTIN_RSQRT28PD, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_QI_INT },
33170 { OPTION_MASK_ISA_AVX512ER, CODE_FOR_avx512er_rsqrt28v16sf_mask_round, "__builtin_ia32_rsqrt28ps_mask", IX86_BUILTIN_RSQRT28PS, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_HI_INT },
33171 { OPTION_MASK_ISA_AVX512ER, CODE_FOR_avx512er_vmrsqrt28v2df_round, "__builtin_ia32_rsqrt28sd_round", IX86_BUILTIN_RSQRT28SD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
33172 { OPTION_MASK_ISA_AVX512ER, CODE_FOR_avx512er_vmrsqrt28v4sf_round, "__builtin_ia32_rsqrt28ss_round", IX86_BUILTIN_RSQRT28SS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
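/* Illustrative note: the AVX512ER entries above expose the exp2, rcp28 and
   rsqrt28 approximation instructions (about 2^-28 maximum relative error).
   A hedged sketch of one user-level spelling, as in avx512erintrin.h:

     __m512 r = _mm512_rsqrt28_ps (x);

   which expands to __builtin_ia32_rsqrt28ps_mask with an all-ones mask and
   _MM_FROUND_CUR_DIRECTION.  */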
33174 /* AVX512DQ. */
33175 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_avx512dq_rangesv2df_round, "__builtin_ia32_rangesd128_round", IX86_BUILTIN_RANGESD128, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT_INT },
33176 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_avx512dq_rangesv4sf_round, "__builtin_ia32_rangess128_round", IX86_BUILTIN_RANGESS128, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT_INT },
33177 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_fix_notruncv8dfv8di2_mask_round, "__builtin_ia32_cvtpd2qq512_mask", IX86_BUILTIN_CVTPD2QQ512, UNKNOWN, (int) V8DI_FTYPE_V8DF_V8DI_QI_INT },
33178 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_avx512dq_cvtps2qqv8di_mask_round, "__builtin_ia32_cvtps2qq512_mask", IX86_BUILTIN_CVTPS2QQ512, UNKNOWN, (int) V8DI_FTYPE_V8SF_V8DI_QI_INT },
33179 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_ufix_notruncv8dfv8di2_mask_round, "__builtin_ia32_cvtpd2uqq512_mask", IX86_BUILTIN_CVTPD2UQQ512, UNKNOWN, (int) V8DI_FTYPE_V8DF_V8DI_QI_INT },
33180 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_avx512dq_cvtps2uqqv8di_mask_round, "__builtin_ia32_cvtps2uqq512_mask", IX86_BUILTIN_CVTPS2UQQ512, UNKNOWN, (int) V8DI_FTYPE_V8SF_V8DI_QI_INT },
33181 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_floatv8div8sf2_mask_round, "__builtin_ia32_cvtqq2ps512_mask", IX86_BUILTIN_CVTQQ2PS512, UNKNOWN, (int) V8SF_FTYPE_V8DI_V8SF_QI_INT },
33182 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_ufloatv8div8sf2_mask_round, "__builtin_ia32_cvtuqq2ps512_mask", IX86_BUILTIN_CVTUQQ2PS512, UNKNOWN, (int) V8SF_FTYPE_V8DI_V8SF_QI_INT },
33183 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_floatv8div8df2_mask_round, "__builtin_ia32_cvtqq2pd512_mask", IX86_BUILTIN_CVTQQ2PD512, UNKNOWN, (int) V8DF_FTYPE_V8DI_V8DF_QI_INT },
33184 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_ufloatv8div8df2_mask_round, "__builtin_ia32_cvtuqq2pd512_mask", IX86_BUILTIN_CVTUQQ2PD512, UNKNOWN, (int) V8DF_FTYPE_V8DI_V8DF_QI_INT },
33185 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_fix_truncv8sfv8di2_mask_round, "__builtin_ia32_cvttps2qq512_mask", IX86_BUILTIN_CVTTPS2QQ512, UNKNOWN, (int) V8DI_FTYPE_V8SF_V8DI_QI_INT },
33186 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_ufix_truncv8sfv8di2_mask_round, "__builtin_ia32_cvttps2uqq512_mask", IX86_BUILTIN_CVTTPS2UQQ512, UNKNOWN, (int) V8DI_FTYPE_V8SF_V8DI_QI_INT },
33187 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_fix_truncv8dfv8di2_mask_round, "__builtin_ia32_cvttpd2qq512_mask", IX86_BUILTIN_CVTTPD2QQ512, UNKNOWN, (int) V8DI_FTYPE_V8DF_V8DI_QI_INT },
33188 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_ufix_truncv8dfv8di2_mask_round, "__builtin_ia32_cvttpd2uqq512_mask", IX86_BUILTIN_CVTTPD2UQQ512, UNKNOWN, (int) V8DI_FTYPE_V8DF_V8DI_QI_INT },
33189 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_avx512dq_rangepv16sf_mask_round, "__builtin_ia32_rangeps512_mask", IX86_BUILTIN_RANGEPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_INT_V16SF_HI_INT },
33190 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_avx512dq_rangepv8df_mask_round, "__builtin_ia32_rangepd512_mask", IX86_BUILTIN_RANGEPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_INT_V8DF_QI_INT },
33191 };
33193 /* Builtins for MPX. */
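/* Illustrative note: the MPX builtins below carry no insn pattern
   ((enum insn_code) 0) because the Pointer Bounds Checker expands them
   itself.  A hedged sketch of the sequence they describe, with p, size and
   b standing in for a pointer, its object size and a bounds value:

     b = __builtin_ia32_bndmk (p, size);
     __builtin_ia32_bndcl (p, b);        check against the lower bound
     __builtin_ia32_bndcu (p, b);        check against the upper bound

   Real code normally gets these through -fcheck-pointer-bounds
   instrumentation rather than by calling them directly.  */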
33194 static const struct builtin_description bdesc_mpx[] =
33195 {
33196 { OPTION_MASK_ISA_MPX, (enum insn_code)0, "__builtin_ia32_bndstx", IX86_BUILTIN_BNDSTX, UNKNOWN, (int) VOID_FTYPE_PCVOID_BND_PCVOID },
33197 { OPTION_MASK_ISA_MPX, (enum insn_code)0, "__builtin_ia32_bndcl", IX86_BUILTIN_BNDCL, UNKNOWN, (int) VOID_FTYPE_PCVOID_BND },
33198 { OPTION_MASK_ISA_MPX, (enum insn_code)0, "__builtin_ia32_bndcu", IX86_BUILTIN_BNDCU, UNKNOWN, (int) VOID_FTYPE_PCVOID_BND },
33199 };
33201 /* Const builtins for MPX. */
33202 static const struct builtin_description bdesc_mpx_const[] =
33203 {
33204 { OPTION_MASK_ISA_MPX, (enum insn_code)0, "__builtin_ia32_bndmk", IX86_BUILTIN_BNDMK, UNKNOWN, (int) BND_FTYPE_PCVOID_ULONG },
33205 { OPTION_MASK_ISA_MPX, (enum insn_code)0, "__builtin_ia32_bndldx", IX86_BUILTIN_BNDLDX, UNKNOWN, (int) BND_FTYPE_PCVOID_PCVOID },
33206 { OPTION_MASK_ISA_MPX, (enum insn_code)0, "__builtin_ia32_narrow_bounds", IX86_BUILTIN_BNDNARROW, UNKNOWN, (int) PVOID_FTYPE_PCVOID_BND_ULONG },
33207 { OPTION_MASK_ISA_MPX, (enum insn_code)0, "__builtin_ia32_bndint", IX86_BUILTIN_BNDINT, UNKNOWN, (int) BND_FTYPE_BND_BND },
33208 { OPTION_MASK_ISA_MPX, (enum insn_code)0, "__builtin_ia32_sizeof", IX86_BUILTIN_SIZEOF, UNKNOWN, (int) ULONG_FTYPE_VOID },
33209 { OPTION_MASK_ISA_MPX, (enum insn_code)0, "__builtin_ia32_bndlower", IX86_BUILTIN_BNDLOWER, UNKNOWN, (int) PVOID_FTYPE_BND },
33210 { OPTION_MASK_ISA_MPX, (enum insn_code)0, "__builtin_ia32_bndupper", IX86_BUILTIN_BNDUPPER, UNKNOWN, (int) PVOID_FTYPE_BND },
33211 { OPTION_MASK_ISA_MPX, (enum insn_code)0, "__builtin_ia32_bndret", IX86_BUILTIN_BNDRET, UNKNOWN, (int) BND_FTYPE_PCVOID },
33212 };
33214 /* FMA4 and XOP. */
33215 #define MULTI_ARG_4_DF2_DI_I V2DF_FTYPE_V2DF_V2DF_V2DI_INT
33216 #define MULTI_ARG_4_DF2_DI_I1 V4DF_FTYPE_V4DF_V4DF_V4DI_INT
33217 #define MULTI_ARG_4_SF2_SI_I V4SF_FTYPE_V4SF_V4SF_V4SI_INT
33218 #define MULTI_ARG_4_SF2_SI_I1 V8SF_FTYPE_V8SF_V8SF_V8SI_INT
33219 #define MULTI_ARG_3_SF V4SF_FTYPE_V4SF_V4SF_V4SF
33220 #define MULTI_ARG_3_DF V2DF_FTYPE_V2DF_V2DF_V2DF
33221 #define MULTI_ARG_3_SF2 V8SF_FTYPE_V8SF_V8SF_V8SF
33222 #define MULTI_ARG_3_DF2 V4DF_FTYPE_V4DF_V4DF_V4DF
33223 #define MULTI_ARG_3_DI V2DI_FTYPE_V2DI_V2DI_V2DI
33224 #define MULTI_ARG_3_SI V4SI_FTYPE_V4SI_V4SI_V4SI
33225 #define MULTI_ARG_3_SI_DI V4SI_FTYPE_V4SI_V4SI_V2DI
33226 #define MULTI_ARG_3_HI V8HI_FTYPE_V8HI_V8HI_V8HI
33227 #define MULTI_ARG_3_HI_SI V8HI_FTYPE_V8HI_V8HI_V4SI
33228 #define MULTI_ARG_3_QI V16QI_FTYPE_V16QI_V16QI_V16QI
33229 #define MULTI_ARG_3_DI2 V4DI_FTYPE_V4DI_V4DI_V4DI
33230 #define MULTI_ARG_3_SI2 V8SI_FTYPE_V8SI_V8SI_V8SI
33231 #define MULTI_ARG_3_HI2 V16HI_FTYPE_V16HI_V16HI_V16HI
33232 #define MULTI_ARG_3_QI2 V32QI_FTYPE_V32QI_V32QI_V32QI
33233 #define MULTI_ARG_2_SF V4SF_FTYPE_V4SF_V4SF
33234 #define MULTI_ARG_2_DF V2DF_FTYPE_V2DF_V2DF
33235 #define MULTI_ARG_2_DI V2DI_FTYPE_V2DI_V2DI
33236 #define MULTI_ARG_2_SI V4SI_FTYPE_V4SI_V4SI
33237 #define MULTI_ARG_2_HI V8HI_FTYPE_V8HI_V8HI
33238 #define MULTI_ARG_2_QI V16QI_FTYPE_V16QI_V16QI
33239 #define MULTI_ARG_2_DI_IMM V2DI_FTYPE_V2DI_SI
33240 #define MULTI_ARG_2_SI_IMM V4SI_FTYPE_V4SI_SI
33241 #define MULTI_ARG_2_HI_IMM V8HI_FTYPE_V8HI_SI
33242 #define MULTI_ARG_2_QI_IMM V16QI_FTYPE_V16QI_SI
33243 #define MULTI_ARG_2_DI_CMP V2DI_FTYPE_V2DI_V2DI_CMP
33244 #define MULTI_ARG_2_SI_CMP V4SI_FTYPE_V4SI_V4SI_CMP
33245 #define MULTI_ARG_2_HI_CMP V8HI_FTYPE_V8HI_V8HI_CMP
33246 #define MULTI_ARG_2_QI_CMP V16QI_FTYPE_V16QI_V16QI_CMP
33247 #define MULTI_ARG_2_SF_TF V4SF_FTYPE_V4SF_V4SF_TF
33248 #define MULTI_ARG_2_DF_TF V2DF_FTYPE_V2DF_V2DF_TF
33249 #define MULTI_ARG_2_DI_TF V2DI_FTYPE_V2DI_V2DI_TF
33250 #define MULTI_ARG_2_SI_TF V4SI_FTYPE_V4SI_V4SI_TF
33251 #define MULTI_ARG_2_HI_TF V8HI_FTYPE_V8HI_V8HI_TF
33252 #define MULTI_ARG_2_QI_TF V16QI_FTYPE_V16QI_V16QI_TF
33253 #define MULTI_ARG_1_SF V4SF_FTYPE_V4SF
33254 #define MULTI_ARG_1_DF V2DF_FTYPE_V2DF
33255 #define MULTI_ARG_1_SF2 V8SF_FTYPE_V8SF
33256 #define MULTI_ARG_1_DF2 V4DF_FTYPE_V4DF
33257 #define MULTI_ARG_1_DI V2DI_FTYPE_V2DI
33258 #define MULTI_ARG_1_SI V4SI_FTYPE_V4SI
33259 #define MULTI_ARG_1_HI V8HI_FTYPE_V8HI
33260 #define MULTI_ARG_1_QI V16QI_FTYPE_V16QI
33261 #define MULTI_ARG_1_SI_DI V2DI_FTYPE_V4SI
33262 #define MULTI_ARG_1_HI_DI V2DI_FTYPE_V8HI
33263 #define MULTI_ARG_1_HI_SI V4SI_FTYPE_V8HI
33264 #define MULTI_ARG_1_QI_DI V2DI_FTYPE_V16QI
33265 #define MULTI_ARG_1_QI_SI V4SI_FTYPE_V16QI
33266 #define MULTI_ARG_1_QI_HI V8HI_FTYPE_V16QI
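/* Illustrative note: the MULTI_ARG_* names above are shorthand for the
   function-type codes used by the bdesc_multi_arg table that follows;
   MULTI_ARG_3_SF, for instance, is V4SF_FTYPE_V4SF_V4SF_V4SF and describes
   a three-operand single-precision builtin such as __builtin_ia32_vfmaddss,
   roughly

     __m128 r = __builtin_ia32_vfmaddss (a, b, c);

   (a hedged sketch; the fma4intrin.h wrapper _mm_macc_ss is the usual
   user-level spelling).  */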
33268 static const struct builtin_description bdesc_multi_arg[] =
33269 {
33270 { OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_vmfmadd_v4sf,
33271 "__builtin_ia32_vfmaddss", IX86_BUILTIN_VFMADDSS,
33272 UNKNOWN, (int)MULTI_ARG_3_SF },
33273 { OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_vmfmadd_v2df,
33274 "__builtin_ia32_vfmaddsd", IX86_BUILTIN_VFMADDSD,
33275 UNKNOWN, (int)MULTI_ARG_3_DF },
33277 { OPTION_MASK_ISA_FMA, CODE_FOR_fmai_vmfmadd_v4sf,
33278 "__builtin_ia32_vfmaddss3", IX86_BUILTIN_VFMADDSS3,
33279 UNKNOWN, (int)MULTI_ARG_3_SF },
33280 { OPTION_MASK_ISA_FMA, CODE_FOR_fmai_vmfmadd_v2df,
33281 "__builtin_ia32_vfmaddsd3", IX86_BUILTIN_VFMADDSD3,
33282 UNKNOWN, (int)MULTI_ARG_3_DF },
33284 { OPTION_MASK_ISA_FMA | OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_fmadd_v4sf,
33285 "__builtin_ia32_vfmaddps", IX86_BUILTIN_VFMADDPS,
33286 UNKNOWN, (int)MULTI_ARG_3_SF },
33287 { OPTION_MASK_ISA_FMA | OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_fmadd_v2df,
33288 "__builtin_ia32_vfmaddpd", IX86_BUILTIN_VFMADDPD,
33289 UNKNOWN, (int)MULTI_ARG_3_DF },
33290 { OPTION_MASK_ISA_FMA | OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_fmadd_v8sf,
33291 "__builtin_ia32_vfmaddps256", IX86_BUILTIN_VFMADDPS256,
33292 UNKNOWN, (int)MULTI_ARG_3_SF2 },
33293 { OPTION_MASK_ISA_FMA | OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_fmadd_v4df,
33294 "__builtin_ia32_vfmaddpd256", IX86_BUILTIN_VFMADDPD256,
33295 UNKNOWN, (int)MULTI_ARG_3_DF2 },
33297 { OPTION_MASK_ISA_FMA | OPTION_MASK_ISA_FMA4, CODE_FOR_fmaddsub_v4sf,
33298 "__builtin_ia32_vfmaddsubps", IX86_BUILTIN_VFMADDSUBPS,
33299 UNKNOWN, (int)MULTI_ARG_3_SF },
33300 { OPTION_MASK_ISA_FMA | OPTION_MASK_ISA_FMA4, CODE_FOR_fmaddsub_v2df,
33301 "__builtin_ia32_vfmaddsubpd", IX86_BUILTIN_VFMADDSUBPD,
33302 UNKNOWN, (int)MULTI_ARG_3_DF },
33303 { OPTION_MASK_ISA_FMA | OPTION_MASK_ISA_FMA4, CODE_FOR_fmaddsub_v8sf,
33304 "__builtin_ia32_vfmaddsubps256", IX86_BUILTIN_VFMADDSUBPS256,
33305 UNKNOWN, (int)MULTI_ARG_3_SF2 },
33306 { OPTION_MASK_ISA_FMA | OPTION_MASK_ISA_FMA4, CODE_FOR_fmaddsub_v4df,
33307 "__builtin_ia32_vfmaddsubpd256", IX86_BUILTIN_VFMADDSUBPD256,
33308 UNKNOWN, (int)MULTI_ARG_3_DF2 },
33310 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v2di, "__builtin_ia32_vpcmov", IX86_BUILTIN_VPCMOV, UNKNOWN, (int)MULTI_ARG_3_DI },
33311 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v2di, "__builtin_ia32_vpcmov_v2di", IX86_BUILTIN_VPCMOV_V2DI, UNKNOWN, (int)MULTI_ARG_3_DI },
33312 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v4si, "__builtin_ia32_vpcmov_v4si", IX86_BUILTIN_VPCMOV_V4SI, UNKNOWN, (int)MULTI_ARG_3_SI },
33313 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v8hi, "__builtin_ia32_vpcmov_v8hi", IX86_BUILTIN_VPCMOV_V8HI, UNKNOWN, (int)MULTI_ARG_3_HI },
33314 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v16qi, "__builtin_ia32_vpcmov_v16qi", IX86_BUILTIN_VPCMOV_V16QI, UNKNOWN, (int)MULTI_ARG_3_QI },
33315 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v2df, "__builtin_ia32_vpcmov_v2df", IX86_BUILTIN_VPCMOV_V2DF, UNKNOWN, (int)MULTI_ARG_3_DF },
33316 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v4sf, "__builtin_ia32_vpcmov_v4sf", IX86_BUILTIN_VPCMOV_V4SF, UNKNOWN, (int)MULTI_ARG_3_SF },
33318 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v4di256, "__builtin_ia32_vpcmov256", IX86_BUILTIN_VPCMOV256, UNKNOWN, (int)MULTI_ARG_3_DI2 },
33319 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v4di256, "__builtin_ia32_vpcmov_v4di256", IX86_BUILTIN_VPCMOV_V4DI256, UNKNOWN, (int)MULTI_ARG_3_DI2 },
33320 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v8si256, "__builtin_ia32_vpcmov_v8si256", IX86_BUILTIN_VPCMOV_V8SI256, UNKNOWN, (int)MULTI_ARG_3_SI2 },
33321 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v16hi256, "__builtin_ia32_vpcmov_v16hi256", IX86_BUILTIN_VPCMOV_V16HI256, UNKNOWN, (int)MULTI_ARG_3_HI2 },
33322 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v32qi256, "__builtin_ia32_vpcmov_v32qi256", IX86_BUILTIN_VPCMOV_V32QI256, UNKNOWN, (int)MULTI_ARG_3_QI2 },
33323 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v4df256, "__builtin_ia32_vpcmov_v4df256", IX86_BUILTIN_VPCMOV_V4DF256, UNKNOWN, (int)MULTI_ARG_3_DF2 },
33324 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v8sf256, "__builtin_ia32_vpcmov_v8sf256", IX86_BUILTIN_VPCMOV_V8SF256, UNKNOWN, (int)MULTI_ARG_3_SF2 },
33326 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pperm, "__builtin_ia32_vpperm", IX86_BUILTIN_VPPERM, UNKNOWN, (int)MULTI_ARG_3_QI },
33328 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacssww, "__builtin_ia32_vpmacssww", IX86_BUILTIN_VPMACSSWW, UNKNOWN, (int)MULTI_ARG_3_HI },
33329 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacsww, "__builtin_ia32_vpmacsww", IX86_BUILTIN_VPMACSWW, UNKNOWN, (int)MULTI_ARG_3_HI },
33330 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacsswd, "__builtin_ia32_vpmacsswd", IX86_BUILTIN_VPMACSSWD, UNKNOWN, (int)MULTI_ARG_3_HI_SI },
33331 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacswd, "__builtin_ia32_vpmacswd", IX86_BUILTIN_VPMACSWD, UNKNOWN, (int)MULTI_ARG_3_HI_SI },
33332 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacssdd, "__builtin_ia32_vpmacssdd", IX86_BUILTIN_VPMACSSDD, UNKNOWN, (int)MULTI_ARG_3_SI },
33333 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacsdd, "__builtin_ia32_vpmacsdd", IX86_BUILTIN_VPMACSDD, UNKNOWN, (int)MULTI_ARG_3_SI },
33334 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacssdql, "__builtin_ia32_vpmacssdql", IX86_BUILTIN_VPMACSSDQL, UNKNOWN, (int)MULTI_ARG_3_SI_DI },
33335 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacssdqh, "__builtin_ia32_vpmacssdqh", IX86_BUILTIN_VPMACSSDQH, UNKNOWN, (int)MULTI_ARG_3_SI_DI },
33336 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacsdql, "__builtin_ia32_vpmacsdql", IX86_BUILTIN_VPMACSDQL, UNKNOWN, (int)MULTI_ARG_3_SI_DI },
33337 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacsdqh, "__builtin_ia32_vpmacsdqh", IX86_BUILTIN_VPMACSDQH, UNKNOWN, (int)MULTI_ARG_3_SI_DI },
33338 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmadcsswd, "__builtin_ia32_vpmadcsswd", IX86_BUILTIN_VPMADCSSWD, UNKNOWN, (int)MULTI_ARG_3_HI_SI },
33339 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmadcswd, "__builtin_ia32_vpmadcswd", IX86_BUILTIN_VPMADCSWD, UNKNOWN, (int)MULTI_ARG_3_HI_SI },
33341 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vrotlv2di3, "__builtin_ia32_vprotq", IX86_BUILTIN_VPROTQ, UNKNOWN, (int)MULTI_ARG_2_DI },
33342 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vrotlv4si3, "__builtin_ia32_vprotd", IX86_BUILTIN_VPROTD, UNKNOWN, (int)MULTI_ARG_2_SI },
33343 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vrotlv8hi3, "__builtin_ia32_vprotw", IX86_BUILTIN_VPROTW, UNKNOWN, (int)MULTI_ARG_2_HI },
33344 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vrotlv16qi3, "__builtin_ia32_vprotb", IX86_BUILTIN_VPROTB, UNKNOWN, (int)MULTI_ARG_2_QI },
33345 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_rotlv2di3, "__builtin_ia32_vprotqi", IX86_BUILTIN_VPROTQ_IMM, UNKNOWN, (int)MULTI_ARG_2_DI_IMM },
33346 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_rotlv4si3, "__builtin_ia32_vprotdi", IX86_BUILTIN_VPROTD_IMM, UNKNOWN, (int)MULTI_ARG_2_SI_IMM },
33347 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_rotlv8hi3, "__builtin_ia32_vprotwi", IX86_BUILTIN_VPROTW_IMM, UNKNOWN, (int)MULTI_ARG_2_HI_IMM },
33348 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_rotlv16qi3, "__builtin_ia32_vprotbi", IX86_BUILTIN_VPROTB_IMM, UNKNOWN, (int)MULTI_ARG_2_QI_IMM },
33349 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_shav2di3, "__builtin_ia32_vpshaq", IX86_BUILTIN_VPSHAQ, UNKNOWN, (int)MULTI_ARG_2_DI },
33350 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_shav4si3, "__builtin_ia32_vpshad", IX86_BUILTIN_VPSHAD, UNKNOWN, (int)MULTI_ARG_2_SI },
33351 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_shav8hi3, "__builtin_ia32_vpshaw", IX86_BUILTIN_VPSHAW, UNKNOWN, (int)MULTI_ARG_2_HI },
33352 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_shav16qi3, "__builtin_ia32_vpshab", IX86_BUILTIN_VPSHAB, UNKNOWN, (int)MULTI_ARG_2_QI },
33353 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_shlv2di3, "__builtin_ia32_vpshlq", IX86_BUILTIN_VPSHLQ, UNKNOWN, (int)MULTI_ARG_2_DI },
33354 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_shlv4si3, "__builtin_ia32_vpshld", IX86_BUILTIN_VPSHLD, UNKNOWN, (int)MULTI_ARG_2_SI },
33355 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_shlv8hi3, "__builtin_ia32_vpshlw", IX86_BUILTIN_VPSHLW, UNKNOWN, (int)MULTI_ARG_2_HI },
33356 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_shlv16qi3, "__builtin_ia32_vpshlb", IX86_BUILTIN_VPSHLB, UNKNOWN, (int)MULTI_ARG_2_QI },
33358 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vmfrczv4sf2, "__builtin_ia32_vfrczss", IX86_BUILTIN_VFRCZSS, UNKNOWN, (int)MULTI_ARG_1_SF },
33359 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vmfrczv2df2, "__builtin_ia32_vfrczsd", IX86_BUILTIN_VFRCZSD, UNKNOWN, (int)MULTI_ARG_1_DF },
33360 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_frczv4sf2, "__builtin_ia32_vfrczps", IX86_BUILTIN_VFRCZPS, UNKNOWN, (int)MULTI_ARG_1_SF },
33361 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_frczv2df2, "__builtin_ia32_vfrczpd", IX86_BUILTIN_VFRCZPD, UNKNOWN, (int)MULTI_ARG_1_DF },
33362 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_frczv8sf2, "__builtin_ia32_vfrczps256", IX86_BUILTIN_VFRCZPS256, UNKNOWN, (int)MULTI_ARG_1_SF2 },
33363 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_frczv4df2, "__builtin_ia32_vfrczpd256", IX86_BUILTIN_VFRCZPD256, UNKNOWN, (int)MULTI_ARG_1_DF2 },
33365 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phaddbw, "__builtin_ia32_vphaddbw", IX86_BUILTIN_VPHADDBW, UNKNOWN, (int)MULTI_ARG_1_QI_HI },
33366 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phaddbd, "__builtin_ia32_vphaddbd", IX86_BUILTIN_VPHADDBD, UNKNOWN, (int)MULTI_ARG_1_QI_SI },
33367 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phaddbq, "__builtin_ia32_vphaddbq", IX86_BUILTIN_VPHADDBQ, UNKNOWN, (int)MULTI_ARG_1_QI_DI },
33368 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phaddwd, "__builtin_ia32_vphaddwd", IX86_BUILTIN_VPHADDWD, UNKNOWN, (int)MULTI_ARG_1_HI_SI },
33369 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phaddwq, "__builtin_ia32_vphaddwq", IX86_BUILTIN_VPHADDWQ, UNKNOWN, (int)MULTI_ARG_1_HI_DI },
33370 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phadddq, "__builtin_ia32_vphadddq", IX86_BUILTIN_VPHADDDQ, UNKNOWN, (int)MULTI_ARG_1_SI_DI },
33371 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phaddubw, "__builtin_ia32_vphaddubw", IX86_BUILTIN_VPHADDUBW, UNKNOWN, (int)MULTI_ARG_1_QI_HI },
33372 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phaddubd, "__builtin_ia32_vphaddubd", IX86_BUILTIN_VPHADDUBD, UNKNOWN, (int)MULTI_ARG_1_QI_SI },
33373 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phaddubq, "__builtin_ia32_vphaddubq", IX86_BUILTIN_VPHADDUBQ, UNKNOWN, (int)MULTI_ARG_1_QI_DI },
33374 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phadduwd, "__builtin_ia32_vphadduwd", IX86_BUILTIN_VPHADDUWD, UNKNOWN, (int)MULTI_ARG_1_HI_SI },
33375 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phadduwq, "__builtin_ia32_vphadduwq", IX86_BUILTIN_VPHADDUWQ, UNKNOWN, (int)MULTI_ARG_1_HI_DI },
33376 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phaddudq, "__builtin_ia32_vphaddudq", IX86_BUILTIN_VPHADDUDQ, UNKNOWN, (int)MULTI_ARG_1_SI_DI },
33377 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phsubbw, "__builtin_ia32_vphsubbw", IX86_BUILTIN_VPHSUBBW, UNKNOWN, (int)MULTI_ARG_1_QI_HI },
33378 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phsubwd, "__builtin_ia32_vphsubwd", IX86_BUILTIN_VPHSUBWD, UNKNOWN, (int)MULTI_ARG_1_HI_SI },
33379 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phsubdq, "__builtin_ia32_vphsubdq", IX86_BUILTIN_VPHSUBDQ, UNKNOWN, (int)MULTI_ARG_1_SI_DI },
33381 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv16qi3, "__builtin_ia32_vpcomeqb", IX86_BUILTIN_VPCOMEQB, EQ, (int)MULTI_ARG_2_QI_CMP },
33382 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv16qi3, "__builtin_ia32_vpcomneb", IX86_BUILTIN_VPCOMNEB, NE, (int)MULTI_ARG_2_QI_CMP },
33383 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv16qi3, "__builtin_ia32_vpcomneqb", IX86_BUILTIN_VPCOMNEB, NE, (int)MULTI_ARG_2_QI_CMP },
33384 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv16qi3, "__builtin_ia32_vpcomltb", IX86_BUILTIN_VPCOMLTB, LT, (int)MULTI_ARG_2_QI_CMP },
33385 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv16qi3, "__builtin_ia32_vpcomleb", IX86_BUILTIN_VPCOMLEB, LE, (int)MULTI_ARG_2_QI_CMP },
33386 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv16qi3, "__builtin_ia32_vpcomgtb", IX86_BUILTIN_VPCOMGTB, GT, (int)MULTI_ARG_2_QI_CMP },
33387 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv16qi3, "__builtin_ia32_vpcomgeb", IX86_BUILTIN_VPCOMGEB, GE, (int)MULTI_ARG_2_QI_CMP },
33389 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv8hi3, "__builtin_ia32_vpcomeqw", IX86_BUILTIN_VPCOMEQW, EQ, (int)MULTI_ARG_2_HI_CMP },
33390 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv8hi3, "__builtin_ia32_vpcomnew", IX86_BUILTIN_VPCOMNEW, NE, (int)MULTI_ARG_2_HI_CMP },
33391 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv8hi3, "__builtin_ia32_vpcomneqw", IX86_BUILTIN_VPCOMNEW, NE, (int)MULTI_ARG_2_HI_CMP },
33392 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv8hi3, "__builtin_ia32_vpcomltw", IX86_BUILTIN_VPCOMLTW, LT, (int)MULTI_ARG_2_HI_CMP },
33393 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv8hi3, "__builtin_ia32_vpcomlew", IX86_BUILTIN_VPCOMLEW, LE, (int)MULTI_ARG_2_HI_CMP },
33394 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv8hi3, "__builtin_ia32_vpcomgtw", IX86_BUILTIN_VPCOMGTW, GT, (int)MULTI_ARG_2_HI_CMP },
33395 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv8hi3, "__builtin_ia32_vpcomgew", IX86_BUILTIN_VPCOMGEW, GE, (int)MULTI_ARG_2_HI_CMP },
33397 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv4si3, "__builtin_ia32_vpcomeqd", IX86_BUILTIN_VPCOMEQD, EQ, (int)MULTI_ARG_2_SI_CMP },
33398 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv4si3, "__builtin_ia32_vpcomned", IX86_BUILTIN_VPCOMNED, NE, (int)MULTI_ARG_2_SI_CMP },
33399 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv4si3, "__builtin_ia32_vpcomneqd", IX86_BUILTIN_VPCOMNED, NE, (int)MULTI_ARG_2_SI_CMP },
33400 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv4si3, "__builtin_ia32_vpcomltd", IX86_BUILTIN_VPCOMLTD, LT, (int)MULTI_ARG_2_SI_CMP },
33401 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv4si3, "__builtin_ia32_vpcomled", IX86_BUILTIN_VPCOMLED, LE, (int)MULTI_ARG_2_SI_CMP },
33402 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv4si3, "__builtin_ia32_vpcomgtd", IX86_BUILTIN_VPCOMGTD, GT, (int)MULTI_ARG_2_SI_CMP },
33403 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv4si3, "__builtin_ia32_vpcomged", IX86_BUILTIN_VPCOMGED, GE, (int)MULTI_ARG_2_SI_CMP },
33405 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv2di3, "__builtin_ia32_vpcomeqq", IX86_BUILTIN_VPCOMEQQ, EQ, (int)MULTI_ARG_2_DI_CMP },
33406 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv2di3, "__builtin_ia32_vpcomneq", IX86_BUILTIN_VPCOMNEQ, NE, (int)MULTI_ARG_2_DI_CMP },
33407 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv2di3, "__builtin_ia32_vpcomneqq", IX86_BUILTIN_VPCOMNEQ, NE, (int)MULTI_ARG_2_DI_CMP },
33408 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv2di3, "__builtin_ia32_vpcomltq", IX86_BUILTIN_VPCOMLTQ, LT, (int)MULTI_ARG_2_DI_CMP },
33409 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv2di3, "__builtin_ia32_vpcomleq", IX86_BUILTIN_VPCOMLEQ, LE, (int)MULTI_ARG_2_DI_CMP },
33410 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv2di3, "__builtin_ia32_vpcomgtq", IX86_BUILTIN_VPCOMGTQ, GT, (int)MULTI_ARG_2_DI_CMP },
33411 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv2di3, "__builtin_ia32_vpcomgeq", IX86_BUILTIN_VPCOMGEQ, GE, (int)MULTI_ARG_2_DI_CMP },
33413 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v16qi3,"__builtin_ia32_vpcomequb", IX86_BUILTIN_VPCOMEQUB, EQ, (int)MULTI_ARG_2_QI_CMP },
33414 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v16qi3,"__builtin_ia32_vpcomneub", IX86_BUILTIN_VPCOMNEUB, NE, (int)MULTI_ARG_2_QI_CMP },
33415 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v16qi3,"__builtin_ia32_vpcomnequb", IX86_BUILTIN_VPCOMNEUB, NE, (int)MULTI_ARG_2_QI_CMP },
33416 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv16qi3, "__builtin_ia32_vpcomltub", IX86_BUILTIN_VPCOMLTUB, LTU, (int)MULTI_ARG_2_QI_CMP },
33417 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv16qi3, "__builtin_ia32_vpcomleub", IX86_BUILTIN_VPCOMLEUB, LEU, (int)MULTI_ARG_2_QI_CMP },
33418 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv16qi3, "__builtin_ia32_vpcomgtub", IX86_BUILTIN_VPCOMGTUB, GTU, (int)MULTI_ARG_2_QI_CMP },
33419 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv16qi3, "__builtin_ia32_vpcomgeub", IX86_BUILTIN_VPCOMGEUB, GEU, (int)MULTI_ARG_2_QI_CMP },
33421 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v8hi3, "__builtin_ia32_vpcomequw", IX86_BUILTIN_VPCOMEQUW, EQ, (int)MULTI_ARG_2_HI_CMP },
33422 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v8hi3, "__builtin_ia32_vpcomneuw", IX86_BUILTIN_VPCOMNEUW, NE, (int)MULTI_ARG_2_HI_CMP },
33423 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v8hi3, "__builtin_ia32_vpcomnequw", IX86_BUILTIN_VPCOMNEUW, NE, (int)MULTI_ARG_2_HI_CMP },
33424 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv8hi3, "__builtin_ia32_vpcomltuw", IX86_BUILTIN_VPCOMLTUW, LTU, (int)MULTI_ARG_2_HI_CMP },
33425 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv8hi3, "__builtin_ia32_vpcomleuw", IX86_BUILTIN_VPCOMLEUW, LEU, (int)MULTI_ARG_2_HI_CMP },
33426 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv8hi3, "__builtin_ia32_vpcomgtuw", IX86_BUILTIN_VPCOMGTUW, GTU, (int)MULTI_ARG_2_HI_CMP },
33427 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv8hi3, "__builtin_ia32_vpcomgeuw", IX86_BUILTIN_VPCOMGEUW, GEU, (int)MULTI_ARG_2_HI_CMP },
33429 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v4si3, "__builtin_ia32_vpcomequd", IX86_BUILTIN_VPCOMEQUD, EQ, (int)MULTI_ARG_2_SI_CMP },
33430 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v4si3, "__builtin_ia32_vpcomneud", IX86_BUILTIN_VPCOMNEUD, NE, (int)MULTI_ARG_2_SI_CMP },
33431 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v4si3, "__builtin_ia32_vpcomnequd", IX86_BUILTIN_VPCOMNEUD, NE, (int)MULTI_ARG_2_SI_CMP },
33432 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv4si3, "__builtin_ia32_vpcomltud", IX86_BUILTIN_VPCOMLTUD, LTU, (int)MULTI_ARG_2_SI_CMP },
33433 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv4si3, "__builtin_ia32_vpcomleud", IX86_BUILTIN_VPCOMLEUD, LEU, (int)MULTI_ARG_2_SI_CMP },
33434 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv4si3, "__builtin_ia32_vpcomgtud", IX86_BUILTIN_VPCOMGTUD, GTU, (int)MULTI_ARG_2_SI_CMP },
33435 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv4si3, "__builtin_ia32_vpcomgeud", IX86_BUILTIN_VPCOMGEUD, GEU, (int)MULTI_ARG_2_SI_CMP },
33437 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v2di3, "__builtin_ia32_vpcomequq", IX86_BUILTIN_VPCOMEQUQ, EQ, (int)MULTI_ARG_2_DI_CMP },
33438 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v2di3, "__builtin_ia32_vpcomneuq", IX86_BUILTIN_VPCOMNEUQ, NE, (int)MULTI_ARG_2_DI_CMP },
33439 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v2di3, "__builtin_ia32_vpcomnequq", IX86_BUILTIN_VPCOMNEUQ, NE, (int)MULTI_ARG_2_DI_CMP },
33440 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv2di3, "__builtin_ia32_vpcomltuq", IX86_BUILTIN_VPCOMLTUQ, LTU, (int)MULTI_ARG_2_DI_CMP },
33441 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv2di3, "__builtin_ia32_vpcomleuq", IX86_BUILTIN_VPCOMLEUQ, LEU, (int)MULTI_ARG_2_DI_CMP },
33442 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv2di3, "__builtin_ia32_vpcomgtuq", IX86_BUILTIN_VPCOMGTUQ, GTU, (int)MULTI_ARG_2_DI_CMP },
33443 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv2di3, "__builtin_ia32_vpcomgeuq", IX86_BUILTIN_VPCOMGEUQ, GEU, (int)MULTI_ARG_2_DI_CMP },
33445 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv16qi3, "__builtin_ia32_vpcomfalseb", IX86_BUILTIN_VPCOMFALSEB, (enum rtx_code) PCOM_FALSE, (int)MULTI_ARG_2_QI_TF },
33446 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv8hi3, "__builtin_ia32_vpcomfalsew", IX86_BUILTIN_VPCOMFALSEW, (enum rtx_code) PCOM_FALSE, (int)MULTI_ARG_2_HI_TF },
33447 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv4si3, "__builtin_ia32_vpcomfalsed", IX86_BUILTIN_VPCOMFALSED, (enum rtx_code) PCOM_FALSE, (int)MULTI_ARG_2_SI_TF },
33448 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv2di3, "__builtin_ia32_vpcomfalseq", IX86_BUILTIN_VPCOMFALSEQ, (enum rtx_code) PCOM_FALSE, (int)MULTI_ARG_2_DI_TF },
33449 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv16qi3, "__builtin_ia32_vpcomfalseub",IX86_BUILTIN_VPCOMFALSEUB,(enum rtx_code) PCOM_FALSE, (int)MULTI_ARG_2_QI_TF },
33450 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv8hi3, "__builtin_ia32_vpcomfalseuw",IX86_BUILTIN_VPCOMFALSEUW,(enum rtx_code) PCOM_FALSE, (int)MULTI_ARG_2_HI_TF },
33451 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv4si3, "__builtin_ia32_vpcomfalseud",IX86_BUILTIN_VPCOMFALSEUD,(enum rtx_code) PCOM_FALSE, (int)MULTI_ARG_2_SI_TF },
33452 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv2di3, "__builtin_ia32_vpcomfalseuq",IX86_BUILTIN_VPCOMFALSEUQ,(enum rtx_code) PCOM_FALSE, (int)MULTI_ARG_2_DI_TF },
33454 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv16qi3, "__builtin_ia32_vpcomtrueb", IX86_BUILTIN_VPCOMTRUEB, (enum rtx_code) PCOM_TRUE, (int)MULTI_ARG_2_QI_TF },
33455 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv8hi3, "__builtin_ia32_vpcomtruew", IX86_BUILTIN_VPCOMTRUEW, (enum rtx_code) PCOM_TRUE, (int)MULTI_ARG_2_HI_TF },
33456 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv4si3, "__builtin_ia32_vpcomtrued", IX86_BUILTIN_VPCOMTRUED, (enum rtx_code) PCOM_TRUE, (int)MULTI_ARG_2_SI_TF },
33457 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv2di3, "__builtin_ia32_vpcomtrueq", IX86_BUILTIN_VPCOMTRUEQ, (enum rtx_code) PCOM_TRUE, (int)MULTI_ARG_2_DI_TF },
33458 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv16qi3, "__builtin_ia32_vpcomtrueub", IX86_BUILTIN_VPCOMTRUEUB, (enum rtx_code) PCOM_TRUE, (int)MULTI_ARG_2_QI_TF },
33459 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv8hi3, "__builtin_ia32_vpcomtrueuw", IX86_BUILTIN_VPCOMTRUEUW, (enum rtx_code) PCOM_TRUE, (int)MULTI_ARG_2_HI_TF },
33460 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv4si3, "__builtin_ia32_vpcomtrueud", IX86_BUILTIN_VPCOMTRUEUD, (enum rtx_code) PCOM_TRUE, (int)MULTI_ARG_2_SI_TF },
33461 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv2di3, "__builtin_ia32_vpcomtrueuq", IX86_BUILTIN_VPCOMTRUEUQ, (enum rtx_code) PCOM_TRUE, (int)MULTI_ARG_2_DI_TF },
33463 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vpermil2v2df3, "__builtin_ia32_vpermil2pd", IX86_BUILTIN_VPERMIL2PD, UNKNOWN, (int)MULTI_ARG_4_DF2_DI_I },
33464 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vpermil2v4sf3, "__builtin_ia32_vpermil2ps", IX86_BUILTIN_VPERMIL2PS, UNKNOWN, (int)MULTI_ARG_4_SF2_SI_I },
33465 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vpermil2v4df3, "__builtin_ia32_vpermil2pd256", IX86_BUILTIN_VPERMIL2PD256, UNKNOWN, (int)MULTI_ARG_4_DF2_DI_I1 },
33466 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vpermil2v8sf3, "__builtin_ia32_vpermil2ps256", IX86_BUILTIN_VPERMIL2PS256, UNKNOWN, (int)MULTI_ARG_4_SF2_SI_I1 },
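/* Illustrative only: the table entries above surface to users through the
   intrinsic headers.  A minimal sketch for one FMA4 entry, assuming an
   FMA4-enabled target and the usual <x86intrin.h> wrapper names:

       #include <x86intrin.h>

       __m128
       fma4_example (__m128 a, __m128 b, __m128 c)
       {
         return _mm_macc_ps (a, b, c);   // typically expands to __builtin_ia32_vfmaddps
       }
*/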
33470 /* TM vector builtins. */
33472 /* Reuse the existing x86-specific `struct builtin_description' because
33473    we're lazy.  Add casts to make them fit.  */
33474 static const struct builtin_description bdesc_tm[] =
33476 { OPTION_MASK_ISA_MMX, CODE_FOR_nothing, "__builtin__ITM_WM64", (enum ix86_builtins) BUILT_IN_TM_STORE_M64, UNKNOWN, VOID_FTYPE_PV2SI_V2SI },
33477 { OPTION_MASK_ISA_MMX, CODE_FOR_nothing, "__builtin__ITM_WaRM64", (enum ix86_builtins) BUILT_IN_TM_STORE_WAR_M64, UNKNOWN, VOID_FTYPE_PV2SI_V2SI },
33478 { OPTION_MASK_ISA_MMX, CODE_FOR_nothing, "__builtin__ITM_WaWM64", (enum ix86_builtins) BUILT_IN_TM_STORE_WAW_M64, UNKNOWN, VOID_FTYPE_PV2SI_V2SI },
33479 { OPTION_MASK_ISA_MMX, CODE_FOR_nothing, "__builtin__ITM_RM64", (enum ix86_builtins) BUILT_IN_TM_LOAD_M64, UNKNOWN, V2SI_FTYPE_PCV2SI },
33480 { OPTION_MASK_ISA_MMX, CODE_FOR_nothing, "__builtin__ITM_RaRM64", (enum ix86_builtins) BUILT_IN_TM_LOAD_RAR_M64, UNKNOWN, V2SI_FTYPE_PCV2SI },
33481 { OPTION_MASK_ISA_MMX, CODE_FOR_nothing, "__builtin__ITM_RaWM64", (enum ix86_builtins) BUILT_IN_TM_LOAD_RAW_M64, UNKNOWN, V2SI_FTYPE_PCV2SI },
33482 { OPTION_MASK_ISA_MMX, CODE_FOR_nothing, "__builtin__ITM_RfWM64", (enum ix86_builtins) BUILT_IN_TM_LOAD_RFW_M64, UNKNOWN, V2SI_FTYPE_PCV2SI },
33484 { OPTION_MASK_ISA_SSE, CODE_FOR_nothing, "__builtin__ITM_WM128", (enum ix86_builtins) BUILT_IN_TM_STORE_M128, UNKNOWN, VOID_FTYPE_PV4SF_V4SF },
33485 { OPTION_MASK_ISA_SSE, CODE_FOR_nothing, "__builtin__ITM_WaRM128", (enum ix86_builtins) BUILT_IN_TM_STORE_WAR_M128, UNKNOWN, VOID_FTYPE_PV4SF_V4SF },
33486 { OPTION_MASK_ISA_SSE, CODE_FOR_nothing, "__builtin__ITM_WaWM128", (enum ix86_builtins) BUILT_IN_TM_STORE_WAW_M128, UNKNOWN, VOID_FTYPE_PV4SF_V4SF },
33487 { OPTION_MASK_ISA_SSE, CODE_FOR_nothing, "__builtin__ITM_RM128", (enum ix86_builtins) BUILT_IN_TM_LOAD_M128, UNKNOWN, V4SF_FTYPE_PCV4SF },
33488 { OPTION_MASK_ISA_SSE, CODE_FOR_nothing, "__builtin__ITM_RaRM128", (enum ix86_builtins) BUILT_IN_TM_LOAD_RAR_M128, UNKNOWN, V4SF_FTYPE_PCV4SF },
33489 { OPTION_MASK_ISA_SSE, CODE_FOR_nothing, "__builtin__ITM_RaWM128", (enum ix86_builtins) BUILT_IN_TM_LOAD_RAW_M128, UNKNOWN, V4SF_FTYPE_PCV4SF },
33490 { OPTION_MASK_ISA_SSE, CODE_FOR_nothing, "__builtin__ITM_RfWM128", (enum ix86_builtins) BUILT_IN_TM_LOAD_RFW_M128, UNKNOWN, V4SF_FTYPE_PCV4SF },
33492 { OPTION_MASK_ISA_AVX, CODE_FOR_nothing, "__builtin__ITM_WM256", (enum ix86_builtins) BUILT_IN_TM_STORE_M256, UNKNOWN, VOID_FTYPE_PV8SF_V8SF },
33493 { OPTION_MASK_ISA_AVX, CODE_FOR_nothing, "__builtin__ITM_WaRM256", (enum ix86_builtins) BUILT_IN_TM_STORE_WAR_M256, UNKNOWN, VOID_FTYPE_PV8SF_V8SF },
33494 { OPTION_MASK_ISA_AVX, CODE_FOR_nothing, "__builtin__ITM_WaWM256", (enum ix86_builtins) BUILT_IN_TM_STORE_WAW_M256, UNKNOWN, VOID_FTYPE_PV8SF_V8SF },
33495 { OPTION_MASK_ISA_AVX, CODE_FOR_nothing, "__builtin__ITM_RM256", (enum ix86_builtins) BUILT_IN_TM_LOAD_M256, UNKNOWN, V8SF_FTYPE_PCV8SF },
33496 { OPTION_MASK_ISA_AVX, CODE_FOR_nothing, "__builtin__ITM_RaRM256", (enum ix86_builtins) BUILT_IN_TM_LOAD_RAR_M256, UNKNOWN, V8SF_FTYPE_PCV8SF },
33497 { OPTION_MASK_ISA_AVX, CODE_FOR_nothing, "__builtin__ITM_RaWM256", (enum ix86_builtins) BUILT_IN_TM_LOAD_RAW_M256, UNKNOWN, V8SF_FTYPE_PCV8SF },
33498 { OPTION_MASK_ISA_AVX, CODE_FOR_nothing, "__builtin__ITM_RfWM256", (enum ix86_builtins) BUILT_IN_TM_LOAD_RFW_M256, UNKNOWN, V8SF_FTYPE_PCV8SF },
33500 { OPTION_MASK_ISA_MMX, CODE_FOR_nothing, "__builtin__ITM_LM64", (enum ix86_builtins) BUILT_IN_TM_LOG_M64, UNKNOWN, VOID_FTYPE_PCVOID },
33501 { OPTION_MASK_ISA_SSE, CODE_FOR_nothing, "__builtin__ITM_LM128", (enum ix86_builtins) BUILT_IN_TM_LOG_M128, UNKNOWN, VOID_FTYPE_PCVOID },
33502 { OPTION_MASK_ISA_AVX, CODE_FOR_nothing, "__builtin__ITM_LM256", (enum ix86_builtins) BUILT_IN_TM_LOG_M256, UNKNOWN, VOID_FTYPE_PCVOID },
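/* For reference: each entry above ties an ISA mask to one of the transactional
   memory runtime's vector accessors.  When registered (see
   ix86_init_tm_builtins below) the "__builtin_" prefix is stripped, so
   "__builtin__ITM_WM128" is also callable as "_ITM_WM128".  Under the assumed
   libitm ABI that entry point behaves roughly like

       void _ITM_WM128 (__m128 *ptr, __m128 val);   // transactional 16-byte store

   which matches the VOID_FTYPE_PV4SF_V4SF signature used in the table.  */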
33505 /* TM callbacks. */
33507 /* Return the builtin decl needed to load a vector of TYPE. */
33509 static tree
33510 ix86_builtin_tm_load (tree type)
33512 if (TREE_CODE (type) == VECTOR_TYPE)
33514 switch (tree_to_uhwi (TYPE_SIZE (type)))
33516 case 64:
33517 return builtin_decl_explicit (BUILT_IN_TM_LOAD_M64);
33518 case 128:
33519 return builtin_decl_explicit (BUILT_IN_TM_LOAD_M128);
33520 case 256:
33521 return builtin_decl_explicit (BUILT_IN_TM_LOAD_M256);
33524 return NULL_TREE;
33527 /* Return the builtin decl needed to store a vector of TYPE. */
33529 static tree
33530 ix86_builtin_tm_store (tree type)
33532 if (TREE_CODE (type) == VECTOR_TYPE)
33534 switch (tree_to_uhwi (TYPE_SIZE (type)))
33536 case 64:
33537 return builtin_decl_explicit (BUILT_IN_TM_STORE_M64);
33538 case 128:
33539 return builtin_decl_explicit (BUILT_IN_TM_STORE_M128);
33540 case 256:
33541 return builtin_decl_explicit (BUILT_IN_TM_STORE_M256);
33544 return NULL_TREE;
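/* A worked example of the size dispatch in the two helpers above, assuming a
   16-byte vector type:

       tree t = build_vector_type (float_type_node, 4);  // V4SF, TYPE_SIZE == 128
       tree d = ix86_builtin_tm_load (t);                // decl for BUILT_IN_TM_LOAD_M128

   Any other vector width falls through to NULL_TREE, and the caller has to use
   some other access strategy.  These helpers are presumably installed as the
   builtin_tm_load/builtin_tm_store target hooks elsewhere in this file.  */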
33547 /* Initialize the transactional memory vector load/store builtins. */
33549 static void
33550 ix86_init_tm_builtins (void)
33552 enum ix86_builtin_func_type ftype;
33553 const struct builtin_description *d;
33554 size_t i;
33555 tree decl;
33556 tree attrs_load, attrs_type_load, attrs_store, attrs_type_store;
33557 tree attrs_log, attrs_type_log;
33559 if (!flag_tm)
33560 return;
33562 /* If there are no builtins defined, we must be compiling in a
33563 language without trans-mem support. */
33564 if (!builtin_decl_explicit_p (BUILT_IN_TM_LOAD_1))
33565 return;
33567 /* Use whatever attributes a normal TM load has. */
33568 decl = builtin_decl_explicit (BUILT_IN_TM_LOAD_1);
33569 attrs_load = DECL_ATTRIBUTES (decl);
33570 attrs_type_load = TYPE_ATTRIBUTES (TREE_TYPE (decl));
33571 /* Use whatever attributes a normal TM store has. */
33572 decl = builtin_decl_explicit (BUILT_IN_TM_STORE_1);
33573 attrs_store = DECL_ATTRIBUTES (decl);
33574 attrs_type_store = TYPE_ATTRIBUTES (TREE_TYPE (decl));
33575 /* Use whatever attributes a normal TM log has. */
33576 decl = builtin_decl_explicit (BUILT_IN_TM_LOG);
33577 attrs_log = DECL_ATTRIBUTES (decl);
33578 attrs_type_log = TYPE_ATTRIBUTES (TREE_TYPE (decl));
33580 for (i = 0, d = bdesc_tm;
33581 i < ARRAY_SIZE (bdesc_tm);
33582 i++, d++)
33584 if ((d->mask & ix86_isa_flags) != 0
33585 || (lang_hooks.builtin_function
33586 == lang_hooks.builtin_function_ext_scope))
33588 tree type, attrs, attrs_type;
33589 enum built_in_function code = (enum built_in_function) d->code;
33591 ftype = (enum ix86_builtin_func_type) d->flag;
33592 type = ix86_get_builtin_func_type (ftype);
33594 if (BUILTIN_TM_LOAD_P (code))
33596 attrs = attrs_load;
33597 attrs_type = attrs_type_load;
33599 else if (BUILTIN_TM_STORE_P (code))
33601 attrs = attrs_store;
33602 attrs_type = attrs_type_store;
33604 else
33606 attrs = attrs_log;
33607 attrs_type = attrs_type_log;
33609 decl = add_builtin_function (d->name, type, code, BUILT_IN_NORMAL,
33610 /* The name without the "__builtin_" prefix,
33611    so the function can also be called directly.  */
33612 d->name + strlen ("__builtin_"),
33613 attrs);
33614 /* add_builtin_function() will set the DECL_ATTRIBUTES, now
33615 set the TYPE_ATTRIBUTES. */
33616 decl_attributes (&TREE_TYPE (decl), attrs_type, ATTR_FLAG_BUILT_IN);
33618 set_builtin_decl (code, decl, false);
33623 /* Set up all the MMX/SSE builtins, even builtins for instructions that are
33624    not in the current target ISA, so that the user can compile particular
33625    modules with target-specific options that differ from the command-line
33626    options.  */
33627 static void
33628 ix86_init_mmx_sse_builtins (void)
33630 const struct builtin_description * d;
33631 enum ix86_builtin_func_type ftype;
33632 size_t i;
33634 /* Add all special builtins with variable number of operands. */
33635 for (i = 0, d = bdesc_special_args;
33636 i < ARRAY_SIZE (bdesc_special_args);
33637 i++, d++)
33639 if (d->name == 0)
33640 continue;
33642 ftype = (enum ix86_builtin_func_type) d->flag;
33643 def_builtin (d->mask, d->name, ftype, d->code);
33646 /* Add all builtins with variable number of operands. */
33647 for (i = 0, d = bdesc_args;
33648 i < ARRAY_SIZE (bdesc_args);
33649 i++, d++)
33651 if (d->name == 0)
33652 continue;
33654 ftype = (enum ix86_builtin_func_type) d->flag;
33655 def_builtin_const (d->mask, d->name, ftype, d->code);
33658 /* Add all builtins with rounding. */
33659 for (i = 0, d = bdesc_round_args;
33660 i < ARRAY_SIZE (bdesc_round_args);
33661 i++, d++)
33663 if (d->name == 0)
33664 continue;
33666 ftype = (enum ix86_builtin_func_type) d->flag;
33667 def_builtin_const (d->mask, d->name, ftype, d->code);
33670 /* pcmpestr[im] insns. */
33671 for (i = 0, d = bdesc_pcmpestr;
33672 i < ARRAY_SIZE (bdesc_pcmpestr);
33673 i++, d++)
33675 if (d->code == IX86_BUILTIN_PCMPESTRM128)
33676 ftype = V16QI_FTYPE_V16QI_INT_V16QI_INT_INT;
33677 else
33678 ftype = INT_FTYPE_V16QI_INT_V16QI_INT_INT;
33679 def_builtin_const (d->mask, d->name, ftype, d->code);
33682 /* pcmpistr[im] insns. */
33683 for (i = 0, d = bdesc_pcmpistr;
33684 i < ARRAY_SIZE (bdesc_pcmpistr);
33685 i++, d++)
33687 if (d->code == IX86_BUILTIN_PCMPISTRM128)
33688 ftype = V16QI_FTYPE_V16QI_V16QI_INT;
33689 else
33690 ftype = INT_FTYPE_V16QI_V16QI_INT;
33691 def_builtin_const (d->mask, d->name, ftype, d->code);
33694 /* comi/ucomi insns. */
33695 for (i = 0, d = bdesc_comi; i < ARRAY_SIZE (bdesc_comi); i++, d++)
33697 if (d->mask == OPTION_MASK_ISA_SSE2)
33698 ftype = INT_FTYPE_V2DF_V2DF;
33699 else
33700 ftype = INT_FTYPE_V4SF_V4SF;
33701 def_builtin_const (d->mask, d->name, ftype, d->code);
33704 /* SSE */
33705 def_builtin (OPTION_MASK_ISA_SSE, "__builtin_ia32_ldmxcsr",
33706 VOID_FTYPE_UNSIGNED, IX86_BUILTIN_LDMXCSR);
33707 def_builtin (OPTION_MASK_ISA_SSE, "__builtin_ia32_stmxcsr",
33708 UNSIGNED_FTYPE_VOID, IX86_BUILTIN_STMXCSR);
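/* User-level sketch of the two MXCSR builtins just defined; the xmmintrin.h
   wrappers _mm_getcsr and _mm_setcsr are assumed to reduce to them:

       #include <xmmintrin.h>

       void
       enable_flush_to_zero (void)
       {
         _mm_setcsr (_mm_getcsr () | _MM_FLUSH_ZERO_ON);  // stmxcsr, then ldmxcsr
       }
*/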
33710 /* SSE or 3DNow!A */
33711 def_builtin (OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A,
33712 "__builtin_ia32_maskmovq", VOID_FTYPE_V8QI_V8QI_PCHAR,
33713 IX86_BUILTIN_MASKMOVQ);
33715 /* SSE2 */
33716 def_builtin (OPTION_MASK_ISA_SSE2, "__builtin_ia32_maskmovdqu",
33717 VOID_FTYPE_V16QI_V16QI_PCHAR, IX86_BUILTIN_MASKMOVDQU);
33719 def_builtin (OPTION_MASK_ISA_SSE2, "__builtin_ia32_clflush",
33720 VOID_FTYPE_PCVOID, IX86_BUILTIN_CLFLUSH);
33721 x86_mfence = def_builtin (OPTION_MASK_ISA_SSE2, "__builtin_ia32_mfence",
33722 VOID_FTYPE_VOID, IX86_BUILTIN_MFENCE);
33724 /* SSE3. */
33725 def_builtin (OPTION_MASK_ISA_SSE3, "__builtin_ia32_monitor",
33726 VOID_FTYPE_PCVOID_UNSIGNED_UNSIGNED, IX86_BUILTIN_MONITOR);
33727 def_builtin (OPTION_MASK_ISA_SSE3, "__builtin_ia32_mwait",
33728 VOID_FTYPE_UNSIGNED_UNSIGNED, IX86_BUILTIN_MWAIT);
33730 /* AES */
33731 def_builtin_const (OPTION_MASK_ISA_AES, "__builtin_ia32_aesenc128",
33732 V2DI_FTYPE_V2DI_V2DI, IX86_BUILTIN_AESENC128);
33733 def_builtin_const (OPTION_MASK_ISA_AES, "__builtin_ia32_aesenclast128",
33734 V2DI_FTYPE_V2DI_V2DI, IX86_BUILTIN_AESENCLAST128);
33735 def_builtin_const (OPTION_MASK_ISA_AES, "__builtin_ia32_aesdec128",
33736 V2DI_FTYPE_V2DI_V2DI, IX86_BUILTIN_AESDEC128);
33737 def_builtin_const (OPTION_MASK_ISA_AES, "__builtin_ia32_aesdeclast128",
33738 V2DI_FTYPE_V2DI_V2DI, IX86_BUILTIN_AESDECLAST128);
33739 def_builtin_const (OPTION_MASK_ISA_AES, "__builtin_ia32_aesimc128",
33740 V2DI_FTYPE_V2DI, IX86_BUILTIN_AESIMC128);
33741 def_builtin_const (OPTION_MASK_ISA_AES, "__builtin_ia32_aeskeygenassist128",
33742 V2DI_FTYPE_V2DI_INT, IX86_BUILTIN_AESKEYGENASSIST128);
33744 /* PCLMUL */
33745 def_builtin_const (OPTION_MASK_ISA_PCLMUL, "__builtin_ia32_pclmulqdq128",
33746 V2DI_FTYPE_V2DI_V2DI_INT, IX86_BUILTIN_PCLMULQDQ128);
33748 /* RDRND */
33749 def_builtin (OPTION_MASK_ISA_RDRND, "__builtin_ia32_rdrand16_step",
33750 INT_FTYPE_PUSHORT, IX86_BUILTIN_RDRAND16_STEP);
33751 def_builtin (OPTION_MASK_ISA_RDRND, "__builtin_ia32_rdrand32_step",
33752 INT_FTYPE_PUNSIGNED, IX86_BUILTIN_RDRAND32_STEP);
33753 def_builtin (OPTION_MASK_ISA_RDRND | OPTION_MASK_ISA_64BIT,
33754 "__builtin_ia32_rdrand64_step", INT_FTYPE_PULONGLONG,
33755 IX86_BUILTIN_RDRAND64_STEP);
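/* Illustrative use of the RDRND step builtins above: each call stores a random
   value through its pointer argument and returns nonzero on success, so callers
   normally retry.  Sketch assuming immintrin.h's _rdrand32_step wrapper:

       #include <immintrin.h>

       unsigned int
       get_random_u32 (void)
       {
         unsigned int v = 0;
         while (!_rdrand32_step (&v))   // retry until the hardware has entropy
           ;
         return v;
       }
*/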
33757 /* AVX2 */
33758 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gathersiv2df",
33759 V2DF_FTYPE_V2DF_PCDOUBLE_V4SI_V2DF_INT,
33760 IX86_BUILTIN_GATHERSIV2DF);
33762 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gathersiv4df",
33763 V4DF_FTYPE_V4DF_PCDOUBLE_V4SI_V4DF_INT,
33764 IX86_BUILTIN_GATHERSIV4DF);
33766 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gatherdiv2df",
33767 V2DF_FTYPE_V2DF_PCDOUBLE_V2DI_V2DF_INT,
33768 IX86_BUILTIN_GATHERDIV2DF);
33770 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gatherdiv4df",
33771 V4DF_FTYPE_V4DF_PCDOUBLE_V4DI_V4DF_INT,
33772 IX86_BUILTIN_GATHERDIV4DF);
33774 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gathersiv4sf",
33775 V4SF_FTYPE_V4SF_PCFLOAT_V4SI_V4SF_INT,
33776 IX86_BUILTIN_GATHERSIV4SF);
33778 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gathersiv8sf",
33779 V8SF_FTYPE_V8SF_PCFLOAT_V8SI_V8SF_INT,
33780 IX86_BUILTIN_GATHERSIV8SF);
33782 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gatherdiv4sf",
33783 V4SF_FTYPE_V4SF_PCFLOAT_V2DI_V4SF_INT,
33784 IX86_BUILTIN_GATHERDIV4SF);
33786 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gatherdiv4sf256",
33787 V4SF_FTYPE_V4SF_PCFLOAT_V4DI_V4SF_INT,
33788 IX86_BUILTIN_GATHERDIV8SF);
33790 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gathersiv2di",
33791 V2DI_FTYPE_V2DI_PCINT64_V4SI_V2DI_INT,
33792 IX86_BUILTIN_GATHERSIV2DI);
33794 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gathersiv4di",
33795 V4DI_FTYPE_V4DI_PCINT64_V4SI_V4DI_INT,
33796 IX86_BUILTIN_GATHERSIV4DI);
33798 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gatherdiv2di",
33799 V2DI_FTYPE_V2DI_PCINT64_V2DI_V2DI_INT,
33800 IX86_BUILTIN_GATHERDIV2DI);
33802 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gatherdiv4di",
33803 V4DI_FTYPE_V4DI_PCINT64_V4DI_V4DI_INT,
33804 IX86_BUILTIN_GATHERDIV4DI);
33806 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gathersiv4si",
33807 V4SI_FTYPE_V4SI_PCINT_V4SI_V4SI_INT,
33808 IX86_BUILTIN_GATHERSIV4SI);
33810 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gathersiv8si",
33811 V8SI_FTYPE_V8SI_PCINT_V8SI_V8SI_INT,
33812 IX86_BUILTIN_GATHERSIV8SI);
33814 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gatherdiv4si",
33815 V4SI_FTYPE_V4SI_PCINT_V2DI_V4SI_INT,
33816 IX86_BUILTIN_GATHERDIV4SI);
33818 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gatherdiv4si256",
33819 V4SI_FTYPE_V4SI_PCINT_V4DI_V4SI_INT,
33820 IX86_BUILTIN_GATHERDIV8SI);
33822 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gatheraltsiv4df ",
33823 V4DF_FTYPE_V4DF_PCDOUBLE_V8SI_V4DF_INT,
33824 IX86_BUILTIN_GATHERALTSIV4DF);
33826 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gatheraltdiv4sf256 ",
33827 V8SF_FTYPE_V8SF_PCFLOAT_V4DI_V8SF_INT,
33828 IX86_BUILTIN_GATHERALTDIV8SF);
33830 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gatheraltsiv4di ",
33831 V4DI_FTYPE_V4DI_PCINT64_V8SI_V4DI_INT,
33832 IX86_BUILTIN_GATHERALTSIV4DI);
33834 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gatheraltdiv4si256 ",
33835 V8SI_FTYPE_V8SI_PCINT_V4DI_V8SI_INT,
33836 IX86_BUILTIN_GATHERALTDIV8SI);
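/* A user-level sketch for one of the AVX2 gather builtins above, assuming
   avx2intrin.h's _mm256_i32gather_pd expands to __builtin_ia32_gathersiv4df
   with an all-ones mask:

       #include <immintrin.h>

       __m256d
       gather_four_doubles (const double *base, __m128i idx)
       {
         return _mm256_i32gather_pd (base, idx, 8);  // scale 8: idx holds element indices
       }
*/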
33838 /* AVX512F */
33839 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_gathersiv16sf",
33840 V16SF_FTYPE_V16SF_PCFLOAT_V16SI_HI_INT,
33841 IX86_BUILTIN_GATHER3SIV16SF);
33843 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_gathersiv8df",
33844 V8DF_FTYPE_V8DF_PCDOUBLE_V8SI_QI_INT,
33845 IX86_BUILTIN_GATHER3SIV8DF);
33847 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_gatherdiv16sf",
33848 V8SF_FTYPE_V8SF_PCFLOAT_V8DI_QI_INT,
33849 IX86_BUILTIN_GATHER3DIV16SF);
33851 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_gatherdiv8df",
33852 V8DF_FTYPE_V8DF_PCDOUBLE_V8DI_QI_INT,
33853 IX86_BUILTIN_GATHER3DIV8DF);
33855 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_gathersiv16si",
33856 V16SI_FTYPE_V16SI_PCINT_V16SI_HI_INT,
33857 IX86_BUILTIN_GATHER3SIV16SI);
33859 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_gathersiv8di",
33860 V8DI_FTYPE_V8DI_PCINT64_V8SI_QI_INT,
33861 IX86_BUILTIN_GATHER3SIV8DI);
33863 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_gatherdiv16si",
33864 V8SI_FTYPE_V8SI_PCINT_V8DI_QI_INT,
33865 IX86_BUILTIN_GATHER3DIV16SI);
33867 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_gatherdiv8di",
33868 V8DI_FTYPE_V8DI_PCINT64_V8DI_QI_INT,
33869 IX86_BUILTIN_GATHER3DIV8DI);
33871 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_gatheraltsiv8df ",
33872 V8DF_FTYPE_V8DF_PCDOUBLE_V16SI_QI_INT,
33873 IX86_BUILTIN_GATHER3ALTSIV8DF);
33875 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_gatheraltdiv8sf ",
33876 V16SF_FTYPE_V16SF_PCFLOAT_V8DI_HI_INT,
33877 IX86_BUILTIN_GATHER3ALTDIV16SF);
33879 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_gatheraltsiv8di ",
33880 V8DI_FTYPE_V8DI_PCINT64_V16SI_QI_INT,
33881 IX86_BUILTIN_GATHER3ALTSIV8DI);
33883 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_gatheraltdiv8si ",
33884 V16SI_FTYPE_V16SI_PCINT_V8DI_HI_INT,
33885 IX86_BUILTIN_GATHER3ALTDIV16SI);
33887 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_scattersiv16sf",
33888 VOID_FTYPE_PFLOAT_HI_V16SI_V16SF_INT,
33889 IX86_BUILTIN_SCATTERSIV16SF);
33891 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_scattersiv8df",
33892 VOID_FTYPE_PDOUBLE_QI_V8SI_V8DF_INT,
33893 IX86_BUILTIN_SCATTERSIV8DF);
33895 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_scatterdiv16sf",
33896 VOID_FTYPE_PFLOAT_QI_V8DI_V8SF_INT,
33897 IX86_BUILTIN_SCATTERDIV16SF);
33899 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_scatterdiv8df",
33900 VOID_FTYPE_PDOUBLE_QI_V8DI_V8DF_INT,
33901 IX86_BUILTIN_SCATTERDIV8DF);
33903 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_scattersiv16si",
33904 VOID_FTYPE_PINT_HI_V16SI_V16SI_INT,
33905 IX86_BUILTIN_SCATTERSIV16SI);
33907 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_scattersiv8di",
33908 VOID_FTYPE_PLONGLONG_QI_V8SI_V8DI_INT,
33909 IX86_BUILTIN_SCATTERSIV8DI);
33911 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_scatterdiv16si",
33912 VOID_FTYPE_PINT_QI_V8DI_V8SI_INT,
33913 IX86_BUILTIN_SCATTERDIV16SI);
33915 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_scatterdiv8di",
33916 VOID_FTYPE_PLONGLONG_QI_V8DI_V8DI_INT,
33917 IX86_BUILTIN_SCATTERDIV8DI);
33919 /* AVX512VL */
33920 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_gather3siv2df",
33921 V2DF_FTYPE_V2DF_PCDOUBLE_V4SI_QI_INT,
33922 IX86_BUILTIN_GATHER3SIV2DF);
33924 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_gather3siv4df",
33925 V4DF_FTYPE_V4DF_PCDOUBLE_V4SI_QI_INT,
33926 IX86_BUILTIN_GATHER3SIV4DF);
33928 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_gather3div2df",
33929 V2DF_FTYPE_V2DF_PCDOUBLE_V2DI_QI_INT,
33930 IX86_BUILTIN_GATHER3DIV2DF);
33932 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_gather3div4df",
33933 V4DF_FTYPE_V4DF_PCDOUBLE_V4DI_QI_INT,
33934 IX86_BUILTIN_GATHER3DIV4DF);
33936 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_gather3siv4sf",
33937 V4SF_FTYPE_V4SF_PCFLOAT_V4SI_QI_INT,
33938 IX86_BUILTIN_GATHER3SIV4SF);
33940 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_gather3siv8sf",
33941 V8SF_FTYPE_V8SF_PCFLOAT_V8SI_QI_INT,
33942 IX86_BUILTIN_GATHER3SIV8SF);
33944 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_gather3div4sf",
33945 V4SF_FTYPE_V4SF_PCFLOAT_V2DI_QI_INT,
33946 IX86_BUILTIN_GATHER3DIV4SF);
33948 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_gather3div8sf",
33949 V4SF_FTYPE_V4SF_PCFLOAT_V4DI_QI_INT,
33950 IX86_BUILTIN_GATHER3DIV8SF);
33952 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_gather3siv2di",
33953 V2DI_FTYPE_V2DI_PCINT64_V4SI_QI_INT,
33954 IX86_BUILTIN_GATHER3SIV2DI);
33956 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_gather3siv4di",
33957 V4DI_FTYPE_V4DI_PCINT64_V4SI_QI_INT,
33958 IX86_BUILTIN_GATHER3SIV4DI);
33960 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_gather3div2di",
33961 V2DI_FTYPE_V2DI_PCINT64_V2DI_QI_INT,
33962 IX86_BUILTIN_GATHER3DIV2DI);
33964 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_gather3div4di",
33965 V4DI_FTYPE_V4DI_PCINT64_V4DI_QI_INT,
33966 IX86_BUILTIN_GATHER3DIV4DI);
33968 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_gather3siv4si",
33969 V4SI_FTYPE_V4SI_PCINT_V4SI_QI_INT,
33970 IX86_BUILTIN_GATHER3SIV4SI);
33972 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_gather3siv8si",
33973 V8SI_FTYPE_V8SI_PCINT_V8SI_QI_INT,
33974 IX86_BUILTIN_GATHER3SIV8SI);
33976 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_gather3div4si",
33977 V4SI_FTYPE_V4SI_PCINT_V2DI_QI_INT,
33978 IX86_BUILTIN_GATHER3DIV4SI);
33980 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_gather3div8si",
33981 V4SI_FTYPE_V4SI_PCINT_V4DI_QI_INT,
33982 IX86_BUILTIN_GATHER3DIV8SI);
33984 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_gather3altsiv4df ",
33985 V4DF_FTYPE_V4DF_PCDOUBLE_V8SI_QI_INT,
33986 IX86_BUILTIN_GATHER3ALTSIV4DF);
33988 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_gather3altdiv8sf ",
33989 V8SF_FTYPE_V8SF_PCFLOAT_V4DI_QI_INT,
33990 IX86_BUILTIN_GATHER3ALTDIV8SF);
33992 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_gather3altsiv4di ",
33993 V4DI_FTYPE_V4DI_PCINT64_V8SI_QI_INT,
33994 IX86_BUILTIN_GATHER3ALTSIV4DI);
33996 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_gather3altdiv8si ",
33997 V8SI_FTYPE_V8SI_PCINT_V4DI_QI_INT,
33998 IX86_BUILTIN_GATHER3ALTDIV8SI);
34000 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_scattersiv8sf",
34001 VOID_FTYPE_PFLOAT_QI_V8SI_V8SF_INT,
34002 IX86_BUILTIN_SCATTERSIV8SF);
34004 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_scattersiv4sf",
34005 VOID_FTYPE_PFLOAT_QI_V4SI_V4SF_INT,
34006 IX86_BUILTIN_SCATTERSIV4SF);
34008 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_scattersiv4df",
34009 VOID_FTYPE_PDOUBLE_QI_V4SI_V4DF_INT,
34010 IX86_BUILTIN_SCATTERSIV4DF);
34012 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_scattersiv2df",
34013 VOID_FTYPE_PDOUBLE_QI_V4SI_V2DF_INT,
34014 IX86_BUILTIN_SCATTERSIV2DF);
34016 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_scatterdiv8sf",
34017 VOID_FTYPE_PFLOAT_QI_V4DI_V4SF_INT,
34018 IX86_BUILTIN_SCATTERDIV8SF);
34020 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_scatterdiv4sf",
34021 VOID_FTYPE_PFLOAT_QI_V2DI_V4SF_INT,
34022 IX86_BUILTIN_SCATTERDIV4SF);
34024 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_scatterdiv4df",
34025 VOID_FTYPE_PDOUBLE_QI_V4DI_V4DF_INT,
34026 IX86_BUILTIN_SCATTERDIV4DF);
34028 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_scatterdiv2df",
34029 VOID_FTYPE_PDOUBLE_QI_V2DI_V2DF_INT,
34030 IX86_BUILTIN_SCATTERDIV2DF);
34032 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_scattersiv8si",
34033 VOID_FTYPE_PINT_QI_V8SI_V8SI_INT,
34034 IX86_BUILTIN_SCATTERSIV8SI);
34036 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_scattersiv4si",
34037 VOID_FTYPE_PINT_QI_V4SI_V4SI_INT,
34038 IX86_BUILTIN_SCATTERSIV4SI);
34040 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_scattersiv4di",
34041 VOID_FTYPE_PLONGLONG_QI_V4SI_V4DI_INT,
34042 IX86_BUILTIN_SCATTERSIV4DI);
34044 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_scattersiv2di",
34045 VOID_FTYPE_PLONGLONG_QI_V4SI_V2DI_INT,
34046 IX86_BUILTIN_SCATTERSIV2DI);
34048 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_scatterdiv8si",
34049 VOID_FTYPE_PINT_QI_V4DI_V4SI_INT,
34050 IX86_BUILTIN_SCATTERDIV8SI);
34052 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_scatterdiv4si",
34053 VOID_FTYPE_PINT_QI_V2DI_V4SI_INT,
34054 IX86_BUILTIN_SCATTERDIV4SI);
34056 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_scatterdiv4di",
34057 VOID_FTYPE_PLONGLONG_QI_V4DI_V4DI_INT,
34058 IX86_BUILTIN_SCATTERDIV4DI);
34060 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_scatterdiv2di",
34061 VOID_FTYPE_PLONGLONG_QI_V2DI_V2DI_INT,
34062 IX86_BUILTIN_SCATTERDIV2DI);
34064 /* AVX512PF */
34065 def_builtin (OPTION_MASK_ISA_AVX512PF, "__builtin_ia32_gatherpfdpd",
34066 VOID_FTYPE_QI_V8SI_PCINT64_INT_INT,
34067 IX86_BUILTIN_GATHERPFDPD);
34068 def_builtin (OPTION_MASK_ISA_AVX512PF, "__builtin_ia32_gatherpfdps",
34069 VOID_FTYPE_HI_V16SI_PCINT_INT_INT,
34070 IX86_BUILTIN_GATHERPFDPS);
34071 def_builtin (OPTION_MASK_ISA_AVX512PF, "__builtin_ia32_gatherpfqpd",
34072 VOID_FTYPE_QI_V8DI_PCINT64_INT_INT,
34073 IX86_BUILTIN_GATHERPFQPD);
34074 def_builtin (OPTION_MASK_ISA_AVX512PF, "__builtin_ia32_gatherpfqps",
34075 VOID_FTYPE_QI_V8DI_PCINT_INT_INT,
34076 IX86_BUILTIN_GATHERPFQPS);
34077 def_builtin (OPTION_MASK_ISA_AVX512PF, "__builtin_ia32_scatterpfdpd",
34078 VOID_FTYPE_QI_V8SI_PCINT64_INT_INT,
34079 IX86_BUILTIN_SCATTERPFDPD);
34080 def_builtin (OPTION_MASK_ISA_AVX512PF, "__builtin_ia32_scatterpfdps",
34081 VOID_FTYPE_HI_V16SI_PCINT_INT_INT,
34082 IX86_BUILTIN_SCATTERPFDPS);
34083 def_builtin (OPTION_MASK_ISA_AVX512PF, "__builtin_ia32_scatterpfqpd",
34084 VOID_FTYPE_QI_V8DI_PCINT64_INT_INT,
34085 IX86_BUILTIN_SCATTERPFQPD);
34086 def_builtin (OPTION_MASK_ISA_AVX512PF, "__builtin_ia32_scatterpfqps",
34087 VOID_FTYPE_QI_V8DI_PCINT_INT_INT,
34088 IX86_BUILTIN_SCATTERPFQPS);
34090 /* SHA */
34091 def_builtin_const (OPTION_MASK_ISA_SHA, "__builtin_ia32_sha1msg1",
34092 V4SI_FTYPE_V4SI_V4SI, IX86_BUILTIN_SHA1MSG1);
34093 def_builtin_const (OPTION_MASK_ISA_SHA, "__builtin_ia32_sha1msg2",
34094 V4SI_FTYPE_V4SI_V4SI, IX86_BUILTIN_SHA1MSG2);
34095 def_builtin_const (OPTION_MASK_ISA_SHA, "__builtin_ia32_sha1nexte",
34096 V4SI_FTYPE_V4SI_V4SI, IX86_BUILTIN_SHA1NEXTE);
34097 def_builtin_const (OPTION_MASK_ISA_SHA, "__builtin_ia32_sha1rnds4",
34098 V4SI_FTYPE_V4SI_V4SI_INT, IX86_BUILTIN_SHA1RNDS4);
34099 def_builtin_const (OPTION_MASK_ISA_SHA, "__builtin_ia32_sha256msg1",
34100 V4SI_FTYPE_V4SI_V4SI, IX86_BUILTIN_SHA256MSG1);
34101 def_builtin_const (OPTION_MASK_ISA_SHA, "__builtin_ia32_sha256msg2",
34102 V4SI_FTYPE_V4SI_V4SI, IX86_BUILTIN_SHA256MSG2);
34103 def_builtin_const (OPTION_MASK_ISA_SHA, "__builtin_ia32_sha256rnds2",
34104 V4SI_FTYPE_V4SI_V4SI_V4SI, IX86_BUILTIN_SHA256RNDS2);
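/* Illustrative wrapper for one of the SHA builtins above; shaintrin.h's
   _mm_sha1rnds4_epu32 is assumed to map onto __builtin_ia32_sha1rnds4:

       #include <immintrin.h>

       __m128i
       sha1_rounds (__m128i abcd, __m128i msg)
       {
         return _mm_sha1rnds4_epu32 (abcd, msg, 0);  // immediate selects the round-function group
       }
*/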
34106 /* RTM. */
34107 def_builtin (OPTION_MASK_ISA_RTM, "__builtin_ia32_xabort",
34108 VOID_FTYPE_UNSIGNED, IX86_BUILTIN_XABORT);
34110 /* MMX access to the vec_init patterns. */
34111 def_builtin_const (OPTION_MASK_ISA_MMX, "__builtin_ia32_vec_init_v2si",
34112 V2SI_FTYPE_INT_INT, IX86_BUILTIN_VEC_INIT_V2SI);
34114 def_builtin_const (OPTION_MASK_ISA_MMX, "__builtin_ia32_vec_init_v4hi",
34115 V4HI_FTYPE_HI_HI_HI_HI,
34116 IX86_BUILTIN_VEC_INIT_V4HI);
34118 def_builtin_const (OPTION_MASK_ISA_MMX, "__builtin_ia32_vec_init_v8qi",
34119 V8QI_FTYPE_QI_QI_QI_QI_QI_QI_QI_QI,
34120 IX86_BUILTIN_VEC_INIT_V8QI);
34122 /* Access to the vec_extract patterns. */
34123 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_ext_v2df",
34124 DOUBLE_FTYPE_V2DF_INT, IX86_BUILTIN_VEC_EXT_V2DF);
34125 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_ext_v2di",
34126 DI_FTYPE_V2DI_INT, IX86_BUILTIN_VEC_EXT_V2DI);
34127 def_builtin_const (OPTION_MASK_ISA_SSE, "__builtin_ia32_vec_ext_v4sf",
34128 FLOAT_FTYPE_V4SF_INT, IX86_BUILTIN_VEC_EXT_V4SF);
34129 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_ext_v4si",
34130 SI_FTYPE_V4SI_INT, IX86_BUILTIN_VEC_EXT_V4SI);
34131 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_ext_v8hi",
34132 HI_FTYPE_V8HI_INT, IX86_BUILTIN_VEC_EXT_V8HI);
34134 def_builtin_const (OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A,
34135 "__builtin_ia32_vec_ext_v4hi",
34136 HI_FTYPE_V4HI_INT, IX86_BUILTIN_VEC_EXT_V4HI);
34138 def_builtin_const (OPTION_MASK_ISA_MMX, "__builtin_ia32_vec_ext_v2si",
34139 SI_FTYPE_V2SI_INT, IX86_BUILTIN_VEC_EXT_V2SI);
34141 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_ext_v16qi",
34142 QI_FTYPE_V16QI_INT, IX86_BUILTIN_VEC_EXT_V16QI);
34144 /* Access to the vec_set patterns. */
34145 def_builtin_const (OPTION_MASK_ISA_SSE4_1 | OPTION_MASK_ISA_64BIT,
34146 "__builtin_ia32_vec_set_v2di",
34147 V2DI_FTYPE_V2DI_DI_INT, IX86_BUILTIN_VEC_SET_V2DI);
34149 def_builtin_const (OPTION_MASK_ISA_SSE4_1, "__builtin_ia32_vec_set_v4sf",
34150 V4SF_FTYPE_V4SF_FLOAT_INT, IX86_BUILTIN_VEC_SET_V4SF);
34152 def_builtin_const (OPTION_MASK_ISA_SSE4_1, "__builtin_ia32_vec_set_v4si",
34153 V4SI_FTYPE_V4SI_SI_INT, IX86_BUILTIN_VEC_SET_V4SI);
34155 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_set_v8hi",
34156 V8HI_FTYPE_V8HI_HI_INT, IX86_BUILTIN_VEC_SET_V8HI);
34158 def_builtin_const (OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A,
34159 "__builtin_ia32_vec_set_v4hi",
34160 V4HI_FTYPE_V4HI_HI_INT, IX86_BUILTIN_VEC_SET_V4HI);
34162 def_builtin_const (OPTION_MASK_ISA_SSE4_1, "__builtin_ia32_vec_set_v16qi",
34163 V16QI_FTYPE_V16QI_QI_INT, IX86_BUILTIN_VEC_SET_V16QI);
34165 /* RDSEED */
34166 def_builtin (OPTION_MASK_ISA_RDSEED, "__builtin_ia32_rdseed_hi_step",
34167 INT_FTYPE_PUSHORT, IX86_BUILTIN_RDSEED16_STEP);
34168 def_builtin (OPTION_MASK_ISA_RDSEED, "__builtin_ia32_rdseed_si_step",
34169 INT_FTYPE_PUNSIGNED, IX86_BUILTIN_RDSEED32_STEP);
34170 def_builtin (OPTION_MASK_ISA_RDSEED | OPTION_MASK_ISA_64BIT,
34171 "__builtin_ia32_rdseed_di_step",
34172 INT_FTYPE_PULONGLONG, IX86_BUILTIN_RDSEED64_STEP);
34174 /* ADCX */
34175 def_builtin (0, "__builtin_ia32_addcarryx_u32",
34176 UCHAR_FTYPE_UCHAR_UINT_UINT_PUNSIGNED, IX86_BUILTIN_ADDCARRYX32);
34177 def_builtin (OPTION_MASK_ISA_64BIT,
34178 "__builtin_ia32_addcarryx_u64",
34179 UCHAR_FTYPE_UCHAR_ULONGLONG_ULONGLONG_PULONGLONG,
34180 IX86_BUILTIN_ADDCARRYX64);
34182 /* SBB */
34183 def_builtin (0, "__builtin_ia32_sbb_u32",
34184 UCHAR_FTYPE_UCHAR_UINT_UINT_PUNSIGNED, IX86_BUILTIN_SBB32);
34185 def_builtin (OPTION_MASK_ISA_64BIT,
34186 "__builtin_ia32_sbb_u64",
34187 UCHAR_FTYPE_UCHAR_ULONGLONG_ULONGLONG_PULONGLONG,
34188 IX86_BUILTIN_SBB64);
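/* The carry/borrow builtins above thread a carry flag through multi-word
   arithmetic.  A minimal sketch of a 64-bit addition built from two 32-bit
   limbs, assuming adxintrin.h's _addcarryx_u32 wraps __builtin_ia32_addcarryx_u32:

       #include <x86intrin.h>

       void
       add_u64_via_u32 (const unsigned int a[2], const unsigned int b[2],
                        unsigned int out[2])
       {
         unsigned char c = _addcarryx_u32 (0, a[0], b[0], &out[0]);  // low limb
         _addcarryx_u32 (c, a[1], b[1], &out[1]);                    // high limb plus carry
       }
*/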
34190 /* Read/write FLAGS. */
34191 def_builtin (~OPTION_MASK_ISA_64BIT, "__builtin_ia32_readeflags_u32",
34192 UNSIGNED_FTYPE_VOID, IX86_BUILTIN_READ_FLAGS);
34193 def_builtin (OPTION_MASK_ISA_64BIT, "__builtin_ia32_readeflags_u64",
34194 UINT64_FTYPE_VOID, IX86_BUILTIN_READ_FLAGS);
34195 def_builtin (~OPTION_MASK_ISA_64BIT, "__builtin_ia32_writeeflags_u32",
34196 VOID_FTYPE_UNSIGNED, IX86_BUILTIN_WRITE_FLAGS);
34197 def_builtin (OPTION_MASK_ISA_64BIT, "__builtin_ia32_writeeflags_u64",
34198 VOID_FTYPE_UINT64, IX86_BUILTIN_WRITE_FLAGS);
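/* Illustrative use of the FLAGS builtins above; ia32intrin.h's __readeflags and
   __writeeflags are assumed to reduce to them, with the 32- vs 64-bit variant
   picked by the ISA masks:

       #include <x86intrin.h>

       int
       carry_flag_set (void)
       {
         return (__readeflags () & 1) != 0;   // bit 0 of EFLAGS/RFLAGS is CF
       }
*/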
34200 /* CLFLUSHOPT. */
34201 def_builtin (OPTION_MASK_ISA_CLFLUSHOPT, "__builtin_ia32_clflushopt",
34202 VOID_FTYPE_PCVOID, IX86_BUILTIN_CLFLUSHOPT);
34204 /* CLWB. */
34205 def_builtin (OPTION_MASK_ISA_CLWB, "__builtin_ia32_clwb",
34206 VOID_FTYPE_PCVOID, IX86_BUILTIN_CLWB);
34208 /* MONITORX and MWAITX. */
34209 def_builtin (OPTION_MASK_ISA_MWAITX, "__builtin_ia32_monitorx",
34210 VOID_FTYPE_PCVOID_UNSIGNED_UNSIGNED, IX86_BUILTIN_MONITORX);
34211 def_builtin (OPTION_MASK_ISA_MWAITX, "__builtin_ia32_mwaitx",
34212 VOID_FTYPE_UNSIGNED_UNSIGNED_UNSIGNED, IX86_BUILTIN_MWAITX);
34214   /* Add the multi-arg builtins (FMA4, FMA and XOP).  */
34215 for (i = 0, d = bdesc_multi_arg; i < ARRAY_SIZE (bdesc_multi_arg); i++, d++)
34217 if (d->name == 0)
34218 continue;
34220 ftype = (enum ix86_builtin_func_type) d->flag;
34221 def_builtin_const (d->mask, d->name, ftype, d->code);
34225 static void
34226 ix86_init_mpx_builtins ()
34228 const struct builtin_description * d;
34229 enum ix86_builtin_func_type ftype;
34230 tree decl;
34231 size_t i;
34233 for (i = 0, d = bdesc_mpx;
34234 i < ARRAY_SIZE (bdesc_mpx);
34235 i++, d++)
34237 if (d->name == 0)
34238 continue;
34240 ftype = (enum ix86_builtin_func_type) d->flag;
34241 decl = def_builtin (d->mask, d->name, ftype, d->code);
34243       /* Without the leaf and nothrow flags, abnormal edges
34244          may follow calls to MPX builtins when setjmp is
34245          present in the function.  Since there may be many
34246          MPX builtin calls, this creates lots of useless
34247          edges and enormous PHI nodes.  To avoid that, mark
34248          MPX builtins as leaf and nothrow.  */
34249 if (decl)
34251 DECL_ATTRIBUTES (decl) = build_tree_list (get_identifier ("leaf"),
34252 NULL_TREE);
34253 TREE_NOTHROW (decl) = 1;
34255 else
34257 ix86_builtins_isa[(int)d->code].leaf_p = true;
34258 ix86_builtins_isa[(int)d->code].nothrow_p = true;
34262 for (i = 0, d = bdesc_mpx_const;
34263 i < ARRAY_SIZE (bdesc_mpx_const);
34264 i++, d++)
34266 if (d->name == 0)
34267 continue;
34269 ftype = (enum ix86_builtin_func_type) d->flag;
34270 decl = def_builtin_const (d->mask, d->name, ftype, d->code);
34272 if (decl)
34274 DECL_ATTRIBUTES (decl) = build_tree_list (get_identifier ("leaf"),
34275 NULL_TREE);
34276 TREE_NOTHROW (decl) = 1;
34278 else
34280 ix86_builtins_isa[(int)d->code].leaf_p = true;
34281 ix86_builtins_isa[(int)d->code].nothrow_p = true;
34286 /* This adds a condition to the basic_block NEW_BB in function FUNCTION_DECL
34287 to return a pointer to VERSION_DECL if the outcome of the expression
34288 formed by PREDICATE_CHAIN is true. This function will be called during
34289 version dispatch to decide which function version to execute. It returns
34290 the basic block at the end, to which more conditions can be added. */
34292 static basic_block
34293 add_condition_to_bb (tree function_decl, tree version_decl,
34294 tree predicate_chain, basic_block new_bb)
34296 gimple return_stmt;
34297 tree convert_expr, result_var;
34298 gimple convert_stmt;
34299 gimple call_cond_stmt;
34300 gimple if_else_stmt;
34302 basic_block bb1, bb2, bb3;
34303 edge e12, e23;
34305 tree cond_var, and_expr_var = NULL_TREE;
34306 gimple_seq gseq;
34308 tree predicate_decl, predicate_arg;
34310 push_cfun (DECL_STRUCT_FUNCTION (function_decl));
34312 gcc_assert (new_bb != NULL);
34313 gseq = bb_seq (new_bb);
34316 convert_expr = build1 (CONVERT_EXPR, ptr_type_node,
34317 build_fold_addr_expr (version_decl));
34318 result_var = create_tmp_var (ptr_type_node);
34319 convert_stmt = gimple_build_assign (result_var, convert_expr);
34320 return_stmt = gimple_build_return (result_var);
34322 if (predicate_chain == NULL_TREE)
34324 gimple_seq_add_stmt (&gseq, convert_stmt);
34325 gimple_seq_add_stmt (&gseq, return_stmt);
34326 set_bb_seq (new_bb, gseq);
34327 gimple_set_bb (convert_stmt, new_bb);
34328 gimple_set_bb (return_stmt, new_bb);
34329 pop_cfun ();
34330 return new_bb;
34333 while (predicate_chain != NULL)
34335 cond_var = create_tmp_var (integer_type_node);
34336 predicate_decl = TREE_PURPOSE (predicate_chain);
34337 predicate_arg = TREE_VALUE (predicate_chain);
34338 call_cond_stmt = gimple_build_call (predicate_decl, 1, predicate_arg);
34339 gimple_call_set_lhs (call_cond_stmt, cond_var);
34341 gimple_set_block (call_cond_stmt, DECL_INITIAL (function_decl));
34342 gimple_set_bb (call_cond_stmt, new_bb);
34343 gimple_seq_add_stmt (&gseq, call_cond_stmt);
34345 predicate_chain = TREE_CHAIN (predicate_chain);
34347 if (and_expr_var == NULL)
34348 and_expr_var = cond_var;
34349 else
34351 gimple assign_stmt;
34352           /* Use MIN_EXPR to check whether any predicate result is zero:
34353              and_expr_var = MIN_EXPR <cond_var, and_expr_var>.  */
34354 assign_stmt = gimple_build_assign (and_expr_var,
34355 build2 (MIN_EXPR, integer_type_node,
34356 cond_var, and_expr_var));
34358 gimple_set_block (assign_stmt, DECL_INITIAL (function_decl));
34359 gimple_set_bb (assign_stmt, new_bb);
34360 gimple_seq_add_stmt (&gseq, assign_stmt);
34364 if_else_stmt = gimple_build_cond (GT_EXPR, and_expr_var,
34365 integer_zero_node,
34366 NULL_TREE, NULL_TREE);
34367 gimple_set_block (if_else_stmt, DECL_INITIAL (function_decl));
34368 gimple_set_bb (if_else_stmt, new_bb);
34369 gimple_seq_add_stmt (&gseq, if_else_stmt);
34371 gimple_seq_add_stmt (&gseq, convert_stmt);
34372 gimple_seq_add_stmt (&gseq, return_stmt);
34373 set_bb_seq (new_bb, gseq);
34375 bb1 = new_bb;
34376 e12 = split_block (bb1, if_else_stmt);
34377 bb2 = e12->dest;
34378 e12->flags &= ~EDGE_FALLTHRU;
34379 e12->flags |= EDGE_TRUE_VALUE;
34381 e23 = split_block (bb2, return_stmt);
34383 gimple_set_bb (convert_stmt, bb2);
34384 gimple_set_bb (return_stmt, bb2);
34386 bb3 = e23->dest;
34387 make_edge (bb1, bb3, EDGE_FALSE_VALUE);
34389 remove_edge (e23);
34390 make_edge (bb2, EXIT_BLOCK_PTR_FOR_FN (cfun), 0);
34392 pop_cfun ();
34394 return bb3;
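/* Roughly, each call to add_condition_to_bb above appends a block of the
   following shape to the dispatcher (illustrative pseudo-GIMPLE, not emitted
   verbatim):

       cond_1 = predicate_1 (arg_1);          // e.g. __builtin_cpu_is ("arch")
       cond_2 = predicate_2 (arg_2);          // e.g. __builtin_cpu_supports ("isa")
       and_tmp = MIN_EXPR <cond_2, cond_1>;   // zero iff any predicate failed
       if (and_tmp > 0)
         return (void *) &version_decl;
       // the false edge falls through to the test for the next version
*/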
34397 /* This parses the attribute arguments to target in DECL and determines
34398 the right builtin to use to match the platform specification.
34399 It returns the priority value for this version decl. If PREDICATE_LIST
34400 is not NULL, it stores the list of cpu features that need to be checked
34401 before dispatching this function. */
34403 static unsigned int
34404 get_builtin_code_for_version (tree decl, tree *predicate_list)
34406 tree attrs;
34407 struct cl_target_option cur_target;
34408 tree target_node;
34409 struct cl_target_option *new_target;
34410 const char *arg_str = NULL;
34411 const char *attrs_str = NULL;
34412 char *tok_str = NULL;
34413 char *token;
34415   /* Priority of i386 features; a greater value means a higher priority.  This
34416      is used to decide the order in which function dispatch must happen.  For
34417      instance, a version specialized for SSE4.2 should be checked for dispatch
34418      before a version for SSE3, as SSE4.2 implies SSE3.  */
34419 enum feature_priority
34421 P_ZERO = 0,
34422 P_MMX,
34423 P_SSE,
34424 P_SSE2,
34425 P_SSE3,
34426 P_SSSE3,
34427 P_PROC_SSSE3,
34428 P_SSE4_A,
34429 P_PROC_SSE4_A,
34430 P_SSE4_1,
34431 P_SSE4_2,
34432 P_PROC_SSE4_2,
34433 P_POPCNT,
34434 P_AVX,
34435 P_PROC_AVX,
34436 P_BMI,
34437 P_PROC_BMI,
34438 P_FMA4,
34439 P_XOP,
34440 P_PROC_XOP,
34441 P_FMA,
34442 P_PROC_FMA,
34443 P_BMI2,
34444 P_AVX2,
34445 P_PROC_AVX2,
34446 P_AVX512F,
34447 P_PROC_AVX512F
34450 enum feature_priority priority = P_ZERO;
34452 /* These are the target attribute strings for which a dispatcher is
34453 available, from fold_builtin_cpu. */
34455 static struct _feature_list
34457 const char *const name;
34458 const enum feature_priority priority;
34460 const feature_list[] =
34462 {"mmx", P_MMX},
34463 {"sse", P_SSE},
34464 {"sse2", P_SSE2},
34465 {"sse3", P_SSE3},
34466 {"sse4a", P_SSE4_A},
34467 {"ssse3", P_SSSE3},
34468 {"sse4.1", P_SSE4_1},
34469 {"sse4.2", P_SSE4_2},
34470 {"popcnt", P_POPCNT},
34471 {"avx", P_AVX},
34472 {"bmi", P_BMI},
34473 {"fma4", P_FMA4},
34474 {"xop", P_XOP},
34475 {"fma", P_FMA},
34476 {"bmi2", P_BMI2},
34477 {"avx2", P_AVX2},
34478 {"avx512f", P_AVX512F}
34482 static unsigned int NUM_FEATURES
34483 = sizeof (feature_list) / sizeof (struct _feature_list);
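  /* A hypothetical multi-versioned function that this priority table serves:

         __attribute__ ((target ("default"))) int f (void);
         __attribute__ ((target ("sse4.2")))  int f (void);
         __attribute__ ((target ("avx2")))    int f (void);

     The avx2 version (P_AVX2) is tested by the generated dispatcher before the
     sse4.2 version (P_SSE4_2), and the default version is the final fallback.  */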
34485 unsigned int i;
34487 tree predicate_chain = NULL_TREE;
34488 tree predicate_decl, predicate_arg;
34490 attrs = lookup_attribute ("target", DECL_ATTRIBUTES (decl));
34491 gcc_assert (attrs != NULL);
34493 attrs = TREE_VALUE (TREE_VALUE (attrs));
34495 gcc_assert (TREE_CODE (attrs) == STRING_CST);
34496 attrs_str = TREE_STRING_POINTER (attrs);
34498 /* Return priority zero for default function. */
34499 if (strcmp (attrs_str, "default") == 0)
34500 return 0;
34502 /* Handle arch= if specified. For priority, set it to be 1 more than
34503 the best instruction set the processor can handle. For instance, if
34504 there is a version for atom and a version for ssse3 (the highest ISA
34505 priority for atom), the atom version must be checked for dispatch
34506 before the ssse3 version. */
34507 if (strstr (attrs_str, "arch=") != NULL)
34509 cl_target_option_save (&cur_target, &global_options);
34510 target_node = ix86_valid_target_attribute_tree (attrs, &global_options,
34511 &global_options_set);
34513 gcc_assert (target_node);
34514 new_target = TREE_TARGET_OPTION (target_node);
34515 gcc_assert (new_target);
34517 if (new_target->arch_specified && new_target->arch > 0)
34519 switch (new_target->arch)
34521 case PROCESSOR_CORE2:
34522 arg_str = "core2";
34523 priority = P_PROC_SSSE3;
34524 break;
34525 case PROCESSOR_NEHALEM:
34526 if (new_target->x_ix86_isa_flags & OPTION_MASK_ISA_AES)
34527 arg_str = "westmere";
34528 else
34529 /* We translate "arch=corei7" and "arch=nehalem" to
34530 "corei7" so that it will be mapped to M_INTEL_COREI7
34531 as cpu type to cover all M_INTEL_COREI7_XXXs. */
34532 arg_str = "corei7";
34533 priority = P_PROC_SSE4_2;
34534 break;
34535 case PROCESSOR_SANDYBRIDGE:
34536 if (new_target->x_ix86_isa_flags & OPTION_MASK_ISA_F16C)
34537 arg_str = "ivybridge";
34538 else
34539 arg_str = "sandybridge";
34540 priority = P_PROC_AVX;
34541 break;
34542 case PROCESSOR_HASWELL:
34543 if (new_target->x_ix86_isa_flags & OPTION_MASK_ISA_ADX)
34544 arg_str = "broadwell";
34545 else
34546 arg_str = "haswell";
34547 priority = P_PROC_AVX2;
34548 break;
34549 case PROCESSOR_BONNELL:
34550 arg_str = "bonnell";
34551 priority = P_PROC_SSSE3;
34552 break;
34553 case PROCESSOR_KNL:
34554 arg_str = "knl";
34555 priority = P_PROC_AVX512F;
34556 break;
34557 case PROCESSOR_SILVERMONT:
34558 arg_str = "silvermont";
34559 priority = P_PROC_SSE4_2;
34560 break;
34561 case PROCESSOR_AMDFAM10:
34562 arg_str = "amdfam10h";
34563 priority = P_PROC_SSE4_A;
34564 break;
34565 case PROCESSOR_BTVER1:
34566 arg_str = "btver1";
34567 priority = P_PROC_SSE4_A;
34568 break;
34569 case PROCESSOR_BTVER2:
34570 arg_str = "btver2";
34571 priority = P_PROC_BMI;
34572 break;
34573 case PROCESSOR_BDVER1:
34574 arg_str = "bdver1";
34575 priority = P_PROC_XOP;
34576 break;
34577 case PROCESSOR_BDVER2:
34578 arg_str = "bdver2";
34579 priority = P_PROC_FMA;
34580 break;
34581 case PROCESSOR_BDVER3:
34582 arg_str = "bdver3";
34583 priority = P_PROC_FMA;
34584 break;
34585 case PROCESSOR_BDVER4:
34586 arg_str = "bdver4";
34587 priority = P_PROC_AVX2;
34588 break;
34592 cl_target_option_restore (&global_options, &cur_target);
34594 if (predicate_list && arg_str == NULL)
34596 error_at (DECL_SOURCE_LOCATION (decl),
34597 "No dispatcher found for the versioning attributes");
34598 return 0;
34601 if (predicate_list)
34603 predicate_decl = ix86_builtins [(int) IX86_BUILTIN_CPU_IS];
34604 /* For a C string literal the length includes the trailing NULL. */
34605 predicate_arg = build_string_literal (strlen (arg_str) + 1, arg_str);
34606 predicate_chain = tree_cons (predicate_decl, predicate_arg,
34607 predicate_chain);
34611 /* Process feature name. */
34612 tok_str = (char *) xmalloc (strlen (attrs_str) + 1);
34613 strcpy (tok_str, attrs_str);
34614 token = strtok (tok_str, ",");
34615 predicate_decl = ix86_builtins [(int) IX86_BUILTIN_CPU_SUPPORTS];
34617 while (token != NULL)
34620 /* Do not process "arch=".  */
34620 if (strncmp (token, "arch=", 5) == 0)
34622 token = strtok (NULL, ",");
34623 continue;
34625 for (i = 0; i < NUM_FEATURES; ++i)
34627 if (strcmp (token, feature_list[i].name) == 0)
34629 if (predicate_list)
34631 predicate_arg = build_string_literal (
34632 strlen (feature_list[i].name) + 1,
34633 feature_list[i].name);
34634 predicate_chain = tree_cons (predicate_decl, predicate_arg,
34635 predicate_chain);
34637 /* Find the maximum priority feature. */
34638 if (feature_list[i].priority > priority)
34639 priority = feature_list[i].priority;
34641 break;
34644 if (predicate_list && i == NUM_FEATURES)
34646 error_at (DECL_SOURCE_LOCATION (decl),
34647 "No dispatcher found for %s", token);
34648 return 0;
34650 token = strtok (NULL, ",");
34652 free (tok_str);
34654 if (predicate_list && predicate_chain == NULL_TREE)
34656 error_at (DECL_SOURCE_LOCATION (decl),
34657 "No dispatcher found for the versioning attributes : %s",
34658 attrs_str);
34659 return 0;
34661 else if (predicate_list)
34663 predicate_chain = nreverse (predicate_chain);
34664 *predicate_list = predicate_chain;
34667 return priority;
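/* Illustrative sketch only (not part of the compiled sources): with
   function versions declared as

     __attribute__ ((target ("default")))     int foo (void);
     __attribute__ ((target ("sse4.2")))      int foo (void);
     __attribute__ ((target ("arch=corei7"))) int foo (void);

   the routine above would return P_ZERO, P_SSE4_2 and P_PROC_SSE4_2
   respectively, so the arch=corei7 version is checked for dispatch before
   the plain sse4.2 version, and the default version is checked last.
   The function name foo is hypothetical.  */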
34670 /* This compares the priority of target features in function DECL1
34671 and DECL2. It returns a positive value if DECL1 has higher priority,
34672 a negative value if DECL2 has higher priority, and 0 if they are the
34673 same. */
34675 static int
34676 ix86_compare_version_priority (tree decl1, tree decl2)
34678 unsigned int priority1 = get_builtin_code_for_version (decl1, NULL);
34679 unsigned int priority2 = get_builtin_code_for_version (decl2, NULL);
34681 return (int)priority1 - (int)priority2;
34684 /* V1 and V2 point to function versions with different priorities
34685 based on the target ISA. This function compares their priorities. */
34687 static int
34688 feature_compare (const void *v1, const void *v2)
34690 typedef struct _function_version_info
34692 tree version_decl;
34693 tree predicate_chain;
34694 unsigned int dispatch_priority;
34695 } function_version_info;
34697 const function_version_info c1 = *(const function_version_info *)v1;
34698 const function_version_info c2 = *(const function_version_info *)v2;
34699 return (c2.dispatch_priority - c1.dispatch_priority);
34702 /* This function generates the dispatch function for
34703 multi-versioned functions. DISPATCH_DECL is the function which will
34704 contain the dispatch logic. FNDECLS is the vector of function choices
34705 for dispatch. EMPTY_BB is the basic block pointer
34706 in DISPATCH_DECL in which the dispatch code is generated. */
34708 static int
34709 dispatch_function_versions (tree dispatch_decl,
34710 void *fndecls_p,
34711 basic_block *empty_bb)
34713 tree default_decl;
34714 gimple ifunc_cpu_init_stmt;
34715 gimple_seq gseq;
34716 int ix;
34717 tree ele;
34718 vec<tree> *fndecls;
34719 unsigned int num_versions = 0;
34720 unsigned int actual_versions = 0;
34721 unsigned int i;
34723 struct _function_version_info
34725 tree version_decl;
34726 tree predicate_chain;
34727 unsigned int dispatch_priority;
34728 }*function_version_info;
34730 gcc_assert (dispatch_decl != NULL
34731 && fndecls_p != NULL
34732 && empty_bb != NULL);
34734 /* fndecls_p is actually a vector. */
34735 fndecls = static_cast<vec<tree> *> (fndecls_p);
34737 /* At least one more version other than the default. */
34738 num_versions = fndecls->length ();
34739 gcc_assert (num_versions >= 2);
34741 function_version_info = (struct _function_version_info *)
34742 XNEWVEC (struct _function_version_info, (num_versions - 1));
34744 /* The first version in the vector is the default decl. */
34745 default_decl = (*fndecls)[0];
34747 push_cfun (DECL_STRUCT_FUNCTION (dispatch_decl));
34749 gseq = bb_seq (*empty_bb);
34750 /* Function version dispatch is via IFUNC. IFUNC resolvers fire before
34751 constructors, so explicitly call __builtin_cpu_init here. */
34752 ifunc_cpu_init_stmt = gimple_build_call_vec (
34753 ix86_builtins [(int) IX86_BUILTIN_CPU_INIT], vNULL);
34754 gimple_seq_add_stmt (&gseq, ifunc_cpu_init_stmt);
34755 gimple_set_bb (ifunc_cpu_init_stmt, *empty_bb);
34756 set_bb_seq (*empty_bb, gseq);
34758 pop_cfun ();
34761 for (ix = 1; fndecls->iterate (ix, &ele); ++ix)
34763 tree version_decl = ele;
34764 tree predicate_chain = NULL_TREE;
34765 unsigned int priority;
34766 /* Get attribute string, parse it and find the right predicate decl.
34767 The predicate function could be a lengthy combination of many
34768 features, like arch-type and various isa-variants. */
34769 priority = get_builtin_code_for_version (version_decl,
34770 &predicate_chain);
34772 if (predicate_chain == NULL_TREE)
34773 continue;
34775 function_version_info [actual_versions].version_decl = version_decl;
34776 function_version_info [actual_versions].predicate_chain
34777 = predicate_chain;
34778 function_version_info [actual_versions].dispatch_priority = priority;
34779 actual_versions++;
34782 /* Sort the versions according to descending order of dispatch priority. The
34783 priority is based on the ISA. This is not a perfect solution. There
34784 could still be ambiguity. If more than one function version is suitable
34785 to execute, which one should be dispatched? In future, allow the user
34786 to specify a dispatch priority next to the version. */
34787 qsort (function_version_info, actual_versions,
34788 sizeof (struct _function_version_info), feature_compare);
34790 for (i = 0; i < actual_versions; ++i)
34791 *empty_bb = add_condition_to_bb (dispatch_decl,
34792 function_version_info[i].version_decl,
34793 function_version_info[i].predicate_chain,
34794 *empty_bb);
34796 /* Dispatch the default version at the end. */
34797 *empty_bb = add_condition_to_bb (dispatch_decl, default_decl,
34798 NULL, *empty_bb);
34800 free (function_version_info);
34801 return 0;
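/* Rough sketch (pseudo-code, for illustration only) of the resolver body
   assembled above for a function foo with an avx2 version and a default
   version; the names foo_avx2 and foo_default are hypothetical:

     void *foo_resolver (void)
     {
       __builtin_cpu_init ();
       if (__builtin_cpu_supports ("avx2"))
         return foo_avx2;
       return foo_default;
     }

   Versions are tested in decreasing dispatch priority and the default
   version is always dispatched last.  */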
34804 /* Comparator function to be used in qsort routine to sort attribute
34805 specification strings to "target". */
34807 static int
34808 attr_strcmp (const void *v1, const void *v2)
34810 const char *c1 = *(char *const*)v1;
34811 const char *c2 = *(char *const*)v2;
34812 return strcmp (c1, c2);
34815 /* ARGLIST is the argument to the target attribute. This function tokenizes
34816 the comma-separated arguments, sorts them, and returns a string that
34817 uniquely identifies the argument set. It also replaces the
34818 non-identifier characters "=,-" with "_". */
34820 static char *
34821 sorted_attr_string (tree arglist)
34823 tree arg;
34824 size_t str_len_sum = 0;
34825 char **args = NULL;
34826 char *attr_str, *ret_str;
34827 char *attr = NULL;
34828 unsigned int argnum = 1;
34829 unsigned int i;
34831 for (arg = arglist; arg; arg = TREE_CHAIN (arg))
34833 const char *str = TREE_STRING_POINTER (TREE_VALUE (arg));
34834 size_t len = strlen (str);
34835 str_len_sum += len + 1;
34836 if (arg != arglist)
34837 argnum++;
34838 for (i = 0; i < strlen (str); i++)
34839 if (str[i] == ',')
34840 argnum++;
34843 attr_str = XNEWVEC (char, str_len_sum);
34844 str_len_sum = 0;
34845 for (arg = arglist; arg; arg = TREE_CHAIN (arg))
34847 const char *str = TREE_STRING_POINTER (TREE_VALUE (arg));
34848 size_t len = strlen (str);
34849 memcpy (attr_str + str_len_sum, str, len);
34850 attr_str[str_len_sum + len] = TREE_CHAIN (arg) ? ',' : '\0';
34851 str_len_sum += len + 1;
34854 /* Replace "=,-" with "_". */
34855 for (i = 0; i < strlen (attr_str); i++)
34856 if (attr_str[i] == '=' || attr_str[i]== '-')
34857 attr_str[i] = '_';
34859 if (argnum == 1)
34860 return attr_str;
34862 args = XNEWVEC (char *, argnum);
34864 i = 0;
34865 attr = strtok (attr_str, ",");
34866 while (attr != NULL)
34868 args[i] = attr;
34869 i++;
34870 attr = strtok (NULL, ",");
34873 qsort (args, argnum, sizeof (char *), attr_strcmp);
34875 ret_str = XNEWVEC (char, str_len_sum);
34876 str_len_sum = 0;
34877 for (i = 0; i < argnum; i++)
34879 size_t len = strlen (args[i]);
34880 memcpy (ret_str + str_len_sum, args[i], len);
34881 ret_str[str_len_sum + len] = i < argnum - 1 ? '_' : '\0';
34882 str_len_sum += len + 1;
34885 XDELETEVEC (args);
34886 XDELETEVEC (attr_str);
34887 return ret_str;
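/* For example (illustrative input only), an attribute argument list of
   "sse4.2,arch=core2" becomes "sse4.2,arch_core2" after the character
   replacement and "arch_core2_sse4.2" after tokenizing and sorting, so
   the same set of arguments always maps to the same identifier.  */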
34890 /* This function changes the assembler name for functions that are
34891 versions. If DECL is a function version and has a "target"
34892 attribute, it appends the attribute string to its assembler name. */
34894 static tree
34895 ix86_mangle_function_version_assembler_name (tree decl, tree id)
34897 tree version_attr;
34898 const char *orig_name, *version_string;
34899 char *attr_str, *assembler_name;
34901 if (DECL_DECLARED_INLINE_P (decl)
34902 && lookup_attribute ("gnu_inline",
34903 DECL_ATTRIBUTES (decl)))
34904 error_at (DECL_SOURCE_LOCATION (decl),
34905 "Function versions cannot be marked as gnu_inline,"
34906 " bodies have to be generated");
34908 if (DECL_VIRTUAL_P (decl)
34909 || DECL_VINDEX (decl))
34910 sorry ("Virtual function multiversioning not supported");
34912 version_attr = lookup_attribute ("target", DECL_ATTRIBUTES (decl));
34914 /* target attribute string cannot be NULL. */
34915 gcc_assert (version_attr != NULL_TREE);
34917 orig_name = IDENTIFIER_POINTER (id);
34918 version_string
34919 = TREE_STRING_POINTER (TREE_VALUE (TREE_VALUE (version_attr)));
34921 if (strcmp (version_string, "default") == 0)
34922 return id;
34924 attr_str = sorted_attr_string (TREE_VALUE (version_attr));
34925 assembler_name = XNEWVEC (char, strlen (orig_name) + strlen (attr_str) + 2);
34927 sprintf (assembler_name, "%s.%s", orig_name, attr_str);
34929 /* Allow assembler name to be modified if already set. */
34930 if (DECL_ASSEMBLER_NAME_SET_P (decl))
34931 SET_DECL_RTL (decl, NULL);
34933 tree ret = get_identifier (assembler_name);
34934 XDELETEVEC (attr_str);
34935 XDELETEVEC (assembler_name);
34936 return ret;
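/* For instance (hypothetical declaration), a version declared as
     __attribute__ ((target ("avx2"))) int foo (void);
   would get an assembler name like "foo.avx2", while the default version
   keeps its unmodified assembler name.  */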
34939 /* This function returns true if FN1 and FN2 are versions of the same function,
34940 that is, the target strings of the function decls are different. This assumes
34941 that FN1 and FN2 have the same signature. */
34943 static bool
34944 ix86_function_versions (tree fn1, tree fn2)
34946 tree attr1, attr2;
34947 char *target1, *target2;
34948 bool result;
34950 if (TREE_CODE (fn1) != FUNCTION_DECL
34951 || TREE_CODE (fn2) != FUNCTION_DECL)
34952 return false;
34954 attr1 = lookup_attribute ("target", DECL_ATTRIBUTES (fn1));
34955 attr2 = lookup_attribute ("target", DECL_ATTRIBUTES (fn2));
34957 /* At least one function decl should have the target attribute specified. */
34958 if (attr1 == NULL_TREE && attr2 == NULL_TREE)
34959 return false;
34961 /* Diagnose missing target attribute if one of the decls is already
34962 multi-versioned. */
34963 if (attr1 == NULL_TREE || attr2 == NULL_TREE)
34965 if (DECL_FUNCTION_VERSIONED (fn1) || DECL_FUNCTION_VERSIONED (fn2))
34967 if (attr2 != NULL_TREE)
34969 std::swap (fn1, fn2);
34970 attr1 = attr2;
34972 error_at (DECL_SOURCE_LOCATION (fn2),
34973 "missing %<target%> attribute for multi-versioned %D",
34974 fn2);
34975 inform (DECL_SOURCE_LOCATION (fn1),
34976 "previous declaration of %D", fn1);
34977 /* Prevent diagnosing of the same error multiple times. */
34978 DECL_ATTRIBUTES (fn2)
34979 = tree_cons (get_identifier ("target"),
34980 copy_node (TREE_VALUE (attr1)),
34981 DECL_ATTRIBUTES (fn2));
34983 return false;
34986 target1 = sorted_attr_string (TREE_VALUE (attr1));
34987 target2 = sorted_attr_string (TREE_VALUE (attr2));
34989 /* The sorted target strings must be different for fn1 and fn2
34990 to be versions. */
34991 if (strcmp (target1, target2) == 0)
34992 result = false;
34993 else
34994 result = true;
34996 XDELETEVEC (target1);
34997 XDELETEVEC (target2);
34999 return result;
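/* Example (illustrative): the two declarations
     __attribute__ ((target ("avx")))  int foo (void);
     __attribute__ ((target ("sse2"))) int foo (void);
   are versions of each other, whereas two declarations whose sorted
   target strings compare equal are not.  */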
35002 static tree
35003 ix86_mangle_decl_assembler_name (tree decl, tree id)
35005 /* For function version, add the target suffix to the assembler name. */
35006 if (TREE_CODE (decl) == FUNCTION_DECL
35007 && DECL_FUNCTION_VERSIONED (decl))
35008 id = ix86_mangle_function_version_assembler_name (decl, id);
35009 #ifdef SUBTARGET_MANGLE_DECL_ASSEMBLER_NAME
35010 id = SUBTARGET_MANGLE_DECL_ASSEMBLER_NAME (decl, id);
35011 #endif
35013 return id;
35016 /* Return a new name by appending SUFFIX to the DECL name. If make_unique
35017 is true, append the full path name of the source file. */
35019 static char *
35020 make_name (tree decl, const char *suffix, bool make_unique)
35022 char *global_var_name;
35023 int name_len;
35024 const char *name;
35025 const char *unique_name = NULL;
35027 name = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (decl));
35029 /* Get a unique name that can be used globally without any chances
35030 of collision at link time. */
35031 if (make_unique)
35032 unique_name = IDENTIFIER_POINTER (get_file_function_name ("\0"));
35034 name_len = strlen (name) + strlen (suffix) + 2;
35036 if (make_unique)
35037 name_len += strlen (unique_name) + 1;
35038 global_var_name = XNEWVEC (char, name_len);
35040 /* Use '.' to concatenate names as it is demangler friendly. */
35041 if (make_unique)
35042 snprintf (global_var_name, name_len, "%s.%s.%s", name, unique_name,
35043 suffix);
35044 else
35045 snprintf (global_var_name, name_len, "%s.%s", name, suffix);
35047 return global_var_name;
35050 #if defined (ASM_OUTPUT_TYPE_DIRECTIVE)
35052 /* Make a dispatcher declaration for the multi-versioned function DECL.
35053 Calls to the DECL function will be replaced with calls to the dispatcher
35054 by the front-end. Return the decl created. */
35056 static tree
35057 make_dispatcher_decl (const tree decl)
35059 tree func_decl;
35060 char *func_name;
35061 tree fn_type, func_type;
35062 bool is_uniq = false;
35064 if (TREE_PUBLIC (decl) == 0)
35065 is_uniq = true;
35067 func_name = make_name (decl, "ifunc", is_uniq);
35069 fn_type = TREE_TYPE (decl);
35070 func_type = build_function_type (TREE_TYPE (fn_type),
35071 TYPE_ARG_TYPES (fn_type));
35073 func_decl = build_fn_decl (func_name, func_type);
35074 XDELETEVEC (func_name);
35075 TREE_USED (func_decl) = 1;
35076 DECL_CONTEXT (func_decl) = NULL_TREE;
35077 DECL_INITIAL (func_decl) = error_mark_node;
35078 DECL_ARTIFICIAL (func_decl) = 1;
35079 /* Mark this func as external, the resolver will flip it again if
35080 it gets generated. */
35081 DECL_EXTERNAL (func_decl) = 1;
35082 /* This will be an IFUNC; IFUNCs have to be externally visible. */
35083 TREE_PUBLIC (func_decl) = 1;
35085 return func_decl;
35088 #endif
35090 /* Returns true if DECL is multi-versioned and is the default function,
35091 that is, it is not tagged with a target-specific optimization. */
35093 static bool
35094 is_function_default_version (const tree decl)
35096 if (TREE_CODE (decl) != FUNCTION_DECL
35097 || !DECL_FUNCTION_VERSIONED (decl))
35098 return false;
35099 tree attr = lookup_attribute ("target", DECL_ATTRIBUTES (decl));
35100 gcc_assert (attr);
35101 attr = TREE_VALUE (TREE_VALUE (attr));
35102 return (TREE_CODE (attr) == STRING_CST
35103 && strcmp (TREE_STRING_POINTER (attr), "default") == 0);
35106 /* Make a dispatcher declaration for the multi-versioned function DECL.
35107 Calls to the DECL function will be replaced with calls to the dispatcher
35108 by the front-end. Returns the decl of the dispatcher function. */
35110 static tree
35111 ix86_get_function_versions_dispatcher (void *decl)
35113 tree fn = (tree) decl;
35114 struct cgraph_node *node = NULL;
35115 struct cgraph_node *default_node = NULL;
35116 struct cgraph_function_version_info *node_v = NULL;
35117 struct cgraph_function_version_info *first_v = NULL;
35119 tree dispatch_decl = NULL;
35121 struct cgraph_function_version_info *default_version_info = NULL;
35123 gcc_assert (fn != NULL && DECL_FUNCTION_VERSIONED (fn));
35125 node = cgraph_node::get (fn);
35126 gcc_assert (node != NULL);
35128 node_v = node->function_version ();
35129 gcc_assert (node_v != NULL);
35131 if (node_v->dispatcher_resolver != NULL)
35132 return node_v->dispatcher_resolver;
35134 /* Find the default version and make it the first node. */
35135 first_v = node_v;
35136 /* Go to the beginning of the chain. */
35137 while (first_v->prev != NULL)
35138 first_v = first_v->prev;
35139 default_version_info = first_v;
35140 while (default_version_info != NULL)
35142 if (is_function_default_version
35143 (default_version_info->this_node->decl))
35144 break;
35145 default_version_info = default_version_info->next;
35148 /* If there is no default node, just return NULL. */
35149 if (default_version_info == NULL)
35150 return NULL;
35152 /* Make default info the first node. */
35153 if (first_v != default_version_info)
35155 default_version_info->prev->next = default_version_info->next;
35156 if (default_version_info->next)
35157 default_version_info->next->prev = default_version_info->prev;
35158 first_v->prev = default_version_info;
35159 default_version_info->next = first_v;
35160 default_version_info->prev = NULL;
35163 default_node = default_version_info->this_node;
35165 #if defined (ASM_OUTPUT_TYPE_DIRECTIVE)
35166 if (targetm.has_ifunc_p ())
35168 struct cgraph_function_version_info *it_v = NULL;
35169 struct cgraph_node *dispatcher_node = NULL;
35170 struct cgraph_function_version_info *dispatcher_version_info = NULL;
35172 /* Right now, the dispatching is done via ifunc. */
35173 dispatch_decl = make_dispatcher_decl (default_node->decl);
35175 dispatcher_node = cgraph_node::get_create (dispatch_decl);
35176 gcc_assert (dispatcher_node != NULL);
35177 dispatcher_node->dispatcher_function = 1;
35178 dispatcher_version_info
35179 = dispatcher_node->insert_new_function_version ();
35180 dispatcher_version_info->next = default_version_info;
35181 dispatcher_node->definition = 1;
35183 /* Set the dispatcher for all the versions. */
35184 it_v = default_version_info;
35185 while (it_v != NULL)
35187 it_v->dispatcher_resolver = dispatch_decl;
35188 it_v = it_v->next;
35191 else
35192 #endif
35194 error_at (DECL_SOURCE_LOCATION (default_node->decl),
35195 "multiversioning needs ifunc which is not supported "
35196 "on this target");
35199 return dispatch_decl;
35202 /* Makes a function attribute of the form NAME(ARG_NAME) and chains
35203 it to CHAIN. */
35205 static tree
35206 make_attribute (const char *name, const char *arg_name, tree chain)
35208 tree attr_name;
35209 tree attr_arg_name;
35210 tree attr_args;
35211 tree attr;
35213 attr_name = get_identifier (name);
35214 attr_arg_name = build_string (strlen (arg_name), arg_name);
35215 attr_args = tree_cons (NULL_TREE, attr_arg_name, NULL_TREE);
35216 attr = tree_cons (attr_name, attr_args, chain);
35217 return attr;
35220 /* Make the resolver function decl to dispatch the versions of
35221 a multi-versioned function, DEFAULT_DECL. Create an
35222 empty basic block in the resolver and store the pointer in
35223 EMPTY_BB. Return the decl of the resolver function. */
35225 static tree
35226 make_resolver_func (const tree default_decl,
35227 const tree dispatch_decl,
35228 basic_block *empty_bb)
35230 char *resolver_name;
35231 tree decl, type, decl_name, t;
35232 bool is_uniq = false;
35234 /* IFUNCs have to be globally visible. So, if the default_decl is
35235 not, then the name of the IFUNC should be made unique. */
35236 if (TREE_PUBLIC (default_decl) == 0)
35237 is_uniq = true;
35239 /* Append the filename to the resolver function if the versions are
35240 not externally visible. This is because the resolver function has
35241 to be externally visible for the loader to find it. So, appending
35242 the filename will prevent conflicts with a resolver function from
35243 another module which is based on the same version name. */
35244 resolver_name = make_name (default_decl, "resolver", is_uniq);
35246 /* The resolver function should return a (void *). */
35247 type = build_function_type_list (ptr_type_node, NULL_TREE);
35249 decl = build_fn_decl (resolver_name, type);
35250 decl_name = get_identifier (resolver_name);
35251 SET_DECL_ASSEMBLER_NAME (decl, decl_name);
35253 DECL_NAME (decl) = decl_name;
35254 TREE_USED (decl) = 1;
35255 DECL_ARTIFICIAL (decl) = 1;
35256 DECL_IGNORED_P (decl) = 0;
35257 /* IFUNC resolvers have to be externally visible. */
35258 TREE_PUBLIC (decl) = 1;
35259 DECL_UNINLINABLE (decl) = 1;
35261 /* Resolver is not external, body is generated. */
35262 DECL_EXTERNAL (decl) = 0;
35263 DECL_EXTERNAL (dispatch_decl) = 0;
35265 DECL_CONTEXT (decl) = NULL_TREE;
35266 DECL_INITIAL (decl) = make_node (BLOCK);
35267 DECL_STATIC_CONSTRUCTOR (decl) = 0;
35269 if (DECL_COMDAT_GROUP (default_decl)
35270 || TREE_PUBLIC (default_decl))
35272 /* In this case, each translation unit with a call to this
35273 versioned function will put out a resolver. Ensure it
35274 is comdat to keep just one copy. */
35275 DECL_COMDAT (decl) = 1;
35276 make_decl_one_only (decl, DECL_ASSEMBLER_NAME (decl));
35278 /* Build result decl and add to function_decl. */
35279 t = build_decl (UNKNOWN_LOCATION, RESULT_DECL, NULL_TREE, ptr_type_node);
35280 DECL_ARTIFICIAL (t) = 1;
35281 DECL_IGNORED_P (t) = 1;
35282 DECL_RESULT (decl) = t;
35284 gimplify_function_tree (decl);
35285 push_cfun (DECL_STRUCT_FUNCTION (decl));
35286 *empty_bb = init_lowered_empty_function (decl, false, 0);
35288 cgraph_node::add_new_function (decl, true);
35289 symtab->call_cgraph_insertion_hooks (cgraph_node::get_create (decl));
35291 pop_cfun ();
35293 gcc_assert (dispatch_decl != NULL);
35294 /* Mark dispatch_decl as "ifunc" with resolver as resolver_name. */
35295 DECL_ATTRIBUTES (dispatch_decl)
35296 = make_attribute ("ifunc", resolver_name, DECL_ATTRIBUTES (dispatch_decl));
35298 /* Create the alias for dispatch to resolver here. */
35299 /*cgraph_create_function_alias (dispatch_decl, decl);*/
35300 cgraph_node::create_same_body_alias (dispatch_decl, decl);
35301 XDELETEVEC (resolver_name);
35302 return decl;
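/* Sketch of the net effect (names illustrative): for a versioned function
   foo, the dispatcher declared earlier ends up carrying the attribute

     __attribute__ ((ifunc ("foo.resolver")))

   so calls are resolved at load time through foo.resolver, the decl
   returned here, whose body dispatch_function_versions later fills in.  */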
35305 /* Generate the dispatching code body to dispatch multi-versioned function
35306 DECL. The target hook is called to process the "target" attributes and
35307 provide the code to dispatch the right function at run-time. NODE points
35308 to the dispatcher decl whose body will be created. */
35310 static tree
35311 ix86_generate_version_dispatcher_body (void *node_p)
35313 tree resolver_decl;
35314 basic_block empty_bb;
35315 tree default_ver_decl;
35316 struct cgraph_node *versn;
35317 struct cgraph_node *node;
35319 struct cgraph_function_version_info *node_version_info = NULL;
35320 struct cgraph_function_version_info *versn_info = NULL;
35322 node = (cgraph_node *)node_p;
35324 node_version_info = node->function_version ();
35325 gcc_assert (node->dispatcher_function
35326 && node_version_info != NULL);
35328 if (node_version_info->dispatcher_resolver)
35329 return node_version_info->dispatcher_resolver;
35331 /* The first version in the chain corresponds to the default version. */
35332 default_ver_decl = node_version_info->next->this_node->decl;
35334 /* node is going to be an alias, so remove the finalized bit. */
35335 node->definition = false;
35337 resolver_decl = make_resolver_func (default_ver_decl,
35338 node->decl, &empty_bb);
35340 node_version_info->dispatcher_resolver = resolver_decl;
35342 push_cfun (DECL_STRUCT_FUNCTION (resolver_decl));
35344 auto_vec<tree, 2> fn_ver_vec;
35346 for (versn_info = node_version_info->next; versn_info;
35347 versn_info = versn_info->next)
35349 versn = versn_info->this_node;
35350 /* Check for virtual functions here again, as by this time it should
35351 have been determined if this function needs a vtable index or
35352 not. This happens for methods in derived classes that override
35353 virtual methods in base classes but are not explicitly marked as
35354 virtual. */
35355 if (DECL_VINDEX (versn->decl))
35356 sorry ("Virtual function multiversioning not supported");
35358 fn_ver_vec.safe_push (versn->decl);
35361 dispatch_function_versions (resolver_decl, &fn_ver_vec, &empty_bb);
35362 cgraph_edge::rebuild_edges ();
35363 pop_cfun ();
35364 return resolver_decl;
35366 /* This builds the processor_model struct type defined in
35367 libgcc/config/i386/cpuinfo.c */
35369 static tree
35370 build_processor_model_struct (void)
35372 const char *field_name[] = {"__cpu_vendor", "__cpu_type", "__cpu_subtype",
35373 "__cpu_features"};
35374 tree field = NULL_TREE, field_chain = NULL_TREE;
35375 int i;
35376 tree type = make_node (RECORD_TYPE);
35378 /* The first 3 fields are unsigned int. */
35379 for (i = 0; i < 3; ++i)
35381 field = build_decl (UNKNOWN_LOCATION, FIELD_DECL,
35382 get_identifier (field_name[i]), unsigned_type_node);
35383 if (field_chain != NULL_TREE)
35384 DECL_CHAIN (field) = field_chain;
35385 field_chain = field;
35388 /* The last field is an array of unsigned integers of size one. */
35389 field = build_decl (UNKNOWN_LOCATION, FIELD_DECL,
35390 get_identifier (field_name[3]),
35391 build_array_type (unsigned_type_node,
35392 build_index_type (size_one_node)));
35393 if (field_chain != NULL_TREE)
35394 DECL_CHAIN (field) = field_chain;
35395 field_chain = field;
35397 finish_builtin_struct (type, "__processor_model", field_chain, NULL_TREE);
35398 return type;
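/* For reference, a sketch of the corresponding C declaration in
   libgcc/config/i386/cpuinfo.c, with which the layout built above must
   stay in sync:

     struct __processor_model
     {
       unsigned int __cpu_vendor;
       unsigned int __cpu_type;
       unsigned int __cpu_subtype;
       unsigned int __cpu_features[1];
     };
*/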
35401 /* Returns an extern, comdat VAR_DECL of type TYPE and name NAME. */
35403 static tree
35404 make_var_decl (tree type, const char *name)
35406 tree new_decl;
35408 new_decl = build_decl (UNKNOWN_LOCATION,
35409 VAR_DECL,
35410 get_identifier(name),
35411 type);
35413 DECL_EXTERNAL (new_decl) = 1;
35414 TREE_STATIC (new_decl) = 1;
35415 TREE_PUBLIC (new_decl) = 1;
35416 DECL_INITIAL (new_decl) = 0;
35417 DECL_ARTIFICIAL (new_decl) = 0;
35418 DECL_PRESERVE_P (new_decl) = 1;
35420 make_decl_one_only (new_decl, DECL_ASSEMBLER_NAME (new_decl));
35421 assemble_variable (new_decl, 0, 0, 0);
35423 return new_decl;
35426 /* FNDECL is a __builtin_cpu_is or a __builtin_cpu_supports call that is folded
35427 into a check against the data defined in libgcc/config/i386/cpuinfo.c. */
35429 static tree
35430 fold_builtin_cpu (tree fndecl, tree *args)
35432 unsigned int i;
35433 enum ix86_builtins fn_code = (enum ix86_builtins)
35434 DECL_FUNCTION_CODE (fndecl);
35435 tree param_string_cst = NULL;
35437 /* This is the order of bit-fields in __processor_features in cpuinfo.c */
35438 enum processor_features
35440 F_CMOV = 0,
35441 F_MMX,
35442 F_POPCNT,
35443 F_SSE,
35444 F_SSE2,
35445 F_SSE3,
35446 F_SSSE3,
35447 F_SSE4_1,
35448 F_SSE4_2,
35449 F_AVX,
35450 F_AVX2,
35451 F_SSE4_A,
35452 F_FMA4,
35453 F_XOP,
35454 F_FMA,
35455 F_AVX512F,
35456 F_BMI,
35457 F_BMI2,
35458 F_MAX
35461 /* These are the values for vendor types and cpu types and subtypes
35462 in cpuinfo.c. Cpu types and subtypes are offset by the corresponding
35463 start value, which must be subtracted before use. */
35464 enum processor_model
35466 M_INTEL = 1,
35467 M_AMD,
35468 M_CPU_TYPE_START,
35469 M_INTEL_BONNELL,
35470 M_INTEL_CORE2,
35471 M_INTEL_COREI7,
35472 M_AMDFAM10H,
35473 M_AMDFAM15H,
35474 M_INTEL_SILVERMONT,
35475 M_INTEL_KNL,
35476 M_AMD_BTVER1,
35477 M_AMD_BTVER2,
35478 M_CPU_SUBTYPE_START,
35479 M_INTEL_COREI7_NEHALEM,
35480 M_INTEL_COREI7_WESTMERE,
35481 M_INTEL_COREI7_SANDYBRIDGE,
35482 M_AMDFAM10H_BARCELONA,
35483 M_AMDFAM10H_SHANGHAI,
35484 M_AMDFAM10H_ISTANBUL,
35485 M_AMDFAM15H_BDVER1,
35486 M_AMDFAM15H_BDVER2,
35487 M_AMDFAM15H_BDVER3,
35488 M_AMDFAM15H_BDVER4,
35489 M_INTEL_COREI7_IVYBRIDGE,
35490 M_INTEL_COREI7_HASWELL,
35491 M_INTEL_COREI7_BROADWELL
35494 static struct _arch_names_table
35496 const char *const name;
35497 const enum processor_model model;
35499 const arch_names_table[] =
35501 {"amd", M_AMD},
35502 {"intel", M_INTEL},
35503 {"atom", M_INTEL_BONNELL},
35504 {"slm", M_INTEL_SILVERMONT},
35505 {"core2", M_INTEL_CORE2},
35506 {"corei7", M_INTEL_COREI7},
35507 {"nehalem", M_INTEL_COREI7_NEHALEM},
35508 {"westmere", M_INTEL_COREI7_WESTMERE},
35509 {"sandybridge", M_INTEL_COREI7_SANDYBRIDGE},
35510 {"ivybridge", M_INTEL_COREI7_IVYBRIDGE},
35511 {"haswell", M_INTEL_COREI7_HASWELL},
35512 {"broadwell", M_INTEL_COREI7_BROADWELL},
35513 {"bonnell", M_INTEL_BONNELL},
35514 {"silvermont", M_INTEL_SILVERMONT},
35515 {"knl", M_INTEL_KNL},
35516 {"amdfam10h", M_AMDFAM10H},
35517 {"barcelona", M_AMDFAM10H_BARCELONA},
35518 {"shanghai", M_AMDFAM10H_SHANGHAI},
35519 {"istanbul", M_AMDFAM10H_ISTANBUL},
35520 {"btver1", M_AMD_BTVER1},
35521 {"amdfam15h", M_AMDFAM15H},
35522 {"bdver1", M_AMDFAM15H_BDVER1},
35523 {"bdver2", M_AMDFAM15H_BDVER2},
35524 {"bdver3", M_AMDFAM15H_BDVER3},
35525 {"bdver4", M_AMDFAM15H_BDVER4},
35526 {"btver2", M_AMD_BTVER2},
35529 static struct _isa_names_table
35531 const char *const name;
35532 const enum processor_features feature;
35534 const isa_names_table[] =
35536 {"cmov", F_CMOV},
35537 {"mmx", F_MMX},
35538 {"popcnt", F_POPCNT},
35539 {"sse", F_SSE},
35540 {"sse2", F_SSE2},
35541 {"sse3", F_SSE3},
35542 {"ssse3", F_SSSE3},
35543 {"sse4a", F_SSE4_A},
35544 {"sse4.1", F_SSE4_1},
35545 {"sse4.2", F_SSE4_2},
35546 {"avx", F_AVX},
35547 {"fma4", F_FMA4},
35548 {"xop", F_XOP},
35549 {"fma", F_FMA},
35550 {"avx2", F_AVX2},
35551 {"avx512f",F_AVX512F},
35552 {"bmi", F_BMI},
35553 {"bmi2", F_BMI2}
35556 tree __processor_model_type = build_processor_model_struct ();
35557 tree __cpu_model_var = make_var_decl (__processor_model_type,
35558 "__cpu_model");
35561 varpool_node::add (__cpu_model_var);
35563 gcc_assert ((args != NULL) && (*args != NULL));
35565 param_string_cst = *args;
35566 while (param_string_cst
35567 && TREE_CODE (param_string_cst) != STRING_CST)
35569 /* *args must be an expr that can contain other EXPRs leading to a
35570 STRING_CST. */
35571 if (!EXPR_P (param_string_cst))
35573 error ("Parameter to builtin must be a string constant or literal");
35574 return integer_zero_node;
35576 param_string_cst = TREE_OPERAND (EXPR_CHECK (param_string_cst), 0);
35579 gcc_assert (param_string_cst);
35581 if (fn_code == IX86_BUILTIN_CPU_IS)
35583 tree ref;
35584 tree field;
35585 tree final;
35587 unsigned int field_val = 0;
35588 unsigned int NUM_ARCH_NAMES
35589 = sizeof (arch_names_table) / sizeof (struct _arch_names_table);
35591 for (i = 0; i < NUM_ARCH_NAMES; i++)
35592 if (strcmp (arch_names_table[i].name,
35593 TREE_STRING_POINTER (param_string_cst)) == 0)
35594 break;
35596 if (i == NUM_ARCH_NAMES)
35598 error ("Parameter to builtin not valid: %s",
35599 TREE_STRING_POINTER (param_string_cst));
35600 return integer_zero_node;
35603 field = TYPE_FIELDS (__processor_model_type);
35604 field_val = arch_names_table[i].model;
35606 /* CPU types are stored in the next field. */
35607 if (field_val > M_CPU_TYPE_START
35608 && field_val < M_CPU_SUBTYPE_START)
35610 field = DECL_CHAIN (field);
35611 field_val -= M_CPU_TYPE_START;
35614 /* CPU subtypes are stored in the next field. */
35615 if (field_val > M_CPU_SUBTYPE_START)
35617 field = DECL_CHAIN ( DECL_CHAIN (field));
35618 field_val -= M_CPU_SUBTYPE_START;
35621 /* Get the appropriate field in __cpu_model. */
35622 ref = build3 (COMPONENT_REF, TREE_TYPE (field), __cpu_model_var,
35623 field, NULL_TREE);
35625 /* Check the value. */
35626 final = build2 (EQ_EXPR, unsigned_type_node, ref,
35627 build_int_cstu (unsigned_type_node, field_val));
35628 return build1 (CONVERT_EXPR, integer_type_node, final);
35630 else if (fn_code == IX86_BUILTIN_CPU_SUPPORTS)
35632 tree ref;
35633 tree array_elt;
35634 tree field;
35635 tree final;
35637 unsigned int field_val = 0;
35638 unsigned int NUM_ISA_NAMES
35639 = sizeof (isa_names_table) / sizeof (struct _isa_names_table);
35641 for (i = 0; i < NUM_ISA_NAMES; i++)
35642 if (strcmp (isa_names_table[i].name,
35643 TREE_STRING_POINTER (param_string_cst)) == 0)
35644 break;
35646 if (i == NUM_ISA_NAMES)
35648 error ("Parameter to builtin not valid: %s",
35649 TREE_STRING_POINTER (param_string_cst));
35650 return integer_zero_node;
35653 field = TYPE_FIELDS (__processor_model_type);
35654 /* Get the last field, which is __cpu_features. */
35655 while (DECL_CHAIN (field))
35656 field = DECL_CHAIN (field);
35658 /* Get the appropriate field: __cpu_model.__cpu_features */
35659 ref = build3 (COMPONENT_REF, TREE_TYPE (field), __cpu_model_var,
35660 field, NULL_TREE);
35662 /* Access the 0th element of __cpu_features array. */
35663 array_elt = build4 (ARRAY_REF, unsigned_type_node, ref,
35664 integer_zero_node, NULL_TREE, NULL_TREE);
35666 field_val = (1 << isa_names_table[i].feature);
35667 /* Return __cpu_model.__cpu_features[0] & field_val */
35668 final = build2 (BIT_AND_EXPR, unsigned_type_node, array_elt,
35669 build_int_cstu (unsigned_type_node, field_val));
35670 return build1 (CONVERT_EXPR, integer_type_node, final);
35672 gcc_unreachable ();
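/* For illustration only: with the tables above, a call such as
   __builtin_cpu_supports ("avx") folds to the equivalent of

     (int) (__cpu_model.__cpu_features[0] & (1 << F_AVX))

   and __builtin_cpu_is ("amd") folds to the equivalent of

     (int) (__cpu_model.__cpu_vendor == M_AMD)

   where the trees are built with build2/build3 as shown above.  */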
35675 static tree
35676 ix86_fold_builtin (tree fndecl, int n_args,
35677 tree *args, bool ignore ATTRIBUTE_UNUSED)
35679 if (DECL_BUILT_IN_CLASS (fndecl) == BUILT_IN_MD)
35681 enum ix86_builtins fn_code = (enum ix86_builtins)
35682 DECL_FUNCTION_CODE (fndecl);
35683 if (fn_code == IX86_BUILTIN_CPU_IS
35684 || fn_code == IX86_BUILTIN_CPU_SUPPORTS)
35686 gcc_assert (n_args == 1);
35687 return fold_builtin_cpu (fndecl, args);
35691 #ifdef SUBTARGET_FOLD_BUILTIN
35692 return SUBTARGET_FOLD_BUILTIN (fndecl, n_args, args, ignore);
35693 #endif
35695 return NULL_TREE;
35698 /* Make builtins to detect cpu type and features supported. NAME is
35699 the builtin name, CODE is the builtin code, and FTYPE is the function
35700 type of the builtin. */
35702 static void
35703 make_cpu_type_builtin (const char* name, int code,
35704 enum ix86_builtin_func_type ftype, bool is_const)
35706 tree decl;
35707 tree type;
35709 type = ix86_get_builtin_func_type (ftype);
35710 decl = add_builtin_function (name, type, code, BUILT_IN_MD,
35711 NULL, NULL_TREE);
35712 gcc_assert (decl != NULL_TREE);
35713 ix86_builtins[(int) code] = decl;
35714 TREE_READONLY (decl) = is_const;
35717 /* Make builtins to get CPU type and features supported. The created
35718 builtins are:
35720 __builtin_cpu_init (), to detect cpu type and features,
35721 __builtin_cpu_is ("<CPUNAME>"), to check if cpu is of type <CPUNAME>,
35722 __builtin_cpu_supports ("<FEATURE>"), to check if cpu supports <FEATURE>
35725 static void
35726 ix86_init_platform_type_builtins (void)
35728 make_cpu_type_builtin ("__builtin_cpu_init", IX86_BUILTIN_CPU_INIT,
35729 INT_FTYPE_VOID, false);
35730 make_cpu_type_builtin ("__builtin_cpu_is", IX86_BUILTIN_CPU_IS,
35731 INT_FTYPE_PCCHAR, true);
35732 make_cpu_type_builtin ("__builtin_cpu_supports", IX86_BUILTIN_CPU_SUPPORTS,
35733 INT_FTYPE_PCCHAR, true);
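/* Typical user-level use of the builtins registered above (illustrative
   only; do_avx2_version, do_atom_version and do_generic_version are
   hypothetical user functions):

     __builtin_cpu_init ();
     if (__builtin_cpu_supports ("avx2"))
       do_avx2_version ();
     else if (__builtin_cpu_is ("bonnell"))
       do_atom_version ();
     else
       do_generic_version ();
*/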
35736 /* Internal method for ix86_init_builtins. */
35738 static void
35739 ix86_init_builtins_va_builtins_abi (void)
35741 tree ms_va_ref, sysv_va_ref;
35742 tree fnvoid_va_end_ms, fnvoid_va_end_sysv;
35743 tree fnvoid_va_start_ms, fnvoid_va_start_sysv;
35744 tree fnvoid_va_copy_ms, fnvoid_va_copy_sysv;
35745 tree fnattr_ms = NULL_TREE, fnattr_sysv = NULL_TREE;
35747 if (!TARGET_64BIT)
35748 return;
35749 fnattr_ms = build_tree_list (get_identifier ("ms_abi"), NULL_TREE);
35750 fnattr_sysv = build_tree_list (get_identifier ("sysv_abi"), NULL_TREE);
35751 ms_va_ref = build_reference_type (ms_va_list_type_node);
35752 sysv_va_ref =
35753 build_pointer_type (TREE_TYPE (sysv_va_list_type_node));
35755 fnvoid_va_end_ms =
35756 build_function_type_list (void_type_node, ms_va_ref, NULL_TREE);
35757 fnvoid_va_start_ms =
35758 build_varargs_function_type_list (void_type_node, ms_va_ref, NULL_TREE);
35759 fnvoid_va_end_sysv =
35760 build_function_type_list (void_type_node, sysv_va_ref, NULL_TREE);
35761 fnvoid_va_start_sysv =
35762 build_varargs_function_type_list (void_type_node, sysv_va_ref,
35763 NULL_TREE);
35764 fnvoid_va_copy_ms =
35765 build_function_type_list (void_type_node, ms_va_ref, ms_va_list_type_node,
35766 NULL_TREE);
35767 fnvoid_va_copy_sysv =
35768 build_function_type_list (void_type_node, sysv_va_ref,
35769 sysv_va_ref, NULL_TREE);
35771 add_builtin_function ("__builtin_ms_va_start", fnvoid_va_start_ms,
35772 BUILT_IN_VA_START, BUILT_IN_NORMAL, NULL, fnattr_ms);
35773 add_builtin_function ("__builtin_ms_va_end", fnvoid_va_end_ms,
35774 BUILT_IN_VA_END, BUILT_IN_NORMAL, NULL, fnattr_ms);
35775 add_builtin_function ("__builtin_ms_va_copy", fnvoid_va_copy_ms,
35776 BUILT_IN_VA_COPY, BUILT_IN_NORMAL, NULL, fnattr_ms);
35777 add_builtin_function ("__builtin_sysv_va_start", fnvoid_va_start_sysv,
35778 BUILT_IN_VA_START, BUILT_IN_NORMAL, NULL, fnattr_sysv);
35779 add_builtin_function ("__builtin_sysv_va_end", fnvoid_va_end_sysv,
35780 BUILT_IN_VA_END, BUILT_IN_NORMAL, NULL, fnattr_sysv);
35781 add_builtin_function ("__builtin_sysv_va_copy", fnvoid_va_copy_sysv,
35782 BUILT_IN_VA_COPY, BUILT_IN_NORMAL, NULL, fnattr_sysv);
35785 static void
35786 ix86_init_builtin_types (void)
35788 tree float128_type_node, float80_type_node;
35790 /* The __float80 type. */
35791 float80_type_node = long_double_type_node;
35792 if (TYPE_MODE (float80_type_node) != XFmode)
35794 /* The __float80 type. */
35795 float80_type_node = make_node (REAL_TYPE);
35797 TYPE_PRECISION (float80_type_node) = 80;
35798 layout_type (float80_type_node);
35800 lang_hooks.types.register_builtin_type (float80_type_node, "__float80");
35802 /* The __float128 type. */
35803 float128_type_node = make_node (REAL_TYPE);
35804 TYPE_PRECISION (float128_type_node) = 128;
35805 layout_type (float128_type_node);
35806 lang_hooks.types.register_builtin_type (float128_type_node, "__float128");
35808 /* This macro is built by i386-builtin-types.awk. */
35809 DEFINE_BUILTIN_PRIMITIVE_TYPES;
35812 static void
35813 ix86_init_builtins (void)
35815 tree t;
35817 ix86_init_builtin_types ();
35819 /* Builtins to get CPU type and features. */
35820 ix86_init_platform_type_builtins ();
35822 /* TFmode support builtins. */
35823 def_builtin_const (0, "__builtin_infq",
35824 FLOAT128_FTYPE_VOID, IX86_BUILTIN_INFQ);
35825 def_builtin_const (0, "__builtin_huge_valq",
35826 FLOAT128_FTYPE_VOID, IX86_BUILTIN_HUGE_VALQ);
35828 /* We will expand them to a normal call if SSE isn't available since
35829 they are used by libgcc. */
35830 t = ix86_get_builtin_func_type (FLOAT128_FTYPE_FLOAT128);
35831 t = add_builtin_function ("__builtin_fabsq", t, IX86_BUILTIN_FABSQ,
35832 BUILT_IN_MD, "__fabstf2", NULL_TREE);
35833 TREE_READONLY (t) = 1;
35834 ix86_builtins[(int) IX86_BUILTIN_FABSQ] = t;
35836 t = ix86_get_builtin_func_type (FLOAT128_FTYPE_FLOAT128_FLOAT128);
35837 t = add_builtin_function ("__builtin_copysignq", t, IX86_BUILTIN_COPYSIGNQ,
35838 BUILT_IN_MD, "__copysigntf3", NULL_TREE);
35839 TREE_READONLY (t) = 1;
35840 ix86_builtins[(int) IX86_BUILTIN_COPYSIGNQ] = t;
35842 ix86_init_tm_builtins ();
35843 ix86_init_mmx_sse_builtins ();
35844 ix86_init_mpx_builtins ();
35846 if (TARGET_LP64)
35847 ix86_init_builtins_va_builtins_abi ();
35849 #ifdef SUBTARGET_INIT_BUILTINS
35850 SUBTARGET_INIT_BUILTINS;
35851 #endif
35854 /* Return the ix86 builtin for CODE. */
35856 static tree
35857 ix86_builtin_decl (unsigned code, bool)
35859 if (code >= IX86_BUILTIN_MAX)
35860 return error_mark_node;
35862 return ix86_builtins[code];
35865 /* Errors in the source file can cause expand_expr to return const0_rtx
35866 where we expect a vector. To avoid crashing, use one of the vector
35867 clear instructions. */
35868 static rtx
35869 safe_vector_operand (rtx x, machine_mode mode)
35871 if (x == const0_rtx)
35872 x = CONST0_RTX (mode);
35873 return x;
35876 /* Fix up modeless constants to fit the required mode. */
35877 static rtx
35878 fixup_modeless_constant (rtx x, machine_mode mode)
35880 if (GET_MODE (x) == VOIDmode)
35881 x = convert_to_mode (mode, x, 1);
35882 return x;
35885 /* Subroutine of ix86_expand_builtin to take care of binop insns. */
35887 static rtx
35888 ix86_expand_binop_builtin (enum insn_code icode, tree exp, rtx target)
35890 rtx pat;
35891 tree arg0 = CALL_EXPR_ARG (exp, 0);
35892 tree arg1 = CALL_EXPR_ARG (exp, 1);
35893 rtx op0 = expand_normal (arg0);
35894 rtx op1 = expand_normal (arg1);
35895 machine_mode tmode = insn_data[icode].operand[0].mode;
35896 machine_mode mode0 = insn_data[icode].operand[1].mode;
35897 machine_mode mode1 = insn_data[icode].operand[2].mode;
35899 if (VECTOR_MODE_P (mode0))
35900 op0 = safe_vector_operand (op0, mode0);
35901 if (VECTOR_MODE_P (mode1))
35902 op1 = safe_vector_operand (op1, mode1);
35904 if (optimize || !target
35905 || GET_MODE (target) != tmode
35906 || !insn_data[icode].operand[0].predicate (target, tmode))
35907 target = gen_reg_rtx (tmode);
35909 if (GET_MODE (op1) == SImode && mode1 == TImode)
35911 rtx x = gen_reg_rtx (V4SImode);
35912 emit_insn (gen_sse2_loadd (x, op1));
35913 op1 = gen_lowpart (TImode, x);
35916 if (!insn_data[icode].operand[1].predicate (op0, mode0))
35917 op0 = copy_to_mode_reg (mode0, op0);
35918 if (!insn_data[icode].operand[2].predicate (op1, mode1))
35919 op1 = copy_to_mode_reg (mode1, op1);
35921 pat = GEN_FCN (icode) (target, op0, op1);
35922 if (! pat)
35923 return 0;
35925 emit_insn (pat);
35927 return target;
35930 /* Subroutine of ix86_expand_builtin to take care of 2-4 argument insns. */
35932 static rtx
35933 ix86_expand_multi_arg_builtin (enum insn_code icode, tree exp, rtx target,
35934 enum ix86_builtin_func_type m_type,
35935 enum rtx_code sub_code)
35937 rtx pat;
35938 int i;
35939 int nargs;
35940 bool comparison_p = false;
35941 bool tf_p = false;
35942 bool last_arg_constant = false;
35943 int num_memory = 0;
35944 struct {
35945 rtx op;
35946 machine_mode mode;
35947 } args[4];
35949 machine_mode tmode = insn_data[icode].operand[0].mode;
35951 switch (m_type)
35953 case MULTI_ARG_4_DF2_DI_I:
35954 case MULTI_ARG_4_DF2_DI_I1:
35955 case MULTI_ARG_4_SF2_SI_I:
35956 case MULTI_ARG_4_SF2_SI_I1:
35957 nargs = 4;
35958 last_arg_constant = true;
35959 break;
35961 case MULTI_ARG_3_SF:
35962 case MULTI_ARG_3_DF:
35963 case MULTI_ARG_3_SF2:
35964 case MULTI_ARG_3_DF2:
35965 case MULTI_ARG_3_DI:
35966 case MULTI_ARG_3_SI:
35967 case MULTI_ARG_3_SI_DI:
35968 case MULTI_ARG_3_HI:
35969 case MULTI_ARG_3_HI_SI:
35970 case MULTI_ARG_3_QI:
35971 case MULTI_ARG_3_DI2:
35972 case MULTI_ARG_3_SI2:
35973 case MULTI_ARG_3_HI2:
35974 case MULTI_ARG_3_QI2:
35975 nargs = 3;
35976 break;
35978 case MULTI_ARG_2_SF:
35979 case MULTI_ARG_2_DF:
35980 case MULTI_ARG_2_DI:
35981 case MULTI_ARG_2_SI:
35982 case MULTI_ARG_2_HI:
35983 case MULTI_ARG_2_QI:
35984 nargs = 2;
35985 break;
35987 case MULTI_ARG_2_DI_IMM:
35988 case MULTI_ARG_2_SI_IMM:
35989 case MULTI_ARG_2_HI_IMM:
35990 case MULTI_ARG_2_QI_IMM:
35991 nargs = 2;
35992 last_arg_constant = true;
35993 break;
35995 case MULTI_ARG_1_SF:
35996 case MULTI_ARG_1_DF:
35997 case MULTI_ARG_1_SF2:
35998 case MULTI_ARG_1_DF2:
35999 case MULTI_ARG_1_DI:
36000 case MULTI_ARG_1_SI:
36001 case MULTI_ARG_1_HI:
36002 case MULTI_ARG_1_QI:
36003 case MULTI_ARG_1_SI_DI:
36004 case MULTI_ARG_1_HI_DI:
36005 case MULTI_ARG_1_HI_SI:
36006 case MULTI_ARG_1_QI_DI:
36007 case MULTI_ARG_1_QI_SI:
36008 case MULTI_ARG_1_QI_HI:
36009 nargs = 1;
36010 break;
36012 case MULTI_ARG_2_DI_CMP:
36013 case MULTI_ARG_2_SI_CMP:
36014 case MULTI_ARG_2_HI_CMP:
36015 case MULTI_ARG_2_QI_CMP:
36016 nargs = 2;
36017 comparison_p = true;
36018 break;
36020 case MULTI_ARG_2_SF_TF:
36021 case MULTI_ARG_2_DF_TF:
36022 case MULTI_ARG_2_DI_TF:
36023 case MULTI_ARG_2_SI_TF:
36024 case MULTI_ARG_2_HI_TF:
36025 case MULTI_ARG_2_QI_TF:
36026 nargs = 2;
36027 tf_p = true;
36028 break;
36030 default:
36031 gcc_unreachable ();
36034 if (optimize || !target
36035 || GET_MODE (target) != tmode
36036 || !insn_data[icode].operand[0].predicate (target, tmode))
36037 target = gen_reg_rtx (tmode);
36039 gcc_assert (nargs <= 4);
36041 for (i = 0; i < nargs; i++)
36043 tree arg = CALL_EXPR_ARG (exp, i);
36044 rtx op = expand_normal (arg);
36045 int adjust = (comparison_p) ? 1 : 0;
36046 machine_mode mode = insn_data[icode].operand[i+adjust+1].mode;
36048 if (last_arg_constant && i == nargs - 1)
36050 if (!insn_data[icode].operand[i + 1].predicate (op, mode))
36052 enum insn_code new_icode = icode;
36053 switch (icode)
36055 case CODE_FOR_xop_vpermil2v2df3:
36056 case CODE_FOR_xop_vpermil2v4sf3:
36057 case CODE_FOR_xop_vpermil2v4df3:
36058 case CODE_FOR_xop_vpermil2v8sf3:
36059 error ("the last argument must be a 2-bit immediate");
36060 return gen_reg_rtx (tmode);
36061 case CODE_FOR_xop_rotlv2di3:
36062 new_icode = CODE_FOR_rotlv2di3;
36063 goto xop_rotl;
36064 case CODE_FOR_xop_rotlv4si3:
36065 new_icode = CODE_FOR_rotlv4si3;
36066 goto xop_rotl;
36067 case CODE_FOR_xop_rotlv8hi3:
36068 new_icode = CODE_FOR_rotlv8hi3;
36069 goto xop_rotl;
36070 case CODE_FOR_xop_rotlv16qi3:
36071 new_icode = CODE_FOR_rotlv16qi3;
36072 xop_rotl:
36073 if (CONST_INT_P (op))
36075 int mask = GET_MODE_BITSIZE (GET_MODE_INNER (tmode)) - 1;
36076 op = GEN_INT (INTVAL (op) & mask);
36077 gcc_checking_assert
36078 (insn_data[icode].operand[i + 1].predicate (op, mode));
36080 else
36082 gcc_checking_assert
36083 (nargs == 2
36084 && insn_data[new_icode].operand[0].mode == tmode
36085 && insn_data[new_icode].operand[1].mode == tmode
36086 && insn_data[new_icode].operand[2].mode == mode
36087 && insn_data[new_icode].operand[0].predicate
36088 == insn_data[icode].operand[0].predicate
36089 && insn_data[new_icode].operand[1].predicate
36090 == insn_data[icode].operand[1].predicate);
36091 icode = new_icode;
36092 goto non_constant;
36094 break;
36095 default:
36096 gcc_unreachable ();
36100 else
36102 non_constant:
36103 if (VECTOR_MODE_P (mode))
36104 op = safe_vector_operand (op, mode);
36106 /* If we aren't optimizing, only allow one memory operand to be
36107 generated. */
36108 if (memory_operand (op, mode))
36109 num_memory++;
36111 gcc_assert (GET_MODE (op) == mode || GET_MODE (op) == VOIDmode);
36113 if (optimize
36114 || !insn_data[icode].operand[i+adjust+1].predicate (op, mode)
36115 || num_memory > 1)
36116 op = force_reg (mode, op);
36119 args[i].op = op;
36120 args[i].mode = mode;
36123 switch (nargs)
36125 case 1:
36126 pat = GEN_FCN (icode) (target, args[0].op);
36127 break;
36129 case 2:
36130 if (tf_p)
36131 pat = GEN_FCN (icode) (target, args[0].op, args[1].op,
36132 GEN_INT ((int)sub_code));
36133 else if (! comparison_p)
36134 pat = GEN_FCN (icode) (target, args[0].op, args[1].op);
36135 else
36137 rtx cmp_op = gen_rtx_fmt_ee (sub_code, GET_MODE (target),
36138 args[0].op,
36139 args[1].op);
36141 pat = GEN_FCN (icode) (target, cmp_op, args[0].op, args[1].op);
36143 break;
36145 case 3:
36146 pat = GEN_FCN (icode) (target, args[0].op, args[1].op, args[2].op);
36147 break;
36149 case 4:
36150 pat = GEN_FCN (icode) (target, args[0].op, args[1].op, args[2].op, args[3].op);
36151 break;
36153 default:
36154 gcc_unreachable ();
36157 if (! pat)
36158 return 0;
36160 emit_insn (pat);
36161 return target;
36164 /* Subroutine of ix86_expand_args_builtin to take care of scalar unop
36165 insns with vec_merge. */
36167 static rtx
36168 ix86_expand_unop_vec_merge_builtin (enum insn_code icode, tree exp,
36169 rtx target)
36171 rtx pat;
36172 tree arg0 = CALL_EXPR_ARG (exp, 0);
36173 rtx op1, op0 = expand_normal (arg0);
36174 machine_mode tmode = insn_data[icode].operand[0].mode;
36175 machine_mode mode0 = insn_data[icode].operand[1].mode;
36177 if (optimize || !target
36178 || GET_MODE (target) != tmode
36179 || !insn_data[icode].operand[0].predicate (target, tmode))
36180 target = gen_reg_rtx (tmode);
36182 if (VECTOR_MODE_P (mode0))
36183 op0 = safe_vector_operand (op0, mode0);
36185 if ((optimize && !register_operand (op0, mode0))
36186 || !insn_data[icode].operand[1].predicate (op0, mode0))
36187 op0 = copy_to_mode_reg (mode0, op0);
36189 op1 = op0;
36190 if (!insn_data[icode].operand[2].predicate (op1, mode0))
36191 op1 = copy_to_mode_reg (mode0, op1);
36193 pat = GEN_FCN (icode) (target, op0, op1);
36194 if (! pat)
36195 return 0;
36196 emit_insn (pat);
36197 return target;
36200 /* Subroutine of ix86_expand_builtin to take care of comparison insns. */
36202 static rtx
36203 ix86_expand_sse_compare (const struct builtin_description *d,
36204 tree exp, rtx target, bool swap)
36206 rtx pat;
36207 tree arg0 = CALL_EXPR_ARG (exp, 0);
36208 tree arg1 = CALL_EXPR_ARG (exp, 1);
36209 rtx op0 = expand_normal (arg0);
36210 rtx op1 = expand_normal (arg1);
36211 rtx op2;
36212 machine_mode tmode = insn_data[d->icode].operand[0].mode;
36213 machine_mode mode0 = insn_data[d->icode].operand[1].mode;
36214 machine_mode mode1 = insn_data[d->icode].operand[2].mode;
36215 enum rtx_code comparison = d->comparison;
36217 if (VECTOR_MODE_P (mode0))
36218 op0 = safe_vector_operand (op0, mode0);
36219 if (VECTOR_MODE_P (mode1))
36220 op1 = safe_vector_operand (op1, mode1);
36222 /* Swap operands if we have a comparison that isn't available in
36223 hardware. */
36224 if (swap)
36225 std::swap (op0, op1);
36227 if (optimize || !target
36228 || GET_MODE (target) != tmode
36229 || !insn_data[d->icode].operand[0].predicate (target, tmode))
36230 target = gen_reg_rtx (tmode);
36232 if ((optimize && !register_operand (op0, mode0))
36233 || !insn_data[d->icode].operand[1].predicate (op0, mode0))
36234 op0 = copy_to_mode_reg (mode0, op0);
36235 if ((optimize && !register_operand (op1, mode1))
36236 || !insn_data[d->icode].operand[2].predicate (op1, mode1))
36237 op1 = copy_to_mode_reg (mode1, op1);
36239 op2 = gen_rtx_fmt_ee (comparison, mode0, op0, op1);
36240 pat = GEN_FCN (d->icode) (target, op0, op1, op2);
36241 if (! pat)
36242 return 0;
36243 emit_insn (pat);
36244 return target;
36247 /* Subroutine of ix86_expand_builtin to take care of comi insns. */
36249 static rtx
36250 ix86_expand_sse_comi (const struct builtin_description *d, tree exp,
36251 rtx target)
36253 rtx pat;
36254 tree arg0 = CALL_EXPR_ARG (exp, 0);
36255 tree arg1 = CALL_EXPR_ARG (exp, 1);
36256 rtx op0 = expand_normal (arg0);
36257 rtx op1 = expand_normal (arg1);
36258 machine_mode mode0 = insn_data[d->icode].operand[0].mode;
36259 machine_mode mode1 = insn_data[d->icode].operand[1].mode;
36260 enum rtx_code comparison = d->comparison;
36262 if (VECTOR_MODE_P (mode0))
36263 op0 = safe_vector_operand (op0, mode0);
36264 if (VECTOR_MODE_P (mode1))
36265 op1 = safe_vector_operand (op1, mode1);
36267 /* Swap operands if we have a comparison that isn't available in
36268 hardware. */
36269 if (d->flag & BUILTIN_DESC_SWAP_OPERANDS)
36270 std::swap (op0, op1);
36272 target = gen_reg_rtx (SImode);
36273 emit_move_insn (target, const0_rtx);
36274 target = gen_rtx_SUBREG (QImode, target, 0);
36276 if ((optimize && !register_operand (op0, mode0))
36277 || !insn_data[d->icode].operand[0].predicate (op0, mode0))
36278 op0 = copy_to_mode_reg (mode0, op0);
36279 if ((optimize && !register_operand (op1, mode1))
36280 || !insn_data[d->icode].operand[1].predicate (op1, mode1))
36281 op1 = copy_to_mode_reg (mode1, op1);
36283 pat = GEN_FCN (d->icode) (op0, op1);
36284 if (! pat)
36285 return 0;
36286 emit_insn (pat);
36287 emit_insn (gen_rtx_SET (gen_rtx_STRICT_LOW_PART (VOIDmode, target),
36288 gen_rtx_fmt_ee (comparison, QImode,
36289 SET_DEST (pat),
36290 const0_rtx)));
36292 return SUBREG_REG (target);
36295 /* Subroutines of ix86_expand_args_builtin to take care of round insns. */
36297 static rtx
36298 ix86_expand_sse_round (const struct builtin_description *d, tree exp,
36299 rtx target)
36301 rtx pat;
36302 tree arg0 = CALL_EXPR_ARG (exp, 0);
36303 rtx op1, op0 = expand_normal (arg0);
36304 machine_mode tmode = insn_data[d->icode].operand[0].mode;
36305 machine_mode mode0 = insn_data[d->icode].operand[1].mode;
36307 if (optimize || target == 0
36308 || GET_MODE (target) != tmode
36309 || !insn_data[d->icode].operand[0].predicate (target, tmode))
36310 target = gen_reg_rtx (tmode);
36312 if (VECTOR_MODE_P (mode0))
36313 op0 = safe_vector_operand (op0, mode0);
36315 if ((optimize && !register_operand (op0, mode0))
36316 || !insn_data[d->icode].operand[0].predicate (op0, mode0))
36317 op0 = copy_to_mode_reg (mode0, op0);
36319 op1 = GEN_INT (d->comparison);
36321 pat = GEN_FCN (d->icode) (target, op0, op1);
36322 if (! pat)
36323 return 0;
36324 emit_insn (pat);
36325 return target;
36328 static rtx
36329 ix86_expand_sse_round_vec_pack_sfix (const struct builtin_description *d,
36330 tree exp, rtx target)
36332 rtx pat;
36333 tree arg0 = CALL_EXPR_ARG (exp, 0);
36334 tree arg1 = CALL_EXPR_ARG (exp, 1);
36335 rtx op0 = expand_normal (arg0);
36336 rtx op1 = expand_normal (arg1);
36337 rtx op2;
36338 machine_mode tmode = insn_data[d->icode].operand[0].mode;
36339 machine_mode mode0 = insn_data[d->icode].operand[1].mode;
36340 machine_mode mode1 = insn_data[d->icode].operand[2].mode;
36342 if (optimize || target == 0
36343 || GET_MODE (target) != tmode
36344 || !insn_data[d->icode].operand[0].predicate (target, tmode))
36345 target = gen_reg_rtx (tmode);
36347 op0 = safe_vector_operand (op0, mode0);
36348 op1 = safe_vector_operand (op1, mode1);
36350 if ((optimize && !register_operand (op0, mode0))
36351 || !insn_data[d->icode].operand[0].predicate (op0, mode0))
36352 op0 = copy_to_mode_reg (mode0, op0);
36353 if ((optimize && !register_operand (op1, mode1))
36354 || !insn_data[d->icode].operand[1].predicate (op1, mode1))
36355 op1 = copy_to_mode_reg (mode1, op1);
36357 op2 = GEN_INT (d->comparison);
36359 pat = GEN_FCN (d->icode) (target, op0, op1, op2);
36360 if (! pat)
36361 return 0;
36362 emit_insn (pat);
36363 return target;
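/* The *_ROUND_VEC_PACK_SFIX variants round two double vectors and pack the
   results into one vector of ints.  Loops of the following shape may be
   vectorized through builtins of this form (a sketch; assumes <math.h>,
   -msse4.1 and typically -ffast-math):

     #include <math.h>

     void floor_to_int (int *out, const double *in, int n)
     {
       for (int i = 0; i < n; i++)
	 out[i] = (int) floor (in[i]);
     }
*/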
36366 /* Subroutine of ix86_expand_builtin to take care of ptest insns. */
36368 static rtx
36369 ix86_expand_sse_ptest (const struct builtin_description *d, tree exp,
36370 rtx target)
36372 rtx pat;
36373 tree arg0 = CALL_EXPR_ARG (exp, 0);
36374 tree arg1 = CALL_EXPR_ARG (exp, 1);
36375 rtx op0 = expand_normal (arg0);
36376 rtx op1 = expand_normal (arg1);
36377 machine_mode mode0 = insn_data[d->icode].operand[0].mode;
36378 machine_mode mode1 = insn_data[d->icode].operand[1].mode;
36379 enum rtx_code comparison = d->comparison;
36381 if (VECTOR_MODE_P (mode0))
36382 op0 = safe_vector_operand (op0, mode0);
36383 if (VECTOR_MODE_P (mode1))
36384 op1 = safe_vector_operand (op1, mode1);
36386 target = gen_reg_rtx (SImode);
36387 emit_move_insn (target, const0_rtx);
36388 target = gen_rtx_SUBREG (QImode, target, 0);
36390 if ((optimize && !register_operand (op0, mode0))
36391 || !insn_data[d->icode].operand[0].predicate (op0, mode0))
36392 op0 = copy_to_mode_reg (mode0, op0);
36393 if ((optimize && !register_operand (op1, mode1))
36394 || !insn_data[d->icode].operand[1].predicate (op1, mode1))
36395 op1 = copy_to_mode_reg (mode1, op1);
36397 pat = GEN_FCN (d->icode) (op0, op1);
36398 if (! pat)
36399 return 0;
36400 emit_insn (pat);
36401 emit_insn (gen_rtx_SET (gen_rtx_STRICT_LOW_PART (VOIDmode, target),
36402 gen_rtx_fmt_ee (comparison, QImode,
36403 SET_DEST (pat),
36404 const0_rtx)));
36406 return SUBREG_REG (target);
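/* Illustrative user-level call that is expected to reach the PTEST expander
   above (a sketch; assumes <smmintrin.h> and -msse4.1):

     #include <smmintrin.h>

     int all_bits_clear (__m128i mask, __m128i val)
     {
       // ptest sets ZF when (mask & val) == 0; the comparison code from
       // the builtin description turns that flag into the 0/1 result.
       return _mm_testz_si128 (mask, val);
     }
*/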
36409 /* Subroutine of ix86_expand_builtin to take care of pcmpestr[im] insns. */
36411 static rtx
36412 ix86_expand_sse_pcmpestr (const struct builtin_description *d,
36413 tree exp, rtx target)
36415 rtx pat;
36416 tree arg0 = CALL_EXPR_ARG (exp, 0);
36417 tree arg1 = CALL_EXPR_ARG (exp, 1);
36418 tree arg2 = CALL_EXPR_ARG (exp, 2);
36419 tree arg3 = CALL_EXPR_ARG (exp, 3);
36420 tree arg4 = CALL_EXPR_ARG (exp, 4);
36421 rtx scratch0, scratch1;
36422 rtx op0 = expand_normal (arg0);
36423 rtx op1 = expand_normal (arg1);
36424 rtx op2 = expand_normal (arg2);
36425 rtx op3 = expand_normal (arg3);
36426 rtx op4 = expand_normal (arg4);
36427 machine_mode tmode0, tmode1, modev2, modei3, modev4, modei5, modeimm;
36429 tmode0 = insn_data[d->icode].operand[0].mode;
36430 tmode1 = insn_data[d->icode].operand[1].mode;
36431 modev2 = insn_data[d->icode].operand[2].mode;
36432 modei3 = insn_data[d->icode].operand[3].mode;
36433 modev4 = insn_data[d->icode].operand[4].mode;
36434 modei5 = insn_data[d->icode].operand[5].mode;
36435 modeimm = insn_data[d->icode].operand[6].mode;
36437 if (VECTOR_MODE_P (modev2))
36438 op0 = safe_vector_operand (op0, modev2);
36439 if (VECTOR_MODE_P (modev4))
36440 op2 = safe_vector_operand (op2, modev4);
36442 if (!insn_data[d->icode].operand[2].predicate (op0, modev2))
36443 op0 = copy_to_mode_reg (modev2, op0);
36444 if (!insn_data[d->icode].operand[3].predicate (op1, modei3))
36445 op1 = copy_to_mode_reg (modei3, op1);
36446 if ((optimize && !register_operand (op2, modev4))
36447 || !insn_data[d->icode].operand[4].predicate (op2, modev4))
36448 op2 = copy_to_mode_reg (modev4, op2);
36449 if (!insn_data[d->icode].operand[5].predicate (op3, modei5))
36450 op3 = copy_to_mode_reg (modei5, op3);
36452 if (!insn_data[d->icode].operand[6].predicate (op4, modeimm))
36454 error ("the fifth argument must be an 8-bit immediate");
36455 return const0_rtx;
36458 if (d->code == IX86_BUILTIN_PCMPESTRI128)
36460 if (optimize || !target
36461 || GET_MODE (target) != tmode0
36462 || !insn_data[d->icode].operand[0].predicate (target, tmode0))
36463 target = gen_reg_rtx (tmode0);
36465 scratch1 = gen_reg_rtx (tmode1);
36467 pat = GEN_FCN (d->icode) (target, scratch1, op0, op1, op2, op3, op4);
36469 else if (d->code == IX86_BUILTIN_PCMPESTRM128)
36471 if (optimize || !target
36472 || GET_MODE (target) != tmode1
36473 || !insn_data[d->icode].operand[1].predicate (target, tmode1))
36474 target = gen_reg_rtx (tmode1);
36476 scratch0 = gen_reg_rtx (tmode0);
36478 pat = GEN_FCN (d->icode) (scratch0, target, op0, op1, op2, op3, op4);
36480 else
36482 gcc_assert (d->flag);
36484 scratch0 = gen_reg_rtx (tmode0);
36485 scratch1 = gen_reg_rtx (tmode1);
36487 pat = GEN_FCN (d->icode) (scratch0, scratch1, op0, op1, op2, op3, op4);
36490 if (! pat)
36491 return 0;
36493 emit_insn (pat);
36495 if (d->flag)
36497 target = gen_reg_rtx (SImode);
36498 emit_move_insn (target, const0_rtx);
36499 target = gen_rtx_SUBREG (QImode, target, 0);
36501 emit_insn
36502 (gen_rtx_SET (gen_rtx_STRICT_LOW_PART (VOIDmode, target),
36503 gen_rtx_fmt_ee (EQ, QImode,
36504 gen_rtx_REG ((machine_mode) d->flag,
36505 FLAGS_REG),
36506 const0_rtx)));
36507 return SUBREG_REG (target);
36509 else
36510 return target;
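/* Illustrative user-level call that is expected to reach the PCMPESTR
   expander above (a sketch; assumes <nmmintrin.h> and -msse4.2):

     #include <nmmintrin.h>

     int find_any_of (__m128i needles, int n_len, __m128i hay, int hay_len)
     {
       // PCMPESTRI128 branch: the index comes back in ECX.  The mode must
       // be a compile-time constant, matching the "fifth argument must be
       // an 8-bit immediate" check in this expander.
       return _mm_cmpestri (needles, n_len, hay, hay_len,
			    _SIDD_UBYTE_OPS | _SIDD_CMP_EQUAL_ANY);
     }
*/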
36514 /* Subroutine of ix86_expand_builtin to take care of pcmpistr[im] insns. */
36516 static rtx
36517 ix86_expand_sse_pcmpistr (const struct builtin_description *d,
36518 tree exp, rtx target)
36520 rtx pat;
36521 tree arg0 = CALL_EXPR_ARG (exp, 0);
36522 tree arg1 = CALL_EXPR_ARG (exp, 1);
36523 tree arg2 = CALL_EXPR_ARG (exp, 2);
36524 rtx scratch0, scratch1;
36525 rtx op0 = expand_normal (arg0);
36526 rtx op1 = expand_normal (arg1);
36527 rtx op2 = expand_normal (arg2);
36528 machine_mode tmode0, tmode1, modev2, modev3, modeimm;
36530 tmode0 = insn_data[d->icode].operand[0].mode;
36531 tmode1 = insn_data[d->icode].operand[1].mode;
36532 modev2 = insn_data[d->icode].operand[2].mode;
36533 modev3 = insn_data[d->icode].operand[3].mode;
36534 modeimm = insn_data[d->icode].operand[4].mode;
36536 if (VECTOR_MODE_P (modev2))
36537 op0 = safe_vector_operand (op0, modev2);
36538 if (VECTOR_MODE_P (modev3))
36539 op1 = safe_vector_operand (op1, modev3);
36541 if (!insn_data[d->icode].operand[2].predicate (op0, modev2))
36542 op0 = copy_to_mode_reg (modev2, op0);
36543 if ((optimize && !register_operand (op1, modev3))
36544 || !insn_data[d->icode].operand[3].predicate (op1, modev3))
36545 op1 = copy_to_mode_reg (modev3, op1);
36547 if (!insn_data[d->icode].operand[4].predicate (op2, modeimm))
36549 error ("the third argument must be an 8-bit immediate");
36550 return const0_rtx;
36553 if (d->code == IX86_BUILTIN_PCMPISTRI128)
36555 if (optimize || !target
36556 || GET_MODE (target) != tmode0
36557 || !insn_data[d->icode].operand[0].predicate (target, tmode0))
36558 target = gen_reg_rtx (tmode0);
36560 scratch1 = gen_reg_rtx (tmode1);
36562 pat = GEN_FCN (d->icode) (target, scratch1, op0, op1, op2);
36564 else if (d->code == IX86_BUILTIN_PCMPISTRM128)
36566 if (optimize || !target
36567 || GET_MODE (target) != tmode1
36568 || !insn_data[d->icode].operand[1].predicate (target, tmode1))
36569 target = gen_reg_rtx (tmode1);
36571 scratch0 = gen_reg_rtx (tmode0);
36573 pat = GEN_FCN (d->icode) (scratch0, target, op0, op1, op2);
36575 else
36577 gcc_assert (d->flag);
36579 scratch0 = gen_reg_rtx (tmode0);
36580 scratch1 = gen_reg_rtx (tmode1);
36582 pat = GEN_FCN (d->icode) (scratch0, scratch1, op0, op1, op2);
36585 if (! pat)
36586 return 0;
36588 emit_insn (pat);
36590 if (d->flag)
36592 target = gen_reg_rtx (SImode);
36593 emit_move_insn (target, const0_rtx);
36594 target = gen_rtx_SUBREG (QImode, target, 0);
36596 emit_insn
36597 (gen_rtx_SET (gen_rtx_STRICT_LOW_PART (VOIDmode, target),
36598 gen_rtx_fmt_ee (EQ, QImode,
36599 gen_rtx_REG ((machine_mode) d->flag,
36600 FLAGS_REG),
36601 const0_rtx)));
36602 return SUBREG_REG (target);
36604 else
36605 return target;
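/* The implicit-length variant is analogous; the flag-returning intrinsics
   exercise the d->flag branch above, which reads a single EFLAGS bit via
   the EQ test against FLAGS_REG in the mode stored in d->flag.  A sketch
   (assumes <nmmintrin.h> and -msse4.2):

     #include <nmmintrin.h>

     int contains_any (__m128i needles, __m128i hay)
     {
       // _mm_cmpistrc returns the carry flag of pcmpistri.
       return _mm_cmpistrc (needles, hay,
			    _SIDD_UBYTE_OPS | _SIDD_CMP_EQUAL_ANY);
     }
*/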
36608 /* Subroutine of ix86_expand_builtin to take care of insns with
36609 variable number of operands. */
36611 static rtx
36612 ix86_expand_args_builtin (const struct builtin_description *d,
36613 tree exp, rtx target)
36615 rtx pat, real_target;
36616 unsigned int i, nargs;
36617 unsigned int nargs_constant = 0;
36618 unsigned int mask_pos = 0;
36619 int num_memory = 0;
36620 struct
36622 rtx op;
36623 machine_mode mode;
36624 } args[6];
36625 bool last_arg_count = false;
36626 enum insn_code icode = d->icode;
36627 const struct insn_data_d *insn_p = &insn_data[icode];
36628 machine_mode tmode = insn_p->operand[0].mode;
36629 machine_mode rmode = VOIDmode;
36630 bool swap = false;
36631 enum rtx_code comparison = d->comparison;
36633 switch ((enum ix86_builtin_func_type) d->flag)
36635 case V2DF_FTYPE_V2DF_ROUND:
36636 case V4DF_FTYPE_V4DF_ROUND:
36637 case V4SF_FTYPE_V4SF_ROUND:
36638 case V8SF_FTYPE_V8SF_ROUND:
36639 case V4SI_FTYPE_V4SF_ROUND:
36640 case V8SI_FTYPE_V8SF_ROUND:
36641 return ix86_expand_sse_round (d, exp, target);
36642 case V4SI_FTYPE_V2DF_V2DF_ROUND:
36643 case V8SI_FTYPE_V4DF_V4DF_ROUND:
36644 case V16SI_FTYPE_V8DF_V8DF_ROUND:
36645 return ix86_expand_sse_round_vec_pack_sfix (d, exp, target);
36646 case INT_FTYPE_V8SF_V8SF_PTEST:
36647 case INT_FTYPE_V4DI_V4DI_PTEST:
36648 case INT_FTYPE_V4DF_V4DF_PTEST:
36649 case INT_FTYPE_V4SF_V4SF_PTEST:
36650 case INT_FTYPE_V2DI_V2DI_PTEST:
36651 case INT_FTYPE_V2DF_V2DF_PTEST:
36652 return ix86_expand_sse_ptest (d, exp, target);
36653 case FLOAT128_FTYPE_FLOAT128:
36654 case FLOAT_FTYPE_FLOAT:
36655 case INT_FTYPE_INT:
36656 case UINT64_FTYPE_INT:
36657 case UINT16_FTYPE_UINT16:
36658 case INT64_FTYPE_INT64:
36659 case INT64_FTYPE_V4SF:
36660 case INT64_FTYPE_V2DF:
36661 case INT_FTYPE_V16QI:
36662 case INT_FTYPE_V8QI:
36663 case INT_FTYPE_V8SF:
36664 case INT_FTYPE_V4DF:
36665 case INT_FTYPE_V4SF:
36666 case INT_FTYPE_V2DF:
36667 case INT_FTYPE_V32QI:
36668 case V16QI_FTYPE_V16QI:
36669 case V8SI_FTYPE_V8SF:
36670 case V8SI_FTYPE_V4SI:
36671 case V8HI_FTYPE_V8HI:
36672 case V8HI_FTYPE_V16QI:
36673 case V8QI_FTYPE_V8QI:
36674 case V8SF_FTYPE_V8SF:
36675 case V8SF_FTYPE_V8SI:
36676 case V8SF_FTYPE_V4SF:
36677 case V8SF_FTYPE_V8HI:
36678 case V4SI_FTYPE_V4SI:
36679 case V4SI_FTYPE_V16QI:
36680 case V4SI_FTYPE_V4SF:
36681 case V4SI_FTYPE_V8SI:
36682 case V4SI_FTYPE_V8HI:
36683 case V4SI_FTYPE_V4DF:
36684 case V4SI_FTYPE_V2DF:
36685 case V4HI_FTYPE_V4HI:
36686 case V4DF_FTYPE_V4DF:
36687 case V4DF_FTYPE_V4SI:
36688 case V4DF_FTYPE_V4SF:
36689 case V4DF_FTYPE_V2DF:
36690 case V4SF_FTYPE_V4SF:
36691 case V4SF_FTYPE_V4SI:
36692 case V4SF_FTYPE_V8SF:
36693 case V4SF_FTYPE_V4DF:
36694 case V4SF_FTYPE_V8HI:
36695 case V4SF_FTYPE_V2DF:
36696 case V2DI_FTYPE_V2DI:
36697 case V2DI_FTYPE_V16QI:
36698 case V2DI_FTYPE_V8HI:
36699 case V2DI_FTYPE_V4SI:
36700 case V2DF_FTYPE_V2DF:
36701 case V2DF_FTYPE_V4SI:
36702 case V2DF_FTYPE_V4DF:
36703 case V2DF_FTYPE_V4SF:
36704 case V2DF_FTYPE_V2SI:
36705 case V2SI_FTYPE_V2SI:
36706 case V2SI_FTYPE_V4SF:
36707 case V2SI_FTYPE_V2SF:
36708 case V2SI_FTYPE_V2DF:
36709 case V2SF_FTYPE_V2SF:
36710 case V2SF_FTYPE_V2SI:
36711 case V32QI_FTYPE_V32QI:
36712 case V32QI_FTYPE_V16QI:
36713 case V16HI_FTYPE_V16HI:
36714 case V16HI_FTYPE_V8HI:
36715 case V8SI_FTYPE_V8SI:
36716 case V16HI_FTYPE_V16QI:
36717 case V8SI_FTYPE_V16QI:
36718 case V4DI_FTYPE_V16QI:
36719 case V8SI_FTYPE_V8HI:
36720 case V4DI_FTYPE_V8HI:
36721 case V4DI_FTYPE_V4SI:
36722 case V4DI_FTYPE_V2DI:
36723 case HI_FTYPE_HI:
36724 case HI_FTYPE_V16QI:
36725 case SI_FTYPE_V32QI:
36726 case DI_FTYPE_V64QI:
36727 case V16QI_FTYPE_HI:
36728 case V32QI_FTYPE_SI:
36729 case V64QI_FTYPE_DI:
36730 case V8HI_FTYPE_QI:
36731 case V16HI_FTYPE_HI:
36732 case V32HI_FTYPE_SI:
36733 case V4SI_FTYPE_QI:
36734 case V8SI_FTYPE_QI:
36735 case V4SI_FTYPE_HI:
36736 case V8SI_FTYPE_HI:
36737 case QI_FTYPE_V8HI:
36738 case HI_FTYPE_V16HI:
36739 case SI_FTYPE_V32HI:
36740 case QI_FTYPE_V4SI:
36741 case QI_FTYPE_V8SI:
36742 case HI_FTYPE_V16SI:
36743 case QI_FTYPE_V2DI:
36744 case QI_FTYPE_V4DI:
36745 case QI_FTYPE_V8DI:
36746 case UINT_FTYPE_V2DF:
36747 case UINT_FTYPE_V4SF:
36748 case UINT64_FTYPE_V2DF:
36749 case UINT64_FTYPE_V4SF:
36750 case V16QI_FTYPE_V8DI:
36751 case V16HI_FTYPE_V16SI:
36752 case V16SI_FTYPE_HI:
36753 case V2DI_FTYPE_QI:
36754 case V4DI_FTYPE_QI:
36755 case V16SI_FTYPE_V16SI:
36756 case V16SI_FTYPE_INT:
36757 case V16SF_FTYPE_FLOAT:
36758 case V16SF_FTYPE_V8SF:
36759 case V16SI_FTYPE_V8SI:
36760 case V16SF_FTYPE_V4SF:
36761 case V16SI_FTYPE_V4SI:
36762 case V16SF_FTYPE_V16SF:
36763 case V8HI_FTYPE_V8DI:
36764 case V8UHI_FTYPE_V8UHI:
36765 case V8SI_FTYPE_V8DI:
36766 case V8SF_FTYPE_V8DF:
36767 case V8DI_FTYPE_QI:
36768 case V8DI_FTYPE_INT64:
36769 case V8DI_FTYPE_V4DI:
36770 case V8DI_FTYPE_V8DI:
36771 case V8DF_FTYPE_DOUBLE:
36772 case V8DF_FTYPE_V4DF:
36773 case V8DF_FTYPE_V2DF:
36774 case V8DF_FTYPE_V8DF:
36775 case V8DF_FTYPE_V8SI:
36776 nargs = 1;
36777 break;
36778 case V4SF_FTYPE_V4SF_VEC_MERGE:
36779 case V2DF_FTYPE_V2DF_VEC_MERGE:
36780 return ix86_expand_unop_vec_merge_builtin (icode, exp, target);
36781 case FLOAT128_FTYPE_FLOAT128_FLOAT128:
36782 case V16QI_FTYPE_V16QI_V16QI:
36783 case V16QI_FTYPE_V8HI_V8HI:
36784 case V16SI_FTYPE_V16SI_V16SI:
36785 case V16SF_FTYPE_V16SF_V16SF:
36786 case V16SF_FTYPE_V16SF_V16SI:
36787 case V8QI_FTYPE_V8QI_V8QI:
36788 case V8QI_FTYPE_V4HI_V4HI:
36789 case V8HI_FTYPE_V8HI_V8HI:
36790 case V8HI_FTYPE_V16QI_V16QI:
36791 case V8HI_FTYPE_V4SI_V4SI:
36792 case V8SF_FTYPE_V8SF_V8SF:
36793 case V8SF_FTYPE_V8SF_V8SI:
36794 case V8DI_FTYPE_V8DI_V8DI:
36795 case V8DF_FTYPE_V8DF_V8DF:
36796 case V8DF_FTYPE_V8DF_V8DI:
36797 case V4SI_FTYPE_V4SI_V4SI:
36798 case V4SI_FTYPE_V8HI_V8HI:
36799 case V4SI_FTYPE_V4SF_V4SF:
36800 case V4SI_FTYPE_V2DF_V2DF:
36801 case V4HI_FTYPE_V4HI_V4HI:
36802 case V4HI_FTYPE_V8QI_V8QI:
36803 case V4HI_FTYPE_V2SI_V2SI:
36804 case V4DF_FTYPE_V4DF_V4DF:
36805 case V4DF_FTYPE_V4DF_V4DI:
36806 case V4SF_FTYPE_V4SF_V4SF:
36807 case V4SF_FTYPE_V4SF_V4SI:
36808 case V4SF_FTYPE_V4SF_V2SI:
36809 case V4SF_FTYPE_V4SF_V2DF:
36810 case V4SF_FTYPE_V4SF_UINT:
36811 case V4SF_FTYPE_V4SF_UINT64:
36812 case V4SF_FTYPE_V4SF_DI:
36813 case V4SF_FTYPE_V4SF_SI:
36814 case V2DI_FTYPE_V2DI_V2DI:
36815 case V2DI_FTYPE_V16QI_V16QI:
36816 case V2DI_FTYPE_V4SI_V4SI:
36817 case V2UDI_FTYPE_V4USI_V4USI:
36818 case V2DI_FTYPE_V2DI_V16QI:
36819 case V2DI_FTYPE_V2DF_V2DF:
36820 case V2SI_FTYPE_V2SI_V2SI:
36821 case V2SI_FTYPE_V4HI_V4HI:
36822 case V2SI_FTYPE_V2SF_V2SF:
36823 case V2DF_FTYPE_V2DF_V2DF:
36824 case V2DF_FTYPE_V2DF_V4SF:
36825 case V2DF_FTYPE_V2DF_V2DI:
36826 case V2DF_FTYPE_V2DF_DI:
36827 case V2DF_FTYPE_V2DF_SI:
36828 case V2DF_FTYPE_V2DF_UINT:
36829 case V2DF_FTYPE_V2DF_UINT64:
36830 case V2SF_FTYPE_V2SF_V2SF:
36831 case V1DI_FTYPE_V1DI_V1DI:
36832 case V1DI_FTYPE_V8QI_V8QI:
36833 case V1DI_FTYPE_V2SI_V2SI:
36834 case V32QI_FTYPE_V16HI_V16HI:
36835 case V16HI_FTYPE_V8SI_V8SI:
36836 case V32QI_FTYPE_V32QI_V32QI:
36837 case V16HI_FTYPE_V32QI_V32QI:
36838 case V16HI_FTYPE_V16HI_V16HI:
36839 case V8SI_FTYPE_V4DF_V4DF:
36840 case V8SI_FTYPE_V8SI_V8SI:
36841 case V8SI_FTYPE_V16HI_V16HI:
36842 case V4DI_FTYPE_V4DI_V4DI:
36843 case V4DI_FTYPE_V8SI_V8SI:
36844 case V4UDI_FTYPE_V8USI_V8USI:
36845 case QI_FTYPE_V8DI_V8DI:
36846 case V8DI_FTYPE_V64QI_V64QI:
36847 case HI_FTYPE_V16SI_V16SI:
36848 if (comparison == UNKNOWN)
36849 return ix86_expand_binop_builtin (icode, exp, target);
36850 nargs = 2;
36851 break;
36852 case V4SF_FTYPE_V4SF_V4SF_SWAP:
36853 case V2DF_FTYPE_V2DF_V2DF_SWAP:
36854 gcc_assert (comparison != UNKNOWN);
36855 nargs = 2;
36856 swap = true;
36857 break;
36858 case V16HI_FTYPE_V16HI_V8HI_COUNT:
36859 case V16HI_FTYPE_V16HI_SI_COUNT:
36860 case V8SI_FTYPE_V8SI_V4SI_COUNT:
36861 case V8SI_FTYPE_V8SI_SI_COUNT:
36862 case V4DI_FTYPE_V4DI_V2DI_COUNT:
36863 case V4DI_FTYPE_V4DI_INT_COUNT:
36864 case V8HI_FTYPE_V8HI_V8HI_COUNT:
36865 case V8HI_FTYPE_V8HI_SI_COUNT:
36866 case V4SI_FTYPE_V4SI_V4SI_COUNT:
36867 case V4SI_FTYPE_V4SI_SI_COUNT:
36868 case V4HI_FTYPE_V4HI_V4HI_COUNT:
36869 case V4HI_FTYPE_V4HI_SI_COUNT:
36870 case V2DI_FTYPE_V2DI_V2DI_COUNT:
36871 case V2DI_FTYPE_V2DI_SI_COUNT:
36872 case V2SI_FTYPE_V2SI_V2SI_COUNT:
36873 case V2SI_FTYPE_V2SI_SI_COUNT:
36874 case V1DI_FTYPE_V1DI_V1DI_COUNT:
36875 case V1DI_FTYPE_V1DI_SI_COUNT:
36876 nargs = 2;
36877 last_arg_count = true;
36878 break;
36879 case UINT64_FTYPE_UINT64_UINT64:
36880 case UINT_FTYPE_UINT_UINT:
36881 case UINT_FTYPE_UINT_USHORT:
36882 case UINT_FTYPE_UINT_UCHAR:
36883 case UINT16_FTYPE_UINT16_INT:
36884 case UINT8_FTYPE_UINT8_INT:
36885 case HI_FTYPE_HI_HI:
36886 case SI_FTYPE_SI_SI:
36887 case DI_FTYPE_DI_DI:
36888 case V16SI_FTYPE_V8DF_V8DF:
36889 nargs = 2;
36890 break;
36891 case V2DI_FTYPE_V2DI_INT_CONVERT:
36892 nargs = 2;
36893 rmode = V1TImode;
36894 nargs_constant = 1;
36895 break;
36896 case V4DI_FTYPE_V4DI_INT_CONVERT:
36897 nargs = 2;
36898 rmode = V2TImode;
36899 nargs_constant = 1;
36900 break;
36901 case V8DI_FTYPE_V8DI_INT_CONVERT:
36902 nargs = 2;
36903 rmode = V4TImode;
36904 nargs_constant = 1;
36905 break;
36906 case V8HI_FTYPE_V8HI_INT:
36907 case V8HI_FTYPE_V8SF_INT:
36908 case V16HI_FTYPE_V16SF_INT:
36909 case V8HI_FTYPE_V4SF_INT:
36910 case V8SF_FTYPE_V8SF_INT:
36911 case V4SF_FTYPE_V16SF_INT:
36912 case V16SF_FTYPE_V16SF_INT:
36913 case V4SI_FTYPE_V4SI_INT:
36914 case V4SI_FTYPE_V8SI_INT:
36915 case V4HI_FTYPE_V4HI_INT:
36916 case V4DF_FTYPE_V4DF_INT:
36917 case V4DF_FTYPE_V8DF_INT:
36918 case V4SF_FTYPE_V4SF_INT:
36919 case V4SF_FTYPE_V8SF_INT:
36920 case V2DI_FTYPE_V2DI_INT:
36921 case V2DF_FTYPE_V2DF_INT:
36922 case V2DF_FTYPE_V4DF_INT:
36923 case V16HI_FTYPE_V16HI_INT:
36924 case V8SI_FTYPE_V8SI_INT:
36925 case V16SI_FTYPE_V16SI_INT:
36926 case V4SI_FTYPE_V16SI_INT:
36927 case V4DI_FTYPE_V4DI_INT:
36928 case V2DI_FTYPE_V4DI_INT:
36929 case V4DI_FTYPE_V8DI_INT:
36930 case HI_FTYPE_HI_INT:
36931 case QI_FTYPE_V4SF_INT:
36932 case QI_FTYPE_V2DF_INT:
36933 nargs = 2;
36934 nargs_constant = 1;
36935 break;
36936 case V16QI_FTYPE_V16QI_V16QI_V16QI:
36937 case V8SF_FTYPE_V8SF_V8SF_V8SF:
36938 case V4DF_FTYPE_V4DF_V4DF_V4DF:
36939 case V4SF_FTYPE_V4SF_V4SF_V4SF:
36940 case V2DF_FTYPE_V2DF_V2DF_V2DF:
36941 case V32QI_FTYPE_V32QI_V32QI_V32QI:
36942 case HI_FTYPE_V16SI_V16SI_HI:
36943 case QI_FTYPE_V8DI_V8DI_QI:
36944 case V16HI_FTYPE_V16SI_V16HI_HI:
36945 case V16QI_FTYPE_V16SI_V16QI_HI:
36946 case V16QI_FTYPE_V8DI_V16QI_QI:
36947 case V16SF_FTYPE_V16SF_V16SF_HI:
36948 case V16SF_FTYPE_V16SF_V16SF_V16SF:
36949 case V16SF_FTYPE_V16SF_V16SI_V16SF:
36950 case V16SF_FTYPE_V16SI_V16SF_HI:
36951 case V16SF_FTYPE_V16SI_V16SF_V16SF:
36952 case V16SF_FTYPE_V4SF_V16SF_HI:
36953 case V16SI_FTYPE_SI_V16SI_HI:
36954 case V16SI_FTYPE_V16HI_V16SI_HI:
36955 case V16SI_FTYPE_V16QI_V16SI_HI:
36956 case V16SI_FTYPE_V16SF_V16SI_HI:
36957 case V8SF_FTYPE_V4SF_V8SF_QI:
36958 case V4DF_FTYPE_V2DF_V4DF_QI:
36959 case V8SI_FTYPE_V4SI_V8SI_QI:
36960 case V8SI_FTYPE_SI_V8SI_QI:
36961 case V4SI_FTYPE_V4SI_V4SI_QI:
36962 case V4SI_FTYPE_SI_V4SI_QI:
36963 case V4DI_FTYPE_V2DI_V4DI_QI:
36964 case V4DI_FTYPE_DI_V4DI_QI:
36965 case V2DI_FTYPE_V2DI_V2DI_QI:
36966 case V2DI_FTYPE_DI_V2DI_QI:
36967 case V64QI_FTYPE_V64QI_V64QI_DI:
36968 case V64QI_FTYPE_V16QI_V64QI_DI:
36969 case V64QI_FTYPE_QI_V64QI_DI:
36970 case V32QI_FTYPE_V32QI_V32QI_SI:
36971 case V32QI_FTYPE_V16QI_V32QI_SI:
36972 case V32QI_FTYPE_QI_V32QI_SI:
36973 case V16QI_FTYPE_V16QI_V16QI_HI:
36974 case V16QI_FTYPE_QI_V16QI_HI:
36975 case V32HI_FTYPE_V8HI_V32HI_SI:
36976 case V32HI_FTYPE_HI_V32HI_SI:
36977 case V16HI_FTYPE_V8HI_V16HI_HI:
36978 case V16HI_FTYPE_HI_V16HI_HI:
36979 case V8HI_FTYPE_V8HI_V8HI_QI:
36980 case V8HI_FTYPE_HI_V8HI_QI:
36981 case V8SF_FTYPE_V8HI_V8SF_QI:
36982 case V4SF_FTYPE_V8HI_V4SF_QI:
36983 case V8SI_FTYPE_V8SF_V8SI_QI:
36984 case V4SI_FTYPE_V4SF_V4SI_QI:
36985 case V8DI_FTYPE_V8SF_V8DI_QI:
36986 case V4DI_FTYPE_V4SF_V4DI_QI:
36987 case V2DI_FTYPE_V4SF_V2DI_QI:
36988 case V8SF_FTYPE_V8DI_V8SF_QI:
36989 case V4SF_FTYPE_V4DI_V4SF_QI:
36990 case V4SF_FTYPE_V2DI_V4SF_QI:
36991 case V8DF_FTYPE_V8DI_V8DF_QI:
36992 case V4DF_FTYPE_V4DI_V4DF_QI:
36993 case V2DF_FTYPE_V2DI_V2DF_QI:
36994 case V16QI_FTYPE_V8HI_V16QI_QI:
36995 case V16QI_FTYPE_V16HI_V16QI_HI:
36996 case V16QI_FTYPE_V4SI_V16QI_QI:
36997 case V16QI_FTYPE_V8SI_V16QI_QI:
36998 case V8HI_FTYPE_V4SI_V8HI_QI:
36999 case V8HI_FTYPE_V8SI_V8HI_QI:
37000 case V16QI_FTYPE_V2DI_V16QI_QI:
37001 case V16QI_FTYPE_V4DI_V16QI_QI:
37002 case V8HI_FTYPE_V2DI_V8HI_QI:
37003 case V8HI_FTYPE_V4DI_V8HI_QI:
37004 case V4SI_FTYPE_V2DI_V4SI_QI:
37005 case V4SI_FTYPE_V4DI_V4SI_QI:
37006 case V32QI_FTYPE_V32HI_V32QI_SI:
37007 case HI_FTYPE_V16QI_V16QI_HI:
37008 case SI_FTYPE_V32QI_V32QI_SI:
37009 case DI_FTYPE_V64QI_V64QI_DI:
37010 case QI_FTYPE_V8HI_V8HI_QI:
37011 case HI_FTYPE_V16HI_V16HI_HI:
37012 case SI_FTYPE_V32HI_V32HI_SI:
37013 case QI_FTYPE_V4SI_V4SI_QI:
37014 case QI_FTYPE_V8SI_V8SI_QI:
37015 case QI_FTYPE_V2DI_V2DI_QI:
37016 case QI_FTYPE_V4DI_V4DI_QI:
37017 case V4SF_FTYPE_V2DF_V4SF_QI:
37018 case V4SF_FTYPE_V4DF_V4SF_QI:
37019 case V16SI_FTYPE_V16SI_V16SI_HI:
37020 case V16SI_FTYPE_V16SI_V16SI_V16SI:
37021 case V16SI_FTYPE_V4SI_V16SI_HI:
37022 case V2DI_FTYPE_V2DI_V2DI_V2DI:
37023 case V2DI_FTYPE_V4SI_V2DI_QI:
37024 case V2DI_FTYPE_V8HI_V2DI_QI:
37025 case V2DI_FTYPE_V16QI_V2DI_QI:
37026 case V4DI_FTYPE_V4DI_V4DI_QI:
37027 case V4DI_FTYPE_V4SI_V4DI_QI:
37028 case V4DI_FTYPE_V8HI_V4DI_QI:
37029 case V4DI_FTYPE_V16QI_V4DI_QI:
37030 case V8DI_FTYPE_V8DF_V8DI_QI:
37031 case V4DI_FTYPE_V4DF_V4DI_QI:
37032 case V2DI_FTYPE_V2DF_V2DI_QI:
37033 case V4SI_FTYPE_V4DF_V4SI_QI:
37034 case V4SI_FTYPE_V2DF_V4SI_QI:
37035 case V4SI_FTYPE_V8HI_V4SI_QI:
37036 case V4SI_FTYPE_V16QI_V4SI_QI:
37037 case V8SI_FTYPE_V8SI_V8SI_V8SI:
37038 case V4DI_FTYPE_V4DI_V4DI_V4DI:
37039 case V8DF_FTYPE_V2DF_V8DF_QI:
37040 case V8DF_FTYPE_V4DF_V8DF_QI:
37041 case V8DF_FTYPE_V8DF_V8DF_QI:
37042 case V8DF_FTYPE_V8DF_V8DF_V8DF:
37043 case V8SF_FTYPE_V8SF_V8SF_QI:
37044 case V8SF_FTYPE_V8SI_V8SF_QI:
37045 case V4DF_FTYPE_V4DF_V4DF_QI:
37046 case V4SF_FTYPE_V4SF_V4SF_QI:
37047 case V2DF_FTYPE_V2DF_V2DF_QI:
37048 case V2DF_FTYPE_V4SF_V2DF_QI:
37049 case V2DF_FTYPE_V4SI_V2DF_QI:
37050 case V4SF_FTYPE_V4SI_V4SF_QI:
37051 case V4DF_FTYPE_V4SF_V4DF_QI:
37052 case V4DF_FTYPE_V4SI_V4DF_QI:
37053 case V8SI_FTYPE_V8SI_V8SI_QI:
37054 case V8SI_FTYPE_V8HI_V8SI_QI:
37055 case V8SI_FTYPE_V16QI_V8SI_QI:
37056 case V8DF_FTYPE_V8DF_V8DI_V8DF:
37057 case V8DF_FTYPE_V8DI_V8DF_V8DF:
37058 case V8DF_FTYPE_V8SF_V8DF_QI:
37059 case V8DF_FTYPE_V8SI_V8DF_QI:
37060 case V8DI_FTYPE_DI_V8DI_QI:
37061 case V16SF_FTYPE_V8SF_V16SF_HI:
37062 case V16SI_FTYPE_V8SI_V16SI_HI:
37063 case V16HI_FTYPE_V16HI_V16HI_HI:
37064 case V8HI_FTYPE_V16QI_V8HI_QI:
37065 case V16HI_FTYPE_V16QI_V16HI_HI:
37066 case V32HI_FTYPE_V32HI_V32HI_SI:
37067 case V32HI_FTYPE_V32QI_V32HI_SI:
37068 case V8DI_FTYPE_V16QI_V8DI_QI:
37069 case V8DI_FTYPE_V2DI_V8DI_QI:
37070 case V8DI_FTYPE_V4DI_V8DI_QI:
37071 case V8DI_FTYPE_V8DI_V8DI_QI:
37072 case V8DI_FTYPE_V8DI_V8DI_V8DI:
37073 case V8DI_FTYPE_V8HI_V8DI_QI:
37074 case V8DI_FTYPE_V8SI_V8DI_QI:
37075 case V8HI_FTYPE_V8DI_V8HI_QI:
37076 case V8SF_FTYPE_V8DF_V8SF_QI:
37077 case V8SI_FTYPE_V8DF_V8SI_QI:
37078 case V8SI_FTYPE_V8DI_V8SI_QI:
37079 case V4SI_FTYPE_V4SI_V4SI_V4SI:
37080 nargs = 3;
37081 break;
37082 case V32QI_FTYPE_V32QI_V32QI_INT:
37083 case V16HI_FTYPE_V16HI_V16HI_INT:
37084 case V16QI_FTYPE_V16QI_V16QI_INT:
37085 case V4DI_FTYPE_V4DI_V4DI_INT:
37086 case V8HI_FTYPE_V8HI_V8HI_INT:
37087 case V8SI_FTYPE_V8SI_V8SI_INT:
37088 case V8SI_FTYPE_V8SI_V4SI_INT:
37089 case V8SF_FTYPE_V8SF_V8SF_INT:
37090 case V8SF_FTYPE_V8SF_V4SF_INT:
37091 case V4SI_FTYPE_V4SI_V4SI_INT:
37092 case V4DF_FTYPE_V4DF_V4DF_INT:
37093 case V16SF_FTYPE_V16SF_V16SF_INT:
37094 case V16SF_FTYPE_V16SF_V4SF_INT:
37095 case V16SI_FTYPE_V16SI_V4SI_INT:
37096 case V4DF_FTYPE_V4DF_V2DF_INT:
37097 case V4SF_FTYPE_V4SF_V4SF_INT:
37098 case V2DI_FTYPE_V2DI_V2DI_INT:
37099 case V4DI_FTYPE_V4DI_V2DI_INT:
37100 case V2DF_FTYPE_V2DF_V2DF_INT:
37101 case QI_FTYPE_V8DI_V8DI_INT:
37102 case QI_FTYPE_V8DF_V8DF_INT:
37103 case QI_FTYPE_V2DF_V2DF_INT:
37104 case QI_FTYPE_V4SF_V4SF_INT:
37105 case HI_FTYPE_V16SI_V16SI_INT:
37106 case HI_FTYPE_V16SF_V16SF_INT:
37107 nargs = 3;
37108 nargs_constant = 1;
37109 break;
37110 case V4DI_FTYPE_V4DI_V4DI_INT_CONVERT:
37111 nargs = 3;
37112 rmode = V4DImode;
37113 nargs_constant = 1;
37114 break;
37115 case V2DI_FTYPE_V2DI_V2DI_INT_CONVERT:
37116 nargs = 3;
37117 rmode = V2DImode;
37118 nargs_constant = 1;
37119 break;
37120 case V1DI_FTYPE_V1DI_V1DI_INT_CONVERT:
37121 nargs = 3;
37122 rmode = DImode;
37123 nargs_constant = 1;
37124 break;
37125 case V2DI_FTYPE_V2DI_UINT_UINT:
37126 nargs = 3;
37127 nargs_constant = 2;
37128 break;
37129 case V8DI_FTYPE_V8DI_V8DI_INT_CONVERT:
37130 nargs = 3;
37131 rmode = V8DImode;
37132 nargs_constant = 1;
37133 break;
37134 case V8DI_FTYPE_V8DI_V8DI_INT_V8DI_DI_CONVERT:
37135 nargs = 5;
37136 rmode = V8DImode;
37137 mask_pos = 2;
37138 nargs_constant = 1;
37139 break;
37140 case QI_FTYPE_V8DF_INT_QI:
37141 case QI_FTYPE_V4DF_INT_QI:
37142 case QI_FTYPE_V2DF_INT_QI:
37143 case HI_FTYPE_V16SF_INT_HI:
37144 case QI_FTYPE_V8SF_INT_QI:
37145 case QI_FTYPE_V4SF_INT_QI:
37146 nargs = 3;
37147 mask_pos = 1;
37148 nargs_constant = 1;
37149 break;
37150 case V4DI_FTYPE_V4DI_V4DI_INT_V4DI_SI_CONVERT:
37151 nargs = 5;
37152 rmode = V4DImode;
37153 mask_pos = 2;
37154 nargs_constant = 1;
37155 break;
37156 case V2DI_FTYPE_V2DI_V2DI_INT_V2DI_HI_CONVERT:
37157 nargs = 5;
37158 rmode = V2DImode;
37159 mask_pos = 2;
37160 nargs_constant = 1;
37161 break;
37162 case V32QI_FTYPE_V32QI_V32QI_V32QI_SI:
37163 case V32HI_FTYPE_V32HI_V32HI_V32HI_SI:
37164 case V32HI_FTYPE_V64QI_V64QI_V32HI_SI:
37165 case V16SI_FTYPE_V32HI_V32HI_V16SI_HI:
37166 case V64QI_FTYPE_V64QI_V64QI_V64QI_DI:
37167 case V32HI_FTYPE_V32HI_V8HI_V32HI_SI:
37168 case V16HI_FTYPE_V16HI_V8HI_V16HI_HI:
37169 case V8SI_FTYPE_V8SI_V4SI_V8SI_QI:
37170 case V4DI_FTYPE_V4DI_V2DI_V4DI_QI:
37171 case V64QI_FTYPE_V32HI_V32HI_V64QI_DI:
37172 case V32QI_FTYPE_V16HI_V16HI_V32QI_SI:
37173 case V16QI_FTYPE_V8HI_V8HI_V16QI_HI:
37174 case V32HI_FTYPE_V16SI_V16SI_V32HI_SI:
37175 case V16HI_FTYPE_V8SI_V8SI_V16HI_HI:
37176 case V8HI_FTYPE_V4SI_V4SI_V8HI_QI:
37177 case V4DF_FTYPE_V4DF_V4DI_V4DF_QI:
37178 case V8SF_FTYPE_V8SF_V8SI_V8SF_QI:
37179 case V4SF_FTYPE_V4SF_V4SI_V4SF_QI:
37180 case V2DF_FTYPE_V2DF_V2DI_V2DF_QI:
37181 case V2DI_FTYPE_V4SI_V4SI_V2DI_QI:
37182 case V4DI_FTYPE_V8SI_V8SI_V4DI_QI:
37183 case V4DF_FTYPE_V4DI_V4DF_V4DF_QI:
37184 case V8SF_FTYPE_V8SI_V8SF_V8SF_QI:
37185 case V2DF_FTYPE_V2DI_V2DF_V2DF_QI:
37186 case V4SF_FTYPE_V4SI_V4SF_V4SF_QI:
37187 case V16SF_FTYPE_V16SF_V16SF_V16SF_HI:
37188 case V16SF_FTYPE_V16SF_V16SI_V16SF_HI:
37189 case V16SF_FTYPE_V16SI_V16SF_V16SF_HI:
37190 case V16SI_FTYPE_V16SI_V16SI_V16SI_HI:
37191 case V16SI_FTYPE_V16SI_V4SI_V16SI_HI:
37192 case V8HI_FTYPE_V8HI_V8HI_V8HI_QI:
37193 case V8SI_FTYPE_V8SI_V8SI_V8SI_QI:
37194 case V4SI_FTYPE_V4SI_V4SI_V4SI_QI:
37195 case V8SF_FTYPE_V8SF_V8SF_V8SF_QI:
37196 case V16QI_FTYPE_V16QI_V16QI_V16QI_HI:
37197 case V16HI_FTYPE_V16HI_V16HI_V16HI_HI:
37198 case V2DI_FTYPE_V2DI_V2DI_V2DI_QI:
37199 case V2DF_FTYPE_V2DF_V2DF_V2DF_QI:
37200 case V2DF_FTYPE_V2DF_V4SF_V2DF_QI:
37201 case V4DI_FTYPE_V4DI_V4DI_V4DI_QI:
37202 case V4DF_FTYPE_V4DF_V4DF_V4DF_QI:
37203 case V4SF_FTYPE_V4SF_V2DF_V4SF_QI:
37204 case V4SF_FTYPE_V4SF_V4SF_V4SF_QI:
37205 case V8DF_FTYPE_V8DF_V8DF_V8DF_QI:
37206 case V8DF_FTYPE_V8DF_V8DI_V8DF_QI:
37207 case V8DF_FTYPE_V8DI_V8DF_V8DF_QI:
37208 case V8DI_FTYPE_V16SI_V16SI_V8DI_QI:
37209 case V8DI_FTYPE_V8DI_SI_V8DI_V8DI:
37210 case V8DI_FTYPE_V8DI_V2DI_V8DI_QI:
37211 case V8DI_FTYPE_V8DI_V8DI_V8DI_QI:
37212 case V8HI_FTYPE_V16QI_V16QI_V8HI_QI:
37213 case V16HI_FTYPE_V32QI_V32QI_V16HI_HI:
37214 case V8SI_FTYPE_V16HI_V16HI_V8SI_QI:
37215 case V4SI_FTYPE_V8HI_V8HI_V4SI_QI:
37216 nargs = 4;
37217 break;
37218 case V2DF_FTYPE_V2DF_V2DF_V2DI_INT:
37219 case V4DF_FTYPE_V4DF_V4DF_V4DI_INT:
37220 case V4SF_FTYPE_V4SF_V4SF_V4SI_INT:
37221 case V8SF_FTYPE_V8SF_V8SF_V8SI_INT:
37222 case V16SF_FTYPE_V16SF_V16SF_V16SI_INT:
37223 nargs = 4;
37224 nargs_constant = 1;
37225 break;
37226 case QI_FTYPE_V4DI_V4DI_INT_QI:
37227 case QI_FTYPE_V8SI_V8SI_INT_QI:
37228 case QI_FTYPE_V4DF_V4DF_INT_QI:
37229 case QI_FTYPE_V8SF_V8SF_INT_QI:
37230 case QI_FTYPE_V2DI_V2DI_INT_QI:
37231 case QI_FTYPE_V4SI_V4SI_INT_QI:
37232 case QI_FTYPE_V2DF_V2DF_INT_QI:
37233 case QI_FTYPE_V4SF_V4SF_INT_QI:
37234 case DI_FTYPE_V64QI_V64QI_INT_DI:
37235 case SI_FTYPE_V32QI_V32QI_INT_SI:
37236 case HI_FTYPE_V16QI_V16QI_INT_HI:
37237 case SI_FTYPE_V32HI_V32HI_INT_SI:
37238 case HI_FTYPE_V16HI_V16HI_INT_HI:
37239 case QI_FTYPE_V8HI_V8HI_INT_QI:
37240 nargs = 4;
37241 mask_pos = 1;
37242 nargs_constant = 1;
37243 break;
37244 case V2DI_FTYPE_V2DI_V2DI_UINT_UINT:
37245 nargs = 4;
37246 nargs_constant = 2;
37247 break;
37248 case UCHAR_FTYPE_UCHAR_UINT_UINT_PUNSIGNED:
37249 case UCHAR_FTYPE_UCHAR_ULONGLONG_ULONGLONG_PULONGLONG:
37250 nargs = 4;
37251 break;
37252 case QI_FTYPE_V8DI_V8DI_INT_QI:
37253 case HI_FTYPE_V16SI_V16SI_INT_HI:
37254 case QI_FTYPE_V8DF_V8DF_INT_QI:
37255 case HI_FTYPE_V16SF_V16SF_INT_HI:
37256 mask_pos = 1;
37257 nargs = 4;
37258 nargs_constant = 1;
37259 break;
37260 case V8SF_FTYPE_V8SF_INT_V8SF_QI:
37261 case V4SF_FTYPE_V4SF_INT_V4SF_QI:
37262 case V2DF_FTYPE_V4DF_INT_V2DF_QI:
37263 case V2DI_FTYPE_V4DI_INT_V2DI_QI:
37264 case V8SF_FTYPE_V16SF_INT_V8SF_QI:
37265 case V8SI_FTYPE_V16SI_INT_V8SI_QI:
37266 case V2DF_FTYPE_V8DF_INT_V2DF_QI:
37267 case V2DI_FTYPE_V8DI_INT_V2DI_QI:
37268 case V4SF_FTYPE_V8SF_INT_V4SF_QI:
37269 case V4SI_FTYPE_V8SI_INT_V4SI_QI:
37270 case V8HI_FTYPE_V8SF_INT_V8HI_QI:
37271 case V8HI_FTYPE_V4SF_INT_V8HI_QI:
37272 case V32HI_FTYPE_V32HI_INT_V32HI_SI:
37273 case V16HI_FTYPE_V16HI_INT_V16HI_HI:
37274 case V8HI_FTYPE_V8HI_INT_V8HI_QI:
37275 case V4DI_FTYPE_V4DI_INT_V4DI_QI:
37276 case V2DI_FTYPE_V2DI_INT_V2DI_QI:
37277 case V8SI_FTYPE_V8SI_INT_V8SI_QI:
37278 case V4SI_FTYPE_V4SI_INT_V4SI_QI:
37279 case V4DF_FTYPE_V4DF_INT_V4DF_QI:
37280 case V2DF_FTYPE_V2DF_INT_V2DF_QI:
37281 case V8DF_FTYPE_V8DF_INT_V8DF_QI:
37282 case V16SF_FTYPE_V16SF_INT_V16SF_HI:
37283 case V16HI_FTYPE_V16SF_INT_V16HI_HI:
37284 case V16SI_FTYPE_V16SI_INT_V16SI_HI:
37285 case V4SI_FTYPE_V16SI_INT_V4SI_QI:
37286 case V4DI_FTYPE_V8DI_INT_V4DI_QI:
37287 case V4DF_FTYPE_V8DF_INT_V4DF_QI:
37288 case V4SF_FTYPE_V16SF_INT_V4SF_QI:
37289 case V8DI_FTYPE_V8DI_INT_V8DI_QI:
37290 nargs = 4;
37291 mask_pos = 2;
37292 nargs_constant = 1;
37293 break;
37294 case V16SF_FTYPE_V16SF_V4SF_INT_V16SF_HI:
37295 case V16SI_FTYPE_V16SI_V4SI_INT_V16SI_HI:
37296 case V8DF_FTYPE_V8DF_V8DF_INT_V8DF_QI:
37297 case V8DI_FTYPE_V8DI_V8DI_INT_V8DI_QI:
37298 case V16SF_FTYPE_V16SF_V16SF_INT_V16SF_HI:
37299 case V16SI_FTYPE_V16SI_V16SI_INT_V16SI_HI:
37300 case V4SF_FTYPE_V4SF_V4SF_INT_V4SF_QI:
37301 case V2DF_FTYPE_V2DF_V2DF_INT_V2DF_QI:
37302 case V8DF_FTYPE_V8DF_V4DF_INT_V8DF_QI:
37303 case V8DI_FTYPE_V8DI_V4DI_INT_V8DI_QI:
37304 case V4DF_FTYPE_V4DF_V4DF_INT_V4DF_QI:
37305 case V8SF_FTYPE_V8SF_V8SF_INT_V8SF_QI:
37306 case V8DF_FTYPE_V8DF_V2DF_INT_V8DF_QI:
37307 case V8DI_FTYPE_V8DI_V2DI_INT_V8DI_QI:
37308 case V8SI_FTYPE_V8SI_V8SI_INT_V8SI_QI:
37309 case V4DI_FTYPE_V4DI_V4DI_INT_V4DI_QI:
37310 case V4SI_FTYPE_V4SI_V4SI_INT_V4SI_QI:
37311 case V2DI_FTYPE_V2DI_V2DI_INT_V2DI_QI:
37312 case V32HI_FTYPE_V64QI_V64QI_INT_V32HI_SI:
37313 case V16HI_FTYPE_V32QI_V32QI_INT_V16HI_HI:
37314 case V8HI_FTYPE_V16QI_V16QI_INT_V8HI_QI:
37315 case V16SF_FTYPE_V16SF_V8SF_INT_V16SF_HI:
37316 case V16SI_FTYPE_V16SI_V8SI_INT_V16SI_HI:
37317 case V8SF_FTYPE_V8SF_V4SF_INT_V8SF_QI:
37318 case V8SI_FTYPE_V8SI_V4SI_INT_V8SI_QI:
37319 case V4DI_FTYPE_V4DI_V2DI_INT_V4DI_QI:
37320 case V4DF_FTYPE_V4DF_V2DF_INT_V4DF_QI:
37321 nargs = 5;
37322 mask_pos = 2;
37323 nargs_constant = 1;
37324 break;
37325 case V8DI_FTYPE_V8DI_V8DI_V8DI_INT_QI:
37326 case V16SF_FTYPE_V16SF_V16SF_V16SI_INT_HI:
37327 case V16SI_FTYPE_V16SI_V16SI_V16SI_INT_HI:
37328 case V2DF_FTYPE_V2DF_V2DF_V2DI_INT_QI:
37329 case V4SF_FTYPE_V4SF_V4SF_V4SI_INT_QI:
37330 case V8SF_FTYPE_V8SF_V8SF_V8SI_INT_QI:
37331 case V8SI_FTYPE_V8SI_V8SI_V8SI_INT_QI:
37332 case V4DF_FTYPE_V4DF_V4DF_V4DI_INT_QI:
37333 case V4DI_FTYPE_V4DI_V4DI_V4DI_INT_QI:
37334 case V4SI_FTYPE_V4SI_V4SI_V4SI_INT_QI:
37335 case V2DI_FTYPE_V2DI_V2DI_V2DI_INT_QI:
37336 nargs = 5;
37338 mask_pos = 1;
37339 nargs_constant = 1;
37340 break;
37342 default:
37343 gcc_unreachable ();
37346 gcc_assert (nargs <= ARRAY_SIZE (args));
37348 if (comparison != UNKNOWN)
37350 gcc_assert (nargs == 2);
37351 return ix86_expand_sse_compare (d, exp, target, swap);
37354 if (rmode == VOIDmode || rmode == tmode)
37356 if (optimize
37357 || target == 0
37358 || GET_MODE (target) != tmode
37359 || !insn_p->operand[0].predicate (target, tmode))
37360 target = gen_reg_rtx (tmode);
37361 real_target = target;
37363 else
37365 real_target = gen_reg_rtx (tmode);
37366 target = simplify_gen_subreg (rmode, real_target, tmode, 0);
37369 for (i = 0; i < nargs; i++)
37371 tree arg = CALL_EXPR_ARG (exp, i);
37372 rtx op = expand_normal (arg);
37373 machine_mode mode = insn_p->operand[i + 1].mode;
37374 bool match = insn_p->operand[i + 1].predicate (op, mode);
37376 if (last_arg_count && (i + 1) == nargs)
37378 /* SIMD shift insns take either an 8-bit immediate or a
37379 register as the count, but the builtin functions take an int.
37380 If the count operand doesn't match, put it in a register. */
37381 if (!match)
37383 op = simplify_gen_subreg (SImode, op, GET_MODE (op), 0);
37384 if (!insn_p->operand[i + 1].predicate (op, mode))
37385 op = copy_to_reg (op);
37388 else if ((mask_pos && (nargs - i - mask_pos) == nargs_constant) ||
37389 (!mask_pos && (nargs - i) <= nargs_constant))
37391 if (!match)
37392 switch (icode)
37394 case CODE_FOR_avx_vinsertf128v4di:
37395 case CODE_FOR_avx_vextractf128v4di:
37396 error ("the last argument must be a 1-bit immediate");
37397 return const0_rtx;
37399 case CODE_FOR_avx512f_cmpv8di3_mask:
37400 case CODE_FOR_avx512f_cmpv16si3_mask:
37401 case CODE_FOR_avx512f_ucmpv8di3_mask:
37402 case CODE_FOR_avx512f_ucmpv16si3_mask:
37403 case CODE_FOR_avx512vl_cmpv4di3_mask:
37404 case CODE_FOR_avx512vl_cmpv8si3_mask:
37405 case CODE_FOR_avx512vl_ucmpv4di3_mask:
37406 case CODE_FOR_avx512vl_ucmpv8si3_mask:
37407 case CODE_FOR_avx512vl_cmpv2di3_mask:
37408 case CODE_FOR_avx512vl_cmpv4si3_mask:
37409 case CODE_FOR_avx512vl_ucmpv2di3_mask:
37410 case CODE_FOR_avx512vl_ucmpv4si3_mask:
37411 error ("the last argument must be a 3-bit immediate");
37412 return const0_rtx;
37414 case CODE_FOR_sse4_1_roundsd:
37415 case CODE_FOR_sse4_1_roundss:
37417 case CODE_FOR_sse4_1_roundpd:
37418 case CODE_FOR_sse4_1_roundps:
37419 case CODE_FOR_avx_roundpd256:
37420 case CODE_FOR_avx_roundps256:
37422 case CODE_FOR_sse4_1_roundpd_vec_pack_sfix:
37423 case CODE_FOR_sse4_1_roundps_sfix:
37424 case CODE_FOR_avx_roundpd_vec_pack_sfix256:
37425 case CODE_FOR_avx_roundps_sfix256:
37427 case CODE_FOR_sse4_1_blendps:
37428 case CODE_FOR_avx_blendpd256:
37429 case CODE_FOR_avx_vpermilv4df:
37430 case CODE_FOR_avx_vpermilv4df_mask:
37431 case CODE_FOR_avx512f_getmantv8df_mask:
37432 case CODE_FOR_avx512f_getmantv16sf_mask:
37433 case CODE_FOR_avx512vl_getmantv8sf_mask:
37434 case CODE_FOR_avx512vl_getmantv4df_mask:
37435 case CODE_FOR_avx512vl_getmantv4sf_mask:
37436 case CODE_FOR_avx512vl_getmantv2df_mask:
37437 case CODE_FOR_avx512dq_rangepv8df_mask_round:
37438 case CODE_FOR_avx512dq_rangepv16sf_mask_round:
37439 case CODE_FOR_avx512dq_rangepv4df_mask:
37440 case CODE_FOR_avx512dq_rangepv8sf_mask:
37441 case CODE_FOR_avx512dq_rangepv2df_mask:
37442 case CODE_FOR_avx512dq_rangepv4sf_mask:
37443 case CODE_FOR_avx_shufpd256_mask:
37444 error ("the last argument must be a 4-bit immediate");
37445 return const0_rtx;
37447 case CODE_FOR_sha1rnds4:
37448 case CODE_FOR_sse4_1_blendpd:
37449 case CODE_FOR_avx_vpermilv2df:
37450 case CODE_FOR_avx_vpermilv2df_mask:
37451 case CODE_FOR_xop_vpermil2v2df3:
37452 case CODE_FOR_xop_vpermil2v4sf3:
37453 case CODE_FOR_xop_vpermil2v4df3:
37454 case CODE_FOR_xop_vpermil2v8sf3:
37455 case CODE_FOR_avx512f_vinsertf32x4_mask:
37456 case CODE_FOR_avx512f_vinserti32x4_mask:
37457 case CODE_FOR_avx512f_vextractf32x4_mask:
37458 case CODE_FOR_avx512f_vextracti32x4_mask:
37459 case CODE_FOR_sse2_shufpd:
37460 case CODE_FOR_sse2_shufpd_mask:
37461 case CODE_FOR_avx512dq_shuf_f64x2_mask:
37462 case CODE_FOR_avx512dq_shuf_i64x2_mask:
37463 case CODE_FOR_avx512vl_shuf_i32x4_mask:
37464 case CODE_FOR_avx512vl_shuf_f32x4_mask:
37465 error ("the last argument must be a 2-bit immediate");
37466 return const0_rtx;
37468 case CODE_FOR_avx_vextractf128v4df:
37469 case CODE_FOR_avx_vextractf128v8sf:
37470 case CODE_FOR_avx_vextractf128v8si:
37471 case CODE_FOR_avx_vinsertf128v4df:
37472 case CODE_FOR_avx_vinsertf128v8sf:
37473 case CODE_FOR_avx_vinsertf128v8si:
37474 case CODE_FOR_avx512f_vinsertf64x4_mask:
37475 case CODE_FOR_avx512f_vinserti64x4_mask:
37476 case CODE_FOR_avx512f_vextractf64x4_mask:
37477 case CODE_FOR_avx512f_vextracti64x4_mask:
37478 case CODE_FOR_avx512dq_vinsertf32x8_mask:
37479 case CODE_FOR_avx512dq_vinserti32x8_mask:
37480 case CODE_FOR_avx512vl_vinsertv4df:
37481 case CODE_FOR_avx512vl_vinsertv4di:
37482 case CODE_FOR_avx512vl_vinsertv8sf:
37483 case CODE_FOR_avx512vl_vinsertv8si:
37484 error ("the last argument must be a 1-bit immediate");
37485 return const0_rtx;
37487 case CODE_FOR_avx_vmcmpv2df3:
37488 case CODE_FOR_avx_vmcmpv4sf3:
37489 case CODE_FOR_avx_cmpv2df3:
37490 case CODE_FOR_avx_cmpv4sf3:
37491 case CODE_FOR_avx_cmpv4df3:
37492 case CODE_FOR_avx_cmpv8sf3:
37493 case CODE_FOR_avx512f_cmpv8df3_mask:
37494 case CODE_FOR_avx512f_cmpv16sf3_mask:
37495 case CODE_FOR_avx512f_vmcmpv2df3_mask:
37496 case CODE_FOR_avx512f_vmcmpv4sf3_mask:
37497 error ("the last argument must be a 5-bit immediate");
37498 return const0_rtx;
37500 default:
37501 switch (nargs_constant)
37503 case 2:
37504 if ((mask_pos && (nargs - i - mask_pos) == nargs_constant) ||
37505 (!mask_pos && (nargs - i) == nargs_constant))
37507 error ("the next to last argument must be an 8-bit immediate");
37508 break;
37510 case 1:
37511 error ("the last argument must be an 8-bit immediate");
37512 break;
37513 default:
37514 gcc_unreachable ();
37516 return const0_rtx;
37519 else
37521 if (VECTOR_MODE_P (mode))
37522 op = safe_vector_operand (op, mode);
37524 /* If we aren't optimizing, only allow one memory operand to
37525 be generated. */
37526 if (memory_operand (op, mode))
37527 num_memory++;
37529 op = fixup_modeless_constant (op, mode);
37531 if (GET_MODE (op) == mode || GET_MODE (op) == VOIDmode)
37533 if (optimize || !match || num_memory > 1)
37534 op = copy_to_mode_reg (mode, op);
37536 else
37538 op = copy_to_reg (op);
37539 op = simplify_gen_subreg (mode, op, GET_MODE (op), 0);
37543 args[i].op = op;
37544 args[i].mode = mode;
37547 switch (nargs)
37549 case 1:
37550 pat = GEN_FCN (icode) (real_target, args[0].op);
37551 break;
37552 case 2:
37553 pat = GEN_FCN (icode) (real_target, args[0].op, args[1].op);
37554 break;
37555 case 3:
37556 pat = GEN_FCN (icode) (real_target, args[0].op, args[1].op,
37557 args[2].op);
37558 break;
37559 case 4:
37560 pat = GEN_FCN (icode) (real_target, args[0].op, args[1].op,
37561 args[2].op, args[3].op);
37562 break;
37563 case 5:
37564 pat = GEN_FCN (icode) (real_target, args[0].op, args[1].op,
37565 args[2].op, args[3].op, args[4].op);
break;
37566 case 6:
37567 pat = GEN_FCN (icode) (real_target, args[0].op, args[1].op,
37568 args[2].op, args[3].op, args[4].op,
37569 args[5].op);
37570 break;
37571 default:
37572 gcc_unreachable ();
37575 if (! pat)
37576 return 0;
37578 emit_insn (pat);
37579 return target;
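/* Two user-level calls that are expected to be routed through
   ix86_expand_args_builtin (a sketch; assumes <emmintrin.h> and -msse2):

     #include <emmintrin.h>

     __m128i demo (__m128i a, __m128i b)
     {
       // Two-operand builtin; with no comparison it is handed off to the
       // binop helper by the dispatch switch above.
       __m128i sums = _mm_sad_epu8 (a, b);
       // nargs_constant == 1: the shuffle control must be a compile-time
       // 8-bit immediate, or the error above is reported.
       return _mm_shuffle_epi32 (sums, 0x1b);
     }
*/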
37582 /* Transform a pattern of the following layout:
37583 (parallel [
37584 set (A B)
37585 (unspec [C] UNSPEC_EMBEDDED_ROUNDING)])
37587 into:
37588 (set (A B))
or a pattern of the layout:
37591 (parallel [ A B
...
37593 (unspec [C] UNSPEC_EMBEDDED_ROUNDING)
...
])
37596 into:
37597 (parallel [ A B ... ]) */
37599 static rtx
37600 ix86_erase_embedded_rounding (rtx pat)
37602 if (GET_CODE (pat) == INSN)
37603 pat = PATTERN (pat);
37605 gcc_assert (GET_CODE (pat) == PARALLEL);
37607 if (XVECLEN (pat, 0) == 2)
37609 rtx p0 = XVECEXP (pat, 0, 0);
37610 rtx p1 = XVECEXP (pat, 0, 1);
37612 gcc_assert (GET_CODE (p0) == SET
37613 && GET_CODE (p1) == UNSPEC
37614 && XINT (p1, 1) == UNSPEC_EMBEDDED_ROUNDING);
37616 return p0;
37618 else
37620 rtx *res = XALLOCAVEC (rtx, XVECLEN (pat, 0));
37621 int i = 0;
37622 int j = 0;
37624 for (; i < XVECLEN (pat, 0); ++i)
37626 rtx elem = XVECEXP (pat, 0, i);
37627 if (GET_CODE (elem) != UNSPEC
37628 || XINT (elem, 1) != UNSPEC_EMBEDDED_ROUNDING)
37629 res [j++] = elem;
37632 /* No more than one occurrence was removed. */
37633 gcc_assert (j >= XVECLEN (pat, 0) - 1);
37635 return gen_rtx_PARALLEL (GET_MODE (pat), gen_rtvec_v (j, res));
37639 /* Subroutine of ix86_expand_round_builtin to take care of comi insns
37640 with rounding. */
37641 static rtx
37642 ix86_expand_sse_comi_round (const struct builtin_description *d,
37643 tree exp, rtx target)
37645 rtx pat, set_dst;
37646 tree arg0 = CALL_EXPR_ARG (exp, 0);
37647 tree arg1 = CALL_EXPR_ARG (exp, 1);
37648 tree arg2 = CALL_EXPR_ARG (exp, 2);
37649 tree arg3 = CALL_EXPR_ARG (exp, 3);
37650 rtx op0 = expand_normal (arg0);
37651 rtx op1 = expand_normal (arg1);
37652 rtx op2 = expand_normal (arg2);
37653 rtx op3 = expand_normal (arg3);
37654 enum insn_code icode = d->icode;
37655 const struct insn_data_d *insn_p = &insn_data[icode];
37656 machine_mode mode0 = insn_p->operand[0].mode;
37657 machine_mode mode1 = insn_p->operand[1].mode;
37658 enum rtx_code comparison = UNEQ;
37659 bool need_ucomi = false;
37661 /* See avxintrin.h for values. */
37662 enum rtx_code comi_comparisons[32] =
37664 UNEQ, GT, GE, UNORDERED, LTGT, UNLE, UNLT, ORDERED, UNEQ, UNLT,
37665 UNLE, LT, LTGT, GE, GT, LT, UNEQ, GT, GE, UNORDERED, LTGT, UNLE,
37666 UNLT, ORDERED, UNEQ, UNLT, UNLE, LT, LTGT, GE, GT, LT
37668 bool need_ucomi_values[32] =
37670 true, false, false, true, true, false, false, true,
37671 true, false, false, true, true, false, false, true,
37672 false, true, true, false, false, true, true, false,
37673 false, true, true, false, false, true, true, false
37676 if (!CONST_INT_P (op2))
37678 error ("the third argument must be a comparison constant");
37679 return const0_rtx;
37681 if (INTVAL (op2) < 0 || INTVAL (op2) >= 32)
37683 error ("incorrect comparison mode");
37684 return const0_rtx;
37687 if (!insn_p->operand[2].predicate (op3, SImode))
37689 error ("incorrect rounding operand");
37690 return const0_rtx;
37693 comparison = comi_comparisons[INTVAL (op2)];
37694 need_ucomi = need_ucomi_values[INTVAL (op2)];
37696 if (VECTOR_MODE_P (mode0))
37697 op0 = safe_vector_operand (op0, mode0);
37698 if (VECTOR_MODE_P (mode1))
37699 op1 = safe_vector_operand (op1, mode1);
37701 target = gen_reg_rtx (SImode);
37702 emit_move_insn (target, const0_rtx);
37703 target = gen_rtx_SUBREG (QImode, target, 0);
37705 if ((optimize && !register_operand (op0, mode0))
37706 || !insn_p->operand[0].predicate (op0, mode0))
37707 op0 = copy_to_mode_reg (mode0, op0);
37708 if ((optimize && !register_operand (op1, mode1))
37709 || !insn_p->operand[1].predicate (op1, mode1))
37710 op1 = copy_to_mode_reg (mode1, op1);
37712 if (need_ucomi)
37713 icode = icode == CODE_FOR_sse_comi_round
37714 ? CODE_FOR_sse_ucomi_round
37715 : CODE_FOR_sse2_ucomi_round;
37717 pat = GEN_FCN (icode) (op0, op1, op3);
37718 if (! pat)
37719 return 0;
37721 /* Rounding operand can be either NO_ROUND or ROUND_SAE at this point. */
37722 if (INTVAL (op3) == NO_ROUND)
37724 pat = ix86_erase_embedded_rounding (pat);
37725 if (! pat)
37726 return 0;
37728 set_dst = SET_DEST (pat);
37730 else
37732 gcc_assert (GET_CODE (XVECEXP (pat, 0, 0)) == SET);
37733 set_dst = SET_DEST (XVECEXP (pat, 0, 0));
37736 emit_insn (pat);
37737 emit_insn (gen_rtx_SET (gen_rtx_STRICT_LOW_PART (VOIDmode, target),
37738 gen_rtx_fmt_ee (comparison, QImode,
37739 set_dst,
37740 const0_rtx)));
37742 return SUBREG_REG (target);
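/* Illustrative user-level call for the COMI-with-rounding expander above
   (a sketch; assumes the AVX-512F _mm_comi_round_ss intrinsic from
   <immintrin.h> and -mavx512f):

     #include <immintrin.h>

     int lt_suppress_exceptions (__m128 a, __m128 b)
     {
       // The predicate indexes comi_comparisons[]; the SAE operand is what
       // the "incorrect rounding operand" check above validates.
       return _mm_comi_round_ss (a, b, _CMP_LT_OS, _MM_FROUND_NO_EXC);
     }
*/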
37745 static rtx
37746 ix86_expand_round_builtin (const struct builtin_description *d,
37747 tree exp, rtx target)
37749 rtx pat;
37750 unsigned int i, nargs;
37751 struct
37753 rtx op;
37754 machine_mode mode;
37755 } args[6];
37756 enum insn_code icode = d->icode;
37757 const struct insn_data_d *insn_p = &insn_data[icode];
37758 machine_mode tmode = insn_p->operand[0].mode;
37759 unsigned int nargs_constant = 0;
37760 unsigned int redundant_embed_rnd = 0;
37762 switch ((enum ix86_builtin_func_type) d->flag)
37764 case UINT64_FTYPE_V2DF_INT:
37765 case UINT64_FTYPE_V4SF_INT:
37766 case UINT_FTYPE_V2DF_INT:
37767 case UINT_FTYPE_V4SF_INT:
37768 case INT64_FTYPE_V2DF_INT:
37769 case INT64_FTYPE_V4SF_INT:
37770 case INT_FTYPE_V2DF_INT:
37771 case INT_FTYPE_V4SF_INT:
37772 nargs = 2;
37773 break;
37774 case V4SF_FTYPE_V4SF_UINT_INT:
37775 case V4SF_FTYPE_V4SF_UINT64_INT:
37776 case V2DF_FTYPE_V2DF_UINT64_INT:
37777 case V4SF_FTYPE_V4SF_INT_INT:
37778 case V4SF_FTYPE_V4SF_INT64_INT:
37779 case V2DF_FTYPE_V2DF_INT64_INT:
37780 case V4SF_FTYPE_V4SF_V4SF_INT:
37781 case V2DF_FTYPE_V2DF_V2DF_INT:
37782 case V4SF_FTYPE_V4SF_V2DF_INT:
37783 case V2DF_FTYPE_V2DF_V4SF_INT:
37784 nargs = 3;
37785 break;
37786 case V8SF_FTYPE_V8DF_V8SF_QI_INT:
37787 case V8DF_FTYPE_V8DF_V8DF_QI_INT:
37788 case V8SI_FTYPE_V8DF_V8SI_QI_INT:
37789 case V8DI_FTYPE_V8DF_V8DI_QI_INT:
37790 case V8SF_FTYPE_V8DI_V8SF_QI_INT:
37791 case V8DF_FTYPE_V8DI_V8DF_QI_INT:
37792 case V16SF_FTYPE_V16SF_V16SF_HI_INT:
37793 case V8DI_FTYPE_V8SF_V8DI_QI_INT:
37794 case V16SF_FTYPE_V16SI_V16SF_HI_INT:
37795 case V16SI_FTYPE_V16SF_V16SI_HI_INT:
37796 case V8DF_FTYPE_V8SF_V8DF_QI_INT:
37797 case V16SF_FTYPE_V16HI_V16SF_HI_INT:
37798 case V2DF_FTYPE_V2DF_V2DF_V2DF_INT:
37799 case V4SF_FTYPE_V4SF_V4SF_V4SF_INT:
37800 nargs = 4;
37801 break;
37802 case V4SF_FTYPE_V4SF_V4SF_INT_INT:
37803 case V2DF_FTYPE_V2DF_V2DF_INT_INT:
37804 nargs_constant = 2;
37805 nargs = 4;
37806 break;
37807 case INT_FTYPE_V4SF_V4SF_INT_INT:
37808 case INT_FTYPE_V2DF_V2DF_INT_INT:
37809 return ix86_expand_sse_comi_round (d, exp, target);
37810 case V8DF_FTYPE_V8DF_V8DF_V8DF_QI_INT:
37811 case V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT:
37812 case V2DF_FTYPE_V2DF_V2DF_V2DF_QI_INT:
37813 case V2DF_FTYPE_V2DF_V4SF_V2DF_QI_INT:
37814 case V4SF_FTYPE_V4SF_V4SF_V4SF_QI_INT:
37815 case V4SF_FTYPE_V4SF_V2DF_V4SF_QI_INT:
37816 nargs = 5;
37817 break;
37818 case V16SF_FTYPE_V16SF_INT_V16SF_HI_INT:
37819 case V8DF_FTYPE_V8DF_INT_V8DF_QI_INT:
37820 nargs_constant = 4;
37821 nargs = 5;
37822 break;
37823 case QI_FTYPE_V8DF_V8DF_INT_QI_INT:
37824 case QI_FTYPE_V2DF_V2DF_INT_QI_INT:
37825 case HI_FTYPE_V16SF_V16SF_INT_HI_INT:
37826 case QI_FTYPE_V4SF_V4SF_INT_QI_INT:
37827 nargs_constant = 3;
37828 nargs = 5;
37829 break;
37830 case V16SF_FTYPE_V16SF_V16SF_INT_V16SF_HI_INT:
37831 case V8DF_FTYPE_V8DF_V8DF_INT_V8DF_QI_INT:
37832 case V4SF_FTYPE_V4SF_V4SF_INT_V4SF_QI_INT:
37833 case V2DF_FTYPE_V2DF_V2DF_INT_V2DF_QI_INT:
37834 nargs = 6;
37835 nargs_constant = 4;
37836 break;
37837 case V8DF_FTYPE_V8DF_V8DF_V8DI_INT_QI_INT:
37838 case V16SF_FTYPE_V16SF_V16SF_V16SI_INT_HI_INT:
37839 case V2DF_FTYPE_V2DF_V2DF_V2DI_INT_QI_INT:
37840 case V4SF_FTYPE_V4SF_V4SF_V4SI_INT_QI_INT:
37841 nargs = 6;
37842 nargs_constant = 3;
37843 break;
37844 default:
37845 gcc_unreachable ();
37847 gcc_assert (nargs <= ARRAY_SIZE (args));
37849 if (optimize
37850 || target == 0
37851 || GET_MODE (target) != tmode
37852 || !insn_p->operand[0].predicate (target, tmode))
37853 target = gen_reg_rtx (tmode);
37855 for (i = 0; i < nargs; i++)
37857 tree arg = CALL_EXPR_ARG (exp, i);
37858 rtx op = expand_normal (arg);
37859 machine_mode mode = insn_p->operand[i + 1].mode;
37860 bool match = insn_p->operand[i + 1].predicate (op, mode);
37862 if (i == nargs - nargs_constant)
37864 if (!match)
37866 switch (icode)
37868 case CODE_FOR_avx512f_getmantv8df_mask_round:
37869 case CODE_FOR_avx512f_getmantv16sf_mask_round:
37870 case CODE_FOR_avx512f_vgetmantv2df_round:
37871 case CODE_FOR_avx512f_vgetmantv4sf_round:
37872 error ("the immediate argument must be a 4-bit immediate");
37873 return const0_rtx;
37874 case CODE_FOR_avx512f_cmpv8df3_mask_round:
37875 case CODE_FOR_avx512f_cmpv16sf3_mask_round:
37876 case CODE_FOR_avx512f_vmcmpv2df3_mask_round:
37877 case CODE_FOR_avx512f_vmcmpv4sf3_mask_round:
37878 error ("the immediate argument must be a 5-bit immediate");
37879 return const0_rtx;
37880 default:
37881 error ("the immediate argument must be an 8-bit immediate");
37882 return const0_rtx;
37886 else if (i == nargs-1)
37888 if (!insn_p->operand[nargs].predicate (op, SImode))
37890 error ("incorrect rounding operand");
37891 return const0_rtx;
37894 /* If there is no rounding, use the normal version of the pattern. */
37895 if (INTVAL (op) == NO_ROUND)
37896 redundant_embed_rnd = 1;
37898 else
37900 if (VECTOR_MODE_P (mode))
37901 op = safe_vector_operand (op, mode);
37903 op = fixup_modeless_constant (op, mode);
37905 if (GET_MODE (op) == mode || GET_MODE (op) == VOIDmode)
37907 if (optimize || !match)
37908 op = copy_to_mode_reg (mode, op);
37910 else
37912 op = copy_to_reg (op);
37913 op = simplify_gen_subreg (mode, op, GET_MODE (op), 0);
37917 args[i].op = op;
37918 args[i].mode = mode;
37921 switch (nargs)
37923 case 1:
37924 pat = GEN_FCN (icode) (target, args[0].op);
37925 break;
37926 case 2:
37927 pat = GEN_FCN (icode) (target, args[0].op, args[1].op);
37928 break;
37929 case 3:
37930 pat = GEN_FCN (icode) (target, args[0].op, args[1].op,
37931 args[2].op);
37932 break;
37933 case 4:
37934 pat = GEN_FCN (icode) (target, args[0].op, args[1].op,
37935 args[2].op, args[3].op);
37936 break;
37937 case 5:
37938 pat = GEN_FCN (icode) (target, args[0].op, args[1].op,
37939 args[2].op, args[3].op, args[4].op);
break;
37940 case 6:
37941 pat = GEN_FCN (icode) (target, args[0].op, args[1].op,
37942 args[2].op, args[3].op, args[4].op,
37943 args[5].op);
37944 break;
37945 default:
37946 gcc_unreachable ();
37949 if (!pat)
37950 return 0;
37952 if (redundant_embed_rnd)
37953 pat = ix86_erase_embedded_rounding (pat);
37955 emit_insn (pat);
37956 return target;
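/* Illustrative user-level calls for ix86_expand_round_builtin (a sketch;
   assumes <immintrin.h> and -mavx512f):

     #include <immintrin.h>

     __m512 sums (__m512 a, __m512 b)
     {
       // Explicit embedded rounding: the immediate survives as the
       // UNSPEC_EMBEDDED_ROUNDING operand of the pattern.
       __m512 x = _mm512_add_round_ps (a, b,
				       _MM_FROUND_TO_NEAREST_INT
				       | _MM_FROUND_NO_EXC);
       // _MM_FROUND_CUR_DIRECTION requests no explicit rounding, so the
       // redundant embedded-rounding unspec is erased above and the
       // normal pattern is used.
       return _mm512_add_round_ps (x, b, _MM_FROUND_CUR_DIRECTION);
     }
*/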
37959 /* Subroutine of ix86_expand_builtin to take care of special insns
37960 with variable number of operands. */
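/* Typical users of this path are the load/store intrinsics, including the
   non-temporal ones whose memory operand must be strictly aligned (see the
   aligned_mem handling below).  A sketch (assumes <smmintrin.h> and
   -msse4.1):

     #include <smmintrin.h>

     void copy_stream (__m128i *dst, const __m128i *src)
     {
       // movntdqa load and movntdq store; both require 16-byte aligned
       // memory, which is why their CODE_FOR_* entries set aligned_mem.
       __m128i v = _mm_stream_load_si128 ((__m128i *) src);
       _mm_stream_si128 (dst, v);
     }
*/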
37962 static rtx
37963 ix86_expand_special_args_builtin (const struct builtin_description *d,
37964 tree exp, rtx target)
37966 tree arg;
37967 rtx pat, op;
37968 unsigned int i, nargs, arg_adjust, memory;
37969 bool aligned_mem = false;
37970 struct
37972 rtx op;
37973 machine_mode mode;
37974 } args[3];
37975 enum insn_code icode = d->icode;
37976 bool last_arg_constant = false;
37977 const struct insn_data_d *insn_p = &insn_data[icode];
37978 machine_mode tmode = insn_p->operand[0].mode;
37979 enum { load, store } klass;
37981 switch ((enum ix86_builtin_func_type) d->flag)
37983 case VOID_FTYPE_VOID:
37984 emit_insn (GEN_FCN (icode) (target));
37985 return 0;
37986 case VOID_FTYPE_UINT64:
37987 case VOID_FTYPE_UNSIGNED:
37988 nargs = 0;
37989 klass = store;
37990 memory = 0;
37991 break;
37993 case INT_FTYPE_VOID:
37994 case USHORT_FTYPE_VOID:
37995 case UINT64_FTYPE_VOID:
37996 case UNSIGNED_FTYPE_VOID:
37997 nargs = 0;
37998 klass = load;
37999 memory = 0;
38000 break;
38001 case UINT64_FTYPE_PUNSIGNED:
38002 case V2DI_FTYPE_PV2DI:
38003 case V4DI_FTYPE_PV4DI:
38004 case V32QI_FTYPE_PCCHAR:
38005 case V16QI_FTYPE_PCCHAR:
38006 case V8SF_FTYPE_PCV4SF:
38007 case V8SF_FTYPE_PCFLOAT:
38008 case V4SF_FTYPE_PCFLOAT:
38009 case V4DF_FTYPE_PCV2DF:
38010 case V4DF_FTYPE_PCDOUBLE:
38011 case V2DF_FTYPE_PCDOUBLE:
38012 case VOID_FTYPE_PVOID:
38013 case V16SI_FTYPE_PV4SI:
38014 case V16SF_FTYPE_PV4SF:
38015 case V8DI_FTYPE_PV4DI:
38016 case V8DI_FTYPE_PV8DI:
38017 case V8DF_FTYPE_PV4DF:
38018 nargs = 1;
38019 klass = load;
38020 memory = 0;
38021 switch (icode)
38023 case CODE_FOR_sse4_1_movntdqa:
38024 case CODE_FOR_avx2_movntdqa:
38025 case CODE_FOR_avx512f_movntdqa:
38026 aligned_mem = true;
38027 break;
38028 default:
38029 break;
38031 break;
38032 case VOID_FTYPE_PV2SF_V4SF:
38033 case VOID_FTYPE_PV8DI_V8DI:
38034 case VOID_FTYPE_PV4DI_V4DI:
38035 case VOID_FTYPE_PV2DI_V2DI:
38036 case VOID_FTYPE_PCHAR_V32QI:
38037 case VOID_FTYPE_PCHAR_V16QI:
38038 case VOID_FTYPE_PFLOAT_V16SF:
38039 case VOID_FTYPE_PFLOAT_V8SF:
38040 case VOID_FTYPE_PFLOAT_V4SF:
38041 case VOID_FTYPE_PDOUBLE_V8DF:
38042 case VOID_FTYPE_PDOUBLE_V4DF:
38043 case VOID_FTYPE_PDOUBLE_V2DF:
38044 case VOID_FTYPE_PLONGLONG_LONGLONG:
38045 case VOID_FTYPE_PULONGLONG_ULONGLONG:
38046 case VOID_FTYPE_PINT_INT:
38047 nargs = 1;
38048 klass = store;
38049 /* Reserve memory operand for target. */
38050 memory = ARRAY_SIZE (args);
38051 switch (icode)
38053 /* These builtins and instructions require the memory
38054 to be properly aligned. */
38055 case CODE_FOR_avx_movntv4di:
38056 case CODE_FOR_sse2_movntv2di:
38057 case CODE_FOR_avx_movntv8sf:
38058 case CODE_FOR_sse_movntv4sf:
38059 case CODE_FOR_sse4a_vmmovntv4sf:
38060 case CODE_FOR_avx_movntv4df:
38061 case CODE_FOR_sse2_movntv2df:
38062 case CODE_FOR_sse4a_vmmovntv2df:
38063 case CODE_FOR_sse2_movntidi:
38064 case CODE_FOR_sse_movntq:
38065 case CODE_FOR_sse2_movntisi:
38066 case CODE_FOR_avx512f_movntv16sf:
38067 case CODE_FOR_avx512f_movntv8df:
38068 case CODE_FOR_avx512f_movntv8di:
38069 aligned_mem = true;
38070 break;
38071 default:
38072 break;
38074 break;
38075 case V4SF_FTYPE_V4SF_PCV2SF:
38076 case V2DF_FTYPE_V2DF_PCDOUBLE:
38077 nargs = 2;
38078 klass = load;
38079 memory = 1;
38080 break;
38081 case V8SF_FTYPE_PCV8SF_V8SI:
38082 case V4DF_FTYPE_PCV4DF_V4DI:
38083 case V4SF_FTYPE_PCV4SF_V4SI:
38084 case V2DF_FTYPE_PCV2DF_V2DI:
38085 case V8SI_FTYPE_PCV8SI_V8SI:
38086 case V4DI_FTYPE_PCV4DI_V4DI:
38087 case V4SI_FTYPE_PCV4SI_V4SI:
38088 case V2DI_FTYPE_PCV2DI_V2DI:
38089 nargs = 2;
38090 klass = load;
38091 memory = 0;
38092 break;
38093 case VOID_FTYPE_PV8DF_V8DF_QI:
38094 case VOID_FTYPE_PV16SF_V16SF_HI:
38095 case VOID_FTYPE_PV8DI_V8DI_QI:
38096 case VOID_FTYPE_PV4DI_V4DI_QI:
38097 case VOID_FTYPE_PV2DI_V2DI_QI:
38098 case VOID_FTYPE_PV16SI_V16SI_HI:
38099 case VOID_FTYPE_PV8SI_V8SI_QI:
38100 case VOID_FTYPE_PV4SI_V4SI_QI:
38101 switch (icode)
38103 /* These builtins and instructions require the memory
38104 to be properly aligned. */
38105 case CODE_FOR_avx512f_storev16sf_mask:
38106 case CODE_FOR_avx512f_storev16si_mask:
38107 case CODE_FOR_avx512f_storev8df_mask:
38108 case CODE_FOR_avx512f_storev8di_mask:
38109 case CODE_FOR_avx512vl_storev8sf_mask:
38110 case CODE_FOR_avx512vl_storev8si_mask:
38111 case CODE_FOR_avx512vl_storev4df_mask:
38112 case CODE_FOR_avx512vl_storev4di_mask:
38113 case CODE_FOR_avx512vl_storev4sf_mask:
38114 case CODE_FOR_avx512vl_storev4si_mask:
38115 case CODE_FOR_avx512vl_storev2df_mask:
38116 case CODE_FOR_avx512vl_storev2di_mask:
38117 aligned_mem = true;
38118 break;
38119 default:
38120 break;
38122 /* FALLTHRU */
38123 case VOID_FTYPE_PV8SF_V8SI_V8SF:
38124 case VOID_FTYPE_PV4DF_V4DI_V4DF:
38125 case VOID_FTYPE_PV4SF_V4SI_V4SF:
38126 case VOID_FTYPE_PV2DF_V2DI_V2DF:
38127 case VOID_FTYPE_PV8SI_V8SI_V8SI:
38128 case VOID_FTYPE_PV4DI_V4DI_V4DI:
38129 case VOID_FTYPE_PV4SI_V4SI_V4SI:
38130 case VOID_FTYPE_PV2DI_V2DI_V2DI:
38131 case VOID_FTYPE_PDOUBLE_V2DF_QI:
38132 case VOID_FTYPE_PFLOAT_V4SF_QI:
38133 case VOID_FTYPE_PV8SI_V8DI_QI:
38134 case VOID_FTYPE_PV8HI_V8DI_QI:
38135 case VOID_FTYPE_PV16HI_V16SI_HI:
38136 case VOID_FTYPE_PV16QI_V8DI_QI:
38137 case VOID_FTYPE_PV16QI_V16SI_HI:
38138 case VOID_FTYPE_PV4SI_V4DI_QI:
38139 case VOID_FTYPE_PV4SI_V2DI_QI:
38140 case VOID_FTYPE_PV8HI_V4DI_QI:
38141 case VOID_FTYPE_PV8HI_V2DI_QI:
38142 case VOID_FTYPE_PV8HI_V8SI_QI:
38143 case VOID_FTYPE_PV8HI_V4SI_QI:
38144 case VOID_FTYPE_PV16QI_V4DI_QI:
38145 case VOID_FTYPE_PV16QI_V2DI_QI:
38146 case VOID_FTYPE_PV16QI_V8SI_QI:
38147 case VOID_FTYPE_PV16QI_V4SI_QI:
38148 case VOID_FTYPE_PV8HI_V8HI_QI:
38149 case VOID_FTYPE_PV16HI_V16HI_HI:
38150 case VOID_FTYPE_PV32HI_V32HI_SI:
38151 case VOID_FTYPE_PV16QI_V16QI_HI:
38152 case VOID_FTYPE_PV32QI_V32QI_SI:
38153 case VOID_FTYPE_PV64QI_V64QI_DI:
38154 case VOID_FTYPE_PV4DF_V4DF_QI:
38155 case VOID_FTYPE_PV2DF_V2DF_QI:
38156 case VOID_FTYPE_PV8SF_V8SF_QI:
38157 case VOID_FTYPE_PV4SF_V4SF_QI:
38158 nargs = 2;
38159 klass = store;
38160 /* Reserve memory operand for target. */
38161 memory = ARRAY_SIZE (args);
38162 break;
38163 case V4SF_FTYPE_PCV4SF_V4SF_QI:
38164 case V8SF_FTYPE_PCV8SF_V8SF_QI:
38165 case V16SF_FTYPE_PCV16SF_V16SF_HI:
38166 case V4SI_FTYPE_PCV4SI_V4SI_QI:
38167 case V8SI_FTYPE_PCV8SI_V8SI_QI:
38168 case V16SI_FTYPE_PCV16SI_V16SI_HI:
38169 case V2DF_FTYPE_PCV2DF_V2DF_QI:
38170 case V4DF_FTYPE_PCV4DF_V4DF_QI:
38171 case V8DF_FTYPE_PCV8DF_V8DF_QI:
38172 case V2DI_FTYPE_PCV2DI_V2DI_QI:
38173 case V4DI_FTYPE_PCV4DI_V4DI_QI:
38174 case V8DI_FTYPE_PCV8DI_V8DI_QI:
38175 case V2DF_FTYPE_PCDOUBLE_V2DF_QI:
38176 case V4SF_FTYPE_PCFLOAT_V4SF_QI:
38177 case V8HI_FTYPE_PCV8HI_V8HI_QI:
38178 case V16HI_FTYPE_PCV16HI_V16HI_HI:
38179 case V32HI_FTYPE_PCV32HI_V32HI_SI:
38180 case V16QI_FTYPE_PCV16QI_V16QI_HI:
38181 case V32QI_FTYPE_PCV32QI_V32QI_SI:
38182 case V64QI_FTYPE_PCV64QI_V64QI_DI:
38183 nargs = 3;
38184 klass = load;
38185 memory = 0;
38186 switch (icode)
38188 /* These builtins and instructions require the memory
38189 to be properly aligned. */
38190 case CODE_FOR_avx512f_loadv16sf_mask:
38191 case CODE_FOR_avx512f_loadv16si_mask:
38192 case CODE_FOR_avx512f_loadv8df_mask:
38193 case CODE_FOR_avx512f_loadv8di_mask:
38194 case CODE_FOR_avx512vl_loadv8sf_mask:
38195 case CODE_FOR_avx512vl_loadv8si_mask:
38196 case CODE_FOR_avx512vl_loadv4df_mask:
38197 case CODE_FOR_avx512vl_loadv4di_mask:
38198 case CODE_FOR_avx512vl_loadv4sf_mask:
38199 case CODE_FOR_avx512vl_loadv4si_mask:
38200 case CODE_FOR_avx512vl_loadv2df_mask:
38201 case CODE_FOR_avx512vl_loadv2di_mask:
38202 case CODE_FOR_avx512bw_loadv64qi_mask:
38203 case CODE_FOR_avx512vl_loadv32qi_mask:
38204 case CODE_FOR_avx512vl_loadv16qi_mask:
38205 case CODE_FOR_avx512bw_loadv32hi_mask:
38206 case CODE_FOR_avx512vl_loadv16hi_mask:
38207 case CODE_FOR_avx512vl_loadv8hi_mask:
38208 aligned_mem = true;
38209 break;
38210 default:
38211 break;
38213 break;
38214 case VOID_FTYPE_UINT_UINT_UINT:
38215 case VOID_FTYPE_UINT64_UINT_UINT:
38216 case UCHAR_FTYPE_UINT_UINT_UINT:
38217 case UCHAR_FTYPE_UINT64_UINT_UINT:
38218 nargs = 3;
38219 klass = load;
38220 memory = ARRAY_SIZE (args);
38221 last_arg_constant = true;
38222 break;
38223 default:
38224 gcc_unreachable ();
38227 gcc_assert (nargs <= ARRAY_SIZE (args));
38229 if (klass == store)
38231 arg = CALL_EXPR_ARG (exp, 0);
38232 op = expand_normal (arg);
38233 gcc_assert (target == 0);
38234 if (memory)
38236 op = ix86_zero_extend_to_Pmode (op);
38237 target = gen_rtx_MEM (tmode, op);
38238 /* target at this point has just BITS_PER_UNIT MEM_ALIGN
38239 on it. Try to improve it using get_pointer_alignment,
38240 and if the special builtin is one that requires strict
38241 			mode alignment, also from its GET_MODE_ALIGNMENT.
38242 Failure to do so could lead to ix86_legitimate_combined_insn
38243 rejecting all changes to such insns. */
38244 unsigned int align = get_pointer_alignment (arg);
38245 if (aligned_mem && align < GET_MODE_ALIGNMENT (tmode))
38246 align = GET_MODE_ALIGNMENT (tmode);
38247 if (MEM_ALIGN (target) < align)
38248 set_mem_align (target, align);
38250 else
38251 target = force_reg (tmode, op);
38252 arg_adjust = 1;
38254 else
38256 arg_adjust = 0;
38257 if (optimize
38258 || target == 0
38259 || !register_operand (target, tmode)
38260 || GET_MODE (target) != tmode)
38261 target = gen_reg_rtx (tmode);
38264 for (i = 0; i < nargs; i++)
38266 machine_mode mode = insn_p->operand[i + 1].mode;
38267 bool match;
38269 arg = CALL_EXPR_ARG (exp, i + arg_adjust);
38270 op = expand_normal (arg);
38271 match = insn_p->operand[i + 1].predicate (op, mode);
38273 if (last_arg_constant && (i + 1) == nargs)
38275 if (!match)
38277 if (icode == CODE_FOR_lwp_lwpvalsi3
38278 || icode == CODE_FOR_lwp_lwpinssi3
38279 || icode == CODE_FOR_lwp_lwpvaldi3
38280 || icode == CODE_FOR_lwp_lwpinsdi3)
38281 error ("the last argument must be a 32-bit immediate");
38282 else
38283 error ("the last argument must be an 8-bit immediate");
38284 return const0_rtx;
38287 else
38289 if (i == memory)
38291 /* This must be the memory operand. */
38292 op = ix86_zero_extend_to_Pmode (op);
38293 op = gen_rtx_MEM (mode, op);
38294 /* op at this point has just BITS_PER_UNIT MEM_ALIGN
38295 on it. Try to improve it using get_pointer_alignment,
38296 and if the special builtin is one that requires strict
38297 		     mode alignment, also from its GET_MODE_ALIGNMENT.
38298 Failure to do so could lead to ix86_legitimate_combined_insn
38299 rejecting all changes to such insns. */
38300 unsigned int align = get_pointer_alignment (arg);
38301 if (aligned_mem && align < GET_MODE_ALIGNMENT (mode))
38302 align = GET_MODE_ALIGNMENT (mode);
38303 if (MEM_ALIGN (op) < align)
38304 set_mem_align (op, align);
38306 else
38308 	  /* This must be a register operand.  */
38309 if (VECTOR_MODE_P (mode))
38310 op = safe_vector_operand (op, mode);
38312 op = fixup_modeless_constant (op, mode);
38314 if (GET_MODE (op) == mode || GET_MODE (op) == VOIDmode)
38315 op = copy_to_mode_reg (mode, op);
38316 else
38318 op = copy_to_reg (op);
38319 op = simplify_gen_subreg (mode, op, GET_MODE (op), 0);
38324 args[i].op = op;
38325 args[i].mode = mode;
38328 switch (nargs)
38330 case 0:
38331 pat = GEN_FCN (icode) (target);
38332 break;
38333 case 1:
38334 pat = GEN_FCN (icode) (target, args[0].op);
38335 break;
38336 case 2:
38337 pat = GEN_FCN (icode) (target, args[0].op, args[1].op);
38338 break;
38339 case 3:
38340 pat = GEN_FCN (icode) (target, args[0].op, args[1].op, args[2].op);
38341 break;
38342 default:
38343 gcc_unreachable ();
38346 if (! pat)
38347 return 0;
38348 emit_insn (pat);
38349 return klass == store ? 0 : target;
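/* Illustrative sketch, not part of the original file: the klass == load
   and klass == store paths above are what the AVX-512 masked load/store
   intrinsics reach.  Assuming the usual avx512fintrin.h wrappers:

	#include <immintrin.h>

	void
	store_masked (float *p, __mmask16 m, __m512 v)
	{
	  _mm512_mask_store_ps (p, m, v);
	  _mm512_mask_storeu_ps (p, m, v);
	}

   The first (aligned) form maps to one of the avx512f_storev*_mask
   patterns listed in the aligned_mem switch above, so MEM_ALIGN is raised
   to the mode alignment; the unaligned form keeps only the alignment
   derived from the pointer.  */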
38352 /* Return the integer constant in ARG. Constrain it to be in the range
38353 of the subparts of VEC_TYPE; issue an error if not. */
38355 static int
38356 get_element_number (tree vec_type, tree arg)
38358 unsigned HOST_WIDE_INT elt, max = TYPE_VECTOR_SUBPARTS (vec_type) - 1;
38360 if (!tree_fits_uhwi_p (arg)
38361 || (elt = tree_to_uhwi (arg), elt > max))
38363 error ("selector must be an integer constant in the range 0..%wi", max);
38364 return 0;
38367 return elt;
38370 /* A subroutine of ix86_expand_builtin. These builtins are a wrapper around
38371 ix86_expand_vector_init. We DO have language-level syntax for this, in
38372 the form of (type){ init-list }. Except that since we can't place emms
38373 instructions from inside the compiler, we can't allow the use of MMX
38374 registers unless the user explicitly asks for it. So we do *not* define
38375 vec_set/vec_extract/vec_init patterns for MMX modes in mmx.md. Instead
38376    we have builtins invoked by mmintrin.h that give us license to emit
38377 these sorts of instructions. */
38379 static rtx
38380 ix86_expand_vec_init_builtin (tree type, tree exp, rtx target)
38382 machine_mode tmode = TYPE_MODE (type);
38383 machine_mode inner_mode = GET_MODE_INNER (tmode);
38384 int i, n_elt = GET_MODE_NUNITS (tmode);
38385 rtvec v = rtvec_alloc (n_elt);
38387 gcc_assert (VECTOR_MODE_P (tmode));
38388 gcc_assert (call_expr_nargs (exp) == n_elt);
38390 for (i = 0; i < n_elt; ++i)
38392 rtx x = expand_normal (CALL_EXPR_ARG (exp, i));
38393 RTVEC_ELT (v, i) = gen_lowpart (inner_mode, x);
38396 if (!target || !register_operand (target, tmode))
38397 target = gen_reg_rtx (tmode);
38399 ix86_expand_vector_init (true, target, gen_rtx_PARALLEL (tmode, v));
38400 return target;
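/* Illustrative only: these vec_init builtins are what the MMX "set"
   intrinsics in mmintrin.h wrap.  Assuming the usual header definition:

	#include <mmintrin.h>

	__m64
	pack_two (int hi, int lo)
	{
	  return _mm_set_pi32 (hi, lo);
	}

   _mm_set_pi32 expands through __builtin_ia32_vec_init_v2si and hence
   through IX86_BUILTIN_VEC_INIT_V2SI above.  */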
38403 /* A subroutine of ix86_expand_builtin. These builtins are a wrapper around
38404 ix86_expand_vector_extract. They would be redundant (for non-MMX) if we
38405 had a language-level syntax for referencing vector elements. */
38407 static rtx
38408 ix86_expand_vec_ext_builtin (tree exp, rtx target)
38410 machine_mode tmode, mode0;
38411 tree arg0, arg1;
38412 int elt;
38413 rtx op0;
38415 arg0 = CALL_EXPR_ARG (exp, 0);
38416 arg1 = CALL_EXPR_ARG (exp, 1);
38418 op0 = expand_normal (arg0);
38419 elt = get_element_number (TREE_TYPE (arg0), arg1);
38421 tmode = TYPE_MODE (TREE_TYPE (TREE_TYPE (arg0)));
38422 mode0 = TYPE_MODE (TREE_TYPE (arg0));
38423 gcc_assert (VECTOR_MODE_P (mode0));
38425 op0 = force_reg (mode0, op0);
38427 if (optimize || !target || !register_operand (target, tmode))
38428 target = gen_reg_rtx (tmode);
38430 ix86_expand_vector_extract (true, target, op0, elt);
38432 return target;
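/* Illustrative only: the vec_ext builtins back the element-extract
   intrinsics, e.g. (assuming the usual emmintrin.h definition):

	#include <emmintrin.h>

	int
	third_halfword (__m128i v)
	{
	  return _mm_extract_epi16 (v, 2);
	}

   which goes through __builtin_ia32_vec_ext_v8hi and hence
   IX86_BUILTIN_VEC_EXT_V8HI above; the selector is range-checked by
   get_element_number.  */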
38435 /* A subroutine of ix86_expand_builtin. These builtins are a wrapper around
38436 ix86_expand_vector_set. They would be redundant (for non-MMX) if we had
38437 a language-level syntax for referencing vector elements. */
38439 static rtx
38440 ix86_expand_vec_set_builtin (tree exp)
38442 machine_mode tmode, mode1;
38443 tree arg0, arg1, arg2;
38444 int elt;
38445 rtx op0, op1, target;
38447 arg0 = CALL_EXPR_ARG (exp, 0);
38448 arg1 = CALL_EXPR_ARG (exp, 1);
38449 arg2 = CALL_EXPR_ARG (exp, 2);
38451 tmode = TYPE_MODE (TREE_TYPE (arg0));
38452 mode1 = TYPE_MODE (TREE_TYPE (TREE_TYPE (arg0)));
38453 gcc_assert (VECTOR_MODE_P (tmode));
38455 op0 = expand_expr (arg0, NULL_RTX, tmode, EXPAND_NORMAL);
38456 op1 = expand_expr (arg1, NULL_RTX, mode1, EXPAND_NORMAL);
38457 elt = get_element_number (TREE_TYPE (arg0), arg2);
38459 if (GET_MODE (op1) != mode1 && GET_MODE (op1) != VOIDmode)
38460 op1 = convert_modes (mode1, GET_MODE (op1), op1, true);
38462 op0 = force_reg (tmode, op0);
38463 op1 = force_reg (mode1, op1);
38465 /* OP0 is the source of these builtin functions and shouldn't be
38466 modified. Create a copy, use it and return it as target. */
38467 target = gen_reg_rtx (tmode);
38468 emit_move_insn (target, op0);
38469 ix86_expand_vector_set (true, target, op1, elt);
38471 return target;
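/* Illustrative only: the vec_set builtins back the element-insert
   intrinsics, e.g. (assuming the usual emmintrin.h definition):

	#include <emmintrin.h>

	__m128i
	set_third_halfword (__m128i v, int x)
	{
	  return _mm_insert_epi16 (v, x, 2);
	}

   which wraps __builtin_ia32_vec_set_v8hi and reaches
   IX86_BUILTIN_VEC_SET_V8HI above.  Note the expander copies OP0, so the
   source vector itself is left unmodified.  */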
38474 /* Emit conditional move of SRC to DST with condition
38475 OP1 CODE OP2. */
38476 static void
38477 ix86_emit_cmove (rtx dst, rtx src, enum rtx_code code, rtx op1, rtx op2)
38479 rtx t;
38481 if (TARGET_CMOVE)
38483 t = ix86_expand_compare (code, op1, op2);
38484 emit_insn (gen_rtx_SET (dst, gen_rtx_IF_THEN_ELSE (GET_MODE (dst), t,
38485 src, dst)));
38487 else
38489 rtx_code_label *nomove = gen_label_rtx ();
38490 emit_cmp_and_jump_insns (op1, op2, reverse_condition (code),
38491 const0_rtx, GET_MODE (op1), 1, nomove);
38492 emit_move_insn (dst, src);
38493 emit_label (nomove);
38497 /* Choose the unsigned max of DST and SRC and store it in DST.  */
38498 static void
38499 ix86_emit_move_max (rtx dst, rtx src)
38501 ix86_emit_cmove (dst, src, LTU, dst, src);
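/* Clarifying note: ix86_emit_cmove (dst, src, LTU, dst, src) overwrites
   DST with SRC exactly when DST < SRC as unsigned values, i.e. it
   computes the unsigned maximum

	dst = (dst < src) ? src : dst;

   MPX stores the upper bound in 1's complement form, so taking the
   unsigned maximum of the complemented values also works for the upper
   bound; that is why both the LB and UB computations in the BNDNARROW
   and BNDINT cases below use this helper.  */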
38504 /* Expand an expression EXP that calls a built-in function,
38505 with result going to TARGET if that's convenient
38506 (and in mode MODE if that's convenient).
38507 SUBTARGET may be used as the target for computing one of EXP's operands.
38508 IGNORE is nonzero if the value is to be ignored. */
38510 static rtx
38511 ix86_expand_builtin (tree exp, rtx target, rtx subtarget,
38512 machine_mode mode, int ignore)
38514 const struct builtin_description *d;
38515 size_t i;
38516 enum insn_code icode;
38517 tree fndecl = TREE_OPERAND (CALL_EXPR_FN (exp), 0);
38518 tree arg0, arg1, arg2, arg3, arg4;
38519 rtx op0, op1, op2, op3, op4, pat, insn;
38520 machine_mode mode0, mode1, mode2, mode3, mode4;
38521 unsigned int fcode = DECL_FUNCTION_CODE (fndecl);
38523 /* For CPU builtins that can be folded, fold first and expand the fold. */
38524 switch (fcode)
38526 case IX86_BUILTIN_CPU_INIT:
38528 /* Make it call __cpu_indicator_init in libgcc. */
38529 tree call_expr, fndecl, type;
38530 type = build_function_type_list (integer_type_node, NULL_TREE);
38531 fndecl = build_fn_decl ("__cpu_indicator_init", type);
38532 call_expr = build_call_expr (fndecl, 0);
38533 return expand_expr (call_expr, target, mode, EXPAND_NORMAL);
38535 case IX86_BUILTIN_CPU_IS:
38536 case IX86_BUILTIN_CPU_SUPPORTS:
38538 tree arg0 = CALL_EXPR_ARG (exp, 0);
38539 tree fold_expr = fold_builtin_cpu (fndecl, &arg0);
38540 gcc_assert (fold_expr != NULL_TREE);
38541 return expand_expr (fold_expr, target, mode, EXPAND_NORMAL);
38545 /* Determine whether the builtin function is available under the current ISA.
38546 Originally the builtin was not created if it wasn't applicable to the
38547 current ISA based on the command line switches. With function specific
38548 options, we need to check in the context of the function making the call
38549 whether it is supported. */
38550 if (ix86_builtins_isa[fcode].isa
38551 && !(ix86_builtins_isa[fcode].isa & ix86_isa_flags))
38553 char *opts = ix86_target_string (ix86_builtins_isa[fcode].isa, 0, NULL,
38554 NULL, (enum fpmath_unit) 0, false);
38556 if (!opts)
38557 error ("%qE needs unknown isa option", fndecl);
38558 else
38560 gcc_assert (opts != NULL);
38561 error ("%qE needs isa option %s", fndecl, opts);
38562 free (opts);
38564 return const0_rtx;
38567 switch (fcode)
38569 case IX86_BUILTIN_BNDMK:
38570 if (!target
38571 || GET_MODE (target) != BNDmode
38572 || !register_operand (target, BNDmode))
38573 target = gen_reg_rtx (BNDmode);
38575 arg0 = CALL_EXPR_ARG (exp, 0);
38576 arg1 = CALL_EXPR_ARG (exp, 1);
38578 op0 = expand_normal (arg0);
38579 op1 = expand_normal (arg1);
38581 if (!register_operand (op0, Pmode))
38582 op0 = ix86_zero_extend_to_Pmode (op0);
38583 if (!register_operand (op1, Pmode))
38584 op1 = ix86_zero_extend_to_Pmode (op1);
38586       /* Builtin arg1 is the size of the block, but instruction op1 should
38587 	 be (size - 1).  */
38588 op1 = expand_simple_binop (Pmode, PLUS, op1, constm1_rtx,
38589 NULL_RTX, 1, OPTAB_DIRECT);
38591 emit_insn (BNDmode == BND64mode
38592 ? gen_bnd64_mk (target, op0, op1)
38593 : gen_bnd32_mk (target, op0, op1));
38594 return target;
38596 case IX86_BUILTIN_BNDSTX:
38597 arg0 = CALL_EXPR_ARG (exp, 0);
38598 arg1 = CALL_EXPR_ARG (exp, 1);
38599 arg2 = CALL_EXPR_ARG (exp, 2);
38601 op0 = expand_normal (arg0);
38602 op1 = expand_normal (arg1);
38603 op2 = expand_normal (arg2);
38605 if (!register_operand (op0, Pmode))
38606 op0 = ix86_zero_extend_to_Pmode (op0);
38607 if (!register_operand (op1, BNDmode))
38608 op1 = copy_to_mode_reg (BNDmode, op1);
38609 if (!register_operand (op2, Pmode))
38610 op2 = ix86_zero_extend_to_Pmode (op2);
38612 emit_insn (BNDmode == BND64mode
38613 ? gen_bnd64_stx (op2, op0, op1)
38614 : gen_bnd32_stx (op2, op0, op1));
38615 return 0;
38617 case IX86_BUILTIN_BNDLDX:
38618 if (!target
38619 || GET_MODE (target) != BNDmode
38620 || !register_operand (target, BNDmode))
38621 target = gen_reg_rtx (BNDmode);
38623 arg0 = CALL_EXPR_ARG (exp, 0);
38624 arg1 = CALL_EXPR_ARG (exp, 1);
38626 op0 = expand_normal (arg0);
38627 op1 = expand_normal (arg1);
38629 if (!register_operand (op0, Pmode))
38630 op0 = ix86_zero_extend_to_Pmode (op0);
38631 if (!register_operand (op1, Pmode))
38632 op1 = ix86_zero_extend_to_Pmode (op1);
38634 emit_insn (BNDmode == BND64mode
38635 ? gen_bnd64_ldx (target, op0, op1)
38636 : gen_bnd32_ldx (target, op0, op1));
38637 return target;
38639 case IX86_BUILTIN_BNDCL:
38640 arg0 = CALL_EXPR_ARG (exp, 0);
38641 arg1 = CALL_EXPR_ARG (exp, 1);
38643 op0 = expand_normal (arg0);
38644 op1 = expand_normal (arg1);
38646 if (!register_operand (op0, Pmode))
38647 op0 = ix86_zero_extend_to_Pmode (op0);
38648 if (!register_operand (op1, BNDmode))
38649 op1 = copy_to_mode_reg (BNDmode, op1);
38651 emit_insn (BNDmode == BND64mode
38652 ? gen_bnd64_cl (op1, op0)
38653 : gen_bnd32_cl (op1, op0));
38654 return 0;
38656 case IX86_BUILTIN_BNDCU:
38657 arg0 = CALL_EXPR_ARG (exp, 0);
38658 arg1 = CALL_EXPR_ARG (exp, 1);
38660 op0 = expand_normal (arg0);
38661 op1 = expand_normal (arg1);
38663 if (!register_operand (op0, Pmode))
38664 op0 = ix86_zero_extend_to_Pmode (op0);
38665 if (!register_operand (op1, BNDmode))
38666 op1 = copy_to_mode_reg (BNDmode, op1);
38668 emit_insn (BNDmode == BND64mode
38669 ? gen_bnd64_cu (op1, op0)
38670 : gen_bnd32_cu (op1, op0));
38671 return 0;
38673 case IX86_BUILTIN_BNDRET:
38674 arg0 = CALL_EXPR_ARG (exp, 0);
38675 gcc_assert (TREE_CODE (arg0) == SSA_NAME);
38676 target = chkp_get_rtl_bounds (arg0);
38678       /* If no bounds were specified for the returned value,
38679 	 then use INIT bounds.  This usually happens when
38680 	 some built-in function is expanded.  */
38681 if (!target)
38683 rtx t1 = gen_reg_rtx (Pmode);
38684 rtx t2 = gen_reg_rtx (Pmode);
38685 target = gen_reg_rtx (BNDmode);
38686 emit_move_insn (t1, const0_rtx);
38687 emit_move_insn (t2, constm1_rtx);
38688 emit_insn (BNDmode == BND64mode
38689 ? gen_bnd64_mk (target, t1, t2)
38690 : gen_bnd32_mk (target, t1, t2));
38693 gcc_assert (target && REG_P (target));
38694 return target;
38696 case IX86_BUILTIN_BNDNARROW:
38698 rtx m1, m1h1, m1h2, lb, ub, t1;
38700 /* Return value and lb. */
38701 arg0 = CALL_EXPR_ARG (exp, 0);
38702 /* Bounds. */
38703 arg1 = CALL_EXPR_ARG (exp, 1);
38704 /* Size. */
38705 arg2 = CALL_EXPR_ARG (exp, 2);
38707 lb = expand_normal (arg0);
38708 op1 = expand_normal (arg1);
38709 op2 = expand_normal (arg2);
38711 /* Size was passed but we need to use (size - 1) as for bndmk. */
38712 op2 = expand_simple_binop (Pmode, PLUS, op2, constm1_rtx,
38713 NULL_RTX, 1, OPTAB_DIRECT);
38716 	/* Add LB to size and invert to get UB.  */
38716 op2 = expand_simple_binop (Pmode, PLUS, op2, lb,
38717 op2, 1, OPTAB_DIRECT);
38718 ub = expand_simple_unop (Pmode, NOT, op2, op2, 1);
38720 if (!register_operand (lb, Pmode))
38721 lb = ix86_zero_extend_to_Pmode (lb);
38722 if (!register_operand (ub, Pmode))
38723 ub = ix86_zero_extend_to_Pmode (ub);
38725 /* We need to move bounds to memory before any computations. */
38726 if (MEM_P (op1))
38727 m1 = op1;
38728 else
38730 m1 = assign_386_stack_local (BNDmode, SLOT_TEMP);
38731 emit_move_insn (m1, op1);
38734 /* Generate mem expression to be used for access to LB and UB. */
38735 m1h1 = adjust_address (m1, Pmode, 0);
38736 m1h2 = adjust_address (m1, Pmode, GET_MODE_SIZE (Pmode));
38738 t1 = gen_reg_rtx (Pmode);
38740 /* Compute LB. */
38741 emit_move_insn (t1, m1h1);
38742 ix86_emit_move_max (t1, lb);
38743 emit_move_insn (m1h1, t1);
38745 /* Compute UB. UB is stored in 1's complement form. Therefore
38746 we also use max here. */
38747 emit_move_insn (t1, m1h2);
38748 ix86_emit_move_max (t1, ub);
38749 emit_move_insn (m1h2, t1);
38751 op2 = gen_reg_rtx (BNDmode);
38752 emit_move_insn (op2, m1);
38754 return chkp_join_splitted_slot (lb, op2);
38757 case IX86_BUILTIN_BNDINT:
38759 rtx res, rh1, rh2, lb1, lb2, ub1, ub2;
38761 if (!target
38762 || GET_MODE (target) != BNDmode
38763 || !register_operand (target, BNDmode))
38764 target = gen_reg_rtx (BNDmode);
38766 arg0 = CALL_EXPR_ARG (exp, 0);
38767 arg1 = CALL_EXPR_ARG (exp, 1);
38769 op0 = expand_normal (arg0);
38770 op1 = expand_normal (arg1);
38772 res = assign_386_stack_local (BNDmode, SLOT_TEMP);
38773 rh1 = adjust_address (res, Pmode, 0);
38774 rh2 = adjust_address (res, Pmode, GET_MODE_SIZE (Pmode));
38777 	/* Put the first bounds into temporaries.  */
38777 lb1 = gen_reg_rtx (Pmode);
38778 ub1 = gen_reg_rtx (Pmode);
38779 if (MEM_P (op0))
38781 emit_move_insn (lb1, adjust_address (op0, Pmode, 0));
38782 emit_move_insn (ub1, adjust_address (op0, Pmode,
38783 GET_MODE_SIZE (Pmode)));
38785 else
38787 emit_move_insn (res, op0);
38788 emit_move_insn (lb1, rh1);
38789 emit_move_insn (ub1, rh2);
38792 	/* Put the second bounds into temporaries.  */
38793 lb2 = gen_reg_rtx (Pmode);
38794 ub2 = gen_reg_rtx (Pmode);
38795 if (MEM_P (op1))
38797 emit_move_insn (lb2, adjust_address (op1, Pmode, 0));
38798 emit_move_insn (ub2, adjust_address (op1, Pmode,
38799 GET_MODE_SIZE (Pmode)));
38801 else
38803 emit_move_insn (res, op1);
38804 emit_move_insn (lb2, rh1);
38805 emit_move_insn (ub2, rh2);
38808 /* Compute LB. */
38809 ix86_emit_move_max (lb1, lb2);
38810 emit_move_insn (rh1, lb1);
38812 /* Compute UB. UB is stored in 1's complement form. Therefore
38813 we also use max here. */
38814 ix86_emit_move_max (ub1, ub2);
38815 emit_move_insn (rh2, ub1);
38817 emit_move_insn (target, res);
38819 return target;
38822 case IX86_BUILTIN_SIZEOF:
38824 tree name;
38825 rtx symbol;
38827 if (!target
38828 || GET_MODE (target) != Pmode
38829 || !register_operand (target, Pmode))
38830 target = gen_reg_rtx (Pmode);
38832 arg0 = CALL_EXPR_ARG (exp, 0);
38833 gcc_assert (TREE_CODE (arg0) == VAR_DECL);
38835 name = DECL_ASSEMBLER_NAME (arg0);
38836 symbol = gen_rtx_SYMBOL_REF (Pmode, IDENTIFIER_POINTER (name));
38838 emit_insn (Pmode == SImode
38839 ? gen_move_size_reloc_si (target, symbol)
38840 : gen_move_size_reloc_di (target, symbol));
38842 return target;
38845 case IX86_BUILTIN_BNDLOWER:
38847 rtx mem, hmem;
38849 if (!target
38850 || GET_MODE (target) != Pmode
38851 || !register_operand (target, Pmode))
38852 target = gen_reg_rtx (Pmode);
38854 arg0 = CALL_EXPR_ARG (exp, 0);
38855 op0 = expand_normal (arg0);
38857 /* We need to move bounds to memory first. */
38858 if (MEM_P (op0))
38859 mem = op0;
38860 else
38862 mem = assign_386_stack_local (BNDmode, SLOT_TEMP);
38863 emit_move_insn (mem, op0);
38866 /* Generate mem expression to access LB and load it. */
38867 hmem = adjust_address (mem, Pmode, 0);
38868 emit_move_insn (target, hmem);
38870 return target;
38873 case IX86_BUILTIN_BNDUPPER:
38875 rtx mem, hmem, res;
38877 if (!target
38878 || GET_MODE (target) != Pmode
38879 || !register_operand (target, Pmode))
38880 target = gen_reg_rtx (Pmode);
38882 arg0 = CALL_EXPR_ARG (exp, 0);
38883 op0 = expand_normal (arg0);
38885 /* We need to move bounds to memory first. */
38886 if (MEM_P (op0))
38887 mem = op0;
38888 else
38890 mem = assign_386_stack_local (BNDmode, SLOT_TEMP);
38891 emit_move_insn (mem, op0);
38894 /* Generate mem expression to access UB. */
38895 hmem = adjust_address (mem, Pmode, GET_MODE_SIZE (Pmode));
38897 	/* We need to invert all bits of UB.  */
38898 res = expand_simple_unop (Pmode, NOT, hmem, target, 1);
38900 if (res != target)
38901 emit_move_insn (target, res);
38903 return target;
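/* Illustrative sketch (not part of the original file): when MPX pointer
   checking is active, the generic bounds builtins documented in the GCC
   manual are routed to the IX86_BUILTIN_BND* cases above; BNDLOWER and
   BNDUPPER roughly correspond to

	extern int buf[16];
	const void *lb = __builtin___bnd_get_ptr_lbound (buf);
	const void *ub = __builtin___bnd_get_ptr_ubound (buf);

   The BNDUPPER case inverts the loaded value because, as noted above,
   MPX keeps the upper bound in 1's complement form.  */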
38906 case IX86_BUILTIN_MASKMOVQ:
38907 case IX86_BUILTIN_MASKMOVDQU:
38908 icode = (fcode == IX86_BUILTIN_MASKMOVQ
38909 ? CODE_FOR_mmx_maskmovq
38910 : CODE_FOR_sse2_maskmovdqu);
38911 /* Note the arg order is different from the operand order. */
38912 arg1 = CALL_EXPR_ARG (exp, 0);
38913 arg2 = CALL_EXPR_ARG (exp, 1);
38914 arg0 = CALL_EXPR_ARG (exp, 2);
38915 op0 = expand_normal (arg0);
38916 op1 = expand_normal (arg1);
38917 op2 = expand_normal (arg2);
38918 mode0 = insn_data[icode].operand[0].mode;
38919 mode1 = insn_data[icode].operand[1].mode;
38920 mode2 = insn_data[icode].operand[2].mode;
38922 op0 = ix86_zero_extend_to_Pmode (op0);
38923 op0 = gen_rtx_MEM (mode1, op0);
38925 if (!insn_data[icode].operand[0].predicate (op0, mode0))
38926 op0 = copy_to_mode_reg (mode0, op0);
38927 if (!insn_data[icode].operand[1].predicate (op1, mode1))
38928 op1 = copy_to_mode_reg (mode1, op1);
38929 if (!insn_data[icode].operand[2].predicate (op2, mode2))
38930 op2 = copy_to_mode_reg (mode2, op2);
38931 pat = GEN_FCN (icode) (op0, op1, op2);
38932 if (! pat)
38933 return 0;
38934 emit_insn (pat);
38935 return 0;
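/* Illustrative only: the arg/operand reordering above reflects the
   user-level intrinsic, which passes the destination address last while
   the maskmov insn pattern has the memory as operand 0.  Assuming the
   usual emmintrin.h wrapper:

	#include <emmintrin.h>

	void
	store_selected_bytes (__m128i data, __m128i mask, char *p)
	{
	  _mm_maskmoveu_si128 (data, mask, p);
	}
   */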
38937 case IX86_BUILTIN_LDMXCSR:
38938 op0 = expand_normal (CALL_EXPR_ARG (exp, 0));
38939 target = assign_386_stack_local (SImode, SLOT_TEMP);
38940 emit_move_insn (target, op0);
38941 emit_insn (gen_sse_ldmxcsr (target));
38942 return 0;
38944 case IX86_BUILTIN_STMXCSR:
38945 target = assign_386_stack_local (SImode, SLOT_TEMP);
38946 emit_insn (gen_sse_stmxcsr (target));
38947 return copy_to_mode_reg (SImode, target);
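/* Illustrative only: both MXCSR builtins go through a stack temporary
   because LDMXCSR/STMXCSR only take a memory operand.  Assuming the
   usual xmmintrin.h wrappers:

	#include <xmmintrin.h>

	unsigned int
	flush_to_zero_on (void)
	{
	  unsigned int old = _mm_getcsr ();
	  _mm_setcsr (old | _MM_FLUSH_ZERO_ON);
	  return old;
	}
   */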
38949 case IX86_BUILTIN_CLFLUSH:
38950 arg0 = CALL_EXPR_ARG (exp, 0);
38951 op0 = expand_normal (arg0);
38952 icode = CODE_FOR_sse2_clflush;
38953 if (!insn_data[icode].operand[0].predicate (op0, Pmode))
38954 op0 = ix86_zero_extend_to_Pmode (op0);
38956 emit_insn (gen_sse2_clflush (op0));
38957 return 0;
38959 case IX86_BUILTIN_CLWB:
38960 arg0 = CALL_EXPR_ARG (exp, 0);
38961 op0 = expand_normal (arg0);
38962 icode = CODE_FOR_clwb;
38963 if (!insn_data[icode].operand[0].predicate (op0, Pmode))
38964 op0 = ix86_zero_extend_to_Pmode (op0);
38966 emit_insn (gen_clwb (op0));
38967 return 0;
38969 case IX86_BUILTIN_CLFLUSHOPT:
38970 arg0 = CALL_EXPR_ARG (exp, 0);
38971 op0 = expand_normal (arg0);
38972 icode = CODE_FOR_clflushopt;
38973 if (!insn_data[icode].operand[0].predicate (op0, Pmode))
38974 op0 = ix86_zero_extend_to_Pmode (op0);
38976 emit_insn (gen_clflushopt (op0));
38977 return 0;
38979 case IX86_BUILTIN_MONITOR:
38980 case IX86_BUILTIN_MONITORX:
38981 arg0 = CALL_EXPR_ARG (exp, 0);
38982 arg1 = CALL_EXPR_ARG (exp, 1);
38983 arg2 = CALL_EXPR_ARG (exp, 2);
38984 op0 = expand_normal (arg0);
38985 op1 = expand_normal (arg1);
38986 op2 = expand_normal (arg2);
38987 if (!REG_P (op0))
38988 op0 = ix86_zero_extend_to_Pmode (op0);
38989 if (!REG_P (op1))
38990 op1 = copy_to_mode_reg (SImode, op1);
38991 if (!REG_P (op2))
38992 op2 = copy_to_mode_reg (SImode, op2);
38994 emit_insn (fcode == IX86_BUILTIN_MONITOR
38995 ? ix86_gen_monitor (op0, op1, op2)
38996 : ix86_gen_monitorx (op0, op1, op2));
38997 return 0;
38999 case IX86_BUILTIN_MWAIT:
39000 arg0 = CALL_EXPR_ARG (exp, 0);
39001 arg1 = CALL_EXPR_ARG (exp, 1);
39002 op0 = expand_normal (arg0);
39003 op1 = expand_normal (arg1);
39004 if (!REG_P (op0))
39005 op0 = copy_to_mode_reg (SImode, op0);
39006 if (!REG_P (op1))
39007 op1 = copy_to_mode_reg (SImode, op1);
39008 emit_insn (gen_sse3_mwait (op0, op1));
39009 return 0;
39011 case IX86_BUILTIN_MWAITX:
39012 arg0 = CALL_EXPR_ARG (exp, 0);
39013 arg1 = CALL_EXPR_ARG (exp, 1);
39014 arg2 = CALL_EXPR_ARG (exp, 2);
39015 op0 = expand_normal (arg0);
39016 op1 = expand_normal (arg1);
39017 op2 = expand_normal (arg2);
39018 if (!REG_P (op0))
39019 op0 = copy_to_mode_reg (SImode, op0);
39020 if (!REG_P (op1))
39021 op1 = copy_to_mode_reg (SImode, op1);
39022 if (!REG_P (op2))
39023 op2 = copy_to_mode_reg (SImode, op2);
39024 emit_insn (gen_mwaitx (op0, op1, op2));
39025 return 0;
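/* Illustrative only: these cases back the MONITOR/MWAIT intrinsics.
   Assuming the usual pmmintrin.h wrappers (MONITORX/MWAITX have analogous
   wrappers when -mmwaitx is enabled):

	#include <pmmintrin.h>

	void
	wait_on (const void *addr)
	{
	  _mm_monitor (addr, 0, 0);
	  _mm_mwait (0, 0);
	}
   */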
39027 case IX86_BUILTIN_VEC_INIT_V2SI:
39028 case IX86_BUILTIN_VEC_INIT_V4HI:
39029 case IX86_BUILTIN_VEC_INIT_V8QI:
39030 return ix86_expand_vec_init_builtin (TREE_TYPE (exp), exp, target);
39032 case IX86_BUILTIN_VEC_EXT_V2DF:
39033 case IX86_BUILTIN_VEC_EXT_V2DI:
39034 case IX86_BUILTIN_VEC_EXT_V4SF:
39035 case IX86_BUILTIN_VEC_EXT_V4SI:
39036 case IX86_BUILTIN_VEC_EXT_V8HI:
39037 case IX86_BUILTIN_VEC_EXT_V2SI:
39038 case IX86_BUILTIN_VEC_EXT_V4HI:
39039 case IX86_BUILTIN_VEC_EXT_V16QI:
39040 return ix86_expand_vec_ext_builtin (exp, target);
39042 case IX86_BUILTIN_VEC_SET_V2DI:
39043 case IX86_BUILTIN_VEC_SET_V4SF:
39044 case IX86_BUILTIN_VEC_SET_V4SI:
39045 case IX86_BUILTIN_VEC_SET_V8HI:
39046 case IX86_BUILTIN_VEC_SET_V4HI:
39047 case IX86_BUILTIN_VEC_SET_V16QI:
39048 return ix86_expand_vec_set_builtin (exp);
39050 case IX86_BUILTIN_INFQ:
39051 case IX86_BUILTIN_HUGE_VALQ:
39053 REAL_VALUE_TYPE inf;
39054 rtx tmp;
39056 real_inf (&inf);
39057 tmp = CONST_DOUBLE_FROM_REAL_VALUE (inf, mode);
39059 tmp = validize_mem (force_const_mem (mode, tmp));
39061 if (target == 0)
39062 target = gen_reg_rtx (mode);
39064 emit_move_insn (target, tmp);
39065 return target;
39068 case IX86_BUILTIN_RDPMC:
39069 case IX86_BUILTIN_RDTSC:
39070 case IX86_BUILTIN_RDTSCP:
39072 op0 = gen_reg_rtx (DImode);
39073 op1 = gen_reg_rtx (DImode);
39075 if (fcode == IX86_BUILTIN_RDPMC)
39077 arg0 = CALL_EXPR_ARG (exp, 0);
39078 op2 = expand_normal (arg0);
39079 if (!register_operand (op2, SImode))
39080 op2 = copy_to_mode_reg (SImode, op2);
39082 insn = (TARGET_64BIT
39083 ? gen_rdpmc_rex64 (op0, op1, op2)
39084 : gen_rdpmc (op0, op2));
39085 emit_insn (insn);
39087 else if (fcode == IX86_BUILTIN_RDTSC)
39089 insn = (TARGET_64BIT
39090 ? gen_rdtsc_rex64 (op0, op1)
39091 : gen_rdtsc (op0));
39092 emit_insn (insn);
39094 else
39096 op2 = gen_reg_rtx (SImode);
39098 insn = (TARGET_64BIT
39099 ? gen_rdtscp_rex64 (op0, op1, op2)
39100 : gen_rdtscp (op0, op2));
39101 emit_insn (insn);
39103 arg0 = CALL_EXPR_ARG (exp, 0);
39104 op4 = expand_normal (arg0);
39105 if (!address_operand (op4, VOIDmode))
39107 op4 = convert_memory_address (Pmode, op4);
39108 op4 = copy_addr_to_reg (op4);
39110 emit_move_insn (gen_rtx_MEM (SImode, op4), op2);
39113 if (target == 0)
39115 /* mode is VOIDmode if __builtin_rd* has been called
39116 without lhs. */
39117 if (mode == VOIDmode)
39118 return target;
39119 target = gen_reg_rtx (mode);
39122 if (TARGET_64BIT)
39124 op1 = expand_simple_binop (DImode, ASHIFT, op1, GEN_INT (32),
39125 op1, 1, OPTAB_DIRECT);
39126 op0 = expand_simple_binop (DImode, IOR, op0, op1,
39127 op0, 1, OPTAB_DIRECT);
39130 emit_move_insn (target, op0);
39131 return target;
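/* Illustrative only: on 64-bit targets RDTSC/RDTSCP/RDPMC return the
   counter as two 32-bit halves, which the ASHIFT/IOR sequence above
   reassembles into a single DImode value.  Assuming the usual
   x86intrin.h wrappers:

	#include <x86intrin.h>

	unsigned long long
	cycles (void)
	{
	  return __rdtsc ();
	}
   */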
39133 case IX86_BUILTIN_FXSAVE:
39134 case IX86_BUILTIN_FXRSTOR:
39135 case IX86_BUILTIN_FXSAVE64:
39136 case IX86_BUILTIN_FXRSTOR64:
39137 case IX86_BUILTIN_FNSTENV:
39138 case IX86_BUILTIN_FLDENV:
39139 mode0 = BLKmode;
39140 switch (fcode)
39142 case IX86_BUILTIN_FXSAVE:
39143 icode = CODE_FOR_fxsave;
39144 break;
39145 case IX86_BUILTIN_FXRSTOR:
39146 icode = CODE_FOR_fxrstor;
39147 break;
39148 case IX86_BUILTIN_FXSAVE64:
39149 icode = CODE_FOR_fxsave64;
39150 break;
39151 case IX86_BUILTIN_FXRSTOR64:
39152 icode = CODE_FOR_fxrstor64;
39153 break;
39154 case IX86_BUILTIN_FNSTENV:
39155 icode = CODE_FOR_fnstenv;
39156 break;
39157 case IX86_BUILTIN_FLDENV:
39158 icode = CODE_FOR_fldenv;
39159 break;
39160 default:
39161 gcc_unreachable ();
39164 arg0 = CALL_EXPR_ARG (exp, 0);
39165 op0 = expand_normal (arg0);
39167 if (!address_operand (op0, VOIDmode))
39169 op0 = convert_memory_address (Pmode, op0);
39170 op0 = copy_addr_to_reg (op0);
39172 op0 = gen_rtx_MEM (mode0, op0);
39174 pat = GEN_FCN (icode) (op0);
39175 if (pat)
39176 emit_insn (pat);
39177 return 0;
39179 case IX86_BUILTIN_XSAVE:
39180 case IX86_BUILTIN_XRSTOR:
39181 case IX86_BUILTIN_XSAVE64:
39182 case IX86_BUILTIN_XRSTOR64:
39183 case IX86_BUILTIN_XSAVEOPT:
39184 case IX86_BUILTIN_XSAVEOPT64:
39185 case IX86_BUILTIN_XSAVES:
39186 case IX86_BUILTIN_XRSTORS:
39187 case IX86_BUILTIN_XSAVES64:
39188 case IX86_BUILTIN_XRSTORS64:
39189 case IX86_BUILTIN_XSAVEC:
39190 case IX86_BUILTIN_XSAVEC64:
39191 arg0 = CALL_EXPR_ARG (exp, 0);
39192 arg1 = CALL_EXPR_ARG (exp, 1);
39193 op0 = expand_normal (arg0);
39194 op1 = expand_normal (arg1);
39196 if (!address_operand (op0, VOIDmode))
39198 op0 = convert_memory_address (Pmode, op0);
39199 op0 = copy_addr_to_reg (op0);
39201 op0 = gen_rtx_MEM (BLKmode, op0);
39203 op1 = force_reg (DImode, op1);
39205 if (TARGET_64BIT)
39207 op2 = expand_simple_binop (DImode, LSHIFTRT, op1, GEN_INT (32),
39208 NULL, 1, OPTAB_DIRECT);
39209 switch (fcode)
39211 case IX86_BUILTIN_XSAVE:
39212 icode = CODE_FOR_xsave_rex64;
39213 break;
39214 case IX86_BUILTIN_XRSTOR:
39215 icode = CODE_FOR_xrstor_rex64;
39216 break;
39217 case IX86_BUILTIN_XSAVE64:
39218 icode = CODE_FOR_xsave64;
39219 break;
39220 case IX86_BUILTIN_XRSTOR64:
39221 icode = CODE_FOR_xrstor64;
39222 break;
39223 case IX86_BUILTIN_XSAVEOPT:
39224 icode = CODE_FOR_xsaveopt_rex64;
39225 break;
39226 case IX86_BUILTIN_XSAVEOPT64:
39227 icode = CODE_FOR_xsaveopt64;
39228 break;
39229 case IX86_BUILTIN_XSAVES:
39230 icode = CODE_FOR_xsaves_rex64;
39231 break;
39232 case IX86_BUILTIN_XRSTORS:
39233 icode = CODE_FOR_xrstors_rex64;
39234 break;
39235 case IX86_BUILTIN_XSAVES64:
39236 icode = CODE_FOR_xsaves64;
39237 break;
39238 case IX86_BUILTIN_XRSTORS64:
39239 icode = CODE_FOR_xrstors64;
39240 break;
39241 case IX86_BUILTIN_XSAVEC:
39242 icode = CODE_FOR_xsavec_rex64;
39243 break;
39244 case IX86_BUILTIN_XSAVEC64:
39245 icode = CODE_FOR_xsavec64;
39246 break;
39247 default:
39248 gcc_unreachable ();
39251 op2 = gen_lowpart (SImode, op2);
39252 op1 = gen_lowpart (SImode, op1);
39253 pat = GEN_FCN (icode) (op0, op1, op2);
39255 else
39257 switch (fcode)
39259 case IX86_BUILTIN_XSAVE:
39260 icode = CODE_FOR_xsave;
39261 break;
39262 case IX86_BUILTIN_XRSTOR:
39263 icode = CODE_FOR_xrstor;
39264 break;
39265 case IX86_BUILTIN_XSAVEOPT:
39266 icode = CODE_FOR_xsaveopt;
39267 break;
39268 case IX86_BUILTIN_XSAVES:
39269 icode = CODE_FOR_xsaves;
39270 break;
39271 case IX86_BUILTIN_XRSTORS:
39272 icode = CODE_FOR_xrstors;
39273 break;
39274 case IX86_BUILTIN_XSAVEC:
39275 icode = CODE_FOR_xsavec;
39276 break;
39277 default:
39278 gcc_unreachable ();
39280 pat = GEN_FCN (icode) (op0, op1);
39283 if (pat)
39284 emit_insn (pat);
39285 return 0;
39287 case IX86_BUILTIN_LLWPCB:
39288 arg0 = CALL_EXPR_ARG (exp, 0);
39289 op0 = expand_normal (arg0);
39290 icode = CODE_FOR_lwp_llwpcb;
39291 if (!insn_data[icode].operand[0].predicate (op0, Pmode))
39292 op0 = ix86_zero_extend_to_Pmode (op0);
39293 emit_insn (gen_lwp_llwpcb (op0));
39294 return 0;
39296 case IX86_BUILTIN_SLWPCB:
39297 icode = CODE_FOR_lwp_slwpcb;
39298 if (!target
39299 || !insn_data[icode].operand[0].predicate (target, Pmode))
39300 target = gen_reg_rtx (Pmode);
39301 emit_insn (gen_lwp_slwpcb (target));
39302 return target;
39304 case IX86_BUILTIN_BEXTRI32:
39305 case IX86_BUILTIN_BEXTRI64:
39306 arg0 = CALL_EXPR_ARG (exp, 0);
39307 arg1 = CALL_EXPR_ARG (exp, 1);
39308 op0 = expand_normal (arg0);
39309 op1 = expand_normal (arg1);
39310 icode = (fcode == IX86_BUILTIN_BEXTRI32
39311 ? CODE_FOR_tbm_bextri_si
39312 : CODE_FOR_tbm_bextri_di);
39313 if (!CONST_INT_P (op1))
39315 error ("last argument must be an immediate");
39316 return const0_rtx;
39318 else
39320 unsigned char length = (INTVAL (op1) >> 8) & 0xFF;
39321 unsigned char lsb_index = INTVAL (op1) & 0xFF;
39322 op1 = GEN_INT (length);
39323 op2 = GEN_INT (lsb_index);
39324 pat = GEN_FCN (icode) (target, op0, op1, op2);
39325 if (pat)
39326 emit_insn (pat);
39327 return target;
39330 case IX86_BUILTIN_RDRAND16_STEP:
39331 icode = CODE_FOR_rdrandhi_1;
39332 mode0 = HImode;
39333 goto rdrand_step;
39335 case IX86_BUILTIN_RDRAND32_STEP:
39336 icode = CODE_FOR_rdrandsi_1;
39337 mode0 = SImode;
39338 goto rdrand_step;
39340 case IX86_BUILTIN_RDRAND64_STEP:
39341 icode = CODE_FOR_rdranddi_1;
39342 mode0 = DImode;
39344 rdrand_step:
39345 op0 = gen_reg_rtx (mode0);
39346 emit_insn (GEN_FCN (icode) (op0));
39348 arg0 = CALL_EXPR_ARG (exp, 0);
39349 op1 = expand_normal (arg0);
39350 if (!address_operand (op1, VOIDmode))
39352 op1 = convert_memory_address (Pmode, op1);
39353 op1 = copy_addr_to_reg (op1);
39355 emit_move_insn (gen_rtx_MEM (mode0, op1), op0);
39357 op1 = gen_reg_rtx (SImode);
39358 emit_move_insn (op1, CONST1_RTX (SImode));
39360 /* Emit SImode conditional move. */
39361 if (mode0 == HImode)
39363 op2 = gen_reg_rtx (SImode);
39364 emit_insn (gen_zero_extendhisi2 (op2, op0));
39366 else if (mode0 == SImode)
39367 op2 = op0;
39368 else
39369 op2 = gen_rtx_SUBREG (SImode, op0, 0);
39371 if (target == 0
39372 || !register_operand (target, SImode))
39373 target = gen_reg_rtx (SImode);
39375 pat = gen_rtx_GEU (VOIDmode, gen_rtx_REG (CCCmode, FLAGS_REG),
39376 const0_rtx);
39377 emit_insn (gen_rtx_SET (target,
39378 gen_rtx_IF_THEN_ELSE (SImode, pat, op2, op1)));
39379 return target;
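/* Illustrative only: the *_step builtins store the random value through
   the pointer and return a success indication derived from the carry
   flag, which is what the CCCmode conditional move above materializes.
   Assuming the usual immintrin.h wrapper (requires -mrdrnd):

	#include <immintrin.h>

	unsigned int
	random_u32 (void)
	{
	  unsigned int r;
	  while (!_rdrand32_step (&r))
	    ;
	  return r;
	}
   */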
39381 case IX86_BUILTIN_RDSEED16_STEP:
39382 icode = CODE_FOR_rdseedhi_1;
39383 mode0 = HImode;
39384 goto rdseed_step;
39386 case IX86_BUILTIN_RDSEED32_STEP:
39387 icode = CODE_FOR_rdseedsi_1;
39388 mode0 = SImode;
39389 goto rdseed_step;
39391 case IX86_BUILTIN_RDSEED64_STEP:
39392 icode = CODE_FOR_rdseeddi_1;
39393 mode0 = DImode;
39395 rdseed_step:
39396 op0 = gen_reg_rtx (mode0);
39397 emit_insn (GEN_FCN (icode) (op0));
39399 arg0 = CALL_EXPR_ARG (exp, 0);
39400 op1 = expand_normal (arg0);
39401 if (!address_operand (op1, VOIDmode))
39403 op1 = convert_memory_address (Pmode, op1);
39404 op1 = copy_addr_to_reg (op1);
39406 emit_move_insn (gen_rtx_MEM (mode0, op1), op0);
39408 op2 = gen_reg_rtx (QImode);
39410 pat = gen_rtx_LTU (QImode, gen_rtx_REG (CCCmode, FLAGS_REG),
39411 const0_rtx);
39412 emit_insn (gen_rtx_SET (op2, pat));
39414 if (target == 0
39415 || !register_operand (target, SImode))
39416 target = gen_reg_rtx (SImode);
39418 emit_insn (gen_zero_extendqisi2 (target, op2));
39419 return target;
39421 case IX86_BUILTIN_SBB32:
39422 icode = CODE_FOR_subsi3_carry;
39423 mode0 = SImode;
39424 goto addcarryx;
39426 case IX86_BUILTIN_SBB64:
39427 icode = CODE_FOR_subdi3_carry;
39428 mode0 = DImode;
39429 goto addcarryx;
39431 case IX86_BUILTIN_ADDCARRYX32:
39432 icode = TARGET_ADX ? CODE_FOR_adcxsi3 : CODE_FOR_addsi3_carry;
39433 mode0 = SImode;
39434 goto addcarryx;
39436 case IX86_BUILTIN_ADDCARRYX64:
39437 icode = TARGET_ADX ? CODE_FOR_adcxdi3 : CODE_FOR_adddi3_carry;
39438 mode0 = DImode;
39440 addcarryx:
39441 arg0 = CALL_EXPR_ARG (exp, 0); /* unsigned char c_in. */
39442 arg1 = CALL_EXPR_ARG (exp, 1); /* unsigned int src1. */
39443 arg2 = CALL_EXPR_ARG (exp, 2); /* unsigned int src2. */
39444 arg3 = CALL_EXPR_ARG (exp, 3); /* unsigned int *sum_out. */
39446 op0 = gen_reg_rtx (QImode);
39448 /* Generate CF from input operand. */
39449 op1 = expand_normal (arg0);
39450 op1 = copy_to_mode_reg (QImode, convert_to_mode (QImode, op1, 1));
39451 emit_insn (gen_addqi3_cc (op0, op1, constm1_rtx));
39453       /* Generate an ADCX/ADC instruction to compute X + Y + CF.  */
39454 op2 = expand_normal (arg1);
39455 op3 = expand_normal (arg2);
39457 if (!REG_P (op2))
39458 op2 = copy_to_mode_reg (mode0, op2);
39459 if (!REG_P (op3))
39460 op3 = copy_to_mode_reg (mode0, op3);
39462 op0 = gen_reg_rtx (mode0);
39464 op4 = gen_rtx_REG (CCCmode, FLAGS_REG);
39465 pat = gen_rtx_LTU (VOIDmode, op4, const0_rtx);
39466 emit_insn (GEN_FCN (icode) (op0, op2, op3, op4, pat));
39468 /* Store the result. */
39469 op4 = expand_normal (arg3);
39470 if (!address_operand (op4, VOIDmode))
39472 op4 = convert_memory_address (Pmode, op4);
39473 op4 = copy_addr_to_reg (op4);
39475 emit_move_insn (gen_rtx_MEM (mode0, op4), op0);
39477 /* Return current CF value. */
39478 if (target == 0)
39479 target = gen_reg_rtx (QImode);
39481 PUT_MODE (pat, QImode);
39482 emit_insn (gen_rtx_SET (target, pat));
39483 return target;
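/* Illustrative only: the expansion above first recreates CF from the
   incoming carry byte (adding -1 sets CF iff the byte is nonzero), then
   emits ADCX/ADC, stores the sum through the pointer and returns the new
   CF, so the builtin chains naturally for multi-precision addition.
   Assuming the usual adxintrin.h wrapper (requires -madx):

	#include <immintrin.h>

	void
	add_128 (unsigned int dst[4], const unsigned int a[4],
		 const unsigned int b[4])
	{
	  unsigned char c = 0;
	  unsigned int i;
	  for (i = 0; i < 4; i++)
	    c = _addcarryx_u32 (c, a[i], b[i], &dst[i]);
	}

   The SBB32/SBB64 cases are the analogous subtract-with-borrow forms.  */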
39485 case IX86_BUILTIN_READ_FLAGS:
39486 emit_insn (gen_push (gen_rtx_REG (word_mode, FLAGS_REG)));
39488 if (optimize
39489 || target == NULL_RTX
39490 || !nonimmediate_operand (target, word_mode)
39491 || GET_MODE (target) != word_mode)
39492 target = gen_reg_rtx (word_mode);
39494 emit_insn (gen_pop (target));
39495 return target;
39497 case IX86_BUILTIN_WRITE_FLAGS:
39499 arg0 = CALL_EXPR_ARG (exp, 0);
39500 op0 = expand_normal (arg0);
39501 if (!general_no_elim_operand (op0, word_mode))
39502 op0 = copy_to_mode_reg (word_mode, op0);
39504 emit_insn (gen_push (op0));
39505 emit_insn (gen_pop (gen_rtx_REG (word_mode, FLAGS_REG)));
39506 return 0;
39508 case IX86_BUILTIN_KORTESTC16:
39509 icode = CODE_FOR_kortestchi;
39510 mode0 = HImode;
39511 mode1 = CCCmode;
39512 goto kortest;
39514 case IX86_BUILTIN_KORTESTZ16:
39515 icode = CODE_FOR_kortestzhi;
39516 mode0 = HImode;
39517 mode1 = CCZmode;
39519 kortest:
39520 arg0 = CALL_EXPR_ARG (exp, 0); /* Mask reg src1. */
39521 arg1 = CALL_EXPR_ARG (exp, 1); /* Mask reg src2. */
39522 op0 = expand_normal (arg0);
39523 op1 = expand_normal (arg1);
39525 op0 = copy_to_reg (op0);
39526 op0 = simplify_gen_subreg (mode0, op0, GET_MODE (op0), 0);
39527 op1 = copy_to_reg (op1);
39528 op1 = simplify_gen_subreg (mode0, op1, GET_MODE (op1), 0);
39530 target = gen_reg_rtx (QImode);
39531 emit_insn (gen_rtx_SET (target, const0_rtx));
39533 /* Emit kortest. */
39534 emit_insn (GEN_FCN (icode) (op0, op1));
39535 /* And use setcc to return result from flags. */
39536 ix86_expand_setcc (target, EQ,
39537 gen_rtx_REG (mode1, FLAGS_REG), const0_rtx);
39538 return target;
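/* Illustrative only: KORTEST ORs two mask registers and sets ZF (all
   bits zero) or CF (all bits one); the setcc above turns the requested
   flag into a 0/1 value.  A rough caller, assuming the usual
   __builtin_ia32_kortestzhi spelling of the KORTESTZ16 builtin:

	#include <immintrin.h>

	int
	masks_or_to_zero (__mmask16 a, __mmask16 b)
	{
	  return __builtin_ia32_kortestzhi (a, b);
	}
   */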
39540 case IX86_BUILTIN_GATHERSIV2DF:
39541 icode = CODE_FOR_avx2_gathersiv2df;
39542 goto gather_gen;
39543 case IX86_BUILTIN_GATHERSIV4DF:
39544 icode = CODE_FOR_avx2_gathersiv4df;
39545 goto gather_gen;
39546 case IX86_BUILTIN_GATHERDIV2DF:
39547 icode = CODE_FOR_avx2_gatherdiv2df;
39548 goto gather_gen;
39549 case IX86_BUILTIN_GATHERDIV4DF:
39550 icode = CODE_FOR_avx2_gatherdiv4df;
39551 goto gather_gen;
39552 case IX86_BUILTIN_GATHERSIV4SF:
39553 icode = CODE_FOR_avx2_gathersiv4sf;
39554 goto gather_gen;
39555 case IX86_BUILTIN_GATHERSIV8SF:
39556 icode = CODE_FOR_avx2_gathersiv8sf;
39557 goto gather_gen;
39558 case IX86_BUILTIN_GATHERDIV4SF:
39559 icode = CODE_FOR_avx2_gatherdiv4sf;
39560 goto gather_gen;
39561 case IX86_BUILTIN_GATHERDIV8SF:
39562 icode = CODE_FOR_avx2_gatherdiv8sf;
39563 goto gather_gen;
39564 case IX86_BUILTIN_GATHERSIV2DI:
39565 icode = CODE_FOR_avx2_gathersiv2di;
39566 goto gather_gen;
39567 case IX86_BUILTIN_GATHERSIV4DI:
39568 icode = CODE_FOR_avx2_gathersiv4di;
39569 goto gather_gen;
39570 case IX86_BUILTIN_GATHERDIV2DI:
39571 icode = CODE_FOR_avx2_gatherdiv2di;
39572 goto gather_gen;
39573 case IX86_BUILTIN_GATHERDIV4DI:
39574 icode = CODE_FOR_avx2_gatherdiv4di;
39575 goto gather_gen;
39576 case IX86_BUILTIN_GATHERSIV4SI:
39577 icode = CODE_FOR_avx2_gathersiv4si;
39578 goto gather_gen;
39579 case IX86_BUILTIN_GATHERSIV8SI:
39580 icode = CODE_FOR_avx2_gathersiv8si;
39581 goto gather_gen;
39582 case IX86_BUILTIN_GATHERDIV4SI:
39583 icode = CODE_FOR_avx2_gatherdiv4si;
39584 goto gather_gen;
39585 case IX86_BUILTIN_GATHERDIV8SI:
39586 icode = CODE_FOR_avx2_gatherdiv8si;
39587 goto gather_gen;
39588 case IX86_BUILTIN_GATHERALTSIV4DF:
39589 icode = CODE_FOR_avx2_gathersiv4df;
39590 goto gather_gen;
39591 case IX86_BUILTIN_GATHERALTDIV8SF:
39592 icode = CODE_FOR_avx2_gatherdiv8sf;
39593 goto gather_gen;
39594 case IX86_BUILTIN_GATHERALTSIV4DI:
39595 icode = CODE_FOR_avx2_gathersiv4di;
39596 goto gather_gen;
39597 case IX86_BUILTIN_GATHERALTDIV8SI:
39598 icode = CODE_FOR_avx2_gatherdiv8si;
39599 goto gather_gen;
39600 case IX86_BUILTIN_GATHER3SIV16SF:
39601 icode = CODE_FOR_avx512f_gathersiv16sf;
39602 goto gather_gen;
39603 case IX86_BUILTIN_GATHER3SIV8DF:
39604 icode = CODE_FOR_avx512f_gathersiv8df;
39605 goto gather_gen;
39606 case IX86_BUILTIN_GATHER3DIV16SF:
39607 icode = CODE_FOR_avx512f_gatherdiv16sf;
39608 goto gather_gen;
39609 case IX86_BUILTIN_GATHER3DIV8DF:
39610 icode = CODE_FOR_avx512f_gatherdiv8df;
39611 goto gather_gen;
39612 case IX86_BUILTIN_GATHER3SIV16SI:
39613 icode = CODE_FOR_avx512f_gathersiv16si;
39614 goto gather_gen;
39615 case IX86_BUILTIN_GATHER3SIV8DI:
39616 icode = CODE_FOR_avx512f_gathersiv8di;
39617 goto gather_gen;
39618 case IX86_BUILTIN_GATHER3DIV16SI:
39619 icode = CODE_FOR_avx512f_gatherdiv16si;
39620 goto gather_gen;
39621 case IX86_BUILTIN_GATHER3DIV8DI:
39622 icode = CODE_FOR_avx512f_gatherdiv8di;
39623 goto gather_gen;
39624 case IX86_BUILTIN_GATHER3ALTSIV8DF:
39625 icode = CODE_FOR_avx512f_gathersiv8df;
39626 goto gather_gen;
39627 case IX86_BUILTIN_GATHER3ALTDIV16SF:
39628 icode = CODE_FOR_avx512f_gatherdiv16sf;
39629 goto gather_gen;
39630 case IX86_BUILTIN_GATHER3ALTSIV8DI:
39631 icode = CODE_FOR_avx512f_gathersiv8di;
39632 goto gather_gen;
39633 case IX86_BUILTIN_GATHER3ALTDIV16SI:
39634 icode = CODE_FOR_avx512f_gatherdiv16si;
39635 goto gather_gen;
39636 case IX86_BUILTIN_GATHER3SIV2DF:
39637 icode = CODE_FOR_avx512vl_gathersiv2df;
39638 goto gather_gen;
39639 case IX86_BUILTIN_GATHER3SIV4DF:
39640 icode = CODE_FOR_avx512vl_gathersiv4df;
39641 goto gather_gen;
39642 case IX86_BUILTIN_GATHER3DIV2DF:
39643 icode = CODE_FOR_avx512vl_gatherdiv2df;
39644 goto gather_gen;
39645 case IX86_BUILTIN_GATHER3DIV4DF:
39646 icode = CODE_FOR_avx512vl_gatherdiv4df;
39647 goto gather_gen;
39648 case IX86_BUILTIN_GATHER3SIV4SF:
39649 icode = CODE_FOR_avx512vl_gathersiv4sf;
39650 goto gather_gen;
39651 case IX86_BUILTIN_GATHER3SIV8SF:
39652 icode = CODE_FOR_avx512vl_gathersiv8sf;
39653 goto gather_gen;
39654 case IX86_BUILTIN_GATHER3DIV4SF:
39655 icode = CODE_FOR_avx512vl_gatherdiv4sf;
39656 goto gather_gen;
39657 case IX86_BUILTIN_GATHER3DIV8SF:
39658 icode = CODE_FOR_avx512vl_gatherdiv8sf;
39659 goto gather_gen;
39660 case IX86_BUILTIN_GATHER3SIV2DI:
39661 icode = CODE_FOR_avx512vl_gathersiv2di;
39662 goto gather_gen;
39663 case IX86_BUILTIN_GATHER3SIV4DI:
39664 icode = CODE_FOR_avx512vl_gathersiv4di;
39665 goto gather_gen;
39666 case IX86_BUILTIN_GATHER3DIV2DI:
39667 icode = CODE_FOR_avx512vl_gatherdiv2di;
39668 goto gather_gen;
39669 case IX86_BUILTIN_GATHER3DIV4DI:
39670 icode = CODE_FOR_avx512vl_gatherdiv4di;
39671 goto gather_gen;
39672 case IX86_BUILTIN_GATHER3SIV4SI:
39673 icode = CODE_FOR_avx512vl_gathersiv4si;
39674 goto gather_gen;
39675 case IX86_BUILTIN_GATHER3SIV8SI:
39676 icode = CODE_FOR_avx512vl_gathersiv8si;
39677 goto gather_gen;
39678 case IX86_BUILTIN_GATHER3DIV4SI:
39679 icode = CODE_FOR_avx512vl_gatherdiv4si;
39680 goto gather_gen;
39681 case IX86_BUILTIN_GATHER3DIV8SI:
39682 icode = CODE_FOR_avx512vl_gatherdiv8si;
39683 goto gather_gen;
39684 case IX86_BUILTIN_GATHER3ALTSIV4DF:
39685 icode = CODE_FOR_avx512vl_gathersiv4df;
39686 goto gather_gen;
39687 case IX86_BUILTIN_GATHER3ALTDIV8SF:
39688 icode = CODE_FOR_avx512vl_gatherdiv8sf;
39689 goto gather_gen;
39690 case IX86_BUILTIN_GATHER3ALTSIV4DI:
39691 icode = CODE_FOR_avx512vl_gathersiv4di;
39692 goto gather_gen;
39693 case IX86_BUILTIN_GATHER3ALTDIV8SI:
39694 icode = CODE_FOR_avx512vl_gatherdiv8si;
39695 goto gather_gen;
39696 case IX86_BUILTIN_SCATTERSIV16SF:
39697 icode = CODE_FOR_avx512f_scattersiv16sf;
39698 goto scatter_gen;
39699 case IX86_BUILTIN_SCATTERSIV8DF:
39700 icode = CODE_FOR_avx512f_scattersiv8df;
39701 goto scatter_gen;
39702 case IX86_BUILTIN_SCATTERDIV16SF:
39703 icode = CODE_FOR_avx512f_scatterdiv16sf;
39704 goto scatter_gen;
39705 case IX86_BUILTIN_SCATTERDIV8DF:
39706 icode = CODE_FOR_avx512f_scatterdiv8df;
39707 goto scatter_gen;
39708 case IX86_BUILTIN_SCATTERSIV16SI:
39709 icode = CODE_FOR_avx512f_scattersiv16si;
39710 goto scatter_gen;
39711 case IX86_BUILTIN_SCATTERSIV8DI:
39712 icode = CODE_FOR_avx512f_scattersiv8di;
39713 goto scatter_gen;
39714 case IX86_BUILTIN_SCATTERDIV16SI:
39715 icode = CODE_FOR_avx512f_scatterdiv16si;
39716 goto scatter_gen;
39717 case IX86_BUILTIN_SCATTERDIV8DI:
39718 icode = CODE_FOR_avx512f_scatterdiv8di;
39719 goto scatter_gen;
39720 case IX86_BUILTIN_SCATTERSIV8SF:
39721 icode = CODE_FOR_avx512vl_scattersiv8sf;
39722 goto scatter_gen;
39723 case IX86_BUILTIN_SCATTERSIV4SF:
39724 icode = CODE_FOR_avx512vl_scattersiv4sf;
39725 goto scatter_gen;
39726 case IX86_BUILTIN_SCATTERSIV4DF:
39727 icode = CODE_FOR_avx512vl_scattersiv4df;
39728 goto scatter_gen;
39729 case IX86_BUILTIN_SCATTERSIV2DF:
39730 icode = CODE_FOR_avx512vl_scattersiv2df;
39731 goto scatter_gen;
39732 case IX86_BUILTIN_SCATTERDIV8SF:
39733 icode = CODE_FOR_avx512vl_scatterdiv8sf;
39734 goto scatter_gen;
39735 case IX86_BUILTIN_SCATTERDIV4SF:
39736 icode = CODE_FOR_avx512vl_scatterdiv4sf;
39737 goto scatter_gen;
39738 case IX86_BUILTIN_SCATTERDIV4DF:
39739 icode = CODE_FOR_avx512vl_scatterdiv4df;
39740 goto scatter_gen;
39741 case IX86_BUILTIN_SCATTERDIV2DF:
39742 icode = CODE_FOR_avx512vl_scatterdiv2df;
39743 goto scatter_gen;
39744 case IX86_BUILTIN_SCATTERSIV8SI:
39745 icode = CODE_FOR_avx512vl_scattersiv8si;
39746 goto scatter_gen;
39747 case IX86_BUILTIN_SCATTERSIV4SI:
39748 icode = CODE_FOR_avx512vl_scattersiv4si;
39749 goto scatter_gen;
39750 case IX86_BUILTIN_SCATTERSIV4DI:
39751 icode = CODE_FOR_avx512vl_scattersiv4di;
39752 goto scatter_gen;
39753 case IX86_BUILTIN_SCATTERSIV2DI:
39754 icode = CODE_FOR_avx512vl_scattersiv2di;
39755 goto scatter_gen;
39756 case IX86_BUILTIN_SCATTERDIV8SI:
39757 icode = CODE_FOR_avx512vl_scatterdiv8si;
39758 goto scatter_gen;
39759 case IX86_BUILTIN_SCATTERDIV4SI:
39760 icode = CODE_FOR_avx512vl_scatterdiv4si;
39761 goto scatter_gen;
39762 case IX86_BUILTIN_SCATTERDIV4DI:
39763 icode = CODE_FOR_avx512vl_scatterdiv4di;
39764 goto scatter_gen;
39765 case IX86_BUILTIN_SCATTERDIV2DI:
39766 icode = CODE_FOR_avx512vl_scatterdiv2di;
39767 goto scatter_gen;
39768 case IX86_BUILTIN_GATHERPFDPD:
39769 icode = CODE_FOR_avx512pf_gatherpfv8sidf;
39770 goto vec_prefetch_gen;
39771 case IX86_BUILTIN_GATHERPFDPS:
39772 icode = CODE_FOR_avx512pf_gatherpfv16sisf;
39773 goto vec_prefetch_gen;
39774 case IX86_BUILTIN_GATHERPFQPD:
39775 icode = CODE_FOR_avx512pf_gatherpfv8didf;
39776 goto vec_prefetch_gen;
39777 case IX86_BUILTIN_GATHERPFQPS:
39778 icode = CODE_FOR_avx512pf_gatherpfv8disf;
39779 goto vec_prefetch_gen;
39780 case IX86_BUILTIN_SCATTERPFDPD:
39781 icode = CODE_FOR_avx512pf_scatterpfv8sidf;
39782 goto vec_prefetch_gen;
39783 case IX86_BUILTIN_SCATTERPFDPS:
39784 icode = CODE_FOR_avx512pf_scatterpfv16sisf;
39785 goto vec_prefetch_gen;
39786 case IX86_BUILTIN_SCATTERPFQPD:
39787 icode = CODE_FOR_avx512pf_scatterpfv8didf;
39788 goto vec_prefetch_gen;
39789 case IX86_BUILTIN_SCATTERPFQPS:
39790 icode = CODE_FOR_avx512pf_scatterpfv8disf;
39791 goto vec_prefetch_gen;
39793 gather_gen:
39794 rtx half;
39795 rtx (*gen) (rtx, rtx);
39797 arg0 = CALL_EXPR_ARG (exp, 0);
39798 arg1 = CALL_EXPR_ARG (exp, 1);
39799 arg2 = CALL_EXPR_ARG (exp, 2);
39800 arg3 = CALL_EXPR_ARG (exp, 3);
39801 arg4 = CALL_EXPR_ARG (exp, 4);
39802 op0 = expand_normal (arg0);
39803 op1 = expand_normal (arg1);
39804 op2 = expand_normal (arg2);
39805 op3 = expand_normal (arg3);
39806 op4 = expand_normal (arg4);
39807 /* Note the arg order is different from the operand order. */
39808 mode0 = insn_data[icode].operand[1].mode;
39809 mode2 = insn_data[icode].operand[3].mode;
39810 mode3 = insn_data[icode].operand[4].mode;
39811 mode4 = insn_data[icode].operand[5].mode;
39813 if (target == NULL_RTX
39814 || GET_MODE (target) != insn_data[icode].operand[0].mode
39815 || !insn_data[icode].operand[0].predicate (target,
39816 GET_MODE (target)))
39817 subtarget = gen_reg_rtx (insn_data[icode].operand[0].mode);
39818 else
39819 subtarget = target;
39821 switch (fcode)
39823 case IX86_BUILTIN_GATHER3ALTSIV8DF:
39824 case IX86_BUILTIN_GATHER3ALTSIV8DI:
39825 half = gen_reg_rtx (V8SImode);
39826 if (!nonimmediate_operand (op2, V16SImode))
39827 op2 = copy_to_mode_reg (V16SImode, op2);
39828 emit_insn (gen_vec_extract_lo_v16si (half, op2));
39829 op2 = half;
39830 break;
39831 case IX86_BUILTIN_GATHER3ALTSIV4DF:
39832 case IX86_BUILTIN_GATHER3ALTSIV4DI:
39833 case IX86_BUILTIN_GATHERALTSIV4DF:
39834 case IX86_BUILTIN_GATHERALTSIV4DI:
39835 half = gen_reg_rtx (V4SImode);
39836 if (!nonimmediate_operand (op2, V8SImode))
39837 op2 = copy_to_mode_reg (V8SImode, op2);
39838 emit_insn (gen_vec_extract_lo_v8si (half, op2));
39839 op2 = half;
39840 break;
39841 case IX86_BUILTIN_GATHER3ALTDIV16SF:
39842 case IX86_BUILTIN_GATHER3ALTDIV16SI:
39843 half = gen_reg_rtx (mode0);
39844 if (mode0 == V8SFmode)
39845 gen = gen_vec_extract_lo_v16sf;
39846 else
39847 gen = gen_vec_extract_lo_v16si;
39848 if (!nonimmediate_operand (op0, GET_MODE (op0)))
39849 op0 = copy_to_mode_reg (GET_MODE (op0), op0);
39850 emit_insn (gen (half, op0));
39851 op0 = half;
39852 if (GET_MODE (op3) != VOIDmode)
39854 if (!nonimmediate_operand (op3, GET_MODE (op3)))
39855 op3 = copy_to_mode_reg (GET_MODE (op3), op3);
39856 emit_insn (gen (half, op3));
39857 op3 = half;
39859 break;
39860 case IX86_BUILTIN_GATHER3ALTDIV8SF:
39861 case IX86_BUILTIN_GATHER3ALTDIV8SI:
39862 case IX86_BUILTIN_GATHERALTDIV8SF:
39863 case IX86_BUILTIN_GATHERALTDIV8SI:
39864 half = gen_reg_rtx (mode0);
39865 if (mode0 == V4SFmode)
39866 gen = gen_vec_extract_lo_v8sf;
39867 else
39868 gen = gen_vec_extract_lo_v8si;
39869 if (!nonimmediate_operand (op0, GET_MODE (op0)))
39870 op0 = copy_to_mode_reg (GET_MODE (op0), op0);
39871 emit_insn (gen (half, op0));
39872 op0 = half;
39873 if (GET_MODE (op3) != VOIDmode)
39875 if (!nonimmediate_operand (op3, GET_MODE (op3)))
39876 op3 = copy_to_mode_reg (GET_MODE (op3), op3);
39877 emit_insn (gen (half, op3));
39878 op3 = half;
39880 break;
39881 default:
39882 break;
39885       /* Force the memory operand's address into a base register here.  We
39886 	 don't want to do this for the memory operands of other builtin
39887 	 functions.  */
39888 op1 = ix86_zero_extend_to_Pmode (op1);
39890 if (!insn_data[icode].operand[1].predicate (op0, mode0))
39891 op0 = copy_to_mode_reg (mode0, op0);
39892 if (!insn_data[icode].operand[2].predicate (op1, Pmode))
39893 op1 = copy_to_mode_reg (Pmode, op1);
39894 if (!insn_data[icode].operand[3].predicate (op2, mode2))
39895 op2 = copy_to_mode_reg (mode2, op2);
39897 op3 = fixup_modeless_constant (op3, mode3);
39899 if (GET_MODE (op3) == mode3 || GET_MODE (op3) == VOIDmode)
39901 if (!insn_data[icode].operand[4].predicate (op3, mode3))
39902 op3 = copy_to_mode_reg (mode3, op3);
39904 else
39906 op3 = copy_to_reg (op3);
39907 op3 = simplify_gen_subreg (mode3, op3, GET_MODE (op3), 0);
39909 if (!insn_data[icode].operand[5].predicate (op4, mode4))
39911 error ("the last argument must be scale 1, 2, 4, 8");
39912 return const0_rtx;
39915 /* Optimize. If mask is known to have all high bits set,
39916 replace op0 with pc_rtx to signal that the instruction
39917 overwrites the whole destination and doesn't use its
39918 previous contents. */
39919 if (optimize)
39921 if (TREE_CODE (arg3) == INTEGER_CST)
39923 if (integer_all_onesp (arg3))
39924 op0 = pc_rtx;
39926 else if (TREE_CODE (arg3) == VECTOR_CST)
39928 unsigned int negative = 0;
39929 for (i = 0; i < VECTOR_CST_NELTS (arg3); ++i)
39931 tree cst = VECTOR_CST_ELT (arg3, i);
39932 if (TREE_CODE (cst) == INTEGER_CST
39933 && tree_int_cst_sign_bit (cst))
39934 negative++;
39935 else if (TREE_CODE (cst) == REAL_CST
39936 && REAL_VALUE_NEGATIVE (TREE_REAL_CST (cst)))
39937 negative++;
39939 if (negative == TYPE_VECTOR_SUBPARTS (TREE_TYPE (arg3)))
39940 op0 = pc_rtx;
39942 else if (TREE_CODE (arg3) == SSA_NAME
39943 && TREE_CODE (TREE_TYPE (arg3)) == VECTOR_TYPE)
39945 /* Recognize also when mask is like:
39946 __v2df src = _mm_setzero_pd ();
39947 __v2df mask = _mm_cmpeq_pd (src, src);
39949 __v8sf src = _mm256_setzero_ps ();
39950 __v8sf mask = _mm256_cmp_ps (src, src, _CMP_EQ_OQ);
39951 as that is a cheaper way to load all ones into
39952 a register than having to load a constant from
39953 memory. */
39954 gimple def_stmt = SSA_NAME_DEF_STMT (arg3);
39955 if (is_gimple_call (def_stmt))
39957 tree fndecl = gimple_call_fndecl (def_stmt);
39958 if (fndecl
39959 && DECL_BUILT_IN_CLASS (fndecl) == BUILT_IN_MD)
39960 switch ((unsigned int) DECL_FUNCTION_CODE (fndecl))
39962 case IX86_BUILTIN_CMPPD:
39963 case IX86_BUILTIN_CMPPS:
39964 case IX86_BUILTIN_CMPPD256:
39965 case IX86_BUILTIN_CMPPS256:
39966 if (!integer_zerop (gimple_call_arg (def_stmt, 2)))
39967 break;
39968 /* FALLTHRU */
39969 case IX86_BUILTIN_CMPEQPD:
39970 case IX86_BUILTIN_CMPEQPS:
39971 if (initializer_zerop (gimple_call_arg (def_stmt, 0))
39972 && initializer_zerop (gimple_call_arg (def_stmt,
39973 1)))
39974 op0 = pc_rtx;
39975 break;
39976 default:
39977 break;
39983 pat = GEN_FCN (icode) (subtarget, op0, op1, op2, op3, op4);
39984 if (! pat)
39985 return const0_rtx;
39986 emit_insn (pat);
39988 switch (fcode)
39990 case IX86_BUILTIN_GATHER3DIV16SF:
39991 if (target == NULL_RTX)
39992 target = gen_reg_rtx (V8SFmode);
39993 emit_insn (gen_vec_extract_lo_v16sf (target, subtarget));
39994 break;
39995 case IX86_BUILTIN_GATHER3DIV16SI:
39996 if (target == NULL_RTX)
39997 target = gen_reg_rtx (V8SImode);
39998 emit_insn (gen_vec_extract_lo_v16si (target, subtarget));
39999 break;
40000 case IX86_BUILTIN_GATHER3DIV8SF:
40001 case IX86_BUILTIN_GATHERDIV8SF:
40002 if (target == NULL_RTX)
40003 target = gen_reg_rtx (V4SFmode);
40004 emit_insn (gen_vec_extract_lo_v8sf (target, subtarget));
40005 break;
40006 case IX86_BUILTIN_GATHER3DIV8SI:
40007 case IX86_BUILTIN_GATHERDIV8SI:
40008 if (target == NULL_RTX)
40009 target = gen_reg_rtx (V4SImode);
40010 emit_insn (gen_vec_extract_lo_v8si (target, subtarget));
40011 break;
40012 default:
40013 target = subtarget;
40014 break;
40016 return target;
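/* Illustrative only: the unmasked AVX2 gather intrinsics pass an
   all-ones mask built by comparing a register with itself, which is
   exactly the pattern the optimize block above detects and replaces with
   pc_rtx.  Assuming the usual avx2intrin.h definitions:

	#include <immintrin.h>

	__m256
	gather8 (const float *table, __m256i idx)
	{
	  return _mm256_i32gather_ps (table, idx, 4);
	}

   The last argument is the scale and must be 1, 2, 4 or 8, as enforced
   by the scale check above.  */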
40018 scatter_gen:
40019 arg0 = CALL_EXPR_ARG (exp, 0);
40020 arg1 = CALL_EXPR_ARG (exp, 1);
40021 arg2 = CALL_EXPR_ARG (exp, 2);
40022 arg3 = CALL_EXPR_ARG (exp, 3);
40023 arg4 = CALL_EXPR_ARG (exp, 4);
40024 op0 = expand_normal (arg0);
40025 op1 = expand_normal (arg1);
40026 op2 = expand_normal (arg2);
40027 op3 = expand_normal (arg3);
40028 op4 = expand_normal (arg4);
40029 mode1 = insn_data[icode].operand[1].mode;
40030 mode2 = insn_data[icode].operand[2].mode;
40031 mode3 = insn_data[icode].operand[3].mode;
40032 mode4 = insn_data[icode].operand[4].mode;
40034       /* Force the memory operand's address into a base register here.  We
40035 	 don't want to do this for the memory operands of other builtin
40036 	 functions.  */
40037 op0 = force_reg (Pmode, convert_to_mode (Pmode, op0, 1));
40039 if (!insn_data[icode].operand[0].predicate (op0, Pmode))
40040 op0 = copy_to_mode_reg (Pmode, op0);
40042 op1 = fixup_modeless_constant (op1, mode1);
40044 if (GET_MODE (op1) == mode1 || GET_MODE (op1) == VOIDmode)
40046 if (!insn_data[icode].operand[1].predicate (op1, mode1))
40047 op1 = copy_to_mode_reg (mode1, op1);
40049 else
40051 op1 = copy_to_reg (op1);
40052 op1 = simplify_gen_subreg (mode1, op1, GET_MODE (op1), 0);
40055 if (!insn_data[icode].operand[2].predicate (op2, mode2))
40056 op2 = copy_to_mode_reg (mode2, op2);
40058 if (!insn_data[icode].operand[3].predicate (op3, mode3))
40059 op3 = copy_to_mode_reg (mode3, op3);
40061 if (!insn_data[icode].operand[4].predicate (op4, mode4))
40063 error ("the last argument must be scale 1, 2, 4, 8");
40064 return const0_rtx;
40067 pat = GEN_FCN (icode) (op0, op1, op2, op3, op4);
40068 if (! pat)
40069 return const0_rtx;
40071 emit_insn (pat);
40072 return 0;
40074 vec_prefetch_gen:
40075 arg0 = CALL_EXPR_ARG (exp, 0);
40076 arg1 = CALL_EXPR_ARG (exp, 1);
40077 arg2 = CALL_EXPR_ARG (exp, 2);
40078 arg3 = CALL_EXPR_ARG (exp, 3);
40079 arg4 = CALL_EXPR_ARG (exp, 4);
40080 op0 = expand_normal (arg0);
40081 op1 = expand_normal (arg1);
40082 op2 = expand_normal (arg2);
40083 op3 = expand_normal (arg3);
40084 op4 = expand_normal (arg4);
40085 mode0 = insn_data[icode].operand[0].mode;
40086 mode1 = insn_data[icode].operand[1].mode;
40087 mode3 = insn_data[icode].operand[3].mode;
40088 mode4 = insn_data[icode].operand[4].mode;
40090 op0 = fixup_modeless_constant (op0, mode0);
40092 if (GET_MODE (op0) == mode0
40093 || (GET_MODE (op0) == VOIDmode && op0 != constm1_rtx))
40095 if (!insn_data[icode].operand[0].predicate (op0, mode0))
40096 op0 = copy_to_mode_reg (mode0, op0);
40098 else if (op0 != constm1_rtx)
40100 op0 = copy_to_reg (op0);
40101 op0 = simplify_gen_subreg (mode0, op0, GET_MODE (op0), 0);
40104 if (!insn_data[icode].operand[1].predicate (op1, mode1))
40105 op1 = copy_to_mode_reg (mode1, op1);
40107 /* Force memory operand only with base register here. But we
40108 don't want to do it on memory operand for other builtin
40109 functions. */
40110 op2 = force_reg (Pmode, convert_to_mode (Pmode, op2, 1));
40112 if (!insn_data[icode].operand[2].predicate (op2, Pmode))
40113 op2 = copy_to_mode_reg (Pmode, op2);
40115 if (!insn_data[icode].operand[3].predicate (op3, mode3))
40117 error ("the fourth argument must be scale 1, 2, 4, 8");
40118 return const0_rtx;
40121 if (!insn_data[icode].operand[4].predicate (op4, mode4))
40123 error ("incorrect hint operand");
40124 return const0_rtx;
40127 pat = GEN_FCN (icode) (op0, op1, op2, op3, op4);
40128 if (! pat)
40129 return const0_rtx;
40131 emit_insn (pat);
40133 return 0;
40135 case IX86_BUILTIN_XABORT:
40136 icode = CODE_FOR_xabort;
40137 arg0 = CALL_EXPR_ARG (exp, 0);
40138 op0 = expand_normal (arg0);
40139 mode0 = insn_data[icode].operand[0].mode;
40140 if (!insn_data[icode].operand[0].predicate (op0, mode0))
40142 error ("the xabort's argument must be an 8-bit immediate");
40143 return const0_rtx;
40145 emit_insn (gen_xabort (op0));
40146 return 0;
40148 default:
40149 break;
40152 for (i = 0, d = bdesc_special_args;
40153 i < ARRAY_SIZE (bdesc_special_args);
40154 i++, d++)
40155 if (d->code == fcode)
40156 return ix86_expand_special_args_builtin (d, exp, target);
40158 for (i = 0, d = bdesc_args;
40159 i < ARRAY_SIZE (bdesc_args);
40160 i++, d++)
40161 if (d->code == fcode)
40162 switch (fcode)
40164 case IX86_BUILTIN_FABSQ:
40165 case IX86_BUILTIN_COPYSIGNQ:
40166 if (!TARGET_SSE)
40167 /* Emit a normal call if SSE isn't available. */
40168 return expand_call (exp, target, ignore);
40169 default:
40170 return ix86_expand_args_builtin (d, exp, target);
40173 for (i = 0, d = bdesc_comi; i < ARRAY_SIZE (bdesc_comi); i++, d++)
40174 if (d->code == fcode)
40175 return ix86_expand_sse_comi (d, exp, target);
40177 for (i = 0, d = bdesc_round_args; i < ARRAY_SIZE (bdesc_round_args); i++, d++)
40178 if (d->code == fcode)
40179 return ix86_expand_round_builtin (d, exp, target);
40181 for (i = 0, d = bdesc_pcmpestr;
40182 i < ARRAY_SIZE (bdesc_pcmpestr);
40183 i++, d++)
40184 if (d->code == fcode)
40185 return ix86_expand_sse_pcmpestr (d, exp, target);
40187 for (i = 0, d = bdesc_pcmpistr;
40188 i < ARRAY_SIZE (bdesc_pcmpistr);
40189 i++, d++)
40190 if (d->code == fcode)
40191 return ix86_expand_sse_pcmpistr (d, exp, target);
40193 for (i = 0, d = bdesc_multi_arg; i < ARRAY_SIZE (bdesc_multi_arg); i++, d++)
40194 if (d->code == fcode)
40195 return ix86_expand_multi_arg_builtin (d->icode, exp, target,
40196 (enum ix86_builtin_func_type)
40197 d->flag, d->comparison);
40199 gcc_unreachable ();
40202 /* This returns the target-specific builtin with code CODE if
40203 current_function_decl has visibility on this builtin, which is checked
40204 using isa flags. Returns NULL_TREE otherwise. */
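/* For example (illustrative): a request for IX86_BUILTIN_SQRTPD512 yields a
   decl only when the ISA mask recorded for it in ix86_builtins_isa
   (presumably the AVX-512F bit) is enabled in the current function's target
   options or in the default target options; otherwise NULL_TREE results.  */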
40206 static tree ix86_get_builtin (enum ix86_builtins code)
40208 struct cl_target_option *opts;
40209 tree target_tree = NULL_TREE;
40211 /* Determine the isa flags of current_function_decl. */
40213 if (current_function_decl)
40214 target_tree = DECL_FUNCTION_SPECIFIC_TARGET (current_function_decl);
40216 if (target_tree == NULL)
40217 target_tree = target_option_default_node;
40219 opts = TREE_TARGET_OPTION (target_tree);
40221 if (ix86_builtins_isa[(int) code].isa & opts->x_ix86_isa_flags)
40222 return ix86_builtin_decl (code, true);
40223 else
40224 return NULL_TREE;
40227 /* Return the function decl for the target-specific builtin
40228 corresponding to the MPX builtin passed in FCODE.  */
40229 static tree
40230 ix86_builtin_mpx_function (unsigned fcode)
40232 switch (fcode)
40234 case BUILT_IN_CHKP_BNDMK:
40235 return ix86_builtins[IX86_BUILTIN_BNDMK];
40237 case BUILT_IN_CHKP_BNDSTX:
40238 return ix86_builtins[IX86_BUILTIN_BNDSTX];
40240 case BUILT_IN_CHKP_BNDLDX:
40241 return ix86_builtins[IX86_BUILTIN_BNDLDX];
40243 case BUILT_IN_CHKP_BNDCL:
40244 return ix86_builtins[IX86_BUILTIN_BNDCL];
40246 case BUILT_IN_CHKP_BNDCU:
40247 return ix86_builtins[IX86_BUILTIN_BNDCU];
40249 case BUILT_IN_CHKP_BNDRET:
40250 return ix86_builtins[IX86_BUILTIN_BNDRET];
40252 case BUILT_IN_CHKP_INTERSECT:
40253 return ix86_builtins[IX86_BUILTIN_BNDINT];
40255 case BUILT_IN_CHKP_NARROW:
40256 return ix86_builtins[IX86_BUILTIN_BNDNARROW];
40258 case BUILT_IN_CHKP_SIZEOF:
40259 return ix86_builtins[IX86_BUILTIN_SIZEOF];
40261 case BUILT_IN_CHKP_EXTRACT_LOWER:
40262 return ix86_builtins[IX86_BUILTIN_BNDLOWER];
40264 case BUILT_IN_CHKP_EXTRACT_UPPER:
40265 return ix86_builtins[IX86_BUILTIN_BNDUPPER];
40267 default:
40268 return NULL_TREE;
40271 gcc_unreachable ();
40274 /* Helper function for ix86_load_bounds and ix86_store_bounds.
40276 Return an address to be used to load/store bounds for pointer
40277 passed in SLOT.
40279 SLOT_NO is an integer constant holding number of a target
40280 dependent special slot to be used in case SLOT is not a memory.
40282 SPECIAL_BASE is a pointer to be used as a base of fake address
40283 to access special slots in Bounds Table. SPECIAL_BASE[-1],
40284 SPECIAL_BASE[-2] etc. will be used as fake pointer locations. */
40286 static rtx
40287 ix86_get_arg_address_for_bt (rtx slot, rtx slot_no, rtx special_base)
40289 rtx addr = NULL;
40291 /* A NULL slot means we pass bounds for a pointer that is not passed to
40292 the function at all.  A register slot means we pass the pointer in a
40293 register.  In both of these cases bounds are passed via the Bounds
40294 Table.  Since we do not have an actual pointer stored in memory,
40295 we have to use fake addresses to access the Bounds Table.  We
40296 start with (special_base - sizeof (void*)) and decrease this
40297 address by the pointer size to get addresses for other slots.  */
40298 if (!slot || REG_P (slot))
40300 gcc_assert (CONST_INT_P (slot_no));
40301 addr = plus_constant (Pmode, special_base,
40302 -(INTVAL (slot_no) + 1) * GET_MODE_SIZE (Pmode));
40304 /* If the pointer is passed in memory, then its address is used to
40305 access the Bounds Table.  */
40306 else if (MEM_P (slot))
40308 addr = XEXP (slot, 0);
40309 if (!register_operand (addr, Pmode))
40310 addr = copy_addr_to_reg (addr);
40312 else
40313 gcc_unreachable ();
40315 return addr;
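/* Worked example (illustrative, assuming Pmode is DImode so
   GET_MODE_SIZE (Pmode) is 8): special slot 0 maps to special_base - 8,
   slot 1 to special_base - 16, and so on, while a pointer passed in
   memory simply reuses its own address.  */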
40318 /* Expand pass uses this hook to load bounds for function parameter
40319 PTR passed in SLOT in case its bounds are not passed in a register.
40321 If SLOT is a memory, then bounds are loaded as for regular pointer
40322 loaded from memory. PTR may be NULL in case SLOT is a memory.
40323 In such a case the value of PTR (if required) may be loaded from SLOT.
40325 If SLOT is NULL or a register then SLOT_NO is an integer constant
40326 holding number of the target dependent special slot which should be
40327 used to obtain bounds.
40329 Return loaded bounds. */
40331 static rtx
40332 ix86_load_bounds (rtx slot, rtx ptr, rtx slot_no)
40334 rtx reg = gen_reg_rtx (BNDmode);
40335 rtx addr;
40337 /* Get address to be used to access Bounds Table. Special slots start
40338 at the location of return address of the current function. */
40339 addr = ix86_get_arg_address_for_bt (slot, slot_no, arg_pointer_rtx);
40341 /* Load pointer value from a memory if we don't have it. */
40342 if (!ptr)
40344 gcc_assert (MEM_P (slot));
40345 ptr = copy_addr_to_reg (slot);
40348 emit_insn (BNDmode == BND64mode
40349 ? gen_bnd64_ldx (reg, addr, ptr)
40350 : gen_bnd32_ldx (reg, addr, ptr));
40352 return reg;
40355 /* Expand pass uses this hook to store BOUNDS for call argument PTR
40356 passed in SLOT in case BOUNDS are not passed in a register.
40358 If SLOT is a memory, then BOUNDS are stored as for regular pointer
40359 stored in memory. PTR may be NULL in case SLOT is a memory.
40360 In such a case the value of PTR (if required) may be loaded from SLOT.
40362 If SLOT is NULL or a register then SLOT_NO is an integer constant
40363 holding number of the target dependent special slot which should be
40364 used to store BOUNDS. */
40366 static void
40367 ix86_store_bounds (rtx ptr, rtx slot, rtx bounds, rtx slot_no)
40369 rtx addr;
40371 /* Get address to be used to access Bounds Table. Special slots start
40372 at the location of return address of a called function. */
40373 addr = ix86_get_arg_address_for_bt (slot, slot_no, stack_pointer_rtx);
40375 /* Load pointer value from a memory if we don't have it. */
40376 if (!ptr)
40378 gcc_assert (MEM_P (slot));
40379 ptr = copy_addr_to_reg (slot);
40382 gcc_assert (POINTER_BOUNDS_MODE_P (GET_MODE (bounds)));
40383 if (!register_operand (bounds, BNDmode))
40384 bounds = copy_to_mode_reg (BNDmode, bounds);
40386 emit_insn (BNDmode == BND64mode
40387 ? gen_bnd64_stx (addr, ptr, bounds)
40388 : gen_bnd32_stx (addr, ptr, bounds));
40391 /* Load and return bounds returned by function in SLOT. */
40393 static rtx
40394 ix86_load_returned_bounds (rtx slot)
40396 rtx res;
40398 gcc_assert (REG_P (slot));
40399 res = gen_reg_rtx (BNDmode);
40400 emit_move_insn (res, slot);
40402 return res;
40405 /* Store BOUNDS returned by function into SLOT. */
40407 static void
40408 ix86_store_returned_bounds (rtx slot, rtx bounds)
40410 gcc_assert (REG_P (slot));
40411 emit_move_insn (slot, bounds);
40414 /* Returns a function decl for a vectorized version of the builtin function
40415 with builtin function code FN and the result vector type TYPE, or NULL_TREE
40416 if it is not available. */
40418 static tree
40419 ix86_builtin_vectorized_function (tree fndecl, tree type_out,
40420 tree type_in)
40422 machine_mode in_mode, out_mode;
40423 int in_n, out_n;
40424 enum built_in_function fn = DECL_FUNCTION_CODE (fndecl);
40426 if (TREE_CODE (type_out) != VECTOR_TYPE
40427 || TREE_CODE (type_in) != VECTOR_TYPE
40428 || DECL_BUILT_IN_CLASS (fndecl) != BUILT_IN_NORMAL)
40429 return NULL_TREE;
40431 out_mode = TYPE_MODE (TREE_TYPE (type_out));
40432 out_n = TYPE_VECTOR_SUBPARTS (type_out);
40433 in_mode = TYPE_MODE (TREE_TYPE (type_in));
40434 in_n = TYPE_VECTOR_SUBPARTS (type_in);
40436 switch (fn)
40438 case BUILT_IN_SQRT:
40439 if (out_mode == DFmode && in_mode == DFmode)
40441 if (out_n == 2 && in_n == 2)
40442 return ix86_get_builtin (IX86_BUILTIN_SQRTPD);
40443 else if (out_n == 4 && in_n == 4)
40444 return ix86_get_builtin (IX86_BUILTIN_SQRTPD256);
40445 else if (out_n == 8 && in_n == 8)
40446 return ix86_get_builtin (IX86_BUILTIN_SQRTPD512);
40448 break;
40450 case BUILT_IN_EXP2F:
40451 if (out_mode == SFmode && in_mode == SFmode)
40453 if (out_n == 16 && in_n == 16)
40454 return ix86_get_builtin (IX86_BUILTIN_EXP2PS);
40456 break;
40458 case BUILT_IN_SQRTF:
40459 if (out_mode == SFmode && in_mode == SFmode)
40461 if (out_n == 4 && in_n == 4)
40462 return ix86_get_builtin (IX86_BUILTIN_SQRTPS_NR);
40463 else if (out_n == 8 && in_n == 8)
40464 return ix86_get_builtin (IX86_BUILTIN_SQRTPS_NR256);
40465 else if (out_n == 16 && in_n == 16)
40466 return ix86_get_builtin (IX86_BUILTIN_SQRTPS_NR512);
40468 break;
40470 case BUILT_IN_IFLOOR:
40471 case BUILT_IN_LFLOOR:
40472 case BUILT_IN_LLFLOOR:
40473 /* The round insn does not trap on denormals. */
40474 if (flag_trapping_math || !TARGET_ROUND)
40475 break;
40477 if (out_mode == SImode && in_mode == DFmode)
40479 if (out_n == 4 && in_n == 2)
40480 return ix86_get_builtin (IX86_BUILTIN_FLOORPD_VEC_PACK_SFIX);
40481 else if (out_n == 8 && in_n == 4)
40482 return ix86_get_builtin (IX86_BUILTIN_FLOORPD_VEC_PACK_SFIX256);
40483 else if (out_n == 16 && in_n == 8)
40484 return ix86_get_builtin (IX86_BUILTIN_FLOORPD_VEC_PACK_SFIX512);
40486 break;
40488 case BUILT_IN_IFLOORF:
40489 case BUILT_IN_LFLOORF:
40490 case BUILT_IN_LLFLOORF:
40491 /* The round insn does not trap on denormals. */
40492 if (flag_trapping_math || !TARGET_ROUND)
40493 break;
40495 if (out_mode == SImode && in_mode == SFmode)
40497 if (out_n == 4 && in_n == 4)
40498 return ix86_get_builtin (IX86_BUILTIN_FLOORPS_SFIX);
40499 else if (out_n == 8 && in_n == 8)
40500 return ix86_get_builtin (IX86_BUILTIN_FLOORPS_SFIX256);
40502 break;
40504 case BUILT_IN_ICEIL:
40505 case BUILT_IN_LCEIL:
40506 case BUILT_IN_LLCEIL:
40507 /* The round insn does not trap on denormals. */
40508 if (flag_trapping_math || !TARGET_ROUND)
40509 break;
40511 if (out_mode == SImode && in_mode == DFmode)
40513 if (out_n == 4 && in_n == 2)
40514 return ix86_get_builtin (IX86_BUILTIN_CEILPD_VEC_PACK_SFIX);
40515 else if (out_n == 8 && in_n == 4)
40516 return ix86_get_builtin (IX86_BUILTIN_CEILPD_VEC_PACK_SFIX256);
40517 else if (out_n == 16 && in_n == 8)
40518 return ix86_get_builtin (IX86_BUILTIN_CEILPD_VEC_PACK_SFIX512);
40520 break;
40522 case BUILT_IN_ICEILF:
40523 case BUILT_IN_LCEILF:
40524 case BUILT_IN_LLCEILF:
40525 /* The round insn does not trap on denormals. */
40526 if (flag_trapping_math || !TARGET_ROUND)
40527 break;
40529 if (out_mode == SImode && in_mode == SFmode)
40531 if (out_n == 4 && in_n == 4)
40532 return ix86_get_builtin (IX86_BUILTIN_CEILPS_SFIX);
40533 else if (out_n == 8 && in_n == 8)
40534 return ix86_get_builtin (IX86_BUILTIN_CEILPS_SFIX256);
40536 break;
40538 case BUILT_IN_IRINT:
40539 case BUILT_IN_LRINT:
40540 case BUILT_IN_LLRINT:
40541 if (out_mode == SImode && in_mode == DFmode)
40543 if (out_n == 4 && in_n == 2)
40544 return ix86_get_builtin (IX86_BUILTIN_VEC_PACK_SFIX);
40545 else if (out_n == 8 && in_n == 4)
40546 return ix86_get_builtin (IX86_BUILTIN_VEC_PACK_SFIX256);
40548 break;
40550 case BUILT_IN_IRINTF:
40551 case BUILT_IN_LRINTF:
40552 case BUILT_IN_LLRINTF:
40553 if (out_mode == SImode && in_mode == SFmode)
40555 if (out_n == 4 && in_n == 4)
40556 return ix86_get_builtin (IX86_BUILTIN_CVTPS2DQ);
40557 else if (out_n == 8 && in_n == 8)
40558 return ix86_get_builtin (IX86_BUILTIN_CVTPS2DQ256);
40560 break;
40562 case BUILT_IN_IROUND:
40563 case BUILT_IN_LROUND:
40564 case BUILT_IN_LLROUND:
40565 /* The round insn does not trap on denormals. */
40566 if (flag_trapping_math || !TARGET_ROUND)
40567 break;
40569 if (out_mode == SImode && in_mode == DFmode)
40571 if (out_n == 4 && in_n == 2)
40572 return ix86_get_builtin (IX86_BUILTIN_ROUNDPD_AZ_VEC_PACK_SFIX);
40573 else if (out_n == 8 && in_n == 4)
40574 return ix86_get_builtin (IX86_BUILTIN_ROUNDPD_AZ_VEC_PACK_SFIX256);
40575 else if (out_n == 16 && in_n == 8)
40576 return ix86_get_builtin (IX86_BUILTIN_ROUNDPD_AZ_VEC_PACK_SFIX512);
40578 break;
40580 case BUILT_IN_IROUNDF:
40581 case BUILT_IN_LROUNDF:
40582 case BUILT_IN_LLROUNDF:
40583 /* The round insn does not trap on denormals. */
40584 if (flag_trapping_math || !TARGET_ROUND)
40585 break;
40587 if (out_mode == SImode && in_mode == SFmode)
40589 if (out_n == 4 && in_n == 4)
40590 return ix86_get_builtin (IX86_BUILTIN_ROUNDPS_AZ_SFIX);
40591 else if (out_n == 8 && in_n == 8)
40592 return ix86_get_builtin (IX86_BUILTIN_ROUNDPS_AZ_SFIX256);
40594 break;
40596 case BUILT_IN_COPYSIGN:
40597 if (out_mode == DFmode && in_mode == DFmode)
40599 if (out_n == 2 && in_n == 2)
40600 return ix86_get_builtin (IX86_BUILTIN_CPYSGNPD);
40601 else if (out_n == 4 && in_n == 4)
40602 return ix86_get_builtin (IX86_BUILTIN_CPYSGNPD256);
40603 else if (out_n == 8 && in_n == 8)
40604 return ix86_get_builtin (IX86_BUILTIN_CPYSGNPD512);
40606 break;
40608 case BUILT_IN_COPYSIGNF:
40609 if (out_mode == SFmode && in_mode == SFmode)
40611 if (out_n == 4 && in_n == 4)
40612 return ix86_get_builtin (IX86_BUILTIN_CPYSGNPS);
40613 else if (out_n == 8 && in_n == 8)
40614 return ix86_get_builtin (IX86_BUILTIN_CPYSGNPS256);
40615 else if (out_n == 16 && in_n == 16)
40616 return ix86_get_builtin (IX86_BUILTIN_CPYSGNPS512);
40618 break;
40620 case BUILT_IN_FLOOR:
40621 /* The round insn does not trap on denormals. */
40622 if (flag_trapping_math || !TARGET_ROUND)
40623 break;
40625 if (out_mode == DFmode && in_mode == DFmode)
40627 if (out_n == 2 && in_n == 2)
40628 return ix86_get_builtin (IX86_BUILTIN_FLOORPD);
40629 else if (out_n == 4 && in_n == 4)
40630 return ix86_get_builtin (IX86_BUILTIN_FLOORPD256);
40632 break;
40634 case BUILT_IN_FLOORF:
40635 /* The round insn does not trap on denormals. */
40636 if (flag_trapping_math || !TARGET_ROUND)
40637 break;
40639 if (out_mode == SFmode && in_mode == SFmode)
40641 if (out_n == 4 && in_n == 4)
40642 return ix86_get_builtin (IX86_BUILTIN_FLOORPS);
40643 else if (out_n == 8 && in_n == 8)
40644 return ix86_get_builtin (IX86_BUILTIN_FLOORPS256);
40646 break;
40648 case BUILT_IN_CEIL:
40649 /* The round insn does not trap on denormals. */
40650 if (flag_trapping_math || !TARGET_ROUND)
40651 break;
40653 if (out_mode == DFmode && in_mode == DFmode)
40655 if (out_n == 2 && in_n == 2)
40656 return ix86_get_builtin (IX86_BUILTIN_CEILPD);
40657 else if (out_n == 4 && in_n == 4)
40658 return ix86_get_builtin (IX86_BUILTIN_CEILPD256);
40660 break;
40662 case BUILT_IN_CEILF:
40663 /* The round insn does not trap on denormals. */
40664 if (flag_trapping_math || !TARGET_ROUND)
40665 break;
40667 if (out_mode == SFmode && in_mode == SFmode)
40669 if (out_n == 4 && in_n == 4)
40670 return ix86_get_builtin (IX86_BUILTIN_CEILPS);
40671 else if (out_n == 8 && in_n == 8)
40672 return ix86_get_builtin (IX86_BUILTIN_CEILPS256);
40674 break;
40676 case BUILT_IN_TRUNC:
40677 /* The round insn does not trap on denormals. */
40678 if (flag_trapping_math || !TARGET_ROUND)
40679 break;
40681 if (out_mode == DFmode && in_mode == DFmode)
40683 if (out_n == 2 && in_n == 2)
40684 return ix86_get_builtin (IX86_BUILTIN_TRUNCPD);
40685 else if (out_n == 4 && in_n == 4)
40686 return ix86_get_builtin (IX86_BUILTIN_TRUNCPD256);
40688 break;
40690 case BUILT_IN_TRUNCF:
40691 /* The round insn does not trap on denormals. */
40692 if (flag_trapping_math || !TARGET_ROUND)
40693 break;
40695 if (out_mode == SFmode && in_mode == SFmode)
40697 if (out_n == 4 && in_n == 4)
40698 return ix86_get_builtin (IX86_BUILTIN_TRUNCPS);
40699 else if (out_n == 8 && in_n == 8)
40700 return ix86_get_builtin (IX86_BUILTIN_TRUNCPS256);
40702 break;
40704 case BUILT_IN_RINT:
40705 /* The round insn does not trap on denormals. */
40706 if (flag_trapping_math || !TARGET_ROUND)
40707 break;
40709 if (out_mode == DFmode && in_mode == DFmode)
40711 if (out_n == 2 && in_n == 2)
40712 return ix86_get_builtin (IX86_BUILTIN_RINTPD);
40713 else if (out_n == 4 && in_n == 4)
40714 return ix86_get_builtin (IX86_BUILTIN_RINTPD256);
40716 break;
40718 case BUILT_IN_RINTF:
40719 /* The round insn does not trap on denormals. */
40720 if (flag_trapping_math || !TARGET_ROUND)
40721 break;
40723 if (out_mode == SFmode && in_mode == SFmode)
40725 if (out_n == 4 && in_n == 4)
40726 return ix86_get_builtin (IX86_BUILTIN_RINTPS);
40727 else if (out_n == 8 && in_n == 8)
40728 return ix86_get_builtin (IX86_BUILTIN_RINTPS256);
40730 break;
40732 case BUILT_IN_ROUND:
40733 /* The round insn does not trap on denormals. */
40734 if (flag_trapping_math || !TARGET_ROUND)
40735 break;
40737 if (out_mode == DFmode && in_mode == DFmode)
40739 if (out_n == 2 && in_n == 2)
40740 return ix86_get_builtin (IX86_BUILTIN_ROUNDPD_AZ);
40741 else if (out_n == 4 && in_n == 4)
40742 return ix86_get_builtin (IX86_BUILTIN_ROUNDPD_AZ256);
40744 break;
40746 case BUILT_IN_ROUNDF:
40747 /* The round insn does not trap on denormals. */
40748 if (flag_trapping_math || !TARGET_ROUND)
40749 break;
40751 if (out_mode == SFmode && in_mode == SFmode)
40753 if (out_n == 4 && in_n == 4)
40754 return ix86_get_builtin (IX86_BUILTIN_ROUNDPS_AZ);
40755 else if (out_n == 8 && in_n == 8)
40756 return ix86_get_builtin (IX86_BUILTIN_ROUNDPS_AZ256);
40758 break;
40760 case BUILT_IN_FMA:
40761 if (out_mode == DFmode && in_mode == DFmode)
40763 if (out_n == 2 && in_n == 2)
40764 return ix86_get_builtin (IX86_BUILTIN_VFMADDPD);
40765 if (out_n == 4 && in_n == 4)
40766 return ix86_get_builtin (IX86_BUILTIN_VFMADDPD256);
40768 break;
40770 case BUILT_IN_FMAF:
40771 if (out_mode == SFmode && in_mode == SFmode)
40773 if (out_n == 4 && in_n == 4)
40774 return ix86_get_builtin (IX86_BUILTIN_VFMADDPS);
40775 if (out_n == 8 && in_n == 8)
40776 return ix86_get_builtin (IX86_BUILTIN_VFMADDPS256);
40778 break;
40780 default:
40781 break;
40784 /* Dispatch to a handler for a vectorization library. */
40785 if (ix86_veclib_handler)
40786 return ix86_veclib_handler ((enum built_in_function) fn, type_out,
40787 type_in);
40789 return NULL_TREE;
40792 /* Handler for an SVML-style interface to
40793 a library with vectorized intrinsics. */
40795 static tree
40796 ix86_veclibabi_svml (enum built_in_function fn, tree type_out, tree type_in)
40798 char name[20];
40799 tree fntype, new_fndecl, args;
40800 unsigned arity;
40801 const char *bname;
40802 machine_mode el_mode, in_mode;
40803 int n, in_n;
40805 /* The SVML is suitable for unsafe math only. */
40806 if (!flag_unsafe_math_optimizations)
40807 return NULL_TREE;
40809 el_mode = TYPE_MODE (TREE_TYPE (type_out));
40810 n = TYPE_VECTOR_SUBPARTS (type_out);
40811 in_mode = TYPE_MODE (TREE_TYPE (type_in));
40812 in_n = TYPE_VECTOR_SUBPARTS (type_in);
40813 if (el_mode != in_mode
40814 || n != in_n)
40815 return NULL_TREE;
40817 switch (fn)
40819 case BUILT_IN_EXP:
40820 case BUILT_IN_LOG:
40821 case BUILT_IN_LOG10:
40822 case BUILT_IN_POW:
40823 case BUILT_IN_TANH:
40824 case BUILT_IN_TAN:
40825 case BUILT_IN_ATAN:
40826 case BUILT_IN_ATAN2:
40827 case BUILT_IN_ATANH:
40828 case BUILT_IN_CBRT:
40829 case BUILT_IN_SINH:
40830 case BUILT_IN_SIN:
40831 case BUILT_IN_ASINH:
40832 case BUILT_IN_ASIN:
40833 case BUILT_IN_COSH:
40834 case BUILT_IN_COS:
40835 case BUILT_IN_ACOSH:
40836 case BUILT_IN_ACOS:
40837 if (el_mode != DFmode || n != 2)
40838 return NULL_TREE;
40839 break;
40841 case BUILT_IN_EXPF:
40842 case BUILT_IN_LOGF:
40843 case BUILT_IN_LOG10F:
40844 case BUILT_IN_POWF:
40845 case BUILT_IN_TANHF:
40846 case BUILT_IN_TANF:
40847 case BUILT_IN_ATANF:
40848 case BUILT_IN_ATAN2F:
40849 case BUILT_IN_ATANHF:
40850 case BUILT_IN_CBRTF:
40851 case BUILT_IN_SINHF:
40852 case BUILT_IN_SINF:
40853 case BUILT_IN_ASINHF:
40854 case BUILT_IN_ASINF:
40855 case BUILT_IN_COSHF:
40856 case BUILT_IN_COSF:
40857 case BUILT_IN_ACOSHF:
40858 case BUILT_IN_ACOSF:
40859 if (el_mode != SFmode || n != 4)
40860 return NULL_TREE;
40861 break;
40863 default:
40864 return NULL_TREE;
40867 bname = IDENTIFIER_POINTER (DECL_NAME (builtin_decl_implicit (fn)));
40869 if (fn == BUILT_IN_LOGF)
40870 strcpy (name, "vmlsLn4");
40871 else if (fn == BUILT_IN_LOG)
40872 strcpy (name, "vmldLn2");
40873 else if (n == 4)
40875 sprintf (name, "vmls%s", bname+10);
40876 name[strlen (name)-1] = '4';
40878 else
40879 sprintf (name, "vmld%s2", bname+10);
40881 /* Convert to uppercase. */
40882 name[4] &= ~0x20;
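  /* Illustrative examples of the resulting SVML names, assuming the
     standard "__builtin_" prefix on BNAME: BUILT_IN_SINF with n == 4
     gives "vmlsSin4", BUILT_IN_SIN with n == 2 gives "vmldSin2", and
     BUILT_IN_POWF with n == 4 gives "vmlsPow4".  */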
40884 arity = 0;
40885 for (args = DECL_ARGUMENTS (builtin_decl_implicit (fn));
40886 args;
40887 args = TREE_CHAIN (args))
40888 arity++;
40890 if (arity == 1)
40891 fntype = build_function_type_list (type_out, type_in, NULL);
40892 else
40893 fntype = build_function_type_list (type_out, type_in, type_in, NULL);
40895 /* Build a function declaration for the vectorized function. */
40896 new_fndecl = build_decl (BUILTINS_LOCATION,
40897 FUNCTION_DECL, get_identifier (name), fntype);
40898 TREE_PUBLIC (new_fndecl) = 1;
40899 DECL_EXTERNAL (new_fndecl) = 1;
40900 DECL_IS_NOVOPS (new_fndecl) = 1;
40901 TREE_READONLY (new_fndecl) = 1;
40903 return new_fndecl;
40906 /* Handler for an ACML-style interface to
40907 a library with vectorized intrinsics. */
40909 static tree
40910 ix86_veclibabi_acml (enum built_in_function fn, tree type_out, tree type_in)
40912 char name[20] = "__vr.._";
40913 tree fntype, new_fndecl, args;
40914 unsigned arity;
40915 const char *bname;
40916 machine_mode el_mode, in_mode;
40917 int n, in_n;
40919 /* The ACML is 64-bit only and suitable for unsafe math only, as
40920 it does not correctly support parts of IEEE arithmetic, such as
40921 denormals, with the required precision.  */
40922 if (!TARGET_64BIT
40923 || !flag_unsafe_math_optimizations)
40924 return NULL_TREE;
40926 el_mode = TYPE_MODE (TREE_TYPE (type_out));
40927 n = TYPE_VECTOR_SUBPARTS (type_out);
40928 in_mode = TYPE_MODE (TREE_TYPE (type_in));
40929 in_n = TYPE_VECTOR_SUBPARTS (type_in);
40930 if (el_mode != in_mode
40931 || n != in_n)
40932 return NULL_TREE;
40934 switch (fn)
40936 case BUILT_IN_SIN:
40937 case BUILT_IN_COS:
40938 case BUILT_IN_EXP:
40939 case BUILT_IN_LOG:
40940 case BUILT_IN_LOG2:
40941 case BUILT_IN_LOG10:
40942 name[4] = 'd';
40943 name[5] = '2';
40944 if (el_mode != DFmode
40945 || n != 2)
40946 return NULL_TREE;
40947 break;
40949 case BUILT_IN_SINF:
40950 case BUILT_IN_COSF:
40951 case BUILT_IN_EXPF:
40952 case BUILT_IN_POWF:
40953 case BUILT_IN_LOGF:
40954 case BUILT_IN_LOG2F:
40955 case BUILT_IN_LOG10F:
40956 name[4] = 's';
40957 name[5] = '4';
40958 if (el_mode != SFmode
40959 || n != 4)
40960 return NULL_TREE;
40961 break;
40963 default:
40964 return NULL_TREE;
40967 bname = IDENTIFIER_POINTER (DECL_NAME (builtin_decl_implicit (fn)));
40968 sprintf (name + 7, "%s", bname+10);
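  /* Illustrative examples of the resulting ACML names, assuming the
     standard "__builtin_" prefix on BNAME: BUILT_IN_SIN gives
     "__vrd2_sin" and BUILT_IN_SINF gives "__vrs4_sinf".  */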
40970 arity = 0;
40971 for (args = DECL_ARGUMENTS (builtin_decl_implicit (fn));
40972 args;
40973 args = TREE_CHAIN (args))
40974 arity++;
40976 if (arity == 1)
40977 fntype = build_function_type_list (type_out, type_in, NULL);
40978 else
40979 fntype = build_function_type_list (type_out, type_in, type_in, NULL);
40981 /* Build a function declaration for the vectorized function. */
40982 new_fndecl = build_decl (BUILTINS_LOCATION,
40983 FUNCTION_DECL, get_identifier (name), fntype);
40984 TREE_PUBLIC (new_fndecl) = 1;
40985 DECL_EXTERNAL (new_fndecl) = 1;
40986 DECL_IS_NOVOPS (new_fndecl) = 1;
40987 TREE_READONLY (new_fndecl) = 1;
40989 return new_fndecl;
40992 /* Returns a decl of a function that implements gather load with
40993 memory type MEM_VECTYPE and index type INDEX_VECTYPE and SCALE.
40994 Return NULL_TREE if it is not available. */
40996 static tree
40997 ix86_vectorize_builtin_gather (const_tree mem_vectype,
40998 const_tree index_type, int scale)
41000 bool si;
41001 enum ix86_builtins code;
41003 if (! TARGET_AVX2)
41004 return NULL_TREE;
41006 if ((TREE_CODE (index_type) != INTEGER_TYPE
41007 && !POINTER_TYPE_P (index_type))
41008 || (TYPE_MODE (index_type) != SImode
41009 && TYPE_MODE (index_type) != DImode))
41010 return NULL_TREE;
41012 if (TYPE_PRECISION (index_type) > POINTER_SIZE)
41013 return NULL_TREE;
41015 /* The v*gather* insns sign-extend the index to pointer mode.  */
41016 if (TYPE_PRECISION (index_type) < POINTER_SIZE
41017 && TYPE_UNSIGNED (index_type))
41018 return NULL_TREE;
41020 if (scale <= 0
41021 || scale > 8
41022 || (scale & (scale - 1)) != 0)
41023 return NULL_TREE;
41025 si = TYPE_MODE (index_type) == SImode;
41026 switch (TYPE_MODE (mem_vectype))
41028 case V2DFmode:
41029 if (TARGET_AVX512VL)
41030 code = si ? IX86_BUILTIN_GATHER3SIV2DF : IX86_BUILTIN_GATHER3DIV2DF;
41031 else
41032 code = si ? IX86_BUILTIN_GATHERSIV2DF : IX86_BUILTIN_GATHERDIV2DF;
41033 break;
41034 case V4DFmode:
41035 if (TARGET_AVX512VL)
41036 code = si ? IX86_BUILTIN_GATHER3ALTSIV4DF : IX86_BUILTIN_GATHER3DIV4DF;
41037 else
41038 code = si ? IX86_BUILTIN_GATHERALTSIV4DF : IX86_BUILTIN_GATHERDIV4DF;
41039 break;
41040 case V2DImode:
41041 if (TARGET_AVX512VL)
41042 code = si ? IX86_BUILTIN_GATHER3SIV2DI : IX86_BUILTIN_GATHER3DIV2DI;
41043 else
41044 code = si ? IX86_BUILTIN_GATHERSIV2DI : IX86_BUILTIN_GATHERDIV2DI;
41045 break;
41046 case V4DImode:
41047 if (TARGET_AVX512VL)
41048 code = si ? IX86_BUILTIN_GATHER3ALTSIV4DI : IX86_BUILTIN_GATHER3DIV4DI;
41049 else
41050 code = si ? IX86_BUILTIN_GATHERALTSIV4DI : IX86_BUILTIN_GATHERDIV4DI;
41051 break;
41052 case V4SFmode:
41053 if (TARGET_AVX512VL)
41054 code = si ? IX86_BUILTIN_GATHER3SIV4SF : IX86_BUILTIN_GATHER3DIV4SF;
41055 else
41056 code = si ? IX86_BUILTIN_GATHERSIV4SF : IX86_BUILTIN_GATHERDIV4SF;
41057 break;
41058 case V8SFmode:
41059 if (TARGET_AVX512VL)
41060 code = si ? IX86_BUILTIN_GATHER3SIV8SF : IX86_BUILTIN_GATHER3ALTDIV8SF;
41061 else
41062 code = si ? IX86_BUILTIN_GATHERSIV8SF : IX86_BUILTIN_GATHERALTDIV8SF;
41063 break;
41064 case V4SImode:
41065 if (TARGET_AVX512VL)
41066 code = si ? IX86_BUILTIN_GATHER3SIV4SI : IX86_BUILTIN_GATHER3DIV4SI;
41067 else
41068 code = si ? IX86_BUILTIN_GATHERSIV4SI : IX86_BUILTIN_GATHERDIV4SI;
41069 break;
41070 case V8SImode:
41071 if (TARGET_AVX512VL)
41072 code = si ? IX86_BUILTIN_GATHER3SIV8SI : IX86_BUILTIN_GATHER3ALTDIV8SI;
41073 else
41074 code = si ? IX86_BUILTIN_GATHERSIV8SI : IX86_BUILTIN_GATHERALTDIV8SI;
41075 break;
41076 case V8DFmode:
41077 if (TARGET_AVX512F)
41078 code = si ? IX86_BUILTIN_GATHER3ALTSIV8DF : IX86_BUILTIN_GATHER3DIV8DF;
41079 else
41080 return NULL_TREE;
41081 break;
41082 case V8DImode:
41083 if (TARGET_AVX512F)
41084 code = si ? IX86_BUILTIN_GATHER3ALTSIV8DI : IX86_BUILTIN_GATHER3DIV8DI;
41085 else
41086 return NULL_TREE;
41087 break;
41088 case V16SFmode:
41089 if (TARGET_AVX512F)
41090 code = si ? IX86_BUILTIN_GATHER3SIV16SF : IX86_BUILTIN_GATHER3ALTDIV16SF;
41091 else
41092 return NULL_TREE;
41093 break;
41094 case V16SImode:
41095 if (TARGET_AVX512F)
41096 code = si ? IX86_BUILTIN_GATHER3SIV16SI : IX86_BUILTIN_GATHER3ALTDIV16SI;
41097 else
41098 return NULL_TREE;
41099 break;
41100 default:
41101 return NULL_TREE;
41104 return ix86_get_builtin (code);
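/* Example of how the selection above plays out (illustrative): gathering a
   V4SF vector with an SImode index and scale 4 picks
   IX86_BUILTIN_GATHER3SIV4SF when AVX512VL is enabled, and
   IX86_BUILTIN_GATHERSIV4SF otherwise.  */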
41107 /* Returns a code for a target-specific builtin that implements
41108 reciprocal of the function, or NULL_TREE if not available. */
41110 static tree
41111 ix86_builtin_reciprocal (unsigned int fn, bool md_fn, bool)
41113 if (! (TARGET_SSE_MATH && !optimize_insn_for_size_p ()
41114 && flag_finite_math_only && !flag_trapping_math
41115 && flag_unsafe_math_optimizations))
41116 return NULL_TREE;
41118 if (md_fn)
41119 /* Machine dependent builtins. */
41120 switch (fn)
41122 /* Vectorized version of sqrt to rsqrt conversion. */
41123 case IX86_BUILTIN_SQRTPS_NR:
41124 return ix86_get_builtin (IX86_BUILTIN_RSQRTPS_NR);
41126 case IX86_BUILTIN_SQRTPS_NR256:
41127 return ix86_get_builtin (IX86_BUILTIN_RSQRTPS_NR256);
41129 default:
41130 return NULL_TREE;
41132 else
41133 /* Normal builtins. */
41134 switch (fn)
41136 /* Sqrt to rsqrt conversion. */
41137 case BUILT_IN_SQRTF:
41138 return ix86_get_builtin (IX86_BUILTIN_RSQRTF);
41140 default:
41141 return NULL_TREE;
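/* For example (illustrative): under -ffast-math style options the guard
   above passes, and scalar sqrtf maps to IX86_BUILTIN_RSQRTF while the
   vectorized IX86_BUILTIN_SQRTPS_NR maps to IX86_BUILTIN_RSQRTPS_NR,
   presumably so that divisions by a square root can be expanded via the
   rsqrt approximation.  */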
41145 /* Helper for avx_vpermilps256_operand et al. This is also used by
41146 the expansion functions to turn the parallel back into a mask.
41147 The return value is 0 for no match and the imm8+1 for a match. */
41150 avx_vpermilp_parallel (rtx par, machine_mode mode)
41152 unsigned i, nelt = GET_MODE_NUNITS (mode);
41153 unsigned mask = 0;
41154 unsigned char ipar[16] = {}; /* Silence -Wuninitialized warning. */
41156 if (XVECLEN (par, 0) != (int) nelt)
41157 return 0;
41159 /* Validate that all of the elements are constants, and not totally
41160 out of range. Copy the data into an integral array to make the
41161 subsequent checks easier. */
41162 for (i = 0; i < nelt; ++i)
41164 rtx er = XVECEXP (par, 0, i);
41165 unsigned HOST_WIDE_INT ei;
41167 if (!CONST_INT_P (er))
41168 return 0;
41169 ei = INTVAL (er);
41170 if (ei >= nelt)
41171 return 0;
41172 ipar[i] = ei;
41175 switch (mode)
41177 case V8DFmode:
41178 /* In the 512-bit DFmode case, we can only move elements within
41179 a 128-bit lane. First fill the second part of the mask,
41180 then fallthru. */
41181 for (i = 4; i < 6; ++i)
41183 if (ipar[i] < 4 || ipar[i] >= 6)
41184 return 0;
41185 mask |= (ipar[i] - 4) << i;
41187 for (i = 6; i < 8; ++i)
41189 if (ipar[i] < 6)
41190 return 0;
41191 mask |= (ipar[i] - 6) << i;
41193 /* FALLTHRU */
41195 case V4DFmode:
41196 /* In the 256-bit DFmode case, we can only move elements within
41197 a 128-bit lane. */
41198 for (i = 0; i < 2; ++i)
41200 if (ipar[i] >= 2)
41201 return 0;
41202 mask |= ipar[i] << i;
41204 for (i = 2; i < 4; ++i)
41206 if (ipar[i] < 2)
41207 return 0;
41208 mask |= (ipar[i] - 2) << i;
41210 break;
41212 case V16SFmode:
41213 /* In the 512-bit SFmode case, the permutation in the upper 256 bits
41214 must mirror the permutation in the lower 256 bits.  */
41215 for (i = 0; i < 8; ++i)
41216 if (ipar[i] + 8 != ipar[i + 8])
41217 return 0;
41218 /* FALLTHRU */
41220 case V8SFmode:
41221 /* In the 256-bit SFmode case, we have full freedom of
41222 movement within the low 128-bit lane, but the high 128-bit
41223 lane must mirror the exact same pattern.  */
41224 for (i = 0; i < 4; ++i)
41225 if (ipar[i] + 4 != ipar[i + 4])
41226 return 0;
41227 nelt = 4;
41228 /* FALLTHRU */
41230 case V2DFmode:
41231 case V4SFmode:
41232 /* In the 128-bit case, we have full freedom in the placement of
41233 the elements from the source operand.  */
41234 for (i = 0; i < nelt; ++i)
41235 mask |= ipar[i] << (i * (nelt / 2));
41236 break;
41238 default:
41239 gcc_unreachable ();
41242 /* Make sure success has a non-zero value by adding one. */
41243 return mask + 1;
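/* Worked example (illustrative): for V4SFmode and the parallel (1 0 3 2)
   the loop above builds mask = 1 | (0 << 2) | (3 << 4) | (2 << 6) = 0xb1,
   so the function returns 0xb2, i.e. the vpermilps imm8 0xb1 plus one.  */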
41246 /* Helper for avx_vperm2f128_v4df_operand et al. This is also used by
41247 the expansion functions to turn the parallel back into a mask.
41248 The return value is 0 for no match and the imm8+1 for a match. */
41251 avx_vperm2f128_parallel (rtx par, machine_mode mode)
41253 unsigned i, nelt = GET_MODE_NUNITS (mode), nelt2 = nelt / 2;
41254 unsigned mask = 0;
41255 unsigned char ipar[8] = {}; /* Silence -Wuninitialized warning. */
41257 if (XVECLEN (par, 0) != (int) nelt)
41258 return 0;
41260 /* Validate that all of the elements are constants, and not totally
41261 out of range. Copy the data into an integral array to make the
41262 subsequent checks easier. */
41263 for (i = 0; i < nelt; ++i)
41265 rtx er = XVECEXP (par, 0, i);
41266 unsigned HOST_WIDE_INT ei;
41268 if (!CONST_INT_P (er))
41269 return 0;
41270 ei = INTVAL (er);
41271 if (ei >= 2 * nelt)
41272 return 0;
41273 ipar[i] = ei;
41276 /* Validate that the halves of the permute are halves. */
41277 for (i = 0; i < nelt2 - 1; ++i)
41278 if (ipar[i] + 1 != ipar[i + 1])
41279 return 0;
41280 for (i = nelt2; i < nelt - 1; ++i)
41281 if (ipar[i] + 1 != ipar[i + 1])
41282 return 0;
41284 /* Reconstruct the mask. */
41285 for (i = 0; i < 2; ++i)
41287 unsigned e = ipar[i * nelt2];
41288 if (e % nelt2)
41289 return 0;
41290 e /= nelt2;
41291 mask |= e << (i * 4);
41294 /* Make sure success has a non-zero value by adding one. */
41295 return mask + 1;
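/* Worked example (illustrative): for V4DFmode and the parallel (4 5 2 3),
   the halves are (4 5) and (2 3), so the reconstructed vperm2f128 imm8 is
   (4 / 2) | ((2 / 2) << 4) = 0x12 and the function returns 0x13.  */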
41298 /* Return a register priority for hard reg REGNO. */
41299 static int
41300 ix86_register_priority (int hard_regno)
41302 /* ebp and r13 as the base always want a displacement, and r12 as the
41303 base always wants an index.  So discourage their use in an
41304 address.  */
41305 if (hard_regno == R12_REG || hard_regno == R13_REG)
41306 return 0;
41307 if (hard_regno == BP_REG)
41308 return 1;
41309 /* New x86-64 int registers result in bigger code size. Discourage
41310 them. */
41311 if (FIRST_REX_INT_REG <= hard_regno && hard_regno <= LAST_REX_INT_REG)
41312 return 2;
41313 /* New x86-64 SSE registers result in bigger code size. Discourage
41314 them. */
41315 if (FIRST_REX_SSE_REG <= hard_regno && hard_regno <= LAST_REX_SSE_REG)
41316 return 2;
41317 /* Usage of AX register results in smaller code. Prefer it. */
41318 if (hard_regno == AX_REG)
41319 return 4;
41320 return 3;
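/* The resulting ordering (illustrative): eax (4) is preferred over most
   other registers (3), which are preferred over the REX integer and REX
   SSE registers (2), then ebp (1), and finally r12/r13 (0).  */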
41323 /* Implement TARGET_PREFERRED_RELOAD_CLASS.
41325 Put float CONST_DOUBLE in the constant pool instead of fp regs.
41326 QImode must go into class Q_REGS.
41327 Narrow ALL_REGS to GENERAL_REGS. This supports allowing movsf and
41328 movdf to do mem-to-mem moves through integer regs. */
41330 static reg_class_t
41331 ix86_preferred_reload_class (rtx x, reg_class_t regclass)
41333 machine_mode mode = GET_MODE (x);
41335 /* We're only allowed to return a subclass of CLASS. Many of the
41336 following checks fail for NO_REGS, so eliminate that early. */
41337 if (regclass == NO_REGS)
41338 return NO_REGS;
41340 /* All classes can load zeros. */
41341 if (x == CONST0_RTX (mode))
41342 return regclass;
41344 /* Force constants into memory if we are loading a (nonzero) constant into
41345 an MMX, SSE or MASK register. This is because there are no MMX/SSE/MASK
41346 instructions to load from a constant. */
41347 if (CONSTANT_P (x)
41348 && (MAYBE_MMX_CLASS_P (regclass)
41349 || MAYBE_SSE_CLASS_P (regclass)
41350 || MAYBE_MASK_CLASS_P (regclass)))
41351 return NO_REGS;
41353 /* Prefer SSE regs only, if we can use them for math. */
41354 if (TARGET_SSE_MATH && !TARGET_MIX_SSE_I387 && SSE_FLOAT_MODE_P (mode))
41355 return SSE_CLASS_P (regclass) ? regclass : NO_REGS;
41357 /* Floating-point constants need more complex checks. */
41358 if (CONST_DOUBLE_P (x))
41360 /* General regs can load everything. */
41361 if (reg_class_subset_p (regclass, GENERAL_REGS))
41362 return regclass;
41364 /* Floats can load 0 and 1 plus some others. Note that we eliminated
41365 zero above. We only want to wind up preferring 80387 registers if
41366 we plan on doing computation with them. */
41367 if (TARGET_80387
41368 && standard_80387_constant_p (x) > 0)
41370 /* Limit class to non-sse. */
41371 if (regclass == FLOAT_SSE_REGS)
41372 return FLOAT_REGS;
41373 if (regclass == FP_TOP_SSE_REGS)
41374 return FP_TOP_REG;
41375 if (regclass == FP_SECOND_SSE_REGS)
41376 return FP_SECOND_REG;
41377 if (regclass == FLOAT_INT_REGS || regclass == FLOAT_REGS)
41378 return regclass;
41381 return NO_REGS;
41384 /* Generally when we see PLUS here, it's the function invariant
41385 (plus soft-fp const_int). Which can only be computed into general
41386 regs. */
41387 if (GET_CODE (x) == PLUS)
41388 return reg_class_subset_p (regclass, GENERAL_REGS) ? regclass : NO_REGS;
41390 /* QImode constants are easy to load, but non-constant QImode data
41391 must go into Q_REGS. */
41392 if (GET_MODE (x) == QImode && !CONSTANT_P (x))
41394 if (reg_class_subset_p (regclass, Q_REGS))
41395 return regclass;
41396 if (reg_class_subset_p (Q_REGS, regclass))
41397 return Q_REGS;
41398 return NO_REGS;
41401 return regclass;
41404 /* Discourage putting floating-point values in SSE registers unless
41405 SSE math is being used, and likewise for the 387 registers. */
41406 static reg_class_t
41407 ix86_preferred_output_reload_class (rtx x, reg_class_t regclass)
41409 machine_mode mode = GET_MODE (x);
41411 /* Restrict the output reload class to the register bank that we are doing
41412 math on. If we would like not to return a subset of CLASS, reject this
41413 alternative: if reload cannot do this, it will still use its choice. */
41414 mode = GET_MODE (x);
41415 if (TARGET_SSE_MATH && SSE_FLOAT_MODE_P (mode))
41416 return MAYBE_SSE_CLASS_P (regclass) ? ALL_SSE_REGS : NO_REGS;
41418 if (X87_FLOAT_MODE_P (mode))
41420 if (regclass == FP_TOP_SSE_REGS)
41421 return FP_TOP_REG;
41422 else if (regclass == FP_SECOND_SSE_REGS)
41423 return FP_SECOND_REG;
41424 else
41425 return FLOAT_CLASS_P (regclass) ? regclass : NO_REGS;
41428 return regclass;
41431 static reg_class_t
41432 ix86_secondary_reload (bool in_p, rtx x, reg_class_t rclass,
41433 machine_mode mode, secondary_reload_info *sri)
41435 /* Double-word spills from general registers to non-offsettable memory
41436 references (zero-extended addresses) require special handling. */
41437 if (TARGET_64BIT
41438 && MEM_P (x)
41439 && GET_MODE_SIZE (mode) > UNITS_PER_WORD
41440 && INTEGER_CLASS_P (rclass)
41441 && !offsettable_memref_p (x))
41443 sri->icode = (in_p
41444 ? CODE_FOR_reload_noff_load
41445 : CODE_FOR_reload_noff_store);
41446 /* Add the cost of moving address to a temporary. */
41447 sri->extra_cost = 1;
41449 return NO_REGS;
41452 /* QImode spills from non-QI registers require an
41453 intermediate register on 32-bit targets.  */
41454 if (mode == QImode
41455 && (MAYBE_MASK_CLASS_P (rclass)
41456 || (!TARGET_64BIT && !in_p
41457 && INTEGER_CLASS_P (rclass)
41458 && MAYBE_NON_Q_CLASS_P (rclass))))
41460 int regno;
41462 if (REG_P (x))
41463 regno = REGNO (x);
41464 else
41465 regno = -1;
41467 if (regno >= FIRST_PSEUDO_REGISTER || GET_CODE (x) == SUBREG)
41468 regno = true_regnum (x);
41470 /* Return Q_REGS if the operand is in memory. */
41471 if (regno == -1)
41472 return Q_REGS;
41475 /* This condition handles corner case where an expression involving
41476 pointers gets vectorized. We're trying to use the address of a
41477 stack slot as a vector initializer.
41479 (set (reg:V2DI 74 [ vect_cst_.2 ])
41480 (vec_duplicate:V2DI (reg/f:DI 20 frame)))
41482 Eventually frame gets turned into sp+offset like this:
41484 (set (reg:V2DI 21 xmm0 [orig:74 vect_cst_.2 ] [74])
41485 (vec_duplicate:V2DI (plus:DI (reg/f:DI 7 sp)
41486 (const_int 392 [0x188]))))
41488 That later gets turned into:
41490 (set (reg:V2DI 21 xmm0 [orig:74 vect_cst_.2 ] [74])
41491 (vec_duplicate:V2DI (plus:DI (reg/f:DI 7 sp)
41492 (mem/u/c/i:DI (symbol_ref/u:DI ("*.LC0") [flags 0x2]) [0 S8 A64]))))
41494 We'll have the following reload recorded:
41496 Reload 0: reload_in (DI) =
41497 (plus:DI (reg/f:DI 7 sp)
41498 (mem/u/c/i:DI (symbol_ref/u:DI ("*.LC0") [flags 0x2]) [0 S8 A64]))
41499 reload_out (V2DI) = (reg:V2DI 21 xmm0 [orig:74 vect_cst_.2 ] [74])
41500 SSE_REGS, RELOAD_OTHER (opnum = 0), can't combine
41501 reload_in_reg: (plus:DI (reg/f:DI 7 sp) (const_int 392 [0x188]))
41502 reload_out_reg: (reg:V2DI 21 xmm0 [orig:74 vect_cst_.2 ] [74])
41503 reload_reg_rtx: (reg:V2DI 22 xmm1)
41505 Which isn't going to work since SSE instructions can't handle scalar
41506 additions. Returning GENERAL_REGS forces the addition into integer
41507 register and reload can handle subsequent reloads without problems. */
41509 if (in_p && GET_CODE (x) == PLUS
41510 && SSE_CLASS_P (rclass)
41511 && SCALAR_INT_MODE_P (mode))
41512 return GENERAL_REGS;
41514 return NO_REGS;
41517 /* Implement TARGET_CLASS_LIKELY_SPILLED_P. */
41519 static bool
41520 ix86_class_likely_spilled_p (reg_class_t rclass)
41522 switch (rclass)
41524 case AREG:
41525 case DREG:
41526 case CREG:
41527 case BREG:
41528 case AD_REGS:
41529 case SIREG:
41530 case DIREG:
41531 case SSE_FIRST_REG:
41532 case FP_TOP_REG:
41533 case FP_SECOND_REG:
41534 case BND_REGS:
41535 return true;
41537 default:
41538 break;
41541 return false;
41544 /* If we are copying between general and FP registers, we need a memory
41545 location. The same is true for SSE and MMX registers.
41547 To optimize register_move_cost performance, allow inline variant.
41549 The macro can't work reliably when one of the CLASSES is class containing
41550 registers from multiple units (SSE, MMX, integer). We avoid this by never
41551 combining those units in single alternative in the machine description.
41552 Ensure that this constraint holds to avoid unexpected surprises.
41554 When STRICT is false, we are being called from REGISTER_MOVE_COST, so do not
41555 enforce these sanity checks. */
41557 static inline bool
41558 inline_secondary_memory_needed (enum reg_class class1, enum reg_class class2,
41559 machine_mode mode, int strict)
41561 if (lra_in_progress && (class1 == NO_REGS || class2 == NO_REGS))
41562 return false;
41563 if (MAYBE_FLOAT_CLASS_P (class1) != FLOAT_CLASS_P (class1)
41564 || MAYBE_FLOAT_CLASS_P (class2) != FLOAT_CLASS_P (class2)
41565 || MAYBE_SSE_CLASS_P (class1) != SSE_CLASS_P (class1)
41566 || MAYBE_SSE_CLASS_P (class2) != SSE_CLASS_P (class2)
41567 || MAYBE_MMX_CLASS_P (class1) != MMX_CLASS_P (class1)
41568 || MAYBE_MMX_CLASS_P (class2) != MMX_CLASS_P (class2))
41570 gcc_assert (!strict || lra_in_progress);
41571 return true;
41574 if (FLOAT_CLASS_P (class1) != FLOAT_CLASS_P (class2))
41575 return true;
41577 /* Between mask and general, we have moves no larger than word size. */
41578 if ((MAYBE_MASK_CLASS_P (class1) != MAYBE_MASK_CLASS_P (class2))
41579 && (GET_MODE_SIZE (mode) > UNITS_PER_WORD))
41580 return true;
41582 /* ??? This is a lie.  We do have moves between mmx/general and between
41583 mmx/sse2.  But by saying we need secondary memory we discourage the
41584 register allocator from using the mmx registers unless needed.  */
41585 if (MMX_CLASS_P (class1) != MMX_CLASS_P (class2))
41586 return true;
41588 if (SSE_CLASS_P (class1) != SSE_CLASS_P (class2))
41590 /* SSE1 doesn't have any direct moves from other classes. */
41591 if (!TARGET_SSE2)
41592 return true;
41594 /* If the target says that inter-unit moves are more expensive
41595 than moving through memory, then don't generate them. */
41596 if ((SSE_CLASS_P (class1) && !TARGET_INTER_UNIT_MOVES_FROM_VEC)
41597 || (SSE_CLASS_P (class2) && !TARGET_INTER_UNIT_MOVES_TO_VEC))
41598 return true;
41600 /* Between SSE and general, we have moves no larger than word size. */
41601 if (GET_MODE_SIZE (mode) > UNITS_PER_WORD)
41602 return true;
41605 return false;
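/* For example (illustrative): on a 32-bit target a DImode move between
   SSE_REGS and GENERAL_REGS needs secondary memory, since SSE<->general
   moves are limited to at most word size (4 bytes) there.  */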
41608 bool
41609 ix86_secondary_memory_needed (enum reg_class class1, enum reg_class class2,
41610 machine_mode mode, int strict)
41612 return inline_secondary_memory_needed (class1, class2, mode, strict);
41615 /* Implement the TARGET_CLASS_MAX_NREGS hook.
41617 On the 80386, this is the size of MODE in words,
41618 except in the FP regs, where a single reg is always enough. */
41620 static unsigned char
41621 ix86_class_max_nregs (reg_class_t rclass, machine_mode mode)
41623 if (MAYBE_INTEGER_CLASS_P (rclass))
41625 if (mode == XFmode)
41626 return (TARGET_64BIT ? 2 : 3);
41627 else if (mode == XCmode)
41628 return (TARGET_64BIT ? 4 : 6);
41629 else
41630 return ((GET_MODE_SIZE (mode) + UNITS_PER_WORD - 1) / UNITS_PER_WORD);
41632 else
41634 if (COMPLEX_MODE_P (mode))
41635 return 2;
41636 else
41637 return 1;
41641 /* Return true if the registers in CLASS cannot represent the change from
41642 modes FROM to TO. */
41644 bool
41645 ix86_cannot_change_mode_class (machine_mode from, machine_mode to,
41646 enum reg_class regclass)
41648 if (from == to)
41649 return false;
41651 /* x87 registers can't do subreg at all, as all values are reformatted
41652 to extended precision. */
41653 if (MAYBE_FLOAT_CLASS_P (regclass))
41654 return true;
41656 if (MAYBE_SSE_CLASS_P (regclass) || MAYBE_MMX_CLASS_P (regclass))
41658 /* Vector registers do not support QI or HImode loads. If we don't
41659 disallow a change to these modes, reload will assume it's ok to
41660 drop the subreg from (subreg:SI (reg:HI 100) 0). This affects
41661 the vec_dupv4hi pattern. */
41662 if (GET_MODE_SIZE (from) < 4)
41663 return true;
41666 return false;
41669 /* Return the cost of moving data of mode M between a
41670 register and memory. A value of 2 is the default; this cost is
41671 relative to those in `REGISTER_MOVE_COST'.
41673 This function is used extensively by register_move_cost that is used to
41674 build tables at startup. Make it inline in this case.
41675 When IN is 2, return maximum of in and out move cost.
41677 If moving between registers and memory is more expensive than
41678 between two registers, you should define this macro to express the
41679 relative cost.
41681 Model also increased moving costs of QImode registers in non
41682 Q_REGS classes.
41684 static inline int
41685 inline_memory_move_cost (machine_mode mode, enum reg_class regclass,
41686 int in)
41688 int cost;
41689 if (FLOAT_CLASS_P (regclass))
41691 int index;
41692 switch (mode)
41694 case SFmode:
41695 index = 0;
41696 break;
41697 case DFmode:
41698 index = 1;
41699 break;
41700 case XFmode:
41701 index = 2;
41702 break;
41703 default:
41704 return 100;
41706 if (in == 2)
41707 return MAX (ix86_cost->fp_load [index], ix86_cost->fp_store [index]);
41708 return in ? ix86_cost->fp_load [index] : ix86_cost->fp_store [index];
41710 if (SSE_CLASS_P (regclass))
41712 int index;
41713 switch (GET_MODE_SIZE (mode))
41715 case 4:
41716 index = 0;
41717 break;
41718 case 8:
41719 index = 1;
41720 break;
41721 case 16:
41722 index = 2;
41723 break;
41724 default:
41725 return 100;
41727 if (in == 2)
41728 return MAX (ix86_cost->sse_load [index], ix86_cost->sse_store [index]);
41729 return in ? ix86_cost->sse_load [index] : ix86_cost->sse_store [index];
41731 if (MMX_CLASS_P (regclass))
41733 int index;
41734 switch (GET_MODE_SIZE (mode))
41736 case 4:
41737 index = 0;
41738 break;
41739 case 8:
41740 index = 1;
41741 break;
41742 default:
41743 return 100;
41745 if (in == 2)
41746 return MAX (ix86_cost->mmx_load [index], ix86_cost->mmx_store [index]);
41747 return in ? ix86_cost->mmx_load [index] : ix86_cost->mmx_store [index];
41749 switch (GET_MODE_SIZE (mode))
41751 case 1:
41752 if (Q_CLASS_P (regclass) || TARGET_64BIT)
41754 if (!in)
41755 return ix86_cost->int_store[0];
41756 if (TARGET_PARTIAL_REG_DEPENDENCY
41757 && optimize_function_for_speed_p (cfun))
41758 cost = ix86_cost->movzbl_load;
41759 else
41760 cost = ix86_cost->int_load[0];
41761 if (in == 2)
41762 return MAX (cost, ix86_cost->int_store[0]);
41763 return cost;
41765 else
41767 if (in == 2)
41768 return MAX (ix86_cost->movzbl_load, ix86_cost->int_store[0] + 4);
41769 if (in)
41770 return ix86_cost->movzbl_load;
41771 else
41772 return ix86_cost->int_store[0] + 4;
41774 break;
41775 case 2:
41776 if (in == 2)
41777 return MAX (ix86_cost->int_load[1], ix86_cost->int_store[1]);
41778 return in ? ix86_cost->int_load[1] : ix86_cost->int_store[1];
41779 default:
41780 /* Compute number of 32bit moves needed. TFmode is moved as XFmode. */
41781 if (mode == TFmode)
41782 mode = XFmode;
41783 if (in == 2)
41784 cost = MAX (ix86_cost->int_load[2] , ix86_cost->int_store[2]);
41785 else if (in)
41786 cost = ix86_cost->int_load[2];
41787 else
41788 cost = ix86_cost->int_store[2];
41789 return (cost * (((int) GET_MODE_SIZE (mode)
41790 + UNITS_PER_WORD - 1) / UNITS_PER_WORD));
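/* For example (illustrative): on a 32-bit target a DImode value moving
   between memory and GENERAL_REGS falls into the default case above and
   costs two word-sized moves, i.e. 2 * int_load[2] (or int_store[2]).  */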
41794 static int
41795 ix86_memory_move_cost (machine_mode mode, reg_class_t regclass,
41796 bool in)
41798 return inline_memory_move_cost (mode, (enum reg_class) regclass, in ? 1 : 0);
41802 /* Return the cost of moving data from a register in class CLASS1 to
41803 one in class CLASS2.
41805 It is not required that the cost always equal 2 when FROM is the same as TO;
41806 on some machines it is expensive to move between registers if they are not
41807 general registers. */
41809 static int
41810 ix86_register_move_cost (machine_mode mode, reg_class_t class1_i,
41811 reg_class_t class2_i)
41813 enum reg_class class1 = (enum reg_class) class1_i;
41814 enum reg_class class2 = (enum reg_class) class2_i;
41816 /* In case we require secondary memory, compute cost of the store followed
41817 by load. In order to avoid bad register allocation choices, we need
41818 for this to be *at least* as high as the symmetric MEMORY_MOVE_COST. */
41820 if (inline_secondary_memory_needed (class1, class2, mode, 0))
41822 int cost = 1;
41824 cost += inline_memory_move_cost (mode, class1, 2);
41825 cost += inline_memory_move_cost (mode, class2, 2);
41827 /* In case of copying from a general purpose register we may emit multiple
41828 stores followed by a single load, causing a memory-size-mismatch stall.
41829 Count this as an arbitrarily high cost of 20.  */
41830 if (targetm.class_max_nregs (class1, mode)
41831 > targetm.class_max_nregs (class2, mode))
41832 cost += 20;
41834 /* In the case of FP/MMX moves, the registers actually overlap, and we
41835 have to switch modes in order to treat them differently. */
41836 if ((MMX_CLASS_P (class1) && MAYBE_FLOAT_CLASS_P (class2))
41837 || (MMX_CLASS_P (class2) && MAYBE_FLOAT_CLASS_P (class1)))
41838 cost += 20;
41840 return cost;
41843 /* Moves between SSE/MMX and integer unit are expensive. */
41844 if (MMX_CLASS_P (class1) != MMX_CLASS_P (class2)
41845 || SSE_CLASS_P (class1) != SSE_CLASS_P (class2))
41847 /* ??? By keeping returned value relatively high, we limit the number
41848 of moves between integer and MMX/SSE registers for all targets.
41849 Additionally, a high value prevents a problem with x86_modes_tieable_p(),
41850 where integer modes in MMX/SSE registers are not tieable
41851 because of missing QImode and HImode moves to, from or between
41852 MMX/SSE registers. */
41853 return MAX (8, ix86_cost->mmxsse_to_integer);
41855 if (MAYBE_FLOAT_CLASS_P (class1))
41856 return ix86_cost->fp_move;
41857 if (MAYBE_SSE_CLASS_P (class1))
41858 return ix86_cost->sse_move;
41859 if (MAYBE_MMX_CLASS_P (class1))
41860 return ix86_cost->mmx_move;
41861 return 2;
41864 /* Return TRUE if hard register REGNO can hold a value of machine-mode
41865 MODE. */
41867 bool
41868 ix86_hard_regno_mode_ok (int regno, machine_mode mode)
41870 /* Flags and only flags can only hold CCmode values. */
41871 if (CC_REGNO_P (regno))
41872 return GET_MODE_CLASS (mode) == MODE_CC;
41873 if (GET_MODE_CLASS (mode) == MODE_CC
41874 || GET_MODE_CLASS (mode) == MODE_RANDOM
41875 || GET_MODE_CLASS (mode) == MODE_PARTIAL_INT)
41876 return false;
41877 if (STACK_REGNO_P (regno))
41878 return VALID_FP_MODE_P (mode);
41879 if (MASK_REGNO_P (regno))
41880 return (VALID_MASK_REG_MODE (mode)
41881 || (TARGET_AVX512BW
41882 && VALID_MASK_AVX512BW_MODE (mode)));
41883 if (BND_REGNO_P (regno))
41884 return VALID_BND_REG_MODE (mode);
41885 if (SSE_REGNO_P (regno))
41887 /* We implement the move patterns for all vector modes into and
41888 out of SSE registers, even when no operation instructions
41889 are available. */
41891 /* For AVX-512 we allow, regardless of regno:
41892 - XI mode
41893 - any 512-bit wide vector mode
41894 - any scalar mode. */
41895 if (TARGET_AVX512F
41896 && (mode == XImode
41897 || VALID_AVX512F_REG_MODE (mode)
41898 || VALID_AVX512F_SCALAR_MODE (mode)))
41899 return true;
41901 /* TODO check for QI/HI scalars. */
41902 /* AVX512VL allows SSE regs 16+ for 128/256-bit modes.  */
41903 if (TARGET_AVX512VL
41904 && (mode == OImode
41905 || mode == TImode
41906 || VALID_AVX256_REG_MODE (mode)
41907 || VALID_AVX512VL_128_REG_MODE (mode)))
41908 return true;
41910 /* xmm16-xmm31 are only available for AVX-512. */
41911 if (EXT_REX_SSE_REGNO_P (regno))
41912 return false;
41914 /* OImode and AVX modes are available only when AVX is enabled. */
41915 return ((TARGET_AVX
41916 && VALID_AVX256_REG_OR_OI_MODE (mode))
41917 || VALID_SSE_REG_MODE (mode)
41918 || VALID_SSE2_REG_MODE (mode)
41919 || VALID_MMX_REG_MODE (mode)
41920 || VALID_MMX_REG_MODE_3DNOW (mode));
41922 if (MMX_REGNO_P (regno))
41924 /* We implement the move patterns for 3DNOW modes even in MMX mode,
41925 so if the register is available at all, then we can move data of
41926 the given mode into or out of it. */
41927 return (VALID_MMX_REG_MODE (mode)
41928 || VALID_MMX_REG_MODE_3DNOW (mode));
41931 if (mode == QImode)
41933 /* Take care for QImode values - they can be in non-QI regs,
41934 but then they do cause partial register stalls. */
41935 if (ANY_QI_REGNO_P (regno))
41936 return true;
41937 if (!TARGET_PARTIAL_REG_STALL)
41938 return true;
41939 /* LRA checks if the hard register is OK for the given mode.
41940 QImode values can live in non-QI regs, so we allow all
41941 registers here. */
41942 if (lra_in_progress)
41943 return true;
41944 return !can_create_pseudo_p ();
41946 /* We handle both integer and floats in the general purpose registers. */
41947 else if (VALID_INT_MODE_P (mode))
41948 return true;
41949 else if (VALID_FP_MODE_P (mode))
41950 return true;
41951 else if (VALID_DFP_MODE_P (mode))
41952 return true;
41953 /* Lots of MMX code casts 8 byte vector modes to DImode. If we then go
41954 on to use that value in smaller contexts, this can easily force a
41955 pseudo to be allocated to GENERAL_REGS. Since this is no worse than
41956 supporting DImode, allow it. */
41957 else if (VALID_MMX_REG_MODE_3DNOW (mode) || VALID_MMX_REG_MODE (mode))
41958 return true;
41960 return false;
41963 /* A subroutine of ix86_modes_tieable_p. Return true if MODE is a
41964 tieable integer mode. */
41966 static bool
41967 ix86_tieable_integer_mode_p (machine_mode mode)
41969 switch (mode)
41971 case HImode:
41972 case SImode:
41973 return true;
41975 case QImode:
41976 return TARGET_64BIT || !TARGET_PARTIAL_REG_STALL;
41978 case DImode:
41979 return TARGET_64BIT;
41981 default:
41982 return false;
41986 /* Return true if MODE1 is accessible in a register that can hold MODE2
41987 without copying. That is, all register classes that can hold MODE2
41988 can also hold MODE1. */
41990 bool
41991 ix86_modes_tieable_p (machine_mode mode1, machine_mode mode2)
41993 if (mode1 == mode2)
41994 return true;
41996 if (ix86_tieable_integer_mode_p (mode1)
41997 && ix86_tieable_integer_mode_p (mode2))
41998 return true;
42000 /* MODE2 being XFmode implies fp stack or general regs, which means we
42001 can tie any smaller floating point modes to it. Note that we do not
42002 tie this with TFmode. */
42003 if (mode2 == XFmode)
42004 return mode1 == SFmode || mode1 == DFmode;
42006 /* MODE2 being DFmode implies fp stack, general or sse regs, which means
42007 that we can tie it with SFmode. */
42008 if (mode2 == DFmode)
42009 return mode1 == SFmode;
42011 /* If MODE2 is only appropriate for an SSE register, then tie with
42012 any other mode acceptable to SSE registers. */
42013 if (GET_MODE_SIZE (mode2) == 32
42014 && ix86_hard_regno_mode_ok (FIRST_SSE_REG, mode2))
42015 return (GET_MODE_SIZE (mode1) == 32
42016 && ix86_hard_regno_mode_ok (FIRST_SSE_REG, mode1));
42017 if (GET_MODE_SIZE (mode2) == 16
42018 && ix86_hard_regno_mode_ok (FIRST_SSE_REG, mode2))
42019 return (GET_MODE_SIZE (mode1) == 16
42020 && ix86_hard_regno_mode_ok (FIRST_SSE_REG, mode1));
42022 /* If MODE2 is appropriate for an MMX register, then tie
42023 with any other mode acceptable to MMX registers. */
42024 if (GET_MODE_SIZE (mode2) == 8
42025 && ix86_hard_regno_mode_ok (FIRST_MMX_REG, mode2))
42026 return (GET_MODE_SIZE (mode1) == 8
42027 && ix86_hard_regno_mode_ok (FIRST_MMX_REG, mode1));
42029 return false;
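/* Illustrative examples of the tieability rules above (annotation, not
   part of the original source), assuming a generic -m64 target:

     ix86_modes_tieable_p (SImode, DImode)     -> true   (both tieable ints)
     ix86_modes_tieable_p (SFmode, XFmode)     -> true   (fp stack / general regs)
     ix86_modes_tieable_p (DFmode, SFmode)     -> false  (SFmode ties to DFmode,
                                                          but not the reverse)
     ix86_modes_tieable_p (V4SFmode, V2DImode) -> true   (both 16-byte SSE modes)  */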
42032 /* Return the cost of moving between two registers of mode MODE. */
42034 static int
42035 ix86_set_reg_reg_cost (machine_mode mode)
42037 unsigned int units = UNITS_PER_WORD;
42039 switch (GET_MODE_CLASS (mode))
42041 default:
42042 break;
42044 case MODE_CC:
42045 units = GET_MODE_SIZE (CCmode);
42046 break;
42048 case MODE_FLOAT:
42049 if ((TARGET_SSE && mode == TFmode)
42050 || (TARGET_80387 && mode == XFmode)
42051 || ((TARGET_80387 || TARGET_SSE2) && mode == DFmode)
42052 || ((TARGET_80387 || TARGET_SSE) && mode == SFmode))
42053 units = GET_MODE_SIZE (mode);
42054 break;
42056 case MODE_COMPLEX_FLOAT:
42057 if ((TARGET_SSE && mode == TCmode)
42058 || (TARGET_80387 && mode == XCmode)
42059 || ((TARGET_80387 || TARGET_SSE2) && mode == DCmode)
42060 || ((TARGET_80387 || TARGET_SSE) && mode == SCmode))
42061 units = GET_MODE_SIZE (mode);
42062 break;
42064 case MODE_VECTOR_INT:
42065 case MODE_VECTOR_FLOAT:
42066 if ((TARGET_AVX512F && VALID_AVX512F_REG_MODE (mode))
42067 || (TARGET_AVX && VALID_AVX256_REG_MODE (mode))
42068 || (TARGET_SSE2 && VALID_SSE2_REG_MODE (mode))
42069 || (TARGET_SSE && VALID_SSE_REG_MODE (mode))
42070 || (TARGET_MMX && VALID_MMX_REG_MODE (mode)))
42071 units = GET_MODE_SIZE (mode);
42074 /* Return the cost of moving between two registers of mode MODE,
42075 assuming that the move will be in pieces of at most UNITS bytes. */
42076 return COSTS_N_INSNS ((GET_MODE_SIZE (mode) + units - 1) / units);
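/* Worked example for the formula above (annotation, not part of the
   original source): a DImode register-to-register set on -m32 has
   UNITS_PER_WORD == 4, so the cost is COSTS_N_INSNS ((8 + 3) / 4)
   == COSTS_N_INSNS (2); on -m64, or for a V4SFmode set with TARGET_SSE
   where UNITS equals the full mode size, it is COSTS_N_INSNS (1).  */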
42079 /* Compute a (partial) cost for rtx X. Return true if the complete
42080 cost has been computed, and false if subexpressions should be
42081 scanned. In either case, *TOTAL contains the cost result. */
42083 static bool
42084 ix86_rtx_costs (rtx x, int code_i, int outer_code_i, int opno, int *total,
42085 bool speed)
42087 rtx mask;
42088 enum rtx_code code = (enum rtx_code) code_i;
42089 enum rtx_code outer_code = (enum rtx_code) outer_code_i;
42090 machine_mode mode = GET_MODE (x);
42091 const struct processor_costs *cost = speed ? ix86_cost : &ix86_size_cost;
42093 switch (code)
42095 case SET:
42096 if (register_operand (SET_DEST (x), VOIDmode)
42097 && reg_or_0_operand (SET_SRC (x), VOIDmode))
42099 *total = ix86_set_reg_reg_cost (GET_MODE (SET_DEST (x)));
42100 return true;
42102 return false;
42104 case CONST_INT:
42105 case CONST:
42106 case LABEL_REF:
42107 case SYMBOL_REF:
42108 if (TARGET_64BIT && !x86_64_immediate_operand (x, VOIDmode))
42109 *total = 3;
42110 else if (TARGET_64BIT && !x86_64_zext_immediate_operand (x, VOIDmode))
42111 *total = 2;
42112 else if (flag_pic && SYMBOLIC_CONST (x)
42113 && !(TARGET_64BIT
42114 && (GET_CODE (x) == LABEL_REF
42115 || (GET_CODE (x) == SYMBOL_REF
42116 && SYMBOL_REF_LOCAL_P (x))))
42117 /* Use 0 cost for CONST to improve its propagation. */
42118 && (TARGET_64BIT || GET_CODE (x) != CONST))
42119 *total = 1;
42120 else
42121 *total = 0;
42122 return true;
42124 case CONST_WIDE_INT:
42125 *total = 0;
42126 return true;
42128 case CONST_DOUBLE:
42129 switch (standard_80387_constant_p (x))
42131 case 1: /* 0.0 */
42132 *total = 1;
42133 return true;
42134 default: /* Other constants */
42135 *total = 2;
42136 return true;
42137 case 0:
42138 case -1:
42139 break;
42141 if (SSE_FLOAT_MODE_P (mode))
42143 case CONST_VECTOR:
42144 switch (standard_sse_constant_p (x))
42146 case 0:
42147 break;
42148 case 1: /* 0: xor eliminates false dependency */
42149 *total = 0;
42150 return true;
42151 default: /* -1: cmp contains false dependency */
42152 *total = 1;
42153 return true;
42156 /* Fall back to (MEM (SYMBOL_REF)), since that's where
42157 it'll probably end up. Add a penalty for size. */
42158 *total = (COSTS_N_INSNS (1)
42159 + (flag_pic != 0 && !TARGET_64BIT)
42160 + (mode == SFmode ? 0 : mode == DFmode ? 1 : 2));
42161 return true;
42163 case ZERO_EXTEND:
42164 /* The zero extension is often completely free on x86_64, so make
42165 it as cheap as possible. */
42166 if (TARGET_64BIT && mode == DImode
42167 && GET_MODE (XEXP (x, 0)) == SImode)
42168 *total = 1;
42169 else if (TARGET_ZERO_EXTEND_WITH_AND)
42170 *total = cost->add;
42171 else
42172 *total = cost->movzx;
42173 return false;
42175 case SIGN_EXTEND:
42176 *total = cost->movsx;
42177 return false;
42179 case ASHIFT:
42180 if (SCALAR_INT_MODE_P (mode)
42181 && GET_MODE_SIZE (mode) < UNITS_PER_WORD
42182 && CONST_INT_P (XEXP (x, 1)))
42184 HOST_WIDE_INT value = INTVAL (XEXP (x, 1));
42185 if (value == 1)
42187 *total = cost->add;
42188 return false;
42190 if ((value == 2 || value == 3)
42191 && cost->lea <= cost->shift_const)
42193 *total = cost->lea;
42194 return false;
42197 /* FALLTHRU */
42199 case ROTATE:
42200 case ASHIFTRT:
42201 case LSHIFTRT:
42202 case ROTATERT:
42203 if (GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
42205 /* ??? Should be SSE vector operation cost. */
42206 /* At least for published AMD latencies, this really is the same
42207 as the latency for a simple fpu operation like fabs. */
42208 /* V*QImode is emulated with 1-11 insns. */
42209 if (mode == V16QImode || mode == V32QImode)
42211 int count = 11;
42212 if (TARGET_XOP && mode == V16QImode)
42214 /* For XOP we use vpshab, which requires a broadcast of the
42215 value to the variable shift insn. For constants this
42216 means a V16QI const in mem; even when we can perform the
42217 shift with one insn, set the cost to prefer paddb. */
42218 if (CONSTANT_P (XEXP (x, 1)))
42220 *total = (cost->fabs
42221 + rtx_cost (XEXP (x, 0), code, 0, speed)
42222 + (speed ? 2 : COSTS_N_BYTES (16)));
42223 return true;
42225 count = 3;
42227 else if (TARGET_SSSE3)
42228 count = 7;
42229 *total = cost->fabs * count;
42231 else
42232 *total = cost->fabs;
42234 else if (GET_MODE_SIZE (mode) > UNITS_PER_WORD)
42236 if (CONST_INT_P (XEXP (x, 1)))
42238 if (INTVAL (XEXP (x, 1)) > 32)
42239 *total = cost->shift_const + COSTS_N_INSNS (2);
42240 else
42241 *total = cost->shift_const * 2;
42243 else
42245 if (GET_CODE (XEXP (x, 1)) == AND)
42246 *total = cost->shift_var * 2;
42247 else
42248 *total = cost->shift_var * 6 + COSTS_N_INSNS (2);
42251 else
42253 if (CONST_INT_P (XEXP (x, 1)))
42254 *total = cost->shift_const;
42255 else if (GET_CODE (XEXP (x, 1)) == SUBREG
42256 && GET_CODE (XEXP (XEXP (x, 1), 0)) == AND)
42258 /* Return the cost after shift-and truncation. */
42259 *total = cost->shift_var;
42260 return true;
42262 else
42263 *total = cost->shift_var;
42265 return false;
42267 case FMA:
42269 rtx sub;
42271 gcc_assert (FLOAT_MODE_P (mode));
42272 gcc_assert (TARGET_FMA || TARGET_FMA4 || TARGET_AVX512F);
42274 /* ??? SSE scalar/vector cost should be used here. */
42275 /* ??? Bald assumption that fma has the same cost as fmul. */
42276 *total = cost->fmul;
42277 *total += rtx_cost (XEXP (x, 1), FMA, 1, speed);
42279 /* Negation in op0 or op2 is free: FMS, FNMA, FNMS. */
42280 sub = XEXP (x, 0);
42281 if (GET_CODE (sub) == NEG)
42282 sub = XEXP (sub, 0);
42283 *total += rtx_cost (sub, FMA, 0, speed);
42285 sub = XEXP (x, 2);
42286 if (GET_CODE (sub) == NEG)
42287 sub = XEXP (sub, 0);
42288 *total += rtx_cost (sub, FMA, 2, speed);
42289 return true;
42292 case MULT:
42293 if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
42295 /* ??? SSE scalar cost should be used here. */
42296 *total = cost->fmul;
42297 return false;
42299 else if (X87_FLOAT_MODE_P (mode))
42301 *total = cost->fmul;
42302 return false;
42304 else if (FLOAT_MODE_P (mode))
42306 /* ??? SSE vector cost should be used here. */
42307 *total = cost->fmul;
42308 return false;
42310 else if (GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
42312 /* V*QImode is emulated with 7-13 insns. */
42313 if (mode == V16QImode || mode == V32QImode)
42315 int extra = 11;
42316 if (TARGET_XOP && mode == V16QImode)
42317 extra = 5;
42318 else if (TARGET_SSSE3)
42319 extra = 6;
42320 *total = cost->fmul * 2 + cost->fabs * extra;
42322 /* V*DImode is emulated with 5-8 insns. */
42323 else if (mode == V2DImode || mode == V4DImode)
42325 if (TARGET_XOP && mode == V2DImode)
42326 *total = cost->fmul * 2 + cost->fabs * 3;
42327 else
42328 *total = cost->fmul * 3 + cost->fabs * 5;
42330 /* Without sse4.1, we don't have PMULLD; it's emulated with 7
42331 insns, including two PMULUDQ. */
42332 else if (mode == V4SImode && !(TARGET_SSE4_1 || TARGET_AVX))
42333 *total = cost->fmul * 2 + cost->fabs * 5;
42334 else
42335 *total = cost->fmul;
42336 return false;
42338 else
42340 rtx op0 = XEXP (x, 0);
42341 rtx op1 = XEXP (x, 1);
42342 int nbits;
42343 if (CONST_INT_P (XEXP (x, 1)))
42345 unsigned HOST_WIDE_INT value = INTVAL (XEXP (x, 1));
42346 for (nbits = 0; value != 0; value &= value - 1)
42347 nbits++;
42349 else
42350 /* This is arbitrary. */
42351 nbits = 7;
42353 /* Compute costs correctly for widening multiplication. */
42354 if ((GET_CODE (op0) == SIGN_EXTEND || GET_CODE (op0) == ZERO_EXTEND)
42355 && GET_MODE_SIZE (GET_MODE (XEXP (op0, 0))) * 2
42356 == GET_MODE_SIZE (mode))
42358 int is_mulwiden = 0;
42359 machine_mode inner_mode = GET_MODE (op0);
42361 if (GET_CODE (op0) == GET_CODE (op1))
42362 is_mulwiden = 1, op1 = XEXP (op1, 0);
42363 else if (CONST_INT_P (op1))
42365 if (GET_CODE (op0) == SIGN_EXTEND)
42366 is_mulwiden = trunc_int_for_mode (INTVAL (op1), inner_mode)
42367 == INTVAL (op1);
42368 else
42369 is_mulwiden = !(INTVAL (op1) & ~GET_MODE_MASK (inner_mode));
42372 if (is_mulwiden)
42373 op0 = XEXP (op0, 0), mode = GET_MODE (op0);
42376 *total = (cost->mult_init[MODE_INDEX (mode)]
42377 + nbits * cost->mult_bit
42378 + rtx_cost (op0, outer_code, opno, speed)
42379 + rtx_cost (op1, outer_code, opno, speed));
42381 return true;
42384 case DIV:
42385 case UDIV:
42386 case MOD:
42387 case UMOD:
42388 if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
42389 /* ??? SSE cost should be used here. */
42390 *total = cost->fdiv;
42391 else if (X87_FLOAT_MODE_P (mode))
42392 *total = cost->fdiv;
42393 else if (FLOAT_MODE_P (mode))
42394 /* ??? SSE vector cost should be used here. */
42395 *total = cost->fdiv;
42396 else
42397 *total = cost->divide[MODE_INDEX (mode)];
42398 return false;
42400 case PLUS:
42401 if (GET_MODE_CLASS (mode) == MODE_INT
42402 && GET_MODE_SIZE (mode) <= UNITS_PER_WORD)
42404 if (GET_CODE (XEXP (x, 0)) == PLUS
42405 && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
42406 && CONST_INT_P (XEXP (XEXP (XEXP (x, 0), 0), 1))
42407 && CONSTANT_P (XEXP (x, 1)))
42409 HOST_WIDE_INT val = INTVAL (XEXP (XEXP (XEXP (x, 0), 0), 1));
42410 if (val == 2 || val == 4 || val == 8)
42412 *total = cost->lea;
42413 *total += rtx_cost (XEXP (XEXP (x, 0), 1),
42414 outer_code, opno, speed);
42415 *total += rtx_cost (XEXP (XEXP (XEXP (x, 0), 0), 0),
42416 outer_code, opno, speed);
42417 *total += rtx_cost (XEXP (x, 1), outer_code, opno, speed);
42418 return true;
42421 else if (GET_CODE (XEXP (x, 0)) == MULT
42422 && CONST_INT_P (XEXP (XEXP (x, 0), 1)))
42424 HOST_WIDE_INT val = INTVAL (XEXP (XEXP (x, 0), 1));
42425 if (val == 2 || val == 4 || val == 8)
42427 *total = cost->lea;
42428 *total += rtx_cost (XEXP (XEXP (x, 0), 0),
42429 outer_code, opno, speed);
42430 *total += rtx_cost (XEXP (x, 1), outer_code, opno, speed);
42431 return true;
42434 else if (GET_CODE (XEXP (x, 0)) == PLUS)
42436 *total = cost->lea;
42437 *total += rtx_cost (XEXP (XEXP (x, 0), 0),
42438 outer_code, opno, speed);
42439 *total += rtx_cost (XEXP (XEXP (x, 0), 1),
42440 outer_code, opno, speed);
42441 *total += rtx_cost (XEXP (x, 1), outer_code, opno, speed);
42442 return true;
42445 /* FALLTHRU */
42447 case MINUS:
42448 if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
42450 /* ??? SSE cost should be used here. */
42451 *total = cost->fadd;
42452 return false;
42454 else if (X87_FLOAT_MODE_P (mode))
42456 *total = cost->fadd;
42457 return false;
42459 else if (FLOAT_MODE_P (mode))
42461 /* ??? SSE vector cost should be used here. */
42462 *total = cost->fadd;
42463 return false;
42465 /* FALLTHRU */
42467 case AND:
42468 case IOR:
42469 case XOR:
42470 if (GET_MODE_CLASS (mode) == MODE_INT
42471 && GET_MODE_SIZE (mode) > UNITS_PER_WORD)
42473 *total = (cost->add * 2
42474 + (rtx_cost (XEXP (x, 0), outer_code, opno, speed)
42475 << (GET_MODE (XEXP (x, 0)) != DImode))
42476 + (rtx_cost (XEXP (x, 1), outer_code, opno, speed)
42477 << (GET_MODE (XEXP (x, 1)) != DImode)));
42478 return true;
42480 /* FALLTHRU */
42482 case NEG:
42483 if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
42485 /* ??? SSE cost should be used here. */
42486 *total = cost->fchs;
42487 return false;
42489 else if (X87_FLOAT_MODE_P (mode))
42491 *total = cost->fchs;
42492 return false;
42494 else if (FLOAT_MODE_P (mode))
42496 /* ??? SSE vector cost should be used here. */
42497 *total = cost->fchs;
42498 return false;
42500 /* FALLTHRU */
42502 case NOT:
42503 if (GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
42505 /* ??? Should be SSE vector operation cost. */
42506 /* At least for published AMD latencies, this really is the same
42507 as the latency for a simple fpu operation like fabs. */
42508 *total = cost->fabs;
42510 else if (GET_MODE_SIZE (mode) > UNITS_PER_WORD)
42511 *total = cost->add * 2;
42512 else
42513 *total = cost->add;
42514 return false;
42516 case COMPARE:
42517 if (GET_CODE (XEXP (x, 0)) == ZERO_EXTRACT
42518 && XEXP (XEXP (x, 0), 1) == const1_rtx
42519 && CONST_INT_P (XEXP (XEXP (x, 0), 2))
42520 && XEXP (x, 1) == const0_rtx)
42522 /* This kind of construct is implemented using test[bwl].
42523 Treat it as if we had an AND. */
42524 *total = (cost->add
42525 + rtx_cost (XEXP (XEXP (x, 0), 0), outer_code, opno, speed)
42526 + rtx_cost (const1_rtx, outer_code, opno, speed));
42527 return true;
42530 /* The embedded comparison operand is completely free. */
42531 if (!general_operand (XEXP (x, 0), GET_MODE (XEXP (x, 0)))
42532 && XEXP (x, 1) == const0_rtx)
42533 *total = 0;
42535 return false;
42537 case FLOAT_EXTEND:
42538 if (!(SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH))
42539 *total = 0;
42540 return false;
42542 case ABS:
42543 if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
42544 /* ??? SSE cost should be used here. */
42545 *total = cost->fabs;
42546 else if (X87_FLOAT_MODE_P (mode))
42547 *total = cost->fabs;
42548 else if (FLOAT_MODE_P (mode))
42549 /* ??? SSE vector cost should be used here. */
42550 *total = cost->fabs;
42551 return false;
42553 case SQRT:
42554 if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
42555 /* ??? SSE cost should be used here. */
42556 *total = cost->fsqrt;
42557 else if (X87_FLOAT_MODE_P (mode))
42558 *total = cost->fsqrt;
42559 else if (FLOAT_MODE_P (mode))
42560 /* ??? SSE vector cost should be used here. */
42561 *total = cost->fsqrt;
42562 return false;
42564 case UNSPEC:
42565 if (XINT (x, 1) == UNSPEC_TP)
42566 *total = 0;
42567 return false;
42569 case VEC_SELECT:
42570 case VEC_CONCAT:
42571 case VEC_DUPLICATE:
42572 /* ??? Assume all of these vector manipulation patterns are
42573 recognizable, in which case they all pretty much have the
42574 same cost. */
42575 *total = cost->fabs;
42576 return true;
42577 case VEC_MERGE:
42578 mask = XEXP (x, 2);
42579 /* This is masked instruction, assume the same cost,
42580 as nonmasked variant. */
42581 if (TARGET_AVX512F && register_operand (mask, GET_MODE (mask)))
42582 *total = rtx_cost (XEXP (x, 0), outer_code, opno, speed);
42583 else
42584 *total = cost->fabs;
42585 return true;
42587 default:
42588 return false;
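/* Illustrative cost queries handled by the switch above (annotation, not
   part of the original source):

     (set (reg:SI r1) (reg:SI r2))
       -> ix86_set_reg_reg_cost (SImode)

     (plus:SI (mult:SI (reg:SI a) (const_int 4)) (reg:SI b))
       -> cost->lea plus the operand costs, i.e. "b + a*4" is priced
          as a single LEA

     (ashift:SI (reg:SI a) (const_int 1))
       -> cost->add, since adding a register to itself is as cheap as
          a shift by one  */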
42592 #if TARGET_MACHO
42594 static int current_machopic_label_num;
42596 /* Given a symbol name and its associated stub, write out the
42597 definition of the stub. */
42599 void
42600 machopic_output_stub (FILE *file, const char *symb, const char *stub)
42602 unsigned int length;
42603 char *binder_name, *symbol_name, lazy_ptr_name[32];
42604 int label = ++current_machopic_label_num;
42606 /* For 64-bit we shouldn't get here. */
42607 gcc_assert (!TARGET_64BIT);
42609 /* Lose our funky encoding stuff so it doesn't contaminate the stub. */
42610 symb = targetm.strip_name_encoding (symb);
42612 length = strlen (stub);
42613 binder_name = XALLOCAVEC (char, length + 32);
42614 GEN_BINDER_NAME_FOR_STUB (binder_name, stub, length);
42616 length = strlen (symb);
42617 symbol_name = XALLOCAVEC (char, length + 32);
42618 GEN_SYMBOL_NAME_FOR_SYMBOL (symbol_name, symb, length);
42620 sprintf (lazy_ptr_name, "L%d$lz", label);
42622 if (MACHOPIC_ATT_STUB)
42623 switch_to_section (darwin_sections[machopic_picsymbol_stub3_section]);
42624 else if (MACHOPIC_PURE)
42625 switch_to_section (darwin_sections[machopic_picsymbol_stub2_section]);
42626 else
42627 switch_to_section (darwin_sections[machopic_symbol_stub_section]);
42629 fprintf (file, "%s:\n", stub);
42630 fprintf (file, "\t.indirect_symbol %s\n", symbol_name);
42632 if (MACHOPIC_ATT_STUB)
42634 fprintf (file, "\thlt ; hlt ; hlt ; hlt ; hlt\n");
42636 else if (MACHOPIC_PURE)
42638 /* PIC stub. */
42639 /* 25-byte PIC stub using "CALL get_pc_thunk". */
42640 rtx tmp = gen_rtx_REG (SImode, 2 /* ECX */);
42641 output_set_got (tmp, NULL_RTX); /* "CALL ___<cpu>.get_pc_thunk.cx". */
42642 fprintf (file, "LPC$%d:\tmovl\t%s-LPC$%d(%%ecx),%%ecx\n",
42643 label, lazy_ptr_name, label);
42644 fprintf (file, "\tjmp\t*%%ecx\n");
42646 else
42647 fprintf (file, "\tjmp\t*%s\n", lazy_ptr_name);
42649 /* The AT&T-style ("self-modifying") stub is not lazily bound, thus
42650 it needs no stub-binding-helper. */
42651 if (MACHOPIC_ATT_STUB)
42652 return;
42654 fprintf (file, "%s:\n", binder_name);
42656 if (MACHOPIC_PURE)
42658 fprintf (file, "\tlea\t%s-%s(%%ecx),%%ecx\n", lazy_ptr_name, binder_name);
42659 fprintf (file, "\tpushl\t%%ecx\n");
42661 else
42662 fprintf (file, "\tpushl\t$%s\n", lazy_ptr_name);
42664 fputs ("\tjmp\tdyld_stub_binding_helper\n", file);
42666 /* N.B. Keep the correspondence of these
42667 'symbol_ptr/symbol_ptr2/symbol_ptr3' sections consistent with the
42668 old-pic/new-pic/non-pic stubs; altering this will break
42669 compatibility with existing dylibs. */
42670 if (MACHOPIC_PURE)
42672 /* 25-byte PIC stub using "CALL get_pc_thunk". */
42673 switch_to_section (darwin_sections[machopic_lazy_symbol_ptr2_section]);
42675 else
42676 /* 16-byte -mdynamic-no-pic stub. */
42677 switch_to_section(darwin_sections[machopic_lazy_symbol_ptr3_section]);
42679 fprintf (file, "%s:\n", lazy_ptr_name);
42680 fprintf (file, "\t.indirect_symbol %s\n", symbol_name);
42681 fprintf (file, ASM_LONG "%s\n", binder_name);
42683 #endif /* TARGET_MACHO */
42685 /* Order the registers for register allocator. */
42687 void
42688 x86_order_regs_for_local_alloc (void)
42690 int pos = 0;
42691 int i;
42693 /* First allocate the local general purpose registers. */
42694 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
42695 if (GENERAL_REGNO_P (i) && call_used_regs[i])
42696 reg_alloc_order [pos++] = i;
42698 /* Global general purpose registers. */
42699 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
42700 if (GENERAL_REGNO_P (i) && !call_used_regs[i])
42701 reg_alloc_order [pos++] = i;
42703 /* x87 registers come first in case we are doing FP math
42704 using them. */
42705 if (!TARGET_SSE_MATH)
42706 for (i = FIRST_STACK_REG; i <= LAST_STACK_REG; i++)
42707 reg_alloc_order [pos++] = i;
42709 /* SSE registers. */
42710 for (i = FIRST_SSE_REG; i <= LAST_SSE_REG; i++)
42711 reg_alloc_order [pos++] = i;
42712 for (i = FIRST_REX_SSE_REG; i <= LAST_REX_SSE_REG; i++)
42713 reg_alloc_order [pos++] = i;
42715 /* Extended REX SSE registers. */
42716 for (i = FIRST_EXT_REX_SSE_REG; i <= LAST_EXT_REX_SSE_REG; i++)
42717 reg_alloc_order [pos++] = i;
42719 /* Mask registers. */
42720 for (i = FIRST_MASK_REG; i <= LAST_MASK_REG; i++)
42721 reg_alloc_order [pos++] = i;
42723 /* MPX bound registers. */
42724 for (i = FIRST_BND_REG; i <= LAST_BND_REG; i++)
42725 reg_alloc_order [pos++] = i;
42727 /* x87 registers. */
42728 if (TARGET_SSE_MATH)
42729 for (i = FIRST_STACK_REG; i <= LAST_STACK_REG; i++)
42730 reg_alloc_order [pos++] = i;
42732 for (i = FIRST_MMX_REG; i <= LAST_MMX_REG; i++)
42733 reg_alloc_order [pos++] = i;
42735 /* Initialize the rest of the array, as we do not allocate some
42736 registers at all. */
42737 while (pos < FIRST_PSEUDO_REGISTER)
42738 reg_alloc_order [pos++] = 0;
42741 /* Handle a "callee_pop_aggregate_return" attribute; arguments as
42742 in struct attribute_spec.handler. */
42743 static tree
42744 ix86_handle_callee_pop_aggregate_return (tree *node, tree name,
42745 tree args,
42746 int,
42747 bool *no_add_attrs)
42749 if (TREE_CODE (*node) != FUNCTION_TYPE
42750 && TREE_CODE (*node) != METHOD_TYPE
42751 && TREE_CODE (*node) != FIELD_DECL
42752 && TREE_CODE (*node) != TYPE_DECL)
42754 warning (OPT_Wattributes, "%qE attribute only applies to functions",
42755 name);
42756 *no_add_attrs = true;
42757 return NULL_TREE;
42759 if (TARGET_64BIT)
42761 warning (OPT_Wattributes, "%qE attribute only available for 32-bit",
42762 name);
42763 *no_add_attrs = true;
42764 return NULL_TREE;
42766 if (is_attribute_p ("callee_pop_aggregate_return", name))
42768 tree cst;
42770 cst = TREE_VALUE (args);
42771 if (TREE_CODE (cst) != INTEGER_CST)
42773 warning (OPT_Wattributes,
42774 "%qE attribute requires an integer constant argument",
42775 name);
42776 *no_add_attrs = true;
42778 else if (compare_tree_int (cst, 0) != 0
42779 && compare_tree_int (cst, 1) != 0)
42781 warning (OPT_Wattributes,
42782 "argument to %qE attribute is neither zero, nor one",
42783 name);
42784 *no_add_attrs = true;
42787 return NULL_TREE;
42790 return NULL_TREE;
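/* Example usage of the attribute handled above (annotation, not part of
   the original source); it is accepted only for 32-bit targets and the
   argument must be the integer constant 0 or 1:

     struct big result_in_memory (void)
       __attribute__ ((callee_pop_aggregate_return (1)));

   where "struct big" stands for any aggregate type returned in memory.  */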
42793 /* Handle a "ms_abi" or "sysv_abi" attribute; arguments as in
42794 struct attribute_spec.handler. */
42795 static tree
42796 ix86_handle_abi_attribute (tree *node, tree name, tree, int,
42797 bool *no_add_attrs)
42799 if (TREE_CODE (*node) != FUNCTION_TYPE
42800 && TREE_CODE (*node) != METHOD_TYPE
42801 && TREE_CODE (*node) != FIELD_DECL
42802 && TREE_CODE (*node) != TYPE_DECL)
42804 warning (OPT_Wattributes, "%qE attribute only applies to functions",
42805 name);
42806 *no_add_attrs = true;
42807 return NULL_TREE;
42810 /* Can combine regparm with all attributes but fastcall. */
42811 if (is_attribute_p ("ms_abi", name))
42813 if (lookup_attribute ("sysv_abi", TYPE_ATTRIBUTES (*node)))
42815 error ("ms_abi and sysv_abi attributes are not compatible");
42818 return NULL_TREE;
42820 else if (is_attribute_p ("sysv_abi", name))
42822 if (lookup_attribute ("ms_abi", TYPE_ATTRIBUTES (*node)))
42824 error ("ms_abi and sysv_abi attributes are not compatible");
42827 return NULL_TREE;
42830 return NULL_TREE;
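/* Example usage of the attributes handled above (annotation, not part of
   the original source); the two calling-convention overrides are
   mutually exclusive on the same function type:

     void win_style (void) __attribute__ ((ms_abi));
     void sysv_style (void) __attribute__ ((sysv_abi));  */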
42833 /* Handle a "ms_struct" or "gcc_struct" attribute; arguments as in
42834 struct attribute_spec.handler. */
42835 static tree
42836 ix86_handle_struct_attribute (tree *node, tree name, tree, int,
42837 bool *no_add_attrs)
42839 tree *type = NULL;
42840 if (DECL_P (*node))
42842 if (TREE_CODE (*node) == TYPE_DECL)
42843 type = &TREE_TYPE (*node);
42845 else
42846 type = node;
42848 if (!(type && RECORD_OR_UNION_TYPE_P (*type)))
42850 warning (OPT_Wattributes, "%qE attribute ignored",
42851 name);
42852 *no_add_attrs = true;
42855 else if ((is_attribute_p ("ms_struct", name)
42856 && lookup_attribute ("gcc_struct", TYPE_ATTRIBUTES (*type)))
42857 || ((is_attribute_p ("gcc_struct", name)
42858 && lookup_attribute ("ms_struct", TYPE_ATTRIBUTES (*type)))))
42860 warning (OPT_Wattributes, "%qE incompatible attribute ignored",
42861 name);
42862 *no_add_attrs = true;
42865 return NULL_TREE;
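/* Example usage of the attributes handled above (annotation, not part of
   the original source); they apply to struct and union types, and
   combining one with its counterpart is rejected:

     struct __attribute__ ((ms_struct)) s { char c; int i : 4; };  */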
42868 static tree
42869 ix86_handle_fndecl_attribute (tree *node, tree name, tree, int,
42870 bool *no_add_attrs)
42872 if (TREE_CODE (*node) != FUNCTION_DECL)
42874 warning (OPT_Wattributes, "%qE attribute only applies to functions",
42875 name);
42876 *no_add_attrs = true;
42878 return NULL_TREE;
42881 static bool
42882 ix86_ms_bitfield_layout_p (const_tree record_type)
42884 return ((TARGET_MS_BITFIELD_LAYOUT
42885 && !lookup_attribute ("gcc_struct", TYPE_ATTRIBUTES (record_type)))
42886 || lookup_attribute ("ms_struct", TYPE_ATTRIBUTES (record_type)));
42889 /* Returns an expression indicating where the this parameter is
42890 located on entry to the FUNCTION. */
42892 static rtx
42893 x86_this_parameter (tree function)
42895 tree type = TREE_TYPE (function);
42896 bool aggr = aggregate_value_p (TREE_TYPE (type), type) != 0;
42897 int nregs;
42899 if (TARGET_64BIT)
42901 const int *parm_regs;
42903 if (ix86_function_type_abi (type) == MS_ABI)
42904 parm_regs = x86_64_ms_abi_int_parameter_registers;
42905 else
42906 parm_regs = x86_64_int_parameter_registers;
42907 return gen_rtx_REG (Pmode, parm_regs[aggr]);
42910 nregs = ix86_function_regparm (type, function);
42912 if (nregs > 0 && !stdarg_p (type))
42914 int regno;
42915 unsigned int ccvt = ix86_get_callcvt (type);
42917 if ((ccvt & IX86_CALLCVT_FASTCALL) != 0)
42918 regno = aggr ? DX_REG : CX_REG;
42919 else if ((ccvt & IX86_CALLCVT_THISCALL) != 0)
42921 regno = CX_REG;
42922 if (aggr)
42923 return gen_rtx_MEM (SImode,
42924 plus_constant (Pmode, stack_pointer_rtx, 4));
42926 else
42928 regno = AX_REG;
42929 if (aggr)
42931 regno = DX_REG;
42932 if (nregs == 1)
42933 return gen_rtx_MEM (SImode,
42934 plus_constant (Pmode,
42935 stack_pointer_rtx, 4));
42938 return gen_rtx_REG (SImode, regno);
42941 return gen_rtx_MEM (SImode, plus_constant (Pmode, stack_pointer_rtx,
42942 aggr ? 8 : 4));
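/* Illustrative results of the lookup above (annotation, not part of the
   original source): for the 64-bit SysV ABI "this" arrives in %rdi, or
   in %rsi when the return value is an aggregate returned in memory; for
   the 64-bit MS ABI it is %rcx or %rdx respectively.  A plain 32-bit
   cdecl method without regparm instead finds "this" on the stack at
   4(%esp), or 8(%esp) when a hidden aggregate-return pointer is passed
   first.  */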
42945 /* Determine whether x86_output_mi_thunk can succeed. */
42947 static bool
42948 x86_can_output_mi_thunk (const_tree, HOST_WIDE_INT, HOST_WIDE_INT vcall_offset,
42949 const_tree function)
42951 /* 64-bit can handle anything. */
42952 if (TARGET_64BIT)
42953 return true;
42955 /* For 32-bit, everything's fine if we have one free register. */
42956 if (ix86_function_regparm (TREE_TYPE (function), function) < 3)
42957 return true;
42959 /* Need a free register for vcall_offset. */
42960 if (vcall_offset)
42961 return false;
42963 /* Need a free register for GOT references. */
42964 if (flag_pic && !targetm.binds_local_p (function))
42965 return false;
42967 /* Otherwise ok. */
42968 return true;
42971 /* Output the assembler code for a thunk function. THUNK_DECL is the
42972 declaration for the thunk function itself, FUNCTION is the decl for
42973 the target function. DELTA is an immediate constant offset to be
42974 added to THIS. If VCALL_OFFSET is nonzero, the word at
42975 *(*this + vcall_offset) should be added to THIS. */
42977 static void
42978 x86_output_mi_thunk (FILE *file, tree, HOST_WIDE_INT delta,
42979 HOST_WIDE_INT vcall_offset, tree function)
42981 rtx this_param = x86_this_parameter (function);
42982 rtx this_reg, tmp, fnaddr;
42983 unsigned int tmp_regno;
42984 rtx_insn *insn;
42986 if (TARGET_64BIT)
42987 tmp_regno = R10_REG;
42988 else
42990 unsigned int ccvt = ix86_get_callcvt (TREE_TYPE (function));
42991 if ((ccvt & IX86_CALLCVT_FASTCALL) != 0)
42992 tmp_regno = AX_REG;
42993 else if ((ccvt & IX86_CALLCVT_THISCALL) != 0)
42994 tmp_regno = DX_REG;
42995 else
42996 tmp_regno = CX_REG;
42999 emit_note (NOTE_INSN_PROLOGUE_END);
43001 /* If VCALL_OFFSET, we'll need THIS in a register. Might as well
43002 pull it in now and let DELTA benefit. */
43003 if (REG_P (this_param))
43004 this_reg = this_param;
43005 else if (vcall_offset)
43007 /* Put the this parameter into %eax. */
43008 this_reg = gen_rtx_REG (Pmode, AX_REG);
43009 emit_move_insn (this_reg, this_param);
43011 else
43012 this_reg = NULL_RTX;
43014 /* Adjust the this parameter by a fixed constant. */
43015 if (delta)
43017 rtx delta_rtx = GEN_INT (delta);
43018 rtx delta_dst = this_reg ? this_reg : this_param;
43020 if (TARGET_64BIT)
43022 if (!x86_64_general_operand (delta_rtx, Pmode))
43024 tmp = gen_rtx_REG (Pmode, tmp_regno);
43025 emit_move_insn (tmp, delta_rtx);
43026 delta_rtx = tmp;
43030 ix86_emit_binop (PLUS, Pmode, delta_dst, delta_rtx);
43033 /* Adjust the this parameter by a value stored in the vtable. */
43034 if (vcall_offset)
43036 rtx vcall_addr, vcall_mem, this_mem;
43038 tmp = gen_rtx_REG (Pmode, tmp_regno);
43040 this_mem = gen_rtx_MEM (ptr_mode, this_reg);
43041 if (Pmode != ptr_mode)
43042 this_mem = gen_rtx_ZERO_EXTEND (Pmode, this_mem);
43043 emit_move_insn (tmp, this_mem);
43045 /* Adjust the this parameter. */
43046 vcall_addr = plus_constant (Pmode, tmp, vcall_offset);
43047 if (TARGET_64BIT
43048 && !ix86_legitimate_address_p (ptr_mode, vcall_addr, true))
43050 rtx tmp2 = gen_rtx_REG (Pmode, R11_REG);
43051 emit_move_insn (tmp2, GEN_INT (vcall_offset));
43052 vcall_addr = gen_rtx_PLUS (Pmode, tmp, tmp2);
43055 vcall_mem = gen_rtx_MEM (ptr_mode, vcall_addr);
43056 if (Pmode != ptr_mode)
43057 emit_insn (gen_addsi_1_zext (this_reg,
43058 gen_rtx_REG (ptr_mode,
43059 REGNO (this_reg)),
43060 vcall_mem));
43061 else
43062 ix86_emit_binop (PLUS, Pmode, this_reg, vcall_mem);
43065 /* If necessary, drop THIS back to its stack slot. */
43066 if (this_reg && this_reg != this_param)
43067 emit_move_insn (this_param, this_reg);
43069 fnaddr = XEXP (DECL_RTL (function), 0);
43070 if (TARGET_64BIT)
43072 if (!flag_pic || targetm.binds_local_p (function)
43073 || TARGET_PECOFF)
43075 else
43077 tmp = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, fnaddr), UNSPEC_GOTPCREL);
43078 tmp = gen_rtx_CONST (Pmode, tmp);
43079 fnaddr = gen_const_mem (Pmode, tmp);
43082 else
43084 if (!flag_pic || targetm.binds_local_p (function))
43086 #if TARGET_MACHO
43087 else if (TARGET_MACHO)
43089 fnaddr = machopic_indirect_call_target (DECL_RTL (function));
43090 fnaddr = XEXP (fnaddr, 0);
43092 #endif /* TARGET_MACHO */
43093 else
43095 tmp = gen_rtx_REG (Pmode, CX_REG);
43096 output_set_got (tmp, NULL_RTX);
43098 fnaddr = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, fnaddr), UNSPEC_GOT);
43099 fnaddr = gen_rtx_CONST (Pmode, fnaddr);
43100 fnaddr = gen_rtx_PLUS (Pmode, tmp, fnaddr);
43101 fnaddr = gen_const_mem (Pmode, fnaddr);
43105 /* Our sibling call patterns do not allow memories, because we have no
43106 predicate that can distinguish between frame and non-frame memory.
43107 For our purposes here, we can get away with (ab)using a jump pattern,
43108 because we're going to do no optimization. */
43109 if (MEM_P (fnaddr))
43111 if (sibcall_insn_operand (fnaddr, word_mode))
43113 fnaddr = XEXP (DECL_RTL (function), 0);
43114 tmp = gen_rtx_MEM (QImode, fnaddr);
43115 tmp = gen_rtx_CALL (VOIDmode, tmp, const0_rtx);
43116 tmp = emit_call_insn (tmp);
43117 SIBLING_CALL_P (tmp) = 1;
43119 else
43120 emit_jump_insn (gen_indirect_jump (fnaddr));
43122 else
43124 if (ix86_cmodel == CM_LARGE_PIC && SYMBOLIC_CONST (fnaddr))
43126 // CM_LARGE_PIC always uses pseudo PIC register which is
43127 // uninitialized. Since FUNCTION is local and calling it
43128 // doesn't go through PLT, we use scratch register %r11 as
43129 // PIC register and initialize it here.
43130 pic_offset_table_rtx = gen_rtx_REG (Pmode, R11_REG);
43131 ix86_init_large_pic_reg (tmp_regno);
43132 fnaddr = legitimize_pic_address (fnaddr,
43133 gen_rtx_REG (Pmode, tmp_regno));
43136 if (!sibcall_insn_operand (fnaddr, word_mode))
43138 tmp = gen_rtx_REG (word_mode, tmp_regno);
43139 if (GET_MODE (fnaddr) != word_mode)
43140 fnaddr = gen_rtx_ZERO_EXTEND (word_mode, fnaddr);
43141 emit_move_insn (tmp, fnaddr);
43142 fnaddr = tmp;
43145 tmp = gen_rtx_MEM (QImode, fnaddr);
43146 tmp = gen_rtx_CALL (VOIDmode, tmp, const0_rtx);
43147 tmp = emit_call_insn (tmp);
43148 SIBLING_CALL_P (tmp) = 1;
43150 emit_barrier ();
43152 /* Emit just enough of rest_of_compilation to get the insns emitted.
43153 Note that use_thunk calls assemble_start_function et al. */
43154 insn = get_insns ();
43155 shorten_branches (insn);
43156 final_start_function (insn, file, 1);
43157 final (insn, file, 1);
43158 final_end_function ();
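/* Sketch of typical thunk output (annotation, not part of the original
   source): a 64-bit thunk with DELTA == 16 and no vcall offset reduces
   to roughly

       addq  $16, %rdi
       jmp   target_function

   while a nonzero VCALL_OFFSET (say 24) additionally dereferences the
   vtable pointer:

       movq  (%rdi), %r10
       addq  24(%r10), %rdi
       jmp   target_function  */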
43161 static void
43162 x86_file_start (void)
43164 default_file_start ();
43165 if (TARGET_16BIT)
43166 fputs ("\t.code16gcc\n", asm_out_file);
43167 #if TARGET_MACHO
43168 darwin_file_start ();
43169 #endif
43170 if (X86_FILE_START_VERSION_DIRECTIVE)
43171 fputs ("\t.version\t\"01.01\"\n", asm_out_file);
43172 if (X86_FILE_START_FLTUSED)
43173 fputs ("\t.global\t__fltused\n", asm_out_file);
43174 if (ix86_asm_dialect == ASM_INTEL)
43175 fputs ("\t.intel_syntax noprefix\n", asm_out_file);
43179 x86_field_alignment (tree field, int computed)
43181 machine_mode mode;
43182 tree type = TREE_TYPE (field);
43184 if (TARGET_64BIT || TARGET_ALIGN_DOUBLE)
43185 return computed;
43186 mode = TYPE_MODE (strip_array_types (type));
43187 if (mode == DFmode || mode == DCmode
43188 || GET_MODE_CLASS (mode) == MODE_INT
43189 || GET_MODE_CLASS (mode) == MODE_COMPLEX_INT)
43190 return MIN (32, computed);
43191 return computed;
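/* Example effect of the cap above (annotation, not part of the original
   source): on a 32-bit target without -malign-double,

     struct s { char c; double d; };

   places "d" at offset 4 with only 4-byte alignment, matching the
   traditional i386 System V layout; with -m64 or -malign-double the
   natural 8-byte alignment (offset 8) is kept.  */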
43194 /* Print call to TARGET to FILE. */
43196 static void
43197 x86_print_call_or_nop (FILE *file, const char *target)
43199 if (flag_nop_mcount)
43200 fprintf (file, "1:\tnopl 0x00(%%eax,%%eax,1)\n"); /* 5 byte nop. */
43201 else
43202 fprintf (file, "1:\tcall\t%s\n", target);
43205 /* Output assembler code to FILE to increment profiler label # LABELNO
43206 for profiling a function entry. */
43207 void
43208 x86_function_profiler (FILE *file, int labelno ATTRIBUTE_UNUSED)
43210 const char *mcount_name = (flag_fentry ? MCOUNT_NAME_BEFORE_PROLOGUE
43211 : MCOUNT_NAME);
43212 if (TARGET_64BIT)
43214 #ifndef NO_PROFILE_COUNTERS
43215 fprintf (file, "\tleaq\t%sP%d(%%rip),%%r11\n", LPREFIX, labelno);
43216 #endif
43218 if (!TARGET_PECOFF && flag_pic)
43219 fprintf (file, "1:\tcall\t*%s@GOTPCREL(%%rip)\n", mcount_name);
43220 else
43221 x86_print_call_or_nop (file, mcount_name);
43223 else if (flag_pic)
43225 #ifndef NO_PROFILE_COUNTERS
43226 fprintf (file, "\tleal\t%sP%d@GOTOFF(%%ebx),%%" PROFILE_COUNT_REGISTER "\n",
43227 LPREFIX, labelno);
43228 #endif
43229 fprintf (file, "1:\tcall\t*%s@GOT(%%ebx)\n", mcount_name);
43231 else
43233 #ifndef NO_PROFILE_COUNTERS
43234 fprintf (file, "\tmovl\t$%sP%d,%%" PROFILE_COUNT_REGISTER "\n",
43235 LPREFIX, labelno);
43236 #endif
43237 x86_print_call_or_nop (file, mcount_name);
43240 if (flag_record_mcount)
43242 fprintf (file, "\t.section __mcount_loc, \"a\",@progbits\n");
43243 fprintf (file, "\t.%s 1b\n", TARGET_64BIT ? "quad" : "long");
43244 fprintf (file, "\t.previous\n");
43248 /* We don't have exact information about the insn sizes, but we may assume
43249 quite safely that we are informed about all 1 byte insns and memory
43250 address sizes. This is enough to eliminate unnecessary padding in
43251 99% of cases. */
43253 static int
43254 min_insn_size (rtx_insn *insn)
43256 int l = 0, len;
43258 if (!INSN_P (insn) || !active_insn_p (insn))
43259 return 0;
43261 /* Discard alignments we've emitted, and jump instructions. */
43262 if (GET_CODE (PATTERN (insn)) == UNSPEC_VOLATILE
43263 && XINT (PATTERN (insn), 1) == UNSPECV_ALIGN)
43264 return 0;
43266 /* Important case: calls are always 5 bytes.
43267 It is common to have many calls in a row. */
43268 if (CALL_P (insn)
43269 && symbolic_reference_mentioned_p (PATTERN (insn))
43270 && !SIBLING_CALL_P (insn))
43271 return 5;
43272 len = get_attr_length (insn);
43273 if (len <= 1)
43274 return 1;
43276 /* For normal instructions we rely on get_attr_length being exact,
43277 with a few exceptions. */
43278 if (!JUMP_P (insn))
43280 enum attr_type type = get_attr_type (insn);
43282 switch (type)
43284 case TYPE_MULTI:
43285 if (GET_CODE (PATTERN (insn)) == ASM_INPUT
43286 || asm_noperands (PATTERN (insn)) >= 0)
43287 return 0;
43288 break;
43289 case TYPE_OTHER:
43290 case TYPE_FCMP:
43291 break;
43292 default:
43293 /* Otherwise trust get_attr_length. */
43294 return len;
43297 l = get_attr_length_address (insn);
43298 if (l < 4 && symbolic_reference_mentioned_p (PATTERN (insn)))
43299 l = 4;
43301 if (l)
43302 return 1+l;
43303 else
43304 return 2;
43307 #ifdef ASM_OUTPUT_MAX_SKIP_PAD
43309 /* AMD K8 core mispredicts jumps when there are more than 3 jumps in a
43310 16 byte window. */
43312 static void
43313 ix86_avoid_jump_mispredicts (void)
43315 rtx_insn *insn, *start = get_insns ();
43316 int nbytes = 0, njumps = 0;
43317 bool isjump = false;
43319 /* Look for all minimal intervals of instructions containing 4 jumps.
43320 The intervals are bounded by START and INSN. NBYTES is the total size
43321 of the instructions in the interval, including INSN and not including
43322 START. When NBYTES is smaller than 16 bytes, the end of START and
43323 INSN may end up in the same 16-byte page.
43325 The smallest offset in the page at which INSN can start is the case
43326 where START ends at offset 0; the offset of INSN is then NBYTES -
43327 sizeof (INSN). We add a p2align to the 16-byte window with maxskip
43329 15 - NBYTES + sizeof (INSN). Don't consider an asm goto as a jump:
43330 while it can contain a jump, it doesn't have to, since control can reach
43331 its label(s) by other means; we also estimate the minimum length of all asm stmts as 0. */
43332 for (insn = start; insn; insn = NEXT_INSN (insn))
43334 int min_size;
43336 if (LABEL_P (insn))
43338 int align = label_to_alignment (insn);
43339 int max_skip = label_to_max_skip (insn);
43341 if (max_skip > 15)
43342 max_skip = 15;
43343 /* If align > 3, only up to 16 - max_skip - 1 bytes can be
43344 already in the current 16 byte page, because otherwise
43345 ASM_OUTPUT_MAX_SKIP_ALIGN could skip max_skip or fewer
43346 bytes to reach 16 byte boundary. */
43347 if (align <= 0
43348 || (align <= 3 && max_skip != (1 << align) - 1))
43349 max_skip = 0;
43350 if (dump_file)
43351 fprintf (dump_file, "Label %i with max_skip %i\n",
43352 INSN_UID (insn), max_skip);
43353 if (max_skip)
43355 while (nbytes + max_skip >= 16)
43357 start = NEXT_INSN (start);
43358 if ((JUMP_P (start) && asm_noperands (PATTERN (start)) < 0)
43359 || CALL_P (start))
43360 njumps--, isjump = true;
43361 else
43362 isjump = false;
43363 nbytes -= min_insn_size (start);
43366 continue;
43369 min_size = min_insn_size (insn);
43370 nbytes += min_size;
43371 if (dump_file)
43372 fprintf (dump_file, "Insn %i estimated to %i bytes\n",
43373 INSN_UID (insn), min_size);
43374 if ((JUMP_P (insn) && asm_noperands (PATTERN (insn)) < 0)
43375 || CALL_P (insn))
43376 njumps++;
43377 else
43378 continue;
43380 while (njumps > 3)
43382 start = NEXT_INSN (start);
43383 if ((JUMP_P (start) && asm_noperands (PATTERN (start)) < 0)
43384 || CALL_P (start))
43385 njumps--, isjump = true;
43386 else
43387 isjump = false;
43388 nbytes -= min_insn_size (start);
43390 gcc_assert (njumps >= 0);
43391 if (dump_file)
43392 fprintf (dump_file, "Interval %i to %i has %i bytes\n",
43393 INSN_UID (start), INSN_UID (insn), nbytes);
43395 if (njumps == 3 && isjump && nbytes < 16)
43397 int padsize = 15 - nbytes + min_insn_size (insn);
43399 if (dump_file)
43400 fprintf (dump_file, "Padding insn %i by %i bytes!\n",
43401 INSN_UID (insn), padsize);
43402 emit_insn_before (gen_pad (GEN_INT (padsize)), insn);
43406 #endif
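/* Worked example for the padding above (annotation, not part of the
   original source): if the running window already holds three jumps and,
   with the fourth 2-byte jump included, NBYTES == 12, then
   padsize = 15 - 12 + 2 = 5, so a 5-byte pad is emitted before the
   fourth jump and it can no longer share a 16-byte fetch window with
   the previous three.  */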
43408 /* AMD Athlon works faster
43409 when RET is not the destination of a conditional jump or directly
43410 preceded by another jump instruction. We avoid the penalty by inserting
43411 a NOP just before RET instructions in such cases. */
43412 static void
43413 ix86_pad_returns (void)
43415 edge e;
43416 edge_iterator ei;
43418 FOR_EACH_EDGE (e, ei, EXIT_BLOCK_PTR_FOR_FN (cfun)->preds)
43420 basic_block bb = e->src;
43421 rtx_insn *ret = BB_END (bb);
43422 rtx_insn *prev;
43423 bool replace = false;
43425 if (!JUMP_P (ret) || !ANY_RETURN_P (PATTERN (ret))
43426 || optimize_bb_for_size_p (bb))
43427 continue;
43428 for (prev = PREV_INSN (ret); prev; prev = PREV_INSN (prev))
43429 if (active_insn_p (prev) || LABEL_P (prev))
43430 break;
43431 if (prev && LABEL_P (prev))
43433 edge e;
43434 edge_iterator ei;
43436 FOR_EACH_EDGE (e, ei, bb->preds)
43437 if (EDGE_FREQUENCY (e) && e->src->index >= 0
43438 && !(e->flags & EDGE_FALLTHRU))
43440 replace = true;
43441 break;
43444 if (!replace)
43446 prev = prev_active_insn (ret);
43447 if (prev
43448 && ((JUMP_P (prev) && any_condjump_p (prev))
43449 || CALL_P (prev)))
43450 replace = true;
43451 /* Empty functions get branch mispredict even when
43452 the jump destination is not visible to us. */
43453 if (!prev && !optimize_function_for_size_p (cfun))
43454 replace = true;
43456 if (replace)
43458 emit_jump_insn_before (gen_simple_return_internal_long (), ret);
43459 delete_insn (ret);
43464 /* Count the minimum number of instructions in BB. Return 4 if the
43465 number of instructions >= 4. */
43467 static int
43468 ix86_count_insn_bb (basic_block bb)
43470 rtx_insn *insn;
43471 int insn_count = 0;
43473 /* Count number of instructions in this block. Return 4 if the number
43474 of instructions >= 4. */
43475 FOR_BB_INSNS (bb, insn)
43477 /* This only happens in exit blocks. */
43478 if (JUMP_P (insn)
43479 && ANY_RETURN_P (PATTERN (insn)))
43480 break;
43482 if (NONDEBUG_INSN_P (insn)
43483 && GET_CODE (PATTERN (insn)) != USE
43484 && GET_CODE (PATTERN (insn)) != CLOBBER)
43486 insn_count++;
43487 if (insn_count >= 4)
43488 return insn_count;
43492 return insn_count;
43496 /* Count the minimum number of instructions in code path in BB.
43497 Return 4 if the number of instructions >= 4. */
43499 static int
43500 ix86_count_insn (basic_block bb)
43502 edge e;
43503 edge_iterator ei;
43504 int min_prev_count;
43506 /* Only bother counting instructions along paths with no
43507 more than 2 basic blocks between entry and exit. Given
43508 that BB has an edge to exit, determine if a predecessor
43509 of BB has an edge from entry. If so, compute the number
43510 of instructions in the predecessor block. If there
43511 happen to be multiple such blocks, compute the minimum. */
43512 min_prev_count = 4;
43513 FOR_EACH_EDGE (e, ei, bb->preds)
43515 edge prev_e;
43516 edge_iterator prev_ei;
43518 if (e->src == ENTRY_BLOCK_PTR_FOR_FN (cfun))
43520 min_prev_count = 0;
43521 break;
43523 FOR_EACH_EDGE (prev_e, prev_ei, e->src->preds)
43525 if (prev_e->src == ENTRY_BLOCK_PTR_FOR_FN (cfun))
43527 int count = ix86_count_insn_bb (e->src);
43528 if (count < min_prev_count)
43529 min_prev_count = count;
43530 break;
43535 if (min_prev_count < 4)
43536 min_prev_count += ix86_count_insn_bb (bb);
43538 return min_prev_count;
43541 /* Pad short function to 4 instructions. */
43543 static void
43544 ix86_pad_short_function (void)
43546 edge e;
43547 edge_iterator ei;
43549 FOR_EACH_EDGE (e, ei, EXIT_BLOCK_PTR_FOR_FN (cfun)->preds)
43551 rtx_insn *ret = BB_END (e->src);
43552 if (JUMP_P (ret) && ANY_RETURN_P (PATTERN (ret)))
43554 int insn_count = ix86_count_insn (e->src);
43556 /* Pad short function. */
43557 if (insn_count < 4)
43559 rtx_insn *insn = ret;
43561 /* Find epilogue. */
43562 while (insn
43563 && (!NOTE_P (insn)
43564 || NOTE_KIND (insn) != NOTE_INSN_EPILOGUE_BEG))
43565 insn = PREV_INSN (insn);
43567 if (!insn)
43568 insn = ret;
43570 /* Two NOPs count as one instruction. */
43571 insn_count = 2 * (4 - insn_count);
43572 emit_insn_before (gen_nops (GEN_INT (insn_count)), insn);
43578 /* Fix up a Windows system unwinder issue. If an EH region falls through into
43579 the epilogue, the Windows system unwinder will apply epilogue logic and
43580 produce incorrect offsets. This can be avoided by adding a nop between
43581 the last insn that can throw and the first insn of the epilogue. */
43583 static void
43584 ix86_seh_fixup_eh_fallthru (void)
43586 edge e;
43587 edge_iterator ei;
43589 FOR_EACH_EDGE (e, ei, EXIT_BLOCK_PTR_FOR_FN (cfun)->preds)
43591 rtx_insn *insn, *next;
43593 /* Find the beginning of the epilogue. */
43594 for (insn = BB_END (e->src); insn != NULL; insn = PREV_INSN (insn))
43595 if (NOTE_P (insn) && NOTE_KIND (insn) == NOTE_INSN_EPILOGUE_BEG)
43596 break;
43597 if (insn == NULL)
43598 continue;
43600 /* We only care about preceding insns that can throw. */
43601 insn = prev_active_insn (insn);
43602 if (insn == NULL || !can_throw_internal (insn))
43603 continue;
43605 /* Do not separate calls from their debug information. */
43606 for (next = NEXT_INSN (insn); next != NULL; next = NEXT_INSN (next))
43607 if (NOTE_P (next)
43608 && (NOTE_KIND (next) == NOTE_INSN_VAR_LOCATION
43609 || NOTE_KIND (next) == NOTE_INSN_CALL_ARG_LOCATION))
43610 insn = next;
43611 else
43612 break;
43614 emit_insn_after (gen_nops (const1_rtx), insn);
43618 /* Implement machine specific optimizations. We implement padding of returns
43619 for K8 CPUs and a pass to avoid 4 jumps in a single 16 byte window. */
43620 static void
43621 ix86_reorg (void)
43623 /* We are freeing block_for_insn in the toplev to keep compatibility
43624 with old MDEP_REORGS that are not CFG based. Recompute it now. */
43625 compute_bb_for_insn ();
43627 if (TARGET_SEH && current_function_has_exception_handlers ())
43628 ix86_seh_fixup_eh_fallthru ();
43630 if (optimize && optimize_function_for_speed_p (cfun))
43632 if (TARGET_PAD_SHORT_FUNCTION)
43633 ix86_pad_short_function ();
43634 else if (TARGET_PAD_RETURNS)
43635 ix86_pad_returns ();
43636 #ifdef ASM_OUTPUT_MAX_SKIP_PAD
43637 if (TARGET_FOUR_JUMP_LIMIT)
43638 ix86_avoid_jump_mispredicts ();
43639 #endif
43643 /* Return nonzero when a QImode register that must be represented via a
43644 REX prefix is used. */
43645 bool
43646 x86_extended_QIreg_mentioned_p (rtx_insn *insn)
43648 int i;
43649 extract_insn_cached (insn);
43650 for (i = 0; i < recog_data.n_operands; i++)
43651 if (GENERAL_REG_P (recog_data.operand[i])
43652 && !QI_REGNO_P (REGNO (recog_data.operand[i])))
43653 return true;
43654 return false;
43657 /* Return true when INSN mentions a register that must be encoded using
43658 a REX prefix. */
43659 bool
43660 x86_extended_reg_mentioned_p (rtx insn)
43662 subrtx_iterator::array_type array;
43663 FOR_EACH_SUBRTX (iter, array, INSN_P (insn) ? PATTERN (insn) : insn, NONCONST)
43665 const_rtx x = *iter;
43666 if (REG_P (x)
43667 && (REX_INT_REGNO_P (REGNO (x)) || REX_SSE_REGNO_P (REGNO (x))))
43668 return true;
43670 return false;
43673 /* If profitable, negate (without causing overflow) integer constant
43674 of mode MODE at location LOC. Return true in this case. */
43675 bool
43676 x86_maybe_negate_const_int (rtx *loc, machine_mode mode)
43678 HOST_WIDE_INT val;
43680 if (!CONST_INT_P (*loc))
43681 return false;
43683 switch (mode)
43685 case DImode:
43686 /* DImode x86_64 constants must fit in 32 bits. */
43687 gcc_assert (x86_64_immediate_operand (*loc, mode));
43689 mode = SImode;
43690 break;
43692 case SImode:
43693 case HImode:
43694 case QImode:
43695 break;
43697 default:
43698 gcc_unreachable ();
43701 /* Avoid overflows. */
43702 if (mode_signbit_p (mode, *loc))
43703 return false;
43705 val = INTVAL (*loc);
43707 /* Make things pretty and `subl $4,%eax' rather than `addl $-4,%eax'.
43708 Exceptions: -128 encodes smaller than 128, so swap sign and op. */
43709 if ((val < 0 && val != -128)
43710 || val == 128)
43712 *loc = GEN_INT (-val);
43713 return true;
43716 return false;
43719 /* Generate an unsigned DImode/SImode to FP conversion. This is the same code
43720 optabs would emit if we didn't have TFmode patterns. */
43722 void
43723 x86_emit_floatuns (rtx operands[2])
43725 rtx_code_label *neglab, *donelab;
43726 rtx i0, i1, f0, in, out;
43727 machine_mode mode, inmode;
43729 inmode = GET_MODE (operands[1]);
43730 gcc_assert (inmode == SImode || inmode == DImode);
43732 out = operands[0];
43733 in = force_reg (inmode, operands[1]);
43734 mode = GET_MODE (out);
43735 neglab = gen_label_rtx ();
43736 donelab = gen_label_rtx ();
43737 f0 = gen_reg_rtx (mode);
43739 emit_cmp_and_jump_insns (in, const0_rtx, LT, const0_rtx, inmode, 0, neglab);
43741 expand_float (out, in, 0);
43743 emit_jump_insn (gen_jump (donelab));
43744 emit_barrier ();
43746 emit_label (neglab);
43748 i0 = expand_simple_binop (inmode, LSHIFTRT, in, const1_rtx, NULL,
43749 1, OPTAB_DIRECT);
43750 i1 = expand_simple_binop (inmode, AND, in, const1_rtx, NULL,
43751 1, OPTAB_DIRECT);
43752 i0 = expand_simple_binop (inmode, IOR, i0, i1, i0, 1, OPTAB_DIRECT);
43754 expand_float (f0, i0, 0);
43756 emit_insn (gen_rtx_SET (out, gen_rtx_PLUS (mode, f0, f0)));
43758 emit_label (donelab);
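/* C-level sketch of the expansion above (annotation, not part of the
   original source); for a negative input the value is halved with its
   low bit folded in to preserve rounding, converted as a signed number,
   and then doubled:

     double u64_to_double (unsigned long long in)
     {
       if ((long long) in >= 0)
         return (double) (long long) in;
       unsigned long long half = (in >> 1) | (in & 1);
       double f = (double) (long long) half;
       return f + f;
     }
   */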
43761 static bool canonicalize_perm (struct expand_vec_perm_d *d);
43762 static bool expand_vec_perm_1 (struct expand_vec_perm_d *d);
43763 static bool expand_vec_perm_broadcast_1 (struct expand_vec_perm_d *d);
43764 static bool expand_vec_perm_palignr (struct expand_vec_perm_d *d, bool);
43766 /* Get a vector mode of the same size as the original but with elements
43767 twice as wide. This is only guaranteed to apply to integral vectors. */
43769 static inline machine_mode
43770 get_mode_wider_vector (machine_mode o)
43772 /* ??? Rely on the ordering that genmodes.c gives to vectors. */
43773 machine_mode n = GET_MODE_WIDER_MODE (o);
43774 gcc_assert (GET_MODE_NUNITS (o) == GET_MODE_NUNITS (n) * 2);
43775 gcc_assert (GET_MODE_SIZE (o) == GET_MODE_SIZE (n));
43776 return n;
43779 /* A subroutine of ix86_expand_vector_init_duplicate. Tries to
43780 fill target with val via vec_duplicate. */
43782 static bool
43783 ix86_vector_duplicate_value (machine_mode mode, rtx target, rtx val)
43785 bool ok;
43786 rtx_insn *insn;
43787 rtx dup;
43789 /* First attempt to recognize VAL as-is. */
43790 dup = gen_rtx_VEC_DUPLICATE (mode, val);
43791 insn = emit_insn (gen_rtx_SET (target, dup));
43792 if (recog_memoized (insn) < 0)
43794 rtx_insn *seq;
43795 /* If that fails, force VAL into a register. */
43797 start_sequence ();
43798 XEXP (dup, 0) = force_reg (GET_MODE_INNER (mode), val);
43799 seq = get_insns ();
43800 end_sequence ();
43801 if (seq)
43802 emit_insn_before (seq, insn);
43804 ok = recog_memoized (insn) >= 0;
43805 gcc_assert (ok);
43807 return true;
43810 /* A subroutine of ix86_expand_vector_init. Store into TARGET a vector
43811 with all elements equal to VAR. Return true if successful. */
43813 static bool
43814 ix86_expand_vector_init_duplicate (bool mmx_ok, machine_mode mode,
43815 rtx target, rtx val)
43817 bool ok;
43819 switch (mode)
43821 case V2SImode:
43822 case V2SFmode:
43823 if (!mmx_ok)
43824 return false;
43825 /* FALLTHRU */
43827 case V4DFmode:
43828 case V4DImode:
43829 case V8SFmode:
43830 case V8SImode:
43831 case V2DFmode:
43832 case V2DImode:
43833 case V4SFmode:
43834 case V4SImode:
43835 case V16SImode:
43836 case V8DImode:
43837 case V16SFmode:
43838 case V8DFmode:
43839 return ix86_vector_duplicate_value (mode, target, val);
43841 case V4HImode:
43842 if (!mmx_ok)
43843 return false;
43844 if (TARGET_SSE || TARGET_3DNOW_A)
43846 rtx x;
43848 val = gen_lowpart (SImode, val);
43849 x = gen_rtx_TRUNCATE (HImode, val);
43850 x = gen_rtx_VEC_DUPLICATE (mode, x);
43851 emit_insn (gen_rtx_SET (target, x));
43852 return true;
43854 goto widen;
43856 case V8QImode:
43857 if (!mmx_ok)
43858 return false;
43859 goto widen;
43861 case V8HImode:
43862 if (TARGET_AVX2)
43863 return ix86_vector_duplicate_value (mode, target, val);
43865 if (TARGET_SSE2)
43867 struct expand_vec_perm_d dperm;
43868 rtx tmp1, tmp2;
43870 permute:
43871 memset (&dperm, 0, sizeof (dperm));
43872 dperm.target = target;
43873 dperm.vmode = mode;
43874 dperm.nelt = GET_MODE_NUNITS (mode);
43875 dperm.op0 = dperm.op1 = gen_reg_rtx (mode);
43876 dperm.one_operand_p = true;
43878 /* Extend to SImode using a paradoxical SUBREG. */
43879 tmp1 = gen_reg_rtx (SImode);
43880 emit_move_insn (tmp1, gen_lowpart (SImode, val));
43882 /* Insert the SImode value as low element of a V4SImode vector. */
43883 tmp2 = gen_reg_rtx (V4SImode);
43884 emit_insn (gen_vec_setv4si_0 (tmp2, CONST0_RTX (V4SImode), tmp1));
43885 emit_move_insn (dperm.op0, gen_lowpart (mode, tmp2));
43887 ok = (expand_vec_perm_1 (&dperm)
43888 || expand_vec_perm_broadcast_1 (&dperm));
43889 gcc_assert (ok);
43890 return ok;
43892 goto widen;
43894 case V16QImode:
43895 if (TARGET_AVX2)
43896 return ix86_vector_duplicate_value (mode, target, val);
43898 if (TARGET_SSE2)
43899 goto permute;
43900 goto widen;
43902 widen:
43903 /* Replicate the value once into the next wider mode and recurse. */
43905 machine_mode smode, wsmode, wvmode;
43906 rtx x;
43908 smode = GET_MODE_INNER (mode);
43909 wvmode = get_mode_wider_vector (mode);
43910 wsmode = GET_MODE_INNER (wvmode);
43912 val = convert_modes (wsmode, smode, val, true);
43913 x = expand_simple_binop (wsmode, ASHIFT, val,
43914 GEN_INT (GET_MODE_BITSIZE (smode)),
43915 NULL_RTX, 1, OPTAB_LIB_WIDEN);
43916 val = expand_simple_binop (wsmode, IOR, val, x, x, 1, OPTAB_LIB_WIDEN);
43918 x = gen_reg_rtx (wvmode);
43919 ok = ix86_expand_vector_init_duplicate (mmx_ok, wvmode, x, val);
43920 gcc_assert (ok);
43921 emit_move_insn (target, gen_lowpart (GET_MODE (target), x));
43922 return ok;
43925 case V16HImode:
43926 case V32QImode:
43927 if (TARGET_AVX2)
43928 return ix86_vector_duplicate_value (mode, target, val);
43929 else
43931 machine_mode hvmode = (mode == V16HImode ? V8HImode : V16QImode);
43932 rtx x = gen_reg_rtx (hvmode);
43934 ok = ix86_expand_vector_init_duplicate (false, hvmode, x, val);
43935 gcc_assert (ok);
43937 x = gen_rtx_VEC_CONCAT (mode, x, x);
43938 emit_insn (gen_rtx_SET (target, x));
43940 return true;
43942 case V64QImode:
43943 case V32HImode:
43944 if (TARGET_AVX512BW)
43945 return ix86_vector_duplicate_value (mode, target, val);
43946 else
43948 machine_mode hvmode = (mode == V32HImode ? V16HImode : V32QImode);
43949 rtx x = gen_reg_rtx (hvmode);
43951 ok = ix86_expand_vector_init_duplicate (false, hvmode, x, val);
43952 gcc_assert (ok);
43954 x = gen_rtx_VEC_CONCAT (mode, x, x);
43955 emit_insn (gen_rtx_SET (target, x));
43957 return true;
43959 default:
43960 return false;
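/* Illustrative paths through the duplicate expansion above (annotation,
   not part of the original source): with AVX2 a V8SImode broadcast goes
   straight through ix86_vector_duplicate_value (a single vpbroadcastd);
   with only SSE2 a V8HImode broadcast instead inserts the scalar as the
   low element of a V4SImode vector and expands a broadcast permutation;
   and a V16HImode broadcast without AVX2 builds a V8HImode duplicate and
   concatenates it with itself.  */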
43964 /* A subroutine of ix86_expand_vector_init. Store into TARGET a vector
43965 whose ONE_VAR element is VAR, and other elements are zero. Return true
43966 if successful. */
43968 static bool
43969 ix86_expand_vector_init_one_nonzero (bool mmx_ok, machine_mode mode,
43970 rtx target, rtx var, int one_var)
43972 machine_mode vsimode;
43973 rtx new_target;
43974 rtx x, tmp;
43975 bool use_vector_set = false;
43977 switch (mode)
43979 case V2DImode:
43980 /* For SSE4.1, we normally use vector set. But if the second
43981 element is zero and inter-unit moves are OK, we use movq
43982 instead. */
43983 use_vector_set = (TARGET_64BIT && TARGET_SSE4_1
43984 && !(TARGET_INTER_UNIT_MOVES_TO_VEC
43985 && one_var == 0));
43986 break;
43987 case V16QImode:
43988 case V4SImode:
43989 case V4SFmode:
43990 use_vector_set = TARGET_SSE4_1;
43991 break;
43992 case V8HImode:
43993 use_vector_set = TARGET_SSE2;
43994 break;
43995 case V4HImode:
43996 use_vector_set = TARGET_SSE || TARGET_3DNOW_A;
43997 break;
43998 case V32QImode:
43999 case V16HImode:
44000 case V8SImode:
44001 case V8SFmode:
44002 case V4DFmode:
44003 use_vector_set = TARGET_AVX;
44004 break;
44005 case V4DImode:
44006 /* Use ix86_expand_vector_set in 64bit mode only. */
44007 use_vector_set = TARGET_AVX && TARGET_64BIT;
44008 break;
44009 default:
44010 break;
44013 if (use_vector_set)
44015 emit_insn (gen_rtx_SET (target, CONST0_RTX (mode)));
44016 var = force_reg (GET_MODE_INNER (mode), var);
44017 ix86_expand_vector_set (mmx_ok, target, var, one_var);
44018 return true;
44021 switch (mode)
44023 case V2SFmode:
44024 case V2SImode:
44025 if (!mmx_ok)
44026 return false;
44027 /* FALLTHRU */
44029 case V2DFmode:
44030 case V2DImode:
44031 if (one_var != 0)
44032 return false;
44033 var = force_reg (GET_MODE_INNER (mode), var);
44034 x = gen_rtx_VEC_CONCAT (mode, var, CONST0_RTX (GET_MODE_INNER (mode)));
44035 emit_insn (gen_rtx_SET (target, x));
44036 return true;
44038 case V4SFmode:
44039 case V4SImode:
44040 if (!REG_P (target) || REGNO (target) < FIRST_PSEUDO_REGISTER)
44041 new_target = gen_reg_rtx (mode);
44042 else
44043 new_target = target;
44044 var = force_reg (GET_MODE_INNER (mode), var);
44045 x = gen_rtx_VEC_DUPLICATE (mode, var);
44046 x = gen_rtx_VEC_MERGE (mode, x, CONST0_RTX (mode), const1_rtx);
44047 emit_insn (gen_rtx_SET (new_target, x));
44048 if (one_var != 0)
44050 /* We need to shuffle the value to the correct position, so
44051 create a new pseudo to store the intermediate result. */
44053 /* With SSE2, we can use the integer shuffle insns. */
44054 if (mode != V4SFmode && TARGET_SSE2)
44056 emit_insn (gen_sse2_pshufd_1 (new_target, new_target,
44057 const1_rtx,
44058 GEN_INT (one_var == 1 ? 0 : 1),
44059 GEN_INT (one_var == 2 ? 0 : 1),
44060 GEN_INT (one_var == 3 ? 0 : 1)));
44061 if (target != new_target)
44062 emit_move_insn (target, new_target);
44063 return true;
44066 /* Otherwise convert the intermediate result to V4SFmode and
44067 use the SSE1 shuffle instructions. */
44068 if (mode != V4SFmode)
44070 tmp = gen_reg_rtx (V4SFmode);
44071 emit_move_insn (tmp, gen_lowpart (V4SFmode, new_target));
44073 else
44074 tmp = new_target;
44076 emit_insn (gen_sse_shufps_v4sf (tmp, tmp, tmp,
44077 const1_rtx,
44078 GEN_INT (one_var == 1 ? 0 : 1),
44079 GEN_INT (one_var == 2 ? 0+4 : 1+4),
44080 GEN_INT (one_var == 3 ? 0+4 : 1+4)));
44082 if (mode != V4SFmode)
44083 emit_move_insn (target, gen_lowpart (V4SImode, tmp));
44084 else if (tmp != target)
44085 emit_move_insn (target, tmp);
44087 else if (target != new_target)
44088 emit_move_insn (target, new_target);
44089 return true;
44091 case V8HImode:
44092 case V16QImode:
44093 vsimode = V4SImode;
44094 goto widen;
44095 case V4HImode:
44096 case V8QImode:
44097 if (!mmx_ok)
44098 return false;
44099 vsimode = V2SImode;
44100 goto widen;
44101 widen:
44102 if (one_var != 0)
44103 return false;
44105 /* Zero extend the variable element to SImode and recurse. */
44106 var = convert_modes (SImode, GET_MODE_INNER (mode), var, true);
44108 x = gen_reg_rtx (vsimode);
44109 if (!ix86_expand_vector_init_one_nonzero (mmx_ok, vsimode, x,
44110 var, one_var))
44111 gcc_unreachable ();
44113 emit_move_insn (target, gen_lowpart (mode, x));
44114 return true;
44116 default:
44117 return false;
44121 /* A subroutine of ix86_expand_vector_init. Store into TARGET a vector
44122 consisting of the values in VALS. It is known that all elements
44123 except ONE_VAR are constants. Return true if successful. */
44125 static bool
44126 ix86_expand_vector_init_one_var (bool mmx_ok, machine_mode mode,
44127 rtx target, rtx vals, int one_var)
44129 rtx var = XVECEXP (vals, 0, one_var);
44130 machine_mode wmode;
44131 rtx const_vec, x;
44133 const_vec = copy_rtx (vals);
44134 XVECEXP (const_vec, 0, one_var) = CONST0_RTX (GET_MODE_INNER (mode));
44135 const_vec = gen_rtx_CONST_VECTOR (mode, XVEC (const_vec, 0));
44137 switch (mode)
44139 case V2DFmode:
44140 case V2DImode:
44141 case V2SFmode:
44142 case V2SImode:
44143 /* For the two element vectors, it's just as easy to use
44144 the general case. */
44145 return false;
44147 case V4DImode:
44148 /* Use ix86_expand_vector_set in 64bit mode only. */
44149 if (!TARGET_64BIT)
44150 return false;
44151 case V4DFmode:
44152 case V8SFmode:
44153 case V8SImode:
44154 case V16HImode:
44155 case V32QImode:
44156 case V4SFmode:
44157 case V4SImode:
44158 case V8HImode:
44159 case V4HImode:
44160 break;
44162 case V16QImode:
44163 if (TARGET_SSE4_1)
44164 break;
44165 wmode = V8HImode;
44166 goto widen;
44167 case V8QImode:
44168 wmode = V4HImode;
44169 goto widen;
44170 widen:
44171 /* There's no way to set one QImode entry easily. Combine
44172 the variable value with its adjacent constant value, and
44173 promote to an HImode set. */
44174 x = XVECEXP (vals, 0, one_var ^ 1);
44175 if (one_var & 1)
44177 var = convert_modes (HImode, QImode, var, true);
44178 var = expand_simple_binop (HImode, ASHIFT, var, GEN_INT (8),
44179 NULL_RTX, 1, OPTAB_LIB_WIDEN);
44180 x = GEN_INT (INTVAL (x) & 0xff);
44182 else
44184 var = convert_modes (HImode, QImode, var, true);
44185 x = gen_int_mode (INTVAL (x) << 8, HImode);
44187 if (x != const0_rtx)
44188 var = expand_simple_binop (HImode, IOR, var, x, var,
44189 1, OPTAB_LIB_WIDEN);
44191 x = gen_reg_rtx (wmode);
44192 emit_move_insn (x, gen_lowpart (wmode, const_vec));
44193 ix86_expand_vector_set (mmx_ok, x, var, one_var >> 1);
44195 emit_move_insn (target, gen_lowpart (mode, x));
44196 return true;
44198 default:
44199 return false;
44202 emit_move_insn (target, const_vec);
44203 ix86_expand_vector_set (mmx_ok, target, var, one_var);
44204 return true;
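/* A minimal scalar sketch of the QImode widening trick above (illustrative
   only; pack_qi_pair_sketch is not used anywhere in this file): the
   variable byte and its adjacent constant byte are packed into a single
   HImode value so that one HImode vector-set suffices.  */

static inline unsigned short
pack_qi_pair_sketch (unsigned char var, unsigned char adj_const,
                     bool var_is_odd)
{
  /* On little-endian x86 the odd byte of each pair occupies the high
     8 bits of the containing HImode element.  */
  if (var_is_odd)
    return (unsigned short) ((var << 8) | adj_const);
  return (unsigned short) ((adj_const << 8) | var);
}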
44207 /* A subroutine of ix86_expand_vector_init_general. Use vector
44208 concatenate to handle the most general case: all values variable,
44209 and none identical. */
44211 static void
44212 ix86_expand_vector_init_concat (machine_mode mode,
44213 rtx target, rtx *ops, int n)
44215 machine_mode cmode, hmode = VOIDmode, gmode = VOIDmode;
44216 rtx first[16], second[8], third[4];
44217 rtvec v;
44218 int i, j;
44220 switch (n)
44222 case 2:
44223 switch (mode)
44225 case V16SImode:
44226 cmode = V8SImode;
44227 break;
44228 case V16SFmode:
44229 cmode = V8SFmode;
44230 break;
44231 case V8DImode:
44232 cmode = V4DImode;
44233 break;
44234 case V8DFmode:
44235 cmode = V4DFmode;
44236 break;
44237 case V8SImode:
44238 cmode = V4SImode;
44239 break;
44240 case V8SFmode:
44241 cmode = V4SFmode;
44242 break;
44243 case V4DImode:
44244 cmode = V2DImode;
44245 break;
44246 case V4DFmode:
44247 cmode = V2DFmode;
44248 break;
44249 case V4SImode:
44250 cmode = V2SImode;
44251 break;
44252 case V4SFmode:
44253 cmode = V2SFmode;
44254 break;
44255 case V2DImode:
44256 cmode = DImode;
44257 break;
44258 case V2SImode:
44259 cmode = SImode;
44260 break;
44261 case V2DFmode:
44262 cmode = DFmode;
44263 break;
44264 case V2SFmode:
44265 cmode = SFmode;
44266 break;
44267 default:
44268 gcc_unreachable ();
44271 if (!register_operand (ops[1], cmode))
44272 ops[1] = force_reg (cmode, ops[1]);
44273 if (!register_operand (ops[0], cmode))
44274 ops[0] = force_reg (cmode, ops[0]);
44275 emit_insn (gen_rtx_SET (target, gen_rtx_VEC_CONCAT (mode, ops[0],
44276 ops[1])));
44277 break;
44279 case 4:
44280 switch (mode)
44282 case V4DImode:
44283 cmode = V2DImode;
44284 break;
44285 case V4DFmode:
44286 cmode = V2DFmode;
44287 break;
44288 case V4SImode:
44289 cmode = V2SImode;
44290 break;
44291 case V4SFmode:
44292 cmode = V2SFmode;
44293 break;
44294 default:
44295 gcc_unreachable ();
44297 goto half;
44299 case 8:
44300 switch (mode)
44302 case V8DImode:
44303 cmode = V2DImode;
44304 hmode = V4DImode;
44305 break;
44306 case V8DFmode:
44307 cmode = V2DFmode;
44308 hmode = V4DFmode;
44309 break;
44310 case V8SImode:
44311 cmode = V2SImode;
44312 hmode = V4SImode;
44313 break;
44314 case V8SFmode:
44315 cmode = V2SFmode;
44316 hmode = V4SFmode;
44317 break;
44318 default:
44319 gcc_unreachable ();
44321 goto half;
44323 case 16:
44324 switch (mode)
44326 case V16SImode:
44327 cmode = V2SImode;
44328 hmode = V4SImode;
44329 gmode = V8SImode;
44330 break;
44331 case V16SFmode:
44332 cmode = V2SFmode;
44333 hmode = V4SFmode;
44334 gmode = V8SFmode;
44335 break;
44336 default:
44337 gcc_unreachable ();
44339 goto half;
44341 half:
44342 /* FIXME: We process inputs backward to help RA. PR 36222. */
44343 i = n - 1;
44344 j = (n >> 1) - 1;
44345 for (; i > 0; i -= 2, j--)
44347 first[j] = gen_reg_rtx (cmode);
44348 v = gen_rtvec (2, ops[i - 1], ops[i]);
44349 ix86_expand_vector_init (false, first[j],
44350 gen_rtx_PARALLEL (cmode, v));
44353 n >>= 1;
44354 if (n > 4)
44356 gcc_assert (hmode != VOIDmode);
44357 gcc_assert (gmode != VOIDmode);
44358 for (i = j = 0; i < n; i += 2, j++)
44360 second[j] = gen_reg_rtx (hmode);
44361 ix86_expand_vector_init_concat (hmode, second [j],
44362 &first [i], 2);
44364 n >>= 1;
44365 for (i = j = 0; i < n; i += 2, j++)
44367 third[j] = gen_reg_rtx (gmode);
44368 ix86_expand_vector_init_concat (gmode, third[j],
44369 &second[i], 2);
44371 n >>= 1;
44372 ix86_expand_vector_init_concat (mode, target, third, n);
44374 else if (n > 2)
44376 gcc_assert (hmode != VOIDmode);
44377 for (i = j = 0; i < n; i += 2, j++)
44379 second[j] = gen_reg_rtx (hmode);
44380 ix86_expand_vector_init_concat (hmode, second [j],
44381 &first [i], 2);
44383 n >>= 1;
44384 ix86_expand_vector_init_concat (mode, target, second, n);
44386 else
44387 ix86_expand_vector_init_concat (mode, target, first, n);
44388 break;
44390 default:
44391 gcc_unreachable ();
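/* Worked example of the recursion above for n == 16 (e.g. V16SFmode): the
   sixteen scalars are paired into eight V2SF registers (first[]), those
   into four V4SF (second[]), those into two V8SF (third[]), and a final
   recursive call concatenates the remaining two halves into the 512-bit
   target.  The inputs are walked backwards only to help the register
   allocator, as the FIXME above notes.  */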
44395 /* A subroutine of ix86_expand_vector_init_general. Use vector
44396 interleave to handle the most general case: all values variable,
44397 and none identical. */
44399 static void
44400 ix86_expand_vector_init_interleave (machine_mode mode,
44401 rtx target, rtx *ops, int n)
44403 machine_mode first_imode, second_imode, third_imode, inner_mode;
44404 int i, j;
44405 rtx op0, op1;
44406 rtx (*gen_load_even) (rtx, rtx, rtx);
44407 rtx (*gen_interleave_first_low) (rtx, rtx, rtx);
44408 rtx (*gen_interleave_second_low) (rtx, rtx, rtx);
44410 switch (mode)
44412 case V8HImode:
44413 gen_load_even = gen_vec_setv8hi;
44414 gen_interleave_first_low = gen_vec_interleave_lowv4si;
44415 gen_interleave_second_low = gen_vec_interleave_lowv2di;
44416 inner_mode = HImode;
44417 first_imode = V4SImode;
44418 second_imode = V2DImode;
44419 third_imode = VOIDmode;
44420 break;
44421 case V16QImode:
44422 gen_load_even = gen_vec_setv16qi;
44423 gen_interleave_first_low = gen_vec_interleave_lowv8hi;
44424 gen_interleave_second_low = gen_vec_interleave_lowv4si;
44425 inner_mode = QImode;
44426 first_imode = V8HImode;
44427 second_imode = V4SImode;
44428 third_imode = V2DImode;
44429 break;
44430 default:
44431 gcc_unreachable ();
44434 for (i = 0; i < n; i++)
44436 /* Extend the odd element to SImode using a paradoxical SUBREG. */
44437 op0 = gen_reg_rtx (SImode);
44438 emit_move_insn (op0, gen_lowpart (SImode, ops [i + i]));
44440 /* Insert the SImode value as low element of V4SImode vector. */
44441 op1 = gen_reg_rtx (V4SImode);
44442 op0 = gen_rtx_VEC_MERGE (V4SImode,
44443 gen_rtx_VEC_DUPLICATE (V4SImode,
44444 op0),
44445 CONST0_RTX (V4SImode),
44446 const1_rtx);
44447 emit_insn (gen_rtx_SET (op1, op0));
44449 /* Cast the V4SImode vector back to a vector in the original mode. */
44450 op0 = gen_reg_rtx (mode);
44451 emit_move_insn (op0, gen_lowpart (mode, op1));
44453 /* Load even elements into the second position. */
44454 emit_insn (gen_load_even (op0,
44455 force_reg (inner_mode,
44456 ops [i + i + 1]),
44457 const1_rtx));
44459 /* Cast vector to FIRST_IMODE vector. */
44460 ops[i] = gen_reg_rtx (first_imode);
44461 emit_move_insn (ops[i], gen_lowpart (first_imode, op0));
44464 /* Interleave low FIRST_IMODE vectors. */
44465 for (i = j = 0; i < n; i += 2, j++)
44467 op0 = gen_reg_rtx (first_imode);
44468 emit_insn (gen_interleave_first_low (op0, ops[i], ops[i + 1]));
44470 /* Cast FIRST_IMODE vector to SECOND_IMODE vector. */
44471 ops[j] = gen_reg_rtx (second_imode);
44472 emit_move_insn (ops[j], gen_lowpart (second_imode, op0));
44475 /* Interleave low SECOND_IMODE vectors. */
44476 switch (second_imode)
44478 case V4SImode:
44479 for (i = j = 0; i < n / 2; i += 2, j++)
44481 op0 = gen_reg_rtx (second_imode);
44482 emit_insn (gen_interleave_second_low (op0, ops[i],
44483 ops[i + 1]));
44485 /* Cast the SECOND_IMODE vector to the THIRD_IMODE
44486 vector. */
44487 ops[j] = gen_reg_rtx (third_imode);
44488 emit_move_insn (ops[j], gen_lowpart (third_imode, op0));
44490 second_imode = V2DImode;
44491 gen_interleave_second_low = gen_vec_interleave_lowv2di;
44492 /* FALLTHRU */
44494 case V2DImode:
44495 op0 = gen_reg_rtx (second_imode);
44496 emit_insn (gen_interleave_second_low (op0, ops[0],
44497 ops[1]));
44499 /* Cast the SECOND_IMODE vector back to a vector in the original
44500 mode. */
44501 emit_insn (gen_rtx_SET (target, gen_lowpart (mode, op0)));
44502 break;
44504 default:
44505 gcc_unreachable ();
44509 /* A subroutine of ix86_expand_vector_init. Handle the most general case:
44510 all values variable, and none identical. */
44512 static void
44513 ix86_expand_vector_init_general (bool mmx_ok, machine_mode mode,
44514 rtx target, rtx vals)
44516 rtx ops[64], op0, op1, op2, op3, op4, op5;
44517 machine_mode half_mode = VOIDmode;
44518 machine_mode quarter_mode = VOIDmode;
44519 int n, i;
44521 switch (mode)
44523 case V2SFmode:
44524 case V2SImode:
44525 if (!mmx_ok && !TARGET_SSE)
44526 break;
44527 /* FALLTHRU */
44529 case V16SImode:
44530 case V16SFmode:
44531 case V8DFmode:
44532 case V8DImode:
44533 case V8SFmode:
44534 case V8SImode:
44535 case V4DFmode:
44536 case V4DImode:
44537 case V4SFmode:
44538 case V4SImode:
44539 case V2DFmode:
44540 case V2DImode:
44541 n = GET_MODE_NUNITS (mode);
44542 for (i = 0; i < n; i++)
44543 ops[i] = XVECEXP (vals, 0, i);
44544 ix86_expand_vector_init_concat (mode, target, ops, n);
44545 return;
44547 case V32QImode:
44548 half_mode = V16QImode;
44549 goto half;
44551 case V16HImode:
44552 half_mode = V8HImode;
44553 goto half;
44555 half:
44556 n = GET_MODE_NUNITS (mode);
44557 for (i = 0; i < n; i++)
44558 ops[i] = XVECEXP (vals, 0, i);
44559 op0 = gen_reg_rtx (half_mode);
44560 op1 = gen_reg_rtx (half_mode);
44561 ix86_expand_vector_init_interleave (half_mode, op0, ops,
44562 n >> 2);
44563 ix86_expand_vector_init_interleave (half_mode, op1,
44564 &ops [n >> 1], n >> 2);
44565 emit_insn (gen_rtx_SET (target, gen_rtx_VEC_CONCAT (mode, op0, op1)));
44566 return;
44568 case V64QImode:
44569 quarter_mode = V16QImode;
44570 half_mode = V32QImode;
44571 goto quarter;
44573 case V32HImode:
44574 quarter_mode = V8HImode;
44575 half_mode = V16HImode;
44576 goto quarter;
44578 quarter:
44579 n = GET_MODE_NUNITS (mode);
44580 for (i = 0; i < n; i++)
44581 ops[i] = XVECEXP (vals, 0, i);
44582 op0 = gen_reg_rtx (quarter_mode);
44583 op1 = gen_reg_rtx (quarter_mode);
44584 op2 = gen_reg_rtx (quarter_mode);
44585 op3 = gen_reg_rtx (quarter_mode);
44586 op4 = gen_reg_rtx (half_mode);
44587 op5 = gen_reg_rtx (half_mode);
44588 ix86_expand_vector_init_interleave (quarter_mode, op0, ops,
44589 n >> 3);
44590 ix86_expand_vector_init_interleave (quarter_mode, op1,
44591 &ops [n >> 2], n >> 3);
44592 ix86_expand_vector_init_interleave (quarter_mode, op2,
44593 &ops [n >> 1], n >> 3);
44594 ix86_expand_vector_init_interleave (quarter_mode, op3,
44595 &ops [(n >> 1) | (n >> 2)], n >> 3);
44596 emit_insn (gen_rtx_SET (op4, gen_rtx_VEC_CONCAT (half_mode, op0, op1)));
44597 emit_insn (gen_rtx_SET (op5, gen_rtx_VEC_CONCAT (half_mode, op2, op3)));
44598 emit_insn (gen_rtx_SET (target, gen_rtx_VEC_CONCAT (mode, op4, op5)));
44599 return;
44601 case V16QImode:
44602 if (!TARGET_SSE4_1)
44603 break;
44604 /* FALLTHRU */
44606 case V8HImode:
44607 if (!TARGET_SSE2)
44608 break;
44610 /* Don't use ix86_expand_vector_init_interleave if we can't
44611 move from GPR to SSE register directly. */
44612 if (!TARGET_INTER_UNIT_MOVES_TO_VEC)
44613 break;
44615 n = GET_MODE_NUNITS (mode);
44616 for (i = 0; i < n; i++)
44617 ops[i] = XVECEXP (vals, 0, i);
44618 ix86_expand_vector_init_interleave (mode, target, ops, n >> 1);
44619 return;
44621 case V4HImode:
44622 case V8QImode:
44623 break;
44625 default:
44626 gcc_unreachable ();
44630 int i, j, n_elts, n_words, n_elt_per_word;
44631 machine_mode inner_mode;
44632 rtx words[4], shift;
44634 inner_mode = GET_MODE_INNER (mode);
44635 n_elts = GET_MODE_NUNITS (mode);
44636 n_words = GET_MODE_SIZE (mode) / UNITS_PER_WORD;
44637 n_elt_per_word = n_elts / n_words;
44638 shift = GEN_INT (GET_MODE_BITSIZE (inner_mode));
44640 for (i = 0; i < n_words; ++i)
44642 rtx word = NULL_RTX;
44644 for (j = 0; j < n_elt_per_word; ++j)
44646 rtx elt = XVECEXP (vals, 0, (i+1)*n_elt_per_word - j - 1);
44647 elt = convert_modes (word_mode, inner_mode, elt, true);
44649 if (j == 0)
44650 word = elt;
44651 else
44653 word = expand_simple_binop (word_mode, ASHIFT, word, shift,
44654 word, 1, OPTAB_LIB_WIDEN);
44655 word = expand_simple_binop (word_mode, IOR, word, elt,
44656 word, 1, OPTAB_LIB_WIDEN);
44660 words[i] = word;
44663 if (n_words == 1)
44664 emit_move_insn (target, gen_lowpart (mode, words[0]));
44665 else if (n_words == 2)
44667 rtx tmp = gen_reg_rtx (mode);
44668 emit_clobber (tmp);
44669 emit_move_insn (gen_lowpart (word_mode, tmp), words[0]);
44670 emit_move_insn (gen_highpart (word_mode, tmp), words[1]);
44671 emit_move_insn (target, tmp);
44673 else if (n_words == 4)
44675 rtx tmp = gen_reg_rtx (V4SImode);
44676 gcc_assert (word_mode == SImode);
44677 vals = gen_rtx_PARALLEL (V4SImode, gen_rtvec_v (4, words));
44678 ix86_expand_vector_init_general (false, V4SImode, tmp, vals);
44679 emit_move_insn (target, gen_lowpart (mode, tmp));
44681 else
44682 gcc_unreachable ();
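/* A minimal scalar model of the word-building loop above (illustrative
   only; pack_word_sketch is not used in this file): each word_mode chunk
   of the vector is assembled in a GPR by taking the highest element of
   the chunk first and then shifting the partial word up before OR-ing in
   each lower element, so element 0 of the chunk lands in the low bits.
   An 8-bit element width is assumed here purely for illustration.  */

static inline unsigned int
pack_word_sketch (const unsigned char *elts, int n_elt_per_word)
{
  unsigned int word = 0;
  for (int j = 0; j < n_elt_per_word; j++)
    {
      unsigned int elt = elts[n_elt_per_word - 1 - j];
      if (j == 0)
        word = elt;
      else
        word = (word << 8) | elt;  /* shift previous elements up, OR in next */
    }
  return word;
}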
44686 /* Initialize vector TARGET via VALS. Suppress the use of MMX
44687 instructions unless MMX_OK is true. */
44689 void
44690 ix86_expand_vector_init (bool mmx_ok, rtx target, rtx vals)
44692 machine_mode mode = GET_MODE (target);
44693 machine_mode inner_mode = GET_MODE_INNER (mode);
44694 int n_elts = GET_MODE_NUNITS (mode);
44695 int n_var = 0, one_var = -1;
44696 bool all_same = true, all_const_zero = true;
44697 int i;
44698 rtx x;
44700 for (i = 0; i < n_elts; ++i)
44702 x = XVECEXP (vals, 0, i);
44703 if (!(CONST_SCALAR_INT_P (x)
44704 || CONST_DOUBLE_P (x)
44705 || CONST_FIXED_P (x)))
44706 n_var++, one_var = i;
44707 else if (x != CONST0_RTX (inner_mode))
44708 all_const_zero = false;
44709 if (i > 0 && !rtx_equal_p (x, XVECEXP (vals, 0, 0)))
44710 all_same = false;
44713 /* Constants are best loaded from the constant pool. */
44714 if (n_var == 0)
44716 emit_move_insn (target, gen_rtx_CONST_VECTOR (mode, XVEC (vals, 0)));
44717 return;
44720 /* If all values are identical, broadcast the value. */
44721 if (all_same
44722 && ix86_expand_vector_init_duplicate (mmx_ok, mode, target,
44723 XVECEXP (vals, 0, 0)))
44724 return;
44726 /* Values where only one field is non-constant are best loaded from
44727 the pool and overwritten via move later. */
44728 if (n_var == 1)
44730 if (all_const_zero
44731 && ix86_expand_vector_init_one_nonzero (mmx_ok, mode, target,
44732 XVECEXP (vals, 0, one_var),
44733 one_var))
44734 return;
44736 if (ix86_expand_vector_init_one_var (mmx_ok, mode, target, vals, one_var))
44737 return;
44740 ix86_expand_vector_init_general (mmx_ok, mode, target, vals);
44743 void
44744 ix86_expand_vector_set (bool mmx_ok, rtx target, rtx val, int elt)
44746 machine_mode mode = GET_MODE (target);
44747 machine_mode inner_mode = GET_MODE_INNER (mode);
44748 machine_mode half_mode;
44749 bool use_vec_merge = false;
44750 rtx tmp;
44751 static rtx (*gen_extract[6][2]) (rtx, rtx)
44753 { gen_vec_extract_lo_v32qi, gen_vec_extract_hi_v32qi },
44754 { gen_vec_extract_lo_v16hi, gen_vec_extract_hi_v16hi },
44755 { gen_vec_extract_lo_v8si, gen_vec_extract_hi_v8si },
44756 { gen_vec_extract_lo_v4di, gen_vec_extract_hi_v4di },
44757 { gen_vec_extract_lo_v8sf, gen_vec_extract_hi_v8sf },
44758 { gen_vec_extract_lo_v4df, gen_vec_extract_hi_v4df }
44760 static rtx (*gen_insert[6][2]) (rtx, rtx, rtx)
44762 { gen_vec_set_lo_v32qi, gen_vec_set_hi_v32qi },
44763 { gen_vec_set_lo_v16hi, gen_vec_set_hi_v16hi },
44764 { gen_vec_set_lo_v8si, gen_vec_set_hi_v8si },
44765 { gen_vec_set_lo_v4di, gen_vec_set_hi_v4di },
44766 { gen_vec_set_lo_v8sf, gen_vec_set_hi_v8sf },
44767 { gen_vec_set_lo_v4df, gen_vec_set_hi_v4df }
44769 int i, j, n;
44770 machine_mode mmode = VOIDmode;
44771 rtx (*gen_blendm) (rtx, rtx, rtx, rtx);
44773 switch (mode)
44775 case V2SFmode:
44776 case V2SImode:
44777 if (mmx_ok)
44779 tmp = gen_reg_rtx (GET_MODE_INNER (mode));
44780 ix86_expand_vector_extract (true, tmp, target, 1 - elt);
44781 if (elt == 0)
44782 tmp = gen_rtx_VEC_CONCAT (mode, val, tmp);
44783 else
44784 tmp = gen_rtx_VEC_CONCAT (mode, tmp, val);
44785 emit_insn (gen_rtx_SET (target, tmp));
44786 return;
44788 break;
44790 case V2DImode:
44791 use_vec_merge = TARGET_SSE4_1 && TARGET_64BIT;
44792 if (use_vec_merge)
44793 break;
44795 tmp = gen_reg_rtx (GET_MODE_INNER (mode));
44796 ix86_expand_vector_extract (false, tmp, target, 1 - elt);
44797 if (elt == 0)
44798 tmp = gen_rtx_VEC_CONCAT (mode, val, tmp);
44799 else
44800 tmp = gen_rtx_VEC_CONCAT (mode, tmp, val);
44801 emit_insn (gen_rtx_SET (target, tmp));
44802 return;
44804 case V2DFmode:
44806 rtx op0, op1;
44808 /* For the two element vectors, we implement a VEC_CONCAT with
44809 the extraction of the other element. */
44811 tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (1, GEN_INT (1 - elt)));
44812 tmp = gen_rtx_VEC_SELECT (inner_mode, target, tmp);
44814 if (elt == 0)
44815 op0 = val, op1 = tmp;
44816 else
44817 op0 = tmp, op1 = val;
44819 tmp = gen_rtx_VEC_CONCAT (mode, op0, op1);
44820 emit_insn (gen_rtx_SET (target, tmp));
44822 return;
44824 case V4SFmode:
44825 use_vec_merge = TARGET_SSE4_1;
44826 if (use_vec_merge)
44827 break;
44829 switch (elt)
44831 case 0:
44832 use_vec_merge = true;
44833 break;
44835 case 1:
44836 /* tmp = target = A B C D */
44837 tmp = copy_to_reg (target);
44838 /* target = A A B B */
44839 emit_insn (gen_vec_interleave_lowv4sf (target, target, target));
44840 /* target = X A B B */
44841 ix86_expand_vector_set (false, target, val, 0);
44842 /* target = A X C D */
44843 emit_insn (gen_sse_shufps_v4sf (target, target, tmp,
44844 const1_rtx, const0_rtx,
44845 GEN_INT (2+4), GEN_INT (3+4)));
44846 return;
44848 case 2:
44849 /* tmp = target = A B C D */
44850 tmp = copy_to_reg (target);
44851 /* tmp = X B C D */
44852 ix86_expand_vector_set (false, tmp, val, 0);
44853 /* target = A B X D */
44854 emit_insn (gen_sse_shufps_v4sf (target, target, tmp,
44855 const0_rtx, const1_rtx,
44856 GEN_INT (0+4), GEN_INT (3+4)));
44857 return;
44859 case 3:
44860 /* tmp = target = A B C D */
44861 tmp = copy_to_reg (target);
44862 /* tmp = X B C D */
44863 ix86_expand_vector_set (false, tmp, val, 0);
44864 /* target = A B C X */
44865 emit_insn (gen_sse_shufps_v4sf (target, target, tmp,
44866 const0_rtx, const1_rtx,
44867 GEN_INT (2+4), GEN_INT (0+4)));
44868 return;
44870 default:
44871 gcc_unreachable ();
44873 break;
44875 case V4SImode:
44876 use_vec_merge = TARGET_SSE4_1;
44877 if (use_vec_merge)
44878 break;
44880 /* Element 0 handled by vec_merge below. */
44881 if (elt == 0)
44883 use_vec_merge = true;
44884 break;
44887 if (TARGET_SSE2)
44889 /* With SSE2, use integer shuffles to swap element 0 and ELT,
44890 store into element 0, then shuffle them back. */
44892 rtx order[4];
44894 order[0] = GEN_INT (elt);
44895 order[1] = const1_rtx;
44896 order[2] = const2_rtx;
44897 order[3] = GEN_INT (3);
44898 order[elt] = const0_rtx;
44900 emit_insn (gen_sse2_pshufd_1 (target, target, order[0],
44901 order[1], order[2], order[3]));
44903 ix86_expand_vector_set (false, target, val, 0);
44905 emit_insn (gen_sse2_pshufd_1 (target, target, order[0],
44906 order[1], order[2], order[3]));
44908 else
44910 /* For SSE1, we have to reuse the V4SF code. */
44911 rtx t = gen_reg_rtx (V4SFmode);
44912 ix86_expand_vector_set (false, t, gen_lowpart (SFmode, val), elt);
44913 emit_move_insn (target, gen_lowpart (mode, t));
44915 return;
44917 case V8HImode:
44918 use_vec_merge = TARGET_SSE2;
44919 break;
44920 case V4HImode:
44921 use_vec_merge = mmx_ok && (TARGET_SSE || TARGET_3DNOW_A);
44922 break;
44924 case V16QImode:
44925 use_vec_merge = TARGET_SSE4_1;
44926 break;
44928 case V8QImode:
44929 break;
44931 case V32QImode:
44932 half_mode = V16QImode;
44933 j = 0;
44934 n = 16;
44935 goto half;
44937 case V16HImode:
44938 half_mode = V8HImode;
44939 j = 1;
44940 n = 8;
44941 goto half;
44943 case V8SImode:
44944 half_mode = V4SImode;
44945 j = 2;
44946 n = 4;
44947 goto half;
44949 case V4DImode:
44950 half_mode = V2DImode;
44951 j = 3;
44952 n = 2;
44953 goto half;
44955 case V8SFmode:
44956 half_mode = V4SFmode;
44957 j = 4;
44958 n = 4;
44959 goto half;
44961 case V4DFmode:
44962 half_mode = V2DFmode;
44963 j = 5;
44964 n = 2;
44965 goto half;
44967 half:
44968 /* Compute offset. */
44969 i = elt / n;
44970 elt %= n;
44972 gcc_assert (i <= 1);
44974 /* Extract the half. */
44975 tmp = gen_reg_rtx (half_mode);
44976 emit_insn (gen_extract[j][i] (tmp, target));
44978 /* Put val in tmp at elt. */
44979 ix86_expand_vector_set (false, tmp, val, elt);
44981 /* Put it back. */
44982 emit_insn (gen_insert[j][i] (target, target, tmp));
44983 return;
44985 case V8DFmode:
44986 if (TARGET_AVX512F)
44988 mmode = QImode;
44989 gen_blendm = gen_avx512f_blendmv8df;
44991 break;
44993 case V8DImode:
44994 if (TARGET_AVX512F)
44996 mmode = QImode;
44997 gen_blendm = gen_avx512f_blendmv8di;
44999 break;
45001 case V16SFmode:
45002 if (TARGET_AVX512F)
45004 mmode = HImode;
45005 gen_blendm = gen_avx512f_blendmv16sf;
45007 break;
45009 case V16SImode:
45010 if (TARGET_AVX512F)
45012 mmode = HImode;
45013 gen_blendm = gen_avx512f_blendmv16si;
45015 break;
45017 case V32HImode:
45018 if (TARGET_AVX512F && TARGET_AVX512BW)
45020 mmode = SImode;
45021 gen_blendm = gen_avx512bw_blendmv32hi;
45023 break;
45025 case V64QImode:
45026 if (TARGET_AVX512F && TARGET_AVX512BW)
45028 mmode = DImode;
45029 gen_blendm = gen_avx512bw_blendmv64qi;
45031 break;
45033 default:
45034 break;
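/* AVX-512 path: broadcast VAL into a scratch vector and merge the single
   lane ELT into TARGET with a masked blend whose one-hot mask is
   1 << ELT, leaving every other lane of TARGET untouched.  */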
45037 if (mmode != VOIDmode)
45039 tmp = gen_reg_rtx (mode);
45040 emit_insn (gen_rtx_SET (tmp, gen_rtx_VEC_DUPLICATE (mode, val)));
45041 emit_insn (gen_blendm (target, tmp, target,
45042 force_reg (mmode,
45043 gen_int_mode (1 << elt, mmode))));
45045 else if (use_vec_merge)
45047 tmp = gen_rtx_VEC_DUPLICATE (mode, val);
45048 tmp = gen_rtx_VEC_MERGE (mode, tmp, target, GEN_INT (1 << elt));
45049 emit_insn (gen_rtx_SET (target, tmp));
45051 else
45053 rtx mem = assign_stack_temp (mode, GET_MODE_SIZE (mode));
45055 emit_move_insn (mem, target);
45057 tmp = adjust_address (mem, inner_mode, elt*GET_MODE_SIZE (inner_mode));
45058 emit_move_insn (tmp, val);
45060 emit_move_insn (target, mem);
45064 void
45065 ix86_expand_vector_extract (bool mmx_ok, rtx target, rtx vec, int elt)
45067 machine_mode mode = GET_MODE (vec);
45068 machine_mode inner_mode = GET_MODE_INNER (mode);
45069 bool use_vec_extr = false;
45070 rtx tmp;
45072 switch (mode)
45074 case V2SImode:
45075 case V2SFmode:
45076 if (!mmx_ok)
45077 break;
45078 /* FALLTHRU */
45080 case V2DFmode:
45081 case V2DImode:
45082 use_vec_extr = true;
45083 break;
45085 case V4SFmode:
45086 use_vec_extr = TARGET_SSE4_1;
45087 if (use_vec_extr)
45088 break;
45090 switch (elt)
45092 case 0:
45093 tmp = vec;
45094 break;
45096 case 1:
45097 case 3:
45098 tmp = gen_reg_rtx (mode);
45099 emit_insn (gen_sse_shufps_v4sf (tmp, vec, vec,
45100 GEN_INT (elt), GEN_INT (elt),
45101 GEN_INT (elt+4), GEN_INT (elt+4)));
45102 break;
45104 case 2:
45105 tmp = gen_reg_rtx (mode);
45106 emit_insn (gen_vec_interleave_highv4sf (tmp, vec, vec));
45107 break;
45109 default:
45110 gcc_unreachable ();
45112 vec = tmp;
45113 use_vec_extr = true;
45114 elt = 0;
45115 break;
45117 case V4SImode:
45118 use_vec_extr = TARGET_SSE4_1;
45119 if (use_vec_extr)
45120 break;
45122 if (TARGET_SSE2)
45124 switch (elt)
45126 case 0:
45127 tmp = vec;
45128 break;
45130 case 1:
45131 case 3:
45132 tmp = gen_reg_rtx (mode);
45133 emit_insn (gen_sse2_pshufd_1 (tmp, vec,
45134 GEN_INT (elt), GEN_INT (elt),
45135 GEN_INT (elt), GEN_INT (elt)));
45136 break;
45138 case 2:
45139 tmp = gen_reg_rtx (mode);
45140 emit_insn (gen_vec_interleave_highv4si (tmp, vec, vec));
45141 break;
45143 default:
45144 gcc_unreachable ();
45146 vec = tmp;
45147 use_vec_extr = true;
45148 elt = 0;
45150 else
45152 /* For SSE1, we have to reuse the V4SF code. */
45153 ix86_expand_vector_extract (false, gen_lowpart (SFmode, target),
45154 gen_lowpart (V4SFmode, vec), elt);
45155 return;
45157 break;
45159 case V8HImode:
45160 use_vec_extr = TARGET_SSE2;
45161 break;
45162 case V4HImode:
45163 use_vec_extr = mmx_ok && (TARGET_SSE || TARGET_3DNOW_A);
45164 break;
45166 case V16QImode:
45167 use_vec_extr = TARGET_SSE4_1;
45168 break;
45170 case V8SFmode:
45171 if (TARGET_AVX)
45173 tmp = gen_reg_rtx (V4SFmode);
45174 if (elt < 4)
45175 emit_insn (gen_vec_extract_lo_v8sf (tmp, vec));
45176 else
45177 emit_insn (gen_vec_extract_hi_v8sf (tmp, vec));
45178 ix86_expand_vector_extract (false, target, tmp, elt & 3);
45179 return;
45181 break;
45183 case V4DFmode:
45184 if (TARGET_AVX)
45186 tmp = gen_reg_rtx (V2DFmode);
45187 if (elt < 2)
45188 emit_insn (gen_vec_extract_lo_v4df (tmp, vec));
45189 else
45190 emit_insn (gen_vec_extract_hi_v4df (tmp, vec));
45191 ix86_expand_vector_extract (false, target, tmp, elt & 1);
45192 return;
45194 break;
45196 case V32QImode:
45197 if (TARGET_AVX)
45199 tmp = gen_reg_rtx (V16QImode);
45200 if (elt < 16)
45201 emit_insn (gen_vec_extract_lo_v32qi (tmp, vec));
45202 else
45203 emit_insn (gen_vec_extract_hi_v32qi (tmp, vec));
45204 ix86_expand_vector_extract (false, target, tmp, elt & 15);
45205 return;
45207 break;
45209 case V16HImode:
45210 if (TARGET_AVX)
45212 tmp = gen_reg_rtx (V8HImode);
45213 if (elt < 8)
45214 emit_insn (gen_vec_extract_lo_v16hi (tmp, vec));
45215 else
45216 emit_insn (gen_vec_extract_hi_v16hi (tmp, vec));
45217 ix86_expand_vector_extract (false, target, tmp, elt & 7);
45218 return;
45220 break;
45222 case V8SImode:
45223 if (TARGET_AVX)
45225 tmp = gen_reg_rtx (V4SImode);
45226 if (elt < 4)
45227 emit_insn (gen_vec_extract_lo_v8si (tmp, vec));
45228 else
45229 emit_insn (gen_vec_extract_hi_v8si (tmp, vec));
45230 ix86_expand_vector_extract (false, target, tmp, elt & 3);
45231 return;
45233 break;
45235 case V4DImode:
45236 if (TARGET_AVX)
45238 tmp = gen_reg_rtx (V2DImode);
45239 if (elt < 2)
45240 emit_insn (gen_vec_extract_lo_v4di (tmp, vec));
45241 else
45242 emit_insn (gen_vec_extract_hi_v4di (tmp, vec));
45243 ix86_expand_vector_extract (false, target, tmp, elt & 1);
45244 return;
45246 break;
45248 case V32HImode:
45249 if (TARGET_AVX512BW)
45251 tmp = gen_reg_rtx (V16HImode);
45252 if (elt < 16)
45253 emit_insn (gen_vec_extract_lo_v32hi (tmp, vec));
45254 else
45255 emit_insn (gen_vec_extract_hi_v32hi (tmp, vec));
45256 ix86_expand_vector_extract (false, target, tmp, elt & 15);
45257 return;
45259 break;
45261 case V64QImode:
45262 if (TARGET_AVX512BW)
45264 tmp = gen_reg_rtx (V32QImode);
45265 if (elt < 32)
45266 emit_insn (gen_vec_extract_lo_v64qi (tmp, vec));
45267 else
45268 emit_insn (gen_vec_extract_hi_v64qi (tmp, vec));
45269 ix86_expand_vector_extract (false, target, tmp, elt & 31);
45270 return;
45272 break;
45274 case V16SFmode:
45275 tmp = gen_reg_rtx (V8SFmode);
45276 if (elt < 8)
45277 emit_insn (gen_vec_extract_lo_v16sf (tmp, vec));
45278 else
45279 emit_insn (gen_vec_extract_hi_v16sf (tmp, vec));
45280 ix86_expand_vector_extract (false, target, tmp, elt & 7);
45281 return;
45283 case V8DFmode:
45284 tmp = gen_reg_rtx (V4DFmode);
45285 if (elt < 4)
45286 emit_insn (gen_vec_extract_lo_v8df (tmp, vec));
45287 else
45288 emit_insn (gen_vec_extract_hi_v8df (tmp, vec));
45289 ix86_expand_vector_extract (false, target, tmp, elt & 3);
45290 return;
45292 case V16SImode:
45293 tmp = gen_reg_rtx (V8SImode);
45294 if (elt < 8)
45295 emit_insn (gen_vec_extract_lo_v16si (tmp, vec));
45296 else
45297 emit_insn (gen_vec_extract_hi_v16si (tmp, vec));
45298 ix86_expand_vector_extract (false, target, tmp, elt & 7);
45299 return;
45301 case V8DImode:
45302 tmp = gen_reg_rtx (V4DImode);
45303 if (elt < 4)
45304 emit_insn (gen_vec_extract_lo_v8di (tmp, vec));
45305 else
45306 emit_insn (gen_vec_extract_hi_v8di (tmp, vec));
45307 ix86_expand_vector_extract (false, target, tmp, elt & 3);
45308 return;
45310 case V8QImode:
45311 /* ??? Could extract the appropriate HImode element and shift. */
45312 default:
45313 break;
45316 if (use_vec_extr)
45318 tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (1, GEN_INT (elt)));
45319 tmp = gen_rtx_VEC_SELECT (inner_mode, vec, tmp);
45321 /* Let the rtl optimizers know about the zero extension performed. */
45322 if (inner_mode == QImode || inner_mode == HImode)
45324 tmp = gen_rtx_ZERO_EXTEND (SImode, tmp);
45325 target = gen_lowpart (SImode, target);
45328 emit_insn (gen_rtx_SET (target, tmp));
45330 else
45332 rtx mem = assign_stack_temp (mode, GET_MODE_SIZE (mode));
45334 emit_move_insn (mem, vec);
45336 tmp = adjust_address (mem, inner_mode, elt*GET_MODE_SIZE (inner_mode));
45337 emit_move_insn (target, tmp);
45341 /* Generate code to copy vector bits i / 2 ... i - 1 from vector SRC
45342 to bits 0 ... i / 2 - 1 of vector DEST, which has the same mode.
45343 The upper bits of DEST are undefined, though they shouldn't cause
45344 exceptions (some bits from src or all zeros are ok). */
45346 static void
45347 emit_reduc_half (rtx dest, rtx src, int i)
45349 rtx tem, d = dest;
45350 switch (GET_MODE (src))
45352 case V4SFmode:
45353 if (i == 128)
45354 tem = gen_sse_movhlps (dest, src, src);
45355 else
45356 tem = gen_sse_shufps_v4sf (dest, src, src, const1_rtx, const1_rtx,
45357 GEN_INT (1 + 4), GEN_INT (1 + 4));
45358 break;
45359 case V2DFmode:
45360 tem = gen_vec_interleave_highv2df (dest, src, src);
45361 break;
45362 case V16QImode:
45363 case V8HImode:
45364 case V4SImode:
45365 case V2DImode:
45366 d = gen_reg_rtx (V1TImode);
45367 tem = gen_sse2_lshrv1ti3 (d, gen_lowpart (V1TImode, src),
45368 GEN_INT (i / 2));
45369 break;
45370 case V8SFmode:
45371 if (i == 256)
45372 tem = gen_avx_vperm2f128v8sf3 (dest, src, src, const1_rtx);
45373 else
45374 tem = gen_avx_shufps256 (dest, src, src,
45375 GEN_INT (i == 128 ? 2 + (3 << 2) : 1));
45376 break;
45377 case V4DFmode:
45378 if (i == 256)
45379 tem = gen_avx_vperm2f128v4df3 (dest, src, src, const1_rtx);
45380 else
45381 tem = gen_avx_shufpd256 (dest, src, src, const1_rtx);
45382 break;
45383 case V32QImode:
45384 case V16HImode:
45385 case V8SImode:
45386 case V4DImode:
45387 if (i == 256)
45389 if (GET_MODE (dest) != V4DImode)
45390 d = gen_reg_rtx (V4DImode);
45391 tem = gen_avx2_permv2ti (d, gen_lowpart (V4DImode, src),
45392 gen_lowpart (V4DImode, src),
45393 const1_rtx);
45395 else
45397 d = gen_reg_rtx (V2TImode);
45398 tem = gen_avx2_lshrv2ti3 (d, gen_lowpart (V2TImode, src),
45399 GEN_INT (i / 2));
45401 break;
45402 case V64QImode:
45403 case V32HImode:
45404 case V16SImode:
45405 case V16SFmode:
45406 case V8DImode:
45407 case V8DFmode:
45408 if (i > 128)
45409 tem = gen_avx512f_shuf_i32x4_1 (gen_lowpart (V16SImode, dest),
45410 gen_lowpart (V16SImode, src),
45411 gen_lowpart (V16SImode, src),
45412 GEN_INT (0x4 + (i == 512 ? 4 : 0)),
45413 GEN_INT (0x5 + (i == 512 ? 4 : 0)),
45414 GEN_INT (0x6 + (i == 512 ? 4 : 0)),
45415 GEN_INT (0x7 + (i == 512 ? 4 : 0)),
45416 GEN_INT (0xC), GEN_INT (0xD),
45417 GEN_INT (0xE), GEN_INT (0xF),
45418 GEN_INT (0x10), GEN_INT (0x11),
45419 GEN_INT (0x12), GEN_INT (0x13),
45420 GEN_INT (0x14), GEN_INT (0x15),
45421 GEN_INT (0x16), GEN_INT (0x17));
45422 else
45423 tem = gen_avx512f_pshufd_1 (gen_lowpart (V16SImode, dest),
45424 gen_lowpart (V16SImode, src),
45425 GEN_INT (i == 128 ? 0x2 : 0x1),
45426 GEN_INT (0x3),
45427 GEN_INT (0x3),
45428 GEN_INT (0x3),
45429 GEN_INT (i == 128 ? 0x6 : 0x5),
45430 GEN_INT (0x7),
45431 GEN_INT (0x7),
45432 GEN_INT (0x7),
45433 GEN_INT (i == 128 ? 0xA : 0x9),
45434 GEN_INT (0xB),
45435 GEN_INT (0xB),
45436 GEN_INT (0xB),
45437 GEN_INT (i == 128 ? 0xE : 0xD),
45438 GEN_INT (0xF),
45439 GEN_INT (0xF),
45440 GEN_INT (0xF));
45441 break;
45442 default:
45443 gcc_unreachable ();
45445 emit_insn (tem);
45446 if (d != dest)
45447 emit_move_insn (dest, gen_lowpart (GET_MODE (dest), d));
45450 /* Expand a vector reduction. FN is the binary pattern to reduce;
45451 DEST is the destination; IN is the input vector. */
45453 void
45454 ix86_expand_reduc (rtx (*fn) (rtx, rtx, rtx), rtx dest, rtx in)
45456 rtx half, dst, vec = in;
45457 machine_mode mode = GET_MODE (in);
45458 int i;
45460 /* SSE4 has a special instruction for V8HImode UMIN reduction. */
45461 if (TARGET_SSE4_1
45462 && mode == V8HImode
45463 && fn == gen_uminv8hi3)
45465 emit_insn (gen_sse4_1_phminposuw (dest, in));
45466 return;
45469 for (i = GET_MODE_BITSIZE (mode);
45470 i > GET_MODE_BITSIZE (GET_MODE_INNER (mode));
45471 i >>= 1)
45473 half = gen_reg_rtx (mode);
45474 emit_reduc_half (half, vec, i);
45475 if (i == GET_MODE_BITSIZE (GET_MODE_INNER (mode)) * 2)
45476 dst = dest;
45477 else
45478 dst = gen_reg_rtx (mode);
45479 emit_insn (fn (dst, half, vec));
45480 vec = dst;
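/* A minimal scalar model of the reduction loop above (illustrative only;
   reduc_sketch is not used in this file): each emit_reduc_half step moves
   the upper half of the remaining live lanes down so FN can combine it
   element-wise with the lower half, and after log2(nelts) steps lane 0
   holds the full reduction.  */

static inline int
reduc_sketch (int *lanes, int nelts, int (*fn) (int, int))
{
  for (int width = nelts; width > 1; width >>= 1)
    for (int i = 0; i < width / 2; i++)
      lanes[i] = fn (lanes[i], lanes[i + width / 2]);
  return lanes[0];
}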
45484 /* Target hook for scalar_mode_supported_p. */
45485 static bool
45486 ix86_scalar_mode_supported_p (machine_mode mode)
45488 if (DECIMAL_FLOAT_MODE_P (mode))
45489 return default_decimal_float_supported_p ();
45490 else if (mode == TFmode)
45491 return true;
45492 else
45493 return default_scalar_mode_supported_p (mode);
45496 /* Implements target hook vector_mode_supported_p. */
45497 static bool
45498 ix86_vector_mode_supported_p (machine_mode mode)
45500 if (TARGET_SSE && VALID_SSE_REG_MODE (mode))
45501 return true;
45502 if (TARGET_SSE2 && VALID_SSE2_REG_MODE (mode))
45503 return true;
45504 if (TARGET_AVX && VALID_AVX256_REG_MODE (mode))
45505 return true;
45506 if (TARGET_AVX512F && VALID_AVX512F_REG_MODE (mode))
45507 return true;
45508 if (TARGET_MMX && VALID_MMX_REG_MODE (mode))
45509 return true;
45510 if (TARGET_3DNOW && VALID_MMX_REG_MODE_3DNOW (mode))
45511 return true;
45512 return false;
45515 /* Implement target hook libgcc_floating_mode_supported_p. */
45516 static bool
45517 ix86_libgcc_floating_mode_supported_p (machine_mode mode)
45519 switch (mode)
45521 case SFmode:
45522 case DFmode:
45523 case XFmode:
45524 return true;
45526 case TFmode:
45527 #ifdef IX86_NO_LIBGCC_TFMODE
45528 return false;
45529 #elif defined IX86_MAYBE_NO_LIBGCC_TFMODE
45530 return TARGET_LONG_DOUBLE_128;
45531 #else
45532 return true;
45533 #endif
45535 default:
45536 return false;
45540 /* Target hook for c_mode_for_suffix. */
45541 static machine_mode
45542 ix86_c_mode_for_suffix (char suffix)
45544 if (suffix == 'q')
45545 return TFmode;
45546 if (suffix == 'w')
45547 return XFmode;
45549 return VOIDmode;
45552 /* Worker function for TARGET_MD_ASM_ADJUST.
45554 We do this in the new i386 backend to maintain source compatibility
45555 with the old cc0-based compiler. */
45557 static rtx_insn *
45558 ix86_md_asm_adjust (vec<rtx> &/*outputs*/, vec<rtx> &/*inputs*/,
45559 vec<const char *> &/*constraints*/,
45560 vec<rtx> &clobbers, HARD_REG_SET &clobbered_regs)
45562 clobbers.safe_push (gen_rtx_REG (CCmode, FLAGS_REG));
45563 clobbers.safe_push (gen_rtx_REG (CCFPmode, FPSR_REG));
45565 SET_HARD_REG_BIT (clobbered_regs, FLAGS_REG);
45566 SET_HARD_REG_BIT (clobbered_regs, FPSR_REG);
45568 return NULL;
45571 /* Implements target vector targetm.asm.encode_section_info. */
45573 static void ATTRIBUTE_UNUSED
45574 ix86_encode_section_info (tree decl, rtx rtl, int first)
45576 default_encode_section_info (decl, rtl, first);
45578 if (ix86_in_large_data_p (decl))
45579 SYMBOL_REF_FLAGS (XEXP (rtl, 0)) |= SYMBOL_FLAG_FAR_ADDR;
45582 /* Worker function for REVERSE_CONDITION. */
45584 enum rtx_code
45585 ix86_reverse_condition (enum rtx_code code, machine_mode mode)
45587 return (mode != CCFPmode && mode != CCFPUmode
45588 ? reverse_condition (code)
45589 : reverse_condition_maybe_unordered (code));
45592 /* Output code to perform an x87 FP register move, from OPERANDS[1]
45593 to OPERANDS[0]. */
45595 const char *
45596 output_387_reg_move (rtx insn, rtx *operands)
45598 if (REG_P (operands[0]))
45600 if (REG_P (operands[1])
45601 && find_regno_note (insn, REG_DEAD, REGNO (operands[1])))
45603 if (REGNO (operands[0]) == FIRST_STACK_REG)
45604 return output_387_ffreep (operands, 0);
45605 return "fstp\t%y0";
45607 if (STACK_TOP_P (operands[0]))
45608 return "fld%Z1\t%y1";
45609 return "fst\t%y0";
45611 else if (MEM_P (operands[0]))
45613 gcc_assert (REG_P (operands[1]));
45614 if (find_regno_note (insn, REG_DEAD, REGNO (operands[1])))
45615 return "fstp%Z0\t%y0";
45616 else
45618 /* There is no non-popping store to memory for XFmode.
45619 So if we need one, follow the store with a load. */
45620 if (GET_MODE (operands[0]) == XFmode)
45621 return "fstp%Z0\t%y0\n\tfld%Z0\t%y0";
45622 else
45623 return "fst%Z0\t%y0";
45626 else
45627 gcc_unreachable();
45630 /* Output code to perform a conditional jump to LABEL, if C2 flag in
45631 FP status register is set. */
45633 void
45634 ix86_emit_fp_unordered_jump (rtx label)
45636 rtx reg = gen_reg_rtx (HImode);
45637 rtx temp;
45639 emit_insn (gen_x86_fnstsw_1 (reg));
45641 if (TARGET_SAHF && (TARGET_USE_SAHF || optimize_insn_for_size_p ()))
45643 emit_insn (gen_x86_sahf_1 (reg));
45645 temp = gen_rtx_REG (CCmode, FLAGS_REG);
45646 temp = gen_rtx_UNORDERED (VOIDmode, temp, const0_rtx);
45648 else
45650 emit_insn (gen_testqi_ext_ccno_0 (reg, GEN_INT (0x04)));
45652 temp = gen_rtx_REG (CCNOmode, FLAGS_REG);
45653 temp = gen_rtx_NE (VOIDmode, temp, const0_rtx);
45656 temp = gen_rtx_IF_THEN_ELSE (VOIDmode, temp,
45657 gen_rtx_LABEL_REF (VOIDmode, label),
45658 pc_rtx);
45659 temp = gen_rtx_SET (pc_rtx, temp);
45661 emit_jump_insn (temp);
45662 predict_jump (REG_BR_PROB_BASE * 10 / 100);
45665 /* Output code to perform a log1p XFmode calculation. */
45667 void ix86_emit_i387_log1p (rtx op0, rtx op1)
45669 rtx_code_label *label1 = gen_label_rtx ();
45670 rtx_code_label *label2 = gen_label_rtx ();
45672 rtx tmp = gen_reg_rtx (XFmode);
45673 rtx tmp2 = gen_reg_rtx (XFmode);
45674 rtx test;
45676 emit_insn (gen_absxf2 (tmp, op1));
45677 test = gen_rtx_GE (VOIDmode, tmp,
45678 CONST_DOUBLE_FROM_REAL_VALUE (
45679 REAL_VALUE_ATOF ("0.29289321881345247561810596348408353", XFmode),
45680 XFmode));
45681 emit_jump_insn (gen_cbranchxf4 (test, XEXP (test, 0), XEXP (test, 1), label1));
45683 emit_move_insn (tmp2, standard_80387_constant_rtx (4)); /* fldln2 */
45684 emit_insn (gen_fyl2xp1xf3_i387 (op0, op1, tmp2));
45685 emit_jump (label2);
45687 emit_label (label1);
45688 emit_move_insn (tmp, CONST1_RTX (XFmode));
45689 emit_insn (gen_addxf3 (tmp, op1, tmp));
45690 emit_move_insn (tmp2, standard_80387_constant_rtx (4)); /* fldln2 */
45691 emit_insn (gen_fyl2xxf3_i387 (op0, tmp, tmp2));
45693 emit_label (label2);
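/* The threshold 0.29289321881... used above is 1 - sqrt(2)/2, the limit
   of the argument range for which the x87 FYL2XP1 instruction is
   specified.  Inside that range log1p(x) is computed as ln(2) * log2(x + 1)
   by fyl2xp1 without ever forming the inexact intermediate 1 + x; outside
   it, 1 + x is formed explicitly and fed to the ordinary fyl2x.  */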
45696 /* Emit code for round calculation. */
45697 void ix86_emit_i387_round (rtx op0, rtx op1)
45699 machine_mode inmode = GET_MODE (op1);
45700 machine_mode outmode = GET_MODE (op0);
45701 rtx e1, e2, res, tmp, tmp1, half;
45702 rtx scratch = gen_reg_rtx (HImode);
45703 rtx flags = gen_rtx_REG (CCNOmode, FLAGS_REG);
45704 rtx_code_label *jump_label = gen_label_rtx ();
45705 rtx insn;
45706 rtx (*gen_abs) (rtx, rtx);
45707 rtx (*gen_neg) (rtx, rtx);
45709 switch (inmode)
45711 case SFmode:
45712 gen_abs = gen_abssf2;
45713 break;
45714 case DFmode:
45715 gen_abs = gen_absdf2;
45716 break;
45717 case XFmode:
45718 gen_abs = gen_absxf2;
45719 break;
45720 default:
45721 gcc_unreachable ();
45724 switch (outmode)
45726 case SFmode:
45727 gen_neg = gen_negsf2;
45728 break;
45729 case DFmode:
45730 gen_neg = gen_negdf2;
45731 break;
45732 case XFmode:
45733 gen_neg = gen_negxf2;
45734 break;
45735 case HImode:
45736 gen_neg = gen_neghi2;
45737 break;
45738 case SImode:
45739 gen_neg = gen_negsi2;
45740 break;
45741 case DImode:
45742 gen_neg = gen_negdi2;
45743 break;
45744 default:
45745 gcc_unreachable ();
45748 e1 = gen_reg_rtx (inmode);
45749 e2 = gen_reg_rtx (inmode);
45750 res = gen_reg_rtx (outmode);
45752 half = CONST_DOUBLE_FROM_REAL_VALUE (dconsthalf, inmode);
45754 /* round(a) = sgn(a) * floor(fabs(a) + 0.5) */
45756 /* scratch = fxam(op1) */
45757 emit_insn (gen_rtx_SET (scratch,
45758 gen_rtx_UNSPEC (HImode, gen_rtvec (1, op1),
45759 UNSPEC_FXAM)));
45760 /* e1 = fabs(op1) */
45761 emit_insn (gen_abs (e1, op1));
45763 /* e2 = e1 + 0.5 */
45764 half = force_reg (inmode, half);
45765 emit_insn (gen_rtx_SET (e2, gen_rtx_PLUS (inmode, e1, half)));
45767 /* res = floor(e2) */
45768 if (inmode != XFmode)
45770 tmp1 = gen_reg_rtx (XFmode);
45772 emit_insn (gen_rtx_SET (tmp1, gen_rtx_FLOAT_EXTEND (XFmode, e2)));
45774 else
45775 tmp1 = e2;
45777 switch (outmode)
45779 case SFmode:
45780 case DFmode:
45782 rtx tmp0 = gen_reg_rtx (XFmode);
45784 emit_insn (gen_frndintxf2_floor (tmp0, tmp1));
45786 emit_insn (gen_rtx_SET (res,
45787 gen_rtx_UNSPEC (outmode, gen_rtvec (1, tmp0),
45788 UNSPEC_TRUNC_NOOP)));
45790 break;
45791 case XFmode:
45792 emit_insn (gen_frndintxf2_floor (res, tmp1));
45793 break;
45794 case HImode:
45795 emit_insn (gen_lfloorxfhi2 (res, tmp1));
45796 break;
45797 case SImode:
45798 emit_insn (gen_lfloorxfsi2 (res, tmp1));
45799 break;
45800 case DImode:
45801 emit_insn (gen_lfloorxfdi2 (res, tmp1));
45802 break;
45803 default:
45804 gcc_unreachable ();
45807 /* flags = signbit(a) */
45808 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x02)));
45810 /* if (flags) then res = -res */
45811 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode,
45812 gen_rtx_EQ (VOIDmode, flags, const0_rtx),
45813 gen_rtx_LABEL_REF (VOIDmode, jump_label),
45814 pc_rtx);
45815 insn = emit_jump_insn (gen_rtx_SET (pc_rtx, tmp));
45816 predict_jump (REG_BR_PROB_BASE * 50 / 100);
45817 JUMP_LABEL (insn) = jump_label;
45819 emit_insn (gen_neg (res, res));
45821 emit_label (jump_label);
45822 LABEL_NUSES (jump_label) = 1;
45824 emit_move_insn (op0, res);
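/* A minimal C model of the sequence above (illustrative only; the
   __builtin_* calls merely stand in for the FXAM, fabs and floor patterns
   emitted by the expander): round half away from zero as
   sgn(a) * floor(|a| + 0.5).  */

static inline double
i387_round_sketch (double a)
{
  double mag = __builtin_floor (__builtin_fabs (a) + 0.5);
  return __builtin_copysign (mag, a);  /* restore the original sign */
}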
45827 /* Output code to perform a Newton-Raphson approximation of a single precision
45828 floating point divide [http://en.wikipedia.org/wiki/N-th_root_algorithm]. */
45830 void ix86_emit_swdivsf (rtx res, rtx a, rtx b, machine_mode mode)
45832 rtx x0, x1, e0, e1;
45834 x0 = gen_reg_rtx (mode);
45835 e0 = gen_reg_rtx (mode);
45836 e1 = gen_reg_rtx (mode);
45837 x1 = gen_reg_rtx (mode);
45839 /* a / b = a * ((rcp(b) + rcp(b)) - (b * rcp(b) * rcp (b))) */
45841 b = force_reg (mode, b);
45843 /* x0 = rcp(b) estimate */
45844 if (mode == V16SFmode || mode == V8DFmode)
45845 emit_insn (gen_rtx_SET (x0, gen_rtx_UNSPEC (mode, gen_rtvec (1, b),
45846 UNSPEC_RCP14)));
45847 else
45848 emit_insn (gen_rtx_SET (x0, gen_rtx_UNSPEC (mode, gen_rtvec (1, b),
45849 UNSPEC_RCP)));
45851 /* e0 = x0 * b */
45852 emit_insn (gen_rtx_SET (e0, gen_rtx_MULT (mode, x0, b)));
45854 /* e0 = x0 * e0 */
45855 emit_insn (gen_rtx_SET (e0, gen_rtx_MULT (mode, x0, e0)));
45857 /* e1 = x0 + x0 */
45858 emit_insn (gen_rtx_SET (e1, gen_rtx_PLUS (mode, x0, x0)));
45860 /* x1 = e1 - e0 */
45861 emit_insn (gen_rtx_SET (x1, gen_rtx_MINUS (mode, e1, e0)));
45863 /* res = a * x1 */
45864 emit_insn (gen_rtx_SET (res, gen_rtx_MULT (mode, a, x1)));
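/* A minimal scalar sketch of the sequence above (illustrative only;
   RCP_EST stands for the low-precision RCPSS/RCP14 estimate x0 ~= 1/b and
   is a parameter of this sketch, not a helper that exists here): a single
   Newton-Raphson refinement of the reciprocal estimate.  */

static inline float
swdiv_sketch (float a, float b, float rcp_est)
{
  float e0 = rcp_est * b;        /* e0 = x0 * b             */
  e0 = rcp_est * e0;             /* e0 = b * x0 * x0        */
  float e1 = rcp_est + rcp_est;  /* e1 = 2 * x0             */
  float x1 = e1 - e0;            /* x1 = x0 * (2 - b * x0)  */
  return a * x1;                 /* a / b ~= a * x1         */
}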
45867 /* Output code to perform a Newton-Raphson approximation of a
45868 single precision floating point [reciprocal] square root. */
45870 void ix86_emit_swsqrtsf (rtx res, rtx a, machine_mode mode,
45871 bool recip)
45873 rtx x0, e0, e1, e2, e3, mthree, mhalf;
45874 REAL_VALUE_TYPE r;
45875 int unspec;
45877 x0 = gen_reg_rtx (mode);
45878 e0 = gen_reg_rtx (mode);
45879 e1 = gen_reg_rtx (mode);
45880 e2 = gen_reg_rtx (mode);
45881 e3 = gen_reg_rtx (mode);
45883 real_from_integer (&r, VOIDmode, -3, SIGNED);
45884 mthree = CONST_DOUBLE_FROM_REAL_VALUE (r, SFmode);
45886 real_arithmetic (&r, NEGATE_EXPR, &dconsthalf, NULL);
45887 mhalf = CONST_DOUBLE_FROM_REAL_VALUE (r, SFmode);
45888 unspec = UNSPEC_RSQRT;
45890 if (VECTOR_MODE_P (mode))
45892 mthree = ix86_build_const_vector (mode, true, mthree);
45893 mhalf = ix86_build_const_vector (mode, true, mhalf);
45894 /* There is no 512-bit rsqrt. There is however rsqrt14. */
45895 if (GET_MODE_SIZE (mode) == 64)
45896 unspec = UNSPEC_RSQRT14;
45899 /* sqrt(a) = -0.5 * a * rsqrtss(a) * (a * rsqrtss(a) * rsqrtss(a) - 3.0)
45900 rsqrt(a) = -0.5 * rsqrtss(a) * (a * rsqrtss(a) * rsqrtss(a) - 3.0) */
45902 a = force_reg (mode, a);
45904 /* x0 = rsqrt(a) estimate */
45905 emit_insn (gen_rtx_SET (x0, gen_rtx_UNSPEC (mode, gen_rtvec (1, a),
45906 unspec)));
45908 /* If a == 0.0, mask out the infinite rsqrt estimate so that sqrt (0.0) does not evaluate to 0.0 * inf = NaN. */
45909 if (!recip)
45911 rtx zero, mask;
45913 zero = gen_reg_rtx (mode);
45914 mask = gen_reg_rtx (mode);
45916 zero = force_reg (mode, CONST0_RTX(mode));
45918 /* Handle masked compare. */
45919 if (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 64)
45921 mask = gen_reg_rtx (HImode);
45922 /* Imm value 0x4 corresponds to not-equal comparison. */
45923 emit_insn (gen_avx512f_cmpv16sf3 (mask, zero, a, GEN_INT (0x4)));
45924 emit_insn (gen_avx512f_blendmv16sf (x0, zero, x0, mask));
45926 else
45928 emit_insn (gen_rtx_SET (mask, gen_rtx_NE (mode, zero, a)));
45930 emit_insn (gen_rtx_SET (x0, gen_rtx_AND (mode, x0, mask)));
45934 /* e0 = x0 * a */
45935 emit_insn (gen_rtx_SET (e0, gen_rtx_MULT (mode, x0, a)));
45936 /* e1 = e0 * x0 */
45937 emit_insn (gen_rtx_SET (e1, gen_rtx_MULT (mode, e0, x0)));
45939 /* e2 = e1 - 3. */
45940 mthree = force_reg (mode, mthree);
45941 emit_insn (gen_rtx_SET (e2, gen_rtx_PLUS (mode, e1, mthree)));
45943 mhalf = force_reg (mode, mhalf);
45944 if (recip)
45945 /* e3 = -.5 * x0 */
45946 emit_insn (gen_rtx_SET (e3, gen_rtx_MULT (mode, x0, mhalf)));
45947 else
45948 /* e3 = -.5 * e0 */
45949 emit_insn (gen_rtx_SET (e3, gen_rtx_MULT (mode, e0, mhalf)));
45950 /* ret = e2 * e3 */
45951 emit_insn (gen_rtx_SET (res, gen_rtx_MULT (mode, e2, e3)));
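/* A minimal scalar sketch of the sequence above (illustrative only;
   RSQRT_EST stands for the RSQRTSS/RSQRT14 estimate x0 ~= 1/sqrt(a)):
     rsqrt(a) ~= -0.5 * x0       * (a * x0 * x0 - 3)
     sqrt(a)  ~= -0.5 * (a * x0) * (a * x0 * x0 - 3)
   i.e. one Newton-Raphson step, written with the -3 and -0.5 constants
   exactly as the expander materializes them.  */

static inline float
swsqrt_sketch (float a, float rsqrt_est, bool recip)
{
  float e0 = rsqrt_est * a;                     /* e0 = x0 * a               */
  float e1 = e0 * rsqrt_est;                    /* e1 = a * x0 * x0          */
  float e2 = e1 + -3.0f;                        /* e2 = a * x0 * x0 - 3      */
  float e3 = (recip ? rsqrt_est : e0) * -0.5f;  /* e3 = -.5 * x0 or -.5 * e0 */
  return e2 * e3;
}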
45954 #ifdef TARGET_SOLARIS
45955 /* Solaris implementation of TARGET_ASM_NAMED_SECTION. */
45957 static void
45958 i386_solaris_elf_named_section (const char *name, unsigned int flags,
45959 tree decl)
45961 /* With Binutils 2.15, the "@unwind" marker must be specified on
45962 every occurrence of the ".eh_frame" section, not just the first
45963 one. */
45964 if (TARGET_64BIT
45965 && strcmp (name, ".eh_frame") == 0)
45967 fprintf (asm_out_file, "\t.section\t%s,\"%s\",@unwind\n", name,
45968 flags & SECTION_WRITE ? "aw" : "a");
45969 return;
45972 #ifndef USE_GAS
45973 if (HAVE_COMDAT_GROUP && flags & SECTION_LINKONCE)
45975 solaris_elf_asm_comdat_section (name, flags, decl);
45976 return;
45978 #endif
45980 default_elf_asm_named_section (name, flags, decl);
45982 #endif /* TARGET_SOLARIS */
45984 /* Return the mangling of TYPE if it is an extended fundamental type. */
45986 static const char *
45987 ix86_mangle_type (const_tree type)
45989 type = TYPE_MAIN_VARIANT (type);
45991 if (TREE_CODE (type) != VOID_TYPE && TREE_CODE (type) != BOOLEAN_TYPE
45992 && TREE_CODE (type) != INTEGER_TYPE && TREE_CODE (type) != REAL_TYPE)
45993 return NULL;
45995 switch (TYPE_MODE (type))
45997 case TFmode:
45998 /* __float128 is "g". */
45999 return "g";
46000 case XFmode:
46001 /* "long double" or __float80 is "e". */
46002 return "e";
46003 default:
46004 return NULL;
46008 /* For 32-bit code we can save PIC register setup by using
46009 __stack_chk_fail_local hidden function instead of calling
46010 __stack_chk_fail directly. 64-bit code doesn't need to setup any PIC
46011 register, so it is better to call __stack_chk_fail directly. */
46013 static tree ATTRIBUTE_UNUSED
46014 ix86_stack_protect_fail (void)
46016 return TARGET_64BIT
46017 ? default_external_stack_protect_fail ()
46018 : default_hidden_stack_protect_fail ();
46021 /* Select a format to encode pointers in exception handling data. CODE
46022 is 0 for data, 1 for code labels, 2 for function pointers. GLOBAL is
46023 true if the symbol may be affected by dynamic relocations.
46025 ??? All x86 object file formats are capable of representing this.
46026 After all, the relocation needed is the same as for the call insn.
46027 Whether or not a particular assembler allows us to enter such, I
46028 guess we'll have to see. */
46030 int asm_preferred_eh_data_format (int code, int global)
46032 if (flag_pic)
46034 int type = DW_EH_PE_sdata8;
46035 if (!TARGET_64BIT
46036 || ix86_cmodel == CM_SMALL_PIC
46037 || (ix86_cmodel == CM_MEDIUM_PIC && (global || code)))
46038 type = DW_EH_PE_sdata4;
46039 return (global ? DW_EH_PE_indirect : 0) | DW_EH_PE_pcrel | type;
46041 if (ix86_cmodel == CM_SMALL
46042 || (ix86_cmodel == CM_MEDIUM && code))
46043 return DW_EH_PE_udata4;
46044 return DW_EH_PE_absptr;
46047 /* Expand copysign from SIGN to the positive value ABS_VALUE
46048 storing in RESULT. If MASK is non-null, it shall be a mask to mask out
46049 the sign-bit. */
46050 static void
46051 ix86_sse_copysign_to_positive (rtx result, rtx abs_value, rtx sign, rtx mask)
46053 machine_mode mode = GET_MODE (sign);
46054 rtx sgn = gen_reg_rtx (mode);
46055 if (mask == NULL_RTX)
46057 machine_mode vmode;
46059 if (mode == SFmode)
46060 vmode = V4SFmode;
46061 else if (mode == DFmode)
46062 vmode = V2DFmode;
46063 else
46064 vmode = mode;
46066 mask = ix86_build_signbit_mask (vmode, VECTOR_MODE_P (mode), false);
46067 if (!VECTOR_MODE_P (mode))
46069 /* We need to generate a scalar mode mask in this case. */
46070 rtx tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (1, const0_rtx));
46071 tmp = gen_rtx_VEC_SELECT (mode, mask, tmp);
46072 mask = gen_reg_rtx (mode);
46073 emit_insn (gen_rtx_SET (mask, tmp));
46076 else
46077 mask = gen_rtx_NOT (mode, mask);
46078 emit_insn (gen_rtx_SET (sgn, gen_rtx_AND (mode, mask, sign)));
46079 emit_insn (gen_rtx_SET (result, gen_rtx_IOR (mode, abs_value, sgn)));
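/* A bitwise model of the AND/IOR pair above for the scalar SFmode case
   (illustrative only): keep just the sign bit of SIGN and OR it into the
   already non-negative ABS_VALUE.  The expander does the same with SSE
   logic ops on the sign-bit mask built by ix86_build_signbit_mask.  */

static inline float
copysign_to_positive_sketch (float abs_value, float sign)
{
  unsigned int a, s;
  __builtin_memcpy (&a, &abs_value, sizeof a);
  __builtin_memcpy (&s, &sign, sizeof s);
  a |= s & 0x80000000u;                  /* transplant the sign bit */
  __builtin_memcpy (&abs_value, &a, sizeof a);
  return abs_value;
}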
46082 /* Expand fabs (OP0) and return a new rtx that holds the result. The
46083 mask for masking out the sign-bit is stored in *SMASK, if that is
46084 non-null. */
46085 static rtx
46086 ix86_expand_sse_fabs (rtx op0, rtx *smask)
46088 machine_mode vmode, mode = GET_MODE (op0);
46089 rtx xa, mask;
46091 xa = gen_reg_rtx (mode);
46092 if (mode == SFmode)
46093 vmode = V4SFmode;
46094 else if (mode == DFmode)
46095 vmode = V2DFmode;
46096 else
46097 vmode = mode;
46098 mask = ix86_build_signbit_mask (vmode, VECTOR_MODE_P (mode), true);
46099 if (!VECTOR_MODE_P (mode))
46101 /* We need to generate a scalar mode mask in this case. */
46102 rtx tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (1, const0_rtx));
46103 tmp = gen_rtx_VEC_SELECT (mode, mask, tmp);
46104 mask = gen_reg_rtx (mode);
46105 emit_insn (gen_rtx_SET (mask, tmp));
46107 emit_insn (gen_rtx_SET (xa, gen_rtx_AND (mode, op0, mask)));
46109 if (smask)
46110 *smask = mask;
46112 return xa;
46115 /* Expands a comparison of OP0 with OP1 using comparison code CODE,
46116 swapping the operands if SWAP_OPERANDS is true. The expanded
46117 code is a forward jump to a newly created label in case the
46118 comparison is true. The generated label rtx is returned. */
46119 static rtx_code_label *
46120 ix86_expand_sse_compare_and_jump (enum rtx_code code, rtx op0, rtx op1,
46121 bool swap_operands)
46123 machine_mode fpcmp_mode = ix86_fp_compare_mode (code);
46124 rtx_code_label *label;
46125 rtx tmp;
46127 if (swap_operands)
46128 std::swap (op0, op1);
46130 label = gen_label_rtx ();
46131 tmp = gen_rtx_REG (fpcmp_mode, FLAGS_REG);
46132 emit_insn (gen_rtx_SET (tmp, gen_rtx_COMPARE (fpcmp_mode, op0, op1)));
46133 tmp = gen_rtx_fmt_ee (code, VOIDmode, tmp, const0_rtx);
46134 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
46135 gen_rtx_LABEL_REF (VOIDmode, label), pc_rtx);
46136 tmp = emit_jump_insn (gen_rtx_SET (pc_rtx, tmp));
46137 JUMP_LABEL (tmp) = label;
46139 return label;
46142 /* Expand a mask generating SSE comparison instruction comparing OP0 with OP1
46143 using comparison code CODE. Operands are swapped for the comparison if
46144 SWAP_OPERANDS is true. Returns a rtx for the generated mask. */
46145 static rtx
46146 ix86_expand_sse_compare_mask (enum rtx_code code, rtx op0, rtx op1,
46147 bool swap_operands)
46149 rtx (*insn)(rtx, rtx, rtx, rtx);
46150 machine_mode mode = GET_MODE (op0);
46151 rtx mask = gen_reg_rtx (mode);
46153 if (swap_operands)
46154 std::swap (op0, op1);
46156 insn = mode == DFmode ? gen_setcc_df_sse : gen_setcc_sf_sse;
46158 emit_insn (insn (mask, op0, op1,
46159 gen_rtx_fmt_ee (code, mode, op0, op1)));
46160 return mask;
46163 /* Generate and return a rtx of mode MODE for 2**n where n is the number
46164 of bits of the mantissa of MODE, which must be one of DFmode or SFmode. */
46165 static rtx
46166 ix86_gen_TWO52 (machine_mode mode)
46168 REAL_VALUE_TYPE TWO52r;
46169 rtx TWO52;
46171 real_ldexp (&TWO52r, &dconst1, mode == DFmode ? 52 : 23);
46172 TWO52 = const_double_from_real_value (TWO52r, mode);
46173 TWO52 = force_reg (mode, TWO52);
46175 return TWO52;
46178 /* Expand SSE sequence for computing lround from OP1 storing
46179 into OP0. */
46180 void
46181 ix86_expand_lround (rtx op0, rtx op1)
46183 /* C code for the stuff we're doing below:
46184 tmp = op1 + copysign (nextafter (0.5, 0.0), op1)
46185 return (long)tmp;
46187 machine_mode mode = GET_MODE (op1);
46188 const struct real_format *fmt;
46189 REAL_VALUE_TYPE pred_half, half_minus_pred_half;
46190 rtx adj;
46192 /* load nextafter (0.5, 0.0) */
46193 fmt = REAL_MODE_FORMAT (mode);
46194 real_2expN (&half_minus_pred_half, -(fmt->p) - 1, mode);
46195 REAL_ARITHMETIC (pred_half, MINUS_EXPR, dconsthalf, half_minus_pred_half);
46197 /* adj = copysign (0.5, op1) */
46198 adj = force_reg (mode, const_double_from_real_value (pred_half, mode));
46199 ix86_sse_copysign_to_positive (adj, adj, force_reg (mode, op1), NULL_RTX);
46201 /* adj = op1 + adj */
46202 adj = expand_simple_binop (mode, PLUS, adj, op1, NULL_RTX, 0, OPTAB_DIRECT);
46204 /* op0 = (imode)adj */
46205 expand_fix (op0, adj, 0);
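/* A minimal scalar model of the sequence above (illustrative only): bias
   the input by just under 0.5 with the input's own sign and let the
   float-to-integer conversion truncate toward zero.  The predecessor of
   0.5 (nextafter (0.5, 0.0)) is used so that inputs just below a halfway
   point are not pushed over it by rounding in the addition.  */

static inline long
lround_sketch (double x)
{
  double adj = __builtin_copysign (__builtin_nextafter (0.5, 0.0), x);
  return (long) (x + adj);  /* the conversion truncates toward zero */
}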
46208 /* Expand SSE2 sequence for computing lfloor or lceil from OPERAND1 storing
46209 into OPERAND0. */
46210 void
46211 ix86_expand_lfloorceil (rtx op0, rtx op1, bool do_floor)
46213 /* C code for the stuff we're doing below (for do_floor):
46214 xi = (long)op1;
46215 xi -= (double)xi > op1 ? 1 : 0;
46216 return xi;
46218 machine_mode fmode = GET_MODE (op1);
46219 machine_mode imode = GET_MODE (op0);
46220 rtx ireg, freg, tmp;
46221 rtx_code_label *label;
46223 /* reg = (long)op1 */
46224 ireg = gen_reg_rtx (imode);
46225 expand_fix (ireg, op1, 0);
46227 /* freg = (double)reg */
46228 freg = gen_reg_rtx (fmode);
46229 expand_float (freg, ireg, 0);
46231 /* ireg = (freg > op1) ? ireg - 1 : ireg */
46232 label = ix86_expand_sse_compare_and_jump (UNLE,
46233 freg, op1, !do_floor);
46234 tmp = expand_simple_binop (imode, do_floor ? MINUS : PLUS,
46235 ireg, const1_rtx, NULL_RTX, 0, OPTAB_DIRECT);
46236 emit_move_insn (ireg, tmp);
46238 emit_label (label);
46239 LABEL_NUSES (label) = 1;
46241 emit_move_insn (op0, ireg);
46244 /* Expand rint (IEEE round to nearest) rounding OPERAND1 and storing the
46245 result in OPERAND0. */
46246 void
46247 ix86_expand_rint (rtx operand0, rtx operand1)
46249 /* C code for the stuff we're doing below:
46250 xa = fabs (operand1);
46251 if (!isless (xa, 2**52))
46252 return operand1;
46253 xa = xa + 2**52 - 2**52;
46254 return copysign (xa, operand1);
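   For instance (DFmode, default round-to-nearest mode), operand1 = 3.3
   gives xa = 3.3, xa + 2**52 rounds to 2**52 + 3.0, subtracting 2**52
   leaves 3.0, and copying the sign back returns 3.0, i.e. rint (3.3).  */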
46256 machine_mode mode = GET_MODE (operand0);
46257 rtx res, xa, TWO52, mask;
46258 rtx_code_label *label;
46260 res = gen_reg_rtx (mode);
46261 emit_move_insn (res, operand1);
46263 /* xa = abs (operand1) */
46264 xa = ix86_expand_sse_fabs (res, &mask);
46266 /* if (!isless (xa, TWO52)) goto label; */
46267 TWO52 = ix86_gen_TWO52 (mode);
46268 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
46270 xa = expand_simple_binop (mode, PLUS, xa, TWO52, NULL_RTX, 0, OPTAB_DIRECT);
46271 xa = expand_simple_binop (mode, MINUS, xa, TWO52, xa, 0, OPTAB_DIRECT);
46273 ix86_sse_copysign_to_positive (res, xa, res, mask);
46275 emit_label (label);
46276 LABEL_NUSES (label) = 1;
46278 emit_move_insn (operand0, res);
46281 /* Expand SSE2 sequence for computing floor or ceil from OPERAND1 storing
46282 into OPERAND0. */
46283 void
46284 ix86_expand_floorceildf_32 (rtx operand0, rtx operand1, bool do_floor)
46286 /* C code for the stuff we expand below.
46287 double xa = fabs (x), x2;
46288 if (!isless (xa, TWO52))
46289 return x;
46290 xa = xa + TWO52 - TWO52;
46291 x2 = copysign (xa, x);
46292 Compensate. Floor:
46293 if (x2 > x)
46294 x2 -= 1;
46295 Compensate. Ceil:
46296 if (x2 < x)
46297 x2 -= -1;
46298 return x2;
46300 machine_mode mode = GET_MODE (operand0);
46301 rtx xa, TWO52, tmp, one, res, mask;
46302 rtx_code_label *label;
46304 TWO52 = ix86_gen_TWO52 (mode);
46306 /* Temporary for holding the result, initialized to the input
46307 operand to ease control flow. */
46308 res = gen_reg_rtx (mode);
46309 emit_move_insn (res, operand1);
46311 /* xa = abs (operand1) */
46312 xa = ix86_expand_sse_fabs (res, &mask);
46314 /* if (!isless (xa, TWO52)) goto label; */
46315 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
46317 /* xa = xa + TWO52 - TWO52; */
46318 xa = expand_simple_binop (mode, PLUS, xa, TWO52, NULL_RTX, 0, OPTAB_DIRECT);
46319 xa = expand_simple_binop (mode, MINUS, xa, TWO52, xa, 0, OPTAB_DIRECT);
46321 /* xa = copysign (xa, operand1) */
46322 ix86_sse_copysign_to_positive (xa, xa, res, mask);
46324 /* generate 1.0 or -1.0 */
46325 one = force_reg (mode,
46326 const_double_from_real_value (do_floor
46327 ? dconst1 : dconstm1, mode));
46329 /* Compensate: xa = xa - (xa > operand1 ? 1 : 0) */
46330 tmp = ix86_expand_sse_compare_mask (UNGT, xa, res, !do_floor);
46331 emit_insn (gen_rtx_SET (tmp, gen_rtx_AND (mode, one, tmp)));
46332 /* We always need to subtract here to preserve signed zero. */
46333 tmp = expand_simple_binop (mode, MINUS,
46334 xa, tmp, NULL_RTX, 0, OPTAB_DIRECT);
46335 emit_move_insn (res, tmp);
46337 emit_label (label);
46338 LABEL_NUSES (label) = 1;
46340 emit_move_insn (operand0, res);
46343 /* Expand SSE2 sequence for computing floor or ceil from OPERAND1 storing
46344 into OPERAND0. */
46345 void
46346 ix86_expand_floorceil (rtx operand0, rtx operand1, bool do_floor)
46348 /* C code for the stuff we expand below.
46349 double xa = fabs (x), x2;
46350 if (!isless (xa, TWO52))
46351 return x;
46352 x2 = (double)(long)x;
46353 Compensate. Floor:
46354 if (x2 > x)
46355 x2 -= 1;
46356 Compensate. Ceil:
46357 if (x2 < x)
46358 x2 += 1;
46359 if (HONOR_SIGNED_ZEROS (mode))
46360 return copysign (x2, x);
46361 return x2;
46363 machine_mode mode = GET_MODE (operand0);
46364 rtx xa, xi, TWO52, tmp, one, res, mask;
46365 rtx_code_label *label;
46367 TWO52 = ix86_gen_TWO52 (mode);
46369 /* Temporary for holding the result, initialized to the input
46370 operand to ease control flow. */
46371 res = gen_reg_rtx (mode);
46372 emit_move_insn (res, operand1);
46374 /* xa = abs (operand1) */
46375 xa = ix86_expand_sse_fabs (res, &mask);
46377 /* if (!isless (xa, TWO52)) goto label; */
46378 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
46380 /* xa = (double)(long)x */
46381 xi = gen_reg_rtx (mode == DFmode ? DImode : SImode);
46382 expand_fix (xi, res, 0);
46383 expand_float (xa, xi, 0);
46385 /* generate 1.0 */
46386 one = force_reg (mode, const_double_from_real_value (dconst1, mode));
46388 /* Compensate: xa = xa - (xa > operand1 ? 1 : 0) */
46389 tmp = ix86_expand_sse_compare_mask (UNGT, xa, res, !do_floor);
46390 emit_insn (gen_rtx_SET (tmp, gen_rtx_AND (mode, one, tmp)));
46391 tmp = expand_simple_binop (mode, do_floor ? MINUS : PLUS,
46392 xa, tmp, NULL_RTX, 0, OPTAB_DIRECT);
46393 emit_move_insn (res, tmp);
46395 if (HONOR_SIGNED_ZEROS (mode))
46396 ix86_sse_copysign_to_positive (res, res, force_reg (mode, operand1), mask);
46398 emit_label (label);
46399 LABEL_NUSES (label) = 1;
46401 emit_move_insn (operand0, res);
46404 /* Expand SSE sequence for computing round from OPERAND1 storing
46405 into OPERAND0. The sequence works without relying on DImode truncation
46406 via cvttsd2siq, which is only available on 64-bit targets. */
46407 void
46408 ix86_expand_rounddf_32 (rtx operand0, rtx operand1)
46410 /* C code for the stuff we expand below.
46411 double xa = fabs (x), xa2, x2;
46412 if (!isless (xa, TWO52))
46413 return x;
46414 Using the absolute value and copying back sign makes
46415 -0.0 -> -0.0 correct.
46416 xa2 = xa + TWO52 - TWO52;
46417 Compensate.
46418 dxa = xa2 - xa;
46419 if (dxa <= -0.5)
46420 xa2 += 1;
46421 else if (dxa > 0.5)
46422 xa2 -= 1;
46423 x2 = copysign (xa2, x);
46424 return x2;
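   For instance x = 2.5 (DFmode): xa = 2.5, xa2 = 2.0 (2.5 + 2**52
   rounds to even), dxa = -0.5, so the dxa <= -0.5 compensation adds
   1.0 giving 3.0; copysign then returns 3.0, i.e. halfway cases are
   rounded away from zero as round () requires.  */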
46426 machine_mode mode = GET_MODE (operand0);
46427 rtx xa, xa2, dxa, TWO52, tmp, half, mhalf, one, res, mask;
46428 rtx_code_label *label;
46430 TWO52 = ix86_gen_TWO52 (mode);
46432 /* Temporary for holding the result, initialized to the input
46433 operand to ease control flow. */
46434 res = gen_reg_rtx (mode);
46435 emit_move_insn (res, operand1);
46437 /* xa = abs (operand1) */
46438 xa = ix86_expand_sse_fabs (res, &mask);
46440 /* if (!isless (xa, TWO52)) goto label; */
46441 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
46443 /* xa2 = xa + TWO52 - TWO52; */
46444 xa2 = expand_simple_binop (mode, PLUS, xa, TWO52, NULL_RTX, 0, OPTAB_DIRECT);
46445 xa2 = expand_simple_binop (mode, MINUS, xa2, TWO52, xa2, 0, OPTAB_DIRECT);
46447 /* dxa = xa2 - xa; */
46448 dxa = expand_simple_binop (mode, MINUS, xa2, xa, NULL_RTX, 0, OPTAB_DIRECT);
46450 /* generate 0.5, 1.0 and -0.5 */
46451 half = force_reg (mode, const_double_from_real_value (dconsthalf, mode));
46452 one = expand_simple_binop (mode, PLUS, half, half, NULL_RTX, 0, OPTAB_DIRECT);
46453 mhalf = expand_simple_binop (mode, MINUS, half, one, NULL_RTX,
46454 0, OPTAB_DIRECT);
46456 /* Compensate. */
46457 tmp = gen_reg_rtx (mode);
46458 /* xa2 = xa2 - (dxa > 0.5 ? 1 : 0) */
46459 tmp = ix86_expand_sse_compare_mask (UNGT, dxa, half, false);
46460 emit_insn (gen_rtx_SET (tmp, gen_rtx_AND (mode, one, tmp)));
46461 xa2 = expand_simple_binop (mode, MINUS, xa2, tmp, NULL_RTX, 0, OPTAB_DIRECT);
46462 /* xa2 = xa2 + (dxa <= -0.5 ? 1 : 0) */
46463 tmp = ix86_expand_sse_compare_mask (UNGE, mhalf, dxa, false);
46464 emit_insn (gen_rtx_SET (tmp, gen_rtx_AND (mode, one, tmp)));
46465 xa2 = expand_simple_binop (mode, PLUS, xa2, tmp, NULL_RTX, 0, OPTAB_DIRECT);
46467 /* res = copysign (xa2, operand1) */
46468 ix86_sse_copysign_to_positive (res, xa2, force_reg (mode, operand1), mask);
46470 emit_label (label);
46471 LABEL_NUSES (label) = 1;
46473 emit_move_insn (operand0, res);
46476 /* Expand SSE sequence for computing trunc from OPERAND1 storing
46477 into OPERAND0. */
46478 void
46479 ix86_expand_trunc (rtx operand0, rtx operand1)
46481 /* C code for SSE variant we expand below.
46482 double xa = fabs (x), x2;
46483 if (!isless (xa, TWO52))
46484 return x;
46485 x2 = (double)(long)x;
46486 if (HONOR_SIGNED_ZEROS (mode))
46487 return copysign (x2, x);
46488 return x2;
46490 machine_mode mode = GET_MODE (operand0);
46491 rtx xa, xi, TWO52, res, mask;
46492 rtx_code_label *label;
46494 TWO52 = ix86_gen_TWO52 (mode);
46496 /* Temporary for holding the result, initialized to the input
46497 operand to ease control flow. */
46498 res = gen_reg_rtx (mode);
46499 emit_move_insn (res, operand1);
46501 /* xa = abs (operand1) */
46502 xa = ix86_expand_sse_fabs (res, &mask);
46504 /* if (!isless (xa, TWO52)) goto label; */
46505 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
46507 /* x = (double)(long)x */
46508 xi = gen_reg_rtx (mode == DFmode ? DImode : SImode);
46509 expand_fix (xi, res, 0);
46510 expand_float (res, xi, 0);
46512 if (HONOR_SIGNED_ZEROS (mode))
46513 ix86_sse_copysign_to_positive (res, res, force_reg (mode, operand1), mask);
46515 emit_label (label);
46516 LABEL_NUSES (label) = 1;
46518 emit_move_insn (operand0, res);
46521 /* Expand SSE sequence for computing trunc from OPERAND1 storing
46522 into OPERAND0. This variant avoids DImode truncation and so also works on 32-bit targets. */
46523 void
46524 ix86_expand_truncdf_32 (rtx operand0, rtx operand1)
46526 machine_mode mode = GET_MODE (operand0);
46527 rtx xa, mask, TWO52, one, res, smask, tmp;
46528 rtx_code_label *label;
46530 /* C code for SSE variant we expand below.
46531 double xa = fabs (x), x2;
46532 if (!isless (xa, TWO52))
46533 return x;
46534 xa2 = xa + TWO52 - TWO52;
46535 Compensate:
46536 if (xa2 > xa)
46537 xa2 -= 1.0;
46538 x2 = copysign (xa2, x);
46539 return x2;
46542 TWO52 = ix86_gen_TWO52 (mode);
46544 /* Temporary for holding the result, initialized to the input
46545 operand to ease control flow. */
46546 res = gen_reg_rtx (mode);
46547 emit_move_insn (res, operand1);
46549 /* xa = abs (operand1) */
46550 xa = ix86_expand_sse_fabs (res, &smask);
46552 /* if (!isless (xa, TWO52)) goto label; */
46553 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
46555 /* res = xa + TWO52 - TWO52; */
46556 tmp = expand_simple_binop (mode, PLUS, xa, TWO52, NULL_RTX, 0, OPTAB_DIRECT);
46557 tmp = expand_simple_binop (mode, MINUS, tmp, TWO52, tmp, 0, OPTAB_DIRECT);
46558 emit_move_insn (res, tmp);
46560 /* generate 1.0 */
46561 one = force_reg (mode, const_double_from_real_value (dconst1, mode));
46563 /* Compensate: res = xa2 - (res > xa ? 1 : 0) */
46564 mask = ix86_expand_sse_compare_mask (UNGT, res, xa, false);
46565 emit_insn (gen_rtx_SET (mask, gen_rtx_AND (mode, mask, one)));
46566 tmp = expand_simple_binop (mode, MINUS,
46567 res, mask, NULL_RTX, 0, OPTAB_DIRECT);
46568 emit_move_insn (res, tmp);
46570 /* res = copysign (res, operand1) */
46571 ix86_sse_copysign_to_positive (res, res, force_reg (mode, operand1), smask);
46573 emit_label (label);
46574 LABEL_NUSES (label) = 1;
46576 emit_move_insn (operand0, res);
46579 /* Expand SSE sequence for computing round from OPERAND1 storing
46580 into OPERAND0. */
46581 void
46582 ix86_expand_round (rtx operand0, rtx operand1)
46584 /* C code for the stuff we're doing below:
46585 double xa = fabs (x);
46586 if (!isless (xa, TWO52))
46587 return x;
46588 xa = (double)(long)(xa + nextafter (0.5, 0.0));
46589 return copysign (xa, x);
46591 machine_mode mode = GET_MODE (operand0);
46592 rtx res, TWO52, xa, xi, half, mask;
46593 rtx_code_label *label;
46594 const struct real_format *fmt;
46595 REAL_VALUE_TYPE pred_half, half_minus_pred_half;
46597 /* Temporary for holding the result, initialized to the input
46598 operand to ease control flow. */
46599 res = gen_reg_rtx (mode);
46600 emit_move_insn (res, operand1);
46602 TWO52 = ix86_gen_TWO52 (mode);
46603 xa = ix86_expand_sse_fabs (res, &mask);
46604 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
46606 /* load nextafter (0.5, 0.0) */
46607 fmt = REAL_MODE_FORMAT (mode);
46608 real_2expN (&half_minus_pred_half, -(fmt->p) - 1, mode);
46609 REAL_ARITHMETIC (pred_half, MINUS_EXPR, dconsthalf, half_minus_pred_half);
46611 /* xa = xa + 0.5 */
46612 half = force_reg (mode, const_double_from_real_value (pred_half, mode));
46613 xa = expand_simple_binop (mode, PLUS, xa, half, NULL_RTX, 0, OPTAB_DIRECT);
46615 /* xa = (double)(int64_t)xa */
46616 xi = gen_reg_rtx (mode == DFmode ? DImode : SImode);
46617 expand_fix (xi, xa, 0);
46618 expand_float (xa, xi, 0);
46620 /* res = copysign (xa, operand1) */
46621 ix86_sse_copysign_to_positive (res, xa, force_reg (mode, operand1), mask);
46623 emit_label (label);
46624 LABEL_NUSES (label) = 1;
46626 emit_move_insn (operand0, res);
46629 /* Expand SSE sequence for computing round
46630 from OP1 storing into OP0 using sse4 round insn. */
46631 void
46632 ix86_expand_round_sse4 (rtx op0, rtx op1)
46634 machine_mode mode = GET_MODE (op0);
46635 rtx e1, e2, res, half;
46636 const struct real_format *fmt;
46637 REAL_VALUE_TYPE pred_half, half_minus_pred_half;
46638 rtx (*gen_copysign) (rtx, rtx, rtx);
46639 rtx (*gen_round) (rtx, rtx, rtx);
46641 switch (mode)
46643 case SFmode:
46644 gen_copysign = gen_copysignsf3;
46645 gen_round = gen_sse4_1_roundsf2;
46646 break;
46647 case DFmode:
46648 gen_copysign = gen_copysigndf3;
46649 gen_round = gen_sse4_1_rounddf2;
46650 break;
46651 default:
46652 gcc_unreachable ();
46655 /* round (a) = trunc (a + copysign (0.5, a)) */
46657 /* load nextafter (0.5, 0.0) */
46658 fmt = REAL_MODE_FORMAT (mode);
46659 real_2expN (&half_minus_pred_half, -(fmt->p) - 1, mode);
46660 REAL_ARITHMETIC (pred_half, MINUS_EXPR, dconsthalf, half_minus_pred_half);
46661 half = const_double_from_real_value (pred_half, mode);
46663 /* e1 = copysign (0.5, op1) */
46664 e1 = gen_reg_rtx (mode);
46665 emit_insn (gen_copysign (e1, half, op1));
46667 /* e2 = op1 + e1 */
46668 e2 = expand_simple_binop (mode, PLUS, op1, e1, NULL_RTX, 0, OPTAB_DIRECT);
46670 /* res = trunc (e2) */
46671 res = gen_reg_rtx (mode);
46672 emit_insn (gen_round (res, e2, GEN_INT (ROUND_TRUNC)));
46674 emit_move_insn (op0, res);
46678 /* Table of valid machine attributes. */
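/* For example, the "regparm" entry below requires exactly one argument
   (min_len == max_len == 1), applies to function types rather than
   declarations (type_req and fn_type_req set), affects type identity,
   and is checked by ix86_handle_cconv_attribute.  */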
46679 static const struct attribute_spec ix86_attribute_table[] =
46681 /* { name, min_len, max_len, decl_req, type_req, fn_type_req, handler,
46682 affects_type_identity } */
46683 /* Stdcall attribute says callee is responsible for popping arguments
46684 if they are not variable. */
46685 { "stdcall", 0, 0, false, true, true, ix86_handle_cconv_attribute,
46686 true },
46687 /* Fastcall attribute says callee is responsible for popping arguments
46688 if they are not variable. */
46689 { "fastcall", 0, 0, false, true, true, ix86_handle_cconv_attribute,
46690 true },
46691 /* Thiscall attribute says callee is responsible for popping arguments
46692 if they are not variable. */
46693 { "thiscall", 0, 0, false, true, true, ix86_handle_cconv_attribute,
46694 true },
46695 /* Cdecl attribute says the callee is a normal C declaration. */
46696 { "cdecl", 0, 0, false, true, true, ix86_handle_cconv_attribute,
46697 true },
46698 /* Regparm attribute specifies how many integer arguments are to be
46699 passed in registers. */
46700 { "regparm", 1, 1, false, true, true, ix86_handle_cconv_attribute,
46701 true },
46702 /* Sseregparm attribute says we are using x86_64 calling conventions
46703 for FP arguments. */
46704 { "sseregparm", 0, 0, false, true, true, ix86_handle_cconv_attribute,
46705 true },
46706 /* The transactional memory builtins are implicitly regparm or fastcall
46707 depending on the ABI. Override the generic do-nothing attribute that
46708 these builtins were declared with. */
46709 { "*tm regparm", 0, 0, false, true, true, ix86_handle_tm_regparm_attribute,
46710 true },
46711 /* force_align_arg_pointer says this function realigns the stack at entry. */
46712 { (const char *)&ix86_force_align_arg_pointer_string, 0, 0,
46713 false, true, true, ix86_handle_cconv_attribute, false },
46714 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
46715 { "dllimport", 0, 0, false, false, false, handle_dll_attribute, false },
46716 { "dllexport", 0, 0, false, false, false, handle_dll_attribute, false },
46717 { "shared", 0, 0, true, false, false, ix86_handle_shared_attribute,
46718 false },
46719 #endif
46720 { "ms_struct", 0, 0, false, false, false, ix86_handle_struct_attribute,
46721 false },
46722 { "gcc_struct", 0, 0, false, false, false, ix86_handle_struct_attribute,
46723 false },
46724 #ifdef SUBTARGET_ATTRIBUTE_TABLE
46725 SUBTARGET_ATTRIBUTE_TABLE,
46726 #endif
46727 /* ms_abi and sysv_abi calling convention function attributes. */
46728 { "ms_abi", 0, 0, false, true, true, ix86_handle_abi_attribute, true },
46729 { "sysv_abi", 0, 0, false, true, true, ix86_handle_abi_attribute, true },
46730 { "ms_hook_prologue", 0, 0, true, false, false, ix86_handle_fndecl_attribute,
46731 false },
46732 { "callee_pop_aggregate_return", 1, 1, false, true, true,
46733 ix86_handle_callee_pop_aggregate_return, true },
46734 /* End element. */
46735 { NULL, 0, 0, false, false, false, NULL, false }
46738 /* Implement targetm.vectorize.builtin_vectorization_cost. */
46739 static int
46740 ix86_builtin_vectorization_cost (enum vect_cost_for_stmt type_of_cost,
46741 tree vectype, int)
46743 unsigned elements;
46745 switch (type_of_cost)
46747 case scalar_stmt:
46748 return ix86_cost->scalar_stmt_cost;
46750 case scalar_load:
46751 return ix86_cost->scalar_load_cost;
46753 case scalar_store:
46754 return ix86_cost->scalar_store_cost;
46756 case vector_stmt:
46757 return ix86_cost->vec_stmt_cost;
46759 case vector_load:
46760 return ix86_cost->vec_align_load_cost;
46762 case vector_store:
46763 return ix86_cost->vec_store_cost;
46765 case vec_to_scalar:
46766 return ix86_cost->vec_to_scalar_cost;
46768 case scalar_to_vec:
46769 return ix86_cost->scalar_to_vec_cost;
46771 case unaligned_load:
46772 case unaligned_store:
46773 return ix86_cost->vec_unalign_load_cost;
46775 case cond_branch_taken:
46776 return ix86_cost->cond_taken_branch_cost;
46778 case cond_branch_not_taken:
46779 return ix86_cost->cond_not_taken_branch_cost;
46781 case vec_perm:
46782 case vec_promote_demote:
46783 return ix86_cost->vec_stmt_cost;
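/* vec_construct is costed per element pair plus one; e.g. building a
   4-element vector costs 3 * vec_stmt_cost (4 / 2 + 1), an 8-element
   vector 5 * vec_stmt_cost.  */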
46785 case vec_construct:
46786 elements = TYPE_VECTOR_SUBPARTS (vectype);
46787 return ix86_cost->vec_stmt_cost * (elements / 2 + 1);
46789 default:
46790 gcc_unreachable ();
46794 /* A cached (set (nil) (vselect (vconcat (nil) (nil)) (parallel [])))
46795 insn, so that expand_vselect{,_vconcat} doesn't have to create a fresh
46796 insn every time. */
46798 static GTY(()) rtx_insn *vselect_insn;
46800 /* Initialize vselect_insn. */
46802 static void
46803 init_vselect_insn (void)
46805 unsigned i;
46806 rtx x;
46808 x = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (MAX_VECT_LEN));
46809 for (i = 0; i < MAX_VECT_LEN; ++i)
46810 XVECEXP (x, 0, i) = const0_rtx;
46811 x = gen_rtx_VEC_SELECT (V2DFmode, gen_rtx_VEC_CONCAT (V4DFmode, const0_rtx,
46812 const0_rtx), x);
46813 x = gen_rtx_SET (const0_rtx, x);
46814 start_sequence ();
46815 vselect_insn = emit_insn (x);
46816 end_sequence ();
46819 /* Construct (set target (vec_select op0 (parallel perm))) and
46820 return true if that's a valid instruction in the active ISA. */
46822 static bool
46823 expand_vselect (rtx target, rtx op0, const unsigned char *perm,
46824 unsigned nelt, bool testing_p)
46826 unsigned int i;
46827 rtx x, save_vconcat;
46828 int icode;
46830 if (vselect_insn == NULL_RTX)
46831 init_vselect_insn ();
46833 x = XEXP (SET_SRC (PATTERN (vselect_insn)), 1);
46834 PUT_NUM_ELEM (XVEC (x, 0), nelt);
46835 for (i = 0; i < nelt; ++i)
46836 XVECEXP (x, 0, i) = GEN_INT (perm[i]);
46837 save_vconcat = XEXP (SET_SRC (PATTERN (vselect_insn)), 0);
46838 XEXP (SET_SRC (PATTERN (vselect_insn)), 0) = op0;
46839 PUT_MODE (SET_SRC (PATTERN (vselect_insn)), GET_MODE (target));
46840 SET_DEST (PATTERN (vselect_insn)) = target;
46841 icode = recog_memoized (vselect_insn);
46843 if (icode >= 0 && !testing_p)
46844 emit_insn (copy_rtx (PATTERN (vselect_insn)));
46846 SET_DEST (PATTERN (vselect_insn)) = const0_rtx;
46847 XEXP (SET_SRC (PATTERN (vselect_insn)), 0) = save_vconcat;
46848 INSN_CODE (vselect_insn) = -1;
46850 return icode >= 0;
46853 /* Similar, but generate a vec_concat from op0 and op1 as well. */
46855 static bool
46856 expand_vselect_vconcat (rtx target, rtx op0, rtx op1,
46857 const unsigned char *perm, unsigned nelt,
46858 bool testing_p)
46860 machine_mode v2mode;
46861 rtx x;
46862 bool ok;
46864 if (vselect_insn == NULL_RTX)
46865 init_vselect_insn ();
46867 v2mode = GET_MODE_2XWIDER_MODE (GET_MODE (op0));
46868 x = XEXP (SET_SRC (PATTERN (vselect_insn)), 0);
46869 PUT_MODE (x, v2mode);
46870 XEXP (x, 0) = op0;
46871 XEXP (x, 1) = op1;
46872 ok = expand_vselect (target, x, perm, nelt, testing_p);
46873 XEXP (x, 0) = const0_rtx;
46874 XEXP (x, 1) = const0_rtx;
46875 return ok;
46878 /* A subroutine of ix86_expand_vec_perm_builtin_1. Try to implement D
46879 in terms of blendp[sd] / pblendw / pblendvb / vpblendd. */
46881 static bool
46882 expand_vec_perm_blend (struct expand_vec_perm_d *d)
46884 machine_mode mmode, vmode = d->vmode;
46885 unsigned i, mask, nelt = d->nelt;
46886 rtx target, op0, op1, maskop, x;
46887 rtx rperm[32], vperm;
46889 if (d->one_operand_p)
46890 return false;
46891 if (TARGET_AVX512F && GET_MODE_SIZE (vmode) == 64
46892 && (TARGET_AVX512BW
46893 || GET_MODE_SIZE (GET_MODE_INNER (vmode)) >= 4))
46895 else if (TARGET_AVX2 && GET_MODE_SIZE (vmode) == 32)
46897 else if (TARGET_AVX && (vmode == V4DFmode || vmode == V8SFmode))
46899 else if (TARGET_SSE4_1 && GET_MODE_SIZE (vmode) == 16)
46901 else
46902 return false;
46904 /* This is a blend, not a permute. Elements must stay in their
46905 respective lanes. */
46906 for (i = 0; i < nelt; ++i)
46908 unsigned e = d->perm[i];
46909 if (!(e == i || e == i + nelt))
46910 return false;
46913 if (d->testing_p)
46914 return true;
46916 /* ??? Without SSE4.1, we could implement this with and/andn/or. This
46917 decision should be extracted elsewhere, so that we only try that
46918 sequence once all budget==3 options have been tried. */
46919 target = d->target;
46920 op0 = d->op0;
46921 op1 = d->op1;
46922 mask = 0;
46924 switch (vmode)
46926 case V8DFmode:
46927 case V16SFmode:
46928 case V4DFmode:
46929 case V8SFmode:
46930 case V2DFmode:
46931 case V4SFmode:
46932 case V8HImode:
46933 case V8SImode:
46934 case V32HImode:
46935 case V64QImode:
46936 case V16SImode:
46937 case V8DImode:
46938 for (i = 0; i < nelt; ++i)
46939 mask |= (d->perm[i] >= nelt) << i;
46940 break;
46942 case V2DImode:
46943 for (i = 0; i < 2; ++i)
46944 mask |= (d->perm[i] >= 2 ? 15 : 0) << (i * 4);
46945 vmode = V8HImode;
46946 goto do_subreg;
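/* E.g. the V2DImode blend { 0, 3 } (element 0 from op0, element 1 from
   op1) becomes mask == 0xf0 after widening to V8HImode, so the four
   high words, i.e. the upper DImode element, are taken from op1.  */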
46948 case V4SImode:
46949 for (i = 0; i < 4; ++i)
46950 mask |= (d->perm[i] >= 4 ? 3 : 0) << (i * 2);
46951 vmode = V8HImode;
46952 goto do_subreg;
46954 case V16QImode:
46955 /* See if bytes move in pairs so we can use pblendw with
46956 an immediate argument, rather than pblendvb with a vector
46957 argument. */
46958 for (i = 0; i < 16; i += 2)
46959 if (d->perm[i] + 1 != d->perm[i + 1])
46961 use_pblendvb:
46962 for (i = 0; i < nelt; ++i)
46963 rperm[i] = (d->perm[i] < nelt ? const0_rtx : constm1_rtx);
46965 finish_pblendvb:
46966 vperm = gen_rtx_CONST_VECTOR (vmode, gen_rtvec_v (nelt, rperm));
46967 vperm = force_reg (vmode, vperm);
46969 if (GET_MODE_SIZE (vmode) == 16)
46970 emit_insn (gen_sse4_1_pblendvb (target, op0, op1, vperm));
46971 else
46972 emit_insn (gen_avx2_pblendvb (target, op0, op1, vperm));
46973 if (target != d->target)
46974 emit_move_insn (d->target, gen_lowpart (d->vmode, target));
46975 return true;
46978 for (i = 0; i < 8; ++i)
46979 mask |= (d->perm[i * 2] >= 16) << i;
46980 vmode = V8HImode;
46981 /* FALLTHRU */
46983 do_subreg:
46984 target = gen_reg_rtx (vmode);
46985 op0 = gen_lowpart (vmode, op0);
46986 op1 = gen_lowpart (vmode, op1);
46987 break;
46989 case V32QImode:
46990 /* See if bytes move in pairs. If not, vpblendvb must be used. */
46991 for (i = 0; i < 32; i += 2)
46992 if (d->perm[i] + 1 != d->perm[i + 1])
46993 goto use_pblendvb;
46994 /* See if bytes move in quadruplets. If yes, vpblendd
46995 with immediate can be used. */
46996 for (i = 0; i < 32; i += 4)
46997 if (d->perm[i] + 2 != d->perm[i + 2])
46998 break;
46999 if (i < 32)
47001 /* See if bytes move the same in both lanes. If yes,
47002 vpblendw with immediate can be used. */
47003 for (i = 0; i < 16; i += 2)
47004 if (d->perm[i] + 16 != d->perm[i + 16])
47005 goto use_pblendvb;
47007 /* Use vpblendw. */
47008 for (i = 0; i < 16; ++i)
47009 mask |= (d->perm[i * 2] >= 32) << i;
47010 vmode = V16HImode;
47011 goto do_subreg;
47014 /* Use vpblendd. */
47015 for (i = 0; i < 8; ++i)
47016 mask |= (d->perm[i * 4] >= 32) << i;
47017 vmode = V8SImode;
47018 goto do_subreg;
47020 case V16HImode:
47021 /* See if words move in pairs. If yes, vpblendd can be used. */
47022 for (i = 0; i < 16; i += 2)
47023 if (d->perm[i] + 1 != d->perm[i + 1])
47024 break;
47025 if (i < 16)
47027 /* See if words move the same in both lanes. If not,
47028 vpblendvb must be used. */
47029 for (i = 0; i < 8; i++)
47030 if (d->perm[i] + 8 != d->perm[i + 8])
47032 /* Use vpblendvb. */
47033 for (i = 0; i < 32; ++i)
47034 rperm[i] = (d->perm[i / 2] < 16 ? const0_rtx : constm1_rtx);
47036 vmode = V32QImode;
47037 nelt = 32;
47038 target = gen_reg_rtx (vmode);
47039 op0 = gen_lowpart (vmode, op0);
47040 op1 = gen_lowpart (vmode, op1);
47041 goto finish_pblendvb;
47044 /* Use vpblendw. */
47045 for (i = 0; i < 16; ++i)
47046 mask |= (d->perm[i] >= 16) << i;
47047 break;
47050 /* Use vpblendd. */
47051 for (i = 0; i < 8; ++i)
47052 mask |= (d->perm[i * 2] >= 16) << i;
47053 vmode = V8SImode;
47054 goto do_subreg;
47056 case V4DImode:
47057 /* Use vpblendd. */
47058 for (i = 0; i < 4; ++i)
47059 mask |= (d->perm[i] >= 4 ? 3 : 0) << (i * 2);
47060 vmode = V8SImode;
47061 goto do_subreg;
47063 default:
47064 gcc_unreachable ();
47067 switch (vmode)
47069 case V8DFmode:
47070 case V8DImode:
47071 mmode = QImode;
47072 break;
47073 case V16SFmode:
47074 case V16SImode:
47075 mmode = HImode;
47076 break;
47077 case V32HImode:
47078 mmode = SImode;
47079 break;
47080 case V64QImode:
47081 mmode = DImode;
47082 break;
47083 default:
47084 mmode = VOIDmode;
47087 if (mmode != VOIDmode)
47088 maskop = force_reg (mmode, gen_int_mode (mask, mmode));
47089 else
47090 maskop = GEN_INT (mask);
47092 /* This matches five different patterns with the different modes. */
47093 x = gen_rtx_VEC_MERGE (vmode, op1, op0, maskop);
47094 x = gen_rtx_SET (target, x);
47095 emit_insn (x);
47096 if (target != d->target)
47097 emit_move_insn (d->target, gen_lowpart (d->vmode, target));
47099 return true;
47102 /* A subroutine of ix86_expand_vec_perm_builtin_1. Try to implement D
47103 in terms of the variable form of vpermilps.
47105 Note that we will have already failed the immediate input vpermilps,
47106 which requires that the high and low part shuffle be identical; the
47107 variable form doesn't require that. */
47109 static bool
47110 expand_vec_perm_vpermil (struct expand_vec_perm_d *d)
47112 rtx rperm[8], vperm;
47113 unsigned i;
47115 if (!TARGET_AVX || d->vmode != V8SFmode || !d->one_operand_p)
47116 return false;
47118 /* We can only permute within the 128-bit lane. */
47119 for (i = 0; i < 8; ++i)
47121 unsigned e = d->perm[i];
47122 if (i < 4 ? e >= 4 : e < 4)
47123 return false;
47126 if (d->testing_p)
47127 return true;
47129 for (i = 0; i < 8; ++i)
47131 unsigned e = d->perm[i];
47133 /* Within each 128-bit lane, the elements of op0 are numbered
47134 from 0 and the elements of op1 are numbered from 4. */
47135 if (e >= 8 + 4)
47136 e -= 8;
47137 else if (e >= 4)
47138 e -= 4;
47140 rperm[i] = GEN_INT (e);
47143 vperm = gen_rtx_CONST_VECTOR (V8SImode, gen_rtvec_v (8, rperm));
47144 vperm = force_reg (V8SImode, vperm);
47145 emit_insn (gen_avx_vpermilvarv8sf3 (d->target, d->op0, vperm));
47147 return true;
47150 /* Return true if the permutation D can be performed as a VMODE permutation
47151 instead. */
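/* For instance, a V32QImode permutation whose selected bytes move in
   aligned groups of four consecutive bytes can equally be carried out
   as a V8SImode permutation of the corresponding 4-byte chunks.  */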
47153 static bool
47154 valid_perm_using_mode_p (machine_mode vmode, struct expand_vec_perm_d *d)
47156 unsigned int i, j, chunk;
47158 if (GET_MODE_CLASS (vmode) != MODE_VECTOR_INT
47159 || GET_MODE_CLASS (d->vmode) != MODE_VECTOR_INT
47160 || GET_MODE_SIZE (vmode) != GET_MODE_SIZE (d->vmode))
47161 return false;
47163 if (GET_MODE_NUNITS (vmode) >= d->nelt)
47164 return true;
47166 chunk = d->nelt / GET_MODE_NUNITS (vmode);
47167 for (i = 0; i < d->nelt; i += chunk)
47168 if (d->perm[i] & (chunk - 1))
47169 return false;
47170 else
47171 for (j = 1; j < chunk; ++j)
47172 if (d->perm[i] + j != d->perm[i + j])
47173 return false;
47175 return true;
47178 /* A subroutine of ix86_expand_vec_perm_builtin_1. Try to implement D
47179 in terms of pshufb, vpperm, vpermq, vpermd, vpermps or vperm2i128. */
47181 static bool
47182 expand_vec_perm_pshufb (struct expand_vec_perm_d *d)
47184 unsigned i, nelt, eltsz, mask;
47185 unsigned char perm[64];
47186 machine_mode vmode = V16QImode;
47187 rtx rperm[64], vperm, target, op0, op1;
47189 nelt = d->nelt;
47191 if (!d->one_operand_p)
47193 if (!TARGET_XOP || GET_MODE_SIZE (d->vmode) != 16)
47195 if (TARGET_AVX2
47196 && valid_perm_using_mode_p (V2TImode, d))
47198 if (d->testing_p)
47199 return true;
47201 /* Use vperm2i128 insn. The pattern uses
47202 V4DImode instead of V2TImode. */
47203 target = d->target;
47204 if (d->vmode != V4DImode)
47205 target = gen_reg_rtx (V4DImode);
47206 op0 = gen_lowpart (V4DImode, d->op0);
47207 op1 = gen_lowpart (V4DImode, d->op1);
47208 rperm[0]
47209 = GEN_INT ((d->perm[0] / (nelt / 2))
47210 | ((d->perm[nelt / 2] / (nelt / 2)) * 16));
47211 emit_insn (gen_avx2_permv2ti (target, op0, op1, rperm[0]));
47212 if (target != d->target)
47213 emit_move_insn (d->target, gen_lowpart (d->vmode, target));
47214 return true;
47216 return false;
47219 else
47221 if (GET_MODE_SIZE (d->vmode) == 16)
47223 if (!TARGET_SSSE3)
47224 return false;
47226 else if (GET_MODE_SIZE (d->vmode) == 32)
47228 if (!TARGET_AVX2)
47229 return false;
47231 /* V4DImode should already have been handled through
47232 expand_vselect by the vpermq instruction. */
47233 gcc_assert (d->vmode != V4DImode);
47235 vmode = V32QImode;
47236 if (d->vmode == V8SImode
47237 || d->vmode == V16HImode
47238 || d->vmode == V32QImode)
47240 /* First see if vpermq can be used for
47241 V8SImode/V16HImode/V32QImode. */
47242 if (valid_perm_using_mode_p (V4DImode, d))
47244 for (i = 0; i < 4; i++)
47245 perm[i] = (d->perm[i * nelt / 4] * 4 / nelt) & 3;
47246 if (d->testing_p)
47247 return true;
47248 target = gen_reg_rtx (V4DImode);
47249 if (expand_vselect (target, gen_lowpart (V4DImode, d->op0),
47250 perm, 4, false))
47252 emit_move_insn (d->target,
47253 gen_lowpart (d->vmode, target));
47254 return true;
47256 return false;
47259 /* Next see if vpermd can be used. */
47260 if (valid_perm_using_mode_p (V8SImode, d))
47261 vmode = V8SImode;
47263 /* Or if vpermps can be used. */
47264 else if (d->vmode == V8SFmode)
47265 vmode = V8SImode;
47267 if (vmode == V32QImode)
47269 /* vpshufb only works intra lane; it is not
47270 possible to shuffle bytes between the lanes. */
47271 for (i = 0; i < nelt; ++i)
47272 if ((d->perm[i] ^ i) & (nelt / 2))
47273 return false;
47276 else if (GET_MODE_SIZE (d->vmode) == 64)
47278 if (!TARGET_AVX512BW)
47279 return false;
47281 /* If vpermq didn't work, vpshufb won't work either. */
47282 if (d->vmode == V8DFmode || d->vmode == V8DImode)
47283 return false;
47285 vmode = V64QImode;
47286 if (d->vmode == V16SImode
47287 || d->vmode == V32HImode
47288 || d->vmode == V64QImode)
47290 /* First see if vpermq can be used for
47291 V16SImode/V32HImode/V64QImode. */
47292 if (valid_perm_using_mode_p (V8DImode, d))
47294 for (i = 0; i < 8; i++)
47295 perm[i] = (d->perm[i * nelt / 8] * 8 / nelt) & 7;
47296 if (d->testing_p)
47297 return true;
47298 target = gen_reg_rtx (V8DImode);
47299 if (expand_vselect (target, gen_lowpart (V8DImode, d->op0),
47300 perm, 8, false))
47302 emit_move_insn (d->target,
47303 gen_lowpart (d->vmode, target));
47304 return true;
47306 return false;
47309 /* Next see if vpermd can be used. */
47310 if (valid_perm_using_mode_p (V16SImode, d))
47311 vmode = V16SImode;
47313 /* Or if vpermps can be used. */
47314 else if (d->vmode == V16SFmode)
47315 vmode = V16SImode;
47316 if (vmode == V64QImode)
47318 /* vpshufb only works intra lane; it is not
47319 possible to shuffle bytes between the lanes. */
47320 for (i = 0; i < nelt; ++i)
47321 if ((d->perm[i] ^ i) & (nelt / 4))
47322 return false;
47325 else
47326 return false;
47329 if (d->testing_p)
47330 return true;
47332 if (vmode == V8SImode)
47333 for (i = 0; i < 8; ++i)
47334 rperm[i] = GEN_INT ((d->perm[i * nelt / 8] * 8 / nelt) & 7);
47335 else if (vmode == V16SImode)
47336 for (i = 0; i < 16; ++i)
47337 rperm[i] = GEN_INT ((d->perm[i * nelt / 16] * 16 / nelt) & 15);
47338 else
47340 eltsz = GET_MODE_SIZE (GET_MODE_INNER (d->vmode));
47341 if (!d->one_operand_p)
47342 mask = 2 * nelt - 1;
47343 else if (vmode == V16QImode)
47344 mask = nelt - 1;
47345 else if (vmode == V64QImode)
47346 mask = nelt / 4 - 1;
47347 else
47348 mask = nelt / 2 - 1;
47350 for (i = 0; i < nelt; ++i)
47352 unsigned j, e = d->perm[i] & mask;
47353 for (j = 0; j < eltsz; ++j)
47354 rperm[i * eltsz + j] = GEN_INT (e * eltsz + j);
47358 vperm = gen_rtx_CONST_VECTOR (vmode,
47359 gen_rtvec_v (GET_MODE_NUNITS (vmode), rperm));
47360 vperm = force_reg (vmode, vperm);
47362 target = d->target;
47363 if (d->vmode != vmode)
47364 target = gen_reg_rtx (vmode);
47365 op0 = gen_lowpart (vmode, d->op0);
47366 if (d->one_operand_p)
47368 if (vmode == V16QImode)
47369 emit_insn (gen_ssse3_pshufbv16qi3 (target, op0, vperm));
47370 else if (vmode == V32QImode)
47371 emit_insn (gen_avx2_pshufbv32qi3 (target, op0, vperm));
47372 else if (vmode == V64QImode)
47373 emit_insn (gen_avx512bw_pshufbv64qi3 (target, op0, vperm));
47374 else if (vmode == V8SFmode)
47375 emit_insn (gen_avx2_permvarv8sf (target, op0, vperm));
47376 else if (vmode == V8SImode)
47377 emit_insn (gen_avx2_permvarv8si (target, op0, vperm));
47378 else if (vmode == V16SFmode)
47379 emit_insn (gen_avx512f_permvarv16sf (target, op0, vperm));
47380 else if (vmode == V16SImode)
47381 emit_insn (gen_avx512f_permvarv16si (target, op0, vperm));
47382 else
47383 gcc_unreachable ();
47385 else
47387 op1 = gen_lowpart (vmode, d->op1);
47388 emit_insn (gen_xop_pperm (target, op0, op1, vperm));
47390 if (target != d->target)
47391 emit_move_insn (d->target, gen_lowpart (d->vmode, target));
47393 return true;
47396 /* A subroutine of ix86_expand_vec_perm_builtin_1. Try to instantiate D
47397 in a single instruction. */
47399 static bool
47400 expand_vec_perm_1 (struct expand_vec_perm_d *d)
47402 unsigned i, nelt = d->nelt;
47403 unsigned char perm2[MAX_VECT_LEN];
47405 /* Check plain VEC_SELECT first, because AVX has instructions that could
47406 match both SEL and SEL+CONCAT, but the plain SEL will allow a memory
47407 input where SEL+CONCAT may not. */
47408 if (d->one_operand_p)
47410 int mask = nelt - 1;
47411 bool identity_perm = true;
47412 bool broadcast_perm = true;
47414 for (i = 0; i < nelt; i++)
47416 perm2[i] = d->perm[i] & mask;
47417 if (perm2[i] != i)
47418 identity_perm = false;
47419 if (perm2[i])
47420 broadcast_perm = false;
47423 if (identity_perm)
47425 if (!d->testing_p)
47426 emit_move_insn (d->target, d->op0);
47427 return true;
47429 else if (broadcast_perm && TARGET_AVX2)
47431 /* Use vpbroadcast{b,w,d}. */
47432 rtx (*gen) (rtx, rtx) = NULL;
47433 switch (d->vmode)
47435 case V64QImode:
47436 if (TARGET_AVX512BW)
47437 gen = gen_avx512bw_vec_dupv64qi_1;
47438 break;
47439 case V32QImode:
47440 gen = gen_avx2_pbroadcastv32qi_1;
47441 break;
47442 case V32HImode:
47443 if (TARGET_AVX512BW)
47444 gen = gen_avx512bw_vec_dupv32hi_1;
47445 break;
47446 case V16HImode:
47447 gen = gen_avx2_pbroadcastv16hi_1;
47448 break;
47449 case V16SImode:
47450 if (TARGET_AVX512F)
47451 gen = gen_avx512f_vec_dupv16si_1;
47452 break;
47453 case V8SImode:
47454 gen = gen_avx2_pbroadcastv8si_1;
47455 break;
47456 case V16QImode:
47457 gen = gen_avx2_pbroadcastv16qi;
47458 break;
47459 case V8HImode:
47460 gen = gen_avx2_pbroadcastv8hi;
47461 break;
47462 case V16SFmode:
47463 if (TARGET_AVX512F)
47464 gen = gen_avx512f_vec_dupv16sf_1;
47465 break;
47466 case V8SFmode:
47467 gen = gen_avx2_vec_dupv8sf_1;
47468 break;
47469 case V8DFmode:
47470 if (TARGET_AVX512F)
47471 gen = gen_avx512f_vec_dupv8df_1;
47472 break;
47473 case V8DImode:
47474 if (TARGET_AVX512F)
47475 gen = gen_avx512f_vec_dupv8di_1;
47476 break;
47477 /* For other modes, prefer the other shuffles this function creates. */
47478 default: break;
47480 if (gen != NULL)
47482 if (!d->testing_p)
47483 emit_insn (gen (d->target, d->op0));
47484 return true;
47488 if (expand_vselect (d->target, d->op0, perm2, nelt, d->testing_p))
47489 return true;
47491 /* There are plenty of patterns in sse.md that are written for
47492 SEL+CONCAT and are not replicated for a single op. Perhaps
47493 that should be changed, to avoid the nastiness here. */
47495 /* Recognize interleave style patterns, which means incrementing
47496 every other permutation operand. */
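/* For instance, in the one-operand V4SImode case the permutation
   { 0, 0, 1, 1 } becomes perm2 = { 0, 4, 1, 5 } applied to the
   concatenation of op0 with itself, which matches the punpckldq
   interleave pattern.  */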
47497 for (i = 0; i < nelt; i += 2)
47499 perm2[i] = d->perm[i] & mask;
47500 perm2[i + 1] = (d->perm[i + 1] & mask) + nelt;
47502 if (expand_vselect_vconcat (d->target, d->op0, d->op0, perm2, nelt,
47503 d->testing_p))
47504 return true;
47506 /* Recognize shufps, which means adding {0, 0, nelt, nelt}. */
47507 if (nelt >= 4)
47509 for (i = 0; i < nelt; i += 4)
47511 perm2[i + 0] = d->perm[i + 0] & mask;
47512 perm2[i + 1] = d->perm[i + 1] & mask;
47513 perm2[i + 2] = (d->perm[i + 2] & mask) + nelt;
47514 perm2[i + 3] = (d->perm[i + 3] & mask) + nelt;
47517 if (expand_vselect_vconcat (d->target, d->op0, d->op0, perm2, nelt,
47518 d->testing_p))
47519 return true;
47523 /* Finally, try the fully general two operand permute. */
47524 if (expand_vselect_vconcat (d->target, d->op0, d->op1, d->perm, nelt,
47525 d->testing_p))
47526 return true;
47528 /* Recognize interleave style patterns with reversed operands. */
47529 if (!d->one_operand_p)
47531 for (i = 0; i < nelt; ++i)
47533 unsigned e = d->perm[i];
47534 if (e >= nelt)
47535 e -= nelt;
47536 else
47537 e += nelt;
47538 perm2[i] = e;
47541 if (expand_vselect_vconcat (d->target, d->op1, d->op0, perm2, nelt,
47542 d->testing_p))
47543 return true;
47546 /* Try the SSE4.1 blend variable merge instructions. */
47547 if (expand_vec_perm_blend (d))
47548 return true;
47550 /* Try one of the AVX vpermil variable permutations. */
47551 if (expand_vec_perm_vpermil (d))
47552 return true;
47554 /* Try the SSSE3 pshufb or XOP vpperm or AVX2 vperm2i128,
47555 vpshufb, vpermd, vpermps or vpermq variable permutation. */
47556 if (expand_vec_perm_pshufb (d))
47557 return true;
47559 /* Try the AVX2 vpalignr instruction. */
47560 if (expand_vec_perm_palignr (d, true))
47561 return true;
47563 /* Try the AVX512F vpermi2 instructions. */
47564 if (ix86_expand_vec_perm_vpermi2 (NULL_RTX, NULL_RTX, NULL_RTX, NULL_RTX, d))
47565 return true;
47567 return false;
47570 /* A subroutine of ix86_expand_vec_perm_builtin_1. Try to implement D
47571 in terms of a pair of pshuflw + pshufhw instructions. */
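/* E.g. the V8HImode permutation { 3, 2, 1, 0, 7, 6, 5, 4 } qualifies:
   pshuflw reverses the four low words and pshufhw then reverses the
   four high words.  */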
47573 static bool
47574 expand_vec_perm_pshuflw_pshufhw (struct expand_vec_perm_d *d)
47576 unsigned char perm2[MAX_VECT_LEN];
47577 unsigned i;
47578 bool ok;
47580 if (d->vmode != V8HImode || !d->one_operand_p)
47581 return false;
47583 /* The two permutations only operate in 64-bit lanes. */
47584 for (i = 0; i < 4; ++i)
47585 if (d->perm[i] >= 4)
47586 return false;
47587 for (i = 4; i < 8; ++i)
47588 if (d->perm[i] < 4)
47589 return false;
47591 if (d->testing_p)
47592 return true;
47594 /* Emit the pshuflw. */
47595 memcpy (perm2, d->perm, 4);
47596 for (i = 4; i < 8; ++i)
47597 perm2[i] = i;
47598 ok = expand_vselect (d->target, d->op0, perm2, 8, d->testing_p);
47599 gcc_assert (ok);
47601 /* Emit the pshufhw. */
47602 memcpy (perm2 + 4, d->perm + 4, 4);
47603 for (i = 0; i < 4; ++i)
47604 perm2[i] = i;
47605 ok = expand_vselect (d->target, d->target, perm2, 8, d->testing_p);
47606 gcc_assert (ok);
47608 return true;
47611 /* A subroutine of ix86_expand_vec_perm_builtin_1. Try to simplify
47612 the permutation using the SSSE3 palignr instruction. This succeeds
47613 when all of the elements in PERM fit within one vector and we merely
47614 need to shift them down so that a single vector permutation has a
47615 chance to succeed. If SINGLE_INSN_ONLY_P, succeed only if
47616 the vpalignr instruction by itself can perform the requested permutation. */
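/* E.g. with two V16QImode operands, the permutation selecting bytes
   3 .. 18 of the concatenated inputs is handled by the byte alignment
   alone; in other cases the alignment merely shifts the elements down
   so that a one-operand shuffle such as pshufb can finish the job.  */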
47618 static bool
47619 expand_vec_perm_palignr (struct expand_vec_perm_d *d, bool single_insn_only_p)
47621 unsigned i, nelt = d->nelt;
47622 unsigned min, max, minswap, maxswap;
47623 bool in_order, ok, swap = false;
47624 rtx shift, target;
47625 struct expand_vec_perm_d dcopy;
47627 /* Even with AVX, palignr only operates on 128-bit vectors;
47628 with AVX2, palignr operates on both 128-bit lanes independently. */
47629 if ((!TARGET_SSSE3 || GET_MODE_SIZE (d->vmode) != 16)
47630 && (!TARGET_AVX2 || GET_MODE_SIZE (d->vmode) != 32))
47631 return false;
47633 min = 2 * nelt;
47634 max = 0;
47635 minswap = 2 * nelt;
47636 maxswap = 0;
47637 for (i = 0; i < nelt; ++i)
47639 unsigned e = d->perm[i];
47640 unsigned eswap = d->perm[i] ^ nelt;
47641 if (GET_MODE_SIZE (d->vmode) == 32)
47643 e = (e & ((nelt / 2) - 1)) | ((e & nelt) >> 1);
47644 eswap = e ^ (nelt / 2);
47646 if (e < min)
47647 min = e;
47648 if (e > max)
47649 max = e;
47650 if (eswap < minswap)
47651 minswap = eswap;
47652 if (eswap > maxswap)
47653 maxswap = eswap;
47655 if (min == 0
47656 || max - min >= (GET_MODE_SIZE (d->vmode) == 32 ? nelt / 2 : nelt))
47658 if (d->one_operand_p
47659 || minswap == 0
47660 || maxswap - minswap >= (GET_MODE_SIZE (d->vmode) == 32
47661 ? nelt / 2 : nelt))
47662 return false;
47663 swap = true;
47664 min = minswap;
47665 max = maxswap;
47668 /* Given that we have SSSE3, we know we'll be able to implement the
47669 single operand permutation after the palignr with pshufb for
47670 128-bit vectors. If SINGLE_INSN_ONLY_P, in_order has to be computed
47671 first. */
47672 if (d->testing_p && GET_MODE_SIZE (d->vmode) == 16 && !single_insn_only_p)
47673 return true;
47675 dcopy = *d;
47676 if (swap)
47678 dcopy.op0 = d->op1;
47679 dcopy.op1 = d->op0;
47680 for (i = 0; i < nelt; ++i)
47681 dcopy.perm[i] ^= nelt;
47684 in_order = true;
47685 for (i = 0; i < nelt; ++i)
47687 unsigned e = dcopy.perm[i];
47688 if (GET_MODE_SIZE (d->vmode) == 32
47689 && e >= nelt
47690 && (e & (nelt / 2 - 1)) < min)
47691 e = e - min - (nelt / 2);
47692 else
47693 e = e - min;
47694 if (e != i)
47695 in_order = false;
47696 dcopy.perm[i] = e;
47698 dcopy.one_operand_p = true;
47700 if (single_insn_only_p && !in_order)
47701 return false;
47703 /* For AVX2, test whether we can permute the result in one instruction. */
47704 if (d->testing_p)
47706 if (in_order)
47707 return true;
47708 dcopy.op1 = dcopy.op0;
47709 return expand_vec_perm_1 (&dcopy);
47712 shift = GEN_INT (min * GET_MODE_BITSIZE (GET_MODE_INNER (d->vmode)));
47713 if (GET_MODE_SIZE (d->vmode) == 16)
47715 target = gen_reg_rtx (TImode);
47716 emit_insn (gen_ssse3_palignrti (target, gen_lowpart (TImode, dcopy.op1),
47717 gen_lowpart (TImode, dcopy.op0), shift));
47719 else
47721 target = gen_reg_rtx (V2TImode);
47722 emit_insn (gen_avx2_palignrv2ti (target,
47723 gen_lowpart (V2TImode, dcopy.op1),
47724 gen_lowpart (V2TImode, dcopy.op0),
47725 shift));
47728 dcopy.op0 = dcopy.op1 = gen_lowpart (d->vmode, target);
47730 /* Test for the degenerate case where the alignment by itself
47731 produces the desired permutation. */
47732 if (in_order)
47734 emit_move_insn (d->target, dcopy.op0);
47735 return true;
47738 ok = expand_vec_perm_1 (&dcopy);
47739 gcc_assert (ok || GET_MODE_SIZE (d->vmode) == 32);
47741 return ok;
47744 /* A subroutine of ix86_expand_vec_perm_const_1. Try to simplify
47745 the permutation using the SSE4_1 pblendv instruction. Potentially
47746 reduces the permutation from 2 pshufbs plus an or to 1 pshufb plus a pblendv. */
47748 static bool
47749 expand_vec_perm_pblendv (struct expand_vec_perm_d *d)
47751 unsigned i, which, nelt = d->nelt;
47752 struct expand_vec_perm_d dcopy, dcopy1;
47753 machine_mode vmode = d->vmode;
47754 bool ok;
47756 /* Use the same checks as in expand_vec_perm_blend. */
47757 if (d->one_operand_p)
47758 return false;
47759 if (TARGET_AVX2 && GET_MODE_SIZE (vmode) == 32)
47761 else if (TARGET_AVX && (vmode == V4DFmode || vmode == V8SFmode))
47763 else if (TARGET_SSE4_1 && GET_MODE_SIZE (vmode) == 16)
47765 else
47766 return false;
47768 /* Figure out which permutation elements do not stay in their
47769 respective lanes, and which operand they come from. */
47770 for (i = 0, which = 0; i < nelt; ++i)
47772 unsigned e = d->perm[i];
47773 if (e != i)
47774 which |= (e < nelt ? 1 : 2);
47776 /* We can pblend the out-of-place elements only when these
47777 elements all come from the same half
47778 of the permutation.
47779 {0 1 8 3 4 5 9 7} is ok: 8 and 9 are not in their respective
47780 lanes, but both are >= 8.
47781 {0 1 8 3 4 5 2 7} is not ok: 2 and 8 are not in their
47782 respective lanes, and 8 >= 8 but 2 is not. */
47783 if (which != 1 && which != 2)
47784 return false;
47785 if (d->testing_p && GET_MODE_SIZE (vmode) == 16)
47786 return true;
47788 /* First we apply a one-operand permutation to the part whose
47789 elements are not in their respective lanes. */
47790 dcopy = *d;
47791 if (which == 2)
47792 dcopy.op0 = dcopy.op1 = d->op1;
47793 else
47794 dcopy.op0 = dcopy.op1 = d->op0;
47795 if (!d->testing_p)
47796 dcopy.target = gen_reg_rtx (vmode);
47797 dcopy.one_operand_p = true;
47799 for (i = 0; i < nelt; ++i)
47800 dcopy.perm[i] = d->perm[i] & (nelt - 1);
47802 ok = expand_vec_perm_1 (&dcopy);
47803 if (GET_MODE_SIZE (vmode) != 16 && !ok)
47804 return false;
47805 else
47806 gcc_assert (ok);
47807 if (d->testing_p)
47808 return true;
47810 /* Next we put permuted elements into their positions. */
47811 dcopy1 = *d;
47812 if (which == 2)
47813 dcopy1.op1 = dcopy.target;
47814 else
47815 dcopy1.op0 = dcopy.target;
47817 for (i = 0; i < nelt; ++i)
47818 dcopy1.perm[i] = ((d->perm[i] >= nelt) ? (nelt + i) : i);
47820 ok = expand_vec_perm_blend (&dcopy1);
47821 gcc_assert (ok);
47823 return true;
47826 static bool expand_vec_perm_interleave3 (struct expand_vec_perm_d *d);
47828 /* A subroutine of ix86_expand_vec_perm_builtin_1. Try to simplify
47829 a two vector permutation into a single vector permutation by using
47830 an interleave operation to merge the vectors. */
47832 static bool
47833 expand_vec_perm_interleave2 (struct expand_vec_perm_d *d)
47835 struct expand_vec_perm_d dremap, dfinal;
47836 unsigned i, nelt = d->nelt, nelt2 = nelt / 2;
47837 unsigned HOST_WIDE_INT contents;
47838 unsigned char remap[2 * MAX_VECT_LEN];
47839 rtx_insn *seq;
47840 bool ok, same_halves = false;
47842 if (GET_MODE_SIZE (d->vmode) == 16)
47844 if (d->one_operand_p)
47845 return false;
47847 else if (GET_MODE_SIZE (d->vmode) == 32)
47849 if (!TARGET_AVX)
47850 return false;
47851 /* For 32-byte modes allow even d->one_operand_p.
47852 The lack of cross-lane shuffling in some instructions
47853 might prevent a single insn shuffle. */
47854 dfinal = *d;
47855 dfinal.testing_p = true;
47856 /* If expand_vec_perm_interleave3 can expand this into
47857 a 3 insn sequence, give up and let it be expanded as
47858 a 3 insn sequence. While that is one insn longer,
47859 it doesn't need a memory operand, and in the common
47860 case where the interleave low and interleave high permutations
47861 with the same operands are adjacent, it needs only 4 insns
47862 for both after CSE. */
47863 if (expand_vec_perm_interleave3 (&dfinal))
47864 return false;
47866 else
47867 return false;
47869 /* Examine from whence the elements come. */
47870 contents = 0;
47871 for (i = 0; i < nelt; ++i)
47872 contents |= HOST_WIDE_INT_1U << d->perm[i];
47874 memset (remap, 0xff, sizeof (remap));
47875 dremap = *d;
47877 if (GET_MODE_SIZE (d->vmode) == 16)
47879 unsigned HOST_WIDE_INT h1, h2, h3, h4;
47881 /* Split the two input vectors into 4 halves. */
47882 h1 = (HOST_WIDE_INT_1U << nelt2) - 1;
47883 h2 = h1 << nelt2;
47884 h3 = h2 << nelt2;
47885 h4 = h3 << nelt2;
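/* E.g. for V8HImode (nelt == 8, nelt2 == 4) this gives h1 == 0x000f,
   h2 == 0x00f0, h3 == 0x0f00 and h4 == 0xf000, i.e. the low and high
   halves of op0 and of op1 respectively.  */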
47887 /* If the elements all come from the low halves, use interleave low; similarly
47888 for interleave high. If the elements are from mismatched halves, we
47889 can use shufps for V4SF/V4SI or do a DImode shuffle. */
47890 if ((contents & (h1 | h3)) == contents)
47892 /* punpckl* */
47893 for (i = 0; i < nelt2; ++i)
47895 remap[i] = i * 2;
47896 remap[i + nelt] = i * 2 + 1;
47897 dremap.perm[i * 2] = i;
47898 dremap.perm[i * 2 + 1] = i + nelt;
47900 if (!TARGET_SSE2 && d->vmode == V4SImode)
47901 dremap.vmode = V4SFmode;
47903 else if ((contents & (h2 | h4)) == contents)
47905 /* punpckh* */
47906 for (i = 0; i < nelt2; ++i)
47908 remap[i + nelt2] = i * 2;
47909 remap[i + nelt + nelt2] = i * 2 + 1;
47910 dremap.perm[i * 2] = i + nelt2;
47911 dremap.perm[i * 2 + 1] = i + nelt + nelt2;
47913 if (!TARGET_SSE2 && d->vmode == V4SImode)
47914 dremap.vmode = V4SFmode;
47916 else if ((contents & (h1 | h4)) == contents)
47918 /* shufps */
47919 for (i = 0; i < nelt2; ++i)
47921 remap[i] = i;
47922 remap[i + nelt + nelt2] = i + nelt2;
47923 dremap.perm[i] = i;
47924 dremap.perm[i + nelt2] = i + nelt + nelt2;
47926 if (nelt != 4)
47928 /* shufpd */
47929 dremap.vmode = V2DImode;
47930 dremap.nelt = 2;
47931 dremap.perm[0] = 0;
47932 dremap.perm[1] = 3;
47935 else if ((contents & (h2 | h3)) == contents)
47937 /* shufps */
47938 for (i = 0; i < nelt2; ++i)
47940 remap[i + nelt2] = i;
47941 remap[i + nelt] = i + nelt2;
47942 dremap.perm[i] = i + nelt2;
47943 dremap.perm[i + nelt2] = i + nelt;
47945 if (nelt != 4)
47947 /* shufpd */
47948 dremap.vmode = V2DImode;
47949 dremap.nelt = 2;
47950 dremap.perm[0] = 1;
47951 dremap.perm[1] = 2;
47954 else
47955 return false;
47957 else
47959 unsigned int nelt4 = nelt / 4, nzcnt = 0;
47960 unsigned HOST_WIDE_INT q[8];
47961 unsigned int nonzero_halves[4];
47963 /* Split the two input vectors into 8 quarters. */
47964 q[0] = (HOST_WIDE_INT_1U << nelt4) - 1;
47965 for (i = 1; i < 8; ++i)
47966 q[i] = q[0] << (nelt4 * i);
47967 for (i = 0; i < 4; ++i)
47968 if (((q[2 * i] | q[2 * i + 1]) & contents) != 0)
47970 nonzero_halves[nzcnt] = i;
47971 ++nzcnt;
47974 if (nzcnt == 1)
47976 gcc_assert (d->one_operand_p);
47977 nonzero_halves[1] = nonzero_halves[0];
47978 same_halves = true;
47980 else if (d->one_operand_p)
47982 gcc_assert (nonzero_halves[0] == 0);
47983 gcc_assert (nonzero_halves[1] == 1);
47986 if (nzcnt <= 2)
47988 if (d->perm[0] / nelt2 == nonzero_halves[1])
47990 /* Attempt to increase the likelihood that dfinal
47991 shuffle will be intra-lane. */
47992 std::swap (nonzero_halves[0], nonzero_halves[1]);
47995 /* vperm2f128 or vperm2i128. */
47996 for (i = 0; i < nelt2; ++i)
47998 remap[i + nonzero_halves[1] * nelt2] = i + nelt2;
47999 remap[i + nonzero_halves[0] * nelt2] = i;
48000 dremap.perm[i + nelt2] = i + nonzero_halves[1] * nelt2;
48001 dremap.perm[i] = i + nonzero_halves[0] * nelt2;
48004 if (d->vmode != V8SFmode
48005 && d->vmode != V4DFmode
48006 && d->vmode != V8SImode)
48008 dremap.vmode = V8SImode;
48009 dremap.nelt = 8;
48010 for (i = 0; i < 4; ++i)
48012 dremap.perm[i] = i + nonzero_halves[0] * 4;
48013 dremap.perm[i + 4] = i + nonzero_halves[1] * 4;
48017 else if (d->one_operand_p)
48018 return false;
48019 else if (TARGET_AVX2
48020 && (contents & (q[0] | q[2] | q[4] | q[6])) == contents)
48022 /* vpunpckl* */
48023 for (i = 0; i < nelt4; ++i)
48025 remap[i] = i * 2;
48026 remap[i + nelt] = i * 2 + 1;
48027 remap[i + nelt2] = i * 2 + nelt2;
48028 remap[i + nelt + nelt2] = i * 2 + nelt2 + 1;
48029 dremap.perm[i * 2] = i;
48030 dremap.perm[i * 2 + 1] = i + nelt;
48031 dremap.perm[i * 2 + nelt2] = i + nelt2;
48032 dremap.perm[i * 2 + nelt2 + 1] = i + nelt + nelt2;
48035 else if (TARGET_AVX2
48036 && (contents & (q[1] | q[3] | q[5] | q[7])) == contents)
48038 /* vpunpckh* */
48039 for (i = 0; i < nelt4; ++i)
48041 remap[i + nelt4] = i * 2;
48042 remap[i + nelt + nelt4] = i * 2 + 1;
48043 remap[i + nelt2 + nelt4] = i * 2 + nelt2;
48044 remap[i + nelt + nelt2 + nelt4] = i * 2 + nelt2 + 1;
48045 dremap.perm[i * 2] = i + nelt4;
48046 dremap.perm[i * 2 + 1] = i + nelt + nelt4;
48047 dremap.perm[i * 2 + nelt2] = i + nelt2 + nelt4;
48048 dremap.perm[i * 2 + nelt2 + 1] = i + nelt + nelt2 + nelt4;
48051 else
48052 return false;
48055 /* Use the remapping array set up above to move the elements from their
48056 swizzled locations into their final destinations. */
48057 dfinal = *d;
48058 for (i = 0; i < nelt; ++i)
48060 unsigned e = remap[d->perm[i]];
48061 gcc_assert (e < nelt);
48062 /* If same_halves is true, both halves of the remapped vector are the
48063 same. Avoid cross-lane accesses if possible. */
48064 if (same_halves && i >= nelt2)
48066 gcc_assert (e < nelt2);
48067 dfinal.perm[i] = e + nelt2;
48069 else
48070 dfinal.perm[i] = e;
48072 if (!d->testing_p)
48074 dremap.target = gen_reg_rtx (dremap.vmode);
48075 dfinal.op0 = gen_lowpart (dfinal.vmode, dremap.target);
48077 dfinal.op1 = dfinal.op0;
48078 dfinal.one_operand_p = true;
48080 /* Test if the final remap can be done with a single insn. For V4SFmode or
48081 V4SImode this *will* succeed. For V8HImode or V16QImode it may not. */
48082 start_sequence ();
48083 ok = expand_vec_perm_1 (&dfinal);
48084 seq = get_insns ();
48085 end_sequence ();
48087 if (!ok)
48088 return false;
48090 if (d->testing_p)
48091 return true;
48093 if (dremap.vmode != dfinal.vmode)
48095 dremap.op0 = gen_lowpart (dremap.vmode, dremap.op0);
48096 dremap.op1 = gen_lowpart (dremap.vmode, dremap.op1);
48099 ok = expand_vec_perm_1 (&dremap);
48100 gcc_assert (ok);
48102 emit_insn (seq);
48103 return true;
48106 /* A subroutine of ix86_expand_vec_perm_builtin_1. Try to simplify
48107 a single vector cross-lane permutation into vpermq followed
48108 by any of the single insn permutations. */
48110 static bool
48111 expand_vec_perm_vpermq_perm_1 (struct expand_vec_perm_d *d)
48113 struct expand_vec_perm_d dremap, dfinal;
48114 unsigned i, j, nelt = d->nelt, nelt2 = nelt / 2, nelt4 = nelt / 4;
48115 unsigned contents[2];
48116 bool ok;
48118 if (!(TARGET_AVX2
48119 && (d->vmode == V32QImode || d->vmode == V16HImode)
48120 && d->one_operand_p))
48121 return false;
48123 contents[0] = 0;
48124 contents[1] = 0;
48125 for (i = 0; i < nelt2; ++i)
48127 contents[0] |= 1u << (d->perm[i] / nelt4);
48128 contents[1] |= 1u << (d->perm[i + nelt2] / nelt4);
48131 for (i = 0; i < 2; ++i)
48133 unsigned int cnt = 0;
48134 for (j = 0; j < 4; ++j)
48135 if ((contents[i] & (1u << j)) != 0 && ++cnt > 2)
48136 return false;
48139 if (d->testing_p)
48140 return true;
48142 dremap = *d;
48143 dremap.vmode = V4DImode;
48144 dremap.nelt = 4;
48145 dremap.target = gen_reg_rtx (V4DImode);
48146 dremap.op0 = gen_lowpart (V4DImode, d->op0);
48147 dremap.op1 = dremap.op0;
48148 dremap.one_operand_p = true;
48149 for (i = 0; i < 2; ++i)
48151 unsigned int cnt = 0;
48152 for (j = 0; j < 4; ++j)
48153 if ((contents[i] & (1u << j)) != 0)
48154 dremap.perm[2 * i + cnt++] = j;
48155 for (; cnt < 2; ++cnt)
48156 dremap.perm[2 * i + cnt] = 0;
48159 dfinal = *d;
48160 dfinal.op0 = gen_lowpart (dfinal.vmode, dremap.target);
48161 dfinal.op1 = dfinal.op0;
48162 dfinal.one_operand_p = true;
48163 for (i = 0, j = 0; i < nelt; ++i)
48165 if (i == nelt2)
48166 j = 2;
48167 dfinal.perm[i] = (d->perm[i] & (nelt4 - 1)) | (j ? nelt2 : 0);
48168 if ((d->perm[i] / nelt4) == dremap.perm[j])
48170 else if ((d->perm[i] / nelt4) == dremap.perm[j + 1])
48171 dfinal.perm[i] |= nelt4;
48172 else
48173 gcc_unreachable ();
48176 ok = expand_vec_perm_1 (&dremap);
48177 gcc_assert (ok);
48179 ok = expand_vec_perm_1 (&dfinal);
48180 gcc_assert (ok);
48182 return true;
48185 /* A subroutine of ix86_expand_vec_perm_builtin_1. Try to expand
48186 a vector permutation using two instructions: vperm2f128 (or
48187 vperm2i128) followed by any single in-lane permutation. */
48189 static bool
48190 expand_vec_perm_vperm2f128 (struct expand_vec_perm_d *d)
48192 struct expand_vec_perm_d dfirst, dsecond;
48193 unsigned i, j, nelt = d->nelt, nelt2 = nelt / 2, perm;
48194 bool ok;
48196 if (!TARGET_AVX
48197 || GET_MODE_SIZE (d->vmode) != 32
48198 || (d->vmode != V8SFmode && d->vmode != V4DFmode && !TARGET_AVX2))
48199 return false;
48201 dsecond = *d;
48202 dsecond.one_operand_p = false;
48203 dsecond.testing_p = true;
48205 /* ((perm << 2)|perm) & 0x33 is the vperm2[fi]128
48206 immediate. For perm < 16 the second permutation uses
48207 d->op0 as first operand, for perm >= 16 it uses d->op1
48208 as first operand. The second operand is the result of
48209 vperm2[fi]128. */
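/* Illustrative example (added annotation, not in the original source):
   for V4DFmode with op0 = { 0 1 2 3 } and op1 = { 4 5 6 7 }, perm = 6
   selects op1's low lane for the low half and op0's high lane for the
   high half, giving the immediate ((6 << 2) | 6) & 0x33 = 0x12 and the
   vperm2f128 result { 4 5 2 3 }; since 6 < 16, the remaining in-lane
   shuffle then combines d->op0 with that result.  */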
48210 for (perm = 0; perm < 32; perm++)
48212 /* Ignore permutations which do not move anything cross-lane. */
48213 if (perm < 16)
48215 /* The second shuffle for e.g. V4DFmode has
48216 0123 and ABCD operands.
48217 Ignore AB23, as 23 is already in the second lane
48218 of the first operand. */
48219 if ((perm & 0xc) == (1 << 2)) continue;
48220 /* And 01CD, as 01 is in the first lane of the first
48221 operand. */
48222 if ((perm & 3) == 0) continue;
48223 /* And 4567, as then the vperm2[fi]128 doesn't change
48224 anything on the original 4567 second operand. */
48225 if ((perm & 0xf) == ((3 << 2) | 2)) continue;
48227 else
48229 /* The second shuffle for e.g. V4DFmode has
48230 4567 and ABCD operands.
48231 Ignore AB67, as 67 is already in the second lane
48232 of the first operand. */
48233 if ((perm & 0xc) == (3 << 2)) continue;
48234 /* And 45CD, as 45 is in the first lane of the first
48235 operand. */
48236 if ((perm & 3) == 2) continue;
48237 /* And 0123, as then the vperm2[fi]128 doesn't change
48238 anything on the original 0123 first operand. */
48239 if ((perm & 0xf) == (1 << 2)) continue;
48242 for (i = 0; i < nelt; i++)
48244 j = d->perm[i] / nelt2;
48245 if (j == ((perm >> (2 * (i >= nelt2))) & 3))
48246 dsecond.perm[i] = nelt + (i & nelt2) + (d->perm[i] & (nelt2 - 1));
48247 else if (j == (unsigned) (i >= nelt2) + 2 * (perm >= 16))
48248 dsecond.perm[i] = d->perm[i] & (nelt - 1);
48249 else
48250 break;
48253 if (i == nelt)
48255 start_sequence ();
48256 ok = expand_vec_perm_1 (&dsecond);
48257 end_sequence ();
48259 else
48260 ok = false;
48262 if (ok)
48264 if (d->testing_p)
48265 return true;
48267 /* Found a usable second shuffle. dfirst will be
48268 vperm2f128 on d->op0 and d->op1. */
48269 dsecond.testing_p = false;
48270 dfirst = *d;
48271 dfirst.target = gen_reg_rtx (d->vmode);
48272 for (i = 0; i < nelt; i++)
48273 dfirst.perm[i] = (i & (nelt2 - 1))
48274 + ((perm >> (2 * (i >= nelt2))) & 3) * nelt2;
48276 canonicalize_perm (&dfirst);
48277 ok = expand_vec_perm_1 (&dfirst);
48278 gcc_assert (ok);
48280 /* And dsecond is some single insn shuffle, taking
48281 d->op0 and result of vperm2f128 (if perm < 16) or
48282 d->op1 and result of vperm2f128 (otherwise). */
48283 if (perm >= 16)
48284 dsecond.op0 = dsecond.op1;
48285 dsecond.op1 = dfirst.target;
48287 ok = expand_vec_perm_1 (&dsecond);
48288 gcc_assert (ok);
48290 return true;
48293 /* For one operand, the only useful vperm2f128 permutation is 0x01
48294 aka lanes swap. */
48295 if (d->one_operand_p)
48296 return false;
48299 return false;
48302 /* A subroutine of ix86_expand_vec_perm_builtin_1. Try to simplify
48303 a two vector permutation using 2 intra-lane interleave insns
48304 and cross-lane shuffle for 32-byte vectors. */
48306 static bool
48307 expand_vec_perm_interleave3 (struct expand_vec_perm_d *d)
48309 unsigned i, nelt;
48310 rtx (*gen) (rtx, rtx, rtx);
48312 if (d->one_operand_p)
48313 return false;
48314 if (TARGET_AVX2 && GET_MODE_SIZE (d->vmode) == 32)
48316 else if (TARGET_AVX && (d->vmode == V8SFmode || d->vmode == V4DFmode))
48318 else
48319 return false;
48321 nelt = d->nelt;
48322 if (d->perm[0] != 0 && d->perm[0] != nelt / 2)
48323 return false;
48324 for (i = 0; i < nelt; i += 2)
48325 if (d->perm[i] != d->perm[0] + i / 2
48326 || d->perm[i + 1] != d->perm[0] + i / 2 + nelt)
48327 return false;
48329 if (d->testing_p)
48330 return true;
48332 switch (d->vmode)
48334 case V32QImode:
48335 if (d->perm[0])
48336 gen = gen_vec_interleave_highv32qi;
48337 else
48338 gen = gen_vec_interleave_lowv32qi;
48339 break;
48340 case V16HImode:
48341 if (d->perm[0])
48342 gen = gen_vec_interleave_highv16hi;
48343 else
48344 gen = gen_vec_interleave_lowv16hi;
48345 break;
48346 case V8SImode:
48347 if (d->perm[0])
48348 gen = gen_vec_interleave_highv8si;
48349 else
48350 gen = gen_vec_interleave_lowv8si;
48351 break;
48352 case V4DImode:
48353 if (d->perm[0])
48354 gen = gen_vec_interleave_highv4di;
48355 else
48356 gen = gen_vec_interleave_lowv4di;
48357 break;
48358 case V8SFmode:
48359 if (d->perm[0])
48360 gen = gen_vec_interleave_highv8sf;
48361 else
48362 gen = gen_vec_interleave_lowv8sf;
48363 break;
48364 case V4DFmode:
48365 if (d->perm[0])
48366 gen = gen_vec_interleave_highv4df;
48367 else
48368 gen = gen_vec_interleave_lowv4df;
48369 break;
48370 default:
48371 gcc_unreachable ();
48374 emit_insn (gen (d->target, d->op0, d->op1));
48375 return true;
48378 /* A subroutine of ix86_expand_vec_perm_builtin_1. Try to implement
48379 a single vector permutation using a single intra-lane vector
48380 permutation, vperm2f128 swapping the lanes and vblend* insn blending
48381 the non-swapped and swapped vectors together. */
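/* Worked example (added annotation, not in the original source): for the
   one-operand V4DFmode permutation { 1 2 3 0 }, the intra-lane shuffle
   produces { 1 0 3 2 }, the lane swap of that gives { 3 2 1 0 }, and
   blending the two with mask 0b1010 yields { 1 2 3 0 }.  */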
48383 static bool
48384 expand_vec_perm_vperm2f128_vblend (struct expand_vec_perm_d *d)
48386 struct expand_vec_perm_d dfirst, dsecond;
48387 unsigned i, j, msk, nelt = d->nelt, nelt2 = nelt / 2;
48388 rtx_insn *seq;
48389 bool ok;
48390 rtx (*blend) (rtx, rtx, rtx, rtx) = NULL;
48392 if (!TARGET_AVX
48393 || TARGET_AVX2
48394 || (d->vmode != V8SFmode && d->vmode != V4DFmode)
48395 || !d->one_operand_p)
48396 return false;
48398 dfirst = *d;
48399 for (i = 0; i < nelt; i++)
48400 dfirst.perm[i] = 0xff;
48401 for (i = 0, msk = 0; i < nelt; i++)
48403 j = (d->perm[i] & nelt2) ? i | nelt2 : i & ~nelt2;
48404 if (dfirst.perm[j] != 0xff && dfirst.perm[j] != d->perm[i])
48405 return false;
48406 dfirst.perm[j] = d->perm[i];
48407 if (j != i)
48408 msk |= (1 << i);
48410 for (i = 0; i < nelt; i++)
48411 if (dfirst.perm[i] == 0xff)
48412 dfirst.perm[i] = i;
48414 if (!d->testing_p)
48415 dfirst.target = gen_reg_rtx (dfirst.vmode);
48417 start_sequence ();
48418 ok = expand_vec_perm_1 (&dfirst);
48419 seq = get_insns ();
48420 end_sequence ();
48422 if (!ok)
48423 return false;
48425 if (d->testing_p)
48426 return true;
48428 emit_insn (seq);
48430 dsecond = *d;
48431 dsecond.op0 = dfirst.target;
48432 dsecond.op1 = dfirst.target;
48433 dsecond.one_operand_p = true;
48434 dsecond.target = gen_reg_rtx (dsecond.vmode);
48435 for (i = 0; i < nelt; i++)
48436 dsecond.perm[i] = i ^ nelt2;
48438 ok = expand_vec_perm_1 (&dsecond);
48439 gcc_assert (ok);
48441 blend = d->vmode == V8SFmode ? gen_avx_blendps256 : gen_avx_blendpd256;
48442 emit_insn (blend (d->target, dfirst.target, dsecond.target, GEN_INT (msk)));
48443 return true;
48446 /* A subroutine of ix86_expand_vec_perm_builtin_1. Implement a V4DF
48447 permutation using two vperm2f128, followed by a vshufpd insn blending
48448 the two vectors together. */
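/* Worked example (added annotation, not in the original source): for the
   V4DFmode permutation { 1 6 3 4 }, the first vperm2f128 gathers the
   128-bit pairs holding elements 1 and 3, i.e. { 0 1 2 3 }, the second
   gathers the pairs holding elements 6 and 4, i.e. { 6 7 4 5 }, and the
   final vshufpd with selection { 1 4 3 6 } picks { 1 6 3 4 } out of the
   two temporaries.  */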
48450 static bool
48451 expand_vec_perm_2vperm2f128_vshuf (struct expand_vec_perm_d *d)
48453 struct expand_vec_perm_d dfirst, dsecond, dthird;
48454 bool ok;
48456 if (!TARGET_AVX || (d->vmode != V4DFmode))
48457 return false;
48459 if (d->testing_p)
48460 return true;
48462 dfirst = *d;
48463 dsecond = *d;
48464 dthird = *d;
48466 dfirst.perm[0] = (d->perm[0] & ~1);
48467 dfirst.perm[1] = (d->perm[0] & ~1) + 1;
48468 dfirst.perm[2] = (d->perm[2] & ~1);
48469 dfirst.perm[3] = (d->perm[2] & ~1) + 1;
48470 dsecond.perm[0] = (d->perm[1] & ~1);
48471 dsecond.perm[1] = (d->perm[1] & ~1) + 1;
48472 dsecond.perm[2] = (d->perm[3] & ~1);
48473 dsecond.perm[3] = (d->perm[3] & ~1) + 1;
48474 dthird.perm[0] = (d->perm[0] % 2);
48475 dthird.perm[1] = (d->perm[1] % 2) + 4;
48476 dthird.perm[2] = (d->perm[2] % 2) + 2;
48477 dthird.perm[3] = (d->perm[3] % 2) + 6;
48479 dfirst.target = gen_reg_rtx (dfirst.vmode);
48480 dsecond.target = gen_reg_rtx (dsecond.vmode);
48481 dthird.op0 = dfirst.target;
48482 dthird.op1 = dsecond.target;
48483 dthird.one_operand_p = false;
48485 canonicalize_perm (&dfirst);
48486 canonicalize_perm (&dsecond);
48488 ok = expand_vec_perm_1 (&dfirst)
48489 && expand_vec_perm_1 (&dsecond)
48490 && expand_vec_perm_1 (&dthird);
48492 gcc_assert (ok);
48494 return true;
48497 /* A subroutine of expand_vec_perm_even_odd_1. Implement the double-word
48498 permutation with two pshufb insns and an ior. We should have already
48499 failed all two instruction sequences. */
48501 static bool
48502 expand_vec_perm_pshufb2 (struct expand_vec_perm_d *d)
48504 rtx rperm[2][16], vperm, l, h, op, m128;
48505 unsigned int i, nelt, eltsz;
48507 if (!TARGET_SSSE3 || GET_MODE_SIZE (d->vmode) != 16)
48508 return false;
48509 gcc_assert (!d->one_operand_p);
48511 if (d->testing_p)
48512 return true;
48514 nelt = d->nelt;
48515 eltsz = GET_MODE_SIZE (GET_MODE_INNER (d->vmode));
48517 /* Generate two permutation masks. If the required element is within
48518 the given vector it is shuffled into the proper lane. If the required
48519 element is in the other vector, force a zero into the lane by setting
48520 bit 7 in the permutation mask. */
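/* Example (added annotation, not in the original source): for a V16QImode
   permutation with d->perm[3] = 20, the required byte is byte 4 of d->op1,
   so the mask applied to op1 gets 4 at position 3 while the mask applied
   to op0 gets -128 there; the ior of the two pshufb results then merges
   the zeroed and the shuffled bytes.  */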
48521 m128 = GEN_INT (-128);
48522 for (i = 0; i < nelt; ++i)
48524 unsigned j, e = d->perm[i];
48525 unsigned which = (e >= nelt);
48526 if (e >= nelt)
48527 e -= nelt;
48529 for (j = 0; j < eltsz; ++j)
48531 rperm[which][i*eltsz + j] = GEN_INT (e*eltsz + j);
48532 rperm[1-which][i*eltsz + j] = m128;
48536 vperm = gen_rtx_CONST_VECTOR (V16QImode, gen_rtvec_v (16, rperm[0]));
48537 vperm = force_reg (V16QImode, vperm);
48539 l = gen_reg_rtx (V16QImode);
48540 op = gen_lowpart (V16QImode, d->op0);
48541 emit_insn (gen_ssse3_pshufbv16qi3 (l, op, vperm));
48543 vperm = gen_rtx_CONST_VECTOR (V16QImode, gen_rtvec_v (16, rperm[1]));
48544 vperm = force_reg (V16QImode, vperm);
48546 h = gen_reg_rtx (V16QImode);
48547 op = gen_lowpart (V16QImode, d->op1);
48548 emit_insn (gen_ssse3_pshufbv16qi3 (h, op, vperm));
48550 op = d->target;
48551 if (d->vmode != V16QImode)
48552 op = gen_reg_rtx (V16QImode);
48553 emit_insn (gen_iorv16qi3 (op, l, h));
48554 if (op != d->target)
48555 emit_move_insn (d->target, gen_lowpart (d->vmode, op));
48557 return true;
48560 /* Implement arbitrary permutation of one V32QImode or V16HImode operand
48561 with two vpshufb insns, vpermq and vpor. We should have already failed
48562 all two or three instruction sequences. */
48564 static bool
48565 expand_vec_perm_vpshufb2_vpermq (struct expand_vec_perm_d *d)
48567 rtx rperm[2][32], vperm, l, h, hp, op, m128;
48568 unsigned int i, nelt, eltsz;
48570 if (!TARGET_AVX2
48571 || !d->one_operand_p
48572 || (d->vmode != V32QImode && d->vmode != V16HImode))
48573 return false;
48575 if (d->testing_p)
48576 return true;
48578 nelt = d->nelt;
48579 eltsz = GET_MODE_SIZE (GET_MODE_INNER (d->vmode));
48581 /* Generate two permutation masks. If the required element is within
48582 the same lane, it is shuffled in. If the required element is from the
48583 other lane, force a zero by setting bit 7 in the permutation mask.
48584 The other mask has a non-negative element wherever an element
48585 is requested from the other lane, but also moved to the other lane,
48586 so that the result of vpshufb can have the two V2TImode halves
48587 swapped. */
48588 m128 = GEN_INT (-128);
48589 for (i = 0; i < nelt; ++i)
48591 unsigned j, e = d->perm[i] & (nelt / 2 - 1);
48592 unsigned which = ((d->perm[i] ^ i) & (nelt / 2)) * eltsz;
48594 for (j = 0; j < eltsz; ++j)
48596 rperm[!!which][(i * eltsz + j) ^ which] = GEN_INT (e * eltsz + j);
48597 rperm[!which][(i * eltsz + j) ^ (which ^ 16)] = m128;
48601 vperm = gen_rtx_CONST_VECTOR (V32QImode, gen_rtvec_v (32, rperm[1]));
48602 vperm = force_reg (V32QImode, vperm);
48604 h = gen_reg_rtx (V32QImode);
48605 op = gen_lowpart (V32QImode, d->op0);
48606 emit_insn (gen_avx2_pshufbv32qi3 (h, op, vperm));
48608 /* Swap the 128-bit lanes of h into hp. */
48609 hp = gen_reg_rtx (V4DImode);
48610 op = gen_lowpart (V4DImode, h);
48611 emit_insn (gen_avx2_permv4di_1 (hp, op, const2_rtx, GEN_INT (3), const0_rtx,
48612 const1_rtx));
48614 vperm = gen_rtx_CONST_VECTOR (V32QImode, gen_rtvec_v (32, rperm[0]));
48615 vperm = force_reg (V32QImode, vperm);
48617 l = gen_reg_rtx (V32QImode);
48618 op = gen_lowpart (V32QImode, d->op0);
48619 emit_insn (gen_avx2_pshufbv32qi3 (l, op, vperm));
48621 op = d->target;
48622 if (d->vmode != V32QImode)
48623 op = gen_reg_rtx (V32QImode);
48624 emit_insn (gen_iorv32qi3 (op, l, gen_lowpart (V32QImode, hp)));
48625 if (op != d->target)
48626 emit_move_insn (d->target, gen_lowpart (d->vmode, op));
48628 return true;
48631 /* A subroutine of expand_vec_perm_even_odd_1. Implement extract-even
48632 and extract-odd permutations of two V32QImode or V16HImode operands
48633 with two vpshufb insns, vpor and vpermq. We should have already
48634 failed all two or three instruction sequences. */
48636 static bool
48637 expand_vec_perm_vpshufb2_vpermq_even_odd (struct expand_vec_perm_d *d)
48639 rtx rperm[2][32], vperm, l, h, ior, op, m128;
48640 unsigned int i, nelt, eltsz;
48642 if (!TARGET_AVX2
48643 || d->one_operand_p
48644 || (d->vmode != V32QImode && d->vmode != V16HImode))
48645 return false;
48647 for (i = 0; i < d->nelt; ++i)
48648 if ((d->perm[i] ^ (i * 2)) & (3 * d->nelt / 2))
48649 return false;
48651 if (d->testing_p)
48652 return true;
48654 nelt = d->nelt;
48655 eltsz = GET_MODE_SIZE (GET_MODE_INNER (d->vmode));
48657 /* Generate two permutation masks. In the first permutation mask
48658 the first quarter will contain indexes for the first half
48659 of the op0, the second quarter will contain bit 7 set, third quarter
48660 will contain indexes for the second half of the op0 and the
48661 last quarter bit 7 set. In the second permutation mask
48662 the first quarter will contain bit 7 set, the second quarter
48663 indexes for the first half of the op1, the third quarter bit 7 set
48664 and last quarter indexes for the second half of the op1.
48665 I.e. the first mask e.g. for V32QImode extract even will be:
48666 0, 2, ..., 0xe, -128, ..., -128, 0, 2, ..., 0xe, -128, ..., -128
48667 (all values masked with 0xf except for -128) and second mask
48668 for extract even will be
48669 -128, ..., -128, 0, 2, ..., 0xe, -128, ..., -128, 0, 2, ..., 0xe. */
48670 m128 = GEN_INT (-128);
48671 for (i = 0; i < nelt; ++i)
48673 unsigned j, e = d->perm[i] & (nelt / 2 - 1);
48674 unsigned which = d->perm[i] >= nelt;
48675 unsigned xorv = (i >= nelt / 4 && i < 3 * nelt / 4) ? 24 : 0;
48677 for (j = 0; j < eltsz; ++j)
48679 rperm[which][(i * eltsz + j) ^ xorv] = GEN_INT (e * eltsz + j);
48680 rperm[1 - which][(i * eltsz + j) ^ xorv] = m128;
48684 vperm = gen_rtx_CONST_VECTOR (V32QImode, gen_rtvec_v (32, rperm[0]));
48685 vperm = force_reg (V32QImode, vperm);
48687 l = gen_reg_rtx (V32QImode);
48688 op = gen_lowpart (V32QImode, d->op0);
48689 emit_insn (gen_avx2_pshufbv32qi3 (l, op, vperm));
48691 vperm = gen_rtx_CONST_VECTOR (V32QImode, gen_rtvec_v (32, rperm[1]));
48692 vperm = force_reg (V32QImode, vperm);
48694 h = gen_reg_rtx (V32QImode);
48695 op = gen_lowpart (V32QImode, d->op1);
48696 emit_insn (gen_avx2_pshufbv32qi3 (h, op, vperm));
48698 ior = gen_reg_rtx (V32QImode);
48699 emit_insn (gen_iorv32qi3 (ior, l, h));
48701 /* Permute the V4DImode quarters using { 0, 2, 1, 3 } permutation. */
48702 op = gen_reg_rtx (V4DImode);
48703 ior = gen_lowpart (V4DImode, ior);
48704 emit_insn (gen_avx2_permv4di_1 (op, ior, const0_rtx, const2_rtx,
48705 const1_rtx, GEN_INT (3)));
48706 emit_move_insn (d->target, gen_lowpart (d->vmode, op));
48708 return true;
48711 /* A subroutine of expand_vec_perm_even_odd_1. Implement extract-even
48712 and extract-odd permutations of two V16QI, V8HI, V16HI or V32QI operands
48713 with two "and" and "pack" or two "shift" and "pack" insns. We should
48714 have already failed all two instruction sequences. */
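/* Sketch (added annotation, not in the original source): for a V16QImode
   extract-even, both operands are viewed as V8HImode and masked with
   0x00ff so that only the even bytes survive in each word, and packuswb
   then narrows the two masked vectors into the even bytes of op0 followed
   by those of op1.  For extract-odd, the mask is replaced by a logical
   right shift of each word by 8.  */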
48716 static bool
48717 expand_vec_perm_even_odd_pack (struct expand_vec_perm_d *d)
48719 rtx op, dop0, dop1, t, rperm[16];
48720 unsigned i, odd, c, s, nelt = d->nelt;
48721 bool end_perm = false;
48722 machine_mode half_mode;
48723 rtx (*gen_and) (rtx, rtx, rtx);
48724 rtx (*gen_pack) (rtx, rtx, rtx);
48725 rtx (*gen_shift) (rtx, rtx, rtx);
48727 if (d->one_operand_p)
48728 return false;
48730 switch (d->vmode)
48732 case V8HImode:
48733 /* Required for "pack". */
48734 if (!TARGET_SSE4_1)
48735 return false;
48736 c = 0xffff;
48737 s = 16;
48738 half_mode = V4SImode;
48739 gen_and = gen_andv4si3;
48740 gen_pack = gen_sse4_1_packusdw;
48741 gen_shift = gen_lshrv4si3;
48742 break;
48743 case V16QImode:
48744 /* No check as all instructions are SSE2. */
48745 c = 0xff;
48746 s = 8;
48747 half_mode = V8HImode;
48748 gen_and = gen_andv8hi3;
48749 gen_pack = gen_sse2_packuswb;
48750 gen_shift = gen_lshrv8hi3;
48751 break;
48752 case V16HImode:
48753 if (!TARGET_AVX2)
48754 return false;
48755 c = 0xffff;
48756 s = 16;
48757 half_mode = V8SImode;
48758 gen_and = gen_andv8si3;
48759 gen_pack = gen_avx2_packusdw;
48760 gen_shift = gen_lshrv8si3;
48761 end_perm = true;
48762 break;
48763 case V32QImode:
48764 if (!TARGET_AVX2)
48765 return false;
48766 c = 0xff;
48767 s = 8;
48768 half_mode = V16HImode;
48769 gen_and = gen_andv16hi3;
48770 gen_pack = gen_avx2_packuswb;
48771 gen_shift = gen_lshrv16hi3;
48772 end_perm = true;
48773 break;
48774 default:
48775 /* Only V8HI, V16QI, V16HI and V32QI modes are more profitable than
48776 general shuffles. */
48777 return false;
48780 /* Check that permutation is even or odd. */
48781 odd = d->perm[0];
48782 if (odd > 1)
48783 return false;
48785 for (i = 1; i < nelt; ++i)
48786 if (d->perm[i] != 2 * i + odd)
48787 return false;
48789 if (d->testing_p)
48790 return true;
48792 dop0 = gen_reg_rtx (half_mode);
48793 dop1 = gen_reg_rtx (half_mode);
48794 if (odd == 0)
48796 for (i = 0; i < nelt / 2; i++)
48797 rperm[i] = GEN_INT (c);
48798 t = gen_rtx_CONST_VECTOR (half_mode, gen_rtvec_v (nelt / 2, rperm));
48799 t = force_reg (half_mode, t);
48800 emit_insn (gen_and (dop0, t, gen_lowpart (half_mode, d->op0)));
48801 emit_insn (gen_and (dop1, t, gen_lowpart (half_mode, d->op1)));
48803 else
48805 emit_insn (gen_shift (dop0,
48806 gen_lowpart (half_mode, d->op0),
48807 GEN_INT (s)));
48808 emit_insn (gen_shift (dop1,
48809 gen_lowpart (half_mode, d->op1),
48810 GEN_INT (s)));
48812 /* In the AVX2 256-bit case we need to permute the pack result. */
48813 if (TARGET_AVX2 && end_perm)
48815 op = gen_reg_rtx (d->vmode);
48816 t = gen_reg_rtx (V4DImode);
48817 emit_insn (gen_pack (op, dop0, dop1));
48818 emit_insn (gen_avx2_permv4di_1 (t,
48819 gen_lowpart (V4DImode, op),
48820 const0_rtx,
48821 const2_rtx,
48822 const1_rtx,
48823 GEN_INT (3)));
48824 emit_move_insn (d->target, gen_lowpart (d->vmode, t));
48826 else
48827 emit_insn (gen_pack (d->target, dop0, dop1));
48829 return true;
48832 /* A subroutine of ix86_expand_vec_perm_builtin_1. Implement extract-even
48833 and extract-odd permutations. */
48835 static bool
48836 expand_vec_perm_even_odd_1 (struct expand_vec_perm_d *d, unsigned odd)
48838 rtx t1, t2, t3, t4, t5;
48840 switch (d->vmode)
48842 case V4DFmode:
48843 if (d->testing_p)
48844 break;
48845 t1 = gen_reg_rtx (V4DFmode);
48846 t2 = gen_reg_rtx (V4DFmode);
48848 /* Shuffle the lanes around into { 0 1 4 5 } and { 2 3 6 7 }. */
48849 emit_insn (gen_avx_vperm2f128v4df3 (t1, d->op0, d->op1, GEN_INT (0x20)));
48850 emit_insn (gen_avx_vperm2f128v4df3 (t2, d->op0, d->op1, GEN_INT (0x31)));
48852 /* Now an unpck[lh]pd will produce the result required. */
48853 if (odd)
48854 t3 = gen_avx_unpckhpd256 (d->target, t1, t2);
48855 else
48856 t3 = gen_avx_unpcklpd256 (d->target, t1, t2);
48857 emit_insn (t3);
48858 break;
48860 case V8SFmode:
48862 int mask = odd ? 0xdd : 0x88;
48864 if (d->testing_p)
48865 break;
48866 t1 = gen_reg_rtx (V8SFmode);
48867 t2 = gen_reg_rtx (V8SFmode);
48868 t3 = gen_reg_rtx (V8SFmode);
48870 /* Shuffle within the 128-bit lanes to produce:
48871 { 0 2 8 a 4 6 c e } | { 1 3 9 b 5 7 d f }. */
48872 emit_insn (gen_avx_shufps256 (t1, d->op0, d->op1,
48873 GEN_INT (mask)));
48875 /* Shuffle the lanes around to produce:
48876 { 4 6 c e 0 2 8 a } and { 5 7 d f 1 3 9 b }. */
48877 emit_insn (gen_avx_vperm2f128v8sf3 (t2, t1, t1,
48878 GEN_INT (0x3)));
48880 /* Shuffle within the 128-bit lanes to produce:
48881 { 0 2 4 6 4 6 0 2 } | { 1 3 5 7 5 7 1 3 }. */
48882 emit_insn (gen_avx_shufps256 (t3, t1, t2, GEN_INT (0x44)));
48884 /* Shuffle within the 128-bit lanes to produce:
48885 { 8 a c e c e 8 a } | { 9 b d f d f 9 b }. */
48886 emit_insn (gen_avx_shufps256 (t2, t1, t2, GEN_INT (0xee)));
48888 /* Shuffle the lanes around to produce:
48889 { 0 2 4 6 8 a c e } | { 1 3 5 7 9 b d f }. */
48890 emit_insn (gen_avx_vperm2f128v8sf3 (d->target, t3, t2,
48891 GEN_INT (0x20)));
48893 break;
48895 case V2DFmode:
48896 case V4SFmode:
48897 case V2DImode:
48898 case V4SImode:
48899 /* These are always directly implementable by expand_vec_perm_1. */
48900 gcc_unreachable ();
48902 case V8HImode:
48903 if (TARGET_SSE4_1)
48904 return expand_vec_perm_even_odd_pack (d);
48905 else if (TARGET_SSSE3 && !TARGET_SLOW_PSHUFB)
48906 return expand_vec_perm_pshufb2 (d);
48907 else
48909 if (d->testing_p)
48910 break;
48911 /* We need 2*log2(N)-1 operations to achieve odd/even
48912 with interleave. */
48913 t1 = gen_reg_rtx (V8HImode);
48914 t2 = gen_reg_rtx (V8HImode);
48915 emit_insn (gen_vec_interleave_highv8hi (t1, d->op0, d->op1));
48916 emit_insn (gen_vec_interleave_lowv8hi (d->target, d->op0, d->op1));
48917 emit_insn (gen_vec_interleave_highv8hi (t2, d->target, t1));
48918 emit_insn (gen_vec_interleave_lowv8hi (d->target, d->target, t1));
48919 if (odd)
48920 t3 = gen_vec_interleave_highv8hi (d->target, d->target, t2);
48921 else
48922 t3 = gen_vec_interleave_lowv8hi (d->target, d->target, t2);
48923 emit_insn (t3);
48925 break;
48927 case V16QImode:
48928 return expand_vec_perm_even_odd_pack (d);
48930 case V16HImode:
48931 case V32QImode:
48932 return expand_vec_perm_even_odd_pack (d);
48934 case V4DImode:
48935 if (!TARGET_AVX2)
48937 struct expand_vec_perm_d d_copy = *d;
48938 d_copy.vmode = V4DFmode;
48939 if (d->testing_p)
48940 d_copy.target = gen_lowpart (V4DFmode, d->target);
48941 else
48942 d_copy.target = gen_reg_rtx (V4DFmode);
48943 d_copy.op0 = gen_lowpart (V4DFmode, d->op0);
48944 d_copy.op1 = gen_lowpart (V4DFmode, d->op1);
48945 if (expand_vec_perm_even_odd_1 (&d_copy, odd))
48947 if (!d->testing_p)
48948 emit_move_insn (d->target,
48949 gen_lowpart (V4DImode, d_copy.target));
48950 return true;
48952 return false;
48955 if (d->testing_p)
48956 break;
48958 t1 = gen_reg_rtx (V4DImode);
48959 t2 = gen_reg_rtx (V4DImode);
48961 /* Shuffle the lanes around into { 0 1 4 5 } and { 2 3 6 7 }. */
48962 emit_insn (gen_avx2_permv2ti (t1, d->op0, d->op1, GEN_INT (0x20)));
48963 emit_insn (gen_avx2_permv2ti (t2, d->op0, d->op1, GEN_INT (0x31)));
48965 /* Now a vpunpck[lh]qdq will produce the result required. */
48966 if (odd)
48967 t3 = gen_avx2_interleave_highv4di (d->target, t1, t2);
48968 else
48969 t3 = gen_avx2_interleave_lowv4di (d->target, t1, t2);
48970 emit_insn (t3);
48971 break;
48973 case V8SImode:
48974 if (!TARGET_AVX2)
48976 struct expand_vec_perm_d d_copy = *d;
48977 d_copy.vmode = V8SFmode;
48978 if (d->testing_p)
48979 d_copy.target = gen_lowpart (V8SFmode, d->target);
48980 else
48981 d_copy.target = gen_reg_rtx (V8SFmode);
48982 d_copy.op0 = gen_lowpart (V8SFmode, d->op0);
48983 d_copy.op1 = gen_lowpart (V8SFmode, d->op1);
48984 if (expand_vec_perm_even_odd_1 (&d_copy, odd))
48986 if (!d->testing_p)
48987 emit_move_insn (d->target,
48988 gen_lowpart (V8SImode, d_copy.target));
48989 return true;
48991 return false;
48994 if (d->testing_p)
48995 break;
48997 t1 = gen_reg_rtx (V8SImode);
48998 t2 = gen_reg_rtx (V8SImode);
48999 t3 = gen_reg_rtx (V4DImode);
49000 t4 = gen_reg_rtx (V4DImode);
49001 t5 = gen_reg_rtx (V4DImode);
49003 /* Shuffle the lanes around into
49004 { 0 1 2 3 8 9 a b } and { 4 5 6 7 c d e f }. */
49005 emit_insn (gen_avx2_permv2ti (t3, gen_lowpart (V4DImode, d->op0),
49006 gen_lowpart (V4DImode, d->op1),
49007 GEN_INT (0x20)));
49008 emit_insn (gen_avx2_permv2ti (t4, gen_lowpart (V4DImode, d->op0),
49009 gen_lowpart (V4DImode, d->op1),
49010 GEN_INT (0x31)));
49012 /* Swap the 2nd and 3rd position in each lane into
49013 { 0 2 1 3 8 a 9 b } and { 4 6 5 7 c e d f }. */
49014 emit_insn (gen_avx2_pshufdv3 (t1, gen_lowpart (V8SImode, t3),
49015 GEN_INT (2 * 4 + 1 * 16 + 3 * 64)));
49016 emit_insn (gen_avx2_pshufdv3 (t2, gen_lowpart (V8SImode, t4),
49017 GEN_INT (2 * 4 + 1 * 16 + 3 * 64)));
49019 /* Now a vpunpck[lh]qdq will produce
49020 { 0 2 4 6 8 a c e } resp. { 1 3 5 7 9 b d f }. */
49021 if (odd)
49022 t3 = gen_avx2_interleave_highv4di (t5, gen_lowpart (V4DImode, t1),
49023 gen_lowpart (V4DImode, t2));
49024 else
49025 t3 = gen_avx2_interleave_lowv4di (t5, gen_lowpart (V4DImode, t1),
49026 gen_lowpart (V4DImode, t2));
49027 emit_insn (t3);
49028 emit_move_insn (d->target, gen_lowpart (V8SImode, t5));
49029 break;
49031 default:
49032 gcc_unreachable ();
49035 return true;
49038 /* A subroutine of ix86_expand_vec_perm_builtin_1. Pattern match
49039 extract-even and extract-odd permutations. */
49041 static bool
49042 expand_vec_perm_even_odd (struct expand_vec_perm_d *d)
49044 unsigned i, odd, nelt = d->nelt;
49046 odd = d->perm[0];
49047 if (odd != 0 && odd != 1)
49048 return false;
49050 for (i = 1; i < nelt; ++i)
49051 if (d->perm[i] != 2 * i + odd)
49052 return false;
49054 return expand_vec_perm_even_odd_1 (d, odd);
49057 /* A subroutine of ix86_expand_vec_perm_builtin_1. Implement broadcast
49058 permutations. We assume that expand_vec_perm_1 has already failed. */
49060 static bool
49061 expand_vec_perm_broadcast_1 (struct expand_vec_perm_d *d)
49063 unsigned elt = d->perm[0], nelt2 = d->nelt / 2;
49064 machine_mode vmode = d->vmode;
49065 unsigned char perm2[4];
49066 rtx op0 = d->op0, dest;
49067 bool ok;
49069 switch (vmode)
49071 case V4DFmode:
49072 case V8SFmode:
49073 /* These are special-cased in sse.md so that we can optionally
49074 use the vbroadcast instruction. They expand to two insns
49075 if the input happens to be in a register. */
49076 gcc_unreachable ();
49078 case V2DFmode:
49079 case V2DImode:
49080 case V4SFmode:
49081 case V4SImode:
49082 /* These are always implementable using standard shuffle patterns. */
49083 gcc_unreachable ();
49085 case V8HImode:
49086 case V16QImode:
49087 /* These can be implemented via interleave. We save one insn by
49088 stopping once we have promoted to V4SImode and then use pshufd. */
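/* Example (added annotation, not in the original source): to broadcast
   element 5 of a V8HImode vector, a high interleave of the operand with
   itself gives { 4 4 5 5 6 6 7 7 }; viewed as V4SImode the wanted pair
   is element 1, so a pshufd with { 1 1 1 1 } completes the broadcast.  */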
49089 if (d->testing_p)
49090 return true;
49093 rtx dest;
49094 rtx (*gen) (rtx, rtx, rtx)
49095 = vmode == V16QImode ? gen_vec_interleave_lowv16qi
49096 : gen_vec_interleave_lowv8hi;
49098 if (elt >= nelt2)
49100 gen = vmode == V16QImode ? gen_vec_interleave_highv16qi
49101 : gen_vec_interleave_highv8hi;
49102 elt -= nelt2;
49104 nelt2 /= 2;
49106 dest = gen_reg_rtx (vmode);
49107 emit_insn (gen (dest, op0, op0));
49108 vmode = get_mode_wider_vector (vmode);
49109 op0 = gen_lowpart (vmode, dest);
49111 while (vmode != V4SImode);
49113 memset (perm2, elt, 4);
49114 dest = gen_reg_rtx (V4SImode);
49115 ok = expand_vselect (dest, op0, perm2, 4, d->testing_p);
49116 gcc_assert (ok);
49117 if (!d->testing_p)
49118 emit_move_insn (d->target, gen_lowpart (d->vmode, dest));
49119 return true;
49121 case V64QImode:
49122 case V32QImode:
49123 case V16HImode:
49124 case V8SImode:
49125 case V4DImode:
49126 /* For AVX2 broadcasts of the first element vpbroadcast* or
49127 vpermq should be used by expand_vec_perm_1. */
49128 gcc_assert (!TARGET_AVX2 || d->perm[0]);
49129 return false;
49131 default:
49132 gcc_unreachable ();
49136 /* A subroutine of ix86_expand_vec_perm_builtin_1. Pattern match
49137 broadcast permutations. */
49139 static bool
49140 expand_vec_perm_broadcast (struct expand_vec_perm_d *d)
49142 unsigned i, elt, nelt = d->nelt;
49144 if (!d->one_operand_p)
49145 return false;
49147 elt = d->perm[0];
49148 for (i = 1; i < nelt; ++i)
49149 if (d->perm[i] != elt)
49150 return false;
49152 return expand_vec_perm_broadcast_1 (d);
49155 /* Implement arbitrary permutations of two V64QImode operands
49156 with 2 vpermi2w, 2 vpshufb and one vpor instruction. */
49157 static bool
49158 expand_vec_perm_vpermi2_vpshub2 (struct expand_vec_perm_d *d)
49160 if (!TARGET_AVX512BW || !(d->vmode == V64QImode))
49161 return false;
49163 if (d->testing_p)
49164 return true;
49166 struct expand_vec_perm_d ds[2];
49167 rtx rperm[128], vperm, target0, target1;
49168 unsigned int i, nelt;
49169 machine_mode vmode;
49171 nelt = d->nelt;
49172 vmode = V64QImode;
49174 for (i = 0; i < 2; i++)
49176 ds[i] = *d;
49177 ds[i].vmode = V32HImode;
49178 ds[i].nelt = 32;
49179 ds[i].target = gen_reg_rtx (V32HImode);
49180 ds[i].op0 = gen_lowpart (V32HImode, d->op0);
49181 ds[i].op1 = gen_lowpart (V32HImode, d->op1);
49184 /* Prepare permutations such that the first one takes care of
49185 putting the even bytes into the right positions or one position
49186 higher (ds[0]) and the second one takes care of putting the odd
49187 bytes into the right positions or one position lower
49188 (ds[1]). */
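/* Example (added annotation, not in the original source): if destination
   byte 5 must come from source byte 23, then ds[1].perm[2] = 11 moves
   source word 11 (bytes 22 and 23) into word 2 of the second intermediate,
   and the vpshufb mask entry rperm[5 + 64] = (5 & 14) + (23 & 1) = 5
   selects the high byte of that word; the matching entry of the first
   mask is -1, so the vpor sees a zero in that position.  */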
49190 for (i = 0; i < nelt; i++)
49192 ds[i & 1].perm[i / 2] = d->perm[i] / 2;
49193 if (i & 1)
49195 rperm[i] = constm1_rtx;
49196 rperm[i + 64] = GEN_INT ((i & 14) + (d->perm[i] & 1));
49198 else
49200 rperm[i] = GEN_INT ((i & 14) + (d->perm[i] & 1));
49201 rperm[i + 64] = constm1_rtx;
49205 bool ok = expand_vec_perm_1 (&ds[0]);
49206 gcc_assert (ok);
49207 ds[0].target = gen_lowpart (V64QImode, ds[0].target);
49209 ok = expand_vec_perm_1 (&ds[1]);
49210 gcc_assert (ok);
49211 ds[1].target = gen_lowpart (V64QImode, ds[1].target);
49213 vperm = gen_rtx_CONST_VECTOR (V64QImode, gen_rtvec_v (64, rperm));
49214 vperm = force_reg (vmode, vperm);
49215 target0 = gen_reg_rtx (V64QImode);
49216 emit_insn (gen_avx512bw_pshufbv64qi3 (target0, ds[0].target, vperm));
49218 vperm = gen_rtx_CONST_VECTOR (V64QImode, gen_rtvec_v (64, rperm + 64));
49219 vperm = force_reg (vmode, vperm);
49220 target1 = gen_reg_rtx (V64QImode);
49221 emit_insn (gen_avx512bw_pshufbv64qi3 (target1, ds[1].target, vperm));
49223 emit_insn (gen_iorv64qi3 (d->target, target0, target1));
49224 return true;
49227 /* Implement arbitrary permutation of two V32QImode and V16QImode operands
49228 with 4 vpshufb insns, 2 vpermq and 3 vpor. We should have already failed
49229 all the shorter instruction sequences. */
49231 static bool
49232 expand_vec_perm_vpshufb4_vpermq2 (struct expand_vec_perm_d *d)
49234 rtx rperm[4][32], vperm, l[2], h[2], op, m128;
49235 unsigned int i, nelt, eltsz;
49236 bool used[4];
49238 if (!TARGET_AVX2
49239 || d->one_operand_p
49240 || (d->vmode != V32QImode && d->vmode != V16HImode))
49241 return false;
49243 if (d->testing_p)
49244 return true;
49246 nelt = d->nelt;
49247 eltsz = GET_MODE_SIZE (GET_MODE_INNER (d->vmode));
49249 /* Generate 4 permutation masks. If the required element is within
49250 the same lane, it is shuffled in. If the required element is from the
49251 other lane, force a zero by setting bit 7 in the permutation mask.
49252 The other mask has a non-negative element wherever an element
49253 is requested from the other lane, but also moved to the other lane,
49254 so that the result of vpshufb can have the two V2TImode halves
49255 swapped. */
49256 m128 = GEN_INT (-128);
49257 for (i = 0; i < 32; ++i)
49259 rperm[0][i] = m128;
49260 rperm[1][i] = m128;
49261 rperm[2][i] = m128;
49262 rperm[3][i] = m128;
49264 used[0] = false;
49265 used[1] = false;
49266 used[2] = false;
49267 used[3] = false;
49268 for (i = 0; i < nelt; ++i)
49270 unsigned j, e = d->perm[i] & (nelt / 2 - 1);
49271 unsigned xlane = ((d->perm[i] ^ i) & (nelt / 2)) * eltsz;
49272 unsigned int which = ((d->perm[i] & nelt) ? 2 : 0) + (xlane ? 1 : 0);
49274 for (j = 0; j < eltsz; ++j)
49275 rperm[which][(i * eltsz + j) ^ xlane] = GEN_INT (e * eltsz + j);
49276 used[which] = true;
49279 for (i = 0; i < 2; ++i)
49281 if (!used[2 * i + 1])
49283 h[i] = NULL_RTX;
49284 continue;
49286 vperm = gen_rtx_CONST_VECTOR (V32QImode,
49287 gen_rtvec_v (32, rperm[2 * i + 1]));
49288 vperm = force_reg (V32QImode, vperm);
49289 h[i] = gen_reg_rtx (V32QImode);
49290 op = gen_lowpart (V32QImode, i ? d->op1 : d->op0);
49291 emit_insn (gen_avx2_pshufbv32qi3 (h[i], op, vperm));
49294 /* Swap the 128-bit lanes of h[X]. */
49295 for (i = 0; i < 2; ++i)
49297 if (h[i] == NULL_RTX)
49298 continue;
49299 op = gen_reg_rtx (V4DImode);
49300 emit_insn (gen_avx2_permv4di_1 (op, gen_lowpart (V4DImode, h[i]),
49301 const2_rtx, GEN_INT (3), const0_rtx,
49302 const1_rtx));
49303 h[i] = gen_lowpart (V32QImode, op);
49306 for (i = 0; i < 2; ++i)
49308 if (!used[2 * i])
49310 l[i] = NULL_RTX;
49311 continue;
49313 vperm = gen_rtx_CONST_VECTOR (V32QImode, gen_rtvec_v (32, rperm[2 * i]));
49314 vperm = force_reg (V32QImode, vperm);
49315 l[i] = gen_reg_rtx (V32QImode);
49316 op = gen_lowpart (V32QImode, i ? d->op1 : d->op0);
49317 emit_insn (gen_avx2_pshufbv32qi3 (l[i], op, vperm));
49320 for (i = 0; i < 2; ++i)
49322 if (h[i] && l[i])
49324 op = gen_reg_rtx (V32QImode);
49325 emit_insn (gen_iorv32qi3 (op, l[i], h[i]));
49326 l[i] = op;
49328 else if (h[i])
49329 l[i] = h[i];
49332 gcc_assert (l[0] && l[1]);
49333 op = d->target;
49334 if (d->vmode != V32QImode)
49335 op = gen_reg_rtx (V32QImode);
49336 emit_insn (gen_iorv32qi3 (op, l[0], l[1]));
49337 if (op != d->target)
49338 emit_move_insn (d->target, gen_lowpart (d->vmode, op));
49339 return true;
49342 /* The guts of ix86_expand_vec_perm_const, also used by the ok hook.
49343 With all of the interface bits taken care of, perform the expansion
49344 in D and return true on success. */
49346 static bool
49347 ix86_expand_vec_perm_const_1 (struct expand_vec_perm_d *d)
49349 /* Try a single instruction expansion. */
49350 if (expand_vec_perm_1 (d))
49351 return true;
49353 /* Try sequences of two instructions. */
49355 if (expand_vec_perm_pshuflw_pshufhw (d))
49356 return true;
49358 if (expand_vec_perm_palignr (d, false))
49359 return true;
49361 if (expand_vec_perm_interleave2 (d))
49362 return true;
49364 if (expand_vec_perm_broadcast (d))
49365 return true;
49367 if (expand_vec_perm_vpermq_perm_1 (d))
49368 return true;
49370 if (expand_vec_perm_vperm2f128 (d))
49371 return true;
49373 if (expand_vec_perm_pblendv (d))
49374 return true;
49376 /* Try sequences of three instructions. */
49378 if (expand_vec_perm_even_odd_pack (d))
49379 return true;
49381 if (expand_vec_perm_2vperm2f128_vshuf (d))
49382 return true;
49384 if (expand_vec_perm_pshufb2 (d))
49385 return true;
49387 if (expand_vec_perm_interleave3 (d))
49388 return true;
49390 if (expand_vec_perm_vperm2f128_vblend (d))
49391 return true;
49393 /* Try sequences of four instructions. */
49395 if (expand_vec_perm_vpshufb2_vpermq (d))
49396 return true;
49398 if (expand_vec_perm_vpshufb2_vpermq_even_odd (d))
49399 return true;
49401 if (expand_vec_perm_vpermi2_vpshub2 (d))
49402 return true;
49404 /* ??? Look for narrow permutations whose element orderings would
49405 allow the promotion to a wider mode. */
49407 /* ??? Look for sequences of interleave or a wider permute that place
49408 the data into the correct lanes for a half-vector shuffle like
49409 pshuf[lh]w or vpermilps. */
49411 /* ??? Look for sequences of interleave that produce the desired results.
49412 The combinatorics of punpck[lh] get pretty ugly... */
49414 if (expand_vec_perm_even_odd (d))
49415 return true;
49417 /* Even longer sequences. */
49418 if (expand_vec_perm_vpshufb4_vpermq2 (d))
49419 return true;
49421 return false;
49424 /* If a permutation only uses one operand, make it clear. Returns true
49425 if the permutation references both operands. */
49427 static bool
49428 canonicalize_perm (struct expand_vec_perm_d *d)
49430 int i, which, nelt = d->nelt;
49432 for (i = which = 0; i < nelt; ++i)
49433 which |= (d->perm[i] < nelt ? 1 : 2);
49435 d->one_operand_p = true;
49436 switch (which)
49438 default:
49439 gcc_unreachable();
49441 case 3:
49442 if (!rtx_equal_p (d->op0, d->op1))
49444 d->one_operand_p = false;
49445 break;
49447 /* The elements of PERM do not suggest that only the first operand
49448 is used, but both operands are identical. Allow easier matching
49449 of the permutation by folding the permutation into the single
49450 input vector. */
49451 /* FALLTHRU */
49453 case 2:
49454 for (i = 0; i < nelt; ++i)
49455 d->perm[i] &= nelt - 1;
49456 d->op0 = d->op1;
49457 break;
49459 case 1:
49460 d->op1 = d->op0;
49461 break;
49464 return (which == 3);
49467 bool
49468 ix86_expand_vec_perm_const (rtx operands[4])
49470 struct expand_vec_perm_d d;
49471 unsigned char perm[MAX_VECT_LEN];
49472 int i, nelt;
49473 bool two_args;
49474 rtx sel;
49476 d.target = operands[0];
49477 d.op0 = operands[1];
49478 d.op1 = operands[2];
49479 sel = operands[3];
49481 d.vmode = GET_MODE (d.target);
49482 gcc_assert (VECTOR_MODE_P (d.vmode));
49483 d.nelt = nelt = GET_MODE_NUNITS (d.vmode);
49484 d.testing_p = false;
49486 gcc_assert (GET_CODE (sel) == CONST_VECTOR);
49487 gcc_assert (XVECLEN (sel, 0) == nelt);
49488 gcc_checking_assert (sizeof (d.perm) == sizeof (perm));
49490 for (i = 0; i < nelt; ++i)
49492 rtx e = XVECEXP (sel, 0, i);
49493 int ei = INTVAL (e) & (2 * nelt - 1);
49494 d.perm[i] = ei;
49495 perm[i] = ei;
49498 two_args = canonicalize_perm (&d);
49500 if (ix86_expand_vec_perm_const_1 (&d))
49501 return true;
49503 /* If the selector says both arguments are needed, but the operands are the
49504 same, the above tried to expand with one_operand_p and a flattened selector.
49505 If that didn't work, retry without one_operand_p; we succeeded with that
49506 during testing. */
49507 if (two_args && d.one_operand_p)
49509 d.one_operand_p = false;
49510 memcpy (d.perm, perm, sizeof (perm));
49511 return ix86_expand_vec_perm_const_1 (&d);
49514 return false;
49517 /* Implement targetm.vectorize.vec_perm_const_ok. */
49519 static bool
49520 ix86_vectorize_vec_perm_const_ok (machine_mode vmode,
49521 const unsigned char *sel)
49523 struct expand_vec_perm_d d;
49524 unsigned int i, nelt, which;
49525 bool ret;
49527 d.vmode = vmode;
49528 d.nelt = nelt = GET_MODE_NUNITS (d.vmode);
49529 d.testing_p = true;
49531 /* Given sufficient ISA support we can just return true here
49532 for selected vector modes. */
49533 switch (d.vmode)
49535 case V16SFmode:
49536 case V16SImode:
49537 case V8DImode:
49538 case V8DFmode:
49539 if (TARGET_AVX512F)
49540 /* All implementable with a single vpermi2 insn. */
49541 return true;
49542 break;
49543 case V32HImode:
49544 if (TARGET_AVX512BW)
49545 /* All implementable with a single vpermi2 insn. */
49546 return true;
49547 break;
49548 case V64QImode:
49549 if (TARGET_AVX512BW)
49550 /* Implementable with 2 vpermi2, 2 vpshufb and 1 or insn. */
49551 return true;
49552 break;
49553 case V8SImode:
49554 case V8SFmode:
49555 case V4DFmode:
49556 case V4DImode:
49557 if (TARGET_AVX512VL)
49558 /* All implementable with a single vpermi2 insn. */
49559 return true;
49560 break;
49561 case V16HImode:
49562 if (TARGET_AVX2)
49563 /* Implementable with 4 vpshufb insns, 2 vpermq and 3 vpor insns. */
49564 return true;
49565 break;
49566 case V32QImode:
49567 if (TARGET_AVX2)
49568 /* Implementable with 4 vpshufb insns, 2 vpermq and 3 vpor insns. */
49569 return true;
49570 break;
49571 case V4SImode:
49572 case V4SFmode:
49573 case V8HImode:
49574 case V16QImode:
49575 /* All implementable with a single vpperm insn. */
49576 if (TARGET_XOP)
49577 return true;
49578 /* All implementable with 2 pshufb + 1 ior. */
49579 if (TARGET_SSSE3)
49580 return true;
49581 break;
49582 case V2DImode:
49583 case V2DFmode:
49584 /* All implementable with shufpd or unpck[lh]pd. */
49585 return true;
49586 default:
49587 return false;
49590 /* Extract the values from the vector CST into the permutation
49591 array in D. */
49592 memcpy (d.perm, sel, nelt);
49593 for (i = which = 0; i < nelt; ++i)
49595 unsigned char e = d.perm[i];
49596 gcc_assert (e < 2 * nelt);
49597 which |= (e < nelt ? 1 : 2);
49600 /* For all elements from the second vector, fold the elements to the first. */
49601 if (which == 2)
49602 for (i = 0; i < nelt; ++i)
49603 d.perm[i] -= nelt;
49605 /* Check whether the mask can be applied to the vector type. */
49606 d.one_operand_p = (which != 3);
49608 /* Implementable with shufps or pshufd. */
49609 if (d.one_operand_p && (d.vmode == V4SFmode || d.vmode == V4SImode))
49610 return true;
49612 /* Otherwise we have to go through the motions and see if we can
49613 figure out how to generate the requested permutation. */
49614 d.target = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 1);
49615 d.op1 = d.op0 = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 2);
49616 if (!d.one_operand_p)
49617 d.op1 = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 3);
49619 start_sequence ();
49620 ret = ix86_expand_vec_perm_const_1 (&d);
49621 end_sequence ();
49623 return ret;
49626 void
49627 ix86_expand_vec_extract_even_odd (rtx targ, rtx op0, rtx op1, unsigned odd)
49629 struct expand_vec_perm_d d;
49630 unsigned i, nelt;
49632 d.target = targ;
49633 d.op0 = op0;
49634 d.op1 = op1;
49635 d.vmode = GET_MODE (targ);
49636 d.nelt = nelt = GET_MODE_NUNITS (d.vmode);
49637 d.one_operand_p = false;
49638 d.testing_p = false;
49640 for (i = 0; i < nelt; ++i)
49641 d.perm[i] = i * 2 + odd;
49643 /* We'll either be able to implement the permutation directly... */
49644 if (expand_vec_perm_1 (&d))
49645 return;
49647 /* ... or we use the special-case patterns. */
49648 expand_vec_perm_even_odd_1 (&d, odd);
49651 static void
49652 ix86_expand_vec_interleave (rtx targ, rtx op0, rtx op1, bool high_p)
49654 struct expand_vec_perm_d d;
49655 unsigned i, nelt, base;
49656 bool ok;
49658 d.target = targ;
49659 d.op0 = op0;
49660 d.op1 = op1;
49661 d.vmode = GET_MODE (targ);
49662 d.nelt = nelt = GET_MODE_NUNITS (d.vmode);
49663 d.one_operand_p = false;
49664 d.testing_p = false;
49666 base = high_p ? nelt / 2 : 0;
49667 for (i = 0; i < nelt / 2; ++i)
49669 d.perm[i * 2] = i + base;
49670 d.perm[i * 2 + 1] = i + base + nelt;
49673 /* Note that for AVX this isn't one instruction. */
49674 ok = ix86_expand_vec_perm_const_1 (&d);
49675 gcc_assert (ok);
49679 /* Expand a vector operation CODE for a V*QImode in terms of the
49680 same operation on V*HImode. */
49682 void
49683 ix86_expand_vecop_qihi (enum rtx_code code, rtx dest, rtx op1, rtx op2)
49685 machine_mode qimode = GET_MODE (dest);
49686 machine_mode himode;
49687 rtx (*gen_il) (rtx, rtx, rtx);
49688 rtx (*gen_ih) (rtx, rtx, rtx);
49689 rtx op1_l, op1_h, op2_l, op2_h, res_l, res_h;
49690 struct expand_vec_perm_d d;
49691 bool ok, full_interleave;
49692 bool uns_p = false;
49693 int i;
49695 switch (qimode)
49697 case V16QImode:
49698 himode = V8HImode;
49699 gen_il = gen_vec_interleave_lowv16qi;
49700 gen_ih = gen_vec_interleave_highv16qi;
49701 break;
49702 case V32QImode:
49703 himode = V16HImode;
49704 gen_il = gen_avx2_interleave_lowv32qi;
49705 gen_ih = gen_avx2_interleave_highv32qi;
49706 break;
49707 case V64QImode:
49708 himode = V32HImode;
49709 gen_il = gen_avx512bw_interleave_lowv64qi;
49710 gen_ih = gen_avx512bw_interleave_highv64qi;
49711 break;
49712 default:
49713 gcc_unreachable ();
49716 op2_l = op2_h = op2;
49717 switch (code)
49719 case MULT:
49720 /* Unpack data such that we've got a source byte in each low byte of
49721 each word. We don't care what goes into the high byte of each word.
49722 Rather than trying to get zero in there, it is most convenient to let
49723 it be a copy of the low byte. */
49724 op2_l = gen_reg_rtx (qimode);
49725 op2_h = gen_reg_rtx (qimode);
49726 emit_insn (gen_il (op2_l, op2, op2));
49727 emit_insn (gen_ih (op2_h, op2, op2));
49728 /* FALLTHRU */
49730 op1_l = gen_reg_rtx (qimode);
49731 op1_h = gen_reg_rtx (qimode);
49732 emit_insn (gen_il (op1_l, op1, op1));
49733 emit_insn (gen_ih (op1_h, op1, op1));
49734 full_interleave = qimode == V16QImode;
49735 break;
49737 case ASHIFT:
49738 case LSHIFTRT:
49739 uns_p = true;
49740 /* FALLTHRU */
49741 case ASHIFTRT:
49742 op1_l = gen_reg_rtx (himode);
49743 op1_h = gen_reg_rtx (himode);
49744 ix86_expand_sse_unpack (op1_l, op1, uns_p, false);
49745 ix86_expand_sse_unpack (op1_h, op1, uns_p, true);
49746 full_interleave = true;
49747 break;
49748 default:
49749 gcc_unreachable ();
49752 /* Perform the operation. */
49753 res_l = expand_simple_binop (himode, code, op1_l, op2_l, NULL_RTX,
49754 1, OPTAB_DIRECT);
49755 res_h = expand_simple_binop (himode, code, op1_h, op2_h, NULL_RTX,
49756 1, OPTAB_DIRECT);
49757 gcc_assert (res_l && res_h);
49759 /* Merge the data back into the right place. */
49760 d.target = dest;
49761 d.op0 = gen_lowpart (qimode, res_l);
49762 d.op1 = gen_lowpart (qimode, res_h);
49763 d.vmode = qimode;
49764 d.nelt = GET_MODE_NUNITS (qimode);
49765 d.one_operand_p = false;
49766 d.testing_p = false;
49768 if (full_interleave)
49770 /* For SSE2, we used a full interleave, so the desired
49771 results are in the even elements. */
49772 for (i = 0; i < 64; ++i)
49773 d.perm[i] = i * 2;
49775 else
49777 /* For AVX, the interleave used above was not cross-lane. So we
49778 extract the even elements, but with the second and third quarters swapped.
49779 Happily, that is even one insn shorter than a plain even extraction. */
49780 for (i = 0; i < 64; ++i)
49781 d.perm[i] = i * 2 + ((i & 24) == 8 ? 16 : (i & 24) == 16 ? -16 : 0);
49784 ok = ix86_expand_vec_perm_const_1 (&d);
49785 gcc_assert (ok);
49787 set_unique_reg_note (get_last_insn (), REG_EQUAL,
49788 gen_rtx_fmt_ee (code, qimode, op1, op2));
49791 /* Helper function of ix86_expand_mul_widen_evenodd. Return true
49792 if op is a CONST_VECTOR with all odd elements equal to their
49793 preceding element. */
49795 static bool
49796 const_vector_equal_evenodd_p (rtx op)
49798 machine_mode mode = GET_MODE (op);
49799 int i, nunits = GET_MODE_NUNITS (mode);
49800 if (GET_CODE (op) != CONST_VECTOR
49801 || nunits != CONST_VECTOR_NUNITS (op))
49802 return false;
49803 for (i = 0; i < nunits; i += 2)
49804 if (CONST_VECTOR_ELT (op, i) != CONST_VECTOR_ELT (op, i + 1))
49805 return false;
49806 return true;
49809 void
49810 ix86_expand_mul_widen_evenodd (rtx dest, rtx op1, rtx op2,
49811 bool uns_p, bool odd_p)
49813 machine_mode mode = GET_MODE (op1);
49814 machine_mode wmode = GET_MODE (dest);
49815 rtx x;
49816 rtx orig_op1 = op1, orig_op2 = op2;
49818 if (!nonimmediate_operand (op1, mode))
49819 op1 = force_reg (mode, op1);
49820 if (!nonimmediate_operand (op2, mode))
49821 op2 = force_reg (mode, op2);
49823 /* We only play even/odd games with vectors of SImode. */
49824 gcc_assert (mode == V4SImode || mode == V8SImode || mode == V16SImode);
49826 /* If we're looking for the odd results, shift those members down to
49827 the even slots. For some cpus this is faster than a PSHUFD. */
49828 if (odd_p)
49830 /* For XOP use vpmacsdqh, but only for smult, as it is only
49831 signed. */
49832 if (TARGET_XOP && mode == V4SImode && !uns_p)
49834 x = force_reg (wmode, CONST0_RTX (wmode));
49835 emit_insn (gen_xop_pmacsdqh (dest, op1, op2, x));
49836 return;
49839 x = GEN_INT (GET_MODE_UNIT_BITSIZE (mode));
49840 if (!const_vector_equal_evenodd_p (orig_op1))
49841 op1 = expand_binop (wmode, lshr_optab, gen_lowpart (wmode, op1),
49842 x, NULL, 1, OPTAB_DIRECT);
49843 if (!const_vector_equal_evenodd_p (orig_op2))
49844 op2 = expand_binop (wmode, lshr_optab, gen_lowpart (wmode, op2),
49845 x, NULL, 1, OPTAB_DIRECT);
49846 op1 = gen_lowpart (mode, op1);
49847 op2 = gen_lowpart (mode, op2);
49850 if (mode == V16SImode)
49852 if (uns_p)
49853 x = gen_vec_widen_umult_even_v16si (dest, op1, op2);
49854 else
49855 x = gen_vec_widen_smult_even_v16si (dest, op1, op2);
49857 else if (mode == V8SImode)
49859 if (uns_p)
49860 x = gen_vec_widen_umult_even_v8si (dest, op1, op2);
49861 else
49862 x = gen_vec_widen_smult_even_v8si (dest, op1, op2);
49864 else if (uns_p)
49865 x = gen_vec_widen_umult_even_v4si (dest, op1, op2);
49866 else if (TARGET_SSE4_1)
49867 x = gen_sse4_1_mulv2siv2di3 (dest, op1, op2);
49868 else
49870 rtx s1, s2, t0, t1, t2;
49872 /* The easiest way to implement this without PMULDQ is to go through
49873 the motions as if we were performing a full 64-bit multiply, except
49874 that we need to do less shuffling of the elements. */
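/* In other words (added annotation, not in the original source), with
   HI(X) denoting the sign extension of X (0 or -1), the code below
   computes LO(A)*LO(B) + ((HI(A)*LO(B) + HI(B)*LO(A)) << 32), which is
   congruent to the signed 32x32->64 product modulo 2^64.  */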
49876 /* Compute the sign-extension, aka highparts, of the two operands. */
49877 s1 = ix86_expand_sse_cmp (gen_reg_rtx (mode), GT, CONST0_RTX (mode),
49878 op1, pc_rtx, pc_rtx);
49879 s2 = ix86_expand_sse_cmp (gen_reg_rtx (mode), GT, CONST0_RTX (mode),
49880 op2, pc_rtx, pc_rtx);
49882 /* Multiply LO(A) * HI(B), and vice-versa. */
49883 t1 = gen_reg_rtx (wmode);
49884 t2 = gen_reg_rtx (wmode);
49885 emit_insn (gen_vec_widen_umult_even_v4si (t1, s1, op2));
49886 emit_insn (gen_vec_widen_umult_even_v4si (t2, s2, op1));
49888 /* Multiply LO(A) * LO(B). */
49889 t0 = gen_reg_rtx (wmode);
49890 emit_insn (gen_vec_widen_umult_even_v4si (t0, op1, op2));
49892 /* Combine and shift the highparts into place. */
49893 t1 = expand_binop (wmode, add_optab, t1, t2, t1, 1, OPTAB_DIRECT);
49894 t1 = expand_binop (wmode, ashl_optab, t1, GEN_INT (32), t1,
49895 1, OPTAB_DIRECT);
49897 /* Combine high and low parts. */
49898 force_expand_binop (wmode, add_optab, t0, t1, dest, 1, OPTAB_DIRECT);
49899 return;
49901 emit_insn (x);
49904 void
49905 ix86_expand_mul_widen_hilo (rtx dest, rtx op1, rtx op2,
49906 bool uns_p, bool high_p)
49908 machine_mode wmode = GET_MODE (dest);
49909 machine_mode mode = GET_MODE (op1);
49910 rtx t1, t2, t3, t4, mask;
49912 switch (mode)
49914 case V4SImode:
49915 t1 = gen_reg_rtx (mode);
49916 t2 = gen_reg_rtx (mode);
49917 if (TARGET_XOP && !uns_p)
49919 /* With XOP, we have pmacsdqh, aka mul_widen_odd. In this case,
49920 shuffle the elements once so that all elements are in the right
49921 place for immediate use: { A C B D }. */
49922 emit_insn (gen_sse2_pshufd_1 (t1, op1, const0_rtx, const2_rtx,
49923 const1_rtx, GEN_INT (3)));
49924 emit_insn (gen_sse2_pshufd_1 (t2, op2, const0_rtx, const2_rtx,
49925 const1_rtx, GEN_INT (3)));
49927 else
49929 /* Put the elements into place for the multiply. */
49930 ix86_expand_vec_interleave (t1, op1, op1, high_p);
49931 ix86_expand_vec_interleave (t2, op2, op2, high_p);
49932 high_p = false;
49934 ix86_expand_mul_widen_evenodd (dest, t1, t2, uns_p, high_p);
49935 break;
49937 case V8SImode:
49938 /* Shuffle the elements between the lanes. After this we
49939 have { A B E F | C D G H } for each operand. */
49940 t1 = gen_reg_rtx (V4DImode);
49941 t2 = gen_reg_rtx (V4DImode);
49942 emit_insn (gen_avx2_permv4di_1 (t1, gen_lowpart (V4DImode, op1),
49943 const0_rtx, const2_rtx,
49944 const1_rtx, GEN_INT (3)));
49945 emit_insn (gen_avx2_permv4di_1 (t2, gen_lowpart (V4DImode, op2),
49946 const0_rtx, const2_rtx,
49947 const1_rtx, GEN_INT (3)));
49949 /* Shuffle the elements within the lanes. After this we
49950 have { A A B B | C C D D } or { E E F F | G G H H }. */
49951 t3 = gen_reg_rtx (V8SImode);
49952 t4 = gen_reg_rtx (V8SImode);
49953 mask = GEN_INT (high_p
49954 ? 2 + (2 << 2) + (3 << 4) + (3 << 6)
49955 : 0 + (0 << 2) + (1 << 4) + (1 << 6));
49956 emit_insn (gen_avx2_pshufdv3 (t3, gen_lowpart (V8SImode, t1), mask));
49957 emit_insn (gen_avx2_pshufdv3 (t4, gen_lowpart (V8SImode, t2), mask));
49959 ix86_expand_mul_widen_evenodd (dest, t3, t4, uns_p, false);
49960 break;
49962 case V8HImode:
49963 case V16HImode:
49964 t1 = expand_binop (mode, smul_optab, op1, op2, NULL_RTX,
49965 uns_p, OPTAB_DIRECT);
49966 t2 = expand_binop (mode,
49967 uns_p ? umul_highpart_optab : smul_highpart_optab,
49968 op1, op2, NULL_RTX, uns_p, OPTAB_DIRECT);
49969 gcc_assert (t1 && t2);
49971 t3 = gen_reg_rtx (mode);
49972 ix86_expand_vec_interleave (t3, t1, t2, high_p);
49973 emit_move_insn (dest, gen_lowpart (wmode, t3));
49974 break;
49976 case V16QImode:
49977 case V32QImode:
49978 case V32HImode:
49979 case V16SImode:
49980 case V64QImode:
49981 t1 = gen_reg_rtx (wmode);
49982 t2 = gen_reg_rtx (wmode);
49983 ix86_expand_sse_unpack (t1, op1, uns_p, high_p);
49984 ix86_expand_sse_unpack (t2, op2, uns_p, high_p);
49986 emit_insn (gen_rtx_SET (dest, gen_rtx_MULT (wmode, t1, t2)));
49987 break;
49989 default:
49990 gcc_unreachable ();
49994 void
49995 ix86_expand_sse2_mulv4si3 (rtx op0, rtx op1, rtx op2)
49997 rtx res_1, res_2, res_3, res_4;
49999 res_1 = gen_reg_rtx (V4SImode);
50000 res_2 = gen_reg_rtx (V4SImode);
50001 res_3 = gen_reg_rtx (V2DImode);
50002 res_4 = gen_reg_rtx (V2DImode);
50003 ix86_expand_mul_widen_evenodd (res_3, op1, op2, true, false);
50004 ix86_expand_mul_widen_evenodd (res_4, op1, op2, true, true);
50006 /* Move the results in element 2 down to element 1; we don't care
50007 what goes in elements 2 and 3. Then we can merge the parts
50008 back together with an interleave.
50010 Note that two other sequences were tried:
50011 (1) Use interleaves at the start instead of psrldq, which allows
50012 us to use a single shufps to merge things back at the end.
50013 (2) Use shufps here to combine the two vectors, then pshufd to
50014 put the elements in the correct order.
50015 In both cases the cost of the reformatting stall was too high
50016 and the overall sequence slower. */
50018 emit_insn (gen_sse2_pshufd_1 (res_1, gen_lowpart (V4SImode, res_3),
50019 const0_rtx, const2_rtx,
50020 const0_rtx, const0_rtx));
50021 emit_insn (gen_sse2_pshufd_1 (res_2, gen_lowpart (V4SImode, res_4),
50022 const0_rtx, const2_rtx,
50023 const0_rtx, const0_rtx));
50024 res_1 = emit_insn (gen_vec_interleave_lowv4si (op0, res_1, res_2));
50026 set_unique_reg_note (res_1, REG_EQUAL, gen_rtx_MULT (V4SImode, op1, op2));
50029 void
50030 ix86_expand_sse2_mulvxdi3 (rtx op0, rtx op1, rtx op2)
50032 machine_mode mode = GET_MODE (op0);
50033 rtx t1, t2, t3, t4, t5, t6;
50035 if (TARGET_AVX512DQ && mode == V8DImode)
50036 emit_insn (gen_avx512dq_mulv8di3 (op0, op1, op2));
50037 else if (TARGET_AVX512DQ && TARGET_AVX512VL && mode == V4DImode)
50038 emit_insn (gen_avx512dq_mulv4di3 (op0, op1, op2));
50039 else if (TARGET_AVX512DQ && TARGET_AVX512VL && mode == V2DImode)
50040 emit_insn (gen_avx512dq_mulv2di3 (op0, op1, op2));
50041 else if (TARGET_XOP && mode == V2DImode)
50043 /* op1: A,B,C,D, op2: E,F,G,H */
50044 op1 = gen_lowpart (V4SImode, op1);
50045 op2 = gen_lowpart (V4SImode, op2);
50047 t1 = gen_reg_rtx (V4SImode);
50048 t2 = gen_reg_rtx (V4SImode);
50049 t3 = gen_reg_rtx (V2DImode);
50050 t4 = gen_reg_rtx (V2DImode);
50052 /* t1: B,A,D,C */
50053 emit_insn (gen_sse2_pshufd_1 (t1, op1,
50054 GEN_INT (1),
50055 GEN_INT (0),
50056 GEN_INT (3),
50057 GEN_INT (2)));
50059 /* t2: (B*E),(A*F),(D*G),(C*H) */
50060 emit_insn (gen_mulv4si3 (t2, t1, op2));
50062 /* t3: (B*E)+(A*F), (D*G)+(C*H) */
50063 emit_insn (gen_xop_phadddq (t3, t2));
50065 /* t4: ((B*E)+(A*F))<<32, ((D*G)+(C*H))<<32 */
50066 emit_insn (gen_ashlv2di3 (t4, t3, GEN_INT (32)));
50068 /* Multiply the lower parts and add it all up. */
50069 t5 = gen_reg_rtx (V2DImode);
50070 emit_insn (gen_vec_widen_umult_even_v4si (t5,
50071 gen_lowpart (V4SImode, op1),
50072 gen_lowpart (V4SImode, op2)));
50073 op0 = expand_binop (mode, add_optab, t5, t4, op0, 1, OPTAB_DIRECT);
50076 else
50078 machine_mode nmode;
50079 rtx (*umul) (rtx, rtx, rtx);
50081 if (mode == V2DImode)
50083 umul = gen_vec_widen_umult_even_v4si;
50084 nmode = V4SImode;
50086 else if (mode == V4DImode)
50088 umul = gen_vec_widen_umult_even_v8si;
50089 nmode = V8SImode;
50091 else if (mode == V8DImode)
50093 umul = gen_vec_widen_umult_even_v16si;
50094 nmode = V16SImode;
50096 else
50097 gcc_unreachable ();
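/* The steps below follow the schoolbook identity (added annotation, not
   in the original source): splitting each 64-bit element as hi*2^32 + lo,
   the product modulo 2^64 is lo1*lo2 + ((hi1*lo2 + hi2*lo1) << 32), so
   one widening multiply of the low halves and two widening high-by-low
   multiplies, combined with a shift and two additions, suffice.  */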
50100 /* Multiply low parts. */
50101 t1 = gen_reg_rtx (mode);
50102 emit_insn (umul (t1, gen_lowpart (nmode, op1), gen_lowpart (nmode, op2)));
50104 /* Shift input vectors right 32 bits so we can multiply high parts. */
50105 t6 = GEN_INT (32);
50106 t2 = expand_binop (mode, lshr_optab, op1, t6, NULL, 1, OPTAB_DIRECT);
50107 t3 = expand_binop (mode, lshr_optab, op2, t6, NULL, 1, OPTAB_DIRECT);
50109 /* Multiply high parts by low parts. */
50110 t4 = gen_reg_rtx (mode);
50111 t5 = gen_reg_rtx (mode);
50112 emit_insn (umul (t4, gen_lowpart (nmode, t2), gen_lowpart (nmode, op2)));
50113 emit_insn (umul (t5, gen_lowpart (nmode, t3), gen_lowpart (nmode, op1)));
50115 /* Combine and shift the highparts back. */
50116 t4 = expand_binop (mode, add_optab, t4, t5, t4, 1, OPTAB_DIRECT);
50117 t4 = expand_binop (mode, ashl_optab, t4, t6, t4, 1, OPTAB_DIRECT);
50119 /* Combine high and low parts. */
50120 force_expand_binop (mode, add_optab, t1, t4, op0, 1, OPTAB_DIRECT);
50123 set_unique_reg_note (get_last_insn (), REG_EQUAL,
50124 gen_rtx_MULT (mode, op1, op2));
50127 /* Return 1 if control transfer instruction INSN
50128 should be encoded with the bnd prefix.
50129 If INSN is NULL then return 1 when control
50130 transfer instructions should be prefixed with
50131 bnd by default for the current function. */
50133 bool
50134 ix86_bnd_prefixed_insn_p (rtx insn)
50136 /* For call insns check special flag. */
50137 if (insn && CALL_P (insn))
50139 rtx call = get_call_rtx_from (insn);
50140 if (call)
50141 return CALL_EXPR_WITH_BOUNDS_P (call);
50144 /* All other insns are prefixed only if function is instrumented. */
50145 return chkp_function_instrumented_p (current_function_decl);
50148 /* Calculate integer abs() using only SSE2 instructions. */
50150 void
50151 ix86_expand_sse2_abs (rtx target, rtx input)
50153 machine_mode mode = GET_MODE (target);
50154 rtx tmp0, tmp1, x;
50156 switch (mode)
50158 /* For 32-bit signed integer X, the best way to calculate the absolute
50159 value of X is (((signed) X >> (W-1)) ^ X) - ((signed) X >> (W-1)). */
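      /* For example, with W == 32 and X == -5:
	 ((-5 >> 31) ^ -5) - (-5 >> 31) == (-1 ^ -5) - (-1) == 4 + 1 == 5.  */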
50160 case V4SImode:
50161 tmp0 = expand_simple_binop (mode, ASHIFTRT, input,
50162 GEN_INT (GET_MODE_BITSIZE
50163 (GET_MODE_INNER (mode)) - 1),
50164 NULL, 0, OPTAB_DIRECT);
50165 tmp1 = expand_simple_binop (mode, XOR, tmp0, input,
50166 NULL, 0, OPTAB_DIRECT);
50167 x = expand_simple_binop (mode, MINUS, tmp1, tmp0,
50168 target, 0, OPTAB_DIRECT);
50169 break;
50171 /* For 16-bit signed integer X, the best way to calculate the absolute
50172 value of X is max (X, -X), as SSE2 provides the PMAXSW insn. */
50173 case V8HImode:
50174 tmp0 = expand_unop (mode, neg_optab, input, NULL_RTX, 0);
50176 x = expand_simple_binop (mode, SMAX, tmp0, input,
50177 target, 0, OPTAB_DIRECT);
50178 break;
50180 /* For 8-bit signed integer X, the best way to calculate the absolute
50181 value of X is min ((unsigned char) X, (unsigned char) (-X)),
50182 as SSE2 provides the PMINUB insn. */
50183 case V16QImode:
50184 tmp0 = expand_unop (mode, neg_optab, input, NULL_RTX, 0);
50186 x = expand_simple_binop (V16QImode, UMIN, tmp0, input,
50187 target, 0, OPTAB_DIRECT);
50188 break;
50190 default:
50191 gcc_unreachable ();
50194 if (x != target)
50195 emit_move_insn (target, x);
50198 /* Expand an insert into a vector register through pinsr insn.
50199 Return true if successful. */
50201 bool
50202 ix86_expand_pinsr (rtx *operands)
50204 rtx dst = operands[0];
50205 rtx src = operands[3];
50207 unsigned int size = INTVAL (operands[1]);
50208 unsigned int pos = INTVAL (operands[2]);
50210 if (GET_CODE (dst) == SUBREG)
50212 pos += SUBREG_BYTE (dst) * BITS_PER_UNIT;
50213 dst = SUBREG_REG (dst);
50216 if (GET_CODE (src) == SUBREG)
50217 src = SUBREG_REG (src);
50219 switch (GET_MODE (dst))
50221 case V16QImode:
50222 case V8HImode:
50223 case V4SImode:
50224 case V2DImode:
50226 machine_mode srcmode, dstmode;
50227 rtx (*pinsr)(rtx, rtx, rtx, rtx);
50229 srcmode = mode_for_size (size, MODE_INT, 0);
50231 switch (srcmode)
50233 case QImode:
50234 if (!TARGET_SSE4_1)
50235 return false;
50236 dstmode = V16QImode;
50237 pinsr = gen_sse4_1_pinsrb;
50238 break;
50240 case HImode:
50241 if (!TARGET_SSE2)
50242 return false;
50243 dstmode = V8HImode;
50244 pinsr = gen_sse2_pinsrw;
50245 break;
50247 case SImode:
50248 if (!TARGET_SSE4_1)
50249 return false;
50250 dstmode = V4SImode;
50251 pinsr = gen_sse4_1_pinsrd;
50252 break;
50254 case DImode:
50255 gcc_assert (TARGET_64BIT);
50256 if (!TARGET_SSE4_1)
50257 return false;
50258 dstmode = V2DImode;
50259 pinsr = gen_sse4_1_pinsrq;
50260 break;
50262 default:
50263 return false;
50266 rtx d = dst;
50267 if (GET_MODE (dst) != dstmode)
50268 d = gen_reg_rtx (dstmode);
50269 src = gen_lowpart (srcmode, src);
50271 pos /= size;
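	/* POS is now an element index rather than a bit position;
	   GEN_INT (1 << pos) below is the one-hot mask selecting that
	   element for replacement.  */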
50273 emit_insn (pinsr (d, gen_lowpart (dstmode, dst), src,
50274 GEN_INT (1 << pos)));
50275 if (d != dst)
50276 emit_move_insn (dst, gen_lowpart (GET_MODE (dst), d));
50277 return true;
50280 default:
50281 return false;
50285 /* This function returns the calling abi specific va_list type node.
50286 It returns the FNDECL specific va_list type. */
50288 static tree
50289 ix86_fn_abi_va_list (tree fndecl)
50291 if (!TARGET_64BIT)
50292 return va_list_type_node;
50293 gcc_assert (fndecl != NULL_TREE);
50295 if (ix86_function_abi ((const_tree) fndecl) == MS_ABI)
50296 return ms_va_list_type_node;
50297 else
50298 return sysv_va_list_type_node;
50301 /* Returns the canonical va_list type specified by TYPE. If there
50302    is no valid TYPE provided, it returns NULL_TREE. */
50304 static tree
50305 ix86_canonical_va_list_type (tree type)
50307 tree wtype, htype;
50309 /* Resolve references and pointers to va_list type. */
50310 if (TREE_CODE (type) == MEM_REF)
50311 type = TREE_TYPE (type);
50312 else if (POINTER_TYPE_P (type) && POINTER_TYPE_P (TREE_TYPE(type)))
50313 type = TREE_TYPE (type);
50314 else if (POINTER_TYPE_P (type) && TREE_CODE (TREE_TYPE (type)) == ARRAY_TYPE)
50315 type = TREE_TYPE (type);
50317 if (TARGET_64BIT && va_list_type_node != NULL_TREE)
50319 wtype = va_list_type_node;
50320 gcc_assert (wtype != NULL_TREE);
50321 htype = type;
50322 if (TREE_CODE (wtype) == ARRAY_TYPE)
50324 /* If va_list is an array type, the argument may have decayed
50325 to a pointer type, e.g. by being passed to another function.
50326 In that case, unwrap both types so that we can compare the
50327 underlying records. */
50328 if (TREE_CODE (htype) == ARRAY_TYPE
50329 || POINTER_TYPE_P (htype))
50331 wtype = TREE_TYPE (wtype);
50332 htype = TREE_TYPE (htype);
50335 if (TYPE_MAIN_VARIANT (wtype) == TYPE_MAIN_VARIANT (htype))
50336 return va_list_type_node;
50337 wtype = sysv_va_list_type_node;
50338 gcc_assert (wtype != NULL_TREE);
50339 htype = type;
50340 if (TREE_CODE (wtype) == ARRAY_TYPE)
50342 /* If va_list is an array type, the argument may have decayed
50343 to a pointer type, e.g. by being passed to another function.
50344 In that case, unwrap both types so that we can compare the
50345 underlying records. */
50346 if (TREE_CODE (htype) == ARRAY_TYPE
50347 || POINTER_TYPE_P (htype))
50349 wtype = TREE_TYPE (wtype);
50350 htype = TREE_TYPE (htype);
50353 if (TYPE_MAIN_VARIANT (wtype) == TYPE_MAIN_VARIANT (htype))
50354 return sysv_va_list_type_node;
50355 wtype = ms_va_list_type_node;
50356 gcc_assert (wtype != NULL_TREE);
50357 htype = type;
50358 if (TREE_CODE (wtype) == ARRAY_TYPE)
50360 /* If va_list is an array type, the argument may have decayed
50361 to a pointer type, e.g. by being passed to another function.
50362 In that case, unwrap both types so that we can compare the
50363 underlying records. */
50364 if (TREE_CODE (htype) == ARRAY_TYPE
50365 || POINTER_TYPE_P (htype))
50367 wtype = TREE_TYPE (wtype);
50368 htype = TREE_TYPE (htype);
50371 if (TYPE_MAIN_VARIANT (wtype) == TYPE_MAIN_VARIANT (htype))
50372 return ms_va_list_type_node;
50373 return NULL_TREE;
50375 return std_canonical_va_list_type (type);
50378 /* Iterate through the target-specific builtin types for va_list.
50379 IDX denotes the iterator, *PTREE is set to the result type of
50380    the va_list builtin, and *PNAME to its name.
50381 Returns zero if there is no element for this index, otherwise
50382 IDX should be increased upon the next call.
50383 Note, do not iterate a base builtin's name like __builtin_va_list.
50384 Used from c_common_nodes_and_builtins. */
50386 static int
50387 ix86_enum_va_list (int idx, const char **pname, tree *ptree)
50389 if (TARGET_64BIT)
50391 switch (idx)
50393 default:
50394 break;
50396 case 0:
50397 *ptree = ms_va_list_type_node;
50398 *pname = "__builtin_ms_va_list";
50399 return 1;
50401 case 1:
50402 *ptree = sysv_va_list_type_node;
50403 *pname = "__builtin_sysv_va_list";
50404 return 1;
50408 return 0;
50411 #undef TARGET_SCHED_DISPATCH
50412 #define TARGET_SCHED_DISPATCH has_dispatch
50413 #undef TARGET_SCHED_DISPATCH_DO
50414 #define TARGET_SCHED_DISPATCH_DO do_dispatch
50415 #undef TARGET_SCHED_REASSOCIATION_WIDTH
50416 #define TARGET_SCHED_REASSOCIATION_WIDTH ix86_reassociation_width
50417 #undef TARGET_SCHED_REORDER
50418 #define TARGET_SCHED_REORDER ix86_sched_reorder
50419 #undef TARGET_SCHED_ADJUST_PRIORITY
50420 #define TARGET_SCHED_ADJUST_PRIORITY ix86_adjust_priority
50421 #undef TARGET_SCHED_DEPENDENCIES_EVALUATION_HOOK
50422 #define TARGET_SCHED_DEPENDENCIES_EVALUATION_HOOK \
50423 ix86_dependencies_evaluation_hook
50425 /* The size of the dispatch window is the total number of bytes of
50426 object code allowed in a window. */
50427 #define DISPATCH_WINDOW_SIZE 16
50429 /* Number of dispatch windows considered for scheduling. */
50430 #define MAX_DISPATCH_WINDOWS 3
50432 /* Maximum number of instructions in a window. */
50433 #define MAX_INSN 4
50435 /* Maximum number of immediate operands in a window. */
50436 #define MAX_IMM 4
50438 /* Maximum number of immediate bits allowed in a window. */
50439 #define MAX_IMM_SIZE 128
50441 /* Maximum number of 32 bit immediates allowed in a window. */
50442 #define MAX_IMM_32 4
50444 /* Maximum number of 64 bit immediates allowed in a window. */
50445 #define MAX_IMM_64 2
50447 /* Maximum total of loads or prefetches allowed in a window. */
50448 #define MAX_LOAD 2
50450 /* Maximum total of stores allowed in a window. */
50451 #define MAX_STORE 1
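/* MAX_DISPATCH_WINDOWS * DISPATCH_WINDOW_SIZE gives the 48-byte limit that
   process_end_window asserts for the combined size of the active windows.  */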
50453 #undef BIG
50454 #define BIG 100
50457 /* Dispatch groups.  Instructions that affect the mix in a dispatch window. */
50458 enum dispatch_group {
50459 disp_no_group = 0,
50460 disp_load,
50461 disp_store,
50462 disp_load_store,
50463 disp_prefetch,
50464 disp_imm,
50465 disp_imm_32,
50466 disp_imm_64,
50467 disp_branch,
50468 disp_cmp,
50469 disp_jcc,
50470 disp_last
50473 /* Number of allowable groups in a dispatch window. It is an array
50474 indexed by dispatch_group enum. 100 is used as a big number,
50475    because the number of these kinds of operations does not have any
50476    effect in the dispatch window, but we need them for other reasons in
50477 the table. */
50478 static unsigned int num_allowable_groups[disp_last] = {
50479 0, 2, 1, 1, 2, 4, 4, 2, 1, BIG, BIG
50482 char group_name[disp_last + 1][16] = {
50483 "disp_no_group", "disp_load", "disp_store", "disp_load_store",
50484 "disp_prefetch", "disp_imm", "disp_imm_32", "disp_imm_64",
50485 "disp_branch", "disp_cmp", "disp_jcc", "disp_last"
50488 /* Instruction path. */
50489 enum insn_path {
50490 no_path = 0,
50491 path_single, /* Single micro op. */
50492 path_double, /* Double micro op. */
50493   path_multi, /* Instructions with more than 2 micro ops. */
50494 last_path
50497 /* sched_insn_info defines a window to the instructions scheduled in
50498 the basic block. It contains a pointer to the insn_info table and
50499 the instruction scheduled.
50501 Windows are allocated for each basic block and are linked
50502 together. */
50503 typedef struct sched_insn_info_s {
50504 rtx insn;
50505 enum dispatch_group group;
50506 enum insn_path path;
50507 int byte_len;
50508 int imm_bytes;
50509 } sched_insn_info;
50511 /* Linked list of dispatch windows.  This is a two-way list of
50512 dispatch windows of a basic block. It contains information about
50513 the number of uops in the window and the total number of
50514 instructions and of bytes in the object code for this dispatch
50515 window. */
50516 typedef struct dispatch_windows_s {
50517 int num_insn; /* Number of insn in the window. */
50518 int num_uops; /* Number of uops in the window. */
50519 int window_size; /* Number of bytes in the window. */
50520   int window_num;      /* Window number, either 0 or 1. */
50521 int num_imm; /* Number of immediates in an insn. */
50522 int num_imm_32; /* Number of 32 bit immediates in an insn. */
50523 int num_imm_64; /* Number of 64 bit immediates in an insn. */
50524 int imm_size; /* Total immediates in the window. */
50525 int num_loads; /* Total memory loads in the window. */
50526 int num_stores; /* Total memory stores in the window. */
50527 int violation; /* Violation exists in window. */
50528 sched_insn_info *window; /* Pointer to the window. */
50529 struct dispatch_windows_s *next;
50530 struct dispatch_windows_s *prev;
50531 } dispatch_windows;
50533 /* Immediate values used in an insn. */
50534 typedef struct imm_info_s
50536 int imm;
50537 int imm32;
50538 int imm64;
50539 } imm_info;
50541 static dispatch_windows *dispatch_window_list;
50542 static dispatch_windows *dispatch_window_list1;
50544 /* Get dispatch group of insn. */
50546 static enum dispatch_group
50547 get_mem_group (rtx_insn *insn)
50549 enum attr_memory memory;
50551 if (INSN_CODE (insn) < 0)
50552 return disp_no_group;
50553 memory = get_attr_memory (insn);
50554 if (memory == MEMORY_STORE)
50555 return disp_store;
50557 if (memory == MEMORY_LOAD)
50558 return disp_load;
50560 if (memory == MEMORY_BOTH)
50561 return disp_load_store;
50563 return disp_no_group;
50566 /* Return true if insn is a compare instruction. */
50568 static bool
50569 is_cmp (rtx_insn *insn)
50571 enum attr_type type;
50573 type = get_attr_type (insn);
50574 return (type == TYPE_TEST
50575 || type == TYPE_ICMP
50576 || type == TYPE_FCMP
50577 || GET_CODE (PATTERN (insn)) == COMPARE);
50580 /* Return true if a dispatch violation was encountered. */
50582 static bool
50583 dispatch_violation (void)
50585 if (dispatch_window_list->next)
50586 return dispatch_window_list->next->violation;
50587 return dispatch_window_list->violation;
50590 /* Return true if insn is a branch instruction. */
50592 static bool
50593 is_branch (rtx_insn *insn)
50595 return (CALL_P (insn) || JUMP_P (insn));
50598 /* Return true if insn is a prefetch instruction. */
50600 static bool
50601 is_prefetch (rtx_insn *insn)
50603 return NONJUMP_INSN_P (insn) && GET_CODE (PATTERN (insn)) == PREFETCH;
50606 /* This function initializes a dispatch window and the list container holding a
50607 pointer to the window. */
50609 static void
50610 init_window (int window_num)
50612 int i;
50613 dispatch_windows *new_list;
50615 if (window_num == 0)
50616 new_list = dispatch_window_list;
50617 else
50618 new_list = dispatch_window_list1;
50620 new_list->num_insn = 0;
50621 new_list->num_uops = 0;
50622 new_list->window_size = 0;
50623 new_list->next = NULL;
50624 new_list->prev = NULL;
50625 new_list->window_num = window_num;
50626 new_list->num_imm = 0;
50627 new_list->num_imm_32 = 0;
50628 new_list->num_imm_64 = 0;
50629 new_list->imm_size = 0;
50630 new_list->num_loads = 0;
50631 new_list->num_stores = 0;
50632 new_list->violation = false;
50634 for (i = 0; i < MAX_INSN; i++)
50636 new_list->window[i].insn = NULL;
50637 new_list->window[i].group = disp_no_group;
50638 new_list->window[i].path = no_path;
50639 new_list->window[i].byte_len = 0;
50640 new_list->window[i].imm_bytes = 0;
50642 return;
50645 /* This function allocates and initializes a dispatch window and the
50646 list container holding a pointer to the window. */
50648 static dispatch_windows *
50649 allocate_window (void)
50651 dispatch_windows *new_list = XNEW (struct dispatch_windows_s);
50652 new_list->window = XNEWVEC (struct sched_insn_info_s, MAX_INSN + 1);
50654 return new_list;
50657 /* This routine initializes the dispatch scheduling information. It
50658 initiates building dispatch scheduler tables and constructs the
50659 first dispatch window. */
50661 static void
50662 init_dispatch_sched (void)
50664 /* Allocate a dispatch list and a window. */
50665 dispatch_window_list = allocate_window ();
50666 dispatch_window_list1 = allocate_window ();
50667 init_window (0);
50668 init_window (1);
50671 /* This function returns true if a branch is detected. End of a basic block
50672 does not have to be a branch, but here we assume only branches end a
50673 window. */
50675 static bool
50676 is_end_basic_block (enum dispatch_group group)
50678 return group == disp_branch;
50681 /* This function is called when the end of a window processing is reached. */
50683 static void
50684 process_end_window (void)
50686 gcc_assert (dispatch_window_list->num_insn <= MAX_INSN);
50687 if (dispatch_window_list->next)
50689 gcc_assert (dispatch_window_list1->num_insn <= MAX_INSN);
50690 gcc_assert (dispatch_window_list->window_size
50691 + dispatch_window_list1->window_size <= 48);
50692 init_window (1);
50694 init_window (0);
50697 /* Allocates a new dispatch window and adds it to WINDOW_LIST.
50698 WINDOW_NUM is either 0 or 1. A maximum of two windows are generated
50699 for 48 bytes of instructions. Note that these windows are not dispatch
50700    windows whose sizes are DISPATCH_WINDOW_SIZE. */
50702 static dispatch_windows *
50703 allocate_next_window (int window_num)
50705 if (window_num == 0)
50707 if (dispatch_window_list->next)
50708 init_window (1);
50709 init_window (0);
50710 return dispatch_window_list;
50713 dispatch_window_list->next = dispatch_window_list1;
50714 dispatch_window_list1->prev = dispatch_window_list;
50716 return dispatch_window_list1;
50719 /* Compute number of immediate operands of an instruction. */
50721 static void
50722 find_constant (rtx in_rtx, imm_info *imm_values)
50724 if (INSN_P (in_rtx))
50725 in_rtx = PATTERN (in_rtx);
50726 subrtx_iterator::array_type array;
50727 FOR_EACH_SUBRTX (iter, array, in_rtx, ALL)
50728 if (const_rtx x = *iter)
50729 switch (GET_CODE (x))
50731 case CONST:
50732 case SYMBOL_REF:
50733 case CONST_INT:
50734 (imm_values->imm)++;
50735 if (x86_64_immediate_operand (CONST_CAST_RTX (x), SImode))
50736 (imm_values->imm32)++;
50737 else
50738 (imm_values->imm64)++;
50739 break;
50741 case CONST_DOUBLE:
50742 case CONST_WIDE_INT:
50743 (imm_values->imm)++;
50744 (imm_values->imm64)++;
50745 break;
50747 case CODE_LABEL:
50748 if (LABEL_KIND (x) == LABEL_NORMAL)
50750 (imm_values->imm)++;
50751 (imm_values->imm32)++;
50753 break;
50755 default:
50756 break;
50760 /* Return total size of immediate operands of an instruction along with number
50761    of corresponding immediate operands.  It initializes its parameters to zero
50762    before calling FIND_CONSTANT.
50763 INSN is the input instruction. IMM is the total of immediates.
50764 IMM32 is the number of 32 bit immediates. IMM64 is the number of 64
50765 bit immediates. */
50767 static int
50768 get_num_immediates (rtx_insn *insn, int *imm, int *imm32, int *imm64)
50770 imm_info imm_values = {0, 0, 0};
50772 find_constant (insn, &imm_values);
50773 *imm = imm_values.imm;
50774 *imm32 = imm_values.imm32;
50775 *imm64 = imm_values.imm64;
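  /* For example, an insn with one 32-bit and one 64-bit immediate yields
     *IMM == 2 and a returned size of 4 + 8 == 12 bytes.  */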
50776 return imm_values.imm32 * 4 + imm_values.imm64 * 8;
50779 /* This function indicates whether an instruction has an immediate
50780    operand. */
50782 static bool
50783 has_immediate (rtx_insn *insn)
50785 int num_imm_operand;
50786 int num_imm32_operand;
50787 int num_imm64_operand;
50789 if (insn)
50790 return get_num_immediates (insn, &num_imm_operand, &num_imm32_operand,
50791 &num_imm64_operand);
50792 return false;
50795 /* Return the decode path (single, double or multi) for INSN. */
50797 static enum insn_path
50798 get_insn_path (rtx_insn *insn)
50800 enum attr_amdfam10_decode path = get_attr_amdfam10_decode (insn);
50802 if ((int)path == 0)
50803 return path_single;
50805 if ((int)path == 1)
50806 return path_double;
50808 return path_multi;
50811 /* Return insn dispatch group. */
50813 static enum dispatch_group
50814 get_insn_group (rtx_insn *insn)
50816 enum dispatch_group group = get_mem_group (insn);
50817 if (group)
50818 return group;
50820 if (is_branch (insn))
50821 return disp_branch;
50823 if (is_cmp (insn))
50824 return disp_cmp;
50826 if (has_immediate (insn))
50827 return disp_imm;
50829 if (is_prefetch (insn))
50830 return disp_prefetch;
50832 return disp_no_group;
50835 /* Count number of GROUP restricted instructions in a dispatch
50836 window WINDOW_LIST. */
50838 static int
50839 count_num_restricted (rtx_insn *insn, dispatch_windows *window_list)
50841 enum dispatch_group group = get_insn_group (insn);
50842 int imm_size;
50843 int num_imm_operand;
50844 int num_imm32_operand;
50845 int num_imm64_operand;
50847 if (group == disp_no_group)
50848 return 0;
50850 if (group == disp_imm)
50852 imm_size = get_num_immediates (insn, &num_imm_operand, &num_imm32_operand,
50853 &num_imm64_operand);
50854 if (window_list->imm_size + imm_size > MAX_IMM_SIZE
50855 || num_imm_operand + window_list->num_imm > MAX_IMM
50856 || (num_imm32_operand > 0
50857 && (window_list->num_imm_32 + num_imm32_operand > MAX_IMM_32
50858 || window_list->num_imm_64 * 2 + num_imm32_operand > MAX_IMM_32))
50859 || (num_imm64_operand > 0
50860 && (window_list->num_imm_64 + num_imm64_operand > MAX_IMM_64
50861 || window_list->num_imm_32 + num_imm64_operand * 2 > MAX_IMM_32))
50862 || (window_list->imm_size + imm_size == MAX_IMM_SIZE
50863 && num_imm64_operand > 0
50864 && ((window_list->num_imm_64 > 0
50865 && window_list->num_insn >= 2)
50866 || window_list->num_insn >= 3)))
50867 return BIG;
50869 return 1;
50872 if ((group == disp_load_store
50873 && (window_list->num_loads >= MAX_LOAD
50874 || window_list->num_stores >= MAX_STORE))
50875 || ((group == disp_load
50876 || group == disp_prefetch)
50877 && window_list->num_loads >= MAX_LOAD)
50878 || (group == disp_store
50879 && window_list->num_stores >= MAX_STORE))
50880 return BIG;
50882 return 1;
50885 /* This function returns true if insn satisfies dispatch rules on the
50886 last window scheduled. */
50888 static bool
50889 fits_dispatch_window (rtx_insn *insn)
50891 dispatch_windows *window_list = dispatch_window_list;
50892 dispatch_windows *window_list_next = dispatch_window_list->next;
50893 unsigned int num_restrict;
50894 enum dispatch_group group = get_insn_group (insn);
50895 enum insn_path path = get_insn_path (insn);
50896 int sum;
50898 /* Make disp_cmp and disp_jcc get scheduled at the latest. These
50899 instructions should be given the lowest priority in the
50900 scheduling process in Haifa scheduler to make sure they will be
50901 scheduled in the same dispatch window as the reference to them. */
50902 if (group == disp_jcc || group == disp_cmp)
50903 return false;
50905 /* Check nonrestricted. */
50906 if (group == disp_no_group || group == disp_branch)
50907 return true;
50909 /* Get last dispatch window. */
50910 if (window_list_next)
50911 window_list = window_list_next;
50913 if (window_list->window_num == 1)
50915 sum = window_list->prev->window_size + window_list->window_size;
50917 if (sum == 32
50918 || (min_insn_size (insn) + sum) >= 48)
50919 /* Window 1 is full. Go for next window. */
50920 return true;
50923 num_restrict = count_num_restricted (insn, window_list);
50925 if (num_restrict > num_allowable_groups[group])
50926 return false;
50928 /* See if it fits in the first window. */
50929 if (window_list->window_num == 0)
50931       /* The first window should have only single and double path
50932 uops. */
50933 if (path == path_double
50934 && (window_list->num_uops + 2) > MAX_INSN)
50935 return false;
50936 else if (path != path_single)
50937 return false;
50939 return true;
50942 /* Add an instruction INSN with NUM_UOPS micro-operations to the
50943 dispatch window WINDOW_LIST. */
50945 static void
50946 add_insn_window (rtx_insn *insn, dispatch_windows *window_list, int num_uops)
50948 int byte_len = min_insn_size (insn);
50949 int num_insn = window_list->num_insn;
50950 int imm_size;
50951 sched_insn_info *window = window_list->window;
50952 enum dispatch_group group = get_insn_group (insn);
50953 enum insn_path path = get_insn_path (insn);
50954 int num_imm_operand;
50955 int num_imm32_operand;
50956 int num_imm64_operand;
50958 if (!window_list->violation && group != disp_cmp
50959 && !fits_dispatch_window (insn))
50960 window_list->violation = true;
50962 imm_size = get_num_immediates (insn, &num_imm_operand, &num_imm32_operand,
50963 &num_imm64_operand);
50965 /* Initialize window with new instruction. */
50966 window[num_insn].insn = insn;
50967 window[num_insn].byte_len = byte_len;
50968 window[num_insn].group = group;
50969 window[num_insn].path = path;
50970 window[num_insn].imm_bytes = imm_size;
50972 window_list->window_size += byte_len;
50973 window_list->num_insn = num_insn + 1;
50974 window_list->num_uops = window_list->num_uops + num_uops;
50975 window_list->imm_size += imm_size;
50976 window_list->num_imm += num_imm_operand;
50977 window_list->num_imm_32 += num_imm32_operand;
50978 window_list->num_imm_64 += num_imm64_operand;
50980 if (group == disp_store)
50981 window_list->num_stores += 1;
50982 else if (group == disp_load
50983 || group == disp_prefetch)
50984 window_list->num_loads += 1;
50985 else if (group == disp_load_store)
50987 window_list->num_stores += 1;
50988 window_list->num_loads += 1;
50992 /* Adds a scheduled instruction, INSN, to the current dispatch window.
50993 If the total bytes of instructions or the number of instructions in
50994    the window exceed the allowable limits, it allocates a new window.  */
50996 static void
50997 add_to_dispatch_window (rtx_insn *insn)
50999 int byte_len;
51000 dispatch_windows *window_list;
51001 dispatch_windows *next_list;
51002 dispatch_windows *window0_list;
51003 enum insn_path path;
51004 enum dispatch_group insn_group;
51005 bool insn_fits;
51006 int num_insn;
51007 int num_uops;
51008 int window_num;
51009 int insn_num_uops;
51010 int sum;
51012 if (INSN_CODE (insn) < 0)
51013 return;
51015 byte_len = min_insn_size (insn);
51016 window_list = dispatch_window_list;
51017 next_list = window_list->next;
51018 path = get_insn_path (insn);
51019 insn_group = get_insn_group (insn);
51021 /* Get the last dispatch window. */
51022 if (next_list)
51023 window_list = dispatch_window_list->next;
51025 if (path == path_single)
51026 insn_num_uops = 1;
51027 else if (path == path_double)
51028 insn_num_uops = 2;
51029 else
51030 insn_num_uops = (int) path;
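    /* Only path_multi normally reaches here; its enum value (3) then serves
       directly as the uop count.  */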
51032 /* If current window is full, get a new window.
51033 Window number zero is full, if MAX_INSN uops are scheduled in it.
51034      Window number one is full if window zero's bytes plus window
51035      one's bytes equal 32, if adding the bytes of the new instruction
51036      to the total makes it 48 or more, or if it already has MAX_INSN
51037      instructions in it.  */
51038 num_insn = window_list->num_insn;
51039 num_uops = window_list->num_uops;
51040 window_num = window_list->window_num;
51041 insn_fits = fits_dispatch_window (insn);
51043 if (num_insn >= MAX_INSN
51044 || num_uops + insn_num_uops > MAX_INSN
51045 || !(insn_fits))
51047 window_num = ~window_num & 1;
51048 window_list = allocate_next_window (window_num);
51051 if (window_num == 0)
51053 add_insn_window (insn, window_list, insn_num_uops);
51054 if (window_list->num_insn >= MAX_INSN
51055 && insn_group == disp_branch)
51057 process_end_window ();
51058 return;
51061 else if (window_num == 1)
51063 window0_list = window_list->prev;
51064 sum = window0_list->window_size + window_list->window_size;
51065 if (sum == 32
51066 || (byte_len + sum) >= 48)
51068 process_end_window ();
51069 window_list = dispatch_window_list;
51072 add_insn_window (insn, window_list, insn_num_uops);
51074 else
51075 gcc_unreachable ();
51077 if (is_end_basic_block (insn_group))
51079       /* End of basic block is reached; do end-basic-block processing.  */
51080 process_end_window ();
51081 return;
51085 /* Print the dispatch window, WINDOW_NUM, to FILE. */
51087 DEBUG_FUNCTION static void
51088 debug_dispatch_window_file (FILE *file, int window_num)
51090 dispatch_windows *list;
51091 int i;
51093 if (window_num == 0)
51094 list = dispatch_window_list;
51095 else
51096 list = dispatch_window_list1;
51098 fprintf (file, "Window #%d:\n", list->window_num);
51099 fprintf (file, " num_insn = %d, num_uops = %d, window_size = %d\n",
51100 list->num_insn, list->num_uops, list->window_size);
51101 fprintf (file, " num_imm = %d, num_imm_32 = %d, num_imm_64 = %d, imm_size = %d\n",
51102 list->num_imm, list->num_imm_32, list->num_imm_64, list->imm_size);
51104 fprintf (file, " num_loads = %d, num_stores = %d\n", list->num_loads,
51105 list->num_stores);
51106 fprintf (file, " insn info:\n");
51108 for (i = 0; i < MAX_INSN; i++)
51110 if (!list->window[i].insn)
51111 break;
51112 fprintf (file, " group[%d] = %s, insn[%d] = %p, path[%d] = %d byte_len[%d] = %d, imm_bytes[%d] = %d\n",
51113 i, group_name[list->window[i].group],
51114 i, (void *)list->window[i].insn,
51115 i, list->window[i].path,
51116 i, list->window[i].byte_len,
51117 i, list->window[i].imm_bytes);
51121 /* Print to stdout a dispatch window. */
51123 DEBUG_FUNCTION void
51124 debug_dispatch_window (int window_num)
51126 debug_dispatch_window_file (stdout, window_num);
51129 /* Print INSN dispatch information to FILE. */
51131 DEBUG_FUNCTION static void
51132 debug_insn_dispatch_info_file (FILE *file, rtx_insn *insn)
51134 int byte_len;
51135 enum insn_path path;
51136 enum dispatch_group group;
51137 int imm_size;
51138 int num_imm_operand;
51139 int num_imm32_operand;
51140 int num_imm64_operand;
51142 if (INSN_CODE (insn) < 0)
51143 return;
51145 byte_len = min_insn_size (insn);
51146 path = get_insn_path (insn);
51147 group = get_insn_group (insn);
51148 imm_size = get_num_immediates (insn, &num_imm_operand, &num_imm32_operand,
51149 &num_imm64_operand);
51151 fprintf (file, " insn info:\n");
51152 fprintf (file, " group = %s, path = %d, byte_len = %d\n",
51153 group_name[group], path, byte_len);
51154 fprintf (file, " num_imm = %d, num_imm_32 = %d, num_imm_64 = %d, imm_size = %d\n",
51155 num_imm_operand, num_imm32_operand, num_imm64_operand, imm_size);
51158 /* Print to STDOUT the status of the ready list with respect to
51159 dispatch windows. */
51161 DEBUG_FUNCTION void
51162 debug_ready_dispatch (void)
51164 int i;
51165 int no_ready = number_in_ready ();
51167 fprintf (stdout, "Number of ready: %d\n", no_ready);
51169 for (i = 0; i < no_ready; i++)
51170 debug_insn_dispatch_info_file (stdout, get_ready_element (i));
51173 /* This routine is the driver of the dispatch scheduler. */
51175 static void
51176 do_dispatch (rtx_insn *insn, int mode)
51178 if (mode == DISPATCH_INIT)
51179 init_dispatch_sched ();
51180 else if (mode == ADD_TO_DISPATCH_WINDOW)
51181 add_to_dispatch_window (insn);
51184 /* Return TRUE if Dispatch Scheduling is supported. */
51186 static bool
51187 has_dispatch (rtx_insn *insn, int action)
51189 if ((TARGET_BDVER1 || TARGET_BDVER2 || TARGET_BDVER3 || TARGET_BDVER4)
51190 && flag_dispatch_scheduler)
51191 switch (action)
51193 default:
51194 return false;
51196 case IS_DISPATCH_ON:
51197 return true;
51198 break;
51200 case IS_CMP:
51201 return is_cmp (insn);
51203 case DISPATCH_VIOLATION:
51204 return dispatch_violation ();
51206 case FITS_DISPATCH_WINDOW:
51207 return fits_dispatch_window (insn);
51210 return false;
51213 /* Implementation of reassociation_width target hook used by
51214 reassoc phase to identify parallelism level in reassociated
51215    tree.  The statement's tree_code is passed in OPC.  The type of the
51216    arguments is passed in MODE.
51218 Currently parallel reassociation is enabled for Atom
51219 processors only and we set reassociation width to be 2
51220 because Atom may issue up to 2 instructions per cycle.
51222 Return value should be fixed if parallel reassociation is
51223 enabled for other processors. */
51225 static int
51226 ix86_reassociation_width (unsigned int, machine_mode mode)
51228 /* Vector part. */
51229 if (VECTOR_MODE_P (mode))
51231 if (TARGET_VECTOR_PARALLEL_EXECUTION)
51232 return 2;
51233 else
51234 return 1;
51237 /* Scalar part. */
51238 if (INTEGRAL_MODE_P (mode) && TARGET_REASSOC_INT_TO_PARALLEL)
51239 return 2;
51240 else if (FLOAT_MODE_P (mode) && TARGET_REASSOC_FP_TO_PARALLEL)
51241 return 2;
51242 else
51243 return 1;
51246 /* ??? No autovectorization into MMX or 3DNOW until we can reliably
51247 place emms and femms instructions. */
51249 static machine_mode
51250 ix86_preferred_simd_mode (machine_mode mode)
51252 if (!TARGET_SSE)
51253 return word_mode;
51255 switch (mode)
51257 case QImode:
51258 return TARGET_AVX512BW ? V64QImode :
51259 (TARGET_AVX && !TARGET_PREFER_AVX128) ? V32QImode : V16QImode;
51260 case HImode:
51261 return TARGET_AVX512BW ? V32HImode :
51262 (TARGET_AVX && !TARGET_PREFER_AVX128) ? V16HImode : V8HImode;
51263 case SImode:
51264 return TARGET_AVX512F ? V16SImode :
51265 (TARGET_AVX && !TARGET_PREFER_AVX128) ? V8SImode : V4SImode;
51266 case DImode:
51267 return TARGET_AVX512F ? V8DImode :
51268 (TARGET_AVX && !TARGET_PREFER_AVX128) ? V4DImode : V2DImode;
51270 case SFmode:
51271 if (TARGET_AVX512F)
51272 return V16SFmode;
51273 else if (TARGET_AVX && !TARGET_PREFER_AVX128)
51274 return V8SFmode;
51275 else
51276 return V4SFmode;
51278 case DFmode:
51279 if (!TARGET_VECTORIZE_DOUBLE)
51280 return word_mode;
51281 else if (TARGET_AVX512F)
51282 return V8DFmode;
51283 else if (TARGET_AVX && !TARGET_PREFER_AVX128)
51284 return V4DFmode;
51285 else if (TARGET_SSE2)
51286 return V2DFmode;
51287 /* FALLTHRU */
51289 default:
51290 return word_mode;
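/* As examples of the mapping above: with AVX2 (but not AVX-512) and without
   -mprefer-avx128, SImode elements are vectorized in V8SImode (256 bits);
   with -mprefer-avx128 or plain SSE they stay in V4SImode.  */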
51294 /* If AVX is enabled then try vectorizing with both 256bit and 128bit
51295 vectors. If AVX512F is enabled then try vectorizing with 512bit,
51296 256bit and 128bit vectors. */
51298 static unsigned int
51299 ix86_autovectorize_vector_sizes (void)
51301 return TARGET_AVX512F ? 64 | 32 | 16 :
51302 (TARGET_AVX && !TARGET_PREFER_AVX128) ? 32 | 16 : 0;
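/* The result is a bit mask of vector sizes in bytes: 64 | 32 | 16 asks the
   vectorizer to also try 512-, 256- and 128-bit vectors, while 0 means only
   the preferred SIMD mode is used.  */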
51307 /* Return class of registers which could be used for pseudo of MODE
51308 and of class RCLASS for spilling instead of memory. Return NO_REGS
51309 if it is not possible or non-profitable. */
51310 static reg_class_t
51311 ix86_spill_class (reg_class_t rclass, machine_mode mode)
51313 if (TARGET_SSE && TARGET_GENERAL_REGS_SSE_SPILL && ! TARGET_MMX
51314 && (mode == SImode || (TARGET_64BIT && mode == DImode))
51315 && rclass != NO_REGS && INTEGER_CLASS_P (rclass))
51316 return ALL_SSE_REGS;
51317 return NO_REGS;
51320 /* Implement targetm.vectorize.init_cost. */
51322 static void *
51323 ix86_init_cost (struct loop *)
51325 unsigned *cost = XNEWVEC (unsigned, 3);
51326 cost[vect_prologue] = cost[vect_body] = cost[vect_epilogue] = 0;
51327 return cost;
51330 /* Implement targetm.vectorize.add_stmt_cost. */
51332 static unsigned
51333 ix86_add_stmt_cost (void *data, int count, enum vect_cost_for_stmt kind,
51334 struct _stmt_vec_info *stmt_info, int misalign,
51335 enum vect_cost_model_location where)
51337 unsigned *cost = (unsigned *) data;
51338 unsigned retval = 0;
51340 tree vectype = stmt_info ? stmt_vectype (stmt_info) : NULL_TREE;
51341 int stmt_cost = ix86_builtin_vectorization_cost (kind, vectype, misalign);
51343 /* Statements in an inner loop relative to the loop being
51344 vectorized are weighted more heavily. The value here is
51345 arbitrary and could potentially be improved with analysis. */
51346 if (where == vect_body && stmt_info && stmt_in_inner_loop_p (stmt_info))
51347 count *= 50; /* FIXME. */
51349 retval = (unsigned) (count * stmt_cost);
51351   /* We need to multiply the cost of all vector stmts by 1.7 (estimated cost)
51352      for Silvermont, as it has an out-of-order integer pipeline and can execute
51353      2 scalar instructions per tick, but has an in-order SIMD pipeline.  */
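  /* For example, count == 2 and stmt_cost == 4 give retval == 8, which is
     scaled to (8 * 17) / 10 == 13 for integer-typed statements on those
     targets.  */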
51354 if (TARGET_SILVERMONT || TARGET_INTEL)
51355 if (stmt_info && stmt_info->stmt)
51357 tree lhs_op = gimple_get_lhs (stmt_info->stmt);
51358 if (lhs_op && TREE_CODE (TREE_TYPE (lhs_op)) == INTEGER_TYPE)
51359 retval = (retval * 17) / 10;
51362 cost[where] += retval;
51364 return retval;
51367 /* Implement targetm.vectorize.finish_cost. */
51369 static void
51370 ix86_finish_cost (void *data, unsigned *prologue_cost,
51371 unsigned *body_cost, unsigned *epilogue_cost)
51373 unsigned *cost = (unsigned *) data;
51374 *prologue_cost = cost[vect_prologue];
51375 *body_cost = cost[vect_body];
51376 *epilogue_cost = cost[vect_epilogue];
51379 /* Implement targetm.vectorize.destroy_cost_data. */
51381 static void
51382 ix86_destroy_cost_data (void *data)
51384 free (data);
51387 /* Validate target specific memory model bits in VAL. */
51389 static unsigned HOST_WIDE_INT
51390 ix86_memmodel_check (unsigned HOST_WIDE_INT val)
51392 enum memmodel model = memmodel_from_int (val);
51393 bool strong;
51395 if (val & ~(unsigned HOST_WIDE_INT)(IX86_HLE_ACQUIRE|IX86_HLE_RELEASE
51396 |MEMMODEL_MASK)
51397 || ((val & IX86_HLE_ACQUIRE) && (val & IX86_HLE_RELEASE)))
51399 warning (OPT_Winvalid_memory_model,
51400 "Unknown architecture specific memory model");
51401 return MEMMODEL_SEQ_CST;
51403 strong = (is_mm_acq_rel (model) || is_mm_seq_cst (model));
51404 if (val & IX86_HLE_ACQUIRE && !(is_mm_acquire (model) || strong))
51406 warning (OPT_Winvalid_memory_model,
51407 "HLE_ACQUIRE not used with ACQUIRE or stronger memory model");
51408 return MEMMODEL_SEQ_CST | IX86_HLE_ACQUIRE;
51410 if (val & IX86_HLE_RELEASE && !(is_mm_release (model) || strong))
51412 warning (OPT_Winvalid_memory_model,
51413 "HLE_RELEASE not used with RELEASE or stronger memory model");
51414 return MEMMODEL_SEQ_CST | IX86_HLE_RELEASE;
51416 return val;
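/* A sketch of the intended use, assuming the usual __ATOMIC_HLE_ACQUIRE
   macro maps onto IX86_HLE_ACQUIRE:

     __atomic_exchange_n (&lock, 1, __ATOMIC_ACQUIRE | __ATOMIC_HLE_ACQUIRE);

   passes the checks above unchanged, while pairing HLE_ACQUIRE with a relaxed
   model is diagnosed and falls back to MEMMODEL_SEQ_CST | IX86_HLE_ACQUIRE.  */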
51419 /* Set CLONEI->vecsize_mangle, CLONEI->vecsize_int,
51420 CLONEI->vecsize_float and if CLONEI->simdlen is 0, also
51421 CLONEI->simdlen. Return 0 if SIMD clones shouldn't be emitted,
51422 or number of vecsize_mangle variants that should be emitted. */
51424 static int
51425 ix86_simd_clone_compute_vecsize_and_simdlen (struct cgraph_node *node,
51426 struct cgraph_simd_clone *clonei,
51427 tree base_type, int num)
51429 int ret = 1;
51431 if (clonei->simdlen
51432 && (clonei->simdlen < 2
51433 || clonei->simdlen > 16
51434 || (clonei->simdlen & (clonei->simdlen - 1)) != 0))
51436 warning_at (DECL_SOURCE_LOCATION (node->decl), 0,
51437 "unsupported simdlen %d", clonei->simdlen);
51438 return 0;
51441 tree ret_type = TREE_TYPE (TREE_TYPE (node->decl));
51442 if (TREE_CODE (ret_type) != VOID_TYPE)
51443 switch (TYPE_MODE (ret_type))
51445 case QImode:
51446 case HImode:
51447 case SImode:
51448 case DImode:
51449 case SFmode:
51450 case DFmode:
51451 /* case SCmode: */
51452 /* case DCmode: */
51453 break;
51454 default:
51455 warning_at (DECL_SOURCE_LOCATION (node->decl), 0,
51456 "unsupported return type %qT for simd\n", ret_type);
51457 return 0;
51460 tree t;
51461 int i;
51463 for (t = DECL_ARGUMENTS (node->decl), i = 0; t; t = DECL_CHAIN (t), i++)
51464 /* FIXME: Shouldn't we allow such arguments if they are uniform? */
51465 switch (TYPE_MODE (TREE_TYPE (t)))
51467 case QImode:
51468 case HImode:
51469 case SImode:
51470 case DImode:
51471 case SFmode:
51472 case DFmode:
51473 /* case SCmode: */
51474 /* case DCmode: */
51475 break;
51476 default:
51477 warning_at (DECL_SOURCE_LOCATION (node->decl), 0,
51478 "unsupported argument type %qT for simd\n", TREE_TYPE (t));
51479 return 0;
51482 if (clonei->cilk_elemental)
51484       /* Parse the processor clause here.  If not present, default to 'b'.  */
51485 clonei->vecsize_mangle = 'b';
51487 else if (!TREE_PUBLIC (node->decl))
51489 /* If the function isn't exported, we can pick up just one ISA
51490 for the clones. */
51491 if (TARGET_AVX2)
51492 clonei->vecsize_mangle = 'd';
51493 else if (TARGET_AVX)
51494 clonei->vecsize_mangle = 'c';
51495 else
51496 clonei->vecsize_mangle = 'b';
51497 ret = 1;
51499 else
51501 clonei->vecsize_mangle = "bcd"[num];
51502 ret = 3;
51504 switch (clonei->vecsize_mangle)
51506 case 'b':
51507 clonei->vecsize_int = 128;
51508 clonei->vecsize_float = 128;
51509 break;
51510 case 'c':
51511 clonei->vecsize_int = 128;
51512 clonei->vecsize_float = 256;
51513 break;
51514 case 'd':
51515 clonei->vecsize_int = 256;
51516 clonei->vecsize_float = 256;
51517 break;
51519 if (clonei->simdlen == 0)
51521 if (SCALAR_INT_MODE_P (TYPE_MODE (base_type)))
51522 clonei->simdlen = clonei->vecsize_int;
51523 else
51524 clonei->simdlen = clonei->vecsize_float;
51525 clonei->simdlen /= GET_MODE_BITSIZE (TYPE_MODE (base_type));
51526 if (clonei->simdlen > 16)
51527 clonei->simdlen = 16;
51529 return ret;
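/* For example, an AVX2 ('d') clone of a function whose base type is double
   gets vecsize_float == 256, so a defaulted simdlen becomes 256 / 64 == 4,
   while the SSE2 ('b') variant of the same function gets simdlen == 2.  */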
51532 /* Add target attribute to SIMD clone NODE if needed. */
51534 static void
51535 ix86_simd_clone_adjust (struct cgraph_node *node)
51537 const char *str = NULL;
51538 gcc_assert (node->decl == cfun->decl);
51539 switch (node->simdclone->vecsize_mangle)
51541 case 'b':
51542 if (!TARGET_SSE2)
51543 str = "sse2";
51544 break;
51545 case 'c':
51546 if (!TARGET_AVX)
51547 str = "avx";
51548 break;
51549 case 'd':
51550 if (!TARGET_AVX2)
51551 str = "avx2";
51552 break;
51553 default:
51554 gcc_unreachable ();
51556 if (str == NULL)
51557 return;
51558 push_cfun (NULL);
51559 tree args = build_tree_list (NULL_TREE, build_string (strlen (str), str));
51560 bool ok = ix86_valid_target_attribute_p (node->decl, NULL, args, 0);
51561 gcc_assert (ok);
51562 pop_cfun ();
51563 ix86_reset_previous_fndecl ();
51564 ix86_set_current_function (node->decl);
51567 /* If SIMD clone NODE can't be used in a vectorized loop
51568    in the current function, return -1; otherwise return the badness of using it
51569 (0 if it is most desirable from vecsize_mangle point of view, 1
51570 slightly less desirable, etc.). */
51572 static int
51573 ix86_simd_clone_usable (struct cgraph_node *node)
51575 switch (node->simdclone->vecsize_mangle)
51577 case 'b':
51578 if (!TARGET_SSE2)
51579 return -1;
51580 if (!TARGET_AVX)
51581 return 0;
51582 return TARGET_AVX2 ? 2 : 1;
51583 case 'c':
51584 if (!TARGET_AVX)
51585 return -1;
51586 return TARGET_AVX2 ? 1 : 0;
51587 break;
51588 case 'd':
51589 if (!TARGET_AVX2)
51590 return -1;
51591 return 0;
51592 default:
51593 gcc_unreachable ();
51597 /* This function adjusts the unroll factor based on
51598    the hardware capabilities.  For example, bdver3 has
51599    a loop buffer which makes unrolling of smaller
51600    loops less important.  This function decides the
51601    unroll factor using the number of memory references
51602    (the value 32 is used) as a heuristic. */
51604 static unsigned
51605 ix86_loop_unroll_adjust (unsigned nunroll, struct loop *loop)
51607 basic_block *bbs;
51608 rtx_insn *insn;
51609 unsigned i;
51610 unsigned mem_count = 0;
51612 if (!TARGET_ADJUST_UNROLL)
51613 return nunroll;
51615 /* Count the number of memory references within the loop body.
51616 This value determines the unrolling factor for bdver3 and bdver4
51617 architectures. */
51618 subrtx_iterator::array_type array;
51619 bbs = get_loop_body (loop);
51620 for (i = 0; i < loop->num_nodes; i++)
51621 FOR_BB_INSNS (bbs[i], insn)
51622 if (NONDEBUG_INSN_P (insn))
51623 FOR_EACH_SUBRTX (iter, array, PATTERN (insn), NONCONST)
51624 if (const_rtx x = *iter)
51625 if (MEM_P (x))
51627 machine_mode mode = GET_MODE (x);
51628 unsigned int n_words = GET_MODE_SIZE (mode) / UNITS_PER_WORD;
51629 if (n_words > 4)
51630 mem_count += 2;
51631 else
51632 mem_count += 1;
51634 free (bbs);
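  /* For example, a loop body with 8 counted memory references yields an
     unroll factor of 32 / 8 == 4, while one with more than 32 references
     keeps the caller's NUNROLL.  */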
51636   if (mem_count && mem_count <= 32)
51637     return 32 / mem_count;
51639 return nunroll;
51643 /* Implement TARGET_FLOAT_EXCEPTIONS_ROUNDING_SUPPORTED_P. */
51645 static bool
51646 ix86_float_exceptions_rounding_supported_p (void)
51648 /* For x87 floating point with standard excess precision handling,
51649 there is no adddf3 pattern (since x87 floating point only has
51650 XFmode operations) so the default hook implementation gets this
51651 wrong. */
51652 return TARGET_80387 || TARGET_SSE_MATH;
51655 /* Implement TARGET_ATOMIC_ASSIGN_EXPAND_FENV. */
51657 static void
51658 ix86_atomic_assign_expand_fenv (tree *hold, tree *clear, tree *update)
51660 if (!TARGET_80387 && !TARGET_SSE_MATH)
51661 return;
51662 tree exceptions_var = create_tmp_var (integer_type_node);
51663 if (TARGET_80387)
51665 tree fenv_index_type = build_index_type (size_int (6));
51666 tree fenv_type = build_array_type (unsigned_type_node, fenv_index_type);
51667 tree fenv_var = create_tmp_var (fenv_type);
51668 mark_addressable (fenv_var);
51669 tree fenv_ptr = build_pointer_type (fenv_type);
51670 tree fenv_addr = build1 (ADDR_EXPR, fenv_ptr, fenv_var);
51671 fenv_addr = fold_convert (ptr_type_node, fenv_addr);
51672 tree fnstenv = ix86_builtins[IX86_BUILTIN_FNSTENV];
51673 tree fldenv = ix86_builtins[IX86_BUILTIN_FLDENV];
51674 tree fnstsw = ix86_builtins[IX86_BUILTIN_FNSTSW];
51675 tree fnclex = ix86_builtins[IX86_BUILTIN_FNCLEX];
51676 tree hold_fnstenv = build_call_expr (fnstenv, 1, fenv_addr);
51677 tree hold_fnclex = build_call_expr (fnclex, 0);
51678 *hold = build2 (COMPOUND_EXPR, void_type_node, hold_fnstenv,
51679 hold_fnclex);
51680 *clear = build_call_expr (fnclex, 0);
51681 tree sw_var = create_tmp_var (short_unsigned_type_node);
51682 tree fnstsw_call = build_call_expr (fnstsw, 0);
51683 tree sw_mod = build2 (MODIFY_EXPR, short_unsigned_type_node,
51684 sw_var, fnstsw_call);
51685 tree exceptions_x87 = fold_convert (integer_type_node, sw_var);
51686 tree update_mod = build2 (MODIFY_EXPR, integer_type_node,
51687 exceptions_var, exceptions_x87);
51688 *update = build2 (COMPOUND_EXPR, integer_type_node,
51689 sw_mod, update_mod);
51690 tree update_fldenv = build_call_expr (fldenv, 1, fenv_addr);
51691 *update = build2 (COMPOUND_EXPR, void_type_node, *update, update_fldenv);
51693 if (TARGET_SSE_MATH)
51695 tree mxcsr_orig_var = create_tmp_var (unsigned_type_node);
51696 tree mxcsr_mod_var = create_tmp_var (unsigned_type_node);
51697 tree stmxcsr = ix86_builtins[IX86_BUILTIN_STMXCSR];
51698 tree ldmxcsr = ix86_builtins[IX86_BUILTIN_LDMXCSR];
51699 tree stmxcsr_hold_call = build_call_expr (stmxcsr, 0);
51700 tree hold_assign_orig = build2 (MODIFY_EXPR, unsigned_type_node,
51701 mxcsr_orig_var, stmxcsr_hold_call);
51702 tree hold_mod_val = build2 (BIT_IOR_EXPR, unsigned_type_node,
51703 mxcsr_orig_var,
51704 build_int_cst (unsigned_type_node, 0x1f80));
51705 hold_mod_val = build2 (BIT_AND_EXPR, unsigned_type_node, hold_mod_val,
51706 build_int_cst (unsigned_type_node, 0xffffffc0));
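      /* In MXCSR, 0x1f80 sets the six exception mask bits (bits 7-12) and
	 ANDing with 0xffffffc0 clears the six exception flag bits (bits 0-5),
	 so the "hold" state runs with all exceptions masked and no stale
	 flags.  */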
51707 tree hold_assign_mod = build2 (MODIFY_EXPR, unsigned_type_node,
51708 mxcsr_mod_var, hold_mod_val);
51709 tree ldmxcsr_hold_call = build_call_expr (ldmxcsr, 1, mxcsr_mod_var);
51710 tree hold_all = build2 (COMPOUND_EXPR, unsigned_type_node,
51711 hold_assign_orig, hold_assign_mod);
51712 hold_all = build2 (COMPOUND_EXPR, void_type_node, hold_all,
51713 ldmxcsr_hold_call);
51714 if (*hold)
51715 *hold = build2 (COMPOUND_EXPR, void_type_node, *hold, hold_all);
51716 else
51717 *hold = hold_all;
51718 tree ldmxcsr_clear_call = build_call_expr (ldmxcsr, 1, mxcsr_mod_var);
51719 if (*clear)
51720 *clear = build2 (COMPOUND_EXPR, void_type_node, *clear,
51721 ldmxcsr_clear_call);
51722 else
51723 *clear = ldmxcsr_clear_call;
51724 tree stxmcsr_update_call = build_call_expr (stmxcsr, 0);
51725 tree exceptions_sse = fold_convert (integer_type_node,
51726 stxmcsr_update_call);
51727 if (*update)
51729 tree exceptions_mod = build2 (BIT_IOR_EXPR, integer_type_node,
51730 exceptions_var, exceptions_sse);
51731 tree exceptions_assign = build2 (MODIFY_EXPR, integer_type_node,
51732 exceptions_var, exceptions_mod);
51733 *update = build2 (COMPOUND_EXPR, integer_type_node, *update,
51734 exceptions_assign);
51736 else
51737 *update = build2 (MODIFY_EXPR, integer_type_node,
51738 exceptions_var, exceptions_sse);
51739 tree ldmxcsr_update_call = build_call_expr (ldmxcsr, 1, mxcsr_orig_var);
51740 *update = build2 (COMPOUND_EXPR, void_type_node, *update,
51741 ldmxcsr_update_call);
51743 tree atomic_feraiseexcept
51744 = builtin_decl_implicit (BUILT_IN_ATOMIC_FERAISEEXCEPT);
51745 tree atomic_feraiseexcept_call = build_call_expr (atomic_feraiseexcept,
51746 1, exceptions_var);
51747 *update = build2 (COMPOUND_EXPR, void_type_node, *update,
51748 atomic_feraiseexcept_call);
51751 /* Return mode to be used for bounds or VOIDmode
51752 if bounds are not supported. */
51754 static enum machine_mode
51755 ix86_mpx_bound_mode ()
51757 /* Do not support pointer checker if MPX
51758 is not enabled. */
51759 if (!TARGET_MPX)
51761 if (flag_check_pointer_bounds)
51762 	warning (0, "Pointer Checker requires MPX support on this target."
51763 		 " Use the -mmpx option to enable MPX.");
51764 return VOIDmode;
51767 return BNDmode;
51770 /* Return constant used to statically initialize constant bounds.
51772 This function is used to create special bound values. For now
51773 only INIT bounds and NONE bounds are expected. More special
51774 values may be added later. */
51776 static tree
51777 ix86_make_bounds_constant (HOST_WIDE_INT lb, HOST_WIDE_INT ub)
51779 tree low = lb ? build_minus_one_cst (pointer_sized_int_node)
51780 : build_zero_cst (pointer_sized_int_node);
51781 tree high = ub ? build_zero_cst (pointer_sized_int_node)
51782 : build_minus_one_cst (pointer_sized_int_node);
51784 /* This function is supposed to be used to create INIT and
51785 NONE bounds only. */
51786 gcc_assert ((lb == 0 && ub == -1)
51787 || (lb == -1 && ub == 0));
51789 return build_complex (NULL, low, high);
51792 /* Generate a list of statements STMTS to initialize pointer bounds
51793 variable VAR with bounds LB and UB. Return the number of generated
51794 statements. */
51796 static int
51797 ix86_initialize_bounds (tree var, tree lb, tree ub, tree *stmts)
51799 tree bnd_ptr = build_pointer_type (pointer_sized_int_node);
51800 tree lhs, modify, var_p;
51802 ub = build1 (BIT_NOT_EXPR, pointer_sized_int_node, ub);
51803 var_p = fold_convert (bnd_ptr, build_fold_addr_expr (var));
51805 lhs = build1 (INDIRECT_REF, pointer_sized_int_node, var_p);
51806 modify = build2 (MODIFY_EXPR, TREE_TYPE (lhs), lhs, lb);
51807 append_to_statement_list (modify, stmts);
51809 lhs = build1 (INDIRECT_REF, pointer_sized_int_node,
51810 build2 (POINTER_PLUS_EXPR, bnd_ptr, var_p,
51811 TYPE_SIZE_UNIT (pointer_sized_int_node)));
51812 modify = build2 (MODIFY_EXPR, TREE_TYPE (lhs), lhs, ub);
51813 append_to_statement_list (modify, stmts);
51815 return 2;
51818 #if !TARGET_MACHO && !TARGET_DLLIMPORT_DECL_ATTRIBUTES
51819 /* For i386, a common symbol is local only for non-PIE binaries.  For
51820    x86-64, a common symbol is local only for non-PIE binaries or when the
51821    linker supports copy relocations in PIE binaries. */
51823 static bool
51824 ix86_binds_local_p (const_tree exp)
51826 return default_binds_local_p_3 (exp, flag_shlib != 0, true, true,
51827 (!flag_pic
51828 || (TARGET_64BIT
51829 && HAVE_LD_PIE_COPYRELOC != 0)));
51831 #endif
51833 /* If MEM is in the form of [base+offset], extract the two parts
51834    of the address into BASE and OFFSET and return true; otherwise return false.  */
51836 static bool
51837 extract_base_offset_in_addr (rtx mem, rtx *base, rtx *offset)
51839 rtx addr;
51841 gcc_assert (MEM_P (mem));
51843 addr = XEXP (mem, 0);
51845 if (GET_CODE (addr) == CONST)
51846 addr = XEXP (addr, 0);
51848 if (REG_P (addr) || GET_CODE (addr) == SYMBOL_REF)
51850 *base = addr;
51851 *offset = const0_rtx;
51852 return true;
51855 if (GET_CODE (addr) == PLUS
51856 && (REG_P (XEXP (addr, 0))
51857 || GET_CODE (XEXP (addr, 0)) == SYMBOL_REF)
51858 && CONST_INT_P (XEXP (addr, 1)))
51860 *base = XEXP (addr, 0);
51861 *offset = XEXP (addr, 1);
51862 return true;
51865 return false;
51868 /* Given OPERANDS of consecutive load/store, check if we can merge
51869    them into a move-multiple.  LOAD is true if they are load instructions.
51870    MODE is the mode of the memory operands.  */
51872 bool
51873 ix86_operands_ok_for_move_multiple (rtx *operands, bool load,
51874 enum machine_mode mode)
51876 HOST_WIDE_INT offval_1, offval_2, msize;
51877 rtx mem_1, mem_2, reg_1, reg_2, base_1, base_2, offset_1, offset_2;
51879 if (load)
51881 mem_1 = operands[1];
51882 mem_2 = operands[3];
51883 reg_1 = operands[0];
51884 reg_2 = operands[2];
51886 else
51888 mem_1 = operands[0];
51889 mem_2 = operands[2];
51890 reg_1 = operands[1];
51891 reg_2 = operands[3];
51894 gcc_assert (REG_P (reg_1) && REG_P (reg_2));
51896 if (REGNO (reg_1) != REGNO (reg_2))
51897 return false;
51899 /* Check if the addresses are in the form of [base+offset]. */
51900 if (!extract_base_offset_in_addr (mem_1, &base_1, &offset_1))
51901 return false;
51902 if (!extract_base_offset_in_addr (mem_2, &base_2, &offset_2))
51903 return false;
51905 /* Check if the bases are the same. */
51906 if (!rtx_equal_p (base_1, base_2))
51907 return false;
51909 offval_1 = INTVAL (offset_1);
51910 offval_2 = INTVAL (offset_2);
51911 msize = GET_MODE_SIZE (mode);
51912 /* Check if mem_1 is adjacent to mem_2 and mem_1 has lower address. */
51913 if (offval_1 + msize != offval_2)
51914 return false;
51916 return true;
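/* Illustrative example: for two DImode loads, the address check accepts
   memory operands such as 8(%rdi) and 16(%rdi) (same base, offsets differing
   by exactly GET_MODE_SIZE (DImode) == 8, lower address first); any other
   layout is rejected.  */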
51919 /* Initialize the GCC target structure. */
51920 #undef TARGET_RETURN_IN_MEMORY
51921 #define TARGET_RETURN_IN_MEMORY ix86_return_in_memory
51923 #undef TARGET_LEGITIMIZE_ADDRESS
51924 #define TARGET_LEGITIMIZE_ADDRESS ix86_legitimize_address
51926 #undef TARGET_ATTRIBUTE_TABLE
51927 #define TARGET_ATTRIBUTE_TABLE ix86_attribute_table
51928 #undef TARGET_FUNCTION_ATTRIBUTE_INLINABLE_P
51929 #define TARGET_FUNCTION_ATTRIBUTE_INLINABLE_P hook_bool_const_tree_true
51930 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
51931 # undef TARGET_MERGE_DECL_ATTRIBUTES
51932 # define TARGET_MERGE_DECL_ATTRIBUTES merge_dllimport_decl_attributes
51933 #endif
51935 #undef TARGET_COMP_TYPE_ATTRIBUTES
51936 #define TARGET_COMP_TYPE_ATTRIBUTES ix86_comp_type_attributes
51938 #undef TARGET_INIT_BUILTINS
51939 #define TARGET_INIT_BUILTINS ix86_init_builtins
51940 #undef TARGET_BUILTIN_DECL
51941 #define TARGET_BUILTIN_DECL ix86_builtin_decl
51942 #undef TARGET_EXPAND_BUILTIN
51943 #define TARGET_EXPAND_BUILTIN ix86_expand_builtin
51945 #undef TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION
51946 #define TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION \
51947 ix86_builtin_vectorized_function
51949 #undef TARGET_VECTORIZE_BUILTIN_TM_LOAD
51950 #define TARGET_VECTORIZE_BUILTIN_TM_LOAD ix86_builtin_tm_load
51952 #undef TARGET_VECTORIZE_BUILTIN_TM_STORE
51953 #define TARGET_VECTORIZE_BUILTIN_TM_STORE ix86_builtin_tm_store
51955 #undef TARGET_VECTORIZE_BUILTIN_GATHER
51956 #define TARGET_VECTORIZE_BUILTIN_GATHER ix86_vectorize_builtin_gather
51958 #undef TARGET_BUILTIN_RECIPROCAL
51959 #define TARGET_BUILTIN_RECIPROCAL ix86_builtin_reciprocal
51961 #undef TARGET_ASM_FUNCTION_EPILOGUE
51962 #define TARGET_ASM_FUNCTION_EPILOGUE ix86_output_function_epilogue
51964 #undef TARGET_ENCODE_SECTION_INFO
51965 #ifndef SUBTARGET_ENCODE_SECTION_INFO
51966 #define TARGET_ENCODE_SECTION_INFO ix86_encode_section_info
51967 #else
51968 #define TARGET_ENCODE_SECTION_INFO SUBTARGET_ENCODE_SECTION_INFO
51969 #endif
51971 #undef TARGET_ASM_OPEN_PAREN
51972 #define TARGET_ASM_OPEN_PAREN ""
51973 #undef TARGET_ASM_CLOSE_PAREN
51974 #define TARGET_ASM_CLOSE_PAREN ""
51976 #undef TARGET_ASM_BYTE_OP
51977 #define TARGET_ASM_BYTE_OP ASM_BYTE
51979 #undef TARGET_ASM_ALIGNED_HI_OP
51980 #define TARGET_ASM_ALIGNED_HI_OP ASM_SHORT
51981 #undef TARGET_ASM_ALIGNED_SI_OP
51982 #define TARGET_ASM_ALIGNED_SI_OP ASM_LONG
51983 #ifdef ASM_QUAD
51984 #undef TARGET_ASM_ALIGNED_DI_OP
51985 #define TARGET_ASM_ALIGNED_DI_OP ASM_QUAD
51986 #endif
51988 #undef TARGET_PROFILE_BEFORE_PROLOGUE
51989 #define TARGET_PROFILE_BEFORE_PROLOGUE ix86_profile_before_prologue
51991 #undef TARGET_MANGLE_DECL_ASSEMBLER_NAME
51992 #define TARGET_MANGLE_DECL_ASSEMBLER_NAME ix86_mangle_decl_assembler_name
51994 #undef TARGET_ASM_UNALIGNED_HI_OP
51995 #define TARGET_ASM_UNALIGNED_HI_OP TARGET_ASM_ALIGNED_HI_OP
51996 #undef TARGET_ASM_UNALIGNED_SI_OP
51997 #define TARGET_ASM_UNALIGNED_SI_OP TARGET_ASM_ALIGNED_SI_OP
51998 #undef TARGET_ASM_UNALIGNED_DI_OP
51999 #define TARGET_ASM_UNALIGNED_DI_OP TARGET_ASM_ALIGNED_DI_OP
52001 #undef TARGET_PRINT_OPERAND
52002 #define TARGET_PRINT_OPERAND ix86_print_operand
52003 #undef TARGET_PRINT_OPERAND_ADDRESS
52004 #define TARGET_PRINT_OPERAND_ADDRESS ix86_print_operand_address
52005 #undef TARGET_PRINT_OPERAND_PUNCT_VALID_P
52006 #define TARGET_PRINT_OPERAND_PUNCT_VALID_P ix86_print_operand_punct_valid_p
52007 #undef TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA
52008 #define TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA i386_asm_output_addr_const_extra
52010 #undef TARGET_SCHED_INIT_GLOBAL
52011 #define TARGET_SCHED_INIT_GLOBAL ix86_sched_init_global
52012 #undef TARGET_SCHED_ADJUST_COST
52013 #define TARGET_SCHED_ADJUST_COST ix86_adjust_cost
52014 #undef TARGET_SCHED_ISSUE_RATE
52015 #define TARGET_SCHED_ISSUE_RATE ix86_issue_rate
52016 #undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD
52017 #define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD \
52018 ia32_multipass_dfa_lookahead
52019 #undef TARGET_SCHED_MACRO_FUSION_P
52020 #define TARGET_SCHED_MACRO_FUSION_P ix86_macro_fusion_p
52021 #undef TARGET_SCHED_MACRO_FUSION_PAIR_P
52022 #define TARGET_SCHED_MACRO_FUSION_PAIR_P ix86_macro_fusion_pair_p
52024 #undef TARGET_FUNCTION_OK_FOR_SIBCALL
52025 #define TARGET_FUNCTION_OK_FOR_SIBCALL ix86_function_ok_for_sibcall
52027 #undef TARGET_MEMMODEL_CHECK
52028 #define TARGET_MEMMODEL_CHECK ix86_memmodel_check
52030 #undef TARGET_ATOMIC_ASSIGN_EXPAND_FENV
52031 #define TARGET_ATOMIC_ASSIGN_EXPAND_FENV ix86_atomic_assign_expand_fenv
#ifdef HAVE_AS_TLS
#undef TARGET_HAVE_TLS
#define TARGET_HAVE_TLS true
#endif
#undef TARGET_CANNOT_FORCE_CONST_MEM
#define TARGET_CANNOT_FORCE_CONST_MEM ix86_cannot_force_const_mem
#undef TARGET_USE_BLOCKS_FOR_CONSTANT_P
#define TARGET_USE_BLOCKS_FOR_CONSTANT_P hook_bool_mode_const_rtx_true

#undef TARGET_DELEGITIMIZE_ADDRESS
#define TARGET_DELEGITIMIZE_ADDRESS ix86_delegitimize_address

#undef TARGET_MS_BITFIELD_LAYOUT_P
#define TARGET_MS_BITFIELD_LAYOUT_P ix86_ms_bitfield_layout_p

#if TARGET_MACHO
#undef TARGET_BINDS_LOCAL_P
#define TARGET_BINDS_LOCAL_P darwin_binds_local_p
#else
#undef TARGET_BINDS_LOCAL_P
#define TARGET_BINDS_LOCAL_P ix86_binds_local_p
#endif
#if TARGET_DLLIMPORT_DECL_ATTRIBUTES
#undef TARGET_BINDS_LOCAL_P
#define TARGET_BINDS_LOCAL_P i386_pe_binds_local_p
#endif

#undef TARGET_ASM_OUTPUT_MI_THUNK
#define TARGET_ASM_OUTPUT_MI_THUNK x86_output_mi_thunk
#undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
#define TARGET_ASM_CAN_OUTPUT_MI_THUNK x86_can_output_mi_thunk

#undef TARGET_ASM_FILE_START
#define TARGET_ASM_FILE_START x86_file_start

#undef TARGET_OPTION_OVERRIDE
#define TARGET_OPTION_OVERRIDE ix86_option_override
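
/* Cost hooks: relative register/memory move costs, RTX costs and address
   costs feed the register allocator and most RTL optimizers.  */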
#undef TARGET_REGISTER_MOVE_COST
#define TARGET_REGISTER_MOVE_COST ix86_register_move_cost
#undef TARGET_MEMORY_MOVE_COST
#define TARGET_MEMORY_MOVE_COST ix86_memory_move_cost
#undef TARGET_RTX_COSTS
#define TARGET_RTX_COSTS ix86_rtx_costs
#undef TARGET_ADDRESS_COST
#define TARGET_ADDRESS_COST ix86_address_cost

#undef TARGET_FIXED_CONDITION_CODE_REGS
#define TARGET_FIXED_CONDITION_CODE_REGS ix86_fixed_condition_code_regs
#undef TARGET_CC_MODES_COMPATIBLE
#define TARGET_CC_MODES_COMPATIBLE ix86_cc_modes_compatible

#undef TARGET_MACHINE_DEPENDENT_REORG
#define TARGET_MACHINE_DEPENDENT_REORG ix86_reorg

#undef TARGET_BUILTIN_SETJMP_FRAME_VALUE
#define TARGET_BUILTIN_SETJMP_FRAME_VALUE ix86_builtin_setjmp_frame_value

#undef TARGET_BUILD_BUILTIN_VA_LIST
#define TARGET_BUILD_BUILTIN_VA_LIST ix86_build_builtin_va_list

#undef TARGET_FOLD_BUILTIN
#define TARGET_FOLD_BUILTIN ix86_fold_builtin

#undef TARGET_COMPARE_VERSION_PRIORITY
#define TARGET_COMPARE_VERSION_PRIORITY ix86_compare_version_priority

#undef TARGET_GENERATE_VERSION_DISPATCHER_BODY
#define TARGET_GENERATE_VERSION_DISPATCHER_BODY \
  ix86_generate_version_dispatcher_body

#undef TARGET_GET_FUNCTION_VERSIONS_DISPATCHER
#define TARGET_GET_FUNCTION_VERSIONS_DISPATCHER \
  ix86_get_function_versions_dispatcher

#undef TARGET_ENUM_VA_LIST_P
#define TARGET_ENUM_VA_LIST_P ix86_enum_va_list

#undef TARGET_FN_ABI_VA_LIST
#define TARGET_FN_ABI_VA_LIST ix86_fn_abi_va_list

#undef TARGET_CANONICAL_VA_LIST_TYPE
#define TARGET_CANONICAL_VA_LIST_TYPE ix86_canonical_va_list_type

#undef TARGET_EXPAND_BUILTIN_VA_START
#define TARGET_EXPAND_BUILTIN_VA_START ix86_va_start

#undef TARGET_MD_ASM_ADJUST
#define TARGET_MD_ASM_ADJUST ix86_md_asm_adjust
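
/* Argument passing and calling-convention hooks: prototype promotion,
   varargs setup, per-argument advance/lookup, the static chain, trampoline
   initialization and dynamic stack realignment (DRAP).  */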
#undef TARGET_PROMOTE_PROTOTYPES
#define TARGET_PROMOTE_PROTOTYPES hook_bool_const_tree_true
#undef TARGET_SETUP_INCOMING_VARARGS
#define TARGET_SETUP_INCOMING_VARARGS ix86_setup_incoming_varargs
#undef TARGET_MUST_PASS_IN_STACK
#define TARGET_MUST_PASS_IN_STACK ix86_must_pass_in_stack
#undef TARGET_FUNCTION_ARG_ADVANCE
#define TARGET_FUNCTION_ARG_ADVANCE ix86_function_arg_advance
#undef TARGET_FUNCTION_ARG
#define TARGET_FUNCTION_ARG ix86_function_arg
#undef TARGET_INIT_PIC_REG
#define TARGET_INIT_PIC_REG ix86_init_pic_reg
#undef TARGET_USE_PSEUDO_PIC_REG
#define TARGET_USE_PSEUDO_PIC_REG ix86_use_pseudo_pic_reg
#undef TARGET_FUNCTION_ARG_BOUNDARY
#define TARGET_FUNCTION_ARG_BOUNDARY ix86_function_arg_boundary
#undef TARGET_PASS_BY_REFERENCE
#define TARGET_PASS_BY_REFERENCE ix86_pass_by_reference
#undef TARGET_INTERNAL_ARG_POINTER
#define TARGET_INTERNAL_ARG_POINTER ix86_internal_arg_pointer
#undef TARGET_UPDATE_STACK_BOUNDARY
#define TARGET_UPDATE_STACK_BOUNDARY ix86_update_stack_boundary
#undef TARGET_GET_DRAP_RTX
#define TARGET_GET_DRAP_RTX ix86_get_drap_rtx
#undef TARGET_STRICT_ARGUMENT_NAMING
#define TARGET_STRICT_ARGUMENT_NAMING hook_bool_CUMULATIVE_ARGS_true
#undef TARGET_STATIC_CHAIN
#define TARGET_STATIC_CHAIN ix86_static_chain
#undef TARGET_TRAMPOLINE_INIT
#define TARGET_TRAMPOLINE_INIT ix86_trampoline_init
#undef TARGET_RETURN_POPS_ARGS
#define TARGET_RETURN_POPS_ARGS ix86_return_pops_args

#undef TARGET_LEGITIMATE_COMBINED_INSN
#define TARGET_LEGITIMATE_COMBINED_INSN ix86_legitimate_combined_insn

#undef TARGET_ASAN_SHADOW_OFFSET
#define TARGET_ASAN_SHADOW_OFFSET ix86_asan_shadow_offset

#undef TARGET_GIMPLIFY_VA_ARG_EXPR
#define TARGET_GIMPLIFY_VA_ARG_EXPR ix86_gimplify_va_arg

#undef TARGET_SCALAR_MODE_SUPPORTED_P
#define TARGET_SCALAR_MODE_SUPPORTED_P ix86_scalar_mode_supported_p

#undef TARGET_VECTOR_MODE_SUPPORTED_P
#define TARGET_VECTOR_MODE_SUPPORTED_P ix86_vector_mode_supported_p

#undef TARGET_LIBGCC_FLOATING_MODE_SUPPORTED_P
#define TARGET_LIBGCC_FLOATING_MODE_SUPPORTED_P \
  ix86_libgcc_floating_mode_supported_p

#undef TARGET_C_MODE_FOR_SUFFIX
#define TARGET_C_MODE_FOR_SUFFIX ix86_c_mode_for_suffix

#ifdef HAVE_AS_TLS
#undef TARGET_ASM_OUTPUT_DWARF_DTPREL
#define TARGET_ASM_OUTPUT_DWARF_DTPREL i386_output_dwarf_dtprel
#endif

#ifdef SUBTARGET_INSERT_ATTRIBUTES
#undef TARGET_INSERT_ATTRIBUTES
#define TARGET_INSERT_ATTRIBUTES SUBTARGET_INSERT_ATTRIBUTES
#endif

#undef TARGET_MANGLE_TYPE
#define TARGET_MANGLE_TYPE ix86_mangle_type

#if !TARGET_MACHO
#undef TARGET_STACK_PROTECT_FAIL
#define TARGET_STACK_PROTECT_FAIL ix86_stack_protect_fail
#endif

#undef TARGET_FUNCTION_VALUE
#define TARGET_FUNCTION_VALUE ix86_function_value

#undef TARGET_FUNCTION_VALUE_REGNO_P
#define TARGET_FUNCTION_VALUE_REGNO_P ix86_function_value_regno_p

#undef TARGET_PROMOTE_FUNCTION_MODE
#define TARGET_PROMOTE_FUNCTION_MODE ix86_promote_function_mode

#undef TARGET_OVERRIDE_OPTIONS_AFTER_CHANGE
#define TARGET_OVERRIDE_OPTIONS_AFTER_CHANGE ix86_override_options_after_change

#undef TARGET_MEMBER_TYPE_FORCES_BLK
#define TARGET_MEMBER_TYPE_FORCES_BLK ix86_member_type_forces_blk

#undef TARGET_INSTANTIATE_DECLS
#define TARGET_INSTANTIATE_DECLS ix86_instantiate_decls

#undef TARGET_SECONDARY_RELOAD
#define TARGET_SECONDARY_RELOAD ix86_secondary_reload

#undef TARGET_CLASS_MAX_NREGS
#define TARGET_CLASS_MAX_NREGS ix86_class_max_nregs

#undef TARGET_PREFERRED_RELOAD_CLASS
#define TARGET_PREFERRED_RELOAD_CLASS ix86_preferred_reload_class
#undef TARGET_PREFERRED_OUTPUT_RELOAD_CLASS
#define TARGET_PREFERRED_OUTPUT_RELOAD_CLASS ix86_preferred_output_reload_class
#undef TARGET_CLASS_LIKELY_SPILLED_P
#define TARGET_CLASS_LIKELY_SPILLED_P ix86_class_likely_spilled_p
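
/* Vectorizer hooks: cost model, constant-permutation support, preferred
   SIMD modes and the set of vector sizes to try when auto-vectorizing.  */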
#undef TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST
#define TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST \
  ix86_builtin_vectorization_cost
#undef TARGET_VECTORIZE_VEC_PERM_CONST_OK
#define TARGET_VECTORIZE_VEC_PERM_CONST_OK \
  ix86_vectorize_vec_perm_const_ok
#undef TARGET_VECTORIZE_PREFERRED_SIMD_MODE
#define TARGET_VECTORIZE_PREFERRED_SIMD_MODE \
  ix86_preferred_simd_mode
#undef TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_SIZES
#define TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_SIZES \
  ix86_autovectorize_vector_sizes
#undef TARGET_VECTORIZE_INIT_COST
#define TARGET_VECTORIZE_INIT_COST ix86_init_cost
#undef TARGET_VECTORIZE_ADD_STMT_COST
#define TARGET_VECTORIZE_ADD_STMT_COST ix86_add_stmt_cost
#undef TARGET_VECTORIZE_FINISH_COST
#define TARGET_VECTORIZE_FINISH_COST ix86_finish_cost
#undef TARGET_VECTORIZE_DESTROY_COST_DATA
#define TARGET_VECTORIZE_DESTROY_COST_DATA ix86_destroy_cost_data
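
/* Per-function target option handling (saving, restoring and printing the
   option state used by the target attribute) and function multi-versioning.  */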
#undef TARGET_SET_CURRENT_FUNCTION
#define TARGET_SET_CURRENT_FUNCTION ix86_set_current_function

#undef TARGET_OPTION_VALID_ATTRIBUTE_P
#define TARGET_OPTION_VALID_ATTRIBUTE_P ix86_valid_target_attribute_p

#undef TARGET_OPTION_SAVE
#define TARGET_OPTION_SAVE ix86_function_specific_save

#undef TARGET_OPTION_RESTORE
#define TARGET_OPTION_RESTORE ix86_function_specific_restore

#undef TARGET_OPTION_POST_STREAM_IN
#define TARGET_OPTION_POST_STREAM_IN ix86_function_specific_post_stream_in

#undef TARGET_OPTION_PRINT
#define TARGET_OPTION_PRINT ix86_function_specific_print

#undef TARGET_OPTION_FUNCTION_VERSIONS
#define TARGET_OPTION_FUNCTION_VERSIONS ix86_function_versions

#undef TARGET_CAN_INLINE_P
#define TARGET_CAN_INLINE_P ix86_can_inline_p

#undef TARGET_EXPAND_TO_RTL_HOOK
#define TARGET_EXPAND_TO_RTL_HOOK ix86_maybe_switch_abi

#undef TARGET_LEGITIMATE_ADDRESS_P
#define TARGET_LEGITIMATE_ADDRESS_P ix86_legitimate_address_p

#undef TARGET_LRA_P
#define TARGET_LRA_P hook_bool_void_true

#undef TARGET_REGISTER_PRIORITY
#define TARGET_REGISTER_PRIORITY ix86_register_priority

#undef TARGET_REGISTER_USAGE_LEVELING_P
#define TARGET_REGISTER_USAGE_LEVELING_P hook_bool_void_true

#undef TARGET_LEGITIMATE_CONSTANT_P
#define TARGET_LEGITIMATE_CONSTANT_P ix86_legitimate_constant_p

#undef TARGET_FRAME_POINTER_REQUIRED
#define TARGET_FRAME_POINTER_REQUIRED ix86_frame_pointer_required

#undef TARGET_CAN_ELIMINATE
#define TARGET_CAN_ELIMINATE ix86_can_eliminate

#undef TARGET_EXTRA_LIVE_ON_ENTRY
#define TARGET_EXTRA_LIVE_ON_ENTRY ix86_live_on_entry

#undef TARGET_ASM_CODE_END
#define TARGET_ASM_CODE_END ix86_code_end

#undef TARGET_CONDITIONAL_REGISTER_USAGE
#define TARGET_CONDITIONAL_REGISTER_USAGE ix86_conditional_register_usage

#if TARGET_MACHO
#undef TARGET_INIT_LIBFUNCS
#define TARGET_INIT_LIBFUNCS darwin_rename_builtins
#endif

#undef TARGET_LOOP_UNROLL_ADJUST
#define TARGET_LOOP_UNROLL_ADJUST ix86_loop_unroll_adjust

#undef TARGET_SPILL_CLASS
#define TARGET_SPILL_CLASS ix86_spill_class
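
/* SIMD clone hooks, used when expanding "#pragma omp declare simd" (and
   similar) vector clones of functions.  */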
#undef TARGET_SIMD_CLONE_COMPUTE_VECSIZE_AND_SIMDLEN
#define TARGET_SIMD_CLONE_COMPUTE_VECSIZE_AND_SIMDLEN \
  ix86_simd_clone_compute_vecsize_and_simdlen

#undef TARGET_SIMD_CLONE_ADJUST
#define TARGET_SIMD_CLONE_ADJUST \
  ix86_simd_clone_adjust

#undef TARGET_SIMD_CLONE_USABLE
#define TARGET_SIMD_CLONE_USABLE \
  ix86_simd_clone_usable

#undef TARGET_FLOAT_EXCEPTIONS_ROUNDING_SUPPORTED_P
#define TARGET_FLOAT_EXCEPTIONS_ROUNDING_SUPPORTED_P \
  ix86_float_exceptions_rounding_supported_p
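
/* Mode-switching hooks.  These drive the optimize_mode_switching pass; on
   x86 they are used, for example, to track the AVX upper-register state so
   that vzeroupper can be emitted where needed.  */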
#undef TARGET_MODE_EMIT
#define TARGET_MODE_EMIT ix86_emit_mode_set

#undef TARGET_MODE_NEEDED
#define TARGET_MODE_NEEDED ix86_mode_needed

#undef TARGET_MODE_AFTER
#define TARGET_MODE_AFTER ix86_mode_after

#undef TARGET_MODE_ENTRY
#define TARGET_MODE_ENTRY ix86_mode_entry

#undef TARGET_MODE_EXIT
#define TARGET_MODE_EXIT ix86_mode_exit

#undef TARGET_MODE_PRIORITY
#define TARGET_MODE_PRIORITY ix86_mode_priority

#undef TARGET_CALL_FUSAGE_CONTAINS_NON_CALLEE_CLOBBERS
#define TARGET_CALL_FUSAGE_CONTAINS_NON_CALLEE_CLOBBERS true
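
/* Pointer Bounds Checker (Intel MPX, -fcheck-pointer-bounds) hooks:
   loading, storing and returning bounds, the bound machine mode, and the
   builtins that implement the instrumentation.  */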
#undef TARGET_LOAD_BOUNDS_FOR_ARG
#define TARGET_LOAD_BOUNDS_FOR_ARG ix86_load_bounds

#undef TARGET_STORE_BOUNDS_FOR_ARG
#define TARGET_STORE_BOUNDS_FOR_ARG ix86_store_bounds

#undef TARGET_LOAD_RETURNED_BOUNDS
#define TARGET_LOAD_RETURNED_BOUNDS ix86_load_returned_bounds

#undef TARGET_STORE_RETURNED_BOUNDS
#define TARGET_STORE_RETURNED_BOUNDS ix86_store_returned_bounds

#undef TARGET_CHKP_BOUND_MODE
#define TARGET_CHKP_BOUND_MODE ix86_mpx_bound_mode

#undef TARGET_BUILTIN_CHKP_FUNCTION
#define TARGET_BUILTIN_CHKP_FUNCTION ix86_builtin_mpx_function

#undef TARGET_CHKP_FUNCTION_VALUE_BOUNDS
#define TARGET_CHKP_FUNCTION_VALUE_BOUNDS ix86_function_value_bounds

#undef TARGET_CHKP_MAKE_BOUNDS_CONSTANT
#define TARGET_CHKP_MAKE_BOUNDS_CONSTANT ix86_make_bounds_constant

#undef TARGET_CHKP_INITIALIZE_BOUNDS
#define TARGET_CHKP_INITIALIZE_BOUNDS ix86_initialize_bounds

#undef TARGET_SETUP_INCOMING_VARARG_BOUNDS
#define TARGET_SETUP_INCOMING_VARARG_BOUNDS ix86_setup_incoming_vararg_bounds

#undef TARGET_OFFLOAD_OPTIONS
#define TARGET_OFFLOAD_OPTIONS \
  ix86_offload_options

#undef TARGET_ABSOLUTE_BIGGEST_ALIGNMENT
#define TARGET_ABSOLUTE_BIGGEST_ALIGNMENT 512
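
/* Finally, instantiate the target vector.  TARGET_INITIALIZER (provided by
   target-def.h from the hook descriptions in target.def) expands to an
   aggregate initializer that picks up every TARGET_* macro defined above and
   falls back to the documented default for any hook left undefined.  Generic
   code dispatches through this structure, e.g. the scheduler queries
   targetm.sched.issue_rate ().  */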
struct gcc_target targetm = TARGET_INITIALIZER;
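
/* gt-i386.h is generated by gengtype and provides the garbage-collector
   root tables for the GTY-marked static variables in this file.  */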
#include "gt-i386.h"