1 /* Subroutines used for code generation on IA-32.
2 Copyright (C) 1988-2015 Free Software Foundation, Inc.
4 This file is part of GCC.
6 GCC is free software; you can redistribute it and/or modify
7 it under the terms of the GNU General Public License as published by
8 the Free Software Foundation; either version 3, or (at your option)
9 any later version.
11 GCC is distributed in the hope that it will be useful,
12 but WITHOUT ANY WARRANTY; without even the implied warranty of
13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 GNU General Public License for more details.
16 You should have received a copy of the GNU General Public License
17 along with GCC; see the file COPYING3. If not see
18 <http://www.gnu.org/licenses/>. */
20 #include "config.h"
21 #include "system.h"
22 #include "coretypes.h"
23 #include "tm.h"
24 #include "rtl.h"
25 #include "alias.h"
26 #include "symtab.h"
27 #include "tree.h"
28 #include "fold-const.h"
29 #include "stringpool.h"
30 #include "attribs.h"
31 #include "calls.h"
32 #include "stor-layout.h"
33 #include "varasm.h"
34 #include "tm_p.h"
35 #include "regs.h"
36 #include "hard-reg-set.h"
37 #include "insn-config.h"
38 #include "conditions.h"
39 #include "output.h"
40 #include "insn-codes.h"
41 #include "insn-attr.h"
42 #include "flags.h"
43 #include "except.h"
44 #include "function.h"
45 #include "recog.h"
46 #include "expmed.h"
47 #include "dojump.h"
48 #include "explow.h"
49 #include "emit-rtl.h"
50 #include "stmt.h"
51 #include "expr.h"
52 #include "optabs.h"
53 #include "diagnostic-core.h"
54 #include "toplev.h"
55 #include "predict.h"
56 #include "dominance.h"
57 #include "cfg.h"
58 #include "cfgrtl.h"
59 #include "cfganal.h"
60 #include "lcm.h"
61 #include "cfgbuild.h"
62 #include "cfgcleanup.h"
63 #include "basic-block.h"
64 #include "target.h"
65 #include "target-def.h"
66 #include "common/common-target.h"
67 #include "langhooks.h"
68 #include "reload.h"
69 #include "plugin-api.h"
70 #include "ipa-ref.h"
71 #include "cgraph.h"
72 #include "tree-ssa-alias.h"
73 #include "internal-fn.h"
74 #include "gimple-fold.h"
75 #include "tree-eh.h"
76 #include "gimple-expr.h"
77 #include "gimple.h"
78 #include "gimplify.h"
79 #include "cfgloop.h"
80 #include "dwarf2.h"
81 #include "df.h"
82 #include "tm-constrs.h"
83 #include "params.h"
84 #include "cselib.h"
85 #include "debug.h"
86 #include "sched-int.h"
87 #include "sbitmap.h"
88 #include "fibheap.h"
89 #include "opts.h"
90 #include "diagnostic.h"
91 #include "dumpfile.h"
92 #include "tree-pass.h"
93 #include "context.h"
94 #include "pass_manager.h"
95 #include "target-globals.h"
96 #include "tree-vectorizer.h"
97 #include "shrink-wrap.h"
98 #include "builtins.h"
99 #include "rtl-iter.h"
100 #include "tree-iterator.h"
101 #include "tree-chkp.h"
102 #include "rtl-chkp.h"
104 static rtx legitimize_dllimport_symbol (rtx, bool);
105 static rtx legitimize_pe_coff_extern_decl (rtx, bool);
106 static rtx legitimize_pe_coff_symbol (rtx, bool);
108 #ifndef CHECK_STACK_LIMIT
109 #define CHECK_STACK_LIMIT (-1)
110 #endif
112 /* Return index of given mode in mult and division cost tables. */
113 #define MODE_INDEX(mode) \
114 ((mode) == QImode ? 0 \
115 : (mode) == HImode ? 1 \
116 : (mode) == SImode ? 2 \
117 : (mode) == DImode ? 3 \
118 : 4)
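/* A minimal usage sketch for MODE_INDEX, assuming the per-mode cost arrays
   in struct processor_costs (declared in i386.h) are laid out as
   QI/HI/SI/DI/other and that the multiply-start costs live in a member
   called mult_init (that member name is an assumption made here purely for
   illustration):

       const struct processor_costs *c = ix86_cost;
       int si_mul = c->mult_init[MODE_INDEX (SImode)];   (index 2)
       int other  = c->mult_init[MODE_INDEX (TImode)];   (any unlisted mode maps to 4)
*/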
120 /* Processor costs (relative to an add) */
121 /* We assume COSTS_N_INSNS is defined as (N)*4 and an addition is 2 bytes. */
122 #define COSTS_N_BYTES(N) ((N) * 2)
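/* Worked example of the scaling above: with the stated assumption that
   COSTS_N_INSNS (N) == (N) * 4 and that an addition is 2 bytes,
   COSTS_N_BYTES (2) == 4 == COSTS_N_INSNS (1), so a 2-byte add in the
   size-tuned table below weighs the same as one average instruction does
   in the speed-tuned tables, and comparing costs effectively compares
   encoded instruction bytes.  */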
124 #define DUMMY_STRINGOP_ALGS {libcall, {{-1, libcall, false}}}
126 static stringop_algs ix86_size_memcpy[2] = {
127 {rep_prefix_1_byte, {{-1, rep_prefix_1_byte, false}}},
128 {rep_prefix_1_byte, {{-1, rep_prefix_1_byte, false}}}};
129 static stringop_algs ix86_size_memset[2] = {
130 {rep_prefix_1_byte, {{-1, rep_prefix_1_byte, false}}},
131 {rep_prefix_1_byte, {{-1, rep_prefix_1_byte, false}}}};
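/* How to read the stringop_algs initializers in this file (a sketch that
   assumes the struct stringop_algs layout from i386.h): the leading field
   is the algorithm used when the block size is unknown at compile time,
   and each {max, alg, noalign} triple selects ALG for known sizes of at
   most MAX bytes, with MAX == -1 meaning "every remaining size".  The two
   array elements are assumed to be the 32-bit and the 64-bit variant of
   the table.  The ix86_size_memcpy entry above therefore says: for any
   size, emit "rep movsb", the shortest possible copy sequence.  */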
133 const
134 struct processor_costs ix86_size_cost = {/* costs for tuning for size */
135 COSTS_N_BYTES (2), /* cost of an add instruction */
136 COSTS_N_BYTES (3), /* cost of a lea instruction */
137 COSTS_N_BYTES (2), /* variable shift costs */
138 COSTS_N_BYTES (3), /* constant shift costs */
139 {COSTS_N_BYTES (3), /* cost of starting multiply for QI */
140 COSTS_N_BYTES (3), /* HI */
141 COSTS_N_BYTES (3), /* SI */
142 COSTS_N_BYTES (3), /* DI */
143 COSTS_N_BYTES (5)}, /* other */
144 0, /* cost of multiply per each bit set */
145 {COSTS_N_BYTES (3), /* cost of a divide/mod for QI */
146 COSTS_N_BYTES (3), /* HI */
147 COSTS_N_BYTES (3), /* SI */
148 COSTS_N_BYTES (3), /* DI */
149 COSTS_N_BYTES (5)}, /* other */
150 COSTS_N_BYTES (3), /* cost of movsx */
151 COSTS_N_BYTES (3), /* cost of movzx */
152 0, /* "large" insn */
153 2, /* MOVE_RATIO */
154 2, /* cost for loading QImode using movzbl */
155 {2, 2, 2}, /* cost of loading integer registers
156 in QImode, HImode and SImode.
157 Relative to reg-reg move (2). */
158 {2, 2, 2}, /* cost of storing integer registers */
159 2, /* cost of reg,reg fld/fst */
160 {2, 2, 2}, /* cost of loading fp registers
161 in SFmode, DFmode and XFmode */
162 {2, 2, 2}, /* cost of storing fp registers
163 in SFmode, DFmode and XFmode */
164 3, /* cost of moving MMX register */
165 {3, 3}, /* cost of loading MMX registers
166 in SImode and DImode */
167 {3, 3}, /* cost of storing MMX registers
168 in SImode and DImode */
169 3, /* cost of moving SSE register */
170 {3, 3, 3}, /* cost of loading SSE registers
171 in SImode, DImode and TImode */
172 {3, 3, 3}, /* cost of storing SSE registers
173 in SImode, DImode and TImode */
174 3, /* MMX or SSE register to integer */
175 0, /* size of l1 cache */
176 0, /* size of l2 cache */
177 0, /* size of prefetch block */
178 0, /* number of parallel prefetches */
179 2, /* Branch cost */
180 COSTS_N_BYTES (2), /* cost of FADD and FSUB insns. */
181 COSTS_N_BYTES (2), /* cost of FMUL instruction. */
182 COSTS_N_BYTES (2), /* cost of FDIV instruction. */
183 COSTS_N_BYTES (2), /* cost of FABS instruction. */
184 COSTS_N_BYTES (2), /* cost of FCHS instruction. */
185 COSTS_N_BYTES (2), /* cost of FSQRT instruction. */
186 ix86_size_memcpy,
187 ix86_size_memset,
188 1, /* scalar_stmt_cost. */
189 1, /* scalar load_cost. */
190 1, /* scalar_store_cost. */
191 1, /* vec_stmt_cost. */
192 1, /* vec_to_scalar_cost. */
193 1, /* scalar_to_vec_cost. */
194 1, /* vec_align_load_cost. */
195 1, /* vec_unalign_load_cost. */
196 1, /* vec_store_cost. */
197 1, /* cond_taken_branch_cost. */
 198 1, /* cond_not_taken_branch_cost. */
};
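/* The table above is the one intended for functions optimized for size;
   since every entry is expressed in COSTS_N_BYTES units, RTX cost
   comparisons against it approximate a comparison of instruction bytes
   rather than latencies.  The per-processor speed-tuned tables follow.  */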
201 /* Processor costs (relative to an add) */
202 static stringop_algs i386_memcpy[2] = {
203 {rep_prefix_1_byte, {{-1, rep_prefix_1_byte, false}}},
204 DUMMY_STRINGOP_ALGS};
205 static stringop_algs i386_memset[2] = {
206 {rep_prefix_1_byte, {{-1, rep_prefix_1_byte, false}}},
207 DUMMY_STRINGOP_ALGS};
209 static const
210 struct processor_costs i386_cost = { /* 386 specific costs */
211 COSTS_N_INSNS (1), /* cost of an add instruction */
212 COSTS_N_INSNS (1), /* cost of a lea instruction */
213 COSTS_N_INSNS (3), /* variable shift costs */
214 COSTS_N_INSNS (2), /* constant shift costs */
215 {COSTS_N_INSNS (6), /* cost of starting multiply for QI */
216 COSTS_N_INSNS (6), /* HI */
217 COSTS_N_INSNS (6), /* SI */
218 COSTS_N_INSNS (6), /* DI */
219 COSTS_N_INSNS (6)}, /* other */
220 COSTS_N_INSNS (1), /* cost of multiply per each bit set */
221 {COSTS_N_INSNS (23), /* cost of a divide/mod for QI */
222 COSTS_N_INSNS (23), /* HI */
223 COSTS_N_INSNS (23), /* SI */
224 COSTS_N_INSNS (23), /* DI */
225 COSTS_N_INSNS (23)}, /* other */
226 COSTS_N_INSNS (3), /* cost of movsx */
227 COSTS_N_INSNS (2), /* cost of movzx */
228 15, /* "large" insn */
229 3, /* MOVE_RATIO */
230 4, /* cost for loading QImode using movzbl */
231 {2, 4, 2}, /* cost of loading integer registers
232 in QImode, HImode and SImode.
233 Relative to reg-reg move (2). */
234 {2, 4, 2}, /* cost of storing integer registers */
235 2, /* cost of reg,reg fld/fst */
236 {8, 8, 8}, /* cost of loading fp registers
237 in SFmode, DFmode and XFmode */
238 {8, 8, 8}, /* cost of storing fp registers
239 in SFmode, DFmode and XFmode */
240 2, /* cost of moving MMX register */
241 {4, 8}, /* cost of loading MMX registers
242 in SImode and DImode */
243 {4, 8}, /* cost of storing MMX registers
244 in SImode and DImode */
245 2, /* cost of moving SSE register */
246 {4, 8, 16}, /* cost of loading SSE registers
247 in SImode, DImode and TImode */
248 {4, 8, 16}, /* cost of storing SSE registers
249 in SImode, DImode and TImode */
250 3, /* MMX or SSE register to integer */
251 0, /* size of l1 cache */
252 0, /* size of l2 cache */
253 0, /* size of prefetch block */
254 0, /* number of parallel prefetches */
255 1, /* Branch cost */
256 COSTS_N_INSNS (23), /* cost of FADD and FSUB insns. */
257 COSTS_N_INSNS (27), /* cost of FMUL instruction. */
258 COSTS_N_INSNS (88), /* cost of FDIV instruction. */
259 COSTS_N_INSNS (22), /* cost of FABS instruction. */
260 COSTS_N_INSNS (24), /* cost of FCHS instruction. */
261 COSTS_N_INSNS (122), /* cost of FSQRT instruction. */
262 i386_memcpy,
263 i386_memset,
264 1, /* scalar_stmt_cost. */
265 1, /* scalar load_cost. */
266 1, /* scalar_store_cost. */
267 1, /* vec_stmt_cost. */
268 1, /* vec_to_scalar_cost. */
269 1, /* scalar_to_vec_cost. */
270 1, /* vec_align_load_cost. */
271 2, /* vec_unalign_load_cost. */
272 1, /* vec_store_cost. */
273 3, /* cond_taken_branch_cost. */
 274 1, /* cond_not_taken_branch_cost. */
};
277 static stringop_algs i486_memcpy[2] = {
278 {rep_prefix_4_byte, {{-1, rep_prefix_4_byte, false}}},
279 DUMMY_STRINGOP_ALGS};
280 static stringop_algs i486_memset[2] = {
281 {rep_prefix_4_byte, {{-1, rep_prefix_4_byte, false}}},
282 DUMMY_STRINGOP_ALGS};
284 static const
285 struct processor_costs i486_cost = { /* 486 specific costs */
286 COSTS_N_INSNS (1), /* cost of an add instruction */
287 COSTS_N_INSNS (1), /* cost of a lea instruction */
288 COSTS_N_INSNS (3), /* variable shift costs */
289 COSTS_N_INSNS (2), /* constant shift costs */
290 {COSTS_N_INSNS (12), /* cost of starting multiply for QI */
291 COSTS_N_INSNS (12), /* HI */
292 COSTS_N_INSNS (12), /* SI */
293 COSTS_N_INSNS (12), /* DI */
294 COSTS_N_INSNS (12)}, /* other */
295 1, /* cost of multiply per each bit set */
296 {COSTS_N_INSNS (40), /* cost of a divide/mod for QI */
297 COSTS_N_INSNS (40), /* HI */
298 COSTS_N_INSNS (40), /* SI */
299 COSTS_N_INSNS (40), /* DI */
300 COSTS_N_INSNS (40)}, /* other */
301 COSTS_N_INSNS (3), /* cost of movsx */
302 COSTS_N_INSNS (2), /* cost of movzx */
303 15, /* "large" insn */
304 3, /* MOVE_RATIO */
305 4, /* cost for loading QImode using movzbl */
306 {2, 4, 2}, /* cost of loading integer registers
307 in QImode, HImode and SImode.
308 Relative to reg-reg move (2). */
309 {2, 4, 2}, /* cost of storing integer registers */
310 2, /* cost of reg,reg fld/fst */
311 {8, 8, 8}, /* cost of loading fp registers
312 in SFmode, DFmode and XFmode */
313 {8, 8, 8}, /* cost of storing fp registers
314 in SFmode, DFmode and XFmode */
315 2, /* cost of moving MMX register */
316 {4, 8}, /* cost of loading MMX registers
317 in SImode and DImode */
318 {4, 8}, /* cost of storing MMX registers
319 in SImode and DImode */
320 2, /* cost of moving SSE register */
321 {4, 8, 16}, /* cost of loading SSE registers
322 in SImode, DImode and TImode */
323 {4, 8, 16}, /* cost of storing SSE registers
324 in SImode, DImode and TImode */
325 3, /* MMX or SSE register to integer */
326 4, /* size of l1 cache. 486 has 8kB cache
327 shared for code and data, so 4kB is
328 not really precise. */
329 4, /* size of l2 cache */
330 0, /* size of prefetch block */
331 0, /* number of parallel prefetches */
332 1, /* Branch cost */
333 COSTS_N_INSNS (8), /* cost of FADD and FSUB insns. */
334 COSTS_N_INSNS (16), /* cost of FMUL instruction. */
335 COSTS_N_INSNS (73), /* cost of FDIV instruction. */
336 COSTS_N_INSNS (3), /* cost of FABS instruction. */
337 COSTS_N_INSNS (3), /* cost of FCHS instruction. */
338 COSTS_N_INSNS (83), /* cost of FSQRT instruction. */
339 i486_memcpy,
340 i486_memset,
341 1, /* scalar_stmt_cost. */
342 1, /* scalar load_cost. */
343 1, /* scalar_store_cost. */
344 1, /* vec_stmt_cost. */
345 1, /* vec_to_scalar_cost. */
346 1, /* scalar_to_vec_cost. */
347 1, /* vec_align_load_cost. */
348 2, /* vec_unalign_load_cost. */
349 1, /* vec_store_cost. */
350 3, /* cond_taken_branch_cost. */
 351 1, /* cond_not_taken_branch_cost. */
};
354 static stringop_algs pentium_memcpy[2] = {
355 {libcall, {{256, rep_prefix_4_byte, false}, {-1, libcall, false}}},
356 DUMMY_STRINGOP_ALGS};
357 static stringop_algs pentium_memset[2] = {
358 {libcall, {{-1, rep_prefix_4_byte, false}}},
359 DUMMY_STRINGOP_ALGS};
361 static const
362 struct processor_costs pentium_cost = {
363 COSTS_N_INSNS (1), /* cost of an add instruction */
364 COSTS_N_INSNS (1), /* cost of a lea instruction */
365 COSTS_N_INSNS (4), /* variable shift costs */
366 COSTS_N_INSNS (1), /* constant shift costs */
367 {COSTS_N_INSNS (11), /* cost of starting multiply for QI */
368 COSTS_N_INSNS (11), /* HI */
369 COSTS_N_INSNS (11), /* SI */
370 COSTS_N_INSNS (11), /* DI */
371 COSTS_N_INSNS (11)}, /* other */
372 0, /* cost of multiply per each bit set */
373 {COSTS_N_INSNS (25), /* cost of a divide/mod for QI */
374 COSTS_N_INSNS (25), /* HI */
375 COSTS_N_INSNS (25), /* SI */
376 COSTS_N_INSNS (25), /* DI */
377 COSTS_N_INSNS (25)}, /* other */
378 COSTS_N_INSNS (3), /* cost of movsx */
379 COSTS_N_INSNS (2), /* cost of movzx */
380 8, /* "large" insn */
381 6, /* MOVE_RATIO */
382 6, /* cost for loading QImode using movzbl */
383 {2, 4, 2}, /* cost of loading integer registers
384 in QImode, HImode and SImode.
385 Relative to reg-reg move (2). */
386 {2, 4, 2}, /* cost of storing integer registers */
387 2, /* cost of reg,reg fld/fst */
388 {2, 2, 6}, /* cost of loading fp registers
389 in SFmode, DFmode and XFmode */
390 {4, 4, 6}, /* cost of storing fp registers
391 in SFmode, DFmode and XFmode */
392 8, /* cost of moving MMX register */
393 {8, 8}, /* cost of loading MMX registers
394 in SImode and DImode */
395 {8, 8}, /* cost of storing MMX registers
396 in SImode and DImode */
397 2, /* cost of moving SSE register */
398 {4, 8, 16}, /* cost of loading SSE registers
399 in SImode, DImode and TImode */
400 {4, 8, 16}, /* cost of storing SSE registers
401 in SImode, DImode and TImode */
402 3, /* MMX or SSE register to integer */
403 8, /* size of l1 cache. */
404 8, /* size of l2 cache */
405 0, /* size of prefetch block */
406 0, /* number of parallel prefetches */
407 2, /* Branch cost */
408 COSTS_N_INSNS (3), /* cost of FADD and FSUB insns. */
409 COSTS_N_INSNS (3), /* cost of FMUL instruction. */
410 COSTS_N_INSNS (39), /* cost of FDIV instruction. */
411 COSTS_N_INSNS (1), /* cost of FABS instruction. */
412 COSTS_N_INSNS (1), /* cost of FCHS instruction. */
413 COSTS_N_INSNS (70), /* cost of FSQRT instruction. */
414 pentium_memcpy,
415 pentium_memset,
416 1, /* scalar_stmt_cost. */
417 1, /* scalar load_cost. */
418 1, /* scalar_store_cost. */
419 1, /* vec_stmt_cost. */
420 1, /* vec_to_scalar_cost. */
421 1, /* scalar_to_vec_cost. */
422 1, /* vec_align_load_cost. */
423 2, /* vec_unalign_load_cost. */
424 1, /* vec_store_cost. */
425 3, /* cond_taken_branch_cost. */
 426 1, /* cond_not_taken_branch_cost. */
};
 429 /* PentiumPro has optimized rep instructions for blocks aligned by 8 bytes
 430 (we ensure the alignment).  For small blocks an inline loop is still a
 431 noticeable win; for bigger blocks either rep movsl or rep movsb is the
 432 way to go.  Rep movsb apparently has a more expensive startup time in the
 433 CPU, but after 4K the difference is down in the noise. */
434 static stringop_algs pentiumpro_memcpy[2] = {
435 {rep_prefix_4_byte, {{128, loop, false}, {1024, unrolled_loop, false},
436 {8192, rep_prefix_4_byte, false},
437 {-1, rep_prefix_1_byte, false}}},
438 DUMMY_STRINGOP_ALGS};
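/* Reading pentiumpro_memcpy above against the preceding comment: known
   copies of at most 128 bytes use an inline loop, at most 1024 bytes an
   unrolled loop, at most 8192 bytes "rep movsl", and anything larger
   "rep movsb"; when the size is unknown at compile time the leading
   rep_prefix_4_byte field ("rep movsl") is used.  The second element is
   just DUMMY_STRINGOP_ALGS since this tuning is not used for 64-bit
   code.  */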
439 static stringop_algs pentiumpro_memset[2] = {
440 {rep_prefix_4_byte, {{1024, unrolled_loop, false},
441 {8192, rep_prefix_4_byte, false},
442 {-1, libcall, false}}},
443 DUMMY_STRINGOP_ALGS};
444 static const
445 struct processor_costs pentiumpro_cost = {
446 COSTS_N_INSNS (1), /* cost of an add instruction */
447 COSTS_N_INSNS (1), /* cost of a lea instruction */
448 COSTS_N_INSNS (1), /* variable shift costs */
449 COSTS_N_INSNS (1), /* constant shift costs */
450 {COSTS_N_INSNS (4), /* cost of starting multiply for QI */
451 COSTS_N_INSNS (4), /* HI */
452 COSTS_N_INSNS (4), /* SI */
453 COSTS_N_INSNS (4), /* DI */
454 COSTS_N_INSNS (4)}, /* other */
455 0, /* cost of multiply per each bit set */
456 {COSTS_N_INSNS (17), /* cost of a divide/mod for QI */
457 COSTS_N_INSNS (17), /* HI */
458 COSTS_N_INSNS (17), /* SI */
459 COSTS_N_INSNS (17), /* DI */
460 COSTS_N_INSNS (17)}, /* other */
461 COSTS_N_INSNS (1), /* cost of movsx */
462 COSTS_N_INSNS (1), /* cost of movzx */
463 8, /* "large" insn */
464 6, /* MOVE_RATIO */
465 2, /* cost for loading QImode using movzbl */
466 {4, 4, 4}, /* cost of loading integer registers
467 in QImode, HImode and SImode.
468 Relative to reg-reg move (2). */
469 {2, 2, 2}, /* cost of storing integer registers */
470 2, /* cost of reg,reg fld/fst */
471 {2, 2, 6}, /* cost of loading fp registers
472 in SFmode, DFmode and XFmode */
473 {4, 4, 6}, /* cost of storing fp registers
474 in SFmode, DFmode and XFmode */
475 2, /* cost of moving MMX register */
476 {2, 2}, /* cost of loading MMX registers
477 in SImode and DImode */
478 {2, 2}, /* cost of storing MMX registers
479 in SImode and DImode */
480 2, /* cost of moving SSE register */
481 {2, 2, 8}, /* cost of loading SSE registers
482 in SImode, DImode and TImode */
483 {2, 2, 8}, /* cost of storing SSE registers
484 in SImode, DImode and TImode */
485 3, /* MMX or SSE register to integer */
486 8, /* size of l1 cache. */
487 256, /* size of l2 cache */
488 32, /* size of prefetch block */
489 6, /* number of parallel prefetches */
490 2, /* Branch cost */
491 COSTS_N_INSNS (3), /* cost of FADD and FSUB insns. */
492 COSTS_N_INSNS (5), /* cost of FMUL instruction. */
493 COSTS_N_INSNS (56), /* cost of FDIV instruction. */
494 COSTS_N_INSNS (2), /* cost of FABS instruction. */
495 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
496 COSTS_N_INSNS (56), /* cost of FSQRT instruction. */
497 pentiumpro_memcpy,
498 pentiumpro_memset,
499 1, /* scalar_stmt_cost. */
500 1, /* scalar load_cost. */
501 1, /* scalar_store_cost. */
502 1, /* vec_stmt_cost. */
503 1, /* vec_to_scalar_cost. */
504 1, /* scalar_to_vec_cost. */
505 1, /* vec_align_load_cost. */
506 2, /* vec_unalign_load_cost. */
507 1, /* vec_store_cost. */
508 3, /* cond_taken_branch_cost. */
 509 1, /* cond_not_taken_branch_cost. */
};
512 static stringop_algs geode_memcpy[2] = {
513 {libcall, {{256, rep_prefix_4_byte, false}, {-1, libcall, false}}},
514 DUMMY_STRINGOP_ALGS};
515 static stringop_algs geode_memset[2] = {
516 {libcall, {{256, rep_prefix_4_byte, false}, {-1, libcall, false}}},
517 DUMMY_STRINGOP_ALGS};
518 static const
519 struct processor_costs geode_cost = {
520 COSTS_N_INSNS (1), /* cost of an add instruction */
521 COSTS_N_INSNS (1), /* cost of a lea instruction */
522 COSTS_N_INSNS (2), /* variable shift costs */
523 COSTS_N_INSNS (1), /* constant shift costs */
524 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
525 COSTS_N_INSNS (4), /* HI */
526 COSTS_N_INSNS (7), /* SI */
527 COSTS_N_INSNS (7), /* DI */
528 COSTS_N_INSNS (7)}, /* other */
529 0, /* cost of multiply per each bit set */
530 {COSTS_N_INSNS (15), /* cost of a divide/mod for QI */
531 COSTS_N_INSNS (23), /* HI */
532 COSTS_N_INSNS (39), /* SI */
533 COSTS_N_INSNS (39), /* DI */
534 COSTS_N_INSNS (39)}, /* other */
535 COSTS_N_INSNS (1), /* cost of movsx */
536 COSTS_N_INSNS (1), /* cost of movzx */
537 8, /* "large" insn */
538 4, /* MOVE_RATIO */
539 1, /* cost for loading QImode using movzbl */
540 {1, 1, 1}, /* cost of loading integer registers
541 in QImode, HImode and SImode.
542 Relative to reg-reg move (2). */
543 {1, 1, 1}, /* cost of storing integer registers */
544 1, /* cost of reg,reg fld/fst */
545 {1, 1, 1}, /* cost of loading fp registers
546 in SFmode, DFmode and XFmode */
547 {4, 6, 6}, /* cost of storing fp registers
548 in SFmode, DFmode and XFmode */
550 1, /* cost of moving MMX register */
551 {1, 1}, /* cost of loading MMX registers
552 in SImode and DImode */
553 {1, 1}, /* cost of storing MMX registers
554 in SImode and DImode */
555 1, /* cost of moving SSE register */
556 {1, 1, 1}, /* cost of loading SSE registers
557 in SImode, DImode and TImode */
558 {1, 1, 1}, /* cost of storing SSE registers
559 in SImode, DImode and TImode */
560 1, /* MMX or SSE register to integer */
561 64, /* size of l1 cache. */
562 128, /* size of l2 cache. */
563 32, /* size of prefetch block */
564 1, /* number of parallel prefetches */
565 1, /* Branch cost */
566 COSTS_N_INSNS (6), /* cost of FADD and FSUB insns. */
567 COSTS_N_INSNS (11), /* cost of FMUL instruction. */
568 COSTS_N_INSNS (47), /* cost of FDIV instruction. */
569 COSTS_N_INSNS (1), /* cost of FABS instruction. */
570 COSTS_N_INSNS (1), /* cost of FCHS instruction. */
571 COSTS_N_INSNS (54), /* cost of FSQRT instruction. */
572 geode_memcpy,
573 geode_memset,
574 1, /* scalar_stmt_cost. */
575 1, /* scalar load_cost. */
576 1, /* scalar_store_cost. */
577 1, /* vec_stmt_cost. */
578 1, /* vec_to_scalar_cost. */
579 1, /* scalar_to_vec_cost. */
580 1, /* vec_align_load_cost. */
581 2, /* vec_unalign_load_cost. */
582 1, /* vec_store_cost. */
583 3, /* cond_taken_branch_cost. */
 584 1, /* cond_not_taken_branch_cost. */
};
587 static stringop_algs k6_memcpy[2] = {
588 {libcall, {{256, rep_prefix_4_byte, false}, {-1, libcall, false}}},
589 DUMMY_STRINGOP_ALGS};
590 static stringop_algs k6_memset[2] = {
591 {libcall, {{256, rep_prefix_4_byte, false}, {-1, libcall, false}}},
592 DUMMY_STRINGOP_ALGS};
593 static const
594 struct processor_costs k6_cost = {
595 COSTS_N_INSNS (1), /* cost of an add instruction */
596 COSTS_N_INSNS (2), /* cost of a lea instruction */
597 COSTS_N_INSNS (1), /* variable shift costs */
598 COSTS_N_INSNS (1), /* constant shift costs */
599 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
600 COSTS_N_INSNS (3), /* HI */
601 COSTS_N_INSNS (3), /* SI */
602 COSTS_N_INSNS (3), /* DI */
603 COSTS_N_INSNS (3)}, /* other */
604 0, /* cost of multiply per each bit set */
605 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
606 COSTS_N_INSNS (18), /* HI */
607 COSTS_N_INSNS (18), /* SI */
608 COSTS_N_INSNS (18), /* DI */
609 COSTS_N_INSNS (18)}, /* other */
610 COSTS_N_INSNS (2), /* cost of movsx */
611 COSTS_N_INSNS (2), /* cost of movzx */
612 8, /* "large" insn */
613 4, /* MOVE_RATIO */
614 3, /* cost for loading QImode using movzbl */
615 {4, 5, 4}, /* cost of loading integer registers
616 in QImode, HImode and SImode.
617 Relative to reg-reg move (2). */
618 {2, 3, 2}, /* cost of storing integer registers */
619 4, /* cost of reg,reg fld/fst */
620 {6, 6, 6}, /* cost of loading fp registers
621 in SFmode, DFmode and XFmode */
622 {4, 4, 4}, /* cost of storing fp registers
623 in SFmode, DFmode and XFmode */
624 2, /* cost of moving MMX register */
625 {2, 2}, /* cost of loading MMX registers
626 in SImode and DImode */
627 {2, 2}, /* cost of storing MMX registers
628 in SImode and DImode */
629 2, /* cost of moving SSE register */
630 {2, 2, 8}, /* cost of loading SSE registers
631 in SImode, DImode and TImode */
632 {2, 2, 8}, /* cost of storing SSE registers
633 in SImode, DImode and TImode */
634 6, /* MMX or SSE register to integer */
635 32, /* size of l1 cache. */
636 32, /* size of l2 cache. Some models
637 have integrated l2 cache, but
638 optimizing for k6 is not important
639 enough to worry about that. */
640 32, /* size of prefetch block */
641 1, /* number of parallel prefetches */
642 1, /* Branch cost */
643 COSTS_N_INSNS (2), /* cost of FADD and FSUB insns. */
644 COSTS_N_INSNS (2), /* cost of FMUL instruction. */
645 COSTS_N_INSNS (56), /* cost of FDIV instruction. */
646 COSTS_N_INSNS (2), /* cost of FABS instruction. */
647 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
648 COSTS_N_INSNS (56), /* cost of FSQRT instruction. */
649 k6_memcpy,
650 k6_memset,
651 1, /* scalar_stmt_cost. */
652 1, /* scalar load_cost. */
653 1, /* scalar_store_cost. */
654 1, /* vec_stmt_cost. */
655 1, /* vec_to_scalar_cost. */
656 1, /* scalar_to_vec_cost. */
657 1, /* vec_align_load_cost. */
658 2, /* vec_unalign_load_cost. */
659 1, /* vec_store_cost. */
660 3, /* cond_taken_branch_cost. */
 661 1, /* cond_not_taken_branch_cost. */
};
 664 /* For some reason, Athlon deals better with the REP prefix (relative to
 665 loops) than K8 does.  Alignment becomes important after 8 bytes for memcpy
 666 and 128 bytes for memset. */
667 static stringop_algs athlon_memcpy[2] = {
668 {libcall, {{2048, rep_prefix_4_byte, false}, {-1, libcall, false}}},
669 DUMMY_STRINGOP_ALGS};
670 static stringop_algs athlon_memset[2] = {
671 {libcall, {{2048, rep_prefix_4_byte, false}, {-1, libcall, false}}},
672 DUMMY_STRINGOP_ALGS};
673 static const
674 struct processor_costs athlon_cost = {
675 COSTS_N_INSNS (1), /* cost of an add instruction */
676 COSTS_N_INSNS (2), /* cost of a lea instruction */
677 COSTS_N_INSNS (1), /* variable shift costs */
678 COSTS_N_INSNS (1), /* constant shift costs */
679 {COSTS_N_INSNS (5), /* cost of starting multiply for QI */
680 COSTS_N_INSNS (5), /* HI */
681 COSTS_N_INSNS (5), /* SI */
682 COSTS_N_INSNS (5), /* DI */
683 COSTS_N_INSNS (5)}, /* other */
684 0, /* cost of multiply per each bit set */
685 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
686 COSTS_N_INSNS (26), /* HI */
687 COSTS_N_INSNS (42), /* SI */
688 COSTS_N_INSNS (74), /* DI */
689 COSTS_N_INSNS (74)}, /* other */
690 COSTS_N_INSNS (1), /* cost of movsx */
691 COSTS_N_INSNS (1), /* cost of movzx */
692 8, /* "large" insn */
693 9, /* MOVE_RATIO */
694 4, /* cost for loading QImode using movzbl */
695 {3, 4, 3}, /* cost of loading integer registers
696 in QImode, HImode and SImode.
697 Relative to reg-reg move (2). */
698 {3, 4, 3}, /* cost of storing integer registers */
699 4, /* cost of reg,reg fld/fst */
700 {4, 4, 12}, /* cost of loading fp registers
701 in SFmode, DFmode and XFmode */
702 {6, 6, 8}, /* cost of storing fp registers
703 in SFmode, DFmode and XFmode */
704 2, /* cost of moving MMX register */
705 {4, 4}, /* cost of loading MMX registers
706 in SImode and DImode */
707 {4, 4}, /* cost of storing MMX registers
708 in SImode and DImode */
709 2, /* cost of moving SSE register */
710 {4, 4, 6}, /* cost of loading SSE registers
711 in SImode, DImode and TImode */
712 {4, 4, 5}, /* cost of storing SSE registers
713 in SImode, DImode and TImode */
714 5, /* MMX or SSE register to integer */
715 64, /* size of l1 cache. */
716 256, /* size of l2 cache. */
717 64, /* size of prefetch block */
718 6, /* number of parallel prefetches */
719 5, /* Branch cost */
720 COSTS_N_INSNS (4), /* cost of FADD and FSUB insns. */
721 COSTS_N_INSNS (4), /* cost of FMUL instruction. */
722 COSTS_N_INSNS (24), /* cost of FDIV instruction. */
723 COSTS_N_INSNS (2), /* cost of FABS instruction. */
724 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
725 COSTS_N_INSNS (35), /* cost of FSQRT instruction. */
726 athlon_memcpy,
727 athlon_memset,
728 1, /* scalar_stmt_cost. */
729 1, /* scalar load_cost. */
730 1, /* scalar_store_cost. */
731 1, /* vec_stmt_cost. */
732 1, /* vec_to_scalar_cost. */
733 1, /* scalar_to_vec_cost. */
734 1, /* vec_align_load_cost. */
735 2, /* vec_unalign_load_cost. */
736 1, /* vec_store_cost. */
737 3, /* cond_taken_branch_cost. */
 738 1, /* cond_not_taken_branch_cost. */
};
 741 /* K8 has an optimized REP instruction for medium-sized blocks, but for very
 742 small blocks it is better to use a loop.  For large blocks, libcall can
 743 do non-temporal accesses and beat inline code considerably. */
744 static stringop_algs k8_memcpy[2] = {
745 {libcall, {{6, loop, false}, {14, unrolled_loop, false},
746 {-1, rep_prefix_4_byte, false}}},
747 {libcall, {{16, loop, false}, {8192, rep_prefix_8_byte, false},
748 {-1, libcall, false}}}};
749 static stringop_algs k8_memset[2] = {
750 {libcall, {{8, loop, false}, {24, unrolled_loop, false},
751 {2048, rep_prefix_4_byte, false}, {-1, libcall, false}}},
752 {libcall, {{48, unrolled_loop, false},
753 {8192, rep_prefix_8_byte, false}, {-1, libcall, false}}}};
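/* Concretely, for k8_memcpy above (read as described for the earlier
   stringop_algs tables): in 32-bit code, copies of at most 6 bytes use a
   simple loop, at most 14 bytes an unrolled loop, and anything larger
   "rep movsl"; in 64-bit code, copies of at most 16 bytes use a loop, at
   most 8192 bytes "rep movsq", and larger or unknown sizes call the
   memcpy libcall, matching the comment above about libcalls using
   non-temporal accesses for large blocks.  */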
754 static const
755 struct processor_costs k8_cost = {
756 COSTS_N_INSNS (1), /* cost of an add instruction */
757 COSTS_N_INSNS (2), /* cost of a lea instruction */
758 COSTS_N_INSNS (1), /* variable shift costs */
759 COSTS_N_INSNS (1), /* constant shift costs */
760 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
761 COSTS_N_INSNS (4), /* HI */
762 COSTS_N_INSNS (3), /* SI */
763 COSTS_N_INSNS (4), /* DI */
764 COSTS_N_INSNS (5)}, /* other */
765 0, /* cost of multiply per each bit set */
766 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
767 COSTS_N_INSNS (26), /* HI */
768 COSTS_N_INSNS (42), /* SI */
769 COSTS_N_INSNS (74), /* DI */
770 COSTS_N_INSNS (74)}, /* other */
771 COSTS_N_INSNS (1), /* cost of movsx */
772 COSTS_N_INSNS (1), /* cost of movzx */
773 8, /* "large" insn */
774 9, /* MOVE_RATIO */
775 4, /* cost for loading QImode using movzbl */
776 {3, 4, 3}, /* cost of loading integer registers
777 in QImode, HImode and SImode.
778 Relative to reg-reg move (2). */
779 {3, 4, 3}, /* cost of storing integer registers */
780 4, /* cost of reg,reg fld/fst */
781 {4, 4, 12}, /* cost of loading fp registers
782 in SFmode, DFmode and XFmode */
783 {6, 6, 8}, /* cost of storing fp registers
784 in SFmode, DFmode and XFmode */
785 2, /* cost of moving MMX register */
786 {3, 3}, /* cost of loading MMX registers
787 in SImode and DImode */
788 {4, 4}, /* cost of storing MMX registers
789 in SImode and DImode */
790 2, /* cost of moving SSE register */
791 {4, 3, 6}, /* cost of loading SSE registers
792 in SImode, DImode and TImode */
793 {4, 4, 5}, /* cost of storing SSE registers
794 in SImode, DImode and TImode */
795 5, /* MMX or SSE register to integer */
796 64, /* size of l1 cache. */
797 512, /* size of l2 cache. */
798 64, /* size of prefetch block */
799 /* New AMD processors never drop prefetches; if they cannot be performed
800 immediately, they are queued. We set number of simultaneous prefetches
801 to a large constant to reflect this (it probably is not a good idea not
802 to limit number of prefetches at all, as their execution also takes some
803 time). */
804 100, /* number of parallel prefetches */
805 3, /* Branch cost */
806 COSTS_N_INSNS (4), /* cost of FADD and FSUB insns. */
807 COSTS_N_INSNS (4), /* cost of FMUL instruction. */
808 COSTS_N_INSNS (19), /* cost of FDIV instruction. */
809 COSTS_N_INSNS (2), /* cost of FABS instruction. */
810 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
811 COSTS_N_INSNS (35), /* cost of FSQRT instruction. */
813 k8_memcpy,
814 k8_memset,
815 4, /* scalar_stmt_cost. */
816 2, /* scalar load_cost. */
817 2, /* scalar_store_cost. */
818 5, /* vec_stmt_cost. */
819 0, /* vec_to_scalar_cost. */
820 2, /* scalar_to_vec_cost. */
821 2, /* vec_align_load_cost. */
822 3, /* vec_unalign_load_cost. */
823 3, /* vec_store_cost. */
824 3, /* cond_taken_branch_cost. */
 825 2, /* cond_not_taken_branch_cost. */
};
 828 /* AMDFAM10 has an optimized REP instruction for medium-sized blocks, but for
 829 very small blocks it is better to use a loop.  For large blocks, libcall can
 830 do non-temporal accesses and beat inline code considerably. */
831 static stringop_algs amdfam10_memcpy[2] = {
832 {libcall, {{6, loop, false}, {14, unrolled_loop, false},
833 {-1, rep_prefix_4_byte, false}}},
834 {libcall, {{16, loop, false}, {8192, rep_prefix_8_byte, false},
835 {-1, libcall, false}}}};
836 static stringop_algs amdfam10_memset[2] = {
837 {libcall, {{8, loop, false}, {24, unrolled_loop, false},
838 {2048, rep_prefix_4_byte, false}, {-1, libcall, false}}},
839 {libcall, {{48, unrolled_loop, false}, {8192, rep_prefix_8_byte, false},
840 {-1, libcall, false}}}};
841 struct processor_costs amdfam10_cost = {
842 COSTS_N_INSNS (1), /* cost of an add instruction */
843 COSTS_N_INSNS (2), /* cost of a lea instruction */
844 COSTS_N_INSNS (1), /* variable shift costs */
845 COSTS_N_INSNS (1), /* constant shift costs */
846 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
847 COSTS_N_INSNS (4), /* HI */
848 COSTS_N_INSNS (3), /* SI */
849 COSTS_N_INSNS (4), /* DI */
850 COSTS_N_INSNS (5)}, /* other */
851 0, /* cost of multiply per each bit set */
852 {COSTS_N_INSNS (19), /* cost of a divide/mod for QI */
853 COSTS_N_INSNS (35), /* HI */
854 COSTS_N_INSNS (51), /* SI */
855 COSTS_N_INSNS (83), /* DI */
856 COSTS_N_INSNS (83)}, /* other */
857 COSTS_N_INSNS (1), /* cost of movsx */
858 COSTS_N_INSNS (1), /* cost of movzx */
859 8, /* "large" insn */
860 9, /* MOVE_RATIO */
861 4, /* cost for loading QImode using movzbl */
862 {3, 4, 3}, /* cost of loading integer registers
863 in QImode, HImode and SImode.
864 Relative to reg-reg move (2). */
865 {3, 4, 3}, /* cost of storing integer registers */
866 4, /* cost of reg,reg fld/fst */
867 {4, 4, 12}, /* cost of loading fp registers
868 in SFmode, DFmode and XFmode */
869 {6, 6, 8}, /* cost of storing fp registers
870 in SFmode, DFmode and XFmode */
871 2, /* cost of moving MMX register */
872 {3, 3}, /* cost of loading MMX registers
873 in SImode and DImode */
874 {4, 4}, /* cost of storing MMX registers
875 in SImode and DImode */
876 2, /* cost of moving SSE register */
877 {4, 4, 3}, /* cost of loading SSE registers
878 in SImode, DImode and TImode */
879 {4, 4, 5}, /* cost of storing SSE registers
880 in SImode, DImode and TImode */
881 3, /* MMX or SSE register to integer */
882 /* On K8:
883 MOVD reg64, xmmreg Double FSTORE 4
884 MOVD reg32, xmmreg Double FSTORE 4
885 On AMDFAM10:
886 MOVD reg64, xmmreg Double FADD 3
887 1/1 1/1
888 MOVD reg32, xmmreg Double FADD 3
889 1/1 1/1 */
890 64, /* size of l1 cache. */
891 512, /* size of l2 cache. */
892 64, /* size of prefetch block */
893 /* New AMD processors never drop prefetches; if they cannot be performed
894 immediately, they are queued. We set number of simultaneous prefetches
895 to a large constant to reflect this (it probably is not a good idea not
896 to limit number of prefetches at all, as their execution also takes some
897 time). */
898 100, /* number of parallel prefetches */
899 2, /* Branch cost */
900 COSTS_N_INSNS (4), /* cost of FADD and FSUB insns. */
901 COSTS_N_INSNS (4), /* cost of FMUL instruction. */
902 COSTS_N_INSNS (19), /* cost of FDIV instruction. */
903 COSTS_N_INSNS (2), /* cost of FABS instruction. */
904 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
905 COSTS_N_INSNS (35), /* cost of FSQRT instruction. */
907 amdfam10_memcpy,
908 amdfam10_memset,
909 4, /* scalar_stmt_cost. */
910 2, /* scalar load_cost. */
911 2, /* scalar_store_cost. */
912 6, /* vec_stmt_cost. */
913 0, /* vec_to_scalar_cost. */
914 2, /* scalar_to_vec_cost. */
915 2, /* vec_align_load_cost. */
916 2, /* vec_unalign_load_cost. */
917 2, /* vec_store_cost. */
918 2, /* cond_taken_branch_cost. */
 919 1, /* cond_not_taken_branch_cost. */
};
 922 /* BDVER1 has an optimized REP instruction for medium-sized blocks, but for
 923 very small blocks it is better to use a loop.  For large blocks, libcall
 924 can do non-temporal accesses and beat inline code considerably. */
925 static stringop_algs bdver1_memcpy[2] = {
926 {libcall, {{6, loop, false}, {14, unrolled_loop, false},
927 {-1, rep_prefix_4_byte, false}}},
928 {libcall, {{16, loop, false}, {8192, rep_prefix_8_byte, false},
929 {-1, libcall, false}}}};
930 static stringop_algs bdver1_memset[2] = {
931 {libcall, {{8, loop, false}, {24, unrolled_loop, false},
932 {2048, rep_prefix_4_byte, false}, {-1, libcall, false}}},
933 {libcall, {{48, unrolled_loop, false}, {8192, rep_prefix_8_byte, false},
934 {-1, libcall, false}}}};
936 const struct processor_costs bdver1_cost = {
937 COSTS_N_INSNS (1), /* cost of an add instruction */
938 COSTS_N_INSNS (1), /* cost of a lea instruction */
939 COSTS_N_INSNS (1), /* variable shift costs */
940 COSTS_N_INSNS (1), /* constant shift costs */
941 {COSTS_N_INSNS (4), /* cost of starting multiply for QI */
942 COSTS_N_INSNS (4), /* HI */
943 COSTS_N_INSNS (4), /* SI */
944 COSTS_N_INSNS (6), /* DI */
945 COSTS_N_INSNS (6)}, /* other */
946 0, /* cost of multiply per each bit set */
947 {COSTS_N_INSNS (19), /* cost of a divide/mod for QI */
948 COSTS_N_INSNS (35), /* HI */
949 COSTS_N_INSNS (51), /* SI */
950 COSTS_N_INSNS (83), /* DI */
951 COSTS_N_INSNS (83)}, /* other */
952 COSTS_N_INSNS (1), /* cost of movsx */
953 COSTS_N_INSNS (1), /* cost of movzx */
954 8, /* "large" insn */
955 9, /* MOVE_RATIO */
956 4, /* cost for loading QImode using movzbl */
957 {5, 5, 4}, /* cost of loading integer registers
958 in QImode, HImode and SImode.
959 Relative to reg-reg move (2). */
960 {4, 4, 4}, /* cost of storing integer registers */
961 2, /* cost of reg,reg fld/fst */
962 {5, 5, 12}, /* cost of loading fp registers
963 in SFmode, DFmode and XFmode */
964 {4, 4, 8}, /* cost of storing fp registers
965 in SFmode, DFmode and XFmode */
966 2, /* cost of moving MMX register */
967 {4, 4}, /* cost of loading MMX registers
968 in SImode and DImode */
969 {4, 4}, /* cost of storing MMX registers
970 in SImode and DImode */
971 2, /* cost of moving SSE register */
972 {4, 4, 4}, /* cost of loading SSE registers
973 in SImode, DImode and TImode */
974 {4, 4, 4}, /* cost of storing SSE registers
975 in SImode, DImode and TImode */
976 2, /* MMX or SSE register to integer */
977 /* On K8:
978 MOVD reg64, xmmreg Double FSTORE 4
979 MOVD reg32, xmmreg Double FSTORE 4
980 On AMDFAM10:
981 MOVD reg64, xmmreg Double FADD 3
982 1/1 1/1
983 MOVD reg32, xmmreg Double FADD 3
984 1/1 1/1 */
985 16, /* size of l1 cache. */
986 2048, /* size of l2 cache. */
987 64, /* size of prefetch block */
988 /* New AMD processors never drop prefetches; if they cannot be performed
989 immediately, they are queued. We set number of simultaneous prefetches
990 to a large constant to reflect this (it probably is not a good idea not
991 to limit number of prefetches at all, as their execution also takes some
992 time). */
993 100, /* number of parallel prefetches */
994 2, /* Branch cost */
995 COSTS_N_INSNS (6), /* cost of FADD and FSUB insns. */
996 COSTS_N_INSNS (6), /* cost of FMUL instruction. */
997 COSTS_N_INSNS (42), /* cost of FDIV instruction. */
998 COSTS_N_INSNS (2), /* cost of FABS instruction. */
999 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
1000 COSTS_N_INSNS (52), /* cost of FSQRT instruction. */
1002 bdver1_memcpy,
1003 bdver1_memset,
1004 6, /* scalar_stmt_cost. */
1005 4, /* scalar load_cost. */
1006 4, /* scalar_store_cost. */
1007 6, /* vec_stmt_cost. */
1008 0, /* vec_to_scalar_cost. */
1009 2, /* scalar_to_vec_cost. */
1010 4, /* vec_align_load_cost. */
1011 4, /* vec_unalign_load_cost. */
1012 4, /* vec_store_cost. */
1013 4, /* cond_taken_branch_cost. */
 1014 2, /* cond_not_taken_branch_cost. */
};
 1017 /* BDVER2 has an optimized REP instruction for medium-sized blocks, but for
 1018 very small blocks it is better to use a loop.  For large blocks, libcall
 1019 can do non-temporal accesses and beat inline code considerably. */
1021 static stringop_algs bdver2_memcpy[2] = {
1022 {libcall, {{6, loop, false}, {14, unrolled_loop, false},
1023 {-1, rep_prefix_4_byte, false}}},
1024 {libcall, {{16, loop, false}, {8192, rep_prefix_8_byte, false},
1025 {-1, libcall, false}}}};
1026 static stringop_algs bdver2_memset[2] = {
1027 {libcall, {{8, loop, false}, {24, unrolled_loop, false},
1028 {2048, rep_prefix_4_byte, false}, {-1, libcall, false}}},
1029 {libcall, {{48, unrolled_loop, false}, {8192, rep_prefix_8_byte, false},
1030 {-1, libcall, false}}}};
1032 const struct processor_costs bdver2_cost = {
1033 COSTS_N_INSNS (1), /* cost of an add instruction */
1034 COSTS_N_INSNS (1), /* cost of a lea instruction */
1035 COSTS_N_INSNS (1), /* variable shift costs */
1036 COSTS_N_INSNS (1), /* constant shift costs */
1037 {COSTS_N_INSNS (4), /* cost of starting multiply for QI */
1038 COSTS_N_INSNS (4), /* HI */
1039 COSTS_N_INSNS (4), /* SI */
1040 COSTS_N_INSNS (6), /* DI */
1041 COSTS_N_INSNS (6)}, /* other */
1042 0, /* cost of multiply per each bit set */
1043 {COSTS_N_INSNS (19), /* cost of a divide/mod for QI */
1044 COSTS_N_INSNS (35), /* HI */
1045 COSTS_N_INSNS (51), /* SI */
1046 COSTS_N_INSNS (83), /* DI */
1047 COSTS_N_INSNS (83)}, /* other */
1048 COSTS_N_INSNS (1), /* cost of movsx */
1049 COSTS_N_INSNS (1), /* cost of movzx */
1050 8, /* "large" insn */
1051 9, /* MOVE_RATIO */
1052 4, /* cost for loading QImode using movzbl */
1053 {5, 5, 4}, /* cost of loading integer registers
1054 in QImode, HImode and SImode.
1055 Relative to reg-reg move (2). */
1056 {4, 4, 4}, /* cost of storing integer registers */
1057 2, /* cost of reg,reg fld/fst */
1058 {5, 5, 12}, /* cost of loading fp registers
1059 in SFmode, DFmode and XFmode */
1060 {4, 4, 8}, /* cost of storing fp registers
1061 in SFmode, DFmode and XFmode */
1062 2, /* cost of moving MMX register */
1063 {4, 4}, /* cost of loading MMX registers
1064 in SImode and DImode */
1065 {4, 4}, /* cost of storing MMX registers
1066 in SImode and DImode */
1067 2, /* cost of moving SSE register */
1068 {4, 4, 4}, /* cost of loading SSE registers
1069 in SImode, DImode and TImode */
1070 {4, 4, 4}, /* cost of storing SSE registers
1071 in SImode, DImode and TImode */
1072 2, /* MMX or SSE register to integer */
1073 /* On K8:
1074 MOVD reg64, xmmreg Double FSTORE 4
1075 MOVD reg32, xmmreg Double FSTORE 4
1076 On AMDFAM10:
1077 MOVD reg64, xmmreg Double FADD 3
1078 1/1 1/1
1079 MOVD reg32, xmmreg Double FADD 3
1080 1/1 1/1 */
1081 16, /* size of l1 cache. */
1082 2048, /* size of l2 cache. */
1083 64, /* size of prefetch block */
1084 /* New AMD processors never drop prefetches; if they cannot be performed
1085 immediately, they are queued. We set number of simultaneous prefetches
1086 to a large constant to reflect this (it probably is not a good idea not
1087 to limit number of prefetches at all, as their execution also takes some
1088 time). */
1089 100, /* number of parallel prefetches */
1090 2, /* Branch cost */
1091 COSTS_N_INSNS (6), /* cost of FADD and FSUB insns. */
1092 COSTS_N_INSNS (6), /* cost of FMUL instruction. */
1093 COSTS_N_INSNS (42), /* cost of FDIV instruction. */
1094 COSTS_N_INSNS (2), /* cost of FABS instruction. */
1095 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
1096 COSTS_N_INSNS (52), /* cost of FSQRT instruction. */
1098 bdver2_memcpy,
1099 bdver2_memset,
1100 6, /* scalar_stmt_cost. */
1101 4, /* scalar load_cost. */
1102 4, /* scalar_store_cost. */
1103 6, /* vec_stmt_cost. */
1104 0, /* vec_to_scalar_cost. */
1105 2, /* scalar_to_vec_cost. */
1106 4, /* vec_align_load_cost. */
1107 4, /* vec_unalign_load_cost. */
1108 4, /* vec_store_cost. */
1109 4, /* cond_taken_branch_cost. */
 1110 2, /* cond_not_taken_branch_cost. */
};
 1114 /* BDVER3 has an optimized REP instruction for medium-sized blocks, but for
 1115 very small blocks it is better to use a loop.  For large blocks, libcall
 1116 can do non-temporal accesses and beat inline code considerably. */
1117 static stringop_algs bdver3_memcpy[2] = {
1118 {libcall, {{6, loop, false}, {14, unrolled_loop, false},
1119 {-1, rep_prefix_4_byte, false}}},
1120 {libcall, {{16, loop, false}, {8192, rep_prefix_8_byte, false},
1121 {-1, libcall, false}}}};
1122 static stringop_algs bdver3_memset[2] = {
1123 {libcall, {{8, loop, false}, {24, unrolled_loop, false},
1124 {2048, rep_prefix_4_byte, false}, {-1, libcall, false}}},
1125 {libcall, {{48, unrolled_loop, false}, {8192, rep_prefix_8_byte, false},
1126 {-1, libcall, false}}}};
1127 struct processor_costs bdver3_cost = {
1128 COSTS_N_INSNS (1), /* cost of an add instruction */
1129 COSTS_N_INSNS (1), /* cost of a lea instruction */
1130 COSTS_N_INSNS (1), /* variable shift costs */
1131 COSTS_N_INSNS (1), /* constant shift costs */
1132 {COSTS_N_INSNS (4), /* cost of starting multiply for QI */
1133 COSTS_N_INSNS (4), /* HI */
1134 COSTS_N_INSNS (4), /* SI */
1135 COSTS_N_INSNS (6), /* DI */
1136 COSTS_N_INSNS (6)}, /* other */
1137 0, /* cost of multiply per each bit set */
1138 {COSTS_N_INSNS (19), /* cost of a divide/mod for QI */
1139 COSTS_N_INSNS (35), /* HI */
1140 COSTS_N_INSNS (51), /* SI */
1141 COSTS_N_INSNS (83), /* DI */
1142 COSTS_N_INSNS (83)}, /* other */
1143 COSTS_N_INSNS (1), /* cost of movsx */
1144 COSTS_N_INSNS (1), /* cost of movzx */
1145 8, /* "large" insn */
1146 9, /* MOVE_RATIO */
1147 4, /* cost for loading QImode using movzbl */
1148 {5, 5, 4}, /* cost of loading integer registers
1149 in QImode, HImode and SImode.
1150 Relative to reg-reg move (2). */
1151 {4, 4, 4}, /* cost of storing integer registers */
1152 2, /* cost of reg,reg fld/fst */
1153 {5, 5, 12}, /* cost of loading fp registers
1154 in SFmode, DFmode and XFmode */
1155 {4, 4, 8}, /* cost of storing fp registers
1156 in SFmode, DFmode and XFmode */
1157 2, /* cost of moving MMX register */
1158 {4, 4}, /* cost of loading MMX registers
1159 in SImode and DImode */
1160 {4, 4}, /* cost of storing MMX registers
1161 in SImode and DImode */
1162 2, /* cost of moving SSE register */
1163 {4, 4, 4}, /* cost of loading SSE registers
1164 in SImode, DImode and TImode */
1165 {4, 4, 4}, /* cost of storing SSE registers
1166 in SImode, DImode and TImode */
1167 2, /* MMX or SSE register to integer */
1168 16, /* size of l1 cache. */
1169 2048, /* size of l2 cache. */
1170 64, /* size of prefetch block */
1171 /* New AMD processors never drop prefetches; if they cannot be performed
1172 immediately, they are queued. We set number of simultaneous prefetches
1173 to a large constant to reflect this (it probably is not a good idea not
1174 to limit number of prefetches at all, as their execution also takes some
1175 time). */
1176 100, /* number of parallel prefetches */
1177 2, /* Branch cost */
1178 COSTS_N_INSNS (6), /* cost of FADD and FSUB insns. */
1179 COSTS_N_INSNS (6), /* cost of FMUL instruction. */
1180 COSTS_N_INSNS (42), /* cost of FDIV instruction. */
1181 COSTS_N_INSNS (2), /* cost of FABS instruction. */
1182 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
1183 COSTS_N_INSNS (52), /* cost of FSQRT instruction. */
1185 bdver3_memcpy,
1186 bdver3_memset,
1187 6, /* scalar_stmt_cost. */
1188 4, /* scalar load_cost. */
1189 4, /* scalar_store_cost. */
1190 6, /* vec_stmt_cost. */
1191 0, /* vec_to_scalar_cost. */
1192 2, /* scalar_to_vec_cost. */
1193 4, /* vec_align_load_cost. */
1194 4, /* vec_unalign_load_cost. */
1195 4, /* vec_store_cost. */
1196 4, /* cond_taken_branch_cost. */
 1197 2, /* cond_not_taken_branch_cost. */
};
 1200 /* BDVER4 has an optimized REP instruction for medium-sized blocks, but for
 1201 very small blocks it is better to use a loop.  For large blocks, libcall
 1202 can do non-temporal accesses and beat inline code considerably. */
1203 static stringop_algs bdver4_memcpy[2] = {
1204 {libcall, {{6, loop, false}, {14, unrolled_loop, false},
1205 {-1, rep_prefix_4_byte, false}}},
1206 {libcall, {{16, loop, false}, {8192, rep_prefix_8_byte, false},
1207 {-1, libcall, false}}}};
1208 static stringop_algs bdver4_memset[2] = {
1209 {libcall, {{8, loop, false}, {24, unrolled_loop, false},
1210 {2048, rep_prefix_4_byte, false}, {-1, libcall, false}}},
1211 {libcall, {{48, unrolled_loop, false}, {8192, rep_prefix_8_byte, false},
1212 {-1, libcall, false}}}};
1213 struct processor_costs bdver4_cost = {
1214 COSTS_N_INSNS (1), /* cost of an add instruction */
1215 COSTS_N_INSNS (1), /* cost of a lea instruction */
1216 COSTS_N_INSNS (1), /* variable shift costs */
1217 COSTS_N_INSNS (1), /* constant shift costs */
1218 {COSTS_N_INSNS (4), /* cost of starting multiply for QI */
1219 COSTS_N_INSNS (4), /* HI */
1220 COSTS_N_INSNS (4), /* SI */
1221 COSTS_N_INSNS (6), /* DI */
1222 COSTS_N_INSNS (6)}, /* other */
1223 0, /* cost of multiply per each bit set */
1224 {COSTS_N_INSNS (19), /* cost of a divide/mod for QI */
1225 COSTS_N_INSNS (35), /* HI */
1226 COSTS_N_INSNS (51), /* SI */
1227 COSTS_N_INSNS (83), /* DI */
1228 COSTS_N_INSNS (83)}, /* other */
1229 COSTS_N_INSNS (1), /* cost of movsx */
1230 COSTS_N_INSNS (1), /* cost of movzx */
1231 8, /* "large" insn */
1232 9, /* MOVE_RATIO */
1233 4, /* cost for loading QImode using movzbl */
1234 {5, 5, 4}, /* cost of loading integer registers
1235 in QImode, HImode and SImode.
1236 Relative to reg-reg move (2). */
1237 {4, 4, 4}, /* cost of storing integer registers */
1238 2, /* cost of reg,reg fld/fst */
1239 {5, 5, 12}, /* cost of loading fp registers
1240 in SFmode, DFmode and XFmode */
1241 {4, 4, 8}, /* cost of storing fp registers
1242 in SFmode, DFmode and XFmode */
1243 2, /* cost of moving MMX register */
1244 {4, 4}, /* cost of loading MMX registers
1245 in SImode and DImode */
1246 {4, 4}, /* cost of storing MMX registers
1247 in SImode and DImode */
1248 2, /* cost of moving SSE register */
1249 {4, 4, 4}, /* cost of loading SSE registers
1250 in SImode, DImode and TImode */
1251 {4, 4, 4}, /* cost of storing SSE registers
1252 in SImode, DImode and TImode */
1253 2, /* MMX or SSE register to integer */
1254 16, /* size of l1 cache. */
1255 2048, /* size of l2 cache. */
1256 64, /* size of prefetch block */
1257 /* New AMD processors never drop prefetches; if they cannot be performed
1258 immediately, they are queued. We set number of simultaneous prefetches
1259 to a large constant to reflect this (it probably is not a good idea not
1260 to limit number of prefetches at all, as their execution also takes some
1261 time). */
1262 100, /* number of parallel prefetches */
1263 2, /* Branch cost */
1264 COSTS_N_INSNS (6), /* cost of FADD and FSUB insns. */
1265 COSTS_N_INSNS (6), /* cost of FMUL instruction. */
1266 COSTS_N_INSNS (42), /* cost of FDIV instruction. */
1267 COSTS_N_INSNS (2), /* cost of FABS instruction. */
1268 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
1269 COSTS_N_INSNS (52), /* cost of FSQRT instruction. */
1271 bdver4_memcpy,
1272 bdver4_memset,
1273 6, /* scalar_stmt_cost. */
1274 4, /* scalar load_cost. */
1275 4, /* scalar_store_cost. */
1276 6, /* vec_stmt_cost. */
1277 0, /* vec_to_scalar_cost. */
1278 2, /* scalar_to_vec_cost. */
1279 4, /* vec_align_load_cost. */
1280 4, /* vec_unalign_load_cost. */
1281 4, /* vec_store_cost. */
1282 4, /* cond_taken_branch_cost. */
 1283 2, /* cond_not_taken_branch_cost. */
};
 1286 /* BTVER1 has an optimized REP instruction for medium-sized blocks, but for
 1287 very small blocks it is better to use a loop.  For large blocks, libcall can
 1288 do non-temporal accesses and beat inline code considerably. */
1289 static stringop_algs btver1_memcpy[2] = {
1290 {libcall, {{6, loop, false}, {14, unrolled_loop, false},
1291 {-1, rep_prefix_4_byte, false}}},
1292 {libcall, {{16, loop, false}, {8192, rep_prefix_8_byte, false},
1293 {-1, libcall, false}}}};
1294 static stringop_algs btver1_memset[2] = {
1295 {libcall, {{8, loop, false}, {24, unrolled_loop, false},
1296 {2048, rep_prefix_4_byte, false}, {-1, libcall, false}}},
1297 {libcall, {{48, unrolled_loop, false}, {8192, rep_prefix_8_byte, false},
1298 {-1, libcall, false}}}};
1299 const struct processor_costs btver1_cost = {
1300 COSTS_N_INSNS (1), /* cost of an add instruction */
1301 COSTS_N_INSNS (2), /* cost of a lea instruction */
1302 COSTS_N_INSNS (1), /* variable shift costs */
1303 COSTS_N_INSNS (1), /* constant shift costs */
1304 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
1305 COSTS_N_INSNS (4), /* HI */
1306 COSTS_N_INSNS (3), /* SI */
1307 COSTS_N_INSNS (4), /* DI */
1308 COSTS_N_INSNS (5)}, /* other */
1309 0, /* cost of multiply per each bit set */
1310 {COSTS_N_INSNS (19), /* cost of a divide/mod for QI */
1311 COSTS_N_INSNS (35), /* HI */
1312 COSTS_N_INSNS (51), /* SI */
1313 COSTS_N_INSNS (83), /* DI */
1314 COSTS_N_INSNS (83)}, /* other */
1315 COSTS_N_INSNS (1), /* cost of movsx */
1316 COSTS_N_INSNS (1), /* cost of movzx */
1317 8, /* "large" insn */
1318 9, /* MOVE_RATIO */
1319 4, /* cost for loading QImode using movzbl */
1320 {3, 4, 3}, /* cost of loading integer registers
1321 in QImode, HImode and SImode.
1322 Relative to reg-reg move (2). */
1323 {3, 4, 3}, /* cost of storing integer registers */
1324 4, /* cost of reg,reg fld/fst */
1325 {4, 4, 12}, /* cost of loading fp registers
1326 in SFmode, DFmode and XFmode */
1327 {6, 6, 8}, /* cost of storing fp registers
1328 in SFmode, DFmode and XFmode */
1329 2, /* cost of moving MMX register */
1330 {3, 3}, /* cost of loading MMX registers
1331 in SImode and DImode */
1332 {4, 4}, /* cost of storing MMX registers
1333 in SImode and DImode */
1334 2, /* cost of moving SSE register */
1335 {4, 4, 3}, /* cost of loading SSE registers
1336 in SImode, DImode and TImode */
1337 {4, 4, 5}, /* cost of storing SSE registers
1338 in SImode, DImode and TImode */
1339 3, /* MMX or SSE register to integer */
1340 /* On K8:
1341 MOVD reg64, xmmreg Double FSTORE 4
1342 MOVD reg32, xmmreg Double FSTORE 4
1343 On AMDFAM10:
1344 MOVD reg64, xmmreg Double FADD 3
1345 1/1 1/1
1346 MOVD reg32, xmmreg Double FADD 3
1347 1/1 1/1 */
1348 32, /* size of l1 cache. */
1349 512, /* size of l2 cache. */
1350 64, /* size of prefetch block */
1351 100, /* number of parallel prefetches */
1352 2, /* Branch cost */
1353 COSTS_N_INSNS (4), /* cost of FADD and FSUB insns. */
1354 COSTS_N_INSNS (4), /* cost of FMUL instruction. */
1355 COSTS_N_INSNS (19), /* cost of FDIV instruction. */
1356 COSTS_N_INSNS (2), /* cost of FABS instruction. */
1357 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
1358 COSTS_N_INSNS (35), /* cost of FSQRT instruction. */
1360 btver1_memcpy,
1361 btver1_memset,
1362 4, /* scalar_stmt_cost. */
1363 2, /* scalar load_cost. */
1364 2, /* scalar_store_cost. */
1365 6, /* vec_stmt_cost. */
1366 0, /* vec_to_scalar_cost. */
1367 2, /* scalar_to_vec_cost. */
1368 2, /* vec_align_load_cost. */
1369 2, /* vec_unalign_load_cost. */
1370 2, /* vec_store_cost. */
1371 2, /* cond_taken_branch_cost. */
 1372 1, /* cond_not_taken_branch_cost. */
};
1375 static stringop_algs btver2_memcpy[2] = {
1376 {libcall, {{6, loop, false}, {14, unrolled_loop, false},
1377 {-1, rep_prefix_4_byte, false}}},
1378 {libcall, {{16, loop, false}, {8192, rep_prefix_8_byte, false},
1379 {-1, libcall, false}}}};
1380 static stringop_algs btver2_memset[2] = {
1381 {libcall, {{8, loop, false}, {24, unrolled_loop, false},
1382 {2048, rep_prefix_4_byte, false}, {-1, libcall, false}}},
1383 {libcall, {{48, unrolled_loop, false}, {8192, rep_prefix_8_byte, false},
1384 {-1, libcall, false}}}};
1385 const struct processor_costs btver2_cost = {
1386 COSTS_N_INSNS (1), /* cost of an add instruction */
1387 COSTS_N_INSNS (2), /* cost of a lea instruction */
1388 COSTS_N_INSNS (1), /* variable shift costs */
1389 COSTS_N_INSNS (1), /* constant shift costs */
1390 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
1391 COSTS_N_INSNS (4), /* HI */
1392 COSTS_N_INSNS (3), /* SI */
1393 COSTS_N_INSNS (4), /* DI */
1394 COSTS_N_INSNS (5)}, /* other */
1395 0, /* cost of multiply per each bit set */
1396 {COSTS_N_INSNS (19), /* cost of a divide/mod for QI */
1397 COSTS_N_INSNS (35), /* HI */
1398 COSTS_N_INSNS (51), /* SI */
1399 COSTS_N_INSNS (83), /* DI */
1400 COSTS_N_INSNS (83)}, /* other */
1401 COSTS_N_INSNS (1), /* cost of movsx */
1402 COSTS_N_INSNS (1), /* cost of movzx */
1403 8, /* "large" insn */
1404 9, /* MOVE_RATIO */
1405 4, /* cost for loading QImode using movzbl */
1406 {3, 4, 3}, /* cost of loading integer registers
1407 in QImode, HImode and SImode.
1408 Relative to reg-reg move (2). */
1409 {3, 4, 3}, /* cost of storing integer registers */
1410 4, /* cost of reg,reg fld/fst */
1411 {4, 4, 12}, /* cost of loading fp registers
1412 in SFmode, DFmode and XFmode */
1413 {6, 6, 8}, /* cost of storing fp registers
1414 in SFmode, DFmode and XFmode */
1415 2, /* cost of moving MMX register */
1416 {3, 3}, /* cost of loading MMX registers
1417 in SImode and DImode */
1418 {4, 4}, /* cost of storing MMX registers
1419 in SImode and DImode */
1420 2, /* cost of moving SSE register */
1421 {4, 4, 3}, /* cost of loading SSE registers
1422 in SImode, DImode and TImode */
1423 {4, 4, 5}, /* cost of storing SSE registers
1424 in SImode, DImode and TImode */
1425 3, /* MMX or SSE register to integer */
1426 /* On K8:
1427 MOVD reg64, xmmreg Double FSTORE 4
1428 MOVD reg32, xmmreg Double FSTORE 4
1429 On AMDFAM10:
1430 MOVD reg64, xmmreg Double FADD 3
1431 1/1 1/1
1432 MOVD reg32, xmmreg Double FADD 3
1433 1/1 1/1 */
1434 32, /* size of l1 cache. */
1435 2048, /* size of l2 cache. */
1436 64, /* size of prefetch block */
1437 100, /* number of parallel prefetches */
1438 2, /* Branch cost */
1439 COSTS_N_INSNS (4), /* cost of FADD and FSUB insns. */
1440 COSTS_N_INSNS (4), /* cost of FMUL instruction. */
1441 COSTS_N_INSNS (19), /* cost of FDIV instruction. */
1442 COSTS_N_INSNS (2), /* cost of FABS instruction. */
1443 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
1444 COSTS_N_INSNS (35), /* cost of FSQRT instruction. */
1445 btver2_memcpy,
1446 btver2_memset,
1447 4, /* scalar_stmt_cost. */
1448 2, /* scalar load_cost. */
1449 2, /* scalar_store_cost. */
1450 6, /* vec_stmt_cost. */
1451 0, /* vec_to_scalar_cost. */
1452 2, /* scalar_to_vec_cost. */
1453 2, /* vec_align_load_cost. */
1454 2, /* vec_unalign_load_cost. */
1455 2, /* vec_store_cost. */
1456 2, /* cond_taken_branch_cost. */
1457 1, /* cond_not_taken_branch_cost. */
1460 static stringop_algs pentium4_memcpy[2] = {
1461 {libcall, {{12, loop_1_byte, false}, {-1, rep_prefix_4_byte, false}}},
1462 DUMMY_STRINGOP_ALGS};
1463 static stringop_algs pentium4_memset[2] = {
1464 {libcall, {{6, loop_1_byte, false}, {48, loop, false},
1465 {20480, rep_prefix_4_byte, false}, {-1, libcall, false}}},
1466 DUMMY_STRINGOP_ALGS};
1468 static const
1469 struct processor_costs pentium4_cost = {
1470 COSTS_N_INSNS (1), /* cost of an add instruction */
1471 COSTS_N_INSNS (3), /* cost of a lea instruction */
1472 COSTS_N_INSNS (4), /* variable shift costs */
1473 COSTS_N_INSNS (4), /* constant shift costs */
1474 {COSTS_N_INSNS (15), /* cost of starting multiply for QI */
1475 COSTS_N_INSNS (15), /* HI */
1476 COSTS_N_INSNS (15), /* SI */
1477 COSTS_N_INSNS (15), /* DI */
1478 COSTS_N_INSNS (15)}, /* other */
1479 0, /* cost of multiply per each bit set */
1480 {COSTS_N_INSNS (56), /* cost of a divide/mod for QI */
1481 COSTS_N_INSNS (56), /* HI */
1482 COSTS_N_INSNS (56), /* SI */
1483 COSTS_N_INSNS (56), /* DI */
1484 COSTS_N_INSNS (56)}, /* other */
1485 COSTS_N_INSNS (1), /* cost of movsx */
1486 COSTS_N_INSNS (1), /* cost of movzx */
1487 16, /* "large" insn */
1488 6, /* MOVE_RATIO */
1489 2, /* cost for loading QImode using movzbl */
1490 {4, 5, 4}, /* cost of loading integer registers
1491 in QImode, HImode and SImode.
1492 Relative to reg-reg move (2). */
1493 {2, 3, 2}, /* cost of storing integer registers */
1494 2, /* cost of reg,reg fld/fst */
1495 {2, 2, 6}, /* cost of loading fp registers
1496 in SFmode, DFmode and XFmode */
1497 {4, 4, 6}, /* cost of storing fp registers
1498 in SFmode, DFmode and XFmode */
1499 2, /* cost of moving MMX register */
1500 {2, 2}, /* cost of loading MMX registers
1501 in SImode and DImode */
1502 {2, 2}, /* cost of storing MMX registers
1503 in SImode and DImode */
1504 12, /* cost of moving SSE register */
1505 {12, 12, 12}, /* cost of loading SSE registers
1506 in SImode, DImode and TImode */
1507 {2, 2, 8}, /* cost of storing SSE registers
1508 in SImode, DImode and TImode */
1509 10, /* MMX or SSE register to integer */
1510 8, /* size of l1 cache. */
1511 256, /* size of l2 cache. */
1512 64, /* size of prefetch block */
1513 6, /* number of parallel prefetches */
1514 2, /* Branch cost */
1515 COSTS_N_INSNS (5), /* cost of FADD and FSUB insns. */
1516 COSTS_N_INSNS (7), /* cost of FMUL instruction. */
1517 COSTS_N_INSNS (43), /* cost of FDIV instruction. */
1518 COSTS_N_INSNS (2), /* cost of FABS instruction. */
1519 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
1520 COSTS_N_INSNS (43), /* cost of FSQRT instruction. */
1521 pentium4_memcpy,
1522 pentium4_memset,
1523 1, /* scalar_stmt_cost. */
1524 1, /* scalar load_cost. */
1525 1, /* scalar_store_cost. */
1526 1, /* vec_stmt_cost. */
1527 1, /* vec_to_scalar_cost. */
1528 1, /* scalar_to_vec_cost. */
1529 1, /* vec_align_load_cost. */
1530 2, /* vec_unalign_load_cost. */
1531 1, /* vec_store_cost. */
1532 3, /* cond_taken_branch_cost. */
1533 1, /* cond_not_taken_branch_cost. */
1536 static stringop_algs nocona_memcpy[2] = {
1537 {libcall, {{12, loop_1_byte, false}, {-1, rep_prefix_4_byte, false}}},
1538 {libcall, {{32, loop, false}, {20000, rep_prefix_8_byte, false},
1539 {100000, unrolled_loop, false}, {-1, libcall, false}}}};
1541 static stringop_algs nocona_memset[2] = {
1542 {libcall, {{6, loop_1_byte, false}, {48, loop, false},
1543 {20480, rep_prefix_4_byte, false}, {-1, libcall, false}}},
1544 {libcall, {{24, loop, false}, {64, unrolled_loop, false},
1545 {8192, rep_prefix_8_byte, false}, {-1, libcall, false}}}};
1547 static const
1548 struct processor_costs nocona_cost = {
1549 COSTS_N_INSNS (1), /* cost of an add instruction */
1550 COSTS_N_INSNS (1), /* cost of a lea instruction */
1551 COSTS_N_INSNS (1), /* variable shift costs */
1552 COSTS_N_INSNS (1), /* constant shift costs */
1553 {COSTS_N_INSNS (10), /* cost of starting multiply for QI */
1554 COSTS_N_INSNS (10), /* HI */
1555 COSTS_N_INSNS (10), /* SI */
1556 COSTS_N_INSNS (10), /* DI */
1557 COSTS_N_INSNS (10)}, /* other */
1558 0, /* cost of multiply per each bit set */
1559 {COSTS_N_INSNS (66), /* cost of a divide/mod for QI */
1560 COSTS_N_INSNS (66), /* HI */
1561 COSTS_N_INSNS (66), /* SI */
1562 COSTS_N_INSNS (66), /* DI */
1563 COSTS_N_INSNS (66)}, /* other */
1564 COSTS_N_INSNS (1), /* cost of movsx */
1565 COSTS_N_INSNS (1), /* cost of movzx */
1566 16, /* "large" insn */
1567 17, /* MOVE_RATIO */
1568 4, /* cost for loading QImode using movzbl */
1569 {4, 4, 4}, /* cost of loading integer registers
1570 in QImode, HImode and SImode.
1571 Relative to reg-reg move (2). */
1572 {4, 4, 4}, /* cost of storing integer registers */
1573 3, /* cost of reg,reg fld/fst */
1574 {12, 12, 12}, /* cost of loading fp registers
1575 in SFmode, DFmode and XFmode */
1576 {4, 4, 4}, /* cost of storing fp registers
1577 in SFmode, DFmode and XFmode */
1578 6, /* cost of moving MMX register */
1579 {12, 12}, /* cost of loading MMX registers
1580 in SImode and DImode */
1581 {12, 12}, /* cost of storing MMX registers
1582 in SImode and DImode */
1583 6, /* cost of moving SSE register */
1584 {12, 12, 12}, /* cost of loading SSE registers
1585 in SImode, DImode and TImode */
1586 {12, 12, 12}, /* cost of storing SSE registers
1587 in SImode, DImode and TImode */
1588 8, /* MMX or SSE register to integer */
1589 8, /* size of l1 cache. */
1590 1024, /* size of l2 cache. */
1591 64, /* size of prefetch block */
1592 8, /* number of parallel prefetches */
1593 1, /* Branch cost */
1594 COSTS_N_INSNS (6), /* cost of FADD and FSUB insns. */
1595 COSTS_N_INSNS (8), /* cost of FMUL instruction. */
1596 COSTS_N_INSNS (40), /* cost of FDIV instruction. */
1597 COSTS_N_INSNS (3), /* cost of FABS instruction. */
1598 COSTS_N_INSNS (3), /* cost of FCHS instruction. */
1599 COSTS_N_INSNS (44), /* cost of FSQRT instruction. */
1600 nocona_memcpy,
1601 nocona_memset,
1602 1, /* scalar_stmt_cost. */
1603 1, /* scalar load_cost. */
1604 1, /* scalar_store_cost. */
1605 1, /* vec_stmt_cost. */
1606 1, /* vec_to_scalar_cost. */
1607 1, /* scalar_to_vec_cost. */
1608 1, /* vec_align_load_cost. */
1609 2, /* vec_unalign_load_cost. */
1610 1, /* vec_store_cost. */
1611 3, /* cond_taken_branch_cost. */
1612 1, /* cond_not_taken_branch_cost. */
1615 static stringop_algs atom_memcpy[2] = {
1616 {libcall, {{11, loop, false}, {-1, rep_prefix_4_byte, false}}},
1617 {libcall, {{32, loop, false}, {64, rep_prefix_4_byte, false},
1618 {8192, rep_prefix_8_byte, false}, {-1, libcall, false}}}};
1619 static stringop_algs atom_memset[2] = {
1620 {libcall, {{8, loop, false}, {15, unrolled_loop, false},
1621 {2048, rep_prefix_4_byte, false}, {-1, libcall, false}}},
1622 {libcall, {{24, loop, false}, {32, unrolled_loop, false},
1623 {8192, rep_prefix_8_byte, false}, {-1, libcall, false}}}};
1624 static const
1625 struct processor_costs atom_cost = {
1626 COSTS_N_INSNS (1), /* cost of an add instruction */
1627 COSTS_N_INSNS (1) + 1, /* cost of a lea instruction */
1628 COSTS_N_INSNS (1), /* variable shift costs */
1629 COSTS_N_INSNS (1), /* constant shift costs */
1630 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
1631 COSTS_N_INSNS (4), /* HI */
1632 COSTS_N_INSNS (3), /* SI */
1633 COSTS_N_INSNS (4), /* DI */
1634 COSTS_N_INSNS (2)}, /* other */
1635 0, /* cost of multiply per each bit set */
1636 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
1637 COSTS_N_INSNS (26), /* HI */
1638 COSTS_N_INSNS (42), /* SI */
1639 COSTS_N_INSNS (74), /* DI */
1640 COSTS_N_INSNS (74)}, /* other */
1641 COSTS_N_INSNS (1), /* cost of movsx */
1642 COSTS_N_INSNS (1), /* cost of movzx */
1643 8, /* "large" insn */
1644 17, /* MOVE_RATIO */
1645 4, /* cost for loading QImode using movzbl */
1646 {4, 4, 4}, /* cost of loading integer registers
1647 in QImode, HImode and SImode.
1648 Relative to reg-reg move (2). */
1649 {4, 4, 4}, /* cost of storing integer registers */
1650 4, /* cost of reg,reg fld/fst */
1651 {12, 12, 12}, /* cost of loading fp registers
1652 in SFmode, DFmode and XFmode */
1653 {6, 6, 8}, /* cost of storing fp registers
1654 in SFmode, DFmode and XFmode */
1655 2, /* cost of moving MMX register */
1656 {8, 8}, /* cost of loading MMX registers
1657 in SImode and DImode */
1658 {8, 8}, /* cost of storing MMX registers
1659 in SImode and DImode */
1660 2, /* cost of moving SSE register */
1661 {8, 8, 8}, /* cost of loading SSE registers
1662 in SImode, DImode and TImode */
1663 {8, 8, 8}, /* cost of storing SSE registers
1664 in SImode, DImode and TImode */
1665 5, /* MMX or SSE register to integer */
1666 32, /* size of l1 cache. */
1667 256, /* size of l2 cache. */
1668 64, /* size of prefetch block */
1669 6, /* number of parallel prefetches */
1670 3, /* Branch cost */
1671 COSTS_N_INSNS (8), /* cost of FADD and FSUB insns. */
1672 COSTS_N_INSNS (8), /* cost of FMUL instruction. */
1673 COSTS_N_INSNS (20), /* cost of FDIV instruction. */
1674 COSTS_N_INSNS (8), /* cost of FABS instruction. */
1675 COSTS_N_INSNS (8), /* cost of FCHS instruction. */
1676 COSTS_N_INSNS (40), /* cost of FSQRT instruction. */
1677 atom_memcpy,
1678 atom_memset,
1679 1, /* scalar_stmt_cost. */
1680 1, /* scalar load_cost. */
1681 1, /* scalar_store_cost. */
1682 1, /* vec_stmt_cost. */
1683 1, /* vec_to_scalar_cost. */
1684 1, /* scalar_to_vec_cost. */
1685 1, /* vec_align_load_cost. */
1686 2, /* vec_unalign_load_cost. */
1687 1, /* vec_store_cost. */
1688 3, /* cond_taken_branch_cost. */
1689 1, /* cond_not_taken_branch_cost. */
1692 static stringop_algs slm_memcpy[2] = {
1693 {libcall, {{11, loop, false}, {-1, rep_prefix_4_byte, false}}},
1694 {libcall, {{32, loop, false}, {64, rep_prefix_4_byte, false},
1695 {8192, rep_prefix_8_byte, false}, {-1, libcall, false}}}};
1696 static stringop_algs slm_memset[2] = {
1697 {libcall, {{8, loop, false}, {15, unrolled_loop, false},
1698 {2048, rep_prefix_4_byte, false}, {-1, libcall, false}}},
1699 {libcall, {{24, loop, false}, {32, unrolled_loop, false},
1700 {8192, rep_prefix_8_byte, false}, {-1, libcall, false}}}};
1701 static const
1702 struct processor_costs slm_cost = {
1703 COSTS_N_INSNS (1), /* cost of an add instruction */
1704 COSTS_N_INSNS (1) + 1, /* cost of a lea instruction */
1705 COSTS_N_INSNS (1), /* variable shift costs */
1706 COSTS_N_INSNS (1), /* constant shift costs */
1707 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
1708 COSTS_N_INSNS (3), /* HI */
1709 COSTS_N_INSNS (3), /* SI */
1710 COSTS_N_INSNS (4), /* DI */
1711 COSTS_N_INSNS (2)}, /* other */
1712 0, /* cost of multiply per each bit set */
1713 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
1714 COSTS_N_INSNS (26), /* HI */
1715 COSTS_N_INSNS (42), /* SI */
1716 COSTS_N_INSNS (74), /* DI */
1717 COSTS_N_INSNS (74)}, /* other */
1718 COSTS_N_INSNS (1), /* cost of movsx */
1719 COSTS_N_INSNS (1), /* cost of movzx */
1720 8, /* "large" insn */
1721 17, /* MOVE_RATIO */
1722 4, /* cost for loading QImode using movzbl */
1723 {4, 4, 4}, /* cost of loading integer registers
1724 in QImode, HImode and SImode.
1725 Relative to reg-reg move (2). */
1726 {4, 4, 4}, /* cost of storing integer registers */
1727 4, /* cost of reg,reg fld/fst */
1728 {12, 12, 12}, /* cost of loading fp registers
1729 in SFmode, DFmode and XFmode */
1730 {6, 6, 8}, /* cost of storing fp registers
1731 in SFmode, DFmode and XFmode */
1732 2, /* cost of moving MMX register */
1733 {8, 8}, /* cost of loading MMX registers
1734 in SImode and DImode */
1735 {8, 8}, /* cost of storing MMX registers
1736 in SImode and DImode */
1737 2, /* cost of moving SSE register */
1738 {8, 8, 8}, /* cost of loading SSE registers
1739 in SImode, DImode and TImode */
1740 {8, 8, 8}, /* cost of storing SSE registers
1741 in SImode, DImode and TImode */
1742 5, /* MMX or SSE register to integer */
1743 32, /* size of l1 cache. */
1744 256, /* size of l2 cache. */
1745 64, /* size of prefetch block */
1746 6, /* number of parallel prefetches */
1747 3, /* Branch cost */
1748 COSTS_N_INSNS (8), /* cost of FADD and FSUB insns. */
1749 COSTS_N_INSNS (8), /* cost of FMUL instruction. */
1750 COSTS_N_INSNS (20), /* cost of FDIV instruction. */
1751 COSTS_N_INSNS (8), /* cost of FABS instruction. */
1752 COSTS_N_INSNS (8), /* cost of FCHS instruction. */
1753 COSTS_N_INSNS (40), /* cost of FSQRT instruction. */
1754 slm_memcpy,
1755 slm_memset,
1756 1, /* scalar_stmt_cost. */
1757 1, /* scalar load_cost. */
1758 1, /* scalar_store_cost. */
1759 1, /* vec_stmt_cost. */
1760 4, /* vec_to_scalar_cost. */
1761 1, /* scalar_to_vec_cost. */
1762 1, /* vec_align_load_cost. */
1763 2, /* vec_unalign_load_cost. */
1764 1, /* vec_store_cost. */
1765 3, /* cond_taken_branch_cost. */
1766 1, /* cond_not_taken_branch_cost. */
1769 static stringop_algs intel_memcpy[2] = {
1770 {libcall, {{11, loop, false}, {-1, rep_prefix_4_byte, false}}},
1771 {libcall, {{32, loop, false}, {64, rep_prefix_4_byte, false},
1772 {8192, rep_prefix_8_byte, false}, {-1, libcall, false}}}};
1773 static stringop_algs intel_memset[2] = {
1774 {libcall, {{8, loop, false}, {15, unrolled_loop, false},
1775 {2048, rep_prefix_4_byte, false}, {-1, libcall, false}}},
1776 {libcall, {{24, loop, false}, {32, unrolled_loop, false},
1777 {8192, rep_prefix_8_byte, false}, {-1, libcall, false}}}};
1778 static const
1779 struct processor_costs intel_cost = {
1780 COSTS_N_INSNS (1), /* cost of an add instruction */
1781 COSTS_N_INSNS (1) + 1, /* cost of a lea instruction */
1782 COSTS_N_INSNS (1), /* variable shift costs */
1783 COSTS_N_INSNS (1), /* constant shift costs */
1784 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
1785 COSTS_N_INSNS (3), /* HI */
1786 COSTS_N_INSNS (3), /* SI */
1787 COSTS_N_INSNS (4), /* DI */
1788 COSTS_N_INSNS (2)}, /* other */
1789 0, /* cost of multiply per each bit set */
1790 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
1791 COSTS_N_INSNS (26), /* HI */
1792 COSTS_N_INSNS (42), /* SI */
1793 COSTS_N_INSNS (74), /* DI */
1794 COSTS_N_INSNS (74)}, /* other */
1795 COSTS_N_INSNS (1), /* cost of movsx */
1796 COSTS_N_INSNS (1), /* cost of movzx */
1797 8, /* "large" insn */
1798 17, /* MOVE_RATIO */
1799 4, /* cost for loading QImode using movzbl */
1800 {4, 4, 4}, /* cost of loading integer registers
1801 in QImode, HImode and SImode.
1802 Relative to reg-reg move (2). */
1803 {4, 4, 4}, /* cost of storing integer registers */
1804 4, /* cost of reg,reg fld/fst */
1805 {12, 12, 12}, /* cost of loading fp registers
1806 in SFmode, DFmode and XFmode */
1807 {6, 6, 8}, /* cost of storing fp registers
1808 in SFmode, DFmode and XFmode */
1809 2, /* cost of moving MMX register */
1810 {8, 8}, /* cost of loading MMX registers
1811 in SImode and DImode */
1812 {8, 8}, /* cost of storing MMX registers
1813 in SImode and DImode */
1814 2, /* cost of moving SSE register */
1815 {8, 8, 8}, /* cost of loading SSE registers
1816 in SImode, DImode and TImode */
1817 {8, 8, 8}, /* cost of storing SSE registers
1818 in SImode, DImode and TImode */
1819 5, /* MMX or SSE register to integer */
1820 32, /* size of l1 cache. */
1821 256, /* size of l2 cache. */
1822 64, /* size of prefetch block */
1823 6, /* number of parallel prefetches */
1824 3, /* Branch cost */
1825 COSTS_N_INSNS (8), /* cost of FADD and FSUB insns. */
1826 COSTS_N_INSNS (8), /* cost of FMUL instruction. */
1827 COSTS_N_INSNS (20), /* cost of FDIV instruction. */
1828 COSTS_N_INSNS (8), /* cost of FABS instruction. */
1829 COSTS_N_INSNS (8), /* cost of FCHS instruction. */
1830 COSTS_N_INSNS (40), /* cost of FSQRT instruction. */
1831 intel_memcpy,
1832 intel_memset,
1833 1, /* scalar_stmt_cost. */
1834 1, /* scalar load_cost. */
1835 1, /* scalar_store_cost. */
1836 1, /* vec_stmt_cost. */
1837 4, /* vec_to_scalar_cost. */
1838 1, /* scalar_to_vec_cost. */
1839 1, /* vec_align_load_cost. */
1840 2, /* vec_unalign_load_cost. */
1841 1, /* vec_store_cost. */
1842 3, /* cond_taken_branch_cost. */
1843 1, /* cond_not_taken_branch_cost. */
1846 /* Generic should produce code tuned for Core-i7 (and newer chips)
1847 and btver1 (and newer chips). */
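/* A note on reading these tables (inferred from how they are indexed
   elsewhere in this file with TARGET_64BIT != 0): the first stringop_algs
   entry appears to be used for 32-bit code and the second for 64-bit code.
   Within each entry, the leading algorithm is the fallback for unknown
   sizes, followed by {max_size, algorithm, noalign} ranges, where a
   max_size of -1 terminates the list.  */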
1849 static stringop_algs generic_memcpy[2] = {
1850 {libcall, {{32, loop, false}, {8192, rep_prefix_4_byte, false},
1851 {-1, libcall, false}}},
1852 {libcall, {{32, loop, false}, {8192, rep_prefix_8_byte, false},
1853 {-1, libcall, false}}}};
1854 static stringop_algs generic_memset[2] = {
1855 {libcall, {{32, loop, false}, {8192, rep_prefix_4_byte, false},
1856 {-1, libcall, false}}},
1857 {libcall, {{32, loop, false}, {8192, rep_prefix_8_byte, false},
1858 {-1, libcall, false}}}};
1859 static const
1860 struct processor_costs generic_cost = {
1861 COSTS_N_INSNS (1), /* cost of an add instruction */
1862 /* On all chips taken into consideration, lea is 2 cycles or more. With
1863 this cost, however, our current implementation of synth_mult results in
1864 the use of unnecessary temporary registers, causing regressions on several
1865 SPECfp benchmarks. */
1866 COSTS_N_INSNS (1) + 1, /* cost of a lea instruction */
1867 COSTS_N_INSNS (1), /* variable shift costs */
1868 COSTS_N_INSNS (1), /* constant shift costs */
1869 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
1870 COSTS_N_INSNS (4), /* HI */
1871 COSTS_N_INSNS (3), /* SI */
1872 COSTS_N_INSNS (4), /* DI */
1873 COSTS_N_INSNS (2)}, /* other */
1874 0, /* cost of multiply per each bit set */
1875 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
1876 COSTS_N_INSNS (26), /* HI */
1877 COSTS_N_INSNS (42), /* SI */
1878 COSTS_N_INSNS (74), /* DI */
1879 COSTS_N_INSNS (74)}, /* other */
1880 COSTS_N_INSNS (1), /* cost of movsx */
1881 COSTS_N_INSNS (1), /* cost of movzx */
1882 8, /* "large" insn */
1883 17, /* MOVE_RATIO */
1884 4, /* cost for loading QImode using movzbl */
1885 {4, 4, 4}, /* cost of loading integer registers
1886 in QImode, HImode and SImode.
1887 Relative to reg-reg move (2). */
1888 {4, 4, 4}, /* cost of storing integer registers */
1889 4, /* cost of reg,reg fld/fst */
1890 {12, 12, 12}, /* cost of loading fp registers
1891 in SFmode, DFmode and XFmode */
1892 {6, 6, 8}, /* cost of storing fp registers
1893 in SFmode, DFmode and XFmode */
1894 2, /* cost of moving MMX register */
1895 {8, 8}, /* cost of loading MMX registers
1896 in SImode and DImode */
1897 {8, 8}, /* cost of storing MMX registers
1898 in SImode and DImode */
1899 2, /* cost of moving SSE register */
1900 {8, 8, 8}, /* cost of loading SSE registers
1901 in SImode, DImode and TImode */
1902 {8, 8, 8}, /* cost of storing SSE registers
1903 in SImode, DImode and TImode */
1904 5, /* MMX or SSE register to integer */
1905 32, /* size of l1 cache. */
1906 512, /* size of l2 cache. */
1907 64, /* size of prefetch block */
1908 6, /* number of parallel prefetches */
1909 /* Benchmarks show large regressions on the K8 sixtrack benchmark when this
1910 value is increased to the perhaps more appropriate value of 5. */
1911 3, /* Branch cost */
1912 COSTS_N_INSNS (8), /* cost of FADD and FSUB insns. */
1913 COSTS_N_INSNS (8), /* cost of FMUL instruction. */
1914 COSTS_N_INSNS (20), /* cost of FDIV instruction. */
1915 COSTS_N_INSNS (8), /* cost of FABS instruction. */
1916 COSTS_N_INSNS (8), /* cost of FCHS instruction. */
1917 COSTS_N_INSNS (40), /* cost of FSQRT instruction. */
1918 generic_memcpy,
1919 generic_memset,
1920 1, /* scalar_stmt_cost. */
1921 1, /* scalar load_cost. */
1922 1, /* scalar_store_cost. */
1923 1, /* vec_stmt_cost. */
1924 1, /* vec_to_scalar_cost. */
1925 1, /* scalar_to_vec_cost. */
1926 1, /* vec_align_load_cost. */
1927 2, /* vec_unalign_load_cost. */
1928 1, /* vec_store_cost. */
1929 3, /* cond_taken_branch_cost. */
1930 1, /* cond_not_taken_branch_cost. */
1933 /* core_cost should produce code tuned for the Core family of CPUs. */
1934 static stringop_algs core_memcpy[2] = {
1935 {libcall, {{1024, rep_prefix_4_byte, true}, {-1, libcall, false}}},
1936 {libcall, {{24, loop, true}, {128, rep_prefix_8_byte, true},
1937 {-1, libcall, false}}}};
1938 static stringop_algs core_memset[2] = {
1939 {libcall, {{6, loop_1_byte, true},
1940 {24, loop, true},
1941 {8192, rep_prefix_4_byte, true},
1942 {-1, libcall, false}}},
1943 {libcall, {{24, loop, true}, {512, rep_prefix_8_byte, true},
1944 {-1, libcall, false}}}};
1946 static const
1947 struct processor_costs core_cost = {
1948 COSTS_N_INSNS (1), /* cost of an add instruction */
1949 /* On all chips taken into consideration, lea is 2 cycles or more. With
1950 this cost, however, our current implementation of synth_mult results in
1951 the use of unnecessary temporary registers, causing regressions on several
1952 SPECfp benchmarks. */
1953 COSTS_N_INSNS (1) + 1, /* cost of a lea instruction */
1954 COSTS_N_INSNS (1), /* variable shift costs */
1955 COSTS_N_INSNS (1), /* constant shift costs */
1956 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
1957 COSTS_N_INSNS (4), /* HI */
1958 COSTS_N_INSNS (3), /* SI */
1959 COSTS_N_INSNS (4), /* DI */
1960 COSTS_N_INSNS (2)}, /* other */
1961 0, /* cost of multiply per each bit set */
1962 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
1963 COSTS_N_INSNS (26), /* HI */
1964 COSTS_N_INSNS (42), /* SI */
1965 COSTS_N_INSNS (74), /* DI */
1966 COSTS_N_INSNS (74)}, /* other */
1967 COSTS_N_INSNS (1), /* cost of movsx */
1968 COSTS_N_INSNS (1), /* cost of movzx */
1969 8, /* "large" insn */
1970 17, /* MOVE_RATIO */
1971 4, /* cost for loading QImode using movzbl */
1972 {4, 4, 4}, /* cost of loading integer registers
1973 in QImode, HImode and SImode.
1974 Relative to reg-reg move (2). */
1975 {4, 4, 4}, /* cost of storing integer registers */
1976 4, /* cost of reg,reg fld/fst */
1977 {12, 12, 12}, /* cost of loading fp registers
1978 in SFmode, DFmode and XFmode */
1979 {6, 6, 8}, /* cost of storing fp registers
1980 in SFmode, DFmode and XFmode */
1981 2, /* cost of moving MMX register */
1982 {8, 8}, /* cost of loading MMX registers
1983 in SImode and DImode */
1984 {8, 8}, /* cost of storing MMX registers
1985 in SImode and DImode */
1986 2, /* cost of moving SSE register */
1987 {8, 8, 8}, /* cost of loading SSE registers
1988 in SImode, DImode and TImode */
1989 {8, 8, 8}, /* cost of storing SSE registers
1990 in SImode, DImode and TImode */
1991 5, /* MMX or SSE register to integer */
1992 64, /* size of l1 cache. */
1993 512, /* size of l2 cache. */
1994 64, /* size of prefetch block */
1995 6, /* number of parallel prefetches */
1996 /* FIXME: perhaps a more appropriate value is 5. */
1997 3, /* Branch cost */
1998 COSTS_N_INSNS (8), /* cost of FADD and FSUB insns. */
1999 COSTS_N_INSNS (8), /* cost of FMUL instruction. */
2000 COSTS_N_INSNS (20), /* cost of FDIV instruction. */
2001 COSTS_N_INSNS (8), /* cost of FABS instruction. */
2002 COSTS_N_INSNS (8), /* cost of FCHS instruction. */
2003 COSTS_N_INSNS (40), /* cost of FSQRT instruction. */
2004 core_memcpy,
2005 core_memset,
2006 1, /* scalar_stmt_cost. */
2007 1, /* scalar load_cost. */
2008 1, /* scalar_store_cost. */
2009 1, /* vec_stmt_cost. */
2010 1, /* vec_to_scalar_cost. */
2011 1, /* scalar_to_vec_cost. */
2012 1, /* vec_align_load_cost. */
2013 2, /* vec_unalign_load_cost. */
2014 1, /* vec_store_cost. */
2015 3, /* cond_taken_branch_cost. */
2016 1, /* cond_not_taken_branch_cost. */
2020 /* Set by -mtune. */
2021 const struct processor_costs *ix86_tune_cost = &pentium_cost;
2023 /* Set by -mtune or -Os. */
2024 const struct processor_costs *ix86_cost = &pentium_cost;
2026 /* Processor feature/optimization bitmasks. */
2027 #define m_386 (1<<PROCESSOR_I386)
2028 #define m_486 (1<<PROCESSOR_I486)
2029 #define m_PENT (1<<PROCESSOR_PENTIUM)
2030 #define m_PPRO (1<<PROCESSOR_PENTIUMPRO)
2031 #define m_PENT4 (1<<PROCESSOR_PENTIUM4)
2032 #define m_NOCONA (1<<PROCESSOR_NOCONA)
2033 #define m_P4_NOCONA (m_PENT4 | m_NOCONA)
2034 #define m_CORE2 (1<<PROCESSOR_CORE2)
2035 #define m_NEHALEM (1<<PROCESSOR_NEHALEM)
2036 #define m_SANDYBRIDGE (1<<PROCESSOR_SANDYBRIDGE)
2037 #define m_HASWELL (1<<PROCESSOR_HASWELL)
2038 #define m_CORE_ALL (m_CORE2 | m_NEHALEM | m_SANDYBRIDGE | m_HASWELL)
2039 #define m_BONNELL (1<<PROCESSOR_BONNELL)
2040 #define m_SILVERMONT (1<<PROCESSOR_SILVERMONT)
2041 #define m_KNL (1<<PROCESSOR_KNL)
2042 #define m_INTEL (1<<PROCESSOR_INTEL)
2044 #define m_GEODE (1<<PROCESSOR_GEODE)
2045 #define m_K6 (1<<PROCESSOR_K6)
2046 #define m_K6_GEODE (m_K6 | m_GEODE)
2047 #define m_K8 (1<<PROCESSOR_K8)
2048 #define m_ATHLON (1<<PROCESSOR_ATHLON)
2049 #define m_ATHLON_K8 (m_K8 | m_ATHLON)
2050 #define m_AMDFAM10 (1<<PROCESSOR_AMDFAM10)
2051 #define m_BDVER1 (1<<PROCESSOR_BDVER1)
2052 #define m_BDVER2 (1<<PROCESSOR_BDVER2)
2053 #define m_BDVER3 (1<<PROCESSOR_BDVER3)
2054 #define m_BDVER4 (1<<PROCESSOR_BDVER4)
2055 #define m_BTVER1 (1<<PROCESSOR_BTVER1)
2056 #define m_BTVER2 (1<<PROCESSOR_BTVER2)
2057 #define m_BDVER (m_BDVER1 | m_BDVER2 | m_BDVER3 | m_BDVER4)
2058 #define m_BTVER (m_BTVER1 | m_BTVER2)
2059 #define m_AMD_MULTIPLE (m_ATHLON_K8 | m_AMDFAM10 | m_BDVER | m_BTVER)
2061 #define m_GENERIC (1<<PROCESSOR_GENERIC)
2063 const char* ix86_tune_feature_names[X86_TUNE_LAST] = {
2064 #undef DEF_TUNE
2065 #define DEF_TUNE(tune, name, selector) name,
2066 #include "x86-tune.def"
2067 #undef DEF_TUNE
2070 /* Feature tests against the various tunings. */
2071 unsigned char ix86_tune_features[X86_TUNE_LAST];
2073 /* Feature tests against the various tunings used to create ix86_tune_features
2074 based on the processor mask. */
2075 static unsigned int initial_ix86_tune_features[X86_TUNE_LAST] = {
2076 #undef DEF_TUNE
2077 #define DEF_TUNE(tune, name, selector) selector,
2078 #include "x86-tune.def"
2079 #undef DEF_TUNE
2082 /* Feature tests against the various architecture variations. */
2083 unsigned char ix86_arch_features[X86_ARCH_LAST];
2085 /* Feature tests against the various architecture variations, used to create
2086 ix86_arch_features based on the processor mask. */
2087 static unsigned int initial_ix86_arch_features[X86_ARCH_LAST] = {
2088 /* X86_ARCH_CMOV: Conditional move was added for pentiumpro. */
2089 ~(m_386 | m_486 | m_PENT | m_K6),
2091 /* X86_ARCH_CMPXCHG: Compare and exchange was added for 80486. */
2092 ~m_386,
2094 /* X86_ARCH_CMPXCHG8B: Compare and exchange 8 bytes was added for pentium. */
2095 ~(m_386 | m_486),
2097 /* X86_ARCH_XADD: Exchange and add was added for 80486. */
2098 ~m_386,
2100 /* X86_ARCH_BSWAP: Byteswap was added for 80486. */
2101 ~m_386,
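/* A sketch of how the initializer tables above are expanded (assuming the
   same pattern as set_ix86_tune_features further below): each selector is a
   processor mask, and the per-processor bit is extracted with the mask of
   the active processor, roughly

     ix86_arch_features[i]
       = !!(initial_ix86_arch_features[i] & (1u << ix86_arch));  */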
2104 /* If the average insn count for a single function invocation is
2105 lower than this constant, emit fast (but longer) prologue and
2106 epilogue code. */
2107 #define FAST_PROLOGUE_INSN_COUNT 20
2109 /* Names for 8-bit (low), 8-bit (high), and 16-bit registers, respectively. */
2110 static const char *const qi_reg_name[] = QI_REGISTER_NAMES;
2111 static const char *const qi_high_reg_name[] = QI_HIGH_REGISTER_NAMES;
2112 static const char *const hi_reg_name[] = HI_REGISTER_NAMES;
2114 /* Array of the smallest class containing reg number REGNO, indexed by
2115 REGNO. Used by REGNO_REG_CLASS in i386.h. */
2117 enum reg_class const regclass_map[FIRST_PSEUDO_REGISTER] =
2119 /* ax, dx, cx, bx */
2120 AREG, DREG, CREG, BREG,
2121 /* si, di, bp, sp */
2122 SIREG, DIREG, NON_Q_REGS, NON_Q_REGS,
2123 /* FP registers */
2124 FP_TOP_REG, FP_SECOND_REG, FLOAT_REGS, FLOAT_REGS,
2125 FLOAT_REGS, FLOAT_REGS, FLOAT_REGS, FLOAT_REGS,
2126 /* arg pointer */
2127 NON_Q_REGS,
2128 /* flags, fpsr, fpcr, frame */
2129 NO_REGS, NO_REGS, NO_REGS, NON_Q_REGS,
2130 /* SSE registers */
2131 SSE_FIRST_REG, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS,
2132 SSE_REGS, SSE_REGS,
2133 /* MMX registers */
2134 MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS,
2135 MMX_REGS, MMX_REGS,
2136 /* REX registers */
2137 NON_Q_REGS, NON_Q_REGS, NON_Q_REGS, NON_Q_REGS,
2138 NON_Q_REGS, NON_Q_REGS, NON_Q_REGS, NON_Q_REGS,
2139 /* SSE REX registers */
2140 SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS,
2141 SSE_REGS, SSE_REGS,
2142 /* AVX-512 SSE registers */
2143 EVEX_SSE_REGS, EVEX_SSE_REGS, EVEX_SSE_REGS, EVEX_SSE_REGS,
2144 EVEX_SSE_REGS, EVEX_SSE_REGS, EVEX_SSE_REGS, EVEX_SSE_REGS,
2145 EVEX_SSE_REGS, EVEX_SSE_REGS, EVEX_SSE_REGS, EVEX_SSE_REGS,
2146 EVEX_SSE_REGS, EVEX_SSE_REGS, EVEX_SSE_REGS, EVEX_SSE_REGS,
2147 /* Mask registers. */
2148 MASK_REGS, MASK_EVEX_REGS, MASK_EVEX_REGS, MASK_EVEX_REGS,
2149 MASK_EVEX_REGS, MASK_EVEX_REGS, MASK_EVEX_REGS, MASK_EVEX_REGS,
2150 /* MPX bound registers */
2151 BND_REGS, BND_REGS, BND_REGS, BND_REGS,
2154 /* The "default" register map used in 32bit mode. */
2156 int const dbx_register_map[FIRST_PSEUDO_REGISTER] =
2158 0, 2, 1, 3, 6, 7, 4, 5, /* general regs */
2159 12, 13, 14, 15, 16, 17, 18, 19, /* fp regs */
2160 -1, -1, -1, -1, -1, /* arg, flags, fpsr, fpcr, frame */
2161 21, 22, 23, 24, 25, 26, 27, 28, /* SSE */
2162 29, 30, 31, 32, 33, 34, 35, 36, /* MMX */
2163 -1, -1, -1, -1, -1, -1, -1, -1, /* extended integer registers */
2164 -1, -1, -1, -1, -1, -1, -1, -1, /* extended SSE registers */
2165 -1, -1, -1, -1, -1, -1, -1, -1, /* AVX-512 registers 16-23*/
2166 -1, -1, -1, -1, -1, -1, -1, -1, /* AVX-512 registers 24-31*/
2167 93, 94, 95, 96, 97, 98, 99, 100, /* Mask registers */
2168 101, 102, 103, 104, /* bound registers */
2171 /* The "default" register map used in 64bit mode. */
2173 int const dbx64_register_map[FIRST_PSEUDO_REGISTER] =
2175 0, 1, 2, 3, 4, 5, 6, 7, /* general regs */
2176 33, 34, 35, 36, 37, 38, 39, 40, /* fp regs */
2177 -1, -1, -1, -1, -1, /* arg, flags, fpsr, fpcr, frame */
2178 17, 18, 19, 20, 21, 22, 23, 24, /* SSE */
2179 41, 42, 43, 44, 45, 46, 47, 48, /* MMX */
2180 8,9,10,11,12,13,14,15, /* extended integer registers */
2181 25, 26, 27, 28, 29, 30, 31, 32, /* extended SSE registers */
2182 67, 68, 69, 70, 71, 72, 73, 74, /* AVX-512 registers 16-23 */
2183 75, 76, 77, 78, 79, 80, 81, 82, /* AVX-512 registers 24-31 */
2184 118, 119, 120, 121, 122, 123, 124, 125, /* Mask registers */
2185 126, 127, 128, 129, /* bound registers */
2188 /* Define the register numbers to be used in Dwarf debugging information.
2189 The SVR4 reference port C compiler uses the following register numbers
2190 in its Dwarf output code:
2191 0 for %eax (gcc regno = 0)
2192 1 for %ecx (gcc regno = 2)
2193 2 for %edx (gcc regno = 1)
2194 3 for %ebx (gcc regno = 3)
2195 4 for %esp (gcc regno = 7)
2196 5 for %ebp (gcc regno = 6)
2197 6 for %esi (gcc regno = 4)
2198 7 for %edi (gcc regno = 5)
2199 The following three DWARF register numbers are never generated by
2200 the SVR4 C compiler or by the GNU compilers, but SDB on x86/svr4
2201 believes these numbers have these meanings.
2202 8 for %eip (no gcc equivalent)
2203 9 for %eflags (gcc regno = 17)
2204 10 for %trapno (no gcc equivalent)
2205 It is not at all clear how we should number the FP stack registers
2206 for the x86 architecture. If the version of SDB on x86/svr4 were
2207 a bit less brain dead with respect to floating-point then we would
2208 have a precedent to follow with respect to DWARF register numbers
2209 for x86 FP registers, but the SDB on x86/svr4 is so completely
2210 broken with respect to FP registers that it is hardly worth thinking
2211 of it as something to strive for compatibility with.
2212 The version of x86/svr4 SDB I have at the moment does (partially)
2213 seem to believe that DWARF register number 11 is associated with
2214 the x86 register %st(0), but that's about all. Higher DWARF
2215 register numbers don't seem to be associated with anything in
2216 particular, and even for DWARF regno 11, SDB only seems to under-
2217 stand that it should say that a variable lives in %st(0) (when
2218 asked via an `=' command) if we said it was in DWARF regno 11,
2219 but SDB still prints garbage when asked for the value of the
2220 variable in question (via a `/' command).
2221 (Also note that the labels SDB prints for various FP stack regs
2222 when doing an `x' command are all wrong.)
2223 Note that these problems generally don't affect the native SVR4
2224 C compiler because it doesn't allow the use of -O with -g and
2225 because when it is *not* optimizing, it allocates a memory
2226 location for each floating-point variable, and the memory
2227 location is what gets described in the DWARF AT_location
2228 attribute for the variable in question.
2229 Regardless of the severe mental illness of the x86/svr4 SDB, we
2230 do something sensible here and we use the following DWARF
2231 register numbers. Note that these are all stack-top-relative
2232 numbers.
2233 11 for %st(0) (gcc regno = 8)
2234 12 for %st(1) (gcc regno = 9)
2235 13 for %st(2) (gcc regno = 10)
2236 14 for %st(3) (gcc regno = 11)
2237 15 for %st(4) (gcc regno = 12)
2238 16 for %st(5) (gcc regno = 13)
2239 17 for %st(6) (gcc regno = 14)
2240 18 for %st(7) (gcc regno = 15)
2242 int const svr4_dbx_register_map[FIRST_PSEUDO_REGISTER] =
2244 0, 2, 1, 3, 6, 7, 5, 4, /* general regs */
2245 11, 12, 13, 14, 15, 16, 17, 18, /* fp regs */
2246 -1, 9, -1, -1, -1, /* arg, flags, fpsr, fpcr, frame */
2247 21, 22, 23, 24, 25, 26, 27, 28, /* SSE registers */
2248 29, 30, 31, 32, 33, 34, 35, 36, /* MMX registers */
2249 -1, -1, -1, -1, -1, -1, -1, -1, /* extended integer registers */
2250 -1, -1, -1, -1, -1, -1, -1, -1, /* extended SSE registers */
2251 -1, -1, -1, -1, -1, -1, -1, -1, /* AVX-512 registers 16-23*/
2252 -1, -1, -1, -1, -1, -1, -1, -1, /* AVX-512 registers 24-31*/
2253 93, 94, 95, 96, 97, 98, 99, 100, /* Mask registers */
2254 101, 102, 103, 104, /* bound registers */
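/* A worked example of the mapping above (derived from the tables in this
   file): gcc regno 2 is %ecx, and svr4_dbx_register_map[2] is 1, which
   matches the "1 for %ecx (gcc regno = 2)" line in the comment preceding
   this table.  */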
2257 /* Define parameter passing and return registers. */
2259 static int const x86_64_int_parameter_registers[6] =
2261 DI_REG, SI_REG, DX_REG, CX_REG, R8_REG, R9_REG
2264 static int const x86_64_ms_abi_int_parameter_registers[4] =
2266 CX_REG, DX_REG, R8_REG, R9_REG
2269 static int const x86_64_int_return_registers[4] =
2271 AX_REG, DX_REG, DI_REG, SI_REG
2274 /* Additional registers that are clobbered by SYSV calls. */
2276 int const x86_64_ms_sysv_extra_clobbered_registers[12] =
2278 SI_REG, DI_REG,
2279 XMM6_REG, XMM7_REG,
2280 XMM8_REG, XMM9_REG, XMM10_REG, XMM11_REG,
2281 XMM12_REG, XMM13_REG, XMM14_REG, XMM15_REG
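/* Presumably these are the registers that the Microsoft ABI treats as
   call-saved while the System V ABI treats them as call-clobbered, so an
   ms_abi caller has to assume they are clobbered across a sysv_abi call.  */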
2284 /* Define the structure for the machine field in struct function. */
2286 struct GTY(()) stack_local_entry {
2287 unsigned short mode;
2288 unsigned short n;
2289 rtx rtl;
2290 struct stack_local_entry *next;
2293 /* Structure describing stack frame layout.
2294 Stack grows downward:
2296 [arguments]
2297 <- ARG_POINTER
2298 saved pc
2300 saved static chain if ix86_static_chain_on_stack
2302 saved frame pointer if frame_pointer_needed
2303 <- HARD_FRAME_POINTER
2304 [saved regs]
2305 <- regs_save_offset
2306 [padding0]
2308 [saved SSE regs]
2309 <- sse_regs_save_offset
2310 [padding1] |
2311 | <- FRAME_POINTER
2312 [va_arg registers] |
2314 [frame] |
2316 [padding2] | = to_allocate
2317 <- STACK_POINTER
2319 struct ix86_frame
2321 int nsseregs;
2322 int nregs;
2323 int va_arg_size;
2324 int red_zone_size;
2325 int outgoing_arguments_size;
2327 /* The offsets relative to ARG_POINTER. */
2328 HOST_WIDE_INT frame_pointer_offset;
2329 HOST_WIDE_INT hard_frame_pointer_offset;
2330 HOST_WIDE_INT stack_pointer_offset;
2331 HOST_WIDE_INT hfp_save_offset;
2332 HOST_WIDE_INT reg_save_offset;
2333 HOST_WIDE_INT sse_reg_save_offset;
2335 /* When save_regs_using_mov is set, emit prologue using
2336 move instead of push instructions. */
2337 bool save_regs_using_mov;
2340 /* Which CPU we are scheduling for. */
2341 enum attr_cpu ix86_schedule;
2343 /* Which CPU we are optimizing for. */
2344 enum processor_type ix86_tune;
2346 /* Which instruction set architecture to use. */
2347 enum processor_type ix86_arch;
2349 /* True if processor has SSE prefetch instruction. */
2350 unsigned char x86_prefetch_sse;
2352 /* -mstackrealign option */
2353 static const char ix86_force_align_arg_pointer_string[]
2354 = "force_align_arg_pointer";
2356 static rtx (*ix86_gen_leave) (void);
2357 static rtx (*ix86_gen_add3) (rtx, rtx, rtx);
2358 static rtx (*ix86_gen_sub3) (rtx, rtx, rtx);
2359 static rtx (*ix86_gen_sub3_carry) (rtx, rtx, rtx, rtx, rtx);
2360 static rtx (*ix86_gen_one_cmpl2) (rtx, rtx);
2361 static rtx (*ix86_gen_monitor) (rtx, rtx, rtx);
2362 static rtx (*ix86_gen_monitorx) (rtx, rtx, rtx);
2363 static rtx (*ix86_gen_andsp) (rtx, rtx, rtx);
2364 static rtx (*ix86_gen_allocate_stack_worker) (rtx, rtx);
2365 static rtx (*ix86_gen_adjust_stack_and_probe) (rtx, rtx, rtx);
2366 static rtx (*ix86_gen_probe_stack_range) (rtx, rtx, rtx);
2367 static rtx (*ix86_gen_tls_global_dynamic_64) (rtx, rtx, rtx);
2368 static rtx (*ix86_gen_tls_local_dynamic_base_64) (rtx, rtx);
2370 /* Preferred alignment for stack boundary in bits. */
2371 unsigned int ix86_preferred_stack_boundary;
2373 /* Alignment for incoming stack boundary in bits specified at
2374 command line. */
2375 static unsigned int ix86_user_incoming_stack_boundary;
2377 /* Default alignment for incoming stack boundary in bits. */
2378 static unsigned int ix86_default_incoming_stack_boundary;
2380 /* Alignment for incoming stack boundary in bits. */
2381 unsigned int ix86_incoming_stack_boundary;
2383 /* Calling abi specific va_list type nodes. */
2384 static GTY(()) tree sysv_va_list_type_node;
2385 static GTY(()) tree ms_va_list_type_node;
2387 /* Prefix built by ASM_GENERATE_INTERNAL_LABEL. */
2388 char internal_label_prefix[16];
2389 int internal_label_prefix_len;
2391 /* Fence to use after loop using movnt. */
2392 tree x86_mfence;
2394 /* Register class used for passing a given 64-bit part of the argument.
2395 These represent classes as documented by the psABI, with the exception
2396 of the SSESF and SSEDF classes, which are basically the SSE class; gcc just
2397 uses an SFmode or DFmode move instead of a DImode move to avoid reformatting penalties.
2399 Similarly we play games with INTEGERSI_CLASS to use cheaper SImode moves
2400 whenever possible (the upper half does contain padding). */
2401 enum x86_64_reg_class
2403 X86_64_NO_CLASS,
2404 X86_64_INTEGER_CLASS,
2405 X86_64_INTEGERSI_CLASS,
2406 X86_64_SSE_CLASS,
2407 X86_64_SSESF_CLASS,
2408 X86_64_SSEDF_CLASS,
2409 X86_64_SSEUP_CLASS,
2410 X86_64_X87_CLASS,
2411 X86_64_X87UP_CLASS,
2412 X86_64_COMPLEX_X87_CLASS,
2413 X86_64_MEMORY_CLASS
2416 #define MAX_CLASSES 8
2418 /* Table of constants used by fldpi, fldln2, etc.... */
2419 static REAL_VALUE_TYPE ext_80387_constants_table [5];
2420 static bool ext_80387_constants_init = 0;
2423 static struct machine_function * ix86_init_machine_status (void);
2424 static rtx ix86_function_value (const_tree, const_tree, bool);
2425 static bool ix86_function_value_regno_p (const unsigned int);
2426 static unsigned int ix86_function_arg_boundary (machine_mode,
2427 const_tree);
2428 static rtx ix86_static_chain (const_tree, bool);
2429 static int ix86_function_regparm (const_tree, const_tree);
2430 static void ix86_compute_frame_layout (struct ix86_frame *);
2431 static bool ix86_expand_vector_init_one_nonzero (bool, machine_mode,
2432 rtx, rtx, int);
2433 static void ix86_add_new_builtins (HOST_WIDE_INT);
2434 static tree ix86_canonical_va_list_type (tree);
2435 static void predict_jump (int);
2436 static unsigned int split_stack_prologue_scratch_regno (void);
2437 static bool i386_asm_output_addr_const_extra (FILE *, rtx);
2439 enum ix86_function_specific_strings
2441 IX86_FUNCTION_SPECIFIC_ARCH,
2442 IX86_FUNCTION_SPECIFIC_TUNE,
2443 IX86_FUNCTION_SPECIFIC_MAX
2446 static char *ix86_target_string (HOST_WIDE_INT, int, const char *,
2447 const char *, enum fpmath_unit, bool);
2448 static void ix86_function_specific_save (struct cl_target_option *,
2449 struct gcc_options *opts);
2450 static void ix86_function_specific_restore (struct gcc_options *opts,
2451 struct cl_target_option *);
2452 static void ix86_function_specific_post_stream_in (struct cl_target_option *);
2453 static void ix86_function_specific_print (FILE *, int,
2454 struct cl_target_option *);
2455 static bool ix86_valid_target_attribute_p (tree, tree, tree, int);
2456 static bool ix86_valid_target_attribute_inner_p (tree, char *[],
2457 struct gcc_options *,
2458 struct gcc_options *,
2459 struct gcc_options *);
2460 static bool ix86_can_inline_p (tree, tree);
2461 static void ix86_set_current_function (tree);
2462 static unsigned int ix86_minimum_incoming_stack_boundary (bool);
2464 static enum calling_abi ix86_function_abi (const_tree);
2467 #ifndef SUBTARGET32_DEFAULT_CPU
2468 #define SUBTARGET32_DEFAULT_CPU "i386"
2469 #endif
2471 /* Whether -mtune= or -march= were specified */
2472 static int ix86_tune_defaulted;
2473 static int ix86_arch_specified;
2475 /* Vectorization library interface and handlers. */
2476 static tree (*ix86_veclib_handler) (enum built_in_function, tree, tree);
2478 static tree ix86_veclibabi_svml (enum built_in_function, tree, tree);
2479 static tree ix86_veclibabi_acml (enum built_in_function, tree, tree);
2481 /* Processor target table, indexed by processor number */
2482 struct ptt
2484 const char *const name; /* processor name */
2485 const struct processor_costs *cost; /* Processor costs */
2486 const int align_loop; /* Default alignments. */
2487 const int align_loop_max_skip;
2488 const int align_jump;
2489 const int align_jump_max_skip;
2490 const int align_func;
2493 /* This table must be in sync with enum processor_type in i386.h. */
2494 static const struct ptt processor_target_table[PROCESSOR_max] =
2496 {"generic", &generic_cost, 16, 10, 16, 10, 16},
2497 {"i386", &i386_cost, 4, 3, 4, 3, 4},
2498 {"i486", &i486_cost, 16, 15, 16, 15, 16},
2499 {"pentium", &pentium_cost, 16, 7, 16, 7, 16},
2500 {"pentiumpro", &pentiumpro_cost, 16, 15, 16, 10, 16},
2501 {"pentium4", &pentium4_cost, 0, 0, 0, 0, 0},
2502 {"nocona", &nocona_cost, 0, 0, 0, 0, 0},
2503 {"core2", &core_cost, 16, 10, 16, 10, 16},
2504 {"nehalem", &core_cost, 16, 10, 16, 10, 16},
2505 {"sandybridge", &core_cost, 16, 10, 16, 10, 16},
2506 {"haswell", &core_cost, 16, 10, 16, 10, 16},
2507 {"bonnell", &atom_cost, 16, 15, 16, 7, 16},
2508 {"silvermont", &slm_cost, 16, 15, 16, 7, 16},
2509 {"knl", &slm_cost, 16, 15, 16, 7, 16},
2510 {"intel", &intel_cost, 16, 15, 16, 7, 16},
2511 {"geode", &geode_cost, 0, 0, 0, 0, 0},
2512 {"k6", &k6_cost, 32, 7, 32, 7, 32},
2513 {"athlon", &athlon_cost, 16, 7, 16, 7, 16},
2514 {"k8", &k8_cost, 16, 7, 16, 7, 16},
2515 {"amdfam10", &amdfam10_cost, 32, 24, 32, 7, 32},
2516 {"bdver1", &bdver1_cost, 16, 10, 16, 7, 11},
2517 {"bdver2", &bdver2_cost, 16, 10, 16, 7, 11},
2518 {"bdver3", &bdver3_cost, 16, 10, 16, 7, 11},
2519 {"bdver4", &bdver4_cost, 16, 10, 16, 7, 11},
2520 {"btver1", &btver1_cost, 16, 10, 16, 7, 11},
2521 {"btver2", &btver2_cost, 16, 10, 16, 7, 11}
2524 static unsigned int
2525 rest_of_handle_insert_vzeroupper (void)
2527 int i;
2529 /* vzeroupper instructions are inserted immediately after reload to
2530 account for possible spills from 256bit registers. The pass
2531 reuses the mode switching infrastructure by re-running the mode insertion
2532 pass, so disable the entities that have already been processed. */
2533 for (i = 0; i < MAX_386_ENTITIES; i++)
2534 ix86_optimize_mode_switching[i] = 0;
2536 ix86_optimize_mode_switching[AVX_U128] = 1;
2538 /* Call optimize_mode_switching. */
2539 g->get_passes ()->execute_pass_mode_switching ();
2540 return 0;
2543 namespace {
2545 const pass_data pass_data_insert_vzeroupper =
2547 RTL_PASS, /* type */
2548 "vzeroupper", /* name */
2549 OPTGROUP_NONE, /* optinfo_flags */
2550 TV_NONE, /* tv_id */
2551 0, /* properties_required */
2552 0, /* properties_provided */
2553 0, /* properties_destroyed */
2554 0, /* todo_flags_start */
2555 TODO_df_finish, /* todo_flags_finish */
2558 class pass_insert_vzeroupper : public rtl_opt_pass
2560 public:
2561 pass_insert_vzeroupper(gcc::context *ctxt)
2562 : rtl_opt_pass(pass_data_insert_vzeroupper, ctxt)
2565 /* opt_pass methods: */
2566 virtual bool gate (function *)
2568 return TARGET_AVX && !TARGET_AVX512F
2569 && TARGET_VZEROUPPER && flag_expensive_optimizations
2570 && !optimize_size;
2573 virtual unsigned int execute (function *)
2575 return rest_of_handle_insert_vzeroupper ();
2578 }; // class pass_insert_vzeroupper
2580 } // anon namespace
2582 rtl_opt_pass *
2583 make_pass_insert_vzeroupper (gcc::context *ctxt)
2585 return new pass_insert_vzeroupper (ctxt);
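/* A sketch (an assumption about the registration site, which is not shown
   here) of how a target pass like this is typically wired into the pass
   pipeline with the generic register_pass_info mechanism:

     opt_pass *pass = make_pass_insert_vzeroupper (g);
     struct register_pass_info insert_vzeroupper_info
       = { pass, "reload", 1, PASS_POS_INSERT_AFTER };
     register_pass (&insert_vzeroupper_info);

   Running it once right after reload matches the comment in
   rest_of_handle_insert_vzeroupper above.  */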
2588 /* Return true if a red-zone is in use. */
2590 static inline bool
2591 ix86_using_red_zone (void)
2593 return TARGET_RED_ZONE && !TARGET_64BIT_MS_ABI;
2596 /* Return a string that documents the current -m options. The caller is
2597 responsible for freeing the string. */
2599 static char *
2600 ix86_target_string (HOST_WIDE_INT isa, int flags, const char *arch,
2601 const char *tune, enum fpmath_unit fpmath,
2602 bool add_nl_p)
2604 struct ix86_target_opts
2606 const char *option; /* option string */
2607 HOST_WIDE_INT mask; /* isa mask options */
2610 /* This table is ordered so that options like -msse4.2 that imply
2611 preceding options are matched first. */
2612 static struct ix86_target_opts isa_opts[] =
2614 { "-mfma4", OPTION_MASK_ISA_FMA4 },
2615 { "-mfma", OPTION_MASK_ISA_FMA },
2616 { "-mxop", OPTION_MASK_ISA_XOP },
2617 { "-mlwp", OPTION_MASK_ISA_LWP },
2618 { "-mavx512f", OPTION_MASK_ISA_AVX512F },
2619 { "-mavx512er", OPTION_MASK_ISA_AVX512ER },
2620 { "-mavx512cd", OPTION_MASK_ISA_AVX512CD },
2621 { "-mavx512pf", OPTION_MASK_ISA_AVX512PF },
2622 { "-mavx512dq", OPTION_MASK_ISA_AVX512DQ },
2623 { "-mavx512bw", OPTION_MASK_ISA_AVX512BW },
2624 { "-mavx512vl", OPTION_MASK_ISA_AVX512VL },
2625 { "-mavx512ifma", OPTION_MASK_ISA_AVX512IFMA },
2626 { "-mavx512vbmi", OPTION_MASK_ISA_AVX512VBMI },
2627 { "-msse4a", OPTION_MASK_ISA_SSE4A },
2628 { "-msse4.2", OPTION_MASK_ISA_SSE4_2 },
2629 { "-msse4.1", OPTION_MASK_ISA_SSE4_1 },
2630 { "-mssse3", OPTION_MASK_ISA_SSSE3 },
2631 { "-msse3", OPTION_MASK_ISA_SSE3 },
2632 { "-msse2", OPTION_MASK_ISA_SSE2 },
2633 { "-msse", OPTION_MASK_ISA_SSE },
2634 { "-m3dnow", OPTION_MASK_ISA_3DNOW },
2635 { "-m3dnowa", OPTION_MASK_ISA_3DNOW_A },
2636 { "-mmmx", OPTION_MASK_ISA_MMX },
2637 { "-mabm", OPTION_MASK_ISA_ABM },
2638 { "-mbmi", OPTION_MASK_ISA_BMI },
2639 { "-mbmi2", OPTION_MASK_ISA_BMI2 },
2640 { "-mlzcnt", OPTION_MASK_ISA_LZCNT },
2641 { "-mhle", OPTION_MASK_ISA_HLE },
2642 { "-mfxsr", OPTION_MASK_ISA_FXSR },
2643 { "-mrdseed", OPTION_MASK_ISA_RDSEED },
2644 { "-mprfchw", OPTION_MASK_ISA_PRFCHW },
2645 { "-madx", OPTION_MASK_ISA_ADX },
2646 { "-mtbm", OPTION_MASK_ISA_TBM },
2647 { "-mpopcnt", OPTION_MASK_ISA_POPCNT },
2648 { "-mmovbe", OPTION_MASK_ISA_MOVBE },
2649 { "-mcrc32", OPTION_MASK_ISA_CRC32 },
2650 { "-maes", OPTION_MASK_ISA_AES },
2651 { "-msha", OPTION_MASK_ISA_SHA },
2652 { "-mpclmul", OPTION_MASK_ISA_PCLMUL },
2653 { "-mfsgsbase", OPTION_MASK_ISA_FSGSBASE },
2654 { "-mrdrnd", OPTION_MASK_ISA_RDRND },
2655 { "-mf16c", OPTION_MASK_ISA_F16C },
2656 { "-mrtm", OPTION_MASK_ISA_RTM },
2657 { "-mxsave", OPTION_MASK_ISA_XSAVE },
2658 { "-mxsaveopt", OPTION_MASK_ISA_XSAVEOPT },
2659 { "-mprefetchwt1", OPTION_MASK_ISA_PREFETCHWT1 },
2660 { "-mclflushopt", OPTION_MASK_ISA_CLFLUSHOPT },
2661 { "-mxsavec", OPTION_MASK_ISA_XSAVEC },
2662 { "-mxsaves", OPTION_MASK_ISA_XSAVES },
2663 { "-mmpx", OPTION_MASK_ISA_MPX },
2664 { "-mclwb", OPTION_MASK_ISA_CLWB },
2665 { "-mpcommit", OPTION_MASK_ISA_PCOMMIT },
2666 { "-mmwaitx", OPTION_MASK_ISA_MWAITX },
2669 /* Flag options. */
2670 static struct ix86_target_opts flag_opts[] =
2672 { "-m128bit-long-double", MASK_128BIT_LONG_DOUBLE },
2673 { "-mlong-double-128", MASK_LONG_DOUBLE_128 },
2674 { "-mlong-double-64", MASK_LONG_DOUBLE_64 },
2675 { "-m80387", MASK_80387 },
2676 { "-maccumulate-outgoing-args", MASK_ACCUMULATE_OUTGOING_ARGS },
2677 { "-malign-double", MASK_ALIGN_DOUBLE },
2678 { "-mcld", MASK_CLD },
2679 { "-mfp-ret-in-387", MASK_FLOAT_RETURNS },
2680 { "-mieee-fp", MASK_IEEE_FP },
2681 { "-minline-all-stringops", MASK_INLINE_ALL_STRINGOPS },
2682 { "-minline-stringops-dynamically", MASK_INLINE_STRINGOPS_DYNAMICALLY },
2683 { "-mms-bitfields", MASK_MS_BITFIELD_LAYOUT },
2684 { "-mno-align-stringops", MASK_NO_ALIGN_STRINGOPS },
2685 { "-mno-fancy-math-387", MASK_NO_FANCY_MATH_387 },
2686 { "-mno-push-args", MASK_NO_PUSH_ARGS },
2687 { "-mno-red-zone", MASK_NO_RED_ZONE },
2688 { "-momit-leaf-frame-pointer", MASK_OMIT_LEAF_FRAME_POINTER },
2689 { "-mrecip", MASK_RECIP },
2690 { "-mrtd", MASK_RTD },
2691 { "-msseregparm", MASK_SSEREGPARM },
2692 { "-mstack-arg-probe", MASK_STACK_PROBE },
2693 { "-mtls-direct-seg-refs", MASK_TLS_DIRECT_SEG_REFS },
2694 { "-mvect8-ret-in-mem", MASK_VECT8_RETURNS },
2695 { "-m8bit-idiv", MASK_USE_8BIT_IDIV },
2696 { "-mvzeroupper", MASK_VZEROUPPER },
2697 { "-mavx256-split-unaligned-load", MASK_AVX256_SPLIT_UNALIGNED_LOAD},
2698 { "-mavx256-split-unaligned-store", MASK_AVX256_SPLIT_UNALIGNED_STORE},
2699 { "-mprefer-avx128", MASK_PREFER_AVX128},
2702 const char *opts[ARRAY_SIZE (isa_opts) + ARRAY_SIZE (flag_opts) + 6][2];
2704 char isa_other[40];
2705 char target_other[40];
2706 unsigned num = 0;
2707 unsigned i, j;
2708 char *ret;
2709 char *ptr;
2710 size_t len;
2711 size_t line_len;
2712 size_t sep_len;
2713 const char *abi;
2715 memset (opts, '\0', sizeof (opts));
2717 /* Add -march= option. */
2718 if (arch)
2720 opts[num][0] = "-march=";
2721 opts[num++][1] = arch;
2724 /* Add -mtune= option. */
2725 if (tune)
2727 opts[num][0] = "-mtune=";
2728 opts[num++][1] = tune;
2731 /* Add -m32/-m64/-mx32. */
2732 if ((isa & OPTION_MASK_ISA_64BIT) != 0)
2734 if ((isa & OPTION_MASK_ABI_64) != 0)
2735 abi = "-m64";
2736 else
2737 abi = "-mx32";
2738 isa &= ~ (OPTION_MASK_ISA_64BIT
2739 | OPTION_MASK_ABI_64
2740 | OPTION_MASK_ABI_X32);
2742 else
2743 abi = "-m32";
2744 opts[num++][0] = abi;
2746 /* Pick out the options in isa options. */
2747 for (i = 0; i < ARRAY_SIZE (isa_opts); i++)
2749 if ((isa & isa_opts[i].mask) != 0)
2751 opts[num++][0] = isa_opts[i].option;
2752 isa &= ~ isa_opts[i].mask;
2756 if (isa && add_nl_p)
2758 opts[num++][0] = isa_other;
2759 sprintf (isa_other, "(other isa: %#" HOST_WIDE_INT_PRINT "x)",
2760 isa);
2763 /* Add flag options. */
2764 for (i = 0; i < ARRAY_SIZE (flag_opts); i++)
2766 if ((flags & flag_opts[i].mask) != 0)
2768 opts[num++][0] = flag_opts[i].option;
2769 flags &= ~ flag_opts[i].mask;
2773 if (flags && add_nl_p)
2775 opts[num++][0] = target_other;
2776 sprintf (target_other, "(other flags: %#x)", flags);
2779 /* Add -fpmath= option. */
2780 if (fpmath)
2782 opts[num][0] = "-mfpmath=";
2783 switch ((int) fpmath)
2785 case FPMATH_387:
2786 opts[num++][1] = "387";
2787 break;
2789 case FPMATH_SSE:
2790 opts[num++][1] = "sse";
2791 break;
2793 case FPMATH_387 | FPMATH_SSE:
2794 opts[num++][1] = "sse+387";
2795 break;
2797 default:
2798 gcc_unreachable ();
2802 /* Any options? */
2803 if (num == 0)
2804 return NULL;
2806 gcc_assert (num < ARRAY_SIZE (opts));
2808 /* Size the string. */
2809 len = 0;
2810 sep_len = (add_nl_p) ? 3 : 1;
2811 for (i = 0; i < num; i++)
2813 len += sep_len;
2814 for (j = 0; j < 2; j++)
2815 if (opts[i][j])
2816 len += strlen (opts[i][j]);
2819 /* Build the string. */
2820 ret = ptr = (char *) xmalloc (len);
2821 line_len = 0;
2823 for (i = 0; i < num; i++)
2825 size_t len2[2];
2827 for (j = 0; j < 2; j++)
2828 len2[j] = (opts[i][j]) ? strlen (opts[i][j]) : 0;
2830 if (i != 0)
2832 *ptr++ = ' ';
2833 line_len++;
2835 if (add_nl_p && line_len + len2[0] + len2[1] > 70)
2837 *ptr++ = '\\';
2838 *ptr++ = '\n';
2839 line_len = 0;
2843 for (j = 0; j < 2; j++)
2844 if (opts[i][j])
2846 memcpy (ptr, opts[i][j], len2[j]);
2847 ptr += len2[j];
2848 line_len += len2[j];
2852 *ptr = '\0';
2853 gcc_assert (ret + len >= ptr);
2855 return ret;
2858 /* Return true if profiling code should be emitted before the
2859 prologue, and false otherwise.
2860 Note: for x86 the "hotfix" case is sorried. */
2861 static bool
2862 ix86_profile_before_prologue (void)
2864 return flag_fentry != 0;
2867 /* Function that is callable from the debugger to print the current
2868 options. */
2869 void ATTRIBUTE_UNUSED
2870 ix86_debug_options (void)
2872 char *opts = ix86_target_string (ix86_isa_flags, target_flags,
2873 ix86_arch_string, ix86_tune_string,
2874 ix86_fpmath, true);
2876 if (opts)
2878 fprintf (stderr, "%s\n\n", opts);
2879 free (opts);
2881 else
2882 fputs ("<no options>\n\n", stderr);
2884 return;
2887 static const char *stringop_alg_names[] = {
2888 #define DEF_ENUM
2889 #define DEF_ALG(alg, name) #name,
2890 #include "stringop.def"
2891 #undef DEF_ENUM
2892 #undef DEF_ALG
2895 /* Parse parameter string passed to -mmemcpy-strategy= or -mmemset-strategy=.
2896 The string has the following form (or is a comma-separated list of such entries):
2898 strategy_alg:max_size:[align|noalign]
2900 where the full size range for the strategy is either [0, max_size] or
2901 [min_size, max_size], in which min_size is the max_size + 1 of the
2902 preceding range. The last size range must have max_size == -1.
2904 Examples:
2907 -mmemcpy-strategy=libcall:-1:noalign
2909 this is equivalent (for known-size memcpy) to -mstringop-strategy=libcall
2913 -mmemset-strategy=rep_8byte:16:noalign,vector_loop:2048:align,libcall:-1:noalign
2915 This tells the compiler to use the following strategy for memset:
2916 1) when the expected size is in [1, 16], use the rep_8byte strategy;
2917 2) when the size is in [17, 2048], use vector_loop;
2918 3) when the size is > 2048, use libcall. */
2920 struct stringop_size_range
2922 int max;
2923 stringop_alg alg;
2924 bool noalign;
2927 static void
2928 ix86_parse_stringop_strategy_string (char *strategy_str, bool is_memset)
2930 const struct stringop_algs *default_algs;
2931 stringop_size_range input_ranges[MAX_STRINGOP_ALGS];
2932 char *curr_range_str, *next_range_str;
2933 int i = 0, n = 0;
2935 if (is_memset)
2936 default_algs = &ix86_cost->memset[TARGET_64BIT != 0];
2937 else
2938 default_algs = &ix86_cost->memcpy[TARGET_64BIT != 0];
2940 curr_range_str = strategy_str;
2944 int maxs;
2945 char alg_name[128];
2946 char align[16];
2947 next_range_str = strchr (curr_range_str, ',');
2948 if (next_range_str)
2949 *next_range_str++ = '\0';
2951 if (3 != sscanf (curr_range_str, "%20[^:]:%d:%10s",
2952 alg_name, &maxs, align))
2954 error ("wrong arg %s to option %s", curr_range_str,
2955 is_memset ? "-mmemset_strategy=" : "-mmemcpy_strategy=");
2956 return;
2959 if (n > 0 && (maxs < (input_ranges[n - 1].max + 1) && maxs != -1))
2961 error ("size ranges of option %s should be increasing",
2962 is_memset ? "-mmemset_strategy=" : "-mmemcpy_strategy=");
2963 return;
2966 for (i = 0; i < last_alg; i++)
2967 if (!strcmp (alg_name, stringop_alg_names[i]))
2968 break;
2970 if (i == last_alg)
2972 error ("wrong stringop strategy name %s specified for option %s",
2973 alg_name,
2974 is_memset ? "-mmemset_strategy=" : "-mmemcpy_strategy=");
2975 return;
2978 if ((stringop_alg) i == rep_prefix_8_byte
2979 && !TARGET_64BIT)
2981 /* rep; movq isn't available in 32-bit code. */
2982 error ("stringop strategy name %s specified for option %s "
2983 "not supported for 32-bit code",
2984 alg_name,
2985 is_memset ? "-mmemset_strategy=" : "-mmemcpy_strategy=");
2986 return;
2989 input_ranges[n].max = maxs;
2990 input_ranges[n].alg = (stringop_alg) i;
2991 if (!strcmp (align, "align"))
2992 input_ranges[n].noalign = false;
2993 else if (!strcmp (align, "noalign"))
2994 input_ranges[n].noalign = true;
2995 else
2997 error ("unknown alignment %s specified for option %s",
2998 align, is_memset ? "-mmemset_strategy=" : "-mmemcpy_strategy=");
2999 return;
3001 n++;
3002 curr_range_str = next_range_str;
3004 while (curr_range_str);
3006 if (input_ranges[n - 1].max != -1)
3008 error ("the max value for the last size range should be -1"
3009 " for option %s",
3010 is_memset ? "-mmemset_strategy=" : "-mmemcpy_strategy=");
3011 return;
3014 if (n > MAX_STRINGOP_ALGS)
3016 error ("too many size ranges specified in option %s",
3017 is_memset ? "-mmemset_strategy=" : "-mmemcpy_strategy=");
3018 return;
3021 /* Now override the default algs array. */
3022 for (i = 0; i < n; i++)
3024 *const_cast<int *>(&default_algs->size[i].max) = input_ranges[i].max;
3025 *const_cast<stringop_alg *>(&default_algs->size[i].alg)
3026 = input_ranges[i].alg;
3027 *const_cast<int *>(&default_algs->size[i].noalign)
3028 = input_ranges[i].noalign;
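/* Note (added for clarity): the size[] members of struct stringop_algs are
   declared const because they normally come from the read-only processor
   cost tables; the const_casts above let a user-supplied strategy overwrite
   those defaults in place.  */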
3033 /* Parse the -mtune-ctrl= option. When DUMP is true,
3034 print the features that are explicitly set. */
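/* Usage sketch (added for illustration, not in the original sources): the
   accepted syntax is a comma-separated list of names taken from
   ix86_tune_feature_names[], each optionally prefixed with '^' to clear
   instead of set the feature, e.g.

     -mtune-ctrl=feature_a,^feature_b

   (the feature names here are hypothetical) which would set feature_a and
   clear feature_b for this compilation.  */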
3036 static void
3037 parse_mtune_ctrl_str (bool dump)
3039 if (!ix86_tune_ctrl_string)
3040 return;
3042 char *next_feature_string = NULL;
3043 char *curr_feature_string = xstrdup (ix86_tune_ctrl_string);
3044 char *orig = curr_feature_string;
3045 int i;
3048 bool clear = false;
3050 next_feature_string = strchr (curr_feature_string, ',');
3051 if (next_feature_string)
3052 *next_feature_string++ = '\0';
3053 if (*curr_feature_string == '^')
3055 curr_feature_string++;
3056 clear = true;
3058 for (i = 0; i < X86_TUNE_LAST; i++)
3060 if (!strcmp (curr_feature_string, ix86_tune_feature_names[i]))
3062 ix86_tune_features[i] = !clear;
3063 if (dump)
3064 fprintf (stderr, "Explicitly %s feature %s\n",
3065 clear ? "clear" : "set", ix86_tune_feature_names[i]);
3066 break;
3069 if (i == X86_TUNE_LAST)
3070 error ("unknown parameter to option -mtune-ctrl: %s",
3071 clear ? curr_feature_string - 1 : curr_feature_string);
3072 curr_feature_string = next_feature_string;
3074 while (curr_feature_string);
3075 free (orig);
3078 /* Helper function to set ix86_tune_features. IX86_TUNE is the
3079 processor type. */
3081 static void
3082 set_ix86_tune_features (enum processor_type ix86_tune, bool dump)
3084 unsigned int ix86_tune_mask = 1u << ix86_tune;
3085 int i;
3087 for (i = 0; i < X86_TUNE_LAST; ++i)
3089 if (ix86_tune_no_default)
3090 ix86_tune_features[i] = 0;
3091 else
3092 ix86_tune_features[i] = !!(initial_ix86_tune_features[i] & ix86_tune_mask);
3095 if (dump)
3097 fprintf (stderr, "List of x86 specific tuning parameter names:\n");
3098 for (i = 0; i < X86_TUNE_LAST; i++)
3099 fprintf (stderr, "%s : %s\n", ix86_tune_feature_names[i],
3100 ix86_tune_features[i] ? "on" : "off");
3103 parse_mtune_ctrl_str (dump);
3107 /* Default align_* from the processor table. */
3109 static void
3110 ix86_default_align (struct gcc_options *opts)
3112 if (opts->x_align_loops == 0)
3114 opts->x_align_loops = processor_target_table[ix86_tune].align_loop;
3115 align_loops_max_skip = processor_target_table[ix86_tune].align_loop_max_skip;
3117 if (opts->x_align_jumps == 0)
3119 opts->x_align_jumps = processor_target_table[ix86_tune].align_jump;
3120 align_jumps_max_skip = processor_target_table[ix86_tune].align_jump_max_skip;
3122 if (opts->x_align_functions == 0)
3124 opts->x_align_functions = processor_target_table[ix86_tune].align_func;
3128 /* Implement TARGET_OVERRIDE_OPTIONS_AFTER_CHANGE hook. */
3130 static void
3131 ix86_override_options_after_change (void)
3133 ix86_default_align (&global_options);
3136 /* Override various settings based on options. If MAIN_ARGS_P, the
3137 options are from the command line, otherwise they are from
3138 attributes. */
3140 static void
3141 ix86_option_override_internal (bool main_args_p,
3142 struct gcc_options *opts,
3143 struct gcc_options *opts_set)
3145 int i;
3146 unsigned int ix86_arch_mask;
3147 const bool ix86_tune_specified = (opts->x_ix86_tune_string != NULL);
3148 const char *prefix;
3149 const char *suffix;
3150 const char *sw;
3152 #define PTA_3DNOW (HOST_WIDE_INT_1 << 0)
3153 #define PTA_3DNOW_A (HOST_WIDE_INT_1 << 1)
3154 #define PTA_64BIT (HOST_WIDE_INT_1 << 2)
3155 #define PTA_ABM (HOST_WIDE_INT_1 << 3)
3156 #define PTA_AES (HOST_WIDE_INT_1 << 4)
3157 #define PTA_AVX (HOST_WIDE_INT_1 << 5)
3158 #define PTA_BMI (HOST_WIDE_INT_1 << 6)
3159 #define PTA_CX16 (HOST_WIDE_INT_1 << 7)
3160 #define PTA_F16C (HOST_WIDE_INT_1 << 8)
3161 #define PTA_FMA (HOST_WIDE_INT_1 << 9)
3162 #define PTA_FMA4 (HOST_WIDE_INT_1 << 10)
3163 #define PTA_FSGSBASE (HOST_WIDE_INT_1 << 11)
3164 #define PTA_LWP (HOST_WIDE_INT_1 << 12)
3165 #define PTA_LZCNT (HOST_WIDE_INT_1 << 13)
3166 #define PTA_MMX (HOST_WIDE_INT_1 << 14)
3167 #define PTA_MOVBE (HOST_WIDE_INT_1 << 15)
3168 #define PTA_NO_SAHF (HOST_WIDE_INT_1 << 16)
3169 #define PTA_PCLMUL (HOST_WIDE_INT_1 << 17)
3170 #define PTA_POPCNT (HOST_WIDE_INT_1 << 18)
3171 #define PTA_PREFETCH_SSE (HOST_WIDE_INT_1 << 19)
3172 #define PTA_RDRND (HOST_WIDE_INT_1 << 20)
3173 #define PTA_SSE (HOST_WIDE_INT_1 << 21)
3174 #define PTA_SSE2 (HOST_WIDE_INT_1 << 22)
3175 #define PTA_SSE3 (HOST_WIDE_INT_1 << 23)
3176 #define PTA_SSE4_1 (HOST_WIDE_INT_1 << 24)
3177 #define PTA_SSE4_2 (HOST_WIDE_INT_1 << 25)
3178 #define PTA_SSE4A (HOST_WIDE_INT_1 << 26)
3179 #define PTA_SSSE3 (HOST_WIDE_INT_1 << 27)
3180 #define PTA_TBM (HOST_WIDE_INT_1 << 28)
3181 #define PTA_XOP (HOST_WIDE_INT_1 << 29)
3182 #define PTA_AVX2 (HOST_WIDE_INT_1 << 30)
3183 #define PTA_BMI2 (HOST_WIDE_INT_1 << 31)
3184 #define PTA_RTM (HOST_WIDE_INT_1 << 32)
3185 #define PTA_HLE (HOST_WIDE_INT_1 << 33)
3186 #define PTA_PRFCHW (HOST_WIDE_INT_1 << 34)
3187 #define PTA_RDSEED (HOST_WIDE_INT_1 << 35)
3188 #define PTA_ADX (HOST_WIDE_INT_1 << 36)
3189 #define PTA_FXSR (HOST_WIDE_INT_1 << 37)
3190 #define PTA_XSAVE (HOST_WIDE_INT_1 << 38)
3191 #define PTA_XSAVEOPT (HOST_WIDE_INT_1 << 39)
3192 #define PTA_AVX512F (HOST_WIDE_INT_1 << 40)
3193 #define PTA_AVX512ER (HOST_WIDE_INT_1 << 41)
3194 #define PTA_AVX512PF (HOST_WIDE_INT_1 << 42)
3195 #define PTA_AVX512CD (HOST_WIDE_INT_1 << 43)
3196 #define PTA_MPX (HOST_WIDE_INT_1 << 44)
3197 #define PTA_SHA (HOST_WIDE_INT_1 << 45)
3198 #define PTA_PREFETCHWT1 (HOST_WIDE_INT_1 << 46)
3199 #define PTA_CLFLUSHOPT (HOST_WIDE_INT_1 << 47)
3200 #define PTA_XSAVEC (HOST_WIDE_INT_1 << 48)
3201 #define PTA_XSAVES (HOST_WIDE_INT_1 << 49)
3202 #define PTA_AVX512DQ (HOST_WIDE_INT_1 << 50)
3203 #define PTA_AVX512BW (HOST_WIDE_INT_1 << 51)
3204 #define PTA_AVX512VL (HOST_WIDE_INT_1 << 52)
3205 #define PTA_AVX512IFMA (HOST_WIDE_INT_1 << 53)
3206 #define PTA_AVX512VBMI (HOST_WIDE_INT_1 << 54)
3207 #define PTA_CLWB (HOST_WIDE_INT_1 << 55)
3208 #define PTA_PCOMMIT (HOST_WIDE_INT_1 << 56)
3209 #define PTA_MWAITX (HOST_WIDE_INT_1 << 57)
3211 #define PTA_CORE2 \
3212 (PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3 | PTA_SSSE3 \
3213 | PTA_CX16 | PTA_FXSR)
3214 #define PTA_NEHALEM \
3215 (PTA_CORE2 | PTA_SSE4_1 | PTA_SSE4_2 | PTA_POPCNT)
3216 #define PTA_WESTMERE \
3217 (PTA_NEHALEM | PTA_AES | PTA_PCLMUL)
3218 #define PTA_SANDYBRIDGE \
3219 (PTA_WESTMERE | PTA_AVX | PTA_XSAVE | PTA_XSAVEOPT)
3220 #define PTA_IVYBRIDGE \
3221 (PTA_SANDYBRIDGE | PTA_FSGSBASE | PTA_RDRND | PTA_F16C)
3222 #define PTA_HASWELL \
3223 (PTA_IVYBRIDGE | PTA_AVX2 | PTA_BMI | PTA_BMI2 | PTA_LZCNT \
3224 | PTA_FMA | PTA_MOVBE | PTA_HLE)
3225 #define PTA_BROADWELL \
3226 (PTA_HASWELL | PTA_ADX | PTA_PRFCHW | PTA_RDSEED)
3227 #define PTA_KNL \
3228 (PTA_BROADWELL | PTA_AVX512PF | PTA_AVX512ER | PTA_AVX512F | PTA_AVX512CD)
3229 #define PTA_BONNELL \
3230 (PTA_CORE2 | PTA_MOVBE)
3231 #define PTA_SILVERMONT \
3232 (PTA_WESTMERE | PTA_MOVBE)
3234 /* If this reaches 64, we need to widen the struct pta flags field below. */
3236 static struct pta
3238 const char *const name; /* processor name or nickname. */
3239 const enum processor_type processor;
3240 const enum attr_cpu schedule;
3241 const unsigned HOST_WIDE_INT flags;
3243 const processor_alias_table[] =
3245 {"i386", PROCESSOR_I386, CPU_NONE, 0},
3246 {"i486", PROCESSOR_I486, CPU_NONE, 0},
3247 {"i586", PROCESSOR_PENTIUM, CPU_PENTIUM, 0},
3248 {"pentium", PROCESSOR_PENTIUM, CPU_PENTIUM, 0},
3249 {"pentium-mmx", PROCESSOR_PENTIUM, CPU_PENTIUM, PTA_MMX},
3250 {"winchip-c6", PROCESSOR_I486, CPU_NONE, PTA_MMX},
3251 {"winchip2", PROCESSOR_I486, CPU_NONE, PTA_MMX | PTA_3DNOW | PTA_PRFCHW},
3252 {"c3", PROCESSOR_I486, CPU_NONE, PTA_MMX | PTA_3DNOW | PTA_PRFCHW},
3253 {"c3-2", PROCESSOR_PENTIUMPRO, CPU_PENTIUMPRO,
3254 PTA_MMX | PTA_SSE | PTA_FXSR},
3255 {"i686", PROCESSOR_PENTIUMPRO, CPU_PENTIUMPRO, 0},
3256 {"pentiumpro", PROCESSOR_PENTIUMPRO, CPU_PENTIUMPRO, 0},
3257 {"pentium2", PROCESSOR_PENTIUMPRO, CPU_PENTIUMPRO, PTA_MMX | PTA_FXSR},
3258 {"pentium3", PROCESSOR_PENTIUMPRO, CPU_PENTIUMPRO,
3259 PTA_MMX | PTA_SSE | PTA_FXSR},
3260 {"pentium3m", PROCESSOR_PENTIUMPRO, CPU_PENTIUMPRO,
3261 PTA_MMX | PTA_SSE | PTA_FXSR},
3262 {"pentium-m", PROCESSOR_PENTIUMPRO, CPU_PENTIUMPRO,
3263 PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_FXSR},
3264 {"pentium4", PROCESSOR_PENTIUM4, CPU_NONE,
3265 PTA_MMX |PTA_SSE | PTA_SSE2 | PTA_FXSR},
3266 {"pentium4m", PROCESSOR_PENTIUM4, CPU_NONE,
3267 PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_FXSR},
3268 {"prescott", PROCESSOR_NOCONA, CPU_NONE,
3269 PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3 | PTA_FXSR},
3270 {"nocona", PROCESSOR_NOCONA, CPU_NONE,
3271 PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3
3272 | PTA_CX16 | PTA_NO_SAHF | PTA_FXSR},
3273 {"core2", PROCESSOR_CORE2, CPU_CORE2, PTA_CORE2},
3274 {"nehalem", PROCESSOR_NEHALEM, CPU_NEHALEM, PTA_NEHALEM},
3275 {"corei7", PROCESSOR_NEHALEM, CPU_NEHALEM, PTA_NEHALEM},
3276 {"westmere", PROCESSOR_NEHALEM, CPU_NEHALEM, PTA_WESTMERE},
3277 {"sandybridge", PROCESSOR_SANDYBRIDGE, CPU_NEHALEM,
3278 PTA_SANDYBRIDGE},
3279 {"corei7-avx", PROCESSOR_SANDYBRIDGE, CPU_NEHALEM,
3280 PTA_SANDYBRIDGE},
3281 {"ivybridge", PROCESSOR_SANDYBRIDGE, CPU_NEHALEM,
3282 PTA_IVYBRIDGE},
3283 {"core-avx-i", PROCESSOR_SANDYBRIDGE, CPU_NEHALEM,
3284 PTA_IVYBRIDGE},
3285 {"haswell", PROCESSOR_HASWELL, CPU_NEHALEM, PTA_HASWELL},
3286 {"core-avx2", PROCESSOR_HASWELL, CPU_NEHALEM, PTA_HASWELL},
3287 {"broadwell", PROCESSOR_HASWELL, CPU_NEHALEM, PTA_BROADWELL},
3288 {"bonnell", PROCESSOR_BONNELL, CPU_ATOM, PTA_BONNELL},
3289 {"atom", PROCESSOR_BONNELL, CPU_ATOM, PTA_BONNELL},
3290 {"silvermont", PROCESSOR_SILVERMONT, CPU_SLM, PTA_SILVERMONT},
3291 {"slm", PROCESSOR_SILVERMONT, CPU_SLM, PTA_SILVERMONT},
3292 {"knl", PROCESSOR_KNL, CPU_KNL, PTA_KNL},
3293 {"intel", PROCESSOR_INTEL, CPU_SLM, PTA_NEHALEM},
3294 {"geode", PROCESSOR_GEODE, CPU_GEODE,
3295 PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_PREFETCH_SSE | PTA_PRFCHW},
3296 {"k6", PROCESSOR_K6, CPU_K6, PTA_MMX},
3297 {"k6-2", PROCESSOR_K6, CPU_K6, PTA_MMX | PTA_3DNOW | PTA_PRFCHW},
3298 {"k6-3", PROCESSOR_K6, CPU_K6, PTA_MMX | PTA_3DNOW | PTA_PRFCHW},
3299 {"athlon", PROCESSOR_ATHLON, CPU_ATHLON,
3300 PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_PREFETCH_SSE | PTA_PRFCHW},
3301 {"athlon-tbird", PROCESSOR_ATHLON, CPU_ATHLON,
3302 PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_PREFETCH_SSE | PTA_PRFCHW},
3303 {"athlon-4", PROCESSOR_ATHLON, CPU_ATHLON,
3304 PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE | PTA_PRFCHW | PTA_FXSR},
3305 {"athlon-xp", PROCESSOR_ATHLON, CPU_ATHLON,
3306 PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE | PTA_PRFCHW | PTA_FXSR},
3307 {"athlon-mp", PROCESSOR_ATHLON, CPU_ATHLON,
3308 PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE | PTA_PRFCHW | PTA_FXSR},
3309 {"x86-64", PROCESSOR_K8, CPU_K8,
3310 PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_NO_SAHF | PTA_FXSR},
3311 {"k8", PROCESSOR_K8, CPU_K8,
3312 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
3313 | PTA_SSE2 | PTA_NO_SAHF | PTA_PRFCHW | PTA_FXSR},
3314 {"k8-sse3", PROCESSOR_K8, CPU_K8,
3315 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
3316 | PTA_SSE2 | PTA_SSE3 | PTA_NO_SAHF | PTA_PRFCHW | PTA_FXSR},
3317 {"opteron", PROCESSOR_K8, CPU_K8,
3318 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
3319 | PTA_SSE2 | PTA_NO_SAHF | PTA_PRFCHW | PTA_FXSR},
3320 {"opteron-sse3", PROCESSOR_K8, CPU_K8,
3321 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
3322 | PTA_SSE2 | PTA_SSE3 | PTA_NO_SAHF | PTA_PRFCHW | PTA_FXSR},
3323 {"athlon64", PROCESSOR_K8, CPU_K8,
3324 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
3325 | PTA_SSE2 | PTA_NO_SAHF | PTA_PRFCHW | PTA_FXSR},
3326 {"athlon64-sse3", PROCESSOR_K8, CPU_K8,
3327 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
3328 | PTA_SSE2 | PTA_SSE3 | PTA_NO_SAHF | PTA_PRFCHW | PTA_FXSR},
3329 {"athlon-fx", PROCESSOR_K8, CPU_K8,
3330 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
3331 | PTA_SSE2 | PTA_NO_SAHF | PTA_PRFCHW | PTA_FXSR},
3332 {"amdfam10", PROCESSOR_AMDFAM10, CPU_AMDFAM10,
3333 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE | PTA_SSE2
3334 | PTA_SSE3 | PTA_SSE4A | PTA_CX16 | PTA_ABM | PTA_PRFCHW | PTA_FXSR},
3335 {"barcelona", PROCESSOR_AMDFAM10, CPU_AMDFAM10,
3336 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE | PTA_SSE2
3337 | PTA_SSE3 | PTA_SSE4A | PTA_CX16 | PTA_ABM | PTA_PRFCHW | PTA_FXSR},
3338 {"bdver1", PROCESSOR_BDVER1, CPU_BDVER1,
3339 PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3
3340 | PTA_SSE4A | PTA_CX16 | PTA_ABM | PTA_SSSE3 | PTA_SSE4_1
3341 | PTA_SSE4_2 | PTA_AES | PTA_PCLMUL | PTA_AVX | PTA_FMA4
3342 | PTA_XOP | PTA_LWP | PTA_PRFCHW | PTA_FXSR | PTA_XSAVE},
3343 {"bdver2", PROCESSOR_BDVER2, CPU_BDVER2,
3344 PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3
3345 | PTA_SSE4A | PTA_CX16 | PTA_ABM | PTA_SSSE3 | PTA_SSE4_1
3346 | PTA_SSE4_2 | PTA_AES | PTA_PCLMUL | PTA_AVX | PTA_FMA4
3347 | PTA_XOP | PTA_LWP | PTA_BMI | PTA_TBM | PTA_F16C
3348 | PTA_FMA | PTA_PRFCHW | PTA_FXSR | PTA_XSAVE},
3349 {"bdver3", PROCESSOR_BDVER3, CPU_BDVER3,
3350 PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3
3351 | PTA_SSE4A | PTA_CX16 | PTA_ABM | PTA_SSSE3 | PTA_SSE4_1
3352 | PTA_SSE4_2 | PTA_AES | PTA_PCLMUL | PTA_AVX | PTA_FMA4
3353 | PTA_XOP | PTA_LWP | PTA_BMI | PTA_TBM | PTA_F16C
3354 | PTA_FMA | PTA_PRFCHW | PTA_FXSR | PTA_XSAVE
3355 | PTA_XSAVEOPT | PTA_FSGSBASE},
3356 {"bdver4", PROCESSOR_BDVER4, CPU_BDVER4,
3357 PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3
3358 | PTA_SSE4A | PTA_CX16 | PTA_ABM | PTA_SSSE3 | PTA_SSE4_1
3359 | PTA_SSE4_2 | PTA_AES | PTA_PCLMUL | PTA_AVX | PTA_AVX2
3360 | PTA_FMA4 | PTA_XOP | PTA_LWP | PTA_BMI | PTA_BMI2
3361 | PTA_TBM | PTA_F16C | PTA_FMA | PTA_PRFCHW | PTA_FXSR
3362 | PTA_XSAVE | PTA_XSAVEOPT | PTA_FSGSBASE | PTA_RDRND
3363 | PTA_MOVBE | PTA_MWAITX},
3364 {"btver1", PROCESSOR_BTVER1, CPU_GENERIC,
3365 PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3
3366 | PTA_SSSE3 | PTA_SSE4A |PTA_ABM | PTA_CX16 | PTA_PRFCHW
3367 | PTA_FXSR | PTA_XSAVE},
3368 {"btver2", PROCESSOR_BTVER2, CPU_BTVER2,
3369 PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3
3370 | PTA_SSSE3 | PTA_SSE4A |PTA_ABM | PTA_CX16 | PTA_SSE4_1
3371 | PTA_SSE4_2 | PTA_AES | PTA_PCLMUL | PTA_AVX
3372 | PTA_BMI | PTA_F16C | PTA_MOVBE | PTA_PRFCHW
3373 | PTA_FXSR | PTA_XSAVE | PTA_XSAVEOPT},
3375 {"generic", PROCESSOR_GENERIC, CPU_GENERIC,
3376 PTA_64BIT
3377 | PTA_HLE /* flags are only used for -march switch. */ },
3380 /* -mrecip options. */
3381 static struct
3383 const char *string; /* option name */
3384 unsigned int mask; /* mask bits to set */
3386 const recip_options[] =
3388 { "all", RECIP_MASK_ALL },
3389 { "none", RECIP_MASK_NONE },
3390 { "div", RECIP_MASK_DIV },
3391 { "sqrt", RECIP_MASK_SQRT },
3392 { "vec-div", RECIP_MASK_VEC_DIV },
3393 { "vec-sqrt", RECIP_MASK_VEC_SQRT },
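/* Illustrative note (added, not in the original table): these strings are
   the operands accepted by -mrecip=.  Per the parsing loop further down, a
   '!' prefix inverts an entry and "default" stands for RECIP_MASK_ALL, so
   for example

     -mrecip=all,!sqrt

   would enable every reciprocal approximation except the scalar square
   root.  */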
3396 int const pta_size = ARRAY_SIZE (processor_alias_table);
3398 /* Set up prefix/suffix so the error messages refer to either the command
3399 line argument, or the attribute(target). */
3400 if (main_args_p)
3402 prefix = "-m";
3403 suffix = "";
3404 sw = "switch";
3406 else
3408 prefix = "option(\"";
3409 suffix = "\")";
3410 sw = "attribute";
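/* Sketch of the effect (added for clarity): with main_args_p the later
   diagnostics print e.g. "-march=..." while for attribute((target))
   they print "option(\"arch=...\")", so each message matches the spelling
   the user actually wrote.  */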
3413 /* Turn off both OPTION_MASK_ABI_64 and OPTION_MASK_ABI_X32 if
3414 TARGET_64BIT_DEFAULT is true and TARGET_64BIT is false. */
3415 if (TARGET_64BIT_DEFAULT && !TARGET_64BIT_P (opts->x_ix86_isa_flags))
3416 opts->x_ix86_isa_flags &= ~(OPTION_MASK_ABI_64 | OPTION_MASK_ABI_X32);
3417 #ifdef TARGET_BI_ARCH
3418 else
3420 #if TARGET_BI_ARCH == 1
3421 /* When TARGET_BI_ARCH == 1, by default, OPTION_MASK_ABI_64
3422 is on and OPTION_MASK_ABI_X32 is off. We turn off
3423 OPTION_MASK_ABI_64 if OPTION_MASK_ABI_X32 is turned on by
3424 -mx32. */
3425 if (TARGET_X32_P (opts->x_ix86_isa_flags))
3426 opts->x_ix86_isa_flags &= ~OPTION_MASK_ABI_64;
3427 #else
3428 /* When TARGET_BI_ARCH == 2, by default, OPTION_MASK_ABI_X32 is
3429 on and OPTION_MASK_ABI_64 is off. We turn off
3430 OPTION_MASK_ABI_X32 if OPTION_MASK_ABI_64 is turned on by
3431 -m64 or OPTION_MASK_CODE16 is turned on by -m16. */
3432 if (TARGET_LP64_P (opts->x_ix86_isa_flags)
3433 || TARGET_16BIT_P (opts->x_ix86_isa_flags))
3434 opts->x_ix86_isa_flags &= ~OPTION_MASK_ABI_X32;
3435 #endif
3437 #endif
3439 if (TARGET_X32_P (opts->x_ix86_isa_flags))
3441 /* Always turn on OPTION_MASK_ISA_64BIT and turn off
3442 OPTION_MASK_ABI_64 for TARGET_X32. */
3443 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_64BIT;
3444 opts->x_ix86_isa_flags &= ~OPTION_MASK_ABI_64;
3446 else if (TARGET_16BIT_P (opts->x_ix86_isa_flags))
3447 opts->x_ix86_isa_flags &= ~(OPTION_MASK_ISA_64BIT
3448 | OPTION_MASK_ABI_X32
3449 | OPTION_MASK_ABI_64);
3450 else if (TARGET_LP64_P (opts->x_ix86_isa_flags))
3452 /* Always turn on OPTION_MASK_ISA_64BIT and turn off
3453 OPTION_MASK_ABI_X32 for TARGET_LP64. */
3454 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_64BIT;
3455 opts->x_ix86_isa_flags &= ~OPTION_MASK_ABI_X32;
3458 #ifdef SUBTARGET_OVERRIDE_OPTIONS
3459 SUBTARGET_OVERRIDE_OPTIONS;
3460 #endif
3462 #ifdef SUBSUBTARGET_OVERRIDE_OPTIONS
3463 SUBSUBTARGET_OVERRIDE_OPTIONS;
3464 #endif
3466 /* -fPIC is the default for x86_64. */
3467 if (TARGET_MACHO && TARGET_64BIT_P (opts->x_ix86_isa_flags))
3468 opts->x_flag_pic = 2;
3470 /* Need to check -mtune=generic first. */
3471 if (opts->x_ix86_tune_string)
3473 /* As special support for cross compilers we read -mtune=native
3474 as -mtune=generic. With native compilers we won't see the
3475 -mtune=native, as it was changed by the driver. */
3476 if (!strcmp (opts->x_ix86_tune_string, "native"))
3478 opts->x_ix86_tune_string = "generic";
3480 else if (!strcmp (opts->x_ix86_tune_string, "x86-64"))
3481 warning (OPT_Wdeprecated, "%stune=x86-64%s is deprecated; use "
3482 "%stune=k8%s or %stune=generic%s instead as appropriate",
3483 prefix, suffix, prefix, suffix, prefix, suffix);
3485 else
3487 if (opts->x_ix86_arch_string)
3488 opts->x_ix86_tune_string = opts->x_ix86_arch_string;
3489 if (!opts->x_ix86_tune_string)
3491 opts->x_ix86_tune_string
3492 = processor_target_table[TARGET_CPU_DEFAULT].name;
3493 ix86_tune_defaulted = 1;
3496 /* opts->x_ix86_tune_string is set to opts->x_ix86_arch_string
3497 or defaulted. We need to use a sensible tune option. */
3498 if (!strcmp (opts->x_ix86_tune_string, "x86-64"))
3500 opts->x_ix86_tune_string = "generic";
3504 if (opts->x_ix86_stringop_alg == rep_prefix_8_byte
3505 && !TARGET_64BIT_P (opts->x_ix86_isa_flags))
3507 /* rep; movq isn't available in 32-bit code. */
3508 error ("-mstringop-strategy=rep_8byte not supported for 32-bit code");
3509 opts->x_ix86_stringop_alg = no_stringop;
3512 if (!opts->x_ix86_arch_string)
3513 opts->x_ix86_arch_string
3514 = TARGET_64BIT_P (opts->x_ix86_isa_flags)
3515 ? "x86-64" : SUBTARGET32_DEFAULT_CPU;
3516 else
3517 ix86_arch_specified = 1;
3519 if (opts_set->x_ix86_pmode)
3521 if ((TARGET_LP64_P (opts->x_ix86_isa_flags)
3522 && opts->x_ix86_pmode == PMODE_SI)
3523 || (!TARGET_64BIT_P (opts->x_ix86_isa_flags)
3524 && opts->x_ix86_pmode == PMODE_DI))
3525 error ("address mode %qs not supported in the %s bit mode",
3526 TARGET_64BIT_P (opts->x_ix86_isa_flags) ? "short" : "long",
3527 TARGET_64BIT_P (opts->x_ix86_isa_flags) ? "64" : "32");
3529 else
3530 opts->x_ix86_pmode = TARGET_LP64_P (opts->x_ix86_isa_flags)
3531 ? PMODE_DI : PMODE_SI;
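/* Worked example (added for clarity; the option name is an assumption):
   ix86_pmode is presumably set by -maddress-mode=short|long.  PMODE_SI
   selects 32-bit (SImode) pointers and PMODE_DI 64-bit (DImode) pointers,
   so the default here is DImode addresses for LP64 and SImode addresses
   otherwise, including x32.  */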
3533 if (!opts_set->x_ix86_abi)
3534 opts->x_ix86_abi = DEFAULT_ABI;
3536 /* For targets using ms ABI enable ms-extensions, if not
3537 explicitly turned off. For non-ms ABI we turn off this
3538 option. */
3539 if (!opts_set->x_flag_ms_extensions)
3540 opts->x_flag_ms_extensions = (MS_ABI == DEFAULT_ABI);
3542 if (opts_set->x_ix86_cmodel)
3544 switch (opts->x_ix86_cmodel)
3546 case CM_SMALL:
3547 case CM_SMALL_PIC:
3548 if (opts->x_flag_pic)
3549 opts->x_ix86_cmodel = CM_SMALL_PIC;
3550 if (!TARGET_64BIT_P (opts->x_ix86_isa_flags))
3551 error ("code model %qs not supported in the %s bit mode",
3552 "small", "32");
3553 break;
3555 case CM_MEDIUM:
3556 case CM_MEDIUM_PIC:
3557 if (opts->x_flag_pic)
3558 opts->x_ix86_cmodel = CM_MEDIUM_PIC;
3559 if (!TARGET_64BIT_P (opts->x_ix86_isa_flags))
3560 error ("code model %qs not supported in the %s bit mode",
3561 "medium", "32");
3562 else if (TARGET_X32_P (opts->x_ix86_isa_flags))
3563 error ("code model %qs not supported in x32 mode",
3564 "medium");
3565 break;
3567 case CM_LARGE:
3568 case CM_LARGE_PIC:
3569 if (opts->x_flag_pic)
3570 opts->x_ix86_cmodel = CM_LARGE_PIC;
3571 if (!TARGET_64BIT_P (opts->x_ix86_isa_flags))
3572 error ("code model %qs not supported in the %s bit mode",
3573 "large", "32");
3574 else if (TARGET_X32_P (opts->x_ix86_isa_flags))
3575 error ("code model %qs not supported in x32 mode",
3576 "large");
3577 break;
3579 case CM_32:
3580 if (opts->x_flag_pic)
3581 error ("code model %s does not support PIC mode", "32");
3582 if (TARGET_64BIT_P (opts->x_ix86_isa_flags))
3583 error ("code model %qs not supported in the %s bit mode",
3584 "32", "64");
3585 break;
3587 case CM_KERNEL:
3588 if (opts->x_flag_pic)
3590 error ("code model %s does not support PIC mode", "kernel");
3591 opts->x_ix86_cmodel = CM_32;
3593 if (!TARGET_64BIT_P (opts->x_ix86_isa_flags))
3594 error ("code model %qs not supported in the %s bit mode",
3595 "kernel", "32");
3596 break;
3598 default:
3599 gcc_unreachable ();
3602 else
3604 /* For TARGET_64BIT and MS_ABI, force pic on, in order to enable the
3605 use of rip-relative addressing. This eliminates fixups that
3606 would otherwise be needed if this object is to be placed in a
3607 DLL, and is essentially just as efficient as direct addressing. */
3608 if (TARGET_64BIT_P (opts->x_ix86_isa_flags)
3609 && (TARGET_RDOS || TARGET_PECOFF))
3610 opts->x_ix86_cmodel = CM_MEDIUM_PIC, opts->x_flag_pic = 1;
3611 else if (TARGET_64BIT_P (opts->x_ix86_isa_flags))
3612 opts->x_ix86_cmodel = opts->x_flag_pic ? CM_SMALL_PIC : CM_SMALL;
3613 else
3614 opts->x_ix86_cmodel = CM_32;
3616 if (TARGET_MACHO && opts->x_ix86_asm_dialect == ASM_INTEL)
3618 error ("-masm=intel not supported in this configuration");
3619 opts->x_ix86_asm_dialect = ASM_ATT;
3621 if ((TARGET_64BIT_P (opts->x_ix86_isa_flags) != 0)
3622 != ((opts->x_ix86_isa_flags & OPTION_MASK_ISA_64BIT) != 0))
3623 sorry ("%i-bit mode not compiled in",
3624 (opts->x_ix86_isa_flags & OPTION_MASK_ISA_64BIT) ? 64 : 32);
3626 for (i = 0; i < pta_size; i++)
3627 if (! strcmp (opts->x_ix86_arch_string, processor_alias_table[i].name))
3629 ix86_schedule = processor_alias_table[i].schedule;
3630 ix86_arch = processor_alias_table[i].processor;
3631 /* Default cpu tuning to the architecture. */
3632 ix86_tune = ix86_arch;
3634 if (TARGET_64BIT_P (opts->x_ix86_isa_flags)
3635 && !(processor_alias_table[i].flags & PTA_64BIT))
3636 error ("CPU you selected does not support x86-64 "
3637 "instruction set");
3639 if (processor_alias_table[i].flags & PTA_MMX
3640 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_MMX))
3641 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_MMX;
3642 if (processor_alias_table[i].flags & PTA_3DNOW
3643 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_3DNOW))
3644 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_3DNOW;
3645 if (processor_alias_table[i].flags & PTA_3DNOW_A
3646 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_3DNOW_A))
3647 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_3DNOW_A;
3648 if (processor_alias_table[i].flags & PTA_SSE
3649 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE))
3650 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_SSE;
3651 if (processor_alias_table[i].flags & PTA_SSE2
3652 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE2))
3653 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_SSE2;
3654 if (processor_alias_table[i].flags & PTA_SSE3
3655 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE3))
3656 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_SSE3;
3657 if (processor_alias_table[i].flags & PTA_SSSE3
3658 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_SSSE3))
3659 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_SSSE3;
3660 if (processor_alias_table[i].flags & PTA_SSE4_1
3661 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE4_1))
3662 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_SSE4_1;
3663 if (processor_alias_table[i].flags & PTA_SSE4_2
3664 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE4_2))
3665 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_SSE4_2;
3666 if (processor_alias_table[i].flags & PTA_AVX
3667 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_AVX))
3668 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_AVX;
3669 if (processor_alias_table[i].flags & PTA_AVX2
3670 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_AVX2))
3671 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_AVX2;
3672 if (processor_alias_table[i].flags & PTA_FMA
3673 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_FMA))
3674 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_FMA;
3675 if (processor_alias_table[i].flags & PTA_SSE4A
3676 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE4A))
3677 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_SSE4A;
3678 if (processor_alias_table[i].flags & PTA_FMA4
3679 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_FMA4))
3680 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_FMA4;
3681 if (processor_alias_table[i].flags & PTA_XOP
3682 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_XOP))
3683 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_XOP;
3684 if (processor_alias_table[i].flags & PTA_LWP
3685 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_LWP))
3686 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_LWP;
3687 if (processor_alias_table[i].flags & PTA_ABM
3688 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_ABM))
3689 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_ABM;
3690 if (processor_alias_table[i].flags & PTA_BMI
3691 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_BMI))
3692 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_BMI;
3693 if (processor_alias_table[i].flags & (PTA_LZCNT | PTA_ABM)
3694 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_LZCNT))
3695 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_LZCNT;
3696 if (processor_alias_table[i].flags & PTA_TBM
3697 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_TBM))
3698 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_TBM;
3699 if (processor_alias_table[i].flags & PTA_BMI2
3700 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_BMI2))
3701 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_BMI2;
3702 if (processor_alias_table[i].flags & PTA_CX16
3703 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_CX16))
3704 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_CX16;
3705 if (processor_alias_table[i].flags & (PTA_POPCNT | PTA_ABM)
3706 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_POPCNT))
3707 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_POPCNT;
3708 if (!(TARGET_64BIT_P (opts->x_ix86_isa_flags)
3709 && (processor_alias_table[i].flags & PTA_NO_SAHF))
3710 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_SAHF))
3711 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_SAHF;
3712 if (processor_alias_table[i].flags & PTA_MOVBE
3713 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_MOVBE))
3714 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_MOVBE;
3715 if (processor_alias_table[i].flags & PTA_AES
3716 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_AES))
3717 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_AES;
3718 if (processor_alias_table[i].flags & PTA_SHA
3719 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_SHA))
3720 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_SHA;
3721 if (processor_alias_table[i].flags & PTA_PCLMUL
3722 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_PCLMUL))
3723 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_PCLMUL;
3724 if (processor_alias_table[i].flags & PTA_FSGSBASE
3725 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_FSGSBASE))
3726 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_FSGSBASE;
3727 if (processor_alias_table[i].flags & PTA_RDRND
3728 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_RDRND))
3729 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_RDRND;
3730 if (processor_alias_table[i].flags & PTA_F16C
3731 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_F16C))
3732 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_F16C;
3733 if (processor_alias_table[i].flags & PTA_RTM
3734 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_RTM))
3735 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_RTM;
3736 if (processor_alias_table[i].flags & PTA_HLE
3737 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_HLE))
3738 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_HLE;
3739 if (processor_alias_table[i].flags & PTA_PRFCHW
3740 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_PRFCHW))
3741 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_PRFCHW;
3742 if (processor_alias_table[i].flags & PTA_RDSEED
3743 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_RDSEED))
3744 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_RDSEED;
3745 if (processor_alias_table[i].flags & PTA_ADX
3746 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_ADX))
3747 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_ADX;
3748 if (processor_alias_table[i].flags & PTA_FXSR
3749 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_FXSR))
3750 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_FXSR;
3751 if (processor_alias_table[i].flags & PTA_XSAVE
3752 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_XSAVE))
3753 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_XSAVE;
3754 if (processor_alias_table[i].flags & PTA_XSAVEOPT
3755 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_XSAVEOPT))
3756 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_XSAVEOPT;
3757 if (processor_alias_table[i].flags & PTA_AVX512F
3758 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_AVX512F))
3759 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_AVX512F;
3760 if (processor_alias_table[i].flags & PTA_AVX512ER
3761 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_AVX512ER))
3762 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_AVX512ER;
3763 if (processor_alias_table[i].flags & PTA_AVX512PF
3764 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_AVX512PF))
3765 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_AVX512PF;
3766 if (processor_alias_table[i].flags & PTA_AVX512CD
3767 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_AVX512CD))
3768 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_AVX512CD;
3769 if (processor_alias_table[i].flags & PTA_PREFETCHWT1
3770 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_PREFETCHWT1))
3771 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_PREFETCHWT1;
3772 if (processor_alias_table[i].flags & PTA_PCOMMIT
3773 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_PCOMMIT))
3774 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_PCOMMIT;
3775 if (processor_alias_table[i].flags & PTA_CLWB
3776 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_CLWB))
3777 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_CLWB;
3778 if (processor_alias_table[i].flags & PTA_CLFLUSHOPT
3779 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_CLFLUSHOPT))
3780 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_CLFLUSHOPT;
3781 if (processor_alias_table[i].flags & PTA_XSAVEC
3782 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_XSAVEC))
3783 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_XSAVEC;
3784 if (processor_alias_table[i].flags & PTA_XSAVES
3785 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_XSAVES))
3786 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_XSAVES;
3787 if (processor_alias_table[i].flags & PTA_AVX512DQ
3788 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_AVX512DQ))
3789 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_AVX512DQ;
3790 if (processor_alias_table[i].flags & PTA_AVX512BW
3791 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_AVX512BW))
3792 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_AVX512BW;
3793 if (processor_alias_table[i].flags & PTA_AVX512VL
3794 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_AVX512VL))
3795 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_AVX512VL;
3796 if (processor_alias_table[i].flags & PTA_MPX
3797 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_MPX))
3798 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_MPX;
3799 if (processor_alias_table[i].flags & PTA_AVX512VBMI
3800 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_AVX512VBMI))
3801 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_AVX512VBMI;
3802 if (processor_alias_table[i].flags & PTA_AVX512IFMA
3803 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_AVX512IFMA))
3804 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_AVX512IFMA;
3805 if (processor_alias_table[i].flags & (PTA_PREFETCH_SSE | PTA_SSE))
3806 x86_prefetch_sse = true;
3807 if (processor_alias_table[i].flags & PTA_MWAITX
3808 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_MWAITX))
3809 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_MWAITX;
3811 break;
3814 if (TARGET_X32 && (opts->x_ix86_isa_flags & OPTION_MASK_ISA_MPX))
3815 error ("Intel MPX does not support x32");
3817 if (TARGET_X32 && (ix86_isa_flags & OPTION_MASK_ISA_MPX))
3818 error ("Intel MPX does not support x32");
3820 if (!strcmp (opts->x_ix86_arch_string, "generic"))
3821 error ("generic CPU can be used only for %stune=%s %s",
3822 prefix, suffix, sw);
3823 else if (!strcmp (opts->x_ix86_arch_string, "intel"))
3824 error ("intel CPU can be used only for %stune=%s %s",
3825 prefix, suffix, sw);
3826 else if (i == pta_size)
3827 error ("bad value (%s) for %sarch=%s %s",
3828 opts->x_ix86_arch_string, prefix, suffix, sw);
3830 ix86_arch_mask = 1u << ix86_arch;
3831 for (i = 0; i < X86_ARCH_LAST; ++i)
3832 ix86_arch_features[i] = !!(initial_ix86_arch_features[i] & ix86_arch_mask);
3834 for (i = 0; i < pta_size; i++)
3835 if (! strcmp (opts->x_ix86_tune_string, processor_alias_table[i].name))
3837 ix86_schedule = processor_alias_table[i].schedule;
3838 ix86_tune = processor_alias_table[i].processor;
3839 if (TARGET_64BIT_P (opts->x_ix86_isa_flags))
3841 if (!(processor_alias_table[i].flags & PTA_64BIT))
3843 if (ix86_tune_defaulted)
3845 opts->x_ix86_tune_string = "x86-64";
3846 for (i = 0; i < pta_size; i++)
3847 if (! strcmp (opts->x_ix86_tune_string,
3848 processor_alias_table[i].name))
3849 break;
3850 ix86_schedule = processor_alias_table[i].schedule;
3851 ix86_tune = processor_alias_table[i].processor;
3853 else
3854 error ("CPU you selected does not support x86-64 "
3855 "instruction set");
3858 /* Intel CPUs have always interpreted SSE prefetch instructions as
3859 NOPs; so, we can enable SSE prefetch instructions even when
3860 -mtune (rather than -march) points us to a processor that has them.
3861 However, the VIA C3 gives a SIGILL, so we only do that for i686 and
3862 higher processors. */
3863 if (TARGET_CMOV
3864 && (processor_alias_table[i].flags & (PTA_PREFETCH_SSE | PTA_SSE)))
3865 x86_prefetch_sse = true;
3866 break;
3869 if (ix86_tune_specified && i == pta_size)
3870 error ("bad value (%s) for %stune=%s %s",
3871 opts->x_ix86_tune_string, prefix, suffix, sw);
3873 set_ix86_tune_features (ix86_tune, opts->x_ix86_dump_tunes);
3875 #ifndef USE_IX86_FRAME_POINTER
3876 #define USE_IX86_FRAME_POINTER 0
3877 #endif
3879 #ifndef USE_X86_64_FRAME_POINTER
3880 #define USE_X86_64_FRAME_POINTER 0
3881 #endif
3883 /* Set the default values for switches whose default depends on TARGET_64BIT
3884 in case they weren't overwritten by command line options. */
3885 if (TARGET_64BIT_P (opts->x_ix86_isa_flags))
3887 if (opts->x_optimize >= 1 && !opts_set->x_flag_omit_frame_pointer)
3888 opts->x_flag_omit_frame_pointer = !USE_X86_64_FRAME_POINTER;
3889 if (opts->x_flag_asynchronous_unwind_tables
3890 && !opts_set->x_flag_unwind_tables
3891 && TARGET_64BIT_MS_ABI)
3892 opts->x_flag_unwind_tables = 1;
3893 if (opts->x_flag_asynchronous_unwind_tables == 2)
3894 opts->x_flag_unwind_tables
3895 = opts->x_flag_asynchronous_unwind_tables = 1;
3896 if (opts->x_flag_pcc_struct_return == 2)
3897 opts->x_flag_pcc_struct_return = 0;
3899 else
3901 if (opts->x_optimize >= 1 && !opts_set->x_flag_omit_frame_pointer)
3902 opts->x_flag_omit_frame_pointer
3903 = !(USE_IX86_FRAME_POINTER || opts->x_optimize_size);
3904 if (opts->x_flag_asynchronous_unwind_tables == 2)
3905 opts->x_flag_asynchronous_unwind_tables = !USE_IX86_FRAME_POINTER;
3906 if (opts->x_flag_pcc_struct_return == 2)
3907 opts->x_flag_pcc_struct_return = DEFAULT_PCC_STRUCT_RETURN;
3910 ix86_tune_cost = processor_target_table[ix86_tune].cost;
3911 /* TODO: ix86_cost should be chosen at instruction or function granularity
3912 so for cold code we use size_cost even in !optimize_size compilation. */
3913 if (opts->x_optimize_size)
3914 ix86_cost = &ix86_size_cost;
3915 else
3916 ix86_cost = ix86_tune_cost;
3918 /* Arrange to set up i386_stack_locals for all functions. */
3919 init_machine_status = ix86_init_machine_status;
3921 /* Validate -mregparm= value. */
3922 if (opts_set->x_ix86_regparm)
3924 if (TARGET_64BIT_P (opts->x_ix86_isa_flags))
3925 warning (0, "-mregparm is ignored in 64-bit mode");
3926 if (opts->x_ix86_regparm > REGPARM_MAX)
3928 error ("-mregparm=%d is not between 0 and %d",
3929 opts->x_ix86_regparm, REGPARM_MAX);
3930 opts->x_ix86_regparm = 0;
3933 if (TARGET_64BIT_P (opts->x_ix86_isa_flags))
3934 opts->x_ix86_regparm = REGPARM_MAX;
3936 /* Default align_* from the processor table. */
3937 ix86_default_align (opts);
3939 /* Provide default for -mbranch-cost= value. */
3940 if (!opts_set->x_ix86_branch_cost)
3941 opts->x_ix86_branch_cost = ix86_tune_cost->branch_cost;
3943 if (TARGET_64BIT_P (opts->x_ix86_isa_flags))
3945 opts->x_target_flags
3946 |= TARGET_SUBTARGET64_DEFAULT & ~opts_set->x_target_flags;
3948 /* Enable by default the SSE and MMX builtins. Do allow the user to
3949 explicitly disable any of these. In particular, disabling SSE and
3950 MMX for kernel code is extremely useful. */
3951 if (!ix86_arch_specified)
3952 opts->x_ix86_isa_flags
3953 |= ((OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_MMX
3954 | TARGET_SUBTARGET64_ISA_DEFAULT)
3955 & ~opts->x_ix86_isa_flags_explicit);
3957 if (TARGET_RTD_P (opts->x_target_flags))
3958 warning (0, "%srtd%s is ignored in 64bit mode", prefix, suffix);
3960 else
3962 opts->x_target_flags
3963 |= TARGET_SUBTARGET32_DEFAULT & ~opts_set->x_target_flags;
3965 if (!ix86_arch_specified)
3966 opts->x_ix86_isa_flags
3967 |= TARGET_SUBTARGET32_ISA_DEFAULT & ~opts->x_ix86_isa_flags_explicit;
3969 /* The i386 ABI does not specify a red zone. It still makes sense to use it
3970 when the programmer takes care to keep the stack from being destroyed. */
3971 if (!(opts_set->x_target_flags & MASK_NO_RED_ZONE))
3972 opts->x_target_flags |= MASK_NO_RED_ZONE;
3975 /* Keep nonleaf frame pointers. */
3976 if (opts->x_flag_omit_frame_pointer)
3977 opts->x_target_flags &= ~MASK_OMIT_LEAF_FRAME_POINTER;
3978 else if (TARGET_OMIT_LEAF_FRAME_POINTER_P (opts->x_target_flags))
3979 opts->x_flag_omit_frame_pointer = 1;
3981 /* If we're doing fast math, we don't care about comparison order
3982 wrt NaNs. This lets us use a shorter comparison sequence. */
3983 if (opts->x_flag_finite_math_only)
3984 opts->x_target_flags &= ~MASK_IEEE_FP;
3986 /* If the architecture always has an FPU, turn off NO_FANCY_MATH_387,
3987 since the insns won't need emulation. */
3988 if (ix86_tune_features [X86_TUNE_ALWAYS_FANCY_MATH_387])
3989 opts->x_target_flags &= ~MASK_NO_FANCY_MATH_387;
3991 /* Likewise, if the target doesn't have a 387, or we've specified
3992 software floating point, don't use 387 inline intrinsics. */
3993 if (!TARGET_80387_P (opts->x_target_flags))
3994 opts->x_target_flags |= MASK_NO_FANCY_MATH_387;
3996 /* Turn on MMX builtins for -msse. */
3997 if (TARGET_SSE_P (opts->x_ix86_isa_flags))
3998 opts->x_ix86_isa_flags
3999 |= OPTION_MASK_ISA_MMX & ~opts->x_ix86_isa_flags_explicit;
4001 /* Enable SSE prefetch. */
4002 if (TARGET_SSE_P (opts->x_ix86_isa_flags)
4003 || (TARGET_PRFCHW && !TARGET_3DNOW_P (opts->x_ix86_isa_flags)))
4004 x86_prefetch_sse = true;
4006 /* Enable prefetch{,w} instructions for -m3dnow and -mprefetchwt1. */
4007 if (TARGET_3DNOW_P (opts->x_ix86_isa_flags)
4008 || TARGET_PREFETCHWT1_P (opts->x_ix86_isa_flags))
4009 opts->x_ix86_isa_flags
4010 |= OPTION_MASK_ISA_PRFCHW & ~opts->x_ix86_isa_flags_explicit;
4012 /* Enable popcnt instruction for -msse4.2 or -mabm. */
4013 if (TARGET_SSE4_2_P (opts->x_ix86_isa_flags)
4014 || TARGET_ABM_P (opts->x_ix86_isa_flags))
4015 opts->x_ix86_isa_flags
4016 |= OPTION_MASK_ISA_POPCNT & ~opts->x_ix86_isa_flags_explicit;
4018 /* Enable lzcnt instruction for -mabm. */
4019 if (TARGET_ABM_P(opts->x_ix86_isa_flags))
4020 opts->x_ix86_isa_flags
4021 |= OPTION_MASK_ISA_LZCNT & ~opts->x_ix86_isa_flags_explicit;
4023 /* Validate -mpreferred-stack-boundary= value or default it to
4024 PREFERRED_STACK_BOUNDARY_DEFAULT. */
4025 ix86_preferred_stack_boundary = PREFERRED_STACK_BOUNDARY_DEFAULT;
4026 if (opts_set->x_ix86_preferred_stack_boundary_arg)
4028 int min = (TARGET_64BIT_P (opts->x_ix86_isa_flags)
4029 ? (TARGET_SSE_P (opts->x_ix86_isa_flags) ? 4 : 3) : 2);
4030 int max = (TARGET_SEH ? 4 : 12);
4032 if (opts->x_ix86_preferred_stack_boundary_arg < min
4033 || opts->x_ix86_preferred_stack_boundary_arg > max)
4035 if (min == max)
4036 error ("-mpreferred-stack-boundary is not supported "
4037 "for this target");
4038 else
4039 error ("-mpreferred-stack-boundary=%d is not between %d and %d",
4040 opts->x_ix86_preferred_stack_boundary_arg, min, max);
4042 else
4043 ix86_preferred_stack_boundary
4044 = (1 << opts->x_ix86_preferred_stack_boundary_arg) * BITS_PER_UNIT;
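/* Worked example (added for clarity): the argument is log2 of the desired
   alignment in bytes, so -mpreferred-stack-boundary=4 yields
   (1 << 4) * BITS_PER_UNIT = 16 * 8 = 128 bits, i.e. a 16-byte aligned
   stack.  */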
4047 /* Set the default value for -mstackrealign. */
4048 if (opts->x_ix86_force_align_arg_pointer == -1)
4049 opts->x_ix86_force_align_arg_pointer = STACK_REALIGN_DEFAULT;
4051 ix86_default_incoming_stack_boundary = PREFERRED_STACK_BOUNDARY;
4053 /* Validate -mincoming-stack-boundary= value or default it to
4054 MIN_STACK_BOUNDARY/PREFERRED_STACK_BOUNDARY. */
4055 ix86_incoming_stack_boundary = ix86_default_incoming_stack_boundary;
4056 if (opts_set->x_ix86_incoming_stack_boundary_arg)
4058 if (opts->x_ix86_incoming_stack_boundary_arg
4059 < (TARGET_64BIT_P (opts->x_ix86_isa_flags) ? 4 : 2)
4060 || opts->x_ix86_incoming_stack_boundary_arg > 12)
4061 error ("-mincoming-stack-boundary=%d is not between %d and 12",
4062 opts->x_ix86_incoming_stack_boundary_arg,
4063 TARGET_64BIT_P (opts->x_ix86_isa_flags) ? 4 : 2);
4064 else
4066 ix86_user_incoming_stack_boundary
4067 = (1 << opts->x_ix86_incoming_stack_boundary_arg) * BITS_PER_UNIT;
4068 ix86_incoming_stack_boundary
4069 = ix86_user_incoming_stack_boundary;
4073 #ifndef NO_PROFILE_COUNTERS
4074 if (flag_nop_mcount)
4075 error ("-mnop-mcount is not compatible with this target");
4076 #endif
4077 if (flag_nop_mcount && flag_pic)
4078 error ("-mnop-mcount is not implemented for -fPIC");
4080 /* Accept -msseregparm only if at least SSE support is enabled. */
4081 if (TARGET_SSEREGPARM_P (opts->x_target_flags)
4082 && ! TARGET_SSE_P (opts->x_ix86_isa_flags))
4083 error ("%ssseregparm%s used without SSE enabled", prefix, suffix);
4085 if (opts_set->x_ix86_fpmath)
4087 if (opts->x_ix86_fpmath & FPMATH_SSE)
4089 if (!TARGET_SSE_P (opts->x_ix86_isa_flags))
4091 warning (0, "SSE instruction set disabled, using 387 arithmetics");
4092 opts->x_ix86_fpmath = FPMATH_387;
4094 else if ((opts->x_ix86_fpmath & FPMATH_387)
4095 && !TARGET_80387_P (opts->x_target_flags))
4097 warning (0, "387 instruction set disabled, using SSE arithmetics");
4098 opts->x_ix86_fpmath = FPMATH_SSE;
4102 /* For all chips supporting SSE2, -mfpmath=sse performs better than
4103 fpmath=387. The latter is, however, the default on many targets since the
4104 extra 80-bit precision of temporaries is considered to be part of the ABI.
4105 Overwrite the default at least for -ffast-math.
4106 TODO: -mfpmath=both seems to produce similarly performing code with
4107 slightly smaller binaries. It is, however, not clear whether register
4108 allocation is ready for this setting.
4109 Also -mfpmath=387 is overall a lot more compact (about 4-5%) than SSE
4110 codegen. We may switch to 387 with -ffast-math for size-optimized
4111 functions. */
4112 else if (fast_math_flags_set_p (&global_options)
4113 && TARGET_SSE2_P (opts->x_ix86_isa_flags))
4114 opts->x_ix86_fpmath = FPMATH_SSE;
4115 else
4116 opts->x_ix86_fpmath = TARGET_FPMATH_DEFAULT_P (opts->x_ix86_isa_flags);
4118 /* If the i387 is disabled, then do not return values in it. */
4119 if (!TARGET_80387_P (opts->x_target_flags))
4120 opts->x_target_flags &= ~MASK_FLOAT_RETURNS;
4122 /* Use external vectorized library in vectorizing intrinsics. */
4123 if (opts_set->x_ix86_veclibabi_type)
4124 switch (opts->x_ix86_veclibabi_type)
4126 case ix86_veclibabi_type_svml:
4127 ix86_veclib_handler = ix86_veclibabi_svml;
4128 break;
4130 case ix86_veclibabi_type_acml:
4131 ix86_veclib_handler = ix86_veclibabi_acml;
4132 break;
4134 default:
4135 gcc_unreachable ();
4138 if (ix86_tune_features [X86_TUNE_ACCUMULATE_OUTGOING_ARGS]
4139 && !(opts_set->x_target_flags & MASK_ACCUMULATE_OUTGOING_ARGS))
4140 opts->x_target_flags |= MASK_ACCUMULATE_OUTGOING_ARGS;
4142 /* If stack probes are required, the space used for large function
4143 arguments on the stack must also be probed, so enable
4144 -maccumulate-outgoing-args so this happens in the prologue. */
4145 if (TARGET_STACK_PROBE_P (opts->x_target_flags)
4146 && !(opts->x_target_flags & MASK_ACCUMULATE_OUTGOING_ARGS))
4148 if (opts_set->x_target_flags & MASK_ACCUMULATE_OUTGOING_ARGS)
4149 warning (0, "stack probing requires %saccumulate-outgoing-args%s "
4150 "for correctness", prefix, suffix);
4151 opts->x_target_flags |= MASK_ACCUMULATE_OUTGOING_ARGS;
4154 /* Figure out what ASM_GENERATE_INTERNAL_LABEL builds as a prefix. */
4156 char *p;
4157 ASM_GENERATE_INTERNAL_LABEL (internal_label_prefix, "LX", 0);
4158 p = strchr (internal_label_prefix, 'X');
4159 internal_label_prefix_len = p - internal_label_prefix;
4160 *p = '\0';
4163 /* When the scheduling description is not available, disable the scheduler
4164 pass so it won't slow down the compilation and make x87 code slower. */
4165 if (!TARGET_SCHEDULE)
4166 opts->x_flag_schedule_insns_after_reload = opts->x_flag_schedule_insns = 0;
4168 maybe_set_param_value (PARAM_SIMULTANEOUS_PREFETCHES,
4169 ix86_tune_cost->simultaneous_prefetches,
4170 opts->x_param_values,
4171 opts_set->x_param_values);
4172 maybe_set_param_value (PARAM_L1_CACHE_LINE_SIZE,
4173 ix86_tune_cost->prefetch_block,
4174 opts->x_param_values,
4175 opts_set->x_param_values);
4176 maybe_set_param_value (PARAM_L1_CACHE_SIZE,
4177 ix86_tune_cost->l1_cache_size,
4178 opts->x_param_values,
4179 opts_set->x_param_values);
4180 maybe_set_param_value (PARAM_L2_CACHE_SIZE,
4181 ix86_tune_cost->l2_cache_size,
4182 opts->x_param_values,
4183 opts_set->x_param_values);
4185 /* Enable software prefetching at -O3 for CPUs where prefetching is helpful. */
4186 if (opts->x_flag_prefetch_loop_arrays < 0
4187 && HAVE_prefetch
4188 && (opts->x_optimize >= 3 || opts->x_flag_profile_use)
4189 && !opts->x_optimize_size
4190 && TARGET_SOFTWARE_PREFETCHING_BENEFICIAL)
4191 opts->x_flag_prefetch_loop_arrays = 1;
4193 /* If using typedef char *va_list, signal that __builtin_va_start (&ap, 0)
4194 can be optimized to ap = __builtin_next_arg (0). */
4195 if (!TARGET_64BIT_P (opts->x_ix86_isa_flags) && !opts->x_flag_split_stack)
4196 targetm.expand_builtin_va_start = NULL;
4198 if (TARGET_64BIT_P (opts->x_ix86_isa_flags))
4200 ix86_gen_leave = gen_leave_rex64;
4201 if (Pmode == DImode)
4203 ix86_gen_tls_global_dynamic_64 = gen_tls_global_dynamic_64_di;
4204 ix86_gen_tls_local_dynamic_base_64
4205 = gen_tls_local_dynamic_base_64_di;
4207 else
4209 ix86_gen_tls_global_dynamic_64 = gen_tls_global_dynamic_64_si;
4210 ix86_gen_tls_local_dynamic_base_64
4211 = gen_tls_local_dynamic_base_64_si;
4214 else
4215 ix86_gen_leave = gen_leave;
4217 if (Pmode == DImode)
4219 ix86_gen_add3 = gen_adddi3;
4220 ix86_gen_sub3 = gen_subdi3;
4221 ix86_gen_sub3_carry = gen_subdi3_carry;
4222 ix86_gen_one_cmpl2 = gen_one_cmpldi2;
4223 ix86_gen_andsp = gen_anddi3;
4224 ix86_gen_allocate_stack_worker = gen_allocate_stack_worker_probe_di;
4225 ix86_gen_adjust_stack_and_probe = gen_adjust_stack_and_probedi;
4226 ix86_gen_probe_stack_range = gen_probe_stack_rangedi;
4227 ix86_gen_monitor = gen_sse3_monitor_di;
4228 ix86_gen_monitorx = gen_monitorx_di;
4230 else
4232 ix86_gen_add3 = gen_addsi3;
4233 ix86_gen_sub3 = gen_subsi3;
4234 ix86_gen_sub3_carry = gen_subsi3_carry;
4235 ix86_gen_one_cmpl2 = gen_one_cmplsi2;
4236 ix86_gen_andsp = gen_andsi3;
4237 ix86_gen_allocate_stack_worker = gen_allocate_stack_worker_probe_si;
4238 ix86_gen_adjust_stack_and_probe = gen_adjust_stack_and_probesi;
4239 ix86_gen_probe_stack_range = gen_probe_stack_rangesi;
4240 ix86_gen_monitor = gen_sse3_monitor_si;
4241 ix86_gen_monitorx = gen_monitorx_si;
4244 #ifdef USE_IX86_CLD
4245 /* Use -mcld by default for 32-bit code if configured with --enable-cld. */
4246 if (!TARGET_64BIT_P (opts->x_ix86_isa_flags))
4247 opts->x_target_flags |= MASK_CLD & ~opts_set->x_target_flags;
4248 #endif
4250 if (!TARGET_64BIT_P (opts->x_ix86_isa_flags) && opts->x_flag_pic)
4252 if (opts->x_flag_fentry > 0)
4253 sorry ("-mfentry isn%'t supported for 32-bit in combination "
4254 "with -fpic");
4255 opts->x_flag_fentry = 0;
4257 else if (TARGET_SEH)
4259 if (opts->x_flag_fentry == 0)
4260 sorry ("-mno-fentry isn%'t compatible with SEH");
4261 opts->x_flag_fentry = 1;
4263 else if (opts->x_flag_fentry < 0)
4265 #if defined(PROFILE_BEFORE_PROLOGUE)
4266 opts->x_flag_fentry = 1;
4267 #else
4268 opts->x_flag_fentry = 0;
4269 #endif
4272 if (!(opts_set->x_target_flags & MASK_VZEROUPPER))
4273 opts->x_target_flags |= MASK_VZEROUPPER;
4274 if (!ix86_tune_features[X86_TUNE_AVX256_UNALIGNED_LOAD_OPTIMAL]
4275 && !(opts_set->x_target_flags & MASK_AVX256_SPLIT_UNALIGNED_LOAD))
4276 opts->x_target_flags |= MASK_AVX256_SPLIT_UNALIGNED_LOAD;
4277 if (!ix86_tune_features[X86_TUNE_AVX256_UNALIGNED_STORE_OPTIMAL]
4278 && !(opts_set->x_target_flags & MASK_AVX256_SPLIT_UNALIGNED_STORE))
4279 opts->x_target_flags |= MASK_AVX256_SPLIT_UNALIGNED_STORE;
4280 /* Enable 128-bit AVX instruction generation
4281 for the auto-vectorizer. */
4282 if (TARGET_AVX128_OPTIMAL
4283 && !(opts_set->x_target_flags & MASK_PREFER_AVX128))
4284 opts->x_target_flags |= MASK_PREFER_AVX128;
4286 if (opts->x_ix86_recip_name)
4288 char *p = ASTRDUP (opts->x_ix86_recip_name);
4289 char *q;
4290 unsigned int mask, i;
4291 bool invert;
4293 while ((q = strtok (p, ",")) != NULL)
4295 p = NULL;
4296 if (*q == '!')
4298 invert = true;
4299 q++;
4301 else
4302 invert = false;
4304 if (!strcmp (q, "default"))
4305 mask = RECIP_MASK_ALL;
4306 else
4308 for (i = 0; i < ARRAY_SIZE (recip_options); i++)
4309 if (!strcmp (q, recip_options[i].string))
4311 mask = recip_options[i].mask;
4312 break;
4315 if (i == ARRAY_SIZE (recip_options))
4317 error ("unknown option for -mrecip=%s", q);
4318 invert = false;
4319 mask = RECIP_MASK_NONE;
4323 opts->x_recip_mask_explicit |= mask;
4324 if (invert)
4325 opts->x_recip_mask &= ~mask;
4326 else
4327 opts->x_recip_mask |= mask;
4331 if (TARGET_RECIP_P (opts->x_target_flags))
4332 opts->x_recip_mask |= RECIP_MASK_ALL & ~opts->x_recip_mask_explicit;
4333 else if (opts_set->x_target_flags & MASK_RECIP)
4334 opts->x_recip_mask &= ~(RECIP_MASK_ALL & ~opts->x_recip_mask_explicit);
4336 /* Default long double to 64-bit for 32-bit Bionic and to __float128
4337 for 64-bit Bionic. */
4338 if (TARGET_HAS_BIONIC
4339 && !(opts_set->x_target_flags
4340 & (MASK_LONG_DOUBLE_64 | MASK_LONG_DOUBLE_128)))
4341 opts->x_target_flags |= (TARGET_64BIT
4342 ? MASK_LONG_DOUBLE_128
4343 : MASK_LONG_DOUBLE_64);
4345 /* Only one of them can be active. */
4346 gcc_assert ((opts->x_target_flags & MASK_LONG_DOUBLE_64) == 0
4347 || (opts->x_target_flags & MASK_LONG_DOUBLE_128) == 0);
4349 /* Save the initial options in case the user does function specific
4350 options. */
4351 if (main_args_p)
4352 target_option_default_node = target_option_current_node
4353 = build_target_option_node (opts);
4355 /* Handle stack protector */
4356 if (!opts_set->x_ix86_stack_protector_guard)
4357 opts->x_ix86_stack_protector_guard
4358 = TARGET_HAS_BIONIC ? SSP_GLOBAL : SSP_TLS;
4360 /* Handle -mmemcpy-strategy= and -mmemset-strategy= */
4361 if (opts->x_ix86_tune_memcpy_strategy)
4363 char *str = xstrdup (opts->x_ix86_tune_memcpy_strategy);
4364 ix86_parse_stringop_strategy_string (str, false);
4365 free (str);
4368 if (opts->x_ix86_tune_memset_strategy)
4370 char *str = xstrdup (opts->x_ix86_tune_memset_strategy);
4371 ix86_parse_stringop_strategy_string (str, true);
4372 free (str);
4376 /* Implement the TARGET_OPTION_OVERRIDE hook. */
4378 static void
4379 ix86_option_override (void)
4381 opt_pass *pass_insert_vzeroupper = make_pass_insert_vzeroupper (g);
4382 struct register_pass_info insert_vzeroupper_info
4383 = { pass_insert_vzeroupper, "reload",
4384 1, PASS_POS_INSERT_AFTER
4387 ix86_option_override_internal (true, &global_options, &global_options_set);
4390 /* This needs to be done at start up. It's convenient to do it here. */
4391 register_pass (&insert_vzeroupper_info);
4394 /* Implement the TARGET_OFFLOAD_OPTIONS hook. */
4395 static char *
4396 ix86_offload_options (void)
4398 if (TARGET_LP64)
4399 return xstrdup ("-foffload-abi=lp64");
4400 return xstrdup ("-foffload-abi=ilp32");
4403 /* Update register usage after having seen the compiler flags. */
4405 static void
4406 ix86_conditional_register_usage (void)
4408 int i, c_mask;
4410 /* For 32-bit targets, squash the REX registers. */
4411 if (! TARGET_64BIT)
4413 for (i = FIRST_REX_INT_REG; i <= LAST_REX_INT_REG; i++)
4414 fixed_regs[i] = call_used_regs[i] = 1, reg_names[i] = "";
4415 for (i = FIRST_REX_SSE_REG; i <= LAST_REX_SSE_REG; i++)
4416 fixed_regs[i] = call_used_regs[i] = 1, reg_names[i] = "";
4417 for (i = FIRST_EXT_REX_SSE_REG; i <= LAST_EXT_REX_SSE_REG; i++)
4418 fixed_regs[i] = call_used_regs[i] = 1, reg_names[i] = "";
4421 /* See the definition of CALL_USED_REGISTERS in i386.h. */
4422 c_mask = (TARGET_64BIT_MS_ABI ? (1 << 3)
4423 : TARGET_64BIT ? (1 << 2)
4424 : (1 << 1));
4426 CLEAR_HARD_REG_SET (reg_class_contents[(int)CLOBBERED_REGS]);
4428 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
4430 /* Set/reset conditionally defined registers from
4431 CALL_USED_REGISTERS initializer. */
4432 if (call_used_regs[i] > 1)
4433 call_used_regs[i] = !!(call_used_regs[i] & c_mask);
4435 /* Calculate registers of CLOBBERED_REGS register set
4436 as call used registers from GENERAL_REGS register set. */
4437 if (TEST_HARD_REG_BIT (reg_class_contents[(int)GENERAL_REGS], i)
4438 && call_used_regs[i])
4439 SET_HARD_REG_BIT (reg_class_contents[(int)CLOBBERED_REGS], i);
4442 /* If MMX is disabled, squash the registers. */
4443 if (! TARGET_MMX)
4444 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
4445 if (TEST_HARD_REG_BIT (reg_class_contents[(int)MMX_REGS], i))
4446 fixed_regs[i] = call_used_regs[i] = 1, reg_names[i] = "";
4448 /* If SSE is disabled, squash the registers. */
4449 if (! TARGET_SSE)
4450 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
4451 if (TEST_HARD_REG_BIT (reg_class_contents[(int)SSE_REGS], i))
4452 fixed_regs[i] = call_used_regs[i] = 1, reg_names[i] = "";
4454 /* If the FPU is disabled, squash the registers. */
4455 if (! (TARGET_80387 || TARGET_FLOAT_RETURNS_IN_80387))
4456 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
4457 if (TEST_HARD_REG_BIT (reg_class_contents[(int)FLOAT_REGS], i))
4458 fixed_regs[i] = call_used_regs[i] = 1, reg_names[i] = "";
4460 /* If AVX512F is disabled, squash the registers. */
4461 if (! TARGET_AVX512F)
4463 for (i = FIRST_EXT_REX_SSE_REG; i <= LAST_EXT_REX_SSE_REG; i++)
4464 fixed_regs[i] = call_used_regs[i] = 1, reg_names[i] = "";
4466 for (i = FIRST_MASK_REG; i <= LAST_MASK_REG; i++)
4467 fixed_regs[i] = call_used_regs[i] = 1, reg_names[i] = "";
4470 /* If MPX is disabled, squash the registers. */
4471 if (! TARGET_MPX)
4472 for (i = FIRST_BND_REG; i <= LAST_BND_REG; i++)
4473 fixed_regs[i] = call_used_regs[i] = 1, reg_names[i] = "";
4477 /* Save the current options */
4479 static void
4480 ix86_function_specific_save (struct cl_target_option *ptr,
4481 struct gcc_options *opts)
4483 ptr->arch = ix86_arch;
4484 ptr->schedule = ix86_schedule;
4485 ptr->prefetch_sse = x86_prefetch_sse;
4486 ptr->tune = ix86_tune;
4487 ptr->branch_cost = ix86_branch_cost;
4488 ptr->tune_defaulted = ix86_tune_defaulted;
4489 ptr->arch_specified = ix86_arch_specified;
4490 ptr->x_ix86_isa_flags_explicit = opts->x_ix86_isa_flags_explicit;
4491 ptr->x_ix86_target_flags_explicit = opts->x_ix86_target_flags_explicit;
4492 ptr->x_recip_mask_explicit = opts->x_recip_mask_explicit;
4493 ptr->x_ix86_arch_string = opts->x_ix86_arch_string;
4494 ptr->x_ix86_tune_string = opts->x_ix86_tune_string;
4495 ptr->x_ix86_cmodel = opts->x_ix86_cmodel;
4496 ptr->x_ix86_abi = opts->x_ix86_abi;
4497 ptr->x_ix86_asm_dialect = opts->x_ix86_asm_dialect;
4498 ptr->x_ix86_branch_cost = opts->x_ix86_branch_cost;
4499 ptr->x_ix86_dump_tunes = opts->x_ix86_dump_tunes;
4500 ptr->x_ix86_force_align_arg_pointer = opts->x_ix86_force_align_arg_pointer;
4501 ptr->x_ix86_force_drap = opts->x_ix86_force_drap;
4502 ptr->x_ix86_incoming_stack_boundary_arg = opts->x_ix86_incoming_stack_boundary_arg;
4503 ptr->x_ix86_pmode = opts->x_ix86_pmode;
4504 ptr->x_ix86_preferred_stack_boundary_arg = opts->x_ix86_preferred_stack_boundary_arg;
4505 ptr->x_ix86_recip_name = opts->x_ix86_recip_name;
4506 ptr->x_ix86_regparm = opts->x_ix86_regparm;
4507 ptr->x_ix86_section_threshold = opts->x_ix86_section_threshold;
4508 ptr->x_ix86_sse2avx = opts->x_ix86_sse2avx;
4509 ptr->x_ix86_stack_protector_guard = opts->x_ix86_stack_protector_guard;
4510 ptr->x_ix86_stringop_alg = opts->x_ix86_stringop_alg;
4511 ptr->x_ix86_tls_dialect = opts->x_ix86_tls_dialect;
4512 ptr->x_ix86_tune_ctrl_string = opts->x_ix86_tune_ctrl_string;
4513 ptr->x_ix86_tune_memcpy_strategy = opts->x_ix86_tune_memcpy_strategy;
4514 ptr->x_ix86_tune_memset_strategy = opts->x_ix86_tune_memset_strategy;
4515 ptr->x_ix86_tune_no_default = opts->x_ix86_tune_no_default;
4516 ptr->x_ix86_veclibabi_type = opts->x_ix86_veclibabi_type;
4518 /* The fields are char but the variables are not; make sure the
4519 values fit in the fields. */
4520 gcc_assert (ptr->arch == ix86_arch);
4521 gcc_assert (ptr->schedule == ix86_schedule);
4522 gcc_assert (ptr->tune == ix86_tune);
4523 gcc_assert (ptr->branch_cost == ix86_branch_cost);
4526 /* Restore the current options */
4528 static void
4529 ix86_function_specific_restore (struct gcc_options *opts,
4530 struct cl_target_option *ptr)
4532 enum processor_type old_tune = ix86_tune;
4533 enum processor_type old_arch = ix86_arch;
4534 unsigned int ix86_arch_mask;
4535 int i;
4537 /* We don't change -fPIC. */
4538 opts->x_flag_pic = flag_pic;
4540 ix86_arch = (enum processor_type) ptr->arch;
4541 ix86_schedule = (enum attr_cpu) ptr->schedule;
4542 ix86_tune = (enum processor_type) ptr->tune;
4543 x86_prefetch_sse = ptr->prefetch_sse;
4544 opts->x_ix86_branch_cost = ptr->branch_cost;
4545 ix86_tune_defaulted = ptr->tune_defaulted;
4546 ix86_arch_specified = ptr->arch_specified;
4547 opts->x_ix86_isa_flags_explicit = ptr->x_ix86_isa_flags_explicit;
4548 opts->x_ix86_target_flags_explicit = ptr->x_ix86_target_flags_explicit;
4549 opts->x_recip_mask_explicit = ptr->x_recip_mask_explicit;
4550 opts->x_ix86_arch_string = ptr->x_ix86_arch_string;
4551 opts->x_ix86_tune_string = ptr->x_ix86_tune_string;
4552 opts->x_ix86_cmodel = ptr->x_ix86_cmodel;
4553 opts->x_ix86_abi = ptr->x_ix86_abi;
4554 opts->x_ix86_asm_dialect = ptr->x_ix86_asm_dialect;
4555 opts->x_ix86_branch_cost = ptr->x_ix86_branch_cost;
4556 opts->x_ix86_dump_tunes = ptr->x_ix86_dump_tunes;
4557 opts->x_ix86_force_align_arg_pointer = ptr->x_ix86_force_align_arg_pointer;
4558 opts->x_ix86_force_drap = ptr->x_ix86_force_drap;
4559 opts->x_ix86_incoming_stack_boundary_arg = ptr->x_ix86_incoming_stack_boundary_arg;
4560 opts->x_ix86_pmode = ptr->x_ix86_pmode;
4561 opts->x_ix86_preferred_stack_boundary_arg = ptr->x_ix86_preferred_stack_boundary_arg;
4562 opts->x_ix86_recip_name = ptr->x_ix86_recip_name;
4563 opts->x_ix86_regparm = ptr->x_ix86_regparm;
4564 opts->x_ix86_section_threshold = ptr->x_ix86_section_threshold;
4565 opts->x_ix86_sse2avx = ptr->x_ix86_sse2avx;
4566 opts->x_ix86_stack_protector_guard = ptr->x_ix86_stack_protector_guard;
4567 opts->x_ix86_stringop_alg = ptr->x_ix86_stringop_alg;
4568 opts->x_ix86_tls_dialect = ptr->x_ix86_tls_dialect;
4569 opts->x_ix86_tune_ctrl_string = ptr->x_ix86_tune_ctrl_string;
4570 opts->x_ix86_tune_memcpy_strategy = ptr->x_ix86_tune_memcpy_strategy;
4571 opts->x_ix86_tune_memset_strategy = ptr->x_ix86_tune_memset_strategy;
4572 opts->x_ix86_tune_no_default = ptr->x_ix86_tune_no_default;
4573 opts->x_ix86_veclibabi_type = ptr->x_ix86_veclibabi_type;
4574 ix86_tune_cost = processor_target_table[ix86_tune].cost;
4575 /* TODO: ix86_cost should be chosen at instruction or function granularity
4576 so for cold code we use size_cost even in !optimize_size compilation. */
4577 if (opts->x_optimize_size)
4578 ix86_cost = &ix86_size_cost;
4579 else
4580 ix86_cost = ix86_tune_cost;
4582 /* Recreate the arch feature tests if the arch changed */
4583 if (old_arch != ix86_arch)
4585 ix86_arch_mask = 1u << ix86_arch;
4586 for (i = 0; i < X86_ARCH_LAST; ++i)
4587 ix86_arch_features[i]
4588 = !!(initial_ix86_arch_features[i] & ix86_arch_mask);
4591 /* Recreate the tune optimization tests */
4592 if (old_tune != ix86_tune)
4593 set_ix86_tune_features (ix86_tune, false);
4596 /* Adjust target options after streaming them in. This is mainly about
4597 reconciling them with global options. */
4599 static void
4600 ix86_function_specific_post_stream_in (struct cl_target_option *ptr)
4602 /* flag_pic is a global option, but ix86_cmodel is a target-saved option
4603 partly computed from flag_pic. If flag_pic is on, adjust x_ix86_cmodel
4604 for PIC, or error out. */
4605 if (flag_pic)
4606 switch (ptr->x_ix86_cmodel)
4608 case CM_SMALL:
4609 ptr->x_ix86_cmodel = CM_SMALL_PIC;
4610 break;
4612 case CM_MEDIUM:
4613 ptr->x_ix86_cmodel = CM_MEDIUM_PIC;
4614 break;
4616 case CM_LARGE:
4617 ptr->x_ix86_cmodel = CM_LARGE_PIC;
4618 break;
4620 case CM_KERNEL:
4621 error ("code model %s does not support PIC mode", "kernel");
4622 break;
4624 default:
4625 break;
4627 else
4628 switch (ptr->x_ix86_cmodel)
4630 case CM_SMALL_PIC:
4631 ptr->x_ix86_cmodel = CM_SMALL;
4632 break;
4634 case CM_MEDIUM_PIC:
4635 ptr->x_ix86_cmodel = CM_MEDIUM;
4636 break;
4638 case CM_LARGE_PIC:
4639 ptr->x_ix86_cmodel = CM_LARGE;
4640 break;
4642 default:
4643 break;
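/* Illustrative note (not part of the original source): this adjustment
   matters mainly for LTO, where per-function target options are streamed in
   from the object files.  For example, a function compiled with
   -mcmodel=medium that ends up in a -fPIC link is remapped from CM_MEDIUM to
   CM_MEDIUM_PIC by the switch above, while -mcmodel=kernel is rejected
   because the kernel code model has no PIC variant.  */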
4647 /* Print the current options */
4649 static void
4650 ix86_function_specific_print (FILE *file, int indent,
4651 struct cl_target_option *ptr)
4653 char *target_string
4654 = ix86_target_string (ptr->x_ix86_isa_flags, ptr->x_target_flags,
4655 NULL, NULL, ptr->x_ix86_fpmath, false);
4657 gcc_assert (ptr->arch < PROCESSOR_max);
4658 fprintf (file, "%*sarch = %d (%s)\n",
4659 indent, "",
4660 ptr->arch, processor_target_table[ptr->arch].name);
4662 gcc_assert (ptr->tune < PROCESSOR_max);
4663 fprintf (file, "%*stune = %d (%s)\n",
4664 indent, "",
4665 ptr->tune, processor_target_table[ptr->tune].name);
4667 fprintf (file, "%*sbranch_cost = %d\n", indent, "", ptr->branch_cost);
4669 if (target_string)
4671 fprintf (file, "%*s%s\n", indent, "", target_string);
4672 free (target_string);
4677 /* Inner function to process the attribute((target(...))), take an argument and
4678 set the current options from the argument. If we have a list, recursively go
4679 over the list. */
4681 static bool
4682 ix86_valid_target_attribute_inner_p (tree args, char *p_strings[],
4683 struct gcc_options *opts,
4684 struct gcc_options *opts_set,
4685 struct gcc_options *enum_opts_set)
4687 char *next_optstr;
4688 bool ret = true;
4690 #define IX86_ATTR_ISA(S,O) { S, sizeof (S)-1, ix86_opt_isa, O, 0 }
4691 #define IX86_ATTR_STR(S,O) { S, sizeof (S)-1, ix86_opt_str, O, 0 }
4692 #define IX86_ATTR_ENUM(S,O) { S, sizeof (S)-1, ix86_opt_enum, O, 0 }
4693 #define IX86_ATTR_YES(S,O,M) { S, sizeof (S)-1, ix86_opt_yes, O, M }
4694 #define IX86_ATTR_NO(S,O,M) { S, sizeof (S)-1, ix86_opt_no, O, M }
4696 enum ix86_opt_type
4698 ix86_opt_unknown,
4699 ix86_opt_yes,
4700 ix86_opt_no,
4701 ix86_opt_str,
4702 ix86_opt_enum,
4703 ix86_opt_isa
4706 static const struct
4708 const char *string;
4709 size_t len;
4710 enum ix86_opt_type type;
4711 int opt;
4712 int mask;
4713 } attrs[] = {
4714 /* isa options */
4715 IX86_ATTR_ISA ("3dnow", OPT_m3dnow),
4716 IX86_ATTR_ISA ("abm", OPT_mabm),
4717 IX86_ATTR_ISA ("bmi", OPT_mbmi),
4718 IX86_ATTR_ISA ("bmi2", OPT_mbmi2),
4719 IX86_ATTR_ISA ("lzcnt", OPT_mlzcnt),
4720 IX86_ATTR_ISA ("tbm", OPT_mtbm),
4721 IX86_ATTR_ISA ("aes", OPT_maes),
4722 IX86_ATTR_ISA ("sha", OPT_msha),
4723 IX86_ATTR_ISA ("avx", OPT_mavx),
4724 IX86_ATTR_ISA ("avx2", OPT_mavx2),
4725 IX86_ATTR_ISA ("avx512f", OPT_mavx512f),
4726 IX86_ATTR_ISA ("avx512pf", OPT_mavx512pf),
4727 IX86_ATTR_ISA ("avx512er", OPT_mavx512er),
4728 IX86_ATTR_ISA ("avx512cd", OPT_mavx512cd),
4729 IX86_ATTR_ISA ("avx512dq", OPT_mavx512dq),
4730 IX86_ATTR_ISA ("avx512bw", OPT_mavx512bw),
4731 IX86_ATTR_ISA ("avx512vl", OPT_mavx512vl),
4732 IX86_ATTR_ISA ("mmx", OPT_mmmx),
4733 IX86_ATTR_ISA ("pclmul", OPT_mpclmul),
4734 IX86_ATTR_ISA ("popcnt", OPT_mpopcnt),
4735 IX86_ATTR_ISA ("sse", OPT_msse),
4736 IX86_ATTR_ISA ("sse2", OPT_msse2),
4737 IX86_ATTR_ISA ("sse3", OPT_msse3),
4738 IX86_ATTR_ISA ("sse4", OPT_msse4),
4739 IX86_ATTR_ISA ("sse4.1", OPT_msse4_1),
4740 IX86_ATTR_ISA ("sse4.2", OPT_msse4_2),
4741 IX86_ATTR_ISA ("sse4a", OPT_msse4a),
4742 IX86_ATTR_ISA ("ssse3", OPT_mssse3),
4743 IX86_ATTR_ISA ("fma4", OPT_mfma4),
4744 IX86_ATTR_ISA ("fma", OPT_mfma),
4745 IX86_ATTR_ISA ("xop", OPT_mxop),
4746 IX86_ATTR_ISA ("lwp", OPT_mlwp),
4747 IX86_ATTR_ISA ("fsgsbase", OPT_mfsgsbase),
4748 IX86_ATTR_ISA ("rdrnd", OPT_mrdrnd),
4749 IX86_ATTR_ISA ("f16c", OPT_mf16c),
4750 IX86_ATTR_ISA ("rtm", OPT_mrtm),
4751 IX86_ATTR_ISA ("hle", OPT_mhle),
4752 IX86_ATTR_ISA ("prfchw", OPT_mprfchw),
4753 IX86_ATTR_ISA ("rdseed", OPT_mrdseed),
4754 IX86_ATTR_ISA ("adx", OPT_madx),
4755 IX86_ATTR_ISA ("fxsr", OPT_mfxsr),
4756 IX86_ATTR_ISA ("xsave", OPT_mxsave),
4757 IX86_ATTR_ISA ("xsaveopt", OPT_mxsaveopt),
4758 IX86_ATTR_ISA ("prefetchwt1", OPT_mprefetchwt1),
4759 IX86_ATTR_ISA ("clflushopt", OPT_mclflushopt),
4760 IX86_ATTR_ISA ("xsavec", OPT_mxsavec),
4761 IX86_ATTR_ISA ("xsaves", OPT_mxsaves),
4762 IX86_ATTR_ISA ("avx512vbmi", OPT_mavx512vbmi),
4763 IX86_ATTR_ISA ("avx512ifma", OPT_mavx512ifma),
4764 IX86_ATTR_ISA ("clwb", OPT_mclwb),
4765 IX86_ATTR_ISA ("pcommit", OPT_mpcommit),
4766 IX86_ATTR_ISA ("mwaitx", OPT_mmwaitx),
4768 /* enum options */
4769 IX86_ATTR_ENUM ("fpmath=", OPT_mfpmath_),
4771 /* string options */
4772 IX86_ATTR_STR ("arch=", IX86_FUNCTION_SPECIFIC_ARCH),
4773 IX86_ATTR_STR ("tune=", IX86_FUNCTION_SPECIFIC_TUNE),
4775 /* flag options */
4776 IX86_ATTR_YES ("cld",
4777 OPT_mcld,
4778 MASK_CLD),
4780 IX86_ATTR_NO ("fancy-math-387",
4781 OPT_mfancy_math_387,
4782 MASK_NO_FANCY_MATH_387),
4784 IX86_ATTR_YES ("ieee-fp",
4785 OPT_mieee_fp,
4786 MASK_IEEE_FP),
4788 IX86_ATTR_YES ("inline-all-stringops",
4789 OPT_minline_all_stringops,
4790 MASK_INLINE_ALL_STRINGOPS),
4792 IX86_ATTR_YES ("inline-stringops-dynamically",
4793 OPT_minline_stringops_dynamically,
4794 MASK_INLINE_STRINGOPS_DYNAMICALLY),
4796 IX86_ATTR_NO ("align-stringops",
4797 OPT_mno_align_stringops,
4798 MASK_NO_ALIGN_STRINGOPS),
4800 IX86_ATTR_YES ("recip",
4801 OPT_mrecip,
4802 MASK_RECIP),
4806 /* If this is a list, recurse to get the options. */
4807 if (TREE_CODE (args) == TREE_LIST)
4809 bool ret = true;
4811 for (; args; args = TREE_CHAIN (args))
4812 if (TREE_VALUE (args)
4813 && !ix86_valid_target_attribute_inner_p (TREE_VALUE (args),
4814 p_strings, opts, opts_set,
4815 enum_opts_set))
4816 ret = false;
4818 return ret;
4821 else if (TREE_CODE (args) != STRING_CST)
4823 error ("attribute %<target%> argument not a string");
4824 return false;
4827 /* Handle multiple arguments separated by commas. */
4828 next_optstr = ASTRDUP (TREE_STRING_POINTER (args));
4830 while (next_optstr && *next_optstr != '\0')
4832 char *p = next_optstr;
4833 char *orig_p = p;
4834 char *comma = strchr (next_optstr, ',');
4835 const char *opt_string;
4836 size_t len, opt_len;
4837 int opt;
4838 bool opt_set_p;
4839 char ch;
4840 unsigned i;
4841 enum ix86_opt_type type = ix86_opt_unknown;
4842 int mask = 0;
4844 if (comma)
4846 *comma = '\0';
4847 len = comma - next_optstr;
4848 next_optstr = comma + 1;
4850 else
4852 len = strlen (p);
4853 next_optstr = NULL;
4856 /* Recognize no-xxx. */
4857 if (len > 3 && p[0] == 'n' && p[1] == 'o' && p[2] == '-')
4859 opt_set_p = false;
4860 p += 3;
4861 len -= 3;
4863 else
4864 opt_set_p = true;
4866 /* Find the option. */
4867 ch = *p;
4868 opt = N_OPTS;
4869 for (i = 0; i < ARRAY_SIZE (attrs); i++)
4871 type = attrs[i].type;
4872 opt_len = attrs[i].len;
4873 if (ch == attrs[i].string[0]
4874 && ((type != ix86_opt_str && type != ix86_opt_enum)
4875 ? len == opt_len
4876 : len > opt_len)
4877 && memcmp (p, attrs[i].string, opt_len) == 0)
4879 opt = attrs[i].opt;
4880 mask = attrs[i].mask;
4881 opt_string = attrs[i].string;
4882 break;
4886 /* Process the option. */
4887 if (opt == N_OPTS)
4889 error ("attribute(target(\"%s\")) is unknown", orig_p);
4890 ret = false;
4893 else if (type == ix86_opt_isa)
4895 struct cl_decoded_option decoded;
4897 generate_option (opt, NULL, opt_set_p, CL_TARGET, &decoded);
4898 ix86_handle_option (opts, opts_set,
4899 &decoded, input_location);
4902 else if (type == ix86_opt_yes || type == ix86_opt_no)
4904 if (type == ix86_opt_no)
4905 opt_set_p = !opt_set_p;
4907 if (opt_set_p)
4908 opts->x_target_flags |= mask;
4909 else
4910 opts->x_target_flags &= ~mask;
4913 else if (type == ix86_opt_str)
4915 if (p_strings[opt])
4917 error ("option(\"%s\") was already specified", opt_string);
4918 ret = false;
4920 else
4921 p_strings[opt] = xstrdup (p + opt_len);
4924 else if (type == ix86_opt_enum)
4926 bool arg_ok;
4927 int value;
4929 arg_ok = opt_enum_arg_to_value (opt, p + opt_len, &value, CL_TARGET);
4930 if (arg_ok)
4931 set_option (opts, enum_opts_set, opt, value,
4932 p + opt_len, DK_UNSPECIFIED, input_location,
4933 global_dc);
4934 else
4936 error ("attribute(target(\"%s\")) is unknown", orig_p);
4937 ret = false;
4941 else
4942 gcc_unreachable ();
4945 return ret;
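/* Illustrative sketch (not part of the original i386.c; the function name is
   hypothetical): the kind of user-level string the parser above accepts.
   Options are comma separated, a "no-" prefix clears an ISA flag, and
   "arch=" / "tune=" / "fpmath=" carry the string/enum arguments handled by
   the ix86_opt_str and ix86_opt_enum cases.  */
__attribute__((target ("avx2,fma,no-3dnow,fpmath=sse,arch=haswell")))
static double
example_target_attr_dot (const double *a, const double *b, int n)
{
  double sum = 0.0;
  for (int i = 0; i < n; i++)
    sum += a[i] * b[i];
  return sum;
}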
4948 /* Return a TARGET_OPTION_NODE tree of the target options listed or NULL. */
4950 tree
4951 ix86_valid_target_attribute_tree (tree args,
4952 struct gcc_options *opts,
4953 struct gcc_options *opts_set)
4955 const char *orig_arch_string = opts->x_ix86_arch_string;
4956 const char *orig_tune_string = opts->x_ix86_tune_string;
4957 enum fpmath_unit orig_fpmath_set = opts_set->x_ix86_fpmath;
4958 int orig_tune_defaulted = ix86_tune_defaulted;
4959 int orig_arch_specified = ix86_arch_specified;
4960 char *option_strings[IX86_FUNCTION_SPECIFIC_MAX] = { NULL, NULL };
4961 tree t = NULL_TREE;
4962 int i;
4963 struct cl_target_option *def
4964 = TREE_TARGET_OPTION (target_option_default_node);
4965 struct gcc_options enum_opts_set;
4967 memset (&enum_opts_set, 0, sizeof (enum_opts_set));
4969 /* Process each of the options on the chain. */
4970 if (! ix86_valid_target_attribute_inner_p (args, option_strings, opts,
4971 opts_set, &enum_opts_set))
4972 return error_mark_node;
4974 /* If the changed options are different from the default, rerun
4975 ix86_option_override_internal, and then save the options away.
4976 The string options are attribute options, and will be undone
4977 when we copy the save structure. */
4978 if (opts->x_ix86_isa_flags != def->x_ix86_isa_flags
4979 || opts->x_target_flags != def->x_target_flags
4980 || option_strings[IX86_FUNCTION_SPECIFIC_ARCH]
4981 || option_strings[IX86_FUNCTION_SPECIFIC_TUNE]
4982 || enum_opts_set.x_ix86_fpmath)
4984 /* If we are using the default tune= or arch=, undo the string assigned,
4985 and use the default. */
4986 if (option_strings[IX86_FUNCTION_SPECIFIC_ARCH])
4987 opts->x_ix86_arch_string = option_strings[IX86_FUNCTION_SPECIFIC_ARCH];
4988 else if (!orig_arch_specified)
4989 opts->x_ix86_arch_string = NULL;
4991 if (option_strings[IX86_FUNCTION_SPECIFIC_TUNE])
4992 opts->x_ix86_tune_string = option_strings[IX86_FUNCTION_SPECIFIC_TUNE];
4993 else if (orig_tune_defaulted)
4994 opts->x_ix86_tune_string = NULL;
4996 /* If fpmath= is not set, and we now have sse2 on 32-bit, use it. */
4997 if (enum_opts_set.x_ix86_fpmath)
4998 opts_set->x_ix86_fpmath = (enum fpmath_unit) 1;
4999 else if (!TARGET_64BIT_P (opts->x_ix86_isa_flags)
5000 && TARGET_SSE_P (opts->x_ix86_isa_flags))
5002 opts->x_ix86_fpmath = (enum fpmath_unit) (FPMATH_SSE | FPMATH_387);
5003 opts_set->x_ix86_fpmath = (enum fpmath_unit) 1;
5006 /* Do any overrides, such as arch=xxx, or tune=xxx support. */
5007 ix86_option_override_internal (false, opts, opts_set);
5009 /* Add any builtin functions with the new isa if any. */
5010 ix86_add_new_builtins (opts->x_ix86_isa_flags);
5012 /* Save the current options unless we are validating options for
5013 #pragma. */
5014 t = build_target_option_node (opts);
5016 opts->x_ix86_arch_string = orig_arch_string;
5017 opts->x_ix86_tune_string = orig_tune_string;
5018 opts_set->x_ix86_fpmath = orig_fpmath_set;
5020 /* Free up memory allocated to hold the strings */
5021 for (i = 0; i < IX86_FUNCTION_SPECIFIC_MAX; i++)
5022 free (option_strings[i]);
5025 return t;
5028 /* Hook to validate attribute((target("string"))). */
5030 static bool
5031 ix86_valid_target_attribute_p (tree fndecl,
5032 tree ARG_UNUSED (name),
5033 tree args,
5034 int ARG_UNUSED (flags))
5036 struct gcc_options func_options;
5037 tree new_target, new_optimize;
5038 bool ret = true;
5040 /* attribute((target("default"))) does nothing, beyond
5041 affecting multi-versioning. */
5042 if (TREE_VALUE (args)
5043 && TREE_CODE (TREE_VALUE (args)) == STRING_CST
5044 && TREE_CHAIN (args) == NULL_TREE
5045 && strcmp (TREE_STRING_POINTER (TREE_VALUE (args)), "default") == 0)
5046 return true;
5048 tree old_optimize = build_optimization_node (&global_options);
5050 /* Get the optimization options of the current function. */
5051 tree func_optimize = DECL_FUNCTION_SPECIFIC_OPTIMIZATION (fndecl);
5053 if (!func_optimize)
5054 func_optimize = old_optimize;
5056 /* Init func_options. */
5057 memset (&func_options, 0, sizeof (func_options));
5058 init_options_struct (&func_options, NULL);
5059 lang_hooks.init_options_struct (&func_options);
5061 cl_optimization_restore (&func_options,
5062 TREE_OPTIMIZATION (func_optimize));
5064 /* Initialize func_options to the default before its target options can
5065 be set. */
5066 cl_target_option_restore (&func_options,
5067 TREE_TARGET_OPTION (target_option_default_node));
5069 new_target = ix86_valid_target_attribute_tree (args, &func_options,
5070 &global_options_set);
5072 new_optimize = build_optimization_node (&func_options);
5074 if (new_target == error_mark_node)
5075 ret = false;
5077 else if (fndecl && new_target)
5079 DECL_FUNCTION_SPECIFIC_TARGET (fndecl) = new_target;
5081 if (old_optimize != new_optimize)
5082 DECL_FUNCTION_SPECIFIC_OPTIMIZATION (fndecl) = new_optimize;
5085 return ret;
5089 /* Hook to determine if one function can safely inline another. */
5091 static bool
5092 ix86_can_inline_p (tree caller, tree callee)
5094 bool ret = false;
5095 tree caller_tree = DECL_FUNCTION_SPECIFIC_TARGET (caller);
5096 tree callee_tree = DECL_FUNCTION_SPECIFIC_TARGET (callee);
5098 /* If callee has no option attributes, then it is ok to inline. */
5099 if (!callee_tree)
5100 ret = true;
5102 /* If caller has no option attributes, but callee does then it is not ok to
5103 inline. */
5104 else if (!caller_tree)
5105 ret = false;
5107 else
5109 struct cl_target_option *caller_opts = TREE_TARGET_OPTION (caller_tree);
5110 struct cl_target_option *callee_opts = TREE_TARGET_OPTION (callee_tree);
5112 /* Callee's isa options should be a subset of the caller's, i.e. an SSE4 function
5113 can inline an SSE2 function but an SSE2 function can't inline an SSE4
5114 function. */
5115 if ((caller_opts->x_ix86_isa_flags & callee_opts->x_ix86_isa_flags)
5116 != callee_opts->x_ix86_isa_flags)
5117 ret = false;
5119 /* See if we have the same non-isa options. */
5120 else if (caller_opts->x_target_flags != callee_opts->x_target_flags)
5121 ret = false;
5123 /* See if arch, tune, etc. are the same. */
5124 else if (caller_opts->arch != callee_opts->arch)
5125 ret = false;
5127 else if (caller_opts->tune != callee_opts->tune)
5128 ret = false;
5130 else if (caller_opts->x_ix86_fpmath != callee_opts->x_ix86_fpmath)
5131 ret = false;
5133 else if (caller_opts->branch_cost != callee_opts->branch_cost)
5134 ret = false;
5136 else
5137 ret = true;
5140 return ret;
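/* Illustrative sketch (not part of the original i386.c; function names are
   hypothetical): under the subset rule above, a caller compiled for AVX2 may
   inline an SSE2 callee, because the callee's ISA flags are a subset of the
   caller's, while the reverse direction is rejected.  */
__attribute__((target ("sse2")))
static int
example_sse2_callee (int x)
{
  return x * 2;
}

__attribute__((target ("avx2")))
static int
example_avx2_caller (int x)
{
  /* Candidate for inlining: the SSE2 flags are a subset of the AVX2 flags.  */
  return example_sse2_callee (x) + 1;
}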
5144 /* Remember the last target of ix86_set_current_function. */
5145 static GTY(()) tree ix86_previous_fndecl;
5147 /* Set targets globals to the default (or current #pragma GCC target
5148 if active). Invalidate ix86_previous_fndecl cache. */
5150 void
5151 ix86_reset_previous_fndecl (void)
5153 tree new_tree = target_option_current_node;
5154 cl_target_option_restore (&global_options, TREE_TARGET_OPTION (new_tree));
5155 if (TREE_TARGET_GLOBALS (new_tree))
5156 restore_target_globals (TREE_TARGET_GLOBALS (new_tree));
5157 else if (new_tree == target_option_default_node)
5158 restore_target_globals (&default_target_globals);
5159 else
5160 TREE_TARGET_GLOBALS (new_tree) = save_target_globals_default_opts ();
5161 ix86_previous_fndecl = NULL_TREE;
5164 /* Establish appropriate back-end context for processing the function
5165 FNDECL. The argument might be NULL to indicate processing at top
5166 level, outside of any function scope. */
5167 static void
5168 ix86_set_current_function (tree fndecl)
5170 /* Only change the context if the function changes. This hook is called
5171 several times in the course of compiling a function, and we don't want to
5172 slow things down too much or call target_reinit when it isn't safe. */
5173 if (fndecl == ix86_previous_fndecl)
5174 return;
5176 tree old_tree;
5177 if (ix86_previous_fndecl == NULL_TREE)
5178 old_tree = target_option_current_node;
5179 else if (DECL_FUNCTION_SPECIFIC_TARGET (ix86_previous_fndecl))
5180 old_tree = DECL_FUNCTION_SPECIFIC_TARGET (ix86_previous_fndecl);
5181 else
5182 old_tree = target_option_default_node;
5184 if (fndecl == NULL_TREE)
5186 if (old_tree != target_option_current_node)
5187 ix86_reset_previous_fndecl ();
5188 return;
5191 tree new_tree = DECL_FUNCTION_SPECIFIC_TARGET (fndecl);
5192 if (new_tree == NULL_TREE)
5193 new_tree = target_option_default_node;
5195 if (old_tree != new_tree)
5197 cl_target_option_restore (&global_options, TREE_TARGET_OPTION (new_tree));
5198 if (TREE_TARGET_GLOBALS (new_tree))
5199 restore_target_globals (TREE_TARGET_GLOBALS (new_tree));
5200 else if (new_tree == target_option_default_node)
5201 restore_target_globals (&default_target_globals);
5202 else
5203 TREE_TARGET_GLOBALS (new_tree) = save_target_globals_default_opts ();
5205 ix86_previous_fndecl = fndecl;
5209 /* Return true if this goes in large data/bss. */
5211 static bool
5212 ix86_in_large_data_p (tree exp)
5214 if (ix86_cmodel != CM_MEDIUM && ix86_cmodel != CM_MEDIUM_PIC)
5215 return false;
5217 /* Functions are never large data. */
5218 if (TREE_CODE (exp) == FUNCTION_DECL)
5219 return false;
5221 /* Automatic variables are never large data. */
5222 if (TREE_CODE (exp) == VAR_DECL && !is_global_var (exp))
5223 return false;
5225 if (TREE_CODE (exp) == VAR_DECL && DECL_SECTION_NAME (exp))
5227 const char *section = DECL_SECTION_NAME (exp);
5228 if (strcmp (section, ".ldata") == 0
5229 || strcmp (section, ".lbss") == 0)
5230 return true;
5231 return false;
5233 else
5235 HOST_WIDE_INT size = int_size_in_bytes (TREE_TYPE (exp));
5237 /* If this is an incomplete type with size 0, then we can't put it
5238 in data because it might be too big when completed. Also,
5239 int_size_in_bytes returns -1 if the size can vary or is larger than
5240 an integer, in which case it is also safer to assume that it goes in
5241 large data. */
5242 if (size <= 0 || size > ix86_section_threshold)
5243 return true;
5246 return false;
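/* Illustrative note (not part of the original source): the predicate above
   only fires for -mcmodel=medium (PIC or not).  Objects larger than the
   threshold controlled by -mlarge-data-threshold= are then placed in
   .ldata/.lbss instead of .data/.bss, so that only the remaining small
   objects need to be reachable with 32-bit offsets.  */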
5249 /* Switch to the appropriate section for output of DECL.
5250 DECL is either a `VAR_DECL' node or a constant of some sort.
5251 RELOC indicates whether forming the initial value of DECL requires
5252 link-time relocations. */
5254 ATTRIBUTE_UNUSED static section *
5255 x86_64_elf_select_section (tree decl, int reloc,
5256 unsigned HOST_WIDE_INT align)
5258 if (ix86_in_large_data_p (decl))
5260 const char *sname = NULL;
5261 unsigned int flags = SECTION_WRITE;
5262 switch (categorize_decl_for_section (decl, reloc))
5264 case SECCAT_DATA:
5265 sname = ".ldata";
5266 break;
5267 case SECCAT_DATA_REL:
5268 sname = ".ldata.rel";
5269 break;
5270 case SECCAT_DATA_REL_LOCAL:
5271 sname = ".ldata.rel.local";
5272 break;
5273 case SECCAT_DATA_REL_RO:
5274 sname = ".ldata.rel.ro";
5275 break;
5276 case SECCAT_DATA_REL_RO_LOCAL:
5277 sname = ".ldata.rel.ro.local";
5278 break;
5279 case SECCAT_BSS:
5280 sname = ".lbss";
5281 flags |= SECTION_BSS;
5282 break;
5283 case SECCAT_RODATA:
5284 case SECCAT_RODATA_MERGE_STR:
5285 case SECCAT_RODATA_MERGE_STR_INIT:
5286 case SECCAT_RODATA_MERGE_CONST:
5287 sname = ".lrodata";
5288 flags = 0;
5289 break;
5290 case SECCAT_SRODATA:
5291 case SECCAT_SDATA:
5292 case SECCAT_SBSS:
5293 gcc_unreachable ();
5294 case SECCAT_TEXT:
5295 case SECCAT_TDATA:
5296 case SECCAT_TBSS:
5297 /* We don't split these for the medium model. Place them into
5298 default sections and hope for the best. */
5299 break;
5301 if (sname)
5303 /* We might get called with string constants, but get_named_section
5304 doesn't like them as they are not DECLs. Also, we need to set
5305 flags in that case. */
5306 if (!DECL_P (decl))
5307 return get_section (sname, flags, NULL);
5308 return get_named_section (decl, sname, reloc);
5311 return default_elf_select_section (decl, reloc, align);
5314 /* Select a set of attributes for section NAME based on the properties
5315 of DECL and whether or not RELOC indicates that DECL's initializer
5316 might contain runtime relocations. */
5318 static unsigned int ATTRIBUTE_UNUSED
5319 x86_64_elf_section_type_flags (tree decl, const char *name, int reloc)
5321 unsigned int flags = default_section_type_flags (decl, name, reloc);
5323 if (decl == NULL_TREE
5324 && (strcmp (name, ".ldata.rel.ro") == 0
5325 || strcmp (name, ".ldata.rel.ro.local") == 0))
5326 flags |= SECTION_RELRO;
5328 if (strcmp (name, ".lbss") == 0
5329 || strncmp (name, ".lbss.", 6) == 0
5330 || strncmp (name, ".gnu.linkonce.lb.", 17) == 0)
5331 flags |= SECTION_BSS;
5333 return flags;
5336 /* Build up a unique section name, expressed as a
5337 STRING_CST node, and assign it to DECL_SECTION_NAME (decl).
5338 RELOC indicates whether the initial value of EXP requires
5339 link-time relocations. */
5341 static void ATTRIBUTE_UNUSED
5342 x86_64_elf_unique_section (tree decl, int reloc)
5344 if (ix86_in_large_data_p (decl))
5346 const char *prefix = NULL;
5347 /* We only need to use .gnu.linkonce if we don't have COMDAT groups. */
5348 bool one_only = DECL_COMDAT_GROUP (decl) && !HAVE_COMDAT_GROUP;
5350 switch (categorize_decl_for_section (decl, reloc))
5352 case SECCAT_DATA:
5353 case SECCAT_DATA_REL:
5354 case SECCAT_DATA_REL_LOCAL:
5355 case SECCAT_DATA_REL_RO:
5356 case SECCAT_DATA_REL_RO_LOCAL:
5357 prefix = one_only ? ".ld" : ".ldata";
5358 break;
5359 case SECCAT_BSS:
5360 prefix = one_only ? ".lb" : ".lbss";
5361 break;
5362 case SECCAT_RODATA:
5363 case SECCAT_RODATA_MERGE_STR:
5364 case SECCAT_RODATA_MERGE_STR_INIT:
5365 case SECCAT_RODATA_MERGE_CONST:
5366 prefix = one_only ? ".lr" : ".lrodata";
5367 break;
5368 case SECCAT_SRODATA:
5369 case SECCAT_SDATA:
5370 case SECCAT_SBSS:
5371 gcc_unreachable ();
5372 case SECCAT_TEXT:
5373 case SECCAT_TDATA:
5374 case SECCAT_TBSS:
5375 /* We don't split these for the medium model. Place them into
5376 default sections and hope for the best. */
5377 break;
5379 if (prefix)
5381 const char *name, *linkonce;
5382 char *string;
5384 name = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (decl));
5385 name = targetm.strip_name_encoding (name);
5387 /* If we're using one_only, then there needs to be a .gnu.linkonce
5388 prefix to the section name. */
5389 linkonce = one_only ? ".gnu.linkonce" : "";
5391 string = ACONCAT ((linkonce, prefix, ".", name, NULL));
5393 set_decl_section_name (decl, string);
5394 return;
5397 default_unique_section (decl, reloc);
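/* Illustrative note (not part of the original source; the variable name is
   hypothetical): for a variable "foo" categorized as large writable data,
   the ACONCAT above yields the section name ".ldata.foo", or
   ".gnu.linkonce.ld.foo" when one-only linkage is needed; a large BSS
   variable gets ".lbss.foo" / ".gnu.linkonce.lb.foo".  */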
5400 #ifdef COMMON_ASM_OP
5401 /* This says how to output assembler code to declare an
5402 uninitialized external linkage data object.
5404 For medium model x86-64 we need to use the .largecomm directive for
5405 large objects. */
5406 void
5407 x86_elf_aligned_common (FILE *file,
5408 const char *name, unsigned HOST_WIDE_INT size,
5409 int align)
5411 if ((ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_MEDIUM_PIC)
5412 && size > (unsigned int)ix86_section_threshold)
5413 fputs ("\t.largecomm\t", file);
5414 else
5415 fputs (COMMON_ASM_OP, file);
5416 assemble_name (file, name);
5417 fprintf (file, "," HOST_WIDE_INT_PRINT_UNSIGNED ",%u\n",
5418 size, align / BITS_PER_UNIT);
5420 #endif
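/* Illustrative note (not part of the original source; the symbol name and
   sizes are hypothetical): under -mcmodel=medium a common symbol "buf" of
   1 MiB with 32-byte alignment is emitted by x86_elf_aligned_common above as

       .largecomm	buf,1048576,32

   while objects at or below ix86_section_threshold keep using the ordinary
   COMMON_ASM_OP (".comm") directive.  */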
5422 /* Utility function for targets to use in implementing
5423 ASM_OUTPUT_ALIGNED_BSS. */
5425 void
5426 x86_output_aligned_bss (FILE *file, tree decl, const char *name,
5427 unsigned HOST_WIDE_INT size, int align)
5429 if ((ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_MEDIUM_PIC)
5430 && size > (unsigned int)ix86_section_threshold)
5431 switch_to_section (get_named_section (decl, ".lbss", 0));
5432 else
5433 switch_to_section (bss_section);
5434 ASM_OUTPUT_ALIGN (file, floor_log2 (align / BITS_PER_UNIT));
5435 #ifdef ASM_DECLARE_OBJECT_NAME
5436 last_assemble_variable_decl = decl;
5437 ASM_DECLARE_OBJECT_NAME (file, name, decl);
5438 #else
5439 /* Standard thing is just output label for the object. */
5440 ASM_OUTPUT_LABEL (file, name);
5441 #endif /* ASM_DECLARE_OBJECT_NAME */
5442 ASM_OUTPUT_SKIP (file, size ? size : 1);
5445 /* Decide whether we must probe the stack before any space allocation
5446 on this target. It's essentially TARGET_STACK_PROBE except when
5447 -fstack-check causes the stack to be already probed differently. */
5449 bool
5450 ix86_target_stack_probe (void)
5452 /* Do not probe the stack twice if static stack checking is enabled. */
5453 if (flag_stack_check == STATIC_BUILTIN_STACK_CHECK)
5454 return false;
5456 return TARGET_STACK_PROBE;
5459 /* Decide whether we can make a sibling call to a function. DECL is the
5460 declaration of the function being targeted by the call and EXP is the
5461 CALL_EXPR representing the call. */
5463 static bool
5464 ix86_function_ok_for_sibcall (tree decl, tree exp)
5466 tree type, decl_or_type;
5467 rtx a, b;
5469 /* If we are generating position-independent code, we cannot sibcall
5470 optimize direct calls to global functions, as the PLT requires
5471 %ebx be live. (Darwin does not have a PLT.) */
5472 if (!TARGET_MACHO
5473 && !TARGET_64BIT
5474 && flag_pic
5475 && flag_plt
5476 && decl && !targetm.binds_local_p (decl))
5477 return false;
5479 /* If we need to align the outgoing stack, then sibcalling would
5480 unalign the stack, which may break the called function. */
5481 if (ix86_minimum_incoming_stack_boundary (true)
5482 < PREFERRED_STACK_BOUNDARY)
5483 return false;
5485 if (decl)
5487 decl_or_type = decl;
5488 type = TREE_TYPE (decl);
5490 else
5492 /* We're looking at the CALL_EXPR, so we need the type of the function. */
5493 type = CALL_EXPR_FN (exp); /* pointer expression */
5494 type = TREE_TYPE (type); /* pointer type */
5495 type = TREE_TYPE (type); /* function type */
5496 decl_or_type = type;
5499 /* Check that the return value locations are the same. Like
5500 if we are returning floats on the 80387 register stack, we cannot
5501 make a sibcall from a function that doesn't return a float to a
5502 function that does or, conversely, from a function that does return
5503 a float to a function that doesn't; the necessary stack adjustment
5504 would not be executed. This is also the place we notice
5505 differences in the return value ABI. Note that it is ok for one
5506 of the functions to have void return type as long as the return
5507 value of the other is passed in a register. */
5508 a = ix86_function_value (TREE_TYPE (exp), decl_or_type, false);
5509 b = ix86_function_value (TREE_TYPE (DECL_RESULT (cfun->decl)),
5510 cfun->decl, false);
5511 if (STACK_REG_P (a) || STACK_REG_P (b))
5513 if (!rtx_equal_p (a, b))
5514 return false;
5516 else if (VOID_TYPE_P (TREE_TYPE (DECL_RESULT (cfun->decl))))
5518 else if (!rtx_equal_p (a, b))
5519 return false;
5521 if (TARGET_64BIT)
5523 /* The SYSV ABI has more call-clobbered registers;
5524 disallow sibcalls from MS to SYSV. */
5525 if (cfun->machine->call_abi == MS_ABI
5526 && ix86_function_type_abi (type) == SYSV_ABI)
5527 return false;
5529 else
5531 /* If this call is indirect, we'll need to be able to use a
5532 call-clobbered register for the address of the target function.
5533 Make sure that all such registers are not used for passing
5534 parameters. Note that DLLIMPORT functions are indirect. */
5535 if (!decl
5536 || (TARGET_DLLIMPORT_DECL_ATTRIBUTES && DECL_DLLIMPORT_P (decl)))
5538 if (ix86_function_regparm (type, NULL) >= 3)
5540 /* ??? Need to count the actual number of registers to be used,
5541 not the possible number of registers. Fix later. */
5542 return false;
5547 /* Otherwise okay. That also includes certain types of indirect calls. */
5548 return true;
5551 /* Handle "cdecl", "stdcall", "fastcall", "regparm", "thiscall",
5552 and "sseregparm" calling convention attributes;
5553 arguments as in struct attribute_spec.handler. */
5555 static tree
5556 ix86_handle_cconv_attribute (tree *node, tree name,
5557 tree args,
5558 int,
5559 bool *no_add_attrs)
5561 if (TREE_CODE (*node) != FUNCTION_TYPE
5562 && TREE_CODE (*node) != METHOD_TYPE
5563 && TREE_CODE (*node) != FIELD_DECL
5564 && TREE_CODE (*node) != TYPE_DECL)
5566 warning (OPT_Wattributes, "%qE attribute only applies to functions",
5567 name);
5568 *no_add_attrs = true;
5569 return NULL_TREE;
5572 /* Can combine regparm with all attributes but fastcall, and thiscall. */
5573 if (is_attribute_p ("regparm", name))
5575 tree cst;
5577 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node)))
5579 error ("fastcall and regparm attributes are not compatible");
5582 if (lookup_attribute ("thiscall", TYPE_ATTRIBUTES (*node)))
5584 error ("regparam and thiscall attributes are not compatible");
5587 cst = TREE_VALUE (args);
5588 if (TREE_CODE (cst) != INTEGER_CST)
5590 warning (OPT_Wattributes,
5591 "%qE attribute requires an integer constant argument",
5592 name);
5593 *no_add_attrs = true;
5595 else if (compare_tree_int (cst, REGPARM_MAX) > 0)
5597 warning (OPT_Wattributes, "argument to %qE attribute larger than %d",
5598 name, REGPARM_MAX);
5599 *no_add_attrs = true;
5602 return NULL_TREE;
5605 if (TARGET_64BIT)
5607 /* Do not warn when emulating the MS ABI. */
5608 if ((TREE_CODE (*node) != FUNCTION_TYPE
5609 && TREE_CODE (*node) != METHOD_TYPE)
5610 || ix86_function_type_abi (*node) != MS_ABI)
5611 warning (OPT_Wattributes, "%qE attribute ignored",
5612 name);
5613 *no_add_attrs = true;
5614 return NULL_TREE;
5617 /* Can combine fastcall with stdcall (redundant) and sseregparm. */
5618 if (is_attribute_p ("fastcall", name))
5620 if (lookup_attribute ("cdecl", TYPE_ATTRIBUTES (*node)))
5622 error ("fastcall and cdecl attributes are not compatible");
5624 if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (*node)))
5626 error ("fastcall and stdcall attributes are not compatible");
5628 if (lookup_attribute ("regparm", TYPE_ATTRIBUTES (*node)))
5630 error ("fastcall and regparm attributes are not compatible");
5632 if (lookup_attribute ("thiscall", TYPE_ATTRIBUTES (*node)))
5634 error ("fastcall and thiscall attributes are not compatible");
5638 /* Can combine stdcall with fastcall (redundant), regparm and
5639 sseregparm. */
5640 else if (is_attribute_p ("stdcall", name))
5642 if (lookup_attribute ("cdecl", TYPE_ATTRIBUTES (*node)))
5644 error ("stdcall and cdecl attributes are not compatible");
5646 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node)))
5648 error ("stdcall and fastcall attributes are not compatible");
5650 if (lookup_attribute ("thiscall", TYPE_ATTRIBUTES (*node)))
5652 error ("stdcall and thiscall attributes are not compatible");
5656 /* Can combine cdecl with regparm and sseregparm. */
5657 else if (is_attribute_p ("cdecl", name))
5659 if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (*node)))
5661 error ("stdcall and cdecl attributes are not compatible");
5663 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node)))
5665 error ("fastcall and cdecl attributes are not compatible");
5667 if (lookup_attribute ("thiscall", TYPE_ATTRIBUTES (*node)))
5669 error ("cdecl and thiscall attributes are not compatible");
5672 else if (is_attribute_p ("thiscall", name))
5674 if (TREE_CODE (*node) != METHOD_TYPE && pedantic)
5675 warning (OPT_Wattributes, "%qE attribute is used for non-class method",
5676 name);
5677 if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (*node)))
5679 error ("stdcall and thiscall attributes are not compatible");
5681 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node)))
5683 error ("fastcall and thiscall attributes are not compatible");
5685 if (lookup_attribute ("cdecl", TYPE_ATTRIBUTES (*node)))
5687 error ("cdecl and thiscall attributes are not compatible");
5691 /* Can combine sseregparm with all attributes. */
5693 return NULL_TREE;
5696 /* The transactional memory builtins are implicitly regparm or fastcall
5697 depending on the ABI. Override the generic do-nothing attribute that
5698 these builtins were declared with, and replace it with one of the two
5699 attributes that we expect elsewhere. */
5701 static tree
5702 ix86_handle_tm_regparm_attribute (tree *node, tree, tree,
5703 int flags, bool *no_add_attrs)
5705 tree alt;
5707 /* In no case do we want to add the placeholder attribute. */
5708 *no_add_attrs = true;
5710 /* The 64-bit ABI is unchanged for transactional memory. */
5711 if (TARGET_64BIT)
5712 return NULL_TREE;
5714 /* ??? Is there a better way to validate 32-bit windows? We have
5715 cfun->machine->call_abi, but that seems to be set only for 64-bit. */
5716 if (CHECK_STACK_LIMIT > 0)
5717 alt = tree_cons (get_identifier ("fastcall"), NULL, NULL);
5718 else
5720 alt = tree_cons (NULL, build_int_cst (NULL, 2), NULL);
5721 alt = tree_cons (get_identifier ("regparm"), alt, NULL);
5723 decl_attributes (node, alt, flags);
5725 return NULL_TREE;
5728 /* This function determines from TYPE the calling-convention. */
5730 unsigned int
5731 ix86_get_callcvt (const_tree type)
5733 unsigned int ret = 0;
5734 bool is_stdarg;
5735 tree attrs;
5737 if (TARGET_64BIT)
5738 return IX86_CALLCVT_CDECL;
5740 attrs = TYPE_ATTRIBUTES (type);
5741 if (attrs != NULL_TREE)
5743 if (lookup_attribute ("cdecl", attrs))
5744 ret |= IX86_CALLCVT_CDECL;
5745 else if (lookup_attribute ("stdcall", attrs))
5746 ret |= IX86_CALLCVT_STDCALL;
5747 else if (lookup_attribute ("fastcall", attrs))
5748 ret |= IX86_CALLCVT_FASTCALL;
5749 else if (lookup_attribute ("thiscall", attrs))
5750 ret |= IX86_CALLCVT_THISCALL;
5752 /* Regparm isn't allowed for thiscall and fastcall. */
5753 if ((ret & (IX86_CALLCVT_THISCALL | IX86_CALLCVT_FASTCALL)) == 0)
5755 if (lookup_attribute ("regparm", attrs))
5756 ret |= IX86_CALLCVT_REGPARM;
5757 if (lookup_attribute ("sseregparm", attrs))
5758 ret |= IX86_CALLCVT_SSEREGPARM;
5761 if (IX86_BASE_CALLCVT(ret) != 0)
5762 return ret;
5765 is_stdarg = stdarg_p (type);
5766 if (TARGET_RTD && !is_stdarg)
5767 return IX86_CALLCVT_STDCALL | ret;
5769 if (ret != 0
5770 || is_stdarg
5771 || TREE_CODE (type) != METHOD_TYPE
5772 || ix86_function_type_abi (type) != MS_ABI)
5773 return IX86_CALLCVT_CDECL | ret;
5775 return IX86_CALLCVT_THISCALL;
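/* Illustrative note (not part of the original source; the declaration is
   hypothetical): for "int __attribute__((stdcall)) f (int);" the function
   above yields IX86_CALLCVT_STDCALL, whereas a prototyped variadic function
   always falls back to IX86_CALLCVT_CDECL, even under -mrtd, because the
   caller must pop a variable number of arguments.  */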
5778 /* Return 0 if the attributes for two types are incompatible, 1 if they
5779 are compatible, and 2 if they are nearly compatible (which causes a
5780 warning to be generated). */
5782 static int
5783 ix86_comp_type_attributes (const_tree type1, const_tree type2)
5785 unsigned int ccvt1, ccvt2;
5787 if (TREE_CODE (type1) != FUNCTION_TYPE
5788 && TREE_CODE (type1) != METHOD_TYPE)
5789 return 1;
5791 ccvt1 = ix86_get_callcvt (type1);
5792 ccvt2 = ix86_get_callcvt (type2);
5793 if (ccvt1 != ccvt2)
5794 return 0;
5795 if (ix86_function_regparm (type1, NULL)
5796 != ix86_function_regparm (type2, NULL))
5797 return 0;
5799 return 1;
5802 /* Return the regparm value for a function with the indicated TYPE and DECL.
5803 DECL may be NULL when calling function indirectly
5804 or considering a libcall. */
5806 static int
5807 ix86_function_regparm (const_tree type, const_tree decl)
5809 tree attr;
5810 int regparm;
5811 unsigned int ccvt;
5813 if (TARGET_64BIT)
5814 return (ix86_function_type_abi (type) == SYSV_ABI
5815 ? X86_64_REGPARM_MAX : X86_64_MS_REGPARM_MAX);
5816 ccvt = ix86_get_callcvt (type);
5817 regparm = ix86_regparm;
5819 if ((ccvt & IX86_CALLCVT_REGPARM) != 0)
5821 attr = lookup_attribute ("regparm", TYPE_ATTRIBUTES (type));
5822 if (attr)
5824 regparm = TREE_INT_CST_LOW (TREE_VALUE (TREE_VALUE (attr)));
5825 return regparm;
5828 else if ((ccvt & IX86_CALLCVT_FASTCALL) != 0)
5829 return 2;
5830 else if ((ccvt & IX86_CALLCVT_THISCALL) != 0)
5831 return 1;
5833 /* Use register calling convention for local functions when possible. */
5834 if (decl
5835 && TREE_CODE (decl) == FUNCTION_DECL)
5837 cgraph_node *target = cgraph_node::get (decl);
5838 if (target)
5839 target = target->function_symbol ();
5841 /* Caller and callee must agree on the calling convention, so
5842 checking just the current function's optimize setting here would mean
5843 that with __attribute__((optimize (...))) the caller could use the regparm
5844 convention and the callee not, or vice versa. Instead look at whether
5845 the callee itself is optimized or not. */
5846 if (target && opt_for_fn (target->decl, optimize)
5847 && !(profile_flag && !flag_fentry))
5849 cgraph_local_info *i = &target->local;
5850 if (i && i->local && i->can_change_signature)
5852 int local_regparm, globals = 0, regno;
5854 /* Make sure no regparm register is taken by a
5855 fixed register variable. */
5856 for (local_regparm = 0; local_regparm < REGPARM_MAX;
5857 local_regparm++)
5858 if (fixed_regs[local_regparm])
5859 break;
5861 /* We don't want to use regparm(3) for nested functions as
5862 these use a static chain pointer in the third argument. */
5863 if (local_regparm == 3 && DECL_STATIC_CHAIN (target->decl))
5864 local_regparm = 2;
5866 /* Save a register for the split stack. */
5867 if (local_regparm == 3 && flag_split_stack)
5868 local_regparm = 2;
5870 /* Each fixed register usage increases register pressure,
5871 so fewer registers should be used for argument passing.
5872 This functionality can be overridden by an explicit
5873 regparm value. */
5874 for (regno = AX_REG; regno <= DI_REG; regno++)
5875 if (fixed_regs[regno])
5876 globals++;
5878 local_regparm
5879 = globals < local_regparm ? local_regparm - globals : 0;
5881 if (local_regparm > regparm)
5882 regparm = local_regparm;
5887 return regparm;
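/* Illustrative sketch (not part of the original i386.c; the function name is
   hypothetical): with an explicit regparm value the leading integer
   arguments travel in registers on 32-bit targets; for regparm (3) they are
   passed in %eax, %edx and %ecx, in that order.  */
__attribute__((regparm (3)))
static int
example_regparm_add (int a, int b, int c)
{
  /* a arrives in %eax, b in %edx, c in %ecx; nothing is pushed for them.  */
  return a + b + c;
}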
5890 /* Return 1 or 2, if we can pass up to SSE_REGPARM_MAX SFmode (1) and
5891 DFmode (2) arguments in SSE registers for a function with the
5892 indicated TYPE and DECL. DECL may be NULL when calling function
5893 indirectly or considering a libcall. Return -1 if any FP parameter
5894 should be rejected by error. This is used in situations where we imply the
5895 SSE calling convention but the function is called from another function
5896 with SSE disabled. Otherwise return 0. */
5898 static int
5899 ix86_function_sseregparm (const_tree type, const_tree decl, bool warn)
5901 gcc_assert (!TARGET_64BIT);
5903 /* Use SSE registers to pass SFmode and DFmode arguments if requested
5904 by the sseregparm attribute. */
5905 if (TARGET_SSEREGPARM
5906 || (type && lookup_attribute ("sseregparm", TYPE_ATTRIBUTES (type))))
5908 if (!TARGET_SSE)
5910 if (warn)
5912 if (decl)
5913 error ("calling %qD with attribute sseregparm without "
5914 "SSE/SSE2 enabled", decl);
5915 else
5916 error ("calling %qT with attribute sseregparm without "
5917 "SSE/SSE2 enabled", type);
5919 return 0;
5922 return 2;
5925 if (!decl)
5926 return 0;
5928 cgraph_node *target = cgraph_node::get (decl);
5929 if (target)
5930 target = target->function_symbol ();
5932 /* For local functions, pass up to SSE_REGPARM_MAX SFmode
5933 (and DFmode for SSE2) arguments in SSE registers. */
5934 if (target
5935 /* TARGET_SSE_MATH */
5936 && (target_opts_for_fn (target->decl)->x_ix86_fpmath & FPMATH_SSE)
5937 && opt_for_fn (target->decl, optimize)
5938 && !(profile_flag && !flag_fentry))
5940 cgraph_local_info *i = &target->local;
5941 if (i && i->local && i->can_change_signature)
5943 /* Refuse to produce wrong code when a local function with SSE enabled
5944 is called from an SSE-disabled function.
5945 FIXME: We need a way to detect these cases cross-ltrans partition
5946 and avoid using SSE calling conventions on local functions called
5947 from function with SSE disabled. For now at least delay the
5948 warning until we know we are going to produce wrong code.
5949 See PR66047 */
5950 if (!TARGET_SSE && warn)
5951 return -1;
5952 return TARGET_SSE2_P (target_opts_for_fn (target->decl)
5953 ->x_ix86_isa_flags) ? 2 : 1;
5957 return 0;
5960 /* Return true if EAX is live at the start of the function. Used by
5961 ix86_expand_prologue to determine if we need special help before
5962 calling allocate_stack_worker. */
5964 static bool
5965 ix86_eax_live_at_start_p (void)
5967 /* Cheat. Don't bother working forward from ix86_function_regparm
5968 to the function type to whether an actual argument is located in
5969 eax. Instead just look at cfg info, which is still close enough
5970 to correct at this point. This gives false positives for broken
5971 functions that might use uninitialized data that happens to be
5972 allocated in eax, but who cares? */
5973 return REGNO_REG_SET_P (df_get_live_out (ENTRY_BLOCK_PTR_FOR_FN (cfun)), 0);
5976 static bool
5977 ix86_keep_aggregate_return_pointer (tree fntype)
5979 tree attr;
5981 if (!TARGET_64BIT)
5983 attr = lookup_attribute ("callee_pop_aggregate_return",
5984 TYPE_ATTRIBUTES (fntype));
5985 if (attr)
5986 return (TREE_INT_CST_LOW (TREE_VALUE (TREE_VALUE (attr))) == 0);
5988 /* For 32-bit MS-ABI the default is to keep aggregate
5989 return pointer. */
5990 if (ix86_function_type_abi (fntype) == MS_ABI)
5991 return true;
5993 return KEEP_AGGREGATE_RETURN_POINTER != 0;
5996 /* Value is the number of bytes of arguments automatically
5997 popped when returning from a subroutine call.
5998 FUNDECL is the declaration node of the function (as a tree),
5999 FUNTYPE is the data type of the function (as a tree),
6000 or for a library call it is an identifier node for the subroutine name.
6001 SIZE is the number of bytes of arguments passed on the stack.
6003 On the 80386, the RTD insn may be used to pop them if the number
6004 of args is fixed, but if the number is variable then the caller
6005 must pop them all. RTD can't be used for library calls now
6006 because the library is compiled with the Unix compiler.
6007 Use of RTD is a selectable option, since it is incompatible with
6008 standard Unix calling sequences. If the option is not selected,
6009 the caller must always pop the args.
6011 The attribute stdcall is equivalent to RTD on a per module basis. */
6013 static int
6014 ix86_return_pops_args (tree fundecl, tree funtype, int size)
6016 unsigned int ccvt;
6018 /* None of the 64-bit ABIs pop arguments. */
6019 if (TARGET_64BIT)
6020 return 0;
6022 ccvt = ix86_get_callcvt (funtype);
6024 if ((ccvt & (IX86_CALLCVT_STDCALL | IX86_CALLCVT_FASTCALL
6025 | IX86_CALLCVT_THISCALL)) != 0
6026 && ! stdarg_p (funtype))
6027 return size;
6029 /* Lose any fake structure return argument if it is passed on the stack. */
6030 if (aggregate_value_p (TREE_TYPE (funtype), fundecl)
6031 && !ix86_keep_aggregate_return_pointer (funtype))
6033 int nregs = ix86_function_regparm (funtype, fundecl);
6034 if (nregs == 0)
6035 return GET_MODE_SIZE (Pmode);
6038 return 0;
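/* Illustrative note (not part of the original source; the declaration is
   hypothetical): for "void __attribute__((stdcall)) f (int, int, int);"
   SIZE is 12 on 32-bit targets, the function above returns 12, and the
   callee pops its own arguments with "ret $12".  A cdecl function returns 0
   here and leaves the popping to the caller.  */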
6041 /* Implement the TARGET_LEGITIMATE_COMBINED_INSN hook. */
6043 static bool
6044 ix86_legitimate_combined_insn (rtx_insn *insn)
6046 /* Check operand constraints in case hard registers were propagated
6047 into insn pattern. This check prevents combine pass from
6048 generating insn patterns with invalid hard register operands.
6049 These invalid insns can eventually confuse reload to error out
6050 with a spill failure. See also PRs 46829 and 46843. */
6051 if ((INSN_CODE (insn) = recog (PATTERN (insn), insn, 0)) >= 0)
6053 int i;
6055 extract_insn (insn);
6056 preprocess_constraints (insn);
6058 int n_operands = recog_data.n_operands;
6059 int n_alternatives = recog_data.n_alternatives;
6060 for (i = 0; i < n_operands; i++)
6062 rtx op = recog_data.operand[i];
6063 machine_mode mode = GET_MODE (op);
6064 const operand_alternative *op_alt;
6065 int offset = 0;
6066 bool win;
6067 int j;
6069 /* For pre-AVX disallow unaligned loads/stores where the
6070 instructions don't support it. */
6071 if (!TARGET_AVX
6072 && VECTOR_MODE_P (GET_MODE (op))
6073 && misaligned_operand (op, GET_MODE (op)))
6075 int min_align = get_attr_ssememalign (insn);
6076 if (min_align == 0)
6077 return false;
6080 /* A unary operator may be accepted by the predicate, but it
6081 is irrelevant for matching constraints. */
6082 if (UNARY_P (op))
6083 op = XEXP (op, 0);
6085 if (GET_CODE (op) == SUBREG)
6087 if (REG_P (SUBREG_REG (op))
6088 && REGNO (SUBREG_REG (op)) < FIRST_PSEUDO_REGISTER)
6089 offset = subreg_regno_offset (REGNO (SUBREG_REG (op)),
6090 GET_MODE (SUBREG_REG (op)),
6091 SUBREG_BYTE (op),
6092 GET_MODE (op));
6093 op = SUBREG_REG (op);
6096 if (!(REG_P (op) && HARD_REGISTER_P (op)))
6097 continue;
6099 op_alt = recog_op_alt;
6101 /* Operand has no constraints, anything is OK. */
6102 win = !n_alternatives;
6104 alternative_mask preferred = get_preferred_alternatives (insn);
6105 for (j = 0; j < n_alternatives; j++, op_alt += n_operands)
6107 if (!TEST_BIT (preferred, j))
6108 continue;
6109 if (op_alt[i].anything_ok
6110 || (op_alt[i].matches != -1
6111 && operands_match_p
6112 (recog_data.operand[i],
6113 recog_data.operand[op_alt[i].matches]))
6114 || reg_fits_class_p (op, op_alt[i].cl, offset, mode))
6116 win = true;
6117 break;
6121 if (!win)
6122 return false;
6126 return true;
6129 /* Implement the TARGET_ASAN_SHADOW_OFFSET hook. */
6131 static unsigned HOST_WIDE_INT
6132 ix86_asan_shadow_offset (void)
6134 return TARGET_LP64 ? (TARGET_MACHO ? (HOST_WIDE_INT_1 << 44)
6135 : HOST_WIDE_INT_C (0x7fff8000))
6136 : (HOST_WIDE_INT_1 << 29);
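/* Illustrative note (not part of the original source): with the constants
   above, ASan on 64-bit Linux maps an application address to its shadow
   byte as

       shadow = (addr >> 3) + 0x7fff8000

   i.e. one shadow byte per eight application bytes; the other
   configurations use the same formula with a different offset.  */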
6139 /* Argument support functions. */
6141 /* Return true when register REGNO may be used to pass function parameters. */
6142 bool
6143 ix86_function_arg_regno_p (int regno)
6145 int i;
6146 enum calling_abi call_abi;
6147 const int *parm_regs;
6149 if (TARGET_MPX && BND_REGNO_P (regno))
6150 return true;
6152 if (!TARGET_64BIT)
6154 if (TARGET_MACHO)
6155 return (regno < REGPARM_MAX
6156 || (TARGET_SSE && SSE_REGNO_P (regno) && !fixed_regs[regno]));
6157 else
6158 return (regno < REGPARM_MAX
6159 || (TARGET_MMX && MMX_REGNO_P (regno)
6160 && (regno < FIRST_MMX_REG + MMX_REGPARM_MAX))
6161 || (TARGET_SSE && SSE_REGNO_P (regno)
6162 && (regno < FIRST_SSE_REG + SSE_REGPARM_MAX)));
6165 if (TARGET_SSE && SSE_REGNO_P (regno)
6166 && (regno < FIRST_SSE_REG + SSE_REGPARM_MAX))
6167 return true;
6169 /* TODO: The function should depend on the current function's ABI, but
6170 builtins.c would need updating then. Therefore we use the
6171 default ABI. */
6172 call_abi = ix86_cfun_abi ();
6174 /* RAX is used as hidden argument to va_arg functions. */
6175 if (call_abi == SYSV_ABI && regno == AX_REG)
6176 return true;
6178 if (call_abi == MS_ABI)
6179 parm_regs = x86_64_ms_abi_int_parameter_registers;
6180 else
6181 parm_regs = x86_64_int_parameter_registers;
6183 for (i = 0; i < (call_abi == MS_ABI
6184 ? X86_64_MS_REGPARM_MAX : X86_64_REGPARM_MAX); i++)
6185 if (regno == parm_regs[i])
6186 return true;
6187 return false;
6190 /* Return true if we do not know how to pass TYPE solely in registers. */
6192 static bool
6193 ix86_must_pass_in_stack (machine_mode mode, const_tree type)
6195 if (must_pass_in_stack_var_size_or_pad (mode, type))
6196 return true;
6198 /* For 32-bit, we want TImode aggregates to go on the stack. But watch out!
6199 The layout_type routine is crafty and tries to trick us into passing
6200 currently unsupported vector types on the stack by using TImode. */
6201 return (!TARGET_64BIT && mode == TImode
6202 && type && TREE_CODE (type) != VECTOR_TYPE);
6205 /* Return the size, in bytes, of the area reserved for arguments passed
6206 in registers for the function represented by FNDECL, depending on the
6207 ABI format used. */
6209 ix86_reg_parm_stack_space (const_tree fndecl)
6211 enum calling_abi call_abi = SYSV_ABI;
6212 if (fndecl != NULL_TREE && TREE_CODE (fndecl) == FUNCTION_DECL)
6213 call_abi = ix86_function_abi (fndecl);
6214 else
6215 call_abi = ix86_function_type_abi (fndecl);
6216 if (TARGET_64BIT && call_abi == MS_ABI)
6217 return 32;
6218 return 0;
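/* Illustrative note (not part of the original source): the 32 bytes returned
   for the 64-bit MS ABI are the "home area" the caller must reserve on the
   stack for the four register parameters (RCX, RDX, R8 and R9), even when
   the callee takes fewer arguments.  */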
6221 /* Return SYSV_ABI or MS_ABI, depending on FNTYPE, specifying the
6222 call ABI used. */
6223 enum calling_abi
6224 ix86_function_type_abi (const_tree fntype)
6226 if (fntype != NULL_TREE && TYPE_ATTRIBUTES (fntype) != NULL_TREE)
6228 enum calling_abi abi = ix86_abi;
6229 if (abi == SYSV_ABI)
6231 if (lookup_attribute ("ms_abi", TYPE_ATTRIBUTES (fntype)))
6233 if (TARGET_X32)
6235 static bool warned = false;
6236 if (!warned)
6238 error ("X32 does not support ms_abi attribute");
6239 warned = true;
6242 abi = MS_ABI;
6245 else if (lookup_attribute ("sysv_abi", TYPE_ATTRIBUTES (fntype)))
6246 abi = SYSV_ABI;
6247 return abi;
6249 return ix86_abi;
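/* Illustrative sketch (not part of the original i386.c; the declaration is
   hypothetical): on a SysV x86-64 target a single function can be switched
   to the Microsoft calling convention with the ms_abi attribute recognized
   above; sysv_abi works the other way around when the default is -mabi=ms.  */
extern void example_ms_abi_callback (void *ctx) __attribute__((ms_abi));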
6252 /* We add this as a workaround in order to use libc_has_function
6253 hook in i386.md. */
6254 bool
6255 ix86_libc_has_function (enum function_class fn_class)
6257 return targetm.libc_has_function (fn_class);
6260 static bool
6261 ix86_function_ms_hook_prologue (const_tree fn)
6263 if (fn && lookup_attribute ("ms_hook_prologue", DECL_ATTRIBUTES (fn)))
6265 if (decl_function_context (fn) != NULL_TREE)
6266 error_at (DECL_SOURCE_LOCATION (fn),
6267 "ms_hook_prologue is not compatible with nested function");
6268 else
6269 return true;
6271 return false;
6274 static enum calling_abi
6275 ix86_function_abi (const_tree fndecl)
6277 if (! fndecl)
6278 return ix86_abi;
6279 return ix86_function_type_abi (TREE_TYPE (fndecl));
6282 /* Return SYSV_ABI or MS_ABI, depending on cfun, specifying the
6283 call ABI used. */
6284 enum calling_abi
6285 ix86_cfun_abi (void)
6287 if (! cfun)
6288 return ix86_abi;
6289 return cfun->machine->call_abi;
6292 /* Write the extra assembler code needed to declare a function properly. */
6294 void
6295 ix86_asm_output_function_label (FILE *asm_out_file, const char *fname,
6296 tree decl)
6298 bool is_ms_hook = ix86_function_ms_hook_prologue (decl);
6300 if (is_ms_hook)
6302 int i, filler_count = (TARGET_64BIT ? 32 : 16);
6303 unsigned int filler_cc = 0xcccccccc;
6305 for (i = 0; i < filler_count; i += 4)
6306 fprintf (asm_out_file, ASM_LONG " %#x\n", filler_cc);
6309 #ifdef SUBTARGET_ASM_UNWIND_INIT
6310 SUBTARGET_ASM_UNWIND_INIT (asm_out_file);
6311 #endif
6313 ASM_OUTPUT_LABEL (asm_out_file, fname);
6315 /* Output magic byte marker, if hot-patch attribute is set. */
6316 if (is_ms_hook)
6318 if (TARGET_64BIT)
6320 /* leaq [%rsp + 0], %rsp */
6321 asm_fprintf (asm_out_file, ASM_BYTE
6322 "0x48, 0x8d, 0xa4, 0x24, 0x00, 0x00, 0x00, 0x00\n");
6324 else
6326 /* movl.s %edi, %edi
6327 push %ebp
6328 movl.s %esp, %ebp */
6329 asm_fprintf (asm_out_file, ASM_BYTE
6330 "0x8b, 0xff, 0x55, 0x8b, 0xec\n");
6335 /* regclass.c */
6336 extern void init_regs (void);
6338 /* Implementation of the call ABI switching target hook. The call-used
6339 register sets specific to FNDECL are set up. See also
6340 ix86_conditional_register_usage for more details. */
6341 void
6342 ix86_call_abi_override (const_tree fndecl)
6344 if (fndecl == NULL_TREE)
6345 cfun->machine->call_abi = ix86_abi;
6346 else
6347 cfun->machine->call_abi = ix86_function_type_abi (TREE_TYPE (fndecl));
6350 /* The 64-bit MS and SYSV ABIs have different sets of call-used registers. Avoid
6351 expensive re-initialization of init_regs each time we switch function context,
6352 since this is needed only during RTL expansion. */
6353 static void
6354 ix86_maybe_switch_abi (void)
6356 if (TARGET_64BIT &&
6357 call_used_regs[SI_REG] == (cfun->machine->call_abi == MS_ABI))
6358 reinit_regs ();
6361 /* Return true if a pseudo register should be created and used to hold
6362 the GOT address for PIC code. */
6363 bool
6364 ix86_use_pseudo_pic_reg (void)
6366 if ((TARGET_64BIT
6367 && (ix86_cmodel == CM_SMALL_PIC
6368 || TARGET_PECOFF))
6369 || !flag_pic)
6370 return false;
6371 return true;
6374 /* Initialize large model PIC register. */
6376 static void
6377 ix86_init_large_pic_reg (unsigned int tmp_regno)
6379 rtx_code_label *label;
6380 rtx tmp_reg;
6382 gcc_assert (Pmode == DImode);
6383 label = gen_label_rtx ();
6384 emit_label (label);
6385 LABEL_PRESERVE_P (label) = 1;
6386 tmp_reg = gen_rtx_REG (Pmode, tmp_regno);
6387 gcc_assert (REGNO (pic_offset_table_rtx) != tmp_regno);
6388 emit_insn (gen_set_rip_rex64 (pic_offset_table_rtx,
6389 label));
6390 emit_insn (gen_set_got_offset_rex64 (tmp_reg, label));
6391 emit_insn (ix86_gen_add3 (pic_offset_table_rtx,
6392 pic_offset_table_rtx, tmp_reg));
6395 /* Create and initialize PIC register if required. */
6396 static void
6397 ix86_init_pic_reg (void)
6399 edge entry_edge;
6400 rtx_insn *seq;
6402 if (!ix86_use_pseudo_pic_reg ())
6403 return;
6405 start_sequence ();
6407 if (TARGET_64BIT)
6409 if (ix86_cmodel == CM_LARGE_PIC)
6410 ix86_init_large_pic_reg (R11_REG);
6411 else
6412 emit_insn (gen_set_got_rex64 (pic_offset_table_rtx));
6414 else
6416 /* If there is a future mcount call in the function, it is more profitable
6417 to emit SET_GOT into the ABI-defined REAL_PIC_OFFSET_TABLE_REGNUM. */
6418 rtx reg = crtl->profile
6419 ? gen_rtx_REG (Pmode, REAL_PIC_OFFSET_TABLE_REGNUM)
6420 : pic_offset_table_rtx;
6421 rtx_insn *insn = emit_insn (gen_set_got (reg));
6422 RTX_FRAME_RELATED_P (insn) = 1;
6423 if (crtl->profile)
6424 emit_move_insn (pic_offset_table_rtx, reg);
6425 add_reg_note (insn, REG_CFA_FLUSH_QUEUE, NULL_RTX);
6428 seq = get_insns ();
6429 end_sequence ();
6431 entry_edge = single_succ_edge (ENTRY_BLOCK_PTR_FOR_FN (cfun));
6432 insert_insn_on_edge (seq, entry_edge);
6433 commit_one_edge_insertion (entry_edge);
6436 /* Initialize a variable CUM of type CUMULATIVE_ARGS
6437 for a call to a function whose data type is FNTYPE.
6438 For a library call, FNTYPE is 0. */
6440 void
6441 init_cumulative_args (CUMULATIVE_ARGS *cum, /* Argument info to initialize */
6442 tree fntype, /* tree ptr for function decl */
6443 rtx libname, /* SYMBOL_REF of library name or 0 */
6444 tree fndecl,
6445 int caller)
6447 struct cgraph_local_info *i = NULL;
6448 struct cgraph_node *target = NULL;
6450 memset (cum, 0, sizeof (*cum));
6452 if (fndecl)
6454 target = cgraph_node::get (fndecl);
6455 if (target)
6457 target = target->function_symbol ();
6458 i = cgraph_node::local_info (target->decl);
6459 cum->call_abi = ix86_function_abi (target->decl);
6461 else
6462 cum->call_abi = ix86_function_abi (fndecl);
6464 else
6465 cum->call_abi = ix86_function_type_abi (fntype);
6467 cum->caller = caller;
6469 /* Set up the number of registers to use for passing arguments. */
6470 cum->nregs = ix86_regparm;
6471 if (TARGET_64BIT)
6473 cum->nregs = (cum->call_abi == SYSV_ABI
6474 ? X86_64_REGPARM_MAX
6475 : X86_64_MS_REGPARM_MAX);
6477 if (TARGET_SSE)
6479 cum->sse_nregs = SSE_REGPARM_MAX;
6480 if (TARGET_64BIT)
6482 cum->sse_nregs = (cum->call_abi == SYSV_ABI
6483 ? X86_64_SSE_REGPARM_MAX
6484 : X86_64_MS_SSE_REGPARM_MAX);
6487 if (TARGET_MMX)
6488 cum->mmx_nregs = MMX_REGPARM_MAX;
6489 cum->warn_avx512f = true;
6490 cum->warn_avx = true;
6491 cum->warn_sse = true;
6492 cum->warn_mmx = true;
6494 /* Because the type might mismatch between caller and callee, we need to
6495 use the actual type of the function for local calls.
6496 FIXME: cgraph_analyze can be told to actually record whether a function
6497 uses va_start, so for local functions maybe_vaarg could be made more
6498 aggressive, helping K&R code.
6499 FIXME: once the type system is fixed, we won't need this code anymore. */
6500 if (i && i->local && i->can_change_signature)
6501 fntype = TREE_TYPE (target->decl);
6502 cum->stdarg = stdarg_p (fntype);
6503 cum->maybe_vaarg = (fntype
6504 ? (!prototype_p (fntype) || stdarg_p (fntype))
6505 : !libname);
6507 cum->bnd_regno = FIRST_BND_REG;
6508 cum->bnds_in_bt = 0;
6509 cum->force_bnd_pass = 0;
6510 cum->decl = fndecl;
6512 if (!TARGET_64BIT)
6514 /* If there are variable arguments, then we won't pass anything
6515 in registers in 32-bit mode. */
6516 if (stdarg_p (fntype))
6518 cum->nregs = 0;
6519 cum->sse_nregs = 0;
6520 cum->mmx_nregs = 0;
6521 cum->warn_avx512f = false;
6522 cum->warn_avx = false;
6523 cum->warn_sse = false;
6524 cum->warn_mmx = false;
6525 return;
6528 /* Use the ecx and edx registers if the function has the fastcall attribute,
6529 else look for regparm information. */
6530 if (fntype)
6532 unsigned int ccvt = ix86_get_callcvt (fntype);
6533 if ((ccvt & IX86_CALLCVT_THISCALL) != 0)
6535 cum->nregs = 1;
6536 cum->fastcall = 1; /* Same first register as in fastcall. */
6538 else if ((ccvt & IX86_CALLCVT_FASTCALL) != 0)
6540 cum->nregs = 2;
6541 cum->fastcall = 1;
6543 else
6544 cum->nregs = ix86_function_regparm (fntype, fndecl);
6547 /* Set up the number of SSE registers used for passing SFmode
6548 and DFmode arguments. Warn for mismatching ABI. */
6549 cum->float_in_sse = ix86_function_sseregparm (fntype, fndecl, true);
6553 /* Return the "natural" mode for TYPE. In most cases, this is just TYPE_MODE.
6554 But in the case of vector types, it is some vector mode.
6556 When we have only some of our vector isa extensions enabled, then there
6557 are some modes for which vector_mode_supported_p is false. For these
6558 modes, the generic vector support in gcc will choose some non-vector mode
6559 in order to implement the type. By computing the natural mode, we'll
6560 select the proper ABI location for the operand and not depend on whatever
6561 the middle-end decides to do with these vector types.
6563 The middle-end can't deal with vector types larger than 16 bytes. In this
6564 case, we return the original mode and warn about the ABI change if CUM
6565 isn't NULL.
6567 If IN_RETURN is true, warn about the ABI change if the vector mode isn't
6568 available for the function return value. */
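/* As an illustration, a GNU vector type such as
   "int __attribute__ ((vector_size (32)))" naturally maps to V8SImode;
   when AVX is not enabled we keep the type's original mode and emit the
   -Wpsabi note below rather than silently moving the argument. */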
6570 static machine_mode
6571 type_natural_mode (const_tree type, const CUMULATIVE_ARGS *cum,
6572 bool in_return)
6574 machine_mode mode = TYPE_MODE (type);
6576 if (TREE_CODE (type) == VECTOR_TYPE && !VECTOR_MODE_P (mode))
6578 HOST_WIDE_INT size = int_size_in_bytes (type);
6579 if ((size == 8 || size == 16 || size == 32 || size == 64)
6580 /* ??? Generic code allows us to create width 1 vectors. Ignore. */
6581 && TYPE_VECTOR_SUBPARTS (type) > 1)
6583 machine_mode innermode = TYPE_MODE (TREE_TYPE (type));
6585 if (TREE_CODE (TREE_TYPE (type)) == REAL_TYPE)
6586 mode = MIN_MODE_VECTOR_FLOAT;
6587 else
6588 mode = MIN_MODE_VECTOR_INT;
6590 /* Get the mode which has this inner mode and number of units. */
6591 for (; mode != VOIDmode; mode = GET_MODE_WIDER_MODE (mode))
6592 if (GET_MODE_NUNITS (mode) == TYPE_VECTOR_SUBPARTS (type)
6593 && GET_MODE_INNER (mode) == innermode)
6595 if (size == 64 && !TARGET_AVX512F)
6597 static bool warnedavx512f;
6598 static bool warnedavx512f_ret;
6600 if (cum && cum->warn_avx512f && !warnedavx512f)
6602 if (warning (OPT_Wpsabi, "AVX512F vector argument "
6603 "without AVX512F enabled changes the ABI"))
6604 warnedavx512f = true;
6606 else if (in_return && !warnedavx512f_ret)
6608 if (warning (OPT_Wpsabi, "AVX512F vector return "
6609 "without AVX512F enabled changes the ABI"))
6610 warnedavx512f_ret = true;
6613 return TYPE_MODE (type);
6615 else if (size == 32 && !TARGET_AVX)
6617 static bool warnedavx;
6618 static bool warnedavx_ret;
6620 if (cum && cum->warn_avx && !warnedavx)
6622 if (warning (OPT_Wpsabi, "AVX vector argument "
6623 "without AVX enabled changes the ABI"))
6624 warnedavx = true;
6626 else if (in_return && !warnedavx_ret)
6628 if (warning (OPT_Wpsabi, "AVX vector return "
6629 "without AVX enabled changes the ABI"))
6630 warnedavx_ret = true;
6633 return TYPE_MODE (type);
6635 else if (((size == 8 && TARGET_64BIT) || size == 16)
6636 && !TARGET_SSE)
6638 static bool warnedsse;
6639 static bool warnedsse_ret;
6641 if (cum && cum->warn_sse && !warnedsse)
6643 if (warning (OPT_Wpsabi, "SSE vector argument "
6644 "without SSE enabled changes the ABI"))
6645 warnedsse = true;
6647 else if (!TARGET_64BIT && in_return && !warnedsse_ret)
6649 if (warning (OPT_Wpsabi, "SSE vector return "
6650 "without SSE enabled changes the ABI"))
6651 warnedsse_ret = true;
6654 else if ((size == 8 && !TARGET_64BIT) && !TARGET_MMX)
6656 static bool warnedmmx;
6657 static bool warnedmmx_ret;
6659 if (cum && cum->warn_mmx && !warnedmmx)
6661 if (warning (OPT_Wpsabi, "MMX vector argument "
6662 "without MMX enabled changes the ABI"))
6663 warnedmmx = true;
6665 else if (in_return && !warnedmmx_ret)
6667 if (warning (OPT_Wpsabi, "MMX vector return "
6668 "without MMX enabled changes the ABI"))
6669 warnedmmx_ret = true;
6672 return mode;
6675 gcc_unreachable ();
6679 return mode;
6682 /* We want to pass a value in REGNO whose "natural" mode is MODE. However,
6683 this may not agree with the mode that the type system has chosen for the
6684 register, which is ORIG_MODE. If ORIG_MODE is not BLKmode, then we can
6685 go ahead and use it. Otherwise we have to build a PARALLEL instead. */
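/* Roughly, for a BLKmode aggregate whose natural mode is, say, V2DF living
   in %xmm0, the PARALLEL built below has the shape
     (parallel [(expr_list (reg:V2DF xmm0) (const_int 0))])
   i.e. a single register piece located at byte offset 0. */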
6687 static rtx
6688 gen_reg_or_parallel (machine_mode mode, machine_mode orig_mode,
6689 unsigned int regno)
6691 rtx tmp;
6693 if (orig_mode != BLKmode)
6694 tmp = gen_rtx_REG (orig_mode, regno);
6695 else
6697 tmp = gen_rtx_REG (mode, regno);
6698 tmp = gen_rtx_EXPR_LIST (VOIDmode, tmp, const0_rtx);
6699 tmp = gen_rtx_PARALLEL (orig_mode, gen_rtvec (1, tmp));
6702 return tmp;
6705 /* x86-64 register passing implementation. See the x86-64 psABI for details.
6706 The goal of this code is to classify each eightbyte of an incoming argument
6707 by register class and assign registers accordingly. */
6709 /* Return the union class of CLASS1 and CLASS2.
6710 See the x86-64 PS ABI for details. */
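/* For example, for "union { int i; float f; }" the two fields of its single
   eightbyte classify as INTEGERSI and SSESF; rule #4 below merges them to
   INTEGERSI, so the union is passed in a general-purpose register. */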
6712 static enum x86_64_reg_class
6713 merge_classes (enum x86_64_reg_class class1, enum x86_64_reg_class class2)
6715 /* Rule #1: If both classes are equal, this is the resulting class. */
6716 if (class1 == class2)
6717 return class1;
6719 /* Rule #2: If one of the classes is NO_CLASS, the resulting class is
6720 the other class. */
6721 if (class1 == X86_64_NO_CLASS)
6722 return class2;
6723 if (class2 == X86_64_NO_CLASS)
6724 return class1;
6726 /* Rule #3: If one of the classes is MEMORY, the result is MEMORY. */
6727 if (class1 == X86_64_MEMORY_CLASS || class2 == X86_64_MEMORY_CLASS)
6728 return X86_64_MEMORY_CLASS;
6730 /* Rule #4: If one of the classes is INTEGER, the result is INTEGER. */
6731 if ((class1 == X86_64_INTEGERSI_CLASS && class2 == X86_64_SSESF_CLASS)
6732 || (class2 == X86_64_INTEGERSI_CLASS && class1 == X86_64_SSESF_CLASS))
6733 return X86_64_INTEGERSI_CLASS;
6734 if (class1 == X86_64_INTEGER_CLASS || class1 == X86_64_INTEGERSI_CLASS
6735 || class2 == X86_64_INTEGER_CLASS || class2 == X86_64_INTEGERSI_CLASS)
6736 return X86_64_INTEGER_CLASS;
6738 /* Rule #5: If one of the classes is X87, X87UP, or COMPLEX_X87 class,
6739 MEMORY is used. */
6740 if (class1 == X86_64_X87_CLASS
6741 || class1 == X86_64_X87UP_CLASS
6742 || class1 == X86_64_COMPLEX_X87_CLASS
6743 || class2 == X86_64_X87_CLASS
6744 || class2 == X86_64_X87UP_CLASS
6745 || class2 == X86_64_COMPLEX_X87_CLASS)
6746 return X86_64_MEMORY_CLASS;
6748 /* Rule #6: Otherwise class SSE is used. */
6749 return X86_64_SSE_CLASS;
6752 /* Classify the argument of type TYPE and mode MODE.
6753 CLASSES will be filled by the register class used to pass each word
6754 of the operand. The number of words is returned. In case the parameter
6755 should be passed in memory, 0 is returned. As a special case for zero
6756 sized containers, classes[0] will be NO_CLASS and 1 is returned.
6758 BIT_OFFSET is used internally for handling records and specifies the
6759 offset in bits modulo 512 to avoid overflow cases.
6761 See the x86-64 PS ABI for details. */
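/* As a worked example, "struct { double d; int i; }" occupies two
   eightbytes: the first classifies as SSE (the double), the second as
   INTEGER (the int), so the struct is passed in one SSE register and one
   general-purpose register, e.g. %xmm0 and %rdi for the first such
   argument. */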
6764 static int
6765 classify_argument (machine_mode mode, const_tree type,
6766 enum x86_64_reg_class classes[MAX_CLASSES], int bit_offset)
6768 HOST_WIDE_INT bytes =
6769 (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
6770 int words
6771 = (bytes + (bit_offset % 64) / 8 + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
6773 /* Variable sized entities are always passed/returned in memory. */
6774 if (bytes < 0)
6775 return 0;
6777 if (mode != VOIDmode
6778 && targetm.calls.must_pass_in_stack (mode, type))
6779 return 0;
6781 if (type && AGGREGATE_TYPE_P (type))
6783 int i;
6784 tree field;
6785 enum x86_64_reg_class subclasses[MAX_CLASSES];
6787 /* On x86-64 we pass structures larger than 64 bytes on the stack. */
6788 if (bytes > 64)
6789 return 0;
6791 for (i = 0; i < words; i++)
6792 classes[i] = X86_64_NO_CLASS;
6794 /* Zero sized arrays or structures are NO_CLASS. We return 0 to
6795 signal the memory class, so handle this as a special case. */
6796 if (!words)
6798 classes[0] = X86_64_NO_CLASS;
6799 return 1;
6802 /* Classify each field of record and merge classes. */
6803 switch (TREE_CODE (type))
6805 case RECORD_TYPE:
6806 /* And now merge the fields of structure. */
6807 for (field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
6809 if (TREE_CODE (field) == FIELD_DECL)
6811 int num;
6813 if (TREE_TYPE (field) == error_mark_node)
6814 continue;
6816 /* Bitfields are always classified as integer. Handle them
6817 early, since later code would consider them to be
6818 misaligned integers. */
6819 if (DECL_BIT_FIELD (field))
6821 for (i = (int_bit_position (field)
6822 + (bit_offset % 64)) / 8 / 8;
6823 i < ((int_bit_position (field) + (bit_offset % 64))
6824 + tree_to_shwi (DECL_SIZE (field))
6825 + 63) / 8 / 8; i++)
6826 classes[i] =
6827 merge_classes (X86_64_INTEGER_CLASS,
6828 classes[i]);
6830 else
6832 int pos;
6834 type = TREE_TYPE (field);
6836 /* Flexible array member is ignored. */
6837 if (TYPE_MODE (type) == BLKmode
6838 && TREE_CODE (type) == ARRAY_TYPE
6839 && TYPE_SIZE (type) == NULL_TREE
6840 && TYPE_DOMAIN (type) != NULL_TREE
6841 && (TYPE_MAX_VALUE (TYPE_DOMAIN (type))
6842 == NULL_TREE))
6844 static bool warned;
6846 if (!warned && warn_psabi)
6848 warned = true;
6849 inform (input_location,
6850 "the ABI of passing struct with"
6851 " a flexible array member has"
6852 " changed in GCC 4.4");
6854 continue;
6856 num = classify_argument (TYPE_MODE (type), type,
6857 subclasses,
6858 (int_bit_position (field)
6859 + bit_offset) % 512);
6860 if (!num)
6861 return 0;
6862 pos = (int_bit_position (field)
6863 + (bit_offset % 64)) / 8 / 8;
6864 for (i = 0; i < num && (i + pos) < words; i++)
6865 classes[i + pos] =
6866 merge_classes (subclasses[i], classes[i + pos]);
6870 break;
6872 case ARRAY_TYPE:
6873 /* Arrays are handled as small records. */
6875 int num;
6876 num = classify_argument (TYPE_MODE (TREE_TYPE (type)),
6877 TREE_TYPE (type), subclasses, bit_offset);
6878 if (!num)
6879 return 0;
6881 /* The partial classes are now full classes. */
6882 if (subclasses[0] == X86_64_SSESF_CLASS && bytes != 4)
6883 subclasses[0] = X86_64_SSE_CLASS;
6884 if (subclasses[0] == X86_64_INTEGERSI_CLASS
6885 && !((bit_offset % 64) == 0 && bytes == 4))
6886 subclasses[0] = X86_64_INTEGER_CLASS;
6888 for (i = 0; i < words; i++)
6889 classes[i] = subclasses[i % num];
6891 break;
6893 case UNION_TYPE:
6894 case QUAL_UNION_TYPE:
6895 /* Unions are similar to RECORD_TYPE but the offset is always 0. */
6897 for (field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
6899 if (TREE_CODE (field) == FIELD_DECL)
6901 int num;
6903 if (TREE_TYPE (field) == error_mark_node)
6904 continue;
6906 num = classify_argument (TYPE_MODE (TREE_TYPE (field)),
6907 TREE_TYPE (field), subclasses,
6908 bit_offset);
6909 if (!num)
6910 return 0;
6911 for (i = 0; i < num && i < words; i++)
6912 classes[i] = merge_classes (subclasses[i], classes[i]);
6915 break;
6917 default:
6918 gcc_unreachable ();
6921 if (words > 2)
6923 /* When the size is > 16 bytes, if the first class isn't
6924 X86_64_SSE_CLASS or any of the remaining classes isn't
6925 X86_64_SSEUP_CLASS, everything should be passed in
6926 memory. */
6927 if (classes[0] != X86_64_SSE_CLASS)
6928 return 0;
6930 for (i = 1; i < words; i++)
6931 if (classes[i] != X86_64_SSEUP_CLASS)
6932 return 0;
6935 /* Final merger cleanup. */
6936 for (i = 0; i < words; i++)
6938 /* If one class is MEMORY, everything should be passed in
6939 memory. */
6940 if (classes[i] == X86_64_MEMORY_CLASS)
6941 return 0;
6943 /* The X86_64_SSEUP_CLASS should be always preceded by
6944 X86_64_SSE_CLASS or X86_64_SSEUP_CLASS. */
6945 if (classes[i] == X86_64_SSEUP_CLASS
6946 && classes[i - 1] != X86_64_SSE_CLASS
6947 && classes[i - 1] != X86_64_SSEUP_CLASS)
6949 /* The first one should never be X86_64_SSEUP_CLASS. */
6950 gcc_assert (i != 0);
6951 classes[i] = X86_64_SSE_CLASS;
6954 /* If X86_64_X87UP_CLASS isn't preceded by X86_64_X87_CLASS,
6955 everything should be passed in memory. */
6956 if (classes[i] == X86_64_X87UP_CLASS
6957 && (classes[i - 1] != X86_64_X87_CLASS))
6959 static bool warned;
6961 /* The first one should never be X86_64_X87UP_CLASS. */
6962 gcc_assert (i != 0);
6963 if (!warned && warn_psabi)
6965 warned = true;
6966 inform (input_location,
6967 "the ABI of passing union with long double"
6968 " has changed in GCC 4.4");
6970 return 0;
6973 return words;
6976 /* Compute the alignment needed. We align all types to natural boundaries,
6977 with the exception of XFmode, which is aligned to 64 bits. */
6978 if (mode != VOIDmode && mode != BLKmode)
6980 int mode_alignment = GET_MODE_BITSIZE (mode);
6982 if (mode == XFmode)
6983 mode_alignment = 128;
6984 else if (mode == XCmode)
6985 mode_alignment = 256;
6986 if (COMPLEX_MODE_P (mode))
6987 mode_alignment /= 2;
6988 /* Misaligned fields are always returned in memory. */
6989 if (bit_offset % mode_alignment)
6990 return 0;
6993 /* for V1xx modes, just use the base mode */
6994 if (VECTOR_MODE_P (mode) && mode != V1DImode && mode != V1TImode
6995 && GET_MODE_SIZE (GET_MODE_INNER (mode)) == bytes)
6996 mode = GET_MODE_INNER (mode);
6998 /* Classification of atomic types. */
6999 switch (mode)
7001 case SDmode:
7002 case DDmode:
7003 classes[0] = X86_64_SSE_CLASS;
7004 return 1;
7005 case TDmode:
7006 classes[0] = X86_64_SSE_CLASS;
7007 classes[1] = X86_64_SSEUP_CLASS;
7008 return 2;
7009 case DImode:
7010 case SImode:
7011 case HImode:
7012 case QImode:
7013 case CSImode:
7014 case CHImode:
7015 case CQImode:
7017 int size = bit_offset + (int) GET_MODE_BITSIZE (mode);
7019 /* Analyze last 128 bits only. */
7020 size = (size - 1) & 0x7f;
7022 if (size < 32)
7024 classes[0] = X86_64_INTEGERSI_CLASS;
7025 return 1;
7027 else if (size < 64)
7029 classes[0] = X86_64_INTEGER_CLASS;
7030 return 1;
7032 else if (size < 64+32)
7034 classes[0] = X86_64_INTEGER_CLASS;
7035 classes[1] = X86_64_INTEGERSI_CLASS;
7036 return 2;
7038 else if (size < 64+64)
7040 classes[0] = classes[1] = X86_64_INTEGER_CLASS;
7041 return 2;
7043 else
7044 gcc_unreachable ();
7046 case CDImode:
7047 case TImode:
7048 classes[0] = classes[1] = X86_64_INTEGER_CLASS;
7049 return 2;
7050 case COImode:
7051 case OImode:
7052 /* OImode shouldn't be used directly. */
7053 gcc_unreachable ();
7054 case CTImode:
7055 return 0;
7056 case SFmode:
7057 if (!(bit_offset % 64))
7058 classes[0] = X86_64_SSESF_CLASS;
7059 else
7060 classes[0] = X86_64_SSE_CLASS;
7061 return 1;
7062 case DFmode:
7063 classes[0] = X86_64_SSEDF_CLASS;
7064 return 1;
7065 case XFmode:
7066 classes[0] = X86_64_X87_CLASS;
7067 classes[1] = X86_64_X87UP_CLASS;
7068 return 2;
7069 case TFmode:
7070 classes[0] = X86_64_SSE_CLASS;
7071 classes[1] = X86_64_SSEUP_CLASS;
7072 return 2;
7073 case SCmode:
7074 classes[0] = X86_64_SSE_CLASS;
7075 if (!(bit_offset % 64))
7076 return 1;
7077 else
7079 static bool warned;
7081 if (!warned && warn_psabi)
7083 warned = true;
7084 inform (input_location,
7085 "the ABI of passing structure with complex float"
7086 " member has changed in GCC 4.4");
7088 classes[1] = X86_64_SSESF_CLASS;
7089 return 2;
7091 case DCmode:
7092 classes[0] = X86_64_SSEDF_CLASS;
7093 classes[1] = X86_64_SSEDF_CLASS;
7094 return 2;
7095 case XCmode:
7096 classes[0] = X86_64_COMPLEX_X87_CLASS;
7097 return 1;
7098 case TCmode:
7099 /* This mode is larger than 16 bytes. */
7100 return 0;
7101 case V8SFmode:
7102 case V8SImode:
7103 case V32QImode:
7104 case V16HImode:
7105 case V4DFmode:
7106 case V4DImode:
7107 classes[0] = X86_64_SSE_CLASS;
7108 classes[1] = X86_64_SSEUP_CLASS;
7109 classes[2] = X86_64_SSEUP_CLASS;
7110 classes[3] = X86_64_SSEUP_CLASS;
7111 return 4;
7112 case V8DFmode:
7113 case V16SFmode:
7114 case V8DImode:
7115 case V16SImode:
7116 case V32HImode:
7117 case V64QImode:
7118 classes[0] = X86_64_SSE_CLASS;
7119 classes[1] = X86_64_SSEUP_CLASS;
7120 classes[2] = X86_64_SSEUP_CLASS;
7121 classes[3] = X86_64_SSEUP_CLASS;
7122 classes[4] = X86_64_SSEUP_CLASS;
7123 classes[5] = X86_64_SSEUP_CLASS;
7124 classes[6] = X86_64_SSEUP_CLASS;
7125 classes[7] = X86_64_SSEUP_CLASS;
7126 return 8;
7127 case V4SFmode:
7128 case V4SImode:
7129 case V16QImode:
7130 case V8HImode:
7131 case V2DFmode:
7132 case V2DImode:
7133 classes[0] = X86_64_SSE_CLASS;
7134 classes[1] = X86_64_SSEUP_CLASS;
7135 return 2;
7136 case V1TImode:
7137 case V1DImode:
7138 case V2SFmode:
7139 case V2SImode:
7140 case V4HImode:
7141 case V8QImode:
7142 classes[0] = X86_64_SSE_CLASS;
7143 return 1;
7144 case BLKmode:
7145 case VOIDmode:
7146 return 0;
7147 default:
7148 gcc_assert (VECTOR_MODE_P (mode));
7150 if (bytes > 16)
7151 return 0;
7153 gcc_assert (GET_MODE_CLASS (GET_MODE_INNER (mode)) == MODE_INT);
7155 if (bit_offset + GET_MODE_BITSIZE (mode) <= 32)
7156 classes[0] = X86_64_INTEGERSI_CLASS;
7157 else
7158 classes[0] = X86_64_INTEGER_CLASS;
7159 classes[1] = X86_64_INTEGER_CLASS;
7160 return 1 + (bytes > 8);
7164 /* Examine the argument and set the number of registers required in each
7165 class. Return true iff the parameter should be passed in memory. */
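/* For the struct { double d; int i; } example above this sets
   *sse_nregs to 1 and *int_nregs to 1 and returns false; whenever
   classify_argument decides on memory (returns 0), true is returned. */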
7167 static bool
7168 examine_argument (machine_mode mode, const_tree type, int in_return,
7169 int *int_nregs, int *sse_nregs)
7171 enum x86_64_reg_class regclass[MAX_CLASSES];
7172 int n = classify_argument (mode, type, regclass, 0);
7174 *int_nregs = 0;
7175 *sse_nregs = 0;
7177 if (!n)
7178 return true;
7179 for (n--; n >= 0; n--)
7180 switch (regclass[n])
7182 case X86_64_INTEGER_CLASS:
7183 case X86_64_INTEGERSI_CLASS:
7184 (*int_nregs)++;
7185 break;
7186 case X86_64_SSE_CLASS:
7187 case X86_64_SSESF_CLASS:
7188 case X86_64_SSEDF_CLASS:
7189 (*sse_nregs)++;
7190 break;
7191 case X86_64_NO_CLASS:
7192 case X86_64_SSEUP_CLASS:
7193 break;
7194 case X86_64_X87_CLASS:
7195 case X86_64_X87UP_CLASS:
7196 case X86_64_COMPLEX_X87_CLASS:
7197 if (!in_return)
7198 return true;
7199 break;
7200 case X86_64_MEMORY_CLASS:
7201 gcc_unreachable ();
7204 return false;
7207 /* Construct container for the argument used by GCC interface. See
7208 FUNCTION_ARG for the detailed description. */
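/* Roughly, for the two-eightbyte struct used as an example above, the
   container built below is a PARALLEL of the form
     (parallel [(expr_list (reg:DF xmm0) (const_int 0))
                (expr_list (reg:DI di) (const_int 8))])
   recording which register holds each eightbyte and at which byte offset
   of the argument it lives. */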
7210 static rtx
7211 construct_container (machine_mode mode, machine_mode orig_mode,
7212 const_tree type, int in_return, int nintregs, int nsseregs,
7213 const int *intreg, int sse_regno)
7215 /* The following variables hold the static issued_error state. */
7216 static bool issued_sse_arg_error;
7217 static bool issued_sse_ret_error;
7218 static bool issued_x87_ret_error;
7220 machine_mode tmpmode;
7221 int bytes =
7222 (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
7223 enum x86_64_reg_class regclass[MAX_CLASSES];
7224 int n;
7225 int i;
7226 int nexps = 0;
7227 int needed_sseregs, needed_intregs;
7228 rtx exp[MAX_CLASSES];
7229 rtx ret;
7231 n = classify_argument (mode, type, regclass, 0);
7232 if (!n)
7233 return NULL;
7234 if (examine_argument (mode, type, in_return, &needed_intregs,
7235 &needed_sseregs))
7236 return NULL;
7237 if (needed_intregs > nintregs || needed_sseregs > nsseregs)
7238 return NULL;
7240 /* We allowed the user to turn off SSE for kernel mode. Don't crash if
7241 some less clueful developer tries to use floating-point anyway. */
7242 if (needed_sseregs && !TARGET_SSE)
7244 if (in_return)
7246 if (!issued_sse_ret_error)
7248 error ("SSE register return with SSE disabled");
7249 issued_sse_ret_error = true;
7252 else if (!issued_sse_arg_error)
7254 error ("SSE register argument with SSE disabled");
7255 issued_sse_arg_error = true;
7257 return NULL;
7260 /* Likewise, error if the ABI requires us to return values in the
7261 x87 registers and the user specified -mno-80387. */
7262 if (!TARGET_FLOAT_RETURNS_IN_80387 && in_return)
7263 for (i = 0; i < n; i++)
7264 if (regclass[i] == X86_64_X87_CLASS
7265 || regclass[i] == X86_64_X87UP_CLASS
7266 || regclass[i] == X86_64_COMPLEX_X87_CLASS)
7268 if (!issued_x87_ret_error)
7270 error ("x87 register return with x87 disabled");
7271 issued_x87_ret_error = true;
7273 return NULL;
7276 /* First construct simple cases. Avoid SCmode, since we want to use
7277 a single register to pass this type. */
7278 if (n == 1 && mode != SCmode)
7279 switch (regclass[0])
7281 case X86_64_INTEGER_CLASS:
7282 case X86_64_INTEGERSI_CLASS:
7283 return gen_rtx_REG (mode, intreg[0]);
7284 case X86_64_SSE_CLASS:
7285 case X86_64_SSESF_CLASS:
7286 case X86_64_SSEDF_CLASS:
7287 if (mode != BLKmode)
7288 return gen_reg_or_parallel (mode, orig_mode,
7289 SSE_REGNO (sse_regno));
7290 break;
7291 case X86_64_X87_CLASS:
7292 case X86_64_COMPLEX_X87_CLASS:
7293 return gen_rtx_REG (mode, FIRST_STACK_REG);
7294 case X86_64_NO_CLASS:
7295 /* Zero sized array, struct or class. */
7296 return NULL;
7297 default:
7298 gcc_unreachable ();
7300 if (n == 2
7301 && regclass[0] == X86_64_SSE_CLASS
7302 && regclass[1] == X86_64_SSEUP_CLASS
7303 && mode != BLKmode)
7304 return gen_reg_or_parallel (mode, orig_mode,
7305 SSE_REGNO (sse_regno));
7306 if (n == 4
7307 && regclass[0] == X86_64_SSE_CLASS
7308 && regclass[1] == X86_64_SSEUP_CLASS
7309 && regclass[2] == X86_64_SSEUP_CLASS
7310 && regclass[3] == X86_64_SSEUP_CLASS
7311 && mode != BLKmode)
7312 return gen_reg_or_parallel (mode, orig_mode,
7313 SSE_REGNO (sse_regno));
7314 if (n == 8
7315 && regclass[0] == X86_64_SSE_CLASS
7316 && regclass[1] == X86_64_SSEUP_CLASS
7317 && regclass[2] == X86_64_SSEUP_CLASS
7318 && regclass[3] == X86_64_SSEUP_CLASS
7319 && regclass[4] == X86_64_SSEUP_CLASS
7320 && regclass[5] == X86_64_SSEUP_CLASS
7321 && regclass[6] == X86_64_SSEUP_CLASS
7322 && regclass[7] == X86_64_SSEUP_CLASS
7323 && mode != BLKmode)
7324 return gen_reg_or_parallel (mode, orig_mode,
7325 SSE_REGNO (sse_regno));
7326 if (n == 2
7327 && regclass[0] == X86_64_X87_CLASS
7328 && regclass[1] == X86_64_X87UP_CLASS)
7329 return gen_rtx_REG (XFmode, FIRST_STACK_REG);
7331 if (n == 2
7332 && regclass[0] == X86_64_INTEGER_CLASS
7333 && regclass[1] == X86_64_INTEGER_CLASS
7334 && (mode == CDImode || mode == TImode)
7335 && intreg[0] + 1 == intreg[1])
7336 return gen_rtx_REG (mode, intreg[0]);
7338 /* Otherwise figure out the entries of the PARALLEL. */
7339 for (i = 0; i < n; i++)
7341 int pos;
7343 switch (regclass[i])
7345 case X86_64_NO_CLASS:
7346 break;
7347 case X86_64_INTEGER_CLASS:
7348 case X86_64_INTEGERSI_CLASS:
7349 /* Merge TImodes on aligned occasions here too. */
7350 if (i * 8 + 8 > bytes)
7351 tmpmode
7352 = mode_for_size ((bytes - i * 8) * BITS_PER_UNIT, MODE_INT, 0);
7353 else if (regclass[i] == X86_64_INTEGERSI_CLASS)
7354 tmpmode = SImode;
7355 else
7356 tmpmode = DImode;
7357 /* We've requested 24 bytes we
7358 don't have a mode for. Use DImode. */
7359 if (tmpmode == BLKmode)
7360 tmpmode = DImode;
7361 exp [nexps++]
7362 = gen_rtx_EXPR_LIST (VOIDmode,
7363 gen_rtx_REG (tmpmode, *intreg),
7364 GEN_INT (i*8));
7365 intreg++;
7366 break;
7367 case X86_64_SSESF_CLASS:
7368 exp [nexps++]
7369 = gen_rtx_EXPR_LIST (VOIDmode,
7370 gen_rtx_REG (SFmode,
7371 SSE_REGNO (sse_regno)),
7372 GEN_INT (i*8));
7373 sse_regno++;
7374 break;
7375 case X86_64_SSEDF_CLASS:
7376 exp [nexps++]
7377 = gen_rtx_EXPR_LIST (VOIDmode,
7378 gen_rtx_REG (DFmode,
7379 SSE_REGNO (sse_regno)),
7380 GEN_INT (i*8));
7381 sse_regno++;
7382 break;
7383 case X86_64_SSE_CLASS:
7384 pos = i;
7385 switch (n)
7387 case 1:
7388 tmpmode = DImode;
7389 break;
7390 case 2:
7391 if (i == 0 && regclass[1] == X86_64_SSEUP_CLASS)
7393 tmpmode = TImode;
7394 i++;
7396 else
7397 tmpmode = DImode;
7398 break;
7399 case 4:
7400 gcc_assert (i == 0
7401 && regclass[1] == X86_64_SSEUP_CLASS
7402 && regclass[2] == X86_64_SSEUP_CLASS
7403 && regclass[3] == X86_64_SSEUP_CLASS);
7404 tmpmode = OImode;
7405 i += 3;
7406 break;
7407 case 8:
7408 gcc_assert (i == 0
7409 && regclass[1] == X86_64_SSEUP_CLASS
7410 && regclass[2] == X86_64_SSEUP_CLASS
7411 && regclass[3] == X86_64_SSEUP_CLASS
7412 && regclass[4] == X86_64_SSEUP_CLASS
7413 && regclass[5] == X86_64_SSEUP_CLASS
7414 && regclass[6] == X86_64_SSEUP_CLASS
7415 && regclass[7] == X86_64_SSEUP_CLASS);
7416 tmpmode = XImode;
7417 i += 7;
7418 break;
7419 default:
7420 gcc_unreachable ();
7422 exp [nexps++]
7423 = gen_rtx_EXPR_LIST (VOIDmode,
7424 gen_rtx_REG (tmpmode,
7425 SSE_REGNO (sse_regno)),
7426 GEN_INT (pos*8));
7427 sse_regno++;
7428 break;
7429 default:
7430 gcc_unreachable ();
7434 /* Empty aligned struct, union or class. */
7435 if (nexps == 0)
7436 return NULL;
7438 ret = gen_rtx_PARALLEL (mode, rtvec_alloc (nexps));
7439 for (i = 0; i < nexps; i++)
7440 XVECEXP (ret, 0, i) = exp [i];
7441 return ret;
7444 /* Update the data in CUM to advance over an argument of mode MODE
7445 and data type TYPE. (TYPE is null for libcalls where that information
7446 may not be available.)
7448 Return the number of integer registers advanced over. */
7450 static int
7451 function_arg_advance_32 (CUMULATIVE_ARGS *cum, machine_mode mode,
7452 const_tree type, HOST_WIDE_INT bytes,
7453 HOST_WIDE_INT words)
7455 int res = 0;
7456 bool error_p = false;
7458 switch (mode)
7460 default:
7461 break;
7463 case BLKmode:
7464 if (bytes < 0)
7465 break;
7466 /* FALLTHRU */
7468 case DImode:
7469 case SImode:
7470 case HImode:
7471 case QImode:
7472 cum->words += words;
7473 cum->nregs -= words;
7474 cum->regno += words;
7475 if (cum->nregs >= 0)
7476 res = words;
7477 if (cum->nregs <= 0)
7479 cum->nregs = 0;
7480 cum->regno = 0;
7482 break;
7484 case OImode:
7485 /* OImode shouldn't be used directly. */
7486 gcc_unreachable ();
7488 case DFmode:
7489 if (cum->float_in_sse == -1)
7490 error_p = 1;
7491 if (cum->float_in_sse < 2)
7492 break;
7493 case SFmode:
7494 if (cum->float_in_sse == -1)
7495 error_p = 1;
7496 if (cum->float_in_sse < 1)
7497 break;
7498 /* FALLTHRU */
7500 case V8SFmode:
7501 case V8SImode:
7502 case V64QImode:
7503 case V32HImode:
7504 case V16SImode:
7505 case V8DImode:
7506 case V16SFmode:
7507 case V8DFmode:
7508 case V32QImode:
7509 case V16HImode:
7510 case V4DFmode:
7511 case V4DImode:
7512 case TImode:
7513 case V16QImode:
7514 case V8HImode:
7515 case V4SImode:
7516 case V2DImode:
7517 case V4SFmode:
7518 case V2DFmode:
7519 if (!type || !AGGREGATE_TYPE_P (type))
7521 cum->sse_words += words;
7522 cum->sse_nregs -= 1;
7523 cum->sse_regno += 1;
7524 if (cum->sse_nregs <= 0)
7526 cum->sse_nregs = 0;
7527 cum->sse_regno = 0;
7530 break;
7532 case V8QImode:
7533 case V4HImode:
7534 case V2SImode:
7535 case V2SFmode:
7536 case V1TImode:
7537 case V1DImode:
7538 if (!type || !AGGREGATE_TYPE_P (type))
7540 cum->mmx_words += words;
7541 cum->mmx_nregs -= 1;
7542 cum->mmx_regno += 1;
7543 if (cum->mmx_nregs <= 0)
7545 cum->mmx_nregs = 0;
7546 cum->mmx_regno = 0;
7549 break;
7551 if (error_p)
7553 cum->float_in_sse = 0;
7554 error ("calling %qD with SSE calling convention without "
7555 "SSE/SSE2 enabled", cum->decl);
7556 sorry ("this is a GCC bug that can be worked around by adding "
7557 "attribute used to function called");
7560 return res;
7563 static int
7564 function_arg_advance_64 (CUMULATIVE_ARGS *cum, machine_mode mode,
7565 const_tree type, HOST_WIDE_INT words, bool named)
7567 int int_nregs, sse_nregs;
7569 /* Unnamed 512-bit and 256-bit vector mode parameters are passed on the stack. */
7570 if (!named && (VALID_AVX512F_REG_MODE (mode)
7571 || VALID_AVX256_REG_MODE (mode)))
7572 return 0;
7574 if (!examine_argument (mode, type, 0, &int_nregs, &sse_nregs)
7575 && sse_nregs <= cum->sse_nregs && int_nregs <= cum->nregs)
7577 cum->nregs -= int_nregs;
7578 cum->sse_nregs -= sse_nregs;
7579 cum->regno += int_nregs;
7580 cum->sse_regno += sse_nregs;
7581 return int_nregs;
7583 else
7585 int align = ix86_function_arg_boundary (mode, type) / BITS_PER_WORD;
7586 cum->words = (cum->words + align - 1) & ~(align - 1);
7587 cum->words += words;
7588 return 0;
7592 static int
7593 function_arg_advance_ms_64 (CUMULATIVE_ARGS *cum, HOST_WIDE_INT bytes,
7594 HOST_WIDE_INT words)
7596 /* Otherwise, this should be passed indirectly. */
7597 gcc_assert (bytes == 1 || bytes == 2 || bytes == 4 || bytes == 8);
7599 cum->words += words;
7600 if (cum->nregs > 0)
7602 cum->nregs -= 1;
7603 cum->regno += 1;
7604 return 1;
7606 return 0;
7609 /* Update the data in CUM to advance over an argument of mode MODE and
7610 data type TYPE. (TYPE is null for libcalls where that information
7611 may not be available.) */
7613 static void
7614 ix86_function_arg_advance (cumulative_args_t cum_v, machine_mode mode,
7615 const_tree type, bool named)
7617 CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
7618 HOST_WIDE_INT bytes, words;
7619 int nregs;
7621 if (mode == BLKmode)
7622 bytes = int_size_in_bytes (type);
7623 else
7624 bytes = GET_MODE_SIZE (mode);
7625 words = (bytes + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
7627 if (type)
7628 mode = type_natural_mode (type, NULL, false);
7630 if ((type && POINTER_BOUNDS_TYPE_P (type))
7631 || POINTER_BOUNDS_MODE_P (mode))
7633 /* If we pass bounds in the Bounds Table (BT), just update the remaining bounds count. */
7634 if (cum->bnds_in_bt)
7636 cum->bnds_in_bt--;
7637 return;
7640 /* Update the remaining number of bounds to force. */
7641 if (cum->force_bnd_pass)
7642 cum->force_bnd_pass--;
7644 cum->bnd_regno++;
7646 return;
7649 /* The first arg not going to Bounds Tables resets this counter. */
7650 cum->bnds_in_bt = 0;
7651 /* For unnamed args we always pass bounds to avoid a bounds mismatch when
7652 the passed and received types do not match. If bounds do not follow an
7653 unnamed arg, still pretend the required number of bounds were passed. */
7654 if (cum->force_bnd_pass)
7656 cum->bnd_regno += cum->force_bnd_pass;
7657 cum->force_bnd_pass = 0;
7660 if (TARGET_64BIT)
7662 enum calling_abi call_abi = cum ? cum->call_abi : ix86_abi;
7664 if (call_abi == MS_ABI)
7665 nregs = function_arg_advance_ms_64 (cum, bytes, words);
7666 else
7667 nregs = function_arg_advance_64 (cum, mode, type, words, named);
7669 else
7670 nregs = function_arg_advance_32 (cum, mode, type, bytes, words);
7672 /* For stdarg we expect bounds to be passed for each value passed
7673 in a register. */
7674 if (cum->stdarg)
7675 cum->force_bnd_pass = nregs;
7676 /* For pointers passed in memory we expect bounds to be passed in the
7677 Bounds Table. */
7678 if (!nregs)
7679 cum->bnds_in_bt = chkp_type_bounds_count (type);
7682 /* Define where to put the arguments to a function.
7683 Value is zero to push the argument on the stack,
7684 or a hard register in which to store the argument.
7686 MODE is the argument's machine mode.
7687 TYPE is the data type of the argument (as a tree).
7688 This is null for libcalls where that information may
7689 not be available.
7690 CUM is a variable of type CUMULATIVE_ARGS which gives info about
7691 the preceding args and about the function being called.
7692 NAMED is nonzero if this argument is a named parameter
7693 (otherwise it is an extra parameter matching an ellipsis). */
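/* For example, with __attribute__ ((regparm (3))) the first three integral
   arguments go in %eax, %edx and %ecx, while fastcall instead puts the
   first two DWORD-or-smaller arguments in %ecx and %edx, as handled
   below. */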
7695 static rtx
7696 function_arg_32 (CUMULATIVE_ARGS *cum, machine_mode mode,
7697 machine_mode orig_mode, const_tree type,
7698 HOST_WIDE_INT bytes, HOST_WIDE_INT words)
7700 bool error_p = false;
7701 /* Avoid the AL settings for the Unix64 ABI. */
7702 if (mode == VOIDmode)
7703 return constm1_rtx;
7705 switch (mode)
7707 default:
7708 break;
7710 case BLKmode:
7711 if (bytes < 0)
7712 break;
7713 /* FALLTHRU */
7714 case DImode:
7715 case SImode:
7716 case HImode:
7717 case QImode:
7718 if (words <= cum->nregs)
7720 int regno = cum->regno;
7722 /* Fastcall allocates the first two DWORD (SImode) or
7723 smaller arguments to ECX and EDX if they aren't
7724 aggregate types. */
7725 if (cum->fastcall)
7727 if (mode == BLKmode
7728 || mode == DImode
7729 || (type && AGGREGATE_TYPE_P (type)))
7730 break;
7732 /* ECX, not EAX, is the first allocated register. */
7733 if (regno == AX_REG)
7734 regno = CX_REG;
7736 return gen_rtx_REG (mode, regno);
7738 break;
7740 case DFmode:
7741 if (cum->float_in_sse == -1)
7742 error_p = 1;
7743 if (cum->float_in_sse < 2)
7744 break;
7745 case SFmode:
7746 if (cum->float_in_sse == -1)
7747 error_p = 1;
7748 if (cum->float_in_sse < 1)
7749 break;
7750 /* FALLTHRU */
7751 case TImode:
7752 /* In 32-bit mode, we pass TImode in xmm registers. */
7753 case V16QImode:
7754 case V8HImode:
7755 case V4SImode:
7756 case V2DImode:
7757 case V4SFmode:
7758 case V2DFmode:
7759 if (!type || !AGGREGATE_TYPE_P (type))
7761 if (cum->sse_nregs)
7762 return gen_reg_or_parallel (mode, orig_mode,
7763 cum->sse_regno + FIRST_SSE_REG);
7765 break;
7767 case OImode:
7768 case XImode:
7769 /* OImode and XImode shouldn't be used directly. */
7770 gcc_unreachable ();
7772 case V64QImode:
7773 case V32HImode:
7774 case V16SImode:
7775 case V8DImode:
7776 case V16SFmode:
7777 case V8DFmode:
7778 case V8SFmode:
7779 case V8SImode:
7780 case V32QImode:
7781 case V16HImode:
7782 case V4DFmode:
7783 case V4DImode:
7784 if (!type || !AGGREGATE_TYPE_P (type))
7786 if (cum->sse_nregs)
7787 return gen_reg_or_parallel (mode, orig_mode,
7788 cum->sse_regno + FIRST_SSE_REG);
7790 break;
7792 case V8QImode:
7793 case V4HImode:
7794 case V2SImode:
7795 case V2SFmode:
7796 case V1TImode:
7797 case V1DImode:
7798 if (!type || !AGGREGATE_TYPE_P (type))
7800 if (cum->mmx_nregs)
7801 return gen_reg_or_parallel (mode, orig_mode,
7802 cum->mmx_regno + FIRST_MMX_REG);
7804 break;
7806 if (error_p)
7808 cum->float_in_sse = 0;
7809 error ("calling %qD with SSE calling convention without "
7810 "SSE/SSE2 enabled", cum->decl);
7811 sorry ("this is a GCC bug that can be worked around by adding "
7812 "attribute used to function called");
7815 return NULL_RTX;
7818 static rtx
7819 function_arg_64 (const CUMULATIVE_ARGS *cum, machine_mode mode,
7820 machine_mode orig_mode, const_tree type, bool named)
7822 /* Handle a hidden AL argument containing the number of vector registers
7823 used, for varargs x86-64 functions. */
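/* E.g. for a call like printf ("%f", x) the caller sets %al to 1 before
   the call, telling the callee's varargs prologue how many vector
   registers have to be dumped into the register save area. */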
7824 if (mode == VOIDmode)
7825 return GEN_INT (cum->maybe_vaarg
7826 ? (cum->sse_nregs < 0
7827 ? X86_64_SSE_REGPARM_MAX
7828 : cum->sse_regno)
7829 : -1);
7831 switch (mode)
7833 default:
7834 break;
7836 case V8SFmode:
7837 case V8SImode:
7838 case V32QImode:
7839 case V16HImode:
7840 case V4DFmode:
7841 case V4DImode:
7842 case V16SFmode:
7843 case V16SImode:
7844 case V64QImode:
7845 case V32HImode:
7846 case V8DFmode:
7847 case V8DImode:
7848 /* Unnamed 256-bit and 512-bit vector mode parameters are passed on the stack. */
7849 if (!named)
7850 return NULL;
7851 break;
7854 return construct_container (mode, orig_mode, type, 0, cum->nregs,
7855 cum->sse_nregs,
7856 &x86_64_int_parameter_registers [cum->regno],
7857 cum->sse_regno);
7860 static rtx
7861 function_arg_ms_64 (const CUMULATIVE_ARGS *cum, machine_mode mode,
7862 machine_mode orig_mode, bool named,
7863 HOST_WIDE_INT bytes)
7865 unsigned int regno;
7867 /* We need to add a clobber for MS_ABI->SYSV ABI calls in expand_call.
7868 We use the value -2 to specify that the current function call is MS_ABI. */
7869 if (mode == VOIDmode)
7870 return GEN_INT (-2);
7872 /* If we've run out of registers, it goes on the stack. */
7873 if (cum->nregs == 0)
7874 return NULL_RTX;
7876 regno = x86_64_ms_abi_int_parameter_registers[cum->regno];
7878 /* Only floating point modes are passed in anything but integer regs. */
7879 if (TARGET_SSE && (mode == SFmode || mode == DFmode))
7881 if (named)
7882 regno = cum->regno + FIRST_SSE_REG;
7883 else
7885 rtx t1, t2;
7887 /* Unnamed floating parameters are passed in both the
7888 SSE and integer registers. */
7889 t1 = gen_rtx_REG (mode, cum->regno + FIRST_SSE_REG);
7890 t2 = gen_rtx_REG (mode, regno);
7891 t1 = gen_rtx_EXPR_LIST (VOIDmode, t1, const0_rtx);
7892 t2 = gen_rtx_EXPR_LIST (VOIDmode, t2, const0_rtx);
7893 return gen_rtx_PARALLEL (mode, gen_rtvec (2, t1, t2));
7896 /* Handle aggregate types passed in registers. */
7897 if (orig_mode == BLKmode)
7899 if (bytes > 0 && bytes <= 8)
7900 mode = (bytes > 4 ? DImode : SImode);
7901 if (mode == BLKmode)
7902 mode = DImode;
7905 return gen_reg_or_parallel (mode, orig_mode, regno);
7908 /* Return where to put the arguments to a function.
7909 Return zero to push the argument on the stack, or a hard register in which to store the argument.
7911 MODE is the argument's machine mode. TYPE is the data type of the
7912 argument. It is null for libcalls where that information may not be
7913 available. CUM gives information about the preceding args and about
7914 the function being called. NAMED is nonzero if this argument is a
7915 named parameter (otherwise it is an extra parameter matching an
7916 ellipsis). */
7918 static rtx
7919 ix86_function_arg (cumulative_args_t cum_v, machine_mode omode,
7920 const_tree type, bool named)
7922 CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
7923 machine_mode mode = omode;
7924 HOST_WIDE_INT bytes, words;
7925 rtx arg;
7927 /* All pointer bounds arguments are handled separately here. */
7928 if ((type && POINTER_BOUNDS_TYPE_P (type))
7929 || POINTER_BOUNDS_MODE_P (mode))
7931 /* Return NULL if bounds are forced to go in Bounds Table. */
7932 if (cum->bnds_in_bt)
7933 arg = NULL;
7934 /* Return the next available bound reg if any. */
7935 else if (cum->bnd_regno <= LAST_BND_REG)
7936 arg = gen_rtx_REG (BNDmode, cum->bnd_regno);
7937 /* Return the next special slot number otherwise. */
7938 else
7939 arg = GEN_INT (cum->bnd_regno - LAST_BND_REG - 1);
7941 return arg;
7944 if (mode == BLKmode)
7945 bytes = int_size_in_bytes (type);
7946 else
7947 bytes = GET_MODE_SIZE (mode);
7948 words = (bytes + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
7950 /* To simplify the code below, represent vector types with a vector mode
7951 even if MMX/SSE are not active. */
7952 if (type && TREE_CODE (type) == VECTOR_TYPE)
7953 mode = type_natural_mode (type, cum, false);
7955 if (TARGET_64BIT)
7957 enum calling_abi call_abi = cum ? cum->call_abi : ix86_abi;
7959 if (call_abi == MS_ABI)
7960 arg = function_arg_ms_64 (cum, mode, omode, named, bytes);
7961 else
7962 arg = function_arg_64 (cum, mode, omode, type, named);
7964 else
7965 arg = function_arg_32 (cum, mode, omode, type, bytes, words);
7967 return arg;
7970 /* A C expression that indicates when an argument must be passed by
7971 reference. If nonzero for an argument, a copy of that argument is
7972 made in memory and a pointer to the argument is passed instead of
7973 the argument itself. The pointer is passed in whatever way is
7974 appropriate for passing a pointer to that type. */
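/* Under the MS x64 convention handled below, only aggregates of exactly
   1, 2, 4 or 8 bytes are passed in place; anything else -- an array,
   __m128, or a 12-byte struct, say -- is passed by reference through a
   hidden pointer. */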
7976 static bool
7977 ix86_pass_by_reference (cumulative_args_t cum_v, machine_mode mode,
7978 const_tree type, bool)
7980 CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
7982 /* Bounds are never passed by reference. */
7983 if ((type && POINTER_BOUNDS_TYPE_P (type))
7984 || POINTER_BOUNDS_MODE_P (mode))
7985 return false;
7987 if (TARGET_64BIT)
7989 enum calling_abi call_abi = cum ? cum->call_abi : ix86_abi;
7991 /* See Windows x64 Software Convention. */
7992 if (call_abi == MS_ABI)
7994 HOST_WIDE_INT msize = GET_MODE_SIZE (mode);
7996 if (type)
7998 /* Arrays are passed by reference. */
7999 if (TREE_CODE (type) == ARRAY_TYPE)
8000 return true;
8002 if (RECORD_OR_UNION_TYPE_P (type))
8004 /* Structs/unions of sizes other than 8, 16, 32, or 64 bits
8005 are passed by reference. */
8006 msize = int_size_in_bytes (type);
8010 /* __m128 is passed by reference. */
8011 return msize != 1 && msize != 2 && msize != 4 && msize != 8;
8013 else if (type && int_size_in_bytes (type) == -1)
8014 return true;
8017 return false;
8020 /* Return true when TYPE should be 128bit aligned for 32bit argument
8021 passing ABI. XXX: This function is obsolete and is only used for
8022 checking psABI compatibility with previous versions of GCC. */
8024 static bool
8025 ix86_compat_aligned_value_p (const_tree type)
8027 machine_mode mode = TYPE_MODE (type);
8028 if (((TARGET_SSE && SSE_REG_MODE_P (mode))
8029 || mode == TDmode
8030 || mode == TFmode
8031 || mode == TCmode)
8032 && (!TYPE_USER_ALIGN (type) || TYPE_ALIGN (type) > 128))
8033 return true;
8034 if (TYPE_ALIGN (type) < 128)
8035 return false;
8037 if (AGGREGATE_TYPE_P (type))
8039 /* Walk the aggregates recursively. */
8040 switch (TREE_CODE (type))
8042 case RECORD_TYPE:
8043 case UNION_TYPE:
8044 case QUAL_UNION_TYPE:
8046 tree field;
8048 /* Walk all the structure fields. */
8049 for (field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
8051 if (TREE_CODE (field) == FIELD_DECL
8052 && ix86_compat_aligned_value_p (TREE_TYPE (field)))
8053 return true;
8055 break;
8058 case ARRAY_TYPE:
8059 /* Just for use if some languages pass arrays by value. */
8060 if (ix86_compat_aligned_value_p (TREE_TYPE (type)))
8061 return true;
8062 break;
8064 default:
8065 gcc_unreachable ();
8068 return false;
8071 /* Return the alignment boundary for MODE and TYPE with alignment ALIGN.
8072 XXX: This function is obsolete and is only used for checking psABI
8073 compatibility with previous versions of GCC. */
8075 static unsigned int
8076 ix86_compat_function_arg_boundary (machine_mode mode,
8077 const_tree type, unsigned int align)
8079 /* In 32bit, only _Decimal128 and __float128 are aligned to their
8080 natural boundaries. */
8081 if (!TARGET_64BIT && mode != TDmode && mode != TFmode)
8083 /* i386 ABI defines all arguments to be 4 byte aligned. We have to
8084 make an exception for SSE modes since these require 128bit
8085 alignment.
8087 The handling here differs from field_alignment. ICC aligns MMX
8088 arguments to 4 byte boundaries, while structure fields are aligned
8089 to 8 byte boundaries. */
8090 if (!type)
8092 if (!(TARGET_SSE && SSE_REG_MODE_P (mode)))
8093 align = PARM_BOUNDARY;
8095 else
8097 if (!ix86_compat_aligned_value_p (type))
8098 align = PARM_BOUNDARY;
8101 if (align > BIGGEST_ALIGNMENT)
8102 align = BIGGEST_ALIGNMENT;
8103 return align;
8106 /* Return true when TYPE should be 128bit aligned for 32bit argument
8107 passing ABI. */
8109 static bool
8110 ix86_contains_aligned_value_p (const_tree type)
8112 machine_mode mode = TYPE_MODE (type);
8114 if (mode == XFmode || mode == XCmode)
8115 return false;
8117 if (TYPE_ALIGN (type) < 128)
8118 return false;
8120 if (AGGREGATE_TYPE_P (type))
8122 /* Walk the aggregates recursively. */
8123 switch (TREE_CODE (type))
8125 case RECORD_TYPE:
8126 case UNION_TYPE:
8127 case QUAL_UNION_TYPE:
8129 tree field;
8131 /* Walk all the structure fields. */
8132 for (field = TYPE_FIELDS (type);
8133 field;
8134 field = DECL_CHAIN (field))
8136 if (TREE_CODE (field) == FIELD_DECL
8137 && ix86_contains_aligned_value_p (TREE_TYPE (field)))
8138 return true;
8140 break;
8143 case ARRAY_TYPE:
8144 /* Just for use if some languages pass arrays by value. */
8145 if (ix86_contains_aligned_value_p (TREE_TYPE (type)))
8146 return true;
8147 break;
8149 default:
8150 gcc_unreachable ();
8153 else
8154 return TYPE_ALIGN (type) >= 128;
8156 return false;
8159 /* Gives the alignment boundary, in bits, of an argument with the
8160 specified mode and type. */
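/* For example, in 32-bit mode a __m128 argument is aligned to 128 bits on
   the stack while a plain double stays at PARM_BOUNDARY (32 bits); in
   64-bit mode arguments generally use their natural alignment. */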
8162 static unsigned int
8163 ix86_function_arg_boundary (machine_mode mode, const_tree type)
8165 unsigned int align;
8166 if (type)
8168 /* Since the main variant type is used for the call, convert
8169 TYPE to its main variant. */
8170 type = TYPE_MAIN_VARIANT (type);
8171 align = TYPE_ALIGN (type);
8173 else
8174 align = GET_MODE_ALIGNMENT (mode);
8175 if (align < PARM_BOUNDARY)
8176 align = PARM_BOUNDARY;
8177 else
8179 static bool warned;
8180 unsigned int saved_align = align;
8182 if (!TARGET_64BIT)
8184 /* i386 ABI defines XFmode arguments to be 4 byte aligned. */
8185 if (!type)
8187 if (mode == XFmode || mode == XCmode)
8188 align = PARM_BOUNDARY;
8190 else if (!ix86_contains_aligned_value_p (type))
8191 align = PARM_BOUNDARY;
8193 if (align < 128)
8194 align = PARM_BOUNDARY;
8197 if (warn_psabi
8198 && !warned
8199 && align != ix86_compat_function_arg_boundary (mode, type,
8200 saved_align))
8202 warned = true;
8203 inform (input_location,
8204 "The ABI for passing parameters with %d-byte"
8205 " alignment has changed in GCC 4.6",
8206 align / BITS_PER_UNIT);
8210 return align;
8213 /* Return true if N is a possible register number of function value. */
8215 static bool
8216 ix86_function_value_regno_p (const unsigned int regno)
8218 switch (regno)
8220 case AX_REG:
8221 return true;
8222 case DX_REG:
8223 return (!TARGET_64BIT || ix86_cfun_abi () != MS_ABI);
8224 case DI_REG:
8225 case SI_REG:
8226 return TARGET_64BIT && ix86_cfun_abi () != MS_ABI;
8228 case BND0_REG:
8229 case BND1_REG:
8230 return chkp_function_instrumented_p (current_function_decl);
8232 /* Complex values are returned in %st(0)/%st(1) pair. */
8233 case ST0_REG:
8234 case ST1_REG:
8235 /* TODO: The function should depend on current function ABI but
8236 builtins.c would need updating then. Therefore we use the
8237 default ABI. */
8238 if (TARGET_64BIT && ix86_cfun_abi () == MS_ABI)
8239 return false;
8240 return TARGET_FLOAT_RETURNS_IN_80387;
8242 /* Complex values are returned in %xmm0/%xmm1 pair. */
8243 case XMM0_REG:
8244 case XMM1_REG:
8245 return TARGET_SSE;
8247 case MM0_REG:
8248 if (TARGET_MACHO || TARGET_64BIT)
8249 return false;
8250 return TARGET_MMX;
8253 return false;
8256 /* Define how to find the value returned by a function.
8257 VALTYPE is the data type of the value (as a tree).
8258 If the precise function being called is known, FUNC is its FUNCTION_DECL;
8259 otherwise, FUNC is 0. */
8261 static rtx
8262 function_value_32 (machine_mode orig_mode, machine_mode mode,
8263 const_tree fntype, const_tree fn)
8265 unsigned int regno;
8267 /* 8-byte vector modes in %mm0. See ix86_return_in_memory for where
8268 we normally prevent this case when mmx is not available. However
8269 some ABIs may require the result to be returned like DImode. */
8270 if (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 8)
8271 regno = FIRST_MMX_REG;
8273 /* 16-byte vector modes in %xmm0. See ix86_return_in_memory for where
8274 we prevent this case when sse is not available. However some ABIs
8275 may require the result to be returned like integer TImode. */
8276 else if (mode == TImode
8277 || (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 16))
8278 regno = FIRST_SSE_REG;
8280 /* 32-byte vector modes in %ymm0. */
8281 else if (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 32)
8282 regno = FIRST_SSE_REG;
8284 /* 64-byte vector modes in %zmm0. */
8285 else if (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 64)
8286 regno = FIRST_SSE_REG;
8288 /* Floating point return values in %st(0) (unless -mno-fp-ret-in-387). */
8289 else if (X87_FLOAT_MODE_P (mode) && TARGET_FLOAT_RETURNS_IN_80387)
8290 regno = FIRST_FLOAT_REG;
8291 else
8292 /* Most things go in %eax. */
8293 regno = AX_REG;
8295 /* Override FP return register with %xmm0 for local functions when
8296 SSE math is enabled or for functions with sseregparm attribute. */
8297 if ((fn || fntype) && (mode == SFmode || mode == DFmode))
8299 int sse_level = ix86_function_sseregparm (fntype, fn, false);
8300 if (sse_level == -1)
8302 error ("calling %qD with SSE caling convention without "
8303 "SSE/SSE2 enabled", fn);
8304 sorry ("this is a GCC bug that can be worked around by adding "
8305 "attribute used to function called");
8307 else if ((sse_level >= 1 && mode == SFmode)
8308 || (sse_level == 2 && mode == DFmode))
8309 regno = FIRST_SSE_REG;
8312 /* OImode shouldn't be used directly. */
8313 gcc_assert (mode != OImode);
8315 return gen_rtx_REG (orig_mode, regno);
8318 static rtx
8319 function_value_64 (machine_mode orig_mode, machine_mode mode,
8320 const_tree valtype)
8322 rtx ret;
8324 /* Handle libcalls, which don't provide a type node. */
8325 if (valtype == NULL)
8327 unsigned int regno;
8329 switch (mode)
8331 case SFmode:
8332 case SCmode:
8333 case DFmode:
8334 case DCmode:
8335 case TFmode:
8336 case SDmode:
8337 case DDmode:
8338 case TDmode:
8339 regno = FIRST_SSE_REG;
8340 break;
8341 case XFmode:
8342 case XCmode:
8343 regno = FIRST_FLOAT_REG;
8344 break;
8345 case TCmode:
8346 return NULL;
8347 default:
8348 regno = AX_REG;
8351 return gen_rtx_REG (mode, regno);
8353 else if (POINTER_TYPE_P (valtype))
8355 /* Pointers are always returned in word_mode. */
8356 mode = word_mode;
8359 ret = construct_container (mode, orig_mode, valtype, 1,
8360 X86_64_REGPARM_MAX, X86_64_SSE_REGPARM_MAX,
8361 x86_64_int_return_registers, 0);
8363 /* For zero sized structures, construct_container returns NULL, but we
8364 need to keep the rest of the compiler happy by returning a meaningful value. */
8365 if (!ret)
8366 ret = gen_rtx_REG (orig_mode, AX_REG);
8368 return ret;
8371 static rtx
8372 function_value_ms_64 (machine_mode orig_mode, machine_mode mode,
8373 const_tree valtype)
8375 unsigned int regno = AX_REG;
8377 if (TARGET_SSE)
8379 switch (GET_MODE_SIZE (mode))
8381 case 16:
8382 if (valtype != NULL_TREE
8383 && !VECTOR_INTEGER_TYPE_P (valtype)
8384 && !VECTOR_INTEGER_TYPE_P (valtype)
8385 && !INTEGRAL_TYPE_P (valtype)
8386 && !VECTOR_FLOAT_TYPE_P (valtype))
8387 break;
8388 if ((SCALAR_INT_MODE_P (mode) || VECTOR_MODE_P (mode))
8389 && !COMPLEX_MODE_P (mode))
8390 regno = FIRST_SSE_REG;
8391 break;
8392 case 8:
8393 case 4:
8394 if (mode == SFmode || mode == DFmode)
8395 regno = FIRST_SSE_REG;
8396 break;
8397 default:
8398 break;
8401 return gen_rtx_REG (orig_mode, regno);
8404 static rtx
8405 ix86_function_value_1 (const_tree valtype, const_tree fntype_or_decl,
8406 machine_mode orig_mode, machine_mode mode)
8408 const_tree fn, fntype;
8410 fn = NULL_TREE;
8411 if (fntype_or_decl && DECL_P (fntype_or_decl))
8412 fn = fntype_or_decl;
8413 fntype = fn ? TREE_TYPE (fn) : fntype_or_decl;
8415 if ((valtype && POINTER_BOUNDS_TYPE_P (valtype))
8416 || POINTER_BOUNDS_MODE_P (mode))
8417 return gen_rtx_REG (BNDmode, FIRST_BND_REG);
8418 else if (TARGET_64BIT && ix86_function_type_abi (fntype) == MS_ABI)
8419 return function_value_ms_64 (orig_mode, mode, valtype);
8420 else if (TARGET_64BIT)
8421 return function_value_64 (orig_mode, mode, valtype);
8422 else
8423 return function_value_32 (orig_mode, mode, fntype, fn);
8426 static rtx
8427 ix86_function_value (const_tree valtype, const_tree fntype_or_decl, bool)
8429 machine_mode mode, orig_mode;
8431 orig_mode = TYPE_MODE (valtype);
8432 mode = type_natural_mode (valtype, NULL, true);
8433 return ix86_function_value_1 (valtype, fntype_or_decl, orig_mode, mode);
8436 /* Return an RTX representing a place where a function returns
8437 or receives pointer bounds, or NULL if no bounds are returned.
8439 VALTYPE is a data type of a value returned by the function.
8441 FN_DECL_OR_TYPE is a tree node representing FUNCTION_DECL
8442 or FUNCTION_TYPE of the function.
8444 If OUTGOING is false, return a place in which the caller will
8445 see the return value. Otherwise, return a place where a
8446 function returns a value. */
8448 static rtx
8449 ix86_function_value_bounds (const_tree valtype,
8450 const_tree fntype_or_decl ATTRIBUTE_UNUSED,
8451 bool outgoing ATTRIBUTE_UNUSED)
8453 rtx res = NULL_RTX;
8455 if (BOUNDED_TYPE_P (valtype))
8456 res = gen_rtx_REG (BNDmode, FIRST_BND_REG);
8457 else if (chkp_type_has_pointer (valtype))
8459 bitmap slots;
8460 rtx bounds[2];
8461 bitmap_iterator bi;
8462 unsigned i, bnd_no = 0;
8464 bitmap_obstack_initialize (NULL);
8465 slots = BITMAP_ALLOC (NULL);
8466 chkp_find_bound_slots (valtype, slots);
8468 EXECUTE_IF_SET_IN_BITMAP (slots, 0, i, bi)
8470 rtx reg = gen_rtx_REG (BNDmode, FIRST_BND_REG + bnd_no);
8471 rtx offs = GEN_INT (i * POINTER_SIZE / BITS_PER_UNIT);
8472 gcc_assert (bnd_no < 2);
8473 bounds[bnd_no++] = gen_rtx_EXPR_LIST (VOIDmode, reg, offs);
8476 res = gen_rtx_PARALLEL (VOIDmode, gen_rtvec_v (bnd_no, bounds));
8478 BITMAP_FREE (slots);
8479 bitmap_obstack_release (NULL);
8481 else
8482 res = NULL_RTX;
8484 return res;
8487 /* Pointer function arguments and return values are promoted to
8488 word_mode. */
8490 static machine_mode
8491 ix86_promote_function_mode (const_tree type, machine_mode mode,
8492 int *punsignedp, const_tree fntype,
8493 int for_return)
8495 if (type != NULL_TREE && POINTER_TYPE_P (type))
8497 *punsignedp = POINTERS_EXTEND_UNSIGNED;
8498 return word_mode;
8500 return default_promote_function_mode (type, mode, punsignedp, fntype,
8501 for_return);
8504 /* Return true if a structure, union or array with MODE containing FIELD
8505 should be accessed using BLKmode. */
8507 static bool
8508 ix86_member_type_forces_blk (const_tree field, machine_mode mode)
8510 /* Union with XFmode must be in BLKmode. */
8511 return (mode == XFmode
8512 && (TREE_CODE (DECL_FIELD_CONTEXT (field)) == UNION_TYPE
8513 || TREE_CODE (DECL_FIELD_CONTEXT (field)) == QUAL_UNION_TYPE));
8516 static rtx
8517 ix86_libcall_value (machine_mode mode)
8519 return ix86_function_value_1 (NULL, NULL, mode, mode);
8522 /* Return true iff type is returned in memory. */
8524 static bool
8525 ix86_return_in_memory (const_tree type, const_tree fntype ATTRIBUTE_UNUSED)
8527 #ifdef SUBTARGET_RETURN_IN_MEMORY
8528 return SUBTARGET_RETURN_IN_MEMORY (type, fntype);
8529 #else
8530 const machine_mode mode = type_natural_mode (type, NULL, true);
8531 HOST_WIDE_INT size;
8533 if (POINTER_BOUNDS_TYPE_P (type))
8534 return false;
8536 if (TARGET_64BIT)
8538 if (ix86_function_type_abi (fntype) == MS_ABI)
8540 size = int_size_in_bytes (type);
8542 /* __m128 is returned in xmm0. */
8543 if ((!type || VECTOR_INTEGER_TYPE_P (type)
8544 || INTEGRAL_TYPE_P (type)
8545 || VECTOR_FLOAT_TYPE_P (type))
8546 && (SCALAR_INT_MODE_P (mode) || VECTOR_MODE_P (mode))
8547 && !COMPLEX_MODE_P (mode)
8548 && (GET_MODE_SIZE (mode) == 16 || size == 16))
8549 return false;
8551 /* Otherwise, the size must be exactly 1, 2, 4 or 8 bytes. */
8552 return size != 1 && size != 2 && size != 4 && size != 8;
8554 else
8556 int needed_intregs, needed_sseregs;
8558 return examine_argument (mode, type, 1,
8559 &needed_intregs, &needed_sseregs);
8562 else
8564 if (mode == BLKmode)
8565 return true;
8567 size = int_size_in_bytes (type);
8569 if (MS_AGGREGATE_RETURN && AGGREGATE_TYPE_P (type) && size <= 8)
8570 return false;
8572 if (VECTOR_MODE_P (mode) || mode == TImode)
8574 /* User-created vectors small enough to fit in EAX. */
8575 if (size < 8)
8576 return false;
8578 /* Unless the ABI prescribes otherwise,
8579 MMX/3dNow values are returned in MM0 if available. */
8581 if (size == 8)
8582 return TARGET_VECT8_RETURNS || !TARGET_MMX;
8584 /* SSE values are returned in XMM0 if available. */
8585 if (size == 16)
8586 return !TARGET_SSE;
8588 /* AVX values are returned in YMM0 if available. */
8589 if (size == 32)
8590 return !TARGET_AVX;
8592 /* AVX512F values are returned in ZMM0 if available. */
8593 if (size == 64)
8594 return !TARGET_AVX512F;
8597 if (mode == XFmode)
8598 return false;
8600 if (size > 12)
8601 return true;
8603 /* OImode shouldn't be used directly. */
8604 gcc_assert (mode != OImode);
8606 return false;
8608 #endif
8612 /* Create the va_list data type. */
8614 /* Returns the calling-convention-specific va_list data type.
8615 The argument ABI can be DEFAULT_ABI, MS_ABI, or SYSV_ABI. */
8617 static tree
8618 ix86_build_builtin_va_list_abi (enum calling_abi abi)
8620 tree f_gpr, f_fpr, f_ovf, f_sav, record, type_decl;
8622 /* For i386 we use a plain pointer to the argument area. */
8623 if (!TARGET_64BIT || abi == MS_ABI)
8624 return build_pointer_type (char_type_node);
8626 record = lang_hooks.types.make_type (RECORD_TYPE);
8627 type_decl = build_decl (BUILTINS_LOCATION,
8628 TYPE_DECL, get_identifier ("__va_list_tag"), record);
8630 f_gpr = build_decl (BUILTINS_LOCATION,
8631 FIELD_DECL, get_identifier ("gp_offset"),
8632 unsigned_type_node);
8633 f_fpr = build_decl (BUILTINS_LOCATION,
8634 FIELD_DECL, get_identifier ("fp_offset"),
8635 unsigned_type_node);
8636 f_ovf = build_decl (BUILTINS_LOCATION,
8637 FIELD_DECL, get_identifier ("overflow_arg_area"),
8638 ptr_type_node);
8639 f_sav = build_decl (BUILTINS_LOCATION,
8640 FIELD_DECL, get_identifier ("reg_save_area"),
8641 ptr_type_node);
8643 va_list_gpr_counter_field = f_gpr;
8644 va_list_fpr_counter_field = f_fpr;
8646 DECL_FIELD_CONTEXT (f_gpr) = record;
8647 DECL_FIELD_CONTEXT (f_fpr) = record;
8648 DECL_FIELD_CONTEXT (f_ovf) = record;
8649 DECL_FIELD_CONTEXT (f_sav) = record;
8651 TYPE_STUB_DECL (record) = type_decl;
8652 TYPE_NAME (record) = type_decl;
8653 TYPE_FIELDS (record) = f_gpr;
8654 DECL_CHAIN (f_gpr) = f_fpr;
8655 DECL_CHAIN (f_fpr) = f_ovf;
8656 DECL_CHAIN (f_ovf) = f_sav;
8658 layout_type (record);
8660 /* The correct type is an array type of one element. */
8661 return build_array_type (record, build_index_type (size_zero_node));
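/* For reference (a sketch of the SysV x86-64 psABI type, not taken from
   this file): the record built above corresponds roughly to

     typedef struct __va_list_tag
     {
       unsigned int gp_offset;
       unsigned int fp_offset;
       void *overflow_arg_area;
       void *reg_save_area;
     } __va_list_tag;
     typedef __va_list_tag va_list[1];

   gp_offset and fp_offset are byte offsets of the next unused GPR and SSE
   slot within reg_save_area, and overflow_arg_area points at the next
   stack-passed argument.  */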
8664 /* Set up the builtin va_list data type and, for 64-bit, the additional
8665 calling-convention-specific va_list data types. */
8667 static tree
8668 ix86_build_builtin_va_list (void)
8670 tree ret = ix86_build_builtin_va_list_abi (ix86_abi);
8672 /* Initialize abi specific va_list builtin types. */
8673 if (TARGET_64BIT)
8675 tree t;
8676 if (ix86_abi == MS_ABI)
8678 t = ix86_build_builtin_va_list_abi (SYSV_ABI);
8679 if (TREE_CODE (t) != RECORD_TYPE)
8680 t = build_variant_type_copy (t);
8681 sysv_va_list_type_node = t;
8683 else
8685 t = ret;
8686 if (TREE_CODE (t) != RECORD_TYPE)
8687 t = build_variant_type_copy (t);
8688 sysv_va_list_type_node = t;
8690 if (ix86_abi != MS_ABI)
8692 t = ix86_build_builtin_va_list_abi (MS_ABI);
8693 if (TREE_CODE (t) != RECORD_TYPE)
8694 t = build_variant_type_copy (t);
8695 ms_va_list_type_node = t;
8697 else
8699 t = ret;
8700 if (TREE_CODE (t) != RECORD_TYPE)
8701 t = build_variant_type_copy (t);
8702 ms_va_list_type_node = t;
8706 return ret;
8709 /* Worker function for TARGET_SETUP_INCOMING_VARARGS. */
8711 static void
8712 setup_incoming_varargs_64 (CUMULATIVE_ARGS *cum)
8714 rtx save_area, mem;
8715 alias_set_type set;
8716 int i, max;
8718 /* GPR size of varargs save area. */
8719 if (cfun->va_list_gpr_size)
8720 ix86_varargs_gpr_size = X86_64_REGPARM_MAX * UNITS_PER_WORD;
8721 else
8722 ix86_varargs_gpr_size = 0;
8724 /* FPR size of varargs save area. We don't need it if we don't pass
8725 anything in SSE registers. */
8726 if (TARGET_SSE && cfun->va_list_fpr_size)
8727 ix86_varargs_fpr_size = X86_64_SSE_REGPARM_MAX * 16;
8728 else
8729 ix86_varargs_fpr_size = 0;
8731 if (! ix86_varargs_gpr_size && ! ix86_varargs_fpr_size)
8732 return;
8734 save_area = frame_pointer_rtx;
8735 set = get_varargs_alias_set ();
8737 max = cum->regno + cfun->va_list_gpr_size / UNITS_PER_WORD;
8738 if (max > X86_64_REGPARM_MAX)
8739 max = X86_64_REGPARM_MAX;
8741 for (i = cum->regno; i < max; i++)
8743 mem = gen_rtx_MEM (word_mode,
8744 plus_constant (Pmode, save_area, i * UNITS_PER_WORD));
8745 MEM_NOTRAP_P (mem) = 1;
8746 set_mem_alias_set (mem, set);
8747 emit_move_insn (mem,
8748 gen_rtx_REG (word_mode,
8749 x86_64_int_parameter_registers[i]));
8752 if (ix86_varargs_fpr_size)
8754 machine_mode smode;
8755 rtx_code_label *label;
8756 rtx test;
8758 /* Now emit code to save SSE registers. The AX parameter contains number
8759 of SSE parameter registers used to call this function, though all we
8760 actually check here is the zero/non-zero status. */
8762 label = gen_label_rtx ();
8763 test = gen_rtx_EQ (VOIDmode, gen_rtx_REG (QImode, AX_REG), const0_rtx);
8764 emit_jump_insn (gen_cbranchqi4 (test, XEXP (test, 0), XEXP (test, 1),
8765 label));
8767 /* ??? If !TARGET_SSE_TYPELESS_STORES, would we perform better if
8768 we used movdqa (i.e. TImode) instead? Perhaps even better would
8769 be if we could determine the real mode of the data, via a hook
8770 into pass_stdarg. Ignore all that for now. */
8771 smode = V4SFmode;
8772 if (crtl->stack_alignment_needed < GET_MODE_ALIGNMENT (smode))
8773 crtl->stack_alignment_needed = GET_MODE_ALIGNMENT (smode);
8775 max = cum->sse_regno + cfun->va_list_fpr_size / 16;
8776 if (max > X86_64_SSE_REGPARM_MAX)
8777 max = X86_64_SSE_REGPARM_MAX;
8779 for (i = cum->sse_regno; i < max; ++i)
8781 mem = plus_constant (Pmode, save_area,
8782 i * 16 + ix86_varargs_gpr_size);
8783 mem = gen_rtx_MEM (smode, mem);
8784 MEM_NOTRAP_P (mem) = 1;
8785 set_mem_alias_set (mem, set);
8786 set_mem_align (mem, GET_MODE_ALIGNMENT (smode));
8788 emit_move_insn (mem, gen_rtx_REG (smode, SSE_REGNO (i)));
8791 emit_label (label);
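/* Layout sketch of the save area built above, assuming the full
   X86_64_REGPARM_MAX (6) GPRs and X86_64_SSE_REGPARM_MAX (8) SSE registers
   are dumped:

     bytes   0 ..  47   rdi, rsi, rdx, rcx, r8, r9    (6 * 8)
     bytes  48 .. 175   xmm0 .. xmm7                  (8 * 16)

   This matches the fp_offset bias of 8 * X86_64_REGPARM_MAX used by
   ix86_va_start below.  */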
8795 static void
8796 setup_incoming_varargs_ms_64 (CUMULATIVE_ARGS *cum)
8798 alias_set_type set = get_varargs_alias_set ();
8799 int i;
8801 /* Reset to zero, as a SysV va_arg might have been used
8802 before. */
8803 ix86_varargs_gpr_size = 0;
8804 ix86_varargs_fpr_size = 0;
8806 for (i = cum->regno; i < X86_64_MS_REGPARM_MAX; i++)
8808 rtx reg, mem;
8810 mem = gen_rtx_MEM (Pmode,
8811 plus_constant (Pmode, virtual_incoming_args_rtx,
8812 i * UNITS_PER_WORD));
8813 MEM_NOTRAP_P (mem) = 1;
8814 set_mem_alias_set (mem, set);
8816 reg = gen_rtx_REG (Pmode, x86_64_ms_abi_int_parameter_registers[i]);
8817 emit_move_insn (mem, reg);
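/* Sketch: in the MS ABI the caller always allocates a 32-byte home area
   for the four register parameters directly above the return address, so
   the loop above simply spills the remaining register arguments (rcx, rdx,
   r8, r9; see x86_64_ms_abi_int_parameter_registers for the exact order)
   back into their home slots, letting va_arg walk the argument list
   linearly on the stack.  */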
8821 static void
8822 ix86_setup_incoming_varargs (cumulative_args_t cum_v, machine_mode mode,
8823 tree type, int *, int no_rtl)
8825 CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
8826 CUMULATIVE_ARGS next_cum;
8827 tree fntype;
8829 /* This argument doesn't appear to be used anymore, which is good,
8830 because the old code here didn't suppress rtl generation. */
8831 gcc_assert (!no_rtl);
8833 if (!TARGET_64BIT)
8834 return;
8836 fntype = TREE_TYPE (current_function_decl);
8838 /* For varargs, we do not want to skip the dummy va_dcl argument.
8839 For stdargs, we do want to skip the last named argument. */
8840 next_cum = *cum;
8841 if (stdarg_p (fntype))
8842 ix86_function_arg_advance (pack_cumulative_args (&next_cum), mode, type,
8843 true);
8845 if (cum->call_abi == MS_ABI)
8846 setup_incoming_varargs_ms_64 (&next_cum);
8847 else
8848 setup_incoming_varargs_64 (&next_cum);
8851 static void
8852 ix86_setup_incoming_vararg_bounds (cumulative_args_t cum_v,
8853 enum machine_mode mode,
8854 tree type,
8855 int *pretend_size ATTRIBUTE_UNUSED,
8856 int no_rtl)
8858 CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
8859 CUMULATIVE_ARGS next_cum;
8860 tree fntype;
8861 rtx save_area;
8862 int bnd_reg, i, max;
8864 gcc_assert (!no_rtl);
8866 /* Do nothing if we use a plain pointer to the argument area. */
8867 if (!TARGET_64BIT || cum->call_abi == MS_ABI)
8868 return;
8870 fntype = TREE_TYPE (current_function_decl);
8872 /* For varargs, we do not want to skip the dummy va_dcl argument.
8873 For stdargs, we do want to skip the last named argument. */
8874 next_cum = *cum;
8875 if (stdarg_p (fntype))
8876 ix86_function_arg_advance (pack_cumulative_args (&next_cum), mode, type,
8877 true);
8878 save_area = frame_pointer_rtx;
8880 max = cum->regno + cfun->va_list_gpr_size / UNITS_PER_WORD;
8881 if (max > X86_64_REGPARM_MAX)
8882 max = X86_64_REGPARM_MAX;
8884 bnd_reg = cum->bnd_regno + cum->force_bnd_pass;
8885 if (chkp_function_instrumented_p (current_function_decl))
8886 for (i = cum->regno; i < max; i++)
8888 rtx addr = plus_constant (Pmode, save_area, i * UNITS_PER_WORD);
8889 rtx reg = gen_rtx_REG (DImode,
8890 x86_64_int_parameter_registers[i]);
8891 rtx ptr = reg;
8892 rtx bounds;
8894 if (bnd_reg <= LAST_BND_REG)
8895 bounds = gen_rtx_REG (BNDmode, bnd_reg);
8896 else
8898 rtx ldx_addr =
8899 plus_constant (Pmode, arg_pointer_rtx,
8900 (LAST_BND_REG - bnd_reg) * GET_MODE_SIZE (Pmode));
8901 bounds = gen_reg_rtx (BNDmode);
8902 emit_insn (BNDmode == BND64mode
8903 ? gen_bnd64_ldx (bounds, ldx_addr, ptr)
8904 : gen_bnd32_ldx (bounds, ldx_addr, ptr));
8907 emit_insn (BNDmode == BND64mode
8908 ? gen_bnd64_stx (addr, ptr, bounds)
8909 : gen_bnd32_stx (addr, ptr, bounds));
8911 bnd_reg++;
8916 /* Checks if TYPE is of kind va_list char *. */
8918 static bool
8919 is_va_list_char_pointer (tree type)
8921 tree canonic;
8923 /* For 32-bit it is always true. */
8924 if (!TARGET_64BIT)
8925 return true;
8926 canonic = ix86_canonical_va_list_type (type);
8927 return (canonic == ms_va_list_type_node
8928 || (ix86_abi == MS_ABI && canonic == va_list_type_node));
8931 /* Implement va_start. */
8933 static void
8934 ix86_va_start (tree valist, rtx nextarg)
8936 HOST_WIDE_INT words, n_gpr, n_fpr;
8937 tree f_gpr, f_fpr, f_ovf, f_sav;
8938 tree gpr, fpr, ovf, sav, t;
8939 tree type;
8940 rtx ovf_rtx;
8942 if (flag_split_stack
8943 && cfun->machine->split_stack_varargs_pointer == NULL_RTX)
8945 unsigned int scratch_regno;
8947 /* When we are splitting the stack, we can't refer to the stack
8948 arguments using internal_arg_pointer, because they may be on
8949 the old stack. The split stack prologue will arrange to
8950 leave a pointer to the old stack arguments in a scratch
8951 register, which we here copy to a pseudo-register. The split
8952 stack prologue can't set the pseudo-register directly because
8953 it (the prologue) runs before any registers have been saved. */
8955 scratch_regno = split_stack_prologue_scratch_regno ();
8956 if (scratch_regno != INVALID_REGNUM)
8958 rtx reg;
8959 rtx_insn *seq;
8961 reg = gen_reg_rtx (Pmode);
8962 cfun->machine->split_stack_varargs_pointer = reg;
8964 start_sequence ();
8965 emit_move_insn (reg, gen_rtx_REG (Pmode, scratch_regno));
8966 seq = get_insns ();
8967 end_sequence ();
8969 push_topmost_sequence ();
8970 emit_insn_after (seq, entry_of_function ());
8971 pop_topmost_sequence ();
8975 /* Only 64-bit targets need something special. */
8976 if (!TARGET_64BIT || is_va_list_char_pointer (TREE_TYPE (valist)))
8978 if (cfun->machine->split_stack_varargs_pointer == NULL_RTX)
8979 std_expand_builtin_va_start (valist, nextarg);
8980 else
8982 rtx va_r, next;
8984 va_r = expand_expr (valist, NULL_RTX, VOIDmode, EXPAND_WRITE);
8985 next = expand_binop (ptr_mode, add_optab,
8986 cfun->machine->split_stack_varargs_pointer,
8987 crtl->args.arg_offset_rtx,
8988 NULL_RTX, 0, OPTAB_LIB_WIDEN);
8989 convert_move (va_r, next, 0);
8991 /* Store zero bounds for va_list. */
8992 if (chkp_function_instrumented_p (current_function_decl))
8993 chkp_expand_bounds_reset_for_mem (valist,
8994 make_tree (TREE_TYPE (valist),
8995 next));
8998 return;
9001 f_gpr = TYPE_FIELDS (TREE_TYPE (sysv_va_list_type_node));
9002 f_fpr = DECL_CHAIN (f_gpr);
9003 f_ovf = DECL_CHAIN (f_fpr);
9004 f_sav = DECL_CHAIN (f_ovf);
9006 valist = build_simple_mem_ref (valist);
9007 TREE_TYPE (valist) = TREE_TYPE (sysv_va_list_type_node);
9008 /* The following should be folded into the MEM_REF offset. */
9009 gpr = build3 (COMPONENT_REF, TREE_TYPE (f_gpr), unshare_expr (valist),
9010 f_gpr, NULL_TREE);
9011 fpr = build3 (COMPONENT_REF, TREE_TYPE (f_fpr), unshare_expr (valist),
9012 f_fpr, NULL_TREE);
9013 ovf = build3 (COMPONENT_REF, TREE_TYPE (f_ovf), unshare_expr (valist),
9014 f_ovf, NULL_TREE);
9015 sav = build3 (COMPONENT_REF, TREE_TYPE (f_sav), unshare_expr (valist),
9016 f_sav, NULL_TREE);
9018 /* Count number of gp and fp argument registers used. */
9019 words = crtl->args.info.words;
9020 n_gpr = crtl->args.info.regno;
9021 n_fpr = crtl->args.info.sse_regno;
9023 if (cfun->va_list_gpr_size)
9025 type = TREE_TYPE (gpr);
9026 t = build2 (MODIFY_EXPR, type,
9027 gpr, build_int_cst (type, n_gpr * 8));
9028 TREE_SIDE_EFFECTS (t) = 1;
9029 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
9032 if (TARGET_SSE && cfun->va_list_fpr_size)
9034 type = TREE_TYPE (fpr);
9035 t = build2 (MODIFY_EXPR, type, fpr,
9036 build_int_cst (type, n_fpr * 16 + 8*X86_64_REGPARM_MAX));
9037 TREE_SIDE_EFFECTS (t) = 1;
9038 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
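/* Worked example (a sketch): for a prototype such as
     int f (const char *fmt, ...)
   the single named argument consumes %rdi, so n_gpr == 1 and n_fpr == 0,
   and the assignments above initialize gp_offset = 8 and fp_offset = 48;
   the first variadic GPR argument is then fetched from reg_save_area + 8
   and the first variadic SSE argument from reg_save_area + 48.  */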
9041 /* Find the overflow area. */
9042 type = TREE_TYPE (ovf);
9043 if (cfun->machine->split_stack_varargs_pointer == NULL_RTX)
9044 ovf_rtx = crtl->args.internal_arg_pointer;
9045 else
9046 ovf_rtx = cfun->machine->split_stack_varargs_pointer;
9047 t = make_tree (type, ovf_rtx);
9048 if (words != 0)
9049 t = fold_build_pointer_plus_hwi (t, words * UNITS_PER_WORD);
9051 /* Store zero bounds for overflow area pointer. */
9052 if (chkp_function_instrumented_p (current_function_decl))
9053 chkp_expand_bounds_reset_for_mem (ovf, t);
9055 t = build2 (MODIFY_EXPR, type, ovf, t);
9056 TREE_SIDE_EFFECTS (t) = 1;
9057 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
9059 if (ix86_varargs_gpr_size || ix86_varargs_fpr_size)
9061 /* Find the register save area.
9062 The function prologue saves it right above the stack frame. */
9063 type = TREE_TYPE (sav);
9064 t = make_tree (type, frame_pointer_rtx);
9065 if (!ix86_varargs_gpr_size)
9066 t = fold_build_pointer_plus_hwi (t, -8 * X86_64_REGPARM_MAX);
9068 /* Store zero bounds for save area pointer. */
9069 if (chkp_function_instrumented_p (current_function_decl))
9070 chkp_expand_bounds_reset_for_mem (sav, t);
9072 t = build2 (MODIFY_EXPR, type, sav, t);
9073 TREE_SIDE_EFFECTS (t) = 1;
9074 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
9078 /* Implement va_arg. */
9080 static tree
9081 ix86_gimplify_va_arg (tree valist, tree type, gimple_seq *pre_p,
9082 gimple_seq *post_p)
9084 static const int intreg[6] = { 0, 1, 2, 3, 4, 5 };
9085 tree f_gpr, f_fpr, f_ovf, f_sav;
9086 tree gpr, fpr, ovf, sav, t;
9087 int size, rsize;
9088 tree lab_false, lab_over = NULL_TREE;
9089 tree addr, t2;
9090 rtx container;
9091 int indirect_p = 0;
9092 tree ptrtype;
9093 machine_mode nat_mode;
9094 unsigned int arg_boundary;
9096 /* Only 64-bit targets need something special. */
9097 if (!TARGET_64BIT || is_va_list_char_pointer (TREE_TYPE (valist)))
9098 return std_gimplify_va_arg_expr (valist, type, pre_p, post_p);
9100 f_gpr = TYPE_FIELDS (TREE_TYPE (sysv_va_list_type_node));
9101 f_fpr = DECL_CHAIN (f_gpr);
9102 f_ovf = DECL_CHAIN (f_fpr);
9103 f_sav = DECL_CHAIN (f_ovf);
9105 gpr = build3 (COMPONENT_REF, TREE_TYPE (f_gpr),
9106 valist, f_gpr, NULL_TREE);
9108 fpr = build3 (COMPONENT_REF, TREE_TYPE (f_fpr), valist, f_fpr, NULL_TREE);
9109 ovf = build3 (COMPONENT_REF, TREE_TYPE (f_ovf), valist, f_ovf, NULL_TREE);
9110 sav = build3 (COMPONENT_REF, TREE_TYPE (f_sav), valist, f_sav, NULL_TREE);
9112 indirect_p = pass_by_reference (NULL, TYPE_MODE (type), type, false);
9113 if (indirect_p)
9114 type = build_pointer_type (type);
9115 size = int_size_in_bytes (type);
9116 rsize = (size + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
9118 nat_mode = type_natural_mode (type, NULL, false);
9119 switch (nat_mode)
9121 case V8SFmode:
9122 case V8SImode:
9123 case V32QImode:
9124 case V16HImode:
9125 case V4DFmode:
9126 case V4DImode:
9127 case V16SFmode:
9128 case V16SImode:
9129 case V64QImode:
9130 case V32HImode:
9131 case V8DFmode:
9132 case V8DImode:
9133 /* Unnamed 256- and 512-bit vector mode parameters are passed on the stack. */
9134 if (!TARGET_64BIT_MS_ABI)
9136 container = NULL;
9137 break;
9140 default:
9141 container = construct_container (nat_mode, TYPE_MODE (type),
9142 type, 0, X86_64_REGPARM_MAX,
9143 X86_64_SSE_REGPARM_MAX, intreg,
9145 break;
9148 /* Pull the value out of the saved registers. */
9150 addr = create_tmp_var (ptr_type_node, "addr");
9152 if (container)
9154 int needed_intregs, needed_sseregs;
9155 bool need_temp;
9156 tree int_addr, sse_addr;
9158 lab_false = create_artificial_label (UNKNOWN_LOCATION);
9159 lab_over = create_artificial_label (UNKNOWN_LOCATION);
9161 examine_argument (nat_mode, type, 0, &needed_intregs, &needed_sseregs);
9163 need_temp = (!REG_P (container)
9164 && ((needed_intregs && TYPE_ALIGN (type) > 64)
9165 || TYPE_ALIGN (type) > 128));
9167 /* In case we are passing a structure, verify that it is a consecutive block
9168 in the register save area. If not, we need to do moves. */
9169 if (!need_temp && !REG_P (container))
9171 /* Verify that all registers are strictly consecutive */
9172 if (SSE_REGNO_P (REGNO (XEXP (XVECEXP (container, 0, 0), 0))))
9174 int i;
9176 for (i = 0; i < XVECLEN (container, 0) && !need_temp; i++)
9178 rtx slot = XVECEXP (container, 0, i);
9179 if (REGNO (XEXP (slot, 0)) != FIRST_SSE_REG + (unsigned int) i
9180 || INTVAL (XEXP (slot, 1)) != i * 16)
9181 need_temp = true;
9184 else
9186 int i;
9188 for (i = 0; i < XVECLEN (container, 0) && !need_temp; i++)
9190 rtx slot = XVECEXP (container, 0, i);
9191 if (REGNO (XEXP (slot, 0)) != (unsigned int) i
9192 || INTVAL (XEXP (slot, 1)) != i * 8)
9193 need_temp = true;
9197 if (!need_temp)
9199 int_addr = addr;
9200 sse_addr = addr;
9202 else
9204 int_addr = create_tmp_var (ptr_type_node, "int_addr");
9205 sse_addr = create_tmp_var (ptr_type_node, "sse_addr");
9208 /* First ensure that we fit completely in registers. */
9209 if (needed_intregs)
9211 t = build_int_cst (TREE_TYPE (gpr),
9212 (X86_64_REGPARM_MAX - needed_intregs + 1) * 8);
9213 t = build2 (GE_EXPR, boolean_type_node, gpr, t);
9214 t2 = build1 (GOTO_EXPR, void_type_node, lab_false);
9215 t = build3 (COND_EXPR, void_type_node, t, t2, NULL_TREE);
9216 gimplify_and_add (t, pre_p);
9218 if (needed_sseregs)
9220 t = build_int_cst (TREE_TYPE (fpr),
9221 (X86_64_SSE_REGPARM_MAX - needed_sseregs + 1) * 16
9222 + X86_64_REGPARM_MAX * 8);
9223 t = build2 (GE_EXPR, boolean_type_node, fpr, t);
9224 t2 = build1 (GOTO_EXPR, void_type_node, lab_false);
9225 t = build3 (COND_EXPR, void_type_node, t, t2, NULL_TREE);
9226 gimplify_and_add (t, pre_p);
9229 /* Compute index to start of area used for integer regs. */
9230 if (needed_intregs)
9232 /* int_addr = gpr + sav; */
9233 t = fold_build_pointer_plus (sav, gpr);
9234 gimplify_assign (int_addr, t, pre_p);
9236 if (needed_sseregs)
9238 /* sse_addr = fpr + sav; */
9239 t = fold_build_pointer_plus (sav, fpr);
9240 gimplify_assign (sse_addr, t, pre_p);
9242 if (need_temp)
9244 int i, prev_size = 0;
9245 tree temp = create_tmp_var (type, "va_arg_tmp");
9247 /* addr = &temp; */
9248 t = build1 (ADDR_EXPR, build_pointer_type (type), temp);
9249 gimplify_assign (addr, t, pre_p);
9251 for (i = 0; i < XVECLEN (container, 0); i++)
9253 rtx slot = XVECEXP (container, 0, i);
9254 rtx reg = XEXP (slot, 0);
9255 machine_mode mode = GET_MODE (reg);
9256 tree piece_type;
9257 tree addr_type;
9258 tree daddr_type;
9259 tree src_addr, src;
9260 int src_offset;
9261 tree dest_addr, dest;
9262 int cur_size = GET_MODE_SIZE (mode);
9264 gcc_assert (prev_size <= INTVAL (XEXP (slot, 1)));
9265 prev_size = INTVAL (XEXP (slot, 1));
9266 if (prev_size + cur_size > size)
9268 cur_size = size - prev_size;
9269 mode = mode_for_size (cur_size * BITS_PER_UNIT, MODE_INT, 1);
9270 if (mode == BLKmode)
9271 mode = QImode;
9273 piece_type = lang_hooks.types.type_for_mode (mode, 1);
9274 if (mode == GET_MODE (reg))
9275 addr_type = build_pointer_type (piece_type);
9276 else
9277 addr_type = build_pointer_type_for_mode (piece_type, ptr_mode,
9278 true);
9279 daddr_type = build_pointer_type_for_mode (piece_type, ptr_mode,
9280 true);
9282 if (SSE_REGNO_P (REGNO (reg)))
9284 src_addr = sse_addr;
9285 src_offset = (REGNO (reg) - FIRST_SSE_REG) * 16;
9287 else
9289 src_addr = int_addr;
9290 src_offset = REGNO (reg) * 8;
9292 src_addr = fold_convert (addr_type, src_addr);
9293 src_addr = fold_build_pointer_plus_hwi (src_addr, src_offset);
9295 dest_addr = fold_convert (daddr_type, addr);
9296 dest_addr = fold_build_pointer_plus_hwi (dest_addr, prev_size);
9297 if (cur_size == GET_MODE_SIZE (mode))
9299 src = build_va_arg_indirect_ref (src_addr);
9300 dest = build_va_arg_indirect_ref (dest_addr);
9302 gimplify_assign (dest, src, pre_p);
9304 else
9306 tree copy
9307 = build_call_expr (builtin_decl_implicit (BUILT_IN_MEMCPY),
9308 3, dest_addr, src_addr,
9309 size_int (cur_size));
9310 gimplify_and_add (copy, pre_p);
9312 prev_size += cur_size;
9316 if (needed_intregs)
9318 t = build2 (PLUS_EXPR, TREE_TYPE (gpr), gpr,
9319 build_int_cst (TREE_TYPE (gpr), needed_intregs * 8));
9320 gimplify_assign (gpr, t, pre_p);
9323 if (needed_sseregs)
9325 t = build2 (PLUS_EXPR, TREE_TYPE (fpr), fpr,
9326 build_int_cst (TREE_TYPE (fpr), needed_sseregs * 16));
9327 gimplify_assign (unshare_expr (fpr), t, pre_p);
9330 gimple_seq_add_stmt (pre_p, gimple_build_goto (lab_over));
9332 gimple_seq_add_stmt (pre_p, gimple_build_label (lab_false));
9335 /* ... otherwise out of the overflow area. */
9337 /* When the caller aligns a parameter on the stack, a requested
9338 alignment beyond MAX_SUPPORTED_STACK_ALIGNMENT is clamped to
9339 MAX_SUPPORTED_STACK_ALIGNMENT. Match the callee here with the
9340 caller. */
9341 arg_boundary = ix86_function_arg_boundary (VOIDmode, type);
9342 if ((unsigned int) arg_boundary > MAX_SUPPORTED_STACK_ALIGNMENT)
9343 arg_boundary = MAX_SUPPORTED_STACK_ALIGNMENT;
9345 /* Care for on-stack alignment if needed. */
9346 if (arg_boundary <= 64 || size == 0)
9347 t = ovf;
9348 else
9350 HOST_WIDE_INT align = arg_boundary / 8;
9351 t = fold_build_pointer_plus_hwi (ovf, align - 1);
9352 t = build2 (BIT_AND_EXPR, TREE_TYPE (t), t,
9353 build_int_cst (TREE_TYPE (t), -align));
9356 gimplify_expr (&t, pre_p, NULL, is_gimple_val, fb_rvalue);
9357 gimplify_assign (addr, t, pre_p);
9359 t = fold_build_pointer_plus_hwi (t, rsize * UNITS_PER_WORD);
9360 gimplify_assign (unshare_expr (ovf), t, pre_p);
9362 if (container)
9363 gimple_seq_add_stmt (pre_p, gimple_build_label (lab_over));
9365 ptrtype = build_pointer_type_for_mode (type, ptr_mode, true);
9366 addr = fold_convert (ptrtype, addr);
9368 if (indirect_p)
9369 addr = build_va_arg_indirect_ref (addr);
9370 return build_va_arg_indirect_ref (addr);
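/* For reference, the sequence built above for a simple va_arg (ap, int)
   is roughly equivalent to (a sketch, not the literal GIMPLE):

     if (ap->gp_offset >= 6 * 8)
       goto on_stack;
     addr = ap->reg_save_area + ap->gp_offset;
     ap->gp_offset += 8;
     goto done;
   on_stack:
     addr = ap->overflow_arg_area;
     ap->overflow_arg_area = addr + 8;
   done:
     result = *(int *) addr;
*/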
9373 /* Return true if OPNUM's MEM should be matched
9374 in movabs* patterns. */
9376 bool
9377 ix86_check_movabs (rtx insn, int opnum)
9379 rtx set, mem;
9381 set = PATTERN (insn);
9382 if (GET_CODE (set) == PARALLEL)
9383 set = XVECEXP (set, 0, 0);
9384 gcc_assert (GET_CODE (set) == SET);
9385 mem = XEXP (set, opnum);
9386 while (GET_CODE (mem) == SUBREG)
9387 mem = SUBREG_REG (mem);
9388 gcc_assert (MEM_P (mem));
9389 return volatile_ok || !MEM_VOLATILE_P (mem);
9392 /* Initialize the table of extra 80387 mathematical constants. */
9394 static void
9395 init_ext_80387_constants (void)
9397 static const char * cst[5] =
9399 "0.3010299956639811952256464283594894482", /* 0: fldlg2 */
9400 "0.6931471805599453094286904741849753009", /* 1: fldln2 */
9401 "1.4426950408889634073876517827983434472", /* 2: fldl2e */
9402 "3.3219280948873623478083405569094566090", /* 3: fldl2t */
9403 "3.1415926535897932385128089594061862044", /* 4: fldpi */
9405 int i;
9407 for (i = 0; i < 5; i++)
9409 real_from_string (&ext_80387_constants_table[i], cst[i]);
9410 /* Ensure each constant is rounded to XFmode precision. */
9411 real_convert (&ext_80387_constants_table[i],
9412 XFmode, &ext_80387_constants_table[i]);
9415 ext_80387_constants_init = 1;
9418 /* Return non-zero if the constant is something that
9419 can be loaded with a special instruction. */
9422 standard_80387_constant_p (rtx x)
9424 machine_mode mode = GET_MODE (x);
9426 REAL_VALUE_TYPE r;
9428 if (!(CONST_DOUBLE_P (x) && X87_FLOAT_MODE_P (mode)))
9429 return -1;
9431 if (x == CONST0_RTX (mode))
9432 return 1;
9433 if (x == CONST1_RTX (mode))
9434 return 2;
9436 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
9438 /* For XFmode constants, try to find a special 80387 instruction when
9439 optimizing for size or on those CPUs that benefit from them. */
9440 if (mode == XFmode
9441 && (optimize_function_for_size_p (cfun) || TARGET_EXT_80387_CONSTANTS))
9443 int i;
9445 if (! ext_80387_constants_init)
9446 init_ext_80387_constants ();
9448 for (i = 0; i < 5; i++)
9449 if (real_identical (&r, &ext_80387_constants_table[i]))
9450 return i + 3;
9453 /* A load of the constant -0.0 or -1.0 will be split into an
9454 fldz;fchs or fld1;fchs sequence. */
9455 if (real_isnegzero (&r))
9456 return 8;
9457 if (real_identical (&r, &dconstm1))
9458 return 9;
9460 return 0;
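/* Summary of the return values above (they select the opcodes emitted by
   standard_80387_constant_opcode below):
     -1  not an 80387 float constant
      0  no special instruction available
      1  fldz      2  fld1      3  fldlg2    4  fldln2
      5  fldl2e    6  fldl2t    7  fldpi
      8  -0.0 (fldz;fchs)       9  -1.0 (fld1;fchs)  */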
9463 /* Return the opcode of the special instruction to be used to load
9464 the constant X. */
9466 const char *
9467 standard_80387_constant_opcode (rtx x)
9469 switch (standard_80387_constant_p (x))
9471 case 1:
9472 return "fldz";
9473 case 2:
9474 return "fld1";
9475 case 3:
9476 return "fldlg2";
9477 case 4:
9478 return "fldln2";
9479 case 5:
9480 return "fldl2e";
9481 case 6:
9482 return "fldl2t";
9483 case 7:
9484 return "fldpi";
9485 case 8:
9486 case 9:
9487 return "#";
9488 default:
9489 gcc_unreachable ();
9493 /* Return the CONST_DOUBLE representing the 80387 constant that is
9494 loaded by the specified special instruction. The argument IDX
9495 matches the return value from standard_80387_constant_p. */
9498 standard_80387_constant_rtx (int idx)
9500 int i;
9502 if (! ext_80387_constants_init)
9503 init_ext_80387_constants ();
9505 switch (idx)
9507 case 3:
9508 case 4:
9509 case 5:
9510 case 6:
9511 case 7:
9512 i = idx - 3;
9513 break;
9515 default:
9516 gcc_unreachable ();
9519 return CONST_DOUBLE_FROM_REAL_VALUE (ext_80387_constants_table[i],
9520 XFmode);
9523 /* Return 1 if X is all 0s, 2 if X is all 1s in a supported
9524 SSE/AVX vector mode, and 0 otherwise. */
9527 standard_sse_constant_p (rtx x)
9529 machine_mode mode;
9531 if (!TARGET_SSE)
9532 return 0;
9534 mode = GET_MODE (x);
9536 if (x == const0_rtx || x == CONST0_RTX (mode))
9537 return 1;
9538 if (vector_all_ones_operand (x, mode))
9539 switch (mode)
9541 case V16QImode:
9542 case V8HImode:
9543 case V4SImode:
9544 case V2DImode:
9545 if (TARGET_SSE2)
9546 return 2;
9547 case V32QImode:
9548 case V16HImode:
9549 case V8SImode:
9550 case V4DImode:
9551 if (TARGET_AVX2)
9552 return 2;
9553 case V64QImode:
9554 case V32HImode:
9555 case V16SImode:
9556 case V8DImode:
9557 if (TARGET_AVX512F)
9558 return 2;
9559 default:
9560 break;
9563 return 0;
9566 /* Return the opcode of the special instruction to be used to load
9567 the constant X. */
9569 const char *
9570 standard_sse_constant_opcode (rtx_insn *insn, rtx x)
9572 switch (standard_sse_constant_p (x))
9574 case 1:
9575 switch (get_attr_mode (insn))
9577 case MODE_XI:
9578 return "vpxord\t%g0, %g0, %g0";
9579 case MODE_V16SF:
9580 return TARGET_AVX512DQ ? "vxorps\t%g0, %g0, %g0"
9581 : "vpxord\t%g0, %g0, %g0";
9582 case MODE_V8DF:
9583 return TARGET_AVX512DQ ? "vxorpd\t%g0, %g0, %g0"
9584 : "vpxorq\t%g0, %g0, %g0";
9585 case MODE_TI:
9586 return TARGET_AVX512VL ? "vpxord\t%t0, %t0, %t0"
9587 : "%vpxor\t%0, %d0";
9588 case MODE_V2DF:
9589 return "%vxorpd\t%0, %d0";
9590 case MODE_V4SF:
9591 return "%vxorps\t%0, %d0";
9593 case MODE_OI:
9594 return TARGET_AVX512VL ? "vpxord\t%x0, %x0, %x0"
9595 : "vpxor\t%x0, %x0, %x0";
9596 case MODE_V4DF:
9597 return "vxorpd\t%x0, %x0, %x0";
9598 case MODE_V8SF:
9599 return "vxorps\t%x0, %x0, %x0";
9601 default:
9602 break;
9605 case 2:
9606 if (TARGET_AVX512VL
9607 || get_attr_mode (insn) == MODE_XI
9608 || get_attr_mode (insn) == MODE_V8DF
9609 || get_attr_mode (insn) == MODE_V16SF)
9610 return "vpternlogd\t{$0xFF, %g0, %g0, %g0|%g0, %g0, %g0, 0xFF}";
9611 if (TARGET_AVX)
9612 return "vpcmpeqd\t%0, %0, %0";
9613 else
9614 return "pcmpeqd\t%0, %0";
9616 default:
9617 break;
9619 gcc_unreachable ();
9622 /* Returns true if OP contains a symbol reference. */
9624 bool
9625 symbolic_reference_mentioned_p (rtx op)
9627 const char *fmt;
9628 int i;
9630 if (GET_CODE (op) == SYMBOL_REF || GET_CODE (op) == LABEL_REF)
9631 return true;
9633 fmt = GET_RTX_FORMAT (GET_CODE (op));
9634 for (i = GET_RTX_LENGTH (GET_CODE (op)) - 1; i >= 0; i--)
9636 if (fmt[i] == 'E')
9638 int j;
9640 for (j = XVECLEN (op, i) - 1; j >= 0; j--)
9641 if (symbolic_reference_mentioned_p (XVECEXP (op, i, j)))
9642 return true;
9645 else if (fmt[i] == 'e' && symbolic_reference_mentioned_p (XEXP (op, i)))
9646 return true;
9649 return false;
9652 /* Return true if it is appropriate to emit `ret' instructions in the
9653 body of a function. Do this only if the epilogue is simple, needing a
9654 couple of insns. Prior to reloading, we can't tell how many registers
9655 must be saved, so return false then. Return false if there is no frame
9656 marker to de-allocate. */
9658 bool
9659 ix86_can_use_return_insn_p (void)
9661 struct ix86_frame frame;
9663 if (! reload_completed || frame_pointer_needed)
9664 return 0;
9666 /* Don't allow more than 32k pop, since that's all we can do
9667 with one instruction. */
9668 if (crtl->args.pops_args && crtl->args.size >= 32768)
9669 return 0;
9671 ix86_compute_frame_layout (&frame);
9672 return (frame.stack_pointer_offset == UNITS_PER_WORD
9673 && (frame.nregs + frame.nsseregs) == 0);
9676 /* Value should be nonzero if functions must have frame pointers.
9677 Zero means the frame pointer need not be set up (and parms may
9678 be accessed via the stack pointer) in functions that seem suitable. */
9680 static bool
9681 ix86_frame_pointer_required (void)
9683 /* If we accessed previous frames, then the generated code expects
9684 to be able to access the saved ebp value in our frame. */
9685 if (cfun->machine->accesses_prev_frame)
9686 return true;
9688 /* Several x86 OSes need a frame pointer for other reasons,
9689 usually pertaining to setjmp. */
9690 if (SUBTARGET_FRAME_POINTER_REQUIRED)
9691 return true;
9693 /* For older 32-bit runtimes, setjmp requires a valid frame pointer. */
9694 if (TARGET_32BIT_MS_ABI && cfun->calls_setjmp)
9695 return true;
9697 /* With Win64 SEH, very large frames need a frame pointer, as the maximum
9698 stack allocation is 4GB. */
9699 if (TARGET_64BIT_MS_ABI && get_frame_size () > SEH_MAX_FRAME_SIZE)
9700 return true;
9702 /* In ix86_option_override_internal, TARGET_OMIT_LEAF_FRAME_POINTER
9703 turns off the frame pointer by default. Turn it back on now if
9704 we've not got a leaf function. */
9705 if (TARGET_OMIT_LEAF_FRAME_POINTER
9706 && (!crtl->is_leaf
9707 || ix86_current_function_calls_tls_descriptor))
9708 return true;
9710 if (crtl->profile && !flag_fentry)
9711 return true;
9713 return false;
9716 /* Record that the current function accesses previous call frames. */
9718 void
9719 ix86_setup_frame_addresses (void)
9721 cfun->machine->accesses_prev_frame = 1;
9724 #ifndef USE_HIDDEN_LINKONCE
9725 # if defined(HAVE_GAS_HIDDEN) && (SUPPORTS_ONE_ONLY - 0)
9726 # define USE_HIDDEN_LINKONCE 1
9727 # else
9728 # define USE_HIDDEN_LINKONCE 0
9729 # endif
9730 #endif
9732 static int pic_labels_used;
9734 /* Fills in the label name that should be used for a pc thunk for
9735 the given register. */
9737 static void
9738 get_pc_thunk_name (char name[32], unsigned int regno)
9740 gcc_assert (!TARGET_64BIT);
9742 if (USE_HIDDEN_LINKONCE)
9743 sprintf (name, "__x86.get_pc_thunk.%s", reg_names[regno]);
9744 else
9745 ASM_GENERATE_INTERNAL_LABEL (name, "LPR", regno);
9749 /* This function generates the pc thunks used with -fpic; each thunk loads
9750 its register with the return address of the caller and then returns. */
9752 static void
9753 ix86_code_end (void)
9755 rtx xops[2];
9756 int regno;
9758 for (regno = AX_REG; regno <= SP_REG; regno++)
9760 char name[32];
9761 tree decl;
9763 if (!(pic_labels_used & (1 << regno)))
9764 continue;
9766 get_pc_thunk_name (name, regno);
9768 decl = build_decl (BUILTINS_LOCATION, FUNCTION_DECL,
9769 get_identifier (name),
9770 build_function_type_list (void_type_node, NULL_TREE));
9771 DECL_RESULT (decl) = build_decl (BUILTINS_LOCATION, RESULT_DECL,
9772 NULL_TREE, void_type_node);
9773 TREE_PUBLIC (decl) = 1;
9774 TREE_STATIC (decl) = 1;
9775 DECL_IGNORED_P (decl) = 1;
9777 #if TARGET_MACHO
9778 if (TARGET_MACHO)
9780 switch_to_section (darwin_sections[text_coal_section]);
9781 fputs ("\t.weak_definition\t", asm_out_file);
9782 assemble_name (asm_out_file, name);
9783 fputs ("\n\t.private_extern\t", asm_out_file);
9784 assemble_name (asm_out_file, name);
9785 putc ('\n', asm_out_file);
9786 ASM_OUTPUT_LABEL (asm_out_file, name);
9787 DECL_WEAK (decl) = 1;
9789 else
9790 #endif
9791 if (USE_HIDDEN_LINKONCE)
9793 cgraph_node::create (decl)->set_comdat_group (DECL_ASSEMBLER_NAME (decl));
9795 targetm.asm_out.unique_section (decl, 0);
9796 switch_to_section (get_named_section (decl, NULL, 0));
9798 targetm.asm_out.globalize_label (asm_out_file, name);
9799 fputs ("\t.hidden\t", asm_out_file);
9800 assemble_name (asm_out_file, name);
9801 putc ('\n', asm_out_file);
9802 ASM_DECLARE_FUNCTION_NAME (asm_out_file, name, decl);
9804 else
9806 switch_to_section (text_section);
9807 ASM_OUTPUT_LABEL (asm_out_file, name);
9810 DECL_INITIAL (decl) = make_node (BLOCK);
9811 current_function_decl = decl;
9812 init_function_start (decl);
9813 first_function_block_is_cold = false;
9814 /* Make sure unwind info is emitted for the thunk if needed. */
9815 final_start_function (emit_barrier (), asm_out_file, 1);
9817 /* Pad stack IP move with 4 instructions (two NOPs count
9818 as one instruction). */
9819 if (TARGET_PAD_SHORT_FUNCTION)
9821 int i = 8;
9823 while (i--)
9824 fputs ("\tnop\n", asm_out_file);
9827 xops[0] = gen_rtx_REG (Pmode, regno);
9828 xops[1] = gen_rtx_MEM (Pmode, stack_pointer_rtx);
9829 output_asm_insn ("mov%z0\t{%1, %0|%0, %1}", xops);
9830 output_asm_insn ("%!ret", NULL);
9831 final_end_function ();
9832 init_insn_lengths ();
9833 free_after_compilation (cfun);
9834 set_cfun (NULL);
9835 current_function_decl = NULL;
9838 if (flag_split_stack)
9839 file_end_indicate_split_stack ();
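/* For example (a sketch), when %ebx is the PIC register the thunk emitted
   above looks like:

     __x86.get_pc_thunk.bx:
             movl    (%esp), %ebx
             ret

   i.e. it copies the caller's return address (the address of the
   instruction following the call) into the destination register.  */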
9842 /* Emit code for the SET_GOT patterns. */
9844 const char *
9845 output_set_got (rtx dest, rtx label)
9847 rtx xops[3];
9849 xops[0] = dest;
9851 if (TARGET_VXWORKS_RTP && flag_pic)
9853 /* Load (*VXWORKS_GOTT_BASE) into the PIC register. */
9854 xops[2] = gen_rtx_MEM (Pmode,
9855 gen_rtx_SYMBOL_REF (Pmode, VXWORKS_GOTT_BASE));
9856 output_asm_insn ("mov{l}\t{%2, %0|%0, %2}", xops);
9858 /* Load (*VXWORKS_GOTT_BASE)[VXWORKS_GOTT_INDEX] into the PIC register.
9859 Use %P and a local symbol in order to print VXWORKS_GOTT_INDEX as
9860 an unadorned address. */
9861 xops[2] = gen_rtx_SYMBOL_REF (Pmode, VXWORKS_GOTT_INDEX);
9862 SYMBOL_REF_FLAGS (xops[2]) |= SYMBOL_FLAG_LOCAL;
9863 output_asm_insn ("mov{l}\t{%P2(%0), %0|%0, DWORD PTR %P2[%0]}", xops);
9864 return "";
9867 xops[1] = gen_rtx_SYMBOL_REF (Pmode, GOT_SYMBOL_NAME);
9869 if (!flag_pic)
9871 if (TARGET_MACHO)
9872 /* We don't need a pic base, we're not producing pic. */
9873 gcc_unreachable ();
9875 xops[2] = gen_rtx_LABEL_REF (Pmode, label ? label : gen_label_rtx ());
9876 output_asm_insn ("mov%z0\t{%2, %0|%0, %2}", xops);
9877 targetm.asm_out.internal_label (asm_out_file, "L",
9878 CODE_LABEL_NUMBER (XEXP (xops[2], 0)));
9880 else
9882 char name[32];
9883 get_pc_thunk_name (name, REGNO (dest));
9884 pic_labels_used |= 1 << REGNO (dest);
9886 xops[2] = gen_rtx_SYMBOL_REF (Pmode, ggc_strdup (name));
9887 xops[2] = gen_rtx_MEM (QImode, xops[2]);
9888 output_asm_insn ("%!call\t%X2", xops);
9890 #if TARGET_MACHO
9891 /* Output the Mach-O "canonical" pic base label name ("Lxx$pb") here.
9892 This is what will be referenced by the Mach-O PIC subsystem. */
9893 if (machopic_should_output_picbase_label () || !label)
9894 ASM_OUTPUT_LABEL (asm_out_file, MACHOPIC_FUNCTION_BASE_NAME);
9896 /* When we are restoring the pic base at the site of a nonlocal label,
9897 and we decided to emit the pic base above, we will still output a
9898 local label used for calculating the correction offset (even though
9899 the offset will be 0 in that case). */
9900 if (label)
9901 targetm.asm_out.internal_label (asm_out_file, "L",
9902 CODE_LABEL_NUMBER (label));
9903 #endif
9906 if (!TARGET_MACHO)
9907 output_asm_insn ("add%z0\t{%1, %0|%0, %1}", xops);
9909 return "";
9912 /* Generate a "push" pattern for input ARG. */
9914 static rtx
9915 gen_push (rtx arg)
9917 struct machine_function *m = cfun->machine;
9919 if (m->fs.cfa_reg == stack_pointer_rtx)
9920 m->fs.cfa_offset += UNITS_PER_WORD;
9921 m->fs.sp_offset += UNITS_PER_WORD;
9923 if (REG_P (arg) && GET_MODE (arg) != word_mode)
9924 arg = gen_rtx_REG (word_mode, REGNO (arg));
9926 return gen_rtx_SET (gen_rtx_MEM (word_mode,
9927 gen_rtx_PRE_DEC (Pmode,
9928 stack_pointer_rtx)),
9929 arg);
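/* E.g. for ARG = %rbx on a 64-bit target the RTX built above is
   (set (mem:DI (pre_dec:DI (reg:DI sp))) (reg:DI bx)), which matches
   the push pattern.  (Sketch.)  */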
9932 /* Generate a "pop" pattern for input ARG. */
9934 static rtx
9935 gen_pop (rtx arg)
9937 if (REG_P (arg) && GET_MODE (arg) != word_mode)
9938 arg = gen_rtx_REG (word_mode, REGNO (arg));
9940 return gen_rtx_SET (arg,
9941 gen_rtx_MEM (word_mode,
9942 gen_rtx_POST_INC (Pmode,
9943 stack_pointer_rtx)));
9946 /* Return >= 0 if there is an unused call-clobbered register available
9947 for the entire function. */
9949 static unsigned int
9950 ix86_select_alt_pic_regnum (void)
9952 if (ix86_use_pseudo_pic_reg ())
9953 return INVALID_REGNUM;
9955 if (crtl->is_leaf
9956 && !crtl->profile
9957 && !ix86_current_function_calls_tls_descriptor)
9959 int i, drap;
9960 /* Can't use the same register for both PIC and DRAP. */
9961 if (crtl->drap_reg)
9962 drap = REGNO (crtl->drap_reg);
9963 else
9964 drap = -1;
9965 for (i = 2; i >= 0; --i)
9966 if (i != drap && !df_regs_ever_live_p (i))
9967 return i;
9970 return INVALID_REGNUM;
9973 /* Return TRUE if we need to save REGNO. */
9975 static bool
9976 ix86_save_reg (unsigned int regno, bool maybe_eh_return)
9978 if (regno == REAL_PIC_OFFSET_TABLE_REGNUM
9979 && pic_offset_table_rtx)
9981 if (ix86_use_pseudo_pic_reg ())
9983 /* REAL_PIC_OFFSET_TABLE_REGNUM used by call to
9984 _mcount in prologue. */
9985 if (!TARGET_64BIT && flag_pic && crtl->profile)
9986 return true;
9988 else if (df_regs_ever_live_p (REAL_PIC_OFFSET_TABLE_REGNUM)
9989 || crtl->profile
9990 || crtl->calls_eh_return
9991 || crtl->uses_const_pool
9992 || cfun->has_nonlocal_label)
9993 return ix86_select_alt_pic_regnum () == INVALID_REGNUM;
9996 if (crtl->calls_eh_return && maybe_eh_return)
9998 unsigned i;
9999 for (i = 0; ; i++)
10001 unsigned test = EH_RETURN_DATA_REGNO (i);
10002 if (test == INVALID_REGNUM)
10003 break;
10004 if (test == regno)
10005 return true;
10009 if (crtl->drap_reg
10010 && regno == REGNO (crtl->drap_reg)
10011 && !cfun->machine->no_drap_save_restore)
10012 return true;
10014 return (df_regs_ever_live_p (regno)
10015 && !call_used_regs[regno]
10016 && !fixed_regs[regno]
10017 && (regno != HARD_FRAME_POINTER_REGNUM || !frame_pointer_needed));
10020 /* Return the number of saved general purpose registers. */
10022 static int
10023 ix86_nsaved_regs (void)
10025 int nregs = 0;
10026 int regno;
10028 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
10029 if (!SSE_REGNO_P (regno) && ix86_save_reg (regno, true))
10030 nregs ++;
10031 return nregs;
10034 /* Return the number of saved SSE registers. */
10036 static int
10037 ix86_nsaved_sseregs (void)
10039 int nregs = 0;
10040 int regno;
10042 if (!TARGET_64BIT_MS_ABI)
10043 return 0;
10044 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
10045 if (SSE_REGNO_P (regno) && ix86_save_reg (regno, true))
10046 nregs ++;
10047 return nregs;
10050 /* Given FROM and TO register numbers, say whether this elimination is
10051 allowed. If stack alignment is needed, we can only replace argument
10052 pointer with hard frame pointer, or replace frame pointer with stack
10053 pointer. Otherwise, frame pointer elimination is automatically
10054 handled and all other eliminations are valid. */
10056 static bool
10057 ix86_can_eliminate (const int from, const int to)
10059 if (stack_realign_fp)
10060 return ((from == ARG_POINTER_REGNUM
10061 && to == HARD_FRAME_POINTER_REGNUM)
10062 || (from == FRAME_POINTER_REGNUM
10063 && to == STACK_POINTER_REGNUM));
10064 else
10065 return to == STACK_POINTER_REGNUM ? !frame_pointer_needed : true;
10068 /* Return the offset between two registers, one to be eliminated, and the other
10069 its replacement, at the start of a routine. */
10071 HOST_WIDE_INT
10072 ix86_initial_elimination_offset (int from, int to)
10074 struct ix86_frame frame;
10075 ix86_compute_frame_layout (&frame);
10077 if (from == ARG_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM)
10078 return frame.hard_frame_pointer_offset;
10079 else if (from == FRAME_POINTER_REGNUM
10080 && to == HARD_FRAME_POINTER_REGNUM)
10081 return frame.hard_frame_pointer_offset - frame.frame_pointer_offset;
10082 else
10084 gcc_assert (to == STACK_POINTER_REGNUM);
10086 if (from == ARG_POINTER_REGNUM)
10087 return frame.stack_pointer_offset;
10089 gcc_assert (from == FRAME_POINTER_REGNUM);
10090 return frame.stack_pointer_offset - frame.frame_pointer_offset;
10094 /* In a dynamically-aligned function, we can't know the offset from
10095 stack pointer to frame pointer, so we must ensure that setjmp
10096 eliminates fp against the hard fp (%ebp) rather than trying to
10097 index from %esp up to the top of the frame across a gap that is
10098 of unknown (at compile-time) size. */
10099 static rtx
10100 ix86_builtin_setjmp_frame_value (void)
10102 return stack_realign_fp ? hard_frame_pointer_rtx : virtual_stack_vars_rtx;
10105 /* When using -fsplit-stack, the allocation routines set a field in
10106 the TCB to the bottom of the stack plus this much space, measured
10107 in bytes. */
10109 #define SPLIT_STACK_AVAILABLE 256
10111 /* Fill in structure ix86_frame describing the frame of the currently compiled function. */
10113 static void
10114 ix86_compute_frame_layout (struct ix86_frame *frame)
10116 unsigned HOST_WIDE_INT stack_alignment_needed;
10117 HOST_WIDE_INT offset;
10118 unsigned HOST_WIDE_INT preferred_alignment;
10119 HOST_WIDE_INT size = get_frame_size ();
10120 HOST_WIDE_INT to_allocate;
10122 frame->nregs = ix86_nsaved_regs ();
10123 frame->nsseregs = ix86_nsaved_sseregs ();
10125 /* The 64-bit MS ABI seems to require stack alignment to always be 16, except
10126 for function prologues and leaf functions. */
10127 if ((TARGET_64BIT_MS_ABI && crtl->preferred_stack_boundary < 128)
10128 && (!crtl->is_leaf || cfun->calls_alloca != 0
10129 || ix86_current_function_calls_tls_descriptor))
10131 crtl->preferred_stack_boundary = 128;
10132 crtl->stack_alignment_needed = 128;
10134 /* preferred_stack_boundary is never updated for call
10135 expanded from tls descriptor. Update it here. We don't update it in
10136 expand stage because according to the comments before
10137 ix86_current_function_calls_tls_descriptor, tls calls may be optimized
10138 away. */
10139 else if (ix86_current_function_calls_tls_descriptor
10140 && crtl->preferred_stack_boundary < PREFERRED_STACK_BOUNDARY)
10142 crtl->preferred_stack_boundary = PREFERRED_STACK_BOUNDARY;
10143 if (crtl->stack_alignment_needed < PREFERRED_STACK_BOUNDARY)
10144 crtl->stack_alignment_needed = PREFERRED_STACK_BOUNDARY;
10147 stack_alignment_needed = crtl->stack_alignment_needed / BITS_PER_UNIT;
10148 preferred_alignment = crtl->preferred_stack_boundary / BITS_PER_UNIT;
10150 gcc_assert (!size || stack_alignment_needed);
10151 gcc_assert (preferred_alignment >= STACK_BOUNDARY / BITS_PER_UNIT);
10152 gcc_assert (preferred_alignment <= stack_alignment_needed);
10154 /* For SEH we have to limit the amount of code movement into the prologue.
10155 At present we do this via a BLOCKAGE, at which point there's very little
10156 scheduling that can be done, which means that there's very little point
10157 in doing anything except PUSHs. */
10158 if (TARGET_SEH)
10159 cfun->machine->use_fast_prologue_epilogue = false;
10161 /* During reload iterations the number of registers saved can change.
10162 Recompute the value as needed. Do not recompute when the number of registers
10163 didn't change, as reload makes multiple calls to this function and does not
10164 expect the decision to change within a single iteration. */
10165 else if (!optimize_bb_for_size_p (ENTRY_BLOCK_PTR_FOR_FN (cfun))
10166 && cfun->machine->use_fast_prologue_epilogue_nregs != frame->nregs)
10168 int count = frame->nregs;
10169 struct cgraph_node *node = cgraph_node::get (current_function_decl);
10171 cfun->machine->use_fast_prologue_epilogue_nregs = count;
10173 /* The fast prologue uses move instead of push to save registers. This
10174 is significantly longer, but also executes faster as modern hardware
10175 can execute the moves in parallel, but can't do that for push/pop.
10177 Be careful about choosing which prologue to emit: when the function takes
10178 many instructions to execute, we may as well use the slow version, likewise
10179 when the function is known to be outside a hot spot (known only with
10180 feedback). Weight the size of the function by the number of registers
10181 to save, as it is cheap to use one or two push instructions but very
10182 slow to use many of them. */
10183 if (count)
10184 count = (count - 1) * FAST_PROLOGUE_INSN_COUNT;
10185 if (node->frequency < NODE_FREQUENCY_NORMAL
10186 || (flag_branch_probabilities
10187 && node->frequency < NODE_FREQUENCY_HOT))
10188 cfun->machine->use_fast_prologue_epilogue = false;
10189 else
10190 cfun->machine->use_fast_prologue_epilogue
10191 = !expensive_function_p (count);
10194 frame->save_regs_using_mov
10195 = (TARGET_PROLOGUE_USING_MOVE && cfun->machine->use_fast_prologue_epilogue
10196 /* If static stack checking is enabled and done with probes,
10197 the registers need to be saved before allocating the frame. */
10198 && flag_stack_check != STATIC_BUILTIN_STACK_CHECK);
10200 /* Skip return address. */
10201 offset = UNITS_PER_WORD;
10203 /* Skip pushed static chain. */
10204 if (ix86_static_chain_on_stack)
10205 offset += UNITS_PER_WORD;
10207 /* Skip saved base pointer. */
10208 if (frame_pointer_needed)
10209 offset += UNITS_PER_WORD;
10210 frame->hfp_save_offset = offset;
10212 /* The traditional frame pointer location is at the top of the frame. */
10213 frame->hard_frame_pointer_offset = offset;
10215 /* Register save area */
10216 offset += frame->nregs * UNITS_PER_WORD;
10217 frame->reg_save_offset = offset;
10219 /* On SEH target, registers are pushed just before the frame pointer
10220 location. */
10221 if (TARGET_SEH)
10222 frame->hard_frame_pointer_offset = offset;
10224 /* Align and set SSE register save area. */
10225 if (frame->nsseregs)
10227 /* The only ABI that has saved SSE registers (Win64) also has a
10228 16-byte aligned default stack, and thus we don't need to be
10229 within the re-aligned local stack frame to save them. */
10230 gcc_assert (INCOMING_STACK_BOUNDARY >= 128);
10231 offset = (offset + 16 - 1) & -16;
10232 offset += frame->nsseregs * 16;
10234 frame->sse_reg_save_offset = offset;
10236 /* The re-aligned stack starts here. Values before this point are not
10237 directly comparable with values below this point. In order to make
10238 sure that no value happens to be the same before and after, force
10239 the alignment computation below to add a non-zero value. */
10240 if (stack_realign_fp)
10241 offset = (offset + stack_alignment_needed) & -stack_alignment_needed;
10243 /* Va-arg area */
10244 frame->va_arg_size = ix86_varargs_gpr_size + ix86_varargs_fpr_size;
10245 offset += frame->va_arg_size;
10247 /* Align start of frame for local function. */
10248 if (stack_realign_fp
10249 || offset != frame->sse_reg_save_offset
10250 || size != 0
10251 || !crtl->is_leaf
10252 || cfun->calls_alloca
10253 || ix86_current_function_calls_tls_descriptor)
10254 offset = (offset + stack_alignment_needed - 1) & -stack_alignment_needed;
10256 /* Frame pointer points here. */
10257 frame->frame_pointer_offset = offset;
10259 offset += size;
10261 /* Add the outgoing arguments area. It can be skipped if we eliminated
10262 all the function calls as dead code.
10263 Skipping is however impossible when the function calls alloca, as the
10264 alloca expander assumes that the last crtl->outgoing_args_size bytes
10265 of the stack frame are unused. */
10266 if (ACCUMULATE_OUTGOING_ARGS
10267 && (!crtl->is_leaf || cfun->calls_alloca
10268 || ix86_current_function_calls_tls_descriptor))
10270 offset += crtl->outgoing_args_size;
10271 frame->outgoing_arguments_size = crtl->outgoing_args_size;
10273 else
10274 frame->outgoing_arguments_size = 0;
10276 /* Align stack boundary. Only needed if we're calling another function
10277 or using alloca. */
10278 if (!crtl->is_leaf || cfun->calls_alloca
10279 || ix86_current_function_calls_tls_descriptor)
10280 offset = (offset + preferred_alignment - 1) & -preferred_alignment;
10282 /* We've reached end of stack frame. */
10283 frame->stack_pointer_offset = offset;
10285 /* Size prologue needs to allocate. */
10286 to_allocate = offset - frame->sse_reg_save_offset;
10288 if ((!to_allocate && frame->nregs <= 1)
10289 || (TARGET_64BIT && to_allocate >= (HOST_WIDE_INT) 0x80000000))
10290 frame->save_regs_using_mov = false;
10292 if (ix86_using_red_zone ()
10293 && crtl->sp_is_unchanging
10294 && crtl->is_leaf
10295 && !ix86_current_function_calls_tls_descriptor)
10297 frame->red_zone_size = to_allocate;
10298 if (frame->save_regs_using_mov)
10299 frame->red_zone_size += frame->nregs * UNITS_PER_WORD;
10300 if (frame->red_zone_size > RED_ZONE_SIZE - RED_ZONE_RESERVE)
10301 frame->red_zone_size = RED_ZONE_SIZE - RED_ZONE_RESERVE;
10303 else
10304 frame->red_zone_size = 0;
10305 frame->stack_pointer_offset -= frame->red_zone_size;
10307 /* The SEH frame pointer location is near the bottom of the frame.
10308 This is enforced by the fact that the difference between the
10309 stack pointer and the frame pointer is limited to 240 bytes in
10310 the unwind data structure. */
10311 if (TARGET_SEH)
10313 HOST_WIDE_INT diff;
10315 /* If we can leave the frame pointer where it is, do so. Also, returns
10316 the establisher frame for __builtin_frame_address (0). */
10317 diff = frame->stack_pointer_offset - frame->hard_frame_pointer_offset;
10318 if (diff <= SEH_MAX_FRAME_SIZE
10319 && (diff > 240 || (diff & 15) != 0)
10320 && !crtl->accesses_prior_frames)
10322 /* Ideally we'd determine what portion of the local stack frame
10323 (within the constraint of the lowest 240) is most heavily used.
10324 But without that complication, simply bias the frame pointer
10325 by 128 bytes so as to maximize the amount of the local stack
10326 frame that is addressable with 8-bit offsets. */
10327 frame->hard_frame_pointer_offset = frame->stack_pointer_offset - 128;
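/* Sketch of the offsets computed above, growing away from the incoming
   stack pointer (not every area exists in every function):

     return address                        offset UNITS_PER_WORD
     [pushed static chain]
     [saved frame pointer]                 hard_frame_pointer_offset (traditional)
     GP register save area                 reg_save_offset
     [SSE register save area, 16-aligned]  sse_reg_save_offset
     va_arg register dump area
     local variables                       frame_pointer_offset
     outgoing argument area
     end of frame                          stack_pointer_offset (red zone subtracted)  */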
10332 /* This is semi-inlined memory_address_length, but simplified
10333 since we know that we're always dealing with reg+offset, and
10334 to avoid having to create and discard all that rtl. */
10336 static inline int
10337 choose_baseaddr_len (unsigned int regno, HOST_WIDE_INT offset)
10339 int len = 4;
10341 if (offset == 0)
10343 /* EBP and R13 cannot be encoded without an offset. */
10344 len = (regno == BP_REG || regno == R13_REG);
10346 else if (IN_RANGE (offset, -128, 127))
10347 len = 1;
10349 /* ESP and R12 must be encoded with a SIB byte. */
10350 if (regno == SP_REG || regno == R12_REG)
10351 len++;
10353 return len;
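/* Example encoding lengths returned above (displacement plus any SIB byte,
   not counting opcode/modrm):  (%eax) -> 0, (%ebp) -> 1, 16(%esp) -> 2,
   1024(%ebx) -> 4.  (Sketch.)  */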
10356 /* Return an RTX that points to CFA_OFFSET within the stack frame.
10357 The valid base registers are taken from CFUN->MACHINE->FS. */
10359 static rtx
10360 choose_baseaddr (HOST_WIDE_INT cfa_offset)
10362 const struct machine_function *m = cfun->machine;
10363 rtx base_reg = NULL;
10364 HOST_WIDE_INT base_offset = 0;
10366 if (m->use_fast_prologue_epilogue)
10368 /* Choose the base register most likely to allow the most scheduling
10369 opportunities. Generally FP is valid throughout the function,
10370 while DRAP must be reloaded within the epilogue. But choose either
10371 over the SP due to increased encoding size. */
10373 if (m->fs.fp_valid)
10375 base_reg = hard_frame_pointer_rtx;
10376 base_offset = m->fs.fp_offset - cfa_offset;
10378 else if (m->fs.drap_valid)
10380 base_reg = crtl->drap_reg;
10381 base_offset = 0 - cfa_offset;
10383 else if (m->fs.sp_valid)
10385 base_reg = stack_pointer_rtx;
10386 base_offset = m->fs.sp_offset - cfa_offset;
10389 else
10391 HOST_WIDE_INT toffset;
10392 int len = 16, tlen;
10394 /* Choose the base register with the smallest address encoding.
10395 With a tie, choose FP > DRAP > SP. */
10396 if (m->fs.sp_valid)
10398 base_reg = stack_pointer_rtx;
10399 base_offset = m->fs.sp_offset - cfa_offset;
10400 len = choose_baseaddr_len (STACK_POINTER_REGNUM, base_offset);
10402 if (m->fs.drap_valid)
10404 toffset = 0 - cfa_offset;
10405 tlen = choose_baseaddr_len (REGNO (crtl->drap_reg), toffset);
10406 if (tlen <= len)
10408 base_reg = crtl->drap_reg;
10409 base_offset = toffset;
10410 len = tlen;
10413 if (m->fs.fp_valid)
10415 toffset = m->fs.fp_offset - cfa_offset;
10416 tlen = choose_baseaddr_len (HARD_FRAME_POINTER_REGNUM, toffset);
10417 if (tlen <= len)
10419 base_reg = hard_frame_pointer_rtx;
10420 base_offset = toffset;
10421 len = tlen;
10425 gcc_assert (base_reg != NULL);
10427 return plus_constant (Pmode, base_reg, base_offset);
10430 /* Emit code to save registers in the prologue. */
10432 static void
10433 ix86_emit_save_regs (void)
10435 unsigned int regno;
10436 rtx_insn *insn;
10438 for (regno = FIRST_PSEUDO_REGISTER - 1; regno-- > 0; )
10439 if (!SSE_REGNO_P (regno) && ix86_save_reg (regno, true))
10441 insn = emit_insn (gen_push (gen_rtx_REG (word_mode, regno)));
10442 RTX_FRAME_RELATED_P (insn) = 1;
10446 /* Emit a single register save at CFA - CFA_OFFSET. */
10448 static void
10449 ix86_emit_save_reg_using_mov (machine_mode mode, unsigned int regno,
10450 HOST_WIDE_INT cfa_offset)
10452 struct machine_function *m = cfun->machine;
10453 rtx reg = gen_rtx_REG (mode, regno);
10454 rtx mem, addr, base, insn;
10456 addr = choose_baseaddr (cfa_offset);
10457 mem = gen_frame_mem (mode, addr);
10459 /* For SSE saves, we need to indicate the 128-bit alignment. */
10460 set_mem_align (mem, GET_MODE_ALIGNMENT (mode));
10462 insn = emit_move_insn (mem, reg);
10463 RTX_FRAME_RELATED_P (insn) = 1;
10465 base = addr;
10466 if (GET_CODE (base) == PLUS)
10467 base = XEXP (base, 0);
10468 gcc_checking_assert (REG_P (base));
10470 /* When saving registers into a re-aligned local stack frame, avoid
10471 any tricky guessing by dwarf2out. */
10472 if (m->fs.realigned)
10474 gcc_checking_assert (stack_realign_drap);
10476 if (regno == REGNO (crtl->drap_reg))
10478 /* A bit of a hack. We force the DRAP register to be saved in
10479 the re-aligned stack frame, which provides us with a copy
10480 of the CFA that will last past the prologue. Install it. */
10481 gcc_checking_assert (cfun->machine->fs.fp_valid);
10482 addr = plus_constant (Pmode, hard_frame_pointer_rtx,
10483 cfun->machine->fs.fp_offset - cfa_offset);
10484 mem = gen_rtx_MEM (mode, addr);
10485 add_reg_note (insn, REG_CFA_DEF_CFA, mem);
10487 else
10489 /* The frame pointer is a stable reference within the
10490 aligned frame. Use it. */
10491 gcc_checking_assert (cfun->machine->fs.fp_valid);
10492 addr = plus_constant (Pmode, hard_frame_pointer_rtx,
10493 cfun->machine->fs.fp_offset - cfa_offset);
10494 mem = gen_rtx_MEM (mode, addr);
10495 add_reg_note (insn, REG_CFA_EXPRESSION, gen_rtx_SET (mem, reg));
10499 /* The memory may not be relative to the current CFA register,
10500 which means that we may need to generate a new pattern for
10501 use by the unwind info. */
10502 else if (base != m->fs.cfa_reg)
10504 addr = plus_constant (Pmode, m->fs.cfa_reg,
10505 m->fs.cfa_offset - cfa_offset);
10506 mem = gen_rtx_MEM (mode, addr);
10507 add_reg_note (insn, REG_CFA_OFFSET, gen_rtx_SET (mem, reg));
10511 /* Emit code to save registers using MOV insns.
10512 First register is stored at CFA - CFA_OFFSET. */
10513 static void
10514 ix86_emit_save_regs_using_mov (HOST_WIDE_INT cfa_offset)
10516 unsigned int regno;
10518 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
10519 if (!SSE_REGNO_P (regno) && ix86_save_reg (regno, true))
10521 ix86_emit_save_reg_using_mov (word_mode, regno, cfa_offset);
10522 cfa_offset -= UNITS_PER_WORD;
10526 /* Emit code to save SSE registers using MOV insns.
10527 First register is stored at CFA - CFA_OFFSET. */
10528 static void
10529 ix86_emit_save_sse_regs_using_mov (HOST_WIDE_INT cfa_offset)
10531 unsigned int regno;
10533 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
10534 if (SSE_REGNO_P (regno) && ix86_save_reg (regno, true))
10536 ix86_emit_save_reg_using_mov (V4SFmode, regno, cfa_offset);
10537 cfa_offset -= 16;
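/* Worked example, purely illustrative: if a loop's first slot is at
   CFA - 64 on x86-64, the two loops above place the saves at

       GPR #1 at CFA - 64, GPR #2 at CFA - 56, GPR #3 at CFA - 48, ...
       SSE #1 at CFA - 64, SSE #2 at CFA - 48, SSE #3 at CFA - 32, ...

   i.e. GPR slots step by UNITS_PER_WORD and SSE slots by 16; the real
   starting offsets are computed by ix86_compute_frame_layout.  */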
10541 static GTY(()) rtx queued_cfa_restores;
10543 /* Add a REG_CFA_RESTORE REG note to INSN, or queue it until the next stack
10544 manipulation insn. The value is on the stack at CFA - CFA_OFFSET.
10545 Don't add the note if the previously saved value will be left untouched
10546 within the stack red zone until return, as unwinders can find the same value
10547 in the register and on the stack. */
10549 static void
10550 ix86_add_cfa_restore_note (rtx_insn *insn, rtx reg, HOST_WIDE_INT cfa_offset)
10552 if (!crtl->shrink_wrapped
10553 && cfa_offset <= cfun->machine->fs.red_zone_offset)
10554 return;
10556 if (insn)
10558 add_reg_note (insn, REG_CFA_RESTORE, reg);
10559 RTX_FRAME_RELATED_P (insn) = 1;
10561 else
10562 queued_cfa_restores
10563 = alloc_reg_note (REG_CFA_RESTORE, reg, queued_cfa_restores);
10566 /* Add queued REG_CFA_RESTORE notes if any to INSN. */
10568 static void
10569 ix86_add_queued_cfa_restore_notes (rtx insn)
10571 rtx last;
10572 if (!queued_cfa_restores)
10573 return;
10574 for (last = queued_cfa_restores; XEXP (last, 1); last = XEXP (last, 1))
10576 XEXP (last, 1) = REG_NOTES (insn);
10577 REG_NOTES (insn) = queued_cfa_restores;
10578 queued_cfa_restores = NULL_RTX;
10579 RTX_FRAME_RELATED_P (insn) = 1;
10582 /* Expand prologue or epilogue stack adjustment.
10583 The pattern exists to put a dependency on all ebp-based memory accesses.
10584 STYLE should be negative if instructions should be marked as frame related,
10585 zero if the %r11 register is live and cannot be freely used, and positive
10586 otherwise. */
10588 static void
10589 pro_epilogue_adjust_stack (rtx dest, rtx src, rtx offset,
10590 int style, bool set_cfa)
10592 struct machine_function *m = cfun->machine;
10593 rtx insn;
10594 bool add_frame_related_expr = false;
10596 if (Pmode == SImode)
10597 insn = gen_pro_epilogue_adjust_stack_si_add (dest, src, offset);
10598 else if (x86_64_immediate_operand (offset, DImode))
10599 insn = gen_pro_epilogue_adjust_stack_di_add (dest, src, offset);
10600 else
10602 rtx tmp;
10603 /* r11 is used by indirect sibcall return as well, set before the
10604 epilogue and used after the epilogue. */
10605 if (style)
10606 tmp = gen_rtx_REG (DImode, R11_REG);
10607 else
10609 gcc_assert (src != hard_frame_pointer_rtx
10610 && dest != hard_frame_pointer_rtx);
10611 tmp = hard_frame_pointer_rtx;
10613 insn = emit_insn (gen_rtx_SET (tmp, offset));
10614 if (style < 0)
10615 add_frame_related_expr = true;
10617 insn = gen_pro_epilogue_adjust_stack_di_add (dest, src, tmp);
10620 insn = emit_insn (insn);
10621 if (style >= 0)
10622 ix86_add_queued_cfa_restore_notes (insn);
10624 if (set_cfa)
10626 rtx r;
10628 gcc_assert (m->fs.cfa_reg == src);
10629 m->fs.cfa_offset += INTVAL (offset);
10630 m->fs.cfa_reg = dest;
10632 r = gen_rtx_PLUS (Pmode, src, offset);
10633 r = gen_rtx_SET (dest, r);
10634 add_reg_note (insn, REG_CFA_ADJUST_CFA, r);
10635 RTX_FRAME_RELATED_P (insn) = 1;
10637 else if (style < 0)
10639 RTX_FRAME_RELATED_P (insn) = 1;
10640 if (add_frame_related_expr)
10642 rtx r = gen_rtx_PLUS (Pmode, src, offset);
10643 r = gen_rtx_SET (dest, r);
10644 add_reg_note (insn, REG_FRAME_RELATED_EXPR, r);
10648 if (dest == stack_pointer_rtx)
10650 HOST_WIDE_INT ooffset = m->fs.sp_offset;
10651 bool valid = m->fs.sp_valid;
10653 if (src == hard_frame_pointer_rtx)
10655 valid = m->fs.fp_valid;
10656 ooffset = m->fs.fp_offset;
10658 else if (src == crtl->drap_reg)
10660 valid = m->fs.drap_valid;
10661 ooffset = 0;
10663 else
10665 /* Else there are two possibilities: SP itself, which we set
10666 up as the default above. Or EH_RETURN_STACKADJ_RTX, which is
10667 taken care of by hand along the eh_return path. */
10668 gcc_checking_assert (src == stack_pointer_rtx
10669 || offset == const0_rtx);
10672 m->fs.sp_offset = ooffset - INTVAL (offset);
10673 m->fs.sp_valid = valid;
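/* Minimal usage sketch, mirroring calls made later in this file and
   shown only to illustrate the STYLE/SET_CFA convention.

   In the prologue, the allocation is marked frame related (STYLE == -1)
   and the CFA is updated while it is still the stack pointer:

       pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
                                  GEN_INT (-allocate), -1,
                                  m->fs.cfa_reg == stack_pointer_rtx);

   In the epilogue, STYLE >= 0 also lets queued REG_CFA_RESTORE notes
   attach to the adjustment insn:

       pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
                                  GEN_INT (m->fs.sp_offset
                                           - frame.reg_save_offset),
                                  style,
                                  m->fs.cfa_reg == stack_pointer_rtx);  */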
10677 /* Find an available register to be used as the dynamic realign argument
10678 pointer register. Such a register will be written in the prologue and
10679 used at the beginning of the body, so it must not be
10680 1. a parameter passing register.
10681 2. the GOT pointer.
10682 We reuse the static-chain register if it is available. Otherwise, we
10683 use DI for i386 and R13 for x86-64. We chose R13 since it has a
10684 shorter encoding.
10686 Return: the regno of the chosen register. */
10688 static unsigned int
10689 find_drap_reg (void)
10691 tree decl = cfun->decl;
10693 if (TARGET_64BIT)
10695 /* Use R13 for a nested function or a function that needs a static chain.
10696 Since a function with a tail call may use any caller-saved
10697 register in the epilogue, DRAP must not use a caller-saved
10698 register in that case. */
10699 if (DECL_STATIC_CHAIN (decl) || crtl->tail_call_emit)
10700 return R13_REG;
10702 return R10_REG;
10704 else
10706 /* Use DI for a nested function or a function that needs a static chain.
10707 Since a function with a tail call may use any caller-saved
10708 register in the epilogue, DRAP must not use a caller-saved
10709 register in that case. */
10710 if (DECL_STATIC_CHAIN (decl) || crtl->tail_call_emit)
10711 return DI_REG;
10713 /* Reuse static chain register if it isn't used for parameter
10714 passing. */
10715 if (ix86_function_regparm (TREE_TYPE (decl), decl) <= 2)
10717 unsigned int ccvt = ix86_get_callcvt (TREE_TYPE (decl));
10718 if ((ccvt & (IX86_CALLCVT_FASTCALL | IX86_CALLCVT_THISCALL)) == 0)
10719 return CX_REG;
10721 return DI_REG;
10725 /* Return minimum incoming stack alignment. */
10727 static unsigned int
10728 ix86_minimum_incoming_stack_boundary (bool sibcall)
10730 unsigned int incoming_stack_boundary;
10732 /* Prefer the one specified at command line. */
10733 if (ix86_user_incoming_stack_boundary)
10734 incoming_stack_boundary = ix86_user_incoming_stack_boundary;
10735 /* In 32-bit mode, use MIN_STACK_BOUNDARY for the incoming stack boundary
10736 if -mstackrealign is used, this isn't the sibcall check, and the
10737 estimated stack alignment is 128 bits. */
10738 else if (!sibcall
10739 && !TARGET_64BIT
10740 && ix86_force_align_arg_pointer
10741 && crtl->stack_alignment_estimated == 128)
10742 incoming_stack_boundary = MIN_STACK_BOUNDARY;
10743 else
10744 incoming_stack_boundary = ix86_default_incoming_stack_boundary;
10746 /* Incoming stack alignment can be changed on individual functions
10747 via force_align_arg_pointer attribute. We use the smallest
10748 incoming stack boundary. */
10749 if (incoming_stack_boundary > MIN_STACK_BOUNDARY
10750 && lookup_attribute (ix86_force_align_arg_pointer_string,
10751 TYPE_ATTRIBUTES (TREE_TYPE (current_function_decl))))
10752 incoming_stack_boundary = MIN_STACK_BOUNDARY;
10754 /* The incoming stack frame has to be aligned at least at
10755 parm_stack_boundary. */
10756 if (incoming_stack_boundary < crtl->parm_stack_boundary)
10757 incoming_stack_boundary = crtl->parm_stack_boundary;
10759 /* The stack at the entry of main is aligned by the runtime. We use the
10760 smallest incoming stack boundary. */
10761 if (incoming_stack_boundary > MAIN_STACK_BOUNDARY
10762 && DECL_NAME (current_function_decl)
10763 && MAIN_NAME_P (DECL_NAME (current_function_decl))
10764 && DECL_FILE_SCOPE_P (current_function_decl))
10765 incoming_stack_boundary = MAIN_STACK_BOUNDARY;
10767 return incoming_stack_boundary;
10770 /* Update incoming stack boundary and estimated stack alignment. */
10772 static void
10773 ix86_update_stack_boundary (void)
10775 ix86_incoming_stack_boundary
10776 = ix86_minimum_incoming_stack_boundary (false);
10778 /* x86_64 varargs needs 16-byte stack alignment for the register save
10779 area. */
10780 if (TARGET_64BIT
10781 && cfun->stdarg
10782 && crtl->stack_alignment_estimated < 128)
10783 crtl->stack_alignment_estimated = 128;
10786 /* Handle the TARGET_GET_DRAP_RTX hook. Return NULL if no DRAP is
10787 needed or an rtx for DRAP otherwise. */
10789 static rtx
10790 ix86_get_drap_rtx (void)
10792 if (ix86_force_drap || !ACCUMULATE_OUTGOING_ARGS)
10793 crtl->need_drap = true;
10795 if (stack_realign_drap)
10797 /* Assign DRAP to vDRAP and return vDRAP. */
10798 unsigned int regno = find_drap_reg ();
10799 rtx drap_vreg;
10800 rtx arg_ptr;
10801 rtx_insn *seq, *insn;
10803 arg_ptr = gen_rtx_REG (Pmode, regno);
10804 crtl->drap_reg = arg_ptr;
10806 start_sequence ();
10807 drap_vreg = copy_to_reg (arg_ptr);
10808 seq = get_insns ();
10809 end_sequence ();
10811 insn = emit_insn_before (seq, NEXT_INSN (entry_of_function ()));
10812 if (!optimize)
10814 add_reg_note (insn, REG_CFA_SET_VDRAP, drap_vreg);
10815 RTX_FRAME_RELATED_P (insn) = 1;
10817 return drap_vreg;
10819 else
10820 return NULL;
10823 /* Handle the TARGET_INTERNAL_ARG_POINTER hook. */
10825 static rtx
10826 ix86_internal_arg_pointer (void)
10828 return virtual_incoming_args_rtx;
10831 struct scratch_reg {
10832 rtx reg;
10833 bool saved;
10836 /* Return a short-lived scratch register for use on function entry.
10837 In 32-bit mode, it is valid only after the registers are saved
10838 in the prologue. This register must be released by means of
10839 release_scratch_register_on_entry once it is dead. */
10841 static void
10842 get_scratch_register_on_entry (struct scratch_reg *sr)
10844 int regno;
10846 sr->saved = false;
10848 if (TARGET_64BIT)
10850 /* We always use R11 in 64-bit mode. */
10851 regno = R11_REG;
10853 else
10855 tree decl = current_function_decl, fntype = TREE_TYPE (decl);
10856 bool fastcall_p
10857 = lookup_attribute ("fastcall", TYPE_ATTRIBUTES (fntype)) != NULL_TREE;
10858 bool thiscall_p
10859 = lookup_attribute ("thiscall", TYPE_ATTRIBUTES (fntype)) != NULL_TREE;
10860 bool static_chain_p = DECL_STATIC_CHAIN (decl);
10861 int regparm = ix86_function_regparm (fntype, decl);
10862 int drap_regno
10863 = crtl->drap_reg ? REGNO (crtl->drap_reg) : INVALID_REGNUM;
10865 /* 'fastcall' sets regparm to 2, uses ecx/edx for arguments and eax
10866 for the static chain register. */
10867 if ((regparm < 1 || (fastcall_p && !static_chain_p))
10868 && drap_regno != AX_REG)
10869 regno = AX_REG;
10870 /* 'thiscall' sets regparm to 1, uses ecx for arguments and edx
10871 for the static chain register. */
10872 else if (thiscall_p && !static_chain_p && drap_regno != AX_REG)
10873 regno = AX_REG;
10874 else if (regparm < 2 && !thiscall_p && drap_regno != DX_REG)
10875 regno = DX_REG;
10876 /* ecx is the static chain register. */
10877 else if (regparm < 3 && !fastcall_p && !thiscall_p
10878 && !static_chain_p
10879 && drap_regno != CX_REG)
10880 regno = CX_REG;
10881 else if (ix86_save_reg (BX_REG, true))
10882 regno = BX_REG;
10883 /* esi is the static chain register. */
10884 else if (!(regparm == 3 && static_chain_p)
10885 && ix86_save_reg (SI_REG, true))
10886 regno = SI_REG;
10887 else if (ix86_save_reg (DI_REG, true))
10888 regno = DI_REG;
10889 else
10891 regno = (drap_regno == AX_REG ? DX_REG : AX_REG);
10892 sr->saved = true;
10896 sr->reg = gen_rtx_REG (Pmode, regno);
10897 if (sr->saved)
10899 rtx_insn *insn = emit_insn (gen_push (sr->reg));
10900 RTX_FRAME_RELATED_P (insn) = 1;
10904 /* Release a scratch register obtained from the preceding function. */
10906 static void
10907 release_scratch_register_on_entry (struct scratch_reg *sr)
10909 if (sr->saved)
10911 struct machine_function *m = cfun->machine;
10912 rtx x, insn = emit_insn (gen_pop (sr->reg));
10914 /* The RTX_FRAME_RELATED_P mechanism doesn't know about pop. */
10915 RTX_FRAME_RELATED_P (insn) = 1;
10916 x = gen_rtx_PLUS (Pmode, stack_pointer_rtx, GEN_INT (UNITS_PER_WORD));
10917 x = gen_rtx_SET (stack_pointer_rtx, x);
10918 add_reg_note (insn, REG_FRAME_RELATED_EXPR, x);
10919 m->fs.sp_offset -= UNITS_PER_WORD;
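/* Minimal usage sketch for the pair above; this is the pattern the
   stack-probing routines below follow and is shown only for
   orientation:

       struct scratch_reg sr;

       get_scratch_register_on_entry (&sr);
       ... use sr.reg as a prologue temporary ...
       release_scratch_register_on_entry (&sr);

   On 64-bit targets sr.reg is always %r11; on 32-bit targets a free
   call-clobbered register is picked, and sr.saved records whether it
   had to be pushed (and must therefore be popped on release).  */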
10923 #define PROBE_INTERVAL (1 << STACK_CHECK_PROBE_INTERVAL_EXP)
10925 /* Emit code to adjust the stack pointer by SIZE bytes while probing it. */
10927 static void
10928 ix86_adjust_stack_and_probe (const HOST_WIDE_INT size)
10930 /* We skip the probe for the first interval + a small dope of 4 words and
10931 probe that many bytes past the specified size to maintain a protection
10932 area at the bottom of the stack. */
10933 const int dope = 4 * UNITS_PER_WORD;
10934 rtx size_rtx = GEN_INT (size), last;
10936 /* See if we have a constant small number of probes to generate. If so,
10937 that's the easy case. The run-time loop is made up of 11 insns in the
10938 generic case while the compile-time loop is made up of 3+2*(n-1) insns
10939 for n # of intervals. */
10940 if (size <= 5 * PROBE_INTERVAL)
10942 HOST_WIDE_INT i, adjust;
10943 bool first_probe = true;
10945 /* Adjust SP and probe at PROBE_INTERVAL + N * PROBE_INTERVAL for
10946 values of N from 1 until it exceeds SIZE. If only one probe is
10947 needed, this will not generate any code. Then adjust and probe
10948 to PROBE_INTERVAL + SIZE. */
10949 for (i = PROBE_INTERVAL; i < size; i += PROBE_INTERVAL)
10951 if (first_probe)
10953 adjust = 2 * PROBE_INTERVAL + dope;
10954 first_probe = false;
10956 else
10957 adjust = PROBE_INTERVAL;
10959 emit_insn (gen_rtx_SET (stack_pointer_rtx,
10960 plus_constant (Pmode, stack_pointer_rtx,
10961 -adjust)));
10962 emit_stack_probe (stack_pointer_rtx);
10965 if (first_probe)
10966 adjust = size + PROBE_INTERVAL + dope;
10967 else
10968 adjust = size + PROBE_INTERVAL - i;
10970 emit_insn (gen_rtx_SET (stack_pointer_rtx,
10971 plus_constant (Pmode, stack_pointer_rtx,
10972 -adjust)));
10973 emit_stack_probe (stack_pointer_rtx);
10975 /* Adjust back to account for the additional first interval. */
10976 last = emit_insn (gen_rtx_SET (stack_pointer_rtx,
10977 plus_constant (Pmode, stack_pointer_rtx,
10978 PROBE_INTERVAL + dope)));
10981 /* Otherwise, do the same as above, but in a loop. Note that we must be
10982 extra careful with variables wrapping around because we might be at
10983 the very top (or the very bottom) of the address space and we have
10984 to be able to handle this case properly; in particular, we use an
10985 equality test for the loop condition. */
10986 else
10988 HOST_WIDE_INT rounded_size;
10989 struct scratch_reg sr;
10991 get_scratch_register_on_entry (&sr);
10994 /* Step 1: round SIZE to the previous multiple of the interval. */
10996 rounded_size = size & -PROBE_INTERVAL;
10999 /* Step 2: compute initial and final value of the loop counter. */
11001 /* SP = SP_0 + PROBE_INTERVAL. */
11002 emit_insn (gen_rtx_SET (stack_pointer_rtx,
11003 plus_constant (Pmode, stack_pointer_rtx,
11004 - (PROBE_INTERVAL + dope))));
11006 /* LAST_ADDR = SP_0 + PROBE_INTERVAL + ROUNDED_SIZE. */
11007 emit_move_insn (sr.reg, GEN_INT (-rounded_size));
11008 emit_insn (gen_rtx_SET (sr.reg,
11009 gen_rtx_PLUS (Pmode, sr.reg,
11010 stack_pointer_rtx)));
11013 /* Step 3: the loop
11015 while (SP != LAST_ADDR)
11017 SP = SP + PROBE_INTERVAL
11018 probe at SP
11021 adjusts SP and probes to PROBE_INTERVAL + N * PROBE_INTERVAL for
11022 values of N from 1 until it is equal to ROUNDED_SIZE. */
11024 emit_insn (ix86_gen_adjust_stack_and_probe (sr.reg, sr.reg, size_rtx));
11027 /* Step 4: adjust SP and probe at PROBE_INTERVAL + SIZE if we cannot
11028 assert at compile-time that SIZE is equal to ROUNDED_SIZE. */
11030 if (size != rounded_size)
11032 emit_insn (gen_rtx_SET (stack_pointer_rtx,
11033 plus_constant (Pmode, stack_pointer_rtx,
11034 rounded_size - size)));
11035 emit_stack_probe (stack_pointer_rtx);
11038 /* Adjust back to account for the additional first interval. */
11039 last = emit_insn (gen_rtx_SET (stack_pointer_rtx,
11040 plus_constant (Pmode, stack_pointer_rtx,
11041 PROBE_INTERVAL + dope)));
11043 release_scratch_register_on_entry (&sr);
11046 gcc_assert (cfun->machine->fs.cfa_reg != stack_pointer_rtx);
11048 /* Even if the stack pointer isn't the CFA register, we need to correctly
11049 describe the adjustments made to it, in particular differentiate the
11050 frame-related ones from the frame-unrelated ones. */
11051 if (size > 0)
11053 rtx expr = gen_rtx_SEQUENCE (VOIDmode, rtvec_alloc (2));
11054 XVECEXP (expr, 0, 0)
11055 = gen_rtx_SET (stack_pointer_rtx,
11056 plus_constant (Pmode, stack_pointer_rtx, -size));
11057 XVECEXP (expr, 0, 1)
11058 = gen_rtx_SET (stack_pointer_rtx,
11059 plus_constant (Pmode, stack_pointer_rtx,
11060 PROBE_INTERVAL + dope + size));
11061 add_reg_note (last, REG_FRAME_RELATED_EXPR, expr);
11062 RTX_FRAME_RELATED_P (last) = 1;
11064 cfun->machine->fs.sp_offset += size;
11067 /* Make sure nothing is scheduled before we are done. */
11068 emit_insn (gen_blockage ());
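/* Worked example, illustrative only; it assumes PROBE_INTERVAL == 4096
   and UNITS_PER_WORD == 8, so dope == 32.  For size == 10000 the
   unrolled path above performs

       rsp -= 8224   (2*4096 + 32: first interval + dope), probe *rsp
       rsp -= 4096,                                         probe *rsp
       rsp -= 1808   (10000 + 4096 - 12288: the tail),      probe *rsp
       rsp += 4128   (give back PROBE_INTERVAL + dope)

   for a net change of -10000 bytes, i.e. exactly SIZE, while touching
   every PROBE_INTERVAL-sized step of the newly allocated area.  */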
11071 /* Adjust the stack pointer up to REG while probing it. */
11073 const char *
11074 output_adjust_stack_and_probe (rtx reg)
11076 static int labelno = 0;
11077 char loop_lab[32], end_lab[32];
11078 rtx xops[2];
11080 ASM_GENERATE_INTERNAL_LABEL (loop_lab, "LPSRL", labelno);
11081 ASM_GENERATE_INTERNAL_LABEL (end_lab, "LPSRE", labelno++);
11083 ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, loop_lab);
11085 /* Jump to END_LAB if SP == LAST_ADDR. */
11086 xops[0] = stack_pointer_rtx;
11087 xops[1] = reg;
11088 output_asm_insn ("cmp%z0\t{%1, %0|%0, %1}", xops);
11089 fputs ("\tje\t", asm_out_file);
11090 assemble_name_raw (asm_out_file, end_lab);
11091 fputc ('\n', asm_out_file);
11093 /* SP = SP + PROBE_INTERVAL. */
11094 xops[1] = GEN_INT (PROBE_INTERVAL);
11095 output_asm_insn ("sub%z0\t{%1, %0|%0, %1}", xops);
11097 /* Probe at SP. */
11098 xops[1] = const0_rtx;
11099 output_asm_insn ("or%z0\t{%1, (%0)|DWORD PTR [%0], %1}", xops);
11101 fprintf (asm_out_file, "\tjmp\t");
11102 assemble_name_raw (asm_out_file, loop_lab);
11103 fputc ('\n', asm_out_file);
11105 ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, end_lab);
11107 return "";
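/* For reference, the loop emitted above looks roughly like this in
   AT&T syntax (32-bit operands shown; <reg> is the scratch register
   holding the precomputed final stack address):

   .LPSRL0:
       cmpl  <reg>, %esp            # stop once SP reaches LAST_ADDR
       je    .LPSRE0
       subl  $PROBE_INTERVAL, %esp
       orl   $0, (%esp)             # probe the newly exposed word
       jmp   .LPSRL0
   .LPSRE0:

   The loop condition is a pure equality test, which is what makes it
   safe against address wrap-around as noted above.  */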
11110 /* Emit code to probe a range of stack addresses from FIRST to FIRST+SIZE,
11111 inclusive. These are offsets from the current stack pointer. */
11113 static void
11114 ix86_emit_probe_stack_range (HOST_WIDE_INT first, HOST_WIDE_INT size)
11116 /* See if we have a constant small number of probes to generate. If so,
11117 that's the easy case. The run-time loop is made up of 7 insns in the
11118 generic case while the compile-time loop is made up of n insns for n #
11119 of intervals. */
11120 if (size <= 7 * PROBE_INTERVAL)
11122 HOST_WIDE_INT i;
11124 /* Probe at FIRST + N * PROBE_INTERVAL for values of N from 1 until
11125 it exceeds SIZE. If only one probe is needed, this will not
11126 generate any code. Then probe at FIRST + SIZE. */
11127 for (i = PROBE_INTERVAL; i < size; i += PROBE_INTERVAL)
11128 emit_stack_probe (plus_constant (Pmode, stack_pointer_rtx,
11129 -(first + i)));
11131 emit_stack_probe (plus_constant (Pmode, stack_pointer_rtx,
11132 -(first + size)));
11135 /* Otherwise, do the same as above, but in a loop. Note that we must be
11136 extra careful with variables wrapping around because we might be at
11137 the very top (or the very bottom) of the address space and we have
11138 to be able to handle this case properly; in particular, we use an
11139 equality test for the loop condition. */
11140 else
11142 HOST_WIDE_INT rounded_size, last;
11143 struct scratch_reg sr;
11145 get_scratch_register_on_entry (&sr);
11148 /* Step 1: round SIZE to the previous multiple of the interval. */
11150 rounded_size = size & -PROBE_INTERVAL;
11153 /* Step 2: compute initial and final value of the loop counter. */
11155 /* TEST_OFFSET = FIRST. */
11156 emit_move_insn (sr.reg, GEN_INT (-first));
11158 /* LAST_OFFSET = FIRST + ROUNDED_SIZE. */
11159 last = first + rounded_size;
11162 /* Step 3: the loop
11164 while (TEST_ADDR != LAST_ADDR)
11166 TEST_ADDR = TEST_ADDR + PROBE_INTERVAL
11167 probe at TEST_ADDR
11170 probes at FIRST + N * PROBE_INTERVAL for values of N from 1
11171 until it is equal to ROUNDED_SIZE. */
11173 emit_insn (ix86_gen_probe_stack_range (sr.reg, sr.reg, GEN_INT (-last)));
11176 /* Step 4: probe at FIRST + SIZE if we cannot assert at compile-time
11177 that SIZE is equal to ROUNDED_SIZE. */
11179 if (size != rounded_size)
11180 emit_stack_probe (plus_constant (Pmode,
11181 gen_rtx_PLUS (Pmode,
11182 stack_pointer_rtx,
11183 sr.reg),
11184 rounded_size - size));
11186 release_scratch_register_on_entry (&sr);
11189 /* Make sure nothing is scheduled before we are done. */
11190 emit_insn (gen_blockage ());
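/* Worked example, illustrative only; it assumes PROBE_INTERVAL == 4096.
   For first == 4096 and size == 12288 the unrolled path above probes at

       *(sp - 8192), *(sp - 12288), *(sp - 16384)

   i.e. at FIRST + N * PROBE_INTERVAL below the stack pointer and then
   at FIRST + SIZE, without ever moving the stack pointer itself -- the
   key difference from ix86_adjust_stack_and_probe above.  */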
11193 /* Probe a range of stack addresses from REG to END, inclusive. These are
11194 offsets from the current stack pointer. */
11196 const char *
11197 output_probe_stack_range (rtx reg, rtx end)
11199 static int labelno = 0;
11200 char loop_lab[32], end_lab[32];
11201 rtx xops[3];
11203 ASM_GENERATE_INTERNAL_LABEL (loop_lab, "LPSRL", labelno);
11204 ASM_GENERATE_INTERNAL_LABEL (end_lab, "LPSRE", labelno++);
11206 ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, loop_lab);
11208 /* Jump to END_LAB if TEST_ADDR == LAST_ADDR. */
11209 xops[0] = reg;
11210 xops[1] = end;
11211 output_asm_insn ("cmp%z0\t{%1, %0|%0, %1}", xops);
11212 fputs ("\tje\t", asm_out_file);
11213 assemble_name_raw (asm_out_file, end_lab);
11214 fputc ('\n', asm_out_file);
11216 /* TEST_ADDR = TEST_ADDR + PROBE_INTERVAL. */
11217 xops[1] = GEN_INT (PROBE_INTERVAL);
11218 output_asm_insn ("sub%z0\t{%1, %0|%0, %1}", xops);
11220 /* Probe at TEST_ADDR. */
11221 xops[0] = stack_pointer_rtx;
11222 xops[1] = reg;
11223 xops[2] = const0_rtx;
11224 output_asm_insn ("or%z0\t{%2, (%0,%1)|DWORD PTR [%0+%1], %2}", xops);
11226 fprintf (asm_out_file, "\tjmp\t");
11227 assemble_name_raw (asm_out_file, loop_lab);
11228 fputc ('\n', asm_out_file);
11230 ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, end_lab);
11232 return "";
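/* For reference, the loop emitted above looks roughly like this in
   AT&T syntax (32-bit operands shown):

   .LPSRL0:
       cmpl  <end>, <reg>           # stop once TEST_ADDR reaches LAST_ADDR
       je    .LPSRE0
       subl  $PROBE_INTERVAL, <reg>
       orl   $0, (%esp,<reg>)       # probe at SP plus the (negative) offset
       jmp   .LPSRL0
   .LPSRE0:

   REG carries a negative offset from the stack pointer, so the stack
   pointer is never modified while the range is being probed.  */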
11235 /* Finalize the stack_realign_needed flag, which guides the prologue/epilogue
11236 so that they are generated in the correct form. */
11237 static void
11238 ix86_finalize_stack_realign_flags (void)
11240 /* Check whether stack realignment is really needed after reload, and
11241 store the result in cfun. */
11242 unsigned int incoming_stack_boundary
11243 = (crtl->parm_stack_boundary > ix86_incoming_stack_boundary
11244 ? crtl->parm_stack_boundary : ix86_incoming_stack_boundary);
11245 unsigned int stack_realign = (incoming_stack_boundary
11246 < (crtl->is_leaf
11247 ? crtl->max_used_stack_slot_alignment
11248 : crtl->stack_alignment_needed));
11250 if (crtl->stack_realign_finalized)
11252 /* After stack_realign_needed is finalized, we can no longer
11253 change it. */
11254 gcc_assert (crtl->stack_realign_needed == stack_realign);
11255 return;
11258 /* If the only reason for frame_pointer_needed is that we conservatively
11259 assumed stack realignment might be needed, but in the end nothing that
11260 needed the stack alignment had been spilled, clear frame_pointer_needed
11261 and say we don't need stack realignment. */
11262 if (stack_realign
11263 && frame_pointer_needed
11264 && crtl->is_leaf
11265 && flag_omit_frame_pointer
11266 && crtl->sp_is_unchanging
11267 && !ix86_current_function_calls_tls_descriptor
11268 && !crtl->accesses_prior_frames
11269 && !cfun->calls_alloca
11270 && !crtl->calls_eh_return
11271 && !(flag_stack_check && STACK_CHECK_MOVING_SP)
11272 && !ix86_frame_pointer_required ()
11273 && get_frame_size () == 0
11274 && ix86_nsaved_sseregs () == 0
11275 && ix86_varargs_gpr_size + ix86_varargs_fpr_size == 0)
11277 HARD_REG_SET set_up_by_prologue, prologue_used;
11278 basic_block bb;
11280 CLEAR_HARD_REG_SET (prologue_used);
11281 CLEAR_HARD_REG_SET (set_up_by_prologue);
11282 add_to_hard_reg_set (&set_up_by_prologue, Pmode, STACK_POINTER_REGNUM);
11283 add_to_hard_reg_set (&set_up_by_prologue, Pmode, ARG_POINTER_REGNUM);
11284 add_to_hard_reg_set (&set_up_by_prologue, Pmode,
11285 HARD_FRAME_POINTER_REGNUM);
11286 FOR_EACH_BB_FN (bb, cfun)
11288 rtx_insn *insn;
11289 FOR_BB_INSNS (bb, insn)
11290 if (NONDEBUG_INSN_P (insn)
11291 && requires_stack_frame_p (insn, prologue_used,
11292 set_up_by_prologue))
11294 crtl->stack_realign_needed = stack_realign;
11295 crtl->stack_realign_finalized = true;
11296 return;
11300 /* If drap has been set, but it actually isn't live at the start
11301 of the function, there is no reason to set it up. */
11302 if (crtl->drap_reg)
11304 basic_block bb = ENTRY_BLOCK_PTR_FOR_FN (cfun)->next_bb;
11305 if (! REGNO_REG_SET_P (DF_LR_IN (bb), REGNO (crtl->drap_reg)))
11307 crtl->drap_reg = NULL_RTX;
11308 crtl->need_drap = false;
11311 else
11312 cfun->machine->no_drap_save_restore = true;
11314 frame_pointer_needed = false;
11315 stack_realign = false;
11316 crtl->max_used_stack_slot_alignment = incoming_stack_boundary;
11317 crtl->stack_alignment_needed = incoming_stack_boundary;
11318 crtl->stack_alignment_estimated = incoming_stack_boundary;
11319 if (crtl->preferred_stack_boundary > incoming_stack_boundary)
11320 crtl->preferred_stack_boundary = incoming_stack_boundary;
11321 df_finish_pass (true);
11322 df_scan_alloc (NULL);
11323 df_scan_blocks ();
11324 df_compute_regs_ever_live (true);
11325 df_analyze ();
11328 crtl->stack_realign_needed = stack_realign;
11329 crtl->stack_realign_finalized = true;
11332 /* Delete SET_GOT right after entry block if it is allocated to reg. */
11334 static void
11335 ix86_elim_entry_set_got (rtx reg)
11337 basic_block bb = ENTRY_BLOCK_PTR_FOR_FN (cfun)->next_bb;
11338 rtx_insn *c_insn = BB_HEAD (bb);
11339 if (!NONDEBUG_INSN_P (c_insn))
11340 c_insn = next_nonnote_nondebug_insn (c_insn);
11341 if (c_insn && NONJUMP_INSN_P (c_insn))
11343 rtx pat = PATTERN (c_insn);
11344 if (GET_CODE (pat) == PARALLEL)
11346 rtx vec = XVECEXP (pat, 0, 0);
11347 if (GET_CODE (vec) == SET
11348 && XINT (XEXP (vec, 1), 1) == UNSPEC_SET_GOT
11349 && REGNO (XEXP (vec, 0)) == REGNO (reg))
11350 delete_insn (c_insn);
11355 /* Expand the prologue into a bunch of separate insns. */
11357 void
11358 ix86_expand_prologue (void)
11360 struct machine_function *m = cfun->machine;
11361 rtx insn, t;
11362 struct ix86_frame frame;
11363 HOST_WIDE_INT allocate;
11364 bool int_registers_saved;
11365 bool sse_registers_saved;
11367 ix86_finalize_stack_realign_flags ();
11369 /* DRAP should not coexist with stack_realign_fp */
11370 gcc_assert (!(crtl->drap_reg && stack_realign_fp));
11372 memset (&m->fs, 0, sizeof (m->fs));
11374 /* Initialize CFA state for before the prologue. */
11375 m->fs.cfa_reg = stack_pointer_rtx;
11376 m->fs.cfa_offset = INCOMING_FRAME_SP_OFFSET;
11378 /* Track SP offset to the CFA. We continue tracking this after we've
11379 swapped the CFA register away from SP. In the case of re-alignment
11380 this is fudged; we're interested in offsets within the local frame. */
11381 m->fs.sp_offset = INCOMING_FRAME_SP_OFFSET;
11382 m->fs.sp_valid = true;
11384 ix86_compute_frame_layout (&frame);
11386 if (!TARGET_64BIT && ix86_function_ms_hook_prologue (current_function_decl))
11388 /* We should have already generated an error for any use of
11389 ms_hook on a nested function. */
11390 gcc_checking_assert (!ix86_static_chain_on_stack);
11392 /* Check whether profiling is active and we shall use the
11393 profiling-before-prologue variant. If so, sorry. */
11394 if (crtl->profile && flag_fentry != 0)
11395 sorry ("ms_hook_prologue attribute isn%'t compatible "
11396 "with -mfentry for 32-bit");
11398 /* In ix86_asm_output_function_label we emitted:
11399 8b ff movl.s %edi,%edi
11400 55 push %ebp
11401 8b ec movl.s %esp,%ebp
11403 This matches the hookable function prologue in Win32 API
11404 functions in Microsoft Windows XP Service Pack 2 and newer.
11405 Wine uses this to enable Windows apps to hook the Win32 API
11406 functions provided by Wine.
11408 What that means is that we've already set up the frame pointer. */
11410 if (frame_pointer_needed
11411 && !(crtl->drap_reg && crtl->stack_realign_needed))
11413 rtx push, mov;
11415 /* We've decided to use the frame pointer already set up.
11416 Describe this to the unwinder by pretending that both
11417 push and mov insns happen right here.
11419 Putting the unwind info here at the end of the ms_hook
11420 is done so that we can make absolutely certain we get
11421 the required byte sequence at the start of the function,
11422 rather than relying on an assembler that can produce
11423 the exact encoding required.
11425 However it does mean (in the unpatched case) that we have
11426 a 1 insn window where the asynchronous unwind info is
11427 incorrect. However, if we placed the unwind info at
11428 its correct location we would have incorrect unwind info
11429 in the patched case. Which is probably all moot since
11430 I don't expect Wine generates dwarf2 unwind info for the
11431 system libraries that use this feature. */
11433 insn = emit_insn (gen_blockage ());
11435 push = gen_push (hard_frame_pointer_rtx);
11436 mov = gen_rtx_SET (hard_frame_pointer_rtx,
11437 stack_pointer_rtx);
11438 RTX_FRAME_RELATED_P (push) = 1;
11439 RTX_FRAME_RELATED_P (mov) = 1;
11441 RTX_FRAME_RELATED_P (insn) = 1;
11442 add_reg_note (insn, REG_FRAME_RELATED_EXPR,
11443 gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, push, mov)));
11445 /* Note that gen_push incremented m->fs.cfa_offset, even
11446 though we didn't emit the push insn here. */
11447 m->fs.cfa_reg = hard_frame_pointer_rtx;
11448 m->fs.fp_offset = m->fs.cfa_offset;
11449 m->fs.fp_valid = true;
11451 else
11453 /* The frame pointer is not needed so pop %ebp again.
11454 This leaves us with a pristine state. */
11455 emit_insn (gen_pop (hard_frame_pointer_rtx));
11459 /* The first insn of a function that accepts its static chain on the
11460 stack is to push the register that would be filled in by a direct
11461 call. This insn will be skipped by the trampoline. */
11462 else if (ix86_static_chain_on_stack)
11464 insn = emit_insn (gen_push (ix86_static_chain (cfun->decl, false)));
11465 emit_insn (gen_blockage ());
11467 /* We don't want to interpret this push insn as a register save,
11468 only as a stack adjustment. The real copy of the register as
11469 a save will be done later, if needed. */
11470 t = plus_constant (Pmode, stack_pointer_rtx, -UNITS_PER_WORD);
11471 t = gen_rtx_SET (stack_pointer_rtx, t);
11472 add_reg_note (insn, REG_CFA_ADJUST_CFA, t);
11473 RTX_FRAME_RELATED_P (insn) = 1;
11476 /* Emit prologue code to adjust the stack alignment and set up DRAP, in
11477 case DRAP is needed and stack realignment is really needed after reload. */
11478 if (stack_realign_drap)
11480 int align_bytes = crtl->stack_alignment_needed / BITS_PER_UNIT;
11482 /* Only need to push the parameter pointer reg if it is call-saved. */
11483 if (!call_used_regs[REGNO (crtl->drap_reg)])
11485 /* Push arg pointer reg */
11486 insn = emit_insn (gen_push (crtl->drap_reg));
11487 RTX_FRAME_RELATED_P (insn) = 1;
11490 /* Grab the argument pointer. */
11491 t = plus_constant (Pmode, stack_pointer_rtx, m->fs.sp_offset);
11492 insn = emit_insn (gen_rtx_SET (crtl->drap_reg, t));
11493 RTX_FRAME_RELATED_P (insn) = 1;
11494 m->fs.cfa_reg = crtl->drap_reg;
11495 m->fs.cfa_offset = 0;
11497 /* Align the stack. */
11498 insn = emit_insn (ix86_gen_andsp (stack_pointer_rtx,
11499 stack_pointer_rtx,
11500 GEN_INT (-align_bytes)));
11501 RTX_FRAME_RELATED_P (insn) = 1;
11503 /* Replicate the return address on the stack so that the return
11504 address can be reached via the (argp - 1) slot. This is needed
11505 to implement macro RETURN_ADDR_RTX and intrinsic function
11506 expand_builtin_return_addr etc. */
11507 t = plus_constant (Pmode, crtl->drap_reg, -UNITS_PER_WORD);
11508 t = gen_frame_mem (word_mode, t);
11509 insn = emit_insn (gen_push (t));
11510 RTX_FRAME_RELATED_P (insn) = 1;
11512 /* For the purposes of frame and register save area addressing,
11513 we've started over with a new frame. */
11514 m->fs.sp_offset = INCOMING_FRAME_SP_OFFSET;
11515 m->fs.realigned = true;
11518 int_registers_saved = (frame.nregs == 0);
11519 sse_registers_saved = (frame.nsseregs == 0);
11521 if (frame_pointer_needed && !m->fs.fp_valid)
11523 /* Note: AT&T enter does NOT have reversed args. Enter is probably
11524 slower on all targets. Also sdb doesn't like it. */
11525 insn = emit_insn (gen_push (hard_frame_pointer_rtx));
11526 RTX_FRAME_RELATED_P (insn) = 1;
11528 /* Push registers now, before setting the frame pointer
11529 on SEH target. */
11530 if (!int_registers_saved
11531 && TARGET_SEH
11532 && !frame.save_regs_using_mov)
11534 ix86_emit_save_regs ();
11535 int_registers_saved = true;
11536 gcc_assert (m->fs.sp_offset == frame.reg_save_offset);
11539 if (m->fs.sp_offset == frame.hard_frame_pointer_offset)
11541 insn = emit_move_insn (hard_frame_pointer_rtx, stack_pointer_rtx);
11542 RTX_FRAME_RELATED_P (insn) = 1;
11544 if (m->fs.cfa_reg == stack_pointer_rtx)
11545 m->fs.cfa_reg = hard_frame_pointer_rtx;
11546 m->fs.fp_offset = m->fs.sp_offset;
11547 m->fs.fp_valid = true;
11551 if (!int_registers_saved)
11553 /* If saving registers via PUSH, do so now. */
11554 if (!frame.save_regs_using_mov)
11556 ix86_emit_save_regs ();
11557 int_registers_saved = true;
11558 gcc_assert (m->fs.sp_offset == frame.reg_save_offset);
11561 /* When using red zone we may start register saving before allocating
11562 the stack frame saving one cycle of the prologue. However, avoid
11563 doing this if we have to probe the stack; at least on x86_64 the
11564 stack probe can turn into a call that clobbers a red zone location. */
11565 else if (ix86_using_red_zone ()
11566 && (! TARGET_STACK_PROBE
11567 || frame.stack_pointer_offset < CHECK_STACK_LIMIT))
11569 ix86_emit_save_regs_using_mov (frame.reg_save_offset);
11570 int_registers_saved = true;
11574 if (stack_realign_fp)
11576 int align_bytes = crtl->stack_alignment_needed / BITS_PER_UNIT;
11577 gcc_assert (align_bytes > MIN_STACK_BOUNDARY / BITS_PER_UNIT);
11579 /* The computation of the size of the re-aligned stack frame means
11580 that we must allocate the size of the register save area before
11581 performing the actual alignment. Otherwise we cannot guarantee
11582 that there's enough storage above the realignment point. */
11583 if (m->fs.sp_offset != frame.sse_reg_save_offset)
11584 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
11585 GEN_INT (m->fs.sp_offset
11586 - frame.sse_reg_save_offset),
11587 -1, false);
11589 /* Align the stack. */
11590 insn = emit_insn (ix86_gen_andsp (stack_pointer_rtx,
11591 stack_pointer_rtx,
11592 GEN_INT (-align_bytes)));
11594 /* For the purposes of register save area addressing, the stack
11595 pointer is no longer valid. As for the value of sp_offset,
11596 see ix86_compute_frame_layout, which we need to match in order
11597 to pass verification of stack_pointer_offset at the end. */
11598 m->fs.sp_offset = (m->fs.sp_offset + align_bytes) & -align_bytes;
11599 m->fs.sp_valid = false;
11602 allocate = frame.stack_pointer_offset - m->fs.sp_offset;
11604 if (flag_stack_usage_info)
11606 /* We start to count from ARG_POINTER. */
11607 HOST_WIDE_INT stack_size = frame.stack_pointer_offset;
11609 /* If it was realigned, take into account the fake frame. */
11610 if (stack_realign_drap)
11612 if (ix86_static_chain_on_stack)
11613 stack_size += UNITS_PER_WORD;
11615 if (!call_used_regs[REGNO (crtl->drap_reg)])
11616 stack_size += UNITS_PER_WORD;
11618 /* This over-estimates by 1 minimal-stack-alignment-unit but
11619 mitigates that by counting in the new return address slot. */
11620 current_function_dynamic_stack_size
11621 += crtl->stack_alignment_needed / BITS_PER_UNIT;
11624 current_function_static_stack_size = stack_size;
11627 /* On SEH target with very large frame size, allocate an area to save
11628 SSE registers (as the very large allocation won't be described). */
11629 if (TARGET_SEH
11630 && frame.stack_pointer_offset > SEH_MAX_FRAME_SIZE
11631 && !sse_registers_saved)
11633 HOST_WIDE_INT sse_size =
11634 frame.sse_reg_save_offset - frame.reg_save_offset;
11636 gcc_assert (int_registers_saved);
11638 /* No need to do stack checking as the area will be immediately
11639 written. */
11640 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
11641 GEN_INT (-sse_size), -1,
11642 m->fs.cfa_reg == stack_pointer_rtx);
11643 allocate -= sse_size;
11644 ix86_emit_save_sse_regs_using_mov (frame.sse_reg_save_offset);
11645 sse_registers_saved = true;
11648 /* The stack has already been decremented by the instruction calling us
11649 so probe if the size is non-negative to preserve the protection area. */
11650 if (allocate >= 0 && flag_stack_check == STATIC_BUILTIN_STACK_CHECK)
11652 /* We expect the registers to be saved when probes are used. */
11653 gcc_assert (int_registers_saved);
11655 if (STACK_CHECK_MOVING_SP)
11657 if (!(crtl->is_leaf && !cfun->calls_alloca
11658 && allocate <= PROBE_INTERVAL))
11660 ix86_adjust_stack_and_probe (allocate);
11661 allocate = 0;
11664 else
11666 HOST_WIDE_INT size = allocate;
11668 if (TARGET_64BIT && size >= (HOST_WIDE_INT) 0x80000000)
11669 size = 0x80000000 - STACK_CHECK_PROTECT - 1;
11671 if (TARGET_STACK_PROBE)
11673 if (crtl->is_leaf && !cfun->calls_alloca)
11675 if (size > PROBE_INTERVAL)
11676 ix86_emit_probe_stack_range (0, size);
11678 else
11679 ix86_emit_probe_stack_range (0, size + STACK_CHECK_PROTECT);
11681 else
11683 if (crtl->is_leaf && !cfun->calls_alloca)
11685 if (size > PROBE_INTERVAL && size > STACK_CHECK_PROTECT)
11686 ix86_emit_probe_stack_range (STACK_CHECK_PROTECT,
11687 size - STACK_CHECK_PROTECT);
11689 else
11690 ix86_emit_probe_stack_range (STACK_CHECK_PROTECT, size);
11695 if (allocate == 0)
11697 else if (!ix86_target_stack_probe ()
11698 || frame.stack_pointer_offset < CHECK_STACK_LIMIT)
11700 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
11701 GEN_INT (-allocate), -1,
11702 m->fs.cfa_reg == stack_pointer_rtx);
11704 else
11706 rtx eax = gen_rtx_REG (Pmode, AX_REG);
11707 rtx r10 = NULL;
11708 rtx (*adjust_stack_insn)(rtx, rtx, rtx);
11709 const bool sp_is_cfa_reg = (m->fs.cfa_reg == stack_pointer_rtx);
11710 bool eax_live = ix86_eax_live_at_start_p ();
11711 bool r10_live = false;
11713 if (TARGET_64BIT)
11714 r10_live = (DECL_STATIC_CHAIN (current_function_decl) != 0);
11716 if (eax_live)
11718 insn = emit_insn (gen_push (eax));
11719 allocate -= UNITS_PER_WORD;
11720 /* Note that SEH directives need to continue tracking the stack
11721 pointer even after the frame pointer has been set up. */
11722 if (sp_is_cfa_reg || TARGET_SEH)
11724 if (sp_is_cfa_reg)
11725 m->fs.cfa_offset += UNITS_PER_WORD;
11726 RTX_FRAME_RELATED_P (insn) = 1;
11727 add_reg_note (insn, REG_FRAME_RELATED_EXPR,
11728 gen_rtx_SET (stack_pointer_rtx,
11729 plus_constant (Pmode, stack_pointer_rtx,
11730 -UNITS_PER_WORD)));
11734 if (r10_live)
11736 r10 = gen_rtx_REG (Pmode, R10_REG);
11737 insn = emit_insn (gen_push (r10));
11738 allocate -= UNITS_PER_WORD;
11739 if (sp_is_cfa_reg || TARGET_SEH)
11741 if (sp_is_cfa_reg)
11742 m->fs.cfa_offset += UNITS_PER_WORD;
11743 RTX_FRAME_RELATED_P (insn) = 1;
11744 add_reg_note (insn, REG_FRAME_RELATED_EXPR,
11745 gen_rtx_SET (stack_pointer_rtx,
11746 plus_constant (Pmode, stack_pointer_rtx,
11747 -UNITS_PER_WORD)));
11751 emit_move_insn (eax, GEN_INT (allocate));
11752 emit_insn (ix86_gen_allocate_stack_worker (eax, eax));
11754 /* Use the fact that AX still contains ALLOCATE. */
11755 adjust_stack_insn = (Pmode == DImode
11756 ? gen_pro_epilogue_adjust_stack_di_sub
11757 : gen_pro_epilogue_adjust_stack_si_sub);
11759 insn = emit_insn (adjust_stack_insn (stack_pointer_rtx,
11760 stack_pointer_rtx, eax));
11762 if (sp_is_cfa_reg || TARGET_SEH)
11764 if (sp_is_cfa_reg)
11765 m->fs.cfa_offset += allocate;
11766 RTX_FRAME_RELATED_P (insn) = 1;
11767 add_reg_note (insn, REG_FRAME_RELATED_EXPR,
11768 gen_rtx_SET (stack_pointer_rtx,
11769 plus_constant (Pmode, stack_pointer_rtx,
11770 -allocate)));
11772 m->fs.sp_offset += allocate;
11774 /* Use stack_pointer_rtx for relative addressing so that code
11775 works for realigned stack, too. */
11776 if (r10_live && eax_live)
11778 t = gen_rtx_PLUS (Pmode, stack_pointer_rtx, eax);
11779 emit_move_insn (gen_rtx_REG (word_mode, R10_REG),
11780 gen_frame_mem (word_mode, t));
11781 t = plus_constant (Pmode, t, UNITS_PER_WORD);
11782 emit_move_insn (gen_rtx_REG (word_mode, AX_REG),
11783 gen_frame_mem (word_mode, t));
11785 else if (eax_live || r10_live)
11787 t = gen_rtx_PLUS (Pmode, stack_pointer_rtx, eax);
11788 emit_move_insn (gen_rtx_REG (word_mode,
11789 (eax_live ? AX_REG : R10_REG)),
11790 gen_frame_mem (word_mode, t));
11793 gcc_assert (m->fs.sp_offset == frame.stack_pointer_offset);
11795 /* If we haven't already set up the frame pointer, do so now. */
11796 if (frame_pointer_needed && !m->fs.fp_valid)
11798 insn = ix86_gen_add3 (hard_frame_pointer_rtx, stack_pointer_rtx,
11799 GEN_INT (frame.stack_pointer_offset
11800 - frame.hard_frame_pointer_offset));
11801 insn = emit_insn (insn);
11802 RTX_FRAME_RELATED_P (insn) = 1;
11803 add_reg_note (insn, REG_CFA_ADJUST_CFA, NULL);
11805 if (m->fs.cfa_reg == stack_pointer_rtx)
11806 m->fs.cfa_reg = hard_frame_pointer_rtx;
11807 m->fs.fp_offset = frame.hard_frame_pointer_offset;
11808 m->fs.fp_valid = true;
11811 if (!int_registers_saved)
11812 ix86_emit_save_regs_using_mov (frame.reg_save_offset);
11813 if (!sse_registers_saved)
11814 ix86_emit_save_sse_regs_using_mov (frame.sse_reg_save_offset);
11816 /* For mcount profiling in 32-bit PIC mode we need to emit SET_GOT
11817 in the prologue. */
11818 if (!TARGET_64BIT && pic_offset_table_rtx && crtl->profile && !flag_fentry)
11820 rtx pic = gen_rtx_REG (Pmode, REAL_PIC_OFFSET_TABLE_REGNUM);
11821 insn = emit_insn (gen_set_got (pic));
11822 RTX_FRAME_RELATED_P (insn) = 1;
11823 add_reg_note (insn, REG_CFA_FLUSH_QUEUE, NULL_RTX);
11824 emit_insn (gen_prologue_use (pic));
11825 /* Delete an already emitted SET_GOT if it exists and is allocated to
11826 REAL_PIC_OFFSET_TABLE_REGNUM. */
11827 ix86_elim_entry_set_got (pic);
11830 if (crtl->drap_reg && !crtl->stack_realign_needed)
11832 /* vDRAP is set up, but after reload it turns out stack realignment
11833 isn't necessary; here we emit prologue code to set up DRAP
11834 without the stack realignment adjustment. */
11835 t = choose_baseaddr (0);
11836 emit_insn (gen_rtx_SET (crtl->drap_reg, t));
11839 /* Prevent instructions from being scheduled into the register save push
11840 sequence when access to the red-zone area is done through the frame pointer.
11841 The offset between the frame pointer and the stack pointer is calculated
11842 relative to the value of the stack pointer at the end of the function
11843 prologue, and moving instructions that access the red-zone area via the
11844 frame pointer into the push sequence violates this assumption. */
11845 if (frame_pointer_needed && frame.red_zone_size)
11846 emit_insn (gen_memory_blockage ());
11848 /* Emit cld instruction if stringops are used in the function. */
11849 if (TARGET_CLD && ix86_current_function_needs_cld)
11850 emit_insn (gen_cld ());
11852 /* SEH requires that the prologue end within 256 bytes of the start of
11853 the function. Prevent instruction schedules that would extend that.
11854 Further, prevent alloca modifications to the stack pointer from being
11855 combined with prologue modifications. */
11856 if (TARGET_SEH)
11857 emit_insn (gen_prologue_use (stack_pointer_rtx));
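/* Illustrative summary, not emitted literally by the code above: for a
   simple x86-64 function that needs a frame pointer, saves %rbx and
   allocates a small frame, the insns expanded above correspond to

       pushq  %rbp
       movq   %rsp, %rbp
       pushq  %rbx
       subq   $N, %rsp              # N = remaining frame allocation

   DRAP setup, stack realignment, stack probing and the separate SSE
   save area only appear in the less common configurations handled by
   the branches above.  */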
11860 /* Emit code to restore REG using a POP insn. */
11862 static void
11863 ix86_emit_restore_reg_using_pop (rtx reg)
11865 struct machine_function *m = cfun->machine;
11866 rtx_insn *insn = emit_insn (gen_pop (reg));
11868 ix86_add_cfa_restore_note (insn, reg, m->fs.sp_offset);
11869 m->fs.sp_offset -= UNITS_PER_WORD;
11871 if (m->fs.cfa_reg == crtl->drap_reg
11872 && REGNO (reg) == REGNO (crtl->drap_reg))
11874 /* Previously we'd represented the CFA as an expression
11875 like *(%ebp - 8). We've just popped that value from
11876 the stack, which means we need to reset the CFA to
11877 the drap register. This will remain until we restore
11878 the stack pointer. */
11879 add_reg_note (insn, REG_CFA_DEF_CFA, reg);
11880 RTX_FRAME_RELATED_P (insn) = 1;
11882 /* This means that the DRAP register is valid for addressing too. */
11883 m->fs.drap_valid = true;
11884 return;
11887 if (m->fs.cfa_reg == stack_pointer_rtx)
11889 rtx x = plus_constant (Pmode, stack_pointer_rtx, UNITS_PER_WORD);
11890 x = gen_rtx_SET (stack_pointer_rtx, x);
11891 add_reg_note (insn, REG_CFA_ADJUST_CFA, x);
11892 RTX_FRAME_RELATED_P (insn) = 1;
11894 m->fs.cfa_offset -= UNITS_PER_WORD;
11897 /* When the frame pointer is the CFA, and we pop it, we are
11898 swapping back to the stack pointer as the CFA. This happens
11899 for stack frames that don't allocate other data, so we assume
11900 the stack pointer is now pointing at the return address, i.e.
11901 the function entry state, which makes the offset be 1 word. */
11902 if (reg == hard_frame_pointer_rtx)
11904 m->fs.fp_valid = false;
11905 if (m->fs.cfa_reg == hard_frame_pointer_rtx)
11907 m->fs.cfa_reg = stack_pointer_rtx;
11908 m->fs.cfa_offset -= UNITS_PER_WORD;
11910 add_reg_note (insn, REG_CFA_DEF_CFA,
11911 gen_rtx_PLUS (Pmode, stack_pointer_rtx,
11912 GEN_INT (m->fs.cfa_offset)));
11913 RTX_FRAME_RELATED_P (insn) = 1;
11918 /* Emit code to restore saved registers using POP insns. */
11920 static void
11921 ix86_emit_restore_regs_using_pop (void)
11923 unsigned int regno;
11925 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
11926 if (!SSE_REGNO_P (regno) && ix86_save_reg (regno, false))
11927 ix86_emit_restore_reg_using_pop (gen_rtx_REG (word_mode, regno));
11930 /* Emit code and notes for the LEAVE instruction. */
11932 static void
11933 ix86_emit_leave (void)
11935 struct machine_function *m = cfun->machine;
11936 rtx_insn *insn = emit_insn (ix86_gen_leave ());
11938 ix86_add_queued_cfa_restore_notes (insn);
11940 gcc_assert (m->fs.fp_valid);
11941 m->fs.sp_valid = true;
11942 m->fs.sp_offset = m->fs.fp_offset - UNITS_PER_WORD;
11943 m->fs.fp_valid = false;
11945 if (m->fs.cfa_reg == hard_frame_pointer_rtx)
11947 m->fs.cfa_reg = stack_pointer_rtx;
11948 m->fs.cfa_offset = m->fs.sp_offset;
11950 add_reg_note (insn, REG_CFA_DEF_CFA,
11951 plus_constant (Pmode, stack_pointer_rtx,
11952 m->fs.sp_offset));
11953 RTX_FRAME_RELATED_P (insn) = 1;
11955 ix86_add_cfa_restore_note (insn, hard_frame_pointer_rtx,
11956 m->fs.fp_offset);
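/* Note for readers: "leave" is equivalent to

       movq  %rbp, %rsp
       popq  %rbp

   (or the 32-bit forms), which is why the bookkeeping above makes SP
   valid again at fp_offset - UNITS_PER_WORD and invalidates the frame
   pointer.  */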
11959 /* Emit code to restore saved registers using MOV insns.
11960 First register is restored from CFA - CFA_OFFSET. */
11961 static void
11962 ix86_emit_restore_regs_using_mov (HOST_WIDE_INT cfa_offset,
11963 bool maybe_eh_return)
11965 struct machine_function *m = cfun->machine;
11966 unsigned int regno;
11968 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
11969 if (!SSE_REGNO_P (regno) && ix86_save_reg (regno, maybe_eh_return))
11971 rtx reg = gen_rtx_REG (word_mode, regno);
11972 rtx mem;
11973 rtx_insn *insn;
11975 mem = choose_baseaddr (cfa_offset);
11976 mem = gen_frame_mem (word_mode, mem);
11977 insn = emit_move_insn (reg, mem);
11979 if (m->fs.cfa_reg == crtl->drap_reg && regno == REGNO (crtl->drap_reg))
11981 /* Previously we'd represented the CFA as an expression
11982 like *(%ebp - 8). We've just popped that value from
11983 the stack, which means we need to reset the CFA to
11984 the drap register. This will remain until we restore
11985 the stack pointer. */
11986 add_reg_note (insn, REG_CFA_DEF_CFA, reg);
11987 RTX_FRAME_RELATED_P (insn) = 1;
11989 /* This means that the DRAP register is valid for addressing. */
11990 m->fs.drap_valid = true;
11992 else
11993 ix86_add_cfa_restore_note (NULL, reg, cfa_offset);
11995 cfa_offset -= UNITS_PER_WORD;
11999 /* Emit code to restore saved SSE registers using MOV insns.
12000 First register is restored from CFA - CFA_OFFSET. */
12001 static void
12002 ix86_emit_restore_sse_regs_using_mov (HOST_WIDE_INT cfa_offset,
12003 bool maybe_eh_return)
12005 unsigned int regno;
12007 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
12008 if (SSE_REGNO_P (regno) && ix86_save_reg (regno, maybe_eh_return))
12010 rtx reg = gen_rtx_REG (V4SFmode, regno);
12011 rtx mem;
12013 mem = choose_baseaddr (cfa_offset);
12014 mem = gen_rtx_MEM (V4SFmode, mem);
12015 set_mem_align (mem, 128);
12016 emit_move_insn (reg, mem);
12018 ix86_add_cfa_restore_note (NULL, reg, cfa_offset);
12020 cfa_offset -= 16;
12024 /* Restore function stack, frame, and registers. */
12026 void
12027 ix86_expand_epilogue (int style)
12029 struct machine_function *m = cfun->machine;
12030 struct machine_frame_state frame_state_save = m->fs;
12031 struct ix86_frame frame;
12032 bool restore_regs_via_mov;
12033 bool using_drap;
12035 ix86_finalize_stack_realign_flags ();
12036 ix86_compute_frame_layout (&frame);
12038 m->fs.sp_valid = (!frame_pointer_needed
12039 || (crtl->sp_is_unchanging
12040 && !stack_realign_fp));
12041 gcc_assert (!m->fs.sp_valid
12042 || m->fs.sp_offset == frame.stack_pointer_offset);
12044 /* The FP must be valid if the frame pointer is present. */
12045 gcc_assert (frame_pointer_needed == m->fs.fp_valid);
12046 gcc_assert (!m->fs.fp_valid
12047 || m->fs.fp_offset == frame.hard_frame_pointer_offset);
12049 /* We must have *some* valid pointer to the stack frame. */
12050 gcc_assert (m->fs.sp_valid || m->fs.fp_valid);
12052 /* The DRAP is never valid at this point. */
12053 gcc_assert (!m->fs.drap_valid);
12055 /* See the comment about red zone and frame
12056 pointer usage in ix86_expand_prologue. */
12057 if (frame_pointer_needed && frame.red_zone_size)
12058 emit_insn (gen_memory_blockage ());
12060 using_drap = crtl->drap_reg && crtl->stack_realign_needed;
12061 gcc_assert (!using_drap || m->fs.cfa_reg == crtl->drap_reg);
12063 /* Determine the CFA offset of the end of the red-zone. */
12064 m->fs.red_zone_offset = 0;
12065 if (ix86_using_red_zone () && crtl->args.pops_args < 65536)
12067 /* The red-zone begins below the return address. */
12068 m->fs.red_zone_offset = RED_ZONE_SIZE + UNITS_PER_WORD;
12070 /* When the register save area is in the aligned portion of
12071 the stack, determine the maximum runtime displacement that
12072 matches up with the aligned frame. */
12073 if (stack_realign_drap)
12074 m->fs.red_zone_offset -= (crtl->stack_alignment_needed / BITS_PER_UNIT
12075 + UNITS_PER_WORD);
12078 /* Special care must be taken for the normal return case of a function
12079 using eh_return: the eax and edx registers are marked as saved, but
12080 not restored along this path. Adjust the save location to match. */
12081 if (crtl->calls_eh_return && style != 2)
12082 frame.reg_save_offset -= 2 * UNITS_PER_WORD;
12084 /* EH_RETURN requires the use of moves to function properly. */
12085 if (crtl->calls_eh_return)
12086 restore_regs_via_mov = true;
12087 /* SEH requires the use of pops to identify the epilogue. */
12088 else if (TARGET_SEH)
12089 restore_regs_via_mov = false;
12090 /* If we're only restoring one register and sp is not valid then
12091 use a move instruction to restore the register, since it's
12092 less work than reloading sp and popping the register. */
12093 else if (!m->fs.sp_valid && frame.nregs <= 1)
12094 restore_regs_via_mov = true;
12095 else if (TARGET_EPILOGUE_USING_MOVE
12096 && cfun->machine->use_fast_prologue_epilogue
12097 && (frame.nregs > 1
12098 || m->fs.sp_offset != frame.reg_save_offset))
12099 restore_regs_via_mov = true;
12100 else if (frame_pointer_needed
12101 && !frame.nregs
12102 && m->fs.sp_offset != frame.reg_save_offset)
12103 restore_regs_via_mov = true;
12104 else if (frame_pointer_needed
12105 && TARGET_USE_LEAVE
12106 && cfun->machine->use_fast_prologue_epilogue
12107 && frame.nregs == 1)
12108 restore_regs_via_mov = true;
12109 else
12110 restore_regs_via_mov = false;
12112 if (restore_regs_via_mov || frame.nsseregs)
12114 /* Ensure that the entire register save area is addressable via
12115 the stack pointer, if we will restore via sp. */
12116 if (TARGET_64BIT
12117 && m->fs.sp_offset > 0x7fffffff
12118 && !(m->fs.fp_valid || m->fs.drap_valid)
12119 && (frame.nsseregs + frame.nregs) != 0)
12121 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
12122 GEN_INT (m->fs.sp_offset
12123 - frame.sse_reg_save_offset),
12124 style,
12125 m->fs.cfa_reg == stack_pointer_rtx);
12129 /* If there are any SSE registers to restore, then we have to do it
12130 via moves, since there's obviously no pop for SSE regs. */
12131 if (frame.nsseregs)
12132 ix86_emit_restore_sse_regs_using_mov (frame.sse_reg_save_offset,
12133 style == 2);
12135 if (restore_regs_via_mov)
12137 rtx t;
12139 if (frame.nregs)
12140 ix86_emit_restore_regs_using_mov (frame.reg_save_offset, style == 2);
12142 /* eh_return epilogues need %ecx added to the stack pointer. */
12143 if (style == 2)
12145 rtx sa = EH_RETURN_STACKADJ_RTX;
12146 rtx_insn *insn;
12148 /* Stack align doesn't work with eh_return. */
12149 gcc_assert (!stack_realign_drap);
12150 /* Neither do regparm nested functions. */
12151 gcc_assert (!ix86_static_chain_on_stack);
12153 if (frame_pointer_needed)
12155 t = gen_rtx_PLUS (Pmode, hard_frame_pointer_rtx, sa);
12156 t = plus_constant (Pmode, t, m->fs.fp_offset - UNITS_PER_WORD);
12157 emit_insn (gen_rtx_SET (sa, t));
12159 t = gen_frame_mem (Pmode, hard_frame_pointer_rtx);
12160 insn = emit_move_insn (hard_frame_pointer_rtx, t);
12162 /* Note that we use SA as a temporary CFA, as the return
12163 address is at the proper place relative to it. We
12164 pretend this happens at the FP restore insn because
12165 prior to this insn the FP would be stored at the wrong
12166 offset relative to SA, and after this insn we have no
12167 other reasonable register to use for the CFA. We don't
12168 bother resetting the CFA to the SP for the duration of
12169 the return insn. */
12170 add_reg_note (insn, REG_CFA_DEF_CFA,
12171 plus_constant (Pmode, sa, UNITS_PER_WORD));
12172 ix86_add_queued_cfa_restore_notes (insn);
12173 add_reg_note (insn, REG_CFA_RESTORE, hard_frame_pointer_rtx);
12174 RTX_FRAME_RELATED_P (insn) = 1;
12176 m->fs.cfa_reg = sa;
12177 m->fs.cfa_offset = UNITS_PER_WORD;
12178 m->fs.fp_valid = false;
12180 pro_epilogue_adjust_stack (stack_pointer_rtx, sa,
12181 const0_rtx, style, false);
12183 else
12185 t = gen_rtx_PLUS (Pmode, stack_pointer_rtx, sa);
12186 t = plus_constant (Pmode, t, m->fs.sp_offset - UNITS_PER_WORD);
12187 insn = emit_insn (gen_rtx_SET (stack_pointer_rtx, t));
12188 ix86_add_queued_cfa_restore_notes (insn);
12190 gcc_assert (m->fs.cfa_reg == stack_pointer_rtx);
12191 if (m->fs.cfa_offset != UNITS_PER_WORD)
12193 m->fs.cfa_offset = UNITS_PER_WORD;
12194 add_reg_note (insn, REG_CFA_DEF_CFA,
12195 plus_constant (Pmode, stack_pointer_rtx,
12196 UNITS_PER_WORD));
12197 RTX_FRAME_RELATED_P (insn) = 1;
12200 m->fs.sp_offset = UNITS_PER_WORD;
12201 m->fs.sp_valid = true;
12204 else
12206 /* SEH requires that the function end with (1) a stack adjustment
12207 if necessary, (2) a sequence of pops, and (3) a return or
12208 jump instruction. Prevent insns from the function body from
12209 being scheduled into this sequence. */
12210 if (TARGET_SEH)
12212 /* Prevent a catch region from being adjacent to the standard
12213 epilogue sequence. Unfortunately, neither crtl->uses_eh_lsda nor
12214 several other flags that would be interesting to test are
12215 set up yet. */
12216 if (flag_non_call_exceptions)
12217 emit_insn (gen_nops (const1_rtx));
12218 else
12219 emit_insn (gen_blockage ());
12222 /* First step is to deallocate the stack frame so that we can
12223 pop the registers. Also do it on SEH target for very large
12224 frame as the emitted instructions aren't allowed by the ABI in
12225 epilogues. */
12226 if (!m->fs.sp_valid
12227 || (TARGET_SEH
12228 && (m->fs.sp_offset - frame.reg_save_offset
12229 >= SEH_MAX_FRAME_SIZE)))
12231 pro_epilogue_adjust_stack (stack_pointer_rtx, hard_frame_pointer_rtx,
12232 GEN_INT (m->fs.fp_offset
12233 - frame.reg_save_offset),
12234 style, false);
12236 else if (m->fs.sp_offset != frame.reg_save_offset)
12238 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
12239 GEN_INT (m->fs.sp_offset
12240 - frame.reg_save_offset),
12241 style,
12242 m->fs.cfa_reg == stack_pointer_rtx);
12245 ix86_emit_restore_regs_using_pop ();
12248 /* If we used a frame pointer and haven't already got rid of it,
12249 then do so now. */
12250 if (m->fs.fp_valid)
12252 /* If the stack pointer is valid and pointing at the frame
12253 pointer store address, then we only need a pop. */
12254 if (m->fs.sp_valid && m->fs.sp_offset == frame.hfp_save_offset)
12255 ix86_emit_restore_reg_using_pop (hard_frame_pointer_rtx);
12256 /* The leave insn results in shorter dependency chains on CPUs
12257 that are able to grok it fast. */
12258 else if (TARGET_USE_LEAVE
12259 || optimize_bb_for_size_p (EXIT_BLOCK_PTR_FOR_FN (cfun))
12260 || !cfun->machine->use_fast_prologue_epilogue)
12261 ix86_emit_leave ();
12262 else
12264 pro_epilogue_adjust_stack (stack_pointer_rtx,
12265 hard_frame_pointer_rtx,
12266 const0_rtx, style, !using_drap);
12267 ix86_emit_restore_reg_using_pop (hard_frame_pointer_rtx);
12271 if (using_drap)
12273 int param_ptr_offset = UNITS_PER_WORD;
12274 rtx_insn *insn;
12276 gcc_assert (stack_realign_drap);
12278 if (ix86_static_chain_on_stack)
12279 param_ptr_offset += UNITS_PER_WORD;
12280 if (!call_used_regs[REGNO (crtl->drap_reg)])
12281 param_ptr_offset += UNITS_PER_WORD;
12283 insn = emit_insn (gen_rtx_SET
12284 (stack_pointer_rtx,
12285 gen_rtx_PLUS (Pmode,
12286 crtl->drap_reg,
12287 GEN_INT (-param_ptr_offset))));
12288 m->fs.cfa_reg = stack_pointer_rtx;
12289 m->fs.cfa_offset = param_ptr_offset;
12290 m->fs.sp_offset = param_ptr_offset;
12291 m->fs.realigned = false;
12293 add_reg_note (insn, REG_CFA_DEF_CFA,
12294 gen_rtx_PLUS (Pmode, stack_pointer_rtx,
12295 GEN_INT (param_ptr_offset)));
12296 RTX_FRAME_RELATED_P (insn) = 1;
12298 if (!call_used_regs[REGNO (crtl->drap_reg)])
12299 ix86_emit_restore_reg_using_pop (crtl->drap_reg);
12302 /* At this point the stack pointer must be valid, and we must have
12303 restored all of the registers. We may not have deallocated the
12304 entire stack frame. We've delayed this until now because it may
12305 be possible to merge the local stack deallocation with the
12306 deallocation forced by ix86_static_chain_on_stack. */
12307 gcc_assert (m->fs.sp_valid);
12308 gcc_assert (!m->fs.fp_valid);
12309 gcc_assert (!m->fs.realigned);
12310 if (m->fs.sp_offset != UNITS_PER_WORD)
12312 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
12313 GEN_INT (m->fs.sp_offset - UNITS_PER_WORD),
12314 style, true);
12316 else
12317 ix86_add_queued_cfa_restore_notes (get_last_insn ());
12319 /* Sibcall epilogues don't want a return instruction. */
12320 if (style == 0)
12322 m->fs = frame_state_save;
12323 return;
12326 if (crtl->args.pops_args && crtl->args.size)
12328 rtx popc = GEN_INT (crtl->args.pops_args);
12330 /* i386 can only pop 64K bytes. If asked to pop more, pop the return
12331 address, do an explicit add, and jump indirectly to the caller. */
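/* Illustration, roughly what the branch below emits: "ret imm16" can only
   encode a 16-bit pop count, hence the 64K limit.  For larger pop counts
   the generated sequence is approximately
       popl  %ecx        -- fetch the return address
       addl  $N, %esp    -- explicitly discard the N bytes of arguments
       jmp   *%ecx       -- return to the caller indirectly
   which is what the pop/adjust/indirect-return code constructs.  */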
12333 if (crtl->args.pops_args >= 65536)
12335 rtx ecx = gen_rtx_REG (SImode, CX_REG);
12336 rtx_insn *insn;
12338 /* There is no "pascal" calling convention in any 64bit ABI. */
12339 gcc_assert (!TARGET_64BIT);
12341 insn = emit_insn (gen_pop (ecx));
12342 m->fs.cfa_offset -= UNITS_PER_WORD;
12343 m->fs.sp_offset -= UNITS_PER_WORD;
12345 rtx x = plus_constant (Pmode, stack_pointer_rtx, UNITS_PER_WORD);
12346 x = gen_rtx_SET (stack_pointer_rtx, x);
12347 add_reg_note (insn, REG_CFA_ADJUST_CFA, x);
12348 add_reg_note (insn, REG_CFA_REGISTER, gen_rtx_SET (ecx, pc_rtx));
12349 RTX_FRAME_RELATED_P (insn) = 1;
12351 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
12352 popc, -1, true);
12353 emit_jump_insn (gen_simple_return_indirect_internal (ecx));
12355 else
12356 emit_jump_insn (gen_simple_return_pop_internal (popc));
12358 else
12359 emit_jump_insn (gen_simple_return_internal ());
12361 /* Restore the state back to the state from the prologue,
12362 so that it's correct for the next epilogue. */
12363 m->fs = frame_state_save;
12366 /* Reset from the function's potential modifications. */
12368 static void
12369 ix86_output_function_epilogue (FILE *file ATTRIBUTE_UNUSED, HOST_WIDE_INT)
12371 if (pic_offset_table_rtx
12372 && !ix86_use_pseudo_pic_reg ())
12373 SET_REGNO (pic_offset_table_rtx, REAL_PIC_OFFSET_TABLE_REGNUM);
12374 #if TARGET_MACHO
12375 /* Mach-O doesn't support labels at the end of objects, so if
12376 it looks like we might want one, insert a NOP. */
12378 rtx_insn *insn = get_last_insn ();
12379 rtx_insn *deleted_debug_label = NULL;
12380 while (insn
12381 && NOTE_P (insn)
12382 && NOTE_KIND (insn) != NOTE_INSN_DELETED_LABEL)
12384 /* For NOTE_INSN_DELETED_DEBUG_LABEL notes don't insert a nop;
12385 only set their CODE_LABEL_NUMBER to -1, otherwise there
12386 would be code generation differences
12387 between -g and -g0. */
12388 if (NOTE_P (insn) && NOTE_KIND (insn) == NOTE_INSN_DELETED_DEBUG_LABEL)
12389 deleted_debug_label = insn;
12390 insn = PREV_INSN (insn);
12392 if (insn
12393 && (LABEL_P (insn)
12394 || (NOTE_P (insn)
12395 && NOTE_KIND (insn) == NOTE_INSN_DELETED_LABEL)))
12396 fputs ("\tnop\n", file);
12397 else if (deleted_debug_label)
12398 for (insn = deleted_debug_label; insn; insn = NEXT_INSN (insn))
12399 if (NOTE_KIND (insn) == NOTE_INSN_DELETED_DEBUG_LABEL)
12400 CODE_LABEL_NUMBER (insn) = -1;
12402 #endif
12406 /* Return a scratch register to use in the split stack prologue. The
12407 split stack prologue is used for -fsplit-stack. It consists of the
12408 first instructions in the function, even before the regular prologue.
12409 The scratch register can be any caller-saved register which is not
12410 used for parameters or for the static chain. */
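/* Summary of the choices made below, for illustration: 64-bit targets use
   r11; otherwise fastcall functions use eax, thiscall functions use edx
   (or eax with a static chain), and functions with fewer than three
   regparm registers use ecx (or edx with a static chain).  The remaining
   combinations are rejected with sorry ().  */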
12412 static unsigned int
12413 split_stack_prologue_scratch_regno (void)
12415 if (TARGET_64BIT)
12416 return R11_REG;
12417 else
12419 bool is_fastcall, is_thiscall;
12420 int regparm;
12422 is_fastcall = (lookup_attribute ("fastcall",
12423 TYPE_ATTRIBUTES (TREE_TYPE (cfun->decl)))
12424 != NULL);
12425 is_thiscall = (lookup_attribute ("thiscall",
12426 TYPE_ATTRIBUTES (TREE_TYPE (cfun->decl)))
12427 != NULL);
12428 regparm = ix86_function_regparm (TREE_TYPE (cfun->decl), cfun->decl);
12430 if (is_fastcall)
12432 if (DECL_STATIC_CHAIN (cfun->decl))
12434 sorry ("-fsplit-stack does not support fastcall with "
12435 "nested function");
12436 return INVALID_REGNUM;
12438 return AX_REG;
12440 else if (is_thiscall)
12442 if (!DECL_STATIC_CHAIN (cfun->decl))
12443 return DX_REG;
12444 return AX_REG;
12446 else if (regparm < 3)
12448 if (!DECL_STATIC_CHAIN (cfun->decl))
12449 return CX_REG;
12450 else
12452 if (regparm >= 2)
12454 sorry ("-fsplit-stack does not support 2 register "
12455 "parameters for a nested function");
12456 return INVALID_REGNUM;
12458 return DX_REG;
12461 else
12463 /* FIXME: We could make this work by pushing a register
12464 around the addition and comparison. */
12465 sorry ("-fsplit-stack does not support 3 register parameters");
12466 return INVALID_REGNUM;
12471 /* A SYMBOL_REF for the function which allocates new stack space for
12472 -fsplit-stack. */
12474 static GTY(()) rtx split_stack_fn;
12476 /* A SYMBOL_REF for the function which allocates more stack space
12477 when using the large model. */
12479 static GTY(()) rtx split_stack_fn_large;
12481 /* Handle -fsplit-stack. These are the first instructions in the
12482 function, even before the regular prologue. */
12484 void
12485 ix86_expand_split_stack_prologue (void)
12487 struct ix86_frame frame;
12488 HOST_WIDE_INT allocate;
12489 unsigned HOST_WIDE_INT args_size;
12490 rtx_code_label *label;
12491 rtx limit, current, jump_insn, allocate_rtx, call_insn, call_fusage;
12492 rtx scratch_reg = NULL_RTX;
12493 rtx_code_label *varargs_label = NULL;
12494 rtx fn;
12496 gcc_assert (flag_split_stack && reload_completed);
12498 ix86_finalize_stack_realign_flags ();
12499 ix86_compute_frame_layout (&frame);
12500 allocate = frame.stack_pointer_offset - INCOMING_FRAME_SP_OFFSET;
12502 /* This is the label we will branch to if we have enough stack
12503 space. We expect the basic block reordering pass to reverse this
12504 branch if optimizing, so that we branch in the unlikely case. */
12505 label = gen_label_rtx ();
12507 /* We need to compare the stack pointer minus the frame size with
12508 the stack boundary in the TCB. The stack boundary always gives
12509 us SPLIT_STACK_AVAILABLE bytes, so if we need less than that we
12510 can compare directly. Otherwise we need to do an addition. */
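/* Note, an assumption for illustration only: the UNSPEC_STACK_CHECK memory
   reference built below is expected to be emitted as a thread-local access
   to the split-stack boundary field in the TCB, typically through the
   %fs/%gs segment register; the exact offset is target- and libc-specific.  */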
12512 limit = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const0_rtx),
12513 UNSPEC_STACK_CHECK);
12514 limit = gen_rtx_CONST (Pmode, limit);
12515 limit = gen_rtx_MEM (Pmode, limit);
12516 if (allocate < SPLIT_STACK_AVAILABLE)
12517 current = stack_pointer_rtx;
12518 else
12520 unsigned int scratch_regno;
12521 rtx offset;
12523 /* We need a scratch register to hold the stack pointer minus
12524 the required frame size. Since this is the very start of the
12525 function, the scratch register can be any caller-saved
12526 register which is not used for parameters. */
12527 offset = GEN_INT (- allocate);
12528 scratch_regno = split_stack_prologue_scratch_regno ();
12529 if (scratch_regno == INVALID_REGNUM)
12530 return;
12531 scratch_reg = gen_rtx_REG (Pmode, scratch_regno);
12532 if (!TARGET_64BIT || x86_64_immediate_operand (offset, Pmode))
12534 /* We don't use ix86_gen_add3 in this case because it will
12535 want to split to lea, but when not optimizing the insn
12536 will not be split after this point. */
12537 emit_insn (gen_rtx_SET (scratch_reg,
12538 gen_rtx_PLUS (Pmode, stack_pointer_rtx,
12539 offset)));
12541 else
12543 emit_move_insn (scratch_reg, offset);
12544 emit_insn (ix86_gen_add3 (scratch_reg, scratch_reg,
12545 stack_pointer_rtx));
12547 current = scratch_reg;
12550 ix86_expand_branch (GEU, current, limit, label);
12551 jump_insn = get_last_insn ();
12552 JUMP_LABEL (jump_insn) = label;
12554 /* Mark the jump as very likely to be taken. */
12555 add_int_reg_note (jump_insn, REG_BR_PROB,
12556 REG_BR_PROB_BASE - REG_BR_PROB_BASE / 100);
12558 if (split_stack_fn == NULL_RTX)
12560 split_stack_fn = gen_rtx_SYMBOL_REF (Pmode, "__morestack");
12561 SYMBOL_REF_FLAGS (split_stack_fn) |= SYMBOL_FLAG_LOCAL;
12563 fn = split_stack_fn;
12565 /* Get more stack space. We pass in the desired stack space and the
12566 size of the arguments to copy to the new stack. In 32-bit mode
12567 we push the parameters; __morestack will return on a new stack
12568 anyhow. In 64-bit mode we pass the parameters in r10 and
12569 r11. */
12570 allocate_rtx = GEN_INT (allocate);
12571 args_size = crtl->args.size >= 0 ? crtl->args.size : 0;
12572 call_fusage = NULL_RTX;
12573 if (TARGET_64BIT)
12575 rtx reg10, reg11;
12577 reg10 = gen_rtx_REG (Pmode, R10_REG);
12578 reg11 = gen_rtx_REG (Pmode, R11_REG);
12580 /* If this function uses a static chain, it will be in %r10.
12581 Preserve it across the call to __morestack. */
12582 if (DECL_STATIC_CHAIN (cfun->decl))
12584 rtx rax;
12586 rax = gen_rtx_REG (word_mode, AX_REG);
12587 emit_move_insn (rax, gen_rtx_REG (word_mode, R10_REG));
12588 use_reg (&call_fusage, rax);
12591 if ((ix86_cmodel == CM_LARGE || ix86_cmodel == CM_LARGE_PIC)
12592 && !TARGET_PECOFF)
12594 HOST_WIDE_INT argval;
12596 gcc_assert (Pmode == DImode);
12597 /* When using the large model we need to load the address
12598 into a register, and we've run out of registers. So we
12599 switch to a different calling convention, and we call a
12600 different function: __morestack_large_model. We pass the
12601 argument size in the upper 32 bits of r10 and pass the
12602 frame size in the lower 32 bits. */
12603 gcc_assert ((allocate & (HOST_WIDE_INT) 0xffffffff) == allocate);
12604 gcc_assert ((args_size & 0xffffffff) == args_size);
12606 if (split_stack_fn_large == NULL_RTX)
12608 split_stack_fn_large =
12609 gen_rtx_SYMBOL_REF (Pmode, "__morestack_large_model");
12610 SYMBOL_REF_FLAGS (split_stack_fn_large) |= SYMBOL_FLAG_LOCAL;
12612 if (ix86_cmodel == CM_LARGE_PIC)
12614 rtx_code_label *label;
12615 rtx x;
12617 label = gen_label_rtx ();
12618 emit_label (label);
12619 LABEL_PRESERVE_P (label) = 1;
12620 emit_insn (gen_set_rip_rex64 (reg10, label));
12621 emit_insn (gen_set_got_offset_rex64 (reg11, label));
12622 emit_insn (ix86_gen_add3 (reg10, reg10, reg11));
12623 x = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, split_stack_fn_large),
12624 UNSPEC_GOT);
12625 x = gen_rtx_CONST (Pmode, x);
12626 emit_move_insn (reg11, x);
12627 x = gen_rtx_PLUS (Pmode, reg10, reg11);
12628 x = gen_const_mem (Pmode, x);
12629 emit_move_insn (reg11, x);
12631 else
12632 emit_move_insn (reg11, split_stack_fn_large);
12634 fn = reg11;
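/* For illustration: the two 16-bit shifts below together move args_size
   into the upper 32 bits of the value, so r10 carries the argument size
   in its high half and the frame size in its low half, matching the
   __morestack_large_model convention described above.  */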
12636 argval = ((args_size << 16) << 16) + allocate;
12637 emit_move_insn (reg10, GEN_INT (argval));
12639 else
12641 emit_move_insn (reg10, allocate_rtx);
12642 emit_move_insn (reg11, GEN_INT (args_size));
12643 use_reg (&call_fusage, reg11);
12646 use_reg (&call_fusage, reg10);
12648 else
12650 emit_insn (gen_push (GEN_INT (args_size)));
12651 emit_insn (gen_push (allocate_rtx));
12653 call_insn = ix86_expand_call (NULL_RTX, gen_rtx_MEM (QImode, fn),
12654 GEN_INT (UNITS_PER_WORD), constm1_rtx,
12655 NULL_RTX, false);
12656 add_function_usage_to (call_insn, call_fusage);
12658 /* In order to make call/return prediction work right, we now need
12659 to execute a return instruction. See
12660 libgcc/config/i386/morestack.S for the details on how this works.
12662 For flow purposes gcc must not see this as a return
12663 instruction--we need control flow to continue at the subsequent
12664 label. Therefore, we use an unspec. */
12665 gcc_assert (crtl->args.pops_args < 65536);
12666 emit_insn (gen_split_stack_return (GEN_INT (crtl->args.pops_args)));
12668 /* If we are in 64-bit mode and this function uses a static chain,
12669 we saved %r10 in %rax before calling __morestack. */
12670 if (TARGET_64BIT && DECL_STATIC_CHAIN (cfun->decl))
12671 emit_move_insn (gen_rtx_REG (word_mode, R10_REG),
12672 gen_rtx_REG (word_mode, AX_REG));
12674 /* If this function calls va_start, we need to store a pointer to
12675 the arguments on the old stack, because they may not have been
12676 all copied to the new stack. At this point the old stack can be
12677 found at the frame pointer value used by __morestack, because
12678 __morestack has set that up before calling back to us. Here we
12679 store that pointer in a scratch register, and in
12680 ix86_expand_prologue we store the scratch register in a stack
12681 slot. */
12682 if (cfun->machine->split_stack_varargs_pointer != NULL_RTX)
12684 unsigned int scratch_regno;
12685 rtx frame_reg;
12686 int words;
12688 scratch_regno = split_stack_prologue_scratch_regno ();
12689 scratch_reg = gen_rtx_REG (Pmode, scratch_regno);
12690 frame_reg = gen_rtx_REG (Pmode, BP_REG);
12692 /* 64-bit:
12693 fp -> old fp value
12694 return address within this function
12695 return address of caller of this function
12696 stack arguments
12697 So we add three words to get to the stack arguments.
12699 32-bit:
12700 fp -> old fp value
12701 return address within this function
12702 first argument to __morestack
12703 second argument to __morestack
12704 return address of caller of this function
12705 stack arguments
12706 So we add five words to get to the stack arguments.
12708 words = TARGET_64BIT ? 3 : 5;
12709 emit_insn (gen_rtx_SET (scratch_reg,
12710 gen_rtx_PLUS (Pmode, frame_reg,
12711 GEN_INT (words * UNITS_PER_WORD))));
12713 varargs_label = gen_label_rtx ();
12714 emit_jump_insn (gen_jump (varargs_label));
12715 JUMP_LABEL (get_last_insn ()) = varargs_label;
12717 emit_barrier ();
12720 emit_label (label);
12721 LABEL_NUSES (label) = 1;
12723 /* If this function calls va_start, we now have to set the scratch
12724 register for the case where we do not call __morestack. In this
12725 case we need to set it based on the stack pointer. */
12726 if (cfun->machine->split_stack_varargs_pointer != NULL_RTX)
12728 emit_insn (gen_rtx_SET (scratch_reg,
12729 gen_rtx_PLUS (Pmode, stack_pointer_rtx,
12730 GEN_INT (UNITS_PER_WORD))));
12732 emit_label (varargs_label);
12733 LABEL_NUSES (varargs_label) = 1;
12737 /* We may have to tell the dataflow pass that the split stack prologue
12738 is initializing a scratch register. */
12740 static void
12741 ix86_live_on_entry (bitmap regs)
12743 if (cfun->machine->split_stack_varargs_pointer != NULL_RTX)
12745 gcc_assert (flag_split_stack);
12746 bitmap_set_bit (regs, split_stack_prologue_scratch_regno ());
12750 /* Extract the parts of an RTL expression that is a valid memory address
12751 for an instruction. Return 0 if the structure of the address is
12752 grossly off. Return -1 if the address contains ASHIFT, so it is not
12753 strictly valid, but is still used for computing the length of a lea insn. */
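/* An illustrative example, not from the original sources: the address
   (plus (plus (mult (reg %eax) (const_int 4)) (reg %ebx)) (const_int 16))
   decomposes into base = %ebx, index = %eax, scale = 4, disp = 16,
   i.e. the operand 16(%ebx,%eax,4) in AT&T syntax.  */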
12756 ix86_decompose_address (rtx addr, struct ix86_address *out)
12758 rtx base = NULL_RTX, index = NULL_RTX, disp = NULL_RTX;
12759 rtx base_reg, index_reg;
12760 HOST_WIDE_INT scale = 1;
12761 rtx scale_rtx = NULL_RTX;
12762 rtx tmp;
12763 int retval = 1;
12764 enum ix86_address_seg seg = SEG_DEFAULT;
12766 /* Allow zero-extended SImode addresses,
12767 they will be emitted with addr32 prefix. */
12768 if (TARGET_64BIT && GET_MODE (addr) == DImode)
12770 if (GET_CODE (addr) == ZERO_EXTEND
12771 && GET_MODE (XEXP (addr, 0)) == SImode)
12773 addr = XEXP (addr, 0);
12774 if (CONST_INT_P (addr))
12775 return 0;
12777 else if (GET_CODE (addr) == AND
12778 && const_32bit_mask (XEXP (addr, 1), DImode))
12780 addr = simplify_gen_subreg (SImode, XEXP (addr, 0), DImode, 0);
12781 if (addr == NULL_RTX)
12782 return 0;
12784 if (CONST_INT_P (addr))
12785 return 0;
12789 /* Allow SImode subregs of DImode addresses,
12790 they will be emitted with addr32 prefix. */
12791 if (TARGET_64BIT && GET_MODE (addr) == SImode)
12793 if (GET_CODE (addr) == SUBREG
12794 && GET_MODE (SUBREG_REG (addr)) == DImode)
12796 addr = SUBREG_REG (addr);
12797 if (CONST_INT_P (addr))
12798 return 0;
12802 if (REG_P (addr))
12803 base = addr;
12804 else if (GET_CODE (addr) == SUBREG)
12806 if (REG_P (SUBREG_REG (addr)))
12807 base = addr;
12808 else
12809 return 0;
12811 else if (GET_CODE (addr) == PLUS)
12813 rtx addends[4], op;
12814 int n = 0, i;
12816 op = addr;
12819 if (n >= 4)
12820 return 0;
12821 addends[n++] = XEXP (op, 1);
12822 op = XEXP (op, 0);
12824 while (GET_CODE (op) == PLUS);
12825 if (n >= 4)
12826 return 0;
12827 addends[n] = op;
12829 for (i = n; i >= 0; --i)
12831 op = addends[i];
12832 switch (GET_CODE (op))
12834 case MULT:
12835 if (index)
12836 return 0;
12837 index = XEXP (op, 0);
12838 scale_rtx = XEXP (op, 1);
12839 break;
12841 case ASHIFT:
12842 if (index)
12843 return 0;
12844 index = XEXP (op, 0);
12845 tmp = XEXP (op, 1);
12846 if (!CONST_INT_P (tmp))
12847 return 0;
12848 scale = INTVAL (tmp);
12849 if ((unsigned HOST_WIDE_INT) scale > 3)
12850 return 0;
12851 scale = 1 << scale;
12852 break;
12854 case ZERO_EXTEND:
12855 op = XEXP (op, 0);
12856 if (GET_CODE (op) != UNSPEC)
12857 return 0;
12858 /* FALLTHRU */
12860 case UNSPEC:
12861 if (XINT (op, 1) == UNSPEC_TP
12862 && TARGET_TLS_DIRECT_SEG_REFS
12863 && seg == SEG_DEFAULT)
12864 seg = DEFAULT_TLS_SEG_REG;
12865 else
12866 return 0;
12867 break;
12869 case SUBREG:
12870 if (!REG_P (SUBREG_REG (op)))
12871 return 0;
12872 /* FALLTHRU */
12874 case REG:
12875 if (!base)
12876 base = op;
12877 else if (!index)
12878 index = op;
12879 else
12880 return 0;
12881 break;
12883 case CONST:
12884 case CONST_INT:
12885 case SYMBOL_REF:
12886 case LABEL_REF:
12887 if (disp)
12888 return 0;
12889 disp = op;
12890 break;
12892 default:
12893 return 0;
12897 else if (GET_CODE (addr) == MULT)
12899 index = XEXP (addr, 0); /* index*scale */
12900 scale_rtx = XEXP (addr, 1);
12902 else if (GET_CODE (addr) == ASHIFT)
12904 /* We're called for lea too, which implements ashift on occasion. */
12905 index = XEXP (addr, 0);
12906 tmp = XEXP (addr, 1);
12907 if (!CONST_INT_P (tmp))
12908 return 0;
12909 scale = INTVAL (tmp);
12910 if ((unsigned HOST_WIDE_INT) scale > 3)
12911 return 0;
12912 scale = 1 << scale;
12913 retval = -1;
12915 else
12916 disp = addr; /* displacement */
12918 if (index)
12920 if (REG_P (index))
12922 else if (GET_CODE (index) == SUBREG
12923 && REG_P (SUBREG_REG (index)))
12925 else
12926 return 0;
12929 /* Extract the integral value of scale. */
12930 if (scale_rtx)
12932 if (!CONST_INT_P (scale_rtx))
12933 return 0;
12934 scale = INTVAL (scale_rtx);
12937 base_reg = base && GET_CODE (base) == SUBREG ? SUBREG_REG (base) : base;
12938 index_reg = index && GET_CODE (index) == SUBREG ? SUBREG_REG (index) : index;
12940 /* Avoid useless 0 displacement. */
12941 if (disp == const0_rtx && (base || index))
12942 disp = NULL_RTX;
12944 /* Allow arg pointer and stack pointer as index if there is no scaling. */
12945 if (base_reg && index_reg && scale == 1
12946 && (index_reg == arg_pointer_rtx
12947 || index_reg == frame_pointer_rtx
12948 || (REG_P (index_reg) && REGNO (index_reg) == STACK_POINTER_REGNUM)))
12950 std::swap (base, index);
12951 std::swap (base_reg, index_reg);
12954 /* Special case: %ebp cannot be encoded as a base without a displacement.
12955 Similarly %r13. */
12956 if (!disp
12957 && base_reg
12958 && (base_reg == hard_frame_pointer_rtx
12959 || base_reg == frame_pointer_rtx
12960 || base_reg == arg_pointer_rtx
12961 || (REG_P (base_reg)
12962 && (REGNO (base_reg) == HARD_FRAME_POINTER_REGNUM
12963 || REGNO (base_reg) == R13_REG))))
12964 disp = const0_rtx;
12966 /* Special case: on K6, [%esi] causes the instruction to be vector decoded.
12967 Avoid this by transforming to [%esi+0].
12968 Reload calls address legitimization without cfun defined, so we need
12969 to test cfun for being non-NULL. */
12970 if (TARGET_K6 && cfun && optimize_function_for_speed_p (cfun)
12971 && base_reg && !index_reg && !disp
12972 && REG_P (base_reg) && REGNO (base_reg) == SI_REG)
12973 disp = const0_rtx;
12975 /* Special case: encode reg+reg instead of reg*2. */
12976 if (!base && index && scale == 2)
12977 base = index, base_reg = index_reg, scale = 1;
12979 /* Special case: scaling cannot be encoded without base or displacement. */
12980 if (!base && !disp && index && scale != 1)
12981 disp = const0_rtx;
12983 out->base = base;
12984 out->index = index;
12985 out->disp = disp;
12986 out->scale = scale;
12987 out->seg = seg;
12989 return retval;
12992 /* Return cost of the memory address x.
12993 For i386, it is better to use a complex address than let gcc copy
12994 the address into a reg and make a new pseudo. But not if the address
12995 requires two regs - that would mean more pseudos with longer
12996 lifetimes. */
12997 static int
12998 ix86_address_cost (rtx x, machine_mode, addr_space_t, bool)
13000 struct ix86_address parts;
13001 int cost = 1;
13002 int ok = ix86_decompose_address (x, &parts);
13004 gcc_assert (ok);
13006 if (parts.base && GET_CODE (parts.base) == SUBREG)
13007 parts.base = SUBREG_REG (parts.base);
13008 if (parts.index && GET_CODE (parts.index) == SUBREG)
13009 parts.index = SUBREG_REG (parts.index);
13011 /* Attempt to minimize the number of registers in the address by
13012 increasing the address cost for each register used. We don't increase
13013 the cost for "pic_offset_table_rtx". When a memory operand using
13014 "pic_offset_table_rtx" is not invariant itself, it most likely means
13015 that the base or index is not invariant. Therefore only
13016 "pic_offset_table_rtx" could be hoisted out, which is not profitable for x86. */
13017 if (parts.base
13018 && (!REG_P (parts.base) || REGNO (parts.base) >= FIRST_PSEUDO_REGISTER)
13019 && (current_pass->type == GIMPLE_PASS
13020 || !pic_offset_table_rtx
13021 || !REG_P (parts.base)
13022 || REGNO (pic_offset_table_rtx) != REGNO (parts.base)))
13023 cost++;
13025 if (parts.index
13026 && (!REG_P (parts.index) || REGNO (parts.index) >= FIRST_PSEUDO_REGISTER)
13027 && (current_pass->type == GIMPLE_PASS
13028 || !pic_offset_table_rtx
13029 || !REG_P (parts.index)
13030 || REGNO (pic_offset_table_rtx) != REGNO (parts.index)))
13031 cost++;
13033 /* The AMD-K6 doesn't like addresses with ModR/M set to 00_xxx_100b,
13034 since its predecode logic can't detect the length of instructions
13035 and decoding degenerates to the vector decoder. Increase the cost of
13036 such addresses here. The penalty is at least 2 cycles. It may be
13037 worthwhile to split such addresses or even refuse them altogether.
13039 The following addressing modes are affected:
13040 [base+scale*index]
13041 [scale*index+disp]
13042 [base+index]
13044 The first and last cases may be avoidable by explicitly coding the zero
13045 into the memory address, but I don't have an AMD-K6 machine handy to
13046 check this theory. */
13048 if (TARGET_K6
13049 && ((!parts.disp && parts.base && parts.index && parts.scale != 1)
13050 || (parts.disp && !parts.base && parts.index && parts.scale != 1)
13051 || (!parts.disp && parts.base && parts.index && parts.scale == 1)))
13052 cost += 10;
13054 return cost;
13057 /* Allow {LABEL | SYMBOL}_REF - SYMBOL_REF-FOR-PICBASE for Mach-O as
13058 this is used to form addresses to local data when -fPIC is in
13059 use. */
13061 static bool
13062 darwin_local_data_pic (rtx disp)
13064 return (GET_CODE (disp) == UNSPEC
13065 && XINT (disp, 1) == UNSPEC_MACHOPIC_OFFSET);
13068 /* Determine if a given RTX is a valid constant. We already know this
13069 satisfies CONSTANT_P. */
13071 static bool
13072 ix86_legitimate_constant_p (machine_mode, rtx x)
13074 /* Pointer bounds constants are not valid. */
13075 if (POINTER_BOUNDS_MODE_P (GET_MODE (x)))
13076 return false;
13078 switch (GET_CODE (x))
13080 case CONST:
13081 x = XEXP (x, 0);
13083 if (GET_CODE (x) == PLUS)
13085 if (!CONST_INT_P (XEXP (x, 1)))
13086 return false;
13087 x = XEXP (x, 0);
13090 if (TARGET_MACHO && darwin_local_data_pic (x))
13091 return true;
13093 /* Only some unspecs are valid as "constants". */
13094 if (GET_CODE (x) == UNSPEC)
13095 switch (XINT (x, 1))
13097 case UNSPEC_GOT:
13098 case UNSPEC_GOTOFF:
13099 case UNSPEC_PLTOFF:
13100 return TARGET_64BIT;
13101 case UNSPEC_TPOFF:
13102 case UNSPEC_NTPOFF:
13103 x = XVECEXP (x, 0, 0);
13104 return (GET_CODE (x) == SYMBOL_REF
13105 && SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_EXEC);
13106 case UNSPEC_DTPOFF:
13107 x = XVECEXP (x, 0, 0);
13108 return (GET_CODE (x) == SYMBOL_REF
13109 && SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_DYNAMIC);
13110 default:
13111 return false;
13114 /* We must have drilled down to a symbol. */
13115 if (GET_CODE (x) == LABEL_REF)
13116 return true;
13117 if (GET_CODE (x) != SYMBOL_REF)
13118 return false;
13119 /* FALLTHRU */
13121 case SYMBOL_REF:
13122 /* TLS symbols are never valid. */
13123 if (SYMBOL_REF_TLS_MODEL (x))
13124 return false;
13126 /* DLLIMPORT symbols are never valid. */
13127 if (TARGET_DLLIMPORT_DECL_ATTRIBUTES
13128 && SYMBOL_REF_DLLIMPORT_P (x))
13129 return false;
13131 #if TARGET_MACHO
13132 /* mdynamic-no-pic */
13133 if (MACHO_DYNAMIC_NO_PIC_P)
13134 return machopic_symbol_defined_p (x);
13135 #endif
13136 break;
13138 case CONST_WIDE_INT:
13139 if (!TARGET_64BIT && !standard_sse_constant_p (x))
13140 return false;
13141 break;
13143 case CONST_VECTOR:
13144 if (!standard_sse_constant_p (x))
13145 return false;
13147 default:
13148 break;
13151 /* Otherwise we handle everything else in the move patterns. */
13152 return true;
13155 /* Determine if it's legal to put X into the constant pool. This
13156 is not possible for the address of thread-local symbols, which
13157 is checked above. */
13159 static bool
13160 ix86_cannot_force_const_mem (machine_mode mode, rtx x)
13162 /* We can always put integral constants and vectors in memory. */
13163 switch (GET_CODE (x))
13165 case CONST_INT:
13166 case CONST_WIDE_INT:
13167 case CONST_DOUBLE:
13168 case CONST_VECTOR:
13169 return false;
13171 default:
13172 break;
13174 return !ix86_legitimate_constant_p (mode, x);
13177 /* Nonzero if the symbol is marked as dllimport, or as stub-variable,
13178 otherwise zero. */
13180 static bool
13181 is_imported_p (rtx x)
13183 if (!TARGET_DLLIMPORT_DECL_ATTRIBUTES
13184 || GET_CODE (x) != SYMBOL_REF)
13185 return false;
13187 return SYMBOL_REF_DLLIMPORT_P (x) || SYMBOL_REF_STUBVAR_P (x);
13191 /* Nonzero if the constant value X is a legitimate general operand
13192 when generating PIC code. It is given that flag_pic is on and
13193 that X satisfies CONSTANT_P. */
13195 bool
13196 legitimate_pic_operand_p (rtx x)
13198 rtx inner;
13200 switch (GET_CODE (x))
13202 case CONST:
13203 inner = XEXP (x, 0);
13204 if (GET_CODE (inner) == PLUS
13205 && CONST_INT_P (XEXP (inner, 1)))
13206 inner = XEXP (inner, 0);
13208 /* Only some unspecs are valid as "constants". */
13209 if (GET_CODE (inner) == UNSPEC)
13210 switch (XINT (inner, 1))
13212 case UNSPEC_GOT:
13213 case UNSPEC_GOTOFF:
13214 case UNSPEC_PLTOFF:
13215 return TARGET_64BIT;
13216 case UNSPEC_TPOFF:
13217 x = XVECEXP (inner, 0, 0);
13218 return (GET_CODE (x) == SYMBOL_REF
13219 && SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_EXEC);
13220 case UNSPEC_MACHOPIC_OFFSET:
13221 return legitimate_pic_address_disp_p (x);
13222 default:
13223 return false;
13225 /* FALLTHRU */
13227 case SYMBOL_REF:
13228 case LABEL_REF:
13229 return legitimate_pic_address_disp_p (x);
13231 default:
13232 return true;
13236 /* Determine if a given CONST RTX is a valid memory displacement
13237 in PIC mode. */
13239 bool
13240 legitimate_pic_address_disp_p (rtx disp)
13242 bool saw_plus;
13244 /* In 64bit mode we can allow direct addresses of symbols and labels
13245 when they are not dynamic symbols. */
13246 if (TARGET_64BIT)
13248 rtx op0 = disp, op1;
13250 switch (GET_CODE (disp))
13252 case LABEL_REF:
13253 return true;
13255 case CONST:
13256 if (GET_CODE (XEXP (disp, 0)) != PLUS)
13257 break;
13258 op0 = XEXP (XEXP (disp, 0), 0);
13259 op1 = XEXP (XEXP (disp, 0), 1);
13260 if (!CONST_INT_P (op1)
13261 || INTVAL (op1) >= 16*1024*1024
13262 || INTVAL (op1) < -16*1024*1024)
13263 break;
13264 if (GET_CODE (op0) == LABEL_REF)
13265 return true;
13266 if (GET_CODE (op0) == CONST
13267 && GET_CODE (XEXP (op0, 0)) == UNSPEC
13268 && XINT (XEXP (op0, 0), 1) == UNSPEC_PCREL)
13269 return true;
13270 if (GET_CODE (op0) == UNSPEC
13271 && XINT (op0, 1) == UNSPEC_PCREL)
13272 return true;
13273 if (GET_CODE (op0) != SYMBOL_REF)
13274 break;
13275 /* FALLTHRU */
13277 case SYMBOL_REF:
13278 /* TLS references should always be enclosed in UNSPEC.
13279 The dllimported symbol always needs to be resolved. */
13280 if (SYMBOL_REF_TLS_MODEL (op0)
13281 || (TARGET_DLLIMPORT_DECL_ATTRIBUTES && SYMBOL_REF_DLLIMPORT_P (op0)))
13282 return false;
13284 if (TARGET_PECOFF)
13286 if (is_imported_p (op0))
13287 return true;
13289 if (SYMBOL_REF_FAR_ADDR_P (op0)
13290 || !SYMBOL_REF_LOCAL_P (op0))
13291 break;
13293 /* Function symbols need to be resolved only for
13294 the large model.
13295 For the small model we don't need to resolve anything
13296 here. */
13297 if ((ix86_cmodel != CM_LARGE_PIC
13298 && SYMBOL_REF_FUNCTION_P (op0))
13299 || ix86_cmodel == CM_SMALL_PIC)
13300 return true;
13301 /* Non-external symbols don't need to be resolved for
13302 the large and medium models. */
13303 if ((ix86_cmodel == CM_LARGE_PIC
13304 || ix86_cmodel == CM_MEDIUM_PIC)
13305 && !SYMBOL_REF_EXTERNAL_P (op0))
13306 return true;
13308 else if (!SYMBOL_REF_FAR_ADDR_P (op0)
13309 && (SYMBOL_REF_LOCAL_P (op0)
13310 || (HAVE_LD_PIE_COPYRELOC
13311 && flag_pie
13312 && !SYMBOL_REF_WEAK (op0)
13313 && !SYMBOL_REF_FUNCTION_P (op0)))
13314 && ix86_cmodel != CM_LARGE_PIC)
13315 return true;
13316 break;
13318 default:
13319 break;
13322 if (GET_CODE (disp) != CONST)
13323 return false;
13324 disp = XEXP (disp, 0);
13326 if (TARGET_64BIT)
13328 /* It is unsafe to allow PLUS expressions; this limits the allowed
13329 distance of GOT tables. We should not need these anyway. */
13330 if (GET_CODE (disp) != UNSPEC
13331 || (XINT (disp, 1) != UNSPEC_GOTPCREL
13332 && XINT (disp, 1) != UNSPEC_GOTOFF
13333 && XINT (disp, 1) != UNSPEC_PCREL
13334 && XINT (disp, 1) != UNSPEC_PLTOFF))
13335 return false;
13337 if (GET_CODE (XVECEXP (disp, 0, 0)) != SYMBOL_REF
13338 && GET_CODE (XVECEXP (disp, 0, 0)) != LABEL_REF)
13339 return false;
13340 return true;
13343 saw_plus = false;
13344 if (GET_CODE (disp) == PLUS)
13346 if (!CONST_INT_P (XEXP (disp, 1)))
13347 return false;
13348 disp = XEXP (disp, 0);
13349 saw_plus = true;
13352 if (TARGET_MACHO && darwin_local_data_pic (disp))
13353 return true;
13355 if (GET_CODE (disp) != UNSPEC)
13356 return false;
13358 switch (XINT (disp, 1))
13360 case UNSPEC_GOT:
13361 if (saw_plus)
13362 return false;
13363 /* We need to check for both symbols and labels because VxWorks loads
13364 text labels with @GOT rather than @GOTOFF. See gotoff_operand for
13365 details. */
13366 return (GET_CODE (XVECEXP (disp, 0, 0)) == SYMBOL_REF
13367 || GET_CODE (XVECEXP (disp, 0, 0)) == LABEL_REF);
13368 case UNSPEC_GOTOFF:
13369 /* Refuse GOTOFF in 64bit mode since it is always 64bit when used.
13370 While the ABI also specifies a 32bit relocation, we don't produce it
13371 in the small PIC model at all. */
13372 if ((GET_CODE (XVECEXP (disp, 0, 0)) == SYMBOL_REF
13373 || GET_CODE (XVECEXP (disp, 0, 0)) == LABEL_REF)
13374 && !TARGET_64BIT)
13375 return !TARGET_PECOFF && gotoff_operand (XVECEXP (disp, 0, 0), Pmode);
13376 return false;
13377 case UNSPEC_GOTTPOFF:
13378 case UNSPEC_GOTNTPOFF:
13379 case UNSPEC_INDNTPOFF:
13380 if (saw_plus)
13381 return false;
13382 disp = XVECEXP (disp, 0, 0);
13383 return (GET_CODE (disp) == SYMBOL_REF
13384 && SYMBOL_REF_TLS_MODEL (disp) == TLS_MODEL_INITIAL_EXEC);
13385 case UNSPEC_NTPOFF:
13386 disp = XVECEXP (disp, 0, 0);
13387 return (GET_CODE (disp) == SYMBOL_REF
13388 && SYMBOL_REF_TLS_MODEL (disp) == TLS_MODEL_LOCAL_EXEC);
13389 case UNSPEC_DTPOFF:
13390 disp = XVECEXP (disp, 0, 0);
13391 return (GET_CODE (disp) == SYMBOL_REF
13392 && SYMBOL_REF_TLS_MODEL (disp) == TLS_MODEL_LOCAL_DYNAMIC);
13395 return false;
13398 /* Determine if op is a suitable RTX for an address register.
13399 Return the naked register if a register or a register subreg is
13400 found, otherwise return NULL_RTX. */
13402 static rtx
13403 ix86_validate_address_register (rtx op)
13405 machine_mode mode = GET_MODE (op);
13407 /* Only SImode or DImode registers can form the address. */
13408 if (mode != SImode && mode != DImode)
13409 return NULL_RTX;
13411 if (REG_P (op))
13412 return op;
13413 else if (GET_CODE (op) == SUBREG)
13415 rtx reg = SUBREG_REG (op);
13417 if (!REG_P (reg))
13418 return NULL_RTX;
13420 mode = GET_MODE (reg);
13422 /* Don't allow SUBREGs that span more than a word. It can
13423 lead to spill failures when the register is one word out
13424 of a two word structure. */
13425 if (GET_MODE_SIZE (mode) > UNITS_PER_WORD)
13426 return NULL_RTX;
13428 /* Allow only SUBREGs of non-eliminable hard registers. */
13429 if (register_no_elim_operand (reg, mode))
13430 return reg;
13433 /* Op is not a register. */
13434 return NULL_RTX;
13437 /* Recognizes RTL expressions that are valid memory addresses for an
13438 instruction. The MODE argument is the machine mode for the MEM
13439 expression that wants to use this address.
13441 It only recognizes addresses in canonical form. LEGITIMIZE_ADDRESS should
13442 convert common non-canonical forms to canonical form so that they will
13443 be recognized. */
13445 static bool
13446 ix86_legitimate_address_p (machine_mode, rtx addr, bool strict)
13448 struct ix86_address parts;
13449 rtx base, index, disp;
13450 HOST_WIDE_INT scale;
13451 enum ix86_address_seg seg;
13453 if (ix86_decompose_address (addr, &parts) <= 0)
13454 /* Decomposition failed. */
13455 return false;
13457 base = parts.base;
13458 index = parts.index;
13459 disp = parts.disp;
13460 scale = parts.scale;
13461 seg = parts.seg;
13463 /* Validate base register. */
13464 if (base)
13466 rtx reg = ix86_validate_address_register (base);
13468 if (reg == NULL_RTX)
13469 return false;
13471 if ((strict && ! REG_OK_FOR_BASE_STRICT_P (reg))
13472 || (! strict && ! REG_OK_FOR_BASE_NONSTRICT_P (reg)))
13473 /* Base is not valid. */
13474 return false;
13477 /* Validate index register. */
13478 if (index)
13480 rtx reg = ix86_validate_address_register (index);
13482 if (reg == NULL_RTX)
13483 return false;
13485 if ((strict && ! REG_OK_FOR_INDEX_STRICT_P (reg))
13486 || (! strict && ! REG_OK_FOR_INDEX_NONSTRICT_P (reg)))
13487 /* Index is not valid. */
13488 return false;
13491 /* Index and base should have the same mode. */
13492 if (base && index
13493 && GET_MODE (base) != GET_MODE (index))
13494 return false;
13496 /* Address override works only on the (%reg) part of %fs:(%reg). */
13497 if (seg != SEG_DEFAULT
13498 && ((base && GET_MODE (base) != word_mode)
13499 || (index && GET_MODE (index) != word_mode)))
13500 return false;
13502 /* Validate scale factor. */
13503 if (scale != 1)
13505 if (!index)
13506 /* Scale without index. */
13507 return false;
13509 if (scale != 2 && scale != 4 && scale != 8)
13510 /* Scale is not a valid multiplier. */
13511 return false;
13514 /* Validate displacement. */
13515 if (disp)
13517 if (GET_CODE (disp) == CONST
13518 && GET_CODE (XEXP (disp, 0)) == UNSPEC
13519 && XINT (XEXP (disp, 0), 1) != UNSPEC_MACHOPIC_OFFSET)
13520 switch (XINT (XEXP (disp, 0), 1))
13522 /* Refuse GOTOFF and GOT in 64bit mode since they are always 64bit
13523 when used. While the ABI also specifies 32bit relocations, we don't
13524 produce them at all and use IP-relative addressing instead. */
13525 case UNSPEC_GOT:
13526 case UNSPEC_GOTOFF:
13527 gcc_assert (flag_pic);
13528 if (!TARGET_64BIT)
13529 goto is_legitimate_pic;
13531 /* 64bit address unspec. */
13532 return false;
13534 case UNSPEC_GOTPCREL:
13535 case UNSPEC_PCREL:
13536 gcc_assert (flag_pic);
13537 goto is_legitimate_pic;
13539 case UNSPEC_GOTTPOFF:
13540 case UNSPEC_GOTNTPOFF:
13541 case UNSPEC_INDNTPOFF:
13542 case UNSPEC_NTPOFF:
13543 case UNSPEC_DTPOFF:
13544 break;
13546 case UNSPEC_STACK_CHECK:
13547 gcc_assert (flag_split_stack);
13548 break;
13550 default:
13551 /* Invalid address unspec. */
13552 return false;
13555 else if (SYMBOLIC_CONST (disp)
13556 && (flag_pic
13557 || (TARGET_MACHO
13558 #if TARGET_MACHO
13559 && MACHOPIC_INDIRECT
13560 && !machopic_operand_p (disp)
13561 #endif
13565 is_legitimate_pic:
13566 if (TARGET_64BIT && (index || base))
13568 /* foo@dtpoff(%rX) is ok. */
13569 if (GET_CODE (disp) != CONST
13570 || GET_CODE (XEXP (disp, 0)) != PLUS
13571 || GET_CODE (XEXP (XEXP (disp, 0), 0)) != UNSPEC
13572 || !CONST_INT_P (XEXP (XEXP (disp, 0), 1))
13573 || (XINT (XEXP (XEXP (disp, 0), 0), 1) != UNSPEC_DTPOFF
13574 && XINT (XEXP (XEXP (disp, 0), 0), 1) != UNSPEC_NTPOFF))
13575 /* Non-constant pic memory reference. */
13576 return false;
13578 else if ((!TARGET_MACHO || flag_pic)
13579 && ! legitimate_pic_address_disp_p (disp))
13580 /* Displacement is an invalid pic construct. */
13581 return false;
13582 #if TARGET_MACHO
13583 else if (MACHO_DYNAMIC_NO_PIC_P
13584 && !ix86_legitimate_constant_p (Pmode, disp))
13585 /* displacement must be referenced via non_lazy_pointer */
13586 return false;
13587 #endif
13589 /* This code used to verify that a symbolic pic displacement
13590 includes the pic_offset_table_rtx register.
13592 While this is a good idea, unfortunately these constructs may
13593 be created by the "adds using lea" optimization for incorrect
13594 code like:
13596 int a;
13597 int foo (int i)
13599 return *(&a+i);
13602 This code is nonsensical, but results in addressing the
13603 GOT table with a pic_offset_table_rtx base. We can't
13604 just refuse it easily, since it gets matched by the
13605 "addsi3" pattern, which later gets split to lea when the
13606 output register differs from the input. While this
13607 could be handled by a separate addsi pattern for this case
13608 that never results in lea, disabling this test seems to be
13609 the easier and correct fix for the crash. */
13611 else if (GET_CODE (disp) != LABEL_REF
13612 && !CONST_INT_P (disp)
13613 && (GET_CODE (disp) != CONST
13614 || !ix86_legitimate_constant_p (Pmode, disp))
13615 && (GET_CODE (disp) != SYMBOL_REF
13616 || !ix86_legitimate_constant_p (Pmode, disp)))
13617 /* Displacement is not constant. */
13618 return false;
13619 else if (TARGET_64BIT
13620 && !x86_64_immediate_operand (disp, VOIDmode))
13621 /* Displacement is out of range. */
13622 return false;
13623 /* In x32 mode, constant addresses are sign extended to 64bit, so
13624 we have to prevent addresses from 0x80000000 to 0xffffffff. */
13625 else if (TARGET_X32 && !(index || base)
13626 && CONST_INT_P (disp)
13627 && val_signbit_known_set_p (SImode, INTVAL (disp)))
13628 return false;
13631 /* Everything looks valid. */
13632 return true;
13635 /* Determine if a given RTX is a valid constant address. */
13637 bool
13638 constant_address_p (rtx x)
13640 return CONSTANT_P (x) && ix86_legitimate_address_p (Pmode, x, 1);
13643 /* Return a unique alias set for the GOT. */
13645 static alias_set_type
13646 ix86_GOT_alias_set (void)
13648 static alias_set_type set = -1;
13649 if (set == -1)
13650 set = new_alias_set ();
13651 return set;
13654 /* Return a legitimate reference for ORIG (an address) using the
13655 register REG. If REG is 0, a new pseudo is generated.
13657 There are two types of references that must be handled:
13659 1. Global data references must load the address from the GOT, via
13660 the PIC reg. An insn is emitted to do this load, and the reg is
13661 returned.
13663 2. Static data references, constant pool addresses, and code labels
13664 compute the address as an offset from the GOT, whose base is in
13665 the PIC reg. Static data objects have SYMBOL_FLAG_LOCAL set to
13666 differentiate them from global data objects. The returned
13667 address is the PIC reg + an unspec constant.
13669 TARGET_LEGITIMATE_ADDRESS_P rejects symbolic references unless the PIC
13670 reg also appears in the address. */
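/* A rough sketch of the two result shapes, for illustration: global data
   becomes approximately
       (mem (plus pic_reg (const (unspec [sym] UNSPEC_GOT))))
   i.e. a load of the symbol's address from its GOT slot, while local data,
   constant pool entries and labels become approximately
       (plus pic_reg (const (unspec [sym] UNSPEC_GOTOFF)))
   i.e. the PIC register plus a link-time constant offset.  */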
13672 static rtx
13673 legitimize_pic_address (rtx orig, rtx reg)
13675 rtx addr = orig;
13676 rtx new_rtx = orig;
13678 #if TARGET_MACHO
13679 if (TARGET_MACHO && !TARGET_64BIT)
13681 if (reg == 0)
13682 reg = gen_reg_rtx (Pmode);
13683 /* Use the generic Mach-O PIC machinery. */
13684 return machopic_legitimize_pic_address (orig, GET_MODE (orig), reg);
13686 #endif
13688 if (TARGET_64BIT && TARGET_DLLIMPORT_DECL_ATTRIBUTES)
13690 rtx tmp = legitimize_pe_coff_symbol (addr, true);
13691 if (tmp)
13692 return tmp;
13695 if (TARGET_64BIT && legitimate_pic_address_disp_p (addr))
13696 new_rtx = addr;
13697 else if (TARGET_64BIT && !TARGET_PECOFF
13698 && ix86_cmodel != CM_SMALL_PIC && gotoff_operand (addr, Pmode))
13700 rtx tmpreg;
13701 /* This symbol may be referenced via a displacement from the PIC
13702 base address (@GOTOFF). */
13704 if (GET_CODE (addr) == CONST)
13705 addr = XEXP (addr, 0);
13706 if (GET_CODE (addr) == PLUS)
13708 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, XEXP (addr, 0)),
13709 UNSPEC_GOTOFF);
13710 new_rtx = gen_rtx_PLUS (Pmode, new_rtx, XEXP (addr, 1));
13712 else
13713 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTOFF);
13714 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
13715 if (!reg)
13716 tmpreg = gen_reg_rtx (Pmode);
13717 else
13718 tmpreg = reg;
13719 emit_move_insn (tmpreg, new_rtx);
13721 if (reg != 0)
13723 new_rtx = expand_simple_binop (Pmode, PLUS, reg, pic_offset_table_rtx,
13724 tmpreg, 1, OPTAB_DIRECT);
13725 new_rtx = reg;
13727 else
13728 new_rtx = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, tmpreg);
13730 else if (!TARGET_64BIT && !TARGET_PECOFF && gotoff_operand (addr, Pmode))
13732 /* This symbol may be referenced via a displacement from the PIC
13733 base address (@GOTOFF). */
13735 if (GET_CODE (addr) == CONST)
13736 addr = XEXP (addr, 0);
13737 if (GET_CODE (addr) == PLUS)
13739 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, XEXP (addr, 0)),
13740 UNSPEC_GOTOFF);
13741 new_rtx = gen_rtx_PLUS (Pmode, new_rtx, XEXP (addr, 1));
13743 else
13744 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTOFF);
13745 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
13746 new_rtx = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new_rtx);
13748 if (reg != 0)
13750 emit_move_insn (reg, new_rtx);
13751 new_rtx = reg;
13754 else if ((GET_CODE (addr) == SYMBOL_REF && SYMBOL_REF_TLS_MODEL (addr) == 0)
13755 /* We can't use @GOTOFF for text labels on VxWorks;
13756 see gotoff_operand. */
13757 || (TARGET_VXWORKS_RTP && GET_CODE (addr) == LABEL_REF))
13759 rtx tmp = legitimize_pe_coff_symbol (addr, true);
13760 if (tmp)
13761 return tmp;
13763 /* For x64 PE-COFF there is no GOT table, so we use the address
13764 directly. */
13765 if (TARGET_64BIT && TARGET_PECOFF)
13767 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_PCREL);
13768 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
13770 if (reg == 0)
13771 reg = gen_reg_rtx (Pmode);
13772 emit_move_insn (reg, new_rtx);
13773 new_rtx = reg;
13775 else if (TARGET_64BIT && ix86_cmodel != CM_LARGE_PIC)
13777 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTPCREL);
13778 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
13779 new_rtx = gen_const_mem (Pmode, new_rtx);
13780 set_mem_alias_set (new_rtx, ix86_GOT_alias_set ());
13782 if (reg == 0)
13783 reg = gen_reg_rtx (Pmode);
13784 /* Use gen_movsi directly, otherwise the address is loaded
13785 into a register for CSE. We don't want to CSE these addresses;
13786 instead we CSE addresses from the GOT table, so skip this. */
13787 emit_insn (gen_movsi (reg, new_rtx));
13788 new_rtx = reg;
13790 else
13792 /* This symbol must be referenced via a load from the
13793 Global Offset Table (@GOT). */
13795 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOT);
13796 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
13797 if (TARGET_64BIT)
13798 new_rtx = force_reg (Pmode, new_rtx);
13799 new_rtx = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new_rtx);
13800 new_rtx = gen_const_mem (Pmode, new_rtx);
13801 set_mem_alias_set (new_rtx, ix86_GOT_alias_set ());
13803 if (reg == 0)
13804 reg = gen_reg_rtx (Pmode);
13805 emit_move_insn (reg, new_rtx);
13806 new_rtx = reg;
13809 else
13811 if (CONST_INT_P (addr)
13812 && !x86_64_immediate_operand (addr, VOIDmode))
13814 if (reg)
13816 emit_move_insn (reg, addr);
13817 new_rtx = reg;
13819 else
13820 new_rtx = force_reg (Pmode, addr);
13822 else if (GET_CODE (addr) == CONST)
13824 addr = XEXP (addr, 0);
13826 /* We must match stuff we generated before. Assume the only
13827 unspecs that can get here are ours. Not that we could do
13828 anything with them anyway.... */
13829 if (GET_CODE (addr) == UNSPEC
13830 || (GET_CODE (addr) == PLUS
13831 && GET_CODE (XEXP (addr, 0)) == UNSPEC))
13832 return orig;
13833 gcc_assert (GET_CODE (addr) == PLUS);
13835 if (GET_CODE (addr) == PLUS)
13837 rtx op0 = XEXP (addr, 0), op1 = XEXP (addr, 1);
13839 /* Check first to see if this is a constant offset from a @GOTOFF
13840 symbol reference. */
13841 if (!TARGET_PECOFF && gotoff_operand (op0, Pmode)
13842 && CONST_INT_P (op1))
13844 if (!TARGET_64BIT)
13846 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, op0),
13847 UNSPEC_GOTOFF);
13848 new_rtx = gen_rtx_PLUS (Pmode, new_rtx, op1);
13849 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
13850 new_rtx = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new_rtx);
13852 if (reg != 0)
13854 emit_move_insn (reg, new_rtx);
13855 new_rtx = reg;
13858 else
13860 if (INTVAL (op1) < -16*1024*1024
13861 || INTVAL (op1) >= 16*1024*1024)
13863 if (!x86_64_immediate_operand (op1, Pmode))
13864 op1 = force_reg (Pmode, op1);
13865 new_rtx = gen_rtx_PLUS (Pmode, force_reg (Pmode, op0), op1);
13869 else
13871 rtx base = legitimize_pic_address (op0, reg);
13872 machine_mode mode = GET_MODE (base);
13873 new_rtx
13874 = legitimize_pic_address (op1, base == reg ? NULL_RTX : reg);
13876 if (CONST_INT_P (new_rtx))
13878 if (INTVAL (new_rtx) < -16*1024*1024
13879 || INTVAL (new_rtx) >= 16*1024*1024)
13881 if (!x86_64_immediate_operand (new_rtx, mode))
13882 new_rtx = force_reg (mode, new_rtx);
13883 new_rtx
13884 = gen_rtx_PLUS (mode, force_reg (mode, base), new_rtx);
13886 else
13887 new_rtx = plus_constant (mode, base, INTVAL (new_rtx));
13889 else
13891 /* For %rip addressing, we have to use just disp32, with
13892 neither base nor index. */
13893 if (TARGET_64BIT
13894 && (GET_CODE (base) == SYMBOL_REF
13895 || GET_CODE (base) == LABEL_REF))
13896 base = force_reg (mode, base);
13897 if (GET_CODE (new_rtx) == PLUS
13898 && CONSTANT_P (XEXP (new_rtx, 1)))
13900 base = gen_rtx_PLUS (mode, base, XEXP (new_rtx, 0));
13901 new_rtx = XEXP (new_rtx, 1);
13903 new_rtx = gen_rtx_PLUS (mode, base, new_rtx);
13908 return new_rtx;
13911 /* Load the thread pointer. If TO_REG is true, force it into a register. */
13913 static rtx
13914 get_thread_pointer (machine_mode tp_mode, bool to_reg)
13916 rtx tp = gen_rtx_UNSPEC (ptr_mode, gen_rtvec (1, const0_rtx), UNSPEC_TP);
13918 if (GET_MODE (tp) != tp_mode)
13920 gcc_assert (GET_MODE (tp) == SImode);
13921 gcc_assert (tp_mode == DImode);
13923 tp = gen_rtx_ZERO_EXTEND (tp_mode, tp);
13926 if (to_reg)
13927 tp = copy_to_mode_reg (tp_mode, tp);
13929 return tp;
13932 /* Construct the SYMBOL_REF for the tls_get_addr function. */
13934 static GTY(()) rtx ix86_tls_symbol;
13936 static rtx
13937 ix86_tls_get_addr (void)
13939 if (!ix86_tls_symbol)
13941 const char *sym
13942 = ((TARGET_ANY_GNU_TLS && !TARGET_64BIT)
13943 ? "___tls_get_addr" : "__tls_get_addr");
13945 ix86_tls_symbol = gen_rtx_SYMBOL_REF (Pmode, sym);
13948 if (ix86_cmodel == CM_LARGE_PIC && !TARGET_PECOFF)
13950 rtx unspec = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, ix86_tls_symbol),
13951 UNSPEC_PLTOFF);
13952 return gen_rtx_PLUS (Pmode, pic_offset_table_rtx,
13953 gen_rtx_CONST (Pmode, unspec));
13956 return ix86_tls_symbol;
13959 /* Construct the SYMBOL_REF for the _TLS_MODULE_BASE_ symbol. */
13961 static GTY(()) rtx ix86_tls_module_base_symbol;
13964 ix86_tls_module_base (void)
13966 if (!ix86_tls_module_base_symbol)
13968 ix86_tls_module_base_symbol
13969 = gen_rtx_SYMBOL_REF (Pmode, "_TLS_MODULE_BASE_");
13971 SYMBOL_REF_FLAGS (ix86_tls_module_base_symbol)
13972 |= TLS_MODEL_GLOBAL_DYNAMIC << SYMBOL_FLAG_TLS_SHIFT;
13975 return ix86_tls_module_base_symbol;
13978 /* A subroutine of ix86_legitimize_address and ix86_expand_move. FOR_MOV is
13979 false if we expect this to be used for a memory address and true if
13980 we expect to load the address into a register. */
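/* A brief summary of the cases handled below, for illustration:
   global-dynamic calls __tls_get_addr (or uses the GNU2 descriptor
   sequence), local-dynamic computes _TLS_MODULE_BASE_ plus a DTPOFF
   offset, initial-exec loads the offset from the GOT
   (GOTTPOFF/GOTNTPOFF/INDNTPOFF) and combines it with the thread pointer,
   and local-exec uses a link-time NTPOFF/TPOFF constant relative to the
   thread pointer.  */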
13982 static rtx
13983 legitimize_tls_address (rtx x, enum tls_model model, bool for_mov)
13985 rtx dest, base, off;
13986 rtx pic = NULL_RTX, tp = NULL_RTX;
13987 machine_mode tp_mode = Pmode;
13988 int type;
13990 /* Fall back to global dynamic model if tool chain cannot support local
13991 dynamic. */
13992 if (TARGET_SUN_TLS && !TARGET_64BIT
13993 && !HAVE_AS_IX86_TLSLDMPLT && !HAVE_AS_IX86_TLSLDM
13994 && model == TLS_MODEL_LOCAL_DYNAMIC)
13995 model = TLS_MODEL_GLOBAL_DYNAMIC;
13997 switch (model)
13999 case TLS_MODEL_GLOBAL_DYNAMIC:
14000 dest = gen_reg_rtx (Pmode);
14002 if (!TARGET_64BIT)
14004 if (flag_pic && !TARGET_PECOFF)
14005 pic = pic_offset_table_rtx;
14006 else
14008 pic = gen_reg_rtx (Pmode);
14009 emit_insn (gen_set_got (pic));
14013 if (TARGET_GNU2_TLS)
14015 if (TARGET_64BIT)
14016 emit_insn (gen_tls_dynamic_gnu2_64 (dest, x));
14017 else
14018 emit_insn (gen_tls_dynamic_gnu2_32 (dest, x, pic));
14020 tp = get_thread_pointer (Pmode, true);
14021 dest = force_reg (Pmode, gen_rtx_PLUS (Pmode, tp, dest));
14023 if (GET_MODE (x) != Pmode)
14024 x = gen_rtx_ZERO_EXTEND (Pmode, x);
14026 set_unique_reg_note (get_last_insn (), REG_EQUAL, x);
14028 else
14030 rtx caddr = ix86_tls_get_addr ();
14032 if (TARGET_64BIT)
14034 rtx rax = gen_rtx_REG (Pmode, AX_REG);
14035 rtx_insn *insns;
14037 start_sequence ();
14038 emit_call_insn
14039 (ix86_gen_tls_global_dynamic_64 (rax, x, caddr));
14040 insns = get_insns ();
14041 end_sequence ();
14043 if (GET_MODE (x) != Pmode)
14044 x = gen_rtx_ZERO_EXTEND (Pmode, x);
14046 RTL_CONST_CALL_P (insns) = 1;
14047 emit_libcall_block (insns, dest, rax, x);
14049 else
14050 emit_insn (gen_tls_global_dynamic_32 (dest, x, pic, caddr));
14052 break;
14054 case TLS_MODEL_LOCAL_DYNAMIC:
14055 base = gen_reg_rtx (Pmode);
14057 if (!TARGET_64BIT)
14059 if (flag_pic)
14060 pic = pic_offset_table_rtx;
14061 else
14063 pic = gen_reg_rtx (Pmode);
14064 emit_insn (gen_set_got (pic));
14068 if (TARGET_GNU2_TLS)
14070 rtx tmp = ix86_tls_module_base ();
14072 if (TARGET_64BIT)
14073 emit_insn (gen_tls_dynamic_gnu2_64 (base, tmp));
14074 else
14075 emit_insn (gen_tls_dynamic_gnu2_32 (base, tmp, pic));
14077 tp = get_thread_pointer (Pmode, true);
14078 set_unique_reg_note (get_last_insn (), REG_EQUAL,
14079 gen_rtx_MINUS (Pmode, tmp, tp));
14081 else
14083 rtx caddr = ix86_tls_get_addr ();
14085 if (TARGET_64BIT)
14087 rtx rax = gen_rtx_REG (Pmode, AX_REG);
14088 rtx_insn *insns;
14089 rtx eqv;
14091 start_sequence ();
14092 emit_call_insn
14093 (ix86_gen_tls_local_dynamic_base_64 (rax, caddr));
14094 insns = get_insns ();
14095 end_sequence ();
14097 /* Attach a unique REG_EQUAL, to allow the RTL optimizers to
14098 share the LD_BASE result with other LD model accesses. */
14099 eqv = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const0_rtx),
14100 UNSPEC_TLS_LD_BASE);
14102 RTL_CONST_CALL_P (insns) = 1;
14103 emit_libcall_block (insns, base, rax, eqv);
14105 else
14106 emit_insn (gen_tls_local_dynamic_base_32 (base, pic, caddr));
14109 off = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x), UNSPEC_DTPOFF);
14110 off = gen_rtx_CONST (Pmode, off);
14112 dest = force_reg (Pmode, gen_rtx_PLUS (Pmode, base, off));
14114 if (TARGET_GNU2_TLS)
14116 dest = force_reg (Pmode, gen_rtx_PLUS (Pmode, dest, tp));
14118 if (GET_MODE (x) != Pmode)
14119 x = gen_rtx_ZERO_EXTEND (Pmode, x);
14121 set_unique_reg_note (get_last_insn (), REG_EQUAL, x);
14123 break;
14125 case TLS_MODEL_INITIAL_EXEC:
14126 if (TARGET_64BIT)
14128 if (TARGET_SUN_TLS && !TARGET_X32)
14130 /* The Sun linker took the AMD64 TLS spec literally
14131 and can only handle %rax as the destination of the
14132 initial-exec code sequence. */
14134 dest = gen_reg_rtx (DImode);
14135 emit_insn (gen_tls_initial_exec_64_sun (dest, x));
14136 return dest;
14139 /* Generate DImode references to avoid %fs:(%reg32)
14140 problems and linker IE->LE relaxation bug. */
14141 tp_mode = DImode;
14142 pic = NULL;
14143 type = UNSPEC_GOTNTPOFF;
14145 else if (flag_pic)
14147 pic = pic_offset_table_rtx;
14148 type = TARGET_ANY_GNU_TLS ? UNSPEC_GOTNTPOFF : UNSPEC_GOTTPOFF;
14150 else if (!TARGET_ANY_GNU_TLS)
14152 pic = gen_reg_rtx (Pmode);
14153 emit_insn (gen_set_got (pic));
14154 type = UNSPEC_GOTTPOFF;
14156 else
14158 pic = NULL;
14159 type = UNSPEC_INDNTPOFF;
14162 off = gen_rtx_UNSPEC (tp_mode, gen_rtvec (1, x), type);
14163 off = gen_rtx_CONST (tp_mode, off);
14164 if (pic)
14165 off = gen_rtx_PLUS (tp_mode, pic, off);
14166 off = gen_const_mem (tp_mode, off);
14167 set_mem_alias_set (off, ix86_GOT_alias_set ());
14169 if (TARGET_64BIT || TARGET_ANY_GNU_TLS)
14171 base = get_thread_pointer (tp_mode,
14172 for_mov || !TARGET_TLS_DIRECT_SEG_REFS);
14173 off = force_reg (tp_mode, off);
14174 return gen_rtx_PLUS (tp_mode, base, off);
14176 else
14178 base = get_thread_pointer (Pmode, true);
14179 dest = gen_reg_rtx (Pmode);
14180 emit_insn (ix86_gen_sub3 (dest, base, off));
14182 break;
14184 case TLS_MODEL_LOCAL_EXEC:
14185 off = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x),
14186 (TARGET_64BIT || TARGET_ANY_GNU_TLS)
14187 ? UNSPEC_NTPOFF : UNSPEC_TPOFF);
14188 off = gen_rtx_CONST (Pmode, off);
14190 if (TARGET_64BIT || TARGET_ANY_GNU_TLS)
14192 base = get_thread_pointer (Pmode,
14193 for_mov || !TARGET_TLS_DIRECT_SEG_REFS);
14194 return gen_rtx_PLUS (Pmode, base, off);
14196 else
14198 base = get_thread_pointer (Pmode, true);
14199 dest = gen_reg_rtx (Pmode);
14200 emit_insn (ix86_gen_sub3 (dest, base, off));
14202 break;
14204 default:
14205 gcc_unreachable ();
14208 return dest;
14211 /* Create or return the unique __imp_DECL dllimport symbol corresponding
14212 to symbol DECL if BEIMPORT is true. Otherwise create or return the
14213 unique refptr-DECL symbol corresponding to symbol DECL. */
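/* An illustrative example based on the prefix selection in
   get_dllimport_decl below: for a symbol foo, the import reference is
   named __imp_foo (or __imp__foo when a user label prefix is in use) and
   the refptr variant is named .refptr.foo (or refptr.foo).  */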
14215 struct dllimport_hasher : ggc_cache_hasher<tree_map *>
14217 static inline hashval_t hash (tree_map *m) { return m->hash; }
14218 static inline bool
14219 equal (tree_map *a, tree_map *b)
14221 return a->base.from == b->base.from;
14224 static int
14225 keep_cache_entry (tree_map *&m)
14227 return ggc_marked_p (m->base.from);
14231 static GTY((cache)) hash_table<dllimport_hasher> *dllimport_map;
14233 static tree
14234 get_dllimport_decl (tree decl, bool beimport)
14236 struct tree_map *h, in;
14237 const char *name;
14238 const char *prefix;
14239 size_t namelen, prefixlen;
14240 char *imp_name;
14241 tree to;
14242 rtx rtl;
14244 if (!dllimport_map)
14245 dllimport_map = hash_table<dllimport_hasher>::create_ggc (512);
14247 in.hash = htab_hash_pointer (decl);
14248 in.base.from = decl;
14249 tree_map **loc = dllimport_map->find_slot_with_hash (&in, in.hash, INSERT);
14250 h = *loc;
14251 if (h)
14252 return h->to;
14254 *loc = h = ggc_alloc<tree_map> ();
14255 h->hash = in.hash;
14256 h->base.from = decl;
14257 h->to = to = build_decl (DECL_SOURCE_LOCATION (decl),
14258 VAR_DECL, NULL, ptr_type_node);
14259 DECL_ARTIFICIAL (to) = 1;
14260 DECL_IGNORED_P (to) = 1;
14261 DECL_EXTERNAL (to) = 1;
14262 TREE_READONLY (to) = 1;
14264 name = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (decl));
14265 name = targetm.strip_name_encoding (name);
14266 if (beimport)
14267 prefix = name[0] == FASTCALL_PREFIX || user_label_prefix[0] == 0
14268 ? "*__imp_" : "*__imp__";
14269 else
14270 prefix = user_label_prefix[0] == 0 ? "*.refptr." : "*refptr.";
14271 namelen = strlen (name);
14272 prefixlen = strlen (prefix);
14273 imp_name = (char *) alloca (namelen + prefixlen + 1);
14274 memcpy (imp_name, prefix, prefixlen);
14275 memcpy (imp_name + prefixlen, name, namelen + 1);
14277 name = ggc_alloc_string (imp_name, namelen + prefixlen);
14278 rtl = gen_rtx_SYMBOL_REF (Pmode, name);
14279 SET_SYMBOL_REF_DECL (rtl, to);
14280 SYMBOL_REF_FLAGS (rtl) = SYMBOL_FLAG_LOCAL | SYMBOL_FLAG_STUBVAR;
14281 if (!beimport)
14283 SYMBOL_REF_FLAGS (rtl) |= SYMBOL_FLAG_EXTERNAL;
14284 #ifdef SUB_TARGET_RECORD_STUB
14285 SUB_TARGET_RECORD_STUB (name);
14286 #endif
14289 rtl = gen_const_mem (Pmode, rtl);
14290 set_mem_alias_set (rtl, ix86_GOT_alias_set ());
14292 SET_DECL_RTL (to, rtl);
14293 SET_DECL_ASSEMBLER_NAME (to, get_identifier (name));
14295 return to;
14298 /* Expand SYMBOL into its corresponding far-addressed symbol.
14299 WANT_REG is true if we require the result be a register. */
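/* This path is used for the medium/large PIC code models on PE-COFF
   targets, where an external object may be too far away to address
   directly; the access is routed through the local refptr.DECL
   pointer created above.  */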
14301 static rtx
14302 legitimize_pe_coff_extern_decl (rtx symbol, bool want_reg)
14304 tree imp_decl;
14305 rtx x;
14307 gcc_assert (SYMBOL_REF_DECL (symbol));
14308 imp_decl = get_dllimport_decl (SYMBOL_REF_DECL (symbol), false);
14310 x = DECL_RTL (imp_decl);
14311 if (want_reg)
14312 x = force_reg (Pmode, x);
14313 return x;
14316 /* Expand SYMBOL into its corresponding dllimport symbol. WANT_REG is
14317 true if we require the result be a register. */
14319 static rtx
14320 legitimize_dllimport_symbol (rtx symbol, bool want_reg)
14322 tree imp_decl;
14323 rtx x;
14325 gcc_assert (SYMBOL_REF_DECL (symbol));
14326 imp_decl = get_dllimport_decl (SYMBOL_REF_DECL (symbol), true);
14328 x = DECL_RTL (imp_decl);
14329 if (want_reg)
14330 x = force_reg (Pmode, x);
14331 return x;
14334 /* Expand SYMBOL into its corresponding dllimport or refptr symbol. WANT_REG
14335 is true if we require the result be a register. */
14337 static rtx
14338 legitimize_pe_coff_symbol (rtx addr, bool inreg)
14340 if (!TARGET_PECOFF)
14341 return NULL_RTX;
14343 if (TARGET_DLLIMPORT_DECL_ATTRIBUTES)
14345 if (GET_CODE (addr) == SYMBOL_REF && SYMBOL_REF_DLLIMPORT_P (addr))
14346 return legitimize_dllimport_symbol (addr, inreg);
14347 if (GET_CODE (addr) == CONST
14348 && GET_CODE (XEXP (addr, 0)) == PLUS
14349 && GET_CODE (XEXP (XEXP (addr, 0), 0)) == SYMBOL_REF
14350 && SYMBOL_REF_DLLIMPORT_P (XEXP (XEXP (addr, 0), 0)))
14352 rtx t = legitimize_dllimport_symbol (XEXP (XEXP (addr, 0), 0), inreg);
14353 return gen_rtx_PLUS (Pmode, t, XEXP (XEXP (addr, 0), 1));
14357 if (ix86_cmodel != CM_LARGE_PIC && ix86_cmodel != CM_MEDIUM_PIC)
14358 return NULL_RTX;
14359 if (GET_CODE (addr) == SYMBOL_REF
14360 && !is_imported_p (addr)
14361 && SYMBOL_REF_EXTERNAL_P (addr)
14362 && SYMBOL_REF_DECL (addr))
14363 return legitimize_pe_coff_extern_decl (addr, inreg);
14365 if (GET_CODE (addr) == CONST
14366 && GET_CODE (XEXP (addr, 0)) == PLUS
14367 && GET_CODE (XEXP (XEXP (addr, 0), 0)) == SYMBOL_REF
14368 && !is_imported_p (XEXP (XEXP (addr, 0), 0))
14369 && SYMBOL_REF_EXTERNAL_P (XEXP (XEXP (addr, 0), 0))
14370 && SYMBOL_REF_DECL (XEXP (XEXP (addr, 0), 0)))
14372 rtx t = legitimize_pe_coff_extern_decl (XEXP (XEXP (addr, 0), 0), inreg);
14373 return gen_rtx_PLUS (Pmode, t, XEXP (XEXP (addr, 0), 1));
14375 return NULL_RTX;
14378 /* Try machine-dependent ways of modifying an illegitimate address
14379 to be legitimate. If we find one, return the new, valid address.
14380 This macro is used in only one place: `memory_address' in explow.c.
14382 OLDX is the address as it was before break_out_memory_refs was called.
14383 In some cases it is useful to look at this to decide what needs to be done.
14385 It is always safe for this macro to do nothing. It exists to recognize
14386 opportunities to optimize the output.
14388 For the 80386, we handle X+REG by loading X into a register R and
14389 using R+REG. R will go in a general reg and indexing will be used.
14390 However, if REG is a broken-out memory address or multiplication,
14391 nothing needs to be done because REG can certainly go in a general reg.
14393 When -fpic is used, special handling is needed for symbolic references.
14394 See comments by legitimize_pic_address in i386.c for details. */
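/* For example, an address of the form
       (plus (mult (reg) (const_int 4)) (plus (reg) (const_int 8)))
   is reassociated below into
       (plus (plus (mult (reg) (const_int 4)) (reg)) (const_int 8))
   so that it matches the base + index*scale + displacement shape the
   addressing modes accept.  */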
14396 static rtx
14397 ix86_legitimize_address (rtx x, rtx, machine_mode mode)
14399 bool changed = false;
14400 unsigned log;
14402 log = GET_CODE (x) == SYMBOL_REF ? SYMBOL_REF_TLS_MODEL (x) : 0;
14403 if (log)
14404 return legitimize_tls_address (x, (enum tls_model) log, false);
14405 if (GET_CODE (x) == CONST
14406 && GET_CODE (XEXP (x, 0)) == PLUS
14407 && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF
14408 && (log = SYMBOL_REF_TLS_MODEL (XEXP (XEXP (x, 0), 0))))
14410 rtx t = legitimize_tls_address (XEXP (XEXP (x, 0), 0),
14411 (enum tls_model) log, false);
14412 return gen_rtx_PLUS (Pmode, t, XEXP (XEXP (x, 0), 1));
14415 if (TARGET_DLLIMPORT_DECL_ATTRIBUTES)
14417 rtx tmp = legitimize_pe_coff_symbol (x, true);
14418 if (tmp)
14419 return tmp;
14422 if (flag_pic && SYMBOLIC_CONST (x))
14423 return legitimize_pic_address (x, 0);
14425 #if TARGET_MACHO
14426 if (MACHO_DYNAMIC_NO_PIC_P && SYMBOLIC_CONST (x))
14427 return machopic_indirect_data_reference (x, 0);
14428 #endif
14430 /* Canonicalize shifts by 0, 1, 2, 3 into multiply */
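/* E.g. (ashift (reg) (const_int 3)) becomes (mult (reg) (const_int 8)),
   which matches the scale field of a scaled-index address.  */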
14431 if (GET_CODE (x) == ASHIFT
14432 && CONST_INT_P (XEXP (x, 1))
14433 && (unsigned HOST_WIDE_INT) INTVAL (XEXP (x, 1)) < 4)
14435 changed = true;
14436 log = INTVAL (XEXP (x, 1));
14437 x = gen_rtx_MULT (Pmode, force_reg (Pmode, XEXP (x, 0)),
14438 GEN_INT (1 << log));
14441 if (GET_CODE (x) == PLUS)
14443 /* Canonicalize shifts by 0, 1, 2, 3 into multiply. */
14445 if (GET_CODE (XEXP (x, 0)) == ASHIFT
14446 && CONST_INT_P (XEXP (XEXP (x, 0), 1))
14447 && (unsigned HOST_WIDE_INT) INTVAL (XEXP (XEXP (x, 0), 1)) < 4)
14449 changed = true;
14450 log = INTVAL (XEXP (XEXP (x, 0), 1));
14451 XEXP (x, 0) = gen_rtx_MULT (Pmode,
14452 force_reg (Pmode, XEXP (XEXP (x, 0), 0)),
14453 GEN_INT (1 << log));
14456 if (GET_CODE (XEXP (x, 1)) == ASHIFT
14457 && CONST_INT_P (XEXP (XEXP (x, 1), 1))
14458 && (unsigned HOST_WIDE_INT) INTVAL (XEXP (XEXP (x, 1), 1)) < 4)
14460 changed = true;
14461 log = INTVAL (XEXP (XEXP (x, 1), 1));
14462 XEXP (x, 1) = gen_rtx_MULT (Pmode,
14463 force_reg (Pmode, XEXP (XEXP (x, 1), 0)),
14464 GEN_INT (1 << log));
14467 /* Put multiply first if it isn't already. */
14468 if (GET_CODE (XEXP (x, 1)) == MULT)
14470 std::swap (XEXP (x, 0), XEXP (x, 1));
14471 changed = true;
14474 /* Canonicalize (plus (mult (reg) (const)) (plus (reg) (const)))
14475 into (plus (plus (mult (reg) (const)) (reg)) (const)). This can be
14476 created by virtual register instantiation, register elimination, and
14477 similar optimizations. */
14478 if (GET_CODE (XEXP (x, 0)) == MULT && GET_CODE (XEXP (x, 1)) == PLUS)
14480 changed = true;
14481 x = gen_rtx_PLUS (Pmode,
14482 gen_rtx_PLUS (Pmode, XEXP (x, 0),
14483 XEXP (XEXP (x, 1), 0)),
14484 XEXP (XEXP (x, 1), 1));
14487 /* Canonicalize
14488 (plus (plus (mult (reg) (const)) (plus (reg) (const))) const)
14489 into (plus (plus (mult (reg) (const)) (reg)) (const)). */
14490 else if (GET_CODE (x) == PLUS && GET_CODE (XEXP (x, 0)) == PLUS
14491 && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
14492 && GET_CODE (XEXP (XEXP (x, 0), 1)) == PLUS
14493 && CONSTANT_P (XEXP (x, 1)))
14495 rtx constant;
14496 rtx other = NULL_RTX;
14498 if (CONST_INT_P (XEXP (x, 1)))
14500 constant = XEXP (x, 1);
14501 other = XEXP (XEXP (XEXP (x, 0), 1), 1);
14503 else if (CONST_INT_P (XEXP (XEXP (XEXP (x, 0), 1), 1)))
14505 constant = XEXP (XEXP (XEXP (x, 0), 1), 1);
14506 other = XEXP (x, 1);
14508 else
14509 constant = 0;
14511 if (constant)
14513 changed = true;
14514 x = gen_rtx_PLUS (Pmode,
14515 gen_rtx_PLUS (Pmode, XEXP (XEXP (x, 0), 0),
14516 XEXP (XEXP (XEXP (x, 0), 1), 0)),
14517 plus_constant (Pmode, other,
14518 INTVAL (constant)));
14522 if (changed && ix86_legitimate_address_p (mode, x, false))
14523 return x;
14525 if (GET_CODE (XEXP (x, 0)) == MULT)
14527 changed = true;
14528 XEXP (x, 0) = copy_addr_to_reg (XEXP (x, 0));
14531 if (GET_CODE (XEXP (x, 1)) == MULT)
14533 changed = true;
14534 XEXP (x, 1) = copy_addr_to_reg (XEXP (x, 1));
14537 if (changed
14538 && REG_P (XEXP (x, 1))
14539 && REG_P (XEXP (x, 0)))
14540 return x;
14542 if (flag_pic && SYMBOLIC_CONST (XEXP (x, 1)))
14544 changed = true;
14545 x = legitimize_pic_address (x, 0);
14548 if (changed && ix86_legitimate_address_p (mode, x, false))
14549 return x;
14551 if (REG_P (XEXP (x, 0)))
14553 rtx temp = gen_reg_rtx (Pmode);
14554 rtx val = force_operand (XEXP (x, 1), temp);
14555 if (val != temp)
14557 val = convert_to_mode (Pmode, val, 1);
14558 emit_move_insn (temp, val);
14561 XEXP (x, 1) = temp;
14562 return x;
14565 else if (REG_P (XEXP (x, 1)))
14567 rtx temp = gen_reg_rtx (Pmode);
14568 rtx val = force_operand (XEXP (x, 0), temp);
14569 if (val != temp)
14571 val = convert_to_mode (Pmode, val, 1);
14572 emit_move_insn (temp, val);
14575 XEXP (x, 0) = temp;
14576 return x;
14580 return x;
14583 /* Print an integer constant expression in assembler syntax. Addition
14584 and subtraction are the only arithmetic that may appear in these
14585 expressions. FILE is the stdio stream to write to, X is the rtx, and
14586 CODE is the operand print code from the output string. */
14588 static void
14589 output_pic_addr_const (FILE *file, rtx x, int code)
14591 char buf[256];
14593 switch (GET_CODE (x))
14595 case PC:
14596 gcc_assert (flag_pic);
14597 putc ('.', file);
14598 break;
14600 case SYMBOL_REF:
14601 if (TARGET_64BIT || ! TARGET_MACHO_BRANCH_ISLANDS)
14602 output_addr_const (file, x);
14603 else
14605 const char *name = XSTR (x, 0);
14607 /* Mark the decl as referenced so that cgraph will
14608 output the function. */
14609 if (SYMBOL_REF_DECL (x))
14610 mark_decl_referenced (SYMBOL_REF_DECL (x));
14612 #if TARGET_MACHO
14613 if (MACHOPIC_INDIRECT
14614 && machopic_classify_symbol (x) == MACHOPIC_UNDEFINED_FUNCTION)
14615 name = machopic_indirection_name (x, /*stub_p=*/true);
14616 #endif
14617 assemble_name (file, name);
14619 if (!TARGET_MACHO && !(TARGET_64BIT && TARGET_PECOFF)
14620 && code == 'P' && ! SYMBOL_REF_LOCAL_P (x))
14621 fputs ("@PLT", file);
14622 break;
14624 case LABEL_REF:
14625 x = XEXP (x, 0);
14626 /* FALLTHRU */
14627 case CODE_LABEL:
14628 ASM_GENERATE_INTERNAL_LABEL (buf, "L", CODE_LABEL_NUMBER (x));
14629 assemble_name (asm_out_file, buf);
14630 break;
14632 case CONST_INT:
14633 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
14634 break;
14636 case CONST:
14637 /* This used to output parentheses around the expression,
14638 but that does not work on the 386 (either ATT or BSD assembler). */
14639 output_pic_addr_const (file, XEXP (x, 0), code);
14640 break;
14642 case CONST_DOUBLE:
14643 /* We can't handle floating point constants;
14644 TARGET_PRINT_OPERAND must handle them. */
14645 output_operand_lossage ("floating constant misused");
14646 break;
14648 case PLUS:
14649 /* Some assemblers need integer constants to appear first. */
14650 if (CONST_INT_P (XEXP (x, 0)))
14652 output_pic_addr_const (file, XEXP (x, 0), code);
14653 putc ('+', file);
14654 output_pic_addr_const (file, XEXP (x, 1), code);
14656 else
14658 gcc_assert (CONST_INT_P (XEXP (x, 1)));
14659 output_pic_addr_const (file, XEXP (x, 1), code);
14660 putc ('+', file);
14661 output_pic_addr_const (file, XEXP (x, 0), code);
14663 break;
14665 case MINUS:
14666 if (!TARGET_MACHO)
14667 putc (ASSEMBLER_DIALECT == ASM_INTEL ? '(' : '[', file);
14668 output_pic_addr_const (file, XEXP (x, 0), code);
14669 putc ('-', file);
14670 output_pic_addr_const (file, XEXP (x, 1), code);
14671 if (!TARGET_MACHO)
14672 putc (ASSEMBLER_DIALECT == ASM_INTEL ? ')' : ']', file);
14673 break;
14675 case UNSPEC:
14676 if (XINT (x, 1) == UNSPEC_STACK_CHECK)
14678 bool f = i386_asm_output_addr_const_extra (file, x);
14679 gcc_assert (f);
14680 break;
14683 gcc_assert (XVECLEN (x, 0) == 1);
14684 output_pic_addr_const (file, XVECEXP (x, 0, 0), code);
14685 switch (XINT (x, 1))
14687 case UNSPEC_GOT:
14688 fputs ("@GOT", file);
14689 break;
14690 case UNSPEC_GOTOFF:
14691 fputs ("@GOTOFF", file);
14692 break;
14693 case UNSPEC_PLTOFF:
14694 fputs ("@PLTOFF", file);
14695 break;
14696 case UNSPEC_PCREL:
14697 fputs (ASSEMBLER_DIALECT == ASM_ATT ?
14698 "(%rip)" : "[rip]", file);
14699 break;
14700 case UNSPEC_GOTPCREL:
14701 fputs (ASSEMBLER_DIALECT == ASM_ATT ?
14702 "@GOTPCREL(%rip)" : "@GOTPCREL[rip]", file);
14703 break;
14704 case UNSPEC_GOTTPOFF:
14705 /* FIXME: This might be @TPOFF in Sun ld too. */
14706 fputs ("@gottpoff", file);
14707 break;
14708 case UNSPEC_TPOFF:
14709 fputs ("@tpoff", file);
14710 break;
14711 case UNSPEC_NTPOFF:
14712 if (TARGET_64BIT)
14713 fputs ("@tpoff", file);
14714 else
14715 fputs ("@ntpoff", file);
14716 break;
14717 case UNSPEC_DTPOFF:
14718 fputs ("@dtpoff", file);
14719 break;
14720 case UNSPEC_GOTNTPOFF:
14721 if (TARGET_64BIT)
14722 fputs (ASSEMBLER_DIALECT == ASM_ATT ?
14723 "@gottpoff(%rip)": "@gottpoff[rip]", file);
14724 else
14725 fputs ("@gotntpoff", file);
14726 break;
14727 case UNSPEC_INDNTPOFF:
14728 fputs ("@indntpoff", file);
14729 break;
14730 #if TARGET_MACHO
14731 case UNSPEC_MACHOPIC_OFFSET:
14732 putc ('-', file);
14733 machopic_output_function_base_name (file);
14734 break;
14735 #endif
14736 default:
14737 output_operand_lossage ("invalid UNSPEC as operand");
14738 break;
14740 break;
14742 default:
14743 output_operand_lossage ("invalid expression as operand");
14747 /* This is called from dwarf2out.c via TARGET_ASM_OUTPUT_DWARF_DTPREL.
14748 We need to emit DTP-relative relocations. */
14750 static void ATTRIBUTE_UNUSED
14751 i386_output_dwarf_dtprel (FILE *file, int size, rtx x)
14753 fputs (ASM_LONG, file);
14754 output_addr_const (file, x);
14755 fputs ("@dtpoff", file);
14756 switch (size)
14758 case 4:
14759 break;
14760 case 8:
14761 fputs (", 0", file);
14762 break;
14763 default:
14764 gcc_unreachable ();
14768 /* Return true if X is a representation of the PIC register. This copes
14769 with calls from ix86_find_base_term, where the register might have
14770 been replaced by a cselib value. */
14772 static bool
14773 ix86_pic_register_p (rtx x)
14775 if (GET_CODE (x) == VALUE && CSELIB_VAL_PTR (x))
14776 return (pic_offset_table_rtx
14777 && rtx_equal_for_cselib_p (x, pic_offset_table_rtx));
14778 else if (!REG_P (x))
14779 return false;
14780 else if (pic_offset_table_rtx)
14782 if (REGNO (x) == REGNO (pic_offset_table_rtx))
14783 return true;
14784 if (HARD_REGISTER_P (x)
14785 && !HARD_REGISTER_P (pic_offset_table_rtx)
14786 && ORIGINAL_REGNO (x) == REGNO (pic_offset_table_rtx))
14787 return true;
14788 return false;
14790 else
14791 return REGNO (x) == PIC_OFFSET_TABLE_REGNUM;
14794 /* Helper function for ix86_delegitimize_address.
14795 Attempt to delegitimize TLS local-exec accesses. */
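/* Such an access looks like a constant displacement from the TLS
   segment register, e.g. %fs:foo@tpoff on 64-bit or %gs:foo@ntpoff
   on 32-bit; this routine digs the underlying SYMBOL_REF back out of
   the UNSPEC_NTPOFF displacement.  */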
14797 static rtx
14798 ix86_delegitimize_tls_address (rtx orig_x)
14800 rtx x = orig_x, unspec;
14801 struct ix86_address addr;
14803 if (!TARGET_TLS_DIRECT_SEG_REFS)
14804 return orig_x;
14805 if (MEM_P (x))
14806 x = XEXP (x, 0);
14807 if (GET_CODE (x) != PLUS || GET_MODE (x) != Pmode)
14808 return orig_x;
14809 if (ix86_decompose_address (x, &addr) == 0
14810 || addr.seg != DEFAULT_TLS_SEG_REG
14811 || addr.disp == NULL_RTX
14812 || GET_CODE (addr.disp) != CONST)
14813 return orig_x;
14814 unspec = XEXP (addr.disp, 0);
14815 if (GET_CODE (unspec) == PLUS && CONST_INT_P (XEXP (unspec, 1)))
14816 unspec = XEXP (unspec, 0);
14817 if (GET_CODE (unspec) != UNSPEC || XINT (unspec, 1) != UNSPEC_NTPOFF)
14818 return orig_x;
14819 x = XVECEXP (unspec, 0, 0);
14820 gcc_assert (GET_CODE (x) == SYMBOL_REF);
14821 if (unspec != XEXP (addr.disp, 0))
14822 x = gen_rtx_PLUS (Pmode, x, XEXP (XEXP (addr.disp, 0), 1));
14823 if (addr.index)
14825 rtx idx = addr.index;
14826 if (addr.scale != 1)
14827 idx = gen_rtx_MULT (Pmode, idx, GEN_INT (addr.scale));
14828 x = gen_rtx_PLUS (Pmode, idx, x);
14830 if (addr.base)
14831 x = gen_rtx_PLUS (Pmode, addr.base, x);
14832 if (MEM_P (orig_x))
14833 x = replace_equiv_address_nv (orig_x, x);
14834 return x;
14837 /* In the name of slightly smaller debug output, and to cater to
14838 general assembler lossage, recognize PIC+GOTOFF and turn it back
14839 into a direct symbol reference.
14841 On Darwin, this is necessary to avoid a crash, because Darwin
14842 has a different PIC label for each routine but the DWARF debugging
14843 information is not associated with any particular routine, so it's
14844 necessary to remove references to the PIC label from RTL stored by
14845 the DWARF output code. */
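/* For example, (plus %ebx (const (unspec [foo] UNSPEC_GOTOFF))) is
   turned back into a plain reference to foo.  */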
14847 static rtx
14848 ix86_delegitimize_address (rtx x)
14850 rtx orig_x = delegitimize_mem_from_attrs (x);
14851 /* addend is NULL or some rtx if x is something+GOTOFF where
14852 something doesn't include the PIC register. */
14853 rtx addend = NULL_RTX;
14854 /* reg_addend is NULL or a multiple of some register. */
14855 rtx reg_addend = NULL_RTX;
14856 /* const_addend is NULL or a const_int. */
14857 rtx const_addend = NULL_RTX;
14858 /* This is the result, or NULL. */
14859 rtx result = NULL_RTX;
14861 x = orig_x;
14863 if (MEM_P (x))
14864 x = XEXP (x, 0);
14866 if (TARGET_64BIT)
14868 if (GET_CODE (x) == CONST
14869 && GET_CODE (XEXP (x, 0)) == PLUS
14870 && GET_MODE (XEXP (x, 0)) == Pmode
14871 && CONST_INT_P (XEXP (XEXP (x, 0), 1))
14872 && GET_CODE (XEXP (XEXP (x, 0), 0)) == UNSPEC
14873 && XINT (XEXP (XEXP (x, 0), 0), 1) == UNSPEC_PCREL)
14875 rtx x2 = XVECEXP (XEXP (XEXP (x, 0), 0), 0, 0);
14876 x = gen_rtx_PLUS (Pmode, XEXP (XEXP (x, 0), 1), x2);
14877 if (MEM_P (orig_x))
14878 x = replace_equiv_address_nv (orig_x, x);
14879 return x;
14882 if (GET_CODE (x) == CONST
14883 && GET_CODE (XEXP (x, 0)) == UNSPEC
14884 && (XINT (XEXP (x, 0), 1) == UNSPEC_GOTPCREL
14885 || XINT (XEXP (x, 0), 1) == UNSPEC_PCREL)
14886 && (MEM_P (orig_x) || XINT (XEXP (x, 0), 1) == UNSPEC_PCREL))
14888 x = XVECEXP (XEXP (x, 0), 0, 0);
14889 if (GET_MODE (orig_x) != GET_MODE (x) && MEM_P (orig_x))
14891 x = simplify_gen_subreg (GET_MODE (orig_x), x,
14892 GET_MODE (x), 0);
14893 if (x == NULL_RTX)
14894 return orig_x;
14896 return x;
14899 if (ix86_cmodel != CM_MEDIUM_PIC && ix86_cmodel != CM_LARGE_PIC)
14900 return ix86_delegitimize_tls_address (orig_x);
14902 /* Fall thru into the code shared with -m32 for -mcmodel=large -fpic
14903 and -mcmodel=medium -fpic. */
14906 if (GET_CODE (x) != PLUS
14907 || GET_CODE (XEXP (x, 1)) != CONST)
14908 return ix86_delegitimize_tls_address (orig_x);
14910 if (ix86_pic_register_p (XEXP (x, 0)))
14911 /* %ebx + GOT/GOTOFF */
14913 else if (GET_CODE (XEXP (x, 0)) == PLUS)
14915 /* %ebx + %reg * scale + GOT/GOTOFF */
14916 reg_addend = XEXP (x, 0);
14917 if (ix86_pic_register_p (XEXP (reg_addend, 0)))
14918 reg_addend = XEXP (reg_addend, 1);
14919 else if (ix86_pic_register_p (XEXP (reg_addend, 1)))
14920 reg_addend = XEXP (reg_addend, 0);
14921 else
14923 reg_addend = NULL_RTX;
14924 addend = XEXP (x, 0);
14927 else
14928 addend = XEXP (x, 0);
14930 x = XEXP (XEXP (x, 1), 0);
14931 if (GET_CODE (x) == PLUS
14932 && CONST_INT_P (XEXP (x, 1)))
14934 const_addend = XEXP (x, 1);
14935 x = XEXP (x, 0);
14938 if (GET_CODE (x) == UNSPEC
14939 && ((XINT (x, 1) == UNSPEC_GOT && MEM_P (orig_x) && !addend)
14940 || (XINT (x, 1) == UNSPEC_GOTOFF && !MEM_P (orig_x))
14941 || (XINT (x, 1) == UNSPEC_PLTOFF && ix86_cmodel == CM_LARGE_PIC
14942 && !MEM_P (orig_x) && !addend)))
14943 result = XVECEXP (x, 0, 0);
14945 if (!TARGET_64BIT && TARGET_MACHO && darwin_local_data_pic (x)
14946 && !MEM_P (orig_x))
14947 result = XVECEXP (x, 0, 0);
14949 if (! result)
14950 return ix86_delegitimize_tls_address (orig_x);
14952 if (const_addend)
14953 result = gen_rtx_CONST (Pmode, gen_rtx_PLUS (Pmode, result, const_addend));
14954 if (reg_addend)
14955 result = gen_rtx_PLUS (Pmode, reg_addend, result);
14956 if (addend)
14958 /* If the rest of original X doesn't involve the PIC register, add
14959 addend and subtract pic_offset_table_rtx. This can happen e.g.
14960 for code like:
14961 leal (%ebx, %ecx, 4), %ecx
14963 movl foo@GOTOFF(%ecx), %edx
14964 in which case we return (%ecx - %ebx) + foo
14965 or (%ecx - _GLOBAL_OFFSET_TABLE_) + foo if pseudo_pic_reg
14966 and reload has completed. */
14967 if (pic_offset_table_rtx
14968 && (!reload_completed || !ix86_use_pseudo_pic_reg ()))
14969 result = gen_rtx_PLUS (Pmode, gen_rtx_MINUS (Pmode, copy_rtx (addend),
14970 pic_offset_table_rtx),
14971 result);
14972 else if (pic_offset_table_rtx && !TARGET_MACHO && !TARGET_VXWORKS_RTP)
14974 rtx tmp = gen_rtx_SYMBOL_REF (Pmode, GOT_SYMBOL_NAME);
14975 tmp = gen_rtx_MINUS (Pmode, copy_rtx (addend), tmp);
14976 result = gen_rtx_PLUS (Pmode, tmp, result);
14978 else
14979 return orig_x;
14981 if (GET_MODE (orig_x) != Pmode && MEM_P (orig_x))
14983 result = simplify_gen_subreg (GET_MODE (orig_x), result, Pmode, 0);
14984 if (result == NULL_RTX)
14985 return orig_x;
14987 return result;
14990 /* If X is a machine specific address (i.e. a symbol or label being
14991 referenced as a displacement from the GOT implemented using an
14992 UNSPEC), then return the base term. Otherwise return X. */
14995 ix86_find_base_term (rtx x)
14997 rtx term;
14999 if (TARGET_64BIT)
15001 if (GET_CODE (x) != CONST)
15002 return x;
15003 term = XEXP (x, 0);
15004 if (GET_CODE (term) == PLUS
15005 && CONST_INT_P (XEXP (term, 1)))
15006 term = XEXP (term, 0);
15007 if (GET_CODE (term) != UNSPEC
15008 || (XINT (term, 1) != UNSPEC_GOTPCREL
15009 && XINT (term, 1) != UNSPEC_PCREL))
15010 return x;
15012 return XVECEXP (term, 0, 0);
15015 return ix86_delegitimize_address (x);
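/* Print to FILE the one- or two-letter condition suffix ("e", "ne",
   "g", "a", "b", ...) for comparison CODE in condition-code mode MODE.
   REVERSE prints the reversed condition.  FP selects the spelling used
   after floating-point compares, e.g. "nbe" instead of "a" and "u"
   instead of "p".  */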
15018 static void
15019 put_condition_code (enum rtx_code code, machine_mode mode, bool reverse,
15020 bool fp, FILE *file)
15022 const char *suffix;
15024 if (mode == CCFPmode || mode == CCFPUmode)
15026 code = ix86_fp_compare_code_to_integer (code);
15027 mode = CCmode;
15029 if (reverse)
15030 code = reverse_condition (code);
15032 switch (code)
15034 case EQ:
15035 switch (mode)
15037 case CCAmode:
15038 suffix = "a";
15039 break;
15040 case CCCmode:
15041 suffix = "c";
15042 break;
15043 case CCOmode:
15044 suffix = "o";
15045 break;
15046 case CCPmode:
15047 suffix = "p";
15048 break;
15049 case CCSmode:
15050 suffix = "s";
15051 break;
15052 default:
15053 suffix = "e";
15054 break;
15056 break;
15057 case NE:
15058 switch (mode)
15060 case CCAmode:
15061 suffix = "na";
15062 break;
15063 case CCCmode:
15064 suffix = "nc";
15065 break;
15066 case CCOmode:
15067 suffix = "no";
15068 break;
15069 case CCPmode:
15070 suffix = "np";
15071 break;
15072 case CCSmode:
15073 suffix = "ns";
15074 break;
15075 default:
15076 suffix = "ne";
15077 break;
15079 break;
15080 case GT:
15081 gcc_assert (mode == CCmode || mode == CCNOmode || mode == CCGCmode);
15082 suffix = "g";
15083 break;
15084 case GTU:
15085 /* ??? Use "nbe" instead of "a" for fcmov lossage on some assemblers.
15086 Those same assemblers have the same but opposite lossage on cmov. */
15087 if (mode == CCmode)
15088 suffix = fp ? "nbe" : "a";
15089 else
15090 gcc_unreachable ();
15091 break;
15092 case LT:
15093 switch (mode)
15095 case CCNOmode:
15096 case CCGOCmode:
15097 suffix = "s";
15098 break;
15100 case CCmode:
15101 case CCGCmode:
15102 suffix = "l";
15103 break;
15105 default:
15106 gcc_unreachable ();
15108 break;
15109 case LTU:
15110 if (mode == CCmode)
15111 suffix = "b";
15112 else if (mode == CCCmode)
15113 suffix = fp ? "b" : "c";
15114 else
15115 gcc_unreachable ();
15116 break;
15117 case GE:
15118 switch (mode)
15120 case CCNOmode:
15121 case CCGOCmode:
15122 suffix = "ns";
15123 break;
15125 case CCmode:
15126 case CCGCmode:
15127 suffix = "ge";
15128 break;
15130 default:
15131 gcc_unreachable ();
15133 break;
15134 case GEU:
15135 if (mode == CCmode)
15136 suffix = "nb";
15137 else if (mode == CCCmode)
15138 suffix = fp ? "nb" : "nc";
15139 else
15140 gcc_unreachable ();
15141 break;
15142 case LE:
15143 gcc_assert (mode == CCmode || mode == CCGCmode || mode == CCNOmode);
15144 suffix = "le";
15145 break;
15146 case LEU:
15147 if (mode == CCmode)
15148 suffix = "be";
15149 else
15150 gcc_unreachable ();
15151 break;
15152 case UNORDERED:
15153 suffix = fp ? "u" : "p";
15154 break;
15155 case ORDERED:
15156 suffix = fp ? "nu" : "np";
15157 break;
15158 default:
15159 gcc_unreachable ();
15161 fputs (suffix, file);
15164 /* Print the name of register X to FILE based on its machine mode and number.
15165 If CODE is 'w', pretend the mode is HImode.
15166 If CODE is 'b', pretend the mode is QImode.
15167 If CODE is 'k', pretend the mode is SImode.
15168 If CODE is 'q', pretend the mode is DImode.
15169 If CODE is 'x', pretend the mode is V4SFmode.
15170 If CODE is 't', pretend the mode is V8SFmode.
15171 If CODE is 'g', pretend the mode is V16SFmode.
15172 If CODE is 'h', pretend the reg is the 'high' byte register.
15173 If CODE is 'y', print "st(0)" instead of "st", if the reg is a stack op.
15174 If CODE is 'd', duplicate the operand for AVX instruction.
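   For example, hard register AX prints as "al" with code 'b', "ax"
   with 'w', "eax" with 'k' and "rax" with 'q'; the AT&T dialect also
   prefixes a '%'.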
15177 void
15178 print_reg (rtx x, int code, FILE *file)
15180 const char *reg;
15181 int msize;
15182 unsigned int regno;
15183 bool duplicated;
15185 if (ASSEMBLER_DIALECT == ASM_ATT)
15186 putc ('%', file);
15188 if (x == pc_rtx)
15190 gcc_assert (TARGET_64BIT);
15191 fputs ("rip", file);
15192 return;
15195 if (code == 'y' && STACK_TOP_P (x))
15197 fputs ("st(0)", file);
15198 return;
15201 if (code == 'w')
15202 msize = 2;
15203 else if (code == 'b')
15204 msize = 1;
15205 else if (code == 'k')
15206 msize = 4;
15207 else if (code == 'q')
15208 msize = 8;
15209 else if (code == 'h')
15210 msize = 0;
15211 else if (code == 'x')
15212 msize = 16;
15213 else if (code == 't')
15214 msize = 32;
15215 else if (code == 'g')
15216 msize = 64;
15217 else
15218 msize = GET_MODE_SIZE (GET_MODE (x));
15220 regno = true_regnum (x);
15222 gcc_assert (regno != ARG_POINTER_REGNUM
15223 && regno != FRAME_POINTER_REGNUM
15224 && regno != FLAGS_REG
15225 && regno != FPSR_REG
15226 && regno != FPCR_REG);
15228 duplicated = code == 'd' && TARGET_AVX;
15230 switch (msize)
15232 case 8:
15233 case 4:
15234 if (LEGACY_INT_REGNO_P (regno))
15235 putc (msize == 8 && TARGET_64BIT ? 'r' : 'e', file);
15236 case 16:
15237 case 12:
15238 case 2:
15239 normal:
15240 reg = hi_reg_name[regno];
15241 break;
15242 case 1:
15243 if (regno >= ARRAY_SIZE (qi_reg_name))
15244 goto normal;
15245 reg = qi_reg_name[regno];
15246 break;
15247 case 0:
15248 if (regno >= ARRAY_SIZE (qi_high_reg_name))
15249 goto normal;
15250 reg = qi_high_reg_name[regno];
15251 break;
15252 case 32:
15253 case 64:
15254 if (SSE_REGNO_P (regno))
15256 gcc_assert (!duplicated);
15257 putc (msize == 32 ? 'y' : 'z', file);
15258 reg = hi_reg_name[regno] + 1;
15259 break;
15261 goto normal;
15262 default:
15263 gcc_unreachable ();
15266 fputs (reg, file);
15268 /* Irritatingly, AMD extended registers use
15269 a different naming convention: "r%d[bwd]" */
15270 if (REX_INT_REGNO_P (regno))
15272 gcc_assert (TARGET_64BIT);
15273 switch (msize)
15275 case 0:
15276 error ("extended registers have no high halves");
15277 break;
15278 case 1:
15279 putc ('b', file);
15280 break;
15281 case 2:
15282 putc ('w', file);
15283 break;
15284 case 4:
15285 putc ('d', file);
15286 break;
15287 case 8:
15288 /* no suffix */
15289 break;
15290 default:
15291 error ("unsupported operand size for extended register");
15292 break;
15294 return;
15297 if (duplicated)
15299 if (ASSEMBLER_DIALECT == ASM_ATT)
15300 fprintf (file, ", %%%s", reg);
15301 else
15302 fprintf (file, ", %s", reg);
15306 /* Meaning of CODE:
15307 L,W,B,Q,S,T -- print the opcode suffix for specified size of operand.
15308 C -- print opcode suffix for set/cmov insn.
15309 c -- like C, but print reversed condition
15310 F,f -- likewise, but for floating-point.
15311 O -- if HAVE_AS_IX86_CMOV_SUN_SYNTAX, expand to "w.", "l." or "q.",
15312 otherwise nothing
15313 R -- print embedded rounding and sae.
15314 r -- print only sae.
15315 z -- print the opcode suffix for the size of the current operand.
15316 Z -- likewise, with special suffixes for x87 instructions.
15317 * -- print a star (in certain assembler syntax)
15318 A -- print an absolute memory reference.
15319 E -- print address with DImode register names if TARGET_64BIT.
15320 w -- print the operand as if it's a "word" (HImode) even if it isn't.
15321 s -- print a shift double count, followed by the assembler's argument
15322 delimiter.
15323 b -- print the QImode name of the register for the indicated operand.
15324 %b0 would print %al if operands[0] is reg 0.
15325 w -- likewise, print the HImode name of the register.
15326 k -- likewise, print the SImode name of the register.
15327 q -- likewise, print the DImode name of the register.
15328 x -- likewise, print the V4SFmode name of the register.
15329 t -- likewise, print the V8SFmode name of the register.
15330 g -- likewise, print the V16SFmode name of the register.
15331 h -- print the QImode name for a "high" register, either ah, bh, ch or dh.
15332 y -- print "st(0)" instead of "st" as a register.
15333 d -- print duplicated register operand for AVX instruction.
15334 D -- print condition for SSE cmp instruction.
15335 P -- if PIC, print an @PLT suffix.
15336 p -- print raw symbol name.
15337 X -- don't print any sort of PIC '@' suffix for a symbol.
15338 & -- print some in-use local-dynamic symbol name.
15339 H -- print a memory address offset by 8; used for sse high-parts
15340 Y -- print condition for XOP pcom* instruction.
15341 + -- print a branch hint as 'cs' or 'ds' prefix
15342 ; -- print a semicolon (after prefixes, due to a bug in older gas).
15343 ~ -- print "i" if TARGET_AVX2, "f" otherwise.
15344 @ -- print a segment register of thread base pointer load
15345 ^ -- print addr32 prefix if TARGET_64BIT and Pmode != word_mode
15346 ! -- print MPX prefix for jxx/call/ret instructions if required.
15349 void
15350 ix86_print_operand (FILE *file, rtx x, int code)
15352 if (code)
15354 switch (code)
15356 case 'A':
15357 switch (ASSEMBLER_DIALECT)
15359 case ASM_ATT:
15360 putc ('*', file);
15361 break;
15363 case ASM_INTEL:
15364 /* Intel syntax. For absolute addresses, registers should not
15365 be surrounded by brackets. */
15366 if (!REG_P (x))
15368 putc ('[', file);
15369 ix86_print_operand (file, x, 0);
15370 putc (']', file);
15371 return;
15373 break;
15375 default:
15376 gcc_unreachable ();
15379 ix86_print_operand (file, x, 0);
15380 return;
15382 case 'E':
15383 /* Wrap address in an UNSPEC to declare special handling. */
15384 if (TARGET_64BIT)
15385 x = gen_rtx_UNSPEC (DImode, gen_rtvec (1, x), UNSPEC_LEA_ADDR);
15387 output_address (x);
15388 return;
15390 case 'L':
15391 if (ASSEMBLER_DIALECT == ASM_ATT)
15392 putc ('l', file);
15393 return;
15395 case 'W':
15396 if (ASSEMBLER_DIALECT == ASM_ATT)
15397 putc ('w', file);
15398 return;
15400 case 'B':
15401 if (ASSEMBLER_DIALECT == ASM_ATT)
15402 putc ('b', file);
15403 return;
15405 case 'Q':
15406 if (ASSEMBLER_DIALECT == ASM_ATT)
15407 putc ('l', file);
15408 return;
15410 case 'S':
15411 if (ASSEMBLER_DIALECT == ASM_ATT)
15412 putc ('s', file);
15413 return;
15415 case 'T':
15416 if (ASSEMBLER_DIALECT == ASM_ATT)
15417 putc ('t', file);
15418 return;
15420 case 'O':
15421 #ifdef HAVE_AS_IX86_CMOV_SUN_SYNTAX
15422 if (ASSEMBLER_DIALECT != ASM_ATT)
15423 return;
15425 switch (GET_MODE_SIZE (GET_MODE (x)))
15427 case 2:
15428 putc ('w', file);
15429 break;
15431 case 4:
15432 putc ('l', file);
15433 break;
15435 case 8:
15436 putc ('q', file);
15437 break;
15439 default:
15440 output_operand_lossage
15441 ("invalid operand size for operand code 'O'");
15442 return;
15445 putc ('.', file);
15446 #endif
15447 return;
15449 case 'z':
15450 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_INT)
15452 /* Opcodes don't get size suffixes if using Intel opcodes. */
15453 if (ASSEMBLER_DIALECT == ASM_INTEL)
15454 return;
15456 switch (GET_MODE_SIZE (GET_MODE (x)))
15458 case 1:
15459 putc ('b', file);
15460 return;
15462 case 2:
15463 putc ('w', file);
15464 return;
15466 case 4:
15467 putc ('l', file);
15468 return;
15470 case 8:
15471 putc ('q', file);
15472 return;
15474 default:
15475 output_operand_lossage
15476 ("invalid operand size for operand code 'z'");
15477 return;
15481 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_FLOAT)
15482 warning
15483 (0, "non-integer operand used with operand code 'z'");
15484 /* FALLTHRU */
15486 case 'Z':
15487 /* 387 opcodes don't get size suffixes if using Intel opcodes. */
15488 if (ASSEMBLER_DIALECT == ASM_INTEL)
15489 return;
15491 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_INT)
15493 switch (GET_MODE_SIZE (GET_MODE (x)))
15495 case 2:
15496 #ifdef HAVE_AS_IX86_FILDS
15497 putc ('s', file);
15498 #endif
15499 return;
15501 case 4:
15502 putc ('l', file);
15503 return;
15505 case 8:
15506 #ifdef HAVE_AS_IX86_FILDQ
15507 putc ('q', file);
15508 #else
15509 fputs ("ll", file);
15510 #endif
15511 return;
15513 default:
15514 break;
15517 else if (GET_MODE_CLASS (GET_MODE (x)) == MODE_FLOAT)
15519 /* 387 opcodes don't get size suffixes
15520 if the operands are registers. */
15521 if (STACK_REG_P (x))
15522 return;
15524 switch (GET_MODE_SIZE (GET_MODE (x)))
15526 case 4:
15527 putc ('s', file);
15528 return;
15530 case 8:
15531 putc ('l', file);
15532 return;
15534 case 12:
15535 case 16:
15536 putc ('t', file);
15537 return;
15539 default:
15540 break;
15543 else
15545 output_operand_lossage
15546 ("invalid operand type used with operand code 'Z'");
15547 return;
15550 output_operand_lossage
15551 ("invalid operand size for operand code 'Z'");
15552 return;
15554 case 'd':
15555 case 'b':
15556 case 'w':
15557 case 'k':
15558 case 'q':
15559 case 'h':
15560 case 't':
15561 case 'g':
15562 case 'y':
15563 case 'x':
15564 case 'X':
15565 case 'P':
15566 case 'p':
15567 break;
15569 case 's':
15570 if (CONST_INT_P (x) || ! SHIFT_DOUBLE_OMITS_COUNT)
15572 ix86_print_operand (file, x, 0);
15573 fputs (", ", file);
15575 return;
15577 case 'Y':
15578 switch (GET_CODE (x))
15580 case NE:
15581 fputs ("neq", file);
15582 break;
15583 case EQ:
15584 fputs ("eq", file);
15585 break;
15586 case GE:
15587 case GEU:
15588 fputs (INTEGRAL_MODE_P (GET_MODE (x)) ? "ge" : "unlt", file);
15589 break;
15590 case GT:
15591 case GTU:
15592 fputs (INTEGRAL_MODE_P (GET_MODE (x)) ? "gt" : "unle", file);
15593 break;
15594 case LE:
15595 case LEU:
15596 fputs ("le", file);
15597 break;
15598 case LT:
15599 case LTU:
15600 fputs ("lt", file);
15601 break;
15602 case UNORDERED:
15603 fputs ("unord", file);
15604 break;
15605 case ORDERED:
15606 fputs ("ord", file);
15607 break;
15608 case UNEQ:
15609 fputs ("ueq", file);
15610 break;
15611 case UNGE:
15612 fputs ("nlt", file);
15613 break;
15614 case UNGT:
15615 fputs ("nle", file);
15616 break;
15617 case UNLE:
15618 fputs ("ule", file);
15619 break;
15620 case UNLT:
15621 fputs ("ult", file);
15622 break;
15623 case LTGT:
15624 fputs ("une", file);
15625 break;
15626 default:
15627 output_operand_lossage ("operand is not a condition code, "
15628 "invalid operand code 'Y'");
15629 return;
15631 return;
15633 case 'D':
15634 /* A little bit of brain damage here.  The SSE compare instructions
15635 use completely different names for the comparisons than the
15636 fp conditional moves do.
15637 switch (GET_CODE (x))
15639 case UNEQ:
15640 if (TARGET_AVX)
15642 fputs ("eq_us", file);
15643 break;
15645 case EQ:
15646 fputs ("eq", file);
15647 break;
15648 case UNLT:
15649 if (TARGET_AVX)
15651 fputs ("nge", file);
15652 break;
15654 case LT:
15655 fputs ("lt", file);
15656 break;
15657 case UNLE:
15658 if (TARGET_AVX)
15660 fputs ("ngt", file);
15661 break;
15663 case LE:
15664 fputs ("le", file);
15665 break;
15666 case UNORDERED:
15667 fputs ("unord", file);
15668 break;
15669 case LTGT:
15670 if (TARGET_AVX)
15672 fputs ("neq_oq", file);
15673 break;
15675 case NE:
15676 fputs ("neq", file);
15677 break;
15678 case GE:
15679 if (TARGET_AVX)
15681 fputs ("ge", file);
15682 break;
15684 case UNGE:
15685 fputs ("nlt", file);
15686 break;
15687 case GT:
15688 if (TARGET_AVX)
15690 fputs ("gt", file);
15691 break;
15693 case UNGT:
15694 fputs ("nle", file);
15695 break;
15696 case ORDERED:
15697 fputs ("ord", file);
15698 break;
15699 default:
15700 output_operand_lossage ("operand is not a condition code, "
15701 "invalid operand code 'D'");
15702 return;
15704 return;
15706 case 'F':
15707 case 'f':
15708 #ifdef HAVE_AS_IX86_CMOV_SUN_SYNTAX
15709 if (ASSEMBLER_DIALECT == ASM_ATT)
15710 putc ('.', file);
15711 #endif
15713 case 'C':
15714 case 'c':
15715 if (!COMPARISON_P (x))
15717 output_operand_lossage ("operand is not a condition code, "
15718 "invalid operand code '%c'", code);
15719 return;
15721 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)),
15722 code == 'c' || code == 'f',
15723 code == 'F' || code == 'f',
15724 file);
15725 return;
15727 case 'H':
15728 if (!offsettable_memref_p (x))
15730 output_operand_lossage ("operand is not an offsettable memory "
15731 "reference, invalid operand code 'H'");
15732 return;
15734 /* It doesn't actually matter what mode we use here, as we're
15735 only going to use this for printing. */
15736 x = adjust_address_nv (x, DImode, 8);
15737 /* Output 'qword ptr' for intel assembler dialect. */
15738 if (ASSEMBLER_DIALECT == ASM_INTEL)
15739 code = 'q';
15740 break;
15742 case 'K':
15743 gcc_assert (CONST_INT_P (x));
15745 if (INTVAL (x) & IX86_HLE_ACQUIRE)
15746 #ifdef HAVE_AS_IX86_HLE
15747 fputs ("xacquire ", file);
15748 #else
15749 fputs ("\n" ASM_BYTE "0xf2\n\t", file);
15750 #endif
15751 else if (INTVAL (x) & IX86_HLE_RELEASE)
15752 #ifdef HAVE_AS_IX86_HLE
15753 fputs ("xrelease ", file);
15754 #else
15755 fputs ("\n" ASM_BYTE "0xf3\n\t", file);
15756 #endif
15757 /* We do not want to print the value of the operand. */
15758 return;
15760 case 'N':
15761 if (x == const0_rtx || x == CONST0_RTX (GET_MODE (x)))
15762 fputs ("{z}", file);
15763 return;
15765 case 'r':
15766 gcc_assert (CONST_INT_P (x));
15767 gcc_assert (INTVAL (x) == ROUND_SAE);
15769 if (ASSEMBLER_DIALECT == ASM_INTEL)
15770 fputs (", ", file);
15772 fputs ("{sae}", file);
15774 if (ASSEMBLER_DIALECT == ASM_ATT)
15775 fputs (", ", file);
15777 return;
15779 case 'R':
15780 gcc_assert (CONST_INT_P (x));
15782 if (ASSEMBLER_DIALECT == ASM_INTEL)
15783 fputs (", ", file);
15785 switch (INTVAL (x))
15787 case ROUND_NEAREST_INT | ROUND_SAE:
15788 fputs ("{rn-sae}", file);
15789 break;
15790 case ROUND_NEG_INF | ROUND_SAE:
15791 fputs ("{rd-sae}", file);
15792 break;
15793 case ROUND_POS_INF | ROUND_SAE:
15794 fputs ("{ru-sae}", file);
15795 break;
15796 case ROUND_ZERO | ROUND_SAE:
15797 fputs ("{rz-sae}", file);
15798 break;
15799 default:
15800 gcc_unreachable ();
15803 if (ASSEMBLER_DIALECT == ASM_ATT)
15804 fputs (", ", file);
15806 return;
15808 case '*':
15809 if (ASSEMBLER_DIALECT == ASM_ATT)
15810 putc ('*', file);
15811 return;
15813 case '&':
15815 const char *name = get_some_local_dynamic_name ();
15816 if (name == NULL)
15817 output_operand_lossage ("'%%&' used without any "
15818 "local dynamic TLS references");
15819 else
15820 assemble_name (file, name);
15821 return;
15824 case '+':
15826 rtx x;
15828 if (!optimize
15829 || optimize_function_for_size_p (cfun)
15830 || !TARGET_BRANCH_PREDICTION_HINTS)
15831 return;
15833 x = find_reg_note (current_output_insn, REG_BR_PROB, 0);
15834 if (x)
15836 int pred_val = XINT (x, 0);
15838 if (pred_val < REG_BR_PROB_BASE * 45 / 100
15839 || pred_val > REG_BR_PROB_BASE * 55 / 100)
15841 bool taken = pred_val > REG_BR_PROB_BASE / 2;
15842 bool cputaken
15843 = final_forward_branch_p (current_output_insn) == 0;
15845 /* Emit hints only in the case default branch prediction
15846 heuristics would fail. */
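/* The CPU's static predictor assumes backward branches are taken and
   forward branches are not; a ds/cs prefix is emitted only when the
   profile-based prediction disagrees with that assumption.  */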
15847 if (taken != cputaken)
15849 /* We use 3e (DS) prefix for taken branches and
15850 2e (CS) prefix for not taken branches. */
15851 if (taken)
15852 fputs ("ds ; ", file);
15853 else
15854 fputs ("cs ; ", file);
15858 return;
15861 case ';':
15862 #ifndef HAVE_AS_IX86_REP_LOCK_PREFIX
15863 putc (';', file);
15864 #endif
15865 return;
15867 case '@':
15868 if (ASSEMBLER_DIALECT == ASM_ATT)
15869 putc ('%', file);
15871 /* The kernel uses a different segment register for performance
15872 reasons; a system call would not have to trash the userspace
15873 segment register, which would be expensive. */
15874 if (TARGET_64BIT && ix86_cmodel != CM_KERNEL)
15875 fputs ("fs", file);
15876 else
15877 fputs ("gs", file);
15878 return;
15880 case '~':
15881 putc (TARGET_AVX2 ? 'i' : 'f', file);
15882 return;
15884 case '^':
15885 if (TARGET_64BIT && Pmode != word_mode)
15886 fputs ("addr32 ", file);
15887 return;
15889 case '!':
15890 if (ix86_bnd_prefixed_insn_p (current_output_insn))
15891 fputs ("bnd ", file);
15892 return;
15894 default:
15895 output_operand_lossage ("invalid operand code '%c'", code);
15899 if (REG_P (x))
15900 print_reg (x, code, file);
15902 else if (MEM_P (x))
15904 /* No `byte ptr' prefix for call instructions or BLKmode operands. */
15905 if (ASSEMBLER_DIALECT == ASM_INTEL && code != 'X' && code != 'P'
15906 && GET_MODE (x) != BLKmode)
15908 const char * size;
15909 switch (GET_MODE_SIZE (GET_MODE (x)))
15911 case 1: size = "BYTE"; break;
15912 case 2: size = "WORD"; break;
15913 case 4: size = "DWORD"; break;
15914 case 8: size = "QWORD"; break;
15915 case 12: size = "TBYTE"; break;
15916 case 16:
15917 if (GET_MODE (x) == XFmode)
15918 size = "TBYTE";
15919 else
15920 size = "XMMWORD";
15921 break;
15922 case 32: size = "YMMWORD"; break;
15923 case 64: size = "ZMMWORD"; break;
15924 default:
15925 gcc_unreachable ();
15928 /* Check for explicit size override (codes 'b', 'w', 'k',
15929 'q' and 'x') */
15930 if (code == 'b')
15931 size = "BYTE";
15932 else if (code == 'w')
15933 size = "WORD";
15934 else if (code == 'k')
15935 size = "DWORD";
15936 else if (code == 'q')
15937 size = "QWORD";
15938 else if (code == 'x')
15939 size = "XMMWORD";
15941 fputs (size, file);
15942 fputs (" PTR ", file);
15945 x = XEXP (x, 0);
15946 /* Avoid (%rip) for call operands. */
15947 if (CONSTANT_ADDRESS_P (x) && code == 'P'
15948 && !CONST_INT_P (x))
15949 output_addr_const (file, x);
15950 else if (this_is_asm_operands && ! address_operand (x, VOIDmode))
15951 output_operand_lossage ("invalid constraints for operand");
15952 else
15953 output_address (x);
15956 else if (CONST_DOUBLE_P (x) && GET_MODE (x) == SFmode)
15958 REAL_VALUE_TYPE r;
15959 long l;
15961 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
15962 REAL_VALUE_TO_TARGET_SINGLE (r, l);
15964 if (ASSEMBLER_DIALECT == ASM_ATT)
15965 putc ('$', file);
15966 /* Sign extend 32bit SFmode immediate to 8 bytes. */
15967 if (code == 'q')
15968 fprintf (file, "0x%08" HOST_LONG_LONG_FORMAT "x",
15969 (unsigned long long) (int) l);
15970 else
15971 fprintf (file, "0x%08x", (unsigned int) l);
15974 else if (CONST_DOUBLE_P (x) && GET_MODE (x) == DFmode)
15976 REAL_VALUE_TYPE r;
15977 long l[2];
15979 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
15980 REAL_VALUE_TO_TARGET_DOUBLE (r, l);
15982 if (ASSEMBLER_DIALECT == ASM_ATT)
15983 putc ('$', file);
15984 fprintf (file, "0x%lx%08lx", l[1] & 0xffffffff, l[0] & 0xffffffff);
15987 /* These float cases don't actually occur as immediate operands. */
15988 else if (CONST_DOUBLE_P (x) && GET_MODE (x) == XFmode)
15990 char dstr[30];
15992 real_to_decimal (dstr, CONST_DOUBLE_REAL_VALUE (x), sizeof (dstr), 0, 1);
15993 fputs (dstr, file);
15996 else
15998 /* We have patterns that allow zero sets of memory, for instance.
15999 In 64-bit mode, we should probably support all 8-byte vectors,
16000 since we can in fact encode that into an immediate. */
16001 if (GET_CODE (x) == CONST_VECTOR)
16003 gcc_assert (x == CONST0_RTX (GET_MODE (x)));
16004 x = const0_rtx;
16007 if (code != 'P' && code != 'p')
16009 if (CONST_INT_P (x))
16011 if (ASSEMBLER_DIALECT == ASM_ATT)
16012 putc ('$', file);
16014 else if (GET_CODE (x) == CONST || GET_CODE (x) == SYMBOL_REF
16015 || GET_CODE (x) == LABEL_REF)
16017 if (ASSEMBLER_DIALECT == ASM_ATT)
16018 putc ('$', file);
16019 else
16020 fputs ("OFFSET FLAT:", file);
16023 if (CONST_INT_P (x))
16024 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
16025 else if (flag_pic || MACHOPIC_INDIRECT)
16026 output_pic_addr_const (file, x, code);
16027 else
16028 output_addr_const (file, x);
16032 static bool
16033 ix86_print_operand_punct_valid_p (unsigned char code)
16035 return (code == '@' || code == '*' || code == '+' || code == '&'
16036 || code == ';' || code == '~' || code == '^' || code == '!');
16039 /* Print a memory operand whose address is ADDR. */
16041 static void
16042 ix86_print_operand_address (FILE *file, rtx addr)
16044 struct ix86_address parts;
16045 rtx base, index, disp;
16046 int scale;
16047 int ok;
16048 bool vsib = false;
16049 int code = 0;
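/* Gather/scatter addresses arrive wrapped in UNSPEC_VSIBADDR; the
   vector index register and the scale are carried as extra operands
   of the UNSPEC and are spliced into the decomposed address below.  */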
16051 if (GET_CODE (addr) == UNSPEC && XINT (addr, 1) == UNSPEC_VSIBADDR)
16053 ok = ix86_decompose_address (XVECEXP (addr, 0, 0), &parts);
16054 gcc_assert (parts.index == NULL_RTX);
16055 parts.index = XVECEXP (addr, 0, 1);
16056 parts.scale = INTVAL (XVECEXP (addr, 0, 2));
16057 addr = XVECEXP (addr, 0, 0);
16058 vsib = true;
16060 else if (GET_CODE (addr) == UNSPEC && XINT (addr, 1) == UNSPEC_LEA_ADDR)
16062 gcc_assert (TARGET_64BIT);
16063 ok = ix86_decompose_address (XVECEXP (addr, 0, 0), &parts);
16064 code = 'q';
16066 else if (GET_CODE (addr) == UNSPEC && XINT (addr, 1) == UNSPEC_BNDMK_ADDR)
16068 ok = ix86_decompose_address (XVECEXP (addr, 0, 1), &parts);
16069 gcc_assert (parts.base == NULL_RTX || parts.index == NULL_RTX);
16070 if (parts.base != NULL_RTX)
16072 parts.index = parts.base;
16073 parts.scale = 1;
16075 parts.base = XVECEXP (addr, 0, 0);
16076 addr = XVECEXP (addr, 0, 0);
16078 else if (GET_CODE (addr) == UNSPEC && XINT (addr, 1) == UNSPEC_BNDLDX_ADDR)
16080 ok = ix86_decompose_address (XVECEXP (addr, 0, 0), &parts);
16081 gcc_assert (parts.index == NULL_RTX);
16082 parts.index = XVECEXP (addr, 0, 1);
16083 addr = XVECEXP (addr, 0, 0);
16085 else
16086 ok = ix86_decompose_address (addr, &parts);
16088 gcc_assert (ok);
16090 base = parts.base;
16091 index = parts.index;
16092 disp = parts.disp;
16093 scale = parts.scale;
16095 switch (parts.seg)
16097 case SEG_DEFAULT:
16098 break;
16099 case SEG_FS:
16100 case SEG_GS:
16101 if (ASSEMBLER_DIALECT == ASM_ATT)
16102 putc ('%', file);
16103 fputs ((parts.seg == SEG_FS ? "fs:" : "gs:"), file);
16104 break;
16105 default:
16106 gcc_unreachable ();
16109 /* Use one byte shorter RIP relative addressing for 64bit mode. */
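/* (In 64-bit mode an absolute 32-bit displacement needs a SIB byte,
   while a RIP-relative displacement does not.)  */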
16110 if (TARGET_64BIT && !base && !index)
16112 rtx symbol = disp;
16114 if (GET_CODE (disp) == CONST
16115 && GET_CODE (XEXP (disp, 0)) == PLUS
16116 && CONST_INT_P (XEXP (XEXP (disp, 0), 1)))
16117 symbol = XEXP (XEXP (disp, 0), 0);
16119 if (GET_CODE (symbol) == LABEL_REF
16120 || (GET_CODE (symbol) == SYMBOL_REF
16121 && SYMBOL_REF_TLS_MODEL (symbol) == 0))
16122 base = pc_rtx;
16124 if (!base && !index)
16126 /* Displacement only requires special attention. */
16128 if (CONST_INT_P (disp))
16130 if (ASSEMBLER_DIALECT == ASM_INTEL && parts.seg == SEG_DEFAULT)
16131 fputs ("ds:", file);
16132 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (disp));
16134 else if (flag_pic)
16135 output_pic_addr_const (file, disp, 0);
16136 else
16137 output_addr_const (file, disp);
16139 else
16141 /* Print SImode register names to force addr32 prefix. */
16142 if (SImode_address_operand (addr, VOIDmode))
16144 #ifdef ENABLE_CHECKING
16145 gcc_assert (TARGET_64BIT);
16146 switch (GET_CODE (addr))
16148 case SUBREG:
16149 gcc_assert (GET_MODE (addr) == SImode);
16150 gcc_assert (GET_MODE (SUBREG_REG (addr)) == DImode);
16151 break;
16152 case ZERO_EXTEND:
16153 case AND:
16154 gcc_assert (GET_MODE (addr) == DImode);
16155 break;
16156 default:
16157 gcc_unreachable ();
16159 #endif
16160 gcc_assert (!code);
16161 code = 'k';
16163 else if (code == 0
16164 && TARGET_X32
16165 && disp
16166 && CONST_INT_P (disp)
16167 && INTVAL (disp) < -16*1024*1024)
16169 /* X32 runs in 64-bit mode, where displacement, DISP, in
16170 address DISP(%r64), is encoded as 32-bit immediate sign-
16171 extended from 32-bit to 64-bit. For -0x40000300(%r64),
16172 address is %r64 + 0xffffffffbffffd00. When %r64 <
16173 0x40000300, like 0x37ffe064, address is 0xfffffffff7ffdd64,
16174 which is invalid for x32. The correct address is %r64
16175 - 0x40000300 == 0xf7ffdd64. To properly encode
16176 -0x40000300(%r64) for x32, we zero-extend negative
16177 displacement by forcing addr32 prefix which truncates
16178 0xfffffffff7ffdd64 to 0xf7ffdd64. In theory, we should
16179 zero-extend all negative displacements, including -1(%rsp).
16180 However, for small negative displacements, sign-extension
16181 won't cause overflow. We only zero-extend negative
16182 displacements if they are < -16*1024*1024, which is also used
16183 to check legitimate address displacements for PIC. */
16184 code = 'k';
16187 if (ASSEMBLER_DIALECT == ASM_ATT)
16189 if (disp)
16191 if (flag_pic)
16192 output_pic_addr_const (file, disp, 0);
16193 else if (GET_CODE (disp) == LABEL_REF)
16194 output_asm_label (disp);
16195 else
16196 output_addr_const (file, disp);
16199 putc ('(', file);
16200 if (base)
16201 print_reg (base, code, file);
16202 if (index)
16204 putc (',', file);
16205 print_reg (index, vsib ? 0 : code, file);
16206 if (scale != 1 || vsib)
16207 fprintf (file, ",%d", scale);
16209 putc (')', file);
16211 else
16213 rtx offset = NULL_RTX;
16215 if (disp)
16217 /* Pull out the offset of a symbol; print any symbol itself. */
16218 if (GET_CODE (disp) == CONST
16219 && GET_CODE (XEXP (disp, 0)) == PLUS
16220 && CONST_INT_P (XEXP (XEXP (disp, 0), 1)))
16222 offset = XEXP (XEXP (disp, 0), 1);
16223 disp = gen_rtx_CONST (VOIDmode,
16224 XEXP (XEXP (disp, 0), 0));
16227 if (flag_pic)
16228 output_pic_addr_const (file, disp, 0);
16229 else if (GET_CODE (disp) == LABEL_REF)
16230 output_asm_label (disp);
16231 else if (CONST_INT_P (disp))
16232 offset = disp;
16233 else
16234 output_addr_const (file, disp);
16237 putc ('[', file);
16238 if (base)
16240 print_reg (base, code, file);
16241 if (offset)
16243 if (INTVAL (offset) >= 0)
16244 putc ('+', file);
16245 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (offset));
16248 else if (offset)
16249 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (offset));
16250 else
16251 putc ('0', file);
16253 if (index)
16255 putc ('+', file);
16256 print_reg (index, vsib ? 0 : code, file);
16257 if (scale != 1 || vsib)
16258 fprintf (file, "*%d", scale);
16260 putc (']', file);
16265 /* Implementation of TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA. */
16267 static bool
16268 i386_asm_output_addr_const_extra (FILE *file, rtx x)
16270 rtx op;
16272 if (GET_CODE (x) != UNSPEC)
16273 return false;
16275 op = XVECEXP (x, 0, 0);
16276 switch (XINT (x, 1))
16278 case UNSPEC_GOTTPOFF:
16279 output_addr_const (file, op);
16280 /* FIXME: This might be @TPOFF in Sun ld. */
16281 fputs ("@gottpoff", file);
16282 break;
16283 case UNSPEC_TPOFF:
16284 output_addr_const (file, op);
16285 fputs ("@tpoff", file);
16286 break;
16287 case UNSPEC_NTPOFF:
16288 output_addr_const (file, op);
16289 if (TARGET_64BIT)
16290 fputs ("@tpoff", file);
16291 else
16292 fputs ("@ntpoff", file);
16293 break;
16294 case UNSPEC_DTPOFF:
16295 output_addr_const (file, op);
16296 fputs ("@dtpoff", file);
16297 break;
16298 case UNSPEC_GOTNTPOFF:
16299 output_addr_const (file, op);
16300 if (TARGET_64BIT)
16301 fputs (ASSEMBLER_DIALECT == ASM_ATT ?
16302 "@gottpoff(%rip)" : "@gottpoff[rip]", file);
16303 else
16304 fputs ("@gotntpoff", file);
16305 break;
16306 case UNSPEC_INDNTPOFF:
16307 output_addr_const (file, op);
16308 fputs ("@indntpoff", file);
16309 break;
16310 #if TARGET_MACHO
16311 case UNSPEC_MACHOPIC_OFFSET:
16312 output_addr_const (file, op);
16313 putc ('-', file);
16314 machopic_output_function_base_name (file);
16315 break;
16316 #endif
16318 case UNSPEC_STACK_CHECK:
16320 int offset;
16322 gcc_assert (flag_split_stack);
16324 #ifdef TARGET_THREAD_SPLIT_STACK_OFFSET
16325 offset = TARGET_THREAD_SPLIT_STACK_OFFSET;
16326 #else
16327 gcc_unreachable ();
16328 #endif
16330 fprintf (file, "%s:%d", TARGET_64BIT ? "%fs" : "%gs", offset);
16332 break;
16334 default:
16335 return false;
16338 return true;
16341 /* Split one or more double-mode RTL references into pairs of half-mode
16342 references. The RTL can be REG, offsettable MEM, integer constant, or
16343 CONST_DOUBLE. "operands" is a pointer to an array of double-mode RTLs to
16344 split and "num" is its length. lo_half and hi_half are output arrays
16345 that parallel "operands". */
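/* For example, on -m32 a DImode register operand is split into two
   SImode subregs at byte offsets 0 and 4, and a DImode memory operand
   into two adjusted SImode memory references.  */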
16347 void
16348 split_double_mode (machine_mode mode, rtx operands[],
16349 int num, rtx lo_half[], rtx hi_half[])
16351 machine_mode half_mode;
16352 unsigned int byte;
16354 switch (mode)
16356 case TImode:
16357 half_mode = DImode;
16358 break;
16359 case DImode:
16360 half_mode = SImode;
16361 break;
16362 default:
16363 gcc_unreachable ();
16366 byte = GET_MODE_SIZE (half_mode);
16368 while (num--)
16370 rtx op = operands[num];
16372 /* simplify_subreg refuses to split volatile memory addresses,
16373 but we still have to handle it. */
16374 if (MEM_P (op))
16376 lo_half[num] = adjust_address (op, half_mode, 0);
16377 hi_half[num] = adjust_address (op, half_mode, byte);
16379 else
16381 lo_half[num] = simplify_gen_subreg (half_mode, op,
16382 GET_MODE (op) == VOIDmode
16383 ? mode : GET_MODE (op), 0);
16384 hi_half[num] = simplify_gen_subreg (half_mode, op,
16385 GET_MODE (op) == VOIDmode
16386 ? mode : GET_MODE (op), byte);
16391 /* Output code to perform a 387 binary operation in INSN, one of PLUS,
16392 MINUS, MULT or DIV. OPERANDS are the insn operands, where operands[3]
16393 is the expression of the binary operation. The output may either be
16394 emitted here, or returned to the caller, like all output_* functions.
16396 There is no guarantee that the operands are the same mode, as they
16397 might be within FLOAT or FLOAT_EXTEND expressions. */
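/* For SSE operands this returns e.g. "addsd\t{%2, %0|%0, %2}" (or the
   three-operand "vaddsd" form under AVX); for x87 stack operands it
   returns an fadd/fsub/fmul/fdiv template chosen by which operand is
   st(0) and whether an input register dies.  */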
16399 #ifndef SYSV386_COMPAT
16400 /* Set to 1 for compatibility with brain-damaged assemblers. No-one
16401 wants to fix the assemblers because that causes incompatibility
16402 with gcc. No-one wants to fix gcc because that causes
16403 incompatibility with assemblers... You can use the option of
16404 -DSYSV386_COMPAT=0 if you recompile both gcc and gas this way. */
16405 #define SYSV386_COMPAT 1
16406 #endif
16408 const char *
16409 output_387_binary_op (rtx insn, rtx *operands)
16411 static char buf[40];
16412 const char *p;
16413 const char *ssep;
16414 int is_sse = SSE_REG_P (operands[0]) || SSE_REG_P (operands[1]) || SSE_REG_P (operands[2]);
16416 #ifdef ENABLE_CHECKING
16417 /* Even if we do not want to check the inputs, this documents input
16418 constraints, which helps in understanding the following code. */
16419 if (STACK_REG_P (operands[0])
16420 && ((REG_P (operands[1])
16421 && REGNO (operands[0]) == REGNO (operands[1])
16422 && (STACK_REG_P (operands[2]) || MEM_P (operands[2])))
16423 || (REG_P (operands[2])
16424 && REGNO (operands[0]) == REGNO (operands[2])
16425 && (STACK_REG_P (operands[1]) || MEM_P (operands[1]))))
16426 && (STACK_TOP_P (operands[1]) || STACK_TOP_P (operands[2])))
16427 ; /* ok */
16428 else
16429 gcc_assert (is_sse);
16430 #endif
16432 switch (GET_CODE (operands[3]))
16434 case PLUS:
16435 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
16436 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
16437 p = "fiadd";
16438 else
16439 p = "fadd";
16440 ssep = "vadd";
16441 break;
16443 case MINUS:
16444 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
16445 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
16446 p = "fisub";
16447 else
16448 p = "fsub";
16449 ssep = "vsub";
16450 break;
16452 case MULT:
16453 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
16454 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
16455 p = "fimul";
16456 else
16457 p = "fmul";
16458 ssep = "vmul";
16459 break;
16461 case DIV:
16462 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
16463 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
16464 p = "fidiv";
16465 else
16466 p = "fdiv";
16467 ssep = "vdiv";
16468 break;
16470 default:
16471 gcc_unreachable ();
16474 if (is_sse)
16476 if (TARGET_AVX)
16478 strcpy (buf, ssep);
16479 if (GET_MODE (operands[0]) == SFmode)
16480 strcat (buf, "ss\t{%2, %1, %0|%0, %1, %2}");
16481 else
16482 strcat (buf, "sd\t{%2, %1, %0|%0, %1, %2}");
16484 else
16486 strcpy (buf, ssep + 1);
16487 if (GET_MODE (operands[0]) == SFmode)
16488 strcat (buf, "ss\t{%2, %0|%0, %2}");
16489 else
16490 strcat (buf, "sd\t{%2, %0|%0, %2}");
16492 return buf;
16494 strcpy (buf, p);
16496 switch (GET_CODE (operands[3]))
16498 case MULT:
16499 case PLUS:
16500 if (REG_P (operands[2]) && REGNO (operands[0]) == REGNO (operands[2]))
16501 std::swap (operands[1], operands[2]);
16503 /* We know operands[0] == operands[1]. */
16505 if (MEM_P (operands[2]))
16507 p = "%Z2\t%2";
16508 break;
16511 if (find_regno_note (insn, REG_DEAD, REGNO (operands[2])))
16513 if (STACK_TOP_P (operands[0]))
16514 /* How is it that we are storing to a dead operand[2]?
16515 Well, presumably operands[1] is dead too. We can't
16516 store the result to st(0) as st(0) gets popped on this
16517 instruction. Instead store to operands[2] (which I
16518 think has to be st(1)). st(1) will be popped later.
16519 gcc <= 2.8.1 didn't have this check and generated
16520 assembly code that the Unixware assembler rejected. */
16521 p = "p\t{%0, %2|%2, %0}"; /* st(1) = st(0) op st(1); pop */
16522 else
16523 p = "p\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0); pop */
16524 break;
16527 if (STACK_TOP_P (operands[0]))
16528 p = "\t{%y2, %0|%0, %y2}"; /* st(0) = st(0) op st(r2) */
16529 else
16530 p = "\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0) */
16531 break;
16533 case MINUS:
16534 case DIV:
16535 if (MEM_P (operands[1]))
16537 p = "r%Z1\t%1";
16538 break;
16541 if (MEM_P (operands[2]))
16543 p = "%Z2\t%2";
16544 break;
16547 if (find_regno_note (insn, REG_DEAD, REGNO (operands[2])))
16549 #if SYSV386_COMPAT
16550 /* The SystemV/386 SVR3.2 assembler, and probably all AT&T
16551 derived assemblers, confusingly reverse the direction of
16552 the operation for fsub{r} and fdiv{r} when the
16553 destination register is not st(0). The Intel assembler
16554 doesn't have this brain damage. Read !SYSV386_COMPAT to
16555 figure out what the hardware really does. */
16556 if (STACK_TOP_P (operands[0]))
16557 p = "{p\t%0, %2|rp\t%2, %0}";
16558 else
16559 p = "{rp\t%2, %0|p\t%0, %2}";
16560 #else
16561 if (STACK_TOP_P (operands[0]))
16562 /* As above for fmul/fadd, we can't store to st(0). */
16563 p = "rp\t{%0, %2|%2, %0}"; /* st(1) = st(0) op st(1); pop */
16564 else
16565 p = "p\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0); pop */
16566 #endif
16567 break;
16570 if (find_regno_note (insn, REG_DEAD, REGNO (operands[1])))
16572 #if SYSV386_COMPAT
16573 if (STACK_TOP_P (operands[0]))
16574 p = "{rp\t%0, %1|p\t%1, %0}";
16575 else
16576 p = "{p\t%1, %0|rp\t%0, %1}";
16577 #else
16578 if (STACK_TOP_P (operands[0]))
16579 p = "p\t{%0, %1|%1, %0}"; /* st(1) = st(1) op st(0); pop */
16580 else
16581 p = "rp\t{%1, %0|%0, %1}"; /* st(r2) = st(0) op st(r2); pop */
16582 #endif
16583 break;
16586 if (STACK_TOP_P (operands[0]))
16588 if (STACK_TOP_P (operands[1]))
16589 p = "\t{%y2, %0|%0, %y2}"; /* st(0) = st(0) op st(r2) */
16590 else
16591 p = "r\t{%y1, %0|%0, %y1}"; /* st(0) = st(r1) op st(0) */
16592 break;
16594 else if (STACK_TOP_P (operands[1]))
16596 #if SYSV386_COMPAT
16597 p = "{\t%1, %0|r\t%0, %1}";
16598 #else
16599 p = "r\t{%1, %0|%0, %1}"; /* st(r2) = st(0) op st(r2) */
16600 #endif
16602 else
16604 #if SYSV386_COMPAT
16605 p = "{r\t%2, %0|\t%0, %2}";
16606 #else
16607 p = "\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0) */
16608 #endif
16610 break;
16612 default:
16613 gcc_unreachable ();
16616 strcat (buf, p);
16617 return buf;
16620 /* Check if a 256bit AVX register is referenced inside of EXP. */
16622 static bool
16623 ix86_check_avx256_register (const_rtx exp)
16625 if (GET_CODE (exp) == SUBREG)
16626 exp = SUBREG_REG (exp);
16628 return (REG_P (exp)
16629 && VALID_AVX256_REG_OR_OI_MODE (GET_MODE (exp)));
16632 /* Return needed mode for entity in optimize_mode_switching pass. */
16634 static int
16635 ix86_avx_u128_mode_needed (rtx_insn *insn)
16637 if (CALL_P (insn))
16639 rtx link;
16641 /* Needed mode is set to AVX_U128_CLEAN if there are
16642 no 256bit modes used in function arguments. */
16643 for (link = CALL_INSN_FUNCTION_USAGE (insn);
16644 link;
16645 link = XEXP (link, 1))
16647 if (GET_CODE (XEXP (link, 0)) == USE)
16649 rtx arg = XEXP (XEXP (link, 0), 0);
16651 if (ix86_check_avx256_register (arg))
16652 return AVX_U128_DIRTY;
16656 return AVX_U128_CLEAN;
16659 /* Require DIRTY mode if a 256bit AVX register is referenced. The hardware
16660 changes state only when a 256bit register is written to, but we need
16661 to prevent the compiler from moving the optimal insertion point above
16662 an eventual read from a 256bit register. */
16663 subrtx_iterator::array_type array;
16664 FOR_EACH_SUBRTX (iter, array, PATTERN (insn), NONCONST)
16665 if (ix86_check_avx256_register (*iter))
16666 return AVX_U128_DIRTY;
16668 return AVX_U128_ANY;
16671 /* Return mode that i387 must be switched into
16672 prior to the execution of insn. */
16674 static int
16675 ix86_i387_mode_needed (int entity, rtx_insn *insn)
16677 enum attr_i387_cw mode;
16679 /* The mode UNINITIALIZED is used to store the control word after a
16680 function call or ASM pattern. The mode ANY specifies that the function
16681 has no requirements on the control word and makes no changes to the
16682 bits we are interested in. */
16684 if (CALL_P (insn)
16685 || (NONJUMP_INSN_P (insn)
16686 && (asm_noperands (PATTERN (insn)) >= 0
16687 || GET_CODE (PATTERN (insn)) == ASM_INPUT)))
16688 return I387_CW_UNINITIALIZED;
16690 if (recog_memoized (insn) < 0)
16691 return I387_CW_ANY;
16693 mode = get_attr_i387_cw (insn);
16695 switch (entity)
16697 case I387_TRUNC:
16698 if (mode == I387_CW_TRUNC)
16699 return mode;
16700 break;
16702 case I387_FLOOR:
16703 if (mode == I387_CW_FLOOR)
16704 return mode;
16705 break;
16707 case I387_CEIL:
16708 if (mode == I387_CW_CEIL)
16709 return mode;
16710 break;
16712 case I387_MASK_PM:
16713 if (mode == I387_CW_MASK_PM)
16714 return mode;
16715 break;
16717 default:
16718 gcc_unreachable ();
16721 return I387_CW_ANY;
16724 /* Return mode that entity must be switched into
16725 prior to the execution of insn. */
16727 static int
16728 ix86_mode_needed (int entity, rtx_insn *insn)
16730 switch (entity)
16732 case AVX_U128:
16733 return ix86_avx_u128_mode_needed (insn);
16734 case I387_TRUNC:
16735 case I387_FLOOR:
16736 case I387_CEIL:
16737 case I387_MASK_PM:
16738 return ix86_i387_mode_needed (entity, insn);
16739 default:
16740 gcc_unreachable ();
16742 return 0;
16745 /* Check if a 256bit AVX register is referenced in stores. */
16747 static void
16748 ix86_check_avx256_stores (rtx dest, const_rtx, void *data)
16750 if (ix86_check_avx256_register (dest))
16752 bool *used = (bool *) data;
16753 *used = true;
16757 /* Calculate mode of upper 128bit AVX registers after the insn. */
16759 static int
16760 ix86_avx_u128_mode_after (int mode, rtx_insn *insn)
16762 rtx pat = PATTERN (insn);
16764 if (vzeroupper_operation (pat, VOIDmode)
16765 || vzeroall_operation (pat, VOIDmode))
16766 return AVX_U128_CLEAN;
16768 /* We know that the state is clean after a CALL insn if no 256bit
16769 register is used as the function return register. */
16770 if (CALL_P (insn))
16772 bool avx_reg256_found = false;
16773 note_stores (pat, ix86_check_avx256_stores, &avx_reg256_found);
16775 return avx_reg256_found ? AVX_U128_DIRTY : AVX_U128_CLEAN;
16778 /* Otherwise, return current mode. Remember that if insn
16779 references AVX 256bit registers, the mode was already changed
16780 to DIRTY from MODE_NEEDED. */
16781 return mode;
16784 /* Return the mode that an insn results in. */
16786 static int
16787 ix86_mode_after (int entity, int mode, rtx_insn *insn)
16789 switch (entity)
16791 case AVX_U128:
16792 return ix86_avx_u128_mode_after (mode, insn);
16793 case I387_TRUNC:
16794 case I387_FLOOR:
16795 case I387_CEIL:
16796 case I387_MASK_PM:
16797 return mode;
16798 default:
16799 gcc_unreachable ();
16803 static int
16804 ix86_avx_u128_mode_entry (void)
16806 tree arg;
16808 /* Entry mode is set to AVX_U128_DIRTY if there are
16809 256bit modes used in function arguments. */
16810 for (arg = DECL_ARGUMENTS (current_function_decl); arg;
16811 arg = TREE_CHAIN (arg))
16813 rtx incoming = DECL_INCOMING_RTL (arg);
16815 if (incoming && ix86_check_avx256_register (incoming))
16816 return AVX_U128_DIRTY;
16819 return AVX_U128_CLEAN;
16822 /* Return a mode that ENTITY is assumed to be
16823 switched to at function entry. */
16825 static int
16826 ix86_mode_entry (int entity)
16828 switch (entity)
16830 case AVX_U128:
16831 return ix86_avx_u128_mode_entry ();
16832 case I387_TRUNC:
16833 case I387_FLOOR:
16834 case I387_CEIL:
16835 case I387_MASK_PM:
16836 return I387_CW_ANY;
16837 default:
16838 gcc_unreachable ();
16842 static int
16843 ix86_avx_u128_mode_exit (void)
16845 rtx reg = crtl->return_rtx;
16847 /* Exit mode is set to AVX_U128_DIRTY if a 256bit mode is
16848 used for the function return register. */
16849 if (reg && ix86_check_avx256_register (reg))
16850 return AVX_U128_DIRTY;
16852 return AVX_U128_CLEAN;
16855 /* Return a mode that ENTITY is assumed to be
16856 switched to at function exit. */
16858 static int
16859 ix86_mode_exit (int entity)
16861 switch (entity)
16863 case AVX_U128:
16864 return ix86_avx_u128_mode_exit ();
16865 case I387_TRUNC:
16866 case I387_FLOOR:
16867 case I387_CEIL:
16868 case I387_MASK_PM:
16869 return I387_CW_ANY;
16870 default:
16871 gcc_unreachable ();
16875 static int
16876 ix86_mode_priority (int, int n)
16878 return n;
16881 /* Output code to initialize control word copies used by trunc?f?i and
16882 rounding patterns. The current control word is saved to a stack slot,
16883 and a copy modified for MODE is stored to the slot for that mode. */
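/* In the x87 control word, bits 10-11 form the rounding-control field
   (hence the constants below: 0x0c00 = truncate, 0x0400 = round down,
   0x0800 = round up) and bit 5 (0x0020) masks the precision exception,
   which is what nearbyint() relies on.  */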
16885 static void
16886 emit_i387_cw_initialization (int mode)
16888 rtx stored_mode = assign_386_stack_local (HImode, SLOT_CW_STORED);
16889 rtx new_mode;
16891 enum ix86_stack_slot slot;
16893 rtx reg = gen_reg_rtx (HImode);
16895 emit_insn (gen_x86_fnstcw_1 (stored_mode));
16896 emit_move_insn (reg, copy_rtx (stored_mode));
16898 if (TARGET_64BIT || TARGET_PARTIAL_REG_STALL
16899 || optimize_insn_for_size_p ())
16901 switch (mode)
16903 case I387_CW_TRUNC:
16904 /* round toward zero (truncate) */
16905 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0c00)));
16906 slot = SLOT_CW_TRUNC;
16907 break;
16909 case I387_CW_FLOOR:
16910 /* round down toward -oo */
16911 emit_insn (gen_andhi3 (reg, reg, GEN_INT (~0x0c00)));
16912 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0400)));
16913 slot = SLOT_CW_FLOOR;
16914 break;
16916 case I387_CW_CEIL:
16917 /* round up toward +oo */
16918 emit_insn (gen_andhi3 (reg, reg, GEN_INT (~0x0c00)));
16919 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0800)));
16920 slot = SLOT_CW_CEIL;
16921 break;
16923 case I387_CW_MASK_PM:
16924 /* mask precision exception for nearbyint() */
16925 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0020)));
16926 slot = SLOT_CW_MASK_PM;
16927 break;
16929 default:
16930 gcc_unreachable ();
16933 else
16935 switch (mode)
16937 case I387_CW_TRUNC:
16938 /* round toward zero (truncate) */
16939 emit_insn (gen_movsi_insv_1 (reg, GEN_INT (0xc)));
16940 slot = SLOT_CW_TRUNC;
16941 break;
16943 case I387_CW_FLOOR:
16944 /* round down toward -oo */
16945 emit_insn (gen_movsi_insv_1 (reg, GEN_INT (0x4)));
16946 slot = SLOT_CW_FLOOR;
16947 break;
16949 case I387_CW_CEIL:
16950 /* round up toward +oo */
16951 emit_insn (gen_movsi_insv_1 (reg, GEN_INT (0x8)));
16952 slot = SLOT_CW_CEIL;
16953 break;
16955 case I387_CW_MASK_PM:
16956 /* mask precision exception for nearbyint() */
16957 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0020)));
16958 slot = SLOT_CW_MASK_PM;
16959 break;
16961 default:
16962 gcc_unreachable ();
16966 gcc_assert (slot < MAX_386_STACK_LOCALS);
16968 new_mode = assign_386_stack_local (HImode, slot);
16969 emit_move_insn (new_mode, reg);
16972 /* Emit vzeroupper. */
16974 void
16975 ix86_avx_emit_vzeroupper (HARD_REG_SET regs_live)
16977 int i;
16979 /* Cancel automatic vzeroupper insertion if there are
16980 live call-saved SSE registers at the insertion point. */
16982 for (i = FIRST_SSE_REG; i <= LAST_SSE_REG; i++)
16983 if (TEST_HARD_REG_BIT (regs_live, i) && !call_used_regs[i])
16984 return;
16986 if (TARGET_64BIT)
16987 for (i = FIRST_REX_SSE_REG; i <= LAST_REX_SSE_REG; i++)
16988 if (TEST_HARD_REG_BIT (regs_live, i) && !call_used_regs[i])
16989 return;
16991 emit_insn (gen_avx_vzeroupper ());
16996 /* Generate one or more insns to set ENTITY to MODE. HARD_REG_LIVE
16997 is the set of hard registers live at the point where the insn(s)
16998 are to be inserted. */
17000 static void
17001 ix86_emit_mode_set (int entity, int mode, int prev_mode ATTRIBUTE_UNUSED,
17002 HARD_REG_SET regs_live)
17004 switch (entity)
17006 case AVX_U128:
17007 if (mode == AVX_U128_CLEAN)
17008 ix86_avx_emit_vzeroupper (regs_live);
17009 break;
17010 case I387_TRUNC:
17011 case I387_FLOOR:
17012 case I387_CEIL:
17013 case I387_MASK_PM:
17014 if (mode != I387_CW_ANY
17015 && mode != I387_CW_UNINITIALIZED)
17016 emit_i387_cw_initialization (mode);
17017 break;
17018 default:
17019 gcc_unreachable ();
17023 /* Output code for INSN to convert a float to a signed int. OPERANDS
17024 are the insn operands. The output may be [HSD]Imode and the input
17025 operand may be [SDX]Fmode. */
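/* For example, when a control-word switch is required the emitted
   sequence has the shape "fldcw %3; fistp%Z0 %0; fldcw %2", i.e. load
   the rounding control word, store the rounded integer, then restore
   the original control word (a sketch of the common case below).  */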
17027 const char *
17028 output_fix_trunc (rtx_insn *insn, rtx *operands, bool fisttp)
17030 int stack_top_dies = find_regno_note (insn, REG_DEAD, FIRST_STACK_REG) != 0;
17031 int dimode_p = GET_MODE (operands[0]) == DImode;
17032 int round_mode = get_attr_i387_cw (insn);
17034 /* Jump through a hoop or two for DImode, since the hardware has no
17035 non-popping instruction. We used to do this a different way, but
17036 that was somewhat fragile and broke with post-reload splitters. */
17037 if ((dimode_p || fisttp) && !stack_top_dies)
17038 output_asm_insn ("fld\t%y1", operands);
17040 gcc_assert (STACK_TOP_P (operands[1]));
17041 gcc_assert (MEM_P (operands[0]));
17042 gcc_assert (GET_MODE (operands[1]) != TFmode);
17044 if (fisttp)
17045 output_asm_insn ("fisttp%Z0\t%0", operands);
17046 else
17048 if (round_mode != I387_CW_ANY)
17049 output_asm_insn ("fldcw\t%3", operands);
17050 if (stack_top_dies || dimode_p)
17051 output_asm_insn ("fistp%Z0\t%0", operands);
17052 else
17053 output_asm_insn ("fist%Z0\t%0", operands);
17054 if (round_mode != I387_CW_ANY)
17055 output_asm_insn ("fldcw\t%2", operands);
17058 return "";
17061 /* Output code for x87 ffreep insn. The OPNO argument, which may only
17062 have the values zero or one, indicates the ffreep insn's operand
17063 from the OPERANDS array. */
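/* When the assembler does not know the ffreep mnemonic
   (!HAVE_AS_IX86_FFREEP), the insn is emitted as raw bytes: e.g. for
   %st(1) the ASM_SHORT value 0xc1df below corresponds to the two
   opcode bytes 0xdf 0xc1 of "ffreep %st(1)".  */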
17065 static const char *
17066 output_387_ffreep (rtx *operands ATTRIBUTE_UNUSED, int opno)
17068 if (TARGET_USE_FFREEP)
17069 #ifdef HAVE_AS_IX86_FFREEP
17070 return opno ? "ffreep\t%y1" : "ffreep\t%y0";
17071 #else
17073 static char retval[32];
17074 int regno = REGNO (operands[opno]);
17076 gcc_assert (STACK_REGNO_P (regno));
17078 regno -= FIRST_STACK_REG;
17080 snprintf (retval, sizeof (retval), ASM_SHORT "0xc%ddf", regno);
17081 return retval;
17083 #endif
17085 return opno ? "fstp\t%y1" : "fstp\t%y0";
17089 /* Output code for INSN to compare OPERANDS. EFLAGS_P is 1 when fcomi
17090 should be used. UNORDERED_P is true when fucom should be used. */
17092 const char *
17093 output_fp_compare (rtx insn, rtx *operands, bool eflags_p, bool unordered_p)
17095 int stack_top_dies;
17096 rtx cmp_op0, cmp_op1;
17097 int is_sse = SSE_REG_P (operands[0]) || SSE_REG_P (operands[1]);
17099 if (eflags_p)
17101 cmp_op0 = operands[0];
17102 cmp_op1 = operands[1];
17104 else
17106 cmp_op0 = operands[1];
17107 cmp_op1 = operands[2];
17110 if (is_sse)
17112 if (GET_MODE (operands[0]) == SFmode)
17113 if (unordered_p)
17114 return "%vucomiss\t{%1, %0|%0, %1}";
17115 else
17116 return "%vcomiss\t{%1, %0|%0, %1}";
17117 else
17118 if (unordered_p)
17119 return "%vucomisd\t{%1, %0|%0, %1}";
17120 else
17121 return "%vcomisd\t{%1, %0|%0, %1}";
17124 gcc_assert (STACK_TOP_P (cmp_op0));
17126 stack_top_dies = find_regno_note (insn, REG_DEAD, FIRST_STACK_REG) != 0;
17128 if (cmp_op1 == CONST0_RTX (GET_MODE (cmp_op1)))
17130 if (stack_top_dies)
17132 output_asm_insn ("ftst\n\tfnstsw\t%0", operands);
17133 return output_387_ffreep (operands, 1);
17135 else
17136 return "ftst\n\tfnstsw\t%0";
17139 if (STACK_REG_P (cmp_op1)
17140 && stack_top_dies
17141 && find_regno_note (insn, REG_DEAD, REGNO (cmp_op1))
17142 && REGNO (cmp_op1) != FIRST_STACK_REG)
17144 /* If both the top of the 387 stack and the other operand, which is
17145 also a stack register, die, then this must be a `fcompp'
17146 float compare. */
17148 if (eflags_p)
17150 /* There is no double popping fcomi variant. Fortunately,
17151 eflags is immune from the fstp's cc clobbering. */
17152 if (unordered_p)
17153 output_asm_insn ("fucomip\t{%y1, %0|%0, %y1}", operands);
17154 else
17155 output_asm_insn ("fcomip\t{%y1, %0|%0, %y1}", operands);
17156 return output_387_ffreep (operands, 0);
17158 else
17160 if (unordered_p)
17161 return "fucompp\n\tfnstsw\t%0";
17162 else
17163 return "fcompp\n\tfnstsw\t%0";
17166 else
17168 /* Encoded here as eflags_p | intmode | unordered_p | stack_top_dies. */
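	/* For instance, eflags_p = 1, a non-integer (FP) operand,
	   unordered_p = 1 and a dying stack top give mask 0b1011 = 11,
	   which selects "fucomip\t{%y1, %0|%0, %y1}" from the table.  */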
17170 static const char * const alt[16] =
17172 "fcom%Z2\t%y2\n\tfnstsw\t%0",
17173 "fcomp%Z2\t%y2\n\tfnstsw\t%0",
17174 "fucom%Z2\t%y2\n\tfnstsw\t%0",
17175 "fucomp%Z2\t%y2\n\tfnstsw\t%0",
17177 "ficom%Z2\t%y2\n\tfnstsw\t%0",
17178 "ficomp%Z2\t%y2\n\tfnstsw\t%0",
17179 NULL,
17180 NULL,
17182 "fcomi\t{%y1, %0|%0, %y1}",
17183 "fcomip\t{%y1, %0|%0, %y1}",
17184 "fucomi\t{%y1, %0|%0, %y1}",
17185 "fucomip\t{%y1, %0|%0, %y1}",
17187 NULL,
17188 NULL,
17189 NULL,
17190 NULL
17193 int mask;
17194 const char *ret;
17196 mask = eflags_p << 3;
17197 mask |= (GET_MODE_CLASS (GET_MODE (cmp_op1)) == MODE_INT) << 2;
17198 mask |= unordered_p << 1;
17199 mask |= stack_top_dies;
17201 gcc_assert (mask < 16);
17202 ret = alt[mask];
17203 gcc_assert (ret);
17205 return ret;
17209 void
17210 ix86_output_addr_vec_elt (FILE *file, int value)
17212 const char *directive = ASM_LONG;
17214 #ifdef ASM_QUAD
17215 if (TARGET_LP64)
17216 directive = ASM_QUAD;
17217 #else
17218 gcc_assert (!TARGET_64BIT);
17219 #endif
17221 fprintf (file, "%s%s%d\n", directive, LPREFIX, value);
17224 void
17225 ix86_output_addr_diff_elt (FILE *file, int value, int rel)
17227 const char *directive = ASM_LONG;
17229 #ifdef ASM_QUAD
17230 if (TARGET_64BIT && CASE_VECTOR_MODE == DImode)
17231 directive = ASM_QUAD;
17232 #else
17233 gcc_assert (!TARGET_64BIT);
17234 #endif
17235 /* We can't use @GOTOFF for text labels on VxWorks; see gotoff_operand. */
17236 if (TARGET_64BIT || TARGET_VXWORKS_RTP)
17237 fprintf (file, "%s%s%d-%s%d\n",
17238 directive, LPREFIX, value, LPREFIX, rel);
17239 else if (HAVE_AS_GOTOFF_IN_DATA)
17240 fprintf (file, ASM_LONG "%s%d@GOTOFF\n", LPREFIX, value);
17241 #if TARGET_MACHO
17242 else if (TARGET_MACHO)
17244 fprintf (file, ASM_LONG "%s%d-", LPREFIX, value);
17245 machopic_output_function_base_name (file);
17246 putc ('\n', file);
17248 #endif
17249 else
17250 asm_fprintf (file, ASM_LONG "%U%s+[.-%s%d]\n",
17251 GOT_SYMBOL_NAME, LPREFIX, value);
17254 /* Generate either "mov $0, reg" or "xor reg, reg", as appropriate
17255 for the target. */
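/* E.g. clearing %eax is normally emitted as the shorter "xor %eax, %eax"
   (with an explicit FLAGS_REG clobber, since XOR sets the flags), and
   only as "mov $0, %eax" when TARGET_USE_MOV0 asks for it and we are
   not optimizing for size.  */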
17257 void
17258 ix86_expand_clear (rtx dest)
17260 rtx tmp;
17262 /* We play register width games, which are only valid after reload. */
17263 gcc_assert (reload_completed);
17265 /* Avoid HImode and its attendant prefix byte. */
17266 if (GET_MODE_SIZE (GET_MODE (dest)) < 4)
17267 dest = gen_rtx_REG (SImode, REGNO (dest));
17268 tmp = gen_rtx_SET (dest, const0_rtx);
17270 if (!TARGET_USE_MOV0 || optimize_insn_for_size_p ())
17272 rtx clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
17273 tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, tmp, clob));
17276 emit_insn (tmp);
17279 /* X is an unchanging MEM. If it is a constant pool reference, return
17280 the constant pool rtx, else NULL. */
17282 rtx
17283 maybe_get_pool_constant (rtx x)
17285 x = ix86_delegitimize_address (XEXP (x, 0));
17287 if (GET_CODE (x) == SYMBOL_REF && CONSTANT_POOL_ADDRESS_P (x))
17288 return get_pool_constant (x);
17290 return NULL_RTX;
17293 void
17294 ix86_expand_move (machine_mode mode, rtx operands[])
17296 rtx op0, op1;
17297 enum tls_model model;
17299 op0 = operands[0];
17300 op1 = operands[1];
17302 if (GET_CODE (op1) == SYMBOL_REF)
17304 rtx tmp;
17306 model = SYMBOL_REF_TLS_MODEL (op1);
17307 if (model)
17309 op1 = legitimize_tls_address (op1, model, true);
17310 op1 = force_operand (op1, op0);
17311 if (op1 == op0)
17312 return;
17313 op1 = convert_to_mode (mode, op1, 1);
17315 else if ((tmp = legitimize_pe_coff_symbol (op1, false)) != NULL_RTX)
17316 op1 = tmp;
17318 else if (GET_CODE (op1) == CONST
17319 && GET_CODE (XEXP (op1, 0)) == PLUS
17320 && GET_CODE (XEXP (XEXP (op1, 0), 0)) == SYMBOL_REF)
17322 rtx addend = XEXP (XEXP (op1, 0), 1);
17323 rtx symbol = XEXP (XEXP (op1, 0), 0);
17324 rtx tmp;
17326 model = SYMBOL_REF_TLS_MODEL (symbol);
17327 if (model)
17328 tmp = legitimize_tls_address (symbol, model, true);
17329 else
17330 tmp = legitimize_pe_coff_symbol (symbol, true);
17332 if (tmp)
17334 tmp = force_operand (tmp, NULL);
17335 tmp = expand_simple_binop (Pmode, PLUS, tmp, addend,
17336 op0, 1, OPTAB_DIRECT);
17337 if (tmp == op0)
17338 return;
17339 op1 = convert_to_mode (mode, tmp, 1);
17343 if ((flag_pic || MACHOPIC_INDIRECT)
17344 && symbolic_operand (op1, mode))
17346 if (TARGET_MACHO && !TARGET_64BIT)
17348 #if TARGET_MACHO
17349 /* dynamic-no-pic */
17350 if (MACHOPIC_INDIRECT)
17352 rtx temp = (op0 && REG_P (op0) && mode == Pmode)
17353 ? op0 : gen_reg_rtx (Pmode);
17354 op1 = machopic_indirect_data_reference (op1, temp);
17355 if (MACHOPIC_PURE)
17356 op1 = machopic_legitimize_pic_address (op1, mode,
17357 temp == op1 ? 0 : temp);
17359 if (op0 != op1 && GET_CODE (op0) != MEM)
17361 rtx insn = gen_rtx_SET (op0, op1);
17362 emit_insn (insn);
17363 return;
17365 if (GET_CODE (op0) == MEM)
17366 op1 = force_reg (Pmode, op1);
17367 else
17369 rtx temp = op0;
17370 if (GET_CODE (temp) != REG)
17371 temp = gen_reg_rtx (Pmode);
17372 temp = legitimize_pic_address (op1, temp);
17373 if (temp == op0)
17374 return;
17375 op1 = temp;
17377 /* dynamic-no-pic */
17378 #endif
17380 else
17382 if (MEM_P (op0))
17383 op1 = force_reg (mode, op1);
17384 else if (!(TARGET_64BIT && x86_64_movabs_operand (op1, DImode)))
17386 rtx reg = can_create_pseudo_p () ? NULL_RTX : op0;
17387 op1 = legitimize_pic_address (op1, reg);
17388 if (op0 == op1)
17389 return;
17390 op1 = convert_to_mode (mode, op1, 1);
17394 else
17396 if (MEM_P (op0)
17397 && (PUSH_ROUNDING (GET_MODE_SIZE (mode)) != GET_MODE_SIZE (mode)
17398 || !push_operand (op0, mode))
17399 && MEM_P (op1))
17400 op1 = force_reg (mode, op1);
17402 if (push_operand (op0, mode)
17403 && ! general_no_elim_operand (op1, mode))
17404 op1 = copy_to_mode_reg (mode, op1);
17406 /* Force large constants in 64bit compilation into register
17407 to get them CSEed. */
17408 if (can_create_pseudo_p ()
17409 && (mode == DImode) && TARGET_64BIT
17410 && immediate_operand (op1, mode)
17411 && !x86_64_zext_immediate_operand (op1, VOIDmode)
17412 && !register_operand (op0, mode)
17413 && optimize)
17414 op1 = copy_to_mode_reg (mode, op1);
17416 if (can_create_pseudo_p ()
17417 && CONST_DOUBLE_P (op1))
17419 /* If we are loading a floating point constant to a register,
17420 force the value to memory now, since we'll get better code
17421 out the back end. */
17423 op1 = validize_mem (force_const_mem (mode, op1));
17424 if (!register_operand (op0, mode))
17426 rtx temp = gen_reg_rtx (mode);
17427 emit_insn (gen_rtx_SET (temp, op1));
17428 emit_move_insn (op0, temp);
17429 return;
17434 emit_insn (gen_rtx_SET (op0, op1));
17437 void
17438 ix86_expand_vector_move (machine_mode mode, rtx operands[])
17440 rtx op0 = operands[0], op1 = operands[1];
17441 unsigned int align = GET_MODE_ALIGNMENT (mode);
17443 if (push_operand (op0, VOIDmode))
17444 op0 = emit_move_resolve_push (mode, op0);
17446 /* Force constants other than zero into memory. We do not know how
17447 the instructions used to build constants modify the upper 64 bits
17448 of the register; once we have that information we may be able
17449 to handle some of them more efficiently. */
17450 if (can_create_pseudo_p ()
17451 && register_operand (op0, mode)
17452 && (CONSTANT_P (op1)
17453 || (GET_CODE (op1) == SUBREG
17454 && CONSTANT_P (SUBREG_REG (op1))))
17455 && !standard_sse_constant_p (op1))
17456 op1 = validize_mem (force_const_mem (mode, op1));
17458 /* We need to check memory alignment for SSE mode since an attribute
17459 can make operands unaligned. */
17460 if (can_create_pseudo_p ()
17461 && SSE_REG_MODE_P (mode)
17462 && ((MEM_P (op0) && (MEM_ALIGN (op0) < align))
17463 || (MEM_P (op1) && (MEM_ALIGN (op1) < align))))
17465 rtx tmp[2];
17467 /* ix86_expand_vector_move_misalign() does not like constants ... */
17468 if (CONSTANT_P (op1)
17469 || (GET_CODE (op1) == SUBREG
17470 && CONSTANT_P (SUBREG_REG (op1))))
17471 op1 = validize_mem (force_const_mem (mode, op1));
17473 /* ... nor both arguments in memory. */
17474 if (!register_operand (op0, mode)
17475 && !register_operand (op1, mode))
17476 op1 = force_reg (mode, op1);
17478 tmp[0] = op0; tmp[1] = op1;
17479 ix86_expand_vector_move_misalign (mode, tmp);
17480 return;
17483 /* Make operand1 a register if it isn't already. */
17484 if (can_create_pseudo_p ()
17485 && !register_operand (op0, mode)
17486 && !register_operand (op1, mode))
17488 emit_move_insn (op0, force_reg (GET_MODE (op0), op1));
17489 return;
17492 emit_insn (gen_rtx_SET (op0, op1));
17495 /* Split 32-byte AVX unaligned load and store if needed. */
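/* E.g. with TARGET_AVX256_SPLIT_UNALIGNED_LOAD and when optimizing for
   speed, a 32-byte load is emitted as a 16-byte load of the low half
   followed by a VEC_CONCAT with the high half at offset 16; unaligned
   stores are likewise split into two vextractf128 halves (an
   illustrative summary of the code below).  */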
17497 static void
17498 ix86_avx256_split_vector_move_misalign (rtx op0, rtx op1)
17500 rtx m;
17501 rtx (*extract) (rtx, rtx, rtx);
17502 rtx (*load_unaligned) (rtx, rtx);
17503 rtx (*store_unaligned) (rtx, rtx);
17504 machine_mode mode;
17506 switch (GET_MODE (op0))
17508 default:
17509 gcc_unreachable ();
17510 case V32QImode:
17511 extract = gen_avx_vextractf128v32qi;
17512 load_unaligned = gen_avx_loaddquv32qi;
17513 store_unaligned = gen_avx_storedquv32qi;
17514 mode = V16QImode;
17515 break;
17516 case V8SFmode:
17517 extract = gen_avx_vextractf128v8sf;
17518 load_unaligned = gen_avx_loadups256;
17519 store_unaligned = gen_avx_storeups256;
17520 mode = V4SFmode;
17521 break;
17522 case V4DFmode:
17523 extract = gen_avx_vextractf128v4df;
17524 load_unaligned = gen_avx_loadupd256;
17525 store_unaligned = gen_avx_storeupd256;
17526 mode = V2DFmode;
17527 break;
17530 if (MEM_P (op1))
17532 if (TARGET_AVX256_SPLIT_UNALIGNED_LOAD
17533 && optimize_insn_for_speed_p ())
17535 rtx r = gen_reg_rtx (mode);
17536 m = adjust_address (op1, mode, 0);
17537 emit_move_insn (r, m);
17538 m = adjust_address (op1, mode, 16);
17539 r = gen_rtx_VEC_CONCAT (GET_MODE (op0), r, m);
17540 emit_move_insn (op0, r);
17542 /* Normal *mov<mode>_internal pattern will handle
17543 unaligned loads just fine if misaligned_operand
17544 is true, and without the UNSPEC it can be combined
17545 with arithmetic instructions. */
17546 else if (misaligned_operand (op1, GET_MODE (op1)))
17547 emit_insn (gen_rtx_SET (op0, op1));
17548 else
17549 emit_insn (load_unaligned (op0, op1));
17551 else if (MEM_P (op0))
17553 if (TARGET_AVX256_SPLIT_UNALIGNED_STORE
17554 && optimize_insn_for_speed_p ())
17556 m = adjust_address (op0, mode, 0);
17557 emit_insn (extract (m, op1, const0_rtx));
17558 m = adjust_address (op0, mode, 16);
17559 emit_insn (extract (m, op1, const1_rtx));
17561 else
17562 emit_insn (store_unaligned (op0, op1));
17564 else
17565 gcc_unreachable ();
17568 /* Implement the movmisalign patterns for SSE. Non-SSE modes go
17569 straight to ix86_expand_vector_move. */
17570 /* Code generation for scalar reg-reg moves of single and double precision data:
17571 if (x86_sse_partial_reg_dependency == true | x86_sse_split_regs == true)
17572 movaps reg, reg
17573 else
17574 movss reg, reg
17575 if (x86_sse_partial_reg_dependency == true)
17576 movapd reg, reg
17577 else
17578 movsd reg, reg
17580 Code generation for scalar loads of double precision data:
17581 if (x86_sse_split_regs == true)
17582 movlpd mem, reg (gas syntax)
17583 else
17584 movsd mem, reg
17586 Code generation for unaligned packed loads of single precision data
17587 (x86_sse_unaligned_move_optimal overrides x86_sse_partial_reg_dependency):
17588 if (x86_sse_unaligned_move_optimal)
17589 movups mem, reg
17591 if (x86_sse_partial_reg_dependency == true)
17593 xorps reg, reg
17594 movlps mem, reg
17595 movhps mem+8, reg
17597 else
17599 movlps mem, reg
17600 movhps mem+8, reg
17603 Code generation for unaligned packed loads of double precision data
17604 (x86_sse_unaligned_move_optimal overrides x86_sse_split_regs):
17605 if (x86_sse_unaligned_move_optimal)
17606 movupd mem, reg
17608 if (x86_sse_split_regs == true)
17610 movlpd mem, reg
17611 movhpd mem+8, reg
17613 else
17615 movsd mem, reg
17616 movhpd mem+8, reg
17620 void
17621 ix86_expand_vector_move_misalign (machine_mode mode, rtx operands[])
17623 rtx op0, op1, orig_op0 = NULL_RTX, m;
17624 rtx (*load_unaligned) (rtx, rtx);
17625 rtx (*store_unaligned) (rtx, rtx);
17627 op0 = operands[0];
17628 op1 = operands[1];
17630 if (GET_MODE_SIZE (mode) == 64)
17632 switch (GET_MODE_CLASS (mode))
17634 case MODE_VECTOR_INT:
17635 case MODE_INT:
17636 if (GET_MODE (op0) != V16SImode)
17638 if (!MEM_P (op0))
17640 orig_op0 = op0;
17641 op0 = gen_reg_rtx (V16SImode);
17643 else
17644 op0 = gen_lowpart (V16SImode, op0);
17646 op1 = gen_lowpart (V16SImode, op1);
17647 /* FALLTHRU */
17649 case MODE_VECTOR_FLOAT:
17650 switch (GET_MODE (op0))
17652 default:
17653 gcc_unreachable ();
17654 case V16SImode:
17655 load_unaligned = gen_avx512f_loaddquv16si;
17656 store_unaligned = gen_avx512f_storedquv16si;
17657 break;
17658 case V16SFmode:
17659 load_unaligned = gen_avx512f_loadups512;
17660 store_unaligned = gen_avx512f_storeups512;
17661 break;
17662 case V8DFmode:
17663 load_unaligned = gen_avx512f_loadupd512;
17664 store_unaligned = gen_avx512f_storeupd512;
17665 break;
17668 if (MEM_P (op1))
17669 emit_insn (load_unaligned (op0, op1));
17670 else if (MEM_P (op0))
17671 emit_insn (store_unaligned (op0, op1));
17672 else
17673 gcc_unreachable ();
17674 if (orig_op0)
17675 emit_move_insn (orig_op0, gen_lowpart (GET_MODE (orig_op0), op0));
17676 break;
17678 default:
17679 gcc_unreachable ();
17682 return;
17685 if (TARGET_AVX
17686 && GET_MODE_SIZE (mode) == 32)
17688 switch (GET_MODE_CLASS (mode))
17690 case MODE_VECTOR_INT:
17691 case MODE_INT:
17692 if (GET_MODE (op0) != V32QImode)
17694 if (!MEM_P (op0))
17696 orig_op0 = op0;
17697 op0 = gen_reg_rtx (V32QImode);
17699 else
17700 op0 = gen_lowpart (V32QImode, op0);
17702 op1 = gen_lowpart (V32QImode, op1);
17703 /* FALLTHRU */
17705 case MODE_VECTOR_FLOAT:
17706 ix86_avx256_split_vector_move_misalign (op0, op1);
17707 if (orig_op0)
17708 emit_move_insn (orig_op0, gen_lowpart (GET_MODE (orig_op0), op0));
17709 break;
17711 default:
17712 gcc_unreachable ();
17715 return;
17718 if (MEM_P (op1))
17720 /* Normal *mov<mode>_internal pattern will handle
17721 unaligned loads just fine if misaligned_operand
17722 is true, and without the UNSPEC it can be combined
17723 with arithmetic instructions. */
17724 if (TARGET_AVX
17725 && (GET_MODE_CLASS (mode) == MODE_VECTOR_INT
17726 || GET_MODE_CLASS (mode) == MODE_VECTOR_FLOAT)
17727 && misaligned_operand (op1, GET_MODE (op1)))
17728 emit_insn (gen_rtx_SET (op0, op1));
17729 /* ??? If we have typed data, then it would appear that using
17730 movdqu is the only way to get unaligned data loaded with
17731 integer type. */
17732 else if (TARGET_SSE2 && GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
17734 if (GET_MODE (op0) != V16QImode)
17736 orig_op0 = op0;
17737 op0 = gen_reg_rtx (V16QImode);
17739 op1 = gen_lowpart (V16QImode, op1);
17740 /* We will eventually emit movups based on insn attributes. */
17741 emit_insn (gen_sse2_loaddquv16qi (op0, op1));
17742 if (orig_op0)
17743 emit_move_insn (orig_op0, gen_lowpart (GET_MODE (orig_op0), op0));
17745 else if (TARGET_SSE2 && mode == V2DFmode)
17747 rtx zero;
17749 if (TARGET_AVX
17750 || TARGET_SSE_UNALIGNED_LOAD_OPTIMAL
17751 || TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL
17752 || optimize_insn_for_size_p ())
17754 /* We will eventually emit movups based on insn attributes. */
17755 emit_insn (gen_sse2_loadupd (op0, op1));
17756 return;
17759 /* When SSE registers are split into halves, we can avoid
17760 writing to the top half twice. */
17761 if (TARGET_SSE_SPLIT_REGS)
17763 emit_clobber (op0);
17764 zero = op0;
17766 else
17768 /* ??? Not sure about the best option for the Intel chips.
17769 The following would seem to satisfy; the register is
17770 entirely cleared, breaking the dependency chain. We
17771 then store to the upper half, with a dependency depth
17772 of one. A rumor has it that Intel recommends two movsd
17773 followed by an unpacklpd, but this is unconfirmed. And
17774 given that the dependency depth of the unpacklpd would
17775 still be one, I'm not sure why this would be better. */
17776 zero = CONST0_RTX (V2DFmode);
17779 m = adjust_address (op1, DFmode, 0);
17780 emit_insn (gen_sse2_loadlpd (op0, zero, m));
17781 m = adjust_address (op1, DFmode, 8);
17782 emit_insn (gen_sse2_loadhpd (op0, op0, m));
17784 else
17786 rtx t;
17788 if (TARGET_AVX
17789 || TARGET_SSE_UNALIGNED_LOAD_OPTIMAL
17790 || TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL
17791 || optimize_insn_for_size_p ())
17793 if (GET_MODE (op0) != V4SFmode)
17795 orig_op0 = op0;
17796 op0 = gen_reg_rtx (V4SFmode);
17798 op1 = gen_lowpart (V4SFmode, op1);
17799 emit_insn (gen_sse_loadups (op0, op1));
17800 if (orig_op0)
17801 emit_move_insn (orig_op0,
17802 gen_lowpart (GET_MODE (orig_op0), op0));
17803 return;
17806 if (mode != V4SFmode)
17807 t = gen_reg_rtx (V4SFmode);
17808 else
17809 t = op0;
17811 if (TARGET_SSE_PARTIAL_REG_DEPENDENCY)
17812 emit_move_insn (t, CONST0_RTX (V4SFmode));
17813 else
17814 emit_clobber (t);
17816 m = adjust_address (op1, V2SFmode, 0);
17817 emit_insn (gen_sse_loadlps (t, t, m));
17818 m = adjust_address (op1, V2SFmode, 8);
17819 emit_insn (gen_sse_loadhps (t, t, m));
17820 if (mode != V4SFmode)
17821 emit_move_insn (op0, gen_lowpart (mode, t));
17824 else if (MEM_P (op0))
17826 if (TARGET_SSE2 && GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
17828 op0 = gen_lowpart (V16QImode, op0);
17829 op1 = gen_lowpart (V16QImode, op1);
17830 /* We will eventually emit movups based on insn attributes. */
17831 emit_insn (gen_sse2_storedquv16qi (op0, op1));
17833 else if (TARGET_SSE2 && mode == V2DFmode)
17835 if (TARGET_AVX
17836 || TARGET_SSE_UNALIGNED_STORE_OPTIMAL
17837 || TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL
17838 || optimize_insn_for_size_p ())
17839 /* We will eventually emit movups based on insn attributes. */
17840 emit_insn (gen_sse2_storeupd (op0, op1));
17841 else
17843 m = adjust_address (op0, DFmode, 0);
17844 emit_insn (gen_sse2_storelpd (m, op1));
17845 m = adjust_address (op0, DFmode, 8);
17846 emit_insn (gen_sse2_storehpd (m, op1));
17849 else
17851 if (mode != V4SFmode)
17852 op1 = gen_lowpart (V4SFmode, op1);
17854 if (TARGET_AVX
17855 || TARGET_SSE_UNALIGNED_STORE_OPTIMAL
17856 || TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL
17857 || optimize_insn_for_size_p ())
17859 op0 = gen_lowpart (V4SFmode, op0);
17860 emit_insn (gen_sse_storeups (op0, op1));
17862 else
17864 m = adjust_address (op0, V2SFmode, 0);
17865 emit_insn (gen_sse_storelps (m, op1));
17866 m = adjust_address (op0, V2SFmode, 8);
17867 emit_insn (gen_sse_storehps (m, op1));
17871 else
17872 gcc_unreachable ();
17875 /* Helper function of ix86_fixup_binary_operands to canonicalize
17876 operand order. Returns true if the operands should be swapped. */
17878 static bool
17879 ix86_swap_binary_operands_p (enum rtx_code code, machine_mode mode,
17880 rtx operands[])
17882 rtx dst = operands[0];
17883 rtx src1 = operands[1];
17884 rtx src2 = operands[2];
17886 /* If the operation is not commutative, we can't do anything. */
17887 if (GET_RTX_CLASS (code) != RTX_COMM_ARITH)
17888 return false;
17890 /* Highest priority is that src1 should match dst. */
17891 if (rtx_equal_p (dst, src1))
17892 return false;
17893 if (rtx_equal_p (dst, src2))
17894 return true;
17896 /* Next highest priority is that immediate constants come second. */
17897 if (immediate_operand (src2, mode))
17898 return false;
17899 if (immediate_operand (src1, mode))
17900 return true;
17902 /* Lowest priority is that memory references should come second. */
17903 if (MEM_P (src2))
17904 return false;
17905 if (MEM_P (src1))
17906 return true;
17908 return false;
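/* For example, for a commutative PLUS where the destination register
   matches the second source operand, ix86_swap_binary_operands_p
   returns true so the matching operand becomes src1; likewise an
   immediate or memory src1 is moved into the second position.  */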
17912 /* Fix up OPERANDS to satisfy ix86_binary_operator_ok. Return the
17913 destination to use for the operation. If different from the true
17914 destination in operands[0], a copy operation will be required. */
17916 rtx
17917 ix86_fixup_binary_operands (enum rtx_code code, machine_mode mode,
17918 rtx operands[])
17920 rtx dst = operands[0];
17921 rtx src1 = operands[1];
17922 rtx src2 = operands[2];
17924 /* Canonicalize operand order. */
17925 if (ix86_swap_binary_operands_p (code, mode, operands))
17927 /* It is invalid to swap operands of different modes. */
17928 gcc_assert (GET_MODE (src1) == GET_MODE (src2));
17930 std::swap (src1, src2);
17933 /* Both source operands cannot be in memory. */
17934 if (MEM_P (src1) && MEM_P (src2))
17936 /* Optimization: Only read from memory once. */
17937 if (rtx_equal_p (src1, src2))
17939 src2 = force_reg (mode, src2);
17940 src1 = src2;
17942 else if (rtx_equal_p (dst, src1))
17943 src2 = force_reg (mode, src2);
17944 else
17945 src1 = force_reg (mode, src1);
17948 /* If the destination is memory, and we do not have matching source
17949 operands, do things in registers. */
17950 if (MEM_P (dst) && !rtx_equal_p (dst, src1))
17951 dst = gen_reg_rtx (mode);
17953 /* Source 1 cannot be a constant. */
17954 if (CONSTANT_P (src1))
17955 src1 = force_reg (mode, src1);
17957 /* Source 1 cannot be a non-matching memory. */
17958 if (MEM_P (src1) && !rtx_equal_p (dst, src1))
17959 src1 = force_reg (mode, src1);
17961 /* Improve address combine. */
17962 if (code == PLUS
17963 && GET_MODE_CLASS (mode) == MODE_INT
17964 && MEM_P (src2))
17965 src2 = force_reg (mode, src2);
17967 operands[1] = src1;
17968 operands[2] = src2;
17969 return dst;
17972 /* Similarly, but assume that the destination has already been
17973 set up properly. */
17975 void
17976 ix86_fixup_binary_operands_no_copy (enum rtx_code code,
17977 machine_mode mode, rtx operands[])
17979 rtx dst = ix86_fixup_binary_operands (code, mode, operands);
17980 gcc_assert (dst == operands[0]);
17983 /* Attempt to expand a binary operator. Make the expansion closer to the
17984 actual machine than just general_operand, which would allow 3 separate
17985 memory references (one output, two input) in a single insn. */
17987 void
17988 ix86_expand_binary_operator (enum rtx_code code, machine_mode mode,
17989 rtx operands[])
17991 rtx src1, src2, dst, op, clob;
17993 dst = ix86_fixup_binary_operands (code, mode, operands);
17994 src1 = operands[1];
17995 src2 = operands[2];
17997 /* Emit the instruction. */
17999 op = gen_rtx_SET (dst, gen_rtx_fmt_ee (code, mode, src1, src2));
18001 if (reload_completed
18002 && code == PLUS
18003 && !rtx_equal_p (dst, src1))
18005 /* This is going to be an LEA; avoid splitting it later. */
18006 emit_insn (op);
18008 else
18010 clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
18011 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, op, clob)));
18014 /* Fix up the destination if needed. */
18015 if (dst != operands[0])
18016 emit_move_insn (operands[0], dst);
18019 /* Expand vector logical operation CODE (AND, IOR, XOR) in MODE with
18020 the given OPERANDS. */
18022 void
18023 ix86_expand_vector_logical_operator (enum rtx_code code, machine_mode mode,
18024 rtx operands[])
18026 rtx op1 = NULL_RTX, op2 = NULL_RTX;
18027 if (GET_CODE (operands[1]) == SUBREG)
18029 op1 = operands[1];
18030 op2 = operands[2];
18032 else if (GET_CODE (operands[2]) == SUBREG)
18034 op1 = operands[2];
18035 op2 = operands[1];
18037 /* Optimize (__m128i) d | (__m128i) e and similar code
18038 when d and e are float vectors into a float vector logical
18039 insn. In C/C++, without using intrinsics, there is no other way
18040 to express a vector logical operation on float vectors than
18041 to cast them temporarily to integer vectors. */
18042 if (op1
18043 && !TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL
18044 && ((GET_CODE (op2) == SUBREG || GET_CODE (op2) == CONST_VECTOR))
18045 && GET_MODE_CLASS (GET_MODE (SUBREG_REG (op1))) == MODE_VECTOR_FLOAT
18046 && GET_MODE_SIZE (GET_MODE (SUBREG_REG (op1))) == GET_MODE_SIZE (mode)
18047 && SUBREG_BYTE (op1) == 0
18048 && (GET_CODE (op2) == CONST_VECTOR
18049 || (GET_MODE (SUBREG_REG (op1)) == GET_MODE (SUBREG_REG (op2))
18050 && SUBREG_BYTE (op2) == 0))
18051 && can_create_pseudo_p ())
18053 rtx dst;
18054 switch (GET_MODE (SUBREG_REG (op1)))
18056 case V4SFmode:
18057 case V8SFmode:
18058 case V16SFmode:
18059 case V2DFmode:
18060 case V4DFmode:
18061 case V8DFmode:
18062 dst = gen_reg_rtx (GET_MODE (SUBREG_REG (op1)));
18063 if (GET_CODE (op2) == CONST_VECTOR)
18065 op2 = gen_lowpart (GET_MODE (dst), op2);
18066 op2 = force_reg (GET_MODE (dst), op2);
18068 else
18070 op1 = operands[1];
18071 op2 = SUBREG_REG (operands[2]);
18072 if (!nonimmediate_operand (op2, GET_MODE (dst)))
18073 op2 = force_reg (GET_MODE (dst), op2);
18075 op1 = SUBREG_REG (op1);
18076 if (!nonimmediate_operand (op1, GET_MODE (dst)))
18077 op1 = force_reg (GET_MODE (dst), op1);
18078 emit_insn (gen_rtx_SET (dst,
18079 gen_rtx_fmt_ee (code, GET_MODE (dst),
18080 op1, op2)));
18081 emit_move_insn (operands[0], gen_lowpart (mode, dst));
18082 return;
18083 default:
18084 break;
18087 if (!nonimmediate_operand (operands[1], mode))
18088 operands[1] = force_reg (mode, operands[1]);
18089 if (!nonimmediate_operand (operands[2], mode))
18090 operands[2] = force_reg (mode, operands[2]);
18091 ix86_fixup_binary_operands_no_copy (code, mode, operands);
18092 emit_insn (gen_rtx_SET (operands[0],
18093 gen_rtx_fmt_ee (code, mode, operands[1],
18094 operands[2])));
18097 /* Return TRUE or FALSE depending on whether the binary operator meets the
18098 appropriate constraints. */
18100 bool
18101 ix86_binary_operator_ok (enum rtx_code code, machine_mode mode,
18102 rtx operands[3])
18104 rtx dst = operands[0];
18105 rtx src1 = operands[1];
18106 rtx src2 = operands[2];
18108 /* Both source operands cannot be in memory. */
18109 if (MEM_P (src1) && MEM_P (src2))
18110 return false;
18112 /* Canonicalize operand order for commutative operators. */
18113 if (ix86_swap_binary_operands_p (code, mode, operands))
18114 std::swap (src1, src2);
18116 /* If the destination is memory, we must have a matching source operand. */
18117 if (MEM_P (dst) && !rtx_equal_p (dst, src1))
18118 return false;
18120 /* Source 1 cannot be a constant. */
18121 if (CONSTANT_P (src1))
18122 return false;
18124 /* Source 1 cannot be a non-matching memory. */
18125 if (MEM_P (src1) && !rtx_equal_p (dst, src1))
18126 /* Support "andhi/andsi/anddi" as a zero-extending move. */
18127 return (code == AND
18128 && (mode == HImode
18129 || mode == SImode
18130 || (TARGET_64BIT && mode == DImode))
18131 && satisfies_constraint_L (src2));
18133 return true;
18136 /* Attempt to expand a unary operator. Make the expansion closer to the
18137 actual machine than just general_operand, which would allow 2 separate
18138 memory references (one output, one input) in a single insn. */
18140 void
18141 ix86_expand_unary_operator (enum rtx_code code, machine_mode mode,
18142 rtx operands[])
18144 bool matching_memory = false;
18145 rtx src, dst, op, clob;
18147 dst = operands[0];
18148 src = operands[1];
18150 /* If the destination is memory, and we do not have matching source
18151 operands, do things in registers. */
18152 if (MEM_P (dst))
18154 if (rtx_equal_p (dst, src))
18155 matching_memory = true;
18156 else
18157 dst = gen_reg_rtx (mode);
18160 /* When source operand is memory, destination must match. */
18161 if (MEM_P (src) && !matching_memory)
18162 src = force_reg (mode, src);
18164 /* Emit the instruction. */
18166 op = gen_rtx_SET (dst, gen_rtx_fmt_e (code, mode, src));
18168 if (code == NOT)
18169 emit_insn (op);
18170 else
18172 clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
18173 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, op, clob)));
18176 /* Fix up the destination if needed. */
18177 if (dst != operands[0])
18178 emit_move_insn (operands[0], dst);
18181 /* Split 32bit/64bit divmod with 8bit unsigned divmod if dividend and
18182 divisor are within the range [0-255]. */
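/* The emitted skeleton is roughly:

       or   dividend, divisor  -> scratch
       test scratch, $-0x100   ; do both operands fit in 8 bits?
       je   .Lqimode
       <full signed/unsigned divmod>
       jmp  .Lend
     .Lqimode:
       <8bit unsigned divmod; quotient in AL, remainder in AH>
     .Lend:

   (an illustrative sketch of the control flow built below).  */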
18184 void
18185 ix86_split_idivmod (machine_mode mode, rtx operands[],
18186 bool signed_p)
18188 rtx_code_label *end_label, *qimode_label;
18189 rtx insn, div, mod;
18190 rtx scratch, tmp0, tmp1, tmp2;
18191 rtx (*gen_divmod4_1) (rtx, rtx, rtx, rtx);
18192 rtx (*gen_zero_extend) (rtx, rtx);
18193 rtx (*gen_test_ccno_1) (rtx, rtx);
18195 switch (mode)
18197 case SImode:
18198 gen_divmod4_1 = signed_p ? gen_divmodsi4_1 : gen_udivmodsi4_1;
18199 gen_test_ccno_1 = gen_testsi_ccno_1;
18200 gen_zero_extend = gen_zero_extendqisi2;
18201 break;
18202 case DImode:
18203 gen_divmod4_1 = signed_p ? gen_divmoddi4_1 : gen_udivmoddi4_1;
18204 gen_test_ccno_1 = gen_testdi_ccno_1;
18205 gen_zero_extend = gen_zero_extendqidi2;
18206 break;
18207 default:
18208 gcc_unreachable ();
18211 end_label = gen_label_rtx ();
18212 qimode_label = gen_label_rtx ();
18214 scratch = gen_reg_rtx (mode);
18216 /* Use 8bit unsigned divmod if dividend and divisor are within
18217 the range [0-255]. */
18218 emit_move_insn (scratch, operands[2]);
18219 scratch = expand_simple_binop (mode, IOR, scratch, operands[3],
18220 scratch, 1, OPTAB_DIRECT);
18221 emit_insn (gen_test_ccno_1 (scratch, GEN_INT (-0x100)));
18222 tmp0 = gen_rtx_REG (CCNOmode, FLAGS_REG);
18223 tmp0 = gen_rtx_EQ (VOIDmode, tmp0, const0_rtx);
18224 tmp0 = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp0,
18225 gen_rtx_LABEL_REF (VOIDmode, qimode_label),
18226 pc_rtx);
18227 insn = emit_jump_insn (gen_rtx_SET (pc_rtx, tmp0));
18228 predict_jump (REG_BR_PROB_BASE * 50 / 100);
18229 JUMP_LABEL (insn) = qimode_label;
18231 /* Generate the original signed/unsigned divmod. */
18232 div = gen_divmod4_1 (operands[0], operands[1],
18233 operands[2], operands[3]);
18234 emit_insn (div);
18236 /* Branch to the end. */
18237 emit_jump_insn (gen_jump (end_label));
18238 emit_barrier ();
18240 /* Generate 8bit unsigned divide. */
18241 emit_label (qimode_label);
18242 /* Don't use operands[0] for result of 8bit divide since not all
18243 registers support QImode ZERO_EXTRACT. */
18244 tmp0 = simplify_gen_subreg (HImode, scratch, mode, 0);
18245 tmp1 = simplify_gen_subreg (HImode, operands[2], mode, 0);
18246 tmp2 = simplify_gen_subreg (QImode, operands[3], mode, 0);
18247 emit_insn (gen_udivmodhiqi3 (tmp0, tmp1, tmp2));
18249 if (signed_p)
18251 div = gen_rtx_DIV (SImode, operands[2], operands[3]);
18252 mod = gen_rtx_MOD (SImode, operands[2], operands[3]);
18254 else
18256 div = gen_rtx_UDIV (SImode, operands[2], operands[3]);
18257 mod = gen_rtx_UMOD (SImode, operands[2], operands[3]);
18260 /* Extract remainder from AH. */
18261 tmp1 = gen_rtx_ZERO_EXTRACT (mode, tmp0, GEN_INT (8), GEN_INT (8));
18262 if (REG_P (operands[1]))
18263 insn = emit_move_insn (operands[1], tmp1);
18264 else
18266 /* Need a new scratch register since the old one has result
18267 of 8bit divide. */
18268 scratch = gen_reg_rtx (mode);
18269 emit_move_insn (scratch, tmp1);
18270 insn = emit_move_insn (operands[1], scratch);
18272 set_unique_reg_note (insn, REG_EQUAL, mod);
18274 /* Zero extend quotient from AL. */
18275 tmp1 = gen_lowpart (QImode, tmp0);
18276 insn = emit_insn (gen_zero_extend (operands[0], tmp1));
18277 set_unique_reg_note (insn, REG_EQUAL, div);
18279 emit_label (end_label);
18282 #define LEA_MAX_STALL (3)
18283 #define LEA_SEARCH_THRESHOLD (LEA_MAX_STALL << 1)
18285 /* Increase the given DISTANCE in half-cycles according to
18286 dependencies between the PREV and NEXT instructions.
18287 Add 1 half-cycle if there is no dependency and
18288 go to the next cycle if there is some dependency. */
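/* E.g. if PREV defines a register that NEXT then uses, the distance is
   bumped to the start of the next full cycle (distance + (distance & 1)
   + 2 half-cycles); with no such dependency only one half-cycle is
   added.  */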
18290 static unsigned int
18291 increase_distance (rtx_insn *prev, rtx_insn *next, unsigned int distance)
18293 df_ref def, use;
18295 if (!prev || !next)
18296 return distance + (distance & 1) + 2;
18298 if (!DF_INSN_USES (next) || !DF_INSN_DEFS (prev))
18299 return distance + 1;
18301 FOR_EACH_INSN_USE (use, next)
18302 FOR_EACH_INSN_DEF (def, prev)
18303 if (!DF_REF_IS_ARTIFICIAL (def)
18304 && DF_REF_REGNO (use) == DF_REF_REGNO (def))
18305 return distance + (distance & 1) + 2;
18307 return distance + 1;
18310 /* Function checks if instruction INSN defines register number
18311 REGNO1 or REGNO2. */
18313 static bool
18314 insn_defines_reg (unsigned int regno1, unsigned int regno2,
18315 rtx_insn *insn)
18317 df_ref def;
18319 FOR_EACH_INSN_DEF (def, insn)
18320 if (DF_REF_REG_DEF_P (def)
18321 && !DF_REF_IS_ARTIFICIAL (def)
18322 && (regno1 == DF_REF_REGNO (def)
18323 || regno2 == DF_REF_REGNO (def)))
18324 return true;
18326 return false;
18329 /* Function checks if instruction INSN uses register number
18330 REGNO as a part of address expression. */
18332 static bool
18333 insn_uses_reg_mem (unsigned int regno, rtx insn)
18335 df_ref use;
18337 FOR_EACH_INSN_USE (use, insn)
18338 if (DF_REF_REG_MEM_P (use) && regno == DF_REF_REGNO (use))
18339 return true;
18341 return false;
18344 /* Search backward for a non-agu definition of register number REGNO1
18345 or register number REGNO2 in the basic block, starting from instruction
18346 START up to the head of the basic block or instruction INSN.
18348 Set *FOUND to true if a definition was found
18349 and false otherwise.
18351 The distance in half-cycles between START and the found instruction or
18352 the head of the BB is added to DISTANCE and returned. */
18354 static int
18355 distance_non_agu_define_in_bb (unsigned int regno1, unsigned int regno2,
18356 rtx_insn *insn, int distance,
18357 rtx_insn *start, bool *found)
18359 basic_block bb = start ? BLOCK_FOR_INSN (start) : NULL;
18360 rtx_insn *prev = start;
18361 rtx_insn *next = NULL;
18363 *found = false;
18365 while (prev
18366 && prev != insn
18367 && distance < LEA_SEARCH_THRESHOLD)
18369 if (NONDEBUG_INSN_P (prev) && NONJUMP_INSN_P (prev))
18371 distance = increase_distance (prev, next, distance);
18372 if (insn_defines_reg (regno1, regno2, prev))
18374 if (recog_memoized (prev) < 0
18375 || get_attr_type (prev) != TYPE_LEA)
18377 *found = true;
18378 return distance;
18382 next = prev;
18384 if (prev == BB_HEAD (bb))
18385 break;
18387 prev = PREV_INSN (prev);
18390 return distance;
18393 /* Search backward for non-agu definition of register number REGNO1
18394 or register number REGNO2 in INSN's basic block until
18395 1. Pass LEA_SEARCH_THRESHOLD instructions, or
18396 2. Reach neighbour BBs boundary, or
18397 3. Reach agu definition.
18398 Returns the distance between the non-agu definition point and INSN.
18399 If no definition point, returns -1. */
18401 static int
18402 distance_non_agu_define (unsigned int regno1, unsigned int regno2,
18403 rtx_insn *insn)
18405 basic_block bb = BLOCK_FOR_INSN (insn);
18406 int distance = 0;
18407 bool found = false;
18409 if (insn != BB_HEAD (bb))
18410 distance = distance_non_agu_define_in_bb (regno1, regno2, insn,
18411 distance, PREV_INSN (insn),
18412 &found);
18414 if (!found && distance < LEA_SEARCH_THRESHOLD)
18416 edge e;
18417 edge_iterator ei;
18418 bool simple_loop = false;
18420 FOR_EACH_EDGE (e, ei, bb->preds)
18421 if (e->src == bb)
18423 simple_loop = true;
18424 break;
18427 if (simple_loop)
18428 distance = distance_non_agu_define_in_bb (regno1, regno2,
18429 insn, distance,
18430 BB_END (bb), &found);
18431 else
18433 int shortest_dist = -1;
18434 bool found_in_bb = false;
18436 FOR_EACH_EDGE (e, ei, bb->preds)
18438 int bb_dist
18439 = distance_non_agu_define_in_bb (regno1, regno2,
18440 insn, distance,
18441 BB_END (e->src),
18442 &found_in_bb);
18443 if (found_in_bb)
18445 if (shortest_dist < 0)
18446 shortest_dist = bb_dist;
18447 else if (bb_dist > 0)
18448 shortest_dist = MIN (bb_dist, shortest_dist);
18450 found = true;
18454 distance = shortest_dist;
18458 /* get_attr_type may modify recog data. We want to make sure
18459 that recog data is valid for instruction INSN, on which
18460 distance_non_agu_define is called. INSN is unchanged here. */
18461 extract_insn_cached (insn);
18463 if (!found)
18464 return -1;
18466 return distance >> 1;
18469 /* Return the distance in half-cycles between INSN and the next
18470 insn that uses register number REGNO in a memory address, added
18471 to DISTANCE. Return -1 if REGNO is set.
18473 Set *FOUND to true if a register usage was found and
18474 false otherwise.
18475 Set *REDEFINED to true if a register redefinition was
18476 found and false otherwise. */
18478 static int
18479 distance_agu_use_in_bb (unsigned int regno,
18480 rtx_insn *insn, int distance, rtx_insn *start,
18481 bool *found, bool *redefined)
18483 basic_block bb = NULL;
18484 rtx_insn *next = start;
18485 rtx_insn *prev = NULL;
18487 *found = false;
18488 *redefined = false;
18490 if (start != NULL_RTX)
18492 bb = BLOCK_FOR_INSN (start);
18493 if (start != BB_HEAD (bb))
18494 /* If insn and start belong to the same bb, set prev to insn,
18495 so the call to increase_distance will increase the distance
18496 between insns by 1. */
18497 prev = insn;
18500 while (next
18501 && next != insn
18502 && distance < LEA_SEARCH_THRESHOLD)
18504 if (NONDEBUG_INSN_P (next) && NONJUMP_INSN_P (next))
18506 distance = increase_distance(prev, next, distance);
18507 if (insn_uses_reg_mem (regno, next))
18509 /* Return DISTANCE if OP0 is used in memory
18510 address in NEXT. */
18511 *found = true;
18512 return distance;
18515 if (insn_defines_reg (regno, INVALID_REGNUM, next))
18517 /* Return -1 if OP0 is set in NEXT. */
18518 *redefined = true;
18519 return -1;
18522 prev = next;
18525 if (next == BB_END (bb))
18526 break;
18528 next = NEXT_INSN (next);
18531 return distance;
18534 /* Return the distance between INSN and the next insn that uses
18535 register number REGNO0 in a memory address. Return -1 if no such
18536 use is found within LEA_SEARCH_THRESHOLD or REGNO0 is set. */
18538 static int
18539 distance_agu_use (unsigned int regno0, rtx_insn *insn)
18541 basic_block bb = BLOCK_FOR_INSN (insn);
18542 int distance = 0;
18543 bool found = false;
18544 bool redefined = false;
18546 if (insn != BB_END (bb))
18547 distance = distance_agu_use_in_bb (regno0, insn, distance,
18548 NEXT_INSN (insn),
18549 &found, &redefined);
18551 if (!found && !redefined && distance < LEA_SEARCH_THRESHOLD)
18553 edge e;
18554 edge_iterator ei;
18555 bool simple_loop = false;
18557 FOR_EACH_EDGE (e, ei, bb->succs)
18558 if (e->dest == bb)
18560 simple_loop = true;
18561 break;
18564 if (simple_loop)
18565 distance = distance_agu_use_in_bb (regno0, insn,
18566 distance, BB_HEAD (bb),
18567 &found, &redefined);
18568 else
18570 int shortest_dist = -1;
18571 bool found_in_bb = false;
18572 bool redefined_in_bb = false;
18574 FOR_EACH_EDGE (e, ei, bb->succs)
18576 int bb_dist
18577 = distance_agu_use_in_bb (regno0, insn,
18578 distance, BB_HEAD (e->dest),
18579 &found_in_bb, &redefined_in_bb);
18580 if (found_in_bb)
18582 if (shortest_dist < 0)
18583 shortest_dist = bb_dist;
18584 else if (bb_dist > 0)
18585 shortest_dist = MIN (bb_dist, shortest_dist);
18587 found = true;
18591 distance = shortest_dist;
18595 if (!found || redefined)
18596 return -1;
18598 return distance >> 1;
18601 /* Define this macro to tune LEA priority vs ADD; it takes effect when
18602 there is a dilemma of choosing LEA or ADD.
18603 Negative value: ADD is preferred over LEA
18604 Zero: Neutral
18605 Positive value: LEA is preferred over ADD */
18606 #define IX86_LEA_PRIORITY 0
18608 /* Return true if using lea INSN has a performance advantage
18609 over a sequence of instructions.  The instruction sequence has
18610 SPLIT_COST cycles higher latency than the lea itself. */
18612 static bool
18613 ix86_lea_outperforms (rtx_insn *insn, unsigned int regno0, unsigned int regno1,
18614 unsigned int regno2, int split_cost, bool has_scale)
18616 int dist_define, dist_use;
18618 /* For Silvermont, if a 2-source or 3-source LEA is used for a
18619 non-destructive destination, or because the ability to use
18620 SCALE is wanted, the use of LEA is justified. */
18621 if (TARGET_SILVERMONT || TARGET_INTEL)
18623 if (has_scale)
18624 return true;
18625 if (split_cost < 1)
18626 return false;
18627 if (regno0 == regno1 || regno0 == regno2)
18628 return false;
18629 return true;
18632 dist_define = distance_non_agu_define (regno1, regno2, insn);
18633 dist_use = distance_agu_use (regno0, insn);
18635 if (dist_define < 0 || dist_define >= LEA_MAX_STALL)
18637 /* If there is no non-AGU operand definition, no AGU
18638 operand usage and the split cost is 0, then both the lea
18639 and non-lea variants have the same priority.  Currently
18640 we prefer lea for 64-bit code and non-lea for 32-bit
18641 code. */
18642 if (dist_use < 0 && split_cost == 0)
18643 return TARGET_64BIT || IX86_LEA_PRIORITY;
18644 else
18645 return true;
18648 /* With longer definitions distance lea is more preferable.
18649 Here we change it to take into account splitting cost and
18650 lea priority. */
18651 dist_define += split_cost + IX86_LEA_PRIORITY;
18653 /* If there is no use in a memory address then we just check
18654 that the split cost exceeds the AGU stall. */
18655 if (dist_use < 0)
18656 return dist_define > LEA_MAX_STALL;
18658 /* If this insn has both backward non-agu dependence and forward
18659 agu dependence, the one with short distance takes effect. */
18660 return dist_define >= dist_use;
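/* Illustrative example on the distance-based path (numbers are made up):
   with IX86_LEA_PRIORITY == 0 and SPLIT_COST 1, a lea whose address
   registers were last defined at distance 1 and whose result is next
   used in a memory address at distance 2 compares 1 + 1 >= 2 and keeps
   the lea; had the defining insn been immediately adjacent (distance 0),
   the split sequence would be chosen instead.  */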
18663 /* Return true if it is legal to clobber flags by INSN and
18664 false otherwise. */
18666 static bool
18667 ix86_ok_to_clobber_flags (rtx_insn *insn)
18669 basic_block bb = BLOCK_FOR_INSN (insn);
18670 df_ref use;
18671 bitmap live;
18673 while (insn)
18675 if (NONDEBUG_INSN_P (insn))
18677 FOR_EACH_INSN_USE (use, insn)
18678 if (DF_REF_REG_USE_P (use) && DF_REF_REGNO (use) == FLAGS_REG)
18679 return false;
18681 if (insn_defines_reg (FLAGS_REG, INVALID_REGNUM, insn))
18682 return true;
18685 if (insn == BB_END (bb))
18686 break;
18688 insn = NEXT_INSN (insn);
18691 live = df_get_live_out (bb);
18692 return !REGNO_REG_SET_P (live, FLAGS_REG);
18695 /* Return true if we need to split op0 = op1 + op2 into a sequence of
18696 move and add to avoid AGU stalls. */
18698 bool
18699 ix86_avoid_lea_for_add (rtx_insn *insn, rtx operands[])
18701 unsigned int regno0, regno1, regno2;
18703 /* Check if we need to optimize. */
18704 if (!TARGET_OPT_AGU || optimize_function_for_size_p (cfun))
18705 return false;
18707 /* Check it is correct to split here. */
18708 if (!ix86_ok_to_clobber_flags (insn))
18709 return false;
18711 regno0 = true_regnum (operands[0]);
18712 regno1 = true_regnum (operands[1]);
18713 regno2 = true_regnum (operands[2]);
18715 /* We need to split only adds with a non-destructive
18716 destination operand. */
18717 if (regno0 == regno1 || regno0 == regno2)
18718 return false;
18719 else
18720 return !ix86_lea_outperforms (insn, regno0, regno1, regno2, 1, false);
18723 /* Return true if we should emit lea instruction instead of mov
18724 instruction. */
18726 bool
18727 ix86_use_lea_for_mov (rtx_insn *insn, rtx operands[])
18729 unsigned int regno0, regno1;
18731 /* Check if we need to optimize. */
18732 if (!TARGET_OPT_AGU || optimize_function_for_size_p (cfun))
18733 return false;
18735 /* Use lea for reg to reg moves only. */
18736 if (!REG_P (operands[0]) || !REG_P (operands[1]))
18737 return false;
18739 regno0 = true_regnum (operands[0]);
18740 regno1 = true_regnum (operands[1]);
18742 return ix86_lea_outperforms (insn, regno0, regno1, INVALID_REGNUM, 0, false);
18745 /* Return true if we need to split lea into a sequence of
18746 instructions to avoid AGU stalls. */
18748 bool
18749 ix86_avoid_lea_for_addr (rtx_insn *insn, rtx operands[])
18751 unsigned int regno0, regno1, regno2;
18752 int split_cost;
18753 struct ix86_address parts;
18754 int ok;
18756 /* Check we need to optimize. */
18757 if (!TARGET_AVOID_LEA_FOR_ADDR || optimize_function_for_size_p (cfun))
18758 return false;
18760 /* The "at least two components" test below might not catch simple
18761 move or zero extension insns if parts.base is non-NULL and parts.disp
18762 is const0_rtx as the only components in the address, e.g. if the
18763 register is %rbp or %r13. As this test is much cheaper and moves or
18764 zero extensions are the common case, do this check first. */
18765 if (REG_P (operands[1])
18766 || (SImode_address_operand (operands[1], VOIDmode)
18767 && REG_P (XEXP (operands[1], 0))))
18768 return false;
18770 /* Check if it is OK to split here. */
18771 if (!ix86_ok_to_clobber_flags (insn))
18772 return false;
18774 ok = ix86_decompose_address (operands[1], &parts);
18775 gcc_assert (ok);
18777 /* There should be at least two components in the address. */
18778 if ((parts.base != NULL_RTX) + (parts.index != NULL_RTX)
18779 + (parts.disp != NULL_RTX) + (parts.scale > 1) < 2)
18780 return false;
18782 /* We should not split into add if a non-legitimate PIC
18783 operand is used as the displacement. */
18784 if (parts.disp && flag_pic && !LEGITIMATE_PIC_OPERAND_P (parts.disp))
18785 return false;
18787 regno0 = true_regnum (operands[0]);
18788 regno1 = INVALID_REGNUM;
18789 regno2 = INVALID_REGNUM;
18791 if (parts.base)
18792 regno1 = true_regnum (parts.base);
18793 if (parts.index)
18794 regno2 = true_regnum (parts.index);
18796 split_cost = 0;
18798 /* Compute how many cycles we will add to execution time
18799 if we split the lea into a sequence of instructions. */
18800 if (parts.base || parts.index)
18802 /* Have to use a mov instruction if the non-destructive
18803 destination form is used. */
18804 if (regno1 != regno0 && regno2 != regno0)
18805 split_cost += 1;
18807 /* Have to add index to base if both exist. */
18808 if (parts.base && parts.index)
18809 split_cost += 1;
18811 /* Have to use shift and adds if scale is 2 or greater. */
18812 if (parts.scale > 1)
18814 if (regno0 != regno1)
18815 split_cost += 1;
18816 else if (regno2 == regno0)
18817 split_cost += 4;
18818 else
18819 split_cost += parts.scale;
18822 /* Have to use an add instruction with an immediate if
18823 disp is nonzero. */
18824 if (parts.disp && parts.disp != const0_rtx)
18825 split_cost += 1;
18827 /* Subtract the price of lea. */
18828 split_cost -= 1;
18831 return !ix86_lea_outperforms (insn, regno0, regno1, regno2, split_cost,
18832 parts.scale > 1);
18835 /* Emit x86 binary operand CODE in mode MODE, where the first operand
18836 matches destination. RTX includes clobber of FLAGS_REG. */
18838 static void
18839 ix86_emit_binop (enum rtx_code code, machine_mode mode,
18840 rtx dst, rtx src)
18842 rtx op, clob;
18844 op = gen_rtx_SET (dst, gen_rtx_fmt_ee (code, mode, dst, src));
18845 clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
18847 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, op, clob)));
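/* For example, ix86_emit_binop (PLUS, SImode, dst, src) emits
     (parallel [(set dst (plus:SI dst src))
                (clobber (reg:CC FLAGS_REG))])
   i.e. the flags-clobbering form matched by the normal add patterns.  */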
18850 /* Return true if the definition of regno1 is the nearest one to the insn. */
18852 static bool
18853 find_nearest_reg_def (rtx_insn *insn, int regno1, int regno2)
18855 rtx_insn *prev = insn;
18856 rtx_insn *start = BB_HEAD (BLOCK_FOR_INSN (insn));
18858 if (insn == start)
18859 return false;
18860 while (prev && prev != start)
18862 if (!INSN_P (prev) || !NONDEBUG_INSN_P (prev))
18864 prev = PREV_INSN (prev);
18865 continue;
18867 if (insn_defines_reg (regno1, INVALID_REGNUM, prev))
18868 return true;
18869 else if (insn_defines_reg (regno2, INVALID_REGNUM, prev))
18870 return false;
18871 prev = PREV_INSN (prev);
18874 /* None of the regs is defined in the bb. */
18875 return false;
18878 /* Split lea instructions into a sequence of instructions
18879 which are executed on ALU to avoid AGU stalls.
18880 It is assumed that it is allowed to clobber flags register
18881 at lea position. */
18883 void
18884 ix86_split_lea_for_addr (rtx_insn *insn, rtx operands[], machine_mode mode)
18886 unsigned int regno0, regno1, regno2;
18887 struct ix86_address parts;
18888 rtx target, tmp;
18889 int ok, adds;
18891 ok = ix86_decompose_address (operands[1], &parts);
18892 gcc_assert (ok);
18894 target = gen_lowpart (mode, operands[0]);
18896 regno0 = true_regnum (target);
18897 regno1 = INVALID_REGNUM;
18898 regno2 = INVALID_REGNUM;
18900 if (parts.base)
18902 parts.base = gen_lowpart (mode, parts.base);
18903 regno1 = true_regnum (parts.base);
18906 if (parts.index)
18908 parts.index = gen_lowpart (mode, parts.index);
18909 regno2 = true_regnum (parts.index);
18912 if (parts.disp)
18913 parts.disp = gen_lowpart (mode, parts.disp);
18915 if (parts.scale > 1)
18917 /* Case r1 = r1 + ... */
18918 if (regno1 == regno0)
18920 /* If we have the case r1 = r1 + C * r2 then we
18921 would have to use multiplication, which is very
18922 expensive.  Assume the cost model is wrong if we
18923 end up with such a case here. */
18924 gcc_assert (regno2 != regno0);
18926 for (adds = parts.scale; adds > 0; adds--)
18927 ix86_emit_binop (PLUS, mode, target, parts.index);
18929 else
18931 /* r1 = r2 + r3 * C case. Need to move r3 into r1. */
18932 if (regno0 != regno2)
18933 emit_insn (gen_rtx_SET (target, parts.index));
18935 /* Use shift for scaling. */
18936 ix86_emit_binop (ASHIFT, mode, target,
18937 GEN_INT (exact_log2 (parts.scale)));
18939 if (parts.base)
18940 ix86_emit_binop (PLUS, mode, target, parts.base);
18942 if (parts.disp && parts.disp != const0_rtx)
18943 ix86_emit_binop (PLUS, mode, target, parts.disp);
18946 else if (!parts.base && !parts.index)
18948 gcc_assert (parts.disp);
18949 emit_insn (gen_rtx_SET (target, parts.disp));
18951 else
18953 if (!parts.base)
18955 if (regno0 != regno2)
18956 emit_insn (gen_rtx_SET (target, parts.index));
18958 else if (!parts.index)
18960 if (regno0 != regno1)
18961 emit_insn (gen_rtx_SET (target, parts.base));
18963 else
18965 if (regno0 == regno1)
18966 tmp = parts.index;
18967 else if (regno0 == regno2)
18968 tmp = parts.base;
18969 else
18971 rtx tmp1;
18973 /* Find better operand for SET instruction, depending
18974 on which definition is farther from the insn. */
18975 if (find_nearest_reg_def (insn, regno1, regno2))
18976 tmp = parts.index, tmp1 = parts.base;
18977 else
18978 tmp = parts.base, tmp1 = parts.index;
18980 emit_insn (gen_rtx_SET (target, tmp));
18982 if (parts.disp && parts.disp != const0_rtx)
18983 ix86_emit_binop (PLUS, mode, target, parts.disp);
18985 ix86_emit_binop (PLUS, mode, target, tmp1);
18986 return;
18989 ix86_emit_binop (PLUS, mode, target, tmp);
18992 if (parts.disp && parts.disp != const0_rtx)
18993 ix86_emit_binop (PLUS, mode, target, parts.disp);
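/* Illustrative example (register choice is hypothetical): with
   operands[1] == (plus (plus (mult %ecx 4) %ebx) (const_int 8)) and
   %eax as the destination, the sequence emitted above is roughly

       movl  %ecx, %eax
       sall  $2, %eax
       addl  %ebx, %eax
       addl  $8, %eax

   so that every step executes on the ALU rather than the AGU.  */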
18997 /* Return true if it is ok to optimize an ADD operation to LEA
18998 operation to avoid consuming the flags register.  For most processors,
18999 ADD is faster than LEA.  For processors like BONNELL, if the
19000 destination register of LEA holds an actual address which will be
19001 used soon, LEA is better and otherwise ADD is better. */
19003 bool
19004 ix86_lea_for_add_ok (rtx_insn *insn, rtx operands[])
19006 unsigned int regno0 = true_regnum (operands[0]);
19007 unsigned int regno1 = true_regnum (operands[1]);
19008 unsigned int regno2 = true_regnum (operands[2]);
19010 /* If a = b + c, (a!=b && a!=c), must use lea form. */
19011 if (regno0 != regno1 && regno0 != regno2)
19012 return true;
19014 if (!TARGET_OPT_AGU || optimize_function_for_size_p (cfun))
19015 return false;
19017 return ix86_lea_outperforms (insn, regno0, regno1, regno2, 0, false);
19020 /* Return true if destination reg of SET_BODY is shift count of
19021 USE_BODY. */
19023 static bool
19024 ix86_dep_by_shift_count_body (const_rtx set_body, const_rtx use_body)
19026 rtx set_dest;
19027 rtx shift_rtx;
19028 int i;
19030 /* Retrieve destination of SET_BODY. */
19031 switch (GET_CODE (set_body))
19033 case SET:
19034 set_dest = SET_DEST (set_body);
19035 if (!set_dest || !REG_P (set_dest))
19036 return false;
19037 break;
19038 case PARALLEL:
19039 for (i = XVECLEN (set_body, 0) - 1; i >= 0; i--)
19040 if (ix86_dep_by_shift_count_body (XVECEXP (set_body, 0, i),
19041 use_body))
19042 return true;
19043 default:
19044 return false;
19045 break;
19048 /* Retrieve shift count of USE_BODY. */
19049 switch (GET_CODE (use_body))
19051 case SET:
19052 shift_rtx = XEXP (use_body, 1);
19053 break;
19054 case PARALLEL:
19055 for (i = XVECLEN (use_body, 0) - 1; i >= 0; i--)
19056 if (ix86_dep_by_shift_count_body (set_body,
19057 XVECEXP (use_body, 0, i)))
19058 return true;
19059 default:
19060 return false;
19061 break;
19064 if (shift_rtx
19065 && (GET_CODE (shift_rtx) == ASHIFT
19066 || GET_CODE (shift_rtx) == LSHIFTRT
19067 || GET_CODE (shift_rtx) == ASHIFTRT
19068 || GET_CODE (shift_rtx) == ROTATE
19069 || GET_CODE (shift_rtx) == ROTATERT))
19071 rtx shift_count = XEXP (shift_rtx, 1);
19073 /* Return true if shift count is dest of SET_BODY. */
19074 if (REG_P (shift_count))
19076 /* Add this check since it can be invoked before register
19077 allocation by the pre-reload scheduler. */
19078 if (reload_completed
19079 && true_regnum (set_dest) == true_regnum (shift_count))
19080 return true;
19081 else if (REGNO (set_dest) == REGNO (shift_count))
19082 return true;
19086 return false;
19089 /* Return true if destination reg of SET_INSN is shift count of
19090 USE_INSN. */
19092 bool
19093 ix86_dep_by_shift_count (const_rtx set_insn, const_rtx use_insn)
19095 return ix86_dep_by_shift_count_body (PATTERN (set_insn),
19096 PATTERN (use_insn));
19099 /* Return TRUE or FALSE depending on whether the unary operator meets the
19100 appropriate constraints. */
19102 bool
19103 ix86_unary_operator_ok (enum rtx_code,
19104 machine_mode,
19105 rtx operands[2])
19107 /* If one of operands is memory, source and destination must match. */
19108 if ((MEM_P (operands[0])
19109 || MEM_P (operands[1]))
19110 && ! rtx_equal_p (operands[0], operands[1]))
19111 return false;
19112 return true;
19115 /* Return TRUE if the operands to a vec_interleave_{high,low}v2df
19116 are ok, keeping in mind the possible movddup alternative. */
19118 bool
19119 ix86_vec_interleave_v2df_operator_ok (rtx operands[3], bool high)
19121 if (MEM_P (operands[0]))
19122 return rtx_equal_p (operands[0], operands[1 + high]);
19123 if (MEM_P (operands[1]) && MEM_P (operands[2]))
19124 return TARGET_SSE3 && rtx_equal_p (operands[1], operands[2]);
19125 return true;
19128 /* Post-reload splitter for converting an SF or DFmode value in an
19129 SSE register into an unsigned SImode. */
19131 void
19132 ix86_split_convert_uns_si_sse (rtx operands[])
19134 machine_mode vecmode;
19135 rtx value, large, zero_or_two31, input, two31, x;
19137 large = operands[1];
19138 zero_or_two31 = operands[2];
19139 input = operands[3];
19140 two31 = operands[4];
19141 vecmode = GET_MODE (large);
19142 value = gen_rtx_REG (vecmode, REGNO (operands[0]));
19144 /* Load up the value into the low element. We must ensure that the other
19145 elements are valid floats -- zero is the easiest such value. */
19146 if (MEM_P (input))
19148 if (vecmode == V4SFmode)
19149 emit_insn (gen_vec_setv4sf_0 (value, CONST0_RTX (V4SFmode), input));
19150 else
19151 emit_insn (gen_sse2_loadlpd (value, CONST0_RTX (V2DFmode), input));
19153 else
19155 input = gen_rtx_REG (vecmode, REGNO (input));
19156 emit_move_insn (value, CONST0_RTX (vecmode));
19157 if (vecmode == V4SFmode)
19158 emit_insn (gen_sse_movss (value, value, input));
19159 else
19160 emit_insn (gen_sse2_movsd (value, value, input));
19163 emit_move_insn (large, two31);
19164 emit_move_insn (zero_or_two31, MEM_P (two31) ? large : two31);
19166 x = gen_rtx_fmt_ee (LE, vecmode, large, value);
19167 emit_insn (gen_rtx_SET (large, x));
19169 x = gen_rtx_AND (vecmode, zero_or_two31, large);
19170 emit_insn (gen_rtx_SET (zero_or_two31, x));
19172 x = gen_rtx_MINUS (vecmode, value, zero_or_two31);
19173 emit_insn (gen_rtx_SET (value, x));
19175 large = gen_rtx_REG (V4SImode, REGNO (large));
19176 emit_insn (gen_ashlv4si3 (large, large, GEN_INT (31)));
19178 x = gen_rtx_REG (V4SImode, REGNO (value));
19179 if (vecmode == V4SFmode)
19180 emit_insn (gen_fix_truncv4sfv4si2 (x, value));
19181 else
19182 emit_insn (gen_sse2_cvttpd2dq (x, value));
19183 value = x;
19185 emit_insn (gen_xorv4si3 (value, value, large));
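/* A compiled-out scalar sketch of the splitter above (the name below is
   ours and nothing here is referenced elsewhere): values >= 0x1p31 are
   reduced by 0x1p31 before the signed conversion, and the sign bit is
   xor-ed back into the integer result afterwards.  */
#if 0
static unsigned int
uns_si_from_double_sketch (double v)
{
  if (v >= 2147483648.0)
    return (unsigned int) (int) (v - 2147483648.0) ^ 0x80000000u;
  return (unsigned int) (int) v;
}
#endif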
19188 /* Convert an unsigned DImode value into a DFmode, using only SSE.
19189 Expects the 64-bit DImode to be supplied in a pair of integral
19190 registers. Requires SSE2; will use SSE3 if available. For x86_32,
19191 -mfpmath=sse, !optimize_size only. */
19193 void
19194 ix86_expand_convert_uns_didf_sse (rtx target, rtx input)
19196 REAL_VALUE_TYPE bias_lo_rvt, bias_hi_rvt;
19197 rtx int_xmm, fp_xmm;
19198 rtx biases, exponents;
19199 rtx x;
19201 int_xmm = gen_reg_rtx (V4SImode);
19202 if (TARGET_INTER_UNIT_MOVES_TO_VEC)
19203 emit_insn (gen_movdi_to_sse (int_xmm, input));
19204 else if (TARGET_SSE_SPLIT_REGS)
19206 emit_clobber (int_xmm);
19207 emit_move_insn (gen_lowpart (DImode, int_xmm), input);
19209 else
19211 x = gen_reg_rtx (V2DImode);
19212 ix86_expand_vector_init_one_nonzero (false, V2DImode, x, input, 0);
19213 emit_move_insn (int_xmm, gen_lowpart (V4SImode, x));
19216 x = gen_rtx_CONST_VECTOR (V4SImode,
19217 gen_rtvec (4, GEN_INT (0x43300000UL),
19218 GEN_INT (0x45300000UL),
19219 const0_rtx, const0_rtx));
19220 exponents = validize_mem (force_const_mem (V4SImode, x));
19222 /* int_xmm = {0x45300000UL, fp_xmm/hi, 0x43300000, fp_xmm/lo } */
19223 emit_insn (gen_vec_interleave_lowv4si (int_xmm, int_xmm, exponents));
19225 /* Concatenating (juxtaposing) (0x43300000UL ## fp_value_low_xmm)
19226 yields a valid DF value equal to (0x1.0p52 + double(fp_value_lo_xmm)).
19227 Similarly (0x45300000UL ## fp_value_hi_xmm) yields
19228 (0x1.0p84 + double(fp_value_hi_xmm)).
19229 Note these exponents differ by 32. */
19231 fp_xmm = copy_to_mode_reg (V2DFmode, gen_lowpart (V2DFmode, int_xmm));
19233 /* Subtract off those 0x1.0p52 and 0x1.0p84 biases, to produce values
19234 in [0,2**32-1] and [0]+[2**32,2**64-1] respectively. */
19235 real_ldexp (&bias_lo_rvt, &dconst1, 52);
19236 real_ldexp (&bias_hi_rvt, &dconst1, 84);
19237 biases = const_double_from_real_value (bias_lo_rvt, DFmode);
19238 x = const_double_from_real_value (bias_hi_rvt, DFmode);
19239 biases = gen_rtx_CONST_VECTOR (V2DFmode, gen_rtvec (2, biases, x));
19240 biases = validize_mem (force_const_mem (V2DFmode, biases));
19241 emit_insn (gen_subv2df3 (fp_xmm, fp_xmm, biases));
19243 /* Add the upper and lower DFmode values together. */
19244 if (TARGET_SSE3)
19245 emit_insn (gen_sse3_haddv2df3 (fp_xmm, fp_xmm, fp_xmm));
19246 else
19248 x = copy_to_mode_reg (V2DFmode, fp_xmm);
19249 emit_insn (gen_vec_interleave_highv2df (fp_xmm, fp_xmm, fp_xmm));
19250 emit_insn (gen_addv2df3 (fp_xmm, fp_xmm, x));
19253 ix86_expand_vector_extract (false, target, fp_xmm, 0);
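/* A compiled-out scalar sketch of the expander above (the name is ours):
   each 32-bit half is spliced into the mantissa of a double whose high
   word is 0x43300000 (0x1.0p52) resp. 0x45300000 (0x1.0p84); removing
   those biases and adding the halves reconstructs the unsigned value.  */
#if 0
static double
uns_didf_sketch (unsigned long long x)
{
  union { unsigned long long i; double d; } lo, hi;

  lo.i = (0x43300000ULL << 32) | (x & 0xffffffffULL);
  hi.i = (0x45300000ULL << 32) | (x >> 32);
  return (lo.d - 0x1.0p52) + (hi.d - 0x1.0p84);
}
#endif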
19256 /* Not used, but eases macroization of patterns. */
19257 void
19258 ix86_expand_convert_uns_sixf_sse (rtx, rtx)
19260 gcc_unreachable ();
19263 /* Convert an unsigned SImode value into a DFmode. Only currently used
19264 for SSE, but applicable anywhere. */
19266 void
19267 ix86_expand_convert_uns_sidf_sse (rtx target, rtx input)
19269 REAL_VALUE_TYPE TWO31r;
19270 rtx x, fp;
19272 x = expand_simple_binop (SImode, PLUS, input, GEN_INT (-2147483647 - 1),
19273 NULL, 1, OPTAB_DIRECT);
19275 fp = gen_reg_rtx (DFmode);
19276 emit_insn (gen_floatsidf2 (fp, x));
19278 real_ldexp (&TWO31r, &dconst1, 31);
19279 x = const_double_from_real_value (TWO31r, DFmode);
19281 x = expand_simple_binop (DFmode, PLUS, fp, x, target, 0, OPTAB_DIRECT);
19282 if (x != target)
19283 emit_move_insn (target, x);
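/* A compiled-out scalar sketch of the expander above (the name is ours):
   bias the input into the signed range, convert as signed, then add the
   bias back as a double.  */
#if 0
static double
uns_sidf_sketch (unsigned int x)
{
  int biased = (int) (x + 0x80000000u);  /* wraps modulo 2^32 */
  return (double) biased + 2147483648.0;
}
#endif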
19286 /* Convert a signed DImode value into a DFmode. Only used for SSE in
19287 32-bit mode; otherwise we have a direct convert instruction. */
19289 void
19290 ix86_expand_convert_sign_didf_sse (rtx target, rtx input)
19292 REAL_VALUE_TYPE TWO32r;
19293 rtx fp_lo, fp_hi, x;
19295 fp_lo = gen_reg_rtx (DFmode);
19296 fp_hi = gen_reg_rtx (DFmode);
19298 emit_insn (gen_floatsidf2 (fp_hi, gen_highpart (SImode, input)));
19300 real_ldexp (&TWO32r, &dconst1, 32);
19301 x = const_double_from_real_value (TWO32r, DFmode);
19302 fp_hi = expand_simple_binop (DFmode, MULT, fp_hi, x, fp_hi, 0, OPTAB_DIRECT);
19304 ix86_expand_convert_uns_sidf_sse (fp_lo, gen_lowpart (SImode, input));
19306 x = expand_simple_binop (DFmode, PLUS, fp_hi, fp_lo, target,
19307 0, OPTAB_DIRECT);
19308 if (x != target)
19309 emit_move_insn (target, x);
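/* A compiled-out scalar sketch of the expander above (the name is ours):
   the high word is converted as signed and scaled by 2^32, the low word
   as unsigned, and the two doubles are added.  */
#if 0
static double
sign_didf_sketch (long long x)
{
  double hi = (double) (int) (x >> 32) * 4294967296.0;
  double lo = (double) (unsigned int) (x & 0xffffffff);
  return hi + lo;
}
#endif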
19312 /* Convert an unsigned SImode value into a SFmode, using only SSE.
19313 For x86_32, -mfpmath=sse, !optimize_size only. */
19314 void
19315 ix86_expand_convert_uns_sisf_sse (rtx target, rtx input)
19317 REAL_VALUE_TYPE ONE16r;
19318 rtx fp_hi, fp_lo, int_hi, int_lo, x;
19320 real_ldexp (&ONE16r, &dconst1, 16);
19321 x = const_double_from_real_value (ONE16r, SFmode);
19322 int_lo = expand_simple_binop (SImode, AND, input, GEN_INT(0xffff),
19323 NULL, 0, OPTAB_DIRECT);
19324 int_hi = expand_simple_binop (SImode, LSHIFTRT, input, GEN_INT(16),
19325 NULL, 0, OPTAB_DIRECT);
19326 fp_hi = gen_reg_rtx (SFmode);
19327 fp_lo = gen_reg_rtx (SFmode);
19328 emit_insn (gen_floatsisf2 (fp_hi, int_hi));
19329 emit_insn (gen_floatsisf2 (fp_lo, int_lo));
19330 fp_hi = expand_simple_binop (SFmode, MULT, fp_hi, x, fp_hi,
19331 0, OPTAB_DIRECT);
19332 fp_hi = expand_simple_binop (SFmode, PLUS, fp_hi, fp_lo, target,
19333 0, OPTAB_DIRECT);
19334 if (!rtx_equal_p (target, fp_hi))
19335 emit_move_insn (target, fp_hi);
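/* A compiled-out scalar sketch of the expander above and of the vector
   variant that follows (the name is ours): both 16-bit halves are exactly
   representable in SFmode, so only the final addition rounds.  */
#if 0
static float
uns_sisf_sketch (unsigned int x)
{
  float hi = (float) (int) (x >> 16);
  float lo = (float) (int) (x & 0xffff);
  return hi * 65536.0f + lo;
}
#endif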
19338 /* floatunsv{4,8}siv{4,8}sf2 expander. Expand code to convert
19339 a vector of unsigned ints VAL to vector of floats TARGET. */
19341 void
19342 ix86_expand_vector_convert_uns_vsivsf (rtx target, rtx val)
19344 rtx tmp[8];
19345 REAL_VALUE_TYPE TWO16r;
19346 machine_mode intmode = GET_MODE (val);
19347 machine_mode fltmode = GET_MODE (target);
19348 rtx (*cvt) (rtx, rtx);
19350 if (intmode == V4SImode)
19351 cvt = gen_floatv4siv4sf2;
19352 else
19353 cvt = gen_floatv8siv8sf2;
19354 tmp[0] = ix86_build_const_vector (intmode, 1, GEN_INT (0xffff));
19355 tmp[0] = force_reg (intmode, tmp[0]);
19356 tmp[1] = expand_simple_binop (intmode, AND, val, tmp[0], NULL_RTX, 1,
19357 OPTAB_DIRECT);
19358 tmp[2] = expand_simple_binop (intmode, LSHIFTRT, val, GEN_INT (16),
19359 NULL_RTX, 1, OPTAB_DIRECT);
19360 tmp[3] = gen_reg_rtx (fltmode);
19361 emit_insn (cvt (tmp[3], tmp[1]));
19362 tmp[4] = gen_reg_rtx (fltmode);
19363 emit_insn (cvt (tmp[4], tmp[2]));
19364 real_ldexp (&TWO16r, &dconst1, 16);
19365 tmp[5] = const_double_from_real_value (TWO16r, SFmode);
19366 tmp[5] = force_reg (fltmode, ix86_build_const_vector (fltmode, 1, tmp[5]));
19367 tmp[6] = expand_simple_binop (fltmode, MULT, tmp[4], tmp[5], NULL_RTX, 1,
19368 OPTAB_DIRECT);
19369 tmp[7] = expand_simple_binop (fltmode, PLUS, tmp[3], tmp[6], target, 1,
19370 OPTAB_DIRECT);
19371 if (tmp[7] != target)
19372 emit_move_insn (target, tmp[7]);
19375 /* Adjust a V*SFmode/V*DFmode value VAL so that *sfix_trunc* resp. fix_trunc*
19376 pattern can be used on it instead of *ufix_trunc* resp. fixuns_trunc*.
19377 This is done by doing just signed conversion if < 0x1p31, and otherwise by
19378 subtracting 0x1p31 first and xoring in 0x80000000 from *XORP afterwards. */
19380 rtx
19381 ix86_expand_adjust_ufix_to_sfix_si (rtx val, rtx *xorp)
19383 REAL_VALUE_TYPE TWO31r;
19384 rtx two31r, tmp[4];
19385 machine_mode mode = GET_MODE (val);
19386 machine_mode scalarmode = GET_MODE_INNER (mode);
19387 machine_mode intmode = GET_MODE_SIZE (mode) == 32 ? V8SImode : V4SImode;
19388 rtx (*cmp) (rtx, rtx, rtx, rtx);
19389 int i;
19391 for (i = 0; i < 3; i++)
19392 tmp[i] = gen_reg_rtx (mode);
19393 real_ldexp (&TWO31r, &dconst1, 31);
19394 two31r = const_double_from_real_value (TWO31r, scalarmode);
19395 two31r = ix86_build_const_vector (mode, 1, two31r);
19396 two31r = force_reg (mode, two31r);
19397 switch (mode)
19399 case V8SFmode: cmp = gen_avx_maskcmpv8sf3; break;
19400 case V4SFmode: cmp = gen_sse_maskcmpv4sf3; break;
19401 case V4DFmode: cmp = gen_avx_maskcmpv4df3; break;
19402 case V2DFmode: cmp = gen_sse2_maskcmpv2df3; break;
19403 default: gcc_unreachable ();
19405 tmp[3] = gen_rtx_LE (mode, two31r, val);
19406 emit_insn (cmp (tmp[0], two31r, val, tmp[3]));
19407 tmp[1] = expand_simple_binop (mode, AND, tmp[0], two31r, tmp[1],
19408 0, OPTAB_DIRECT);
19409 if (intmode == V4SImode || TARGET_AVX2)
19410 *xorp = expand_simple_binop (intmode, ASHIFT,
19411 gen_lowpart (intmode, tmp[0]),
19412 GEN_INT (31), NULL_RTX, 0,
19413 OPTAB_DIRECT);
19414 else
19416 rtx two31 = GEN_INT (HOST_WIDE_INT_1U << 31);
19417 two31 = ix86_build_const_vector (intmode, 1, two31);
19418 *xorp = expand_simple_binop (intmode, AND,
19419 gen_lowpart (intmode, tmp[0]),
19420 two31, NULL_RTX, 0,
19421 OPTAB_DIRECT);
19423 return expand_simple_binop (mode, MINUS, val, tmp[1], tmp[2],
19424 0, OPTAB_DIRECT);
19427 /* A subroutine of ix86_build_signbit_mask. If VECT is true,
19428 then replicate the value for all elements of the vector
19429 register. */
19431 rtx
19432 ix86_build_const_vector (machine_mode mode, bool vect, rtx value)
19434 int i, n_elt;
19435 rtvec v;
19436 machine_mode scalar_mode;
19438 switch (mode)
19440 case V64QImode:
19441 case V32QImode:
19442 case V16QImode:
19443 case V32HImode:
19444 case V16HImode:
19445 case V8HImode:
19446 case V16SImode:
19447 case V8SImode:
19448 case V4SImode:
19449 case V8DImode:
19450 case V4DImode:
19451 case V2DImode:
19452 gcc_assert (vect);
19453 case V16SFmode:
19454 case V8SFmode:
19455 case V4SFmode:
19456 case V8DFmode:
19457 case V4DFmode:
19458 case V2DFmode:
19459 n_elt = GET_MODE_NUNITS (mode);
19460 v = rtvec_alloc (n_elt);
19461 scalar_mode = GET_MODE_INNER (mode);
19463 RTVEC_ELT (v, 0) = value;
19465 for (i = 1; i < n_elt; ++i)
19466 RTVEC_ELT (v, i) = vect ? value : CONST0_RTX (scalar_mode);
19468 return gen_rtx_CONST_VECTOR (mode, v);
19470 default:
19471 gcc_unreachable ();
19475 /* A subroutine of ix86_expand_fp_absneg_operator, copysign expanders
19476 and ix86_expand_int_vcond. Create a mask for the sign bit in MODE
19477 for an SSE register. If VECT is true, then replicate the mask for
19478 all elements of the vector register. If INVERT is true, then create
19479 a mask excluding the sign bit. */
19481 rtx
19482 ix86_build_signbit_mask (machine_mode mode, bool vect, bool invert)
19484 machine_mode vec_mode, imode;
19485 wide_int w;
19486 rtx mask, v;
19488 switch (mode)
19490 case V16SImode:
19491 case V16SFmode:
19492 case V8SImode:
19493 case V4SImode:
19494 case V8SFmode:
19495 case V4SFmode:
19496 vec_mode = mode;
19497 mode = GET_MODE_INNER (mode);
19498 imode = SImode;
19499 break;
19501 case V8DImode:
19502 case V4DImode:
19503 case V2DImode:
19504 case V8DFmode:
19505 case V4DFmode:
19506 case V2DFmode:
19507 vec_mode = mode;
19508 mode = GET_MODE_INNER (mode);
19509 imode = DImode;
19510 break;
19512 case TImode:
19513 case TFmode:
19514 vec_mode = VOIDmode;
19515 imode = TImode;
19516 break;
19518 default:
19519 gcc_unreachable ();
19522 w = wi::set_bit_in_zero (GET_MODE_BITSIZE (mode) - 1,
19523 GET_MODE_BITSIZE (mode));
19524 if (invert)
19525 w = wi::bit_not (w);
19527 /* Force this value into the low part of a fp vector constant. */
19528 mask = immed_wide_int_const (w, imode);
19529 mask = gen_lowpart (mode, mask);
19531 if (vec_mode == VOIDmode)
19532 return force_reg (mode, mask);
19534 v = ix86_build_const_vector (vec_mode, vect, mask);
19535 return force_reg (vec_mode, v);
19538 /* Generate code for floating point ABS or NEG. */
19540 void
19541 ix86_expand_fp_absneg_operator (enum rtx_code code, machine_mode mode,
19542 rtx operands[])
19544 rtx mask, set, dst, src;
19545 bool use_sse = false;
19546 bool vector_mode = VECTOR_MODE_P (mode);
19547 machine_mode vmode = mode;
19549 if (vector_mode)
19550 use_sse = true;
19551 else if (mode == TFmode)
19552 use_sse = true;
19553 else if (TARGET_SSE_MATH)
19555 use_sse = SSE_FLOAT_MODE_P (mode);
19556 if (mode == SFmode)
19557 vmode = V4SFmode;
19558 else if (mode == DFmode)
19559 vmode = V2DFmode;
19562 /* NEG and ABS performed with SSE use bitwise mask operations.
19563 Create the appropriate mask now. */
19564 if (use_sse)
19565 mask = ix86_build_signbit_mask (vmode, vector_mode, code == ABS);
19566 else
19567 mask = NULL_RTX;
19569 dst = operands[0];
19570 src = operands[1];
19572 set = gen_rtx_fmt_e (code, mode, src);
19573 set = gen_rtx_SET (dst, set);
19575 if (mask)
19577 rtx use, clob;
19578 rtvec par;
19580 use = gen_rtx_USE (VOIDmode, mask);
19581 if (vector_mode)
19582 par = gen_rtvec (2, set, use);
19583 else
19585 clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
19586 par = gen_rtvec (3, set, use, clob);
19588 emit_insn (gen_rtx_PARALLEL (VOIDmode, par));
19590 else
19591 emit_insn (set);
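/* A compiled-out scalar sketch of the masking used above (the name is
   ours): ABS clears the sign bit by ANDing with the inverted sign mask,
   NEG flips it by XORing with the sign-bit mask.  */
#if 0
static double
fp_absneg_sketch (double x, int do_abs)
{
  union { double d; unsigned long long i; } u;
  unsigned long long sign = 1ULL << 63;

  u.d = x;
  u.i = do_abs ? (u.i & ~sign) : (u.i ^ sign);
  return u.d;
}
#endif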
19594 /* Expand a copysign operation. Special case operand 0 being a constant. */
19596 void
19597 ix86_expand_copysign (rtx operands[])
19599 machine_mode mode, vmode;
19600 rtx dest, op0, op1, mask, nmask;
19602 dest = operands[0];
19603 op0 = operands[1];
19604 op1 = operands[2];
19606 mode = GET_MODE (dest);
19608 if (mode == SFmode)
19609 vmode = V4SFmode;
19610 else if (mode == DFmode)
19611 vmode = V2DFmode;
19612 else
19613 vmode = mode;
19615 if (CONST_DOUBLE_P (op0))
19617 rtx (*copysign_insn)(rtx, rtx, rtx, rtx);
19619 if (real_isneg (CONST_DOUBLE_REAL_VALUE (op0)))
19620 op0 = simplify_unary_operation (ABS, mode, op0, mode);
19622 if (mode == SFmode || mode == DFmode)
19624 if (op0 == CONST0_RTX (mode))
19625 op0 = CONST0_RTX (vmode);
19626 else
19628 rtx v = ix86_build_const_vector (vmode, false, op0);
19630 op0 = force_reg (vmode, v);
19633 else if (op0 != CONST0_RTX (mode))
19634 op0 = force_reg (mode, op0);
19636 mask = ix86_build_signbit_mask (vmode, 0, 0);
19638 if (mode == SFmode)
19639 copysign_insn = gen_copysignsf3_const;
19640 else if (mode == DFmode)
19641 copysign_insn = gen_copysigndf3_const;
19642 else
19643 copysign_insn = gen_copysigntf3_const;
19645 emit_insn (copysign_insn (dest, op0, op1, mask));
19647 else
19649 rtx (*copysign_insn)(rtx, rtx, rtx, rtx, rtx, rtx);
19651 nmask = ix86_build_signbit_mask (vmode, 0, 1);
19652 mask = ix86_build_signbit_mask (vmode, 0, 0);
19654 if (mode == SFmode)
19655 copysign_insn = gen_copysignsf3_var;
19656 else if (mode == DFmode)
19657 copysign_insn = gen_copysigndf3_var;
19658 else
19659 copysign_insn = gen_copysigntf3_var;
19661 emit_insn (copysign_insn (dest, NULL_RTX, op0, op1, nmask, mask));
19665 /* Deconstruct a copysign operation into bit masks. Operand 0 is known to
19666 be a constant, and so has already been expanded into a vector constant. */
19668 void
19669 ix86_split_copysign_const (rtx operands[])
19671 machine_mode mode, vmode;
19672 rtx dest, op0, mask, x;
19674 dest = operands[0];
19675 op0 = operands[1];
19676 mask = operands[3];
19678 mode = GET_MODE (dest);
19679 vmode = GET_MODE (mask);
19681 dest = simplify_gen_subreg (vmode, dest, mode, 0);
19682 x = gen_rtx_AND (vmode, dest, mask);
19683 emit_insn (gen_rtx_SET (dest, x));
19685 if (op0 != CONST0_RTX (vmode))
19687 x = gen_rtx_IOR (vmode, dest, op0);
19688 emit_insn (gen_rtx_SET (dest, x));
19692 /* Deconstruct a copysign operation into bit masks. Operand 0 is variable,
19693 so we have to do two masks. */
19695 void
19696 ix86_split_copysign_var (rtx operands[])
19698 machine_mode mode, vmode;
19699 rtx dest, scratch, op0, op1, mask, nmask, x;
19701 dest = operands[0];
19702 scratch = operands[1];
19703 op0 = operands[2];
19704 op1 = operands[3];
19705 nmask = operands[4];
19706 mask = operands[5];
19708 mode = GET_MODE (dest);
19709 vmode = GET_MODE (mask);
19711 if (rtx_equal_p (op0, op1))
19713 /* Shouldn't happen often (it's useless, obviously), but when it does
19714 we'd generate incorrect code if we continue below. */
19715 emit_move_insn (dest, op0);
19716 return;
19719 if (REG_P (mask) && REGNO (dest) == REGNO (mask)) /* alternative 0 */
19721 gcc_assert (REGNO (op1) == REGNO (scratch));
19723 x = gen_rtx_AND (vmode, scratch, mask);
19724 emit_insn (gen_rtx_SET (scratch, x));
19726 dest = mask;
19727 op0 = simplify_gen_subreg (vmode, op0, mode, 0);
19728 x = gen_rtx_NOT (vmode, dest);
19729 x = gen_rtx_AND (vmode, x, op0);
19730 emit_insn (gen_rtx_SET (dest, x));
19732 else
19734 if (REGNO (op1) == REGNO (scratch)) /* alternative 1,3 */
19736 x = gen_rtx_AND (vmode, scratch, mask);
19738 else /* alternative 2,4 */
19740 gcc_assert (REGNO (mask) == REGNO (scratch));
19741 op1 = simplify_gen_subreg (vmode, op1, mode, 0);
19742 x = gen_rtx_AND (vmode, scratch, op1);
19744 emit_insn (gen_rtx_SET (scratch, x));
19746 if (REGNO (op0) == REGNO (dest)) /* alternative 1,2 */
19748 dest = simplify_gen_subreg (vmode, op0, mode, 0);
19749 x = gen_rtx_AND (vmode, dest, nmask);
19751 else /* alternative 3,4 */
19753 gcc_assert (REGNO (nmask) == REGNO (dest));
19754 dest = nmask;
19755 op0 = simplify_gen_subreg (vmode, op0, mode, 0);
19756 x = gen_rtx_AND (vmode, dest, op0);
19758 emit_insn (gen_rtx_SET (dest, x));
19761 x = gen_rtx_IOR (vmode, dest, scratch);
19762 emit_insn (gen_rtx_SET (dest, x));
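/* A compiled-out scalar sketch of the two-mask combination performed by
   the copysign splitters above (the name is ours): magnitude bits come
   from the first operand, the sign bit from the second.  */
#if 0
static double
copysign_sketch (double magnitude, double sign_source)
{
  union { double d; unsigned long long i; } m, s;
  unsigned long long sign = 1ULL << 63;

  m.d = magnitude;
  s.d = sign_source;
  m.i = (m.i & ~sign) | (s.i & sign);
  return m.d;
}
#endif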
19765 /* Return TRUE or FALSE depending on whether the first SET in INSN
19766 has source and destination with matching CC modes, and that the
19767 CC mode is at least as constrained as REQ_MODE. */
19769 bool
19770 ix86_match_ccmode (rtx insn, machine_mode req_mode)
19772 rtx set;
19773 machine_mode set_mode;
19775 set = PATTERN (insn);
19776 if (GET_CODE (set) == PARALLEL)
19777 set = XVECEXP (set, 0, 0);
19778 gcc_assert (GET_CODE (set) == SET);
19779 gcc_assert (GET_CODE (SET_SRC (set)) == COMPARE);
19781 set_mode = GET_MODE (SET_DEST (set));
19782 switch (set_mode)
19784 case CCNOmode:
19785 if (req_mode != CCNOmode
19786 && (req_mode != CCmode
19787 || XEXP (SET_SRC (set), 1) != const0_rtx))
19788 return false;
19789 break;
19790 case CCmode:
19791 if (req_mode == CCGCmode)
19792 return false;
19793 /* FALLTHRU */
19794 case CCGCmode:
19795 if (req_mode == CCGOCmode || req_mode == CCNOmode)
19796 return false;
19797 /* FALLTHRU */
19798 case CCGOCmode:
19799 if (req_mode == CCZmode)
19800 return false;
19801 /* FALLTHRU */
19802 case CCZmode:
19803 break;
19805 case CCAmode:
19806 case CCCmode:
19807 case CCOmode:
19808 case CCPmode:
19809 case CCSmode:
19810 if (set_mode != req_mode)
19811 return false;
19812 break;
19814 default:
19815 gcc_unreachable ();
19818 return GET_MODE (SET_SRC (set)) == set_mode;
19821 /* Generate insn patterns to do an integer compare of OPERANDS. */
19823 static rtx
19824 ix86_expand_int_compare (enum rtx_code code, rtx op0, rtx op1)
19826 machine_mode cmpmode;
19827 rtx tmp, flags;
19829 cmpmode = SELECT_CC_MODE (code, op0, op1);
19830 flags = gen_rtx_REG (cmpmode, FLAGS_REG);
19832 /* This is very simple, but making the interface the same as in the
19833 FP case makes the rest of the code easier. */
19834 tmp = gen_rtx_COMPARE (cmpmode, op0, op1);
19835 emit_insn (gen_rtx_SET (flags, tmp));
19837 /* Return the test that should be put into the flags user, i.e.
19838 the bcc, scc, or cmov instruction. */
19839 return gen_rtx_fmt_ee (code, VOIDmode, flags, const0_rtx);
19842 /* Figure out whether to use ordered or unordered fp comparisons.
19843 Return the appropriate mode to use. */
19845 machine_mode
19846 ix86_fp_compare_mode (enum rtx_code)
19848 /* ??? In order to make all comparisons reversible, we do all comparisons
19849 non-trapping when compiling for IEEE.  Once gcc is able to distinguish
19850 the trapping and nontrapping forms of all comparisons, we can make
19851 inequality comparisons trapping again, since that results in better code
19852 when using FCOM based compares. */
19853 return TARGET_IEEE_FP ? CCFPUmode : CCFPmode;
19856 machine_mode
19857 ix86_cc_mode (enum rtx_code code, rtx op0, rtx op1)
19859 machine_mode mode = GET_MODE (op0);
19861 if (SCALAR_FLOAT_MODE_P (mode))
19863 gcc_assert (!DECIMAL_FLOAT_MODE_P (mode));
19864 return ix86_fp_compare_mode (code);
19867 switch (code)
19869 /* Only zero flag is needed. */
19870 case EQ: /* ZF=0 */
19871 case NE: /* ZF!=0 */
19872 return CCZmode;
19873 /* Codes needing carry flag. */
19874 case GEU: /* CF=0 */
19875 case LTU: /* CF=1 */
19876 /* Detect overflow checks. They need just the carry flag. */
19877 if (GET_CODE (op0) == PLUS
19878 && rtx_equal_p (op1, XEXP (op0, 0)))
19879 return CCCmode;
19880 else
19881 return CCmode;
19882 case GTU: /* CF=0 & ZF=0 */
19883 case LEU: /* CF=1 | ZF=1 */
19884 return CCmode;
19885 /* Codes possibly doable only with sign flag when
19886 comparing against zero. */
19887 case GE: /* SF=OF or SF=0 */
19888 case LT: /* SF<>OF or SF=1 */
19889 if (op1 == const0_rtx)
19890 return CCGOCmode;
19891 else
19892 /* For other cases Carry flag is not required. */
19893 return CCGCmode;
19894 /* Codes doable only with the sign flag when comparing
19895 against zero, but for which we lack a jump instruction,
19896 so we need to use relational tests against overflow,
19897 which thus needs to be zero. */
19898 case GT: /* ZF=0 & SF=OF */
19899 case LE: /* ZF=1 | SF<>OF */
19900 if (op1 == const0_rtx)
19901 return CCNOmode;
19902 else
19903 return CCGCmode;
19904 /* The strcmp pattern does (use flags) and combine may ask us for the
19905 proper mode. */
19906 case USE:
19907 return CCmode;
19908 default:
19909 gcc_unreachable ();
19913 /* Return the fixed registers used for condition codes. */
19915 static bool
19916 ix86_fixed_condition_code_regs (unsigned int *p1, unsigned int *p2)
19918 *p1 = FLAGS_REG;
19919 *p2 = FPSR_REG;
19920 return true;
19923 /* If two condition code modes are compatible, return a condition code
19924 mode which is compatible with both. Otherwise, return
19925 VOIDmode. */
19927 static machine_mode
19928 ix86_cc_modes_compatible (machine_mode m1, machine_mode m2)
19930 if (m1 == m2)
19931 return m1;
19933 if (GET_MODE_CLASS (m1) != MODE_CC || GET_MODE_CLASS (m2) != MODE_CC)
19934 return VOIDmode;
19936 if ((m1 == CCGCmode && m2 == CCGOCmode)
19937 || (m1 == CCGOCmode && m2 == CCGCmode))
19938 return CCGCmode;
19940 if (m1 == CCZmode && (m2 == CCGCmode || m2 == CCGOCmode))
19941 return m2;
19942 else if (m2 == CCZmode && (m1 == CCGCmode || m1 == CCGOCmode))
19943 return m1;
19945 switch (m1)
19947 default:
19948 gcc_unreachable ();
19950 case CCmode:
19951 case CCGCmode:
19952 case CCGOCmode:
19953 case CCNOmode:
19954 case CCAmode:
19955 case CCCmode:
19956 case CCOmode:
19957 case CCPmode:
19958 case CCSmode:
19959 case CCZmode:
19960 switch (m2)
19962 default:
19963 return VOIDmode;
19965 case CCmode:
19966 case CCGCmode:
19967 case CCGOCmode:
19968 case CCNOmode:
19969 case CCAmode:
19970 case CCCmode:
19971 case CCOmode:
19972 case CCPmode:
19973 case CCSmode:
19974 case CCZmode:
19975 return CCmode;
19978 case CCFPmode:
19979 case CCFPUmode:
19980 /* These are only compatible with themselves, which we already
19981 checked above. */
19982 return VOIDmode;
19987 /* Return a comparison we can do that is equivalent to
19988 swap_condition (code), apart possibly from orderedness.
19989 But never change orderedness if TARGET_IEEE_FP, returning
19990 UNKNOWN in that case if necessary. */
19992 static enum rtx_code
19993 ix86_fp_swap_condition (enum rtx_code code)
19995 switch (code)
19997 case GT: /* GTU - CF=0 & ZF=0 */
19998 return TARGET_IEEE_FP ? UNKNOWN : UNLT;
19999 case GE: /* GEU - CF=0 */
20000 return TARGET_IEEE_FP ? UNKNOWN : UNLE;
20001 case UNLT: /* LTU - CF=1 */
20002 return TARGET_IEEE_FP ? UNKNOWN : GT;
20003 case UNLE: /* LEU - CF=1 | ZF=1 */
20004 return TARGET_IEEE_FP ? UNKNOWN : GE;
20005 default:
20006 return swap_condition (code);
20010 /* Return the cost of comparison CODE using the best strategy for performance.
20011 All of the following functions use the number of instructions as a cost metric.
20012 In the future this should be tweaked to compute bytes for optimize_size and
20013 to take into account the performance of various instructions on various CPUs. */
20015 static int
20016 ix86_fp_comparison_cost (enum rtx_code code)
20018 int arith_cost;
20020 /* The cost of code using bit-twiddling on %ah. */
20021 switch (code)
20023 case UNLE:
20024 case UNLT:
20025 case LTGT:
20026 case GT:
20027 case GE:
20028 case UNORDERED:
20029 case ORDERED:
20030 case UNEQ:
20031 arith_cost = 4;
20032 break;
20033 case LT:
20034 case NE:
20035 case EQ:
20036 case UNGE:
20037 arith_cost = TARGET_IEEE_FP ? 5 : 4;
20038 break;
20039 case LE:
20040 case UNGT:
20041 arith_cost = TARGET_IEEE_FP ? 6 : 4;
20042 break;
20043 default:
20044 gcc_unreachable ();
20047 switch (ix86_fp_comparison_strategy (code))
20049 case IX86_FPCMP_COMI:
20050 return arith_cost > 4 ? 3 : 2;
20051 case IX86_FPCMP_SAHF:
20052 return arith_cost > 4 ? 4 : 3;
20053 default:
20054 return arith_cost;
20058 /* Return the strategy to use for floating-point.  We assume that fcomi is
20059 always preferable where available, since that is also true when looking at
20060 size (2 bytes, vs. 3 for fnstsw+sahf and at least 5 for fnstsw+test). */
20062 enum ix86_fpcmp_strategy
20063 ix86_fp_comparison_strategy (enum rtx_code)
20065 /* Do fcomi/sahf based test when profitable. */
20067 if (TARGET_CMOVE)
20068 return IX86_FPCMP_COMI;
20070 if (TARGET_SAHF && (TARGET_USE_SAHF || optimize_insn_for_size_p ()))
20071 return IX86_FPCMP_SAHF;
20073 return IX86_FPCMP_ARITH;
20076 /* Swap, force into registers, or otherwise massage the two operands
20077 to a fp comparison. The operands are updated in place; the new
20078 comparison code is returned. */
20080 static enum rtx_code
20081 ix86_prepare_fp_compare_args (enum rtx_code code, rtx *pop0, rtx *pop1)
20083 machine_mode fpcmp_mode = ix86_fp_compare_mode (code);
20084 rtx op0 = *pop0, op1 = *pop1;
20085 machine_mode op_mode = GET_MODE (op0);
20086 int is_sse = TARGET_SSE_MATH && SSE_FLOAT_MODE_P (op_mode);
20088 /* All of the unordered compare instructions only work on registers.
20089 The same is true of the fcomi compare instructions. The XFmode
20090 compare instructions require registers except when comparing
20091 against zero or when converting operand 1 from fixed point to
20092 floating point. */
20094 if (!is_sse
20095 && (fpcmp_mode == CCFPUmode
20096 || (op_mode == XFmode
20097 && ! (standard_80387_constant_p (op0) == 1
20098 || standard_80387_constant_p (op1) == 1)
20099 && GET_CODE (op1) != FLOAT)
20100 || ix86_fp_comparison_strategy (code) == IX86_FPCMP_COMI))
20102 op0 = force_reg (op_mode, op0);
20103 op1 = force_reg (op_mode, op1);
20105 else
20107 /* %%% We only allow op1 in memory; op0 must be st(0). So swap
20108 things around if they appear profitable, otherwise force op0
20109 into a register. */
20111 if (standard_80387_constant_p (op0) == 0
20112 || (MEM_P (op0)
20113 && ! (standard_80387_constant_p (op1) == 0
20114 || MEM_P (op1))))
20116 enum rtx_code new_code = ix86_fp_swap_condition (code);
20117 if (new_code != UNKNOWN)
20119 std::swap (op0, op1);
20120 code = new_code;
20124 if (!REG_P (op0))
20125 op0 = force_reg (op_mode, op0);
20127 if (CONSTANT_P (op1))
20129 int tmp = standard_80387_constant_p (op1);
20130 if (tmp == 0)
20131 op1 = validize_mem (force_const_mem (op_mode, op1));
20132 else if (tmp == 1)
20134 if (TARGET_CMOVE)
20135 op1 = force_reg (op_mode, op1);
20137 else
20138 op1 = force_reg (op_mode, op1);
20142 /* Try to rearrange the comparison to make it cheaper. */
20143 if (ix86_fp_comparison_cost (code)
20144 > ix86_fp_comparison_cost (swap_condition (code))
20145 && (REG_P (op1) || can_create_pseudo_p ()))
20147 std::swap (op0, op1);
20148 code = swap_condition (code);
20149 if (!REG_P (op0))
20150 op0 = force_reg (op_mode, op0);
20153 *pop0 = op0;
20154 *pop1 = op1;
20155 return code;
20158 /* Convert comparison codes we use to represent FP comparison to integer
20159 code that will result in proper branch. Return UNKNOWN if no such code
20160 is available. */
20162 enum rtx_code
20163 ix86_fp_compare_code_to_integer (enum rtx_code code)
20165 switch (code)
20167 case GT:
20168 return GTU;
20169 case GE:
20170 return GEU;
20171 case ORDERED:
20172 case UNORDERED:
20173 return code;
20174 break;
20175 case UNEQ:
20176 return EQ;
20177 break;
20178 case UNLT:
20179 return LTU;
20180 break;
20181 case UNLE:
20182 return LEU;
20183 break;
20184 case LTGT:
20185 return NE;
20186 break;
20187 default:
20188 return UNKNOWN;
20192 /* Generate insn patterns to do a floating point compare of OPERANDS. */
20194 static rtx
20195 ix86_expand_fp_compare (enum rtx_code code, rtx op0, rtx op1, rtx scratch)
20197 machine_mode fpcmp_mode, intcmp_mode;
20198 rtx tmp, tmp2;
20200 fpcmp_mode = ix86_fp_compare_mode (code);
20201 code = ix86_prepare_fp_compare_args (code, &op0, &op1);
20203 /* Do fcomi/sahf based test when profitable. */
20204 switch (ix86_fp_comparison_strategy (code))
20206 case IX86_FPCMP_COMI:
20207 intcmp_mode = fpcmp_mode;
20208 tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
20209 tmp = gen_rtx_SET (gen_rtx_REG (fpcmp_mode, FLAGS_REG), tmp);
20210 emit_insn (tmp);
20211 break;
20213 case IX86_FPCMP_SAHF:
20214 intcmp_mode = fpcmp_mode;
20215 tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
20216 tmp = gen_rtx_SET (gen_rtx_REG (fpcmp_mode, FLAGS_REG), tmp);
20218 if (!scratch)
20219 scratch = gen_reg_rtx (HImode);
20220 tmp2 = gen_rtx_CLOBBER (VOIDmode, scratch);
20221 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, tmp, tmp2)));
20222 break;
20224 case IX86_FPCMP_ARITH:
20225 /* Sadness wrt reg-stack pops killing fpsr -- gotta get fnstsw first. */
20226 tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
20227 tmp2 = gen_rtx_UNSPEC (HImode, gen_rtvec (1, tmp), UNSPEC_FNSTSW);
20228 if (!scratch)
20229 scratch = gen_reg_rtx (HImode);
20230 emit_insn (gen_rtx_SET (scratch, tmp2));
20232 /* In the unordered case, we have to check C2 for NaN's, which
20233 doesn't happen to work out to anything nice combination-wise.
20234 So do some bit twiddling on the value we've got in AH to come
20235 up with an appropriate set of condition codes. */
20237 intcmp_mode = CCNOmode;
20238 switch (code)
20240 case GT:
20241 case UNGT:
20242 if (code == GT || !TARGET_IEEE_FP)
20244 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x45)));
20245 code = EQ;
20247 else
20249 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
20250 emit_insn (gen_addqi_ext_1 (scratch, scratch, constm1_rtx));
20251 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x44)));
20252 intcmp_mode = CCmode;
20253 code = GEU;
20255 break;
20256 case LT:
20257 case UNLT:
20258 if (code == LT && TARGET_IEEE_FP)
20260 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
20261 emit_insn (gen_cmpqi_ext_3 (scratch, const1_rtx));
20262 intcmp_mode = CCmode;
20263 code = EQ;
20265 else
20267 emit_insn (gen_testqi_ext_ccno_0 (scratch, const1_rtx));
20268 code = NE;
20270 break;
20271 case GE:
20272 case UNGE:
20273 if (code == GE || !TARGET_IEEE_FP)
20275 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x05)));
20276 code = EQ;
20278 else
20280 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
20281 emit_insn (gen_xorqi_cc_ext_1 (scratch, scratch, const1_rtx));
20282 code = NE;
20284 break;
20285 case LE:
20286 case UNLE:
20287 if (code == LE && TARGET_IEEE_FP)
20289 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
20290 emit_insn (gen_addqi_ext_1 (scratch, scratch, constm1_rtx));
20291 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x40)));
20292 intcmp_mode = CCmode;
20293 code = LTU;
20295 else
20297 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x45)));
20298 code = NE;
20300 break;
20301 case EQ:
20302 case UNEQ:
20303 if (code == EQ && TARGET_IEEE_FP)
20305 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
20306 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x40)));
20307 intcmp_mode = CCmode;
20308 code = EQ;
20310 else
20312 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x40)));
20313 code = NE;
20315 break;
20316 case NE:
20317 case LTGT:
20318 if (code == NE && TARGET_IEEE_FP)
20320 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
20321 emit_insn (gen_xorqi_cc_ext_1 (scratch, scratch,
20322 GEN_INT (0x40)));
20323 code = NE;
20325 else
20327 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x40)));
20328 code = EQ;
20330 break;
20332 case UNORDERED:
20333 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x04)));
20334 code = NE;
20335 break;
20336 case ORDERED:
20337 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x04)));
20338 code = EQ;
20339 break;
20341 default:
20342 gcc_unreachable ();
20344 break;
20346 default:
20347 gcc_unreachable ();
20350 /* Return the test that should be put into the flags user, i.e.
20351 the bcc, scc, or cmov instruction. */
20352 return gen_rtx_fmt_ee (code, VOIDmode,
20353 gen_rtx_REG (intcmp_mode, FLAGS_REG),
20354 const0_rtx);
20357 static rtx
20358 ix86_expand_compare (enum rtx_code code, rtx op0, rtx op1)
20360 rtx ret;
20362 if (GET_MODE_CLASS (GET_MODE (op0)) == MODE_CC)
20363 ret = gen_rtx_fmt_ee (code, VOIDmode, op0, op1);
20365 else if (SCALAR_FLOAT_MODE_P (GET_MODE (op0)))
20367 gcc_assert (!DECIMAL_FLOAT_MODE_P (GET_MODE (op0)));
20368 ret = ix86_expand_fp_compare (code, op0, op1, NULL_RTX);
20370 else
20371 ret = ix86_expand_int_compare (code, op0, op1);
20373 return ret;
20376 void
20377 ix86_expand_branch (enum rtx_code code, rtx op0, rtx op1, rtx label)
20379 machine_mode mode = GET_MODE (op0);
20380 rtx tmp;
20382 switch (mode)
20384 case SFmode:
20385 case DFmode:
20386 case XFmode:
20387 case QImode:
20388 case HImode:
20389 case SImode:
20390 simple:
20391 tmp = ix86_expand_compare (code, op0, op1);
20392 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
20393 gen_rtx_LABEL_REF (VOIDmode, label),
20394 pc_rtx);
20395 emit_jump_insn (gen_rtx_SET (pc_rtx, tmp));
20396 return;
20398 case DImode:
20399 if (TARGET_64BIT)
20400 goto simple;
20401 case TImode:
20402 /* Expand DImode branch into multiple compare+branch. */
20404 rtx lo[2], hi[2];
20405 rtx_code_label *label2;
20406 enum rtx_code code1, code2, code3;
20407 machine_mode submode;
20409 if (CONSTANT_P (op0) && !CONSTANT_P (op1))
20411 std::swap (op0, op1);
20412 code = swap_condition (code);
20415 split_double_mode (mode, &op0, 1, lo+0, hi+0);
20416 split_double_mode (mode, &op1, 1, lo+1, hi+1);
20418 submode = mode == DImode ? SImode : DImode;
20420 /* When comparing for equality, we can use (hi0^hi1)|(lo0^lo1) to
20421 avoid two branches. This costs one extra insn, so disable when
20422 optimizing for size. */
20424 if ((code == EQ || code == NE)
20425 && (!optimize_insn_for_size_p ()
20426 || hi[1] == const0_rtx || lo[1] == const0_rtx))
20428 rtx xor0, xor1;
20430 xor1 = hi[0];
20431 if (hi[1] != const0_rtx)
20432 xor1 = expand_binop (submode, xor_optab, xor1, hi[1],
20433 NULL_RTX, 0, OPTAB_WIDEN);
20435 xor0 = lo[0];
20436 if (lo[1] != const0_rtx)
20437 xor0 = expand_binop (submode, xor_optab, xor0, lo[1],
20438 NULL_RTX, 0, OPTAB_WIDEN);
20440 tmp = expand_binop (submode, ior_optab, xor1, xor0,
20441 NULL_RTX, 0, OPTAB_WIDEN);
20443 ix86_expand_branch (code, tmp, const0_rtx, label);
20444 return;
20447 /* Otherwise, if we are doing less-than or greater-or-equal-than,
20448 op1 is a constant and the low word is zero, then we can just
20449 examine the high word. Similarly for low word -1 and
20450 less-or-equal-than or greater-than. */
20452 if (CONST_INT_P (hi[1]))
20453 switch (code)
20455 case LT: case LTU: case GE: case GEU:
20456 if (lo[1] == const0_rtx)
20458 ix86_expand_branch (code, hi[0], hi[1], label);
20459 return;
20461 break;
20462 case LE: case LEU: case GT: case GTU:
20463 if (lo[1] == constm1_rtx)
20465 ix86_expand_branch (code, hi[0], hi[1], label);
20466 return;
20468 break;
20469 default:
20470 break;
20473 /* Otherwise, we need two or three jumps. */
20475 label2 = gen_label_rtx ();
20477 code1 = code;
20478 code2 = swap_condition (code);
20479 code3 = unsigned_condition (code);
20481 switch (code)
20483 case LT: case GT: case LTU: case GTU:
20484 break;
20486 case LE: code1 = LT; code2 = GT; break;
20487 case GE: code1 = GT; code2 = LT; break;
20488 case LEU: code1 = LTU; code2 = GTU; break;
20489 case GEU: code1 = GTU; code2 = LTU; break;
20491 case EQ: code1 = UNKNOWN; code2 = NE; break;
20492 case NE: code2 = UNKNOWN; break;
20494 default:
20495 gcc_unreachable ();
20499 * a < b =>
20500 * if (hi(a) < hi(b)) goto true;
20501 * if (hi(a) > hi(b)) goto false;
20502 * if (lo(a) < lo(b)) goto true;
20503 * false:
20506 if (code1 != UNKNOWN)
20507 ix86_expand_branch (code1, hi[0], hi[1], label);
20508 if (code2 != UNKNOWN)
20509 ix86_expand_branch (code2, hi[0], hi[1], label2);
20511 ix86_expand_branch (code3, lo[0], lo[1], label);
20513 if (code2 != UNKNOWN)
20514 emit_label (label2);
20515 return;
20518 default:
20519 gcc_assert (GET_MODE_CLASS (GET_MODE (op0)) == MODE_CC);
20520 goto simple;
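/* A compiled-out scalar sketch of the equality shortcut used above for
   double-word compares (the name is ours): one OR of two XORs and a
   single test against zero replace two compare-and-branch pairs.  */
#if 0
static int
double_word_eq_sketch (unsigned int lo0, unsigned int hi0,
		       unsigned int lo1, unsigned int hi1)
{
  return ((hi0 ^ hi1) | (lo0 ^ lo1)) == 0;
}
#endif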
20524 /* Split branch based on floating point condition. */
20525 void
20526 ix86_split_fp_branch (enum rtx_code code, rtx op1, rtx op2,
20527 rtx target1, rtx target2, rtx tmp)
20529 rtx condition;
20530 rtx i;
20532 if (target2 != pc_rtx)
20534 std::swap (target1, target2);
20535 code = reverse_condition_maybe_unordered (code);
20538 condition = ix86_expand_fp_compare (code, op1, op2,
20539 tmp);
20541 i = emit_jump_insn (gen_rtx_SET
20542 (pc_rtx,
20543 gen_rtx_IF_THEN_ELSE (VOIDmode,
20544 condition, target1, target2)));
20545 if (split_branch_probability >= 0)
20546 add_int_reg_note (i, REG_BR_PROB, split_branch_probability);
20549 void
20550 ix86_expand_setcc (rtx dest, enum rtx_code code, rtx op0, rtx op1)
20552 rtx ret;
20554 gcc_assert (GET_MODE (dest) == QImode);
20556 ret = ix86_expand_compare (code, op0, op1);
20557 PUT_MODE (ret, QImode);
20558 emit_insn (gen_rtx_SET (dest, ret));
20561 /* Expand comparison setting or clearing carry flag. Return true when
20562 successful and set pop for the operation. */
20563 static bool
20564 ix86_expand_carry_flag_compare (enum rtx_code code, rtx op0, rtx op1, rtx *pop)
20566 machine_mode mode =
20567 GET_MODE (op0) != VOIDmode ? GET_MODE (op0) : GET_MODE (op1);
20569 /* Do not handle double-mode compares that go through the special path. */
20570 if (mode == (TARGET_64BIT ? TImode : DImode))
20571 return false;
20573 if (SCALAR_FLOAT_MODE_P (mode))
20575 rtx compare_op;
20576 rtx_insn *compare_seq;
20578 gcc_assert (!DECIMAL_FLOAT_MODE_P (mode));
20580 /* Shortcut: the following common codes never translate
20581 into carry flag compares. */
20582 if (code == EQ || code == NE || code == UNEQ || code == LTGT
20583 || code == ORDERED || code == UNORDERED)
20584 return false;
20586 /* These comparisons require the zero flag; swap operands so they won't. */
20587 if ((code == GT || code == UNLE || code == LE || code == UNGT)
20588 && !TARGET_IEEE_FP)
20590 std::swap (op0, op1);
20591 code = swap_condition (code);
20594 /* Try to expand the comparison and verify that we end up with
20595 a carry-flag-based comparison.  This fails to be true only when
20596 we decide to expand the comparison using arithmetic, which is
20597 not a common scenario. */
20598 start_sequence ();
20599 compare_op = ix86_expand_fp_compare (code, op0, op1, NULL_RTX);
20600 compare_seq = get_insns ();
20601 end_sequence ();
20603 if (GET_MODE (XEXP (compare_op, 0)) == CCFPmode
20604 || GET_MODE (XEXP (compare_op, 0)) == CCFPUmode)
20605 code = ix86_fp_compare_code_to_integer (GET_CODE (compare_op));
20606 else
20607 code = GET_CODE (compare_op);
20609 if (code != LTU && code != GEU)
20610 return false;
20612 emit_insn (compare_seq);
20613 *pop = compare_op;
20614 return true;
20617 if (!INTEGRAL_MODE_P (mode))
20618 return false;
20620 switch (code)
20622 case LTU:
20623 case GEU:
20624 break;
20626 /* Convert a==0 into (unsigned)a<1. */
20627 case EQ:
20628 case NE:
20629 if (op1 != const0_rtx)
20630 return false;
20631 op1 = const1_rtx;
20632 code = (code == EQ ? LTU : GEU);
20633 break;
20635 /* Convert a>b into b<a or a>=b-1. */
20636 case GTU:
20637 case LEU:
20638 if (CONST_INT_P (op1))
20640 op1 = gen_int_mode (INTVAL (op1) + 1, GET_MODE (op0));
20641 /* Bail out on overflow.  We could still swap the operands, but
20642 that would force loading the constant into a register. */
20643 if (op1 == const0_rtx
20644 || !x86_64_immediate_operand (op1, GET_MODE (op1)))
20645 return false;
20646 code = (code == GTU ? GEU : LTU);
20648 else
20650 std::swap (op0, op1);
20651 code = (code == GTU ? LTU : GEU);
20653 break;
20655 /* Convert a>=0 into (unsigned)a<0x80000000. */
20656 case LT:
20657 case GE:
20658 if (mode == DImode || op1 != const0_rtx)
20659 return false;
20660 op1 = gen_int_mode (1 << (GET_MODE_BITSIZE (mode) - 1), mode);
20661 code = (code == LT ? GEU : LTU);
20662 break;
20663 case LE:
20664 case GT:
20665 if (mode == DImode || op1 != constm1_rtx)
20666 return false;
20667 op1 = gen_int_mode (1 << (GET_MODE_BITSIZE (mode) - 1), mode);
20668 code = (code == LE ? GEU : LTU);
20669 break;
20671 default:
20672 return false;
20674 /* Swapping the operands may cause a constant to appear as the first operand. */
20675 if (!nonimmediate_operand (op0, VOIDmode))
20677 if (!can_create_pseudo_p ())
20678 return false;
20679 op0 = force_reg (mode, op0);
20681 *pop = ix86_expand_compare (code, op0, op1);
20682 gcc_assert (GET_CODE (*pop) == LTU || GET_CODE (*pop) == GEU);
20683 return true;
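/* A rough illustration of the conversions above: with 32-bit operands,
   "a == 0" is rewritten as "(unsigned) a < 1", so a single
   "cmpl $1, %eax" sets the carry flag exactly when a is zero, and
   "a >u 5" becomes "a >=u 6", i.e. the inverted carry of "cmpl $6, %eax".
   Either form can then feed an adc/sbb directly.  Register and constant
   choices here are purely illustrative.  */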
20686 bool
20687 ix86_expand_int_movcc (rtx operands[])
20689 enum rtx_code code = GET_CODE (operands[1]), compare_code;
20690 rtx_insn *compare_seq;
20691 rtx compare_op;
20692 machine_mode mode = GET_MODE (operands[0]);
20693 bool sign_bit_compare_p = false;
20694 rtx op0 = XEXP (operands[1], 0);
20695 rtx op1 = XEXP (operands[1], 1);
20697 if (GET_MODE (op0) == TImode
20698 || (GET_MODE (op0) == DImode
20699 && !TARGET_64BIT))
20700 return false;
20702 start_sequence ();
20703 compare_op = ix86_expand_compare (code, op0, op1);
20704 compare_seq = get_insns ();
20705 end_sequence ();
20707 compare_code = GET_CODE (compare_op);
20709 if ((op1 == const0_rtx && (code == GE || code == LT))
20710 || (op1 == constm1_rtx && (code == GT || code == LE)))
20711 sign_bit_compare_p = true;
20713 /* Don't attempt mode expansion here -- if we had to expand 5 or 6
20714 HImode insns, we'd be swallowed in word prefix ops. */
20716 if ((mode != HImode || TARGET_FAST_PREFIX)
20717 && (mode != (TARGET_64BIT ? TImode : DImode))
20718 && CONST_INT_P (operands[2])
20719 && CONST_INT_P (operands[3]))
20721 rtx out = operands[0];
20722 HOST_WIDE_INT ct = INTVAL (operands[2]);
20723 HOST_WIDE_INT cf = INTVAL (operands[3]);
20724 HOST_WIDE_INT diff;
20726 diff = ct - cf;
20727 /* Sign bit compares are better done using shifts than by using
20728 sbb. */
20729 if (sign_bit_compare_p
20730 || ix86_expand_carry_flag_compare (code, op0, op1, &compare_op))
20732 /* Detect overlap between destination and compare sources. */
20733 rtx tmp = out;
20735 if (!sign_bit_compare_p)
20737 rtx flags;
20738 bool fpcmp = false;
20740 compare_code = GET_CODE (compare_op);
20742 flags = XEXP (compare_op, 0);
20744 if (GET_MODE (flags) == CCFPmode
20745 || GET_MODE (flags) == CCFPUmode)
20747 fpcmp = true;
20748 compare_code
20749 = ix86_fp_compare_code_to_integer (compare_code);
20752 /* To simplify rest of code, restrict to the GEU case. */
20753 if (compare_code == LTU)
20755 std::swap (ct, cf);
20756 compare_code = reverse_condition (compare_code);
20757 code = reverse_condition (code);
20759 else
20761 if (fpcmp)
20762 PUT_CODE (compare_op,
20763 reverse_condition_maybe_unordered
20764 (GET_CODE (compare_op)));
20765 else
20766 PUT_CODE (compare_op,
20767 reverse_condition (GET_CODE (compare_op)));
20769 diff = ct - cf;
20771 if (reg_overlap_mentioned_p (out, op0)
20772 || reg_overlap_mentioned_p (out, op1))
20773 tmp = gen_reg_rtx (mode);
20775 if (mode == DImode)
20776 emit_insn (gen_x86_movdicc_0_m1 (tmp, flags, compare_op));
20777 else
20778 emit_insn (gen_x86_movsicc_0_m1 (gen_lowpart (SImode, tmp),
20779 flags, compare_op));
20781 else
20783 if (code == GT || code == GE)
20784 code = reverse_condition (code);
20785 else
20787 std::swap (ct, cf);
20788 diff = ct - cf;
20790 tmp = emit_store_flag (tmp, code, op0, op1, VOIDmode, 0, -1);
20793 if (diff == 1)
20796 * cmpl op0,op1
20797 * sbbl dest,dest
20798 * [addl dest, ct]
20800 * Size 5 - 8.
20802 if (ct)
20803 tmp = expand_simple_binop (mode, PLUS,
20804 tmp, GEN_INT (ct),
20805 copy_rtx (tmp), 1, OPTAB_DIRECT);
20807 else if (cf == -1)
20810 * cmpl op0,op1
20811 * sbbl dest,dest
20812 * orl $ct, dest
20814 * Size 8.
20816 tmp = expand_simple_binop (mode, IOR,
20817 tmp, GEN_INT (ct),
20818 copy_rtx (tmp), 1, OPTAB_DIRECT);
20820 else if (diff == -1 && ct)
20823 * cmpl op0,op1
20824 * sbbl dest,dest
20825 * notl dest
20826 * [addl dest, cf]
20828 * Size 8 - 11.
20830 tmp = expand_simple_unop (mode, NOT, tmp, copy_rtx (tmp), 1);
20831 if (cf)
20832 tmp = expand_simple_binop (mode, PLUS,
20833 copy_rtx (tmp), GEN_INT (cf),
20834 copy_rtx (tmp), 1, OPTAB_DIRECT);
20836 else
20839 * cmpl op0,op1
20840 * sbbl dest,dest
20841 * [notl dest]
20842 * andl cf - ct, dest
20843 * [addl dest, ct]
20845 * Size 8 - 11.
20848 if (cf == 0)
20850 cf = ct;
20851 ct = 0;
20852 tmp = expand_simple_unop (mode, NOT, tmp, copy_rtx (tmp), 1);
20855 tmp = expand_simple_binop (mode, AND,
20856 copy_rtx (tmp),
20857 gen_int_mode (cf - ct, mode),
20858 copy_rtx (tmp), 1, OPTAB_DIRECT);
20859 if (ct)
20860 tmp = expand_simple_binop (mode, PLUS,
20861 copy_rtx (tmp), GEN_INT (ct),
20862 copy_rtx (tmp), 1, OPTAB_DIRECT);
20865 if (!rtx_equal_p (tmp, out))
20866 emit_move_insn (copy_rtx (out), copy_rtx (tmp));
20868 return true;
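/* The arithmetic behind the sbb sequences above, as a sketch: sbb
   materializes a mask M that is -1 when the (possibly reversed)
   condition holds and 0 otherwise, and
       cond ? ct : cf  ==  ((ct - cf) & M) + cf
   The diff == 1, cf == -1 and diff == -1 special cases are simply the
   forms of this identity that collapse into a single add, ior or not.  */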
20871 if (diff < 0)
20873 machine_mode cmp_mode = GET_MODE (op0);
20874 enum rtx_code new_code;
20876 if (SCALAR_FLOAT_MODE_P (cmp_mode))
20878 gcc_assert (!DECIMAL_FLOAT_MODE_P (cmp_mode));
20880 /* We may be reversing unordered compare to normal compare, that
20881 is not valid in general (we may convert non-trapping condition
20882 to trapping one), however on i386 we currently emit all
20883 comparisons unordered. */
20884 new_code = reverse_condition_maybe_unordered (code);
20886 else
20887 new_code = ix86_reverse_condition (code, cmp_mode);
20888 if (new_code != UNKNOWN)
20890 std::swap (ct, cf);
20891 diff = -diff;
20892 code = new_code;
20896 compare_code = UNKNOWN;
20897 if (GET_MODE_CLASS (GET_MODE (op0)) == MODE_INT
20898 && CONST_INT_P (op1))
20900 if (op1 == const0_rtx
20901 && (code == LT || code == GE))
20902 compare_code = code;
20903 else if (op1 == constm1_rtx)
20905 if (code == LE)
20906 compare_code = LT;
20907 else if (code == GT)
20908 compare_code = GE;
20912 /* Optimize dest = (op0 < 0) ? -1 : cf. */
20913 if (compare_code != UNKNOWN
20914 && GET_MODE (op0) == GET_MODE (out)
20915 && (cf == -1 || ct == -1))
20917 /* If lea code below could be used, only optimize
20918 if it results in a 2 insn sequence. */
20920 if (! (diff == 1 || diff == 2 || diff == 4 || diff == 8
20921 || diff == 3 || diff == 5 || diff == 9)
20922 || (compare_code == LT && ct == -1)
20923 || (compare_code == GE && cf == -1))
20926 * notl op1 (if necessary)
20927 * sarl $31, op1
20928 * orl cf, op1
20930 if (ct != -1)
20932 cf = ct;
20933 ct = -1;
20934 code = reverse_condition (code);
20937 out = emit_store_flag (out, code, op0, op1, VOIDmode, 0, -1);
20939 out = expand_simple_binop (mode, IOR,
20940 out, GEN_INT (cf),
20941 out, 1, OPTAB_DIRECT);
20942 if (out != operands[0])
20943 emit_move_insn (operands[0], out);
20945 return true;
20950 if ((diff == 1 || diff == 2 || diff == 4 || diff == 8
20951 || diff == 3 || diff == 5 || diff == 9)
20952 && ((mode != QImode && mode != HImode) || !TARGET_PARTIAL_REG_STALL)
20953 && (mode != DImode
20954 || x86_64_immediate_operand (GEN_INT (cf), VOIDmode)))
20957 * xorl dest,dest
20958 * cmpl op1,op2
20959 * setcc dest
20960 * lea cf(dest*(ct-cf)),dest
20962 * Size 14.
20964 * This also catches the degenerate setcc-only case.
20967 rtx tmp;
20968 int nops;
20970 out = emit_store_flag (out, code, op0, op1, VOIDmode, 0, 1);
20972 nops = 0;
20973 /* On x86_64 the lea instruction operates on Pmode, so we need
20974 to get the arithmetic done in the proper mode to match. */
20975 if (diff == 1)
20976 tmp = copy_rtx (out);
20977 else
20979 rtx out1;
20980 out1 = copy_rtx (out);
20981 tmp = gen_rtx_MULT (mode, out1, GEN_INT (diff & ~1));
20982 nops++;
20983 if (diff & 1)
20985 tmp = gen_rtx_PLUS (mode, tmp, out1);
20986 nops++;
20989 if (cf != 0)
20991 tmp = gen_rtx_PLUS (mode, tmp, GEN_INT (cf));
20992 nops++;
20994 if (!rtx_equal_p (tmp, out))
20996 if (nops == 1)
20997 out = force_operand (tmp, copy_rtx (out));
20998 else
20999 emit_insn (gen_rtx_SET (copy_rtx (out), copy_rtx (tmp)));
21001 if (!rtx_equal_p (out, operands[0]))
21002 emit_move_insn (operands[0], copy_rtx (out));
21004 return true;
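/* As an illustrative instance of the lea sequence above, with made-up
   constants, dest = (a < b) ? 7 : 2 has diff == 5, so after setcc
   leaves 0 or 1 in the destination a single
   "leal 2(%eax,%eax,4), %edx" computes 2 + 5 * %eax, i.e. 2 or 7,
   without any branch.  Register names are only illustrative.  */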
21008 * General case: Jumpful:
21009 * xorl dest,dest cmpl op1, op2
21010 * cmpl op1, op2 movl ct, dest
21011 * setcc dest jcc 1f
21012 * decl dest movl cf, dest
21013 * andl (cf-ct),dest 1:
21014 * addl ct,dest
21016 * Size 20. Size 14.
21018 * This is reasonably steep, but branch mispredict costs are
21019 * high on modern cpus, so consider failing only if optimizing
21020 * for space.
21023 if ((!TARGET_CMOVE || (mode == QImode && TARGET_PARTIAL_REG_STALL))
21024 && BRANCH_COST (optimize_insn_for_speed_p (),
21025 false) >= 2)
21027 if (cf == 0)
21029 machine_mode cmp_mode = GET_MODE (op0);
21030 enum rtx_code new_code;
21032 if (SCALAR_FLOAT_MODE_P (cmp_mode))
21034 gcc_assert (!DECIMAL_FLOAT_MODE_P (cmp_mode));
21036 /* We may be reversing unordered compare to normal compare,
21037 that is not valid in general (we may convert non-trapping
21038 condition to trapping one), however on i386 we currently
21039 emit all comparisons unordered. */
21040 new_code = reverse_condition_maybe_unordered (code);
21042 else
21044 new_code = ix86_reverse_condition (code, cmp_mode);
21045 if (compare_code != UNKNOWN && new_code != UNKNOWN)
21046 compare_code = reverse_condition (compare_code);
21049 if (new_code != UNKNOWN)
21051 cf = ct;
21052 ct = 0;
21053 code = new_code;
21057 if (compare_code != UNKNOWN)
21059 /* notl op1 (if needed)
21060 sarl $31, op1
21061 andl (cf-ct), op1
21062 addl ct, op1
21064 For x < 0 (resp. x <= -1) there will be no notl,
21065 so if possible swap the constants to get rid of the
21066 complement.
21067 True/false will be -1/0 while code below (store flag
21068 followed by decrement) is 0/-1, so the constants need
21069 to be exchanged once more. */
21071 if (compare_code == GE || !cf)
21073 code = reverse_condition (code);
21074 compare_code = LT;
21076 else
21077 std::swap (ct, cf);
21079 out = emit_store_flag (out, code, op0, op1, VOIDmode, 0, -1);
21081 else
21083 out = emit_store_flag (out, code, op0, op1, VOIDmode, 0, 1);
21085 out = expand_simple_binop (mode, PLUS, copy_rtx (out),
21086 constm1_rtx,
21087 copy_rtx (out), 1, OPTAB_DIRECT);
21090 out = expand_simple_binop (mode, AND, copy_rtx (out),
21091 gen_int_mode (cf - ct, mode),
21092 copy_rtx (out), 1, OPTAB_DIRECT);
21093 if (ct)
21094 out = expand_simple_binop (mode, PLUS, copy_rtx (out), GEN_INT (ct),
21095 copy_rtx (out), 1, OPTAB_DIRECT);
21096 if (!rtx_equal_p (out, operands[0]))
21097 emit_move_insn (operands[0], copy_rtx (out));
21099 return true;
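/* Working through the branch-free sequence above with made-up
   constants, dest = (a == b) ? 10 : 4:
       xorl  %eax, %eax
       cmpl  %ebx, %ecx
       sete  %al          ; %eax is 1 when equal, 0 otherwise
       decl  %eax         ; now 0 when equal, -1 otherwise
       andl  $-6, %eax    ; cf - ct = 4 - 10
       addl  $10, %eax    ; 10 when equal, 4 otherwise
   Only a sketch of the idea; the actual operand order and register
   choice depend on the expansion above.  */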
21103 if (!TARGET_CMOVE || (mode == QImode && TARGET_PARTIAL_REG_STALL))
21105 /* Try a few things more with specific constants and a variable. */
21107 optab op;
21108 rtx var, orig_out, out, tmp;
21110 if (BRANCH_COST (optimize_insn_for_speed_p (), false) <= 2)
21111 return false;
21113 /* If one of the two operands is an interesting constant, load a
21114 constant with the above and mask it in with a logical operation. */
21116 if (CONST_INT_P (operands[2]))
21118 var = operands[3];
21119 if (INTVAL (operands[2]) == 0 && operands[3] != constm1_rtx)
21120 operands[3] = constm1_rtx, op = and_optab;
21121 else if (INTVAL (operands[2]) == -1 && operands[3] != const0_rtx)
21122 operands[3] = const0_rtx, op = ior_optab;
21123 else
21124 return false;
21126 else if (CONST_INT_P (operands[3]))
21128 var = operands[2];
21129 if (INTVAL (operands[3]) == 0 && operands[2] != constm1_rtx)
21130 operands[2] = constm1_rtx, op = and_optab;
21131 else if (INTVAL (operands[3]) == -1 && operands[3] != const0_rtx)
21132 operands[2] = const0_rtx, op = ior_optab;
21133 else
21134 return false;
21136 else
21137 return false;
21139 orig_out = operands[0];
21140 tmp = gen_reg_rtx (mode);
21141 operands[0] = tmp;
21143 /* Recurse to get the constant loaded. */
21144 if (ix86_expand_int_movcc (operands) == 0)
21145 return false;
21147 /* Mask in the interesting variable. */
21148 out = expand_binop (mode, op, var, tmp, orig_out, 0,
21149 OPTAB_WIDEN);
21150 if (!rtx_equal_p (out, orig_out))
21151 emit_move_insn (copy_rtx (orig_out), copy_rtx (out));
21153 return true;
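/* A sketch of the masking trick above: for dest = cond ? 0 : var the
   recursive call materializes tmp = cond ? 0 : -1 via the setcc/sbb
   paths, and a single and of var with tmp then yields the result; the
   dest = cond ? -1 : var case works the same way with ior instead of
   and.  */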
21157 * For comparison with above,
21159 * movl cf,dest
21160 * movl ct,tmp
21161 * cmpl op1,op2
21162 * cmovcc tmp,dest
21164 * Size 15.
21167 if (! nonimmediate_operand (operands[2], mode))
21168 operands[2] = force_reg (mode, operands[2]);
21169 if (! nonimmediate_operand (operands[3], mode))
21170 operands[3] = force_reg (mode, operands[3]);
21172 if (! register_operand (operands[2], VOIDmode)
21173 && (mode == QImode
21174 || ! register_operand (operands[3], VOIDmode)))
21175 operands[2] = force_reg (mode, operands[2]);
21177 if (mode == QImode
21178 && ! register_operand (operands[3], VOIDmode))
21179 operands[3] = force_reg (mode, operands[3]);
21181 emit_insn (compare_seq);
21182 emit_insn (gen_rtx_SET (operands[0],
21183 gen_rtx_IF_THEN_ELSE (mode,
21184 compare_op, operands[2],
21185 operands[3])));
21186 return true;
21189 /* Swap, force into registers, or otherwise massage the two operands
21190 to an sse comparison with a mask result. Thus we differ a bit from
21191 ix86_prepare_fp_compare_args which expects to produce a flags result.
21193 The DEST operand exists to help determine whether to commute commutative
21194 operators. The POP0/POP1 operands are updated in place. The new
21195 comparison code is returned, or UNKNOWN if not implementable. */
21197 static enum rtx_code
21198 ix86_prepare_sse_fp_compare_args (rtx dest, enum rtx_code code,
21199 rtx *pop0, rtx *pop1)
21201 switch (code)
21203 case LTGT:
21204 case UNEQ:
21205 /* AVX supports all the needed comparisons. */
21206 if (TARGET_AVX)
21207 break;
21208 /* We have no LTGT as an operator. We could implement it with
21209 NE & ORDERED, but this requires an extra temporary. It's
21210 not clear that it's worth it. */
21211 return UNKNOWN;
21213 case LT:
21214 case LE:
21215 case UNGT:
21216 case UNGE:
21217 /* These are supported directly. */
21218 break;
21220 case EQ:
21221 case NE:
21222 case UNORDERED:
21223 case ORDERED:
21224 /* AVX has 3 operand comparisons, no need to swap anything. */
21225 if (TARGET_AVX)
21226 break;
21227 /* For commutative operators, try to canonicalize the destination
21228 operand to be first in the comparison - this helps reload to
21229 avoid extra moves. */
21230 if (!dest || !rtx_equal_p (dest, *pop1))
21231 break;
21232 /* FALLTHRU */
21234 case GE:
21235 case GT:
21236 case UNLE:
21237 case UNLT:
21238 /* These are not supported directly before AVX, and furthermore
21239 ix86_expand_sse_fp_minmax only optimizes LT/UNGE. Swap the
21240 comparison operands to transform into something that is
21241 supported. */
21242 std::swap (*pop0, *pop1);
21243 code = swap_condition (code);
21244 break;
21246 default:
21247 gcc_unreachable ();
21250 return code;
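/* For example, before AVX the cmpps/cmpsd immediates only encode
   EQ/LT/LE/UNORDERED and their negations, so a request for "a > b"
   leaves this function as "b < a" with *pop0 and *pop1 swapped, which
   the caller can then emit directly.  */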
21253 /* Detect conditional moves that exactly match min/max operational
21254 semantics. Note that this is IEEE safe, as long as we don't
21255 interchange the operands.
21257 Returns FALSE if this conditional move doesn't match a MIN/MAX,
21258 and TRUE if the operation is successful and instructions are emitted. */
21260 static bool
21261 ix86_expand_sse_fp_minmax (rtx dest, enum rtx_code code, rtx cmp_op0,
21262 rtx cmp_op1, rtx if_true, rtx if_false)
21264 machine_mode mode;
21265 bool is_min;
21266 rtx tmp;
21268 if (code == LT)
21270 else if (code == UNGE)
21271 std::swap (if_true, if_false);
21272 else
21273 return false;
21275 if (rtx_equal_p (cmp_op0, if_true) && rtx_equal_p (cmp_op1, if_false))
21276 is_min = true;
21277 else if (rtx_equal_p (cmp_op1, if_true) && rtx_equal_p (cmp_op0, if_false))
21278 is_min = false;
21279 else
21280 return false;
21282 mode = GET_MODE (dest);
21284 /* We want to check HONOR_NANS and HONOR_SIGNED_ZEROS here,
21285 but MODE may be a vector mode and thus not appropriate. */
21286 if (!flag_finite_math_only || !flag_unsafe_math_optimizations)
21288 int u = is_min ? UNSPEC_IEEE_MIN : UNSPEC_IEEE_MAX;
21289 rtvec v;
21291 if_true = force_reg (mode, if_true);
21292 v = gen_rtvec (2, if_true, if_false);
21293 tmp = gen_rtx_UNSPEC (mode, v, u);
21295 else
21297 code = is_min ? SMIN : SMAX;
21298 tmp = gen_rtx_fmt_ee (code, mode, if_true, if_false);
21301 emit_insn (gen_rtx_SET (dest, tmp));
21302 return true;
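/* As a concrete case, "a < b ? a : b" matches minss/minps with the
   operands in exactly that order: the hardware returns the second
   operand when the comparison is unordered or when both inputs are
   zero, which is also what the C expression yields, so the transform
   stays IEEE safe precisely because the operands are never
   interchanged.  */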
21305 /* Expand an sse vector comparison. Return the register with the result. */
21307 static rtx
21308 ix86_expand_sse_cmp (rtx dest, enum rtx_code code, rtx cmp_op0, rtx cmp_op1,
21309 rtx op_true, rtx op_false)
21311 machine_mode mode = GET_MODE (dest);
21312 machine_mode cmp_ops_mode = GET_MODE (cmp_op0);
21314 /* In the general case the result of the comparison can differ from the operands' type. */
21315 machine_mode cmp_mode;
21317 /* In AVX512F the result of comparison is an integer mask. */
21318 bool maskcmp = false;
21319 rtx x;
21321 if (GET_MODE_SIZE (cmp_ops_mode) == 64)
21323 cmp_mode = mode_for_size (GET_MODE_NUNITS (cmp_ops_mode), MODE_INT, 0);
21324 gcc_assert (cmp_mode != BLKmode);
21326 maskcmp = true;
21328 else
21329 cmp_mode = cmp_ops_mode;
21332 cmp_op0 = force_reg (cmp_ops_mode, cmp_op0);
21333 if (!nonimmediate_operand (cmp_op1, cmp_ops_mode))
21334 cmp_op1 = force_reg (cmp_ops_mode, cmp_op1);
21336 if (optimize
21337 || reg_overlap_mentioned_p (dest, op_true)
21338 || reg_overlap_mentioned_p (dest, op_false))
21339 dest = gen_reg_rtx (maskcmp ? cmp_mode : mode);
21341 /* Compare patterns for int modes are unspec in AVX512F only. */
21342 if (maskcmp && (code == GT || code == EQ))
21344 rtx (*gen)(rtx, rtx, rtx);
21346 switch (cmp_ops_mode)
21348 case V64QImode:
21349 gcc_assert (TARGET_AVX512BW);
21350 gen = code == GT ? gen_avx512bw_gtv64qi3 : gen_avx512bw_eqv64qi3_1;
21351 break;
21352 case V32HImode:
21353 gcc_assert (TARGET_AVX512BW);
21354 gen = code == GT ? gen_avx512bw_gtv32hi3 : gen_avx512bw_eqv32hi3_1;
21355 break;
21356 case V16SImode:
21357 gen = code == GT ? gen_avx512f_gtv16si3 : gen_avx512f_eqv16si3_1;
21358 break;
21359 case V8DImode:
21360 gen = code == GT ? gen_avx512f_gtv8di3 : gen_avx512f_eqv8di3_1;
21361 break;
21362 default:
21363 gen = NULL;
21366 if (gen)
21368 emit_insn (gen (dest, cmp_op0, cmp_op1));
21369 return dest;
21372 x = gen_rtx_fmt_ee (code, cmp_mode, cmp_op0, cmp_op1);
21374 if (cmp_mode != mode && !maskcmp)
21376 x = force_reg (cmp_ops_mode, x);
21377 convert_move (dest, x, false);
21379 else
21380 emit_insn (gen_rtx_SET (dest, x));
21382 return dest;
21385 /* Expand DEST = CMP ? OP_TRUE : OP_FALSE into a sequence of logical
21386 operations. This is used for both scalar and vector conditional moves. */
21388 static void
21389 ix86_expand_sse_movcc (rtx dest, rtx cmp, rtx op_true, rtx op_false)
21391 machine_mode mode = GET_MODE (dest);
21392 machine_mode cmpmode = GET_MODE (cmp);
21394 /* In AVX512F the result of comparison is an integer mask. */
21395 bool maskcmp = (mode != cmpmode && TARGET_AVX512F);
21397 rtx t2, t3, x;
21399 if (vector_all_ones_operand (op_true, mode)
21400 && rtx_equal_p (op_false, CONST0_RTX (mode))
21401 && !maskcmp)
21403 emit_insn (gen_rtx_SET (dest, cmp));
21405 else if (op_false == CONST0_RTX (mode)
21406 && !maskcmp)
21408 op_true = force_reg (mode, op_true);
21409 x = gen_rtx_AND (mode, cmp, op_true);
21410 emit_insn (gen_rtx_SET (dest, x));
21412 else if (op_true == CONST0_RTX (mode)
21413 && !maskcmp)
21415 op_false = force_reg (mode, op_false);
21416 x = gen_rtx_NOT (mode, cmp);
21417 x = gen_rtx_AND (mode, x, op_false);
21418 emit_insn (gen_rtx_SET (dest, x));
21420 else if (INTEGRAL_MODE_P (mode) && op_true == CONSTM1_RTX (mode)
21421 && !maskcmp)
21423 op_false = force_reg (mode, op_false);
21424 x = gen_rtx_IOR (mode, cmp, op_false);
21425 emit_insn (gen_rtx_SET (dest, x));
21427 else if (TARGET_XOP
21428 && !maskcmp)
21430 op_true = force_reg (mode, op_true);
21432 if (!nonimmediate_operand (op_false, mode))
21433 op_false = force_reg (mode, op_false);
21435 emit_insn (gen_rtx_SET (dest, gen_rtx_IF_THEN_ELSE (mode, cmp,
21436 op_true,
21437 op_false)));
21439 else
21441 rtx (*gen) (rtx, rtx, rtx, rtx) = NULL;
21442 rtx d = dest;
21444 if (!nonimmediate_operand (op_true, mode))
21445 op_true = force_reg (mode, op_true);
21447 op_false = force_reg (mode, op_false);
21449 switch (mode)
21451 case V4SFmode:
21452 if (TARGET_SSE4_1)
21453 gen = gen_sse4_1_blendvps;
21454 break;
21455 case V2DFmode:
21456 if (TARGET_SSE4_1)
21457 gen = gen_sse4_1_blendvpd;
21458 break;
21459 case V16QImode:
21460 case V8HImode:
21461 case V4SImode:
21462 case V2DImode:
21463 if (TARGET_SSE4_1)
21465 gen = gen_sse4_1_pblendvb;
21466 if (mode != V16QImode)
21467 d = gen_reg_rtx (V16QImode);
21468 op_false = gen_lowpart (V16QImode, op_false);
21469 op_true = gen_lowpart (V16QImode, op_true);
21470 cmp = gen_lowpart (V16QImode, cmp);
21472 break;
21473 case V8SFmode:
21474 if (TARGET_AVX)
21475 gen = gen_avx_blendvps256;
21476 break;
21477 case V4DFmode:
21478 if (TARGET_AVX)
21479 gen = gen_avx_blendvpd256;
21480 break;
21481 case V32QImode:
21482 case V16HImode:
21483 case V8SImode:
21484 case V4DImode:
21485 if (TARGET_AVX2)
21487 gen = gen_avx2_pblendvb;
21488 if (mode != V32QImode)
21489 d = gen_reg_rtx (V32QImode);
21490 op_false = gen_lowpart (V32QImode, op_false);
21491 op_true = gen_lowpart (V32QImode, op_true);
21492 cmp = gen_lowpart (V32QImode, cmp);
21494 break;
21496 case V64QImode:
21497 gen = gen_avx512bw_blendmv64qi;
21498 break;
21499 case V32HImode:
21500 gen = gen_avx512bw_blendmv32hi;
21501 break;
21502 case V16SImode:
21503 gen = gen_avx512f_blendmv16si;
21504 break;
21505 case V8DImode:
21506 gen = gen_avx512f_blendmv8di;
21507 break;
21508 case V8DFmode:
21509 gen = gen_avx512f_blendmv8df;
21510 break;
21511 case V16SFmode:
21512 gen = gen_avx512f_blendmv16sf;
21513 break;
21515 default:
21516 break;
21519 if (gen != NULL)
21521 emit_insn (gen (d, op_false, op_true, cmp));
21522 if (d != dest)
21523 emit_move_insn (dest, gen_lowpart (GET_MODE (dest), d));
21525 else
21527 op_true = force_reg (mode, op_true);
21529 t2 = gen_reg_rtx (mode);
21530 if (optimize)
21531 t3 = gen_reg_rtx (mode);
21532 else
21533 t3 = dest;
21535 x = gen_rtx_AND (mode, op_true, cmp);
21536 emit_insn (gen_rtx_SET (t2, x));
21538 x = gen_rtx_NOT (mode, cmp);
21539 x = gen_rtx_AND (mode, x, op_false);
21540 emit_insn (gen_rtx_SET (t3, x));
21542 x = gen_rtx_IOR (mode, t3, t2);
21543 emit_insn (gen_rtx_SET (dest, x));
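/* Without a suitable blend instruction the fallback above is the
   classic full-mask select, roughly
       t2   = cmp  &  op_true
       t3   = ~cmp &  op_false     (a single pandn)
       dest = t2 | t3
   which relies on the comparison producing all-ones or all-zeros
   elements rather than an AVX-512 style bit mask.  */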
21548 /* Expand a floating-point conditional move. Return true if successful. */
21550 bool
21551 ix86_expand_fp_movcc (rtx operands[])
21553 machine_mode mode = GET_MODE (operands[0]);
21554 enum rtx_code code = GET_CODE (operands[1]);
21555 rtx tmp, compare_op;
21556 rtx op0 = XEXP (operands[1], 0);
21557 rtx op1 = XEXP (operands[1], 1);
21559 if (TARGET_SSE_MATH && SSE_FLOAT_MODE_P (mode))
21561 machine_mode cmode;
21563 /* Since we've no cmove for sse registers, don't force bad register
21564 allocation just to gain access to it. Deny movcc when the
21565 comparison mode doesn't match the move mode. */
21566 cmode = GET_MODE (op0);
21567 if (cmode == VOIDmode)
21568 cmode = GET_MODE (op1);
21569 if (cmode != mode)
21570 return false;
21572 code = ix86_prepare_sse_fp_compare_args (operands[0], code, &op0, &op1);
21573 if (code == UNKNOWN)
21574 return false;
21576 if (ix86_expand_sse_fp_minmax (operands[0], code, op0, op1,
21577 operands[2], operands[3]))
21578 return true;
21580 tmp = ix86_expand_sse_cmp (operands[0], code, op0, op1,
21581 operands[2], operands[3]);
21582 ix86_expand_sse_movcc (operands[0], tmp, operands[2], operands[3]);
21583 return true;
21586 if (GET_MODE (op0) == TImode
21587 || (GET_MODE (op0) == DImode
21588 && !TARGET_64BIT))
21589 return false;
21591 /* The floating point conditional move instructions don't directly
21592 support conditions resulting from a signed integer comparison. */
21594 compare_op = ix86_expand_compare (code, op0, op1);
21595 if (!fcmov_comparison_operator (compare_op, VOIDmode))
21597 tmp = gen_reg_rtx (QImode);
21598 ix86_expand_setcc (tmp, code, op0, op1);
21600 compare_op = ix86_expand_compare (NE, tmp, const0_rtx);
21603 emit_insn (gen_rtx_SET (operands[0],
21604 gen_rtx_IF_THEN_ELSE (mode, compare_op,
21605 operands[2], operands[3])));
21607 return true;
21610 /* Expand a floating-point vector conditional move; a vcond operation
21611 rather than a movcc operation. */
21613 bool
21614 ix86_expand_fp_vcond (rtx operands[])
21616 enum rtx_code code = GET_CODE (operands[3]);
21617 rtx cmp;
21619 code = ix86_prepare_sse_fp_compare_args (operands[0], code,
21620 &operands[4], &operands[5]);
21621 if (code == UNKNOWN)
21623 rtx temp;
21624 switch (GET_CODE (operands[3]))
21626 case LTGT:
21627 temp = ix86_expand_sse_cmp (operands[0], ORDERED, operands[4],
21628 operands[5], operands[0], operands[0]);
21629 cmp = ix86_expand_sse_cmp (operands[0], NE, operands[4],
21630 operands[5], operands[1], operands[2]);
21631 code = AND;
21632 break;
21633 case UNEQ:
21634 temp = ix86_expand_sse_cmp (operands[0], UNORDERED, operands[4],
21635 operands[5], operands[0], operands[0]);
21636 cmp = ix86_expand_sse_cmp (operands[0], EQ, operands[4],
21637 operands[5], operands[1], operands[2]);
21638 code = IOR;
21639 break;
21640 default:
21641 gcc_unreachable ();
21643 cmp = expand_simple_binop (GET_MODE (cmp), code, temp, cmp, cmp, 1,
21644 OPTAB_DIRECT);
21645 ix86_expand_sse_movcc (operands[0], cmp, operands[1], operands[2]);
21646 return true;
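/* The UNKNOWN fallback above decomposes the two predicates that have
   no direct SSE encoding: LTGT (a, b) is built as
   ORDERED (a, b) & NE (a, b), and UNEQ (a, b) as
   UNORDERED (a, b) | EQ (a, b), each half being an ordinary
   ix86_expand_sse_cmp mask that is then combined with and/ior.  */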
21649 if (ix86_expand_sse_fp_minmax (operands[0], code, operands[4],
21650 operands[5], operands[1], operands[2]))
21651 return true;
21653 cmp = ix86_expand_sse_cmp (operands[0], code, operands[4], operands[5],
21654 operands[1], operands[2]);
21655 ix86_expand_sse_movcc (operands[0], cmp, operands[1], operands[2]);
21656 return true;
21659 /* Expand a signed/unsigned integral vector conditional move. */
21661 bool
21662 ix86_expand_int_vcond (rtx operands[])
21664 machine_mode data_mode = GET_MODE (operands[0]);
21665 machine_mode mode = GET_MODE (operands[4]);
21666 enum rtx_code code = GET_CODE (operands[3]);
21667 bool negate = false;
21668 rtx x, cop0, cop1;
21670 cop0 = operands[4];
21671 cop1 = operands[5];
21673 /* Try to optimize x < 0 ? -1 : 0 into (signed) x >> 31
21674 and x < 0 ? 1 : 0 into (unsigned) x >> 31. */
21675 if ((code == LT || code == GE)
21676 && data_mode == mode
21677 && cop1 == CONST0_RTX (mode)
21678 && operands[1 + (code == LT)] == CONST0_RTX (data_mode)
21679 && GET_MODE_SIZE (GET_MODE_INNER (data_mode)) > 1
21680 && GET_MODE_SIZE (GET_MODE_INNER (data_mode)) <= 8
21681 && (GET_MODE_SIZE (data_mode) == 16
21682 || (TARGET_AVX2 && GET_MODE_SIZE (data_mode) == 32)))
21684 rtx negop = operands[2 - (code == LT)];
21685 int shift = GET_MODE_BITSIZE (GET_MODE_INNER (data_mode)) - 1;
21686 if (negop == CONST1_RTX (data_mode))
21688 rtx res = expand_simple_binop (mode, LSHIFTRT, cop0, GEN_INT (shift),
21689 operands[0], 1, OPTAB_DIRECT);
21690 if (res != operands[0])
21691 emit_move_insn (operands[0], res);
21692 return true;
21694 else if (GET_MODE_INNER (data_mode) != DImode
21695 && vector_all_ones_operand (negop, data_mode))
21697 rtx res = expand_simple_binop (mode, ASHIFTRT, cop0, GEN_INT (shift),
21698 operands[0], 0, OPTAB_DIRECT);
21699 if (res != operands[0])
21700 emit_move_insn (operands[0], res);
21701 return true;
21705 if (!nonimmediate_operand (cop1, mode))
21706 cop1 = force_reg (mode, cop1);
21707 if (!general_operand (operands[1], data_mode))
21708 operands[1] = force_reg (data_mode, operands[1]);
21709 if (!general_operand (operands[2], data_mode))
21710 operands[2] = force_reg (data_mode, operands[2]);
21712 /* XOP supports all of the comparisons on all 128-bit vector int types. */
21713 if (TARGET_XOP
21714 && (mode == V16QImode || mode == V8HImode
21715 || mode == V4SImode || mode == V2DImode))
21717 else
21719 /* Canonicalize the comparison to EQ, GT, GTU. */
21720 switch (code)
21722 case EQ:
21723 case GT:
21724 case GTU:
21725 break;
21727 case NE:
21728 case LE:
21729 case LEU:
21730 code = reverse_condition (code);
21731 negate = true;
21732 break;
21734 case GE:
21735 case GEU:
21736 code = reverse_condition (code);
21737 negate = true;
21738 /* FALLTHRU */
21740 case LT:
21741 case LTU:
21742 std::swap (cop0, cop1);
21743 code = swap_condition (code);
21744 break;
21746 default:
21747 gcc_unreachable ();
21750 /* Only SSE4.1/SSE4.2 supports V2DImode. */
21751 if (mode == V2DImode)
21753 switch (code)
21755 case EQ:
21756 /* SSE4.1 supports EQ. */
21757 if (!TARGET_SSE4_1)
21758 return false;
21759 break;
21761 case GT:
21762 case GTU:
21763 /* SSE4.2 supports GT/GTU. */
21764 if (!TARGET_SSE4_2)
21765 return false;
21766 break;
21768 default:
21769 gcc_unreachable ();
21773 /* Unsigned parallel compare is not supported by the hardware.
21774 Play some tricks to turn this into a signed comparison
21775 against 0. */
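/* Illustrating the trick for 32/64-bit elements: a >u b holds exactly
   when (a - 0x80000000) >s (b - 0x80000000), so subtracting the
   sign-bit mask from both operands lets the signed pcmpgt do an
   unsigned compare.  For 8/16-bit elements a saturating subtract is
   used instead: (a -us b) == 0 is equivalent to a <=u b, which is
   then negated.  */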
21776 if (code == GTU)
21778 cop0 = force_reg (mode, cop0);
21780 switch (mode)
21782 case V16SImode:
21783 case V8DImode:
21784 case V8SImode:
21785 case V4DImode:
21786 case V4SImode:
21787 case V2DImode:
21789 rtx t1, t2, mask;
21790 rtx (*gen_sub3) (rtx, rtx, rtx);
21792 switch (mode)
21794 case V16SImode: gen_sub3 = gen_subv16si3; break;
21795 case V8DImode: gen_sub3 = gen_subv8di3; break;
21796 case V8SImode: gen_sub3 = gen_subv8si3; break;
21797 case V4DImode: gen_sub3 = gen_subv4di3; break;
21798 case V4SImode: gen_sub3 = gen_subv4si3; break;
21799 case V2DImode: gen_sub3 = gen_subv2di3; break;
21800 default:
21801 gcc_unreachable ();
21803 /* Subtract (-(INT MAX) - 1) from both operands to make
21804 them signed. */
21805 mask = ix86_build_signbit_mask (mode, true, false);
21806 t1 = gen_reg_rtx (mode);
21807 emit_insn (gen_sub3 (t1, cop0, mask));
21809 t2 = gen_reg_rtx (mode);
21810 emit_insn (gen_sub3 (t2, cop1, mask));
21812 cop0 = t1;
21813 cop1 = t2;
21814 code = GT;
21816 break;
21818 case V64QImode:
21819 case V32HImode:
21820 case V32QImode:
21821 case V16HImode:
21822 case V16QImode:
21823 case V8HImode:
21824 /* Perform a parallel unsigned saturating subtraction. */
21825 x = gen_reg_rtx (mode);
21826 emit_insn (gen_rtx_SET (x, gen_rtx_US_MINUS (mode, cop0, cop1)));
21828 cop0 = x;
21829 cop1 = CONST0_RTX (mode);
21830 code = EQ;
21831 negate = !negate;
21832 break;
21834 default:
21835 gcc_unreachable ();
21840 /* Allow the comparison to be done in one mode, but the movcc to
21841 happen in another mode. */
21842 if (data_mode == mode)
21844 x = ix86_expand_sse_cmp (operands[0], code, cop0, cop1,
21845 operands[1+negate], operands[2-negate]);
21847 else
21849 gcc_assert (GET_MODE_SIZE (data_mode) == GET_MODE_SIZE (mode));
21850 x = ix86_expand_sse_cmp (gen_reg_rtx (mode), code, cop0, cop1,
21851 operands[1+negate], operands[2-negate]);
21852 if (GET_MODE (x) == mode)
21853 x = gen_lowpart (data_mode, x);
21856 ix86_expand_sse_movcc (operands[0], x, operands[1+negate],
21857 operands[2-negate]);
21858 return true;
21861 /* AVX512F does support 64-byte integer vector operations,
21862 thus the longest vector we are faced with is V64QImode. */
21863 #define MAX_VECT_LEN 64
21865 struct expand_vec_perm_d
21867 rtx target, op0, op1;
21868 unsigned char perm[MAX_VECT_LEN];
21869 machine_mode vmode;
21870 unsigned char nelt;
21871 bool one_operand_p;
21872 bool testing_p;
21875 static bool
21876 ix86_expand_vec_perm_vpermi2 (rtx target, rtx op0, rtx mask, rtx op1,
21877 struct expand_vec_perm_d *d)
21879 /* ix86_expand_vec_perm_vpermi2 is called from both const and non-const
21880 expander, so args are either in d, or in op0, op1 etc. */
21881 machine_mode mode = GET_MODE (d ? d->op0 : op0);
21882 machine_mode maskmode = mode;
21883 rtx (*gen) (rtx, rtx, rtx, rtx) = NULL;
21885 switch (mode)
21887 case V8HImode:
21888 if (TARGET_AVX512VL && TARGET_AVX512BW)
21889 gen = gen_avx512vl_vpermi2varv8hi3;
21890 break;
21891 case V16HImode:
21892 if (TARGET_AVX512VL && TARGET_AVX512BW)
21893 gen = gen_avx512vl_vpermi2varv16hi3;
21894 break;
21895 case V64QImode:
21896 if (TARGET_AVX512VBMI)
21897 gen = gen_avx512bw_vpermi2varv64qi3;
21898 break;
21899 case V32HImode:
21900 if (TARGET_AVX512BW)
21901 gen = gen_avx512bw_vpermi2varv32hi3;
21902 break;
21903 case V4SImode:
21904 if (TARGET_AVX512VL)
21905 gen = gen_avx512vl_vpermi2varv4si3;
21906 break;
21907 case V8SImode:
21908 if (TARGET_AVX512VL)
21909 gen = gen_avx512vl_vpermi2varv8si3;
21910 break;
21911 case V16SImode:
21912 if (TARGET_AVX512F)
21913 gen = gen_avx512f_vpermi2varv16si3;
21914 break;
21915 case V4SFmode:
21916 if (TARGET_AVX512VL)
21918 gen = gen_avx512vl_vpermi2varv4sf3;
21919 maskmode = V4SImode;
21921 break;
21922 case V8SFmode:
21923 if (TARGET_AVX512VL)
21925 gen = gen_avx512vl_vpermi2varv8sf3;
21926 maskmode = V8SImode;
21928 break;
21929 case V16SFmode:
21930 if (TARGET_AVX512F)
21932 gen = gen_avx512f_vpermi2varv16sf3;
21933 maskmode = V16SImode;
21935 break;
21936 case V2DImode:
21937 if (TARGET_AVX512VL)
21938 gen = gen_avx512vl_vpermi2varv2di3;
21939 break;
21940 case V4DImode:
21941 if (TARGET_AVX512VL)
21942 gen = gen_avx512vl_vpermi2varv4di3;
21943 break;
21944 case V8DImode:
21945 if (TARGET_AVX512F)
21946 gen = gen_avx512f_vpermi2varv8di3;
21947 break;
21948 case V2DFmode:
21949 if (TARGET_AVX512VL)
21951 gen = gen_avx512vl_vpermi2varv2df3;
21952 maskmode = V2DImode;
21954 break;
21955 case V4DFmode:
21956 if (TARGET_AVX512VL)
21958 gen = gen_avx512vl_vpermi2varv4df3;
21959 maskmode = V4DImode;
21961 break;
21962 case V8DFmode:
21963 if (TARGET_AVX512F)
21965 gen = gen_avx512f_vpermi2varv8df3;
21966 maskmode = V8DImode;
21968 break;
21969 default:
21970 break;
21973 if (gen == NULL)
21974 return false;
21976 /* ix86_expand_vec_perm_vpermi2 is called from both const and non-const
21977 expander, so args are either in d, or in op0, op1 etc. */
21978 if (d)
21980 rtx vec[64];
21981 target = d->target;
21982 op0 = d->op0;
21983 op1 = d->op1;
21984 for (int i = 0; i < d->nelt; ++i)
21985 vec[i] = GEN_INT (d->perm[i]);
21986 mask = gen_rtx_CONST_VECTOR (maskmode, gen_rtvec_v (d->nelt, vec));
21989 emit_insn (gen (target, op0, force_reg (maskmode, mask), op1));
21990 return true;
21993 /* Expand a variable vector permutation. */
21995 void
21996 ix86_expand_vec_perm (rtx operands[])
21998 rtx target = operands[0];
21999 rtx op0 = operands[1];
22000 rtx op1 = operands[2];
22001 rtx mask = operands[3];
22002 rtx t1, t2, t3, t4, t5, t6, t7, t8, vt, vt2, vec[32];
22003 machine_mode mode = GET_MODE (op0);
22004 machine_mode maskmode = GET_MODE (mask);
22005 int w, e, i;
22006 bool one_operand_shuffle = rtx_equal_p (op0, op1);
22008 /* Number of elements in the vector. */
22009 w = GET_MODE_NUNITS (mode);
22010 e = GET_MODE_UNIT_SIZE (mode);
22011 gcc_assert (w <= 64);
22013 if (ix86_expand_vec_perm_vpermi2 (target, op0, mask, op1, NULL))
22014 return;
22016 if (TARGET_AVX2)
22018 if (mode == V4DImode || mode == V4DFmode || mode == V16HImode)
22020 /* Unfortunately, the VPERMQ and VPERMPD instructions only support
22021 a constant shuffle operand. With a tiny bit of effort we can
22022 use VPERMD instead. A re-interpretation stall for V4DFmode is
22023 unfortunate but there's no avoiding it.
22024 Similarly for V16HImode we don't have instructions for variable
22025 shuffling, while for V32QImode, after preparing suitable masks,
22026 we can use vpshufb; vpshufb; vpermq; vpor. */
22028 if (mode == V16HImode)
22030 maskmode = mode = V32QImode;
22031 w = 32;
22032 e = 1;
22034 else
22036 maskmode = mode = V8SImode;
22037 w = 8;
22038 e = 4;
22040 t1 = gen_reg_rtx (maskmode);
22042 /* Replicate the low bits of the V4DImode mask into V8SImode:
22043 mask = { A B C D }
22044 t1 = { A A B B C C D D }. */
22045 for (i = 0; i < w / 2; ++i)
22046 vec[i*2 + 1] = vec[i*2] = GEN_INT (i * 2);
22047 vt = gen_rtx_CONST_VECTOR (maskmode, gen_rtvec_v (w, vec));
22048 vt = force_reg (maskmode, vt);
22049 mask = gen_lowpart (maskmode, mask);
22050 if (maskmode == V8SImode)
22051 emit_insn (gen_avx2_permvarv8si (t1, mask, vt));
22052 else
22053 emit_insn (gen_avx2_pshufbv32qi3 (t1, mask, vt));
22055 /* Multiply the shuffle indices by two. */
22056 t1 = expand_simple_binop (maskmode, PLUS, t1, t1, t1, 1,
22057 OPTAB_DIRECT);
22059 /* Add one to the odd shuffle indices:
22060 t1 = { A*2, A*2+1, B*2, B*2+1, ... }. */
22061 for (i = 0; i < w / 2; ++i)
22063 vec[i * 2] = const0_rtx;
22064 vec[i * 2 + 1] = const1_rtx;
22066 vt = gen_rtx_CONST_VECTOR (maskmode, gen_rtvec_v (w, vec));
22067 vt = validize_mem (force_const_mem (maskmode, vt));
22068 t1 = expand_simple_binop (maskmode, PLUS, t1, vt, t1, 1,
22069 OPTAB_DIRECT);
22071 /* Continue as if V8SImode (resp. V32QImode) was used initially. */
22072 operands[3] = mask = t1;
22073 target = gen_reg_rtx (mode);
22074 op0 = gen_lowpart (mode, op0);
22075 op1 = gen_lowpart (mode, op1);
22078 switch (mode)
22080 case V8SImode:
22081 /* The VPERMD and VPERMPS instructions already properly ignore
22082 the high bits of the shuffle elements. No need for us to
22083 perform an AND ourselves. */
22084 if (one_operand_shuffle)
22086 emit_insn (gen_avx2_permvarv8si (target, op0, mask));
22087 if (target != operands[0])
22088 emit_move_insn (operands[0],
22089 gen_lowpart (GET_MODE (operands[0]), target));
22091 else
22093 t1 = gen_reg_rtx (V8SImode);
22094 t2 = gen_reg_rtx (V8SImode);
22095 emit_insn (gen_avx2_permvarv8si (t1, op0, mask));
22096 emit_insn (gen_avx2_permvarv8si (t2, op1, mask));
22097 goto merge_two;
22099 return;
22101 case V8SFmode:
22102 mask = gen_lowpart (V8SImode, mask);
22103 if (one_operand_shuffle)
22104 emit_insn (gen_avx2_permvarv8sf (target, op0, mask));
22105 else
22107 t1 = gen_reg_rtx (V8SFmode);
22108 t2 = gen_reg_rtx (V8SFmode);
22109 emit_insn (gen_avx2_permvarv8sf (t1, op0, mask));
22110 emit_insn (gen_avx2_permvarv8sf (t2, op1, mask));
22111 goto merge_two;
22113 return;
22115 case V4SImode:
22116 /* By combining the two 128-bit input vectors into one 256-bit
22117 input vector, we can use VPERMD and VPERMPS for the full
22118 two-operand shuffle. */
22119 t1 = gen_reg_rtx (V8SImode);
22120 t2 = gen_reg_rtx (V8SImode);
22121 emit_insn (gen_avx_vec_concatv8si (t1, op0, op1));
22122 emit_insn (gen_avx_vec_concatv8si (t2, mask, mask));
22123 emit_insn (gen_avx2_permvarv8si (t1, t1, t2));
22124 emit_insn (gen_avx_vextractf128v8si (target, t1, const0_rtx));
22125 return;
22127 case V4SFmode:
22128 t1 = gen_reg_rtx (V8SFmode);
22129 t2 = gen_reg_rtx (V8SImode);
22130 mask = gen_lowpart (V4SImode, mask);
22131 emit_insn (gen_avx_vec_concatv8sf (t1, op0, op1));
22132 emit_insn (gen_avx_vec_concatv8si (t2, mask, mask));
22133 emit_insn (gen_avx2_permvarv8sf (t1, t1, t2));
22134 emit_insn (gen_avx_vextractf128v8sf (target, t1, const0_rtx));
22135 return;
22137 case V32QImode:
22138 t1 = gen_reg_rtx (V32QImode);
22139 t2 = gen_reg_rtx (V32QImode);
22140 t3 = gen_reg_rtx (V32QImode);
22141 vt2 = GEN_INT (-128);
22142 for (i = 0; i < 32; i++)
22143 vec[i] = vt2;
22144 vt = gen_rtx_CONST_VECTOR (V32QImode, gen_rtvec_v (32, vec));
22145 vt = force_reg (V32QImode, vt);
22146 for (i = 0; i < 32; i++)
22147 vec[i] = i < 16 ? vt2 : const0_rtx;
22148 vt2 = gen_rtx_CONST_VECTOR (V32QImode, gen_rtvec_v (32, vec));
22149 vt2 = force_reg (V32QImode, vt2);
22150 /* From mask create two adjusted masks, which contain the same
22151 bits as mask in the low 7 bits of each vector element.
22152 The first mask will have the most significant bit clear
22153 if it requests element from the same 128-bit lane
22154 and MSB set if it requests element from the other 128-bit lane.
22155 The second mask will have the opposite values of the MSB,
22156 and additionally will have its 128-bit lanes swapped.
22157 E.g. { 07 12 1e 09 ... | 17 19 05 1f ... } mask vector will have
22158 t1 { 07 92 9e 09 ... | 17 19 85 1f ... } and
22159 t3 { 97 99 05 9f ... | 87 12 1e 89 ... } where each ...
22160 stands for the other 12 bytes. */
22161 /* The bit telling whether an element is from the same lane or the other
22162 lane is bit 4, so shift it up by 3 to the MSB position. */
22163 t5 = gen_reg_rtx (V4DImode);
22164 emit_insn (gen_ashlv4di3 (t5, gen_lowpart (V4DImode, mask),
22165 GEN_INT (3)));
22166 /* Clear MSB bits from the mask just in case it had them set. */
22167 emit_insn (gen_avx2_andnotv32qi3 (t2, vt, mask));
22168 /* After this t1 will have MSB set for elements from other lane. */
22169 emit_insn (gen_xorv32qi3 (t1, gen_lowpart (V32QImode, t5), vt2));
22170 /* Clear bits other than MSB. */
22171 emit_insn (gen_andv32qi3 (t1, t1, vt));
22172 /* Or in the lower bits from mask into t3. */
22173 emit_insn (gen_iorv32qi3 (t3, t1, t2));
22174 /* And invert MSB bits in t1, so MSB is set for elements from the same
22175 lane. */
22176 emit_insn (gen_xorv32qi3 (t1, t1, vt));
22177 /* Swap 128-bit lanes in t3. */
22178 t6 = gen_reg_rtx (V4DImode);
22179 emit_insn (gen_avx2_permv4di_1 (t6, gen_lowpart (V4DImode, t3),
22180 const2_rtx, GEN_INT (3),
22181 const0_rtx, const1_rtx));
22182 /* And or in the lower bits from mask into t1. */
22183 emit_insn (gen_iorv32qi3 (t1, t1, t2));
22184 if (one_operand_shuffle)
22186 /* Each of these shuffles will put 0s in places where
22187 element from the other 128-bit lane is needed, otherwise
22188 will shuffle in the requested value. */
22189 emit_insn (gen_avx2_pshufbv32qi3 (t3, op0,
22190 gen_lowpart (V32QImode, t6)));
22191 emit_insn (gen_avx2_pshufbv32qi3 (t1, op0, t1));
22192 /* For t3 the 128-bit lanes are swapped again. */
22193 t7 = gen_reg_rtx (V4DImode);
22194 emit_insn (gen_avx2_permv4di_1 (t7, gen_lowpart (V4DImode, t3),
22195 const2_rtx, GEN_INT (3),
22196 const0_rtx, const1_rtx));
22197 /* And oring both together leads to the result. */
22198 emit_insn (gen_iorv32qi3 (target, t1,
22199 gen_lowpart (V32QImode, t7)));
22200 if (target != operands[0])
22201 emit_move_insn (operands[0],
22202 gen_lowpart (GET_MODE (operands[0]), target));
22203 return;
22206 t4 = gen_reg_rtx (V32QImode);
22207 /* Similar to the one_operand_shuffle code above,
22208 just repeated twice, once for each operand. The merge_two:
22209 code will merge the two results together. */
22210 emit_insn (gen_avx2_pshufbv32qi3 (t4, op0,
22211 gen_lowpart (V32QImode, t6)));
22212 emit_insn (gen_avx2_pshufbv32qi3 (t3, op1,
22213 gen_lowpart (V32QImode, t6)));
22214 emit_insn (gen_avx2_pshufbv32qi3 (t2, op0, t1));
22215 emit_insn (gen_avx2_pshufbv32qi3 (t1, op1, t1));
22216 t7 = gen_reg_rtx (V4DImode);
22217 emit_insn (gen_avx2_permv4di_1 (t7, gen_lowpart (V4DImode, t4),
22218 const2_rtx, GEN_INT (3),
22219 const0_rtx, const1_rtx));
22220 t8 = gen_reg_rtx (V4DImode);
22221 emit_insn (gen_avx2_permv4di_1 (t8, gen_lowpart (V4DImode, t3),
22222 const2_rtx, GEN_INT (3),
22223 const0_rtx, const1_rtx));
22224 emit_insn (gen_iorv32qi3 (t4, t2, gen_lowpart (V32QImode, t7)));
22225 emit_insn (gen_iorv32qi3 (t3, t1, gen_lowpart (V32QImode, t8)));
22226 t1 = t4;
22227 t2 = t3;
22228 goto merge_two;
22230 default:
22231 gcc_assert (GET_MODE_SIZE (mode) <= 16);
22232 break;
22236 if (TARGET_XOP)
22238 /* The XOP VPPERM insn supports three inputs. By ignoring the
22239 one_operand_shuffle special case, we avoid creating another
22240 set of constant vectors in memory. */
22241 one_operand_shuffle = false;
22243 /* mask = mask & {2*w-1, ...} */
22244 vt = GEN_INT (2*w - 1);
22246 else
22248 /* mask = mask & {w-1, ...} */
22249 vt = GEN_INT (w - 1);
22252 for (i = 0; i < w; i++)
22253 vec[i] = vt;
22254 vt = gen_rtx_CONST_VECTOR (maskmode, gen_rtvec_v (w, vec));
22255 mask = expand_simple_binop (maskmode, AND, mask, vt,
22256 NULL_RTX, 0, OPTAB_DIRECT);
22258 /* For non-QImode operations, convert the word permutation control
22259 into a byte permutation control. */
22260 if (mode != V16QImode)
22262 mask = expand_simple_binop (maskmode, ASHIFT, mask,
22263 GEN_INT (exact_log2 (e)),
22264 NULL_RTX, 0, OPTAB_DIRECT);
22266 /* Convert mask to vector of chars. */
22267 mask = force_reg (V16QImode, gen_lowpart (V16QImode, mask));
22269 /* Replicate each of the input bytes into byte positions:
22270 (v2di) --> {0,0,0,0,0,0,0,0, 8,8,8,8,8,8,8,8}
22271 (v4si) --> {0,0,0,0, 4,4,4,4, 8,8,8,8, 12,12,12,12}
22272 (v8hi) --> {0,0, 2,2, 4,4, 6,6, ...}. */
22273 for (i = 0; i < 16; ++i)
22274 vec[i] = GEN_INT (i/e * e);
22275 vt = gen_rtx_CONST_VECTOR (V16QImode, gen_rtvec_v (16, vec));
22276 vt = validize_mem (force_const_mem (V16QImode, vt));
22277 if (TARGET_XOP)
22278 emit_insn (gen_xop_pperm (mask, mask, mask, vt));
22279 else
22280 emit_insn (gen_ssse3_pshufbv16qi3 (mask, mask, vt));
22282 /* Convert it into the byte positions by doing
22283 mask = mask + {0,1,..,16/w, 0,1,..,16/w, ...} */
22284 for (i = 0; i < 16; ++i)
22285 vec[i] = GEN_INT (i % e);
22286 vt = gen_rtx_CONST_VECTOR (V16QImode, gen_rtvec_v (16, vec));
22287 vt = validize_mem (force_const_mem (V16QImode, vt));
22288 emit_insn (gen_addv16qi3 (mask, mask, vt));
22291 /* The actual shuffle operations all operate on V16QImode. */
22292 op0 = gen_lowpart (V16QImode, op0);
22293 op1 = gen_lowpart (V16QImode, op1);
22295 if (TARGET_XOP)
22297 if (GET_MODE (target) != V16QImode)
22298 target = gen_reg_rtx (V16QImode);
22299 emit_insn (gen_xop_pperm (target, op0, op1, mask));
22300 if (target != operands[0])
22301 emit_move_insn (operands[0],
22302 gen_lowpart (GET_MODE (operands[0]), target));
22304 else if (one_operand_shuffle)
22306 if (GET_MODE (target) != V16QImode)
22307 target = gen_reg_rtx (V16QImode);
22308 emit_insn (gen_ssse3_pshufbv16qi3 (target, op0, mask));
22309 if (target != operands[0])
22310 emit_move_insn (operands[0],
22311 gen_lowpart (GET_MODE (operands[0]), target));
22313 else
22315 rtx xops[6];
22316 bool ok;
22318 /* Shuffle the two input vectors independently. */
22319 t1 = gen_reg_rtx (V16QImode);
22320 t2 = gen_reg_rtx (V16QImode);
22321 emit_insn (gen_ssse3_pshufbv16qi3 (t1, op0, mask));
22322 emit_insn (gen_ssse3_pshufbv16qi3 (t2, op1, mask));
22324 merge_two:
22325 /* Then merge them together. The key is whether any given control
22326 element contained a bit set that indicates the second word. */
22327 mask = operands[3];
22328 vt = GEN_INT (w);
22329 if (maskmode == V2DImode && !TARGET_SSE4_1)
22331 /* Without SSE4.1, we don't have V2DImode EQ. Perform one
22332 more shuffle to convert the V2DI input mask into a V4SI
22333 input mask. At that point the masking used by expand_int_vcond
22334 will work as desired. */
22335 rtx t3 = gen_reg_rtx (V4SImode);
22336 emit_insn (gen_sse2_pshufd_1 (t3, gen_lowpart (V4SImode, mask),
22337 const0_rtx, const0_rtx,
22338 const2_rtx, const2_rtx));
22339 mask = t3;
22340 maskmode = V4SImode;
22341 e = w = 4;
22344 for (i = 0; i < w; i++)
22345 vec[i] = vt;
22346 vt = gen_rtx_CONST_VECTOR (maskmode, gen_rtvec_v (w, vec));
22347 vt = force_reg (maskmode, vt);
22348 mask = expand_simple_binop (maskmode, AND, mask, vt,
22349 NULL_RTX, 0, OPTAB_DIRECT);
22351 if (GET_MODE (target) != mode)
22352 target = gen_reg_rtx (mode);
22353 xops[0] = target;
22354 xops[1] = gen_lowpart (mode, t2);
22355 xops[2] = gen_lowpart (mode, t1);
22356 xops[3] = gen_rtx_EQ (maskmode, mask, vt);
22357 xops[4] = mask;
22358 xops[5] = vt;
22359 ok = ix86_expand_int_vcond (xops);
22360 gcc_assert (ok);
22361 if (target != operands[0])
22362 emit_move_insn (operands[0],
22363 gen_lowpart (GET_MODE (operands[0]), target));
22367 /* Unpack OP[1] into the next wider integer vector type. UNSIGNED_P is
22368 true if we should do zero extension, else sign extension. HIGH_P is
22369 true if we want the N/2 high elements, else the low elements. */
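/* For instance, zero-extending the low eight bytes of a V16QImode SRC
   into V8HImode is a single pmovzxbw on SSE4.1.  Without SSE4.1 the
   fallback interleaves SRC with either a zero vector or with the mask
   "0 > SRC" (all-ones bytes wherever SRC is negative), so the
   punpcklbw/punpckhbw step supplies the zero- or sign-extension
   bytes.  */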
22371 void
22372 ix86_expand_sse_unpack (rtx dest, rtx src, bool unsigned_p, bool high_p)
22374 machine_mode imode = GET_MODE (src);
22375 rtx tmp;
22377 if (TARGET_SSE4_1)
22379 rtx (*unpack)(rtx, rtx);
22380 rtx (*extract)(rtx, rtx) = NULL;
22381 machine_mode halfmode = BLKmode;
22383 switch (imode)
22385 case V64QImode:
22386 if (unsigned_p)
22387 unpack = gen_avx512bw_zero_extendv32qiv32hi2;
22388 else
22389 unpack = gen_avx512bw_sign_extendv32qiv32hi2;
22390 halfmode = V32QImode;
22391 extract
22392 = high_p ? gen_vec_extract_hi_v64qi : gen_vec_extract_lo_v64qi;
22393 break;
22394 case V32QImode:
22395 if (unsigned_p)
22396 unpack = gen_avx2_zero_extendv16qiv16hi2;
22397 else
22398 unpack = gen_avx2_sign_extendv16qiv16hi2;
22399 halfmode = V16QImode;
22400 extract
22401 = high_p ? gen_vec_extract_hi_v32qi : gen_vec_extract_lo_v32qi;
22402 break;
22403 case V32HImode:
22404 if (unsigned_p)
22405 unpack = gen_avx512f_zero_extendv16hiv16si2;
22406 else
22407 unpack = gen_avx512f_sign_extendv16hiv16si2;
22408 halfmode = V16HImode;
22409 extract
22410 = high_p ? gen_vec_extract_hi_v32hi : gen_vec_extract_lo_v32hi;
22411 break;
22412 case V16HImode:
22413 if (unsigned_p)
22414 unpack = gen_avx2_zero_extendv8hiv8si2;
22415 else
22416 unpack = gen_avx2_sign_extendv8hiv8si2;
22417 halfmode = V8HImode;
22418 extract
22419 = high_p ? gen_vec_extract_hi_v16hi : gen_vec_extract_lo_v16hi;
22420 break;
22421 case V16SImode:
22422 if (unsigned_p)
22423 unpack = gen_avx512f_zero_extendv8siv8di2;
22424 else
22425 unpack = gen_avx512f_sign_extendv8siv8di2;
22426 halfmode = V8SImode;
22427 extract
22428 = high_p ? gen_vec_extract_hi_v16si : gen_vec_extract_lo_v16si;
22429 break;
22430 case V8SImode:
22431 if (unsigned_p)
22432 unpack = gen_avx2_zero_extendv4siv4di2;
22433 else
22434 unpack = gen_avx2_sign_extendv4siv4di2;
22435 halfmode = V4SImode;
22436 extract
22437 = high_p ? gen_vec_extract_hi_v8si : gen_vec_extract_lo_v8si;
22438 break;
22439 case V16QImode:
22440 if (unsigned_p)
22441 unpack = gen_sse4_1_zero_extendv8qiv8hi2;
22442 else
22443 unpack = gen_sse4_1_sign_extendv8qiv8hi2;
22444 break;
22445 case V8HImode:
22446 if (unsigned_p)
22447 unpack = gen_sse4_1_zero_extendv4hiv4si2;
22448 else
22449 unpack = gen_sse4_1_sign_extendv4hiv4si2;
22450 break;
22451 case V4SImode:
22452 if (unsigned_p)
22453 unpack = gen_sse4_1_zero_extendv2siv2di2;
22454 else
22455 unpack = gen_sse4_1_sign_extendv2siv2di2;
22456 break;
22457 default:
22458 gcc_unreachable ();
22461 if (GET_MODE_SIZE (imode) >= 32)
22463 tmp = gen_reg_rtx (halfmode);
22464 emit_insn (extract (tmp, src));
22466 else if (high_p)
22468 /* Shift higher 8 bytes to lower 8 bytes. */
22469 tmp = gen_reg_rtx (V1TImode);
22470 emit_insn (gen_sse2_lshrv1ti3 (tmp, gen_lowpart (V1TImode, src),
22471 GEN_INT (64)));
22472 tmp = gen_lowpart (imode, tmp);
22474 else
22475 tmp = src;
22477 emit_insn (unpack (dest, tmp));
22479 else
22481 rtx (*unpack)(rtx, rtx, rtx);
22483 switch (imode)
22485 case V16QImode:
22486 if (high_p)
22487 unpack = gen_vec_interleave_highv16qi;
22488 else
22489 unpack = gen_vec_interleave_lowv16qi;
22490 break;
22491 case V8HImode:
22492 if (high_p)
22493 unpack = gen_vec_interleave_highv8hi;
22494 else
22495 unpack = gen_vec_interleave_lowv8hi;
22496 break;
22497 case V4SImode:
22498 if (high_p)
22499 unpack = gen_vec_interleave_highv4si;
22500 else
22501 unpack = gen_vec_interleave_lowv4si;
22502 break;
22503 default:
22504 gcc_unreachable ();
22507 if (unsigned_p)
22508 tmp = force_reg (imode, CONST0_RTX (imode));
22509 else
22510 tmp = ix86_expand_sse_cmp (gen_reg_rtx (imode), GT, CONST0_RTX (imode),
22511 src, pc_rtx, pc_rtx);
22513 rtx tmp2 = gen_reg_rtx (imode);
22514 emit_insn (unpack (tmp2, src, tmp));
22515 emit_move_insn (dest, gen_lowpart (GET_MODE (dest), tmp2));
22519 /* Expand conditional increment or decrement using adc/sbb instructions.
22520 The default case using setcc followed by the conditional move can be
22521 done by generic code. */
22522 bool
22523 ix86_expand_int_addcc (rtx operands[])
22525 enum rtx_code code = GET_CODE (operands[1]);
22526 rtx flags;
22527 rtx (*insn)(rtx, rtx, rtx, rtx, rtx);
22528 rtx compare_op;
22529 rtx val = const0_rtx;
22530 bool fpcmp = false;
22531 machine_mode mode;
22532 rtx op0 = XEXP (operands[1], 0);
22533 rtx op1 = XEXP (operands[1], 1);
22535 if (operands[3] != const1_rtx
22536 && operands[3] != constm1_rtx)
22537 return false;
22538 if (!ix86_expand_carry_flag_compare (code, op0, op1, &compare_op))
22539 return false;
22540 code = GET_CODE (compare_op);
22542 flags = XEXP (compare_op, 0);
22544 if (GET_MODE (flags) == CCFPmode
22545 || GET_MODE (flags) == CCFPUmode)
22547 fpcmp = true;
22548 code = ix86_fp_compare_code_to_integer (code);
22551 if (code != LTU)
22553 val = constm1_rtx;
22554 if (fpcmp)
22555 PUT_CODE (compare_op,
22556 reverse_condition_maybe_unordered
22557 (GET_CODE (compare_op)));
22558 else
22559 PUT_CODE (compare_op, reverse_condition (GET_CODE (compare_op)));
22562 mode = GET_MODE (operands[0]);
22564 /* Construct either adc or sbb insn. */
22565 if ((code == LTU) == (operands[3] == constm1_rtx))
22567 switch (mode)
22569 case QImode:
22570 insn = gen_subqi3_carry;
22571 break;
22572 case HImode:
22573 insn = gen_subhi3_carry;
22574 break;
22575 case SImode:
22576 insn = gen_subsi3_carry;
22577 break;
22578 case DImode:
22579 insn = gen_subdi3_carry;
22580 break;
22581 default:
22582 gcc_unreachable ();
22585 else
22587 switch (mode)
22589 case QImode:
22590 insn = gen_addqi3_carry;
22591 break;
22592 case HImode:
22593 insn = gen_addhi3_carry;
22594 break;
22595 case SImode:
22596 insn = gen_addsi3_carry;
22597 break;
22598 case DImode:
22599 insn = gen_adddi3_carry;
22600 break;
22601 default:
22602 gcc_unreachable ();
22605 emit_insn (insn (operands[0], operands[2], val, flags, compare_op));
22607 return true;
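/* A concrete instance of what this enables: for unsigned a and b,
   "x += (a < b)" can be emitted as
       cmpl %ebx, %eax    ; sets CF when a < b
       adcl $0, %ecx      ; x += CF
   and "x -= (a < b)" uses sbb instead, avoiding a setcc plus a
   separate zero extension.  Register names are only illustrative.  */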
22611 /* Split operands 0 and 1 into half-mode parts. Similar to split_double_mode,
22612 but works for floating point parameters and non-offsettable memories.
22613 For pushes, it returns just stack offsets; the values will be saved
22614 in the right order. Maximally three parts are generated. */
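/* For example, on ia32 a DFmode constant comes back as two SImode
   immediates (low part first) and an XFmode value as three SImode
   parts, while on x86_64 an XFmode value splits into a DImode part
   plus an SImode part.  */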
22616 static int
22617 ix86_split_to_parts (rtx operand, rtx *parts, machine_mode mode)
22619 int size;
22621 if (!TARGET_64BIT)
22622 size = mode==XFmode ? 3 : GET_MODE_SIZE (mode) / 4;
22623 else
22624 size = (GET_MODE_SIZE (mode) + 4) / 8;
22626 gcc_assert (!REG_P (operand) || !MMX_REGNO_P (REGNO (operand)));
22627 gcc_assert (size >= 2 && size <= 4);
22629 /* Optimize constant pool reference to immediates. This is used by fp
22630 moves, which force all constants to memory to allow combining. */
22631 if (MEM_P (operand) && MEM_READONLY_P (operand))
22633 rtx tmp = maybe_get_pool_constant (operand);
22634 if (tmp)
22635 operand = tmp;
22638 if (MEM_P (operand) && !offsettable_memref_p (operand))
22640 /* The only non-offsettable memories we handle are pushes. */
22641 int ok = push_operand (operand, VOIDmode);
22643 gcc_assert (ok);
22645 operand = copy_rtx (operand);
22646 PUT_MODE (operand, word_mode);
22647 parts[0] = parts[1] = parts[2] = parts[3] = operand;
22648 return size;
22651 if (GET_CODE (operand) == CONST_VECTOR)
22653 machine_mode imode = int_mode_for_mode (mode);
22654 /* Caution: if we looked through a constant pool memory above,
22655 the operand may actually have a different mode now. That's
22656 ok, since we want to pun this all the way back to an integer. */
22657 operand = simplify_subreg (imode, operand, GET_MODE (operand), 0);
22658 gcc_assert (operand != NULL);
22659 mode = imode;
22662 if (!TARGET_64BIT)
22664 if (mode == DImode)
22665 split_double_mode (mode, &operand, 1, &parts[0], &parts[1]);
22666 else
22668 int i;
22670 if (REG_P (operand))
22672 gcc_assert (reload_completed);
22673 for (i = 0; i < size; i++)
22674 parts[i] = gen_rtx_REG (SImode, REGNO (operand) + i);
22676 else if (offsettable_memref_p (operand))
22678 operand = adjust_address (operand, SImode, 0);
22679 parts[0] = operand;
22680 for (i = 1; i < size; i++)
22681 parts[i] = adjust_address (operand, SImode, 4 * i);
22683 else if (CONST_DOUBLE_P (operand))
22685 REAL_VALUE_TYPE r;
22686 long l[4];
22688 REAL_VALUE_FROM_CONST_DOUBLE (r, operand);
22689 switch (mode)
22691 case TFmode:
22692 real_to_target (l, &r, mode);
22693 parts[3] = gen_int_mode (l[3], SImode);
22694 parts[2] = gen_int_mode (l[2], SImode);
22695 break;
22696 case XFmode:
22697 /* We can't use REAL_VALUE_TO_TARGET_LONG_DOUBLE since
22698 long double may not be 80-bit. */
22699 real_to_target (l, &r, mode);
22700 parts[2] = gen_int_mode (l[2], SImode);
22701 break;
22702 case DFmode:
22703 REAL_VALUE_TO_TARGET_DOUBLE (r, l);
22704 break;
22705 default:
22706 gcc_unreachable ();
22708 parts[1] = gen_int_mode (l[1], SImode);
22709 parts[0] = gen_int_mode (l[0], SImode);
22711 else
22712 gcc_unreachable ();
22715 else
22717 if (mode == TImode)
22718 split_double_mode (mode, &operand, 1, &parts[0], &parts[1]);
22719 if (mode == XFmode || mode == TFmode)
22721 machine_mode upper_mode = mode==XFmode ? SImode : DImode;
22722 if (REG_P (operand))
22724 gcc_assert (reload_completed);
22725 parts[0] = gen_rtx_REG (DImode, REGNO (operand) + 0);
22726 parts[1] = gen_rtx_REG (upper_mode, REGNO (operand) + 1);
22728 else if (offsettable_memref_p (operand))
22730 operand = adjust_address (operand, DImode, 0);
22731 parts[0] = operand;
22732 parts[1] = adjust_address (operand, upper_mode, 8);
22734 else if (CONST_DOUBLE_P (operand))
22736 REAL_VALUE_TYPE r;
22737 long l[4];
22739 REAL_VALUE_FROM_CONST_DOUBLE (r, operand);
22740 real_to_target (l, &r, mode);
22742 /* real_to_target puts 32-bit pieces in each long. */
22743 parts[0] =
22744 gen_int_mode
22745 ((l[0] & (HOST_WIDE_INT) 0xffffffff)
22746 | ((l[1] & (HOST_WIDE_INT) 0xffffffff) << 32),
22747 DImode);
22749 if (upper_mode == SImode)
22750 parts[1] = gen_int_mode (l[2], SImode);
22751 else
22752 parts[1] =
22753 gen_int_mode
22754 ((l[2] & (HOST_WIDE_INT) 0xffffffff)
22755 | ((l[3] & (HOST_WIDE_INT) 0xffffffff) << 32),
22756 DImode);
22758 else
22759 gcc_unreachable ();
22763 return size;
22766 /* Emit insns to perform a move or push of DI, DF, XF, and TF values.
22767 The source in operands[1] is split into word-sized parts, the parts are
22768 ordered so that no source part is overwritten before it has been read,
22769 and the individual part moves (or pushes) are emitted here. */
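/* For instance, on a 64-bit target an 8-byte value is not split at all; the
   code below simply emits a single word-sized move (or push).  The interesting
   cases are the two-to-four part splits produced by ix86_split_to_parts.  */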
22771 void
22772 ix86_split_long_move (rtx operands[])
22774 rtx part[2][4];
22775 int nparts, i, j;
22776 int push = 0;
22777 int collisions = 0;
22778 machine_mode mode = GET_MODE (operands[0]);
22779 bool collisionparts[4];
22781 /* The DFmode expanders may ask us to move a double.
22782 For a 64-bit target this is a single move.  By hiding that fact
22783 here we simplify the i386.md splitters. */
22784 if (TARGET_64BIT && GET_MODE_SIZE (GET_MODE (operands[0])) == 8)
22786 /* Optimize constant pool reference to immediates. This is used by
22787 fp moves, that force all constants to memory to allow combining. */
22789 if (MEM_P (operands[1])
22790 && GET_CODE (XEXP (operands[1], 0)) == SYMBOL_REF
22791 && CONSTANT_POOL_ADDRESS_P (XEXP (operands[1], 0)))
22792 operands[1] = get_pool_constant (XEXP (operands[1], 0));
22793 if (push_operand (operands[0], VOIDmode))
22795 operands[0] = copy_rtx (operands[0]);
22796 PUT_MODE (operands[0], word_mode);
22798 else
22799 operands[0] = gen_lowpart (DImode, operands[0]);
22800 operands[1] = gen_lowpart (DImode, operands[1]);
22801 emit_move_insn (operands[0], operands[1]);
22802 return;
22805 /* The only non-offsettable memory we handle is a push. */
22806 if (push_operand (operands[0], VOIDmode))
22807 push = 1;
22808 else
22809 gcc_assert (!MEM_P (operands[0])
22810 || offsettable_memref_p (operands[0]));
22812 nparts = ix86_split_to_parts (operands[1], part[1], GET_MODE (operands[0]));
22813 ix86_split_to_parts (operands[0], part[0], GET_MODE (operands[0]));
22815 /* When emitting a push, take care of source operands on the stack. */
22816 if (push && MEM_P (operands[1])
22817 && reg_overlap_mentioned_p (stack_pointer_rtx, operands[1]))
22819 rtx src_base = XEXP (part[1][nparts - 1], 0);
22821 /* Compensate for the stack decrement by 4. */
22822 if (!TARGET_64BIT && nparts == 3
22823 && mode == XFmode && TARGET_128BIT_LONG_DOUBLE)
22824 src_base = plus_constant (Pmode, src_base, 4);
22826 /* src_base refers to the stack pointer and is
22827 automatically decreased by emitted push. */
22828 for (i = 0; i < nparts; i++)
22829 part[1][i] = change_address (part[1][i],
22830 GET_MODE (part[1][i]), src_base);
22833 /* We need to do the copy in the right order in case an address register
22834 of the source overlaps the destination. */
22835 if (REG_P (part[0][0]) && MEM_P (part[1][0]))
22837 rtx tmp;
22839 for (i = 0; i < nparts; i++)
22841 collisionparts[i]
22842 = reg_overlap_mentioned_p (part[0][i], XEXP (part[1][0], 0));
22843 if (collisionparts[i])
22844 collisions++;
22847 /* Collision in the middle part can be handled by reordering. */
22848 if (collisions == 1 && nparts == 3 && collisionparts [1])
22850 std::swap (part[0][1], part[0][2]);
22851 std::swap (part[1][1], part[1][2]);
22853 else if (collisions == 1
22854 && nparts == 4
22855 && (collisionparts [1] || collisionparts [2]))
22857 if (collisionparts [1])
22859 std::swap (part[0][1], part[0][2]);
22860 std::swap (part[1][1], part[1][2]);
22862 else
22864 std::swap (part[0][2], part[0][3]);
22865 std::swap (part[1][2], part[1][3]);
22869 /* If there are more collisions, we can't handle them by reordering.
22870 Do an lea into the last part and use only one colliding move. */
22871 else if (collisions > 1)
22873 rtx base, addr, tls_base = NULL_RTX;
22875 collisions = 1;
22877 base = part[0][nparts - 1];
22879 /* Handle the case when the last part isn't valid for lea.
22880 Happens in 64-bit mode storing the 12-byte XFmode. */
22881 if (GET_MODE (base) != Pmode)
22882 base = gen_rtx_REG (Pmode, REGNO (base));
22884 addr = XEXP (part[1][0], 0);
22885 if (TARGET_TLS_DIRECT_SEG_REFS)
22887 struct ix86_address parts;
22888 int ok = ix86_decompose_address (addr, &parts);
22889 gcc_assert (ok);
22890 if (parts.seg == DEFAULT_TLS_SEG_REG)
22892 /* It is not valid to use %gs: or %fs: in
22893 lea though, so we need to remove it from the
22894 address used for lea and add it to each individual
22895 memory load instead. */
22896 addr = copy_rtx (addr);
22897 rtx *x = &addr;
22898 while (GET_CODE (*x) == PLUS)
22900 for (i = 0; i < 2; i++)
22902 rtx u = XEXP (*x, i);
22903 if (GET_CODE (u) == ZERO_EXTEND)
22904 u = XEXP (u, 0);
22905 if (GET_CODE (u) == UNSPEC
22906 && XINT (u, 1) == UNSPEC_TP)
22908 tls_base = XEXP (*x, i);
22909 *x = XEXP (*x, 1 - i);
22910 break;
22913 if (tls_base)
22914 break;
22915 x = &XEXP (*x, 0);
22917 gcc_assert (tls_base);
22920 emit_insn (gen_rtx_SET (base, addr));
22921 if (tls_base)
22922 base = gen_rtx_PLUS (GET_MODE (base), base, tls_base);
22923 part[1][0] = replace_equiv_address (part[1][0], base);
22924 for (i = 1; i < nparts; i++)
22926 if (tls_base)
22927 base = copy_rtx (base);
22928 tmp = plus_constant (Pmode, base, UNITS_PER_WORD * i);
22929 part[1][i] = replace_equiv_address (part[1][i], tmp);
22934 if (push)
22936 if (!TARGET_64BIT)
22938 if (nparts == 3)
22940 if (TARGET_128BIT_LONG_DOUBLE && mode == XFmode)
22941 emit_insn (ix86_gen_add3 (stack_pointer_rtx,
22942 stack_pointer_rtx, GEN_INT (-4)));
22943 emit_move_insn (part[0][2], part[1][2]);
22945 else if (nparts == 4)
22947 emit_move_insn (part[0][3], part[1][3]);
22948 emit_move_insn (part[0][2], part[1][2]);
22951 else
22953 /* In 64-bit mode we don't have a 32-bit push available. In case this is
22954 a register, that is OK - we will just use the larger counterpart. We also
22955 retype memory - this comes from an attempt to avoid the REX prefix on
22956 moving the second half of a TFmode value. */
22957 if (GET_MODE (part[1][1]) == SImode)
22959 switch (GET_CODE (part[1][1]))
22961 case MEM:
22962 part[1][1] = adjust_address (part[1][1], DImode, 0);
22963 break;
22965 case REG:
22966 part[1][1] = gen_rtx_REG (DImode, REGNO (part[1][1]));
22967 break;
22969 default:
22970 gcc_unreachable ();
22973 if (GET_MODE (part[1][0]) == SImode)
22974 part[1][0] = part[1][1];
22977 emit_move_insn (part[0][1], part[1][1]);
22978 emit_move_insn (part[0][0], part[1][0]);
22979 return;
22982 /* Choose the correct order so we do not overwrite the source before it is copied. */
22983 if ((REG_P (part[0][0])
22984 && REG_P (part[1][1])
22985 && (REGNO (part[0][0]) == REGNO (part[1][1])
22986 || (nparts == 3
22987 && REGNO (part[0][0]) == REGNO (part[1][2]))
22988 || (nparts == 4
22989 && REGNO (part[0][0]) == REGNO (part[1][3]))))
22990 || (collisions > 0
22991 && reg_overlap_mentioned_p (part[0][0], XEXP (part[1][0], 0))))
22993 for (i = 0, j = nparts - 1; i < nparts; i++, j--)
22995 operands[2 + i] = part[0][j];
22996 operands[6 + i] = part[1][j];
22999 else
23001 for (i = 0; i < nparts; i++)
23003 operands[2 + i] = part[0][i];
23004 operands[6 + i] = part[1][i];
23008 /* If optimizing for size, attempt to locally unCSE nonzero constants. */
23009 if (optimize_insn_for_size_p ())
23011 for (j = 0; j < nparts - 1; j++)
23012 if (CONST_INT_P (operands[6 + j])
23013 && operands[6 + j] != const0_rtx
23014 && REG_P (operands[2 + j]))
23015 for (i = j; i < nparts - 1; i++)
23016 if (CONST_INT_P (operands[7 + i])
23017 && INTVAL (operands[7 + i]) == INTVAL (operands[6 + j]))
23018 operands[7 + i] = operands[2 + j];
23021 for (i = 0; i < nparts; i++)
23022 emit_move_insn (operands[2 + i], operands[6 + i]);
23024 return;
23027 /* Helper function of ix86_split_ashl used to generate an SImode/DImode
23028 left shift by a constant, either using a single shift or
23029 a sequence of add instructions. */
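/* For example, a left shift by 2 where two adds are cheaper than a
   constant shift (and we are not optimizing for size) is emitted as two
   self-additions (operand += operand twice); otherwise a single
   shift-by-2 instruction is used.  */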
23031 static void
23032 ix86_expand_ashl_const (rtx operand, int count, machine_mode mode)
23034 rtx (*insn)(rtx, rtx, rtx);
23036 if (count == 1
23037 || (count * ix86_cost->add <= ix86_cost->shift_const
23038 && !optimize_insn_for_size_p ()))
23040 insn = mode == DImode ? gen_addsi3 : gen_adddi3;
23041 while (count-- > 0)
23042 emit_insn (insn (operand, operand, operand));
23044 else
23046 insn = mode == DImode ? gen_ashlsi3 : gen_ashldi3;
23047 emit_insn (insn (operand, operand, GEN_INT (count)));
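/* Split a double-word left shift (a DImode value on 32-bit targets, a TImode
   value on 64-bit targets) of OPERANDS[1] by OPERANDS[2] into operations on
   the half-word pieces, storing the result in OPERANDS[0].  MODE is the
   double-word mode.  For a non-constant count, SCRATCH (when available
   together with cmove) is used to fix up the halves; otherwise an adjust
   pattern that does not need the scratch register is emitted.  */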
23051 void
23052 ix86_split_ashl (rtx *operands, rtx scratch, machine_mode mode)
23054 rtx (*gen_ashl3)(rtx, rtx, rtx);
23055 rtx (*gen_shld)(rtx, rtx, rtx);
23056 int half_width = GET_MODE_BITSIZE (mode) >> 1;
23058 rtx low[2], high[2];
23059 int count;
23061 if (CONST_INT_P (operands[2]))
23063 split_double_mode (mode, operands, 2, low, high);
23064 count = INTVAL (operands[2]) & (GET_MODE_BITSIZE (mode) - 1);
23066 if (count >= half_width)
23068 emit_move_insn (high[0], low[1]);
23069 emit_move_insn (low[0], const0_rtx);
23071 if (count > half_width)
23072 ix86_expand_ashl_const (high[0], count - half_width, mode);
23074 else
23076 gen_shld = mode == DImode ? gen_x86_shld : gen_x86_64_shld;
23078 if (!rtx_equal_p (operands[0], operands[1]))
23079 emit_move_insn (operands[0], operands[1]);
23081 emit_insn (gen_shld (high[0], low[0], GEN_INT (count)));
23082 ix86_expand_ashl_const (low[0], count, mode);
23084 return;
23087 split_double_mode (mode, operands, 1, low, high);
23089 gen_ashl3 = mode == DImode ? gen_ashlsi3 : gen_ashldi3;
23091 if (operands[1] == const1_rtx)
23093 /* Assuming we've chosen QImode-capable registers, 1 << N
23094 can be done with two 32/64-bit shifts, no branches, no cmoves. */
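/* The idea, roughly: clear both halves, use the test below to set the low
   half to (count & half_width) == 0 and the high half to the opposite
   condition, then shift both halves left by the count; the hardware shift
   masks the count modulo the half width, so exactly one half ends up holding
   1 << (count % half_width), which is the desired double-word result.  */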
23095 if (ANY_QI_REG_P (low[0]) && ANY_QI_REG_P (high[0]))
23097 rtx s, d, flags = gen_rtx_REG (CCZmode, FLAGS_REG);
23099 ix86_expand_clear (low[0]);
23100 ix86_expand_clear (high[0]);
23101 emit_insn (gen_testqi_ccz_1 (operands[2], GEN_INT (half_width)));
23103 d = gen_lowpart (QImode, low[0]);
23104 d = gen_rtx_STRICT_LOW_PART (VOIDmode, d);
23105 s = gen_rtx_EQ (QImode, flags, const0_rtx);
23106 emit_insn (gen_rtx_SET (d, s));
23108 d = gen_lowpart (QImode, high[0]);
23109 d = gen_rtx_STRICT_LOW_PART (VOIDmode, d);
23110 s = gen_rtx_NE (QImode, flags, const0_rtx);
23111 emit_insn (gen_rtx_SET (d, s));
23114 /* Otherwise, we can get the same results by manually performing
23115 a bit extract operation on bit 5/6, and then performing the two
23116 shifts. The two methods of getting 0/1 into low/high are exactly
23117 the same size. Avoiding the shift in the bit extract case helps
23118 pentium4 a bit; no one else seems to care much either way. */
23119 else
23121 machine_mode half_mode;
23122 rtx (*gen_lshr3)(rtx, rtx, rtx);
23123 rtx (*gen_and3)(rtx, rtx, rtx);
23124 rtx (*gen_xor3)(rtx, rtx, rtx);
23125 HOST_WIDE_INT bits;
23126 rtx x;
23128 if (mode == DImode)
23130 half_mode = SImode;
23131 gen_lshr3 = gen_lshrsi3;
23132 gen_and3 = gen_andsi3;
23133 gen_xor3 = gen_xorsi3;
23134 bits = 5;
23136 else
23138 half_mode = DImode;
23139 gen_lshr3 = gen_lshrdi3;
23140 gen_and3 = gen_anddi3;
23141 gen_xor3 = gen_xordi3;
23142 bits = 6;
23145 if (TARGET_PARTIAL_REG_STALL && !optimize_insn_for_size_p ())
23146 x = gen_rtx_ZERO_EXTEND (half_mode, operands[2]);
23147 else
23148 x = gen_lowpart (half_mode, operands[2]);
23149 emit_insn (gen_rtx_SET (high[0], x));
23151 emit_insn (gen_lshr3 (high[0], high[0], GEN_INT (bits)));
23152 emit_insn (gen_and3 (high[0], high[0], const1_rtx));
23153 emit_move_insn (low[0], high[0]);
23154 emit_insn (gen_xor3 (low[0], low[0], const1_rtx));
23157 emit_insn (gen_ashl3 (low[0], low[0], operands[2]));
23158 emit_insn (gen_ashl3 (high[0], high[0], operands[2]));
23159 return;
23162 if (operands[1] == constm1_rtx)
23164 /* For -1 << N, we can avoid the shld instruction, because we
23165 know that we're shifting 0...31/63 ones into a -1. */
23166 emit_move_insn (low[0], constm1_rtx);
23167 if (optimize_insn_for_size_p ())
23168 emit_move_insn (high[0], low[0]);
23169 else
23170 emit_move_insn (high[0], constm1_rtx);
23172 else
23174 gen_shld = mode == DImode ? gen_x86_shld : gen_x86_64_shld;
23176 if (!rtx_equal_p (operands[0], operands[1]))
23177 emit_move_insn (operands[0], operands[1]);
23179 split_double_mode (mode, operands, 1, low, high);
23180 emit_insn (gen_shld (high[0], low[0], operands[2]));
23183 emit_insn (gen_ashl3 (low[0], low[0], operands[2]));
23185 if (TARGET_CMOVE && scratch)
23187 rtx (*gen_x86_shift_adj_1)(rtx, rtx, rtx, rtx)
23188 = mode == DImode ? gen_x86_shiftsi_adj_1 : gen_x86_shiftdi_adj_1;
23190 ix86_expand_clear (scratch);
23191 emit_insn (gen_x86_shift_adj_1 (high[0], low[0], operands[2], scratch));
23193 else
23195 rtx (*gen_x86_shift_adj_2)(rtx, rtx, rtx)
23196 = mode == DImode ? gen_x86_shiftsi_adj_2 : gen_x86_shiftdi_adj_2;
23198 emit_insn (gen_x86_shift_adj_2 (high[0], low[0], operands[2]));
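/* Split a double-word arithmetic right shift into shifts on the half-word
   pieces; the counterpart of ix86_split_ashl.  When the count reaches or
   exceeds the half width, the upper half is filled from the sign bit.  */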
23202 void
23203 ix86_split_ashr (rtx *operands, rtx scratch, machine_mode mode)
23205 rtx (*gen_ashr3)(rtx, rtx, rtx)
23206 = mode == DImode ? gen_ashrsi3 : gen_ashrdi3;
23207 rtx (*gen_shrd)(rtx, rtx, rtx);
23208 int half_width = GET_MODE_BITSIZE (mode) >> 1;
23210 rtx low[2], high[2];
23211 int count;
23213 if (CONST_INT_P (operands[2]))
23215 split_double_mode (mode, operands, 2, low, high);
23216 count = INTVAL (operands[2]) & (GET_MODE_BITSIZE (mode) - 1);
23218 if (count == GET_MODE_BITSIZE (mode) - 1)
23220 emit_move_insn (high[0], high[1]);
23221 emit_insn (gen_ashr3 (high[0], high[0],
23222 GEN_INT (half_width - 1)));
23223 emit_move_insn (low[0], high[0]);
23226 else if (count >= half_width)
23228 emit_move_insn (low[0], high[1]);
23229 emit_move_insn (high[0], low[0]);
23230 emit_insn (gen_ashr3 (high[0], high[0],
23231 GEN_INT (half_width - 1)));
23233 if (count > half_width)
23234 emit_insn (gen_ashr3 (low[0], low[0],
23235 GEN_INT (count - half_width)));
23237 else
23239 gen_shrd = mode == DImode ? gen_x86_shrd : gen_x86_64_shrd;
23241 if (!rtx_equal_p (operands[0], operands[1]))
23242 emit_move_insn (operands[0], operands[1]);
23244 emit_insn (gen_shrd (low[0], high[0], GEN_INT (count)));
23245 emit_insn (gen_ashr3 (high[0], high[0], GEN_INT (count)));
23248 else
23250 gen_shrd = mode == DImode ? gen_x86_shrd : gen_x86_64_shrd;
23252 if (!rtx_equal_p (operands[0], operands[1]))
23253 emit_move_insn (operands[0], operands[1]);
23255 split_double_mode (mode, operands, 1, low, high);
23257 emit_insn (gen_shrd (low[0], high[0], operands[2]));
23258 emit_insn (gen_ashr3 (high[0], high[0], operands[2]));
23260 if (TARGET_CMOVE && scratch)
23262 rtx (*gen_x86_shift_adj_1)(rtx, rtx, rtx, rtx)
23263 = mode == DImode ? gen_x86_shiftsi_adj_1 : gen_x86_shiftdi_adj_1;
23265 emit_move_insn (scratch, high[0]);
23266 emit_insn (gen_ashr3 (scratch, scratch,
23267 GEN_INT (half_width - 1)));
23268 emit_insn (gen_x86_shift_adj_1 (low[0], high[0], operands[2],
23269 scratch));
23271 else
23273 rtx (*gen_x86_shift_adj_3)(rtx, rtx, rtx)
23274 = mode == DImode ? gen_x86_shiftsi_adj_3 : gen_x86_shiftdi_adj_3;
23276 emit_insn (gen_x86_shift_adj_3 (low[0], high[0], operands[2]));
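/* Split a double-word logical right shift into shifts on the half-word
   pieces; like ix86_split_ashr, but the upper half is cleared rather than
   sign-filled when the count reaches or exceeds the half width.  */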
23281 void
23282 ix86_split_lshr (rtx *operands, rtx scratch, machine_mode mode)
23284 rtx (*gen_lshr3)(rtx, rtx, rtx)
23285 = mode == DImode ? gen_lshrsi3 : gen_lshrdi3;
23286 rtx (*gen_shrd)(rtx, rtx, rtx);
23287 int half_width = GET_MODE_BITSIZE (mode) >> 1;
23289 rtx low[2], high[2];
23290 int count;
23292 if (CONST_INT_P (operands[2]))
23294 split_double_mode (mode, operands, 2, low, high);
23295 count = INTVAL (operands[2]) & (GET_MODE_BITSIZE (mode) - 1);
23297 if (count >= half_width)
23299 emit_move_insn (low[0], high[1]);
23300 ix86_expand_clear (high[0]);
23302 if (count > half_width)
23303 emit_insn (gen_lshr3 (low[0], low[0],
23304 GEN_INT (count - half_width)));
23306 else
23308 gen_shrd = mode == DImode ? gen_x86_shrd : gen_x86_64_shrd;
23310 if (!rtx_equal_p (operands[0], operands[1]))
23311 emit_move_insn (operands[0], operands[1]);
23313 emit_insn (gen_shrd (low[0], high[0], GEN_INT (count)));
23314 emit_insn (gen_lshr3 (high[0], high[0], GEN_INT (count)));
23317 else
23319 gen_shrd = mode == DImode ? gen_x86_shrd : gen_x86_64_shrd;
23321 if (!rtx_equal_p (operands[0], operands[1]))
23322 emit_move_insn (operands[0], operands[1]);
23324 split_double_mode (mode, operands, 1, low, high);
23326 emit_insn (gen_shrd (low[0], high[0], operands[2]));
23327 emit_insn (gen_lshr3 (high[0], high[0], operands[2]));
23329 if (TARGET_CMOVE && scratch)
23331 rtx (*gen_x86_shift_adj_1)(rtx, rtx, rtx, rtx)
23332 = mode == DImode ? gen_x86_shiftsi_adj_1 : gen_x86_shiftdi_adj_1;
23334 ix86_expand_clear (scratch);
23335 emit_insn (gen_x86_shift_adj_1 (low[0], high[0], operands[2],
23336 scratch));
23338 else
23340 rtx (*gen_x86_shift_adj_2)(rtx, rtx, rtx)
23341 = mode == DImode ? gen_x86_shiftsi_adj_2 : gen_x86_shiftdi_adj_2;
23343 emit_insn (gen_x86_shift_adj_2 (low[0], high[0], operands[2]));
23348 /* Predict the just-emitted jump instruction to be taken with probability PROB. */
23349 static void
23350 predict_jump (int prob)
23352 rtx insn = get_last_insn ();
23353 gcc_assert (JUMP_P (insn));
23354 add_int_reg_note (insn, REG_BR_PROB, prob);
23357 /* Helper function for the string operations below.  Test whether VARIABLE
23358 is aligned to VALUE bytes ((VARIABLE & VALUE) == 0); if so, jump to the returned label. */
23359 static rtx_code_label *
23360 ix86_expand_aligntest (rtx variable, int value, bool epilogue)
23362 rtx_code_label *label = gen_label_rtx ();
23363 rtx tmpcount = gen_reg_rtx (GET_MODE (variable));
23364 if (GET_MODE (variable) == DImode)
23365 emit_insn (gen_anddi3 (tmpcount, variable, GEN_INT (value)));
23366 else
23367 emit_insn (gen_andsi3 (tmpcount, variable, GEN_INT (value)));
23368 emit_cmp_and_jump_insns (tmpcount, const0_rtx, EQ, 0, GET_MODE (variable),
23369 1, label);
23370 if (epilogue)
23371 predict_jump (REG_BR_PROB_BASE * 50 / 100);
23372 else
23373 predict_jump (REG_BR_PROB_BASE * 90 / 100);
23374 return label;
23377 /* Decrease COUNTREG by VALUE. */
23378 static void
23379 ix86_adjust_counter (rtx countreg, HOST_WIDE_INT value)
23381 rtx (*gen_add)(rtx, rtx, rtx)
23382 = GET_MODE (countreg) == DImode ? gen_adddi3 : gen_addsi3;
23384 emit_insn (gen_add (countreg, countreg, GEN_INT (-value)));
23387 /* Zero extend possibly SImode EXP to Pmode register. */
23389 ix86_zero_extend_to_Pmode (rtx exp)
23391 return force_reg (Pmode, convert_to_mode (Pmode, exp, 1));
23394 /* Divide COUNTREG by SCALE. */
23395 static rtx
23396 scale_counter (rtx countreg, int scale)
23398 rtx sc;
23400 if (scale == 1)
23401 return countreg;
23402 if (CONST_INT_P (countreg))
23403 return GEN_INT (INTVAL (countreg) / scale);
23404 gcc_assert (REG_P (countreg));
23406 sc = expand_simple_binop (GET_MODE (countreg), LSHIFTRT, countreg,
23407 GEN_INT (exact_log2 (scale)),
23408 NULL, 1, OPTAB_DIRECT);
23409 return sc;
23412 /* Return mode for the memcpy/memset loop counter. Prefer SImode over
23413 DImode for constant loop counts. */
23415 static machine_mode
23416 counter_mode (rtx count_exp)
23418 if (GET_MODE (count_exp) != VOIDmode)
23419 return GET_MODE (count_exp);
23420 if (!CONST_INT_P (count_exp))
23421 return Pmode;
23422 if (TARGET_64BIT && (INTVAL (count_exp) & ~0xffffffff))
23423 return DImode;
23424 return SImode;
23427 /* Copy the address to a Pmode register. This is used for x32 to
23428 truncate DImode TLS address to a SImode register. */
23430 static rtx
23431 ix86_copy_addr_to_reg (rtx addr)
23433 rtx reg;
23434 if (GET_MODE (addr) == Pmode || GET_MODE (addr) == VOIDmode)
23436 reg = copy_addr_to_reg (addr);
23437 REG_POINTER (reg) = 1;
23438 return reg;
23440 else
23442 gcc_assert (GET_MODE (addr) == DImode && Pmode == SImode);
23443 reg = copy_to_mode_reg (DImode, addr);
23444 REG_POINTER (reg) = 1;
23445 return gen_rtx_SUBREG (SImode, reg, 0);
23449 /* When ISSETMEM is FALSE, output a simple loop moving the memory pointed to by SRCPTR
23450 to DESTPTR in chunks of MODE, unrolled UNROLL times; the overall size is COUNT,
23451 specified in bytes.  When ISSETMEM is TRUE, output the equivalent loop that sets
23452 memory to VALUE (assumed to be in MODE).
23454 The size is rounded down to a whole number of chunks moved at once.
23455 SRCMEM and DESTMEM provide MEM rtx to supply proper aliasing info. */
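/* Roughly, the emitted sequence looks like:

     size = count & -(GET_MODE_SIZE (mode) * unroll);
     iter = 0;
   top:
     <UNROLL moves or stores of MODE at dest + iter (and src + iter)>
     iter += GET_MODE_SIZE (mode) * unroll;
     if (iter < size) goto top;
     destptr += iter;  srcptr += iter;    (srcptr only when !ISSETMEM)  */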
23458 static void
23459 expand_set_or_movmem_via_loop (rtx destmem, rtx srcmem,
23460 rtx destptr, rtx srcptr, rtx value,
23461 rtx count, machine_mode mode, int unroll,
23462 int expected_size, bool issetmem)
23464 rtx_code_label *out_label, *top_label;
23465 rtx iter, tmp;
23466 machine_mode iter_mode = counter_mode (count);
23467 int piece_size_n = GET_MODE_SIZE (mode) * unroll;
23468 rtx piece_size = GEN_INT (piece_size_n);
23469 rtx piece_size_mask = GEN_INT (~((GET_MODE_SIZE (mode) * unroll) - 1));
23470 rtx size;
23471 int i;
23473 top_label = gen_label_rtx ();
23474 out_label = gen_label_rtx ();
23475 iter = gen_reg_rtx (iter_mode);
23477 size = expand_simple_binop (iter_mode, AND, count, piece_size_mask,
23478 NULL, 1, OPTAB_DIRECT);
23479 /* Those two should combine. */
23480 if (piece_size == const1_rtx)
23482 emit_cmp_and_jump_insns (size, const0_rtx, EQ, NULL_RTX, iter_mode,
23483 true, out_label);
23484 predict_jump (REG_BR_PROB_BASE * 10 / 100);
23486 emit_move_insn (iter, const0_rtx);
23488 emit_label (top_label);
23490 tmp = convert_modes (Pmode, iter_mode, iter, true);
23492 /* This assert could be relaxed - in that case we'd need to compute
23493 the largest power of two dividing PIECE_SIZE_N and pass it to
23494 offset_address. */
23495 gcc_assert ((piece_size_n & (piece_size_n - 1)) == 0);
23496 destmem = offset_address (destmem, tmp, piece_size_n);
23497 destmem = adjust_address (destmem, mode, 0);
23499 if (!issetmem)
23501 srcmem = offset_address (srcmem, copy_rtx (tmp), piece_size_n);
23502 srcmem = adjust_address (srcmem, mode, 0);
23504 /* When unrolling for chips that reorder memory reads and writes,
23505 we can save registers by using a single temporary.
23506 Also, using 4 temporaries is overkill in 32-bit mode. */
23507 if (!TARGET_64BIT && 0)
23509 for (i = 0; i < unroll; i++)
23511 if (i)
23513 destmem =
23514 adjust_address (copy_rtx (destmem), mode, GET_MODE_SIZE (mode));
23515 srcmem =
23516 adjust_address (copy_rtx (srcmem), mode, GET_MODE_SIZE (mode));
23518 emit_move_insn (destmem, srcmem);
23521 else
23523 rtx tmpreg[4];
23524 gcc_assert (unroll <= 4);
23525 for (i = 0; i < unroll; i++)
23527 tmpreg[i] = gen_reg_rtx (mode);
23528 if (i)
23530 srcmem =
23531 adjust_address (copy_rtx (srcmem), mode, GET_MODE_SIZE (mode));
23533 emit_move_insn (tmpreg[i], srcmem);
23535 for (i = 0; i < unroll; i++)
23537 if (i)
23539 destmem =
23540 adjust_address (copy_rtx (destmem), mode, GET_MODE_SIZE (mode));
23542 emit_move_insn (destmem, tmpreg[i]);
23546 else
23547 for (i = 0; i < unroll; i++)
23549 if (i)
23550 destmem =
23551 adjust_address (copy_rtx (destmem), mode, GET_MODE_SIZE (mode));
23552 emit_move_insn (destmem, value);
23555 tmp = expand_simple_binop (iter_mode, PLUS, iter, piece_size, iter,
23556 true, OPTAB_LIB_WIDEN);
23557 if (tmp != iter)
23558 emit_move_insn (iter, tmp);
23560 emit_cmp_and_jump_insns (iter, size, LT, NULL_RTX, iter_mode,
23561 true, top_label);
23562 if (expected_size != -1)
23564 expected_size /= GET_MODE_SIZE (mode) * unroll;
23565 if (expected_size == 0)
23566 predict_jump (0);
23567 else if (expected_size > REG_BR_PROB_BASE)
23568 predict_jump (REG_BR_PROB_BASE - 1);
23569 else
23570 predict_jump (REG_BR_PROB_BASE - (REG_BR_PROB_BASE + expected_size / 2) / expected_size);
23572 else
23573 predict_jump (REG_BR_PROB_BASE * 80 / 100);
23574 iter = ix86_zero_extend_to_Pmode (iter);
23575 tmp = expand_simple_binop (Pmode, PLUS, destptr, iter, destptr,
23576 true, OPTAB_LIB_WIDEN);
23577 if (tmp != destptr)
23578 emit_move_insn (destptr, tmp);
23579 if (!issetmem)
23581 tmp = expand_simple_binop (Pmode, PLUS, srcptr, iter, srcptr,
23582 true, OPTAB_LIB_WIDEN);
23583 if (tmp != srcptr)
23584 emit_move_insn (srcptr, tmp);
23586 emit_label (out_label);
23589 /* Output "rep; mov" or "rep; stos" instruction depending on ISSETMEM argument.
23590 When ISSETMEM is true, arguments SRCMEM and SRCPTR are ignored.
23591 When ISSETMEM is false, arguments VALUE and ORIG_VALUE are ignored.
23592 For the setmem case, VALUE is ORIG_VALUE promoted to a wider size.
23593 ORIG_VALUE is the original value passed to memset to fill the memory with.
23594 Other arguments have the same meaning as for the previous function. */
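/* For example, a memcpy with a constant byte count that is a multiple of 4
   (or a memset of zero) is widened from QImode to SImode below, the count
   register is loaded with COUNT / 4, and a single rep movs/stos style insn
   is emitted; DESTEXP/SRCEXP describe how far the pointers advance.  */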
23596 static void
23597 expand_set_or_movmem_via_rep (rtx destmem, rtx srcmem,
23598 rtx destptr, rtx srcptr, rtx value, rtx orig_value,
23599 rtx count,
23600 machine_mode mode, bool issetmem)
23602 rtx destexp;
23603 rtx srcexp;
23604 rtx countreg;
23605 HOST_WIDE_INT rounded_count;
23607 /* If possible, it is shorter to use rep movs.
23608 TODO: Maybe it is better to move this logic to decide_alg. */
23609 if (mode == QImode && CONST_INT_P (count) && !(INTVAL (count) & 3)
23610 && (!issetmem || orig_value == const0_rtx))
23611 mode = SImode;
23613 if (destptr != XEXP (destmem, 0) || GET_MODE (destmem) != BLKmode)
23614 destmem = adjust_automodify_address_nv (destmem, BLKmode, destptr, 0);
23616 countreg = ix86_zero_extend_to_Pmode (scale_counter (count,
23617 GET_MODE_SIZE (mode)));
23618 if (mode != QImode)
23620 destexp = gen_rtx_ASHIFT (Pmode, countreg,
23621 GEN_INT (exact_log2 (GET_MODE_SIZE (mode))));
23622 destexp = gen_rtx_PLUS (Pmode, destexp, destptr);
23624 else
23625 destexp = gen_rtx_PLUS (Pmode, destptr, countreg);
23626 if ((!issetmem || orig_value == const0_rtx) && CONST_INT_P (count))
23628 rounded_count = (INTVAL (count)
23629 & ~((HOST_WIDE_INT) GET_MODE_SIZE (mode) - 1));
23630 destmem = shallow_copy_rtx (destmem);
23631 set_mem_size (destmem, rounded_count);
23633 else if (MEM_SIZE_KNOWN_P (destmem))
23634 clear_mem_size (destmem);
23636 if (issetmem)
23638 value = force_reg (mode, gen_lowpart (mode, value));
23639 emit_insn (gen_rep_stos (destptr, countreg, destmem, value, destexp));
23641 else
23643 if (srcptr != XEXP (srcmem, 0) || GET_MODE (srcmem) != BLKmode)
23644 srcmem = adjust_automodify_address_nv (srcmem, BLKmode, srcptr, 0);
23645 if (mode != QImode)
23647 srcexp = gen_rtx_ASHIFT (Pmode, countreg,
23648 GEN_INT (exact_log2 (GET_MODE_SIZE (mode))));
23649 srcexp = gen_rtx_PLUS (Pmode, srcexp, srcptr);
23651 else
23652 srcexp = gen_rtx_PLUS (Pmode, srcptr, countreg);
23653 if (CONST_INT_P (count))
23655 rounded_count = (INTVAL (count)
23656 & ~((HOST_WIDE_INT) GET_MODE_SIZE (mode) - 1));
23657 srcmem = shallow_copy_rtx (srcmem);
23658 set_mem_size (srcmem, rounded_count);
23660 else
23662 if (MEM_SIZE_KNOWN_P (srcmem))
23663 clear_mem_size (srcmem);
23665 emit_insn (gen_rep_mov (destptr, destmem, srcptr, srcmem, countreg,
23666 destexp, srcexp));
23670 /* This function emits moves to copy SIZE_TO_MOVE bytes from SRCMEM to
23671 DESTMEM.
23672 SRCMEM is passed by pointer so it can be updated on return.
23673 The return value is the updated DESTMEM. */
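/* A sketch of the effect: with SIZE_TO_MOVE of 16 bytes on a 64-bit SSE
   target this typically selects a 16-byte vector mode (falling back to
   word_mode when no such move pattern exists) and emits load/store pairs
   through a temporary register, advancing DESTPTR and SRCPTR by the piece
   size after each pair.  */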
23674 static rtx
23675 emit_memmov (rtx destmem, rtx *srcmem, rtx destptr, rtx srcptr,
23676 HOST_WIDE_INT size_to_move)
23678 rtx dst = destmem, src = *srcmem, adjust, tempreg;
23679 enum insn_code code;
23680 machine_mode move_mode;
23681 int piece_size, i;
23683 /* Find the widest mode in which we could perform moves.
23684 Start with the biggest power of 2 not greater than SIZE_TO_MOVE and halve
23685 it until a move of that size is supported. */
23686 piece_size = 1 << floor_log2 (size_to_move);
23687 move_mode = mode_for_size (piece_size * BITS_PER_UNIT, MODE_INT, 0);
23688 code = optab_handler (mov_optab, move_mode);
23689 while (code == CODE_FOR_nothing && piece_size > 1)
23691 piece_size >>= 1;
23692 move_mode = mode_for_size (piece_size * BITS_PER_UNIT, MODE_INT, 0);
23693 code = optab_handler (mov_optab, move_mode);
23696 /* Find the corresponding vector mode with the same size as MOVE_MODE.
23697 MOVE_MODE is an integer mode at the moment (SI, DI, TI, etc.). */
23698 if (GET_MODE_SIZE (move_mode) > GET_MODE_SIZE (word_mode))
23700 int nunits = GET_MODE_SIZE (move_mode) / GET_MODE_SIZE (word_mode);
23701 move_mode = mode_for_vector (word_mode, nunits);
23702 code = optab_handler (mov_optab, move_mode);
23703 if (code == CODE_FOR_nothing)
23705 move_mode = word_mode;
23706 piece_size = GET_MODE_SIZE (move_mode);
23707 code = optab_handler (mov_optab, move_mode);
23710 gcc_assert (code != CODE_FOR_nothing);
23712 dst = adjust_automodify_address_nv (dst, move_mode, destptr, 0);
23713 src = adjust_automodify_address_nv (src, move_mode, srcptr, 0);
23715 /* Emit moves. We'll need SIZE_TO_MOVE / PIECE_SIZE moves. */
23716 gcc_assert (size_to_move % piece_size == 0);
23717 adjust = GEN_INT (piece_size);
23718 for (i = 0; i < size_to_move; i += piece_size)
23720 /* We move from memory to memory, so we'll need to do it via
23721 a temporary register. */
23722 tempreg = gen_reg_rtx (move_mode);
23723 emit_insn (GEN_FCN (code) (tempreg, src));
23724 emit_insn (GEN_FCN (code) (dst, tempreg));
23726 emit_move_insn (destptr,
23727 gen_rtx_PLUS (Pmode, copy_rtx (destptr), adjust));
23728 emit_move_insn (srcptr,
23729 gen_rtx_PLUS (Pmode, copy_rtx (srcptr), adjust));
23731 dst = adjust_automodify_address_nv (dst, move_mode, destptr,
23732 piece_size);
23733 src = adjust_automodify_address_nv (src, move_mode, srcptr,
23734 piece_size);
23737 /* Update DST and SRC rtx. */
23738 *srcmem = src;
23739 return dst;
23742 /* Output code to copy at most count & (max_size - 1) bytes from SRC to DEST. */
23743 static void
23744 expand_movmem_epilogue (rtx destmem, rtx srcmem,
23745 rtx destptr, rtx srcptr, rtx count, int max_size)
23747 rtx src, dest;
23748 if (CONST_INT_P (count))
23750 HOST_WIDE_INT countval = INTVAL (count);
23751 HOST_WIDE_INT epilogue_size = countval % max_size;
23752 int i;
23754 /* For now MAX_SIZE should be a power of 2. This assert could be
23755 relaxed, but it'll require a bit more complicated epilogue
23756 expanding. */
23757 gcc_assert ((max_size & (max_size - 1)) == 0);
23758 for (i = max_size; i >= 1; i >>= 1)
23760 if (epilogue_size & i)
23761 destmem = emit_memmov (destmem, &srcmem, destptr, srcptr, i);
23763 return;
23765 if (max_size > 8)
23767 count = expand_simple_binop (GET_MODE (count), AND, count, GEN_INT (max_size - 1),
23768 count, 1, OPTAB_DIRECT);
23769 expand_set_or_movmem_via_loop (destmem, srcmem, destptr, srcptr, NULL,
23770 count, QImode, 1, 4, false);
23771 return;
23774 /* When single-insn stringops are available, we can cheaply advance the dest and src
23775 pointers.  Otherwise we save code size by maintaining an offset (zero is readily
23776 available from the preceding rep operation) and using x86 addressing modes.
23778 if (TARGET_SINGLE_STRINGOP)
23780 if (max_size > 4)
23782 rtx_code_label *label = ix86_expand_aligntest (count, 4, true);
23783 src = change_address (srcmem, SImode, srcptr);
23784 dest = change_address (destmem, SImode, destptr);
23785 emit_insn (gen_strmov (destptr, dest, srcptr, src));
23786 emit_label (label);
23787 LABEL_NUSES (label) = 1;
23789 if (max_size > 2)
23791 rtx_code_label *label = ix86_expand_aligntest (count, 2, true);
23792 src = change_address (srcmem, HImode, srcptr);
23793 dest = change_address (destmem, HImode, destptr);
23794 emit_insn (gen_strmov (destptr, dest, srcptr, src));
23795 emit_label (label);
23796 LABEL_NUSES (label) = 1;
23798 if (max_size > 1)
23800 rtx_code_label *label = ix86_expand_aligntest (count, 1, true);
23801 src = change_address (srcmem, QImode, srcptr);
23802 dest = change_address (destmem, QImode, destptr);
23803 emit_insn (gen_strmov (destptr, dest, srcptr, src));
23804 emit_label (label);
23805 LABEL_NUSES (label) = 1;
23808 else
23810 rtx offset = force_reg (Pmode, const0_rtx);
23811 rtx tmp;
23813 if (max_size > 4)
23815 rtx_code_label *label = ix86_expand_aligntest (count, 4, true);
23816 src = change_address (srcmem, SImode, srcptr);
23817 dest = change_address (destmem, SImode, destptr);
23818 emit_move_insn (dest, src);
23819 tmp = expand_simple_binop (Pmode, PLUS, offset, GEN_INT (4), NULL,
23820 true, OPTAB_LIB_WIDEN);
23821 if (tmp != offset)
23822 emit_move_insn (offset, tmp);
23823 emit_label (label);
23824 LABEL_NUSES (label) = 1;
23826 if (max_size > 2)
23828 rtx_code_label *label = ix86_expand_aligntest (count, 2, true);
23829 tmp = gen_rtx_PLUS (Pmode, srcptr, offset);
23830 src = change_address (srcmem, HImode, tmp);
23831 tmp = gen_rtx_PLUS (Pmode, destptr, offset);
23832 dest = change_address (destmem, HImode, tmp);
23833 emit_move_insn (dest, src);
23834 tmp = expand_simple_binop (Pmode, PLUS, offset, GEN_INT (2), tmp,
23835 true, OPTAB_LIB_WIDEN);
23836 if (tmp != offset)
23837 emit_move_insn (offset, tmp);
23838 emit_label (label);
23839 LABEL_NUSES (label) = 1;
23841 if (max_size > 1)
23843 rtx_code_label *label = ix86_expand_aligntest (count, 1, true);
23844 tmp = gen_rtx_PLUS (Pmode, srcptr, offset);
23845 src = change_address (srcmem, QImode, tmp);
23846 tmp = gen_rtx_PLUS (Pmode, destptr, offset);
23847 dest = change_address (destmem, QImode, tmp);
23848 emit_move_insn (dest, src);
23849 emit_label (label);
23850 LABEL_NUSES (label) = 1;
23855 /* This function emits stores to fill SIZE_TO_MOVE bytes starting at DESTMEM
23856 with the value PROMOTED_VAL.
23857 DESTPTR is advanced as the stores are emitted.
23858 The return value is the updated DESTMEM. */
23859 static rtx
23860 emit_memset (rtx destmem, rtx destptr, rtx promoted_val,
23861 HOST_WIDE_INT size_to_move)
23863 rtx dst = destmem, adjust;
23864 enum insn_code code;
23865 machine_mode move_mode;
23866 int piece_size, i;
23868 /* Find the widest mode in which we could perform moves.
23869 Start with the biggest power of 2 not greater than SIZE_TO_MOVE and halve
23870 it until a move of that size is supported. */
23871 move_mode = GET_MODE (promoted_val);
23872 if (move_mode == VOIDmode)
23873 move_mode = QImode;
23874 if (size_to_move < GET_MODE_SIZE (move_mode))
23876 move_mode = mode_for_size (size_to_move * BITS_PER_UNIT, MODE_INT, 0);
23877 promoted_val = gen_lowpart (move_mode, promoted_val);
23879 piece_size = GET_MODE_SIZE (move_mode);
23880 code = optab_handler (mov_optab, move_mode);
23881 gcc_assert (code != CODE_FOR_nothing && promoted_val != NULL_RTX);
23883 dst = adjust_automodify_address_nv (dst, move_mode, destptr, 0);
23885 /* Emit moves. We'll need SIZE_TO_MOVE / PIECE_SIZE moves. */
23886 gcc_assert (size_to_move % piece_size == 0);
23887 adjust = GEN_INT (piece_size);
23888 for (i = 0; i < size_to_move; i += piece_size)
23890 if (piece_size <= GET_MODE_SIZE (word_mode))
23892 emit_insn (gen_strset (destptr, dst, promoted_val));
23893 dst = adjust_automodify_address_nv (dst, move_mode, destptr,
23894 piece_size);
23895 continue;
23898 emit_insn (GEN_FCN (code) (dst, promoted_val));
23900 emit_move_insn (destptr,
23901 gen_rtx_PLUS (Pmode, copy_rtx (destptr), adjust));
23903 dst = adjust_automodify_address_nv (dst, move_mode, destptr,
23904 piece_size);
23907 /* Update DST rtx. */
23908 return dst;
23910 /* Output code to set at most count & (max_size - 1) bytes starting at DEST. */
23911 static void
23912 expand_setmem_epilogue_via_loop (rtx destmem, rtx destptr, rtx value,
23913 rtx count, int max_size)
23915 count =
23916 expand_simple_binop (counter_mode (count), AND, count,
23917 GEN_INT (max_size - 1), count, 1, OPTAB_DIRECT);
23918 expand_set_or_movmem_via_loop (destmem, NULL, destptr, NULL,
23919 gen_lowpart (QImode, value), count, QImode,
23920 1, max_size / 2, true);
23923 /* Output code to set at most count & (max_size - 1) bytes starting at DEST. */
23924 static void
23925 expand_setmem_epilogue (rtx destmem, rtx destptr, rtx value, rtx vec_value,
23926 rtx count, int max_size)
23928 rtx dest;
23930 if (CONST_INT_P (count))
23932 HOST_WIDE_INT countval = INTVAL (count);
23933 HOST_WIDE_INT epilogue_size = countval % max_size;
23934 int i;
23936 /* For now MAX_SIZE should be a power of 2. This assert could be
23937 relaxed, but it'll require a bit more complicated epilogue
23938 expanding. */
23939 gcc_assert ((max_size & (max_size - 1)) == 0);
23940 for (i = max_size; i >= 1; i >>= 1)
23942 if (epilogue_size & i)
23944 if (vec_value && i > GET_MODE_SIZE (GET_MODE (value)))
23945 destmem = emit_memset (destmem, destptr, vec_value, i);
23946 else
23947 destmem = emit_memset (destmem, destptr, value, i);
23950 return;
23952 if (max_size > 32)
23954 expand_setmem_epilogue_via_loop (destmem, destptr, value, count, max_size);
23955 return;
23957 if (max_size > 16)
23959 rtx_code_label *label = ix86_expand_aligntest (count, 16, true);
23960 if (TARGET_64BIT)
23962 dest = change_address (destmem, DImode, destptr);
23963 emit_insn (gen_strset (destptr, dest, value));
23964 dest = adjust_automodify_address_nv (dest, DImode, destptr, 8);
23965 emit_insn (gen_strset (destptr, dest, value));
23967 else
23969 dest = change_address (destmem, SImode, destptr);
23970 emit_insn (gen_strset (destptr, dest, value));
23971 dest = adjust_automodify_address_nv (dest, SImode, destptr, 4);
23972 emit_insn (gen_strset (destptr, dest, value));
23973 dest = adjust_automodify_address_nv (dest, SImode, destptr, 8);
23974 emit_insn (gen_strset (destptr, dest, value));
23975 dest = adjust_automodify_address_nv (dest, SImode, destptr, 12);
23976 emit_insn (gen_strset (destptr, dest, value));
23978 emit_label (label);
23979 LABEL_NUSES (label) = 1;
23981 if (max_size > 8)
23983 rtx_code_label *label = ix86_expand_aligntest (count, 8, true);
23984 if (TARGET_64BIT)
23986 dest = change_address (destmem, DImode, destptr);
23987 emit_insn (gen_strset (destptr, dest, value));
23989 else
23991 dest = change_address (destmem, SImode, destptr);
23992 emit_insn (gen_strset (destptr, dest, value));
23993 dest = adjust_automodify_address_nv (dest, SImode, destptr, 4);
23994 emit_insn (gen_strset (destptr, dest, value));
23996 emit_label (label);
23997 LABEL_NUSES (label) = 1;
23999 if (max_size > 4)
24001 rtx_code_label *label = ix86_expand_aligntest (count, 4, true);
24002 dest = change_address (destmem, SImode, destptr);
24003 emit_insn (gen_strset (destptr, dest, gen_lowpart (SImode, value)));
24004 emit_label (label);
24005 LABEL_NUSES (label) = 1;
24007 if (max_size > 2)
24009 rtx_code_label *label = ix86_expand_aligntest (count, 2, true);
24010 dest = change_address (destmem, HImode, destptr);
24011 emit_insn (gen_strset (destptr, dest, gen_lowpart (HImode, value)));
24012 emit_label (label);
24013 LABEL_NUSES (label) = 1;
24015 if (max_size > 1)
24017 rtx_code_label *label = ix86_expand_aligntest (count, 1, true);
24018 dest = change_address (destmem, QImode, destptr);
24019 emit_insn (gen_strset (destptr, dest, gen_lowpart (QImode, value)));
24020 emit_label (label);
24021 LABEL_NUSES (label) = 1;
24025 /* Depending on ISSETMEM, copy enough bytes from SRCMEM to DESTMEM, or store enough
24026 bytes to DESTMEM, to align it to DESIRED_ALIGNMENT.  The original alignment is ALIGN.
24027 Depending on ISSETMEM, either the arguments SRCMEM/SRCPTR or VALUE/VEC_VALUE are
24028 ignored.
24029 The return value is the updated DESTMEM. */
24030 static rtx
24031 expand_set_or_movmem_prologue (rtx destmem, rtx srcmem,
24032 rtx destptr, rtx srcptr, rtx value,
24033 rtx vec_value, rtx count, int align,
24034 int desired_alignment, bool issetmem)
24036 int i;
24037 for (i = 1; i < desired_alignment; i <<= 1)
24039 if (align <= i)
24041 rtx_code_label *label = ix86_expand_aligntest (destptr, i, false);
24042 if (issetmem)
24044 if (vec_value && i > GET_MODE_SIZE (GET_MODE (value)))
24045 destmem = emit_memset (destmem, destptr, vec_value, i);
24046 else
24047 destmem = emit_memset (destmem, destptr, value, i);
24049 else
24050 destmem = emit_memmov (destmem, &srcmem, destptr, srcptr, i);
24051 ix86_adjust_counter (count, i);
24052 emit_label (label);
24053 LABEL_NUSES (label) = 1;
24054 set_mem_align (destmem, i * 2 * BITS_PER_UNIT);
24057 return destmem;
24060 /* Test if COUNT & SIZE is nonzero and if so, expand a movmem
24061 or setmem sequence that is valid for SIZE..2*SIZE-1 bytes
24062 and jump to DONE_LABEL. */
24063 static void
24064 expand_small_movmem_or_setmem (rtx destmem, rtx srcmem,
24065 rtx destptr, rtx srcptr,
24066 rtx value, rtx vec_value,
24067 rtx count, int size,
24068 rtx done_label, bool issetmem)
24070 rtx_code_label *label = ix86_expand_aligntest (count, size, false);
24071 machine_mode mode = mode_for_size (size * BITS_PER_UNIT, MODE_INT, 1);
24072 rtx modesize;
24073 int n;
24075 /* If we do not have vector value to copy, we must reduce size. */
24076 if (issetmem)
24078 if (!vec_value)
24080 if (GET_MODE (value) == VOIDmode && size > 8)
24081 mode = Pmode;
24082 else if (GET_MODE_SIZE (mode) > GET_MODE_SIZE (GET_MODE (value)))
24083 mode = GET_MODE (value);
24085 else
24086 mode = GET_MODE (vec_value), value = vec_value;
24088 else
24090 /* Choose appropriate vector mode. */
24091 if (size >= 32)
24092 mode = TARGET_AVX ? V32QImode : TARGET_SSE ? V16QImode : DImode;
24093 else if (size >= 16)
24094 mode = TARGET_SSE ? V16QImode : DImode;
24095 srcmem = change_address (srcmem, mode, srcptr);
24097 destmem = change_address (destmem, mode, destptr);
24098 modesize = GEN_INT (GET_MODE_SIZE (mode));
24099 gcc_assert (GET_MODE_SIZE (mode) <= size);
24100 for (n = 0; n * GET_MODE_SIZE (mode) < size; n++)
24102 if (issetmem)
24103 emit_move_insn (destmem, gen_lowpart (mode, value));
24104 else
24106 emit_move_insn (destmem, srcmem);
24107 srcmem = offset_address (srcmem, modesize, GET_MODE_SIZE (mode));
24109 destmem = offset_address (destmem, modesize, GET_MODE_SIZE (mode));
24112 destmem = offset_address (destmem, count, 1);
24113 destmem = offset_address (destmem, GEN_INT (-2 * size),
24114 GET_MODE_SIZE (mode));
24115 if (!issetmem)
24117 srcmem = offset_address (srcmem, count, 1);
24118 srcmem = offset_address (srcmem, GEN_INT (-2 * size),
24119 GET_MODE_SIZE (mode));
24121 for (n = 0; n * GET_MODE_SIZE (mode) < size; n++)
24123 if (issetmem)
24124 emit_move_insn (destmem, gen_lowpart (mode, value));
24125 else
24127 emit_move_insn (destmem, srcmem);
24128 srcmem = offset_address (srcmem, modesize, GET_MODE_SIZE (mode));
24130 destmem = offset_address (destmem, modesize, GET_MODE_SIZE (mode));
24132 emit_jump_insn (gen_jump (done_label));
24133 emit_barrier ();
24135 emit_label (label);
24136 LABEL_NUSES (label) = 1;
24139 /* Handle a small memcpy (up to SIZE, which is supposed to be a small power of 2)
24140 and get ready for the main memcpy loop by copying the initial DESIRED_ALIGN-ALIGN
24141 bytes and the last SIZE bytes, adjusting DESTPTR/SRCPTR/COUNT so that we can
24142 proceed with a loop copying SIZE bytes at once.  Do moves in MODE.
24143 DONE_LABEL is a label after the whole copying sequence.  The label is created
24144 on demand if *DONE_LABEL is NULL.
24145 MIN_SIZE is the minimal size of the copied block.  This value gets adjusted for the new
24146 bounds after the initial copies.
24148 DESTMEM/SRCMEM are memory expressions pointing to the copied block,
24149 DESTPTR/SRCPTR are pointers to the block.  DYNAMIC_CHECK indicates whether
24150 we will dispatch to a library call for large blocks.
24152 In pseudocode we do:
24154 if (COUNT < SIZE)
24156 Assume that SIZE is 4. Bigger sizes are handled analogously
24157 if (COUNT & 4)
24159 copy 4 bytes from SRCPTR to DESTPTR
24160 copy 4 bytes from SRCPTR + COUNT - 4 to DESTPTR + COUNT - 4
24161 goto done_label
24163 if (!COUNT)
24164 goto done_label;
24165 copy 1 byte from SRCPTR to DESTPTR
24166 if (COUNT & 2)
24168 copy 2 bytes from SRCPTR to DESTPTR
24169 copy 2 bytes from SRCPTR + COUNT - 2 to DESTPTR + COUNT - 2
24172 else
24174 copy at least DESIRED_ALIGN-ALIGN bytes from SRCPTR to DESTPTR
24175 copy SIZE bytes from SRCPTR + COUNT - SIZE to DESTPTR + COUNT - SIZE
24177 OLD_DESTPTR = DESTPTR;
24178 Align DESTPTR up to DESIRED_ALIGN
24179 SRCPTR += DESTPTR - OLD_DESTPTR
24180 COUNT -= DESTPTR - OLD_DESTPTR
24181 if (DYNAMIC_CHECK)
24182 Round COUNT down to multiple of SIZE
24183 << optional caller supplied zero size guard is here >>
24184 << optional caller supplied dynamic check is here >>
24185 << caller supplied main copy loop is here >>
24187 done_label:
24189 static void
24190 expand_set_or_movmem_prologue_epilogue_by_misaligned_moves (rtx destmem, rtx srcmem,
24191 rtx *destptr, rtx *srcptr,
24192 machine_mode mode,
24193 rtx value, rtx vec_value,
24194 rtx *count,
24195 rtx_code_label **done_label,
24196 int size,
24197 int desired_align,
24198 int align,
24199 unsigned HOST_WIDE_INT *min_size,
24200 bool dynamic_check,
24201 bool issetmem)
24203 rtx_code_label *loop_label = NULL, *label;
24204 int n;
24205 rtx modesize;
24206 int prolog_size = 0;
24207 rtx mode_value;
24209 /* Choose the proper value to copy. */
24210 if (issetmem && VECTOR_MODE_P (mode))
24211 mode_value = vec_value;
24212 else
24213 mode_value = value;
24214 gcc_assert (GET_MODE_SIZE (mode) <= size);
24216 /* See if block is big or small, handle small blocks. */
24217 if (!CONST_INT_P (*count) && *min_size < (unsigned HOST_WIDE_INT)size)
24219 int size2 = size;
24220 loop_label = gen_label_rtx ();
24222 if (!*done_label)
24223 *done_label = gen_label_rtx ();
24225 emit_cmp_and_jump_insns (*count, GEN_INT (size2), GE, 0, GET_MODE (*count),
24226 1, loop_label);
24227 size2 >>= 1;
24229 /* Handle sizes > 3. */
24230 for (;size2 > 2; size2 >>= 1)
24231 expand_small_movmem_or_setmem (destmem, srcmem,
24232 *destptr, *srcptr,
24233 value, vec_value,
24234 *count,
24235 size2, *done_label, issetmem);
24236 /* Nothing to copy? Jump to DONE_LABEL if so */
24237 emit_cmp_and_jump_insns (*count, const0_rtx, EQ, 0, GET_MODE (*count),
24238 1, *done_label);
24240 /* Do a byte copy. */
24241 destmem = change_address (destmem, QImode, *destptr);
24242 if (issetmem)
24243 emit_move_insn (destmem, gen_lowpart (QImode, value));
24244 else
24246 srcmem = change_address (srcmem, QImode, *srcptr);
24247 emit_move_insn (destmem, srcmem);
24250 /* Handle sizes 2 and 3. */
24251 label = ix86_expand_aligntest (*count, 2, false);
24252 destmem = change_address (destmem, HImode, *destptr);
24253 destmem = offset_address (destmem, *count, 1);
24254 destmem = offset_address (destmem, GEN_INT (-2), 2);
24255 if (issetmem)
24256 emit_move_insn (destmem, gen_lowpart (HImode, value));
24257 else
24259 srcmem = change_address (srcmem, HImode, *srcptr);
24260 srcmem = offset_address (srcmem, *count, 1);
24261 srcmem = offset_address (srcmem, GEN_INT (-2), 2);
24262 emit_move_insn (destmem, srcmem);
24265 emit_label (label);
24266 LABEL_NUSES (label) = 1;
24267 emit_jump_insn (gen_jump (*done_label));
24268 emit_barrier ();
24270 else
24271 gcc_assert (*min_size >= (unsigned HOST_WIDE_INT)size
24272 || UINTVAL (*count) >= (unsigned HOST_WIDE_INT)size);
24274 /* Start memcpy for COUNT >= SIZE. */
24275 if (loop_label)
24277 emit_label (loop_label);
24278 LABEL_NUSES (loop_label) = 1;
24281 /* Copy first desired_align bytes. */
24282 if (!issetmem)
24283 srcmem = change_address (srcmem, mode, *srcptr);
24284 destmem = change_address (destmem, mode, *destptr);
24285 modesize = GEN_INT (GET_MODE_SIZE (mode));
24286 for (n = 0; prolog_size < desired_align - align; n++)
24288 if (issetmem)
24289 emit_move_insn (destmem, mode_value);
24290 else
24292 emit_move_insn (destmem, srcmem);
24293 srcmem = offset_address (srcmem, modesize, GET_MODE_SIZE (mode));
24295 destmem = offset_address (destmem, modesize, GET_MODE_SIZE (mode));
24296 prolog_size += GET_MODE_SIZE (mode);
24300 /* Copy last SIZE bytes. */
24301 destmem = offset_address (destmem, *count, 1);
24302 destmem = offset_address (destmem,
24303 GEN_INT (-size - prolog_size),
24305 if (issetmem)
24306 emit_move_insn (destmem, mode_value);
24307 else
24309 srcmem = offset_address (srcmem, *count, 1);
24310 srcmem = offset_address (srcmem,
24311 GEN_INT (-size - prolog_size),
24313 emit_move_insn (destmem, srcmem);
24315 for (n = 1; n * GET_MODE_SIZE (mode) < size; n++)
24317 destmem = offset_address (destmem, modesize, 1);
24318 if (issetmem)
24319 emit_move_insn (destmem, mode_value);
24320 else
24322 srcmem = offset_address (srcmem, modesize, 1);
24323 emit_move_insn (destmem, srcmem);
24327 /* Align destination. */
24328 if (desired_align > 1 && desired_align > align)
24330 rtx saveddest = *destptr;
24332 gcc_assert (desired_align <= size);
24333 /* Align destptr up, placing it in a new register. */
24334 *destptr = expand_simple_binop (GET_MODE (*destptr), PLUS, *destptr,
24335 GEN_INT (prolog_size),
24336 NULL_RTX, 1, OPTAB_DIRECT);
24337 if (REG_P (*destptr) && REG_P (saveddest) && REG_POINTER (saveddest))
24338 REG_POINTER (*destptr) = 1;
24339 *destptr = expand_simple_binop (GET_MODE (*destptr), AND, *destptr,
24340 GEN_INT (-desired_align),
24341 *destptr, 1, OPTAB_DIRECT);
24342 /* See how many bytes we skipped. */
24343 saveddest = expand_simple_binop (GET_MODE (*destptr), MINUS, saveddest,
24344 *destptr,
24345 saveddest, 1, OPTAB_DIRECT);
24346 /* Adjust srcptr and count. */
24347 if (!issetmem)
24348 *srcptr = expand_simple_binop (GET_MODE (*srcptr), MINUS, *srcptr,
24349 saveddest, *srcptr, 1, OPTAB_DIRECT);
24350 *count = expand_simple_binop (GET_MODE (*count), PLUS, *count,
24351 saveddest, *count, 1, OPTAB_DIRECT);
24352 /* We copied at most size + prolog_size. */
24353 if (*min_size > (unsigned HOST_WIDE_INT)(size + prolog_size))
24354 *min_size = (*min_size - size) & ~(unsigned HOST_WIDE_INT)(size - 1);
24355 else
24356 *min_size = 0;
24358 /* Our loops always round down the block size, but for dispatch to a library
24359 call we need the precise value. */
24360 if (dynamic_check)
24361 *count = expand_simple_binop (GET_MODE (*count), AND, *count,
24362 GEN_INT (-size), *count, 1, OPTAB_DIRECT);
24364 else
24366 gcc_assert (prolog_size == 0);
24367 /* Decrease count, so we won't end up copying last word twice. */
24368 if (!CONST_INT_P (*count))
24369 *count = expand_simple_binop (GET_MODE (*count), PLUS, *count,
24370 constm1_rtx, *count, 1, OPTAB_DIRECT);
24371 else
24372 *count = GEN_INT ((UINTVAL (*count) - 1) & ~(unsigned HOST_WIDE_INT)(size - 1));
24373 if (*min_size)
24374 *min_size = (*min_size - 1) & ~(unsigned HOST_WIDE_INT)(size - 1);
24379 /* This function is like the previous one, except here we know how many bytes
24380 need to be copied. That allows us to update alignment not only of DST, which
24381 is returned, but also of SRC, which is passed as a pointer for that
24382 reason. */
24383 static rtx
24384 expand_set_or_movmem_constant_prologue (rtx dst, rtx *srcp, rtx destreg,
24385 rtx srcreg, rtx value, rtx vec_value,
24386 int desired_align, int align_bytes,
24387 bool issetmem)
24389 rtx src = NULL;
24390 rtx orig_dst = dst;
24391 rtx orig_src = NULL;
24392 int piece_size = 1;
24393 int copied_bytes = 0;
24395 if (!issetmem)
24397 gcc_assert (srcp != NULL);
24398 src = *srcp;
24399 orig_src = src;
24402 for (piece_size = 1;
24403 piece_size <= desired_align && copied_bytes < align_bytes;
24404 piece_size <<= 1)
24406 if (align_bytes & piece_size)
24408 if (issetmem)
24410 if (vec_value && piece_size > GET_MODE_SIZE (GET_MODE (value)))
24411 dst = emit_memset (dst, destreg, vec_value, piece_size);
24412 else
24413 dst = emit_memset (dst, destreg, value, piece_size);
24415 else
24416 dst = emit_memmov (dst, &src, destreg, srcreg, piece_size);
24417 copied_bytes += piece_size;
24420 if (MEM_ALIGN (dst) < (unsigned int) desired_align * BITS_PER_UNIT)
24421 set_mem_align (dst, desired_align * BITS_PER_UNIT);
24422 if (MEM_SIZE_KNOWN_P (orig_dst))
24423 set_mem_size (dst, MEM_SIZE (orig_dst) - align_bytes);
24425 if (!issetmem)
24427 int src_align_bytes = get_mem_align_offset (src, desired_align
24428 * BITS_PER_UNIT);
24429 if (src_align_bytes >= 0)
24430 src_align_bytes = desired_align - src_align_bytes;
24431 if (src_align_bytes >= 0)
24433 unsigned int src_align;
24434 for (src_align = desired_align; src_align >= 2; src_align >>= 1)
24436 if ((src_align_bytes & (src_align - 1))
24437 == (align_bytes & (src_align - 1)))
24438 break;
24440 if (src_align > (unsigned int) desired_align)
24441 src_align = desired_align;
24442 if (MEM_ALIGN (src) < src_align * BITS_PER_UNIT)
24443 set_mem_align (src, src_align * BITS_PER_UNIT);
24445 if (MEM_SIZE_KNOWN_P (orig_src))
24446 set_mem_size (src, MEM_SIZE (orig_src) - align_bytes);
24447 *srcp = src;
24450 return dst;
24453 /* Return true if ALG can be used in current context.
24454 Assume we expand memset if MEMSET is true. */
24455 static bool
24456 alg_usable_p (enum stringop_alg alg, bool memset)
24458 if (alg == no_stringop)
24459 return false;
24460 if (alg == vector_loop)
24461 return TARGET_SSE || TARGET_AVX;
24462 /* Algorithms using the rep prefix want at least edi and ecx;
24463 additionally, memset wants eax and memcpy wants esi. Don't
24464 consider such algorithms if the user has appropriated those
24465 registers for their own purposes. */
24466 if (alg == rep_prefix_1_byte
24467 || alg == rep_prefix_4_byte
24468 || alg == rep_prefix_8_byte)
24469 return !(fixed_regs[CX_REG] || fixed_regs[DI_REG]
24470 || (memset ? fixed_regs[AX_REG] : fixed_regs[SI_REG]));
24471 return true;
24474 /* Given COUNT and EXPECTED_SIZE, decide on codegen of string operation. */
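/* The decision order below is, roughly: honor a user-specified stringop
   strategy if it is usable; when optimizing for size prefer rep movsb/stosb
   (or the 4-byte rep variant when the count is a known nonzero multiple of 4
   and, for memset, the stored value is zero); use a byte loop for very small
   expected sizes; otherwise consult the per-CPU cost table for the expected
   size and fall back to its unknown_size entry or a libcall.  */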
24475 static enum stringop_alg
24476 decide_alg (HOST_WIDE_INT count, HOST_WIDE_INT expected_size,
24477 unsigned HOST_WIDE_INT min_size, unsigned HOST_WIDE_INT max_size,
24478 bool memset, bool zero_memset, int *dynamic_check, bool *noalign)
24480 const struct stringop_algs * algs;
24481 bool optimize_for_speed;
24482 int max = 0;
24483 const struct processor_costs *cost;
24484 int i;
24485 bool any_alg_usable_p = false;
24487 *noalign = false;
24488 *dynamic_check = -1;
24490 /* Even if the string operation call is cold, we still might spend a lot
24491 of time processing large blocks. */
24492 if (optimize_function_for_size_p (cfun)
24493 || (optimize_insn_for_size_p ()
24494 && (max_size < 256
24495 || (expected_size != -1 && expected_size < 256))))
24496 optimize_for_speed = false;
24497 else
24498 optimize_for_speed = true;
24500 cost = optimize_for_speed ? ix86_cost : &ix86_size_cost;
24501 if (memset)
24502 algs = &cost->memset[TARGET_64BIT != 0];
24503 else
24504 algs = &cost->memcpy[TARGET_64BIT != 0];
24506 /* See maximal size for user defined algorithm. */
24507 for (i = 0; i < MAX_STRINGOP_ALGS; i++)
24509 enum stringop_alg candidate = algs->size[i].alg;
24510 bool usable = alg_usable_p (candidate, memset);
24511 any_alg_usable_p |= usable;
24513 if (candidate != libcall && candidate && usable)
24514 max = algs->size[i].max;
24517 /* If the expected size is not known but the max size is small enough
24518 that the inline version is a win, set the expected size into
24519 the range. */
24520 if (((max > 1 && (unsigned HOST_WIDE_INT) max >= max_size) || max == -1)
24521 && expected_size == -1)
24522 expected_size = min_size / 2 + max_size / 2;
24524 /* If the user specified the algorithm, honor it if possible. */
24525 if (ix86_stringop_alg != no_stringop
24526 && alg_usable_p (ix86_stringop_alg, memset))
24527 return ix86_stringop_alg;
24528 /* rep; movq or rep; movl is the smallest variant. */
24529 else if (!optimize_for_speed)
24531 *noalign = true;
24532 if (!count || (count & 3) || (memset && !zero_memset))
24533 return alg_usable_p (rep_prefix_1_byte, memset)
24534 ? rep_prefix_1_byte : loop_1_byte;
24535 else
24536 return alg_usable_p (rep_prefix_4_byte, memset)
24537 ? rep_prefix_4_byte : loop;
24539 /* Very tiny blocks are best handled via the loop; REP is expensive to
24540 set up. */
24541 else if (expected_size != -1 && expected_size < 4)
24542 return loop_1_byte;
24543 else if (expected_size != -1)
24545 enum stringop_alg alg = libcall;
24546 bool alg_noalign = false;
24547 for (i = 0; i < MAX_STRINGOP_ALGS; i++)
24549 /* We get here if the algorithms that were not libcall-based
24550 were rep-prefix based and we are unable to use rep prefixes
24551 based on global register usage. Break out of the loop and
24552 use the heuristic below. */
24553 if (algs->size[i].max == 0)
24554 break;
24555 if (algs->size[i].max >= expected_size || algs->size[i].max == -1)
24557 enum stringop_alg candidate = algs->size[i].alg;
24559 if (candidate != libcall && alg_usable_p (candidate, memset))
24561 alg = candidate;
24562 alg_noalign = algs->size[i].noalign;
24564 /* Honor TARGET_INLINE_ALL_STRINGOPS by picking
24565 last non-libcall inline algorithm. */
24566 if (TARGET_INLINE_ALL_STRINGOPS)
24568 /* When the current size is best copied by a libcall,
24569 but we are still forced to inline, run the heuristic below
24570 that will pick code for medium-sized blocks. */
24571 if (alg != libcall)
24573 *noalign = alg_noalign;
24574 return alg;
24576 else if (!any_alg_usable_p)
24577 break;
24579 else if (alg_usable_p (candidate, memset))
24581 *noalign = algs->size[i].noalign;
24582 return candidate;
24587 /* When asked to inline the call anyway, try to pick a meaningful choice.
24588 We look for the maximal size of block that is faster to copy by hand and
24589 take blocks of at most that size, guessing that the average size will
24590 be roughly half of the block.
24592 If this turns out to be bad, we might simply specify the preferred
24593 choice in ix86_costs. */
24594 if ((TARGET_INLINE_ALL_STRINGOPS || TARGET_INLINE_STRINGOPS_DYNAMICALLY)
24595 && (algs->unknown_size == libcall
24596 || !alg_usable_p (algs->unknown_size, memset)))
24598 enum stringop_alg alg;
24600 /* If there aren't any usable algorithms, then recursing on
24601 smaller sizes isn't going to find anything. Just return the
24602 simple byte-at-a-time copy loop. */
24603 if (!any_alg_usable_p)
24605 /* Pick something reasonable. */
24606 if (TARGET_INLINE_STRINGOPS_DYNAMICALLY)
24607 *dynamic_check = 128;
24608 return loop_1_byte;
24610 if (max <= 0)
24611 max = 4096;
24612 alg = decide_alg (count, max / 2, min_size, max_size, memset,
24613 zero_memset, dynamic_check, noalign);
24614 gcc_assert (*dynamic_check == -1);
24615 if (TARGET_INLINE_STRINGOPS_DYNAMICALLY)
24616 *dynamic_check = max;
24617 else
24618 gcc_assert (alg != libcall);
24619 return alg;
24621 return (alg_usable_p (algs->unknown_size, memset)
24622 ? algs->unknown_size : libcall);
24625 /* Decide on alignment. We know that the operand is already aligned to ALIGN
24626 (ALIGN can be based on profile feedback and thus it is not 100% guaranteed). */
24627 static int
24628 decide_alignment (int align,
24629 enum stringop_alg alg,
24630 int expected_size,
24631 machine_mode move_mode)
24633 int desired_align = 0;
24635 gcc_assert (alg != no_stringop);
24637 if (alg == libcall)
24638 return 0;
24639 if (move_mode == VOIDmode)
24640 return 0;
24642 desired_align = GET_MODE_SIZE (move_mode);
24643 /* PentiumPro has special logic triggering for 8-byte-aligned blocks,
24644 copying a whole cache line at once. */
24645 if (TARGET_PENTIUMPRO
24646 && (alg == rep_prefix_4_byte || alg == rep_prefix_1_byte))
24647 desired_align = 8;
24649 if (optimize_size)
24650 desired_align = 1;
24651 if (desired_align < align)
24652 desired_align = align;
24653 if (expected_size != -1 && expected_size < 4)
24654 desired_align = align;
24656 return desired_align;
24660 /* Helper function for memset expansion. For QImode value 0xXY produce
24661 0xXYXYXYXY of the width specified by MODE. This is essentially
24662 a * 0x01010101, but we can do slightly better than
24663 synth_mult by unwinding the sequence by hand on CPUs with
24664 slow multiply. */
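/* Added illustration (not from the original comment): for MODE == SImode and
   VAL == 0xAB the routine below produces 0xABABABAB, either as
   0xAB * 0x01010101 when multiply is cheap, or via the shift/or sequence
   x |= x << 8; x |= x << 16; (plus a further x |= x << 32 for DImode).  */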
24665 static rtx
24666 promote_duplicated_reg (machine_mode mode, rtx val)
24668 machine_mode valmode = GET_MODE (val);
24669 rtx tmp;
24670 int nops = mode == DImode ? 3 : 2;
24672 gcc_assert (mode == SImode || mode == DImode || val == const0_rtx);
24673 if (val == const0_rtx)
24674 return copy_to_mode_reg (mode, CONST0_RTX (mode));
24675 if (CONST_INT_P (val))
24677 HOST_WIDE_INT v = INTVAL (val) & 255;
24679 v |= v << 8;
24680 v |= v << 16;
24681 if (mode == DImode)
24682 v |= (v << 16) << 16;
24683 return copy_to_mode_reg (mode, gen_int_mode (v, mode));
24686 if (valmode == VOIDmode)
24687 valmode = QImode;
24688 if (valmode != QImode)
24689 val = gen_lowpart (QImode, val);
24690 if (mode == QImode)
24691 return val;
24692 if (!TARGET_PARTIAL_REG_STALL)
24693 nops--;
24694 if (ix86_cost->mult_init[mode == DImode ? 3 : 2]
24695 + ix86_cost->mult_bit * (mode == DImode ? 8 : 4)
24696 <= (ix86_cost->shift_const + ix86_cost->add) * nops
24697 + (COSTS_N_INSNS (TARGET_PARTIAL_REG_STALL == 0)))
24699 rtx reg = convert_modes (mode, QImode, val, true);
24700 tmp = promote_duplicated_reg (mode, const1_rtx);
24701 return expand_simple_binop (mode, MULT, reg, tmp, NULL, 1,
24702 OPTAB_DIRECT);
24704 else
24706 rtx reg = convert_modes (mode, QImode, val, true);
24708 if (!TARGET_PARTIAL_REG_STALL)
24709 if (mode == SImode)
24710 emit_insn (gen_movsi_insv_1 (reg, reg));
24711 else
24712 emit_insn (gen_movdi_insv_1 (reg, reg));
24713 else
24715 tmp = expand_simple_binop (mode, ASHIFT, reg, GEN_INT (8),
24716 NULL, 1, OPTAB_DIRECT);
24717 reg =
24718 expand_simple_binop (mode, IOR, reg, tmp, reg, 1, OPTAB_DIRECT);
24720 tmp = expand_simple_binop (mode, ASHIFT, reg, GEN_INT (16),
24721 NULL, 1, OPTAB_DIRECT);
24722 reg = expand_simple_binop (mode, IOR, reg, tmp, reg, 1, OPTAB_DIRECT);
24723 if (mode == SImode)
24724 return reg;
24725 tmp = expand_simple_binop (mode, ASHIFT, reg, GEN_INT (32),
24726 NULL, 1, OPTAB_DIRECT);
24727 reg = expand_simple_binop (mode, IOR, reg, tmp, reg, 1, OPTAB_DIRECT);
24728 return reg;
24732 /* Duplicate value VAL using promote_duplicated_reg into the maximal size that will
24733 be needed by the main loop copying SIZE_NEEDED chunks and by the prologue getting
24734 alignment from ALIGN to DESIRED_ALIGN. */
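/* (Added example, following the code below: with size_needed == 8 on a 64-bit
   target VAL is broadcast to DImode; with size_needed == 4 and no extra
   alignment need it is only broadcast to SImode.)  */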
24735 static rtx
24736 promote_duplicated_reg_to_size (rtx val, int size_needed, int desired_align,
24737 int align)
24739 rtx promoted_val;
24741 if (TARGET_64BIT
24742 && (size_needed > 4 || (desired_align > align && desired_align > 4)))
24743 promoted_val = promote_duplicated_reg (DImode, val);
24744 else if (size_needed > 2 || (desired_align > align && desired_align > 2))
24745 promoted_val = promote_duplicated_reg (SImode, val);
24746 else if (size_needed > 1 || (desired_align > align && desired_align > 1))
24747 promoted_val = promote_duplicated_reg (HImode, val);
24748 else
24749 promoted_val = val;
24751 return promoted_val;
24754 /* Expand string move (memcpy) or store (memset) operation. Use i386 string
24755 operations when profitable. The code depends upon architecture, block size
24756 and alignment, but always has one of the following overall structures:
24758 Aligned move sequence:
24760 1) Prologue guard: Conditional that jumps up to epilogues for small
24761 blocks that can be handled by the epilogue alone. This is faster,
24762 but it is also needed for correctness, since the prologue assumes the block
24763 is larger than the desired alignment.
24765 Optional dynamic check for size and libcall for large
24766 blocks is emitted here too, with -minline-stringops-dynamically.
24768 2) Prologue: copy first few bytes in order to get destination
24769 aligned to DESIRED_ALIGN. It is emitted only when ALIGN is less
24770 than DESIRED_ALIGN and up to DESIRED_ALIGN - ALIGN bytes can be
24771 copied. We emit either a jump tree on power of two sized
24772 blocks, or a byte loop.
24774 3) Main body: the copying loop itself, copying in SIZE_NEEDED chunks
24775 with specified algorithm.
24777 4) Epilogue: code copying tail of the block that is too small to be
24778 handled by main body (or up to size guarded by prologue guard).
24780 Misaligned move sequence
24782 1) Misaligned move prologue/epilogue containing:
24783 a) Prologue handling small memory blocks and jumping to done_label
24784 (skipped if blocks are known to be large enough)
24785 b) Single move copying the first DESIRED_ALIGN-ALIGN bytes if alignment is
24786 needed by single possibly misaligned move
24787 (skipped if alignment is not needed)
24788 c) Copy of last SIZE_NEEDED bytes by possibly misaligned moves
24790 2) Zero size guard dispatching to done_label, if needed
24792 3) Dispatch to a library call, if needed.
24794 4) Main body: the copying loop itself, copying in SIZE_NEEDED chunks
24795 with the specified algorithm. */
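/* Added sketch (illustrative only, not part of the original comment): for a
   memcpy with unknown count, algorithm rep_prefix_4_byte and desired
   alignment 4, the aligned sequence roughly reads:
       if (count < epilogue_size) goto epilogue;    -- prologue guard
       copy up to 3 bytes to align the destination  -- prologue
       rep movsl                                    -- main body
       copy the remaining count & 3 tail bytes      -- epilogue  */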
24796 bool
24797 ix86_expand_set_or_movmem (rtx dst, rtx src, rtx count_exp, rtx val_exp,
24798 rtx align_exp, rtx expected_align_exp,
24799 rtx expected_size_exp, rtx min_size_exp,
24800 rtx max_size_exp, rtx probable_max_size_exp,
24801 bool issetmem)
24803 rtx destreg;
24804 rtx srcreg = NULL;
24805 rtx_code_label *label = NULL;
24806 rtx tmp;
24807 rtx_code_label *jump_around_label = NULL;
24808 HOST_WIDE_INT align = 1;
24809 unsigned HOST_WIDE_INT count = 0;
24810 HOST_WIDE_INT expected_size = -1;
24811 int size_needed = 0, epilogue_size_needed;
24812 int desired_align = 0, align_bytes = 0;
24813 enum stringop_alg alg;
24814 rtx promoted_val = NULL;
24815 rtx vec_promoted_val = NULL;
24816 bool force_loopy_epilogue = false;
24817 int dynamic_check;
24818 bool need_zero_guard = false;
24819 bool noalign;
24820 machine_mode move_mode = VOIDmode;
24821 int unroll_factor = 1;
24822 /* TODO: Once value ranges are available, fill in proper data. */
24823 unsigned HOST_WIDE_INT min_size = 0;
24824 unsigned HOST_WIDE_INT max_size = -1;
24825 unsigned HOST_WIDE_INT probable_max_size = -1;
24826 bool misaligned_prologue_used = false;
24828 if (CONST_INT_P (align_exp))
24829 align = INTVAL (align_exp);
24830 /* i386 can do misaligned access at a reasonably increased cost. */
24831 if (CONST_INT_P (expected_align_exp)
24832 && INTVAL (expected_align_exp) > align)
24833 align = INTVAL (expected_align_exp);
24834 /* ALIGN is the minimum of destination and source alignment, but we care here
24835 just about destination alignment. */
24836 else if (!issetmem
24837 && MEM_ALIGN (dst) > (unsigned HOST_WIDE_INT) align * BITS_PER_UNIT)
24838 align = MEM_ALIGN (dst) / BITS_PER_UNIT;
24840 if (CONST_INT_P (count_exp))
24842 min_size = max_size = probable_max_size = count = expected_size
24843 = INTVAL (count_exp);
24844 /* When COUNT is 0, there is nothing to do. */
24845 if (!count)
24846 return true;
24848 else
24850 if (min_size_exp)
24851 min_size = INTVAL (min_size_exp);
24852 if (max_size_exp)
24853 max_size = INTVAL (max_size_exp);
24854 if (probable_max_size_exp)
24855 probable_max_size = INTVAL (probable_max_size_exp);
24856 if (CONST_INT_P (expected_size_exp))
24857 expected_size = INTVAL (expected_size_exp);
24860 /* Make sure we don't need to care about overflow later on. */
24861 if (count > (HOST_WIDE_INT_1U << 30))
24862 return false;
24864 /* Step 0: Decide on preferred algorithm, desired alignment and
24865 size of chunks to be copied by main loop. */
24866 alg = decide_alg (count, expected_size, min_size, probable_max_size,
24867 issetmem,
24868 issetmem && val_exp == const0_rtx,
24869 &dynamic_check, &noalign);
24870 if (alg == libcall)
24871 return false;
24872 gcc_assert (alg != no_stringop);
24874 /* For now the vector version of memset is generated only for memory zeroing, as
24875 creating the promoted vector value is very cheap in this case. */
24876 if (issetmem && alg == vector_loop && val_exp != const0_rtx)
24877 alg = unrolled_loop;
24879 if (!count)
24880 count_exp = copy_to_mode_reg (GET_MODE (count_exp), count_exp);
24881 destreg = ix86_copy_addr_to_reg (XEXP (dst, 0));
24882 if (!issetmem)
24883 srcreg = ix86_copy_addr_to_reg (XEXP (src, 0));
24885 unroll_factor = 1;
24886 move_mode = word_mode;
24887 switch (alg)
24889 case libcall:
24890 case no_stringop:
24891 case last_alg:
24892 gcc_unreachable ();
24893 case loop_1_byte:
24894 need_zero_guard = true;
24895 move_mode = QImode;
24896 break;
24897 case loop:
24898 need_zero_guard = true;
24899 break;
24900 case unrolled_loop:
24901 need_zero_guard = true;
24902 unroll_factor = (TARGET_64BIT ? 4 : 2);
24903 break;
24904 case vector_loop:
24905 need_zero_guard = true;
24906 unroll_factor = 4;
24907 /* Find the widest supported mode. */
24908 move_mode = word_mode;
24909 while (optab_handler (mov_optab, GET_MODE_WIDER_MODE (move_mode))
24910 != CODE_FOR_nothing)
24911 move_mode = GET_MODE_WIDER_MODE (move_mode);
24913 /* Find the corresponding vector mode with the same size as MOVE_MODE.
24914 MOVE_MODE is an integer mode at the moment (SI, DI, TI, etc.). */
24915 if (GET_MODE_SIZE (move_mode) > GET_MODE_SIZE (word_mode))
24917 int nunits = GET_MODE_SIZE (move_mode) / GET_MODE_SIZE (word_mode);
24918 move_mode = mode_for_vector (word_mode, nunits);
24919 if (optab_handler (mov_optab, move_mode) == CODE_FOR_nothing)
24920 move_mode = word_mode;
24922 gcc_assert (optab_handler (mov_optab, move_mode) != CODE_FOR_nothing);
24923 break;
24924 case rep_prefix_8_byte:
24925 move_mode = DImode;
24926 break;
24927 case rep_prefix_4_byte:
24928 move_mode = SImode;
24929 break;
24930 case rep_prefix_1_byte:
24931 move_mode = QImode;
24932 break;
24934 size_needed = GET_MODE_SIZE (move_mode) * unroll_factor;
24935 epilogue_size_needed = size_needed;
24937 desired_align = decide_alignment (align, alg, expected_size, move_mode);
24938 if (!TARGET_ALIGN_STRINGOPS || noalign)
24939 align = desired_align;
24941 /* Step 1: Prologue guard. */
24943 /* Alignment code needs count to be in register. */
24944 if (CONST_INT_P (count_exp) && desired_align > align)
24946 if (INTVAL (count_exp) > desired_align
24947 && INTVAL (count_exp) > size_needed)
24949 align_bytes
24950 = get_mem_align_offset (dst, desired_align * BITS_PER_UNIT);
24951 if (align_bytes <= 0)
24952 align_bytes = 0;
24953 else
24954 align_bytes = desired_align - align_bytes;
24956 if (align_bytes == 0)
24957 count_exp = force_reg (counter_mode (count_exp), count_exp);
24959 gcc_assert (desired_align >= 1 && align >= 1);
24961 /* Misaligned move sequences handle both prologue and epilogue at once.
24962 Default code generation results in smaller code for large alignments
24963 and also avoids redundant work when sizes are known precisely. */
24964 misaligned_prologue_used
24965 = (TARGET_MISALIGNED_MOVE_STRING_PRO_EPILOGUES
24966 && MAX (desired_align, epilogue_size_needed) <= 32
24967 && desired_align <= epilogue_size_needed
24968 && ((desired_align > align && !align_bytes)
24969 || (!count && epilogue_size_needed > 1)));
24971 /* Do the cheap promotion to allow better CSE across the
24972 main loop and epilogue (i.e. one load of the big constant in
24973 front of all code).
24974 For now the misaligned move sequences do not have a fast path
24975 without broadcasting. */
24976 if (issetmem && ((CONST_INT_P (val_exp) || misaligned_prologue_used)))
24978 if (alg == vector_loop)
24980 gcc_assert (val_exp == const0_rtx);
24981 vec_promoted_val = promote_duplicated_reg (move_mode, val_exp);
24982 promoted_val = promote_duplicated_reg_to_size (val_exp,
24983 GET_MODE_SIZE (word_mode),
24984 desired_align, align);
24986 else
24988 promoted_val = promote_duplicated_reg_to_size (val_exp, size_needed,
24989 desired_align, align);
24992 /* Misaligned move sequences handle both prologues and epilogues at once.
24993 Default code generation results in smaller code for large alignments and
24994 also avoids redundant work when sizes are known precisely. */
24995 if (misaligned_prologue_used)
24997 /* The misaligned move prologue handles small blocks by itself. */
24998 expand_set_or_movmem_prologue_epilogue_by_misaligned_moves
24999 (dst, src, &destreg, &srcreg,
25000 move_mode, promoted_val, vec_promoted_val,
25001 &count_exp,
25002 &jump_around_label,
25003 desired_align < align
25004 ? MAX (desired_align, epilogue_size_needed) : epilogue_size_needed,
25005 desired_align, align, &min_size, dynamic_check, issetmem);
25006 if (!issetmem)
25007 src = change_address (src, BLKmode, srcreg);
25008 dst = change_address (dst, BLKmode, destreg);
25009 set_mem_align (dst, desired_align * BITS_PER_UNIT);
25010 epilogue_size_needed = 0;
25011 if (need_zero_guard && !min_size)
25013 /* It is possible that we copied enough so the main loop will not
25014 execute. */
25015 gcc_assert (size_needed > 1);
25016 if (jump_around_label == NULL_RTX)
25017 jump_around_label = gen_label_rtx ();
25018 emit_cmp_and_jump_insns (count_exp,
25019 GEN_INT (size_needed),
25020 LTU, 0, counter_mode (count_exp), 1, jump_around_label);
25021 if (expected_size == -1
25022 || expected_size < (desired_align - align) / 2 + size_needed)
25023 predict_jump (REG_BR_PROB_BASE * 20 / 100);
25024 else
25025 predict_jump (REG_BR_PROB_BASE * 60 / 100);
25028 /* Ensure that alignment prologue won't copy past end of block. */
25029 else if (size_needed > 1 || (desired_align > 1 && desired_align > align))
25031 epilogue_size_needed = MAX (size_needed - 1, desired_align - align);
25032 /* Epilogue always copies COUNT_EXP & EPILOGUE_SIZE_NEEDED bytes.
25033 Make sure it is power of 2. */
25034 epilogue_size_needed = 1 << (floor_log2 (epilogue_size_needed) + 1);
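/* (Added example: when the MAX above is size_needed - 1 == 3 this yields 4,
   and when it is 5 this yields 8 -- the next power of two strictly greater
   than the argument.)  */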
25036 /* To improve performance of small blocks, we jump around the VAL
25037 promoting code. This means that if the promoted VAL is not constant,
25038 we might not use it in the epilogue and have to use the byte
25039 loop variant. */
25040 if (issetmem && epilogue_size_needed > 2 && !promoted_val)
25041 force_loopy_epilogue = true;
25042 if ((count && count < (unsigned HOST_WIDE_INT) epilogue_size_needed)
25043 || max_size < (unsigned HOST_WIDE_INT) epilogue_size_needed)
25045 /* If main algorithm works on QImode, no epilogue is needed.
25046 For small sizes just don't align anything. */
25047 if (size_needed == 1)
25048 desired_align = align;
25049 else
25050 goto epilogue;
25052 else if (!count
25053 && min_size < (unsigned HOST_WIDE_INT) epilogue_size_needed)
25055 label = gen_label_rtx ();
25056 emit_cmp_and_jump_insns (count_exp,
25057 GEN_INT (epilogue_size_needed),
25058 LTU, 0, counter_mode (count_exp), 1, label);
25059 if (expected_size == -1 || expected_size < epilogue_size_needed)
25060 predict_jump (REG_BR_PROB_BASE * 60 / 100);
25061 else
25062 predict_jump (REG_BR_PROB_BASE * 20 / 100);
25066 /* Emit code to decide at runtime whether a library call or inline code should be
25067 used. */
25068 if (dynamic_check != -1)
25070 if (!issetmem && CONST_INT_P (count_exp))
25072 if (UINTVAL (count_exp) >= (unsigned HOST_WIDE_INT)dynamic_check)
25074 emit_block_move_via_libcall (dst, src, count_exp, false);
25075 count_exp = const0_rtx;
25076 goto epilogue;
25079 else
25081 rtx_code_label *hot_label = gen_label_rtx ();
25082 if (jump_around_label == NULL_RTX)
25083 jump_around_label = gen_label_rtx ();
25084 emit_cmp_and_jump_insns (count_exp, GEN_INT (dynamic_check - 1),
25085 LEU, 0, counter_mode (count_exp),
25086 1, hot_label);
25087 predict_jump (REG_BR_PROB_BASE * 90 / 100);
25088 if (issetmem)
25089 set_storage_via_libcall (dst, count_exp, val_exp, false);
25090 else
25091 emit_block_move_via_libcall (dst, src, count_exp, false);
25092 emit_jump (jump_around_label);
25093 emit_label (hot_label);
25097 /* Step 2: Alignment prologue. */
25098 /* Do the expensive promotion once we branched off the small blocks. */
25099 if (issetmem && !promoted_val)
25100 promoted_val = promote_duplicated_reg_to_size (val_exp, size_needed,
25101 desired_align, align);
25103 if (desired_align > align && !misaligned_prologue_used)
25105 if (align_bytes == 0)
25107 /* Except for the first move in the prologue, we no longer know
25108 the constant offset in aliasing info. It doesn't seem worth
25109 the pain to maintain it for the first move, so throw away
25110 the info early. */
25111 dst = change_address (dst, BLKmode, destreg);
25112 if (!issetmem)
25113 src = change_address (src, BLKmode, srcreg);
25114 dst = expand_set_or_movmem_prologue (dst, src, destreg, srcreg,
25115 promoted_val, vec_promoted_val,
25116 count_exp, align, desired_align,
25117 issetmem);
25118 /* At most desired_align - align bytes are copied. */
25119 if (min_size < (unsigned)(desired_align - align))
25120 min_size = 0;
25121 else
25122 min_size -= desired_align - align;
25124 else
25126 /* If we know how many bytes need to be stored before dst is
25127 sufficiently aligned, maintain aliasing info accurately. */
25128 dst = expand_set_or_movmem_constant_prologue (dst, &src, destreg,
25129 srcreg,
25130 promoted_val,
25131 vec_promoted_val,
25132 desired_align,
25133 align_bytes,
25134 issetmem);
25136 count_exp = plus_constant (counter_mode (count_exp),
25137 count_exp, -align_bytes);
25138 count -= align_bytes;
25139 min_size -= align_bytes;
25140 max_size -= align_bytes;
25142 if (need_zero_guard
25143 && !min_size
25144 && (count < (unsigned HOST_WIDE_INT) size_needed
25145 || (align_bytes == 0
25146 && count < ((unsigned HOST_WIDE_INT) size_needed
25147 + desired_align - align))))
25149 /* It is possible that we copied enough so the main loop will not
25150 execute. */
25151 gcc_assert (size_needed > 1);
25152 if (label == NULL_RTX)
25153 label = gen_label_rtx ();
25154 emit_cmp_and_jump_insns (count_exp,
25155 GEN_INT (size_needed),
25156 LTU, 0, counter_mode (count_exp), 1, label);
25157 if (expected_size == -1
25158 || expected_size < (desired_align - align) / 2 + size_needed)
25159 predict_jump (REG_BR_PROB_BASE * 20 / 100);
25160 else
25161 predict_jump (REG_BR_PROB_BASE * 60 / 100);
25164 if (label && size_needed == 1)
25166 emit_label (label);
25167 LABEL_NUSES (label) = 1;
25168 label = NULL;
25169 epilogue_size_needed = 1;
25170 if (issetmem)
25171 promoted_val = val_exp;
25173 else if (label == NULL_RTX && !misaligned_prologue_used)
25174 epilogue_size_needed = size_needed;
25176 /* Step 3: Main loop. */
25178 switch (alg)
25180 case libcall:
25181 case no_stringop:
25182 case last_alg:
25183 gcc_unreachable ();
25184 case loop_1_byte:
25185 case loop:
25186 case unrolled_loop:
25187 expand_set_or_movmem_via_loop (dst, src, destreg, srcreg, promoted_val,
25188 count_exp, move_mode, unroll_factor,
25189 expected_size, issetmem);
25190 break;
25191 case vector_loop:
25192 expand_set_or_movmem_via_loop (dst, src, destreg, srcreg,
25193 vec_promoted_val, count_exp, move_mode,
25194 unroll_factor, expected_size, issetmem);
25195 break;
25196 case rep_prefix_8_byte:
25197 case rep_prefix_4_byte:
25198 case rep_prefix_1_byte:
25199 expand_set_or_movmem_via_rep (dst, src, destreg, srcreg, promoted_val,
25200 val_exp, count_exp, move_mode, issetmem);
25201 break;
25203 /* Adjust properly the offset of src and dest memory for aliasing. */
25204 if (CONST_INT_P (count_exp))
25206 if (!issetmem)
25207 src = adjust_automodify_address_nv (src, BLKmode, srcreg,
25208 (count / size_needed) * size_needed);
25209 dst = adjust_automodify_address_nv (dst, BLKmode, destreg,
25210 (count / size_needed) * size_needed);
25212 else
25214 if (!issetmem)
25215 src = change_address (src, BLKmode, srcreg);
25216 dst = change_address (dst, BLKmode, destreg);
25219 /* Step 4: Epilogue to copy the remaining bytes. */
25220 epilogue:
25221 if (label)
25223 /* When the main loop is done, COUNT_EXP might hold original count,
25224 while we want to copy only COUNT_EXP & SIZE_NEEDED bytes.
25225 Epilogue code will actually copy COUNT_EXP & EPILOGUE_SIZE_NEEDED
25226 bytes. Compensate if needed. */
25228 if (size_needed < epilogue_size_needed)
25230 tmp =
25231 expand_simple_binop (counter_mode (count_exp), AND, count_exp,
25232 GEN_INT (size_needed - 1), count_exp, 1,
25233 OPTAB_DIRECT);
25234 if (tmp != count_exp)
25235 emit_move_insn (count_exp, tmp);
25237 emit_label (label);
25238 LABEL_NUSES (label) = 1;
25241 if (count_exp != const0_rtx && epilogue_size_needed > 1)
25243 if (force_loopy_epilogue)
25244 expand_setmem_epilogue_via_loop (dst, destreg, val_exp, count_exp,
25245 epilogue_size_needed);
25246 else
25248 if (issetmem)
25249 expand_setmem_epilogue (dst, destreg, promoted_val,
25250 vec_promoted_val, count_exp,
25251 epilogue_size_needed);
25252 else
25253 expand_movmem_epilogue (dst, src, destreg, srcreg, count_exp,
25254 epilogue_size_needed);
25257 if (jump_around_label)
25258 emit_label (jump_around_label);
25259 return true;
25263 /* Expand the appropriate insns for doing strlen if not just doing
25264 repnz; scasb
25266 out = result, initialized with the start address
25267 align_rtx = alignment of the address.
25268 scratch = scratch register, initialized with the start address when
25269 not aligned, otherwise undefined
25271 This is just the body. It needs the initializations mentioned above and
25272 some address computing at the end. These things are done in i386.md. */
25274 static void
25275 ix86_expand_strlensi_unroll_1 (rtx out, rtx src, rtx align_rtx)
25277 int align;
25278 rtx tmp;
25279 rtx_code_label *align_2_label = NULL;
25280 rtx_code_label *align_3_label = NULL;
25281 rtx_code_label *align_4_label = gen_label_rtx ();
25282 rtx_code_label *end_0_label = gen_label_rtx ();
25283 rtx mem;
25284 rtx tmpreg = gen_reg_rtx (SImode);
25285 rtx scratch = gen_reg_rtx (SImode);
25286 rtx cmp;
25288 align = 0;
25289 if (CONST_INT_P (align_rtx))
25290 align = INTVAL (align_rtx);
25292 /* Loop to check 1..3 bytes for null to get an aligned pointer. */
25294 /* Is there a known alignment and is it less than 4? */
25295 if (align < 4)
25297 rtx scratch1 = gen_reg_rtx (Pmode);
25298 emit_move_insn (scratch1, out);
25299 /* Is there a known alignment and is it not 2? */
25300 if (align != 2)
25302 align_3_label = gen_label_rtx (); /* Label when aligned to 3-byte */
25303 align_2_label = gen_label_rtx (); /* Label when aligned to 2-byte */
25305 /* Leave just the two lower bits. */
25306 align_rtx = expand_binop (Pmode, and_optab, scratch1, GEN_INT (3),
25307 NULL_RTX, 0, OPTAB_WIDEN);
25309 emit_cmp_and_jump_insns (align_rtx, const0_rtx, EQ, NULL,
25310 Pmode, 1, align_4_label);
25311 emit_cmp_and_jump_insns (align_rtx, const2_rtx, EQ, NULL,
25312 Pmode, 1, align_2_label);
25313 emit_cmp_and_jump_insns (align_rtx, const2_rtx, GTU, NULL,
25314 Pmode, 1, align_3_label);
25316 else
25318 /* Since the alignment is 2, we have to check 2 or 0 bytes;
25319 check whether it is aligned to 4 bytes. */
25321 align_rtx = expand_binop (Pmode, and_optab, scratch1, const2_rtx,
25322 NULL_RTX, 0, OPTAB_WIDEN);
25324 emit_cmp_and_jump_insns (align_rtx, const0_rtx, EQ, NULL,
25325 Pmode, 1, align_4_label);
25328 mem = change_address (src, QImode, out);
25330 /* Now compare the bytes. */
25332 /* Compare the first n unaligned bytes on a byte-by-byte basis. */
25333 emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL,
25334 QImode, 1, end_0_label);
25336 /* Increment the address. */
25337 emit_insn (ix86_gen_add3 (out, out, const1_rtx));
25339 /* Not needed with an alignment of 2 */
25340 if (align != 2)
25342 emit_label (align_2_label);
25344 emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL, QImode, 1,
25345 end_0_label);
25347 emit_insn (ix86_gen_add3 (out, out, const1_rtx));
25349 emit_label (align_3_label);
25352 emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL, QImode, 1,
25353 end_0_label);
25355 emit_insn (ix86_gen_add3 (out, out, const1_rtx));
25358 /* Generate a loop to check 4 bytes at a time. It is not a good idea to
25359 align this loop; it only makes the program larger and does not help
25360 to speed it up. */
25361 emit_label (align_4_label);
25363 mem = change_address (src, SImode, out);
25364 emit_move_insn (scratch, mem);
25365 emit_insn (ix86_gen_add3 (out, out, GEN_INT (4)));
25367 /* This formula yields a nonzero result iff one of the bytes is zero.
25368 This saves three branches inside the loop and many cycles. */
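/* Added worked example (illustrative): the insns below compute
   (x - 0x01010101) & ~x & 0x80808080 for the loaded word x.
   For x = 0x00616263 the result is 0x80000000, flagging the zero byte;
   for x = 0x61626364 the result is 0, so the loop continues.  */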
25370 emit_insn (gen_addsi3 (tmpreg, scratch, GEN_INT (-0x01010101)));
25371 emit_insn (gen_one_cmplsi2 (scratch, scratch));
25372 emit_insn (gen_andsi3 (tmpreg, tmpreg, scratch));
25373 emit_insn (gen_andsi3 (tmpreg, tmpreg,
25374 gen_int_mode (0x80808080, SImode)));
25375 emit_cmp_and_jump_insns (tmpreg, const0_rtx, EQ, 0, SImode, 1,
25376 align_4_label);
25378 if (TARGET_CMOVE)
25380 rtx reg = gen_reg_rtx (SImode);
25381 rtx reg2 = gen_reg_rtx (Pmode);
25382 emit_move_insn (reg, tmpreg);
25383 emit_insn (gen_lshrsi3 (reg, reg, GEN_INT (16)));
25385 /* If zero is not in the first two bytes, move two bytes forward. */
25386 emit_insn (gen_testsi_ccno_1 (tmpreg, GEN_INT (0x8080)));
25387 tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
25388 tmp = gen_rtx_EQ (VOIDmode, tmp, const0_rtx);
25389 emit_insn (gen_rtx_SET (tmpreg,
25390 gen_rtx_IF_THEN_ELSE (SImode, tmp,
25391 reg,
25392 tmpreg)));
25393 /* Emit lea manually to avoid clobbering of flags. */
25394 emit_insn (gen_rtx_SET (reg2, gen_rtx_PLUS (Pmode, out, const2_rtx)));
25396 tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
25397 tmp = gen_rtx_EQ (VOIDmode, tmp, const0_rtx);
25398 emit_insn (gen_rtx_SET (out,
25399 gen_rtx_IF_THEN_ELSE (Pmode, tmp,
25400 reg2,
25401 out)));
25403 else
25405 rtx_code_label *end_2_label = gen_label_rtx ();
25406 /* Is zero in the first two bytes? */
25408 emit_insn (gen_testsi_ccno_1 (tmpreg, GEN_INT (0x8080)));
25409 tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
25410 tmp = gen_rtx_NE (VOIDmode, tmp, const0_rtx);
25411 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
25412 gen_rtx_LABEL_REF (VOIDmode, end_2_label),
25413 pc_rtx);
25414 tmp = emit_jump_insn (gen_rtx_SET (pc_rtx, tmp));
25415 JUMP_LABEL (tmp) = end_2_label;
25417 /* Not in the first two. Move two bytes forward. */
25418 emit_insn (gen_lshrsi3 (tmpreg, tmpreg, GEN_INT (16)));
25419 emit_insn (ix86_gen_add3 (out, out, const2_rtx));
25421 emit_label (end_2_label);
25425 /* Avoid branch in fixing the byte. */
25426 tmpreg = gen_lowpart (QImode, tmpreg);
25427 emit_insn (gen_addqi3_cc (tmpreg, tmpreg, tmpreg));
25428 tmp = gen_rtx_REG (CCmode, FLAGS_REG);
25429 cmp = gen_rtx_LTU (VOIDmode, tmp, const0_rtx);
25430 emit_insn (ix86_gen_sub3_carry (out, out, GEN_INT (3), tmp, cmp));
25432 emit_label (end_0_label);
25435 /* Expand strlen. */
25437 bool
25438 ix86_expand_strlen (rtx out, rtx src, rtx eoschar, rtx align)
25440 rtx addr, scratch1, scratch2, scratch3, scratch4;
25442 /* The generic case of the strlen expander is long. Avoid
25443 expanding it unless TARGET_INLINE_ALL_STRINGOPS. */
25445 if (TARGET_UNROLL_STRLEN && eoschar == const0_rtx && optimize > 1
25446 && !TARGET_INLINE_ALL_STRINGOPS
25447 && !optimize_insn_for_size_p ()
25448 && (!CONST_INT_P (align) || INTVAL (align) < 4))
25449 return false;
25451 addr = force_reg (Pmode, XEXP (src, 0));
25452 scratch1 = gen_reg_rtx (Pmode);
25454 if (TARGET_UNROLL_STRLEN && eoschar == const0_rtx && optimize > 1
25455 && !optimize_insn_for_size_p ())
25457 /* Well it seems that some optimizer does not combine a call like
25458 foo(strlen(bar), strlen(bar));
25459 when the move and the subtraction are done here. It does calculate
25460 the length just once when these instructions are done inside of
25461 output_strlen_unroll(). But I think since &bar[strlen(bar)] is
25462 often used and I use one fewer register for the lifetime of
25463 output_strlen_unroll() this is better. */
25465 emit_move_insn (out, addr);
25467 ix86_expand_strlensi_unroll_1 (out, src, align);
25469 /* strlensi_unroll_1 returns the address of the zero at the end of
25470 the string, like memchr(), so compute the length by subtracting
25471 the start address. */
25472 emit_insn (ix86_gen_sub3 (out, out, addr));
25474 else
25476 rtx unspec;
25478 /* Can't use this if the user has appropriated eax, ecx, or edi. */
25479 if (fixed_regs[AX_REG] || fixed_regs[CX_REG] || fixed_regs[DI_REG])
25480 return false;
25482 scratch2 = gen_reg_rtx (Pmode);
25483 scratch3 = gen_reg_rtx (Pmode);
25484 scratch4 = force_reg (Pmode, constm1_rtx);
25486 emit_move_insn (scratch3, addr);
25487 eoschar = force_reg (QImode, eoschar);
25489 src = replace_equiv_address_nv (src, scratch3);
25491 /* If .md starts supporting :P, this can be done in .md. */
25492 unspec = gen_rtx_UNSPEC (Pmode, gen_rtvec (4, src, eoschar, align,
25493 scratch4), UNSPEC_SCAS);
25494 emit_insn (gen_strlenqi_1 (scratch1, scratch3, unspec));
25495 emit_insn (ix86_gen_one_cmpl2 (scratch2, scratch1));
25496 emit_insn (ix86_gen_add3 (out, scratch2, constm1_rtx));
25498 return true;
25501 /* For a given symbol (function), construct code to compute the address of its PLT
25502 entry in the large x86-64 PIC model. */
25503 static rtx
25504 construct_plt_address (rtx symbol)
25506 rtx tmp, unspec;
25508 gcc_assert (GET_CODE (symbol) == SYMBOL_REF);
25509 gcc_assert (ix86_cmodel == CM_LARGE_PIC && !TARGET_PECOFF);
25510 gcc_assert (Pmode == DImode);
25512 tmp = gen_reg_rtx (Pmode);
25513 unspec = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, symbol), UNSPEC_PLTOFF);
25515 emit_move_insn (tmp, gen_rtx_CONST (Pmode, unspec));
25516 emit_insn (ix86_gen_add3 (tmp, tmp, pic_offset_table_rtx));
25517 return tmp;
25521 ix86_expand_call (rtx retval, rtx fnaddr, rtx callarg1,
25522 rtx callarg2,
25523 rtx pop, bool sibcall)
25525 rtx vec[3];
25526 rtx use = NULL, call;
25527 unsigned int vec_len = 0;
25529 if (pop == const0_rtx)
25530 pop = NULL;
25531 gcc_assert (!TARGET_64BIT || !pop);
25533 if (TARGET_MACHO && !TARGET_64BIT)
25535 #if TARGET_MACHO
25536 if (flag_pic && GET_CODE (XEXP (fnaddr, 0)) == SYMBOL_REF)
25537 fnaddr = machopic_indirect_call_target (fnaddr);
25538 #endif
25540 else
25542 /* Static functions and indirect calls don't need the pic register. Also,
25543 check if PLT was explicitly avoided via no-plt or "noplt" attribute, making
25544 it an indirect call. */
25545 if (flag_pic
25546 && (!TARGET_64BIT
25547 || (ix86_cmodel == CM_LARGE_PIC
25548 && DEFAULT_ABI != MS_ABI))
25549 && GET_CODE (XEXP (fnaddr, 0)) == SYMBOL_REF
25550 && !SYMBOL_REF_LOCAL_P (XEXP (fnaddr, 0))
25551 && flag_plt
25552 && (SYMBOL_REF_DECL ((XEXP (fnaddr, 0))) == NULL_TREE
25553 || !lookup_attribute ("noplt",
25554 DECL_ATTRIBUTES (SYMBOL_REF_DECL (XEXP (fnaddr, 0))))))
25556 use_reg (&use, gen_rtx_REG (Pmode, REAL_PIC_OFFSET_TABLE_REGNUM));
25557 if (ix86_use_pseudo_pic_reg ())
25558 emit_move_insn (gen_rtx_REG (Pmode, REAL_PIC_OFFSET_TABLE_REGNUM),
25559 pic_offset_table_rtx);
25563 /* Skip setting up RAX register for -mskip-rax-setup when there are no
25564 parameters passed in vector registers. */
25565 if (TARGET_64BIT
25566 && (INTVAL (callarg2) > 0
25567 || (INTVAL (callarg2) == 0
25568 && (TARGET_SSE || !flag_skip_rax_setup))))
25570 rtx al = gen_rtx_REG (QImode, AX_REG);
25571 emit_move_insn (al, callarg2);
25572 use_reg (&use, al);
25575 if (ix86_cmodel == CM_LARGE_PIC
25576 && !TARGET_PECOFF
25577 && MEM_P (fnaddr)
25578 && GET_CODE (XEXP (fnaddr, 0)) == SYMBOL_REF
25579 && !local_symbolic_operand (XEXP (fnaddr, 0), VOIDmode))
25580 fnaddr = gen_rtx_MEM (QImode, construct_plt_address (XEXP (fnaddr, 0)));
25581 else if (sibcall
25582 ? !sibcall_insn_operand (XEXP (fnaddr, 0), word_mode)
25583 : !call_insn_operand (XEXP (fnaddr, 0), word_mode))
25585 fnaddr = convert_to_mode (word_mode, XEXP (fnaddr, 0), 1);
25586 fnaddr = gen_rtx_MEM (QImode, copy_to_mode_reg (word_mode, fnaddr));
25589 call = gen_rtx_CALL (VOIDmode, fnaddr, callarg1);
25591 if (retval)
25593 /* We should add bounds as a destination register in case
25594 a pointer with bounds may be returned. */
25595 if (TARGET_MPX && SCALAR_INT_MODE_P (GET_MODE (retval)))
25597 rtx b0 = gen_rtx_REG (BND64mode, FIRST_BND_REG);
25598 rtx b1 = gen_rtx_REG (BND64mode, FIRST_BND_REG + 1);
25599 if (GET_CODE (retval) == PARALLEL)
25601 b0 = gen_rtx_EXPR_LIST (VOIDmode, b0, const0_rtx);
25602 b1 = gen_rtx_EXPR_LIST (VOIDmode, b1, const0_rtx);
25603 rtx par = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, b0, b1));
25604 retval = chkp_join_splitted_slot (retval, par);
25606 else
25608 retval = gen_rtx_PARALLEL (VOIDmode,
25609 gen_rtvec (3, retval, b0, b1));
25610 chkp_put_regs_to_expr_list (retval);
25614 call = gen_rtx_SET (retval, call);
25616 vec[vec_len++] = call;
25618 if (pop)
25620 pop = gen_rtx_PLUS (Pmode, stack_pointer_rtx, pop);
25621 pop = gen_rtx_SET (stack_pointer_rtx, pop);
25622 vec[vec_len++] = pop;
25625 if (TARGET_64BIT_MS_ABI
25626 && (!callarg2 || INTVAL (callarg2) != -2))
25628 int const cregs_size
25629 = ARRAY_SIZE (x86_64_ms_sysv_extra_clobbered_registers);
25630 int i;
25632 for (i = 0; i < cregs_size; i++)
25634 int regno = x86_64_ms_sysv_extra_clobbered_registers[i];
25635 machine_mode mode = SSE_REGNO_P (regno) ? TImode : DImode;
25637 clobber_reg (&use, gen_rtx_REG (mode, regno));
25641 if (vec_len > 1)
25642 call = gen_rtx_PARALLEL (VOIDmode, gen_rtvec_v (vec_len, vec));
25643 call = emit_call_insn (call);
25644 if (use)
25645 CALL_INSN_FUNCTION_USAGE (call) = use;
25647 return call;
25650 /* Return true if the function being called was marked with attribute "noplt"
25651 or using -fno-plt and we are compiling for non-PIC and x86_64. We need to
25652 handle the non-PIC case in the backend because there is no easy interface
25653 for the front-end to force non-PLT calls to use the GOT. This is currently
25654 used only with 64-bit ELF targets to call the function marked "noplt"
25655 indirectly. */
25657 static bool
25658 ix86_nopic_noplt_attribute_p (rtx call_op)
25660 if (flag_pic || ix86_cmodel == CM_LARGE
25661 || !TARGET_64BIT || TARGET_MACHO || TARGET_SEH || TARGET_PECOFF
25662 || SYMBOL_REF_LOCAL_P (call_op))
25663 return false;
25665 tree symbol_decl = SYMBOL_REF_DECL (call_op);
25667 if (!flag_plt
25668 || (symbol_decl != NULL_TREE
25669 && lookup_attribute ("noplt", DECL_ATTRIBUTES (symbol_decl))))
25670 return true;
25672 return false;
25675 /* Output the assembly for a call instruction. */
25677 const char *
25678 ix86_output_call_insn (rtx_insn *insn, rtx call_op)
25680 bool direct_p = constant_call_address_operand (call_op, VOIDmode);
25681 bool seh_nop_p = false;
25682 const char *xasm;
25684 if (SIBLING_CALL_P (insn))
25686 if (direct_p && ix86_nopic_noplt_attribute_p (call_op))
25687 xasm = "%!jmp\t*%p0@GOTPCREL(%%rip)";
25688 else if (direct_p)
25689 xasm = "%!jmp\t%P0";
25690 /* SEH epilogue detection requires the indirect branch case
25691 to include REX.W. */
25692 else if (TARGET_SEH)
25693 xasm = "%!rex.W jmp %A0";
25694 else
25695 xasm = "%!jmp\t%A0";
25697 output_asm_insn (xasm, &call_op);
25698 return "";
25701 /* SEH unwinding can require an extra nop to be emitted in several
25702 circumstances. Determine if we have one of those. */
25703 if (TARGET_SEH)
25705 rtx_insn *i;
25707 for (i = NEXT_INSN (insn); i ; i = NEXT_INSN (i))
25709 /* If we get to another real insn, we don't need the nop. */
25710 if (INSN_P (i))
25711 break;
25713 /* If we get to the epilogue note, prevent a catch region from
25714 being adjacent to the standard epilogue sequence. If non-
25715 call-exceptions, we'll have done this during epilogue emission. */
25716 if (NOTE_P (i) && NOTE_KIND (i) == NOTE_INSN_EPILOGUE_BEG
25717 && !flag_non_call_exceptions
25718 && !can_throw_internal (insn))
25720 seh_nop_p = true;
25721 break;
25725 /* If we didn't find a real insn following the call, prevent the
25726 unwinder from looking into the next function. */
25727 if (i == NULL)
25728 seh_nop_p = true;
25731 if (direct_p && ix86_nopic_noplt_attribute_p (call_op))
25732 xasm = "%!call\t*%p0@GOTPCREL(%%rip)";
25733 else if (direct_p)
25734 xasm = "%!call\t%P0";
25735 else
25736 xasm = "%!call\t%A0";
25738 output_asm_insn (xasm, &call_op);
25740 if (seh_nop_p)
25741 return "nop";
25743 return "";
25746 /* Clear stack slot assignments remembered from previous functions.
25747 This is called from INIT_EXPANDERS once before RTL is emitted for each
25748 function. */
25750 static struct machine_function *
25751 ix86_init_machine_status (void)
25753 struct machine_function *f;
25755 f = ggc_cleared_alloc<machine_function> ();
25756 f->use_fast_prologue_epilogue_nregs = -1;
25757 f->call_abi = ix86_abi;
25759 return f;
25762 /* Return a MEM corresponding to a stack slot with mode MODE.
25763 Allocate a new slot if necessary.
25765 The RTL for a function can have several slots available: N is
25766 which slot to use. */
25769 assign_386_stack_local (machine_mode mode, enum ix86_stack_slot n)
25771 struct stack_local_entry *s;
25773 gcc_assert (n < MAX_386_STACK_LOCALS);
25775 for (s = ix86_stack_locals; s; s = s->next)
25776 if (s->mode == mode && s->n == n)
25777 return validize_mem (copy_rtx (s->rtl));
25779 s = ggc_alloc<stack_local_entry> ();
25780 s->n = n;
25781 s->mode = mode;
25782 s->rtl = assign_stack_local (mode, GET_MODE_SIZE (mode), 0);
25784 s->next = ix86_stack_locals;
25785 ix86_stack_locals = s;
25786 return validize_mem (copy_rtx (s->rtl));
25789 static void
25790 ix86_instantiate_decls (void)
25792 struct stack_local_entry *s;
25794 for (s = ix86_stack_locals; s; s = s->next)
25795 if (s->rtl != NULL_RTX)
25796 instantiate_decl_rtl (s->rtl);
25799 /* Check whether x86 address PARTS is a pc-relative address. */
25801 static bool
25802 rip_relative_addr_p (struct ix86_address *parts)
25804 rtx base, index, disp;
25806 base = parts->base;
25807 index = parts->index;
25808 disp = parts->disp;
25810 if (disp && !base && !index)
25812 if (TARGET_64BIT)
25814 rtx symbol = disp;
25816 if (GET_CODE (disp) == CONST)
25817 symbol = XEXP (disp, 0);
25818 if (GET_CODE (symbol) == PLUS
25819 && CONST_INT_P (XEXP (symbol, 1)))
25820 symbol = XEXP (symbol, 0);
25822 if (GET_CODE (symbol) == LABEL_REF
25823 || (GET_CODE (symbol) == SYMBOL_REF
25824 && SYMBOL_REF_TLS_MODEL (symbol) == 0)
25825 || (GET_CODE (symbol) == UNSPEC
25826 && (XINT (symbol, 1) == UNSPEC_GOTPCREL
25827 || XINT (symbol, 1) == UNSPEC_PCREL
25828 || XINT (symbol, 1) == UNSPEC_GOTNTPOFF)))
25829 return true;
25832 return false;
25835 /* Calculate the length of the memory address in the instruction encoding.
25836 Includes addr32 prefix, does not include the one-byte modrm, opcode,
25837 or other prefixes. We never generate addr32 prefix for LEA insn. */
25840 memory_address_length (rtx addr, bool lea)
25842 struct ix86_address parts;
25843 rtx base, index, disp;
25844 int len;
25845 int ok;
25847 if (GET_CODE (addr) == PRE_DEC
25848 || GET_CODE (addr) == POST_INC
25849 || GET_CODE (addr) == PRE_MODIFY
25850 || GET_CODE (addr) == POST_MODIFY)
25851 return 0;
25853 ok = ix86_decompose_address (addr, &parts);
25854 gcc_assert (ok);
25856 len = (parts.seg == SEG_DEFAULT) ? 0 : 1;
25858 /* If this is not LEA instruction, add the length of addr32 prefix. */
25859 if (TARGET_64BIT && !lea
25860 && (SImode_address_operand (addr, VOIDmode)
25861 || (parts.base && GET_MODE (parts.base) == SImode)
25862 || (parts.index && GET_MODE (parts.index) == SImode)))
25863 len++;
25865 base = parts.base;
25866 index = parts.index;
25867 disp = parts.disp;
25869 if (base && GET_CODE (base) == SUBREG)
25870 base = SUBREG_REG (base);
25871 if (index && GET_CODE (index) == SUBREG)
25872 index = SUBREG_REG (index);
25874 gcc_assert (base == NULL_RTX || REG_P (base));
25875 gcc_assert (index == NULL_RTX || REG_P (index));
25877 /* Rule of thumb:
25878 - esp as the base always wants an index,
25879 - ebp as the base always wants a displacement,
25880 - r12 as the base always wants an index,
25881 - r13 as the base always wants a displacement. */
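/* (Added illustration: (%esp) needs a SIB byte, and (%ebp) must be encoded
   as 0(%ebp) with a disp8, so each costs one byte more than e.g. (%ecx);
   the same holds for r12 and r13 in 64-bit code.)  */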
25883 /* Register Indirect. */
25884 if (base && !index && !disp)
25886 /* esp (for its index) and ebp (for its displacement) need
25887 the two-byte modrm form. Similarly for r12 and r13 in 64-bit
25888 code. */
25889 if (base == arg_pointer_rtx
25890 || base == frame_pointer_rtx
25891 || REGNO (base) == SP_REG
25892 || REGNO (base) == BP_REG
25893 || REGNO (base) == R12_REG
25894 || REGNO (base) == R13_REG)
25895 len++;
25898 /* Direct Addressing. In 64-bit mode mod 00 r/m 5
25899 is not disp32, but disp32(%rip), so for disp32
25900 SIB byte is needed, unless print_operand_address
25901 optimizes it into disp32(%rip) or (%rip) is implied
25902 by UNSPEC. */
25903 else if (disp && !base && !index)
25905 len += 4;
25906 if (rip_relative_addr_p (&parts))
25907 len++;
25909 else
25911 /* Find the length of the displacement constant. */
25912 if (disp)
25914 if (base && satisfies_constraint_K (disp))
25915 len += 1;
25916 else
25917 len += 4;
25919 /* ebp always wants a displacement. Similarly r13. */
25920 else if (base && (REGNO (base) == BP_REG || REGNO (base) == R13_REG))
25921 len++;
25923 /* An index requires the two-byte modrm form.... */
25924 if (index
25925 /* ...like esp (or r12), which always wants an index. */
25926 || base == arg_pointer_rtx
25927 || base == frame_pointer_rtx
25928 || (base && (REGNO (base) == SP_REG || REGNO (base) == R12_REG)))
25929 len++;
25932 return len;
25935 /* Compute default value for "length_immediate" attribute. When SHORTFORM
25936 is set, expect that the insn has an 8-bit immediate alternative. */
25938 ix86_attr_length_immediate_default (rtx_insn *insn, bool shortform)
25940 int len = 0;
25941 int i;
25942 extract_insn_cached (insn);
25943 for (i = recog_data.n_operands - 1; i >= 0; --i)
25944 if (CONSTANT_P (recog_data.operand[i]))
25946 enum attr_mode mode = get_attr_mode (insn);
25948 gcc_assert (!len);
25949 if (shortform && CONST_INT_P (recog_data.operand[i]))
25951 HOST_WIDE_INT ival = INTVAL (recog_data.operand[i]);
25952 switch (mode)
25954 case MODE_QI:
25955 len = 1;
25956 continue;
25957 case MODE_HI:
25958 ival = trunc_int_for_mode (ival, HImode);
25959 break;
25960 case MODE_SI:
25961 ival = trunc_int_for_mode (ival, SImode);
25962 break;
25963 default:
25964 break;
25966 if (IN_RANGE (ival, -128, 127))
25968 len = 1;
25969 continue;
25972 switch (mode)
25974 case MODE_QI:
25975 len = 1;
25976 break;
25977 case MODE_HI:
25978 len = 2;
25979 break;
25980 case MODE_SI:
25981 len = 4;
25982 break;
25983 /* Immediates for DImode instructions are encoded
25984 as 32-bit sign-extended values. */
25985 case MODE_DI:
25986 len = 4;
25987 break;
25988 default:
25989 fatal_insn ("unknown insn mode", insn);
25992 return len;
25995 /* Compute default value for "length_address" attribute. */
25997 ix86_attr_length_address_default (rtx_insn *insn)
25999 int i;
26001 if (get_attr_type (insn) == TYPE_LEA)
26003 rtx set = PATTERN (insn), addr;
26005 if (GET_CODE (set) == PARALLEL)
26006 set = XVECEXP (set, 0, 0);
26008 gcc_assert (GET_CODE (set) == SET);
26010 addr = SET_SRC (set);
26012 return memory_address_length (addr, true);
26015 extract_insn_cached (insn);
26016 for (i = recog_data.n_operands - 1; i >= 0; --i)
26017 if (MEM_P (recog_data.operand[i]))
26019 constrain_operands_cached (insn, reload_completed);
26020 if (which_alternative != -1)
26022 const char *constraints = recog_data.constraints[i];
26023 int alt = which_alternative;
26025 while (*constraints == '=' || *constraints == '+')
26026 constraints++;
26027 while (alt-- > 0)
26028 while (*constraints++ != ',')
26030 /* Skip ignored operands. */
26031 if (*constraints == 'X')
26032 continue;
26034 return memory_address_length (XEXP (recog_data.operand[i], 0), false);
26036 return 0;
26039 /* Compute default value for "length_vex" attribute. It includes
26040 2 or 3 byte VEX prefix and 1 opcode byte. */
26043 ix86_attr_length_vex_default (rtx_insn *insn, bool has_0f_opcode,
26044 bool has_vex_w)
26046 int i;
26048 /* Only the 0f opcode can use the 2-byte VEX prefix, and the VEX W bit uses the
26049 3-byte VEX prefix. */
26050 if (!has_0f_opcode || has_vex_w)
26051 return 3 + 1;
26053 /* We can always use the 2-byte VEX prefix in 32-bit mode. */
26054 if (!TARGET_64BIT)
26055 return 2 + 1;
26057 extract_insn_cached (insn);
26059 for (i = recog_data.n_operands - 1; i >= 0; --i)
26060 if (REG_P (recog_data.operand[i]))
26062 /* REX.W bit uses 3 byte VEX prefix. */
26063 if (GET_MODE (recog_data.operand[i]) == DImode
26064 && GENERAL_REG_P (recog_data.operand[i]))
26065 return 3 + 1;
26067 else
26069 /* REX.X or REX.B bits use 3 byte VEX prefix. */
26070 if (MEM_P (recog_data.operand[i])
26071 && x86_extended_reg_mentioned_p (recog_data.operand[i]))
26072 return 3 + 1;
26075 return 2 + 1;
26078 /* Return the maximum number of instructions a cpu can issue. */
26080 static int
26081 ix86_issue_rate (void)
26083 switch (ix86_tune)
26085 case PROCESSOR_PENTIUM:
26086 case PROCESSOR_BONNELL:
26087 case PROCESSOR_SILVERMONT:
26088 case PROCESSOR_KNL:
26089 case PROCESSOR_INTEL:
26090 case PROCESSOR_K6:
26091 case PROCESSOR_BTVER2:
26092 case PROCESSOR_PENTIUM4:
26093 case PROCESSOR_NOCONA:
26094 return 2;
26096 case PROCESSOR_PENTIUMPRO:
26097 case PROCESSOR_ATHLON:
26098 case PROCESSOR_K8:
26099 case PROCESSOR_AMDFAM10:
26100 case PROCESSOR_GENERIC:
26101 case PROCESSOR_BTVER1:
26102 return 3;
26104 case PROCESSOR_BDVER1:
26105 case PROCESSOR_BDVER2:
26106 case PROCESSOR_BDVER3:
26107 case PROCESSOR_BDVER4:
26108 case PROCESSOR_CORE2:
26109 case PROCESSOR_NEHALEM:
26110 case PROCESSOR_SANDYBRIDGE:
26111 case PROCESSOR_HASWELL:
26112 return 4;
26114 default:
26115 return 1;
26119 /* A subroutine of ix86_adjust_cost -- return TRUE iff INSN reads the flags set
26120 by DEP_INSN and nothing else set by DEP_INSN. */
26122 static bool
26123 ix86_flags_dependent (rtx_insn *insn, rtx_insn *dep_insn, enum attr_type insn_type)
26125 rtx set, set2;
26127 /* Simplify the test for uninteresting insns. */
26128 if (insn_type != TYPE_SETCC
26129 && insn_type != TYPE_ICMOV
26130 && insn_type != TYPE_FCMOV
26131 && insn_type != TYPE_IBR)
26132 return false;
26134 if ((set = single_set (dep_insn)) != 0)
26136 set = SET_DEST (set);
26137 set2 = NULL_RTX;
26139 else if (GET_CODE (PATTERN (dep_insn)) == PARALLEL
26140 && XVECLEN (PATTERN (dep_insn), 0) == 2
26141 && GET_CODE (XVECEXP (PATTERN (dep_insn), 0, 0)) == SET
26142 && GET_CODE (XVECEXP (PATTERN (dep_insn), 0, 1)) == SET)
26144 set = SET_DEST (XVECEXP (PATTERN (dep_insn), 0, 0));
26145 set2 = SET_DEST (XVECEXP (PATTERN (dep_insn), 0, 1));
26147 else
26148 return false;
26150 if (!REG_P (set) || REGNO (set) != FLAGS_REG)
26151 return false;
26153 /* This test is true if the dependent insn reads the flags but
26154 not any other potentially set register. */
26155 if (!reg_overlap_mentioned_p (set, PATTERN (insn)))
26156 return false;
26158 if (set2 && reg_overlap_mentioned_p (set2, PATTERN (insn)))
26159 return false;
26161 return true;
26164 /* Return true iff USE_INSN has a memory address with operands set by
26165 SET_INSN. */
26167 bool
26168 ix86_agi_dependent (rtx_insn *set_insn, rtx_insn *use_insn)
26170 int i;
26171 extract_insn_cached (use_insn);
26172 for (i = recog_data.n_operands - 1; i >= 0; --i)
26173 if (MEM_P (recog_data.operand[i]))
26175 rtx addr = XEXP (recog_data.operand[i], 0);
26176 return modified_in_p (addr, set_insn) != 0;
26178 return false;
26181 /* Helper function for exact_store_load_dependency.
26182 Return true if addr is found in insn. */
26183 static bool
26184 exact_dependency_1 (rtx addr, rtx insn)
26186 enum rtx_code code;
26187 const char *format_ptr;
26188 int i, j;
26190 code = GET_CODE (insn);
26191 switch (code)
26193 case MEM:
26194 if (rtx_equal_p (addr, insn))
26195 return true;
26196 break;
26197 case REG:
26198 CASE_CONST_ANY:
26199 case SYMBOL_REF:
26200 case CODE_LABEL:
26201 case PC:
26202 case CC0:
26203 case EXPR_LIST:
26204 return false;
26205 default:
26206 break;
26209 format_ptr = GET_RTX_FORMAT (code);
26210 for (i = 0; i < GET_RTX_LENGTH (code); i++)
26212 switch (*format_ptr++)
26214 case 'e':
26215 if (exact_dependency_1 (addr, XEXP (insn, i)))
26216 return true;
26217 break;
26218 case 'E':
26219 for (j = 0; j < XVECLEN (insn, i); j++)
26220 if (exact_dependency_1 (addr, XVECEXP (insn, i, j)))
26221 return true;
26222 break;
26225 return false;
26228 /* Return true if there exists an exact dependency between the store and the load, i.e.
26229 the same memory address is used in them. */
26230 static bool
26231 exact_store_load_dependency (rtx_insn *store, rtx_insn *load)
26233 rtx set1, set2;
26235 set1 = single_set (store);
26236 if (!set1)
26237 return false;
26238 if (!MEM_P (SET_DEST (set1)))
26239 return false;
26240 set2 = single_set (load);
26241 if (!set2)
26242 return false;
26243 if (exact_dependency_1 (SET_DEST (set1), SET_SRC (set2)))
26244 return true;
26245 return false;
26248 static int
26249 ix86_adjust_cost (rtx_insn *insn, rtx link, rtx_insn *dep_insn, int cost)
26251 enum attr_type insn_type, dep_insn_type;
26252 enum attr_memory memory;
26253 rtx set, set2;
26254 int dep_insn_code_number;
26256 /* Anti and output dependencies have zero cost on all CPUs. */
26257 if (REG_NOTE_KIND (link) != 0)
26258 return 0;
26260 dep_insn_code_number = recog_memoized (dep_insn);
26262 /* If we can't recognize the insns, we can't really do anything. */
26263 if (dep_insn_code_number < 0 || recog_memoized (insn) < 0)
26264 return cost;
26266 insn_type = get_attr_type (insn);
26267 dep_insn_type = get_attr_type (dep_insn);
26269 switch (ix86_tune)
26271 case PROCESSOR_PENTIUM:
26272 /* Address Generation Interlock adds a cycle of latency. */
26273 if (insn_type == TYPE_LEA)
26275 rtx addr = PATTERN (insn);
26277 if (GET_CODE (addr) == PARALLEL)
26278 addr = XVECEXP (addr, 0, 0);
26280 gcc_assert (GET_CODE (addr) == SET);
26282 addr = SET_SRC (addr);
26283 if (modified_in_p (addr, dep_insn))
26284 cost += 1;
26286 else if (ix86_agi_dependent (dep_insn, insn))
26287 cost += 1;
26289 /* ??? Compares pair with jump/setcc. */
26290 if (ix86_flags_dependent (insn, dep_insn, insn_type))
26291 cost = 0;
26293 /* Floating point stores require value to be ready one cycle earlier. */
26294 if (insn_type == TYPE_FMOV
26295 && get_attr_memory (insn) == MEMORY_STORE
26296 && !ix86_agi_dependent (dep_insn, insn))
26297 cost += 1;
26298 break;
26300 case PROCESSOR_PENTIUMPRO:
26301 /* INT->FP conversion is expensive. */
26302 if (get_attr_fp_int_src (dep_insn))
26303 cost += 5;
26305 /* There is one cycle extra latency between an FP op and a store. */
26306 if (insn_type == TYPE_FMOV
26307 && (set = single_set (dep_insn)) != NULL_RTX
26308 && (set2 = single_set (insn)) != NULL_RTX
26309 && rtx_equal_p (SET_DEST (set), SET_SRC (set2))
26310 && MEM_P (SET_DEST (set2)))
26311 cost += 1;
26313 memory = get_attr_memory (insn);
26315 /* Show the ability of the reorder buffer to hide the latency of a load by
26316 executing it in parallel with the previous instruction when the
26317 previous instruction is not needed to compute the address. */
26318 if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
26319 && !ix86_agi_dependent (dep_insn, insn))
26321 /* Claim moves to take one cycle, as the core can issue one load
26322 at a time and the next load can start a cycle later. */
26323 if (dep_insn_type == TYPE_IMOV
26324 || dep_insn_type == TYPE_FMOV)
26325 cost = 1;
26326 else if (cost > 1)
26327 cost--;
26329 break;
26331 case PROCESSOR_K6:
26332 /* The esp dependency is resolved before
26333 the instruction is really finished. */
26334 if ((insn_type == TYPE_PUSH || insn_type == TYPE_POP)
26335 && (dep_insn_type == TYPE_PUSH || dep_insn_type == TYPE_POP))
26336 return 1;
26338 /* INT->FP conversion is expensive. */
26339 if (get_attr_fp_int_src (dep_insn))
26340 cost += 5;
26342 memory = get_attr_memory (insn);
26344 /* Show the ability of the reorder buffer to hide the latency of a load by
26345 executing it in parallel with the previous instruction when the
26346 previous instruction is not needed to compute the address. */
26347 if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
26348 && !ix86_agi_dependent (dep_insn, insn))
26350 /* Claim moves to take one cycle, as the core can issue one load
26351 at a time and the next load can start a cycle later. */
26352 if (dep_insn_type == TYPE_IMOV
26353 || dep_insn_type == TYPE_FMOV)
26354 cost = 1;
26355 else if (cost > 2)
26356 cost -= 2;
26357 else
26358 cost = 1;
26360 break;
26362 case PROCESSOR_AMDFAM10:
26363 case PROCESSOR_BDVER1:
26364 case PROCESSOR_BDVER2:
26365 case PROCESSOR_BDVER3:
26366 case PROCESSOR_BDVER4:
26367 case PROCESSOR_BTVER1:
26368 case PROCESSOR_BTVER2:
26369 case PROCESSOR_GENERIC:
26370 /* The stack engine allows push and pop instructions to execute in parallel. */
26371 if ((insn_type == TYPE_PUSH || insn_type == TYPE_POP)
26372 && (dep_insn_type == TYPE_PUSH || dep_insn_type == TYPE_POP))
26373 return 0;
26374 /* FALLTHRU */
26376 case PROCESSOR_ATHLON:
26377 case PROCESSOR_K8:
26378 memory = get_attr_memory (insn);
26380 /* Show the ability of the reorder buffer to hide the latency of a load by
26381 executing it in parallel with the previous instruction when the
26382 previous instruction is not needed to compute the address. */
26383 if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
26384 && !ix86_agi_dependent (dep_insn, insn))
26386 enum attr_unit unit = get_attr_unit (insn);
26387 int loadcost = 3;
26389 /* Because of the difference between the length of integer and
26390 floating unit pipeline preparation stages, the memory operands
26391 for floating point are cheaper.
26393 ??? For Athlon the difference is most probably 2. */
26394 if (unit == UNIT_INTEGER || unit == UNIT_UNKNOWN)
26395 loadcost = 3;
26396 else
26397 loadcost = TARGET_ATHLON ? 2 : 0;
26399 if (cost >= loadcost)
26400 cost -= loadcost;
26401 else
26402 cost = 0;
26404 break;
26406 case PROCESSOR_CORE2:
26407 case PROCESSOR_NEHALEM:
26408 case PROCESSOR_SANDYBRIDGE:
26409 case PROCESSOR_HASWELL:
26410 /* The stack engine allows push and pop instructions to execute in parallel. */
26411 if ((insn_type == TYPE_PUSH || insn_type == TYPE_POP)
26412 && (dep_insn_type == TYPE_PUSH || dep_insn_type == TYPE_POP))
26413 return 0;
26415 memory = get_attr_memory (insn);
26417 /* Show the ability of the reorder buffer to hide the latency of a load by
26418 executing it in parallel with the previous instruction when the
26419 previous instruction is not needed to compute the address. */
26420 if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
26421 && !ix86_agi_dependent (dep_insn, insn))
26423 if (cost >= 4)
26424 cost -= 4;
26425 else
26426 cost = 0;
26428 break;
26430 case PROCESSOR_SILVERMONT:
26431 case PROCESSOR_KNL:
26432 case PROCESSOR_INTEL:
26433 if (!reload_completed)
26434 return cost;
26436 /* Increase cost of integer loads. */
26437 memory = get_attr_memory (dep_insn);
26438 if (memory == MEMORY_LOAD || memory == MEMORY_BOTH)
26440 enum attr_unit unit = get_attr_unit (dep_insn);
26441 if (unit == UNIT_INTEGER && cost == 1)
26443 if (memory == MEMORY_LOAD)
26444 cost = 3;
26445 else
26447 /* Increase cost of ld/st for short int types only
26448 because of store forwarding issue. */
26449 rtx set = single_set (dep_insn);
26450 if (set && (GET_MODE (SET_DEST (set)) == QImode
26451 || GET_MODE (SET_DEST (set)) == HImode))
26453 /* Increase the cost of the store/load pair if an exact
26454 dependence exists and the dependent insn is a load. */
26455 enum attr_memory insn_memory = get_attr_memory (insn);
26456 if (insn_memory == MEMORY_LOAD
26457 && exact_store_load_dependency (dep_insn, insn))
26458 cost = 3;
26464 default:
26465 break;
26468 return cost;
26471 /* How many alternative schedules to try. This should be as wide as the
26472 scheduling freedom in the DFA, but no wider. Making this value too
26473 large results in extra work for the scheduler. */
26475 static int
26476 ia32_multipass_dfa_lookahead (void)
26478 switch (ix86_tune)
26480 case PROCESSOR_PENTIUM:
26481 return 2;
26483 case PROCESSOR_PENTIUMPRO:
26484 case PROCESSOR_K6:
26485 return 1;
26487 case PROCESSOR_BDVER1:
26488 case PROCESSOR_BDVER2:
26489 case PROCESSOR_BDVER3:
26490 case PROCESSOR_BDVER4:
26491 /* We use lookahead value 4 for BD both before and after reload
26492 schedules. The plan is to have value 8 included for -O3. */
26493 return 4;
26495 case PROCESSOR_CORE2:
26496 case PROCESSOR_NEHALEM:
26497 case PROCESSOR_SANDYBRIDGE:
26498 case PROCESSOR_HASWELL:
26499 case PROCESSOR_BONNELL:
26500 case PROCESSOR_SILVERMONT:
26501 case PROCESSOR_KNL:
26502 case PROCESSOR_INTEL:
26503 /* Generally, we want haifa-sched:max_issue() to look ahead as far
26504 as the number of instructions that can be executed in one cycle, i.e.,
26505 issue_rate. I wonder why tuning for many CPUs does not do this. */
26506 if (reload_completed)
26507 return ix86_issue_rate ();
26508 /* Don't use lookahead for pre-reload schedule to save compile time. */
26509 return 0;
26511 default:
26512 return 0;
26516 /* Return true if target platform supports macro-fusion. */
26518 static bool
26519 ix86_macro_fusion_p ()
26521 return TARGET_FUSE_CMP_AND_BRANCH;
26524 /* Check whether the current microarchitecture supports macro fusion
26525 for the insn pair "CONDGEN + CONDJMP". Refer to the
26526 "Intel Architectures Optimization Reference Manual". */
26528 static bool
26529 ix86_macro_fusion_pair_p (rtx_insn *condgen, rtx_insn *condjmp)
26531 rtx src, dest;
26532 enum rtx_code ccode;
26533 rtx compare_set = NULL_RTX, test_if, cond;
26534 rtx alu_set = NULL_RTX, addr = NULL_RTX;
26536 if (!any_condjump_p (condjmp))
26537 return false;
26539 if (get_attr_type (condgen) != TYPE_TEST
26540 && get_attr_type (condgen) != TYPE_ICMP
26541 && get_attr_type (condgen) != TYPE_INCDEC
26542 && get_attr_type (condgen) != TYPE_ALU)
26543 return false;
26545 compare_set = single_set (condgen);
26546 if (compare_set == NULL_RTX
26547 && !TARGET_FUSE_ALU_AND_BRANCH)
26548 return false;
26550 if (compare_set == NULL_RTX)
26552 int i;
26553 rtx pat = PATTERN (condgen);
26554 for (i = 0; i < XVECLEN (pat, 0); i++)
26555 if (GET_CODE (XVECEXP (pat, 0, i)) == SET)
26557 rtx set_src = SET_SRC (XVECEXP (pat, 0, i));
26558 if (GET_CODE (set_src) == COMPARE)
26559 compare_set = XVECEXP (pat, 0, i);
26560 else
26561 alu_set = XVECEXP (pat, 0, i);
26564 if (compare_set == NULL_RTX)
26565 return false;
26566 src = SET_SRC (compare_set);
26567 if (GET_CODE (src) != COMPARE)
26568 return false;
26570 /* Macro-fusion for cmp/test MEM-IMM + conditional jmp is not
26571 supported. */
26572 if ((MEM_P (XEXP (src, 0))
26573 && CONST_INT_P (XEXP (src, 1)))
26574 || (MEM_P (XEXP (src, 1))
26575 && CONST_INT_P (XEXP (src, 0))))
26576 return false;
26578 /* No fusion for RIP-relative address. */
26579 if (MEM_P (XEXP (src, 0)))
26580 addr = XEXP (XEXP (src, 0), 0);
26581 else if (MEM_P (XEXP (src, 1)))
26582 addr = XEXP (XEXP (src, 1), 0);
26584 if (addr) {
26585 ix86_address parts;
26586 int ok = ix86_decompose_address (addr, &parts);
26587 gcc_assert (ok);
26589 if (rip_relative_addr_p (&parts))
26590 return false;
26593 test_if = SET_SRC (pc_set (condjmp));
26594 cond = XEXP (test_if, 0);
26595 ccode = GET_CODE (cond);
26596 /* Check whether the conditional jump uses the Sign or Overflow flags. */
26597 if (!TARGET_FUSE_CMP_AND_BRANCH_SOFLAGS
26598 && (ccode == GE
26599 || ccode == GT
26600 || ccode == LE
26601 || ccode == LT))
26602 return false;
26604 /* Return true for TYPE_TEST and TYPE_ICMP. */
26605 if (get_attr_type (condgen) == TYPE_TEST
26606 || get_attr_type (condgen) == TYPE_ICMP)
26607 return true;
26609 /* The following handles the case of macro-fusion for ALU + jmp. */
26610 if (!TARGET_FUSE_ALU_AND_BRANCH || !alu_set)
26611 return false;
26613 /* No fusion for alu op with memory destination operand. */
26614 dest = SET_DEST (alu_set);
26615 if (MEM_P (dest))
26616 return false;
26618 /* Macro-fusion for inc/dec + unsigned conditional jump is not
26619 supported. */
26620 if (get_attr_type (condgen) == TYPE_INCDEC
26621 && (ccode == GEU
26622 || ccode == GTU
26623 || ccode == LEU
26624 || ccode == LTU))
26625 return false;
26627 return true;
26630 /* Try to reorder the ready list to take advantage of Atom pipelined IMUL
26631 execution. It is applied if
26632 (1) an IMUL instruction is on top of the list;
26633 (2) there is exactly one producer of an independent IMUL instruction in
26634 the ready list.
26635 Return the index of the IMUL producer if it was found and -1 otherwise. */
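/* E.g., with an imull at the top of the ready list, if some other insn in
   the list is the sole producer feeding a second, independent imull, that
   producer is moved to the top so the two multiplies can issue back to
   back and use the pipelined IMUL unit.  */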
26636 static int
26637 do_reorder_for_imul (rtx_insn **ready, int n_ready)
26639 rtx_insn *insn;
26640 rtx set, insn1, insn2;
26641 sd_iterator_def sd_it;
26642 dep_t dep;
26643 int index = -1;
26644 int i;
26646 if (!TARGET_BONNELL)
26647 return index;
26649 /* Check that IMUL instruction is on the top of ready list. */
26650 insn = ready[n_ready - 1];
26651 set = single_set (insn);
26652 if (!set)
26653 return index;
26654 if (!(GET_CODE (SET_SRC (set)) == MULT
26655 && GET_MODE (SET_SRC (set)) == SImode))
26656 return index;
26658 /* Search for producer of independent IMUL instruction. */
26659 for (i = n_ready - 2; i >= 0; i--)
26661 insn = ready[i];
26662 if (!NONDEBUG_INSN_P (insn))
26663 continue;
26664 /* Skip IMUL instruction. */
26665 insn2 = PATTERN (insn);
26666 if (GET_CODE (insn2) == PARALLEL)
26667 insn2 = XVECEXP (insn2, 0, 0);
26668 if (GET_CODE (insn2) == SET
26669 && GET_CODE (SET_SRC (insn2)) == MULT
26670 && GET_MODE (SET_SRC (insn2)) == SImode)
26671 continue;
26673 FOR_EACH_DEP (insn, SD_LIST_FORW, sd_it, dep)
26675 rtx con;
26676 con = DEP_CON (dep);
26677 if (!NONDEBUG_INSN_P (con))
26678 continue;
26679 insn1 = PATTERN (con);
26680 if (GET_CODE (insn1) == PARALLEL)
26681 insn1 = XVECEXP (insn1, 0, 0);
26683 if (GET_CODE (insn1) == SET
26684 && GET_CODE (SET_SRC (insn1)) == MULT
26685 && GET_MODE (SET_SRC (insn1)) == SImode)
26687 sd_iterator_def sd_it1;
26688 dep_t dep1;
26689 /* Check if there is no other dependee for IMUL. */
26690 index = i;
26691 FOR_EACH_DEP (con, SD_LIST_BACK, sd_it1, dep1)
26693 rtx pro;
26694 pro = DEP_PRO (dep1);
26695 if (!NONDEBUG_INSN_P (pro))
26696 continue;
26697 if (pro != insn)
26698 index = -1;
26700 if (index >= 0)
26701 break;
26704 if (index >= 0)
26705 break;
26707 return index;
26710 /* Try to find the best candidate at the top of the ready list if two insns
26711 have the same priority - a candidate is best if its dependees were
26712 scheduled earlier. Applied for Silvermont only.
26713 Return true if the top 2 insns must be interchanged. */
26714 static bool
26715 swap_top_of_ready_list (rtx_insn **ready, int n_ready)
26717 rtx_insn *top = ready[n_ready - 1];
26718 rtx_insn *next = ready[n_ready - 2];
26719 rtx set;
26720 sd_iterator_def sd_it;
26721 dep_t dep;
26722 int clock1 = -1;
26723 int clock2 = -1;
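/* INSN_TICK is the scheduler tick recorded for INSN in the haifa
   scheduler's per-insn data (HID); for the resolved producers walked
   below it is the cycle on which they were scheduled.  */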
26724 #define INSN_TICK(INSN) (HID (INSN)->tick)
26726 if (!TARGET_SILVERMONT && !TARGET_INTEL)
26727 return false;
26729 if (!NONDEBUG_INSN_P (top))
26730 return false;
26731 if (!NONJUMP_INSN_P (top))
26732 return false;
26733 if (!NONDEBUG_INSN_P (next))
26734 return false;
26735 if (!NONJUMP_INSN_P (next))
26736 return false;
26737 set = single_set (top);
26738 if (!set)
26739 return false;
26740 set = single_set (next);
26741 if (!set)
26742 return false;
26744 if (INSN_PRIORITY_KNOWN (top) && INSN_PRIORITY_KNOWN (next))
26746 if (INSN_PRIORITY (top) != INSN_PRIORITY (next))
26747 return false;
26748 /* Determine the winner more precisely. */
26749 FOR_EACH_DEP (top, SD_LIST_RES_BACK, sd_it, dep)
26751 rtx pro;
26752 pro = DEP_PRO (dep);
26753 if (!NONDEBUG_INSN_P (pro))
26754 continue;
26755 if (INSN_TICK (pro) > clock1)
26756 clock1 = INSN_TICK (pro);
26758 FOR_EACH_DEP (next, SD_LIST_RES_BACK, sd_it, dep)
26760 rtx pro;
26761 pro = DEP_PRO (dep);
26762 if (!NONDEBUG_INSN_P (pro))
26763 continue;
26764 if (INSN_TICK (pro) > clock2)
26765 clock2 = INSN_TICK (pro);
26768 if (clock1 == clock2)
26770 /* Determine winner - load must win. */
26771 enum attr_memory memory1, memory2;
26772 memory1 = get_attr_memory (top);
26773 memory2 = get_attr_memory (next);
26774 if (memory2 == MEMORY_LOAD && memory1 != MEMORY_LOAD)
26775 return true;
26777 return (bool) (clock2 < clock1);
26779 return false;
26780 #undef INSN_TICK
26783 /* Perform possible reordering of the ready list for Atom/Silvermont only.
26784 Return the issue rate. */
26785 static int
26786 ix86_sched_reorder (FILE *dump, int sched_verbose, rtx_insn **ready,
26787 int *pn_ready, int clock_var)
26789 int issue_rate = -1;
26790 int n_ready = *pn_ready;
26791 int i;
26792 rtx_insn *insn;
26793 int index = -1;
26795 /* Set up issue rate. */
26796 issue_rate = ix86_issue_rate ();
26798 /* Do reordering for BONNELL/SILVERMONT only. */
26799 if (!TARGET_BONNELL && !TARGET_SILVERMONT && !TARGET_INTEL)
26800 return issue_rate;
26802 /* Nothing to do if ready list contains only 1 instruction. */
26803 if (n_ready <= 1)
26804 return issue_rate;
26806 /* Do reordering for the post-reload scheduler only. */
26807 if (!reload_completed)
26808 return issue_rate;
26810 if ((index = do_reorder_for_imul (ready, n_ready)) >= 0)
26812 if (sched_verbose > 1)
26813 fprintf (dump, ";;\tatom sched_reorder: put %d insn on top\n",
26814 INSN_UID (ready[index]));
26816 /* Put IMUL producer (ready[index]) at the top of ready list. */
26817 insn = ready[index];
26818 for (i = index; i < n_ready - 1; i++)
26819 ready[i] = ready[i + 1];
26820 ready[n_ready - 1] = insn;
26821 return issue_rate;
26824 /* Skip selective scheduling since HID is not populated in it. */
26825 if (clock_var != 0
26826 && !sel_sched_p ()
26827 && swap_top_of_ready_list (ready, n_ready))
26829 if (sched_verbose > 1)
26830 fprintf (dump, ";;\tslm sched_reorder: swap %d and %d insns\n",
26831 INSN_UID (ready[n_ready - 1]), INSN_UID (ready[n_ready - 2]));
26832 /* Swap 2 top elements of ready list. */
26833 insn = ready[n_ready - 1];
26834 ready[n_ready - 1] = ready[n_ready - 2];
26835 ready[n_ready - 2] = insn;
26837 return issue_rate;
26840 static bool
26841 ix86_class_likely_spilled_p (reg_class_t);
26843 /* Return true if the lhs of INSN is a HW function argument register; set
26844 IS_SPILLED to true if it is a likely-spilled HW register. */
26845 static bool
26846 insn_is_function_arg (rtx insn, bool* is_spilled)
26848 rtx dst;
26850 if (!NONDEBUG_INSN_P (insn))
26851 return false;
26852 /* Call instructions are not movable, ignore them. */
26853 if (CALL_P (insn))
26854 return false;
26855 insn = PATTERN (insn);
26856 if (GET_CODE (insn) == PARALLEL)
26857 insn = XVECEXP (insn, 0, 0);
26858 if (GET_CODE (insn) != SET)
26859 return false;
26860 dst = SET_DEST (insn);
26861 if (REG_P (dst) && HARD_REGISTER_P (dst)
26862 && ix86_function_arg_regno_p (REGNO (dst)))
26864 /* Is it likely spilled HW register? */
26865 if (!TEST_HARD_REG_BIT (fixed_reg_set, REGNO (dst))
26866 && ix86_class_likely_spilled_p (REGNO_REG_CLASS (REGNO (dst))))
26867 *is_spilled = true;
26868 return true;
26870 return false;
26873 /* Add output dependencies for a chain of adjacent function arguments, but only
26874 if there is a move to a likely-spilled HW register. Return the first argument
26875 if at least one dependence was added, or NULL otherwise. */
26876 static rtx_insn *
26877 add_parameter_dependencies (rtx_insn *call, rtx_insn *head)
26879 rtx_insn *insn;
26880 rtx_insn *last = call;
26881 rtx_insn *first_arg = NULL;
26882 bool is_spilled = false;
26884 head = PREV_INSN (head);
26886 /* Find the argument-passing instruction nearest to the call. */
26887 while (true)
26889 last = PREV_INSN (last);
26890 if (last == head)
26891 return NULL;
26892 if (!NONDEBUG_INSN_P (last))
26893 continue;
26894 if (insn_is_function_arg (last, &is_spilled))
26895 break;
26896 return NULL;
26899 first_arg = last;
26900 while (true)
26902 insn = PREV_INSN (last);
26903 if (!INSN_P (insn))
26904 break;
26905 if (insn == head)
26906 break;
26907 if (!NONDEBUG_INSN_P (insn))
26909 last = insn;
26910 continue;
26912 if (insn_is_function_arg (insn, &is_spilled))
26914 /* Add an output dependence between two function arguments if the chain
26915 of output arguments contains likely-spilled HW registers. */
26916 if (is_spilled)
26917 add_dependence (first_arg, insn, REG_DEP_OUTPUT);
26918 first_arg = last = insn;
26920 else
26921 break;
26923 if (!is_spilled)
26924 return NULL;
26925 return first_arg;
26928 /* Add output or anti dependency from insn to first_arg to restrict its code
26929 motion. */
26930 static void
26931 avoid_func_arg_motion (rtx_insn *first_arg, rtx_insn *insn)
26933 rtx set;
26934 rtx tmp;
26936 /* Add anti dependencies for bounds stores. */
26937 if (INSN_P (insn)
26938 && GET_CODE (PATTERN (insn)) == PARALLEL
26939 && GET_CODE (XVECEXP (PATTERN (insn), 0, 0)) == UNSPEC
26940 && XINT (XVECEXP (PATTERN (insn), 0, 0), 1) == UNSPEC_BNDSTX)
26942 add_dependence (first_arg, insn, REG_DEP_ANTI);
26943 return;
26946 set = single_set (insn);
26947 if (!set)
26948 return;
26949 tmp = SET_DEST (set);
26950 if (REG_P (tmp))
26952 /* Add output dependency to the first function argument. */
26953 add_dependence (first_arg, insn, REG_DEP_OUTPUT);
26954 return;
26956 /* Add anti dependency. */
26957 add_dependence (first_arg, insn, REG_DEP_ANTI);
26960 /* Avoid cross-block motion of a function argument by adding a dependency
26961 from the first non-jump instruction in bb. */
26962 static void
26963 add_dependee_for_func_arg (rtx_insn *arg, basic_block bb)
26965 rtx_insn *insn = BB_END (bb);
26967 while (insn)
26969 if (NONDEBUG_INSN_P (insn) && NONJUMP_INSN_P (insn))
26971 rtx set = single_set (insn);
26972 if (set)
26974 avoid_func_arg_motion (arg, insn);
26975 return;
26978 if (insn == BB_HEAD (bb))
26979 return;
26980 insn = PREV_INSN (insn);
26984 /* Hook for pre-reload schedule - avoid motion of function arguments
26985 passed in likely spilled HW registers. */
26986 static void
26987 ix86_dependencies_evaluation_hook (rtx_insn *head, rtx_insn *tail)
26989 rtx_insn *insn;
26990 rtx_insn *first_arg = NULL;
26991 if (reload_completed)
26992 return;
26993 while (head != tail && DEBUG_INSN_P (head))
26994 head = NEXT_INSN (head);
26995 for (insn = tail; insn != head; insn = PREV_INSN (insn))
26996 if (INSN_P (insn) && CALL_P (insn))
26998 first_arg = add_parameter_dependencies (insn, head);
26999 if (first_arg)
27001 /* Add a dependee for the first argument to predecessors, but only
27002 if the region contains more than one block. */
27003 basic_block bb = BLOCK_FOR_INSN (insn);
27004 int rgn = CONTAINING_RGN (bb->index);
27005 int nr_blks = RGN_NR_BLOCKS (rgn);
27006 /* Skip trivial regions and region head blocks that can have
27007 predecessors outside of region. */
27008 if (nr_blks > 1 && BLOCK_TO_BB (bb->index) != 0)
27010 edge e;
27011 edge_iterator ei;
27013 /* Regions are SCCs with the exception of selective
27014 scheduling with pipelining of outer blocks enabled.
27015 So also check that immediate predecessors of a non-head
27016 block are in the same region. */
27017 FOR_EACH_EDGE (e, ei, bb->preds)
27019 /* Avoid creating loop-carried dependencies by using
27020 the topological ordering in the region. */
27021 if (rgn == CONTAINING_RGN (e->src->index)
27022 && BLOCK_TO_BB (bb->index) > BLOCK_TO_BB (e->src->index))
27023 add_dependee_for_func_arg (first_arg, e->src);
27026 insn = first_arg;
27027 if (insn == head)
27028 break;
27031 else if (first_arg)
27032 avoid_func_arg_motion (first_arg, insn);
27035 /* Hook for pre-reload schedule - set the priority of moves from likely-spilled
27036 HW registers to the maximum, to schedule them as soon as possible. These are
27037 moves from function argument registers at the top of the function entry
27038 and moves from function return value registers after a call. */
27039 static int
27040 ix86_adjust_priority (rtx_insn *insn, int priority)
27042 rtx set;
27044 if (reload_completed)
27045 return priority;
27047 if (!NONDEBUG_INSN_P (insn))
27048 return priority;
27050 set = single_set (insn);
27051 if (set)
27053 rtx tmp = SET_SRC (set);
27054 if (REG_P (tmp)
27055 && HARD_REGISTER_P (tmp)
27056 && !TEST_HARD_REG_BIT (fixed_reg_set, REGNO (tmp))
27057 && ix86_class_likely_spilled_p (REGNO_REG_CLASS (REGNO (tmp))))
27058 return current_sched_info->sched_max_insns_priority;
27061 return priority;
27064 /* Model the decoder of Core 2/i7.
27065 The hooks below for multipass scheduling (see haifa-sched.c:max_issue)
27066 track the instruction fetch block boundaries and make sure that long
27067 (9+ byte) instructions are assigned to D0. */
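/* For concreteness, with the values set below for Core 2/i7 (16-byte
   ifetch blocks, at most 6 insns decoded per cycle, and an 8-byte limit
   for the secondary decoders), an insn longer than 8 bytes can only be
   issued as the first insn of its cycle, and a cycle that has already
   consumed, say, 14 bytes of its ifetch block can only accept a further
   insn of at most 2 bytes.  */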
27069 /* Maximum length of an insn that can be handled by
27070 a secondary decoder unit. '8' for Core 2/i7. */
27071 static int core2i7_secondary_decoder_max_insn_size;
27073 /* Ifetch block size, i.e., number of bytes decoder reads per cycle.
27074 '16' for Core 2/i7. */
27075 static int core2i7_ifetch_block_size;
27077 /* Maximum number of instructions decoder can handle per cycle.
27078 '6' for Core 2/i7. */
27079 static int core2i7_ifetch_block_max_insns;
27081 typedef struct ix86_first_cycle_multipass_data_ *
27082 ix86_first_cycle_multipass_data_t;
27083 typedef const struct ix86_first_cycle_multipass_data_ *
27084 const_ix86_first_cycle_multipass_data_t;
27086 /* A variable to store target state across calls to max_issue within
27087 one cycle. */
27088 static struct ix86_first_cycle_multipass_data_ _ix86_first_cycle_multipass_data,
27089 *ix86_first_cycle_multipass_data = &_ix86_first_cycle_multipass_data;
27091 /* Initialize DATA. */
27092 static void
27093 core2i7_first_cycle_multipass_init (void *_data)
27095 ix86_first_cycle_multipass_data_t data
27096 = (ix86_first_cycle_multipass_data_t) _data;
27098 data->ifetch_block_len = 0;
27099 data->ifetch_block_n_insns = 0;
27100 data->ready_try_change = NULL;
27101 data->ready_try_change_size = 0;
27104 /* Advancing the cycle; reset ifetch block counts. */
27105 static void
27106 core2i7_dfa_post_advance_cycle (void)
27108 ix86_first_cycle_multipass_data_t data = ix86_first_cycle_multipass_data;
27110 gcc_assert (data->ifetch_block_n_insns <= core2i7_ifetch_block_max_insns);
27112 data->ifetch_block_len = 0;
27113 data->ifetch_block_n_insns = 0;
27116 static int min_insn_size (rtx_insn *);
27118 /* Filter out insns from ready_try that the core will not be able to issue
27119 on current cycle due to decoder. */
27120 static void
27121 core2i7_first_cycle_multipass_filter_ready_try
27122 (const_ix86_first_cycle_multipass_data_t data,
27123 signed char *ready_try, int n_ready, bool first_cycle_insn_p)
27125 while (n_ready--)
27127 rtx_insn *insn;
27128 int insn_size;
27130 if (ready_try[n_ready])
27131 continue;
27133 insn = get_ready_element (n_ready);
27134 insn_size = min_insn_size (insn);
27136 if (/* If this is too long an insn for a secondary decoder ... */
27137 (!first_cycle_insn_p
27138 && insn_size > core2i7_secondary_decoder_max_insn_size)
27139 /* ... or it would not fit into the ifetch block ... */
27140 || data->ifetch_block_len + insn_size > core2i7_ifetch_block_size
27141 /* ... or the decoder is full already ... */
27142 || data->ifetch_block_n_insns + 1 > core2i7_ifetch_block_max_insns)
27143 /* ... mask the insn out. */
27145 ready_try[n_ready] = 1;
27147 if (data->ready_try_change)
27148 bitmap_set_bit (data->ready_try_change, n_ready);
27153 /* Prepare for a new round of multipass lookahead scheduling. */
27154 static void
27155 core2i7_first_cycle_multipass_begin (void *_data,
27156 signed char *ready_try, int n_ready,
27157 bool first_cycle_insn_p)
27159 ix86_first_cycle_multipass_data_t data
27160 = (ix86_first_cycle_multipass_data_t) _data;
27161 const_ix86_first_cycle_multipass_data_t prev_data
27162 = ix86_first_cycle_multipass_data;
27164 /* Restore the state from the end of the previous round. */
27165 data->ifetch_block_len = prev_data->ifetch_block_len;
27166 data->ifetch_block_n_insns = prev_data->ifetch_block_n_insns;
27168 /* Filter instructions that cannot be issued on current cycle due to
27169 decoder restrictions. */
27170 core2i7_first_cycle_multipass_filter_ready_try (data, ready_try, n_ready,
27171 first_cycle_insn_p);
27174 /* INSN is being issued in current solution. Account for its impact on
27175 the decoder model. */
27176 static void
27177 core2i7_first_cycle_multipass_issue (void *_data,
27178 signed char *ready_try, int n_ready,
27179 rtx_insn *insn, const void *_prev_data)
27181 ix86_first_cycle_multipass_data_t data
27182 = (ix86_first_cycle_multipass_data_t) _data;
27183 const_ix86_first_cycle_multipass_data_t prev_data
27184 = (const_ix86_first_cycle_multipass_data_t) _prev_data;
27186 int insn_size = min_insn_size (insn);
27188 data->ifetch_block_len = prev_data->ifetch_block_len + insn_size;
27189 data->ifetch_block_n_insns = prev_data->ifetch_block_n_insns + 1;
27190 gcc_assert (data->ifetch_block_len <= core2i7_ifetch_block_size
27191 && data->ifetch_block_n_insns <= core2i7_ifetch_block_max_insns);
27193 /* Allocate or resize the bitmap for storing INSN's effect on ready_try. */
27194 if (!data->ready_try_change)
27196 data->ready_try_change = sbitmap_alloc (n_ready);
27197 data->ready_try_change_size = n_ready;
27199 else if (data->ready_try_change_size < n_ready)
27201 data->ready_try_change = sbitmap_resize (data->ready_try_change,
27202 n_ready, 0);
27203 data->ready_try_change_size = n_ready;
27205 bitmap_clear (data->ready_try_change);
27207 /* Filter out insns from ready_try that the core will not be able to issue
27208 on current cycle due to decoder. */
27209 core2i7_first_cycle_multipass_filter_ready_try (data, ready_try, n_ready,
27210 false);
27213 /* Revert the effect on ready_try. */
27214 static void
27215 core2i7_first_cycle_multipass_backtrack (const void *_data,
27216 signed char *ready_try,
27217 int n_ready ATTRIBUTE_UNUSED)
27219 const_ix86_first_cycle_multipass_data_t data
27220 = (const_ix86_first_cycle_multipass_data_t) _data;
27221 unsigned int i = 0;
27222 sbitmap_iterator sbi;
27224 gcc_assert (bitmap_last_set_bit (data->ready_try_change) < n_ready);
27225 EXECUTE_IF_SET_IN_BITMAP (data->ready_try_change, 0, i, sbi)
27227 ready_try[i] = 0;
27231 /* Save the result of multipass lookahead scheduling for the next round. */
27232 static void
27233 core2i7_first_cycle_multipass_end (const void *_data)
27235 const_ix86_first_cycle_multipass_data_t data
27236 = (const_ix86_first_cycle_multipass_data_t) _data;
27237 ix86_first_cycle_multipass_data_t next_data
27238 = ix86_first_cycle_multipass_data;
27240 if (data != NULL)
27242 next_data->ifetch_block_len = data->ifetch_block_len;
27243 next_data->ifetch_block_n_insns = data->ifetch_block_n_insns;
27247 /* Deallocate target data. */
27248 static void
27249 core2i7_first_cycle_multipass_fini (void *_data)
27251 ix86_first_cycle_multipass_data_t data
27252 = (ix86_first_cycle_multipass_data_t) _data;
27254 if (data->ready_try_change)
27256 sbitmap_free (data->ready_try_change);
27257 data->ready_try_change = NULL;
27258 data->ready_try_change_size = 0;
27262 /* Prepare for scheduling pass. */
27263 static void
27264 ix86_sched_init_global (FILE *, int, int)
27266 /* Install scheduling hooks for current CPU. Some of these hooks are used
27267 in time-critical parts of the scheduler, so we only set them up when
27268 they are actually used. */
27269 switch (ix86_tune)
27271 case PROCESSOR_CORE2:
27272 case PROCESSOR_NEHALEM:
27273 case PROCESSOR_SANDYBRIDGE:
27274 case PROCESSOR_HASWELL:
27275 /* Do not perform multipass scheduling for pre-reload schedule
27276 to save compile time. */
27277 if (reload_completed)
27279 targetm.sched.dfa_post_advance_cycle
27280 = core2i7_dfa_post_advance_cycle;
27281 targetm.sched.first_cycle_multipass_init
27282 = core2i7_first_cycle_multipass_init;
27283 targetm.sched.first_cycle_multipass_begin
27284 = core2i7_first_cycle_multipass_begin;
27285 targetm.sched.first_cycle_multipass_issue
27286 = core2i7_first_cycle_multipass_issue;
27287 targetm.sched.first_cycle_multipass_backtrack
27288 = core2i7_first_cycle_multipass_backtrack;
27289 targetm.sched.first_cycle_multipass_end
27290 = core2i7_first_cycle_multipass_end;
27291 targetm.sched.first_cycle_multipass_fini
27292 = core2i7_first_cycle_multipass_fini;
27294 /* Set decoder parameters. */
27295 core2i7_secondary_decoder_max_insn_size = 8;
27296 core2i7_ifetch_block_size = 16;
27297 core2i7_ifetch_block_max_insns = 6;
27298 break;
27300 /* ... Fall through ... */
27301 default:
27302 targetm.sched.dfa_post_advance_cycle = NULL;
27303 targetm.sched.first_cycle_multipass_init = NULL;
27304 targetm.sched.first_cycle_multipass_begin = NULL;
27305 targetm.sched.first_cycle_multipass_issue = NULL;
27306 targetm.sched.first_cycle_multipass_backtrack = NULL;
27307 targetm.sched.first_cycle_multipass_end = NULL;
27308 targetm.sched.first_cycle_multipass_fini = NULL;
27309 break;
27314 /* Compute the alignment given to a constant that is being placed in memory.
27315 EXP is the constant and ALIGN is the alignment that the object would
27316 ordinarily have.
27317 The value of this function is used instead of that alignment to align
27318 the object. */
27321 ix86_constant_alignment (tree exp, int align)
27323 if (TREE_CODE (exp) == REAL_CST || TREE_CODE (exp) == VECTOR_CST
27324 || TREE_CODE (exp) == INTEGER_CST)
27326 if (TYPE_MODE (TREE_TYPE (exp)) == DFmode && align < 64)
27327 return 64;
27328 else if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (exp))) && align < 128)
27329 return 128;
27331 else if (!optimize_size && TREE_CODE (exp) == STRING_CST
27332 && TREE_STRING_LENGTH (exp) >= 31 && align < BITS_PER_WORD)
27333 return BITS_PER_WORD;
27335 return align;
27338 /* Compute the alignment for a static variable.
27339 TYPE is the data type, and ALIGN is the alignment that
27340 the object would ordinarily have. The value of this function is used
27341 instead of that alignment to align the object. */
27344 ix86_data_alignment (tree type, int align, bool opt)
27346 /* GCC 4.8 and earlier used to incorrectly assume this alignment even
27347 for symbols from other compilation units or symbols that don't need
27348 to bind locally. In order to preserve some ABI compatibility with
27349 those compilers, ensure we don't decrease alignment from what we
27350 used to assume. */
27352 int max_align_compat = MIN (256, MAX_OFILE_ALIGNMENT);
27354 /* A data structure equal to or greater than the size of a cache line
27355 (64 bytes on the Pentium 4 and other recent Intel processors, including
27356 processors based on the Intel Core microarchitecture) should be aligned
27357 so that its base address is a multiple of the cache line size. */
27359 int max_align
27360 = MIN ((unsigned) ix86_tune_cost->prefetch_block * 8, MAX_OFILE_ALIGNMENT);
27362 if (max_align < BITS_PER_WORD)
27363 max_align = BITS_PER_WORD;
27365 switch (ix86_align_data_type)
27367 case ix86_align_data_type_abi: opt = false; break;
27368 case ix86_align_data_type_compat: max_align = BITS_PER_WORD; break;
27369 case ix86_align_data_type_cacheline: break;
27372 if (opt
27373 && AGGREGATE_TYPE_P (type)
27374 && TYPE_SIZE (type)
27375 && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST)
27377 if (wi::geu_p (TYPE_SIZE (type), max_align_compat)
27378 && align < max_align_compat)
27379 align = max_align_compat;
27380 if (wi::geu_p (TYPE_SIZE (type), max_align)
27381 && align < max_align)
27382 align = max_align;
27385 /* The x86-64 ABI requires arrays larger than 16 bytes to be aligned
27386 to a 16-byte boundary. */
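/* For example, under this rule a global 'double a[4]' (32 bytes) whose
   natural alignment is 64 bits is raised to 128-bit (16-byte) alignment
   by the check below.  */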
27387 if (TARGET_64BIT)
27389 if ((opt ? AGGREGATE_TYPE_P (type) : TREE_CODE (type) == ARRAY_TYPE)
27390 && TYPE_SIZE (type)
27391 && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
27392 && wi::geu_p (TYPE_SIZE (type), 128)
27393 && align < 128)
27394 return 128;
27397 if (!opt)
27398 return align;
27400 if (TREE_CODE (type) == ARRAY_TYPE)
27402 if (TYPE_MODE (TREE_TYPE (type)) == DFmode && align < 64)
27403 return 64;
27404 if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (type))) && align < 128)
27405 return 128;
27407 else if (TREE_CODE (type) == COMPLEX_TYPE)
27410 if (TYPE_MODE (type) == DCmode && align < 64)
27411 return 64;
27412 if ((TYPE_MODE (type) == XCmode
27413 || TYPE_MODE (type) == TCmode) && align < 128)
27414 return 128;
27416 else if ((TREE_CODE (type) == RECORD_TYPE
27417 || TREE_CODE (type) == UNION_TYPE
27418 || TREE_CODE (type) == QUAL_UNION_TYPE)
27419 && TYPE_FIELDS (type))
27421 if (DECL_MODE (TYPE_FIELDS (type)) == DFmode && align < 64)
27422 return 64;
27423 if (ALIGN_MODE_128 (DECL_MODE (TYPE_FIELDS (type))) && align < 128)
27424 return 128;
27426 else if (TREE_CODE (type) == REAL_TYPE || TREE_CODE (type) == VECTOR_TYPE
27427 || TREE_CODE (type) == INTEGER_TYPE)
27429 if (TYPE_MODE (type) == DFmode && align < 64)
27430 return 64;
27431 if (ALIGN_MODE_128 (TYPE_MODE (type)) && align < 128)
27432 return 128;
27435 return align;
27438 /* Compute the alignment for a local variable or a stack slot. EXP is
27439 the data type or decl itself, MODE is the widest mode available and
27440 ALIGN is the alignment that the object would ordinarily have. The
27441 value of this macro is used instead of that alignment to align the
27442 object. */
27444 unsigned int
27445 ix86_local_alignment (tree exp, machine_mode mode,
27446 unsigned int align)
27448 tree type, decl;
27450 if (exp && DECL_P (exp))
27452 type = TREE_TYPE (exp);
27453 decl = exp;
27455 else
27457 type = exp;
27458 decl = NULL;
27461 /* Don't do dynamic stack realignment for long long objects with
27462 -mpreferred-stack-boundary=2. */
27463 if (!TARGET_64BIT
27464 && align == 64
27465 && ix86_preferred_stack_boundary < 64
27466 && (mode == DImode || (type && TYPE_MODE (type) == DImode))
27467 && (!type || !TYPE_USER_ALIGN (type))
27468 && (!decl || !DECL_USER_ALIGN (decl)))
27469 align = 32;
27471 /* If TYPE is NULL, we are allocating a stack slot for caller-save
27472 register in MODE. We will return the largest alignment of XF
27473 and DF. */
27474 if (!type)
27476 if (mode == XFmode && align < GET_MODE_ALIGNMENT (DFmode))
27477 align = GET_MODE_ALIGNMENT (DFmode);
27478 return align;
27481 /* The x86-64 ABI requires arrays larger than 16 bytes to be aligned
27482 to a 16-byte boundary. The exact wording is:
27484 An array uses the same alignment as its elements, except that a local or
27485 global array variable of length at least 16 bytes or
27486 a C99 variable-length array variable always has alignment of at least 16 bytes.
27488 This was added to allow use of aligned SSE instructions on arrays. The
27489 rule is meant for static storage (where the compiler cannot do the analysis
27490 by itself). We follow it for automatic variables only when convenient.
27491 We fully control everything in the function being compiled, and functions
27492 from other units cannot rely on the alignment.
27494 Exclude the va_list type. It is the common case of a local array where
27495 we cannot benefit from the alignment.
27497 TODO: Probably one should optimize for size only when var is not escaping. */
27498 if (TARGET_64BIT && optimize_function_for_speed_p (cfun)
27499 && TARGET_SSE)
27501 if (AGGREGATE_TYPE_P (type)
27502 && (va_list_type_node == NULL_TREE
27503 || (TYPE_MAIN_VARIANT (type)
27504 != TYPE_MAIN_VARIANT (va_list_type_node)))
27505 && TYPE_SIZE (type)
27506 && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
27507 && wi::geu_p (TYPE_SIZE (type), 16)
27508 && align < 128)
27509 return 128;
27511 if (TREE_CODE (type) == ARRAY_TYPE)
27513 if (TYPE_MODE (TREE_TYPE (type)) == DFmode && align < 64)
27514 return 64;
27515 if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (type))) && align < 128)
27516 return 128;
27518 else if (TREE_CODE (type) == COMPLEX_TYPE)
27520 if (TYPE_MODE (type) == DCmode && align < 64)
27521 return 64;
27522 if ((TYPE_MODE (type) == XCmode
27523 || TYPE_MODE (type) == TCmode) && align < 128)
27524 return 128;
27526 else if ((TREE_CODE (type) == RECORD_TYPE
27527 || TREE_CODE (type) == UNION_TYPE
27528 || TREE_CODE (type) == QUAL_UNION_TYPE)
27529 && TYPE_FIELDS (type))
27531 if (DECL_MODE (TYPE_FIELDS (type)) == DFmode && align < 64)
27532 return 64;
27533 if (ALIGN_MODE_128 (DECL_MODE (TYPE_FIELDS (type))) && align < 128)
27534 return 128;
27536 else if (TREE_CODE (type) == REAL_TYPE || TREE_CODE (type) == VECTOR_TYPE
27537 || TREE_CODE (type) == INTEGER_TYPE)
27540 if (TYPE_MODE (type) == DFmode && align < 64)
27541 return 64;
27542 if (ALIGN_MODE_128 (TYPE_MODE (type)) && align < 128)
27543 return 128;
27545 return align;
27548 /* Compute the minimum required alignment for dynamic stack realignment
27549 purposes for a local variable, parameter or a stack slot. EXP is
27550 the data type or decl itself, MODE is its mode and ALIGN is the
27551 alignment that the object would ordinarily have. */
27553 unsigned int
27554 ix86_minimum_alignment (tree exp, machine_mode mode,
27555 unsigned int align)
27557 tree type, decl;
27559 if (exp && DECL_P (exp))
27561 type = TREE_TYPE (exp);
27562 decl = exp;
27564 else
27566 type = exp;
27567 decl = NULL;
27570 if (TARGET_64BIT || align != 64 || ix86_preferred_stack_boundary >= 64)
27571 return align;
27573 /* Don't do dynamic stack realignment for long long objects with
27574 -mpreferred-stack-boundary=2. */
27575 if ((mode == DImode || (type && TYPE_MODE (type) == DImode))
27576 && (!type || !TYPE_USER_ALIGN (type))
27577 && (!decl || !DECL_USER_ALIGN (decl)))
27578 return 32;
27580 return align;
27583 /* Find a location for the static chain incoming to a nested function.
27584 This is a register, unless all free registers are used by arguments. */
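/* In summary: R10 in 64-bit mode; ECX by default in 32-bit mode; EAX for
   fastcall and thiscall functions; and, for regparm(3) functions where no
   call-clobbered register is free, a stack slot reached via an alternate
   entry point that pushes ESI.  */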
27586 static rtx
27587 ix86_static_chain (const_tree fndecl_or_type, bool incoming_p)
27589 unsigned regno;
27591 /* While this function won't be called by the middle-end when a static
27592 chain isn't needed, it's also used throughout the backend so it's
27593 easiest to keep this check centralized. */
27594 if (DECL_P (fndecl_or_type) && !DECL_STATIC_CHAIN (fndecl_or_type))
27595 return NULL;
27597 if (TARGET_64BIT)
27599 /* We always use R10 in 64-bit mode. */
27600 regno = R10_REG;
27602 else
27604 const_tree fntype, fndecl;
27605 unsigned int ccvt;
27607 /* By default in 32-bit mode we use ECX to pass the static chain. */
27608 regno = CX_REG;
27610 if (TREE_CODE (fndecl_or_type) == FUNCTION_DECL)
27612 fntype = TREE_TYPE (fndecl_or_type);
27613 fndecl = fndecl_or_type;
27615 else
27617 fntype = fndecl_or_type;
27618 fndecl = NULL;
27621 ccvt = ix86_get_callcvt (fntype);
27622 if ((ccvt & IX86_CALLCVT_FASTCALL) != 0)
27624 /* Fastcall functions use ecx/edx for arguments, which leaves
27625 us with EAX for the static chain.
27626 Thiscall functions use ecx for arguments, which also
27627 leaves us with EAX for the static chain. */
27628 regno = AX_REG;
27630 else if ((ccvt & IX86_CALLCVT_THISCALL) != 0)
27632 /* Thiscall functions use ecx for arguments, which leaves
27633 us with EAX and EDX for the static chain.
27634 For ABI compatibility we use EAX. */
27635 regno = AX_REG;
27637 else if (ix86_function_regparm (fntype, fndecl) == 3)
27639 /* For regparm 3, we have no free call-clobbered registers in
27640 which to store the static chain. In order to implement this,
27641 we have the trampoline push the static chain to the stack.
27642 However, we can't push a value below the return address when
27643 we call the nested function directly, so we have to use an
27644 alternate entry point. For this we use ESI, and have the
27645 alternate entry point push ESI, so that things appear the
27646 same once we're executing the nested function. */
27647 if (incoming_p)
27649 if (fndecl == current_function_decl)
27650 ix86_static_chain_on_stack = true;
27651 return gen_frame_mem (SImode,
27652 plus_constant (Pmode,
27653 arg_pointer_rtx, -8));
27655 regno = SI_REG;
27659 return gen_rtx_REG (Pmode, regno);
27662 /* Emit RTL insns to initialize the variable parts of a trampoline.
27663 FNDECL is the decl of the target address; M_TRAMP is a MEM for
27664 the trampoline, and CHAIN_VALUE is an RTX for the static chain
27665 to be passed to the target function. */
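/* For reference, the 64-bit trampoline emitted below is, byte for byte
   (the movl forms are used when the operand fits in 32 bits):

	49 bb <imm64>	movabs $FNADDR, %r11	(or 41 bb <imm32>, movl)
	49 ba <imm64>	movabs $CHAIN, %r10	(or 41 ba <imm32>, movl)
	49 ff e3	jmp    *%r11
	90		nop	(pads the jmp write to a full 32-bit store)

   The 32-bit trampoline is a movl (or pushl) of the static chain followed
   by a relative jmp to the target function.  */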
27667 static void
27668 ix86_trampoline_init (rtx m_tramp, tree fndecl, rtx chain_value)
27670 rtx mem, fnaddr;
27671 int opcode;
27672 int offset = 0;
27674 fnaddr = XEXP (DECL_RTL (fndecl), 0);
27676 if (TARGET_64BIT)
27678 int size;
27680 /* Load the function address to r11. Try to load address using
27681 the shorter movl instead of movabs. We may want to support
27682 movq for kernel mode, but the kernel does not use trampolines at
27683 the moment. FNADDR is a 32-bit address and may not be in
27684 DImode when ptr_mode == SImode. Always use movl in this
27685 case. */
27686 if (ptr_mode == SImode
27687 || x86_64_zext_immediate_operand (fnaddr, VOIDmode))
27689 fnaddr = copy_addr_to_reg (fnaddr);
27691 mem = adjust_address (m_tramp, HImode, offset);
27692 emit_move_insn (mem, gen_int_mode (0xbb41, HImode));
27694 mem = adjust_address (m_tramp, SImode, offset + 2);
27695 emit_move_insn (mem, gen_lowpart (SImode, fnaddr));
27696 offset += 6;
27698 else
27700 mem = adjust_address (m_tramp, HImode, offset);
27701 emit_move_insn (mem, gen_int_mode (0xbb49, HImode));
27703 mem = adjust_address (m_tramp, DImode, offset + 2);
27704 emit_move_insn (mem, fnaddr);
27705 offset += 10;
27708 /* Load static chain using movabs to r10. Use the shorter movl
27709 instead of movabs when ptr_mode == SImode. */
27710 if (ptr_mode == SImode)
27712 opcode = 0xba41;
27713 size = 6;
27715 else
27717 opcode = 0xba49;
27718 size = 10;
27721 mem = adjust_address (m_tramp, HImode, offset);
27722 emit_move_insn (mem, gen_int_mode (opcode, HImode));
27724 mem = adjust_address (m_tramp, ptr_mode, offset + 2);
27725 emit_move_insn (mem, chain_value);
27726 offset += size;
27728 /* Jump to r11; the last (unused) byte is a nop, only there to
27729 pad the write out to a single 32-bit store. */
27730 mem = adjust_address (m_tramp, SImode, offset);
27731 emit_move_insn (mem, gen_int_mode (0x90e3ff49, SImode));
27732 offset += 4;
27734 else
27736 rtx disp, chain;
27738 /* Depending on the static chain location, either load a register
27739 with a constant, or push the constant to the stack. All of the
27740 instructions are the same size. */
27741 chain = ix86_static_chain (fndecl, true);
27742 if (REG_P (chain))
27744 switch (REGNO (chain))
27746 case AX_REG:
27747 opcode = 0xb8; break;
27748 case CX_REG:
27749 opcode = 0xb9; break;
27750 default:
27751 gcc_unreachable ();
27754 else
27755 opcode = 0x68;
27757 mem = adjust_address (m_tramp, QImode, offset);
27758 emit_move_insn (mem, gen_int_mode (opcode, QImode));
27760 mem = adjust_address (m_tramp, SImode, offset + 1);
27761 emit_move_insn (mem, chain_value);
27762 offset += 5;
27764 mem = adjust_address (m_tramp, QImode, offset);
27765 emit_move_insn (mem, gen_int_mode (0xe9, QImode));
27767 mem = adjust_address (m_tramp, SImode, offset + 1);
27769 /* Compute the offset from the end of the jmp to the target function.
27770 In the case in which the trampoline stores the static chain on
27771 the stack, we need to skip the target's first insn, which pushes
27772 the (call-saved) static chain register; this push is 1 byte. */
27773 offset += 5;
27774 disp = expand_binop (SImode, sub_optab, fnaddr,
27775 plus_constant (Pmode, XEXP (m_tramp, 0),
27776 offset - (MEM_P (chain) ? 1 : 0)),
27777 NULL_RTX, 1, OPTAB_DIRECT);
27778 emit_move_insn (mem, disp);
27781 gcc_assert (offset <= TRAMPOLINE_SIZE);
27783 #ifdef HAVE_ENABLE_EXECUTE_STACK
27784 #ifdef CHECK_EXECUTE_STACK_ENABLED
27785 if (CHECK_EXECUTE_STACK_ENABLED)
27786 #endif
27787 emit_library_call (gen_rtx_SYMBOL_REF (Pmode, "__enable_execute_stack"),
27788 LCT_NORMAL, VOIDmode, 1, XEXP (m_tramp, 0), Pmode);
27789 #endif
27792 /* The following file contains several enumerations and data structures
27793 built from the definitions in i386-builtin-types.def. */
27795 #include "i386-builtin-types.inc"
27797 /* Table for the ix86 builtin non-function types. */
27798 static GTY(()) tree ix86_builtin_type_tab[(int) IX86_BT_LAST_CPTR + 1];
27800 /* Retrieve an element from the above table, building some of
27801 the types lazily. */
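/* The type codes are laid out in contiguous ranges: primitive types up to
   IX86_BT_LAST_PRIM, vector types up to IX86_BT_LAST_VECT, plain pointer
   types up to IX86_BT_LAST_PTR, and const pointer types up to
   IX86_BT_LAST_CPTR; the code below uses the index within a range to look
   up the base type and mode for the vector and pointer cases.  */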
27803 static tree
27804 ix86_get_builtin_type (enum ix86_builtin_type tcode)
27806 unsigned int index;
27807 tree type, itype;
27809 gcc_assert ((unsigned)tcode < ARRAY_SIZE(ix86_builtin_type_tab));
27811 type = ix86_builtin_type_tab[(int) tcode];
27812 if (type != NULL)
27813 return type;
27815 gcc_assert (tcode > IX86_BT_LAST_PRIM);
27816 if (tcode <= IX86_BT_LAST_VECT)
27818 machine_mode mode;
27820 index = tcode - IX86_BT_LAST_PRIM - 1;
27821 itype = ix86_get_builtin_type (ix86_builtin_type_vect_base[index]);
27822 mode = ix86_builtin_type_vect_mode[index];
27824 type = build_vector_type_for_mode (itype, mode);
27826 else
27828 int quals;
27830 index = tcode - IX86_BT_LAST_VECT - 1;
27831 if (tcode <= IX86_BT_LAST_PTR)
27832 quals = TYPE_UNQUALIFIED;
27833 else
27834 quals = TYPE_QUAL_CONST;
27836 itype = ix86_get_builtin_type (ix86_builtin_type_ptr_base[index]);
27837 if (quals != TYPE_UNQUALIFIED)
27838 itype = build_qualified_type (itype, quals);
27840 type = build_pointer_type (itype);
27843 ix86_builtin_type_tab[(int) tcode] = type;
27844 return type;
27847 /* Table for the ix86 builtin function types. */
27848 static GTY(()) tree ix86_builtin_func_type_tab[(int) IX86_BT_LAST_ALIAS + 1];
27850 /* Retrieve an element from the above table, building some of
27851 the types lazily. */
27853 static tree
27854 ix86_get_builtin_func_type (enum ix86_builtin_func_type tcode)
27856 tree type;
27858 gcc_assert ((unsigned)tcode < ARRAY_SIZE (ix86_builtin_func_type_tab));
27860 type = ix86_builtin_func_type_tab[(int) tcode];
27861 if (type != NULL)
27862 return type;
27864 if (tcode <= IX86_BT_LAST_FUNC)
27866 unsigned start = ix86_builtin_func_start[(int) tcode];
27867 unsigned after = ix86_builtin_func_start[(int) tcode + 1];
27868 tree rtype, atype, args = void_list_node;
27869 unsigned i;
27871 rtype = ix86_get_builtin_type (ix86_builtin_func_args[start]);
27872 for (i = after - 1; i > start; --i)
27874 atype = ix86_get_builtin_type (ix86_builtin_func_args[i]);
27875 args = tree_cons (NULL, atype, args);
27878 type = build_function_type (rtype, args);
27880 else
27882 unsigned index = tcode - IX86_BT_LAST_FUNC - 1;
27883 enum ix86_builtin_func_type icode;
27885 icode = ix86_builtin_func_alias_base[index];
27886 type = ix86_get_builtin_func_type (icode);
27889 ix86_builtin_func_type_tab[(int) tcode] = type;
27890 return type;
27894 /* Codes for all the SSE/MMX builtins. */
27895 enum ix86_builtins
27897 IX86_BUILTIN_ADDPS,
27898 IX86_BUILTIN_ADDSS,
27899 IX86_BUILTIN_DIVPS,
27900 IX86_BUILTIN_DIVSS,
27901 IX86_BUILTIN_MULPS,
27902 IX86_BUILTIN_MULSS,
27903 IX86_BUILTIN_SUBPS,
27904 IX86_BUILTIN_SUBSS,
27906 IX86_BUILTIN_CMPEQPS,
27907 IX86_BUILTIN_CMPLTPS,
27908 IX86_BUILTIN_CMPLEPS,
27909 IX86_BUILTIN_CMPGTPS,
27910 IX86_BUILTIN_CMPGEPS,
27911 IX86_BUILTIN_CMPNEQPS,
27912 IX86_BUILTIN_CMPNLTPS,
27913 IX86_BUILTIN_CMPNLEPS,
27914 IX86_BUILTIN_CMPNGTPS,
27915 IX86_BUILTIN_CMPNGEPS,
27916 IX86_BUILTIN_CMPORDPS,
27917 IX86_BUILTIN_CMPUNORDPS,
27918 IX86_BUILTIN_CMPEQSS,
27919 IX86_BUILTIN_CMPLTSS,
27920 IX86_BUILTIN_CMPLESS,
27921 IX86_BUILTIN_CMPNEQSS,
27922 IX86_BUILTIN_CMPNLTSS,
27923 IX86_BUILTIN_CMPNLESS,
27924 IX86_BUILTIN_CMPORDSS,
27925 IX86_BUILTIN_CMPUNORDSS,
27927 IX86_BUILTIN_COMIEQSS,
27928 IX86_BUILTIN_COMILTSS,
27929 IX86_BUILTIN_COMILESS,
27930 IX86_BUILTIN_COMIGTSS,
27931 IX86_BUILTIN_COMIGESS,
27932 IX86_BUILTIN_COMINEQSS,
27933 IX86_BUILTIN_UCOMIEQSS,
27934 IX86_BUILTIN_UCOMILTSS,
27935 IX86_BUILTIN_UCOMILESS,
27936 IX86_BUILTIN_UCOMIGTSS,
27937 IX86_BUILTIN_UCOMIGESS,
27938 IX86_BUILTIN_UCOMINEQSS,
27940 IX86_BUILTIN_CVTPI2PS,
27941 IX86_BUILTIN_CVTPS2PI,
27942 IX86_BUILTIN_CVTSI2SS,
27943 IX86_BUILTIN_CVTSI642SS,
27944 IX86_BUILTIN_CVTSS2SI,
27945 IX86_BUILTIN_CVTSS2SI64,
27946 IX86_BUILTIN_CVTTPS2PI,
27947 IX86_BUILTIN_CVTTSS2SI,
27948 IX86_BUILTIN_CVTTSS2SI64,
27950 IX86_BUILTIN_MAXPS,
27951 IX86_BUILTIN_MAXSS,
27952 IX86_BUILTIN_MINPS,
27953 IX86_BUILTIN_MINSS,
27955 IX86_BUILTIN_LOADUPS,
27956 IX86_BUILTIN_STOREUPS,
27957 IX86_BUILTIN_MOVSS,
27959 IX86_BUILTIN_MOVHLPS,
27960 IX86_BUILTIN_MOVLHPS,
27961 IX86_BUILTIN_LOADHPS,
27962 IX86_BUILTIN_LOADLPS,
27963 IX86_BUILTIN_STOREHPS,
27964 IX86_BUILTIN_STORELPS,
27966 IX86_BUILTIN_MASKMOVQ,
27967 IX86_BUILTIN_MOVMSKPS,
27968 IX86_BUILTIN_PMOVMSKB,
27970 IX86_BUILTIN_MOVNTPS,
27971 IX86_BUILTIN_MOVNTQ,
27973 IX86_BUILTIN_LOADDQU,
27974 IX86_BUILTIN_STOREDQU,
27976 IX86_BUILTIN_PACKSSWB,
27977 IX86_BUILTIN_PACKSSDW,
27978 IX86_BUILTIN_PACKUSWB,
27980 IX86_BUILTIN_PADDB,
27981 IX86_BUILTIN_PADDW,
27982 IX86_BUILTIN_PADDD,
27983 IX86_BUILTIN_PADDQ,
27984 IX86_BUILTIN_PADDSB,
27985 IX86_BUILTIN_PADDSW,
27986 IX86_BUILTIN_PADDUSB,
27987 IX86_BUILTIN_PADDUSW,
27988 IX86_BUILTIN_PSUBB,
27989 IX86_BUILTIN_PSUBW,
27990 IX86_BUILTIN_PSUBD,
27991 IX86_BUILTIN_PSUBQ,
27992 IX86_BUILTIN_PSUBSB,
27993 IX86_BUILTIN_PSUBSW,
27994 IX86_BUILTIN_PSUBUSB,
27995 IX86_BUILTIN_PSUBUSW,
27997 IX86_BUILTIN_PAND,
27998 IX86_BUILTIN_PANDN,
27999 IX86_BUILTIN_POR,
28000 IX86_BUILTIN_PXOR,
28002 IX86_BUILTIN_PAVGB,
28003 IX86_BUILTIN_PAVGW,
28005 IX86_BUILTIN_PCMPEQB,
28006 IX86_BUILTIN_PCMPEQW,
28007 IX86_BUILTIN_PCMPEQD,
28008 IX86_BUILTIN_PCMPGTB,
28009 IX86_BUILTIN_PCMPGTW,
28010 IX86_BUILTIN_PCMPGTD,
28012 IX86_BUILTIN_PMADDWD,
28014 IX86_BUILTIN_PMAXSW,
28015 IX86_BUILTIN_PMAXUB,
28016 IX86_BUILTIN_PMINSW,
28017 IX86_BUILTIN_PMINUB,
28019 IX86_BUILTIN_PMULHUW,
28020 IX86_BUILTIN_PMULHW,
28021 IX86_BUILTIN_PMULLW,
28023 IX86_BUILTIN_PSADBW,
28024 IX86_BUILTIN_PSHUFW,
28026 IX86_BUILTIN_PSLLW,
28027 IX86_BUILTIN_PSLLD,
28028 IX86_BUILTIN_PSLLQ,
28029 IX86_BUILTIN_PSRAW,
28030 IX86_BUILTIN_PSRAD,
28031 IX86_BUILTIN_PSRLW,
28032 IX86_BUILTIN_PSRLD,
28033 IX86_BUILTIN_PSRLQ,
28034 IX86_BUILTIN_PSLLWI,
28035 IX86_BUILTIN_PSLLDI,
28036 IX86_BUILTIN_PSLLQI,
28037 IX86_BUILTIN_PSRAWI,
28038 IX86_BUILTIN_PSRADI,
28039 IX86_BUILTIN_PSRLWI,
28040 IX86_BUILTIN_PSRLDI,
28041 IX86_BUILTIN_PSRLQI,
28043 IX86_BUILTIN_PUNPCKHBW,
28044 IX86_BUILTIN_PUNPCKHWD,
28045 IX86_BUILTIN_PUNPCKHDQ,
28046 IX86_BUILTIN_PUNPCKLBW,
28047 IX86_BUILTIN_PUNPCKLWD,
28048 IX86_BUILTIN_PUNPCKLDQ,
28050 IX86_BUILTIN_SHUFPS,
28052 IX86_BUILTIN_RCPPS,
28053 IX86_BUILTIN_RCPSS,
28054 IX86_BUILTIN_RSQRTPS,
28055 IX86_BUILTIN_RSQRTPS_NR,
28056 IX86_BUILTIN_RSQRTSS,
28057 IX86_BUILTIN_RSQRTF,
28058 IX86_BUILTIN_SQRTPS,
28059 IX86_BUILTIN_SQRTPS_NR,
28060 IX86_BUILTIN_SQRTSS,
28062 IX86_BUILTIN_UNPCKHPS,
28063 IX86_BUILTIN_UNPCKLPS,
28065 IX86_BUILTIN_ANDPS,
28066 IX86_BUILTIN_ANDNPS,
28067 IX86_BUILTIN_ORPS,
28068 IX86_BUILTIN_XORPS,
28070 IX86_BUILTIN_EMMS,
28071 IX86_BUILTIN_LDMXCSR,
28072 IX86_BUILTIN_STMXCSR,
28073 IX86_BUILTIN_SFENCE,
28075 IX86_BUILTIN_FXSAVE,
28076 IX86_BUILTIN_FXRSTOR,
28077 IX86_BUILTIN_FXSAVE64,
28078 IX86_BUILTIN_FXRSTOR64,
28080 IX86_BUILTIN_XSAVE,
28081 IX86_BUILTIN_XRSTOR,
28082 IX86_BUILTIN_XSAVE64,
28083 IX86_BUILTIN_XRSTOR64,
28085 IX86_BUILTIN_XSAVEOPT,
28086 IX86_BUILTIN_XSAVEOPT64,
28088 IX86_BUILTIN_XSAVEC,
28089 IX86_BUILTIN_XSAVEC64,
28091 IX86_BUILTIN_XSAVES,
28092 IX86_BUILTIN_XRSTORS,
28093 IX86_BUILTIN_XSAVES64,
28094 IX86_BUILTIN_XRSTORS64,
28096 /* 3DNow! Original */
28097 IX86_BUILTIN_FEMMS,
28098 IX86_BUILTIN_PAVGUSB,
28099 IX86_BUILTIN_PF2ID,
28100 IX86_BUILTIN_PFACC,
28101 IX86_BUILTIN_PFADD,
28102 IX86_BUILTIN_PFCMPEQ,
28103 IX86_BUILTIN_PFCMPGE,
28104 IX86_BUILTIN_PFCMPGT,
28105 IX86_BUILTIN_PFMAX,
28106 IX86_BUILTIN_PFMIN,
28107 IX86_BUILTIN_PFMUL,
28108 IX86_BUILTIN_PFRCP,
28109 IX86_BUILTIN_PFRCPIT1,
28110 IX86_BUILTIN_PFRCPIT2,
28111 IX86_BUILTIN_PFRSQIT1,
28112 IX86_BUILTIN_PFRSQRT,
28113 IX86_BUILTIN_PFSUB,
28114 IX86_BUILTIN_PFSUBR,
28115 IX86_BUILTIN_PI2FD,
28116 IX86_BUILTIN_PMULHRW,
28118 /* 3DNow! Athlon Extensions */
28119 IX86_BUILTIN_PF2IW,
28120 IX86_BUILTIN_PFNACC,
28121 IX86_BUILTIN_PFPNACC,
28122 IX86_BUILTIN_PI2FW,
28123 IX86_BUILTIN_PSWAPDSI,
28124 IX86_BUILTIN_PSWAPDSF,
28126 /* SSE2 */
28127 IX86_BUILTIN_ADDPD,
28128 IX86_BUILTIN_ADDSD,
28129 IX86_BUILTIN_DIVPD,
28130 IX86_BUILTIN_DIVSD,
28131 IX86_BUILTIN_MULPD,
28132 IX86_BUILTIN_MULSD,
28133 IX86_BUILTIN_SUBPD,
28134 IX86_BUILTIN_SUBSD,
28136 IX86_BUILTIN_CMPEQPD,
28137 IX86_BUILTIN_CMPLTPD,
28138 IX86_BUILTIN_CMPLEPD,
28139 IX86_BUILTIN_CMPGTPD,
28140 IX86_BUILTIN_CMPGEPD,
28141 IX86_BUILTIN_CMPNEQPD,
28142 IX86_BUILTIN_CMPNLTPD,
28143 IX86_BUILTIN_CMPNLEPD,
28144 IX86_BUILTIN_CMPNGTPD,
28145 IX86_BUILTIN_CMPNGEPD,
28146 IX86_BUILTIN_CMPORDPD,
28147 IX86_BUILTIN_CMPUNORDPD,
28148 IX86_BUILTIN_CMPEQSD,
28149 IX86_BUILTIN_CMPLTSD,
28150 IX86_BUILTIN_CMPLESD,
28151 IX86_BUILTIN_CMPNEQSD,
28152 IX86_BUILTIN_CMPNLTSD,
28153 IX86_BUILTIN_CMPNLESD,
28154 IX86_BUILTIN_CMPORDSD,
28155 IX86_BUILTIN_CMPUNORDSD,
28157 IX86_BUILTIN_COMIEQSD,
28158 IX86_BUILTIN_COMILTSD,
28159 IX86_BUILTIN_COMILESD,
28160 IX86_BUILTIN_COMIGTSD,
28161 IX86_BUILTIN_COMIGESD,
28162 IX86_BUILTIN_COMINEQSD,
28163 IX86_BUILTIN_UCOMIEQSD,
28164 IX86_BUILTIN_UCOMILTSD,
28165 IX86_BUILTIN_UCOMILESD,
28166 IX86_BUILTIN_UCOMIGTSD,
28167 IX86_BUILTIN_UCOMIGESD,
28168 IX86_BUILTIN_UCOMINEQSD,
28170 IX86_BUILTIN_MAXPD,
28171 IX86_BUILTIN_MAXSD,
28172 IX86_BUILTIN_MINPD,
28173 IX86_BUILTIN_MINSD,
28175 IX86_BUILTIN_ANDPD,
28176 IX86_BUILTIN_ANDNPD,
28177 IX86_BUILTIN_ORPD,
28178 IX86_BUILTIN_XORPD,
28180 IX86_BUILTIN_SQRTPD,
28181 IX86_BUILTIN_SQRTSD,
28183 IX86_BUILTIN_UNPCKHPD,
28184 IX86_BUILTIN_UNPCKLPD,
28186 IX86_BUILTIN_SHUFPD,
28188 IX86_BUILTIN_LOADUPD,
28189 IX86_BUILTIN_STOREUPD,
28190 IX86_BUILTIN_MOVSD,
28192 IX86_BUILTIN_LOADHPD,
28193 IX86_BUILTIN_LOADLPD,
28195 IX86_BUILTIN_CVTDQ2PD,
28196 IX86_BUILTIN_CVTDQ2PS,
28198 IX86_BUILTIN_CVTPD2DQ,
28199 IX86_BUILTIN_CVTPD2PI,
28200 IX86_BUILTIN_CVTPD2PS,
28201 IX86_BUILTIN_CVTTPD2DQ,
28202 IX86_BUILTIN_CVTTPD2PI,
28204 IX86_BUILTIN_CVTPI2PD,
28205 IX86_BUILTIN_CVTSI2SD,
28206 IX86_BUILTIN_CVTSI642SD,
28208 IX86_BUILTIN_CVTSD2SI,
28209 IX86_BUILTIN_CVTSD2SI64,
28210 IX86_BUILTIN_CVTSD2SS,
28211 IX86_BUILTIN_CVTSS2SD,
28212 IX86_BUILTIN_CVTTSD2SI,
28213 IX86_BUILTIN_CVTTSD2SI64,
28215 IX86_BUILTIN_CVTPS2DQ,
28216 IX86_BUILTIN_CVTPS2PD,
28217 IX86_BUILTIN_CVTTPS2DQ,
28219 IX86_BUILTIN_MOVNTI,
28220 IX86_BUILTIN_MOVNTI64,
28221 IX86_BUILTIN_MOVNTPD,
28222 IX86_BUILTIN_MOVNTDQ,
28224 IX86_BUILTIN_MOVQ128,
28226 /* SSE2 MMX */
28227 IX86_BUILTIN_MASKMOVDQU,
28228 IX86_BUILTIN_MOVMSKPD,
28229 IX86_BUILTIN_PMOVMSKB128,
28231 IX86_BUILTIN_PACKSSWB128,
28232 IX86_BUILTIN_PACKSSDW128,
28233 IX86_BUILTIN_PACKUSWB128,
28235 IX86_BUILTIN_PADDB128,
28236 IX86_BUILTIN_PADDW128,
28237 IX86_BUILTIN_PADDD128,
28238 IX86_BUILTIN_PADDQ128,
28239 IX86_BUILTIN_PADDSB128,
28240 IX86_BUILTIN_PADDSW128,
28241 IX86_BUILTIN_PADDUSB128,
28242 IX86_BUILTIN_PADDUSW128,
28243 IX86_BUILTIN_PSUBB128,
28244 IX86_BUILTIN_PSUBW128,
28245 IX86_BUILTIN_PSUBD128,
28246 IX86_BUILTIN_PSUBQ128,
28247 IX86_BUILTIN_PSUBSB128,
28248 IX86_BUILTIN_PSUBSW128,
28249 IX86_BUILTIN_PSUBUSB128,
28250 IX86_BUILTIN_PSUBUSW128,
28252 IX86_BUILTIN_PAND128,
28253 IX86_BUILTIN_PANDN128,
28254 IX86_BUILTIN_POR128,
28255 IX86_BUILTIN_PXOR128,
28257 IX86_BUILTIN_PAVGB128,
28258 IX86_BUILTIN_PAVGW128,
28260 IX86_BUILTIN_PCMPEQB128,
28261 IX86_BUILTIN_PCMPEQW128,
28262 IX86_BUILTIN_PCMPEQD128,
28263 IX86_BUILTIN_PCMPGTB128,
28264 IX86_BUILTIN_PCMPGTW128,
28265 IX86_BUILTIN_PCMPGTD128,
28267 IX86_BUILTIN_PMADDWD128,
28269 IX86_BUILTIN_PMAXSW128,
28270 IX86_BUILTIN_PMAXUB128,
28271 IX86_BUILTIN_PMINSW128,
28272 IX86_BUILTIN_PMINUB128,
28274 IX86_BUILTIN_PMULUDQ,
28275 IX86_BUILTIN_PMULUDQ128,
28276 IX86_BUILTIN_PMULHUW128,
28277 IX86_BUILTIN_PMULHW128,
28278 IX86_BUILTIN_PMULLW128,
28280 IX86_BUILTIN_PSADBW128,
28281 IX86_BUILTIN_PSHUFHW,
28282 IX86_BUILTIN_PSHUFLW,
28283 IX86_BUILTIN_PSHUFD,
28285 IX86_BUILTIN_PSLLDQI128,
28286 IX86_BUILTIN_PSLLWI128,
28287 IX86_BUILTIN_PSLLDI128,
28288 IX86_BUILTIN_PSLLQI128,
28289 IX86_BUILTIN_PSRAWI128,
28290 IX86_BUILTIN_PSRADI128,
28291 IX86_BUILTIN_PSRLDQI128,
28292 IX86_BUILTIN_PSRLWI128,
28293 IX86_BUILTIN_PSRLDI128,
28294 IX86_BUILTIN_PSRLQI128,
28296 IX86_BUILTIN_PSLLDQ128,
28297 IX86_BUILTIN_PSLLW128,
28298 IX86_BUILTIN_PSLLD128,
28299 IX86_BUILTIN_PSLLQ128,
28300 IX86_BUILTIN_PSRAW128,
28301 IX86_BUILTIN_PSRAD128,
28302 IX86_BUILTIN_PSRLW128,
28303 IX86_BUILTIN_PSRLD128,
28304 IX86_BUILTIN_PSRLQ128,
28306 IX86_BUILTIN_PUNPCKHBW128,
28307 IX86_BUILTIN_PUNPCKHWD128,
28308 IX86_BUILTIN_PUNPCKHDQ128,
28309 IX86_BUILTIN_PUNPCKHQDQ128,
28310 IX86_BUILTIN_PUNPCKLBW128,
28311 IX86_BUILTIN_PUNPCKLWD128,
28312 IX86_BUILTIN_PUNPCKLDQ128,
28313 IX86_BUILTIN_PUNPCKLQDQ128,
28315 IX86_BUILTIN_CLFLUSH,
28316 IX86_BUILTIN_MFENCE,
28317 IX86_BUILTIN_LFENCE,
28318 IX86_BUILTIN_PAUSE,
28320 IX86_BUILTIN_FNSTENV,
28321 IX86_BUILTIN_FLDENV,
28322 IX86_BUILTIN_FNSTSW,
28323 IX86_BUILTIN_FNCLEX,
28325 IX86_BUILTIN_BSRSI,
28326 IX86_BUILTIN_BSRDI,
28327 IX86_BUILTIN_RDPMC,
28328 IX86_BUILTIN_RDTSC,
28329 IX86_BUILTIN_RDTSCP,
28330 IX86_BUILTIN_ROLQI,
28331 IX86_BUILTIN_ROLHI,
28332 IX86_BUILTIN_RORQI,
28333 IX86_BUILTIN_RORHI,
28335 /* SSE3. */
28336 IX86_BUILTIN_ADDSUBPS,
28337 IX86_BUILTIN_HADDPS,
28338 IX86_BUILTIN_HSUBPS,
28339 IX86_BUILTIN_MOVSHDUP,
28340 IX86_BUILTIN_MOVSLDUP,
28341 IX86_BUILTIN_ADDSUBPD,
28342 IX86_BUILTIN_HADDPD,
28343 IX86_BUILTIN_HSUBPD,
28344 IX86_BUILTIN_LDDQU,
28346 IX86_BUILTIN_MONITOR,
28347 IX86_BUILTIN_MWAIT,
28349 /* SSSE3. */
28350 IX86_BUILTIN_PHADDW,
28351 IX86_BUILTIN_PHADDD,
28352 IX86_BUILTIN_PHADDSW,
28353 IX86_BUILTIN_PHSUBW,
28354 IX86_BUILTIN_PHSUBD,
28355 IX86_BUILTIN_PHSUBSW,
28356 IX86_BUILTIN_PMADDUBSW,
28357 IX86_BUILTIN_PMULHRSW,
28358 IX86_BUILTIN_PSHUFB,
28359 IX86_BUILTIN_PSIGNB,
28360 IX86_BUILTIN_PSIGNW,
28361 IX86_BUILTIN_PSIGND,
28362 IX86_BUILTIN_PALIGNR,
28363 IX86_BUILTIN_PABSB,
28364 IX86_BUILTIN_PABSW,
28365 IX86_BUILTIN_PABSD,
28367 IX86_BUILTIN_PHADDW128,
28368 IX86_BUILTIN_PHADDD128,
28369 IX86_BUILTIN_PHADDSW128,
28370 IX86_BUILTIN_PHSUBW128,
28371 IX86_BUILTIN_PHSUBD128,
28372 IX86_BUILTIN_PHSUBSW128,
28373 IX86_BUILTIN_PMADDUBSW128,
28374 IX86_BUILTIN_PMULHRSW128,
28375 IX86_BUILTIN_PSHUFB128,
28376 IX86_BUILTIN_PSIGNB128,
28377 IX86_BUILTIN_PSIGNW128,
28378 IX86_BUILTIN_PSIGND128,
28379 IX86_BUILTIN_PALIGNR128,
28380 IX86_BUILTIN_PABSB128,
28381 IX86_BUILTIN_PABSW128,
28382 IX86_BUILTIN_PABSD128,
28384 /* AMDFAM10 - SSE4A New Instructions. */
28385 IX86_BUILTIN_MOVNTSD,
28386 IX86_BUILTIN_MOVNTSS,
28387 IX86_BUILTIN_EXTRQI,
28388 IX86_BUILTIN_EXTRQ,
28389 IX86_BUILTIN_INSERTQI,
28390 IX86_BUILTIN_INSERTQ,
28392 /* SSE4.1. */
28393 IX86_BUILTIN_BLENDPD,
28394 IX86_BUILTIN_BLENDPS,
28395 IX86_BUILTIN_BLENDVPD,
28396 IX86_BUILTIN_BLENDVPS,
28397 IX86_BUILTIN_PBLENDVB128,
28398 IX86_BUILTIN_PBLENDW128,
28400 IX86_BUILTIN_DPPD,
28401 IX86_BUILTIN_DPPS,
28403 IX86_BUILTIN_INSERTPS128,
28405 IX86_BUILTIN_MOVNTDQA,
28406 IX86_BUILTIN_MPSADBW128,
28407 IX86_BUILTIN_PACKUSDW128,
28408 IX86_BUILTIN_PCMPEQQ,
28409 IX86_BUILTIN_PHMINPOSUW128,
28411 IX86_BUILTIN_PMAXSB128,
28412 IX86_BUILTIN_PMAXSD128,
28413 IX86_BUILTIN_PMAXUD128,
28414 IX86_BUILTIN_PMAXUW128,
28416 IX86_BUILTIN_PMINSB128,
28417 IX86_BUILTIN_PMINSD128,
28418 IX86_BUILTIN_PMINUD128,
28419 IX86_BUILTIN_PMINUW128,
28421 IX86_BUILTIN_PMOVSXBW128,
28422 IX86_BUILTIN_PMOVSXBD128,
28423 IX86_BUILTIN_PMOVSXBQ128,
28424 IX86_BUILTIN_PMOVSXWD128,
28425 IX86_BUILTIN_PMOVSXWQ128,
28426 IX86_BUILTIN_PMOVSXDQ128,
28428 IX86_BUILTIN_PMOVZXBW128,
28429 IX86_BUILTIN_PMOVZXBD128,
28430 IX86_BUILTIN_PMOVZXBQ128,
28431 IX86_BUILTIN_PMOVZXWD128,
28432 IX86_BUILTIN_PMOVZXWQ128,
28433 IX86_BUILTIN_PMOVZXDQ128,
28435 IX86_BUILTIN_PMULDQ128,
28436 IX86_BUILTIN_PMULLD128,
28438 IX86_BUILTIN_ROUNDSD,
28439 IX86_BUILTIN_ROUNDSS,
28441 IX86_BUILTIN_ROUNDPD,
28442 IX86_BUILTIN_ROUNDPS,
28444 IX86_BUILTIN_FLOORPD,
28445 IX86_BUILTIN_CEILPD,
28446 IX86_BUILTIN_TRUNCPD,
28447 IX86_BUILTIN_RINTPD,
28448 IX86_BUILTIN_ROUNDPD_AZ,
28450 IX86_BUILTIN_FLOORPD_VEC_PACK_SFIX,
28451 IX86_BUILTIN_CEILPD_VEC_PACK_SFIX,
28452 IX86_BUILTIN_ROUNDPD_AZ_VEC_PACK_SFIX,
28454 IX86_BUILTIN_FLOORPS,
28455 IX86_BUILTIN_CEILPS,
28456 IX86_BUILTIN_TRUNCPS,
28457 IX86_BUILTIN_RINTPS,
28458 IX86_BUILTIN_ROUNDPS_AZ,
28460 IX86_BUILTIN_FLOORPS_SFIX,
28461 IX86_BUILTIN_CEILPS_SFIX,
28462 IX86_BUILTIN_ROUNDPS_AZ_SFIX,
28464 IX86_BUILTIN_PTESTZ,
28465 IX86_BUILTIN_PTESTC,
28466 IX86_BUILTIN_PTESTNZC,
28468 IX86_BUILTIN_VEC_INIT_V2SI,
28469 IX86_BUILTIN_VEC_INIT_V4HI,
28470 IX86_BUILTIN_VEC_INIT_V8QI,
28471 IX86_BUILTIN_VEC_EXT_V2DF,
28472 IX86_BUILTIN_VEC_EXT_V2DI,
28473 IX86_BUILTIN_VEC_EXT_V4SF,
28474 IX86_BUILTIN_VEC_EXT_V4SI,
28475 IX86_BUILTIN_VEC_EXT_V8HI,
28476 IX86_BUILTIN_VEC_EXT_V2SI,
28477 IX86_BUILTIN_VEC_EXT_V4HI,
28478 IX86_BUILTIN_VEC_EXT_V16QI,
28479 IX86_BUILTIN_VEC_SET_V2DI,
28480 IX86_BUILTIN_VEC_SET_V4SF,
28481 IX86_BUILTIN_VEC_SET_V4SI,
28482 IX86_BUILTIN_VEC_SET_V8HI,
28483 IX86_BUILTIN_VEC_SET_V4HI,
28484 IX86_BUILTIN_VEC_SET_V16QI,
28486 IX86_BUILTIN_VEC_PACK_SFIX,
28487 IX86_BUILTIN_VEC_PACK_SFIX256,
28489 /* SSE4.2. */
28490 IX86_BUILTIN_CRC32QI,
28491 IX86_BUILTIN_CRC32HI,
28492 IX86_BUILTIN_CRC32SI,
28493 IX86_BUILTIN_CRC32DI,
28495 IX86_BUILTIN_PCMPESTRI128,
28496 IX86_BUILTIN_PCMPESTRM128,
28497 IX86_BUILTIN_PCMPESTRA128,
28498 IX86_BUILTIN_PCMPESTRC128,
28499 IX86_BUILTIN_PCMPESTRO128,
28500 IX86_BUILTIN_PCMPESTRS128,
28501 IX86_BUILTIN_PCMPESTRZ128,
28502 IX86_BUILTIN_PCMPISTRI128,
28503 IX86_BUILTIN_PCMPISTRM128,
28504 IX86_BUILTIN_PCMPISTRA128,
28505 IX86_BUILTIN_PCMPISTRC128,
28506 IX86_BUILTIN_PCMPISTRO128,
28507 IX86_BUILTIN_PCMPISTRS128,
28508 IX86_BUILTIN_PCMPISTRZ128,
28510 IX86_BUILTIN_PCMPGTQ,
28512 /* AES instructions. */
28513 IX86_BUILTIN_AESENC128,
28514 IX86_BUILTIN_AESENCLAST128,
28515 IX86_BUILTIN_AESDEC128,
28516 IX86_BUILTIN_AESDECLAST128,
28517 IX86_BUILTIN_AESIMC128,
28518 IX86_BUILTIN_AESKEYGENASSIST128,
28520 /* PCLMUL instruction. */
28521 IX86_BUILTIN_PCLMULQDQ128,
28523 /* AVX. */
28524 IX86_BUILTIN_ADDPD256,
28525 IX86_BUILTIN_ADDPS256,
28526 IX86_BUILTIN_ADDSUBPD256,
28527 IX86_BUILTIN_ADDSUBPS256,
28528 IX86_BUILTIN_ANDPD256,
28529 IX86_BUILTIN_ANDPS256,
28530 IX86_BUILTIN_ANDNPD256,
28531 IX86_BUILTIN_ANDNPS256,
28532 IX86_BUILTIN_BLENDPD256,
28533 IX86_BUILTIN_BLENDPS256,
28534 IX86_BUILTIN_BLENDVPD256,
28535 IX86_BUILTIN_BLENDVPS256,
28536 IX86_BUILTIN_DIVPD256,
28537 IX86_BUILTIN_DIVPS256,
28538 IX86_BUILTIN_DPPS256,
28539 IX86_BUILTIN_HADDPD256,
28540 IX86_BUILTIN_HADDPS256,
28541 IX86_BUILTIN_HSUBPD256,
28542 IX86_BUILTIN_HSUBPS256,
28543 IX86_BUILTIN_MAXPD256,
28544 IX86_BUILTIN_MAXPS256,
28545 IX86_BUILTIN_MINPD256,
28546 IX86_BUILTIN_MINPS256,
28547 IX86_BUILTIN_MULPD256,
28548 IX86_BUILTIN_MULPS256,
28549 IX86_BUILTIN_ORPD256,
28550 IX86_BUILTIN_ORPS256,
28551 IX86_BUILTIN_SHUFPD256,
28552 IX86_BUILTIN_SHUFPS256,
28553 IX86_BUILTIN_SUBPD256,
28554 IX86_BUILTIN_SUBPS256,
28555 IX86_BUILTIN_XORPD256,
28556 IX86_BUILTIN_XORPS256,
28557 IX86_BUILTIN_CMPSD,
28558 IX86_BUILTIN_CMPSS,
28559 IX86_BUILTIN_CMPPD,
28560 IX86_BUILTIN_CMPPS,
28561 IX86_BUILTIN_CMPPD256,
28562 IX86_BUILTIN_CMPPS256,
28563 IX86_BUILTIN_CVTDQ2PD256,
28564 IX86_BUILTIN_CVTDQ2PS256,
28565 IX86_BUILTIN_CVTPD2PS256,
28566 IX86_BUILTIN_CVTPS2DQ256,
28567 IX86_BUILTIN_CVTPS2PD256,
28568 IX86_BUILTIN_CVTTPD2DQ256,
28569 IX86_BUILTIN_CVTPD2DQ256,
28570 IX86_BUILTIN_CVTTPS2DQ256,
28571 IX86_BUILTIN_EXTRACTF128PD256,
28572 IX86_BUILTIN_EXTRACTF128PS256,
28573 IX86_BUILTIN_EXTRACTF128SI256,
28574 IX86_BUILTIN_VZEROALL,
28575 IX86_BUILTIN_VZEROUPPER,
28576 IX86_BUILTIN_VPERMILVARPD,
28577 IX86_BUILTIN_VPERMILVARPS,
28578 IX86_BUILTIN_VPERMILVARPD256,
28579 IX86_BUILTIN_VPERMILVARPS256,
28580 IX86_BUILTIN_VPERMILPD,
28581 IX86_BUILTIN_VPERMILPS,
28582 IX86_BUILTIN_VPERMILPD256,
28583 IX86_BUILTIN_VPERMILPS256,
28584 IX86_BUILTIN_VPERMIL2PD,
28585 IX86_BUILTIN_VPERMIL2PS,
28586 IX86_BUILTIN_VPERMIL2PD256,
28587 IX86_BUILTIN_VPERMIL2PS256,
28588 IX86_BUILTIN_VPERM2F128PD256,
28589 IX86_BUILTIN_VPERM2F128PS256,
28590 IX86_BUILTIN_VPERM2F128SI256,
28591 IX86_BUILTIN_VBROADCASTSS,
28592 IX86_BUILTIN_VBROADCASTSD256,
28593 IX86_BUILTIN_VBROADCASTSS256,
28594 IX86_BUILTIN_VBROADCASTPD256,
28595 IX86_BUILTIN_VBROADCASTPS256,
28596 IX86_BUILTIN_VINSERTF128PD256,
28597 IX86_BUILTIN_VINSERTF128PS256,
28598 IX86_BUILTIN_VINSERTF128SI256,
28599 IX86_BUILTIN_LOADUPD256,
28600 IX86_BUILTIN_LOADUPS256,
28601 IX86_BUILTIN_STOREUPD256,
28602 IX86_BUILTIN_STOREUPS256,
28603 IX86_BUILTIN_LDDQU256,
28604 IX86_BUILTIN_MOVNTDQ256,
28605 IX86_BUILTIN_MOVNTPD256,
28606 IX86_BUILTIN_MOVNTPS256,
28607 IX86_BUILTIN_LOADDQU256,
28608 IX86_BUILTIN_STOREDQU256,
28609 IX86_BUILTIN_MASKLOADPD,
28610 IX86_BUILTIN_MASKLOADPS,
28611 IX86_BUILTIN_MASKSTOREPD,
28612 IX86_BUILTIN_MASKSTOREPS,
28613 IX86_BUILTIN_MASKLOADPD256,
28614 IX86_BUILTIN_MASKLOADPS256,
28615 IX86_BUILTIN_MASKSTOREPD256,
28616 IX86_BUILTIN_MASKSTOREPS256,
28617 IX86_BUILTIN_MOVSHDUP256,
28618 IX86_BUILTIN_MOVSLDUP256,
28619 IX86_BUILTIN_MOVDDUP256,
28621 IX86_BUILTIN_SQRTPD256,
28622 IX86_BUILTIN_SQRTPS256,
28623 IX86_BUILTIN_SQRTPS_NR256,
28624 IX86_BUILTIN_RSQRTPS256,
28625 IX86_BUILTIN_RSQRTPS_NR256,
28627 IX86_BUILTIN_RCPPS256,
28629 IX86_BUILTIN_ROUNDPD256,
28630 IX86_BUILTIN_ROUNDPS256,
28632 IX86_BUILTIN_FLOORPD256,
28633 IX86_BUILTIN_CEILPD256,
28634 IX86_BUILTIN_TRUNCPD256,
28635 IX86_BUILTIN_RINTPD256,
28636 IX86_BUILTIN_ROUNDPD_AZ256,
28638 IX86_BUILTIN_FLOORPD_VEC_PACK_SFIX256,
28639 IX86_BUILTIN_CEILPD_VEC_PACK_SFIX256,
28640 IX86_BUILTIN_ROUNDPD_AZ_VEC_PACK_SFIX256,
28642 IX86_BUILTIN_FLOORPS256,
28643 IX86_BUILTIN_CEILPS256,
28644 IX86_BUILTIN_TRUNCPS256,
28645 IX86_BUILTIN_RINTPS256,
28646 IX86_BUILTIN_ROUNDPS_AZ256,
28648 IX86_BUILTIN_FLOORPS_SFIX256,
28649 IX86_BUILTIN_CEILPS_SFIX256,
28650 IX86_BUILTIN_ROUNDPS_AZ_SFIX256,
28652 IX86_BUILTIN_UNPCKHPD256,
28653 IX86_BUILTIN_UNPCKLPD256,
28654 IX86_BUILTIN_UNPCKHPS256,
28655 IX86_BUILTIN_UNPCKLPS256,
28657 IX86_BUILTIN_SI256_SI,
28658 IX86_BUILTIN_PS256_PS,
28659 IX86_BUILTIN_PD256_PD,
28660 IX86_BUILTIN_SI_SI256,
28661 IX86_BUILTIN_PS_PS256,
28662 IX86_BUILTIN_PD_PD256,
28664 IX86_BUILTIN_VTESTZPD,
28665 IX86_BUILTIN_VTESTCPD,
28666 IX86_BUILTIN_VTESTNZCPD,
28667 IX86_BUILTIN_VTESTZPS,
28668 IX86_BUILTIN_VTESTCPS,
28669 IX86_BUILTIN_VTESTNZCPS,
28670 IX86_BUILTIN_VTESTZPD256,
28671 IX86_BUILTIN_VTESTCPD256,
28672 IX86_BUILTIN_VTESTNZCPD256,
28673 IX86_BUILTIN_VTESTZPS256,
28674 IX86_BUILTIN_VTESTCPS256,
28675 IX86_BUILTIN_VTESTNZCPS256,
28676 IX86_BUILTIN_PTESTZ256,
28677 IX86_BUILTIN_PTESTC256,
28678 IX86_BUILTIN_PTESTNZC256,
28680 IX86_BUILTIN_MOVMSKPD256,
28681 IX86_BUILTIN_MOVMSKPS256,
28683 /* AVX2. */
28684 IX86_BUILTIN_MPSADBW256,
28685 IX86_BUILTIN_PABSB256,
28686 IX86_BUILTIN_PABSW256,
28687 IX86_BUILTIN_PABSD256,
28688 IX86_BUILTIN_PACKSSDW256,
28689 IX86_BUILTIN_PACKSSWB256,
28690 IX86_BUILTIN_PACKUSDW256,
28691 IX86_BUILTIN_PACKUSWB256,
28692 IX86_BUILTIN_PADDB256,
28693 IX86_BUILTIN_PADDW256,
28694 IX86_BUILTIN_PADDD256,
28695 IX86_BUILTIN_PADDQ256,
28696 IX86_BUILTIN_PADDSB256,
28697 IX86_BUILTIN_PADDSW256,
28698 IX86_BUILTIN_PADDUSB256,
28699 IX86_BUILTIN_PADDUSW256,
28700 IX86_BUILTIN_PALIGNR256,
28701 IX86_BUILTIN_AND256I,
28702 IX86_BUILTIN_ANDNOT256I,
28703 IX86_BUILTIN_PAVGB256,
28704 IX86_BUILTIN_PAVGW256,
28705 IX86_BUILTIN_PBLENDVB256,
28706 IX86_BUILTIN_PBLENDVW256,
28707 IX86_BUILTIN_PCMPEQB256,
28708 IX86_BUILTIN_PCMPEQW256,
28709 IX86_BUILTIN_PCMPEQD256,
28710 IX86_BUILTIN_PCMPEQQ256,
28711 IX86_BUILTIN_PCMPGTB256,
28712 IX86_BUILTIN_PCMPGTW256,
28713 IX86_BUILTIN_PCMPGTD256,
28714 IX86_BUILTIN_PCMPGTQ256,
28715 IX86_BUILTIN_PHADDW256,
28716 IX86_BUILTIN_PHADDD256,
28717 IX86_BUILTIN_PHADDSW256,
28718 IX86_BUILTIN_PHSUBW256,
28719 IX86_BUILTIN_PHSUBD256,
28720 IX86_BUILTIN_PHSUBSW256,
28721 IX86_BUILTIN_PMADDUBSW256,
28722 IX86_BUILTIN_PMADDWD256,
28723 IX86_BUILTIN_PMAXSB256,
28724 IX86_BUILTIN_PMAXSW256,
28725 IX86_BUILTIN_PMAXSD256,
28726 IX86_BUILTIN_PMAXUB256,
28727 IX86_BUILTIN_PMAXUW256,
28728 IX86_BUILTIN_PMAXUD256,
28729 IX86_BUILTIN_PMINSB256,
28730 IX86_BUILTIN_PMINSW256,
28731 IX86_BUILTIN_PMINSD256,
28732 IX86_BUILTIN_PMINUB256,
28733 IX86_BUILTIN_PMINUW256,
28734 IX86_BUILTIN_PMINUD256,
28735 IX86_BUILTIN_PMOVMSKB256,
28736 IX86_BUILTIN_PMOVSXBW256,
28737 IX86_BUILTIN_PMOVSXBD256,
28738 IX86_BUILTIN_PMOVSXBQ256,
28739 IX86_BUILTIN_PMOVSXWD256,
28740 IX86_BUILTIN_PMOVSXWQ256,
28741 IX86_BUILTIN_PMOVSXDQ256,
28742 IX86_BUILTIN_PMOVZXBW256,
28743 IX86_BUILTIN_PMOVZXBD256,
28744 IX86_BUILTIN_PMOVZXBQ256,
28745 IX86_BUILTIN_PMOVZXWD256,
28746 IX86_BUILTIN_PMOVZXWQ256,
28747 IX86_BUILTIN_PMOVZXDQ256,
28748 IX86_BUILTIN_PMULDQ256,
28749 IX86_BUILTIN_PMULHRSW256,
28750 IX86_BUILTIN_PMULHUW256,
28751 IX86_BUILTIN_PMULHW256,
28752 IX86_BUILTIN_PMULLW256,
28753 IX86_BUILTIN_PMULLD256,
28754 IX86_BUILTIN_PMULUDQ256,
28755 IX86_BUILTIN_POR256,
28756 IX86_BUILTIN_PSADBW256,
28757 IX86_BUILTIN_PSHUFB256,
28758 IX86_BUILTIN_PSHUFD256,
28759 IX86_BUILTIN_PSHUFHW256,
28760 IX86_BUILTIN_PSHUFLW256,
28761 IX86_BUILTIN_PSIGNB256,
28762 IX86_BUILTIN_PSIGNW256,
28763 IX86_BUILTIN_PSIGND256,
28764 IX86_BUILTIN_PSLLDQI256,
28765 IX86_BUILTIN_PSLLWI256,
28766 IX86_BUILTIN_PSLLW256,
28767 IX86_BUILTIN_PSLLDI256,
28768 IX86_BUILTIN_PSLLD256,
28769 IX86_BUILTIN_PSLLQI256,
28770 IX86_BUILTIN_PSLLQ256,
28771 IX86_BUILTIN_PSRAWI256,
28772 IX86_BUILTIN_PSRAW256,
28773 IX86_BUILTIN_PSRADI256,
28774 IX86_BUILTIN_PSRAD256,
28775 IX86_BUILTIN_PSRLDQI256,
28776 IX86_BUILTIN_PSRLWI256,
28777 IX86_BUILTIN_PSRLW256,
28778 IX86_BUILTIN_PSRLDI256,
28779 IX86_BUILTIN_PSRLD256,
28780 IX86_BUILTIN_PSRLQI256,
28781 IX86_BUILTIN_PSRLQ256,
28782 IX86_BUILTIN_PSUBB256,
28783 IX86_BUILTIN_PSUBW256,
28784 IX86_BUILTIN_PSUBD256,
28785 IX86_BUILTIN_PSUBQ256,
28786 IX86_BUILTIN_PSUBSB256,
28787 IX86_BUILTIN_PSUBSW256,
28788 IX86_BUILTIN_PSUBUSB256,
28789 IX86_BUILTIN_PSUBUSW256,
28790 IX86_BUILTIN_PUNPCKHBW256,
28791 IX86_BUILTIN_PUNPCKHWD256,
28792 IX86_BUILTIN_PUNPCKHDQ256,
28793 IX86_BUILTIN_PUNPCKHQDQ256,
28794 IX86_BUILTIN_PUNPCKLBW256,
28795 IX86_BUILTIN_PUNPCKLWD256,
28796 IX86_BUILTIN_PUNPCKLDQ256,
28797 IX86_BUILTIN_PUNPCKLQDQ256,
28798 IX86_BUILTIN_PXOR256,
28799 IX86_BUILTIN_MOVNTDQA256,
28800 IX86_BUILTIN_VBROADCASTSS_PS,
28801 IX86_BUILTIN_VBROADCASTSS_PS256,
28802 IX86_BUILTIN_VBROADCASTSD_PD256,
28803 IX86_BUILTIN_VBROADCASTSI256,
28804 IX86_BUILTIN_PBLENDD256,
28805 IX86_BUILTIN_PBLENDD128,
28806 IX86_BUILTIN_PBROADCASTB256,
28807 IX86_BUILTIN_PBROADCASTW256,
28808 IX86_BUILTIN_PBROADCASTD256,
28809 IX86_BUILTIN_PBROADCASTQ256,
28810 IX86_BUILTIN_PBROADCASTB128,
28811 IX86_BUILTIN_PBROADCASTW128,
28812 IX86_BUILTIN_PBROADCASTD128,
28813 IX86_BUILTIN_PBROADCASTQ128,
28814 IX86_BUILTIN_VPERMVARSI256,
28815 IX86_BUILTIN_VPERMDF256,
28816 IX86_BUILTIN_VPERMVARSF256,
28817 IX86_BUILTIN_VPERMDI256,
28818 IX86_BUILTIN_VPERMTI256,
28819 IX86_BUILTIN_VEXTRACT128I256,
28820 IX86_BUILTIN_VINSERT128I256,
28821 IX86_BUILTIN_MASKLOADD,
28822 IX86_BUILTIN_MASKLOADQ,
28823 IX86_BUILTIN_MASKLOADD256,
28824 IX86_BUILTIN_MASKLOADQ256,
28825 IX86_BUILTIN_MASKSTORED,
28826 IX86_BUILTIN_MASKSTOREQ,
28827 IX86_BUILTIN_MASKSTORED256,
28828 IX86_BUILTIN_MASKSTOREQ256,
28829 IX86_BUILTIN_PSLLVV4DI,
28830 IX86_BUILTIN_PSLLVV2DI,
28831 IX86_BUILTIN_PSLLVV8SI,
28832 IX86_BUILTIN_PSLLVV4SI,
28833 IX86_BUILTIN_PSRAVV8SI,
28834 IX86_BUILTIN_PSRAVV4SI,
28835 IX86_BUILTIN_PSRLVV4DI,
28836 IX86_BUILTIN_PSRLVV2DI,
28837 IX86_BUILTIN_PSRLVV8SI,
28838 IX86_BUILTIN_PSRLVV4SI,
28840 IX86_BUILTIN_GATHERSIV2DF,
28841 IX86_BUILTIN_GATHERSIV4DF,
28842 IX86_BUILTIN_GATHERDIV2DF,
28843 IX86_BUILTIN_GATHERDIV4DF,
28844 IX86_BUILTIN_GATHERSIV4SF,
28845 IX86_BUILTIN_GATHERSIV8SF,
28846 IX86_BUILTIN_GATHERDIV4SF,
28847 IX86_BUILTIN_GATHERDIV8SF,
28848 IX86_BUILTIN_GATHERSIV2DI,
28849 IX86_BUILTIN_GATHERSIV4DI,
28850 IX86_BUILTIN_GATHERDIV2DI,
28851 IX86_BUILTIN_GATHERDIV4DI,
28852 IX86_BUILTIN_GATHERSIV4SI,
28853 IX86_BUILTIN_GATHERSIV8SI,
28854 IX86_BUILTIN_GATHERDIV4SI,
28855 IX86_BUILTIN_GATHERDIV8SI,
28857 /* AVX512F. */
28858 IX86_BUILTIN_SI512_SI256,
28859 IX86_BUILTIN_PD512_PD256,
28860 IX86_BUILTIN_PS512_PS256,
28861 IX86_BUILTIN_SI512_SI,
28862 IX86_BUILTIN_PD512_PD,
28863 IX86_BUILTIN_PS512_PS,
28864 IX86_BUILTIN_ADDPD512,
28865 IX86_BUILTIN_ADDPS512,
28866 IX86_BUILTIN_ADDSD_ROUND,
28867 IX86_BUILTIN_ADDSS_ROUND,
28868 IX86_BUILTIN_ALIGND512,
28869 IX86_BUILTIN_ALIGNQ512,
28870 IX86_BUILTIN_BLENDMD512,
28871 IX86_BUILTIN_BLENDMPD512,
28872 IX86_BUILTIN_BLENDMPS512,
28873 IX86_BUILTIN_BLENDMQ512,
28874 IX86_BUILTIN_BROADCASTF32X4_512,
28875 IX86_BUILTIN_BROADCASTF64X4_512,
28876 IX86_BUILTIN_BROADCASTI32X4_512,
28877 IX86_BUILTIN_BROADCASTI64X4_512,
28878 IX86_BUILTIN_BROADCASTSD512,
28879 IX86_BUILTIN_BROADCASTSS512,
28880 IX86_BUILTIN_CMPD512,
28881 IX86_BUILTIN_CMPPD512,
28882 IX86_BUILTIN_CMPPS512,
28883 IX86_BUILTIN_CMPQ512,
28884 IX86_BUILTIN_CMPSD_MASK,
28885 IX86_BUILTIN_CMPSS_MASK,
28886 IX86_BUILTIN_COMIDF,
28887 IX86_BUILTIN_COMISF,
28888 IX86_BUILTIN_COMPRESSPD512,
28889 IX86_BUILTIN_COMPRESSPDSTORE512,
28890 IX86_BUILTIN_COMPRESSPS512,
28891 IX86_BUILTIN_COMPRESSPSSTORE512,
28892 IX86_BUILTIN_CVTDQ2PD512,
28893 IX86_BUILTIN_CVTDQ2PS512,
28894 IX86_BUILTIN_CVTPD2DQ512,
28895 IX86_BUILTIN_CVTPD2PS512,
28896 IX86_BUILTIN_CVTPD2UDQ512,
28897 IX86_BUILTIN_CVTPH2PS512,
28898 IX86_BUILTIN_CVTPS2DQ512,
28899 IX86_BUILTIN_CVTPS2PD512,
28900 IX86_BUILTIN_CVTPS2PH512,
28901 IX86_BUILTIN_CVTPS2UDQ512,
28902 IX86_BUILTIN_CVTSD2SS_ROUND,
28903 IX86_BUILTIN_CVTSI2SD64,
28904 IX86_BUILTIN_CVTSI2SS32,
28905 IX86_BUILTIN_CVTSI2SS64,
28906 IX86_BUILTIN_CVTSS2SD_ROUND,
28907 IX86_BUILTIN_CVTTPD2DQ512,
28908 IX86_BUILTIN_CVTTPD2UDQ512,
28909 IX86_BUILTIN_CVTTPS2DQ512,
28910 IX86_BUILTIN_CVTTPS2UDQ512,
28911 IX86_BUILTIN_CVTUDQ2PD512,
28912 IX86_BUILTIN_CVTUDQ2PS512,
28913 IX86_BUILTIN_CVTUSI2SD32,
28914 IX86_BUILTIN_CVTUSI2SD64,
28915 IX86_BUILTIN_CVTUSI2SS32,
28916 IX86_BUILTIN_CVTUSI2SS64,
28917 IX86_BUILTIN_DIVPD512,
28918 IX86_BUILTIN_DIVPS512,
28919 IX86_BUILTIN_DIVSD_ROUND,
28920 IX86_BUILTIN_DIVSS_ROUND,
28921 IX86_BUILTIN_EXPANDPD512,
28922 IX86_BUILTIN_EXPANDPD512Z,
28923 IX86_BUILTIN_EXPANDPDLOAD512,
28924 IX86_BUILTIN_EXPANDPDLOAD512Z,
28925 IX86_BUILTIN_EXPANDPS512,
28926 IX86_BUILTIN_EXPANDPS512Z,
28927 IX86_BUILTIN_EXPANDPSLOAD512,
28928 IX86_BUILTIN_EXPANDPSLOAD512Z,
28929 IX86_BUILTIN_EXTRACTF32X4,
28930 IX86_BUILTIN_EXTRACTF64X4,
28931 IX86_BUILTIN_EXTRACTI32X4,
28932 IX86_BUILTIN_EXTRACTI64X4,
28933 IX86_BUILTIN_FIXUPIMMPD512_MASK,
28934 IX86_BUILTIN_FIXUPIMMPD512_MASKZ,
28935 IX86_BUILTIN_FIXUPIMMPS512_MASK,
28936 IX86_BUILTIN_FIXUPIMMPS512_MASKZ,
28937 IX86_BUILTIN_FIXUPIMMSD128_MASK,
28938 IX86_BUILTIN_FIXUPIMMSD128_MASKZ,
28939 IX86_BUILTIN_FIXUPIMMSS128_MASK,
28940 IX86_BUILTIN_FIXUPIMMSS128_MASKZ,
28941 IX86_BUILTIN_GETEXPPD512,
28942 IX86_BUILTIN_GETEXPPS512,
28943 IX86_BUILTIN_GETEXPSD128,
28944 IX86_BUILTIN_GETEXPSS128,
28945 IX86_BUILTIN_GETMANTPD512,
28946 IX86_BUILTIN_GETMANTPS512,
28947 IX86_BUILTIN_GETMANTSD128,
28948 IX86_BUILTIN_GETMANTSS128,
28949 IX86_BUILTIN_INSERTF32X4,
28950 IX86_BUILTIN_INSERTF64X4,
28951 IX86_BUILTIN_INSERTI32X4,
28952 IX86_BUILTIN_INSERTI64X4,
28953 IX86_BUILTIN_LOADAPD512,
28954 IX86_BUILTIN_LOADAPS512,
28955 IX86_BUILTIN_LOADDQUDI512,
28956 IX86_BUILTIN_LOADDQUSI512,
28957 IX86_BUILTIN_LOADUPD512,
28958 IX86_BUILTIN_LOADUPS512,
28959 IX86_BUILTIN_MAXPD512,
28960 IX86_BUILTIN_MAXPS512,
28961 IX86_BUILTIN_MAXSD_ROUND,
28962 IX86_BUILTIN_MAXSS_ROUND,
28963 IX86_BUILTIN_MINPD512,
28964 IX86_BUILTIN_MINPS512,
28965 IX86_BUILTIN_MINSD_ROUND,
28966 IX86_BUILTIN_MINSS_ROUND,
28967 IX86_BUILTIN_MOVAPD512,
28968 IX86_BUILTIN_MOVAPS512,
28969 IX86_BUILTIN_MOVDDUP512,
28970 IX86_BUILTIN_MOVDQA32LOAD512,
28971 IX86_BUILTIN_MOVDQA32STORE512,
28972 IX86_BUILTIN_MOVDQA32_512,
28973 IX86_BUILTIN_MOVDQA64LOAD512,
28974 IX86_BUILTIN_MOVDQA64STORE512,
28975 IX86_BUILTIN_MOVDQA64_512,
28976 IX86_BUILTIN_MOVNTDQ512,
28977 IX86_BUILTIN_MOVNTDQA512,
28978 IX86_BUILTIN_MOVNTPD512,
28979 IX86_BUILTIN_MOVNTPS512,
28980 IX86_BUILTIN_MOVSHDUP512,
28981 IX86_BUILTIN_MOVSLDUP512,
28982 IX86_BUILTIN_MULPD512,
28983 IX86_BUILTIN_MULPS512,
28984 IX86_BUILTIN_MULSD_ROUND,
28985 IX86_BUILTIN_MULSS_ROUND,
28986 IX86_BUILTIN_PABSD512,
28987 IX86_BUILTIN_PABSQ512,
28988 IX86_BUILTIN_PADDD512,
28989 IX86_BUILTIN_PADDQ512,
28990 IX86_BUILTIN_PANDD512,
28991 IX86_BUILTIN_PANDND512,
28992 IX86_BUILTIN_PANDNQ512,
28993 IX86_BUILTIN_PANDQ512,
28994 IX86_BUILTIN_PBROADCASTD512,
28995 IX86_BUILTIN_PBROADCASTD512_GPR,
28996 IX86_BUILTIN_PBROADCASTMB512,
28997 IX86_BUILTIN_PBROADCASTMW512,
28998 IX86_BUILTIN_PBROADCASTQ512,
28999 IX86_BUILTIN_PBROADCASTQ512_GPR,
29000 IX86_BUILTIN_PCMPEQD512_MASK,
29001 IX86_BUILTIN_PCMPEQQ512_MASK,
29002 IX86_BUILTIN_PCMPGTD512_MASK,
29003 IX86_BUILTIN_PCMPGTQ512_MASK,
29004 IX86_BUILTIN_PCOMPRESSD512,
29005 IX86_BUILTIN_PCOMPRESSDSTORE512,
29006 IX86_BUILTIN_PCOMPRESSQ512,
29007 IX86_BUILTIN_PCOMPRESSQSTORE512,
29008 IX86_BUILTIN_PEXPANDD512,
29009 IX86_BUILTIN_PEXPANDD512Z,
29010 IX86_BUILTIN_PEXPANDDLOAD512,
29011 IX86_BUILTIN_PEXPANDDLOAD512Z,
29012 IX86_BUILTIN_PEXPANDQ512,
29013 IX86_BUILTIN_PEXPANDQ512Z,
29014 IX86_BUILTIN_PEXPANDQLOAD512,
29015 IX86_BUILTIN_PEXPANDQLOAD512Z,
29016 IX86_BUILTIN_PMAXSD512,
29017 IX86_BUILTIN_PMAXSQ512,
29018 IX86_BUILTIN_PMAXUD512,
29019 IX86_BUILTIN_PMAXUQ512,
29020 IX86_BUILTIN_PMINSD512,
29021 IX86_BUILTIN_PMINSQ512,
29022 IX86_BUILTIN_PMINUD512,
29023 IX86_BUILTIN_PMINUQ512,
29024 IX86_BUILTIN_PMOVDB512,
29025 IX86_BUILTIN_PMOVDB512_MEM,
29026 IX86_BUILTIN_PMOVDW512,
29027 IX86_BUILTIN_PMOVDW512_MEM,
29028 IX86_BUILTIN_PMOVQB512,
29029 IX86_BUILTIN_PMOVQB512_MEM,
29030 IX86_BUILTIN_PMOVQD512,
29031 IX86_BUILTIN_PMOVQD512_MEM,
29032 IX86_BUILTIN_PMOVQW512,
29033 IX86_BUILTIN_PMOVQW512_MEM,
29034 IX86_BUILTIN_PMOVSDB512,
29035 IX86_BUILTIN_PMOVSDB512_MEM,
29036 IX86_BUILTIN_PMOVSDW512,
29037 IX86_BUILTIN_PMOVSDW512_MEM,
29038 IX86_BUILTIN_PMOVSQB512,
29039 IX86_BUILTIN_PMOVSQB512_MEM,
29040 IX86_BUILTIN_PMOVSQD512,
29041 IX86_BUILTIN_PMOVSQD512_MEM,
29042 IX86_BUILTIN_PMOVSQW512,
29043 IX86_BUILTIN_PMOVSQW512_MEM,
29044 IX86_BUILTIN_PMOVSXBD512,
29045 IX86_BUILTIN_PMOVSXBQ512,
29046 IX86_BUILTIN_PMOVSXDQ512,
29047 IX86_BUILTIN_PMOVSXWD512,
29048 IX86_BUILTIN_PMOVSXWQ512,
29049 IX86_BUILTIN_PMOVUSDB512,
29050 IX86_BUILTIN_PMOVUSDB512_MEM,
29051 IX86_BUILTIN_PMOVUSDW512,
29052 IX86_BUILTIN_PMOVUSDW512_MEM,
29053 IX86_BUILTIN_PMOVUSQB512,
29054 IX86_BUILTIN_PMOVUSQB512_MEM,
29055 IX86_BUILTIN_PMOVUSQD512,
29056 IX86_BUILTIN_PMOVUSQD512_MEM,
29057 IX86_BUILTIN_PMOVUSQW512,
29058 IX86_BUILTIN_PMOVUSQW512_MEM,
29059 IX86_BUILTIN_PMOVZXBD512,
29060 IX86_BUILTIN_PMOVZXBQ512,
29061 IX86_BUILTIN_PMOVZXDQ512,
29062 IX86_BUILTIN_PMOVZXWD512,
29063 IX86_BUILTIN_PMOVZXWQ512,
29064 IX86_BUILTIN_PMULDQ512,
29065 IX86_BUILTIN_PMULLD512,
29066 IX86_BUILTIN_PMULUDQ512,
29067 IX86_BUILTIN_PORD512,
29068 IX86_BUILTIN_PORQ512,
29069 IX86_BUILTIN_PROLD512,
29070 IX86_BUILTIN_PROLQ512,
29071 IX86_BUILTIN_PROLVD512,
29072 IX86_BUILTIN_PROLVQ512,
29073 IX86_BUILTIN_PRORD512,
29074 IX86_BUILTIN_PRORQ512,
29075 IX86_BUILTIN_PRORVD512,
29076 IX86_BUILTIN_PRORVQ512,
29077 IX86_BUILTIN_PSHUFD512,
29078 IX86_BUILTIN_PSLLD512,
29079 IX86_BUILTIN_PSLLDI512,
29080 IX86_BUILTIN_PSLLQ512,
29081 IX86_BUILTIN_PSLLQI512,
29082 IX86_BUILTIN_PSLLVV16SI,
29083 IX86_BUILTIN_PSLLVV8DI,
29084 IX86_BUILTIN_PSRAD512,
29085 IX86_BUILTIN_PSRADI512,
29086 IX86_BUILTIN_PSRAQ512,
29087 IX86_BUILTIN_PSRAQI512,
29088 IX86_BUILTIN_PSRAVV16SI,
29089 IX86_BUILTIN_PSRAVV8DI,
29090 IX86_BUILTIN_PSRLD512,
29091 IX86_BUILTIN_PSRLDI512,
29092 IX86_BUILTIN_PSRLQ512,
29093 IX86_BUILTIN_PSRLQI512,
29094 IX86_BUILTIN_PSRLVV16SI,
29095 IX86_BUILTIN_PSRLVV8DI,
29096 IX86_BUILTIN_PSUBD512,
29097 IX86_BUILTIN_PSUBQ512,
29098 IX86_BUILTIN_PTESTMD512,
29099 IX86_BUILTIN_PTESTMQ512,
29100 IX86_BUILTIN_PTESTNMD512,
29101 IX86_BUILTIN_PTESTNMQ512,
29102 IX86_BUILTIN_PUNPCKHDQ512,
29103 IX86_BUILTIN_PUNPCKHQDQ512,
29104 IX86_BUILTIN_PUNPCKLDQ512,
29105 IX86_BUILTIN_PUNPCKLQDQ512,
29106 IX86_BUILTIN_PXORD512,
29107 IX86_BUILTIN_PXORQ512,
29108 IX86_BUILTIN_RCP14PD512,
29109 IX86_BUILTIN_RCP14PS512,
29110 IX86_BUILTIN_RCP14SD,
29111 IX86_BUILTIN_RCP14SS,
29112 IX86_BUILTIN_RNDSCALEPD,
29113 IX86_BUILTIN_RNDSCALEPS,
29114 IX86_BUILTIN_RNDSCALESD,
29115 IX86_BUILTIN_RNDSCALESS,
29116 IX86_BUILTIN_RSQRT14PD512,
29117 IX86_BUILTIN_RSQRT14PS512,
29118 IX86_BUILTIN_RSQRT14SD,
29119 IX86_BUILTIN_RSQRT14SS,
29120 IX86_BUILTIN_SCALEFPD512,
29121 IX86_BUILTIN_SCALEFPS512,
29122 IX86_BUILTIN_SCALEFSD,
29123 IX86_BUILTIN_SCALEFSS,
29124 IX86_BUILTIN_SHUFPD512,
29125 IX86_BUILTIN_SHUFPS512,
29126 IX86_BUILTIN_SHUF_F32x4,
29127 IX86_BUILTIN_SHUF_F64x2,
29128 IX86_BUILTIN_SHUF_I32x4,
29129 IX86_BUILTIN_SHUF_I64x2,
29130 IX86_BUILTIN_SQRTPD512,
29131 IX86_BUILTIN_SQRTPD512_MASK,
29132 IX86_BUILTIN_SQRTPS512_MASK,
29133 IX86_BUILTIN_SQRTPS_NR512,
29134 IX86_BUILTIN_SQRTSD_ROUND,
29135 IX86_BUILTIN_SQRTSS_ROUND,
29136 IX86_BUILTIN_STOREAPD512,
29137 IX86_BUILTIN_STOREAPS512,
29138 IX86_BUILTIN_STOREDQUDI512,
29139 IX86_BUILTIN_STOREDQUSI512,
29140 IX86_BUILTIN_STOREUPD512,
29141 IX86_BUILTIN_STOREUPS512,
29142 IX86_BUILTIN_SUBPD512,
29143 IX86_BUILTIN_SUBPS512,
29144 IX86_BUILTIN_SUBSD_ROUND,
29145 IX86_BUILTIN_SUBSS_ROUND,
29146 IX86_BUILTIN_UCMPD512,
29147 IX86_BUILTIN_UCMPQ512,
29148 IX86_BUILTIN_UNPCKHPD512,
29149 IX86_BUILTIN_UNPCKHPS512,
29150 IX86_BUILTIN_UNPCKLPD512,
29151 IX86_BUILTIN_UNPCKLPS512,
29152 IX86_BUILTIN_VCVTSD2SI32,
29153 IX86_BUILTIN_VCVTSD2SI64,
29154 IX86_BUILTIN_VCVTSD2USI32,
29155 IX86_BUILTIN_VCVTSD2USI64,
29156 IX86_BUILTIN_VCVTSS2SI32,
29157 IX86_BUILTIN_VCVTSS2SI64,
29158 IX86_BUILTIN_VCVTSS2USI32,
29159 IX86_BUILTIN_VCVTSS2USI64,
29160 IX86_BUILTIN_VCVTTSD2SI32,
29161 IX86_BUILTIN_VCVTTSD2SI64,
29162 IX86_BUILTIN_VCVTTSD2USI32,
29163 IX86_BUILTIN_VCVTTSD2USI64,
29164 IX86_BUILTIN_VCVTTSS2SI32,
29165 IX86_BUILTIN_VCVTTSS2SI64,
29166 IX86_BUILTIN_VCVTTSS2USI32,
29167 IX86_BUILTIN_VCVTTSS2USI64,
29168 IX86_BUILTIN_VFMADDPD512_MASK,
29169 IX86_BUILTIN_VFMADDPD512_MASK3,
29170 IX86_BUILTIN_VFMADDPD512_MASKZ,
29171 IX86_BUILTIN_VFMADDPS512_MASK,
29172 IX86_BUILTIN_VFMADDPS512_MASK3,
29173 IX86_BUILTIN_VFMADDPS512_MASKZ,
29174 IX86_BUILTIN_VFMADDSD3_ROUND,
29175 IX86_BUILTIN_VFMADDSS3_ROUND,
29176 IX86_BUILTIN_VFMADDSUBPD512_MASK,
29177 IX86_BUILTIN_VFMADDSUBPD512_MASK3,
29178 IX86_BUILTIN_VFMADDSUBPD512_MASKZ,
29179 IX86_BUILTIN_VFMADDSUBPS512_MASK,
29180 IX86_BUILTIN_VFMADDSUBPS512_MASK3,
29181 IX86_BUILTIN_VFMADDSUBPS512_MASKZ,
29182 IX86_BUILTIN_VFMSUBADDPD512_MASK3,
29183 IX86_BUILTIN_VFMSUBADDPS512_MASK3,
29184 IX86_BUILTIN_VFMSUBPD512_MASK3,
29185 IX86_BUILTIN_VFMSUBPS512_MASK3,
29186 IX86_BUILTIN_VFMSUBSD3_MASK3,
29187 IX86_BUILTIN_VFMSUBSS3_MASK3,
29188 IX86_BUILTIN_VFNMADDPD512_MASK,
29189 IX86_BUILTIN_VFNMADDPS512_MASK,
29190 IX86_BUILTIN_VFNMSUBPD512_MASK,
29191 IX86_BUILTIN_VFNMSUBPD512_MASK3,
29192 IX86_BUILTIN_VFNMSUBPS512_MASK,
29193 IX86_BUILTIN_VFNMSUBPS512_MASK3,
29194 IX86_BUILTIN_VPCLZCNTD512,
29195 IX86_BUILTIN_VPCLZCNTQ512,
29196 IX86_BUILTIN_VPCONFLICTD512,
29197 IX86_BUILTIN_VPCONFLICTQ512,
29198 IX86_BUILTIN_VPERMDF512,
29199 IX86_BUILTIN_VPERMDI512,
29200 IX86_BUILTIN_VPERMI2VARD512,
29201 IX86_BUILTIN_VPERMI2VARPD512,
29202 IX86_BUILTIN_VPERMI2VARPS512,
29203 IX86_BUILTIN_VPERMI2VARQ512,
29204 IX86_BUILTIN_VPERMILPD512,
29205 IX86_BUILTIN_VPERMILPS512,
29206 IX86_BUILTIN_VPERMILVARPD512,
29207 IX86_BUILTIN_VPERMILVARPS512,
29208 IX86_BUILTIN_VPERMT2VARD512,
29209 IX86_BUILTIN_VPERMT2VARD512_MASKZ,
29210 IX86_BUILTIN_VPERMT2VARPD512,
29211 IX86_BUILTIN_VPERMT2VARPD512_MASKZ,
29212 IX86_BUILTIN_VPERMT2VARPS512,
29213 IX86_BUILTIN_VPERMT2VARPS512_MASKZ,
29214 IX86_BUILTIN_VPERMT2VARQ512,
29215 IX86_BUILTIN_VPERMT2VARQ512_MASKZ,
29216 IX86_BUILTIN_VPERMVARDF512,
29217 IX86_BUILTIN_VPERMVARDI512,
29218 IX86_BUILTIN_VPERMVARSF512,
29219 IX86_BUILTIN_VPERMVARSI512,
29220 IX86_BUILTIN_VTERNLOGD512_MASK,
29221 IX86_BUILTIN_VTERNLOGD512_MASKZ,
29222 IX86_BUILTIN_VTERNLOGQ512_MASK,
29223 IX86_BUILTIN_VTERNLOGQ512_MASKZ,
29225 /* Mask arithmetic operations. */
29226 IX86_BUILTIN_KAND16,
29227 IX86_BUILTIN_KANDN16,
29228 IX86_BUILTIN_KNOT16,
29229 IX86_BUILTIN_KOR16,
29230 IX86_BUILTIN_KORTESTC16,
29231 IX86_BUILTIN_KORTESTZ16,
29232 IX86_BUILTIN_KUNPCKBW,
29233 IX86_BUILTIN_KXNOR16,
29234 IX86_BUILTIN_KXOR16,
29235 IX86_BUILTIN_KMOV16,
29237 /* AVX512VL. */
29238 IX86_BUILTIN_PMOVUSQD256_MEM,
29239 IX86_BUILTIN_PMOVUSQD128_MEM,
29240 IX86_BUILTIN_PMOVSQD256_MEM,
29241 IX86_BUILTIN_PMOVSQD128_MEM,
29242 IX86_BUILTIN_PMOVQD256_MEM,
29243 IX86_BUILTIN_PMOVQD128_MEM,
29244 IX86_BUILTIN_PMOVUSQW256_MEM,
29245 IX86_BUILTIN_PMOVUSQW128_MEM,
29246 IX86_BUILTIN_PMOVSQW256_MEM,
29247 IX86_BUILTIN_PMOVSQW128_MEM,
29248 IX86_BUILTIN_PMOVQW256_MEM,
29249 IX86_BUILTIN_PMOVQW128_MEM,
29250 IX86_BUILTIN_PMOVUSQB256_MEM,
29251 IX86_BUILTIN_PMOVUSQB128_MEM,
29252 IX86_BUILTIN_PMOVSQB256_MEM,
29253 IX86_BUILTIN_PMOVSQB128_MEM,
29254 IX86_BUILTIN_PMOVQB256_MEM,
29255 IX86_BUILTIN_PMOVQB128_MEM,
29256 IX86_BUILTIN_PMOVUSDW256_MEM,
29257 IX86_BUILTIN_PMOVUSDW128_MEM,
29258 IX86_BUILTIN_PMOVSDW256_MEM,
29259 IX86_BUILTIN_PMOVSDW128_MEM,
29260 IX86_BUILTIN_PMOVDW256_MEM,
29261 IX86_BUILTIN_PMOVDW128_MEM,
29262 IX86_BUILTIN_PMOVUSDB256_MEM,
29263 IX86_BUILTIN_PMOVUSDB128_MEM,
29264 IX86_BUILTIN_PMOVSDB256_MEM,
29265 IX86_BUILTIN_PMOVSDB128_MEM,
29266 IX86_BUILTIN_PMOVDB256_MEM,
29267 IX86_BUILTIN_PMOVDB128_MEM,
29268 IX86_BUILTIN_MOVDQA64LOAD256_MASK,
29269 IX86_BUILTIN_MOVDQA64LOAD128_MASK,
29270 IX86_BUILTIN_MOVDQA32LOAD256_MASK,
29271 IX86_BUILTIN_MOVDQA32LOAD128_MASK,
29272 IX86_BUILTIN_MOVDQA64STORE256_MASK,
29273 IX86_BUILTIN_MOVDQA64STORE128_MASK,
29274 IX86_BUILTIN_MOVDQA32STORE256_MASK,
29275 IX86_BUILTIN_MOVDQA32STORE128_MASK,
29276 IX86_BUILTIN_LOADAPD256_MASK,
29277 IX86_BUILTIN_LOADAPD128_MASK,
29278 IX86_BUILTIN_LOADAPS256_MASK,
29279 IX86_BUILTIN_LOADAPS128_MASK,
29280 IX86_BUILTIN_STOREAPD256_MASK,
29281 IX86_BUILTIN_STOREAPD128_MASK,
29282 IX86_BUILTIN_STOREAPS256_MASK,
29283 IX86_BUILTIN_STOREAPS128_MASK,
29284 IX86_BUILTIN_LOADUPD256_MASK,
29285 IX86_BUILTIN_LOADUPD128_MASK,
29286 IX86_BUILTIN_LOADUPS256_MASK,
29287 IX86_BUILTIN_LOADUPS128_MASK,
29288 IX86_BUILTIN_STOREUPD256_MASK,
29289 IX86_BUILTIN_STOREUPD128_MASK,
29290 IX86_BUILTIN_STOREUPS256_MASK,
29291 IX86_BUILTIN_STOREUPS128_MASK,
29292 IX86_BUILTIN_LOADDQUDI256_MASK,
29293 IX86_BUILTIN_LOADDQUDI128_MASK,
29294 IX86_BUILTIN_LOADDQUSI256_MASK,
29295 IX86_BUILTIN_LOADDQUSI128_MASK,
29296 IX86_BUILTIN_LOADDQUHI256_MASK,
29297 IX86_BUILTIN_LOADDQUHI128_MASK,
29298 IX86_BUILTIN_LOADDQUQI256_MASK,
29299 IX86_BUILTIN_LOADDQUQI128_MASK,
29300 IX86_BUILTIN_STOREDQUDI256_MASK,
29301 IX86_BUILTIN_STOREDQUDI128_MASK,
29302 IX86_BUILTIN_STOREDQUSI256_MASK,
29303 IX86_BUILTIN_STOREDQUSI128_MASK,
29304 IX86_BUILTIN_STOREDQUHI256_MASK,
29305 IX86_BUILTIN_STOREDQUHI128_MASK,
29306 IX86_BUILTIN_STOREDQUQI256_MASK,
29307 IX86_BUILTIN_STOREDQUQI128_MASK,
29308 IX86_BUILTIN_COMPRESSPDSTORE256,
29309 IX86_BUILTIN_COMPRESSPDSTORE128,
29310 IX86_BUILTIN_COMPRESSPSSTORE256,
29311 IX86_BUILTIN_COMPRESSPSSTORE128,
29312 IX86_BUILTIN_PCOMPRESSQSTORE256,
29313 IX86_BUILTIN_PCOMPRESSQSTORE128,
29314 IX86_BUILTIN_PCOMPRESSDSTORE256,
29315 IX86_BUILTIN_PCOMPRESSDSTORE128,
29316 IX86_BUILTIN_EXPANDPDLOAD256,
29317 IX86_BUILTIN_EXPANDPDLOAD128,
29318 IX86_BUILTIN_EXPANDPSLOAD256,
29319 IX86_BUILTIN_EXPANDPSLOAD128,
29320 IX86_BUILTIN_PEXPANDQLOAD256,
29321 IX86_BUILTIN_PEXPANDQLOAD128,
29322 IX86_BUILTIN_PEXPANDDLOAD256,
29323 IX86_BUILTIN_PEXPANDDLOAD128,
29324 IX86_BUILTIN_EXPANDPDLOAD256Z,
29325 IX86_BUILTIN_EXPANDPDLOAD128Z,
29326 IX86_BUILTIN_EXPANDPSLOAD256Z,
29327 IX86_BUILTIN_EXPANDPSLOAD128Z,
29328 IX86_BUILTIN_PEXPANDQLOAD256Z,
29329 IX86_BUILTIN_PEXPANDQLOAD128Z,
29330 IX86_BUILTIN_PEXPANDDLOAD256Z,
29331 IX86_BUILTIN_PEXPANDDLOAD128Z,
29332 IX86_BUILTIN_PALIGNR256_MASK,
29333 IX86_BUILTIN_PALIGNR128_MASK,
29334 IX86_BUILTIN_MOVDQA64_256_MASK,
29335 IX86_BUILTIN_MOVDQA64_128_MASK,
29336 IX86_BUILTIN_MOVDQA32_256_MASK,
29337 IX86_BUILTIN_MOVDQA32_128_MASK,
29338 IX86_BUILTIN_MOVAPD256_MASK,
29339 IX86_BUILTIN_MOVAPD128_MASK,
29340 IX86_BUILTIN_MOVAPS256_MASK,
29341 IX86_BUILTIN_MOVAPS128_MASK,
29342 IX86_BUILTIN_MOVDQUHI256_MASK,
29343 IX86_BUILTIN_MOVDQUHI128_MASK,
29344 IX86_BUILTIN_MOVDQUQI256_MASK,
29345 IX86_BUILTIN_MOVDQUQI128_MASK,
29346 IX86_BUILTIN_MINPS128_MASK,
29347 IX86_BUILTIN_MAXPS128_MASK,
29348 IX86_BUILTIN_MINPD128_MASK,
29349 IX86_BUILTIN_MAXPD128_MASK,
29350 IX86_BUILTIN_MAXPD256_MASK,
29351 IX86_BUILTIN_MAXPS256_MASK,
29352 IX86_BUILTIN_MINPD256_MASK,
29353 IX86_BUILTIN_MINPS256_MASK,
29354 IX86_BUILTIN_MULPS128_MASK,
29355 IX86_BUILTIN_DIVPS128_MASK,
29356 IX86_BUILTIN_MULPD128_MASK,
29357 IX86_BUILTIN_DIVPD128_MASK,
29358 IX86_BUILTIN_DIVPD256_MASK,
29359 IX86_BUILTIN_DIVPS256_MASK,
29360 IX86_BUILTIN_MULPD256_MASK,
29361 IX86_BUILTIN_MULPS256_MASK,
29362 IX86_BUILTIN_ADDPD128_MASK,
29363 IX86_BUILTIN_ADDPD256_MASK,
29364 IX86_BUILTIN_ADDPS128_MASK,
29365 IX86_BUILTIN_ADDPS256_MASK,
29366 IX86_BUILTIN_SUBPD128_MASK,
29367 IX86_BUILTIN_SUBPD256_MASK,
29368 IX86_BUILTIN_SUBPS128_MASK,
29369 IX86_BUILTIN_SUBPS256_MASK,
29370 IX86_BUILTIN_XORPD256_MASK,
29371 IX86_BUILTIN_XORPD128_MASK,
29372 IX86_BUILTIN_XORPS256_MASK,
29373 IX86_BUILTIN_XORPS128_MASK,
29374 IX86_BUILTIN_ORPD256_MASK,
29375 IX86_BUILTIN_ORPD128_MASK,
29376 IX86_BUILTIN_ORPS256_MASK,
29377 IX86_BUILTIN_ORPS128_MASK,
29378 IX86_BUILTIN_BROADCASTF32x2_256,
29379 IX86_BUILTIN_BROADCASTI32x2_256,
29380 IX86_BUILTIN_BROADCASTI32x2_128,
29381 IX86_BUILTIN_BROADCASTF64X2_256,
29382 IX86_BUILTIN_BROADCASTI64X2_256,
29383 IX86_BUILTIN_BROADCASTF32X4_256,
29384 IX86_BUILTIN_BROADCASTI32X4_256,
29385 IX86_BUILTIN_EXTRACTF32X4_256,
29386 IX86_BUILTIN_EXTRACTI32X4_256,
29387 IX86_BUILTIN_DBPSADBW256,
29388 IX86_BUILTIN_DBPSADBW128,
29389 IX86_BUILTIN_CVTTPD2QQ256,
29390 IX86_BUILTIN_CVTTPD2QQ128,
29391 IX86_BUILTIN_CVTTPD2UQQ256,
29392 IX86_BUILTIN_CVTTPD2UQQ128,
29393 IX86_BUILTIN_CVTPD2QQ256,
29394 IX86_BUILTIN_CVTPD2QQ128,
29395 IX86_BUILTIN_CVTPD2UQQ256,
29396 IX86_BUILTIN_CVTPD2UQQ128,
29397 IX86_BUILTIN_CVTPD2UDQ256_MASK,
29398 IX86_BUILTIN_CVTPD2UDQ128_MASK,
29399 IX86_BUILTIN_CVTTPS2QQ256,
29400 IX86_BUILTIN_CVTTPS2QQ128,
29401 IX86_BUILTIN_CVTTPS2UQQ256,
29402 IX86_BUILTIN_CVTTPS2UQQ128,
29403 IX86_BUILTIN_CVTTPS2DQ256_MASK,
29404 IX86_BUILTIN_CVTTPS2DQ128_MASK,
29405 IX86_BUILTIN_CVTTPS2UDQ256,
29406 IX86_BUILTIN_CVTTPS2UDQ128,
29407 IX86_BUILTIN_CVTTPD2DQ256_MASK,
29408 IX86_BUILTIN_CVTTPD2DQ128_MASK,
29409 IX86_BUILTIN_CVTTPD2UDQ256_MASK,
29410 IX86_BUILTIN_CVTTPD2UDQ128_MASK,
29411 IX86_BUILTIN_CVTPD2DQ256_MASK,
29412 IX86_BUILTIN_CVTPD2DQ128_MASK,
29413 IX86_BUILTIN_CVTDQ2PD256_MASK,
29414 IX86_BUILTIN_CVTDQ2PD128_MASK,
29415 IX86_BUILTIN_CVTUDQ2PD256_MASK,
29416 IX86_BUILTIN_CVTUDQ2PD128_MASK,
29417 IX86_BUILTIN_CVTDQ2PS256_MASK,
29418 IX86_BUILTIN_CVTDQ2PS128_MASK,
29419 IX86_BUILTIN_CVTUDQ2PS256_MASK,
29420 IX86_BUILTIN_CVTUDQ2PS128_MASK,
29421 IX86_BUILTIN_CVTPS2PD256_MASK,
29422 IX86_BUILTIN_CVTPS2PD128_MASK,
29423 IX86_BUILTIN_PBROADCASTB256_MASK,
29424 IX86_BUILTIN_PBROADCASTB256_GPR_MASK,
29425 IX86_BUILTIN_PBROADCASTB128_MASK,
29426 IX86_BUILTIN_PBROADCASTB128_GPR_MASK,
29427 IX86_BUILTIN_PBROADCASTW256_MASK,
29428 IX86_BUILTIN_PBROADCASTW256_GPR_MASK,
29429 IX86_BUILTIN_PBROADCASTW128_MASK,
29430 IX86_BUILTIN_PBROADCASTW128_GPR_MASK,
29431 IX86_BUILTIN_PBROADCASTD256_MASK,
29432 IX86_BUILTIN_PBROADCASTD256_GPR_MASK,
29433 IX86_BUILTIN_PBROADCASTD128_MASK,
29434 IX86_BUILTIN_PBROADCASTD128_GPR_MASK,
29435 IX86_BUILTIN_PBROADCASTQ256_MASK,
29436 IX86_BUILTIN_PBROADCASTQ256_GPR_MASK,
29437 IX86_BUILTIN_PBROADCASTQ128_MASK,
29438 IX86_BUILTIN_PBROADCASTQ128_GPR_MASK,
29439 IX86_BUILTIN_BROADCASTSS256,
29440 IX86_BUILTIN_BROADCASTSS128,
29441 IX86_BUILTIN_BROADCASTSD256,
29442 IX86_BUILTIN_EXTRACTF64X2_256,
29443 IX86_BUILTIN_EXTRACTI64X2_256,
29444 IX86_BUILTIN_INSERTF32X4_256,
29445 IX86_BUILTIN_INSERTI32X4_256,
29446 IX86_BUILTIN_PMOVSXBW256_MASK,
29447 IX86_BUILTIN_PMOVSXBW128_MASK,
29448 IX86_BUILTIN_PMOVSXBD256_MASK,
29449 IX86_BUILTIN_PMOVSXBD128_MASK,
29450 IX86_BUILTIN_PMOVSXBQ256_MASK,
29451 IX86_BUILTIN_PMOVSXBQ128_MASK,
29452 IX86_BUILTIN_PMOVSXWD256_MASK,
29453 IX86_BUILTIN_PMOVSXWD128_MASK,
29454 IX86_BUILTIN_PMOVSXWQ256_MASK,
29455 IX86_BUILTIN_PMOVSXWQ128_MASK,
29456 IX86_BUILTIN_PMOVSXDQ256_MASK,
29457 IX86_BUILTIN_PMOVSXDQ128_MASK,
29458 IX86_BUILTIN_PMOVZXBW256_MASK,
29459 IX86_BUILTIN_PMOVZXBW128_MASK,
29460 IX86_BUILTIN_PMOVZXBD256_MASK,
29461 IX86_BUILTIN_PMOVZXBD128_MASK,
29462 IX86_BUILTIN_PMOVZXBQ256_MASK,
29463 IX86_BUILTIN_PMOVZXBQ128_MASK,
29464 IX86_BUILTIN_PMOVZXWD256_MASK,
29465 IX86_BUILTIN_PMOVZXWD128_MASK,
29466 IX86_BUILTIN_PMOVZXWQ256_MASK,
29467 IX86_BUILTIN_PMOVZXWQ128_MASK,
29468 IX86_BUILTIN_PMOVZXDQ256_MASK,
29469 IX86_BUILTIN_PMOVZXDQ128_MASK,
29470 IX86_BUILTIN_REDUCEPD256_MASK,
29471 IX86_BUILTIN_REDUCEPD128_MASK,
29472 IX86_BUILTIN_REDUCEPS256_MASK,
29473 IX86_BUILTIN_REDUCEPS128_MASK,
29474 IX86_BUILTIN_REDUCESD_MASK,
29475 IX86_BUILTIN_REDUCESS_MASK,
29476 IX86_BUILTIN_VPERMVARHI256_MASK,
29477 IX86_BUILTIN_VPERMVARHI128_MASK,
29478 IX86_BUILTIN_VPERMT2VARHI256,
29479 IX86_BUILTIN_VPERMT2VARHI256_MASKZ,
29480 IX86_BUILTIN_VPERMT2VARHI128,
29481 IX86_BUILTIN_VPERMT2VARHI128_MASKZ,
29482 IX86_BUILTIN_VPERMI2VARHI256,
29483 IX86_BUILTIN_VPERMI2VARHI128,
29484 IX86_BUILTIN_RCP14PD256,
29485 IX86_BUILTIN_RCP14PD128,
29486 IX86_BUILTIN_RCP14PS256,
29487 IX86_BUILTIN_RCP14PS128,
29488 IX86_BUILTIN_RSQRT14PD256_MASK,
29489 IX86_BUILTIN_RSQRT14PD128_MASK,
29490 IX86_BUILTIN_RSQRT14PS256_MASK,
29491 IX86_BUILTIN_RSQRT14PS128_MASK,
29492 IX86_BUILTIN_SQRTPD256_MASK,
29493 IX86_BUILTIN_SQRTPD128_MASK,
29494 IX86_BUILTIN_SQRTPS256_MASK,
29495 IX86_BUILTIN_SQRTPS128_MASK,
29496 IX86_BUILTIN_PADDB128_MASK,
29497 IX86_BUILTIN_PADDW128_MASK,
29498 IX86_BUILTIN_PADDD128_MASK,
29499 IX86_BUILTIN_PADDQ128_MASK,
29500 IX86_BUILTIN_PSUBB128_MASK,
29501 IX86_BUILTIN_PSUBW128_MASK,
29502 IX86_BUILTIN_PSUBD128_MASK,
29503 IX86_BUILTIN_PSUBQ128_MASK,
29504 IX86_BUILTIN_PADDSB128_MASK,
29505 IX86_BUILTIN_PADDSW128_MASK,
29506 IX86_BUILTIN_PSUBSB128_MASK,
29507 IX86_BUILTIN_PSUBSW128_MASK,
29508 IX86_BUILTIN_PADDUSB128_MASK,
29509 IX86_BUILTIN_PADDUSW128_MASK,
29510 IX86_BUILTIN_PSUBUSB128_MASK,
29511 IX86_BUILTIN_PSUBUSW128_MASK,
29512 IX86_BUILTIN_PADDB256_MASK,
29513 IX86_BUILTIN_PADDW256_MASK,
29514 IX86_BUILTIN_PADDD256_MASK,
29515 IX86_BUILTIN_PADDQ256_MASK,
29516 IX86_BUILTIN_PADDSB256_MASK,
29517 IX86_BUILTIN_PADDSW256_MASK,
29518 IX86_BUILTIN_PADDUSB256_MASK,
29519 IX86_BUILTIN_PADDUSW256_MASK,
29520 IX86_BUILTIN_PSUBB256_MASK,
29521 IX86_BUILTIN_PSUBW256_MASK,
29522 IX86_BUILTIN_PSUBD256_MASK,
29523 IX86_BUILTIN_PSUBQ256_MASK,
29524 IX86_BUILTIN_PSUBSB256_MASK,
29525 IX86_BUILTIN_PSUBSW256_MASK,
29526 IX86_BUILTIN_PSUBUSB256_MASK,
29527 IX86_BUILTIN_PSUBUSW256_MASK,
29528 IX86_BUILTIN_SHUF_F64x2_256,
29529 IX86_BUILTIN_SHUF_I64x2_256,
29530 IX86_BUILTIN_SHUF_I32x4_256,
29531 IX86_BUILTIN_SHUF_F32x4_256,
29532 IX86_BUILTIN_PMOVWB128,
29533 IX86_BUILTIN_PMOVWB256,
29534 IX86_BUILTIN_PMOVSWB128,
29535 IX86_BUILTIN_PMOVSWB256,
29536 IX86_BUILTIN_PMOVUSWB128,
29537 IX86_BUILTIN_PMOVUSWB256,
29538 IX86_BUILTIN_PMOVDB128,
29539 IX86_BUILTIN_PMOVDB256,
29540 IX86_BUILTIN_PMOVSDB128,
29541 IX86_BUILTIN_PMOVSDB256,
29542 IX86_BUILTIN_PMOVUSDB128,
29543 IX86_BUILTIN_PMOVUSDB256,
29544 IX86_BUILTIN_PMOVDW128,
29545 IX86_BUILTIN_PMOVDW256,
29546 IX86_BUILTIN_PMOVSDW128,
29547 IX86_BUILTIN_PMOVSDW256,
29548 IX86_BUILTIN_PMOVUSDW128,
29549 IX86_BUILTIN_PMOVUSDW256,
29550 IX86_BUILTIN_PMOVQB128,
29551 IX86_BUILTIN_PMOVQB256,
29552 IX86_BUILTIN_PMOVSQB128,
29553 IX86_BUILTIN_PMOVSQB256,
29554 IX86_BUILTIN_PMOVUSQB128,
29555 IX86_BUILTIN_PMOVUSQB256,
29556 IX86_BUILTIN_PMOVQW128,
29557 IX86_BUILTIN_PMOVQW256,
29558 IX86_BUILTIN_PMOVSQW128,
29559 IX86_BUILTIN_PMOVSQW256,
29560 IX86_BUILTIN_PMOVUSQW128,
29561 IX86_BUILTIN_PMOVUSQW256,
29562 IX86_BUILTIN_PMOVQD128,
29563 IX86_BUILTIN_PMOVQD256,
29564 IX86_BUILTIN_PMOVSQD128,
29565 IX86_BUILTIN_PMOVSQD256,
29566 IX86_BUILTIN_PMOVUSQD128,
29567 IX86_BUILTIN_PMOVUSQD256,
29568 IX86_BUILTIN_RANGEPD256,
29569 IX86_BUILTIN_RANGEPD128,
29570 IX86_BUILTIN_RANGEPS256,
29571 IX86_BUILTIN_RANGEPS128,
29572 IX86_BUILTIN_GETEXPPS256,
29573 IX86_BUILTIN_GETEXPPD256,
29574 IX86_BUILTIN_GETEXPPS128,
29575 IX86_BUILTIN_GETEXPPD128,
29576 IX86_BUILTIN_FIXUPIMMPD256_MASK,
29577 IX86_BUILTIN_FIXUPIMMPD256_MASKZ,
29578 IX86_BUILTIN_FIXUPIMMPS256_MASK,
29579 IX86_BUILTIN_FIXUPIMMPS256_MASKZ,
29580 IX86_BUILTIN_FIXUPIMMPD128_MASK,
29581 IX86_BUILTIN_FIXUPIMMPD128_MASKZ,
29582 IX86_BUILTIN_FIXUPIMMPS128_MASK,
29583 IX86_BUILTIN_FIXUPIMMPS128_MASKZ,
29584 IX86_BUILTIN_PABSQ256,
29585 IX86_BUILTIN_PABSQ128,
29586 IX86_BUILTIN_PABSD256_MASK,
29587 IX86_BUILTIN_PABSD128_MASK,
29588 IX86_BUILTIN_PMULHRSW256_MASK,
29589 IX86_BUILTIN_PMULHRSW128_MASK,
29590 IX86_BUILTIN_PMULHUW128_MASK,
29591 IX86_BUILTIN_PMULHUW256_MASK,
29592 IX86_BUILTIN_PMULHW256_MASK,
29593 IX86_BUILTIN_PMULHW128_MASK,
29594 IX86_BUILTIN_PMULLW256_MASK,
29595 IX86_BUILTIN_PMULLW128_MASK,
29596 IX86_BUILTIN_PMULLQ256,
29597 IX86_BUILTIN_PMULLQ128,
29598 IX86_BUILTIN_ANDPD256_MASK,
29599 IX86_BUILTIN_ANDPD128_MASK,
29600 IX86_BUILTIN_ANDPS256_MASK,
29601 IX86_BUILTIN_ANDPS128_MASK,
29602 IX86_BUILTIN_ANDNPD256_MASK,
29603 IX86_BUILTIN_ANDNPD128_MASK,
29604 IX86_BUILTIN_ANDNPS256_MASK,
29605 IX86_BUILTIN_ANDNPS128_MASK,
29606 IX86_BUILTIN_PSLLWI128_MASK,
29607 IX86_BUILTIN_PSLLDI128_MASK,
29608 IX86_BUILTIN_PSLLQI128_MASK,
29609 IX86_BUILTIN_PSLLW128_MASK,
29610 IX86_BUILTIN_PSLLD128_MASK,
29611 IX86_BUILTIN_PSLLQ128_MASK,
29612 IX86_BUILTIN_PSLLWI256_MASK,
29613 IX86_BUILTIN_PSLLW256_MASK,
29614 IX86_BUILTIN_PSLLDI256_MASK,
29615 IX86_BUILTIN_PSLLD256_MASK,
29616 IX86_BUILTIN_PSLLQI256_MASK,
29617 IX86_BUILTIN_PSLLQ256_MASK,
29618 IX86_BUILTIN_PSRADI128_MASK,
29619 IX86_BUILTIN_PSRAD128_MASK,
29620 IX86_BUILTIN_PSRADI256_MASK,
29621 IX86_BUILTIN_PSRAD256_MASK,
29622 IX86_BUILTIN_PSRAQI128_MASK,
29623 IX86_BUILTIN_PSRAQ128_MASK,
29624 IX86_BUILTIN_PSRAQI256_MASK,
29625 IX86_BUILTIN_PSRAQ256_MASK,
29626 IX86_BUILTIN_PANDD256,
29627 IX86_BUILTIN_PANDD128,
29628 IX86_BUILTIN_PSRLDI128_MASK,
29629 IX86_BUILTIN_PSRLD128_MASK,
29630 IX86_BUILTIN_PSRLDI256_MASK,
29631 IX86_BUILTIN_PSRLD256_MASK,
29632 IX86_BUILTIN_PSRLQI128_MASK,
29633 IX86_BUILTIN_PSRLQ128_MASK,
29634 IX86_BUILTIN_PSRLQI256_MASK,
29635 IX86_BUILTIN_PSRLQ256_MASK,
29636 IX86_BUILTIN_PANDQ256,
29637 IX86_BUILTIN_PANDQ128,
29638 IX86_BUILTIN_PANDND256,
29639 IX86_BUILTIN_PANDND128,
29640 IX86_BUILTIN_PANDNQ256,
29641 IX86_BUILTIN_PANDNQ128,
29642 IX86_BUILTIN_PORD256,
29643 IX86_BUILTIN_PORD128,
29644 IX86_BUILTIN_PORQ256,
29645 IX86_BUILTIN_PORQ128,
29646 IX86_BUILTIN_PXORD256,
29647 IX86_BUILTIN_PXORD128,
29648 IX86_BUILTIN_PXORQ256,
29649 IX86_BUILTIN_PXORQ128,
29650 IX86_BUILTIN_PACKSSWB256_MASK,
29651 IX86_BUILTIN_PACKSSWB128_MASK,
29652 IX86_BUILTIN_PACKUSWB256_MASK,
29653 IX86_BUILTIN_PACKUSWB128_MASK,
29654 IX86_BUILTIN_RNDSCALEPS256,
29655 IX86_BUILTIN_RNDSCALEPD256,
29656 IX86_BUILTIN_RNDSCALEPS128,
29657 IX86_BUILTIN_RNDSCALEPD128,
29658 IX86_BUILTIN_VTERNLOGQ256_MASK,
29659 IX86_BUILTIN_VTERNLOGQ256_MASKZ,
29660 IX86_BUILTIN_VTERNLOGD256_MASK,
29661 IX86_BUILTIN_VTERNLOGD256_MASKZ,
29662 IX86_BUILTIN_VTERNLOGQ128_MASK,
29663 IX86_BUILTIN_VTERNLOGQ128_MASKZ,
29664 IX86_BUILTIN_VTERNLOGD128_MASK,
29665 IX86_BUILTIN_VTERNLOGD128_MASKZ,
29666 IX86_BUILTIN_SCALEFPD256,
29667 IX86_BUILTIN_SCALEFPS256,
29668 IX86_BUILTIN_SCALEFPD128,
29669 IX86_BUILTIN_SCALEFPS128,
29670 IX86_BUILTIN_VFMADDPD256_MASK,
29671 IX86_BUILTIN_VFMADDPD256_MASK3,
29672 IX86_BUILTIN_VFMADDPD256_MASKZ,
29673 IX86_BUILTIN_VFMADDPD128_MASK,
29674 IX86_BUILTIN_VFMADDPD128_MASK3,
29675 IX86_BUILTIN_VFMADDPD128_MASKZ,
29676 IX86_BUILTIN_VFMADDPS256_MASK,
29677 IX86_BUILTIN_VFMADDPS256_MASK3,
29678 IX86_BUILTIN_VFMADDPS256_MASKZ,
29679 IX86_BUILTIN_VFMADDPS128_MASK,
29680 IX86_BUILTIN_VFMADDPS128_MASK3,
29681 IX86_BUILTIN_VFMADDPS128_MASKZ,
29682 IX86_BUILTIN_VFMSUBPD256_MASK3,
29683 IX86_BUILTIN_VFMSUBPD128_MASK3,
29684 IX86_BUILTIN_VFMSUBPS256_MASK3,
29685 IX86_BUILTIN_VFMSUBPS128_MASK3,
29686 IX86_BUILTIN_VFNMADDPD256_MASK,
29687 IX86_BUILTIN_VFNMADDPD128_MASK,
29688 IX86_BUILTIN_VFNMADDPS256_MASK,
29689 IX86_BUILTIN_VFNMADDPS128_MASK,
29690 IX86_BUILTIN_VFNMSUBPD256_MASK,
29691 IX86_BUILTIN_VFNMSUBPD256_MASK3,
29692 IX86_BUILTIN_VFNMSUBPD128_MASK,
29693 IX86_BUILTIN_VFNMSUBPD128_MASK3,
29694 IX86_BUILTIN_VFNMSUBPS256_MASK,
29695 IX86_BUILTIN_VFNMSUBPS256_MASK3,
29696 IX86_BUILTIN_VFNMSUBPS128_MASK,
29697 IX86_BUILTIN_VFNMSUBPS128_MASK3,
29698 IX86_BUILTIN_VFMADDSUBPD256_MASK,
29699 IX86_BUILTIN_VFMADDSUBPD256_MASK3,
29700 IX86_BUILTIN_VFMADDSUBPD256_MASKZ,
29701 IX86_BUILTIN_VFMADDSUBPD128_MASK,
29702 IX86_BUILTIN_VFMADDSUBPD128_MASK3,
29703 IX86_BUILTIN_VFMADDSUBPD128_MASKZ,
29704 IX86_BUILTIN_VFMADDSUBPS256_MASK,
29705 IX86_BUILTIN_VFMADDSUBPS256_MASK3,
29706 IX86_BUILTIN_VFMADDSUBPS256_MASKZ,
29707 IX86_BUILTIN_VFMADDSUBPS128_MASK,
29708 IX86_BUILTIN_VFMADDSUBPS128_MASK3,
29709 IX86_BUILTIN_VFMADDSUBPS128_MASKZ,
29710 IX86_BUILTIN_VFMSUBADDPD256_MASK3,
29711 IX86_BUILTIN_VFMSUBADDPD128_MASK3,
29712 IX86_BUILTIN_VFMSUBADDPS256_MASK3,
29713 IX86_BUILTIN_VFMSUBADDPS128_MASK3,
29714 IX86_BUILTIN_INSERTF64X2_256,
29715 IX86_BUILTIN_INSERTI64X2_256,
29716 IX86_BUILTIN_PSRAVV16HI,
29717 IX86_BUILTIN_PSRAVV8HI,
29718 IX86_BUILTIN_PMADDUBSW256_MASK,
29719 IX86_BUILTIN_PMADDUBSW128_MASK,
29720 IX86_BUILTIN_PMADDWD256_MASK,
29721 IX86_BUILTIN_PMADDWD128_MASK,
29722 IX86_BUILTIN_PSRLVV16HI,
29723 IX86_BUILTIN_PSRLVV8HI,
29724 IX86_BUILTIN_CVTPS2DQ256_MASK,
29725 IX86_BUILTIN_CVTPS2DQ128_MASK,
29726 IX86_BUILTIN_CVTPS2UDQ256,
29727 IX86_BUILTIN_CVTPS2UDQ128,
29728 IX86_BUILTIN_CVTPS2QQ256,
29729 IX86_BUILTIN_CVTPS2QQ128,
29730 IX86_BUILTIN_CVTPS2UQQ256,
29731 IX86_BUILTIN_CVTPS2UQQ128,
29732 IX86_BUILTIN_GETMANTPS256,
29733 IX86_BUILTIN_GETMANTPS128,
29734 IX86_BUILTIN_GETMANTPD256,
29735 IX86_BUILTIN_GETMANTPD128,
29736 IX86_BUILTIN_MOVDDUP256_MASK,
29737 IX86_BUILTIN_MOVDDUP128_MASK,
29738 IX86_BUILTIN_MOVSHDUP256_MASK,
29739 IX86_BUILTIN_MOVSHDUP128_MASK,
29740 IX86_BUILTIN_MOVSLDUP256_MASK,
29741 IX86_BUILTIN_MOVSLDUP128_MASK,
29742 IX86_BUILTIN_CVTQQ2PS256,
29743 IX86_BUILTIN_CVTQQ2PS128,
29744 IX86_BUILTIN_CVTUQQ2PS256,
29745 IX86_BUILTIN_CVTUQQ2PS128,
29746 IX86_BUILTIN_CVTQQ2PD256,
29747 IX86_BUILTIN_CVTQQ2PD128,
29748 IX86_BUILTIN_CVTUQQ2PD256,
29749 IX86_BUILTIN_CVTUQQ2PD128,
29750 IX86_BUILTIN_VPERMT2VARQ256,
29751 IX86_BUILTIN_VPERMT2VARQ256_MASKZ,
29752 IX86_BUILTIN_VPERMT2VARD256,
29753 IX86_BUILTIN_VPERMT2VARD256_MASKZ,
29754 IX86_BUILTIN_VPERMI2VARQ256,
29755 IX86_BUILTIN_VPERMI2VARD256,
29756 IX86_BUILTIN_VPERMT2VARPD256,
29757 IX86_BUILTIN_VPERMT2VARPD256_MASKZ,
29758 IX86_BUILTIN_VPERMT2VARPS256,
29759 IX86_BUILTIN_VPERMT2VARPS256_MASKZ,
29760 IX86_BUILTIN_VPERMI2VARPD256,
29761 IX86_BUILTIN_VPERMI2VARPS256,
29762 IX86_BUILTIN_VPERMT2VARQ128,
29763 IX86_BUILTIN_VPERMT2VARQ128_MASKZ,
29764 IX86_BUILTIN_VPERMT2VARD128,
29765 IX86_BUILTIN_VPERMT2VARD128_MASKZ,
29766 IX86_BUILTIN_VPERMI2VARQ128,
29767 IX86_BUILTIN_VPERMI2VARD128,
29768 IX86_BUILTIN_VPERMT2VARPD128,
29769 IX86_BUILTIN_VPERMT2VARPD128_MASKZ,
29770 IX86_BUILTIN_VPERMT2VARPS128,
29771 IX86_BUILTIN_VPERMT2VARPS128_MASKZ,
29772 IX86_BUILTIN_VPERMI2VARPD128,
29773 IX86_BUILTIN_VPERMI2VARPS128,
29774 IX86_BUILTIN_PSHUFB256_MASK,
29775 IX86_BUILTIN_PSHUFB128_MASK,
29776 IX86_BUILTIN_PSHUFHW256_MASK,
29777 IX86_BUILTIN_PSHUFHW128_MASK,
29778 IX86_BUILTIN_PSHUFLW256_MASK,
29779 IX86_BUILTIN_PSHUFLW128_MASK,
29780 IX86_BUILTIN_PSHUFD256_MASK,
29781 IX86_BUILTIN_PSHUFD128_MASK,
29782 IX86_BUILTIN_SHUFPD256_MASK,
29783 IX86_BUILTIN_SHUFPD128_MASK,
29784 IX86_BUILTIN_SHUFPS256_MASK,
29785 IX86_BUILTIN_SHUFPS128_MASK,
29786 IX86_BUILTIN_PROLVQ256,
29787 IX86_BUILTIN_PROLVQ128,
29788 IX86_BUILTIN_PROLQ256,
29789 IX86_BUILTIN_PROLQ128,
29790 IX86_BUILTIN_PRORVQ256,
29791 IX86_BUILTIN_PRORVQ128,
29792 IX86_BUILTIN_PRORQ256,
29793 IX86_BUILTIN_PRORQ128,
29794 IX86_BUILTIN_PSRAVQ128,
29795 IX86_BUILTIN_PSRAVQ256,
29796 IX86_BUILTIN_PSLLVV4DI_MASK,
29797 IX86_BUILTIN_PSLLVV2DI_MASK,
29798 IX86_BUILTIN_PSLLVV8SI_MASK,
29799 IX86_BUILTIN_PSLLVV4SI_MASK,
29800 IX86_BUILTIN_PSRAVV8SI_MASK,
29801 IX86_BUILTIN_PSRAVV4SI_MASK,
29802 IX86_BUILTIN_PSRLVV4DI_MASK,
29803 IX86_BUILTIN_PSRLVV2DI_MASK,
29804 IX86_BUILTIN_PSRLVV8SI_MASK,
29805 IX86_BUILTIN_PSRLVV4SI_MASK,
29806 IX86_BUILTIN_PSRAWI256_MASK,
29807 IX86_BUILTIN_PSRAW256_MASK,
29808 IX86_BUILTIN_PSRAWI128_MASK,
29809 IX86_BUILTIN_PSRAW128_MASK,
29810 IX86_BUILTIN_PSRLWI256_MASK,
29811 IX86_BUILTIN_PSRLW256_MASK,
29812 IX86_BUILTIN_PSRLWI128_MASK,
29813 IX86_BUILTIN_PSRLW128_MASK,
29814 IX86_BUILTIN_PRORVD256,
29815 IX86_BUILTIN_PROLVD256,
29816 IX86_BUILTIN_PRORD256,
29817 IX86_BUILTIN_PROLD256,
29818 IX86_BUILTIN_PRORVD128,
29819 IX86_BUILTIN_PROLVD128,
29820 IX86_BUILTIN_PRORD128,
29821 IX86_BUILTIN_PROLD128,
29822 IX86_BUILTIN_FPCLASSPD256,
29823 IX86_BUILTIN_FPCLASSPD128,
29824 IX86_BUILTIN_FPCLASSSD,
29825 IX86_BUILTIN_FPCLASSPS256,
29826 IX86_BUILTIN_FPCLASSPS128,
29827 IX86_BUILTIN_FPCLASSSS,
29828 IX86_BUILTIN_CVTB2MASK128,
29829 IX86_BUILTIN_CVTB2MASK256,
29830 IX86_BUILTIN_CVTW2MASK128,
29831 IX86_BUILTIN_CVTW2MASK256,
29832 IX86_BUILTIN_CVTD2MASK128,
29833 IX86_BUILTIN_CVTD2MASK256,
29834 IX86_BUILTIN_CVTQ2MASK128,
29835 IX86_BUILTIN_CVTQ2MASK256,
29836 IX86_BUILTIN_CVTMASK2B128,
29837 IX86_BUILTIN_CVTMASK2B256,
29838 IX86_BUILTIN_CVTMASK2W128,
29839 IX86_BUILTIN_CVTMASK2W256,
29840 IX86_BUILTIN_CVTMASK2D128,
29841 IX86_BUILTIN_CVTMASK2D256,
29842 IX86_BUILTIN_CVTMASK2Q128,
29843 IX86_BUILTIN_CVTMASK2Q256,
29844 IX86_BUILTIN_PCMPEQB128_MASK,
29845 IX86_BUILTIN_PCMPEQB256_MASK,
29846 IX86_BUILTIN_PCMPEQW128_MASK,
29847 IX86_BUILTIN_PCMPEQW256_MASK,
29848 IX86_BUILTIN_PCMPEQD128_MASK,
29849 IX86_BUILTIN_PCMPEQD256_MASK,
29850 IX86_BUILTIN_PCMPEQQ128_MASK,
29851 IX86_BUILTIN_PCMPEQQ256_MASK,
29852 IX86_BUILTIN_PCMPGTB128_MASK,
29853 IX86_BUILTIN_PCMPGTB256_MASK,
29854 IX86_BUILTIN_PCMPGTW128_MASK,
29855 IX86_BUILTIN_PCMPGTW256_MASK,
29856 IX86_BUILTIN_PCMPGTD128_MASK,
29857 IX86_BUILTIN_PCMPGTD256_MASK,
29858 IX86_BUILTIN_PCMPGTQ128_MASK,
29859 IX86_BUILTIN_PCMPGTQ256_MASK,
29860 IX86_BUILTIN_PTESTMB128,
29861 IX86_BUILTIN_PTESTMB256,
29862 IX86_BUILTIN_PTESTMW128,
29863 IX86_BUILTIN_PTESTMW256,
29864 IX86_BUILTIN_PTESTMD128,
29865 IX86_BUILTIN_PTESTMD256,
29866 IX86_BUILTIN_PTESTMQ128,
29867 IX86_BUILTIN_PTESTMQ256,
29868 IX86_BUILTIN_PTESTNMB128,
29869 IX86_BUILTIN_PTESTNMB256,
29870 IX86_BUILTIN_PTESTNMW128,
29871 IX86_BUILTIN_PTESTNMW256,
29872 IX86_BUILTIN_PTESTNMD128,
29873 IX86_BUILTIN_PTESTNMD256,
29874 IX86_BUILTIN_PTESTNMQ128,
29875 IX86_BUILTIN_PTESTNMQ256,
29876 IX86_BUILTIN_PBROADCASTMB128,
29877 IX86_BUILTIN_PBROADCASTMB256,
29878 IX86_BUILTIN_PBROADCASTMW128,
29879 IX86_BUILTIN_PBROADCASTMW256,
29880 IX86_BUILTIN_COMPRESSPD256,
29881 IX86_BUILTIN_COMPRESSPD128,
29882 IX86_BUILTIN_COMPRESSPS256,
29883 IX86_BUILTIN_COMPRESSPS128,
29884 IX86_BUILTIN_PCOMPRESSQ256,
29885 IX86_BUILTIN_PCOMPRESSQ128,
29886 IX86_BUILTIN_PCOMPRESSD256,
29887 IX86_BUILTIN_PCOMPRESSD128,
29888 IX86_BUILTIN_EXPANDPD256,
29889 IX86_BUILTIN_EXPANDPD128,
29890 IX86_BUILTIN_EXPANDPS256,
29891 IX86_BUILTIN_EXPANDPS128,
29892 IX86_BUILTIN_PEXPANDQ256,
29893 IX86_BUILTIN_PEXPANDQ128,
29894 IX86_BUILTIN_PEXPANDD256,
29895 IX86_BUILTIN_PEXPANDD128,
29896 IX86_BUILTIN_EXPANDPD256Z,
29897 IX86_BUILTIN_EXPANDPD128Z,
29898 IX86_BUILTIN_EXPANDPS256Z,
29899 IX86_BUILTIN_EXPANDPS128Z,
29900 IX86_BUILTIN_PEXPANDQ256Z,
29901 IX86_BUILTIN_PEXPANDQ128Z,
29902 IX86_BUILTIN_PEXPANDD256Z,
29903 IX86_BUILTIN_PEXPANDD128Z,
29904 IX86_BUILTIN_PMAXSD256_MASK,
29905 IX86_BUILTIN_PMINSD256_MASK,
29906 IX86_BUILTIN_PMAXUD256_MASK,
29907 IX86_BUILTIN_PMINUD256_MASK,
29908 IX86_BUILTIN_PMAXSD128_MASK,
29909 IX86_BUILTIN_PMINSD128_MASK,
29910 IX86_BUILTIN_PMAXUD128_MASK,
29911 IX86_BUILTIN_PMINUD128_MASK,
29912 IX86_BUILTIN_PMAXSQ256_MASK,
29913 IX86_BUILTIN_PMINSQ256_MASK,
29914 IX86_BUILTIN_PMAXUQ256_MASK,
29915 IX86_BUILTIN_PMINUQ256_MASK,
29916 IX86_BUILTIN_PMAXSQ128_MASK,
29917 IX86_BUILTIN_PMINSQ128_MASK,
29918 IX86_BUILTIN_PMAXUQ128_MASK,
29919 IX86_BUILTIN_PMINUQ128_MASK,
29920 IX86_BUILTIN_PMINSB256_MASK,
29921 IX86_BUILTIN_PMINUB256_MASK,
29922 IX86_BUILTIN_PMAXSB256_MASK,
29923 IX86_BUILTIN_PMAXUB256_MASK,
29924 IX86_BUILTIN_PMINSB128_MASK,
29925 IX86_BUILTIN_PMINUB128_MASK,
29926 IX86_BUILTIN_PMAXSB128_MASK,
29927 IX86_BUILTIN_PMAXUB128_MASK,
29928 IX86_BUILTIN_PMINSW256_MASK,
29929 IX86_BUILTIN_PMINUW256_MASK,
29930 IX86_BUILTIN_PMAXSW256_MASK,
29931 IX86_BUILTIN_PMAXUW256_MASK,
29932 IX86_BUILTIN_PMINSW128_MASK,
29933 IX86_BUILTIN_PMINUW128_MASK,
29934 IX86_BUILTIN_PMAXSW128_MASK,
29935 IX86_BUILTIN_PMAXUW128_MASK,
29936 IX86_BUILTIN_VPCONFLICTQ256,
29937 IX86_BUILTIN_VPCONFLICTD256,
29938 IX86_BUILTIN_VPCLZCNTQ256,
29939 IX86_BUILTIN_VPCLZCNTD256,
29940 IX86_BUILTIN_UNPCKHPD256_MASK,
29941 IX86_BUILTIN_UNPCKHPD128_MASK,
29942 IX86_BUILTIN_UNPCKHPS256_MASK,
29943 IX86_BUILTIN_UNPCKHPS128_MASK,
29944 IX86_BUILTIN_UNPCKLPD256_MASK,
29945 IX86_BUILTIN_UNPCKLPD128_MASK,
29946 IX86_BUILTIN_UNPCKLPS256_MASK,
29947 IX86_BUILTIN_VPCONFLICTQ128,
29948 IX86_BUILTIN_VPCONFLICTD128,
29949 IX86_BUILTIN_VPCLZCNTQ128,
29950 IX86_BUILTIN_VPCLZCNTD128,
29951 IX86_BUILTIN_UNPCKLPS128_MASK,
29952 IX86_BUILTIN_ALIGND256,
29953 IX86_BUILTIN_ALIGNQ256,
29954 IX86_BUILTIN_ALIGND128,
29955 IX86_BUILTIN_ALIGNQ128,
29956 IX86_BUILTIN_CVTPS2PH256_MASK,
29957 IX86_BUILTIN_CVTPS2PH_MASK,
29958 IX86_BUILTIN_CVTPH2PS_MASK,
29959 IX86_BUILTIN_CVTPH2PS256_MASK,
29960 IX86_BUILTIN_PUNPCKHDQ128_MASK,
29961 IX86_BUILTIN_PUNPCKHDQ256_MASK,
29962 IX86_BUILTIN_PUNPCKHQDQ128_MASK,
29963 IX86_BUILTIN_PUNPCKHQDQ256_MASK,
29964 IX86_BUILTIN_PUNPCKLDQ128_MASK,
29965 IX86_BUILTIN_PUNPCKLDQ256_MASK,
29966 IX86_BUILTIN_PUNPCKLQDQ128_MASK,
29967 IX86_BUILTIN_PUNPCKLQDQ256_MASK,
29968 IX86_BUILTIN_PUNPCKHBW128_MASK,
29969 IX86_BUILTIN_PUNPCKHBW256_MASK,
29970 IX86_BUILTIN_PUNPCKHWD128_MASK,
29971 IX86_BUILTIN_PUNPCKHWD256_MASK,
29972 IX86_BUILTIN_PUNPCKLBW128_MASK,
29973 IX86_BUILTIN_PUNPCKLBW256_MASK,
29974 IX86_BUILTIN_PUNPCKLWD128_MASK,
29975 IX86_BUILTIN_PUNPCKLWD256_MASK,
29976 IX86_BUILTIN_PSLLVV16HI,
29977 IX86_BUILTIN_PSLLVV8HI,
29978 IX86_BUILTIN_PACKSSDW256_MASK,
29979 IX86_BUILTIN_PACKSSDW128_MASK,
29980 IX86_BUILTIN_PACKUSDW256_MASK,
29981 IX86_BUILTIN_PACKUSDW128_MASK,
29982 IX86_BUILTIN_PAVGB256_MASK,
29983 IX86_BUILTIN_PAVGW256_MASK,
29984 IX86_BUILTIN_PAVGB128_MASK,
29985 IX86_BUILTIN_PAVGW128_MASK,
29986 IX86_BUILTIN_VPERMVARSF256_MASK,
29987 IX86_BUILTIN_VPERMVARDF256_MASK,
29988 IX86_BUILTIN_VPERMDF256_MASK,
29989 IX86_BUILTIN_PABSB256_MASK,
29990 IX86_BUILTIN_PABSB128_MASK,
29991 IX86_BUILTIN_PABSW256_MASK,
29992 IX86_BUILTIN_PABSW128_MASK,
29993 IX86_BUILTIN_VPERMILVARPD_MASK,
29994 IX86_BUILTIN_VPERMILVARPS_MASK,
29995 IX86_BUILTIN_VPERMILVARPD256_MASK,
29996 IX86_BUILTIN_VPERMILVARPS256_MASK,
29997 IX86_BUILTIN_VPERMILPD_MASK,
29998 IX86_BUILTIN_VPERMILPS_MASK,
29999 IX86_BUILTIN_VPERMILPD256_MASK,
30000 IX86_BUILTIN_VPERMILPS256_MASK,
30001 IX86_BUILTIN_BLENDMQ256,
30002 IX86_BUILTIN_BLENDMD256,
30003 IX86_BUILTIN_BLENDMPD256,
30004 IX86_BUILTIN_BLENDMPS256,
30005 IX86_BUILTIN_BLENDMQ128,
30006 IX86_BUILTIN_BLENDMD128,
30007 IX86_BUILTIN_BLENDMPD128,
30008 IX86_BUILTIN_BLENDMPS128,
30009 IX86_BUILTIN_BLENDMW256,
30010 IX86_BUILTIN_BLENDMB256,
30011 IX86_BUILTIN_BLENDMW128,
30012 IX86_BUILTIN_BLENDMB128,
30013 IX86_BUILTIN_PMULLD256_MASK,
30014 IX86_BUILTIN_PMULLD128_MASK,
30015 IX86_BUILTIN_PMULUDQ256_MASK,
30016 IX86_BUILTIN_PMULDQ256_MASK,
30017 IX86_BUILTIN_PMULDQ128_MASK,
30018 IX86_BUILTIN_PMULUDQ128_MASK,
30019 IX86_BUILTIN_CVTPD2PS256_MASK,
30020 IX86_BUILTIN_CVTPD2PS_MASK,
30021 IX86_BUILTIN_VPERMVARSI256_MASK,
30022 IX86_BUILTIN_VPERMVARDI256_MASK,
30023 IX86_BUILTIN_VPERMDI256_MASK,
30024 IX86_BUILTIN_CMPQ256,
30025 IX86_BUILTIN_CMPD256,
30026 IX86_BUILTIN_UCMPQ256,
30027 IX86_BUILTIN_UCMPD256,
30028 IX86_BUILTIN_CMPB256,
30029 IX86_BUILTIN_CMPW256,
30030 IX86_BUILTIN_UCMPB256,
30031 IX86_BUILTIN_UCMPW256,
30032 IX86_BUILTIN_CMPPD256_MASK,
30033 IX86_BUILTIN_CMPPS256_MASK,
30034 IX86_BUILTIN_CMPQ128,
30035 IX86_BUILTIN_CMPD128,
30036 IX86_BUILTIN_UCMPQ128,
30037 IX86_BUILTIN_UCMPD128,
30038 IX86_BUILTIN_CMPB128,
30039 IX86_BUILTIN_CMPW128,
30040 IX86_BUILTIN_UCMPB128,
30041 IX86_BUILTIN_UCMPW128,
30042 IX86_BUILTIN_CMPPD128_MASK,
30043 IX86_BUILTIN_CMPPS128_MASK,
30045 IX86_BUILTIN_GATHER3SIV8SF,
30046 IX86_BUILTIN_GATHER3SIV4SF,
30047 IX86_BUILTIN_GATHER3SIV4DF,
30048 IX86_BUILTIN_GATHER3SIV2DF,
30049 IX86_BUILTIN_GATHER3DIV8SF,
30050 IX86_BUILTIN_GATHER3DIV4SF,
30051 IX86_BUILTIN_GATHER3DIV4DF,
30052 IX86_BUILTIN_GATHER3DIV2DF,
30053 IX86_BUILTIN_GATHER3SIV8SI,
30054 IX86_BUILTIN_GATHER3SIV4SI,
30055 IX86_BUILTIN_GATHER3SIV4DI,
30056 IX86_BUILTIN_GATHER3SIV2DI,
30057 IX86_BUILTIN_GATHER3DIV8SI,
30058 IX86_BUILTIN_GATHER3DIV4SI,
30059 IX86_BUILTIN_GATHER3DIV4DI,
30060 IX86_BUILTIN_GATHER3DIV2DI,
30061 IX86_BUILTIN_SCATTERSIV8SF,
30062 IX86_BUILTIN_SCATTERSIV4SF,
30063 IX86_BUILTIN_SCATTERSIV4DF,
30064 IX86_BUILTIN_SCATTERSIV2DF,
30065 IX86_BUILTIN_SCATTERDIV8SF,
30066 IX86_BUILTIN_SCATTERDIV4SF,
30067 IX86_BUILTIN_SCATTERDIV4DF,
30068 IX86_BUILTIN_SCATTERDIV2DF,
30069 IX86_BUILTIN_SCATTERSIV8SI,
30070 IX86_BUILTIN_SCATTERSIV4SI,
30071 IX86_BUILTIN_SCATTERSIV4DI,
30072 IX86_BUILTIN_SCATTERSIV2DI,
30073 IX86_BUILTIN_SCATTERDIV8SI,
30074 IX86_BUILTIN_SCATTERDIV4SI,
30075 IX86_BUILTIN_SCATTERDIV4DI,
30076 IX86_BUILTIN_SCATTERDIV2DI,
30078 /* AVX512DQ. */
30079 IX86_BUILTIN_RANGESD128,
30080 IX86_BUILTIN_RANGESS128,
30081 IX86_BUILTIN_KUNPCKWD,
30082 IX86_BUILTIN_KUNPCKDQ,
30083 IX86_BUILTIN_BROADCASTF32x2_512,
30084 IX86_BUILTIN_BROADCASTI32x2_512,
30085 IX86_BUILTIN_BROADCASTF64X2_512,
30086 IX86_BUILTIN_BROADCASTI64X2_512,
30087 IX86_BUILTIN_BROADCASTF32X8_512,
30088 IX86_BUILTIN_BROADCASTI32X8_512,
30089 IX86_BUILTIN_EXTRACTF64X2_512,
30090 IX86_BUILTIN_EXTRACTF32X8,
30091 IX86_BUILTIN_EXTRACTI64X2_512,
30092 IX86_BUILTIN_EXTRACTI32X8,
30093 IX86_BUILTIN_REDUCEPD512_MASK,
30094 IX86_BUILTIN_REDUCEPS512_MASK,
30095 IX86_BUILTIN_PMULLQ512,
30096 IX86_BUILTIN_XORPD512,
30097 IX86_BUILTIN_XORPS512,
30098 IX86_BUILTIN_ORPD512,
30099 IX86_BUILTIN_ORPS512,
30100 IX86_BUILTIN_ANDPD512,
30101 IX86_BUILTIN_ANDPS512,
30102 IX86_BUILTIN_ANDNPD512,
30103 IX86_BUILTIN_ANDNPS512,
30104 IX86_BUILTIN_INSERTF32X8,
30105 IX86_BUILTIN_INSERTI32X8,
30106 IX86_BUILTIN_INSERTF64X2_512,
30107 IX86_BUILTIN_INSERTI64X2_512,
30108 IX86_BUILTIN_FPCLASSPD512,
30109 IX86_BUILTIN_FPCLASSPS512,
30110 IX86_BUILTIN_CVTD2MASK512,
30111 IX86_BUILTIN_CVTQ2MASK512,
30112 IX86_BUILTIN_CVTMASK2D512,
30113 IX86_BUILTIN_CVTMASK2Q512,
30114 IX86_BUILTIN_CVTPD2QQ512,
30115 IX86_BUILTIN_CVTPS2QQ512,
30116 IX86_BUILTIN_CVTPD2UQQ512,
30117 IX86_BUILTIN_CVTPS2UQQ512,
30118 IX86_BUILTIN_CVTQQ2PS512,
30119 IX86_BUILTIN_CVTUQQ2PS512,
30120 IX86_BUILTIN_CVTQQ2PD512,
30121 IX86_BUILTIN_CVTUQQ2PD512,
30122 IX86_BUILTIN_CVTTPS2QQ512,
30123 IX86_BUILTIN_CVTTPS2UQQ512,
30124 IX86_BUILTIN_CVTTPD2QQ512,
30125 IX86_BUILTIN_CVTTPD2UQQ512,
30126 IX86_BUILTIN_RANGEPS512,
30127 IX86_BUILTIN_RANGEPD512,
30129 /* AVX512BW. */
30130 IX86_BUILTIN_PACKUSDW512,
30131 IX86_BUILTIN_PACKSSDW512,
30132 IX86_BUILTIN_LOADDQUHI512_MASK,
30133 IX86_BUILTIN_LOADDQUQI512_MASK,
30134 IX86_BUILTIN_PSLLDQ512,
30135 IX86_BUILTIN_PSRLDQ512,
30136 IX86_BUILTIN_STOREDQUHI512_MASK,
30137 IX86_BUILTIN_STOREDQUQI512_MASK,
30138 IX86_BUILTIN_PALIGNR512,
30139 IX86_BUILTIN_PALIGNR512_MASK,
30140 IX86_BUILTIN_MOVDQUHI512_MASK,
30141 IX86_BUILTIN_MOVDQUQI512_MASK,
30142 IX86_BUILTIN_PSADBW512,
30143 IX86_BUILTIN_DBPSADBW512,
30144 IX86_BUILTIN_PBROADCASTB512,
30145 IX86_BUILTIN_PBROADCASTB512_GPR,
30146 IX86_BUILTIN_PBROADCASTW512,
30147 IX86_BUILTIN_PBROADCASTW512_GPR,
30148 IX86_BUILTIN_PMOVSXBW512_MASK,
30149 IX86_BUILTIN_PMOVZXBW512_MASK,
30150 IX86_BUILTIN_VPERMVARHI512_MASK,
30151 IX86_BUILTIN_VPERMT2VARHI512,
30152 IX86_BUILTIN_VPERMT2VARHI512_MASKZ,
30153 IX86_BUILTIN_VPERMI2VARHI512,
30154 IX86_BUILTIN_PAVGB512,
30155 IX86_BUILTIN_PAVGW512,
30156 IX86_BUILTIN_PADDB512,
30157 IX86_BUILTIN_PSUBB512,
30158 IX86_BUILTIN_PSUBSB512,
30159 IX86_BUILTIN_PADDSB512,
30160 IX86_BUILTIN_PSUBUSB512,
30161 IX86_BUILTIN_PADDUSB512,
30162 IX86_BUILTIN_PSUBW512,
30163 IX86_BUILTIN_PADDW512,
30164 IX86_BUILTIN_PSUBSW512,
30165 IX86_BUILTIN_PADDSW512,
30166 IX86_BUILTIN_PSUBUSW512,
30167 IX86_BUILTIN_PADDUSW512,
30168 IX86_BUILTIN_PMAXUW512,
30169 IX86_BUILTIN_PMAXSW512,
30170 IX86_BUILTIN_PMINUW512,
30171 IX86_BUILTIN_PMINSW512,
30172 IX86_BUILTIN_PMAXUB512,
30173 IX86_BUILTIN_PMAXSB512,
30174 IX86_BUILTIN_PMINUB512,
30175 IX86_BUILTIN_PMINSB512,
30176 IX86_BUILTIN_PMOVWB512,
30177 IX86_BUILTIN_PMOVSWB512,
30178 IX86_BUILTIN_PMOVUSWB512,
30179 IX86_BUILTIN_PMULHRSW512_MASK,
30180 IX86_BUILTIN_PMULHUW512_MASK,
30181 IX86_BUILTIN_PMULHW512_MASK,
30182 IX86_BUILTIN_PMULLW512_MASK,
30183 IX86_BUILTIN_PSLLWI512_MASK,
30184 IX86_BUILTIN_PSLLW512_MASK,
30185 IX86_BUILTIN_PACKSSWB512,
30186 IX86_BUILTIN_PACKUSWB512,
30187 IX86_BUILTIN_PSRAVV32HI,
30188 IX86_BUILTIN_PMADDUBSW512_MASK,
30189 IX86_BUILTIN_PMADDWD512_MASK,
30190 IX86_BUILTIN_PSRLVV32HI,
30191 IX86_BUILTIN_PUNPCKHBW512,
30192 IX86_BUILTIN_PUNPCKHWD512,
30193 IX86_BUILTIN_PUNPCKLBW512,
30194 IX86_BUILTIN_PUNPCKLWD512,
30195 IX86_BUILTIN_PSHUFB512,
30196 IX86_BUILTIN_PSHUFHW512,
30197 IX86_BUILTIN_PSHUFLW512,
30198 IX86_BUILTIN_PSRAWI512,
30199 IX86_BUILTIN_PSRAW512,
30200 IX86_BUILTIN_PSRLWI512,
30201 IX86_BUILTIN_PSRLW512,
30202 IX86_BUILTIN_CVTB2MASK512,
30203 IX86_BUILTIN_CVTW2MASK512,
30204 IX86_BUILTIN_CVTMASK2B512,
30205 IX86_BUILTIN_CVTMASK2W512,
30206 IX86_BUILTIN_PCMPEQB512_MASK,
30207 IX86_BUILTIN_PCMPEQW512_MASK,
30208 IX86_BUILTIN_PCMPGTB512_MASK,
30209 IX86_BUILTIN_PCMPGTW512_MASK,
30210 IX86_BUILTIN_PTESTMB512,
30211 IX86_BUILTIN_PTESTMW512,
30212 IX86_BUILTIN_PTESTNMB512,
30213 IX86_BUILTIN_PTESTNMW512,
30214 IX86_BUILTIN_PSLLVV32HI,
30215 IX86_BUILTIN_PABSB512,
30216 IX86_BUILTIN_PABSW512,
30217 IX86_BUILTIN_BLENDMW512,
30218 IX86_BUILTIN_BLENDMB512,
30219 IX86_BUILTIN_CMPB512,
30220 IX86_BUILTIN_CMPW512,
30221 IX86_BUILTIN_UCMPB512,
30222 IX86_BUILTIN_UCMPW512,
30224 /* Alternate 4 and 8 element gather/scatter for the vectorizer
30225 where all operands are 32-byte or 64-byte wide respectively. */
30226 IX86_BUILTIN_GATHERALTSIV4DF,
30227 IX86_BUILTIN_GATHERALTDIV8SF,
30228 IX86_BUILTIN_GATHERALTSIV4DI,
30229 IX86_BUILTIN_GATHERALTDIV8SI,
30230 IX86_BUILTIN_GATHER3ALTDIV16SF,
30231 IX86_BUILTIN_GATHER3ALTDIV16SI,
30232 IX86_BUILTIN_GATHER3ALTSIV4DF,
30233 IX86_BUILTIN_GATHER3ALTDIV8SF,
30234 IX86_BUILTIN_GATHER3ALTSIV4DI,
30235 IX86_BUILTIN_GATHER3ALTDIV8SI,
30236 IX86_BUILTIN_GATHER3ALTSIV8DF,
30237 IX86_BUILTIN_GATHER3ALTSIV8DI,
30238 IX86_BUILTIN_GATHER3DIV16SF,
30239 IX86_BUILTIN_GATHER3DIV16SI,
30240 IX86_BUILTIN_GATHER3DIV8DF,
30241 IX86_BUILTIN_GATHER3DIV8DI,
30242 IX86_BUILTIN_GATHER3SIV16SF,
30243 IX86_BUILTIN_GATHER3SIV16SI,
30244 IX86_BUILTIN_GATHER3SIV8DF,
30245 IX86_BUILTIN_GATHER3SIV8DI,
30246 IX86_BUILTIN_SCATTERDIV16SF,
30247 IX86_BUILTIN_SCATTERDIV16SI,
30248 IX86_BUILTIN_SCATTERDIV8DF,
30249 IX86_BUILTIN_SCATTERDIV8DI,
30250 IX86_BUILTIN_SCATTERSIV16SF,
30251 IX86_BUILTIN_SCATTERSIV16SI,
30252 IX86_BUILTIN_SCATTERSIV8DF,
30253 IX86_BUILTIN_SCATTERSIV8DI,
30255 /* AVX512PF */
30256 IX86_BUILTIN_GATHERPFQPD,
30257 IX86_BUILTIN_GATHERPFDPS,
30258 IX86_BUILTIN_GATHERPFDPD,
30259 IX86_BUILTIN_GATHERPFQPS,
30260 IX86_BUILTIN_SCATTERPFDPD,
30261 IX86_BUILTIN_SCATTERPFDPS,
30262 IX86_BUILTIN_SCATTERPFQPD,
30263 IX86_BUILTIN_SCATTERPFQPS,
30265 /* AVX-512ER */
30266 IX86_BUILTIN_EXP2PD_MASK,
30267 IX86_BUILTIN_EXP2PS_MASK,
30268 IX86_BUILTIN_EXP2PS,
30269 IX86_BUILTIN_RCP28PD,
30270 IX86_BUILTIN_RCP28PS,
30271 IX86_BUILTIN_RCP28SD,
30272 IX86_BUILTIN_RCP28SS,
30273 IX86_BUILTIN_RSQRT28PD,
30274 IX86_BUILTIN_RSQRT28PS,
30275 IX86_BUILTIN_RSQRT28SD,
30276 IX86_BUILTIN_RSQRT28SS,
30278 /* AVX-512IFMA */
30279 IX86_BUILTIN_VPMADD52LUQ512,
30280 IX86_BUILTIN_VPMADD52HUQ512,
30281 IX86_BUILTIN_VPMADD52LUQ256,
30282 IX86_BUILTIN_VPMADD52HUQ256,
30283 IX86_BUILTIN_VPMADD52LUQ128,
30284 IX86_BUILTIN_VPMADD52HUQ128,
30285 IX86_BUILTIN_VPMADD52LUQ512_MASKZ,
30286 IX86_BUILTIN_VPMADD52HUQ512_MASKZ,
30287 IX86_BUILTIN_VPMADD52LUQ256_MASKZ,
30288 IX86_BUILTIN_VPMADD52HUQ256_MASKZ,
30289 IX86_BUILTIN_VPMADD52LUQ128_MASKZ,
30290 IX86_BUILTIN_VPMADD52HUQ128_MASKZ,
30292 /* AVX-512VBMI */
30293 IX86_BUILTIN_VPMULTISHIFTQB512,
30294 IX86_BUILTIN_VPMULTISHIFTQB256,
30295 IX86_BUILTIN_VPMULTISHIFTQB128,
30296 IX86_BUILTIN_VPERMVARQI512_MASK,
30297 IX86_BUILTIN_VPERMT2VARQI512,
30298 IX86_BUILTIN_VPERMT2VARQI512_MASKZ,
30299 IX86_BUILTIN_VPERMI2VARQI512,
30300 IX86_BUILTIN_VPERMVARQI256_MASK,
30301 IX86_BUILTIN_VPERMVARQI128_MASK,
30302 IX86_BUILTIN_VPERMT2VARQI256,
30303 IX86_BUILTIN_VPERMT2VARQI256_MASKZ,
30304 IX86_BUILTIN_VPERMT2VARQI128,
30305 IX86_BUILTIN_VPERMT2VARQI128_MASKZ,
30306 IX86_BUILTIN_VPERMI2VARQI256,
30307 IX86_BUILTIN_VPERMI2VARQI128,
30309 /* SHA builtins. */
30310 IX86_BUILTIN_SHA1MSG1,
30311 IX86_BUILTIN_SHA1MSG2,
30312 IX86_BUILTIN_SHA1NEXTE,
30313 IX86_BUILTIN_SHA1RNDS4,
30314 IX86_BUILTIN_SHA256MSG1,
30315 IX86_BUILTIN_SHA256MSG2,
30316 IX86_BUILTIN_SHA256RNDS2,
30318 /* CLWB instructions. */
30319 IX86_BUILTIN_CLWB,
30321 /* PCOMMIT instructions. */
30322 IX86_BUILTIN_PCOMMIT,
30324 /* CLFLUSHOPT instructions. */
30325 IX86_BUILTIN_CLFLUSHOPT,
30327 /* TFmode support builtins. */
30328 IX86_BUILTIN_INFQ,
30329 IX86_BUILTIN_HUGE_VALQ,
30330 IX86_BUILTIN_FABSQ,
30331 IX86_BUILTIN_COPYSIGNQ,
30333 /* Vectorizer support builtins. */
30334 IX86_BUILTIN_CEILPD_VEC_PACK_SFIX512,
30335 IX86_BUILTIN_CPYSGNPS,
30336 IX86_BUILTIN_CPYSGNPD,
30337 IX86_BUILTIN_CPYSGNPS256,
30338 IX86_BUILTIN_CPYSGNPS512,
30339 IX86_BUILTIN_CPYSGNPD256,
30340 IX86_BUILTIN_CPYSGNPD512,
30341 IX86_BUILTIN_FLOORPD_VEC_PACK_SFIX512,
30342 IX86_BUILTIN_ROUNDPD_AZ_VEC_PACK_SFIX512,
30345 /* FMA4 instructions. */
30346 IX86_BUILTIN_VFMADDSS,
30347 IX86_BUILTIN_VFMADDSD,
30348 IX86_BUILTIN_VFMADDPS,
30349 IX86_BUILTIN_VFMADDPD,
30350 IX86_BUILTIN_VFMADDPS256,
30351 IX86_BUILTIN_VFMADDPD256,
30352 IX86_BUILTIN_VFMADDSUBPS,
30353 IX86_BUILTIN_VFMADDSUBPD,
30354 IX86_BUILTIN_VFMADDSUBPS256,
30355 IX86_BUILTIN_VFMADDSUBPD256,
30357 /* FMA3 instructions. */
30358 IX86_BUILTIN_VFMADDSS3,
30359 IX86_BUILTIN_VFMADDSD3,
30361 /* XOP instructions. */
30362 IX86_BUILTIN_VPCMOV,
30363 IX86_BUILTIN_VPCMOV_V2DI,
30364 IX86_BUILTIN_VPCMOV_V4SI,
30365 IX86_BUILTIN_VPCMOV_V8HI,
30366 IX86_BUILTIN_VPCMOV_V16QI,
30367 IX86_BUILTIN_VPCMOV_V4SF,
30368 IX86_BUILTIN_VPCMOV_V2DF,
30369 IX86_BUILTIN_VPCMOV256,
30370 IX86_BUILTIN_VPCMOV_V4DI256,
30371 IX86_BUILTIN_VPCMOV_V8SI256,
30372 IX86_BUILTIN_VPCMOV_V16HI256,
30373 IX86_BUILTIN_VPCMOV_V32QI256,
30374 IX86_BUILTIN_VPCMOV_V8SF256,
30375 IX86_BUILTIN_VPCMOV_V4DF256,
30377 IX86_BUILTIN_VPPERM,
30379 IX86_BUILTIN_VPMACSSWW,
30380 IX86_BUILTIN_VPMACSWW,
30381 IX86_BUILTIN_VPMACSSWD,
30382 IX86_BUILTIN_VPMACSWD,
30383 IX86_BUILTIN_VPMACSSDD,
30384 IX86_BUILTIN_VPMACSDD,
30385 IX86_BUILTIN_VPMACSSDQL,
30386 IX86_BUILTIN_VPMACSSDQH,
30387 IX86_BUILTIN_VPMACSDQL,
30388 IX86_BUILTIN_VPMACSDQH,
30389 IX86_BUILTIN_VPMADCSSWD,
30390 IX86_BUILTIN_VPMADCSWD,
30392 IX86_BUILTIN_VPHADDBW,
30393 IX86_BUILTIN_VPHADDBD,
30394 IX86_BUILTIN_VPHADDBQ,
30395 IX86_BUILTIN_VPHADDWD,
30396 IX86_BUILTIN_VPHADDWQ,
30397 IX86_BUILTIN_VPHADDDQ,
30398 IX86_BUILTIN_VPHADDUBW,
30399 IX86_BUILTIN_VPHADDUBD,
30400 IX86_BUILTIN_VPHADDUBQ,
30401 IX86_BUILTIN_VPHADDUWD,
30402 IX86_BUILTIN_VPHADDUWQ,
30403 IX86_BUILTIN_VPHADDUDQ,
30404 IX86_BUILTIN_VPHSUBBW,
30405 IX86_BUILTIN_VPHSUBWD,
30406 IX86_BUILTIN_VPHSUBDQ,
30408 IX86_BUILTIN_VPROTB,
30409 IX86_BUILTIN_VPROTW,
30410 IX86_BUILTIN_VPROTD,
30411 IX86_BUILTIN_VPROTQ,
30412 IX86_BUILTIN_VPROTB_IMM,
30413 IX86_BUILTIN_VPROTW_IMM,
30414 IX86_BUILTIN_VPROTD_IMM,
30415 IX86_BUILTIN_VPROTQ_IMM,
30417 IX86_BUILTIN_VPSHLB,
30418 IX86_BUILTIN_VPSHLW,
30419 IX86_BUILTIN_VPSHLD,
30420 IX86_BUILTIN_VPSHLQ,
30421 IX86_BUILTIN_VPSHAB,
30422 IX86_BUILTIN_VPSHAW,
30423 IX86_BUILTIN_VPSHAD,
30424 IX86_BUILTIN_VPSHAQ,
30426 IX86_BUILTIN_VFRCZSS,
30427 IX86_BUILTIN_VFRCZSD,
30428 IX86_BUILTIN_VFRCZPS,
30429 IX86_BUILTIN_VFRCZPD,
30430 IX86_BUILTIN_VFRCZPS256,
30431 IX86_BUILTIN_VFRCZPD256,
30433 IX86_BUILTIN_VPCOMEQUB,
30434 IX86_BUILTIN_VPCOMNEUB,
30435 IX86_BUILTIN_VPCOMLTUB,
30436 IX86_BUILTIN_VPCOMLEUB,
30437 IX86_BUILTIN_VPCOMGTUB,
30438 IX86_BUILTIN_VPCOMGEUB,
30439 IX86_BUILTIN_VPCOMFALSEUB,
30440 IX86_BUILTIN_VPCOMTRUEUB,
30442 IX86_BUILTIN_VPCOMEQUW,
30443 IX86_BUILTIN_VPCOMNEUW,
30444 IX86_BUILTIN_VPCOMLTUW,
30445 IX86_BUILTIN_VPCOMLEUW,
30446 IX86_BUILTIN_VPCOMGTUW,
30447 IX86_BUILTIN_VPCOMGEUW,
30448 IX86_BUILTIN_VPCOMFALSEUW,
30449 IX86_BUILTIN_VPCOMTRUEUW,
30451 IX86_BUILTIN_VPCOMEQUD,
30452 IX86_BUILTIN_VPCOMNEUD,
30453 IX86_BUILTIN_VPCOMLTUD,
30454 IX86_BUILTIN_VPCOMLEUD,
30455 IX86_BUILTIN_VPCOMGTUD,
30456 IX86_BUILTIN_VPCOMGEUD,
30457 IX86_BUILTIN_VPCOMFALSEUD,
30458 IX86_BUILTIN_VPCOMTRUEUD,
30460 IX86_BUILTIN_VPCOMEQUQ,
30461 IX86_BUILTIN_VPCOMNEUQ,
30462 IX86_BUILTIN_VPCOMLTUQ,
30463 IX86_BUILTIN_VPCOMLEUQ,
30464 IX86_BUILTIN_VPCOMGTUQ,
30465 IX86_BUILTIN_VPCOMGEUQ,
30466 IX86_BUILTIN_VPCOMFALSEUQ,
30467 IX86_BUILTIN_VPCOMTRUEUQ,
30469 IX86_BUILTIN_VPCOMEQB,
30470 IX86_BUILTIN_VPCOMNEB,
30471 IX86_BUILTIN_VPCOMLTB,
30472 IX86_BUILTIN_VPCOMLEB,
30473 IX86_BUILTIN_VPCOMGTB,
30474 IX86_BUILTIN_VPCOMGEB,
30475 IX86_BUILTIN_VPCOMFALSEB,
30476 IX86_BUILTIN_VPCOMTRUEB,
30478 IX86_BUILTIN_VPCOMEQW,
30479 IX86_BUILTIN_VPCOMNEW,
30480 IX86_BUILTIN_VPCOMLTW,
30481 IX86_BUILTIN_VPCOMLEW,
30482 IX86_BUILTIN_VPCOMGTW,
30483 IX86_BUILTIN_VPCOMGEW,
30484 IX86_BUILTIN_VPCOMFALSEW,
30485 IX86_BUILTIN_VPCOMTRUEW,
30487 IX86_BUILTIN_VPCOMEQD,
30488 IX86_BUILTIN_VPCOMNED,
30489 IX86_BUILTIN_VPCOMLTD,
30490 IX86_BUILTIN_VPCOMLED,
30491 IX86_BUILTIN_VPCOMGTD,
30492 IX86_BUILTIN_VPCOMGED,
30493 IX86_BUILTIN_VPCOMFALSED,
30494 IX86_BUILTIN_VPCOMTRUED,
30496 IX86_BUILTIN_VPCOMEQQ,
30497 IX86_BUILTIN_VPCOMNEQ,
30498 IX86_BUILTIN_VPCOMLTQ,
30499 IX86_BUILTIN_VPCOMLEQ,
30500 IX86_BUILTIN_VPCOMGTQ,
30501 IX86_BUILTIN_VPCOMGEQ,
30502 IX86_BUILTIN_VPCOMFALSEQ,
30503 IX86_BUILTIN_VPCOMTRUEQ,
30505 /* LWP instructions. */
30506 IX86_BUILTIN_LLWPCB,
30507 IX86_BUILTIN_SLWPCB,
30508 IX86_BUILTIN_LWPVAL32,
30509 IX86_BUILTIN_LWPVAL64,
30510 IX86_BUILTIN_LWPINS32,
30511 IX86_BUILTIN_LWPINS64,
30513 IX86_BUILTIN_CLZS,
30515 /* RTM */
30516 IX86_BUILTIN_XBEGIN,
30517 IX86_BUILTIN_XEND,
30518 IX86_BUILTIN_XABORT,
30519 IX86_BUILTIN_XTEST,
30521 /* MPX */
30522 IX86_BUILTIN_BNDMK,
30523 IX86_BUILTIN_BNDSTX,
30524 IX86_BUILTIN_BNDLDX,
30525 IX86_BUILTIN_BNDCL,
30526 IX86_BUILTIN_BNDCU,
30527 IX86_BUILTIN_BNDRET,
30528 IX86_BUILTIN_BNDNARROW,
30529 IX86_BUILTIN_BNDINT,
30530 IX86_BUILTIN_SIZEOF,
30531 IX86_BUILTIN_BNDLOWER,
30532 IX86_BUILTIN_BNDUPPER,
30534 /* BMI instructions. */
30535 IX86_BUILTIN_BEXTR32,
30536 IX86_BUILTIN_BEXTR64,
30537 IX86_BUILTIN_CTZS,
30539 /* TBM instructions. */
30540 IX86_BUILTIN_BEXTRI32,
30541 IX86_BUILTIN_BEXTRI64,
30543 /* BMI2 instructions. */
30544 IX86_BUILTIN_BZHI32,
30545 IX86_BUILTIN_BZHI64,
30546 IX86_BUILTIN_PDEP32,
30547 IX86_BUILTIN_PDEP64,
30548 IX86_BUILTIN_PEXT32,
30549 IX86_BUILTIN_PEXT64,
30551 /* ADX instructions. */
30552 IX86_BUILTIN_ADDCARRYX32,
30553 IX86_BUILTIN_ADDCARRYX64,
30555 /* SBB instructions. */
30556 IX86_BUILTIN_SBB32,
30557 IX86_BUILTIN_SBB64,
30559 /* FSGSBASE instructions. */
30560 IX86_BUILTIN_RDFSBASE32,
30561 IX86_BUILTIN_RDFSBASE64,
30562 IX86_BUILTIN_RDGSBASE32,
30563 IX86_BUILTIN_RDGSBASE64,
30564 IX86_BUILTIN_WRFSBASE32,
30565 IX86_BUILTIN_WRFSBASE64,
30566 IX86_BUILTIN_WRGSBASE32,
30567 IX86_BUILTIN_WRGSBASE64,
30569 /* RDRND instructions. */
30570 IX86_BUILTIN_RDRAND16_STEP,
30571 IX86_BUILTIN_RDRAND32_STEP,
30572 IX86_BUILTIN_RDRAND64_STEP,
30574 /* RDSEED instructions. */
30575 IX86_BUILTIN_RDSEED16_STEP,
30576 IX86_BUILTIN_RDSEED32_STEP,
30577 IX86_BUILTIN_RDSEED64_STEP,
30579 /* F16C instructions. */
30580 IX86_BUILTIN_CVTPH2PS,
30581 IX86_BUILTIN_CVTPH2PS256,
30582 IX86_BUILTIN_CVTPS2PH,
30583 IX86_BUILTIN_CVTPS2PH256,
30585 /* MONITORX and MWAITX instructions. */
30586 IX86_BUILTIN_MONITORX,
30587 IX86_BUILTIN_MWAITX,
30589 /* CFString built-in for Darwin. */
30590 IX86_BUILTIN_CFSTRING,
30592 /* Builtins to get CPU type and supported features. */
30593 IX86_BUILTIN_CPU_INIT,
30594 IX86_BUILTIN_CPU_IS,
30595 IX86_BUILTIN_CPU_SUPPORTS,
30597 /* Read/write FLAGS register built-ins. */
30598 IX86_BUILTIN_READ_FLAGS,
30599 IX86_BUILTIN_WRITE_FLAGS,
30601 IX86_BUILTIN_MAX
30604 /* Table for the ix86 builtin decls. */
30605 static GTY(()) tree ix86_builtins[(int) IX86_BUILTIN_MAX];
30607 /* Table of all of the builtin functions that are possible with different ISAs
30608 but are waiting to be built until a function is declared to use that
30609 ISA. */
30610 struct builtin_isa {
30611 const char *name; /* function name */
30612 enum ix86_builtin_func_type tcode; /* type to use in the declaration */
30613 HOST_WIDE_INT isa; /* isa_flags this builtin is defined for */
30614 bool const_p; /* true if the declaration is constant */
30615 bool leaf_p; /* true if the declaration has leaf attribute */
30616 bool nothrow_p; /* true if the declaration has nothrow attribute */
30617 bool set_and_not_built_p;
30620 static struct builtin_isa ix86_builtins_isa[(int) IX86_BUILTIN_MAX];
30622 /* Bits that can still enable any inclusion of a builtin. */
30623 static HOST_WIDE_INT deferred_isa_values = 0;
30625 /* Add an ix86 target builtin function with CODE, NAME and TYPE. Save the MASK
30626 of which isa_flags to use in the ix86_builtins_isa array. Stores the
30627 function decl in the ix86_builtins array. Returns the function decl or
30628 NULL_TREE if the builtin was not added.
30630 If the front end has a special hook for builtin functions, delay adding
30631 builtin functions that aren't in the current ISA until the ISA is changed
30632 with function specific optimization. Doing so can save about 300K for the
30633 default compiler. When the builtin is expanded, check at that time whether
30634 it is valid.
30636 If the front end doesn't have a special hook, record all builtins, even if
30637 they aren't in the current ISA, in case the user uses
30638 function specific options for a different ISA, so that we don't get scope
30639 errors if a builtin is added in the middle of a function scope. */
30641 static inline tree
30642 def_builtin (HOST_WIDE_INT mask, const char *name,
30643 enum ix86_builtin_func_type tcode,
30644 enum ix86_builtins code)
30646 tree decl = NULL_TREE;
30648 if (!(mask & OPTION_MASK_ISA_64BIT) || TARGET_64BIT)
30650 ix86_builtins_isa[(int) code].isa = mask;
30652 mask &= ~OPTION_MASK_ISA_64BIT;
30653 if (mask == 0
30654 || (mask & ix86_isa_flags) != 0
30655 || (lang_hooks.builtin_function
30656 == lang_hooks.builtin_function_ext_scope))
30659 tree type = ix86_get_builtin_func_type (tcode);
30660 decl = add_builtin_function (name, type, code, BUILT_IN_MD,
30661 NULL, NULL_TREE);
30662 ix86_builtins[(int) code] = decl;
30663 ix86_builtins_isa[(int) code].set_and_not_built_p = false;
30665 else
30667 /* Only a MASK with set_and_not_built_p == true can potentially
30668 include a builtin later. */
30669 deferred_isa_values |= mask;
30670 ix86_builtins[(int) code] = NULL_TREE;
30671 ix86_builtins_isa[(int) code].tcode = tcode;
30672 ix86_builtins_isa[(int) code].name = name;
30673 ix86_builtins_isa[(int) code].leaf_p = false;
30674 ix86_builtins_isa[(int) code].nothrow_p = false;
30675 ix86_builtins_isa[(int) code].const_p = false;
30676 ix86_builtins_isa[(int) code].set_and_not_built_p = true;
30680 return decl;
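/* A rough usage sketch: the ix86 builtin initialization routines call
   def_builtin (or its "const" wrapper below) once per builtin.  The mask,
   name, function-type code and builtin code in this example are purely
   illustrative:

     def_builtin (OPTION_MASK_ISA_SSE2, "__builtin_ia32_example",
                  V2DF_FTYPE_V2DF_V2DF, IX86_BUILTIN_EXAMPLE);

   If the requested ISA is already enabled (or the front end registers
   builtins in the external scope), the decl is created immediately;
   otherwise the request is only recorded in ix86_builtins_isa[] and
   deferred_isa_values, to be materialized later by ix86_add_new_builtins.  */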
30683 /* Like def_builtin, but also marks the function decl "const". */
30685 static inline tree
30686 def_builtin_const (HOST_WIDE_INT mask, const char *name,
30687 enum ix86_builtin_func_type tcode, enum ix86_builtins code)
30689 tree decl = def_builtin (mask, name, tcode, code);
30690 if (decl)
30691 TREE_READONLY (decl) = 1;
30692 else
30693 ix86_builtins_isa[(int) code].const_p = true;
30695 return decl;
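/* Why the "const" marking matters (the builtin name here is hypothetical):
   a decl with TREE_READONLY set is treated as depending only on its
   arguments and having no side effects, so the middle end may CSE repeated
   calls such as

     x = __builtin_ia32_example (a, b);
     y = __builtin_ia32_example (a, b);   // may simply reuse x

   For deferred builtins the const_p flag records the request until the
   decl is actually built.  */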
30698 /* Add any new builtin functions for a given ISA that may not have been
30699 declared. This saves a bit of space compared to adding all of the
30700 declarations to the tree, even if we didn't use them. */
30702 static void
30703 ix86_add_new_builtins (HOST_WIDE_INT isa)
30705 if ((isa & deferred_isa_values) == 0)
30706 return;
30708 /* Bits present in ISA can now be removed from the remaining potential isa values. */
30709 deferred_isa_values &= ~isa;
30711 int i;
30712 tree saved_current_target_pragma = current_target_pragma;
30713 current_target_pragma = NULL_TREE;
30715 for (i = 0; i < (int)IX86_BUILTIN_MAX; i++)
30717 if ((ix86_builtins_isa[i].isa & isa) != 0
30718 && ix86_builtins_isa[i].set_and_not_built_p)
30720 tree decl, type;
30722 /* Don't define the builtin again. */
30723 ix86_builtins_isa[i].set_and_not_built_p = false;
30725 type = ix86_get_builtin_func_type (ix86_builtins_isa[i].tcode);
30726 decl = add_builtin_function_ext_scope (ix86_builtins_isa[i].name,
30727 type, i, BUILT_IN_MD, NULL,
30728 NULL_TREE);
30730 ix86_builtins[i] = decl;
30731 if (ix86_builtins_isa[i].const_p)
30732 TREE_READONLY (decl) = 1;
30733 if (ix86_builtins_isa[i].leaf_p)
30734 DECL_ATTRIBUTES (decl) = build_tree_list (get_identifier ("leaf"),
30735 NULL_TREE);
30736 if (ix86_builtins_isa[i].nothrow_p)
30737 TREE_NOTHROW (decl) = 1;
30741 current_target_pragma = saved_current_target_pragma;
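/* A sketch of the kind of user code that exercises the deferred path
   (illustrative only).  When a translation unit is compiled without -mavx2,
   the AVX2 builtins are merely recorded; switching the ISA with a target
   attribute or pragma causes the deferred decls to be added here, e.g.:

     __attribute__ ((target ("avx2")))
     __m256i
     add8 (__m256i a, __m256i b)
     {
       return _mm256_add_epi32 (a, b);   // expands an AVX2 builtin
     }
*/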
30744 /* Bits for builtin_description.flag. */
30746 /* Set when we don't support the comparison natively, and should
30747 swap the operands in order to support it. */
30748 #define BUILTIN_DESC_SWAP_OPERANDS 1
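/* Illustrative note: for a comparison with no direct machine pattern, the
   expander can swap the operands and use the reverse comparison instead,
   e.g. a LT test can be emitted as a GT test with the operands exchanged
   (a < b is equivalent to b > a).  Table entries that need this treatment
   set BUILTIN_DESC_SWAP_OPERANDS in their flag field.  */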
30750 struct builtin_description
30752 const HOST_WIDE_INT mask;
30753 const enum insn_code icode;
30754 const char *const name;
30755 const enum ix86_builtins code;
30756 const enum rtx_code comparison;
30757 const int flag;
30760 static const struct builtin_description bdesc_comi[] =
30762 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comieq", IX86_BUILTIN_COMIEQSS, UNEQ, 0 },
30763 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comilt", IX86_BUILTIN_COMILTSS, UNLT, 0 },
30764 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comile", IX86_BUILTIN_COMILESS, UNLE, 0 },
30765 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comigt", IX86_BUILTIN_COMIGTSS, GT, 0 },
30766 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comige", IX86_BUILTIN_COMIGESS, GE, 0 },
30767 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comineq", IX86_BUILTIN_COMINEQSS, LTGT, 0 },
30768 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomieq", IX86_BUILTIN_UCOMIEQSS, UNEQ, 0 },
30769 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomilt", IX86_BUILTIN_UCOMILTSS, UNLT, 0 },
30770 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomile", IX86_BUILTIN_UCOMILESS, UNLE, 0 },
30771 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomigt", IX86_BUILTIN_UCOMIGTSS, GT, 0 },
30772 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomige", IX86_BUILTIN_UCOMIGESS, GE, 0 },
30773 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomineq", IX86_BUILTIN_UCOMINEQSS, LTGT, 0 },
30774 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdeq", IX86_BUILTIN_COMIEQSD, UNEQ, 0 },
30775 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdlt", IX86_BUILTIN_COMILTSD, UNLT, 0 },
30776 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdle", IX86_BUILTIN_COMILESD, UNLE, 0 },
30777 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdgt", IX86_BUILTIN_COMIGTSD, GT, 0 },
30778 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdge", IX86_BUILTIN_COMIGESD, GE, 0 },
30779 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdneq", IX86_BUILTIN_COMINEQSD, LTGT, 0 },
30780 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdeq", IX86_BUILTIN_UCOMIEQSD, UNEQ, 0 },
30781 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdlt", IX86_BUILTIN_UCOMILTSD, UNLT, 0 },
30782 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdle", IX86_BUILTIN_UCOMILESD, UNLE, 0 },
30783 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdgt", IX86_BUILTIN_UCOMIGTSD, GT, 0 },
30784 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdge", IX86_BUILTIN_UCOMIGESD, GE, 0 },
30785 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdneq", IX86_BUILTIN_UCOMINEQSD, LTGT, 0 },
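/* How one of the rows above reaches user code: a rough sketch of the
   corresponding xmmintrin.h wrapper (inline attributes omitted for
   brevity):

     static __inline int
     _mm_comieq_ss (__m128 __A, __m128 __B)
     {
       return __builtin_ia32_comieq ((__v4sf) __A, (__v4sf) __B);
     }

   The UNEQ/UNLT/... codes in the comparison column drive how the expander
   turns each builtin into a comi/ucomi pattern plus a flags test.  */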
30788 static const struct builtin_description bdesc_pcmpestr[] =
30790 /* SSE4.2 */
30791 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestri128", IX86_BUILTIN_PCMPESTRI128, UNKNOWN, 0 },
30792 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestrm128", IX86_BUILTIN_PCMPESTRM128, UNKNOWN, 0 },
30793 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestria128", IX86_BUILTIN_PCMPESTRA128, UNKNOWN, (int) CCAmode },
30794 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestric128", IX86_BUILTIN_PCMPESTRC128, UNKNOWN, (int) CCCmode },
30795 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestrio128", IX86_BUILTIN_PCMPESTRO128, UNKNOWN, (int) CCOmode },
30796 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestris128", IX86_BUILTIN_PCMPESTRS128, UNKNOWN, (int) CCSmode },
30797 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestriz128", IX86_BUILTIN_PCMPESTRZ128, UNKNOWN, (int) CCZmode },
30800 static const struct builtin_description bdesc_pcmpistr[] =
30802 /* SSE4.2 */
30803 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistri128", IX86_BUILTIN_PCMPISTRI128, UNKNOWN, 0 },
30804 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistrm128", IX86_BUILTIN_PCMPISTRM128, UNKNOWN, 0 },
30805 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistria128", IX86_BUILTIN_PCMPISTRA128, UNKNOWN, (int) CCAmode },
30806 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistric128", IX86_BUILTIN_PCMPISTRC128, UNKNOWN, (int) CCCmode },
30807 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistrio128", IX86_BUILTIN_PCMPISTRO128, UNKNOWN, (int) CCOmode },
30808 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistris128", IX86_BUILTIN_PCMPISTRS128, UNKNOWN, (int) CCSmode },
30809 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistriz128", IX86_BUILTIN_PCMPISTRZ128, UNKNOWN, (int) CCZmode },
30812 /* Special builtins with variable number of arguments. */
30813 static const struct builtin_description bdesc_special_args[] =
30815 { ~OPTION_MASK_ISA_64BIT, CODE_FOR_nothing, "__builtin_ia32_rdtsc", IX86_BUILTIN_RDTSC, UNKNOWN, (int) UINT64_FTYPE_VOID },
30816 { ~OPTION_MASK_ISA_64BIT, CODE_FOR_nothing, "__builtin_ia32_rdtscp", IX86_BUILTIN_RDTSCP, UNKNOWN, (int) UINT64_FTYPE_PUNSIGNED },
30817 { ~OPTION_MASK_ISA_64BIT, CODE_FOR_pause, "__builtin_ia32_pause", IX86_BUILTIN_PAUSE, UNKNOWN, (int) VOID_FTYPE_VOID },
30819 /* 80387 (for use internally for atomic compound assignment). */
30820 { 0, CODE_FOR_fnstenv, "__builtin_ia32_fnstenv", IX86_BUILTIN_FNSTENV, UNKNOWN, (int) VOID_FTYPE_PVOID },
30821 { 0, CODE_FOR_fldenv, "__builtin_ia32_fldenv", IX86_BUILTIN_FLDENV, UNKNOWN, (int) VOID_FTYPE_PCVOID },
30822 { 0, CODE_FOR_fnstsw, "__builtin_ia32_fnstsw", IX86_BUILTIN_FNSTSW, UNKNOWN, (int) USHORT_FTYPE_VOID },
30823 { 0, CODE_FOR_fnclex, "__builtin_ia32_fnclex", IX86_BUILTIN_FNCLEX, UNKNOWN, (int) VOID_FTYPE_VOID },
30825 /* MMX */
30826 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_emms, "__builtin_ia32_emms", IX86_BUILTIN_EMMS, UNKNOWN, (int) VOID_FTYPE_VOID },
30828 /* 3DNow! */
30829 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_femms, "__builtin_ia32_femms", IX86_BUILTIN_FEMMS, UNKNOWN, (int) VOID_FTYPE_VOID },
30831 /* FXSR, XSAVE, XSAVEOPT, XSAVEC and XSAVES. */
30832 { OPTION_MASK_ISA_FXSR, CODE_FOR_nothing, "__builtin_ia32_fxsave", IX86_BUILTIN_FXSAVE, UNKNOWN, (int) VOID_FTYPE_PVOID },
30833 { OPTION_MASK_ISA_FXSR, CODE_FOR_nothing, "__builtin_ia32_fxrstor", IX86_BUILTIN_FXRSTOR, UNKNOWN, (int) VOID_FTYPE_PVOID },
30834 { OPTION_MASK_ISA_XSAVE, CODE_FOR_nothing, "__builtin_ia32_xsave", IX86_BUILTIN_XSAVE, UNKNOWN, (int) VOID_FTYPE_PVOID_INT64 },
30835 { OPTION_MASK_ISA_XSAVE, CODE_FOR_nothing, "__builtin_ia32_xrstor", IX86_BUILTIN_XRSTOR, UNKNOWN, (int) VOID_FTYPE_PVOID_INT64 },
30836 { OPTION_MASK_ISA_XSAVEOPT, CODE_FOR_nothing, "__builtin_ia32_xsaveopt", IX86_BUILTIN_XSAVEOPT, UNKNOWN, (int) VOID_FTYPE_PVOID_INT64 },
30837 { OPTION_MASK_ISA_XSAVES, CODE_FOR_nothing, "__builtin_ia32_xsaves", IX86_BUILTIN_XSAVES, UNKNOWN, (int) VOID_FTYPE_PVOID_INT64 },
30838 { OPTION_MASK_ISA_XSAVES, CODE_FOR_nothing, "__builtin_ia32_xrstors", IX86_BUILTIN_XRSTORS, UNKNOWN, (int) VOID_FTYPE_PVOID_INT64 },
30839 { OPTION_MASK_ISA_XSAVEC, CODE_FOR_nothing, "__builtin_ia32_xsavec", IX86_BUILTIN_XSAVEC, UNKNOWN, (int) VOID_FTYPE_PVOID_INT64 },
30841 { OPTION_MASK_ISA_FXSR | OPTION_MASK_ISA_64BIT, CODE_FOR_nothing, "__builtin_ia32_fxsave64", IX86_BUILTIN_FXSAVE64, UNKNOWN, (int) VOID_FTYPE_PVOID },
30842 { OPTION_MASK_ISA_FXSR | OPTION_MASK_ISA_64BIT, CODE_FOR_nothing, "__builtin_ia32_fxrstor64", IX86_BUILTIN_FXRSTOR64, UNKNOWN, (int) VOID_FTYPE_PVOID },
30843 { OPTION_MASK_ISA_XSAVE | OPTION_MASK_ISA_64BIT, CODE_FOR_nothing, "__builtin_ia32_xsave64", IX86_BUILTIN_XSAVE64, UNKNOWN, (int) VOID_FTYPE_PVOID_INT64 },
30844 { OPTION_MASK_ISA_XSAVE | OPTION_MASK_ISA_64BIT, CODE_FOR_nothing, "__builtin_ia32_xrstor64", IX86_BUILTIN_XRSTOR64, UNKNOWN, (int) VOID_FTYPE_PVOID_INT64 },
30845 { OPTION_MASK_ISA_XSAVEOPT | OPTION_MASK_ISA_64BIT, CODE_FOR_nothing, "__builtin_ia32_xsaveopt64", IX86_BUILTIN_XSAVEOPT64, UNKNOWN, (int) VOID_FTYPE_PVOID_INT64 },
30846 { OPTION_MASK_ISA_XSAVES | OPTION_MASK_ISA_64BIT, CODE_FOR_nothing, "__builtin_ia32_xsaves64", IX86_BUILTIN_XSAVES64, UNKNOWN, (int) VOID_FTYPE_PVOID_INT64 },
30847 { OPTION_MASK_ISA_XSAVES | OPTION_MASK_ISA_64BIT, CODE_FOR_nothing, "__builtin_ia32_xrstors64", IX86_BUILTIN_XRSTORS64, UNKNOWN, (int) VOID_FTYPE_PVOID_INT64 },
30848 { OPTION_MASK_ISA_XSAVEC | OPTION_MASK_ISA_64BIT, CODE_FOR_nothing, "__builtin_ia32_xsavec64", IX86_BUILTIN_XSAVEC64, UNKNOWN, (int) VOID_FTYPE_PVOID_INT64 },
30850 /* SSE */
30851 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_storeups, "__builtin_ia32_storeups", IX86_BUILTIN_STOREUPS, UNKNOWN, (int) VOID_FTYPE_PFLOAT_V4SF },
30852 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_movntv4sf, "__builtin_ia32_movntps", IX86_BUILTIN_MOVNTPS, UNKNOWN, (int) VOID_FTYPE_PFLOAT_V4SF },
30853 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_loadups, "__builtin_ia32_loadups", IX86_BUILTIN_LOADUPS, UNKNOWN, (int) V4SF_FTYPE_PCFLOAT },
30855 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_loadhps_exp, "__builtin_ia32_loadhps", IX86_BUILTIN_LOADHPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_PCV2SF },
30856 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_loadlps_exp, "__builtin_ia32_loadlps", IX86_BUILTIN_LOADLPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_PCV2SF },
30857 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_storehps, "__builtin_ia32_storehps", IX86_BUILTIN_STOREHPS, UNKNOWN, (int) VOID_FTYPE_PV2SF_V4SF },
30858 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_storelps, "__builtin_ia32_storelps", IX86_BUILTIN_STORELPS, UNKNOWN, (int) VOID_FTYPE_PV2SF_V4SF },
30860 /* SSE or 3DNow!A */
30861 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_sse_sfence, "__builtin_ia32_sfence", IX86_BUILTIN_SFENCE, UNKNOWN, (int) VOID_FTYPE_VOID },
30862 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_sse_movntq, "__builtin_ia32_movntq", IX86_BUILTIN_MOVNTQ, UNKNOWN, (int) VOID_FTYPE_PULONGLONG_ULONGLONG },
30864 /* SSE2 */
30865 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_lfence, "__builtin_ia32_lfence", IX86_BUILTIN_LFENCE, UNKNOWN, (int) VOID_FTYPE_VOID },
30866 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_mfence, 0, IX86_BUILTIN_MFENCE, UNKNOWN, (int) VOID_FTYPE_VOID },
30867 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_storeupd, "__builtin_ia32_storeupd", IX86_BUILTIN_STOREUPD, UNKNOWN, (int) VOID_FTYPE_PDOUBLE_V2DF },
30868 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_storedquv16qi, "__builtin_ia32_storedqu", IX86_BUILTIN_STOREDQU, UNKNOWN, (int) VOID_FTYPE_PCHAR_V16QI },
30869 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movntv2df, "__builtin_ia32_movntpd", IX86_BUILTIN_MOVNTPD, UNKNOWN, (int) VOID_FTYPE_PDOUBLE_V2DF },
30870 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movntv2di, "__builtin_ia32_movntdq", IX86_BUILTIN_MOVNTDQ, UNKNOWN, (int) VOID_FTYPE_PV2DI_V2DI },
30871 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movntisi, "__builtin_ia32_movnti", IX86_BUILTIN_MOVNTI, UNKNOWN, (int) VOID_FTYPE_PINT_INT },
30872 { OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_64BIT, CODE_FOR_sse2_movntidi, "__builtin_ia32_movnti64", IX86_BUILTIN_MOVNTI64, UNKNOWN, (int) VOID_FTYPE_PLONGLONG_LONGLONG },
30873 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_loadupd, "__builtin_ia32_loadupd", IX86_BUILTIN_LOADUPD, UNKNOWN, (int) V2DF_FTYPE_PCDOUBLE },
30874 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_loaddquv16qi, "__builtin_ia32_loaddqu", IX86_BUILTIN_LOADDQU, UNKNOWN, (int) V16QI_FTYPE_PCCHAR },
30876 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_loadhpd_exp, "__builtin_ia32_loadhpd", IX86_BUILTIN_LOADHPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_PCDOUBLE },
30877 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_loadlpd_exp, "__builtin_ia32_loadlpd", IX86_BUILTIN_LOADLPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_PCDOUBLE },
30879 /* SSE3 */
30880 { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_lddqu, "__builtin_ia32_lddqu", IX86_BUILTIN_LDDQU, UNKNOWN, (int) V16QI_FTYPE_PCCHAR },
30882 /* SSE4.1 */
30883 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_movntdqa, "__builtin_ia32_movntdqa", IX86_BUILTIN_MOVNTDQA, UNKNOWN, (int) V2DI_FTYPE_PV2DI },
30885 /* SSE4A */
30886 { OPTION_MASK_ISA_SSE4A, CODE_FOR_sse4a_vmmovntv2df, "__builtin_ia32_movntsd", IX86_BUILTIN_MOVNTSD, UNKNOWN, (int) VOID_FTYPE_PDOUBLE_V2DF },
30887 { OPTION_MASK_ISA_SSE4A, CODE_FOR_sse4a_vmmovntv4sf, "__builtin_ia32_movntss", IX86_BUILTIN_MOVNTSS, UNKNOWN, (int) VOID_FTYPE_PFLOAT_V4SF },
30889 /* AVX */
30890 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vzeroall, "__builtin_ia32_vzeroall", IX86_BUILTIN_VZEROALL, UNKNOWN, (int) VOID_FTYPE_VOID },
30891 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vzeroupper, "__builtin_ia32_vzeroupper", IX86_BUILTIN_VZEROUPPER, UNKNOWN, (int) VOID_FTYPE_VOID },
30893 { OPTION_MASK_ISA_AVX, CODE_FOR_vec_dupv4sf, "__builtin_ia32_vbroadcastss", IX86_BUILTIN_VBROADCASTSS, UNKNOWN, (int) V4SF_FTYPE_PCFLOAT },
30894 { OPTION_MASK_ISA_AVX, CODE_FOR_vec_dupv4df, "__builtin_ia32_vbroadcastsd256", IX86_BUILTIN_VBROADCASTSD256, UNKNOWN, (int) V4DF_FTYPE_PCDOUBLE },
30895 { OPTION_MASK_ISA_AVX, CODE_FOR_vec_dupv8sf, "__builtin_ia32_vbroadcastss256", IX86_BUILTIN_VBROADCASTSS256, UNKNOWN, (int) V8SF_FTYPE_PCFLOAT },
30896 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vbroadcastf128_v4df, "__builtin_ia32_vbroadcastf128_pd256", IX86_BUILTIN_VBROADCASTPD256, UNKNOWN, (int) V4DF_FTYPE_PCV2DF },
30897 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vbroadcastf128_v8sf, "__builtin_ia32_vbroadcastf128_ps256", IX86_BUILTIN_VBROADCASTPS256, UNKNOWN, (int) V8SF_FTYPE_PCV4SF },
30899 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_loadupd256, "__builtin_ia32_loadupd256", IX86_BUILTIN_LOADUPD256, UNKNOWN, (int) V4DF_FTYPE_PCDOUBLE },
30900 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_loadups256, "__builtin_ia32_loadups256", IX86_BUILTIN_LOADUPS256, UNKNOWN, (int) V8SF_FTYPE_PCFLOAT },
30901 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_storeupd256, "__builtin_ia32_storeupd256", IX86_BUILTIN_STOREUPD256, UNKNOWN, (int) VOID_FTYPE_PDOUBLE_V4DF },
30902 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_storeups256, "__builtin_ia32_storeups256", IX86_BUILTIN_STOREUPS256, UNKNOWN, (int) VOID_FTYPE_PFLOAT_V8SF },
30903 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_loaddquv32qi, "__builtin_ia32_loaddqu256", IX86_BUILTIN_LOADDQU256, UNKNOWN, (int) V32QI_FTYPE_PCCHAR },
30904 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_storedquv32qi, "__builtin_ia32_storedqu256", IX86_BUILTIN_STOREDQU256, UNKNOWN, (int) VOID_FTYPE_PCHAR_V32QI },
30905 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_lddqu256, "__builtin_ia32_lddqu256", IX86_BUILTIN_LDDQU256, UNKNOWN, (int) V32QI_FTYPE_PCCHAR },
30907 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movntv4di, "__builtin_ia32_movntdq256", IX86_BUILTIN_MOVNTDQ256, UNKNOWN, (int) VOID_FTYPE_PV4DI_V4DI },
30908 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movntv4df, "__builtin_ia32_movntpd256", IX86_BUILTIN_MOVNTPD256, UNKNOWN, (int) VOID_FTYPE_PDOUBLE_V4DF },
30909 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movntv8sf, "__builtin_ia32_movntps256", IX86_BUILTIN_MOVNTPS256, UNKNOWN, (int) VOID_FTYPE_PFLOAT_V8SF },
30911 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskloadpd, "__builtin_ia32_maskloadpd", IX86_BUILTIN_MASKLOADPD, UNKNOWN, (int) V2DF_FTYPE_PCV2DF_V2DI },
30912 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskloadps, "__builtin_ia32_maskloadps", IX86_BUILTIN_MASKLOADPS, UNKNOWN, (int) V4SF_FTYPE_PCV4SF_V4SI },
30913 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskloadpd256, "__builtin_ia32_maskloadpd256", IX86_BUILTIN_MASKLOADPD256, UNKNOWN, (int) V4DF_FTYPE_PCV4DF_V4DI },
30914 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskloadps256, "__builtin_ia32_maskloadps256", IX86_BUILTIN_MASKLOADPS256, UNKNOWN, (int) V8SF_FTYPE_PCV8SF_V8SI },
30915 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskstorepd, "__builtin_ia32_maskstorepd", IX86_BUILTIN_MASKSTOREPD, UNKNOWN, (int) VOID_FTYPE_PV2DF_V2DI_V2DF },
30916 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskstoreps, "__builtin_ia32_maskstoreps", IX86_BUILTIN_MASKSTOREPS, UNKNOWN, (int) VOID_FTYPE_PV4SF_V4SI_V4SF },
30917 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskstorepd256, "__builtin_ia32_maskstorepd256", IX86_BUILTIN_MASKSTOREPD256, UNKNOWN, (int) VOID_FTYPE_PV4DF_V4DI_V4DF },
30918 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskstoreps256, "__builtin_ia32_maskstoreps256", IX86_BUILTIN_MASKSTOREPS256, UNKNOWN, (int) VOID_FTYPE_PV8SF_V8SI_V8SF },
30920 /* AVX2 */
30921 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_movntdqa, "__builtin_ia32_movntdqa256", IX86_BUILTIN_MOVNTDQA256, UNKNOWN, (int) V4DI_FTYPE_PV4DI },
30922 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_maskloadd, "__builtin_ia32_maskloadd", IX86_BUILTIN_MASKLOADD, UNKNOWN, (int) V4SI_FTYPE_PCV4SI_V4SI },
30923 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_maskloadq, "__builtin_ia32_maskloadq", IX86_BUILTIN_MASKLOADQ, UNKNOWN, (int) V2DI_FTYPE_PCV2DI_V2DI },
30924 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_maskloadd256, "__builtin_ia32_maskloadd256", IX86_BUILTIN_MASKLOADD256, UNKNOWN, (int) V8SI_FTYPE_PCV8SI_V8SI },
30925 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_maskloadq256, "__builtin_ia32_maskloadq256", IX86_BUILTIN_MASKLOADQ256, UNKNOWN, (int) V4DI_FTYPE_PCV4DI_V4DI },
30926 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_maskstored, "__builtin_ia32_maskstored", IX86_BUILTIN_MASKSTORED, UNKNOWN, (int) VOID_FTYPE_PV4SI_V4SI_V4SI },
30927 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_maskstoreq, "__builtin_ia32_maskstoreq", IX86_BUILTIN_MASKSTOREQ, UNKNOWN, (int) VOID_FTYPE_PV2DI_V2DI_V2DI },
30928 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_maskstored256, "__builtin_ia32_maskstored256", IX86_BUILTIN_MASKSTORED256, UNKNOWN, (int) VOID_FTYPE_PV8SI_V8SI_V8SI },
30929 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_maskstoreq256, "__builtin_ia32_maskstoreq256", IX86_BUILTIN_MASKSTOREQ256, UNKNOWN, (int) VOID_FTYPE_PV4DI_V4DI_V4DI },
30931 /* AVX512F */
30932 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_compressstorev16sf_mask, "__builtin_ia32_compressstoresf512_mask", IX86_BUILTIN_COMPRESSPSSTORE512, UNKNOWN, (int) VOID_FTYPE_PV16SF_V16SF_HI },
30933 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_compressstorev16si_mask, "__builtin_ia32_compressstoresi512_mask", IX86_BUILTIN_PCOMPRESSDSTORE512, UNKNOWN, (int) VOID_FTYPE_PV16SI_V16SI_HI },
30934 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_compressstorev8df_mask, "__builtin_ia32_compressstoredf512_mask", IX86_BUILTIN_COMPRESSPDSTORE512, UNKNOWN, (int) VOID_FTYPE_PV8DF_V8DF_QI },
30935 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_compressstorev8di_mask, "__builtin_ia32_compressstoredi512_mask", IX86_BUILTIN_PCOMPRESSQSTORE512, UNKNOWN, (int) VOID_FTYPE_PV8DI_V8DI_QI },
30936 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_expandv16sf_mask, "__builtin_ia32_expandloadsf512_mask", IX86_BUILTIN_EXPANDPSLOAD512, UNKNOWN, (int) V16SF_FTYPE_PCV16SF_V16SF_HI },
30937 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_expandv16sf_maskz, "__builtin_ia32_expandloadsf512_maskz", IX86_BUILTIN_EXPANDPSLOAD512Z, UNKNOWN, (int) V16SF_FTYPE_PCV16SF_V16SF_HI },
30938 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_expandv16si_mask, "__builtin_ia32_expandloadsi512_mask", IX86_BUILTIN_PEXPANDDLOAD512, UNKNOWN, (int) V16SI_FTYPE_PCV16SI_V16SI_HI },
30939 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_expandv16si_maskz, "__builtin_ia32_expandloadsi512_maskz", IX86_BUILTIN_PEXPANDDLOAD512Z, UNKNOWN, (int) V16SI_FTYPE_PCV16SI_V16SI_HI },
30940 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_expandv8df_mask, "__builtin_ia32_expandloaddf512_mask", IX86_BUILTIN_EXPANDPDLOAD512, UNKNOWN, (int) V8DF_FTYPE_PCV8DF_V8DF_QI },
30941 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_expandv8df_maskz, "__builtin_ia32_expandloaddf512_maskz", IX86_BUILTIN_EXPANDPDLOAD512Z, UNKNOWN, (int) V8DF_FTYPE_PCV8DF_V8DF_QI },
30942 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_expandv8di_mask, "__builtin_ia32_expandloaddi512_mask", IX86_BUILTIN_PEXPANDQLOAD512, UNKNOWN, (int) V8DI_FTYPE_PCV8DI_V8DI_QI },
30943 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_expandv8di_maskz, "__builtin_ia32_expandloaddi512_maskz", IX86_BUILTIN_PEXPANDQLOAD512Z, UNKNOWN, (int) V8DI_FTYPE_PCV8DI_V8DI_QI },
30944 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_loaddquv16si_mask, "__builtin_ia32_loaddqusi512_mask", IX86_BUILTIN_LOADDQUSI512, UNKNOWN, (int) V16SI_FTYPE_PCV16SI_V16SI_HI },
30945 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_loaddquv8di_mask, "__builtin_ia32_loaddqudi512_mask", IX86_BUILTIN_LOADDQUDI512, UNKNOWN, (int) V8DI_FTYPE_PCV8DI_V8DI_QI },
30946 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_loadupd512_mask, "__builtin_ia32_loadupd512_mask", IX86_BUILTIN_LOADUPD512, UNKNOWN, (int) V8DF_FTYPE_PCV8DF_V8DF_QI },
30947 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_loadups512_mask, "__builtin_ia32_loadups512_mask", IX86_BUILTIN_LOADUPS512, UNKNOWN, (int) V16SF_FTYPE_PCV16SF_V16SF_HI },
30948 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_loadv16sf_mask, "__builtin_ia32_loadaps512_mask", IX86_BUILTIN_LOADAPS512, UNKNOWN, (int) V16SF_FTYPE_PCV16SF_V16SF_HI },
30949 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_loadv16si_mask, "__builtin_ia32_movdqa32load512_mask", IX86_BUILTIN_MOVDQA32LOAD512, UNKNOWN, (int) V16SI_FTYPE_PCV16SI_V16SI_HI },
30950 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_loadv8df_mask, "__builtin_ia32_loadapd512_mask", IX86_BUILTIN_LOADAPD512, UNKNOWN, (int) V8DF_FTYPE_PCV8DF_V8DF_QI },
30951 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_loadv8di_mask, "__builtin_ia32_movdqa64load512_mask", IX86_BUILTIN_MOVDQA64LOAD512, UNKNOWN, (int) V8DI_FTYPE_PCV8DI_V8DI_QI },
30952 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_movntv16sf, "__builtin_ia32_movntps512", IX86_BUILTIN_MOVNTPS512, UNKNOWN, (int) VOID_FTYPE_PFLOAT_V16SF },
30953 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_movntv8df, "__builtin_ia32_movntpd512", IX86_BUILTIN_MOVNTPD512, UNKNOWN, (int) VOID_FTYPE_PDOUBLE_V8DF },
30954 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_movntv8di, "__builtin_ia32_movntdq512", IX86_BUILTIN_MOVNTDQ512, UNKNOWN, (int) VOID_FTYPE_PV8DI_V8DI },
30955 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_movntdqa, "__builtin_ia32_movntdqa512", IX86_BUILTIN_MOVNTDQA512, UNKNOWN, (int) V8DI_FTYPE_PV8DI },
30956 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_storedquv16si_mask, "__builtin_ia32_storedqusi512_mask", IX86_BUILTIN_STOREDQUSI512, UNKNOWN, (int) VOID_FTYPE_PV16SI_V16SI_HI },
30957 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_storedquv8di_mask, "__builtin_ia32_storedqudi512_mask", IX86_BUILTIN_STOREDQUDI512, UNKNOWN, (int) VOID_FTYPE_PV8DI_V8DI_QI },
30958 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_storeupd512_mask, "__builtin_ia32_storeupd512_mask", IX86_BUILTIN_STOREUPD512, UNKNOWN, (int) VOID_FTYPE_PV8DF_V8DF_QI },
30959 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_us_truncatev8div8si2_mask_store, "__builtin_ia32_pmovusqd512mem_mask", IX86_BUILTIN_PMOVUSQD512_MEM, UNKNOWN, (int) VOID_FTYPE_PV8SI_V8DI_QI },
30960 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_ss_truncatev8div8si2_mask_store, "__builtin_ia32_pmovsqd512mem_mask", IX86_BUILTIN_PMOVSQD512_MEM, UNKNOWN, (int) VOID_FTYPE_PV8SI_V8DI_QI },
30961 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_truncatev8div8si2_mask_store, "__builtin_ia32_pmovqd512mem_mask", IX86_BUILTIN_PMOVQD512_MEM, UNKNOWN, (int) VOID_FTYPE_PV8SI_V8DI_QI },
30962 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_us_truncatev8div8hi2_mask_store, "__builtin_ia32_pmovusqw512mem_mask", IX86_BUILTIN_PMOVUSQW512_MEM, UNKNOWN, (int) VOID_FTYPE_PV8HI_V8DI_QI },
30963 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_ss_truncatev8div8hi2_mask_store, "__builtin_ia32_pmovsqw512mem_mask", IX86_BUILTIN_PMOVSQW512_MEM, UNKNOWN, (int) VOID_FTYPE_PV8HI_V8DI_QI },
30964 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_truncatev8div8hi2_mask_store, "__builtin_ia32_pmovqw512mem_mask", IX86_BUILTIN_PMOVQW512_MEM, UNKNOWN, (int) VOID_FTYPE_PV8HI_V8DI_QI },
30965 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_us_truncatev16siv16hi2_mask_store, "__builtin_ia32_pmovusdw512mem_mask", IX86_BUILTIN_PMOVUSDW512_MEM, UNKNOWN, (int) VOID_FTYPE_PV16HI_V16SI_HI },
30966 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_ss_truncatev16siv16hi2_mask_store, "__builtin_ia32_pmovsdw512mem_mask", IX86_BUILTIN_PMOVSDW512_MEM, UNKNOWN, (int) VOID_FTYPE_PV16HI_V16SI_HI },
30967 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_truncatev16siv16hi2_mask_store, "__builtin_ia32_pmovdw512mem_mask", IX86_BUILTIN_PMOVDW512_MEM, UNKNOWN, (int) VOID_FTYPE_PV16HI_V16SI_HI },
30968 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_truncatev8div16qi2_mask_store, "__builtin_ia32_pmovqb512mem_mask", IX86_BUILTIN_PMOVQB512_MEM, UNKNOWN, (int) VOID_FTYPE_PV16QI_V8DI_QI },
30969 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_us_truncatev8div16qi2_mask_store, "__builtin_ia32_pmovusqb512mem_mask", IX86_BUILTIN_PMOVUSQB512_MEM, UNKNOWN, (int) VOID_FTYPE_PV16QI_V8DI_QI },
30970 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_ss_truncatev8div16qi2_mask_store, "__builtin_ia32_pmovsqb512mem_mask", IX86_BUILTIN_PMOVSQB512_MEM, UNKNOWN, (int) VOID_FTYPE_PV16QI_V8DI_QI },
30971 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_us_truncatev16siv16qi2_mask_store, "__builtin_ia32_pmovusdb512mem_mask", IX86_BUILTIN_PMOVUSDB512_MEM, UNKNOWN, (int) VOID_FTYPE_PV16QI_V16SI_HI },
30972 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_ss_truncatev16siv16qi2_mask_store, "__builtin_ia32_pmovsdb512mem_mask", IX86_BUILTIN_PMOVSDB512_MEM, UNKNOWN, (int) VOID_FTYPE_PV16QI_V16SI_HI },
30973 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_truncatev16siv16qi2_mask_store, "__builtin_ia32_pmovdb512mem_mask", IX86_BUILTIN_PMOVDB512_MEM, UNKNOWN, (int) VOID_FTYPE_PV16QI_V16SI_HI },
30974 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_storeups512_mask, "__builtin_ia32_storeups512_mask", IX86_BUILTIN_STOREUPS512, UNKNOWN, (int) VOID_FTYPE_PV16SF_V16SF_HI },
30975 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_storev16sf_mask, "__builtin_ia32_storeaps512_mask", IX86_BUILTIN_STOREAPS512, UNKNOWN, (int) VOID_FTYPE_PV16SF_V16SF_HI },
30976 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_storev16si_mask, "__builtin_ia32_movdqa32store512_mask", IX86_BUILTIN_MOVDQA32STORE512, UNKNOWN, (int) VOID_FTYPE_PV16SI_V16SI_HI },
30977 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_storev8df_mask, "__builtin_ia32_storeapd512_mask", IX86_BUILTIN_STOREAPD512, UNKNOWN, (int) VOID_FTYPE_PV8DF_V8DF_QI },
30978 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_storev8di_mask, "__builtin_ia32_movdqa64store512_mask", IX86_BUILTIN_MOVDQA64STORE512, UNKNOWN, (int) VOID_FTYPE_PV8DI_V8DI_QI },
30980 { OPTION_MASK_ISA_LWP, CODE_FOR_lwp_llwpcb, "__builtin_ia32_llwpcb", IX86_BUILTIN_LLWPCB, UNKNOWN, (int) VOID_FTYPE_PVOID },
30981 { OPTION_MASK_ISA_LWP, CODE_FOR_lwp_slwpcb, "__builtin_ia32_slwpcb", IX86_BUILTIN_SLWPCB, UNKNOWN, (int) PVOID_FTYPE_VOID },
30982 { OPTION_MASK_ISA_LWP, CODE_FOR_lwp_lwpvalsi3, "__builtin_ia32_lwpval32", IX86_BUILTIN_LWPVAL32, UNKNOWN, (int) VOID_FTYPE_UINT_UINT_UINT },
30983 { OPTION_MASK_ISA_LWP, CODE_FOR_lwp_lwpvaldi3, "__builtin_ia32_lwpval64", IX86_BUILTIN_LWPVAL64, UNKNOWN, (int) VOID_FTYPE_UINT64_UINT_UINT },
30984 { OPTION_MASK_ISA_LWP, CODE_FOR_lwp_lwpinssi3, "__builtin_ia32_lwpins32", IX86_BUILTIN_LWPINS32, UNKNOWN, (int) UCHAR_FTYPE_UINT_UINT_UINT },
30985 { OPTION_MASK_ISA_LWP, CODE_FOR_lwp_lwpinsdi3, "__builtin_ia32_lwpins64", IX86_BUILTIN_LWPINS64, UNKNOWN, (int) UCHAR_FTYPE_UINT64_UINT_UINT },
30987 /* FSGSBASE */
30988 { OPTION_MASK_ISA_FSGSBASE | OPTION_MASK_ISA_64BIT, CODE_FOR_rdfsbasesi, "__builtin_ia32_rdfsbase32", IX86_BUILTIN_RDFSBASE32, UNKNOWN, (int) UNSIGNED_FTYPE_VOID },
30989 { OPTION_MASK_ISA_FSGSBASE | OPTION_MASK_ISA_64BIT, CODE_FOR_rdfsbasedi, "__builtin_ia32_rdfsbase64", IX86_BUILTIN_RDFSBASE64, UNKNOWN, (int) UINT64_FTYPE_VOID },
30990 { OPTION_MASK_ISA_FSGSBASE | OPTION_MASK_ISA_64BIT, CODE_FOR_rdgsbasesi, "__builtin_ia32_rdgsbase32", IX86_BUILTIN_RDGSBASE32, UNKNOWN, (int) UNSIGNED_FTYPE_VOID },
30991 { OPTION_MASK_ISA_FSGSBASE | OPTION_MASK_ISA_64BIT, CODE_FOR_rdgsbasedi, "__builtin_ia32_rdgsbase64", IX86_BUILTIN_RDGSBASE64, UNKNOWN, (int) UINT64_FTYPE_VOID },
30992 { OPTION_MASK_ISA_FSGSBASE | OPTION_MASK_ISA_64BIT, CODE_FOR_wrfsbasesi, "__builtin_ia32_wrfsbase32", IX86_BUILTIN_WRFSBASE32, UNKNOWN, (int) VOID_FTYPE_UNSIGNED },
30993 { OPTION_MASK_ISA_FSGSBASE | OPTION_MASK_ISA_64BIT, CODE_FOR_wrfsbasedi, "__builtin_ia32_wrfsbase64", IX86_BUILTIN_WRFSBASE64, UNKNOWN, (int) VOID_FTYPE_UINT64 },
30994 { OPTION_MASK_ISA_FSGSBASE | OPTION_MASK_ISA_64BIT, CODE_FOR_wrgsbasesi, "__builtin_ia32_wrgsbase32", IX86_BUILTIN_WRGSBASE32, UNKNOWN, (int) VOID_FTYPE_UNSIGNED },
30995 { OPTION_MASK_ISA_FSGSBASE | OPTION_MASK_ISA_64BIT, CODE_FOR_wrgsbasedi, "__builtin_ia32_wrgsbase64", IX86_BUILTIN_WRGSBASE64, UNKNOWN, (int) VOID_FTYPE_UINT64 },
30997 /* RTM */
30998 { OPTION_MASK_ISA_RTM, CODE_FOR_xbegin, "__builtin_ia32_xbegin", IX86_BUILTIN_XBEGIN, UNKNOWN, (int) UNSIGNED_FTYPE_VOID },
30999 { OPTION_MASK_ISA_RTM, CODE_FOR_xend, "__builtin_ia32_xend", IX86_BUILTIN_XEND, UNKNOWN, (int) VOID_FTYPE_VOID },
31000 { OPTION_MASK_ISA_RTM, CODE_FOR_xtest, "__builtin_ia32_xtest", IX86_BUILTIN_XTEST, UNKNOWN, (int) INT_FTYPE_VOID },
31002 /* AVX512BW */
31003 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_loaddquv32hi_mask, "__builtin_ia32_loaddquhi512_mask", IX86_BUILTIN_LOADDQUHI512_MASK, UNKNOWN, (int) V32HI_FTYPE_PCV32HI_V32HI_SI },
31004 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512f_loaddquv64qi_mask, "__builtin_ia32_loaddquqi512_mask", IX86_BUILTIN_LOADDQUQI512_MASK, UNKNOWN, (int) V64QI_FTYPE_PCV64QI_V64QI_DI },
31005 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_storedquv32hi_mask, "__builtin_ia32_storedquhi512_mask", IX86_BUILTIN_STOREDQUHI512_MASK, UNKNOWN, (int) VOID_FTYPE_PV32HI_V32HI_SI },
31006 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_storedquv64qi_mask, "__builtin_ia32_storedquqi512_mask", IX86_BUILTIN_STOREDQUQI512_MASK, UNKNOWN, (int) VOID_FTYPE_PV64QI_V64QI_DI },
31008 /* AVX512VL */
31009 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_loaddquv16hi_mask, "__builtin_ia32_loaddquhi256_mask", IX86_BUILTIN_LOADDQUHI256_MASK, UNKNOWN, (int) V16HI_FTYPE_PCV16HI_V16HI_HI },
31010 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_loaddquv8hi_mask, "__builtin_ia32_loaddquhi128_mask", IX86_BUILTIN_LOADDQUHI128_MASK, UNKNOWN, (int) V8HI_FTYPE_PCV8HI_V8HI_QI },
31011 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_loaddquv32qi_mask, "__builtin_ia32_loaddquqi256_mask", IX86_BUILTIN_LOADDQUQI256_MASK, UNKNOWN, (int) V32QI_FTYPE_PCV32QI_V32QI_SI },
31012 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse2_loaddquv16qi_mask, "__builtin_ia32_loaddquqi128_mask", IX86_BUILTIN_LOADDQUQI128_MASK, UNKNOWN, (int) V16QI_FTYPE_PCV16QI_V16QI_HI },
31013 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_loadv4di_mask, "__builtin_ia32_movdqa64load256_mask", IX86_BUILTIN_MOVDQA64LOAD256_MASK, UNKNOWN, (int) V4DI_FTYPE_PCV4DI_V4DI_QI },
31014 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_loadv2di_mask, "__builtin_ia32_movdqa64load128_mask", IX86_BUILTIN_MOVDQA64LOAD128_MASK, UNKNOWN, (int) V2DI_FTYPE_PCV2DI_V2DI_QI },
31015 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_loadv8si_mask, "__builtin_ia32_movdqa32load256_mask", IX86_BUILTIN_MOVDQA32LOAD256_MASK, UNKNOWN, (int) V8SI_FTYPE_PCV8SI_V8SI_QI },
31016 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_loadv4si_mask, "__builtin_ia32_movdqa32load128_mask", IX86_BUILTIN_MOVDQA32LOAD128_MASK, UNKNOWN, (int) V4SI_FTYPE_PCV4SI_V4SI_QI },
31017 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_storev4di_mask, "__builtin_ia32_movdqa64store256_mask", IX86_BUILTIN_MOVDQA64STORE256_MASK, UNKNOWN, (int) VOID_FTYPE_PV4DI_V4DI_QI },
31018 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_storev2di_mask, "__builtin_ia32_movdqa64store128_mask", IX86_BUILTIN_MOVDQA64STORE128_MASK, UNKNOWN, (int) VOID_FTYPE_PV2DI_V2DI_QI },
31019 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_storev8si_mask, "__builtin_ia32_movdqa32store256_mask", IX86_BUILTIN_MOVDQA32STORE256_MASK, UNKNOWN, (int) VOID_FTYPE_PV8SI_V8SI_QI },
31020 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_storev4si_mask, "__builtin_ia32_movdqa32store128_mask", IX86_BUILTIN_MOVDQA32STORE128_MASK, UNKNOWN, (int) VOID_FTYPE_PV4SI_V4SI_QI },
31021 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_loadv4df_mask, "__builtin_ia32_loadapd256_mask", IX86_BUILTIN_LOADAPD256_MASK, UNKNOWN, (int) V4DF_FTYPE_PCV4DF_V4DF_QI },
31022 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_loadv2df_mask, "__builtin_ia32_loadapd128_mask", IX86_BUILTIN_LOADAPD128_MASK, UNKNOWN, (int) V2DF_FTYPE_PCV2DF_V2DF_QI },
31023 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_loadv8sf_mask, "__builtin_ia32_loadaps256_mask", IX86_BUILTIN_LOADAPS256_MASK, UNKNOWN, (int) V8SF_FTYPE_PCV8SF_V8SF_QI },
31024 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_loadv4sf_mask, "__builtin_ia32_loadaps128_mask", IX86_BUILTIN_LOADAPS128_MASK, UNKNOWN, (int) V4SF_FTYPE_PCV4SF_V4SF_QI },
31025 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_storev4df_mask, "__builtin_ia32_storeapd256_mask", IX86_BUILTIN_STOREAPD256_MASK, UNKNOWN, (int) VOID_FTYPE_PV4DF_V4DF_QI },
31026 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_storev2df_mask, "__builtin_ia32_storeapd128_mask", IX86_BUILTIN_STOREAPD128_MASK, UNKNOWN, (int) VOID_FTYPE_PV2DF_V2DF_QI },
31027 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_storev8sf_mask, "__builtin_ia32_storeaps256_mask", IX86_BUILTIN_STOREAPS256_MASK, UNKNOWN, (int) VOID_FTYPE_PV8SF_V8SF_QI },
31028 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_storev4sf_mask, "__builtin_ia32_storeaps128_mask", IX86_BUILTIN_STOREAPS128_MASK, UNKNOWN, (int) VOID_FTYPE_PV4SF_V4SF_QI },
31029 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_loadupd256_mask, "__builtin_ia32_loadupd256_mask", IX86_BUILTIN_LOADUPD256_MASK, UNKNOWN, (int) V4DF_FTYPE_PCV4DF_V4DF_QI },
31030 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse2_loadupd_mask, "__builtin_ia32_loadupd128_mask", IX86_BUILTIN_LOADUPD128_MASK, UNKNOWN, (int) V2DF_FTYPE_PCV2DF_V2DF_QI },
31031 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_loadups256_mask, "__builtin_ia32_loadups256_mask", IX86_BUILTIN_LOADUPS256_MASK, UNKNOWN, (int) V8SF_FTYPE_PCV8SF_V8SF_QI },
31032 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse_loadups_mask, "__builtin_ia32_loadups128_mask", IX86_BUILTIN_LOADUPS128_MASK, UNKNOWN, (int) V4SF_FTYPE_PCV4SF_V4SF_QI },
31033 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_storeupd256_mask, "__builtin_ia32_storeupd256_mask", IX86_BUILTIN_STOREUPD256_MASK, UNKNOWN, (int) VOID_FTYPE_PV4DF_V4DF_QI },
31034 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_storeupd_mask, "__builtin_ia32_storeupd128_mask", IX86_BUILTIN_STOREUPD128_MASK, UNKNOWN, (int) VOID_FTYPE_PV2DF_V2DF_QI },
31035 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_storeups256_mask, "__builtin_ia32_storeups256_mask", IX86_BUILTIN_STOREUPS256_MASK, UNKNOWN, (int) VOID_FTYPE_PV8SF_V8SF_QI },
31036 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_storeups_mask, "__builtin_ia32_storeups128_mask", IX86_BUILTIN_STOREUPS128_MASK, UNKNOWN, (int) VOID_FTYPE_PV4SF_V4SF_QI },
31037 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_loaddquv4di_mask, "__builtin_ia32_loaddqudi256_mask", IX86_BUILTIN_LOADDQUDI256_MASK, UNKNOWN, (int) V4DI_FTYPE_PCV4DI_V4DI_QI },
31038 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_loaddquv2di_mask, "__builtin_ia32_loaddqudi128_mask", IX86_BUILTIN_LOADDQUDI128_MASK, UNKNOWN, (int) V2DI_FTYPE_PCV2DI_V2DI_QI },
31039 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_loaddquv8si_mask, "__builtin_ia32_loaddqusi256_mask", IX86_BUILTIN_LOADDQUSI256_MASK, UNKNOWN, (int) V8SI_FTYPE_PCV8SI_V8SI_QI },
31040 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse2_loaddquv4si_mask, "__builtin_ia32_loaddqusi128_mask", IX86_BUILTIN_LOADDQUSI128_MASK, UNKNOWN, (int) V4SI_FTYPE_PCV4SI_V4SI_QI },
31041 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_storedquv4di_mask, "__builtin_ia32_storedqudi256_mask", IX86_BUILTIN_STOREDQUDI256_MASK, UNKNOWN, (int) VOID_FTYPE_PV4DI_V4DI_QI },
31042 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_storedquv2di_mask, "__builtin_ia32_storedqudi128_mask", IX86_BUILTIN_STOREDQUDI128_MASK, UNKNOWN, (int) VOID_FTYPE_PV2DI_V2DI_QI },
31043 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_storedquv8si_mask, "__builtin_ia32_storedqusi256_mask", IX86_BUILTIN_STOREDQUSI256_MASK, UNKNOWN, (int) VOID_FTYPE_PV8SI_V8SI_QI },
31044 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_storedquv4si_mask, "__builtin_ia32_storedqusi128_mask", IX86_BUILTIN_STOREDQUSI128_MASK, UNKNOWN, (int) VOID_FTYPE_PV4SI_V4SI_QI },
31045 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_storedquv16hi_mask, "__builtin_ia32_storedquhi256_mask", IX86_BUILTIN_STOREDQUHI256_MASK, UNKNOWN, (int) VOID_FTYPE_PV16HI_V16HI_HI },
31046 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_storedquv8hi_mask, "__builtin_ia32_storedquhi128_mask", IX86_BUILTIN_STOREDQUHI128_MASK, UNKNOWN, (int) VOID_FTYPE_PV8HI_V8HI_QI },
31047 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_storedquv32qi_mask, "__builtin_ia32_storedquqi256_mask", IX86_BUILTIN_STOREDQUQI256_MASK, UNKNOWN, (int) VOID_FTYPE_PV32QI_V32QI_SI },
31048 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_storedquv16qi_mask, "__builtin_ia32_storedquqi128_mask", IX86_BUILTIN_STOREDQUQI128_MASK, UNKNOWN, (int) VOID_FTYPE_PV16QI_V16QI_HI },
31049 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_compressstorev4df_mask, "__builtin_ia32_compressstoredf256_mask", IX86_BUILTIN_COMPRESSPDSTORE256, UNKNOWN, (int) VOID_FTYPE_PV4DF_V4DF_QI },
31050 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_compressstorev2df_mask, "__builtin_ia32_compressstoredf128_mask", IX86_BUILTIN_COMPRESSPDSTORE128, UNKNOWN, (int) VOID_FTYPE_PV2DF_V2DF_QI },
31051 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_compressstorev8sf_mask, "__builtin_ia32_compressstoresf256_mask", IX86_BUILTIN_COMPRESSPSSTORE256, UNKNOWN, (int) VOID_FTYPE_PV8SF_V8SF_QI },
31052 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_compressstorev4sf_mask, "__builtin_ia32_compressstoresf128_mask", IX86_BUILTIN_COMPRESSPSSTORE128, UNKNOWN, (int) VOID_FTYPE_PV4SF_V4SF_QI },
31053 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_compressstorev4di_mask, "__builtin_ia32_compressstoredi256_mask", IX86_BUILTIN_PCOMPRESSQSTORE256, UNKNOWN, (int) VOID_FTYPE_PV4DI_V4DI_QI },
31054 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_compressstorev2di_mask, "__builtin_ia32_compressstoredi128_mask", IX86_BUILTIN_PCOMPRESSQSTORE128, UNKNOWN, (int) VOID_FTYPE_PV2DI_V2DI_QI },
31055 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_compressstorev8si_mask, "__builtin_ia32_compressstoresi256_mask", IX86_BUILTIN_PCOMPRESSDSTORE256, UNKNOWN, (int) VOID_FTYPE_PV8SI_V8SI_QI },
31056 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_compressstorev4si_mask, "__builtin_ia32_compressstoresi128_mask", IX86_BUILTIN_PCOMPRESSDSTORE128, UNKNOWN, (int) VOID_FTYPE_PV4SI_V4SI_QI },
31057 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv4df_mask, "__builtin_ia32_expandloaddf256_mask", IX86_BUILTIN_EXPANDPDLOAD256, UNKNOWN, (int) V4DF_FTYPE_PCV4DF_V4DF_QI },
31058 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv2df_mask, "__builtin_ia32_expandloaddf128_mask", IX86_BUILTIN_EXPANDPDLOAD128, UNKNOWN, (int) V2DF_FTYPE_PCV2DF_V2DF_QI },
31059 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv8sf_mask, "__builtin_ia32_expandloadsf256_mask", IX86_BUILTIN_EXPANDPSLOAD256, UNKNOWN, (int) V8SF_FTYPE_PCV8SF_V8SF_QI },
31060 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv4sf_mask, "__builtin_ia32_expandloadsf128_mask", IX86_BUILTIN_EXPANDPSLOAD128, UNKNOWN, (int) V4SF_FTYPE_PCV4SF_V4SF_QI },
31061 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv4di_mask, "__builtin_ia32_expandloaddi256_mask", IX86_BUILTIN_PEXPANDQLOAD256, UNKNOWN, (int) V4DI_FTYPE_PCV4DI_V4DI_QI },
31062 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv2di_mask, "__builtin_ia32_expandloaddi128_mask", IX86_BUILTIN_PEXPANDQLOAD128, UNKNOWN, (int) V2DI_FTYPE_PCV2DI_V2DI_QI },
31063 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv8si_mask, "__builtin_ia32_expandloadsi256_mask", IX86_BUILTIN_PEXPANDDLOAD256, UNKNOWN, (int) V8SI_FTYPE_PCV8SI_V8SI_QI },
31064 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv4si_mask, "__builtin_ia32_expandloadsi128_mask", IX86_BUILTIN_PEXPANDDLOAD128, UNKNOWN, (int) V4SI_FTYPE_PCV4SI_V4SI_QI },
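  /* Zero-masking (_maskz) variants of the expand loads above: elements
     whose mask bit is clear are zeroed instead of being merged from the
     passthrough vector operand.  */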
31065 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv4df_maskz, "__builtin_ia32_expandloaddf256_maskz", IX86_BUILTIN_EXPANDPDLOAD256Z, UNKNOWN, (int) V4DF_FTYPE_PCV4DF_V4DF_QI },
31066 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv2df_maskz, "__builtin_ia32_expandloaddf128_maskz", IX86_BUILTIN_EXPANDPDLOAD128Z, UNKNOWN, (int) V2DF_FTYPE_PCV2DF_V2DF_QI },
31067 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv8sf_maskz, "__builtin_ia32_expandloadsf256_maskz", IX86_BUILTIN_EXPANDPSLOAD256Z, UNKNOWN, (int) V8SF_FTYPE_PCV8SF_V8SF_QI },
31068 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv4sf_maskz, "__builtin_ia32_expandloadsf128_maskz", IX86_BUILTIN_EXPANDPSLOAD128Z, UNKNOWN, (int) V4SF_FTYPE_PCV4SF_V4SF_QI },
31069 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv4di_maskz, "__builtin_ia32_expandloaddi256_maskz", IX86_BUILTIN_PEXPANDQLOAD256Z, UNKNOWN, (int) V4DI_FTYPE_PCV4DI_V4DI_QI },
31070 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv2di_maskz, "__builtin_ia32_expandloaddi128_maskz", IX86_BUILTIN_PEXPANDQLOAD128Z, UNKNOWN, (int) V2DI_FTYPE_PCV2DI_V2DI_QI },
31071 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv8si_maskz, "__builtin_ia32_expandloadsi256_maskz", IX86_BUILTIN_PEXPANDDLOAD256Z, UNKNOWN, (int) V8SI_FTYPE_PCV8SI_V8SI_QI },
31072 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv4si_maskz, "__builtin_ia32_expandloadsi128_maskz", IX86_BUILTIN_PEXPANDDLOAD128Z, UNKNOWN, (int) V4SI_FTYPE_PCV4SI_V4SI_QI },
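  /* Masked truncating stores.  The plain pmov* forms simply truncate each
     element, the pmovs* forms narrow with signed saturation and the
     pmovus* forms with unsigned saturation, matching the truncate /
     ss_truncate / us_truncate insn patterns used below.  */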
31073 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_truncatev4div4si2_mask_store, "__builtin_ia32_pmovqd256mem_mask", IX86_BUILTIN_PMOVQD256_MEM, UNKNOWN, (int) VOID_FTYPE_PV4SI_V4DI_QI },
31074 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_truncatev2div2si2_mask_store, "__builtin_ia32_pmovqd128mem_mask", IX86_BUILTIN_PMOVQD128_MEM, UNKNOWN, (int) VOID_FTYPE_PV4SI_V2DI_QI },
31075 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ss_truncatev4div4si2_mask_store, "__builtin_ia32_pmovsqd256mem_mask", IX86_BUILTIN_PMOVSQD256_MEM, UNKNOWN, (int) VOID_FTYPE_PV4SI_V4DI_QI },
31076 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ss_truncatev2div2si2_mask_store, "__builtin_ia32_pmovsqd128mem_mask", IX86_BUILTIN_PMOVSQD128_MEM, UNKNOWN, (int) VOID_FTYPE_PV4SI_V2DI_QI },
31077 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_us_truncatev4div4si2_mask_store, "__builtin_ia32_pmovusqd256mem_mask", IX86_BUILTIN_PMOVUSQD256_MEM, UNKNOWN, (int) VOID_FTYPE_PV4SI_V4DI_QI },
31078 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_us_truncatev2div2si2_mask_store, "__builtin_ia32_pmovusqd128mem_mask", IX86_BUILTIN_PMOVUSQD128_MEM, UNKNOWN, (int) VOID_FTYPE_PV4SI_V2DI_QI },
31079 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_truncatev4div4hi2_mask_store, "__builtin_ia32_pmovqw256mem_mask", IX86_BUILTIN_PMOVQW256_MEM, UNKNOWN, (int) VOID_FTYPE_PV8HI_V4DI_QI },
31080 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_truncatev2div2hi2_mask_store, "__builtin_ia32_pmovqw128mem_mask", IX86_BUILTIN_PMOVQW128_MEM, UNKNOWN, (int) VOID_FTYPE_PV8HI_V2DI_QI },
31081 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ss_truncatev4div4hi2_mask_store, "__builtin_ia32_pmovsqw256mem_mask", IX86_BUILTIN_PMOVSQW256_MEM, UNKNOWN, (int) VOID_FTYPE_PV8HI_V4DI_QI },
31082 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ss_truncatev2div2hi2_mask_store, "__builtin_ia32_pmovsqw128mem_mask", IX86_BUILTIN_PMOVSQW128_MEM, UNKNOWN, (int) VOID_FTYPE_PV8HI_V2DI_QI },
31083 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_us_truncatev4div4hi2_mask_store, "__builtin_ia32_pmovusqw256mem_mask", IX86_BUILTIN_PMOVUSQW256_MEM, UNKNOWN, (int) VOID_FTYPE_PV8HI_V4DI_QI },
31084 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_us_truncatev2div2hi2_mask_store, "__builtin_ia32_pmovusqw128mem_mask", IX86_BUILTIN_PMOVUSQW128_MEM, UNKNOWN, (int) VOID_FTYPE_PV8HI_V2DI_QI },
31085 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_truncatev4div4qi2_mask_store, "__builtin_ia32_pmovqb256mem_mask", IX86_BUILTIN_PMOVQB256_MEM, UNKNOWN, (int) VOID_FTYPE_PV16QI_V4DI_QI },
31086 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_truncatev2div2qi2_mask_store, "__builtin_ia32_pmovqb128mem_mask", IX86_BUILTIN_PMOVQB128_MEM, UNKNOWN, (int) VOID_FTYPE_PV16QI_V2DI_QI },
31087 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ss_truncatev4div4qi2_mask_store, "__builtin_ia32_pmovsqb256mem_mask", IX86_BUILTIN_PMOVSQB256_MEM, UNKNOWN, (int) VOID_FTYPE_PV16QI_V4DI_QI },
31088 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ss_truncatev2div2qi2_mask_store, "__builtin_ia32_pmovsqb128mem_mask", IX86_BUILTIN_PMOVSQB128_MEM, UNKNOWN, (int) VOID_FTYPE_PV16QI_V2DI_QI },
31089 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_us_truncatev4div4qi2_mask_store, "__builtin_ia32_pmovusqb256mem_mask", IX86_BUILTIN_PMOVUSQB256_MEM, UNKNOWN, (int) VOID_FTYPE_PV16QI_V4DI_QI },
31090 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_us_truncatev2div2qi2_mask_store, "__builtin_ia32_pmovusqb128mem_mask", IX86_BUILTIN_PMOVUSQB128_MEM, UNKNOWN, (int) VOID_FTYPE_PV16QI_V2DI_QI },
31091 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_truncatev8siv8qi2_mask_store, "__builtin_ia32_pmovdb256mem_mask", IX86_BUILTIN_PMOVDB256_MEM, UNKNOWN, (int) VOID_FTYPE_PV16QI_V8SI_QI },
31092 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_truncatev4siv4qi2_mask_store, "__builtin_ia32_pmovdb128mem_mask", IX86_BUILTIN_PMOVDB128_MEM, UNKNOWN, (int) VOID_FTYPE_PV16QI_V4SI_QI },
31093 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ss_truncatev8siv8qi2_mask_store, "__builtin_ia32_pmovsdb256mem_mask", IX86_BUILTIN_PMOVSDB256_MEM, UNKNOWN, (int) VOID_FTYPE_PV16QI_V8SI_QI },
31094 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ss_truncatev4siv4qi2_mask_store, "__builtin_ia32_pmovsdb128mem_mask", IX86_BUILTIN_PMOVSDB128_MEM, UNKNOWN, (int) VOID_FTYPE_PV16QI_V4SI_QI },
31095 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_us_truncatev8siv8qi2_mask_store, "__builtin_ia32_pmovusdb256mem_mask", IX86_BUILTIN_PMOVUSDB256_MEM, UNKNOWN, (int) VOID_FTYPE_PV16QI_V8SI_QI },
31096 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_us_truncatev4siv4qi2_mask_store, "__builtin_ia32_pmovusdb128mem_mask", IX86_BUILTIN_PMOVUSDB128_MEM, UNKNOWN, (int) VOID_FTYPE_PV16QI_V4SI_QI },
31097 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_truncatev8siv8hi2_mask_store, "__builtin_ia32_pmovdw256mem_mask", IX86_BUILTIN_PMOVDW256_MEM, UNKNOWN, (int) VOID_FTYPE_PV8HI_V8SI_QI },
31098 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_truncatev4siv4hi2_mask_store, "__builtin_ia32_pmovdw128mem_mask", IX86_BUILTIN_PMOVDW128_MEM, UNKNOWN, (int) VOID_FTYPE_PV8HI_V4SI_QI },
31099 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ss_truncatev8siv8hi2_mask_store, "__builtin_ia32_pmovsdw256mem_mask", IX86_BUILTIN_PMOVSDW256_MEM, UNKNOWN, (int) VOID_FTYPE_PV8HI_V8SI_QI },
31100 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ss_truncatev4siv4hi2_mask_store, "__builtin_ia32_pmovsdw128mem_mask", IX86_BUILTIN_PMOVSDW128_MEM, UNKNOWN, (int) VOID_FTYPE_PV8HI_V4SI_QI },
31101 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_us_truncatev8siv8hi2_mask_store, "__builtin_ia32_pmovusdw256mem_mask", IX86_BUILTIN_PMOVUSDW256_MEM, UNKNOWN, (int) VOID_FTYPE_PV8HI_V8SI_QI },
31102 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_us_truncatev4siv4hi2_mask_store, "__builtin_ia32_pmovusdw128mem_mask", IX86_BUILTIN_PMOVUSDW128_MEM, UNKNOWN, (int) VOID_FTYPE_PV8HI_V4SI_QI },
31104 /* PCOMMIT. */
31105 { OPTION_MASK_ISA_PCOMMIT, CODE_FOR_pcommit, "__builtin_ia32_pcommit", IX86_BUILTIN_PCOMMIT, UNKNOWN, (int) VOID_FTYPE_VOID },
31106 };
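/* Each entry in these tables gives the ISA mask that enables the builtin,
   the insn code used to expand it, the builtin's name, its IX86_BUILTIN_*
   enumerator, an rtx comparison code (UNKNOWN when none is needed) and a
   flag encoding the builtin's prototype.  */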
31108 /* Builtins with variable number of arguments. */
31109 static const struct builtin_description bdesc_args[] =
31110 {
31111 { ~OPTION_MASK_ISA_64BIT, CODE_FOR_bsr, "__builtin_ia32_bsrsi", IX86_BUILTIN_BSRSI, UNKNOWN, (int) INT_FTYPE_INT },
31112 { OPTION_MASK_ISA_64BIT, CODE_FOR_bsr_rex64, "__builtin_ia32_bsrdi", IX86_BUILTIN_BSRDI, UNKNOWN, (int) INT64_FTYPE_INT64 },
31113 { ~OPTION_MASK_ISA_64BIT, CODE_FOR_nothing, "__builtin_ia32_rdpmc", IX86_BUILTIN_RDPMC, UNKNOWN, (int) UINT64_FTYPE_INT },
31114 { ~OPTION_MASK_ISA_64BIT, CODE_FOR_rotlqi3, "__builtin_ia32_rolqi", IX86_BUILTIN_ROLQI, UNKNOWN, (int) UINT8_FTYPE_UINT8_INT },
31115 { ~OPTION_MASK_ISA_64BIT, CODE_FOR_rotlhi3, "__builtin_ia32_rolhi", IX86_BUILTIN_ROLHI, UNKNOWN, (int) UINT16_FTYPE_UINT16_INT },
31116 { ~OPTION_MASK_ISA_64BIT, CODE_FOR_rotrqi3, "__builtin_ia32_rorqi", IX86_BUILTIN_RORQI, UNKNOWN, (int) UINT8_FTYPE_UINT8_INT },
31117 { ~OPTION_MASK_ISA_64BIT, CODE_FOR_rotrhi3, "__builtin_ia32_rorhi", IX86_BUILTIN_RORHI, UNKNOWN, (int) UINT16_FTYPE_UINT16_INT },
31119 /* MMX */
31120 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_addv8qi3, "__builtin_ia32_paddb", IX86_BUILTIN_PADDB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
31121 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_addv4hi3, "__builtin_ia32_paddw", IX86_BUILTIN_PADDW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
31122 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_addv2si3, "__builtin_ia32_paddd", IX86_BUILTIN_PADDD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
31123 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_subv8qi3, "__builtin_ia32_psubb", IX86_BUILTIN_PSUBB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
31124 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_subv4hi3, "__builtin_ia32_psubw", IX86_BUILTIN_PSUBW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
31125 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_subv2si3, "__builtin_ia32_psubd", IX86_BUILTIN_PSUBD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
31127 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ssaddv8qi3, "__builtin_ia32_paddsb", IX86_BUILTIN_PADDSB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
31128 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ssaddv4hi3, "__builtin_ia32_paddsw", IX86_BUILTIN_PADDSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
31129 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_sssubv8qi3, "__builtin_ia32_psubsb", IX86_BUILTIN_PSUBSB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
31130 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_sssubv4hi3, "__builtin_ia32_psubsw", IX86_BUILTIN_PSUBSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
31131 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_usaddv8qi3, "__builtin_ia32_paddusb", IX86_BUILTIN_PADDUSB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
31132 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_usaddv4hi3, "__builtin_ia32_paddusw", IX86_BUILTIN_PADDUSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
31133 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ussubv8qi3, "__builtin_ia32_psubusb", IX86_BUILTIN_PSUBUSB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
31134 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ussubv4hi3, "__builtin_ia32_psubusw", IX86_BUILTIN_PSUBUSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
31136 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_mulv4hi3, "__builtin_ia32_pmullw", IX86_BUILTIN_PMULLW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
31137 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_smulv4hi3_highpart, "__builtin_ia32_pmulhw", IX86_BUILTIN_PMULHW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
31139 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_andv2si3, "__builtin_ia32_pand", IX86_BUILTIN_PAND, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
31140 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_andnotv2si3, "__builtin_ia32_pandn", IX86_BUILTIN_PANDN, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
31141 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_iorv2si3, "__builtin_ia32_por", IX86_BUILTIN_POR, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
31142 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_xorv2si3, "__builtin_ia32_pxor", IX86_BUILTIN_PXOR, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
31144 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_eqv8qi3, "__builtin_ia32_pcmpeqb", IX86_BUILTIN_PCMPEQB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
31145 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_eqv4hi3, "__builtin_ia32_pcmpeqw", IX86_BUILTIN_PCMPEQW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
31146 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_eqv2si3, "__builtin_ia32_pcmpeqd", IX86_BUILTIN_PCMPEQD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
31147 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_gtv8qi3, "__builtin_ia32_pcmpgtb", IX86_BUILTIN_PCMPGTB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
31148 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_gtv4hi3, "__builtin_ia32_pcmpgtw", IX86_BUILTIN_PCMPGTW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
31149 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_gtv2si3, "__builtin_ia32_pcmpgtd", IX86_BUILTIN_PCMPGTD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
31151 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_punpckhbw, "__builtin_ia32_punpckhbw", IX86_BUILTIN_PUNPCKHBW, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
31152 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_punpckhwd, "__builtin_ia32_punpckhwd", IX86_BUILTIN_PUNPCKHWD, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
31153 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_punpckhdq, "__builtin_ia32_punpckhdq", IX86_BUILTIN_PUNPCKHDQ, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
31154 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_punpcklbw, "__builtin_ia32_punpcklbw", IX86_BUILTIN_PUNPCKLBW, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
31155 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_punpcklwd, "__builtin_ia32_punpcklwd", IX86_BUILTIN_PUNPCKLWD, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI},
31156 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_punpckldq, "__builtin_ia32_punpckldq", IX86_BUILTIN_PUNPCKLDQ, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI},
31158 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_packsswb, "__builtin_ia32_packsswb", IX86_BUILTIN_PACKSSWB, UNKNOWN, (int) V8QI_FTYPE_V4HI_V4HI },
31159 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_packssdw, "__builtin_ia32_packssdw", IX86_BUILTIN_PACKSSDW, UNKNOWN, (int) V4HI_FTYPE_V2SI_V2SI },
31160 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_packuswb, "__builtin_ia32_packuswb", IX86_BUILTIN_PACKUSWB, UNKNOWN, (int) V8QI_FTYPE_V4HI_V4HI },
31162 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_pmaddwd, "__builtin_ia32_pmaddwd", IX86_BUILTIN_PMADDWD, UNKNOWN, (int) V2SI_FTYPE_V4HI_V4HI },
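  /* MMX shifts.  The *i builtins take the shift count as a scalar
     (the ..._SI_COUNT prototypes); the remaining forms take the count in
     an MMX register (the ..._V4HI/V2SI/V1DI_COUNT prototypes).  Both map
     to the same ashl/lshr/ashr patterns.  */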
31164 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashlv4hi3, "__builtin_ia32_psllwi", IX86_BUILTIN_PSLLWI, UNKNOWN, (int) V4HI_FTYPE_V4HI_SI_COUNT },
31165 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashlv2si3, "__builtin_ia32_pslldi", IX86_BUILTIN_PSLLDI, UNKNOWN, (int) V2SI_FTYPE_V2SI_SI_COUNT },
31166 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashlv1di3, "__builtin_ia32_psllqi", IX86_BUILTIN_PSLLQI, UNKNOWN, (int) V1DI_FTYPE_V1DI_SI_COUNT },
31167 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashlv4hi3, "__builtin_ia32_psllw", IX86_BUILTIN_PSLLW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI_COUNT },
31168 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashlv2si3, "__builtin_ia32_pslld", IX86_BUILTIN_PSLLD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI_COUNT },
31169 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashlv1di3, "__builtin_ia32_psllq", IX86_BUILTIN_PSLLQ, UNKNOWN, (int) V1DI_FTYPE_V1DI_V1DI_COUNT },
31171 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_lshrv4hi3, "__builtin_ia32_psrlwi", IX86_BUILTIN_PSRLWI, UNKNOWN, (int) V4HI_FTYPE_V4HI_SI_COUNT },
31172 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_lshrv2si3, "__builtin_ia32_psrldi", IX86_BUILTIN_PSRLDI, UNKNOWN, (int) V2SI_FTYPE_V2SI_SI_COUNT },
31173 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_lshrv1di3, "__builtin_ia32_psrlqi", IX86_BUILTIN_PSRLQI, UNKNOWN, (int) V1DI_FTYPE_V1DI_SI_COUNT },
31174 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_lshrv4hi3, "__builtin_ia32_psrlw", IX86_BUILTIN_PSRLW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI_COUNT },
31175 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_lshrv2si3, "__builtin_ia32_psrld", IX86_BUILTIN_PSRLD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI_COUNT },
31176 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_lshrv1di3, "__builtin_ia32_psrlq", IX86_BUILTIN_PSRLQ, UNKNOWN, (int) V1DI_FTYPE_V1DI_V1DI_COUNT },
31178 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashrv4hi3, "__builtin_ia32_psrawi", IX86_BUILTIN_PSRAWI, UNKNOWN, (int) V4HI_FTYPE_V4HI_SI_COUNT },
31179 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashrv2si3, "__builtin_ia32_psradi", IX86_BUILTIN_PSRADI, UNKNOWN, (int) V2SI_FTYPE_V2SI_SI_COUNT },
31180 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashrv4hi3, "__builtin_ia32_psraw", IX86_BUILTIN_PSRAW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI_COUNT },
31181 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashrv2si3, "__builtin_ia32_psrad", IX86_BUILTIN_PSRAD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI_COUNT },
31183 /* 3DNow! */
31184 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_pf2id, "__builtin_ia32_pf2id", IX86_BUILTIN_PF2ID, UNKNOWN, (int) V2SI_FTYPE_V2SF },
31185 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_floatv2si2, "__builtin_ia32_pi2fd", IX86_BUILTIN_PI2FD, UNKNOWN, (int) V2SF_FTYPE_V2SI },
31186 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_rcpv2sf2, "__builtin_ia32_pfrcp", IX86_BUILTIN_PFRCP, UNKNOWN, (int) V2SF_FTYPE_V2SF },
31187 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_rsqrtv2sf2, "__builtin_ia32_pfrsqrt", IX86_BUILTIN_PFRSQRT, UNKNOWN, (int) V2SF_FTYPE_V2SF },
31189 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_uavgv8qi3, "__builtin_ia32_pavgusb", IX86_BUILTIN_PAVGUSB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
31190 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_haddv2sf3, "__builtin_ia32_pfacc", IX86_BUILTIN_PFACC, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
31191 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_addv2sf3, "__builtin_ia32_pfadd", IX86_BUILTIN_PFADD, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
31192 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_eqv2sf3, "__builtin_ia32_pfcmpeq", IX86_BUILTIN_PFCMPEQ, UNKNOWN, (int) V2SI_FTYPE_V2SF_V2SF },
31193 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_gev2sf3, "__builtin_ia32_pfcmpge", IX86_BUILTIN_PFCMPGE, UNKNOWN, (int) V2SI_FTYPE_V2SF_V2SF },
31194 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_gtv2sf3, "__builtin_ia32_pfcmpgt", IX86_BUILTIN_PFCMPGT, UNKNOWN, (int) V2SI_FTYPE_V2SF_V2SF },
31195 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_smaxv2sf3, "__builtin_ia32_pfmax", IX86_BUILTIN_PFMAX, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
31196 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_sminv2sf3, "__builtin_ia32_pfmin", IX86_BUILTIN_PFMIN, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
31197 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_mulv2sf3, "__builtin_ia32_pfmul", IX86_BUILTIN_PFMUL, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
31198 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_rcpit1v2sf3, "__builtin_ia32_pfrcpit1", IX86_BUILTIN_PFRCPIT1, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
31199 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_rcpit2v2sf3, "__builtin_ia32_pfrcpit2", IX86_BUILTIN_PFRCPIT2, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
31200 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_rsqit1v2sf3, "__builtin_ia32_pfrsqit1", IX86_BUILTIN_PFRSQIT1, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
31201 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_subv2sf3, "__builtin_ia32_pfsub", IX86_BUILTIN_PFSUB, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
31202 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_subrv2sf3, "__builtin_ia32_pfsubr", IX86_BUILTIN_PFSUBR, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
31203 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_pmulhrwv4hi3, "__builtin_ia32_pmulhrw", IX86_BUILTIN_PMULHRW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
31205 /* 3DNow!A */
31206 { OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_pf2iw, "__builtin_ia32_pf2iw", IX86_BUILTIN_PF2IW, UNKNOWN, (int) V2SI_FTYPE_V2SF },
31207 { OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_pi2fw, "__builtin_ia32_pi2fw", IX86_BUILTIN_PI2FW, UNKNOWN, (int) V2SF_FTYPE_V2SI },
31208 { OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_pswapdv2si2, "__builtin_ia32_pswapdsi", IX86_BUILTIN_PSWAPDSI, UNKNOWN, (int) V2SI_FTYPE_V2SI },
31209 { OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_pswapdv2sf2, "__builtin_ia32_pswapdsf", IX86_BUILTIN_PSWAPDSF, UNKNOWN, (int) V2SF_FTYPE_V2SF },
31210 { OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_hsubv2sf3, "__builtin_ia32_pfnacc", IX86_BUILTIN_PFNACC, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
31211 { OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_addsubv2sf3, "__builtin_ia32_pfpnacc", IX86_BUILTIN_PFPNACC, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
31213 /* SSE */
31214 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_movmskps, "__builtin_ia32_movmskps", IX86_BUILTIN_MOVMSKPS, UNKNOWN, (int) INT_FTYPE_V4SF },
31215 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_sqrtv4sf2, "__builtin_ia32_sqrtps", IX86_BUILTIN_SQRTPS, UNKNOWN, (int) V4SF_FTYPE_V4SF },
31216 { OPTION_MASK_ISA_SSE, CODE_FOR_sqrtv4sf2, "__builtin_ia32_sqrtps_nr", IX86_BUILTIN_SQRTPS_NR, UNKNOWN, (int) V4SF_FTYPE_V4SF },
31217 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_rsqrtv4sf2, "__builtin_ia32_rsqrtps", IX86_BUILTIN_RSQRTPS, UNKNOWN, (int) V4SF_FTYPE_V4SF },
31218 { OPTION_MASK_ISA_SSE, CODE_FOR_rsqrtv4sf2, "__builtin_ia32_rsqrtps_nr", IX86_BUILTIN_RSQRTPS_NR, UNKNOWN, (int) V4SF_FTYPE_V4SF },
31219 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_rcpv4sf2, "__builtin_ia32_rcpps", IX86_BUILTIN_RCPPS, UNKNOWN, (int) V4SF_FTYPE_V4SF },
31220 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_cvtps2pi, "__builtin_ia32_cvtps2pi", IX86_BUILTIN_CVTPS2PI, UNKNOWN, (int) V2SI_FTYPE_V4SF },
31221 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_cvtss2si, "__builtin_ia32_cvtss2si", IX86_BUILTIN_CVTSS2SI, UNKNOWN, (int) INT_FTYPE_V4SF },
31222 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_64BIT, CODE_FOR_sse_cvtss2siq, "__builtin_ia32_cvtss2si64", IX86_BUILTIN_CVTSS2SI64, UNKNOWN, (int) INT64_FTYPE_V4SF },
31223 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_cvttps2pi, "__builtin_ia32_cvttps2pi", IX86_BUILTIN_CVTTPS2PI, UNKNOWN, (int) V2SI_FTYPE_V4SF },
31224 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_cvttss2si, "__builtin_ia32_cvttss2si", IX86_BUILTIN_CVTTSS2SI, UNKNOWN, (int) INT_FTYPE_V4SF },
31225 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_64BIT, CODE_FOR_sse_cvttss2siq, "__builtin_ia32_cvttss2si64", IX86_BUILTIN_CVTTSS2SI64, UNKNOWN, (int) INT64_FTYPE_V4SF },
31227 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_shufps, "__builtin_ia32_shufps", IX86_BUILTIN_SHUFPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
31229 { OPTION_MASK_ISA_SSE, CODE_FOR_addv4sf3, "__builtin_ia32_addps", IX86_BUILTIN_ADDPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
31230 { OPTION_MASK_ISA_SSE, CODE_FOR_subv4sf3, "__builtin_ia32_subps", IX86_BUILTIN_SUBPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
31231 { OPTION_MASK_ISA_SSE, CODE_FOR_mulv4sf3, "__builtin_ia32_mulps", IX86_BUILTIN_MULPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
31232 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_divv4sf3, "__builtin_ia32_divps", IX86_BUILTIN_DIVPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
31233 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmaddv4sf3, "__builtin_ia32_addss", IX86_BUILTIN_ADDSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
31234 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmsubv4sf3, "__builtin_ia32_subss", IX86_BUILTIN_SUBSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
31235 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmulv4sf3, "__builtin_ia32_mulss", IX86_BUILTIN_MULSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
31236 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmdivv4sf3, "__builtin_ia32_divss", IX86_BUILTIN_DIVSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
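  /* SSE compares.  CMPPS/CMPSS have no greater-than predicates, so the
     cmpgt/cmpge (and cmpngt/cmpnge) builtins reuse the LT/LE (UNGE/UNGT)
     codes with their operands swapped; the _SWAP suffix in the flag field
     records the swap.  */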
31238 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpeqps", IX86_BUILTIN_CMPEQPS, EQ, (int) V4SF_FTYPE_V4SF_V4SF },
31239 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpltps", IX86_BUILTIN_CMPLTPS, LT, (int) V4SF_FTYPE_V4SF_V4SF },
31240 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpleps", IX86_BUILTIN_CMPLEPS, LE, (int) V4SF_FTYPE_V4SF_V4SF },
31241 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpgtps", IX86_BUILTIN_CMPGTPS, LT, (int) V4SF_FTYPE_V4SF_V4SF_SWAP },
31242 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpgeps", IX86_BUILTIN_CMPGEPS, LE, (int) V4SF_FTYPE_V4SF_V4SF_SWAP },
31243 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpunordps", IX86_BUILTIN_CMPUNORDPS, UNORDERED, (int) V4SF_FTYPE_V4SF_V4SF },
31244 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpneqps", IX86_BUILTIN_CMPNEQPS, NE, (int) V4SF_FTYPE_V4SF_V4SF },
31245 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpnltps", IX86_BUILTIN_CMPNLTPS, UNGE, (int) V4SF_FTYPE_V4SF_V4SF },
31246 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpnleps", IX86_BUILTIN_CMPNLEPS, UNGT, (int) V4SF_FTYPE_V4SF_V4SF },
31247 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpngtps", IX86_BUILTIN_CMPNGTPS, UNGE, (int) V4SF_FTYPE_V4SF_V4SF_SWAP },
31248 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpngeps", IX86_BUILTIN_CMPNGEPS, UNGT, (int) V4SF_FTYPE_V4SF_V4SF_SWAP},
31249 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpordps", IX86_BUILTIN_CMPORDPS, ORDERED, (int) V4SF_FTYPE_V4SF_V4SF },
31250 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpeqss", IX86_BUILTIN_CMPEQSS, EQ, (int) V4SF_FTYPE_V4SF_V4SF },
31251 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpltss", IX86_BUILTIN_CMPLTSS, LT, (int) V4SF_FTYPE_V4SF_V4SF },
31252 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpless", IX86_BUILTIN_CMPLESS, LE, (int) V4SF_FTYPE_V4SF_V4SF },
31253 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpunordss", IX86_BUILTIN_CMPUNORDSS, UNORDERED, (int) V4SF_FTYPE_V4SF_V4SF },
31254 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpneqss", IX86_BUILTIN_CMPNEQSS, NE, (int) V4SF_FTYPE_V4SF_V4SF },
31255 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpnltss", IX86_BUILTIN_CMPNLTSS, UNGE, (int) V4SF_FTYPE_V4SF_V4SF },
31256 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpnless", IX86_BUILTIN_CMPNLESS, UNGT, (int) V4SF_FTYPE_V4SF_V4SF },
31257 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpordss", IX86_BUILTIN_CMPORDSS, ORDERED, (int) V4SF_FTYPE_V4SF_V4SF },
31259 { OPTION_MASK_ISA_SSE, CODE_FOR_sminv4sf3, "__builtin_ia32_minps", IX86_BUILTIN_MINPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
31260 { OPTION_MASK_ISA_SSE, CODE_FOR_smaxv4sf3, "__builtin_ia32_maxps", IX86_BUILTIN_MAXPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
31261 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmsminv4sf3, "__builtin_ia32_minss", IX86_BUILTIN_MINSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
31262 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmsmaxv4sf3, "__builtin_ia32_maxss", IX86_BUILTIN_MAXSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
31264 { OPTION_MASK_ISA_SSE, CODE_FOR_andv4sf3, "__builtin_ia32_andps", IX86_BUILTIN_ANDPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
31265 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_andnotv4sf3, "__builtin_ia32_andnps", IX86_BUILTIN_ANDNPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
31266 { OPTION_MASK_ISA_SSE, CODE_FOR_iorv4sf3, "__builtin_ia32_orps", IX86_BUILTIN_ORPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
31267 { OPTION_MASK_ISA_SSE, CODE_FOR_xorv4sf3, "__builtin_ia32_xorps", IX86_BUILTIN_XORPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
31269 { OPTION_MASK_ISA_SSE, CODE_FOR_copysignv4sf3, "__builtin_ia32_copysignps", IX86_BUILTIN_CPYSGNPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
31271 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_movss, "__builtin_ia32_movss", IX86_BUILTIN_MOVSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
31272 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_movhlps_exp, "__builtin_ia32_movhlps", IX86_BUILTIN_MOVHLPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
31273 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_movlhps_exp, "__builtin_ia32_movlhps", IX86_BUILTIN_MOVLHPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
31274 { OPTION_MASK_ISA_SSE, CODE_FOR_vec_interleave_highv4sf, "__builtin_ia32_unpckhps", IX86_BUILTIN_UNPCKHPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
31275 { OPTION_MASK_ISA_SSE, CODE_FOR_vec_interleave_lowv4sf, "__builtin_ia32_unpcklps", IX86_BUILTIN_UNPCKLPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
31277 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_cvtpi2ps, "__builtin_ia32_cvtpi2ps", IX86_BUILTIN_CVTPI2PS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V2SI },
31278 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_cvtsi2ss, "__builtin_ia32_cvtsi2ss", IX86_BUILTIN_CVTSI2SS, UNKNOWN, (int) V4SF_FTYPE_V4SF_SI },
31279 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_64BIT, CODE_FOR_sse_cvtsi2ssq, "__builtin_ia32_cvtsi642ss", IX86_BUILTIN_CVTSI642SS, UNKNOWN, (int) V4SF_FTYPE_V4SF_DI },
31281 { OPTION_MASK_ISA_SSE, CODE_FOR_rsqrtsf2, "__builtin_ia32_rsqrtf", IX86_BUILTIN_RSQRTF, UNKNOWN, (int) FLOAT_FTYPE_FLOAT },
31283 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmsqrtv4sf2, "__builtin_ia32_sqrtss", IX86_BUILTIN_SQRTSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_VEC_MERGE },
31284 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmrsqrtv4sf2, "__builtin_ia32_rsqrtss", IX86_BUILTIN_RSQRTSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_VEC_MERGE },
31285 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmrcpv4sf2, "__builtin_ia32_rcpss", IX86_BUILTIN_RCPSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_VEC_MERGE },
31287 { OPTION_MASK_ISA_SSE, CODE_FOR_abstf2, 0, IX86_BUILTIN_FABSQ, UNKNOWN, (int) FLOAT128_FTYPE_FLOAT128 },
31288 { OPTION_MASK_ISA_SSE, CODE_FOR_copysigntf3, 0, IX86_BUILTIN_COPYSIGNQ, UNKNOWN, (int) FLOAT128_FTYPE_FLOAT128_FLOAT128 },
31290 /* SSE MMX or 3DNow!A */
31291 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_uavgv8qi3, "__builtin_ia32_pavgb", IX86_BUILTIN_PAVGB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
31292 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_uavgv4hi3, "__builtin_ia32_pavgw", IX86_BUILTIN_PAVGW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
31293 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_umulv4hi3_highpart, "__builtin_ia32_pmulhuw", IX86_BUILTIN_PMULHUW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
31295 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_umaxv8qi3, "__builtin_ia32_pmaxub", IX86_BUILTIN_PMAXUB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
31296 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_smaxv4hi3, "__builtin_ia32_pmaxsw", IX86_BUILTIN_PMAXSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
31297 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_uminv8qi3, "__builtin_ia32_pminub", IX86_BUILTIN_PMINUB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
31298 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_sminv4hi3, "__builtin_ia32_pminsw", IX86_BUILTIN_PMINSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
31300 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_psadbw, "__builtin_ia32_psadbw", IX86_BUILTIN_PSADBW, UNKNOWN, (int) V1DI_FTYPE_V8QI_V8QI },
31301 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_pmovmskb, "__builtin_ia32_pmovmskb", IX86_BUILTIN_PMOVMSKB, UNKNOWN, (int) INT_FTYPE_V8QI },
31303 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_pshufw, "__builtin_ia32_pshufw", IX86_BUILTIN_PSHUFW, UNKNOWN, (int) V4HI_FTYPE_V4HI_INT },
31305 /* SSE2 */
31306 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_shufpd, "__builtin_ia32_shufpd", IX86_BUILTIN_SHUFPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
31308 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movmskpd, "__builtin_ia32_movmskpd", IX86_BUILTIN_MOVMSKPD, UNKNOWN, (int) INT_FTYPE_V2DF },
31309 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_pmovmskb, "__builtin_ia32_pmovmskb128", IX86_BUILTIN_PMOVMSKB128, UNKNOWN, (int) INT_FTYPE_V16QI },
31310 { OPTION_MASK_ISA_SSE2, CODE_FOR_sqrtv2df2, "__builtin_ia32_sqrtpd", IX86_BUILTIN_SQRTPD, UNKNOWN, (int) V2DF_FTYPE_V2DF },
31311 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtdq2pd, "__builtin_ia32_cvtdq2pd", IX86_BUILTIN_CVTDQ2PD, UNKNOWN, (int) V2DF_FTYPE_V4SI },
31312 { OPTION_MASK_ISA_SSE2, CODE_FOR_floatv4siv4sf2, "__builtin_ia32_cvtdq2ps", IX86_BUILTIN_CVTDQ2PS, UNKNOWN, (int) V4SF_FTYPE_V4SI },
31314 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtpd2dq, "__builtin_ia32_cvtpd2dq", IX86_BUILTIN_CVTPD2DQ, UNKNOWN, (int) V4SI_FTYPE_V2DF },
31315 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtpd2pi, "__builtin_ia32_cvtpd2pi", IX86_BUILTIN_CVTPD2PI, UNKNOWN, (int) V2SI_FTYPE_V2DF },
31316 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtpd2ps, "__builtin_ia32_cvtpd2ps", IX86_BUILTIN_CVTPD2PS, UNKNOWN, (int) V4SF_FTYPE_V2DF },
31317 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvttpd2dq, "__builtin_ia32_cvttpd2dq", IX86_BUILTIN_CVTTPD2DQ, UNKNOWN, (int) V4SI_FTYPE_V2DF },
31318 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvttpd2pi, "__builtin_ia32_cvttpd2pi", IX86_BUILTIN_CVTTPD2PI, UNKNOWN, (int) V2SI_FTYPE_V2DF },
31320 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtpi2pd, "__builtin_ia32_cvtpi2pd", IX86_BUILTIN_CVTPI2PD, UNKNOWN, (int) V2DF_FTYPE_V2SI },
31322 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtsd2si, "__builtin_ia32_cvtsd2si", IX86_BUILTIN_CVTSD2SI, UNKNOWN, (int) INT_FTYPE_V2DF },
31323 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvttsd2si, "__builtin_ia32_cvttsd2si", IX86_BUILTIN_CVTTSD2SI, UNKNOWN, (int) INT_FTYPE_V2DF },
31324 { OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_64BIT, CODE_FOR_sse2_cvtsd2siq, "__builtin_ia32_cvtsd2si64", IX86_BUILTIN_CVTSD2SI64, UNKNOWN, (int) INT64_FTYPE_V2DF },
31325 { OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_64BIT, CODE_FOR_sse2_cvttsd2siq, "__builtin_ia32_cvttsd2si64", IX86_BUILTIN_CVTTSD2SI64, UNKNOWN, (int) INT64_FTYPE_V2DF },
31327 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_fix_notruncv4sfv4si, "__builtin_ia32_cvtps2dq", IX86_BUILTIN_CVTPS2DQ, UNKNOWN, (int) V4SI_FTYPE_V4SF },
31328 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtps2pd, "__builtin_ia32_cvtps2pd", IX86_BUILTIN_CVTPS2PD, UNKNOWN, (int) V2DF_FTYPE_V4SF },
31329 { OPTION_MASK_ISA_SSE2, CODE_FOR_fix_truncv4sfv4si2, "__builtin_ia32_cvttps2dq", IX86_BUILTIN_CVTTPS2DQ, UNKNOWN, (int) V4SI_FTYPE_V4SF },
31331 { OPTION_MASK_ISA_SSE2, CODE_FOR_addv2df3, "__builtin_ia32_addpd", IX86_BUILTIN_ADDPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
31332 { OPTION_MASK_ISA_SSE2, CODE_FOR_subv2df3, "__builtin_ia32_subpd", IX86_BUILTIN_SUBPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
31333 { OPTION_MASK_ISA_SSE2, CODE_FOR_mulv2df3, "__builtin_ia32_mulpd", IX86_BUILTIN_MULPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
31334 { OPTION_MASK_ISA_SSE2, CODE_FOR_divv2df3, "__builtin_ia32_divpd", IX86_BUILTIN_DIVPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
31335 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmaddv2df3, "__builtin_ia32_addsd", IX86_BUILTIN_ADDSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
31336 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmsubv2df3, "__builtin_ia32_subsd", IX86_BUILTIN_SUBSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
31337 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmulv2df3, "__builtin_ia32_mulsd", IX86_BUILTIN_MULSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
31338 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmdivv2df3, "__builtin_ia32_divsd", IX86_BUILTIN_DIVSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
31340 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpeqpd", IX86_BUILTIN_CMPEQPD, EQ, (int) V2DF_FTYPE_V2DF_V2DF },
31341 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpltpd", IX86_BUILTIN_CMPLTPD, LT, (int) V2DF_FTYPE_V2DF_V2DF },
31342 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmplepd", IX86_BUILTIN_CMPLEPD, LE, (int) V2DF_FTYPE_V2DF_V2DF },
31343 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpgtpd", IX86_BUILTIN_CMPGTPD, LT, (int) V2DF_FTYPE_V2DF_V2DF_SWAP },
31344 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpgepd", IX86_BUILTIN_CMPGEPD, LE, (int) V2DF_FTYPE_V2DF_V2DF_SWAP},
31345 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpunordpd", IX86_BUILTIN_CMPUNORDPD, UNORDERED, (int) V2DF_FTYPE_V2DF_V2DF },
31346 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpneqpd", IX86_BUILTIN_CMPNEQPD, NE, (int) V2DF_FTYPE_V2DF_V2DF },
31347 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpnltpd", IX86_BUILTIN_CMPNLTPD, UNGE, (int) V2DF_FTYPE_V2DF_V2DF },
31348 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpnlepd", IX86_BUILTIN_CMPNLEPD, UNGT, (int) V2DF_FTYPE_V2DF_V2DF },
31349 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpngtpd", IX86_BUILTIN_CMPNGTPD, UNGE, (int) V2DF_FTYPE_V2DF_V2DF_SWAP },
31350 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpngepd", IX86_BUILTIN_CMPNGEPD, UNGT, (int) V2DF_FTYPE_V2DF_V2DF_SWAP },
31351 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpordpd", IX86_BUILTIN_CMPORDPD, ORDERED, (int) V2DF_FTYPE_V2DF_V2DF },
31352 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpeqsd", IX86_BUILTIN_CMPEQSD, EQ, (int) V2DF_FTYPE_V2DF_V2DF },
31353 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpltsd", IX86_BUILTIN_CMPLTSD, LT, (int) V2DF_FTYPE_V2DF_V2DF },
31354 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmplesd", IX86_BUILTIN_CMPLESD, LE, (int) V2DF_FTYPE_V2DF_V2DF },
31355 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpunordsd", IX86_BUILTIN_CMPUNORDSD, UNORDERED, (int) V2DF_FTYPE_V2DF_V2DF },
31356 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpneqsd", IX86_BUILTIN_CMPNEQSD, NE, (int) V2DF_FTYPE_V2DF_V2DF },
31357 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpnltsd", IX86_BUILTIN_CMPNLTSD, UNGE, (int) V2DF_FTYPE_V2DF_V2DF },
31358 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpnlesd", IX86_BUILTIN_CMPNLESD, UNGT, (int) V2DF_FTYPE_V2DF_V2DF },
31359 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpordsd", IX86_BUILTIN_CMPORDSD, ORDERED, (int) V2DF_FTYPE_V2DF_V2DF },
31361 { OPTION_MASK_ISA_SSE2, CODE_FOR_sminv2df3, "__builtin_ia32_minpd", IX86_BUILTIN_MINPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
31362 { OPTION_MASK_ISA_SSE2, CODE_FOR_smaxv2df3, "__builtin_ia32_maxpd", IX86_BUILTIN_MAXPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
31363 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmsminv2df3, "__builtin_ia32_minsd", IX86_BUILTIN_MINSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
31364 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmsmaxv2df3, "__builtin_ia32_maxsd", IX86_BUILTIN_MAXSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
31366 { OPTION_MASK_ISA_SSE2, CODE_FOR_andv2df3, "__builtin_ia32_andpd", IX86_BUILTIN_ANDPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
31367 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_andnotv2df3, "__builtin_ia32_andnpd", IX86_BUILTIN_ANDNPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
31368 { OPTION_MASK_ISA_SSE2, CODE_FOR_iorv2df3, "__builtin_ia32_orpd", IX86_BUILTIN_ORPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
31369 { OPTION_MASK_ISA_SSE2, CODE_FOR_xorv2df3, "__builtin_ia32_xorpd", IX86_BUILTIN_XORPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
31371 { OPTION_MASK_ISA_SSE2, CODE_FOR_copysignv2df3, "__builtin_ia32_copysignpd", IX86_BUILTIN_CPYSGNPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
31373 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movsd, "__builtin_ia32_movsd", IX86_BUILTIN_MOVSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
31374 { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_highv2df, "__builtin_ia32_unpckhpd", IX86_BUILTIN_UNPCKHPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
31375 { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_lowv2df, "__builtin_ia32_unpcklpd", IX86_BUILTIN_UNPCKLPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
31377 { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_pack_sfix_v2df, "__builtin_ia32_vec_pack_sfix", IX86_BUILTIN_VEC_PACK_SFIX, UNKNOWN, (int) V4SI_FTYPE_V2DF_V2DF },
31379 { OPTION_MASK_ISA_SSE2, CODE_FOR_addv16qi3, "__builtin_ia32_paddb128", IX86_BUILTIN_PADDB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
31380 { OPTION_MASK_ISA_SSE2, CODE_FOR_addv8hi3, "__builtin_ia32_paddw128", IX86_BUILTIN_PADDW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
31381 { OPTION_MASK_ISA_SSE2, CODE_FOR_addv4si3, "__builtin_ia32_paddd128", IX86_BUILTIN_PADDD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
31382 { OPTION_MASK_ISA_SSE2, CODE_FOR_addv2di3, "__builtin_ia32_paddq128", IX86_BUILTIN_PADDQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
31383 { OPTION_MASK_ISA_SSE2, CODE_FOR_subv16qi3, "__builtin_ia32_psubb128", IX86_BUILTIN_PSUBB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
31384 { OPTION_MASK_ISA_SSE2, CODE_FOR_subv8hi3, "__builtin_ia32_psubw128", IX86_BUILTIN_PSUBW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
31385 { OPTION_MASK_ISA_SSE2, CODE_FOR_subv4si3, "__builtin_ia32_psubd128", IX86_BUILTIN_PSUBD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
31386 { OPTION_MASK_ISA_SSE2, CODE_FOR_subv2di3, "__builtin_ia32_psubq128", IX86_BUILTIN_PSUBQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
31388 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ssaddv16qi3, "__builtin_ia32_paddsb128", IX86_BUILTIN_PADDSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
31389 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ssaddv8hi3, "__builtin_ia32_paddsw128", IX86_BUILTIN_PADDSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
31390 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_sssubv16qi3, "__builtin_ia32_psubsb128", IX86_BUILTIN_PSUBSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
31391 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_sssubv8hi3, "__builtin_ia32_psubsw128", IX86_BUILTIN_PSUBSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
31392 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_usaddv16qi3, "__builtin_ia32_paddusb128", IX86_BUILTIN_PADDUSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
31393 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_usaddv8hi3, "__builtin_ia32_paddusw128", IX86_BUILTIN_PADDUSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
31394 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ussubv16qi3, "__builtin_ia32_psubusb128", IX86_BUILTIN_PSUBUSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
31395 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ussubv8hi3, "__builtin_ia32_psubusw128", IX86_BUILTIN_PSUBUSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
31397 { OPTION_MASK_ISA_SSE2, CODE_FOR_mulv8hi3, "__builtin_ia32_pmullw128", IX86_BUILTIN_PMULLW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
31398 { OPTION_MASK_ISA_SSE2, CODE_FOR_smulv8hi3_highpart, "__builtin_ia32_pmulhw128", IX86_BUILTIN_PMULHW128, UNKNOWN,(int) V8HI_FTYPE_V8HI_V8HI },
31400 { OPTION_MASK_ISA_SSE2, CODE_FOR_andv2di3, "__builtin_ia32_pand128", IX86_BUILTIN_PAND128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
31401 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_andnotv2di3, "__builtin_ia32_pandn128", IX86_BUILTIN_PANDN128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
31402 { OPTION_MASK_ISA_SSE2, CODE_FOR_iorv2di3, "__builtin_ia32_por128", IX86_BUILTIN_POR128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
31403 { OPTION_MASK_ISA_SSE2, CODE_FOR_xorv2di3, "__builtin_ia32_pxor128", IX86_BUILTIN_PXOR128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
31405 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_uavgv16qi3, "__builtin_ia32_pavgb128", IX86_BUILTIN_PAVGB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
31406 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_uavgv8hi3, "__builtin_ia32_pavgw128", IX86_BUILTIN_PAVGW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
31408 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_eqv16qi3, "__builtin_ia32_pcmpeqb128", IX86_BUILTIN_PCMPEQB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
31409 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_eqv8hi3, "__builtin_ia32_pcmpeqw128", IX86_BUILTIN_PCMPEQW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
31410 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_eqv4si3, "__builtin_ia32_pcmpeqd128", IX86_BUILTIN_PCMPEQD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
31411 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_gtv16qi3, "__builtin_ia32_pcmpgtb128", IX86_BUILTIN_PCMPGTB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
31412 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_gtv8hi3, "__builtin_ia32_pcmpgtw128", IX86_BUILTIN_PCMPGTW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
31413 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_gtv4si3, "__builtin_ia32_pcmpgtd128", IX86_BUILTIN_PCMPGTD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
31415 { OPTION_MASK_ISA_SSE2, CODE_FOR_umaxv16qi3, "__builtin_ia32_pmaxub128", IX86_BUILTIN_PMAXUB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
31416 { OPTION_MASK_ISA_SSE2, CODE_FOR_smaxv8hi3, "__builtin_ia32_pmaxsw128", IX86_BUILTIN_PMAXSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
31417 { OPTION_MASK_ISA_SSE2, CODE_FOR_uminv16qi3, "__builtin_ia32_pminub128", IX86_BUILTIN_PMINUB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
31418 { OPTION_MASK_ISA_SSE2, CODE_FOR_sminv8hi3, "__builtin_ia32_pminsw128", IX86_BUILTIN_PMINSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
31420 { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_highv16qi, "__builtin_ia32_punpckhbw128", IX86_BUILTIN_PUNPCKHBW128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
31421 { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_highv8hi, "__builtin_ia32_punpckhwd128", IX86_BUILTIN_PUNPCKHWD128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
31422 { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_highv4si, "__builtin_ia32_punpckhdq128", IX86_BUILTIN_PUNPCKHDQ128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
31423 { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_highv2di, "__builtin_ia32_punpckhqdq128", IX86_BUILTIN_PUNPCKHQDQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
31424 { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_lowv16qi, "__builtin_ia32_punpcklbw128", IX86_BUILTIN_PUNPCKLBW128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
31425 { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_lowv8hi, "__builtin_ia32_punpcklwd128", IX86_BUILTIN_PUNPCKLWD128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
31426 { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_lowv4si, "__builtin_ia32_punpckldq128", IX86_BUILTIN_PUNPCKLDQ128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
31427 { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_lowv2di, "__builtin_ia32_punpcklqdq128", IX86_BUILTIN_PUNPCKLQDQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
31429 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_packsswb, "__builtin_ia32_packsswb128", IX86_BUILTIN_PACKSSWB128, UNKNOWN, (int) V16QI_FTYPE_V8HI_V8HI },
31430 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_packssdw, "__builtin_ia32_packssdw128", IX86_BUILTIN_PACKSSDW128, UNKNOWN, (int) V8HI_FTYPE_V4SI_V4SI },
31431 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_packuswb, "__builtin_ia32_packuswb128", IX86_BUILTIN_PACKUSWB128, UNKNOWN, (int) V16QI_FTYPE_V8HI_V8HI },
31433 { OPTION_MASK_ISA_SSE2, CODE_FOR_umulv8hi3_highpart, "__builtin_ia32_pmulhuw128", IX86_BUILTIN_PMULHUW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
31434 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_psadbw, "__builtin_ia32_psadbw128", IX86_BUILTIN_PSADBW128, UNKNOWN, (int) V2DI_FTYPE_V16QI_V16QI },
31436 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_umulv1siv1di3, "__builtin_ia32_pmuludq", IX86_BUILTIN_PMULUDQ, UNKNOWN, (int) V1DI_FTYPE_V2SI_V2SI },
31437 { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_widen_umult_even_v4si, "__builtin_ia32_pmuludq128", IX86_BUILTIN_PMULUDQ128, UNKNOWN, (int) V2DI_FTYPE_V4SI_V4SI },
31439 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_pmaddwd, "__builtin_ia32_pmaddwd128", IX86_BUILTIN_PMADDWD128, UNKNOWN, (int) V4SI_FTYPE_V8HI_V8HI },
31441 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtsi2sd, "__builtin_ia32_cvtsi2sd", IX86_BUILTIN_CVTSI2SD, UNKNOWN, (int) V2DF_FTYPE_V2DF_SI },
31442 { OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_64BIT, CODE_FOR_sse2_cvtsi2sdq, "__builtin_ia32_cvtsi642sd", IX86_BUILTIN_CVTSI642SD, UNKNOWN, (int) V2DF_FTYPE_V2DF_DI },
31443 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtsd2ss, "__builtin_ia32_cvtsd2ss", IX86_BUILTIN_CVTSD2SS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V2DF },
31444 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtss2sd, "__builtin_ia32_cvtss2sd", IX86_BUILTIN_CVTSS2SD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V4SF },
31446 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ashlv1ti3, "__builtin_ia32_pslldqi128", IX86_BUILTIN_PSLLDQI128, UNKNOWN, (int) V2DI_FTYPE_V2DI_INT_CONVERT },
31447 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashlv8hi3, "__builtin_ia32_psllwi128", IX86_BUILTIN_PSLLWI128, UNKNOWN, (int) V8HI_FTYPE_V8HI_SI_COUNT },
31448 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashlv4si3, "__builtin_ia32_pslldi128", IX86_BUILTIN_PSLLDI128, UNKNOWN, (int) V4SI_FTYPE_V4SI_SI_COUNT },
31449 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashlv2di3, "__builtin_ia32_psllqi128", IX86_BUILTIN_PSLLQI128, UNKNOWN, (int) V2DI_FTYPE_V2DI_SI_COUNT },
31450 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashlv8hi3, "__builtin_ia32_psllw128", IX86_BUILTIN_PSLLW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_COUNT },
31451 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashlv4si3, "__builtin_ia32_pslld128", IX86_BUILTIN_PSLLD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_COUNT },
31452 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashlv2di3, "__builtin_ia32_psllq128", IX86_BUILTIN_PSLLQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_COUNT },
31454 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_lshrv1ti3, "__builtin_ia32_psrldqi128", IX86_BUILTIN_PSRLDQI128, UNKNOWN, (int) V2DI_FTYPE_V2DI_INT_CONVERT },
31455 { OPTION_MASK_ISA_SSE2, CODE_FOR_lshrv8hi3, "__builtin_ia32_psrlwi128", IX86_BUILTIN_PSRLWI128, UNKNOWN, (int) V8HI_FTYPE_V8HI_SI_COUNT },
31456 { OPTION_MASK_ISA_SSE2, CODE_FOR_lshrv4si3, "__builtin_ia32_psrldi128", IX86_BUILTIN_PSRLDI128, UNKNOWN, (int) V4SI_FTYPE_V4SI_SI_COUNT },
31457 { OPTION_MASK_ISA_SSE2, CODE_FOR_lshrv2di3, "__builtin_ia32_psrlqi128", IX86_BUILTIN_PSRLQI128, UNKNOWN, (int) V2DI_FTYPE_V2DI_SI_COUNT },
31458 { OPTION_MASK_ISA_SSE2, CODE_FOR_lshrv8hi3, "__builtin_ia32_psrlw128", IX86_BUILTIN_PSRLW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_COUNT },
31459 { OPTION_MASK_ISA_SSE2, CODE_FOR_lshrv4si3, "__builtin_ia32_psrld128", IX86_BUILTIN_PSRLD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_COUNT },
31460 { OPTION_MASK_ISA_SSE2, CODE_FOR_lshrv2di3, "__builtin_ia32_psrlq128", IX86_BUILTIN_PSRLQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_COUNT },
31462 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashrv8hi3, "__builtin_ia32_psrawi128", IX86_BUILTIN_PSRAWI128, UNKNOWN, (int) V8HI_FTYPE_V8HI_SI_COUNT },
31463 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashrv4si3, "__builtin_ia32_psradi128", IX86_BUILTIN_PSRADI128, UNKNOWN, (int) V4SI_FTYPE_V4SI_SI_COUNT },
31464 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashrv8hi3, "__builtin_ia32_psraw128", IX86_BUILTIN_PSRAW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_COUNT },
31465 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashrv4si3, "__builtin_ia32_psrad128", IX86_BUILTIN_PSRAD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_COUNT },
31467 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_pshufd, "__builtin_ia32_pshufd", IX86_BUILTIN_PSHUFD, UNKNOWN, (int) V4SI_FTYPE_V4SI_INT },
31468 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_pshuflw, "__builtin_ia32_pshuflw", IX86_BUILTIN_PSHUFLW, UNKNOWN, (int) V8HI_FTYPE_V8HI_INT },
31469 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_pshufhw, "__builtin_ia32_pshufhw", IX86_BUILTIN_PSHUFHW, UNKNOWN, (int) V8HI_FTYPE_V8HI_INT },
31471 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmsqrtv2df2, "__builtin_ia32_sqrtsd", IX86_BUILTIN_SQRTSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_VEC_MERGE },
31473 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movq128, "__builtin_ia32_movq128", IX86_BUILTIN_MOVQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI },
31475 /* SSE2 MMX */
31476 { OPTION_MASK_ISA_SSE2, CODE_FOR_mmx_addv1di3, "__builtin_ia32_paddq", IX86_BUILTIN_PADDQ, UNKNOWN, (int) V1DI_FTYPE_V1DI_V1DI },
31477 { OPTION_MASK_ISA_SSE2, CODE_FOR_mmx_subv1di3, "__builtin_ia32_psubq", IX86_BUILTIN_PSUBQ, UNKNOWN, (int) V1DI_FTYPE_V1DI_V1DI },
31479 /* SSE3 */
31480 { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_movshdup, "__builtin_ia32_movshdup", IX86_BUILTIN_MOVSHDUP, UNKNOWN, (int) V4SF_FTYPE_V4SF},
31481 { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_movsldup, "__builtin_ia32_movsldup", IX86_BUILTIN_MOVSLDUP, UNKNOWN, (int) V4SF_FTYPE_V4SF },
31483 { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_addsubv4sf3, "__builtin_ia32_addsubps", IX86_BUILTIN_ADDSUBPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
31484 { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_addsubv2df3, "__builtin_ia32_addsubpd", IX86_BUILTIN_ADDSUBPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
31485 { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_haddv4sf3, "__builtin_ia32_haddps", IX86_BUILTIN_HADDPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
31486 { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_haddv2df3, "__builtin_ia32_haddpd", IX86_BUILTIN_HADDPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
31487 { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_hsubv4sf3, "__builtin_ia32_hsubps", IX86_BUILTIN_HSUBPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
31488 { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_hsubv2df3, "__builtin_ia32_hsubpd", IX86_BUILTIN_HSUBPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
31490 /* SSSE3 */
31491 { OPTION_MASK_ISA_SSSE3, CODE_FOR_absv16qi2, "__builtin_ia32_pabsb128", IX86_BUILTIN_PABSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI },
31492 { OPTION_MASK_ISA_SSSE3, CODE_FOR_absv8qi2, "__builtin_ia32_pabsb", IX86_BUILTIN_PABSB, UNKNOWN, (int) V8QI_FTYPE_V8QI },
31493 { OPTION_MASK_ISA_SSSE3, CODE_FOR_absv8hi2, "__builtin_ia32_pabsw128", IX86_BUILTIN_PABSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI },
31494 { OPTION_MASK_ISA_SSSE3, CODE_FOR_absv4hi2, "__builtin_ia32_pabsw", IX86_BUILTIN_PABSW, UNKNOWN, (int) V4HI_FTYPE_V4HI },
31495 { OPTION_MASK_ISA_SSSE3, CODE_FOR_absv4si2, "__builtin_ia32_pabsd128", IX86_BUILTIN_PABSD128, UNKNOWN, (int) V4SI_FTYPE_V4SI },
31496 { OPTION_MASK_ISA_SSSE3, CODE_FOR_absv2si2, "__builtin_ia32_pabsd", IX86_BUILTIN_PABSD, UNKNOWN, (int) V2SI_FTYPE_V2SI },
31498 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phaddwv8hi3, "__builtin_ia32_phaddw128", IX86_BUILTIN_PHADDW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
31499 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phaddwv4hi3, "__builtin_ia32_phaddw", IX86_BUILTIN_PHADDW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
31500 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phadddv4si3, "__builtin_ia32_phaddd128", IX86_BUILTIN_PHADDD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
31501 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phadddv2si3, "__builtin_ia32_phaddd", IX86_BUILTIN_PHADDD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
31502 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phaddswv8hi3, "__builtin_ia32_phaddsw128", IX86_BUILTIN_PHADDSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
31503 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phaddswv4hi3, "__builtin_ia32_phaddsw", IX86_BUILTIN_PHADDSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
31504 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phsubwv8hi3, "__builtin_ia32_phsubw128", IX86_BUILTIN_PHSUBW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
31505 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phsubwv4hi3, "__builtin_ia32_phsubw", IX86_BUILTIN_PHSUBW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
31506 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phsubdv4si3, "__builtin_ia32_phsubd128", IX86_BUILTIN_PHSUBD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
31507 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phsubdv2si3, "__builtin_ia32_phsubd", IX86_BUILTIN_PHSUBD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
31508 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phsubswv8hi3, "__builtin_ia32_phsubsw128", IX86_BUILTIN_PHSUBSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
31509 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phsubswv4hi3, "__builtin_ia32_phsubsw", IX86_BUILTIN_PHSUBSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
31510 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_pmaddubsw128, "__builtin_ia32_pmaddubsw128", IX86_BUILTIN_PMADDUBSW128, UNKNOWN, (int) V8HI_FTYPE_V16QI_V16QI },
31511 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_pmaddubsw, "__builtin_ia32_pmaddubsw", IX86_BUILTIN_PMADDUBSW, UNKNOWN, (int) V4HI_FTYPE_V8QI_V8QI },
31512 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_pmulhrswv8hi3, "__builtin_ia32_pmulhrsw128", IX86_BUILTIN_PMULHRSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
31513 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_pmulhrswv4hi3, "__builtin_ia32_pmulhrsw", IX86_BUILTIN_PMULHRSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
31514 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_pshufbv16qi3, "__builtin_ia32_pshufb128", IX86_BUILTIN_PSHUFB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
31515 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_pshufbv8qi3, "__builtin_ia32_pshufb", IX86_BUILTIN_PSHUFB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
31516 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_psignv16qi3, "__builtin_ia32_psignb128", IX86_BUILTIN_PSIGNB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
31517 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_psignv8qi3, "__builtin_ia32_psignb", IX86_BUILTIN_PSIGNB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
31518 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_psignv8hi3, "__builtin_ia32_psignw128", IX86_BUILTIN_PSIGNW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
31519 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_psignv4hi3, "__builtin_ia32_psignw", IX86_BUILTIN_PSIGNW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
31520 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_psignv4si3, "__builtin_ia32_psignd128", IX86_BUILTIN_PSIGND128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
31521 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_psignv2si3, "__builtin_ia32_psignd", IX86_BUILTIN_PSIGND, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
31523 /* SSSE3. */
31524 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_palignrti, "__builtin_ia32_palignr128", IX86_BUILTIN_PALIGNR128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_INT_CONVERT },
31525 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_palignrdi, "__builtin_ia32_palignr", IX86_BUILTIN_PALIGNR, UNKNOWN, (int) V1DI_FTYPE_V1DI_V1DI_INT_CONVERT },
31527 /* SSE4.1 */
31528 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_blendpd, "__builtin_ia32_blendpd", IX86_BUILTIN_BLENDPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
31529 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_blendps, "__builtin_ia32_blendps", IX86_BUILTIN_BLENDPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
31530 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_blendvpd, "__builtin_ia32_blendvpd", IX86_BUILTIN_BLENDVPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF },
31531 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_blendvps, "__builtin_ia32_blendvps", IX86_BUILTIN_BLENDVPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF },
31532 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_dppd, "__builtin_ia32_dppd", IX86_BUILTIN_DPPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
31533 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_dpps, "__builtin_ia32_dpps", IX86_BUILTIN_DPPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
31534 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_insertps, "__builtin_ia32_insertps128", IX86_BUILTIN_INSERTPS128, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
31535 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_mpsadbw, "__builtin_ia32_mpsadbw128", IX86_BUILTIN_MPSADBW128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_INT },
31536 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_pblendvb, "__builtin_ia32_pblendvb128", IX86_BUILTIN_PBLENDVB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_V16QI },
31537 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_pblendw, "__builtin_ia32_pblendw128", IX86_BUILTIN_PBLENDW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_INT },
31539 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_sign_extendv8qiv8hi2, "__builtin_ia32_pmovsxbw128", IX86_BUILTIN_PMOVSXBW128, UNKNOWN, (int) V8HI_FTYPE_V16QI },
31540 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_sign_extendv4qiv4si2, "__builtin_ia32_pmovsxbd128", IX86_BUILTIN_PMOVSXBD128, UNKNOWN, (int) V4SI_FTYPE_V16QI },
31541 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_sign_extendv2qiv2di2, "__builtin_ia32_pmovsxbq128", IX86_BUILTIN_PMOVSXBQ128, UNKNOWN, (int) V2DI_FTYPE_V16QI },
31542 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_sign_extendv4hiv4si2, "__builtin_ia32_pmovsxwd128", IX86_BUILTIN_PMOVSXWD128, UNKNOWN, (int) V4SI_FTYPE_V8HI },
31543 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_sign_extendv2hiv2di2, "__builtin_ia32_pmovsxwq128", IX86_BUILTIN_PMOVSXWQ128, UNKNOWN, (int) V2DI_FTYPE_V8HI },
31544 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_sign_extendv2siv2di2, "__builtin_ia32_pmovsxdq128", IX86_BUILTIN_PMOVSXDQ128, UNKNOWN, (int) V2DI_FTYPE_V4SI },
31545 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_zero_extendv8qiv8hi2, "__builtin_ia32_pmovzxbw128", IX86_BUILTIN_PMOVZXBW128, UNKNOWN, (int) V8HI_FTYPE_V16QI },
31546 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_zero_extendv4qiv4si2, "__builtin_ia32_pmovzxbd128", IX86_BUILTIN_PMOVZXBD128, UNKNOWN, (int) V4SI_FTYPE_V16QI },
31547 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_zero_extendv2qiv2di2, "__builtin_ia32_pmovzxbq128", IX86_BUILTIN_PMOVZXBQ128, UNKNOWN, (int) V2DI_FTYPE_V16QI },
31548 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_zero_extendv4hiv4si2, "__builtin_ia32_pmovzxwd128", IX86_BUILTIN_PMOVZXWD128, UNKNOWN, (int) V4SI_FTYPE_V8HI },
31549 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_zero_extendv2hiv2di2, "__builtin_ia32_pmovzxwq128", IX86_BUILTIN_PMOVZXWQ128, UNKNOWN, (int) V2DI_FTYPE_V8HI },
31550 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_zero_extendv2siv2di2, "__builtin_ia32_pmovzxdq128", IX86_BUILTIN_PMOVZXDQ128, UNKNOWN, (int) V2DI_FTYPE_V4SI },
31551 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_phminposuw, "__builtin_ia32_phminposuw128", IX86_BUILTIN_PHMINPOSUW128, UNKNOWN, (int) V8HI_FTYPE_V8HI },
31553 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_packusdw, "__builtin_ia32_packusdw128", IX86_BUILTIN_PACKUSDW128, UNKNOWN, (int) V8HI_FTYPE_V4SI_V4SI },
31554 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_eqv2di3, "__builtin_ia32_pcmpeqq", IX86_BUILTIN_PCMPEQQ, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
31555 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_smaxv16qi3, "__builtin_ia32_pmaxsb128", IX86_BUILTIN_PMAXSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
31556 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_smaxv4si3, "__builtin_ia32_pmaxsd128", IX86_BUILTIN_PMAXSD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
31557 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_umaxv4si3, "__builtin_ia32_pmaxud128", IX86_BUILTIN_PMAXUD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
31558 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_umaxv8hi3, "__builtin_ia32_pmaxuw128", IX86_BUILTIN_PMAXUW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
31559 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sminv16qi3, "__builtin_ia32_pminsb128", IX86_BUILTIN_PMINSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
31560 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sminv4si3, "__builtin_ia32_pminsd128", IX86_BUILTIN_PMINSD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
31561 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_uminv4si3, "__builtin_ia32_pminud128", IX86_BUILTIN_PMINUD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
31562 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_uminv8hi3, "__builtin_ia32_pminuw128", IX86_BUILTIN_PMINUW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
31563 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_mulv2siv2di3, "__builtin_ia32_pmuldq128", IX86_BUILTIN_PMULDQ128, UNKNOWN, (int) V2DI_FTYPE_V4SI_V4SI },
31564 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_mulv4si3, "__builtin_ia32_pmulld128", IX86_BUILTIN_PMULLD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
31566 /* SSE4.1 */
31567 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundpd, "__builtin_ia32_roundpd", IX86_BUILTIN_ROUNDPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_INT },
31568 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundps, "__builtin_ia32_roundps", IX86_BUILTIN_ROUNDPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_INT },
31569 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundsd, "__builtin_ia32_roundsd", IX86_BUILTIN_ROUNDSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
31570 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundss, "__builtin_ia32_roundss", IX86_BUILTIN_ROUNDSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
31572 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundpd, "__builtin_ia32_floorpd", IX86_BUILTIN_FLOORPD, (enum rtx_code) ROUND_FLOOR, (int) V2DF_FTYPE_V2DF_ROUND },
31573 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundpd, "__builtin_ia32_ceilpd", IX86_BUILTIN_CEILPD, (enum rtx_code) ROUND_CEIL, (int) V2DF_FTYPE_V2DF_ROUND },
31574 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundpd, "__builtin_ia32_truncpd", IX86_BUILTIN_TRUNCPD, (enum rtx_code) ROUND_TRUNC, (int) V2DF_FTYPE_V2DF_ROUND },
31575 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundpd, "__builtin_ia32_rintpd", IX86_BUILTIN_RINTPD, (enum rtx_code) ROUND_MXCSR, (int) V2DF_FTYPE_V2DF_ROUND },
31577 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundpd_vec_pack_sfix, "__builtin_ia32_floorpd_vec_pack_sfix", IX86_BUILTIN_FLOORPD_VEC_PACK_SFIX, (enum rtx_code) ROUND_FLOOR, (int) V4SI_FTYPE_V2DF_V2DF_ROUND },
31578 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundpd_vec_pack_sfix, "__builtin_ia32_ceilpd_vec_pack_sfix", IX86_BUILTIN_CEILPD_VEC_PACK_SFIX, (enum rtx_code) ROUND_CEIL, (int) V4SI_FTYPE_V2DF_V2DF_ROUND },
31580 { OPTION_MASK_ISA_ROUND, CODE_FOR_roundv2df2, "__builtin_ia32_roundpd_az", IX86_BUILTIN_ROUNDPD_AZ, UNKNOWN, (int) V2DF_FTYPE_V2DF },
31581 { OPTION_MASK_ISA_ROUND, CODE_FOR_roundv2df2_vec_pack_sfix, "__builtin_ia32_roundpd_az_vec_pack_sfix", IX86_BUILTIN_ROUNDPD_AZ_VEC_PACK_SFIX, UNKNOWN, (int) V4SI_FTYPE_V2DF_V2DF },
31583 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundps, "__builtin_ia32_floorps", IX86_BUILTIN_FLOORPS, (enum rtx_code) ROUND_FLOOR, (int) V4SF_FTYPE_V4SF_ROUND },
31584 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundps, "__builtin_ia32_ceilps", IX86_BUILTIN_CEILPS, (enum rtx_code) ROUND_CEIL, (int) V4SF_FTYPE_V4SF_ROUND },
31585 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundps, "__builtin_ia32_truncps", IX86_BUILTIN_TRUNCPS, (enum rtx_code) ROUND_TRUNC, (int) V4SF_FTYPE_V4SF_ROUND },
31586 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundps, "__builtin_ia32_rintps", IX86_BUILTIN_RINTPS, (enum rtx_code) ROUND_MXCSR, (int) V4SF_FTYPE_V4SF_ROUND },
31588 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundps_sfix, "__builtin_ia32_floorps_sfix", IX86_BUILTIN_FLOORPS_SFIX, (enum rtx_code) ROUND_FLOOR, (int) V4SI_FTYPE_V4SF_ROUND },
31589 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundps_sfix, "__builtin_ia32_ceilps_sfix", IX86_BUILTIN_CEILPS_SFIX, (enum rtx_code) ROUND_CEIL, (int) V4SI_FTYPE_V4SF_ROUND },
31591 { OPTION_MASK_ISA_ROUND, CODE_FOR_roundv4sf2, "__builtin_ia32_roundps_az", IX86_BUILTIN_ROUNDPS_AZ, UNKNOWN, (int) V4SF_FTYPE_V4SF },
31592 { OPTION_MASK_ISA_ROUND, CODE_FOR_roundv4sf2_sfix, "__builtin_ia32_roundps_az_sfix", IX86_BUILTIN_ROUNDPS_AZ_SFIX, UNKNOWN, (int) V4SI_FTYPE_V4SF },
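  /* The floor/ceil/trunc/rint entries above expand through the same
     CODE_FOR_sse4_1_roundpd / CODE_FOR_sse4_1_roundps patterns as the
     plain round builtins: the field that normally holds a comparison
     code carries the ROUND_FLOOR/ROUND_CEIL/ROUND_TRUNC/ROUND_MXCSR
     flag instead, and the *_ROUND function types mark entries whose
     rounding immediate comes from that flag rather than from the
     caller.  The roundpd_az/roundps_az entries go through the generic
     roundv2df2/roundv4sf2 expanders and take no immediate at all.  */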
31594 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_ptest, "__builtin_ia32_ptestz128", IX86_BUILTIN_PTESTZ, EQ, (int) INT_FTYPE_V2DI_V2DI_PTEST },
31595 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_ptest, "__builtin_ia32_ptestc128", IX86_BUILTIN_PTESTC, LTU, (int) INT_FTYPE_V2DI_V2DI_PTEST },
31596 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_ptest, "__builtin_ia32_ptestnzc128", IX86_BUILTIN_PTESTNZC, GTU, (int) INT_FTYPE_V2DI_V2DI_PTEST },
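  /* The three ptest builtins share one pattern; the rtx code selects
     which flag of PTEST is returned: EQ reads ZF (ptestz), LTU reads
     CF (ptestc), and GTU tests that neither flag is set (ptestnzc).  */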
31598 /* SSE4.2 */
31599 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_gtv2di3, "__builtin_ia32_pcmpgtq", IX86_BUILTIN_PCMPGTQ, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
31600 { OPTION_MASK_ISA_SSE4_2 | OPTION_MASK_ISA_CRC32, CODE_FOR_sse4_2_crc32qi, "__builtin_ia32_crc32qi", IX86_BUILTIN_CRC32QI, UNKNOWN, (int) UINT_FTYPE_UINT_UCHAR },
31601 { OPTION_MASK_ISA_SSE4_2 | OPTION_MASK_ISA_CRC32, CODE_FOR_sse4_2_crc32hi, "__builtin_ia32_crc32hi", IX86_BUILTIN_CRC32HI, UNKNOWN, (int) UINT_FTYPE_UINT_USHORT },
31602 { OPTION_MASK_ISA_SSE4_2 | OPTION_MASK_ISA_CRC32, CODE_FOR_sse4_2_crc32si, "__builtin_ia32_crc32si", IX86_BUILTIN_CRC32SI, UNKNOWN, (int) UINT_FTYPE_UINT_UINT },
31603 { OPTION_MASK_ISA_SSE4_2 | OPTION_MASK_ISA_CRC32 | OPTION_MASK_ISA_64BIT, CODE_FOR_sse4_2_crc32di, "__builtin_ia32_crc32di", IX86_BUILTIN_CRC32DI, UNKNOWN, (int) UINT64_FTYPE_UINT64_UINT64 },
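  /* The crc32 builtins fold one more 8/16/32/64-bit chunk into a
     running CRC using the CRC-32C (Castagnoli) polynomial of the
     SSE4.2 CRC32 instruction; the DImode form is additionally gated
     on 64-bit mode.  Illustrative use only (buf and len are
     placeholders, not part of this file):

	unsigned int crc = 0xffffffff;
	for (size_t i = 0; i < len; i++)
	  crc = __builtin_ia32_crc32qi (crc, buf[i]);  */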
31605 /* SSE4A */
31606 { OPTION_MASK_ISA_SSE4A, CODE_FOR_sse4a_extrqi, "__builtin_ia32_extrqi", IX86_BUILTIN_EXTRQI, UNKNOWN, (int) V2DI_FTYPE_V2DI_UINT_UINT },
31607 { OPTION_MASK_ISA_SSE4A, CODE_FOR_sse4a_extrq, "__builtin_ia32_extrq", IX86_BUILTIN_EXTRQ, UNKNOWN, (int) V2DI_FTYPE_V2DI_V16QI },
31608 { OPTION_MASK_ISA_SSE4A, CODE_FOR_sse4a_insertqi, "__builtin_ia32_insertqi", IX86_BUILTIN_INSERTQI, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_UINT_UINT },
31609 { OPTION_MASK_ISA_SSE4A, CODE_FOR_sse4a_insertq, "__builtin_ia32_insertq", IX86_BUILTIN_INSERTQ, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
31611 /* AES */
31612 { OPTION_MASK_ISA_SSE2, CODE_FOR_aeskeygenassist, 0, IX86_BUILTIN_AESKEYGENASSIST128, UNKNOWN, (int) V2DI_FTYPE_V2DI_INT },
31613 { OPTION_MASK_ISA_SSE2, CODE_FOR_aesimc, 0, IX86_BUILTIN_AESIMC128, UNKNOWN, (int) V2DI_FTYPE_V2DI },
31615 { OPTION_MASK_ISA_SSE2, CODE_FOR_aesenc, 0, IX86_BUILTIN_AESENC128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
31616 { OPTION_MASK_ISA_SSE2, CODE_FOR_aesenclast, 0, IX86_BUILTIN_AESENCLAST128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
31617 { OPTION_MASK_ISA_SSE2, CODE_FOR_aesdec, 0, IX86_BUILTIN_AESDEC128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
31618 { OPTION_MASK_ISA_SSE2, CODE_FOR_aesdeclast, 0, IX86_BUILTIN_AESDECLAST128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
31620 /* PCLMUL */
31621 { OPTION_MASK_ISA_SSE2, CODE_FOR_pclmulqdq, 0, IX86_BUILTIN_PCLMULQDQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_INT },
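  /* The AES and PCLMUL rows carry a null name field; presumably the
     user-visible __builtin_ia32_aes* and __builtin_ia32_pclmulqdq128
     declarations are created elsewhere, and these entries only supply
     the expansion data keyed off the IX86_BUILTIN_* codes.  */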
31623 /* AVX */
31624 { OPTION_MASK_ISA_AVX, CODE_FOR_addv4df3, "__builtin_ia32_addpd256", IX86_BUILTIN_ADDPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
31625 { OPTION_MASK_ISA_AVX, CODE_FOR_addv8sf3, "__builtin_ia32_addps256", IX86_BUILTIN_ADDPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
31626 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_addsubv4df3, "__builtin_ia32_addsubpd256", IX86_BUILTIN_ADDSUBPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
31627 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_addsubv8sf3, "__builtin_ia32_addsubps256", IX86_BUILTIN_ADDSUBPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
31628 { OPTION_MASK_ISA_AVX, CODE_FOR_andv4df3, "__builtin_ia32_andpd256", IX86_BUILTIN_ANDPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
31629 { OPTION_MASK_ISA_AVX, CODE_FOR_andv8sf3, "__builtin_ia32_andps256", IX86_BUILTIN_ANDPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
31630 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_andnotv4df3, "__builtin_ia32_andnpd256", IX86_BUILTIN_ANDNPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
31631 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_andnotv8sf3, "__builtin_ia32_andnps256", IX86_BUILTIN_ANDNPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
31632 { OPTION_MASK_ISA_AVX, CODE_FOR_divv4df3, "__builtin_ia32_divpd256", IX86_BUILTIN_DIVPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
31633 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_divv8sf3, "__builtin_ia32_divps256", IX86_BUILTIN_DIVPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
31634 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_haddv4df3, "__builtin_ia32_haddpd256", IX86_BUILTIN_HADDPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
31635 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_hsubv8sf3, "__builtin_ia32_hsubps256", IX86_BUILTIN_HSUBPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
31636 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_hsubv4df3, "__builtin_ia32_hsubpd256", IX86_BUILTIN_HSUBPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
31637 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_haddv8sf3, "__builtin_ia32_haddps256", IX86_BUILTIN_HADDPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
31638 { OPTION_MASK_ISA_AVX, CODE_FOR_smaxv4df3, "__builtin_ia32_maxpd256", IX86_BUILTIN_MAXPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
31639 { OPTION_MASK_ISA_AVX, CODE_FOR_smaxv8sf3, "__builtin_ia32_maxps256", IX86_BUILTIN_MAXPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
31640 { OPTION_MASK_ISA_AVX, CODE_FOR_sminv4df3, "__builtin_ia32_minpd256", IX86_BUILTIN_MINPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
31641 { OPTION_MASK_ISA_AVX, CODE_FOR_sminv8sf3, "__builtin_ia32_minps256", IX86_BUILTIN_MINPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
31642 { OPTION_MASK_ISA_AVX, CODE_FOR_mulv4df3, "__builtin_ia32_mulpd256", IX86_BUILTIN_MULPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
31643 { OPTION_MASK_ISA_AVX, CODE_FOR_mulv8sf3, "__builtin_ia32_mulps256", IX86_BUILTIN_MULPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
31644 { OPTION_MASK_ISA_AVX, CODE_FOR_iorv4df3, "__builtin_ia32_orpd256", IX86_BUILTIN_ORPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
31645 { OPTION_MASK_ISA_AVX, CODE_FOR_iorv8sf3, "__builtin_ia32_orps256", IX86_BUILTIN_ORPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
31646 { OPTION_MASK_ISA_AVX, CODE_FOR_subv4df3, "__builtin_ia32_subpd256", IX86_BUILTIN_SUBPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
31647 { OPTION_MASK_ISA_AVX, CODE_FOR_subv8sf3, "__builtin_ia32_subps256", IX86_BUILTIN_SUBPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
31648 { OPTION_MASK_ISA_AVX, CODE_FOR_xorv4df3, "__builtin_ia32_xorpd256", IX86_BUILTIN_XORPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
31649 { OPTION_MASK_ISA_AVX, CODE_FOR_xorv8sf3, "__builtin_ia32_xorps256", IX86_BUILTIN_XORPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
31651 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilvarv2df3, "__builtin_ia32_vpermilvarpd", IX86_BUILTIN_VPERMILVARPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DI },
31652 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilvarv4sf3, "__builtin_ia32_vpermilvarps", IX86_BUILTIN_VPERMILVARPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SI },
31653 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilvarv4df3, "__builtin_ia32_vpermilvarpd256", IX86_BUILTIN_VPERMILVARPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DI },
31654 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilvarv8sf3, "__builtin_ia32_vpermilvarps256", IX86_BUILTIN_VPERMILVARPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SI },
31656 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_blendpd256, "__builtin_ia32_blendpd256", IX86_BUILTIN_BLENDPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_INT },
31657 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_blendps256, "__builtin_ia32_blendps256", IX86_BUILTIN_BLENDPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_INT },
31658 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_blendvpd256, "__builtin_ia32_blendvpd256", IX86_BUILTIN_BLENDVPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DF },
31659 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_blendvps256, "__builtin_ia32_blendvps256", IX86_BUILTIN_BLENDVPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SF },
31660 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_dpps256, "__builtin_ia32_dpps256", IX86_BUILTIN_DPPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_INT },
31661 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_shufpd256, "__builtin_ia32_shufpd256", IX86_BUILTIN_SHUFPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_INT },
31662 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_shufps256, "__builtin_ia32_shufps256", IX86_BUILTIN_SHUFPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_INT },
31663 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vmcmpv2df3, "__builtin_ia32_cmpsd", IX86_BUILTIN_CMPSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
31664 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vmcmpv4sf3, "__builtin_ia32_cmpss", IX86_BUILTIN_CMPSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
31665 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cmpv2df3, "__builtin_ia32_cmppd", IX86_BUILTIN_CMPPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
31666 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cmpv4sf3, "__builtin_ia32_cmpps", IX86_BUILTIN_CMPPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
31667 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cmpv4df3, "__builtin_ia32_cmppd256", IX86_BUILTIN_CMPPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_INT },
31668 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cmpv8sf3, "__builtin_ia32_cmpps256", IX86_BUILTIN_CMPPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_INT },
31669 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vextractf128v4df, "__builtin_ia32_vextractf128_pd256", IX86_BUILTIN_EXTRACTF128PD256, UNKNOWN, (int) V2DF_FTYPE_V4DF_INT },
31670 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vextractf128v8sf, "__builtin_ia32_vextractf128_ps256", IX86_BUILTIN_EXTRACTF128PS256, UNKNOWN, (int) V4SF_FTYPE_V8SF_INT },
31671 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vextractf128v8si, "__builtin_ia32_vextractf128_si256", IX86_BUILTIN_EXTRACTF128SI256, UNKNOWN, (int) V4SI_FTYPE_V8SI_INT },
31672 { OPTION_MASK_ISA_AVX, CODE_FOR_floatv4siv4df2, "__builtin_ia32_cvtdq2pd256", IX86_BUILTIN_CVTDQ2PD256, UNKNOWN, (int) V4DF_FTYPE_V4SI },
31673 { OPTION_MASK_ISA_AVX, CODE_FOR_floatv8siv8sf2, "__builtin_ia32_cvtdq2ps256", IX86_BUILTIN_CVTDQ2PS256, UNKNOWN, (int) V8SF_FTYPE_V8SI },
31674 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cvtpd2ps256, "__builtin_ia32_cvtpd2ps256", IX86_BUILTIN_CVTPD2PS256, UNKNOWN, (int) V4SF_FTYPE_V4DF },
31675 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_fix_notruncv8sfv8si, "__builtin_ia32_cvtps2dq256", IX86_BUILTIN_CVTPS2DQ256, UNKNOWN, (int) V8SI_FTYPE_V8SF },
31676 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cvtps2pd256, "__builtin_ia32_cvtps2pd256", IX86_BUILTIN_CVTPS2PD256, UNKNOWN, (int) V4DF_FTYPE_V4SF },
31677 { OPTION_MASK_ISA_AVX, CODE_FOR_fix_truncv4dfv4si2, "__builtin_ia32_cvttpd2dq256", IX86_BUILTIN_CVTTPD2DQ256, UNKNOWN, (int) V4SI_FTYPE_V4DF },
31678 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cvtpd2dq256, "__builtin_ia32_cvtpd2dq256", IX86_BUILTIN_CVTPD2DQ256, UNKNOWN, (int) V4SI_FTYPE_V4DF },
31679 { OPTION_MASK_ISA_AVX, CODE_FOR_fix_truncv8sfv8si2, "__builtin_ia32_cvttps2dq256", IX86_BUILTIN_CVTTPS2DQ256, UNKNOWN, (int) V8SI_FTYPE_V8SF },
31680 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vperm2f128v4df3, "__builtin_ia32_vperm2f128_pd256", IX86_BUILTIN_VPERM2F128PD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_INT },
31681 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vperm2f128v8sf3, "__builtin_ia32_vperm2f128_ps256", IX86_BUILTIN_VPERM2F128PS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_INT },
31682 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vperm2f128v8si3, "__builtin_ia32_vperm2f128_si256", IX86_BUILTIN_VPERM2F128SI256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_INT },
31683 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilv2df, "__builtin_ia32_vpermilpd", IX86_BUILTIN_VPERMILPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_INT },
31684 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilv4sf, "__builtin_ia32_vpermilps", IX86_BUILTIN_VPERMILPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_INT },
31685 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilv4df, "__builtin_ia32_vpermilpd256", IX86_BUILTIN_VPERMILPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_INT },
31686 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilv8sf, "__builtin_ia32_vpermilps256", IX86_BUILTIN_VPERMILPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_INT },
31687 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vinsertf128v4df, "__builtin_ia32_vinsertf128_pd256", IX86_BUILTIN_VINSERTF128PD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V2DF_INT },
31688 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vinsertf128v8sf, "__builtin_ia32_vinsertf128_ps256", IX86_BUILTIN_VINSERTF128PS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V4SF_INT },
31689 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vinsertf128v8si, "__builtin_ia32_vinsertf128_si256", IX86_BUILTIN_VINSERTF128SI256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V4SI_INT },
31691 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movshdup256, "__builtin_ia32_movshdup256", IX86_BUILTIN_MOVSHDUP256, UNKNOWN, (int) V8SF_FTYPE_V8SF },
31692 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movsldup256, "__builtin_ia32_movsldup256", IX86_BUILTIN_MOVSLDUP256, UNKNOWN, (int) V8SF_FTYPE_V8SF },
31693 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movddup256, "__builtin_ia32_movddup256", IX86_BUILTIN_MOVDDUP256, UNKNOWN, (int) V4DF_FTYPE_V4DF },
31695 { OPTION_MASK_ISA_AVX, CODE_FOR_sqrtv4df2, "__builtin_ia32_sqrtpd256", IX86_BUILTIN_SQRTPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF },
31696 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_sqrtv8sf2, "__builtin_ia32_sqrtps256", IX86_BUILTIN_SQRTPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF },
31697 { OPTION_MASK_ISA_AVX, CODE_FOR_sqrtv8sf2, "__builtin_ia32_sqrtps_nr256", IX86_BUILTIN_SQRTPS_NR256, UNKNOWN, (int) V8SF_FTYPE_V8SF },
31698 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_rsqrtv8sf2, "__builtin_ia32_rsqrtps256", IX86_BUILTIN_RSQRTPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF },
31699 { OPTION_MASK_ISA_AVX, CODE_FOR_rsqrtv8sf2, "__builtin_ia32_rsqrtps_nr256", IX86_BUILTIN_RSQRTPS_NR256, UNKNOWN, (int) V8SF_FTYPE_V8SF },
31701 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_rcpv8sf2, "__builtin_ia32_rcpps256", IX86_BUILTIN_RCPPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF },
31703 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundpd256, "__builtin_ia32_roundpd256", IX86_BUILTIN_ROUNDPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_INT },
31704 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundps256, "__builtin_ia32_roundps256", IX86_BUILTIN_ROUNDPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_INT },
31706 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundpd256, "__builtin_ia32_floorpd256", IX86_BUILTIN_FLOORPD256, (enum rtx_code) ROUND_FLOOR, (int) V4DF_FTYPE_V4DF_ROUND },
31707 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundpd256, "__builtin_ia32_ceilpd256", IX86_BUILTIN_CEILPD256, (enum rtx_code) ROUND_CEIL, (int) V4DF_FTYPE_V4DF_ROUND },
31708 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundpd256, "__builtin_ia32_truncpd256", IX86_BUILTIN_TRUNCPD256, (enum rtx_code) ROUND_TRUNC, (int) V4DF_FTYPE_V4DF_ROUND },
31709 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundpd256, "__builtin_ia32_rintpd256", IX86_BUILTIN_RINTPD256, (enum rtx_code) ROUND_MXCSR, (int) V4DF_FTYPE_V4DF_ROUND },
31711 { OPTION_MASK_ISA_AVX, CODE_FOR_roundv4df2, "__builtin_ia32_roundpd_az256", IX86_BUILTIN_ROUNDPD_AZ256, UNKNOWN, (int) V4DF_FTYPE_V4DF },
31712 { OPTION_MASK_ISA_AVX, CODE_FOR_roundv4df2_vec_pack_sfix, "__builtin_ia32_roundpd_az_vec_pack_sfix256", IX86_BUILTIN_ROUNDPD_AZ_VEC_PACK_SFIX256, UNKNOWN, (int) V8SI_FTYPE_V4DF_V4DF },
31714 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundpd_vec_pack_sfix256, "__builtin_ia32_floorpd_vec_pack_sfix256", IX86_BUILTIN_FLOORPD_VEC_PACK_SFIX256, (enum rtx_code) ROUND_FLOOR, (int) V8SI_FTYPE_V4DF_V4DF_ROUND },
31715 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundpd_vec_pack_sfix256, "__builtin_ia32_ceilpd_vec_pack_sfix256", IX86_BUILTIN_CEILPD_VEC_PACK_SFIX256, (enum rtx_code) ROUND_CEIL, (int) V8SI_FTYPE_V4DF_V4DF_ROUND },
31717 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundps256, "__builtin_ia32_floorps256", IX86_BUILTIN_FLOORPS256, (enum rtx_code) ROUND_FLOOR, (int) V8SF_FTYPE_V8SF_ROUND },
31718 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundps256, "__builtin_ia32_ceilps256", IX86_BUILTIN_CEILPS256, (enum rtx_code) ROUND_CEIL, (int) V8SF_FTYPE_V8SF_ROUND },
31719 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundps256, "__builtin_ia32_truncps256", IX86_BUILTIN_TRUNCPS256, (enum rtx_code) ROUND_TRUNC, (int) V8SF_FTYPE_V8SF_ROUND },
31720 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundps256, "__builtin_ia32_rintps256", IX86_BUILTIN_RINTPS256, (enum rtx_code) ROUND_MXCSR, (int) V8SF_FTYPE_V8SF_ROUND },
31722 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundps_sfix256, "__builtin_ia32_floorps_sfix256", IX86_BUILTIN_FLOORPS_SFIX256, (enum rtx_code) ROUND_FLOOR, (int) V8SI_FTYPE_V8SF_ROUND },
31723 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundps_sfix256, "__builtin_ia32_ceilps_sfix256", IX86_BUILTIN_CEILPS_SFIX256, (enum rtx_code) ROUND_CEIL, (int) V8SI_FTYPE_V8SF_ROUND },
31725 { OPTION_MASK_ISA_AVX, CODE_FOR_roundv8sf2, "__builtin_ia32_roundps_az256", IX86_BUILTIN_ROUNDPS_AZ256, UNKNOWN, (int) V8SF_FTYPE_V8SF },
31726 { OPTION_MASK_ISA_AVX, CODE_FOR_roundv8sf2_sfix, "__builtin_ia32_roundps_az_sfix256", IX86_BUILTIN_ROUNDPS_AZ_SFIX256, UNKNOWN, (int) V8SI_FTYPE_V8SF },
31728 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_unpckhpd256, "__builtin_ia32_unpckhpd256", IX86_BUILTIN_UNPCKHPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
31729 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_unpcklpd256, "__builtin_ia32_unpcklpd256", IX86_BUILTIN_UNPCKLPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
31730 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_unpckhps256, "__builtin_ia32_unpckhps256", IX86_BUILTIN_UNPCKHPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
31731 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_unpcklps256, "__builtin_ia32_unpcklps256", IX86_BUILTIN_UNPCKLPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
31733 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_si256_si, "__builtin_ia32_si256_si", IX86_BUILTIN_SI256_SI, UNKNOWN, (int) V8SI_FTYPE_V4SI },
31734 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_ps256_ps, "__builtin_ia32_ps256_ps", IX86_BUILTIN_PS256_PS, UNKNOWN, (int) V8SF_FTYPE_V4SF },
31735 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_pd256_pd, "__builtin_ia32_pd256_pd", IX86_BUILTIN_PD256_PD, UNKNOWN, (int) V4DF_FTYPE_V2DF },
31736 { OPTION_MASK_ISA_AVX, CODE_FOR_vec_extract_lo_v8si, "__builtin_ia32_si_si256", IX86_BUILTIN_SI_SI256, UNKNOWN, (int) V4SI_FTYPE_V8SI },
31737 { OPTION_MASK_ISA_AVX, CODE_FOR_vec_extract_lo_v8sf, "__builtin_ia32_ps_ps256", IX86_BUILTIN_PS_PS256, UNKNOWN, (int) V4SF_FTYPE_V8SF },
31738 { OPTION_MASK_ISA_AVX, CODE_FOR_vec_extract_lo_v4df, "__builtin_ia32_pd_pd256", IX86_BUILTIN_PD_PD256, UNKNOWN, (int) V2DF_FTYPE_V4DF },
31740 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestpd, "__builtin_ia32_vtestzpd", IX86_BUILTIN_VTESTZPD, EQ, (int) INT_FTYPE_V2DF_V2DF_PTEST },
31741 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestpd, "__builtin_ia32_vtestcpd", IX86_BUILTIN_VTESTCPD, LTU, (int) INT_FTYPE_V2DF_V2DF_PTEST },
31742 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestpd, "__builtin_ia32_vtestnzcpd", IX86_BUILTIN_VTESTNZCPD, GTU, (int) INT_FTYPE_V2DF_V2DF_PTEST },
31743 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestps, "__builtin_ia32_vtestzps", IX86_BUILTIN_VTESTZPS, EQ, (int) INT_FTYPE_V4SF_V4SF_PTEST },
31744 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestps, "__builtin_ia32_vtestcps", IX86_BUILTIN_VTESTCPS, LTU, (int) INT_FTYPE_V4SF_V4SF_PTEST },
31745 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestps, "__builtin_ia32_vtestnzcps", IX86_BUILTIN_VTESTNZCPS, GTU, (int) INT_FTYPE_V4SF_V4SF_PTEST },
31746 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestpd256, "__builtin_ia32_vtestzpd256", IX86_BUILTIN_VTESTZPD256, EQ, (int) INT_FTYPE_V4DF_V4DF_PTEST },
31747 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestpd256, "__builtin_ia32_vtestcpd256", IX86_BUILTIN_VTESTCPD256, LTU, (int) INT_FTYPE_V4DF_V4DF_PTEST },
31748 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestpd256, "__builtin_ia32_vtestnzcpd256", IX86_BUILTIN_VTESTNZCPD256, GTU, (int) INT_FTYPE_V4DF_V4DF_PTEST },
31749 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestps256, "__builtin_ia32_vtestzps256", IX86_BUILTIN_VTESTZPS256, EQ, (int) INT_FTYPE_V8SF_V8SF_PTEST },
31750 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestps256, "__builtin_ia32_vtestcps256", IX86_BUILTIN_VTESTCPS256, LTU, (int) INT_FTYPE_V8SF_V8SF_PTEST },
31751 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestps256, "__builtin_ia32_vtestnzcps256", IX86_BUILTIN_VTESTNZCPS256, GTU, (int) INT_FTYPE_V8SF_V8SF_PTEST },
31752 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_ptest256, "__builtin_ia32_ptestz256", IX86_BUILTIN_PTESTZ256, EQ, (int) INT_FTYPE_V4DI_V4DI_PTEST },
31753 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_ptest256, "__builtin_ia32_ptestc256", IX86_BUILTIN_PTESTC256, LTU, (int) INT_FTYPE_V4DI_V4DI_PTEST },
31754 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_ptest256, "__builtin_ia32_ptestnzc256", IX86_BUILTIN_PTESTNZC256, GTU, (int) INT_FTYPE_V4DI_V4DI_PTEST },
31756 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movmskpd256, "__builtin_ia32_movmskpd256", IX86_BUILTIN_MOVMSKPD256, UNKNOWN, (int) INT_FTYPE_V4DF },
31757 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movmskps256, "__builtin_ia32_movmskps256", IX86_BUILTIN_MOVMSKPS256, UNKNOWN, (int) INT_FTYPE_V8SF },
31759 { OPTION_MASK_ISA_AVX, CODE_FOR_copysignv8sf3, "__builtin_ia32_copysignps256", IX86_BUILTIN_CPYSGNPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
31760 { OPTION_MASK_ISA_AVX, CODE_FOR_copysignv4df3, "__builtin_ia32_copysignpd256", IX86_BUILTIN_CPYSGNPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
31762 { OPTION_MASK_ISA_AVX, CODE_FOR_vec_pack_sfix_v4df, "__builtin_ia32_vec_pack_sfix256 ", IX86_BUILTIN_VEC_PACK_SFIX256, UNKNOWN, (int) V8SI_FTYPE_V4DF_V4DF },
31764 /* AVX2 */
31765 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_mpsadbw, "__builtin_ia32_mpsadbw256", IX86_BUILTIN_MPSADBW256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI_INT },
31766 { OPTION_MASK_ISA_AVX2, CODE_FOR_absv32qi2, "__builtin_ia32_pabsb256", IX86_BUILTIN_PABSB256, UNKNOWN, (int) V32QI_FTYPE_V32QI },
31767 { OPTION_MASK_ISA_AVX2, CODE_FOR_absv16hi2, "__builtin_ia32_pabsw256", IX86_BUILTIN_PABSW256, UNKNOWN, (int) V16HI_FTYPE_V16HI },
31768 { OPTION_MASK_ISA_AVX2, CODE_FOR_absv8si2, "__builtin_ia32_pabsd256", IX86_BUILTIN_PABSD256, UNKNOWN, (int) V8SI_FTYPE_V8SI },
31769 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_packssdw, "__builtin_ia32_packssdw256", IX86_BUILTIN_PACKSSDW256, UNKNOWN, (int) V16HI_FTYPE_V8SI_V8SI },
31770 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_packsswb, "__builtin_ia32_packsswb256", IX86_BUILTIN_PACKSSWB256, UNKNOWN, (int) V32QI_FTYPE_V16HI_V16HI },
31771 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_packusdw, "__builtin_ia32_packusdw256", IX86_BUILTIN_PACKUSDW256, UNKNOWN, (int) V16HI_FTYPE_V8SI_V8SI },
31772 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_packuswb, "__builtin_ia32_packuswb256", IX86_BUILTIN_PACKUSWB256, UNKNOWN, (int) V32QI_FTYPE_V16HI_V16HI },
31773 { OPTION_MASK_ISA_AVX2, CODE_FOR_addv32qi3, "__builtin_ia32_paddb256", IX86_BUILTIN_PADDB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
31774 { OPTION_MASK_ISA_AVX2, CODE_FOR_addv16hi3, "__builtin_ia32_paddw256", IX86_BUILTIN_PADDW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
31775 { OPTION_MASK_ISA_AVX2, CODE_FOR_addv8si3, "__builtin_ia32_paddd256", IX86_BUILTIN_PADDD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
31776 { OPTION_MASK_ISA_AVX2, CODE_FOR_addv4di3, "__builtin_ia32_paddq256", IX86_BUILTIN_PADDQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI },
31777 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_ssaddv32qi3, "__builtin_ia32_paddsb256", IX86_BUILTIN_PADDSB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
31778 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_ssaddv16hi3, "__builtin_ia32_paddsw256", IX86_BUILTIN_PADDSW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
31779 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_usaddv32qi3, "__builtin_ia32_paddusb256", IX86_BUILTIN_PADDUSB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
31780 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_usaddv16hi3, "__builtin_ia32_paddusw256", IX86_BUILTIN_PADDUSW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
31781 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_palignrv2ti, "__builtin_ia32_palignr256", IX86_BUILTIN_PALIGNR256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_INT_CONVERT },
31782 { OPTION_MASK_ISA_AVX2, CODE_FOR_andv4di3, "__builtin_ia32_andsi256", IX86_BUILTIN_AND256I, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI },
31783 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_andnotv4di3, "__builtin_ia32_andnotsi256", IX86_BUILTIN_ANDNOT256I, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI },
31784 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_uavgv32qi3, "__builtin_ia32_pavgb256", IX86_BUILTIN_PAVGB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
31785 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_uavgv16hi3, "__builtin_ia32_pavgw256", IX86_BUILTIN_PAVGW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
31786 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pblendvb, "__builtin_ia32_pblendvb256", IX86_BUILTIN_PBLENDVB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI_V32QI },
31787 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pblendw, "__builtin_ia32_pblendw256", IX86_BUILTIN_PBLENDVW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_INT },
31788 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_eqv32qi3, "__builtin_ia32_pcmpeqb256", IX86_BUILTIN_PCMPEQB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
31789 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_eqv16hi3, "__builtin_ia32_pcmpeqw256", IX86_BUILTIN_PCMPEQW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
31790 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_eqv8si3, "__builtin_ia32_pcmpeqd256", IX86_BUILTIN_PCMPEQD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
31791 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_eqv4di3, "__builtin_ia32_pcmpeqq256", IX86_BUILTIN_PCMPEQQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI },
31792 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_gtv32qi3, "__builtin_ia32_pcmpgtb256", IX86_BUILTIN_PCMPGTB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
31793 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_gtv16hi3, "__builtin_ia32_pcmpgtw256", IX86_BUILTIN_PCMPGTW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
31794 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_gtv8si3, "__builtin_ia32_pcmpgtd256", IX86_BUILTIN_PCMPGTD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
31795 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_gtv4di3, "__builtin_ia32_pcmpgtq256", IX86_BUILTIN_PCMPGTQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI },
31796 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_phaddwv16hi3, "__builtin_ia32_phaddw256", IX86_BUILTIN_PHADDW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
31797 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_phadddv8si3, "__builtin_ia32_phaddd256", IX86_BUILTIN_PHADDD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
31798 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_phaddswv16hi3, "__builtin_ia32_phaddsw256", IX86_BUILTIN_PHADDSW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
31799 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_phsubwv16hi3, "__builtin_ia32_phsubw256", IX86_BUILTIN_PHSUBW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
31800 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_phsubdv8si3, "__builtin_ia32_phsubd256", IX86_BUILTIN_PHSUBD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
31801 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_phsubswv16hi3, "__builtin_ia32_phsubsw256", IX86_BUILTIN_PHSUBSW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
31802 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pmaddubsw256, "__builtin_ia32_pmaddubsw256", IX86_BUILTIN_PMADDUBSW256, UNKNOWN, (int) V16HI_FTYPE_V32QI_V32QI },
31803 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pmaddwd, "__builtin_ia32_pmaddwd256", IX86_BUILTIN_PMADDWD256, UNKNOWN, (int) V8SI_FTYPE_V16HI_V16HI },
31804 { OPTION_MASK_ISA_AVX2, CODE_FOR_smaxv32qi3, "__builtin_ia32_pmaxsb256", IX86_BUILTIN_PMAXSB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
31805 { OPTION_MASK_ISA_AVX2, CODE_FOR_smaxv16hi3, "__builtin_ia32_pmaxsw256", IX86_BUILTIN_PMAXSW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
31806 { OPTION_MASK_ISA_AVX2, CODE_FOR_smaxv8si3, "__builtin_ia32_pmaxsd256", IX86_BUILTIN_PMAXSD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
31807 { OPTION_MASK_ISA_AVX2, CODE_FOR_umaxv32qi3, "__builtin_ia32_pmaxub256", IX86_BUILTIN_PMAXUB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
31808 { OPTION_MASK_ISA_AVX2, CODE_FOR_umaxv16hi3, "__builtin_ia32_pmaxuw256", IX86_BUILTIN_PMAXUW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
31809 { OPTION_MASK_ISA_AVX2, CODE_FOR_umaxv8si3, "__builtin_ia32_pmaxud256", IX86_BUILTIN_PMAXUD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
31810 { OPTION_MASK_ISA_AVX2, CODE_FOR_sminv32qi3, "__builtin_ia32_pminsb256", IX86_BUILTIN_PMINSB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
31811 { OPTION_MASK_ISA_AVX2, CODE_FOR_sminv16hi3, "__builtin_ia32_pminsw256", IX86_BUILTIN_PMINSW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
31812 { OPTION_MASK_ISA_AVX2, CODE_FOR_sminv8si3, "__builtin_ia32_pminsd256", IX86_BUILTIN_PMINSD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
31813 { OPTION_MASK_ISA_AVX2, CODE_FOR_uminv32qi3, "__builtin_ia32_pminub256", IX86_BUILTIN_PMINUB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
31814 { OPTION_MASK_ISA_AVX2, CODE_FOR_uminv16hi3, "__builtin_ia32_pminuw256", IX86_BUILTIN_PMINUW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
31815 { OPTION_MASK_ISA_AVX2, CODE_FOR_uminv8si3, "__builtin_ia32_pminud256", IX86_BUILTIN_PMINUD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
31816 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pmovmskb, "__builtin_ia32_pmovmskb256", IX86_BUILTIN_PMOVMSKB256, UNKNOWN, (int) INT_FTYPE_V32QI },
31817 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_sign_extendv16qiv16hi2, "__builtin_ia32_pmovsxbw256", IX86_BUILTIN_PMOVSXBW256, UNKNOWN, (int) V16HI_FTYPE_V16QI },
31818 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_sign_extendv8qiv8si2, "__builtin_ia32_pmovsxbd256", IX86_BUILTIN_PMOVSXBD256, UNKNOWN, (int) V8SI_FTYPE_V16QI },
31819 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_sign_extendv4qiv4di2, "__builtin_ia32_pmovsxbq256", IX86_BUILTIN_PMOVSXBQ256, UNKNOWN, (int) V4DI_FTYPE_V16QI },
31820 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_sign_extendv8hiv8si2, "__builtin_ia32_pmovsxwd256", IX86_BUILTIN_PMOVSXWD256, UNKNOWN, (int) V8SI_FTYPE_V8HI },
31821 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_sign_extendv4hiv4di2, "__builtin_ia32_pmovsxwq256", IX86_BUILTIN_PMOVSXWQ256, UNKNOWN, (int) V4DI_FTYPE_V8HI },
31822 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_sign_extendv4siv4di2, "__builtin_ia32_pmovsxdq256", IX86_BUILTIN_PMOVSXDQ256, UNKNOWN, (int) V4DI_FTYPE_V4SI },
31823 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_zero_extendv16qiv16hi2, "__builtin_ia32_pmovzxbw256", IX86_BUILTIN_PMOVZXBW256, UNKNOWN, (int) V16HI_FTYPE_V16QI },
31824 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_zero_extendv8qiv8si2, "__builtin_ia32_pmovzxbd256", IX86_BUILTIN_PMOVZXBD256, UNKNOWN, (int) V8SI_FTYPE_V16QI },
31825 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_zero_extendv4qiv4di2, "__builtin_ia32_pmovzxbq256", IX86_BUILTIN_PMOVZXBQ256, UNKNOWN, (int) V4DI_FTYPE_V16QI },
31826 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_zero_extendv8hiv8si2, "__builtin_ia32_pmovzxwd256", IX86_BUILTIN_PMOVZXWD256, UNKNOWN, (int) V8SI_FTYPE_V8HI },
31827 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_zero_extendv4hiv4di2, "__builtin_ia32_pmovzxwq256", IX86_BUILTIN_PMOVZXWQ256, UNKNOWN, (int) V4DI_FTYPE_V8HI },
31828 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_zero_extendv4siv4di2, "__builtin_ia32_pmovzxdq256", IX86_BUILTIN_PMOVZXDQ256, UNKNOWN, (int) V4DI_FTYPE_V4SI },
31829 { OPTION_MASK_ISA_AVX2, CODE_FOR_vec_widen_smult_even_v8si, "__builtin_ia32_pmuldq256", IX86_BUILTIN_PMULDQ256, UNKNOWN, (int) V4DI_FTYPE_V8SI_V8SI },
31830 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pmulhrswv16hi3, "__builtin_ia32_pmulhrsw256", IX86_BUILTIN_PMULHRSW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
31831 { OPTION_MASK_ISA_AVX2, CODE_FOR_umulv16hi3_highpart, "__builtin_ia32_pmulhuw256", IX86_BUILTIN_PMULHUW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
31832 { OPTION_MASK_ISA_AVX2, CODE_FOR_smulv16hi3_highpart, "__builtin_ia32_pmulhw256", IX86_BUILTIN_PMULHW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
31833 { OPTION_MASK_ISA_AVX2, CODE_FOR_mulv16hi3, "__builtin_ia32_pmullw256", IX86_BUILTIN_PMULLW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
31834 { OPTION_MASK_ISA_AVX2, CODE_FOR_mulv8si3, "__builtin_ia32_pmulld256", IX86_BUILTIN_PMULLD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
31835 { OPTION_MASK_ISA_AVX2, CODE_FOR_vec_widen_umult_even_v8si, "__builtin_ia32_pmuludq256", IX86_BUILTIN_PMULUDQ256, UNKNOWN, (int) V4DI_FTYPE_V8SI_V8SI },
31836 { OPTION_MASK_ISA_AVX2, CODE_FOR_iorv4di3, "__builtin_ia32_por256", IX86_BUILTIN_POR256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI },
31837 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_psadbw, "__builtin_ia32_psadbw256", IX86_BUILTIN_PSADBW256, UNKNOWN, (int) V16HI_FTYPE_V32QI_V32QI },
31838 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pshufbv32qi3, "__builtin_ia32_pshufb256", IX86_BUILTIN_PSHUFB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
31839 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pshufdv3, "__builtin_ia32_pshufd256", IX86_BUILTIN_PSHUFD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_INT },
31840 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pshufhwv3, "__builtin_ia32_pshufhw256", IX86_BUILTIN_PSHUFHW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_INT },
31841 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pshuflwv3, "__builtin_ia32_pshuflw256", IX86_BUILTIN_PSHUFLW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_INT },
31842 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_psignv32qi3, "__builtin_ia32_psignb256", IX86_BUILTIN_PSIGNB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
31843 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_psignv16hi3, "__builtin_ia32_psignw256", IX86_BUILTIN_PSIGNW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
31844 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_psignv8si3, "__builtin_ia32_psignd256", IX86_BUILTIN_PSIGND256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
31845 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_ashlv2ti3, "__builtin_ia32_pslldqi256", IX86_BUILTIN_PSLLDQI256, UNKNOWN, (int) V4DI_FTYPE_V4DI_INT_CONVERT },
31846 { OPTION_MASK_ISA_AVX2, CODE_FOR_ashlv16hi3, "__builtin_ia32_psllwi256", IX86_BUILTIN_PSLLWI256, UNKNOWN, (int) V16HI_FTYPE_V16HI_SI_COUNT },
31847 { OPTION_MASK_ISA_AVX2, CODE_FOR_ashlv16hi3, "__builtin_ia32_psllw256", IX86_BUILTIN_PSLLW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V8HI_COUNT },
31848 { OPTION_MASK_ISA_AVX2, CODE_FOR_ashlv8si3, "__builtin_ia32_pslldi256", IX86_BUILTIN_PSLLDI256, UNKNOWN, (int) V8SI_FTYPE_V8SI_SI_COUNT },
31849 { OPTION_MASK_ISA_AVX2, CODE_FOR_ashlv8si3, "__builtin_ia32_pslld256", IX86_BUILTIN_PSLLD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V4SI_COUNT },
31850 { OPTION_MASK_ISA_AVX2, CODE_FOR_ashlv4di3, "__builtin_ia32_psllqi256", IX86_BUILTIN_PSLLQI256, UNKNOWN, (int) V4DI_FTYPE_V4DI_INT_COUNT },
31851 { OPTION_MASK_ISA_AVX2, CODE_FOR_ashlv4di3, "__builtin_ia32_psllq256", IX86_BUILTIN_PSLLQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V2DI_COUNT },
31852 { OPTION_MASK_ISA_AVX2, CODE_FOR_ashrv16hi3, "__builtin_ia32_psrawi256", IX86_BUILTIN_PSRAWI256, UNKNOWN, (int) V16HI_FTYPE_V16HI_SI_COUNT },
31853 { OPTION_MASK_ISA_AVX2, CODE_FOR_ashrv16hi3, "__builtin_ia32_psraw256", IX86_BUILTIN_PSRAW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V8HI_COUNT },
31854 { OPTION_MASK_ISA_AVX2, CODE_FOR_ashrv8si3, "__builtin_ia32_psradi256", IX86_BUILTIN_PSRADI256, UNKNOWN, (int) V8SI_FTYPE_V8SI_SI_COUNT },
31855 { OPTION_MASK_ISA_AVX2, CODE_FOR_ashrv8si3, "__builtin_ia32_psrad256", IX86_BUILTIN_PSRAD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V4SI_COUNT },
31856 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_lshrv2ti3, "__builtin_ia32_psrldqi256", IX86_BUILTIN_PSRLDQI256, UNKNOWN, (int) V4DI_FTYPE_V4DI_INT_CONVERT },
31857 { OPTION_MASK_ISA_AVX2, CODE_FOR_lshrv16hi3, "__builtin_ia32_psrlwi256", IX86_BUILTIN_PSRLWI256, UNKNOWN, (int) V16HI_FTYPE_V16HI_SI_COUNT },
31858 { OPTION_MASK_ISA_AVX2, CODE_FOR_lshrv16hi3, "__builtin_ia32_psrlw256", IX86_BUILTIN_PSRLW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V8HI_COUNT },
31859 { OPTION_MASK_ISA_AVX2, CODE_FOR_lshrv8si3, "__builtin_ia32_psrldi256", IX86_BUILTIN_PSRLDI256, UNKNOWN, (int) V8SI_FTYPE_V8SI_SI_COUNT },
31860 { OPTION_MASK_ISA_AVX2, CODE_FOR_lshrv8si3, "__builtin_ia32_psrld256", IX86_BUILTIN_PSRLD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V4SI_COUNT },
31861 { OPTION_MASK_ISA_AVX2, CODE_FOR_lshrv4di3, "__builtin_ia32_psrlqi256", IX86_BUILTIN_PSRLQI256, UNKNOWN, (int) V4DI_FTYPE_V4DI_INT_COUNT },
31862 { OPTION_MASK_ISA_AVX2, CODE_FOR_lshrv4di3, "__builtin_ia32_psrlq256", IX86_BUILTIN_PSRLQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V2DI_COUNT },
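  /* For the shift builtins above, the same ashl/ashr/lshr pattern backs
     both the immediate-count form ("...i", *_SI_COUNT / *_INT_COUNT
     types) and the form taking the count from the low quadword of an
     XMM register (*_V8HI_COUNT / *_V4SI_COUNT / *_V2DI_COUNT).
     pslldqi256 and psrldqi256 shift whole 128-bit lanes through the
     V2TI patterns, hence their *_INT_CONVERT type.  */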
31863 { OPTION_MASK_ISA_AVX2, CODE_FOR_subv32qi3, "__builtin_ia32_psubb256", IX86_BUILTIN_PSUBB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
31864 { OPTION_MASK_ISA_AVX2, CODE_FOR_subv16hi3, "__builtin_ia32_psubw256", IX86_BUILTIN_PSUBW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
31865 { OPTION_MASK_ISA_AVX2, CODE_FOR_subv8si3, "__builtin_ia32_psubd256", IX86_BUILTIN_PSUBD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
31866 { OPTION_MASK_ISA_AVX2, CODE_FOR_subv4di3, "__builtin_ia32_psubq256", IX86_BUILTIN_PSUBQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI },
31867 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_sssubv32qi3, "__builtin_ia32_psubsb256", IX86_BUILTIN_PSUBSB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
31868 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_sssubv16hi3, "__builtin_ia32_psubsw256", IX86_BUILTIN_PSUBSW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
31869 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_ussubv32qi3, "__builtin_ia32_psubusb256", IX86_BUILTIN_PSUBUSB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
31870 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_ussubv16hi3, "__builtin_ia32_psubusw256", IX86_BUILTIN_PSUBUSW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
31871 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_interleave_highv32qi, "__builtin_ia32_punpckhbw256", IX86_BUILTIN_PUNPCKHBW256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
31872 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_interleave_highv16hi, "__builtin_ia32_punpckhwd256", IX86_BUILTIN_PUNPCKHWD256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
31873 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_interleave_highv8si, "__builtin_ia32_punpckhdq256", IX86_BUILTIN_PUNPCKHDQ256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
31874 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_interleave_highv4di, "__builtin_ia32_punpckhqdq256", IX86_BUILTIN_PUNPCKHQDQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI },
31875 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_interleave_lowv32qi, "__builtin_ia32_punpcklbw256", IX86_BUILTIN_PUNPCKLBW256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
31876 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_interleave_lowv16hi, "__builtin_ia32_punpcklwd256", IX86_BUILTIN_PUNPCKLWD256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
31877 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_interleave_lowv8si, "__builtin_ia32_punpckldq256", IX86_BUILTIN_PUNPCKLDQ256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
31878 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_interleave_lowv4di, "__builtin_ia32_punpcklqdq256", IX86_BUILTIN_PUNPCKLQDQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI },
31879 { OPTION_MASK_ISA_AVX2, CODE_FOR_xorv4di3, "__builtin_ia32_pxor256", IX86_BUILTIN_PXOR256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI },
31880 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_vec_dupv4sf, "__builtin_ia32_vbroadcastss_ps", IX86_BUILTIN_VBROADCASTSS_PS, UNKNOWN, (int) V4SF_FTYPE_V4SF },
31881 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_vec_dupv8sf, "__builtin_ia32_vbroadcastss_ps256", IX86_BUILTIN_VBROADCASTSS_PS256, UNKNOWN, (int) V8SF_FTYPE_V4SF },
31882 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_vec_dupv4df, "__builtin_ia32_vbroadcastsd_pd256", IX86_BUILTIN_VBROADCASTSD_PD256, UNKNOWN, (int) V4DF_FTYPE_V2DF },
31883 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_vbroadcasti128_v4di, "__builtin_ia32_vbroadcastsi256", IX86_BUILTIN_VBROADCASTSI256, UNKNOWN, (int) V4DI_FTYPE_V2DI },
31884 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pblenddv4si, "__builtin_ia32_pblendd128", IX86_BUILTIN_PBLENDD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_INT },
31885 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pblenddv8si, "__builtin_ia32_pblendd256", IX86_BUILTIN_PBLENDD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_INT },
31886 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pbroadcastv32qi, "__builtin_ia32_pbroadcastb256", IX86_BUILTIN_PBROADCASTB256, UNKNOWN, (int) V32QI_FTYPE_V16QI },
31887 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pbroadcastv16hi, "__builtin_ia32_pbroadcastw256", IX86_BUILTIN_PBROADCASTW256, UNKNOWN, (int) V16HI_FTYPE_V8HI },
31888 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pbroadcastv8si, "__builtin_ia32_pbroadcastd256", IX86_BUILTIN_PBROADCASTD256, UNKNOWN, (int) V8SI_FTYPE_V4SI },
31889 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pbroadcastv4di, "__builtin_ia32_pbroadcastq256", IX86_BUILTIN_PBROADCASTQ256, UNKNOWN, (int) V4DI_FTYPE_V2DI },
31890 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pbroadcastv16qi, "__builtin_ia32_pbroadcastb128", IX86_BUILTIN_PBROADCASTB128, UNKNOWN, (int) V16QI_FTYPE_V16QI },
31891 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pbroadcastv8hi, "__builtin_ia32_pbroadcastw128", IX86_BUILTIN_PBROADCASTW128, UNKNOWN, (int) V8HI_FTYPE_V8HI },
31892 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pbroadcastv4si, "__builtin_ia32_pbroadcastd128", IX86_BUILTIN_PBROADCASTD128, UNKNOWN, (int) V4SI_FTYPE_V4SI },
31893 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pbroadcastv2di, "__builtin_ia32_pbroadcastq128", IX86_BUILTIN_PBROADCASTQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI },
31894 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_permvarv8si, "__builtin_ia32_permvarsi256", IX86_BUILTIN_VPERMVARSI256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
31895 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_permvarv8sf, "__builtin_ia32_permvarsf256", IX86_BUILTIN_VPERMVARSF256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SI },
31896 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_permv4df, "__builtin_ia32_permdf256", IX86_BUILTIN_VPERMDF256, UNKNOWN, (int) V4DF_FTYPE_V4DF_INT },
31897 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_permv4di, "__builtin_ia32_permdi256", IX86_BUILTIN_VPERMDI256, UNKNOWN, (int) V4DI_FTYPE_V4DI_INT },
31898 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_permv2ti, "__builtin_ia32_permti256", IX86_BUILTIN_VPERMTI256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_INT },
31899 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx_vextractf128v4di, "__builtin_ia32_extract128i256", IX86_BUILTIN_VEXTRACT128I256, UNKNOWN, (int) V2DI_FTYPE_V4DI_INT },
31900 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx_vinsertf128v4di, "__builtin_ia32_insert128i256", IX86_BUILTIN_VINSERT128I256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V2DI_INT },
31901 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_ashlvv4di, "__builtin_ia32_psllv4di", IX86_BUILTIN_PSLLVV4DI, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI },
31902 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_ashlvv2di, "__builtin_ia32_psllv2di", IX86_BUILTIN_PSLLVV2DI, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
31903 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_ashlvv8si, "__builtin_ia32_psllv8si", IX86_BUILTIN_PSLLVV8SI, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
31904 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_ashlvv4si, "__builtin_ia32_psllv4si", IX86_BUILTIN_PSLLVV4SI, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
31905 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_ashrvv8si, "__builtin_ia32_psrav8si", IX86_BUILTIN_PSRAVV8SI, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
31906 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_ashrvv4si, "__builtin_ia32_psrav4si", IX86_BUILTIN_PSRAVV4SI, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
31907 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_lshrvv4di, "__builtin_ia32_psrlv4di", IX86_BUILTIN_PSRLVV4DI, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI },
31908 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_lshrvv2di, "__builtin_ia32_psrlv2di", IX86_BUILTIN_PSRLVV2DI, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
31909 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_lshrvv8si, "__builtin_ia32_psrlv8si", IX86_BUILTIN_PSRLVV8SI, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
31910 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_lshrvv4si, "__builtin_ia32_psrlv4si", IX86_BUILTIN_PSRLVV4SI, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
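  /* Unlike the shifts above, the psllv/psrav/psrlv builtins take a
     vector of per-element shift counts as their second operand, so both
     operands share one vector type.  There is no quadword psrav entry
     here; variable arithmetic right shifts on 64-bit elements only
     appear with AVX-512.  */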
31912 { OPTION_MASK_ISA_LZCNT, CODE_FOR_clzhi2_lzcnt, "__builtin_clzs", IX86_BUILTIN_CLZS, UNKNOWN, (int) UINT16_FTYPE_UINT16 },
31914 /* BMI */
31915 { OPTION_MASK_ISA_BMI, CODE_FOR_bmi_bextr_si, "__builtin_ia32_bextr_u32", IX86_BUILTIN_BEXTR32, UNKNOWN, (int) UINT_FTYPE_UINT_UINT },
31916 { OPTION_MASK_ISA_BMI, CODE_FOR_bmi_bextr_di, "__builtin_ia32_bextr_u64", IX86_BUILTIN_BEXTR64, UNKNOWN, (int) UINT64_FTYPE_UINT64_UINT64 },
31917 { OPTION_MASK_ISA_BMI, CODE_FOR_ctzhi2, "__builtin_ctzs", IX86_BUILTIN_CTZS, UNKNOWN, (int) UINT16_FTYPE_UINT16 },
31919 /* TBM */
31920 { OPTION_MASK_ISA_TBM, CODE_FOR_tbm_bextri_si, "__builtin_ia32_bextri_u32", IX86_BUILTIN_BEXTRI32, UNKNOWN, (int) UINT_FTYPE_UINT_UINT },
31921 { OPTION_MASK_ISA_TBM, CODE_FOR_tbm_bextri_di, "__builtin_ia32_bextri_u64", IX86_BUILTIN_BEXTRI64, UNKNOWN, (int) UINT64_FTYPE_UINT64_UINT64 },
31923 /* F16C */
31924 { OPTION_MASK_ISA_F16C, CODE_FOR_vcvtph2ps, "__builtin_ia32_vcvtph2ps", IX86_BUILTIN_CVTPH2PS, UNKNOWN, (int) V4SF_FTYPE_V8HI },
31925 { OPTION_MASK_ISA_F16C, CODE_FOR_vcvtph2ps256, "__builtin_ia32_vcvtph2ps256", IX86_BUILTIN_CVTPH2PS256, UNKNOWN, (int) V8SF_FTYPE_V8HI },
31926 { OPTION_MASK_ISA_F16C, CODE_FOR_vcvtps2ph, "__builtin_ia32_vcvtps2ph", IX86_BUILTIN_CVTPS2PH, UNKNOWN, (int) V8HI_FTYPE_V4SF_INT },
31927 { OPTION_MASK_ISA_F16C, CODE_FOR_vcvtps2ph256, "__builtin_ia32_vcvtps2ph256", IX86_BUILTIN_CVTPS2PH256, UNKNOWN, (int) V8HI_FTYPE_V8SF_INT },
31929 /* BMI2 */
31930 { OPTION_MASK_ISA_BMI2, CODE_FOR_bmi2_bzhi_si3, "__builtin_ia32_bzhi_si", IX86_BUILTIN_BZHI32, UNKNOWN, (int) UINT_FTYPE_UINT_UINT },
31931 { OPTION_MASK_ISA_BMI2, CODE_FOR_bmi2_bzhi_di3, "__builtin_ia32_bzhi_di", IX86_BUILTIN_BZHI64, UNKNOWN, (int) UINT64_FTYPE_UINT64_UINT64 },
31932 { OPTION_MASK_ISA_BMI2, CODE_FOR_bmi2_pdep_si3, "__builtin_ia32_pdep_si", IX86_BUILTIN_PDEP32, UNKNOWN, (int) UINT_FTYPE_UINT_UINT },
31933 { OPTION_MASK_ISA_BMI2, CODE_FOR_bmi2_pdep_di3, "__builtin_ia32_pdep_di", IX86_BUILTIN_PDEP64, UNKNOWN, (int) UINT64_FTYPE_UINT64_UINT64 },
31934 { OPTION_MASK_ISA_BMI2, CODE_FOR_bmi2_pext_si3, "__builtin_ia32_pext_si", IX86_BUILTIN_PEXT32, UNKNOWN, (int) UINT_FTYPE_UINT_UINT },
31935 { OPTION_MASK_ISA_BMI2, CODE_FOR_bmi2_pext_di3, "__builtin_ia32_pext_di", IX86_BUILTIN_PEXT64, UNKNOWN, (int) UINT64_FTYPE_UINT64_UINT64 },
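  /* pdep deposits the low bits of its first operand into the bit
     positions selected by the mask, and pext is its inverse.  For
     illustration only:

	__builtin_ia32_pdep_si (0x5, 0x1a)  == 0x12   (0b101   -> 0b10010)
	__builtin_ia32_pext_si (0x12, 0x1a) == 0x5    (0b10010 -> 0b101)  */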
31937 /* AVX512F */
31938 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_si512_256si, "__builtin_ia32_si512_256si", IX86_BUILTIN_SI512_SI256, UNKNOWN, (int) V16SI_FTYPE_V8SI },
31939 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_ps512_256ps, "__builtin_ia32_ps512_256ps", IX86_BUILTIN_PS512_PS256, UNKNOWN, (int) V16SF_FTYPE_V8SF },
31940 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_pd512_256pd, "__builtin_ia32_pd512_256pd", IX86_BUILTIN_PD512_PD256, UNKNOWN, (int) V8DF_FTYPE_V4DF },
31941 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_si512_si, "__builtin_ia32_si512_si", IX86_BUILTIN_SI512_SI, UNKNOWN, (int) V16SI_FTYPE_V4SI },
31942 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_ps512_ps, "__builtin_ia32_ps512_ps", IX86_BUILTIN_PS512_PS, UNKNOWN, (int) V16SF_FTYPE_V4SF },
31943 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_pd512_pd, "__builtin_ia32_pd512_pd", IX86_BUILTIN_PD512_PD, UNKNOWN, (int) V8DF_FTYPE_V2DF },
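  /* From here on most AVX512F entries are masked: the last vector
     argument in the function type is the merge source and the trailing
     QI or HI operand is the writemask, one bit per result element
     (QI for 8-element V8DF/V8DI results, HI for 16-element V16SF/V16SI
     results).  The *_maskz variants zero the masked-off elements
     instead of merging.  */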
31944 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_alignv16si_mask, "__builtin_ia32_alignd512_mask", IX86_BUILTIN_ALIGND512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_INT_V16SI_HI },
31945 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_alignv8di_mask, "__builtin_ia32_alignq512_mask", IX86_BUILTIN_ALIGNQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_INT_V8DI_QI },
31946 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_blendmv16si, "__builtin_ia32_blendmd_512_mask", IX86_BUILTIN_BLENDMD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_HI },
31947 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_blendmv8df, "__builtin_ia32_blendmpd_512_mask", IX86_BUILTIN_BLENDMPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_QI },
31948 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_blendmv16sf, "__builtin_ia32_blendmps_512_mask", IX86_BUILTIN_BLENDMPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_HI },
31949 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_blendmv8di, "__builtin_ia32_blendmq_512_mask", IX86_BUILTIN_BLENDMQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_QI },
31950 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_broadcastv16sf_mask, "__builtin_ia32_broadcastf32x4_512", IX86_BUILTIN_BROADCASTF32X4_512, UNKNOWN, (int) V16SF_FTYPE_V4SF_V16SF_HI },
31951 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_broadcastv8df_mask, "__builtin_ia32_broadcastf64x4_512", IX86_BUILTIN_BROADCASTF64X4_512, UNKNOWN, (int) V8DF_FTYPE_V4DF_V8DF_QI },
31952 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_broadcastv16si_mask, "__builtin_ia32_broadcasti32x4_512", IX86_BUILTIN_BROADCASTI32X4_512, UNKNOWN, (int) V16SI_FTYPE_V4SI_V16SI_HI },
31953 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_broadcastv8di_mask, "__builtin_ia32_broadcasti64x4_512", IX86_BUILTIN_BROADCASTI64X4_512, UNKNOWN, (int) V8DI_FTYPE_V4DI_V8DI_QI },
31954 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vec_dupv8df_mask, "__builtin_ia32_broadcastsd512", IX86_BUILTIN_BROADCASTSD512, UNKNOWN, (int) V8DF_FTYPE_V2DF_V8DF_QI },
31955 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vec_dupv16sf_mask, "__builtin_ia32_broadcastss512", IX86_BUILTIN_BROADCASTSS512, UNKNOWN, (int) V16SF_FTYPE_V4SF_V16SF_HI },
31956 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_cmpv16si3_mask, "__builtin_ia32_cmpd512_mask", IX86_BUILTIN_CMPD512, UNKNOWN, (int) HI_FTYPE_V16SI_V16SI_INT_HI },
31957 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_cmpv8di3_mask, "__builtin_ia32_cmpq512_mask", IX86_BUILTIN_CMPQ512, UNKNOWN, (int) QI_FTYPE_V8DI_V8DI_INT_QI },
31958 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_compressv8df_mask, "__builtin_ia32_compressdf512_mask", IX86_BUILTIN_COMPRESSPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_QI },
31959 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_compressv16sf_mask, "__builtin_ia32_compresssf512_mask", IX86_BUILTIN_COMPRESSPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_HI },
31960 { OPTION_MASK_ISA_AVX512F, CODE_FOR_floatv8siv8df2_mask, "__builtin_ia32_cvtdq2pd512_mask", IX86_BUILTIN_CVTDQ2PD512, UNKNOWN, (int) V8DF_FTYPE_V8SI_V8DF_QI },
31961 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vcvtps2ph512_mask, "__builtin_ia32_vcvtps2ph512_mask", IX86_BUILTIN_CVTPS2PH512, UNKNOWN, (int) V16HI_FTYPE_V16SF_INT_V16HI_HI },
31962 { OPTION_MASK_ISA_AVX512F, CODE_FOR_ufloatv8siv8df2_mask, "__builtin_ia32_cvtudq2pd512_mask", IX86_BUILTIN_CVTUDQ2PD512, UNKNOWN, (int) V8DF_FTYPE_V8SI_V8DF_QI },
31963 { OPTION_MASK_ISA_AVX512F, CODE_FOR_cvtusi2sd32, "__builtin_ia32_cvtusi2sd32", IX86_BUILTIN_CVTUSI2SD32, UNKNOWN, (int) V2DF_FTYPE_V2DF_UINT },
31964 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_expandv8df_mask, "__builtin_ia32_expanddf512_mask", IX86_BUILTIN_EXPANDPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_QI },
31965 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_expandv8df_maskz, "__builtin_ia32_expanddf512_maskz", IX86_BUILTIN_EXPANDPD512Z, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_QI },
31966 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_expandv16sf_mask, "__builtin_ia32_expandsf512_mask", IX86_BUILTIN_EXPANDPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_HI },
31967 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_expandv16sf_maskz, "__builtin_ia32_expandsf512_maskz", IX86_BUILTIN_EXPANDPS512Z, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_HI },
31968 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vextractf32x4_mask, "__builtin_ia32_extractf32x4_mask", IX86_BUILTIN_EXTRACTF32X4, UNKNOWN, (int) V4SF_FTYPE_V16SF_INT_V4SF_QI },
31969 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vextractf64x4_mask, "__builtin_ia32_extractf64x4_mask", IX86_BUILTIN_EXTRACTF64X4, UNKNOWN, (int) V4DF_FTYPE_V8DF_INT_V4DF_QI },
31970 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vextracti32x4_mask, "__builtin_ia32_extracti32x4_mask", IX86_BUILTIN_EXTRACTI32X4, UNKNOWN, (int) V4SI_FTYPE_V16SI_INT_V4SI_QI },
31971 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vextracti64x4_mask, "__builtin_ia32_extracti64x4_mask", IX86_BUILTIN_EXTRACTI64X4, UNKNOWN, (int) V4DI_FTYPE_V8DI_INT_V4DI_QI },
31972 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vinsertf32x4_mask, "__builtin_ia32_insertf32x4_mask", IX86_BUILTIN_INSERTF32X4, UNKNOWN, (int) V16SF_FTYPE_V16SF_V4SF_INT_V16SF_HI },
31973 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vinsertf64x4_mask, "__builtin_ia32_insertf64x4_mask", IX86_BUILTIN_INSERTF64X4, UNKNOWN, (int) V8DF_FTYPE_V8DF_V4DF_INT_V8DF_QI },
31974 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vinserti32x4_mask, "__builtin_ia32_inserti32x4_mask", IX86_BUILTIN_INSERTI32X4, UNKNOWN, (int) V16SI_FTYPE_V16SI_V4SI_INT_V16SI_HI },
31975 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vinserti64x4_mask, "__builtin_ia32_inserti64x4_mask", IX86_BUILTIN_INSERTI64X4, UNKNOWN, (int) V8DI_FTYPE_V8DI_V4DI_INT_V8DI_QI },
31976 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_loadv8df_mask, "__builtin_ia32_movapd512_mask", IX86_BUILTIN_MOVAPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_QI },
31977 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_loadv16sf_mask, "__builtin_ia32_movaps512_mask", IX86_BUILTIN_MOVAPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_HI },
31978 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_movddup512_mask, "__builtin_ia32_movddup512_mask", IX86_BUILTIN_MOVDDUP512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_QI },
31979 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_loadv16si_mask, "__builtin_ia32_movdqa32_512_mask", IX86_BUILTIN_MOVDQA32_512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_HI },
31980 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_loadv8di_mask, "__builtin_ia32_movdqa64_512_mask", IX86_BUILTIN_MOVDQA64_512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_QI },
31981 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_movshdup512_mask, "__builtin_ia32_movshdup512_mask", IX86_BUILTIN_MOVSHDUP512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_HI },
31982 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_movsldup512_mask, "__builtin_ia32_movsldup512_mask", IX86_BUILTIN_MOVSLDUP512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_HI },
31983 { OPTION_MASK_ISA_AVX512F, CODE_FOR_absv16si2_mask, "__builtin_ia32_pabsd512_mask", IX86_BUILTIN_PABSD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_HI },
31984 { OPTION_MASK_ISA_AVX512F, CODE_FOR_absv8di2_mask, "__builtin_ia32_pabsq512_mask", IX86_BUILTIN_PABSQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_QI },
31985 { OPTION_MASK_ISA_AVX512F, CODE_FOR_addv16si3_mask, "__builtin_ia32_paddd512_mask", IX86_BUILTIN_PADDD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_HI },
31986 { OPTION_MASK_ISA_AVX512F, CODE_FOR_addv8di3_mask, "__builtin_ia32_paddq512_mask", IX86_BUILTIN_PADDQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
31987 { OPTION_MASK_ISA_AVX512F, CODE_FOR_andv16si3_mask, "__builtin_ia32_pandd512_mask", IX86_BUILTIN_PANDD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_HI },
31988 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_andnotv16si3_mask, "__builtin_ia32_pandnd512_mask", IX86_BUILTIN_PANDND512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_HI },
31989 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_andnotv8di3_mask, "__builtin_ia32_pandnq512_mask", IX86_BUILTIN_PANDNQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
31990 { OPTION_MASK_ISA_AVX512F, CODE_FOR_andv8di3_mask, "__builtin_ia32_pandq512_mask", IX86_BUILTIN_PANDQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
31991 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vec_dupv16si_mask, "__builtin_ia32_pbroadcastd512", IX86_BUILTIN_PBROADCASTD512, UNKNOWN, (int) V16SI_FTYPE_V4SI_V16SI_HI },
31992 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vec_dup_gprv16si_mask, "__builtin_ia32_pbroadcastd512_gpr_mask", IX86_BUILTIN_PBROADCASTD512_GPR, UNKNOWN, (int) V16SI_FTYPE_SI_V16SI_HI },
31993 { OPTION_MASK_ISA_AVX512CD, CODE_FOR_avx512cd_maskb_vec_dupv8di, "__builtin_ia32_broadcastmb512", IX86_BUILTIN_PBROADCASTMB512, UNKNOWN, (int) V8DI_FTYPE_QI },
31994 { OPTION_MASK_ISA_AVX512CD, CODE_FOR_avx512cd_maskw_vec_dupv16si, "__builtin_ia32_broadcastmw512", IX86_BUILTIN_PBROADCASTMW512, UNKNOWN, (int) V16SI_FTYPE_HI },
31995 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vec_dupv8di_mask, "__builtin_ia32_pbroadcastq512", IX86_BUILTIN_PBROADCASTQ512, UNKNOWN, (int) V8DI_FTYPE_V2DI_V8DI_QI },
31996 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vec_dup_gprv8di_mask, "__builtin_ia32_pbroadcastq512_gpr_mask", IX86_BUILTIN_PBROADCASTQ512_GPR, UNKNOWN, (int) V8DI_FTYPE_DI_V8DI_QI },
31997 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_eqv16si3_mask, "__builtin_ia32_pcmpeqd512_mask", IX86_BUILTIN_PCMPEQD512_MASK, UNKNOWN, (int) HI_FTYPE_V16SI_V16SI_HI },
31998 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_eqv8di3_mask, "__builtin_ia32_pcmpeqq512_mask", IX86_BUILTIN_PCMPEQQ512_MASK, UNKNOWN, (int) QI_FTYPE_V8DI_V8DI_QI },
31999 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_gtv16si3_mask, "__builtin_ia32_pcmpgtd512_mask", IX86_BUILTIN_PCMPGTD512_MASK, UNKNOWN, (int) HI_FTYPE_V16SI_V16SI_HI },
32000 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_gtv8di3_mask, "__builtin_ia32_pcmpgtq512_mask", IX86_BUILTIN_PCMPGTQ512_MASK, UNKNOWN, (int) QI_FTYPE_V8DI_V8DI_QI },
32001 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_compressv16si_mask, "__builtin_ia32_compresssi512_mask", IX86_BUILTIN_PCOMPRESSD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_HI },
32002 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_compressv8di_mask, "__builtin_ia32_compressdi512_mask", IX86_BUILTIN_PCOMPRESSQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_QI },
32003 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_expandv16si_mask, "__builtin_ia32_expandsi512_mask", IX86_BUILTIN_PEXPANDD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_HI },
32004 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_expandv16si_maskz, "__builtin_ia32_expandsi512_maskz", IX86_BUILTIN_PEXPANDD512Z, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_HI },
32005 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_expandv8di_mask, "__builtin_ia32_expanddi512_mask", IX86_BUILTIN_PEXPANDQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_QI },
32006 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_expandv8di_maskz, "__builtin_ia32_expanddi512_maskz", IX86_BUILTIN_PEXPANDQ512Z, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_QI },
32007 { OPTION_MASK_ISA_AVX512F, CODE_FOR_smaxv16si3_mask, "__builtin_ia32_pmaxsd512_mask", IX86_BUILTIN_PMAXSD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_HI },
32008 { OPTION_MASK_ISA_AVX512F, CODE_FOR_smaxv8di3_mask, "__builtin_ia32_pmaxsq512_mask", IX86_BUILTIN_PMAXSQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
32009 { OPTION_MASK_ISA_AVX512F, CODE_FOR_umaxv16si3_mask, "__builtin_ia32_pmaxud512_mask", IX86_BUILTIN_PMAXUD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_HI },
32010 { OPTION_MASK_ISA_AVX512F, CODE_FOR_umaxv8di3_mask, "__builtin_ia32_pmaxuq512_mask", IX86_BUILTIN_PMAXUQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
32011 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sminv16si3_mask, "__builtin_ia32_pminsd512_mask", IX86_BUILTIN_PMINSD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_HI },
32012 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sminv8di3_mask, "__builtin_ia32_pminsq512_mask", IX86_BUILTIN_PMINSQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
32013 { OPTION_MASK_ISA_AVX512F, CODE_FOR_uminv16si3_mask, "__builtin_ia32_pminud512_mask", IX86_BUILTIN_PMINUD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_HI },
32014 { OPTION_MASK_ISA_AVX512F, CODE_FOR_uminv8di3_mask, "__builtin_ia32_pminuq512_mask", IX86_BUILTIN_PMINUQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
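/* vpmov* down-conversions (truncating, signed- and unsigned-saturating) and vpmovsx/vpmovzx extensions */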
32015 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_truncatev16siv16qi2_mask, "__builtin_ia32_pmovdb512_mask", IX86_BUILTIN_PMOVDB512, UNKNOWN, (int) V16QI_FTYPE_V16SI_V16QI_HI },
32016 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_truncatev16siv16hi2_mask, "__builtin_ia32_pmovdw512_mask", IX86_BUILTIN_PMOVDW512, UNKNOWN, (int) V16HI_FTYPE_V16SI_V16HI_HI },
32017 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_truncatev8div16qi2_mask, "__builtin_ia32_pmovqb512_mask", IX86_BUILTIN_PMOVQB512, UNKNOWN, (int) V16QI_FTYPE_V8DI_V16QI_QI },
32018 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_truncatev8div8si2_mask, "__builtin_ia32_pmovqd512_mask", IX86_BUILTIN_PMOVQD512, UNKNOWN, (int) V8SI_FTYPE_V8DI_V8SI_QI },
32019 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_truncatev8div8hi2_mask, "__builtin_ia32_pmovqw512_mask", IX86_BUILTIN_PMOVQW512, UNKNOWN, (int) V8HI_FTYPE_V8DI_V8HI_QI },
32020 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_ss_truncatev16siv16qi2_mask, "__builtin_ia32_pmovsdb512_mask", IX86_BUILTIN_PMOVSDB512, UNKNOWN, (int) V16QI_FTYPE_V16SI_V16QI_HI },
32021 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_ss_truncatev16siv16hi2_mask, "__builtin_ia32_pmovsdw512_mask", IX86_BUILTIN_PMOVSDW512, UNKNOWN, (int) V16HI_FTYPE_V16SI_V16HI_HI },
32022 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_ss_truncatev8div16qi2_mask, "__builtin_ia32_pmovsqb512_mask", IX86_BUILTIN_PMOVSQB512, UNKNOWN, (int) V16QI_FTYPE_V8DI_V16QI_QI },
32023 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_ss_truncatev8div8si2_mask, "__builtin_ia32_pmovsqd512_mask", IX86_BUILTIN_PMOVSQD512, UNKNOWN, (int) V8SI_FTYPE_V8DI_V8SI_QI },
32024 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_ss_truncatev8div8hi2_mask, "__builtin_ia32_pmovsqw512_mask", IX86_BUILTIN_PMOVSQW512, UNKNOWN, (int) V8HI_FTYPE_V8DI_V8HI_QI },
32025 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_sign_extendv16qiv16si2_mask, "__builtin_ia32_pmovsxbd512_mask", IX86_BUILTIN_PMOVSXBD512, UNKNOWN, (int) V16SI_FTYPE_V16QI_V16SI_HI },
32026 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_sign_extendv8qiv8di2_mask, "__builtin_ia32_pmovsxbq512_mask", IX86_BUILTIN_PMOVSXBQ512, UNKNOWN, (int) V8DI_FTYPE_V16QI_V8DI_QI },
32027 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_sign_extendv8siv8di2_mask, "__builtin_ia32_pmovsxdq512_mask", IX86_BUILTIN_PMOVSXDQ512, UNKNOWN, (int) V8DI_FTYPE_V8SI_V8DI_QI },
32028 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_sign_extendv16hiv16si2_mask, "__builtin_ia32_pmovsxwd512_mask", IX86_BUILTIN_PMOVSXWD512, UNKNOWN, (int) V16SI_FTYPE_V16HI_V16SI_HI },
32029 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_sign_extendv8hiv8di2_mask, "__builtin_ia32_pmovsxwq512_mask", IX86_BUILTIN_PMOVSXWQ512, UNKNOWN, (int) V8DI_FTYPE_V8HI_V8DI_QI },
32030 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_us_truncatev16siv16qi2_mask, "__builtin_ia32_pmovusdb512_mask", IX86_BUILTIN_PMOVUSDB512, UNKNOWN, (int) V16QI_FTYPE_V16SI_V16QI_HI },
32031 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_us_truncatev16siv16hi2_mask, "__builtin_ia32_pmovusdw512_mask", IX86_BUILTIN_PMOVUSDW512, UNKNOWN, (int) V16HI_FTYPE_V16SI_V16HI_HI },
32032 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_us_truncatev8div16qi2_mask, "__builtin_ia32_pmovusqb512_mask", IX86_BUILTIN_PMOVUSQB512, UNKNOWN, (int) V16QI_FTYPE_V8DI_V16QI_QI },
32033 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_us_truncatev8div8si2_mask, "__builtin_ia32_pmovusqd512_mask", IX86_BUILTIN_PMOVUSQD512, UNKNOWN, (int) V8SI_FTYPE_V8DI_V8SI_QI },
32034 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_us_truncatev8div8hi2_mask, "__builtin_ia32_pmovusqw512_mask", IX86_BUILTIN_PMOVUSQW512, UNKNOWN, (int) V8HI_FTYPE_V8DI_V8HI_QI },
32035 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_zero_extendv16qiv16si2_mask, "__builtin_ia32_pmovzxbd512_mask", IX86_BUILTIN_PMOVZXBD512, UNKNOWN, (int) V16SI_FTYPE_V16QI_V16SI_HI },
32036 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_zero_extendv8qiv8di2_mask, "__builtin_ia32_pmovzxbq512_mask", IX86_BUILTIN_PMOVZXBQ512, UNKNOWN, (int) V8DI_FTYPE_V16QI_V8DI_QI },
32037 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_zero_extendv8siv8di2_mask, "__builtin_ia32_pmovzxdq512_mask", IX86_BUILTIN_PMOVZXDQ512, UNKNOWN, (int) V8DI_FTYPE_V8SI_V8DI_QI },
32038 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_zero_extendv16hiv16si2_mask, "__builtin_ia32_pmovzxwd512_mask", IX86_BUILTIN_PMOVZXWD512, UNKNOWN, (int) V16SI_FTYPE_V16HI_V16SI_HI },
32039 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_zero_extendv8hiv8di2_mask, "__builtin_ia32_pmovzxwq512_mask", IX86_BUILTIN_PMOVZXWQ512, UNKNOWN, (int) V8DI_FTYPE_V8HI_V8DI_QI },
32040 { OPTION_MASK_ISA_AVX512F, CODE_FOR_vec_widen_smult_even_v16si_mask, "__builtin_ia32_pmuldq512_mask", IX86_BUILTIN_PMULDQ512, UNKNOWN, (int) V8DI_FTYPE_V16SI_V16SI_V8DI_QI },
32041 { OPTION_MASK_ISA_AVX512F, CODE_FOR_mulv16si3_mask, "__builtin_ia32_pmulld512_mask", IX86_BUILTIN_PMULLD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_HI },
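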
32042 { OPTION_MASK_ISA_AVX512F, CODE_FOR_vec_widen_umult_even_v16si_mask, "__builtin_ia32_pmuludq512_mask", IX86_BUILTIN_PMULUDQ512, UNKNOWN, (int) V8DI_FTYPE_V16SI_V16SI_V8DI_QI },
32043 { OPTION_MASK_ISA_AVX512F, CODE_FOR_iorv16si3_mask, "__builtin_ia32_pord512_mask", IX86_BUILTIN_PORD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_HI },
32044 { OPTION_MASK_ISA_AVX512F, CODE_FOR_iorv8di3_mask, "__builtin_ia32_porq512_mask", IX86_BUILTIN_PORQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
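/* Rotates by immediate (prold/prolq, prord/prorq) and by per-element variable count (prolv/prorv) */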
32045 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_rolv16si_mask, "__builtin_ia32_prold512_mask", IX86_BUILTIN_PROLD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_INT_V16SI_HI },
32046 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_rolv8di_mask, "__builtin_ia32_prolq512_mask", IX86_BUILTIN_PROLQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_INT_V8DI_QI },
32047 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_rolvv16si_mask, "__builtin_ia32_prolvd512_mask", IX86_BUILTIN_PROLVD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_HI },
32048 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_rolvv8di_mask, "__builtin_ia32_prolvq512_mask", IX86_BUILTIN_PROLVQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
32049 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_rorv16si_mask, "__builtin_ia32_prord512_mask", IX86_BUILTIN_PRORD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_INT_V16SI_HI },
32050 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_rorv8di_mask, "__builtin_ia32_prorq512_mask", IX86_BUILTIN_PRORQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_INT_V8DI_QI },
32051 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_rorvv16si_mask, "__builtin_ia32_prorvd512_mask", IX86_BUILTIN_PRORVD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_HI },
32052 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_rorvv8di_mask, "__builtin_ia32_prorvq512_mask", IX86_BUILTIN_PRORVQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
32053 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_pshufdv3_mask, "__builtin_ia32_pshufd512_mask", IX86_BUILTIN_PSHUFD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_INT_V16SI_HI },
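/* Shifts: count in xmm (pslld/psllq, psrad/psraq, psrld/psrlq), immediate count (*i forms) and per-element variable count (psllv/psrav/psrlv) */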
32054 { OPTION_MASK_ISA_AVX512F, CODE_FOR_ashlv16si3_mask, "__builtin_ia32_pslld512_mask", IX86_BUILTIN_PSLLD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V4SI_V16SI_HI },
32055 { OPTION_MASK_ISA_AVX512F, CODE_FOR_ashlv16si3_mask, "__builtin_ia32_pslldi512_mask", IX86_BUILTIN_PSLLDI512, UNKNOWN, (int) V16SI_FTYPE_V16SI_INT_V16SI_HI },
32056 { OPTION_MASK_ISA_AVX512F, CODE_FOR_ashlv8di3_mask, "__builtin_ia32_psllq512_mask", IX86_BUILTIN_PSLLQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V2DI_V8DI_QI },
32057 { OPTION_MASK_ISA_AVX512F, CODE_FOR_ashlv8di3_mask, "__builtin_ia32_psllqi512_mask", IX86_BUILTIN_PSLLQI512, UNKNOWN, (int) V8DI_FTYPE_V8DI_INT_V8DI_QI },
32058 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_ashlvv16si_mask, "__builtin_ia32_psllv16si_mask", IX86_BUILTIN_PSLLVV16SI, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_HI },
32059 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_ashlvv8di_mask, "__builtin_ia32_psllv8di_mask", IX86_BUILTIN_PSLLVV8DI, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
32060 { OPTION_MASK_ISA_AVX512F, CODE_FOR_ashrv16si3_mask, "__builtin_ia32_psrad512_mask", IX86_BUILTIN_PSRAD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V4SI_V16SI_HI },
32061 { OPTION_MASK_ISA_AVX512F, CODE_FOR_ashrv16si3_mask, "__builtin_ia32_psradi512_mask", IX86_BUILTIN_PSRADI512, UNKNOWN, (int) V16SI_FTYPE_V16SI_INT_V16SI_HI },
32062 { OPTION_MASK_ISA_AVX512F, CODE_FOR_ashrv8di3_mask, "__builtin_ia32_psraq512_mask", IX86_BUILTIN_PSRAQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V2DI_V8DI_QI },
32063 { OPTION_MASK_ISA_AVX512F, CODE_FOR_ashrv8di3_mask, "__builtin_ia32_psraqi512_mask", IX86_BUILTIN_PSRAQI512, UNKNOWN, (int) V8DI_FTYPE_V8DI_INT_V8DI_QI },
32064 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_ashrvv16si_mask, "__builtin_ia32_psrav16si_mask", IX86_BUILTIN_PSRAVV16SI, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_HI },
32065 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_ashrvv8di_mask, "__builtin_ia32_psrav8di_mask", IX86_BUILTIN_PSRAVV8DI, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
32066 { OPTION_MASK_ISA_AVX512F, CODE_FOR_lshrv16si3_mask, "__builtin_ia32_psrld512_mask", IX86_BUILTIN_PSRLD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V4SI_V16SI_HI },
32067 { OPTION_MASK_ISA_AVX512F, CODE_FOR_lshrv16si3_mask, "__builtin_ia32_psrldi512_mask", IX86_BUILTIN_PSRLDI512, UNKNOWN, (int) V16SI_FTYPE_V16SI_INT_V16SI_HI },
32068 { OPTION_MASK_ISA_AVX512F, CODE_FOR_lshrv8di3_mask, "__builtin_ia32_psrlq512_mask", IX86_BUILTIN_PSRLQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V2DI_V8DI_QI },
32069 { OPTION_MASK_ISA_AVX512F, CODE_FOR_lshrv8di3_mask, "__builtin_ia32_psrlqi512_mask", IX86_BUILTIN_PSRLQI512, UNKNOWN, (int) V8DI_FTYPE_V8DI_INT_V8DI_QI },
32070 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_lshrvv16si_mask, "__builtin_ia32_psrlv16si_mask", IX86_BUILTIN_PSRLVV16SI, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_HI },
32071 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_lshrvv8di_mask, "__builtin_ia32_psrlv8di_mask", IX86_BUILTIN_PSRLVV8DI, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
32072 { OPTION_MASK_ISA_AVX512F, CODE_FOR_subv16si3_mask, "__builtin_ia32_psubd512_mask", IX86_BUILTIN_PSUBD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_HI },
32073 { OPTION_MASK_ISA_AVX512F, CODE_FOR_subv8di3_mask, "__builtin_ia32_psubq512_mask", IX86_BUILTIN_PSUBQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
32074 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_testmv16si3_mask, "__builtin_ia32_ptestmd512", IX86_BUILTIN_PTESTMD512, UNKNOWN, (int) HI_FTYPE_V16SI_V16SI_HI },
32075 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_testmv8di3_mask, "__builtin_ia32_ptestmq512", IX86_BUILTIN_PTESTMQ512, UNKNOWN, (int) QI_FTYPE_V8DI_V8DI_QI },
32076 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_testnmv16si3_mask, "__builtin_ia32_ptestnmd512", IX86_BUILTIN_PTESTNMD512, UNKNOWN, (int) HI_FTYPE_V16SI_V16SI_HI },
32077 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_testnmv8di3_mask, "__builtin_ia32_ptestnmq512", IX86_BUILTIN_PTESTNMQ512, UNKNOWN, (int) QI_FTYPE_V8DI_V8DI_QI },
32078 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_interleave_highv16si_mask, "__builtin_ia32_punpckhdq512_mask", IX86_BUILTIN_PUNPCKHDQ512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_HI },
32079 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_interleave_highv8di_mask, "__builtin_ia32_punpckhqdq512_mask", IX86_BUILTIN_PUNPCKHQDQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
32080 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_interleave_lowv16si_mask, "__builtin_ia32_punpckldq512_mask", IX86_BUILTIN_PUNPCKLDQ512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_HI },
32081 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_interleave_lowv8di_mask, "__builtin_ia32_punpcklqdq512_mask", IX86_BUILTIN_PUNPCKLQDQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
32082 { OPTION_MASK_ISA_AVX512F, CODE_FOR_xorv16si3_mask, "__builtin_ia32_pxord512_mask", IX86_BUILTIN_PXORD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_HI },
32083 { OPTION_MASK_ISA_AVX512F, CODE_FOR_xorv8di3_mask, "__builtin_ia32_pxorq512_mask", IX86_BUILTIN_PXORQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
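/* Reciprocal and reciprocal-square-root approximations (rcp14/rsqrt14) */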
32084 { OPTION_MASK_ISA_AVX512F, CODE_FOR_rcp14v8df_mask, "__builtin_ia32_rcp14pd512_mask", IX86_BUILTIN_RCP14PD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_QI },
32085 { OPTION_MASK_ISA_AVX512F, CODE_FOR_rcp14v16sf_mask, "__builtin_ia32_rcp14ps512_mask", IX86_BUILTIN_RCP14PS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_HI },
32086 { OPTION_MASK_ISA_AVX512F, CODE_FOR_srcp14v2df, "__builtin_ia32_rcp14sd", IX86_BUILTIN_RCP14SD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
32087 { OPTION_MASK_ISA_AVX512F, CODE_FOR_srcp14v4sf, "__builtin_ia32_rcp14ss", IX86_BUILTIN_RCP14SS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
32088 { OPTION_MASK_ISA_AVX512F, CODE_FOR_rsqrt14v8df_mask, "__builtin_ia32_rsqrt14pd512_mask", IX86_BUILTIN_RSQRT14PD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_QI },
32089 { OPTION_MASK_ISA_AVX512F, CODE_FOR_rsqrt14v16sf_mask, "__builtin_ia32_rsqrt14ps512_mask", IX86_BUILTIN_RSQRT14PS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_HI },
32090 { OPTION_MASK_ISA_AVX512F, CODE_FOR_rsqrt14v2df, "__builtin_ia32_rsqrt14sd", IX86_BUILTIN_RSQRT14SD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
32091 { OPTION_MASK_ISA_AVX512F, CODE_FOR_rsqrt14v4sf, "__builtin_ia32_rsqrt14ss", IX86_BUILTIN_RSQRT14SS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
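/* In-lane shuffles (shufpd/shufps) and 128-bit block shuffles (shuf_f32x4/f64x2/i32x4/i64x2) */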
32092 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_shufpd512_mask, "__builtin_ia32_shufpd512_mask", IX86_BUILTIN_SHUFPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_INT_V8DF_QI },
32093 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_shufps512_mask, "__builtin_ia32_shufps512_mask", IX86_BUILTIN_SHUFPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_INT_V16SF_HI },
32094 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_shuf_f32x4_mask, "__builtin_ia32_shuf_f32x4_mask", IX86_BUILTIN_SHUF_F32x4, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_INT_V16SF_HI },
32095 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_shuf_f64x2_mask, "__builtin_ia32_shuf_f64x2_mask", IX86_BUILTIN_SHUF_F64x2, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_INT_V8DF_QI },
32096 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_shuf_i32x4_mask, "__builtin_ia32_shuf_i32x4_mask", IX86_BUILTIN_SHUF_I32x4, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_INT_V16SI_HI },
32097 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_shuf_i64x2_mask, "__builtin_ia32_shuf_i64x2_mask", IX86_BUILTIN_SHUF_I64x2, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_INT_V8DI_QI },
32098 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_ucmpv16si3_mask, "__builtin_ia32_ucmpd512_mask", IX86_BUILTIN_UCMPD512, UNKNOWN, (int) HI_FTYPE_V16SI_V16SI_INT_HI },
32099 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_ucmpv8di3_mask, "__builtin_ia32_ucmpq512_mask", IX86_BUILTIN_UCMPQ512, UNKNOWN, (int) QI_FTYPE_V8DI_V8DI_INT_QI },
32100 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_unpckhpd512_mask, "__builtin_ia32_unpckhpd512_mask", IX86_BUILTIN_UNPCKHPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI },
32101 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_unpckhps512_mask, "__builtin_ia32_unpckhps512_mask", IX86_BUILTIN_UNPCKHPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI },
32102 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_unpcklpd512_mask, "__builtin_ia32_unpcklpd512_mask", IX86_BUILTIN_UNPCKLPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI },
32103 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_unpcklps512_mask, "__builtin_ia32_unpcklps512_mask", IX86_BUILTIN_UNPCKLPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI },
32104 { OPTION_MASK_ISA_AVX512CD, CODE_FOR_clzv16si2_mask, "__builtin_ia32_vplzcntd_512_mask", IX86_BUILTIN_VPCLZCNTD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_HI },
32105 { OPTION_MASK_ISA_AVX512CD, CODE_FOR_clzv8di2_mask, "__builtin_ia32_vplzcntq_512_mask", IX86_BUILTIN_VPCLZCNTQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_QI },
32106 { OPTION_MASK_ISA_AVX512CD, CODE_FOR_conflictv16si_mask, "__builtin_ia32_vpconflictsi_512_mask", IX86_BUILTIN_VPCONFLICTD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_HI },
32107 { OPTION_MASK_ISA_AVX512CD, CODE_FOR_conflictv8di_mask, "__builtin_ia32_vpconflictdi_512_mask", IX86_BUILTIN_VPCONFLICTQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_QI },
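/* Permutes: by immediate (permdf/permdi), two-source index tables (vpermi2/vpermt2) and variable index (permvar) */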
32108 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_permv8df_mask, "__builtin_ia32_permdf512_mask", IX86_BUILTIN_VPERMDF512, UNKNOWN, (int) V8DF_FTYPE_V8DF_INT_V8DF_QI },
32109 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_permv8di_mask, "__builtin_ia32_permdi512_mask", IX86_BUILTIN_VPERMDI512, UNKNOWN, (int) V8DI_FTYPE_V8DI_INT_V8DI_QI },
32110 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vpermi2varv16si3_mask, "__builtin_ia32_vpermi2vard512_mask", IX86_BUILTIN_VPERMI2VARD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_HI },
32111 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vpermi2varv8df3_mask, "__builtin_ia32_vpermi2varpd512_mask", IX86_BUILTIN_VPERMI2VARPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DI_V8DF_QI },
32112 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vpermi2varv16sf3_mask, "__builtin_ia32_vpermi2varps512_mask", IX86_BUILTIN_VPERMI2VARPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SI_V16SF_HI },
32113 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vpermi2varv8di3_mask, "__builtin_ia32_vpermi2varq512_mask", IX86_BUILTIN_VPERMI2VARQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
32114 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vpermilv8df_mask, "__builtin_ia32_vpermilpd512_mask", IX86_BUILTIN_VPERMILPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_INT_V8DF_QI },
32115 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vpermilv16sf_mask, "__builtin_ia32_vpermilps512_mask", IX86_BUILTIN_VPERMILPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_INT_V16SF_HI },
32116 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vpermilvarv8df3_mask, "__builtin_ia32_vpermilvarpd512_mask", IX86_BUILTIN_VPERMILVARPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DI_V8DF_QI },
32117 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vpermilvarv16sf3_mask, "__builtin_ia32_vpermilvarps512_mask", IX86_BUILTIN_VPERMILVARPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SI_V16SF_HI },
32118 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vpermt2varv16si3_mask, "__builtin_ia32_vpermt2vard512_mask", IX86_BUILTIN_VPERMT2VARD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_HI },
32119 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vpermt2varv16si3_maskz, "__builtin_ia32_vpermt2vard512_maskz", IX86_BUILTIN_VPERMT2VARD512_MASKZ, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_HI },
32120 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vpermt2varv8df3_mask, "__builtin_ia32_vpermt2varpd512_mask", IX86_BUILTIN_VPERMT2VARPD512, UNKNOWN, (int) V8DF_FTYPE_V8DI_V8DF_V8DF_QI },
32121 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vpermt2varv8df3_maskz, "__builtin_ia32_vpermt2varpd512_maskz", IX86_BUILTIN_VPERMT2VARPD512_MASKZ, UNKNOWN, (int) V8DF_FTYPE_V8DI_V8DF_V8DF_QI },
32122 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vpermt2varv16sf3_mask, "__builtin_ia32_vpermt2varps512_mask", IX86_BUILTIN_VPERMT2VARPS512, UNKNOWN, (int) V16SF_FTYPE_V16SI_V16SF_V16SF_HI },
32123 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vpermt2varv16sf3_maskz, "__builtin_ia32_vpermt2varps512_maskz", IX86_BUILTIN_VPERMT2VARPS512_MASKZ, UNKNOWN, (int) V16SF_FTYPE_V16SI_V16SF_V16SF_HI },
32124 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vpermt2varv8di3_mask, "__builtin_ia32_vpermt2varq512_mask", IX86_BUILTIN_VPERMT2VARQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
32125 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vpermt2varv8di3_maskz, "__builtin_ia32_vpermt2varq512_maskz", IX86_BUILTIN_VPERMT2VARQ512_MASKZ, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
32126 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_permvarv8df_mask, "__builtin_ia32_permvardf512_mask", IX86_BUILTIN_VPERMVARDF512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DI_V8DF_QI },
32127 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_permvarv8di_mask, "__builtin_ia32_permvardi512_mask", IX86_BUILTIN_VPERMVARDI512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
32128 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_permvarv16sf_mask, "__builtin_ia32_permvarsf512_mask", IX86_BUILTIN_VPERMVARSF512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SI_V16SF_HI },
32129 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_permvarv16si_mask, "__builtin_ia32_permvarsi512_mask", IX86_BUILTIN_VPERMVARSI512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_HI },
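/* vpternlog: three-operand bitwise operation selected by an 8-bit truth-table immediate */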
32130 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vternlogv16si_mask, "__builtin_ia32_pternlogd512_mask", IX86_BUILTIN_VTERNLOGD512_MASK, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_INT_HI },
32131 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vternlogv16si_maskz, "__builtin_ia32_pternlogd512_maskz", IX86_BUILTIN_VTERNLOGD512_MASKZ, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_INT_HI },
32132 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vternlogv8di_mask, "__builtin_ia32_pternlogq512_mask", IX86_BUILTIN_VTERNLOGQ512_MASK, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_INT_QI },
32133 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vternlogv8di_maskz, "__builtin_ia32_pternlogq512_maskz", IX86_BUILTIN_VTERNLOGQ512_MASKZ, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_INT_QI },
32135 { OPTION_MASK_ISA_AVX512F, CODE_FOR_copysignv16sf3, "__builtin_ia32_copysignps512", IX86_BUILTIN_CPYSGNPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF },
32136 { OPTION_MASK_ISA_AVX512F, CODE_FOR_copysignv8df3, "__builtin_ia32_copysignpd512", IX86_BUILTIN_CPYSGNPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF },
32137 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_sqrtv8df2, "__builtin_ia32_sqrtpd512", IX86_BUILTIN_SQRTPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF },
32138 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sqrtv16sf2, "__builtin_ia32_sqrtps512", IX86_BUILTIN_SQRTPS_NR512, UNKNOWN, (int) V16SF_FTYPE_V16SF },
32139 { OPTION_MASK_ISA_AVX512ER, CODE_FOR_avx512er_exp2v16sf, "__builtin_ia32_exp2ps", IX86_BUILTIN_EXP2PS, UNKNOWN, (int) V16SF_FTYPE_V16SF },
32140 { OPTION_MASK_ISA_AVX512F, CODE_FOR_roundv8df2_vec_pack_sfix, "__builtin_ia32_roundpd_az_vec_pack_sfix512", IX86_BUILTIN_ROUNDPD_AZ_VEC_PACK_SFIX512, UNKNOWN, (int) V16SI_FTYPE_V8DF_V8DF },
32141 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_roundpd_vec_pack_sfix512, "__builtin_ia32_floorpd_vec_pack_sfix512", IX86_BUILTIN_FLOORPD_VEC_PACK_SFIX512, (enum rtx_code) ROUND_FLOOR, (int) V16SI_FTYPE_V8DF_V8DF_ROUND },
32142 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_roundpd_vec_pack_sfix512, "__builtin_ia32_ceilpd_vec_pack_sfix512", IX86_BUILTIN_CEILPD_VEC_PACK_SFIX512, (enum rtx_code) ROUND_CEIL, (int) V16SI_FTYPE_V8DF_V8DF_ROUND },
32144 /* Mask arithmetic operations */
32145 { OPTION_MASK_ISA_AVX512F, CODE_FOR_andhi3, "__builtin_ia32_kandhi", IX86_BUILTIN_KAND16, UNKNOWN, (int) HI_FTYPE_HI_HI },
32146 { OPTION_MASK_ISA_AVX512F, CODE_FOR_kandnhi, "__builtin_ia32_kandnhi", IX86_BUILTIN_KANDN16, UNKNOWN, (int) HI_FTYPE_HI_HI },
32147 { OPTION_MASK_ISA_AVX512F, CODE_FOR_one_cmplhi2, "__builtin_ia32_knothi", IX86_BUILTIN_KNOT16, UNKNOWN, (int) HI_FTYPE_HI },
32148 { OPTION_MASK_ISA_AVX512F, CODE_FOR_iorhi3, "__builtin_ia32_korhi", IX86_BUILTIN_KOR16, UNKNOWN, (int) HI_FTYPE_HI_HI },
32149 { OPTION_MASK_ISA_AVX512F, CODE_FOR_kortestchi, "__builtin_ia32_kortestchi", IX86_BUILTIN_KORTESTC16, UNKNOWN, (int) HI_FTYPE_HI_HI },
32150 { OPTION_MASK_ISA_AVX512F, CODE_FOR_kortestzhi, "__builtin_ia32_kortestzhi", IX86_BUILTIN_KORTESTZ16, UNKNOWN, (int) HI_FTYPE_HI_HI },
32151 { OPTION_MASK_ISA_AVX512F, CODE_FOR_kunpckhi, "__builtin_ia32_kunpckhi", IX86_BUILTIN_KUNPCKBW, UNKNOWN, (int) HI_FTYPE_HI_HI },
32152 { OPTION_MASK_ISA_AVX512F, CODE_FOR_kxnorhi, "__builtin_ia32_kxnorhi", IX86_BUILTIN_KXNOR16, UNKNOWN, (int) HI_FTYPE_HI_HI },
32153 { OPTION_MASK_ISA_AVX512F, CODE_FOR_xorhi3, "__builtin_ia32_kxorhi", IX86_BUILTIN_KXOR16, UNKNOWN, (int) HI_FTYPE_HI_HI },
32154 { OPTION_MASK_ISA_AVX512F, CODE_FOR_kmovw, "__builtin_ia32_kmov16", IX86_BUILTIN_KMOV16, UNKNOWN, (int) HI_FTYPE_HI },
32156 /* SHA */
32157 { OPTION_MASK_ISA_SSE2, CODE_FOR_sha1msg1, 0, IX86_BUILTIN_SHA1MSG1, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
32158 { OPTION_MASK_ISA_SSE2, CODE_FOR_sha1msg2, 0, IX86_BUILTIN_SHA1MSG2, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
32159 { OPTION_MASK_ISA_SSE2, CODE_FOR_sha1nexte, 0, IX86_BUILTIN_SHA1NEXTE, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
32160 { OPTION_MASK_ISA_SSE2, CODE_FOR_sha1rnds4, 0, IX86_BUILTIN_SHA1RNDS4, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_INT },
32161 { OPTION_MASK_ISA_SSE2, CODE_FOR_sha256msg1, 0, IX86_BUILTIN_SHA256MSG1, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
32162 { OPTION_MASK_ISA_SSE2, CODE_FOR_sha256msg2, 0, IX86_BUILTIN_SHA256MSG2, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
32163 { OPTION_MASK_ISA_SSE2, CODE_FOR_sha256rnds2, 0, IX86_BUILTIN_SHA256RNDS2, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI },
32165 /* AVX512VL. */
32166 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_palignrv32qi_mask, "__builtin_ia32_palignr256_mask", IX86_BUILTIN_PALIGNR256_MASK, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_INT_V4DI_SI_CONVERT },
32167 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_ssse3_palignrv16qi_mask, "__builtin_ia32_palignr128_mask", IX86_BUILTIN_PALIGNR128_MASK, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_INT_V2DI_HI_CONVERT },
32168 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_loadv4di_mask, "__builtin_ia32_movdqa64_256_mask", IX86_BUILTIN_MOVDQA64_256_MASK, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_QI },
32169 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_loadv2di_mask, "__builtin_ia32_movdqa64_128_mask", IX86_BUILTIN_MOVDQA64_128_MASK, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_QI },
32170 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_loadv8si_mask, "__builtin_ia32_movdqa32_256_mask", IX86_BUILTIN_MOVDQA32_256_MASK, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_QI },
32171 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_loadv4si_mask, "__builtin_ia32_movdqa32_128_mask", IX86_BUILTIN_MOVDQA32_128_MASK, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_QI },
32172 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_loadv4df_mask, "__builtin_ia32_movapd256_mask", IX86_BUILTIN_MOVAPD256_MASK, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_QI },
32173 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_loadv2df_mask, "__builtin_ia32_movapd128_mask", IX86_BUILTIN_MOVAPD128_MASK, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_QI },
32174 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_loadv8sf_mask, "__builtin_ia32_movaps256_mask", IX86_BUILTIN_MOVAPS256_MASK, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_QI },
32175 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_loadv4sf_mask, "__builtin_ia32_movaps128_mask", IX86_BUILTIN_MOVAPS128_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_QI },
32176 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_loaddquv16hi_mask, "__builtin_ia32_movdquhi256_mask", IX86_BUILTIN_MOVDQUHI256_MASK, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_HI },
32177 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_loaddquv8hi_mask, "__builtin_ia32_movdquhi128_mask", IX86_BUILTIN_MOVDQUHI128_MASK, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_QI },
32178 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_loaddquv32qi_mask, "__builtin_ia32_movdquqi256_mask", IX86_BUILTIN_MOVDQUQI256_MASK, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI_SI },
32179 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse2_loaddquv16qi_mask, "__builtin_ia32_movdquqi128_mask", IX86_BUILTIN_MOVDQUQI128_MASK, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_HI },
32180 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sminv4sf3_mask, "__builtin_ia32_minps_mask", IX86_BUILTIN_MINPS128_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF_QI },
32181 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_smaxv4sf3_mask, "__builtin_ia32_maxps_mask", IX86_BUILTIN_MAXPS128_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF_QI },
32182 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sminv2df3_mask, "__builtin_ia32_minpd_mask", IX86_BUILTIN_MINPD128_MASK, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF_QI },
32183 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_smaxv2df3_mask, "__builtin_ia32_maxpd_mask", IX86_BUILTIN_MAXPD128_MASK, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF_QI },
32184 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_smaxv4df3_mask, "__builtin_ia32_maxpd256_mask", IX86_BUILTIN_MAXPD256_MASK, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DF_QI },
32185 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_smaxv8sf3_mask, "__builtin_ia32_maxps256_mask", IX86_BUILTIN_MAXPS256_MASK, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SF_QI },
32186 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sminv4df3_mask, "__builtin_ia32_minpd256_mask", IX86_BUILTIN_MINPD256_MASK, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DF_QI },
32187 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sminv8sf3_mask, "__builtin_ia32_minps256_mask", IX86_BUILTIN_MINPS256_MASK, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SF_QI },
32188 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_mulv4sf3_mask, "__builtin_ia32_mulps_mask", IX86_BUILTIN_MULPS128_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF_QI },
32189 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse_divv4sf3_mask, "__builtin_ia32_divps_mask", IX86_BUILTIN_DIVPS128_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF_QI },
32190 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_mulv2df3_mask, "__builtin_ia32_mulpd_mask", IX86_BUILTIN_MULPD128_MASK, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF_QI },
32191 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse2_divv2df3_mask, "__builtin_ia32_divpd_mask", IX86_BUILTIN_DIVPD128_MASK, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF_QI },
32192 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_divv4df3_mask, "__builtin_ia32_divpd256_mask", IX86_BUILTIN_DIVPD256_MASK, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DF_QI },
32193 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_divv8sf3_mask, "__builtin_ia32_divps256_mask", IX86_BUILTIN_DIVPS256_MASK, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SF_QI },
32194 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_mulv4df3_mask, "__builtin_ia32_mulpd256_mask", IX86_BUILTIN_MULPD256_MASK, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DF_QI },
32195 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_mulv8sf3_mask, "__builtin_ia32_mulps256_mask", IX86_BUILTIN_MULPS256_MASK, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SF_QI },
32196 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_addv2df3_mask, "__builtin_ia32_addpd128_mask", IX86_BUILTIN_ADDPD128_MASK, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF_QI },
32197 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_addv4df3_mask, "__builtin_ia32_addpd256_mask", IX86_BUILTIN_ADDPD256_MASK, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DF_QI },
32198 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_addv4sf3_mask, "__builtin_ia32_addps128_mask", IX86_BUILTIN_ADDPS128_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF_QI },
32199 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_addv8sf3_mask, "__builtin_ia32_addps256_mask", IX86_BUILTIN_ADDPS256_MASK, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SF_QI },
32200 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_subv2df3_mask, "__builtin_ia32_subpd128_mask", IX86_BUILTIN_SUBPD128_MASK, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF_QI },
32201 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_subv4df3_mask, "__builtin_ia32_subpd256_mask", IX86_BUILTIN_SUBPD256_MASK, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DF_QI },
32202 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_subv4sf3_mask, "__builtin_ia32_subps128_mask", IX86_BUILTIN_SUBPS128_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF_QI },
32203 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_subv8sf3_mask, "__builtin_ia32_subps256_mask", IX86_BUILTIN_SUBPS256_MASK, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SF_QI },
32204 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_xorv4df3_mask, "__builtin_ia32_xorpd256_mask", IX86_BUILTIN_XORPD256_MASK, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DF_QI },
32205 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_xorv2df3_mask, "__builtin_ia32_xorpd128_mask", IX86_BUILTIN_XORPD128_MASK, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF_QI },
32206 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_xorv8sf3_mask, "__builtin_ia32_xorps256_mask", IX86_BUILTIN_XORPS256_MASK, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SF_QI },
32207 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_xorv4sf3_mask, "__builtin_ia32_xorps128_mask", IX86_BUILTIN_XORPS128_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF_QI },
32208 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_iorv4df3_mask, "__builtin_ia32_orpd256_mask", IX86_BUILTIN_ORPD256_MASK, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DF_QI },
32209 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_iorv2df3_mask, "__builtin_ia32_orpd128_mask", IX86_BUILTIN_ORPD128_MASK, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF_QI },
32210 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_iorv8sf3_mask, "__builtin_ia32_orps256_mask", IX86_BUILTIN_ORPS256_MASK, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SF_QI },
32211 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_iorv4sf3_mask, "__builtin_ia32_orps128_mask", IX86_BUILTIN_ORPS128_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF_QI },
32212 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512dq_broadcastv8sf_mask, "__builtin_ia32_broadcastf32x2_256_mask", IX86_BUILTIN_BROADCASTF32x2_256, UNKNOWN, (int) V8SF_FTYPE_V4SF_V8SF_QI },
32213 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512dq_broadcastv8si_mask, "__builtin_ia32_broadcasti32x2_256_mask", IX86_BUILTIN_BROADCASTI32x2_256, UNKNOWN, (int) V8SI_FTYPE_V4SI_V8SI_QI },
32214 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512dq_broadcastv4si_mask, "__builtin_ia32_broadcasti32x2_128_mask", IX86_BUILTIN_BROADCASTI32x2_128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_QI },
32215 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512dq_broadcastv4df_mask_1, "__builtin_ia32_broadcastf64x2_256_mask", IX86_BUILTIN_BROADCASTF64X2_256, UNKNOWN, (int) V4DF_FTYPE_V2DF_V4DF_QI },
32216 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512dq_broadcastv4di_mask_1, "__builtin_ia32_broadcasti64x2_256_mask", IX86_BUILTIN_BROADCASTI64X2_256, UNKNOWN, (int) V4DI_FTYPE_V2DI_V4DI_QI },
32217 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_broadcastv8sf_mask_1, "__builtin_ia32_broadcastf32x4_256_mask", IX86_BUILTIN_BROADCASTF32X4_256, UNKNOWN, (int) V8SF_FTYPE_V4SF_V8SF_QI },
32218 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_broadcastv8si_mask_1, "__builtin_ia32_broadcasti32x4_256_mask", IX86_BUILTIN_BROADCASTI32X4_256, UNKNOWN, (int) V8SI_FTYPE_V4SI_V8SI_QI },
32219 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vextractf128v8sf, "__builtin_ia32_extractf32x4_256_mask", IX86_BUILTIN_EXTRACTF32X4_256, UNKNOWN, (int) V4SF_FTYPE_V8SF_INT_V4SF_QI },
32220 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vextractf128v8si, "__builtin_ia32_extracti32x4_256_mask", IX86_BUILTIN_EXTRACTI32X4_256, UNKNOWN, (int) V4SI_FTYPE_V8SI_INT_V4SI_QI },
32221 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512bw_dbpsadbwv16hi_mask, "__builtin_ia32_dbpsadbw256_mask", IX86_BUILTIN_DBPSADBW256, UNKNOWN, (int) V16HI_FTYPE_V32QI_V32QI_INT_V16HI_HI },
32222 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512bw_dbpsadbwv8hi_mask, "__builtin_ia32_dbpsadbw128_mask", IX86_BUILTIN_DBPSADBW128, UNKNOWN, (int) V8HI_FTYPE_V16QI_V16QI_INT_V8HI_QI },
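/* 128/256-bit conversions between floating point and signed/unsigned integers, truncating and rounding */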
32223 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_fix_truncv4dfv4di2_mask, "__builtin_ia32_cvttpd2qq256_mask", IX86_BUILTIN_CVTTPD2QQ256, UNKNOWN, (int) V4DI_FTYPE_V4DF_V4DI_QI },
32224 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_fix_truncv2dfv2di2_mask, "__builtin_ia32_cvttpd2qq128_mask", IX86_BUILTIN_CVTTPD2QQ128, UNKNOWN, (int) V2DI_FTYPE_V2DF_V2DI_QI },
32225 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_ufix_truncv4dfv4di2_mask, "__builtin_ia32_cvttpd2uqq256_mask", IX86_BUILTIN_CVTTPD2UQQ256, UNKNOWN, (int) V4DI_FTYPE_V4DF_V4DI_QI },
32226 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_ufix_truncv2dfv2di2_mask, "__builtin_ia32_cvttpd2uqq128_mask", IX86_BUILTIN_CVTTPD2UQQ128, UNKNOWN, (int) V2DI_FTYPE_V2DF_V2DI_QI },
32227 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_fix_notruncv4dfv4di2_mask, "__builtin_ia32_cvtpd2qq256_mask", IX86_BUILTIN_CVTPD2QQ256, UNKNOWN, (int) V4DI_FTYPE_V4DF_V4DI_QI },
32228 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_fix_notruncv2dfv2di2_mask, "__builtin_ia32_cvtpd2qq128_mask", IX86_BUILTIN_CVTPD2QQ128, UNKNOWN, (int) V2DI_FTYPE_V2DF_V2DI_QI },
32229 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_ufix_notruncv4dfv4di2_mask, "__builtin_ia32_cvtpd2uqq256_mask", IX86_BUILTIN_CVTPD2UQQ256, UNKNOWN, (int) V4DI_FTYPE_V4DF_V4DI_QI },
32230 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_ufix_notruncv2dfv2di2_mask, "__builtin_ia32_cvtpd2uqq128_mask", IX86_BUILTIN_CVTPD2UQQ128, UNKNOWN, (int) V2DI_FTYPE_V2DF_V2DI_QI },
32231 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_ufix_notruncv4dfv4si2_mask, "__builtin_ia32_cvtpd2udq256_mask", IX86_BUILTIN_CVTPD2UDQ256_MASK, UNKNOWN, (int) V4SI_FTYPE_V4DF_V4SI_QI },
32232 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_ufix_notruncv2dfv2si2_mask, "__builtin_ia32_cvtpd2udq128_mask", IX86_BUILTIN_CVTPD2UDQ128_MASK, UNKNOWN, (int) V4SI_FTYPE_V2DF_V4SI_QI },
32233 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_fix_truncv4sfv4di2_mask, "__builtin_ia32_cvttps2qq256_mask", IX86_BUILTIN_CVTTPS2QQ256, UNKNOWN, (int) V4DI_FTYPE_V4SF_V4DI_QI },
32234 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_fix_truncv2sfv2di2_mask, "__builtin_ia32_cvttps2qq128_mask", IX86_BUILTIN_CVTTPS2QQ128, UNKNOWN, (int) V2DI_FTYPE_V4SF_V2DI_QI },
32235 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_ufix_truncv4sfv4di2_mask, "__builtin_ia32_cvttps2uqq256_mask", IX86_BUILTIN_CVTTPS2UQQ256, UNKNOWN, (int) V4DI_FTYPE_V4SF_V4DI_QI },
32236 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_ufix_truncv2sfv2di2_mask, "__builtin_ia32_cvttps2uqq128_mask", IX86_BUILTIN_CVTTPS2UQQ128, UNKNOWN, (int) V2DI_FTYPE_V4SF_V2DI_QI },
32237 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_fix_truncv8sfv8si2_mask, "__builtin_ia32_cvttps2dq256_mask", IX86_BUILTIN_CVTTPS2DQ256_MASK, UNKNOWN, (int) V8SI_FTYPE_V8SF_V8SI_QI },
32238 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_fix_truncv4sfv4si2_mask, "__builtin_ia32_cvttps2dq128_mask", IX86_BUILTIN_CVTTPS2DQ128_MASK, UNKNOWN, (int) V4SI_FTYPE_V4SF_V4SI_QI },
32239 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_ufix_truncv8sfv8si2_mask, "__builtin_ia32_cvttps2udq256_mask", IX86_BUILTIN_CVTTPS2UDQ256, UNKNOWN, (int) V8SI_FTYPE_V8SF_V8SI_QI },
32240 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_ufix_truncv4sfv4si2_mask, "__builtin_ia32_cvttps2udq128_mask", IX86_BUILTIN_CVTTPS2UDQ128, UNKNOWN, (int) V4SI_FTYPE_V4SF_V4SI_QI },
32241 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_fix_truncv4dfv4si2_mask, "__builtin_ia32_cvttpd2dq256_mask", IX86_BUILTIN_CVTTPD2DQ256_MASK, UNKNOWN, (int) V4SI_FTYPE_V4DF_V4SI_QI },
32242 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse2_cvttpd2dq_mask, "__builtin_ia32_cvttpd2dq128_mask", IX86_BUILTIN_CVTTPD2DQ128_MASK, UNKNOWN, (int) V4SI_FTYPE_V2DF_V4SI_QI },
32243 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_ufix_truncv4dfv4si2_mask, "__builtin_ia32_cvttpd2udq256_mask", IX86_BUILTIN_CVTTPD2UDQ256_MASK, UNKNOWN, (int) V4SI_FTYPE_V4DF_V4SI_QI },
32244 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_ufix_truncv2dfv2si2_mask, "__builtin_ia32_cvttpd2udq128_mask", IX86_BUILTIN_CVTTPD2UDQ128_MASK, UNKNOWN, (int) V4SI_FTYPE_V2DF_V4SI_QI },
32245 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_cvtpd2dq256_mask, "__builtin_ia32_cvtpd2dq256_mask", IX86_BUILTIN_CVTPD2DQ256_MASK, UNKNOWN, (int) V4SI_FTYPE_V4DF_V4SI_QI },
32246 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse2_cvtpd2dq_mask, "__builtin_ia32_cvtpd2dq128_mask", IX86_BUILTIN_CVTPD2DQ128_MASK, UNKNOWN, (int) V4SI_FTYPE_V2DF_V4SI_QI },
32247 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_floatv4siv4df2_mask, "__builtin_ia32_cvtdq2pd256_mask", IX86_BUILTIN_CVTDQ2PD256_MASK, UNKNOWN, (int) V4DF_FTYPE_V4SI_V4DF_QI },
32248 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse2_cvtdq2pd_mask, "__builtin_ia32_cvtdq2pd128_mask", IX86_BUILTIN_CVTDQ2PD128_MASK, UNKNOWN, (int) V2DF_FTYPE_V4SI_V2DF_QI },
32249 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_ufloatv4siv4df2_mask, "__builtin_ia32_cvtudq2pd256_mask", IX86_BUILTIN_CVTUDQ2PD256_MASK, UNKNOWN, (int) V4DF_FTYPE_V4SI_V4DF_QI },
32250 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_ufloatv2siv2df2_mask, "__builtin_ia32_cvtudq2pd128_mask", IX86_BUILTIN_CVTUDQ2PD128_MASK, UNKNOWN, (int) V2DF_FTYPE_V4SI_V2DF_QI },
32251 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_floatv8siv8sf2_mask, "__builtin_ia32_cvtdq2ps256_mask", IX86_BUILTIN_CVTDQ2PS256_MASK, UNKNOWN, (int) V8SF_FTYPE_V8SI_V8SF_QI },
32252 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_floatv4siv4sf2_mask, "__builtin_ia32_cvtdq2ps128_mask", IX86_BUILTIN_CVTDQ2PS128_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SI_V4SF_QI },
32253 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_ufloatv8siv8sf2_mask, "__builtin_ia32_cvtudq2ps256_mask", IX86_BUILTIN_CVTUDQ2PS256_MASK, UNKNOWN, (int) V8SF_FTYPE_V8SI_V8SF_QI },
32254 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_ufloatv4siv4sf2_mask, "__builtin_ia32_cvtudq2ps128_mask", IX86_BUILTIN_CVTUDQ2PS128_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SI_V4SF_QI },
32255 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_cvtps2pd256_mask, "__builtin_ia32_cvtps2pd256_mask", IX86_BUILTIN_CVTPS2PD256_MASK, UNKNOWN, (int) V4DF_FTYPE_V4SF_V4DF_QI },
32256 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse2_cvtps2pd_mask, "__builtin_ia32_cvtps2pd128_mask", IX86_BUILTIN_CVTPS2PD128_MASK, UNKNOWN, (int) V2DF_FTYPE_V4SF_V2DF_QI },
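/* Masked broadcasts of a single element from a vector or a general-purpose register */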
32257 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vec_dupv32qi_mask, "__builtin_ia32_pbroadcastb256_mask", IX86_BUILTIN_PBROADCASTB256_MASK, UNKNOWN, (int) V32QI_FTYPE_V16QI_V32QI_SI },
32258 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vec_dup_gprv32qi_mask, "__builtin_ia32_pbroadcastb256_gpr_mask", IX86_BUILTIN_PBROADCASTB256_GPR_MASK, UNKNOWN, (int) V32QI_FTYPE_QI_V32QI_SI },
32259 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vec_dupv16qi_mask, "__builtin_ia32_pbroadcastb128_mask", IX86_BUILTIN_PBROADCASTB128_MASK, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_HI },
32260 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vec_dup_gprv16qi_mask, "__builtin_ia32_pbroadcastb128_gpr_mask", IX86_BUILTIN_PBROADCASTB128_GPR_MASK, UNKNOWN, (int) V16QI_FTYPE_QI_V16QI_HI },
32261 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vec_dupv16hi_mask, "__builtin_ia32_pbroadcastw256_mask", IX86_BUILTIN_PBROADCASTW256_MASK, UNKNOWN, (int) V16HI_FTYPE_V8HI_V16HI_HI },
32262 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vec_dup_gprv16hi_mask, "__builtin_ia32_pbroadcastw256_gpr_mask", IX86_BUILTIN_PBROADCASTW256_GPR_MASK, UNKNOWN, (int) V16HI_FTYPE_HI_V16HI_HI },
32263 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vec_dupv8hi_mask, "__builtin_ia32_pbroadcastw128_mask", IX86_BUILTIN_PBROADCASTW128_MASK, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_QI },
32264 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vec_dup_gprv8hi_mask, "__builtin_ia32_pbroadcastw128_gpr_mask", IX86_BUILTIN_PBROADCASTW128_GPR_MASK, UNKNOWN, (int) V8HI_FTYPE_HI_V8HI_QI },
32265 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vec_dupv8si_mask, "__builtin_ia32_pbroadcastd256_mask", IX86_BUILTIN_PBROADCASTD256_MASK, UNKNOWN, (int) V8SI_FTYPE_V4SI_V8SI_QI },
32266 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vec_dup_gprv8si_mask, "__builtin_ia32_pbroadcastd256_gpr_mask", IX86_BUILTIN_PBROADCASTD256_GPR_MASK, UNKNOWN, (int) V8SI_FTYPE_SI_V8SI_QI },
32267 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vec_dupv4si_mask, "__builtin_ia32_pbroadcastd128_mask", IX86_BUILTIN_PBROADCASTD128_MASK, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_QI },
32268 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vec_dup_gprv4si_mask, "__builtin_ia32_pbroadcastd128_gpr_mask", IX86_BUILTIN_PBROADCASTD128_GPR_MASK, UNKNOWN, (int) V4SI_FTYPE_SI_V4SI_QI },
32269 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vec_dupv4di_mask, "__builtin_ia32_pbroadcastq256_mask", IX86_BUILTIN_PBROADCASTQ256_MASK, UNKNOWN, (int) V4DI_FTYPE_V2DI_V4DI_QI },
32270 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vec_dup_gprv4di_mask, "__builtin_ia32_pbroadcastq256_gpr_mask", IX86_BUILTIN_PBROADCASTQ256_GPR_MASK, UNKNOWN, (int) V4DI_FTYPE_DI_V4DI_QI },
32271 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vec_dupv2di_mask, "__builtin_ia32_pbroadcastq128_mask", IX86_BUILTIN_PBROADCASTQ128_MASK, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_QI },
32272 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vec_dup_gprv2di_mask, "__builtin_ia32_pbroadcastq128_gpr_mask", IX86_BUILTIN_PBROADCASTQ128_GPR_MASK, UNKNOWN, (int) V2DI_FTYPE_DI_V2DI_QI },
32273 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vec_dupv8sf_mask, "__builtin_ia32_broadcastss256_mask", IX86_BUILTIN_BROADCASTSS256, UNKNOWN, (int) V8SF_FTYPE_V4SF_V8SF_QI },
32274 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vec_dupv4sf_mask, "__builtin_ia32_broadcastss128_mask", IX86_BUILTIN_BROADCASTSS128, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_QI },
32275 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vec_dupv4df_mask, "__builtin_ia32_broadcastsd256_mask", IX86_BUILTIN_BROADCASTSD256, UNKNOWN, (int) V4DF_FTYPE_V2DF_V4DF_QI },
32276 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vextractf128v4df, "__builtin_ia32_extractf64x2_256_mask", IX86_BUILTIN_EXTRACTF64X2_256, UNKNOWN, (int) V2DF_FTYPE_V4DF_INT_V2DF_QI },
32277 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vextractf128v4di, "__builtin_ia32_extracti64x2_256_mask", IX86_BUILTIN_EXTRACTI64X2_256, UNKNOWN, (int) V2DI_FTYPE_V4DI_INT_V2DI_QI },
32278 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vinsertv8sf, "__builtin_ia32_insertf32x4_256_mask", IX86_BUILTIN_INSERTF32X4_256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V4SF_INT_V8SF_QI },
32279 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vinsertv8si, "__builtin_ia32_inserti32x4_256_mask", IX86_BUILTIN_INSERTI32X4_256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V4SI_INT_V8SI_QI },
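/* Masked sign and zero extensions (vpmovsx*, vpmovzx*) */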
32280 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_sign_extendv16qiv16hi2_mask, "__builtin_ia32_pmovsxbw256_mask", IX86_BUILTIN_PMOVSXBW256_MASK, UNKNOWN, (int) V16HI_FTYPE_V16QI_V16HI_HI },
32281 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse4_1_sign_extendv8qiv8hi2_mask, "__builtin_ia32_pmovsxbw128_mask", IX86_BUILTIN_PMOVSXBW128_MASK, UNKNOWN, (int) V8HI_FTYPE_V16QI_V8HI_QI },
32282 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_sign_extendv8qiv8si2_mask, "__builtin_ia32_pmovsxbd256_mask", IX86_BUILTIN_PMOVSXBD256_MASK, UNKNOWN, (int) V8SI_FTYPE_V16QI_V8SI_QI },
32283 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse4_1_sign_extendv4qiv4si2_mask, "__builtin_ia32_pmovsxbd128_mask", IX86_BUILTIN_PMOVSXBD128_MASK, UNKNOWN, (int) V4SI_FTYPE_V16QI_V4SI_QI },
32284 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_sign_extendv4qiv4di2_mask, "__builtin_ia32_pmovsxbq256_mask", IX86_BUILTIN_PMOVSXBQ256_MASK, UNKNOWN, (int) V4DI_FTYPE_V16QI_V4DI_QI },
32285 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse4_1_sign_extendv2qiv2di2_mask, "__builtin_ia32_pmovsxbq128_mask", IX86_BUILTIN_PMOVSXBQ128_MASK, UNKNOWN, (int) V2DI_FTYPE_V16QI_V2DI_QI },
32286 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_sign_extendv8hiv8si2_mask, "__builtin_ia32_pmovsxwd256_mask", IX86_BUILTIN_PMOVSXWD256_MASK, UNKNOWN, (int) V8SI_FTYPE_V8HI_V8SI_QI },
32287 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse4_1_sign_extendv4hiv4si2_mask, "__builtin_ia32_pmovsxwd128_mask", IX86_BUILTIN_PMOVSXWD128_MASK, UNKNOWN, (int) V4SI_FTYPE_V8HI_V4SI_QI },
32288 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_sign_extendv4hiv4di2_mask, "__builtin_ia32_pmovsxwq256_mask", IX86_BUILTIN_PMOVSXWQ256_MASK, UNKNOWN, (int) V4DI_FTYPE_V8HI_V4DI_QI },
32289 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse4_1_sign_extendv2hiv2di2_mask, "__builtin_ia32_pmovsxwq128_mask", IX86_BUILTIN_PMOVSXWQ128_MASK, UNKNOWN, (int) V2DI_FTYPE_V8HI_V2DI_QI },
32290 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_sign_extendv4siv4di2_mask, "__builtin_ia32_pmovsxdq256_mask", IX86_BUILTIN_PMOVSXDQ256_MASK, UNKNOWN, (int) V4DI_FTYPE_V4SI_V4DI_QI },
32291 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse4_1_sign_extendv2siv2di2_mask, "__builtin_ia32_pmovsxdq128_mask", IX86_BUILTIN_PMOVSXDQ128_MASK, UNKNOWN, (int) V2DI_FTYPE_V4SI_V2DI_QI },
32292 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_zero_extendv16qiv16hi2_mask, "__builtin_ia32_pmovzxbw256_mask", IX86_BUILTIN_PMOVZXBW256_MASK, UNKNOWN, (int) V16HI_FTYPE_V16QI_V16HI_HI },
32293 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse4_1_zero_extendv8qiv8hi2_mask, "__builtin_ia32_pmovzxbw128_mask", IX86_BUILTIN_PMOVZXBW128_MASK, UNKNOWN, (int) V8HI_FTYPE_V16QI_V8HI_QI },
32294 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_zero_extendv8qiv8si2_mask, "__builtin_ia32_pmovzxbd256_mask", IX86_BUILTIN_PMOVZXBD256_MASK, UNKNOWN, (int) V8SI_FTYPE_V16QI_V8SI_QI },
32295 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse4_1_zero_extendv4qiv4si2_mask, "__builtin_ia32_pmovzxbd128_mask", IX86_BUILTIN_PMOVZXBD128_MASK, UNKNOWN, (int) V4SI_FTYPE_V16QI_V4SI_QI },
32296 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_zero_extendv4qiv4di2_mask, "__builtin_ia32_pmovzxbq256_mask", IX86_BUILTIN_PMOVZXBQ256_MASK, UNKNOWN, (int) V4DI_FTYPE_V16QI_V4DI_QI },
32297 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse4_1_zero_extendv2qiv2di2_mask, "__builtin_ia32_pmovzxbq128_mask", IX86_BUILTIN_PMOVZXBQ128_MASK, UNKNOWN, (int) V2DI_FTYPE_V16QI_V2DI_QI },
32298 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_zero_extendv8hiv8si2_mask, "__builtin_ia32_pmovzxwd256_mask", IX86_BUILTIN_PMOVZXWD256_MASK, UNKNOWN, (int) V8SI_FTYPE_V8HI_V8SI_QI },
32299 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse4_1_zero_extendv4hiv4si2_mask, "__builtin_ia32_pmovzxwd128_mask", IX86_BUILTIN_PMOVZXWD128_MASK, UNKNOWN, (int) V4SI_FTYPE_V8HI_V4SI_QI },
32300 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_zero_extendv4hiv4di2_mask, "__builtin_ia32_pmovzxwq256_mask", IX86_BUILTIN_PMOVZXWQ256_MASK, UNKNOWN, (int) V4DI_FTYPE_V8HI_V4DI_QI },
32301 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse4_1_zero_extendv2hiv2di2_mask, "__builtin_ia32_pmovzxwq128_mask", IX86_BUILTIN_PMOVZXWQ128_MASK, UNKNOWN, (int) V2DI_FTYPE_V8HI_V2DI_QI },
32302 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_zero_extendv4siv4di2_mask, "__builtin_ia32_pmovzxdq256_mask", IX86_BUILTIN_PMOVZXDQ256_MASK, UNKNOWN, (int) V4DI_FTYPE_V4SI_V4DI_QI },
32303 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse4_1_zero_extendv2siv2di2_mask, "__builtin_ia32_pmovzxdq128_mask", IX86_BUILTIN_PMOVZXDQ128_MASK, UNKNOWN, (int) V2DI_FTYPE_V4SI_V2DI_QI },
32304 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_reducepv4df_mask, "__builtin_ia32_reducepd256_mask", IX86_BUILTIN_REDUCEPD256_MASK, UNKNOWN, (int) V4DF_FTYPE_V4DF_INT_V4DF_QI },
32305 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_reducepv2df_mask, "__builtin_ia32_reducepd128_mask", IX86_BUILTIN_REDUCEPD128_MASK, UNKNOWN, (int) V2DF_FTYPE_V2DF_INT_V2DF_QI },
32306 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_reducepv8sf_mask, "__builtin_ia32_reduceps256_mask", IX86_BUILTIN_REDUCEPS256_MASK, UNKNOWN, (int) V8SF_FTYPE_V8SF_INT_V8SF_QI },
32307 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_reducepv4sf_mask, "__builtin_ia32_reduceps128_mask", IX86_BUILTIN_REDUCEPS128_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SF_INT_V4SF_QI },
32308 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_reducesv2df, "__builtin_ia32_reducesd", IX86_BUILTIN_REDUCESD_MASK, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
32309 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_reducesv4sf, "__builtin_ia32_reducess", IX86_BUILTIN_REDUCESS_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
32310 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_permvarv16hi_mask, "__builtin_ia32_permvarhi256_mask", IX86_BUILTIN_VPERMVARHI256_MASK, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_V16HI_HI },
32311 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_permvarv8hi_mask, "__builtin_ia32_permvarhi128_mask", IX86_BUILTIN_VPERMVARHI128_MASK, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_V8HI_QI },
32312 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermt2varv16hi3_mask, "__builtin_ia32_vpermt2varhi256_mask", IX86_BUILTIN_VPERMT2VARHI256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_V16HI_HI },
32313 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermt2varv16hi3_maskz, "__builtin_ia32_vpermt2varhi256_maskz", IX86_BUILTIN_VPERMT2VARHI256_MASKZ, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_V16HI_HI },
32314 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermt2varv8hi3_mask, "__builtin_ia32_vpermt2varhi128_mask", IX86_BUILTIN_VPERMT2VARHI128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_V8HI_QI },
32315 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermt2varv8hi3_maskz, "__builtin_ia32_vpermt2varhi128_maskz", IX86_BUILTIN_VPERMT2VARHI128_MASKZ, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_V8HI_QI },
32316 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermi2varv16hi3_mask, "__builtin_ia32_vpermi2varhi256_mask", IX86_BUILTIN_VPERMI2VARHI256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_V16HI_HI },
32317 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermi2varv8hi3_mask, "__builtin_ia32_vpermi2varhi128_mask", IX86_BUILTIN_VPERMI2VARHI128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_V8HI_QI },
32318 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_rcp14v4df_mask, "__builtin_ia32_rcp14pd256_mask", IX86_BUILTIN_RCP14PD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_QI },
32319 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_rcp14v2df_mask, "__builtin_ia32_rcp14pd128_mask", IX86_BUILTIN_RCP14PD128, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_QI },
32320 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_rcp14v8sf_mask, "__builtin_ia32_rcp14ps256_mask", IX86_BUILTIN_RCP14PS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_QI },
32321 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_rcp14v4sf_mask, "__builtin_ia32_rcp14ps128_mask", IX86_BUILTIN_RCP14PS128, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_QI },
32322 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_rsqrt14v4df_mask, "__builtin_ia32_rsqrt14pd256_mask", IX86_BUILTIN_RSQRT14PD256_MASK, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_QI },
32323 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_rsqrt14v2df_mask, "__builtin_ia32_rsqrt14pd128_mask", IX86_BUILTIN_RSQRT14PD128_MASK, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_QI },
32324 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_rsqrt14v8sf_mask, "__builtin_ia32_rsqrt14ps256_mask", IX86_BUILTIN_RSQRT14PS256_MASK, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_QI },
32325 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_rsqrt14v4sf_mask, "__builtin_ia32_rsqrt14ps128_mask", IX86_BUILTIN_RSQRT14PS128_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_QI },
32326 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_sqrtv4df2_mask, "__builtin_ia32_sqrtpd256_mask", IX86_BUILTIN_SQRTPD256_MASK, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_QI },
32327 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse2_sqrtv2df2_mask, "__builtin_ia32_sqrtpd128_mask", IX86_BUILTIN_SQRTPD128_MASK, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_QI },
32328 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_sqrtv8sf2_mask, "__builtin_ia32_sqrtps256_mask", IX86_BUILTIN_SQRTPS256_MASK, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_QI },
32329 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse_sqrtv4sf2_mask, "__builtin_ia32_sqrtps128_mask", IX86_BUILTIN_SQRTPS128_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_QI },
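/* Masked 128-bit packed integer add/subtract, including the signed and
   unsigned saturating forms (AVX512BW + AVX512VL), followed by the
   corresponding 256-bit entries.  */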
32330 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_addv16qi3_mask, "__builtin_ia32_paddb128_mask", IX86_BUILTIN_PADDB128_MASK, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_V16QI_HI },
32331 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_addv8hi3_mask, "__builtin_ia32_paddw128_mask", IX86_BUILTIN_PADDW128_MASK, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_V8HI_QI },
32332 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_addv4si3_mask, "__builtin_ia32_paddd128_mask", IX86_BUILTIN_PADDD128_MASK, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI_QI },
32333 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_addv2di3_mask, "__builtin_ia32_paddq128_mask", IX86_BUILTIN_PADDQ128_MASK, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI_QI },
32334 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_subv16qi3_mask, "__builtin_ia32_psubb128_mask", IX86_BUILTIN_PSUBB128_MASK, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_V16QI_HI },
32335 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_subv8hi3_mask, "__builtin_ia32_psubw128_mask", IX86_BUILTIN_PSUBW128_MASK, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_V8HI_QI },
32336 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_subv4si3_mask, "__builtin_ia32_psubd128_mask", IX86_BUILTIN_PSUBD128_MASK, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI_QI },
32337 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_subv2di3_mask, "__builtin_ia32_psubq128_mask", IX86_BUILTIN_PSUBQ128_MASK, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI_QI },
32338 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse2_ssaddv16qi3_mask, "__builtin_ia32_paddsb128_mask", IX86_BUILTIN_PADDSB128_MASK, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_V16QI_HI },
32339 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse2_ssaddv8hi3_mask, "__builtin_ia32_paddsw128_mask", IX86_BUILTIN_PADDSW128_MASK, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_V8HI_QI },
32340 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse2_sssubv16qi3_mask, "__builtin_ia32_psubsb128_mask", IX86_BUILTIN_PSUBSB128_MASK, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_V16QI_HI },
32341 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse2_sssubv8hi3_mask, "__builtin_ia32_psubsw128_mask", IX86_BUILTIN_PSUBSW128_MASK, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_V8HI_QI },
32342 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse2_usaddv16qi3_mask, "__builtin_ia32_paddusb128_mask", IX86_BUILTIN_PADDUSB128_MASK, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_V16QI_HI },
32343 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse2_usaddv8hi3_mask, "__builtin_ia32_paddusw128_mask", IX86_BUILTIN_PADDUSW128_MASK, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_V8HI_QI },
32344 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse2_ussubv16qi3_mask, "__builtin_ia32_psubusb128_mask", IX86_BUILTIN_PSUBUSB128_MASK, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_V16QI_HI },
32345 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse2_ussubv8hi3_mask, "__builtin_ia32_psubusw128_mask", IX86_BUILTIN_PSUBUSW128_MASK, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_V8HI_QI },
32346 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_addv32qi3_mask, "__builtin_ia32_paddb256_mask", IX86_BUILTIN_PADDB256_MASK, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI_V32QI_SI },
32347 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_addv16hi3_mask, "__builtin_ia32_paddw256_mask", IX86_BUILTIN_PADDW256_MASK, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_V16HI_HI },
32348 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_addv8si3_mask, "__builtin_ia32_paddd256_mask", IX86_BUILTIN_PADDD256_MASK, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_V8SI_QI },
32349 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_addv4di3_mask, "__builtin_ia32_paddq256_mask", IX86_BUILTIN_PADDQ256_MASK, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_V4DI_QI },
32350 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_ssaddv32qi3_mask, "__builtin_ia32_paddsb256_mask", IX86_BUILTIN_PADDSB256_MASK, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI_V32QI_SI },
32351 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_ssaddv16hi3_mask, "__builtin_ia32_paddsw256_mask", IX86_BUILTIN_PADDSW256_MASK, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_V16HI_HI },
32352 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_usaddv32qi3_mask, "__builtin_ia32_paddusb256_mask", IX86_BUILTIN_PADDUSB256_MASK, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI_V32QI_SI },
32353 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_usaddv16hi3_mask, "__builtin_ia32_paddusw256_mask", IX86_BUILTIN_PADDUSW256_MASK, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_V16HI_HI },
32354 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_subv32qi3_mask, "__builtin_ia32_psubb256_mask", IX86_BUILTIN_PSUBB256_MASK, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI_V32QI_SI },
32355 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_subv16hi3_mask, "__builtin_ia32_psubw256_mask", IX86_BUILTIN_PSUBW256_MASK, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_V16HI_HI },
32356 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_subv8si3_mask, "__builtin_ia32_psubd256_mask", IX86_BUILTIN_PSUBD256_MASK, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_V8SI_QI },
32357 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_subv4di3_mask, "__builtin_ia32_psubq256_mask", IX86_BUILTIN_PSUBQ256_MASK, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_V4DI_QI },
32358 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_sssubv32qi3_mask, "__builtin_ia32_psubsb256_mask", IX86_BUILTIN_PSUBSB256_MASK, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI_V32QI_SI },
32359 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_sssubv16hi3_mask, "__builtin_ia32_psubsw256_mask", IX86_BUILTIN_PSUBSW256_MASK, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_V16HI_HI },
32360 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_ussubv32qi3_mask, "__builtin_ia32_psubusb256_mask", IX86_BUILTIN_PSUBUSB256_MASK, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI_V32QI_SI },
32361 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_ussubv16hi3_mask, "__builtin_ia32_psubusw256_mask", IX86_BUILTIN_PSUBUSW256_MASK, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_V16HI_HI },
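/* Illustrative note (not part of the table): entries such as
   "__builtin_ia32_paddb256_mask" above take the two source vectors plus a
   pass-through vector and a write mask, matching the _FTYPE_ signature in
   the last field; the AVX-512VL/BW intrinsic wrappers in the intrinsics
   headers (e.g. the _mm256_mask_add_epi8 family) are assumed to expand to
   these builtins.  */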
32362 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512dq_shuf_f64x2_mask, "__builtin_ia32_shuf_f64x2_256_mask", IX86_BUILTIN_SHUF_F64x2_256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_INT_V4DF_QI },
32363 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512dq_shuf_i64x2_mask, "__builtin_ia32_shuf_i64x2_256_mask", IX86_BUILTIN_SHUF_I64x2_256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_INT_V4DI_QI },
32364 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_shuf_i32x4_mask, "__builtin_ia32_shuf_i32x4_256_mask", IX86_BUILTIN_SHUF_I32x4_256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_INT_V8SI_QI },
32365 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_shuf_f32x4_mask, "__builtin_ia32_shuf_f32x4_256_mask", IX86_BUILTIN_SHUF_F32x4_256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_INT_V8SF_QI },
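/* Masked truncating down-conversions (VPMOV{,S,US}{WB,DB,DW,QB,QW,QD}) from
   wider to narrower integer elements, 128-bit and 256-bit forms.  */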
32366 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_truncatev8hiv8qi2_mask, "__builtin_ia32_pmovwb128_mask", IX86_BUILTIN_PMOVWB128, UNKNOWN, (int) V16QI_FTYPE_V8HI_V16QI_QI },
32367 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_truncatev16hiv16qi2_mask, "__builtin_ia32_pmovwb256_mask", IX86_BUILTIN_PMOVWB256, UNKNOWN, (int) V16QI_FTYPE_V16HI_V16QI_HI },
32368 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ss_truncatev8hiv8qi2_mask, "__builtin_ia32_pmovswb128_mask", IX86_BUILTIN_PMOVSWB128, UNKNOWN, (int) V16QI_FTYPE_V8HI_V16QI_QI },
32369 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ss_truncatev16hiv16qi2_mask, "__builtin_ia32_pmovswb256_mask", IX86_BUILTIN_PMOVSWB256, UNKNOWN, (int) V16QI_FTYPE_V16HI_V16QI_HI },
32370 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_us_truncatev8hiv8qi2_mask, "__builtin_ia32_pmovuswb128_mask", IX86_BUILTIN_PMOVUSWB128, UNKNOWN, (int) V16QI_FTYPE_V8HI_V16QI_QI },
32371 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_us_truncatev16hiv16qi2_mask, "__builtin_ia32_pmovuswb256_mask", IX86_BUILTIN_PMOVUSWB256, UNKNOWN, (int) V16QI_FTYPE_V16HI_V16QI_HI },
32372 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_truncatev4siv4qi2_mask, "__builtin_ia32_pmovdb128_mask", IX86_BUILTIN_PMOVDB128, UNKNOWN, (int) V16QI_FTYPE_V4SI_V16QI_QI },
32373 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_truncatev8siv8qi2_mask, "__builtin_ia32_pmovdb256_mask", IX86_BUILTIN_PMOVDB256, UNKNOWN, (int) V16QI_FTYPE_V8SI_V16QI_QI },
32374 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ss_truncatev4siv4qi2_mask, "__builtin_ia32_pmovsdb128_mask", IX86_BUILTIN_PMOVSDB128, UNKNOWN, (int) V16QI_FTYPE_V4SI_V16QI_QI },
32375 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ss_truncatev8siv8qi2_mask, "__builtin_ia32_pmovsdb256_mask", IX86_BUILTIN_PMOVSDB256, UNKNOWN, (int) V16QI_FTYPE_V8SI_V16QI_QI },
32376 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_us_truncatev4siv4qi2_mask, "__builtin_ia32_pmovusdb128_mask", IX86_BUILTIN_PMOVUSDB128, UNKNOWN, (int) V16QI_FTYPE_V4SI_V16QI_QI },
32377 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_us_truncatev8siv8qi2_mask, "__builtin_ia32_pmovusdb256_mask", IX86_BUILTIN_PMOVUSDB256, UNKNOWN, (int) V16QI_FTYPE_V8SI_V16QI_QI },
32378 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_truncatev4siv4hi2_mask, "__builtin_ia32_pmovdw128_mask", IX86_BUILTIN_PMOVDW128, UNKNOWN, (int) V8HI_FTYPE_V4SI_V8HI_QI },
32379 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_truncatev8siv8hi2_mask, "__builtin_ia32_pmovdw256_mask", IX86_BUILTIN_PMOVDW256, UNKNOWN, (int) V8HI_FTYPE_V8SI_V8HI_QI },
32380 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ss_truncatev4siv4hi2_mask, "__builtin_ia32_pmovsdw128_mask", IX86_BUILTIN_PMOVSDW128, UNKNOWN, (int) V8HI_FTYPE_V4SI_V8HI_QI },
32381 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ss_truncatev8siv8hi2_mask, "__builtin_ia32_pmovsdw256_mask", IX86_BUILTIN_PMOVSDW256, UNKNOWN, (int) V8HI_FTYPE_V8SI_V8HI_QI },
32382 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_us_truncatev4siv4hi2_mask, "__builtin_ia32_pmovusdw128_mask", IX86_BUILTIN_PMOVUSDW128, UNKNOWN, (int) V8HI_FTYPE_V4SI_V8HI_QI },
32383 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_us_truncatev8siv8hi2_mask, "__builtin_ia32_pmovusdw256_mask", IX86_BUILTIN_PMOVUSDW256, UNKNOWN, (int) V8HI_FTYPE_V8SI_V8HI_QI },
32384 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_truncatev2div2qi2_mask, "__builtin_ia32_pmovqb128_mask", IX86_BUILTIN_PMOVQB128, UNKNOWN, (int) V16QI_FTYPE_V2DI_V16QI_QI },
32385 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_truncatev4div4qi2_mask, "__builtin_ia32_pmovqb256_mask", IX86_BUILTIN_PMOVQB256, UNKNOWN, (int) V16QI_FTYPE_V4DI_V16QI_QI },
32386 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ss_truncatev2div2qi2_mask, "__builtin_ia32_pmovsqb128_mask", IX86_BUILTIN_PMOVSQB128, UNKNOWN, (int) V16QI_FTYPE_V2DI_V16QI_QI },
32387 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ss_truncatev4div4qi2_mask, "__builtin_ia32_pmovsqb256_mask", IX86_BUILTIN_PMOVSQB256, UNKNOWN, (int) V16QI_FTYPE_V4DI_V16QI_QI },
32388 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_us_truncatev2div2qi2_mask, "__builtin_ia32_pmovusqb128_mask", IX86_BUILTIN_PMOVUSQB128, UNKNOWN, (int) V16QI_FTYPE_V2DI_V16QI_QI },
32389 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_us_truncatev4div4qi2_mask, "__builtin_ia32_pmovusqb256_mask", IX86_BUILTIN_PMOVUSQB256, UNKNOWN, (int) V16QI_FTYPE_V4DI_V16QI_QI },
32390 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_truncatev2div2hi2_mask, "__builtin_ia32_pmovqw128_mask", IX86_BUILTIN_PMOVQW128, UNKNOWN, (int) V8HI_FTYPE_V2DI_V8HI_QI },
32391 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_truncatev4div4hi2_mask, "__builtin_ia32_pmovqw256_mask", IX86_BUILTIN_PMOVQW256, UNKNOWN, (int) V8HI_FTYPE_V4DI_V8HI_QI },
32392 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ss_truncatev2div2hi2_mask, "__builtin_ia32_pmovsqw128_mask", IX86_BUILTIN_PMOVSQW128, UNKNOWN, (int) V8HI_FTYPE_V2DI_V8HI_QI },
32393 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ss_truncatev4div4hi2_mask, "__builtin_ia32_pmovsqw256_mask", IX86_BUILTIN_PMOVSQW256, UNKNOWN, (int) V8HI_FTYPE_V4DI_V8HI_QI },
32394 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_us_truncatev2div2hi2_mask, "__builtin_ia32_pmovusqw128_mask", IX86_BUILTIN_PMOVUSQW128, UNKNOWN, (int) V8HI_FTYPE_V2DI_V8HI_QI },
32395 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_us_truncatev4div4hi2_mask, "__builtin_ia32_pmovusqw256_mask", IX86_BUILTIN_PMOVUSQW256, UNKNOWN, (int) V8HI_FTYPE_V4DI_V8HI_QI },
32396 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_truncatev2div2si2_mask, "__builtin_ia32_pmovqd128_mask", IX86_BUILTIN_PMOVQD128, UNKNOWN, (int) V4SI_FTYPE_V2DI_V4SI_QI },
32397 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_truncatev4div4si2_mask, "__builtin_ia32_pmovqd256_mask", IX86_BUILTIN_PMOVQD256, UNKNOWN, (int) V4SI_FTYPE_V4DI_V4SI_QI },
32398 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ss_truncatev2div2si2_mask, "__builtin_ia32_pmovsqd128_mask", IX86_BUILTIN_PMOVSQD128, UNKNOWN, (int) V4SI_FTYPE_V2DI_V4SI_QI },
32399 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ss_truncatev4div4si2_mask, "__builtin_ia32_pmovsqd256_mask", IX86_BUILTIN_PMOVSQD256, UNKNOWN, (int) V4SI_FTYPE_V4DI_V4SI_QI },
32400 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_us_truncatev2div2si2_mask, "__builtin_ia32_pmovusqd128_mask", IX86_BUILTIN_PMOVUSQD128, UNKNOWN, (int) V4SI_FTYPE_V2DI_V4SI_QI },
32401 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_us_truncatev4div4si2_mask, "__builtin_ia32_pmovusqd256_mask", IX86_BUILTIN_PMOVUSQD256, UNKNOWN, (int) V4SI_FTYPE_V4DI_V4SI_QI },
32402 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512dq_rangepv4df_mask, "__builtin_ia32_rangepd256_mask", IX86_BUILTIN_RANGEPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_INT_V4DF_QI },
32403 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512dq_rangepv2df_mask, "__builtin_ia32_rangepd128_mask", IX86_BUILTIN_RANGEPD128, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT_V2DF_QI },
32404 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512dq_rangepv8sf_mask, "__builtin_ia32_rangeps256_mask", IX86_BUILTIN_RANGEPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_INT_V8SF_QI },
32405 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512dq_rangepv4sf_mask, "__builtin_ia32_rangeps128_mask", IX86_BUILTIN_RANGEPS128, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT_V4SF_QI },
32406 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_getexpv8sf_mask, "__builtin_ia32_getexpps256_mask", IX86_BUILTIN_GETEXPPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_QI },
32407 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_getexpv4df_mask, "__builtin_ia32_getexppd256_mask", IX86_BUILTIN_GETEXPPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_QI },
32408 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_getexpv4sf_mask, "__builtin_ia32_getexpps128_mask", IX86_BUILTIN_GETEXPPS128, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_QI },
32409 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_getexpv2df_mask, "__builtin_ia32_getexppd128_mask", IX86_BUILTIN_GETEXPPD128, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_QI },
32410 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fixupimmv4df_mask, "__builtin_ia32_fixupimmpd256_mask", IX86_BUILTIN_FIXUPIMMPD256_MASK, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DI_INT_QI },
32411 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fixupimmv4df_maskz, "__builtin_ia32_fixupimmpd256_maskz", IX86_BUILTIN_FIXUPIMMPD256_MASKZ, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DI_INT_QI },
32412 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fixupimmv8sf_mask, "__builtin_ia32_fixupimmps256_mask", IX86_BUILTIN_FIXUPIMMPS256_MASK, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SI_INT_QI },
32413 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fixupimmv8sf_maskz, "__builtin_ia32_fixupimmps256_maskz", IX86_BUILTIN_FIXUPIMMPS256_MASKZ, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SI_INT_QI },
32414 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fixupimmv2df_mask, "__builtin_ia32_fixupimmpd128_mask", IX86_BUILTIN_FIXUPIMMPD128_MASK, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DI_INT_QI },
32415 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fixupimmv2df_maskz, "__builtin_ia32_fixupimmpd128_maskz", IX86_BUILTIN_FIXUPIMMPD128_MASKZ, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DI_INT_QI },
32416 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fixupimmv4sf_mask, "__builtin_ia32_fixupimmps128_mask", IX86_BUILTIN_FIXUPIMMPS128_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SI_INT_QI },
32417 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fixupimmv4sf_maskz, "__builtin_ia32_fixupimmps128_maskz", IX86_BUILTIN_FIXUPIMMPS128_MASKZ, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SI_INT_QI },
32418 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_absv4di2_mask, "__builtin_ia32_pabsq256_mask", IX86_BUILTIN_PABSQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_QI },
32419 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_absv2di2_mask, "__builtin_ia32_pabsq128_mask", IX86_BUILTIN_PABSQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_QI },
32420 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_absv8si2_mask, "__builtin_ia32_pabsd256_mask", IX86_BUILTIN_PABSD256_MASK, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_QI },
32421 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_absv4si2_mask, "__builtin_ia32_pabsd128_mask", IX86_BUILTIN_PABSD128_MASK, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_QI },
32422 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_pmulhrswv16hi3_mask, "__builtin_ia32_pmulhrsw256_mask", IX86_BUILTIN_PMULHRSW256_MASK, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_V16HI_HI },
32423 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_ssse3_pmulhrswv8hi3_mask, "__builtin_ia32_pmulhrsw128_mask", IX86_BUILTIN_PMULHRSW128_MASK, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_V8HI_QI },
32424 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_umulv8hi3_highpart_mask, "__builtin_ia32_pmulhuw128_mask", IX86_BUILTIN_PMULHUW128_MASK, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_V8HI_QI },
32425 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_umulv16hi3_highpart_mask, "__builtin_ia32_pmulhuw256_mask", IX86_BUILTIN_PMULHUW256_MASK, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_V16HI_HI },
32426 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_smulv16hi3_highpart_mask, "__builtin_ia32_pmulhw256_mask", IX86_BUILTIN_PMULHW256_MASK, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_V16HI_HI },
32427 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_smulv8hi3_highpart_mask, "__builtin_ia32_pmulhw128_mask", IX86_BUILTIN_PMULHW128_MASK, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_V8HI_QI },
32428 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_mulv16hi3_mask, "__builtin_ia32_pmullw256_mask", IX86_BUILTIN_PMULLW256_MASK, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_V16HI_HI },
32429 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_mulv8hi3_mask, "__builtin_ia32_pmullw128_mask", IX86_BUILTIN_PMULLW128_MASK, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_V8HI_QI },
32430 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512dq_mulv4di3_mask, "__builtin_ia32_pmullq256_mask", IX86_BUILTIN_PMULLQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_V4DI_QI },
32431 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512dq_mulv2di3_mask, "__builtin_ia32_pmullq128_mask", IX86_BUILTIN_PMULLQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI_QI },
32432 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_andv4df3_mask, "__builtin_ia32_andpd256_mask", IX86_BUILTIN_ANDPD256_MASK, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DF_QI },
32433 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_andv2df3_mask, "__builtin_ia32_andpd128_mask", IX86_BUILTIN_ANDPD128_MASK, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF_QI },
32434 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_andv8sf3_mask, "__builtin_ia32_andps256_mask", IX86_BUILTIN_ANDPS256_MASK, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SF_QI },
32435 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_andv4sf3_mask, "__builtin_ia32_andps128_mask", IX86_BUILTIN_ANDPS128_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF_QI },
32436 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_andnotv4df3_mask, "__builtin_ia32_andnpd256_mask", IX86_BUILTIN_ANDNPD256_MASK, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DF_QI },
32437 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse2_andnotv2df3_mask, "__builtin_ia32_andnpd128_mask", IX86_BUILTIN_ANDNPD128_MASK, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF_QI },
32438 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_andnotv8sf3_mask, "__builtin_ia32_andnps256_mask", IX86_BUILTIN_ANDNPS256_MASK, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SF_QI },
32439 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse_andnotv4sf3_mask, "__builtin_ia32_andnps128_mask", IX86_BUILTIN_ANDNPS128_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF_QI },
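/* Masked shifts by immediate and by count operand (PSLL/PSRA/PSRL),
   interleaved with the masked bitwise PAND/PANDN/POR/PXOR entries.  */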
32440 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_ashlv8hi3_mask, "__builtin_ia32_psllwi128_mask", IX86_BUILTIN_PSLLWI128_MASK, UNKNOWN, (int) V8HI_FTYPE_V8HI_INT_V8HI_QI },
32441 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_ashlv4si3_mask, "__builtin_ia32_pslldi128_mask", IX86_BUILTIN_PSLLDI128_MASK, UNKNOWN, (int) V4SI_FTYPE_V4SI_INT_V4SI_QI },
32442 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_ashlv2di3_mask, "__builtin_ia32_psllqi128_mask", IX86_BUILTIN_PSLLQI128_MASK, UNKNOWN, (int) V2DI_FTYPE_V2DI_INT_V2DI_QI },
32443 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_ashlv8hi3_mask, "__builtin_ia32_psllw128_mask", IX86_BUILTIN_PSLLW128_MASK, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_V8HI_QI },
32444 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_ashlv4si3_mask, "__builtin_ia32_pslld128_mask", IX86_BUILTIN_PSLLD128_MASK, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI_QI },
32445 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_ashlv2di3_mask, "__builtin_ia32_psllq128_mask", IX86_BUILTIN_PSLLQ128_MASK, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI_QI },
32446 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_ashlv16hi3_mask, "__builtin_ia32_psllwi256_mask", IX86_BUILTIN_PSLLWI256_MASK, UNKNOWN, (int) V16HI_FTYPE_V16HI_INT_V16HI_HI },
32447 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_ashlv16hi3_mask, "__builtin_ia32_psllw256_mask", IX86_BUILTIN_PSLLW256_MASK, UNKNOWN, (int) V16HI_FTYPE_V16HI_V8HI_V16HI_HI },
32448 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_ashlv8si3_mask, "__builtin_ia32_pslldi256_mask", IX86_BUILTIN_PSLLDI256_MASK, UNKNOWN, (int) V8SI_FTYPE_V8SI_INT_V8SI_QI },
32449 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_ashlv8si3_mask, "__builtin_ia32_pslld256_mask", IX86_BUILTIN_PSLLD256_MASK, UNKNOWN, (int) V8SI_FTYPE_V8SI_V4SI_V8SI_QI },
32450 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_ashlv4di3_mask, "__builtin_ia32_psllqi256_mask", IX86_BUILTIN_PSLLQI256_MASK, UNKNOWN, (int) V4DI_FTYPE_V4DI_INT_V4DI_QI },
32451 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_ashlv4di3_mask, "__builtin_ia32_psllq256_mask", IX86_BUILTIN_PSLLQ256_MASK, UNKNOWN, (int) V4DI_FTYPE_V4DI_V2DI_V4DI_QI },
32452 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_ashrv4si3_mask, "__builtin_ia32_psradi128_mask", IX86_BUILTIN_PSRADI128_MASK, UNKNOWN, (int) V4SI_FTYPE_V4SI_INT_V4SI_QI },
32453 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_ashrv4si3_mask, "__builtin_ia32_psrad128_mask", IX86_BUILTIN_PSRAD128_MASK, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI_QI },
32454 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_ashrv8si3_mask, "__builtin_ia32_psradi256_mask", IX86_BUILTIN_PSRADI256_MASK, UNKNOWN, (int) V8SI_FTYPE_V8SI_INT_V8SI_QI },
32455 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_ashrv8si3_mask, "__builtin_ia32_psrad256_mask", IX86_BUILTIN_PSRAD256_MASK, UNKNOWN, (int) V8SI_FTYPE_V8SI_V4SI_V8SI_QI },
32456 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_ashrv2di3_mask, "__builtin_ia32_psraqi128_mask", IX86_BUILTIN_PSRAQI128_MASK, UNKNOWN, (int) V2DI_FTYPE_V2DI_INT_V2DI_QI },
32457 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_ashrv2di3_mask, "__builtin_ia32_psraq128_mask", IX86_BUILTIN_PSRAQ128_MASK, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI_QI },
32458 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_ashrv4di3_mask, "__builtin_ia32_psraqi256_mask", IX86_BUILTIN_PSRAQI256_MASK, UNKNOWN, (int) V4DI_FTYPE_V4DI_INT_V4DI_QI },
32459 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_ashrv4di3_mask, "__builtin_ia32_psraq256_mask", IX86_BUILTIN_PSRAQ256_MASK, UNKNOWN, (int) V4DI_FTYPE_V4DI_V2DI_V4DI_QI },
32460 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_andv8si3_mask, "__builtin_ia32_pandd256_mask", IX86_BUILTIN_PANDD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_V8SI_QI },
32461 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_andv4si3_mask, "__builtin_ia32_pandd128_mask", IX86_BUILTIN_PANDD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI_QI },
32462 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_lshrv4si3_mask, "__builtin_ia32_psrldi128_mask", IX86_BUILTIN_PSRLDI128_MASK, UNKNOWN, (int) V4SI_FTYPE_V4SI_INT_V4SI_QI },
32463 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_lshrv4si3_mask, "__builtin_ia32_psrld128_mask", IX86_BUILTIN_PSRLD128_MASK, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI_QI },
32464 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_lshrv8si3_mask, "__builtin_ia32_psrldi256_mask", IX86_BUILTIN_PSRLDI256_MASK, UNKNOWN, (int) V8SI_FTYPE_V8SI_INT_V8SI_QI },
32465 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_lshrv8si3_mask, "__builtin_ia32_psrld256_mask", IX86_BUILTIN_PSRLD256_MASK, UNKNOWN, (int) V8SI_FTYPE_V8SI_V4SI_V8SI_QI },
32466 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_lshrv2di3_mask, "__builtin_ia32_psrlqi128_mask", IX86_BUILTIN_PSRLQI128_MASK, UNKNOWN, (int) V2DI_FTYPE_V2DI_INT_V2DI_QI },
32467 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_lshrv2di3_mask, "__builtin_ia32_psrlq128_mask", IX86_BUILTIN_PSRLQ128_MASK, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI_QI },
32468 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_lshrv4di3_mask, "__builtin_ia32_psrlqi256_mask", IX86_BUILTIN_PSRLQI256_MASK, UNKNOWN, (int) V4DI_FTYPE_V4DI_INT_V4DI_QI },
32469 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_lshrv4di3_mask, "__builtin_ia32_psrlq256_mask", IX86_BUILTIN_PSRLQ256_MASK, UNKNOWN, (int) V4DI_FTYPE_V4DI_V2DI_V4DI_QI },
32470 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_andv4di3_mask, "__builtin_ia32_pandq256_mask", IX86_BUILTIN_PANDQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_V4DI_QI },
32471 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_andv2di3_mask, "__builtin_ia32_pandq128_mask", IX86_BUILTIN_PANDQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI_QI },
32472 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_andnotv8si3_mask, "__builtin_ia32_pandnd256_mask", IX86_BUILTIN_PANDND256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_V8SI_QI },
32473 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse2_andnotv4si3_mask, "__builtin_ia32_pandnd128_mask", IX86_BUILTIN_PANDND128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI_QI },
32474 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_andnotv4di3_mask, "__builtin_ia32_pandnq256_mask", IX86_BUILTIN_PANDNQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_V4DI_QI },
32475 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse2_andnotv2di3_mask, "__builtin_ia32_pandnq128_mask", IX86_BUILTIN_PANDNQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI_QI },
32476 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_iorv8si3_mask, "__builtin_ia32_pord256_mask", IX86_BUILTIN_PORD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_V8SI_QI },
32477 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_iorv4si3_mask, "__builtin_ia32_pord128_mask", IX86_BUILTIN_PORD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI_QI },
32478 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_iorv4di3_mask, "__builtin_ia32_porq256_mask", IX86_BUILTIN_PORQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_V4DI_QI },
32479 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_iorv2di3_mask, "__builtin_ia32_porq128_mask", IX86_BUILTIN_PORQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI_QI },
32480 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_xorv8si3_mask, "__builtin_ia32_pxord256_mask", IX86_BUILTIN_PXORD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_V8SI_QI },
32481 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_xorv4si3_mask, "__builtin_ia32_pxord128_mask", IX86_BUILTIN_PXORD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI_QI },
32482 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_xorv4di3_mask, "__builtin_ia32_pxorq256_mask", IX86_BUILTIN_PXORQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_V4DI_QI },
32483 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_xorv2di3_mask, "__builtin_ia32_pxorq128_mask", IX86_BUILTIN_PXORQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI_QI },
32484 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_packsswb_mask, "__builtin_ia32_packsswb256_mask", IX86_BUILTIN_PACKSSWB256_MASK, UNKNOWN, (int) V32QI_FTYPE_V16HI_V16HI_V32QI_SI },
32485 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse2_packsswb_mask, "__builtin_ia32_packsswb128_mask", IX86_BUILTIN_PACKSSWB128_MASK, UNKNOWN, (int) V16QI_FTYPE_V8HI_V8HI_V16QI_HI },
32486 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_packuswb_mask, "__builtin_ia32_packuswb256_mask", IX86_BUILTIN_PACKUSWB256_MASK, UNKNOWN, (int) V32QI_FTYPE_V16HI_V16HI_V32QI_SI },
32487 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse2_packuswb_mask, "__builtin_ia32_packuswb128_mask", IX86_BUILTIN_PACKUSWB128_MASK, UNKNOWN, (int) V16QI_FTYPE_V8HI_V8HI_V16QI_HI },
32488 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_rndscalev8sf_mask, "__builtin_ia32_rndscaleps_256_mask", IX86_BUILTIN_RNDSCALEPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_INT_V8SF_QI },
32489 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_rndscalev4df_mask, "__builtin_ia32_rndscalepd_256_mask", IX86_BUILTIN_RNDSCALEPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_INT_V4DF_QI },
32490 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_rndscalev4sf_mask, "__builtin_ia32_rndscaleps_128_mask", IX86_BUILTIN_RNDSCALEPS128, UNKNOWN, (int) V4SF_FTYPE_V4SF_INT_V4SF_QI },
32491 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_rndscalev2df_mask, "__builtin_ia32_rndscalepd_128_mask", IX86_BUILTIN_RNDSCALEPD128, UNKNOWN, (int) V2DF_FTYPE_V2DF_INT_V2DF_QI },
32492 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vternlogv4di_mask, "__builtin_ia32_pternlogq256_mask", IX86_BUILTIN_VTERNLOGQ256_MASK, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_V4DI_INT_QI },
32493 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vternlogv4di_maskz, "__builtin_ia32_pternlogq256_maskz", IX86_BUILTIN_VTERNLOGQ256_MASKZ, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_V4DI_INT_QI },
32494 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vternlogv8si_mask, "__builtin_ia32_pternlogd256_mask", IX86_BUILTIN_VTERNLOGD256_MASK, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_V8SI_INT_QI },
32495 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vternlogv8si_maskz, "__builtin_ia32_pternlogd256_maskz", IX86_BUILTIN_VTERNLOGD256_MASKZ, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_V8SI_INT_QI },
32496 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vternlogv2di_mask, "__builtin_ia32_pternlogq128_mask", IX86_BUILTIN_VTERNLOGQ128_MASK, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI_INT_QI },
32497 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vternlogv2di_maskz, "__builtin_ia32_pternlogq128_maskz", IX86_BUILTIN_VTERNLOGQ128_MASKZ, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI_INT_QI },
32498 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vternlogv4si_mask, "__builtin_ia32_pternlogd128_mask", IX86_BUILTIN_VTERNLOGD128_MASK, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI_INT_QI },
32499 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vternlogv4si_maskz, "__builtin_ia32_pternlogd128_maskz", IX86_BUILTIN_VTERNLOGD128_MASKZ, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI_INT_QI },
32500 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_scalefv4df_mask, "__builtin_ia32_scalefpd256_mask", IX86_BUILTIN_SCALEFPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DF_QI },
32501 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_scalefv8sf_mask, "__builtin_ia32_scalefps256_mask", IX86_BUILTIN_SCALEFPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SF_QI },
32502 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_scalefv2df_mask, "__builtin_ia32_scalefpd128_mask", IX86_BUILTIN_SCALEFPD128, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF_QI },
32503 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_scalefv4sf_mask, "__builtin_ia32_scalefps128_mask", IX86_BUILTIN_SCALEFPS128, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF_QI },
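/* Masked fused multiply-add family for 128/256-bit PS/PD: vfmadd, vfmsub,
   vfnmadd, vfnmsub, vfmaddsub and vfmsubadd in their _mask, _mask3 and
   _maskz forms, as applicable.  */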
32504 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmadd_v4df_mask, "__builtin_ia32_vfmaddpd256_mask", IX86_BUILTIN_VFMADDPD256_MASK, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DF_QI },
32505 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmadd_v4df_mask3, "__builtin_ia32_vfmaddpd256_mask3", IX86_BUILTIN_VFMADDPD256_MASK3, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DF_QI },
32506 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmadd_v4df_maskz, "__builtin_ia32_vfmaddpd256_maskz", IX86_BUILTIN_VFMADDPD256_MASKZ, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DF_QI },
32507 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmadd_v2df_mask, "__builtin_ia32_vfmaddpd128_mask", IX86_BUILTIN_VFMADDPD128_MASK, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF_QI },
32508 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmadd_v2df_mask3, "__builtin_ia32_vfmaddpd128_mask3", IX86_BUILTIN_VFMADDPD128_MASK3, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF_QI },
32509 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmadd_v2df_maskz, "__builtin_ia32_vfmaddpd128_maskz", IX86_BUILTIN_VFMADDPD128_MASKZ, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF_QI },
32510 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmadd_v8sf_mask, "__builtin_ia32_vfmaddps256_mask", IX86_BUILTIN_VFMADDPS256_MASK, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SF_QI },
32511 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmadd_v8sf_mask3, "__builtin_ia32_vfmaddps256_mask3", IX86_BUILTIN_VFMADDPS256_MASK3, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SF_QI },
32512 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmadd_v8sf_maskz, "__builtin_ia32_vfmaddps256_maskz", IX86_BUILTIN_VFMADDPS256_MASKZ, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SF_QI },
32513 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmadd_v4sf_mask, "__builtin_ia32_vfmaddps128_mask", IX86_BUILTIN_VFMADDPS128_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF_QI },
32514 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmadd_v4sf_mask3, "__builtin_ia32_vfmaddps128_mask3", IX86_BUILTIN_VFMADDPS128_MASK3, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF_QI },
32515 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmadd_v4sf_maskz, "__builtin_ia32_vfmaddps128_maskz", IX86_BUILTIN_VFMADDPS128_MASKZ, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF_QI },
32516 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmsub_v4df_mask3, "__builtin_ia32_vfmsubpd256_mask3", IX86_BUILTIN_VFMSUBPD256_MASK3, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DF_QI },
32517 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmsub_v2df_mask3, "__builtin_ia32_vfmsubpd128_mask3", IX86_BUILTIN_VFMSUBPD128_MASK3, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF_QI },
32518 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmsub_v8sf_mask3, "__builtin_ia32_vfmsubps256_mask3", IX86_BUILTIN_VFMSUBPS256_MASK3, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SF_QI },
32519 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmsub_v4sf_mask3, "__builtin_ia32_vfmsubps128_mask3", IX86_BUILTIN_VFMSUBPS128_MASK3, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF_QI },
32520 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fnmadd_v4df_mask, "__builtin_ia32_vfnmaddpd256_mask", IX86_BUILTIN_VFNMADDPD256_MASK, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DF_QI },
32521 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fnmadd_v2df_mask, "__builtin_ia32_vfnmaddpd128_mask", IX86_BUILTIN_VFNMADDPD128_MASK, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF_QI },
32522 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fnmadd_v8sf_mask, "__builtin_ia32_vfnmaddps256_mask", IX86_BUILTIN_VFNMADDPS256_MASK, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SF_QI },
32523 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fnmadd_v4sf_mask, "__builtin_ia32_vfnmaddps128_mask", IX86_BUILTIN_VFNMADDPS128_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF_QI },
32524 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fnmsub_v4df_mask, "__builtin_ia32_vfnmsubpd256_mask", IX86_BUILTIN_VFNMSUBPD256_MASK, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DF_QI },
32525 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fnmsub_v4df_mask3, "__builtin_ia32_vfnmsubpd256_mask3", IX86_BUILTIN_VFNMSUBPD256_MASK3, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DF_QI },
32526 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fnmsub_v2df_mask, "__builtin_ia32_vfnmsubpd128_mask", IX86_BUILTIN_VFNMSUBPD128_MASK, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF_QI },
32527 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fnmsub_v2df_mask3, "__builtin_ia32_vfnmsubpd128_mask3", IX86_BUILTIN_VFNMSUBPD128_MASK3, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF_QI },
32528 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fnmsub_v8sf_mask, "__builtin_ia32_vfnmsubps256_mask", IX86_BUILTIN_VFNMSUBPS256_MASK, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SF_QI },
32529 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fnmsub_v8sf_mask3, "__builtin_ia32_vfnmsubps256_mask3", IX86_BUILTIN_VFNMSUBPS256_MASK3, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SF_QI },
32530 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fnmsub_v4sf_mask, "__builtin_ia32_vfnmsubps128_mask", IX86_BUILTIN_VFNMSUBPS128_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF_QI },
32531 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fnmsub_v4sf_mask3, "__builtin_ia32_vfnmsubps128_mask3", IX86_BUILTIN_VFNMSUBPS128_MASK3, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF_QI },
32532 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmaddsub_v4df_mask, "__builtin_ia32_vfmaddsubpd256_mask", IX86_BUILTIN_VFMADDSUBPD256_MASK, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DF_QI },
32533 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmaddsub_v4df_mask3, "__builtin_ia32_vfmaddsubpd256_mask3", IX86_BUILTIN_VFMADDSUBPD256_MASK3, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DF_QI },
32534 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmaddsub_v4df_maskz, "__builtin_ia32_vfmaddsubpd256_maskz", IX86_BUILTIN_VFMADDSUBPD256_MASKZ, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DF_QI },
32535 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmaddsub_v2df_mask, "__builtin_ia32_vfmaddsubpd128_mask", IX86_BUILTIN_VFMADDSUBPD128_MASK, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF_QI },
32536 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmaddsub_v2df_mask3, "__builtin_ia32_vfmaddsubpd128_mask3", IX86_BUILTIN_VFMADDSUBPD128_MASK3, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF_QI },
32537 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmaddsub_v2df_maskz, "__builtin_ia32_vfmaddsubpd128_maskz", IX86_BUILTIN_VFMADDSUBPD128_MASKZ, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF_QI },
32538 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmaddsub_v8sf_mask, "__builtin_ia32_vfmaddsubps256_mask", IX86_BUILTIN_VFMADDSUBPS256_MASK, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SF_QI },
32539 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmaddsub_v8sf_mask3, "__builtin_ia32_vfmaddsubps256_mask3", IX86_BUILTIN_VFMADDSUBPS256_MASK3, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SF_QI },
32540 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmaddsub_v8sf_maskz, "__builtin_ia32_vfmaddsubps256_maskz", IX86_BUILTIN_VFMADDSUBPS256_MASKZ, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SF_QI },
32541 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmaddsub_v4sf_mask, "__builtin_ia32_vfmaddsubps128_mask", IX86_BUILTIN_VFMADDSUBPS128_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF_QI },
32542 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmaddsub_v4sf_mask3, "__builtin_ia32_vfmaddsubps128_mask3", IX86_BUILTIN_VFMADDSUBPS128_MASK3, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF_QI },
32543 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmaddsub_v4sf_maskz, "__builtin_ia32_vfmaddsubps128_maskz", IX86_BUILTIN_VFMADDSUBPS128_MASKZ, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF_QI },
32544 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmsubadd_v4df_mask3, "__builtin_ia32_vfmsubaddpd256_mask3", IX86_BUILTIN_VFMSUBADDPD256_MASK3, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DF_QI },
32545 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmsubadd_v2df_mask3, "__builtin_ia32_vfmsubaddpd128_mask3", IX86_BUILTIN_VFMSUBADDPD128_MASK3, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF_QI },
32546 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmsubadd_v8sf_mask3, "__builtin_ia32_vfmsubaddps256_mask3", IX86_BUILTIN_VFMSUBADDPS256_MASK3, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SF_QI },
32547 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmsubadd_v4sf_mask3, "__builtin_ia32_vfmsubaddps128_mask3", IX86_BUILTIN_VFMSUBADDPS128_MASK3, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF_QI },
32548 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vinsertv4df, "__builtin_ia32_insertf64x2_256_mask", IX86_BUILTIN_INSERTF64X2_256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V2DF_INT_V4DF_QI },
32549 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vinsertv4di, "__builtin_ia32_inserti64x2_256_mask", IX86_BUILTIN_INSERTI64X2_256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V2DI_INT_V4DI_QI },
32550 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ashrvv16hi_mask, "__builtin_ia32_psrav16hi_mask", IX86_BUILTIN_PSRAVV16HI, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_V16HI_HI },
32551 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ashrvv8hi_mask, "__builtin_ia32_psrav8hi_mask", IX86_BUILTIN_PSRAVV8HI, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_V8HI_QI },
32552 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512bw_pmaddubsw512v16hi_mask, "__builtin_ia32_pmaddubsw256_mask", IX86_BUILTIN_PMADDUBSW256_MASK, UNKNOWN, (int) V16HI_FTYPE_V32QI_V32QI_V16HI_HI },
32553 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512bw_pmaddubsw512v8hi_mask, "__builtin_ia32_pmaddubsw128_mask", IX86_BUILTIN_PMADDUBSW128_MASK, UNKNOWN, (int) V8HI_FTYPE_V16QI_V16QI_V8HI_QI },
32554 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512bw_pmaddwd512v16hi_mask, "__builtin_ia32_pmaddwd256_mask", IX86_BUILTIN_PMADDWD256_MASK, UNKNOWN, (int) V8SI_FTYPE_V16HI_V16HI_V8SI_QI },
32555 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512bw_pmaddwd512v8hi_mask, "__builtin_ia32_pmaddwd128_mask", IX86_BUILTIN_PMADDWD128_MASK, UNKNOWN, (int) V4SI_FTYPE_V8HI_V8HI_V4SI_QI },
32556 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_lshrvv16hi_mask, "__builtin_ia32_psrlv16hi_mask", IX86_BUILTIN_PSRLVV16HI, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_V16HI_HI },
32557 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_lshrvv8hi_mask, "__builtin_ia32_psrlv8hi_mask", IX86_BUILTIN_PSRLVV8HI, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_V8HI_QI },
32558 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_fix_notruncv8sfv8si_mask, "__builtin_ia32_cvtps2dq256_mask", IX86_BUILTIN_CVTPS2DQ256_MASK, UNKNOWN, (int) V8SI_FTYPE_V8SF_V8SI_QI },
32559 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse2_fix_notruncv4sfv4si_mask, "__builtin_ia32_cvtps2dq128_mask", IX86_BUILTIN_CVTPS2DQ128_MASK, UNKNOWN, (int) V4SI_FTYPE_V4SF_V4SI_QI },
32560 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ufix_notruncv8sfv8si_mask, "__builtin_ia32_cvtps2udq256_mask", IX86_BUILTIN_CVTPS2UDQ256, UNKNOWN, (int) V8SI_FTYPE_V8SF_V8SI_QI },
32561 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ufix_notruncv4sfv4si_mask, "__builtin_ia32_cvtps2udq128_mask", IX86_BUILTIN_CVTPS2UDQ128, UNKNOWN, (int) V4SI_FTYPE_V4SF_V4SI_QI },
32562 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512dq_cvtps2qqv4di_mask, "__builtin_ia32_cvtps2qq256_mask", IX86_BUILTIN_CVTPS2QQ256, UNKNOWN, (int) V4DI_FTYPE_V4SF_V4DI_QI },
32563 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512dq_cvtps2qqv2di_mask, "__builtin_ia32_cvtps2qq128_mask", IX86_BUILTIN_CVTPS2QQ128, UNKNOWN, (int) V2DI_FTYPE_V4SF_V2DI_QI },
32564 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512dq_cvtps2uqqv4di_mask, "__builtin_ia32_cvtps2uqq256_mask", IX86_BUILTIN_CVTPS2UQQ256, UNKNOWN, (int) V4DI_FTYPE_V4SF_V4DI_QI },
32565 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512dq_cvtps2uqqv2di_mask, "__builtin_ia32_cvtps2uqq128_mask", IX86_BUILTIN_CVTPS2UQQ128, UNKNOWN, (int) V2DI_FTYPE_V4SF_V2DI_QI },
32566 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_getmantv8sf_mask, "__builtin_ia32_getmantps256_mask", IX86_BUILTIN_GETMANTPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_INT_V8SF_QI },
32567 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_getmantv4sf_mask, "__builtin_ia32_getmantps128_mask", IX86_BUILTIN_GETMANTPS128, UNKNOWN, (int) V4SF_FTYPE_V4SF_INT_V4SF_QI },
32568 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_getmantv4df_mask, "__builtin_ia32_getmantpd256_mask", IX86_BUILTIN_GETMANTPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_INT_V4DF_QI },
32569 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_getmantv2df_mask, "__builtin_ia32_getmantpd128_mask", IX86_BUILTIN_GETMANTPD128, UNKNOWN, (int) V2DF_FTYPE_V2DF_INT_V2DF_QI },
32570 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_movddup256_mask, "__builtin_ia32_movddup256_mask", IX86_BUILTIN_MOVDDUP256_MASK, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_QI },
32571 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_vec_dupv2df_mask, "__builtin_ia32_movddup128_mask", IX86_BUILTIN_MOVDDUP128_MASK, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_QI },
32572 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_movshdup256_mask, "__builtin_ia32_movshdup256_mask", IX86_BUILTIN_MOVSHDUP256_MASK, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_QI },
32573 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse3_movshdup_mask, "__builtin_ia32_movshdup128_mask", IX86_BUILTIN_MOVSHDUP128_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_QI },
32574 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_movsldup256_mask, "__builtin_ia32_movsldup256_mask", IX86_BUILTIN_MOVSLDUP256_MASK, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_QI },
32575 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse3_movsldup_mask, "__builtin_ia32_movsldup128_mask", IX86_BUILTIN_MOVSLDUP128_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_QI },
32576 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_floatv4div4sf2_mask, "__builtin_ia32_cvtqq2ps256_mask", IX86_BUILTIN_CVTQQ2PS256, UNKNOWN, (int) V4SF_FTYPE_V4DI_V4SF_QI },
32577 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_floatv2div2sf2_mask, "__builtin_ia32_cvtqq2ps128_mask", IX86_BUILTIN_CVTQQ2PS128, UNKNOWN, (int) V4SF_FTYPE_V2DI_V4SF_QI },
32578 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_ufloatv4div4sf2_mask, "__builtin_ia32_cvtuqq2ps256_mask", IX86_BUILTIN_CVTUQQ2PS256, UNKNOWN, (int) V4SF_FTYPE_V4DI_V4SF_QI },
32579 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_ufloatv2div2sf2_mask, "__builtin_ia32_cvtuqq2ps128_mask", IX86_BUILTIN_CVTUQQ2PS128, UNKNOWN, (int) V4SF_FTYPE_V2DI_V4SF_QI },
32580 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_floatv4div4df2_mask, "__builtin_ia32_cvtqq2pd256_mask", IX86_BUILTIN_CVTQQ2PD256, UNKNOWN, (int) V4DF_FTYPE_V4DI_V4DF_QI },
32581 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_floatv2div2df2_mask, "__builtin_ia32_cvtqq2pd128_mask", IX86_BUILTIN_CVTQQ2PD128, UNKNOWN, (int) V2DF_FTYPE_V2DI_V2DF_QI },
32582 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_ufloatv4div4df2_mask, "__builtin_ia32_cvtuqq2pd256_mask", IX86_BUILTIN_CVTUQQ2PD256, UNKNOWN, (int) V4DF_FTYPE_V4DI_V4DF_QI },
32583 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_ufloatv2div2df2_mask, "__builtin_ia32_cvtuqq2pd128_mask", IX86_BUILTIN_CVTUQQ2PD128, UNKNOWN, (int) V2DF_FTYPE_V2DI_V2DF_QI },
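/* Masked two-source variable permutes (vpermt2var/vpermi2var) on D/Q/PS/PD
   elements, 256-bit forms followed by the 128-bit forms.  */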
32584 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermt2varv4di3_mask, "__builtin_ia32_vpermt2varq256_mask", IX86_BUILTIN_VPERMT2VARQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_V4DI_QI },
32585 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermt2varv4di3_maskz, "__builtin_ia32_vpermt2varq256_maskz", IX86_BUILTIN_VPERMT2VARQ256_MASKZ, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_V4DI_QI },
32586 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermt2varv8si3_mask, "__builtin_ia32_vpermt2vard256_mask", IX86_BUILTIN_VPERMT2VARD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_V8SI_QI },
32587 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermt2varv8si3_maskz, "__builtin_ia32_vpermt2vard256_maskz", IX86_BUILTIN_VPERMT2VARD256_MASKZ, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_V8SI_QI },
32588 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermi2varv4di3_mask, "__builtin_ia32_vpermi2varq256_mask", IX86_BUILTIN_VPERMI2VARQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_V4DI_QI },
32589 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermi2varv8si3_mask, "__builtin_ia32_vpermi2vard256_mask", IX86_BUILTIN_VPERMI2VARD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_V8SI_QI },
32590 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermt2varv4df3_mask, "__builtin_ia32_vpermt2varpd256_mask", IX86_BUILTIN_VPERMT2VARPD256, UNKNOWN, (int) V4DF_FTYPE_V4DI_V4DF_V4DF_QI },
32591 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermt2varv4df3_maskz, "__builtin_ia32_vpermt2varpd256_maskz", IX86_BUILTIN_VPERMT2VARPD256_MASKZ, UNKNOWN, (int) V4DF_FTYPE_V4DI_V4DF_V4DF_QI },
32592 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermt2varv8sf3_mask, "__builtin_ia32_vpermt2varps256_mask", IX86_BUILTIN_VPERMT2VARPS256, UNKNOWN, (int) V8SF_FTYPE_V8SI_V8SF_V8SF_QI },
32593 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermt2varv8sf3_maskz, "__builtin_ia32_vpermt2varps256_maskz", IX86_BUILTIN_VPERMT2VARPS256_MASKZ, UNKNOWN, (int) V8SF_FTYPE_V8SI_V8SF_V8SF_QI },
32594 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermi2varv4df3_mask, "__builtin_ia32_vpermi2varpd256_mask", IX86_BUILTIN_VPERMI2VARPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DI_V4DF_QI },
32595 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermi2varv8sf3_mask, "__builtin_ia32_vpermi2varps256_mask", IX86_BUILTIN_VPERMI2VARPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SI_V8SF_QI },
32596 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermt2varv2di3_mask, "__builtin_ia32_vpermt2varq128_mask", IX86_BUILTIN_VPERMT2VARQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI_QI },
32597 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermt2varv2di3_maskz, "__builtin_ia32_vpermt2varq128_maskz", IX86_BUILTIN_VPERMT2VARQ128_MASKZ, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI_QI },
32598 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermt2varv4si3_mask, "__builtin_ia32_vpermt2vard128_mask", IX86_BUILTIN_VPERMT2VARD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI_QI },
32599 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermt2varv4si3_maskz, "__builtin_ia32_vpermt2vard128_maskz", IX86_BUILTIN_VPERMT2VARD128_MASKZ, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI_QI },
32600 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermi2varv2di3_mask, "__builtin_ia32_vpermi2varq128_mask", IX86_BUILTIN_VPERMI2VARQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI_QI },
32601 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermi2varv4si3_mask, "__builtin_ia32_vpermi2vard128_mask", IX86_BUILTIN_VPERMI2VARD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI_QI },
32602 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermt2varv2df3_mask, "__builtin_ia32_vpermt2varpd128_mask", IX86_BUILTIN_VPERMT2VARPD128, UNKNOWN, (int) V2DF_FTYPE_V2DI_V2DF_V2DF_QI },
32603 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermt2varv2df3_maskz, "__builtin_ia32_vpermt2varpd128_maskz", IX86_BUILTIN_VPERMT2VARPD128_MASKZ, UNKNOWN, (int) V2DF_FTYPE_V2DI_V2DF_V2DF_QI },
32604 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermt2varv4sf3_mask, "__builtin_ia32_vpermt2varps128_mask", IX86_BUILTIN_VPERMT2VARPS128, UNKNOWN, (int) V4SF_FTYPE_V4SI_V4SF_V4SF_QI },
32605 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermt2varv4sf3_maskz, "__builtin_ia32_vpermt2varps128_maskz", IX86_BUILTIN_VPERMT2VARPS128_MASKZ, UNKNOWN, (int) V4SF_FTYPE_V4SI_V4SF_V4SF_QI },
32606 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermi2varv2df3_mask, "__builtin_ia32_vpermi2varpd128_mask", IX86_BUILTIN_VPERMI2VARPD128, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DI_V2DF_QI },
32607 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermi2varv4sf3_mask, "__builtin_ia32_vpermi2varps128_mask", IX86_BUILTIN_VPERMI2VARPS128, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SI_V4SF_QI },
32608 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_pshufbv32qi3_mask, "__builtin_ia32_pshufb256_mask", IX86_BUILTIN_PSHUFB256_MASK, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI_V32QI_SI },
32609 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_ssse3_pshufbv16qi3_mask, "__builtin_ia32_pshufb128_mask", IX86_BUILTIN_PSHUFB128_MASK, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_V16QI_HI },
32610 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_pshufhwv3_mask, "__builtin_ia32_pshufhw256_mask", IX86_BUILTIN_PSHUFHW256_MASK, UNKNOWN, (int) V16HI_FTYPE_V16HI_INT_V16HI_HI },
32611 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_pshufhw_mask, "__builtin_ia32_pshufhw128_mask", IX86_BUILTIN_PSHUFHW128_MASK, UNKNOWN, (int) V8HI_FTYPE_V8HI_INT_V8HI_QI },
32612 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_pshuflwv3_mask, "__builtin_ia32_pshuflw256_mask", IX86_BUILTIN_PSHUFLW256_MASK, UNKNOWN, (int) V16HI_FTYPE_V16HI_INT_V16HI_HI },
32613 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_pshuflw_mask, "__builtin_ia32_pshuflw128_mask", IX86_BUILTIN_PSHUFLW128_MASK, UNKNOWN, (int) V8HI_FTYPE_V8HI_INT_V8HI_QI },
32614 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_pshufdv3_mask, "__builtin_ia32_pshufd256_mask", IX86_BUILTIN_PSHUFD256_MASK, UNKNOWN, (int) V8SI_FTYPE_V8SI_INT_V8SI_QI },
32615 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_pshufd_mask, "__builtin_ia32_pshufd128_mask", IX86_BUILTIN_PSHUFD128_MASK, UNKNOWN, (int) V4SI_FTYPE_V4SI_INT_V4SI_QI },
32616 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_shufpd256_mask, "__builtin_ia32_shufpd256_mask", IX86_BUILTIN_SHUFPD256_MASK, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_INT_V4DF_QI },
32617 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse2_shufpd_mask, "__builtin_ia32_shufpd128_mask", IX86_BUILTIN_SHUFPD128_MASK, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT_V2DF_QI },
32618 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_shufps256_mask, "__builtin_ia32_shufps256_mask", IX86_BUILTIN_SHUFPS256_MASK, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_INT_V8SF_QI },
32619 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse_shufps_mask, "__builtin_ia32_shufps128_mask", IX86_BUILTIN_SHUFPS128_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT_V4SF_QI },
32620 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_rolvv4di_mask, "__builtin_ia32_prolvq256_mask", IX86_BUILTIN_PROLVQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_V4DI_QI },
32621 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_rolvv2di_mask, "__builtin_ia32_prolvq128_mask", IX86_BUILTIN_PROLVQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI_QI },
32622 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_rolv4di_mask, "__builtin_ia32_prolq256_mask", IX86_BUILTIN_PROLQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_INT_V4DI_QI },
32623 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_rolv2di_mask, "__builtin_ia32_prolq128_mask", IX86_BUILTIN_PROLQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_INT_V2DI_QI },
32624 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_rorvv4di_mask, "__builtin_ia32_prorvq256_mask", IX86_BUILTIN_PRORVQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_V4DI_QI },
32625 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_rorvv2di_mask, "__builtin_ia32_prorvq128_mask", IX86_BUILTIN_PRORVQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI_QI },
32626 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_rorv4di_mask, "__builtin_ia32_prorq256_mask", IX86_BUILTIN_PRORQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_INT_V4DI_QI },
32627 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_rorv2di_mask, "__builtin_ia32_prorq128_mask", IX86_BUILTIN_PRORQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_INT_V2DI_QI },
32628 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_ashrvv2di_mask, "__builtin_ia32_psravq128_mask", IX86_BUILTIN_PSRAVQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI_QI },
32629 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_ashrvv4di_mask, "__builtin_ia32_psravq256_mask", IX86_BUILTIN_PSRAVQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_V4DI_QI },
32630 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_ashlvv4di_mask, "__builtin_ia32_psllv4di_mask", IX86_BUILTIN_PSLLVV4DI_MASK, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_V4DI_QI },
32631 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_ashlvv2di_mask, "__builtin_ia32_psllv2di_mask", IX86_BUILTIN_PSLLVV2DI_MASK, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI_QI },
32632 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_ashlvv8si_mask, "__builtin_ia32_psllv8si_mask", IX86_BUILTIN_PSLLVV8SI_MASK, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_V8SI_QI },
32633 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_ashlvv4si_mask, "__builtin_ia32_psllv4si_mask", IX86_BUILTIN_PSLLVV4SI_MASK, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI_QI },
32634 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_ashrvv8si_mask, "__builtin_ia32_psrav8si_mask", IX86_BUILTIN_PSRAVV8SI_MASK, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_V8SI_QI },
32635 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_ashrvv4si_mask, "__builtin_ia32_psrav4si_mask", IX86_BUILTIN_PSRAVV4SI_MASK, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI_QI },
32636 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_lshrvv4di_mask, "__builtin_ia32_psrlv4di_mask", IX86_BUILTIN_PSRLVV4DI_MASK, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_V4DI_QI },
32637 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_lshrvv2di_mask, "__builtin_ia32_psrlv2di_mask", IX86_BUILTIN_PSRLVV2DI_MASK, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI_QI },
32638 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_lshrvv8si_mask, "__builtin_ia32_psrlv8si_mask", IX86_BUILTIN_PSRLVV8SI_MASK, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_V8SI_QI },
32639 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_lshrvv4si_mask, "__builtin_ia32_psrlv4si_mask", IX86_BUILTIN_PSRLVV4SI_MASK, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI_QI },
32640 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_ashrv16hi3_mask, "__builtin_ia32_psrawi256_mask", IX86_BUILTIN_PSRAWI256_MASK, UNKNOWN, (int) V16HI_FTYPE_V16HI_INT_V16HI_HI },
32641 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_ashrv16hi3_mask, "__builtin_ia32_psraw256_mask", IX86_BUILTIN_PSRAW256_MASK, UNKNOWN, (int) V16HI_FTYPE_V16HI_V8HI_V16HI_HI },
32642 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_ashrv8hi3_mask, "__builtin_ia32_psrawi128_mask", IX86_BUILTIN_PSRAWI128_MASK, UNKNOWN, (int) V8HI_FTYPE_V8HI_INT_V8HI_QI },
32643 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_ashrv8hi3_mask, "__builtin_ia32_psraw128_mask", IX86_BUILTIN_PSRAW128_MASK, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_V8HI_QI },
32644 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_lshrv16hi3_mask, "__builtin_ia32_psrlwi256_mask", IX86_BUILTIN_PSRLWI256_MASK, UNKNOWN, (int) V16HI_FTYPE_V16HI_INT_V16HI_HI },
32645 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_lshrv16hi3_mask, "__builtin_ia32_psrlw256_mask", IX86_BUILTIN_PSRLW256_MASK, UNKNOWN, (int) V16HI_FTYPE_V16HI_V8HI_V16HI_HI },
32646 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_lshrv8hi3_mask, "__builtin_ia32_psrlwi128_mask", IX86_BUILTIN_PSRLWI128_MASK, UNKNOWN, (int) V8HI_FTYPE_V8HI_INT_V8HI_QI },
32647 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_lshrv8hi3_mask, "__builtin_ia32_psrlw128_mask", IX86_BUILTIN_PSRLW128_MASK, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_V8HI_QI },
32648 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_rorvv8si_mask, "__builtin_ia32_prorvd256_mask", IX86_BUILTIN_PRORVD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_V8SI_QI },
32649 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_rolvv8si_mask, "__builtin_ia32_prolvd256_mask", IX86_BUILTIN_PROLVD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_V8SI_QI },
32650 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_rorv8si_mask, "__builtin_ia32_prord256_mask", IX86_BUILTIN_PRORD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_INT_V8SI_QI },
32651 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_rolv8si_mask, "__builtin_ia32_prold256_mask", IX86_BUILTIN_PROLD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_INT_V8SI_QI },
32652 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_rorvv4si_mask, "__builtin_ia32_prorvd128_mask", IX86_BUILTIN_PRORVD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI_QI },
32653 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_rolvv4si_mask, "__builtin_ia32_prolvd128_mask", IX86_BUILTIN_PROLVD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI_QI },
32654 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_rorv4si_mask, "__builtin_ia32_prord128_mask", IX86_BUILTIN_PRORD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_INT_V4SI_QI },
32655 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_rolv4si_mask, "__builtin_ia32_prold128_mask", IX86_BUILTIN_PROLD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_INT_V4SI_QI },
32656 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512dq_fpclassv4df_mask, "__builtin_ia32_fpclasspd256_mask", IX86_BUILTIN_FPCLASSPD256, UNKNOWN, (int) QI_FTYPE_V4DF_INT_QI },
32657 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512dq_fpclassv2df_mask, "__builtin_ia32_fpclasspd128_mask", IX86_BUILTIN_FPCLASSPD128, UNKNOWN, (int) QI_FTYPE_V2DF_INT_QI },
32658 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_avx512dq_vmfpclassv2df, "__builtin_ia32_fpclasssd", IX86_BUILTIN_FPCLASSSD, UNKNOWN, (int) QI_FTYPE_V2DF_INT },
32659 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512dq_fpclassv8sf_mask, "__builtin_ia32_fpclassps256_mask", IX86_BUILTIN_FPCLASSPS256, UNKNOWN, (int) QI_FTYPE_V8SF_INT_QI },
32660 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512dq_fpclassv4sf_mask, "__builtin_ia32_fpclassps128_mask", IX86_BUILTIN_FPCLASSPS128, UNKNOWN, (int) QI_FTYPE_V4SF_INT_QI },
32661 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_avx512dq_vmfpclassv4sf, "__builtin_ia32_fpclassss", IX86_BUILTIN_FPCLASSSS, UNKNOWN, (int) QI_FTYPE_V4SF_INT },
32662 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_cvtb2maskv16qi, "__builtin_ia32_cvtb2mask128", IX86_BUILTIN_CVTB2MASK128, UNKNOWN, (int) HI_FTYPE_V16QI },
32663 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_cvtb2maskv32qi, "__builtin_ia32_cvtb2mask256", IX86_BUILTIN_CVTB2MASK256, UNKNOWN, (int) SI_FTYPE_V32QI },
32664 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_cvtw2maskv8hi, "__builtin_ia32_cvtw2mask128", IX86_BUILTIN_CVTW2MASK128, UNKNOWN, (int) QI_FTYPE_V8HI },
32665 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_cvtw2maskv16hi, "__builtin_ia32_cvtw2mask256", IX86_BUILTIN_CVTW2MASK256, UNKNOWN, (int) HI_FTYPE_V16HI },
32666 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_cvtd2maskv4si, "__builtin_ia32_cvtd2mask128", IX86_BUILTIN_CVTD2MASK128, UNKNOWN, (int) QI_FTYPE_V4SI },
32667 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_cvtd2maskv8si, "__builtin_ia32_cvtd2mask256", IX86_BUILTIN_CVTD2MASK256, UNKNOWN, (int) QI_FTYPE_V8SI },
32668 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_cvtq2maskv2di, "__builtin_ia32_cvtq2mask128", IX86_BUILTIN_CVTQ2MASK128, UNKNOWN, (int) QI_FTYPE_V2DI },
32669 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_cvtq2maskv4di, "__builtin_ia32_cvtq2mask256", IX86_BUILTIN_CVTQ2MASK256, UNKNOWN, (int) QI_FTYPE_V4DI },
32670 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_cvtmask2bv16qi, "__builtin_ia32_cvtmask2b128", IX86_BUILTIN_CVTMASK2B128, UNKNOWN, (int) V16QI_FTYPE_HI },
32671 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_cvtmask2bv32qi, "__builtin_ia32_cvtmask2b256", IX86_BUILTIN_CVTMASK2B256, UNKNOWN, (int) V32QI_FTYPE_SI },
32672 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_cvtmask2wv8hi, "__builtin_ia32_cvtmask2w128", IX86_BUILTIN_CVTMASK2W128, UNKNOWN, (int) V8HI_FTYPE_QI },
32673 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_cvtmask2wv16hi, "__builtin_ia32_cvtmask2w256", IX86_BUILTIN_CVTMASK2W256, UNKNOWN, (int) V16HI_FTYPE_HI },
32674 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_cvtmask2dv4si, "__builtin_ia32_cvtmask2d128", IX86_BUILTIN_CVTMASK2D128, UNKNOWN, (int) V4SI_FTYPE_QI },
32675 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_cvtmask2dv8si, "__builtin_ia32_cvtmask2d256", IX86_BUILTIN_CVTMASK2D256, UNKNOWN, (int) V8SI_FTYPE_QI },
32676 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_cvtmask2qv2di, "__builtin_ia32_cvtmask2q128", IX86_BUILTIN_CVTMASK2Q128, UNKNOWN, (int) V2DI_FTYPE_QI },
32677 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_cvtmask2qv4di, "__builtin_ia32_cvtmask2q256", IX86_BUILTIN_CVTMASK2Q256, UNKNOWN, (int) V4DI_FTYPE_QI },
32678 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_eqv16qi3_mask, "__builtin_ia32_pcmpeqb128_mask", IX86_BUILTIN_PCMPEQB128_MASK, UNKNOWN, (int) HI_FTYPE_V16QI_V16QI_HI },
32679 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_eqv32qi3_mask, "__builtin_ia32_pcmpeqb256_mask", IX86_BUILTIN_PCMPEQB256_MASK, UNKNOWN, (int) SI_FTYPE_V32QI_V32QI_SI },
32680 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_eqv8hi3_mask, "__builtin_ia32_pcmpeqw128_mask", IX86_BUILTIN_PCMPEQW128_MASK, UNKNOWN, (int) QI_FTYPE_V8HI_V8HI_QI },
32681 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_eqv16hi3_mask, "__builtin_ia32_pcmpeqw256_mask", IX86_BUILTIN_PCMPEQW256_MASK, UNKNOWN, (int) HI_FTYPE_V16HI_V16HI_HI },
32682 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_eqv4si3_mask, "__builtin_ia32_pcmpeqd128_mask", IX86_BUILTIN_PCMPEQD128_MASK, UNKNOWN, (int) QI_FTYPE_V4SI_V4SI_QI },
32683 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_eqv8si3_mask, "__builtin_ia32_pcmpeqd256_mask", IX86_BUILTIN_PCMPEQD256_MASK, UNKNOWN, (int) QI_FTYPE_V8SI_V8SI_QI },
32684 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_eqv2di3_mask, "__builtin_ia32_pcmpeqq128_mask", IX86_BUILTIN_PCMPEQQ128_MASK, UNKNOWN, (int) QI_FTYPE_V2DI_V2DI_QI },
32685 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_eqv4di3_mask, "__builtin_ia32_pcmpeqq256_mask", IX86_BUILTIN_PCMPEQQ256_MASK, UNKNOWN, (int) QI_FTYPE_V4DI_V4DI_QI },
32686 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_gtv16qi3_mask, "__builtin_ia32_pcmpgtb128_mask", IX86_BUILTIN_PCMPGTB128_MASK, UNKNOWN, (int) HI_FTYPE_V16QI_V16QI_HI },
32687 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_gtv32qi3_mask, "__builtin_ia32_pcmpgtb256_mask", IX86_BUILTIN_PCMPGTB256_MASK, UNKNOWN, (int) SI_FTYPE_V32QI_V32QI_SI },
32688 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_gtv8hi3_mask, "__builtin_ia32_pcmpgtw128_mask", IX86_BUILTIN_PCMPGTW128_MASK, UNKNOWN, (int) QI_FTYPE_V8HI_V8HI_QI },
32689 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_gtv16hi3_mask, "__builtin_ia32_pcmpgtw256_mask", IX86_BUILTIN_PCMPGTW256_MASK, UNKNOWN, (int) HI_FTYPE_V16HI_V16HI_HI },
32690 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_gtv4si3_mask, "__builtin_ia32_pcmpgtd128_mask", IX86_BUILTIN_PCMPGTD128_MASK, UNKNOWN, (int) QI_FTYPE_V4SI_V4SI_QI },
32691 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_gtv8si3_mask, "__builtin_ia32_pcmpgtd256_mask", IX86_BUILTIN_PCMPGTD256_MASK, UNKNOWN, (int) QI_FTYPE_V8SI_V8SI_QI },
32692 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_gtv2di3_mask, "__builtin_ia32_pcmpgtq128_mask", IX86_BUILTIN_PCMPGTQ128_MASK, UNKNOWN, (int) QI_FTYPE_V2DI_V2DI_QI },
32693 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_gtv4di3_mask, "__builtin_ia32_pcmpgtq256_mask", IX86_BUILTIN_PCMPGTQ256_MASK, UNKNOWN, (int) QI_FTYPE_V4DI_V4DI_QI },
32694 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_testmv16qi3_mask, "__builtin_ia32_ptestmb128", IX86_BUILTIN_PTESTMB128, UNKNOWN, (int) HI_FTYPE_V16QI_V16QI_HI },
32695 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_testmv32qi3_mask, "__builtin_ia32_ptestmb256", IX86_BUILTIN_PTESTMB256, UNKNOWN, (int) SI_FTYPE_V32QI_V32QI_SI },
32696 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_testmv8hi3_mask, "__builtin_ia32_ptestmw128", IX86_BUILTIN_PTESTMW128, UNKNOWN, (int) QI_FTYPE_V8HI_V8HI_QI },
32697 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_testmv16hi3_mask, "__builtin_ia32_ptestmw256", IX86_BUILTIN_PTESTMW256, UNKNOWN, (int) HI_FTYPE_V16HI_V16HI_HI },
32698 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_testmv4si3_mask, "__builtin_ia32_ptestmd128", IX86_BUILTIN_PTESTMD128, UNKNOWN, (int) QI_FTYPE_V4SI_V4SI_QI },
32699 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_testmv8si3_mask, "__builtin_ia32_ptestmd256", IX86_BUILTIN_PTESTMD256, UNKNOWN, (int) QI_FTYPE_V8SI_V8SI_QI },
32700 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_testmv2di3_mask, "__builtin_ia32_ptestmq128", IX86_BUILTIN_PTESTMQ128, UNKNOWN, (int) QI_FTYPE_V2DI_V2DI_QI },
32701 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_testmv4di3_mask, "__builtin_ia32_ptestmq256", IX86_BUILTIN_PTESTMQ256, UNKNOWN, (int) QI_FTYPE_V4DI_V4DI_QI },
32702 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_testnmv16qi3_mask, "__builtin_ia32_ptestnmb128", IX86_BUILTIN_PTESTNMB128, UNKNOWN, (int) HI_FTYPE_V16QI_V16QI_HI },
32703 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_testnmv32qi3_mask, "__builtin_ia32_ptestnmb256", IX86_BUILTIN_PTESTNMB256, UNKNOWN, (int) SI_FTYPE_V32QI_V32QI_SI },
32704 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_testnmv8hi3_mask, "__builtin_ia32_ptestnmw128", IX86_BUILTIN_PTESTNMW128, UNKNOWN, (int) QI_FTYPE_V8HI_V8HI_QI },
32705 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_testnmv16hi3_mask, "__builtin_ia32_ptestnmw256", IX86_BUILTIN_PTESTNMW256, UNKNOWN, (int) HI_FTYPE_V16HI_V16HI_HI },
32706 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_testnmv4si3_mask, "__builtin_ia32_ptestnmd128", IX86_BUILTIN_PTESTNMD128, UNKNOWN, (int) QI_FTYPE_V4SI_V4SI_QI },
32707 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_testnmv8si3_mask, "__builtin_ia32_ptestnmd256", IX86_BUILTIN_PTESTNMD256, UNKNOWN, (int) QI_FTYPE_V8SI_V8SI_QI },
32708 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_testnmv2di3_mask, "__builtin_ia32_ptestnmq128", IX86_BUILTIN_PTESTNMQ128, UNKNOWN, (int) QI_FTYPE_V2DI_V2DI_QI },
32709 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_testnmv4di3_mask, "__builtin_ia32_ptestnmq256", IX86_BUILTIN_PTESTNMQ256, UNKNOWN, (int) QI_FTYPE_V4DI_V4DI_QI },
32710 { OPTION_MASK_ISA_AVX512VL | OPTION_MASK_ISA_AVX512CD, CODE_FOR_avx512cd_maskb_vec_dupv2di, "__builtin_ia32_broadcastmb128", IX86_BUILTIN_PBROADCASTMB128, UNKNOWN, (int) V2DI_FTYPE_QI },
32711 { OPTION_MASK_ISA_AVX512VL | OPTION_MASK_ISA_AVX512CD, CODE_FOR_avx512cd_maskb_vec_dupv4di, "__builtin_ia32_broadcastmb256", IX86_BUILTIN_PBROADCASTMB256, UNKNOWN, (int) V4DI_FTYPE_QI },
32712 { OPTION_MASK_ISA_AVX512VL | OPTION_MASK_ISA_AVX512CD, CODE_FOR_avx512cd_maskw_vec_dupv4si, "__builtin_ia32_broadcastmw128", IX86_BUILTIN_PBROADCASTMW128, UNKNOWN, (int) V4SI_FTYPE_HI },
32713 { OPTION_MASK_ISA_AVX512VL | OPTION_MASK_ISA_AVX512CD, CODE_FOR_avx512cd_maskw_vec_dupv8si, "__builtin_ia32_broadcastmw256", IX86_BUILTIN_PBROADCASTMW256, UNKNOWN, (int) V8SI_FTYPE_HI },
32714 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_compressv4df_mask, "__builtin_ia32_compressdf256_mask", IX86_BUILTIN_COMPRESSPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_QI },
32715 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_compressv2df_mask, "__builtin_ia32_compressdf128_mask", IX86_BUILTIN_COMPRESSPD128, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_QI },
32716 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_compressv8sf_mask, "__builtin_ia32_compresssf256_mask", IX86_BUILTIN_COMPRESSPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_QI },
32717 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_compressv4sf_mask, "__builtin_ia32_compresssf128_mask", IX86_BUILTIN_COMPRESSPS128, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_QI },
32718 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_compressv4di_mask, "__builtin_ia32_compressdi256_mask", IX86_BUILTIN_PCOMPRESSQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_QI },
32719 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_compressv2di_mask, "__builtin_ia32_compressdi128_mask", IX86_BUILTIN_PCOMPRESSQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_QI },
32720 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_compressv8si_mask, "__builtin_ia32_compresssi256_mask", IX86_BUILTIN_PCOMPRESSD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_QI },
32721 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_compressv4si_mask, "__builtin_ia32_compresssi128_mask", IX86_BUILTIN_PCOMPRESSD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_QI },
32722 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv4df_mask, "__builtin_ia32_expanddf256_mask", IX86_BUILTIN_EXPANDPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_QI },
32723 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv2df_mask, "__builtin_ia32_expanddf128_mask", IX86_BUILTIN_EXPANDPD128, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_QI },
32724 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv8sf_mask, "__builtin_ia32_expandsf256_mask", IX86_BUILTIN_EXPANDPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_QI },
32725 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv4sf_mask, "__builtin_ia32_expandsf128_mask", IX86_BUILTIN_EXPANDPS128, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_QI },
32726 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv4di_mask, "__builtin_ia32_expanddi256_mask", IX86_BUILTIN_PEXPANDQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_QI },
32727 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv2di_mask, "__builtin_ia32_expanddi128_mask", IX86_BUILTIN_PEXPANDQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_QI },
32728 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv8si_mask, "__builtin_ia32_expandsi256_mask", IX86_BUILTIN_PEXPANDD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_QI },
32729 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv4si_mask, "__builtin_ia32_expandsi128_mask", IX86_BUILTIN_PEXPANDD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_QI },
32730 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv4df_maskz, "__builtin_ia32_expanddf256_maskz", IX86_BUILTIN_EXPANDPD256Z, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_QI },
32731 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv2df_maskz, "__builtin_ia32_expanddf128_maskz", IX86_BUILTIN_EXPANDPD128Z, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_QI },
32732 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv8sf_maskz, "__builtin_ia32_expandsf256_maskz", IX86_BUILTIN_EXPANDPS256Z, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_QI },
32733 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv4sf_maskz, "__builtin_ia32_expandsf128_maskz", IX86_BUILTIN_EXPANDPS128Z, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_QI },
32734 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv4di_maskz, "__builtin_ia32_expanddi256_maskz", IX86_BUILTIN_PEXPANDQ256Z, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_QI },
32735 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv2di_maskz, "__builtin_ia32_expanddi128_maskz", IX86_BUILTIN_PEXPANDQ128Z, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_QI },
32736 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv8si_maskz, "__builtin_ia32_expandsi256_maskz", IX86_BUILTIN_PEXPANDD256Z, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_QI },
32737 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv4si_maskz, "__builtin_ia32_expandsi128_maskz", IX86_BUILTIN_PEXPANDD128Z, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_QI },
32738 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_smaxv8si3_mask, "__builtin_ia32_pmaxsd256_mask", IX86_BUILTIN_PMAXSD256_MASK, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_V8SI_QI },
32739 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sminv8si3_mask, "__builtin_ia32_pminsd256_mask", IX86_BUILTIN_PMINSD256_MASK, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_V8SI_QI },
32740 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_umaxv8si3_mask, "__builtin_ia32_pmaxud256_mask", IX86_BUILTIN_PMAXUD256_MASK, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_V8SI_QI },
32741 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_uminv8si3_mask, "__builtin_ia32_pminud256_mask", IX86_BUILTIN_PMINUD256_MASK, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_V8SI_QI },
32742 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_smaxv4si3_mask, "__builtin_ia32_pmaxsd128_mask", IX86_BUILTIN_PMAXSD128_MASK, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI_QI },
32743 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sminv4si3_mask, "__builtin_ia32_pminsd128_mask", IX86_BUILTIN_PMINSD128_MASK, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI_QI },
32744 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_umaxv4si3_mask, "__builtin_ia32_pmaxud128_mask", IX86_BUILTIN_PMAXUD128_MASK, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI_QI },
32745 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_uminv4si3_mask, "__builtin_ia32_pminud128_mask", IX86_BUILTIN_PMINUD128_MASK, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI_QI },
32746 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_smaxv4di3_mask, "__builtin_ia32_pmaxsq256_mask", IX86_BUILTIN_PMAXSQ256_MASK, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_V4DI_QI },
32747 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sminv4di3_mask, "__builtin_ia32_pminsq256_mask", IX86_BUILTIN_PMINSQ256_MASK, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_V4DI_QI },
32748 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_umaxv4di3_mask, "__builtin_ia32_pmaxuq256_mask", IX86_BUILTIN_PMAXUQ256_MASK, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_V4DI_QI },
32749 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_uminv4di3_mask, "__builtin_ia32_pminuq256_mask", IX86_BUILTIN_PMINUQ256_MASK, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_V4DI_QI },
32750 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_smaxv2di3_mask, "__builtin_ia32_pmaxsq128_mask", IX86_BUILTIN_PMAXSQ128_MASK, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI_QI },
32751 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sminv2di3_mask, "__builtin_ia32_pminsq128_mask", IX86_BUILTIN_PMINSQ128_MASK, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI_QI },
32752 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_umaxv2di3_mask, "__builtin_ia32_pmaxuq128_mask", IX86_BUILTIN_PMAXUQ128_MASK, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI_QI },
32753 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_uminv2di3_mask, "__builtin_ia32_pminuq128_mask", IX86_BUILTIN_PMINUQ128_MASK, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI_QI },
32754 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_sminv32qi3_mask, "__builtin_ia32_pminsb256_mask", IX86_BUILTIN_PMINSB256_MASK, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI_V32QI_SI },
32755 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_uminv32qi3_mask, "__builtin_ia32_pminub256_mask", IX86_BUILTIN_PMINUB256_MASK, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI_V32QI_SI },
32756 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_smaxv32qi3_mask, "__builtin_ia32_pmaxsb256_mask", IX86_BUILTIN_PMAXSB256_MASK, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI_V32QI_SI },
32757 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_umaxv32qi3_mask, "__builtin_ia32_pmaxub256_mask", IX86_BUILTIN_PMAXUB256_MASK, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI_V32QI_SI },
32758 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_sminv16qi3_mask, "__builtin_ia32_pminsb128_mask", IX86_BUILTIN_PMINSB128_MASK, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_V16QI_HI },
32759 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_uminv16qi3_mask, "__builtin_ia32_pminub128_mask", IX86_BUILTIN_PMINUB128_MASK, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_V16QI_HI },
32760 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_smaxv16qi3_mask, "__builtin_ia32_pmaxsb128_mask", IX86_BUILTIN_PMAXSB128_MASK, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_V16QI_HI },
32761 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_umaxv16qi3_mask, "__builtin_ia32_pmaxub128_mask", IX86_BUILTIN_PMAXUB128_MASK, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_V16QI_HI },
32762 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_sminv16hi3_mask, "__builtin_ia32_pminsw256_mask", IX86_BUILTIN_PMINSW256_MASK, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_V16HI_HI },
32763 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_uminv16hi3_mask, "__builtin_ia32_pminuw256_mask", IX86_BUILTIN_PMINUW256_MASK, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_V16HI_HI },
32764 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_smaxv16hi3_mask, "__builtin_ia32_pmaxsw256_mask", IX86_BUILTIN_PMAXSW256_MASK, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_V16HI_HI },
32765 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_umaxv16hi3_mask, "__builtin_ia32_pmaxuw256_mask", IX86_BUILTIN_PMAXUW256_MASK, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_V16HI_HI },
32766 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_sminv8hi3_mask, "__builtin_ia32_pminsw128_mask", IX86_BUILTIN_PMINSW128_MASK, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_V8HI_QI },
32767 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_uminv8hi3_mask, "__builtin_ia32_pminuw128_mask", IX86_BUILTIN_PMINUW128_MASK, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_V8HI_QI },
32768 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_smaxv8hi3_mask, "__builtin_ia32_pmaxsw128_mask", IX86_BUILTIN_PMAXSW128_MASK, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_V8HI_QI },
32769 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_umaxv8hi3_mask, "__builtin_ia32_pmaxuw128_mask", IX86_BUILTIN_PMAXUW128_MASK, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_V8HI_QI },
32770 { OPTION_MASK_ISA_AVX512CD | OPTION_MASK_ISA_AVX512VL, CODE_FOR_conflictv4di_mask, "__builtin_ia32_vpconflictdi_256_mask", IX86_BUILTIN_VPCONFLICTQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_QI },
32771 { OPTION_MASK_ISA_AVX512CD | OPTION_MASK_ISA_AVX512VL, CODE_FOR_conflictv8si_mask, "__builtin_ia32_vpconflictsi_256_mask", IX86_BUILTIN_VPCONFLICTD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_QI },
32772 { OPTION_MASK_ISA_AVX512CD | OPTION_MASK_ISA_AVX512VL, CODE_FOR_clzv4di2_mask, "__builtin_ia32_vplzcntq_256_mask", IX86_BUILTIN_VPCLZCNTQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_QI },
32773 { OPTION_MASK_ISA_AVX512CD | OPTION_MASK_ISA_AVX512VL, CODE_FOR_clzv8si2_mask, "__builtin_ia32_vplzcntd_256_mask", IX86_BUILTIN_VPCLZCNTD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_QI },
32774 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_unpckhpd256_mask, "__builtin_ia32_unpckhpd256_mask", IX86_BUILTIN_UNPCKHPD256_MASK, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DF_QI },
32775 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_unpckhpd128_mask, "__builtin_ia32_unpckhpd128_mask", IX86_BUILTIN_UNPCKHPD128_MASK, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF_QI },
32776 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_unpckhps256_mask, "__builtin_ia32_unpckhps256_mask", IX86_BUILTIN_UNPCKHPS256_MASK, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SF_QI },
32777 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_vec_interleave_highv4sf_mask, "__builtin_ia32_unpckhps128_mask", IX86_BUILTIN_UNPCKHPS128_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF_QI },
32778 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_unpcklpd256_mask, "__builtin_ia32_unpcklpd256_mask", IX86_BUILTIN_UNPCKLPD256_MASK, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DF_QI },
32779 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_unpcklpd128_mask, "__builtin_ia32_unpcklpd128_mask", IX86_BUILTIN_UNPCKLPD128_MASK, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF_QI },
32780 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_unpcklps256_mask, "__builtin_ia32_unpcklps256_mask", IX86_BUILTIN_UNPCKLPS256_MASK, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SF_QI },
32781 { OPTION_MASK_ISA_AVX512CD | OPTION_MASK_ISA_AVX512VL, CODE_FOR_conflictv2di_mask, "__builtin_ia32_vpconflictdi_128_mask", IX86_BUILTIN_VPCONFLICTQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_QI },
32782 { OPTION_MASK_ISA_AVX512CD | OPTION_MASK_ISA_AVX512VL, CODE_FOR_conflictv4si_mask, "__builtin_ia32_vpconflictsi_128_mask", IX86_BUILTIN_VPCONFLICTD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_QI },
32783 { OPTION_MASK_ISA_AVX512CD | OPTION_MASK_ISA_AVX512VL, CODE_FOR_clzv2di2_mask, "__builtin_ia32_vplzcntq_128_mask", IX86_BUILTIN_VPCLZCNTQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_QI },
32784 { OPTION_MASK_ISA_AVX512CD | OPTION_MASK_ISA_AVX512VL, CODE_FOR_clzv4si2_mask, "__builtin_ia32_vplzcntd_128_mask", IX86_BUILTIN_VPCLZCNTD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_QI },
32785 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_unpcklps128_mask, "__builtin_ia32_unpcklps128_mask", IX86_BUILTIN_UNPCKLPS128_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF_QI },
32786 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_alignv8si_mask, "__builtin_ia32_alignd256_mask", IX86_BUILTIN_ALIGND256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_INT_V8SI_QI },
32787 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_alignv4di_mask, "__builtin_ia32_alignq256_mask", IX86_BUILTIN_ALIGNQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_INT_V4DI_QI },
32788 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_alignv4si_mask, "__builtin_ia32_alignd128_mask", IX86_BUILTIN_ALIGND128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_INT_V4SI_QI },
32789 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_alignv2di_mask, "__builtin_ia32_alignq128_mask", IX86_BUILTIN_ALIGNQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_INT_V2DI_QI },
32790 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_vcvtps2ph256_mask, "__builtin_ia32_vcvtps2ph256_mask", IX86_BUILTIN_CVTPS2PH256_MASK, UNKNOWN, (int) V8HI_FTYPE_V8SF_INT_V8HI_QI },
32791 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_vcvtps2ph_mask, "__builtin_ia32_vcvtps2ph_mask", IX86_BUILTIN_CVTPS2PH_MASK, UNKNOWN, (int) V8HI_FTYPE_V4SF_INT_V8HI_QI },
32792 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_vcvtph2ps_mask, "__builtin_ia32_vcvtph2ps_mask", IX86_BUILTIN_CVTPH2PS_MASK, UNKNOWN, (int) V4SF_FTYPE_V8HI_V4SF_QI },
32793 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_vcvtph2ps256_mask, "__builtin_ia32_vcvtph2ps256_mask", IX86_BUILTIN_CVTPH2PS256_MASK, UNKNOWN, (int) V8SF_FTYPE_V8HI_V8SF_QI },
32794 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_vec_interleave_highv4si_mask, "__builtin_ia32_punpckhdq128_mask", IX86_BUILTIN_PUNPCKHDQ128_MASK, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI_QI },
32795 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_interleave_highv8si_mask, "__builtin_ia32_punpckhdq256_mask", IX86_BUILTIN_PUNPCKHDQ256_MASK, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_V8SI_QI },
32796 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_vec_interleave_highv2di_mask, "__builtin_ia32_punpckhqdq128_mask", IX86_BUILTIN_PUNPCKHQDQ128_MASK, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI_QI },
32797 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_interleave_highv4di_mask, "__builtin_ia32_punpckhqdq256_mask", IX86_BUILTIN_PUNPCKHQDQ256_MASK, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_V4DI_QI },
32798 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_vec_interleave_lowv4si_mask, "__builtin_ia32_punpckldq128_mask", IX86_BUILTIN_PUNPCKLDQ128_MASK, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI_QI },
32799 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_interleave_lowv8si_mask, "__builtin_ia32_punpckldq256_mask", IX86_BUILTIN_PUNPCKLDQ256_MASK, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_V8SI_QI },
32800 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_vec_interleave_lowv2di_mask, "__builtin_ia32_punpcklqdq128_mask", IX86_BUILTIN_PUNPCKLQDQ128_MASK, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI_QI },
32801 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_interleave_lowv4di_mask, "__builtin_ia32_punpcklqdq256_mask", IX86_BUILTIN_PUNPCKLQDQ256_MASK, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_V4DI_QI },
32802 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_vec_interleave_highv16qi_mask, "__builtin_ia32_punpckhbw128_mask", IX86_BUILTIN_PUNPCKHBW128_MASK, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_V16QI_HI },
32803 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_interleave_highv32qi_mask, "__builtin_ia32_punpckhbw256_mask", IX86_BUILTIN_PUNPCKHBW256_MASK, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI_V32QI_SI },
32804 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_vec_interleave_highv8hi_mask, "__builtin_ia32_punpckhwd128_mask", IX86_BUILTIN_PUNPCKHWD128_MASK, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_V8HI_QI },
32805 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_interleave_highv16hi_mask, "__builtin_ia32_punpckhwd256_mask", IX86_BUILTIN_PUNPCKHWD256_MASK, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_V16HI_HI },
32806 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_vec_interleave_lowv16qi_mask, "__builtin_ia32_punpcklbw128_mask", IX86_BUILTIN_PUNPCKLBW128_MASK, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_V16QI_HI },
32807 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_interleave_lowv32qi_mask, "__builtin_ia32_punpcklbw256_mask", IX86_BUILTIN_PUNPCKLBW256_MASK, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI_V32QI_SI },
32808 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_vec_interleave_lowv8hi_mask, "__builtin_ia32_punpcklwd128_mask", IX86_BUILTIN_PUNPCKLWD128_MASK, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_V8HI_QI },
32809 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_interleave_lowv16hi_mask, "__builtin_ia32_punpcklwd256_mask", IX86_BUILTIN_PUNPCKLWD256_MASK, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_V16HI_HI },
32810 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ashlvv16hi_mask, "__builtin_ia32_psllv16hi_mask", IX86_BUILTIN_PSLLVV16HI, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_V16HI_HI },
32811 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ashlvv8hi_mask, "__builtin_ia32_psllv8hi_mask", IX86_BUILTIN_PSLLVV8HI, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_V8HI_QI },
32812 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_packssdw_mask, "__builtin_ia32_packssdw256_mask", IX86_BUILTIN_PACKSSDW256_MASK, UNKNOWN, (int) V16HI_FTYPE_V8SI_V8SI_V16HI_HI },
32813 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse2_packssdw_mask, "__builtin_ia32_packssdw128_mask", IX86_BUILTIN_PACKSSDW128_MASK, UNKNOWN, (int) V8HI_FTYPE_V4SI_V4SI_V8HI_QI },
32814 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_packusdw_mask, "__builtin_ia32_packusdw256_mask", IX86_BUILTIN_PACKUSDW256_MASK, UNKNOWN, (int) V16HI_FTYPE_V8SI_V8SI_V16HI_HI },
32815 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse4_1_packusdw_mask, "__builtin_ia32_packusdw128_mask", IX86_BUILTIN_PACKUSDW128_MASK, UNKNOWN, (int) V8HI_FTYPE_V4SI_V4SI_V8HI_QI },
32816 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_uavgv32qi3_mask, "__builtin_ia32_pavgb256_mask", IX86_BUILTIN_PAVGB256_MASK, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI_V32QI_SI },
32817 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_uavgv16hi3_mask, "__builtin_ia32_pavgw256_mask", IX86_BUILTIN_PAVGW256_MASK, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_V16HI_HI },
32818 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse2_uavgv16qi3_mask, "__builtin_ia32_pavgb128_mask", IX86_BUILTIN_PAVGB128_MASK, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_V16QI_HI },
32819 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse2_uavgv8hi3_mask, "__builtin_ia32_pavgw128_mask", IX86_BUILTIN_PAVGW128_MASK, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_V8HI_QI },
32820 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_permvarv8sf_mask, "__builtin_ia32_permvarsf256_mask", IX86_BUILTIN_VPERMVARSF256_MASK, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SI_V8SF_QI },
32821 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_permvarv4df_mask, "__builtin_ia32_permvardf256_mask", IX86_BUILTIN_VPERMVARDF256_MASK, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DI_V4DF_QI },
32822 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_permv4df_mask, "__builtin_ia32_permdf256_mask", IX86_BUILTIN_VPERMDF256_MASK, UNKNOWN, (int) V4DF_FTYPE_V4DF_INT_V4DF_QI },
32823 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_absv32qi2_mask, "__builtin_ia32_pabsb256_mask", IX86_BUILTIN_PABSB256_MASK, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI_SI },
32824 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_absv16qi2_mask, "__builtin_ia32_pabsb128_mask", IX86_BUILTIN_PABSB128_MASK, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_HI },
32825 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_absv16hi2_mask, "__builtin_ia32_pabsw256_mask", IX86_BUILTIN_PABSW256_MASK, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_HI },
32826 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_absv8hi2_mask, "__builtin_ia32_pabsw128_mask", IX86_BUILTIN_PABSW128_MASK, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_QI },
32827 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_vpermilvarv2df3_mask, "__builtin_ia32_vpermilvarpd_mask", IX86_BUILTIN_VPERMILVARPD_MASK, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DI_V2DF_QI },
32828 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_vpermilvarv4sf3_mask, "__builtin_ia32_vpermilvarps_mask", IX86_BUILTIN_VPERMILVARPS_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SI_V4SF_QI },
32829 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_vpermilvarv4df3_mask, "__builtin_ia32_vpermilvarpd256_mask", IX86_BUILTIN_VPERMILVARPD256_MASK, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DI_V4DF_QI },
32830 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_vpermilvarv8sf3_mask, "__builtin_ia32_vpermilvarps256_mask", IX86_BUILTIN_VPERMILVARPS256_MASK, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SI_V8SF_QI },
32831 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_vpermilv2df_mask, "__builtin_ia32_vpermilpd_mask", IX86_BUILTIN_VPERMILPD_MASK, UNKNOWN, (int) V2DF_FTYPE_V2DF_INT_V2DF_QI },
32832 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_vpermilv4sf_mask, "__builtin_ia32_vpermilps_mask", IX86_BUILTIN_VPERMILPS_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SF_INT_V4SF_QI },
32833 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_vpermilv4df_mask, "__builtin_ia32_vpermilpd256_mask", IX86_BUILTIN_VPERMILPD256_MASK, UNKNOWN, (int) V4DF_FTYPE_V4DF_INT_V4DF_QI },
32834 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_vpermilv8sf_mask, "__builtin_ia32_vpermilps256_mask", IX86_BUILTIN_VPERMILPS256_MASK, UNKNOWN, (int) V8SF_FTYPE_V8SF_INT_V8SF_QI },
32835 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_blendmv4di, "__builtin_ia32_blendmq_256_mask", IX86_BUILTIN_BLENDMQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_QI },
32836 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_blendmv8si, "__builtin_ia32_blendmd_256_mask", IX86_BUILTIN_BLENDMD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_QI },
32837 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_blendmv4df, "__builtin_ia32_blendmpd_256_mask", IX86_BUILTIN_BLENDMPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_QI },
32838 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_blendmv8sf, "__builtin_ia32_blendmps_256_mask", IX86_BUILTIN_BLENDMPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_QI },
32839 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_blendmv2di, "__builtin_ia32_blendmq_128_mask", IX86_BUILTIN_BLENDMQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_QI },
32840 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_blendmv4si, "__builtin_ia32_blendmd_128_mask", IX86_BUILTIN_BLENDMD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_QI },
32841 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_blendmv2df, "__builtin_ia32_blendmpd_128_mask", IX86_BUILTIN_BLENDMPD128, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_QI },
32842 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_blendmv4sf, "__builtin_ia32_blendmps_128_mask", IX86_BUILTIN_BLENDMPS128, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_QI },
32843 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_blendmv16hi, "__builtin_ia32_blendmw_256_mask", IX86_BUILTIN_BLENDMW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_HI },
32844 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_blendmv32qi, "__builtin_ia32_blendmb_256_mask", IX86_BUILTIN_BLENDMB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI_SI },
32845 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_blendmv8hi, "__builtin_ia32_blendmw_128_mask", IX86_BUILTIN_BLENDMW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_QI },
32846 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_blendmv16qi, "__builtin_ia32_blendmb_128_mask", IX86_BUILTIN_BLENDMB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_HI },
32847 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_mulv8si3_mask, "__builtin_ia32_pmulld256_mask", IX86_BUILTIN_PMULLD256_MASK, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_V8SI_QI },
32848 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_mulv4si3_mask, "__builtin_ia32_pmulld128_mask", IX86_BUILTIN_PMULLD128_MASK, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI_QI },
32849 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_vec_widen_umult_even_v8si_mask, "__builtin_ia32_pmuludq256_mask", IX86_BUILTIN_PMULUDQ256_MASK, UNKNOWN, (int) V4DI_FTYPE_V8SI_V8SI_V4DI_QI },
32850 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_vec_widen_smult_even_v8si_mask, "__builtin_ia32_pmuldq256_mask", IX86_BUILTIN_PMULDQ256_MASK, UNKNOWN, (int) V4DI_FTYPE_V8SI_V8SI_V4DI_QI },
32851 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse4_1_mulv2siv2di3_mask, "__builtin_ia32_pmuldq128_mask", IX86_BUILTIN_PMULDQ128_MASK, UNKNOWN, (int) V2DI_FTYPE_V4SI_V4SI_V2DI_QI },
32852 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_vec_widen_umult_even_v4si_mask, "__builtin_ia32_pmuludq128_mask", IX86_BUILTIN_PMULUDQ128_MASK, UNKNOWN, (int) V2DI_FTYPE_V4SI_V4SI_V2DI_QI },
32853 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_cvtpd2ps256_mask, "__builtin_ia32_cvtpd2ps256_mask", IX86_BUILTIN_CVTPD2PS256_MASK, UNKNOWN, (int) V4SF_FTYPE_V4DF_V4SF_QI },
32854 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse2_cvtpd2ps_mask, "__builtin_ia32_cvtpd2ps_mask", IX86_BUILTIN_CVTPD2PS_MASK, UNKNOWN, (int) V4SF_FTYPE_V2DF_V4SF_QI },
32855 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_permvarv8si_mask, "__builtin_ia32_permvarsi256_mask", IX86_BUILTIN_VPERMVARSI256_MASK, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_V8SI_QI },
32856 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_permvarv4di_mask, "__builtin_ia32_permvardi256_mask", IX86_BUILTIN_VPERMVARDI256_MASK, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_V4DI_QI },
32857 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_permv4di_mask, "__builtin_ia32_permdi256_mask", IX86_BUILTIN_VPERMDI256_MASK, UNKNOWN, (int) V4DI_FTYPE_V4DI_INT_V4DI_QI },
32858 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_cmpv4di3_mask, "__builtin_ia32_cmpq256_mask", IX86_BUILTIN_CMPQ256, UNKNOWN, (int) QI_FTYPE_V4DI_V4DI_INT_QI },
32859 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_cmpv8si3_mask, "__builtin_ia32_cmpd256_mask", IX86_BUILTIN_CMPD256, UNKNOWN, (int) QI_FTYPE_V8SI_V8SI_INT_QI },
32860 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ucmpv4di3_mask, "__builtin_ia32_ucmpq256_mask", IX86_BUILTIN_UCMPQ256, UNKNOWN, (int) QI_FTYPE_V4DI_V4DI_INT_QI },
32861 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ucmpv8si3_mask, "__builtin_ia32_ucmpd256_mask", IX86_BUILTIN_UCMPD256, UNKNOWN, (int) QI_FTYPE_V8SI_V8SI_INT_QI },
32862 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_cmpv32qi3_mask, "__builtin_ia32_cmpb256_mask", IX86_BUILTIN_CMPB256, UNKNOWN, (int) SI_FTYPE_V32QI_V32QI_INT_SI },
32863 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_cmpv16hi3_mask, "__builtin_ia32_cmpw256_mask", IX86_BUILTIN_CMPW256, UNKNOWN, (int) HI_FTYPE_V16HI_V16HI_INT_HI },
32864 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ucmpv32qi3_mask, "__builtin_ia32_ucmpb256_mask", IX86_BUILTIN_UCMPB256, UNKNOWN, (int) SI_FTYPE_V32QI_V32QI_INT_SI },
32865 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ucmpv16hi3_mask, "__builtin_ia32_ucmpw256_mask", IX86_BUILTIN_UCMPW256, UNKNOWN, (int) HI_FTYPE_V16HI_V16HI_INT_HI },
32866 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_cmpv4df3_mask, "__builtin_ia32_cmppd256_mask", IX86_BUILTIN_CMPPD256_MASK, UNKNOWN, (int) QI_FTYPE_V4DF_V4DF_INT_QI },
32867 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_cmpv8sf3_mask, "__builtin_ia32_cmpps256_mask", IX86_BUILTIN_CMPPS256_MASK, UNKNOWN, (int) QI_FTYPE_V8SF_V8SF_INT_QI },
32868 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_cmpv2di3_mask, "__builtin_ia32_cmpq128_mask", IX86_BUILTIN_CMPQ128, UNKNOWN, (int) QI_FTYPE_V2DI_V2DI_INT_QI },
32869 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_cmpv4si3_mask, "__builtin_ia32_cmpd128_mask", IX86_BUILTIN_CMPD128, UNKNOWN, (int) QI_FTYPE_V4SI_V4SI_INT_QI },
32870 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ucmpv2di3_mask, "__builtin_ia32_ucmpq128_mask", IX86_BUILTIN_UCMPQ128, UNKNOWN, (int) QI_FTYPE_V2DI_V2DI_INT_QI },
32871 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ucmpv4si3_mask, "__builtin_ia32_ucmpd128_mask", IX86_BUILTIN_UCMPD128, UNKNOWN, (int) QI_FTYPE_V4SI_V4SI_INT_QI },
32872 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_cmpv16qi3_mask, "__builtin_ia32_cmpb128_mask", IX86_BUILTIN_CMPB128, UNKNOWN, (int) HI_FTYPE_V16QI_V16QI_INT_HI },
32873 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_cmpv8hi3_mask, "__builtin_ia32_cmpw128_mask", IX86_BUILTIN_CMPW128, UNKNOWN, (int) QI_FTYPE_V8HI_V8HI_INT_QI },
32874 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ucmpv16qi3_mask, "__builtin_ia32_ucmpb128_mask", IX86_BUILTIN_UCMPB128, UNKNOWN, (int) HI_FTYPE_V16QI_V16QI_INT_HI },
32875 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ucmpv8hi3_mask, "__builtin_ia32_ucmpw128_mask", IX86_BUILTIN_UCMPW128, UNKNOWN, (int) QI_FTYPE_V8HI_V8HI_INT_QI },
32876 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_cmpv2df3_mask, "__builtin_ia32_cmppd128_mask", IX86_BUILTIN_CMPPD128_MASK, UNKNOWN, (int) QI_FTYPE_V2DF_V2DF_INT_QI },
32877 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_cmpv4sf3_mask, "__builtin_ia32_cmpps128_mask", IX86_BUILTIN_CMPPS128_MASK, UNKNOWN, (int) QI_FTYPE_V4SF_V4SF_INT_QI },
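/* Note on the rows in this table: each entry follows the
   builtin_description layout used throughout this file, i.e. the ISA
   option mask gating registration, the insn code used for expansion,
   the __builtin_ia32_* name exposed to the front end, the
   IX86_BUILTIN_* enumerator, an rtx comparison code (UNKNOWN where
   unused) and the function type index describing the prototype.
   Purely as an illustrative sketch (the wrapper shown is assumed to
   live in the intrinsic headers, not in this file), the AVX512VL entry
   for __builtin_ia32_cmpd256_mask with type QI_FTYPE_V8SI_V8SI_INT_QI
   is the kind of builtin a header inline such as _mm256_cmp_epi32_mask
   would be expected to call, supplying the comparison immediate and an
   all-ones write mask:

     __mmask8 k
       = (__mmask8) __builtin_ia32_cmpd256_mask ((__v8si) a, (__v8si) b,
                                                 _MM_CMPINT_LT,
                                                 (__mmask8) -1);  */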
32879 /* AVX512DQ. */
32880 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_avx512dq_broadcastv16sf_mask, "__builtin_ia32_broadcastf32x2_512_mask", IX86_BUILTIN_BROADCASTF32x2_512, UNKNOWN, (int) V16SF_FTYPE_V4SF_V16SF_HI },
32881 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_avx512dq_broadcastv16si_mask, "__builtin_ia32_broadcasti32x2_512_mask", IX86_BUILTIN_BROADCASTI32x2_512, UNKNOWN, (int) V16SI_FTYPE_V4SI_V16SI_HI },
32882 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_avx512dq_broadcastv8df_mask_1, "__builtin_ia32_broadcastf64x2_512_mask", IX86_BUILTIN_BROADCASTF64X2_512, UNKNOWN, (int) V8DF_FTYPE_V2DF_V8DF_QI },
32883 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_avx512dq_broadcastv8di_mask_1, "__builtin_ia32_broadcasti64x2_512_mask", IX86_BUILTIN_BROADCASTI64X2_512, UNKNOWN, (int) V8DI_FTYPE_V2DI_V8DI_QI },
32884 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_avx512dq_broadcastv16sf_mask_1, "__builtin_ia32_broadcastf32x8_512_mask", IX86_BUILTIN_BROADCASTF32X8_512, UNKNOWN, (int) V16SF_FTYPE_V8SF_V16SF_HI },
32885 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_avx512dq_broadcastv16si_mask_1, "__builtin_ia32_broadcasti32x8_512_mask", IX86_BUILTIN_BROADCASTI32X8_512, UNKNOWN, (int) V16SI_FTYPE_V8SI_V16SI_HI },
32886 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_avx512dq_vextractf64x2_mask, "__builtin_ia32_extractf64x2_512_mask", IX86_BUILTIN_EXTRACTF64X2_512, UNKNOWN, (int) V2DF_FTYPE_V8DF_INT_V2DF_QI },
32887 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_avx512dq_vextractf32x8_mask, "__builtin_ia32_extractf32x8_mask", IX86_BUILTIN_EXTRACTF32X8, UNKNOWN, (int) V8SF_FTYPE_V16SF_INT_V8SF_QI },
32888 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_avx512dq_vextracti64x2_mask, "__builtin_ia32_extracti64x2_512_mask", IX86_BUILTIN_EXTRACTI64X2_512, UNKNOWN, (int) V2DI_FTYPE_V8DI_INT_V2DI_QI },
32889 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_avx512dq_vextracti32x8_mask, "__builtin_ia32_extracti32x8_mask", IX86_BUILTIN_EXTRACTI32X8, UNKNOWN, (int) V8SI_FTYPE_V16SI_INT_V8SI_QI },
32890 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_reducepv8df_mask, "__builtin_ia32_reducepd512_mask", IX86_BUILTIN_REDUCEPD512_MASK, UNKNOWN, (int) V8DF_FTYPE_V8DF_INT_V8DF_QI },
32891 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_reducepv16sf_mask, "__builtin_ia32_reduceps512_mask", IX86_BUILTIN_REDUCEPS512_MASK, UNKNOWN, (int) V16SF_FTYPE_V16SF_INT_V16SF_HI },
32892 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_avx512dq_mulv8di3_mask, "__builtin_ia32_pmullq512_mask", IX86_BUILTIN_PMULLQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
32893 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_xorv8df3_mask, "__builtin_ia32_xorpd512_mask", IX86_BUILTIN_XORPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI },
32894 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_xorv16sf3_mask, "__builtin_ia32_xorps512_mask", IX86_BUILTIN_XORPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI },
32895 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_iorv8df3_mask, "__builtin_ia32_orpd512_mask", IX86_BUILTIN_ORPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI },
32896 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_iorv16sf3_mask, "__builtin_ia32_orps512_mask", IX86_BUILTIN_ORPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI },
32897 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_andv8df3_mask, "__builtin_ia32_andpd512_mask", IX86_BUILTIN_ANDPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI },
32898 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_andv16sf3_mask, "__builtin_ia32_andps512_mask", IX86_BUILTIN_ANDPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI },
32899 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_avx512f_andnotv8df3_mask, "__builtin_ia32_andnpd512_mask", IX86_BUILTIN_ANDNPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI },
32900 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_avx512f_andnotv16sf3_mask, "__builtin_ia32_andnps512_mask", IX86_BUILTIN_ANDNPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI },
32901 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_avx512dq_vinsertf32x8_mask, "__builtin_ia32_insertf32x8_mask", IX86_BUILTIN_INSERTF32X8, UNKNOWN, (int) V16SF_FTYPE_V16SF_V8SF_INT_V16SF_HI },
32902 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_avx512dq_vinserti32x8_mask, "__builtin_ia32_inserti32x8_mask", IX86_BUILTIN_INSERTI32X8, UNKNOWN, (int) V16SI_FTYPE_V16SI_V8SI_INT_V16SI_HI },
32903 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_avx512dq_vinsertf64x2_mask, "__builtin_ia32_insertf64x2_512_mask", IX86_BUILTIN_INSERTF64X2_512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V2DF_INT_V8DF_QI },
32904 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_avx512dq_vinserti64x2_mask, "__builtin_ia32_inserti64x2_512_mask", IX86_BUILTIN_INSERTI64X2_512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V2DI_INT_V8DI_QI },
32905 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_avx512dq_fpclassv8df_mask, "__builtin_ia32_fpclasspd512_mask", IX86_BUILTIN_FPCLASSPD512, UNKNOWN, (int) QI_FTYPE_V8DF_INT_QI },
32906 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_avx512dq_fpclassv16sf_mask, "__builtin_ia32_fpclassps512_mask", IX86_BUILTIN_FPCLASSPS512, UNKNOWN, (int) HI_FTYPE_V16SF_INT_HI },
32907 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_avx512f_cvtd2maskv16si, "__builtin_ia32_cvtd2mask512", IX86_BUILTIN_CVTD2MASK512, UNKNOWN, (int) HI_FTYPE_V16SI },
32908 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_avx512f_cvtq2maskv8di, "__builtin_ia32_cvtq2mask512", IX86_BUILTIN_CVTQ2MASK512, UNKNOWN, (int) QI_FTYPE_V8DI },
32909 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_avx512f_cvtmask2dv16si, "__builtin_ia32_cvtmask2d512", IX86_BUILTIN_CVTMASK2D512, UNKNOWN, (int) V16SI_FTYPE_HI },
32910 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_avx512f_cvtmask2qv8di, "__builtin_ia32_cvtmask2q512", IX86_BUILTIN_CVTMASK2Q512, UNKNOWN, (int) V8DI_FTYPE_QI },
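/* Editorial sketch, not part of the original table: the four mask
   conversion entries above bind the AVX512DQ vpmovd2m/vpmovq2m and
   vpmovm2d/vpmovm2q forms.  cvtd2mask/cvtq2mask gather the sign bit of
   each element into a mask register; cvtmask2d/cvtmask2q broadcast each
   mask bit across the whole corresponding element.  Assuming the usual
   wrappers in avx512dqintrin.h, user code reaches them roughly as:

     __m512i   v = ...;
     __mmask16 k = _mm512_movepi32_mask (v);  // __builtin_ia32_cvtd2mask512
     __m512i   m = _mm512_movm_epi32 (k);     // __builtin_ia32_cvtmask2d512  */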
32912 /* AVX512BW. */
32913 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_kunpcksi, "__builtin_ia32_kunpcksi", IX86_BUILTIN_KUNPCKWD, UNKNOWN, (int) SI_FTYPE_SI_SI },
32914 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_kunpckdi, "__builtin_ia32_kunpckdi", IX86_BUILTIN_KUNPCKDQ, UNKNOWN, (int) DI_FTYPE_DI_DI },
32915 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_packusdw_mask, "__builtin_ia32_packusdw512_mask", IX86_BUILTIN_PACKUSDW512, UNKNOWN, (int) V32HI_FTYPE_V16SI_V16SI_V32HI_SI },
32916 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_ashlv4ti3, "__builtin_ia32_pslldq512", IX86_BUILTIN_PSLLDQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_INT_CONVERT },
32917 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_lshrv4ti3, "__builtin_ia32_psrldq512", IX86_BUILTIN_PSRLDQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_INT_CONVERT },
32918 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_packssdw_mask, "__builtin_ia32_packssdw512_mask", IX86_BUILTIN_PACKSSDW512, UNKNOWN, (int) V32HI_FTYPE_V16SI_V16SI_V32HI_SI },
32919 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_palignrv4ti, "__builtin_ia32_palignr512", IX86_BUILTIN_PALIGNR512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_INT_CONVERT },
32920 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_palignrv64qi_mask, "__builtin_ia32_palignr512_mask", IX86_BUILTIN_PALIGNR512_MASK, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_INT_V8DI_DI_CONVERT },
32921 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_loaddquv32hi_mask, "__builtin_ia32_movdquhi512_mask", IX86_BUILTIN_MOVDQUHI512_MASK, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_SI },
32922 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512f_loaddquv64qi_mask, "__builtin_ia32_movdquqi512_mask", IX86_BUILTIN_MOVDQUQI512_MASK, UNKNOWN, (int) V64QI_FTYPE_V64QI_V64QI_DI },
32923 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512f_psadbw, "__builtin_ia32_psadbw512", IX86_BUILTIN_PSADBW512, UNKNOWN, (int) V8DI_FTYPE_V64QI_V64QI },
32924 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_dbpsadbwv32hi_mask, "__builtin_ia32_dbpsadbw512_mask", IX86_BUILTIN_DBPSADBW512, UNKNOWN, (int) V32HI_FTYPE_V64QI_V64QI_INT_V32HI_SI },
32925 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_vec_dupv64qi_mask, "__builtin_ia32_pbroadcastb512_mask", IX86_BUILTIN_PBROADCASTB512, UNKNOWN, (int) V64QI_FTYPE_V16QI_V64QI_DI },
32926 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_vec_dup_gprv64qi_mask, "__builtin_ia32_pbroadcastb512_gpr_mask", IX86_BUILTIN_PBROADCASTB512_GPR, UNKNOWN, (int) V64QI_FTYPE_QI_V64QI_DI },
32927 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_vec_dupv32hi_mask, "__builtin_ia32_pbroadcastw512_mask", IX86_BUILTIN_PBROADCASTW512, UNKNOWN, (int) V32HI_FTYPE_V8HI_V32HI_SI },
32928 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_vec_dup_gprv32hi_mask, "__builtin_ia32_pbroadcastw512_gpr_mask", IX86_BUILTIN_PBROADCASTW512_GPR, UNKNOWN, (int) V32HI_FTYPE_HI_V32HI_SI },
32929 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_sign_extendv32qiv32hi2_mask, "__builtin_ia32_pmovsxbw512_mask", IX86_BUILTIN_PMOVSXBW512_MASK, UNKNOWN, (int) V32HI_FTYPE_V32QI_V32HI_SI },
32930 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_zero_extendv32qiv32hi2_mask, "__builtin_ia32_pmovzxbw512_mask", IX86_BUILTIN_PMOVZXBW512_MASK, UNKNOWN, (int) V32HI_FTYPE_V32QI_V32HI_SI },
32931 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_permvarv32hi_mask, "__builtin_ia32_permvarhi512_mask", IX86_BUILTIN_VPERMVARHI512_MASK, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI_SI },
32932 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_vpermt2varv32hi3_mask, "__builtin_ia32_vpermt2varhi512_mask", IX86_BUILTIN_VPERMT2VARHI512, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI_SI },
32933 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_vpermt2varv32hi3_maskz, "__builtin_ia32_vpermt2varhi512_maskz", IX86_BUILTIN_VPERMT2VARHI512_MASKZ, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI_SI },
32934 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_vpermi2varv32hi3_mask, "__builtin_ia32_vpermi2varhi512_mask", IX86_BUILTIN_VPERMI2VARHI512, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI_SI },
32935 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_uavgv64qi3_mask, "__builtin_ia32_pavgb512_mask", IX86_BUILTIN_PAVGB512, UNKNOWN, (int) V64QI_FTYPE_V64QI_V64QI_V64QI_DI },
32936 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_uavgv32hi3_mask, "__builtin_ia32_pavgw512_mask", IX86_BUILTIN_PAVGW512, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI_SI },
32937 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_addv64qi3_mask, "__builtin_ia32_paddb512_mask", IX86_BUILTIN_PADDB512, UNKNOWN, (int) V64QI_FTYPE_V64QI_V64QI_V64QI_DI },
32938 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_subv64qi3_mask, "__builtin_ia32_psubb512_mask", IX86_BUILTIN_PSUBB512, UNKNOWN, (int) V64QI_FTYPE_V64QI_V64QI_V64QI_DI },
32939 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_sssubv64qi3_mask, "__builtin_ia32_psubsb512_mask", IX86_BUILTIN_PSUBSB512, UNKNOWN, (int) V64QI_FTYPE_V64QI_V64QI_V64QI_DI },
32940 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_ssaddv64qi3_mask, "__builtin_ia32_paddsb512_mask", IX86_BUILTIN_PADDSB512, UNKNOWN, (int) V64QI_FTYPE_V64QI_V64QI_V64QI_DI },
32941 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_ussubv64qi3_mask, "__builtin_ia32_psubusb512_mask", IX86_BUILTIN_PSUBUSB512, UNKNOWN, (int) V64QI_FTYPE_V64QI_V64QI_V64QI_DI },
32942 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_usaddv64qi3_mask, "__builtin_ia32_paddusb512_mask", IX86_BUILTIN_PADDUSB512, UNKNOWN, (int) V64QI_FTYPE_V64QI_V64QI_V64QI_DI },
32943 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_subv32hi3_mask, "__builtin_ia32_psubw512_mask", IX86_BUILTIN_PSUBW512, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI_SI },
32944 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_addv32hi3_mask, "__builtin_ia32_paddw512_mask", IX86_BUILTIN_PADDW512, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI_SI },
32945 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_sssubv32hi3_mask, "__builtin_ia32_psubsw512_mask", IX86_BUILTIN_PSUBSW512, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI_SI },
32946 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_ssaddv32hi3_mask, "__builtin_ia32_paddsw512_mask", IX86_BUILTIN_PADDSW512, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI_SI },
32947 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_ussubv32hi3_mask, "__builtin_ia32_psubusw512_mask", IX86_BUILTIN_PSUBUSW512, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI_SI },
32948 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_usaddv32hi3_mask, "__builtin_ia32_paddusw512_mask", IX86_BUILTIN_PADDUSW512, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI_SI },
32949 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_umaxv32hi3_mask, "__builtin_ia32_pmaxuw512_mask", IX86_BUILTIN_PMAXUW512, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI_SI },
32950 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_smaxv32hi3_mask, "__builtin_ia32_pmaxsw512_mask", IX86_BUILTIN_PMAXSW512, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI_SI },
32951 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_uminv32hi3_mask, "__builtin_ia32_pminuw512_mask", IX86_BUILTIN_PMINUW512, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI_SI },
32952 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_sminv32hi3_mask, "__builtin_ia32_pminsw512_mask", IX86_BUILTIN_PMINSW512, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI_SI },
32953 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_umaxv64qi3_mask, "__builtin_ia32_pmaxub512_mask", IX86_BUILTIN_PMAXUB512, UNKNOWN, (int) V64QI_FTYPE_V64QI_V64QI_V64QI_DI },
32954 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_smaxv64qi3_mask, "__builtin_ia32_pmaxsb512_mask", IX86_BUILTIN_PMAXSB512, UNKNOWN, (int) V64QI_FTYPE_V64QI_V64QI_V64QI_DI },
32955 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_uminv64qi3_mask, "__builtin_ia32_pminub512_mask", IX86_BUILTIN_PMINUB512, UNKNOWN, (int) V64QI_FTYPE_V64QI_V64QI_V64QI_DI },
32956 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_sminv64qi3_mask, "__builtin_ia32_pminsb512_mask", IX86_BUILTIN_PMINSB512, UNKNOWN, (int) V64QI_FTYPE_V64QI_V64QI_V64QI_DI },
32957 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_truncatev32hiv32qi2_mask, "__builtin_ia32_pmovwb512_mask", IX86_BUILTIN_PMOVWB512, UNKNOWN, (int) V32QI_FTYPE_V32HI_V32QI_SI },
32958 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_ss_truncatev32hiv32qi2_mask, "__builtin_ia32_pmovswb512_mask", IX86_BUILTIN_PMOVSWB512, UNKNOWN, (int) V32QI_FTYPE_V32HI_V32QI_SI },
32959 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_us_truncatev32hiv32qi2_mask, "__builtin_ia32_pmovuswb512_mask", IX86_BUILTIN_PMOVUSWB512, UNKNOWN, (int) V32QI_FTYPE_V32HI_V32QI_SI },
32960 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_umulhrswv32hi3_mask, "__builtin_ia32_pmulhrsw512_mask", IX86_BUILTIN_PMULHRSW512_MASK, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI_SI },
32961 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_umulv32hi3_highpart_mask, "__builtin_ia32_pmulhuw512_mask", IX86_BUILTIN_PMULHUW512_MASK, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI_SI },
32962 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_smulv32hi3_highpart_mask, "__builtin_ia32_pmulhw512_mask", IX86_BUILTIN_PMULHW512_MASK, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI_SI },
32963 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_mulv32hi3_mask, "__builtin_ia32_pmullw512_mask", IX86_BUILTIN_PMULLW512_MASK, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI_SI },
32964 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_ashlv32hi3_mask, "__builtin_ia32_psllwi512_mask", IX86_BUILTIN_PSLLWI512_MASK, UNKNOWN, (int) V32HI_FTYPE_V32HI_INT_V32HI_SI },
32965 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_ashlv32hi3_mask, "__builtin_ia32_psllw512_mask", IX86_BUILTIN_PSLLW512_MASK, UNKNOWN, (int) V32HI_FTYPE_V32HI_V8HI_V32HI_SI },
32966 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_packsswb_mask, "__builtin_ia32_packsswb512_mask", IX86_BUILTIN_PACKSSWB512, UNKNOWN, (int) V64QI_FTYPE_V32HI_V32HI_V64QI_DI },
32967 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_packuswb_mask, "__builtin_ia32_packuswb512_mask", IX86_BUILTIN_PACKUSWB512, UNKNOWN, (int) V64QI_FTYPE_V32HI_V32HI_V64QI_DI },
32968 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_ashrvv32hi_mask, "__builtin_ia32_psrav32hi_mask", IX86_BUILTIN_PSRAVV32HI, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI_SI },
32969 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_pmaddubsw512v32hi_mask, "__builtin_ia32_pmaddubsw512_mask", IX86_BUILTIN_PMADDUBSW512_MASK, UNKNOWN, (int) V32HI_FTYPE_V64QI_V64QI_V32HI_SI },
32970 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_pmaddwd512v32hi_mask, "__builtin_ia32_pmaddwd512_mask", IX86_BUILTIN_PMADDWD512_MASK, UNKNOWN, (int) V16SI_FTYPE_V32HI_V32HI_V16SI_HI },
32971 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_lshrvv32hi_mask, "__builtin_ia32_psrlv32hi_mask", IX86_BUILTIN_PSRLVV32HI, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI_SI },
32972 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_interleave_highv64qi_mask, "__builtin_ia32_punpckhbw512_mask", IX86_BUILTIN_PUNPCKHBW512, UNKNOWN, (int) V64QI_FTYPE_V64QI_V64QI_V64QI_DI },
32973 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_interleave_highv32hi_mask, "__builtin_ia32_punpckhwd512_mask", IX86_BUILTIN_PUNPCKHWD512, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI_SI },
32974 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_interleave_lowv64qi_mask, "__builtin_ia32_punpcklbw512_mask", IX86_BUILTIN_PUNPCKLBW512, UNKNOWN, (int) V64QI_FTYPE_V64QI_V64QI_V64QI_DI },
32975 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_interleave_lowv32hi_mask, "__builtin_ia32_punpcklwd512_mask", IX86_BUILTIN_PUNPCKLWD512, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI_SI },
32976 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_pshufbv64qi3_mask, "__builtin_ia32_pshufb512_mask", IX86_BUILTIN_PSHUFB512, UNKNOWN, (int) V64QI_FTYPE_V64QI_V64QI_V64QI_DI },
32977 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_pshufhwv32hi_mask, "__builtin_ia32_pshufhw512_mask", IX86_BUILTIN_PSHUFHW512, UNKNOWN, (int) V32HI_FTYPE_V32HI_INT_V32HI_SI },
32978 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_pshuflwv32hi_mask, "__builtin_ia32_pshuflw512_mask", IX86_BUILTIN_PSHUFLW512, UNKNOWN, (int) V32HI_FTYPE_V32HI_INT_V32HI_SI },
32979 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_ashrv32hi3_mask, "__builtin_ia32_psrawi512_mask", IX86_BUILTIN_PSRAWI512, UNKNOWN, (int) V32HI_FTYPE_V32HI_INT_V32HI_SI },
32980 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_ashrv32hi3_mask, "__builtin_ia32_psraw512_mask", IX86_BUILTIN_PSRAW512, UNKNOWN, (int) V32HI_FTYPE_V32HI_V8HI_V32HI_SI },
32981 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_lshrv32hi3_mask, "__builtin_ia32_psrlwi512_mask", IX86_BUILTIN_PSRLWI512, UNKNOWN, (int) V32HI_FTYPE_V32HI_INT_V32HI_SI },
32982 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_lshrv32hi3_mask, "__builtin_ia32_psrlw512_mask", IX86_BUILTIN_PSRLW512, UNKNOWN, (int) V32HI_FTYPE_V32HI_V8HI_V32HI_SI },
32983 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_cvtb2maskv64qi, "__builtin_ia32_cvtb2mask512", IX86_BUILTIN_CVTB2MASK512, UNKNOWN, (int) DI_FTYPE_V64QI },
32984 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_cvtw2maskv32hi, "__builtin_ia32_cvtw2mask512", IX86_BUILTIN_CVTW2MASK512, UNKNOWN, (int) SI_FTYPE_V32HI },
32985 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_cvtmask2bv64qi, "__builtin_ia32_cvtmask2b512", IX86_BUILTIN_CVTMASK2B512, UNKNOWN, (int) V64QI_FTYPE_DI },
32986 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_cvtmask2wv32hi, "__builtin_ia32_cvtmask2w512", IX86_BUILTIN_CVTMASK2W512, UNKNOWN, (int) V32HI_FTYPE_SI },
32987 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_eqv64qi3_mask, "__builtin_ia32_pcmpeqb512_mask", IX86_BUILTIN_PCMPEQB512_MASK, UNKNOWN, (int) DI_FTYPE_V64QI_V64QI_DI },
32988 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_eqv32hi3_mask, "__builtin_ia32_pcmpeqw512_mask", IX86_BUILTIN_PCMPEQW512_MASK, UNKNOWN, (int) SI_FTYPE_V32HI_V32HI_SI },
32989 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_gtv64qi3_mask, "__builtin_ia32_pcmpgtb512_mask", IX86_BUILTIN_PCMPGTB512_MASK, UNKNOWN, (int) DI_FTYPE_V64QI_V64QI_DI },
32990 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_gtv32hi3_mask, "__builtin_ia32_pcmpgtw512_mask", IX86_BUILTIN_PCMPGTW512_MASK, UNKNOWN, (int) SI_FTYPE_V32HI_V32HI_SI },
32991 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_testmv64qi3_mask, "__builtin_ia32_ptestmb512", IX86_BUILTIN_PTESTMB512, UNKNOWN, (int) DI_FTYPE_V64QI_V64QI_DI },
32992 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_testmv32hi3_mask, "__builtin_ia32_ptestmw512", IX86_BUILTIN_PTESTMW512, UNKNOWN, (int) SI_FTYPE_V32HI_V32HI_SI },
32993 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_testnmv64qi3_mask, "__builtin_ia32_ptestnmb512", IX86_BUILTIN_PTESTNMB512, UNKNOWN, (int) DI_FTYPE_V64QI_V64QI_DI },
32994 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_testnmv32hi3_mask, "__builtin_ia32_ptestnmw512", IX86_BUILTIN_PTESTNMW512, UNKNOWN, (int) SI_FTYPE_V32HI_V32HI_SI },
32995 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_ashlvv32hi_mask, "__builtin_ia32_psllv32hi_mask", IX86_BUILTIN_PSLLVV32HI, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI_SI },
32996 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_absv64qi2_mask, "__builtin_ia32_pabsb512_mask", IX86_BUILTIN_PABSB512, UNKNOWN, (int) V64QI_FTYPE_V64QI_V64QI_DI },
32997 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_absv32hi2_mask, "__builtin_ia32_pabsw512_mask", IX86_BUILTIN_PABSW512, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_SI },
32998 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_blendmv32hi, "__builtin_ia32_blendmw_512_mask", IX86_BUILTIN_BLENDMW512, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_SI },
32999 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_blendmv64qi, "__builtin_ia32_blendmb_512_mask", IX86_BUILTIN_BLENDMB512, UNKNOWN, (int) V64QI_FTYPE_V64QI_V64QI_DI },
33000 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_cmpv64qi3_mask, "__builtin_ia32_cmpb512_mask", IX86_BUILTIN_CMPB512, UNKNOWN, (int) DI_FTYPE_V64QI_V64QI_INT_DI },
33001 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_cmpv32hi3_mask, "__builtin_ia32_cmpw512_mask", IX86_BUILTIN_CMPW512, UNKNOWN, (int) SI_FTYPE_V32HI_V32HI_INT_SI },
33002 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_ucmpv64qi3_mask, "__builtin_ia32_ucmpb512_mask", IX86_BUILTIN_UCMPB512, UNKNOWN, (int) DI_FTYPE_V64QI_V64QI_INT_DI },
33003 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_ucmpv32hi3_mask, "__builtin_ia32_ucmpw512_mask", IX86_BUILTIN_UCMPW512, UNKNOWN, (int) SI_FTYPE_V32HI_V32HI_INT_SI },
33005 /* AVX512IFMA */
33006 { OPTION_MASK_ISA_AVX512IFMA, CODE_FOR_vpamdd52luqv8di_mask, "__builtin_ia32_vpmadd52luq512_mask", IX86_BUILTIN_VPMADD52LUQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
33007 { OPTION_MASK_ISA_AVX512IFMA, CODE_FOR_vpamdd52luqv8di_maskz, "__builtin_ia32_vpmadd52luq512_maskz", IX86_BUILTIN_VPMADD52LUQ512_MASKZ, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
33008 { OPTION_MASK_ISA_AVX512IFMA, CODE_FOR_vpamdd52huqv8di_mask, "__builtin_ia32_vpmadd52huq512_mask", IX86_BUILTIN_VPMADD52HUQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
33009 { OPTION_MASK_ISA_AVX512IFMA, CODE_FOR_vpamdd52huqv8di_maskz, "__builtin_ia32_vpmadd52huq512_maskz", IX86_BUILTIN_VPMADD52HUQ512_MASKZ, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
33010 { OPTION_MASK_ISA_AVX512IFMA | OPTION_MASK_ISA_AVX512VL, CODE_FOR_vpamdd52luqv4di_mask, "__builtin_ia32_vpmadd52luq256_mask", IX86_BUILTIN_VPMADD52LUQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_V4DI_QI },
33011 { OPTION_MASK_ISA_AVX512IFMA | OPTION_MASK_ISA_AVX512VL, CODE_FOR_vpamdd52luqv4di_maskz, "__builtin_ia32_vpmadd52luq256_maskz", IX86_BUILTIN_VPMADD52LUQ256_MASKZ, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_V4DI_QI },
33012 { OPTION_MASK_ISA_AVX512IFMA | OPTION_MASK_ISA_AVX512VL, CODE_FOR_vpamdd52huqv4di_mask, "__builtin_ia32_vpmadd52huq256_mask", IX86_BUILTIN_VPMADD52HUQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_V4DI_QI },
33013 { OPTION_MASK_ISA_AVX512IFMA | OPTION_MASK_ISA_AVX512VL, CODE_FOR_vpamdd52huqv4di_maskz, "__builtin_ia32_vpmadd52huq256_maskz", IX86_BUILTIN_VPMADD52HUQ256_MASKZ, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_V4DI_QI },
33014 { OPTION_MASK_ISA_AVX512IFMA | OPTION_MASK_ISA_AVX512VL, CODE_FOR_vpamdd52luqv2di_mask, "__builtin_ia32_vpmadd52luq128_mask", IX86_BUILTIN_VPMADD52LUQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI_QI },
33015 { OPTION_MASK_ISA_AVX512IFMA | OPTION_MASK_ISA_AVX512VL, CODE_FOR_vpamdd52luqv2di_maskz, "__builtin_ia32_vpmadd52luq128_maskz", IX86_BUILTIN_VPMADD52LUQ128_MASKZ, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI_QI },
33016 { OPTION_MASK_ISA_AVX512IFMA | OPTION_MASK_ISA_AVX512VL, CODE_FOR_vpamdd52huqv2di_mask, "__builtin_ia32_vpmadd52huq128_mask", IX86_BUILTIN_VPMADD52HUQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI_QI },
33017 { OPTION_MASK_ISA_AVX512IFMA | OPTION_MASK_ISA_AVX512VL, CODE_FOR_vpamdd52huqv2di_maskz, "__builtin_ia32_vpmadd52huq128_maskz", IX86_BUILTIN_VPMADD52HUQ128_MASKZ, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI_QI },
33019 /* AVX512VBMI */
33020 { OPTION_MASK_ISA_AVX512VBMI, CODE_FOR_vpmultishiftqbv64qi_mask, "__builtin_ia32_vpmultishiftqb512_mask", IX86_BUILTIN_VPMULTISHIFTQB512, UNKNOWN, (int) V64QI_FTYPE_V64QI_V64QI_V64QI_DI },
33021 { OPTION_MASK_ISA_AVX512VBMI | OPTION_MASK_ISA_AVX512VL, CODE_FOR_vpmultishiftqbv32qi_mask, "__builtin_ia32_vpmultishiftqb256_mask", IX86_BUILTIN_VPMULTISHIFTQB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI_V32QI_SI },
33022 { OPTION_MASK_ISA_AVX512VBMI | OPTION_MASK_ISA_AVX512VL, CODE_FOR_vpmultishiftqbv16qi_mask, "__builtin_ia32_vpmultishiftqb128_mask", IX86_BUILTIN_VPMULTISHIFTQB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_V16QI_HI },
33023 { OPTION_MASK_ISA_AVX512VBMI, CODE_FOR_avx512bw_permvarv64qi_mask, "__builtin_ia32_permvarqi512_mask", IX86_BUILTIN_VPERMVARQI512_MASK, UNKNOWN, (int) V64QI_FTYPE_V64QI_V64QI_V64QI_DI },
33024 { OPTION_MASK_ISA_AVX512VBMI, CODE_FOR_avx512bw_vpermt2varv64qi3_mask, "__builtin_ia32_vpermt2varqi512_mask", IX86_BUILTIN_VPERMT2VARQI512, UNKNOWN, (int) V64QI_FTYPE_V64QI_V64QI_V64QI_DI },
33025 { OPTION_MASK_ISA_AVX512VBMI, CODE_FOR_avx512bw_vpermt2varv64qi3_maskz, "__builtin_ia32_vpermt2varqi512_maskz", IX86_BUILTIN_VPERMT2VARQI512_MASKZ, UNKNOWN, (int) V64QI_FTYPE_V64QI_V64QI_V64QI_DI },
33026 { OPTION_MASK_ISA_AVX512VBMI, CODE_FOR_avx512bw_vpermi2varv64qi3_mask, "__builtin_ia32_vpermi2varqi512_mask", IX86_BUILTIN_VPERMI2VARQI512, UNKNOWN, (int) V64QI_FTYPE_V64QI_V64QI_V64QI_DI },
33027 { OPTION_MASK_ISA_AVX512VBMI | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_permvarv32qi_mask, "__builtin_ia32_permvarqi256_mask", IX86_BUILTIN_VPERMVARQI256_MASK, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI_V32QI_SI },
33028 { OPTION_MASK_ISA_AVX512VBMI | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_permvarv16qi_mask, "__builtin_ia32_permvarqi128_mask", IX86_BUILTIN_VPERMVARQI128_MASK, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_V16QI_HI },
33029 { OPTION_MASK_ISA_AVX512VBMI | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermt2varv32qi3_mask, "__builtin_ia32_vpermt2varqi256_mask", IX86_BUILTIN_VPERMT2VARQI256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI_V32QI_SI },
33030 { OPTION_MASK_ISA_AVX512VBMI | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermt2varv32qi3_maskz, "__builtin_ia32_vpermt2varqi256_maskz", IX86_BUILTIN_VPERMT2VARQI256_MASKZ, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI_V32QI_SI },
33031 { OPTION_MASK_ISA_AVX512VBMI | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermt2varv16qi3_mask, "__builtin_ia32_vpermt2varqi128_mask", IX86_BUILTIN_VPERMT2VARQI128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_V16QI_HI },
33032 { OPTION_MASK_ISA_AVX512VBMI | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermt2varv16qi3_maskz, "__builtin_ia32_vpermt2varqi128_maskz", IX86_BUILTIN_VPERMT2VARQI128_MASKZ, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_V16QI_HI },
33033 { OPTION_MASK_ISA_AVX512VBMI | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermi2varv32qi3_mask, "__builtin_ia32_vpermi2varqi256_mask", IX86_BUILTIN_VPERMI2VARQI256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI_V32QI_SI },
33034 { OPTION_MASK_ISA_AVX512VBMI | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermi2varv16qi3_mask, "__builtin_ia32_vpermi2varqi128_mask", IX86_BUILTIN_VPERMI2VARQI128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_V16QI_HI },
33035 };
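/* Editorial sketch, not from the original source: entries in bdesc_args
   are registered once at startup and expanded generically.  Roughly, the
   registration loop in ix86_init_mmx_sse_builtins looks like:

     const struct builtin_description *d;
     size_t i;
     for (i = 0, d = bdesc_args; i < ARRAY_SIZE (bdesc_args); i++, d++)
       {
         if (d->name == 0)
           continue;
         ftype = (enum ix86_builtin_func_type) d->flag;
         def_builtin_const (d->mask, d->name, ftype, d->code);
       }

   and calls to these builtins are later expanded through
   ix86_expand_builtin, which dispatches bdesc_args entries to
   ix86_expand_args_builtin using the CODE_FOR_* insn recorded here.  */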
33037 /* Builtins with rounding support. */
33038 static const struct builtin_description bdesc_round_args[] =
33039 {
33040 /* AVX512F */
33041 { OPTION_MASK_ISA_AVX512F, CODE_FOR_addv8df3_mask_round, "__builtin_ia32_addpd512_mask", IX86_BUILTIN_ADDPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI_INT },
33042 { OPTION_MASK_ISA_AVX512F, CODE_FOR_addv16sf3_mask_round, "__builtin_ia32_addps512_mask", IX86_BUILTIN_ADDPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT },
33043 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse2_vmaddv2df3_round, "__builtin_ia32_addsd_round", IX86_BUILTIN_ADDSD_ROUND, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
33044 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse_vmaddv4sf3_round, "__builtin_ia32_addss_round", IX86_BUILTIN_ADDSS_ROUND, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
33045 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_cmpv8df3_mask_round, "__builtin_ia32_cmppd512_mask", IX86_BUILTIN_CMPPD512, UNKNOWN, (int) QI_FTYPE_V8DF_V8DF_INT_QI_INT },
33046 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_cmpv16sf3_mask_round, "__builtin_ia32_cmpps512_mask", IX86_BUILTIN_CMPPS512, UNKNOWN, (int) HI_FTYPE_V16SF_V16SF_INT_HI_INT },
33047 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vmcmpv2df3_mask_round, "__builtin_ia32_cmpsd_mask", IX86_BUILTIN_CMPSD_MASK, UNKNOWN, (int) QI_FTYPE_V2DF_V2DF_INT_QI_INT },
33048 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vmcmpv4sf3_mask_round, "__builtin_ia32_cmpss_mask", IX86_BUILTIN_CMPSS_MASK, UNKNOWN, (int) QI_FTYPE_V4SF_V4SF_INT_QI_INT },
33049 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse2_comi_round, "__builtin_ia32_vcomisd", IX86_BUILTIN_COMIDF, UNKNOWN, (int) INT_FTYPE_V2DF_V2DF_INT_INT },
33050 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse_comi_round, "__builtin_ia32_vcomiss", IX86_BUILTIN_COMISF, UNKNOWN, (int) INT_FTYPE_V4SF_V4SF_INT_INT },
33051 { OPTION_MASK_ISA_AVX512F, CODE_FOR_floatv16siv16sf2_mask_round, "__builtin_ia32_cvtdq2ps512_mask", IX86_BUILTIN_CVTDQ2PS512, UNKNOWN, (int) V16SF_FTYPE_V16SI_V16SF_HI_INT },
33052 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_cvtpd2dq512_mask_round, "__builtin_ia32_cvtpd2dq512_mask", IX86_BUILTIN_CVTPD2DQ512, UNKNOWN, (int) V8SI_FTYPE_V8DF_V8SI_QI_INT },
33053 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_cvtpd2ps512_mask_round, "__builtin_ia32_cvtpd2ps512_mask", IX86_BUILTIN_CVTPD2PS512, UNKNOWN, (int) V8SF_FTYPE_V8DF_V8SF_QI_INT },
33054 { OPTION_MASK_ISA_AVX512F, CODE_FOR_ufix_notruncv8dfv8si2_mask_round, "__builtin_ia32_cvtpd2udq512_mask", IX86_BUILTIN_CVTPD2UDQ512, UNKNOWN, (int) V8SI_FTYPE_V8DF_V8SI_QI_INT },
33055 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vcvtph2ps512_mask_round, "__builtin_ia32_vcvtph2ps512_mask", IX86_BUILTIN_CVTPH2PS512, UNKNOWN, (int) V16SF_FTYPE_V16HI_V16SF_HI_INT },
33056 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fix_notruncv16sfv16si_mask_round, "__builtin_ia32_cvtps2dq512_mask", IX86_BUILTIN_CVTPS2DQ512, UNKNOWN, (int) V16SI_FTYPE_V16SF_V16SI_HI_INT },
33057 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_cvtps2pd512_mask_round, "__builtin_ia32_cvtps2pd512_mask", IX86_BUILTIN_CVTPS2PD512, UNKNOWN, (int) V8DF_FTYPE_V8SF_V8DF_QI_INT },
33058 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_ufix_notruncv16sfv16si_mask_round, "__builtin_ia32_cvtps2udq512_mask", IX86_BUILTIN_CVTPS2UDQ512, UNKNOWN, (int) V16SI_FTYPE_V16SF_V16SI_HI_INT },
33059 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse2_cvtsd2ss_round, "__builtin_ia32_cvtsd2ss_round", IX86_BUILTIN_CVTSD2SS_ROUND, UNKNOWN, (int) V4SF_FTYPE_V4SF_V2DF_INT },
33060 { OPTION_MASK_ISA_AVX512F | OPTION_MASK_ISA_64BIT, CODE_FOR_sse2_cvtsi2sdq_round, "__builtin_ia32_cvtsi2sd64", IX86_BUILTIN_CVTSI2SD64, UNKNOWN, (int) V2DF_FTYPE_V2DF_INT64_INT },
33061 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse_cvtsi2ss_round, "__builtin_ia32_cvtsi2ss32", IX86_BUILTIN_CVTSI2SS32, UNKNOWN, (int) V4SF_FTYPE_V4SF_INT_INT },
33062 { OPTION_MASK_ISA_AVX512F | OPTION_MASK_ISA_64BIT, CODE_FOR_sse_cvtsi2ssq_round, "__builtin_ia32_cvtsi2ss64", IX86_BUILTIN_CVTSI2SS64, UNKNOWN, (int) V4SF_FTYPE_V4SF_INT64_INT },
33063 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse2_cvtss2sd_round, "__builtin_ia32_cvtss2sd_round", IX86_BUILTIN_CVTSS2SD_ROUND, UNKNOWN, (int) V2DF_FTYPE_V2DF_V4SF_INT },
33064 { OPTION_MASK_ISA_AVX512F, CODE_FOR_fix_truncv8dfv8si2_mask_round, "__builtin_ia32_cvttpd2dq512_mask", IX86_BUILTIN_CVTTPD2DQ512, UNKNOWN, (int) V8SI_FTYPE_V8DF_V8SI_QI_INT },
33065 { OPTION_MASK_ISA_AVX512F, CODE_FOR_ufix_truncv8dfv8si2_mask_round, "__builtin_ia32_cvttpd2udq512_mask", IX86_BUILTIN_CVTTPD2UDQ512, UNKNOWN, (int) V8SI_FTYPE_V8DF_V8SI_QI_INT },
33066 { OPTION_MASK_ISA_AVX512F, CODE_FOR_fix_truncv16sfv16si2_mask_round, "__builtin_ia32_cvttps2dq512_mask", IX86_BUILTIN_CVTTPS2DQ512, UNKNOWN, (int) V16SI_FTYPE_V16SF_V16SI_HI_INT },
33067 { OPTION_MASK_ISA_AVX512F, CODE_FOR_ufix_truncv16sfv16si2_mask_round, "__builtin_ia32_cvttps2udq512_mask", IX86_BUILTIN_CVTTPS2UDQ512, UNKNOWN, (int) V16SI_FTYPE_V16SF_V16SI_HI_INT },
33068 { OPTION_MASK_ISA_AVX512F, CODE_FOR_ufloatv16siv16sf2_mask_round, "__builtin_ia32_cvtudq2ps512_mask", IX86_BUILTIN_CVTUDQ2PS512, UNKNOWN, (int) V16SF_FTYPE_V16SI_V16SF_HI_INT },
33069 { OPTION_MASK_ISA_AVX512F | OPTION_MASK_ISA_64BIT, CODE_FOR_cvtusi2sd64_round, "__builtin_ia32_cvtusi2sd64", IX86_BUILTIN_CVTUSI2SD64, UNKNOWN, (int) V2DF_FTYPE_V2DF_UINT64_INT },
33070 { OPTION_MASK_ISA_AVX512F, CODE_FOR_cvtusi2ss32_round, "__builtin_ia32_cvtusi2ss32", IX86_BUILTIN_CVTUSI2SS32, UNKNOWN, (int) V4SF_FTYPE_V4SF_UINT_INT },
33071 { OPTION_MASK_ISA_AVX512F | OPTION_MASK_ISA_64BIT, CODE_FOR_cvtusi2ss64_round, "__builtin_ia32_cvtusi2ss64", IX86_BUILTIN_CVTUSI2SS64, UNKNOWN, (int) V4SF_FTYPE_V4SF_UINT64_INT },
33072 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_divv8df3_mask_round, "__builtin_ia32_divpd512_mask", IX86_BUILTIN_DIVPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI_INT },
33073 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_divv16sf3_mask_round, "__builtin_ia32_divps512_mask", IX86_BUILTIN_DIVPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT },
33074 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse2_vmdivv2df3_round, "__builtin_ia32_divsd_round", IX86_BUILTIN_DIVSD_ROUND, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
33075 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse_vmdivv4sf3_round, "__builtin_ia32_divss_round", IX86_BUILTIN_DIVSS_ROUND, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
33076 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fixupimmv8df_mask_round, "__builtin_ia32_fixupimmpd512_mask", IX86_BUILTIN_FIXUPIMMPD512_MASK, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DI_INT_QI_INT },
33077 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fixupimmv8df_maskz_round, "__builtin_ia32_fixupimmpd512_maskz", IX86_BUILTIN_FIXUPIMMPD512_MASKZ, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DI_INT_QI_INT },
33078 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fixupimmv16sf_mask_round, "__builtin_ia32_fixupimmps512_mask", IX86_BUILTIN_FIXUPIMMPS512_MASK, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SI_INT_HI_INT },
33079 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fixupimmv16sf_maskz_round, "__builtin_ia32_fixupimmps512_maskz", IX86_BUILTIN_FIXUPIMMPS512_MASKZ, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SI_INT_HI_INT },
33080 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_sfixupimmv2df_mask_round, "__builtin_ia32_fixupimmsd_mask", IX86_BUILTIN_FIXUPIMMSD128_MASK, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DI_INT_QI_INT },
33081 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_sfixupimmv2df_maskz_round, "__builtin_ia32_fixupimmsd_maskz", IX86_BUILTIN_FIXUPIMMSD128_MASKZ, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DI_INT_QI_INT },
33082 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_sfixupimmv4sf_mask_round, "__builtin_ia32_fixupimmss_mask", IX86_BUILTIN_FIXUPIMMSS128_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SI_INT_QI_INT },
33083 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_sfixupimmv4sf_maskz_round, "__builtin_ia32_fixupimmss_maskz", IX86_BUILTIN_FIXUPIMMSS128_MASKZ, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SI_INT_QI_INT },
33084 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_getexpv8df_mask_round, "__builtin_ia32_getexppd512_mask", IX86_BUILTIN_GETEXPPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_QI_INT },
33085 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_getexpv16sf_mask_round, "__builtin_ia32_getexpps512_mask", IX86_BUILTIN_GETEXPPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_HI_INT },
33086 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_sgetexpv2df_round, "__builtin_ia32_getexpsd128_round", IX86_BUILTIN_GETEXPSD128, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
33087 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_sgetexpv4sf_round, "__builtin_ia32_getexpss128_round", IX86_BUILTIN_GETEXPSS128, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
33088 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_getmantv8df_mask_round, "__builtin_ia32_getmantpd512_mask", IX86_BUILTIN_GETMANTPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_INT_V8DF_QI_INT },
33089 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_getmantv16sf_mask_round, "__builtin_ia32_getmantps512_mask", IX86_BUILTIN_GETMANTPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_INT_V16SF_HI_INT },
33090 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vgetmantv2df_round, "__builtin_ia32_getmantsd_round", IX86_BUILTIN_GETMANTSD128, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT_INT },
33091 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vgetmantv4sf_round, "__builtin_ia32_getmantss_round", IX86_BUILTIN_GETMANTSS128, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT_INT },
33092 { OPTION_MASK_ISA_AVX512F, CODE_FOR_smaxv8df3_mask_round, "__builtin_ia32_maxpd512_mask", IX86_BUILTIN_MAXPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI_INT },
33093 { OPTION_MASK_ISA_AVX512F, CODE_FOR_smaxv16sf3_mask_round, "__builtin_ia32_maxps512_mask", IX86_BUILTIN_MAXPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT },
33094 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse2_vmsmaxv2df3_round, "__builtin_ia32_maxsd_round", IX86_BUILTIN_MAXSD_ROUND, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
33095 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse_vmsmaxv4sf3_round, "__builtin_ia32_maxss_round", IX86_BUILTIN_MAXSS_ROUND, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
33096 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sminv8df3_mask_round, "__builtin_ia32_minpd512_mask", IX86_BUILTIN_MINPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI_INT },
33097 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sminv16sf3_mask_round, "__builtin_ia32_minps512_mask", IX86_BUILTIN_MINPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT },
33098 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse2_vmsminv2df3_round, "__builtin_ia32_minsd_round", IX86_BUILTIN_MINSD_ROUND, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
33099 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse_vmsminv4sf3_round, "__builtin_ia32_minss_round", IX86_BUILTIN_MINSS_ROUND, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
33100 { OPTION_MASK_ISA_AVX512F, CODE_FOR_mulv8df3_mask_round, "__builtin_ia32_mulpd512_mask", IX86_BUILTIN_MULPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI_INT },
33101 { OPTION_MASK_ISA_AVX512F, CODE_FOR_mulv16sf3_mask_round, "__builtin_ia32_mulps512_mask", IX86_BUILTIN_MULPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT },
33102 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse2_vmmulv2df3_round, "__builtin_ia32_mulsd_round", IX86_BUILTIN_MULSD_ROUND, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
33103 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse_vmmulv4sf3_round, "__builtin_ia32_mulss_round", IX86_BUILTIN_MULSS_ROUND, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
33104 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_rndscalev8df_mask_round, "__builtin_ia32_rndscalepd_mask", IX86_BUILTIN_RNDSCALEPD, UNKNOWN, (int) V8DF_FTYPE_V8DF_INT_V8DF_QI_INT },
33105 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_rndscalev16sf_mask_round, "__builtin_ia32_rndscaleps_mask", IX86_BUILTIN_RNDSCALEPS, UNKNOWN, (int) V16SF_FTYPE_V16SF_INT_V16SF_HI_INT },
33106 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_rndscalev2df_round, "__builtin_ia32_rndscalesd_round", IX86_BUILTIN_RNDSCALESD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT_INT },
33107 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_rndscalev4sf_round, "__builtin_ia32_rndscaless_round", IX86_BUILTIN_RNDSCALESS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT_INT },
33108 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_scalefv8df_mask_round, "__builtin_ia32_scalefpd512_mask", IX86_BUILTIN_SCALEFPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI_INT },
33109 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_scalefv16sf_mask_round, "__builtin_ia32_scalefps512_mask", IX86_BUILTIN_SCALEFPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT },
33110 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vmscalefv2df_round, "__builtin_ia32_scalefsd_round", IX86_BUILTIN_SCALEFSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
33111 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vmscalefv4sf_round, "__builtin_ia32_scalefss_round", IX86_BUILTIN_SCALEFSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
33112 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_sqrtv8df2_mask_round, "__builtin_ia32_sqrtpd512_mask", IX86_BUILTIN_SQRTPD512_MASK, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_QI_INT },
33113 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_sqrtv16sf2_mask_round, "__builtin_ia32_sqrtps512_mask", IX86_BUILTIN_SQRTPS512_MASK, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_HI_INT },
33114 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse2_vmsqrtv2df2_round, "__builtin_ia32_sqrtsd_round", IX86_BUILTIN_SQRTSD_ROUND, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
33115 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse_vmsqrtv4sf2_round, "__builtin_ia32_sqrtss_round", IX86_BUILTIN_SQRTSS_ROUND, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
33116 { OPTION_MASK_ISA_AVX512F, CODE_FOR_subv8df3_mask_round, "__builtin_ia32_subpd512_mask", IX86_BUILTIN_SUBPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI_INT },
33117 { OPTION_MASK_ISA_AVX512F, CODE_FOR_subv16sf3_mask_round, "__builtin_ia32_subps512_mask", IX86_BUILTIN_SUBPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT },
33118 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse2_vmsubv2df3_round, "__builtin_ia32_subsd_round", IX86_BUILTIN_SUBSD_ROUND, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
33119 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse_vmsubv4sf3_round, "__builtin_ia32_subss_round", IX86_BUILTIN_SUBSS_ROUND, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
33120 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse2_cvtsd2si_round, "__builtin_ia32_vcvtsd2si32", IX86_BUILTIN_VCVTSD2SI32, UNKNOWN, (int) INT_FTYPE_V2DF_INT },
33121 { OPTION_MASK_ISA_AVX512F | OPTION_MASK_ISA_64BIT, CODE_FOR_sse2_cvtsd2siq_round, "__builtin_ia32_vcvtsd2si64", IX86_BUILTIN_VCVTSD2SI64, UNKNOWN, (int) INT64_FTYPE_V2DF_INT },
33122 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vcvtsd2usi_round, "__builtin_ia32_vcvtsd2usi32", IX86_BUILTIN_VCVTSD2USI32, UNKNOWN, (int) UINT_FTYPE_V2DF_INT },
33123 { OPTION_MASK_ISA_AVX512F | OPTION_MASK_ISA_64BIT, CODE_FOR_avx512f_vcvtsd2usiq_round, "__builtin_ia32_vcvtsd2usi64", IX86_BUILTIN_VCVTSD2USI64, UNKNOWN, (int) UINT64_FTYPE_V2DF_INT },
33124 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse_cvtss2si_round, "__builtin_ia32_vcvtss2si32", IX86_BUILTIN_VCVTSS2SI32, UNKNOWN, (int) INT_FTYPE_V4SF_INT },
33125 { OPTION_MASK_ISA_AVX512F | OPTION_MASK_ISA_64BIT, CODE_FOR_sse_cvtss2siq_round, "__builtin_ia32_vcvtss2si64", IX86_BUILTIN_VCVTSS2SI64, UNKNOWN, (int) INT64_FTYPE_V4SF_INT },
33126 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vcvtss2usi_round, "__builtin_ia32_vcvtss2usi32", IX86_BUILTIN_VCVTSS2USI32, UNKNOWN, (int) UINT_FTYPE_V4SF_INT },
33127 { OPTION_MASK_ISA_AVX512F | OPTION_MASK_ISA_64BIT, CODE_FOR_avx512f_vcvtss2usiq_round, "__builtin_ia32_vcvtss2usi64", IX86_BUILTIN_VCVTSS2USI64, UNKNOWN, (int) UINT64_FTYPE_V4SF_INT },
33128 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse2_cvttsd2si_round, "__builtin_ia32_vcvttsd2si32", IX86_BUILTIN_VCVTTSD2SI32, UNKNOWN, (int) INT_FTYPE_V2DF_INT },
33129 { OPTION_MASK_ISA_AVX512F | OPTION_MASK_ISA_64BIT, CODE_FOR_sse2_cvttsd2siq_round, "__builtin_ia32_vcvttsd2si64", IX86_BUILTIN_VCVTTSD2SI64, UNKNOWN, (int) INT64_FTYPE_V2DF_INT },
33130 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vcvttsd2usi_round, "__builtin_ia32_vcvttsd2usi32", IX86_BUILTIN_VCVTTSD2USI32, UNKNOWN, (int) UINT_FTYPE_V2DF_INT },
33131 { OPTION_MASK_ISA_AVX512F | OPTION_MASK_ISA_64BIT, CODE_FOR_avx512f_vcvttsd2usiq_round, "__builtin_ia32_vcvttsd2usi64", IX86_BUILTIN_VCVTTSD2USI64, UNKNOWN, (int) UINT64_FTYPE_V2DF_INT },
33132 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse_cvttss2si_round, "__builtin_ia32_vcvttss2si32", IX86_BUILTIN_VCVTTSS2SI32, UNKNOWN, (int) INT_FTYPE_V4SF_INT },
33133 { OPTION_MASK_ISA_AVX512F | OPTION_MASK_ISA_64BIT, CODE_FOR_sse_cvttss2siq_round, "__builtin_ia32_vcvttss2si64", IX86_BUILTIN_VCVTTSS2SI64, UNKNOWN, (int) INT64_FTYPE_V4SF_INT },
33134 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vcvttss2usi_round, "__builtin_ia32_vcvttss2usi32", IX86_BUILTIN_VCVTTSS2USI32, UNKNOWN, (int) UINT_FTYPE_V4SF_INT },
33135 { OPTION_MASK_ISA_AVX512F | OPTION_MASK_ISA_64BIT, CODE_FOR_avx512f_vcvttss2usiq_round, "__builtin_ia32_vcvttss2usi64", IX86_BUILTIN_VCVTTSS2USI64, UNKNOWN, (int) UINT64_FTYPE_V4SF_INT },
33136 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fmadd_v8df_mask_round, "__builtin_ia32_vfmaddpd512_mask", IX86_BUILTIN_VFMADDPD512_MASK, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI_INT },
33137 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fmadd_v8df_mask3_round, "__builtin_ia32_vfmaddpd512_mask3", IX86_BUILTIN_VFMADDPD512_MASK3, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI_INT },
33138 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fmadd_v8df_maskz_round, "__builtin_ia32_vfmaddpd512_maskz", IX86_BUILTIN_VFMADDPD512_MASKZ, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI_INT },
33139 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fmadd_v16sf_mask_round, "__builtin_ia32_vfmaddps512_mask", IX86_BUILTIN_VFMADDPS512_MASK, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT },
33140 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fmadd_v16sf_mask3_round, "__builtin_ia32_vfmaddps512_mask3", IX86_BUILTIN_VFMADDPS512_MASK3, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT },
33141 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fmadd_v16sf_maskz_round, "__builtin_ia32_vfmaddps512_maskz", IX86_BUILTIN_VFMADDPS512_MASKZ, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT },
33142 { OPTION_MASK_ISA_AVX512F, CODE_FOR_fmai_vmfmadd_v2df_round, "__builtin_ia32_vfmaddsd3_round", IX86_BUILTIN_VFMADDSD3_ROUND, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF_INT },
33143 { OPTION_MASK_ISA_AVX512F, CODE_FOR_fmai_vmfmadd_v4sf_round, "__builtin_ia32_vfmaddss3_round", IX86_BUILTIN_VFMADDSS3_ROUND, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF_INT },
33144 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fmaddsub_v8df_mask_round, "__builtin_ia32_vfmaddsubpd512_mask", IX86_BUILTIN_VFMADDSUBPD512_MASK, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI_INT },
33145 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fmaddsub_v8df_mask3_round, "__builtin_ia32_vfmaddsubpd512_mask3", IX86_BUILTIN_VFMADDSUBPD512_MASK3, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI_INT },
33146 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fmaddsub_v8df_maskz_round, "__builtin_ia32_vfmaddsubpd512_maskz", IX86_BUILTIN_VFMADDSUBPD512_MASKZ, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI_INT },
33147 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fmaddsub_v16sf_mask_round, "__builtin_ia32_vfmaddsubps512_mask", IX86_BUILTIN_VFMADDSUBPS512_MASK, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT },
33148 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fmaddsub_v16sf_mask3_round, "__builtin_ia32_vfmaddsubps512_mask3", IX86_BUILTIN_VFMADDSUBPS512_MASK3, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT },
33149 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fmaddsub_v16sf_maskz_round, "__builtin_ia32_vfmaddsubps512_maskz", IX86_BUILTIN_VFMADDSUBPS512_MASKZ, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT },
33150 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fmsubadd_v8df_mask3_round, "__builtin_ia32_vfmsubaddpd512_mask3", IX86_BUILTIN_VFMSUBADDPD512_MASK3, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI_INT },
33151 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fmsubadd_v16sf_mask3_round, "__builtin_ia32_vfmsubaddps512_mask3", IX86_BUILTIN_VFMSUBADDPS512_MASK3, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT },
33152 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fmsub_v8df_mask3_round, "__builtin_ia32_vfmsubpd512_mask3", IX86_BUILTIN_VFMSUBPD512_MASK3, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI_INT },
33153 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fmsub_v16sf_mask3_round, "__builtin_ia32_vfmsubps512_mask3", IX86_BUILTIN_VFMSUBPS512_MASK3, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT },
33154 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fnmadd_v8df_mask_round, "__builtin_ia32_vfnmaddpd512_mask", IX86_BUILTIN_VFNMADDPD512_MASK, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI_INT },
33155 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fnmadd_v16sf_mask_round, "__builtin_ia32_vfnmaddps512_mask", IX86_BUILTIN_VFNMADDPS512_MASK, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT },
33156 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fnmsub_v8df_mask_round, "__builtin_ia32_vfnmsubpd512_mask", IX86_BUILTIN_VFNMSUBPD512_MASK, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI_INT },
33157 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fnmsub_v8df_mask3_round, "__builtin_ia32_vfnmsubpd512_mask3", IX86_BUILTIN_VFNMSUBPD512_MASK3, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI_INT },
33158 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fnmsub_v16sf_mask_round, "__builtin_ia32_vfnmsubps512_mask", IX86_BUILTIN_VFNMSUBPS512_MASK, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT },
33159 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fnmsub_v16sf_mask3_round, "__builtin_ia32_vfnmsubps512_mask3", IX86_BUILTIN_VFNMSUBPS512_MASK3, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT },
33161 /* AVX512ER */
33162 { OPTION_MASK_ISA_AVX512ER, CODE_FOR_avx512er_exp2v8df_mask_round, "__builtin_ia32_exp2pd_mask", IX86_BUILTIN_EXP2PD_MASK, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_QI_INT },
33163 { OPTION_MASK_ISA_AVX512ER, CODE_FOR_avx512er_exp2v16sf_mask_round, "__builtin_ia32_exp2ps_mask", IX86_BUILTIN_EXP2PS_MASK, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_HI_INT },
33164 { OPTION_MASK_ISA_AVX512ER, CODE_FOR_avx512er_rcp28v8df_mask_round, "__builtin_ia32_rcp28pd_mask", IX86_BUILTIN_RCP28PD, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_QI_INT },
33165 { OPTION_MASK_ISA_AVX512ER, CODE_FOR_avx512er_rcp28v16sf_mask_round, "__builtin_ia32_rcp28ps_mask", IX86_BUILTIN_RCP28PS, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_HI_INT },
33166 { OPTION_MASK_ISA_AVX512ER, CODE_FOR_avx512er_vmrcp28v2df_round, "__builtin_ia32_rcp28sd_round", IX86_BUILTIN_RCP28SD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
33167 { OPTION_MASK_ISA_AVX512ER, CODE_FOR_avx512er_vmrcp28v4sf_round, "__builtin_ia32_rcp28ss_round", IX86_BUILTIN_RCP28SS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
33168 { OPTION_MASK_ISA_AVX512ER, CODE_FOR_avx512er_rsqrt28v8df_mask_round, "__builtin_ia32_rsqrt28pd_mask", IX86_BUILTIN_RSQRT28PD, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_QI_INT },
33169 { OPTION_MASK_ISA_AVX512ER, CODE_FOR_avx512er_rsqrt28v16sf_mask_round, "__builtin_ia32_rsqrt28ps_mask", IX86_BUILTIN_RSQRT28PS, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_HI_INT },
33170 { OPTION_MASK_ISA_AVX512ER, CODE_FOR_avx512er_vmrsqrt28v2df_round, "__builtin_ia32_rsqrt28sd_round", IX86_BUILTIN_RSQRT28SD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
33171 { OPTION_MASK_ISA_AVX512ER, CODE_FOR_avx512er_vmrsqrt28v4sf_round, "__builtin_ia32_rsqrt28ss_round", IX86_BUILTIN_RSQRT28SS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
33173 /* AVX512DQ. */
33174 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_avx512dq_rangesv2df_round, "__builtin_ia32_rangesd128_round", IX86_BUILTIN_RANGESD128, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT_INT },
33175 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_avx512dq_rangesv4sf_round, "__builtin_ia32_rangess128_round", IX86_BUILTIN_RANGESS128, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT_INT },
33176 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_fix_notruncv8dfv8di2_mask_round, "__builtin_ia32_cvtpd2qq512_mask", IX86_BUILTIN_CVTPD2QQ512, UNKNOWN, (int) V8DI_FTYPE_V8DF_V8DI_QI_INT },
33177 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_avx512dq_cvtps2qqv8di_mask_round, "__builtin_ia32_cvtps2qq512_mask", IX86_BUILTIN_CVTPS2QQ512, UNKNOWN, (int) V8DI_FTYPE_V8SF_V8DI_QI_INT },
33178 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_ufix_notruncv8dfv8di2_mask_round, "__builtin_ia32_cvtpd2uqq512_mask", IX86_BUILTIN_CVTPD2UQQ512, UNKNOWN, (int) V8DI_FTYPE_V8DF_V8DI_QI_INT },
33179 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_avx512dq_cvtps2uqqv8di_mask_round, "__builtin_ia32_cvtps2uqq512_mask", IX86_BUILTIN_CVTPS2UQQ512, UNKNOWN, (int) V8DI_FTYPE_V8SF_V8DI_QI_INT },
33180 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_floatv8div8sf2_mask_round, "__builtin_ia32_cvtqq2ps512_mask", IX86_BUILTIN_CVTQQ2PS512, UNKNOWN, (int) V8SF_FTYPE_V8DI_V8SF_QI_INT },
33181 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_ufloatv8div8sf2_mask_round, "__builtin_ia32_cvtuqq2ps512_mask", IX86_BUILTIN_CVTUQQ2PS512, UNKNOWN, (int) V8SF_FTYPE_V8DI_V8SF_QI_INT },
33182 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_floatv8div8df2_mask_round, "__builtin_ia32_cvtqq2pd512_mask", IX86_BUILTIN_CVTQQ2PD512, UNKNOWN, (int) V8DF_FTYPE_V8DI_V8DF_QI_INT },
33183 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_ufloatv8div8df2_mask_round, "__builtin_ia32_cvtuqq2pd512_mask", IX86_BUILTIN_CVTUQQ2PD512, UNKNOWN, (int) V8DF_FTYPE_V8DI_V8DF_QI_INT },
33184 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_fix_truncv8sfv8di2_mask_round, "__builtin_ia32_cvttps2qq512_mask", IX86_BUILTIN_CVTTPS2QQ512, UNKNOWN, (int) V8DI_FTYPE_V8SF_V8DI_QI_INT },
33185 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_ufix_truncv8sfv8di2_mask_round, "__builtin_ia32_cvttps2uqq512_mask", IX86_BUILTIN_CVTTPS2UQQ512, UNKNOWN, (int) V8DI_FTYPE_V8SF_V8DI_QI_INT },
33186 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_fix_truncv8dfv8di2_mask_round, "__builtin_ia32_cvttpd2qq512_mask", IX86_BUILTIN_CVTTPD2QQ512, UNKNOWN, (int) V8DI_FTYPE_V8DF_V8DI_QI_INT },
33187 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_ufix_truncv8dfv8di2_mask_round, "__builtin_ia32_cvttpd2uqq512_mask", IX86_BUILTIN_CVTTPD2UQQ512, UNKNOWN, (int) V8DI_FTYPE_V8DF_V8DI_QI_INT },
33188 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_avx512dq_rangepv16sf_mask_round, "__builtin_ia32_rangeps512_mask", IX86_BUILTIN_RANGEPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_INT_V16SF_HI_INT },
33189 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_avx512dq_rangepv8df_mask_round, "__builtin_ia32_rangepd512_mask", IX86_BUILTIN_RANGEPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_INT_V8DF_QI_INT },
33190 };
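/* Editorial note, not from the original source: every bdesc_round_args
   entry carries one extra trailing INT operand holding the embedded
   rounding / SAE control, which ix86_expand_round_builtin validates
   before emitting the insn.  Assuming the usual avx512fintrin.h
   wrappers, the first AVX512F entry above is reached roughly as:

     __m512d a, b, c;
     c = _mm512_add_round_pd (a, b,
                              _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC);
     // ... which forwards to something like
     // __builtin_ia32_addpd512_mask (a, b, src, (__mmask8) -1, rounding)
     // matching V8DF_FTYPE_V8DF_V8DF_V8DF_QI_INT.  */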
33192 /* Builtins for MPX. */
33193 static const struct builtin_description bdesc_mpx[] =
33194 {
33195 { OPTION_MASK_ISA_MPX, (enum insn_code)0, "__builtin_ia32_bndstx", IX86_BUILTIN_BNDSTX, UNKNOWN, (int) VOID_FTYPE_PCVOID_BND_PCVOID },
33196 { OPTION_MASK_ISA_MPX, (enum insn_code)0, "__builtin_ia32_bndcl", IX86_BUILTIN_BNDCL, UNKNOWN, (int) VOID_FTYPE_PCVOID_BND },
33197 { OPTION_MASK_ISA_MPX, (enum insn_code)0, "__builtin_ia32_bndcu", IX86_BUILTIN_BNDCU, UNKNOWN, (int) VOID_FTYPE_PCVOID_BND },
33198 };
33200 /* Const builtins for MPX. */
33201 static const struct builtin_description bdesc_mpx_const[] =
33202 {
33203 { OPTION_MASK_ISA_MPX, (enum insn_code)0, "__builtin_ia32_bndmk", IX86_BUILTIN_BNDMK, UNKNOWN, (int) BND_FTYPE_PCVOID_ULONG },
33204 { OPTION_MASK_ISA_MPX, (enum insn_code)0, "__builtin_ia32_bndldx", IX86_BUILTIN_BNDLDX, UNKNOWN, (int) BND_FTYPE_PCVOID_PCVOID },
33205 { OPTION_MASK_ISA_MPX, (enum insn_code)0, "__builtin_ia32_narrow_bounds", IX86_BUILTIN_BNDNARROW, UNKNOWN, (int) PVOID_FTYPE_PCVOID_BND_ULONG },
33206 { OPTION_MASK_ISA_MPX, (enum insn_code)0, "__builtin_ia32_bndint", IX86_BUILTIN_BNDINT, UNKNOWN, (int) BND_FTYPE_BND_BND },
33207 { OPTION_MASK_ISA_MPX, (enum insn_code)0, "__builtin_ia32_sizeof", IX86_BUILTIN_SIZEOF, UNKNOWN, (int) ULONG_FTYPE_VOID },
33208 { OPTION_MASK_ISA_MPX, (enum insn_code)0, "__builtin_ia32_bndlower", IX86_BUILTIN_BNDLOWER, UNKNOWN, (int) PVOID_FTYPE_BND },
33209 { OPTION_MASK_ISA_MPX, (enum insn_code)0, "__builtin_ia32_bndupper", IX86_BUILTIN_BNDUPPER, UNKNOWN, (int) PVOID_FTYPE_BND },
33210 { OPTION_MASK_ISA_MPX, (enum insn_code)0, "__builtin_ia32_bndret", IX86_BUILTIN_BNDRET, UNKNOWN, (int) BND_FTYPE_PCVOID },
33211 };
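/* Editorial note, not from the original source: the MPX entries use
   (enum insn_code) 0 because they are not tied to a single named insn
   pattern; ix86_expand_builtin expands each IX86_BUILTIN_BND* code with
   dedicated logic.  These builtins are normally emitted by the pointer
   bounds checker (-fcheck-pointer-bounds -mmpx) rather than written by
   hand; the snippet below is only an illustration of the signatures, with
   SIZE and the bounds variable purely hypothetical:

     void *p = ...;                              // some allocation of SIZE bytes
     bnd = __builtin_ia32_bndmk (p, SIZE);       // bounds roughly [p, p + SIZE - 1]
     __builtin_ia32_bndcl (p, bnd);              // check against lower bound
     __builtin_ia32_bndcu (p + SIZE - 1, bnd);   // check against upper bound  */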
33213 /* FMA4 and XOP. */
33214 #define MULTI_ARG_4_DF2_DI_I V2DF_FTYPE_V2DF_V2DF_V2DI_INT
33215 #define MULTI_ARG_4_DF2_DI_I1 V4DF_FTYPE_V4DF_V4DF_V4DI_INT
33216 #define MULTI_ARG_4_SF2_SI_I V4SF_FTYPE_V4SF_V4SF_V4SI_INT
33217 #define MULTI_ARG_4_SF2_SI_I1 V8SF_FTYPE_V8SF_V8SF_V8SI_INT
33218 #define MULTI_ARG_3_SF V4SF_FTYPE_V4SF_V4SF_V4SF
33219 #define MULTI_ARG_3_DF V2DF_FTYPE_V2DF_V2DF_V2DF
33220 #define MULTI_ARG_3_SF2 V8SF_FTYPE_V8SF_V8SF_V8SF
33221 #define MULTI_ARG_3_DF2 V4DF_FTYPE_V4DF_V4DF_V4DF
33222 #define MULTI_ARG_3_DI V2DI_FTYPE_V2DI_V2DI_V2DI
33223 #define MULTI_ARG_3_SI V4SI_FTYPE_V4SI_V4SI_V4SI
33224 #define MULTI_ARG_3_SI_DI V4SI_FTYPE_V4SI_V4SI_V2DI
33225 #define MULTI_ARG_3_HI V8HI_FTYPE_V8HI_V8HI_V8HI
33226 #define MULTI_ARG_3_HI_SI V8HI_FTYPE_V8HI_V8HI_V4SI
33227 #define MULTI_ARG_3_QI V16QI_FTYPE_V16QI_V16QI_V16QI
33228 #define MULTI_ARG_3_DI2 V4DI_FTYPE_V4DI_V4DI_V4DI
33229 #define MULTI_ARG_3_SI2 V8SI_FTYPE_V8SI_V8SI_V8SI
33230 #define MULTI_ARG_3_HI2 V16HI_FTYPE_V16HI_V16HI_V16HI
33231 #define MULTI_ARG_3_QI2 V32QI_FTYPE_V32QI_V32QI_V32QI
33232 #define MULTI_ARG_2_SF V4SF_FTYPE_V4SF_V4SF
33233 #define MULTI_ARG_2_DF V2DF_FTYPE_V2DF_V2DF
33234 #define MULTI_ARG_2_DI V2DI_FTYPE_V2DI_V2DI
33235 #define MULTI_ARG_2_SI V4SI_FTYPE_V4SI_V4SI
33236 #define MULTI_ARG_2_HI V8HI_FTYPE_V8HI_V8HI
33237 #define MULTI_ARG_2_QI V16QI_FTYPE_V16QI_V16QI
33238 #define MULTI_ARG_2_DI_IMM V2DI_FTYPE_V2DI_SI
33239 #define MULTI_ARG_2_SI_IMM V4SI_FTYPE_V4SI_SI
33240 #define MULTI_ARG_2_HI_IMM V8HI_FTYPE_V8HI_SI
33241 #define MULTI_ARG_2_QI_IMM V16QI_FTYPE_V16QI_SI
33242 #define MULTI_ARG_2_DI_CMP V2DI_FTYPE_V2DI_V2DI_CMP
33243 #define MULTI_ARG_2_SI_CMP V4SI_FTYPE_V4SI_V4SI_CMP
33244 #define MULTI_ARG_2_HI_CMP V8HI_FTYPE_V8HI_V8HI_CMP
33245 #define MULTI_ARG_2_QI_CMP V16QI_FTYPE_V16QI_V16QI_CMP
33246 #define MULTI_ARG_2_SF_TF V4SF_FTYPE_V4SF_V4SF_TF
33247 #define MULTI_ARG_2_DF_TF V2DF_FTYPE_V2DF_V2DF_TF
33248 #define MULTI_ARG_2_DI_TF V2DI_FTYPE_V2DI_V2DI_TF
33249 #define MULTI_ARG_2_SI_TF V4SI_FTYPE_V4SI_V4SI_TF
33250 #define MULTI_ARG_2_HI_TF V8HI_FTYPE_V8HI_V8HI_TF
33251 #define MULTI_ARG_2_QI_TF V16QI_FTYPE_V16QI_V16QI_TF
33252 #define MULTI_ARG_1_SF V4SF_FTYPE_V4SF
33253 #define MULTI_ARG_1_DF V2DF_FTYPE_V2DF
33254 #define MULTI_ARG_1_SF2 V8SF_FTYPE_V8SF
33255 #define MULTI_ARG_1_DF2 V4DF_FTYPE_V4DF
33256 #define MULTI_ARG_1_DI V2DI_FTYPE_V2DI
33257 #define MULTI_ARG_1_SI V4SI_FTYPE_V4SI
33258 #define MULTI_ARG_1_HI V8HI_FTYPE_V8HI
33259 #define MULTI_ARG_1_QI V16QI_FTYPE_V16QI
33260 #define MULTI_ARG_1_SI_DI V2DI_FTYPE_V4SI
33261 #define MULTI_ARG_1_HI_DI V2DI_FTYPE_V8HI
33262 #define MULTI_ARG_1_HI_SI V4SI_FTYPE_V8HI
33263 #define MULTI_ARG_1_QI_DI V2DI_FTYPE_V16QI
33264 #define MULTI_ARG_1_QI_SI V4SI_FTYPE_V16QI
33265 #define MULTI_ARG_1_QI_HI V8HI_FTYPE_V16QI
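/* Editorial note, not from the original source: the MULTI_ARG_* macros
   above are plain aliases for ix86_builtin_func_type enumerators, kept
   short so the bdesc_multi_arg rows below stay readable.  For instance
   the first FMA4 entry uses MULTI_ARG_3_SF, i.e.
   V4SF_FTYPE_V4SF_V4SF_V4SF, which corresponds to a prototype along the
   lines of:

     __v4sf __builtin_ia32_vfmaddss (__v4sf a, __v4sf b, __v4sf c);  */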
33267 static const struct builtin_description bdesc_multi_arg[] =
33268 {
33269 { OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_vmfmadd_v4sf,
33270 "__builtin_ia32_vfmaddss", IX86_BUILTIN_VFMADDSS,
33271 UNKNOWN, (int)MULTI_ARG_3_SF },
33272 { OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_vmfmadd_v2df,
33273 "__builtin_ia32_vfmaddsd", IX86_BUILTIN_VFMADDSD,
33274 UNKNOWN, (int)MULTI_ARG_3_DF },
33276 { OPTION_MASK_ISA_FMA, CODE_FOR_fmai_vmfmadd_v4sf,
33277 "__builtin_ia32_vfmaddss3", IX86_BUILTIN_VFMADDSS3,
33278 UNKNOWN, (int)MULTI_ARG_3_SF },
33279 { OPTION_MASK_ISA_FMA, CODE_FOR_fmai_vmfmadd_v2df,
33280 "__builtin_ia32_vfmaddsd3", IX86_BUILTIN_VFMADDSD3,
33281 UNKNOWN, (int)MULTI_ARG_3_DF },
33283 { OPTION_MASK_ISA_FMA | OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_fmadd_v4sf,
33284 "__builtin_ia32_vfmaddps", IX86_BUILTIN_VFMADDPS,
33285 UNKNOWN, (int)MULTI_ARG_3_SF },
33286 { OPTION_MASK_ISA_FMA | OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_fmadd_v2df,
33287 "__builtin_ia32_vfmaddpd", IX86_BUILTIN_VFMADDPD,
33288 UNKNOWN, (int)MULTI_ARG_3_DF },
33289 { OPTION_MASK_ISA_FMA | OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_fmadd_v8sf,
33290 "__builtin_ia32_vfmaddps256", IX86_BUILTIN_VFMADDPS256,
33291 UNKNOWN, (int)MULTI_ARG_3_SF2 },
33292 { OPTION_MASK_ISA_FMA | OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_fmadd_v4df,
33293 "__builtin_ia32_vfmaddpd256", IX86_BUILTIN_VFMADDPD256,
33294 UNKNOWN, (int)MULTI_ARG_3_DF2 },
33296 { OPTION_MASK_ISA_FMA | OPTION_MASK_ISA_FMA4, CODE_FOR_fmaddsub_v4sf,
33297 "__builtin_ia32_vfmaddsubps", IX86_BUILTIN_VFMADDSUBPS,
33298 UNKNOWN, (int)MULTI_ARG_3_SF },
33299 { OPTION_MASK_ISA_FMA | OPTION_MASK_ISA_FMA4, CODE_FOR_fmaddsub_v2df,
33300 "__builtin_ia32_vfmaddsubpd", IX86_BUILTIN_VFMADDSUBPD,
33301 UNKNOWN, (int)MULTI_ARG_3_DF },
33302 { OPTION_MASK_ISA_FMA | OPTION_MASK_ISA_FMA4, CODE_FOR_fmaddsub_v8sf,
33303 "__builtin_ia32_vfmaddsubps256", IX86_BUILTIN_VFMADDSUBPS256,
33304 UNKNOWN, (int)MULTI_ARG_3_SF2 },
33305 { OPTION_MASK_ISA_FMA | OPTION_MASK_ISA_FMA4, CODE_FOR_fmaddsub_v4df,
33306 "__builtin_ia32_vfmaddsubpd256", IX86_BUILTIN_VFMADDSUBPD256,
33307 UNKNOWN, (int)MULTI_ARG_3_DF2 },
33309 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v2di, "__builtin_ia32_vpcmov", IX86_BUILTIN_VPCMOV, UNKNOWN, (int)MULTI_ARG_3_DI },
33310 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v2di, "__builtin_ia32_vpcmov_v2di", IX86_BUILTIN_VPCMOV_V2DI, UNKNOWN, (int)MULTI_ARG_3_DI },
33311 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v4si, "__builtin_ia32_vpcmov_v4si", IX86_BUILTIN_VPCMOV_V4SI, UNKNOWN, (int)MULTI_ARG_3_SI },
33312 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v8hi, "__builtin_ia32_vpcmov_v8hi", IX86_BUILTIN_VPCMOV_V8HI, UNKNOWN, (int)MULTI_ARG_3_HI },
33313 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v16qi, "__builtin_ia32_vpcmov_v16qi", IX86_BUILTIN_VPCMOV_V16QI, UNKNOWN, (int)MULTI_ARG_3_QI },
33314 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v2df, "__builtin_ia32_vpcmov_v2df", IX86_BUILTIN_VPCMOV_V2DF, UNKNOWN, (int)MULTI_ARG_3_DF },
33315 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v4sf, "__builtin_ia32_vpcmov_v4sf", IX86_BUILTIN_VPCMOV_V4SF, UNKNOWN, (int)MULTI_ARG_3_SF },
33317 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v4di256, "__builtin_ia32_vpcmov256", IX86_BUILTIN_VPCMOV256, UNKNOWN, (int)MULTI_ARG_3_DI2 },
33318 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v4di256, "__builtin_ia32_vpcmov_v4di256", IX86_BUILTIN_VPCMOV_V4DI256, UNKNOWN, (int)MULTI_ARG_3_DI2 },
33319 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v8si256, "__builtin_ia32_vpcmov_v8si256", IX86_BUILTIN_VPCMOV_V8SI256, UNKNOWN, (int)MULTI_ARG_3_SI2 },
33320 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v16hi256, "__builtin_ia32_vpcmov_v16hi256", IX86_BUILTIN_VPCMOV_V16HI256, UNKNOWN, (int)MULTI_ARG_3_HI2 },
33321 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v32qi256, "__builtin_ia32_vpcmov_v32qi256", IX86_BUILTIN_VPCMOV_V32QI256, UNKNOWN, (int)MULTI_ARG_3_QI2 },
33322 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v4df256, "__builtin_ia32_vpcmov_v4df256", IX86_BUILTIN_VPCMOV_V4DF256, UNKNOWN, (int)MULTI_ARG_3_DF2 },
33323 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v8sf256, "__builtin_ia32_vpcmov_v8sf256", IX86_BUILTIN_VPCMOV_V8SF256, UNKNOWN, (int)MULTI_ARG_3_SF2 },
33325 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pperm, "__builtin_ia32_vpperm", IX86_BUILTIN_VPPERM, UNKNOWN, (int)MULTI_ARG_3_QI },
33327 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacssww, "__builtin_ia32_vpmacssww", IX86_BUILTIN_VPMACSSWW, UNKNOWN, (int)MULTI_ARG_3_HI },
33328 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacsww, "__builtin_ia32_vpmacsww", IX86_BUILTIN_VPMACSWW, UNKNOWN, (int)MULTI_ARG_3_HI },
33329 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacsswd, "__builtin_ia32_vpmacsswd", IX86_BUILTIN_VPMACSSWD, UNKNOWN, (int)MULTI_ARG_3_HI_SI },
33330 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacswd, "__builtin_ia32_vpmacswd", IX86_BUILTIN_VPMACSWD, UNKNOWN, (int)MULTI_ARG_3_HI_SI },
33331 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacssdd, "__builtin_ia32_vpmacssdd", IX86_BUILTIN_VPMACSSDD, UNKNOWN, (int)MULTI_ARG_3_SI },
33332 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacsdd, "__builtin_ia32_vpmacsdd", IX86_BUILTIN_VPMACSDD, UNKNOWN, (int)MULTI_ARG_3_SI },
33333 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacssdql, "__builtin_ia32_vpmacssdql", IX86_BUILTIN_VPMACSSDQL, UNKNOWN, (int)MULTI_ARG_3_SI_DI },
33334 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacssdqh, "__builtin_ia32_vpmacssdqh", IX86_BUILTIN_VPMACSSDQH, UNKNOWN, (int)MULTI_ARG_3_SI_DI },
33335 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacsdql, "__builtin_ia32_vpmacsdql", IX86_BUILTIN_VPMACSDQL, UNKNOWN, (int)MULTI_ARG_3_SI_DI },
33336 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacsdqh, "__builtin_ia32_vpmacsdqh", IX86_BUILTIN_VPMACSDQH, UNKNOWN, (int)MULTI_ARG_3_SI_DI },
33337 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmadcsswd, "__builtin_ia32_vpmadcsswd", IX86_BUILTIN_VPMADCSSWD, UNKNOWN, (int)MULTI_ARG_3_HI_SI },
33338 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmadcswd, "__builtin_ia32_vpmadcswd", IX86_BUILTIN_VPMADCSWD, UNKNOWN, (int)MULTI_ARG_3_HI_SI },
33340 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vrotlv2di3, "__builtin_ia32_vprotq", IX86_BUILTIN_VPROTQ, UNKNOWN, (int)MULTI_ARG_2_DI },
33341 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vrotlv4si3, "__builtin_ia32_vprotd", IX86_BUILTIN_VPROTD, UNKNOWN, (int)MULTI_ARG_2_SI },
33342 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vrotlv8hi3, "__builtin_ia32_vprotw", IX86_BUILTIN_VPROTW, UNKNOWN, (int)MULTI_ARG_2_HI },
33343 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vrotlv16qi3, "__builtin_ia32_vprotb", IX86_BUILTIN_VPROTB, UNKNOWN, (int)MULTI_ARG_2_QI },
33344 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_rotlv2di3, "__builtin_ia32_vprotqi", IX86_BUILTIN_VPROTQ_IMM, UNKNOWN, (int)MULTI_ARG_2_DI_IMM },
33345 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_rotlv4si3, "__builtin_ia32_vprotdi", IX86_BUILTIN_VPROTD_IMM, UNKNOWN, (int)MULTI_ARG_2_SI_IMM },
33346 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_rotlv8hi3, "__builtin_ia32_vprotwi", IX86_BUILTIN_VPROTW_IMM, UNKNOWN, (int)MULTI_ARG_2_HI_IMM },
33347 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_rotlv16qi3, "__builtin_ia32_vprotbi", IX86_BUILTIN_VPROTB_IMM, UNKNOWN, (int)MULTI_ARG_2_QI_IMM },
33348 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_shav2di3, "__builtin_ia32_vpshaq", IX86_BUILTIN_VPSHAQ, UNKNOWN, (int)MULTI_ARG_2_DI },
33349 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_shav4si3, "__builtin_ia32_vpshad", IX86_BUILTIN_VPSHAD, UNKNOWN, (int)MULTI_ARG_2_SI },
33350 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_shav8hi3, "__builtin_ia32_vpshaw", IX86_BUILTIN_VPSHAW, UNKNOWN, (int)MULTI_ARG_2_HI },
33351 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_shav16qi3, "__builtin_ia32_vpshab", IX86_BUILTIN_VPSHAB, UNKNOWN, (int)MULTI_ARG_2_QI },
33352 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_shlv2di3, "__builtin_ia32_vpshlq", IX86_BUILTIN_VPSHLQ, UNKNOWN, (int)MULTI_ARG_2_DI },
33353 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_shlv4si3, "__builtin_ia32_vpshld", IX86_BUILTIN_VPSHLD, UNKNOWN, (int)MULTI_ARG_2_SI },
33354 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_shlv8hi3, "__builtin_ia32_vpshlw", IX86_BUILTIN_VPSHLW, UNKNOWN, (int)MULTI_ARG_2_HI },
33355 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_shlv16qi3, "__builtin_ia32_vpshlb", IX86_BUILTIN_VPSHLB, UNKNOWN, (int)MULTI_ARG_2_QI },
33357 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vmfrczv4sf2, "__builtin_ia32_vfrczss", IX86_BUILTIN_VFRCZSS, UNKNOWN, (int)MULTI_ARG_1_SF },
33358 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vmfrczv2df2, "__builtin_ia32_vfrczsd", IX86_BUILTIN_VFRCZSD, UNKNOWN, (int)MULTI_ARG_1_DF },
33359 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_frczv4sf2, "__builtin_ia32_vfrczps", IX86_BUILTIN_VFRCZPS, UNKNOWN, (int)MULTI_ARG_1_SF },
33360 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_frczv2df2, "__builtin_ia32_vfrczpd", IX86_BUILTIN_VFRCZPD, UNKNOWN, (int)MULTI_ARG_1_DF },
33361 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_frczv8sf2, "__builtin_ia32_vfrczps256", IX86_BUILTIN_VFRCZPS256, UNKNOWN, (int)MULTI_ARG_1_SF2 },
33362 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_frczv4df2, "__builtin_ia32_vfrczpd256", IX86_BUILTIN_VFRCZPD256, UNKNOWN, (int)MULTI_ARG_1_DF2 },
33364 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phaddbw, "__builtin_ia32_vphaddbw", IX86_BUILTIN_VPHADDBW, UNKNOWN, (int)MULTI_ARG_1_QI_HI },
33365 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phaddbd, "__builtin_ia32_vphaddbd", IX86_BUILTIN_VPHADDBD, UNKNOWN, (int)MULTI_ARG_1_QI_SI },
33366 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phaddbq, "__builtin_ia32_vphaddbq", IX86_BUILTIN_VPHADDBQ, UNKNOWN, (int)MULTI_ARG_1_QI_DI },
33367 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phaddwd, "__builtin_ia32_vphaddwd", IX86_BUILTIN_VPHADDWD, UNKNOWN, (int)MULTI_ARG_1_HI_SI },
33368 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phaddwq, "__builtin_ia32_vphaddwq", IX86_BUILTIN_VPHADDWQ, UNKNOWN, (int)MULTI_ARG_1_HI_DI },
33369 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phadddq, "__builtin_ia32_vphadddq", IX86_BUILTIN_VPHADDDQ, UNKNOWN, (int)MULTI_ARG_1_SI_DI },
33370 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phaddubw, "__builtin_ia32_vphaddubw", IX86_BUILTIN_VPHADDUBW, UNKNOWN, (int)MULTI_ARG_1_QI_HI },
33371 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phaddubd, "__builtin_ia32_vphaddubd", IX86_BUILTIN_VPHADDUBD, UNKNOWN, (int)MULTI_ARG_1_QI_SI },
33372 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phaddubq, "__builtin_ia32_vphaddubq", IX86_BUILTIN_VPHADDUBQ, UNKNOWN, (int)MULTI_ARG_1_QI_DI },
33373 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phadduwd, "__builtin_ia32_vphadduwd", IX86_BUILTIN_VPHADDUWD, UNKNOWN, (int)MULTI_ARG_1_HI_SI },
33374 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phadduwq, "__builtin_ia32_vphadduwq", IX86_BUILTIN_VPHADDUWQ, UNKNOWN, (int)MULTI_ARG_1_HI_DI },
33375 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phaddudq, "__builtin_ia32_vphaddudq", IX86_BUILTIN_VPHADDUDQ, UNKNOWN, (int)MULTI_ARG_1_SI_DI },
33376 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phsubbw, "__builtin_ia32_vphsubbw", IX86_BUILTIN_VPHSUBBW, UNKNOWN, (int)MULTI_ARG_1_QI_HI },
33377 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phsubwd, "__builtin_ia32_vphsubwd", IX86_BUILTIN_VPHSUBWD, UNKNOWN, (int)MULTI_ARG_1_HI_SI },
33378 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phsubdq, "__builtin_ia32_vphsubdq", IX86_BUILTIN_VPHSUBDQ, UNKNOWN, (int)MULTI_ARG_1_SI_DI },
33380 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv16qi3, "__builtin_ia32_vpcomeqb", IX86_BUILTIN_VPCOMEQB, EQ, (int)MULTI_ARG_2_QI_CMP },
33381 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv16qi3, "__builtin_ia32_vpcomneb", IX86_BUILTIN_VPCOMNEB, NE, (int)MULTI_ARG_2_QI_CMP },
33382 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv16qi3, "__builtin_ia32_vpcomneqb", IX86_BUILTIN_VPCOMNEB, NE, (int)MULTI_ARG_2_QI_CMP },
33383 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv16qi3, "__builtin_ia32_vpcomltb", IX86_BUILTIN_VPCOMLTB, LT, (int)MULTI_ARG_2_QI_CMP },
33384 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv16qi3, "__builtin_ia32_vpcomleb", IX86_BUILTIN_VPCOMLEB, LE, (int)MULTI_ARG_2_QI_CMP },
33385 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv16qi3, "__builtin_ia32_vpcomgtb", IX86_BUILTIN_VPCOMGTB, GT, (int)MULTI_ARG_2_QI_CMP },
33386 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv16qi3, "__builtin_ia32_vpcomgeb", IX86_BUILTIN_VPCOMGEB, GE, (int)MULTI_ARG_2_QI_CMP },
33388 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv8hi3, "__builtin_ia32_vpcomeqw", IX86_BUILTIN_VPCOMEQW, EQ, (int)MULTI_ARG_2_HI_CMP },
33389 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv8hi3, "__builtin_ia32_vpcomnew", IX86_BUILTIN_VPCOMNEW, NE, (int)MULTI_ARG_2_HI_CMP },
33390 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv8hi3, "__builtin_ia32_vpcomneqw", IX86_BUILTIN_VPCOMNEW, NE, (int)MULTI_ARG_2_HI_CMP },
33391 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv8hi3, "__builtin_ia32_vpcomltw", IX86_BUILTIN_VPCOMLTW, LT, (int)MULTI_ARG_2_HI_CMP },
33392 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv8hi3, "__builtin_ia32_vpcomlew", IX86_BUILTIN_VPCOMLEW, LE, (int)MULTI_ARG_2_HI_CMP },
33393 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv8hi3, "__builtin_ia32_vpcomgtw", IX86_BUILTIN_VPCOMGTW, GT, (int)MULTI_ARG_2_HI_CMP },
33394 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv8hi3, "__builtin_ia32_vpcomgew", IX86_BUILTIN_VPCOMGEW, GE, (int)MULTI_ARG_2_HI_CMP },
33396 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv4si3, "__builtin_ia32_vpcomeqd", IX86_BUILTIN_VPCOMEQD, EQ, (int)MULTI_ARG_2_SI_CMP },
33397 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv4si3, "__builtin_ia32_vpcomned", IX86_BUILTIN_VPCOMNED, NE, (int)MULTI_ARG_2_SI_CMP },
33398 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv4si3, "__builtin_ia32_vpcomneqd", IX86_BUILTIN_VPCOMNED, NE, (int)MULTI_ARG_2_SI_CMP },
33399 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv4si3, "__builtin_ia32_vpcomltd", IX86_BUILTIN_VPCOMLTD, LT, (int)MULTI_ARG_2_SI_CMP },
33400 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv4si3, "__builtin_ia32_vpcomled", IX86_BUILTIN_VPCOMLED, LE, (int)MULTI_ARG_2_SI_CMP },
33401 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv4si3, "__builtin_ia32_vpcomgtd", IX86_BUILTIN_VPCOMGTD, GT, (int)MULTI_ARG_2_SI_CMP },
33402 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv4si3, "__builtin_ia32_vpcomged", IX86_BUILTIN_VPCOMGED, GE, (int)MULTI_ARG_2_SI_CMP },
33404 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv2di3, "__builtin_ia32_vpcomeqq", IX86_BUILTIN_VPCOMEQQ, EQ, (int)MULTI_ARG_2_DI_CMP },
33405 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv2di3, "__builtin_ia32_vpcomneq", IX86_BUILTIN_VPCOMNEQ, NE, (int)MULTI_ARG_2_DI_CMP },
33406 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv2di3, "__builtin_ia32_vpcomneqq", IX86_BUILTIN_VPCOMNEQ, NE, (int)MULTI_ARG_2_DI_CMP },
33407 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv2di3, "__builtin_ia32_vpcomltq", IX86_BUILTIN_VPCOMLTQ, LT, (int)MULTI_ARG_2_DI_CMP },
33408 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv2di3, "__builtin_ia32_vpcomleq", IX86_BUILTIN_VPCOMLEQ, LE, (int)MULTI_ARG_2_DI_CMP },
33409 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv2di3, "__builtin_ia32_vpcomgtq", IX86_BUILTIN_VPCOMGTQ, GT, (int)MULTI_ARG_2_DI_CMP },
33410 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv2di3, "__builtin_ia32_vpcomgeq", IX86_BUILTIN_VPCOMGEQ, GE, (int)MULTI_ARG_2_DI_CMP },
33412 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v16qi3,"__builtin_ia32_vpcomequb", IX86_BUILTIN_VPCOMEQUB, EQ, (int)MULTI_ARG_2_QI_CMP },
33413 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v16qi3,"__builtin_ia32_vpcomneub", IX86_BUILTIN_VPCOMNEUB, NE, (int)MULTI_ARG_2_QI_CMP },
33414 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v16qi3,"__builtin_ia32_vpcomnequb", IX86_BUILTIN_VPCOMNEUB, NE, (int)MULTI_ARG_2_QI_CMP },
33415 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv16qi3, "__builtin_ia32_vpcomltub", IX86_BUILTIN_VPCOMLTUB, LTU, (int)MULTI_ARG_2_QI_CMP },
33416 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv16qi3, "__builtin_ia32_vpcomleub", IX86_BUILTIN_VPCOMLEUB, LEU, (int)MULTI_ARG_2_QI_CMP },
33417 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv16qi3, "__builtin_ia32_vpcomgtub", IX86_BUILTIN_VPCOMGTUB, GTU, (int)MULTI_ARG_2_QI_CMP },
33418 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv16qi3, "__builtin_ia32_vpcomgeub", IX86_BUILTIN_VPCOMGEUB, GEU, (int)MULTI_ARG_2_QI_CMP },
33420 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v8hi3, "__builtin_ia32_vpcomequw", IX86_BUILTIN_VPCOMEQUW, EQ, (int)MULTI_ARG_2_HI_CMP },
33421 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v8hi3, "__builtin_ia32_vpcomneuw", IX86_BUILTIN_VPCOMNEUW, NE, (int)MULTI_ARG_2_HI_CMP },
33422 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v8hi3, "__builtin_ia32_vpcomnequw", IX86_BUILTIN_VPCOMNEUW, NE, (int)MULTI_ARG_2_HI_CMP },
33423 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv8hi3, "__builtin_ia32_vpcomltuw", IX86_BUILTIN_VPCOMLTUW, LTU, (int)MULTI_ARG_2_HI_CMP },
33424 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv8hi3, "__builtin_ia32_vpcomleuw", IX86_BUILTIN_VPCOMLEUW, LEU, (int)MULTI_ARG_2_HI_CMP },
33425 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv8hi3, "__builtin_ia32_vpcomgtuw", IX86_BUILTIN_VPCOMGTUW, GTU, (int)MULTI_ARG_2_HI_CMP },
33426 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv8hi3, "__builtin_ia32_vpcomgeuw", IX86_BUILTIN_VPCOMGEUW, GEU, (int)MULTI_ARG_2_HI_CMP },
33428 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v4si3, "__builtin_ia32_vpcomequd", IX86_BUILTIN_VPCOMEQUD, EQ, (int)MULTI_ARG_2_SI_CMP },
33429 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v4si3, "__builtin_ia32_vpcomneud", IX86_BUILTIN_VPCOMNEUD, NE, (int)MULTI_ARG_2_SI_CMP },
33430 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v4si3, "__builtin_ia32_vpcomnequd", IX86_BUILTIN_VPCOMNEUD, NE, (int)MULTI_ARG_2_SI_CMP },
33431 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv4si3, "__builtin_ia32_vpcomltud", IX86_BUILTIN_VPCOMLTUD, LTU, (int)MULTI_ARG_2_SI_CMP },
33432 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv4si3, "__builtin_ia32_vpcomleud", IX86_BUILTIN_VPCOMLEUD, LEU, (int)MULTI_ARG_2_SI_CMP },
33433 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv4si3, "__builtin_ia32_vpcomgtud", IX86_BUILTIN_VPCOMGTUD, GTU, (int)MULTI_ARG_2_SI_CMP },
33434 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv4si3, "__builtin_ia32_vpcomgeud", IX86_BUILTIN_VPCOMGEUD, GEU, (int)MULTI_ARG_2_SI_CMP },
33436 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v2di3, "__builtin_ia32_vpcomequq", IX86_BUILTIN_VPCOMEQUQ, EQ, (int)MULTI_ARG_2_DI_CMP },
33437 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v2di3, "__builtin_ia32_vpcomneuq", IX86_BUILTIN_VPCOMNEUQ, NE, (int)MULTI_ARG_2_DI_CMP },
33438 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v2di3, "__builtin_ia32_vpcomnequq", IX86_BUILTIN_VPCOMNEUQ, NE, (int)MULTI_ARG_2_DI_CMP },
33439 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv2di3, "__builtin_ia32_vpcomltuq", IX86_BUILTIN_VPCOMLTUQ, LTU, (int)MULTI_ARG_2_DI_CMP },
33440 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv2di3, "__builtin_ia32_vpcomleuq", IX86_BUILTIN_VPCOMLEUQ, LEU, (int)MULTI_ARG_2_DI_CMP },
33441 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv2di3, "__builtin_ia32_vpcomgtuq", IX86_BUILTIN_VPCOMGTUQ, GTU, (int)MULTI_ARG_2_DI_CMP },
33442 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv2di3, "__builtin_ia32_vpcomgeuq", IX86_BUILTIN_VPCOMGEUQ, GEU, (int)MULTI_ARG_2_DI_CMP },
33444 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv16qi3, "__builtin_ia32_vpcomfalseb", IX86_BUILTIN_VPCOMFALSEB, (enum rtx_code) PCOM_FALSE, (int)MULTI_ARG_2_QI_TF },
33445 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv8hi3, "__builtin_ia32_vpcomfalsew", IX86_BUILTIN_VPCOMFALSEW, (enum rtx_code) PCOM_FALSE, (int)MULTI_ARG_2_HI_TF },
33446 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv4si3, "__builtin_ia32_vpcomfalsed", IX86_BUILTIN_VPCOMFALSED, (enum rtx_code) PCOM_FALSE, (int)MULTI_ARG_2_SI_TF },
33447 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv2di3, "__builtin_ia32_vpcomfalseq", IX86_BUILTIN_VPCOMFALSEQ, (enum rtx_code) PCOM_FALSE, (int)MULTI_ARG_2_DI_TF },
33448 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv16qi3, "__builtin_ia32_vpcomfalseub",IX86_BUILTIN_VPCOMFALSEUB,(enum rtx_code) PCOM_FALSE, (int)MULTI_ARG_2_QI_TF },
33449 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv8hi3, "__builtin_ia32_vpcomfalseuw",IX86_BUILTIN_VPCOMFALSEUW,(enum rtx_code) PCOM_FALSE, (int)MULTI_ARG_2_HI_TF },
33450 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv4si3, "__builtin_ia32_vpcomfalseud",IX86_BUILTIN_VPCOMFALSEUD,(enum rtx_code) PCOM_FALSE, (int)MULTI_ARG_2_SI_TF },
33451 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv2di3, "__builtin_ia32_vpcomfalseuq",IX86_BUILTIN_VPCOMFALSEUQ,(enum rtx_code) PCOM_FALSE, (int)MULTI_ARG_2_DI_TF },
33453 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv16qi3, "__builtin_ia32_vpcomtrueb", IX86_BUILTIN_VPCOMTRUEB, (enum rtx_code) PCOM_TRUE, (int)MULTI_ARG_2_QI_TF },
33454 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv8hi3, "__builtin_ia32_vpcomtruew", IX86_BUILTIN_VPCOMTRUEW, (enum rtx_code) PCOM_TRUE, (int)MULTI_ARG_2_HI_TF },
33455 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv4si3, "__builtin_ia32_vpcomtrued", IX86_BUILTIN_VPCOMTRUED, (enum rtx_code) PCOM_TRUE, (int)MULTI_ARG_2_SI_TF },
33456 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv2di3, "__builtin_ia32_vpcomtrueq", IX86_BUILTIN_VPCOMTRUEQ, (enum rtx_code) PCOM_TRUE, (int)MULTI_ARG_2_DI_TF },
33457 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv16qi3, "__builtin_ia32_vpcomtrueub", IX86_BUILTIN_VPCOMTRUEUB, (enum rtx_code) PCOM_TRUE, (int)MULTI_ARG_2_QI_TF },
33458 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv8hi3, "__builtin_ia32_vpcomtrueuw", IX86_BUILTIN_VPCOMTRUEUW, (enum rtx_code) PCOM_TRUE, (int)MULTI_ARG_2_HI_TF },
33459 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv4si3, "__builtin_ia32_vpcomtrueud", IX86_BUILTIN_VPCOMTRUEUD, (enum rtx_code) PCOM_TRUE, (int)MULTI_ARG_2_SI_TF },
33460 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv2di3, "__builtin_ia32_vpcomtrueuq", IX86_BUILTIN_VPCOMTRUEUQ, (enum rtx_code) PCOM_TRUE, (int)MULTI_ARG_2_DI_TF },
33462 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vpermil2v2df3, "__builtin_ia32_vpermil2pd", IX86_BUILTIN_VPERMIL2PD, UNKNOWN, (int)MULTI_ARG_4_DF2_DI_I },
33463 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vpermil2v4sf3, "__builtin_ia32_vpermil2ps", IX86_BUILTIN_VPERMIL2PS, UNKNOWN, (int)MULTI_ARG_4_SF2_SI_I },
33464 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vpermil2v4df3, "__builtin_ia32_vpermil2pd256", IX86_BUILTIN_VPERMIL2PD256, UNKNOWN, (int)MULTI_ARG_4_DF2_DI_I1 },
33465 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vpermil2v8sf3, "__builtin_ia32_vpermil2ps256", IX86_BUILTIN_VPERMIL2PS256, UNKNOWN, (int)MULTI_ARG_4_SF2_SI_I1 },
33469 /* TM vector builtins. */
33471 /* Reuse the existing x86-specific `struct builtin_description' because
33472 we're lazy. Add casts to make them fit. */
33473 static const struct builtin_description bdesc_tm[] =
33475 { OPTION_MASK_ISA_MMX, CODE_FOR_nothing, "__builtin__ITM_WM64", (enum ix86_builtins) BUILT_IN_TM_STORE_M64, UNKNOWN, VOID_FTYPE_PV2SI_V2SI },
33476 { OPTION_MASK_ISA_MMX, CODE_FOR_nothing, "__builtin__ITM_WaRM64", (enum ix86_builtins) BUILT_IN_TM_STORE_WAR_M64, UNKNOWN, VOID_FTYPE_PV2SI_V2SI },
33477 { OPTION_MASK_ISA_MMX, CODE_FOR_nothing, "__builtin__ITM_WaWM64", (enum ix86_builtins) BUILT_IN_TM_STORE_WAW_M64, UNKNOWN, VOID_FTYPE_PV2SI_V2SI },
33478 { OPTION_MASK_ISA_MMX, CODE_FOR_nothing, "__builtin__ITM_RM64", (enum ix86_builtins) BUILT_IN_TM_LOAD_M64, UNKNOWN, V2SI_FTYPE_PCV2SI },
33479 { OPTION_MASK_ISA_MMX, CODE_FOR_nothing, "__builtin__ITM_RaRM64", (enum ix86_builtins) BUILT_IN_TM_LOAD_RAR_M64, UNKNOWN, V2SI_FTYPE_PCV2SI },
33480 { OPTION_MASK_ISA_MMX, CODE_FOR_nothing, "__builtin__ITM_RaWM64", (enum ix86_builtins) BUILT_IN_TM_LOAD_RAW_M64, UNKNOWN, V2SI_FTYPE_PCV2SI },
33481 { OPTION_MASK_ISA_MMX, CODE_FOR_nothing, "__builtin__ITM_RfWM64", (enum ix86_builtins) BUILT_IN_TM_LOAD_RFW_M64, UNKNOWN, V2SI_FTYPE_PCV2SI },
33483 { OPTION_MASK_ISA_SSE, CODE_FOR_nothing, "__builtin__ITM_WM128", (enum ix86_builtins) BUILT_IN_TM_STORE_M128, UNKNOWN, VOID_FTYPE_PV4SF_V4SF },
33484 { OPTION_MASK_ISA_SSE, CODE_FOR_nothing, "__builtin__ITM_WaRM128", (enum ix86_builtins) BUILT_IN_TM_STORE_WAR_M128, UNKNOWN, VOID_FTYPE_PV4SF_V4SF },
33485 { OPTION_MASK_ISA_SSE, CODE_FOR_nothing, "__builtin__ITM_WaWM128", (enum ix86_builtins) BUILT_IN_TM_STORE_WAW_M128, UNKNOWN, VOID_FTYPE_PV4SF_V4SF },
33486 { OPTION_MASK_ISA_SSE, CODE_FOR_nothing, "__builtin__ITM_RM128", (enum ix86_builtins) BUILT_IN_TM_LOAD_M128, UNKNOWN, V4SF_FTYPE_PCV4SF },
33487 { OPTION_MASK_ISA_SSE, CODE_FOR_nothing, "__builtin__ITM_RaRM128", (enum ix86_builtins) BUILT_IN_TM_LOAD_RAR_M128, UNKNOWN, V4SF_FTYPE_PCV4SF },
33488 { OPTION_MASK_ISA_SSE, CODE_FOR_nothing, "__builtin__ITM_RaWM128", (enum ix86_builtins) BUILT_IN_TM_LOAD_RAW_M128, UNKNOWN, V4SF_FTYPE_PCV4SF },
33489 { OPTION_MASK_ISA_SSE, CODE_FOR_nothing, "__builtin__ITM_RfWM128", (enum ix86_builtins) BUILT_IN_TM_LOAD_RFW_M128, UNKNOWN, V4SF_FTYPE_PCV4SF },
33491 { OPTION_MASK_ISA_AVX, CODE_FOR_nothing, "__builtin__ITM_WM256", (enum ix86_builtins) BUILT_IN_TM_STORE_M256, UNKNOWN, VOID_FTYPE_PV8SF_V8SF },
33492 { OPTION_MASK_ISA_AVX, CODE_FOR_nothing, "__builtin__ITM_WaRM256", (enum ix86_builtins) BUILT_IN_TM_STORE_WAR_M256, UNKNOWN, VOID_FTYPE_PV8SF_V8SF },
33493 { OPTION_MASK_ISA_AVX, CODE_FOR_nothing, "__builtin__ITM_WaWM256", (enum ix86_builtins) BUILT_IN_TM_STORE_WAW_M256, UNKNOWN, VOID_FTYPE_PV8SF_V8SF },
33494 { OPTION_MASK_ISA_AVX, CODE_FOR_nothing, "__builtin__ITM_RM256", (enum ix86_builtins) BUILT_IN_TM_LOAD_M256, UNKNOWN, V8SF_FTYPE_PCV8SF },
33495 { OPTION_MASK_ISA_AVX, CODE_FOR_nothing, "__builtin__ITM_RaRM256", (enum ix86_builtins) BUILT_IN_TM_LOAD_RAR_M256, UNKNOWN, V8SF_FTYPE_PCV8SF },
33496 { OPTION_MASK_ISA_AVX, CODE_FOR_nothing, "__builtin__ITM_RaWM256", (enum ix86_builtins) BUILT_IN_TM_LOAD_RAW_M256, UNKNOWN, V8SF_FTYPE_PCV8SF },
33497 { OPTION_MASK_ISA_AVX, CODE_FOR_nothing, "__builtin__ITM_RfWM256", (enum ix86_builtins) BUILT_IN_TM_LOAD_RFW_M256, UNKNOWN, V8SF_FTYPE_PCV8SF },
33499 { OPTION_MASK_ISA_MMX, CODE_FOR_nothing, "__builtin__ITM_LM64", (enum ix86_builtins) BUILT_IN_TM_LOG_M64, UNKNOWN, VOID_FTYPE_PCVOID },
33500 { OPTION_MASK_ISA_SSE, CODE_FOR_nothing, "__builtin__ITM_LM128", (enum ix86_builtins) BUILT_IN_TM_LOG_M128, UNKNOWN, VOID_FTYPE_PCVOID },
33501 { OPTION_MASK_ISA_AVX, CODE_FOR_nothing, "__builtin__ITM_LM256", (enum ix86_builtins) BUILT_IN_TM_LOG_M256, UNKNOWN, VOID_FTYPE_PCVOID },
33504 /* TM callbacks. */
33506 /* Return the builtin decl needed to load a vector of TYPE. */
33508 static tree
33509 ix86_builtin_tm_load (tree type)
33511 if (TREE_CODE (type) == VECTOR_TYPE)
33513 switch (tree_to_uhwi (TYPE_SIZE (type)))
33515 case 64:
33516 return builtin_decl_explicit (BUILT_IN_TM_LOAD_M64);
33517 case 128:
33518 return builtin_decl_explicit (BUILT_IN_TM_LOAD_M128);
33519 case 256:
33520 return builtin_decl_explicit (BUILT_IN_TM_LOAD_M256);
33523 return NULL_TREE;
33526 /* Return the builtin decl needed to store a vector of TYPE. */
33528 static tree
33529 ix86_builtin_tm_store (tree type)
33531 if (TREE_CODE (type) == VECTOR_TYPE)
33533 switch (tree_to_uhwi (TYPE_SIZE (type)))
33535 case 64:
33536 return builtin_decl_explicit (BUILT_IN_TM_STORE_M64);
33537 case 128:
33538 return builtin_decl_explicit (BUILT_IN_TM_STORE_M128);
33539 case 256:
33540 return builtin_decl_explicit (BUILT_IN_TM_STORE_M256);
33543 return NULL_TREE;
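/* Usage sketch (hypothetical caller, not part of this file): for a 128-bit
   vector type the two helpers above resolve to the _M128 variants registered
   in bdesc_tm.

     tree v4sf  = build_vector_type (float_type_node, 4);  // TYPE_SIZE == 128
     tree load  = ix86_builtin_tm_load (v4sf);   // BUILT_IN_TM_LOAD_M128 decl
     tree store = ix86_builtin_tm_store (v4sf);  // BUILT_IN_TM_STORE_M128 decl  */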
33546 /* Initialize the transactional memory vector load/store builtins. */
33548 static void
33549 ix86_init_tm_builtins (void)
33551 enum ix86_builtin_func_type ftype;
33552 const struct builtin_description *d;
33553 size_t i;
33554 tree decl;
33555 tree attrs_load, attrs_type_load, attrs_store, attrs_type_store;
33556 tree attrs_log, attrs_type_log;
33558 if (!flag_tm)
33559 return;
33561 /* If there are no builtins defined, we must be compiling in a
33562 language without trans-mem support. */
33563 if (!builtin_decl_explicit_p (BUILT_IN_TM_LOAD_1))
33564 return;
33566 /* Use whatever attributes a normal TM load has. */
33567 decl = builtin_decl_explicit (BUILT_IN_TM_LOAD_1);
33568 attrs_load = DECL_ATTRIBUTES (decl);
33569 attrs_type_load = TYPE_ATTRIBUTES (TREE_TYPE (decl));
33570 /* Use whatever attributes a normal TM store has. */
33571 decl = builtin_decl_explicit (BUILT_IN_TM_STORE_1);
33572 attrs_store = DECL_ATTRIBUTES (decl);
33573 attrs_type_store = TYPE_ATTRIBUTES (TREE_TYPE (decl));
33574 /* Use whatever attributes a normal TM log has. */
33575 decl = builtin_decl_explicit (BUILT_IN_TM_LOG);
33576 attrs_log = DECL_ATTRIBUTES (decl);
33577 attrs_type_log = TYPE_ATTRIBUTES (TREE_TYPE (decl));
33579 for (i = 0, d = bdesc_tm;
33580 i < ARRAY_SIZE (bdesc_tm);
33581 i++, d++)
33583 if ((d->mask & ix86_isa_flags) != 0
33584 || (lang_hooks.builtin_function
33585 == lang_hooks.builtin_function_ext_scope))
33587 tree type, attrs, attrs_type;
33588 enum built_in_function code = (enum built_in_function) d->code;
33590 ftype = (enum ix86_builtin_func_type) d->flag;
33591 type = ix86_get_builtin_func_type (ftype);
33593 if (BUILTIN_TM_LOAD_P (code))
33595 attrs = attrs_load;
33596 attrs_type = attrs_type_load;
33598 else if (BUILTIN_TM_STORE_P (code))
33600 attrs = attrs_store;
33601 attrs_type = attrs_type_store;
33603 else
33605 attrs = attrs_log;
33606 attrs_type = attrs_type_log;
33608 decl = add_builtin_function (d->name, type, code, BUILT_IN_NORMAL,
33609 /* The builtin without the prefix for
33610 calling it directly. */
33611 d->name + strlen ("__builtin_"),
33612 attrs);
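	  /* Illustrative note: for d->name "__builtin__ITM_RM128" the second
	     name passed above is d->name + strlen ("__builtin_"), i.e.
	     "_ITM_RM128", so the builtin is also reachable under the plain
	     libitm entry-point name.  */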
33613 /* add_builtin_function() will set the DECL_ATTRIBUTES, now
33614 set the TYPE_ATTRIBUTES. */
33615 decl_attributes (&TREE_TYPE (decl), attrs_type, ATTR_FLAG_BUILT_IN);
33617 set_builtin_decl (code, decl, false);
33622 /* Set up all the MMX/SSE builtins, even builtins for instructions that are
33623 not in the current target ISA, to allow the user to compile particular
33624 modules with target-specific options that differ from the command-line
33625 options. */
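/* Illustrative example (user code, not part of this file): because the
   builtin decls are created regardless of the command-line ISA, a module
   compiled without -mavx2 can still provide an AVX2 code path; the ISA check
   happens when the builtin is expanded inside the attributed function.

     // requires <immintrin.h>
     __attribute__ ((target ("avx2")))
     __m256i
     add_epi32_avx2 (__m256i a, __m256i b)
     {
       return _mm256_add_epi32 (a, b);
     }
*/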
33626 static void
33627 ix86_init_mmx_sse_builtins (void)
33629 const struct builtin_description * d;
33630 enum ix86_builtin_func_type ftype;
33631 size_t i;
33633 /* Add all special builtins with variable number of operands. */
33634 for (i = 0, d = bdesc_special_args;
33635 i < ARRAY_SIZE (bdesc_special_args);
33636 i++, d++)
33638 if (d->name == 0)
33639 continue;
33641 ftype = (enum ix86_builtin_func_type) d->flag;
33642 def_builtin (d->mask, d->name, ftype, d->code);
33645 /* Add all builtins with variable number of operands. */
33646 for (i = 0, d = bdesc_args;
33647 i < ARRAY_SIZE (bdesc_args);
33648 i++, d++)
33650 if (d->name == 0)
33651 continue;
33653 ftype = (enum ix86_builtin_func_type) d->flag;
33654 def_builtin_const (d->mask, d->name, ftype, d->code);
33657 /* Add all builtins with rounding. */
33658 for (i = 0, d = bdesc_round_args;
33659 i < ARRAY_SIZE (bdesc_round_args);
33660 i++, d++)
33662 if (d->name == 0)
33663 continue;
33665 ftype = (enum ix86_builtin_func_type) d->flag;
33666 def_builtin_const (d->mask, d->name, ftype, d->code);
33669 /* pcmpestr[im] insns. */
33670 for (i = 0, d = bdesc_pcmpestr;
33671 i < ARRAY_SIZE (bdesc_pcmpestr);
33672 i++, d++)
33674 if (d->code == IX86_BUILTIN_PCMPESTRM128)
33675 ftype = V16QI_FTYPE_V16QI_INT_V16QI_INT_INT;
33676 else
33677 ftype = INT_FTYPE_V16QI_INT_V16QI_INT_INT;
33678 def_builtin_const (d->mask, d->name, ftype, d->code);
33681 /* pcmpistr[im] insns. */
33682 for (i = 0, d = bdesc_pcmpistr;
33683 i < ARRAY_SIZE (bdesc_pcmpistr);
33684 i++, d++)
33686 if (d->code == IX86_BUILTIN_PCMPISTRM128)
33687 ftype = V16QI_FTYPE_V16QI_V16QI_INT;
33688 else
33689 ftype = INT_FTYPE_V16QI_V16QI_INT;
33690 def_builtin_const (d->mask, d->name, ftype, d->code);
33693 /* comi/ucomi insns. */
33694 for (i = 0, d = bdesc_comi; i < ARRAY_SIZE (bdesc_comi); i++, d++)
33696 if (d->mask == OPTION_MASK_ISA_SSE2)
33697 ftype = INT_FTYPE_V2DF_V2DF;
33698 else
33699 ftype = INT_FTYPE_V4SF_V4SF;
33700 def_builtin_const (d->mask, d->name, ftype, d->code);
33703 /* SSE */
33704 def_builtin (OPTION_MASK_ISA_SSE, "__builtin_ia32_ldmxcsr",
33705 VOID_FTYPE_UNSIGNED, IX86_BUILTIN_LDMXCSR);
33706 def_builtin (OPTION_MASK_ISA_SSE, "__builtin_ia32_stmxcsr",
33707 UNSIGNED_FTYPE_VOID, IX86_BUILTIN_STMXCSR);
33709 /* SSE or 3DNow!A */
33710 def_builtin (OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A,
33711 "__builtin_ia32_maskmovq", VOID_FTYPE_V8QI_V8QI_PCHAR,
33712 IX86_BUILTIN_MASKMOVQ);
33714 /* SSE2 */
33715 def_builtin (OPTION_MASK_ISA_SSE2, "__builtin_ia32_maskmovdqu",
33716 VOID_FTYPE_V16QI_V16QI_PCHAR, IX86_BUILTIN_MASKMOVDQU);
33718 def_builtin (OPTION_MASK_ISA_SSE2, "__builtin_ia32_clflush",
33719 VOID_FTYPE_PCVOID, IX86_BUILTIN_CLFLUSH);
33720 x86_mfence = def_builtin (OPTION_MASK_ISA_SSE2, "__builtin_ia32_mfence",
33721 VOID_FTYPE_VOID, IX86_BUILTIN_MFENCE);
33723 /* SSE3. */
33724 def_builtin (OPTION_MASK_ISA_SSE3, "__builtin_ia32_monitor",
33725 VOID_FTYPE_PCVOID_UNSIGNED_UNSIGNED, IX86_BUILTIN_MONITOR);
33726 def_builtin (OPTION_MASK_ISA_SSE3, "__builtin_ia32_mwait",
33727 VOID_FTYPE_UNSIGNED_UNSIGNED, IX86_BUILTIN_MWAIT);
33729 /* AES */
33730 def_builtin_const (OPTION_MASK_ISA_AES, "__builtin_ia32_aesenc128",
33731 V2DI_FTYPE_V2DI_V2DI, IX86_BUILTIN_AESENC128);
33732 def_builtin_const (OPTION_MASK_ISA_AES, "__builtin_ia32_aesenclast128",
33733 V2DI_FTYPE_V2DI_V2DI, IX86_BUILTIN_AESENCLAST128);
33734 def_builtin_const (OPTION_MASK_ISA_AES, "__builtin_ia32_aesdec128",
33735 V2DI_FTYPE_V2DI_V2DI, IX86_BUILTIN_AESDEC128);
33736 def_builtin_const (OPTION_MASK_ISA_AES, "__builtin_ia32_aesdeclast128",
33737 V2DI_FTYPE_V2DI_V2DI, IX86_BUILTIN_AESDECLAST128);
33738 def_builtin_const (OPTION_MASK_ISA_AES, "__builtin_ia32_aesimc128",
33739 V2DI_FTYPE_V2DI, IX86_BUILTIN_AESIMC128);
33740 def_builtin_const (OPTION_MASK_ISA_AES, "__builtin_ia32_aeskeygenassist128",
33741 V2DI_FTYPE_V2DI_INT, IX86_BUILTIN_AESKEYGENASSIST128);
33743 /* PCLMUL */
33744 def_builtin_const (OPTION_MASK_ISA_PCLMUL, "__builtin_ia32_pclmulqdq128",
33745 V2DI_FTYPE_V2DI_V2DI_INT, IX86_BUILTIN_PCLMULQDQ128);
33747 /* RDRND */
33748 def_builtin (OPTION_MASK_ISA_RDRND, "__builtin_ia32_rdrand16_step",
33749 INT_FTYPE_PUSHORT, IX86_BUILTIN_RDRAND16_STEP);
33750 def_builtin (OPTION_MASK_ISA_RDRND, "__builtin_ia32_rdrand32_step",
33751 INT_FTYPE_PUNSIGNED, IX86_BUILTIN_RDRAND32_STEP);
33752 def_builtin (OPTION_MASK_ISA_RDRND | OPTION_MASK_ISA_64BIT,
33753 "__builtin_ia32_rdrand64_step", INT_FTYPE_PULONGLONG,
33754 IX86_BUILTIN_RDRAND64_STEP);
33756 /* AVX2 */
33757 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gathersiv2df",
33758 V2DF_FTYPE_V2DF_PCDOUBLE_V4SI_V2DF_INT,
33759 IX86_BUILTIN_GATHERSIV2DF);
33761 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gathersiv4df",
33762 V4DF_FTYPE_V4DF_PCDOUBLE_V4SI_V4DF_INT,
33763 IX86_BUILTIN_GATHERSIV4DF);
33765 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gatherdiv2df",
33766 V2DF_FTYPE_V2DF_PCDOUBLE_V2DI_V2DF_INT,
33767 IX86_BUILTIN_GATHERDIV2DF);
33769 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gatherdiv4df",
33770 V4DF_FTYPE_V4DF_PCDOUBLE_V4DI_V4DF_INT,
33771 IX86_BUILTIN_GATHERDIV4DF);
33773 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gathersiv4sf",
33774 V4SF_FTYPE_V4SF_PCFLOAT_V4SI_V4SF_INT,
33775 IX86_BUILTIN_GATHERSIV4SF);
33777 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gathersiv8sf",
33778 V8SF_FTYPE_V8SF_PCFLOAT_V8SI_V8SF_INT,
33779 IX86_BUILTIN_GATHERSIV8SF);
33781 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gatherdiv4sf",
33782 V4SF_FTYPE_V4SF_PCFLOAT_V2DI_V4SF_INT,
33783 IX86_BUILTIN_GATHERDIV4SF);
33785 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gatherdiv4sf256",
33786 V4SF_FTYPE_V4SF_PCFLOAT_V4DI_V4SF_INT,
33787 IX86_BUILTIN_GATHERDIV8SF);
33789 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gathersiv2di",
33790 V2DI_FTYPE_V2DI_PCINT64_V4SI_V2DI_INT,
33791 IX86_BUILTIN_GATHERSIV2DI);
33793 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gathersiv4di",
33794 V4DI_FTYPE_V4DI_PCINT64_V4SI_V4DI_INT,
33795 IX86_BUILTIN_GATHERSIV4DI);
33797 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gatherdiv2di",
33798 V2DI_FTYPE_V2DI_PCINT64_V2DI_V2DI_INT,
33799 IX86_BUILTIN_GATHERDIV2DI);
33801 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gatherdiv4di",
33802 V4DI_FTYPE_V4DI_PCINT64_V4DI_V4DI_INT,
33803 IX86_BUILTIN_GATHERDIV4DI);
33805 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gathersiv4si",
33806 V4SI_FTYPE_V4SI_PCINT_V4SI_V4SI_INT,
33807 IX86_BUILTIN_GATHERSIV4SI);
33809 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gathersiv8si",
33810 V8SI_FTYPE_V8SI_PCINT_V8SI_V8SI_INT,
33811 IX86_BUILTIN_GATHERSIV8SI);
33813 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gatherdiv4si",
33814 V4SI_FTYPE_V4SI_PCINT_V2DI_V4SI_INT,
33815 IX86_BUILTIN_GATHERDIV4SI);
33817 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gatherdiv4si256",
33818 V4SI_FTYPE_V4SI_PCINT_V4DI_V4SI_INT,
33819 IX86_BUILTIN_GATHERDIV8SI);
33821 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gatheraltsiv4df ",
33822 V4DF_FTYPE_V4DF_PCDOUBLE_V8SI_V4DF_INT,
33823 IX86_BUILTIN_GATHERALTSIV4DF);
33825 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gatheraltdiv4sf256 ",
33826 V8SF_FTYPE_V8SF_PCFLOAT_V4DI_V8SF_INT,
33827 IX86_BUILTIN_GATHERALTDIV8SF);
33829 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gatheraltsiv4di ",
33830 V4DI_FTYPE_V4DI_PCINT64_V8SI_V4DI_INT,
33831 IX86_BUILTIN_GATHERALTSIV4DI);
33833 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gatheraltdiv4si256 ",
33834 V8SI_FTYPE_V8SI_PCINT_V4DI_V8SI_INT,
33835 IX86_BUILTIN_GATHERALTDIV8SI);
33837 /* AVX512F */
33838 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_gathersiv16sf",
33839 V16SF_FTYPE_V16SF_PCFLOAT_V16SI_HI_INT,
33840 IX86_BUILTIN_GATHER3SIV16SF);
33842 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_gathersiv8df",
33843 V8DF_FTYPE_V8DF_PCDOUBLE_V8SI_QI_INT,
33844 IX86_BUILTIN_GATHER3SIV8DF);
33846 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_gatherdiv16sf",
33847 V8SF_FTYPE_V8SF_PCFLOAT_V8DI_QI_INT,
33848 IX86_BUILTIN_GATHER3DIV16SF);
33850 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_gatherdiv8df",
33851 V8DF_FTYPE_V8DF_PCDOUBLE_V8DI_QI_INT,
33852 IX86_BUILTIN_GATHER3DIV8DF);
33854 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_gathersiv16si",
33855 V16SI_FTYPE_V16SI_PCINT_V16SI_HI_INT,
33856 IX86_BUILTIN_GATHER3SIV16SI);
33858 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_gathersiv8di",
33859 V8DI_FTYPE_V8DI_PCINT64_V8SI_QI_INT,
33860 IX86_BUILTIN_GATHER3SIV8DI);
33862 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_gatherdiv16si",
33863 V8SI_FTYPE_V8SI_PCINT_V8DI_QI_INT,
33864 IX86_BUILTIN_GATHER3DIV16SI);
33866 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_gatherdiv8di",
33867 V8DI_FTYPE_V8DI_PCINT64_V8DI_QI_INT,
33868 IX86_BUILTIN_GATHER3DIV8DI);
33870 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_gatheraltsiv8df ",
33871 V8DF_FTYPE_V8DF_PCDOUBLE_V16SI_QI_INT,
33872 IX86_BUILTIN_GATHER3ALTSIV8DF);
33874 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_gatheraltdiv8sf ",
33875 V16SF_FTYPE_V16SF_PCFLOAT_V8DI_HI_INT,
33876 IX86_BUILTIN_GATHER3ALTDIV16SF);
33878 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_gatheraltsiv8di ",
33879 V8DI_FTYPE_V8DI_PCINT64_V16SI_QI_INT,
33880 IX86_BUILTIN_GATHER3ALTSIV8DI);
33882 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_gatheraltdiv8si ",
33883 V16SI_FTYPE_V16SI_PCINT_V8DI_HI_INT,
33884 IX86_BUILTIN_GATHER3ALTDIV16SI);
33886 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_scattersiv16sf",
33887 VOID_FTYPE_PFLOAT_HI_V16SI_V16SF_INT,
33888 IX86_BUILTIN_SCATTERSIV16SF);
33890 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_scattersiv8df",
33891 VOID_FTYPE_PDOUBLE_QI_V8SI_V8DF_INT,
33892 IX86_BUILTIN_SCATTERSIV8DF);
33894 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_scatterdiv16sf",
33895 VOID_FTYPE_PFLOAT_QI_V8DI_V8SF_INT,
33896 IX86_BUILTIN_SCATTERDIV16SF);
33898 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_scatterdiv8df",
33899 VOID_FTYPE_PDOUBLE_QI_V8DI_V8DF_INT,
33900 IX86_BUILTIN_SCATTERDIV8DF);
33902 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_scattersiv16si",
33903 VOID_FTYPE_PINT_HI_V16SI_V16SI_INT,
33904 IX86_BUILTIN_SCATTERSIV16SI);
33906 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_scattersiv8di",
33907 VOID_FTYPE_PLONGLONG_QI_V8SI_V8DI_INT,
33908 IX86_BUILTIN_SCATTERSIV8DI);
33910 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_scatterdiv16si",
33911 VOID_FTYPE_PINT_QI_V8DI_V8SI_INT,
33912 IX86_BUILTIN_SCATTERDIV16SI);
33914 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_scatterdiv8di",
33915 VOID_FTYPE_PLONGLONG_QI_V8DI_V8DI_INT,
33916 IX86_BUILTIN_SCATTERDIV8DI);
33918 /* AVX512VL */
33919 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_gather3siv2df",
33920 V2DF_FTYPE_V2DF_PCDOUBLE_V4SI_QI_INT,
33921 IX86_BUILTIN_GATHER3SIV2DF);
33923 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_gather3siv4df",
33924 V4DF_FTYPE_V4DF_PCDOUBLE_V4SI_QI_INT,
33925 IX86_BUILTIN_GATHER3SIV4DF);
33927 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_gather3div2df",
33928 V2DF_FTYPE_V2DF_PCDOUBLE_V2DI_QI_INT,
33929 IX86_BUILTIN_GATHER3DIV2DF);
33931 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_gather3div4df",
33932 V4DF_FTYPE_V4DF_PCDOUBLE_V4DI_QI_INT,
33933 IX86_BUILTIN_GATHER3DIV4DF);
33935 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_gather3siv4sf",
33936 V4SF_FTYPE_V4SF_PCFLOAT_V4SI_QI_INT,
33937 IX86_BUILTIN_GATHER3SIV4SF);
33939 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_gather3siv8sf",
33940 V8SF_FTYPE_V8SF_PCFLOAT_V8SI_QI_INT,
33941 IX86_BUILTIN_GATHER3SIV8SF);
33943 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_gather3div4sf",
33944 V4SF_FTYPE_V4SF_PCFLOAT_V2DI_QI_INT,
33945 IX86_BUILTIN_GATHER3DIV4SF);
33947 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_gather3div8sf",
33948 V4SF_FTYPE_V4SF_PCFLOAT_V4DI_QI_INT,
33949 IX86_BUILTIN_GATHER3DIV8SF);
33951 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_gather3siv2di",
33952 V2DI_FTYPE_V2DI_PCINT64_V4SI_QI_INT,
33953 IX86_BUILTIN_GATHER3SIV2DI);
33955 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_gather3siv4di",
33956 V4DI_FTYPE_V4DI_PCINT64_V4SI_QI_INT,
33957 IX86_BUILTIN_GATHER3SIV4DI);
33959 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_gather3div2di",
33960 V2DI_FTYPE_V2DI_PCINT64_V2DI_QI_INT,
33961 IX86_BUILTIN_GATHER3DIV2DI);
33963 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_gather3div4di",
33964 V4DI_FTYPE_V4DI_PCINT64_V4DI_QI_INT,
33965 IX86_BUILTIN_GATHER3DIV4DI);
33967 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_gather3siv4si",
33968 V4SI_FTYPE_V4SI_PCINT_V4SI_QI_INT,
33969 IX86_BUILTIN_GATHER3SIV4SI);
33971 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_gather3siv8si",
33972 V8SI_FTYPE_V8SI_PCINT_V8SI_QI_INT,
33973 IX86_BUILTIN_GATHER3SIV8SI);
33975 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_gather3div4si",
33976 V4SI_FTYPE_V4SI_PCINT_V2DI_QI_INT,
33977 IX86_BUILTIN_GATHER3DIV4SI);
33979 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_gather3div8si",
33980 V4SI_FTYPE_V4SI_PCINT_V4DI_QI_INT,
33981 IX86_BUILTIN_GATHER3DIV8SI);
33983 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_gather3altsiv4df ",
33984 V4DF_FTYPE_V4DF_PCDOUBLE_V8SI_QI_INT,
33985 IX86_BUILTIN_GATHER3ALTSIV4DF);
33987 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_gather3altdiv8sf ",
33988 V8SF_FTYPE_V8SF_PCFLOAT_V4DI_QI_INT,
33989 IX86_BUILTIN_GATHER3ALTDIV8SF);
33991 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_gather3altsiv4di ",
33992 V4DI_FTYPE_V4DI_PCINT64_V8SI_QI_INT,
33993 IX86_BUILTIN_GATHER3ALTSIV4DI);
33995 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_gather3altdiv8si ",
33996 V8SI_FTYPE_V8SI_PCINT_V4DI_QI_INT,
33997 IX86_BUILTIN_GATHER3ALTDIV8SI);
33999 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_scattersiv8sf",
34000 VOID_FTYPE_PFLOAT_QI_V8SI_V8SF_INT,
34001 IX86_BUILTIN_SCATTERSIV8SF);
34003 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_scattersiv4sf",
34004 VOID_FTYPE_PFLOAT_QI_V4SI_V4SF_INT,
34005 IX86_BUILTIN_SCATTERSIV4SF);
34007 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_scattersiv4df",
34008 VOID_FTYPE_PDOUBLE_QI_V4SI_V4DF_INT,
34009 IX86_BUILTIN_SCATTERSIV4DF);
34011 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_scattersiv2df",
34012 VOID_FTYPE_PDOUBLE_QI_V4SI_V2DF_INT,
34013 IX86_BUILTIN_SCATTERSIV2DF);
34015 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_scatterdiv8sf",
34016 VOID_FTYPE_PFLOAT_QI_V4DI_V4SF_INT,
34017 IX86_BUILTIN_SCATTERDIV8SF);
34019 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_scatterdiv4sf",
34020 VOID_FTYPE_PFLOAT_QI_V2DI_V4SF_INT,
34021 IX86_BUILTIN_SCATTERDIV4SF);
34023 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_scatterdiv4df",
34024 VOID_FTYPE_PDOUBLE_QI_V4DI_V4DF_INT,
34025 IX86_BUILTIN_SCATTERDIV4DF);
34027 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_scatterdiv2df",
34028 VOID_FTYPE_PDOUBLE_QI_V2DI_V2DF_INT,
34029 IX86_BUILTIN_SCATTERDIV2DF);
34031 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_scattersiv8si",
34032 VOID_FTYPE_PINT_QI_V8SI_V8SI_INT,
34033 IX86_BUILTIN_SCATTERSIV8SI);
34035 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_scattersiv4si",
34036 VOID_FTYPE_PINT_QI_V4SI_V4SI_INT,
34037 IX86_BUILTIN_SCATTERSIV4SI);
34039 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_scattersiv4di",
34040 VOID_FTYPE_PLONGLONG_QI_V4SI_V4DI_INT,
34041 IX86_BUILTIN_SCATTERSIV4DI);
34043 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_scattersiv2di",
34044 VOID_FTYPE_PLONGLONG_QI_V4SI_V2DI_INT,
34045 IX86_BUILTIN_SCATTERSIV2DI);
34047 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_scatterdiv8si",
34048 VOID_FTYPE_PINT_QI_V4DI_V4SI_INT,
34049 IX86_BUILTIN_SCATTERDIV8SI);
34051 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_scatterdiv4si",
34052 VOID_FTYPE_PINT_QI_V2DI_V4SI_INT,
34053 IX86_BUILTIN_SCATTERDIV4SI);
34055 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_scatterdiv4di",
34056 VOID_FTYPE_PLONGLONG_QI_V4DI_V4DI_INT,
34057 IX86_BUILTIN_SCATTERDIV4DI);
34059 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_scatterdiv2di",
34060 VOID_FTYPE_PLONGLONG_QI_V2DI_V2DI_INT,
34061 IX86_BUILTIN_SCATTERDIV2DI);
34063 /* AVX512PF */
34064 def_builtin (OPTION_MASK_ISA_AVX512PF, "__builtin_ia32_gatherpfdpd",
34065 VOID_FTYPE_QI_V8SI_PCINT64_INT_INT,
34066 IX86_BUILTIN_GATHERPFDPD);
34067 def_builtin (OPTION_MASK_ISA_AVX512PF, "__builtin_ia32_gatherpfdps",
34068 VOID_FTYPE_HI_V16SI_PCINT_INT_INT,
34069 IX86_BUILTIN_GATHERPFDPS);
34070 def_builtin (OPTION_MASK_ISA_AVX512PF, "__builtin_ia32_gatherpfqpd",
34071 VOID_FTYPE_QI_V8DI_PCINT64_INT_INT,
34072 IX86_BUILTIN_GATHERPFQPD);
34073 def_builtin (OPTION_MASK_ISA_AVX512PF, "__builtin_ia32_gatherpfqps",
34074 VOID_FTYPE_QI_V8DI_PCINT_INT_INT,
34075 IX86_BUILTIN_GATHERPFQPS);
34076 def_builtin (OPTION_MASK_ISA_AVX512PF, "__builtin_ia32_scatterpfdpd",
34077 VOID_FTYPE_QI_V8SI_PCINT64_INT_INT,
34078 IX86_BUILTIN_SCATTERPFDPD);
34079 def_builtin (OPTION_MASK_ISA_AVX512PF, "__builtin_ia32_scatterpfdps",
34080 VOID_FTYPE_HI_V16SI_PCINT_INT_INT,
34081 IX86_BUILTIN_SCATTERPFDPS);
34082 def_builtin (OPTION_MASK_ISA_AVX512PF, "__builtin_ia32_scatterpfqpd",
34083 VOID_FTYPE_QI_V8DI_PCINT64_INT_INT,
34084 IX86_BUILTIN_SCATTERPFQPD);
34085 def_builtin (OPTION_MASK_ISA_AVX512PF, "__builtin_ia32_scatterpfqps",
34086 VOID_FTYPE_QI_V8DI_PCINT_INT_INT,
34087 IX86_BUILTIN_SCATTERPFQPS);
34089 /* SHA */
34090 def_builtin_const (OPTION_MASK_ISA_SHA, "__builtin_ia32_sha1msg1",
34091 V4SI_FTYPE_V4SI_V4SI, IX86_BUILTIN_SHA1MSG1);
34092 def_builtin_const (OPTION_MASK_ISA_SHA, "__builtin_ia32_sha1msg2",
34093 V4SI_FTYPE_V4SI_V4SI, IX86_BUILTIN_SHA1MSG2);
34094 def_builtin_const (OPTION_MASK_ISA_SHA, "__builtin_ia32_sha1nexte",
34095 V4SI_FTYPE_V4SI_V4SI, IX86_BUILTIN_SHA1NEXTE);
34096 def_builtin_const (OPTION_MASK_ISA_SHA, "__builtin_ia32_sha1rnds4",
34097 V4SI_FTYPE_V4SI_V4SI_INT, IX86_BUILTIN_SHA1RNDS4);
34098 def_builtin_const (OPTION_MASK_ISA_SHA, "__builtin_ia32_sha256msg1",
34099 V4SI_FTYPE_V4SI_V4SI, IX86_BUILTIN_SHA256MSG1);
34100 def_builtin_const (OPTION_MASK_ISA_SHA, "__builtin_ia32_sha256msg2",
34101 V4SI_FTYPE_V4SI_V4SI, IX86_BUILTIN_SHA256MSG2);
34102 def_builtin_const (OPTION_MASK_ISA_SHA, "__builtin_ia32_sha256rnds2",
34103 V4SI_FTYPE_V4SI_V4SI_V4SI, IX86_BUILTIN_SHA256RNDS2);
34105 /* RTM. */
34106 def_builtin (OPTION_MASK_ISA_RTM, "__builtin_ia32_xabort",
34107 VOID_FTYPE_UNSIGNED, IX86_BUILTIN_XABORT);
34109 /* MMX access to the vec_init patterns. */
34110 def_builtin_const (OPTION_MASK_ISA_MMX, "__builtin_ia32_vec_init_v2si",
34111 V2SI_FTYPE_INT_INT, IX86_BUILTIN_VEC_INIT_V2SI);
34113 def_builtin_const (OPTION_MASK_ISA_MMX, "__builtin_ia32_vec_init_v4hi",
34114 V4HI_FTYPE_HI_HI_HI_HI,
34115 IX86_BUILTIN_VEC_INIT_V4HI);
34117 def_builtin_const (OPTION_MASK_ISA_MMX, "__builtin_ia32_vec_init_v8qi",
34118 V8QI_FTYPE_QI_QI_QI_QI_QI_QI_QI_QI,
34119 IX86_BUILTIN_VEC_INIT_V8QI);
34121 /* Access to the vec_extract patterns. */
34122 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_ext_v2df",
34123 DOUBLE_FTYPE_V2DF_INT, IX86_BUILTIN_VEC_EXT_V2DF);
34124 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_ext_v2di",
34125 DI_FTYPE_V2DI_INT, IX86_BUILTIN_VEC_EXT_V2DI);
34126 def_builtin_const (OPTION_MASK_ISA_SSE, "__builtin_ia32_vec_ext_v4sf",
34127 FLOAT_FTYPE_V4SF_INT, IX86_BUILTIN_VEC_EXT_V4SF);
34128 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_ext_v4si",
34129 SI_FTYPE_V4SI_INT, IX86_BUILTIN_VEC_EXT_V4SI);
34130 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_ext_v8hi",
34131 HI_FTYPE_V8HI_INT, IX86_BUILTIN_VEC_EXT_V8HI);
34133 def_builtin_const (OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A,
34134 "__builtin_ia32_vec_ext_v4hi",
34135 HI_FTYPE_V4HI_INT, IX86_BUILTIN_VEC_EXT_V4HI);
34137 def_builtin_const (OPTION_MASK_ISA_MMX, "__builtin_ia32_vec_ext_v2si",
34138 SI_FTYPE_V2SI_INT, IX86_BUILTIN_VEC_EXT_V2SI);
34140 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_ext_v16qi",
34141 QI_FTYPE_V16QI_INT, IX86_BUILTIN_VEC_EXT_V16QI);
34143 /* Access to the vec_set patterns. */
34144 def_builtin_const (OPTION_MASK_ISA_SSE4_1 | OPTION_MASK_ISA_64BIT,
34145 "__builtin_ia32_vec_set_v2di",
34146 V2DI_FTYPE_V2DI_DI_INT, IX86_BUILTIN_VEC_SET_V2DI);
34148 def_builtin_const (OPTION_MASK_ISA_SSE4_1, "__builtin_ia32_vec_set_v4sf",
34149 V4SF_FTYPE_V4SF_FLOAT_INT, IX86_BUILTIN_VEC_SET_V4SF);
34151 def_builtin_const (OPTION_MASK_ISA_SSE4_1, "__builtin_ia32_vec_set_v4si",
34152 V4SI_FTYPE_V4SI_SI_INT, IX86_BUILTIN_VEC_SET_V4SI);
34154 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_set_v8hi",
34155 V8HI_FTYPE_V8HI_HI_INT, IX86_BUILTIN_VEC_SET_V8HI);
34157 def_builtin_const (OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A,
34158 "__builtin_ia32_vec_set_v4hi",
34159 V4HI_FTYPE_V4HI_HI_INT, IX86_BUILTIN_VEC_SET_V4HI);
34161 def_builtin_const (OPTION_MASK_ISA_SSE4_1, "__builtin_ia32_vec_set_v16qi",
34162 V16QI_FTYPE_V16QI_QI_INT, IX86_BUILTIN_VEC_SET_V16QI);
34164 /* RDSEED */
34165 def_builtin (OPTION_MASK_ISA_RDSEED, "__builtin_ia32_rdseed_hi_step",
34166 INT_FTYPE_PUSHORT, IX86_BUILTIN_RDSEED16_STEP);
34167 def_builtin (OPTION_MASK_ISA_RDSEED, "__builtin_ia32_rdseed_si_step",
34168 INT_FTYPE_PUNSIGNED, IX86_BUILTIN_RDSEED32_STEP);
34169 def_builtin (OPTION_MASK_ISA_RDSEED | OPTION_MASK_ISA_64BIT,
34170 "__builtin_ia32_rdseed_di_step",
34171 INT_FTYPE_PULONGLONG, IX86_BUILTIN_RDSEED64_STEP);
34173 /* ADCX */
34174 def_builtin (0, "__builtin_ia32_addcarryx_u32",
34175 UCHAR_FTYPE_UCHAR_UINT_UINT_PUNSIGNED, IX86_BUILTIN_ADDCARRYX32);
34176 def_builtin (OPTION_MASK_ISA_64BIT,
34177 "__builtin_ia32_addcarryx_u64",
34178 UCHAR_FTYPE_UCHAR_ULONGLONG_ULONGLONG_PULONGLONG,
34179 IX86_BUILTIN_ADDCARRYX64);
34181 /* SBB */
34182 def_builtin (0, "__builtin_ia32_sbb_u32",
34183 UCHAR_FTYPE_UCHAR_UINT_UINT_PUNSIGNED, IX86_BUILTIN_SBB32);
34184 def_builtin (OPTION_MASK_ISA_64BIT,
34185 "__builtin_ia32_sbb_u64",
34186 UCHAR_FTYPE_UCHAR_ULONGLONG_ULONGLONG_PULONGLONG,
34187 IX86_BUILTIN_SBB64);
34189 /* Read/write FLAGS. */
34190 def_builtin (~OPTION_MASK_ISA_64BIT, "__builtin_ia32_readeflags_u32",
34191 UNSIGNED_FTYPE_VOID, IX86_BUILTIN_READ_FLAGS);
34192 def_builtin (OPTION_MASK_ISA_64BIT, "__builtin_ia32_readeflags_u64",
34193 UINT64_FTYPE_VOID, IX86_BUILTIN_READ_FLAGS);
34194 def_builtin (~OPTION_MASK_ISA_64BIT, "__builtin_ia32_writeeflags_u32",
34195 VOID_FTYPE_UNSIGNED, IX86_BUILTIN_WRITE_FLAGS);
34196 def_builtin (OPTION_MASK_ISA_64BIT, "__builtin_ia32_writeeflags_u64",
34197 VOID_FTYPE_UINT64, IX86_BUILTIN_WRITE_FLAGS);
34199 /* CLFLUSHOPT. */
34200 def_builtin (OPTION_MASK_ISA_CLFLUSHOPT, "__builtin_ia32_clflushopt",
34201 VOID_FTYPE_PCVOID, IX86_BUILTIN_CLFLUSHOPT);
34203 /* CLWB. */
34204 def_builtin (OPTION_MASK_ISA_CLWB, "__builtin_ia32_clwb",
34205 VOID_FTYPE_PCVOID, IX86_BUILTIN_CLWB);
34207 /* MONITORX and MWAITX. */
34208 def_builtin (OPTION_MASK_ISA_MWAITX, "__builtin_ia32_monitorx",
34209 VOID_FTYPE_PCVOID_UNSIGNED_UNSIGNED, IX86_BUILTIN_MONITORX);
34210 def_builtin (OPTION_MASK_ISA_MWAITX, "__builtin_ia32_mwaitx",
34211 VOID_FTYPE_UNSIGNED_UNSIGNED_UNSIGNED, IX86_BUILTIN_MWAITX);
34213 /* Add the multi-argument builtins (FMA4, FMA and XOP). */
34214 for (i = 0, d = bdesc_multi_arg; i < ARRAY_SIZE (bdesc_multi_arg); i++, d++)
34216 if (d->name == 0)
34217 continue;
34219 ftype = (enum ix86_builtin_func_type) d->flag;
34220 def_builtin_const (d->mask, d->name, ftype, d->code);
34224 static void
34225 ix86_init_mpx_builtins ()
34227 const struct builtin_description * d;
34228 enum ix86_builtin_func_type ftype;
34229 tree decl;
34230 size_t i;
34232 for (i = 0, d = bdesc_mpx;
34233 i < ARRAY_SIZE (bdesc_mpx);
34234 i++, d++)
34236 if (d->name == 0)
34237 continue;
34239 ftype = (enum ix86_builtin_func_type) d->flag;
34240 decl = def_builtin (d->mask, d->name, ftype, d->code);
34242 /* Without leaf and nothrow flags on MPX builtins,
34243 abnormal edges may follow their calls when setjmp
34244 is present in the function. Since we may have many
34245 MPX builtin calls, this creates lots of useless
34246 edges and enormous PHI nodes. To avoid this, mark
34247 MPX builtins as leaf and nothrow. */
34248 if (decl)
34250 DECL_ATTRIBUTES (decl) = build_tree_list (get_identifier ("leaf"),
34251 NULL_TREE);
34252 TREE_NOTHROW (decl) = 1;
34254 else
34256 ix86_builtins_isa[(int)d->code].leaf_p = true;
34257 ix86_builtins_isa[(int)d->code].nothrow_p = true;
34261 for (i = 0, d = bdesc_mpx_const;
34262 i < ARRAY_SIZE (bdesc_mpx_const);
34263 i++, d++)
34265 if (d->name == 0)
34266 continue;
34268 ftype = (enum ix86_builtin_func_type) d->flag;
34269 decl = def_builtin_const (d->mask, d->name, ftype, d->code);
34271 if (decl)
34273 DECL_ATTRIBUTES (decl) = build_tree_list (get_identifier ("leaf"),
34274 NULL_TREE);
34275 TREE_NOTHROW (decl) = 1;
34277 else
34279 ix86_builtins_isa[(int)d->code].leaf_p = true;
34280 ix86_builtins_isa[(int)d->code].nothrow_p = true;
34285 /* This adds a condition to the basic_block NEW_BB in function FUNCTION_DECL
34286 to return a pointer to VERSION_DECL if the outcome of the expression
34287 formed by PREDICATE_CHAIN is true. This function will be called during
34288 version dispatch to decide which function version to execute. It returns
34289 the basic block at the end, to which more conditions can be added. */
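/* Rough C-level shape of what this builds into the dispatcher body
   (illustrative sketch, assuming a single predicate in PREDICATE_CHAIN):

     cond = __builtin_cpu_is ("arch-name");   // predicate call
     if (cond > 0)
       return (void *) &version_decl;         // the converted address
     // otherwise fall through to the block returned for the next version
*/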
34291 static basic_block
34292 add_condition_to_bb (tree function_decl, tree version_decl,
34293 tree predicate_chain, basic_block new_bb)
34295 gimple return_stmt;
34296 tree convert_expr, result_var;
34297 gimple convert_stmt;
34298 gimple call_cond_stmt;
34299 gimple if_else_stmt;
34301 basic_block bb1, bb2, bb3;
34302 edge e12, e23;
34304 tree cond_var, and_expr_var = NULL_TREE;
34305 gimple_seq gseq;
34307 tree predicate_decl, predicate_arg;
34309 push_cfun (DECL_STRUCT_FUNCTION (function_decl));
34311 gcc_assert (new_bb != NULL);
34312 gseq = bb_seq (new_bb);
34315 convert_expr = build1 (CONVERT_EXPR, ptr_type_node,
34316 build_fold_addr_expr (version_decl));
34317 result_var = create_tmp_var (ptr_type_node);
34318 convert_stmt = gimple_build_assign (result_var, convert_expr);
34319 return_stmt = gimple_build_return (result_var);
34321 if (predicate_chain == NULL_TREE)
34323 gimple_seq_add_stmt (&gseq, convert_stmt);
34324 gimple_seq_add_stmt (&gseq, return_stmt);
34325 set_bb_seq (new_bb, gseq);
34326 gimple_set_bb (convert_stmt, new_bb);
34327 gimple_set_bb (return_stmt, new_bb);
34328 pop_cfun ();
34329 return new_bb;
34332 while (predicate_chain != NULL)
34334 cond_var = create_tmp_var (integer_type_node);
34335 predicate_decl = TREE_PURPOSE (predicate_chain);
34336 predicate_arg = TREE_VALUE (predicate_chain);
34337 call_cond_stmt = gimple_build_call (predicate_decl, 1, predicate_arg);
34338 gimple_call_set_lhs (call_cond_stmt, cond_var);
34340 gimple_set_block (call_cond_stmt, DECL_INITIAL (function_decl));
34341 gimple_set_bb (call_cond_stmt, new_bb);
34342 gimple_seq_add_stmt (&gseq, call_cond_stmt);
34344 predicate_chain = TREE_CHAIN (predicate_chain);
34346 if (and_expr_var == NULL)
34347 and_expr_var = cond_var;
34348 else
34350 gimple assign_stmt;
34351 /* Use MIN_EXPR to check whether any integer is zero:
34352 and_expr_var = min_expr <cond_var, and_expr_var>. */
34353 assign_stmt = gimple_build_assign (and_expr_var,
34354 build2 (MIN_EXPR, integer_type_node,
34355 cond_var, and_expr_var));
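	  /* Worked example: if one predicate returned 0 and another 3,
	     MIN_EXPR folds them to 0 and the "and_expr_var > 0" test emitted
	     below fails; with results 3 and 7 the minimum is 7 > 0, i.e.
	     every predicate in the chain succeeded.  */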
34357 gimple_set_block (assign_stmt, DECL_INITIAL (function_decl));
34358 gimple_set_bb (assign_stmt, new_bb);
34359 gimple_seq_add_stmt (&gseq, assign_stmt);
34363 if_else_stmt = gimple_build_cond (GT_EXPR, and_expr_var,
34364 integer_zero_node,
34365 NULL_TREE, NULL_TREE);
34366 gimple_set_block (if_else_stmt, DECL_INITIAL (function_decl));
34367 gimple_set_bb (if_else_stmt, new_bb);
34368 gimple_seq_add_stmt (&gseq, if_else_stmt);
34370 gimple_seq_add_stmt (&gseq, convert_stmt);
34371 gimple_seq_add_stmt (&gseq, return_stmt);
34372 set_bb_seq (new_bb, gseq);
34374 bb1 = new_bb;
34375 e12 = split_block (bb1, if_else_stmt);
34376 bb2 = e12->dest;
34377 e12->flags &= ~EDGE_FALLTHRU;
34378 e12->flags |= EDGE_TRUE_VALUE;
34380 e23 = split_block (bb2, return_stmt);
34382 gimple_set_bb (convert_stmt, bb2);
34383 gimple_set_bb (return_stmt, bb2);
34385 bb3 = e23->dest;
34386 make_edge (bb1, bb3, EDGE_FALSE_VALUE);
34388 remove_edge (e23);
34389 make_edge (bb2, EXIT_BLOCK_PTR_FOR_FN (cfun), 0);
34391 pop_cfun ();
34393 return bb3;
34396 /* This parses the attribute arguments to target in DECL and determines
34397 the right builtin to use to match the platform specification.
34398 It returns the priority value for this version decl. If PREDICATE_LIST
34399 is not NULL, it stores the list of cpu features that need to be checked
34400 before dispatching this function. */
34402 static unsigned int
34403 get_builtin_code_for_version (tree decl, tree *predicate_list)
34405 tree attrs;
34406 struct cl_target_option cur_target;
34407 tree target_node;
34408 struct cl_target_option *new_target;
34409 const char *arg_str = NULL;
34410 const char *attrs_str = NULL;
34411 char *tok_str = NULL;
34412 char *token;
34414 /* Priority of i386 features; a greater value means higher priority. This is
34415 used to decide the order in which function dispatch must happen. For
34416 instance, a version specialized for SSE4.2 should be checked for dispatch
34417 before a version for SSE3, as SSE4.2 implies SSE3. */
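/* Example (illustrative): given two versions of the same function,

     __attribute__ ((target ("sse4.2"))) int f (void);   // P_SSE4_2
     __attribute__ ((target ("sse3")))   int f (void);   // P_SSE3

   the SSE4.2 version has the larger priority value, so its feature check is
   placed ahead of the SSE3 one in the generated dispatcher.  */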
34418 enum feature_priority
34420 P_ZERO = 0,
34421 P_MMX,
34422 P_SSE,
34423 P_SSE2,
34424 P_SSE3,
34425 P_SSSE3,
34426 P_PROC_SSSE3,
34427 P_SSE4_A,
34428 P_PROC_SSE4_A,
34429 P_SSE4_1,
34430 P_SSE4_2,
34431 P_PROC_SSE4_2,
34432 P_POPCNT,
34433 P_AVX,
34434 P_PROC_AVX,
34435 P_BMI,
34436 P_PROC_BMI,
34437 P_FMA4,
34438 P_XOP,
34439 P_PROC_XOP,
34440 P_FMA,
34441 P_PROC_FMA,
34442 P_BMI2,
34443 P_AVX2,
34444 P_PROC_AVX2,
34445 P_AVX512F,
34446 P_PROC_AVX512F
34449 enum feature_priority priority = P_ZERO;
34451 /* These are the target attribute strings for which a dispatcher is
34452 available, from fold_builtin_cpu. */
34454 static struct _feature_list
34456 const char *const name;
34457 const enum feature_priority priority;
34459 const feature_list[] =
34461 {"mmx", P_MMX},
34462 {"sse", P_SSE},
34463 {"sse2", P_SSE2},
34464 {"sse3", P_SSE3},
34465 {"sse4a", P_SSE4_A},
34466 {"ssse3", P_SSSE3},
34467 {"sse4.1", P_SSE4_1},
34468 {"sse4.2", P_SSE4_2},
34469 {"popcnt", P_POPCNT},
34470 {"avx", P_AVX},
34471 {"bmi", P_BMI},
34472 {"fma4", P_FMA4},
34473 {"xop", P_XOP},
34474 {"fma", P_FMA},
34475 {"bmi2", P_BMI2},
34476 {"avx2", P_AVX2},
34477 {"avx512f", P_AVX512F}
34481 static unsigned int NUM_FEATURES
34482 = sizeof (feature_list) / sizeof (struct _feature_list);
34484 unsigned int i;
34486 tree predicate_chain = NULL_TREE;
34487 tree predicate_decl, predicate_arg;
34489 attrs = lookup_attribute ("target", DECL_ATTRIBUTES (decl));
34490 gcc_assert (attrs != NULL);
34492 attrs = TREE_VALUE (TREE_VALUE (attrs));
34494 gcc_assert (TREE_CODE (attrs) == STRING_CST);
34495 attrs_str = TREE_STRING_POINTER (attrs);
34497 /* Return priority zero for default function. */
34498 if (strcmp (attrs_str, "default") == 0)
34499 return 0;
34501 /* Handle arch= if specified. For priority, set it to be 1 more than
34502 the best instruction set the processor can handle. For instance, if
34503 there is a version for atom and a version for ssse3 (the highest ISA
34504 priority for atom), the atom version must be checked for dispatch
34505 before the ssse3 version. */
34506 if (strstr (attrs_str, "arch=") != NULL)
34508 cl_target_option_save (&cur_target, &global_options);
34509 target_node = ix86_valid_target_attribute_tree (attrs, &global_options,
34510 &global_options_set);
34512 gcc_assert (target_node);
34513 new_target = TREE_TARGET_OPTION (target_node);
34514 gcc_assert (new_target);
34516 if (new_target->arch_specified && new_target->arch > 0)
34518 switch (new_target->arch)
34520 case PROCESSOR_CORE2:
34521 arg_str = "core2";
34522 priority = P_PROC_SSSE3;
34523 break;
34524 case PROCESSOR_NEHALEM:
34525 if (new_target->x_ix86_isa_flags & OPTION_MASK_ISA_AES)
34526 arg_str = "westmere";
34527 else
34528 /* We translate "arch=corei7" and "arch=nehalem" to
34529 "corei7" so that it will be mapped to M_INTEL_COREI7
34530 as cpu type to cover all M_INTEL_COREI7_XXXs. */
34531 arg_str = "corei7";
34532 priority = P_PROC_SSE4_2;
34533 break;
34534 case PROCESSOR_SANDYBRIDGE:
34535 if (new_target->x_ix86_isa_flags & OPTION_MASK_ISA_F16C)
34536 arg_str = "ivybridge";
34537 else
34538 arg_str = "sandybridge";
34539 priority = P_PROC_AVX;
34540 break;
34541 case PROCESSOR_HASWELL:
34542 if (new_target->x_ix86_isa_flags & OPTION_MASK_ISA_ADX)
34543 arg_str = "broadwell";
34544 else
34545 arg_str = "haswell";
34546 priority = P_PROC_AVX2;
34547 break;
34548 case PROCESSOR_BONNELL:
34549 arg_str = "bonnell";
34550 priority = P_PROC_SSSE3;
34551 break;
34552 case PROCESSOR_KNL:
34553 arg_str = "knl";
34554 priority = P_PROC_AVX512F;
34555 break;
34556 case PROCESSOR_SILVERMONT:
34557 arg_str = "silvermont";
34558 priority = P_PROC_SSE4_2;
34559 break;
34560 case PROCESSOR_AMDFAM10:
34561 arg_str = "amdfam10h";
34562 priority = P_PROC_SSE4_A;
34563 break;
34564 case PROCESSOR_BTVER1:
34565 arg_str = "btver1";
34566 priority = P_PROC_SSE4_A;
34567 break;
34568 case PROCESSOR_BTVER2:
34569 arg_str = "btver2";
34570 priority = P_PROC_BMI;
34571 break;
34572 case PROCESSOR_BDVER1:
34573 arg_str = "bdver1";
34574 priority = P_PROC_XOP;
34575 break;
34576 case PROCESSOR_BDVER2:
34577 arg_str = "bdver2";
34578 priority = P_PROC_FMA;
34579 break;
34580 case PROCESSOR_BDVER3:
34581 arg_str = "bdver3";
34582 priority = P_PROC_FMA;
34583 break;
34584 case PROCESSOR_BDVER4:
34585 arg_str = "bdver4";
34586 priority = P_PROC_AVX2;
34587 break;
34591 cl_target_option_restore (&global_options, &cur_target);
34593 if (predicate_list && arg_str == NULL)
34595 error_at (DECL_SOURCE_LOCATION (decl),
34596 "No dispatcher found for the versioning attributes");
34597 return 0;
34600 if (predicate_list)
34602 predicate_decl = ix86_builtins [(int) IX86_BUILTIN_CPU_IS];
34603 /* For a C string literal the length includes the trailing NULL. */
34604 predicate_arg = build_string_literal (strlen (arg_str) + 1, arg_str);
34605 predicate_chain = tree_cons (predicate_decl, predicate_arg,
34606 predicate_chain);
34610 /* Process feature name. */
34611 tok_str = (char *) xmalloc (strlen (attrs_str) + 1);
34612 strcpy (tok_str, attrs_str);
34613 token = strtok (tok_str, ",");
34614 predicate_decl = ix86_builtins [(int) IX86_BUILTIN_CPU_SUPPORTS];
34616 while (token != NULL)
34618 /* Do not process "arch=" */
34619 if (strncmp (token, "arch=", 5) == 0)
34621 token = strtok (NULL, ",");
34622 continue;
34624 for (i = 0; i < NUM_FEATURES; ++i)
34626 if (strcmp (token, feature_list[i].name) == 0)
34628 if (predicate_list)
34630 predicate_arg = build_string_literal (
34631 strlen (feature_list[i].name) + 1,
34632 feature_list[i].name);
34633 predicate_chain = tree_cons (predicate_decl, predicate_arg,
34634 predicate_chain);
34636 /* Find the maximum priority feature. */
34637 if (feature_list[i].priority > priority)
34638 priority = feature_list[i].priority;
34640 break;
34643 if (predicate_list && i == NUM_FEATURES)
34645 error_at (DECL_SOURCE_LOCATION (decl),
34646 "No dispatcher found for %s", token);
34647 return 0;
34649 token = strtok (NULL, ",");
34651 free (tok_str);
34653 if (predicate_list && predicate_chain == NULL_TREE)
34655 error_at (DECL_SOURCE_LOCATION (decl),
34656 "No dispatcher found for the versioning attributes : %s",
34657 attrs_str);
34658 return 0;
34660 else if (predicate_list)
34662 predicate_chain = nreverse (predicate_chain);
34663 *predicate_list = predicate_chain;
34666 return priority;
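For orientation, here is a hedged C++ sketch of the kind of declarations this routine parses; the target strings are taken from the tables above, and because arch=haswell maps to P_PROC_AVX2 while sse3 maps to P_SSE3, the haswell version is checked for dispatch first.

/* Illustrative only; not part of i386.c.  Multiversioning via the target
   attribute is a C++ front-end feature.  */
__attribute__ ((target ("default")))
int foo () { return 0; }

__attribute__ ((target ("sse3")))
int foo () { return 1; }

__attribute__ ((target ("arch=haswell")))
int foo () { return 2; }

int call_foo () { return foo (); }   /* Resolved at run time through the dispatcher.  */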
34669 /* This compares the priority of target features in function DECL1
34670 and DECL2. It returns positive value if DECL1 is higher priority,
34671 negative value if DECL2 is higher priority and 0 if they are the
34672 same. */
34674 static int
34675 ix86_compare_version_priority (tree decl1, tree decl2)
34677 unsigned int priority1 = get_builtin_code_for_version (decl1, NULL);
34678 unsigned int priority2 = get_builtin_code_for_version (decl2, NULL);
34680 return (int)priority1 - (int)priority2;
34683 /* V1 and V2 point to function versions with different priorities
34684 based on the target ISA. This function compares their priorities. */
34686 static int
34687 feature_compare (const void *v1, const void *v2)
34689 typedef struct _function_version_info
34691 tree version_decl;
34692 tree predicate_chain;
34693 unsigned int dispatch_priority;
34694 } function_version_info;
34696 const function_version_info c1 = *(const function_version_info *)v1;
34697 const function_version_info c2 = *(const function_version_info *)v2;
34698 return (c2.dispatch_priority - c1.dispatch_priority);
34701 /* This function generates the dispatch function for
34702 multi-versioned functions. DISPATCH_DECL is the function which will
34703 contain the dispatch logic. FNDECLS are the function choices for
34704 dispatch, and is a tree chain. EMPTY_BB is the basic block pointer
34705 in DISPATCH_DECL in which the dispatch code is generated. */
34707 static int
34708 dispatch_function_versions (tree dispatch_decl,
34709 void *fndecls_p,
34710 basic_block *empty_bb)
34712 tree default_decl;
34713 gimple ifunc_cpu_init_stmt;
34714 gimple_seq gseq;
34715 int ix;
34716 tree ele;
34717 vec<tree> *fndecls;
34718 unsigned int num_versions = 0;
34719 unsigned int actual_versions = 0;
34720 unsigned int i;
34722 struct _function_version_info
34724 tree version_decl;
34725 tree predicate_chain;
34726 unsigned int dispatch_priority;
34727 }*function_version_info;
34729 gcc_assert (dispatch_decl != NULL
34730 && fndecls_p != NULL
34731 && empty_bb != NULL);
34733 /* fndecls_p is actually a vector. */
34734 fndecls = static_cast<vec<tree> *> (fndecls_p);
34736 /* At least one more version other than the default. */
34737 num_versions = fndecls->length ();
34738 gcc_assert (num_versions >= 2);
34740 function_version_info = (struct _function_version_info *)
34741 XNEWVEC (struct _function_version_info, (num_versions - 1));
34743 /* The first version in the vector is the default decl. */
34744 default_decl = (*fndecls)[0];
34746 push_cfun (DECL_STRUCT_FUNCTION (dispatch_decl));
34748 gseq = bb_seq (*empty_bb);
34749 /* Function version dispatch is via IFUNC. IFUNC resolvers fire before
34750 constructors, so explicitly call __builtin_cpu_init here. */
34751 ifunc_cpu_init_stmt = gimple_build_call_vec (
34752 ix86_builtins [(int) IX86_BUILTIN_CPU_INIT], vNULL);
34753 gimple_seq_add_stmt (&gseq, ifunc_cpu_init_stmt);
34754 gimple_set_bb (ifunc_cpu_init_stmt, *empty_bb);
34755 set_bb_seq (*empty_bb, gseq);
34757 pop_cfun ();
34760 for (ix = 1; fndecls->iterate (ix, &ele); ++ix)
34762 tree version_decl = ele;
34763 tree predicate_chain = NULL_TREE;
34764 unsigned int priority;
34765 /* Get attribute string, parse it and find the right predicate decl.
34766 The predicate function could be a lengthy combination of many
34767 features, like arch-type and various isa-variants. */
34768 priority = get_builtin_code_for_version (version_decl,
34769 &predicate_chain);
34771 if (predicate_chain == NULL_TREE)
34772 continue;
34774 function_version_info [actual_versions].version_decl = version_decl;
34775 function_version_info [actual_versions].predicate_chain
34776 = predicate_chain;
34777 function_version_info [actual_versions].dispatch_priority = priority;
34778 actual_versions++;
34781 /* Sort the versions according to descending order of dispatch priority. The
34782 priority is based on the ISA. This is not a perfect solution. There
34783 could still be ambiguity. If more than one function version is suitable
34784 to execute, which one should be dispatched? In future, allow the user
34785 to specify a dispatch priority next to the version. */
34786 qsort (function_version_info, actual_versions,
34787 sizeof (struct _function_version_info), feature_compare);
34789 for (i = 0; i < actual_versions; ++i)
34790 *empty_bb = add_condition_to_bb (dispatch_decl,
34791 function_version_info[i].version_decl,
34792 function_version_info[i].predicate_chain,
34793 *empty_bb);
34795 /* Dispatch the default version at the end. */
34796 *empty_bb = add_condition_to_bb (dispatch_decl, default_decl,
34797 NULL, *empty_bb);
34799 free (function_version_info);
34800 return 0;
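The resolver assembled here, one add_condition_to_bb call per version in descending priority order with the default last, behaves roughly like the following hand-written sketch; the function names are assumptions made up for illustration.

static int foo_haswell () { return 2; }
static int foo_sse42 ()   { return 1; }
static int foo_default () { return 0; }

/* Illustrative only; the real body is built directly as GIMPLE.  */
static void *
foo_resolver ()
{
  __builtin_cpu_init ();                  /* IFUNC resolvers run before constructors.  */
  if (__builtin_cpu_is ("haswell"))
    return (void *) foo_haswell;
  if (__builtin_cpu_supports ("sse4.2"))
    return (void *) foo_sse42;
  return (void *) foo_default;
}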
34803 /* Comparator function to be used in qsort routine to sort attribute
34804 specification strings to "target". */
34806 static int
34807 attr_strcmp (const void *v1, const void *v2)
34809 const char *c1 = *(char *const*)v1;
34810 const char *c2 = *(char *const*)v2;
34811 return strcmp (c1, c2);
34814 /* ARGLIST is the argument to target attribute. This function tokenizes
34815 the comma separated arguments, sorts them and returns a string which
34816 is a unique identifier for the comma separated arguments. It also
34817 replaces non-identifier characters "=,-" with "_". */
34819 static char *
34820 sorted_attr_string (tree arglist)
34822 tree arg;
34823 size_t str_len_sum = 0;
34824 char **args = NULL;
34825 char *attr_str, *ret_str;
34826 char *attr = NULL;
34827 unsigned int argnum = 1;
34828 unsigned int i;
34830 for (arg = arglist; arg; arg = TREE_CHAIN (arg))
34832 const char *str = TREE_STRING_POINTER (TREE_VALUE (arg));
34833 size_t len = strlen (str);
34834 str_len_sum += len + 1;
34835 if (arg != arglist)
34836 argnum++;
34837 for (i = 0; i < strlen (str); i++)
34838 if (str[i] == ',')
34839 argnum++;
34842 attr_str = XNEWVEC (char, str_len_sum);
34843 str_len_sum = 0;
34844 for (arg = arglist; arg; arg = TREE_CHAIN (arg))
34846 const char *str = TREE_STRING_POINTER (TREE_VALUE (arg));
34847 size_t len = strlen (str);
34848 memcpy (attr_str + str_len_sum, str, len);
34849 attr_str[str_len_sum + len] = TREE_CHAIN (arg) ? ',' : '\0';
34850 str_len_sum += len + 1;
34853 /* Replace "=,-" with "_". */
34854 for (i = 0; i < strlen (attr_str); i++)
34855 if (attr_str[i] == '=' || attr_str[i]== '-')
34856 attr_str[i] = '_';
34858 if (argnum == 1)
34859 return attr_str;
34861 args = XNEWVEC (char *, argnum);
34863 i = 0;
34864 attr = strtok (attr_str, ",");
34865 while (attr != NULL)
34867 args[i] = attr;
34868 i++;
34869 attr = strtok (NULL, ",");
34872 qsort (args, argnum, sizeof (char *), attr_strcmp);
34874 ret_str = XNEWVEC (char, str_len_sum);
34875 str_len_sum = 0;
34876 for (i = 0; i < argnum; i++)
34878 size_t len = strlen (args[i]);
34879 memcpy (ret_str + str_len_sum, args[i], len);
34880 ret_str[str_len_sum + len] = i < argnum - 1 ? '_' : '\0';
34881 str_len_sum += len + 1;
34884 XDELETEVEC (args);
34885 XDELETEVEC (attr_str);
34886 return ret_str;
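A standalone sketch of the same normalization follows (an approximation, not the routine above: it sorts before mapping '=' and '-' to '_'); the point is that "popcnt,avx2" and "avx2,popcnt" both normalize to "avx2_popcnt", which is the string the version mangler later appends after a '.'.

#include <algorithm>
#include <sstream>
#include <string>
#include <vector>

static std::string
sorted_attr_string_sketch (const std::string &attrs)
{
  std::vector<std::string> toks;
  std::stringstream ss (attrs);
  for (std::string tok; std::getline (ss, tok, ','); )
    toks.push_back (tok);                        /* split on ','  */
  std::sort (toks.begin (), toks.end ());        /* sort the arguments  */
  std::string out;
  for (size_t i = 0; i < toks.size (); ++i)
    out += (i ? "_" : "") + toks[i];             /* join with '_'  */
  std::replace (out.begin (), out.end (), '=', '_');
  std::replace (out.begin (), out.end (), '-', '_');
  return out;
}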
34889 /* This function changes the assembler name for functions that are
34890 versions. If DECL is a function version and has a "target"
34891 attribute, it appends the attribute string to its assembler name. */
34893 static tree
34894 ix86_mangle_function_version_assembler_name (tree decl, tree id)
34896 tree version_attr;
34897 const char *orig_name, *version_string;
34898 char *attr_str, *assembler_name;
34900 if (DECL_DECLARED_INLINE_P (decl)
34901 && lookup_attribute ("gnu_inline",
34902 DECL_ATTRIBUTES (decl)))
34903 error_at (DECL_SOURCE_LOCATION (decl),
34904 "Function versions cannot be marked as gnu_inline,"
34905 " bodies have to be generated");
34907 if (DECL_VIRTUAL_P (decl)
34908 || DECL_VINDEX (decl))
34909 sorry ("Virtual function multiversioning not supported");
34911 version_attr = lookup_attribute ("target", DECL_ATTRIBUTES (decl));
34913 /* target attribute string cannot be NULL. */
34914 gcc_assert (version_attr != NULL_TREE);
34916 orig_name = IDENTIFIER_POINTER (id);
34917 version_string
34918 = TREE_STRING_POINTER (TREE_VALUE (TREE_VALUE (version_attr)));
34920 if (strcmp (version_string, "default") == 0)
34921 return id;
34923 attr_str = sorted_attr_string (TREE_VALUE (version_attr));
34924 assembler_name = XNEWVEC (char, strlen (orig_name) + strlen (attr_str) + 2);
34926 sprintf (assembler_name, "%s.%s", orig_name, attr_str);
34928 /* Allow assembler name to be modified if already set. */
34929 if (DECL_ASSEMBLER_NAME_SET_P (decl))
34930 SET_DECL_RTL (decl, NULL);
34932 tree ret = get_identifier (assembler_name);
34933 XDELETEVEC (attr_str);
34934 XDELETEVEC (assembler_name);
34935 return ret;
34938 /* This function returns true if FN1 and FN2 are versions of the same function,
34939 that is, the target strings of the function decls are different. This assumes
34940 that FN1 and FN2 have the same signature. */
34942 static bool
34943 ix86_function_versions (tree fn1, tree fn2)
34945 tree attr1, attr2;
34946 char *target1, *target2;
34947 bool result;
34949 if (TREE_CODE (fn1) != FUNCTION_DECL
34950 || TREE_CODE (fn2) != FUNCTION_DECL)
34951 return false;
34953 attr1 = lookup_attribute ("target", DECL_ATTRIBUTES (fn1));
34954 attr2 = lookup_attribute ("target", DECL_ATTRIBUTES (fn2));
34956 /* At least one function decl should have the target attribute specified. */
34957 if (attr1 == NULL_TREE && attr2 == NULL_TREE)
34958 return false;
34960 /* Diagnose missing target attribute if one of the decls is already
34961 multi-versioned. */
34962 if (attr1 == NULL_TREE || attr2 == NULL_TREE)
34964 if (DECL_FUNCTION_VERSIONED (fn1) || DECL_FUNCTION_VERSIONED (fn2))
34966 if (attr2 != NULL_TREE)
34968 std::swap (fn1, fn2);
34969 attr1 = attr2;
34971 error_at (DECL_SOURCE_LOCATION (fn2),
34972 "missing %<target%> attribute for multi-versioned %D",
34973 fn2);
34974 inform (DECL_SOURCE_LOCATION (fn1),
34975 "previous declaration of %D", fn1);
34976 /* Prevent diagnosing of the same error multiple times. */
34977 DECL_ATTRIBUTES (fn2)
34978 = tree_cons (get_identifier ("target"),
34979 copy_node (TREE_VALUE (attr1)),
34980 DECL_ATTRIBUTES (fn2));
34982 return false;
34985 target1 = sorted_attr_string (TREE_VALUE (attr1));
34986 target2 = sorted_attr_string (TREE_VALUE (attr2));
34988 /* The sorted target strings must be different for fn1 and fn2
34989 to be versions. */
34990 if (strcmp (target1, target2) == 0)
34991 result = false;
34992 else
34993 result = true;
34995 XDELETEVEC (target1);
34996 XDELETEVEC (target2);
34998 return result;
35001 static tree
35002 ix86_mangle_decl_assembler_name (tree decl, tree id)
35004 /* For function version, add the target suffix to the assembler name. */
35005 if (TREE_CODE (decl) == FUNCTION_DECL
35006 && DECL_FUNCTION_VERSIONED (decl))
35007 id = ix86_mangle_function_version_assembler_name (decl, id);
35008 #ifdef SUBTARGET_MANGLE_DECL_ASSEMBLER_NAME
35009 id = SUBTARGET_MANGLE_DECL_ASSEMBLER_NAME (decl, id);
35010 #endif
35012 return id;
35015 /* Return a new name by appending SUFFIX to the DECL name. If make_unique
35016 is true, append the full path name of the source file. */
35018 static char *
35019 make_name (tree decl, const char *suffix, bool make_unique)
35021 char *global_var_name;
35022 int name_len;
35023 const char *name;
35024 const char *unique_name = NULL;
35026 name = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (decl));
35028 /* Get a unique name that can be used globally without any chances
35029 of collision at link time. */
35030 if (make_unique)
35031 unique_name = IDENTIFIER_POINTER (get_file_function_name ("\0"));
35033 name_len = strlen (name) + strlen (suffix) + 2;
35035 if (make_unique)
35036 name_len += strlen (unique_name) + 1;
35037 global_var_name = XNEWVEC (char, name_len);
35039 /* Use '.' to concatenate names as it is demangler friendly. */
35040 if (make_unique)
35041 snprintf (global_var_name, name_len, "%s.%s.%s", name, unique_name,
35042 suffix);
35043 else
35044 snprintf (global_var_name, name_len, "%s.%s", name, suffix);
35046 return global_var_name;
35049 #if defined (ASM_OUTPUT_TYPE_DIRECTIVE)
35051 /* Make a dispatcher declaration for the multi-versioned function DECL.
35052 Calls to DECL function will be replaced with calls to the dispatcher
35053 by the front-end. Return the decl created. */
35055 static tree
35056 make_dispatcher_decl (const tree decl)
35058 tree func_decl;
35059 char *func_name;
35060 tree fn_type, func_type;
35061 bool is_uniq = false;
35063 if (TREE_PUBLIC (decl) == 0)
35064 is_uniq = true;
35066 func_name = make_name (decl, "ifunc", is_uniq);
35068 fn_type = TREE_TYPE (decl);
35069 func_type = build_function_type (TREE_TYPE (fn_type),
35070 TYPE_ARG_TYPES (fn_type));
35072 func_decl = build_fn_decl (func_name, func_type);
35073 XDELETEVEC (func_name);
35074 TREE_USED (func_decl) = 1;
35075 DECL_CONTEXT (func_decl) = NULL_TREE;
35076 DECL_INITIAL (func_decl) = error_mark_node;
35077 DECL_ARTIFICIAL (func_decl) = 1;
35078 /* Mark this func as external; the resolver will flip it again if
35079 it gets generated. */
35080 DECL_EXTERNAL (func_decl) = 1;
35081 /* This will be of type IFUNC; IFUNCs have to be externally visible. */
35082 TREE_PUBLIC (func_decl) = 1;
35084 return func_decl;
35087 #endif
35089 /* Returns true if DECL is multi-versioned and is the default function,
35090 that is, it is not tagged with a target-specific optimization. */
35092 static bool
35093 is_function_default_version (const tree decl)
35095 if (TREE_CODE (decl) != FUNCTION_DECL
35096 || !DECL_FUNCTION_VERSIONED (decl))
35097 return false;
35098 tree attr = lookup_attribute ("target", DECL_ATTRIBUTES (decl));
35099 gcc_assert (attr);
35100 attr = TREE_VALUE (TREE_VALUE (attr));
35101 return (TREE_CODE (attr) == STRING_CST
35102 && strcmp (TREE_STRING_POINTER (attr), "default") == 0);
35105 /* Make a dispatcher declaration for the multi-versioned function DECL.
35106 Calls to DECL function will be replaced with calls to the dispatcher
35107 by the front-end. Returns the decl of the dispatcher function. */
35109 static tree
35110 ix86_get_function_versions_dispatcher (void *decl)
35112 tree fn = (tree) decl;
35113 struct cgraph_node *node = NULL;
35114 struct cgraph_node *default_node = NULL;
35115 struct cgraph_function_version_info *node_v = NULL;
35116 struct cgraph_function_version_info *first_v = NULL;
35118 tree dispatch_decl = NULL;
35120 struct cgraph_function_version_info *default_version_info = NULL;
35122 gcc_assert (fn != NULL && DECL_FUNCTION_VERSIONED (fn));
35124 node = cgraph_node::get (fn);
35125 gcc_assert (node != NULL);
35127 node_v = node->function_version ();
35128 gcc_assert (node_v != NULL);
35130 if (node_v->dispatcher_resolver != NULL)
35131 return node_v->dispatcher_resolver;
35133 /* Find the default version and make it the first node. */
35134 first_v = node_v;
35135 /* Go to the beginning of the chain. */
35136 while (first_v->prev != NULL)
35137 first_v = first_v->prev;
35138 default_version_info = first_v;
35139 while (default_version_info != NULL)
35141 if (is_function_default_version
35142 (default_version_info->this_node->decl))
35143 break;
35144 default_version_info = default_version_info->next;
35147 /* If there is no default node, just return NULL. */
35148 if (default_version_info == NULL)
35149 return NULL;
35151 /* Make default info the first node. */
35152 if (first_v != default_version_info)
35154 default_version_info->prev->next = default_version_info->next;
35155 if (default_version_info->next)
35156 default_version_info->next->prev = default_version_info->prev;
35157 first_v->prev = default_version_info;
35158 default_version_info->next = first_v;
35159 default_version_info->prev = NULL;
35162 default_node = default_version_info->this_node;
35164 #if defined (ASM_OUTPUT_TYPE_DIRECTIVE)
35165 if (targetm.has_ifunc_p ())
35167 struct cgraph_function_version_info *it_v = NULL;
35168 struct cgraph_node *dispatcher_node = NULL;
35169 struct cgraph_function_version_info *dispatcher_version_info = NULL;
35171 /* Right now, the dispatching is done via ifunc. */
35172 dispatch_decl = make_dispatcher_decl (default_node->decl);
35174 dispatcher_node = cgraph_node::get_create (dispatch_decl);
35175 gcc_assert (dispatcher_node != NULL);
35176 dispatcher_node->dispatcher_function = 1;
35177 dispatcher_version_info
35178 = dispatcher_node->insert_new_function_version ();
35179 dispatcher_version_info->next = default_version_info;
35180 dispatcher_node->definition = 1;
35182 /* Set the dispatcher for all the versions. */
35183 it_v = default_version_info;
35184 while (it_v != NULL)
35186 it_v->dispatcher_resolver = dispatch_decl;
35187 it_v = it_v->next;
35190 else
35191 #endif
35193 error_at (DECL_SOURCE_LOCATION (default_node->decl),
35194 "multiversioning needs ifunc which is not supported "
35195 "on this target");
35198 return dispatch_decl;
35201 /* Makes a function attribute of the form NAME(ARG_NAME) and chains
35202 it to CHAIN. */
35204 static tree
35205 make_attribute (const char *name, const char *arg_name, tree chain)
35207 tree attr_name;
35208 tree attr_arg_name;
35209 tree attr_args;
35210 tree attr;
35212 attr_name = get_identifier (name);
35213 attr_arg_name = build_string (strlen (arg_name), arg_name);
35214 attr_args = tree_cons (NULL_TREE, attr_arg_name, NULL_TREE);
35215 attr = tree_cons (attr_name, attr_args, chain);
35216 return attr;
35219 /* Make the resolver function decl to dispatch the versions of
35220 a multi-versioned function, DEFAULT_DECL. Create an
35221 empty basic block in the resolver and store the pointer in
35222 EMPTY_BB. Return the decl of the resolver function. */
35224 static tree
35225 make_resolver_func (const tree default_decl,
35226 const tree dispatch_decl,
35227 basic_block *empty_bb)
35229 char *resolver_name;
35230 tree decl, type, decl_name, t;
35231 bool is_uniq = false;
35233 /* IFUNCs have to be globally visible. So, if the default_decl is
35234 not, then the name of the IFUNC should be made unique. */
35235 if (TREE_PUBLIC (default_decl) == 0)
35236 is_uniq = true;
35238 /* Append the filename to the resolver function if the versions are
35239 not externally visible. This is because the resolver function has
35240 to be externally visible for the loader to find it. So, appending
35241 the filename will prevent conflicts with a resolver function from
35242 another module which is based on the same version name. */
35243 resolver_name = make_name (default_decl, "resolver", is_uniq);
35245 /* The resolver function should return a (void *). */
35246 type = build_function_type_list (ptr_type_node, NULL_TREE);
35248 decl = build_fn_decl (resolver_name, type);
35249 decl_name = get_identifier (resolver_name);
35250 SET_DECL_ASSEMBLER_NAME (decl, decl_name);
35252 DECL_NAME (decl) = decl_name;
35253 TREE_USED (decl) = 1;
35254 DECL_ARTIFICIAL (decl) = 1;
35255 DECL_IGNORED_P (decl) = 0;
35256 /* IFUNC resolvers have to be externally visible. */
35257 TREE_PUBLIC (decl) = 1;
35258 DECL_UNINLINABLE (decl) = 1;
35260 /* Resolver is not external, body is generated. */
35261 DECL_EXTERNAL (decl) = 0;
35262 DECL_EXTERNAL (dispatch_decl) = 0;
35264 DECL_CONTEXT (decl) = NULL_TREE;
35265 DECL_INITIAL (decl) = make_node (BLOCK);
35266 DECL_STATIC_CONSTRUCTOR (decl) = 0;
35268 if (DECL_COMDAT_GROUP (default_decl)
35269 || TREE_PUBLIC (default_decl))
35271 /* In this case, each translation unit with a call to this
35272 versioned function will put out a resolver. Ensure it
35273 is comdat to keep just one copy. */
35274 DECL_COMDAT (decl) = 1;
35275 make_decl_one_only (decl, DECL_ASSEMBLER_NAME (decl));
35277 /* Build result decl and add to function_decl. */
35278 t = build_decl (UNKNOWN_LOCATION, RESULT_DECL, NULL_TREE, ptr_type_node);
35279 DECL_ARTIFICIAL (t) = 1;
35280 DECL_IGNORED_P (t) = 1;
35281 DECL_RESULT (decl) = t;
35283 gimplify_function_tree (decl);
35284 push_cfun (DECL_STRUCT_FUNCTION (decl));
35285 *empty_bb = init_lowered_empty_function (decl, false, 0);
35287 cgraph_node::add_new_function (decl, true);
35288 symtab->call_cgraph_insertion_hooks (cgraph_node::get_create (decl));
35290 pop_cfun ();
35292 gcc_assert (dispatch_decl != NULL);
35293 /* Mark dispatch_decl as "ifunc" with resolver as resolver_name. */
35294 DECL_ATTRIBUTES (dispatch_decl)
35295 = make_attribute ("ifunc", resolver_name, DECL_ATTRIBUTES (dispatch_decl));
35297 /* Create the alias for dispatch to resolver here. */
35298 /*cgraph_create_function_alias (dispatch_decl, decl);*/
35299 cgraph_node::create_same_body_alias (dispatch_decl, decl);
35300 XDELETEVEC (resolver_name);
35301 return decl;
35304 /* Generate the dispatching code body to dispatch multi-versioned function
35305 DECL. The target hook is called to process the "target" attributes and
35306 provide the code to dispatch the right function at run-time. NODE points
35307 to the dispatcher decl whose body will be created. */
35309 static tree
35310 ix86_generate_version_dispatcher_body (void *node_p)
35312 tree resolver_decl;
35313 basic_block empty_bb;
35314 tree default_ver_decl;
35315 struct cgraph_node *versn;
35316 struct cgraph_node *node;
35318 struct cgraph_function_version_info *node_version_info = NULL;
35319 struct cgraph_function_version_info *versn_info = NULL;
35321 node = (cgraph_node *)node_p;
35323 node_version_info = node->function_version ();
35324 gcc_assert (node->dispatcher_function
35325 && node_version_info != NULL);
35327 if (node_version_info->dispatcher_resolver)
35328 return node_version_info->dispatcher_resolver;
35330 /* The first version in the chain corresponds to the default version. */
35331 default_ver_decl = node_version_info->next->this_node->decl;
35333 /* node is going to be an alias, so remove the finalized bit. */
35334 node->definition = false;
35336 resolver_decl = make_resolver_func (default_ver_decl,
35337 node->decl, &empty_bb);
35339 node_version_info->dispatcher_resolver = resolver_decl;
35341 push_cfun (DECL_STRUCT_FUNCTION (resolver_decl));
35343 auto_vec<tree, 2> fn_ver_vec;
35345 for (versn_info = node_version_info->next; versn_info;
35346 versn_info = versn_info->next)
35348 versn = versn_info->this_node;
35349 /* Check for virtual functions here again, as by this time it should
35350 have been determined if this function needs a vtable index or
35351 not. This happens for methods in derived classes that override
35352 virtual methods in base classes but are not explicitly marked as
35353 virtual. */
35354 if (DECL_VINDEX (versn->decl))
35355 sorry ("Virtual function multiversioning not supported");
35357 fn_ver_vec.safe_push (versn->decl);
35360 dispatch_function_versions (resolver_decl, &fn_ver_vec, &empty_bb);
35361 cgraph_edge::rebuild_edges ();
35362 pop_cfun ();
35363 return resolver_decl;
35365 /* This builds the processor_model struct type defined in
35366 libgcc/config/i386/cpuinfo.c. */
35368 static tree
35369 build_processor_model_struct (void)
35371 const char *field_name[] = {"__cpu_vendor", "__cpu_type", "__cpu_subtype",
35372 "__cpu_features"};
35373 tree field = NULL_TREE, field_chain = NULL_TREE;
35374 int i;
35375 tree type = make_node (RECORD_TYPE);
35377 /* The first 3 fields are unsigned int. */
35378 for (i = 0; i < 3; ++i)
35380 field = build_decl (UNKNOWN_LOCATION, FIELD_DECL,
35381 get_identifier (field_name[i]), unsigned_type_node);
35382 if (field_chain != NULL_TREE)
35383 DECL_CHAIN (field) = field_chain;
35384 field_chain = field;
35387 /* The last field is an array of unsigned integers of size one. */
35388 field = build_decl (UNKNOWN_LOCATION, FIELD_DECL,
35389 get_identifier (field_name[3]),
35390 build_array_type (unsigned_type_node,
35391 build_index_type (size_one_node)));
35392 if (field_chain != NULL_TREE)
35393 DECL_CHAIN (field) = field_chain;
35394 field_chain = field;
35396 finish_builtin_struct (type, "__processor_model", field_chain, NULL_TREE);
35397 return type;
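For reference, the libgcc-side definition this mirrors is, to the best of recollection and therefore stated as an assumption, the following; the field order must match the FIELD_DECL chain built above.

struct __processor_model
{
  unsigned int __cpu_vendor;
  unsigned int __cpu_type;
  unsigned int __cpu_subtype;
  unsigned int __cpu_features[1];
};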
35400 /* Returns an extern, comdat VAR_DECL of type TYPE and name NAME. */
35402 static tree
35403 make_var_decl (tree type, const char *name)
35405 tree new_decl;
35407 new_decl = build_decl (UNKNOWN_LOCATION,
35408 VAR_DECL,
35409 get_identifier(name),
35410 type);
35412 DECL_EXTERNAL (new_decl) = 1;
35413 TREE_STATIC (new_decl) = 1;
35414 TREE_PUBLIC (new_decl) = 1;
35415 DECL_INITIAL (new_decl) = 0;
35416 DECL_ARTIFICIAL (new_decl) = 0;
35417 DECL_PRESERVE_P (new_decl) = 1;
35419 make_decl_one_only (new_decl, DECL_ASSEMBLER_NAME (new_decl));
35420 assemble_variable (new_decl, 0, 0, 0);
35422 return new_decl;
35425 /* FNDECL is a __builtin_cpu_is or a __builtin_cpu_supports call that is folded
35426 into an integer expression based on data defined in libgcc/config/i386/cpuinfo.c. */
35428 static tree
35429 fold_builtin_cpu (tree fndecl, tree *args)
35431 unsigned int i;
35432 enum ix86_builtins fn_code = (enum ix86_builtins)
35433 DECL_FUNCTION_CODE (fndecl);
35434 tree param_string_cst = NULL;
35436 /* This is the order of bit-fields in __processor_features in cpuinfo.c */
35437 enum processor_features
35439 F_CMOV = 0,
35440 F_MMX,
35441 F_POPCNT,
35442 F_SSE,
35443 F_SSE2,
35444 F_SSE3,
35445 F_SSSE3,
35446 F_SSE4_1,
35447 F_SSE4_2,
35448 F_AVX,
35449 F_AVX2,
35450 F_SSE4_A,
35451 F_FMA4,
35452 F_XOP,
35453 F_FMA,
35454 F_AVX512F,
35455 F_BMI,
35456 F_BMI2,
35457 F_MAX
35460 /* These are the values for vendor types and cpu types and subtypes
35461 in cpuinfo.c. CPU types and subtypes should have the corresponding
35462 start value subtracted. */
35463 enum processor_model
35465 M_INTEL = 1,
35466 M_AMD,
35467 M_CPU_TYPE_START,
35468 M_INTEL_BONNELL,
35469 M_INTEL_CORE2,
35470 M_INTEL_COREI7,
35471 M_AMDFAM10H,
35472 M_AMDFAM15H,
35473 M_INTEL_SILVERMONT,
35474 M_INTEL_KNL,
35475 M_AMD_BTVER1,
35476 M_AMD_BTVER2,
35477 M_CPU_SUBTYPE_START,
35478 M_INTEL_COREI7_NEHALEM,
35479 M_INTEL_COREI7_WESTMERE,
35480 M_INTEL_COREI7_SANDYBRIDGE,
35481 M_AMDFAM10H_BARCELONA,
35482 M_AMDFAM10H_SHANGHAI,
35483 M_AMDFAM10H_ISTANBUL,
35484 M_AMDFAM15H_BDVER1,
35485 M_AMDFAM15H_BDVER2,
35486 M_AMDFAM15H_BDVER3,
35487 M_AMDFAM15H_BDVER4,
35488 M_INTEL_COREI7_IVYBRIDGE,
35489 M_INTEL_COREI7_HASWELL,
35490 M_INTEL_COREI7_BROADWELL
35493 static struct _arch_names_table
35495 const char *const name;
35496 const enum processor_model model;
35498 const arch_names_table[] =
35500 {"amd", M_AMD},
35501 {"intel", M_INTEL},
35502 {"atom", M_INTEL_BONNELL},
35503 {"slm", M_INTEL_SILVERMONT},
35504 {"core2", M_INTEL_CORE2},
35505 {"corei7", M_INTEL_COREI7},
35506 {"nehalem", M_INTEL_COREI7_NEHALEM},
35507 {"westmere", M_INTEL_COREI7_WESTMERE},
35508 {"sandybridge", M_INTEL_COREI7_SANDYBRIDGE},
35509 {"ivybridge", M_INTEL_COREI7_IVYBRIDGE},
35510 {"haswell", M_INTEL_COREI7_HASWELL},
35511 {"broadwell", M_INTEL_COREI7_BROADWELL},
35512 {"bonnell", M_INTEL_BONNELL},
35513 {"silvermont", M_INTEL_SILVERMONT},
35514 {"knl", M_INTEL_KNL},
35515 {"amdfam10h", M_AMDFAM10H},
35516 {"barcelona", M_AMDFAM10H_BARCELONA},
35517 {"shanghai", M_AMDFAM10H_SHANGHAI},
35518 {"istanbul", M_AMDFAM10H_ISTANBUL},
35519 {"btver1", M_AMD_BTVER1},
35520 {"amdfam15h", M_AMDFAM15H},
35521 {"bdver1", M_AMDFAM15H_BDVER1},
35522 {"bdver2", M_AMDFAM15H_BDVER2},
35523 {"bdver3", M_AMDFAM15H_BDVER3},
35524 {"bdver4", M_AMDFAM15H_BDVER4},
35525 {"btver2", M_AMD_BTVER2},
35528 static struct _isa_names_table
35530 const char *const name;
35531 const enum processor_features feature;
35533 const isa_names_table[] =
35535 {"cmov", F_CMOV},
35536 {"mmx", F_MMX},
35537 {"popcnt", F_POPCNT},
35538 {"sse", F_SSE},
35539 {"sse2", F_SSE2},
35540 {"sse3", F_SSE3},
35541 {"ssse3", F_SSSE3},
35542 {"sse4a", F_SSE4_A},
35543 {"sse4.1", F_SSE4_1},
35544 {"sse4.2", F_SSE4_2},
35545 {"avx", F_AVX},
35546 {"fma4", F_FMA4},
35547 {"xop", F_XOP},
35548 {"fma", F_FMA},
35549 {"avx2", F_AVX2},
35550 {"avx512f",F_AVX512F},
35551 {"bmi", F_BMI},
35552 {"bmi2", F_BMI2}
35555 tree __processor_model_type = build_processor_model_struct ();
35556 tree __cpu_model_var = make_var_decl (__processor_model_type,
35557 "__cpu_model");
35560 varpool_node::add (__cpu_model_var);
35562 gcc_assert ((args != NULL) && (*args != NULL));
35564 param_string_cst = *args;
35565 while (param_string_cst
35566 && TREE_CODE (param_string_cst) != STRING_CST)
35568 /* *args must be an expr that can contain other EXPRs leading to a
35569 STRING_CST. */
35570 if (!EXPR_P (param_string_cst))
35572 error ("Parameter to builtin must be a string constant or literal");
35573 return integer_zero_node;
35575 param_string_cst = TREE_OPERAND (EXPR_CHECK (param_string_cst), 0);
35578 gcc_assert (param_string_cst);
35580 if (fn_code == IX86_BUILTIN_CPU_IS)
35582 tree ref;
35583 tree field;
35584 tree final;
35586 unsigned int field_val = 0;
35587 unsigned int NUM_ARCH_NAMES
35588 = sizeof (arch_names_table) / sizeof (struct _arch_names_table);
35590 for (i = 0; i < NUM_ARCH_NAMES; i++)
35591 if (strcmp (arch_names_table[i].name,
35592 TREE_STRING_POINTER (param_string_cst)) == 0)
35593 break;
35595 if (i == NUM_ARCH_NAMES)
35597 error ("Parameter to builtin not valid: %s",
35598 TREE_STRING_POINTER (param_string_cst));
35599 return integer_zero_node;
35602 field = TYPE_FIELDS (__processor_model_type);
35603 field_val = arch_names_table[i].model;
35605 /* CPU types are stored in the next field. */
35606 if (field_val > M_CPU_TYPE_START
35607 && field_val < M_CPU_SUBTYPE_START)
35609 field = DECL_CHAIN (field);
35610 field_val -= M_CPU_TYPE_START;
35613 /* CPU subtypes are stored in the next field. */
35614 if (field_val > M_CPU_SUBTYPE_START)
35616 field = DECL_CHAIN (DECL_CHAIN (field));
35617 field_val -= M_CPU_SUBTYPE_START;
35620 /* Get the appropriate field in __cpu_model. */
35621 ref = build3 (COMPONENT_REF, TREE_TYPE (field), __cpu_model_var,
35622 field, NULL_TREE);
35624 /* Check the value. */
35625 final = build2 (EQ_EXPR, unsigned_type_node, ref,
35626 build_int_cstu (unsigned_type_node, field_val));
35627 return build1 (CONVERT_EXPR, integer_type_node, final);
35629 else if (fn_code == IX86_BUILTIN_CPU_SUPPORTS)
35631 tree ref;
35632 tree array_elt;
35633 tree field;
35634 tree final;
35636 unsigned int field_val = 0;
35637 unsigned int NUM_ISA_NAMES
35638 = sizeof (isa_names_table) / sizeof (struct _isa_names_table);
35640 for (i = 0; i < NUM_ISA_NAMES; i++)
35641 if (strcmp (isa_names_table[i].name,
35642 TREE_STRING_POINTER (param_string_cst)) == 0)
35643 break;
35645 if (i == NUM_ISA_NAMES)
35647 error ("Parameter to builtin not valid: %s",
35648 TREE_STRING_POINTER (param_string_cst));
35649 return integer_zero_node;
35652 field = TYPE_FIELDS (__processor_model_type);
35653 /* Get the last field, which is __cpu_features. */
35654 while (DECL_CHAIN (field))
35655 field = DECL_CHAIN (field);
35657 /* Get the appropriate field: __cpu_model.__cpu_features */
35658 ref = build3 (COMPONENT_REF, TREE_TYPE (field), __cpu_model_var,
35659 field, NULL_TREE);
35661 /* Access the 0th element of __cpu_features array. */
35662 array_elt = build4 (ARRAY_REF, unsigned_type_node, ref,
35663 integer_zero_node, NULL_TREE, NULL_TREE);
35665 field_val = (1 << isa_names_table[i].feature);
35666 /* Return __cpu_model.__cpu_features[0] & field_val */
35667 final = build2 (BIT_AND_EXPR, unsigned_type_node, array_elt,
35668 build_int_cstu (unsigned_type_node, field_val));
35669 return build1 (CONVERT_EXPR, integer_type_node, final);
35671 gcc_unreachable ();
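Seen from user code, both builtins therefore fold into loads and compares against fields of __cpu_model rather than library calls; a minimal sketch, using names from the tables above and valid only when targeting x86:

#include <cstdio>

int
main ()
{
  __builtin_cpu_init ();                 /* fills in __cpu_model  */
  if (__builtin_cpu_is ("intel"))
    std::printf ("Intel CPU\n");
  if (__builtin_cpu_supports ("avx2"))
    std::printf ("AVX2 available\n");
  return 0;
}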
35674 static tree
35675 ix86_fold_builtin (tree fndecl, int n_args,
35676 tree *args, bool ignore ATTRIBUTE_UNUSED)
35678 if (DECL_BUILT_IN_CLASS (fndecl) == BUILT_IN_MD)
35680 enum ix86_builtins fn_code = (enum ix86_builtins)
35681 DECL_FUNCTION_CODE (fndecl);
35682 if (fn_code == IX86_BUILTIN_CPU_IS
35683 || fn_code == IX86_BUILTIN_CPU_SUPPORTS)
35685 gcc_assert (n_args == 1);
35686 return fold_builtin_cpu (fndecl, args);
35690 #ifdef SUBTARGET_FOLD_BUILTIN
35691 return SUBTARGET_FOLD_BUILTIN (fndecl, n_args, args, ignore);
35692 #endif
35694 return NULL_TREE;
35697 /* Make builtins to detect cpu type and features supported. NAME is
35698 the builtin name, CODE is the builtin code, FTYPE is the function
35699 type of the builtin, and IS_CONST marks the builtin as TREE_READONLY. */
35701 static void
35702 make_cpu_type_builtin (const char* name, int code,
35703 enum ix86_builtin_func_type ftype, bool is_const)
35705 tree decl;
35706 tree type;
35708 type = ix86_get_builtin_func_type (ftype);
35709 decl = add_builtin_function (name, type, code, BUILT_IN_MD,
35710 NULL, NULL_TREE);
35711 gcc_assert (decl != NULL_TREE);
35712 ix86_builtins[(int) code] = decl;
35713 TREE_READONLY (decl) = is_const;
35716 /* Make builtins to get CPU type and features supported. The created
35717 builtins are:
35719 __builtin_cpu_init (), to detect cpu type and features,
35720 __builtin_cpu_is ("<CPUNAME>"), to check if cpu is of type <CPUNAME>,
35721 __builtin_cpu_supports ("<FEATURE>"), to check if cpu supports <FEATURE>
35724 static void
35725 ix86_init_platform_type_builtins (void)
35727 make_cpu_type_builtin ("__builtin_cpu_init", IX86_BUILTIN_CPU_INIT,
35728 INT_FTYPE_VOID, false);
35729 make_cpu_type_builtin ("__builtin_cpu_is", IX86_BUILTIN_CPU_IS,
35730 INT_FTYPE_PCCHAR, true);
35731 make_cpu_type_builtin ("__builtin_cpu_supports", IX86_BUILTIN_CPU_SUPPORTS,
35732 INT_FTYPE_PCCHAR, true);
35735 /* Internal method for ix86_init_builtins. */
35737 static void
35738 ix86_init_builtins_va_builtins_abi (void)
35740 tree ms_va_ref, sysv_va_ref;
35741 tree fnvoid_va_end_ms, fnvoid_va_end_sysv;
35742 tree fnvoid_va_start_ms, fnvoid_va_start_sysv;
35743 tree fnvoid_va_copy_ms, fnvoid_va_copy_sysv;
35744 tree fnattr_ms = NULL_TREE, fnattr_sysv = NULL_TREE;
35746 if (!TARGET_64BIT)
35747 return;
35748 fnattr_ms = build_tree_list (get_identifier ("ms_abi"), NULL_TREE);
35749 fnattr_sysv = build_tree_list (get_identifier ("sysv_abi"), NULL_TREE);
35750 ms_va_ref = build_reference_type (ms_va_list_type_node);
35751 sysv_va_ref =
35752 build_pointer_type (TREE_TYPE (sysv_va_list_type_node));
35754 fnvoid_va_end_ms =
35755 build_function_type_list (void_type_node, ms_va_ref, NULL_TREE);
35756 fnvoid_va_start_ms =
35757 build_varargs_function_type_list (void_type_node, ms_va_ref, NULL_TREE);
35758 fnvoid_va_end_sysv =
35759 build_function_type_list (void_type_node, sysv_va_ref, NULL_TREE);
35760 fnvoid_va_start_sysv =
35761 build_varargs_function_type_list (void_type_node, sysv_va_ref,
35762 NULL_TREE);
35763 fnvoid_va_copy_ms =
35764 build_function_type_list (void_type_node, ms_va_ref, ms_va_list_type_node,
35765 NULL_TREE);
35766 fnvoid_va_copy_sysv =
35767 build_function_type_list (void_type_node, sysv_va_ref,
35768 sysv_va_ref, NULL_TREE);
35770 add_builtin_function ("__builtin_ms_va_start", fnvoid_va_start_ms,
35771 BUILT_IN_VA_START, BUILT_IN_NORMAL, NULL, fnattr_ms);
35772 add_builtin_function ("__builtin_ms_va_end", fnvoid_va_end_ms,
35773 BUILT_IN_VA_END, BUILT_IN_NORMAL, NULL, fnattr_ms);
35774 add_builtin_function ("__builtin_ms_va_copy", fnvoid_va_copy_ms,
35775 BUILT_IN_VA_COPY, BUILT_IN_NORMAL, NULL, fnattr_ms);
35776 add_builtin_function ("__builtin_sysv_va_start", fnvoid_va_start_sysv,
35777 BUILT_IN_VA_START, BUILT_IN_NORMAL, NULL, fnattr_sysv);
35778 add_builtin_function ("__builtin_sysv_va_end", fnvoid_va_end_sysv,
35779 BUILT_IN_VA_END, BUILT_IN_NORMAL, NULL, fnattr_sysv);
35780 add_builtin_function ("__builtin_sysv_va_copy", fnvoid_va_copy_sysv,
35781 BUILT_IN_VA_COPY, BUILT_IN_NORMAL, NULL, fnattr_sysv);
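A hedged sketch of how the ms_abi variants registered above are used from 64-bit code; __builtin_ms_va_list is assumed to be the matching list type GCC exposes on x86-64.

/* Sum N ints passed through the Microsoft calling convention.  */
__attribute__ ((ms_abi)) int
sum_ms (int n, ...)
{
  __builtin_ms_va_list ap;
  int i, s = 0;
  __builtin_ms_va_start (ap, n);
  for (i = 0; i < n; i++)
    s += __builtin_va_arg (ap, int);
  __builtin_ms_va_end (ap);
  return s;
}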
35784 static void
35785 ix86_init_builtin_types (void)
35787 tree float128_type_node, float80_type_node;
35789 /* The __float80 type. */
35790 float80_type_node = long_double_type_node;
35791 if (TYPE_MODE (float80_type_node) != XFmode)
35793 /* The __float80 type. */
35794 float80_type_node = make_node (REAL_TYPE);
35796 TYPE_PRECISION (float80_type_node) = 80;
35797 layout_type (float80_type_node);
35799 lang_hooks.types.register_builtin_type (float80_type_node, "__float80");
35801 /* The __float128 type. */
35802 float128_type_node = make_node (REAL_TYPE);
35803 TYPE_PRECISION (float128_type_node) = 128;
35804 layout_type (float128_type_node);
35805 lang_hooks.types.register_builtin_type (float128_type_node, "__float128");
35807 /* This macro is built by i386-builtin-types.awk. */
35808 DEFINE_BUILTIN_PRIMITIVE_TYPES;
35811 static void
35812 ix86_init_builtins (void)
35814 tree t;
35816 ix86_init_builtin_types ();
35818 /* Builtins to get CPU type and features. */
35819 ix86_init_platform_type_builtins ();
35821 /* TFmode support builtins. */
35822 def_builtin_const (0, "__builtin_infq",
35823 FLOAT128_FTYPE_VOID, IX86_BUILTIN_INFQ);
35824 def_builtin_const (0, "__builtin_huge_valq",
35825 FLOAT128_FTYPE_VOID, IX86_BUILTIN_HUGE_VALQ);
35827 /* We will expand them to a normal call if SSE isn't available since
35828 they are used by libgcc. */
35829 t = ix86_get_builtin_func_type (FLOAT128_FTYPE_FLOAT128);
35830 t = add_builtin_function ("__builtin_fabsq", t, IX86_BUILTIN_FABSQ,
35831 BUILT_IN_MD, "__fabstf2", NULL_TREE);
35832 TREE_READONLY (t) = 1;
35833 ix86_builtins[(int) IX86_BUILTIN_FABSQ] = t;
35835 t = ix86_get_builtin_func_type (FLOAT128_FTYPE_FLOAT128_FLOAT128);
35836 t = add_builtin_function ("__builtin_copysignq", t, IX86_BUILTIN_COPYSIGNQ,
35837 BUILT_IN_MD, "__copysigntf3", NULL_TREE);
35838 TREE_READONLY (t) = 1;
35839 ix86_builtins[(int) IX86_BUILTIN_COPYSIGNQ] = t;
35841 ix86_init_tm_builtins ();
35842 ix86_init_mmx_sse_builtins ();
35843 ix86_init_mpx_builtins ();
35845 if (TARGET_LP64)
35846 ix86_init_builtins_va_builtins_abi ();
35848 #ifdef SUBTARGET_INIT_BUILTINS
35849 SUBTARGET_INIT_BUILTINS;
35850 #endif
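The TFmode builtins registered earlier in this function look like this from user code; a small sketch, noting that without SSE they expand to the libgcc routines named above (__fabstf2, __copysigntf3).

static __float128
signed_infinity (__float128 sign_source)
{
  /* An infinity carrying the sign of SIGN_SOURCE.  */
  return __builtin_copysignq (__builtin_infq (), sign_source);
}

static __float128
magnitude (__float128 x)
{
  return __builtin_fabsq (x);
}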
35853 /* Return the ix86 builtin for CODE. */
35855 static tree
35856 ix86_builtin_decl (unsigned code, bool)
35858 if (code >= IX86_BUILTIN_MAX)
35859 return error_mark_node;
35861 return ix86_builtins[code];
35864 /* Errors in the source file can cause expand_expr to return const0_rtx
35865 where we expect a vector. To avoid crashing, use one of the vector
35866 clear instructions. */
35867 static rtx
35868 safe_vector_operand (rtx x, machine_mode mode)
35870 if (x == const0_rtx)
35871 x = CONST0_RTX (mode);
35872 return x;
35875 /* Fixup modeless constants to fit required mode. */
35876 static rtx
35877 fixup_modeless_constant (rtx x, machine_mode mode)
35879 if (GET_MODE (x) == VOIDmode)
35880 x = convert_to_mode (mode, x, 1);
35881 return x;
35884 /* Subroutine of ix86_expand_builtin to take care of binop insns. */
35886 static rtx
35887 ix86_expand_binop_builtin (enum insn_code icode, tree exp, rtx target)
35889 rtx pat;
35890 tree arg0 = CALL_EXPR_ARG (exp, 0);
35891 tree arg1 = CALL_EXPR_ARG (exp, 1);
35892 rtx op0 = expand_normal (arg0);
35893 rtx op1 = expand_normal (arg1);
35894 machine_mode tmode = insn_data[icode].operand[0].mode;
35895 machine_mode mode0 = insn_data[icode].operand[1].mode;
35896 machine_mode mode1 = insn_data[icode].operand[2].mode;
35898 if (VECTOR_MODE_P (mode0))
35899 op0 = safe_vector_operand (op0, mode0);
35900 if (VECTOR_MODE_P (mode1))
35901 op1 = safe_vector_operand (op1, mode1);
35903 if (optimize || !target
35904 || GET_MODE (target) != tmode
35905 || !insn_data[icode].operand[0].predicate (target, tmode))
35906 target = gen_reg_rtx (tmode);
35908 if (GET_MODE (op1) == SImode && mode1 == TImode)
35910 rtx x = gen_reg_rtx (V4SImode);
35911 emit_insn (gen_sse2_loadd (x, op1));
35912 op1 = gen_lowpart (TImode, x);
35915 if (!insn_data[icode].operand[1].predicate (op0, mode0))
35916 op0 = copy_to_mode_reg (mode0, op0);
35917 if (!insn_data[icode].operand[2].predicate (op1, mode1))
35918 op1 = copy_to_mode_reg (mode1, op1);
35920 pat = GEN_FCN (icode) (target, op0, op1);
35921 if (! pat)
35922 return 0;
35924 emit_insn (pat);
35926 return target;
35929 /* Subroutine of ix86_expand_builtin to take care of 2-4 argument insns. */
35931 static rtx
35932 ix86_expand_multi_arg_builtin (enum insn_code icode, tree exp, rtx target,
35933 enum ix86_builtin_func_type m_type,
35934 enum rtx_code sub_code)
35936 rtx pat;
35937 int i;
35938 int nargs;
35939 bool comparison_p = false;
35940 bool tf_p = false;
35941 bool last_arg_constant = false;
35942 int num_memory = 0;
35943 struct {
35944 rtx op;
35945 machine_mode mode;
35946 } args[4];
35948 machine_mode tmode = insn_data[icode].operand[0].mode;
35950 switch (m_type)
35952 case MULTI_ARG_4_DF2_DI_I:
35953 case MULTI_ARG_4_DF2_DI_I1:
35954 case MULTI_ARG_4_SF2_SI_I:
35955 case MULTI_ARG_4_SF2_SI_I1:
35956 nargs = 4;
35957 last_arg_constant = true;
35958 break;
35960 case MULTI_ARG_3_SF:
35961 case MULTI_ARG_3_DF:
35962 case MULTI_ARG_3_SF2:
35963 case MULTI_ARG_3_DF2:
35964 case MULTI_ARG_3_DI:
35965 case MULTI_ARG_3_SI:
35966 case MULTI_ARG_3_SI_DI:
35967 case MULTI_ARG_3_HI:
35968 case MULTI_ARG_3_HI_SI:
35969 case MULTI_ARG_3_QI:
35970 case MULTI_ARG_3_DI2:
35971 case MULTI_ARG_3_SI2:
35972 case MULTI_ARG_3_HI2:
35973 case MULTI_ARG_3_QI2:
35974 nargs = 3;
35975 break;
35977 case MULTI_ARG_2_SF:
35978 case MULTI_ARG_2_DF:
35979 case MULTI_ARG_2_DI:
35980 case MULTI_ARG_2_SI:
35981 case MULTI_ARG_2_HI:
35982 case MULTI_ARG_2_QI:
35983 nargs = 2;
35984 break;
35986 case MULTI_ARG_2_DI_IMM:
35987 case MULTI_ARG_2_SI_IMM:
35988 case MULTI_ARG_2_HI_IMM:
35989 case MULTI_ARG_2_QI_IMM:
35990 nargs = 2;
35991 last_arg_constant = true;
35992 break;
35994 case MULTI_ARG_1_SF:
35995 case MULTI_ARG_1_DF:
35996 case MULTI_ARG_1_SF2:
35997 case MULTI_ARG_1_DF2:
35998 case MULTI_ARG_1_DI:
35999 case MULTI_ARG_1_SI:
36000 case MULTI_ARG_1_HI:
36001 case MULTI_ARG_1_QI:
36002 case MULTI_ARG_1_SI_DI:
36003 case MULTI_ARG_1_HI_DI:
36004 case MULTI_ARG_1_HI_SI:
36005 case MULTI_ARG_1_QI_DI:
36006 case MULTI_ARG_1_QI_SI:
36007 case MULTI_ARG_1_QI_HI:
36008 nargs = 1;
36009 break;
36011 case MULTI_ARG_2_DI_CMP:
36012 case MULTI_ARG_2_SI_CMP:
36013 case MULTI_ARG_2_HI_CMP:
36014 case MULTI_ARG_2_QI_CMP:
36015 nargs = 2;
36016 comparison_p = true;
36017 break;
36019 case MULTI_ARG_2_SF_TF:
36020 case MULTI_ARG_2_DF_TF:
36021 case MULTI_ARG_2_DI_TF:
36022 case MULTI_ARG_2_SI_TF:
36023 case MULTI_ARG_2_HI_TF:
36024 case MULTI_ARG_2_QI_TF:
36025 nargs = 2;
36026 tf_p = true;
36027 break;
36029 default:
36030 gcc_unreachable ();
36033 if (optimize || !target
36034 || GET_MODE (target) != tmode
36035 || !insn_data[icode].operand[0].predicate (target, tmode))
36036 target = gen_reg_rtx (tmode);
36038 gcc_assert (nargs <= 4);
36040 for (i = 0; i < nargs; i++)
36042 tree arg = CALL_EXPR_ARG (exp, i);
36043 rtx op = expand_normal (arg);
36044 int adjust = (comparison_p) ? 1 : 0;
36045 machine_mode mode = insn_data[icode].operand[i+adjust+1].mode;
36047 if (last_arg_constant && i == nargs - 1)
36049 if (!insn_data[icode].operand[i + 1].predicate (op, mode))
36051 enum insn_code new_icode = icode;
36052 switch (icode)
36054 case CODE_FOR_xop_vpermil2v2df3:
36055 case CODE_FOR_xop_vpermil2v4sf3:
36056 case CODE_FOR_xop_vpermil2v4df3:
36057 case CODE_FOR_xop_vpermil2v8sf3:
36058 error ("the last argument must be a 2-bit immediate");
36059 return gen_reg_rtx (tmode);
36060 case CODE_FOR_xop_rotlv2di3:
36061 new_icode = CODE_FOR_rotlv2di3;
36062 goto xop_rotl;
36063 case CODE_FOR_xop_rotlv4si3:
36064 new_icode = CODE_FOR_rotlv4si3;
36065 goto xop_rotl;
36066 case CODE_FOR_xop_rotlv8hi3:
36067 new_icode = CODE_FOR_rotlv8hi3;
36068 goto xop_rotl;
36069 case CODE_FOR_xop_rotlv16qi3:
36070 new_icode = CODE_FOR_rotlv16qi3;
36071 xop_rotl:
36072 if (CONST_INT_P (op))
36074 int mask = GET_MODE_BITSIZE (GET_MODE_INNER (tmode)) - 1;
36075 op = GEN_INT (INTVAL (op) & mask);
36076 gcc_checking_assert
36077 (insn_data[icode].operand[i + 1].predicate (op, mode));
36079 else
36081 gcc_checking_assert
36082 (nargs == 2
36083 && insn_data[new_icode].operand[0].mode == tmode
36084 && insn_data[new_icode].operand[1].mode == tmode
36085 && insn_data[new_icode].operand[2].mode == mode
36086 && insn_data[new_icode].operand[0].predicate
36087 == insn_data[icode].operand[0].predicate
36088 && insn_data[new_icode].operand[1].predicate
36089 == insn_data[icode].operand[1].predicate);
36090 icode = new_icode;
36091 goto non_constant;
36093 break;
36094 default:
36095 gcc_unreachable ();
36099 else
36101 non_constant:
36102 if (VECTOR_MODE_P (mode))
36103 op = safe_vector_operand (op, mode);
36105 /* If we aren't optimizing, only allow one memory operand to be
36106 generated. */
36107 if (memory_operand (op, mode))
36108 num_memory++;
36110 gcc_assert (GET_MODE (op) == mode || GET_MODE (op) == VOIDmode);
36112 if (optimize
36113 || !insn_data[icode].operand[i+adjust+1].predicate (op, mode)
36114 || num_memory > 1)
36115 op = force_reg (mode, op);
36118 args[i].op = op;
36119 args[i].mode = mode;
36122 switch (nargs)
36124 case 1:
36125 pat = GEN_FCN (icode) (target, args[0].op);
36126 break;
36128 case 2:
36129 if (tf_p)
36130 pat = GEN_FCN (icode) (target, args[0].op, args[1].op,
36131 GEN_INT ((int)sub_code));
36132 else if (! comparison_p)
36133 pat = GEN_FCN (icode) (target, args[0].op, args[1].op);
36134 else
36136 rtx cmp_op = gen_rtx_fmt_ee (sub_code, GET_MODE (target),
36137 args[0].op,
36138 args[1].op);
36140 pat = GEN_FCN (icode) (target, cmp_op, args[0].op, args[1].op);
36142 break;
36144 case 3:
36145 pat = GEN_FCN (icode) (target, args[0].op, args[1].op, args[2].op);
36146 break;
36148 case 4:
36149 pat = GEN_FCN (icode) (target, args[0].op, args[1].op, args[2].op, args[3].op);
36150 break;
36152 default:
36153 gcc_unreachable ();
36156 if (! pat)
36157 return 0;
36159 emit_insn (pat);
36160 return target;
36163 /* Subroutine of ix86_expand_args_builtin to take care of scalar unop
36164 insns with vec_merge. */
36166 static rtx
36167 ix86_expand_unop_vec_merge_builtin (enum insn_code icode, tree exp,
36168 rtx target)
36170 rtx pat;
36171 tree arg0 = CALL_EXPR_ARG (exp, 0);
36172 rtx op1, op0 = expand_normal (arg0);
36173 machine_mode tmode = insn_data[icode].operand[0].mode;
36174 machine_mode mode0 = insn_data[icode].operand[1].mode;
36176 if (optimize || !target
36177 || GET_MODE (target) != tmode
36178 || !insn_data[icode].operand[0].predicate (target, tmode))
36179 target = gen_reg_rtx (tmode);
36181 if (VECTOR_MODE_P (mode0))
36182 op0 = safe_vector_operand (op0, mode0);
36184 if ((optimize && !register_operand (op0, mode0))
36185 || !insn_data[icode].operand[1].predicate (op0, mode0))
36186 op0 = copy_to_mode_reg (mode0, op0);
36188 op1 = op0;
36189 if (!insn_data[icode].operand[2].predicate (op1, mode0))
36190 op1 = copy_to_mode_reg (mode0, op1);
36192 pat = GEN_FCN (icode) (target, op0, op1);
36193 if (! pat)
36194 return 0;
36195 emit_insn (pat);
36196 return target;
36199 /* Subroutine of ix86_expand_builtin to take care of comparison insns. */
36201 static rtx
36202 ix86_expand_sse_compare (const struct builtin_description *d,
36203 tree exp, rtx target, bool swap)
36205 rtx pat;
36206 tree arg0 = CALL_EXPR_ARG (exp, 0);
36207 tree arg1 = CALL_EXPR_ARG (exp, 1);
36208 rtx op0 = expand_normal (arg0);
36209 rtx op1 = expand_normal (arg1);
36210 rtx op2;
36211 machine_mode tmode = insn_data[d->icode].operand[0].mode;
36212 machine_mode mode0 = insn_data[d->icode].operand[1].mode;
36213 machine_mode mode1 = insn_data[d->icode].operand[2].mode;
36214 enum rtx_code comparison = d->comparison;
36216 if (VECTOR_MODE_P (mode0))
36217 op0 = safe_vector_operand (op0, mode0);
36218 if (VECTOR_MODE_P (mode1))
36219 op1 = safe_vector_operand (op1, mode1);
36221 /* Swap operands if we have a comparison that isn't available in
36222 hardware. */
36223 if (swap)
36224 std::swap (op0, op1);
36226 if (optimize || !target
36227 || GET_MODE (target) != tmode
36228 || !insn_data[d->icode].operand[0].predicate (target, tmode))
36229 target = gen_reg_rtx (tmode);
36231 if ((optimize && !register_operand (op0, mode0))
36232 || !insn_data[d->icode].operand[1].predicate (op0, mode0))
36233 op0 = copy_to_mode_reg (mode0, op0);
36234 if ((optimize && !register_operand (op1, mode1))
36235 || !insn_data[d->icode].operand[2].predicate (op1, mode1))
36236 op1 = copy_to_mode_reg (mode1, op1);
36238 op2 = gen_rtx_fmt_ee (comparison, mode0, op0, op1);
36239 pat = GEN_FCN (d->icode) (target, op0, op1, op2);
36240 if (! pat)
36241 return 0;
36242 emit_insn (pat);
36243 return target;
36246 /* Subroutine of ix86_expand_builtin to take care of comi insns. */
36248 static rtx
36249 ix86_expand_sse_comi (const struct builtin_description *d, tree exp,
36250 rtx target)
36252 rtx pat;
36253 tree arg0 = CALL_EXPR_ARG (exp, 0);
36254 tree arg1 = CALL_EXPR_ARG (exp, 1);
36255 rtx op0 = expand_normal (arg0);
36256 rtx op1 = expand_normal (arg1);
36257 machine_mode mode0 = insn_data[d->icode].operand[0].mode;
36258 machine_mode mode1 = insn_data[d->icode].operand[1].mode;
36259 enum rtx_code comparison = d->comparison;
36261 if (VECTOR_MODE_P (mode0))
36262 op0 = safe_vector_operand (op0, mode0);
36263 if (VECTOR_MODE_P (mode1))
36264 op1 = safe_vector_operand (op1, mode1);
36266 /* Swap operands if we have a comparison that isn't available in
36267 hardware. */
36268 if (d->flag & BUILTIN_DESC_SWAP_OPERANDS)
36269 std::swap (op0, op1);
36271 target = gen_reg_rtx (SImode);
36272 emit_move_insn (target, const0_rtx);
36273 target = gen_rtx_SUBREG (QImode, target, 0);
36275 if ((optimize && !register_operand (op0, mode0))
36276 || !insn_data[d->icode].operand[0].predicate (op0, mode0))
36277 op0 = copy_to_mode_reg (mode0, op0);
36278 if ((optimize && !register_operand (op1, mode1))
36279 || !insn_data[d->icode].operand[1].predicate (op1, mode1))
36280 op1 = copy_to_mode_reg (mode1, op1);
36282 pat = GEN_FCN (d->icode) (op0, op1);
36283 if (! pat)
36284 return 0;
36285 emit_insn (pat);
36286 emit_insn (gen_rtx_SET (gen_rtx_STRICT_LOW_PART (VOIDmode, target),
36287 gen_rtx_fmt_ee (comparison, QImode,
36288 SET_DEST (pat),
36289 const0_rtx)));
36291 return SUBREG_REG (target);
36294 /* Subroutines of ix86_expand_args_builtin to take care of round insns. */
36296 static rtx
36297 ix86_expand_sse_round (const struct builtin_description *d, tree exp,
36298 rtx target)
36300 rtx pat;
36301 tree arg0 = CALL_EXPR_ARG (exp, 0);
36302 rtx op1, op0 = expand_normal (arg0);
36303 machine_mode tmode = insn_data[d->icode].operand[0].mode;
36304 machine_mode mode0 = insn_data[d->icode].operand[1].mode;
36306 if (optimize || target == 0
36307 || GET_MODE (target) != tmode
36308 || !insn_data[d->icode].operand[0].predicate (target, tmode))
36309 target = gen_reg_rtx (tmode);
36311 if (VECTOR_MODE_P (mode0))
36312 op0 = safe_vector_operand (op0, mode0);
36314 if ((optimize && !register_operand (op0, mode0))
36315 || !insn_data[d->icode].operand[0].predicate (op0, mode0))
36316 op0 = copy_to_mode_reg (mode0, op0);
36318 op1 = GEN_INT (d->comparison);
36320 pat = GEN_FCN (d->icode) (target, op0, op1);
36321 if (! pat)
36322 return 0;
36323 emit_insn (pat);
36324 return target;
36327 static rtx
36328 ix86_expand_sse_round_vec_pack_sfix (const struct builtin_description *d,
36329 tree exp, rtx target)
36331 rtx pat;
36332 tree arg0 = CALL_EXPR_ARG (exp, 0);
36333 tree arg1 = CALL_EXPR_ARG (exp, 1);
36334 rtx op0 = expand_normal (arg0);
36335 rtx op1 = expand_normal (arg1);
36336 rtx op2;
36337 machine_mode tmode = insn_data[d->icode].operand[0].mode;
36338 machine_mode mode0 = insn_data[d->icode].operand[1].mode;
36339 machine_mode mode1 = insn_data[d->icode].operand[2].mode;
36341 if (optimize || target == 0
36342 || GET_MODE (target) != tmode
36343 || !insn_data[d->icode].operand[0].predicate (target, tmode))
36344 target = gen_reg_rtx (tmode);
36346 op0 = safe_vector_operand (op0, mode0);
36347 op1 = safe_vector_operand (op1, mode1);
36349 if ((optimize && !register_operand (op0, mode0))
36350 || !insn_data[d->icode].operand[0].predicate (op0, mode0))
36351 op0 = copy_to_mode_reg (mode0, op0);
36352 if ((optimize && !register_operand (op1, mode1))
36353 || !insn_data[d->icode].operand[1].predicate (op1, mode1))
36354 op1 = copy_to_mode_reg (mode1, op1);
36356 op2 = GEN_INT (d->comparison);
36358 pat = GEN_FCN (d->icode) (target, op0, op1, op2);
36359 if (! pat)
36360 return 0;
36361 emit_insn (pat);
36362 return target;
36365 /* Subroutine of ix86_expand_builtin to take care of ptest insns. */
36367 static rtx
36368 ix86_expand_sse_ptest (const struct builtin_description *d, tree exp,
36369 rtx target)
36371 rtx pat;
36372 tree arg0 = CALL_EXPR_ARG (exp, 0);
36373 tree arg1 = CALL_EXPR_ARG (exp, 1);
36374 rtx op0 = expand_normal (arg0);
36375 rtx op1 = expand_normal (arg1);
36376 machine_mode mode0 = insn_data[d->icode].operand[0].mode;
36377 machine_mode mode1 = insn_data[d->icode].operand[1].mode;
36378 enum rtx_code comparison = d->comparison;
36380 if (VECTOR_MODE_P (mode0))
36381 op0 = safe_vector_operand (op0, mode0);
36382 if (VECTOR_MODE_P (mode1))
36383 op1 = safe_vector_operand (op1, mode1);
36385 target = gen_reg_rtx (SImode);
36386 emit_move_insn (target, const0_rtx);
36387 target = gen_rtx_SUBREG (QImode, target, 0);
36389 if ((optimize && !register_operand (op0, mode0))
36390 || !insn_data[d->icode].operand[0].predicate (op0, mode0))
36391 op0 = copy_to_mode_reg (mode0, op0);
36392 if ((optimize && !register_operand (op1, mode1))
36393 || !insn_data[d->icode].operand[1].predicate (op1, mode1))
36394 op1 = copy_to_mode_reg (mode1, op1);
36396 pat = GEN_FCN (d->icode) (op0, op1);
36397 if (! pat)
36398 return 0;
36399 emit_insn (pat);
36400 emit_insn (gen_rtx_SET (gen_rtx_STRICT_LOW_PART (VOIDmode, target),
36401 gen_rtx_fmt_ee (comparison, QImode,
36402 SET_DEST (pat),
36403 const0_rtx)));
36405 return SUBREG_REG (target);
36408 /* Subroutine of ix86_expand_builtin to take care of pcmpestr[im] insns. */
36410 static rtx
36411 ix86_expand_sse_pcmpestr (const struct builtin_description *d,
36412 tree exp, rtx target)
36414 rtx pat;
36415 tree arg0 = CALL_EXPR_ARG (exp, 0);
36416 tree arg1 = CALL_EXPR_ARG (exp, 1);
36417 tree arg2 = CALL_EXPR_ARG (exp, 2);
36418 tree arg3 = CALL_EXPR_ARG (exp, 3);
36419 tree arg4 = CALL_EXPR_ARG (exp, 4);
36420 rtx scratch0, scratch1;
36421 rtx op0 = expand_normal (arg0);
36422 rtx op1 = expand_normal (arg1);
36423 rtx op2 = expand_normal (arg2);
36424 rtx op3 = expand_normal (arg3);
36425 rtx op4 = expand_normal (arg4);
36426 machine_mode tmode0, tmode1, modev2, modei3, modev4, modei5, modeimm;
36428 tmode0 = insn_data[d->icode].operand[0].mode;
36429 tmode1 = insn_data[d->icode].operand[1].mode;
36430 modev2 = insn_data[d->icode].operand[2].mode;
36431 modei3 = insn_data[d->icode].operand[3].mode;
36432 modev4 = insn_data[d->icode].operand[4].mode;
36433 modei5 = insn_data[d->icode].operand[5].mode;
36434 modeimm = insn_data[d->icode].operand[6].mode;
36436 if (VECTOR_MODE_P (modev2))
36437 op0 = safe_vector_operand (op0, modev2);
36438 if (VECTOR_MODE_P (modev4))
36439 op2 = safe_vector_operand (op2, modev4);
36441 if (!insn_data[d->icode].operand[2].predicate (op0, modev2))
36442 op0 = copy_to_mode_reg (modev2, op0);
36443 if (!insn_data[d->icode].operand[3].predicate (op1, modei3))
36444 op1 = copy_to_mode_reg (modei3, op1);
36445 if ((optimize && !register_operand (op2, modev4))
36446 || !insn_data[d->icode].operand[4].predicate (op2, modev4))
36447 op2 = copy_to_mode_reg (modev4, op2);
36448 if (!insn_data[d->icode].operand[5].predicate (op3, modei5))
36449 op3 = copy_to_mode_reg (modei5, op3);
36451 if (!insn_data[d->icode].operand[6].predicate (op4, modeimm))
36453 error ("the fifth argument must be an 8-bit immediate");
36454 return const0_rtx;
36457 if (d->code == IX86_BUILTIN_PCMPESTRI128)
36459 if (optimize || !target
36460 || GET_MODE (target) != tmode0
36461 || !insn_data[d->icode].operand[0].predicate (target, tmode0))
36462 target = gen_reg_rtx (tmode0);
36464 scratch1 = gen_reg_rtx (tmode1);
36466 pat = GEN_FCN (d->icode) (target, scratch1, op0, op1, op2, op3, op4);
36468 else if (d->code == IX86_BUILTIN_PCMPESTRM128)
36470 if (optimize || !target
36471 || GET_MODE (target) != tmode1
36472 || !insn_data[d->icode].operand[1].predicate (target, tmode1))
36473 target = gen_reg_rtx (tmode1);
36475 scratch0 = gen_reg_rtx (tmode0);
36477 pat = GEN_FCN (d->icode) (scratch0, target, op0, op1, op2, op3, op4);
36479 else
36481 gcc_assert (d->flag);
36483 scratch0 = gen_reg_rtx (tmode0);
36484 scratch1 = gen_reg_rtx (tmode1);
36486 pat = GEN_FCN (d->icode) (scratch0, scratch1, op0, op1, op2, op3, op4);
36489 if (! pat)
36490 return 0;
36492 emit_insn (pat);
36494 if (d->flag)
36496 target = gen_reg_rtx (SImode);
36497 emit_move_insn (target, const0_rtx);
36498 target = gen_rtx_SUBREG (QImode, target, 0);
36500 emit_insn
36501 (gen_rtx_SET (gen_rtx_STRICT_LOW_PART (VOIDmode, target),
36502 gen_rtx_fmt_ee (EQ, QImode,
36503 gen_rtx_REG ((machine_mode) d->flag,
36504 FLAGS_REG),
36505 const0_rtx)));
36506 return SUBREG_REG (target);
36508 else
36509 return target;
36513 /* Subroutine of ix86_expand_builtin to take care of pcmpistr[im] insns. */
36515 static rtx
36516 ix86_expand_sse_pcmpistr (const struct builtin_description *d,
36517 tree exp, rtx target)
36519 rtx pat;
36520 tree arg0 = CALL_EXPR_ARG (exp, 0);
36521 tree arg1 = CALL_EXPR_ARG (exp, 1);
36522 tree arg2 = CALL_EXPR_ARG (exp, 2);
36523 rtx scratch0, scratch1;
36524 rtx op0 = expand_normal (arg0);
36525 rtx op1 = expand_normal (arg1);
36526 rtx op2 = expand_normal (arg2);
36527 machine_mode tmode0, tmode1, modev2, modev3, modeimm;
36529 tmode0 = insn_data[d->icode].operand[0].mode;
36530 tmode1 = insn_data[d->icode].operand[1].mode;
36531 modev2 = insn_data[d->icode].operand[2].mode;
36532 modev3 = insn_data[d->icode].operand[3].mode;
36533 modeimm = insn_data[d->icode].operand[4].mode;
36535 if (VECTOR_MODE_P (modev2))
36536 op0 = safe_vector_operand (op0, modev2);
36537 if (VECTOR_MODE_P (modev3))
36538 op1 = safe_vector_operand (op1, modev3);
36540 if (!insn_data[d->icode].operand[2].predicate (op0, modev2))
36541 op0 = copy_to_mode_reg (modev2, op0);
36542 if ((optimize && !register_operand (op1, modev3))
36543 || !insn_data[d->icode].operand[3].predicate (op1, modev3))
36544 op1 = copy_to_mode_reg (modev3, op1);
36546 if (!insn_data[d->icode].operand[4].predicate (op2, modeimm))
36548 error ("the third argument must be an 8-bit immediate");
36549 return const0_rtx;
36552 if (d->code == IX86_BUILTIN_PCMPISTRI128)
36554 if (optimize || !target
36555 || GET_MODE (target) != tmode0
36556 || !insn_data[d->icode].operand[0].predicate (target, tmode0))
36557 target = gen_reg_rtx (tmode0);
36559 scratch1 = gen_reg_rtx (tmode1);
36561 pat = GEN_FCN (d->icode) (target, scratch1, op0, op1, op2);
36563 else if (d->code == IX86_BUILTIN_PCMPISTRM128)
36565 if (optimize || !target
36566 || GET_MODE (target) != tmode1
36567 || !insn_data[d->icode].operand[1].predicate (target, tmode1))
36568 target = gen_reg_rtx (tmode1);
36570 scratch0 = gen_reg_rtx (tmode0);
36572 pat = GEN_FCN (d->icode) (scratch0, target, op0, op1, op2);
36574 else
36576 gcc_assert (d->flag);
36578 scratch0 = gen_reg_rtx (tmode0);
36579 scratch1 = gen_reg_rtx (tmode1);
36581 pat = GEN_FCN (d->icode) (scratch0, scratch1, op0, op1, op2);
36584 if (! pat)
36585 return 0;
36587 emit_insn (pat);
36589 if (d->flag)
36591 target = gen_reg_rtx (SImode);
36592 emit_move_insn (target, const0_rtx);
36593 target = gen_rtx_SUBREG (QImode, target, 0);
36595 emit_insn
36596 (gen_rtx_SET (gen_rtx_STRICT_LOW_PART (VOIDmode, target),
36597 gen_rtx_fmt_ee (EQ, QImode,
36598 gen_rtx_REG ((machine_mode) d->flag,
36599 FLAGS_REG),
36600 const0_rtx)));
36601 return SUBREG_REG (target);
36603 else
36604 return target;
36607 /* Subroutine of ix86_expand_builtin to take care of insns with a
36608 variable number of operands. */
36610 static rtx
36611 ix86_expand_args_builtin (const struct builtin_description *d,
36612 tree exp, rtx target)
36614 rtx pat, real_target;
36615 unsigned int i, nargs;
36616 unsigned int nargs_constant = 0;
36617 unsigned int mask_pos = 0;
36618 int num_memory = 0;
36619 struct
36621 rtx op;
36622 machine_mode mode;
36623 } args[6];
36624 bool last_arg_count = false;
36625 enum insn_code icode = d->icode;
36626 const struct insn_data_d *insn_p = &insn_data[icode];
36627 machine_mode tmode = insn_p->operand[0].mode;
36628 machine_mode rmode = VOIDmode;
36629 bool swap = false;
36630 enum rtx_code comparison = d->comparison;
36632 switch ((enum ix86_builtin_func_type) d->flag)
36634 case V2DF_FTYPE_V2DF_ROUND:
36635 case V4DF_FTYPE_V4DF_ROUND:
36636 case V4SF_FTYPE_V4SF_ROUND:
36637 case V8SF_FTYPE_V8SF_ROUND:
36638 case V4SI_FTYPE_V4SF_ROUND:
36639 case V8SI_FTYPE_V8SF_ROUND:
36640 return ix86_expand_sse_round (d, exp, target);
36641 case V4SI_FTYPE_V2DF_V2DF_ROUND:
36642 case V8SI_FTYPE_V4DF_V4DF_ROUND:
36643 case V16SI_FTYPE_V8DF_V8DF_ROUND:
36644 return ix86_expand_sse_round_vec_pack_sfix (d, exp, target);
36645 case INT_FTYPE_V8SF_V8SF_PTEST:
36646 case INT_FTYPE_V4DI_V4DI_PTEST:
36647 case INT_FTYPE_V4DF_V4DF_PTEST:
36648 case INT_FTYPE_V4SF_V4SF_PTEST:
36649 case INT_FTYPE_V2DI_V2DI_PTEST:
36650 case INT_FTYPE_V2DF_V2DF_PTEST:
36651 return ix86_expand_sse_ptest (d, exp, target);
36652 case FLOAT128_FTYPE_FLOAT128:
36653 case FLOAT_FTYPE_FLOAT:
36654 case INT_FTYPE_INT:
36655 case UINT64_FTYPE_INT:
36656 case UINT16_FTYPE_UINT16:
36657 case INT64_FTYPE_INT64:
36658 case INT64_FTYPE_V4SF:
36659 case INT64_FTYPE_V2DF:
36660 case INT_FTYPE_V16QI:
36661 case INT_FTYPE_V8QI:
36662 case INT_FTYPE_V8SF:
36663 case INT_FTYPE_V4DF:
36664 case INT_FTYPE_V4SF:
36665 case INT_FTYPE_V2DF:
36666 case INT_FTYPE_V32QI:
36667 case V16QI_FTYPE_V16QI:
36668 case V8SI_FTYPE_V8SF:
36669 case V8SI_FTYPE_V4SI:
36670 case V8HI_FTYPE_V8HI:
36671 case V8HI_FTYPE_V16QI:
36672 case V8QI_FTYPE_V8QI:
36673 case V8SF_FTYPE_V8SF:
36674 case V8SF_FTYPE_V8SI:
36675 case V8SF_FTYPE_V4SF:
36676 case V8SF_FTYPE_V8HI:
36677 case V4SI_FTYPE_V4SI:
36678 case V4SI_FTYPE_V16QI:
36679 case V4SI_FTYPE_V4SF:
36680 case V4SI_FTYPE_V8SI:
36681 case V4SI_FTYPE_V8HI:
36682 case V4SI_FTYPE_V4DF:
36683 case V4SI_FTYPE_V2DF:
36684 case V4HI_FTYPE_V4HI:
36685 case V4DF_FTYPE_V4DF:
36686 case V4DF_FTYPE_V4SI:
36687 case V4DF_FTYPE_V4SF:
36688 case V4DF_FTYPE_V2DF:
36689 case V4SF_FTYPE_V4SF:
36690 case V4SF_FTYPE_V4SI:
36691 case V4SF_FTYPE_V8SF:
36692 case V4SF_FTYPE_V4DF:
36693 case V4SF_FTYPE_V8HI:
36694 case V4SF_FTYPE_V2DF:
36695 case V2DI_FTYPE_V2DI:
36696 case V2DI_FTYPE_V16QI:
36697 case V2DI_FTYPE_V8HI:
36698 case V2DI_FTYPE_V4SI:
36699 case V2DF_FTYPE_V2DF:
36700 case V2DF_FTYPE_V4SI:
36701 case V2DF_FTYPE_V4DF:
36702 case V2DF_FTYPE_V4SF:
36703 case V2DF_FTYPE_V2SI:
36704 case V2SI_FTYPE_V2SI:
36705 case V2SI_FTYPE_V4SF:
36706 case V2SI_FTYPE_V2SF:
36707 case V2SI_FTYPE_V2DF:
36708 case V2SF_FTYPE_V2SF:
36709 case V2SF_FTYPE_V2SI:
36710 case V32QI_FTYPE_V32QI:
36711 case V32QI_FTYPE_V16QI:
36712 case V16HI_FTYPE_V16HI:
36713 case V16HI_FTYPE_V8HI:
36714 case V8SI_FTYPE_V8SI:
36715 case V16HI_FTYPE_V16QI:
36716 case V8SI_FTYPE_V16QI:
36717 case V4DI_FTYPE_V16QI:
36718 case V8SI_FTYPE_V8HI:
36719 case V4DI_FTYPE_V8HI:
36720 case V4DI_FTYPE_V4SI:
36721 case V4DI_FTYPE_V2DI:
36722 case HI_FTYPE_HI:
36723 case HI_FTYPE_V16QI:
36724 case SI_FTYPE_V32QI:
36725 case DI_FTYPE_V64QI:
36726 case V16QI_FTYPE_HI:
36727 case V32QI_FTYPE_SI:
36728 case V64QI_FTYPE_DI:
36729 case V8HI_FTYPE_QI:
36730 case V16HI_FTYPE_HI:
36731 case V32HI_FTYPE_SI:
36732 case V4SI_FTYPE_QI:
36733 case V8SI_FTYPE_QI:
36734 case V4SI_FTYPE_HI:
36735 case V8SI_FTYPE_HI:
36736 case QI_FTYPE_V8HI:
36737 case HI_FTYPE_V16HI:
36738 case SI_FTYPE_V32HI:
36739 case QI_FTYPE_V4SI:
36740 case QI_FTYPE_V8SI:
36741 case HI_FTYPE_V16SI:
36742 case QI_FTYPE_V2DI:
36743 case QI_FTYPE_V4DI:
36744 case QI_FTYPE_V8DI:
36745 case UINT_FTYPE_V2DF:
36746 case UINT_FTYPE_V4SF:
36747 case UINT64_FTYPE_V2DF:
36748 case UINT64_FTYPE_V4SF:
36749 case V16QI_FTYPE_V8DI:
36750 case V16HI_FTYPE_V16SI:
36751 case V16SI_FTYPE_HI:
36752 case V2DI_FTYPE_QI:
36753 case V4DI_FTYPE_QI:
36754 case V16SI_FTYPE_V16SI:
36755 case V16SI_FTYPE_INT:
36756 case V16SF_FTYPE_FLOAT:
36757 case V16SF_FTYPE_V8SF:
36758 case V16SI_FTYPE_V8SI:
36759 case V16SF_FTYPE_V4SF:
36760 case V16SI_FTYPE_V4SI:
36761 case V16SF_FTYPE_V16SF:
36762 case V8HI_FTYPE_V8DI:
36763 case V8UHI_FTYPE_V8UHI:
36764 case V8SI_FTYPE_V8DI:
36765 case V8SF_FTYPE_V8DF:
36766 case V8DI_FTYPE_QI:
36767 case V8DI_FTYPE_INT64:
36768 case V8DI_FTYPE_V4DI:
36769 case V8DI_FTYPE_V8DI:
36770 case V8DF_FTYPE_DOUBLE:
36771 case V8DF_FTYPE_V4DF:
36772 case V8DF_FTYPE_V2DF:
36773 case V8DF_FTYPE_V8DF:
36774 case V8DF_FTYPE_V8SI:
36775 nargs = 1;
36776 break;
36777 case V4SF_FTYPE_V4SF_VEC_MERGE:
36778 case V2DF_FTYPE_V2DF_VEC_MERGE:
36779 return ix86_expand_unop_vec_merge_builtin (icode, exp, target);
36780 case FLOAT128_FTYPE_FLOAT128_FLOAT128:
36781 case V16QI_FTYPE_V16QI_V16QI:
36782 case V16QI_FTYPE_V8HI_V8HI:
36783 case V16SI_FTYPE_V16SI_V16SI:
36784 case V16SF_FTYPE_V16SF_V16SF:
36785 case V16SF_FTYPE_V16SF_V16SI:
36786 case V8QI_FTYPE_V8QI_V8QI:
36787 case V8QI_FTYPE_V4HI_V4HI:
36788 case V8HI_FTYPE_V8HI_V8HI:
36789 case V8HI_FTYPE_V16QI_V16QI:
36790 case V8HI_FTYPE_V4SI_V4SI:
36791 case V8SF_FTYPE_V8SF_V8SF:
36792 case V8SF_FTYPE_V8SF_V8SI:
36793 case V8DI_FTYPE_V8DI_V8DI:
36794 case V8DF_FTYPE_V8DF_V8DF:
36795 case V8DF_FTYPE_V8DF_V8DI:
36796 case V4SI_FTYPE_V4SI_V4SI:
36797 case V4SI_FTYPE_V8HI_V8HI:
36798 case V4SI_FTYPE_V4SF_V4SF:
36799 case V4SI_FTYPE_V2DF_V2DF:
36800 case V4HI_FTYPE_V4HI_V4HI:
36801 case V4HI_FTYPE_V8QI_V8QI:
36802 case V4HI_FTYPE_V2SI_V2SI:
36803 case V4DF_FTYPE_V4DF_V4DF:
36804 case V4DF_FTYPE_V4DF_V4DI:
36805 case V4SF_FTYPE_V4SF_V4SF:
36806 case V4SF_FTYPE_V4SF_V4SI:
36807 case V4SF_FTYPE_V4SF_V2SI:
36808 case V4SF_FTYPE_V4SF_V2DF:
36809 case V4SF_FTYPE_V4SF_UINT:
36810 case V4SF_FTYPE_V4SF_UINT64:
36811 case V4SF_FTYPE_V4SF_DI:
36812 case V4SF_FTYPE_V4SF_SI:
36813 case V2DI_FTYPE_V2DI_V2DI:
36814 case V2DI_FTYPE_V16QI_V16QI:
36815 case V2DI_FTYPE_V4SI_V4SI:
36816 case V2UDI_FTYPE_V4USI_V4USI:
36817 case V2DI_FTYPE_V2DI_V16QI:
36818 case V2DI_FTYPE_V2DF_V2DF:
36819 case V2SI_FTYPE_V2SI_V2SI:
36820 case V2SI_FTYPE_V4HI_V4HI:
36821 case V2SI_FTYPE_V2SF_V2SF:
36822 case V2DF_FTYPE_V2DF_V2DF:
36823 case V2DF_FTYPE_V2DF_V4SF:
36824 case V2DF_FTYPE_V2DF_V2DI:
36825 case V2DF_FTYPE_V2DF_DI:
36826 case V2DF_FTYPE_V2DF_SI:
36827 case V2DF_FTYPE_V2DF_UINT:
36828 case V2DF_FTYPE_V2DF_UINT64:
36829 case V2SF_FTYPE_V2SF_V2SF:
36830 case V1DI_FTYPE_V1DI_V1DI:
36831 case V1DI_FTYPE_V8QI_V8QI:
36832 case V1DI_FTYPE_V2SI_V2SI:
36833 case V32QI_FTYPE_V16HI_V16HI:
36834 case V16HI_FTYPE_V8SI_V8SI:
36835 case V32QI_FTYPE_V32QI_V32QI:
36836 case V16HI_FTYPE_V32QI_V32QI:
36837 case V16HI_FTYPE_V16HI_V16HI:
36838 case V8SI_FTYPE_V4DF_V4DF:
36839 case V8SI_FTYPE_V8SI_V8SI:
36840 case V8SI_FTYPE_V16HI_V16HI:
36841 case V4DI_FTYPE_V4DI_V4DI:
36842 case V4DI_FTYPE_V8SI_V8SI:
36843 case V4UDI_FTYPE_V8USI_V8USI:
36844 case QI_FTYPE_V8DI_V8DI:
36845 case V8DI_FTYPE_V64QI_V64QI:
36846 case HI_FTYPE_V16SI_V16SI:
36847 if (comparison == UNKNOWN)
36848 return ix86_expand_binop_builtin (icode, exp, target);
36849 nargs = 2;
36850 break;
36851 case V4SF_FTYPE_V4SF_V4SF_SWAP:
36852 case V2DF_FTYPE_V2DF_V2DF_SWAP:
36853 gcc_assert (comparison != UNKNOWN);
36854 nargs = 2;
36855 swap = true;
36856 break;
36857 case V16HI_FTYPE_V16HI_V8HI_COUNT:
36858 case V16HI_FTYPE_V16HI_SI_COUNT:
36859 case V8SI_FTYPE_V8SI_V4SI_COUNT:
36860 case V8SI_FTYPE_V8SI_SI_COUNT:
36861 case V4DI_FTYPE_V4DI_V2DI_COUNT:
36862 case V4DI_FTYPE_V4DI_INT_COUNT:
36863 case V8HI_FTYPE_V8HI_V8HI_COUNT:
36864 case V8HI_FTYPE_V8HI_SI_COUNT:
36865 case V4SI_FTYPE_V4SI_V4SI_COUNT:
36866 case V4SI_FTYPE_V4SI_SI_COUNT:
36867 case V4HI_FTYPE_V4HI_V4HI_COUNT:
36868 case V4HI_FTYPE_V4HI_SI_COUNT:
36869 case V2DI_FTYPE_V2DI_V2DI_COUNT:
36870 case V2DI_FTYPE_V2DI_SI_COUNT:
36871 case V2SI_FTYPE_V2SI_V2SI_COUNT:
36872 case V2SI_FTYPE_V2SI_SI_COUNT:
36873 case V1DI_FTYPE_V1DI_V1DI_COUNT:
36874 case V1DI_FTYPE_V1DI_SI_COUNT:
36875 nargs = 2;
36876 last_arg_count = true;
36877 break;
36878 case UINT64_FTYPE_UINT64_UINT64:
36879 case UINT_FTYPE_UINT_UINT:
36880 case UINT_FTYPE_UINT_USHORT:
36881 case UINT_FTYPE_UINT_UCHAR:
36882 case UINT16_FTYPE_UINT16_INT:
36883 case UINT8_FTYPE_UINT8_INT:
36884 case HI_FTYPE_HI_HI:
36885 case SI_FTYPE_SI_SI:
36886 case DI_FTYPE_DI_DI:
36887 case V16SI_FTYPE_V8DF_V8DF:
36888 nargs = 2;
36889 break;
36890 case V2DI_FTYPE_V2DI_INT_CONVERT:
36891 nargs = 2;
36892 rmode = V1TImode;
36893 nargs_constant = 1;
36894 break;
36895 case V4DI_FTYPE_V4DI_INT_CONVERT:
36896 nargs = 2;
36897 rmode = V2TImode;
36898 nargs_constant = 1;
36899 break;
36900 case V8DI_FTYPE_V8DI_INT_CONVERT:
36901 nargs = 2;
36902 rmode = V4TImode;
36903 nargs_constant = 1;
36904 break;
36905 case V8HI_FTYPE_V8HI_INT:
36906 case V8HI_FTYPE_V8SF_INT:
36907 case V16HI_FTYPE_V16SF_INT:
36908 case V8HI_FTYPE_V4SF_INT:
36909 case V8SF_FTYPE_V8SF_INT:
36910 case V4SF_FTYPE_V16SF_INT:
36911 case V16SF_FTYPE_V16SF_INT:
36912 case V4SI_FTYPE_V4SI_INT:
36913 case V4SI_FTYPE_V8SI_INT:
36914 case V4HI_FTYPE_V4HI_INT:
36915 case V4DF_FTYPE_V4DF_INT:
36916 case V4DF_FTYPE_V8DF_INT:
36917 case V4SF_FTYPE_V4SF_INT:
36918 case V4SF_FTYPE_V8SF_INT:
36919 case V2DI_FTYPE_V2DI_INT:
36920 case V2DF_FTYPE_V2DF_INT:
36921 case V2DF_FTYPE_V4DF_INT:
36922 case V16HI_FTYPE_V16HI_INT:
36923 case V8SI_FTYPE_V8SI_INT:
36924 case V16SI_FTYPE_V16SI_INT:
36925 case V4SI_FTYPE_V16SI_INT:
36926 case V4DI_FTYPE_V4DI_INT:
36927 case V2DI_FTYPE_V4DI_INT:
36928 case V4DI_FTYPE_V8DI_INT:
36929 case HI_FTYPE_HI_INT:
36930 case QI_FTYPE_V4SF_INT:
36931 case QI_FTYPE_V2DF_INT:
36932 nargs = 2;
36933 nargs_constant = 1;
36934 break;
36935 case V16QI_FTYPE_V16QI_V16QI_V16QI:
36936 case V8SF_FTYPE_V8SF_V8SF_V8SF:
36937 case V4DF_FTYPE_V4DF_V4DF_V4DF:
36938 case V4SF_FTYPE_V4SF_V4SF_V4SF:
36939 case V2DF_FTYPE_V2DF_V2DF_V2DF:
36940 case V32QI_FTYPE_V32QI_V32QI_V32QI:
36941 case HI_FTYPE_V16SI_V16SI_HI:
36942 case QI_FTYPE_V8DI_V8DI_QI:
36943 case V16HI_FTYPE_V16SI_V16HI_HI:
36944 case V16QI_FTYPE_V16SI_V16QI_HI:
36945 case V16QI_FTYPE_V8DI_V16QI_QI:
36946 case V16SF_FTYPE_V16SF_V16SF_HI:
36947 case V16SF_FTYPE_V16SF_V16SF_V16SF:
36948 case V16SF_FTYPE_V16SF_V16SI_V16SF:
36949 case V16SF_FTYPE_V16SI_V16SF_HI:
36950 case V16SF_FTYPE_V16SI_V16SF_V16SF:
36951 case V16SF_FTYPE_V4SF_V16SF_HI:
36952 case V16SI_FTYPE_SI_V16SI_HI:
36953 case V16SI_FTYPE_V16HI_V16SI_HI:
36954 case V16SI_FTYPE_V16QI_V16SI_HI:
36955 case V16SI_FTYPE_V16SF_V16SI_HI:
36956 case V8SF_FTYPE_V4SF_V8SF_QI:
36957 case V4DF_FTYPE_V2DF_V4DF_QI:
36958 case V8SI_FTYPE_V4SI_V8SI_QI:
36959 case V8SI_FTYPE_SI_V8SI_QI:
36960 case V4SI_FTYPE_V4SI_V4SI_QI:
36961 case V4SI_FTYPE_SI_V4SI_QI:
36962 case V4DI_FTYPE_V2DI_V4DI_QI:
36963 case V4DI_FTYPE_DI_V4DI_QI:
36964 case V2DI_FTYPE_V2DI_V2DI_QI:
36965 case V2DI_FTYPE_DI_V2DI_QI:
36966 case V64QI_FTYPE_V64QI_V64QI_DI:
36967 case V64QI_FTYPE_V16QI_V64QI_DI:
36968 case V64QI_FTYPE_QI_V64QI_DI:
36969 case V32QI_FTYPE_V32QI_V32QI_SI:
36970 case V32QI_FTYPE_V16QI_V32QI_SI:
36971 case V32QI_FTYPE_QI_V32QI_SI:
36972 case V16QI_FTYPE_V16QI_V16QI_HI:
36973 case V16QI_FTYPE_QI_V16QI_HI:
36974 case V32HI_FTYPE_V8HI_V32HI_SI:
36975 case V32HI_FTYPE_HI_V32HI_SI:
36976 case V16HI_FTYPE_V8HI_V16HI_HI:
36977 case V16HI_FTYPE_HI_V16HI_HI:
36978 case V8HI_FTYPE_V8HI_V8HI_QI:
36979 case V8HI_FTYPE_HI_V8HI_QI:
36980 case V8SF_FTYPE_V8HI_V8SF_QI:
36981 case V4SF_FTYPE_V8HI_V4SF_QI:
36982 case V8SI_FTYPE_V8SF_V8SI_QI:
36983 case V4SI_FTYPE_V4SF_V4SI_QI:
36984 case V8DI_FTYPE_V8SF_V8DI_QI:
36985 case V4DI_FTYPE_V4SF_V4DI_QI:
36986 case V2DI_FTYPE_V4SF_V2DI_QI:
36987 case V8SF_FTYPE_V8DI_V8SF_QI:
36988 case V4SF_FTYPE_V4DI_V4SF_QI:
36989 case V4SF_FTYPE_V2DI_V4SF_QI:
36990 case V8DF_FTYPE_V8DI_V8DF_QI:
36991 case V4DF_FTYPE_V4DI_V4DF_QI:
36992 case V2DF_FTYPE_V2DI_V2DF_QI:
36993 case V16QI_FTYPE_V8HI_V16QI_QI:
36994 case V16QI_FTYPE_V16HI_V16QI_HI:
36995 case V16QI_FTYPE_V4SI_V16QI_QI:
36996 case V16QI_FTYPE_V8SI_V16QI_QI:
36997 case V8HI_FTYPE_V4SI_V8HI_QI:
36998 case V8HI_FTYPE_V8SI_V8HI_QI:
36999 case V16QI_FTYPE_V2DI_V16QI_QI:
37000 case V16QI_FTYPE_V4DI_V16QI_QI:
37001 case V8HI_FTYPE_V2DI_V8HI_QI:
37002 case V8HI_FTYPE_V4DI_V8HI_QI:
37003 case V4SI_FTYPE_V2DI_V4SI_QI:
37004 case V4SI_FTYPE_V4DI_V4SI_QI:
37005 case V32QI_FTYPE_V32HI_V32QI_SI:
37006 case HI_FTYPE_V16QI_V16QI_HI:
37007 case SI_FTYPE_V32QI_V32QI_SI:
37008 case DI_FTYPE_V64QI_V64QI_DI:
37009 case QI_FTYPE_V8HI_V8HI_QI:
37010 case HI_FTYPE_V16HI_V16HI_HI:
37011 case SI_FTYPE_V32HI_V32HI_SI:
37012 case QI_FTYPE_V4SI_V4SI_QI:
37013 case QI_FTYPE_V8SI_V8SI_QI:
37014 case QI_FTYPE_V2DI_V2DI_QI:
37015 case QI_FTYPE_V4DI_V4DI_QI:
37016 case V4SF_FTYPE_V2DF_V4SF_QI:
37017 case V4SF_FTYPE_V4DF_V4SF_QI:
37018 case V16SI_FTYPE_V16SI_V16SI_HI:
37019 case V16SI_FTYPE_V16SI_V16SI_V16SI:
37020 case V16SI_FTYPE_V4SI_V16SI_HI:
37021 case V2DI_FTYPE_V2DI_V2DI_V2DI:
37022 case V2DI_FTYPE_V4SI_V2DI_QI:
37023 case V2DI_FTYPE_V8HI_V2DI_QI:
37024 case V2DI_FTYPE_V16QI_V2DI_QI:
37025 case V4DI_FTYPE_V4DI_V4DI_QI:
37026 case V4DI_FTYPE_V4SI_V4DI_QI:
37027 case V4DI_FTYPE_V8HI_V4DI_QI:
37028 case V4DI_FTYPE_V16QI_V4DI_QI:
37029 case V8DI_FTYPE_V8DF_V8DI_QI:
37030 case V4DI_FTYPE_V4DF_V4DI_QI:
37031 case V2DI_FTYPE_V2DF_V2DI_QI:
37032 case V4SI_FTYPE_V4DF_V4SI_QI:
37033 case V4SI_FTYPE_V2DF_V4SI_QI:
37034 case V4SI_FTYPE_V8HI_V4SI_QI:
37035 case V4SI_FTYPE_V16QI_V4SI_QI:
37036 case V8SI_FTYPE_V8SI_V8SI_V8SI:
37037 case V4DI_FTYPE_V4DI_V4DI_V4DI:
37038 case V8DF_FTYPE_V2DF_V8DF_QI:
37039 case V8DF_FTYPE_V4DF_V8DF_QI:
37040 case V8DF_FTYPE_V8DF_V8DF_QI:
37041 case V8DF_FTYPE_V8DF_V8DF_V8DF:
37042 case V8SF_FTYPE_V8SF_V8SF_QI:
37043 case V8SF_FTYPE_V8SI_V8SF_QI:
37044 case V4DF_FTYPE_V4DF_V4DF_QI:
37045 case V4SF_FTYPE_V4SF_V4SF_QI:
37046 case V2DF_FTYPE_V2DF_V2DF_QI:
37047 case V2DF_FTYPE_V4SF_V2DF_QI:
37048 case V2DF_FTYPE_V4SI_V2DF_QI:
37049 case V4SF_FTYPE_V4SI_V4SF_QI:
37050 case V4DF_FTYPE_V4SF_V4DF_QI:
37051 case V4DF_FTYPE_V4SI_V4DF_QI:
37052 case V8SI_FTYPE_V8SI_V8SI_QI:
37053 case V8SI_FTYPE_V8HI_V8SI_QI:
37054 case V8SI_FTYPE_V16QI_V8SI_QI:
37055 case V8DF_FTYPE_V8DF_V8DI_V8DF:
37056 case V8DF_FTYPE_V8DI_V8DF_V8DF:
37057 case V8DF_FTYPE_V8SF_V8DF_QI:
37058 case V8DF_FTYPE_V8SI_V8DF_QI:
37059 case V8DI_FTYPE_DI_V8DI_QI:
37060 case V16SF_FTYPE_V8SF_V16SF_HI:
37061 case V16SI_FTYPE_V8SI_V16SI_HI:
37062 case V16HI_FTYPE_V16HI_V16HI_HI:
37063 case V8HI_FTYPE_V16QI_V8HI_QI:
37064 case V16HI_FTYPE_V16QI_V16HI_HI:
37065 case V32HI_FTYPE_V32HI_V32HI_SI:
37066 case V32HI_FTYPE_V32QI_V32HI_SI:
37067 case V8DI_FTYPE_V16QI_V8DI_QI:
37068 case V8DI_FTYPE_V2DI_V8DI_QI:
37069 case V8DI_FTYPE_V4DI_V8DI_QI:
37070 case V8DI_FTYPE_V8DI_V8DI_QI:
37071 case V8DI_FTYPE_V8DI_V8DI_V8DI:
37072 case V8DI_FTYPE_V8HI_V8DI_QI:
37073 case V8DI_FTYPE_V8SI_V8DI_QI:
37074 case V8HI_FTYPE_V8DI_V8HI_QI:
37075 case V8SF_FTYPE_V8DF_V8SF_QI:
37076 case V8SI_FTYPE_V8DF_V8SI_QI:
37077 case V8SI_FTYPE_V8DI_V8SI_QI:
37078 case V4SI_FTYPE_V4SI_V4SI_V4SI:
37079 nargs = 3;
37080 break;
37081 case V32QI_FTYPE_V32QI_V32QI_INT:
37082 case V16HI_FTYPE_V16HI_V16HI_INT:
37083 case V16QI_FTYPE_V16QI_V16QI_INT:
37084 case V4DI_FTYPE_V4DI_V4DI_INT:
37085 case V8HI_FTYPE_V8HI_V8HI_INT:
37086 case V8SI_FTYPE_V8SI_V8SI_INT:
37087 case V8SI_FTYPE_V8SI_V4SI_INT:
37088 case V8SF_FTYPE_V8SF_V8SF_INT:
37089 case V8SF_FTYPE_V8SF_V4SF_INT:
37090 case V4SI_FTYPE_V4SI_V4SI_INT:
37091 case V4DF_FTYPE_V4DF_V4DF_INT:
37092 case V16SF_FTYPE_V16SF_V16SF_INT:
37093 case V16SF_FTYPE_V16SF_V4SF_INT:
37094 case V16SI_FTYPE_V16SI_V4SI_INT:
37095 case V4DF_FTYPE_V4DF_V2DF_INT:
37096 case V4SF_FTYPE_V4SF_V4SF_INT:
37097 case V2DI_FTYPE_V2DI_V2DI_INT:
37098 case V4DI_FTYPE_V4DI_V2DI_INT:
37099 case V2DF_FTYPE_V2DF_V2DF_INT:
37100 case QI_FTYPE_V8DI_V8DI_INT:
37101 case QI_FTYPE_V8DF_V8DF_INT:
37102 case QI_FTYPE_V2DF_V2DF_INT:
37103 case QI_FTYPE_V4SF_V4SF_INT:
37104 case HI_FTYPE_V16SI_V16SI_INT:
37105 case HI_FTYPE_V16SF_V16SF_INT:
37106 nargs = 3;
37107 nargs_constant = 1;
37108 break;
37109 case V4DI_FTYPE_V4DI_V4DI_INT_CONVERT:
37110 nargs = 3;
37111 rmode = V4DImode;
37112 nargs_constant = 1;
37113 break;
37114 case V2DI_FTYPE_V2DI_V2DI_INT_CONVERT:
37115 nargs = 3;
37116 rmode = V2DImode;
37117 nargs_constant = 1;
37118 break;
37119 case V1DI_FTYPE_V1DI_V1DI_INT_CONVERT:
37120 nargs = 3;
37121 rmode = DImode;
37122 nargs_constant = 1;
37123 break;
37124 case V2DI_FTYPE_V2DI_UINT_UINT:
37125 nargs = 3;
37126 nargs_constant = 2;
37127 break;
37128 case V8DI_FTYPE_V8DI_V8DI_INT_CONVERT:
37129 nargs = 3;
37130 rmode = V8DImode;
37131 nargs_constant = 1;
37132 break;
37133 case V8DI_FTYPE_V8DI_V8DI_INT_V8DI_DI_CONVERT:
37134 nargs = 5;
37135 rmode = V8DImode;
37136 mask_pos = 2;
37137 nargs_constant = 1;
37138 break;
37139 case QI_FTYPE_V8DF_INT_QI:
37140 case QI_FTYPE_V4DF_INT_QI:
37141 case QI_FTYPE_V2DF_INT_QI:
37142 case HI_FTYPE_V16SF_INT_HI:
37143 case QI_FTYPE_V8SF_INT_QI:
37144 case QI_FTYPE_V4SF_INT_QI:
37145 nargs = 3;
37146 mask_pos = 1;
37147 nargs_constant = 1;
37148 break;
37149 case V4DI_FTYPE_V4DI_V4DI_INT_V4DI_SI_CONVERT:
37150 nargs = 5;
37151 rmode = V4DImode;
37152 mask_pos = 2;
37153 nargs_constant = 1;
37154 break;
37155 case V2DI_FTYPE_V2DI_V2DI_INT_V2DI_HI_CONVERT:
37156 nargs = 5;
37157 rmode = V2DImode;
37158 mask_pos = 2;
37159 nargs_constant = 1;
37160 break;
37161 case V32QI_FTYPE_V32QI_V32QI_V32QI_SI:
37162 case V32HI_FTYPE_V32HI_V32HI_V32HI_SI:
37163 case V32HI_FTYPE_V64QI_V64QI_V32HI_SI:
37164 case V16SI_FTYPE_V32HI_V32HI_V16SI_HI:
37165 case V64QI_FTYPE_V64QI_V64QI_V64QI_DI:
37166 case V32HI_FTYPE_V32HI_V8HI_V32HI_SI:
37167 case V16HI_FTYPE_V16HI_V8HI_V16HI_HI:
37168 case V8SI_FTYPE_V8SI_V4SI_V8SI_QI:
37169 case V4DI_FTYPE_V4DI_V2DI_V4DI_QI:
37170 case V64QI_FTYPE_V32HI_V32HI_V64QI_DI:
37171 case V32QI_FTYPE_V16HI_V16HI_V32QI_SI:
37172 case V16QI_FTYPE_V8HI_V8HI_V16QI_HI:
37173 case V32HI_FTYPE_V16SI_V16SI_V32HI_SI:
37174 case V16HI_FTYPE_V8SI_V8SI_V16HI_HI:
37175 case V8HI_FTYPE_V4SI_V4SI_V8HI_QI:
37176 case V4DF_FTYPE_V4DF_V4DI_V4DF_QI:
37177 case V8SF_FTYPE_V8SF_V8SI_V8SF_QI:
37178 case V4SF_FTYPE_V4SF_V4SI_V4SF_QI:
37179 case V2DF_FTYPE_V2DF_V2DI_V2DF_QI:
37180 case V2DI_FTYPE_V4SI_V4SI_V2DI_QI:
37181 case V4DI_FTYPE_V8SI_V8SI_V4DI_QI:
37182 case V4DF_FTYPE_V4DI_V4DF_V4DF_QI:
37183 case V8SF_FTYPE_V8SI_V8SF_V8SF_QI:
37184 case V2DF_FTYPE_V2DI_V2DF_V2DF_QI:
37185 case V4SF_FTYPE_V4SI_V4SF_V4SF_QI:
37186 case V16SF_FTYPE_V16SF_V16SF_V16SF_HI:
37187 case V16SF_FTYPE_V16SF_V16SI_V16SF_HI:
37188 case V16SF_FTYPE_V16SI_V16SF_V16SF_HI:
37189 case V16SI_FTYPE_V16SI_V16SI_V16SI_HI:
37190 case V16SI_FTYPE_V16SI_V4SI_V16SI_HI:
37191 case V8HI_FTYPE_V8HI_V8HI_V8HI_QI:
37192 case V8SI_FTYPE_V8SI_V8SI_V8SI_QI:
37193 case V4SI_FTYPE_V4SI_V4SI_V4SI_QI:
37194 case V8SF_FTYPE_V8SF_V8SF_V8SF_QI:
37195 case V16QI_FTYPE_V16QI_V16QI_V16QI_HI:
37196 case V16HI_FTYPE_V16HI_V16HI_V16HI_HI:
37197 case V2DI_FTYPE_V2DI_V2DI_V2DI_QI:
37198 case V2DF_FTYPE_V2DF_V2DF_V2DF_QI:
37199 case V2DF_FTYPE_V2DF_V4SF_V2DF_QI:
37200 case V4DI_FTYPE_V4DI_V4DI_V4DI_QI:
37201 case V4DF_FTYPE_V4DF_V4DF_V4DF_QI:
37202 case V4SF_FTYPE_V4SF_V2DF_V4SF_QI:
37203 case V4SF_FTYPE_V4SF_V4SF_V4SF_QI:
37204 case V8DF_FTYPE_V8DF_V8DF_V8DF_QI:
37205 case V8DF_FTYPE_V8DF_V8DI_V8DF_QI:
37206 case V8DF_FTYPE_V8DI_V8DF_V8DF_QI:
37207 case V8DI_FTYPE_V16SI_V16SI_V8DI_QI:
37208 case V8DI_FTYPE_V8DI_SI_V8DI_V8DI:
37209 case V8DI_FTYPE_V8DI_V2DI_V8DI_QI:
37210 case V8DI_FTYPE_V8DI_V8DI_V8DI_QI:
37211 case V8HI_FTYPE_V16QI_V16QI_V8HI_QI:
37212 case V16HI_FTYPE_V32QI_V32QI_V16HI_HI:
37213 case V8SI_FTYPE_V16HI_V16HI_V8SI_QI:
37214 case V4SI_FTYPE_V8HI_V8HI_V4SI_QI:
37215 nargs = 4;
37216 break;
37217 case V2DF_FTYPE_V2DF_V2DF_V2DI_INT:
37218 case V4DF_FTYPE_V4DF_V4DF_V4DI_INT:
37219 case V4SF_FTYPE_V4SF_V4SF_V4SI_INT:
37220 case V8SF_FTYPE_V8SF_V8SF_V8SI_INT:
37221 case V16SF_FTYPE_V16SF_V16SF_V16SI_INT:
37222 nargs = 4;
37223 nargs_constant = 1;
37224 break;
37225 case QI_FTYPE_V4DI_V4DI_INT_QI:
37226 case QI_FTYPE_V8SI_V8SI_INT_QI:
37227 case QI_FTYPE_V4DF_V4DF_INT_QI:
37228 case QI_FTYPE_V8SF_V8SF_INT_QI:
37229 case QI_FTYPE_V2DI_V2DI_INT_QI:
37230 case QI_FTYPE_V4SI_V4SI_INT_QI:
37231 case QI_FTYPE_V2DF_V2DF_INT_QI:
37232 case QI_FTYPE_V4SF_V4SF_INT_QI:
37233 case DI_FTYPE_V64QI_V64QI_INT_DI:
37234 case SI_FTYPE_V32QI_V32QI_INT_SI:
37235 case HI_FTYPE_V16QI_V16QI_INT_HI:
37236 case SI_FTYPE_V32HI_V32HI_INT_SI:
37237 case HI_FTYPE_V16HI_V16HI_INT_HI:
37238 case QI_FTYPE_V8HI_V8HI_INT_QI:
37239 nargs = 4;
37240 mask_pos = 1;
37241 nargs_constant = 1;
37242 break;
37243 case V2DI_FTYPE_V2DI_V2DI_UINT_UINT:
37244 nargs = 4;
37245 nargs_constant = 2;
37246 break;
37247 case UCHAR_FTYPE_UCHAR_UINT_UINT_PUNSIGNED:
37248 case UCHAR_FTYPE_UCHAR_ULONGLONG_ULONGLONG_PULONGLONG:
37249 nargs = 4;
37250 break;
37251 case QI_FTYPE_V8DI_V8DI_INT_QI:
37252 case HI_FTYPE_V16SI_V16SI_INT_HI:
37253 case QI_FTYPE_V8DF_V8DF_INT_QI:
37254 case HI_FTYPE_V16SF_V16SF_INT_HI:
37255 mask_pos = 1;
37256 nargs = 4;
37257 nargs_constant = 1;
37258 break;
37259 case V8SF_FTYPE_V8SF_INT_V8SF_QI:
37260 case V4SF_FTYPE_V4SF_INT_V4SF_QI:
37261 case V2DF_FTYPE_V4DF_INT_V2DF_QI:
37262 case V2DI_FTYPE_V4DI_INT_V2DI_QI:
37263 case V8SF_FTYPE_V16SF_INT_V8SF_QI:
37264 case V8SI_FTYPE_V16SI_INT_V8SI_QI:
37265 case V2DF_FTYPE_V8DF_INT_V2DF_QI:
37266 case V2DI_FTYPE_V8DI_INT_V2DI_QI:
37267 case V4SF_FTYPE_V8SF_INT_V4SF_QI:
37268 case V4SI_FTYPE_V8SI_INT_V4SI_QI:
37269 case V8HI_FTYPE_V8SF_INT_V8HI_QI:
37270 case V8HI_FTYPE_V4SF_INT_V8HI_QI:
37271 case V32HI_FTYPE_V32HI_INT_V32HI_SI:
37272 case V16HI_FTYPE_V16HI_INT_V16HI_HI:
37273 case V8HI_FTYPE_V8HI_INT_V8HI_QI:
37274 case V4DI_FTYPE_V4DI_INT_V4DI_QI:
37275 case V2DI_FTYPE_V2DI_INT_V2DI_QI:
37276 case V8SI_FTYPE_V8SI_INT_V8SI_QI:
37277 case V4SI_FTYPE_V4SI_INT_V4SI_QI:
37278 case V4DF_FTYPE_V4DF_INT_V4DF_QI:
37279 case V2DF_FTYPE_V2DF_INT_V2DF_QI:
37280 case V8DF_FTYPE_V8DF_INT_V8DF_QI:
37281 case V16SF_FTYPE_V16SF_INT_V16SF_HI:
37282 case V16HI_FTYPE_V16SF_INT_V16HI_HI:
37283 case V16SI_FTYPE_V16SI_INT_V16SI_HI:
37284 case V4SI_FTYPE_V16SI_INT_V4SI_QI:
37285 case V4DI_FTYPE_V8DI_INT_V4DI_QI:
37286 case V4DF_FTYPE_V8DF_INT_V4DF_QI:
37287 case V4SF_FTYPE_V16SF_INT_V4SF_QI:
37288 case V8DI_FTYPE_V8DI_INT_V8DI_QI:
37289 nargs = 4;
37290 mask_pos = 2;
37291 nargs_constant = 1;
37292 break;
37293 case V16SF_FTYPE_V16SF_V4SF_INT_V16SF_HI:
37294 case V16SI_FTYPE_V16SI_V4SI_INT_V16SI_HI:
37295 case V8DF_FTYPE_V8DF_V8DF_INT_V8DF_QI:
37296 case V8DI_FTYPE_V8DI_V8DI_INT_V8DI_QI:
37297 case V16SF_FTYPE_V16SF_V16SF_INT_V16SF_HI:
37298 case V16SI_FTYPE_V16SI_V16SI_INT_V16SI_HI:
37299 case V4SF_FTYPE_V4SF_V4SF_INT_V4SF_QI:
37300 case V2DF_FTYPE_V2DF_V2DF_INT_V2DF_QI:
37301 case V8DF_FTYPE_V8DF_V4DF_INT_V8DF_QI:
37302 case V8DI_FTYPE_V8DI_V4DI_INT_V8DI_QI:
37303 case V4DF_FTYPE_V4DF_V4DF_INT_V4DF_QI:
37304 case V8SF_FTYPE_V8SF_V8SF_INT_V8SF_QI:
37305 case V8DF_FTYPE_V8DF_V2DF_INT_V8DF_QI:
37306 case V8DI_FTYPE_V8DI_V2DI_INT_V8DI_QI:
37307 case V8SI_FTYPE_V8SI_V8SI_INT_V8SI_QI:
37308 case V4DI_FTYPE_V4DI_V4DI_INT_V4DI_QI:
37309 case V4SI_FTYPE_V4SI_V4SI_INT_V4SI_QI:
37310 case V2DI_FTYPE_V2DI_V2DI_INT_V2DI_QI:
37311 case V32HI_FTYPE_V64QI_V64QI_INT_V32HI_SI:
37312 case V16HI_FTYPE_V32QI_V32QI_INT_V16HI_HI:
37313 case V8HI_FTYPE_V16QI_V16QI_INT_V8HI_QI:
37314 case V16SF_FTYPE_V16SF_V8SF_INT_V16SF_HI:
37315 case V16SI_FTYPE_V16SI_V8SI_INT_V16SI_HI:
37316 case V8SF_FTYPE_V8SF_V4SF_INT_V8SF_QI:
37317 case V8SI_FTYPE_V8SI_V4SI_INT_V8SI_QI:
37318 case V4DI_FTYPE_V4DI_V2DI_INT_V4DI_QI:
37319 case V4DF_FTYPE_V4DF_V2DF_INT_V4DF_QI:
37320 nargs = 5;
37321 mask_pos = 2;
37322 nargs_constant = 1;
37323 break;
37324 case V8DI_FTYPE_V8DI_V8DI_V8DI_INT_QI:
37325 case V16SF_FTYPE_V16SF_V16SF_V16SI_INT_HI:
37326 case V16SI_FTYPE_V16SI_V16SI_V16SI_INT_HI:
37327 case V2DF_FTYPE_V2DF_V2DF_V2DI_INT_QI:
37328 case V4SF_FTYPE_V4SF_V4SF_V4SI_INT_QI:
37329 case V8SF_FTYPE_V8SF_V8SF_V8SI_INT_QI:
37330 case V8SI_FTYPE_V8SI_V8SI_V8SI_INT_QI:
37331 case V4DF_FTYPE_V4DF_V4DF_V4DI_INT_QI:
37332 case V4DI_FTYPE_V4DI_V4DI_V4DI_INT_QI:
37333 case V4SI_FTYPE_V4SI_V4SI_V4SI_INT_QI:
37334 case V2DI_FTYPE_V2DI_V2DI_V2DI_INT_QI:
37335 nargs = 5;
37337 mask_pos = 1;
37338 nargs_constant = 1;
37339 break;
37341 default:
37342 gcc_unreachable ();
37345 gcc_assert (nargs <= ARRAY_SIZE (args));
37347 if (comparison != UNKNOWN)
37349 gcc_assert (nargs == 2);
37350 return ix86_expand_sse_compare (d, exp, target, swap);
37353 if (rmode == VOIDmode || rmode == tmode)
37355 if (optimize
37356 || target == 0
37357 || GET_MODE (target) != tmode
37358 || !insn_p->operand[0].predicate (target, tmode))
37359 target = gen_reg_rtx (tmode);
37360 real_target = target;
37362 else
37364 real_target = gen_reg_rtx (tmode);
37365 target = simplify_gen_subreg (rmode, real_target, tmode, 0);
37368 for (i = 0; i < nargs; i++)
37370 tree arg = CALL_EXPR_ARG (exp, i);
37371 rtx op = expand_normal (arg);
37372 machine_mode mode = insn_p->operand[i + 1].mode;
37373 bool match = insn_p->operand[i + 1].predicate (op, mode);
37375 if (last_arg_count && (i + 1) == nargs)
37377 /* SIMD shift insns take either an 8-bit immediate or a register
37378 as the shift count, but the builtin functions take an int as the
37379 count. If the count doesn't match, put it in a register. */
37380 if (!match)
37382 op = simplify_gen_subreg (SImode, op, GET_MODE (op), 0);
37383 if (!insn_p->operand[i + 1].predicate (op, mode))
37384 op = copy_to_reg (op);
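/* Arguments in the trailing immediate positions (counted from the end of
   the argument list, with MASK_POS skipping any mask operand that follows
   them) must be constants of the width the insn expects; non-constant
   values are diagnosed below.  */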
37387 else if ((mask_pos && (nargs - i - mask_pos) == nargs_constant) ||
37388 (!mask_pos && (nargs - i) <= nargs_constant))
37390 if (!match)
37391 switch (icode)
37393 case CODE_FOR_avx_vinsertf128v4di:
37394 case CODE_FOR_avx_vextractf128v4di:
37395 error ("the last argument must be a 1-bit immediate");
37396 return const0_rtx;
37398 case CODE_FOR_avx512f_cmpv8di3_mask:
37399 case CODE_FOR_avx512f_cmpv16si3_mask:
37400 case CODE_FOR_avx512f_ucmpv8di3_mask:
37401 case CODE_FOR_avx512f_ucmpv16si3_mask:
37402 case CODE_FOR_avx512vl_cmpv4di3_mask:
37403 case CODE_FOR_avx512vl_cmpv8si3_mask:
37404 case CODE_FOR_avx512vl_ucmpv4di3_mask:
37405 case CODE_FOR_avx512vl_ucmpv8si3_mask:
37406 case CODE_FOR_avx512vl_cmpv2di3_mask:
37407 case CODE_FOR_avx512vl_cmpv4si3_mask:
37408 case CODE_FOR_avx512vl_ucmpv2di3_mask:
37409 case CODE_FOR_avx512vl_ucmpv4si3_mask:
37410 error ("the last argument must be a 3-bit immediate");
37411 return const0_rtx;
37413 case CODE_FOR_sse4_1_roundsd:
37414 case CODE_FOR_sse4_1_roundss:
37416 case CODE_FOR_sse4_1_roundpd:
37417 case CODE_FOR_sse4_1_roundps:
37418 case CODE_FOR_avx_roundpd256:
37419 case CODE_FOR_avx_roundps256:
37421 case CODE_FOR_sse4_1_roundpd_vec_pack_sfix:
37422 case CODE_FOR_sse4_1_roundps_sfix:
37423 case CODE_FOR_avx_roundpd_vec_pack_sfix256:
37424 case CODE_FOR_avx_roundps_sfix256:
37426 case CODE_FOR_sse4_1_blendps:
37427 case CODE_FOR_avx_blendpd256:
37428 case CODE_FOR_avx_vpermilv4df:
37429 case CODE_FOR_avx_vpermilv4df_mask:
37430 case CODE_FOR_avx512f_getmantv8df_mask:
37431 case CODE_FOR_avx512f_getmantv16sf_mask:
37432 case CODE_FOR_avx512vl_getmantv8sf_mask:
37433 case CODE_FOR_avx512vl_getmantv4df_mask:
37434 case CODE_FOR_avx512vl_getmantv4sf_mask:
37435 case CODE_FOR_avx512vl_getmantv2df_mask:
37436 case CODE_FOR_avx512dq_rangepv8df_mask_round:
37437 case CODE_FOR_avx512dq_rangepv16sf_mask_round:
37438 case CODE_FOR_avx512dq_rangepv4df_mask:
37439 case CODE_FOR_avx512dq_rangepv8sf_mask:
37440 case CODE_FOR_avx512dq_rangepv2df_mask:
37441 case CODE_FOR_avx512dq_rangepv4sf_mask:
37442 case CODE_FOR_avx_shufpd256_mask:
37443 error ("the last argument must be a 4-bit immediate");
37444 return const0_rtx;
37446 case CODE_FOR_sha1rnds4:
37447 case CODE_FOR_sse4_1_blendpd:
37448 case CODE_FOR_avx_vpermilv2df:
37449 case CODE_FOR_avx_vpermilv2df_mask:
37450 case CODE_FOR_xop_vpermil2v2df3:
37451 case CODE_FOR_xop_vpermil2v4sf3:
37452 case CODE_FOR_xop_vpermil2v4df3:
37453 case CODE_FOR_xop_vpermil2v8sf3:
37454 case CODE_FOR_avx512f_vinsertf32x4_mask:
37455 case CODE_FOR_avx512f_vinserti32x4_mask:
37456 case CODE_FOR_avx512f_vextractf32x4_mask:
37457 case CODE_FOR_avx512f_vextracti32x4_mask:
37458 case CODE_FOR_sse2_shufpd:
37459 case CODE_FOR_sse2_shufpd_mask:
37460 case CODE_FOR_avx512dq_shuf_f64x2_mask:
37461 case CODE_FOR_avx512dq_shuf_i64x2_mask:
37462 case CODE_FOR_avx512vl_shuf_i32x4_mask:
37463 case CODE_FOR_avx512vl_shuf_f32x4_mask:
37464 error ("the last argument must be a 2-bit immediate");
37465 return const0_rtx;
37467 case CODE_FOR_avx_vextractf128v4df:
37468 case CODE_FOR_avx_vextractf128v8sf:
37469 case CODE_FOR_avx_vextractf128v8si:
37470 case CODE_FOR_avx_vinsertf128v4df:
37471 case CODE_FOR_avx_vinsertf128v8sf:
37472 case CODE_FOR_avx_vinsertf128v8si:
37473 case CODE_FOR_avx512f_vinsertf64x4_mask:
37474 case CODE_FOR_avx512f_vinserti64x4_mask:
37475 case CODE_FOR_avx512f_vextractf64x4_mask:
37476 case CODE_FOR_avx512f_vextracti64x4_mask:
37477 case CODE_FOR_avx512dq_vinsertf32x8_mask:
37478 case CODE_FOR_avx512dq_vinserti32x8_mask:
37479 case CODE_FOR_avx512vl_vinsertv4df:
37480 case CODE_FOR_avx512vl_vinsertv4di:
37481 case CODE_FOR_avx512vl_vinsertv8sf:
37482 case CODE_FOR_avx512vl_vinsertv8si:
37483 error ("the last argument must be a 1-bit immediate");
37484 return const0_rtx;
37486 case CODE_FOR_avx_vmcmpv2df3:
37487 case CODE_FOR_avx_vmcmpv4sf3:
37488 case CODE_FOR_avx_cmpv2df3:
37489 case CODE_FOR_avx_cmpv4sf3:
37490 case CODE_FOR_avx_cmpv4df3:
37491 case CODE_FOR_avx_cmpv8sf3:
37492 case CODE_FOR_avx512f_cmpv8df3_mask:
37493 case CODE_FOR_avx512f_cmpv16sf3_mask:
37494 case CODE_FOR_avx512f_vmcmpv2df3_mask:
37495 case CODE_FOR_avx512f_vmcmpv4sf3_mask:
37496 error ("the last argument must be a 5-bit immediate");
37497 return const0_rtx;
37499 default:
37500 switch (nargs_constant)
37502 case 2:
37503 if ((mask_pos && (nargs - i - mask_pos) == nargs_constant) ||
37504 (!mask_pos && (nargs - i) == nargs_constant))
37506 error ("the next to last argument must be an 8-bit immediate");
37507 break;
37509 case 1:
37510 error ("the last argument must be an 8-bit immediate");
37511 break;
37512 default:
37513 gcc_unreachable ();
37515 return const0_rtx;
37518 else
37520 if (VECTOR_MODE_P (mode))
37521 op = safe_vector_operand (op, mode);
37523 /* If we aren't optimizing, only allow one memory operand to
37524 be generated. */
37525 if (memory_operand (op, mode))
37526 num_memory++;
37528 op = fixup_modeless_constant (op, mode);
37530 if (GET_MODE (op) == mode || GET_MODE (op) == VOIDmode)
37532 if (optimize || !match || num_memory > 1)
37533 op = copy_to_mode_reg (mode, op);
37535 else
37537 op = copy_to_reg (op);
37538 op = simplify_gen_subreg (mode, op, GET_MODE (op), 0);
37542 args[i].op = op;
37543 args[i].mode = mode;
37546 switch (nargs)
37548 case 1:
37549 pat = GEN_FCN (icode) (real_target, args[0].op);
37550 break;
37551 case 2:
37552 pat = GEN_FCN (icode) (real_target, args[0].op, args[1].op);
37553 break;
37554 case 3:
37555 pat = GEN_FCN (icode) (real_target, args[0].op, args[1].op,
37556 args[2].op);
37557 break;
37558 case 4:
37559 pat = GEN_FCN (icode) (real_target, args[0].op, args[1].op,
37560 args[2].op, args[3].op);
37561 break;
37562 case 5:
37563 pat = GEN_FCN (icode) (real_target, args[0].op, args[1].op,
37564 args[2].op, args[3].op, args[4].op);
break;
37565 case 6:
37566 pat = GEN_FCN (icode) (real_target, args[0].op, args[1].op,
37567 args[2].op, args[3].op, args[4].op,
37568 args[5].op);
37569 break;
37570 default:
37571 gcc_unreachable ();
37574 if (! pat)
37575 return 0;
37577 emit_insn (pat);
37578 return target;
37581 /* Transform a pattern of the following layout:
37582 (parallel [
37583 (set (A B))
37584 (unspec [C] UNSPEC_EMBEDDED_ROUNDING)])
37586 into:
37587 (set (A B))
or a pattern of the layout:
37590 (parallel [ A B
...
37592 (unspec [C] UNSPEC_EMBEDDED_ROUNDING)
...
])
37595 into:
37596 (parallel [ A B ... ]) */
37598 static rtx
37599 ix86_erase_embedded_rounding (rtx pat)
37601 if (GET_CODE (pat) == INSN)
37602 pat = PATTERN (pat);
37604 gcc_assert (GET_CODE (pat) == PARALLEL);
37606 if (XVECLEN (pat, 0) == 2)
37608 rtx p0 = XVECEXP (pat, 0, 0);
37609 rtx p1 = XVECEXP (pat, 0, 1);
37611 gcc_assert (GET_CODE (p0) == SET
37612 && GET_CODE (p1) == UNSPEC
37613 && XINT (p1, 1) == UNSPEC_EMBEDDED_ROUNDING);
37615 return p0;
37617 else
37619 rtx *res = XALLOCAVEC (rtx, XVECLEN (pat, 0));
37620 int i = 0;
37621 int j = 0;
37623 for (; i < XVECLEN (pat, 0); ++i)
37625 rtx elem = XVECEXP (pat, 0, i);
37626 if (GET_CODE (elem) != UNSPEC
37627 || XINT (elem, 1) != UNSPEC_EMBEDDED_ROUNDING)
37628 res [j++] = elem;
37631 /* No more than one occurrence was removed. */
37632 gcc_assert (j >= XVECLEN (pat, 0) - 1);
37634 return gen_rtx_PARALLEL (GET_MODE (pat), gen_rtvec_v (j, res));
37638 /* Subroutine of ix86_expand_round_builtin to take care of comi insns
37639 with rounding. */
37640 static rtx
37641 ix86_expand_sse_comi_round (const struct builtin_description *d,
37642 tree exp, rtx target)
37644 rtx pat, set_dst;
37645 tree arg0 = CALL_EXPR_ARG (exp, 0);
37646 tree arg1 = CALL_EXPR_ARG (exp, 1);
37647 tree arg2 = CALL_EXPR_ARG (exp, 2);
37648 tree arg3 = CALL_EXPR_ARG (exp, 3);
37649 rtx op0 = expand_normal (arg0);
37650 rtx op1 = expand_normal (arg1);
37651 rtx op2 = expand_normal (arg2);
37652 rtx op3 = expand_normal (arg3);
37653 enum insn_code icode = d->icode;
37654 const struct insn_data_d *insn_p = &insn_data[icode];
37655 machine_mode mode0 = insn_p->operand[0].mode;
37656 machine_mode mode1 = insn_p->operand[1].mode;
37657 enum rtx_code comparison = UNEQ;
37658 bool need_ucomi = false;
37660 /* See avxintrin.h for values. */
37661 enum rtx_code comi_comparisons[32] =
37663 UNEQ, GT, GE, UNORDERED, LTGT, UNLE, UNLT, ORDERED, UNEQ, UNLT,
37664 UNLE, LT, LTGT, GE, GT, LT, UNEQ, GT, GE, UNORDERED, LTGT, UNLE,
37665 UNLT, ORDERED, UNEQ, UNLT, UNLE, LT, LTGT, GE, GT, LT
37667 bool need_ucomi_values[32] =
37669 true, false, false, true, true, false, false, true,
37670 true, false, false, true, true, false, false, true,
37671 false, true, true, false, false, true, true, false,
37672 false, true, true, false, false, true, true, false
37675 if (!CONST_INT_P (op2))
37677 error ("the third argument must be a comparison constant");
37678 return const0_rtx;
37680 if (INTVAL (op2) < 0 || INTVAL (op2) >= 32)
37682 error ("incorrect comparison mode");
37683 return const0_rtx;
37686 if (!insn_p->operand[2].predicate (op3, SImode))
37688 error ("incorrect rounding operand");
37689 return const0_rtx;
37692 comparison = comi_comparisons[INTVAL (op2)];
37693 need_ucomi = need_ucomi_values[INTVAL (op2)];
37695 if (VECTOR_MODE_P (mode0))
37696 op0 = safe_vector_operand (op0, mode0);
37697 if (VECTOR_MODE_P (mode1))
37698 op1 = safe_vector_operand (op1, mode1);
37700 target = gen_reg_rtx (SImode);
37701 emit_move_insn (target, const0_rtx);
37702 target = gen_rtx_SUBREG (QImode, target, 0);
37704 if ((optimize && !register_operand (op0, mode0))
37705 || !insn_p->operand[0].predicate (op0, mode0))
37706 op0 = copy_to_mode_reg (mode0, op0);
37707 if ((optimize && !register_operand (op1, mode1))
37708 || !insn_p->operand[1].predicate (op1, mode1))
37709 op1 = copy_to_mode_reg (mode1, op1);
37711 if (need_ucomi)
37712 icode = icode == CODE_FOR_sse_comi_round
37713 ? CODE_FOR_sse_ucomi_round
37714 : CODE_FOR_sse2_ucomi_round;
37716 pat = GEN_FCN (icode) (op0, op1, op3);
37717 if (! pat)
37718 return 0;
37720 /* Rounding operand can be either NO_ROUND or ROUND_SAE at this point. */
37721 if (INTVAL (op3) == NO_ROUND)
37723 pat = ix86_erase_embedded_rounding (pat);
37724 if (! pat)
37725 return 0;
37727 set_dst = SET_DEST (pat);
37729 else
37731 gcc_assert (GET_CODE (XVECEXP (pat, 0, 0)) == SET);
37732 set_dst = SET_DEST (XVECEXP (pat, 0, 0));
37735 emit_insn (pat);
37736 emit_insn (gen_rtx_SET (gen_rtx_STRICT_LOW_PART (VOIDmode, target),
37737 gen_rtx_fmt_ee (comparison, QImode,
37738 set_dst,
37739 const0_rtx)));
37741 return SUBREG_REG (target);
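/* Subroutine of ix86_expand_builtin to take care of insns with a variable
   number of operands and an explicit rounding / SAE immediate as the last
   argument.  */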
37744 static rtx
37745 ix86_expand_round_builtin (const struct builtin_description *d,
37746 tree exp, rtx target)
37748 rtx pat;
37749 unsigned int i, nargs;
37750 struct
37752 rtx op;
37753 machine_mode mode;
37754 } args[6];
37755 enum insn_code icode = d->icode;
37756 const struct insn_data_d *insn_p = &insn_data[icode];
37757 machine_mode tmode = insn_p->operand[0].mode;
37758 unsigned int nargs_constant = 0;
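/* Set when the rounding immediate turns out to be NO_ROUND, in which case
   the UNSPEC_EMBEDDED_ROUNDING wrapper is stripped from the generated
   pattern before it is emitted.  */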
37759 unsigned int redundant_embed_rnd = 0;
37761 switch ((enum ix86_builtin_func_type) d->flag)
37763 case UINT64_FTYPE_V2DF_INT:
37764 case UINT64_FTYPE_V4SF_INT:
37765 case UINT_FTYPE_V2DF_INT:
37766 case UINT_FTYPE_V4SF_INT:
37767 case INT64_FTYPE_V2DF_INT:
37768 case INT64_FTYPE_V4SF_INT:
37769 case INT_FTYPE_V2DF_INT:
37770 case INT_FTYPE_V4SF_INT:
37771 nargs = 2;
37772 break;
37773 case V4SF_FTYPE_V4SF_UINT_INT:
37774 case V4SF_FTYPE_V4SF_UINT64_INT:
37775 case V2DF_FTYPE_V2DF_UINT64_INT:
37776 case V4SF_FTYPE_V4SF_INT_INT:
37777 case V4SF_FTYPE_V4SF_INT64_INT:
37778 case V2DF_FTYPE_V2DF_INT64_INT:
37779 case V4SF_FTYPE_V4SF_V4SF_INT:
37780 case V2DF_FTYPE_V2DF_V2DF_INT:
37781 case V4SF_FTYPE_V4SF_V2DF_INT:
37782 case V2DF_FTYPE_V2DF_V4SF_INT:
37783 nargs = 3;
37784 break;
37785 case V8SF_FTYPE_V8DF_V8SF_QI_INT:
37786 case V8DF_FTYPE_V8DF_V8DF_QI_INT:
37787 case V8SI_FTYPE_V8DF_V8SI_QI_INT:
37788 case V8DI_FTYPE_V8DF_V8DI_QI_INT:
37789 case V8SF_FTYPE_V8DI_V8SF_QI_INT:
37790 case V8DF_FTYPE_V8DI_V8DF_QI_INT:
37791 case V16SF_FTYPE_V16SF_V16SF_HI_INT:
37792 case V8DI_FTYPE_V8SF_V8DI_QI_INT:
37793 case V16SF_FTYPE_V16SI_V16SF_HI_INT:
37794 case V16SI_FTYPE_V16SF_V16SI_HI_INT:
37795 case V8DF_FTYPE_V8SF_V8DF_QI_INT:
37796 case V16SF_FTYPE_V16HI_V16SF_HI_INT:
37797 case V2DF_FTYPE_V2DF_V2DF_V2DF_INT:
37798 case V4SF_FTYPE_V4SF_V4SF_V4SF_INT:
37799 nargs = 4;
37800 break;
37801 case V4SF_FTYPE_V4SF_V4SF_INT_INT:
37802 case V2DF_FTYPE_V2DF_V2DF_INT_INT:
37803 nargs_constant = 2;
37804 nargs = 4;
37805 break;
37806 case INT_FTYPE_V4SF_V4SF_INT_INT:
37807 case INT_FTYPE_V2DF_V2DF_INT_INT:
37808 return ix86_expand_sse_comi_round (d, exp, target);
37809 case V8DF_FTYPE_V8DF_V8DF_V8DF_QI_INT:
37810 case V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT:
37811 case V2DF_FTYPE_V2DF_V2DF_V2DF_QI_INT:
37812 case V2DF_FTYPE_V2DF_V4SF_V2DF_QI_INT:
37813 case V4SF_FTYPE_V4SF_V4SF_V4SF_QI_INT:
37814 case V4SF_FTYPE_V4SF_V2DF_V4SF_QI_INT:
37815 nargs = 5;
37816 break;
37817 case V16SF_FTYPE_V16SF_INT_V16SF_HI_INT:
37818 case V8DF_FTYPE_V8DF_INT_V8DF_QI_INT:
37819 nargs_constant = 4;
37820 nargs = 5;
37821 break;
37822 case QI_FTYPE_V8DF_V8DF_INT_QI_INT:
37823 case QI_FTYPE_V2DF_V2DF_INT_QI_INT:
37824 case HI_FTYPE_V16SF_V16SF_INT_HI_INT:
37825 case QI_FTYPE_V4SF_V4SF_INT_QI_INT:
37826 nargs_constant = 3;
37827 nargs = 5;
37828 break;
37829 case V16SF_FTYPE_V16SF_V16SF_INT_V16SF_HI_INT:
37830 case V8DF_FTYPE_V8DF_V8DF_INT_V8DF_QI_INT:
37831 case V4SF_FTYPE_V4SF_V4SF_INT_V4SF_QI_INT:
37832 case V2DF_FTYPE_V2DF_V2DF_INT_V2DF_QI_INT:
37833 nargs = 6;
37834 nargs_constant = 4;
37835 break;
37836 case V8DF_FTYPE_V8DF_V8DF_V8DI_INT_QI_INT:
37837 case V16SF_FTYPE_V16SF_V16SF_V16SI_INT_HI_INT:
37838 case V2DF_FTYPE_V2DF_V2DF_V2DI_INT_QI_INT:
37839 case V4SF_FTYPE_V4SF_V4SF_V4SI_INT_QI_INT:
37840 nargs = 6;
37841 nargs_constant = 3;
37842 break;
37843 default:
37844 gcc_unreachable ();
37846 gcc_assert (nargs <= ARRAY_SIZE (args));
37848 if (optimize
37849 || target == 0
37850 || GET_MODE (target) != tmode
37851 || !insn_p->operand[0].predicate (target, tmode))
37852 target = gen_reg_rtx (tmode);
37854 for (i = 0; i < nargs; i++)
37856 tree arg = CALL_EXPR_ARG (exp, i);
37857 rtx op = expand_normal (arg);
37858 machine_mode mode = insn_p->operand[i + 1].mode;
37859 bool match = insn_p->operand[i + 1].predicate (op, mode);
37861 if (i == nargs - nargs_constant)
37863 if (!match)
37865 switch (icode)
37867 case CODE_FOR_avx512f_getmantv8df_mask_round:
37868 case CODE_FOR_avx512f_getmantv16sf_mask_round:
37869 case CODE_FOR_avx512f_vgetmantv2df_round:
37870 case CODE_FOR_avx512f_vgetmantv4sf_round:
37871 error ("the immediate argument must be a 4-bit immediate");
37872 return const0_rtx;
37873 case CODE_FOR_avx512f_cmpv8df3_mask_round:
37874 case CODE_FOR_avx512f_cmpv16sf3_mask_round:
37875 case CODE_FOR_avx512f_vmcmpv2df3_mask_round:
37876 case CODE_FOR_avx512f_vmcmpv4sf3_mask_round:
37877 error ("the immediate argument must be a 5-bit immediate");
37878 return const0_rtx;
37879 default:
37880 error ("the immediate argument must be an 8-bit immediate");
37881 return const0_rtx;
37885 else if (i == nargs-1)
37887 if (!insn_p->operand[nargs].predicate (op, SImode))
37889 error ("incorrect rounding operand");
37890 return const0_rtx;
37893 /* If there is no rounding, use the normal version of the pattern. */
37894 if (INTVAL (op) == NO_ROUND)
37895 redundant_embed_rnd = 1;
37897 else
37899 if (VECTOR_MODE_P (mode))
37900 op = safe_vector_operand (op, mode);
37902 op = fixup_modeless_constant (op, mode);
37904 if (GET_MODE (op) == mode || GET_MODE (op) == VOIDmode)
37906 if (optimize || !match)
37907 op = copy_to_mode_reg (mode, op);
37909 else
37911 op = copy_to_reg (op);
37912 op = simplify_gen_subreg (mode, op, GET_MODE (op), 0);
37916 args[i].op = op;
37917 args[i].mode = mode;
37920 switch (nargs)
37922 case 1:
37923 pat = GEN_FCN (icode) (target, args[0].op);
37924 break;
37925 case 2:
37926 pat = GEN_FCN (icode) (target, args[0].op, args[1].op);
37927 break;
37928 case 3:
37929 pat = GEN_FCN (icode) (target, args[0].op, args[1].op,
37930 args[2].op);
37931 break;
37932 case 4:
37933 pat = GEN_FCN (icode) (target, args[0].op, args[1].op,
37934 args[2].op, args[3].op);
37935 break;
37936 case 5:
37937 pat = GEN_FCN (icode) (target, args[0].op, args[1].op,
37938 args[2].op, args[3].op, args[4].op);
break;
37939 case 6:
37940 pat = GEN_FCN (icode) (target, args[0].op, args[1].op,
37941 args[2].op, args[3].op, args[4].op,
37942 args[5].op);
37943 break;
37944 default:
37945 gcc_unreachable ();
37948 if (!pat)
37949 return 0;
37951 if (redundant_embed_rnd)
37952 pat = ix86_erase_embedded_rounding (pat);
37954 emit_insn (pat);
37955 return target;
37958 /* Subroutine of ix86_expand_builtin to take care of special insns
37959 with a variable number of operands. */
37961 static rtx
37962 ix86_expand_special_args_builtin (const struct builtin_description *d,
37963 tree exp, rtx target)
37965 tree arg;
37966 rtx pat, op;
37967 unsigned int i, nargs, arg_adjust, memory;
37968 bool aligned_mem = false;
37969 struct
37971 rtx op;
37972 machine_mode mode;
37973 } args[3];
37974 enum insn_code icode = d->icode;
37975 bool last_arg_constant = false;
37976 const struct insn_data_d *insn_p = &insn_data[icode];
37977 machine_mode tmode = insn_p->operand[0].mode;
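/* LOAD builtins produce their result in TARGET (possibly reading memory);
   STORE builtins consume their first argument as the destination, which
   for the memory forms becomes a MEM built from that pointer.  */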
37978 enum { load, store } klass;
37980 switch ((enum ix86_builtin_func_type) d->flag)
37982 case VOID_FTYPE_VOID:
37983 emit_insn (GEN_FCN (icode) (target));
37984 return 0;
37985 case VOID_FTYPE_UINT64:
37986 case VOID_FTYPE_UNSIGNED:
37987 nargs = 0;
37988 klass = store;
37989 memory = 0;
37990 break;
37992 case INT_FTYPE_VOID:
37993 case USHORT_FTYPE_VOID:
37994 case UINT64_FTYPE_VOID:
37995 case UNSIGNED_FTYPE_VOID:
37996 nargs = 0;
37997 klass = load;
37998 memory = 0;
37999 break;
38000 case UINT64_FTYPE_PUNSIGNED:
38001 case V2DI_FTYPE_PV2DI:
38002 case V4DI_FTYPE_PV4DI:
38003 case V32QI_FTYPE_PCCHAR:
38004 case V16QI_FTYPE_PCCHAR:
38005 case V8SF_FTYPE_PCV4SF:
38006 case V8SF_FTYPE_PCFLOAT:
38007 case V4SF_FTYPE_PCFLOAT:
38008 case V4DF_FTYPE_PCV2DF:
38009 case V4DF_FTYPE_PCDOUBLE:
38010 case V2DF_FTYPE_PCDOUBLE:
38011 case VOID_FTYPE_PVOID:
38012 case V16SI_FTYPE_PV4SI:
38013 case V16SF_FTYPE_PV4SF:
38014 case V8DI_FTYPE_PV4DI:
38015 case V8DI_FTYPE_PV8DI:
38016 case V8DF_FTYPE_PV4DF:
38017 nargs = 1;
38018 klass = load;
38019 memory = 0;
38020 switch (icode)
38022 case CODE_FOR_sse4_1_movntdqa:
38023 case CODE_FOR_avx2_movntdqa:
38024 case CODE_FOR_avx512f_movntdqa:
38025 aligned_mem = true;
38026 break;
38027 default:
38028 break;
38030 break;
38031 case VOID_FTYPE_PV2SF_V4SF:
38032 case VOID_FTYPE_PV8DI_V8DI:
38033 case VOID_FTYPE_PV4DI_V4DI:
38034 case VOID_FTYPE_PV2DI_V2DI:
38035 case VOID_FTYPE_PCHAR_V32QI:
38036 case VOID_FTYPE_PCHAR_V16QI:
38037 case VOID_FTYPE_PFLOAT_V16SF:
38038 case VOID_FTYPE_PFLOAT_V8SF:
38039 case VOID_FTYPE_PFLOAT_V4SF:
38040 case VOID_FTYPE_PDOUBLE_V8DF:
38041 case VOID_FTYPE_PDOUBLE_V4DF:
38042 case VOID_FTYPE_PDOUBLE_V2DF:
38043 case VOID_FTYPE_PLONGLONG_LONGLONG:
38044 case VOID_FTYPE_PULONGLONG_ULONGLONG:
38045 case VOID_FTYPE_PINT_INT:
38046 nargs = 1;
38047 klass = store;
38048 /* Reserve memory operand for target. */
38049 memory = ARRAY_SIZE (args);
38050 switch (icode)
38052 /* These builtins and instructions require the memory
38053 to be properly aligned. */
38054 case CODE_FOR_avx_movntv4di:
38055 case CODE_FOR_sse2_movntv2di:
38056 case CODE_FOR_avx_movntv8sf:
38057 case CODE_FOR_sse_movntv4sf:
38058 case CODE_FOR_sse4a_vmmovntv4sf:
38059 case CODE_FOR_avx_movntv4df:
38060 case CODE_FOR_sse2_movntv2df:
38061 case CODE_FOR_sse4a_vmmovntv2df:
38062 case CODE_FOR_sse2_movntidi:
38063 case CODE_FOR_sse_movntq:
38064 case CODE_FOR_sse2_movntisi:
38065 case CODE_FOR_avx512f_movntv16sf:
38066 case CODE_FOR_avx512f_movntv8df:
38067 case CODE_FOR_avx512f_movntv8di:
38068 aligned_mem = true;
38069 break;
38070 default:
38071 break;
38073 break;
38074 case V4SF_FTYPE_V4SF_PCV2SF:
38075 case V2DF_FTYPE_V2DF_PCDOUBLE:
38076 nargs = 2;
38077 klass = load;
38078 memory = 1;
38079 break;
38080 case V8SF_FTYPE_PCV8SF_V8SI:
38081 case V4DF_FTYPE_PCV4DF_V4DI:
38082 case V4SF_FTYPE_PCV4SF_V4SI:
38083 case V2DF_FTYPE_PCV2DF_V2DI:
38084 case V8SI_FTYPE_PCV8SI_V8SI:
38085 case V4DI_FTYPE_PCV4DI_V4DI:
38086 case V4SI_FTYPE_PCV4SI_V4SI:
38087 case V2DI_FTYPE_PCV2DI_V2DI:
38088 nargs = 2;
38089 klass = load;
38090 memory = 0;
38091 break;
38092 case VOID_FTYPE_PV8DF_V8DF_QI:
38093 case VOID_FTYPE_PV16SF_V16SF_HI:
38094 case VOID_FTYPE_PV8DI_V8DI_QI:
38095 case VOID_FTYPE_PV4DI_V4DI_QI:
38096 case VOID_FTYPE_PV2DI_V2DI_QI:
38097 case VOID_FTYPE_PV16SI_V16SI_HI:
38098 case VOID_FTYPE_PV8SI_V8SI_QI:
38099 case VOID_FTYPE_PV4SI_V4SI_QI:
38100 switch (icode)
38102 /* These builtins and instructions require the memory
38103 to be properly aligned. */
38104 case CODE_FOR_avx512f_storev16sf_mask:
38105 case CODE_FOR_avx512f_storev16si_mask:
38106 case CODE_FOR_avx512f_storev8df_mask:
38107 case CODE_FOR_avx512f_storev8di_mask:
38108 case CODE_FOR_avx512vl_storev8sf_mask:
38109 case CODE_FOR_avx512vl_storev8si_mask:
38110 case CODE_FOR_avx512vl_storev4df_mask:
38111 case CODE_FOR_avx512vl_storev4di_mask:
38112 case CODE_FOR_avx512vl_storev4sf_mask:
38113 case CODE_FOR_avx512vl_storev4si_mask:
38114 case CODE_FOR_avx512vl_storev2df_mask:
38115 case CODE_FOR_avx512vl_storev2di_mask:
38116 aligned_mem = true;
38117 break;
38118 default:
38119 break;
38121 /* FALLTHRU */
38122 case VOID_FTYPE_PV8SF_V8SI_V8SF:
38123 case VOID_FTYPE_PV4DF_V4DI_V4DF:
38124 case VOID_FTYPE_PV4SF_V4SI_V4SF:
38125 case VOID_FTYPE_PV2DF_V2DI_V2DF:
38126 case VOID_FTYPE_PV8SI_V8SI_V8SI:
38127 case VOID_FTYPE_PV4DI_V4DI_V4DI:
38128 case VOID_FTYPE_PV4SI_V4SI_V4SI:
38129 case VOID_FTYPE_PV2DI_V2DI_V2DI:
38130 case VOID_FTYPE_PDOUBLE_V2DF_QI:
38131 case VOID_FTYPE_PFLOAT_V4SF_QI:
38132 case VOID_FTYPE_PV8SI_V8DI_QI:
38133 case VOID_FTYPE_PV8HI_V8DI_QI:
38134 case VOID_FTYPE_PV16HI_V16SI_HI:
38135 case VOID_FTYPE_PV16QI_V8DI_QI:
38136 case VOID_FTYPE_PV16QI_V16SI_HI:
38137 case VOID_FTYPE_PV4SI_V4DI_QI:
38138 case VOID_FTYPE_PV4SI_V2DI_QI:
38139 case VOID_FTYPE_PV8HI_V4DI_QI:
38140 case VOID_FTYPE_PV8HI_V2DI_QI:
38141 case VOID_FTYPE_PV8HI_V8SI_QI:
38142 case VOID_FTYPE_PV8HI_V4SI_QI:
38143 case VOID_FTYPE_PV16QI_V4DI_QI:
38144 case VOID_FTYPE_PV16QI_V2DI_QI:
38145 case VOID_FTYPE_PV16QI_V8SI_QI:
38146 case VOID_FTYPE_PV16QI_V4SI_QI:
38147 case VOID_FTYPE_PV8HI_V8HI_QI:
38148 case VOID_FTYPE_PV16HI_V16HI_HI:
38149 case VOID_FTYPE_PV32HI_V32HI_SI:
38150 case VOID_FTYPE_PV16QI_V16QI_HI:
38151 case VOID_FTYPE_PV32QI_V32QI_SI:
38152 case VOID_FTYPE_PV64QI_V64QI_DI:
38153 case VOID_FTYPE_PV4DF_V4DF_QI:
38154 case VOID_FTYPE_PV2DF_V2DF_QI:
38155 case VOID_FTYPE_PV8SF_V8SF_QI:
38156 case VOID_FTYPE_PV4SF_V4SF_QI:
38157 nargs = 2;
38158 klass = store;
38159 /* Reserve memory operand for target. */
38160 memory = ARRAY_SIZE (args);
38161 break;
38162 case V4SF_FTYPE_PCV4SF_V4SF_QI:
38163 case V8SF_FTYPE_PCV8SF_V8SF_QI:
38164 case V16SF_FTYPE_PCV16SF_V16SF_HI:
38165 case V4SI_FTYPE_PCV4SI_V4SI_QI:
38166 case V8SI_FTYPE_PCV8SI_V8SI_QI:
38167 case V16SI_FTYPE_PCV16SI_V16SI_HI:
38168 case V2DF_FTYPE_PCV2DF_V2DF_QI:
38169 case V4DF_FTYPE_PCV4DF_V4DF_QI:
38170 case V8DF_FTYPE_PCV8DF_V8DF_QI:
38171 case V2DI_FTYPE_PCV2DI_V2DI_QI:
38172 case V4DI_FTYPE_PCV4DI_V4DI_QI:
38173 case V8DI_FTYPE_PCV8DI_V8DI_QI:
38174 case V2DF_FTYPE_PCDOUBLE_V2DF_QI:
38175 case V4SF_FTYPE_PCFLOAT_V4SF_QI:
38176 case V8HI_FTYPE_PCV8HI_V8HI_QI:
38177 case V16HI_FTYPE_PCV16HI_V16HI_HI:
38178 case V32HI_FTYPE_PCV32HI_V32HI_SI:
38179 case V16QI_FTYPE_PCV16QI_V16QI_HI:
38180 case V32QI_FTYPE_PCV32QI_V32QI_SI:
38181 case V64QI_FTYPE_PCV64QI_V64QI_DI:
38182 nargs = 3;
38183 klass = load;
38184 memory = 0;
38185 switch (icode)
38187 /* These builtins and instructions require the memory
38188 to be properly aligned. */
38189 case CODE_FOR_avx512f_loadv16sf_mask:
38190 case CODE_FOR_avx512f_loadv16si_mask:
38191 case CODE_FOR_avx512f_loadv8df_mask:
38192 case CODE_FOR_avx512f_loadv8di_mask:
38193 case CODE_FOR_avx512vl_loadv8sf_mask:
38194 case CODE_FOR_avx512vl_loadv8si_mask:
38195 case CODE_FOR_avx512vl_loadv4df_mask:
38196 case CODE_FOR_avx512vl_loadv4di_mask:
38197 case CODE_FOR_avx512vl_loadv4sf_mask:
38198 case CODE_FOR_avx512vl_loadv4si_mask:
38199 case CODE_FOR_avx512vl_loadv2df_mask:
38200 case CODE_FOR_avx512vl_loadv2di_mask:
38201 case CODE_FOR_avx512bw_loadv64qi_mask:
38202 case CODE_FOR_avx512vl_loadv32qi_mask:
38203 case CODE_FOR_avx512vl_loadv16qi_mask:
38204 case CODE_FOR_avx512bw_loadv32hi_mask:
38205 case CODE_FOR_avx512vl_loadv16hi_mask:
38206 case CODE_FOR_avx512vl_loadv8hi_mask:
38207 aligned_mem = true;
38208 break;
38209 default:
38210 break;
38212 break;
38213 case VOID_FTYPE_UINT_UINT_UINT:
38214 case VOID_FTYPE_UINT64_UINT_UINT:
38215 case UCHAR_FTYPE_UINT_UINT_UINT:
38216 case UCHAR_FTYPE_UINT64_UINT_UINT:
38217 nargs = 3;
38218 klass = load;
38219 memory = ARRAY_SIZE (args);
38220 last_arg_constant = true;
38221 break;
38222 default:
38223 gcc_unreachable ();
38226 gcc_assert (nargs <= ARRAY_SIZE (args));
38228 if (klass == store)
38230 arg = CALL_EXPR_ARG (exp, 0);
38231 op = expand_normal (arg);
38232 gcc_assert (target == 0);
38233 if (memory)
38235 op = ix86_zero_extend_to_Pmode (op);
38236 target = gen_rtx_MEM (tmode, op);
38237 /* target at this point has just BITS_PER_UNIT MEM_ALIGN
38238 on it. Try to improve it using get_pointer_alignment,
38239 and if the special builtin is one that requires strict
38240 mode alignment, also from its GET_MODE_ALIGNMENT.
38241 Failure to do so could lead to ix86_legitimate_combined_insn
38242 rejecting all changes to such insns. */
38243 unsigned int align = get_pointer_alignment (arg);
38244 if (aligned_mem && align < GET_MODE_ALIGNMENT (tmode))
38245 align = GET_MODE_ALIGNMENT (tmode);
38246 if (MEM_ALIGN (target) < align)
38247 set_mem_align (target, align);
38249 else
38250 target = force_reg (tmode, op);
38251 arg_adjust = 1;
38253 else
38255 arg_adjust = 0;
38256 if (optimize
38257 || target == 0
38258 || !register_operand (target, tmode)
38259 || GET_MODE (target) != tmode)
38260 target = gen_reg_rtx (tmode);
38263 for (i = 0; i < nargs; i++)
38265 machine_mode mode = insn_p->operand[i + 1].mode;
38266 bool match;
38268 arg = CALL_EXPR_ARG (exp, i + arg_adjust);
38269 op = expand_normal (arg);
38270 match = insn_p->operand[i + 1].predicate (op, mode);
38272 if (last_arg_constant && (i + 1) == nargs)
38274 if (!match)
38276 if (icode == CODE_FOR_lwp_lwpvalsi3
38277 || icode == CODE_FOR_lwp_lwpinssi3
38278 || icode == CODE_FOR_lwp_lwpvaldi3
38279 || icode == CODE_FOR_lwp_lwpinsdi3)
38280 error ("the last argument must be a 32-bit immediate");
38281 else
38282 error ("the last argument must be an 8-bit immediate");
38283 return const0_rtx;
38286 else
38288 if (i == memory)
38290 /* This must be the memory operand. */
38291 op = ix86_zero_extend_to_Pmode (op);
38292 op = gen_rtx_MEM (mode, op);
38293 /* op at this point has just BITS_PER_UNIT MEM_ALIGN
38294 on it. Try to improve it using get_pointer_alignment,
38295 and if the special builtin is one that requires strict
38296 mode alignment, also from its GET_MODE_ALIGNMENT.
38297 Failure to do so could lead to ix86_legitimate_combined_insn
38298 rejecting all changes to such insns. */
38299 unsigned int align = get_pointer_alignment (arg);
38300 if (aligned_mem && align < GET_MODE_ALIGNMENT (mode))
38301 align = GET_MODE_ALIGNMENT (mode);
38302 if (MEM_ALIGN (op) < align)
38303 set_mem_align (op, align);
38305 else
38307 /* This must be a register. */
38308 if (VECTOR_MODE_P (mode))
38309 op = safe_vector_operand (op, mode);
38311 op = fixup_modeless_constant (op, mode);
38313 if (GET_MODE (op) == mode || GET_MODE (op) == VOIDmode)
38314 op = copy_to_mode_reg (mode, op);
38315 else
38317 op = copy_to_reg (op);
38318 op = simplify_gen_subreg (mode, op, GET_MODE (op), 0);
38323 args[i].op = op;
38324 args[i].mode = mode;
38327 switch (nargs)
38329 case 0:
38330 pat = GEN_FCN (icode) (target);
38331 break;
38332 case 1:
38333 pat = GEN_FCN (icode) (target, args[0].op);
38334 break;
38335 case 2:
38336 pat = GEN_FCN (icode) (target, args[0].op, args[1].op);
38337 break;
38338 case 3:
38339 pat = GEN_FCN (icode) (target, args[0].op, args[1].op, args[2].op);
38340 break;
38341 default:
38342 gcc_unreachable ();
38345 if (! pat)
38346 return 0;
38347 emit_insn (pat);
38348 return klass == store ? 0 : target;
38351 /* Return the integer constant in ARG. Constrain it to be in the range
38352 of the subparts of VEC_TYPE; issue an error if not. */
38354 static int
38355 get_element_number (tree vec_type, tree arg)
38357 unsigned HOST_WIDE_INT elt, max = TYPE_VECTOR_SUBPARTS (vec_type) - 1;
38359 if (!tree_fits_uhwi_p (arg)
38360 || (elt = tree_to_uhwi (arg), elt > max))
38362 error ("selector must be an integer constant in the range 0..%wi", max);
38363 return 0;
38366 return elt;
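/* Editor's note (illustrative sketch, not part of the original source):
   the user-visible effect of the range check above, assuming the MMX
   extract wrapper from xmmintrin.h, which funnels into the vec_ext
   builtins handled below:

     #include <xmmintrin.h>
     int ok (__m64 v)  { return _mm_extract_pi16 (v, 3); } // 0 <= 3 <= 3: accepted
     int bad (__m64 v) { return _mm_extract_pi16 (v, 7); } // "selector must be an
                                                           //  integer constant in the
                                                           //  range 0..3"

   On an out-of-range or non-constant selector get_element_number emits
   the error and returns 0, so expansion falls back to element 0.  */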
38369 /* A subroutine of ix86_expand_builtin. These builtins are a wrapper around
38370 ix86_expand_vector_init. We DO have language-level syntax for this, in
38371 the form of (type){ init-list }. Except that since we can't place emms
38372 instructions from inside the compiler, we can't allow the use of MMX
38373 registers unless the user explicitly asks for it. So we do *not* define
38374 vec_set/vec_extract/vec_init patterns for MMX modes in mmx.md. Instead
38375 we have builtins invoked by mmintrin.h that give us license to emit
38376 these sorts of instructions. */
38378 static rtx
38379 ix86_expand_vec_init_builtin (tree type, tree exp, rtx target)
38381 machine_mode tmode = TYPE_MODE (type);
38382 machine_mode inner_mode = GET_MODE_INNER (tmode);
38383 int i, n_elt = GET_MODE_NUNITS (tmode);
38384 rtvec v = rtvec_alloc (n_elt);
38386 gcc_assert (VECTOR_MODE_P (tmode));
38387 gcc_assert (call_expr_nargs (exp) == n_elt);
38389 for (i = 0; i < n_elt; ++i)
38391 rtx x = expand_normal (CALL_EXPR_ARG (exp, i));
38392 RTVEC_ELT (v, i) = gen_lowpart (inner_mode, x);
38395 if (!target || !register_operand (target, tmode))
38396 target = gen_reg_rtx (tmode);
38398 ix86_expand_vector_init (true, target, gen_rtx_PARALLEL (tmode, v));
38399 return target;
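/* Editor's note (illustrative sketch, not part of the original source):
   how these vec_init builtins are reached in practice, assuming the
   wrappers in mmintrin.h:

     #include <mmintrin.h>
     __m64 v = _mm_set_pi32 (hi, lo);
     // expands roughly to (__m64) __builtin_ia32_vec_init_v2si (lo, hi),
     // i.e. element 0 of the vector is LO and element 1 is HI.

   Each call argument becomes one RTVEC element above, and
   ix86_expand_vector_init then builds the vector value.  */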
38402 /* A subroutine of ix86_expand_builtin. These builtins are a wrapper around
38403 ix86_expand_vector_extract. They would be redundant (for non-MMX) if we
38404 had a language-level syntax for referencing vector elements. */
38406 static rtx
38407 ix86_expand_vec_ext_builtin (tree exp, rtx target)
38409 machine_mode tmode, mode0;
38410 tree arg0, arg1;
38411 int elt;
38412 rtx op0;
38414 arg0 = CALL_EXPR_ARG (exp, 0);
38415 arg1 = CALL_EXPR_ARG (exp, 1);
38417 op0 = expand_normal (arg0);
38418 elt = get_element_number (TREE_TYPE (arg0), arg1);
38420 tmode = TYPE_MODE (TREE_TYPE (TREE_TYPE (arg0)));
38421 mode0 = TYPE_MODE (TREE_TYPE (arg0));
38422 gcc_assert (VECTOR_MODE_P (mode0));
38424 op0 = force_reg (mode0, op0);
38426 if (optimize || !target || !register_operand (target, tmode))
38427 target = gen_reg_rtx (tmode);
38429 ix86_expand_vector_extract (true, target, op0, elt);
38431 return target;
38434 /* A subroutine of ix86_expand_builtin. These builtins are a wrapper around
38435 ix86_expand_vector_set. They would be redundant (for non-MMX) if we had
38436 a language-level syntax for referencing vector elements. */
38438 static rtx
38439 ix86_expand_vec_set_builtin (tree exp)
38441 machine_mode tmode, mode1;
38442 tree arg0, arg1, arg2;
38443 int elt;
38444 rtx op0, op1, target;
38446 arg0 = CALL_EXPR_ARG (exp, 0);
38447 arg1 = CALL_EXPR_ARG (exp, 1);
38448 arg2 = CALL_EXPR_ARG (exp, 2);
38450 tmode = TYPE_MODE (TREE_TYPE (arg0));
38451 mode1 = TYPE_MODE (TREE_TYPE (TREE_TYPE (arg0)));
38452 gcc_assert (VECTOR_MODE_P (tmode));
38454 op0 = expand_expr (arg0, NULL_RTX, tmode, EXPAND_NORMAL);
38455 op1 = expand_expr (arg1, NULL_RTX, mode1, EXPAND_NORMAL);
38456 elt = get_element_number (TREE_TYPE (arg0), arg2);
38458 if (GET_MODE (op1) != mode1 && GET_MODE (op1) != VOIDmode)
38459 op1 = convert_modes (mode1, GET_MODE (op1), op1, true);
38461 op0 = force_reg (tmode, op0);
38462 op1 = force_reg (mode1, op1);
38464 /* OP0 is the source of these builtin functions and shouldn't be
38465 modified. Create a copy, use it and return it as target. */
38466 target = gen_reg_rtx (tmode);
38467 emit_move_insn (target, op0);
38468 ix86_expand_vector_set (true, target, op1, elt);
38470 return target;
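/* Editor's note (illustrative sketch, not part of the original source):
   the copy-then-modify behaviour implemented above, seen from the
   intrinsic that reaches this builtin (wrapper name as in xmmintrin.h):

     #include <xmmintrin.h>
     __m64 a = _mm_setzero_si64 ();
     __m64 b = _mm_insert_pi16 (a, 42, 1);
     // b is a fresh vector with element 1 replaced by 42; a itself is
     // never modified, matching the comment above: OP0 is copied into a
     // new register before ix86_expand_vector_set runs.  */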
38473 /* Emit conditional move of SRC to DST with condition
38474 OP1 CODE OP2. */
38475 static void
38476 ix86_emit_cmove (rtx dst, rtx src, enum rtx_code code, rtx op1, rtx op2)
38478 rtx t;
38480 if (TARGET_CMOVE)
38482 t = ix86_expand_compare (code, op1, op2);
38483 emit_insn (gen_rtx_SET (dst, gen_rtx_IF_THEN_ELSE (GET_MODE (dst), t,
38484 src, dst)));
38486 else
38488 rtx_code_label *nomove = gen_label_rtx ();
38489 emit_cmp_and_jump_insns (op1, op2, reverse_condition (code),
38490 const0_rtx, GET_MODE (op1), 1, nomove);
38491 emit_move_insn (dst, src);
38492 emit_label (nomove);
38496 /* Choose max of DST and SRC and put it to DST. */
38497 static void
38498 ix86_emit_move_max (rtx dst, rtx src)
38500 ix86_emit_cmove (dst, src, LTU, dst, src);
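/* Editor's note (illustrative sketch, not part of the original source):
   source-level equivalents of the two helpers above.  With TARGET_CMOVE
   ix86_emit_cmove behaves like

     dst = (op1 CODE op2) ? src : dst;

   and without cmov it emits the branchy form

     if (!(op1 CODE op2)) goto nomove;
     dst = src;
   nomove: ;

   so ix86_emit_move_max leaves the unsigned maximum in DST:

     dst = (dst < src) ? src : dst;   // LTU, i.e. unsigned compare  */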
38503 /* Expand an expression EXP that calls a built-in function,
38504 with result going to TARGET if that's convenient
38505 (and in mode MODE if that's convenient).
38506 SUBTARGET may be used as the target for computing one of EXP's operands.
38507 IGNORE is nonzero if the value is to be ignored. */
38509 static rtx
38510 ix86_expand_builtin (tree exp, rtx target, rtx subtarget,
38511 machine_mode mode, int ignore)
38513 const struct builtin_description *d;
38514 size_t i;
38515 enum insn_code icode;
38516 tree fndecl = TREE_OPERAND (CALL_EXPR_FN (exp), 0);
38517 tree arg0, arg1, arg2, arg3, arg4;
38518 rtx op0, op1, op2, op3, op4, pat, insn;
38519 machine_mode mode0, mode1, mode2, mode3, mode4;
38520 unsigned int fcode = DECL_FUNCTION_CODE (fndecl);
38522 /* For CPU builtins that can be folded, fold first and expand the fold. */
38523 switch (fcode)
38525 case IX86_BUILTIN_CPU_INIT:
38527 /* Make it call __cpu_indicator_init in libgcc. */
38528 tree call_expr, fndecl, type;
38529 type = build_function_type_list (integer_type_node, NULL_TREE);
38530 fndecl = build_fn_decl ("__cpu_indicator_init", type);
38531 call_expr = build_call_expr (fndecl, 0);
38532 return expand_expr (call_expr, target, mode, EXPAND_NORMAL);
38534 case IX86_BUILTIN_CPU_IS:
38535 case IX86_BUILTIN_CPU_SUPPORTS:
38537 tree arg0 = CALL_EXPR_ARG (exp, 0);
38538 tree fold_expr = fold_builtin_cpu (fndecl, &arg0);
38539 gcc_assert (fold_expr != NULL_TREE);
38540 return expand_expr (fold_expr, target, mode, EXPAND_NORMAL);
38544 /* Determine whether the builtin function is available under the current ISA.
38545 Originally the builtin was not created if it wasn't applicable to the
38546 current ISA based on the command line switches. With function specific
38547 options, we need to check in the context of the function making the call
38548 whether it is supported. */
38549 if (ix86_builtins_isa[fcode].isa
38550 && !(ix86_builtins_isa[fcode].isa & ix86_isa_flags))
38552 char *opts = ix86_target_string (ix86_builtins_isa[fcode].isa, 0, NULL,
38553 NULL, (enum fpmath_unit) 0, false);
38555 if (!opts)
38556 error ("%qE needs unknown isa option", fndecl);
38557 else
38559 gcc_assert (opts != NULL);
38560 error ("%qE needs isa option %s", fndecl, opts);
38561 free (opts);
38563 return const0_rtx;
38566 switch (fcode)
38568 case IX86_BUILTIN_BNDMK:
38569 if (!target
38570 || GET_MODE (target) != BNDmode
38571 || !register_operand (target, BNDmode))
38572 target = gen_reg_rtx (BNDmode);
38574 arg0 = CALL_EXPR_ARG (exp, 0);
38575 arg1 = CALL_EXPR_ARG (exp, 1);
38577 op0 = expand_normal (arg0);
38578 op1 = expand_normal (arg1);
38580 if (!register_operand (op0, Pmode))
38581 op0 = ix86_zero_extend_to_Pmode (op0);
38582 if (!register_operand (op1, Pmode))
38583 op1 = ix86_zero_extend_to_Pmode (op1);
38585 /* Builtin arg1 is the size of the block, but instruction op1 should
38586 be (size - 1). */
38587 op1 = expand_simple_binop (Pmode, PLUS, op1, constm1_rtx,
38588 NULL_RTX, 1, OPTAB_DIRECT);
38590 emit_insn (BNDmode == BND64mode
38591 ? gen_bnd64_mk (target, op0, op1)
38592 : gen_bnd32_mk (target, op0, op1));
38593 return target;
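/* Editor's note (illustrative sketch, not part of the original source):
   bounds produced by the expansion above for
   __builtin_ia32_bndmk (p, size).  The instruction sees SIZE - 1, so the
   described object is the closed range

     lb = (uintptr_t) p;
     ub = (uintptr_t) p + size - 1;   // kept in one's complement form,
                                      // per the comments later in this
                                      // function (BNDNARROW/BNDINT).  */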
38595 case IX86_BUILTIN_BNDSTX:
38596 arg0 = CALL_EXPR_ARG (exp, 0);
38597 arg1 = CALL_EXPR_ARG (exp, 1);
38598 arg2 = CALL_EXPR_ARG (exp, 2);
38600 op0 = expand_normal (arg0);
38601 op1 = expand_normal (arg1);
38602 op2 = expand_normal (arg2);
38604 if (!register_operand (op0, Pmode))
38605 op0 = ix86_zero_extend_to_Pmode (op0);
38606 if (!register_operand (op1, BNDmode))
38607 op1 = copy_to_mode_reg (BNDmode, op1);
38608 if (!register_operand (op2, Pmode))
38609 op2 = ix86_zero_extend_to_Pmode (op2);
38611 emit_insn (BNDmode == BND64mode
38612 ? gen_bnd64_stx (op2, op0, op1)
38613 : gen_bnd32_stx (op2, op0, op1));
38614 return 0;
38616 case IX86_BUILTIN_BNDLDX:
38617 if (!target
38618 || GET_MODE (target) != BNDmode
38619 || !register_operand (target, BNDmode))
38620 target = gen_reg_rtx (BNDmode);
38622 arg0 = CALL_EXPR_ARG (exp, 0);
38623 arg1 = CALL_EXPR_ARG (exp, 1);
38625 op0 = expand_normal (arg0);
38626 op1 = expand_normal (arg1);
38628 if (!register_operand (op0, Pmode))
38629 op0 = ix86_zero_extend_to_Pmode (op0);
38630 if (!register_operand (op1, Pmode))
38631 op1 = ix86_zero_extend_to_Pmode (op1);
38633 emit_insn (BNDmode == BND64mode
38634 ? gen_bnd64_ldx (target, op0, op1)
38635 : gen_bnd32_ldx (target, op0, op1));
38636 return target;
38638 case IX86_BUILTIN_BNDCL:
38639 arg0 = CALL_EXPR_ARG (exp, 0);
38640 arg1 = CALL_EXPR_ARG (exp, 1);
38642 op0 = expand_normal (arg0);
38643 op1 = expand_normal (arg1);
38645 if (!register_operand (op0, Pmode))
38646 op0 = ix86_zero_extend_to_Pmode (op0);
38647 if (!register_operand (op1, BNDmode))
38648 op1 = copy_to_mode_reg (BNDmode, op1);
38650 emit_insn (BNDmode == BND64mode
38651 ? gen_bnd64_cl (op1, op0)
38652 : gen_bnd32_cl (op1, op0));
38653 return 0;
38655 case IX86_BUILTIN_BNDCU:
38656 arg0 = CALL_EXPR_ARG (exp, 0);
38657 arg1 = CALL_EXPR_ARG (exp, 1);
38659 op0 = expand_normal (arg0);
38660 op1 = expand_normal (arg1);
38662 if (!register_operand (op0, Pmode))
38663 op0 = ix86_zero_extend_to_Pmode (op0);
38664 if (!register_operand (op1, BNDmode))
38665 op1 = copy_to_mode_reg (BNDmode, op1);
38667 emit_insn (BNDmode == BND64mode
38668 ? gen_bnd64_cu (op1, op0)
38669 : gen_bnd32_cu (op1, op0));
38670 return 0;
38672 case IX86_BUILTIN_BNDRET:
38673 arg0 = CALL_EXPR_ARG (exp, 0);
38674 gcc_assert (TREE_CODE (arg0) == SSA_NAME);
38675 target = chkp_get_rtl_bounds (arg0);
38677 /* If no bounds were specified for the returned value,
38678 then use INIT bounds.  This usually happens when
38679 some built-in function is expanded. */
38680 if (!target)
38682 rtx t1 = gen_reg_rtx (Pmode);
38683 rtx t2 = gen_reg_rtx (Pmode);
38684 target = gen_reg_rtx (BNDmode);
38685 emit_move_insn (t1, const0_rtx);
38686 emit_move_insn (t2, constm1_rtx);
38687 emit_insn (BNDmode == BND64mode
38688 ? gen_bnd64_mk (target, t1, t2)
38689 : gen_bnd32_mk (target, t1, t2));
38692 gcc_assert (target && REG_P (target));
38693 return target;
38695 case IX86_BUILTIN_BNDNARROW:
38697 rtx m1, m1h1, m1h2, lb, ub, t1;
38699 /* Return value and lb. */
38700 arg0 = CALL_EXPR_ARG (exp, 0);
38701 /* Bounds. */
38702 arg1 = CALL_EXPR_ARG (exp, 1);
38703 /* Size. */
38704 arg2 = CALL_EXPR_ARG (exp, 2);
38706 lb = expand_normal (arg0);
38707 op1 = expand_normal (arg1);
38708 op2 = expand_normal (arg2);
38710 /* Size was passed but we need to use (size - 1) as for bndmk. */
38711 op2 = expand_simple_binop (Pmode, PLUS, op2, constm1_rtx,
38712 NULL_RTX, 1, OPTAB_DIRECT);
38714 /* Add LB to size and invert to get UB. */
38715 op2 = expand_simple_binop (Pmode, PLUS, op2, lb,
38716 op2, 1, OPTAB_DIRECT);
38717 ub = expand_simple_unop (Pmode, NOT, op2, op2, 1);
38719 if (!register_operand (lb, Pmode))
38720 lb = ix86_zero_extend_to_Pmode (lb);
38721 if (!register_operand (ub, Pmode))
38722 ub = ix86_zero_extend_to_Pmode (ub);
38724 /* We need to move bounds to memory before any computations. */
38725 if (MEM_P (op1))
38726 m1 = op1;
38727 else
38729 m1 = assign_386_stack_local (BNDmode, SLOT_TEMP);
38730 emit_move_insn (m1, op1);
38733 /* Generate mem expression to be used for access to LB and UB. */
38734 m1h1 = adjust_address (m1, Pmode, 0);
38735 m1h2 = adjust_address (m1, Pmode, GET_MODE_SIZE (Pmode));
38737 t1 = gen_reg_rtx (Pmode);
38739 /* Compute LB. */
38740 emit_move_insn (t1, m1h1);
38741 ix86_emit_move_max (t1, lb);
38742 emit_move_insn (m1h1, t1);
38744 /* Compute UB. UB is stored in 1's complement form. Therefore
38745 we also use max here. */
38746 emit_move_insn (t1, m1h2);
38747 ix86_emit_move_max (t1, ub);
38748 emit_move_insn (m1h2, t1);
38750 op2 = gen_reg_rtx (BNDmode);
38751 emit_move_insn (op2, m1);
38753 return chkp_join_splitted_slot (lb, op2);
38756 case IX86_BUILTIN_BNDINT:
38758 rtx res, rh1, rh2, lb1, lb2, ub1, ub2;
38760 if (!target
38761 || GET_MODE (target) != BNDmode
38762 || !register_operand (target, BNDmode))
38763 target = gen_reg_rtx (BNDmode);
38765 arg0 = CALL_EXPR_ARG (exp, 0);
38766 arg1 = CALL_EXPR_ARG (exp, 1);
38768 op0 = expand_normal (arg0);
38769 op1 = expand_normal (arg1);
38771 res = assign_386_stack_local (BNDmode, SLOT_TEMP);
38772 rh1 = adjust_address (res, Pmode, 0);
38773 rh2 = adjust_address (res, Pmode, GET_MODE_SIZE (Pmode));
38775 /* Put first bounds to temporaries. */
38776 lb1 = gen_reg_rtx (Pmode);
38777 ub1 = gen_reg_rtx (Pmode);
38778 if (MEM_P (op0))
38780 emit_move_insn (lb1, adjust_address (op0, Pmode, 0));
38781 emit_move_insn (ub1, adjust_address (op0, Pmode,
38782 GET_MODE_SIZE (Pmode)));
38784 else
38786 emit_move_insn (res, op0);
38787 emit_move_insn (lb1, rh1);
38788 emit_move_insn (ub1, rh2);
38791 /* Put second bounds to temporaries. */
38792 lb2 = gen_reg_rtx (Pmode);
38793 ub2 = gen_reg_rtx (Pmode);
38794 if (MEM_P (op1))
38796 emit_move_insn (lb2, adjust_address (op1, Pmode, 0));
38797 emit_move_insn (ub2, adjust_address (op1, Pmode,
38798 GET_MODE_SIZE (Pmode)));
38800 else
38802 emit_move_insn (res, op1);
38803 emit_move_insn (lb2, rh1);
38804 emit_move_insn (ub2, rh2);
38807 /* Compute LB. */
38808 ix86_emit_move_max (lb1, lb2);
38809 emit_move_insn (rh1, lb1);
38811 /* Compute UB. UB is stored in 1's complement form. Therefore
38812 we also use max here. */
38813 ix86_emit_move_max (ub1, ub2);
38814 emit_move_insn (rh2, ub1);
38816 emit_move_insn (target, res);
38818 return target;
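/* Editor's note (illustrative sketch, not part of the original source):
   why "max" is reused for both halves above.  The intersection of
   [lb1, ub1] and [lb2, ub2] is

     lb = lb1 > lb2 ? lb1 : lb2;      // max of the lower bounds
     ub = ub1 < ub2 ? ub1 : ub2;      // min of the upper bounds

   and because the stored form of an upper bound is ~ub (one's
   complement), taking the minimum over UB is the same as taking the
   maximum over ~UB, hence the second ix86_emit_move_max call.  */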
38821 case IX86_BUILTIN_SIZEOF:
38823 tree name;
38824 rtx symbol;
38826 if (!target
38827 || GET_MODE (target) != Pmode
38828 || !register_operand (target, Pmode))
38829 target = gen_reg_rtx (Pmode);
38831 arg0 = CALL_EXPR_ARG (exp, 0);
38832 gcc_assert (TREE_CODE (arg0) == VAR_DECL);
38834 name = DECL_ASSEMBLER_NAME (arg0);
38835 symbol = gen_rtx_SYMBOL_REF (Pmode, IDENTIFIER_POINTER (name));
38837 emit_insn (Pmode == SImode
38838 ? gen_move_size_reloc_si (target, symbol)
38839 : gen_move_size_reloc_di (target, symbol));
38841 return target;
38844 case IX86_BUILTIN_BNDLOWER:
38846 rtx mem, hmem;
38848 if (!target
38849 || GET_MODE (target) != Pmode
38850 || !register_operand (target, Pmode))
38851 target = gen_reg_rtx (Pmode);
38853 arg0 = CALL_EXPR_ARG (exp, 0);
38854 op0 = expand_normal (arg0);
38856 /* We need to move bounds to memory first. */
38857 if (MEM_P (op0))
38858 mem = op0;
38859 else
38861 mem = assign_386_stack_local (BNDmode, SLOT_TEMP);
38862 emit_move_insn (mem, op0);
38865 /* Generate mem expression to access LB and load it. */
38866 hmem = adjust_address (mem, Pmode, 0);
38867 emit_move_insn (target, hmem);
38869 return target;
38872 case IX86_BUILTIN_BNDUPPER:
38874 rtx mem, hmem, res;
38876 if (!target
38877 || GET_MODE (target) != Pmode
38878 || !register_operand (target, Pmode))
38879 target = gen_reg_rtx (Pmode);
38881 arg0 = CALL_EXPR_ARG (exp, 0);
38882 op0 = expand_normal (arg0);
38884 /* We need to move bounds to memory first. */
38885 if (MEM_P (op0))
38886 mem = op0;
38887 else
38889 mem = assign_386_stack_local (BNDmode, SLOT_TEMP);
38890 emit_move_insn (mem, op0);
38893 /* Generate mem expression to access UB. */
38894 hmem = adjust_address (mem, Pmode, GET_MODE_SIZE (Pmode));
38896 /* We need to invert all bits of UB. */
38897 res = expand_simple_unop (Pmode, NOT, hmem, target, 1);
38899 if (res != target)
38900 emit_move_insn (target, res);
38902 return target;
38905 case IX86_BUILTIN_MASKMOVQ:
38906 case IX86_BUILTIN_MASKMOVDQU:
38907 icode = (fcode == IX86_BUILTIN_MASKMOVQ
38908 ? CODE_FOR_mmx_maskmovq
38909 : CODE_FOR_sse2_maskmovdqu);
38910 /* Note the arg order is different from the operand order. */
38911 arg1 = CALL_EXPR_ARG (exp, 0);
38912 arg2 = CALL_EXPR_ARG (exp, 1);
38913 arg0 = CALL_EXPR_ARG (exp, 2);
38914 op0 = expand_normal (arg0);
38915 op1 = expand_normal (arg1);
38916 op2 = expand_normal (arg2);
38917 mode0 = insn_data[icode].operand[0].mode;
38918 mode1 = insn_data[icode].operand[1].mode;
38919 mode2 = insn_data[icode].operand[2].mode;
38921 op0 = ix86_zero_extend_to_Pmode (op0);
38922 op0 = gen_rtx_MEM (mode1, op0);
38924 if (!insn_data[icode].operand[0].predicate (op0, mode0))
38925 op0 = copy_to_mode_reg (mode0, op0);
38926 if (!insn_data[icode].operand[1].predicate (op1, mode1))
38927 op1 = copy_to_mode_reg (mode1, op1);
38928 if (!insn_data[icode].operand[2].predicate (op2, mode2))
38929 op2 = copy_to_mode_reg (mode2, op2);
38930 pat = GEN_FCN (icode) (op0, op1, op2);
38931 if (! pat)
38932 return 0;
38933 emit_insn (pat);
38934 return 0;
38936 case IX86_BUILTIN_LDMXCSR:
38937 op0 = expand_normal (CALL_EXPR_ARG (exp, 0));
38938 target = assign_386_stack_local (SImode, SLOT_TEMP);
38939 emit_move_insn (target, op0);
38940 emit_insn (gen_sse_ldmxcsr (target));
38941 return 0;
38943 case IX86_BUILTIN_STMXCSR:
38944 target = assign_386_stack_local (SImode, SLOT_TEMP);
38945 emit_insn (gen_sse_stmxcsr (target));
38946 return copy_to_mode_reg (SImode, target);
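/* Editor's note (illustrative sketch, not part of the original source):
   the user-level pair these two expansions serve, assuming the wrappers
   in xmmintrin.h.  Both go through a stack slot because ldmxcsr and
   stmxcsr only accept memory operands:

     #include <xmmintrin.h>
     unsigned int csr = _mm_getcsr ();     // stmxcsr to SLOT_TEMP, reload
     _mm_setcsr (csr | 0x8040);            // e.g. set FTZ (bit 15) and
                                           // DAZ (bit 6)  */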
38948 case IX86_BUILTIN_CLFLUSH:
38949 arg0 = CALL_EXPR_ARG (exp, 0);
38950 op0 = expand_normal (arg0);
38951 icode = CODE_FOR_sse2_clflush;
38952 if (!insn_data[icode].operand[0].predicate (op0, Pmode))
38953 op0 = ix86_zero_extend_to_Pmode (op0);
38955 emit_insn (gen_sse2_clflush (op0));
38956 return 0;
38958 case IX86_BUILTIN_CLWB:
38959 arg0 = CALL_EXPR_ARG (exp, 0);
38960 op0 = expand_normal (arg0);
38961 icode = CODE_FOR_clwb;
38962 if (!insn_data[icode].operand[0].predicate (op0, Pmode))
38963 op0 = ix86_zero_extend_to_Pmode (op0);
38965 emit_insn (gen_clwb (op0));
38966 return 0;
38968 case IX86_BUILTIN_CLFLUSHOPT:
38969 arg0 = CALL_EXPR_ARG (exp, 0);
38970 op0 = expand_normal (arg0);
38971 icode = CODE_FOR_clflushopt;
38972 if (!insn_data[icode].operand[0].predicate (op0, Pmode))
38973 op0 = ix86_zero_extend_to_Pmode (op0);
38975 emit_insn (gen_clflushopt (op0));
38976 return 0;
38978 case IX86_BUILTIN_MONITOR:
38979 case IX86_BUILTIN_MONITORX:
38980 arg0 = CALL_EXPR_ARG (exp, 0);
38981 arg1 = CALL_EXPR_ARG (exp, 1);
38982 arg2 = CALL_EXPR_ARG (exp, 2);
38983 op0 = expand_normal (arg0);
38984 op1 = expand_normal (arg1);
38985 op2 = expand_normal (arg2);
38986 if (!REG_P (op0))
38987 op0 = ix86_zero_extend_to_Pmode (op0);
38988 if (!REG_P (op1))
38989 op1 = copy_to_mode_reg (SImode, op1);
38990 if (!REG_P (op2))
38991 op2 = copy_to_mode_reg (SImode, op2);
38993 emit_insn (fcode == IX86_BUILTIN_MONITOR
38994 ? ix86_gen_monitor (op0, op1, op2)
38995 : ix86_gen_monitorx (op0, op1, op2));
38996 return 0;
38998 case IX86_BUILTIN_MWAIT:
38999 arg0 = CALL_EXPR_ARG (exp, 0);
39000 arg1 = CALL_EXPR_ARG (exp, 1);
39001 op0 = expand_normal (arg0);
39002 op1 = expand_normal (arg1);
39003 if (!REG_P (op0))
39004 op0 = copy_to_mode_reg (SImode, op0);
39005 if (!REG_P (op1))
39006 op1 = copy_to_mode_reg (SImode, op1);
39007 emit_insn (gen_sse3_mwait (op0, op1));
39008 return 0;
39010 case IX86_BUILTIN_MWAITX:
39011 arg0 = CALL_EXPR_ARG (exp, 0);
39012 arg1 = CALL_EXPR_ARG (exp, 1);
39013 arg2 = CALL_EXPR_ARG (exp, 2);
39014 op0 = expand_normal (arg0);
39015 op1 = expand_normal (arg1);
39016 op2 = expand_normal (arg2);
39017 if (!REG_P (op0))
39018 op0 = copy_to_mode_reg (SImode, op0);
39019 if (!REG_P (op1))
39020 op1 = copy_to_mode_reg (SImode, op1);
39021 if (!REG_P (op2))
39022 op2 = copy_to_mode_reg (SImode, op2);
39023 emit_insn (gen_mwaitx (op0, op1, op2));
39024 return 0;
39026 case IX86_BUILTIN_VEC_INIT_V2SI:
39027 case IX86_BUILTIN_VEC_INIT_V4HI:
39028 case IX86_BUILTIN_VEC_INIT_V8QI:
39029 return ix86_expand_vec_init_builtin (TREE_TYPE (exp), exp, target);
39031 case IX86_BUILTIN_VEC_EXT_V2DF:
39032 case IX86_BUILTIN_VEC_EXT_V2DI:
39033 case IX86_BUILTIN_VEC_EXT_V4SF:
39034 case IX86_BUILTIN_VEC_EXT_V4SI:
39035 case IX86_BUILTIN_VEC_EXT_V8HI:
39036 case IX86_BUILTIN_VEC_EXT_V2SI:
39037 case IX86_BUILTIN_VEC_EXT_V4HI:
39038 case IX86_BUILTIN_VEC_EXT_V16QI:
39039 return ix86_expand_vec_ext_builtin (exp, target);
39041 case IX86_BUILTIN_VEC_SET_V2DI:
39042 case IX86_BUILTIN_VEC_SET_V4SF:
39043 case IX86_BUILTIN_VEC_SET_V4SI:
39044 case IX86_BUILTIN_VEC_SET_V8HI:
39045 case IX86_BUILTIN_VEC_SET_V4HI:
39046 case IX86_BUILTIN_VEC_SET_V16QI:
39047 return ix86_expand_vec_set_builtin (exp);
39049 case IX86_BUILTIN_INFQ:
39050 case IX86_BUILTIN_HUGE_VALQ:
39052 REAL_VALUE_TYPE inf;
39053 rtx tmp;
39055 real_inf (&inf);
39056 tmp = CONST_DOUBLE_FROM_REAL_VALUE (inf, mode);
39058 tmp = validize_mem (force_const_mem (mode, tmp));
39060 if (target == 0)
39061 target = gen_reg_rtx (mode);
39063 emit_move_insn (target, tmp);
39064 return target;
39067 case IX86_BUILTIN_RDPMC:
39068 case IX86_BUILTIN_RDTSC:
39069 case IX86_BUILTIN_RDTSCP:
39071 op0 = gen_reg_rtx (DImode);
39072 op1 = gen_reg_rtx (DImode);
39074 if (fcode == IX86_BUILTIN_RDPMC)
39076 arg0 = CALL_EXPR_ARG (exp, 0);
39077 op2 = expand_normal (arg0);
39078 if (!register_operand (op2, SImode))
39079 op2 = copy_to_mode_reg (SImode, op2);
39081 insn = (TARGET_64BIT
39082 ? gen_rdpmc_rex64 (op0, op1, op2)
39083 : gen_rdpmc (op0, op2));
39084 emit_insn (insn);
39086 else if (fcode == IX86_BUILTIN_RDTSC)
39088 insn = (TARGET_64BIT
39089 ? gen_rdtsc_rex64 (op0, op1)
39090 : gen_rdtsc (op0));
39091 emit_insn (insn);
39093 else
39095 op2 = gen_reg_rtx (SImode);
39097 insn = (TARGET_64BIT
39098 ? gen_rdtscp_rex64 (op0, op1, op2)
39099 : gen_rdtscp (op0, op2));
39100 emit_insn (insn);
39102 arg0 = CALL_EXPR_ARG (exp, 0);
39103 op4 = expand_normal (arg0);
39104 if (!address_operand (op4, VOIDmode))
39106 op4 = convert_memory_address (Pmode, op4);
39107 op4 = copy_addr_to_reg (op4);
39109 emit_move_insn (gen_rtx_MEM (SImode, op4), op2);
39112 if (target == 0)
39114 /* MODE is VOIDmode if __builtin_rd* has been called
39115 without an lhs. */
39116 if (mode == VOIDmode)
39117 return target;
39118 target = gen_reg_rtx (mode);
39121 if (TARGET_64BIT)
39123 op1 = expand_simple_binop (DImode, ASHIFT, op1, GEN_INT (32),
39124 op1, 1, OPTAB_DIRECT);
39125 op0 = expand_simple_binop (DImode, IOR, op0, op1,
39126 op0, 1, OPTAB_DIRECT);
39129 emit_move_insn (target, op0);
39130 return target;
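/* Editor's note (illustrative sketch, not part of the original source):
   the 64-bit recombination emitted above when TARGET_64BIT, where the
   hardware returns the counter split across two registers (low half in
   OP0, high half in OP1):

     unsigned long long tsc = ((unsigned long long) hi << 32) | lo;

   i.e. the ASHIFT by 32 followed by IOR.  On 32-bit targets the DImode
   destination already holds the full value, so no recombination is
   needed.  */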
39132 case IX86_BUILTIN_FXSAVE:
39133 case IX86_BUILTIN_FXRSTOR:
39134 case IX86_BUILTIN_FXSAVE64:
39135 case IX86_BUILTIN_FXRSTOR64:
39136 case IX86_BUILTIN_FNSTENV:
39137 case IX86_BUILTIN_FLDENV:
39138 mode0 = BLKmode;
39139 switch (fcode)
39141 case IX86_BUILTIN_FXSAVE:
39142 icode = CODE_FOR_fxsave;
39143 break;
39144 case IX86_BUILTIN_FXRSTOR:
39145 icode = CODE_FOR_fxrstor;
39146 break;
39147 case IX86_BUILTIN_FXSAVE64:
39148 icode = CODE_FOR_fxsave64;
39149 break;
39150 case IX86_BUILTIN_FXRSTOR64:
39151 icode = CODE_FOR_fxrstor64;
39152 break;
39153 case IX86_BUILTIN_FNSTENV:
39154 icode = CODE_FOR_fnstenv;
39155 break;
39156 case IX86_BUILTIN_FLDENV:
39157 icode = CODE_FOR_fldenv;
39158 break;
39159 default:
39160 gcc_unreachable ();
39163 arg0 = CALL_EXPR_ARG (exp, 0);
39164 op0 = expand_normal (arg0);
39166 if (!address_operand (op0, VOIDmode))
39168 op0 = convert_memory_address (Pmode, op0);
39169 op0 = copy_addr_to_reg (op0);
39171 op0 = gen_rtx_MEM (mode0, op0);
39173 pat = GEN_FCN (icode) (op0);
39174 if (pat)
39175 emit_insn (pat);
39176 return 0;
39178 case IX86_BUILTIN_XSAVE:
39179 case IX86_BUILTIN_XRSTOR:
39180 case IX86_BUILTIN_XSAVE64:
39181 case IX86_BUILTIN_XRSTOR64:
39182 case IX86_BUILTIN_XSAVEOPT:
39183 case IX86_BUILTIN_XSAVEOPT64:
39184 case IX86_BUILTIN_XSAVES:
39185 case IX86_BUILTIN_XRSTORS:
39186 case IX86_BUILTIN_XSAVES64:
39187 case IX86_BUILTIN_XRSTORS64:
39188 case IX86_BUILTIN_XSAVEC:
39189 case IX86_BUILTIN_XSAVEC64:
39190 arg0 = CALL_EXPR_ARG (exp, 0);
39191 arg1 = CALL_EXPR_ARG (exp, 1);
39192 op0 = expand_normal (arg0);
39193 op1 = expand_normal (arg1);
39195 if (!address_operand (op0, VOIDmode))
39197 op0 = convert_memory_address (Pmode, op0);
39198 op0 = copy_addr_to_reg (op0);
39200 op0 = gen_rtx_MEM (BLKmode, op0);
39202 op1 = force_reg (DImode, op1);
39204 if (TARGET_64BIT)
39206 op2 = expand_simple_binop (DImode, LSHIFTRT, op1, GEN_INT (32),
39207 NULL, 1, OPTAB_DIRECT);
39208 switch (fcode)
39210 case IX86_BUILTIN_XSAVE:
39211 icode = CODE_FOR_xsave_rex64;
39212 break;
39213 case IX86_BUILTIN_XRSTOR:
39214 icode = CODE_FOR_xrstor_rex64;
39215 break;
39216 case IX86_BUILTIN_XSAVE64:
39217 icode = CODE_FOR_xsave64;
39218 break;
39219 case IX86_BUILTIN_XRSTOR64:
39220 icode = CODE_FOR_xrstor64;
39221 break;
39222 case IX86_BUILTIN_XSAVEOPT:
39223 icode = CODE_FOR_xsaveopt_rex64;
39224 break;
39225 case IX86_BUILTIN_XSAVEOPT64:
39226 icode = CODE_FOR_xsaveopt64;
39227 break;
39228 case IX86_BUILTIN_XSAVES:
39229 icode = CODE_FOR_xsaves_rex64;
39230 break;
39231 case IX86_BUILTIN_XRSTORS:
39232 icode = CODE_FOR_xrstors_rex64;
39233 break;
39234 case IX86_BUILTIN_XSAVES64:
39235 icode = CODE_FOR_xsaves64;
39236 break;
39237 case IX86_BUILTIN_XRSTORS64:
39238 icode = CODE_FOR_xrstors64;
39239 break;
39240 case IX86_BUILTIN_XSAVEC:
39241 icode = CODE_FOR_xsavec_rex64;
39242 break;
39243 case IX86_BUILTIN_XSAVEC64:
39244 icode = CODE_FOR_xsavec64;
39245 break;
39246 default:
39247 gcc_unreachable ();
39250 op2 = gen_lowpart (SImode, op2);
39251 op1 = gen_lowpart (SImode, op1);
39252 pat = GEN_FCN (icode) (op0, op1, op2);
39254 else
39256 switch (fcode)
39258 case IX86_BUILTIN_XSAVE:
39259 icode = CODE_FOR_xsave;
39260 break;
39261 case IX86_BUILTIN_XRSTOR:
39262 icode = CODE_FOR_xrstor;
39263 break;
39264 case IX86_BUILTIN_XSAVEOPT:
39265 icode = CODE_FOR_xsaveopt;
39266 break;
39267 case IX86_BUILTIN_XSAVES:
39268 icode = CODE_FOR_xsaves;
39269 break;
39270 case IX86_BUILTIN_XRSTORS:
39271 icode = CODE_FOR_xrstors;
39272 break;
39273 case IX86_BUILTIN_XSAVEC:
39274 icode = CODE_FOR_xsavec;
39275 break;
39276 default:
39277 gcc_unreachable ();
39279 pat = GEN_FCN (icode) (op0, op1);
39282 if (pat)
39283 emit_insn (pat);
39284 return 0;
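/* Editor's note (illustrative sketch, not part of the original source):
   the EDX:EAX mask split performed above for the 64-bit patterns, based
   on the xsave convention that the requested-feature bitmap is passed
   in the EDX:EAX register pair:

     unsigned int eax = (unsigned int) mask;          // low  32 bits
     unsigned int edx = (unsigned int) (mask >> 32);  // high 32 bits

   which is exactly the LSHIFTRT by 32 plus the two gen_lowpart calls.  */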
39286 case IX86_BUILTIN_LLWPCB:
39287 arg0 = CALL_EXPR_ARG (exp, 0);
39288 op0 = expand_normal (arg0);
39289 icode = CODE_FOR_lwp_llwpcb;
39290 if (!insn_data[icode].operand[0].predicate (op0, Pmode))
39291 op0 = ix86_zero_extend_to_Pmode (op0);
39292 emit_insn (gen_lwp_llwpcb (op0));
39293 return 0;
39295 case IX86_BUILTIN_SLWPCB:
39296 icode = CODE_FOR_lwp_slwpcb;
39297 if (!target
39298 || !insn_data[icode].operand[0].predicate (target, Pmode))
39299 target = gen_reg_rtx (Pmode);
39300 emit_insn (gen_lwp_slwpcb (target));
39301 return target;
39303 case IX86_BUILTIN_BEXTRI32:
39304 case IX86_BUILTIN_BEXTRI64:
39305 arg0 = CALL_EXPR_ARG (exp, 0);
39306 arg1 = CALL_EXPR_ARG (exp, 1);
39307 op0 = expand_normal (arg0);
39308 op1 = expand_normal (arg1);
39309 icode = (fcode == IX86_BUILTIN_BEXTRI32
39310 ? CODE_FOR_tbm_bextri_si
39311 : CODE_FOR_tbm_bextri_di);
39312 if (!CONST_INT_P (op1))
39314 error ("last argument must be an immediate");
39315 return const0_rtx;
39317 else
39319 unsigned char length = (INTVAL (op1) >> 8) & 0xFF;
39320 unsigned char lsb_index = INTVAL (op1) & 0xFF;
39321 op1 = GEN_INT (length);
39322 op2 = GEN_INT (lsb_index);
39323 pat = GEN_FCN (icode) (target, op0, op1, op2);
39324 if (pat)
39325 emit_insn (pat);
39326 return target;
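/* Editor's note (illustrative sketch, not part of the original source):
   how the TBM control word is unpacked above: the field length lives in
   bits 15:8 of the immediate and the starting bit in bits 7:0.  For
   example, assuming the __builtin_ia32_bextri_u32 builtin:

     __builtin_ia32_bextri_u32 (x, 0x0804)
       == (x >> 4) & ((1u << 8) - 1);     // start = 4, length = 8  */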
39329 case IX86_BUILTIN_RDRAND16_STEP:
39330 icode = CODE_FOR_rdrandhi_1;
39331 mode0 = HImode;
39332 goto rdrand_step;
39334 case IX86_BUILTIN_RDRAND32_STEP:
39335 icode = CODE_FOR_rdrandsi_1;
39336 mode0 = SImode;
39337 goto rdrand_step;
39339 case IX86_BUILTIN_RDRAND64_STEP:
39340 icode = CODE_FOR_rdranddi_1;
39341 mode0 = DImode;
39343 rdrand_step:
39344 op0 = gen_reg_rtx (mode0);
39345 emit_insn (GEN_FCN (icode) (op0));
39347 arg0 = CALL_EXPR_ARG (exp, 0);
39348 op1 = expand_normal (arg0);
39349 if (!address_operand (op1, VOIDmode))
39351 op1 = convert_memory_address (Pmode, op1);
39352 op1 = copy_addr_to_reg (op1);
39354 emit_move_insn (gen_rtx_MEM (mode0, op1), op0);
39356 op1 = gen_reg_rtx (SImode);
39357 emit_move_insn (op1, CONST1_RTX (SImode));
39359 /* Emit SImode conditional move. */
39360 if (mode0 == HImode)
39362 op2 = gen_reg_rtx (SImode);
39363 emit_insn (gen_zero_extendhisi2 (op2, op0));
39365 else if (mode0 == SImode)
39366 op2 = op0;
39367 else
39368 op2 = gen_rtx_SUBREG (SImode, op0, 0);
39370 if (target == 0
39371 || !register_operand (target, SImode))
39372 target = gen_reg_rtx (SImode);
39374 pat = gen_rtx_GEU (VOIDmode, gen_rtx_REG (CCCmode, FLAGS_REG),
39375 const0_rtx);
39376 emit_insn (gen_rtx_SET (target,
39377 gen_rtx_IF_THEN_ELSE (SImode, pat, op2, op1)));
39378 return target;
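/* Editor's note (illustrative sketch, not part of the original source):
   user-level shape of the *_step builtins expanded above, assuming the
   immintrin.h wrappers.  The random value is stored through the pointer
   argument and the return value is nonzero only when the hardware
   reported success via the carry flag:

     #include <immintrin.h>
     unsigned int r;
     if (_rdrand32_step (&r))
       use (r);      // hypothetical consumer: R holds a fresh value
     else
       retry ();     // hypothetical fallback: CF was clear, no entropy  */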
39380 case IX86_BUILTIN_RDSEED16_STEP:
39381 icode = CODE_FOR_rdseedhi_1;
39382 mode0 = HImode;
39383 goto rdseed_step;
39385 case IX86_BUILTIN_RDSEED32_STEP:
39386 icode = CODE_FOR_rdseedsi_1;
39387 mode0 = SImode;
39388 goto rdseed_step;
39390 case IX86_BUILTIN_RDSEED64_STEP:
39391 icode = CODE_FOR_rdseeddi_1;
39392 mode0 = DImode;
39394 rdseed_step:
39395 op0 = gen_reg_rtx (mode0);
39396 emit_insn (GEN_FCN (icode) (op0));
39398 arg0 = CALL_EXPR_ARG (exp, 0);
39399 op1 = expand_normal (arg0);
39400 if (!address_operand (op1, VOIDmode))
39402 op1 = convert_memory_address (Pmode, op1);
39403 op1 = copy_addr_to_reg (op1);
39405 emit_move_insn (gen_rtx_MEM (mode0, op1), op0);
39407 op2 = gen_reg_rtx (QImode);
39409 pat = gen_rtx_LTU (QImode, gen_rtx_REG (CCCmode, FLAGS_REG),
39410 const0_rtx);
39411 emit_insn (gen_rtx_SET (op2, pat));
39413 if (target == 0
39414 || !register_operand (target, SImode))
39415 target = gen_reg_rtx (SImode);
39417 emit_insn (gen_zero_extendqisi2 (target, op2));
39418 return target;
39420 case IX86_BUILTIN_SBB32:
39421 icode = CODE_FOR_subsi3_carry;
39422 mode0 = SImode;
39423 goto addcarryx;
39425 case IX86_BUILTIN_SBB64:
39426 icode = CODE_FOR_subdi3_carry;
39427 mode0 = DImode;
39428 goto addcarryx;
39430 case IX86_BUILTIN_ADDCARRYX32:
39431 icode = TARGET_ADX ? CODE_FOR_adcxsi3 : CODE_FOR_addsi3_carry;
39432 mode0 = SImode;
39433 goto addcarryx;
39435 case IX86_BUILTIN_ADDCARRYX64:
39436 icode = TARGET_ADX ? CODE_FOR_adcxdi3 : CODE_FOR_adddi3_carry;
39437 mode0 = DImode;
39439 addcarryx:
39440 arg0 = CALL_EXPR_ARG (exp, 0); /* unsigned char c_in. */
39441 arg1 = CALL_EXPR_ARG (exp, 1); /* unsigned int src1. */
39442 arg2 = CALL_EXPR_ARG (exp, 2); /* unsigned int src2. */
39443 arg3 = CALL_EXPR_ARG (exp, 3); /* unsigned int *sum_out. */
39445 op0 = gen_reg_rtx (QImode);
39447 /* Generate CF from input operand. */
39448 op1 = expand_normal (arg0);
39449 op1 = copy_to_mode_reg (QImode, convert_to_mode (QImode, op1, 1));
39450 emit_insn (gen_addqi3_cc (op0, op1, constm1_rtx));
39452 /* Generate an ADCX (or plain ADC) instruction to compute X + Y + CF. */
39453 op2 = expand_normal (arg1);
39454 op3 = expand_normal (arg2);
39456 if (!REG_P (op2))
39457 op2 = copy_to_mode_reg (mode0, op2);
39458 if (!REG_P (op3))
39459 op3 = copy_to_mode_reg (mode0, op3);
39461 op0 = gen_reg_rtx (mode0);
39463 op4 = gen_rtx_REG (CCCmode, FLAGS_REG);
39464 pat = gen_rtx_LTU (VOIDmode, op4, const0_rtx);
39465 emit_insn (GEN_FCN (icode) (op0, op2, op3, op4, pat));
39467 /* Store the result. */
39468 op4 = expand_normal (arg3);
39469 if (!address_operand (op4, VOIDmode))
39471 op4 = convert_memory_address (Pmode, op4);
39472 op4 = copy_addr_to_reg (op4);
39474 emit_move_insn (gen_rtx_MEM (mode0, op4), op0);
39476 /* Return current CF value. */
39477 if (target == 0)
39478 target = gen_reg_rtx (QImode);
39480 PUT_MODE (pat, QImode);
39481 emit_insn (gen_rtx_SET (target, pat));
39482 return target;
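/* Editor's note (illustrative sketch, not part of the original source):
   source-level behaviour of the expansion above, assuming the
   adxintrin.h wrapper:

     unsigned int sum;
     unsigned char c_out = _addcarryx_u32 (c_in, x, y, &sum);
     // sum   == x + y + (c_in != 0)  (mod 2**32)
     // c_out == carry out of that addition

   The addqi3_cc of (c_in + 0xff) emitted first only serves to
   materialize the incoming carry in the CF flag before the adcx/adc
   pattern consumes it.  */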
39484 case IX86_BUILTIN_READ_FLAGS:
39485 emit_insn (gen_push (gen_rtx_REG (word_mode, FLAGS_REG)));
39487 if (optimize
39488 || target == NULL_RTX
39489 || !nonimmediate_operand (target, word_mode)
39490 || GET_MODE (target) != word_mode)
39491 target = gen_reg_rtx (word_mode);
39493 emit_insn (gen_pop (target));
39494 return target;
39496 case IX86_BUILTIN_WRITE_FLAGS:
39498 arg0 = CALL_EXPR_ARG (exp, 0);
39499 op0 = expand_normal (arg0);
39500 if (!general_no_elim_operand (op0, word_mode))
39501 op0 = copy_to_mode_reg (word_mode, op0);
39503 emit_insn (gen_push (op0));
39504 emit_insn (gen_pop (gen_rtx_REG (word_mode, FLAGS_REG)));
39505 return 0;
39507 case IX86_BUILTIN_KORTESTC16:
39508 icode = CODE_FOR_kortestchi;
39509 mode0 = HImode;
39510 mode1 = CCCmode;
39511 goto kortest;
39513 case IX86_BUILTIN_KORTESTZ16:
39514 icode = CODE_FOR_kortestzhi;
39515 mode0 = HImode;
39516 mode1 = CCZmode;
39518 kortest:
39519 arg0 = CALL_EXPR_ARG (exp, 0); /* Mask reg src1. */
39520 arg1 = CALL_EXPR_ARG (exp, 1); /* Mask reg src2. */
39521 op0 = expand_normal (arg0);
39522 op1 = expand_normal (arg1);
39524 op0 = copy_to_reg (op0);
39525 op0 = simplify_gen_subreg (mode0, op0, GET_MODE (op0), 0);
39526 op1 = copy_to_reg (op1);
39527 op1 = simplify_gen_subreg (mode0, op1, GET_MODE (op1), 0);
39529 target = gen_reg_rtx (QImode);
39530 emit_insn (gen_rtx_SET (target, const0_rtx));
39532 /* Emit kortest. */
39533 emit_insn (GEN_FCN (icode) (op0, op1));
39534 /* And use setcc to return result from flags. */
39535 ix86_expand_setcc (target, EQ,
39536 gen_rtx_REG (mode1, FLAGS_REG), const0_rtx);
39537 return target;
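/* Editor's note (illustrative sketch, not part of the original source):
   user-level semantics of the two kortest builtins above, assuming the
   avx512fintrin.h wrappers:

     k = k1 | k2;                                  // 16-bit mask OR
     _mm512_kortestc (k1, k2)  ->  (k == 0xffff);  // carry variant
     _mm512_kortestz (k1, k2)  ->  (k == 0);       // zero  variant  */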
39539 case IX86_BUILTIN_GATHERSIV2DF:
39540 icode = CODE_FOR_avx2_gathersiv2df;
39541 goto gather_gen;
39542 case IX86_BUILTIN_GATHERSIV4DF:
39543 icode = CODE_FOR_avx2_gathersiv4df;
39544 goto gather_gen;
39545 case IX86_BUILTIN_GATHERDIV2DF:
39546 icode = CODE_FOR_avx2_gatherdiv2df;
39547 goto gather_gen;
39548 case IX86_BUILTIN_GATHERDIV4DF:
39549 icode = CODE_FOR_avx2_gatherdiv4df;
39550 goto gather_gen;
39551 case IX86_BUILTIN_GATHERSIV4SF:
39552 icode = CODE_FOR_avx2_gathersiv4sf;
39553 goto gather_gen;
39554 case IX86_BUILTIN_GATHERSIV8SF:
39555 icode = CODE_FOR_avx2_gathersiv8sf;
39556 goto gather_gen;
39557 case IX86_BUILTIN_GATHERDIV4SF:
39558 icode = CODE_FOR_avx2_gatherdiv4sf;
39559 goto gather_gen;
39560 case IX86_BUILTIN_GATHERDIV8SF:
39561 icode = CODE_FOR_avx2_gatherdiv8sf;
39562 goto gather_gen;
39563 case IX86_BUILTIN_GATHERSIV2DI:
39564 icode = CODE_FOR_avx2_gathersiv2di;
39565 goto gather_gen;
39566 case IX86_BUILTIN_GATHERSIV4DI:
39567 icode = CODE_FOR_avx2_gathersiv4di;
39568 goto gather_gen;
39569 case IX86_BUILTIN_GATHERDIV2DI:
39570 icode = CODE_FOR_avx2_gatherdiv2di;
39571 goto gather_gen;
39572 case IX86_BUILTIN_GATHERDIV4DI:
39573 icode = CODE_FOR_avx2_gatherdiv4di;
39574 goto gather_gen;
39575 case IX86_BUILTIN_GATHERSIV4SI:
39576 icode = CODE_FOR_avx2_gathersiv4si;
39577 goto gather_gen;
39578 case IX86_BUILTIN_GATHERSIV8SI:
39579 icode = CODE_FOR_avx2_gathersiv8si;
39580 goto gather_gen;
39581 case IX86_BUILTIN_GATHERDIV4SI:
39582 icode = CODE_FOR_avx2_gatherdiv4si;
39583 goto gather_gen;
39584 case IX86_BUILTIN_GATHERDIV8SI:
39585 icode = CODE_FOR_avx2_gatherdiv8si;
39586 goto gather_gen;
39587 case IX86_BUILTIN_GATHERALTSIV4DF:
39588 icode = CODE_FOR_avx2_gathersiv4df;
39589 goto gather_gen;
39590 case IX86_BUILTIN_GATHERALTDIV8SF:
39591 icode = CODE_FOR_avx2_gatherdiv8sf;
39592 goto gather_gen;
39593 case IX86_BUILTIN_GATHERALTSIV4DI:
39594 icode = CODE_FOR_avx2_gathersiv4di;
39595 goto gather_gen;
39596 case IX86_BUILTIN_GATHERALTDIV8SI:
39597 icode = CODE_FOR_avx2_gatherdiv8si;
39598 goto gather_gen;
39599 case IX86_BUILTIN_GATHER3SIV16SF:
39600 icode = CODE_FOR_avx512f_gathersiv16sf;
39601 goto gather_gen;
39602 case IX86_BUILTIN_GATHER3SIV8DF:
39603 icode = CODE_FOR_avx512f_gathersiv8df;
39604 goto gather_gen;
39605 case IX86_BUILTIN_GATHER3DIV16SF:
39606 icode = CODE_FOR_avx512f_gatherdiv16sf;
39607 goto gather_gen;
39608 case IX86_BUILTIN_GATHER3DIV8DF:
39609 icode = CODE_FOR_avx512f_gatherdiv8df;
39610 goto gather_gen;
39611 case IX86_BUILTIN_GATHER3SIV16SI:
39612 icode = CODE_FOR_avx512f_gathersiv16si;
39613 goto gather_gen;
39614 case IX86_BUILTIN_GATHER3SIV8DI:
39615 icode = CODE_FOR_avx512f_gathersiv8di;
39616 goto gather_gen;
39617 case IX86_BUILTIN_GATHER3DIV16SI:
39618 icode = CODE_FOR_avx512f_gatherdiv16si;
39619 goto gather_gen;
39620 case IX86_BUILTIN_GATHER3DIV8DI:
39621 icode = CODE_FOR_avx512f_gatherdiv8di;
39622 goto gather_gen;
39623 case IX86_BUILTIN_GATHER3ALTSIV8DF:
39624 icode = CODE_FOR_avx512f_gathersiv8df;
39625 goto gather_gen;
39626 case IX86_BUILTIN_GATHER3ALTDIV16SF:
39627 icode = CODE_FOR_avx512f_gatherdiv16sf;
39628 goto gather_gen;
39629 case IX86_BUILTIN_GATHER3ALTSIV8DI:
39630 icode = CODE_FOR_avx512f_gathersiv8di;
39631 goto gather_gen;
39632 case IX86_BUILTIN_GATHER3ALTDIV16SI:
39633 icode = CODE_FOR_avx512f_gatherdiv16si;
39634 goto gather_gen;
39635 case IX86_BUILTIN_GATHER3SIV2DF:
39636 icode = CODE_FOR_avx512vl_gathersiv2df;
39637 goto gather_gen;
39638 case IX86_BUILTIN_GATHER3SIV4DF:
39639 icode = CODE_FOR_avx512vl_gathersiv4df;
39640 goto gather_gen;
39641 case IX86_BUILTIN_GATHER3DIV2DF:
39642 icode = CODE_FOR_avx512vl_gatherdiv2df;
39643 goto gather_gen;
39644 case IX86_BUILTIN_GATHER3DIV4DF:
39645 icode = CODE_FOR_avx512vl_gatherdiv4df;
39646 goto gather_gen;
39647 case IX86_BUILTIN_GATHER3SIV4SF:
39648 icode = CODE_FOR_avx512vl_gathersiv4sf;
39649 goto gather_gen;
39650 case IX86_BUILTIN_GATHER3SIV8SF:
39651 icode = CODE_FOR_avx512vl_gathersiv8sf;
39652 goto gather_gen;
39653 case IX86_BUILTIN_GATHER3DIV4SF:
39654 icode = CODE_FOR_avx512vl_gatherdiv4sf;
39655 goto gather_gen;
39656 case IX86_BUILTIN_GATHER3DIV8SF:
39657 icode = CODE_FOR_avx512vl_gatherdiv8sf;
39658 goto gather_gen;
39659 case IX86_BUILTIN_GATHER3SIV2DI:
39660 icode = CODE_FOR_avx512vl_gathersiv2di;
39661 goto gather_gen;
39662 case IX86_BUILTIN_GATHER3SIV4DI:
39663 icode = CODE_FOR_avx512vl_gathersiv4di;
39664 goto gather_gen;
39665 case IX86_BUILTIN_GATHER3DIV2DI:
39666 icode = CODE_FOR_avx512vl_gatherdiv2di;
39667 goto gather_gen;
39668 case IX86_BUILTIN_GATHER3DIV4DI:
39669 icode = CODE_FOR_avx512vl_gatherdiv4di;
39670 goto gather_gen;
39671 case IX86_BUILTIN_GATHER3SIV4SI:
39672 icode = CODE_FOR_avx512vl_gathersiv4si;
39673 goto gather_gen;
39674 case IX86_BUILTIN_GATHER3SIV8SI:
39675 icode = CODE_FOR_avx512vl_gathersiv8si;
39676 goto gather_gen;
39677 case IX86_BUILTIN_GATHER3DIV4SI:
39678 icode = CODE_FOR_avx512vl_gatherdiv4si;
39679 goto gather_gen;
39680 case IX86_BUILTIN_GATHER3DIV8SI:
39681 icode = CODE_FOR_avx512vl_gatherdiv8si;
39682 goto gather_gen;
39683 case IX86_BUILTIN_GATHER3ALTSIV4DF:
39684 icode = CODE_FOR_avx512vl_gathersiv4df;
39685 goto gather_gen;
39686 case IX86_BUILTIN_GATHER3ALTDIV8SF:
39687 icode = CODE_FOR_avx512vl_gatherdiv8sf;
39688 goto gather_gen;
39689 case IX86_BUILTIN_GATHER3ALTSIV4DI:
39690 icode = CODE_FOR_avx512vl_gathersiv4di;
39691 goto gather_gen;
39692 case IX86_BUILTIN_GATHER3ALTDIV8SI:
39693 icode = CODE_FOR_avx512vl_gatherdiv8si;
39694 goto gather_gen;
39695 case IX86_BUILTIN_SCATTERSIV16SF:
39696 icode = CODE_FOR_avx512f_scattersiv16sf;
39697 goto scatter_gen;
39698 case IX86_BUILTIN_SCATTERSIV8DF:
39699 icode = CODE_FOR_avx512f_scattersiv8df;
39700 goto scatter_gen;
39701 case IX86_BUILTIN_SCATTERDIV16SF:
39702 icode = CODE_FOR_avx512f_scatterdiv16sf;
39703 goto scatter_gen;
39704 case IX86_BUILTIN_SCATTERDIV8DF:
39705 icode = CODE_FOR_avx512f_scatterdiv8df;
39706 goto scatter_gen;
39707 case IX86_BUILTIN_SCATTERSIV16SI:
39708 icode = CODE_FOR_avx512f_scattersiv16si;
39709 goto scatter_gen;
39710 case IX86_BUILTIN_SCATTERSIV8DI:
39711 icode = CODE_FOR_avx512f_scattersiv8di;
39712 goto scatter_gen;
39713 case IX86_BUILTIN_SCATTERDIV16SI:
39714 icode = CODE_FOR_avx512f_scatterdiv16si;
39715 goto scatter_gen;
39716 case IX86_BUILTIN_SCATTERDIV8DI:
39717 icode = CODE_FOR_avx512f_scatterdiv8di;
39718 goto scatter_gen;
39719 case IX86_BUILTIN_SCATTERSIV8SF:
39720 icode = CODE_FOR_avx512vl_scattersiv8sf;
39721 goto scatter_gen;
39722 case IX86_BUILTIN_SCATTERSIV4SF:
39723 icode = CODE_FOR_avx512vl_scattersiv4sf;
39724 goto scatter_gen;
39725 case IX86_BUILTIN_SCATTERSIV4DF:
39726 icode = CODE_FOR_avx512vl_scattersiv4df;
39727 goto scatter_gen;
39728 case IX86_BUILTIN_SCATTERSIV2DF:
39729 icode = CODE_FOR_avx512vl_scattersiv2df;
39730 goto scatter_gen;
39731 case IX86_BUILTIN_SCATTERDIV8SF:
39732 icode = CODE_FOR_avx512vl_scatterdiv8sf;
39733 goto scatter_gen;
39734 case IX86_BUILTIN_SCATTERDIV4SF:
39735 icode = CODE_FOR_avx512vl_scatterdiv4sf;
39736 goto scatter_gen;
39737 case IX86_BUILTIN_SCATTERDIV4DF:
39738 icode = CODE_FOR_avx512vl_scatterdiv4df;
39739 goto scatter_gen;
39740 case IX86_BUILTIN_SCATTERDIV2DF:
39741 icode = CODE_FOR_avx512vl_scatterdiv2df;
39742 goto scatter_gen;
39743 case IX86_BUILTIN_SCATTERSIV8SI:
39744 icode = CODE_FOR_avx512vl_scattersiv8si;
39745 goto scatter_gen;
39746 case IX86_BUILTIN_SCATTERSIV4SI:
39747 icode = CODE_FOR_avx512vl_scattersiv4si;
39748 goto scatter_gen;
39749 case IX86_BUILTIN_SCATTERSIV4DI:
39750 icode = CODE_FOR_avx512vl_scattersiv4di;
39751 goto scatter_gen;
39752 case IX86_BUILTIN_SCATTERSIV2DI:
39753 icode = CODE_FOR_avx512vl_scattersiv2di;
39754 goto scatter_gen;
39755 case IX86_BUILTIN_SCATTERDIV8SI:
39756 icode = CODE_FOR_avx512vl_scatterdiv8si;
39757 goto scatter_gen;
39758 case IX86_BUILTIN_SCATTERDIV4SI:
39759 icode = CODE_FOR_avx512vl_scatterdiv4si;
39760 goto scatter_gen;
39761 case IX86_BUILTIN_SCATTERDIV4DI:
39762 icode = CODE_FOR_avx512vl_scatterdiv4di;
39763 goto scatter_gen;
39764 case IX86_BUILTIN_SCATTERDIV2DI:
39765 icode = CODE_FOR_avx512vl_scatterdiv2di;
39766 goto scatter_gen;
39767 case IX86_BUILTIN_GATHERPFDPD:
39768 icode = CODE_FOR_avx512pf_gatherpfv8sidf;
39769 goto vec_prefetch_gen;
39770 case IX86_BUILTIN_GATHERPFDPS:
39771 icode = CODE_FOR_avx512pf_gatherpfv16sisf;
39772 goto vec_prefetch_gen;
39773 case IX86_BUILTIN_GATHERPFQPD:
39774 icode = CODE_FOR_avx512pf_gatherpfv8didf;
39775 goto vec_prefetch_gen;
39776 case IX86_BUILTIN_GATHERPFQPS:
39777 icode = CODE_FOR_avx512pf_gatherpfv8disf;
39778 goto vec_prefetch_gen;
39779 case IX86_BUILTIN_SCATTERPFDPD:
39780 icode = CODE_FOR_avx512pf_scatterpfv8sidf;
39781 goto vec_prefetch_gen;
39782 case IX86_BUILTIN_SCATTERPFDPS:
39783 icode = CODE_FOR_avx512pf_scatterpfv16sisf;
39784 goto vec_prefetch_gen;
39785 case IX86_BUILTIN_SCATTERPFQPD:
39786 icode = CODE_FOR_avx512pf_scatterpfv8didf;
39787 goto vec_prefetch_gen;
39788 case IX86_BUILTIN_SCATTERPFQPS:
39789 icode = CODE_FOR_avx512pf_scatterpfv8disf;
39790 goto vec_prefetch_gen;
39792 gather_gen:
39793 rtx half;
39794 rtx (*gen) (rtx, rtx);
39796 arg0 = CALL_EXPR_ARG (exp, 0);
39797 arg1 = CALL_EXPR_ARG (exp, 1);
39798 arg2 = CALL_EXPR_ARG (exp, 2);
39799 arg3 = CALL_EXPR_ARG (exp, 3);
39800 arg4 = CALL_EXPR_ARG (exp, 4);
39801 op0 = expand_normal (arg0);
39802 op1 = expand_normal (arg1);
39803 op2 = expand_normal (arg2);
39804 op3 = expand_normal (arg3);
39805 op4 = expand_normal (arg4);
39806 /* Note the arg order is different from the operand order. */
39807 mode0 = insn_data[icode].operand[1].mode;
39808 mode2 = insn_data[icode].operand[3].mode;
39809 mode3 = insn_data[icode].operand[4].mode;
39810 mode4 = insn_data[icode].operand[5].mode;
39812 if (target == NULL_RTX
39813 || GET_MODE (target) != insn_data[icode].operand[0].mode
39814 || !insn_data[icode].operand[0].predicate (target,
39815 GET_MODE (target)))
39816 subtarget = gen_reg_rtx (insn_data[icode].operand[0].mode);
39817 else
39818 subtarget = target;
39820 switch (fcode)
39822 case IX86_BUILTIN_GATHER3ALTSIV8DF:
39823 case IX86_BUILTIN_GATHER3ALTSIV8DI:
39824 half = gen_reg_rtx (V8SImode);
39825 if (!nonimmediate_operand (op2, V16SImode))
39826 op2 = copy_to_mode_reg (V16SImode, op2);
39827 emit_insn (gen_vec_extract_lo_v16si (half, op2));
39828 op2 = half;
39829 break;
39830 case IX86_BUILTIN_GATHER3ALTSIV4DF:
39831 case IX86_BUILTIN_GATHER3ALTSIV4DI:
39832 case IX86_BUILTIN_GATHERALTSIV4DF:
39833 case IX86_BUILTIN_GATHERALTSIV4DI:
39834 half = gen_reg_rtx (V4SImode);
39835 if (!nonimmediate_operand (op2, V8SImode))
39836 op2 = copy_to_mode_reg (V8SImode, op2);
39837 emit_insn (gen_vec_extract_lo_v8si (half, op2));
39838 op2 = half;
39839 break;
39840 case IX86_BUILTIN_GATHER3ALTDIV16SF:
39841 case IX86_BUILTIN_GATHER3ALTDIV16SI:
39842 half = gen_reg_rtx (mode0);
39843 if (mode0 == V8SFmode)
39844 gen = gen_vec_extract_lo_v16sf;
39845 else
39846 gen = gen_vec_extract_lo_v16si;
39847 if (!nonimmediate_operand (op0, GET_MODE (op0)))
39848 op0 = copy_to_mode_reg (GET_MODE (op0), op0);
39849 emit_insn (gen (half, op0));
39850 op0 = half;
39851 if (GET_MODE (op3) != VOIDmode)
39853 if (!nonimmediate_operand (op3, GET_MODE (op3)))
39854 op3 = copy_to_mode_reg (GET_MODE (op3), op3);
39855 emit_insn (gen (half, op3));
39856 op3 = half;
39858 break;
39859 case IX86_BUILTIN_GATHER3ALTDIV8SF:
39860 case IX86_BUILTIN_GATHER3ALTDIV8SI:
39861 case IX86_BUILTIN_GATHERALTDIV8SF:
39862 case IX86_BUILTIN_GATHERALTDIV8SI:
39863 half = gen_reg_rtx (mode0);
39864 if (mode0 == V4SFmode)
39865 gen = gen_vec_extract_lo_v8sf;
39866 else
39867 gen = gen_vec_extract_lo_v8si;
39868 if (!nonimmediate_operand (op0, GET_MODE (op0)))
39869 op0 = copy_to_mode_reg (GET_MODE (op0), op0);
39870 emit_insn (gen (half, op0));
39871 op0 = half;
39872 if (GET_MODE (op3) != VOIDmode)
39874 if (!nonimmediate_operand (op3, GET_MODE (op3)))
39875 op3 = copy_to_mode_reg (GET_MODE (op3), op3);
39876 emit_insn (gen (half, op3));
39877 op3 = half;
39879 break;
39880 default:
39881 break;
39884 /* Force the memory operand to use only a base register here;
39885 we don't want to do that for the memory operands of other
39886 builtin functions. */
39887 op1 = ix86_zero_extend_to_Pmode (op1);
39889 if (!insn_data[icode].operand[1].predicate (op0, mode0))
39890 op0 = copy_to_mode_reg (mode0, op0);
39891 if (!insn_data[icode].operand[2].predicate (op1, Pmode))
39892 op1 = copy_to_mode_reg (Pmode, op1);
39893 if (!insn_data[icode].operand[3].predicate (op2, mode2))
39894 op2 = copy_to_mode_reg (mode2, op2);
39896 op3 = fixup_modeless_constant (op3, mode3);
39898 if (GET_MODE (op3) == mode3 || GET_MODE (op3) == VOIDmode)
39900 if (!insn_data[icode].operand[4].predicate (op3, mode3))
39901 op3 = copy_to_mode_reg (mode3, op3);
39903 else
39905 op3 = copy_to_reg (op3);
39906 op3 = simplify_gen_subreg (mode3, op3, GET_MODE (op3), 0);
39908 if (!insn_data[icode].operand[5].predicate (op4, mode4))
39910 error ("the last argument must be scale 1, 2, 4, 8");
39911 return const0_rtx;
39914 /* Optimize. If mask is known to have all high bits set,
39915 replace op0 with pc_rtx to signal that the instruction
39916 overwrites the whole destination and doesn't use its
39917 previous contents. */
39918 if (optimize)
39920 if (TREE_CODE (arg3) == INTEGER_CST)
39922 if (integer_all_onesp (arg3))
39923 op0 = pc_rtx;
39925 else if (TREE_CODE (arg3) == VECTOR_CST)
39927 unsigned int negative = 0;
39928 for (i = 0; i < VECTOR_CST_NELTS (arg3); ++i)
39930 tree cst = VECTOR_CST_ELT (arg3, i);
39931 if (TREE_CODE (cst) == INTEGER_CST
39932 && tree_int_cst_sign_bit (cst))
39933 negative++;
39934 else if (TREE_CODE (cst) == REAL_CST
39935 && REAL_VALUE_NEGATIVE (TREE_REAL_CST (cst)))
39936 negative++;
39938 if (negative == TYPE_VECTOR_SUBPARTS (TREE_TYPE (arg3)))
39939 op0 = pc_rtx;
39941 else if (TREE_CODE (arg3) == SSA_NAME
39942 && TREE_CODE (TREE_TYPE (arg3)) == VECTOR_TYPE)
39944 /* Also recognize when the mask is of the form:
39945 __v2df src = _mm_setzero_pd ();
39946 __v2df mask = _mm_cmpeq_pd (src, src);
39948 __v8sf src = _mm256_setzero_ps ();
39949 __v8sf mask = _mm256_cmp_ps (src, src, _CMP_EQ_OQ);
39950 as that is a cheaper way to load all ones into
39951 a register than having to load a constant from
39952 memory. */
39953 gimple def_stmt = SSA_NAME_DEF_STMT (arg3);
39954 if (is_gimple_call (def_stmt))
39956 tree fndecl = gimple_call_fndecl (def_stmt);
39957 if (fndecl
39958 && DECL_BUILT_IN_CLASS (fndecl) == BUILT_IN_MD)
39959 switch ((unsigned int) DECL_FUNCTION_CODE (fndecl))
39961 case IX86_BUILTIN_CMPPD:
39962 case IX86_BUILTIN_CMPPS:
39963 case IX86_BUILTIN_CMPPD256:
39964 case IX86_BUILTIN_CMPPS256:
39965 if (!integer_zerop (gimple_call_arg (def_stmt, 2)))
39966 break;
39967 /* FALLTHRU */
39968 case IX86_BUILTIN_CMPEQPD:
39969 case IX86_BUILTIN_CMPEQPS:
39970 if (initializer_zerop (gimple_call_arg (def_stmt, 0))
39971 && initializer_zerop (gimple_call_arg (def_stmt,
39972 1)))
39973 op0 = pc_rtx;
39974 break;
39975 default:
39976 break;
39982 pat = GEN_FCN (icode) (subtarget, op0, op1, op2, op3, op4);
39983 if (! pat)
39984 return const0_rtx;
39985 emit_insn (pat);
39987 switch (fcode)
39989 case IX86_BUILTIN_GATHER3DIV16SF:
39990 if (target == NULL_RTX)
39991 target = gen_reg_rtx (V8SFmode);
39992 emit_insn (gen_vec_extract_lo_v16sf (target, subtarget));
39993 break;
39994 case IX86_BUILTIN_GATHER3DIV16SI:
39995 if (target == NULL_RTX)
39996 target = gen_reg_rtx (V8SImode);
39997 emit_insn (gen_vec_extract_lo_v16si (target, subtarget));
39998 break;
39999 case IX86_BUILTIN_GATHER3DIV8SF:
40000 case IX86_BUILTIN_GATHERDIV8SF:
40001 if (target == NULL_RTX)
40002 target = gen_reg_rtx (V4SFmode);
40003 emit_insn (gen_vec_extract_lo_v8sf (target, subtarget));
40004 break;
40005 case IX86_BUILTIN_GATHER3DIV8SI:
40006 case IX86_BUILTIN_GATHERDIV8SI:
40007 if (target == NULL_RTX)
40008 target = gen_reg_rtx (V4SImode);
40009 emit_insn (gen_vec_extract_lo_v8si (target, subtarget));
40010 break;
40011 default:
40012 target = subtarget;
40013 break;
40015 return target;
40017 scatter_gen:
40018 arg0 = CALL_EXPR_ARG (exp, 0);
40019 arg1 = CALL_EXPR_ARG (exp, 1);
40020 arg2 = CALL_EXPR_ARG (exp, 2);
40021 arg3 = CALL_EXPR_ARG (exp, 3);
40022 arg4 = CALL_EXPR_ARG (exp, 4);
40023 op0 = expand_normal (arg0);
40024 op1 = expand_normal (arg1);
40025 op2 = expand_normal (arg2);
40026 op3 = expand_normal (arg3);
40027 op4 = expand_normal (arg4);
40028 mode1 = insn_data[icode].operand[1].mode;
40029 mode2 = insn_data[icode].operand[2].mode;
40030 mode3 = insn_data[icode].operand[3].mode;
40031 mode4 = insn_data[icode].operand[4].mode;
40033 /* Force the memory operand to use only a base register here;
40034 we don't want to do that for the memory operands of other
40035 builtin functions. */
40036 op0 = force_reg (Pmode, convert_to_mode (Pmode, op0, 1));
40038 if (!insn_data[icode].operand[0].predicate (op0, Pmode))
40039 op0 = copy_to_mode_reg (Pmode, op0);
40041 op1 = fixup_modeless_constant (op1, mode1);
40043 if (GET_MODE (op1) == mode1 || GET_MODE (op1) == VOIDmode)
40045 if (!insn_data[icode].operand[1].predicate (op1, mode1))
40046 op1 = copy_to_mode_reg (mode1, op1);
40048 else
40050 op1 = copy_to_reg (op1);
40051 op1 = simplify_gen_subreg (mode1, op1, GET_MODE (op1), 0);
40054 if (!insn_data[icode].operand[2].predicate (op2, mode2))
40055 op2 = copy_to_mode_reg (mode2, op2);
40057 if (!insn_data[icode].operand[3].predicate (op3, mode3))
40058 op3 = copy_to_mode_reg (mode3, op3);
40060 if (!insn_data[icode].operand[4].predicate (op4, mode4))
40062 error ("the last argument must be scale 1, 2, 4, 8");
40063 return const0_rtx;
40066 pat = GEN_FCN (icode) (op0, op1, op2, op3, op4);
40067 if (! pat)
40068 return const0_rtx;
40070 emit_insn (pat);
40071 return 0;
40073 vec_prefetch_gen:
40074 arg0 = CALL_EXPR_ARG (exp, 0);
40075 arg1 = CALL_EXPR_ARG (exp, 1);
40076 arg2 = CALL_EXPR_ARG (exp, 2);
40077 arg3 = CALL_EXPR_ARG (exp, 3);
40078 arg4 = CALL_EXPR_ARG (exp, 4);
40079 op0 = expand_normal (arg0);
40080 op1 = expand_normal (arg1);
40081 op2 = expand_normal (arg2);
40082 op3 = expand_normal (arg3);
40083 op4 = expand_normal (arg4);
40084 mode0 = insn_data[icode].operand[0].mode;
40085 mode1 = insn_data[icode].operand[1].mode;
40086 mode3 = insn_data[icode].operand[3].mode;
40087 mode4 = insn_data[icode].operand[4].mode;
40089 op0 = fixup_modeless_constant (op0, mode0);
40091 if (GET_MODE (op0) == mode0
40092 || (GET_MODE (op0) == VOIDmode && op0 != constm1_rtx))
40094 if (!insn_data[icode].operand[0].predicate (op0, mode0))
40095 op0 = copy_to_mode_reg (mode0, op0);
40097 else if (op0 != constm1_rtx)
40099 op0 = copy_to_reg (op0);
40100 op0 = simplify_gen_subreg (mode0, op0, GET_MODE (op0), 0);
40103 if (!insn_data[icode].operand[1].predicate (op1, mode1))
40104 op1 = copy_to_mode_reg (mode1, op1);
40106 /* Force memory operand only with base register here. But we
40107 don't want to do it on memory operand for other builtin
40108 functions. */
40109 op2 = force_reg (Pmode, convert_to_mode (Pmode, op2, 1));
40111 if (!insn_data[icode].operand[2].predicate (op2, Pmode))
40112 op2 = copy_to_mode_reg (Pmode, op2);
40114 if (!insn_data[icode].operand[3].predicate (op3, mode3))
40116 error ("the fourth argument must be scale 1, 2, 4, 8");
40117 return const0_rtx;
40120 if (!insn_data[icode].operand[4].predicate (op4, mode4))
40122 error ("incorrect hint operand");
40123 return const0_rtx;
40126 pat = GEN_FCN (icode) (op0, op1, op2, op3, op4);
40127 if (! pat)
40128 return const0_rtx;
40130 emit_insn (pat);
40132 return 0;
40134 case IX86_BUILTIN_XABORT:
40135 icode = CODE_FOR_xabort;
40136 arg0 = CALL_EXPR_ARG (exp, 0);
40137 op0 = expand_normal (arg0);
40138 mode0 = insn_data[icode].operand[0].mode;
40139 if (!insn_data[icode].operand[0].predicate (op0, mode0))
40141 error ("the xabort's argument must be an 8-bit immediate");
40142 return const0_rtx;
40144 emit_insn (gen_xabort (op0));
40145 return 0;
40147 default:
40148 break;
40151 for (i = 0, d = bdesc_special_args;
40152 i < ARRAY_SIZE (bdesc_special_args);
40153 i++, d++)
40154 if (d->code == fcode)
40155 return ix86_expand_special_args_builtin (d, exp, target);
40157 for (i = 0, d = bdesc_args;
40158 i < ARRAY_SIZE (bdesc_args);
40159 i++, d++)
40160 if (d->code == fcode)
40161 switch (fcode)
40163 case IX86_BUILTIN_FABSQ:
40164 case IX86_BUILTIN_COPYSIGNQ:
40165 if (!TARGET_SSE)
40166 /* Emit a normal call if SSE isn't available. */
40167 return expand_call (exp, target, ignore);
40168 default:
40169 return ix86_expand_args_builtin (d, exp, target);
40172 for (i = 0, d = bdesc_comi; i < ARRAY_SIZE (bdesc_comi); i++, d++)
40173 if (d->code == fcode)
40174 return ix86_expand_sse_comi (d, exp, target);
40176 for (i = 0, d = bdesc_round_args; i < ARRAY_SIZE (bdesc_round_args); i++, d++)
40177 if (d->code == fcode)
40178 return ix86_expand_round_builtin (d, exp, target);
40180 for (i = 0, d = bdesc_pcmpestr;
40181 i < ARRAY_SIZE (bdesc_pcmpestr);
40182 i++, d++)
40183 if (d->code == fcode)
40184 return ix86_expand_sse_pcmpestr (d, exp, target);
40186 for (i = 0, d = bdesc_pcmpistr;
40187 i < ARRAY_SIZE (bdesc_pcmpistr);
40188 i++, d++)
40189 if (d->code == fcode)
40190 return ix86_expand_sse_pcmpistr (d, exp, target);
40192 for (i = 0, d = bdesc_multi_arg; i < ARRAY_SIZE (bdesc_multi_arg); i++, d++)
40193 if (d->code == fcode)
40194 return ix86_expand_multi_arg_builtin (d->icode, exp, target,
40195 (enum ix86_builtin_func_type)
40196 d->flag, d->comparison);
40198 gcc_unreachable ();
40201 /* This returns the target-specific builtin with code CODE if
40202 current_function_decl has visibility on this builtin, which is checked
40203 using isa flags. Returns NULL_TREE otherwise. */
40205 static tree ix86_get_builtin (enum ix86_builtins code)
40207 struct cl_target_option *opts;
40208 tree target_tree = NULL_TREE;
40210 /* Determine the isa flags of current_function_decl. */
40212 if (current_function_decl)
40213 target_tree = DECL_FUNCTION_SPECIFIC_TARGET (current_function_decl);
40215 if (target_tree == NULL)
40216 target_tree = target_option_default_node;
40218 opts = TREE_TARGET_OPTION (target_tree);
40220 if (ix86_builtins_isa[(int) code].isa & opts->x_ix86_isa_flags)
40221 return ix86_builtin_decl (code, true);
40222 else
40223 return NULL_TREE;
40226 /* Return the function decl of the target-specific builtin
40227 corresponding to the MPX builtin passed in FCODE. */
40228 static tree
40229 ix86_builtin_mpx_function (unsigned fcode)
40231 switch (fcode)
40233 case BUILT_IN_CHKP_BNDMK:
40234 return ix86_builtins[IX86_BUILTIN_BNDMK];
40236 case BUILT_IN_CHKP_BNDSTX:
40237 return ix86_builtins[IX86_BUILTIN_BNDSTX];
40239 case BUILT_IN_CHKP_BNDLDX:
40240 return ix86_builtins[IX86_BUILTIN_BNDLDX];
40242 case BUILT_IN_CHKP_BNDCL:
40243 return ix86_builtins[IX86_BUILTIN_BNDCL];
40245 case BUILT_IN_CHKP_BNDCU:
40246 return ix86_builtins[IX86_BUILTIN_BNDCU];
40248 case BUILT_IN_CHKP_BNDRET:
40249 return ix86_builtins[IX86_BUILTIN_BNDRET];
40251 case BUILT_IN_CHKP_INTERSECT:
40252 return ix86_builtins[IX86_BUILTIN_BNDINT];
40254 case BUILT_IN_CHKP_NARROW:
40255 return ix86_builtins[IX86_BUILTIN_BNDNARROW];
40257 case BUILT_IN_CHKP_SIZEOF:
40258 return ix86_builtins[IX86_BUILTIN_SIZEOF];
40260 case BUILT_IN_CHKP_EXTRACT_LOWER:
40261 return ix86_builtins[IX86_BUILTIN_BNDLOWER];
40263 case BUILT_IN_CHKP_EXTRACT_UPPER:
40264 return ix86_builtins[IX86_BUILTIN_BNDUPPER];
40266 default:
40267 return NULL_TREE;
40270 gcc_unreachable ();
40273 /* Helper function for ix86_load_bounds and ix86_store_bounds.
40275 Return an address to be used to load/store bounds for pointer
40276 passed in SLOT.
40278 SLOT_NO is an integer constant holding the number of a target
40279 dependent special slot to be used in case SLOT is not a memory.
40281 SPECIAL_BASE is a pointer to be used as the base of fake addresses
40282 for accessing special slots in the Bounds Table. SPECIAL_BASE[-1],
40283 SPECIAL_BASE[-2] etc. will be used as fake pointer locations. */
40285 static rtx
40286 ix86_get_arg_address_for_bt (rtx slot, rtx slot_no, rtx special_base)
40288 rtx addr = NULL;
40290 /* A NULL slot means we pass bounds for a pointer not passed to the
40291 function at all. A register slot means we pass the pointer in a
40292 register. In both of these cases bounds are passed via the Bounds
40293 Table. Since we do not have an actual pointer stored in memory,
40294 we have to use fake addresses to access the Bounds Table. We
40295 start with (special_base - sizeof (void*)) and decrease this
40296 address by the pointer size to get addresses for other slots. */
40297 if (!slot || REG_P (slot))
40299 gcc_assert (CONST_INT_P (slot_no));
40300 addr = plus_constant (Pmode, special_base,
40301 -(INTVAL (slot_no) + 1) * GET_MODE_SIZE (Pmode));
40303 /* If the pointer is passed in memory then its address is used to
40304 access the Bounds Table. */
40305 else if (MEM_P (slot))
40307 addr = XEXP (slot, 0);
40308 if (!register_operand (addr, Pmode))
40309 addr = copy_addr_to_reg (addr);
40311 else
40312 gcc_unreachable ();
40314 return addr;
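/* For example (assuming 64-bit Pmode, i.e. 8-byte pointers): SLOT_NO 0
   yields SPECIAL_BASE - 8, SLOT_NO 1 yields SPECIAL_BASE - 16, and in
   general SLOT_NO k yields SPECIAL_BASE - (k + 1) * 8, matching the
   SPECIAL_BASE[-1], SPECIAL_BASE[-2], ... layout described above.  */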
40317 /* The expand pass uses this hook to load bounds for the function parameter
40318 PTR passed in SLOT in case its bounds are not passed in a register.
40320 If SLOT is a memory, then bounds are loaded as for a regular pointer
40321 loaded from memory. PTR may be NULL in case SLOT is a memory.
40322 In that case the value of PTR (if required) may be loaded from SLOT.
40324 If SLOT is NULL or a register then SLOT_NO is an integer constant
40325 holding the number of the target dependent special slot which should
40326 be used to obtain bounds.
40328 Return loaded bounds. */
40330 static rtx
40331 ix86_load_bounds (rtx slot, rtx ptr, rtx slot_no)
40333 rtx reg = gen_reg_rtx (BNDmode);
40334 rtx addr;
40336 /* Get address to be used to access Bounds Table. Special slots start
40337 at the location of return address of the current function. */
40338 addr = ix86_get_arg_address_for_bt (slot, slot_no, arg_pointer_rtx);
40340 /* Load the pointer value from memory if we don't have it. */
40341 if (!ptr)
40343 gcc_assert (MEM_P (slot));
40344 ptr = copy_addr_to_reg (slot);
40347 emit_insn (BNDmode == BND64mode
40348 ? gen_bnd64_ldx (reg, addr, ptr)
40349 : gen_bnd32_ldx (reg, addr, ptr));
40351 return reg;
40354 /* The expand pass uses this hook to store BOUNDS for the call argument PTR
40355 passed in SLOT in case BOUNDS are not passed in a register.
40357 If SLOT is a memory, then BOUNDS are stored as for a regular pointer
40358 stored in memory. PTR may be NULL in case SLOT is a memory.
40359 In that case the value of PTR (if required) may be loaded from SLOT.
40361 If SLOT is NULL or a register then SLOT_NO is an integer constant
40362 holding the number of the target dependent special slot which should
40363 be used to store BOUNDS. */
40365 static void
40366 ix86_store_bounds (rtx ptr, rtx slot, rtx bounds, rtx slot_no)
40368 rtx addr;
40370 /* Get address to be used to access Bounds Table. Special slots start
40371 at the location of return address of a called function. */
40372 addr = ix86_get_arg_address_for_bt (slot, slot_no, stack_pointer_rtx);
40374 /* Load the pointer value from memory if we don't have it. */
40375 if (!ptr)
40377 gcc_assert (MEM_P (slot));
40378 ptr = copy_addr_to_reg (slot);
40381 gcc_assert (POINTER_BOUNDS_MODE_P (GET_MODE (bounds)));
40382 if (!register_operand (bounds, BNDmode))
40383 bounds = copy_to_mode_reg (BNDmode, bounds);
40385 emit_insn (BNDmode == BND64mode
40386 ? gen_bnd64_stx (addr, ptr, bounds)
40387 : gen_bnd32_stx (addr, ptr, bounds));
40390 /* Load and return bounds returned by function in SLOT. */
40392 static rtx
40393 ix86_load_returned_bounds (rtx slot)
40395 rtx res;
40397 gcc_assert (REG_P (slot));
40398 res = gen_reg_rtx (BNDmode);
40399 emit_move_insn (res, slot);
40401 return res;
40404 /* Store BOUNDS returned by function into SLOT. */
40406 static void
40407 ix86_store_returned_bounds (rtx slot, rtx bounds)
40409 gcc_assert (REG_P (slot));
40410 emit_move_insn (slot, bounds);
40413 /* Returns a function decl for a vectorized version of the builtin function
40414 with builtin function code FN and the result vector type TYPE, or NULL_TREE
40415 if it is not available. */
40417 static tree
40418 ix86_builtin_vectorized_function (tree fndecl, tree type_out,
40419 tree type_in)
40421 machine_mode in_mode, out_mode;
40422 int in_n, out_n;
40423 enum built_in_function fn = DECL_FUNCTION_CODE (fndecl);
40425 if (TREE_CODE (type_out) != VECTOR_TYPE
40426 || TREE_CODE (type_in) != VECTOR_TYPE
40427 || DECL_BUILT_IN_CLASS (fndecl) != BUILT_IN_NORMAL)
40428 return NULL_TREE;
40430 out_mode = TYPE_MODE (TREE_TYPE (type_out));
40431 out_n = TYPE_VECTOR_SUBPARTS (type_out);
40432 in_mode = TYPE_MODE (TREE_TYPE (type_in));
40433 in_n = TYPE_VECTOR_SUBPARTS (type_in);
40435 switch (fn)
40437 case BUILT_IN_SQRT:
40438 if (out_mode == DFmode && in_mode == DFmode)
40440 if (out_n == 2 && in_n == 2)
40441 return ix86_get_builtin (IX86_BUILTIN_SQRTPD);
40442 else if (out_n == 4 && in_n == 4)
40443 return ix86_get_builtin (IX86_BUILTIN_SQRTPD256);
40444 else if (out_n == 8 && in_n == 8)
40445 return ix86_get_builtin (IX86_BUILTIN_SQRTPD512);
40447 break;
40449 case BUILT_IN_EXP2F:
40450 if (out_mode == SFmode && in_mode == SFmode)
40452 if (out_n == 16 && in_n == 16)
40453 return ix86_get_builtin (IX86_BUILTIN_EXP2PS);
40455 break;
40457 case BUILT_IN_SQRTF:
40458 if (out_mode == SFmode && in_mode == SFmode)
40460 if (out_n == 4 && in_n == 4)
40461 return ix86_get_builtin (IX86_BUILTIN_SQRTPS_NR);
40462 else if (out_n == 8 && in_n == 8)
40463 return ix86_get_builtin (IX86_BUILTIN_SQRTPS_NR256);
40464 else if (out_n == 16 && in_n == 16)
40465 return ix86_get_builtin (IX86_BUILTIN_SQRTPS_NR512);
40467 break;
40469 case BUILT_IN_IFLOOR:
40470 case BUILT_IN_LFLOOR:
40471 case BUILT_IN_LLFLOOR:
40472 /* The round insn does not trap on denormals. */
40473 if (flag_trapping_math || !TARGET_ROUND)
40474 break;
40476 if (out_mode == SImode && in_mode == DFmode)
40478 if (out_n == 4 && in_n == 2)
40479 return ix86_get_builtin (IX86_BUILTIN_FLOORPD_VEC_PACK_SFIX);
40480 else if (out_n == 8 && in_n == 4)
40481 return ix86_get_builtin (IX86_BUILTIN_FLOORPD_VEC_PACK_SFIX256);
40482 else if (out_n == 16 && in_n == 8)
40483 return ix86_get_builtin (IX86_BUILTIN_FLOORPD_VEC_PACK_SFIX512);
40485 break;
40487 case BUILT_IN_IFLOORF:
40488 case BUILT_IN_LFLOORF:
40489 case BUILT_IN_LLFLOORF:
40490 /* The round insn does not trap on denormals. */
40491 if (flag_trapping_math || !TARGET_ROUND)
40492 break;
40494 if (out_mode == SImode && in_mode == SFmode)
40496 if (out_n == 4 && in_n == 4)
40497 return ix86_get_builtin (IX86_BUILTIN_FLOORPS_SFIX);
40498 else if (out_n == 8 && in_n == 8)
40499 return ix86_get_builtin (IX86_BUILTIN_FLOORPS_SFIX256);
40501 break;
40503 case BUILT_IN_ICEIL:
40504 case BUILT_IN_LCEIL:
40505 case BUILT_IN_LLCEIL:
40506 /* The round insn does not trap on denormals. */
40507 if (flag_trapping_math || !TARGET_ROUND)
40508 break;
40510 if (out_mode == SImode && in_mode == DFmode)
40512 if (out_n == 4 && in_n == 2)
40513 return ix86_get_builtin (IX86_BUILTIN_CEILPD_VEC_PACK_SFIX);
40514 else if (out_n == 8 && in_n == 4)
40515 return ix86_get_builtin (IX86_BUILTIN_CEILPD_VEC_PACK_SFIX256);
40516 else if (out_n == 16 && in_n == 8)
40517 return ix86_get_builtin (IX86_BUILTIN_CEILPD_VEC_PACK_SFIX512);
40519 break;
40521 case BUILT_IN_ICEILF:
40522 case BUILT_IN_LCEILF:
40523 case BUILT_IN_LLCEILF:
40524 /* The round insn does not trap on denormals. */
40525 if (flag_trapping_math || !TARGET_ROUND)
40526 break;
40528 if (out_mode == SImode && in_mode == SFmode)
40530 if (out_n == 4 && in_n == 4)
40531 return ix86_get_builtin (IX86_BUILTIN_CEILPS_SFIX);
40532 else if (out_n == 8 && in_n == 8)
40533 return ix86_get_builtin (IX86_BUILTIN_CEILPS_SFIX256);
40535 break;
40537 case BUILT_IN_IRINT:
40538 case BUILT_IN_LRINT:
40539 case BUILT_IN_LLRINT:
40540 if (out_mode == SImode && in_mode == DFmode)
40542 if (out_n == 4 && in_n == 2)
40543 return ix86_get_builtin (IX86_BUILTIN_VEC_PACK_SFIX);
40544 else if (out_n == 8 && in_n == 4)
40545 return ix86_get_builtin (IX86_BUILTIN_VEC_PACK_SFIX256);
40547 break;
40549 case BUILT_IN_IRINTF:
40550 case BUILT_IN_LRINTF:
40551 case BUILT_IN_LLRINTF:
40552 if (out_mode == SImode && in_mode == SFmode)
40554 if (out_n == 4 && in_n == 4)
40555 return ix86_get_builtin (IX86_BUILTIN_CVTPS2DQ);
40556 else if (out_n == 8 && in_n == 8)
40557 return ix86_get_builtin (IX86_BUILTIN_CVTPS2DQ256);
40559 break;
40561 case BUILT_IN_IROUND:
40562 case BUILT_IN_LROUND:
40563 case BUILT_IN_LLROUND:
40564 /* The round insn does not trap on denormals. */
40565 if (flag_trapping_math || !TARGET_ROUND)
40566 break;
40568 if (out_mode == SImode && in_mode == DFmode)
40570 if (out_n == 4 && in_n == 2)
40571 return ix86_get_builtin (IX86_BUILTIN_ROUNDPD_AZ_VEC_PACK_SFIX);
40572 else if (out_n == 8 && in_n == 4)
40573 return ix86_get_builtin (IX86_BUILTIN_ROUNDPD_AZ_VEC_PACK_SFIX256);
40574 else if (out_n == 16 && in_n == 8)
40575 return ix86_get_builtin (IX86_BUILTIN_ROUNDPD_AZ_VEC_PACK_SFIX512);
40577 break;
40579 case BUILT_IN_IROUNDF:
40580 case BUILT_IN_LROUNDF:
40581 case BUILT_IN_LLROUNDF:
40582 /* The round insn does not trap on denormals. */
40583 if (flag_trapping_math || !TARGET_ROUND)
40584 break;
40586 if (out_mode == SImode && in_mode == SFmode)
40588 if (out_n == 4 && in_n == 4)
40589 return ix86_get_builtin (IX86_BUILTIN_ROUNDPS_AZ_SFIX);
40590 else if (out_n == 8 && in_n == 8)
40591 return ix86_get_builtin (IX86_BUILTIN_ROUNDPS_AZ_SFIX256);
40593 break;
40595 case BUILT_IN_COPYSIGN:
40596 if (out_mode == DFmode && in_mode == DFmode)
40598 if (out_n == 2 && in_n == 2)
40599 return ix86_get_builtin (IX86_BUILTIN_CPYSGNPD);
40600 else if (out_n == 4 && in_n == 4)
40601 return ix86_get_builtin (IX86_BUILTIN_CPYSGNPD256);
40602 else if (out_n == 8 && in_n == 8)
40603 return ix86_get_builtin (IX86_BUILTIN_CPYSGNPD512);
40605 break;
40607 case BUILT_IN_COPYSIGNF:
40608 if (out_mode == SFmode && in_mode == SFmode)
40610 if (out_n == 4 && in_n == 4)
40611 return ix86_get_builtin (IX86_BUILTIN_CPYSGNPS);
40612 else if (out_n == 8 && in_n == 8)
40613 return ix86_get_builtin (IX86_BUILTIN_CPYSGNPS256);
40614 else if (out_n == 16 && in_n == 16)
40615 return ix86_get_builtin (IX86_BUILTIN_CPYSGNPS512);
40617 break;
40619 case BUILT_IN_FLOOR:
40620 /* The round insn does not trap on denormals. */
40621 if (flag_trapping_math || !TARGET_ROUND)
40622 break;
40624 if (out_mode == DFmode && in_mode == DFmode)
40626 if (out_n == 2 && in_n == 2)
40627 return ix86_get_builtin (IX86_BUILTIN_FLOORPD);
40628 else if (out_n == 4 && in_n == 4)
40629 return ix86_get_builtin (IX86_BUILTIN_FLOORPD256);
40631 break;
40633 case BUILT_IN_FLOORF:
40634 /* The round insn does not trap on denormals. */
40635 if (flag_trapping_math || !TARGET_ROUND)
40636 break;
40638 if (out_mode == SFmode && in_mode == SFmode)
40640 if (out_n == 4 && in_n == 4)
40641 return ix86_get_builtin (IX86_BUILTIN_FLOORPS);
40642 else if (out_n == 8 && in_n == 8)
40643 return ix86_get_builtin (IX86_BUILTIN_FLOORPS256);
40645 break;
40647 case BUILT_IN_CEIL:
40648 /* The round insn does not trap on denormals. */
40649 if (flag_trapping_math || !TARGET_ROUND)
40650 break;
40652 if (out_mode == DFmode && in_mode == DFmode)
40654 if (out_n == 2 && in_n == 2)
40655 return ix86_get_builtin (IX86_BUILTIN_CEILPD);
40656 else if (out_n == 4 && in_n == 4)
40657 return ix86_get_builtin (IX86_BUILTIN_CEILPD256);
40659 break;
40661 case BUILT_IN_CEILF:
40662 /* The round insn does not trap on denormals. */
40663 if (flag_trapping_math || !TARGET_ROUND)
40664 break;
40666 if (out_mode == SFmode && in_mode == SFmode)
40668 if (out_n == 4 && in_n == 4)
40669 return ix86_get_builtin (IX86_BUILTIN_CEILPS);
40670 else if (out_n == 8 && in_n == 8)
40671 return ix86_get_builtin (IX86_BUILTIN_CEILPS256);
40673 break;
40675 case BUILT_IN_TRUNC:
40676 /* The round insn does not trap on denormals. */
40677 if (flag_trapping_math || !TARGET_ROUND)
40678 break;
40680 if (out_mode == DFmode && in_mode == DFmode)
40682 if (out_n == 2 && in_n == 2)
40683 return ix86_get_builtin (IX86_BUILTIN_TRUNCPD);
40684 else if (out_n == 4 && in_n == 4)
40685 return ix86_get_builtin (IX86_BUILTIN_TRUNCPD256);
40687 break;
40689 case BUILT_IN_TRUNCF:
40690 /* The round insn does not trap on denormals. */
40691 if (flag_trapping_math || !TARGET_ROUND)
40692 break;
40694 if (out_mode == SFmode && in_mode == SFmode)
40696 if (out_n == 4 && in_n == 4)
40697 return ix86_get_builtin (IX86_BUILTIN_TRUNCPS);
40698 else if (out_n == 8 && in_n == 8)
40699 return ix86_get_builtin (IX86_BUILTIN_TRUNCPS256);
40701 break;
40703 case BUILT_IN_RINT:
40704 /* The round insn does not trap on denormals. */
40705 if (flag_trapping_math || !TARGET_ROUND)
40706 break;
40708 if (out_mode == DFmode && in_mode == DFmode)
40710 if (out_n == 2 && in_n == 2)
40711 return ix86_get_builtin (IX86_BUILTIN_RINTPD);
40712 else if (out_n == 4 && in_n == 4)
40713 return ix86_get_builtin (IX86_BUILTIN_RINTPD256);
40715 break;
40717 case BUILT_IN_RINTF:
40718 /* The round insn does not trap on denormals. */
40719 if (flag_trapping_math || !TARGET_ROUND)
40720 break;
40722 if (out_mode == SFmode && in_mode == SFmode)
40724 if (out_n == 4 && in_n == 4)
40725 return ix86_get_builtin (IX86_BUILTIN_RINTPS);
40726 else if (out_n == 8 && in_n == 8)
40727 return ix86_get_builtin (IX86_BUILTIN_RINTPS256);
40729 break;
40731 case BUILT_IN_ROUND:
40732 /* The round insn does not trap on denormals. */
40733 if (flag_trapping_math || !TARGET_ROUND)
40734 break;
40736 if (out_mode == DFmode && in_mode == DFmode)
40738 if (out_n == 2 && in_n == 2)
40739 return ix86_get_builtin (IX86_BUILTIN_ROUNDPD_AZ);
40740 else if (out_n == 4 && in_n == 4)
40741 return ix86_get_builtin (IX86_BUILTIN_ROUNDPD_AZ256);
40743 break;
40745 case BUILT_IN_ROUNDF:
40746 /* The round insn does not trap on denormals. */
40747 if (flag_trapping_math || !TARGET_ROUND)
40748 break;
40750 if (out_mode == SFmode && in_mode == SFmode)
40752 if (out_n == 4 && in_n == 4)
40753 return ix86_get_builtin (IX86_BUILTIN_ROUNDPS_AZ);
40754 else if (out_n == 8 && in_n == 8)
40755 return ix86_get_builtin (IX86_BUILTIN_ROUNDPS_AZ256);
40757 break;
40759 case BUILT_IN_FMA:
40760 if (out_mode == DFmode && in_mode == DFmode)
40762 if (out_n == 2 && in_n == 2)
40763 return ix86_get_builtin (IX86_BUILTIN_VFMADDPD);
40764 if (out_n == 4 && in_n == 4)
40765 return ix86_get_builtin (IX86_BUILTIN_VFMADDPD256);
40767 break;
40769 case BUILT_IN_FMAF:
40770 if (out_mode == SFmode && in_mode == SFmode)
40772 if (out_n == 4 && in_n == 4)
40773 return ix86_get_builtin (IX86_BUILTIN_VFMADDPS);
40774 if (out_n == 8 && in_n == 8)
40775 return ix86_get_builtin (IX86_BUILTIN_VFMADDPS256);
40777 break;
40779 default:
40780 break;
40783 /* Dispatch to a handler for a vectorization library. */
40784 if (ix86_veclib_handler)
40785 return ix86_veclib_handler ((enum built_in_function) fn, type_out,
40786 type_in);
40788 return NULL_TREE;
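/* As a concrete example of the mapping above: when the vectorizer asks
   for a vectorized BUILT_IN_SQRT with a V2DF result and a V2DF argument
   (out_mode == in_mode == DFmode, out_n == in_n == 2), the switch
   selects IX86_BUILTIN_SQRTPD, and ix86_get_builtin then returns it only
   if the SSE2 ISA is enabled for the current function.  */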
40791 /* Handler for an SVML-style interface to
40792 a library with vectorized intrinsics. */
40794 static tree
40795 ix86_veclibabi_svml (enum built_in_function fn, tree type_out, tree type_in)
40797 char name[20];
40798 tree fntype, new_fndecl, args;
40799 unsigned arity;
40800 const char *bname;
40801 machine_mode el_mode, in_mode;
40802 int n, in_n;
40804 /* SVML is suitable for unsafe math only. */
40805 if (!flag_unsafe_math_optimizations)
40806 return NULL_TREE;
40808 el_mode = TYPE_MODE (TREE_TYPE (type_out));
40809 n = TYPE_VECTOR_SUBPARTS (type_out);
40810 in_mode = TYPE_MODE (TREE_TYPE (type_in));
40811 in_n = TYPE_VECTOR_SUBPARTS (type_in);
40812 if (el_mode != in_mode
40813 || n != in_n)
40814 return NULL_TREE;
40816 switch (fn)
40818 case BUILT_IN_EXP:
40819 case BUILT_IN_LOG:
40820 case BUILT_IN_LOG10:
40821 case BUILT_IN_POW:
40822 case BUILT_IN_TANH:
40823 case BUILT_IN_TAN:
40824 case BUILT_IN_ATAN:
40825 case BUILT_IN_ATAN2:
40826 case BUILT_IN_ATANH:
40827 case BUILT_IN_CBRT:
40828 case BUILT_IN_SINH:
40829 case BUILT_IN_SIN:
40830 case BUILT_IN_ASINH:
40831 case BUILT_IN_ASIN:
40832 case BUILT_IN_COSH:
40833 case BUILT_IN_COS:
40834 case BUILT_IN_ACOSH:
40835 case BUILT_IN_ACOS:
40836 if (el_mode != DFmode || n != 2)
40837 return NULL_TREE;
40838 break;
40840 case BUILT_IN_EXPF:
40841 case BUILT_IN_LOGF:
40842 case BUILT_IN_LOG10F:
40843 case BUILT_IN_POWF:
40844 case BUILT_IN_TANHF:
40845 case BUILT_IN_TANF:
40846 case BUILT_IN_ATANF:
40847 case BUILT_IN_ATAN2F:
40848 case BUILT_IN_ATANHF:
40849 case BUILT_IN_CBRTF:
40850 case BUILT_IN_SINHF:
40851 case BUILT_IN_SINF:
40852 case BUILT_IN_ASINHF:
40853 case BUILT_IN_ASINF:
40854 case BUILT_IN_COSHF:
40855 case BUILT_IN_COSF:
40856 case BUILT_IN_ACOSHF:
40857 case BUILT_IN_ACOSF:
40858 if (el_mode != SFmode || n != 4)
40859 return NULL_TREE;
40860 break;
40862 default:
40863 return NULL_TREE;
40866 bname = IDENTIFIER_POINTER (DECL_NAME (builtin_decl_implicit (fn)));
40868 if (fn == BUILT_IN_LOGF)
40869 strcpy (name, "vmlsLn4");
40870 else if (fn == BUILT_IN_LOG)
40871 strcpy (name, "vmldLn2");
40872 else if (n == 4)
40874 sprintf (name, "vmls%s", bname+10);
40875 name[strlen (name)-1] = '4';
40877 else
40878 sprintf (name, "vmld%s2", bname+10);
40880 /* Convert the first letter of the scalar function name to uppercase. */
40881 name[4] &= ~0x20;
40883 arity = 0;
40884 for (args = DECL_ARGUMENTS (builtin_decl_implicit (fn));
40885 args;
40886 args = TREE_CHAIN (args))
40887 arity++;
40889 if (arity == 1)
40890 fntype = build_function_type_list (type_out, type_in, NULL);
40891 else
40892 fntype = build_function_type_list (type_out, type_in, type_in, NULL);
40894 /* Build a function declaration for the vectorized function. */
40895 new_fndecl = build_decl (BUILTINS_LOCATION,
40896 FUNCTION_DECL, get_identifier (name), fntype);
40897 TREE_PUBLIC (new_fndecl) = 1;
40898 DECL_EXTERNAL (new_fndecl) = 1;
40899 DECL_IS_NOVOPS (new_fndecl) = 1;
40900 TREE_READONLY (new_fndecl) = 1;
40902 return new_fndecl;
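/* A minimal standalone sketch of the name mangling performed above,
   assuming the scalar builtins are named "__builtin_<func>" (so
   BNAME + 10 skips that prefix); purely illustrative and not used by
   the compiler.  For example "__builtin_sinf" with N == 4 becomes
   "vmlsSin4", "__builtin_sin" with N == 2 becomes "vmldSin2", and log
   and logf map to the special names "vmldLn2" and "vmlsLn4".  */
#if 0
static void
svml_mangle_example (const char *bname, int n, char name[20])
{
  if (strcmp (bname, "__builtin_logf") == 0)
    strcpy (name, "vmlsLn4");
  else if (strcmp (bname, "__builtin_log") == 0)
    strcpy (name, "vmldLn2");
  else if (n == 4)
    {
      /* "vmls" + scalar name, with the trailing character forced to '4'.  */
      sprintf (name, "vmls%s", bname + 10);
      name[strlen (name) - 1] = '4';
    }
  else
    sprintf (name, "vmld%s2", bname + 10);
  /* Uppercase the first letter of the scalar name, as above.  */
  name[4] &= ~0x20;
}
#endif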
40905 /* Handler for an ACML-style interface to
40906 a library with vectorized intrinsics. */
40908 static tree
40909 ix86_veclibabi_acml (enum built_in_function fn, tree type_out, tree type_in)
40911 char name[20] = "__vr.._";
40912 tree fntype, new_fndecl, args;
40913 unsigned arity;
40914 const char *bname;
40915 machine_mode el_mode, in_mode;
40916 int n, in_n;
40918 /* ACML is 64-bit only and suitable for unsafe math only, as
40919 it does not correctly support parts of IEEE with the required
40920 precision, such as denormals. */
40921 if (!TARGET_64BIT
40922 || !flag_unsafe_math_optimizations)
40923 return NULL_TREE;
40925 el_mode = TYPE_MODE (TREE_TYPE (type_out));
40926 n = TYPE_VECTOR_SUBPARTS (type_out);
40927 in_mode = TYPE_MODE (TREE_TYPE (type_in));
40928 in_n = TYPE_VECTOR_SUBPARTS (type_in);
40929 if (el_mode != in_mode
40930 || n != in_n)
40931 return NULL_TREE;
40933 switch (fn)
40935 case BUILT_IN_SIN:
40936 case BUILT_IN_COS:
40937 case BUILT_IN_EXP:
40938 case BUILT_IN_LOG:
40939 case BUILT_IN_LOG2:
40940 case BUILT_IN_LOG10:
40941 name[4] = 'd';
40942 name[5] = '2';
40943 if (el_mode != DFmode
40944 || n != 2)
40945 return NULL_TREE;
40946 break;
40948 case BUILT_IN_SINF:
40949 case BUILT_IN_COSF:
40950 case BUILT_IN_EXPF:
40951 case BUILT_IN_POWF:
40952 case BUILT_IN_LOGF:
40953 case BUILT_IN_LOG2F:
40954 case BUILT_IN_LOG10F:
40955 name[4] = 's';
40956 name[5] = '4';
40957 if (el_mode != SFmode
40958 || n != 4)
40959 return NULL_TREE;
40960 break;
40962 default:
40963 return NULL_TREE;
40966 bname = IDENTIFIER_POINTER (DECL_NAME (builtin_decl_implicit (fn)));
40967 sprintf (name + 7, "%s", bname+10);
40969 arity = 0;
40970 for (args = DECL_ARGUMENTS (builtin_decl_implicit (fn));
40971 args;
40972 args = TREE_CHAIN (args))
40973 arity++;
40975 if (arity == 1)
40976 fntype = build_function_type_list (type_out, type_in, NULL);
40977 else
40978 fntype = build_function_type_list (type_out, type_in, type_in, NULL);
40980 /* Build a function declaration for the vectorized function. */
40981 new_fndecl = build_decl (BUILTINS_LOCATION,
40982 FUNCTION_DECL, get_identifier (name), fntype);
40983 TREE_PUBLIC (new_fndecl) = 1;
40984 DECL_EXTERNAL (new_fndecl) = 1;
40985 DECL_IS_NOVOPS (new_fndecl) = 1;
40986 TREE_READONLY (new_fndecl) = 1;
40988 return new_fndecl;
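/* A worked example of the ACML name mangling above: for BUILT_IN_SIN
   with a V2DF type the "__vr.._" template becomes "__vrd2_", and the
   scalar name "sin" (BNAME + 10, i.e. with the "__builtin_" prefix
   stripped) is appended, giving "__vrd2_sin"; BUILT_IN_SINF with V4SF
   similarly yields "__vrs4_sinf".  */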
40991 /* Returns a decl of a function that implements gather load with
40992 memory type MEM_VECTYPE and index type INDEX_VECTYPE and SCALE.
40993 Return NULL_TREE if it is not available. */
40995 static tree
40996 ix86_vectorize_builtin_gather (const_tree mem_vectype,
40997 const_tree index_type, int scale)
40999 bool si;
41000 enum ix86_builtins code;
41002 if (! TARGET_AVX2)
41003 return NULL_TREE;
41005 if ((TREE_CODE (index_type) != INTEGER_TYPE
41006 && !POINTER_TYPE_P (index_type))
41007 || (TYPE_MODE (index_type) != SImode
41008 && TYPE_MODE (index_type) != DImode))
41009 return NULL_TREE;
41011 if (TYPE_PRECISION (index_type) > POINTER_SIZE)
41012 return NULL_TREE;
41014 /* v*gather* insn sign extends index to pointer mode. */
41015 if (TYPE_PRECISION (index_type) < POINTER_SIZE
41016 && TYPE_UNSIGNED (index_type))
41017 return NULL_TREE;
41019 if (scale <= 0
41020 || scale > 8
41021 || (scale & (scale - 1)) != 0)
41022 return NULL_TREE;
41024 si = TYPE_MODE (index_type) == SImode;
41025 switch (TYPE_MODE (mem_vectype))
41027 case V2DFmode:
41028 if (TARGET_AVX512VL)
41029 code = si ? IX86_BUILTIN_GATHER3SIV2DF : IX86_BUILTIN_GATHER3DIV2DF;
41030 else
41031 code = si ? IX86_BUILTIN_GATHERSIV2DF : IX86_BUILTIN_GATHERDIV2DF;
41032 break;
41033 case V4DFmode:
41034 if (TARGET_AVX512VL)
41035 code = si ? IX86_BUILTIN_GATHER3ALTSIV4DF : IX86_BUILTIN_GATHER3DIV4DF;
41036 else
41037 code = si ? IX86_BUILTIN_GATHERALTSIV4DF : IX86_BUILTIN_GATHERDIV4DF;
41038 break;
41039 case V2DImode:
41040 if (TARGET_AVX512VL)
41041 code = si ? IX86_BUILTIN_GATHER3SIV2DI : IX86_BUILTIN_GATHER3DIV2DI;
41042 else
41043 code = si ? IX86_BUILTIN_GATHERSIV2DI : IX86_BUILTIN_GATHERDIV2DI;
41044 break;
41045 case V4DImode:
41046 if (TARGET_AVX512VL)
41047 code = si ? IX86_BUILTIN_GATHER3ALTSIV4DI : IX86_BUILTIN_GATHER3DIV4DI;
41048 else
41049 code = si ? IX86_BUILTIN_GATHERALTSIV4DI : IX86_BUILTIN_GATHERDIV4DI;
41050 break;
41051 case V4SFmode:
41052 if (TARGET_AVX512VL)
41053 code = si ? IX86_BUILTIN_GATHER3SIV4SF : IX86_BUILTIN_GATHER3DIV4SF;
41054 else
41055 code = si ? IX86_BUILTIN_GATHERSIV4SF : IX86_BUILTIN_GATHERDIV4SF;
41056 break;
41057 case V8SFmode:
41058 if (TARGET_AVX512VL)
41059 code = si ? IX86_BUILTIN_GATHER3SIV8SF : IX86_BUILTIN_GATHER3ALTDIV8SF;
41060 else
41061 code = si ? IX86_BUILTIN_GATHERSIV8SF : IX86_BUILTIN_GATHERALTDIV8SF;
41062 break;
41063 case V4SImode:
41064 if (TARGET_AVX512VL)
41065 code = si ? IX86_BUILTIN_GATHER3SIV4SI : IX86_BUILTIN_GATHER3DIV4SI;
41066 else
41067 code = si ? IX86_BUILTIN_GATHERSIV4SI : IX86_BUILTIN_GATHERDIV4SI;
41068 break;
41069 case V8SImode:
41070 if (TARGET_AVX512VL)
41071 code = si ? IX86_BUILTIN_GATHER3SIV8SI : IX86_BUILTIN_GATHER3ALTDIV8SI;
41072 else
41073 code = si ? IX86_BUILTIN_GATHERSIV8SI : IX86_BUILTIN_GATHERALTDIV8SI;
41074 break;
41075 case V8DFmode:
41076 if (TARGET_AVX512F)
41077 code = si ? IX86_BUILTIN_GATHER3ALTSIV8DF : IX86_BUILTIN_GATHER3DIV8DF;
41078 else
41079 return NULL_TREE;
41080 break;
41081 case V8DImode:
41082 if (TARGET_AVX512F)
41083 code = si ? IX86_BUILTIN_GATHER3ALTSIV8DI : IX86_BUILTIN_GATHER3DIV8DI;
41084 else
41085 return NULL_TREE;
41086 break;
41087 case V16SFmode:
41088 if (TARGET_AVX512F)
41089 code = si ? IX86_BUILTIN_GATHER3SIV16SF : IX86_BUILTIN_GATHER3ALTDIV16SF;
41090 else
41091 return NULL_TREE;
41092 break;
41093 case V16SImode:
41094 if (TARGET_AVX512F)
41095 code = si ? IX86_BUILTIN_GATHER3SIV16SI : IX86_BUILTIN_GATHER3ALTDIV16SI;
41096 else
41097 return NULL_TREE;
41098 break;
41099 default:
41100 return NULL_TREE;
41103 return ix86_get_builtin (code);
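/* For example, gathering a V8SF value with DImode indices and scale 4
   maps to IX86_BUILTIN_GATHERALTDIV8SF (or IX86_BUILTIN_GATHER3ALTDIV8SF
   when AVX512VL is enabled), while an unsigned index type narrower than
   a pointer, or a scale other than 1, 2, 4 or 8, makes the hook return
   NULL_TREE.  */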
41106 /* Returns a code for a target-specific builtin that implements
41107 reciprocal of the function, or NULL_TREE if not available. */
41109 static tree
41110 ix86_builtin_reciprocal (unsigned int fn, bool md_fn, bool)
41112 if (! (TARGET_SSE_MATH && !optimize_insn_for_size_p ()
41113 && flag_finite_math_only && !flag_trapping_math
41114 && flag_unsafe_math_optimizations))
41115 return NULL_TREE;
41117 if (md_fn)
41118 /* Machine dependent builtins. */
41119 switch (fn)
41121 /* Vectorized version of sqrt to rsqrt conversion. */
41122 case IX86_BUILTIN_SQRTPS_NR:
41123 return ix86_get_builtin (IX86_BUILTIN_RSQRTPS_NR);
41125 case IX86_BUILTIN_SQRTPS_NR256:
41126 return ix86_get_builtin (IX86_BUILTIN_RSQRTPS_NR256);
41128 default:
41129 return NULL_TREE;
41131 else
41132 /* Normal builtins. */
41133 switch (fn)
41135 /* Sqrt to rsqrt conversion. */
41136 case BUILT_IN_SQRTF:
41137 return ix86_get_builtin (IX86_BUILTIN_RSQRTF);
41139 default:
41140 return NULL_TREE;
41144 /* Helper for avx_vpermilps256_operand et al. This is also used by
41145 the expansion functions to turn the parallel back into a mask.
41146 The return value is 0 for no match and the imm8+1 for a match. */
41149 avx_vpermilp_parallel (rtx par, machine_mode mode)
41151 unsigned i, nelt = GET_MODE_NUNITS (mode);
41152 unsigned mask = 0;
41153 unsigned char ipar[16] = {}; /* Silence -Wuninitialized warning. */
41155 if (XVECLEN (par, 0) != (int) nelt)
41156 return 0;
41158 /* Validate that all of the elements are constants, and not totally
41159 out of range. Copy the data into an integral array to make the
41160 subsequent checks easier. */
41161 for (i = 0; i < nelt; ++i)
41163 rtx er = XVECEXP (par, 0, i);
41164 unsigned HOST_WIDE_INT ei;
41166 if (!CONST_INT_P (er))
41167 return 0;
41168 ei = INTVAL (er);
41169 if (ei >= nelt)
41170 return 0;
41171 ipar[i] = ei;
41174 switch (mode)
41176 case V8DFmode:
41177 /* In the 512-bit DFmode case, we can only move elements within
41178 a 128-bit lane. First fill the second part of the mask,
41179 then fallthru. */
41180 for (i = 4; i < 6; ++i)
41182 if (ipar[i] < 4 || ipar[i] >= 6)
41183 return 0;
41184 mask |= (ipar[i] - 4) << i;
41186 for (i = 6; i < 8; ++i)
41188 if (ipar[i] < 6)
41189 return 0;
41190 mask |= (ipar[i] - 6) << i;
41192 /* FALLTHRU */
41194 case V4DFmode:
41195 /* In the 256-bit DFmode case, we can only move elements within
41196 a 128-bit lane. */
41197 for (i = 0; i < 2; ++i)
41199 if (ipar[i] >= 2)
41200 return 0;
41201 mask |= ipar[i] << i;
41203 for (i = 2; i < 4; ++i)
41205 if (ipar[i] < 2)
41206 return 0;
41207 mask |= (ipar[i] - 2) << i;
41209 break;
41211 case V16SFmode:
41212 /* In the 512-bit SFmode case, the permutation in the upper 256 bits
41213 must mirror the permutation in the lower 256 bits. */
41214 for (i = 0; i < 8; ++i)
41215 if (ipar[i] + 8 != ipar[i + 8])
41216 return 0;
41217 /* FALLTHRU */
41219 case V8SFmode:
41220 /* In the 256-bit SFmode case, we have full freedom of
41221 movement within the low 128-bit lane, but the high 128-bit
41222 lane must mirror the exact same pattern. */
41223 for (i = 0; i < 4; ++i)
41224 if (ipar[i] + 4 != ipar[i + 4])
41225 return 0;
41226 nelt = 4;
41227 /* FALLTHRU */
41229 case V2DFmode:
41230 case V4SFmode:
41231 /* In the 128-bit case, we have full freedom in the placement of
41232 the elements from the source operand. */
41233 for (i = 0; i < nelt; ++i)
41234 mask |= ipar[i] << (i * (nelt / 2));
41235 break;
41237 default:
41238 gcc_unreachable ();
41241 /* Make sure success has a non-zero value by adding one. */
41242 return mask + 1;
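/* A small standalone sketch of the V4SFmode encoding above, purely for
   illustration: each selector occupies two bits of the immediate
   (ipar[i] << (i * 2)), so the parallel (2 3 0 1) encodes as
   2 | 3<<2 | 0<<4 | 1<<6 = 0x4e and the function returns 0x4f, i.e. the
   imm8 plus one.  */
#if 0
static unsigned
vpermilps_imm_example (const unsigned char sel[4])
{
  unsigned mask = 0, i;
  for (i = 0; i < 4; ++i)
    mask |= (unsigned) (sel[i] & 3) << (i * 2);
  return mask;		/* E.g. {2, 3, 0, 1} gives 0x4e.  */
}
#endif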
41245 /* Helper for avx_vperm2f128_v4df_operand et al. This is also used by
41246 the expansion functions to turn the parallel back into a mask.
41247 The return value is 0 for no match and the imm8+1 for a match. */
41250 avx_vperm2f128_parallel (rtx par, machine_mode mode)
41252 unsigned i, nelt = GET_MODE_NUNITS (mode), nelt2 = nelt / 2;
41253 unsigned mask = 0;
41254 unsigned char ipar[8] = {}; /* Silence -Wuninitialized warning. */
41256 if (XVECLEN (par, 0) != (int) nelt)
41257 return 0;
41259 /* Validate that all of the elements are constants, and not totally
41260 out of range. Copy the data into an integral array to make the
41261 subsequent checks easier. */
41262 for (i = 0; i < nelt; ++i)
41264 rtx er = XVECEXP (par, 0, i);
41265 unsigned HOST_WIDE_INT ei;
41267 if (!CONST_INT_P (er))
41268 return 0;
41269 ei = INTVAL (er);
41270 if (ei >= 2 * nelt)
41271 return 0;
41272 ipar[i] = ei;
41275 /* Validate that the halves of the permute are halves. */
41276 for (i = 0; i < nelt2 - 1; ++i)
41277 if (ipar[i] + 1 != ipar[i + 1])
41278 return 0;
41279 for (i = nelt2; i < nelt - 1; ++i)
41280 if (ipar[i] + 1 != ipar[i + 1])
41281 return 0;
41283 /* Reconstruct the mask. */
41284 for (i = 0; i < 2; ++i)
41286 unsigned e = ipar[i * nelt2];
41287 if (e % nelt2)
41288 return 0;
41289 e /= nelt2;
41290 mask |= e << (i * 4);
41293 /* Make sure success has a non-zero value by adding one. */
41294 return mask + 1;
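/* A worked example for V8SFmode (nelt == 8, nelt2 == 4): the parallel
   (4 5 6 7 8 9 10 11) selects the high half of the first operand
   followed by the low half of the second, so e is 1 for the low lane
   and 2 for the high lane, giving mask 0x21 and a return value of
   0x22.  */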
41297 /* Return a register priority for hard reg REGNO. */
41298 static int
41299 ix86_register_priority (int hard_regno)
41301 /* ebp and r13 as the base always want a displacement, and r12 as the
41302 base always wants an index. So discourage their use in an
41303 address. */
41304 if (hard_regno == R12_REG || hard_regno == R13_REG)
41305 return 0;
41306 if (hard_regno == BP_REG)
41307 return 1;
41308 /* New x86-64 int registers result in bigger code size. Discourage
41309 them. */
41310 if (FIRST_REX_INT_REG <= hard_regno && hard_regno <= LAST_REX_INT_REG)
41311 return 2;
41312 /* New x86-64 SSE registers result in bigger code size. Discourage
41313 them. */
41314 if (FIRST_REX_SSE_REG <= hard_regno && hard_regno <= LAST_REX_SSE_REG)
41315 return 2;
41316 /* Usage of AX register results in smaller code. Prefer it. */
41317 if (hard_regno == AX_REG)
41318 return 4;
41319 return 3;
41322 /* Implement TARGET_PREFERRED_RELOAD_CLASS.
41324 Put float CONST_DOUBLE in the constant pool instead of fp regs.
41325 QImode must go into class Q_REGS.
41326 Narrow ALL_REGS to GENERAL_REGS. This supports allowing movsf and
41327 movdf to do mem-to-mem moves through integer regs. */
41329 static reg_class_t
41330 ix86_preferred_reload_class (rtx x, reg_class_t regclass)
41332 machine_mode mode = GET_MODE (x);
41334 /* We're only allowed to return a subclass of CLASS. Many of the
41335 following checks fail for NO_REGS, so eliminate that early. */
41336 if (regclass == NO_REGS)
41337 return NO_REGS;
41339 /* All classes can load zeros. */
41340 if (x == CONST0_RTX (mode))
41341 return regclass;
41343 /* Force constants into memory if we are loading a (nonzero) constant into
41344 an MMX, SSE or MASK register. This is because there are no MMX/SSE/MASK
41345 instructions to load from a constant. */
41346 if (CONSTANT_P (x)
41347 && (MAYBE_MMX_CLASS_P (regclass)
41348 || MAYBE_SSE_CLASS_P (regclass)
41349 || MAYBE_MASK_CLASS_P (regclass)))
41350 return NO_REGS;
41352 /* Prefer SSE regs only, if we can use them for math. */
41353 if (TARGET_SSE_MATH && !TARGET_MIX_SSE_I387 && SSE_FLOAT_MODE_P (mode))
41354 return SSE_CLASS_P (regclass) ? regclass : NO_REGS;
41356 /* Floating-point constants need more complex checks. */
41357 if (CONST_DOUBLE_P (x))
41359 /* General regs can load everything. */
41360 if (reg_class_subset_p (regclass, GENERAL_REGS))
41361 return regclass;
41363 /* Floats can load 0 and 1 plus some others. Note that we eliminated
41364 zero above. We only want to wind up preferring 80387 registers if
41365 we plan on doing computation with them. */
41366 if (TARGET_80387
41367 && standard_80387_constant_p (x) > 0)
41369 /* Limit class to non-sse. */
41370 if (regclass == FLOAT_SSE_REGS)
41371 return FLOAT_REGS;
41372 if (regclass == FP_TOP_SSE_REGS)
41373 return FP_TOP_REG;
41374 if (regclass == FP_SECOND_SSE_REGS)
41375 return FP_SECOND_REG;
41376 if (regclass == FLOAT_INT_REGS || regclass == FLOAT_REGS)
41377 return regclass;
41380 return NO_REGS;
41383 /* Generally when we see PLUS here, it's the function invariant
41384 (plus soft-fp const_int), which can only be computed into general
41385 regs. */
41386 if (GET_CODE (x) == PLUS)
41387 return reg_class_subset_p (regclass, GENERAL_REGS) ? regclass : NO_REGS;
41389 /* QImode constants are easy to load, but non-constant QImode data
41390 must go into Q_REGS. */
41391 if (GET_MODE (x) == QImode && !CONSTANT_P (x))
41393 if (reg_class_subset_p (regclass, Q_REGS))
41394 return regclass;
41395 if (reg_class_subset_p (Q_REGS, regclass))
41396 return Q_REGS;
41397 return NO_REGS;
41400 return regclass;
41403 /* Discourage putting floating-point values in SSE registers unless
41404 SSE math is being used, and likewise for the 387 registers. */
41405 static reg_class_t
41406 ix86_preferred_output_reload_class (rtx x, reg_class_t regclass)
41408 machine_mode mode = GET_MODE (x);
41410 /* Restrict the output reload class to the register bank that we are doing
41411 math on. If we would like not to return a subset of CLASS, reject this
41412 alternative: if reload cannot do this, it will still use its choice. */
41413 mode = GET_MODE (x);
41414 if (TARGET_SSE_MATH && SSE_FLOAT_MODE_P (mode))
41415 return MAYBE_SSE_CLASS_P (regclass) ? ALL_SSE_REGS : NO_REGS;
41417 if (X87_FLOAT_MODE_P (mode))
41419 if (regclass == FP_TOP_SSE_REGS)
41420 return FP_TOP_REG;
41421 else if (regclass == FP_SECOND_SSE_REGS)
41422 return FP_SECOND_REG;
41423 else
41424 return FLOAT_CLASS_P (regclass) ? regclass : NO_REGS;
41427 return regclass;
41430 static reg_class_t
41431 ix86_secondary_reload (bool in_p, rtx x, reg_class_t rclass,
41432 machine_mode mode, secondary_reload_info *sri)
41434 /* Double-word spills from general registers to non-offsettable memory
41435 references (zero-extended addresses) require special handling. */
41436 if (TARGET_64BIT
41437 && MEM_P (x)
41438 && GET_MODE_SIZE (mode) > UNITS_PER_WORD
41439 && INTEGER_CLASS_P (rclass)
41440 && !offsettable_memref_p (x))
41442 sri->icode = (in_p
41443 ? CODE_FOR_reload_noff_load
41444 : CODE_FOR_reload_noff_store);
41445 /* Add the cost of moving address to a temporary. */
41446 sri->extra_cost = 1;
41448 return NO_REGS;
41451 /* QImode spills from non-QI registers require an
41452 intermediate register on 32-bit targets. */
41453 if (mode == QImode
41454 && (MAYBE_MASK_CLASS_P (rclass)
41455 || (!TARGET_64BIT && !in_p
41456 && INTEGER_CLASS_P (rclass)
41457 && MAYBE_NON_Q_CLASS_P (rclass))))
41459 int regno;
41461 if (REG_P (x))
41462 regno = REGNO (x);
41463 else
41464 regno = -1;
41466 if (regno >= FIRST_PSEUDO_REGISTER || GET_CODE (x) == SUBREG)
41467 regno = true_regnum (x);
41469 /* Return Q_REGS if the operand is in memory. */
41470 if (regno == -1)
41471 return Q_REGS;
41474 /* This condition handles the corner case where an expression involving
41475 pointers gets vectorized. We're trying to use the address of a
41476 stack slot as a vector initializer.
41478 (set (reg:V2DI 74 [ vect_cst_.2 ])
41479 (vec_duplicate:V2DI (reg/f:DI 20 frame)))
41481 Eventually frame gets turned into sp+offset like this:
41483 (set (reg:V2DI 21 xmm0 [orig:74 vect_cst_.2 ] [74])
41484 (vec_duplicate:V2DI (plus:DI (reg/f:DI 7 sp)
41485 (const_int 392 [0x188]))))
41487 That later gets turned into:
41489 (set (reg:V2DI 21 xmm0 [orig:74 vect_cst_.2 ] [74])
41490 (vec_duplicate:V2DI (plus:DI (reg/f:DI 7 sp)
41491 (mem/u/c/i:DI (symbol_ref/u:DI ("*.LC0") [flags 0x2]) [0 S8 A64]))))
41493 We'll have the following reload recorded:
41495 Reload 0: reload_in (DI) =
41496 (plus:DI (reg/f:DI 7 sp)
41497 (mem/u/c/i:DI (symbol_ref/u:DI ("*.LC0") [flags 0x2]) [0 S8 A64]))
41498 reload_out (V2DI) = (reg:V2DI 21 xmm0 [orig:74 vect_cst_.2 ] [74])
41499 SSE_REGS, RELOAD_OTHER (opnum = 0), can't combine
41500 reload_in_reg: (plus:DI (reg/f:DI 7 sp) (const_int 392 [0x188]))
41501 reload_out_reg: (reg:V2DI 21 xmm0 [orig:74 vect_cst_.2 ] [74])
41502 reload_reg_rtx: (reg:V2DI 22 xmm1)
41504 Which isn't going to work, since SSE instructions can't handle scalar
41505 additions. Returning GENERAL_REGS forces the addition into an integer
41506 register, and reload can handle subsequent reloads without problems. */
41508 if (in_p && GET_CODE (x) == PLUS
41509 && SSE_CLASS_P (rclass)
41510 && SCALAR_INT_MODE_P (mode))
41511 return GENERAL_REGS;
41513 return NO_REGS;
41516 /* Implement TARGET_CLASS_LIKELY_SPILLED_P. */
41518 static bool
41519 ix86_class_likely_spilled_p (reg_class_t rclass)
41521 switch (rclass)
41523 case AREG:
41524 case DREG:
41525 case CREG:
41526 case BREG:
41527 case AD_REGS:
41528 case SIREG:
41529 case DIREG:
41530 case SSE_FIRST_REG:
41531 case FP_TOP_REG:
41532 case FP_SECOND_REG:
41533 case BND_REGS:
41534 return true;
41536 default:
41537 break;
41540 return false;
41543 /* If we are copying between general and FP registers, we need a memory
41544 location. The same is true for SSE and MMX registers.
41546 To optimize register_move_cost performance, allow inline variant.
41548 The macro can't work reliably when one of the CLASSES is a class containing
41549 registers from multiple units (SSE, MMX, integer). We avoid this by never
41550 combining those units in a single alternative in the machine description.
41551 Ensure that this constraint holds to avoid unexpected surprises.
41553 When STRICT is false, we are being called from REGISTER_MOVE_COST, so do not
41554 enforce these sanity checks. */
41556 static inline bool
41557 inline_secondary_memory_needed (enum reg_class class1, enum reg_class class2,
41558 machine_mode mode, int strict)
41560 if (lra_in_progress && (class1 == NO_REGS || class2 == NO_REGS))
41561 return false;
41562 if (MAYBE_FLOAT_CLASS_P (class1) != FLOAT_CLASS_P (class1)
41563 || MAYBE_FLOAT_CLASS_P (class2) != FLOAT_CLASS_P (class2)
41564 || MAYBE_SSE_CLASS_P (class1) != SSE_CLASS_P (class1)
41565 || MAYBE_SSE_CLASS_P (class2) != SSE_CLASS_P (class2)
41566 || MAYBE_MMX_CLASS_P (class1) != MMX_CLASS_P (class1)
41567 || MAYBE_MMX_CLASS_P (class2) != MMX_CLASS_P (class2))
41569 gcc_assert (!strict || lra_in_progress);
41570 return true;
41573 if (FLOAT_CLASS_P (class1) != FLOAT_CLASS_P (class2))
41574 return true;
41576 /* Between mask and general, we have moves no larger than word size. */
41577 if ((MAYBE_MASK_CLASS_P (class1) != MAYBE_MASK_CLASS_P (class2))
41578 && (GET_MODE_SIZE (mode) > UNITS_PER_WORD))
41579 return true;
41581 /* ??? This is a lie. We do have moves between mmx/general, and for
41582 mmx/sse2. But by saying we need secondary memory we discourage the
41583 register allocator from using the mmx registers unless needed. */
41584 if (MMX_CLASS_P (class1) != MMX_CLASS_P (class2))
41585 return true;
41587 if (SSE_CLASS_P (class1) != SSE_CLASS_P (class2))
41589 /* SSE1 doesn't have any direct moves from other classes. */
41590 if (!TARGET_SSE2)
41591 return true;
41593 /* If the target says that inter-unit moves are more expensive
41594 than moving through memory, then don't generate them. */
41595 if ((SSE_CLASS_P (class1) && !TARGET_INTER_UNIT_MOVES_FROM_VEC)
41596 || (SSE_CLASS_P (class2) && !TARGET_INTER_UNIT_MOVES_TO_VEC))
41597 return true;
41599 /* Between SSE and general, we have moves no larger than word size. */
41600 if (GET_MODE_SIZE (mode) > UNITS_PER_WORD)
41601 return true;
41604 return false;
41607 bool
41608 ix86_secondary_memory_needed (enum reg_class class1, enum reg_class class2,
41609 machine_mode mode, int strict)
41611 return inline_secondary_memory_needed (class1, class2, mode, strict);
41614 /* Implement the TARGET_CLASS_MAX_NREGS hook.
41616 On the 80386, this is the size of MODE in words,
41617 except in the FP regs, where a single reg is always enough. */
41619 static unsigned char
41620 ix86_class_max_nregs (reg_class_t rclass, machine_mode mode)
41622 if (MAYBE_INTEGER_CLASS_P (rclass))
41624 if (mode == XFmode)
41625 return (TARGET_64BIT ? 2 : 3);
41626 else if (mode == XCmode)
41627 return (TARGET_64BIT ? 4 : 6);
41628 else
41629 return ((GET_MODE_SIZE (mode) + UNITS_PER_WORD - 1) / UNITS_PER_WORD);
41631 else
41633 if (COMPLEX_MODE_P (mode))
41634 return 2;
41635 else
41636 return 1;
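/* For example, an XFmode value needs 3 general registers in 32-bit mode
   (2 in 64-bit mode) and XCmode needs 6 (or 4), while for the non-integer
   classes any mode fits in a single register, or two for complex
   modes.  */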
41640 /* Return true if the registers in CLASS cannot represent the change from
41641 modes FROM to TO. */
41643 bool
41644 ix86_cannot_change_mode_class (machine_mode from, machine_mode to,
41645 enum reg_class regclass)
41647 if (from == to)
41648 return false;
41650 /* x87 registers can't do subreg at all, as all values are reformatted
41651 to extended precision. */
41652 if (MAYBE_FLOAT_CLASS_P (regclass))
41653 return true;
41655 if (MAYBE_SSE_CLASS_P (regclass) || MAYBE_MMX_CLASS_P (regclass))
41657 /* Vector registers do not support QI or HImode loads. If we don't
41658 disallow a change to these modes, reload will assume it's ok to
41659 drop the subreg from (subreg:SI (reg:HI 100) 0). This affects
41660 the vec_dupv4hi pattern. */
41661 if (GET_MODE_SIZE (from) < 4)
41662 return true;
41665 return false;
41668 /* Return the cost of moving data of mode M between a
41669 register and memory. A value of 2 is the default; this cost is
41670 relative to those in `REGISTER_MOVE_COST'.
41672 This function is used extensively by register_move_cost, which is used to
41673 build tables at startup. Make it inline in this case.
41674 When IN is 2, return the maximum of the in and out move costs.
41676 If moving between registers and memory is more expensive than
41677 between two registers, you should define this macro to express the
41678 relative cost.
41680 Also model the increased cost of moving QImode registers in non-Q_REGS
41681 classes. */
41683 static inline int
41684 inline_memory_move_cost (machine_mode mode, enum reg_class regclass,
41685 int in)
41687 int cost;
41688 if (FLOAT_CLASS_P (regclass))
41690 int index;
41691 switch (mode)
41693 case SFmode:
41694 index = 0;
41695 break;
41696 case DFmode:
41697 index = 1;
41698 break;
41699 case XFmode:
41700 index = 2;
41701 break;
41702 default:
41703 return 100;
41705 if (in == 2)
41706 return MAX (ix86_cost->fp_load [index], ix86_cost->fp_store [index]);
41707 return in ? ix86_cost->fp_load [index] : ix86_cost->fp_store [index];
41709 if (SSE_CLASS_P (regclass))
41711 int index;
41712 switch (GET_MODE_SIZE (mode))
41714 case 4:
41715 index = 0;
41716 break;
41717 case 8:
41718 index = 1;
41719 break;
41720 case 16:
41721 index = 2;
41722 break;
41723 default:
41724 return 100;
41726 if (in == 2)
41727 return MAX (ix86_cost->sse_load [index], ix86_cost->sse_store [index]);
41728 return in ? ix86_cost->sse_load [index] : ix86_cost->sse_store [index];
41730 if (MMX_CLASS_P (regclass))
41732 int index;
41733 switch (GET_MODE_SIZE (mode))
41735 case 4:
41736 index = 0;
41737 break;
41738 case 8:
41739 index = 1;
41740 break;
41741 default:
41742 return 100;
41744 if (in == 2)
41745 return MAX (ix86_cost->mmx_load [index], ix86_cost->mmx_store [index]);
41746 return in ? ix86_cost->mmx_load [index] : ix86_cost->mmx_store [index];
41748 switch (GET_MODE_SIZE (mode))
41750 case 1:
41751 if (Q_CLASS_P (regclass) || TARGET_64BIT)
41753 if (!in)
41754 return ix86_cost->int_store[0];
41755 if (TARGET_PARTIAL_REG_DEPENDENCY
41756 && optimize_function_for_speed_p (cfun))
41757 cost = ix86_cost->movzbl_load;
41758 else
41759 cost = ix86_cost->int_load[0];
41760 if (in == 2)
41761 return MAX (cost, ix86_cost->int_store[0]);
41762 return cost;
41764 else
41766 if (in == 2)
41767 return MAX (ix86_cost->movzbl_load, ix86_cost->int_store[0] + 4);
41768 if (in)
41769 return ix86_cost->movzbl_load;
41770 else
41771 return ix86_cost->int_store[0] + 4;
41773 break;
41774 case 2:
41775 if (in == 2)
41776 return MAX (ix86_cost->int_load[1], ix86_cost->int_store[1]);
41777 return in ? ix86_cost->int_load[1] : ix86_cost->int_store[1];
41778 default:
41779 /* Compute number of 32bit moves needed. TFmode is moved as XFmode. */
41780 if (mode == TFmode)
41781 mode = XFmode;
41782 if (in == 2)
41783 cost = MAX (ix86_cost->int_load[2] , ix86_cost->int_store[2]);
41784 else if (in)
41785 cost = ix86_cost->int_load[2];
41786 else
41787 cost = ix86_cost->int_store[2];
41788 return (cost * (((int) GET_MODE_SIZE (mode)
41789 + UNITS_PER_WORD - 1) / UNITS_PER_WORD));
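/* For example, spilling an SFmode value held in FLOAT_REGS costs
   ix86_cost->fp_store[0] and reloading it costs ix86_cost->fp_load[0];
   with IN == 2, as used by ix86_register_move_cost below, the larger of
   the two is returned.  */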
41793 static int
41794 ix86_memory_move_cost (machine_mode mode, reg_class_t regclass,
41795 bool in)
41797 return inline_memory_move_cost (mode, (enum reg_class) regclass, in ? 1 : 0);
41801 /* Return the cost of moving data from a register in class CLASS1 to
41802 one in class CLASS2.
41804 It is not required that the cost always equal 2 when FROM is the same as TO;
41805 on some machines it is expensive to move between registers if they are not
41806 general registers. */
41808 static int
41809 ix86_register_move_cost (machine_mode mode, reg_class_t class1_i,
41810 reg_class_t class2_i)
41812 enum reg_class class1 = (enum reg_class) class1_i;
41813 enum reg_class class2 = (enum reg_class) class2_i;
41815 /* In case we require secondary memory, compute cost of the store followed
41816 by load. In order to avoid bad register allocation choices, we need
41817 for this to be *at least* as high as the symmetric MEMORY_MOVE_COST. */
41819 if (inline_secondary_memory_needed (class1, class2, mode, 0))
41821 int cost = 1;
41823 cost += inline_memory_move_cost (mode, class1, 2);
41824 cost += inline_memory_move_cost (mode, class2, 2);
41826 /* In the case of copying from a general purpose register we may emit
41827 multiple stores followed by a single load, causing a memory size
41828 mismatch stall. Count this as an arbitrarily high cost of 20. */
41829 if (targetm.class_max_nregs (class1, mode)
41830 > targetm.class_max_nregs (class2, mode))
41831 cost += 20;
41833 /* In the case of FP/MMX moves, the registers actually overlap, and we
41834 have to switch modes in order to treat them differently. */
41835 if ((MMX_CLASS_P (class1) && MAYBE_FLOAT_CLASS_P (class2))
41836 || (MMX_CLASS_P (class2) && MAYBE_FLOAT_CLASS_P (class1)))
41837 cost += 20;
41839 return cost;
41842 /* Moves between SSE/MMX and integer unit are expensive. */
41843 if (MMX_CLASS_P (class1) != MMX_CLASS_P (class2)
41844 || SSE_CLASS_P (class1) != SSE_CLASS_P (class2))
41846 /* ??? By keeping the returned value relatively high, we limit the number
41847 of moves between integer and MMX/SSE registers for all targets.
41848 Additionally, a high value prevents problems with ix86_modes_tieable_p (),
41849 where integer modes in MMX/SSE registers are not tieable
41850 because of missing QImode and HImode moves to, from or between
41851 MMX/SSE registers. */
41852 return MAX (8, ix86_cost->mmxsse_to_integer);
41854 if (MAYBE_FLOAT_CLASS_P (class1))
41855 return ix86_cost->fp_move;
41856 if (MAYBE_SSE_CLASS_P (class1))
41857 return ix86_cost->sse_move;
41858 if (MAYBE_MMX_CLASS_P (class1))
41859 return ix86_cost->mmx_move;
41860 return 2;
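/* For example, when no secondary memory is needed, copying an SImode
   value between GENERAL_REGS and SSE_REGS costs
   MAX (8, ix86_cost->mmxsse_to_integer), a move within the x87 register
   stack costs ix86_cost->fp_move, and a copy between two general
   registers costs 2.  */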
41863 /* Return TRUE if hard register REGNO can hold a value of machine-mode
41864 MODE. */
41866 bool
41867 ix86_hard_regno_mode_ok (int regno, machine_mode mode)
41869 /* Flags can hold only CCmode values, and only flags can hold them. */
41870 if (CC_REGNO_P (regno))
41871 return GET_MODE_CLASS (mode) == MODE_CC;
41872 if (GET_MODE_CLASS (mode) == MODE_CC
41873 || GET_MODE_CLASS (mode) == MODE_RANDOM
41874 || GET_MODE_CLASS (mode) == MODE_PARTIAL_INT)
41875 return false;
41876 if (STACK_REGNO_P (regno))
41877 return VALID_FP_MODE_P (mode);
41878 if (MASK_REGNO_P (regno))
41879 return (VALID_MASK_REG_MODE (mode)
41880 || (TARGET_AVX512BW
41881 && VALID_MASK_AVX512BW_MODE (mode)));
41882 if (BND_REGNO_P (regno))
41883 return VALID_BND_REG_MODE (mode);
41884 if (SSE_REGNO_P (regno))
41886 /* We implement the move patterns for all vector modes into and
41887 out of SSE registers, even when no operation instructions
41888 are available. */
41890 /* For AVX-512 we allow, regardless of regno:
41891 - XI mode
41892 - any of 512-bit wide vector mode
41893 - any scalar mode. */
41894 if (TARGET_AVX512F
41895 && (mode == XImode
41896 || VALID_AVX512F_REG_MODE (mode)
41897 || VALID_AVX512F_SCALAR_MODE (mode)))
41898 return true;
41900 /* TODO check for QI/HI scalars. */
41901 /* AVX512VL allows SSE registers 16+ for 128/256-bit modes. */
41902 if (TARGET_AVX512VL
41903 && (mode == OImode
41904 || mode == TImode
41905 || VALID_AVX256_REG_MODE (mode)
41906 || VALID_AVX512VL_128_REG_MODE (mode)))
41907 return true;
41909 /* xmm16-xmm31 are only available for AVX-512. */
41910 if (EXT_REX_SSE_REGNO_P (regno))
41911 return false;
41913 /* OImode and AVX modes are available only when AVX is enabled. */
41914 return ((TARGET_AVX
41915 && VALID_AVX256_REG_OR_OI_MODE (mode))
41916 || VALID_SSE_REG_MODE (mode)
41917 || VALID_SSE2_REG_MODE (mode)
41918 || VALID_MMX_REG_MODE (mode)
41919 || VALID_MMX_REG_MODE_3DNOW (mode));
41921 if (MMX_REGNO_P (regno))
41923 /* We implement the move patterns for 3DNOW modes even in MMX mode,
41924 so if the register is available at all, then we can move data of
41925 the given mode into or out of it. */
41926 return (VALID_MMX_REG_MODE (mode)
41927 || VALID_MMX_REG_MODE_3DNOW (mode));
41930 if (mode == QImode)
41932 /* Take care for QImode values - they can be in non-QI regs,
41933 but then they do cause partial register stalls. */
41934 if (ANY_QI_REGNO_P (regno))
41935 return true;
41936 if (!TARGET_PARTIAL_REG_STALL)
41937 return true;
41938 /* LRA checks if the hard register is OK for the given mode.
41939 QImode values can live in non-QI regs, so we allow all
41940 registers here. */
41941 if (lra_in_progress)
41942 return true;
41943 return !can_create_pseudo_p ();
41945 /* We handle both integer and floats in the general purpose registers. */
41946 else if (VALID_INT_MODE_P (mode))
41947 return true;
41948 else if (VALID_FP_MODE_P (mode))
41949 return true;
41950 else if (VALID_DFP_MODE_P (mode))
41951 return true;
41952 /* Lots of MMX code casts 8 byte vector modes to DImode. If we then go
41953 on to use that value in smaller contexts, this can easily force a
41954 pseudo to be allocated to GENERAL_REGS. Since this is no worse than
41955 supporting DImode, allow it. */
41956 else if (VALID_MMX_REG_MODE_3DNOW (mode) || VALID_MMX_REG_MODE (mode))
41957 return true;
41959 return false;
41962 /* A subroutine of ix86_modes_tieable_p. Return true if MODE is a
41963 tieable integer mode. */
41965 static bool
41966 ix86_tieable_integer_mode_p (machine_mode mode)
41968 switch (mode)
41970 case HImode:
41971 case SImode:
41972 return true;
41974 case QImode:
41975 return TARGET_64BIT || !TARGET_PARTIAL_REG_STALL;
41977 case DImode:
41978 return TARGET_64BIT;
41980 default:
41981 return false;
41985 /* Return true if MODE1 is accessible in a register that can hold MODE2
41986 without copying. That is, all register classes that can hold MODE2
41987 can also hold MODE1. */
41989 bool
41990 ix86_modes_tieable_p (machine_mode mode1, machine_mode mode2)
41992 if (mode1 == mode2)
41993 return true;
41995 if (ix86_tieable_integer_mode_p (mode1)
41996 && ix86_tieable_integer_mode_p (mode2))
41997 return true;
41999 /* MODE2 being XFmode implies fp stack or general regs, which means we
42000 can tie any smaller floating point modes to it. Note that we do not
42001 tie this with TFmode. */
42002 if (mode2 == XFmode)
42003 return mode1 == SFmode || mode1 == DFmode;
42005 /* MODE2 being DFmode implies fp stack, general or sse regs, which means
42006 that we can tie it with SFmode. */
42007 if (mode2 == DFmode)
42008 return mode1 == SFmode;
42010 /* If MODE2 is only appropriate for an SSE register, then tie with
42011 any other mode acceptable to SSE registers. */
42012 if (GET_MODE_SIZE (mode2) == 32
42013 && ix86_hard_regno_mode_ok (FIRST_SSE_REG, mode2))
42014 return (GET_MODE_SIZE (mode1) == 32
42015 && ix86_hard_regno_mode_ok (FIRST_SSE_REG, mode1));
42016 if (GET_MODE_SIZE (mode2) == 16
42017 && ix86_hard_regno_mode_ok (FIRST_SSE_REG, mode2))
42018 return (GET_MODE_SIZE (mode1) == 16
42019 && ix86_hard_regno_mode_ok (FIRST_SSE_REG, mode1));
42021 /* If MODE2 is appropriate for an MMX register, then tie
42022 with any other mode acceptable to MMX registers. */
42023 if (GET_MODE_SIZE (mode2) == 8
42024 && ix86_hard_regno_mode_ok (FIRST_MMX_REG, mode2))
42025 return (GET_MODE_SIZE (mode1) == 8
42026 && ix86_hard_regno_mode_ok (FIRST_MMX_REG, mode1));
42028 return false;
42031 /* Return the cost of moving between two registers of mode MODE. */
42033 static int
42034 ix86_set_reg_reg_cost (machine_mode mode)
42036 unsigned int units = UNITS_PER_WORD;
42038 switch (GET_MODE_CLASS (mode))
42040 default:
42041 break;
42043 case MODE_CC:
42044 units = GET_MODE_SIZE (CCmode);
42045 break;
42047 case MODE_FLOAT:
42048 if ((TARGET_SSE && mode == TFmode)
42049 || (TARGET_80387 && mode == XFmode)
42050 || ((TARGET_80387 || TARGET_SSE2) && mode == DFmode)
42051 || ((TARGET_80387 || TARGET_SSE) && mode == SFmode))
42052 units = GET_MODE_SIZE (mode);
42053 break;
42055 case MODE_COMPLEX_FLOAT:
42056 if ((TARGET_SSE && mode == TCmode)
42057 || (TARGET_80387 && mode == XCmode)
42058 || ((TARGET_80387 || TARGET_SSE2) && mode == DCmode)
42059 || ((TARGET_80387 || TARGET_SSE) && mode == SCmode))
42060 units = GET_MODE_SIZE (mode);
42061 break;
42063 case MODE_VECTOR_INT:
42064 case MODE_VECTOR_FLOAT:
42065 if ((TARGET_AVX512F && VALID_AVX512F_REG_MODE (mode))
42066 || (TARGET_AVX && VALID_AVX256_REG_MODE (mode))
42067 || (TARGET_SSE2 && VALID_SSE2_REG_MODE (mode))
42068 || (TARGET_SSE && VALID_SSE_REG_MODE (mode))
42069 || (TARGET_MMX && VALID_MMX_REG_MODE (mode)))
42070 units = GET_MODE_SIZE (mode);
42073 /* Return the cost of moving between two registers of mode MODE,
42074 assuming that the move will be in pieces of at most UNITS bytes. */
42075 return COSTS_N_INSNS ((GET_MODE_SIZE (mode) + units - 1) / units);
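/* Editorial worked example (illustrative, not part of the upstream comments):
   with UNITS_PER_WORD == 8, a TImode (16 byte) integer move is costed in
   8 byte pieces, i.e. COSTS_N_INSNS (2), whereas a 32 byte V8SFmode move on
   an AVX target sets UNITS to 32 above and is costed as COSTS_N_INSNS (1).  */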
42078 /* Compute a (partial) cost for rtx X. Return true if the complete
42079 cost has been computed, and false if subexpressions should be
42080 scanned. In either case, *TOTAL contains the cost result. */
42082 static bool
42083 ix86_rtx_costs (rtx x, int code_i, int outer_code_i, int opno, int *total,
42084 bool speed)
42086 rtx mask;
42087 enum rtx_code code = (enum rtx_code) code_i;
42088 enum rtx_code outer_code = (enum rtx_code) outer_code_i;
42089 machine_mode mode = GET_MODE (x);
42090 const struct processor_costs *cost = speed ? ix86_cost : &ix86_size_cost;
42092 switch (code)
42094 case SET:
42095 if (register_operand (SET_DEST (x), VOIDmode)
42096 && reg_or_0_operand (SET_SRC (x), VOIDmode))
42098 *total = ix86_set_reg_reg_cost (GET_MODE (SET_DEST (x)));
42099 return true;
42101 return false;
42103 case CONST_INT:
42104 case CONST:
42105 case LABEL_REF:
42106 case SYMBOL_REF:
42107 if (TARGET_64BIT && !x86_64_immediate_operand (x, VOIDmode))
42108 *total = 3;
42109 else if (TARGET_64BIT && !x86_64_zext_immediate_operand (x, VOIDmode))
42110 *total = 2;
42111 else if (flag_pic && SYMBOLIC_CONST (x)
42112 && !(TARGET_64BIT
42113 && (GET_CODE (x) == LABEL_REF
42114 || (GET_CODE (x) == SYMBOL_REF
42115 && SYMBOL_REF_LOCAL_P (x))))
42116 /* Use 0 cost for CONST to improve its propagation. */
42117 && (TARGET_64BIT || GET_CODE (x) != CONST))
42118 *total = 1;
42119 else
42120 *total = 0;
42121 return true;
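/* Editorial worked example (illustrative): on x86_64 an immediate such as
   0x112233445566 is not a sign-extended 32-bit value, so it fails
   x86_64_immediate_operand and is costed 3 (it needs a movabs); a value such
   as 0xffffffff80000000 is sign-extendable but not zero-extendable and is
   costed 2; small constants like 42 cost 0 and are propagated freely.  */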
42123 case CONST_WIDE_INT:
42124 *total = 0;
42125 return true;
42127 case CONST_DOUBLE:
42128 switch (standard_80387_constant_p (x))
42130 case 1: /* 0.0 */
42131 *total = 1;
42132 return true;
42133 default: /* Other constants */
42134 *total = 2;
42135 return true;
42136 case 0:
42137 case -1:
42138 break;
42140 if (SSE_FLOAT_MODE_P (mode))
42142 case CONST_VECTOR:
42143 switch (standard_sse_constant_p (x))
42145 case 0:
42146 break;
42147 case 1: /* 0: xor eliminates false dependency */
42148 *total = 0;
42149 return true;
42150 default: /* -1: cmp contains false dependency */
42151 *total = 1;
42152 return true;
42155 /* Fall back to (MEM (SYMBOL_REF)), since that's where
42156 it'll probably end up. Add a penalty for size. */
42157 *total = (COSTS_N_INSNS (1)
42158 + (flag_pic != 0 && !TARGET_64BIT)
42159 + (mode == SFmode ? 0 : mode == DFmode ? 1 : 2));
42160 return true;
42162 case ZERO_EXTEND:
42163 /* The zero extension is often completely free on x86_64, so make
42164 it as cheap as possible. */
42165 if (TARGET_64BIT && mode == DImode
42166 && GET_MODE (XEXP (x, 0)) == SImode)
42167 *total = 1;
42168 else if (TARGET_ZERO_EXTEND_WITH_AND)
42169 *total = cost->add;
42170 else
42171 *total = cost->movzx;
42172 return false;
42174 case SIGN_EXTEND:
42175 *total = cost->movsx;
42176 return false;
42178 case ASHIFT:
42179 if (SCALAR_INT_MODE_P (mode)
42180 && GET_MODE_SIZE (mode) < UNITS_PER_WORD
42181 && CONST_INT_P (XEXP (x, 1)))
42183 HOST_WIDE_INT value = INTVAL (XEXP (x, 1));
42184 if (value == 1)
42186 *total = cost->add;
42187 return false;
42189 if ((value == 2 || value == 3)
42190 && cost->lea <= cost->shift_const)
42192 *total = cost->lea;
42193 return false;
42196 /* FALLTHRU */
42198 case ROTATE:
42199 case ASHIFTRT:
42200 case LSHIFTRT:
42201 case ROTATERT:
42202 if (GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
42204 /* ??? Should be SSE vector operation cost. */
42205 /* At least for published AMD latencies, this really is the same
42206 as the latency for a simple fpu operation like fabs. */
42207 /* V*QImode is emulated with 1-11 insns. */
42208 if (mode == V16QImode || mode == V32QImode)
42210 int count = 11;
42211 if (TARGET_XOP && mode == V16QImode)
42213 /* For XOP we use vpshab, which requires a broadcast of the
42214 value to the variable shift insn.  For constants this
42215 means a V16QI constant in memory; even when we can perform the
42216 shift with one insn, set the cost so as to prefer paddb. */
42217 if (CONSTANT_P (XEXP (x, 1)))
42219 *total = (cost->fabs
42220 + rtx_cost (XEXP (x, 0), code, 0, speed)
42221 + (speed ? 2 : COSTS_N_BYTES (16)));
42222 return true;
42224 count = 3;
42226 else if (TARGET_SSSE3)
42227 count = 7;
42228 *total = cost->fabs * count;
42230 else
42231 *total = cost->fabs;
42233 else if (GET_MODE_SIZE (mode) > UNITS_PER_WORD)
42235 if (CONST_INT_P (XEXP (x, 1)))
42237 if (INTVAL (XEXP (x, 1)) > 32)
42238 *total = cost->shift_const + COSTS_N_INSNS (2);
42239 else
42240 *total = cost->shift_const * 2;
42242 else
42244 if (GET_CODE (XEXP (x, 1)) == AND)
42245 *total = cost->shift_var * 2;
42246 else
42247 *total = cost->shift_var * 6 + COSTS_N_INSNS (2);
42250 else
42252 if (CONST_INT_P (XEXP (x, 1)))
42253 *total = cost->shift_const;
42254 else if (GET_CODE (XEXP (x, 1)) == SUBREG
42255 && GET_CODE (XEXP (XEXP (x, 1), 0)) == AND)
42257 /* Return the cost after shift-and truncation. */
42258 *total = cost->shift_var;
42259 return true;
42261 else
42262 *total = cost->shift_var;
42264 return false;
42266 case FMA:
42268 rtx sub;
42270 gcc_assert (FLOAT_MODE_P (mode));
42271 gcc_assert (TARGET_FMA || TARGET_FMA4 || TARGET_AVX512F);
42273 /* ??? SSE scalar/vector cost should be used here. */
42274 /* ??? Bald assumption that fma has the same cost as fmul. */
42275 *total = cost->fmul;
42276 *total += rtx_cost (XEXP (x, 1), FMA, 1, speed);
42278 /* Negate in op0 or op2 is free: FMS, FNMA, FNMS. */
42279 sub = XEXP (x, 0);
42280 if (GET_CODE (sub) == NEG)
42281 sub = XEXP (sub, 0);
42282 *total += rtx_cost (sub, FMA, 0, speed);
42284 sub = XEXP (x, 2);
42285 if (GET_CODE (sub) == NEG)
42286 sub = XEXP (sub, 0);
42287 *total += rtx_cost (sub, FMA, 2, speed);
42288 return true;
42291 case MULT:
42292 if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
42294 /* ??? SSE scalar cost should be used here. */
42295 *total = cost->fmul;
42296 return false;
42298 else if (X87_FLOAT_MODE_P (mode))
42300 *total = cost->fmul;
42301 return false;
42303 else if (FLOAT_MODE_P (mode))
42305 /* ??? SSE vector cost should be used here. */
42306 *total = cost->fmul;
42307 return false;
42309 else if (GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
42311 /* V*QImode is emulated with 7-13 insns. */
42312 if (mode == V16QImode || mode == V32QImode)
42314 int extra = 11;
42315 if (TARGET_XOP && mode == V16QImode)
42316 extra = 5;
42317 else if (TARGET_SSSE3)
42318 extra = 6;
42319 *total = cost->fmul * 2 + cost->fabs * extra;
42321 /* V*DImode is emulated with 5-8 insns. */
42322 else if (mode == V2DImode || mode == V4DImode)
42324 if (TARGET_XOP && mode == V2DImode)
42325 *total = cost->fmul * 2 + cost->fabs * 3;
42326 else
42327 *total = cost->fmul * 3 + cost->fabs * 5;
42329 /* Without sse4.1, we don't have PMULLD; it's emulated with 7
42330 insns, including two PMULUDQ. */
42331 else if (mode == V4SImode && !(TARGET_SSE4_1 || TARGET_AVX))
42332 *total = cost->fmul * 2 + cost->fabs * 5;
42333 else
42334 *total = cost->fmul;
42335 return false;
42337 else
42339 rtx op0 = XEXP (x, 0);
42340 rtx op1 = XEXP (x, 1);
42341 int nbits;
42342 if (CONST_INT_P (XEXP (x, 1)))
42344 unsigned HOST_WIDE_INT value = INTVAL (XEXP (x, 1));
42345 for (nbits = 0; value != 0; value &= value - 1)
42346 nbits++;
42348 else
42349 /* This is arbitrary. */
42350 nbits = 7;
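/* Editorial note (illustrative): the loop above is a population count;
   "value &= value - 1" clears the lowest set bit on each iteration.  For a
   constant multiplier of 10 (binary 1010), nbits becomes 2 and the multiply
   is costed roughly as mult_init + 2 * mult_bit plus the operand costs.  */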
42352 /* Compute costs correctly for widening multiplication. */
42353 if ((GET_CODE (op0) == SIGN_EXTEND || GET_CODE (op0) == ZERO_EXTEND)
42354 && GET_MODE_SIZE (GET_MODE (XEXP (op0, 0))) * 2
42355 == GET_MODE_SIZE (mode))
42357 int is_mulwiden = 0;
42358 machine_mode inner_mode = GET_MODE (op0);
42360 if (GET_CODE (op0) == GET_CODE (op1))
42361 is_mulwiden = 1, op1 = XEXP (op1, 0);
42362 else if (CONST_INT_P (op1))
42364 if (GET_CODE (op0) == SIGN_EXTEND)
42365 is_mulwiden = trunc_int_for_mode (INTVAL (op1), inner_mode)
42366 == INTVAL (op1);
42367 else
42368 is_mulwiden = !(INTVAL (op1) & ~GET_MODE_MASK (inner_mode));
42371 if (is_mulwiden)
42372 op0 = XEXP (op0, 0), mode = GET_MODE (op0);
42375 *total = (cost->mult_init[MODE_INDEX (mode)]
42376 + nbits * cost->mult_bit
42377 + rtx_cost (op0, outer_code, opno, speed)
42378 + rtx_cost (op1, outer_code, opno, speed));
42380 return true;
42383 case DIV:
42384 case UDIV:
42385 case MOD:
42386 case UMOD:
42387 if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
42388 /* ??? SSE cost should be used here. */
42389 *total = cost->fdiv;
42390 else if (X87_FLOAT_MODE_P (mode))
42391 *total = cost->fdiv;
42392 else if (FLOAT_MODE_P (mode))
42393 /* ??? SSE vector cost should be used here. */
42394 *total = cost->fdiv;
42395 else
42396 *total = cost->divide[MODE_INDEX (mode)];
42397 return false;
42399 case PLUS:
42400 if (GET_MODE_CLASS (mode) == MODE_INT
42401 && GET_MODE_SIZE (mode) <= UNITS_PER_WORD)
42403 if (GET_CODE (XEXP (x, 0)) == PLUS
42404 && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
42405 && CONST_INT_P (XEXP (XEXP (XEXP (x, 0), 0), 1))
42406 && CONSTANT_P (XEXP (x, 1)))
42408 HOST_WIDE_INT val = INTVAL (XEXP (XEXP (XEXP (x, 0), 0), 1));
42409 if (val == 2 || val == 4 || val == 8)
42411 *total = cost->lea;
42412 *total += rtx_cost (XEXP (XEXP (x, 0), 1),
42413 outer_code, opno, speed);
42414 *total += rtx_cost (XEXP (XEXP (XEXP (x, 0), 0), 0),
42415 outer_code, opno, speed);
42416 *total += rtx_cost (XEXP (x, 1), outer_code, opno, speed);
42417 return true;
42420 else if (GET_CODE (XEXP (x, 0)) == MULT
42421 && CONST_INT_P (XEXP (XEXP (x, 0), 1)))
42423 HOST_WIDE_INT val = INTVAL (XEXP (XEXP (x, 0), 1));
42424 if (val == 2 || val == 4 || val == 8)
42426 *total = cost->lea;
42427 *total += rtx_cost (XEXP (XEXP (x, 0), 0),
42428 outer_code, opno, speed);
42429 *total += rtx_cost (XEXP (x, 1), outer_code, opno, speed);
42430 return true;
42433 else if (GET_CODE (XEXP (x, 0)) == PLUS)
42435 *total = cost->lea;
42436 *total += rtx_cost (XEXP (XEXP (x, 0), 0),
42437 outer_code, opno, speed);
42438 *total += rtx_cost (XEXP (XEXP (x, 0), 1),
42439 outer_code, opno, speed);
42440 *total += rtx_cost (XEXP (x, 1), outer_code, opno, speed);
42441 return true;
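/* Editorial note (illustrative): the shapes matched above are exactly what a
   single lea can compute, e.g. (plus (plus (mult reg 4) reg) (const_int 12))
   corresponds to "leal 12(%edx,%eax,4), %ecx", i.e. %edx + %eax*4 + 12 in
   one instruction, so only cost->lea plus the operand costs is charged.  */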
42444 /* FALLTHRU */
42446 case MINUS:
42447 if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
42449 /* ??? SSE cost should be used here. */
42450 *total = cost->fadd;
42451 return false;
42453 else if (X87_FLOAT_MODE_P (mode))
42455 *total = cost->fadd;
42456 return false;
42458 else if (FLOAT_MODE_P (mode))
42460 /* ??? SSE vector cost should be used here. */
42461 *total = cost->fadd;
42462 return false;
42464 /* FALLTHRU */
42466 case AND:
42467 case IOR:
42468 case XOR:
42469 if (GET_MODE_CLASS (mode) == MODE_INT
42470 && GET_MODE_SIZE (mode) > UNITS_PER_WORD)
42472 *total = (cost->add * 2
42473 + (rtx_cost (XEXP (x, 0), outer_code, opno, speed)
42474 << (GET_MODE (XEXP (x, 0)) != DImode))
42475 + (rtx_cost (XEXP (x, 1), outer_code, opno, speed)
42476 << (GET_MODE (XEXP (x, 1)) != DImode)));
42477 return true;
42479 /* FALLTHRU */
42481 case NEG:
42482 if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
42484 /* ??? SSE cost should be used here. */
42485 *total = cost->fchs;
42486 return false;
42488 else if (X87_FLOAT_MODE_P (mode))
42490 *total = cost->fchs;
42491 return false;
42493 else if (FLOAT_MODE_P (mode))
42495 /* ??? SSE vector cost should be used here. */
42496 *total = cost->fchs;
42497 return false;
42499 /* FALLTHRU */
42501 case NOT:
42502 if (GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
42504 /* ??? Should be SSE vector operation cost. */
42505 /* At least for published AMD latencies, this really is the same
42506 as the latency for a simple fpu operation like fabs. */
42507 *total = cost->fabs;
42509 else if (GET_MODE_SIZE (mode) > UNITS_PER_WORD)
42510 *total = cost->add * 2;
42511 else
42512 *total = cost->add;
42513 return false;
42515 case COMPARE:
42516 if (GET_CODE (XEXP (x, 0)) == ZERO_EXTRACT
42517 && XEXP (XEXP (x, 0), 1) == const1_rtx
42518 && CONST_INT_P (XEXP (XEXP (x, 0), 2))
42519 && XEXP (x, 1) == const0_rtx)
42521 /* This kind of construct is implemented using test[bwl].
42522 Treat it as if we had an AND. */
42523 *total = (cost->add
42524 + rtx_cost (XEXP (XEXP (x, 0), 0), outer_code, opno, speed)
42525 + rtx_cost (const1_rtx, outer_code, opno, speed));
42526 return true;
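/* Editorial note (illustrative): a source-level test such as
   "if (x & (1 << 5))" typically reaches here as a COMPARE of a one-bit
   ZERO_EXTRACT against zero and is emitted as a test[bwl] with an immediate
   mask, hence the AND-like cost above.  */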
42529 /* The embedded comparison operand is completely free. */
42530 if (!general_operand (XEXP (x, 0), GET_MODE (XEXP (x, 0)))
42531 && XEXP (x, 1) == const0_rtx)
42532 *total = 0;
42534 return false;
42536 case FLOAT_EXTEND:
42537 if (!(SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH))
42538 *total = 0;
42539 return false;
42541 case ABS:
42542 if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
42543 /* ??? SSE cost should be used here. */
42544 *total = cost->fabs;
42545 else if (X87_FLOAT_MODE_P (mode))
42546 *total = cost->fabs;
42547 else if (FLOAT_MODE_P (mode))
42548 /* ??? SSE vector cost should be used here. */
42549 *total = cost->fabs;
42550 return false;
42552 case SQRT:
42553 if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
42554 /* ??? SSE cost should be used here. */
42555 *total = cost->fsqrt;
42556 else if (X87_FLOAT_MODE_P (mode))
42557 *total = cost->fsqrt;
42558 else if (FLOAT_MODE_P (mode))
42559 /* ??? SSE vector cost should be used here. */
42560 *total = cost->fsqrt;
42561 return false;
42563 case UNSPEC:
42564 if (XINT (x, 1) == UNSPEC_TP)
42565 *total = 0;
42566 return false;
42568 case VEC_SELECT:
42569 case VEC_CONCAT:
42570 case VEC_DUPLICATE:
42571 /* ??? Assume all of these vector manipulation patterns are
42572 recognizable, in which case they all pretty much have the
42573 same cost. */
42574 *total = cost->fabs;
42575 return true;
42576 case VEC_MERGE:
42577 mask = XEXP (x, 2);
42578 /* This is a masked instruction; assume the same cost
42579 as the nonmasked variant. */
42580 if (TARGET_AVX512F && register_operand (mask, GET_MODE (mask)))
42581 *total = rtx_cost (XEXP (x, 0), outer_code, opno, speed);
42582 else
42583 *total = cost->fabs;
42584 return true;
42586 default:
42587 return false;
42591 #if TARGET_MACHO
42593 static int current_machopic_label_num;
42595 /* Given a symbol name and its associated stub, write out the
42596 definition of the stub. */
42598 void
42599 machopic_output_stub (FILE *file, const char *symb, const char *stub)
42601 unsigned int length;
42602 char *binder_name, *symbol_name, lazy_ptr_name[32];
42603 int label = ++current_machopic_label_num;
42605 /* For 64-bit we shouldn't get here. */
42606 gcc_assert (!TARGET_64BIT);
42608 /* Lose our funky encoding stuff so it doesn't contaminate the stub. */
42609 symb = targetm.strip_name_encoding (symb);
42611 length = strlen (stub);
42612 binder_name = XALLOCAVEC (char, length + 32);
42613 GEN_BINDER_NAME_FOR_STUB (binder_name, stub, length);
42615 length = strlen (symb);
42616 symbol_name = XALLOCAVEC (char, length + 32);
42617 GEN_SYMBOL_NAME_FOR_SYMBOL (symbol_name, symb, length);
42619 sprintf (lazy_ptr_name, "L%d$lz", label);
42621 if (MACHOPIC_ATT_STUB)
42622 switch_to_section (darwin_sections[machopic_picsymbol_stub3_section]);
42623 else if (MACHOPIC_PURE)
42624 switch_to_section (darwin_sections[machopic_picsymbol_stub2_section]);
42625 else
42626 switch_to_section (darwin_sections[machopic_symbol_stub_section]);
42628 fprintf (file, "%s:\n", stub);
42629 fprintf (file, "\t.indirect_symbol %s\n", symbol_name);
42631 if (MACHOPIC_ATT_STUB)
42633 fprintf (file, "\thlt ; hlt ; hlt ; hlt ; hlt\n");
42635 else if (MACHOPIC_PURE)
42637 /* PIC stub. */
42638 /* 25-byte PIC stub using "CALL get_pc_thunk". */
42639 rtx tmp = gen_rtx_REG (SImode, 2 /* ECX */);
42640 output_set_got (tmp, NULL_RTX); /* "CALL ___<cpu>.get_pc_thunk.cx". */
42641 fprintf (file, "LPC$%d:\tmovl\t%s-LPC$%d(%%ecx),%%ecx\n",
42642 label, lazy_ptr_name, label);
42643 fprintf (file, "\tjmp\t*%%ecx\n");
42645 else
42646 fprintf (file, "\tjmp\t*%s\n", lazy_ptr_name);
42648 /* The AT&T-style ("self-modifying") stub is not lazily bound, thus
42649 it needs no stub-binding-helper. */
42650 if (MACHOPIC_ATT_STUB)
42651 return;
42653 fprintf (file, "%s:\n", binder_name);
42655 if (MACHOPIC_PURE)
42657 fprintf (file, "\tlea\t%s-%s(%%ecx),%%ecx\n", lazy_ptr_name, binder_name);
42658 fprintf (file, "\tpushl\t%%ecx\n");
42660 else
42661 fprintf (file, "\tpushl\t$%s\n", lazy_ptr_name);
42663 fputs ("\tjmp\tdyld_stub_binding_helper\n", file);
42665 /* N.B. Keep the correspondence of these
42666 'symbol_ptr/symbol_ptr2/symbol_ptr3' sections consistent with the
42667 old-pic/new-pic/non-pic stubs; altering this will break
42668 compatibility with existing dylibs. */
42669 if (MACHOPIC_PURE)
42671 /* 25-byte PIC stub using "CALL get_pc_thunk". */
42672 switch_to_section (darwin_sections[machopic_lazy_symbol_ptr2_section]);
42674 else
42675 /* 16-byte -mdynamic-no-pic stub. */
42676 switch_to_section(darwin_sections[machopic_lazy_symbol_ptr3_section]);
42678 fprintf (file, "%s:\n", lazy_ptr_name);
42679 fprintf (file, "\t.indirect_symbol %s\n", symbol_name);
42680 fprintf (file, ASM_LONG "%s\n", binder_name);
42682 #endif /* TARGET_MACHO */
42684 /* Order the registers for the register allocator. */
42686 void
42687 x86_order_regs_for_local_alloc (void)
42689 int pos = 0;
42690 int i;
42692 /* First allocate the local general purpose registers. */
42693 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
42694 if (GENERAL_REGNO_P (i) && call_used_regs[i])
42695 reg_alloc_order [pos++] = i;
42697 /* Global general purpose registers. */
42698 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
42699 if (GENERAL_REGNO_P (i) && !call_used_regs[i])
42700 reg_alloc_order [pos++] = i;
42702 /* x87 registers come first in case we are doing FP math
42703 using them. */
42704 if (!TARGET_SSE_MATH)
42705 for (i = FIRST_STACK_REG; i <= LAST_STACK_REG; i++)
42706 reg_alloc_order [pos++] = i;
42708 /* SSE registers. */
42709 for (i = FIRST_SSE_REG; i <= LAST_SSE_REG; i++)
42710 reg_alloc_order [pos++] = i;
42711 for (i = FIRST_REX_SSE_REG; i <= LAST_REX_SSE_REG; i++)
42712 reg_alloc_order [pos++] = i;
42714 /* Extended REX SSE registers. */
42715 for (i = FIRST_EXT_REX_SSE_REG; i <= LAST_EXT_REX_SSE_REG; i++)
42716 reg_alloc_order [pos++] = i;
42718 /* Mask registers. */
42719 for (i = FIRST_MASK_REG; i <= LAST_MASK_REG; i++)
42720 reg_alloc_order [pos++] = i;
42722 /* MPX bound registers. */
42723 for (i = FIRST_BND_REG; i <= LAST_BND_REG; i++)
42724 reg_alloc_order [pos++] = i;
42726 /* x87 registers. */
42727 if (TARGET_SSE_MATH)
42728 for (i = FIRST_STACK_REG; i <= LAST_STACK_REG; i++)
42729 reg_alloc_order [pos++] = i;
42731 for (i = FIRST_MMX_REG; i <= LAST_MMX_REG; i++)
42732 reg_alloc_order [pos++] = i;
42734 /* Initialize the rest of the array, as we do not allocate some registers
42735 at all. */
42736 while (pos < FIRST_PSEUDO_REGISTER)
42737 reg_alloc_order [pos++] = 0;
42740 /* Handle a "callee_pop_aggregate_return" attribute; arguments as
42741 in struct attribute_spec.handler. */
42742 static tree
42743 ix86_handle_callee_pop_aggregate_return (tree *node, tree name,
42744 tree args,
42745 int,
42746 bool *no_add_attrs)
42748 if (TREE_CODE (*node) != FUNCTION_TYPE
42749 && TREE_CODE (*node) != METHOD_TYPE
42750 && TREE_CODE (*node) != FIELD_DECL
42751 && TREE_CODE (*node) != TYPE_DECL)
42753 warning (OPT_Wattributes, "%qE attribute only applies to functions",
42754 name);
42755 *no_add_attrs = true;
42756 return NULL_TREE;
42758 if (TARGET_64BIT)
42760 warning (OPT_Wattributes, "%qE attribute only available for 32-bit",
42761 name);
42762 *no_add_attrs = true;
42763 return NULL_TREE;
42765 if (is_attribute_p ("callee_pop_aggregate_return", name))
42767 tree cst;
42769 cst = TREE_VALUE (args);
42770 if (TREE_CODE (cst) != INTEGER_CST)
42772 warning (OPT_Wattributes,
42773 "%qE attribute requires an integer constant argument",
42774 name);
42775 *no_add_attrs = true;
42777 else if (compare_tree_int (cst, 0) != 0
42778 && compare_tree_int (cst, 1) != 0)
42780 warning (OPT_Wattributes,
42781 "argument to %qE attribute is neither zero, nor one",
42782 name);
42783 *no_add_attrs = true;
42786 return NULL_TREE;
42789 return NULL_TREE;
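/* Editorial usage sketch (illustrative, based on the attribute's documented
   semantics): on 32-bit targets the attribute is written on the function
   type, e.g.

     struct big ret_in_mem (void)
       __attribute__ ((callee_pop_aggregate_return (1)));

   where 1 means the callee pops the hidden aggregate-return pointer and 0
   means the caller does.  */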
42792 /* Handle a "ms_abi" or "sysv_abi" attribute; arguments as in
42793 struct attribute_spec.handler. */
42794 static tree
42795 ix86_handle_abi_attribute (tree *node, tree name, tree, int,
42796 bool *no_add_attrs)
42798 if (TREE_CODE (*node) != FUNCTION_TYPE
42799 && TREE_CODE (*node) != METHOD_TYPE
42800 && TREE_CODE (*node) != FIELD_DECL
42801 && TREE_CODE (*node) != TYPE_DECL)
42803 warning (OPT_Wattributes, "%qE attribute only applies to functions",
42804 name);
42805 *no_add_attrs = true;
42806 return NULL_TREE;
42809 /* Can combine regparm with all attributes but fastcall. */
42810 if (is_attribute_p ("ms_abi", name))
42812 if (lookup_attribute ("sysv_abi", TYPE_ATTRIBUTES (*node)))
42814 error ("ms_abi and sysv_abi attributes are not compatible");
42817 return NULL_TREE;
42819 else if (is_attribute_p ("sysv_abi", name))
42821 if (lookup_attribute ("ms_abi", TYPE_ATTRIBUTES (*node)))
42823 error ("ms_abi and sysv_abi attributes are not compatible");
42826 return NULL_TREE;
42829 return NULL_TREE;
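/* Editorial usage sketch (illustrative):

     void win64_callee (int, int) __attribute__ ((ms_abi));
     void sysv_callee (int, int) __attribute__ ((sysv_abi));

   Each attribute selects the named calling convention for the function type
   regardless of the target default; putting both on one type is rejected by
   the error above.  */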
42832 /* Handle a "ms_struct" or "gcc_struct" attribute; arguments as in
42833 struct attribute_spec.handler. */
42834 static tree
42835 ix86_handle_struct_attribute (tree *node, tree name, tree, int,
42836 bool *no_add_attrs)
42838 tree *type = NULL;
42839 if (DECL_P (*node))
42841 if (TREE_CODE (*node) == TYPE_DECL)
42842 type = &TREE_TYPE (*node);
42844 else
42845 type = node;
42847 if (!(type && RECORD_OR_UNION_TYPE_P (*type)))
42849 warning (OPT_Wattributes, "%qE attribute ignored",
42850 name);
42851 *no_add_attrs = true;
42854 else if ((is_attribute_p ("ms_struct", name)
42855 && lookup_attribute ("gcc_struct", TYPE_ATTRIBUTES (*type)))
42856 || ((is_attribute_p ("gcc_struct", name)
42857 && lookup_attribute ("ms_struct", TYPE_ATTRIBUTES (*type)))))
42859 warning (OPT_Wattributes, "%qE incompatible attribute ignored",
42860 name);
42861 *no_add_attrs = true;
42864 return NULL_TREE;
42867 static tree
42868 ix86_handle_fndecl_attribute (tree *node, tree name, tree, int,
42869 bool *no_add_attrs)
42871 if (TREE_CODE (*node) != FUNCTION_DECL)
42873 warning (OPT_Wattributes, "%qE attribute only applies to functions",
42874 name);
42875 *no_add_attrs = true;
42877 return NULL_TREE;
42880 static bool
42881 ix86_ms_bitfield_layout_p (const_tree record_type)
42883 return ((TARGET_MS_BITFIELD_LAYOUT
42884 && !lookup_attribute ("gcc_struct", TYPE_ATTRIBUTES (record_type)))
42885 || lookup_attribute ("ms_struct", TYPE_ATTRIBUTES (record_type)));
42888 /* Returns an expression indicating where the this parameter is
42889 located on entry to the FUNCTION. */
42891 static rtx
42892 x86_this_parameter (tree function)
42894 tree type = TREE_TYPE (function);
42895 bool aggr = aggregate_value_p (TREE_TYPE (type), type) != 0;
42896 int nregs;
42898 if (TARGET_64BIT)
42900 const int *parm_regs;
42902 if (ix86_function_type_abi (type) == MS_ABI)
42903 parm_regs = x86_64_ms_abi_int_parameter_registers;
42904 else
42905 parm_regs = x86_64_int_parameter_registers;
42906 return gen_rtx_REG (Pmode, parm_regs[aggr]);
42909 nregs = ix86_function_regparm (type, function);
42911 if (nregs > 0 && !stdarg_p (type))
42913 int regno;
42914 unsigned int ccvt = ix86_get_callcvt (type);
42916 if ((ccvt & IX86_CALLCVT_FASTCALL) != 0)
42917 regno = aggr ? DX_REG : CX_REG;
42918 else if ((ccvt & IX86_CALLCVT_THISCALL) != 0)
42920 regno = CX_REG;
42921 if (aggr)
42922 return gen_rtx_MEM (SImode,
42923 plus_constant (Pmode, stack_pointer_rtx, 4));
42925 else
42927 regno = AX_REG;
42928 if (aggr)
42930 regno = DX_REG;
42931 if (nregs == 1)
42932 return gen_rtx_MEM (SImode,
42933 plus_constant (Pmode,
42934 stack_pointer_rtx, 4));
42937 return gen_rtx_REG (SImode, regno);
42940 return gen_rtx_MEM (SImode, plus_constant (Pmode, stack_pointer_rtx,
42941 aggr ? 8 : 4));
42944 /* Determine whether x86_output_mi_thunk can succeed. */
42946 static bool
42947 x86_can_output_mi_thunk (const_tree, HOST_WIDE_INT, HOST_WIDE_INT vcall_offset,
42948 const_tree function)
42950 /* 64-bit can handle anything. */
42951 if (TARGET_64BIT)
42952 return true;
42954 /* For 32-bit, everything's fine if we have one free register. */
42955 if (ix86_function_regparm (TREE_TYPE (function), function) < 3)
42956 return true;
42958 /* Need a free register for vcall_offset. */
42959 if (vcall_offset)
42960 return false;
42962 /* Need a free register for GOT references. */
42963 if (flag_pic && !targetm.binds_local_p (function))
42964 return false;
42966 /* Otherwise ok. */
42967 return true;
42970 /* Output the assembler code for a thunk function. THUNK_DECL is the
42971 declaration for the thunk function itself, FUNCTION is the decl for
42972 the target function. DELTA is an immediate constant offset to be
42973 added to THIS. If VCALL_OFFSET is nonzero, the word at
42974 *(*this + vcall_offset) should be added to THIS. */
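/* Editorial background (illustrative): with C++ multiple inheritance such as

     struct A { virtual void f (); };
     struct B { virtual void g (); };
     struct C : A, B { void g (); };

   calling g through a B* must adjust THIS by the (negative) offset of the B
   subobject within C before jumping to C::g; that constant adjustment is
   DELTA.  VCALL_OFFSET is used when the adjustment is not a compile-time
   constant (e.g. with virtual bases) and must be loaded from the vtable.  */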
42976 static void
42977 x86_output_mi_thunk (FILE *file, tree, HOST_WIDE_INT delta,
42978 HOST_WIDE_INT vcall_offset, tree function)
42980 rtx this_param = x86_this_parameter (function);
42981 rtx this_reg, tmp, fnaddr;
42982 unsigned int tmp_regno;
42983 rtx_insn *insn;
42985 if (TARGET_64BIT)
42986 tmp_regno = R10_REG;
42987 else
42989 unsigned int ccvt = ix86_get_callcvt (TREE_TYPE (function));
42990 if ((ccvt & IX86_CALLCVT_FASTCALL) != 0)
42991 tmp_regno = AX_REG;
42992 else if ((ccvt & IX86_CALLCVT_THISCALL) != 0)
42993 tmp_regno = DX_REG;
42994 else
42995 tmp_regno = CX_REG;
42998 emit_note (NOTE_INSN_PROLOGUE_END);
43000 /* If VCALL_OFFSET, we'll need THIS in a register. Might as well
43001 pull it in now and let DELTA benefit. */
43002 if (REG_P (this_param))
43003 this_reg = this_param;
43004 else if (vcall_offset)
43006 /* Put the this parameter into %eax. */
43007 this_reg = gen_rtx_REG (Pmode, AX_REG);
43008 emit_move_insn (this_reg, this_param);
43010 else
43011 this_reg = NULL_RTX;
43013 /* Adjust the this parameter by a fixed constant. */
43014 if (delta)
43016 rtx delta_rtx = GEN_INT (delta);
43017 rtx delta_dst = this_reg ? this_reg : this_param;
43019 if (TARGET_64BIT)
43021 if (!x86_64_general_operand (delta_rtx, Pmode))
43023 tmp = gen_rtx_REG (Pmode, tmp_regno);
43024 emit_move_insn (tmp, delta_rtx);
43025 delta_rtx = tmp;
43029 ix86_emit_binop (PLUS, Pmode, delta_dst, delta_rtx);
43032 /* Adjust the this parameter by a value stored in the vtable. */
43033 if (vcall_offset)
43035 rtx vcall_addr, vcall_mem, this_mem;
43037 tmp = gen_rtx_REG (Pmode, tmp_regno);
43039 this_mem = gen_rtx_MEM (ptr_mode, this_reg);
43040 if (Pmode != ptr_mode)
43041 this_mem = gen_rtx_ZERO_EXTEND (Pmode, this_mem);
43042 emit_move_insn (tmp, this_mem);
43044 /* Adjust the this parameter. */
43045 vcall_addr = plus_constant (Pmode, tmp, vcall_offset);
43046 if (TARGET_64BIT
43047 && !ix86_legitimate_address_p (ptr_mode, vcall_addr, true))
43049 rtx tmp2 = gen_rtx_REG (Pmode, R11_REG);
43050 emit_move_insn (tmp2, GEN_INT (vcall_offset));
43051 vcall_addr = gen_rtx_PLUS (Pmode, tmp, tmp2);
43054 vcall_mem = gen_rtx_MEM (ptr_mode, vcall_addr);
43055 if (Pmode != ptr_mode)
43056 emit_insn (gen_addsi_1_zext (this_reg,
43057 gen_rtx_REG (ptr_mode,
43058 REGNO (this_reg)),
43059 vcall_mem));
43060 else
43061 ix86_emit_binop (PLUS, Pmode, this_reg, vcall_mem);
43064 /* If necessary, drop THIS back to its stack slot. */
43065 if (this_reg && this_reg != this_param)
43066 emit_move_insn (this_param, this_reg);
43068 fnaddr = XEXP (DECL_RTL (function), 0);
43069 if (TARGET_64BIT)
43071 if (!flag_pic || targetm.binds_local_p (function)
43072 || TARGET_PECOFF)
43074 else
43076 tmp = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, fnaddr), UNSPEC_GOTPCREL);
43077 tmp = gen_rtx_CONST (Pmode, tmp);
43078 fnaddr = gen_const_mem (Pmode, tmp);
43081 else
43083 if (!flag_pic || targetm.binds_local_p (function))
43085 #if TARGET_MACHO
43086 else if (TARGET_MACHO)
43088 fnaddr = machopic_indirect_call_target (DECL_RTL (function));
43089 fnaddr = XEXP (fnaddr, 0);
43091 #endif /* TARGET_MACHO */
43092 else
43094 tmp = gen_rtx_REG (Pmode, CX_REG);
43095 output_set_got (tmp, NULL_RTX);
43097 fnaddr = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, fnaddr), UNSPEC_GOT);
43098 fnaddr = gen_rtx_CONST (Pmode, fnaddr);
43099 fnaddr = gen_rtx_PLUS (Pmode, tmp, fnaddr);
43100 fnaddr = gen_const_mem (Pmode, fnaddr);
43104 /* Our sibling call patterns do not allow memories, because we have no
43105 predicate that can distinguish between frame and non-frame memory.
43106 For our purposes here, we can get away with (ab)using a jump pattern,
43107 because we're going to do no optimization. */
43108 if (MEM_P (fnaddr))
43110 if (sibcall_insn_operand (fnaddr, word_mode))
43112 fnaddr = XEXP (DECL_RTL (function), 0);
43113 tmp = gen_rtx_MEM (QImode, fnaddr);
43114 tmp = gen_rtx_CALL (VOIDmode, tmp, const0_rtx);
43115 tmp = emit_call_insn (tmp);
43116 SIBLING_CALL_P (tmp) = 1;
43118 else
43119 emit_jump_insn (gen_indirect_jump (fnaddr));
43121 else
43123 if (ix86_cmodel == CM_LARGE_PIC && SYMBOLIC_CONST (fnaddr))
43125 // CM_LARGE_PIC always uses a pseudo PIC register, which is
43126 // uninitialized.  Since FUNCTION is local and calling it
43127 // doesn't go through the PLT, we use scratch register %r11 as
43128 // the PIC register and initialize it here.
43129 pic_offset_table_rtx = gen_rtx_REG (Pmode, R11_REG);
43130 ix86_init_large_pic_reg (tmp_regno);
43131 fnaddr = legitimize_pic_address (fnaddr,
43132 gen_rtx_REG (Pmode, tmp_regno));
43135 if (!sibcall_insn_operand (fnaddr, word_mode))
43137 tmp = gen_rtx_REG (word_mode, tmp_regno);
43138 if (GET_MODE (fnaddr) != word_mode)
43139 fnaddr = gen_rtx_ZERO_EXTEND (word_mode, fnaddr);
43140 emit_move_insn (tmp, fnaddr);
43141 fnaddr = tmp;
43144 tmp = gen_rtx_MEM (QImode, fnaddr);
43145 tmp = gen_rtx_CALL (VOIDmode, tmp, const0_rtx);
43146 tmp = emit_call_insn (tmp);
43147 SIBLING_CALL_P (tmp) = 1;
43149 emit_barrier ();
43151 /* Emit just enough of rest_of_compilation to get the insns emitted.
43152 Note that use_thunk calls assemble_start_function et al. */
43153 insn = get_insns ();
43154 shorten_branches (insn);
43155 final_start_function (insn, file, 1);
43156 final (insn, file, 1);
43157 final_end_function ();
43160 static void
43161 x86_file_start (void)
43163 default_file_start ();
43164 if (TARGET_16BIT)
43165 fputs ("\t.code16gcc\n", asm_out_file);
43166 #if TARGET_MACHO
43167 darwin_file_start ();
43168 #endif
43169 if (X86_FILE_START_VERSION_DIRECTIVE)
43170 fputs ("\t.version\t\"01.01\"\n", asm_out_file);
43171 if (X86_FILE_START_FLTUSED)
43172 fputs ("\t.global\t__fltused\n", asm_out_file);
43173 if (ix86_asm_dialect == ASM_INTEL)
43174 fputs ("\t.intel_syntax noprefix\n", asm_out_file);
43177 int
43178 x86_field_alignment (tree field, int computed)
43180 machine_mode mode;
43181 tree type = TREE_TYPE (field);
43183 if (TARGET_64BIT || TARGET_ALIGN_DOUBLE)
43184 return computed;
43185 mode = TYPE_MODE (strip_array_types (type));
43186 if (mode == DFmode || mode == DCmode
43187 || GET_MODE_CLASS (mode) == MODE_INT
43188 || GET_MODE_CLASS (mode) == MODE_COMPLEX_INT)
43189 return MIN (32, computed);
43190 return computed;
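/* Editorial note (illustrative): this implements the traditional ia32 ABI
   rule that, without -malign-double, double and long long fields are aligned
   to only 4 bytes, so in "struct { char c; double d; }" the member d is
   placed at offset 4 with -m32 but at offset 8 with -m64.  */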
43193 /* Print call to TARGET to FILE. */
43195 static void
43196 x86_print_call_or_nop (FILE *file, const char *target)
43198 if (flag_nop_mcount)
43199 fprintf (file, "1:\tnopl 0x00(%%eax,%%eax,1)\n"); /* 5 byte nop. */
43200 else
43201 fprintf (file, "1:\tcall\t%s\n", target);
43204 /* Output assembler code to FILE to increment profiler label # LABELNO
43205 for profiling a function entry. */
43206 void
43207 x86_function_profiler (FILE *file, int labelno ATTRIBUTE_UNUSED)
43209 const char *mcount_name = (flag_fentry ? MCOUNT_NAME_BEFORE_PROLOGUE
43210 : MCOUNT_NAME);
43211 if (TARGET_64BIT)
43213 #ifndef NO_PROFILE_COUNTERS
43214 fprintf (file, "\tleaq\t%sP%d(%%rip),%%r11\n", LPREFIX, labelno);
43215 #endif
43217 if (!TARGET_PECOFF && flag_pic)
43218 fprintf (file, "1:\tcall\t*%s@GOTPCREL(%%rip)\n", mcount_name);
43219 else
43220 x86_print_call_or_nop (file, mcount_name);
43222 else if (flag_pic)
43224 #ifndef NO_PROFILE_COUNTERS
43225 fprintf (file, "\tleal\t%sP%d@GOTOFF(%%ebx),%%" PROFILE_COUNT_REGISTER "\n",
43226 LPREFIX, labelno);
43227 #endif
43228 fprintf (file, "1:\tcall\t*%s@GOT(%%ebx)\n", mcount_name);
43230 else
43232 #ifndef NO_PROFILE_COUNTERS
43233 fprintf (file, "\tmovl\t$%sP%d,%%" PROFILE_COUNT_REGISTER "\n",
43234 LPREFIX, labelno);
43235 #endif
43236 x86_print_call_or_nop (file, mcount_name);
43239 if (flag_record_mcount)
43241 fprintf (file, "\t.section __mcount_loc, \"a\",@progbits\n");
43242 fprintf (file, "\t.%s 1b\n", TARGET_64BIT ? "quad" : "long");
43243 fprintf (file, "\t.previous\n");
43247 /* We don't have exact information about the insn sizes, but we may assume
43248 quite safely that we are informed about all 1 byte insns and memory
43249 address sizes. This is enough to eliminate unnecessary padding in
43250 99% of cases. */
43252 static int
43253 min_insn_size (rtx_insn *insn)
43255 int l = 0, len;
43257 if (!INSN_P (insn) || !active_insn_p (insn))
43258 return 0;
43260 /* Discard alignments we've emitted and jump instructions. */
43261 if (GET_CODE (PATTERN (insn)) == UNSPEC_VOLATILE
43262 && XINT (PATTERN (insn), 1) == UNSPECV_ALIGN)
43263 return 0;
43265 /* Important case - calls are always 5 bytes.
43266 It is common to have many calls in a row. */
43267 if (CALL_P (insn)
43268 && symbolic_reference_mentioned_p (PATTERN (insn))
43269 && !SIBLING_CALL_P (insn))
43270 return 5;
43271 len = get_attr_length (insn);
43272 if (len <= 1)
43273 return 1;
43275 /* For normal instructions we rely on get_attr_length being exact,
43276 with a few exceptions. */
43277 if (!JUMP_P (insn))
43279 enum attr_type type = get_attr_type (insn);
43281 switch (type)
43283 case TYPE_MULTI:
43284 if (GET_CODE (PATTERN (insn)) == ASM_INPUT
43285 || asm_noperands (PATTERN (insn)) >= 0)
43286 return 0;
43287 break;
43288 case TYPE_OTHER:
43289 case TYPE_FCMP:
43290 break;
43291 default:
43292 /* Otherwise trust get_attr_length. */
43293 return len;
43296 l = get_attr_length_address (insn);
43297 if (l < 4 && symbolic_reference_mentioned_p (PATTERN (insn)))
43298 l = 4;
43300 if (l)
43301 return 1+l;
43302 else
43303 return 2;
43306 #ifdef ASM_OUTPUT_MAX_SKIP_PAD
43308 /* AMD K8 core mispredicts jumps when there are more than 3 jumps in a 16 byte
43309 window. */
43311 static void
43312 ix86_avoid_jump_mispredicts (void)
43314 rtx_insn *insn, *start = get_insns ();
43315 int nbytes = 0, njumps = 0;
43316 bool isjump = false;
43318 /* Look for all minimal intervals of instructions containing 4 jumps.
43319 The intervals are bounded by START and INSN. NBYTES is the total
43320 size of instructions in the interval including INSN and not including
43321 START.  When NBYTES is smaller than 16 bytes, it is possible
43322 that the end of START and INSN end up in the same 16 byte page.
43324 The smallest offset in the page at which INSN can start occurs when
43325 START ends at offset 0.  The offset of INSN is then NBYTES - sizeof (INSN).
43326 We add a p2align to the 16 byte window with maxskip 15 - NBYTES + sizeof (INSN).
43328 Don't consider an asm goto as a jump; while it can contain a jump, it doesn't
43329 have to, since control transfer to its label(s) can be performed through other
43330 means, and we also estimate the minimum length of all asm stmts as 0. */
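/* Editorial worked example (illustrative): if the interval already holds
   three jumps or calls, INSN is a fourth, 2 byte branch, and NBYTES for the
   interval is 12, then all four branches could share one aligned 16 byte
   fetch block; the loop below therefore pads by 15 - 12 + 2 = 5 bytes before
   INSN, pushing the interval past 16 bytes.  */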
43331 for (insn = start; insn; insn = NEXT_INSN (insn))
43333 int min_size;
43335 if (LABEL_P (insn))
43337 int align = label_to_alignment (insn);
43338 int max_skip = label_to_max_skip (insn);
43340 if (max_skip > 15)
43341 max_skip = 15;
43342 /* If align > 3, only up to 16 - max_skip - 1 bytes can be
43343 already in the current 16 byte page, because otherwise
43344 ASM_OUTPUT_MAX_SKIP_ALIGN could skip max_skip or fewer
43345 bytes to reach 16 byte boundary. */
43346 if (align <= 0
43347 || (align <= 3 && max_skip != (1 << align) - 1))
43348 max_skip = 0;
43349 if (dump_file)
43350 fprintf (dump_file, "Label %i with max_skip %i\n",
43351 INSN_UID (insn), max_skip);
43352 if (max_skip)
43354 while (nbytes + max_skip >= 16)
43356 start = NEXT_INSN (start);
43357 if ((JUMP_P (start) && asm_noperands (PATTERN (start)) < 0)
43358 || CALL_P (start))
43359 njumps--, isjump = true;
43360 else
43361 isjump = false;
43362 nbytes -= min_insn_size (start);
43365 continue;
43368 min_size = min_insn_size (insn);
43369 nbytes += min_size;
43370 if (dump_file)
43371 fprintf (dump_file, "Insn %i estimated to %i bytes\n",
43372 INSN_UID (insn), min_size);
43373 if ((JUMP_P (insn) && asm_noperands (PATTERN (insn)) < 0)
43374 || CALL_P (insn))
43375 njumps++;
43376 else
43377 continue;
43379 while (njumps > 3)
43381 start = NEXT_INSN (start);
43382 if ((JUMP_P (start) && asm_noperands (PATTERN (start)) < 0)
43383 || CALL_P (start))
43384 njumps--, isjump = true;
43385 else
43386 isjump = false;
43387 nbytes -= min_insn_size (start);
43389 gcc_assert (njumps >= 0);
43390 if (dump_file)
43391 fprintf (dump_file, "Interval %i to %i has %i bytes\n",
43392 INSN_UID (start), INSN_UID (insn), nbytes);
43394 if (njumps == 3 && isjump && nbytes < 16)
43396 int padsize = 15 - nbytes + min_insn_size (insn);
43398 if (dump_file)
43399 fprintf (dump_file, "Padding insn %i by %i bytes!\n",
43400 INSN_UID (insn), padsize);
43401 emit_insn_before (gen_pad (GEN_INT (padsize)), insn);
43405 #endif
43407 /* AMD Athlon works faster
43408 when RET is not the destination of a conditional jump or directly preceded
43409 by another jump instruction.  We avoid the penalty by inserting a NOP just
43410 before the RET instruction in such cases. */
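/* Editorial note (illustrative): rather than a literal NOP, the code below
   replaces the return itself with gen_simple_return_internal_long, presumably
   the longer "rep ret" encoding recommended by AMD's optimization guides;
   the effect on the branch predictor is the same as the NOP described
   above.  */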
43411 static void
43412 ix86_pad_returns (void)
43414 edge e;
43415 edge_iterator ei;
43417 FOR_EACH_EDGE (e, ei, EXIT_BLOCK_PTR_FOR_FN (cfun)->preds)
43419 basic_block bb = e->src;
43420 rtx_insn *ret = BB_END (bb);
43421 rtx_insn *prev;
43422 bool replace = false;
43424 if (!JUMP_P (ret) || !ANY_RETURN_P (PATTERN (ret))
43425 || optimize_bb_for_size_p (bb))
43426 continue;
43427 for (prev = PREV_INSN (ret); prev; prev = PREV_INSN (prev))
43428 if (active_insn_p (prev) || LABEL_P (prev))
43429 break;
43430 if (prev && LABEL_P (prev))
43432 edge e;
43433 edge_iterator ei;
43435 FOR_EACH_EDGE (e, ei, bb->preds)
43436 if (EDGE_FREQUENCY (e) && e->src->index >= 0
43437 && !(e->flags & EDGE_FALLTHRU))
43439 replace = true;
43440 break;
43443 if (!replace)
43445 prev = prev_active_insn (ret);
43446 if (prev
43447 && ((JUMP_P (prev) && any_condjump_p (prev))
43448 || CALL_P (prev)))
43449 replace = true;
43450 /* Empty functions get a branch mispredict even when
43451 the jump destination is not visible to us. */
43452 if (!prev && !optimize_function_for_size_p (cfun))
43453 replace = true;
43455 if (replace)
43457 emit_jump_insn_before (gen_simple_return_internal_long (), ret);
43458 delete_insn (ret);
43463 /* Count the minimum number of instructions in BB. Return 4 if the
43464 number of instructions >= 4. */
43466 static int
43467 ix86_count_insn_bb (basic_block bb)
43469 rtx_insn *insn;
43470 int insn_count = 0;
43472 /* Count number of instructions in this block. Return 4 if the number
43473 of instructions >= 4. */
43474 FOR_BB_INSNS (bb, insn)
43476 /* This only happens in exit blocks. */
43477 if (JUMP_P (insn)
43478 && ANY_RETURN_P (PATTERN (insn)))
43479 break;
43481 if (NONDEBUG_INSN_P (insn)
43482 && GET_CODE (PATTERN (insn)) != USE
43483 && GET_CODE (PATTERN (insn)) != CLOBBER)
43485 insn_count++;
43486 if (insn_count >= 4)
43487 return insn_count;
43491 return insn_count;
43495 /* Count the minimum number of instructions in code path in BB.
43496 Return 4 if the number of instructions >= 4. */
43498 static int
43499 ix86_count_insn (basic_block bb)
43501 edge e;
43502 edge_iterator ei;
43503 int min_prev_count;
43505 /* Only bother counting instructions along paths with no
43506 more than 2 basic blocks between entry and exit. Given
43507 that BB has an edge to exit, determine if a predecessor
43508 of BB has an edge from entry. If so, compute the number
43509 of instructions in the predecessor block. If there
43510 happen to be multiple such blocks, compute the minimum. */
43511 min_prev_count = 4;
43512 FOR_EACH_EDGE (e, ei, bb->preds)
43514 edge prev_e;
43515 edge_iterator prev_ei;
43517 if (e->src == ENTRY_BLOCK_PTR_FOR_FN (cfun))
43519 min_prev_count = 0;
43520 break;
43522 FOR_EACH_EDGE (prev_e, prev_ei, e->src->preds)
43524 if (prev_e->src == ENTRY_BLOCK_PTR_FOR_FN (cfun))
43526 int count = ix86_count_insn_bb (e->src);
43527 if (count < min_prev_count)
43528 min_prev_count = count;
43529 break;
43534 if (min_prev_count < 4)
43535 min_prev_count += ix86_count_insn_bb (bb);
43537 return min_prev_count;
43540 /* Pad short functions to 4 instructions. */
43542 static void
43543 ix86_pad_short_function (void)
43545 edge e;
43546 edge_iterator ei;
43548 FOR_EACH_EDGE (e, ei, EXIT_BLOCK_PTR_FOR_FN (cfun)->preds)
43550 rtx_insn *ret = BB_END (e->src);
43551 if (JUMP_P (ret) && ANY_RETURN_P (PATTERN (ret)))
43553 int insn_count = ix86_count_insn (e->src);
43555 /* Pad short function. */
43556 if (insn_count < 4)
43558 rtx_insn *insn = ret;
43560 /* Find epilogue. */
43561 while (insn
43562 && (!NOTE_P (insn)
43563 || NOTE_KIND (insn) != NOTE_INSN_EPILOGUE_BEG))
43564 insn = PREV_INSN (insn);
43566 if (!insn)
43567 insn = ret;
43569 /* Two NOPs count as one instruction. */
43570 insn_count = 2 * (4 - insn_count);
43571 emit_insn_before (gen_nops (GEN_INT (insn_count)), insn);
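/* Editorial note (illustrative): a body with only two counted instructions
   gets 2 * (4 - 2) = 4 NOPs emitted before the epilogue, since each NOP is
   counted as half an instruction here.  */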
43577 /* Fix up a Windows system unwinder issue. If an EH region falls through into
43578 the epilogue, the Windows system unwinder will apply epilogue logic and
43579 produce incorrect offsets. This can be avoided by adding a nop between
43580 the last insn that can throw and the first insn of the epilogue. */
43582 static void
43583 ix86_seh_fixup_eh_fallthru (void)
43585 edge e;
43586 edge_iterator ei;
43588 FOR_EACH_EDGE (e, ei, EXIT_BLOCK_PTR_FOR_FN (cfun)->preds)
43590 rtx_insn *insn, *next;
43592 /* Find the beginning of the epilogue. */
43593 for (insn = BB_END (e->src); insn != NULL; insn = PREV_INSN (insn))
43594 if (NOTE_P (insn) && NOTE_KIND (insn) == NOTE_INSN_EPILOGUE_BEG)
43595 break;
43596 if (insn == NULL)
43597 continue;
43599 /* We only care about preceding insns that can throw. */
43600 insn = prev_active_insn (insn);
43601 if (insn == NULL || !can_throw_internal (insn))
43602 continue;
43604 /* Do not separate calls from their debug information. */
43605 for (next = NEXT_INSN (insn); next != NULL; next = NEXT_INSN (next))
43606 if (NOTE_P (next)
43607 && (NOTE_KIND (next) == NOTE_INSN_VAR_LOCATION
43608 || NOTE_KIND (next) == NOTE_INSN_CALL_ARG_LOCATION))
43609 insn = next;
43610 else
43611 break;
43613 emit_insn_after (gen_nops (const1_rtx), insn);
43617 /* Implement machine specific optimizations.  We implement padding of returns
43618 for K8 CPUs and a pass to avoid 4 jumps in a single 16 byte window. */
43619 static void
43620 ix86_reorg (void)
43622 /* We are freeing block_for_insn in the toplev to keep compatibility
43623 with old MDEP_REORGS that are not CFG based. Recompute it now. */
43624 compute_bb_for_insn ();
43626 if (TARGET_SEH && current_function_has_exception_handlers ())
43627 ix86_seh_fixup_eh_fallthru ();
43629 if (optimize && optimize_function_for_speed_p (cfun))
43631 if (TARGET_PAD_SHORT_FUNCTION)
43632 ix86_pad_short_function ();
43633 else if (TARGET_PAD_RETURNS)
43634 ix86_pad_returns ();
43635 #ifdef ASM_OUTPUT_MAX_SKIP_PAD
43636 if (TARGET_FOUR_JUMP_LIMIT)
43637 ix86_avoid_jump_mispredicts ();
43638 #endif
43642 /* Return nonzero when a QImode register that must be represented via a REX
43643 prefix is used. */
43644 bool
43645 x86_extended_QIreg_mentioned_p (rtx_insn *insn)
43647 int i;
43648 extract_insn_cached (insn);
43649 for (i = 0; i < recog_data.n_operands; i++)
43650 if (GENERAL_REG_P (recog_data.operand[i])
43651 && !QI_REGNO_P (REGNO (recog_data.operand[i])))
43652 return true;
43653 return false;
43656 /* Return true when INSN mentions a register that must be encoded using a REX
43657 prefix. */
43658 bool
43659 x86_extended_reg_mentioned_p (rtx insn)
43661 subrtx_iterator::array_type array;
43662 FOR_EACH_SUBRTX (iter, array, INSN_P (insn) ? PATTERN (insn) : insn, NONCONST)
43664 const_rtx x = *iter;
43665 if (REG_P (x)
43666 && (REX_INT_REGNO_P (REGNO (x)) || REX_SSE_REGNO_P (REGNO (x))))
43667 return true;
43669 return false;
43672 /* If profitable, negate (without causing overflow) integer constant
43673 of mode MODE at location LOC. Return true in this case. */
43674 bool
43675 x86_maybe_negate_const_int (rtx *loc, machine_mode mode)
43677 HOST_WIDE_INT val;
43679 if (!CONST_INT_P (*loc))
43680 return false;
43682 switch (mode)
43684 case DImode:
43685 /* DImode x86_64 constants must fit in 32 bits. */
43686 gcc_assert (x86_64_immediate_operand (*loc, mode));
43688 mode = SImode;
43689 break;
43691 case SImode:
43692 case HImode:
43693 case QImode:
43694 break;
43696 default:
43697 gcc_unreachable ();
43700 /* Avoid overflows. */
43701 if (mode_signbit_p (mode, *loc))
43702 return false;
43704 val = INTVAL (*loc);
43706 /* Make things pretty and `subl $4,%eax' rather than `addl $-4,%eax'.
43707 Exceptions: -128 encodes smaller than 128, so swap sign and op. */
43708 if ((val < 0 && val != -128)
43709 || val == 128)
43711 *loc = GEN_INT (-val);
43712 return true;
43715 return false;
43718 /* Generate an unsigned DImode/SImode to FP conversion. This is the same code
43719 optabs would emit if we didn't have TFmode patterns. */
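/* Editorial note (illustrative): for an input whose sign bit is set, a plain
   signed conversion would be wrong, so the code below computes
   i0 = (in >> 1) | (in & 1), a nonnegative value that still rounds the same
   way, converts i0 as signed, and doubles the result (out = f0 + f0) to
   recover the unsigned value.  */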
43721 void
43722 x86_emit_floatuns (rtx operands[2])
43724 rtx_code_label *neglab, *donelab;
43725 rtx i0, i1, f0, in, out;
43726 machine_mode mode, inmode;
43728 inmode = GET_MODE (operands[1]);
43729 gcc_assert (inmode == SImode || inmode == DImode);
43731 out = operands[0];
43732 in = force_reg (inmode, operands[1]);
43733 mode = GET_MODE (out);
43734 neglab = gen_label_rtx ();
43735 donelab = gen_label_rtx ();
43736 f0 = gen_reg_rtx (mode);
43738 emit_cmp_and_jump_insns (in, const0_rtx, LT, const0_rtx, inmode, 0, neglab);
43740 expand_float (out, in, 0);
43742 emit_jump_insn (gen_jump (donelab));
43743 emit_barrier ();
43745 emit_label (neglab);
43747 i0 = expand_simple_binop (inmode, LSHIFTRT, in, const1_rtx, NULL,
43748 1, OPTAB_DIRECT);
43749 i1 = expand_simple_binop (inmode, AND, in, const1_rtx, NULL,
43750 1, OPTAB_DIRECT);
43751 i0 = expand_simple_binop (inmode, IOR, i0, i1, i0, 1, OPTAB_DIRECT);
43753 expand_float (f0, i0, 0);
43755 emit_insn (gen_rtx_SET (out, gen_rtx_PLUS (mode, f0, f0)));
43757 emit_label (donelab);
43760 static bool canonicalize_perm (struct expand_vec_perm_d *d);
43761 static bool expand_vec_perm_1 (struct expand_vec_perm_d *d);
43762 static bool expand_vec_perm_broadcast_1 (struct expand_vec_perm_d *d);
43763 static bool expand_vec_perm_palignr (struct expand_vec_perm_d *d, bool);
43765 /* Get a vector mode of the same size as the original but with elements
43766 twice as wide. This is only guaranteed to apply to integral vectors. */
43768 static inline machine_mode
43769 get_mode_wider_vector (machine_mode o)
43771 /* ??? Rely on the ordering that genmodes.c gives to vectors. */
43772 machine_mode n = GET_MODE_WIDER_MODE (o);
43773 gcc_assert (GET_MODE_NUNITS (o) == GET_MODE_NUNITS (n) * 2);
43774 gcc_assert (GET_MODE_SIZE (o) == GET_MODE_SIZE (n));
43775 return n;
43778 /* A subroutine of ix86_expand_vector_init_duplicate. Tries to
43779 fill target with val via vec_duplicate. */
43781 static bool
43782 ix86_vector_duplicate_value (machine_mode mode, rtx target, rtx val)
43784 bool ok;
43785 rtx_insn *insn;
43786 rtx dup;
43788 /* First attempt to recognize VAL as-is. */
43789 dup = gen_rtx_VEC_DUPLICATE (mode, val);
43790 insn = emit_insn (gen_rtx_SET (target, dup));
43791 if (recog_memoized (insn) < 0)
43793 rtx_insn *seq;
43794 /* If that fails, force VAL into a register. */
43796 start_sequence ();
43797 XEXP (dup, 0) = force_reg (GET_MODE_INNER (mode), val);
43798 seq = get_insns ();
43799 end_sequence ();
43800 if (seq)
43801 emit_insn_before (seq, insn);
43803 ok = recog_memoized (insn) >= 0;
43804 gcc_assert (ok);
43806 return true;
43809 /* A subroutine of ix86_expand_vector_init. Store into TARGET a vector
43810 with all elements equal to VAR. Return true if successful. */
43812 static bool
43813 ix86_expand_vector_init_duplicate (bool mmx_ok, machine_mode mode,
43814 rtx target, rtx val)
43816 bool ok;
43818 switch (mode)
43820 case V2SImode:
43821 case V2SFmode:
43822 if (!mmx_ok)
43823 return false;
43824 /* FALLTHRU */
43826 case V4DFmode:
43827 case V4DImode:
43828 case V8SFmode:
43829 case V8SImode:
43830 case V2DFmode:
43831 case V2DImode:
43832 case V4SFmode:
43833 case V4SImode:
43834 case V16SImode:
43835 case V8DImode:
43836 case V16SFmode:
43837 case V8DFmode:
43838 return ix86_vector_duplicate_value (mode, target, val);
43840 case V4HImode:
43841 if (!mmx_ok)
43842 return false;
43843 if (TARGET_SSE || TARGET_3DNOW_A)
43845 rtx x;
43847 val = gen_lowpart (SImode, val);
43848 x = gen_rtx_TRUNCATE (HImode, val);
43849 x = gen_rtx_VEC_DUPLICATE (mode, x);
43850 emit_insn (gen_rtx_SET (target, x));
43851 return true;
43853 goto widen;
43855 case V8QImode:
43856 if (!mmx_ok)
43857 return false;
43858 goto widen;
43860 case V8HImode:
43861 if (TARGET_AVX2)
43862 return ix86_vector_duplicate_value (mode, target, val);
43864 if (TARGET_SSE2)
43866 struct expand_vec_perm_d dperm;
43867 rtx tmp1, tmp2;
43869 permute:
43870 memset (&dperm, 0, sizeof (dperm));
43871 dperm.target = target;
43872 dperm.vmode = mode;
43873 dperm.nelt = GET_MODE_NUNITS (mode);
43874 dperm.op0 = dperm.op1 = gen_reg_rtx (mode);
43875 dperm.one_operand_p = true;
43877 /* Extend to SImode using a paradoxical SUBREG. */
43878 tmp1 = gen_reg_rtx (SImode);
43879 emit_move_insn (tmp1, gen_lowpart (SImode, val));
43881 /* Insert the SImode value as low element of a V4SImode vector. */
43882 tmp2 = gen_reg_rtx (V4SImode);
43883 emit_insn (gen_vec_setv4si_0 (tmp2, CONST0_RTX (V4SImode), tmp1));
43884 emit_move_insn (dperm.op0, gen_lowpart (mode, tmp2));
43886 ok = (expand_vec_perm_1 (&dperm)
43887 || expand_vec_perm_broadcast_1 (&dperm));
43888 gcc_assert (ok);
43889 return ok;
43891 goto widen;
43893 case V16QImode:
43894 if (TARGET_AVX2)
43895 return ix86_vector_duplicate_value (mode, target, val);
43897 if (TARGET_SSE2)
43898 goto permute;
43899 goto widen;
43901 widen:
43902 /* Replicate the value once into the next wider mode and recurse. */
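/* Editorial note (illustrative): e.g. for V8QImode without a native
   broadcast, the QI value is zero-extended to HImode, IORed with a copy of
   itself shifted left by 8 so the HI word holds two copies, and the function
   recurses with V4HImode; repeated widening eventually reaches a mode with a
   usable duplicate pattern.  */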
43904 machine_mode smode, wsmode, wvmode;
43905 rtx x;
43907 smode = GET_MODE_INNER (mode);
43908 wvmode = get_mode_wider_vector (mode);
43909 wsmode = GET_MODE_INNER (wvmode);
43911 val = convert_modes (wsmode, smode, val, true);
43912 x = expand_simple_binop (wsmode, ASHIFT, val,
43913 GEN_INT (GET_MODE_BITSIZE (smode)),
43914 NULL_RTX, 1, OPTAB_LIB_WIDEN);
43915 val = expand_simple_binop (wsmode, IOR, val, x, x, 1, OPTAB_LIB_WIDEN);
43917 x = gen_reg_rtx (wvmode);
43918 ok = ix86_expand_vector_init_duplicate (mmx_ok, wvmode, x, val);
43919 gcc_assert (ok);
43920 emit_move_insn (target, gen_lowpart (GET_MODE (target), x));
43921 return ok;
43924 case V16HImode:
43925 case V32QImode:
43926 if (TARGET_AVX2)
43927 return ix86_vector_duplicate_value (mode, target, val);
43928 else
43930 machine_mode hvmode = (mode == V16HImode ? V8HImode : V16QImode);
43931 rtx x = gen_reg_rtx (hvmode);
43933 ok = ix86_expand_vector_init_duplicate (false, hvmode, x, val);
43934 gcc_assert (ok);
43936 x = gen_rtx_VEC_CONCAT (mode, x, x);
43937 emit_insn (gen_rtx_SET (target, x));
43939 return true;
43941 case V64QImode:
43942 case V32HImode:
43943 if (TARGET_AVX512BW)
43944 return ix86_vector_duplicate_value (mode, target, val);
43945 else
43947 machine_mode hvmode = (mode == V32HImode ? V16HImode : V32QImode);
43948 rtx x = gen_reg_rtx (hvmode);
43950 ok = ix86_expand_vector_init_duplicate (false, hvmode, x, val);
43951 gcc_assert (ok);
43953 x = gen_rtx_VEC_CONCAT (mode, x, x);
43954 emit_insn (gen_rtx_SET (target, x));
43956 return true;
43958 default:
43959 return false;
43963 /* A subroutine of ix86_expand_vector_init. Store into TARGET a vector
43964 whose ONE_VAR element is VAR, and other elements are zero. Return true
43965 if successful. */
43967 static bool
43968 ix86_expand_vector_init_one_nonzero (bool mmx_ok, machine_mode mode,
43969 rtx target, rtx var, int one_var)
43971 machine_mode vsimode;
43972 rtx new_target;
43973 rtx x, tmp;
43974 bool use_vector_set = false;
43976 switch (mode)
43978 case V2DImode:
43979 /* For SSE4.1, we normally use vector set. But if the second
43980 element is zero and inter-unit moves are OK, we use movq
43981 instead. */
43982 use_vector_set = (TARGET_64BIT && TARGET_SSE4_1
43983 && !(TARGET_INTER_UNIT_MOVES_TO_VEC
43984 && one_var == 0));
43985 break;
43986 case V16QImode:
43987 case V4SImode:
43988 case V4SFmode:
43989 use_vector_set = TARGET_SSE4_1;
43990 break;
43991 case V8HImode:
43992 use_vector_set = TARGET_SSE2;
43993 break;
43994 case V4HImode:
43995 use_vector_set = TARGET_SSE || TARGET_3DNOW_A;
43996 break;
43997 case V32QImode:
43998 case V16HImode:
43999 case V8SImode:
44000 case V8SFmode:
44001 case V4DFmode:
44002 use_vector_set = TARGET_AVX;
44003 break;
44004 case V4DImode:
44005 /* Use ix86_expand_vector_set in 64bit mode only. */
44006 use_vector_set = TARGET_AVX && TARGET_64BIT;
44007 break;
44008 default:
44009 break;
44012 if (use_vector_set)
44014 emit_insn (gen_rtx_SET (target, CONST0_RTX (mode)));
44015 var = force_reg (GET_MODE_INNER (mode), var);
44016 ix86_expand_vector_set (mmx_ok, target, var, one_var);
44017 return true;
44020 switch (mode)
44022 case V2SFmode:
44023 case V2SImode:
44024 if (!mmx_ok)
44025 return false;
44026 /* FALLTHRU */
44028 case V2DFmode:
44029 case V2DImode:
44030 if (one_var != 0)
44031 return false;
44032 var = force_reg (GET_MODE_INNER (mode), var);
44033 x = gen_rtx_VEC_CONCAT (mode, var, CONST0_RTX (GET_MODE_INNER (mode)));
44034 emit_insn (gen_rtx_SET (target, x));
44035 return true;
44037 case V4SFmode:
44038 case V4SImode:
44039 if (!REG_P (target) || REGNO (target) < FIRST_PSEUDO_REGISTER)
44040 new_target = gen_reg_rtx (mode);
44041 else
44042 new_target = target;
44043 var = force_reg (GET_MODE_INNER (mode), var);
44044 x = gen_rtx_VEC_DUPLICATE (mode, var);
44045 x = gen_rtx_VEC_MERGE (mode, x, CONST0_RTX (mode), const1_rtx);
44046 emit_insn (gen_rtx_SET (new_target, x));
44047 if (one_var != 0)
44049 /* We need to shuffle the value to the correct position, so
44050 create a new pseudo to store the intermediate result. */
44052 /* With SSE2, we can use the integer shuffle insns. */
44053 if (mode != V4SFmode && TARGET_SSE2)
44055 emit_insn (gen_sse2_pshufd_1 (new_target, new_target,
44056 const1_rtx,
44057 GEN_INT (one_var == 1 ? 0 : 1),
44058 GEN_INT (one_var == 2 ? 0 : 1),
44059 GEN_INT (one_var == 3 ? 0 : 1)));
44060 if (target != new_target)
44061 emit_move_insn (target, new_target);
44062 return true;
44065 /* Otherwise convert the intermediate result to V4SFmode and
44066 use the SSE1 shuffle instructions. */
44067 if (mode != V4SFmode)
44069 tmp = gen_reg_rtx (V4SFmode);
44070 emit_move_insn (tmp, gen_lowpart (V4SFmode, new_target));
44072 else
44073 tmp = new_target;
44075 emit_insn (gen_sse_shufps_v4sf (tmp, tmp, tmp,
44076 const1_rtx,
44077 GEN_INT (one_var == 1 ? 0 : 1),
44078 GEN_INT (one_var == 2 ? 0+4 : 1+4),
44079 GEN_INT (one_var == 3 ? 0+4 : 1+4)));
44081 if (mode != V4SFmode)
44082 emit_move_insn (target, gen_lowpart (V4SImode, tmp));
44083 else if (tmp != target)
44084 emit_move_insn (target, tmp);
44086 else if (target != new_target)
44087 emit_move_insn (target, new_target);
44088 return true;
44090 case V8HImode:
44091 case V16QImode:
44092 vsimode = V4SImode;
44093 goto widen;
44094 case V4HImode:
44095 case V8QImode:
44096 if (!mmx_ok)
44097 return false;
44098 vsimode = V2SImode;
44099 goto widen;
44100 widen:
44101 if (one_var != 0)
44102 return false;
44104 /* Zero extend the variable element to SImode and recurse. */
44105 var = convert_modes (SImode, GET_MODE_INNER (mode), var, true);
44107 x = gen_reg_rtx (vsimode);
44108 if (!ix86_expand_vector_init_one_nonzero (mmx_ok, vsimode, x,
44109 var, one_var))
44110 gcc_unreachable ();
44112 emit_move_insn (target, gen_lowpart (mode, x));
44113 return true;
44115 default:
44116 return false;
44120 /* A subroutine of ix86_expand_vector_init. Store into TARGET a vector
44121 consisting of the values in VALS. It is known that all elements
44122 except ONE_VAR are constants. Return true if successful. */
44124 static bool
44125 ix86_expand_vector_init_one_var (bool mmx_ok, machine_mode mode,
44126 rtx target, rtx vals, int one_var)
44128 rtx var = XVECEXP (vals, 0, one_var);
44129 machine_mode wmode;
44130 rtx const_vec, x;
44132 const_vec = copy_rtx (vals);
44133 XVECEXP (const_vec, 0, one_var) = CONST0_RTX (GET_MODE_INNER (mode));
44134 const_vec = gen_rtx_CONST_VECTOR (mode, XVEC (const_vec, 0));
44136 switch (mode)
44138 case V2DFmode:
44139 case V2DImode:
44140 case V2SFmode:
44141 case V2SImode:
44142 /* For the two element vectors, it's just as easy to use
44143 the general case. */
44144 return false;
44146 case V4DImode:
44147 /* Use ix86_expand_vector_set in 64bit mode only. */
44148 if (!TARGET_64BIT)
44149 return false;
44150 case V4DFmode:
44151 case V8SFmode:
44152 case V8SImode:
44153 case V16HImode:
44154 case V32QImode:
44155 case V4SFmode:
44156 case V4SImode:
44157 case V8HImode:
44158 case V4HImode:
44159 break;
44161 case V16QImode:
44162 if (TARGET_SSE4_1)
44163 break;
44164 wmode = V8HImode;
44165 goto widen;
44166 case V8QImode:
44167 wmode = V4HImode;
44168 goto widen;
44169 widen:
44170 /* There's no way to set one QImode entry easily. Combine
44171 the variable value with its adjacent constant value, and
44172 promote to an HImode set. */
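 /* Illustrative sketch, not emitted code: for V8QImode with ONE_VAR == 3
    and the neighbouring constant C at index 2 (x86 is little-endian, so
    the odd byte is the high byte of its HImode pair), the pair is rebuilt
    as an HImode value and stored at HImode index ONE_VAR >> 1 == 1:

	unsigned short pair = ((unsigned short) var << 8) | (C & 0xff);

    For an even ONE_VAR the roles are swapped: pair = (C << 8) | var.  */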
44173 x = XVECEXP (vals, 0, one_var ^ 1);
44174 if (one_var & 1)
44176 var = convert_modes (HImode, QImode, var, true);
44177 var = expand_simple_binop (HImode, ASHIFT, var, GEN_INT (8),
44178 NULL_RTX, 1, OPTAB_LIB_WIDEN);
44179 x = GEN_INT (INTVAL (x) & 0xff);
44181 else
44183 var = convert_modes (HImode, QImode, var, true);
44184 x = gen_int_mode (INTVAL (x) << 8, HImode);
44186 if (x != const0_rtx)
44187 var = expand_simple_binop (HImode, IOR, var, x, var,
44188 1, OPTAB_LIB_WIDEN);
44190 x = gen_reg_rtx (wmode);
44191 emit_move_insn (x, gen_lowpart (wmode, const_vec));
44192 ix86_expand_vector_set (mmx_ok, x, var, one_var >> 1);
44194 emit_move_insn (target, gen_lowpart (mode, x));
44195 return true;
44197 default:
44198 return false;
44201 emit_move_insn (target, const_vec);
44202 ix86_expand_vector_set (mmx_ok, target, var, one_var);
44203 return true;
44206 /* A subroutine of ix86_expand_vector_init_general. Use vector
44207 concatenate to handle the most general case: all values variable,
44208 and none identical. */
44210 static void
44211 ix86_expand_vector_init_concat (machine_mode mode,
44212 rtx target, rtx *ops, int n)
44214 machine_mode cmode, hmode = VOIDmode, gmode = VOIDmode;
44215 rtx first[16], second[8], third[4];
44216 rtvec v;
44217 int i, j;
44219 switch (n)
44221 case 2:
44222 switch (mode)
44224 case V16SImode:
44225 cmode = V8SImode;
44226 break;
44227 case V16SFmode:
44228 cmode = V8SFmode;
44229 break;
44230 case V8DImode:
44231 cmode = V4DImode;
44232 break;
44233 case V8DFmode:
44234 cmode = V4DFmode;
44235 break;
44236 case V8SImode:
44237 cmode = V4SImode;
44238 break;
44239 case V8SFmode:
44240 cmode = V4SFmode;
44241 break;
44242 case V4DImode:
44243 cmode = V2DImode;
44244 break;
44245 case V4DFmode:
44246 cmode = V2DFmode;
44247 break;
44248 case V4SImode:
44249 cmode = V2SImode;
44250 break;
44251 case V4SFmode:
44252 cmode = V2SFmode;
44253 break;
44254 case V2DImode:
44255 cmode = DImode;
44256 break;
44257 case V2SImode:
44258 cmode = SImode;
44259 break;
44260 case V2DFmode:
44261 cmode = DFmode;
44262 break;
44263 case V2SFmode:
44264 cmode = SFmode;
44265 break;
44266 default:
44267 gcc_unreachable ();
44270 if (!register_operand (ops[1], cmode))
44271 ops[1] = force_reg (cmode, ops[1]);
44272 if (!register_operand (ops[0], cmode))
44273 ops[0] = force_reg (cmode, ops[0]);
44274 emit_insn (gen_rtx_SET (target, gen_rtx_VEC_CONCAT (mode, ops[0],
44275 ops[1])));
44276 break;
44278 case 4:
44279 switch (mode)
44281 case V4DImode:
44282 cmode = V2DImode;
44283 break;
44284 case V4DFmode:
44285 cmode = V2DFmode;
44286 break;
44287 case V4SImode:
44288 cmode = V2SImode;
44289 break;
44290 case V4SFmode:
44291 cmode = V2SFmode;
44292 break;
44293 default:
44294 gcc_unreachable ();
44296 goto half;
44298 case 8:
44299 switch (mode)
44301 case V8DImode:
44302 cmode = V2DImode;
44303 hmode = V4DImode;
44304 break;
44305 case V8DFmode:
44306 cmode = V2DFmode;
44307 hmode = V4DFmode;
44308 break;
44309 case V8SImode:
44310 cmode = V2SImode;
44311 hmode = V4SImode;
44312 break;
44313 case V8SFmode:
44314 cmode = V2SFmode;
44315 hmode = V4SFmode;
44316 break;
44317 default:
44318 gcc_unreachable ();
44320 goto half;
44322 case 16:
44323 switch (mode)
44325 case V16SImode:
44326 cmode = V2SImode;
44327 hmode = V4SImode;
44328 gmode = V8SImode;
44329 break;
44330 case V16SFmode:
44331 cmode = V2SFmode;
44332 hmode = V4SFmode;
44333 gmode = V8SFmode;
44334 break;
44335 default:
44336 gcc_unreachable ();
44338 goto half;
44340 half:
44341 /* FIXME: We process inputs backward to help RA. PR 36222. */
44342 i = n - 1;
44343 j = (n >> 1) - 1;
44344 for (; i > 0; i -= 2, j--)
44346 first[j] = gen_reg_rtx (cmode);
44347 v = gen_rtvec (2, ops[i - 1], ops[i]);
44348 ix86_expand_vector_init (false, first[j],
44349 gen_rtx_PARALLEL (cmode, v));
44352 n >>= 1;
44353 if (n > 4)
44355 gcc_assert (hmode != VOIDmode);
44356 gcc_assert (gmode != VOIDmode);
44357 for (i = j = 0; i < n; i += 2, j++)
44359 second[j] = gen_reg_rtx (hmode);
44360 ix86_expand_vector_init_concat (hmode, second [j],
44361 &first [i], 2);
44363 n >>= 1;
44364 for (i = j = 0; i < n; i += 2, j++)
44366 third[j] = gen_reg_rtx (gmode);
44367 ix86_expand_vector_init_concat (gmode, third[j],
44368 &second[i], 2);
44370 n >>= 1;
44371 ix86_expand_vector_init_concat (mode, target, third, n);
44373 else if (n > 2)
44375 gcc_assert (hmode != VOIDmode);
44376 for (i = j = 0; i < n; i += 2, j++)
44378 second[j] = gen_reg_rtx (hmode);
44379 ix86_expand_vector_init_concat (hmode, second [j],
44380 &first [i], 2);
44382 n >>= 1;
44383 ix86_expand_vector_init_concat (mode, target, second, n);
44385 else
44386 ix86_expand_vector_init_concat (mode, target, first, n);
44387 break;
44389 default:
44390 gcc_unreachable ();
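 /* Illustrative walk-through of the routine above, not emitted code: for
    an eight-element V8SFmode build {a,b,c,d,e,f,g,h} it first forms four
    V2SFmode pairs {a,b} {c,d} {e,f} {g,h}, then two V4SFmode halves
    {a,b,c,d} {e,f,g,h}, and finally one VEC_CONCAT into the V8SFmode
    target -- a log2(n)-deep tree of concatenations.  */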
44394 /* A subroutine of ix86_expand_vector_init_general. Use vector
44395 interleave to handle the most general case: all values variable,
44396 and none identical. */
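 /* Illustrative walk-through, not emitted code: to build a V8HImode vector
    {a,b,c,d,e,f,g,h}, each pair (a,b), (c,d), (e,f), (g,h) is first placed
    in elements 0-1 of its own register.  Interleaving the low V4SImode
    halves of {a,b,..} and {c,d,..} yields {a,b,c,d,..}; one more interleave
    of the low V2DImode halves of {a,b,c,d,..} and {e,f,g,h,..} produces the
    full vector.  */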
44398 static void
44399 ix86_expand_vector_init_interleave (machine_mode mode,
44400 rtx target, rtx *ops, int n)
44402 machine_mode first_imode, second_imode, third_imode, inner_mode;
44403 int i, j;
44404 rtx op0, op1;
44405 rtx (*gen_load_even) (rtx, rtx, rtx);
44406 rtx (*gen_interleave_first_low) (rtx, rtx, rtx);
44407 rtx (*gen_interleave_second_low) (rtx, rtx, rtx);
44409 switch (mode)
44411 case V8HImode:
44412 gen_load_even = gen_vec_setv8hi;
44413 gen_interleave_first_low = gen_vec_interleave_lowv4si;
44414 gen_interleave_second_low = gen_vec_interleave_lowv2di;
44415 inner_mode = HImode;
44416 first_imode = V4SImode;
44417 second_imode = V2DImode;
44418 third_imode = VOIDmode;
44419 break;
44420 case V16QImode:
44421 gen_load_even = gen_vec_setv16qi;
44422 gen_interleave_first_low = gen_vec_interleave_lowv8hi;
44423 gen_interleave_second_low = gen_vec_interleave_lowv4si;
44424 inner_mode = QImode;
44425 first_imode = V8HImode;
44426 second_imode = V4SImode;
44427 third_imode = V2DImode;
44428 break;
44429 default:
44430 gcc_unreachable ();
44433 for (i = 0; i < n; i++)
44435 /* Extend the odd element to SImode using a paradoxical SUBREG. */
44436 op0 = gen_reg_rtx (SImode);
44437 emit_move_insn (op0, gen_lowpart (SImode, ops [i + i]));
44439 /* Insert the SImode value as low element of V4SImode vector. */
44440 op1 = gen_reg_rtx (V4SImode);
44441 op0 = gen_rtx_VEC_MERGE (V4SImode,
44442 gen_rtx_VEC_DUPLICATE (V4SImode,
44443 op0),
44444 CONST0_RTX (V4SImode),
44445 const1_rtx);
44446 emit_insn (gen_rtx_SET (op1, op0));
44448 /* Cast the V4SImode vector back to a vector in original mode. */
44449 op0 = gen_reg_rtx (mode);
44450 emit_move_insn (op0, gen_lowpart (mode, op1));
44452 /* Load even elements into the second position. */
44453 emit_insn (gen_load_even (op0,
44454 force_reg (inner_mode,
44455 ops [i + i + 1]),
44456 const1_rtx));
44458 /* Cast vector to FIRST_IMODE vector. */
44459 ops[i] = gen_reg_rtx (first_imode);
44460 emit_move_insn (ops[i], gen_lowpart (first_imode, op0));
44463 /* Interleave low FIRST_IMODE vectors. */
44464 for (i = j = 0; i < n; i += 2, j++)
44466 op0 = gen_reg_rtx (first_imode);
44467 emit_insn (gen_interleave_first_low (op0, ops[i], ops[i + 1]));
44469 /* Cast FIRST_IMODE vector to SECOND_IMODE vector. */
44470 ops[j] = gen_reg_rtx (second_imode);
44471 emit_move_insn (ops[j], gen_lowpart (second_imode, op0));
44474 /* Interleave low SECOND_IMODE vectors. */
44475 switch (second_imode)
44477 case V4SImode:
44478 for (i = j = 0; i < n / 2; i += 2, j++)
44480 op0 = gen_reg_rtx (second_imode);
44481 emit_insn (gen_interleave_second_low (op0, ops[i],
44482 ops[i + 1]));
44484 /* Cast the SECOND_IMODE vector to the THIRD_IMODE
44485 vector. */
44486 ops[j] = gen_reg_rtx (third_imode);
44487 emit_move_insn (ops[j], gen_lowpart (third_imode, op0));
44489 second_imode = V2DImode;
44490 gen_interleave_second_low = gen_vec_interleave_lowv2di;
44491 /* FALLTHRU */
44493 case V2DImode:
44494 op0 = gen_reg_rtx (second_imode);
44495 emit_insn (gen_interleave_second_low (op0, ops[0],
44496 ops[1]));
44498 /* Cast the SECOND_IMODE vector back to a vector in the original
44499 mode. */
44500 emit_insn (gen_rtx_SET (target, gen_lowpart (mode, op0)));
44501 break;
44503 default:
44504 gcc_unreachable ();
44508 /* A subroutine of ix86_expand_vector_init. Handle the most general case:
44509 all values variable, and none identical. */
44511 static void
44512 ix86_expand_vector_init_general (bool mmx_ok, machine_mode mode,
44513 rtx target, rtx vals)
44515 rtx ops[64], op0, op1, op2, op3, op4, op5;
44516 machine_mode half_mode = VOIDmode;
44517 machine_mode quarter_mode = VOIDmode;
44518 int n, i;
44520 switch (mode)
44522 case V2SFmode:
44523 case V2SImode:
44524 if (!mmx_ok && !TARGET_SSE)
44525 break;
44526 /* FALLTHRU */
44528 case V16SImode:
44529 case V16SFmode:
44530 case V8DFmode:
44531 case V8DImode:
44532 case V8SFmode:
44533 case V8SImode:
44534 case V4DFmode:
44535 case V4DImode:
44536 case V4SFmode:
44537 case V4SImode:
44538 case V2DFmode:
44539 case V2DImode:
44540 n = GET_MODE_NUNITS (mode);
44541 for (i = 0; i < n; i++)
44542 ops[i] = XVECEXP (vals, 0, i);
44543 ix86_expand_vector_init_concat (mode, target, ops, n);
44544 return;
44546 case V32QImode:
44547 half_mode = V16QImode;
44548 goto half;
44550 case V16HImode:
44551 half_mode = V8HImode;
44552 goto half;
44554 half:
44555 n = GET_MODE_NUNITS (mode);
44556 for (i = 0; i < n; i++)
44557 ops[i] = XVECEXP (vals, 0, i);
44558 op0 = gen_reg_rtx (half_mode);
44559 op1 = gen_reg_rtx (half_mode);
44560 ix86_expand_vector_init_interleave (half_mode, op0, ops,
44561 n >> 2);
44562 ix86_expand_vector_init_interleave (half_mode, op1,
44563 &ops [n >> 1], n >> 2);
44564 emit_insn (gen_rtx_SET (target, gen_rtx_VEC_CONCAT (mode, op0, op1)));
44565 return;
44567 case V64QImode:
44568 quarter_mode = V16QImode;
44569 half_mode = V32QImode;
44570 goto quarter;
44572 case V32HImode:
44573 quarter_mode = V8HImode;
44574 half_mode = V16HImode;
44575 goto quarter;
44577 quarter:
44578 n = GET_MODE_NUNITS (mode);
44579 for (i = 0; i < n; i++)
44580 ops[i] = XVECEXP (vals, 0, i);
44581 op0 = gen_reg_rtx (quarter_mode);
44582 op1 = gen_reg_rtx (quarter_mode);
44583 op2 = gen_reg_rtx (quarter_mode);
44584 op3 = gen_reg_rtx (quarter_mode);
44585 op4 = gen_reg_rtx (half_mode);
44586 op5 = gen_reg_rtx (half_mode);
44587 ix86_expand_vector_init_interleave (quarter_mode, op0, ops,
44588 n >> 3);
44589 ix86_expand_vector_init_interleave (quarter_mode, op1,
44590 &ops [n >> 2], n >> 3);
44591 ix86_expand_vector_init_interleave (quarter_mode, op2,
44592 &ops [n >> 1], n >> 3);
44593 ix86_expand_vector_init_interleave (quarter_mode, op3,
44594 &ops [(n >> 1) | (n >> 2)], n >> 3);
44595 emit_insn (gen_rtx_SET (op4, gen_rtx_VEC_CONCAT (half_mode, op0, op1)));
44596 emit_insn (gen_rtx_SET (op5, gen_rtx_VEC_CONCAT (half_mode, op2, op3)));
44597 emit_insn (gen_rtx_SET (target, gen_rtx_VEC_CONCAT (mode, op4, op5)));
44598 return;
44600 case V16QImode:
44601 if (!TARGET_SSE4_1)
44602 break;
44603 /* FALLTHRU */
44605 case V8HImode:
44606 if (!TARGET_SSE2)
44607 break;
44609 /* Don't use ix86_expand_vector_init_interleave if we can't
44610 move from GPR to SSE register directly. */
44611 if (!TARGET_INTER_UNIT_MOVES_TO_VEC)
44612 break;
44614 n = GET_MODE_NUNITS (mode);
44615 for (i = 0; i < n; i++)
44616 ops[i] = XVECEXP (vals, 0, i);
44617 ix86_expand_vector_init_interleave (mode, target, ops, n >> 1);
44618 return;
44620 case V4HImode:
44621 case V8QImode:
44622 break;
44624 default:
44625 gcc_unreachable ();
44629 int i, j, n_elts, n_words, n_elt_per_word;
44630 machine_mode inner_mode;
44631 rtx words[4], shift;
44633 inner_mode = GET_MODE_INNER (mode);
44634 n_elts = GET_MODE_NUNITS (mode);
44635 n_words = GET_MODE_SIZE (mode) / UNITS_PER_WORD;
44636 n_elt_per_word = n_elts / n_words;
44637 shift = GEN_INT (GET_MODE_BITSIZE (inner_mode));
44639 for (i = 0; i < n_words; ++i)
44641 rtx word = NULL_RTX;
44643 for (j = 0; j < n_elt_per_word; ++j)
44645 rtx elt = XVECEXP (vals, 0, (i+1)*n_elt_per_word - j - 1);
44646 elt = convert_modes (word_mode, inner_mode, elt, true);
44648 if (j == 0)
44649 word = elt;
44650 else
44652 word = expand_simple_binop (word_mode, ASHIFT, word, shift,
44653 word, 1, OPTAB_LIB_WIDEN);
44654 word = expand_simple_binop (word_mode, IOR, word, elt,
44655 word, 1, OPTAB_LIB_WIDEN);
44659 words[i] = word;
44662 if (n_words == 1)
44663 emit_move_insn (target, gen_lowpart (mode, words[0]));
44664 else if (n_words == 2)
44666 rtx tmp = gen_reg_rtx (mode);
44667 emit_clobber (tmp);
44668 emit_move_insn (gen_lowpart (word_mode, tmp), words[0]);
44669 emit_move_insn (gen_highpart (word_mode, tmp), words[1]);
44670 emit_move_insn (target, tmp);
44672 else if (n_words == 4)
44674 rtx tmp = gen_reg_rtx (V4SImode);
44675 gcc_assert (word_mode == SImode);
44676 vals = gen_rtx_PARALLEL (V4SImode, gen_rtvec_v (4, words));
44677 ix86_expand_vector_init_general (false, V4SImode, tmp, vals);
44678 emit_move_insn (target, gen_lowpart (mode, tmp));
44680 else
44681 gcc_unreachable ();
44685 /* Initialize vector TARGET via VALS. Suppress the use of MMX
44686 instructions unless MMX_OK is true. */
44688 void
44689 ix86_expand_vector_init (bool mmx_ok, rtx target, rtx vals)
44691 machine_mode mode = GET_MODE (target);
44692 machine_mode inner_mode = GET_MODE_INNER (mode);
44693 int n_elts = GET_MODE_NUNITS (mode);
44694 int n_var = 0, one_var = -1;
44695 bool all_same = true, all_const_zero = true;
44696 int i;
44697 rtx x;
44699 for (i = 0; i < n_elts; ++i)
44701 x = XVECEXP (vals, 0, i);
44702 if (!(CONST_SCALAR_INT_P (x)
44703 || CONST_DOUBLE_P (x)
44704 || CONST_FIXED_P (x)))
44705 n_var++, one_var = i;
44706 else if (x != CONST0_RTX (inner_mode))
44707 all_const_zero = false;
44708 if (i > 0 && !rtx_equal_p (x, XVECEXP (vals, 0, 0)))
44709 all_same = false;
44712 /* Constants are best loaded from the constant pool. */
44713 if (n_var == 0)
44715 emit_move_insn (target, gen_rtx_CONST_VECTOR (mode, XVEC (vals, 0)));
44716 return;
44719 /* If all values are identical, broadcast the value. */
44720 if (all_same
44721 && ix86_expand_vector_init_duplicate (mmx_ok, mode, target,
44722 XVECEXP (vals, 0, 0)))
44723 return;
44725 /* Values where only one field is non-constant are best loaded from
44726 the pool and overwritten via move later. */
44727 if (n_var == 1)
44729 if (all_const_zero
44730 && ix86_expand_vector_init_one_nonzero (mmx_ok, mode, target,
44731 XVECEXP (vals, 0, one_var),
44732 one_var))
44733 return;
44735 if (ix86_expand_vector_init_one_var (mmx_ok, mode, target, vals, one_var))
44736 return;
44739 ix86_expand_vector_init_general (mmx_ok, mode, target, vals);
44742 void
44743 ix86_expand_vector_set (bool mmx_ok, rtx target, rtx val, int elt)
44745 machine_mode mode = GET_MODE (target);
44746 machine_mode inner_mode = GET_MODE_INNER (mode);
44747 machine_mode half_mode;
44748 bool use_vec_merge = false;
44749 rtx tmp;
44750 static rtx (*gen_extract[6][2]) (rtx, rtx)
44752 { gen_vec_extract_lo_v32qi, gen_vec_extract_hi_v32qi },
44753 { gen_vec_extract_lo_v16hi, gen_vec_extract_hi_v16hi },
44754 { gen_vec_extract_lo_v8si, gen_vec_extract_hi_v8si },
44755 { gen_vec_extract_lo_v4di, gen_vec_extract_hi_v4di },
44756 { gen_vec_extract_lo_v8sf, gen_vec_extract_hi_v8sf },
44757 { gen_vec_extract_lo_v4df, gen_vec_extract_hi_v4df }
44759 static rtx (*gen_insert[6][2]) (rtx, rtx, rtx)
44761 { gen_vec_set_lo_v32qi, gen_vec_set_hi_v32qi },
44762 { gen_vec_set_lo_v16hi, gen_vec_set_hi_v16hi },
44763 { gen_vec_set_lo_v8si, gen_vec_set_hi_v8si },
44764 { gen_vec_set_lo_v4di, gen_vec_set_hi_v4di },
44765 { gen_vec_set_lo_v8sf, gen_vec_set_hi_v8sf },
44766 { gen_vec_set_lo_v4df, gen_vec_set_hi_v4df }
44768 int i, j, n;
44769 machine_mode mmode = VOIDmode;
44770 rtx (*gen_blendm) (rtx, rtx, rtx, rtx);
44772 switch (mode)
44774 case V2SFmode:
44775 case V2SImode:
44776 if (mmx_ok)
44778 tmp = gen_reg_rtx (GET_MODE_INNER (mode));
44779 ix86_expand_vector_extract (true, tmp, target, 1 - elt);
44780 if (elt == 0)
44781 tmp = gen_rtx_VEC_CONCAT (mode, val, tmp);
44782 else
44783 tmp = gen_rtx_VEC_CONCAT (mode, tmp, val);
44784 emit_insn (gen_rtx_SET (target, tmp));
44785 return;
44787 break;
44789 case V2DImode:
44790 use_vec_merge = TARGET_SSE4_1 && TARGET_64BIT;
44791 if (use_vec_merge)
44792 break;
44794 tmp = gen_reg_rtx (GET_MODE_INNER (mode));
44795 ix86_expand_vector_extract (false, tmp, target, 1 - elt);
44796 if (elt == 0)
44797 tmp = gen_rtx_VEC_CONCAT (mode, val, tmp);
44798 else
44799 tmp = gen_rtx_VEC_CONCAT (mode, tmp, val);
44800 emit_insn (gen_rtx_SET (target, tmp));
44801 return;
44803 case V2DFmode:
44805 rtx op0, op1;
44807 /* For the two element vectors, we implement a VEC_CONCAT with
44808 the extraction of the other element. */
44810 tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (1, GEN_INT (1 - elt)));
44811 tmp = gen_rtx_VEC_SELECT (inner_mode, target, tmp);
44813 if (elt == 0)
44814 op0 = val, op1 = tmp;
44815 else
44816 op0 = tmp, op1 = val;
44818 tmp = gen_rtx_VEC_CONCAT (mode, op0, op1);
44819 emit_insn (gen_rtx_SET (target, tmp));
44821 return;
44823 case V4SFmode:
44824 use_vec_merge = TARGET_SSE4_1;
44825 if (use_vec_merge)
44826 break;
44828 switch (elt)
44830 case 0:
44831 use_vec_merge = true;
44832 break;
44834 case 1:
44835 /* tmp = target = A B C D */
44836 tmp = copy_to_reg (target);
44837 /* target = A A B B */
44838 emit_insn (gen_vec_interleave_lowv4sf (target, target, target));
44839 /* target = X A B B */
44840 ix86_expand_vector_set (false, target, val, 0);
44841 /* target = A X C D */
44842 emit_insn (gen_sse_shufps_v4sf (target, target, tmp,
44843 const1_rtx, const0_rtx,
44844 GEN_INT (2+4), GEN_INT (3+4)));
44845 return;
44847 case 2:
44848 /* tmp = target = A B C D */
44849 tmp = copy_to_reg (target);
44850 /* tmp = X B C D */
44851 ix86_expand_vector_set (false, tmp, val, 0);
44852 /* target = A B X D */
44853 emit_insn (gen_sse_shufps_v4sf (target, target, tmp,
44854 const0_rtx, const1_rtx,
44855 GEN_INT (0+4), GEN_INT (3+4)));
44856 return;
44858 case 3:
44859 /* tmp = target = A B C D */
44860 tmp = copy_to_reg (target);
44861 /* tmp = X B C D */
44862 ix86_expand_vector_set (false, tmp, val, 0);
44863 /* target = A B C X */
44864 emit_insn (gen_sse_shufps_v4sf (target, target, tmp,
44865 const0_rtx, const1_rtx,
44866 GEN_INT (2+4), GEN_INT (0+4)));
44867 return;
44869 default:
44870 gcc_unreachable ();
44872 break;
44874 case V4SImode:
44875 use_vec_merge = TARGET_SSE4_1;
44876 if (use_vec_merge)
44877 break;
44879 /* Element 0 handled by vec_merge below. */
44880 if (elt == 0)
44882 use_vec_merge = true;
44883 break;
44886 if (TARGET_SSE2)
44888 /* With SSE2, use integer shuffles to swap element 0 and ELT,
44889 store into element 0, then shuffle them back. */
44891 rtx order[4];
44893 order[0] = GEN_INT (elt);
44894 order[1] = const1_rtx;
44895 order[2] = const2_rtx;
44896 order[3] = GEN_INT (3);
44897 order[elt] = const0_rtx;
44899 emit_insn (gen_sse2_pshufd_1 (target, target, order[0],
44900 order[1], order[2], order[3]));
44902 ix86_expand_vector_set (false, target, val, 0);
44904 emit_insn (gen_sse2_pshufd_1 (target, target, order[0],
44905 order[1], order[2], order[3]));
44907 else
44909 /* For SSE1, we have to reuse the V4SF code. */
44910 rtx t = gen_reg_rtx (V4SFmode);
44911 ix86_expand_vector_set (false, t, gen_lowpart (SFmode, val), elt);
44912 emit_move_insn (target, gen_lowpart (mode, t));
44914 return;
44916 case V8HImode:
44917 use_vec_merge = TARGET_SSE2;
44918 break;
44919 case V4HImode:
44920 use_vec_merge = mmx_ok && (TARGET_SSE || TARGET_3DNOW_A);
44921 break;
44923 case V16QImode:
44924 use_vec_merge = TARGET_SSE4_1;
44925 break;
44927 case V8QImode:
44928 break;
44930 case V32QImode:
44931 half_mode = V16QImode;
44932 j = 0;
44933 n = 16;
44934 goto half;
44936 case V16HImode:
44937 half_mode = V8HImode;
44938 j = 1;
44939 n = 8;
44940 goto half;
44942 case V8SImode:
44943 half_mode = V4SImode;
44944 j = 2;
44945 n = 4;
44946 goto half;
44948 case V4DImode:
44949 half_mode = V2DImode;
44950 j = 3;
44951 n = 2;
44952 goto half;
44954 case V8SFmode:
44955 half_mode = V4SFmode;
44956 j = 4;
44957 n = 4;
44958 goto half;
44960 case V4DFmode:
44961 half_mode = V2DFmode;
44962 j = 5;
44963 n = 2;
44964 goto half;
44966 half:
44967 /* Compute offset. */
44968 i = elt / n;
44969 elt %= n;
44971 gcc_assert (i <= 1);
44973 /* Extract the half. */
44974 tmp = gen_reg_rtx (half_mode);
44975 emit_insn (gen_extract[j][i] (tmp, target));
44977 /* Put val in tmp at elt. */
44978 ix86_expand_vector_set (false, tmp, val, elt);
44980 /* Put it back. */
44981 emit_insn (gen_insert[j][i] (target, target, tmp));
44982 return;
44984 case V8DFmode:
44985 if (TARGET_AVX512F)
44987 mmode = QImode;
44988 gen_blendm = gen_avx512f_blendmv8df;
44990 break;
44992 case V8DImode:
44993 if (TARGET_AVX512F)
44995 mmode = QImode;
44996 gen_blendm = gen_avx512f_blendmv8di;
44998 break;
45000 case V16SFmode:
45001 if (TARGET_AVX512F)
45003 mmode = HImode;
45004 gen_blendm = gen_avx512f_blendmv16sf;
45006 break;
45008 case V16SImode:
45009 if (TARGET_AVX512F)
45011 mmode = HImode;
45012 gen_blendm = gen_avx512f_blendmv16si;
45014 break;
45016 case V32HImode:
45017 if (TARGET_AVX512F && TARGET_AVX512BW)
45019 mmode = SImode;
45020 gen_blendm = gen_avx512bw_blendmv32hi;
45022 break;
45024 case V64QImode:
45025 if (TARGET_AVX512F && TARGET_AVX512BW)
45027 mmode = DImode;
45028 gen_blendm = gen_avx512bw_blendmv64qi;
45030 break;
45032 default:
45033 break;
45036 if (mmode != VOIDmode)
45038 tmp = gen_reg_rtx (mode);
45039 emit_insn (gen_rtx_SET (tmp, gen_rtx_VEC_DUPLICATE (mode, val)));
45040 emit_insn (gen_blendm (target, tmp, target,
45041 force_reg (mmode,
45042 gen_int_mode (1 << elt, mmode))));
45044 else if (use_vec_merge)
45046 tmp = gen_rtx_VEC_DUPLICATE (mode, val);
45047 tmp = gen_rtx_VEC_MERGE (mode, tmp, target, GEN_INT (1 << elt));
45048 emit_insn (gen_rtx_SET (target, tmp));
45050 else
45052 rtx mem = assign_stack_temp (mode, GET_MODE_SIZE (mode));
45054 emit_move_insn (mem, target);
45056 tmp = adjust_address (mem, inner_mode, elt*GET_MODE_SIZE (inner_mode));
45057 emit_move_insn (tmp, val);
45059 emit_move_insn (target, mem);
45063 void
45064 ix86_expand_vector_extract (bool mmx_ok, rtx target, rtx vec, int elt)
45066 machine_mode mode = GET_MODE (vec);
45067 machine_mode inner_mode = GET_MODE_INNER (mode);
45068 bool use_vec_extr = false;
45069 rtx tmp;
45071 switch (mode)
45073 case V2SImode:
45074 case V2SFmode:
45075 if (!mmx_ok)
45076 break;
45077 /* FALLTHRU */
45079 case V2DFmode:
45080 case V2DImode:
45081 use_vec_extr = true;
45082 break;
45084 case V4SFmode:
45085 use_vec_extr = TARGET_SSE4_1;
45086 if (use_vec_extr)
45087 break;
45089 switch (elt)
45091 case 0:
45092 tmp = vec;
45093 break;
45095 case 1:
45096 case 3:
45097 tmp = gen_reg_rtx (mode);
45098 emit_insn (gen_sse_shufps_v4sf (tmp, vec, vec,
45099 GEN_INT (elt), GEN_INT (elt),
45100 GEN_INT (elt+4), GEN_INT (elt+4)));
45101 break;
45103 case 2:
45104 tmp = gen_reg_rtx (mode);
45105 emit_insn (gen_vec_interleave_highv4sf (tmp, vec, vec));
45106 break;
45108 default:
45109 gcc_unreachable ();
45111 vec = tmp;
45112 use_vec_extr = true;
45113 elt = 0;
45114 break;
45116 case V4SImode:
45117 use_vec_extr = TARGET_SSE4_1;
45118 if (use_vec_extr)
45119 break;
45121 if (TARGET_SSE2)
45123 switch (elt)
45125 case 0:
45126 tmp = vec;
45127 break;
45129 case 1:
45130 case 3:
45131 tmp = gen_reg_rtx (mode);
45132 emit_insn (gen_sse2_pshufd_1 (tmp, vec,
45133 GEN_INT (elt), GEN_INT (elt),
45134 GEN_INT (elt), GEN_INT (elt)));
45135 break;
45137 case 2:
45138 tmp = gen_reg_rtx (mode);
45139 emit_insn (gen_vec_interleave_highv4si (tmp, vec, vec));
45140 break;
45142 default:
45143 gcc_unreachable ();
45145 vec = tmp;
45146 use_vec_extr = true;
45147 elt = 0;
45149 else
45151 /* For SSE1, we have to reuse the V4SF code. */
45152 ix86_expand_vector_extract (false, gen_lowpart (SFmode, target),
45153 gen_lowpart (V4SFmode, vec), elt);
45154 return;
45156 break;
45158 case V8HImode:
45159 use_vec_extr = TARGET_SSE2;
45160 break;
45161 case V4HImode:
45162 use_vec_extr = mmx_ok && (TARGET_SSE || TARGET_3DNOW_A);
45163 break;
45165 case V16QImode:
45166 use_vec_extr = TARGET_SSE4_1;
45167 break;
45169 case V8SFmode:
45170 if (TARGET_AVX)
45172 tmp = gen_reg_rtx (V4SFmode);
45173 if (elt < 4)
45174 emit_insn (gen_vec_extract_lo_v8sf (tmp, vec));
45175 else
45176 emit_insn (gen_vec_extract_hi_v8sf (tmp, vec));
45177 ix86_expand_vector_extract (false, target, tmp, elt & 3);
45178 return;
45180 break;
45182 case V4DFmode:
45183 if (TARGET_AVX)
45185 tmp = gen_reg_rtx (V2DFmode);
45186 if (elt < 2)
45187 emit_insn (gen_vec_extract_lo_v4df (tmp, vec));
45188 else
45189 emit_insn (gen_vec_extract_hi_v4df (tmp, vec));
45190 ix86_expand_vector_extract (false, target, tmp, elt & 1);
45191 return;
45193 break;
45195 case V32QImode:
45196 if (TARGET_AVX)
45198 tmp = gen_reg_rtx (V16QImode);
45199 if (elt < 16)
45200 emit_insn (gen_vec_extract_lo_v32qi (tmp, vec));
45201 else
45202 emit_insn (gen_vec_extract_hi_v32qi (tmp, vec));
45203 ix86_expand_vector_extract (false, target, tmp, elt & 15);
45204 return;
45206 break;
45208 case V16HImode:
45209 if (TARGET_AVX)
45211 tmp = gen_reg_rtx (V8HImode);
45212 if (elt < 8)
45213 emit_insn (gen_vec_extract_lo_v16hi (tmp, vec));
45214 else
45215 emit_insn (gen_vec_extract_hi_v16hi (tmp, vec));
45216 ix86_expand_vector_extract (false, target, tmp, elt & 7);
45217 return;
45219 break;
45221 case V8SImode:
45222 if (TARGET_AVX)
45224 tmp = gen_reg_rtx (V4SImode);
45225 if (elt < 4)
45226 emit_insn (gen_vec_extract_lo_v8si (tmp, vec));
45227 else
45228 emit_insn (gen_vec_extract_hi_v8si (tmp, vec));
45229 ix86_expand_vector_extract (false, target, tmp, elt & 3);
45230 return;
45232 break;
45234 case V4DImode:
45235 if (TARGET_AVX)
45237 tmp = gen_reg_rtx (V2DImode);
45238 if (elt < 2)
45239 emit_insn (gen_vec_extract_lo_v4di (tmp, vec));
45240 else
45241 emit_insn (gen_vec_extract_hi_v4di (tmp, vec));
45242 ix86_expand_vector_extract (false, target, tmp, elt & 1);
45243 return;
45245 break;
45247 case V32HImode:
45248 if (TARGET_AVX512BW)
45250 tmp = gen_reg_rtx (V16HImode);
45251 if (elt < 16)
45252 emit_insn (gen_vec_extract_lo_v32hi (tmp, vec));
45253 else
45254 emit_insn (gen_vec_extract_hi_v32hi (tmp, vec));
45255 ix86_expand_vector_extract (false, target, tmp, elt & 15);
45256 return;
45258 break;
45260 case V64QImode:
45261 if (TARGET_AVX512BW)
45263 tmp = gen_reg_rtx (V32QImode);
45264 if (elt < 32)
45265 emit_insn (gen_vec_extract_lo_v64qi (tmp, vec));
45266 else
45267 emit_insn (gen_vec_extract_hi_v64qi (tmp, vec));
45268 ix86_expand_vector_extract (false, target, tmp, elt & 31);
45269 return;
45271 break;
45273 case V16SFmode:
45274 tmp = gen_reg_rtx (V8SFmode);
45275 if (elt < 8)
45276 emit_insn (gen_vec_extract_lo_v16sf (tmp, vec));
45277 else
45278 emit_insn (gen_vec_extract_hi_v16sf (tmp, vec));
45279 ix86_expand_vector_extract (false, target, tmp, elt & 7);
45280 return;
45282 case V8DFmode:
45283 tmp = gen_reg_rtx (V4DFmode);
45284 if (elt < 4)
45285 emit_insn (gen_vec_extract_lo_v8df (tmp, vec));
45286 else
45287 emit_insn (gen_vec_extract_hi_v8df (tmp, vec));
45288 ix86_expand_vector_extract (false, target, tmp, elt & 3);
45289 return;
45291 case V16SImode:
45292 tmp = gen_reg_rtx (V8SImode);
45293 if (elt < 8)
45294 emit_insn (gen_vec_extract_lo_v16si (tmp, vec));
45295 else
45296 emit_insn (gen_vec_extract_hi_v16si (tmp, vec));
45297 ix86_expand_vector_extract (false, target, tmp, elt & 7);
45298 return;
45300 case V8DImode:
45301 tmp = gen_reg_rtx (V4DImode);
45302 if (elt < 4)
45303 emit_insn (gen_vec_extract_lo_v8di (tmp, vec));
45304 else
45305 emit_insn (gen_vec_extract_hi_v8di (tmp, vec));
45306 ix86_expand_vector_extract (false, target, tmp, elt & 3);
45307 return;
45309 case V8QImode:
45310 /* ??? Could extract the appropriate HImode element and shift. */
45311 default:
45312 break;
45315 if (use_vec_extr)
45317 tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (1, GEN_INT (elt)));
45318 tmp = gen_rtx_VEC_SELECT (inner_mode, vec, tmp);
45320 /* Let the rtl optimizers know about the zero extension performed. */
45321 if (inner_mode == QImode || inner_mode == HImode)
45323 tmp = gen_rtx_ZERO_EXTEND (SImode, tmp);
45324 target = gen_lowpart (SImode, target);
45327 emit_insn (gen_rtx_SET (target, tmp));
45329 else
45331 rtx mem = assign_stack_temp (mode, GET_MODE_SIZE (mode));
45333 emit_move_insn (mem, vec);
45335 tmp = adjust_address (mem, inner_mode, elt*GET_MODE_SIZE (inner_mode));
45336 emit_move_insn (target, tmp);
45340 /* Generate code to copy vector bits i / 2 ... i - 1 from vector SRC
45341 to bits 0 ... i / 2 - 1 of vector DEST, which has the same mode.
45342 The upper bits of DEST are undefined, though they shouldn't cause
45343 exceptions (some bits from src or all zeros are ok). */
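 /* Example: for a V4SImode SRC and I == 128, elements 2 and 3 of SRC
    (bits 64..127) end up in elements 0 and 1 of DEST, here via a V1TImode
    logical shift right by I / 2 == 64 bits.  */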
45345 static void
45346 emit_reduc_half (rtx dest, rtx src, int i)
45348 rtx tem, d = dest;
45349 switch (GET_MODE (src))
45351 case V4SFmode:
45352 if (i == 128)
45353 tem = gen_sse_movhlps (dest, src, src);
45354 else
45355 tem = gen_sse_shufps_v4sf (dest, src, src, const1_rtx, const1_rtx,
45356 GEN_INT (1 + 4), GEN_INT (1 + 4));
45357 break;
45358 case V2DFmode:
45359 tem = gen_vec_interleave_highv2df (dest, src, src);
45360 break;
45361 case V16QImode:
45362 case V8HImode:
45363 case V4SImode:
45364 case V2DImode:
45365 d = gen_reg_rtx (V1TImode);
45366 tem = gen_sse2_lshrv1ti3 (d, gen_lowpart (V1TImode, src),
45367 GEN_INT (i / 2));
45368 break;
45369 case V8SFmode:
45370 if (i == 256)
45371 tem = gen_avx_vperm2f128v8sf3 (dest, src, src, const1_rtx);
45372 else
45373 tem = gen_avx_shufps256 (dest, src, src,
45374 GEN_INT (i == 128 ? 2 + (3 << 2) : 1));
45375 break;
45376 case V4DFmode:
45377 if (i == 256)
45378 tem = gen_avx_vperm2f128v4df3 (dest, src, src, const1_rtx);
45379 else
45380 tem = gen_avx_shufpd256 (dest, src, src, const1_rtx);
45381 break;
45382 case V32QImode:
45383 case V16HImode:
45384 case V8SImode:
45385 case V4DImode:
45386 if (i == 256)
45388 if (GET_MODE (dest) != V4DImode)
45389 d = gen_reg_rtx (V4DImode);
45390 tem = gen_avx2_permv2ti (d, gen_lowpart (V4DImode, src),
45391 gen_lowpart (V4DImode, src),
45392 const1_rtx);
45394 else
45396 d = gen_reg_rtx (V2TImode);
45397 tem = gen_avx2_lshrv2ti3 (d, gen_lowpart (V2TImode, src),
45398 GEN_INT (i / 2));
45400 break;
45401 case V64QImode:
45402 case V32HImode:
45403 case V16SImode:
45404 case V16SFmode:
45405 case V8DImode:
45406 case V8DFmode:
45407 if (i > 128)
45408 tem = gen_avx512f_shuf_i32x4_1 (gen_lowpart (V16SImode, dest),
45409 gen_lowpart (V16SImode, src),
45410 gen_lowpart (V16SImode, src),
45411 GEN_INT (0x4 + (i == 512 ? 4 : 0)),
45412 GEN_INT (0x5 + (i == 512 ? 4 : 0)),
45413 GEN_INT (0x6 + (i == 512 ? 4 : 0)),
45414 GEN_INT (0x7 + (i == 512 ? 4 : 0)),
45415 GEN_INT (0xC), GEN_INT (0xD),
45416 GEN_INT (0xE), GEN_INT (0xF),
45417 GEN_INT (0x10), GEN_INT (0x11),
45418 GEN_INT (0x12), GEN_INT (0x13),
45419 GEN_INT (0x14), GEN_INT (0x15),
45420 GEN_INT (0x16), GEN_INT (0x17));
45421 else
45422 tem = gen_avx512f_pshufd_1 (gen_lowpart (V16SImode, dest),
45423 gen_lowpart (V16SImode, src),
45424 GEN_INT (i == 128 ? 0x2 : 0x1),
45425 GEN_INT (0x3),
45426 GEN_INT (0x3),
45427 GEN_INT (0x3),
45428 GEN_INT (i == 128 ? 0x6 : 0x5),
45429 GEN_INT (0x7),
45430 GEN_INT (0x7),
45431 GEN_INT (0x7),
45432 GEN_INT (i == 128 ? 0xA : 0x9),
45433 GEN_INT (0xB),
45434 GEN_INT (0xB),
45435 GEN_INT (0xB),
45436 GEN_INT (i == 128 ? 0xE : 0xD),
45437 GEN_INT (0xF),
45438 GEN_INT (0xF),
45439 GEN_INT (0xF));
45440 break;
45441 default:
45442 gcc_unreachable ();
45444 emit_insn (tem);
45445 if (d != dest)
45446 emit_move_insn (dest, gen_lowpart (GET_MODE (dest), d));
45449 /* Expand a vector reduction. FN is the binary pattern to reduce;
45450 DEST is the destination; IN is the input vector. */
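 /* Rough C-level model of the loop below, illustrative only (reduc_half
    stands for emit_reduc_half and is not a real function); for a
    four-element vector and an addition FN:

	v = {a, b, c, d};
	v = v + reduc_half (v, 128);	-> {a+c, b+d, x, x}
	v = v + reduc_half (v, 64);	-> {a+b+c+d, x, x, x}

    so log2(nelts) combining steps are emitted and the scalar result ends
    up in element 0 of the final vector.  */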
45452 void
45453 ix86_expand_reduc (rtx (*fn) (rtx, rtx, rtx), rtx dest, rtx in)
45455 rtx half, dst, vec = in;
45456 machine_mode mode = GET_MODE (in);
45457 int i;
45459 /* SSE4 has a special instruction for V8HImode UMIN reduction. */
45460 if (TARGET_SSE4_1
45461 && mode == V8HImode
45462 && fn == gen_uminv8hi3)
45464 emit_insn (gen_sse4_1_phminposuw (dest, in));
45465 return;
45468 for (i = GET_MODE_BITSIZE (mode);
45469 i > GET_MODE_BITSIZE (GET_MODE_INNER (mode));
45470 i >>= 1)
45472 half = gen_reg_rtx (mode);
45473 emit_reduc_half (half, vec, i);
45474 if (i == GET_MODE_BITSIZE (GET_MODE_INNER (mode)) * 2)
45475 dst = dest;
45476 else
45477 dst = gen_reg_rtx (mode);
45478 emit_insn (fn (dst, half, vec));
45479 vec = dst;
45483 /* Target hook for scalar_mode_supported_p. */
45484 static bool
45485 ix86_scalar_mode_supported_p (machine_mode mode)
45487 if (DECIMAL_FLOAT_MODE_P (mode))
45488 return default_decimal_float_supported_p ();
45489 else if (mode == TFmode)
45490 return true;
45491 else
45492 return default_scalar_mode_supported_p (mode);
45495 /* Implements target hook vector_mode_supported_p. */
45496 static bool
45497 ix86_vector_mode_supported_p (machine_mode mode)
45499 if (TARGET_SSE && VALID_SSE_REG_MODE (mode))
45500 return true;
45501 if (TARGET_SSE2 && VALID_SSE2_REG_MODE (mode))
45502 return true;
45503 if (TARGET_AVX && VALID_AVX256_REG_MODE (mode))
45504 return true;
45505 if (TARGET_AVX512F && VALID_AVX512F_REG_MODE (mode))
45506 return true;
45507 if (TARGET_MMX && VALID_MMX_REG_MODE (mode))
45508 return true;
45509 if (TARGET_3DNOW && VALID_MMX_REG_MODE_3DNOW (mode))
45510 return true;
45511 return false;
45514 /* Implement target hook libgcc_floating_mode_supported_p. */
45515 static bool
45516 ix86_libgcc_floating_mode_supported_p (machine_mode mode)
45518 switch (mode)
45520 case SFmode:
45521 case DFmode:
45522 case XFmode:
45523 return true;
45525 case TFmode:
45526 #ifdef IX86_NO_LIBGCC_TFMODE
45527 return false;
45528 #elif defined IX86_MAYBE_NO_LIBGCC_TFMODE
45529 return TARGET_LONG_DOUBLE_128;
45530 #else
45531 return true;
45532 #endif
45534 default:
45535 return false;
45539 /* Target hook for c_mode_for_suffix. */
45540 static machine_mode
45541 ix86_c_mode_for_suffix (char suffix)
45543 if (suffix == 'q')
45544 return TFmode;
45545 if (suffix == 'w')
45546 return XFmode;
45548 return VOIDmode;
45551 /* Worker function for TARGET_MD_ASM_ADJUST.
45553 We do this in the new i386 backend to maintain source compatibility
45554 with the old cc0-based compiler. */
45556 static rtx_insn *
45557 ix86_md_asm_adjust (vec<rtx> &/*outputs*/, vec<rtx> &/*inputs*/,
45558 vec<const char *> &/*constraints*/,
45559 vec<rtx> &clobbers, HARD_REG_SET &clobbered_regs)
45561 clobbers.safe_push (gen_rtx_REG (CCmode, FLAGS_REG));
45562 clobbers.safe_push (gen_rtx_REG (CCFPmode, FPSR_REG));
45564 SET_HARD_REG_BIT (clobbered_regs, FLAGS_REG);
45565 SET_HARD_REG_BIT (clobbered_regs, FPSR_REG);
45567 return NULL;
45570 /* Implements target vector targetm.asm.encode_section_info. */
45572 static void ATTRIBUTE_UNUSED
45573 ix86_encode_section_info (tree decl, rtx rtl, int first)
45575 default_encode_section_info (decl, rtl, first);
45577 if (ix86_in_large_data_p (decl))
45578 SYMBOL_REF_FLAGS (XEXP (rtl, 0)) |= SYMBOL_FLAG_FAR_ADDR;
45581 /* Worker function for REVERSE_CONDITION. */
45583 enum rtx_code
45584 ix86_reverse_condition (enum rtx_code code, machine_mode mode)
45586 return (mode != CCFPmode && mode != CCFPUmode
45587 ? reverse_condition (code)
45588 : reverse_condition_maybe_unordered (code));
45591 /* Output code to perform an x87 FP register move, from OPERANDS[1]
45592 to OPERANDS[0]. */
45594 const char *
45595 output_387_reg_move (rtx insn, rtx *operands)
45597 if (REG_P (operands[0]))
45599 if (REG_P (operands[1])
45600 && find_regno_note (insn, REG_DEAD, REGNO (operands[1])))
45602 if (REGNO (operands[0]) == FIRST_STACK_REG)
45603 return output_387_ffreep (operands, 0);
45604 return "fstp\t%y0";
45606 if (STACK_TOP_P (operands[0]))
45607 return "fld%Z1\t%y1";
45608 return "fst\t%y0";
45610 else if (MEM_P (operands[0]))
45612 gcc_assert (REG_P (operands[1]));
45613 if (find_regno_note (insn, REG_DEAD, REGNO (operands[1])))
45614 return "fstp%Z0\t%y0";
45615 else
45617 /* There is no non-popping store to memory for XFmode.
45618 So if we need one, follow the store with a load. */
45619 if (GET_MODE (operands[0]) == XFmode)
45620 return "fstp%Z0\t%y0\n\tfld%Z0\t%y0";
45621 else
45622 return "fst%Z0\t%y0";
45625 else
45626 gcc_unreachable();
45629 /* Output code to perform a conditional jump to LABEL, if C2 flag in
45630 FP status register is set. */
45632 void
45633 ix86_emit_fp_unordered_jump (rtx label)
45635 rtx reg = gen_reg_rtx (HImode);
45636 rtx temp;
45638 emit_insn (gen_x86_fnstsw_1 (reg));
45640 if (TARGET_SAHF && (TARGET_USE_SAHF || optimize_insn_for_size_p ()))
45642 emit_insn (gen_x86_sahf_1 (reg));
45644 temp = gen_rtx_REG (CCmode, FLAGS_REG);
45645 temp = gen_rtx_UNORDERED (VOIDmode, temp, const0_rtx);
45647 else
45649 emit_insn (gen_testqi_ext_ccno_0 (reg, GEN_INT (0x04)));
45651 temp = gen_rtx_REG (CCNOmode, FLAGS_REG);
45652 temp = gen_rtx_NE (VOIDmode, temp, const0_rtx);
45655 temp = gen_rtx_IF_THEN_ELSE (VOIDmode, temp,
45656 gen_rtx_LABEL_REF (VOIDmode, label),
45657 pc_rtx);
45658 temp = gen_rtx_SET (pc_rtx, temp);
45660 emit_jump_insn (temp);
45661 predict_jump (REG_BR_PROB_BASE * 10 / 100);
45664 /* Output code to perform a log1p XFmode calculation. */
45666 void ix86_emit_i387_log1p (rtx op0, rtx op1)
45668 rtx_code_label *label1 = gen_label_rtx ();
45669 rtx_code_label *label2 = gen_label_rtx ();
45671 rtx tmp = gen_reg_rtx (XFmode);
45672 rtx tmp2 = gen_reg_rtx (XFmode);
45673 rtx test;
45675 emit_insn (gen_absxf2 (tmp, op1));
45676 test = gen_rtx_GE (VOIDmode, tmp,
45677 CONST_DOUBLE_FROM_REAL_VALUE (
45678 REAL_VALUE_ATOF ("0.29289321881345247561810596348408353", XFmode),
45679 XFmode));
45680 emit_jump_insn (gen_cbranchxf4 (test, XEXP (test, 0), XEXP (test, 1), label1));
45682 emit_move_insn (tmp2, standard_80387_constant_rtx (4)); /* fldln2 */
45683 emit_insn (gen_fyl2xp1xf3_i387 (op0, op1, tmp2));
45684 emit_jump (label2);
45686 emit_label (label1);
45687 emit_move_insn (tmp, CONST1_RTX (XFmode));
45688 emit_insn (gen_addxf3 (tmp, op1, tmp));
45689 emit_move_insn (tmp2, standard_80387_constant_rtx (4)); /* fldln2 */
45690 emit_insn (gen_fyl2xxf3_i387 (op0, tmp, tmp2));
45692 emit_label (label2);
45695 /* Emit code for round calculation. */
45696 void ix86_emit_i387_round (rtx op0, rtx op1)
45698 machine_mode inmode = GET_MODE (op1);
45699 machine_mode outmode = GET_MODE (op0);
45700 rtx e1, e2, res, tmp, tmp1, half;
45701 rtx scratch = gen_reg_rtx (HImode);
45702 rtx flags = gen_rtx_REG (CCNOmode, FLAGS_REG);
45703 rtx_code_label *jump_label = gen_label_rtx ();
45704 rtx insn;
45705 rtx (*gen_abs) (rtx, rtx);
45706 rtx (*gen_neg) (rtx, rtx);
45708 switch (inmode)
45710 case SFmode:
45711 gen_abs = gen_abssf2;
45712 break;
45713 case DFmode:
45714 gen_abs = gen_absdf2;
45715 break;
45716 case XFmode:
45717 gen_abs = gen_absxf2;
45718 break;
45719 default:
45720 gcc_unreachable ();
45723 switch (outmode)
45725 case SFmode:
45726 gen_neg = gen_negsf2;
45727 break;
45728 case DFmode:
45729 gen_neg = gen_negdf2;
45730 break;
45731 case XFmode:
45732 gen_neg = gen_negxf2;
45733 break;
45734 case HImode:
45735 gen_neg = gen_neghi2;
45736 break;
45737 case SImode:
45738 gen_neg = gen_negsi2;
45739 break;
45740 case DImode:
45741 gen_neg = gen_negdi2;
45742 break;
45743 default:
45744 gcc_unreachable ();
45747 e1 = gen_reg_rtx (inmode);
45748 e2 = gen_reg_rtx (inmode);
45749 res = gen_reg_rtx (outmode);
45751 half = CONST_DOUBLE_FROM_REAL_VALUE (dconsthalf, inmode);
45753 /* round(a) = sgn(a) * floor(fabs(a) + 0.5) */
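 /* E.g. round (-2.3) = -1 * floor (2.3 + 0.5) = -2 and
    round (-2.5) = -1 * floor (2.5 + 0.5) = -3; halfway cases are rounded
    away from zero, matching the C round () family.  */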
45755 /* scratch = fxam(op1) */
45756 emit_insn (gen_rtx_SET (scratch,
45757 gen_rtx_UNSPEC (HImode, gen_rtvec (1, op1),
45758 UNSPEC_FXAM)));
45759 /* e1 = fabs(op1) */
45760 emit_insn (gen_abs (e1, op1));
45762 /* e2 = e1 + 0.5 */
45763 half = force_reg (inmode, half);
45764 emit_insn (gen_rtx_SET (e2, gen_rtx_PLUS (inmode, e1, half)));
45766 /* res = floor(e2) */
45767 if (inmode != XFmode)
45769 tmp1 = gen_reg_rtx (XFmode);
45771 emit_insn (gen_rtx_SET (tmp1, gen_rtx_FLOAT_EXTEND (XFmode, e2)));
45773 else
45774 tmp1 = e2;
45776 switch (outmode)
45778 case SFmode:
45779 case DFmode:
45781 rtx tmp0 = gen_reg_rtx (XFmode);
45783 emit_insn (gen_frndintxf2_floor (tmp0, tmp1));
45785 emit_insn (gen_rtx_SET (res,
45786 gen_rtx_UNSPEC (outmode, gen_rtvec (1, tmp0),
45787 UNSPEC_TRUNC_NOOP)));
45789 break;
45790 case XFmode:
45791 emit_insn (gen_frndintxf2_floor (res, tmp1));
45792 break;
45793 case HImode:
45794 emit_insn (gen_lfloorxfhi2 (res, tmp1));
45795 break;
45796 case SImode:
45797 emit_insn (gen_lfloorxfsi2 (res, tmp1));
45798 break;
45799 case DImode:
45800 emit_insn (gen_lfloorxfdi2 (res, tmp1));
45801 break;
45802 default:
45803 gcc_unreachable ();
45806 /* flags = signbit(a) */
45807 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x02)));
45809 /* if (flags) then res = -res */
45810 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode,
45811 gen_rtx_EQ (VOIDmode, flags, const0_rtx),
45812 gen_rtx_LABEL_REF (VOIDmode, jump_label),
45813 pc_rtx);
45814 insn = emit_jump_insn (gen_rtx_SET (pc_rtx, tmp));
45815 predict_jump (REG_BR_PROB_BASE * 50 / 100);
45816 JUMP_LABEL (insn) = jump_label;
45818 emit_insn (gen_neg (res, res));
45820 emit_label (jump_label);
45821 LABEL_NUSES (jump_label) = 1;
45823 emit_move_insn (op0, res);
45826 /* Output code to perform a Newton-Raphson approximation of a single precision
45827 floating point divide [http://en.wikipedia.org/wiki/N-th_root_algorithm]. */
45829 void ix86_emit_swdivsf (rtx res, rtx a, rtx b, machine_mode mode)
45831 rtx x0, x1, e0, e1;
45833 x0 = gen_reg_rtx (mode);
45834 e0 = gen_reg_rtx (mode);
45835 e1 = gen_reg_rtx (mode);
45836 x1 = gen_reg_rtx (mode);
45838 /* a / b = a * ((rcp(b) + rcp(b)) - (b * rcp(b) * rcp (b))) */
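 /* This is one Newton-Raphson step x1 = x0 * (2 - b * x0) applied to the
    hardware estimate x0 = rcp(b), written as x1 = 2*x0 - b*x0*x0 so that it
    maps onto the sequence of multiplies and adds emitted below; one step
    roughly doubles the number of correct bits in the estimate.  Rough
    scalar model, illustrative only (rcp stands for the hardware reciprocal
    estimate and is not a real function):

	x0 = rcp (b);
	x1 = (x0 + x0) - (b * x0) * x0;
	return a * x1;
 */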
45840 b = force_reg (mode, b);
45842 /* x0 = rcp(b) estimate */
45843 if (mode == V16SFmode || mode == V8DFmode)
45844 emit_insn (gen_rtx_SET (x0, gen_rtx_UNSPEC (mode, gen_rtvec (1, b),
45845 UNSPEC_RCP14)));
45846 else
45847 emit_insn (gen_rtx_SET (x0, gen_rtx_UNSPEC (mode, gen_rtvec (1, b),
45848 UNSPEC_RCP)));
45850 /* e0 = x0 * b */
45851 emit_insn (gen_rtx_SET (e0, gen_rtx_MULT (mode, x0, b)));
45853 /* e0 = x0 * e0 */
45854 emit_insn (gen_rtx_SET (e0, gen_rtx_MULT (mode, x0, e0)));
45856 /* e1 = x0 + x0 */
45857 emit_insn (gen_rtx_SET (e1, gen_rtx_PLUS (mode, x0, x0)));
45859 /* x1 = e1 - e0 */
45860 emit_insn (gen_rtx_SET (x1, gen_rtx_MINUS (mode, e1, e0)));
45862 /* res = a * x1 */
45863 emit_insn (gen_rtx_SET (res, gen_rtx_MULT (mode, a, x1)));
45866 /* Output code to perform a Newton-Raphson approximation of a
45867 single precision floating point [reciprocal] square root. */
45869 void ix86_emit_swsqrtsf (rtx res, rtx a, machine_mode mode,
45870 bool recip)
45872 rtx x0, e0, e1, e2, e3, mthree, mhalf;
45873 REAL_VALUE_TYPE r;
45874 int unspec;
45876 x0 = gen_reg_rtx (mode);
45877 e0 = gen_reg_rtx (mode);
45878 e1 = gen_reg_rtx (mode);
45879 e2 = gen_reg_rtx (mode);
45880 e3 = gen_reg_rtx (mode);
45882 real_from_integer (&r, VOIDmode, -3, SIGNED);
45883 mthree = CONST_DOUBLE_FROM_REAL_VALUE (r, SFmode);
45885 real_arithmetic (&r, NEGATE_EXPR, &dconsthalf, NULL);
45886 mhalf = CONST_DOUBLE_FROM_REAL_VALUE (r, SFmode);
45887 unspec = UNSPEC_RSQRT;
45889 if (VECTOR_MODE_P (mode))
45891 mthree = ix86_build_const_vector (mode, true, mthree);
45892 mhalf = ix86_build_const_vector (mode, true, mhalf);
45893 /* There is no 512-bit rsqrt. There is however rsqrt14. */
45894 if (GET_MODE_SIZE (mode) == 64)
45895 unspec = UNSPEC_RSQRT14;
45898 /* sqrt(a) = -0.5 * a * rsqrtss(a) * (a * rsqrtss(a) * rsqrtss(a) - 3.0)
45899 rsqrt(a) = -0.5 * rsqrtss(a) * (a * rsqrtss(a) * rsqrtss(a) - 3.0) */
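 /* Both lines are the Newton-Raphson step x1 = x0 * (3 - a*x0*x0) / 2 for
    1/sqrt(a), applied to the hardware estimate x0 = rsqrtss(a) and written
    with the -0.5 factored out; sqrt(a) is then obtained as a * (1/sqrt(a)),
    which is why the sqrt variant carries the extra factor of a (folded into
    e0 = x0 * a below).  */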
45901 a = force_reg (mode, a);
45903 /* x0 = rsqrt(a) estimate */
45904 emit_insn (gen_rtx_SET (x0, gen_rtx_UNSPEC (mode, gen_rtvec (1, a),
45905 unspec)));
45907 /* If (a == 0.0) Filter out infinity to prevent NaN for sqrt(0.0). */
45908 if (!recip)
45910 rtx zero, mask;
45912 zero = gen_reg_rtx (mode);
45913 mask = gen_reg_rtx (mode);
45915 zero = force_reg (mode, CONST0_RTX(mode));
45917 /* Handle masked compare. */
45918 if (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 64)
45920 mask = gen_reg_rtx (HImode);
45921 /* Imm value 0x4 corresponds to not-equal comparison. */
45922 emit_insn (gen_avx512f_cmpv16sf3 (mask, zero, a, GEN_INT (0x4)));
45923 emit_insn (gen_avx512f_blendmv16sf (x0, zero, x0, mask));
45925 else
45927 emit_insn (gen_rtx_SET (mask, gen_rtx_NE (mode, zero, a)));
45929 emit_insn (gen_rtx_SET (x0, gen_rtx_AND (mode, x0, mask)));
45933 /* e0 = x0 * a */
45934 emit_insn (gen_rtx_SET (e0, gen_rtx_MULT (mode, x0, a)));
45935 /* e1 = e0 * x0 */
45936 emit_insn (gen_rtx_SET (e1, gen_rtx_MULT (mode, e0, x0)));
45938 /* e2 = e1 - 3. */
45939 mthree = force_reg (mode, mthree);
45940 emit_insn (gen_rtx_SET (e2, gen_rtx_PLUS (mode, e1, mthree)));
45942 mhalf = force_reg (mode, mhalf);
45943 if (recip)
45944 /* e3 = -.5 * x0 */
45945 emit_insn (gen_rtx_SET (e3, gen_rtx_MULT (mode, x0, mhalf)));
45946 else
45947 /* e3 = -.5 * e0 */
45948 emit_insn (gen_rtx_SET (e3, gen_rtx_MULT (mode, e0, mhalf)));
45949 /* ret = e2 * e3 */
45950 emit_insn (gen_rtx_SET (res, gen_rtx_MULT (mode, e2, e3)));
45953 #ifdef TARGET_SOLARIS
45954 /* Solaris implementation of TARGET_ASM_NAMED_SECTION. */
45956 static void
45957 i386_solaris_elf_named_section (const char *name, unsigned int flags,
45958 tree decl)
45960 /* With Binutils 2.15, the "@unwind" marker must be specified on
45961 every occurrence of the ".eh_frame" section, not just the first
45962 one. */
45963 if (TARGET_64BIT
45964 && strcmp (name, ".eh_frame") == 0)
45966 fprintf (asm_out_file, "\t.section\t%s,\"%s\",@unwind\n", name,
45967 flags & SECTION_WRITE ? "aw" : "a");
45968 return;
45971 #ifndef USE_GAS
45972 if (HAVE_COMDAT_GROUP && flags & SECTION_LINKONCE)
45974 solaris_elf_asm_comdat_section (name, flags, decl);
45975 return;
45977 #endif
45979 default_elf_asm_named_section (name, flags, decl);
45981 #endif /* TARGET_SOLARIS */
45983 /* Return the mangling of TYPE if it is an extended fundamental type. */
45985 static const char *
45986 ix86_mangle_type (const_tree type)
45988 type = TYPE_MAIN_VARIANT (type);
45990 if (TREE_CODE (type) != VOID_TYPE && TREE_CODE (type) != BOOLEAN_TYPE
45991 && TREE_CODE (type) != INTEGER_TYPE && TREE_CODE (type) != REAL_TYPE)
45992 return NULL;
45994 switch (TYPE_MODE (type))
45996 case TFmode:
45997 /* __float128 is "g". */
45998 return "g";
45999 case XFmode:
46000 /* "long double" or __float80 is "e". */
46001 return "e";
46002 default:
46003 return NULL;
46007 /* For 32-bit code we can save PIC register setup by using
46008 __stack_chk_fail_local hidden function instead of calling
46009 __stack_chk_fail directly. 64-bit code doesn't need to set up any PIC
46010 register, so it is better to call __stack_chk_fail directly. */
46012 static tree ATTRIBUTE_UNUSED
46013 ix86_stack_protect_fail (void)
46015 return TARGET_64BIT
46016 ? default_external_stack_protect_fail ()
46017 : default_hidden_stack_protect_fail ();
46020 /* Select a format to encode pointers in exception handling data. CODE
46021 is 0 for data, 1 for code labels, 2 for function pointers. GLOBAL is
46022 true if the symbol may be affected by dynamic relocations.
46024 ??? All x86 object file formats are capable of representing this.
46025 After all, the relocation needed is the same as for the call insn.
46026 Whether or not a particular assembler allows us to enter such, I
46027 guess we'll have to see. */
46028 int
46029 asm_preferred_eh_data_format (int code, int global)
46031 if (flag_pic)
46033 int type = DW_EH_PE_sdata8;
46034 if (!TARGET_64BIT
46035 || ix86_cmodel == CM_SMALL_PIC
46036 || (ix86_cmodel == CM_MEDIUM_PIC && (global || code)))
46037 type = DW_EH_PE_sdata4;
46038 return (global ? DW_EH_PE_indirect : 0) | DW_EH_PE_pcrel | type;
46040 if (ix86_cmodel == CM_SMALL
46041 || (ix86_cmodel == CM_MEDIUM && code))
46042 return DW_EH_PE_udata4;
46043 return DW_EH_PE_absptr;
46046 /* Expand copysign from SIGN to the positive value ABS_VALUE
46047 storing in RESULT. If MASK is non-null, it shall be a mask to mask out
46048 the sign-bit. */
46049 static void
46050 ix86_sse_copysign_to_positive (rtx result, rtx abs_value, rtx sign, rtx mask)
46052 machine_mode mode = GET_MODE (sign);
46053 rtx sgn = gen_reg_rtx (mode);
46054 if (mask == NULL_RTX)
46056 machine_mode vmode;
46058 if (mode == SFmode)
46059 vmode = V4SFmode;
46060 else if (mode == DFmode)
46061 vmode = V2DFmode;
46062 else
46063 vmode = mode;
46065 mask = ix86_build_signbit_mask (vmode, VECTOR_MODE_P (mode), false);
46066 if (!VECTOR_MODE_P (mode))
46068 /* We need to generate a scalar mode mask in this case. */
46069 rtx tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (1, const0_rtx));
46070 tmp = gen_rtx_VEC_SELECT (mode, mask, tmp);
46071 mask = gen_reg_rtx (mode);
46072 emit_insn (gen_rtx_SET (mask, tmp));
46075 else
46076 mask = gen_rtx_NOT (mode, mask);
46077 emit_insn (gen_rtx_SET (sgn, gen_rtx_AND (mode, mask, sign)));
46078 emit_insn (gen_rtx_SET (result, gen_rtx_IOR (mode, abs_value, sgn)));
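 /* Rough scalar model of the mask trick above, illustrative only (SFmode
    case; ABS_VALUE is assumed to already be non-negative, so the IOR
    cannot disturb any bit other than the grafted sign bit):

	uint32_t a, s;
	memcpy (&a, &abs_value, 4);
	memcpy (&s, &sign, 4);
	a |= s & 0x80000000u;
	memcpy (&result, &a, 4);
 */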
46081 /* Expand fabs (OP0) and return a new rtx that holds the result. The
46082 mask for masking out the sign-bit is stored in *SMASK, if that is
46083 non-null. */
46084 static rtx
46085 ix86_expand_sse_fabs (rtx op0, rtx *smask)
46087 machine_mode vmode, mode = GET_MODE (op0);
46088 rtx xa, mask;
46090 xa = gen_reg_rtx (mode);
46091 if (mode == SFmode)
46092 vmode = V4SFmode;
46093 else if (mode == DFmode)
46094 vmode = V2DFmode;
46095 else
46096 vmode = mode;
46097 mask = ix86_build_signbit_mask (vmode, VECTOR_MODE_P (mode), true);
46098 if (!VECTOR_MODE_P (mode))
46100 /* We need to generate a scalar mode mask in this case. */
46101 rtx tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (1, const0_rtx));
46102 tmp = gen_rtx_VEC_SELECT (mode, mask, tmp);
46103 mask = gen_reg_rtx (mode);
46104 emit_insn (gen_rtx_SET (mask, tmp));
46106 emit_insn (gen_rtx_SET (xa, gen_rtx_AND (mode, op0, mask)));
46108 if (smask)
46109 *smask = mask;
46111 return xa;
46114 /* Expands a comparison of OP0 with OP1 using comparison code CODE,
46115 swapping the operands if SWAP_OPERANDS is true. The expanded
46116 code is a forward jump to a newly created label in case the
46117 comparison is true. The generated label rtx is returned. */
46118 static rtx_code_label *
46119 ix86_expand_sse_compare_and_jump (enum rtx_code code, rtx op0, rtx op1,
46120 bool swap_operands)
46122 machine_mode fpcmp_mode = ix86_fp_compare_mode (code);
46123 rtx_code_label *label;
46124 rtx tmp;
46126 if (swap_operands)
46127 std::swap (op0, op1);
46129 label = gen_label_rtx ();
46130 tmp = gen_rtx_REG (fpcmp_mode, FLAGS_REG);
46131 emit_insn (gen_rtx_SET (tmp, gen_rtx_COMPARE (fpcmp_mode, op0, op1)));
46132 tmp = gen_rtx_fmt_ee (code, VOIDmode, tmp, const0_rtx);
46133 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
46134 gen_rtx_LABEL_REF (VOIDmode, label), pc_rtx);
46135 tmp = emit_jump_insn (gen_rtx_SET (pc_rtx, tmp));
46136 JUMP_LABEL (tmp) = label;
46138 return label;
46141 /* Expand a mask generating SSE comparison instruction comparing OP0 with OP1
46142 using comparison code CODE. Operands are swapped for the comparison if
46143 SWAP_OPERANDS is true. Returns a rtx for the generated mask. */
46144 static rtx
46145 ix86_expand_sse_compare_mask (enum rtx_code code, rtx op0, rtx op1,
46146 bool swap_operands)
46148 rtx (*insn)(rtx, rtx, rtx, rtx);
46149 machine_mode mode = GET_MODE (op0);
46150 rtx mask = gen_reg_rtx (mode);
46152 if (swap_operands)
46153 std::swap (op0, op1);
46155 insn = mode == DFmode ? gen_setcc_df_sse : gen_setcc_sf_sse;
46157 emit_insn (insn (mask, op0, op1,
46158 gen_rtx_fmt_ee (code, mode, op0, op1)));
46159 return mask;
46162 /* Generate and return a rtx of mode MODE for 2**n where n is the number
46163 of bits of the mantissa of MODE, which must be one of DFmode or SFmode. */
46164 static rtx
46165 ix86_gen_TWO52 (machine_mode mode)
46167 REAL_VALUE_TYPE TWO52r;
46168 rtx TWO52;
46170 real_ldexp (&TWO52r, &dconst1, mode == DFmode ? 52 : 23);
46171 TWO52 = const_double_from_real_value (TWO52r, mode);
46172 TWO52 = force_reg (mode, TWO52);
46174 return TWO52;
46177 /* Expand SSE sequence for computing lround from OP1 storing
46178 into OP0. */
46179 void
46180 ix86_expand_lround (rtx op0, rtx op1)
46182 /* C code for the stuff we're doing below:
46183 tmp = op1 + copysign (nextafter (0.5, 0.0), op1)
46184 return (long)tmp;
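 /* Using nextafter (0.5, 0.0) rather than 0.5 matters for inputs just below
    0.5: e.g. in SFmode, 0.5 - 2**-25 plus an exact 0.5 rounds up to 1.0 and
    lround would wrongly return 1, whereas adding the predecessor of 0.5
    keeps the sum below 1.0 and the result 0.  */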
46186 machine_mode mode = GET_MODE (op1);
46187 const struct real_format *fmt;
46188 REAL_VALUE_TYPE pred_half, half_minus_pred_half;
46189 rtx adj;
46191 /* load nextafter (0.5, 0.0) */
46192 fmt = REAL_MODE_FORMAT (mode);
46193 real_2expN (&half_minus_pred_half, -(fmt->p) - 1, mode);
46194 REAL_ARITHMETIC (pred_half, MINUS_EXPR, dconsthalf, half_minus_pred_half);
46196 /* adj = copysign (0.5, op1) */
46197 adj = force_reg (mode, const_double_from_real_value (pred_half, mode));
46198 ix86_sse_copysign_to_positive (adj, adj, force_reg (mode, op1), NULL_RTX);
46200 /* adj = op1 + adj */
46201 adj = expand_simple_binop (mode, PLUS, adj, op1, NULL_RTX, 0, OPTAB_DIRECT);
46203 /* op0 = (imode)adj */
46204 expand_fix (op0, adj, 0);
46207 /* Expand SSE2 sequence for computing lfloor or lceil from OPERAND1 storing
46208 into OPERAND0. */
46209 void
46210 ix86_expand_lfloorceil (rtx op0, rtx op1, bool do_floor)
46212 /* C code for the stuff we're doing below (for do_floor):
46213 xi = (long)op1;
46214 xi -= (double)xi > op1 ? 1 : 0;
46215 return xi;
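 /* The ceil variant (DO_FLOOR false) is the mirror image:
	xi = (long) op1;
	xi += (double) xi < op1 ? 1 : 0;
	return xi;
    Both are realized below with a single UNLE compare-and-jump whose
    operands are swapped for ceil.  */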
46217 machine_mode fmode = GET_MODE (op1);
46218 machine_mode imode = GET_MODE (op0);
46219 rtx ireg, freg, tmp;
46220 rtx_code_label *label;
46222 /* reg = (long)op1 */
46223 ireg = gen_reg_rtx (imode);
46224 expand_fix (ireg, op1, 0);
46226 /* freg = (double)reg */
46227 freg = gen_reg_rtx (fmode);
46228 expand_float (freg, ireg, 0);
46230 /* ireg = (freg > op1) ? ireg - 1 : ireg */
46231 label = ix86_expand_sse_compare_and_jump (UNLE,
46232 freg, op1, !do_floor);
46233 tmp = expand_simple_binop (imode, do_floor ? MINUS : PLUS,
46234 ireg, const1_rtx, NULL_RTX, 0, OPTAB_DIRECT);
46235 emit_move_insn (ireg, tmp);
46237 emit_label (label);
46238 LABEL_NUSES (label) = 1;
46240 emit_move_insn (op0, ireg);
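/* Editor's note (explanatory addition, not in the original source):
   the conversion to integer truncates towards zero, so a correction of
   one may be needed.  For example, for floor (-2.5): ireg = -2,
   freg = -2.0, and since -2.0 > -2.5 the branch is not taken and one is
   subtracted, giving -3.  For ceil the comparison operands are swapped
   and one is added instead.  */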
46243 /* Expand rint (IEEE round to nearest) rounding OPERAND1 and storing the
46244 result in OPERAND0. */
46245 void
46246 ix86_expand_rint (rtx operand0, rtx operand1)
46248 /* C code for the stuff we're doing below:
46249 xa = fabs (operand1);
46250 if (!isless (xa, 2**52))
46251 return operand1;
46252 xa = xa + 2**52 - 2**52;
46253 return copysign (xa, operand1);
46255 machine_mode mode = GET_MODE (operand0);
46256 rtx res, xa, TWO52, mask;
46257 rtx_code_label *label;
46259 res = gen_reg_rtx (mode);
46260 emit_move_insn (res, operand1);
46262 /* xa = abs (operand1) */
46263 xa = ix86_expand_sse_fabs (res, &mask);
46265 /* if (!isless (xa, TWO52)) goto label; */
46266 TWO52 = ix86_gen_TWO52 (mode);
46267 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
46269 xa = expand_simple_binop (mode, PLUS, xa, TWO52, NULL_RTX, 0, OPTAB_DIRECT);
46270 xa = expand_simple_binop (mode, MINUS, xa, TWO52, xa, 0, OPTAB_DIRECT);
46272 ix86_sse_copysign_to_positive (res, xa, res, mask);
46274 emit_label (label);
46275 LABEL_NUSES (label) = 1;
46277 emit_move_insn (operand0, res);
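/* Editor's note (explanatory addition, not in the original source):
   the early exit covers inputs that need no work: any value with
   |x| >= 2**52 is already an integer because its ulp is at least 1.0.
   The add/subtract of TWO52 then rounds in the current rounding mode,
   which is exactly what rint requires, and the final copysign restores
   the sign so that e.g. -0.5 becomes -0.0 rather than +0.0.  */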
46280 /* Expand SSE2 sequence for computing floor or ceil from OPERAND1 storing
46281 into OPERAND0. */
46282 void
46283 ix86_expand_floorceildf_32 (rtx operand0, rtx operand1, bool do_floor)
46285 /* C code for the stuff we expand below.
46286 double xa = fabs (x), x2;
46287 if (!isless (xa, TWO52))
46288 return x;
46289 xa = xa + TWO52 - TWO52;
46290 x2 = copysign (xa, x);
46291 Compensate. Floor:
46292 if (x2 > x)
46293 x2 -= 1;
46294 Compensate. Ceil:
46295 if (x2 < x)
46296 x2 -= -1;
46297 return x2;
46299 machine_mode mode = GET_MODE (operand0);
46300 rtx xa, TWO52, tmp, one, res, mask;
46301 rtx_code_label *label;
46303 TWO52 = ix86_gen_TWO52 (mode);
46305 /* Temporary for holding the result, initialized to the input
46306 operand to ease control flow. */
46307 res = gen_reg_rtx (mode);
46308 emit_move_insn (res, operand1);
46310 /* xa = abs (operand1) */
46311 xa = ix86_expand_sse_fabs (res, &mask);
46313 /* if (!isless (xa, TWO52)) goto label; */
46314 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
46316 /* xa = xa + TWO52 - TWO52; */
46317 xa = expand_simple_binop (mode, PLUS, xa, TWO52, NULL_RTX, 0, OPTAB_DIRECT);
46318 xa = expand_simple_binop (mode, MINUS, xa, TWO52, xa, 0, OPTAB_DIRECT);
46320 /* xa = copysign (xa, operand1) */
46321 ix86_sse_copysign_to_positive (xa, xa, res, mask);
46323 /* generate 1.0 or -1.0 */
46324 one = force_reg (mode,
46325 const_double_from_real_value (do_floor
46326 ? dconst1 : dconstm1, mode));
46328 /* Compensate: xa = xa - (xa > operand1 ? 1 : 0) */
46329 tmp = ix86_expand_sse_compare_mask (UNGT, xa, res, !do_floor);
46330 emit_insn (gen_rtx_SET (tmp, gen_rtx_AND (mode, one, tmp)));
46331 /* We always need to subtract here to preserve signed zero. */
46332 tmp = expand_simple_binop (mode, MINUS,
46333 xa, tmp, NULL_RTX, 0, OPTAB_DIRECT);
46334 emit_move_insn (res, tmp);
46336 emit_label (label);
46337 LABEL_NUSES (label) = 1;
46339 emit_move_insn (operand0, res);
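/* Editor's note (explanatory addition, not in the original source):
   the compare mask is all-ones where an adjustment is needed and
   all-zeroes elsewhere, so ANDing it with 1.0 (floor) or -1.0 (ceil)
   yields the exact adjustment value or +0.0.  The adjustment is always
   applied with a subtraction because x - (+0.0) preserves a -0.0
   result, whereas x + (+0.0) would turn -0.0 into +0.0; this is why
   ceil uses -1.0 and subtracts rather than adding 1.0.  */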
46342 /* Expand SSE2 sequence for computing floor or ceil from OPERAND1 storing
46343 into OPERAND0. */
46344 void
46345 ix86_expand_floorceil (rtx operand0, rtx operand1, bool do_floor)
46347 /* C code for the stuff we expand below.
46348 double xa = fabs (x), x2;
46349 if (!isless (xa, TWO52))
46350 return x;
46351 x2 = (double)(long)x;
46352 Compensate. Floor:
46353 if (x2 > x)
46354 x2 -= 1;
46355 Compensate. Ceil:
46356 if (x2 < x)
46357 x2 += 1;
46358 if (HONOR_SIGNED_ZEROS (mode))
46359 return copysign (x2, x);
46360 return x2;
46362 machine_mode mode = GET_MODE (operand0);
46363 rtx xa, xi, TWO52, tmp, one, res, mask;
46364 rtx_code_label *label;
46366 TWO52 = ix86_gen_TWO52 (mode);
46368 /* Temporary for holding the result, initialized to the input
46369 operand to ease control flow. */
46370 res = gen_reg_rtx (mode);
46371 emit_move_insn (res, operand1);
46373 /* xa = abs (operand1) */
46374 xa = ix86_expand_sse_fabs (res, &mask);
46376 /* if (!isless (xa, TWO52)) goto label; */
46377 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
46379 /* xa = (double)(long)x */
46380 xi = gen_reg_rtx (mode == DFmode ? DImode : SImode);
46381 expand_fix (xi, res, 0);
46382 expand_float (xa, xi, 0);
46384 /* generate 1.0 */
46385 one = force_reg (mode, const_double_from_real_value (dconst1, mode));
46387 /* Compensate: xa = xa - (xa > operand1 ? 1 : 0) */
46388 tmp = ix86_expand_sse_compare_mask (UNGT, xa, res, !do_floor);
46389 emit_insn (gen_rtx_SET (tmp, gen_rtx_AND (mode, one, tmp)));
46390 tmp = expand_simple_binop (mode, do_floor ? MINUS : PLUS,
46391 xa, tmp, NULL_RTX, 0, OPTAB_DIRECT);
46392 emit_move_insn (res, tmp);
46394 if (HONOR_SIGNED_ZEROS (mode))
46395 ix86_sse_copysign_to_positive (res, res, force_reg (mode, operand1), mask);
46397 emit_label (label);
46398 LABEL_NUSES (label) = 1;
46400 emit_move_insn (operand0, res);
46403 /* Expand SSE sequence for computing round from OPERAND1 storing
46404 into OPERAND0. Sequence that works without relying on DImode truncation
46405 via cvttsd2siq that is only available on 64bit targets. */
46406 void
46407 ix86_expand_rounddf_32 (rtx operand0, rtx operand1)
46409 /* C code for the stuff we expand below.
46410 double xa = fabs (x), xa2, x2;
46411 if (!isless (xa, TWO52))
46412 return x;
46413 Using the absolute value and copying back sign makes
46414 -0.0 -> -0.0 correct.
46415 xa2 = xa + TWO52 - TWO52;
46416 Compensate.
46417 dxa = xa2 - xa;
46418 if (dxa <= -0.5)
46419 xa2 += 1;
46420 else if (dxa > 0.5)
46421 xa2 -= 1;
46422 x2 = copysign (xa2, x);
46423 return x2;
46425 machine_mode mode = GET_MODE (operand0);
46426 rtx xa, xa2, dxa, TWO52, tmp, half, mhalf, one, res, mask;
46427 rtx_code_label *label;
46429 TWO52 = ix86_gen_TWO52 (mode);
46431 /* Temporary for holding the result, initialized to the input
46432 operand to ease control flow. */
46433 res = gen_reg_rtx (mode);
46434 emit_move_insn (res, operand1);
46436 /* xa = abs (operand1) */
46437 xa = ix86_expand_sse_fabs (res, &mask);
46439 /* if (!isless (xa, TWO52)) goto label; */
46440 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
46442 /* xa2 = xa + TWO52 - TWO52; */
46443 xa2 = expand_simple_binop (mode, PLUS, xa, TWO52, NULL_RTX, 0, OPTAB_DIRECT);
46444 xa2 = expand_simple_binop (mode, MINUS, xa2, TWO52, xa2, 0, OPTAB_DIRECT);
46446 /* dxa = xa2 - xa; */
46447 dxa = expand_simple_binop (mode, MINUS, xa2, xa, NULL_RTX, 0, OPTAB_DIRECT);
46449 /* generate 0.5, 1.0 and -0.5 */
46450 half = force_reg (mode, const_double_from_real_value (dconsthalf, mode));
46451 one = expand_simple_binop (mode, PLUS, half, half, NULL_RTX, 0, OPTAB_DIRECT);
46452 mhalf = expand_simple_binop (mode, MINUS, half, one, NULL_RTX,
46453 0, OPTAB_DIRECT);
46455 /* Compensate. */
46456 tmp = gen_reg_rtx (mode);
46457 /* xa2 = xa2 - (dxa > 0.5 ? 1 : 0) */
46458 tmp = ix86_expand_sse_compare_mask (UNGT, dxa, half, false);
46459 emit_insn (gen_rtx_SET (tmp, gen_rtx_AND (mode, one, tmp)));
46460 xa2 = expand_simple_binop (mode, MINUS, xa2, tmp, NULL_RTX, 0, OPTAB_DIRECT);
46461 /* xa2 = xa2 + (dxa <= -0.5 ? 1 : 0) */
46462 tmp = ix86_expand_sse_compare_mask (UNGE, mhalf, dxa, false);
46463 emit_insn (gen_rtx_SET (tmp, gen_rtx_AND (mode, one, tmp)));
46464 xa2 = expand_simple_binop (mode, PLUS, xa2, tmp, NULL_RTX, 0, OPTAB_DIRECT);
46466 /* res = copysign (xa2, operand1) */
46467 ix86_sse_copysign_to_positive (res, xa2, force_reg (mode, operand1), mask);
46469 emit_label (label);
46470 LABEL_NUSES (label) = 1;
46472 emit_move_insn (operand0, res);
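/* Editor's note (explanatory addition, not in the original source):
   the TWO52 trick rounds in the current rounding mode (nearest-even by
   default), while round() must round halfway cases away from zero,
   hence the dxa compensation.  For xa = 2.5: xa2 = 2.0 (ties to even),
   dxa = -0.5 <= -0.5, so 1 is added, giving 3.0.  For xa = 3.5:
   xa2 = 4.0, dxa = 0.5 which is not > 0.5, so 4.0 is kept.  Both match
   round()'s round-half-away-from-zero semantics.  */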
46475 /* Expand SSE sequence for computing trunc from OPERAND1 storing
46476 into OPERAND0. */
46477 void
46478 ix86_expand_trunc (rtx operand0, rtx operand1)
46480 /* C code for SSE variant we expand below.
46481 double xa = fabs (x), x2;
46482 if (!isless (xa, TWO52))
46483 return x;
46484 x2 = (double)(long)x;
46485 if (HONOR_SIGNED_ZEROS (mode))
46486 return copysign (x2, x);
46487 return x2;
46489 machine_mode mode = GET_MODE (operand0);
46490 rtx xa, xi, TWO52, res, mask;
46491 rtx_code_label *label;
46493 TWO52 = ix86_gen_TWO52 (mode);
46495 /* Temporary for holding the result, initialized to the input
46496 operand to ease control flow. */
46497 res = gen_reg_rtx (mode);
46498 emit_move_insn (res, operand1);
46500 /* xa = abs (operand1) */
46501 xa = ix86_expand_sse_fabs (res, &mask);
46503 /* if (!isless (xa, TWO52)) goto label; */
46504 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
46506 /* x = (double)(long)x */
46507 xi = gen_reg_rtx (mode == DFmode ? DImode : SImode);
46508 expand_fix (xi, res, 0);
46509 expand_float (res, xi, 0);
46511 if (HONOR_SIGNED_ZEROS (mode))
46512 ix86_sse_copysign_to_positive (res, res, force_reg (mode, operand1), mask);
46514 emit_label (label);
46515 LABEL_NUSES (label) = 1;
46517 emit_move_insn (operand0, res);
46520 /* Expand SSE sequence for computing trunc from OPERAND1 storing
46521 into OPERAND0. This variant works without relying on DImode truncation
via cvttsd2siq, which is only available on 64-bit targets. */
46522 void
46523 ix86_expand_truncdf_32 (rtx operand0, rtx operand1)
46525 machine_mode mode = GET_MODE (operand0);
46526 rtx xa, mask, TWO52, one, res, smask, tmp;
46527 rtx_code_label *label;
46529 /* C code for SSE variant we expand below.
46530 double xa = fabs (x), x2;
46531 if (!isless (xa, TWO52))
46532 return x;
46533 xa2 = xa + TWO52 - TWO52;
46534 Compensate:
46535 if (xa2 > xa)
46536 xa2 -= 1.0;
46537 x2 = copysign (xa2, x);
46538 return x2;
46541 TWO52 = ix86_gen_TWO52 (mode);
46543 /* Temporary for holding the result, initialized to the input
46544 operand to ease control flow. */
46545 res = gen_reg_rtx (mode);
46546 emit_move_insn (res, operand1);
46548 /* xa = abs (operand1) */
46549 xa = ix86_expand_sse_fabs (res, &smask);
46551 /* if (!isless (xa, TWO52)) goto label; */
46552 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
46554 /* res = xa + TWO52 - TWO52; */
46555 tmp = expand_simple_binop (mode, PLUS, xa, TWO52, NULL_RTX, 0, OPTAB_DIRECT);
46556 tmp = expand_simple_binop (mode, MINUS, tmp, TWO52, tmp, 0, OPTAB_DIRECT);
46557 emit_move_insn (res, tmp);
46559 /* generate 1.0 */
46560 one = force_reg (mode, const_double_from_real_value (dconst1, mode));
46562 /* Compensate: res = xa2 - (res > xa ? 1 : 0) */
46563 mask = ix86_expand_sse_compare_mask (UNGT, res, xa, false);
46564 emit_insn (gen_rtx_SET (mask, gen_rtx_AND (mode, mask, one)));
46565 tmp = expand_simple_binop (mode, MINUS,
46566 res, mask, NULL_RTX, 0, OPTAB_DIRECT);
46567 emit_move_insn (res, tmp);
46569 /* res = copysign (res, operand1) */
46570 ix86_sse_copysign_to_positive (res, res, force_reg (mode, operand1), smask);
46572 emit_label (label);
46573 LABEL_NUSES (label) = 1;
46575 emit_move_insn (operand0, res);
46578 /* Expand SSE sequence for computing round from OPERAND1 storing
46579 into OPERAND0. */
46580 void
46581 ix86_expand_round (rtx operand0, rtx operand1)
46583 /* C code for the stuff we're doing below:
46584 double xa = fabs (x);
46585 if (!isless (xa, TWO52))
46586 return x;
46587 xa = (double)(long)(xa + nextafter (0.5, 0.0));
46588 return copysign (xa, x);
46590 machine_mode mode = GET_MODE (operand0);
46591 rtx res, TWO52, xa, xi, half, mask;
46592 rtx_code_label *label;
46593 const struct real_format *fmt;
46594 REAL_VALUE_TYPE pred_half, half_minus_pred_half;
46596 /* Temporary for holding the result, initialized to the input
46597 operand to ease control flow. */
46598 res = gen_reg_rtx (mode);
46599 emit_move_insn (res, operand1);
46601 TWO52 = ix86_gen_TWO52 (mode);
46602 xa = ix86_expand_sse_fabs (res, &mask);
46603 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
46605 /* load nextafter (0.5, 0.0) */
46606 fmt = REAL_MODE_FORMAT (mode);
46607 real_2expN (&half_minus_pred_half, -(fmt->p) - 1, mode);
46608 REAL_ARITHMETIC (pred_half, MINUS_EXPR, dconsthalf, half_minus_pred_half);
46610 /* xa = xa + 0.5 */
46611 half = force_reg (mode, const_double_from_real_value (pred_half, mode));
46612 xa = expand_simple_binop (mode, PLUS, xa, half, NULL_RTX, 0, OPTAB_DIRECT);
46614 /* xa = (double)(int64_t)xa */
46615 xi = gen_reg_rtx (mode == DFmode ? DImode : SImode);
46616 expand_fix (xi, xa, 0);
46617 expand_float (xa, xi, 0);
46619 /* res = copysign (xa, operand1) */
46620 ix86_sse_copysign_to_positive (res, xa, force_reg (mode, operand1), mask);
46622 emit_label (label);
46623 LABEL_NUSES (label) = 1;
46625 emit_move_insn (operand0, res);
46628 /* Expand SSE sequence for computing round
46629 from OP1 storing into OP0 using sse4 round insn. */
46630 void
46631 ix86_expand_round_sse4 (rtx op0, rtx op1)
46633 machine_mode mode = GET_MODE (op0);
46634 rtx e1, e2, res, half;
46635 const struct real_format *fmt;
46636 REAL_VALUE_TYPE pred_half, half_minus_pred_half;
46637 rtx (*gen_copysign) (rtx, rtx, rtx);
46638 rtx (*gen_round) (rtx, rtx, rtx);
46640 switch (mode)
46642 case SFmode:
46643 gen_copysign = gen_copysignsf3;
46644 gen_round = gen_sse4_1_roundsf2;
46645 break;
46646 case DFmode:
46647 gen_copysign = gen_copysigndf3;
46648 gen_round = gen_sse4_1_rounddf2;
46649 break;
46650 default:
46651 gcc_unreachable ();
46654 /* round (a) = trunc (a + copysign (0.5, a)) */
46656 /* load nextafter (0.5, 0.0) */
46657 fmt = REAL_MODE_FORMAT (mode);
46658 real_2expN (&half_minus_pred_half, -(fmt->p) - 1, mode);
46659 REAL_ARITHMETIC (pred_half, MINUS_EXPR, dconsthalf, half_minus_pred_half);
46660 half = const_double_from_real_value (pred_half, mode);
46662 /* e1 = copysign (0.5, op1) */
46663 e1 = gen_reg_rtx (mode);
46664 emit_insn (gen_copysign (e1, half, op1));
46666 /* e2 = op1 + e1 */
46667 e2 = expand_simple_binop (mode, PLUS, op1, e1, NULL_RTX, 0, OPTAB_DIRECT);
46669 /* res = trunc (e2) */
46670 res = gen_reg_rtx (mode);
46671 emit_insn (gen_round (res, e2, GEN_INT (ROUND_TRUNC)));
46673 emit_move_insn (op0, res);
46677 /* Table of valid machine attributes. */
46678 static const struct attribute_spec ix86_attribute_table[] =
46680 /* { name, min_len, max_len, decl_req, type_req, fn_type_req, handler,
46681 affects_type_identity } */
46682 /* Stdcall attribute says callee is responsible for popping arguments
46683 if they are not variable. */
46684 { "stdcall", 0, 0, false, true, true, ix86_handle_cconv_attribute,
46685 true },
46686 /* Fastcall attribute says callee is responsible for popping arguments
46687 if they are not variable. */
46688 { "fastcall", 0, 0, false, true, true, ix86_handle_cconv_attribute,
46689 true },
46690 /* Thiscall attribute says callee is responsible for popping arguments
46691 if they are not variable. */
46692 { "thiscall", 0, 0, false, true, true, ix86_handle_cconv_attribute,
46693 true },
46694 /* Cdecl attribute says the callee is a normal C declaration */
46695 { "cdecl", 0, 0, false, true, true, ix86_handle_cconv_attribute,
46696 true },
46697 /* Regparm attribute specifies how many integer arguments are to be
46698 passed in registers. */
46699 { "regparm", 1, 1, false, true, true, ix86_handle_cconv_attribute,
46700 true },
46701 /* Sseregparm attribute says we are using x86_64 calling conventions
46702 for FP arguments. */
46703 { "sseregparm", 0, 0, false, true, true, ix86_handle_cconv_attribute,
46704 true },
46705 /* The transactional memory builtins are implicitly regparm or fastcall
46706 depending on the ABI. Override the generic do-nothing attribute that
46707 these builtins were declared with. */
46708 { "*tm regparm", 0, 0, false, true, true, ix86_handle_tm_regparm_attribute,
46709 true },
46710 /* force_align_arg_pointer says this function realigns the stack at entry. */
46711 { (const char *)&ix86_force_align_arg_pointer_string, 0, 0,
46712 false, true, true, ix86_handle_cconv_attribute, false },
46713 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
46714 { "dllimport", 0, 0, false, false, false, handle_dll_attribute, false },
46715 { "dllexport", 0, 0, false, false, false, handle_dll_attribute, false },
46716 { "shared", 0, 0, true, false, false, ix86_handle_shared_attribute,
46717 false },
46718 #endif
46719 { "ms_struct", 0, 0, false, false, false, ix86_handle_struct_attribute,
46720 false },
46721 { "gcc_struct", 0, 0, false, false, false, ix86_handle_struct_attribute,
46722 false },
46723 #ifdef SUBTARGET_ATTRIBUTE_TABLE
46724 SUBTARGET_ATTRIBUTE_TABLE,
46725 #endif
46726 /* ms_abi and sysv_abi calling convention function attributes. */
46727 { "ms_abi", 0, 0, false, true, true, ix86_handle_abi_attribute, true },
46728 { "sysv_abi", 0, 0, false, true, true, ix86_handle_abi_attribute, true },
46729 { "ms_hook_prologue", 0, 0, true, false, false, ix86_handle_fndecl_attribute,
46730 false },
46731 { "callee_pop_aggregate_return", 1, 1, false, true, true,
46732 ix86_handle_callee_pop_aggregate_return, true },
46733 /* End element. */
46734 { NULL, 0, 0, false, false, false, NULL, false }
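/* Editor's note (illustrative examples, not in the original source):
   typical user-level uses of the attributes listed above look like
       extern int  __attribute__((fastcall))    f (int a, int b);
       extern int  __attribute__((regparm (3))) g (int a, int b, int c);
       struct __attribute__((ms_struct)) S { char c; int i; };
   regparm takes exactly one argument (min_len == max_len == 1 above),
   while the other calling-convention attributes take none.  */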
46737 /* Implement targetm.vectorize.builtin_vectorization_cost. */
46738 static int
46739 ix86_builtin_vectorization_cost (enum vect_cost_for_stmt type_of_cost,
46740 tree vectype, int)
46742 unsigned elements;
46744 switch (type_of_cost)
46746 case scalar_stmt:
46747 return ix86_cost->scalar_stmt_cost;
46749 case scalar_load:
46750 return ix86_cost->scalar_load_cost;
46752 case scalar_store:
46753 return ix86_cost->scalar_store_cost;
46755 case vector_stmt:
46756 return ix86_cost->vec_stmt_cost;
46758 case vector_load:
46759 return ix86_cost->vec_align_load_cost;
46761 case vector_store:
46762 return ix86_cost->vec_store_cost;
46764 case vec_to_scalar:
46765 return ix86_cost->vec_to_scalar_cost;
46767 case scalar_to_vec:
46768 return ix86_cost->scalar_to_vec_cost;
46770 case unaligned_load:
46771 case unaligned_store:
46772 return ix86_cost->vec_unalign_load_cost;
46774 case cond_branch_taken:
46775 return ix86_cost->cond_taken_branch_cost;
46777 case cond_branch_not_taken:
46778 return ix86_cost->cond_not_taken_branch_cost;
46780 case vec_perm:
46781 case vec_promote_demote:
46782 return ix86_cost->vec_stmt_cost;
46784 case vec_construct:
46785 elements = TYPE_VECTOR_SUBPARTS (vectype);
46786 return ix86_cost->vec_stmt_cost * (elements / 2 + 1);
46788 default:
46789 gcc_unreachable ();
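/* Editor's note (editorial observation, not in the original source):
   most answers come straight from the per-processor cost table; only
   vec_construct is synthesized, scaling vec_stmt_cost by
   elements / 2 + 1, i.e. roughly linearly in the number of vector
   elements, presumably to model the sequence of inserts/interleaves
   needed to build a vector from scalars.  */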
46793 /* A cached (set (nil) (vselect (vconcat (nil) (nil)) (parallel [])))
46794 insn, so that expand_vselect{,_vconcat} doesn't have to create a fresh
46795 insn every time. */
46797 static GTY(()) rtx_insn *vselect_insn;
46799 /* Initialize vselect_insn. */
46801 static void
46802 init_vselect_insn (void)
46804 unsigned i;
46805 rtx x;
46807 x = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (MAX_VECT_LEN));
46808 for (i = 0; i < MAX_VECT_LEN; ++i)
46809 XVECEXP (x, 0, i) = const0_rtx;
46810 x = gen_rtx_VEC_SELECT (V2DFmode, gen_rtx_VEC_CONCAT (V4DFmode, const0_rtx,
46811 const0_rtx), x);
46812 x = gen_rtx_SET (const0_rtx, x);
46813 start_sequence ();
46814 vselect_insn = emit_insn (x);
46815 end_sequence ();
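/* Editor's note (explanatory addition, not in the original source):
   expand_vselect and expand_vselect_vconcat below reuse this single
   GC-rooted insn: they overwrite its operands and PARALLEL in place,
   call recog_memoized to ask whether the current ISA has a matching
   pattern, and only emit a copy of the pattern when recognition
   succeeds and the caller is not merely testing.  This avoids
   allocating fresh RTL for every candidate permutation tried.  */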
46818 /* Construct (set target (vec_select op0 (parallel perm))) and
46819 return true if that's a valid instruction in the active ISA. */
46821 static bool
46822 expand_vselect (rtx target, rtx op0, const unsigned char *perm,
46823 unsigned nelt, bool testing_p)
46825 unsigned int i;
46826 rtx x, save_vconcat;
46827 int icode;
46829 if (vselect_insn == NULL_RTX)
46830 init_vselect_insn ();
46832 x = XEXP (SET_SRC (PATTERN (vselect_insn)), 1);
46833 PUT_NUM_ELEM (XVEC (x, 0), nelt);
46834 for (i = 0; i < nelt; ++i)
46835 XVECEXP (x, 0, i) = GEN_INT (perm[i]);
46836 save_vconcat = XEXP (SET_SRC (PATTERN (vselect_insn)), 0);
46837 XEXP (SET_SRC (PATTERN (vselect_insn)), 0) = op0;
46838 PUT_MODE (SET_SRC (PATTERN (vselect_insn)), GET_MODE (target));
46839 SET_DEST (PATTERN (vselect_insn)) = target;
46840 icode = recog_memoized (vselect_insn);
46842 if (icode >= 0 && !testing_p)
46843 emit_insn (copy_rtx (PATTERN (vselect_insn)));
46845 SET_DEST (PATTERN (vselect_insn)) = const0_rtx;
46846 XEXP (SET_SRC (PATTERN (vselect_insn)), 0) = save_vconcat;
46847 INSN_CODE (vselect_insn) = -1;
46849 return icode >= 0;
46852 /* Similar, but generate a vec_concat from op0 and op1 as well. */
46854 static bool
46855 expand_vselect_vconcat (rtx target, rtx op0, rtx op1,
46856 const unsigned char *perm, unsigned nelt,
46857 bool testing_p)
46859 machine_mode v2mode;
46860 rtx x;
46861 bool ok;
46863 if (vselect_insn == NULL_RTX)
46864 init_vselect_insn ();
46866 v2mode = GET_MODE_2XWIDER_MODE (GET_MODE (op0));
46867 x = XEXP (SET_SRC (PATTERN (vselect_insn)), 0);
46868 PUT_MODE (x, v2mode);
46869 XEXP (x, 0) = op0;
46870 XEXP (x, 1) = op1;
46871 ok = expand_vselect (target, x, perm, nelt, testing_p);
46872 XEXP (x, 0) = const0_rtx;
46873 XEXP (x, 1) = const0_rtx;
46874 return ok;
46877 /* A subroutine of ix86_expand_vec_perm_builtin_1. Try to implement D
46878 in terms of blendp[sd] / pblendw / pblendvb / vpblendd. */
46880 static bool
46881 expand_vec_perm_blend (struct expand_vec_perm_d *d)
46883 machine_mode mmode, vmode = d->vmode;
46884 unsigned i, mask, nelt = d->nelt;
46885 rtx target, op0, op1, maskop, x;
46886 rtx rperm[32], vperm;
46888 if (d->one_operand_p)
46889 return false;
46890 if (TARGET_AVX512F && GET_MODE_SIZE (vmode) == 64
46891 && (TARGET_AVX512BW
46892 || GET_MODE_SIZE (GET_MODE_INNER (vmode)) >= 4))
46894 else if (TARGET_AVX2 && GET_MODE_SIZE (vmode) == 32)
46896 else if (TARGET_AVX && (vmode == V4DFmode || vmode == V8SFmode))
46898 else if (TARGET_SSE4_1 && GET_MODE_SIZE (vmode) == 16)
46900 else
46901 return false;
46903 /* This is a blend, not a permute. Elements must stay in their
46904 respective lanes. */
46905 for (i = 0; i < nelt; ++i)
46907 unsigned e = d->perm[i];
46908 if (!(e == i || e == i + nelt))
46909 return false;
46912 if (d->testing_p)
46913 return true;
46915 /* ??? Without SSE4.1, we could implement this with and/andn/or. This
46916 decision should be extracted elsewhere, so that we only try that
46917 sequence once all budget==3 options have been tried. */
46918 target = d->target;
46919 op0 = d->op0;
46920 op1 = d->op1;
46921 mask = 0;
46923 switch (vmode)
46925 case V8DFmode:
46926 case V16SFmode:
46927 case V4DFmode:
46928 case V8SFmode:
46929 case V2DFmode:
46930 case V4SFmode:
46931 case V8HImode:
46932 case V8SImode:
46933 case V32HImode:
46934 case V64QImode:
46935 case V16SImode:
46936 case V8DImode:
46937 for (i = 0; i < nelt; ++i)
46938 mask |= (d->perm[i] >= nelt) << i;
46939 break;
46941 case V2DImode:
46942 for (i = 0; i < 2; ++i)
46943 mask |= (d->perm[i] >= 2 ? 15 : 0) << (i * 4);
46944 vmode = V8HImode;
46945 goto do_subreg;
46947 case V4SImode:
46948 for (i = 0; i < 4; ++i)
46949 mask |= (d->perm[i] >= 4 ? 3 : 0) << (i * 2);
46950 vmode = V8HImode;
46951 goto do_subreg;
46953 case V16QImode:
46954 /* See if bytes move in pairs so we can use pblendw with
46955 an immediate argument, rather than pblendvb with a vector
46956 argument. */
46957 for (i = 0; i < 16; i += 2)
46958 if (d->perm[i] + 1 != d->perm[i + 1])
46960 use_pblendvb:
46961 for (i = 0; i < nelt; ++i)
46962 rperm[i] = (d->perm[i] < nelt ? const0_rtx : constm1_rtx);
46964 finish_pblendvb:
46965 vperm = gen_rtx_CONST_VECTOR (vmode, gen_rtvec_v (nelt, rperm));
46966 vperm = force_reg (vmode, vperm);
46968 if (GET_MODE_SIZE (vmode) == 16)
46969 emit_insn (gen_sse4_1_pblendvb (target, op0, op1, vperm));
46970 else
46971 emit_insn (gen_avx2_pblendvb (target, op0, op1, vperm));
46972 if (target != d->target)
46973 emit_move_insn (d->target, gen_lowpart (d->vmode, target));
46974 return true;
46977 for (i = 0; i < 8; ++i)
46978 mask |= (d->perm[i * 2] >= 16) << i;
46979 vmode = V8HImode;
46980 /* FALLTHRU */
46982 do_subreg:
46983 target = gen_reg_rtx (vmode);
46984 op0 = gen_lowpart (vmode, op0);
46985 op1 = gen_lowpart (vmode, op1);
46986 break;
46988 case V32QImode:
46989 /* See if bytes move in pairs. If not, vpblendvb must be used. */
46990 for (i = 0; i < 32; i += 2)
46991 if (d->perm[i] + 1 != d->perm[i + 1])
46992 goto use_pblendvb;
46993 /* See if bytes move in quadruplets. If yes, vpblendd
46994 with immediate can be used. */
46995 for (i = 0; i < 32; i += 4)
46996 if (d->perm[i] + 2 != d->perm[i + 2])
46997 break;
46998 if (i < 32)
47000 /* See if bytes move the same in both lanes. If yes,
47001 vpblendw with immediate can be used. */
47002 for (i = 0; i < 16; i += 2)
47003 if (d->perm[i] + 16 != d->perm[i + 16])
47004 goto use_pblendvb;
47006 /* Use vpblendw. */
47007 for (i = 0; i < 16; ++i)
47008 mask |= (d->perm[i * 2] >= 32) << i;
47009 vmode = V16HImode;
47010 goto do_subreg;
47013 /* Use vpblendd. */
47014 for (i = 0; i < 8; ++i)
47015 mask |= (d->perm[i * 4] >= 32) << i;
47016 vmode = V8SImode;
47017 goto do_subreg;
47019 case V16HImode:
47020 /* See if words move in pairs. If yes, vpblendd can be used. */
47021 for (i = 0; i < 16; i += 2)
47022 if (d->perm[i] + 1 != d->perm[i + 1])
47023 break;
47024 if (i < 16)
47026 /* See if words move the same in both lanes. If not,
47027 vpblendvb must be used. */
47028 for (i = 0; i < 8; i++)
47029 if (d->perm[i] + 8 != d->perm[i + 8])
47031 /* Use vpblendvb. */
47032 for (i = 0; i < 32; ++i)
47033 rperm[i] = (d->perm[i / 2] < 16 ? const0_rtx : constm1_rtx);
47035 vmode = V32QImode;
47036 nelt = 32;
47037 target = gen_reg_rtx (vmode);
47038 op0 = gen_lowpart (vmode, op0);
47039 op1 = gen_lowpart (vmode, op1);
47040 goto finish_pblendvb;
47043 /* Use vpblendw. */
47044 for (i = 0; i < 16; ++i)
47045 mask |= (d->perm[i] >= 16) << i;
47046 break;
47049 /* Use vpblendd. */
47050 for (i = 0; i < 8; ++i)
47051 mask |= (d->perm[i * 2] >= 16) << i;
47052 vmode = V8SImode;
47053 goto do_subreg;
47055 case V4DImode:
47056 /* Use vpblendd. */
47057 for (i = 0; i < 4; ++i)
47058 mask |= (d->perm[i] >= 4 ? 3 : 0) << (i * 2);
47059 vmode = V8SImode;
47060 goto do_subreg;
47062 default:
47063 gcc_unreachable ();
47066 switch (vmode)
47068 case V8DFmode:
47069 case V8DImode:
47070 mmode = QImode;
47071 break;
47072 case V16SFmode:
47073 case V16SImode:
47074 mmode = HImode;
47075 break;
47076 case V32HImode:
47077 mmode = SImode;
47078 break;
47079 case V64QImode:
47080 mmode = DImode;
47081 break;
47082 default:
47083 mmode = VOIDmode;
47086 if (mmode != VOIDmode)
47087 maskop = force_reg (mmode, gen_int_mode (mask, mmode));
47088 else
47089 maskop = GEN_INT (mask);
47091 /* This matches five different patterns with the different modes. */
47092 x = gen_rtx_VEC_MERGE (vmode, op1, op0, maskop);
47093 x = gen_rtx_SET (target, x);
47094 emit_insn (x);
47095 if (target != d->target)
47096 emit_move_insn (d->target, gen_lowpart (d->vmode, target));
47098 return true;
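/* Editor's note (illustrative example, not in the original source):
   the blend mask has bit I set when element I is taken from op1.  For
   a V4SFmode permutation {0, 5, 2, 7}, elements 1 and 3 come from op1
   (5 == 1 + nelt, 7 == 3 + nelt), so mask == 0b1010, which becomes the
   immediate of the emitted blend.  Element types without a matching
   immediate blend are rewritten as a wider-element blend or fall back
   to pblendvb with a constant mask vector.  */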
47101 /* A subroutine of ix86_expand_vec_perm_builtin_1. Try to implement D
47102 in terms of the variable form of vpermilps.
47104 Note that we will have already failed the immediate input vpermilps,
47105 which requires that the high and low part shuffle be identical; the
47106 variable form doesn't require that. */
47108 static bool
47109 expand_vec_perm_vpermil (struct expand_vec_perm_d *d)
47111 rtx rperm[8], vperm;
47112 unsigned i;
47114 if (!TARGET_AVX || d->vmode != V8SFmode || !d->one_operand_p)
47115 return false;
47117 /* We can only permute within the 128-bit lane. */
47118 for (i = 0; i < 8; ++i)
47120 unsigned e = d->perm[i];
47121 if (i < 4 ? e >= 4 : e < 4)
47122 return false;
47125 if (d->testing_p)
47126 return true;
47128 for (i = 0; i < 8; ++i)
47130 unsigned e = d->perm[i];
47132 /* Within each 128-bit lane, the elements of op0 are numbered
47133 from 0 and the elements of op1 are numbered from 4. */
47134 if (e >= 8 + 4)
47135 e -= 8;
47136 else if (e >= 4)
47137 e -= 4;
47139 rperm[i] = GEN_INT (e);
47142 vperm = gen_rtx_CONST_VECTOR (V8SImode, gen_rtvec_v (8, rperm));
47143 vperm = force_reg (V8SImode, vperm);
47144 emit_insn (gen_avx_vpermilvarv8sf3 (d->target, d->op0, vperm));
47146 return true;
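/* Editor's note (explanatory addition, not in the original source):
   the variable form of vpermilps takes a control vector in which the
   low two bits of each element select one of the four floats within
   the same 128-bit lane of the source, so, unlike the immediate form,
   the two lanes may use different shuffle patterns.  The e -= 8 /
   e -= 4 remapping above reduces the canonical permutation indices to
   those per-lane selectors.  */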
47149 /* Return true if permutation D can be performed as VMODE permutation
47150 instead. */
47152 static bool
47153 valid_perm_using_mode_p (machine_mode vmode, struct expand_vec_perm_d *d)
47155 unsigned int i, j, chunk;
47157 if (GET_MODE_CLASS (vmode) != MODE_VECTOR_INT
47158 || GET_MODE_CLASS (d->vmode) != MODE_VECTOR_INT
47159 || GET_MODE_SIZE (vmode) != GET_MODE_SIZE (d->vmode))
47160 return false;
47162 if (GET_MODE_NUNITS (vmode) >= d->nelt)
47163 return true;
47165 chunk = d->nelt / GET_MODE_NUNITS (vmode);
47166 for (i = 0; i < d->nelt; i += chunk)
47167 if (d->perm[i] & (chunk - 1))
47168 return false;
47169 else
47170 for (j = 1; j < chunk; ++j)
47171 if (d->perm[i] + j != d->perm[i + j])
47172 return false;
47174 return true;
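/* Editor's note (illustrative example, not in the original source):
   this asks whether the permutation moves whole VMODE-sized chunks.
   E.g. a V16QImode permutation {4 5 6 7 0 1 2 3 12 13 14 15 8 9 10 11}
   moves aligned groups of four consecutive bytes, so it is also a
   valid V4SImode permutation ({1 0 3 2}) and can be expanded with
   dword shuffles instead of byte shuffles.  */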
47177 /* A subroutine of ix86_expand_vec_perm_builtin_1. Try to implement D
47178 in terms of pshufb, vpperm, vpermq, vpermd, vpermps or vperm2i128. */
47180 static bool
47181 expand_vec_perm_pshufb (struct expand_vec_perm_d *d)
47183 unsigned i, nelt, eltsz, mask;
47184 unsigned char perm[64];
47185 machine_mode vmode = V16QImode;
47186 rtx rperm[64], vperm, target, op0, op1;
47188 nelt = d->nelt;
47190 if (!d->one_operand_p)
47192 if (!TARGET_XOP || GET_MODE_SIZE (d->vmode) != 16)
47194 if (TARGET_AVX2
47195 && valid_perm_using_mode_p (V2TImode, d))
47197 if (d->testing_p)
47198 return true;
47200 /* Use vperm2i128 insn. The pattern uses
47201 V4DImode instead of V2TImode. */
47202 target = d->target;
47203 if (d->vmode != V4DImode)
47204 target = gen_reg_rtx (V4DImode);
47205 op0 = gen_lowpart (V4DImode, d->op0);
47206 op1 = gen_lowpart (V4DImode, d->op1);
47207 rperm[0]
47208 = GEN_INT ((d->perm[0] / (nelt / 2))
47209 | ((d->perm[nelt / 2] / (nelt / 2)) * 16));
47210 emit_insn (gen_avx2_permv2ti (target, op0, op1, rperm[0]));
47211 if (target != d->target)
47212 emit_move_insn (d->target, gen_lowpart (d->vmode, target));
47213 return true;
47215 return false;
47218 else
47220 if (GET_MODE_SIZE (d->vmode) == 16)
47222 if (!TARGET_SSSE3)
47223 return false;
47225 else if (GET_MODE_SIZE (d->vmode) == 32)
47227 if (!TARGET_AVX2)
47228 return false;
47230 /* V4DImode should be already handled through
47231 expand_vselect by vpermq instruction. */
47232 gcc_assert (d->vmode != V4DImode);
47234 vmode = V32QImode;
47235 if (d->vmode == V8SImode
47236 || d->vmode == V16HImode
47237 || d->vmode == V32QImode)
47239 /* First see if vpermq can be used for
47240 V8SImode/V16HImode/V32QImode. */
47241 if (valid_perm_using_mode_p (V4DImode, d))
47243 for (i = 0; i < 4; i++)
47244 perm[i] = (d->perm[i * nelt / 4] * 4 / nelt) & 3;
47245 if (d->testing_p)
47246 return true;
47247 target = gen_reg_rtx (V4DImode);
47248 if (expand_vselect (target, gen_lowpart (V4DImode, d->op0),
47249 perm, 4, false))
47251 emit_move_insn (d->target,
47252 gen_lowpart (d->vmode, target));
47253 return true;
47255 return false;
47258 /* Next see if vpermd can be used. */
47259 if (valid_perm_using_mode_p (V8SImode, d))
47260 vmode = V8SImode;
47262 /* Or if vpermps can be used. */
47263 else if (d->vmode == V8SFmode)
47264 vmode = V8SImode;
47266 if (vmode == V32QImode)
47268 /* vpshufb only works within a 128-bit lane; it is not
47269 possible to shuffle bytes across lanes. */
47270 for (i = 0; i < nelt; ++i)
47271 if ((d->perm[i] ^ i) & (nelt / 2))
47272 return false;
47275 else if (GET_MODE_SIZE (d->vmode) == 64)
47277 if (!TARGET_AVX512BW)
47278 return false;
47280 /* If vpermq didn't work, vpshufb won't work either. */
47281 if (d->vmode == V8DFmode || d->vmode == V8DImode)
47282 return false;
47284 vmode = V64QImode;
47285 if (d->vmode == V16SImode
47286 || d->vmode == V32HImode
47287 || d->vmode == V64QImode)
47289 /* First see if vpermq can be used for
47290 V16SImode/V32HImode/V64QImode. */
47291 if (valid_perm_using_mode_p (V8DImode, d))
47293 for (i = 0; i < 8; i++)
47294 perm[i] = (d->perm[i * nelt / 8] * 8 / nelt) & 7;
47295 if (d->testing_p)
47296 return true;
47297 target = gen_reg_rtx (V8DImode);
47298 if (expand_vselect (target, gen_lowpart (V8DImode, d->op0),
47299 perm, 8, false))
47301 emit_move_insn (d->target,
47302 gen_lowpart (d->vmode, target));
47303 return true;
47305 return false;
47308 /* Next see if vpermd can be used. */
47309 if (valid_perm_using_mode_p (V16SImode, d))
47310 vmode = V16SImode;
47312 /* Or if vpermps can be used. */
47313 else if (d->vmode == V16SFmode)
47314 vmode = V16SImode;
47315 if (vmode == V64QImode)
47317 /* vpshufb only works within a 128-bit lane; it is not
47318 possible to shuffle bytes across lanes. */
47319 for (i = 0; i < nelt; ++i)
47320 if ((d->perm[i] ^ i) & (nelt / 4))
47321 return false;
47324 else
47325 return false;
47328 if (d->testing_p)
47329 return true;
47331 if (vmode == V8SImode)
47332 for (i = 0; i < 8; ++i)
47333 rperm[i] = GEN_INT ((d->perm[i * nelt / 8] * 8 / nelt) & 7);
47334 else if (vmode == V16SImode)
47335 for (i = 0; i < 16; ++i)
47336 rperm[i] = GEN_INT ((d->perm[i * nelt / 16] * 16 / nelt) & 15);
47337 else
47339 eltsz = GET_MODE_SIZE (GET_MODE_INNER (d->vmode));
47340 if (!d->one_operand_p)
47341 mask = 2 * nelt - 1;
47342 else if (vmode == V16QImode)
47343 mask = nelt - 1;
47344 else if (vmode == V64QImode)
47345 mask = nelt / 4 - 1;
47346 else
47347 mask = nelt / 2 - 1;
47349 for (i = 0; i < nelt; ++i)
47351 unsigned j, e = d->perm[i] & mask;
47352 for (j = 0; j < eltsz; ++j)
47353 rperm[i * eltsz + j] = GEN_INT (e * eltsz + j);
47357 vperm = gen_rtx_CONST_VECTOR (vmode,
47358 gen_rtvec_v (GET_MODE_NUNITS (vmode), rperm));
47359 vperm = force_reg (vmode, vperm);
47361 target = d->target;
47362 if (d->vmode != vmode)
47363 target = gen_reg_rtx (vmode);
47364 op0 = gen_lowpart (vmode, d->op0);
47365 if (d->one_operand_p)
47367 if (vmode == V16QImode)
47368 emit_insn (gen_ssse3_pshufbv16qi3 (target, op0, vperm));
47369 else if (vmode == V32QImode)
47370 emit_insn (gen_avx2_pshufbv32qi3 (target, op0, vperm));
47371 else if (vmode == V64QImode)
47372 emit_insn (gen_avx512bw_pshufbv64qi3 (target, op0, vperm));
47373 else if (vmode == V8SFmode)
47374 emit_insn (gen_avx2_permvarv8sf (target, op0, vperm));
47375 else if (vmode == V8SImode)
47376 emit_insn (gen_avx2_permvarv8si (target, op0, vperm));
47377 else if (vmode == V16SFmode)
47378 emit_insn (gen_avx512f_permvarv16sf (target, op0, vperm));
47379 else if (vmode == V16SImode)
47380 emit_insn (gen_avx512f_permvarv16si (target, op0, vperm));
47381 else
47382 gcc_unreachable ();
47384 else
47386 op1 = gen_lowpart (vmode, d->op1);
47387 emit_insn (gen_xop_pperm (target, op0, op1, vperm));
47389 if (target != d->target)
47390 emit_move_insn (d->target, gen_lowpart (d->vmode, target));
47392 return true;
47395 /* A subroutine of ix86_expand_vec_perm_builtin_1. Try to instantiate D
47396 in a single instruction. */
47398 static bool
47399 expand_vec_perm_1 (struct expand_vec_perm_d *d)
47401 unsigned i, nelt = d->nelt;
47402 unsigned char perm2[MAX_VECT_LEN];
47404 /* Check plain VEC_SELECT first, because AVX has instructions that could
47405 match both SEL and SEL+CONCAT, but the plain SEL will allow a memory
47406 input where SEL+CONCAT may not. */
47407 if (d->one_operand_p)
47409 int mask = nelt - 1;
47410 bool identity_perm = true;
47411 bool broadcast_perm = true;
47413 for (i = 0; i < nelt; i++)
47415 perm2[i] = d->perm[i] & mask;
47416 if (perm2[i] != i)
47417 identity_perm = false;
47418 if (perm2[i])
47419 broadcast_perm = false;
47422 if (identity_perm)
47424 if (!d->testing_p)
47425 emit_move_insn (d->target, d->op0);
47426 return true;
47428 else if (broadcast_perm && TARGET_AVX2)
47430 /* Use vpbroadcast{b,w,d}. */
47431 rtx (*gen) (rtx, rtx) = NULL;
47432 switch (d->vmode)
47434 case V64QImode:
47435 if (TARGET_AVX512BW)
47436 gen = gen_avx512bw_vec_dupv64qi_1;
47437 break;
47438 case V32QImode:
47439 gen = gen_avx2_pbroadcastv32qi_1;
47440 break;
47441 case V32HImode:
47442 if (TARGET_AVX512BW)
47443 gen = gen_avx512bw_vec_dupv32hi_1;
47444 break;
47445 case V16HImode:
47446 gen = gen_avx2_pbroadcastv16hi_1;
47447 break;
47448 case V16SImode:
47449 if (TARGET_AVX512F)
47450 gen = gen_avx512f_vec_dupv16si_1;
47451 break;
47452 case V8SImode:
47453 gen = gen_avx2_pbroadcastv8si_1;
47454 break;
47455 case V16QImode:
47456 gen = gen_avx2_pbroadcastv16qi;
47457 break;
47458 case V8HImode:
47459 gen = gen_avx2_pbroadcastv8hi;
47460 break;
47461 case V16SFmode:
47462 if (TARGET_AVX512F)
47463 gen = gen_avx512f_vec_dupv16sf_1;
47464 break;
47465 case V8SFmode:
47466 gen = gen_avx2_vec_dupv8sf_1;
47467 break;
47468 case V8DFmode:
47469 if (TARGET_AVX512F)
47470 gen = gen_avx512f_vec_dupv8df_1;
47471 break;
47472 case V8DImode:
47473 if (TARGET_AVX512F)
47474 gen = gen_avx512f_vec_dupv8di_1;
47475 break;
47476 /* For other modes prefer other shuffles this function creates. */
47477 default: break;
47479 if (gen != NULL)
47481 if (!d->testing_p)
47482 emit_insn (gen (d->target, d->op0));
47483 return true;
47487 if (expand_vselect (d->target, d->op0, perm2, nelt, d->testing_p))
47488 return true;
47490 /* There are plenty of patterns in sse.md that are written for
47491 SEL+CONCAT and are not replicated for a single op. Perhaps
47492 that should be changed, to avoid the nastiness here. */
47494 /* Recognize interleave style patterns, which means incrementing
47495 every other permutation operand. */
47496 for (i = 0; i < nelt; i += 2)
47498 perm2[i] = d->perm[i] & mask;
47499 perm2[i + 1] = (d->perm[i + 1] & mask) + nelt;
47501 if (expand_vselect_vconcat (d->target, d->op0, d->op0, perm2, nelt,
47502 d->testing_p))
47503 return true;
47505 /* Recognize shufps, which means adding {0, 0, nelt, nelt}. */
47506 if (nelt >= 4)
47508 for (i = 0; i < nelt; i += 4)
47510 perm2[i + 0] = d->perm[i + 0] & mask;
47511 perm2[i + 1] = d->perm[i + 1] & mask;
47512 perm2[i + 2] = (d->perm[i + 2] & mask) + nelt;
47513 perm2[i + 3] = (d->perm[i + 3] & mask) + nelt;
47516 if (expand_vselect_vconcat (d->target, d->op0, d->op0, perm2, nelt,
47517 d->testing_p))
47518 return true;
47522 /* Finally, try the fully general two operand permute. */
47523 if (expand_vselect_vconcat (d->target, d->op0, d->op1, d->perm, nelt,
47524 d->testing_p))
47525 return true;
47527 /* Recognize interleave style patterns with reversed operands. */
47528 if (!d->one_operand_p)
47530 for (i = 0; i < nelt; ++i)
47532 unsigned e = d->perm[i];
47533 if (e >= nelt)
47534 e -= nelt;
47535 else
47536 e += nelt;
47537 perm2[i] = e;
47540 if (expand_vselect_vconcat (d->target, d->op1, d->op0, perm2, nelt,
47541 d->testing_p))
47542 return true;
47545 /* Try the SSE4.1 blend variable merge instructions. */
47546 if (expand_vec_perm_blend (d))
47547 return true;
47549 /* Try one of the AVX vpermil variable permutations. */
47550 if (expand_vec_perm_vpermil (d))
47551 return true;
47553 /* Try the SSSE3 pshufb or XOP vpperm or AVX2 vperm2i128,
47554 vpshufb, vpermd, vpermps or vpermq variable permutation. */
47555 if (expand_vec_perm_pshufb (d))
47556 return true;
47558 /* Try the AVX2 vpalignr instruction. */
47559 if (expand_vec_perm_palignr (d, true))
47560 return true;
47562 /* Try the AVX512F vpermi2 instructions. */
47563 if (ix86_expand_vec_perm_vpermi2 (NULL_RTX, NULL_RTX, NULL_RTX, NULL_RTX, d))
47564 return true;
47566 return false;
47569 /* A subroutine of ix86_expand_vec_perm_builtin_1. Try to implement D
47570 in terms of a pair of pshuflw + pshufhw instructions. */
47572 static bool
47573 expand_vec_perm_pshuflw_pshufhw (struct expand_vec_perm_d *d)
47575 unsigned char perm2[MAX_VECT_LEN];
47576 unsigned i;
47577 bool ok;
47579 if (d->vmode != V8HImode || !d->one_operand_p)
47580 return false;
47582 /* The two permutations only operate in 64-bit lanes. */
47583 for (i = 0; i < 4; ++i)
47584 if (d->perm[i] >= 4)
47585 return false;
47586 for (i = 4; i < 8; ++i)
47587 if (d->perm[i] < 4)
47588 return false;
47590 if (d->testing_p)
47591 return true;
47593 /* Emit the pshuflw. */
47594 memcpy (perm2, d->perm, 4);
47595 for (i = 4; i < 8; ++i)
47596 perm2[i] = i;
47597 ok = expand_vselect (d->target, d->op0, perm2, 8, d->testing_p);
47598 gcc_assert (ok);
47600 /* Emit the pshufhw. */
47601 memcpy (perm2 + 4, d->perm + 4, 4);
47602 for (i = 0; i < 4; ++i)
47603 perm2[i] = i;
47604 ok = expand_vselect (d->target, d->target, perm2, 8, d->testing_p);
47605 gcc_assert (ok);
47607 return true;
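/* Editor's note (illustrative example, not in the original source):
   for a one-operand V8HImode permutation such as {2 1 0 3 7 6 5 4},
   the low four indices stay below 4 and the high four stay at or above
   4, so the expansion is pshuflw with {2 1 0 3} (high half identity)
   followed by pshufhw with {7 6 5 4} (low half identity) on the
   intermediate result.  */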
47610 /* A subroutine of ix86_expand_vec_perm_builtin_1. Try to simplify
47611 the permutation using the SSSE3 palignr instruction. This succeeds
47612 when all of the elements in PERM fit within one vector and we merely
47613 need to shift them down so that a single vector permutation has a
47614 chance to succeed. If SINGLE_INSN_ONLY_P, succeed only if
47615 the vpalignr instruction by itself can perform the requested permutation. */
47617 static bool
47618 expand_vec_perm_palignr (struct expand_vec_perm_d *d, bool single_insn_only_p)
47620 unsigned i, nelt = d->nelt;
47621 unsigned min, max, minswap, maxswap;
47622 bool in_order, ok, swap = false;
47623 rtx shift, target;
47624 struct expand_vec_perm_d dcopy;
47626 /* Even with AVX, palignr only operates on 128-bit vectors;
47627 with AVX2, palignr operates on both 128-bit lanes. */
47628 if ((!TARGET_SSSE3 || GET_MODE_SIZE (d->vmode) != 16)
47629 && (!TARGET_AVX2 || GET_MODE_SIZE (d->vmode) != 32))
47630 return false;
47632 min = 2 * nelt;
47633 max = 0;
47634 minswap = 2 * nelt;
47635 maxswap = 0;
47636 for (i = 0; i < nelt; ++i)
47638 unsigned e = d->perm[i];
47639 unsigned eswap = d->perm[i] ^ nelt;
47640 if (GET_MODE_SIZE (d->vmode) == 32)
47642 e = (e & ((nelt / 2) - 1)) | ((e & nelt) >> 1);
47643 eswap = e ^ (nelt / 2);
47645 if (e < min)
47646 min = e;
47647 if (e > max)
47648 max = e;
47649 if (eswap < minswap)
47650 minswap = eswap;
47651 if (eswap > maxswap)
47652 maxswap = eswap;
47654 if (min == 0
47655 || max - min >= (GET_MODE_SIZE (d->vmode) == 32 ? nelt / 2 : nelt))
47657 if (d->one_operand_p
47658 || minswap == 0
47659 || maxswap - minswap >= (GET_MODE_SIZE (d->vmode) == 32
47660 ? nelt / 2 : nelt))
47661 return false;
47662 swap = true;
47663 min = minswap;
47664 max = maxswap;
47667 /* Given that we have SSSE3, we know we'll be able to implement the
47668 single operand permutation after the palignr with pshufb for
47669 128-bit vectors. If SINGLE_INSN_ONLY_P, in_order has to be computed
47670 first. */
47671 if (d->testing_p && GET_MODE_SIZE (d->vmode) == 16 && !single_insn_only_p)
47672 return true;
47674 dcopy = *d;
47675 if (swap)
47677 dcopy.op0 = d->op1;
47678 dcopy.op1 = d->op0;
47679 for (i = 0; i < nelt; ++i)
47680 dcopy.perm[i] ^= nelt;
47683 in_order = true;
47684 for (i = 0; i < nelt; ++i)
47686 unsigned e = dcopy.perm[i];
47687 if (GET_MODE_SIZE (d->vmode) == 32
47688 && e >= nelt
47689 && (e & (nelt / 2 - 1)) < min)
47690 e = e - min - (nelt / 2);
47691 else
47692 e = e - min;
47693 if (e != i)
47694 in_order = false;
47695 dcopy.perm[i] = e;
47697 dcopy.one_operand_p = true;
47699 if (single_insn_only_p && !in_order)
47700 return false;
47702 /* For AVX2, test whether we can permute the result in one instruction. */
47703 if (d->testing_p)
47705 if (in_order)
47706 return true;
47707 dcopy.op1 = dcopy.op0;
47708 return expand_vec_perm_1 (&dcopy);
47711 shift = GEN_INT (min * GET_MODE_BITSIZE (GET_MODE_INNER (d->vmode)));
47712 if (GET_MODE_SIZE (d->vmode) == 16)
47714 target = gen_reg_rtx (TImode);
47715 emit_insn (gen_ssse3_palignrti (target, gen_lowpart (TImode, dcopy.op1),
47716 gen_lowpart (TImode, dcopy.op0), shift));
47718 else
47720 target = gen_reg_rtx (V2TImode);
47721 emit_insn (gen_avx2_palignrv2ti (target,
47722 gen_lowpart (V2TImode, dcopy.op1),
47723 gen_lowpart (V2TImode, dcopy.op0),
47724 shift));
47727 dcopy.op0 = dcopy.op1 = gen_lowpart (d->vmode, target);
47729 /* Test for the degenerate case where the alignment by itself
47730 produces the desired permutation. */
47731 if (in_order)
47733 emit_move_insn (d->target, dcopy.op0);
47734 return true;
47737 ok = expand_vec_perm_1 (&dcopy);
47738 gcc_assert (ok || GET_MODE_SIZE (d->vmode) == 32);
47740 return ok;
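/* Editor's note (explanatory addition, not in the original source):
   the idea is that when every selected element falls inside one
   NELT-wide window of the double-width concatenation of the two
   operands, a single (v)palignr can shift that window down to element
   0; the remaining shuffle then references only one register and is
   retried as a one-operand permutation (e.g. pshufb).  With AVX2 the
   shift happens within each 128-bit lane, which is why the 32-byte
   case keeps extra per-lane bookkeeping and is allowed to fail.  */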
47743 /* A subroutine of ix86_expand_vec_perm_const_1. Try to simplify
47744 the permutation using the SSE4_1 pblendv instruction. Potentially
47745 reduces permutation from 2 pshufb and or to 1 pshufb and pblendv. */
47747 static bool
47748 expand_vec_perm_pblendv (struct expand_vec_perm_d *d)
47750 unsigned i, which, nelt = d->nelt;
47751 struct expand_vec_perm_d dcopy, dcopy1;
47752 machine_mode vmode = d->vmode;
47753 bool ok;
47755 /* Use the same checks as in expand_vec_perm_blend. */
47756 if (d->one_operand_p)
47757 return false;
47758 if (TARGET_AVX2 && GET_MODE_SIZE (vmode) == 32)
47760 else if (TARGET_AVX && (vmode == V4DFmode || vmode == V8SFmode))
47762 else if (TARGET_SSE4_1 && GET_MODE_SIZE (vmode) == 16)
47764 else
47765 return false;
47767 /* Figure out which permutation elements do not stay in their
47768 respective lanes. */
47769 for (i = 0, which = 0; i < nelt; ++i)
47771 unsigned e = d->perm[i];
47772 if (e != i)
47773 which |= (e < nelt ? 1 : 2);
47775 /* We can pblend the part where elements do not stay in their
47776 respective lanes only when these elements all come from the same
47777 half of the permutation.
47778 {0 1 8 3 4 5 9 7} is ok, as 8 and 9 are not in their respective
47779 lanes but both 8 and 9 are >= 8;
47780 {0 1 8 3 4 5 2 7} is not ok, as 2 and 8 are not in their
47781 respective lanes and 8 is >= 8 but 2 is not. */
47782 if (which != 1 && which != 2)
47783 return false;
47784 if (d->testing_p && GET_MODE_SIZE (vmode) == 16)
47785 return true;
47787 /* First we apply one operand permutation to the part where
47788 elements stay not in their respective lanes. */
47789 dcopy = *d;
47790 if (which == 2)
47791 dcopy.op0 = dcopy.op1 = d->op1;
47792 else
47793 dcopy.op0 = dcopy.op1 = d->op0;
47794 if (!d->testing_p)
47795 dcopy.target = gen_reg_rtx (vmode);
47796 dcopy.one_operand_p = true;
47798 for (i = 0; i < nelt; ++i)
47799 dcopy.perm[i] = d->perm[i] & (nelt - 1);
47801 ok = expand_vec_perm_1 (&dcopy);
47802 if (GET_MODE_SIZE (vmode) != 16 && !ok)
47803 return false;
47804 else
47805 gcc_assert (ok);
47806 if (d->testing_p)
47807 return true;
47809 /* Next we put permuted elements into their positions. */
47810 dcopy1 = *d;
47811 if (which == 2)
47812 dcopy1.op1 = dcopy.target;
47813 else
47814 dcopy1.op0 = dcopy.target;
47816 for (i = 0; i < nelt; ++i)
47817 dcopy1.perm[i] = ((d->perm[i] >= nelt) ? (nelt + i) : i);
47819 ok = expand_vec_perm_blend (&dcopy1);
47820 gcc_assert (ok);
47822 return true;
47825 static bool expand_vec_perm_interleave3 (struct expand_vec_perm_d *d);
47827 /* A subroutine of ix86_expand_vec_perm_builtin_1. Try to simplify
47828 a two vector permutation into a single vector permutation by using
47829 an interleave operation to merge the vectors. */
47831 static bool
47832 expand_vec_perm_interleave2 (struct expand_vec_perm_d *d)
47834 struct expand_vec_perm_d dremap, dfinal;
47835 unsigned i, nelt = d->nelt, nelt2 = nelt / 2;
47836 unsigned HOST_WIDE_INT contents;
47837 unsigned char remap[2 * MAX_VECT_LEN];
47838 rtx_insn *seq;
47839 bool ok, same_halves = false;
47841 if (GET_MODE_SIZE (d->vmode) == 16)
47843 if (d->one_operand_p)
47844 return false;
47846 else if (GET_MODE_SIZE (d->vmode) == 32)
47848 if (!TARGET_AVX)
47849 return false;
47850 /* For 32-byte modes allow even d->one_operand_p.
47851 The lack of cross-lane shuffling in some instructions
47852 might prevent a single insn shuffle. */
47853 dfinal = *d;
47854 dfinal.testing_p = true;
47855 /* If expand_vec_perm_interleave3 can expand this into
47856 a 3 insn sequence, give up and let it be expanded as
47857 a 3 insn sequence. While that is one insn longer,
47858 it doesn't need a memory operand, and in the common
47859 case where both the interleave low and interleave high
47860 permutations with the same operands are adjacent, it
47861 needs only 4 insns for both after CSE. */
47862 if (expand_vec_perm_interleave3 (&dfinal))
47863 return false;
47865 else
47866 return false;
47868 /* Examine from whence the elements come. */
47869 contents = 0;
47870 for (i = 0; i < nelt; ++i)
47871 contents |= HOST_WIDE_INT_1U << d->perm[i];
47873 memset (remap, 0xff, sizeof (remap));
47874 dremap = *d;
47876 if (GET_MODE_SIZE (d->vmode) == 16)
47878 unsigned HOST_WIDE_INT h1, h2, h3, h4;
47880 /* Split the two input vectors into 4 halves. */
47881 h1 = (HOST_WIDE_INT_1U << nelt2) - 1;
47882 h2 = h1 << nelt2;
47883 h3 = h2 << nelt2;
47884 h4 = h3 << nelt2;
47886 /* If the elements come only from the low halves, use interleave low;
47887 similarly for interleave high. If the elements are from mis-matched
47888 halves, we can use shufps for V4SF/V4SI or do a DImode shuffle. */
47889 if ((contents & (h1 | h3)) == contents)
47891 /* punpckl* */
47892 for (i = 0; i < nelt2; ++i)
47894 remap[i] = i * 2;
47895 remap[i + nelt] = i * 2 + 1;
47896 dremap.perm[i * 2] = i;
47897 dremap.perm[i * 2 + 1] = i + nelt;
47899 if (!TARGET_SSE2 && d->vmode == V4SImode)
47900 dremap.vmode = V4SFmode;
47902 else if ((contents & (h2 | h4)) == contents)
47904 /* punpckh* */
47905 for (i = 0; i < nelt2; ++i)
47907 remap[i + nelt2] = i * 2;
47908 remap[i + nelt + nelt2] = i * 2 + 1;
47909 dremap.perm[i * 2] = i + nelt2;
47910 dremap.perm[i * 2 + 1] = i + nelt + nelt2;
47912 if (!TARGET_SSE2 && d->vmode == V4SImode)
47913 dremap.vmode = V4SFmode;
47915 else if ((contents & (h1 | h4)) == contents)
47917 /* shufps */
47918 for (i = 0; i < nelt2; ++i)
47920 remap[i] = i;
47921 remap[i + nelt + nelt2] = i + nelt2;
47922 dremap.perm[i] = i;
47923 dremap.perm[i + nelt2] = i + nelt + nelt2;
47925 if (nelt != 4)
47927 /* shufpd */
47928 dremap.vmode = V2DImode;
47929 dremap.nelt = 2;
47930 dremap.perm[0] = 0;
47931 dremap.perm[1] = 3;
47934 else if ((contents & (h2 | h3)) == contents)
47936 /* shufps */
47937 for (i = 0; i < nelt2; ++i)
47939 remap[i + nelt2] = i;
47940 remap[i + nelt] = i + nelt2;
47941 dremap.perm[i] = i + nelt2;
47942 dremap.perm[i + nelt2] = i + nelt;
47944 if (nelt != 4)
47946 /* shufpd */
47947 dremap.vmode = V2DImode;
47948 dremap.nelt = 2;
47949 dremap.perm[0] = 1;
47950 dremap.perm[1] = 2;
47953 else
47954 return false;
47956 else
47958 unsigned int nelt4 = nelt / 4, nzcnt = 0;
47959 unsigned HOST_WIDE_INT q[8];
47960 unsigned int nonzero_halves[4];
47962 /* Split the two input vectors into 8 quarters. */
47963 q[0] = (HOST_WIDE_INT_1U << nelt4) - 1;
47964 for (i = 1; i < 8; ++i)
47965 q[i] = q[0] << (nelt4 * i);
47966 for (i = 0; i < 4; ++i)
47967 if (((q[2 * i] | q[2 * i + 1]) & contents) != 0)
47969 nonzero_halves[nzcnt] = i;
47970 ++nzcnt;
47973 if (nzcnt == 1)
47975 gcc_assert (d->one_operand_p);
47976 nonzero_halves[1] = nonzero_halves[0];
47977 same_halves = true;
47979 else if (d->one_operand_p)
47981 gcc_assert (nonzero_halves[0] == 0);
47982 gcc_assert (nonzero_halves[1] == 1);
47985 if (nzcnt <= 2)
47987 if (d->perm[0] / nelt2 == nonzero_halves[1])
47989 /* Attempt to increase the likelihood that dfinal
47990 shuffle will be intra-lane. */
47991 std::swap (nonzero_halves[0], nonzero_halves[1]);
47994 /* vperm2f128 or vperm2i128. */
47995 for (i = 0; i < nelt2; ++i)
47997 remap[i + nonzero_halves[1] * nelt2] = i + nelt2;
47998 remap[i + nonzero_halves[0] * nelt2] = i;
47999 dremap.perm[i + nelt2] = i + nonzero_halves[1] * nelt2;
48000 dremap.perm[i] = i + nonzero_halves[0] * nelt2;
48003 if (d->vmode != V8SFmode
48004 && d->vmode != V4DFmode
48005 && d->vmode != V8SImode)
48007 dremap.vmode = V8SImode;
48008 dremap.nelt = 8;
48009 for (i = 0; i < 4; ++i)
48011 dremap.perm[i] = i + nonzero_halves[0] * 4;
48012 dremap.perm[i + 4] = i + nonzero_halves[1] * 4;
48016 else if (d->one_operand_p)
48017 return false;
48018 else if (TARGET_AVX2
48019 && (contents & (q[0] | q[2] | q[4] | q[6])) == contents)
48021 /* vpunpckl* */
48022 for (i = 0; i < nelt4; ++i)
48024 remap[i] = i * 2;
48025 remap[i + nelt] = i * 2 + 1;
48026 remap[i + nelt2] = i * 2 + nelt2;
48027 remap[i + nelt + nelt2] = i * 2 + nelt2 + 1;
48028 dremap.perm[i * 2] = i;
48029 dremap.perm[i * 2 + 1] = i + nelt;
48030 dremap.perm[i * 2 + nelt2] = i + nelt2;
48031 dremap.perm[i * 2 + nelt2 + 1] = i + nelt + nelt2;
48034 else if (TARGET_AVX2
48035 && (contents & (q[1] | q[3] | q[5] | q[7])) == contents)
48037 /* vpunpckh* */
48038 for (i = 0; i < nelt4; ++i)
48040 remap[i + nelt4] = i * 2;
48041 remap[i + nelt + nelt4] = i * 2 + 1;
48042 remap[i + nelt2 + nelt4] = i * 2 + nelt2;
48043 remap[i + nelt + nelt2 + nelt4] = i * 2 + nelt2 + 1;
48044 dremap.perm[i * 2] = i + nelt4;
48045 dremap.perm[i * 2 + 1] = i + nelt + nelt4;
48046 dremap.perm[i * 2 + nelt2] = i + nelt2 + nelt4;
48047 dremap.perm[i * 2 + nelt2 + 1] = i + nelt + nelt2 + nelt4;
48050 else
48051 return false;
48054 /* Use the remapping array set up above to move the elements from their
48055 swizzled locations into their final destinations. */
48056 dfinal = *d;
48057 for (i = 0; i < nelt; ++i)
48059 unsigned e = remap[d->perm[i]];
48060 gcc_assert (e < nelt);
48061 /* If same_halves is true, both halves of the remapped vector are the
48062 same. Avoid cross-lane accesses if possible. */
48063 if (same_halves && i >= nelt2)
48065 gcc_assert (e < nelt2);
48066 dfinal.perm[i] = e + nelt2;
48068 else
48069 dfinal.perm[i] = e;
48071 if (!d->testing_p)
48073 dremap.target = gen_reg_rtx (dremap.vmode);
48074 dfinal.op0 = gen_lowpart (dfinal.vmode, dremap.target);
48076 dfinal.op1 = dfinal.op0;
48077 dfinal.one_operand_p = true;
48079 /* Test if the final remap can be done with a single insn. For V4SFmode or
48080 V4SImode this *will* succeed. For V8HImode or V16QImode it may not. */
48081 start_sequence ();
48082 ok = expand_vec_perm_1 (&dfinal);
48083 seq = get_insns ();
48084 end_sequence ();
48086 if (!ok)
48087 return false;
48089 if (d->testing_p)
48090 return true;
48092 if (dremap.vmode != dfinal.vmode)
48094 dremap.op0 = gen_lowpart (dremap.vmode, dremap.op0);
48095 dremap.op1 = gen_lowpart (dremap.vmode, dremap.op1);
48098 ok = expand_vec_perm_1 (&dremap);
48099 gcc_assert (ok);
48101 emit_insn (seq);
48102 return true;
48105 /* A subroutine of ix86_expand_vec_perm_builtin_1. Try to simplify
48106 a single vector cross-lane permutation into vpermq followed
48107 by any of the single insn permutations. */
48109 static bool
48110 expand_vec_perm_vpermq_perm_1 (struct expand_vec_perm_d *d)
48112 struct expand_vec_perm_d dremap, dfinal;
48113 unsigned i, j, nelt = d->nelt, nelt2 = nelt / 2, nelt4 = nelt / 4;
48114 unsigned contents[2];
48115 bool ok;
48117 if (!(TARGET_AVX2
48118 && (d->vmode == V32QImode || d->vmode == V16HImode)
48119 && d->one_operand_p))
48120 return false;
48122 contents[0] = 0;
48123 contents[1] = 0;
48124 for (i = 0; i < nelt2; ++i)
48126 contents[0] |= 1u << (d->perm[i] / nelt4);
48127 contents[1] |= 1u << (d->perm[i + nelt2] / nelt4);
48130 for (i = 0; i < 2; ++i)
48132 unsigned int cnt = 0;
48133 for (j = 0; j < 4; ++j)
48134 if ((contents[i] & (1u << j)) != 0 && ++cnt > 2)
48135 return false;
48138 if (d->testing_p)
48139 return true;
48141 dremap = *d;
48142 dremap.vmode = V4DImode;
48143 dremap.nelt = 4;
48144 dremap.target = gen_reg_rtx (V4DImode);
48145 dremap.op0 = gen_lowpart (V4DImode, d->op0);
48146 dremap.op1 = dremap.op0;
48147 dremap.one_operand_p = true;
48148 for (i = 0; i < 2; ++i)
48150 unsigned int cnt = 0;
48151 for (j = 0; j < 4; ++j)
48152 if ((contents[i] & (1u << j)) != 0)
48153 dremap.perm[2 * i + cnt++] = j;
48154 for (; cnt < 2; ++cnt)
48155 dremap.perm[2 * i + cnt] = 0;
48158 dfinal = *d;
48159 dfinal.op0 = gen_lowpart (dfinal.vmode, dremap.target);
48160 dfinal.op1 = dfinal.op0;
48161 dfinal.one_operand_p = true;
48162 for (i = 0, j = 0; i < nelt; ++i)
48164 if (i == nelt2)
48165 j = 2;
48166 dfinal.perm[i] = (d->perm[i] & (nelt4 - 1)) | (j ? nelt2 : 0);
48167 if ((d->perm[i] / nelt4) == dremap.perm[j])
48169 else if ((d->perm[i] / nelt4) == dremap.perm[j + 1])
48170 dfinal.perm[i] |= nelt4;
48171 else
48172 gcc_unreachable ();
48175 ok = expand_vec_perm_1 (&dremap);
48176 gcc_assert (ok);
48178 ok = expand_vec_perm_1 (&dfinal);
48179 gcc_assert (ok);
48181 return true;
48184 /* A subroutine of ix86_expand_vec_perm_builtin_1. Try to expand
48185 a vector permutation using two instructions, vperm2f128 resp.
48186 vperm2i128 followed by any single in-lane permutation. */
48188 static bool
48189 expand_vec_perm_vperm2f128 (struct expand_vec_perm_d *d)
48191 struct expand_vec_perm_d dfirst, dsecond;
48192 unsigned i, j, nelt = d->nelt, nelt2 = nelt / 2, perm;
48193 bool ok;
48195 if (!TARGET_AVX
48196 || GET_MODE_SIZE (d->vmode) != 32
48197 || (d->vmode != V8SFmode && d->vmode != V4DFmode && !TARGET_AVX2))
48198 return false;
48200 dsecond = *d;
48201 dsecond.one_operand_p = false;
48202 dsecond.testing_p = true;
48204 /* ((perm << 2)|perm) & 0x33 is the vperm2[fi]128
48205 immediate. For perm < 16 the second permutation uses
48206 d->op0 as first operand, for perm >= 16 it uses d->op1
48207 as first operand. The second operand is the result of
48208 vperm2[fi]128. */
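/* For example, with V4DFmode operands { 0 1 | 2 3 } and { 4 5 | 6 7 },
   perm == 13 (0b1101) places lane 1 ({ 2 3 }) in the low half and
   lane 3 ({ 6 7 }) in the high half of the vperm2[fi]128 result,
   i.e. immediate ((13 << 2) | 13) & 0x33 == 0x31.  */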
48209 for (perm = 0; perm < 32; perm++)
48211 /* Ignore permutations which do not move anything cross-lane. */
48212 if (perm < 16)
48214 /* The second shuffle for e.g. V4DFmode has
48215 0123 and ABCD operands.
48216 Ignore AB23, as 23 is already in the second lane
48217 of the first operand. */
48218 if ((perm & 0xc) == (1 << 2)) continue;
48219 /* And 01CD, as 01 is in the first lane of the first
48220 operand. */
48221 if ((perm & 3) == 0) continue;
48222 /* And 4567, as then the vperm2[fi]128 doesn't change
48223 anything on the original 4567 second operand. */
48224 if ((perm & 0xf) == ((3 << 2) | 2)) continue;
48226 else
48228 /* The second shuffle for e.g. V4DFmode has
48229 4567 and ABCD operands.
48230 Ignore AB67, as 67 is already in the second lane
48231 of the first operand. */
48232 if ((perm & 0xc) == (3 << 2)) continue;
48233 /* And 45CD, as 45 is in the first lane of the first
48234 operand. */
48235 if ((perm & 3) == 2) continue;
48236 /* And 0123, as then the vperm2[fi]128 doesn't change
48237 anything on the original 0123 first operand. */
48238 if ((perm & 0xf) == (1 << 2)) continue;
48241 for (i = 0; i < nelt; i++)
48243 j = d->perm[i] / nelt2;
48244 if (j == ((perm >> (2 * (i >= nelt2))) & 3))
48245 dsecond.perm[i] = nelt + (i & nelt2) + (d->perm[i] & (nelt2 - 1));
48246 else if (j == (unsigned) (i >= nelt2) + 2 * (perm >= 16))
48247 dsecond.perm[i] = d->perm[i] & (nelt - 1);
48248 else
48249 break;
48252 if (i == nelt)
48254 start_sequence ();
48255 ok = expand_vec_perm_1 (&dsecond);
48256 end_sequence ();
48258 else
48259 ok = false;
48261 if (ok)
48263 if (d->testing_p)
48264 return true;
48266 /* Found a usable second shuffle. dfirst will be
48267 vperm2f128 on d->op0 and d->op1. */
48268 dsecond.testing_p = false;
48269 dfirst = *d;
48270 dfirst.target = gen_reg_rtx (d->vmode);
48271 for (i = 0; i < nelt; i++)
48272 dfirst.perm[i] = (i & (nelt2 - 1))
48273 + ((perm >> (2 * (i >= nelt2))) & 3) * nelt2;
48275 canonicalize_perm (&dfirst);
48276 ok = expand_vec_perm_1 (&dfirst);
48277 gcc_assert (ok);
48279 /* And dsecond is some single insn shuffle, taking
48280 d->op0 and result of vperm2f128 (if perm < 16) or
48281 d->op1 and result of vperm2f128 (otherwise). */
48282 if (perm >= 16)
48283 dsecond.op0 = dsecond.op1;
48284 dsecond.op1 = dfirst.target;
48286 ok = expand_vec_perm_1 (&dsecond);
48287 gcc_assert (ok);
48289 return true;
48292 /* For one operand, the only useful vperm2f128 permutation is 0x01
48293 aka lanes swap. */
48294 if (d->one_operand_p)
48295 return false;
48298 return false;
48301 /* A subroutine of ix86_expand_vec_perm_builtin_1. Try to simplify
48302 a two vector permutation using 2 intra-lane interleave insns
48303 and cross-lane shuffle for 32-byte vectors. */
48305 static bool
48306 expand_vec_perm_interleave3 (struct expand_vec_perm_d *d)
48308 unsigned i, nelt;
48309 rtx (*gen) (rtx, rtx, rtx);
48311 if (d->one_operand_p)
48312 return false;
48313 if (TARGET_AVX2 && GET_MODE_SIZE (d->vmode) == 32)
48315 else if (TARGET_AVX && (d->vmode == V8SFmode || d->vmode == V4DFmode))
48317 else
48318 return false;
48320 nelt = d->nelt;
48321 if (d->perm[0] != 0 && d->perm[0] != nelt / 2)
48322 return false;
48323 for (i = 0; i < nelt; i += 2)
48324 if (d->perm[i] != d->perm[0] + i / 2
48325 || d->perm[i + 1] != d->perm[0] + i / 2 + nelt)
48326 return false;
48328 if (d->testing_p)
48329 return true;
48331 switch (d->vmode)
48333 case V32QImode:
48334 if (d->perm[0])
48335 gen = gen_vec_interleave_highv32qi;
48336 else
48337 gen = gen_vec_interleave_lowv32qi;
48338 break;
48339 case V16HImode:
48340 if (d->perm[0])
48341 gen = gen_vec_interleave_highv16hi;
48342 else
48343 gen = gen_vec_interleave_lowv16hi;
48344 break;
48345 case V8SImode:
48346 if (d->perm[0])
48347 gen = gen_vec_interleave_highv8si;
48348 else
48349 gen = gen_vec_interleave_lowv8si;
48350 break;
48351 case V4DImode:
48352 if (d->perm[0])
48353 gen = gen_vec_interleave_highv4di;
48354 else
48355 gen = gen_vec_interleave_lowv4di;
48356 break;
48357 case V8SFmode:
48358 if (d->perm[0])
48359 gen = gen_vec_interleave_highv8sf;
48360 else
48361 gen = gen_vec_interleave_lowv8sf;
48362 break;
48363 case V4DFmode:
48364 if (d->perm[0])
48365 gen = gen_vec_interleave_highv4df;
48366 else
48367 gen = gen_vec_interleave_lowv4df;
48368 break;
48369 default:
48370 gcc_unreachable ();
48373 emit_insn (gen (d->target, d->op0, d->op1));
48374 return true;
48377 /* A subroutine of ix86_expand_vec_perm_builtin_1. Try to implement
48378 a single vector permutation using a single intra-lane vector
48379 permutation, vperm2f128 swapping the lanes and vblend* insn blending
48380 the non-swapped and swapped vectors together. */
48382 static bool
48383 expand_vec_perm_vperm2f128_vblend (struct expand_vec_perm_d *d)
48385 struct expand_vec_perm_d dfirst, dsecond;
48386 unsigned i, j, msk, nelt = d->nelt, nelt2 = nelt / 2;
48387 rtx_insn *seq;
48388 bool ok;
48389 rtx (*blend) (rtx, rtx, rtx, rtx) = NULL;
48391 if (!TARGET_AVX
48392 || TARGET_AVX2
48393 || (d->vmode != V8SFmode && d->vmode != V4DFmode)
48394 || !d->one_operand_p)
48395 return false;
48397 dfirst = *d;
48398 for (i = 0; i < nelt; i++)
48399 dfirst.perm[i] = 0xff;
48400 for (i = 0, msk = 0; i < nelt; i++)
48402 j = (d->perm[i] & nelt2) ? i | nelt2 : i & ~nelt2;
48403 if (dfirst.perm[j] != 0xff && dfirst.perm[j] != d->perm[i])
48404 return false;
48405 dfirst.perm[j] = d->perm[i];
48406 if (j != i)
48407 msk |= (1 << i);
48409 for (i = 0; i < nelt; i++)
48410 if (dfirst.perm[i] == 0xff)
48411 dfirst.perm[i] = i;
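/* dfirst is now an in-lane permutation: every element that would have
   to cross lanes has been redirected to the corresponding position in
   the other lane, and msk records those positions so that the final
   vblend* can take them from the lane-swapped copy computed below.  */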
48413 if (!d->testing_p)
48414 dfirst.target = gen_reg_rtx (dfirst.vmode);
48416 start_sequence ();
48417 ok = expand_vec_perm_1 (&dfirst);
48418 seq = get_insns ();
48419 end_sequence ();
48421 if (!ok)
48422 return false;
48424 if (d->testing_p)
48425 return true;
48427 emit_insn (seq);
48429 dsecond = *d;
48430 dsecond.op0 = dfirst.target;
48431 dsecond.op1 = dfirst.target;
48432 dsecond.one_operand_p = true;
48433 dsecond.target = gen_reg_rtx (dsecond.vmode);
48434 for (i = 0; i < nelt; i++)
48435 dsecond.perm[i] = i ^ nelt2;
48437 ok = expand_vec_perm_1 (&dsecond);
48438 gcc_assert (ok);
48440 blend = d->vmode == V8SFmode ? gen_avx_blendps256 : gen_avx_blendpd256;
48441 emit_insn (blend (d->target, dfirst.target, dsecond.target, GEN_INT (msk)));
48442 return true;
48445 /* A subroutine of ix86_expand_vec_perm_builtin_1. Implement a V4DF
48446 permutation using two vperm2f128, followed by a vshufpd insn blending
48447 the two vectors together. */
48449 static bool
48450 expand_vec_perm_2vperm2f128_vshuf (struct expand_vec_perm_d *d)
48452 struct expand_vec_perm_d dfirst, dsecond, dthird;
48453 bool ok;
48455 if (!TARGET_AVX || (d->vmode != V4DFmode))
48456 return false;
48458 if (d->testing_p)
48459 return true;
48461 dfirst = *d;
48462 dsecond = *d;
48463 dthird = *d;
48465 dfirst.perm[0] = (d->perm[0] & ~1);
48466 dfirst.perm[1] = (d->perm[0] & ~1) + 1;
48467 dfirst.perm[2] = (d->perm[2] & ~1);
48468 dfirst.perm[3] = (d->perm[2] & ~1) + 1;
48469 dsecond.perm[0] = (d->perm[1] & ~1);
48470 dsecond.perm[1] = (d->perm[1] & ~1) + 1;
48471 dsecond.perm[2] = (d->perm[3] & ~1);
48472 dsecond.perm[3] = (d->perm[3] & ~1) + 1;
48473 dthird.perm[0] = (d->perm[0] % 2);
48474 dthird.perm[1] = (d->perm[1] % 2) + 4;
48475 dthird.perm[2] = (d->perm[2] % 2) + 2;
48476 dthird.perm[3] = (d->perm[3] % 2) + 6;
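/* dfirst gathers the aligned element pairs containing d->perm[0] and
   d->perm[2], dsecond those containing d->perm[1] and d->perm[3]
   (each normally a single vperm2f128), and dthird is the vshufpd-style
   two-operand shuffle that picks the required even or odd element of
   each pair from the two intermediate results.  */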
48478 dfirst.target = gen_reg_rtx (dfirst.vmode);
48479 dsecond.target = gen_reg_rtx (dsecond.vmode);
48480 dthird.op0 = dfirst.target;
48481 dthird.op1 = dsecond.target;
48482 dthird.one_operand_p = false;
48484 canonicalize_perm (&dfirst);
48485 canonicalize_perm (&dsecond);
48487 ok = expand_vec_perm_1 (&dfirst)
48488 && expand_vec_perm_1 (&dsecond)
48489 && expand_vec_perm_1 (&dthird);
48491 gcc_assert (ok);
48493 return true;
48496 /* A subroutine of expand_vec_perm_even_odd_1. Implement the double-word
48497 permutation with two pshufb insns and an ior. We should have already
48498 failed all two instruction sequences. */
48500 static bool
48501 expand_vec_perm_pshufb2 (struct expand_vec_perm_d *d)
48503 rtx rperm[2][16], vperm, l, h, op, m128;
48504 unsigned int i, nelt, eltsz;
48506 if (!TARGET_SSSE3 || GET_MODE_SIZE (d->vmode) != 16)
48507 return false;
48508 gcc_assert (!d->one_operand_p);
48510 if (d->testing_p)
48511 return true;
48513 nelt = d->nelt;
48514 eltsz = GET_MODE_SIZE (GET_MODE_INNER (d->vmode));
48516 /* Generate two permutation masks. If the required element is within
48517 the given vector it is shuffled into the proper lane. If the required
48518 element is in the other vector, force a zero into the lane by setting
48519 bit 7 in the permutation mask. */
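/* E.g. if d->perm[i] == 19 for V16QImode (element 3 of d->op1), the
   mask for op1 gets 3 at position i while the mask for op0 gets -128
   there, so the pshufb of op0 contributes a zero byte and the final
   ior keeps op1's byte.  */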
48520 m128 = GEN_INT (-128);
48521 for (i = 0; i < nelt; ++i)
48523 unsigned j, e = d->perm[i];
48524 unsigned which = (e >= nelt);
48525 if (e >= nelt)
48526 e -= nelt;
48528 for (j = 0; j < eltsz; ++j)
48530 rperm[which][i*eltsz + j] = GEN_INT (e*eltsz + j);
48531 rperm[1-which][i*eltsz + j] = m128;
48535 vperm = gen_rtx_CONST_VECTOR (V16QImode, gen_rtvec_v (16, rperm[0]));
48536 vperm = force_reg (V16QImode, vperm);
48538 l = gen_reg_rtx (V16QImode);
48539 op = gen_lowpart (V16QImode, d->op0);
48540 emit_insn (gen_ssse3_pshufbv16qi3 (l, op, vperm));
48542 vperm = gen_rtx_CONST_VECTOR (V16QImode, gen_rtvec_v (16, rperm[1]));
48543 vperm = force_reg (V16QImode, vperm);
48545 h = gen_reg_rtx (V16QImode);
48546 op = gen_lowpart (V16QImode, d->op1);
48547 emit_insn (gen_ssse3_pshufbv16qi3 (h, op, vperm));
48549 op = d->target;
48550 if (d->vmode != V16QImode)
48551 op = gen_reg_rtx (V16QImode);
48552 emit_insn (gen_iorv16qi3 (op, l, h));
48553 if (op != d->target)
48554 emit_move_insn (d->target, gen_lowpart (d->vmode, op));
48556 return true;
48559 /* Implement arbitrary permutation of one V32QImode or V16HImode operand
48560 with two vpshufb insns, vpermq and vpor. We should have already failed
48561 all two or three instruction sequences. */
48563 static bool
48564 expand_vec_perm_vpshufb2_vpermq (struct expand_vec_perm_d *d)
48566 rtx rperm[2][32], vperm, l, h, hp, op, m128;
48567 unsigned int i, nelt, eltsz;
48569 if (!TARGET_AVX2
48570 || !d->one_operand_p
48571 || (d->vmode != V32QImode && d->vmode != V16HImode))
48572 return false;
48574 if (d->testing_p)
48575 return true;
48577 nelt = d->nelt;
48578 eltsz = GET_MODE_SIZE (GET_MODE_INNER (d->vmode));
48580 /* Generate two permutation masks. If the required element is within
48581 the same lane, it is shuffled in. If the required element is from the
48582 other lane, force a zero by setting bit 7 in the permutation mask.
48583 The other mask has non-negative elements where the element is
48584 requested from the other lane, but it is also moved to the other lane,
48585 so that the result of vpshufb can have the two V2TImode halves
48586 swapped. */
48587 m128 = GEN_INT (-128);
48588 for (i = 0; i < nelt; ++i)
48590 unsigned j, e = d->perm[i] & (nelt / 2 - 1);
48591 unsigned which = ((d->perm[i] ^ i) & (nelt / 2)) * eltsz;
48593 for (j = 0; j < eltsz; ++j)
48595 rperm[!!which][(i * eltsz + j) ^ which] = GEN_INT (e * eltsz + j);
48596 rperm[!which][(i * eltsz + j) ^ (which ^ 16)] = m128;
48600 vperm = gen_rtx_CONST_VECTOR (V32QImode, gen_rtvec_v (32, rperm[1]));
48601 vperm = force_reg (V32QImode, vperm);
48603 h = gen_reg_rtx (V32QImode);
48604 op = gen_lowpart (V32QImode, d->op0);
48605 emit_insn (gen_avx2_pshufbv32qi3 (h, op, vperm));
48607 /* Swap the 128-bit lanes of h into hp. */
48608 hp = gen_reg_rtx (V4DImode);
48609 op = gen_lowpart (V4DImode, h);
48610 emit_insn (gen_avx2_permv4di_1 (hp, op, const2_rtx, GEN_INT (3), const0_rtx,
48611 const1_rtx));
48613 vperm = gen_rtx_CONST_VECTOR (V32QImode, gen_rtvec_v (32, rperm[0]));
48614 vperm = force_reg (V32QImode, vperm);
48616 l = gen_reg_rtx (V32QImode);
48617 op = gen_lowpart (V32QImode, d->op0);
48618 emit_insn (gen_avx2_pshufbv32qi3 (l, op, vperm));
48620 op = d->target;
48621 if (d->vmode != V32QImode)
48622 op = gen_reg_rtx (V32QImode);
48623 emit_insn (gen_iorv32qi3 (op, l, gen_lowpart (V32QImode, hp)));
48624 if (op != d->target)
48625 emit_move_insn (d->target, gen_lowpart (d->vmode, op));
48627 return true;
48630 /* A subroutine of expand_vec_perm_even_odd_1. Implement extract-even
48631 and extract-odd permutations of two V32QImode or V16HImode operands
48632 with two vpshufb insns, vpor and vpermq. We should have already
48633 failed all two or three instruction sequences. */
48635 static bool
48636 expand_vec_perm_vpshufb2_vpermq_even_odd (struct expand_vec_perm_d *d)
48638 rtx rperm[2][32], vperm, l, h, ior, op, m128;
48639 unsigned int i, nelt, eltsz;
48641 if (!TARGET_AVX2
48642 || d->one_operand_p
48643 || (d->vmode != V32QImode && d->vmode != V16HImode))
48644 return false;
48646 for (i = 0; i < d->nelt; ++i)
48647 if ((d->perm[i] ^ (i * 2)) & (3 * d->nelt / 2))
48648 return false;
48650 if (d->testing_p)
48651 return true;
48653 nelt = d->nelt;
48654 eltsz = GET_MODE_SIZE (GET_MODE_INNER (d->vmode));
48656 /* Generate two permutation masks. In the first permutation mask
48657 the first quarter will contain indexes for the first half
48658 of the op0, the second quarter will contain bit 7 set, third quarter
48659 will contain indexes for the second half of the op0 and the
48660 last quarter bit 7 set. In the second permutation mask
48661 the first quarter will contain bit 7 set, the second quarter
48662 indexes for the first half of the op1, the third quarter bit 7 set
48663 and last quarter indexes for the second half of the op1.
48664 I.e. the first mask e.g. for V32QImode extract even will be:
48665 0, 2, ..., 0xe, -128, ..., -128, 0, 2, ..., 0xe, -128, ..., -128
48666 (all values masked with 0xf except for -128) and second mask
48667 for extract even will be
48668 -128, ..., -128, 0, 2, ..., 0xe, -128, ..., -128, 0, 2, ..., 0xe. */
48669 m128 = GEN_INT (-128);
48670 for (i = 0; i < nelt; ++i)
48672 unsigned j, e = d->perm[i] & (nelt / 2 - 1);
48673 unsigned which = d->perm[i] >= nelt;
48674 unsigned xorv = (i >= nelt / 4 && i < 3 * nelt / 4) ? 24 : 0;
48676 for (j = 0; j < eltsz; ++j)
48678 rperm[which][(i * eltsz + j) ^ xorv] = GEN_INT (e * eltsz + j);
48679 rperm[1 - which][(i * eltsz + j) ^ xorv] = m128;
48683 vperm = gen_rtx_CONST_VECTOR (V32QImode, gen_rtvec_v (32, rperm[0]));
48684 vperm = force_reg (V32QImode, vperm);
48686 l = gen_reg_rtx (V32QImode);
48687 op = gen_lowpart (V32QImode, d->op0);
48688 emit_insn (gen_avx2_pshufbv32qi3 (l, op, vperm));
48690 vperm = gen_rtx_CONST_VECTOR (V32QImode, gen_rtvec_v (32, rperm[1]));
48691 vperm = force_reg (V32QImode, vperm);
48693 h = gen_reg_rtx (V32QImode);
48694 op = gen_lowpart (V32QImode, d->op1);
48695 emit_insn (gen_avx2_pshufbv32qi3 (h, op, vperm));
48697 ior = gen_reg_rtx (V32QImode);
48698 emit_insn (gen_iorv32qi3 (ior, l, h));
48700 /* Permute the V4DImode quarters using the { 0, 2, 1, 3 } permutation. */
48701 op = gen_reg_rtx (V4DImode);
48702 ior = gen_lowpart (V4DImode, ior);
48703 emit_insn (gen_avx2_permv4di_1 (op, ior, const0_rtx, const2_rtx,
48704 const1_rtx, GEN_INT (3)));
48705 emit_move_insn (d->target, gen_lowpart (d->vmode, op));
48707 return true;
48710 /* A subroutine of expand_vec_perm_even_odd_1. Implement extract-even
48711 and extract-odd permutations of two V16QI, V8HI, V16HI or V32QI operands
48712 with two "and" and "pack" or two "shift" and "pack" insns. We should
48713 have already failed all two instruction sequences. */
48715 static bool
48716 expand_vec_perm_even_odd_pack (struct expand_vec_perm_d *d)
48718 rtx op, dop0, dop1, t, rperm[16];
48719 unsigned i, odd, c, s, nelt = d->nelt;
48720 bool end_perm = false;
48721 machine_mode half_mode;
48722 rtx (*gen_and) (rtx, rtx, rtx);
48723 rtx (*gen_pack) (rtx, rtx, rtx);
48724 rtx (*gen_shift) (rtx, rtx, rtx);
48726 if (d->one_operand_p)
48727 return false;
48729 switch (d->vmode)
48731 case V8HImode:
48732 /* Required for "pack". */
48733 if (!TARGET_SSE4_1)
48734 return false;
48735 c = 0xffff;
48736 s = 16;
48737 half_mode = V4SImode;
48738 gen_and = gen_andv4si3;
48739 gen_pack = gen_sse4_1_packusdw;
48740 gen_shift = gen_lshrv4si3;
48741 break;
48742 case V16QImode:
48743 /* No check as all instructions are SSE2. */
48744 c = 0xff;
48745 s = 8;
48746 half_mode = V8HImode;
48747 gen_and = gen_andv8hi3;
48748 gen_pack = gen_sse2_packuswb;
48749 gen_shift = gen_lshrv8hi3;
48750 break;
48751 case V16HImode:
48752 if (!TARGET_AVX2)
48753 return false;
48754 c = 0xffff;
48755 s = 16;
48756 half_mode = V8SImode;
48757 gen_and = gen_andv8si3;
48758 gen_pack = gen_avx2_packusdw;
48759 gen_shift = gen_lshrv8si3;
48760 end_perm = true;
48761 break;
48762 case V32QImode:
48763 if (!TARGET_AVX2)
48764 return false;
48765 c = 0xff;
48766 s = 8;
48767 half_mode = V16HImode;
48768 gen_and = gen_andv16hi3;
48769 gen_pack = gen_avx2_packuswb;
48770 gen_shift = gen_lshrv16hi3;
48771 end_perm = true;
48772 break;
48773 default:
48774 /* Only V8HI, V16QI, V16HI and V32QI modes are more profitable than
48775 general shuffles. */
48776 return false;
48779 /* Check that permutation is even or odd. */
48780 odd = d->perm[0];
48781 if (odd > 1)
48782 return false;
48784 for (i = 1; i < nelt; ++i)
48785 if (d->perm[i] != 2 * i + odd)
48786 return false;
48788 if (d->testing_p)
48789 return true;
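/* For even extraction, AND each wide element with c so only its low
   (even) half survives; for odd extraction, shift each wide element
   right by s so the odd half moves down.  The unsigned saturating
   pack then narrows both inputs into one vector (no saturation can
   occur since every value fits), and for the 256-bit modes a final
   vpermq repairs the lane interleaving introduced by the per-lane
   pack.  */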
48791 dop0 = gen_reg_rtx (half_mode);
48792 dop1 = gen_reg_rtx (half_mode);
48793 if (odd == 0)
48795 for (i = 0; i < nelt / 2; i++)
48796 rperm[i] = GEN_INT (c);
48797 t = gen_rtx_CONST_VECTOR (half_mode, gen_rtvec_v (nelt / 2, rperm));
48798 t = force_reg (half_mode, t);
48799 emit_insn (gen_and (dop0, t, gen_lowpart (half_mode, d->op0)));
48800 emit_insn (gen_and (dop1, t, gen_lowpart (half_mode, d->op1)));
48802 else
48804 emit_insn (gen_shift (dop0,
48805 gen_lowpart (half_mode, d->op0),
48806 GEN_INT (s)));
48807 emit_insn (gen_shift (dop1,
48808 gen_lowpart (half_mode, d->op1),
48809 GEN_INT (s)));
48811 /* For the 256-bit AVX2 case we need to permute the pack result. */
48812 if (TARGET_AVX2 && end_perm)
48814 op = gen_reg_rtx (d->vmode);
48815 t = gen_reg_rtx (V4DImode);
48816 emit_insn (gen_pack (op, dop0, dop1));
48817 emit_insn (gen_avx2_permv4di_1 (t,
48818 gen_lowpart (V4DImode, op),
48819 const0_rtx,
48820 const2_rtx,
48821 const1_rtx,
48822 GEN_INT (3)));
48823 emit_move_insn (d->target, gen_lowpart (d->vmode, t));
48825 else
48826 emit_insn (gen_pack (d->target, dop0, dop1));
48828 return true;
48831 /* A subroutine of ix86_expand_vec_perm_builtin_1. Implement extract-even
48832 and extract-odd permutations. */
48834 static bool
48835 expand_vec_perm_even_odd_1 (struct expand_vec_perm_d *d, unsigned odd)
48837 rtx t1, t2, t3, t4, t5;
48839 switch (d->vmode)
48841 case V4DFmode:
48842 if (d->testing_p)
48843 break;
48844 t1 = gen_reg_rtx (V4DFmode);
48845 t2 = gen_reg_rtx (V4DFmode);
48847 /* Shuffle the lanes around into { 0 1 4 5 } and { 2 3 6 7 }. */
48848 emit_insn (gen_avx_vperm2f128v4df3 (t1, d->op0, d->op1, GEN_INT (0x20)));
48849 emit_insn (gen_avx_vperm2f128v4df3 (t2, d->op0, d->op1, GEN_INT (0x31)));
48851 /* Now an unpck[lh]pd will produce the result required. */
48852 if (odd)
48853 t3 = gen_avx_unpckhpd256 (d->target, t1, t2);
48854 else
48855 t3 = gen_avx_unpcklpd256 (d->target, t1, t2);
48856 emit_insn (t3);
48857 break;
48859 case V8SFmode:
48861 int mask = odd ? 0xdd : 0x88;
48863 if (d->testing_p)
48864 break;
48865 t1 = gen_reg_rtx (V8SFmode);
48866 t2 = gen_reg_rtx (V8SFmode);
48867 t3 = gen_reg_rtx (V8SFmode);
48869 /* Shuffle within the 128-bit lanes to produce:
48870 { 0 2 8 a 4 6 c e } | { 1 3 9 b 5 7 d f }. */
48871 emit_insn (gen_avx_shufps256 (t1, d->op0, d->op1,
48872 GEN_INT (mask)));
48874 /* Shuffle the lanes around to produce:
48875 { 4 6 c e 0 2 8 a } and { 5 7 d f 1 3 9 b }. */
48876 emit_insn (gen_avx_vperm2f128v8sf3 (t2, t1, t1,
48877 GEN_INT (0x3)));
48879 /* Shuffle within the 128-bit lanes to produce:
48880 { 0 2 4 6 4 6 0 2 } | { 1 3 5 7 5 7 1 3 }. */
48881 emit_insn (gen_avx_shufps256 (t3, t1, t2, GEN_INT (0x44)));
48883 /* Shuffle within the 128-bit lanes to produce:
48884 { 8 a c e c e 8 a } | { 9 b d f d f 9 b }. */
48885 emit_insn (gen_avx_shufps256 (t2, t1, t2, GEN_INT (0xee)));
48887 /* Shuffle the lanes around to produce:
48888 { 0 2 4 6 8 a c e } | { 1 3 5 7 9 b d f }. */
48889 emit_insn (gen_avx_vperm2f128v8sf3 (d->target, t3, t2,
48890 GEN_INT (0x20)));
48892 break;
48894 case V2DFmode:
48895 case V4SFmode:
48896 case V2DImode:
48897 case V4SImode:
48898 /* These are always directly implementable by expand_vec_perm_1. */
48899 gcc_unreachable ();
48901 case V8HImode:
48902 if (TARGET_SSE4_1)
48903 return expand_vec_perm_even_odd_pack (d);
48904 else if (TARGET_SSSE3 && !TARGET_SLOW_PSHUFB)
48905 return expand_vec_perm_pshufb2 (d);
48906 else
48908 if (d->testing_p)
48909 break;
48910 /* We need 2*log2(N)-1 operations to achieve odd/even
48911 with interleave. */
48912 t1 = gen_reg_rtx (V8HImode);
48913 t2 = gen_reg_rtx (V8HImode);
48914 emit_insn (gen_vec_interleave_highv8hi (t1, d->op0, d->op1));
48915 emit_insn (gen_vec_interleave_lowv8hi (d->target, d->op0, d->op1));
48916 emit_insn (gen_vec_interleave_highv8hi (t2, d->target, t1));
48917 emit_insn (gen_vec_interleave_lowv8hi (d->target, d->target, t1));
48918 if (odd)
48919 t3 = gen_vec_interleave_highv8hi (d->target, d->target, t2);
48920 else
48921 t3 = gen_vec_interleave_lowv8hi (d->target, d->target, t2);
48922 emit_insn (t3);
48924 break;
48926 case V16QImode:
48927 return expand_vec_perm_even_odd_pack (d);
48929 case V16HImode:
48930 case V32QImode:
48931 return expand_vec_perm_even_odd_pack (d);
48933 case V4DImode:
48934 if (!TARGET_AVX2)
48936 struct expand_vec_perm_d d_copy = *d;
48937 d_copy.vmode = V4DFmode;
48938 if (d->testing_p)
48939 d_copy.target = gen_lowpart (V4DFmode, d->target);
48940 else
48941 d_copy.target = gen_reg_rtx (V4DFmode);
48942 d_copy.op0 = gen_lowpart (V4DFmode, d->op0);
48943 d_copy.op1 = gen_lowpart (V4DFmode, d->op1);
48944 if (expand_vec_perm_even_odd_1 (&d_copy, odd))
48946 if (!d->testing_p)
48947 emit_move_insn (d->target,
48948 gen_lowpart (V4DImode, d_copy.target));
48949 return true;
48951 return false;
48954 if (d->testing_p)
48955 break;
48957 t1 = gen_reg_rtx (V4DImode);
48958 t2 = gen_reg_rtx (V4DImode);
48960 /* Shuffle the lanes around into { 0 1 4 5 } and { 2 3 6 7 }. */
48961 emit_insn (gen_avx2_permv2ti (t1, d->op0, d->op1, GEN_INT (0x20)));
48962 emit_insn (gen_avx2_permv2ti (t2, d->op0, d->op1, GEN_INT (0x31)));
48964 /* Now a vpunpck[lh]qdq will produce the required result. */
48965 if (odd)
48966 t3 = gen_avx2_interleave_highv4di (d->target, t1, t2);
48967 else
48968 t3 = gen_avx2_interleave_lowv4di (d->target, t1, t2);
48969 emit_insn (t3);
48970 break;
48972 case V8SImode:
48973 if (!TARGET_AVX2)
48975 struct expand_vec_perm_d d_copy = *d;
48976 d_copy.vmode = V8SFmode;
48977 if (d->testing_p)
48978 d_copy.target = gen_lowpart (V8SFmode, d->target);
48979 else
48980 d_copy.target = gen_reg_rtx (V8SFmode);
48981 d_copy.op0 = gen_lowpart (V8SFmode, d->op0);
48982 d_copy.op1 = gen_lowpart (V8SFmode, d->op1);
48983 if (expand_vec_perm_even_odd_1 (&d_copy, odd))
48985 if (!d->testing_p)
48986 emit_move_insn (d->target,
48987 gen_lowpart (V8SImode, d_copy.target));
48988 return true;
48990 return false;
48993 if (d->testing_p)
48994 break;
48996 t1 = gen_reg_rtx (V8SImode);
48997 t2 = gen_reg_rtx (V8SImode);
48998 t3 = gen_reg_rtx (V4DImode);
48999 t4 = gen_reg_rtx (V4DImode);
49000 t5 = gen_reg_rtx (V4DImode);
49002 /* Shuffle the lanes around into
49003 { 0 1 2 3 8 9 a b } and { 4 5 6 7 c d e f }. */
49004 emit_insn (gen_avx2_permv2ti (t3, gen_lowpart (V4DImode, d->op0),
49005 gen_lowpart (V4DImode, d->op1),
49006 GEN_INT (0x20)));
49007 emit_insn (gen_avx2_permv2ti (t4, gen_lowpart (V4DImode, d->op0),
49008 gen_lowpart (V4DImode, d->op1),
49009 GEN_INT (0x31)));
49011 /* Swap the 2nd and 3rd position in each lane into
49012 { 0 2 1 3 8 a 9 b } and { 4 6 5 7 c e d f }. */
49013 emit_insn (gen_avx2_pshufdv3 (t1, gen_lowpart (V8SImode, t3),
49014 GEN_INT (2 * 4 + 1 * 16 + 3 * 64)));
49015 emit_insn (gen_avx2_pshufdv3 (t2, gen_lowpart (V8SImode, t4),
49016 GEN_INT (2 * 4 + 1 * 16 + 3 * 64)));
49018 /* Now a vpunpck[lh]qdq will produce
49019 { 0 2 4 6 8 a c e } resp. { 1 3 5 7 9 b d f }. */
49020 if (odd)
49021 t3 = gen_avx2_interleave_highv4di (t5, gen_lowpart (V4DImode, t1),
49022 gen_lowpart (V4DImode, t2));
49023 else
49024 t3 = gen_avx2_interleave_lowv4di (t5, gen_lowpart (V4DImode, t1),
49025 gen_lowpart (V4DImode, t2));
49026 emit_insn (t3);
49027 emit_move_insn (d->target, gen_lowpart (V8SImode, t5));
49028 break;
49030 default:
49031 gcc_unreachable ();
49034 return true;
49037 /* A subroutine of ix86_expand_vec_perm_builtin_1. Pattern match
49038 extract-even and extract-odd permutations. */
49040 static bool
49041 expand_vec_perm_even_odd (struct expand_vec_perm_d *d)
49043 unsigned i, odd, nelt = d->nelt;
49045 odd = d->perm[0];
49046 if (odd != 0 && odd != 1)
49047 return false;
49049 for (i = 1; i < nelt; ++i)
49050 if (d->perm[i] != 2 * i + odd)
49051 return false;
49053 return expand_vec_perm_even_odd_1 (d, odd);
49056 /* A subroutine of ix86_expand_vec_perm_builtin_1. Implement broadcast
49057 permutations. We assume that expand_vec_perm_1 has already failed. */
49059 static bool
49060 expand_vec_perm_broadcast_1 (struct expand_vec_perm_d *d)
49062 unsigned elt = d->perm[0], nelt2 = d->nelt / 2;
49063 machine_mode vmode = d->vmode;
49064 unsigned char perm2[4];
49065 rtx op0 = d->op0, dest;
49066 bool ok;
49068 switch (vmode)
49070 case V4DFmode:
49071 case V8SFmode:
49072 /* These are special-cased in sse.md so that we can optionally
49073 use the vbroadcast instruction. They expand to two insns
49074 if the input happens to be in a register. */
49075 gcc_unreachable ();
49077 case V2DFmode:
49078 case V2DImode:
49079 case V4SFmode:
49080 case V4SImode:
49081 /* These are always implementable using standard shuffle patterns. */
49082 gcc_unreachable ();
49084 case V8HImode:
49085 case V16QImode:
49086 /* These can be implemented via interleave. We save one insn by
49087 stopping once we have promoted to V4SImode and then use pshufd. */
49088 if (d->testing_p)
49089 return true;
49092 rtx dest;
49093 rtx (*gen) (rtx, rtx, rtx)
49094 = vmode == V16QImode ? gen_vec_interleave_lowv16qi
49095 : gen_vec_interleave_lowv8hi;
49097 if (elt >= nelt2)
49099 gen = vmode == V16QImode ? gen_vec_interleave_highv16qi
49100 : gen_vec_interleave_highv8hi;
49101 elt -= nelt2;
49103 nelt2 /= 2;
49105 dest = gen_reg_rtx (vmode);
49106 emit_insn (gen (dest, op0, op0));
49107 vmode = get_mode_wider_vector (vmode);
49108 op0 = gen_lowpart (vmode, dest);
49110 while (vmode != V4SImode);
49112 memset (perm2, elt, 4);
49113 dest = gen_reg_rtx (V4SImode);
49114 ok = expand_vselect (dest, op0, perm2, 4, d->testing_p);
49115 gcc_assert (ok);
49116 if (!d->testing_p)
49117 emit_move_insn (d->target, gen_lowpart (d->vmode, dest));
49118 return true;
49120 case V64QImode:
49121 case V32QImode:
49122 case V16HImode:
49123 case V8SImode:
49124 case V4DImode:
49125 /* For AVX2 broadcasts of the first element vpbroadcast* or
49126 vpermq should be used by expand_vec_perm_1. */
49127 gcc_assert (!TARGET_AVX2 || d->perm[0]);
49128 return false;
49130 default:
49131 gcc_unreachable ();
49135 /* A subroutine of ix86_expand_vec_perm_builtin_1. Pattern match
49136 broadcast permutations. */
49138 static bool
49139 expand_vec_perm_broadcast (struct expand_vec_perm_d *d)
49141 unsigned i, elt, nelt = d->nelt;
49143 if (!d->one_operand_p)
49144 return false;
49146 elt = d->perm[0];
49147 for (i = 1; i < nelt; ++i)
49148 if (d->perm[i] != elt)
49149 return false;
49151 return expand_vec_perm_broadcast_1 (d);
49154 /* Implement arbitrary permutations of two V64QImode operands
49155 with 2 vpermi2w, 2 vpshufb and one vpor instruction. */
49156 static bool
49157 expand_vec_perm_vpermi2_vpshub2 (struct expand_vec_perm_d *d)
49159 if (!TARGET_AVX512BW || !(d->vmode == V64QImode))
49160 return false;
49162 if (d->testing_p)
49163 return true;
49165 struct expand_vec_perm_d ds[2];
49166 rtx rperm[128], vperm, target0, target1;
49167 unsigned int i, nelt;
49168 machine_mode vmode;
49170 nelt = d->nelt;
49171 vmode = V64QImode;
49173 for (i = 0; i < 2; i++)
49175 ds[i] = *d;
49176 ds[i].vmode = V32HImode;
49177 ds[i].nelt = 32;
49178 ds[i].target = gen_reg_rtx (V32HImode);
49179 ds[i].op0 = gen_lowpart (V32HImode, d->op0);
49180 ds[i].op1 = gen_lowpart (V32HImode, d->op1);
49183 /* Prepare permutations such that the first one takes care of
49184 putting the even bytes into the right positions or one position
49185 higher (ds[0]) and the second one takes care of putting the odd
49186 bytes into the right positions or one position lower
49187 (ds[1]). */
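/* That is, ds[0] places the word containing source byte d->perm[i]
   at word i / 2 for even i, and ds[1] does the same for odd i.  The
   two vpshufb masks built in rperm[] then pick the low or high byte
   of that word (d->perm[i] & 1) for the even resp. odd destination
   bytes and zero everything else, so the final vpor merges the two
   halves.  */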
49189 for (i = 0; i < nelt; i++)
49191 ds[i & 1].perm[i / 2] = d->perm[i] / 2;
49192 if (i & 1)
49194 rperm[i] = constm1_rtx;
49195 rperm[i + 64] = GEN_INT ((i & 14) + (d->perm[i] & 1));
49197 else
49199 rperm[i] = GEN_INT ((i & 14) + (d->perm[i] & 1));
49200 rperm[i + 64] = constm1_rtx;
49204 bool ok = expand_vec_perm_1 (&ds[0]);
49205 gcc_assert (ok);
49206 ds[0].target = gen_lowpart (V64QImode, ds[0].target);
49208 ok = expand_vec_perm_1 (&ds[1]);
49209 gcc_assert (ok);
49210 ds[1].target = gen_lowpart (V64QImode, ds[1].target);
49212 vperm = gen_rtx_CONST_VECTOR (V64QImode, gen_rtvec_v (64, rperm));
49213 vperm = force_reg (vmode, vperm);
49214 target0 = gen_reg_rtx (V64QImode);
49215 emit_insn (gen_avx512bw_pshufbv64qi3 (target0, ds[0].target, vperm));
49217 vperm = gen_rtx_CONST_VECTOR (V64QImode, gen_rtvec_v (64, rperm + 64));
49218 vperm = force_reg (vmode, vperm);
49219 target1 = gen_reg_rtx (V64QImode);
49220 emit_insn (gen_avx512bw_pshufbv64qi3 (target1, ds[1].target, vperm));
49222 emit_insn (gen_iorv64qi3 (d->target, target0, target1));
49223 return true;
49226 /* Implement arbitrary permutation of two V32QImode or V16HImode operands
49227 with 4 vpshufb insns, 2 vpermq and 3 vpor. We should have already failed
49228 all the shorter instruction sequences. */
49230 static bool
49231 expand_vec_perm_vpshufb4_vpermq2 (struct expand_vec_perm_d *d)
49233 rtx rperm[4][32], vperm, l[2], h[2], op, m128;
49234 unsigned int i, nelt, eltsz;
49235 bool used[4];
49237 if (!TARGET_AVX2
49238 || d->one_operand_p
49239 || (d->vmode != V32QImode && d->vmode != V16HImode))
49240 return false;
49242 if (d->testing_p)
49243 return true;
49245 nelt = d->nelt;
49246 eltsz = GET_MODE_SIZE (GET_MODE_INNER (d->vmode));
49248 /* Generate 4 permutation masks. If the required element is within
49249 the same lane, it is shuffled in. If the required element is from the
49250 other lane, force a zero by setting bit 7 in the permutation mask.
49251 The other masks have non-negative elements where the element is
49252 requested from the other lane, but it is also moved to the other lane,
49253 so that the result of vpshufb can have the two V2TImode halves
49254 swapped. */
49255 m128 = GEN_INT (-128);
49256 for (i = 0; i < 32; ++i)
49258 rperm[0][i] = m128;
49259 rperm[1][i] = m128;
49260 rperm[2][i] = m128;
49261 rperm[3][i] = m128;
49263 used[0] = false;
49264 used[1] = false;
49265 used[2] = false;
49266 used[3] = false;
49267 for (i = 0; i < nelt; ++i)
49269 unsigned j, e = d->perm[i] & (nelt / 2 - 1);
49270 unsigned xlane = ((d->perm[i] ^ i) & (nelt / 2)) * eltsz;
49271 unsigned int which = ((d->perm[i] & nelt) ? 2 : 0) + (xlane ? 1 : 0);
49273 for (j = 0; j < eltsz; ++j)
49274 rperm[which][(i * eltsz + j) ^ xlane] = GEN_INT (e * eltsz + j);
49275 used[which] = true;
49278 for (i = 0; i < 2; ++i)
49280 if (!used[2 * i + 1])
49282 h[i] = NULL_RTX;
49283 continue;
49285 vperm = gen_rtx_CONST_VECTOR (V32QImode,
49286 gen_rtvec_v (32, rperm[2 * i + 1]));
49287 vperm = force_reg (V32QImode, vperm);
49288 h[i] = gen_reg_rtx (V32QImode);
49289 op = gen_lowpart (V32QImode, i ? d->op1 : d->op0);
49290 emit_insn (gen_avx2_pshufbv32qi3 (h[i], op, vperm));
49293 /* Swap the 128-bit lanes of h[X]. */
49294 for (i = 0; i < 2; ++i)
49296 if (h[i] == NULL_RTX)
49297 continue;
49298 op = gen_reg_rtx (V4DImode);
49299 emit_insn (gen_avx2_permv4di_1 (op, gen_lowpart (V4DImode, h[i]),
49300 const2_rtx, GEN_INT (3), const0_rtx,
49301 const1_rtx));
49302 h[i] = gen_lowpart (V32QImode, op);
49305 for (i = 0; i < 2; ++i)
49307 if (!used[2 * i])
49309 l[i] = NULL_RTX;
49310 continue;
49312 vperm = gen_rtx_CONST_VECTOR (V32QImode, gen_rtvec_v (32, rperm[2 * i]));
49313 vperm = force_reg (V32QImode, vperm);
49314 l[i] = gen_reg_rtx (V32QImode);
49315 op = gen_lowpart (V32QImode, i ? d->op1 : d->op0);
49316 emit_insn (gen_avx2_pshufbv32qi3 (l[i], op, vperm));
49319 for (i = 0; i < 2; ++i)
49321 if (h[i] && l[i])
49323 op = gen_reg_rtx (V32QImode);
49324 emit_insn (gen_iorv32qi3 (op, l[i], h[i]));
49325 l[i] = op;
49327 else if (h[i])
49328 l[i] = h[i];
49331 gcc_assert (l[0] && l[1]);
49332 op = d->target;
49333 if (d->vmode != V32QImode)
49334 op = gen_reg_rtx (V32QImode);
49335 emit_insn (gen_iorv32qi3 (op, l[0], l[1]));
49336 if (op != d->target)
49337 emit_move_insn (d->target, gen_lowpart (d->vmode, op));
49338 return true;
49341 /* The guts of ix86_expand_vec_perm_const, also used by the ok hook.
49342 With all of the interface bits taken care of, perform the expansion
49343 in D and return true on success. */
49345 static bool
49346 ix86_expand_vec_perm_const_1 (struct expand_vec_perm_d *d)
49348 /* Try a single instruction expansion. */
49349 if (expand_vec_perm_1 (d))
49350 return true;
49352 /* Try sequences of two instructions. */
49354 if (expand_vec_perm_pshuflw_pshufhw (d))
49355 return true;
49357 if (expand_vec_perm_palignr (d, false))
49358 return true;
49360 if (expand_vec_perm_interleave2 (d))
49361 return true;
49363 if (expand_vec_perm_broadcast (d))
49364 return true;
49366 if (expand_vec_perm_vpermq_perm_1 (d))
49367 return true;
49369 if (expand_vec_perm_vperm2f128 (d))
49370 return true;
49372 if (expand_vec_perm_pblendv (d))
49373 return true;
49375 /* Try sequences of three instructions. */
49377 if (expand_vec_perm_even_odd_pack (d))
49378 return true;
49380 if (expand_vec_perm_2vperm2f128_vshuf (d))
49381 return true;
49383 if (expand_vec_perm_pshufb2 (d))
49384 return true;
49386 if (expand_vec_perm_interleave3 (d))
49387 return true;
49389 if (expand_vec_perm_vperm2f128_vblend (d))
49390 return true;
49392 /* Try sequences of four instructions. */
49394 if (expand_vec_perm_vpshufb2_vpermq (d))
49395 return true;
49397 if (expand_vec_perm_vpshufb2_vpermq_even_odd (d))
49398 return true;
49400 if (expand_vec_perm_vpermi2_vpshub2 (d))
49401 return true;
49403 /* ??? Look for narrow permutations whose element orderings would
49404 allow the promotion to a wider mode. */
49406 /* ??? Look for sequences of interleave or a wider permute that place
49407 the data into the correct lanes for a half-vector shuffle like
49408 pshuf[lh]w or vpermilps. */
49410 /* ??? Look for sequences of interleave that produce the desired results.
49411 The combinatorics of punpck[lh] get pretty ugly... */
49413 if (expand_vec_perm_even_odd (d))
49414 return true;
49416 /* Even longer sequences. */
49417 if (expand_vec_perm_vpshufb4_vpermq2 (d))
49418 return true;
49420 return false;
49423 /* If a permutation only uses one operand, make it clear. Returns true
49424 if the permutation references both operands. */
49426 static bool
49427 canonicalize_perm (struct expand_vec_perm_d *d)
49429 int i, which, nelt = d->nelt;
49431 for (i = which = 0; i < nelt; ++i)
49432 which |= (d->perm[i] < nelt ? 1 : 2);
49434 d->one_operand_p = true;
49435 switch (which)
49437 default:
49438 gcc_unreachable();
49440 case 3:
49441 if (!rtx_equal_p (d->op0, d->op1))
49443 d->one_operand_p = false;
49444 break;
49446 /* The elements of PERM do not suggest that only the first operand
49447 is used, but both operands are identical. Allow easier matching
49448 of the permutation by folding the permutation into the single
49449 input vector. */
49450 /* FALLTHRU */
49452 case 2:
49453 for (i = 0; i < nelt; ++i)
49454 d->perm[i] &= nelt - 1;
49455 d->op0 = d->op1;
49456 break;
49458 case 1:
49459 d->op1 = d->op0;
49460 break;
49463 return (which == 3);
49466 bool
49467 ix86_expand_vec_perm_const (rtx operands[4])
49469 struct expand_vec_perm_d d;
49470 unsigned char perm[MAX_VECT_LEN];
49471 int i, nelt;
49472 bool two_args;
49473 rtx sel;
49475 d.target = operands[0];
49476 d.op0 = operands[1];
49477 d.op1 = operands[2];
49478 sel = operands[3];
49480 d.vmode = GET_MODE (d.target);
49481 gcc_assert (VECTOR_MODE_P (d.vmode));
49482 d.nelt = nelt = GET_MODE_NUNITS (d.vmode);
49483 d.testing_p = false;
49485 gcc_assert (GET_CODE (sel) == CONST_VECTOR);
49486 gcc_assert (XVECLEN (sel, 0) == nelt);
49487 gcc_checking_assert (sizeof (d.perm) == sizeof (perm));
49489 for (i = 0; i < nelt; ++i)
49491 rtx e = XVECEXP (sel, 0, i);
49492 int ei = INTVAL (e) & (2 * nelt - 1);
49493 d.perm[i] = ei;
49494 perm[i] = ei;
49497 two_args = canonicalize_perm (&d);
49499 if (ix86_expand_vec_perm_const_1 (&d))
49500 return true;
49502 /* If the selector says both arguments are needed, but the operands are the
49503 same, the above tried to expand with one_operand_p and flattened selector.
49504 If that didn't work, retry without one_operand_p; we succeeded with that
49505 during testing. */
49506 if (two_args && d.one_operand_p)
49508 d.one_operand_p = false;
49509 memcpy (d.perm, perm, sizeof (perm));
49510 return ix86_expand_vec_perm_const_1 (&d);
49513 return false;
49516 /* Implement targetm.vectorize.vec_perm_const_ok. */
49518 static bool
49519 ix86_vectorize_vec_perm_const_ok (machine_mode vmode,
49520 const unsigned char *sel)
49522 struct expand_vec_perm_d d;
49523 unsigned int i, nelt, which;
49524 bool ret;
49526 d.vmode = vmode;
49527 d.nelt = nelt = GET_MODE_NUNITS (d.vmode);
49528 d.testing_p = true;
49530 /* Given sufficient ISA support we can just return true here
49531 for selected vector modes. */
49532 switch (d.vmode)
49534 case V16SFmode:
49535 case V16SImode:
49536 case V8DImode:
49537 case V8DFmode:
49538 if (TARGET_AVX512F)
49539 /* All implementable with a single vpermi2 insn. */
49540 return true;
49541 break;
49542 case V32HImode:
49543 if (TARGET_AVX512BW)
49544 /* All implementable with a single vpermi2 insn. */
49545 return true;
49546 break;
49547 case V64QImode:
49548 if (TARGET_AVX512BW)
49549 /* Implementable with 2 vpermi2, 2 vpshufb and 1 or insn. */
49550 return true;
49551 break;
49552 case V8SImode:
49553 case V8SFmode:
49554 case V4DFmode:
49555 case V4DImode:
49556 if (TARGET_AVX512VL)
49557 /* All implementable with a single vpermi2 insn. */
49558 return true;
49559 break;
49560 case V16HImode:
49561 if (TARGET_AVX2)
49562 /* Implementable with 4 vpshufb insns, 2 vpermq and 3 vpor insns. */
49563 return true;
49564 break;
49565 case V32QImode:
49566 if (TARGET_AVX2)
49567 /* Implementable with 4 vpshufb insns, 2 vpermq and 3 vpor insns. */
49568 return true;
49569 break;
49570 case V4SImode:
49571 case V4SFmode:
49572 case V8HImode:
49573 case V16QImode:
49574 /* All implementable with a single vpperm insn. */
49575 if (TARGET_XOP)
49576 return true;
49577 /* All implementable with 2 pshufb + 1 ior. */
49578 if (TARGET_SSSE3)
49579 return true;
49580 break;
49581 case V2DImode:
49582 case V2DFmode:
49583 /* All implementable with shufpd or unpck[lh]pd. */
49584 return true;
49585 default:
49586 return false;
49589 /* Extract the values from the vector CST into the permutation
49590 array in D. */
49591 memcpy (d.perm, sel, nelt);
49592 for (i = which = 0; i < nelt; ++i)
49594 unsigned char e = d.perm[i];
49595 gcc_assert (e < 2 * nelt);
49596 which |= (e < nelt ? 1 : 2);
49599 /* For all elements from second vector, fold the elements to first. */
49600 if (which == 2)
49601 for (i = 0; i < nelt; ++i)
49602 d.perm[i] -= nelt;
49604 /* Check whether the mask can be applied to the vector type. */
49605 d.one_operand_p = (which != 3);
49607 /* Implementable with shufps or pshufd. */
49608 if (d.one_operand_p && (d.vmode == V4SFmode || d.vmode == V4SImode))
49609 return true;
49611 /* Otherwise we have to go through the motions and see if we can
49612 figure out how to generate the requested permutation. */
49613 d.target = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 1);
49614 d.op1 = d.op0 = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 2);
49615 if (!d.one_operand_p)
49616 d.op1 = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 3);
49618 start_sequence ();
49619 ret = ix86_expand_vec_perm_const_1 (&d);
49620 end_sequence ();
49622 return ret;
49625 void
49626 ix86_expand_vec_extract_even_odd (rtx targ, rtx op0, rtx op1, unsigned odd)
49628 struct expand_vec_perm_d d;
49629 unsigned i, nelt;
49631 d.target = targ;
49632 d.op0 = op0;
49633 d.op1 = op1;
49634 d.vmode = GET_MODE (targ);
49635 d.nelt = nelt = GET_MODE_NUNITS (d.vmode);
49636 d.one_operand_p = false;
49637 d.testing_p = false;
49639 for (i = 0; i < nelt; ++i)
49640 d.perm[i] = i * 2 + odd;
49642 /* We'll either be able to implement the permutation directly... */
49643 if (expand_vec_perm_1 (&d))
49644 return;
49646 /* ... or we use the special-case patterns. */
49647 expand_vec_perm_even_odd_1 (&d, odd);
49650 static void
49651 ix86_expand_vec_interleave (rtx targ, rtx op0, rtx op1, bool high_p)
49653 struct expand_vec_perm_d d;
49654 unsigned i, nelt, base;
49655 bool ok;
49657 d.target = targ;
49658 d.op0 = op0;
49659 d.op1 = op1;
49660 d.vmode = GET_MODE (targ);
49661 d.nelt = nelt = GET_MODE_NUNITS (d.vmode);
49662 d.one_operand_p = false;
49663 d.testing_p = false;
49665 base = high_p ? nelt / 2 : 0;
49666 for (i = 0; i < nelt / 2; ++i)
49668 d.perm[i * 2] = i + base;
49669 d.perm[i * 2 + 1] = i + base + nelt;
49672 /* Note that for AVX this isn't one instruction. */
49673 ok = ix86_expand_vec_perm_const_1 (&d);
49674 gcc_assert (ok);
49678 /* Expand a vector operation CODE for a V*QImode in terms of the
49679 same operation on V*HImode. */
49681 void
49682 ix86_expand_vecop_qihi (enum rtx_code code, rtx dest, rtx op1, rtx op2)
49684 machine_mode qimode = GET_MODE (dest);
49685 machine_mode himode;
49686 rtx (*gen_il) (rtx, rtx, rtx);
49687 rtx (*gen_ih) (rtx, rtx, rtx);
49688 rtx op1_l, op1_h, op2_l, op2_h, res_l, res_h;
49689 struct expand_vec_perm_d d;
49690 bool ok, full_interleave;
49691 bool uns_p = false;
49692 int i;
49694 switch (qimode)
49696 case V16QImode:
49697 himode = V8HImode;
49698 gen_il = gen_vec_interleave_lowv16qi;
49699 gen_ih = gen_vec_interleave_highv16qi;
49700 break;
49701 case V32QImode:
49702 himode = V16HImode;
49703 gen_il = gen_avx2_interleave_lowv32qi;
49704 gen_ih = gen_avx2_interleave_highv32qi;
49705 break;
49706 case V64QImode:
49707 himode = V32HImode;
49708 gen_il = gen_avx512bw_interleave_lowv64qi;
49709 gen_ih = gen_avx512bw_interleave_highv64qi;
49710 break;
49711 default:
49712 gcc_unreachable ();
49715 op2_l = op2_h = op2;
49716 switch (code)
49718 case MULT:
49719 /* Unpack data such that we've got a source byte in each low byte of
49720 each word. We don't care what goes into the high byte of each word.
49721 Rather than trying to get zero in there, most convenient is to let
49722 it be a copy of the low byte. */
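/* The copy is harmless because the low byte of each word product
   depends only on the low bytes of the factors; whatever lands in
   the high bytes is discarded when the results are repacked.  */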
49723 op2_l = gen_reg_rtx (qimode);
49724 op2_h = gen_reg_rtx (qimode);
49725 emit_insn (gen_il (op2_l, op2, op2));
49726 emit_insn (gen_ih (op2_h, op2, op2));
49727 /* FALLTHRU */
49729 op1_l = gen_reg_rtx (qimode);
49730 op1_h = gen_reg_rtx (qimode);
49731 emit_insn (gen_il (op1_l, op1, op1));
49732 emit_insn (gen_ih (op1_h, op1, op1));
49733 full_interleave = qimode == V16QImode;
49734 break;
49736 case ASHIFT:
49737 case LSHIFTRT:
49738 uns_p = true;
49739 /* FALLTHRU */
49740 case ASHIFTRT:
49741 op1_l = gen_reg_rtx (himode);
49742 op1_h = gen_reg_rtx (himode);
49743 ix86_expand_sse_unpack (op1_l, op1, uns_p, false);
49744 ix86_expand_sse_unpack (op1_h, op1, uns_p, true);
49745 full_interleave = true;
49746 break;
49747 default:
49748 gcc_unreachable ();
49751 /* Perform the operation. */
49752 res_l = expand_simple_binop (himode, code, op1_l, op2_l, NULL_RTX,
49753 1, OPTAB_DIRECT);
49754 res_h = expand_simple_binop (himode, code, op1_h, op2_h, NULL_RTX,
49755 1, OPTAB_DIRECT);
49756 gcc_assert (res_l && res_h);
49758 /* Merge the data back into the right place. */
49759 d.target = dest;
49760 d.op0 = gen_lowpart (qimode, res_l);
49761 d.op1 = gen_lowpart (qimode, res_h);
49762 d.vmode = qimode;
49763 d.nelt = GET_MODE_NUNITS (qimode);
49764 d.one_operand_p = false;
49765 d.testing_p = false;
49767 if (full_interleave)
49769 /* For SSE2, we used a full interleave, so the desired
49770 results are in the even elements. */
49771 for (i = 0; i < 64; ++i)
49772 d.perm[i] = i * 2;
49774 else
49776 /* For AVX, the interleave used above was not cross-lane. So the
49777 extraction is evens but with the second and third quarter swapped.
49778 Happily, that is even one insn shorter than even extraction. */
49779 for (i = 0; i < 64; ++i)
49780 d.perm[i] = i * 2 + ((i & 24) == 8 ? 16 : (i & 24) == 16 ? -16 : 0);
49783 ok = ix86_expand_vec_perm_const_1 (&d);
49784 gcc_assert (ok);
49786 set_unique_reg_note (get_last_insn (), REG_EQUAL,
49787 gen_rtx_fmt_ee (code, qimode, op1, op2));
49790 /* Helper function of ix86_expand_mul_widen_evenodd. Return true
49791 if op is CONST_VECTOR with all odd elements equal to their
49792 preceding element. */
49794 static bool
49795 const_vector_equal_evenodd_p (rtx op)
49797 machine_mode mode = GET_MODE (op);
49798 int i, nunits = GET_MODE_NUNITS (mode);
49799 if (GET_CODE (op) != CONST_VECTOR
49800 || nunits != CONST_VECTOR_NUNITS (op))
49801 return false;
49802 for (i = 0; i < nunits; i += 2)
49803 if (CONST_VECTOR_ELT (op, i) != CONST_VECTOR_ELT (op, i + 1))
49804 return false;
49805 return true;
49808 void
49809 ix86_expand_mul_widen_evenodd (rtx dest, rtx op1, rtx op2,
49810 bool uns_p, bool odd_p)
49812 machine_mode mode = GET_MODE (op1);
49813 machine_mode wmode = GET_MODE (dest);
49814 rtx x;
49815 rtx orig_op1 = op1, orig_op2 = op2;
49817 if (!nonimmediate_operand (op1, mode))
49818 op1 = force_reg (mode, op1);
49819 if (!nonimmediate_operand (op2, mode))
49820 op2 = force_reg (mode, op2);
49822 /* We only play even/odd games with vectors of SImode. */
49823 gcc_assert (mode == V4SImode || mode == V8SImode || mode == V16SImode);
49825 /* If we're looking for the odd results, shift those members down to
49826 the even slots. For some cpus this is faster than a PSHUFD. */
49827 if (odd_p)
49829 /* For XOP use vpmacsdqh, but only for smult, as it is only
49830 signed. */
49831 if (TARGET_XOP && mode == V4SImode && !uns_p)
49833 x = force_reg (wmode, CONST0_RTX (wmode));
49834 emit_insn (gen_xop_pmacsdqh (dest, op1, op2, x));
49835 return;
49838 x = GEN_INT (GET_MODE_UNIT_BITSIZE (mode));
49839 if (!const_vector_equal_evenodd_p (orig_op1))
49840 op1 = expand_binop (wmode, lshr_optab, gen_lowpart (wmode, op1),
49841 x, NULL, 1, OPTAB_DIRECT);
49842 if (!const_vector_equal_evenodd_p (orig_op2))
49843 op2 = expand_binop (wmode, lshr_optab, gen_lowpart (wmode, op2),
49844 x, NULL, 1, OPTAB_DIRECT);
49845 op1 = gen_lowpart (mode, op1);
49846 op2 = gen_lowpart (mode, op2);
49849 if (mode == V16SImode)
49851 if (uns_p)
49852 x = gen_vec_widen_umult_even_v16si (dest, op1, op2);
49853 else
49854 x = gen_vec_widen_smult_even_v16si (dest, op1, op2);
49856 else if (mode == V8SImode)
49858 if (uns_p)
49859 x = gen_vec_widen_umult_even_v8si (dest, op1, op2);
49860 else
49861 x = gen_vec_widen_smult_even_v8si (dest, op1, op2);
49863 else if (uns_p)
49864 x = gen_vec_widen_umult_even_v4si (dest, op1, op2);
49865 else if (TARGET_SSE4_1)
49866 x = gen_sse4_1_mulv2siv2di3 (dest, op1, op2);
49867 else
49869 rtx s1, s2, t0, t1, t2;
49871 /* The easiest way to implement this without PMULDQ is to go through
49872 the motions as if we are performing a full 64-bit multiply. With
49873 the exception that we need to do less shuffling of the elements. */
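/* Writing a signed element as a = au - 2^32 * (a < 0), and likewise
   for b, gives
     a * b == au * bu - 2^32 * (au * (b < 0) + bu * (a < 0))  (mod 2^64).
   The unsigned multiplies of the all-ones sign masks below, combined
   and shifted left by 32, form exactly that correction term.  */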
49875 /* Compute the sign-extension, aka highparts, of the two operands. */
49876 s1 = ix86_expand_sse_cmp (gen_reg_rtx (mode), GT, CONST0_RTX (mode),
49877 op1, pc_rtx, pc_rtx);
49878 s2 = ix86_expand_sse_cmp (gen_reg_rtx (mode), GT, CONST0_RTX (mode),
49879 op2, pc_rtx, pc_rtx);
49881 /* Multiply LO(A) * HI(B), and vice-versa. */
49882 t1 = gen_reg_rtx (wmode);
49883 t2 = gen_reg_rtx (wmode);
49884 emit_insn (gen_vec_widen_umult_even_v4si (t1, s1, op2));
49885 emit_insn (gen_vec_widen_umult_even_v4si (t2, s2, op1));
49887 /* Multiply LO(A) * LO(B). */
49888 t0 = gen_reg_rtx (wmode);
49889 emit_insn (gen_vec_widen_umult_even_v4si (t0, op1, op2));
49891 /* Combine and shift the highparts into place. */
49892 t1 = expand_binop (wmode, add_optab, t1, t2, t1, 1, OPTAB_DIRECT);
49893 t1 = expand_binop (wmode, ashl_optab, t1, GEN_INT (32), t1,
49894 1, OPTAB_DIRECT);
49896 /* Combine high and low parts. */
49897 force_expand_binop (wmode, add_optab, t0, t1, dest, 1, OPTAB_DIRECT);
49898 return;
49900 emit_insn (x);
49903 void
49904 ix86_expand_mul_widen_hilo (rtx dest, rtx op1, rtx op2,
49905 bool uns_p, bool high_p)
49907 machine_mode wmode = GET_MODE (dest);
49908 machine_mode mode = GET_MODE (op1);
49909 rtx t1, t2, t3, t4, mask;
49911 switch (mode)
49913 case V4SImode:
49914 t1 = gen_reg_rtx (mode);
49915 t2 = gen_reg_rtx (mode);
49916 if (TARGET_XOP && !uns_p)
49918 /* With XOP, we have pmacsdqh, aka mul_widen_odd. In this case,
49919 shuffle the elements once so that all elements are in the right
49920 place for immediate use: { A C B D }. */
49921 emit_insn (gen_sse2_pshufd_1 (t1, op1, const0_rtx, const2_rtx,
49922 const1_rtx, GEN_INT (3)));
49923 emit_insn (gen_sse2_pshufd_1 (t2, op2, const0_rtx, const2_rtx,
49924 const1_rtx, GEN_INT (3)));
49926 else
49928 /* Put the elements into place for the multiply. */
49929 ix86_expand_vec_interleave (t1, op1, op1, high_p);
49930 ix86_expand_vec_interleave (t2, op2, op2, high_p);
49931 high_p = false;
49933 ix86_expand_mul_widen_evenodd (dest, t1, t2, uns_p, high_p);
49934 break;
49936 case V8SImode:
49937 /* Shuffle the elements between the lanes. After this we
49938 have { A B E F | C D G H } for each operand. */
49939 t1 = gen_reg_rtx (V4DImode);
49940 t2 = gen_reg_rtx (V4DImode);
49941 emit_insn (gen_avx2_permv4di_1 (t1, gen_lowpart (V4DImode, op1),
49942 const0_rtx, const2_rtx,
49943 const1_rtx, GEN_INT (3)));
49944 emit_insn (gen_avx2_permv4di_1 (t2, gen_lowpart (V4DImode, op2),
49945 const0_rtx, const2_rtx,
49946 const1_rtx, GEN_INT (3)));
49948 /* Shuffle the elements within the lanes. After this we
49949 have { A A B B | C C D D } or { E E F F | G G H H }. */
49950 t3 = gen_reg_rtx (V8SImode);
49951 t4 = gen_reg_rtx (V8SImode);
49952 mask = GEN_INT (high_p
49953 ? 2 + (2 << 2) + (3 << 4) + (3 << 6)
49954 : 0 + (0 << 2) + (1 << 4) + (1 << 6));
49955 emit_insn (gen_avx2_pshufdv3 (t3, gen_lowpart (V8SImode, t1), mask));
49956 emit_insn (gen_avx2_pshufdv3 (t4, gen_lowpart (V8SImode, t2), mask));
49958 ix86_expand_mul_widen_evenodd (dest, t3, t4, uns_p, false);
49959 break;
49961 case V8HImode:
49962 case V16HImode:
49963 t1 = expand_binop (mode, smul_optab, op1, op2, NULL_RTX,
49964 uns_p, OPTAB_DIRECT);
49965 t2 = expand_binop (mode,
49966 uns_p ? umul_highpart_optab : smul_highpart_optab,
49967 op1, op2, NULL_RTX, uns_p, OPTAB_DIRECT);
49968 gcc_assert (t1 && t2);
49970 t3 = gen_reg_rtx (mode);
49971 ix86_expand_vec_interleave (t3, t1, t2, high_p);
49972 emit_move_insn (dest, gen_lowpart (wmode, t3));
49973 break;
49975 case V16QImode:
49976 case V32QImode:
49977 case V32HImode:
49978 case V16SImode:
49979 case V64QImode:
49980 t1 = gen_reg_rtx (wmode);
49981 t2 = gen_reg_rtx (wmode);
49982 ix86_expand_sse_unpack (t1, op1, uns_p, high_p);
49983 ix86_expand_sse_unpack (t2, op2, uns_p, high_p);
49985 emit_insn (gen_rtx_SET (dest, gen_rtx_MULT (wmode, t1, t2)));
49986 break;
49988 default:
49989 gcc_unreachable ();
49993 void
49994 ix86_expand_sse2_mulv4si3 (rtx op0, rtx op1, rtx op2)
49996 rtx res_1, res_2, res_3, res_4;
49998 res_1 = gen_reg_rtx (V4SImode);
49999 res_2 = gen_reg_rtx (V4SImode);
50000 res_3 = gen_reg_rtx (V2DImode);
50001 res_4 = gen_reg_rtx (V2DImode);
50002 ix86_expand_mul_widen_evenodd (res_3, op1, op2, true, false);
50003 ix86_expand_mul_widen_evenodd (res_4, op1, op2, true, true);
50005 /* Move the results in element 2 down to element 1; we don't care
50006 what goes in elements 2 and 3. Then we can merge the parts
50007 back together with an interleave.
50009 Note that two other sequences were tried:
50010 (1) Use interleaves at the start instead of psrldq, which allows
50011 us to use a single shufps to merge things back at the end.
50012 (2) Use shufps here to combine the two vectors, then pshufd to
50013 put the elements in the correct order.
50014 In both cases the cost of the reformatting stall was too high
50015 and the overall sequence slower. */
50017 emit_insn (gen_sse2_pshufd_1 (res_1, gen_lowpart (V4SImode, res_3),
50018 const0_rtx, const2_rtx,
50019 const0_rtx, const0_rtx));
50020 emit_insn (gen_sse2_pshufd_1 (res_2, gen_lowpart (V4SImode, res_4),
50021 const0_rtx, const2_rtx,
50022 const0_rtx, const0_rtx));
50023 res_1 = emit_insn (gen_vec_interleave_lowv4si (op0, res_1, res_2));
50025 set_unique_reg_note (res_1, REG_EQUAL, gen_rtx_MULT (V4SImode, op1, op2));
50028 void
50029 ix86_expand_sse2_mulvxdi3 (rtx op0, rtx op1, rtx op2)
50031 machine_mode mode = GET_MODE (op0);
50032 rtx t1, t2, t3, t4, t5, t6;
50034 if (TARGET_AVX512DQ && mode == V8DImode)
50035 emit_insn (gen_avx512dq_mulv8di3 (op0, op1, op2));
50036 else if (TARGET_AVX512DQ && TARGET_AVX512VL && mode == V4DImode)
50037 emit_insn (gen_avx512dq_mulv4di3 (op0, op1, op2));
50038 else if (TARGET_AVX512DQ && TARGET_AVX512VL && mode == V2DImode)
50039 emit_insn (gen_avx512dq_mulv2di3 (op0, op1, op2));
50040 else if (TARGET_XOP && mode == V2DImode)
50042 /* op1: A,B,C,D, op2: E,F,G,H */
50043 op1 = gen_lowpart (V4SImode, op1);
50044 op2 = gen_lowpart (V4SImode, op2);
50046 t1 = gen_reg_rtx (V4SImode);
50047 t2 = gen_reg_rtx (V4SImode);
50048 t3 = gen_reg_rtx (V2DImode);
50049 t4 = gen_reg_rtx (V2DImode);
50051 /* t1: B,A,D,C */
50052 emit_insn (gen_sse2_pshufd_1 (t1, op1,
50053 GEN_INT (1),
50054 GEN_INT (0),
50055 GEN_INT (3),
50056 GEN_INT (2)));
50058 /* t2: (B*E),(A*F),(D*G),(C*H) */
50059 emit_insn (gen_mulv4si3 (t2, t1, op2));
50061 /* t3: (B*E)+(A*F), (D*G)+(C*H) */
50062 emit_insn (gen_xop_phadddq (t3, t2));
50064 /* t4: ((B*E)+(A*F))<<32, ((D*G)+(C*H))<<32 */
50065 emit_insn (gen_ashlv2di3 (t4, t3, GEN_INT (32)));
50067 /* Multiply lower parts and add all */
50068 t5 = gen_reg_rtx (V2DImode);
50069 emit_insn (gen_vec_widen_umult_even_v4si (t5,
50070 gen_lowpart (V4SImode, op1),
50071 gen_lowpart (V4SImode, op2)));
50072 op0 = expand_binop (mode, add_optab, t5, t4, op0, 1, OPTAB_DIRECT);
50075 else
50077 machine_mode nmode;
50078 rtx (*umul) (rtx, rtx, rtx);
50080 if (mode == V2DImode)
50082 umul = gen_vec_widen_umult_even_v4si;
50083 nmode = V4SImode;
50085 else if (mode == V4DImode)
50087 umul = gen_vec_widen_umult_even_v8si;
50088 nmode = V8SImode;
50090 else if (mode == V8DImode)
50092 umul = gen_vec_widen_umult_even_v16si;
50093 nmode = V16SImode;
50095 else
50096 gcc_unreachable ();
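/* Split each 64-bit element into 32-bit halves and use
     a * b == lo(a) * lo(b)
              + ((hi(a) * lo(b) + lo(a) * hi(b)) << 32)  (mod 2^64),
   with the widening even multiplies providing the 32x32->64-bit
   products.  */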
50099 /* Multiply low parts. */
50100 t1 = gen_reg_rtx (mode);
50101 emit_insn (umul (t1, gen_lowpart (nmode, op1), gen_lowpart (nmode, op2)));
50103 /* Shift input vectors right 32 bits so we can multiply high parts. */
50104 t6 = GEN_INT (32);
50105 t2 = expand_binop (mode, lshr_optab, op1, t6, NULL, 1, OPTAB_DIRECT);
50106 t3 = expand_binop (mode, lshr_optab, op2, t6, NULL, 1, OPTAB_DIRECT);
50108 /* Multiply high parts by low parts. */
50109 t4 = gen_reg_rtx (mode);
50110 t5 = gen_reg_rtx (mode);
50111 emit_insn (umul (t4, gen_lowpart (nmode, t2), gen_lowpart (nmode, op2)));
50112 emit_insn (umul (t5, gen_lowpart (nmode, t3), gen_lowpart (nmode, op1)));
50114 /* Combine and shift the highparts back. */
50115 t4 = expand_binop (mode, add_optab, t4, t5, t4, 1, OPTAB_DIRECT);
50116 t4 = expand_binop (mode, ashl_optab, t4, t6, t4, 1, OPTAB_DIRECT);
50118 /* Combine high and low parts. */
50119 force_expand_binop (mode, add_optab, t1, t4, op0, 1, OPTAB_DIRECT);
50122 set_unique_reg_note (get_last_insn (), REG_EQUAL,
50123 gen_rtx_MULT (mode, op1, op2));
50126 /* Return 1 if control transfer instruction INSN
50127 should be encoded with bnd prefix.
50128 If insn is NULL then return 1 when control
50129 transfer instructions should be prefixed with
50130 bnd by default for current function. */
50132 bool
50133 ix86_bnd_prefixed_insn_p (rtx insn)
50135 /* For call insns check special flag. */
50136 if (insn && CALL_P (insn))
50138 rtx call = get_call_rtx_from (insn);
50139 if (call)
50140 return CALL_EXPR_WITH_BOUNDS_P (call);
50143 /* All other insns are prefixed only if function is instrumented. */
50144 return chkp_function_instrumented_p (current_function_decl);
50147 /* Calculate integer abs() using only SSE2 instructions. */
50149 void
50150 ix86_expand_sse2_abs (rtx target, rtx input)
50152 machine_mode mode = GET_MODE (target);
50153 rtx tmp0, tmp1, x;
50155 switch (mode)
50157 /* For 32-bit signed integer X, the best way to calculate the absolute
50158 value of X is (((signed) X >> (W-1)) ^ X) - ((signed) X >> (W-1)). */
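/* For instance, with X = -5 the arithmetic shift yields -1 (all ones),
   so (X ^ -1) - (-1) = ~X + 1 = 5; with X = 5 the shift yields 0 and
   the expression reduces to X itself.  */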
50159 case V4SImode:
50160 tmp0 = expand_simple_binop (mode, ASHIFTRT, input,
50161 GEN_INT (GET_MODE_BITSIZE
50162 (GET_MODE_INNER (mode)) - 1),
50163 NULL, 0, OPTAB_DIRECT);
50164 tmp1 = expand_simple_binop (mode, XOR, tmp0, input,
50165 NULL, 0, OPTAB_DIRECT);
50166 x = expand_simple_binop (mode, MINUS, tmp1, tmp0,
50167 target, 0, OPTAB_DIRECT);
50168 break;
50170 /* For 16-bit signed integer X, the best way to calculate the absolute
50171 value of X is max (X, -X), as SSE2 provides the PMAXSW insn. */
50172 case V8HImode:
50173 tmp0 = expand_unop (mode, neg_optab, input, NULL_RTX, 0);
50175 x = expand_simple_binop (mode, SMAX, tmp0, input,
50176 target, 0, OPTAB_DIRECT);
50177 break;
50179 /* For 8-bit signed integer X, the best way to calculate the absolute
50180 value of X is min ((unsigned char) X, (unsigned char) (-X)),
50181 as SSE2 provides the PMINUB insn. */
50182 case V16QImode:
50183 tmp0 = expand_unop (mode, neg_optab, input, NULL_RTX, 0);
50185 x = expand_simple_binop (V16QImode, UMIN, tmp0, input,
50186 target, 0, OPTAB_DIRECT);
50187 break;
50189 default:
50190 gcc_unreachable ();
50193 if (x != target)
50194 emit_move_insn (target, x);
50197 /* Expand an insert into a vector register through pinsr insn.
50198 Return true if successful. */
50200 bool
50201 ix86_expand_pinsr (rtx *operands)
50203 rtx dst = operands[0];
50204 rtx src = operands[3];
50206 unsigned int size = INTVAL (operands[1]);
50207 unsigned int pos = INTVAL (operands[2]);
50209 if (GET_CODE (dst) == SUBREG)
50211 pos += SUBREG_BYTE (dst) * BITS_PER_UNIT;
50212 dst = SUBREG_REG (dst);
50215 if (GET_CODE (src) == SUBREG)
50216 src = SUBREG_REG (src);
50218 switch (GET_MODE (dst))
50220 case V16QImode:
50221 case V8HImode:
50222 case V4SImode:
50223 case V2DImode:
50225 machine_mode srcmode, dstmode;
50226 rtx (*pinsr)(rtx, rtx, rtx, rtx);
50228 srcmode = mode_for_size (size, MODE_INT, 0);
50230 switch (srcmode)
50232 case QImode:
50233 if (!TARGET_SSE4_1)
50234 return false;
50235 dstmode = V16QImode;
50236 pinsr = gen_sse4_1_pinsrb;
50237 break;
50239 case HImode:
50240 if (!TARGET_SSE2)
50241 return false;
50242 dstmode = V8HImode;
50243 pinsr = gen_sse2_pinsrw;
50244 break;
50246 case SImode:
50247 if (!TARGET_SSE4_1)
50248 return false;
50249 dstmode = V4SImode;
50250 pinsr = gen_sse4_1_pinsrd;
50251 break;
50253 case DImode:
50254 gcc_assert (TARGET_64BIT);
50255 if (!TARGET_SSE4_1)
50256 return false;
50257 dstmode = V2DImode;
50258 pinsr = gen_sse4_1_pinsrq;
50259 break;
50261 default:
50262 return false;
50265 rtx d = dst;
50266 if (GET_MODE (dst) != dstmode)
50267 d = gen_reg_rtx (dstmode);
50268 src = gen_lowpart (srcmode, src);
50270 pos /= size;
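/* POS is now an element index rather than a bit offset; the one-hot
   immediate (1 << POS) passed below selects which element of the
   destination vector the pinsr pattern replaces.  */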
50272 emit_insn (pinsr (d, gen_lowpart (dstmode, dst), src,
50273 GEN_INT (1 << pos)));
50274 if (d != dst)
50275 emit_move_insn (dst, gen_lowpart (GET_MODE (dst), d));
50276 return true;
50279 default:
50280 return false;
50284 /* This function returns the calling-ABI-specific va_list type node.
50285 It returns the FNDECL-specific va_list type. */
50287 static tree
50288 ix86_fn_abi_va_list (tree fndecl)
50290 if (!TARGET_64BIT)
50291 return va_list_type_node;
50292 gcc_assert (fndecl != NULL_TREE);
50294 if (ix86_function_abi ((const_tree) fndecl) == MS_ABI)
50295 return ms_va_list_type_node;
50296 else
50297 return sysv_va_list_type_node;
50300 /* Returns the canonical va_list type specified by TYPE. If there
50301 is no valid TYPE provided, it returns NULL_TREE. */
50303 static tree
50304 ix86_canonical_va_list_type (tree type)
50306 tree wtype, htype;
50308 /* Resolve references and pointers to va_list type. */
50309 if (TREE_CODE (type) == MEM_REF)
50310 type = TREE_TYPE (type);
50311 else if (POINTER_TYPE_P (type) && POINTER_TYPE_P (TREE_TYPE(type)))
50312 type = TREE_TYPE (type);
50313 else if (POINTER_TYPE_P (type) && TREE_CODE (TREE_TYPE (type)) == ARRAY_TYPE)
50314 type = TREE_TYPE (type);
50316 if (TARGET_64BIT && va_list_type_node != NULL_TREE)
50318 wtype = va_list_type_node;
50319 gcc_assert (wtype != NULL_TREE);
50320 htype = type;
50321 if (TREE_CODE (wtype) == ARRAY_TYPE)
50323 /* If va_list is an array type, the argument may have decayed
50324 to a pointer type, e.g. by being passed to another function.
50325 In that case, unwrap both types so that we can compare the
50326 underlying records. */
50327 if (TREE_CODE (htype) == ARRAY_TYPE
50328 || POINTER_TYPE_P (htype))
50330 wtype = TREE_TYPE (wtype);
50331 htype = TREE_TYPE (htype);
50334 if (TYPE_MAIN_VARIANT (wtype) == TYPE_MAIN_VARIANT (htype))
50335 return va_list_type_node;
50336 wtype = sysv_va_list_type_node;
50337 gcc_assert (wtype != NULL_TREE);
50338 htype = type;
50339 if (TREE_CODE (wtype) == ARRAY_TYPE)
50341 /* If va_list is an array type, the argument may have decayed
50342 to a pointer type, e.g. by being passed to another function.
50343 In that case, unwrap both types so that we can compare the
50344 underlying records. */
50345 if (TREE_CODE (htype) == ARRAY_TYPE
50346 || POINTER_TYPE_P (htype))
50348 wtype = TREE_TYPE (wtype);
50349 htype = TREE_TYPE (htype);
50352 if (TYPE_MAIN_VARIANT (wtype) == TYPE_MAIN_VARIANT (htype))
50353 return sysv_va_list_type_node;
50354 wtype = ms_va_list_type_node;
50355 gcc_assert (wtype != NULL_TREE);
50356 htype = type;
50357 if (TREE_CODE (wtype) == ARRAY_TYPE)
50359 /* If va_list is an array type, the argument may have decayed
50360 to a pointer type, e.g. by being passed to another function.
50361 In that case, unwrap both types so that we can compare the
50362 underlying records. */
50363 if (TREE_CODE (htype) == ARRAY_TYPE
50364 || POINTER_TYPE_P (htype))
50366 wtype = TREE_TYPE (wtype);
50367 htype = TREE_TYPE (htype);
50370 if (TYPE_MAIN_VARIANT (wtype) == TYPE_MAIN_VARIANT (htype))
50371 return ms_va_list_type_node;
50372 return NULL_TREE;
50374 return std_canonical_va_list_type (type);
50377 /* Iterate through the target-specific builtin types for va_list.
50378 IDX denotes the iterator, *PTREE is set to the result type of
50379 the va_list builtin, and *PNAME to its internal type.
50380 Returns zero if there is no element for this index, otherwise
50381 IDX should be increased upon the next call.
50382 Note, do not iterate a base builtin's name like __builtin_va_list.
50383 Used from c_common_nodes_and_builtins. */
50385 static int
50386 ix86_enum_va_list (int idx, const char **pname, tree *ptree)
50388 if (TARGET_64BIT)
50390 switch (idx)
50392 default:
50393 break;
50395 case 0:
50396 *ptree = ms_va_list_type_node;
50397 *pname = "__builtin_ms_va_list";
50398 return 1;
50400 case 1:
50401 *ptree = sysv_va_list_type_node;
50402 *pname = "__builtin_sysv_va_list";
50403 return 1;
50407 return 0;
50410 #undef TARGET_SCHED_DISPATCH
50411 #define TARGET_SCHED_DISPATCH has_dispatch
50412 #undef TARGET_SCHED_DISPATCH_DO
50413 #define TARGET_SCHED_DISPATCH_DO do_dispatch
50414 #undef TARGET_SCHED_REASSOCIATION_WIDTH
50415 #define TARGET_SCHED_REASSOCIATION_WIDTH ix86_reassociation_width
50416 #undef TARGET_SCHED_REORDER
50417 #define TARGET_SCHED_REORDER ix86_sched_reorder
50418 #undef TARGET_SCHED_ADJUST_PRIORITY
50419 #define TARGET_SCHED_ADJUST_PRIORITY ix86_adjust_priority
50420 #undef TARGET_SCHED_DEPENDENCIES_EVALUATION_HOOK
50421 #define TARGET_SCHED_DEPENDENCIES_EVALUATION_HOOK \
50422 ix86_dependencies_evaluation_hook
50424 /* The size of the dispatch window is the total number of bytes of
50425 object code allowed in a window. */
50426 #define DISPATCH_WINDOW_SIZE 16
50428 /* Number of dispatch windows considered for scheduling. */
50429 #define MAX_DISPATCH_WINDOWS 3
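/* Taken together, DISPATCH_WINDOW_SIZE * MAX_DISPATCH_WINDOWS = 48
   bytes of code are considered at once; this is the limit that the
   window-size checks against 48 further down correspond to.  */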
50431 /* Maximum number of instructions in a window. */
50432 #define MAX_INSN 4
50434 /* Maximum number of immediate operands in a window. */
50435 #define MAX_IMM 4
50437 /* Maximum number of immediate bits allowed in a window. */
50438 #define MAX_IMM_SIZE 128
50440 /* Maximum number of 32 bit immediates allowed in a window. */
50441 #define MAX_IMM_32 4
50443 /* Maximum number of 64 bit immediates allowed in a window. */
50444 #define MAX_IMM_64 2
50446 /* Maximum total of loads or prefetches allowed in a window. */
50447 #define MAX_LOAD 2
50449 /* Maximum total of stores allowed in a window. */
50450 #define MAX_STORE 1
50452 #undef BIG
50453 #define BIG 100
50456 /* Dispatch groups. Instructions that affect the mix in a dispatch window. */
50457 enum dispatch_group {
50458 disp_no_group = 0,
50459 disp_load,
50460 disp_store,
50461 disp_load_store,
50462 disp_prefetch,
50463 disp_imm,
50464 disp_imm_32,
50465 disp_imm_64,
50466 disp_branch,
50467 disp_cmp,
50468 disp_jcc,
50469 disp_last
50472 /* Number of allowable groups in a dispatch window. It is an array
50473 indexed by dispatch_group enum. 100 is used as a big number,
50474 because the number of these kind of operations does not have any
50475 effect in dispatch window, but we need them for other reasons in
50476 the table. */
50477 static unsigned int num_allowable_groups[disp_last] = {
50478 0, 2, 1, 1, 2, 4, 4, 2, 1, BIG, BIG
50481 char group_name[disp_last + 1][16] = {
50482 "disp_no_group", "disp_load", "disp_store", "disp_load_store",
50483 "disp_prefetch", "disp_imm", "disp_imm_32", "disp_imm_64",
50484 "disp_branch", "disp_cmp", "disp_jcc", "disp_last"
50487 /* Instruction path. */
50488 enum insn_path {
50489 no_path = 0,
50490 path_single, /* Single micro op. */
50491 path_double, /* Double micro op. */
50492 path_multi, /* Instructions with more than 2 micro ops. */
50493 last_path
50496 /* sched_insn_info describes one instruction within a dispatch window:
50497 the insn itself, its dispatch group and path, and its byte and
50498 immediate sizes.
50500 Windows are allocated for each basic block and are linked
50501 together. */
50502 typedef struct sched_insn_info_s {
50503 rtx insn;
50504 enum dispatch_group group;
50505 enum insn_path path;
50506 int byte_len;
50507 int imm_bytes;
50508 } sched_insn_info;
50510 /* Linked list of dispatch windows. This is a two-way list of
50511 dispatch windows of a basic block. It contains information about
50512 the number of uops in the window and the total number of
50513 instructions and of bytes in the object code for this dispatch
50514 window. */
50515 typedef struct dispatch_windows_s {
50516 int num_insn; /* Number of insn in the window. */
50517 int num_uops; /* Number of uops in the window. */
50518 int window_size; /* Number of bytes in the window. */
50519 int window_num; /* Window number, either 0 or 1. */
50520 int num_imm; /* Number of immediates in an insn. */
50521 int num_imm_32; /* Number of 32 bit immediates in an insn. */
50522 int num_imm_64; /* Number of 64 bit immediates in an insn. */
50523 int imm_size; /* Total immediates in the window. */
50524 int num_loads; /* Total memory loads in the window. */
50525 int num_stores; /* Total memory stores in the window. */
50526 int violation; /* Violation exists in window. */
50527 sched_insn_info *window; /* Pointer to the window. */
50528 struct dispatch_windows_s *next;
50529 struct dispatch_windows_s *prev;
50530 } dispatch_windows;
50532 /* Immediate values used in an insn. */
50533 typedef struct imm_info_s
50535 int imm;
50536 int imm32;
50537 int imm64;
50538 } imm_info;
50540 static dispatch_windows *dispatch_window_list;
50541 static dispatch_windows *dispatch_window_list1;
50543 /* Get dispatch group of insn. */
50545 static enum dispatch_group
50546 get_mem_group (rtx_insn *insn)
50548 enum attr_memory memory;
50550 if (INSN_CODE (insn) < 0)
50551 return disp_no_group;
50552 memory = get_attr_memory (insn);
50553 if (memory == MEMORY_STORE)
50554 return disp_store;
50556 if (memory == MEMORY_LOAD)
50557 return disp_load;
50559 if (memory == MEMORY_BOTH)
50560 return disp_load_store;
50562 return disp_no_group;
50565 /* Return true if insn is a compare instruction. */
50567 static bool
50568 is_cmp (rtx_insn *insn)
50570 enum attr_type type;
50572 type = get_attr_type (insn);
50573 return (type == TYPE_TEST
50574 || type == TYPE_ICMP
50575 || type == TYPE_FCMP
50576 || GET_CODE (PATTERN (insn)) == COMPARE);
50579 /* Return true if a dispatch violation was encountered. */
50581 static bool
50582 dispatch_violation (void)
50584 if (dispatch_window_list->next)
50585 return dispatch_window_list->next->violation;
50586 return dispatch_window_list->violation;
50589 /* Return true if insn is a branch instruction. */
50591 static bool
50592 is_branch (rtx_insn *insn)
50594 return (CALL_P (insn) || JUMP_P (insn));
50597 /* Return true if insn is a prefetch instruction. */
50599 static bool
50600 is_prefetch (rtx_insn *insn)
50602 return NONJUMP_INSN_P (insn) && GET_CODE (PATTERN (insn)) == PREFETCH;
50605 /* This function initializes a dispatch window and the list container holding a
50606 pointer to the window. */
50608 static void
50609 init_window (int window_num)
50611 int i;
50612 dispatch_windows *new_list;
50614 if (window_num == 0)
50615 new_list = dispatch_window_list;
50616 else
50617 new_list = dispatch_window_list1;
50619 new_list->num_insn = 0;
50620 new_list->num_uops = 0;
50621 new_list->window_size = 0;
50622 new_list->next = NULL;
50623 new_list->prev = NULL;
50624 new_list->window_num = window_num;
50625 new_list->num_imm = 0;
50626 new_list->num_imm_32 = 0;
50627 new_list->num_imm_64 = 0;
50628 new_list->imm_size = 0;
50629 new_list->num_loads = 0;
50630 new_list->num_stores = 0;
50631 new_list->violation = false;
50633 for (i = 0; i < MAX_INSN; i++)
50635 new_list->window[i].insn = NULL;
50636 new_list->window[i].group = disp_no_group;
50637 new_list->window[i].path = no_path;
50638 new_list->window[i].byte_len = 0;
50639 new_list->window[i].imm_bytes = 0;
50641 return;
50644 /* This function allocates and initializes a dispatch window and the
50645 list container holding a pointer to the window. */
50647 static dispatch_windows *
50648 allocate_window (void)
50650 dispatch_windows *new_list = XNEW (struct dispatch_windows_s);
50651 new_list->window = XNEWVEC (struct sched_insn_info_s, MAX_INSN + 1);
50653 return new_list;
50656 /* This routine initializes the dispatch scheduling information. It
50657 initiates building dispatch scheduler tables and constructs the
50658 first dispatch window. */
50660 static void
50661 init_dispatch_sched (void)
50663 /* Allocate a dispatch list and a window. */
50664 dispatch_window_list = allocate_window ();
50665 dispatch_window_list1 = allocate_window ();
50666 init_window (0);
50667 init_window (1);
50670 /* This function returns true if a branch is detected. End of a basic block
50671 does not have to be a branch, but here we assume only branches end a
50672 window. */
50674 static bool
50675 is_end_basic_block (enum dispatch_group group)
50677 return group == disp_branch;
50680 /* This function is called when the end of a window processing is reached. */
50682 static void
50683 process_end_window (void)
50685 gcc_assert (dispatch_window_list->num_insn <= MAX_INSN);
50686 if (dispatch_window_list->next)
50688 gcc_assert (dispatch_window_list1->num_insn <= MAX_INSN);
50689 gcc_assert (dispatch_window_list->window_size
50690 + dispatch_window_list1->window_size <= 48);
50691 init_window (1);
50693 init_window (0);
50696 /* Allocates a new dispatch window and adds it to WINDOW_LIST.
50697 WINDOW_NUM is either 0 or 1. A maximum of two windows are generated
50698 for 48 bytes of instructions. Note that these windows are not dispatch
50699 windows of size DISPATCH_WINDOW_SIZE. */
50701 static dispatch_windows *
50702 allocate_next_window (int window_num)
50704 if (window_num == 0)
50706 if (dispatch_window_list->next)
50707 init_window (1);
50708 init_window (0);
50709 return dispatch_window_list;
50712 dispatch_window_list->next = dispatch_window_list1;
50713 dispatch_window_list1->prev = dispatch_window_list;
50715 return dispatch_window_list1;
50718 /* Compute number of immediate operands of an instruction. */
50720 static void
50721 find_constant (rtx in_rtx, imm_info *imm_values)
50723 if (INSN_P (in_rtx))
50724 in_rtx = PATTERN (in_rtx);
50725 subrtx_iterator::array_type array;
50726 FOR_EACH_SUBRTX (iter, array, in_rtx, ALL)
50727 if (const_rtx x = *iter)
50728 switch (GET_CODE (x))
50730 case CONST:
50731 case SYMBOL_REF:
50732 case CONST_INT:
50733 (imm_values->imm)++;
50734 if (x86_64_immediate_operand (CONST_CAST_RTX (x), SImode))
50735 (imm_values->imm32)++;
50736 else
50737 (imm_values->imm64)++;
50738 break;
50740 case CONST_DOUBLE:
50741 case CONST_WIDE_INT:
50742 (imm_values->imm)++;
50743 (imm_values->imm64)++;
50744 break;
50746 case CODE_LABEL:
50747 if (LABEL_KIND (x) == LABEL_NORMAL)
50749 (imm_values->imm)++;
50750 (imm_values->imm32)++;
50752 break;
50754 default:
50755 break;
50759 /* Return total size of immediate operands of an instruction along with number
50760 of corresponding immediate operands. It initializes its parameters to zero
50761 before calling FIND_CONSTANT.
50762 INSN is the input instruction. IMM is the total of immediates.
50763 IMM32 is the number of 32 bit immediates. IMM64 is the number of 64
50764 bit immediates. */
50766 static int
50767 get_num_immediates (rtx_insn *insn, int *imm, int *imm32, int *imm64)
50769 imm_info imm_values = {0, 0, 0};
50771 find_constant (insn, &imm_values);
50772 *imm = imm_values.imm;
50773 *imm32 = imm_values.imm32;
50774 *imm64 = imm_values.imm64;
50775 return imm_values.imm32 * 4 + imm_values.imm64 * 8;
50778 /* This function indicates whether any operand of an instruction is an
50779 immediate. */
50781 static bool
50782 has_immediate (rtx_insn *insn)
50784 int num_imm_operand;
50785 int num_imm32_operand;
50786 int num_imm64_operand;
50788 if (insn)
50789 return get_num_immediates (insn, &num_imm_operand, &num_imm32_operand,
50790 &num_imm64_operand);
50791 return false;
50794 /* Return single or double path for instructions. */
50796 static enum insn_path
50797 get_insn_path (rtx_insn *insn)
50799 enum attr_amdfam10_decode path = get_attr_amdfam10_decode (insn);
50801 if ((int)path == 0)
50802 return path_single;
50804 if ((int)path == 1)
50805 return path_double;
50807 return path_multi;
50810 /* Return insn dispatch group. */
50812 static enum dispatch_group
50813 get_insn_group (rtx_insn *insn)
50815 enum dispatch_group group = get_mem_group (insn);
50816 if (group)
50817 return group;
50819 if (is_branch (insn))
50820 return disp_branch;
50822 if (is_cmp (insn))
50823 return disp_cmp;
50825 if (has_immediate (insn))
50826 return disp_imm;
50828 if (is_prefetch (insn))
50829 return disp_prefetch;
50831 return disp_no_group;
50834 /* Count number of GROUP restricted instructions in a dispatch
50835 window WINDOW_LIST. */
50837 static int
50838 count_num_restricted (rtx_insn *insn, dispatch_windows *window_list)
50840 enum dispatch_group group = get_insn_group (insn);
50841 int imm_size;
50842 int num_imm_operand;
50843 int num_imm32_operand;
50844 int num_imm64_operand;
50846 if (group == disp_no_group)
50847 return 0;
50849 if (group == disp_imm)
50851 imm_size = get_num_immediates (insn, &num_imm_operand, &num_imm32_operand,
50852 &num_imm64_operand);
50853 if (window_list->imm_size + imm_size > MAX_IMM_SIZE
50854 || num_imm_operand + window_list->num_imm > MAX_IMM
50855 || (num_imm32_operand > 0
50856 && (window_list->num_imm_32 + num_imm32_operand > MAX_IMM_32
50857 || window_list->num_imm_64 * 2 + num_imm32_operand > MAX_IMM_32))
50858 || (num_imm64_operand > 0
50859 && (window_list->num_imm_64 + num_imm64_operand > MAX_IMM_64
50860 || window_list->num_imm_32 + num_imm64_operand * 2 > MAX_IMM_32))
50861 || (window_list->imm_size + imm_size == MAX_IMM_SIZE
50862 && num_imm64_operand > 0
50863 && ((window_list->num_imm_64 > 0
50864 && window_list->num_insn >= 2)
50865 || window_list->num_insn >= 3)))
50866 return BIG;
50868 return 1;
50871 if ((group == disp_load_store
50872 && (window_list->num_loads >= MAX_LOAD
50873 || window_list->num_stores >= MAX_STORE))
50874 || ((group == disp_load
50875 || group == disp_prefetch)
50876 && window_list->num_loads >= MAX_LOAD)
50877 || (group == disp_store
50878 && window_list->num_stores >= MAX_STORE))
50879 return BIG;
50881 return 1;
50884 /* This function returns true if insn satisfies dispatch rules on the
50885 last window scheduled. */
50887 static bool
50888 fits_dispatch_window (rtx_insn *insn)
50890 dispatch_windows *window_list = dispatch_window_list;
50891 dispatch_windows *window_list_next = dispatch_window_list->next;
50892 unsigned int num_restrict;
50893 enum dispatch_group group = get_insn_group (insn);
50894 enum insn_path path = get_insn_path (insn);
50895 int sum;
50897 /* Make disp_cmp and disp_jcc get scheduled at the latest. These
50898 instructions should be given the lowest priority in the
50899 scheduling process in the Haifa scheduler to make sure they will be
50900 scheduled in the same dispatch window as the reference to them. */
50901 if (group == disp_jcc || group == disp_cmp)
50902 return false;
50904 /* Check nonrestricted. */
50905 if (group == disp_no_group || group == disp_branch)
50906 return true;
50908 /* Get last dispatch window. */
50909 if (window_list_next)
50910 window_list = window_list_next;
50912 if (window_list->window_num == 1)
50914 sum = window_list->prev->window_size + window_list->window_size;
50916 if (sum == 32
50917 || (min_insn_size (insn) + sum) >= 48)
50918 /* Window 1 is full. Go for next window. */
50919 return true;
50922 num_restrict = count_num_restricted (insn, window_list);
50924 if (num_restrict > num_allowable_groups[group])
50925 return false;
50927 /* See if it fits in the first window. */
50928 if (window_list->window_num == 0)
50930 /* The first window should have only single and double path
50931 uops. */
50932 if (path == path_double
50933 && (window_list->num_uops + 2) > MAX_INSN)
50934 return false;
50935 else if (path != path_single)
50936 return false;
50938 return true;
50941 /* Add an instruction INSN with NUM_UOPS micro-operations to the
50942 dispatch window WINDOW_LIST. */
50944 static void
50945 add_insn_window (rtx_insn *insn, dispatch_windows *window_list, int num_uops)
50947 int byte_len = min_insn_size (insn);
50948 int num_insn = window_list->num_insn;
50949 int imm_size;
50950 sched_insn_info *window = window_list->window;
50951 enum dispatch_group group = get_insn_group (insn);
50952 enum insn_path path = get_insn_path (insn);
50953 int num_imm_operand;
50954 int num_imm32_operand;
50955 int num_imm64_operand;
50957 if (!window_list->violation && group != disp_cmp
50958 && !fits_dispatch_window (insn))
50959 window_list->violation = true;
50961 imm_size = get_num_immediates (insn, &num_imm_operand, &num_imm32_operand,
50962 &num_imm64_operand);
50964 /* Initialize window with new instruction. */
50965 window[num_insn].insn = insn;
50966 window[num_insn].byte_len = byte_len;
50967 window[num_insn].group = group;
50968 window[num_insn].path = path;
50969 window[num_insn].imm_bytes = imm_size;
50971 window_list->window_size += byte_len;
50972 window_list->num_insn = num_insn + 1;
50973 window_list->num_uops = window_list->num_uops + num_uops;
50974 window_list->imm_size += imm_size;
50975 window_list->num_imm += num_imm_operand;
50976 window_list->num_imm_32 += num_imm32_operand;
50977 window_list->num_imm_64 += num_imm64_operand;
50979 if (group == disp_store)
50980 window_list->num_stores += 1;
50981 else if (group == disp_load
50982 || group == disp_prefetch)
50983 window_list->num_loads += 1;
50984 else if (group == disp_load_store)
50986 window_list->num_stores += 1;
50987 window_list->num_loads += 1;
50991 /* Adds a scheduled instruction, INSN, to the current dispatch window.
50992 If the total bytes of instructions or the number of instructions in
50993 the window exceeds the allowable limit, it allocates a new window. */
50995 static void
50996 add_to_dispatch_window (rtx_insn *insn)
50998 int byte_len;
50999 dispatch_windows *window_list;
51000 dispatch_windows *next_list;
51001 dispatch_windows *window0_list;
51002 enum insn_path path;
51003 enum dispatch_group insn_group;
51004 bool insn_fits;
51005 int num_insn;
51006 int num_uops;
51007 int window_num;
51008 int insn_num_uops;
51009 int sum;
51011 if (INSN_CODE (insn) < 0)
51012 return;
51014 byte_len = min_insn_size (insn);
51015 window_list = dispatch_window_list;
51016 next_list = window_list->next;
51017 path = get_insn_path (insn);
51018 insn_group = get_insn_group (insn);
51020 /* Get the last dispatch window. */
51021 if (next_list)
51022 window_list = dispatch_window_list->next;
51024 if (path == path_single)
51025 insn_num_uops = 1;
51026 else if (path == path_double)
51027 insn_num_uops = 2;
51028 else
51029 insn_num_uops = (int) path;
51031 /* If current window is full, get a new window.
51032 Window number zero is full, if MAX_INSN uops are scheduled in it.
51033 Window number one is full, if window zero's bytes plus window
51034 one's bytes is 32, or if the bytes of the new instruction added
51035 to the total makes it greater than 48, or it has already MAX_INSN
51036 instructions in it. */
51037 num_insn = window_list->num_insn;
51038 num_uops = window_list->num_uops;
51039 window_num = window_list->window_num;
51040 insn_fits = fits_dispatch_window (insn);
51042 if (num_insn >= MAX_INSN
51043 || num_uops + insn_num_uops > MAX_INSN
51044 || !(insn_fits))
51046 window_num = ~window_num & 1;
51047 window_list = allocate_next_window (window_num);
51050 if (window_num == 0)
51052 add_insn_window (insn, window_list, insn_num_uops);
51053 if (window_list->num_insn >= MAX_INSN
51054 && insn_group == disp_branch)
51056 process_end_window ();
51057 return;
51060 else if (window_num == 1)
51062 window0_list = window_list->prev;
51063 sum = window0_list->window_size + window_list->window_size;
51064 if (sum == 32
51065 || (byte_len + sum) >= 48)
51067 process_end_window ();
51068 window_list = dispatch_window_list;
51071 add_insn_window (insn, window_list, insn_num_uops);
51073 else
51074 gcc_unreachable ();
51076 if (is_end_basic_block (insn_group))
51078 /* End of basic block is reached; do end-of-basic-block processing. */
51079 process_end_window ();
51080 return;
51084 /* Print the dispatch window, WINDOW_NUM, to FILE. */
51086 DEBUG_FUNCTION static void
51087 debug_dispatch_window_file (FILE *file, int window_num)
51089 dispatch_windows *list;
51090 int i;
51092 if (window_num == 0)
51093 list = dispatch_window_list;
51094 else
51095 list = dispatch_window_list1;
51097 fprintf (file, "Window #%d:\n", list->window_num);
51098 fprintf (file, " num_insn = %d, num_uops = %d, window_size = %d\n",
51099 list->num_insn, list->num_uops, list->window_size);
51100 fprintf (file, " num_imm = %d, num_imm_32 = %d, num_imm_64 = %d, imm_size = %d\n",
51101 list->num_imm, list->num_imm_32, list->num_imm_64, list->imm_size);
51103 fprintf (file, " num_loads = %d, num_stores = %d\n", list->num_loads,
51104 list->num_stores);
51105 fprintf (file, " insn info:\n");
51107 for (i = 0; i < MAX_INSN; i++)
51109 if (!list->window[i].insn)
51110 break;
51111 fprintf (file, " group[%d] = %s, insn[%d] = %p, path[%d] = %d byte_len[%d] = %d, imm_bytes[%d] = %d\n",
51112 i, group_name[list->window[i].group],
51113 i, (void *)list->window[i].insn,
51114 i, list->window[i].path,
51115 i, list->window[i].byte_len,
51116 i, list->window[i].imm_bytes);
51120 /* Print to stdout a dispatch window. */
51122 DEBUG_FUNCTION void
51123 debug_dispatch_window (int window_num)
51125 debug_dispatch_window_file (stdout, window_num);
51128 /* Print INSN dispatch information to FILE. */
51130 DEBUG_FUNCTION static void
51131 debug_insn_dispatch_info_file (FILE *file, rtx_insn *insn)
51133 int byte_len;
51134 enum insn_path path;
51135 enum dispatch_group group;
51136 int imm_size;
51137 int num_imm_operand;
51138 int num_imm32_operand;
51139 int num_imm64_operand;
51141 if (INSN_CODE (insn) < 0)
51142 return;
51144 byte_len = min_insn_size (insn);
51145 path = get_insn_path (insn);
51146 group = get_insn_group (insn);
51147 imm_size = get_num_immediates (insn, &num_imm_operand, &num_imm32_operand,
51148 &num_imm64_operand);
51150 fprintf (file, " insn info:\n");
51151 fprintf (file, " group = %s, path = %d, byte_len = %d\n",
51152 group_name[group], path, byte_len);
51153 fprintf (file, " num_imm = %d, num_imm_32 = %d, num_imm_64 = %d, imm_size = %d\n",
51154 num_imm_operand, num_imm32_operand, num_imm64_operand, imm_size);
51157 /* Print to STDERR the status of the ready list with respect to
51158 dispatch windows. */
51160 DEBUG_FUNCTION void
51161 debug_ready_dispatch (void)
51163 int i;
51164 int no_ready = number_in_ready ();
51166 fprintf (stdout, "Number of ready: %d\n", no_ready);
51168 for (i = 0; i < no_ready; i++)
51169 debug_insn_dispatch_info_file (stdout, get_ready_element (i));
51172 /* This routine is the driver of the dispatch scheduler. */
51174 static void
51175 do_dispatch (rtx_insn *insn, int mode)
51177 if (mode == DISPATCH_INIT)
51178 init_dispatch_sched ();
51179 else if (mode == ADD_TO_DISPATCH_WINDOW)
51180 add_to_dispatch_window (insn);
51183 /* Return TRUE if Dispatch Scheduling is supported. */
51185 static bool
51186 has_dispatch (rtx_insn *insn, int action)
51188 if ((TARGET_BDVER1 || TARGET_BDVER2 || TARGET_BDVER3 || TARGET_BDVER4)
51189 && flag_dispatch_scheduler)
51190 switch (action)
51192 default:
51193 return false;
51195 case IS_DISPATCH_ON:
51196 return true;
51197 break;
51199 case IS_CMP:
51200 return is_cmp (insn);
51202 case DISPATCH_VIOLATION:
51203 return dispatch_violation ();
51205 case FITS_DISPATCH_WINDOW:
51206 return fits_dispatch_window (insn);
51209 return false;
51212 /* Implementation of the reassociation_width target hook, used by the
51213 reassoc phase to identify the parallelism level in a reassociated
51214 tree. The statement's tree_code is passed in OPC. The argument type
51215 is passed in MODE.
51217 Currently parallel reassociation is enabled for Atom
51218 processors only and we set reassociation width to be 2
51219 because Atom may issue up to 2 instructions per cycle.
51221 Return value should be fixed if parallel reassociation is
51222 enabled for other processors. */
51224 static int
51225 ix86_reassociation_width (unsigned int, machine_mode mode)
51227 /* Vector part. */
51228 if (VECTOR_MODE_P (mode))
51230 if (TARGET_VECTOR_PARALLEL_EXECUTION)
51231 return 2;
51232 else
51233 return 1;
51236 /* Scalar part. */
51237 if (INTEGRAL_MODE_P (mode) && TARGET_REASSOC_INT_TO_PARALLEL)
51238 return 2;
51239 else if (FLOAT_MODE_P (mode) && TARGET_REASSOC_FP_TO_PARALLEL)
51240 return 2;
51241 else
51242 return 1;
51245 /* ??? No autovectorization into MMX or 3DNOW until we can reliably
51246 place emms and femms instructions. */
51248 static machine_mode
51249 ix86_preferred_simd_mode (machine_mode mode)
51251 if (!TARGET_SSE)
51252 return word_mode;
51254 switch (mode)
51256 case QImode:
51257 return TARGET_AVX512BW ? V64QImode :
51258 (TARGET_AVX && !TARGET_PREFER_AVX128) ? V32QImode : V16QImode;
51259 case HImode:
51260 return TARGET_AVX512BW ? V32HImode :
51261 (TARGET_AVX && !TARGET_PREFER_AVX128) ? V16HImode : V8HImode;
51262 case SImode:
51263 return TARGET_AVX512F ? V16SImode :
51264 (TARGET_AVX && !TARGET_PREFER_AVX128) ? V8SImode : V4SImode;
51265 case DImode:
51266 return TARGET_AVX512F ? V8DImode :
51267 (TARGET_AVX && !TARGET_PREFER_AVX128) ? V4DImode : V2DImode;
51269 case SFmode:
51270 if (TARGET_AVX512F)
51271 return V16SFmode;
51272 else if (TARGET_AVX && !TARGET_PREFER_AVX128)
51273 return V8SFmode;
51274 else
51275 return V4SFmode;
51277 case DFmode:
51278 if (!TARGET_VECTORIZE_DOUBLE)
51279 return word_mode;
51280 else if (TARGET_AVX512F)
51281 return V8DFmode;
51282 else if (TARGET_AVX && !TARGET_PREFER_AVX128)
51283 return V4DFmode;
51284 else if (TARGET_SSE2)
51285 return V2DFmode;
51286 /* FALLTHRU */
51288 default:
51289 return word_mode;
51293 /* If AVX is enabled then try vectorizing with both 256bit and 128bit
51294 vectors. If AVX512F is enabled then try vectorizing with 512bit,
51295 256bit and 128bit vectors. */
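/* The return value is a bitmask of vector sizes in bytes: 64 | 32 | 16
   permits 512-, 256- and 128-bit vectors, while 0 means only the
   preferred SIMD mode is tried.  */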
51297 static unsigned int
51298 ix86_autovectorize_vector_sizes (void)
51300 return TARGET_AVX512F ? 64 | 32 | 16 :
51301 (TARGET_AVX && !TARGET_PREFER_AVX128) ? 32 | 16 : 0;
51306 /* Return class of registers which could be used for pseudo of MODE
51307 and of class RCLASS for spilling instead of memory. Return NO_REGS
51308 if it is not possible or not profitable. */
51309 static reg_class_t
51310 ix86_spill_class (reg_class_t rclass, machine_mode mode)
51312 if (TARGET_SSE && TARGET_GENERAL_REGS_SSE_SPILL && ! TARGET_MMX
51313 && (mode == SImode || (TARGET_64BIT && mode == DImode))
51314 && rclass != NO_REGS && INTEGER_CLASS_P (rclass))
51315 return ALL_SSE_REGS;
51316 return NO_REGS;
51319 /* Implement targetm.vectorize.init_cost. */
51321 static void *
51322 ix86_init_cost (struct loop *)
51324 unsigned *cost = XNEWVEC (unsigned, 3);
51325 cost[vect_prologue] = cost[vect_body] = cost[vect_epilogue] = 0;
51326 return cost;
51329 /* Implement targetm.vectorize.add_stmt_cost. */
51331 static unsigned
51332 ix86_add_stmt_cost (void *data, int count, enum vect_cost_for_stmt kind,
51333 struct _stmt_vec_info *stmt_info, int misalign,
51334 enum vect_cost_model_location where)
51336 unsigned *cost = (unsigned *) data;
51337 unsigned retval = 0;
51339 tree vectype = stmt_info ? stmt_vectype (stmt_info) : NULL_TREE;
51340 int stmt_cost = ix86_builtin_vectorization_cost (kind, vectype, misalign);
51342 /* Statements in an inner loop relative to the loop being
51343 vectorized are weighted more heavily. The value here is
51344 arbitrary and could potentially be improved with analysis. */
51345 if (where == vect_body && stmt_info && stmt_in_inner_loop_p (stmt_info))
51346 count *= 50; /* FIXME. */
51348 retval = (unsigned) (count * stmt_cost);
51350 /* We need to multiply all vector stmt cost by 1.7 (estimated cost)
51351 for Silvermont as it has an out-of-order integer pipeline and can execute
51352 2 scalar instructions per tick, but has an in-order SIMD pipeline. */
51353 if (TARGET_SILVERMONT || TARGET_INTEL)
51354 if (stmt_info && stmt_info->stmt)
51356 tree lhs_op = gimple_get_lhs (stmt_info->stmt);
51357 if (lhs_op && TREE_CODE (TREE_TYPE (lhs_op)) == INTEGER_TYPE)
51358 retval = (retval * 17) / 10;
51361 cost[where] += retval;
51363 return retval;
51366 /* Implement targetm.vectorize.finish_cost. */
51368 static void
51369 ix86_finish_cost (void *data, unsigned *prologue_cost,
51370 unsigned *body_cost, unsigned *epilogue_cost)
51372 unsigned *cost = (unsigned *) data;
51373 *prologue_cost = cost[vect_prologue];
51374 *body_cost = cost[vect_body];
51375 *epilogue_cost = cost[vect_epilogue];
51378 /* Implement targetm.vectorize.destroy_cost_data. */
51380 static void
51381 ix86_destroy_cost_data (void *data)
51383 free (data);
51386 /* Validate target specific memory model bits in VAL. */
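/* For example (using the front-end macros documented for this target,
   not defined in this file), __atomic_exchange_n (&lock, 1,
   __ATOMIC_ACQUIRE | __ATOMIC_HLE_ACQUIRE) reaches this hook with
   IX86_HLE_ACQUIRE set alongside the memory-model bits; the checks
   below warn about inconsistent combinations, e.g. an HLE bit paired
   with too weak a memory model.  */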
51388 static unsigned HOST_WIDE_INT
51389 ix86_memmodel_check (unsigned HOST_WIDE_INT val)
51391 enum memmodel model = memmodel_from_int (val);
51392 bool strong;
51394 if (val & ~(unsigned HOST_WIDE_INT)(IX86_HLE_ACQUIRE|IX86_HLE_RELEASE
51395 |MEMMODEL_MASK)
51396 || ((val & IX86_HLE_ACQUIRE) && (val & IX86_HLE_RELEASE)))
51398 warning (OPT_Winvalid_memory_model,
51399 "Unknown architecture specific memory model");
51400 return MEMMODEL_SEQ_CST;
51402 strong = (is_mm_acq_rel (model) || is_mm_seq_cst (model));
51403 if (val & IX86_HLE_ACQUIRE && !(is_mm_acquire (model) || strong))
51405 warning (OPT_Winvalid_memory_model,
51406 "HLE_ACQUIRE not used with ACQUIRE or stronger memory model");
51407 return MEMMODEL_SEQ_CST | IX86_HLE_ACQUIRE;
51409 if (val & IX86_HLE_RELEASE && !(is_mm_release (model) || strong))
51411 warning (OPT_Winvalid_memory_model,
51412 "HLE_RELEASE not used with RELEASE or stronger memory model");
51413 return MEMMODEL_SEQ_CST | IX86_HLE_RELEASE;
51415 return val;
51418 /* Set CLONEI->vecsize_mangle, CLONEI->vecsize_int,
51419 CLONEI->vecsize_float and if CLONEI->simdlen is 0, also
51420 CLONEI->simdlen. Return 0 if SIMD clones shouldn't be emitted,
51421 or number of vecsize_mangle variants that should be emitted. */
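/* The vecsize_mangle letters chosen below correspond, as reflected in
   ix86_simd_clone_adjust further down, to the ISA of the clone:
   'b' is the SSE2 variant, 'c' the AVX variant and 'd' the AVX2
   variant; the matching vector register widths are set in the switch
   near the end of this function.  */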
51423 static int
51424 ix86_simd_clone_compute_vecsize_and_simdlen (struct cgraph_node *node,
51425 struct cgraph_simd_clone *clonei,
51426 tree base_type, int num)
51428 int ret = 1;
51430 if (clonei->simdlen
51431 && (clonei->simdlen < 2
51432 || clonei->simdlen > 16
51433 || (clonei->simdlen & (clonei->simdlen - 1)) != 0))
51435 warning_at (DECL_SOURCE_LOCATION (node->decl), 0,
51436 "unsupported simdlen %d", clonei->simdlen);
51437 return 0;
51440 tree ret_type = TREE_TYPE (TREE_TYPE (node->decl));
51441 if (TREE_CODE (ret_type) != VOID_TYPE)
51442 switch (TYPE_MODE (ret_type))
51444 case QImode:
51445 case HImode:
51446 case SImode:
51447 case DImode:
51448 case SFmode:
51449 case DFmode:
51450 /* case SCmode: */
51451 /* case DCmode: */
51452 break;
51453 default:
51454 warning_at (DECL_SOURCE_LOCATION (node->decl), 0,
51455 "unsupported return type %qT for simd\n", ret_type);
51456 return 0;
51459 tree t;
51460 int i;
51462 for (t = DECL_ARGUMENTS (node->decl), i = 0; t; t = DECL_CHAIN (t), i++)
51463 /* FIXME: Shouldn't we allow such arguments if they are uniform? */
51464 switch (TYPE_MODE (TREE_TYPE (t)))
51466 case QImode:
51467 case HImode:
51468 case SImode:
51469 case DImode:
51470 case SFmode:
51471 case DFmode:
51472 /* case SCmode: */
51473 /* case DCmode: */
51474 break;
51475 default:
51476 warning_at (DECL_SOURCE_LOCATION (node->decl), 0,
51477 "unsupported argument type %qT for simd\n", TREE_TYPE (t));
51478 return 0;
51481 if (clonei->cilk_elemental)
51483 /* Parse the processor clause here. If not present, default to 'b'. */
51484 clonei->vecsize_mangle = 'b';
51486 else if (!TREE_PUBLIC (node->decl))
51488 /* If the function isn't exported, we can pick up just one ISA
51489 for the clones. */
51490 if (TARGET_AVX2)
51491 clonei->vecsize_mangle = 'd';
51492 else if (TARGET_AVX)
51493 clonei->vecsize_mangle = 'c';
51494 else
51495 clonei->vecsize_mangle = 'b';
51496 ret = 1;
51498 else
51500 clonei->vecsize_mangle = "bcd"[num];
51501 ret = 3;
51503 switch (clonei->vecsize_mangle)
51505 case 'b':
51506 clonei->vecsize_int = 128;
51507 clonei->vecsize_float = 128;
51508 break;
51509 case 'c':
51510 clonei->vecsize_int = 128;
51511 clonei->vecsize_float = 256;
51512 break;
51513 case 'd':
51514 clonei->vecsize_int = 256;
51515 clonei->vecsize_float = 256;
51516 break;
51518 if (clonei->simdlen == 0)
51520 if (SCALAR_INT_MODE_P (TYPE_MODE (base_type)))
51521 clonei->simdlen = clonei->vecsize_int;
51522 else
51523 clonei->simdlen = clonei->vecsize_float;
51524 clonei->simdlen /= GET_MODE_BITSIZE (TYPE_MODE (base_type));
51525 if (clonei->simdlen > 16)
51526 clonei->simdlen = 16;
51528 return ret;
51531 /* Add target attribute to SIMD clone NODE if needed. */
51533 static void
51534 ix86_simd_clone_adjust (struct cgraph_node *node)
51536 const char *str = NULL;
51537 gcc_assert (node->decl == cfun->decl);
51538 switch (node->simdclone->vecsize_mangle)
51540 case 'b':
51541 if (!TARGET_SSE2)
51542 str = "sse2";
51543 break;
51544 case 'c':
51545 if (!TARGET_AVX)
51546 str = "avx";
51547 break;
51548 case 'd':
51549 if (!TARGET_AVX2)
51550 str = "avx2";
51551 break;
51552 default:
51553 gcc_unreachable ();
51555 if (str == NULL)
51556 return;
51557 push_cfun (NULL);
51558 tree args = build_tree_list (NULL_TREE, build_string (strlen (str), str));
51559 bool ok = ix86_valid_target_attribute_p (node->decl, NULL, args, 0);
51560 gcc_assert (ok);
51561 pop_cfun ();
51562 ix86_reset_previous_fndecl ();
51563 ix86_set_current_function (node->decl);
51566 /* If SIMD clone NODE can't be used in a vectorized loop
51567 in current function, return -1, otherwise return a badness of using it
51568 (0 if it is most desirable from vecsize_mangle point of view, 1
51569 slightly less desirable, etc.). */
51571 static int
51572 ix86_simd_clone_usable (struct cgraph_node *node)
51574 switch (node->simdclone->vecsize_mangle)
51576 case 'b':
51577 if (!TARGET_SSE2)
51578 return -1;
51579 if (!TARGET_AVX)
51580 return 0;
51581 return TARGET_AVX2 ? 2 : 1;
51582 case 'c':
51583 if (!TARGET_AVX)
51584 return -1;
51585 return TARGET_AVX2 ? 1 : 0;
51586 break;
51587 case 'd':
51588 if (!TARGET_AVX2)
51589 return -1;
51590 return 0;
51591 default:
51592 gcc_unreachable ();
51596 /* This function adjusts the unroll factor based on
51597 the hardware capabilities. For example, bdver3 has
51598 a loop buffer which makes unrolling of smaller
51599 loops less important. This function decides the
51600 unroll factor using the number of memory references
51601 (the value 32 is used) as a heuristic. */
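/* For example, a loop body in which 8 memory references are counted
   is given an unroll factor of 32/8 = 4; references wider than four
   words count double (see the n_words test below).  */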
51603 static unsigned
51604 ix86_loop_unroll_adjust (unsigned nunroll, struct loop *loop)
51606 basic_block *bbs;
51607 rtx_insn *insn;
51608 unsigned i;
51609 unsigned mem_count = 0;
51611 if (!TARGET_ADJUST_UNROLL)
51612 return nunroll;
51614 /* Count the number of memory references within the loop body.
51615 This value determines the unrolling factor for bdver3 and bdver4
51616 architectures. */
51617 subrtx_iterator::array_type array;
51618 bbs = get_loop_body (loop);
51619 for (i = 0; i < loop->num_nodes; i++)
51620 FOR_BB_INSNS (bbs[i], insn)
51621 if (NONDEBUG_INSN_P (insn))
51622 FOR_EACH_SUBRTX (iter, array, PATTERN (insn), NONCONST)
51623 if (const_rtx x = *iter)
51624 if (MEM_P (x))
51626 machine_mode mode = GET_MODE (x);
51627 unsigned int n_words = GET_MODE_SIZE (mode) / UNITS_PER_WORD;
51628 if (n_words > 4)
51629 mem_count += 2;
51630 else
51631 mem_count += 1;
51633 free (bbs);
51635 if (mem_count && mem_count <= 32)
51636 return 32 / mem_count;
51638 return nunroll;
51642 /* Implement TARGET_FLOAT_EXCEPTIONS_ROUNDING_SUPPORTED_P. */
51644 static bool
51645 ix86_float_exceptions_rounding_supported_p (void)
51647 /* For x87 floating point with standard excess precision handling,
51648 there is no adddf3 pattern (since x87 floating point only has
51649 XFmode operations) so the default hook implementation gets this
51650 wrong. */
51651 return TARGET_80387 || TARGET_SSE_MATH;
51654 /* Implement TARGET_ATOMIC_ASSIGN_EXPAND_FENV. */
51656 static void
51657 ix86_atomic_assign_expand_fenv (tree *hold, tree *clear, tree *update)
51659 if (!TARGET_80387 && !TARGET_SSE_MATH)
51660 return;
51661 tree exceptions_var = create_tmp_var (integer_type_node);
51662 if (TARGET_80387)
51664 tree fenv_index_type = build_index_type (size_int (6));
51665 tree fenv_type = build_array_type (unsigned_type_node, fenv_index_type);
51666 tree fenv_var = create_tmp_var (fenv_type);
51667 mark_addressable (fenv_var);
51668 tree fenv_ptr = build_pointer_type (fenv_type);
51669 tree fenv_addr = build1 (ADDR_EXPR, fenv_ptr, fenv_var);
51670 fenv_addr = fold_convert (ptr_type_node, fenv_addr);
51671 tree fnstenv = ix86_builtins[IX86_BUILTIN_FNSTENV];
51672 tree fldenv = ix86_builtins[IX86_BUILTIN_FLDENV];
51673 tree fnstsw = ix86_builtins[IX86_BUILTIN_FNSTSW];
51674 tree fnclex = ix86_builtins[IX86_BUILTIN_FNCLEX];
51675 tree hold_fnstenv = build_call_expr (fnstenv, 1, fenv_addr);
51676 tree hold_fnclex = build_call_expr (fnclex, 0);
51677 *hold = build2 (COMPOUND_EXPR, void_type_node, hold_fnstenv,
51678 hold_fnclex);
51679 *clear = build_call_expr (fnclex, 0);
51680 tree sw_var = create_tmp_var (short_unsigned_type_node);
51681 tree fnstsw_call = build_call_expr (fnstsw, 0);
51682 tree sw_mod = build2 (MODIFY_EXPR, short_unsigned_type_node,
51683 sw_var, fnstsw_call);
51684 tree exceptions_x87 = fold_convert (integer_type_node, sw_var);
51685 tree update_mod = build2 (MODIFY_EXPR, integer_type_node,
51686 exceptions_var, exceptions_x87);
51687 *update = build2 (COMPOUND_EXPR, integer_type_node,
51688 sw_mod, update_mod);
51689 tree update_fldenv = build_call_expr (fldenv, 1, fenv_addr);
51690 *update = build2 (COMPOUND_EXPR, void_type_node, *update, update_fldenv);
51692 if (TARGET_SSE_MATH)
51694 tree mxcsr_orig_var = create_tmp_var (unsigned_type_node);
51695 tree mxcsr_mod_var = create_tmp_var (unsigned_type_node);
51696 tree stmxcsr = ix86_builtins[IX86_BUILTIN_STMXCSR];
51697 tree ldmxcsr = ix86_builtins[IX86_BUILTIN_LDMXCSR];
51698 tree stmxcsr_hold_call = build_call_expr (stmxcsr, 0);
51699 tree hold_assign_orig = build2 (MODIFY_EXPR, unsigned_type_node,
51700 mxcsr_orig_var, stmxcsr_hold_call);
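/* Build the MXCSR value to load for the duration of the block:
   OR-ing in 0x1f80 sets the six exception mask bits (bits 7-12), and
   AND-ing with 0xffffffc0 clears the exception flag bits (bits 0-5),
   so exceptions are masked and the sticky flags start out clear.  */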
51701 tree hold_mod_val = build2 (BIT_IOR_EXPR, unsigned_type_node,
51702 mxcsr_orig_var,
51703 build_int_cst (unsigned_type_node, 0x1f80));
51704 hold_mod_val = build2 (BIT_AND_EXPR, unsigned_type_node, hold_mod_val,
51705 build_int_cst (unsigned_type_node, 0xffffffc0));
51706 tree hold_assign_mod = build2 (MODIFY_EXPR, unsigned_type_node,
51707 mxcsr_mod_var, hold_mod_val);
51708 tree ldmxcsr_hold_call = build_call_expr (ldmxcsr, 1, mxcsr_mod_var);
51709 tree hold_all = build2 (COMPOUND_EXPR, unsigned_type_node,
51710 hold_assign_orig, hold_assign_mod);
51711 hold_all = build2 (COMPOUND_EXPR, void_type_node, hold_all,
51712 ldmxcsr_hold_call);
51713 if (*hold)
51714 *hold = build2 (COMPOUND_EXPR, void_type_node, *hold, hold_all);
51715 else
51716 *hold = hold_all;
51717 tree ldmxcsr_clear_call = build_call_expr (ldmxcsr, 1, mxcsr_mod_var);
51718 if (*clear)
51719 *clear = build2 (COMPOUND_EXPR, void_type_node, *clear,
51720 ldmxcsr_clear_call);
51721 else
51722 *clear = ldmxcsr_clear_call;
51723 tree stmxcsr_update_call = build_call_expr (stmxcsr, 0);
51724 tree exceptions_sse = fold_convert (integer_type_node,
51725 stmxcsr_update_call);
51726 if (*update)
51728 tree exceptions_mod = build2 (BIT_IOR_EXPR, integer_type_node,
51729 exceptions_var, exceptions_sse);
51730 tree exceptions_assign = build2 (MODIFY_EXPR, integer_type_node,
51731 exceptions_var, exceptions_mod);
51732 *update = build2 (COMPOUND_EXPR, integer_type_node, *update,
51733 exceptions_assign);
51735 else
51736 *update = build2 (MODIFY_EXPR, integer_type_node,
51737 exceptions_var, exceptions_sse);
51738 tree ldmxcsr_update_call = build_call_expr (ldmxcsr, 1, mxcsr_orig_var);
51739 *update = build2 (COMPOUND_EXPR, void_type_node, *update,
51740 ldmxcsr_update_call);
51742 tree atomic_feraiseexcept
51743 = builtin_decl_implicit (BUILT_IN_ATOMIC_FERAISEEXCEPT);
51744 tree atomic_feraiseexcept_call = build_call_expr (atomic_feraiseexcept,
51745 1, exceptions_var);
51746 *update = build2 (COMPOUND_EXPR, void_type_node, *update,
51747 atomic_feraiseexcept_call);
51750 /* Return mode to be used for bounds or VOIDmode
51751 if bounds are not supported. */
51753 static enum machine_mode
51754 ix86_mpx_bound_mode ()
51756 /* Do not support pointer checker if MPX
51757 is not enabled. */
51758 if (!TARGET_MPX)
51760 if (flag_check_pointer_bounds)
51761 warning (0, "Pointer Checker requires MPX support on this target."
51762 " Use -mmpx options to enable MPX.");
51763 return VOIDmode;
51766 return BNDmode;
51769 /* Return constant used to statically initialize constant bounds.
51771 This function is used to create special bound values. For now
51772 only INIT bounds and NONE bounds are expected. More special
51773 values may be added later. */
51775 static tree
51776 ix86_make_bounds_constant (HOST_WIDE_INT lb, HOST_WIDE_INT ub)
51778 tree low = lb ? build_minus_one_cst (pointer_sized_int_node)
51779 : build_zero_cst (pointer_sized_int_node);
51780 tree high = ub ? build_zero_cst (pointer_sized_int_node)
51781 : build_minus_one_cst (pointer_sized_int_node);
51783 /* This function is supposed to be used to create INIT and
51784 NONE bounds only. */
51785 gcc_assert ((lb == 0 && ub == -1)
51786 || (lb == -1 && ub == 0));
51788 return build_complex (NULL, low, high);
51791 /* Generate a list of statements STMTS to initialize pointer bounds
51792 variable VAR with bounds LB and UB. Return the number of generated
51793 statements. */
51795 static int
51796 ix86_initialize_bounds (tree var, tree lb, tree ub, tree *stmts)
51798 tree bnd_ptr = build_pointer_type (pointer_sized_int_node);
51799 tree lhs, modify, var_p;
51801 ub = build1 (BIT_NOT_EXPR, pointer_sized_int_node, ub);
51802 var_p = fold_convert (bnd_ptr, build_fold_addr_expr (var));
51804 lhs = build1 (INDIRECT_REF, pointer_sized_int_node, var_p);
51805 modify = build2 (MODIFY_EXPR, TREE_TYPE (lhs), lhs, lb);
51806 append_to_statement_list (modify, stmts);
51808 lhs = build1 (INDIRECT_REF, pointer_sized_int_node,
51809 build2 (POINTER_PLUS_EXPR, bnd_ptr, var_p,
51810 TYPE_SIZE_UNIT (pointer_sized_int_node)));
51811 modify = build2 (MODIFY_EXPR, TREE_TYPE (lhs), lhs, ub);
51812 append_to_statement_list (modify, stmts);
51814 return 2;
51817 #if !TARGET_MACHO && !TARGET_DLLIMPORT_DECL_ATTRIBUTES
51818 /* For i386, a common symbol is local only for non-PIE binaries. For
51819 x86-64, a common symbol is local only for non-PIE binaries or if the
51820 linker supports copy relocations in PIE binaries. */
51822 static bool
51823 ix86_binds_local_p (const_tree exp)
51825 return default_binds_local_p_3 (exp, flag_shlib != 0, true, true,
51826 (!flag_pic
51827 || (TARGET_64BIT
51828 && HAVE_LD_PIE_COPYRELOC != 0)));
51830 #endif
51832 /* If MEM is in the form of [base+offset], extract the two parts
51833 of the address and store them in BASE and OFFSET; otherwise return false. */
51835 static bool
51836 extract_base_offset_in_addr (rtx mem, rtx *base, rtx *offset)
51838 rtx addr;
51840 gcc_assert (MEM_P (mem));
51842 addr = XEXP (mem, 0);
51844 if (GET_CODE (addr) == CONST)
51845 addr = XEXP (addr, 0);
51847 if (REG_P (addr) || GET_CODE (addr) == SYMBOL_REF)
51849 *base = addr;
51850 *offset = const0_rtx;
51851 return true;
51854 if (GET_CODE (addr) == PLUS
51855 && (REG_P (XEXP (addr, 0))
51856 || GET_CODE (XEXP (addr, 0)) == SYMBOL_REF)
51857 && CONST_INT_P (XEXP (addr, 1)))
51859 *base = XEXP (addr, 0);
51860 *offset = XEXP (addr, 1);
51861 return true;
51864 return false;
51867 /* Given OPERANDS of consecutive load/store, check if we can merge
51868 them into move multiple. LOAD is true if they are load instructions.
51869 MODE is the mode of memory operands. */
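/* For instance, with MODE == DImode the address pair [base+8] and
   [base+16] qualifies, because the second offset equals the first
   plus GET_MODE_SIZE (DImode) == 8; any other spacing fails the
   adjacency check below.  */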
51871 bool
51872 ix86_operands_ok_for_move_multiple (rtx *operands, bool load,
51873 enum machine_mode mode)
51875 HOST_WIDE_INT offval_1, offval_2, msize;
51876 rtx mem_1, mem_2, reg_1, reg_2, base_1, base_2, offset_1, offset_2;
51878 if (load)
51880 mem_1 = operands[1];
51881 mem_2 = operands[3];
51882 reg_1 = operands[0];
51883 reg_2 = operands[2];
51885 else
51887 mem_1 = operands[0];
51888 mem_2 = operands[2];
51889 reg_1 = operands[1];
51890 reg_2 = operands[3];
51893 gcc_assert (REG_P (reg_1) && REG_P (reg_2));
51895 if (REGNO (reg_1) != REGNO (reg_2))
51896 return false;
51898 /* Check if the addresses are in the form of [base+offset]. */
51899 if (!extract_base_offset_in_addr (mem_1, &base_1, &offset_1))
51900 return false;
51901 if (!extract_base_offset_in_addr (mem_2, &base_2, &offset_2))
51902 return false;
51904 /* Check if the bases are the same. */
51905 if (!rtx_equal_p (base_1, base_2))
51906 return false;
51908 offval_1 = INTVAL (offset_1);
51909 offval_2 = INTVAL (offset_2);
51910 msize = GET_MODE_SIZE (mode);
51911 /* Check if mem_1 is adjacent to mem_2 and mem_1 has lower address. */
51912 if (offval_1 + msize != offval_2)
51913 return false;
51915 return true;
51918 /* Initialize the GCC target structure. */
51919 #undef TARGET_RETURN_IN_MEMORY
51920 #define TARGET_RETURN_IN_MEMORY ix86_return_in_memory
51922 #undef TARGET_LEGITIMIZE_ADDRESS
51923 #define TARGET_LEGITIMIZE_ADDRESS ix86_legitimize_address
51925 #undef TARGET_ATTRIBUTE_TABLE
51926 #define TARGET_ATTRIBUTE_TABLE ix86_attribute_table
51927 #undef TARGET_FUNCTION_ATTRIBUTE_INLINABLE_P
51928 #define TARGET_FUNCTION_ATTRIBUTE_INLINABLE_P hook_bool_const_tree_true
51929 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
51930 # undef TARGET_MERGE_DECL_ATTRIBUTES
51931 # define TARGET_MERGE_DECL_ATTRIBUTES merge_dllimport_decl_attributes
51932 #endif
51934 #undef TARGET_COMP_TYPE_ATTRIBUTES
51935 #define TARGET_COMP_TYPE_ATTRIBUTES ix86_comp_type_attributes
51937 #undef TARGET_INIT_BUILTINS
51938 #define TARGET_INIT_BUILTINS ix86_init_builtins
51939 #undef TARGET_BUILTIN_DECL
51940 #define TARGET_BUILTIN_DECL ix86_builtin_decl
51941 #undef TARGET_EXPAND_BUILTIN
51942 #define TARGET_EXPAND_BUILTIN ix86_expand_builtin
51944 #undef TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION
51945 #define TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION \
51946 ix86_builtin_vectorized_function
51948 #undef TARGET_VECTORIZE_BUILTIN_TM_LOAD
51949 #define TARGET_VECTORIZE_BUILTIN_TM_LOAD ix86_builtin_tm_load
51951 #undef TARGET_VECTORIZE_BUILTIN_TM_STORE
51952 #define TARGET_VECTORIZE_BUILTIN_TM_STORE ix86_builtin_tm_store
51954 #undef TARGET_VECTORIZE_BUILTIN_GATHER
51955 #define TARGET_VECTORIZE_BUILTIN_GATHER ix86_vectorize_builtin_gather
51957 #undef TARGET_BUILTIN_RECIPROCAL
#define TARGET_BUILTIN_RECIPROCAL ix86_builtin_reciprocal

#undef TARGET_ASM_FUNCTION_EPILOGUE
#define TARGET_ASM_FUNCTION_EPILOGUE ix86_output_function_epilogue

#undef TARGET_ENCODE_SECTION_INFO
#ifndef SUBTARGET_ENCODE_SECTION_INFO
#define TARGET_ENCODE_SECTION_INFO ix86_encode_section_info
#else
#define TARGET_ENCODE_SECTION_INFO SUBTARGET_ENCODE_SECTION_INFO
#endif

#undef TARGET_ASM_OPEN_PAREN
#define TARGET_ASM_OPEN_PAREN ""
#undef TARGET_ASM_CLOSE_PAREN
#define TARGET_ASM_CLOSE_PAREN ""

#undef TARGET_ASM_BYTE_OP
#define TARGET_ASM_BYTE_OP ASM_BYTE

#undef TARGET_ASM_ALIGNED_HI_OP
#define TARGET_ASM_ALIGNED_HI_OP ASM_SHORT
#undef TARGET_ASM_ALIGNED_SI_OP
#define TARGET_ASM_ALIGNED_SI_OP ASM_LONG
#ifdef ASM_QUAD
#undef TARGET_ASM_ALIGNED_DI_OP
#define TARGET_ASM_ALIGNED_DI_OP ASM_QUAD
#endif

#undef TARGET_PROFILE_BEFORE_PROLOGUE
#define TARGET_PROFILE_BEFORE_PROLOGUE ix86_profile_before_prologue

#undef TARGET_MANGLE_DECL_ASSEMBLER_NAME
#define TARGET_MANGLE_DECL_ASSEMBLER_NAME ix86_mangle_decl_assembler_name

#undef TARGET_ASM_UNALIGNED_HI_OP
#define TARGET_ASM_UNALIGNED_HI_OP TARGET_ASM_ALIGNED_HI_OP
#undef TARGET_ASM_UNALIGNED_SI_OP
#define TARGET_ASM_UNALIGNED_SI_OP TARGET_ASM_ALIGNED_SI_OP
#undef TARGET_ASM_UNALIGNED_DI_OP
#define TARGET_ASM_UNALIGNED_DI_OP TARGET_ASM_ALIGNED_DI_OP

#undef TARGET_PRINT_OPERAND
#define TARGET_PRINT_OPERAND ix86_print_operand
#undef TARGET_PRINT_OPERAND_ADDRESS
#define TARGET_PRINT_OPERAND_ADDRESS ix86_print_operand_address
#undef TARGET_PRINT_OPERAND_PUNCT_VALID_P
#define TARGET_PRINT_OPERAND_PUNCT_VALID_P ix86_print_operand_punct_valid_p
#undef TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA
#define TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA i386_asm_output_addr_const_extra

#undef TARGET_SCHED_INIT_GLOBAL
#define TARGET_SCHED_INIT_GLOBAL ix86_sched_init_global
#undef TARGET_SCHED_ADJUST_COST
#define TARGET_SCHED_ADJUST_COST ix86_adjust_cost
#undef TARGET_SCHED_ISSUE_RATE
#define TARGET_SCHED_ISSUE_RATE ix86_issue_rate
#undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD
#define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD \
  ia32_multipass_dfa_lookahead
#undef TARGET_SCHED_MACRO_FUSION_P
#define TARGET_SCHED_MACRO_FUSION_P ix86_macro_fusion_p
#undef TARGET_SCHED_MACRO_FUSION_PAIR_P
#define TARGET_SCHED_MACRO_FUSION_PAIR_P ix86_macro_fusion_pair_p

#undef TARGET_FUNCTION_OK_FOR_SIBCALL
#define TARGET_FUNCTION_OK_FOR_SIBCALL ix86_function_ok_for_sibcall

#undef TARGET_MEMMODEL_CHECK
#define TARGET_MEMMODEL_CHECK ix86_memmodel_check

#undef TARGET_ATOMIC_ASSIGN_EXPAND_FENV
#define TARGET_ATOMIC_ASSIGN_EXPAND_FENV ix86_atomic_assign_expand_fenv

#ifdef HAVE_AS_TLS
#undef TARGET_HAVE_TLS
#define TARGET_HAVE_TLS true
#endif
#undef TARGET_CANNOT_FORCE_CONST_MEM
#define TARGET_CANNOT_FORCE_CONST_MEM ix86_cannot_force_const_mem
#undef TARGET_USE_BLOCKS_FOR_CONSTANT_P
#define TARGET_USE_BLOCKS_FOR_CONSTANT_P hook_bool_mode_const_rtx_true

#undef TARGET_DELEGITIMIZE_ADDRESS
#define TARGET_DELEGITIMIZE_ADDRESS ix86_delegitimize_address

#undef TARGET_MS_BITFIELD_LAYOUT_P
#define TARGET_MS_BITFIELD_LAYOUT_P ix86_ms_bitfield_layout_p

#if TARGET_MACHO
#undef TARGET_BINDS_LOCAL_P
#define TARGET_BINDS_LOCAL_P darwin_binds_local_p
#else
#undef TARGET_BINDS_LOCAL_P
#define TARGET_BINDS_LOCAL_P ix86_binds_local_p
#endif
#if TARGET_DLLIMPORT_DECL_ATTRIBUTES
#undef TARGET_BINDS_LOCAL_P
#define TARGET_BINDS_LOCAL_P i386_pe_binds_local_p
#endif

#undef TARGET_ASM_OUTPUT_MI_THUNK
#define TARGET_ASM_OUTPUT_MI_THUNK x86_output_mi_thunk
#undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
#define TARGET_ASM_CAN_OUTPUT_MI_THUNK x86_can_output_mi_thunk

#undef TARGET_ASM_FILE_START
#define TARGET_ASM_FILE_START x86_file_start

#undef TARGET_OPTION_OVERRIDE
#define TARGET_OPTION_OVERRIDE ix86_option_override

#undef TARGET_REGISTER_MOVE_COST
#define TARGET_REGISTER_MOVE_COST ix86_register_move_cost
#undef TARGET_MEMORY_MOVE_COST
#define TARGET_MEMORY_MOVE_COST ix86_memory_move_cost
#undef TARGET_RTX_COSTS
#define TARGET_RTX_COSTS ix86_rtx_costs
#undef TARGET_ADDRESS_COST
#define TARGET_ADDRESS_COST ix86_address_cost

#undef TARGET_FIXED_CONDITION_CODE_REGS
#define TARGET_FIXED_CONDITION_CODE_REGS ix86_fixed_condition_code_regs
#undef TARGET_CC_MODES_COMPATIBLE
#define TARGET_CC_MODES_COMPATIBLE ix86_cc_modes_compatible

#undef TARGET_MACHINE_DEPENDENT_REORG
#define TARGET_MACHINE_DEPENDENT_REORG ix86_reorg

#undef TARGET_BUILTIN_SETJMP_FRAME_VALUE
#define TARGET_BUILTIN_SETJMP_FRAME_VALUE ix86_builtin_setjmp_frame_value

#undef TARGET_BUILD_BUILTIN_VA_LIST
#define TARGET_BUILD_BUILTIN_VA_LIST ix86_build_builtin_va_list

#undef TARGET_FOLD_BUILTIN
#define TARGET_FOLD_BUILTIN ix86_fold_builtin

#undef TARGET_COMPARE_VERSION_PRIORITY
#define TARGET_COMPARE_VERSION_PRIORITY ix86_compare_version_priority

#undef TARGET_GENERATE_VERSION_DISPATCHER_BODY
#define TARGET_GENERATE_VERSION_DISPATCHER_BODY \
  ix86_generate_version_dispatcher_body

#undef TARGET_GET_FUNCTION_VERSIONS_DISPATCHER
#define TARGET_GET_FUNCTION_VERSIONS_DISPATCHER \
  ix86_get_function_versions_dispatcher

#undef TARGET_ENUM_VA_LIST_P
#define TARGET_ENUM_VA_LIST_P ix86_enum_va_list

#undef TARGET_FN_ABI_VA_LIST
#define TARGET_FN_ABI_VA_LIST ix86_fn_abi_va_list

#undef TARGET_CANONICAL_VA_LIST_TYPE
#define TARGET_CANONICAL_VA_LIST_TYPE ix86_canonical_va_list_type

#undef TARGET_EXPAND_BUILTIN_VA_START
#define TARGET_EXPAND_BUILTIN_VA_START ix86_va_start

#undef TARGET_MD_ASM_ADJUST
#define TARGET_MD_ASM_ADJUST ix86_md_asm_adjust

#undef TARGET_PROMOTE_PROTOTYPES
#define TARGET_PROMOTE_PROTOTYPES hook_bool_const_tree_true
#undef TARGET_SETUP_INCOMING_VARARGS
#define TARGET_SETUP_INCOMING_VARARGS ix86_setup_incoming_varargs
#undef TARGET_MUST_PASS_IN_STACK
#define TARGET_MUST_PASS_IN_STACK ix86_must_pass_in_stack
#undef TARGET_FUNCTION_ARG_ADVANCE
#define TARGET_FUNCTION_ARG_ADVANCE ix86_function_arg_advance
#undef TARGET_FUNCTION_ARG
#define TARGET_FUNCTION_ARG ix86_function_arg
#undef TARGET_INIT_PIC_REG
#define TARGET_INIT_PIC_REG ix86_init_pic_reg
#undef TARGET_USE_PSEUDO_PIC_REG
#define TARGET_USE_PSEUDO_PIC_REG ix86_use_pseudo_pic_reg
#undef TARGET_FUNCTION_ARG_BOUNDARY
#define TARGET_FUNCTION_ARG_BOUNDARY ix86_function_arg_boundary
#undef TARGET_PASS_BY_REFERENCE
#define TARGET_PASS_BY_REFERENCE ix86_pass_by_reference
#undef TARGET_INTERNAL_ARG_POINTER
#define TARGET_INTERNAL_ARG_POINTER ix86_internal_arg_pointer
#undef TARGET_UPDATE_STACK_BOUNDARY
#define TARGET_UPDATE_STACK_BOUNDARY ix86_update_stack_boundary
#undef TARGET_GET_DRAP_RTX
#define TARGET_GET_DRAP_RTX ix86_get_drap_rtx
#undef TARGET_STRICT_ARGUMENT_NAMING
#define TARGET_STRICT_ARGUMENT_NAMING hook_bool_CUMULATIVE_ARGS_true
#undef TARGET_STATIC_CHAIN
#define TARGET_STATIC_CHAIN ix86_static_chain
#undef TARGET_TRAMPOLINE_INIT
#define TARGET_TRAMPOLINE_INIT ix86_trampoline_init
#undef TARGET_RETURN_POPS_ARGS
#define TARGET_RETURN_POPS_ARGS ix86_return_pops_args

#undef TARGET_LEGITIMATE_COMBINED_INSN
#define TARGET_LEGITIMATE_COMBINED_INSN ix86_legitimate_combined_insn

#undef TARGET_ASAN_SHADOW_OFFSET
#define TARGET_ASAN_SHADOW_OFFSET ix86_asan_shadow_offset

#undef TARGET_GIMPLIFY_VA_ARG_EXPR
#define TARGET_GIMPLIFY_VA_ARG_EXPR ix86_gimplify_va_arg

#undef TARGET_SCALAR_MODE_SUPPORTED_P
#define TARGET_SCALAR_MODE_SUPPORTED_P ix86_scalar_mode_supported_p

#undef TARGET_VECTOR_MODE_SUPPORTED_P
#define TARGET_VECTOR_MODE_SUPPORTED_P ix86_vector_mode_supported_p

#undef TARGET_LIBGCC_FLOATING_MODE_SUPPORTED_P
#define TARGET_LIBGCC_FLOATING_MODE_SUPPORTED_P \
  ix86_libgcc_floating_mode_supported_p

#undef TARGET_C_MODE_FOR_SUFFIX
#define TARGET_C_MODE_FOR_SUFFIX ix86_c_mode_for_suffix

#ifdef HAVE_AS_TLS
#undef TARGET_ASM_OUTPUT_DWARF_DTPREL
#define TARGET_ASM_OUTPUT_DWARF_DTPREL i386_output_dwarf_dtprel
#endif

#ifdef SUBTARGET_INSERT_ATTRIBUTES
#undef TARGET_INSERT_ATTRIBUTES
#define TARGET_INSERT_ATTRIBUTES SUBTARGET_INSERT_ATTRIBUTES
#endif

#undef TARGET_MANGLE_TYPE
#define TARGET_MANGLE_TYPE ix86_mangle_type

#if !TARGET_MACHO
#undef TARGET_STACK_PROTECT_FAIL
#define TARGET_STACK_PROTECT_FAIL ix86_stack_protect_fail
#endif

#undef TARGET_FUNCTION_VALUE
#define TARGET_FUNCTION_VALUE ix86_function_value

#undef TARGET_FUNCTION_VALUE_REGNO_P
#define TARGET_FUNCTION_VALUE_REGNO_P ix86_function_value_regno_p

#undef TARGET_PROMOTE_FUNCTION_MODE
#define TARGET_PROMOTE_FUNCTION_MODE ix86_promote_function_mode

#undef TARGET_OVERRIDE_OPTIONS_AFTER_CHANGE
#define TARGET_OVERRIDE_OPTIONS_AFTER_CHANGE ix86_override_options_after_change

#undef TARGET_MEMBER_TYPE_FORCES_BLK
#define TARGET_MEMBER_TYPE_FORCES_BLK ix86_member_type_forces_blk

#undef TARGET_INSTANTIATE_DECLS
#define TARGET_INSTANTIATE_DECLS ix86_instantiate_decls

#undef TARGET_SECONDARY_RELOAD
#define TARGET_SECONDARY_RELOAD ix86_secondary_reload

#undef TARGET_CLASS_MAX_NREGS
#define TARGET_CLASS_MAX_NREGS ix86_class_max_nregs

#undef TARGET_PREFERRED_RELOAD_CLASS
#define TARGET_PREFERRED_RELOAD_CLASS ix86_preferred_reload_class
#undef TARGET_PREFERRED_OUTPUT_RELOAD_CLASS
#define TARGET_PREFERRED_OUTPUT_RELOAD_CLASS ix86_preferred_output_reload_class
#undef TARGET_CLASS_LIKELY_SPILLED_P
#define TARGET_CLASS_LIKELY_SPILLED_P ix86_class_likely_spilled_p

#undef TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST
#define TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST \
  ix86_builtin_vectorization_cost
#undef TARGET_VECTORIZE_VEC_PERM_CONST_OK
#define TARGET_VECTORIZE_VEC_PERM_CONST_OK \
  ix86_vectorize_vec_perm_const_ok
#undef TARGET_VECTORIZE_PREFERRED_SIMD_MODE
#define TARGET_VECTORIZE_PREFERRED_SIMD_MODE \
  ix86_preferred_simd_mode
#undef TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_SIZES
#define TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_SIZES \
  ix86_autovectorize_vector_sizes
#undef TARGET_VECTORIZE_INIT_COST
#define TARGET_VECTORIZE_INIT_COST ix86_init_cost
#undef TARGET_VECTORIZE_ADD_STMT_COST
#define TARGET_VECTORIZE_ADD_STMT_COST ix86_add_stmt_cost
#undef TARGET_VECTORIZE_FINISH_COST
#define TARGET_VECTORIZE_FINISH_COST ix86_finish_cost
#undef TARGET_VECTORIZE_DESTROY_COST_DATA
#define TARGET_VECTORIZE_DESTROY_COST_DATA ix86_destroy_cost_data

#undef TARGET_SET_CURRENT_FUNCTION
#define TARGET_SET_CURRENT_FUNCTION ix86_set_current_function

#undef TARGET_OPTION_VALID_ATTRIBUTE_P
#define TARGET_OPTION_VALID_ATTRIBUTE_P ix86_valid_target_attribute_p

#undef TARGET_OPTION_SAVE
#define TARGET_OPTION_SAVE ix86_function_specific_save

#undef TARGET_OPTION_RESTORE
#define TARGET_OPTION_RESTORE ix86_function_specific_restore

#undef TARGET_OPTION_POST_STREAM_IN
#define TARGET_OPTION_POST_STREAM_IN ix86_function_specific_post_stream_in

#undef TARGET_OPTION_PRINT
#define TARGET_OPTION_PRINT ix86_function_specific_print

#undef TARGET_OPTION_FUNCTION_VERSIONS
#define TARGET_OPTION_FUNCTION_VERSIONS ix86_function_versions

#undef TARGET_CAN_INLINE_P
#define TARGET_CAN_INLINE_P ix86_can_inline_p

#undef TARGET_EXPAND_TO_RTL_HOOK
#define TARGET_EXPAND_TO_RTL_HOOK ix86_maybe_switch_abi

#undef TARGET_LEGITIMATE_ADDRESS_P
#define TARGET_LEGITIMATE_ADDRESS_P ix86_legitimate_address_p

#undef TARGET_LRA_P
#define TARGET_LRA_P hook_bool_void_true

#undef TARGET_REGISTER_PRIORITY
#define TARGET_REGISTER_PRIORITY ix86_register_priority

#undef TARGET_REGISTER_USAGE_LEVELING_P
#define TARGET_REGISTER_USAGE_LEVELING_P hook_bool_void_true

#undef TARGET_LEGITIMATE_CONSTANT_P
#define TARGET_LEGITIMATE_CONSTANT_P ix86_legitimate_constant_p

#undef TARGET_FRAME_POINTER_REQUIRED
#define TARGET_FRAME_POINTER_REQUIRED ix86_frame_pointer_required

#undef TARGET_CAN_ELIMINATE
#define TARGET_CAN_ELIMINATE ix86_can_eliminate

#undef TARGET_EXTRA_LIVE_ON_ENTRY
#define TARGET_EXTRA_LIVE_ON_ENTRY ix86_live_on_entry

#undef TARGET_ASM_CODE_END
#define TARGET_ASM_CODE_END ix86_code_end

#undef TARGET_CONDITIONAL_REGISTER_USAGE
#define TARGET_CONDITIONAL_REGISTER_USAGE ix86_conditional_register_usage

#if TARGET_MACHO
#undef TARGET_INIT_LIBFUNCS
#define TARGET_INIT_LIBFUNCS darwin_rename_builtins
#endif

#undef TARGET_LOOP_UNROLL_ADJUST
#define TARGET_LOOP_UNROLL_ADJUST ix86_loop_unroll_adjust

#undef TARGET_SPILL_CLASS
#define TARGET_SPILL_CLASS ix86_spill_class

#undef TARGET_SIMD_CLONE_COMPUTE_VECSIZE_AND_SIMDLEN
#define TARGET_SIMD_CLONE_COMPUTE_VECSIZE_AND_SIMDLEN \
  ix86_simd_clone_compute_vecsize_and_simdlen

#undef TARGET_SIMD_CLONE_ADJUST
#define TARGET_SIMD_CLONE_ADJUST \
  ix86_simd_clone_adjust

#undef TARGET_SIMD_CLONE_USABLE
#define TARGET_SIMD_CLONE_USABLE \
  ix86_simd_clone_usable

#undef TARGET_FLOAT_EXCEPTIONS_ROUNDING_SUPPORTED_P
#define TARGET_FLOAT_EXCEPTIONS_ROUNDING_SUPPORTED_P \
  ix86_float_exceptions_rounding_supported_p
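
/* Hooks used by the generic mode-switching (optimize_mode_switching)
   pass.  */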
#undef TARGET_MODE_EMIT
#define TARGET_MODE_EMIT ix86_emit_mode_set

#undef TARGET_MODE_NEEDED
#define TARGET_MODE_NEEDED ix86_mode_needed

#undef TARGET_MODE_AFTER
#define TARGET_MODE_AFTER ix86_mode_after

#undef TARGET_MODE_ENTRY
#define TARGET_MODE_ENTRY ix86_mode_entry

#undef TARGET_MODE_EXIT
#define TARGET_MODE_EXIT ix86_mode_exit

#undef TARGET_MODE_PRIORITY
#define TARGET_MODE_PRIORITY ix86_mode_priority

#undef TARGET_CALL_FUSAGE_CONTAINS_NON_CALLEE_CLOBBERS
#define TARGET_CALL_FUSAGE_CONTAINS_NON_CALLEE_CLOBBERS true
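
/* Pointer Bounds Checker (CHKP / Intel MPX) hooks for loading, storing
   and initializing pointer bounds.  */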
#undef TARGET_LOAD_BOUNDS_FOR_ARG
#define TARGET_LOAD_BOUNDS_FOR_ARG ix86_load_bounds

#undef TARGET_STORE_BOUNDS_FOR_ARG
#define TARGET_STORE_BOUNDS_FOR_ARG ix86_store_bounds

#undef TARGET_LOAD_RETURNED_BOUNDS
#define TARGET_LOAD_RETURNED_BOUNDS ix86_load_returned_bounds

#undef TARGET_STORE_RETURNED_BOUNDS
#define TARGET_STORE_RETURNED_BOUNDS ix86_store_returned_bounds

#undef TARGET_CHKP_BOUND_MODE
#define TARGET_CHKP_BOUND_MODE ix86_mpx_bound_mode

#undef TARGET_BUILTIN_CHKP_FUNCTION
#define TARGET_BUILTIN_CHKP_FUNCTION ix86_builtin_mpx_function

#undef TARGET_CHKP_FUNCTION_VALUE_BOUNDS
#define TARGET_CHKP_FUNCTION_VALUE_BOUNDS ix86_function_value_bounds

#undef TARGET_CHKP_MAKE_BOUNDS_CONSTANT
#define TARGET_CHKP_MAKE_BOUNDS_CONSTANT ix86_make_bounds_constant

#undef TARGET_CHKP_INITIALIZE_BOUNDS
#define TARGET_CHKP_INITIALIZE_BOUNDS ix86_initialize_bounds

#undef TARGET_SETUP_INCOMING_VARARG_BOUNDS
#define TARGET_SETUP_INCOMING_VARARG_BOUNDS ix86_setup_incoming_vararg_bounds

#undef TARGET_OFFLOAD_OPTIONS
#define TARGET_OFFLOAD_OPTIONS \
  ix86_offload_options

#undef TARGET_ABSOLUTE_BIGGEST_ALIGNMENT
#define TARGET_ABSOLUTE_BIGGEST_ALIGNMENT 512
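
/* TARGET_INITIALIZER (from target-def.h) expands to a full initializer
   for the target hook vector, picking up the TARGET_* overrides defined
   above and the documented defaults for every hook left untouched.  */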
struct gcc_target targetm = TARGET_INITIALIZER;
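
/* Garbage-collection and precompiled-header root tables generated for
   this file by gengtype.  */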
#include "gt-i386.h"