gcc/config/i386/i386.c
1 /* Subroutines used for code generation on IA-32.
2 Copyright (C) 1988-2014 Free Software Foundation, Inc.
4 This file is part of GCC.
6 GCC is free software; you can redistribute it and/or modify
7 it under the terms of the GNU General Public License as published by
8 the Free Software Foundation; either version 3, or (at your option)
9 any later version.
11 GCC is distributed in the hope that it will be useful,
12 but WITHOUT ANY WARRANTY; without even the implied warranty of
13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 GNU General Public License for more details.
16 You should have received a copy of the GNU General Public License
17 along with GCC; see the file COPYING3. If not see
18 <http://www.gnu.org/licenses/>. */
20 #include "config.h"
21 #include "system.h"
22 #include "coretypes.h"
23 #include "tm.h"
24 #include "rtl.h"
25 #include "tree.h"
26 #include "stringpool.h"
27 #include "attribs.h"
28 #include "calls.h"
29 #include "stor-layout.h"
30 #include "varasm.h"
31 #include "tm_p.h"
32 #include "regs.h"
33 #include "hard-reg-set.h"
34 #include "insn-config.h"
35 #include "conditions.h"
36 #include "output.h"
37 #include "insn-codes.h"
38 #include "insn-attr.h"
39 #include "flags.h"
40 #include "except.h"
41 #include "hashtab.h"
42 #include "hash-set.h"
43 #include "vec.h"
44 #include "machmode.h"
45 #include "input.h"
46 #include "function.h"
47 #include "recog.h"
48 #include "expr.h"
49 #include "optabs.h"
50 #include "diagnostic-core.h"
51 #include "toplev.h"
52 #include "predict.h"
53 #include "dominance.h"
54 #include "cfg.h"
55 #include "cfgrtl.h"
56 #include "cfganal.h"
57 #include "lcm.h"
58 #include "cfgbuild.h"
59 #include "cfgcleanup.h"
60 #include "basic-block.h"
61 #include "ggc.h"
62 #include "target.h"
63 #include "target-def.h"
64 #include "common/common-target.h"
65 #include "langhooks.h"
66 #include "reload.h"
67 #include "hash-map.h"
68 #include "is-a.h"
69 #include "plugin-api.h"
70 #include "ipa-ref.h"
71 #include "cgraph.h"
72 #include "hash-table.h"
73 #include "tree-ssa-alias.h"
74 #include "internal-fn.h"
75 #include "gimple-fold.h"
76 #include "tree-eh.h"
77 #include "gimple-expr.h"
78 #include "gimple.h"
79 #include "gimplify.h"
80 #include "cfgloop.h"
81 #include "dwarf2.h"
82 #include "df.h"
83 #include "tm-constrs.h"
84 #include "params.h"
85 #include "cselib.h"
86 #include "debug.h"
87 #include "sched-int.h"
88 #include "sbitmap.h"
89 #include "fibheap.h"
90 #include "opts.h"
91 #include "diagnostic.h"
92 #include "dumpfile.h"
93 #include "tree-pass.h"
94 #include "wide-int.h"
95 #include "context.h"
96 #include "pass_manager.h"
97 #include "target-globals.h"
98 #include "tree-vectorizer.h"
99 #include "shrink-wrap.h"
100 #include "builtins.h"
101 #include "rtl-iter.h"
102 #include "tree-iterator.h"
103 #include "tree-chkp.h"
104 #include "rtl-chkp.h"
106 static rtx legitimize_dllimport_symbol (rtx, bool);
107 static rtx legitimize_pe_coff_extern_decl (rtx, bool);
108 static rtx legitimize_pe_coff_symbol (rtx, bool);
110 #ifndef CHECK_STACK_LIMIT
111 #define CHECK_STACK_LIMIT (-1)
112 #endif
114 /* Return index of given mode in mult and division cost tables. */
115 #define MODE_INDEX(mode) \
116 ((mode) == QImode ? 0 \
117 : (mode) == HImode ? 1 \
118 : (mode) == SImode ? 2 \
119 : (mode) == DImode ? 3 \
120 : 4)
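/* MODE_INDEX is the key into the five-entry per-mode arrays in the
   processor_costs tables below, which list their values in
   QImode/HImode/SImode/DImode/other order.  A minimal illustrative
   sketch (the "cost" pointer and "mult_init" field names are assumptions
   used only for this example):

     int di_mul_cost = cost->mult_init[MODE_INDEX (DImode)];   - slot 3

   Any mode other than QI/HI/SI/DI falls through to the trailing "other"
   slot, index 4.  */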
122 /* Processor costs (relative to an add) */
123 /* We assume COSTS_N_INSNS is defined as (N)*4 and an addition is 2 bytes. */
124 #define COSTS_N_BYTES(N) ((N) * 2)
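/* Worked example of the scaling above: with COSTS_N_INSNS (N) defined as
   (N) * 4, a two-byte add is COSTS_N_BYTES (2) == 4 == COSTS_N_INSNS (1),
   so the byte-count costs in ix86_size_cost below stay on the same scale
   as the cycle-based COSTS_N_INSNS values used by the other tables.  */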
126 #define DUMMY_STRINGOP_ALGS {libcall, {{-1, libcall, false}}}
128 static stringop_algs ix86_size_memcpy[2] = {
129 {rep_prefix_1_byte, {{-1, rep_prefix_1_byte, false}}},
130 {rep_prefix_1_byte, {{-1, rep_prefix_1_byte, false}}}};
131 static stringop_algs ix86_size_memset[2] = {
132 {rep_prefix_1_byte, {{-1, rep_prefix_1_byte, false}}},
133 {rep_prefix_1_byte, {{-1, rep_prefix_1_byte, false}}}};
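/* How to read the stringop_algs initializers in this file (a sketch, with
   the struct layout assumed rather than quoted: the first member selects
   the algorithm for blocks of unknown size, the following triples are
   {max_size, algorithm, noalign} with max_size == -1 meaning "any larger
   size", and the two array elements cover 32-bit and 64-bit code).  A
   hypothetical consumer would walk the triples in order:

     i = 0;
     while (algs->size[i].max != -1 && algs->size[i].max < count)
       i++;
     alg = algs->size[i].alg;

   Under that reading, the -Os tables above expand every memcpy and memset,
   of known or unknown size, as a one-byte rep-prefixed string operation
   (rep movsb / rep stosb), the smallest possible code.  */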
135 const
136 struct processor_costs ix86_size_cost = {/* costs for tuning for size */
137 COSTS_N_BYTES (2), /* cost of an add instruction */
138 COSTS_N_BYTES (3), /* cost of a lea instruction */
139 COSTS_N_BYTES (2), /* variable shift costs */
140 COSTS_N_BYTES (3), /* constant shift costs */
141 {COSTS_N_BYTES (3), /* cost of starting multiply for QI */
142 COSTS_N_BYTES (3), /* HI */
143 COSTS_N_BYTES (3), /* SI */
144 COSTS_N_BYTES (3), /* DI */
145 COSTS_N_BYTES (5)}, /* other */
146 0, /* cost of multiply per each bit set */
147 {COSTS_N_BYTES (3), /* cost of a divide/mod for QI */
148 COSTS_N_BYTES (3), /* HI */
149 COSTS_N_BYTES (3), /* SI */
150 COSTS_N_BYTES (3), /* DI */
151 COSTS_N_BYTES (5)}, /* other */
152 COSTS_N_BYTES (3), /* cost of movsx */
153 COSTS_N_BYTES (3), /* cost of movzx */
154 0, /* "large" insn */
155 2, /* MOVE_RATIO */
156 2, /* cost for loading QImode using movzbl */
157 {2, 2, 2}, /* cost of loading integer registers
158 in QImode, HImode and SImode.
159 Relative to reg-reg move (2). */
160 {2, 2, 2}, /* cost of storing integer registers */
161 2, /* cost of reg,reg fld/fst */
162 {2, 2, 2}, /* cost of loading fp registers
163 in SFmode, DFmode and XFmode */
164 {2, 2, 2}, /* cost of storing fp registers
165 in SFmode, DFmode and XFmode */
166 3, /* cost of moving MMX register */
167 {3, 3}, /* cost of loading MMX registers
168 in SImode and DImode */
169 {3, 3}, /* cost of storing MMX registers
170 in SImode and DImode */
171 3, /* cost of moving SSE register */
172 {3, 3, 3}, /* cost of loading SSE registers
173 in SImode, DImode and TImode */
174 {3, 3, 3}, /* cost of storing SSE registers
175 in SImode, DImode and TImode */
176 3, /* MMX or SSE register to integer */
177 0, /* size of l1 cache */
178 0, /* size of l2 cache */
179 0, /* size of prefetch block */
180 0, /* number of parallel prefetches */
181 2, /* Branch cost */
182 COSTS_N_BYTES (2), /* cost of FADD and FSUB insns. */
183 COSTS_N_BYTES (2), /* cost of FMUL instruction. */
184 COSTS_N_BYTES (2), /* cost of FDIV instruction. */
185 COSTS_N_BYTES (2), /* cost of FABS instruction. */
186 COSTS_N_BYTES (2), /* cost of FCHS instruction. */
187 COSTS_N_BYTES (2), /* cost of FSQRT instruction. */
188 ix86_size_memcpy,
189 ix86_size_memset,
190 1, /* scalar_stmt_cost. */
191 1, /* scalar load_cost. */
192 1, /* scalar_store_cost. */
193 1, /* vec_stmt_cost. */
194 1, /* vec_to_scalar_cost. */
195 1, /* scalar_to_vec_cost. */
196 1, /* vec_align_load_cost. */
197 1, /* vec_unalign_load_cost. */
198 1, /* vec_store_cost. */
199 1, /* cond_taken_branch_cost. */
  200       1,					 /* cond_not_taken_branch_cost.  */
  201 };
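/* In the size table above every entry is an encoding-length estimate
   scaled by COSTS_N_BYTES, so for instance an add (COSTS_N_BYTES (2)) is
   preferred over an lea (COSTS_N_BYTES (3)) when either would do, and
   integer multiply and divide carry the same small cost in every mode:
   when tuning for size only instruction length matters, not latency.  */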
203 /* Processor costs (relative to an add) */
204 static stringop_algs i386_memcpy[2] = {
205 {rep_prefix_1_byte, {{-1, rep_prefix_1_byte, false}}},
206 DUMMY_STRINGOP_ALGS};
207 static stringop_algs i386_memset[2] = {
208 {rep_prefix_1_byte, {{-1, rep_prefix_1_byte, false}}},
209 DUMMY_STRINGOP_ALGS};
211 static const
212 struct processor_costs i386_cost = { /* 386 specific costs */
213 COSTS_N_INSNS (1), /* cost of an add instruction */
214 COSTS_N_INSNS (1), /* cost of a lea instruction */
215 COSTS_N_INSNS (3), /* variable shift costs */
216 COSTS_N_INSNS (2), /* constant shift costs */
217 {COSTS_N_INSNS (6), /* cost of starting multiply for QI */
218 COSTS_N_INSNS (6), /* HI */
219 COSTS_N_INSNS (6), /* SI */
220 COSTS_N_INSNS (6), /* DI */
221 COSTS_N_INSNS (6)}, /* other */
222 COSTS_N_INSNS (1), /* cost of multiply per each bit set */
223 {COSTS_N_INSNS (23), /* cost of a divide/mod for QI */
224 COSTS_N_INSNS (23), /* HI */
225 COSTS_N_INSNS (23), /* SI */
226 COSTS_N_INSNS (23), /* DI */
227 COSTS_N_INSNS (23)}, /* other */
228 COSTS_N_INSNS (3), /* cost of movsx */
229 COSTS_N_INSNS (2), /* cost of movzx */
230 15, /* "large" insn */
231 3, /* MOVE_RATIO */
232 4, /* cost for loading QImode using movzbl */
233 {2, 4, 2}, /* cost of loading integer registers
234 in QImode, HImode and SImode.
235 Relative to reg-reg move (2). */
236 {2, 4, 2}, /* cost of storing integer registers */
237 2, /* cost of reg,reg fld/fst */
238 {8, 8, 8}, /* cost of loading fp registers
239 in SFmode, DFmode and XFmode */
240 {8, 8, 8}, /* cost of storing fp registers
241 in SFmode, DFmode and XFmode */
242 2, /* cost of moving MMX register */
243 {4, 8}, /* cost of loading MMX registers
244 in SImode and DImode */
245 {4, 8}, /* cost of storing MMX registers
246 in SImode and DImode */
247 2, /* cost of moving SSE register */
248 {4, 8, 16}, /* cost of loading SSE registers
249 in SImode, DImode and TImode */
250 {4, 8, 16}, /* cost of storing SSE registers
251 in SImode, DImode and TImode */
252 3, /* MMX or SSE register to integer */
253 0, /* size of l1 cache */
254 0, /* size of l2 cache */
255 0, /* size of prefetch block */
256 0, /* number of parallel prefetches */
257 1, /* Branch cost */
258 COSTS_N_INSNS (23), /* cost of FADD and FSUB insns. */
259 COSTS_N_INSNS (27), /* cost of FMUL instruction. */
260 COSTS_N_INSNS (88), /* cost of FDIV instruction. */
261 COSTS_N_INSNS (22), /* cost of FABS instruction. */
262 COSTS_N_INSNS (24), /* cost of FCHS instruction. */
263 COSTS_N_INSNS (122), /* cost of FSQRT instruction. */
264 i386_memcpy,
265 i386_memset,
266 1, /* scalar_stmt_cost. */
267 1, /* scalar load_cost. */
268 1, /* scalar_store_cost. */
269 1, /* vec_stmt_cost. */
270 1, /* vec_to_scalar_cost. */
271 1, /* scalar_to_vec_cost. */
272 1, /* vec_align_load_cost. */
273 2, /* vec_unalign_load_cost. */
274 1, /* vec_store_cost. */
275 3, /* cond_taken_branch_cost. */
  276       1,					 /* cond_not_taken_branch_cost.  */
  277 };
279 static stringop_algs i486_memcpy[2] = {
280 {rep_prefix_4_byte, {{-1, rep_prefix_4_byte, false}}},
281 DUMMY_STRINGOP_ALGS};
282 static stringop_algs i486_memset[2] = {
283 {rep_prefix_4_byte, {{-1, rep_prefix_4_byte, false}}},
284 DUMMY_STRINGOP_ALGS};
286 static const
287 struct processor_costs i486_cost = { /* 486 specific costs */
288 COSTS_N_INSNS (1), /* cost of an add instruction */
289 COSTS_N_INSNS (1), /* cost of a lea instruction */
290 COSTS_N_INSNS (3), /* variable shift costs */
291 COSTS_N_INSNS (2), /* constant shift costs */
292 {COSTS_N_INSNS (12), /* cost of starting multiply for QI */
293 COSTS_N_INSNS (12), /* HI */
294 COSTS_N_INSNS (12), /* SI */
295 COSTS_N_INSNS (12), /* DI */
296 COSTS_N_INSNS (12)}, /* other */
297 1, /* cost of multiply per each bit set */
298 {COSTS_N_INSNS (40), /* cost of a divide/mod for QI */
299 COSTS_N_INSNS (40), /* HI */
300 COSTS_N_INSNS (40), /* SI */
301 COSTS_N_INSNS (40), /* DI */
302 COSTS_N_INSNS (40)}, /* other */
303 COSTS_N_INSNS (3), /* cost of movsx */
304 COSTS_N_INSNS (2), /* cost of movzx */
305 15, /* "large" insn */
306 3, /* MOVE_RATIO */
307 4, /* cost for loading QImode using movzbl */
308 {2, 4, 2}, /* cost of loading integer registers
309 in QImode, HImode and SImode.
310 Relative to reg-reg move (2). */
311 {2, 4, 2}, /* cost of storing integer registers */
312 2, /* cost of reg,reg fld/fst */
313 {8, 8, 8}, /* cost of loading fp registers
314 in SFmode, DFmode and XFmode */
315 {8, 8, 8}, /* cost of storing fp registers
316 in SFmode, DFmode and XFmode */
317 2, /* cost of moving MMX register */
318 {4, 8}, /* cost of loading MMX registers
319 in SImode and DImode */
320 {4, 8}, /* cost of storing MMX registers
321 in SImode and DImode */
322 2, /* cost of moving SSE register */
323 {4, 8, 16}, /* cost of loading SSE registers
324 in SImode, DImode and TImode */
325 {4, 8, 16}, /* cost of storing SSE registers
326 in SImode, DImode and TImode */
327 3, /* MMX or SSE register to integer */
328 4, /* size of l1 cache. 486 has 8kB cache
329 shared for code and data, so 4kB is
330 not really precise. */
331 4, /* size of l2 cache */
332 0, /* size of prefetch block */
333 0, /* number of parallel prefetches */
334 1, /* Branch cost */
335 COSTS_N_INSNS (8), /* cost of FADD and FSUB insns. */
336 COSTS_N_INSNS (16), /* cost of FMUL instruction. */
337 COSTS_N_INSNS (73), /* cost of FDIV instruction. */
338 COSTS_N_INSNS (3), /* cost of FABS instruction. */
339 COSTS_N_INSNS (3), /* cost of FCHS instruction. */
340 COSTS_N_INSNS (83), /* cost of FSQRT instruction. */
341 i486_memcpy,
342 i486_memset,
343 1, /* scalar_stmt_cost. */
344 1, /* scalar load_cost. */
345 1, /* scalar_store_cost. */
346 1, /* vec_stmt_cost. */
347 1, /* vec_to_scalar_cost. */
348 1, /* scalar_to_vec_cost. */
349 1, /* vec_align_load_cost. */
350 2, /* vec_unalign_load_cost. */
351 1, /* vec_store_cost. */
352 3, /* cond_taken_branch_cost. */
  353       1,					 /* cond_not_taken_branch_cost.  */
  354 };
356 static stringop_algs pentium_memcpy[2] = {
357 {libcall, {{256, rep_prefix_4_byte, false}, {-1, libcall, false}}},
358 DUMMY_STRINGOP_ALGS};
359 static stringop_algs pentium_memset[2] = {
360 {libcall, {{-1, rep_prefix_4_byte, false}}},
361 DUMMY_STRINGOP_ALGS};
363 static const
364 struct processor_costs pentium_cost = {
365 COSTS_N_INSNS (1), /* cost of an add instruction */
366 COSTS_N_INSNS (1), /* cost of a lea instruction */
367 COSTS_N_INSNS (4), /* variable shift costs */
368 COSTS_N_INSNS (1), /* constant shift costs */
369 {COSTS_N_INSNS (11), /* cost of starting multiply for QI */
370 COSTS_N_INSNS (11), /* HI */
371 COSTS_N_INSNS (11), /* SI */
372 COSTS_N_INSNS (11), /* DI */
373 COSTS_N_INSNS (11)}, /* other */
374 0, /* cost of multiply per each bit set */
375 {COSTS_N_INSNS (25), /* cost of a divide/mod for QI */
376 COSTS_N_INSNS (25), /* HI */
377 COSTS_N_INSNS (25), /* SI */
378 COSTS_N_INSNS (25), /* DI */
379 COSTS_N_INSNS (25)}, /* other */
380 COSTS_N_INSNS (3), /* cost of movsx */
381 COSTS_N_INSNS (2), /* cost of movzx */
382 8, /* "large" insn */
383 6, /* MOVE_RATIO */
384 6, /* cost for loading QImode using movzbl */
385 {2, 4, 2}, /* cost of loading integer registers
386 in QImode, HImode and SImode.
387 Relative to reg-reg move (2). */
388 {2, 4, 2}, /* cost of storing integer registers */
389 2, /* cost of reg,reg fld/fst */
390 {2, 2, 6}, /* cost of loading fp registers
391 in SFmode, DFmode and XFmode */
392 {4, 4, 6}, /* cost of storing fp registers
393 in SFmode, DFmode and XFmode */
394 8, /* cost of moving MMX register */
395 {8, 8}, /* cost of loading MMX registers
396 in SImode and DImode */
397 {8, 8}, /* cost of storing MMX registers
398 in SImode and DImode */
399 2, /* cost of moving SSE register */
400 {4, 8, 16}, /* cost of loading SSE registers
401 in SImode, DImode and TImode */
402 {4, 8, 16}, /* cost of storing SSE registers
403 in SImode, DImode and TImode */
404 3, /* MMX or SSE register to integer */
405 8, /* size of l1 cache. */
406 8, /* size of l2 cache */
407 0, /* size of prefetch block */
408 0, /* number of parallel prefetches */
409 2, /* Branch cost */
410 COSTS_N_INSNS (3), /* cost of FADD and FSUB insns. */
411 COSTS_N_INSNS (3), /* cost of FMUL instruction. */
412 COSTS_N_INSNS (39), /* cost of FDIV instruction. */
413 COSTS_N_INSNS (1), /* cost of FABS instruction. */
414 COSTS_N_INSNS (1), /* cost of FCHS instruction. */
415 COSTS_N_INSNS (70), /* cost of FSQRT instruction. */
416 pentium_memcpy,
417 pentium_memset,
418 1, /* scalar_stmt_cost. */
419 1, /* scalar load_cost. */
420 1, /* scalar_store_cost. */
421 1, /* vec_stmt_cost. */
422 1, /* vec_to_scalar_cost. */
423 1, /* scalar_to_vec_cost. */
424 1, /* vec_align_load_cost. */
425 2, /* vec_unalign_load_cost. */
426 1, /* vec_store_cost. */
427 3, /* cond_taken_branch_cost. */
  428       1,					 /* cond_not_taken_branch_cost.  */
  429 };
431 /* PentiumPro has optimized rep instructions for blocks aligned by 8 bytes
  432    (we ensure the alignment).  For small blocks an inline loop is still a
  433    noticeable win, for bigger blocks either rep movsl or rep movsb is the
  434    way to go.  Rep movsb apparently has a more expensive startup time in the
  435    CPU, but after 4K the difference is down in the noise.  */
436 static stringop_algs pentiumpro_memcpy[2] = {
437 {rep_prefix_4_byte, {{128, loop, false}, {1024, unrolled_loop, false},
438 {8192, rep_prefix_4_byte, false},
439 {-1, rep_prefix_1_byte, false}}},
440 DUMMY_STRINGOP_ALGS};
441 static stringop_algs pentiumpro_memset[2] = {
442 {rep_prefix_4_byte, {{1024, unrolled_loop, false},
443 {8192, rep_prefix_4_byte, false},
444 {-1, libcall, false}}},
445 DUMMY_STRINGOP_ALGS};
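/* A worked reading of the PentiumPro tables above, interpreting each
   {max, alg, noalign} entry as "use alg for blocks of up to max bytes"
   with -1 meaning no limit: memcpy uses an inline loop up to 128 bytes,
   an unrolled loop up to 1024, rep movsl up to 8192 and rep movsb beyond
   that, with an unknown size defaulting to rep movsl.  memset skips the
   small-loop stage: an unrolled loop up to 1024 bytes, rep movsl up to
   8192, and the library call for anything larger.  */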
446 static const
447 struct processor_costs pentiumpro_cost = {
448 COSTS_N_INSNS (1), /* cost of an add instruction */
449 COSTS_N_INSNS (1), /* cost of a lea instruction */
450 COSTS_N_INSNS (1), /* variable shift costs */
451 COSTS_N_INSNS (1), /* constant shift costs */
452 {COSTS_N_INSNS (4), /* cost of starting multiply for QI */
453 COSTS_N_INSNS (4), /* HI */
454 COSTS_N_INSNS (4), /* SI */
455 COSTS_N_INSNS (4), /* DI */
456 COSTS_N_INSNS (4)}, /* other */
457 0, /* cost of multiply per each bit set */
458 {COSTS_N_INSNS (17), /* cost of a divide/mod for QI */
459 COSTS_N_INSNS (17), /* HI */
460 COSTS_N_INSNS (17), /* SI */
461 COSTS_N_INSNS (17), /* DI */
462 COSTS_N_INSNS (17)}, /* other */
463 COSTS_N_INSNS (1), /* cost of movsx */
464 COSTS_N_INSNS (1), /* cost of movzx */
465 8, /* "large" insn */
466 6, /* MOVE_RATIO */
467 2, /* cost for loading QImode using movzbl */
468 {4, 4, 4}, /* cost of loading integer registers
469 in QImode, HImode and SImode.
470 Relative to reg-reg move (2). */
471 {2, 2, 2}, /* cost of storing integer registers */
472 2, /* cost of reg,reg fld/fst */
473 {2, 2, 6}, /* cost of loading fp registers
474 in SFmode, DFmode and XFmode */
475 {4, 4, 6}, /* cost of storing fp registers
476 in SFmode, DFmode and XFmode */
477 2, /* cost of moving MMX register */
478 {2, 2}, /* cost of loading MMX registers
479 in SImode and DImode */
480 {2, 2}, /* cost of storing MMX registers
481 in SImode and DImode */
482 2, /* cost of moving SSE register */
483 {2, 2, 8}, /* cost of loading SSE registers
484 in SImode, DImode and TImode */
485 {2, 2, 8}, /* cost of storing SSE registers
486 in SImode, DImode and TImode */
487 3, /* MMX or SSE register to integer */
488 8, /* size of l1 cache. */
489 256, /* size of l2 cache */
490 32, /* size of prefetch block */
491 6, /* number of parallel prefetches */
492 2, /* Branch cost */
493 COSTS_N_INSNS (3), /* cost of FADD and FSUB insns. */
494 COSTS_N_INSNS (5), /* cost of FMUL instruction. */
495 COSTS_N_INSNS (56), /* cost of FDIV instruction. */
496 COSTS_N_INSNS (2), /* cost of FABS instruction. */
497 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
498 COSTS_N_INSNS (56), /* cost of FSQRT instruction. */
499 pentiumpro_memcpy,
500 pentiumpro_memset,
501 1, /* scalar_stmt_cost. */
502 1, /* scalar load_cost. */
503 1, /* scalar_store_cost. */
504 1, /* vec_stmt_cost. */
505 1, /* vec_to_scalar_cost. */
506 1, /* scalar_to_vec_cost. */
507 1, /* vec_align_load_cost. */
508 2, /* vec_unalign_load_cost. */
509 1, /* vec_store_cost. */
510 3, /* cond_taken_branch_cost. */
  511       1,					 /* cond_not_taken_branch_cost.  */
  512 };
514 static stringop_algs geode_memcpy[2] = {
515 {libcall, {{256, rep_prefix_4_byte, false}, {-1, libcall, false}}},
516 DUMMY_STRINGOP_ALGS};
517 static stringop_algs geode_memset[2] = {
518 {libcall, {{256, rep_prefix_4_byte, false}, {-1, libcall, false}}},
519 DUMMY_STRINGOP_ALGS};
520 static const
521 struct processor_costs geode_cost = {
522 COSTS_N_INSNS (1), /* cost of an add instruction */
523 COSTS_N_INSNS (1), /* cost of a lea instruction */
524 COSTS_N_INSNS (2), /* variable shift costs */
525 COSTS_N_INSNS (1), /* constant shift costs */
526 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
527 COSTS_N_INSNS (4), /* HI */
528 COSTS_N_INSNS (7), /* SI */
529 COSTS_N_INSNS (7), /* DI */
530 COSTS_N_INSNS (7)}, /* other */
531 0, /* cost of multiply per each bit set */
532 {COSTS_N_INSNS (15), /* cost of a divide/mod for QI */
533 COSTS_N_INSNS (23), /* HI */
534 COSTS_N_INSNS (39), /* SI */
535 COSTS_N_INSNS (39), /* DI */
536 COSTS_N_INSNS (39)}, /* other */
537 COSTS_N_INSNS (1), /* cost of movsx */
538 COSTS_N_INSNS (1), /* cost of movzx */
539 8, /* "large" insn */
540 4, /* MOVE_RATIO */
541 1, /* cost for loading QImode using movzbl */
542 {1, 1, 1}, /* cost of loading integer registers
543 in QImode, HImode and SImode.
544 Relative to reg-reg move (2). */
545 {1, 1, 1}, /* cost of storing integer registers */
546 1, /* cost of reg,reg fld/fst */
547 {1, 1, 1}, /* cost of loading fp registers
548 in SFmode, DFmode and XFmode */
549 {4, 6, 6}, /* cost of storing fp registers
550 in SFmode, DFmode and XFmode */
552 1, /* cost of moving MMX register */
553 {1, 1}, /* cost of loading MMX registers
554 in SImode and DImode */
555 {1, 1}, /* cost of storing MMX registers
556 in SImode and DImode */
557 1, /* cost of moving SSE register */
558 {1, 1, 1}, /* cost of loading SSE registers
559 in SImode, DImode and TImode */
560 {1, 1, 1}, /* cost of storing SSE registers
561 in SImode, DImode and TImode */
562 1, /* MMX or SSE register to integer */
563 64, /* size of l1 cache. */
564 128, /* size of l2 cache. */
565 32, /* size of prefetch block */
566 1, /* number of parallel prefetches */
567 1, /* Branch cost */
568 COSTS_N_INSNS (6), /* cost of FADD and FSUB insns. */
569 COSTS_N_INSNS (11), /* cost of FMUL instruction. */
570 COSTS_N_INSNS (47), /* cost of FDIV instruction. */
571 COSTS_N_INSNS (1), /* cost of FABS instruction. */
572 COSTS_N_INSNS (1), /* cost of FCHS instruction. */
573 COSTS_N_INSNS (54), /* cost of FSQRT instruction. */
574 geode_memcpy,
575 geode_memset,
576 1, /* scalar_stmt_cost. */
577 1, /* scalar load_cost. */
578 1, /* scalar_store_cost. */
579 1, /* vec_stmt_cost. */
580 1, /* vec_to_scalar_cost. */
581 1, /* scalar_to_vec_cost. */
582 1, /* vec_align_load_cost. */
583 2, /* vec_unalign_load_cost. */
584 1, /* vec_store_cost. */
585 3, /* cond_taken_branch_cost. */
  586       1,					 /* cond_not_taken_branch_cost.  */
  587 };
589 static stringop_algs k6_memcpy[2] = {
590 {libcall, {{256, rep_prefix_4_byte, false}, {-1, libcall, false}}},
591 DUMMY_STRINGOP_ALGS};
592 static stringop_algs k6_memset[2] = {
593 {libcall, {{256, rep_prefix_4_byte, false}, {-1, libcall, false}}},
594 DUMMY_STRINGOP_ALGS};
595 static const
596 struct processor_costs k6_cost = {
597 COSTS_N_INSNS (1), /* cost of an add instruction */
598 COSTS_N_INSNS (2), /* cost of a lea instruction */
599 COSTS_N_INSNS (1), /* variable shift costs */
600 COSTS_N_INSNS (1), /* constant shift costs */
601 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
602 COSTS_N_INSNS (3), /* HI */
603 COSTS_N_INSNS (3), /* SI */
604 COSTS_N_INSNS (3), /* DI */
605 COSTS_N_INSNS (3)}, /* other */
606 0, /* cost of multiply per each bit set */
607 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
608 COSTS_N_INSNS (18), /* HI */
609 COSTS_N_INSNS (18), /* SI */
610 COSTS_N_INSNS (18), /* DI */
611 COSTS_N_INSNS (18)}, /* other */
612 COSTS_N_INSNS (2), /* cost of movsx */
613 COSTS_N_INSNS (2), /* cost of movzx */
614 8, /* "large" insn */
615 4, /* MOVE_RATIO */
616 3, /* cost for loading QImode using movzbl */
617 {4, 5, 4}, /* cost of loading integer registers
618 in QImode, HImode and SImode.
619 Relative to reg-reg move (2). */
620 {2, 3, 2}, /* cost of storing integer registers */
621 4, /* cost of reg,reg fld/fst */
622 {6, 6, 6}, /* cost of loading fp registers
623 in SFmode, DFmode and XFmode */
624 {4, 4, 4}, /* cost of storing fp registers
625 in SFmode, DFmode and XFmode */
626 2, /* cost of moving MMX register */
627 {2, 2}, /* cost of loading MMX registers
628 in SImode and DImode */
629 {2, 2}, /* cost of storing MMX registers
630 in SImode and DImode */
631 2, /* cost of moving SSE register */
632 {2, 2, 8}, /* cost of loading SSE registers
633 in SImode, DImode and TImode */
634 {2, 2, 8}, /* cost of storing SSE registers
635 in SImode, DImode and TImode */
636 6, /* MMX or SSE register to integer */
637 32, /* size of l1 cache. */
638 32, /* size of l2 cache. Some models
639 have integrated l2 cache, but
640 optimizing for k6 is not important
641 enough to worry about that. */
642 32, /* size of prefetch block */
643 1, /* number of parallel prefetches */
644 1, /* Branch cost */
645 COSTS_N_INSNS (2), /* cost of FADD and FSUB insns. */
646 COSTS_N_INSNS (2), /* cost of FMUL instruction. */
647 COSTS_N_INSNS (56), /* cost of FDIV instruction. */
648 COSTS_N_INSNS (2), /* cost of FABS instruction. */
649 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
650 COSTS_N_INSNS (56), /* cost of FSQRT instruction. */
651 k6_memcpy,
652 k6_memset,
653 1, /* scalar_stmt_cost. */
654 1, /* scalar load_cost. */
655 1, /* scalar_store_cost. */
656 1, /* vec_stmt_cost. */
657 1, /* vec_to_scalar_cost. */
658 1, /* scalar_to_vec_cost. */
659 1, /* vec_align_load_cost. */
660 2, /* vec_unalign_load_cost. */
661 1, /* vec_store_cost. */
662 3, /* cond_taken_branch_cost. */
  663       1,					 /* cond_not_taken_branch_cost.  */
  664 };
  666 /* For some reason, Athlon deals better with the REP prefix (relative to
  667    loops) than K8 does.  Alignment becomes important after 8 bytes for
  668    memcpy and 128 bytes for memset.  */
669 static stringop_algs athlon_memcpy[2] = {
670 {libcall, {{2048, rep_prefix_4_byte, false}, {-1, libcall, false}}},
671 DUMMY_STRINGOP_ALGS};
672 static stringop_algs athlon_memset[2] = {
673 {libcall, {{2048, rep_prefix_4_byte, false}, {-1, libcall, false}}},
674 DUMMY_STRINGOP_ALGS};
675 static const
676 struct processor_costs athlon_cost = {
677 COSTS_N_INSNS (1), /* cost of an add instruction */
678 COSTS_N_INSNS (2), /* cost of a lea instruction */
679 COSTS_N_INSNS (1), /* variable shift costs */
680 COSTS_N_INSNS (1), /* constant shift costs */
681 {COSTS_N_INSNS (5), /* cost of starting multiply for QI */
682 COSTS_N_INSNS (5), /* HI */
683 COSTS_N_INSNS (5), /* SI */
684 COSTS_N_INSNS (5), /* DI */
685 COSTS_N_INSNS (5)}, /* other */
686 0, /* cost of multiply per each bit set */
687 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
688 COSTS_N_INSNS (26), /* HI */
689 COSTS_N_INSNS (42), /* SI */
690 COSTS_N_INSNS (74), /* DI */
691 COSTS_N_INSNS (74)}, /* other */
692 COSTS_N_INSNS (1), /* cost of movsx */
693 COSTS_N_INSNS (1), /* cost of movzx */
694 8, /* "large" insn */
695 9, /* MOVE_RATIO */
696 4, /* cost for loading QImode using movzbl */
697 {3, 4, 3}, /* cost of loading integer registers
698 in QImode, HImode and SImode.
699 Relative to reg-reg move (2). */
700 {3, 4, 3}, /* cost of storing integer registers */
701 4, /* cost of reg,reg fld/fst */
702 {4, 4, 12}, /* cost of loading fp registers
703 in SFmode, DFmode and XFmode */
704 {6, 6, 8}, /* cost of storing fp registers
705 in SFmode, DFmode and XFmode */
706 2, /* cost of moving MMX register */
707 {4, 4}, /* cost of loading MMX registers
708 in SImode and DImode */
709 {4, 4}, /* cost of storing MMX registers
710 in SImode and DImode */
711 2, /* cost of moving SSE register */
712 {4, 4, 6}, /* cost of loading SSE registers
713 in SImode, DImode and TImode */
714 {4, 4, 5}, /* cost of storing SSE registers
715 in SImode, DImode and TImode */
716 5, /* MMX or SSE register to integer */
717 64, /* size of l1 cache. */
718 256, /* size of l2 cache. */
719 64, /* size of prefetch block */
720 6, /* number of parallel prefetches */
721 5, /* Branch cost */
722 COSTS_N_INSNS (4), /* cost of FADD and FSUB insns. */
723 COSTS_N_INSNS (4), /* cost of FMUL instruction. */
724 COSTS_N_INSNS (24), /* cost of FDIV instruction. */
725 COSTS_N_INSNS (2), /* cost of FABS instruction. */
726 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
727 COSTS_N_INSNS (35), /* cost of FSQRT instruction. */
728 athlon_memcpy,
729 athlon_memset,
730 1, /* scalar_stmt_cost. */
731 1, /* scalar load_cost. */
732 1, /* scalar_store_cost. */
733 1, /* vec_stmt_cost. */
734 1, /* vec_to_scalar_cost. */
735 1, /* scalar_to_vec_cost. */
736 1, /* vec_align_load_cost. */
737 2, /* vec_unalign_load_cost. */
738 1, /* vec_store_cost. */
739 3, /* cond_taken_branch_cost. */
  740       1,					 /* cond_not_taken_branch_cost.  */
  741 };
  743 /* K8 has an optimized REP instruction for medium-sized blocks, but for very
  744    small blocks it is better to use a loop.  For large blocks, a libcall can
  745    do nontemporal accesses and beat an inline expansion considerably.  */
746 static stringop_algs k8_memcpy[2] = {
747 {libcall, {{6, loop, false}, {14, unrolled_loop, false},
748 {-1, rep_prefix_4_byte, false}}},
749 {libcall, {{16, loop, false}, {8192, rep_prefix_8_byte, false},
750 {-1, libcall, false}}}};
751 static stringop_algs k8_memset[2] = {
752 {libcall, {{8, loop, false}, {24, unrolled_loop, false},
753 {2048, rep_prefix_4_byte, false}, {-1, libcall, false}}},
754 {libcall, {{48, unrolled_loop, false},
755 {8192, rep_prefix_8_byte, false}, {-1, libcall, false}}}};
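/* A worked reading of the K8 tables above, matching the comment before
   them (and assuming the first descriptor covers 32-bit code and the
   second 64-bit code): in 32-bit mode memcpy uses a simple loop up to 6
   bytes, an unrolled loop up to 14 bytes and rep movsl for anything
   larger; in 64-bit mode it uses a loop up to 16 bytes, rep movsq up to
   8192 bytes and the memcpy library call beyond that, where the
   nontemporal stores mentioned above pay off.  */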
756 static const
757 struct processor_costs k8_cost = {
758 COSTS_N_INSNS (1), /* cost of an add instruction */
759 COSTS_N_INSNS (2), /* cost of a lea instruction */
760 COSTS_N_INSNS (1), /* variable shift costs */
761 COSTS_N_INSNS (1), /* constant shift costs */
762 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
763 COSTS_N_INSNS (4), /* HI */
764 COSTS_N_INSNS (3), /* SI */
765 COSTS_N_INSNS (4), /* DI */
766 COSTS_N_INSNS (5)}, /* other */
767 0, /* cost of multiply per each bit set */
768 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
769 COSTS_N_INSNS (26), /* HI */
770 COSTS_N_INSNS (42), /* SI */
771 COSTS_N_INSNS (74), /* DI */
772 COSTS_N_INSNS (74)}, /* other */
773 COSTS_N_INSNS (1), /* cost of movsx */
774 COSTS_N_INSNS (1), /* cost of movzx */
775 8, /* "large" insn */
776 9, /* MOVE_RATIO */
777 4, /* cost for loading QImode using movzbl */
778 {3, 4, 3}, /* cost of loading integer registers
779 in QImode, HImode and SImode.
780 Relative to reg-reg move (2). */
781 {3, 4, 3}, /* cost of storing integer registers */
782 4, /* cost of reg,reg fld/fst */
783 {4, 4, 12}, /* cost of loading fp registers
784 in SFmode, DFmode and XFmode */
785 {6, 6, 8}, /* cost of storing fp registers
786 in SFmode, DFmode and XFmode */
787 2, /* cost of moving MMX register */
788 {3, 3}, /* cost of loading MMX registers
789 in SImode and DImode */
790 {4, 4}, /* cost of storing MMX registers
791 in SImode and DImode */
792 2, /* cost of moving SSE register */
793 {4, 3, 6}, /* cost of loading SSE registers
794 in SImode, DImode and TImode */
795 {4, 4, 5}, /* cost of storing SSE registers
796 in SImode, DImode and TImode */
797 5, /* MMX or SSE register to integer */
798 64, /* size of l1 cache. */
799 512, /* size of l2 cache. */
800 64, /* size of prefetch block */
  801   /* New AMD processors never drop prefetches; if they cannot be performed
  802      immediately, they are queued.  We set the number of simultaneous prefetches
  803      to a large constant to reflect this (it is probably not a good idea to
  804      leave the number of prefetches completely unlimited, as their execution
  805      also takes some time).  */
806 100, /* number of parallel prefetches */
807 3, /* Branch cost */
808 COSTS_N_INSNS (4), /* cost of FADD and FSUB insns. */
809 COSTS_N_INSNS (4), /* cost of FMUL instruction. */
810 COSTS_N_INSNS (19), /* cost of FDIV instruction. */
811 COSTS_N_INSNS (2), /* cost of FABS instruction. */
812 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
813 COSTS_N_INSNS (35), /* cost of FSQRT instruction. */
815 k8_memcpy,
816 k8_memset,
817 4, /* scalar_stmt_cost. */
818 2, /* scalar load_cost. */
819 2, /* scalar_store_cost. */
820 5, /* vec_stmt_cost. */
821 0, /* vec_to_scalar_cost. */
822 2, /* scalar_to_vec_cost. */
823 2, /* vec_align_load_cost. */
824 3, /* vec_unalign_load_cost. */
825 3, /* vec_store_cost. */
826 3, /* cond_taken_branch_cost. */
  827       2,					 /* cond_not_taken_branch_cost.  */
  828 };
  830 /* AMDFAM10 has an optimized REP instruction for medium-sized blocks, but for
  831    very small blocks it is better to use a loop.  For large blocks, a libcall
  832    can do nontemporal accesses and beat an inline expansion considerably.  */
833 static stringop_algs amdfam10_memcpy[2] = {
834 {libcall, {{6, loop, false}, {14, unrolled_loop, false},
835 {-1, rep_prefix_4_byte, false}}},
836 {libcall, {{16, loop, false}, {8192, rep_prefix_8_byte, false},
837 {-1, libcall, false}}}};
838 static stringop_algs amdfam10_memset[2] = {
839 {libcall, {{8, loop, false}, {24, unrolled_loop, false},
840 {2048, rep_prefix_4_byte, false}, {-1, libcall, false}}},
841 {libcall, {{48, unrolled_loop, false}, {8192, rep_prefix_8_byte, false},
842 {-1, libcall, false}}}};
843 struct processor_costs amdfam10_cost = {
844 COSTS_N_INSNS (1), /* cost of an add instruction */
845 COSTS_N_INSNS (2), /* cost of a lea instruction */
846 COSTS_N_INSNS (1), /* variable shift costs */
847 COSTS_N_INSNS (1), /* constant shift costs */
848 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
849 COSTS_N_INSNS (4), /* HI */
850 COSTS_N_INSNS (3), /* SI */
851 COSTS_N_INSNS (4), /* DI */
852 COSTS_N_INSNS (5)}, /* other */
853 0, /* cost of multiply per each bit set */
854 {COSTS_N_INSNS (19), /* cost of a divide/mod for QI */
855 COSTS_N_INSNS (35), /* HI */
856 COSTS_N_INSNS (51), /* SI */
857 COSTS_N_INSNS (83), /* DI */
858 COSTS_N_INSNS (83)}, /* other */
859 COSTS_N_INSNS (1), /* cost of movsx */
860 COSTS_N_INSNS (1), /* cost of movzx */
861 8, /* "large" insn */
862 9, /* MOVE_RATIO */
863 4, /* cost for loading QImode using movzbl */
864 {3, 4, 3}, /* cost of loading integer registers
865 in QImode, HImode and SImode.
866 Relative to reg-reg move (2). */
867 {3, 4, 3}, /* cost of storing integer registers */
868 4, /* cost of reg,reg fld/fst */
869 {4, 4, 12}, /* cost of loading fp registers
870 in SFmode, DFmode and XFmode */
871 {6, 6, 8}, /* cost of storing fp registers
872 in SFmode, DFmode and XFmode */
873 2, /* cost of moving MMX register */
874 {3, 3}, /* cost of loading MMX registers
875 in SImode and DImode */
876 {4, 4}, /* cost of storing MMX registers
877 in SImode and DImode */
878 2, /* cost of moving SSE register */
879 {4, 4, 3}, /* cost of loading SSE registers
880 in SImode, DImode and TImode */
881 {4, 4, 5}, /* cost of storing SSE registers
882 in SImode, DImode and TImode */
883 3, /* MMX or SSE register to integer */
884 /* On K8:
885 MOVD reg64, xmmreg Double FSTORE 4
886 MOVD reg32, xmmreg Double FSTORE 4
887 On AMDFAM10:
888 MOVD reg64, xmmreg Double FADD 3
889 1/1 1/1
890 MOVD reg32, xmmreg Double FADD 3
891 1/1 1/1 */
892 64, /* size of l1 cache. */
893 512, /* size of l2 cache. */
894 64, /* size of prefetch block */
  895   /* New AMD processors never drop prefetches; if they cannot be performed
  896      immediately, they are queued.  We set the number of simultaneous prefetches
  897      to a large constant to reflect this (it is probably not a good idea to
  898      leave the number of prefetches completely unlimited, as their execution
  899      also takes some time).  */
900 100, /* number of parallel prefetches */
901 2, /* Branch cost */
902 COSTS_N_INSNS (4), /* cost of FADD and FSUB insns. */
903 COSTS_N_INSNS (4), /* cost of FMUL instruction. */
904 COSTS_N_INSNS (19), /* cost of FDIV instruction. */
905 COSTS_N_INSNS (2), /* cost of FABS instruction. */
906 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
907 COSTS_N_INSNS (35), /* cost of FSQRT instruction. */
909 amdfam10_memcpy,
910 amdfam10_memset,
911 4, /* scalar_stmt_cost. */
912 2, /* scalar load_cost. */
913 2, /* scalar_store_cost. */
914 6, /* vec_stmt_cost. */
915 0, /* vec_to_scalar_cost. */
916 2, /* scalar_to_vec_cost. */
917 2, /* vec_align_load_cost. */
918 2, /* vec_unalign_load_cost. */
919 2, /* vec_store_cost. */
920 2, /* cond_taken_branch_cost. */
  921       1,					 /* cond_not_taken_branch_cost.  */
  922 };
  924 /* BDVER1 has an optimized REP instruction for medium-sized blocks, but for
  925    very small blocks it is better to use a loop.  For large blocks, a libcall
  926    can do nontemporal accesses and beat an inline expansion considerably.  */
927 static stringop_algs bdver1_memcpy[2] = {
928 {libcall, {{6, loop, false}, {14, unrolled_loop, false},
929 {-1, rep_prefix_4_byte, false}}},
930 {libcall, {{16, loop, false}, {8192, rep_prefix_8_byte, false},
931 {-1, libcall, false}}}};
932 static stringop_algs bdver1_memset[2] = {
933 {libcall, {{8, loop, false}, {24, unrolled_loop, false},
934 {2048, rep_prefix_4_byte, false}, {-1, libcall, false}}},
935 {libcall, {{48, unrolled_loop, false}, {8192, rep_prefix_8_byte, false},
936 {-1, libcall, false}}}};
938 const struct processor_costs bdver1_cost = {
939 COSTS_N_INSNS (1), /* cost of an add instruction */
940 COSTS_N_INSNS (1), /* cost of a lea instruction */
941 COSTS_N_INSNS (1), /* variable shift costs */
942 COSTS_N_INSNS (1), /* constant shift costs */
943 {COSTS_N_INSNS (4), /* cost of starting multiply for QI */
944 COSTS_N_INSNS (4), /* HI */
945 COSTS_N_INSNS (4), /* SI */
946 COSTS_N_INSNS (6), /* DI */
947 COSTS_N_INSNS (6)}, /* other */
948 0, /* cost of multiply per each bit set */
949 {COSTS_N_INSNS (19), /* cost of a divide/mod for QI */
950 COSTS_N_INSNS (35), /* HI */
951 COSTS_N_INSNS (51), /* SI */
952 COSTS_N_INSNS (83), /* DI */
953 COSTS_N_INSNS (83)}, /* other */
954 COSTS_N_INSNS (1), /* cost of movsx */
955 COSTS_N_INSNS (1), /* cost of movzx */
956 8, /* "large" insn */
957 9, /* MOVE_RATIO */
958 4, /* cost for loading QImode using movzbl */
959 {5, 5, 4}, /* cost of loading integer registers
960 in QImode, HImode and SImode.
961 Relative to reg-reg move (2). */
962 {4, 4, 4}, /* cost of storing integer registers */
963 2, /* cost of reg,reg fld/fst */
964 {5, 5, 12}, /* cost of loading fp registers
965 in SFmode, DFmode and XFmode */
966 {4, 4, 8}, /* cost of storing fp registers
967 in SFmode, DFmode and XFmode */
968 2, /* cost of moving MMX register */
969 {4, 4}, /* cost of loading MMX registers
970 in SImode and DImode */
971 {4, 4}, /* cost of storing MMX registers
972 in SImode and DImode */
973 2, /* cost of moving SSE register */
974 {4, 4, 4}, /* cost of loading SSE registers
975 in SImode, DImode and TImode */
976 {4, 4, 4}, /* cost of storing SSE registers
977 in SImode, DImode and TImode */
978 2, /* MMX or SSE register to integer */
979 /* On K8:
980 MOVD reg64, xmmreg Double FSTORE 4
981 MOVD reg32, xmmreg Double FSTORE 4
982 On AMDFAM10:
983 MOVD reg64, xmmreg Double FADD 3
984 1/1 1/1
985 MOVD reg32, xmmreg Double FADD 3
986 1/1 1/1 */
987 16, /* size of l1 cache. */
988 2048, /* size of l2 cache. */
989 64, /* size of prefetch block */
  990   /* New AMD processors never drop prefetches; if they cannot be performed
  991      immediately, they are queued.  We set the number of simultaneous prefetches
  992      to a large constant to reflect this (it is probably not a good idea to
  993      leave the number of prefetches completely unlimited, as their execution
  994      also takes some time).  */
995 100, /* number of parallel prefetches */
996 2, /* Branch cost */
997 COSTS_N_INSNS (6), /* cost of FADD and FSUB insns. */
998 COSTS_N_INSNS (6), /* cost of FMUL instruction. */
999 COSTS_N_INSNS (42), /* cost of FDIV instruction. */
1000 COSTS_N_INSNS (2), /* cost of FABS instruction. */
1001 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
1002 COSTS_N_INSNS (52), /* cost of FSQRT instruction. */
1004 bdver1_memcpy,
1005 bdver1_memset,
1006 6, /* scalar_stmt_cost. */
1007 4, /* scalar load_cost. */
1008 4, /* scalar_store_cost. */
1009 6, /* vec_stmt_cost. */
1010 0, /* vec_to_scalar_cost. */
1011 2, /* scalar_to_vec_cost. */
1012 4, /* vec_align_load_cost. */
1013 4, /* vec_unalign_load_cost. */
1014 4, /* vec_store_cost. */
1015 2, /* cond_taken_branch_cost. */
 1016       1,					 /* cond_not_taken_branch_cost.  */
 1017 };
 1019 /* BDVER2 has an optimized REP instruction for medium-sized blocks, but for
 1020    very small blocks it is better to use a loop.  For large blocks, a libcall
 1021    can do nontemporal accesses and beat an inline expansion considerably.  */
1023 static stringop_algs bdver2_memcpy[2] = {
1024 {libcall, {{6, loop, false}, {14, unrolled_loop, false},
1025 {-1, rep_prefix_4_byte, false}}},
1026 {libcall, {{16, loop, false}, {8192, rep_prefix_8_byte, false},
1027 {-1, libcall, false}}}};
1028 static stringop_algs bdver2_memset[2] = {
1029 {libcall, {{8, loop, false}, {24, unrolled_loop, false},
1030 {2048, rep_prefix_4_byte, false}, {-1, libcall, false}}},
1031 {libcall, {{48, unrolled_loop, false}, {8192, rep_prefix_8_byte, false},
1032 {-1, libcall, false}}}};
1034 const struct processor_costs bdver2_cost = {
1035 COSTS_N_INSNS (1), /* cost of an add instruction */
1036 COSTS_N_INSNS (1), /* cost of a lea instruction */
1037 COSTS_N_INSNS (1), /* variable shift costs */
1038 COSTS_N_INSNS (1), /* constant shift costs */
1039 {COSTS_N_INSNS (4), /* cost of starting multiply for QI */
1040 COSTS_N_INSNS (4), /* HI */
1041 COSTS_N_INSNS (4), /* SI */
1042 COSTS_N_INSNS (6), /* DI */
1043 COSTS_N_INSNS (6)}, /* other */
1044 0, /* cost of multiply per each bit set */
1045 {COSTS_N_INSNS (19), /* cost of a divide/mod for QI */
1046 COSTS_N_INSNS (35), /* HI */
1047 COSTS_N_INSNS (51), /* SI */
1048 COSTS_N_INSNS (83), /* DI */
1049 COSTS_N_INSNS (83)}, /* other */
1050 COSTS_N_INSNS (1), /* cost of movsx */
1051 COSTS_N_INSNS (1), /* cost of movzx */
1052 8, /* "large" insn */
1053 9, /* MOVE_RATIO */
1054 4, /* cost for loading QImode using movzbl */
1055 {5, 5, 4}, /* cost of loading integer registers
1056 in QImode, HImode and SImode.
1057 Relative to reg-reg move (2). */
1058 {4, 4, 4}, /* cost of storing integer registers */
1059 2, /* cost of reg,reg fld/fst */
1060 {5, 5, 12}, /* cost of loading fp registers
1061 in SFmode, DFmode and XFmode */
1062 {4, 4, 8}, /* cost of storing fp registers
1063 in SFmode, DFmode and XFmode */
1064 2, /* cost of moving MMX register */
1065 {4, 4}, /* cost of loading MMX registers
1066 in SImode and DImode */
1067 {4, 4}, /* cost of storing MMX registers
1068 in SImode and DImode */
1069 2, /* cost of moving SSE register */
1070 {4, 4, 4}, /* cost of loading SSE registers
1071 in SImode, DImode and TImode */
1072 {4, 4, 4}, /* cost of storing SSE registers
1073 in SImode, DImode and TImode */
1074 2, /* MMX or SSE register to integer */
1075 /* On K8:
1076 MOVD reg64, xmmreg Double FSTORE 4
1077 MOVD reg32, xmmreg Double FSTORE 4
1078 On AMDFAM10:
1079 MOVD reg64, xmmreg Double FADD 3
1080 1/1 1/1
1081 MOVD reg32, xmmreg Double FADD 3
1082 1/1 1/1 */
1083 16, /* size of l1 cache. */
1084 2048, /* size of l2 cache. */
1085 64, /* size of prefetch block */
 1086   /* New AMD processors never drop prefetches; if they cannot be performed
 1087      immediately, they are queued.  We set the number of simultaneous prefetches
 1088      to a large constant to reflect this (it is probably not a good idea to
 1089      leave the number of prefetches completely unlimited, as their execution
 1090      also takes some time).  */
1091 100, /* number of parallel prefetches */
1092 2, /* Branch cost */
1093 COSTS_N_INSNS (6), /* cost of FADD and FSUB insns. */
1094 COSTS_N_INSNS (6), /* cost of FMUL instruction. */
1095 COSTS_N_INSNS (42), /* cost of FDIV instruction. */
1096 COSTS_N_INSNS (2), /* cost of FABS instruction. */
1097 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
1098 COSTS_N_INSNS (52), /* cost of FSQRT instruction. */
1100 bdver2_memcpy,
1101 bdver2_memset,
1102 6, /* scalar_stmt_cost. */
1103 4, /* scalar load_cost. */
1104 4, /* scalar_store_cost. */
1105 6, /* vec_stmt_cost. */
1106 0, /* vec_to_scalar_cost. */
1107 2, /* scalar_to_vec_cost. */
1108 4, /* vec_align_load_cost. */
1109 4, /* vec_unalign_load_cost. */
1110 4, /* vec_store_cost. */
1111 2, /* cond_taken_branch_cost. */
 1112       1,					 /* cond_not_taken_branch_cost.  */
 1113 };
 1116 /* BDVER3 has an optimized REP instruction for medium-sized blocks, but for
 1117    very small blocks it is better to use a loop.  For large blocks, a libcall
 1118    can do nontemporal accesses and beat an inline expansion considerably.  */
1119 static stringop_algs bdver3_memcpy[2] = {
1120 {libcall, {{6, loop, false}, {14, unrolled_loop, false},
1121 {-1, rep_prefix_4_byte, false}}},
1122 {libcall, {{16, loop, false}, {8192, rep_prefix_8_byte, false},
1123 {-1, libcall, false}}}};
1124 static stringop_algs bdver3_memset[2] = {
1125 {libcall, {{8, loop, false}, {24, unrolled_loop, false},
1126 {2048, rep_prefix_4_byte, false}, {-1, libcall, false}}},
1127 {libcall, {{48, unrolled_loop, false}, {8192, rep_prefix_8_byte, false},
1128 {-1, libcall, false}}}};
1129 struct processor_costs bdver3_cost = {
1130 COSTS_N_INSNS (1), /* cost of an add instruction */
1131 COSTS_N_INSNS (1), /* cost of a lea instruction */
1132 COSTS_N_INSNS (1), /* variable shift costs */
1133 COSTS_N_INSNS (1), /* constant shift costs */
1134 {COSTS_N_INSNS (4), /* cost of starting multiply for QI */
1135 COSTS_N_INSNS (4), /* HI */
1136 COSTS_N_INSNS (4), /* SI */
1137 COSTS_N_INSNS (6), /* DI */
1138 COSTS_N_INSNS (6)}, /* other */
1139 0, /* cost of multiply per each bit set */
1140 {COSTS_N_INSNS (19), /* cost of a divide/mod for QI */
1141 COSTS_N_INSNS (35), /* HI */
1142 COSTS_N_INSNS (51), /* SI */
1143 COSTS_N_INSNS (83), /* DI */
1144 COSTS_N_INSNS (83)}, /* other */
1145 COSTS_N_INSNS (1), /* cost of movsx */
1146 COSTS_N_INSNS (1), /* cost of movzx */
1147 8, /* "large" insn */
1148 9, /* MOVE_RATIO */
1149 4, /* cost for loading QImode using movzbl */
1150 {5, 5, 4}, /* cost of loading integer registers
1151 in QImode, HImode and SImode.
1152 Relative to reg-reg move (2). */
1153 {4, 4, 4}, /* cost of storing integer registers */
1154 2, /* cost of reg,reg fld/fst */
1155 {5, 5, 12}, /* cost of loading fp registers
1156 in SFmode, DFmode and XFmode */
1157 {4, 4, 8}, /* cost of storing fp registers
1158 in SFmode, DFmode and XFmode */
1159 2, /* cost of moving MMX register */
1160 {4, 4}, /* cost of loading MMX registers
1161 in SImode and DImode */
1162 {4, 4}, /* cost of storing MMX registers
1163 in SImode and DImode */
1164 2, /* cost of moving SSE register */
1165 {4, 4, 4}, /* cost of loading SSE registers
1166 in SImode, DImode and TImode */
1167 {4, 4, 4}, /* cost of storing SSE registers
1168 in SImode, DImode and TImode */
1169 2, /* MMX or SSE register to integer */
1170 16, /* size of l1 cache. */
1171 2048, /* size of l2 cache. */
1172 64, /* size of prefetch block */
 1173   /* New AMD processors never drop prefetches; if they cannot be performed
 1174      immediately, they are queued.  We set the number of simultaneous prefetches
 1175      to a large constant to reflect this (it is probably not a good idea to
 1176      leave the number of prefetches completely unlimited, as their execution
 1177      also takes some time).  */
1178 100, /* number of parallel prefetches */
1179 2, /* Branch cost */
1180 COSTS_N_INSNS (6), /* cost of FADD and FSUB insns. */
1181 COSTS_N_INSNS (6), /* cost of FMUL instruction. */
1182 COSTS_N_INSNS (42), /* cost of FDIV instruction. */
1183 COSTS_N_INSNS (2), /* cost of FABS instruction. */
1184 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
1185 COSTS_N_INSNS (52), /* cost of FSQRT instruction. */
1187 bdver3_memcpy,
1188 bdver3_memset,
1189 6, /* scalar_stmt_cost. */
1190 4, /* scalar load_cost. */
1191 4, /* scalar_store_cost. */
1192 6, /* vec_stmt_cost. */
1193 0, /* vec_to_scalar_cost. */
1194 2, /* scalar_to_vec_cost. */
1195 4, /* vec_align_load_cost. */
1196 4, /* vec_unalign_load_cost. */
1197 4, /* vec_store_cost. */
1198 2, /* cond_taken_branch_cost. */
 1199       1,					 /* cond_not_taken_branch_cost.  */
 1200 };
 1202 /* BDVER4 has an optimized REP instruction for medium-sized blocks, but for
 1203    very small blocks it is better to use a loop.  For large blocks, a libcall
 1204    can do nontemporal accesses and beat an inline expansion considerably.  */
1205 static stringop_algs bdver4_memcpy[2] = {
1206 {libcall, {{6, loop, false}, {14, unrolled_loop, false},
1207 {-1, rep_prefix_4_byte, false}}},
1208 {libcall, {{16, loop, false}, {8192, rep_prefix_8_byte, false},
1209 {-1, libcall, false}}}};
1210 static stringop_algs bdver4_memset[2] = {
1211 {libcall, {{8, loop, false}, {24, unrolled_loop, false},
1212 {2048, rep_prefix_4_byte, false}, {-1, libcall, false}}},
1213 {libcall, {{48, unrolled_loop, false}, {8192, rep_prefix_8_byte, false},
1214 {-1, libcall, false}}}};
1215 struct processor_costs bdver4_cost = {
1216 COSTS_N_INSNS (1), /* cost of an add instruction */
1217 COSTS_N_INSNS (1), /* cost of a lea instruction */
1218 COSTS_N_INSNS (1), /* variable shift costs */
1219 COSTS_N_INSNS (1), /* constant shift costs */
1220 {COSTS_N_INSNS (4), /* cost of starting multiply for QI */
1221 COSTS_N_INSNS (4), /* HI */
1222 COSTS_N_INSNS (4), /* SI */
1223 COSTS_N_INSNS (6), /* DI */
1224 COSTS_N_INSNS (6)}, /* other */
1225 0, /* cost of multiply per each bit set */
1226 {COSTS_N_INSNS (19), /* cost of a divide/mod for QI */
1227 COSTS_N_INSNS (35), /* HI */
1228 COSTS_N_INSNS (51), /* SI */
1229 COSTS_N_INSNS (83), /* DI */
1230 COSTS_N_INSNS (83)}, /* other */
1231 COSTS_N_INSNS (1), /* cost of movsx */
1232 COSTS_N_INSNS (1), /* cost of movzx */
1233 8, /* "large" insn */
1234 9, /* MOVE_RATIO */
1235 4, /* cost for loading QImode using movzbl */
1236 {5, 5, 4}, /* cost of loading integer registers
1237 in QImode, HImode and SImode.
1238 Relative to reg-reg move (2). */
1239 {4, 4, 4}, /* cost of storing integer registers */
1240 2, /* cost of reg,reg fld/fst */
1241 {5, 5, 12}, /* cost of loading fp registers
1242 in SFmode, DFmode and XFmode */
1243 {4, 4, 8}, /* cost of storing fp registers
1244 in SFmode, DFmode and XFmode */
1245 2, /* cost of moving MMX register */
1246 {4, 4}, /* cost of loading MMX registers
1247 in SImode and DImode */
1248 {4, 4}, /* cost of storing MMX registers
1249 in SImode and DImode */
1250 2, /* cost of moving SSE register */
1251 {4, 4, 4}, /* cost of loading SSE registers
1252 in SImode, DImode and TImode */
1253 {4, 4, 4}, /* cost of storing SSE registers
1254 in SImode, DImode and TImode */
1255 2, /* MMX or SSE register to integer */
1256 16, /* size of l1 cache. */
1257 2048, /* size of l2 cache. */
1258 64, /* size of prefetch block */
 1259   /* New AMD processors never drop prefetches; if they cannot be performed
 1260      immediately, they are queued.  We set the number of simultaneous prefetches
 1261      to a large constant to reflect this (it is probably not a good idea to
 1262      leave the number of prefetches completely unlimited, as their execution
 1263      also takes some time).  */
1264 100, /* number of parallel prefetches */
1265 2, /* Branch cost */
1266 COSTS_N_INSNS (6), /* cost of FADD and FSUB insns. */
1267 COSTS_N_INSNS (6), /* cost of FMUL instruction. */
1268 COSTS_N_INSNS (42), /* cost of FDIV instruction. */
1269 COSTS_N_INSNS (2), /* cost of FABS instruction. */
1270 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
1271 COSTS_N_INSNS (52), /* cost of FSQRT instruction. */
1273 bdver4_memcpy,
1274 bdver4_memset,
1275 6, /* scalar_stmt_cost. */
1276 4, /* scalar load_cost. */
1277 4, /* scalar_store_cost. */
1278 6, /* vec_stmt_cost. */
1279 0, /* vec_to_scalar_cost. */
1280 2, /* scalar_to_vec_cost. */
1281 4, /* vec_align_load_cost. */
1282 4, /* vec_unalign_load_cost. */
1283 4, /* vec_store_cost. */
1284 2, /* cond_taken_branch_cost. */
 1285       1,					 /* cond_not_taken_branch_cost.  */
 1286 };
 1288 /* BTVER1 has an optimized REP instruction for medium-sized blocks, but for
 1289    very small blocks it is better to use a loop.  For large blocks, a libcall
 1290    can do nontemporal accesses and beat an inline expansion considerably.  */
1291 static stringop_algs btver1_memcpy[2] = {
1292 {libcall, {{6, loop, false}, {14, unrolled_loop, false},
1293 {-1, rep_prefix_4_byte, false}}},
1294 {libcall, {{16, loop, false}, {8192, rep_prefix_8_byte, false},
1295 {-1, libcall, false}}}};
1296 static stringop_algs btver1_memset[2] = {
1297 {libcall, {{8, loop, false}, {24, unrolled_loop, false},
1298 {2048, rep_prefix_4_byte, false}, {-1, libcall, false}}},
1299 {libcall, {{48, unrolled_loop, false}, {8192, rep_prefix_8_byte, false},
1300 {-1, libcall, false}}}};
1301 const struct processor_costs btver1_cost = {
1302 COSTS_N_INSNS (1), /* cost of an add instruction */
1303 COSTS_N_INSNS (2), /* cost of a lea instruction */
1304 COSTS_N_INSNS (1), /* variable shift costs */
1305 COSTS_N_INSNS (1), /* constant shift costs */
1306 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
1307 COSTS_N_INSNS (4), /* HI */
1308 COSTS_N_INSNS (3), /* SI */
1309 COSTS_N_INSNS (4), /* DI */
1310 COSTS_N_INSNS (5)}, /* other */
1311 0, /* cost of multiply per each bit set */
1312 {COSTS_N_INSNS (19), /* cost of a divide/mod for QI */
1313 COSTS_N_INSNS (35), /* HI */
1314 COSTS_N_INSNS (51), /* SI */
1315 COSTS_N_INSNS (83), /* DI */
1316 COSTS_N_INSNS (83)}, /* other */
1317 COSTS_N_INSNS (1), /* cost of movsx */
1318 COSTS_N_INSNS (1), /* cost of movzx */
1319 8, /* "large" insn */
1320 9, /* MOVE_RATIO */
1321 4, /* cost for loading QImode using movzbl */
1322 {3, 4, 3}, /* cost of loading integer registers
1323 in QImode, HImode and SImode.
1324 Relative to reg-reg move (2). */
1325 {3, 4, 3}, /* cost of storing integer registers */
1326 4, /* cost of reg,reg fld/fst */
1327 {4, 4, 12}, /* cost of loading fp registers
1328 in SFmode, DFmode and XFmode */
1329 {6, 6, 8}, /* cost of storing fp registers
1330 in SFmode, DFmode and XFmode */
1331 2, /* cost of moving MMX register */
1332 {3, 3}, /* cost of loading MMX registers
1333 in SImode and DImode */
1334 {4, 4}, /* cost of storing MMX registers
1335 in SImode and DImode */
1336 2, /* cost of moving SSE register */
1337 {4, 4, 3}, /* cost of loading SSE registers
1338 in SImode, DImode and TImode */
1339 {4, 4, 5}, /* cost of storing SSE registers
1340 in SImode, DImode and TImode */
1341 3, /* MMX or SSE register to integer */
1342 /* On K8:
1343 MOVD reg64, xmmreg Double FSTORE 4
1344 MOVD reg32, xmmreg Double FSTORE 4
1345 On AMDFAM10:
1346 MOVD reg64, xmmreg Double FADD 3
1347 1/1 1/1
1348 MOVD reg32, xmmreg Double FADD 3
1349 1/1 1/1 */
1350 32, /* size of l1 cache. */
1351 512, /* size of l2 cache. */
1352 64, /* size of prefetch block */
1353 100, /* number of parallel prefetches */
1354 2, /* Branch cost */
1355 COSTS_N_INSNS (4), /* cost of FADD and FSUB insns. */
1356 COSTS_N_INSNS (4), /* cost of FMUL instruction. */
1357 COSTS_N_INSNS (19), /* cost of FDIV instruction. */
1358 COSTS_N_INSNS (2), /* cost of FABS instruction. */
1359 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
1360 COSTS_N_INSNS (35), /* cost of FSQRT instruction. */
1362 btver1_memcpy,
1363 btver1_memset,
1364 4, /* scalar_stmt_cost. */
1365 2, /* scalar load_cost. */
1366 2, /* scalar_store_cost. */
1367 6, /* vec_stmt_cost. */
1368 0, /* vec_to_scalar_cost. */
1369 2, /* scalar_to_vec_cost. */
1370 2, /* vec_align_load_cost. */
1371 2, /* vec_unalign_load_cost. */
1372 2, /* vec_store_cost. */
1373 2, /* cond_taken_branch_cost. */
 1374       1,					 /* cond_not_taken_branch_cost.  */
 1375 };
1377 static stringop_algs btver2_memcpy[2] = {
1378 {libcall, {{6, loop, false}, {14, unrolled_loop, false},
1379 {-1, rep_prefix_4_byte, false}}},
1380 {libcall, {{16, loop, false}, {8192, rep_prefix_8_byte, false},
1381 {-1, libcall, false}}}};
1382 static stringop_algs btver2_memset[2] = {
1383 {libcall, {{8, loop, false}, {24, unrolled_loop, false},
1384 {2048, rep_prefix_4_byte, false}, {-1, libcall, false}}},
1385 {libcall, {{48, unrolled_loop, false}, {8192, rep_prefix_8_byte, false},
1386 {-1, libcall, false}}}};
1387 const struct processor_costs btver2_cost = {
1388 COSTS_N_INSNS (1), /* cost of an add instruction */
1389 COSTS_N_INSNS (2), /* cost of a lea instruction */
1390 COSTS_N_INSNS (1), /* variable shift costs */
1391 COSTS_N_INSNS (1), /* constant shift costs */
1392 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
1393 COSTS_N_INSNS (4), /* HI */
1394 COSTS_N_INSNS (3), /* SI */
1395 COSTS_N_INSNS (4), /* DI */
1396 COSTS_N_INSNS (5)}, /* other */
1397 0, /* cost of multiply per each bit set */
1398 {COSTS_N_INSNS (19), /* cost of a divide/mod for QI */
1399 COSTS_N_INSNS (35), /* HI */
1400 COSTS_N_INSNS (51), /* SI */
1401 COSTS_N_INSNS (83), /* DI */
1402 COSTS_N_INSNS (83)}, /* other */
1403 COSTS_N_INSNS (1), /* cost of movsx */
1404 COSTS_N_INSNS (1), /* cost of movzx */
1405 8, /* "large" insn */
1406 9, /* MOVE_RATIO */
1407 4, /* cost for loading QImode using movzbl */
1408 {3, 4, 3}, /* cost of loading integer registers
1409 in QImode, HImode and SImode.
1410 Relative to reg-reg move (2). */
1411 {3, 4, 3}, /* cost of storing integer registers */
1412 4, /* cost of reg,reg fld/fst */
1413 {4, 4, 12}, /* cost of loading fp registers
1414 in SFmode, DFmode and XFmode */
1415 {6, 6, 8}, /* cost of storing fp registers
1416 in SFmode, DFmode and XFmode */
1417 2, /* cost of moving MMX register */
1418 {3, 3}, /* cost of loading MMX registers
1419 in SImode and DImode */
1420 {4, 4}, /* cost of storing MMX registers
1421 in SImode and DImode */
1422 2, /* cost of moving SSE register */
1423 {4, 4, 3}, /* cost of loading SSE registers
1424 in SImode, DImode and TImode */
1425 {4, 4, 5}, /* cost of storing SSE registers
1426 in SImode, DImode and TImode */
1427 3, /* MMX or SSE register to integer */
1428 /* On K8:
1429 MOVD reg64, xmmreg Double FSTORE 4
1430 MOVD reg32, xmmreg Double FSTORE 4
1431 On AMDFAM10:
1432 MOVD reg64, xmmreg Double FADD 3
1433 1/1 1/1
1434 MOVD reg32, xmmreg Double FADD 3
1435 1/1 1/1 */
1436 32, /* size of l1 cache. */
1437 2048, /* size of l2 cache. */
1438 64, /* size of prefetch block */
1439 100, /* number of parallel prefetches */
1440 2, /* Branch cost */
1441 COSTS_N_INSNS (4), /* cost of FADD and FSUB insns. */
1442 COSTS_N_INSNS (4), /* cost of FMUL instruction. */
1443 COSTS_N_INSNS (19), /* cost of FDIV instruction. */
1444 COSTS_N_INSNS (2), /* cost of FABS instruction. */
1445 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
1446 COSTS_N_INSNS (35), /* cost of FSQRT instruction. */
1447 btver2_memcpy,
1448 btver2_memset,
1449 4, /* scalar_stmt_cost. */
1450 2, /* scalar load_cost. */
1451 2, /* scalar_store_cost. */
1452 6, /* vec_stmt_cost. */
1453 0, /* vec_to_scalar_cost. */
1454 2, /* scalar_to_vec_cost. */
1455 2, /* vec_align_load_cost. */
1456 2, /* vec_unalign_load_cost. */
1457 2, /* vec_store_cost. */
1458 2, /* cond_taken_branch_cost. */
1459 1, /* cond_not_taken_branch_cost. */
1462 static stringop_algs pentium4_memcpy[2] = {
1463 {libcall, {{12, loop_1_byte, false}, {-1, rep_prefix_4_byte, false}}},
1464 DUMMY_STRINGOP_ALGS};
1465 static stringop_algs pentium4_memset[2] = {
1466 {libcall, {{6, loop_1_byte, false}, {48, loop, false},
1467 {20480, rep_prefix_4_byte, false}, {-1, libcall, false}}},
1468 DUMMY_STRINGOP_ALGS};
1470 static const
1471 struct processor_costs pentium4_cost = {
1472 COSTS_N_INSNS (1), /* cost of an add instruction */
1473 COSTS_N_INSNS (3), /* cost of a lea instruction */
1474 COSTS_N_INSNS (4), /* variable shift costs */
1475 COSTS_N_INSNS (4), /* constant shift costs */
1476 {COSTS_N_INSNS (15), /* cost of starting multiply for QI */
1477 COSTS_N_INSNS (15), /* HI */
1478 COSTS_N_INSNS (15), /* SI */
1479 COSTS_N_INSNS (15), /* DI */
1480 COSTS_N_INSNS (15)}, /* other */
1481 0, /* cost of multiply per each bit set */
1482 {COSTS_N_INSNS (56), /* cost of a divide/mod for QI */
1483 COSTS_N_INSNS (56), /* HI */
1484 COSTS_N_INSNS (56), /* SI */
1485 COSTS_N_INSNS (56), /* DI */
1486 COSTS_N_INSNS (56)}, /* other */
1487 COSTS_N_INSNS (1), /* cost of movsx */
1488 COSTS_N_INSNS (1), /* cost of movzx */
1489 16, /* "large" insn */
1490 6, /* MOVE_RATIO */
1491 2, /* cost for loading QImode using movzbl */
1492 {4, 5, 4}, /* cost of loading integer registers
1493 in QImode, HImode and SImode.
1494 Relative to reg-reg move (2). */
1495 {2, 3, 2}, /* cost of storing integer registers */
1496 2, /* cost of reg,reg fld/fst */
1497 {2, 2, 6}, /* cost of loading fp registers
1498 in SFmode, DFmode and XFmode */
1499 {4, 4, 6}, /* cost of storing fp registers
1500 in SFmode, DFmode and XFmode */
1501 2, /* cost of moving MMX register */
1502 {2, 2}, /* cost of loading MMX registers
1503 in SImode and DImode */
1504 {2, 2}, /* cost of storing MMX registers
1505 in SImode and DImode */
1506 12, /* cost of moving SSE register */
1507 {12, 12, 12}, /* cost of loading SSE registers
1508 in SImode, DImode and TImode */
1509 {2, 2, 8}, /* cost of storing SSE registers
1510 in SImode, DImode and TImode */
1511 10, /* MMX or SSE register to integer */
1512 8, /* size of l1 cache. */
1513 256, /* size of l2 cache. */
1514 64, /* size of prefetch block */
1515 6, /* number of parallel prefetches */
1516 2, /* Branch cost */
1517 COSTS_N_INSNS (5), /* cost of FADD and FSUB insns. */
1518 COSTS_N_INSNS (7), /* cost of FMUL instruction. */
1519 COSTS_N_INSNS (43), /* cost of FDIV instruction. */
1520 COSTS_N_INSNS (2), /* cost of FABS instruction. */
1521 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
1522 COSTS_N_INSNS (43), /* cost of FSQRT instruction. */
1523 pentium4_memcpy,
1524 pentium4_memset,
1525 1, /* scalar_stmt_cost. */
1526 1, /* scalar load_cost. */
1527 1, /* scalar_store_cost. */
1528 1, /* vec_stmt_cost. */
1529 1, /* vec_to_scalar_cost. */
1530 1, /* scalar_to_vec_cost. */
1531 1, /* vec_align_load_cost. */
1532 2, /* vec_unalign_load_cost. */
1533 1, /* vec_store_cost. */
1534 3, /* cond_taken_branch_cost. */
1535 1, /* cond_not_taken_branch_cost. */
1538 static stringop_algs nocona_memcpy[2] = {
1539 {libcall, {{12, loop_1_byte, false}, {-1, rep_prefix_4_byte, false}}},
1540 {libcall, {{32, loop, false}, {20000, rep_prefix_8_byte, false},
1541 {100000, unrolled_loop, false}, {-1, libcall, false}}}};
1543 static stringop_algs nocona_memset[2] = {
1544 {libcall, {{6, loop_1_byte, false}, {48, loop, false},
1545 {20480, rep_prefix_4_byte, false}, {-1, libcall, false}}},
1546 {libcall, {{24, loop, false}, {64, unrolled_loop, false},
1547 {8192, rep_prefix_8_byte, false}, {-1, libcall, false}}}};
1549 static const
1550 struct processor_costs nocona_cost = {
1551 COSTS_N_INSNS (1), /* cost of an add instruction */
1552 COSTS_N_INSNS (1), /* cost of a lea instruction */
1553 COSTS_N_INSNS (1), /* variable shift costs */
1554 COSTS_N_INSNS (1), /* constant shift costs */
1555 {COSTS_N_INSNS (10), /* cost of starting multiply for QI */
1556 COSTS_N_INSNS (10), /* HI */
1557 COSTS_N_INSNS (10), /* SI */
1558 COSTS_N_INSNS (10), /* DI */
1559 COSTS_N_INSNS (10)}, /* other */
1560 0, /* cost of multiply per each bit set */
1561 {COSTS_N_INSNS (66), /* cost of a divide/mod for QI */
1562 COSTS_N_INSNS (66), /* HI */
1563 COSTS_N_INSNS (66), /* SI */
1564 COSTS_N_INSNS (66), /* DI */
1565 COSTS_N_INSNS (66)}, /* other */
1566 COSTS_N_INSNS (1), /* cost of movsx */
1567 COSTS_N_INSNS (1), /* cost of movzx */
1568 16, /* "large" insn */
1569 17, /* MOVE_RATIO */
1570 4, /* cost for loading QImode using movzbl */
1571 {4, 4, 4}, /* cost of loading integer registers
1572 in QImode, HImode and SImode.
1573 Relative to reg-reg move (2). */
1574 {4, 4, 4}, /* cost of storing integer registers */
1575 3, /* cost of reg,reg fld/fst */
1576 {12, 12, 12}, /* cost of loading fp registers
1577 in SFmode, DFmode and XFmode */
1578 {4, 4, 4}, /* cost of storing fp registers
1579 in SFmode, DFmode and XFmode */
1580 6, /* cost of moving MMX register */
1581 {12, 12}, /* cost of loading MMX registers
1582 in SImode and DImode */
1583 {12, 12}, /* cost of storing MMX registers
1584 in SImode and DImode */
1585 6, /* cost of moving SSE register */
1586 {12, 12, 12}, /* cost of loading SSE registers
1587 in SImode, DImode and TImode */
1588 {12, 12, 12}, /* cost of storing SSE registers
1589 in SImode, DImode and TImode */
1590 8, /* MMX or SSE register to integer */
1591 8, /* size of l1 cache. */
1592 1024, /* size of l2 cache. */
1593 64, /* size of prefetch block */
1594 8, /* number of parallel prefetches */
1595 1, /* Branch cost */
1596 COSTS_N_INSNS (6), /* cost of FADD and FSUB insns. */
1597 COSTS_N_INSNS (8), /* cost of FMUL instruction. */
1598 COSTS_N_INSNS (40), /* cost of FDIV instruction. */
1599 COSTS_N_INSNS (3), /* cost of FABS instruction. */
1600 COSTS_N_INSNS (3), /* cost of FCHS instruction. */
1601 COSTS_N_INSNS (44), /* cost of FSQRT instruction. */
1602 nocona_memcpy,
1603 nocona_memset,
1604 1, /* scalar_stmt_cost. */
1605 1, /* scalar load_cost. */
1606 1, /* scalar_store_cost. */
1607 1, /* vec_stmt_cost. */
1608 1, /* vec_to_scalar_cost. */
1609 1, /* scalar_to_vec_cost. */
1610 1, /* vec_align_load_cost. */
1611 2, /* vec_unalign_load_cost. */
1612 1, /* vec_store_cost. */
1613 3, /* cond_taken_branch_cost. */
1614 1, /* cond_not_taken_branch_cost. */
1617 static stringop_algs atom_memcpy[2] = {
1618 {libcall, {{11, loop, false}, {-1, rep_prefix_4_byte, false}}},
1619 {libcall, {{32, loop, false}, {64, rep_prefix_4_byte, false},
1620 {8192, rep_prefix_8_byte, false}, {-1, libcall, false}}}};
1621 static stringop_algs atom_memset[2] = {
1622 {libcall, {{8, loop, false}, {15, unrolled_loop, false},
1623 {2048, rep_prefix_4_byte, false}, {-1, libcall, false}}},
1624 {libcall, {{24, loop, false}, {32, unrolled_loop, false},
1625 {8192, rep_prefix_8_byte, false}, {-1, libcall, false}}}};
1626 static const
1627 struct processor_costs atom_cost = {
1628 COSTS_N_INSNS (1), /* cost of an add instruction */
1629 COSTS_N_INSNS (1) + 1, /* cost of a lea instruction */
1630 COSTS_N_INSNS (1), /* variable shift costs */
1631 COSTS_N_INSNS (1), /* constant shift costs */
1632 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
1633 COSTS_N_INSNS (4), /* HI */
1634 COSTS_N_INSNS (3), /* SI */
1635 COSTS_N_INSNS (4), /* DI */
1636 COSTS_N_INSNS (2)}, /* other */
1637 0, /* cost of multiply per each bit set */
1638 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
1639 COSTS_N_INSNS (26), /* HI */
1640 COSTS_N_INSNS (42), /* SI */
1641 COSTS_N_INSNS (74), /* DI */
1642 COSTS_N_INSNS (74)}, /* other */
1643 COSTS_N_INSNS (1), /* cost of movsx */
1644 COSTS_N_INSNS (1), /* cost of movzx */
1645 8, /* "large" insn */
1646 17, /* MOVE_RATIO */
1647 4, /* cost for loading QImode using movzbl */
1648 {4, 4, 4}, /* cost of loading integer registers
1649 in QImode, HImode and SImode.
1650 Relative to reg-reg move (2). */
1651 {4, 4, 4}, /* cost of storing integer registers */
1652 4, /* cost of reg,reg fld/fst */
1653 {12, 12, 12}, /* cost of loading fp registers
1654 in SFmode, DFmode and XFmode */
1655 {6, 6, 8}, /* cost of storing fp registers
1656 in SFmode, DFmode and XFmode */
1657 2, /* cost of moving MMX register */
1658 {8, 8}, /* cost of loading MMX registers
1659 in SImode and DImode */
1660 {8, 8}, /* cost of storing MMX registers
1661 in SImode and DImode */
1662 2, /* cost of moving SSE register */
1663 {8, 8, 8}, /* cost of loading SSE registers
1664 in SImode, DImode and TImode */
1665 {8, 8, 8}, /* cost of storing SSE registers
1666 in SImode, DImode and TImode */
1667 5, /* MMX or SSE register to integer */
1668 32, /* size of l1 cache. */
1669 256, /* size of l2 cache. */
1670 64, /* size of prefetch block */
1671 6, /* number of parallel prefetches */
1672 3, /* Branch cost */
1673 COSTS_N_INSNS (8), /* cost of FADD and FSUB insns. */
1674 COSTS_N_INSNS (8), /* cost of FMUL instruction. */
1675 COSTS_N_INSNS (20), /* cost of FDIV instruction. */
1676 COSTS_N_INSNS (8), /* cost of FABS instruction. */
1677 COSTS_N_INSNS (8), /* cost of FCHS instruction. */
1678 COSTS_N_INSNS (40), /* cost of FSQRT instruction. */
1679 atom_memcpy,
1680 atom_memset,
1681 1, /* scalar_stmt_cost. */
1682 1, /* scalar load_cost. */
1683 1, /* scalar_store_cost. */
1684 1, /* vec_stmt_cost. */
1685 1, /* vec_to_scalar_cost. */
1686 1, /* scalar_to_vec_cost. */
1687 1, /* vec_align_load_cost. */
1688 2, /* vec_unalign_load_cost. */
1689 1, /* vec_store_cost. */
1690 3, /* cond_taken_branch_cost. */
1691 1, /* cond_not_taken_branch_cost. */
1694 static stringop_algs slm_memcpy[2] = {
1695 {libcall, {{11, loop, false}, {-1, rep_prefix_4_byte, false}}},
1696 {libcall, {{32, loop, false}, {64, rep_prefix_4_byte, false},
1697 {8192, rep_prefix_8_byte, false}, {-1, libcall, false}}}};
1698 static stringop_algs slm_memset[2] = {
1699 {libcall, {{8, loop, false}, {15, unrolled_loop, false},
1700 {2048, rep_prefix_4_byte, false}, {-1, libcall, false}}},
1701 {libcall, {{24, loop, false}, {32, unrolled_loop, false},
1702 {8192, rep_prefix_8_byte, false}, {-1, libcall, false}}}};
1703 static const
1704 struct processor_costs slm_cost = {
1705 COSTS_N_INSNS (1), /* cost of an add instruction */
1706 COSTS_N_INSNS (1) + 1, /* cost of a lea instruction */
1707 COSTS_N_INSNS (1), /* variable shift costs */
1708 COSTS_N_INSNS (1), /* constant shift costs */
1709 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
1710 COSTS_N_INSNS (3), /* HI */
1711 COSTS_N_INSNS (3), /* SI */
1712 COSTS_N_INSNS (4), /* DI */
1713 COSTS_N_INSNS (2)}, /* other */
1714 0, /* cost of multiply per each bit set */
1715 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
1716 COSTS_N_INSNS (26), /* HI */
1717 COSTS_N_INSNS (42), /* SI */
1718 COSTS_N_INSNS (74), /* DI */
1719 COSTS_N_INSNS (74)}, /* other */
1720 COSTS_N_INSNS (1), /* cost of movsx */
1721 COSTS_N_INSNS (1), /* cost of movzx */
1722 8, /* "large" insn */
1723 17, /* MOVE_RATIO */
1724 4, /* cost for loading QImode using movzbl */
1725 {4, 4, 4}, /* cost of loading integer registers
1726 in QImode, HImode and SImode.
1727 Relative to reg-reg move (2). */
1728 {4, 4, 4}, /* cost of storing integer registers */
1729 4, /* cost of reg,reg fld/fst */
1730 {12, 12, 12}, /* cost of loading fp registers
1731 in SFmode, DFmode and XFmode */
1732 {6, 6, 8}, /* cost of storing fp registers
1733 in SFmode, DFmode and XFmode */
1734 2, /* cost of moving MMX register */
1735 {8, 8}, /* cost of loading MMX registers
1736 in SImode and DImode */
1737 {8, 8}, /* cost of storing MMX registers
1738 in SImode and DImode */
1739 2, /* cost of moving SSE register */
1740 {8, 8, 8}, /* cost of loading SSE registers
1741 in SImode, DImode and TImode */
1742 {8, 8, 8}, /* cost of storing SSE registers
1743 in SImode, DImode and TImode */
1744 5, /* MMX or SSE register to integer */
1745 32, /* size of l1 cache. */
1746 256, /* size of l2 cache. */
1747 64, /* size of prefetch block */
1748 6, /* number of parallel prefetches */
1749 3, /* Branch cost */
1750 COSTS_N_INSNS (8), /* cost of FADD and FSUB insns. */
1751 COSTS_N_INSNS (8), /* cost of FMUL instruction. */
1752 COSTS_N_INSNS (20), /* cost of FDIV instruction. */
1753 COSTS_N_INSNS (8), /* cost of FABS instruction. */
1754 COSTS_N_INSNS (8), /* cost of FCHS instruction. */
1755 COSTS_N_INSNS (40), /* cost of FSQRT instruction. */
1756 slm_memcpy,
1757 slm_memset,
1758 1, /* scalar_stmt_cost. */
1759 1, /* scalar load_cost. */
1760 1, /* scalar_store_cost. */
1761 1, /* vec_stmt_cost. */
1762 4, /* vec_to_scalar_cost. */
1763 1, /* scalar_to_vec_cost. */
1764 1, /* vec_align_load_cost. */
1765 2, /* vec_unalign_load_cost. */
1766 1, /* vec_store_cost. */
1767 3, /* cond_taken_branch_cost. */
1768 1, /* cond_not_taken_branch_cost. */
1771 static stringop_algs intel_memcpy[2] = {
1772 {libcall, {{11, loop, false}, {-1, rep_prefix_4_byte, false}}},
1773 {libcall, {{32, loop, false}, {64, rep_prefix_4_byte, false},
1774 {8192, rep_prefix_8_byte, false}, {-1, libcall, false}}}};
1775 static stringop_algs intel_memset[2] = {
1776 {libcall, {{8, loop, false}, {15, unrolled_loop, false},
1777 {2048, rep_prefix_4_byte, false}, {-1, libcall, false}}},
1778 {libcall, {{24, loop, false}, {32, unrolled_loop, false},
1779 {8192, rep_prefix_8_byte, false}, {-1, libcall, false}}}};
1780 static const
1781 struct processor_costs intel_cost = {
1782 COSTS_N_INSNS (1), /* cost of an add instruction */
1783 COSTS_N_INSNS (1) + 1, /* cost of a lea instruction */
1784 COSTS_N_INSNS (1), /* variable shift costs */
1785 COSTS_N_INSNS (1), /* constant shift costs */
1786 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
1787 COSTS_N_INSNS (3), /* HI */
1788 COSTS_N_INSNS (3), /* SI */
1789 COSTS_N_INSNS (4), /* DI */
1790 COSTS_N_INSNS (2)}, /* other */
1791 0, /* cost of multiply per each bit set */
1792 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
1793 COSTS_N_INSNS (26), /* HI */
1794 COSTS_N_INSNS (42), /* SI */
1795 COSTS_N_INSNS (74), /* DI */
1796 COSTS_N_INSNS (74)}, /* other */
1797 COSTS_N_INSNS (1), /* cost of movsx */
1798 COSTS_N_INSNS (1), /* cost of movzx */
1799 8, /* "large" insn */
1800 17, /* MOVE_RATIO */
1801 4, /* cost for loading QImode using movzbl */
1802 {4, 4, 4}, /* cost of loading integer registers
1803 in QImode, HImode and SImode.
1804 Relative to reg-reg move (2). */
1805 {4, 4, 4}, /* cost of storing integer registers */
1806 4, /* cost of reg,reg fld/fst */
1807 {12, 12, 12}, /* cost of loading fp registers
1808 in SFmode, DFmode and XFmode */
1809 {6, 6, 8}, /* cost of storing fp registers
1810 in SFmode, DFmode and XFmode */
1811 2, /* cost of moving MMX register */
1812 {8, 8}, /* cost of loading MMX registers
1813 in SImode and DImode */
1814 {8, 8}, /* cost of storing MMX registers
1815 in SImode and DImode */
1816 2, /* cost of moving SSE register */
1817 {8, 8, 8}, /* cost of loading SSE registers
1818 in SImode, DImode and TImode */
1819 {8, 8, 8}, /* cost of storing SSE registers
1820 in SImode, DImode and TImode */
1821 5, /* MMX or SSE register to integer */
1822 32, /* size of l1 cache. */
1823 256, /* size of l2 cache. */
1824 64, /* size of prefetch block */
1825 6, /* number of parallel prefetches */
1826 3, /* Branch cost */
1827 COSTS_N_INSNS (8), /* cost of FADD and FSUB insns. */
1828 COSTS_N_INSNS (8), /* cost of FMUL instruction. */
1829 COSTS_N_INSNS (20), /* cost of FDIV instruction. */
1830 COSTS_N_INSNS (8), /* cost of FABS instruction. */
1831 COSTS_N_INSNS (8), /* cost of FCHS instruction. */
1832 COSTS_N_INSNS (40), /* cost of FSQRT instruction. */
1833 intel_memcpy,
1834 intel_memset,
1835 1, /* scalar_stmt_cost. */
1836 1, /* scalar load_cost. */
1837 1, /* scalar_store_cost. */
1838 1, /* vec_stmt_cost. */
1839 4, /* vec_to_scalar_cost. */
1840 1, /* scalar_to_vec_cost. */
1841 1, /* vec_align_load_cost. */
1842 2, /* vec_unalign_load_cost. */
1843 1, /* vec_store_cost. */
1844 3, /* cond_taken_branch_cost. */
1845 1, /* cond_not_taken_branch_cost. */
1848 /* Generic should produce code tuned for Core-i7 (and newer chips)
1849 and btver1 (and newer chips). */
1851 static stringop_algs generic_memcpy[2] = {
1852 {libcall, {{32, loop, false}, {8192, rep_prefix_4_byte, false},
1853 {-1, libcall, false}}},
1854 {libcall, {{32, loop, false}, {8192, rep_prefix_8_byte, false},
1855 {-1, libcall, false}}}};
1856 static stringop_algs generic_memset[2] = {
1857 {libcall, {{32, loop, false}, {8192, rep_prefix_4_byte, false},
1858 {-1, libcall, false}}},
1859 {libcall, {{32, loop, false}, {8192, rep_prefix_8_byte, false},
1860 {-1, libcall, false}}}};
1861 static const
1862 struct processor_costs generic_cost = {
1863 COSTS_N_INSNS (1), /* cost of an add instruction */
1864 /* On all chips taken into consideration, lea is 2 cycles or more. With
1865 this cost, however, our current implementation of synth_mult results in
1866 the use of unnecessary temporary registers, causing regressions on several
1867 SPECfp benchmarks. */
1868 COSTS_N_INSNS (1) + 1, /* cost of a lea instruction */
1869 COSTS_N_INSNS (1), /* variable shift costs */
1870 COSTS_N_INSNS (1), /* constant shift costs */
1871 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
1872 COSTS_N_INSNS (4), /* HI */
1873 COSTS_N_INSNS (3), /* SI */
1874 COSTS_N_INSNS (4), /* DI */
1875 COSTS_N_INSNS (2)}, /* other */
1876 0, /* cost of multiply per each bit set */
1877 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
1878 COSTS_N_INSNS (26), /* HI */
1879 COSTS_N_INSNS (42), /* SI */
1880 COSTS_N_INSNS (74), /* DI */
1881 COSTS_N_INSNS (74)}, /* other */
1882 COSTS_N_INSNS (1), /* cost of movsx */
1883 COSTS_N_INSNS (1), /* cost of movzx */
1884 8, /* "large" insn */
1885 17, /* MOVE_RATIO */
1886 4, /* cost for loading QImode using movzbl */
1887 {4, 4, 4}, /* cost of loading integer registers
1888 in QImode, HImode and SImode.
1889 Relative to reg-reg move (2). */
1890 {4, 4, 4}, /* cost of storing integer registers */
1891 4, /* cost of reg,reg fld/fst */
1892 {12, 12, 12}, /* cost of loading fp registers
1893 in SFmode, DFmode and XFmode */
1894 {6, 6, 8}, /* cost of storing fp registers
1895 in SFmode, DFmode and XFmode */
1896 2, /* cost of moving MMX register */
1897 {8, 8}, /* cost of loading MMX registers
1898 in SImode and DImode */
1899 {8, 8}, /* cost of storing MMX registers
1900 in SImode and DImode */
1901 2, /* cost of moving SSE register */
1902 {8, 8, 8}, /* cost of loading SSE registers
1903 in SImode, DImode and TImode */
1904 {8, 8, 8}, /* cost of storing SSE registers
1905 in SImode, DImode and TImode */
1906 5, /* MMX or SSE register to integer */
1907 32, /* size of l1 cache. */
1908 512, /* size of l2 cache. */
1909 64, /* size of prefetch block */
1910 6, /* number of parallel prefetches */
1911 /* Benchmarks show large regressions on the K8 sixtrack benchmark when this
1912 value is increased to the perhaps more appropriate value of 5. */
1913 3, /* Branch cost */
1914 COSTS_N_INSNS (8), /* cost of FADD and FSUB insns. */
1915 COSTS_N_INSNS (8), /* cost of FMUL instruction. */
1916 COSTS_N_INSNS (20), /* cost of FDIV instruction. */
1917 COSTS_N_INSNS (8), /* cost of FABS instruction. */
1918 COSTS_N_INSNS (8), /* cost of FCHS instruction. */
1919 COSTS_N_INSNS (40), /* cost of FSQRT instruction. */
1920 generic_memcpy,
1921 generic_memset,
1922 1, /* scalar_stmt_cost. */
1923 1, /* scalar load_cost. */
1924 1, /* scalar_store_cost. */
1925 1, /* vec_stmt_cost. */
1926 1, /* vec_to_scalar_cost. */
1927 1, /* scalar_to_vec_cost. */
1928 1, /* vec_align_load_cost. */
1929 2, /* vec_unalign_load_cost. */
1930 1, /* vec_store_cost. */
1931 3, /* cond_taken_branch_cost. */
1932 1, /* cond_not_taken_branch_cost. */
1935 /* core_cost should produce code tuned for the Core family of CPUs. */
1936 static stringop_algs core_memcpy[2] = {
1937 {libcall, {{1024, rep_prefix_4_byte, true}, {-1, libcall, false}}},
1938 {libcall, {{24, loop, true}, {128, rep_prefix_8_byte, true},
1939 {-1, libcall, false}}}};
1940 static stringop_algs core_memset[2] = {
1941 {libcall, {{6, loop_1_byte, true},
1942 {24, loop, true},
1943 {8192, rep_prefix_4_byte, true},
1944 {-1, libcall, false}}},
1945 {libcall, {{24, loop, true}, {512, rep_prefix_8_byte, true},
1946 {-1, libcall, false}}}};
1948 static const
1949 struct processor_costs core_cost = {
1950 COSTS_N_INSNS (1), /* cost of an add instruction */
1951 /* On all chips taken into consideration, lea is 2 cycles or more. With
1952 this cost, however, our current implementation of synth_mult results in
1953 the use of unnecessary temporary registers, causing regressions on several
1954 SPECfp benchmarks. */
1955 COSTS_N_INSNS (1) + 1, /* cost of a lea instruction */
1956 COSTS_N_INSNS (1), /* variable shift costs */
1957 COSTS_N_INSNS (1), /* constant shift costs */
1958 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
1959 COSTS_N_INSNS (4), /* HI */
1960 COSTS_N_INSNS (3), /* SI */
1961 COSTS_N_INSNS (4), /* DI */
1962 COSTS_N_INSNS (2)}, /* other */
1963 0, /* cost of multiply per each bit set */
1964 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
1965 COSTS_N_INSNS (26), /* HI */
1966 COSTS_N_INSNS (42), /* SI */
1967 COSTS_N_INSNS (74), /* DI */
1968 COSTS_N_INSNS (74)}, /* other */
1969 COSTS_N_INSNS (1), /* cost of movsx */
1970 COSTS_N_INSNS (1), /* cost of movzx */
1971 8, /* "large" insn */
1972 17, /* MOVE_RATIO */
1973 4, /* cost for loading QImode using movzbl */
1974 {4, 4, 4}, /* cost of loading integer registers
1975 in QImode, HImode and SImode.
1976 Relative to reg-reg move (2). */
1977 {4, 4, 4}, /* cost of storing integer registers */
1978 4, /* cost of reg,reg fld/fst */
1979 {12, 12, 12}, /* cost of loading fp registers
1980 in SFmode, DFmode and XFmode */
1981 {6, 6, 8}, /* cost of storing fp registers
1982 in SFmode, DFmode and XFmode */
1983 2, /* cost of moving MMX register */
1984 {8, 8}, /* cost of loading MMX registers
1985 in SImode and DImode */
1986 {8, 8}, /* cost of storing MMX registers
1987 in SImode and DImode */
1988 2, /* cost of moving SSE register */
1989 {8, 8, 8}, /* cost of loading SSE registers
1990 in SImode, DImode and TImode */
1991 {8, 8, 8}, /* cost of storing SSE registers
1992 in SImode, DImode and TImode */
1993 5, /* MMX or SSE register to integer */
1994 64, /* size of l1 cache. */
1995 512, /* size of l2 cache. */
1996 64, /* size of prefetch block */
1997 6, /* number of parallel prefetches */
1998 /* FIXME: perhaps a more appropriate value is 5. */
1999 3, /* Branch cost */
2000 COSTS_N_INSNS (8), /* cost of FADD and FSUB insns. */
2001 COSTS_N_INSNS (8), /* cost of FMUL instruction. */
2002 COSTS_N_INSNS (20), /* cost of FDIV instruction. */
2003 COSTS_N_INSNS (8), /* cost of FABS instruction. */
2004 COSTS_N_INSNS (8), /* cost of FCHS instruction. */
2005 COSTS_N_INSNS (40), /* cost of FSQRT instruction. */
2006 core_memcpy,
2007 core_memset,
2008 1, /* scalar_stmt_cost. */
2009 1, /* scalar load_cost. */
2010 1, /* scalar_store_cost. */
2011 1, /* vec_stmt_cost. */
2012 1, /* vec_to_scalar_cost. */
2013 1, /* scalar_to_vec_cost. */
2014 1, /* vec_align_load_cost. */
2015 2, /* vec_unalign_load_cost. */
2016 1, /* vec_store_cost. */
2017 3, /* cond_taken_branch_cost. */
2018 1, /* cond_not_taken_branch_cost. */
2022 /* Set by -mtune. */
2023 const struct processor_costs *ix86_tune_cost = &pentium_cost;
2025 /* Set by -mtune or -Os. */
2026 const struct processor_costs *ix86_cost = &pentium_cost;
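/* Editor's sketch (assumption, not shown in this excerpt): option handling
   later redirects these two pointers so that cost queries go through the
   table matching -mtune, roughly

     ix86_tune_cost = processor_target_table[(int) ix86_tune].cost;
     ix86_cost = optimize_size ? &ix86_size_cost : ix86_tune_cost;

   where `ix86_size_cost' is a size-oriented table assumed to exist
   elsewhere in this file.  */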
2028 /* Processor feature/optimization bitmasks. */
2029 #define m_386 (1<<PROCESSOR_I386)
2030 #define m_486 (1<<PROCESSOR_I486)
2031 #define m_PENT (1<<PROCESSOR_PENTIUM)
2032 #define m_PPRO (1<<PROCESSOR_PENTIUMPRO)
2033 #define m_PENT4 (1<<PROCESSOR_PENTIUM4)
2034 #define m_NOCONA (1<<PROCESSOR_NOCONA)
2035 #define m_P4_NOCONA (m_PENT4 | m_NOCONA)
2036 #define m_CORE2 (1<<PROCESSOR_CORE2)
2037 #define m_NEHALEM (1<<PROCESSOR_NEHALEM)
2038 #define m_SANDYBRIDGE (1<<PROCESSOR_SANDYBRIDGE)
2039 #define m_HASWELL (1<<PROCESSOR_HASWELL)
2040 #define m_CORE_ALL (m_CORE2 | m_NEHALEM | m_SANDYBRIDGE | m_HASWELL)
2041 #define m_BONNELL (1<<PROCESSOR_BONNELL)
2042 #define m_SILVERMONT (1<<PROCESSOR_SILVERMONT)
2043 #define m_INTEL (1<<PROCESSOR_INTEL)
2045 #define m_GEODE (1<<PROCESSOR_GEODE)
2046 #define m_K6 (1<<PROCESSOR_K6)
2047 #define m_K6_GEODE (m_K6 | m_GEODE)
2048 #define m_K8 (1<<PROCESSOR_K8)
2049 #define m_ATHLON (1<<PROCESSOR_ATHLON)
2050 #define m_ATHLON_K8 (m_K8 | m_ATHLON)
2051 #define m_AMDFAM10 (1<<PROCESSOR_AMDFAM10)
2052 #define m_BDVER1 (1<<PROCESSOR_BDVER1)
2053 #define m_BDVER2 (1<<PROCESSOR_BDVER2)
2054 #define m_BDVER3 (1<<PROCESSOR_BDVER3)
2055 #define m_BDVER4 (1<<PROCESSOR_BDVER4)
2056 #define m_BTVER1 (1<<PROCESSOR_BTVER1)
2057 #define m_BTVER2 (1<<PROCESSOR_BTVER2)
2058 #define m_BDVER (m_BDVER1 | m_BDVER2 | m_BDVER3 | m_BDVER4)
2059 #define m_BTVER (m_BTVER1 | m_BTVER2)
2060 #define m_AMD_MULTIPLE (m_ATHLON_K8 | m_AMDFAM10 | m_BDVER | m_BTVER)
2062 #define m_GENERIC (1<<PROCESSOR_GENERIC)
2064 const char* ix86_tune_feature_names[X86_TUNE_LAST] = {
2065 #undef DEF_TUNE
2066 #define DEF_TUNE(tune, name, selector) name,
2067 #include "x86-tune.def"
2068 #undef DEF_TUNE
2071 /* Feature tests against the various tunings. */
2072 unsigned char ix86_tune_features[X86_TUNE_LAST];
2074 /* Feature tests against the various tunings used to create ix86_tune_features
2075 based on the processor mask. */
2076 static unsigned int initial_ix86_tune_features[X86_TUNE_LAST] = {
2077 #undef DEF_TUNE
2078 #define DEF_TUNE(tune, name, selector) selector,
2079 #include "x86-tune.def"
2080 #undef DEF_TUNE
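/* Editor's note: x86-tune.def is an X-macro file, so a single entry such
   as the hypothetical

     DEF_TUNE (X86_TUNE_USE_LEAVE, "use_leave", m_386 | m_CORE_ALL | m_GENERIC)

   expands to the string "use_leave" in ix86_tune_feature_names and to the
   selector mask in initial_ix86_tune_features above, keeping the two
   arrays in sync by construction.  The entry shown is for illustration
   only; see x86-tune.def for the real list.  */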
2083 /* Feature tests against the various architecture variations. */
2084 unsigned char ix86_arch_features[X86_ARCH_LAST];
2086 /* Feature tests against the various architecture variations, used to create
2087 ix86_arch_features based on the processor mask. */
2088 static unsigned int initial_ix86_arch_features[X86_ARCH_LAST] = {
2089 /* X86_ARCH_CMOV: Conditional move was added for pentiumpro. */
2090 ~(m_386 | m_486 | m_PENT | m_K6),
2092 /* X86_ARCH_CMPXCHG: Compare and exchange was added for 80486. */
2093 ~m_386,
2095 /* X86_ARCH_CMPXCHG8B: Compare and exchange 8 bytes was added for pentium. */
2096 ~(m_386 | m_486),
2098 /* X86_ARCH_XADD: Exchange and add was added for 80486. */
2099 ~m_386,
2101 /* X86_ARCH_BSWAP: Byteswap was added for 80486. */
2102 ~m_386,
2105 /* If the average insn count for a single function invocation is
2106 lower than this constant, emit fast (but longer) prologue and
2107 epilogue code. */
2108 #define FAST_PROLOGUE_INSN_COUNT 20
2110 /* Names for 8 (low), 8 (high), and 16-bit registers, respectively. */
2111 static const char *const qi_reg_name[] = QI_REGISTER_NAMES;
2112 static const char *const qi_high_reg_name[] = QI_HIGH_REGISTER_NAMES;
2113 static const char *const hi_reg_name[] = HI_REGISTER_NAMES;
2115 /* Array of the smallest class containing reg number REGNO, indexed by
2116 REGNO. Used by REGNO_REG_CLASS in i386.h. */
2118 enum reg_class const regclass_map[FIRST_PSEUDO_REGISTER] =
2120 /* ax, dx, cx, bx */
2121 AREG, DREG, CREG, BREG,
2122 /* si, di, bp, sp */
2123 SIREG, DIREG, NON_Q_REGS, NON_Q_REGS,
2124 /* FP registers */
2125 FP_TOP_REG, FP_SECOND_REG, FLOAT_REGS, FLOAT_REGS,
2126 FLOAT_REGS, FLOAT_REGS, FLOAT_REGS, FLOAT_REGS,
2127 /* arg pointer */
2128 NON_Q_REGS,
2129 /* flags, fpsr, fpcr, frame */
2130 NO_REGS, NO_REGS, NO_REGS, NON_Q_REGS,
2131 /* SSE registers */
2132 SSE_FIRST_REG, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS,
2133 SSE_REGS, SSE_REGS,
2134 /* MMX registers */
2135 MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS,
2136 MMX_REGS, MMX_REGS,
2137 /* REX registers */
2138 NON_Q_REGS, NON_Q_REGS, NON_Q_REGS, NON_Q_REGS,
2139 NON_Q_REGS, NON_Q_REGS, NON_Q_REGS, NON_Q_REGS,
2140 /* SSE REX registers */
2141 SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS,
2142 SSE_REGS, SSE_REGS,
2143 /* AVX-512 SSE registers */
2144 EVEX_SSE_REGS, EVEX_SSE_REGS, EVEX_SSE_REGS, EVEX_SSE_REGS,
2145 EVEX_SSE_REGS, EVEX_SSE_REGS, EVEX_SSE_REGS, EVEX_SSE_REGS,
2146 EVEX_SSE_REGS, EVEX_SSE_REGS, EVEX_SSE_REGS, EVEX_SSE_REGS,
2147 EVEX_SSE_REGS, EVEX_SSE_REGS, EVEX_SSE_REGS, EVEX_SSE_REGS,
2148 /* Mask registers. */
2149 MASK_REGS, MASK_EVEX_REGS, MASK_EVEX_REGS, MASK_EVEX_REGS,
2150 MASK_EVEX_REGS, MASK_EVEX_REGS, MASK_EVEX_REGS, MASK_EVEX_REGS,
2151 /* MPX bound registers */
2152 BND_REGS, BND_REGS, BND_REGS, BND_REGS,
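/* Editor's example (grounded in the table above): REGNO_REG_CLASS simply
   indexes this array, so the entry for %eax yields AREG, %esp falls into
   NON_Q_REGS, and all eight MMX registers map to MMX_REGS.  */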
2155 /* The "default" register map used in 32bit mode. */
2157 int const dbx_register_map[FIRST_PSEUDO_REGISTER] =
2159 0, 2, 1, 3, 6, 7, 4, 5, /* general regs */
2160 12, 13, 14, 15, 16, 17, 18, 19, /* fp regs */
2161 -1, -1, -1, -1, -1, /* arg, flags, fpsr, fpcr, frame */
2162 21, 22, 23, 24, 25, 26, 27, 28, /* SSE */
2163 29, 30, 31, 32, 33, 34, 35, 36, /* MMX */
2164 -1, -1, -1, -1, -1, -1, -1, -1, /* extended integer registers */
2165 -1, -1, -1, -1, -1, -1, -1, -1, /* extended SSE registers */
2166 -1, -1, -1, -1, -1, -1, -1, -1, /* AVX-512 registers 16-23*/
2167 -1, -1, -1, -1, -1, -1, -1, -1, /* AVX-512 registers 24-31*/
2168 93, 94, 95, 96, 97, 98, 99, 100, /* Mask registers */
2169 101, 102, 103, 104, /* bound registers */
2172 /* The "default" register map used in 64bit mode. */
2174 int const dbx64_register_map[FIRST_PSEUDO_REGISTER] =
2176 0, 1, 2, 3, 4, 5, 6, 7, /* general regs */
2177 33, 34, 35, 36, 37, 38, 39, 40, /* fp regs */
2178 -1, -1, -1, -1, -1, /* arg, flags, fpsr, fpcr, frame */
2179 17, 18, 19, 20, 21, 22, 23, 24, /* SSE */
2180 41, 42, 43, 44, 45, 46, 47, 48, /* MMX */
2181 8,9,10,11,12,13,14,15, /* extended integer registers */
2182 25, 26, 27, 28, 29, 30, 31, 32, /* extended SSE registers */
2183 67, 68, 69, 70, 71, 72, 73, 74, /* AVX-512 registers 16-23 */
2184 75, 76, 77, 78, 79, 80, 81, 82, /* AVX-512 registers 24-31 */
2185 118, 119, 120, 121, 122, 123, 124, 125, /* Mask registers */
2186 126, 127, 128, 129, /* bound registers */
2189 /* Define the register numbers to be used in Dwarf debugging information.
2190 The SVR4 reference port C compiler uses the following register numbers
2191 in its Dwarf output code:
2192 0 for %eax (gcc regno = 0)
2193 1 for %ecx (gcc regno = 2)
2194 2 for %edx (gcc regno = 1)
2195 3 for %ebx (gcc regno = 3)
2196 4 for %esp (gcc regno = 7)
2197 5 for %ebp (gcc regno = 6)
2198 6 for %esi (gcc regno = 4)
2199 7 for %edi (gcc regno = 5)
2200 The following three DWARF register numbers are never generated by
2201 the SVR4 C compiler or by the GNU compilers, but SDB on x86/svr4
2202 believes these numbers have these meanings.
2203 8 for %eip (no gcc equivalent)
2204 9 for %eflags (gcc regno = 17)
2205 10 for %trapno (no gcc equivalent)
2206 It is not at all clear how we should number the FP stack registers
2207 for the x86 architecture. If the version of SDB on x86/svr4 were
2208 a bit less brain dead with respect to floating-point then we would
2209 have a precedent to follow with respect to DWARF register numbers
2210 for x86 FP registers, but the SDB on x86/svr4 is so completely
2211 broken with respect to FP registers that it is hardly worth thinking
2212 of it as something to strive for compatibility with.
2213 The version of x86/svr4 SDB I have at the moment does (partially)
2214 seem to believe that DWARF register number 11 is associated with
2215 the x86 register %st(0), but that's about all. Higher DWARF
2216 register numbers don't seem to be associated with anything in
2217 particular, and even for DWARF regno 11, SDB only seems to under-
2218 stand that it should say that a variable lives in %st(0) (when
2219 asked via an `=' command) if we said it was in DWARF regno 11,
2220 but SDB still prints garbage when asked for the value of the
2221 variable in question (via a `/' command).
2222 (Also note that the labels SDB prints for various FP stack regs
2223 when doing an `x' command are all wrong.)
2224 Note that these problems generally don't affect the native SVR4
2225 C compiler because it doesn't allow the use of -O with -g and
2226 because when it is *not* optimizing, it allocates a memory
2227 location for each floating-point variable, and the memory
2228 location is what gets described in the DWARF AT_location
2229 attribute for the variable in question.
2230 Regardless of the severe mental illness of the x86/svr4 SDB, we
2231 do something sensible here and we use the following DWARF
2232 register numbers. Note that these are all stack-top-relative
2233 numbers.
2234 11 for %st(0) (gcc regno = 8)
2235 12 for %st(1) (gcc regno = 9)
2236 13 for %st(2) (gcc regno = 10)
2237 14 for %st(3) (gcc regno = 11)
2238 15 for %st(4) (gcc regno = 12)
2239 16 for %st(5) (gcc regno = 13)
2240 17 for %st(6) (gcc regno = 14)
2241 18 for %st(7) (gcc regno = 15)
2242 */
2243 int const svr4_dbx_register_map[FIRST_PSEUDO_REGISTER] =
2245 0, 2, 1, 3, 6, 7, 5, 4, /* general regs */
2246 11, 12, 13, 14, 15, 16, 17, 18, /* fp regs */
2247 -1, 9, -1, -1, -1, /* arg, flags, fpsr, fpcr, frame */
2248 21, 22, 23, 24, 25, 26, 27, 28, /* SSE registers */
2249 29, 30, 31, 32, 33, 34, 35, 36, /* MMX registers */
2250 -1, -1, -1, -1, -1, -1, -1, -1, /* extended integer registers */
2251 -1, -1, -1, -1, -1, -1, -1, -1, /* extended SSE registers */
2252 -1, -1, -1, -1, -1, -1, -1, -1, /* AVX-512 registers 16-23*/
2253 -1, -1, -1, -1, -1, -1, -1, -1, /* AVX-512 registers 24-31*/
2254 93, 94, 95, 96, 97, 98, 99, 100, /* Mask registers */
2255 101, 102, 103, 104, /* bound registers */
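/* Worked example (editor's addition): combining the numbering in the
   comment above with this table, gcc regno 6 (%ebp) is emitted as DWARF
   register 5 and gcc regno 7 (%esp) as DWARF register 4, i.e.

     svr4_dbx_register_map[6] == 5
     svr4_dbx_register_map[7] == 4
*/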
2258 /* Define parameter passing and return registers. */
2260 static int const x86_64_int_parameter_registers[6] =
2262 DI_REG, SI_REG, DX_REG, CX_REG, R8_REG, R9_REG
2265 static int const x86_64_ms_abi_int_parameter_registers[4] =
2267 CX_REG, DX_REG, R8_REG, R9_REG
2270 static int const x86_64_int_return_registers[4] =
2272 AX_REG, DX_REG, DI_REG, SI_REG
2275 /* Additional registers that are clobbered by SYSV calls. */
2277 int const x86_64_ms_sysv_extra_clobbered_registers[12] =
2279 SI_REG, DI_REG,
2280 XMM6_REG, XMM7_REG,
2281 XMM8_REG, XMM9_REG, XMM10_REG, XMM11_REG,
2282 XMM12_REG, XMM13_REG, XMM14_REG, XMM15_REG
2285 /* Define the structure for the machine field in struct function. */
2287 struct GTY(()) stack_local_entry {
2288 unsigned short mode;
2289 unsigned short n;
2290 rtx rtl;
2291 struct stack_local_entry *next;
2294 /* Structure describing stack frame layout.
2295 Stack grows downward:
2297 [arguments]
2298 <- ARG_POINTER
2299 saved pc
2301 saved static chain if ix86_static_chain_on_stack
2303 saved frame pointer if frame_pointer_needed
2304 <- HARD_FRAME_POINTER
2305 [saved regs]
2306 <- regs_save_offset
2307 [padding0]
2309 [saved SSE regs]
2310 <- sse_regs_save_offset
2311 [padding1] |
2312 | <- FRAME_POINTER
2313 [va_arg registers] |
2315 [frame] |
2317 [padding2] | = to_allocate
2318 <- STACK_POINTER
2319 */
2320 struct ix86_frame
2322 int nsseregs;
2323 int nregs;
2324 int va_arg_size;
2325 int red_zone_size;
2326 int outgoing_arguments_size;
2328 /* The offsets relative to ARG_POINTER. */
2329 HOST_WIDE_INT frame_pointer_offset;
2330 HOST_WIDE_INT hard_frame_pointer_offset;
2331 HOST_WIDE_INT stack_pointer_offset;
2332 HOST_WIDE_INT hfp_save_offset;
2333 HOST_WIDE_INT reg_save_offset;
2334 HOST_WIDE_INT sse_reg_save_offset;
2336 /* When save_regs_using_mov is set, emit prologue using
2337 move instead of push instructions. */
2338 bool save_regs_using_mov;
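/* Editor's sketch (an assumption drawn from the layout comment above, not
   a quote of the prologue code): all offsets are measured from
   ARG_POINTER, so the amount the prologue still has to allocate after the
   register saves corresponds to the "= to_allocate" span in the diagram,
   roughly

     to_allocate = frame.stack_pointer_offset - frame.sse_reg_save_offset;
*/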
2341 /* Which cpu are we scheduling for. */
2342 enum attr_cpu ix86_schedule;
2344 /* Which cpu are we optimizing for. */
2345 enum processor_type ix86_tune;
2347 /* Which instruction set architecture to use. */
2348 enum processor_type ix86_arch;
2350 /* True if processor has SSE prefetch instruction. */
2351 unsigned char x86_prefetch_sse;
2353 /* -mstackrealign option */
2354 static const char ix86_force_align_arg_pointer_string[]
2355 = "force_align_arg_pointer";
2357 static rtx (*ix86_gen_leave) (void);
2358 static rtx (*ix86_gen_add3) (rtx, rtx, rtx);
2359 static rtx (*ix86_gen_sub3) (rtx, rtx, rtx);
2360 static rtx (*ix86_gen_sub3_carry) (rtx, rtx, rtx, rtx, rtx);
2361 static rtx (*ix86_gen_one_cmpl2) (rtx, rtx);
2362 static rtx (*ix86_gen_monitor) (rtx, rtx, rtx);
2363 static rtx (*ix86_gen_andsp) (rtx, rtx, rtx);
2364 static rtx (*ix86_gen_allocate_stack_worker) (rtx, rtx);
2365 static rtx (*ix86_gen_adjust_stack_and_probe) (rtx, rtx, rtx);
2366 static rtx (*ix86_gen_probe_stack_range) (rtx, rtx, rtx);
2367 static rtx (*ix86_gen_tls_global_dynamic_64) (rtx, rtx, rtx);
2368 static rtx (*ix86_gen_tls_local_dynamic_base_64) (rtx, rtx);
2370 /* Preferred alignment for stack boundary in bits. */
2371 unsigned int ix86_preferred_stack_boundary;
2373 /* Alignment for incoming stack boundary in bits specified at
2374 command line. */
2375 static unsigned int ix86_user_incoming_stack_boundary;
2377 /* Default alignment for incoming stack boundary in bits. */
2378 static unsigned int ix86_default_incoming_stack_boundary;
2380 /* Alignment for incoming stack boundary in bits. */
2381 unsigned int ix86_incoming_stack_boundary;
2383 /* Calling abi specific va_list type nodes. */
2384 static GTY(()) tree sysv_va_list_type_node;
2385 static GTY(()) tree ms_va_list_type_node;
2387 /* Prefix built by ASM_GENERATE_INTERNAL_LABEL. */
2388 char internal_label_prefix[16];
2389 int internal_label_prefix_len;
2391 /* Fence to use after loop using movnt. */
2392 tree x86_mfence;
2394 /* Register class used for passing a given 64-bit part of the argument.
2395 These represent classes as documented by the psABI, with the exception
2396 of the SSESF and SSEDF classes, which are basically the SSE class, except
2397 that gcc will use an SFmode or DFmode move instead of DImode to avoid
2399 reformatting penalties.  Similarly we play games with INTEGERSI_CLASS to
2400 use cheaper SImode moves whenever possible (the upper half contains only padding). */
2401 enum x86_64_reg_class
2403 X86_64_NO_CLASS,
2404 X86_64_INTEGER_CLASS,
2405 X86_64_INTEGERSI_CLASS,
2406 X86_64_SSE_CLASS,
2407 X86_64_SSESF_CLASS,
2408 X86_64_SSEDF_CLASS,
2409 X86_64_SSEUP_CLASS,
2410 X86_64_X87_CLASS,
2411 X86_64_X87UP_CLASS,
2412 X86_64_COMPLEX_X87_CLASS,
2413 X86_64_MEMORY_CLASS
2416 #define MAX_CLASSES 8
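/* Illustrative example (editor's addition, following the comment above the
   enum): a scalar double argument is classified X86_64_SSEDF_CLASS rather
   than plain X86_64_SSE_CLASS so the move can be emitted in DFmode, while
   an aggregate too large for registers ends up as X86_64_MEMORY_CLASS and
   is passed on the stack.  */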
2418 /* Table of constants used by fldpi, fldln2, etc.... */
2419 static REAL_VALUE_TYPE ext_80387_constants_table [5];
2420 static bool ext_80387_constants_init = 0;
2423 static struct machine_function * ix86_init_machine_status (void);
2424 static rtx ix86_function_value (const_tree, const_tree, bool);
2425 static bool ix86_function_value_regno_p (const unsigned int);
2426 static unsigned int ix86_function_arg_boundary (machine_mode,
2427 const_tree);
2428 static rtx ix86_static_chain (const_tree, bool);
2429 static int ix86_function_regparm (const_tree, const_tree);
2430 static void ix86_compute_frame_layout (struct ix86_frame *);
2431 static bool ix86_expand_vector_init_one_nonzero (bool, machine_mode,
2432 rtx, rtx, int);
2433 static void ix86_add_new_builtins (HOST_WIDE_INT);
2434 static tree ix86_canonical_va_list_type (tree);
2435 static void predict_jump (int);
2436 static unsigned int split_stack_prologue_scratch_regno (void);
2437 static bool i386_asm_output_addr_const_extra (FILE *, rtx);
2439 enum ix86_function_specific_strings
2441 IX86_FUNCTION_SPECIFIC_ARCH,
2442 IX86_FUNCTION_SPECIFIC_TUNE,
2443 IX86_FUNCTION_SPECIFIC_MAX
2446 static char *ix86_target_string (HOST_WIDE_INT, int, const char *,
2447 const char *, enum fpmath_unit, bool);
2448 static void ix86_function_specific_save (struct cl_target_option *,
2449 struct gcc_options *opts);
2450 static void ix86_function_specific_restore (struct gcc_options *opts,
2451 struct cl_target_option *);
2452 static void ix86_function_specific_print (FILE *, int,
2453 struct cl_target_option *);
2454 static bool ix86_valid_target_attribute_p (tree, tree, tree, int);
2455 static bool ix86_valid_target_attribute_inner_p (tree, char *[],
2456 struct gcc_options *,
2457 struct gcc_options *,
2458 struct gcc_options *);
2459 static bool ix86_can_inline_p (tree, tree);
2460 static void ix86_set_current_function (tree);
2461 static unsigned int ix86_minimum_incoming_stack_boundary (bool);
2463 static enum calling_abi ix86_function_abi (const_tree);
2466 #ifndef SUBTARGET32_DEFAULT_CPU
2467 #define SUBTARGET32_DEFAULT_CPU "i386"
2468 #endif
2470 /* Whether -mtune= or -march= were specified */
2471 static int ix86_tune_defaulted;
2472 static int ix86_arch_specified;
2474 /* Vectorization library interface and handlers. */
2475 static tree (*ix86_veclib_handler) (enum built_in_function, tree, tree);
2477 static tree ix86_veclibabi_svml (enum built_in_function, tree, tree);
2478 static tree ix86_veclibabi_acml (enum built_in_function, tree, tree);
2480 /* Processor target table, indexed by processor number */
2481 struct ptt
2483 const char *const name; /* processor name */
2484 const struct processor_costs *cost; /* Processor costs */
2485 const int align_loop; /* Default alignments. */
2486 const int align_loop_max_skip;
2487 const int align_jump;
2488 const int align_jump_max_skip;
2489 const int align_func;
2492 /* This table must be in sync with enum processor_type in i386.h. */
2493 static const struct ptt processor_target_table[PROCESSOR_max] =
2495 {"generic", &generic_cost, 16, 10, 16, 10, 16},
2496 {"i386", &i386_cost, 4, 3, 4, 3, 4},
2497 {"i486", &i486_cost, 16, 15, 16, 15, 16},
2498 {"pentium", &pentium_cost, 16, 7, 16, 7, 16},
2499 {"pentiumpro", &pentiumpro_cost, 16, 15, 16, 10, 16},
2500 {"pentium4", &pentium4_cost, 0, 0, 0, 0, 0},
2501 {"nocona", &nocona_cost, 0, 0, 0, 0, 0},
2502 {"core2", &core_cost, 16, 10, 16, 10, 16},
2503 {"nehalem", &core_cost, 16, 10, 16, 10, 16},
2504 {"sandybridge", &core_cost, 16, 10, 16, 10, 16},
2505 {"haswell", &core_cost, 16, 10, 16, 10, 16},
2506 {"bonnell", &atom_cost, 16, 15, 16, 7, 16},
2507 {"silvermont", &slm_cost, 16, 15, 16, 7, 16},
2508 {"intel", &intel_cost, 16, 15, 16, 7, 16},
2509 {"geode", &geode_cost, 0, 0, 0, 0, 0},
2510 {"k6", &k6_cost, 32, 7, 32, 7, 32},
2511 {"athlon", &athlon_cost, 16, 7, 16, 7, 16},
2512 {"k8", &k8_cost, 16, 7, 16, 7, 16},
2513 {"amdfam10", &amdfam10_cost, 32, 24, 32, 7, 32},
2514 {"bdver1", &bdver1_cost, 16, 10, 16, 7, 11},
2515 {"bdver2", &bdver2_cost, 16, 10, 16, 7, 11},
2516 {"bdver3", &bdver3_cost, 16, 10, 16, 7, 11},
2517 {"bdver4", &bdver4_cost, 16, 10, 16, 7, 11},
2518 {"btver1", &btver1_cost, 16, 10, 16, 7, 11},
2519 {"btver2", &btver2_cost, 16, 10, 16, 7, 11}
2522 static unsigned int
2523 rest_of_handle_insert_vzeroupper (void)
2525 int i;
2527 /* vzeroupper instructions are inserted immediately after reload to
2528 account for possible spills from 256bit registers. The pass
2529 reuses the mode switching infrastructure by re-running the mode insertion
2530 pass, so disable entities that have already been processed. */
2531 for (i = 0; i < MAX_386_ENTITIES; i++)
2532 ix86_optimize_mode_switching[i] = 0;
2534 ix86_optimize_mode_switching[AVX_U128] = 1;
2536 /* Call optimize_mode_switching. */
2537 g->get_passes ()->execute_pass_mode_switching ();
2538 return 0;
2541 namespace {
2543 const pass_data pass_data_insert_vzeroupper =
2545 RTL_PASS, /* type */
2546 "vzeroupper", /* name */
2547 OPTGROUP_NONE, /* optinfo_flags */
2548 TV_NONE, /* tv_id */
2549 0, /* properties_required */
2550 0, /* properties_provided */
2551 0, /* properties_destroyed */
2552 0, /* todo_flags_start */
2553 TODO_df_finish, /* todo_flags_finish */
2556 class pass_insert_vzeroupper : public rtl_opt_pass
2558 public:
2559 pass_insert_vzeroupper(gcc::context *ctxt)
2560 : rtl_opt_pass(pass_data_insert_vzeroupper, ctxt)
2563 /* opt_pass methods: */
2564 virtual bool gate (function *)
2566 return TARGET_AVX && !TARGET_AVX512F && TARGET_VZEROUPPER;
2569 virtual unsigned int execute (function *)
2571 return rest_of_handle_insert_vzeroupper ();
2574 }; // class pass_insert_vzeroupper
2576 } // anon namespace
2578 rtl_opt_pass *
2579 make_pass_insert_vzeroupper (gcc::context *ctxt)
2581 return new pass_insert_vzeroupper (ctxt);
2584 /* Return true if a red-zone is in use. */
2586 static inline bool
2587 ix86_using_red_zone (void)
2589 return TARGET_RED_ZONE && !TARGET_64BIT_MS_ABI;
2592 /* Return a string that documents the current -m options. The caller is
2593 responsible for freeing the string. */
2595 static char *
2596 ix86_target_string (HOST_WIDE_INT isa, int flags, const char *arch,
2597 const char *tune, enum fpmath_unit fpmath,
2598 bool add_nl_p)
2600 struct ix86_target_opts
2602 const char *option; /* option string */
2603 HOST_WIDE_INT mask; /* isa mask options */
2606 /* This table is ordered so that options like -msse4.2 that imply
2607 preceding options will be matched first. */
2608 static struct ix86_target_opts isa_opts[] =
2610 { "-mfma4", OPTION_MASK_ISA_FMA4 },
2611 { "-mfma", OPTION_MASK_ISA_FMA },
2612 { "-mxop", OPTION_MASK_ISA_XOP },
2613 { "-mlwp", OPTION_MASK_ISA_LWP },
2614 { "-mavx512f", OPTION_MASK_ISA_AVX512F },
2615 { "-mavx512er", OPTION_MASK_ISA_AVX512ER },
2616 { "-mavx512cd", OPTION_MASK_ISA_AVX512CD },
2617 { "-mavx512pf", OPTION_MASK_ISA_AVX512PF },
2618 { "-mavx512dq", OPTION_MASK_ISA_AVX512DQ },
2619 { "-mavx512bw", OPTION_MASK_ISA_AVX512BW },
2620 { "-mavx512vl", OPTION_MASK_ISA_AVX512VL },
2621 { "-msse4a", OPTION_MASK_ISA_SSE4A },
2622 { "-msse4.2", OPTION_MASK_ISA_SSE4_2 },
2623 { "-msse4.1", OPTION_MASK_ISA_SSE4_1 },
2624 { "-mssse3", OPTION_MASK_ISA_SSSE3 },
2625 { "-msse3", OPTION_MASK_ISA_SSE3 },
2626 { "-msse2", OPTION_MASK_ISA_SSE2 },
2627 { "-msse", OPTION_MASK_ISA_SSE },
2628 { "-m3dnow", OPTION_MASK_ISA_3DNOW },
2629 { "-m3dnowa", OPTION_MASK_ISA_3DNOW_A },
2630 { "-mmmx", OPTION_MASK_ISA_MMX },
2631 { "-mabm", OPTION_MASK_ISA_ABM },
2632 { "-mbmi", OPTION_MASK_ISA_BMI },
2633 { "-mbmi2", OPTION_MASK_ISA_BMI2 },
2634 { "-mlzcnt", OPTION_MASK_ISA_LZCNT },
2635 { "-mhle", OPTION_MASK_ISA_HLE },
2636 { "-mfxsr", OPTION_MASK_ISA_FXSR },
2637 { "-mrdseed", OPTION_MASK_ISA_RDSEED },
2638 { "-mprfchw", OPTION_MASK_ISA_PRFCHW },
2639 { "-madx", OPTION_MASK_ISA_ADX },
2640 { "-mtbm", OPTION_MASK_ISA_TBM },
2641 { "-mpopcnt", OPTION_MASK_ISA_POPCNT },
2642 { "-mmovbe", OPTION_MASK_ISA_MOVBE },
2643 { "-mcrc32", OPTION_MASK_ISA_CRC32 },
2644 { "-maes", OPTION_MASK_ISA_AES },
2645 { "-msha", OPTION_MASK_ISA_SHA },
2646 { "-mpclmul", OPTION_MASK_ISA_PCLMUL },
2647 { "-mfsgsbase", OPTION_MASK_ISA_FSGSBASE },
2648 { "-mrdrnd", OPTION_MASK_ISA_RDRND },
2649 { "-mf16c", OPTION_MASK_ISA_F16C },
2650 { "-mrtm", OPTION_MASK_ISA_RTM },
2651 { "-mxsave", OPTION_MASK_ISA_XSAVE },
2652 { "-mxsaveopt", OPTION_MASK_ISA_XSAVEOPT },
2653 { "-mprefetchwt1", OPTION_MASK_ISA_PREFETCHWT1 },
2654 { "-mclflushopt", OPTION_MASK_ISA_CLFLUSHOPT },
2655 { "-mxsavec", OPTION_MASK_ISA_XSAVEC },
2656 { "-mxsaves", OPTION_MASK_ISA_XSAVES },
2657 { "-mmpx", OPTION_MASK_ISA_MPX },
2660 /* Flag options. */
2661 static struct ix86_target_opts flag_opts[] =
2663 { "-m128bit-long-double", MASK_128BIT_LONG_DOUBLE },
2664 { "-mlong-double-128", MASK_LONG_DOUBLE_128 },
2665 { "-mlong-double-64", MASK_LONG_DOUBLE_64 },
2666 { "-m80387", MASK_80387 },
2667 { "-maccumulate-outgoing-args", MASK_ACCUMULATE_OUTGOING_ARGS },
2668 { "-malign-double", MASK_ALIGN_DOUBLE },
2669 { "-mcld", MASK_CLD },
2670 { "-mfp-ret-in-387", MASK_FLOAT_RETURNS },
2671 { "-mieee-fp", MASK_IEEE_FP },
2672 { "-minline-all-stringops", MASK_INLINE_ALL_STRINGOPS },
2673 { "-minline-stringops-dynamically", MASK_INLINE_STRINGOPS_DYNAMICALLY },
2674 { "-mms-bitfields", MASK_MS_BITFIELD_LAYOUT },
2675 { "-mno-align-stringops", MASK_NO_ALIGN_STRINGOPS },
2676 { "-mno-fancy-math-387", MASK_NO_FANCY_MATH_387 },
2677 { "-mno-push-args", MASK_NO_PUSH_ARGS },
2678 { "-mno-red-zone", MASK_NO_RED_ZONE },
2679 { "-momit-leaf-frame-pointer", MASK_OMIT_LEAF_FRAME_POINTER },
2680 { "-mrecip", MASK_RECIP },
2681 { "-mrtd", MASK_RTD },
2682 { "-msseregparm", MASK_SSEREGPARM },
2683 { "-mstack-arg-probe", MASK_STACK_PROBE },
2684 { "-mtls-direct-seg-refs", MASK_TLS_DIRECT_SEG_REFS },
2685 { "-mvect8-ret-in-mem", MASK_VECT8_RETURNS },
2686 { "-m8bit-idiv", MASK_USE_8BIT_IDIV },
2687 { "-mvzeroupper", MASK_VZEROUPPER },
2688 { "-mavx256-split-unaligned-load", MASK_AVX256_SPLIT_UNALIGNED_LOAD},
2689 { "-mavx256-split-unaligned-store", MASK_AVX256_SPLIT_UNALIGNED_STORE},
2690 { "-mprefer-avx128", MASK_PREFER_AVX128},
2693 const char *opts[ARRAY_SIZE (isa_opts) + ARRAY_SIZE (flag_opts) + 6][2];
2695 char isa_other[40];
2696 char target_other[40];
2697 unsigned num = 0;
2698 unsigned i, j;
2699 char *ret;
2700 char *ptr;
2701 size_t len;
2702 size_t line_len;
2703 size_t sep_len;
2704 const char *abi;
2706 memset (opts, '\0', sizeof (opts));
2708 /* Add -march= option. */
2709 if (arch)
2711 opts[num][0] = "-march=";
2712 opts[num++][1] = arch;
2715 /* Add -mtune= option. */
2716 if (tune)
2718 opts[num][0] = "-mtune=";
2719 opts[num++][1] = tune;
2722 /* Add -m32/-m64/-mx32. */
2723 if ((isa & OPTION_MASK_ISA_64BIT) != 0)
2725 if ((isa & OPTION_MASK_ABI_64) != 0)
2726 abi = "-m64";
2727 else
2728 abi = "-mx32";
2729 isa &= ~ (OPTION_MASK_ISA_64BIT
2730 | OPTION_MASK_ABI_64
2731 | OPTION_MASK_ABI_X32);
2733 else
2734 abi = "-m32";
2735 opts[num++][0] = abi;
2737 /* Pick out the options in isa options. */
2738 for (i = 0; i < ARRAY_SIZE (isa_opts); i++)
2740 if ((isa & isa_opts[i].mask) != 0)
2742 opts[num++][0] = isa_opts[i].option;
2743 isa &= ~ isa_opts[i].mask;
2747 if (isa && add_nl_p)
2749 opts[num++][0] = isa_other;
2750 sprintf (isa_other, "(other isa: %#" HOST_WIDE_INT_PRINT "x)",
2751 isa);
2754 /* Add flag options. */
2755 for (i = 0; i < ARRAY_SIZE (flag_opts); i++)
2757 if ((flags & flag_opts[i].mask) != 0)
2759 opts[num++][0] = flag_opts[i].option;
2760 flags &= ~ flag_opts[i].mask;
2764 if (flags && add_nl_p)
2766 opts[num++][0] = target_other;
2767 sprintf (target_other, "(other flags: %#x)", flags);
2770 /* Add -fpmath= option. */
2771 if (fpmath)
2773 opts[num][0] = "-mfpmath=";
2774 switch ((int) fpmath)
2776 case FPMATH_387:
2777 opts[num++][1] = "387";
2778 break;
2780 case FPMATH_SSE:
2781 opts[num++][1] = "sse";
2782 break;
2784 case FPMATH_387 | FPMATH_SSE:
2785 opts[num++][1] = "sse+387";
2786 break;
2788 default:
2789 gcc_unreachable ();
2793 /* Any options? */
2794 if (num == 0)
2795 return NULL;
2797 gcc_assert (num < ARRAY_SIZE (opts));
2799 /* Size the string. */
2800 len = 0;
2801 sep_len = (add_nl_p) ? 3 : 1;
2802 for (i = 0; i < num; i++)
2804 len += sep_len;
2805 for (j = 0; j < 2; j++)
2806 if (opts[i][j])
2807 len += strlen (opts[i][j]);
2810 /* Build the string. */
2811 ret = ptr = (char *) xmalloc (len);
2812 line_len = 0;
2814 for (i = 0; i < num; i++)
2816 size_t len2[2];
2818 for (j = 0; j < 2; j++)
2819 len2[j] = (opts[i][j]) ? strlen (opts[i][j]) : 0;
2821 if (i != 0)
2823 *ptr++ = ' ';
2824 line_len++;
2826 if (add_nl_p && line_len + len2[0] + len2[1] > 70)
2828 *ptr++ = '\\';
2829 *ptr++ = '\n';
2830 line_len = 0;
2834 for (j = 0; j < 2; j++)
2835 if (opts[i][j])
2837 memcpy (ptr, opts[i][j], len2[j]);
2838 ptr += len2[j];
2839 line_len += len2[j];
2843 *ptr = '\0';
2844 gcc_assert (ret + len >= ptr);
2846 return ret;
2849 /* Return true if profiling code should be emitted before the
2850 prologue, false otherwise.
2851 Note: for x86 this is only the case when -mfentry is in use. */
2852 static bool
2853 ix86_profile_before_prologue (void)
2855 return flag_fentry != 0;
2858 /* Function that is callable from the debugger to print the current
2859 options. */
2860 void ATTRIBUTE_UNUSED
2861 ix86_debug_options (void)
2863 char *opts = ix86_target_string (ix86_isa_flags, target_flags,
2864 ix86_arch_string, ix86_tune_string,
2865 ix86_fpmath, true);
2867 if (opts)
2869 fprintf (stderr, "%s\n\n", opts);
2870 free (opts);
2872 else
2873 fputs ("<no options>\n\n", stderr);
2875 return;
2878 static const char *stringop_alg_names[] = {
2879 #define DEF_ENUM
2880 #define DEF_ALG(alg, name) #name,
2881 #include "stringop.def"
2882 #undef DEF_ENUM
2883 #undef DEF_ALG
2886 /* Parse parameter string passed to -mmemcpy-strategy= or -mmemset-strategy=.
2887 The string is of the following form (or a comma-separated list of such entries):
2889 strategy_alg:max_size:[align|noalign]
2891 where the full size range for the strategy is either [0, max_size] or
2892 [min_size, max_size], in which min_size is the max_size + 1 of the
2893 preceding range. The last size range must have max_size == -1.
2895 Examples:
2898 -mmemcpy-strategy=libcall:-1:noalign
2900 this is equivalent to (for known size memcpy) -mstringop-strategy=libcall
2904 -mmemset-strategy=rep_8byte:16:noalign,vector_loop:2048:align,libcall:-1:noalign
2906 This is to tell the compiler to use the following strategy for memset
2907 1) when the expected size is between [1, 16], use rep_8byte strategy;
2908 2) when the size is between [17, 2048], use vector_loop;
2909 3) when the size is > 2048, use libcall. */
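/* Editor's walk-through (not part of the original comment): after parsing,
   the memset example above yields three records of the struct defined
   just below, which then overwrite the default algs for the current
   target.  The enumerator spellings come from stringop.def and may differ
   slightly from the option names:

     { max =   16, alg = rep_8byte    (as a stringop_alg), noalign = true  }
     { max = 2048, alg = vector_loop,                      noalign = false }
     { max =   -1, alg = libcall,                          noalign = true  }
*/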
2911 struct stringop_size_range
2913 int max;
2914 stringop_alg alg;
2915 bool noalign;
2918 static void
2919 ix86_parse_stringop_strategy_string (char *strategy_str, bool is_memset)
2921 const struct stringop_algs *default_algs;
2922 stringop_size_range input_ranges[MAX_STRINGOP_ALGS];
2923 char *curr_range_str, *next_range_str;
2924 int i = 0, n = 0;
2926 if (is_memset)
2927 default_algs = &ix86_cost->memset[TARGET_64BIT != 0];
2928 else
2929 default_algs = &ix86_cost->memcpy[TARGET_64BIT != 0];
2931 curr_range_str = strategy_str;
2935 int maxs;
2936 char alg_name[128];
2937 char align[16];
2938 next_range_str = strchr (curr_range_str, ',');
2939 if (next_range_str)
2940 *next_range_str++ = '\0';
2942 if (3 != sscanf (curr_range_str, "%20[^:]:%d:%10s",
2943 alg_name, &maxs, align))
2945 error ("wrong arg %s to option %s", curr_range_str,
2946 is_memset ? "-mmemset_strategy=" : "-mmemcpy_strategy=");
2947 return;
2950 if (n > 0 && (maxs < (input_ranges[n - 1].max + 1) && maxs != -1))
2952 error ("size ranges of option %s should be increasing",
2953 is_memset ? "-mmemset_strategy=" : "-mmemcpy_strategy=");
2954 return;
2957 for (i = 0; i < last_alg; i++)
2958 if (!strcmp (alg_name, stringop_alg_names[i]))
2959 break;
2961 if (i == last_alg)
2963 error ("wrong stringop strategy name %s specified for option %s",
2964 alg_name,
2965 is_memset ? "-mmemset_strategy=" : "-mmemcpy_strategy=");
2966 return;
2969 input_ranges[n].max = maxs;
2970 input_ranges[n].alg = (stringop_alg) i;
2971 if (!strcmp (align, "align"))
2972 input_ranges[n].noalign = false;
2973 else if (!strcmp (align, "noalign"))
2974 input_ranges[n].noalign = true;
2975 else
2977 error ("unknown alignment %s specified for option %s",
2978 align, is_memset ? "-mmemset_strategy=" : "-mmemcpy_strategy=");
2979 return;
2981 n++;
2982 curr_range_str = next_range_str;
2984 while (curr_range_str);
2986 if (input_ranges[n - 1].max != -1)
2988 error ("the max value for the last size range should be -1"
2989 " for option %s",
2990 is_memset ? "-mmemset_strategy=" : "-mmemcpy_strategy=");
2991 return;
2994 if (n > MAX_STRINGOP_ALGS)
2996 error ("too many size ranges specified in option %s",
2997 is_memset ? "-mmemset_strategy=" : "-mmemcpy_strategy=");
2998 return;
3001 /* Now override the default algs array. */
3002 for (i = 0; i < n; i++)
3004 *const_cast<int *>(&default_algs->size[i].max) = input_ranges[i].max;
3005 *const_cast<stringop_alg *>(&default_algs->size[i].alg)
3006 = input_ranges[i].alg;
3007 *const_cast<int *>(&default_algs->size[i].noalign)
3008 = input_ranges[i].noalign;
3013 /* Parse the -mtune-ctrl= option. When DUMP is true,
3014 print the features that are explicitly set. */
3016 static void
3017 parse_mtune_ctrl_str (bool dump)
3019 if (!ix86_tune_ctrl_string)
3020 return;
3022 char *next_feature_string = NULL;
3023 char *curr_feature_string = xstrdup (ix86_tune_ctrl_string);
3024 char *orig = curr_feature_string;
3025 int i;
3028 bool clear = false;
3030 next_feature_string = strchr (curr_feature_string, ',');
3031 if (next_feature_string)
3032 *next_feature_string++ = '\0';
3033 if (*curr_feature_string == '^')
3035 curr_feature_string++;
3036 clear = true;
3038 for (i = 0; i < X86_TUNE_LAST; i++)
3040 if (!strcmp (curr_feature_string, ix86_tune_feature_names[i]))
3042 ix86_tune_features[i] = !clear;
3043 if (dump)
3044 fprintf (stderr, "Explicitly %s feature %s\n",
3045 clear ? "clear" : "set", ix86_tune_feature_names[i]);
3046 break;
3049 if (i == X86_TUNE_LAST)
3050 error ("unknown parameter to option -mtune-ctrl: %s",
3051 clear ? curr_feature_string - 1 : curr_feature_string);
3052 curr_feature_string = next_feature_string;
3054 while (curr_feature_string);
3055 free (orig);
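/* As a hypothetical example, -mtune-ctrl=feature_a,^feature_b would set
   feature_a and clear feature_b; the valid names are those listed in
   ix86_tune_feature_names (generated from x86-tune.def), and feature_a
   and feature_b here are placeholders only.  */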
3058 /* Helper function to set ix86_tune_features. IX86_TUNE is the
3059 processor type. */
3061 static void
3062 set_ix86_tune_features (enum processor_type ix86_tune, bool dump)
3064 unsigned int ix86_tune_mask = 1u << ix86_tune;
3065 int i;
3067 for (i = 0; i < X86_TUNE_LAST; ++i)
3069 if (ix86_tune_no_default)
3070 ix86_tune_features[i] = 0;
3071 else
3072 ix86_tune_features[i] = !!(initial_ix86_tune_features[i] & ix86_tune_mask);
3075 if (dump)
3077 fprintf (stderr, "List of x86 specific tuning parameter names:\n");
3078 for (i = 0; i < X86_TUNE_LAST; i++)
3079 fprintf (stderr, "%s : %s\n", ix86_tune_feature_names[i],
3080 ix86_tune_features[i] ? "on" : "off");
3083 parse_mtune_ctrl_str (dump);
3087 /* Override various settings based on options. If MAIN_ARGS_P, the
3088 options are from the command line, otherwise they are from
3089 attributes. */
3091 static void
3092 ix86_option_override_internal (bool main_args_p,
3093 struct gcc_options *opts,
3094 struct gcc_options *opts_set)
3096 int i;
3097 unsigned int ix86_arch_mask;
3098 const bool ix86_tune_specified = (opts->x_ix86_tune_string != NULL);
3099 const char *prefix;
3100 const char *suffix;
3101 const char *sw;
3103 #define PTA_3DNOW (HOST_WIDE_INT_1 << 0)
3104 #define PTA_3DNOW_A (HOST_WIDE_INT_1 << 1)
3105 #define PTA_64BIT (HOST_WIDE_INT_1 << 2)
3106 #define PTA_ABM (HOST_WIDE_INT_1 << 3)
3107 #define PTA_AES (HOST_WIDE_INT_1 << 4)
3108 #define PTA_AVX (HOST_WIDE_INT_1 << 5)
3109 #define PTA_BMI (HOST_WIDE_INT_1 << 6)
3110 #define PTA_CX16 (HOST_WIDE_INT_1 << 7)
3111 #define PTA_F16C (HOST_WIDE_INT_1 << 8)
3112 #define PTA_FMA (HOST_WIDE_INT_1 << 9)
3113 #define PTA_FMA4 (HOST_WIDE_INT_1 << 10)
3114 #define PTA_FSGSBASE (HOST_WIDE_INT_1 << 11)
3115 #define PTA_LWP (HOST_WIDE_INT_1 << 12)
3116 #define PTA_LZCNT (HOST_WIDE_INT_1 << 13)
3117 #define PTA_MMX (HOST_WIDE_INT_1 << 14)
3118 #define PTA_MOVBE (HOST_WIDE_INT_1 << 15)
3119 #define PTA_NO_SAHF (HOST_WIDE_INT_1 << 16)
3120 #define PTA_PCLMUL (HOST_WIDE_INT_1 << 17)
3121 #define PTA_POPCNT (HOST_WIDE_INT_1 << 18)
3122 #define PTA_PREFETCH_SSE (HOST_WIDE_INT_1 << 19)
3123 #define PTA_RDRND (HOST_WIDE_INT_1 << 20)
3124 #define PTA_SSE (HOST_WIDE_INT_1 << 21)
3125 #define PTA_SSE2 (HOST_WIDE_INT_1 << 22)
3126 #define PTA_SSE3 (HOST_WIDE_INT_1 << 23)
3127 #define PTA_SSE4_1 (HOST_WIDE_INT_1 << 24)
3128 #define PTA_SSE4_2 (HOST_WIDE_INT_1 << 25)
3129 #define PTA_SSE4A (HOST_WIDE_INT_1 << 26)
3130 #define PTA_SSSE3 (HOST_WIDE_INT_1 << 27)
3131 #define PTA_TBM (HOST_WIDE_INT_1 << 28)
3132 #define PTA_XOP (HOST_WIDE_INT_1 << 29)
3133 #define PTA_AVX2 (HOST_WIDE_INT_1 << 30)
3134 #define PTA_BMI2 (HOST_WIDE_INT_1 << 31)
3135 #define PTA_RTM (HOST_WIDE_INT_1 << 32)
3136 #define PTA_HLE (HOST_WIDE_INT_1 << 33)
3137 #define PTA_PRFCHW (HOST_WIDE_INT_1 << 34)
3138 #define PTA_RDSEED (HOST_WIDE_INT_1 << 35)
3139 #define PTA_ADX (HOST_WIDE_INT_1 << 36)
3140 #define PTA_FXSR (HOST_WIDE_INT_1 << 37)
3141 #define PTA_XSAVE (HOST_WIDE_INT_1 << 38)
3142 #define PTA_XSAVEOPT (HOST_WIDE_INT_1 << 39)
3143 #define PTA_AVX512F (HOST_WIDE_INT_1 << 40)
3144 #define PTA_AVX512ER (HOST_WIDE_INT_1 << 41)
3145 #define PTA_AVX512PF (HOST_WIDE_INT_1 << 42)
3146 #define PTA_AVX512CD (HOST_WIDE_INT_1 << 43)
3147 #define PTA_MPX (HOST_WIDE_INT_1 << 44)
3148 #define PTA_SHA (HOST_WIDE_INT_1 << 45)
3149 #define PTA_PREFETCHWT1 (HOST_WIDE_INT_1 << 46)
3150 #define PTA_CLFLUSHOPT (HOST_WIDE_INT_1 << 47)
3151 #define PTA_XSAVEC (HOST_WIDE_INT_1 << 48)
3152 #define PTA_XSAVES (HOST_WIDE_INT_1 << 49)
3153 #define PTA_AVX512DQ (HOST_WIDE_INT_1 << 50)
3154 #define PTA_AVX512BW (HOST_WIDE_INT_1 << 51)
3155 #define PTA_AVX512VL (HOST_WIDE_INT_1 << 52)
3157 #define PTA_CORE2 \
3158 (PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3 | PTA_SSSE3 \
3159 | PTA_CX16 | PTA_FXSR)
3160 #define PTA_NEHALEM \
3161 (PTA_CORE2 | PTA_SSE4_1 | PTA_SSE4_2 | PTA_POPCNT)
3162 #define PTA_WESTMERE \
3163 (PTA_NEHALEM | PTA_AES | PTA_PCLMUL)
3164 #define PTA_SANDYBRIDGE \
3165 (PTA_WESTMERE | PTA_AVX | PTA_XSAVE | PTA_XSAVEOPT)
3166 #define PTA_IVYBRIDGE \
3167 (PTA_SANDYBRIDGE | PTA_FSGSBASE | PTA_RDRND | PTA_F16C)
3168 #define PTA_HASWELL \
3169 (PTA_IVYBRIDGE | PTA_AVX2 | PTA_BMI | PTA_BMI2 | PTA_LZCNT \
3170 | PTA_FMA | PTA_MOVBE | PTA_HLE)
3171 #define PTA_BROADWELL \
3172 (PTA_HASWELL | PTA_ADX | PTA_PRFCHW | PTA_RDSEED)
3173 #define PTA_BONNELL \
3174 (PTA_CORE2 | PTA_MOVBE)
3175 #define PTA_SILVERMONT \
3176 (PTA_WESTMERE | PTA_MOVBE)
3178 /* If this reaches 64, we need to widen the struct pta flags below.  */
3180 static struct pta
3182 const char *const name; /* processor name or nickname. */
3183 const enum processor_type processor;
3184 const enum attr_cpu schedule;
3185 const unsigned HOST_WIDE_INT flags;
3187 const processor_alias_table[] =
3189 {"i386", PROCESSOR_I386, CPU_NONE, 0},
3190 {"i486", PROCESSOR_I486, CPU_NONE, 0},
3191 {"i586", PROCESSOR_PENTIUM, CPU_PENTIUM, 0},
3192 {"pentium", PROCESSOR_PENTIUM, CPU_PENTIUM, 0},
3193 {"pentium-mmx", PROCESSOR_PENTIUM, CPU_PENTIUM, PTA_MMX},
3194 {"winchip-c6", PROCESSOR_I486, CPU_NONE, PTA_MMX},
3195 {"winchip2", PROCESSOR_I486, CPU_NONE, PTA_MMX | PTA_3DNOW | PTA_PRFCHW},
3196 {"c3", PROCESSOR_I486, CPU_NONE, PTA_MMX | PTA_3DNOW | PTA_PRFCHW},
3197 {"c3-2", PROCESSOR_PENTIUMPRO, CPU_PENTIUMPRO,
3198 PTA_MMX | PTA_SSE | PTA_FXSR},
3199 {"i686", PROCESSOR_PENTIUMPRO, CPU_PENTIUMPRO, 0},
3200 {"pentiumpro", PROCESSOR_PENTIUMPRO, CPU_PENTIUMPRO, 0},
3201 {"pentium2", PROCESSOR_PENTIUMPRO, CPU_PENTIUMPRO, PTA_MMX | PTA_FXSR},
3202 {"pentium3", PROCESSOR_PENTIUMPRO, CPU_PENTIUMPRO,
3203 PTA_MMX | PTA_SSE | PTA_FXSR},
3204 {"pentium3m", PROCESSOR_PENTIUMPRO, CPU_PENTIUMPRO,
3205 PTA_MMX | PTA_SSE | PTA_FXSR},
3206 {"pentium-m", PROCESSOR_PENTIUMPRO, CPU_PENTIUMPRO,
3207 PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_FXSR},
3208 {"pentium4", PROCESSOR_PENTIUM4, CPU_NONE,
3209 PTA_MMX |PTA_SSE | PTA_SSE2 | PTA_FXSR},
3210 {"pentium4m", PROCESSOR_PENTIUM4, CPU_NONE,
3211 PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_FXSR},
3212 {"prescott", PROCESSOR_NOCONA, CPU_NONE,
3213 PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3 | PTA_FXSR},
3214 {"nocona", PROCESSOR_NOCONA, CPU_NONE,
3215 PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3
3216 | PTA_CX16 | PTA_NO_SAHF | PTA_FXSR},
3217 {"core2", PROCESSOR_CORE2, CPU_CORE2, PTA_CORE2},
3218 {"nehalem", PROCESSOR_NEHALEM, CPU_NEHALEM, PTA_NEHALEM},
3219 {"corei7", PROCESSOR_NEHALEM, CPU_NEHALEM, PTA_NEHALEM},
3220 {"westmere", PROCESSOR_NEHALEM, CPU_NEHALEM, PTA_WESTMERE},
3221 {"sandybridge", PROCESSOR_SANDYBRIDGE, CPU_NEHALEM,
3222 PTA_SANDYBRIDGE},
3223 {"corei7-avx", PROCESSOR_SANDYBRIDGE, CPU_NEHALEM,
3224 PTA_SANDYBRIDGE},
3225 {"ivybridge", PROCESSOR_SANDYBRIDGE, CPU_NEHALEM,
3226 PTA_IVYBRIDGE},
3227 {"core-avx-i", PROCESSOR_SANDYBRIDGE, CPU_NEHALEM,
3228 PTA_IVYBRIDGE},
3229 {"haswell", PROCESSOR_HASWELL, CPU_NEHALEM, PTA_HASWELL},
3230 {"core-avx2", PROCESSOR_HASWELL, CPU_NEHALEM, PTA_HASWELL},
3231 {"broadwell", PROCESSOR_HASWELL, CPU_NEHALEM, PTA_BROADWELL},
3232 {"bonnell", PROCESSOR_BONNELL, CPU_ATOM, PTA_BONNELL},
3233 {"atom", PROCESSOR_BONNELL, CPU_ATOM, PTA_BONNELL},
3234 {"silvermont", PROCESSOR_SILVERMONT, CPU_SLM, PTA_SILVERMONT},
3235 {"slm", PROCESSOR_SILVERMONT, CPU_SLM, PTA_SILVERMONT},
3236 {"intel", PROCESSOR_INTEL, CPU_SLM, PTA_NEHALEM},
3237 {"geode", PROCESSOR_GEODE, CPU_GEODE,
3238 PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_PREFETCH_SSE | PTA_PRFCHW},
3239 {"k6", PROCESSOR_K6, CPU_K6, PTA_MMX},
3240 {"k6-2", PROCESSOR_K6, CPU_K6, PTA_MMX | PTA_3DNOW | PTA_PRFCHW},
3241 {"k6-3", PROCESSOR_K6, CPU_K6, PTA_MMX | PTA_3DNOW | PTA_PRFCHW},
3242 {"athlon", PROCESSOR_ATHLON, CPU_ATHLON,
3243 PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_PREFETCH_SSE | PTA_PRFCHW},
3244 {"athlon-tbird", PROCESSOR_ATHLON, CPU_ATHLON,
3245 PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_PREFETCH_SSE | PTA_PRFCHW},
3246 {"athlon-4", PROCESSOR_ATHLON, CPU_ATHLON,
3247 PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE | PTA_PRFCHW | PTA_FXSR},
3248 {"athlon-xp", PROCESSOR_ATHLON, CPU_ATHLON,
3249 PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE | PTA_PRFCHW | PTA_FXSR},
3250 {"athlon-mp", PROCESSOR_ATHLON, CPU_ATHLON,
3251 PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE | PTA_PRFCHW | PTA_FXSR},
3252 {"x86-64", PROCESSOR_K8, CPU_K8,
3253 PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_NO_SAHF | PTA_FXSR},
3254 {"k8", PROCESSOR_K8, CPU_K8,
3255 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
3256 | PTA_SSE2 | PTA_NO_SAHF | PTA_PRFCHW | PTA_FXSR},
3257 {"k8-sse3", PROCESSOR_K8, CPU_K8,
3258 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
3259 | PTA_SSE2 | PTA_SSE3 | PTA_NO_SAHF | PTA_PRFCHW | PTA_FXSR},
3260 {"opteron", PROCESSOR_K8, CPU_K8,
3261 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
3262 | PTA_SSE2 | PTA_NO_SAHF | PTA_PRFCHW | PTA_FXSR},
3263 {"opteron-sse3", PROCESSOR_K8, CPU_K8,
3264 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
3265 | PTA_SSE2 | PTA_SSE3 | PTA_NO_SAHF | PTA_PRFCHW | PTA_FXSR},
3266 {"athlon64", PROCESSOR_K8, CPU_K8,
3267 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
3268 | PTA_SSE2 | PTA_NO_SAHF | PTA_PRFCHW | PTA_FXSR},
3269 {"athlon64-sse3", PROCESSOR_K8, CPU_K8,
3270 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
3271 | PTA_SSE2 | PTA_SSE3 | PTA_NO_SAHF | PTA_PRFCHW | PTA_FXSR},
3272 {"athlon-fx", PROCESSOR_K8, CPU_K8,
3273 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
3274 | PTA_SSE2 | PTA_NO_SAHF | PTA_PRFCHW | PTA_FXSR},
3275 {"amdfam10", PROCESSOR_AMDFAM10, CPU_AMDFAM10,
3276 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE | PTA_SSE2
3277 | PTA_SSE3 | PTA_SSE4A | PTA_CX16 | PTA_ABM | PTA_PRFCHW | PTA_FXSR},
3278 {"barcelona", PROCESSOR_AMDFAM10, CPU_AMDFAM10,
3279 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE | PTA_SSE2
3280 | PTA_SSE3 | PTA_SSE4A | PTA_CX16 | PTA_ABM | PTA_PRFCHW | PTA_FXSR},
3281 {"bdver1", PROCESSOR_BDVER1, CPU_BDVER1,
3282 PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3
3283 | PTA_SSE4A | PTA_CX16 | PTA_ABM | PTA_SSSE3 | PTA_SSE4_1
3284 | PTA_SSE4_2 | PTA_AES | PTA_PCLMUL | PTA_AVX | PTA_FMA4
3285 | PTA_XOP | PTA_LWP | PTA_PRFCHW | PTA_FXSR | PTA_XSAVE},
3286 {"bdver2", PROCESSOR_BDVER2, CPU_BDVER2,
3287 PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3
3288 | PTA_SSE4A | PTA_CX16 | PTA_ABM | PTA_SSSE3 | PTA_SSE4_1
3289 | PTA_SSE4_2 | PTA_AES | PTA_PCLMUL | PTA_AVX | PTA_FMA4
3290 | PTA_XOP | PTA_LWP | PTA_BMI | PTA_TBM | PTA_F16C
3291 | PTA_FMA | PTA_PRFCHW | PTA_FXSR | PTA_XSAVE},
3292 {"bdver3", PROCESSOR_BDVER3, CPU_BDVER3,
3293 PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3
3294 | PTA_SSE4A | PTA_CX16 | PTA_ABM | PTA_SSSE3 | PTA_SSE4_1
3295 | PTA_SSE4_2 | PTA_AES | PTA_PCLMUL | PTA_AVX | PTA_FMA4
3296 | PTA_XOP | PTA_LWP | PTA_BMI | PTA_TBM | PTA_F16C
3297 | PTA_FMA | PTA_PRFCHW | PTA_FXSR | PTA_XSAVE
3298 | PTA_XSAVEOPT | PTA_FSGSBASE},
3299 {"bdver4", PROCESSOR_BDVER4, CPU_BDVER4,
3300 PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3
3301 | PTA_SSE4A | PTA_CX16 | PTA_ABM | PTA_SSSE3 | PTA_SSE4_1
3302 | PTA_SSE4_2 | PTA_AES | PTA_PCLMUL | PTA_AVX | PTA_AVX2
3303 | PTA_FMA4 | PTA_XOP | PTA_LWP | PTA_BMI | PTA_BMI2
3304 | PTA_TBM | PTA_F16C | PTA_FMA | PTA_PRFCHW | PTA_FXSR
3305 | PTA_XSAVE | PTA_XSAVEOPT | PTA_FSGSBASE | PTA_RDRND
3306 | PTA_MOVBE},
3307 {"btver1", PROCESSOR_BTVER1, CPU_GENERIC,
3308 PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3
3309 | PTA_SSSE3 | PTA_SSE4A |PTA_ABM | PTA_CX16 | PTA_PRFCHW
3310 | PTA_FXSR | PTA_XSAVE},
3311 {"btver2", PROCESSOR_BTVER2, CPU_BTVER2,
3312 PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3
3313 | PTA_SSSE3 | PTA_SSE4A |PTA_ABM | PTA_CX16 | PTA_SSE4_1
3314 | PTA_SSE4_2 | PTA_AES | PTA_PCLMUL | PTA_AVX
3315 | PTA_BMI | PTA_F16C | PTA_MOVBE | PTA_PRFCHW
3316 | PTA_FXSR | PTA_XSAVE | PTA_XSAVEOPT},
3318 {"generic", PROCESSOR_GENERIC, CPU_GENERIC,
3319 PTA_64BIT
3320 | PTA_HLE /* flags are only used for -march switch. */ },
3323 /* -mrecip options. */
3324 static struct
3326 const char *string; /* option name */
3327 unsigned int mask; /* mask bits to set */
3329 const recip_options[] =
3331 { "all", RECIP_MASK_ALL },
3332 { "none", RECIP_MASK_NONE },
3333 { "div", RECIP_MASK_DIV },
3334 { "sqrt", RECIP_MASK_SQRT },
3335 { "vec-div", RECIP_MASK_VEC_DIV },
3336 { "vec-sqrt", RECIP_MASK_VEC_SQRT },
3339 int const pta_size = ARRAY_SIZE (processor_alias_table);
3341 /* Set up prefix/suffix so the error messages refer to either the command
3342 line argument, or the attribute(target). */
3343 if (main_args_p)
3345 prefix = "-m";
3346 suffix = "";
3347 sw = "switch";
3349 else
3351 prefix = "option(\"";
3352 suffix = "\")";
3353 sw = "attribute";
3356 /* Turn off both OPTION_MASK_ABI_64 and OPTION_MASK_ABI_X32 if
3357 TARGET_64BIT_DEFAULT is true and TARGET_64BIT is false. */
3358 if (TARGET_64BIT_DEFAULT && !TARGET_64BIT_P (opts->x_ix86_isa_flags))
3359 opts->x_ix86_isa_flags &= ~(OPTION_MASK_ABI_64 | OPTION_MASK_ABI_X32);
3360 #ifdef TARGET_BI_ARCH
3361 else
3363 #if TARGET_BI_ARCH == 1
3364 /* When TARGET_BI_ARCH == 1, by default, OPTION_MASK_ABI_64
3365 is on and OPTION_MASK_ABI_X32 is off. We turn off
3366 OPTION_MASK_ABI_64 if OPTION_MASK_ABI_X32 is turned on by
3367 -mx32. */
3368 if (TARGET_X32_P (opts->x_ix86_isa_flags))
3369 opts->x_ix86_isa_flags &= ~OPTION_MASK_ABI_64;
3370 #else
3371 /* When TARGET_BI_ARCH == 2, by default, OPTION_MASK_ABI_X32 is
3372 on and OPTION_MASK_ABI_64 is off. We turn off
3373 OPTION_MASK_ABI_X32 if OPTION_MASK_ABI_64 is turned on by
3374 -m64 or OPTION_MASK_CODE16 is turned on by -m16. */
3375 if (TARGET_LP64_P (opts->x_ix86_isa_flags)
3376 || TARGET_16BIT_P (opts->x_ix86_isa_flags))
3377 opts->x_ix86_isa_flags &= ~OPTION_MASK_ABI_X32;
3378 #endif
3380 #endif
3382 if (TARGET_X32_P (opts->x_ix86_isa_flags))
3384 /* Always turn on OPTION_MASK_ISA_64BIT and turn off
3385 OPTION_MASK_ABI_64 for TARGET_X32. */
3386 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_64BIT;
3387 opts->x_ix86_isa_flags &= ~OPTION_MASK_ABI_64;
3389 else if (TARGET_16BIT_P (opts->x_ix86_isa_flags))
3390 opts->x_ix86_isa_flags &= ~(OPTION_MASK_ISA_64BIT
3391 | OPTION_MASK_ABI_X32
3392 | OPTION_MASK_ABI_64);
3393 else if (TARGET_LP64_P (opts->x_ix86_isa_flags))
3395 /* Always turn on OPTION_MASK_ISA_64BIT and turn off
3396 OPTION_MASK_ABI_X32 for TARGET_LP64. */
3397 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_64BIT;
3398 opts->x_ix86_isa_flags &= ~OPTION_MASK_ABI_X32;
3401 #ifdef SUBTARGET_OVERRIDE_OPTIONS
3402 SUBTARGET_OVERRIDE_OPTIONS;
3403 #endif
3405 #ifdef SUBSUBTARGET_OVERRIDE_OPTIONS
3406 SUBSUBTARGET_OVERRIDE_OPTIONS;
3407 #endif
3409 /* -fPIC is the default for x86_64. */
3410 if (TARGET_MACHO && TARGET_64BIT_P (opts->x_ix86_isa_flags))
3411 opts->x_flag_pic = 2;
3413 /* Need to check -mtune=generic first. */
3414 if (opts->x_ix86_tune_string)
3416 /* As special support for cross compilers we read -mtune=native
3417 as -mtune=generic. With native compilers we won't see the
3418 -mtune=native, as it was changed by the driver. */
3419 if (!strcmp (opts->x_ix86_tune_string, "native"))
3421 opts->x_ix86_tune_string = "generic";
3423 else if (!strcmp (opts->x_ix86_tune_string, "x86-64"))
3424 warning (OPT_Wdeprecated, "%stune=x86-64%s is deprecated; use "
3425 "%stune=k8%s or %stune=generic%s instead as appropriate",
3426 prefix, suffix, prefix, suffix, prefix, suffix);
3428 else
3430 if (opts->x_ix86_arch_string)
3431 opts->x_ix86_tune_string = opts->x_ix86_arch_string;
3432 if (!opts->x_ix86_tune_string)
3434 opts->x_ix86_tune_string
3435 = processor_target_table[TARGET_CPU_DEFAULT].name;
3436 ix86_tune_defaulted = 1;
3439 /* opts->x_ix86_tune_string is set to opts->x_ix86_arch_string
3440 or defaulted. We need to use a sensible tune option. */
3441 if (!strcmp (opts->x_ix86_tune_string, "x86-64"))
3443 opts->x_ix86_tune_string = "generic";
3447 if (opts->x_ix86_stringop_alg == rep_prefix_8_byte
3448 && !TARGET_64BIT_P (opts->x_ix86_isa_flags))
3450 /* rep; movq isn't available in 32-bit code. */
3451 error ("-mstringop-strategy=rep_8byte not supported for 32-bit code");
3452 opts->x_ix86_stringop_alg = no_stringop;
3455 if (!opts->x_ix86_arch_string)
3456 opts->x_ix86_arch_string
3457 = TARGET_64BIT_P (opts->x_ix86_isa_flags)
3458 ? "x86-64" : SUBTARGET32_DEFAULT_CPU;
3459 else
3460 ix86_arch_specified = 1;
3462 if (opts_set->x_ix86_pmode)
3464 if ((TARGET_LP64_P (opts->x_ix86_isa_flags)
3465 && opts->x_ix86_pmode == PMODE_SI)
3466 || (!TARGET_64BIT_P (opts->x_ix86_isa_flags)
3467 && opts->x_ix86_pmode == PMODE_DI))
3468 error ("address mode %qs not supported in the %s bit mode",
3469 TARGET_64BIT_P (opts->x_ix86_isa_flags) ? "short" : "long",
3470 TARGET_64BIT_P (opts->x_ix86_isa_flags) ? "64" : "32");
3472 else
3473 opts->x_ix86_pmode = TARGET_LP64_P (opts->x_ix86_isa_flags)
3474 ? PMODE_DI : PMODE_SI;
3476 if (!opts_set->x_ix86_abi)
3477 opts->x_ix86_abi = DEFAULT_ABI;
3479 /* For targets using the MS ABI, enable ms-extensions unless it has
3480 been explicitly turned off.  For non-MS ABI targets we turn this
3481 option off. */
3482 if (!opts_set->x_flag_ms_extensions)
3483 opts->x_flag_ms_extensions = (MS_ABI == DEFAULT_ABI);
3485 if (opts_set->x_ix86_cmodel)
3487 switch (opts->x_ix86_cmodel)
3489 case CM_SMALL:
3490 case CM_SMALL_PIC:
3491 if (opts->x_flag_pic)
3492 opts->x_ix86_cmodel = CM_SMALL_PIC;
3493 if (!TARGET_64BIT_P (opts->x_ix86_isa_flags))
3494 error ("code model %qs not supported in the %s bit mode",
3495 "small", "32");
3496 break;
3498 case CM_MEDIUM:
3499 case CM_MEDIUM_PIC:
3500 if (opts->x_flag_pic)
3501 opts->x_ix86_cmodel = CM_MEDIUM_PIC;
3502 if (!TARGET_64BIT_P (opts->x_ix86_isa_flags))
3503 error ("code model %qs not supported in the %s bit mode",
3504 "medium", "32");
3505 else if (TARGET_X32_P (opts->x_ix86_isa_flags))
3506 error ("code model %qs not supported in x32 mode",
3507 "medium");
3508 break;
3510 case CM_LARGE:
3511 case CM_LARGE_PIC:
3512 if (opts->x_flag_pic)
3513 opts->x_ix86_cmodel = CM_LARGE_PIC;
3514 if (!TARGET_64BIT_P (opts->x_ix86_isa_flags))
3515 error ("code model %qs not supported in the %s bit mode",
3516 "large", "32");
3517 else if (TARGET_X32_P (opts->x_ix86_isa_flags))
3518 error ("code model %qs not supported in x32 mode",
3519 "large");
3520 break;
3522 case CM_32:
3523 if (opts->x_flag_pic)
3524 error ("code model %s does not support PIC mode", "32");
3525 if (TARGET_64BIT_P (opts->x_ix86_isa_flags))
3526 error ("code model %qs not supported in the %s bit mode",
3527 "32", "64");
3528 break;
3530 case CM_KERNEL:
3531 if (opts->x_flag_pic)
3533 error ("code model %s does not support PIC mode", "kernel");
3534 opts->x_ix86_cmodel = CM_32;
3536 if (!TARGET_64BIT_P (opts->x_ix86_isa_flags))
3537 error ("code model %qs not supported in the %s bit mode",
3538 "kernel", "32");
3539 break;
3541 default:
3542 gcc_unreachable ();
3545 else
3547 /* For TARGET_64BIT and MS_ABI, force pic on, in order to enable the
3548 use of rip-relative addressing. This eliminates fixups that
3549 would otherwise be needed if this object is to be placed in a
3550 DLL, and is essentially just as efficient as direct addressing. */
3551 if (TARGET_64BIT_P (opts->x_ix86_isa_flags)
3552 && (TARGET_RDOS || TARGET_PECOFF))
3553 opts->x_ix86_cmodel = CM_MEDIUM_PIC, opts->x_flag_pic = 1;
3554 else if (TARGET_64BIT_P (opts->x_ix86_isa_flags))
3555 opts->x_ix86_cmodel = opts->x_flag_pic ? CM_SMALL_PIC : CM_SMALL;
3556 else
3557 opts->x_ix86_cmodel = CM_32;
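/* In summary: 64-bit RDOS/PE-COFF targets default to CM_MEDIUM_PIC,
   other 64-bit targets to CM_SMALL or CM_SMALL_PIC depending on -fpic,
   and 32-bit targets always to CM_32.  */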
3559 if (TARGET_MACHO && opts->x_ix86_asm_dialect == ASM_INTEL)
3561 error ("-masm=intel not supported in this configuration");
3562 opts->x_ix86_asm_dialect = ASM_ATT;
3564 if ((TARGET_64BIT_P (opts->x_ix86_isa_flags) != 0)
3565 != ((opts->x_ix86_isa_flags & OPTION_MASK_ISA_64BIT) != 0))
3566 sorry ("%i-bit mode not compiled in",
3567 (opts->x_ix86_isa_flags & OPTION_MASK_ISA_64BIT) ? 64 : 32);
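/* Translate the PTA_* flags of the selected -march entry into the
   corresponding OPTION_MASK_ISA_* bits, unless the user has already set
   the matching ISA option explicitly.  */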
3569 for (i = 0; i < pta_size; i++)
3570 if (! strcmp (opts->x_ix86_arch_string, processor_alias_table[i].name))
3572 ix86_schedule = processor_alias_table[i].schedule;
3573 ix86_arch = processor_alias_table[i].processor;
3574 /* Default cpu tuning to the architecture. */
3575 ix86_tune = ix86_arch;
3577 if (TARGET_64BIT_P (opts->x_ix86_isa_flags)
3578 && !(processor_alias_table[i].flags & PTA_64BIT))
3579 error ("CPU you selected does not support x86-64 "
3580 "instruction set");
3582 if (processor_alias_table[i].flags & PTA_MMX
3583 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_MMX))
3584 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_MMX;
3585 if (processor_alias_table[i].flags & PTA_3DNOW
3586 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_3DNOW))
3587 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_3DNOW;
3588 if (processor_alias_table[i].flags & PTA_3DNOW_A
3589 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_3DNOW_A))
3590 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_3DNOW_A;
3591 if (processor_alias_table[i].flags & PTA_SSE
3592 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE))
3593 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_SSE;
3594 if (processor_alias_table[i].flags & PTA_SSE2
3595 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE2))
3596 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_SSE2;
3597 if (processor_alias_table[i].flags & PTA_SSE3
3598 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE3))
3599 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_SSE3;
3600 if (processor_alias_table[i].flags & PTA_SSSE3
3601 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_SSSE3))
3602 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_SSSE3;
3603 if (processor_alias_table[i].flags & PTA_SSE4_1
3604 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE4_1))
3605 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_SSE4_1;
3606 if (processor_alias_table[i].flags & PTA_SSE4_2
3607 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE4_2))
3608 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_SSE4_2;
3609 if (processor_alias_table[i].flags & PTA_AVX
3610 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_AVX))
3611 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_AVX;
3612 if (processor_alias_table[i].flags & PTA_AVX2
3613 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_AVX2))
3614 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_AVX2;
3615 if (processor_alias_table[i].flags & PTA_FMA
3616 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_FMA))
3617 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_FMA;
3618 if (processor_alias_table[i].flags & PTA_SSE4A
3619 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE4A))
3620 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_SSE4A;
3621 if (processor_alias_table[i].flags & PTA_FMA4
3622 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_FMA4))
3623 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_FMA4;
3624 if (processor_alias_table[i].flags & PTA_XOP
3625 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_XOP))
3626 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_XOP;
3627 if (processor_alias_table[i].flags & PTA_LWP
3628 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_LWP))
3629 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_LWP;
3630 if (processor_alias_table[i].flags & PTA_ABM
3631 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_ABM))
3632 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_ABM;
3633 if (processor_alias_table[i].flags & PTA_BMI
3634 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_BMI))
3635 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_BMI;
3636 if (processor_alias_table[i].flags & (PTA_LZCNT | PTA_ABM)
3637 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_LZCNT))
3638 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_LZCNT;
3639 if (processor_alias_table[i].flags & PTA_TBM
3640 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_TBM))
3641 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_TBM;
3642 if (processor_alias_table[i].flags & PTA_BMI2
3643 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_BMI2))
3644 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_BMI2;
3645 if (processor_alias_table[i].flags & PTA_CX16
3646 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_CX16))
3647 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_CX16;
3648 if (processor_alias_table[i].flags & (PTA_POPCNT | PTA_ABM)
3649 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_POPCNT))
3650 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_POPCNT;
3651 if (!(TARGET_64BIT_P (opts->x_ix86_isa_flags)
3652 && (processor_alias_table[i].flags & PTA_NO_SAHF))
3653 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_SAHF))
3654 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_SAHF;
3655 if (processor_alias_table[i].flags & PTA_MOVBE
3656 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_MOVBE))
3657 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_MOVBE;
3658 if (processor_alias_table[i].flags & PTA_AES
3659 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_AES))
3660 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_AES;
3661 if (processor_alias_table[i].flags & PTA_SHA
3662 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_SHA))
3663 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_SHA;
3664 if (processor_alias_table[i].flags & PTA_PCLMUL
3665 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_PCLMUL))
3666 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_PCLMUL;
3667 if (processor_alias_table[i].flags & PTA_FSGSBASE
3668 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_FSGSBASE))
3669 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_FSGSBASE;
3670 if (processor_alias_table[i].flags & PTA_RDRND
3671 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_RDRND))
3672 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_RDRND;
3673 if (processor_alias_table[i].flags & PTA_F16C
3674 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_F16C))
3675 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_F16C;
3676 if (processor_alias_table[i].flags & PTA_RTM
3677 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_RTM))
3678 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_RTM;
3679 if (processor_alias_table[i].flags & PTA_HLE
3680 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_HLE))
3681 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_HLE;
3682 if (processor_alias_table[i].flags & PTA_PRFCHW
3683 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_PRFCHW))
3684 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_PRFCHW;
3685 if (processor_alias_table[i].flags & PTA_RDSEED
3686 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_RDSEED))
3687 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_RDSEED;
3688 if (processor_alias_table[i].flags & PTA_ADX
3689 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_ADX))
3690 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_ADX;
3691 if (processor_alias_table[i].flags & PTA_FXSR
3692 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_FXSR))
3693 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_FXSR;
3694 if (processor_alias_table[i].flags & PTA_XSAVE
3695 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_XSAVE))
3696 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_XSAVE;
3697 if (processor_alias_table[i].flags & PTA_XSAVEOPT
3698 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_XSAVEOPT))
3699 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_XSAVEOPT;
3700 if (processor_alias_table[i].flags & PTA_AVX512F
3701 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_AVX512F))
3702 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_AVX512F;
3703 if (processor_alias_table[i].flags & PTA_AVX512ER
3704 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_AVX512ER))
3705 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_AVX512ER;
3706 if (processor_alias_table[i].flags & PTA_AVX512PF
3707 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_AVX512PF))
3708 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_AVX512PF;
3709 if (processor_alias_table[i].flags & PTA_AVX512CD
3710 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_AVX512CD))
3711 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_AVX512CD;
3712 if (processor_alias_table[i].flags & PTA_PREFETCHWT1
3713 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_PREFETCHWT1))
3714 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_PREFETCHWT1;
3715 if (processor_alias_table[i].flags & PTA_CLFLUSHOPT
3716 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_CLFLUSHOPT))
3717 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_CLFLUSHOPT;
3718 if (processor_alias_table[i].flags & PTA_XSAVEC
3719 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_XSAVEC))
3720 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_XSAVEC;
3721 if (processor_alias_table[i].flags & PTA_XSAVES
3722 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_XSAVES))
3723 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_XSAVES;
3724 if (processor_alias_table[i].flags & PTA_AVX512DQ
3725 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_AVX512DQ))
3726 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_AVX512DQ;
3727 if (processor_alias_table[i].flags & PTA_AVX512BW
3728 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_AVX512BW))
3729 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_AVX512BW;
3730 if (processor_alias_table[i].flags & PTA_AVX512VL
3731 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_AVX512VL))
3732 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_AVX512VL;
3733 if (processor_alias_table[i].flags & PTA_MPX
3734 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_MPX))
3735 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_MPX;
3736 if (processor_alias_table[i].flags & (PTA_PREFETCH_SSE | PTA_SSE))
3737 x86_prefetch_sse = true;
3739 break;
3742 if (TARGET_X32 && (opts->x_ix86_isa_flags & OPTION_MASK_ISA_MPX))
3743 error ("Intel MPX does not support x32");
3748 if (!strcmp (opts->x_ix86_arch_string, "generic"))
3749 error ("generic CPU can be used only for %stune=%s %s",
3750 prefix, suffix, sw);
3751 else if (!strcmp (opts->x_ix86_arch_string, "intel"))
3752 error ("intel CPU can be used only for %stune=%s %s",
3753 prefix, suffix, sw);
3754 else if (i == pta_size)
3755 error ("bad value (%s) for %sarch=%s %s",
3756 opts->x_ix86_arch_string, prefix, suffix, sw);
3758 ix86_arch_mask = 1u << ix86_arch;
3759 for (i = 0; i < X86_ARCH_LAST; ++i)
3760 ix86_arch_features[i] = !!(initial_ix86_arch_features[i] & ix86_arch_mask);
3762 for (i = 0; i < pta_size; i++)
3763 if (! strcmp (opts->x_ix86_tune_string, processor_alias_table[i].name))
3765 ix86_schedule = processor_alias_table[i].schedule;
3766 ix86_tune = processor_alias_table[i].processor;
3767 if (TARGET_64BIT_P (opts->x_ix86_isa_flags))
3769 if (!(processor_alias_table[i].flags & PTA_64BIT))
3771 if (ix86_tune_defaulted)
3773 opts->x_ix86_tune_string = "x86-64";
3774 for (i = 0; i < pta_size; i++)
3775 if (! strcmp (opts->x_ix86_tune_string,
3776 processor_alias_table[i].name))
3777 break;
3778 ix86_schedule = processor_alias_table[i].schedule;
3779 ix86_tune = processor_alias_table[i].processor;
3781 else
3782 error ("CPU you selected does not support x86-64 "
3783 "instruction set");
3786 /* Intel CPUs have always interpreted SSE prefetch instructions as
3787 NOPs; so, we can enable SSE prefetch instructions even when
3788 -mtune (rather than -march) points us to a processor that has them.
3789 However, the VIA C3 gives a SIGILL, so we only do that for i686 and
3790 higher processors. */
3791 if (TARGET_CMOV
3792 && (processor_alias_table[i].flags & (PTA_PREFETCH_SSE | PTA_SSE)))
3793 x86_prefetch_sse = true;
3794 break;
3797 if (ix86_tune_specified && i == pta_size)
3798 error ("bad value (%s) for %stune=%s %s",
3799 opts->x_ix86_tune_string, prefix, suffix, sw);
3801 set_ix86_tune_features (ix86_tune, opts->x_ix86_dump_tunes);
3803 #ifndef USE_IX86_FRAME_POINTER
3804 #define USE_IX86_FRAME_POINTER 0
3805 #endif
3807 #ifndef USE_X86_64_FRAME_POINTER
3808 #define USE_X86_64_FRAME_POINTER 0
3809 #endif
3811 /* Set the default values for switches whose default depends on TARGET_64BIT
3812 in case they weren't overwritten by command line options. */
3813 if (TARGET_64BIT_P (opts->x_ix86_isa_flags))
3815 if (opts->x_optimize >= 1 && !opts_set->x_flag_omit_frame_pointer)
3816 opts->x_flag_omit_frame_pointer = !USE_X86_64_FRAME_POINTER;
3817 if (opts->x_flag_asynchronous_unwind_tables
3818 && !opts_set->x_flag_unwind_tables
3819 && TARGET_64BIT_MS_ABI)
3820 opts->x_flag_unwind_tables = 1;
3821 if (opts->x_flag_asynchronous_unwind_tables == 2)
3822 opts->x_flag_unwind_tables
3823 = opts->x_flag_asynchronous_unwind_tables = 1;
3824 if (opts->x_flag_pcc_struct_return == 2)
3825 opts->x_flag_pcc_struct_return = 0;
3827 else
3829 if (opts->x_optimize >= 1 && !opts_set->x_flag_omit_frame_pointer)
3830 opts->x_flag_omit_frame_pointer
3831 = !(USE_IX86_FRAME_POINTER || opts->x_optimize_size);
3832 if (opts->x_flag_asynchronous_unwind_tables == 2)
3833 opts->x_flag_asynchronous_unwind_tables = !USE_IX86_FRAME_POINTER;
3834 if (opts->x_flag_pcc_struct_return == 2)
3835 opts->x_flag_pcc_struct_return = DEFAULT_PCC_STRUCT_RETURN;
3838 ix86_tune_cost = processor_target_table[ix86_tune].cost;
3839 if (opts->x_optimize_size)
3840 ix86_cost = &ix86_size_cost;
3841 else
3842 ix86_cost = ix86_tune_cost;
3844 /* Arrange to set up i386_stack_locals for all functions. */
3845 init_machine_status = ix86_init_machine_status;
3847 /* Validate -mregparm= value. */
3848 if (opts_set->x_ix86_regparm)
3850 if (TARGET_64BIT_P (opts->x_ix86_isa_flags))
3851 warning (0, "-mregparm is ignored in 64-bit mode");
3852 if (opts->x_ix86_regparm > REGPARM_MAX)
3854 error ("-mregparm=%d is not between 0 and %d",
3855 opts->x_ix86_regparm, REGPARM_MAX);
3856 opts->x_ix86_regparm = 0;
3859 if (TARGET_64BIT_P (opts->x_ix86_isa_flags))
3860 opts->x_ix86_regparm = REGPARM_MAX;
3862 /* Default align_* from the processor table. */
3863 if (opts->x_align_loops == 0)
3865 opts->x_align_loops = processor_target_table[ix86_tune].align_loop;
3866 align_loops_max_skip = processor_target_table[ix86_tune].align_loop_max_skip;
3868 if (opts->x_align_jumps == 0)
3870 opts->x_align_jumps = processor_target_table[ix86_tune].align_jump;
3871 align_jumps_max_skip = processor_target_table[ix86_tune].align_jump_max_skip;
3873 if (opts->x_align_functions == 0)
3875 opts->x_align_functions = processor_target_table[ix86_tune].align_func;
3878 /* Provide default for -mbranch-cost= value. */
3879 if (!opts_set->x_ix86_branch_cost)
3880 opts->x_ix86_branch_cost = ix86_cost->branch_cost;
3882 if (TARGET_64BIT_P (opts->x_ix86_isa_flags))
3884 opts->x_target_flags
3885 |= TARGET_SUBTARGET64_DEFAULT & ~opts_set->x_target_flags;
3887 /* Enable by default the SSE and MMX builtins. Do allow the user to
3888 explicitly disable any of these. In particular, disabling SSE and
3889 MMX for kernel code is extremely useful. */
3890 if (!ix86_arch_specified)
3891 opts->x_ix86_isa_flags
3892 |= ((OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_MMX
3893 | TARGET_SUBTARGET64_ISA_DEFAULT)
3894 & ~opts->x_ix86_isa_flags_explicit);
3896 if (TARGET_RTD_P (opts->x_target_flags))
3897 warning (0, "%srtd%s is ignored in 64-bit mode", prefix, suffix);
3899 else
3901 opts->x_target_flags
3902 |= TARGET_SUBTARGET32_DEFAULT & ~opts_set->x_target_flags;
3904 if (!ix86_arch_specified)
3905 opts->x_ix86_isa_flags
3906 |= TARGET_SUBTARGET32_ISA_DEFAULT & ~opts->x_ix86_isa_flags_explicit;
3908 /* The i386 ABI does not specify a red zone.  It still makes sense to use
3909 it when the programmer takes care to keep the stack from being destroyed. */
3910 if (!(opts_set->x_target_flags & MASK_NO_RED_ZONE))
3911 opts->x_target_flags |= MASK_NO_RED_ZONE;
3914 /* Keep nonleaf frame pointers. */
3915 if (opts->x_flag_omit_frame_pointer)
3916 opts->x_target_flags &= ~MASK_OMIT_LEAF_FRAME_POINTER;
3917 else if (TARGET_OMIT_LEAF_FRAME_POINTER_P (opts->x_target_flags))
3918 opts->x_flag_omit_frame_pointer = 1;
3920 /* If we're doing fast math, we don't care about comparison order
3921 wrt NaNs. This lets us use a shorter comparison sequence. */
3922 if (opts->x_flag_finite_math_only)
3923 opts->x_target_flags &= ~MASK_IEEE_FP;
3925 /* If the architecture always has an FPU, turn off NO_FANCY_MATH_387,
3926 since the insns won't need emulation. */
3927 if (ix86_tune_features [X86_TUNE_ALWAYS_FANCY_MATH_387])
3928 opts->x_target_flags &= ~MASK_NO_FANCY_MATH_387;
3930 /* Likewise, if the target doesn't have a 387, or we've specified
3931 software floating point, don't use 387 inline intrinsics. */
3932 if (!TARGET_80387_P (opts->x_target_flags))
3933 opts->x_target_flags |= MASK_NO_FANCY_MATH_387;
3935 /* Turn on MMX builtins for -msse. */
3936 if (TARGET_SSE_P (opts->x_ix86_isa_flags))
3937 opts->x_ix86_isa_flags
3938 |= OPTION_MASK_ISA_MMX & ~opts->x_ix86_isa_flags_explicit;
3940 /* Enable SSE prefetch. */
3941 if (TARGET_SSE_P (opts->x_ix86_isa_flags)
3942 || (TARGET_PRFCHW && !TARGET_3DNOW_P (opts->x_ix86_isa_flags)))
3943 x86_prefetch_sse = true;
3945 /* Enable prefetch{,w} instructions for -m3dnow and -mprefetchwt1. */
3946 if (TARGET_3DNOW_P (opts->x_ix86_isa_flags)
3947 || TARGET_PREFETCHWT1_P (opts->x_ix86_isa_flags))
3948 opts->x_ix86_isa_flags
3949 |= OPTION_MASK_ISA_PRFCHW & ~opts->x_ix86_isa_flags_explicit;
3951 /* Enable popcnt instruction for -msse4.2 or -mabm. */
3952 if (TARGET_SSE4_2_P (opts->x_ix86_isa_flags)
3953 || TARGET_ABM_P (opts->x_ix86_isa_flags))
3954 opts->x_ix86_isa_flags
3955 |= OPTION_MASK_ISA_POPCNT & ~opts->x_ix86_isa_flags_explicit;
3957 /* Enable lzcnt instruction for -mabm. */
3958 if (TARGET_ABM_P(opts->x_ix86_isa_flags))
3959 opts->x_ix86_isa_flags
3960 |= OPTION_MASK_ISA_LZCNT & ~opts->x_ix86_isa_flags_explicit;
3962 /* Validate -mpreferred-stack-boundary= value or default it to
3963 PREFERRED_STACK_BOUNDARY_DEFAULT. */
3964 ix86_preferred_stack_boundary = PREFERRED_STACK_BOUNDARY_DEFAULT;
3965 if (opts_set->x_ix86_preferred_stack_boundary_arg)
3967 int min = (TARGET_64BIT_P (opts->x_ix86_isa_flags)
3968 ? (TARGET_SSE_P (opts->x_ix86_isa_flags) ? 4 : 3) : 2);
3969 int max = (TARGET_SEH ? 4 : 12);
3971 if (opts->x_ix86_preferred_stack_boundary_arg < min
3972 || opts->x_ix86_preferred_stack_boundary_arg > max)
3974 if (min == max)
3975 error ("-mpreferred-stack-boundary is not supported "
3976 "for this target");
3977 else
3978 error ("-mpreferred-stack-boundary=%d is not between %d and %d",
3979 opts->x_ix86_preferred_stack_boundary_arg, min, max);
3981 else
3982 ix86_preferred_stack_boundary
3983 = (1 << opts->x_ix86_preferred_stack_boundary_arg) * BITS_PER_UNIT;
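/* The argument is the log2 of the boundary in bytes, so e.g.
   -mpreferred-stack-boundary=4 gives a 16-byte (128-bit) boundary.  */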
3986 /* Set the default value for -mstackrealign. */
3987 if (opts->x_ix86_force_align_arg_pointer == -1)
3988 opts->x_ix86_force_align_arg_pointer = STACK_REALIGN_DEFAULT;
3990 ix86_default_incoming_stack_boundary = PREFERRED_STACK_BOUNDARY;
3992 /* Validate -mincoming-stack-boundary= value or default it to
3993 MIN_STACK_BOUNDARY/PREFERRED_STACK_BOUNDARY. */
3994 ix86_incoming_stack_boundary = ix86_default_incoming_stack_boundary;
3995 if (opts_set->x_ix86_incoming_stack_boundary_arg)
3997 if (opts->x_ix86_incoming_stack_boundary_arg
3998 < (TARGET_64BIT_P (opts->x_ix86_isa_flags) ? 4 : 2)
3999 || opts->x_ix86_incoming_stack_boundary_arg > 12)
4000 error ("-mincoming-stack-boundary=%d is not between %d and 12",
4001 opts->x_ix86_incoming_stack_boundary_arg,
4002 TARGET_64BIT_P (opts->x_ix86_isa_flags) ? 4 : 2);
4003 else
4005 ix86_user_incoming_stack_boundary
4006 = (1 << opts->x_ix86_incoming_stack_boundary_arg) * BITS_PER_UNIT;
4007 ix86_incoming_stack_boundary
4008 = ix86_user_incoming_stack_boundary;
4012 #ifndef NO_PROFILE_COUNTERS
4013 if (flag_nop_mcount)
4014 error ("-mnop-mcount is not compatible with this target");
4015 #endif
4016 if (flag_nop_mcount && flag_pic)
4017 error ("-mnop-mcount is not implemented for -fPIC");
4019 /* Accept -msseregparm only if at least SSE support is enabled. */
4020 if (TARGET_SSEREGPARM_P (opts->x_target_flags)
4021 && ! TARGET_SSE_P (opts->x_ix86_isa_flags))
4022 error ("%ssseregparm%s used without SSE enabled", prefix, suffix);
4024 if (opts_set->x_ix86_fpmath)
4026 if (opts->x_ix86_fpmath & FPMATH_SSE)
4028 if (!TARGET_SSE_P (opts->x_ix86_isa_flags))
4030 warning (0, "SSE instruction set disabled, using 387 arithmetics");
4031 opts->x_ix86_fpmath = FPMATH_387;
4033 else if ((opts->x_ix86_fpmath & FPMATH_387)
4034 && !TARGET_80387_P (opts->x_target_flags))
4036 warning (0, "387 instruction set disabled, using SSE arithmetics");
4037 opts->x_ix86_fpmath = FPMATH_SSE;
4041 /* For all chips supporting SSE2, -mfpmath=sse performs better than
4042 -mfpmath=387.  The latter is nevertheless the default on many targets,
4043 since the extra 80-bit precision of temporaries is considered part of
4044 the ABI.  Override that default at least for -ffast-math.
4045 TODO: -mfpmath=both seems to produce similarly performing code with
4046 slightly smaller binaries.  It is however not clear whether register
4047 allocation is ready for this setting.
4048 Also, -mfpmath=387 codegen is overall a lot more compact (about 4-5%)
4049 than SSE codegen.  We may switch to 387 with -ffast-math for size
4050 optimized functions. */
4051 else if (fast_math_flags_set_p (&global_options)
4052 && TARGET_SSE2_P (opts->x_ix86_isa_flags))
4053 opts->x_ix86_fpmath = FPMATH_SSE;
4054 else
4055 opts->x_ix86_fpmath = TARGET_FPMATH_DEFAULT_P (opts->x_ix86_isa_flags);
4057 /* If the i387 is disabled, then do not return values in it. */
4058 if (!TARGET_80387_P (opts->x_target_flags))
4059 opts->x_target_flags &= ~MASK_FLOAT_RETURNS;
4061 /* Use external vectorized library in vectorizing intrinsics. */
4062 if (opts_set->x_ix86_veclibabi_type)
4063 switch (opts->x_ix86_veclibabi_type)
4065 case ix86_veclibabi_type_svml:
4066 ix86_veclib_handler = ix86_veclibabi_svml;
4067 break;
4069 case ix86_veclibabi_type_acml:
4070 ix86_veclib_handler = ix86_veclibabi_acml;
4071 break;
4073 default:
4074 gcc_unreachable ();
4077 if (ix86_tune_features [X86_TUNE_ACCUMULATE_OUTGOING_ARGS]
4078 && !(opts_set->x_target_flags & MASK_ACCUMULATE_OUTGOING_ARGS)
4079 && !opts->x_optimize_size)
4080 opts->x_target_flags |= MASK_ACCUMULATE_OUTGOING_ARGS;
4082 /* If stack probes are required, the space used for large function
4083 arguments on the stack must also be probed, so enable
4084 -maccumulate-outgoing-args so this happens in the prologue. */
4085 if (TARGET_STACK_PROBE_P (opts->x_target_flags)
4086 && !(opts->x_target_flags & MASK_ACCUMULATE_OUTGOING_ARGS))
4088 if (opts_set->x_target_flags & MASK_ACCUMULATE_OUTGOING_ARGS)
4089 warning (0, "stack probing requires %saccumulate-outgoing-args%s "
4090 "for correctness", prefix, suffix);
4091 opts->x_target_flags |= MASK_ACCUMULATE_OUTGOING_ARGS;
4094 /* Figure out what ASM_GENERATE_INTERNAL_LABEL builds as a prefix. */
4096 char *p;
4097 ASM_GENERATE_INTERNAL_LABEL (internal_label_prefix, "LX", 0);
4098 p = strchr (internal_label_prefix, 'X');
4099 internal_label_prefix_len = p - internal_label_prefix;
4100 *p = '\0';
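/* If, for instance, the generated label text were "*.LX0", the stored
   prefix would be "*.L" with internal_label_prefix_len == 3; the exact
   spelling depends on the target's ASM_GENERATE_INTERNAL_LABEL.  */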
4103 /* When the scheduling description is not available, disable the scheduler
4104 passes so they won't slow down compilation and make x87 code slower. */
4105 if (!TARGET_SCHEDULE)
4106 opts->x_flag_schedule_insns_after_reload = opts->x_flag_schedule_insns = 0;
4108 maybe_set_param_value (PARAM_SIMULTANEOUS_PREFETCHES,
4109 ix86_tune_cost->simultaneous_prefetches,
4110 opts->x_param_values,
4111 opts_set->x_param_values);
4112 maybe_set_param_value (PARAM_L1_CACHE_LINE_SIZE,
4113 ix86_tune_cost->prefetch_block,
4114 opts->x_param_values,
4115 opts_set->x_param_values);
4116 maybe_set_param_value (PARAM_L1_CACHE_SIZE,
4117 ix86_tune_cost->l1_cache_size,
4118 opts->x_param_values,
4119 opts_set->x_param_values);
4120 maybe_set_param_value (PARAM_L2_CACHE_SIZE,
4121 ix86_tune_cost->l2_cache_size,
4122 opts->x_param_values,
4123 opts_set->x_param_values);
4125 /* Enable software prefetching at -O3 for CPUs on which prefetching is helpful. */
4126 if (opts->x_flag_prefetch_loop_arrays < 0
4127 && HAVE_prefetch
4128 && (opts->x_optimize >= 3 || opts->x_flag_profile_use)
4129 && TARGET_SOFTWARE_PREFETCHING_BENEFICIAL)
4130 opts->x_flag_prefetch_loop_arrays = 1;
4132 /* If using typedef char *va_list, signal that __builtin_va_start (&ap, 0)
4133 can be optimized to ap = __builtin_next_arg (0). */
4134 if (!TARGET_64BIT_P (opts->x_ix86_isa_flags) && !opts->x_flag_split_stack)
4135 targetm.expand_builtin_va_start = NULL;
4137 if (TARGET_64BIT_P (opts->x_ix86_isa_flags))
4139 ix86_gen_leave = gen_leave_rex64;
4140 if (Pmode == DImode)
4142 ix86_gen_tls_global_dynamic_64 = gen_tls_global_dynamic_64_di;
4143 ix86_gen_tls_local_dynamic_base_64
4144 = gen_tls_local_dynamic_base_64_di;
4146 else
4148 ix86_gen_tls_global_dynamic_64 = gen_tls_global_dynamic_64_si;
4149 ix86_gen_tls_local_dynamic_base_64
4150 = gen_tls_local_dynamic_base_64_si;
4153 else
4154 ix86_gen_leave = gen_leave;
4156 if (Pmode == DImode)
4158 ix86_gen_add3 = gen_adddi3;
4159 ix86_gen_sub3 = gen_subdi3;
4160 ix86_gen_sub3_carry = gen_subdi3_carry;
4161 ix86_gen_one_cmpl2 = gen_one_cmpldi2;
4162 ix86_gen_andsp = gen_anddi3;
4163 ix86_gen_allocate_stack_worker = gen_allocate_stack_worker_probe_di;
4164 ix86_gen_adjust_stack_and_probe = gen_adjust_stack_and_probedi;
4165 ix86_gen_probe_stack_range = gen_probe_stack_rangedi;
4166 ix86_gen_monitor = gen_sse3_monitor_di;
4168 else
4170 ix86_gen_add3 = gen_addsi3;
4171 ix86_gen_sub3 = gen_subsi3;
4172 ix86_gen_sub3_carry = gen_subsi3_carry;
4173 ix86_gen_one_cmpl2 = gen_one_cmplsi2;
4174 ix86_gen_andsp = gen_andsi3;
4175 ix86_gen_allocate_stack_worker = gen_allocate_stack_worker_probe_si;
4176 ix86_gen_adjust_stack_and_probe = gen_adjust_stack_and_probesi;
4177 ix86_gen_probe_stack_range = gen_probe_stack_rangesi;
4178 ix86_gen_monitor = gen_sse3_monitor_si;
4181 #ifdef USE_IX86_CLD
4182 /* Use -mcld by default for 32-bit code if configured with --enable-cld. */
4183 if (!TARGET_64BIT_P (opts->x_ix86_isa_flags))
4184 opts->x_target_flags |= MASK_CLD & ~opts_set->x_target_flags;
4185 #endif
4187 if (!TARGET_64BIT_P (opts->x_ix86_isa_flags) && opts->x_flag_pic)
4189 if (opts->x_flag_fentry > 0)
4190 sorry ("-mfentry isn%'t supported for 32-bit in combination "
4191 "with -fpic");
4192 opts->x_flag_fentry = 0;
4194 else if (TARGET_SEH)
4196 if (opts->x_flag_fentry == 0)
4197 sorry ("-mno-fentry isn%'t compatible with SEH");
4198 opts->x_flag_fentry = 1;
4200 else if (opts->x_flag_fentry < 0)
4202 #if defined(PROFILE_BEFORE_PROLOGUE)
4203 opts->x_flag_fentry = 1;
4204 #else
4205 opts->x_flag_fentry = 0;
4206 #endif
4209 /* When not optimizing for size, enable vzeroupper optimization for
4210 TARGET_AVX with -fexpensive-optimizations and split 32-byte
4211 AVX unaligned load/store. */
4212 if (!opts->x_optimize_size)
4214 if (flag_expensive_optimizations
4215 && !(opts_set->x_target_flags & MASK_VZEROUPPER))
4216 opts->x_target_flags |= MASK_VZEROUPPER;
4217 if (!ix86_tune_features[X86_TUNE_AVX256_UNALIGNED_LOAD_OPTIMAL]
4218 && !(opts_set->x_target_flags & MASK_AVX256_SPLIT_UNALIGNED_LOAD))
4219 opts->x_target_flags |= MASK_AVX256_SPLIT_UNALIGNED_LOAD;
4220 if (!ix86_tune_features[X86_TUNE_AVX256_UNALIGNED_STORE_OPTIMAL]
4221 && !(opts_set->x_target_flags & MASK_AVX256_SPLIT_UNALIGNED_STORE))
4222 opts->x_target_flags |= MASK_AVX256_SPLIT_UNALIGNED_STORE;
4223 /* Enable 128-bit AVX instruction generation
4224 for the auto-vectorizer. */
4225 if (TARGET_AVX128_OPTIMAL
4226 && !(opts_set->x_target_flags & MASK_PREFER_AVX128))
4227 opts->x_target_flags |= MASK_PREFER_AVX128;
4230 if (opts->x_ix86_recip_name)
4232 char *p = ASTRDUP (opts->x_ix86_recip_name);
4233 char *q;
4234 unsigned int mask, i;
4235 bool invert;
4237 while ((q = strtok (p, ",")) != NULL)
4239 p = NULL;
4240 if (*q == '!')
4242 invert = true;
4243 q++;
4245 else
4246 invert = false;
4248 if (!strcmp (q, "default"))
4249 mask = RECIP_MASK_ALL;
4250 else
4252 for (i = 0; i < ARRAY_SIZE (recip_options); i++)
4253 if (!strcmp (q, recip_options[i].string))
4255 mask = recip_options[i].mask;
4256 break;
4259 if (i == ARRAY_SIZE (recip_options))
4261 error ("unknown option for -mrecip=%s", q);
4262 invert = false;
4263 mask = RECIP_MASK_NONE;
4267 opts->x_recip_mask_explicit |= mask;
4268 if (invert)
4269 opts->x_recip_mask &= ~mask;
4270 else
4271 opts->x_recip_mask |= mask;
4275 if (TARGET_RECIP_P (opts->x_target_flags))
4276 opts->x_recip_mask |= RECIP_MASK_ALL & ~opts->x_recip_mask_explicit;
4277 else if (opts_set->x_target_flags & MASK_RECIP)
4278 opts->x_recip_mask &= ~(RECIP_MASK_ALL & ~opts->x_recip_mask_explicit);
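/* For instance, -mrecip=all,!sqrt ORs in RECIP_MASK_ALL and then clears
   RECIP_MASK_SQRT, leaving every reciprocal approximation except the
   scalar square root enabled.  */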
4280 /* Default long double to 64-bit for 32-bit Bionic and to __float128
4281 for 64-bit Bionic. */
4282 if (TARGET_HAS_BIONIC
4283 && !(opts_set->x_target_flags
4284 & (MASK_LONG_DOUBLE_64 | MASK_LONG_DOUBLE_128)))
4285 opts->x_target_flags |= (TARGET_64BIT
4286 ? MASK_LONG_DOUBLE_128
4287 : MASK_LONG_DOUBLE_64);
4289 /* Only one of them can be active. */
4290 gcc_assert ((opts->x_target_flags & MASK_LONG_DOUBLE_64) == 0
4291 || (opts->x_target_flags & MASK_LONG_DOUBLE_128) == 0);
4293 /* Save the initial options in case the user does function specific
4294 options. */
4295 if (main_args_p)
4296 target_option_default_node = target_option_current_node
4297 = build_target_option_node (opts);
4299 /* Handle stack protector */
4300 if (!opts_set->x_ix86_stack_protector_guard)
4301 opts->x_ix86_stack_protector_guard
4302 = TARGET_HAS_BIONIC ? SSP_GLOBAL : SSP_TLS;
4304 /* Handle -mmemcpy-strategy= and -mmemset-strategy= */
4305 if (opts->x_ix86_tune_memcpy_strategy)
4307 char *str = xstrdup (opts->x_ix86_tune_memcpy_strategy);
4308 ix86_parse_stringop_strategy_string (str, false);
4309 free (str);
4312 if (opts->x_ix86_tune_memset_strategy)
4314 char *str = xstrdup (opts->x_ix86_tune_memset_strategy);
4315 ix86_parse_stringop_strategy_string (str, true);
4316 free (str);
4320 /* Implement the TARGET_OPTION_OVERRIDE hook. */
4322 static void
4323 ix86_option_override (void)
4325 opt_pass *pass_insert_vzeroupper = make_pass_insert_vzeroupper (g);
4326 struct register_pass_info insert_vzeroupper_info
4327 = { pass_insert_vzeroupper, "reload",
4328 1, PASS_POS_INSERT_AFTER
4331 ix86_option_override_internal (true, &global_options, &global_options_set);
4334 /* This needs to be done at start up. It's convenient to do it here. */
4335 register_pass (&insert_vzeroupper_info);
4338 /* Implement the TARGET_OFFLOAD_OPTIONS hook. */
4339 static char *
4340 ix86_offload_options (void)
4342 if (TARGET_LP64)
4343 return xstrdup ("-foffload-abi=lp64");
4344 return xstrdup ("-foffload-abi=ilp32");
4347 /* Update register usage after having seen the compiler flags. */
4349 static void
4350 ix86_conditional_register_usage (void)
4352 int i, c_mask;
4353 unsigned int j;
4355 /* The PIC register, if it exists, is fixed. */
4356 j = PIC_OFFSET_TABLE_REGNUM;
4357 if (j != INVALID_REGNUM)
4358 fixed_regs[j] = call_used_regs[j] = 1;
4360 /* For 32-bit targets, squash the REX registers. */
4361 if (! TARGET_64BIT)
4363 for (i = FIRST_REX_INT_REG; i <= LAST_REX_INT_REG; i++)
4364 fixed_regs[i] = call_used_regs[i] = 1, reg_names[i] = "";
4365 for (i = FIRST_REX_SSE_REG; i <= LAST_REX_SSE_REG; i++)
4366 fixed_regs[i] = call_used_regs[i] = 1, reg_names[i] = "";
4367 for (i = FIRST_EXT_REX_SSE_REG; i <= LAST_EXT_REX_SSE_REG; i++)
4368 fixed_regs[i] = call_used_regs[i] = 1, reg_names[i] = "";
4371 /* See the definition of CALL_USED_REGISTERS in i386.h. */
4372 c_mask = (TARGET_64BIT_MS_ABI ? (1 << 3)
4373 : TARGET_64BIT ? (1 << 2)
4374 : (1 << 1));
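/* Conditional entries (> 1) in CALL_USED_REGISTERS are bit masks; c_mask
   selects the bit for the 64-bit MS ABI, 64-bit non-MS, or 32-bit case
   respectively.  */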
4376 CLEAR_HARD_REG_SET (reg_class_contents[(int)CLOBBERED_REGS]);
4378 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
4380 /* Set/reset conditionally defined registers from
4381 CALL_USED_REGISTERS initializer. */
4382 if (call_used_regs[i] > 1)
4383 call_used_regs[i] = !!(call_used_regs[i] & c_mask);
4385 /* Calculate registers of CLOBBERED_REGS register set
4386 as call used registers from GENERAL_REGS register set. */
4387 if (TEST_HARD_REG_BIT (reg_class_contents[(int)GENERAL_REGS], i)
4388 && call_used_regs[i])
4389 SET_HARD_REG_BIT (reg_class_contents[(int)CLOBBERED_REGS], i);
4392 /* If MMX is disabled, squash the registers. */
4393 if (! TARGET_MMX)
4394 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
4395 if (TEST_HARD_REG_BIT (reg_class_contents[(int)MMX_REGS], i))
4396 fixed_regs[i] = call_used_regs[i] = 1, reg_names[i] = "";
4398 /* If SSE is disabled, squash the registers. */
4399 if (! TARGET_SSE)
4400 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
4401 if (TEST_HARD_REG_BIT (reg_class_contents[(int)SSE_REGS], i))
4402 fixed_regs[i] = call_used_regs[i] = 1, reg_names[i] = "";
4404 /* If the FPU is disabled, squash the registers. */
4405 if (! (TARGET_80387 || TARGET_FLOAT_RETURNS_IN_80387))
4406 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
4407 if (TEST_HARD_REG_BIT (reg_class_contents[(int)FLOAT_REGS], i))
4408 fixed_regs[i] = call_used_regs[i] = 1, reg_names[i] = "";
4410 /* If AVX512F is disabled, squash the registers. */
4411 if (! TARGET_AVX512F)
4413 for (i = FIRST_EXT_REX_SSE_REG; i <= LAST_EXT_REX_SSE_REG; i++)
4414 fixed_regs[i] = call_used_regs[i] = 1, reg_names[i] = "";
4416 for (i = FIRST_MASK_REG; i <= LAST_MASK_REG; i++)
4417 fixed_regs[i] = call_used_regs[i] = 1, reg_names[i] = "";
4420 /* If MPX is disabled, squash the registers. */
4421 if (! TARGET_MPX)
4422 for (i = FIRST_BND_REG; i <= LAST_BND_REG; i++)
4423 fixed_regs[i] = call_used_regs[i] = 1, reg_names[i] = "";
4427 /* Save the current options */
4429 static void
4430 ix86_function_specific_save (struct cl_target_option *ptr,
4431 struct gcc_options *opts)
4433 ptr->arch = ix86_arch;
4434 ptr->schedule = ix86_schedule;
4435 ptr->tune = ix86_tune;
4436 ptr->branch_cost = ix86_branch_cost;
4437 ptr->tune_defaulted = ix86_tune_defaulted;
4438 ptr->arch_specified = ix86_arch_specified;
4439 ptr->x_ix86_isa_flags_explicit = opts->x_ix86_isa_flags_explicit;
4440 ptr->x_ix86_target_flags_explicit = opts->x_ix86_target_flags_explicit;
4441 ptr->x_recip_mask_explicit = opts->x_recip_mask_explicit;
4442 ptr->x_ix86_arch_string = opts->x_ix86_arch_string;
4443 ptr->x_ix86_tune_string = opts->x_ix86_tune_string;
4444 ptr->x_ix86_cmodel = opts->x_ix86_cmodel;
4445 ptr->x_ix86_abi = opts->x_ix86_abi;
4446 ptr->x_ix86_asm_dialect = opts->x_ix86_asm_dialect;
4447 ptr->x_ix86_branch_cost = opts->x_ix86_branch_cost;
4448 ptr->x_ix86_dump_tunes = opts->x_ix86_dump_tunes;
4449 ptr->x_ix86_force_align_arg_pointer = opts->x_ix86_force_align_arg_pointer;
4450 ptr->x_ix86_force_drap = opts->x_ix86_force_drap;
4451 ptr->x_ix86_incoming_stack_boundary_arg = opts->x_ix86_incoming_stack_boundary_arg;
4452 ptr->x_ix86_pmode = opts->x_ix86_pmode;
4453 ptr->x_ix86_preferred_stack_boundary_arg = opts->x_ix86_preferred_stack_boundary_arg;
4454 ptr->x_ix86_recip_name = opts->x_ix86_recip_name;
4455 ptr->x_ix86_regparm = opts->x_ix86_regparm;
4456 ptr->x_ix86_section_threshold = opts->x_ix86_section_threshold;
4457 ptr->x_ix86_sse2avx = opts->x_ix86_sse2avx;
4458 ptr->x_ix86_stack_protector_guard = opts->x_ix86_stack_protector_guard;
4459 ptr->x_ix86_stringop_alg = opts->x_ix86_stringop_alg;
4460 ptr->x_ix86_tls_dialect = opts->x_ix86_tls_dialect;
4461 ptr->x_ix86_tune_ctrl_string = opts->x_ix86_tune_ctrl_string;
4462 ptr->x_ix86_tune_memcpy_strategy = opts->x_ix86_tune_memcpy_strategy;
4463 ptr->x_ix86_tune_memset_strategy = opts->x_ix86_tune_memset_strategy;
4464 ptr->x_ix86_tune_no_default = opts->x_ix86_tune_no_default;
4465 ptr->x_ix86_veclibabi_type = opts->x_ix86_veclibabi_type;
4467 /* The fields are char but the variables are not; make sure the
4468 values fit in the fields. */
4469 gcc_assert (ptr->arch == ix86_arch);
4470 gcc_assert (ptr->schedule == ix86_schedule);
4471 gcc_assert (ptr->tune == ix86_tune);
4472 gcc_assert (ptr->branch_cost == ix86_branch_cost);
4475 /* Restore the current options */
4477 static void
4478 ix86_function_specific_restore (struct gcc_options *opts,
4479 struct cl_target_option *ptr)
4481 enum processor_type old_tune = ix86_tune;
4482 enum processor_type old_arch = ix86_arch;
4483 unsigned int ix86_arch_mask;
4484 int i;
4486 /* We don't change -fPIC. */
4487 opts->x_flag_pic = flag_pic;
4489 ix86_arch = (enum processor_type) ptr->arch;
4490 ix86_schedule = (enum attr_cpu) ptr->schedule;
4491 ix86_tune = (enum processor_type) ptr->tune;
4492 opts->x_ix86_branch_cost = ptr->branch_cost;
4493 ix86_tune_defaulted = ptr->tune_defaulted;
4494 ix86_arch_specified = ptr->arch_specified;
4495 opts->x_ix86_isa_flags_explicit = ptr->x_ix86_isa_flags_explicit;
4496 opts->x_ix86_target_flags_explicit = ptr->x_ix86_target_flags_explicit;
4497 opts->x_recip_mask_explicit = ptr->x_recip_mask_explicit;
4498 opts->x_ix86_arch_string = ptr->x_ix86_arch_string;
4499 opts->x_ix86_tune_string = ptr->x_ix86_tune_string;
4500 opts->x_ix86_cmodel = ptr->x_ix86_cmodel;
4501 opts->x_ix86_abi = ptr->x_ix86_abi;
4502 opts->x_ix86_asm_dialect = ptr->x_ix86_asm_dialect;
4503 opts->x_ix86_branch_cost = ptr->x_ix86_branch_cost;
4504 opts->x_ix86_dump_tunes = ptr->x_ix86_dump_tunes;
4505 opts->x_ix86_force_align_arg_pointer = ptr->x_ix86_force_align_arg_pointer;
4506 opts->x_ix86_force_drap = ptr->x_ix86_force_drap;
4507 opts->x_ix86_incoming_stack_boundary_arg = ptr->x_ix86_incoming_stack_boundary_arg;
4508 opts->x_ix86_pmode = ptr->x_ix86_pmode;
4509 opts->x_ix86_preferred_stack_boundary_arg = ptr->x_ix86_preferred_stack_boundary_arg;
4510 opts->x_ix86_recip_name = ptr->x_ix86_recip_name;
4511 opts->x_ix86_regparm = ptr->x_ix86_regparm;
4512 opts->x_ix86_section_threshold = ptr->x_ix86_section_threshold;
4513 opts->x_ix86_sse2avx = ptr->x_ix86_sse2avx;
4514 opts->x_ix86_stack_protector_guard = ptr->x_ix86_stack_protector_guard;
4515 opts->x_ix86_stringop_alg = ptr->x_ix86_stringop_alg;
4516 opts->x_ix86_tls_dialect = ptr->x_ix86_tls_dialect;
4517 opts->x_ix86_tune_ctrl_string = ptr->x_ix86_tune_ctrl_string;
4518 opts->x_ix86_tune_memcpy_strategy = ptr->x_ix86_tune_memcpy_strategy;
4519 opts->x_ix86_tune_memset_strategy = ptr->x_ix86_tune_memset_strategy;
4520 opts->x_ix86_tune_no_default = ptr->x_ix86_tune_no_default;
4521 opts->x_ix86_veclibabi_type = ptr->x_ix86_veclibabi_type;
4523 /* Recreate the arch feature tests if the arch changed */
4524 if (old_arch != ix86_arch)
4526 ix86_arch_mask = 1u << ix86_arch;
4527 for (i = 0; i < X86_ARCH_LAST; ++i)
4528 ix86_arch_features[i]
4529 = !!(initial_ix86_arch_features[i] & ix86_arch_mask);
4532 /* Recreate the tune optimization tests */
4533 if (old_tune != ix86_tune)
4534 set_ix86_tune_features (ix86_tune, false);
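/* Editorial illustration (not part of the original source): these
   save/restore hooks are what make per-function target options work.
   A minimal user-level example, assuming a file compiled with plain -O2
   and no -mavx2 on the command line:

     __attribute__((target ("avx2")))
     void fill_avx2 (float *dst, int n)
     {
       for (int i = 0; i < n; i++)
         dst[i] = 0.0f;             // may use 256-bit stores
     }

     void fill_plain (float *dst, int n)
     {
       for (int i = 0; i < n; i++)
         dst[i] = 0.0f;             // keeps the command-line ISA
     }

   When compilation switches between the two functions, the fields copied
   above are saved and restored so the AVX2 setting does not leak from one
   function into the other.  */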
4537 /* Print the current options */
4539 static void
4540 ix86_function_specific_print (FILE *file, int indent,
4541 struct cl_target_option *ptr)
4543 char *target_string
4544 = ix86_target_string (ptr->x_ix86_isa_flags, ptr->x_target_flags,
4545 NULL, NULL, ptr->x_ix86_fpmath, false);
4547 gcc_assert (ptr->arch < PROCESSOR_max);
4548 fprintf (file, "%*sarch = %d (%s)\n",
4549 indent, "",
4550 ptr->arch, processor_target_table[ptr->arch].name);
4552 gcc_assert (ptr->tune < PROCESSOR_max);
4553 fprintf (file, "%*stune = %d (%s)\n",
4554 indent, "",
4555 ptr->tune, processor_target_table[ptr->tune].name);
4557 fprintf (file, "%*sbranch_cost = %d\n", indent, "", ptr->branch_cost);
4559 if (target_string)
4561 fprintf (file, "%*s%s\n", indent, "", target_string);
4562 free (target_string);
4567 /* Inner function to process the attribute((target(...))), take an argument and
4568 set the current options from the argument. If we have a list, recursively go
4569 over the list. */
4571 static bool
4572 ix86_valid_target_attribute_inner_p (tree args, char *p_strings[],
4573 struct gcc_options *opts,
4574 struct gcc_options *opts_set,
4575 struct gcc_options *enum_opts_set)
4577 char *next_optstr;
4578 bool ret = true;
4580 #define IX86_ATTR_ISA(S,O) { S, sizeof (S)-1, ix86_opt_isa, O, 0 }
4581 #define IX86_ATTR_STR(S,O) { S, sizeof (S)-1, ix86_opt_str, O, 0 }
4582 #define IX86_ATTR_ENUM(S,O) { S, sizeof (S)-1, ix86_opt_enum, O, 0 }
4583 #define IX86_ATTR_YES(S,O,M) { S, sizeof (S)-1, ix86_opt_yes, O, M }
4584 #define IX86_ATTR_NO(S,O,M) { S, sizeof (S)-1, ix86_opt_no, O, M }
4586 enum ix86_opt_type
4588 ix86_opt_unknown,
4589 ix86_opt_yes,
4590 ix86_opt_no,
4591 ix86_opt_str,
4592 ix86_opt_enum,
4593 ix86_opt_isa
4596 static const struct
4598 const char *string;
4599 size_t len;
4600 enum ix86_opt_type type;
4601 int opt;
4602 int mask;
4603 } attrs[] = {
4604 /* isa options */
4605 IX86_ATTR_ISA ("3dnow", OPT_m3dnow),
4606 IX86_ATTR_ISA ("abm", OPT_mabm),
4607 IX86_ATTR_ISA ("bmi", OPT_mbmi),
4608 IX86_ATTR_ISA ("bmi2", OPT_mbmi2),
4609 IX86_ATTR_ISA ("lzcnt", OPT_mlzcnt),
4610 IX86_ATTR_ISA ("tbm", OPT_mtbm),
4611 IX86_ATTR_ISA ("aes", OPT_maes),
4612 IX86_ATTR_ISA ("sha", OPT_msha),
4613 IX86_ATTR_ISA ("avx", OPT_mavx),
4614 IX86_ATTR_ISA ("avx2", OPT_mavx2),
4615 IX86_ATTR_ISA ("avx512f", OPT_mavx512f),
4616 IX86_ATTR_ISA ("avx512pf", OPT_mavx512pf),
4617 IX86_ATTR_ISA ("avx512er", OPT_mavx512er),
4618 IX86_ATTR_ISA ("avx512cd", OPT_mavx512cd),
4619 IX86_ATTR_ISA ("avx512dq", OPT_mavx512dq),
4620 IX86_ATTR_ISA ("avx512bw", OPT_mavx512bw),
4621 IX86_ATTR_ISA ("avx512vl", OPT_mavx512vl),
4622 IX86_ATTR_ISA ("mmx", OPT_mmmx),
4623 IX86_ATTR_ISA ("pclmul", OPT_mpclmul),
4624 IX86_ATTR_ISA ("popcnt", OPT_mpopcnt),
4625 IX86_ATTR_ISA ("sse", OPT_msse),
4626 IX86_ATTR_ISA ("sse2", OPT_msse2),
4627 IX86_ATTR_ISA ("sse3", OPT_msse3),
4628 IX86_ATTR_ISA ("sse4", OPT_msse4),
4629 IX86_ATTR_ISA ("sse4.1", OPT_msse4_1),
4630 IX86_ATTR_ISA ("sse4.2", OPT_msse4_2),
4631 IX86_ATTR_ISA ("sse4a", OPT_msse4a),
4632 IX86_ATTR_ISA ("ssse3", OPT_mssse3),
4633 IX86_ATTR_ISA ("fma4", OPT_mfma4),
4634 IX86_ATTR_ISA ("fma", OPT_mfma),
4635 IX86_ATTR_ISA ("xop", OPT_mxop),
4636 IX86_ATTR_ISA ("lwp", OPT_mlwp),
4637 IX86_ATTR_ISA ("fsgsbase", OPT_mfsgsbase),
4638 IX86_ATTR_ISA ("rdrnd", OPT_mrdrnd),
4639 IX86_ATTR_ISA ("f16c", OPT_mf16c),
4640 IX86_ATTR_ISA ("rtm", OPT_mrtm),
4641 IX86_ATTR_ISA ("hle", OPT_mhle),
4642 IX86_ATTR_ISA ("prfchw", OPT_mprfchw),
4643 IX86_ATTR_ISA ("rdseed", OPT_mrdseed),
4644 IX86_ATTR_ISA ("adx", OPT_madx),
4645 IX86_ATTR_ISA ("fxsr", OPT_mfxsr),
4646 IX86_ATTR_ISA ("xsave", OPT_mxsave),
4647 IX86_ATTR_ISA ("xsaveopt", OPT_mxsaveopt),
4648 IX86_ATTR_ISA ("prefetchwt1", OPT_mprefetchwt1),
4649 IX86_ATTR_ISA ("clflushopt", OPT_mclflushopt),
4650 IX86_ATTR_ISA ("xsavec", OPT_mxsavec),
4651 IX86_ATTR_ISA ("xsaves", OPT_mxsaves),
4653 /* enum options */
4654 IX86_ATTR_ENUM ("fpmath=", OPT_mfpmath_),
4656 /* string options */
4657 IX86_ATTR_STR ("arch=", IX86_FUNCTION_SPECIFIC_ARCH),
4658 IX86_ATTR_STR ("tune=", IX86_FUNCTION_SPECIFIC_TUNE),
4660 /* flag options */
4661 IX86_ATTR_YES ("cld",
4662 OPT_mcld,
4663 MASK_CLD),
4665 IX86_ATTR_NO ("fancy-math-387",
4666 OPT_mfancy_math_387,
4667 MASK_NO_FANCY_MATH_387),
4669 IX86_ATTR_YES ("ieee-fp",
4670 OPT_mieee_fp,
4671 MASK_IEEE_FP),
4673 IX86_ATTR_YES ("inline-all-stringops",
4674 OPT_minline_all_stringops,
4675 MASK_INLINE_ALL_STRINGOPS),
4677 IX86_ATTR_YES ("inline-stringops-dynamically",
4678 OPT_minline_stringops_dynamically,
4679 MASK_INLINE_STRINGOPS_DYNAMICALLY),
4681 IX86_ATTR_NO ("align-stringops",
4682 OPT_mno_align_stringops,
4683 MASK_NO_ALIGN_STRINGOPS),
4685 IX86_ATTR_YES ("recip",
4686 OPT_mrecip,
4687 MASK_RECIP),
4691 /* If this is a list, recurse to get the options. */
4692 if (TREE_CODE (args) == TREE_LIST)
4694 bool ret = true;
4696 for (; args; args = TREE_CHAIN (args))
4697 if (TREE_VALUE (args)
4698 && !ix86_valid_target_attribute_inner_p (TREE_VALUE (args),
4699 p_strings, opts, opts_set,
4700 enum_opts_set))
4701 ret = false;
4703 return ret;
4706 else if (TREE_CODE (args) != STRING_CST)
4708 error ("attribute %<target%> argument not a string");
4709 return false;
4712 /* Handle multiple arguments separated by commas. */
4713 next_optstr = ASTRDUP (TREE_STRING_POINTER (args));
4715 while (next_optstr && *next_optstr != '\0')
4717 char *p = next_optstr;
4718 char *orig_p = p;
4719 char *comma = strchr (next_optstr, ',');
4720 const char *opt_string;
4721 size_t len, opt_len;
4722 int opt;
4723 bool opt_set_p;
4724 char ch;
4725 unsigned i;
4726 enum ix86_opt_type type = ix86_opt_unknown;
4727 int mask = 0;
4729 if (comma)
4731 *comma = '\0';
4732 len = comma - next_optstr;
4733 next_optstr = comma + 1;
4735 else
4737 len = strlen (p);
4738 next_optstr = NULL;
4741 /* Recognize no-xxx. */
4742 if (len > 3 && p[0] == 'n' && p[1] == 'o' && p[2] == '-')
4744 opt_set_p = false;
4745 p += 3;
4746 len -= 3;
4748 else
4749 opt_set_p = true;
4751 /* Find the option. */
4752 ch = *p;
4753 opt = N_OPTS;
4754 for (i = 0; i < ARRAY_SIZE (attrs); i++)
4756 type = attrs[i].type;
4757 opt_len = attrs[i].len;
4758 if (ch == attrs[i].string[0]
4759 && ((type != ix86_opt_str && type != ix86_opt_enum)
4760 ? len == opt_len
4761 : len > opt_len)
4762 && memcmp (p, attrs[i].string, opt_len) == 0)
4764 opt = attrs[i].opt;
4765 mask = attrs[i].mask;
4766 opt_string = attrs[i].string;
4767 break;
4771 /* Process the option. */
4772 if (opt == N_OPTS)
4774 error ("attribute(target(\"%s\")) is unknown", orig_p);
4775 ret = false;
4778 else if (type == ix86_opt_isa)
4780 struct cl_decoded_option decoded;
4782 generate_option (opt, NULL, opt_set_p, CL_TARGET, &decoded);
4783 ix86_handle_option (opts, opts_set,
4784 &decoded, input_location);
4787 else if (type == ix86_opt_yes || type == ix86_opt_no)
4789 if (type == ix86_opt_no)
4790 opt_set_p = !opt_set_p;
4792 if (opt_set_p)
4793 opts->x_target_flags |= mask;
4794 else
4795 opts->x_target_flags &= ~mask;
4798 else if (type == ix86_opt_str)
4800 if (p_strings[opt])
4802 error ("option(\"%s\") was already specified", opt_string);
4803 ret = false;
4805 else
4806 p_strings[opt] = xstrdup (p + opt_len);
4809 else if (type == ix86_opt_enum)
4811 bool arg_ok;
4812 int value;
4814 arg_ok = opt_enum_arg_to_value (opt, p + opt_len, &value, CL_TARGET);
4815 if (arg_ok)
4816 set_option (opts, enum_opts_set, opt, value,
4817 p + opt_len, DK_UNSPECIFIED, input_location,
4818 global_dc);
4819 else
4821 error ("attribute(target(\"%s\")) is unknown", orig_p);
4822 ret = false;
4826 else
4827 gcc_unreachable ();
4830 return ret;
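/* Editorial illustration (hypothetical declarations, not part of this
   file) of the strings this parser accepts:

     __attribute__((target ("sse4.2,no-avx")))            // ISA toggles
     unsigned crc32_byte (unsigned crc, unsigned char b);

     __attribute__((target ("arch=core2,tune=generic")))  // string options
     int hot_loop (int n);

     __attribute__((target ("fpmath=sse")))               // enum option
     double mix (double a, double b);

   Each comma-separated token is looked up in the attrs[] table above; a
   leading "no-" clears opt_set_p, "arch="/"tune=" values are stashed in
   p_strings[], and anything unrecognized reports
   attribute(target("...")) is unknown.  */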
4833 /* Return a TARGET_OPTION_NODE tree of the target options listed or NULL. */
4835 tree
4836 ix86_valid_target_attribute_tree (tree args,
4837 struct gcc_options *opts,
4838 struct gcc_options *opts_set)
4840 const char *orig_arch_string = opts->x_ix86_arch_string;
4841 const char *orig_tune_string = opts->x_ix86_tune_string;
4842 enum fpmath_unit orig_fpmath_set = opts_set->x_ix86_fpmath;
4843 int orig_tune_defaulted = ix86_tune_defaulted;
4844 int orig_arch_specified = ix86_arch_specified;
4845 char *option_strings[IX86_FUNCTION_SPECIFIC_MAX] = { NULL, NULL };
4846 tree t = NULL_TREE;
4847 int i;
4848 struct cl_target_option *def
4849 = TREE_TARGET_OPTION (target_option_default_node);
4850 struct gcc_options enum_opts_set;
4852 memset (&enum_opts_set, 0, sizeof (enum_opts_set));
4854 /* Process each of the options on the chain. */
4855 if (! ix86_valid_target_attribute_inner_p (args, option_strings, opts,
4856 opts_set, &enum_opts_set))
4857 return error_mark_node;
4859 /* If the changed options are different from the default, rerun
4860 ix86_option_override_internal, and then save the options away.
4861 The string options are attribute options, and will be undone
4862 when we copy the save structure. */
4863 if (opts->x_ix86_isa_flags != def->x_ix86_isa_flags
4864 || opts->x_target_flags != def->x_target_flags
4865 || option_strings[IX86_FUNCTION_SPECIFIC_ARCH]
4866 || option_strings[IX86_FUNCTION_SPECIFIC_TUNE]
4867 || enum_opts_set.x_ix86_fpmath)
4869 /* If we are using the default tune= or arch=, undo the string assigned,
4870 and use the default. */
4871 if (option_strings[IX86_FUNCTION_SPECIFIC_ARCH])
4872 opts->x_ix86_arch_string = option_strings[IX86_FUNCTION_SPECIFIC_ARCH];
4873 else if (!orig_arch_specified)
4874 opts->x_ix86_arch_string = NULL;
4876 if (option_strings[IX86_FUNCTION_SPECIFIC_TUNE])
4877 opts->x_ix86_tune_string = option_strings[IX86_FUNCTION_SPECIFIC_TUNE];
4878 else if (orig_tune_defaulted)
4879 opts->x_ix86_tune_string = NULL;
4881 /* If fpmath= is not set, and we now have sse2 on 32-bit, use it. */
4882 if (enum_opts_set.x_ix86_fpmath)
4883 opts_set->x_ix86_fpmath = (enum fpmath_unit) 1;
4884 else if (!TARGET_64BIT_P (opts->x_ix86_isa_flags)
4885 && TARGET_SSE_P (opts->x_ix86_isa_flags))
4887 opts->x_ix86_fpmath = (enum fpmath_unit) (FPMATH_SSE | FPMATH_387);
4888 opts_set->x_ix86_fpmath = (enum fpmath_unit) 1;
4891 /* Do any overrides, such as arch=xxx, or tune=xxx support. */
4892 ix86_option_override_internal (false, opts, opts_set);
4894 /* Add any builtin functions with the new isa if any. */
4895 ix86_add_new_builtins (opts->x_ix86_isa_flags);
4897 /* Save the current options unless we are validating options for
4898 #pragma. */
4899 t = build_target_option_node (opts);
4901 opts->x_ix86_arch_string = orig_arch_string;
4902 opts->x_ix86_tune_string = orig_tune_string;
4903 opts_set->x_ix86_fpmath = orig_fpmath_set;
4905 /* Free up memory allocated to hold the strings */
4906 for (i = 0; i < IX86_FUNCTION_SPECIFIC_MAX; i++)
4907 free (option_strings[i]);
4910 return t;
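/* Editorial note (a sketch, not from the original source): the target
   pragma is handled by the same machinery -- ix86_pragma_target_parse in
   i386-c.c calls into this function -- so the following is broadly
   equivalent to putting the attribute on each declaration:

     #pragma GCC push_options
     #pragma GCC target ("avx2,fma")
     // Code here is compiled as if -mavx2 -mfma were given.
     #pragma GCC pop_options

   In both cases ix86_option_override_internal is re-run so arch= and
   tune= overrides take effect before the option node is built.  */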
4913 /* Hook to validate attribute((target("string"))). */
4915 static bool
4916 ix86_valid_target_attribute_p (tree fndecl,
4917 tree ARG_UNUSED (name),
4918 tree args,
4919 int ARG_UNUSED (flags))
4921 struct gcc_options func_options;
4922 tree new_target, new_optimize;
4923 bool ret = true;
4925 /* attribute((target("default"))) does nothing, beyond
4926 affecting multi-versioning. */
4927 if (TREE_VALUE (args)
4928 && TREE_CODE (TREE_VALUE (args)) == STRING_CST
4929 && TREE_CHAIN (args) == NULL_TREE
4930 && strcmp (TREE_STRING_POINTER (TREE_VALUE (args)), "default") == 0)
4931 return true;
4933 tree old_optimize = build_optimization_node (&global_options);
4935 /* Get the optimization options of the current function. */
4936 tree func_optimize = DECL_FUNCTION_SPECIFIC_OPTIMIZATION (fndecl);
4938 if (!func_optimize)
4939 func_optimize = old_optimize;
4941 /* Init func_options. */
4942 memset (&func_options, 0, sizeof (func_options));
4943 init_options_struct (&func_options, NULL);
4944 lang_hooks.init_options_struct (&func_options);
4946 cl_optimization_restore (&func_options,
4947 TREE_OPTIMIZATION (func_optimize));
4949 /* Initialize func_options to the default before its target options can
4950 be set. */
4951 cl_target_option_restore (&func_options,
4952 TREE_TARGET_OPTION (target_option_default_node));
4954 new_target = ix86_valid_target_attribute_tree (args, &func_options,
4955 &global_options_set);
4957 new_optimize = build_optimization_node (&func_options);
4959 if (new_target == error_mark_node)
4960 ret = false;
4962 else if (fndecl && new_target)
4964 DECL_FUNCTION_SPECIFIC_TARGET (fndecl) = new_target;
4966 if (old_optimize != new_optimize)
4967 DECL_FUNCTION_SPECIFIC_OPTIMIZATION (fndecl) = new_optimize;
4970 return ret;
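/* Editorial illustration: the early return for "default" above is what
   function multi-versioning relies on.  In C++ (where GCC supports
   target-based multi-versioning), a hypothetical dispatchable function:

     __attribute__((target ("default"))) int dot (void) { return 0; }
     __attribute__((target ("avx2")))    int dot (void) { return 1; }

   The "default" version keeps the command-line options and gets no
   DECL_FUNCTION_SPECIFIC_TARGET node; only the "avx2" version does.  */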
4974 /* Hook to determine if one function can safely inline another. */
4976 static bool
4977 ix86_can_inline_p (tree caller, tree callee)
4979 bool ret = false;
4980 tree caller_tree = DECL_FUNCTION_SPECIFIC_TARGET (caller);
4981 tree callee_tree = DECL_FUNCTION_SPECIFIC_TARGET (callee);
4983 /* If callee has no option attributes, then it is ok to inline. */
4984 if (!callee_tree)
4985 ret = true;
4987 /* If caller has no option attributes, but callee does then it is not ok to
4988 inline. */
4989 else if (!caller_tree)
4990 ret = false;
4992 else
4994 struct cl_target_option *caller_opts = TREE_TARGET_OPTION (caller_tree);
4995 struct cl_target_option *callee_opts = TREE_TARGET_OPTION (callee_tree);
4997 /* Callee's ISA options should be a subset of the caller's, i.e. an SSE4 function
4998 can inline an SSE2 function, but an SSE2 function can't inline an SSE4
4999 function. */
5000 if ((caller_opts->x_ix86_isa_flags & callee_opts->x_ix86_isa_flags)
5001 != callee_opts->x_ix86_isa_flags)
5002 ret = false;
5004 /* See if we have the same non-isa options. */
5005 else if (caller_opts->x_target_flags != callee_opts->x_target_flags)
5006 ret = false;
5008 /* See if arch, tune, etc. are the same. */
5009 else if (caller_opts->arch != callee_opts->arch)
5010 ret = false;
5012 else if (caller_opts->tune != callee_opts->tune)
5013 ret = false;
5015 else if (caller_opts->x_ix86_fpmath != callee_opts->x_ix86_fpmath)
5016 ret = false;
5018 else if (caller_opts->branch_cost != callee_opts->branch_cost)
5019 ret = false;
5021 else
5022 ret = true;
5025 return ret;
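/* Editorial illustration (hypothetical code) of the ISA-subset rule:

     __attribute__((target ("sse4.2"), always_inline))
     static inline int popcnt16 (unsigned short x)
     { return __builtin_popcount (x); }

     int caller (unsigned short x)   // translation unit built with -msse2 only
     { return popcnt16 (x); }

   The callee's ISA flags include SSE4.2, which is not a subset of the
   caller's, so ix86_can_inline_p returns false; combined with
   always_inline this is normally reported as an inlining error.  */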
5029 /* Remember the last target of ix86_set_current_function. */
5030 static GTY(()) tree ix86_previous_fndecl;
5032 /* Set target globals to default. */
5034 static void
5035 ix86_reset_to_default_globals (void)
5037 tree old_tree = (ix86_previous_fndecl
5038 ? DECL_FUNCTION_SPECIFIC_TARGET (ix86_previous_fndecl)
5039 : NULL_TREE);
5041 if (old_tree)
5043 tree new_tree = target_option_current_node;
5044 cl_target_option_restore (&global_options,
5045 TREE_TARGET_OPTION (new_tree));
5046 if (TREE_TARGET_GLOBALS (new_tree))
5047 restore_target_globals (TREE_TARGET_GLOBALS (new_tree));
5048 else if (new_tree == target_option_default_node)
5049 restore_target_globals (&default_target_globals);
5050 else
5051 TREE_TARGET_GLOBALS (new_tree)
5052 = save_target_globals_default_opts ();
5056 /* Invalidate ix86_previous_fndecl cache. */
5057 void
5058 ix86_reset_previous_fndecl (void)
5060 ix86_reset_to_default_globals ();
5061 ix86_previous_fndecl = NULL_TREE;
5064 /* Establish appropriate back-end context for processing the function
5065 FNDECL. The argument might be NULL to indicate processing at top
5066 level, outside of any function scope. */
5067 static void
5068 ix86_set_current_function (tree fndecl)
5070 /* Only change the context if the function changes. This hook is called
5071 several times in the course of compiling a function, and we don't want to
5072 slow things down too much or call target_reinit when it isn't safe. */
5073 if (fndecl && fndecl != ix86_previous_fndecl)
5075 tree old_tree = (ix86_previous_fndecl
5076 ? DECL_FUNCTION_SPECIFIC_TARGET (ix86_previous_fndecl)
5077 : NULL_TREE);
5079 tree new_tree = (fndecl
5080 ? DECL_FUNCTION_SPECIFIC_TARGET (fndecl)
5081 : NULL_TREE);
5083 if (old_tree == new_tree)
5086 else if (new_tree && new_tree != target_option_default_node)
5088 cl_target_option_restore (&global_options,
5089 TREE_TARGET_OPTION (new_tree));
5090 if (TREE_TARGET_GLOBALS (new_tree))
5091 restore_target_globals (TREE_TARGET_GLOBALS (new_tree));
5092 else
5093 TREE_TARGET_GLOBALS (new_tree)
5094 = save_target_globals_default_opts ();
5097 else if (old_tree && old_tree != target_option_default_node)
5098 ix86_reset_to_default_globals ();
5099 ix86_previous_fndecl = fndecl;
5104 /* Return true if this goes in large data/bss. */
5106 static bool
5107 ix86_in_large_data_p (tree exp)
5109 if (ix86_cmodel != CM_MEDIUM && ix86_cmodel != CM_MEDIUM_PIC)
5110 return false;
5112 /* Functions are never large data. */
5113 if (TREE_CODE (exp) == FUNCTION_DECL)
5114 return false;
5116 /* Automatic variables are never large data. */
5117 if (TREE_CODE (exp) == VAR_DECL && !is_global_var (exp))
5118 return false;
5120 if (TREE_CODE (exp) == VAR_DECL && DECL_SECTION_NAME (exp))
5122 const char *section = DECL_SECTION_NAME (exp);
5123 if (strcmp (section, ".ldata") == 0
5124 || strcmp (section, ".lbss") == 0)
5125 return true;
5126 return false;
5128 else
5130 HOST_WIDE_INT size = int_size_in_bytes (TREE_TYPE (exp));
5132 /* If this is an incomplete type with size 0, then we can't put it
5133 in data because it might be too big when completed. Also,
5134 int_size_in_bytes returns -1 if size can vary or is larger than
5135 an integer in which case also it is safer to assume that it goes in
5136 large data. */
5137 if (size <= 0 || size > ix86_section_threshold)
5138 return true;
5141 return false;
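/* Editorial illustration (assumes -mcmodel=medium and the default
   -mlarge-data-threshold of 65536 bytes):

     static char big_buf[1 << 20];                         // > threshold
     static char small_buf[16];                            // <= threshold
     char tagged[64] __attribute__((section (".ldata")));  // explicit .ldata

   ix86_in_large_data_p returns true for big_buf and tagged, so the
   section-selection hooks below place them in the .l* sections, while
   small_buf stays in the ordinary .bss/.data sections.  */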
5144 /* Switch to the appropriate section for output of DECL.
5145 DECL is either a `VAR_DECL' node or a constant of some sort.
5146 RELOC indicates whether forming the initial value of DECL requires
5147 link-time relocations. */
5149 ATTRIBUTE_UNUSED static section *
5150 x86_64_elf_select_section (tree decl, int reloc,
5151 unsigned HOST_WIDE_INT align)
5153 if (ix86_in_large_data_p (decl))
5155 const char *sname = NULL;
5156 unsigned int flags = SECTION_WRITE;
5157 switch (categorize_decl_for_section (decl, reloc))
5159 case SECCAT_DATA:
5160 sname = ".ldata";
5161 break;
5162 case SECCAT_DATA_REL:
5163 sname = ".ldata.rel";
5164 break;
5165 case SECCAT_DATA_REL_LOCAL:
5166 sname = ".ldata.rel.local";
5167 break;
5168 case SECCAT_DATA_REL_RO:
5169 sname = ".ldata.rel.ro";
5170 break;
5171 case SECCAT_DATA_REL_RO_LOCAL:
5172 sname = ".ldata.rel.ro.local";
5173 break;
5174 case SECCAT_BSS:
5175 sname = ".lbss";
5176 flags |= SECTION_BSS;
5177 break;
5178 case SECCAT_RODATA:
5179 case SECCAT_RODATA_MERGE_STR:
5180 case SECCAT_RODATA_MERGE_STR_INIT:
5181 case SECCAT_RODATA_MERGE_CONST:
5182 sname = ".lrodata";
5183 flags = 0;
5184 break;
5185 case SECCAT_SRODATA:
5186 case SECCAT_SDATA:
5187 case SECCAT_SBSS:
5188 gcc_unreachable ();
5189 case SECCAT_TEXT:
5190 case SECCAT_TDATA:
5191 case SECCAT_TBSS:
5192 /* We don't split these for the medium model. Place them into
5193 default sections and hope for the best. */
5194 break;
5196 if (sname)
5198 /* We might get called with string constants, but get_named_section
5199 doesn't like them as they are not DECLs. Also, we need to set
5200 flags in that case. */
5201 if (!DECL_P (decl))
5202 return get_section (sname, flags, NULL);
5203 return get_named_section (decl, sname, reloc);
5206 return default_elf_select_section (decl, reloc, align);
5209 /* Select a set of attributes for section NAME based on the properties
5210 of DECL and whether or not RELOC indicates that DECL's initializer
5211 might contain runtime relocations. */
5213 static unsigned int ATTRIBUTE_UNUSED
5214 x86_64_elf_section_type_flags (tree decl, const char *name, int reloc)
5216 unsigned int flags = default_section_type_flags (decl, name, reloc);
5218 if (decl == NULL_TREE
5219 && (strcmp (name, ".ldata.rel.ro") == 0
5220 || strcmp (name, ".ldata.rel.ro.local") == 0))
5221 flags |= SECTION_RELRO;
5223 if (strcmp (name, ".lbss") == 0
5224 || strncmp (name, ".lbss.", 6) == 0
5225 || strncmp (name, ".gnu.linkonce.lb.", 17) == 0)
5226 flags |= SECTION_BSS;
5228 return flags;
5231 /* Build up a unique section name, expressed as a
5232 STRING_CST node, and assign it to DECL_SECTION_NAME (decl).
5233 RELOC indicates whether the initial value of EXP requires
5234 link-time relocations. */
5236 static void ATTRIBUTE_UNUSED
5237 x86_64_elf_unique_section (tree decl, int reloc)
5239 if (ix86_in_large_data_p (decl))
5241 const char *prefix = NULL;
5242 /* We only need to use .gnu.linkonce if we don't have COMDAT groups. */
5243 bool one_only = DECL_COMDAT_GROUP (decl) && !HAVE_COMDAT_GROUP;
5245 switch (categorize_decl_for_section (decl, reloc))
5247 case SECCAT_DATA:
5248 case SECCAT_DATA_REL:
5249 case SECCAT_DATA_REL_LOCAL:
5250 case SECCAT_DATA_REL_RO:
5251 case SECCAT_DATA_REL_RO_LOCAL:
5252 prefix = one_only ? ".ld" : ".ldata";
5253 break;
5254 case SECCAT_BSS:
5255 prefix = one_only ? ".lb" : ".lbss";
5256 break;
5257 case SECCAT_RODATA:
5258 case SECCAT_RODATA_MERGE_STR:
5259 case SECCAT_RODATA_MERGE_STR_INIT:
5260 case SECCAT_RODATA_MERGE_CONST:
5261 prefix = one_only ? ".lr" : ".lrodata";
5262 break;
5263 case SECCAT_SRODATA:
5264 case SECCAT_SDATA:
5265 case SECCAT_SBSS:
5266 gcc_unreachable ();
5267 case SECCAT_TEXT:
5268 case SECCAT_TDATA:
5269 case SECCAT_TBSS:
5270 /* We don't split these for the medium model. Place them into
5271 default sections and hope for the best. */
5272 break;
5274 if (prefix)
5276 const char *name, *linkonce;
5277 char *string;
5279 name = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (decl));
5280 name = targetm.strip_name_encoding (name);
5282 /* If we're using one_only, then there needs to be a .gnu.linkonce
5283 prefix to the section name. */
5284 linkonce = one_only ? ".gnu.linkonce" : "";
5286 string = ACONCAT ((linkonce, prefix, ".", name, NULL));
5288 set_decl_section_name (decl, string);
5289 return;
5292 default_unique_section (decl, reloc);
5295 #ifdef COMMON_ASM_OP
5296 /* This says how to output assembler code to declare an
5297 uninitialized external linkage data object.
5299 For medium model x86-64 we need to use .largecomm opcode for
5300 large objects. */
5301 void
5302 x86_elf_aligned_common (FILE *file,
5303 const char *name, unsigned HOST_WIDE_INT size,
5304 int align)
5306 if ((ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_MEDIUM_PIC)
5307 && size > (unsigned int)ix86_section_threshold)
5308 fputs ("\t.largecomm\t", file);
5309 else
5310 fputs (COMMON_ASM_OP, file);
5311 assemble_name (file, name);
5312 fprintf (file, "," HOST_WIDE_INT_PRINT_UNSIGNED ",%u\n",
5313 size, align / BITS_PER_UNIT);
5315 #endif
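/* Editorial sketch of the resulting assembly (medium code model; names and
   sizes are hypothetical):

     .largecomm  big_obj,1048576,32     # larger than the section threshold
     .comm       small_obj,16,16        # at or below the threshold

   The trailing value is the alignment in bytes (align / BITS_PER_UNIT),
   matching the fprintf format above.  */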
5317 /* Utility function for targets to use in implementing
5318 ASM_OUTPUT_ALIGNED_BSS. */
5320 void
5321 x86_output_aligned_bss (FILE *file, tree decl, const char *name,
5322 unsigned HOST_WIDE_INT size, int align)
5324 if ((ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_MEDIUM_PIC)
5325 && size > (unsigned int)ix86_section_threshold)
5326 switch_to_section (get_named_section (decl, ".lbss", 0));
5327 else
5328 switch_to_section (bss_section);
5329 ASM_OUTPUT_ALIGN (file, floor_log2 (align / BITS_PER_UNIT));
5330 #ifdef ASM_DECLARE_OBJECT_NAME
5331 last_assemble_variable_decl = decl;
5332 ASM_DECLARE_OBJECT_NAME (file, name, decl);
5333 #else
5334 /* The standard thing is just to output a label for the object. */
5335 ASM_OUTPUT_LABEL (file, name);
5336 #endif /* ASM_DECLARE_OBJECT_NAME */
5337 ASM_OUTPUT_SKIP (file, size ? size : 1);
5340 /* Decide whether we must probe the stack before any space allocation
5341 on this target. It's essentially TARGET_STACK_PROBE except when
5342 -fstack-check causes the stack to be already probed differently. */
5344 bool
5345 ix86_target_stack_probe (void)
5347 /* Do not probe the stack twice if static stack checking is enabled. */
5348 if (flag_stack_check == STATIC_BUILTIN_STACK_CHECK)
5349 return false;
5351 return TARGET_STACK_PROBE;
5354 /* Decide whether we can make a sibling call to a function. DECL is the
5355 declaration of the function being targeted by the call and EXP is the
5356 CALL_EXPR representing the call. */
5358 static bool
5359 ix86_function_ok_for_sibcall (tree decl, tree exp)
5361 tree type, decl_or_type;
5362 rtx a, b;
5364 /* If we are generating position-independent code, we cannot sibcall
5365 optimize any indirect call, or a direct call to a global function,
5366 as the PLT requires %ebx be live. (Darwin does not have a PLT.) */
5367 if (!TARGET_MACHO
5368 && !TARGET_64BIT
5369 && flag_pic
5370 && (!decl || !targetm.binds_local_p (decl)))
5371 return false;
5373 /* If we need to align the outgoing stack, then sibcalling would
5374 unalign the stack, which may break the called function. */
5375 if (ix86_minimum_incoming_stack_boundary (true)
5376 < PREFERRED_STACK_BOUNDARY)
5377 return false;
5379 if (decl)
5381 decl_or_type = decl;
5382 type = TREE_TYPE (decl);
5384 else
5386 /* We're looking at the CALL_EXPR, we need the type of the function. */
5387 type = CALL_EXPR_FN (exp); /* pointer expression */
5388 type = TREE_TYPE (type); /* pointer type */
5389 type = TREE_TYPE (type); /* function type */
5390 decl_or_type = type;
5393 /* Check that the return value locations are the same. Like
5394 if we are returning floats on the 80387 register stack, we cannot
5395 make a sibcall from a function that doesn't return a float to a
5396 function that does or, conversely, from a function that does return
5397 a float to a function that doesn't; the necessary stack adjustment
5398 would not be executed. This is also the place we notice
5399 differences in the return value ABI. Note that it is ok for one
5400 of the functions to have void return type as long as the return
5401 value of the other is passed in a register. */
5402 a = ix86_function_value (TREE_TYPE (exp), decl_or_type, false);
5403 b = ix86_function_value (TREE_TYPE (DECL_RESULT (cfun->decl)),
5404 cfun->decl, false);
5405 if (STACK_REG_P (a) || STACK_REG_P (b))
5407 if (!rtx_equal_p (a, b))
5408 return false;
5410 else if (VOID_TYPE_P (TREE_TYPE (DECL_RESULT (cfun->decl))))
5412 else if (!rtx_equal_p (a, b))
5413 return false;
5415 if (TARGET_64BIT)
5417 /* The SYSV ABI has more call-clobbered registers;
5418 disallow sibcalls from MS to SYSV. */
5419 if (cfun->machine->call_abi == MS_ABI
5420 && ix86_function_type_abi (type) == SYSV_ABI)
5421 return false;
5423 else
5425 /* If this call is indirect, we'll need to be able to use a
5426 call-clobbered register for the address of the target function.
5427 Make sure that all such registers are not used for passing
5428 parameters. Note that DLLIMPORT functions are indirect. */
5429 if (!decl
5430 || (TARGET_DLLIMPORT_DECL_ATTRIBUTES && DECL_DLLIMPORT_P (decl)))
5432 if (ix86_function_regparm (type, NULL) >= 3)
5434 /* ??? Need to count the actual number of registers to be used,
5435 not the possible number of registers. Fix later. */
5436 return false;
5441 /* Otherwise okay. That also includes certain types of indirect calls. */
5442 return true;
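/* Editorial illustration (hypothetical functions):

     extern int worker (int);

     int wrapper (int x)
     {
       return worker (x);   // call in tail position
     }

   With -m32 -fPIC and worker not binding locally, the first check above
   rejects the sibcall because the PLT needs %ebx live, so a normal call is
   emitted; with -m64, or a locally-bound callee, the call can usually be
   turned into a plain jmp.  */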
5445 /* Handle "cdecl", "stdcall", "fastcall", "regparm", "thiscall",
5446 and "sseregparm" calling convention attributes;
5447 arguments as in struct attribute_spec.handler. */
5449 static tree
5450 ix86_handle_cconv_attribute (tree *node, tree name,
5451 tree args,
5452 int,
5453 bool *no_add_attrs)
5455 if (TREE_CODE (*node) != FUNCTION_TYPE
5456 && TREE_CODE (*node) != METHOD_TYPE
5457 && TREE_CODE (*node) != FIELD_DECL
5458 && TREE_CODE (*node) != TYPE_DECL)
5460 warning (OPT_Wattributes, "%qE attribute only applies to functions",
5461 name);
5462 *no_add_attrs = true;
5463 return NULL_TREE;
5466 /* Can combine regparm with all attributes but fastcall, and thiscall. */
5467 if (is_attribute_p ("regparm", name))
5469 tree cst;
5471 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node)))
5473 error ("fastcall and regparm attributes are not compatible");
5476 if (lookup_attribute ("thiscall", TYPE_ATTRIBUTES (*node)))
5478 error ("regparam and thiscall attributes are not compatible");
5481 cst = TREE_VALUE (args);
5482 if (TREE_CODE (cst) != INTEGER_CST)
5484 warning (OPT_Wattributes,
5485 "%qE attribute requires an integer constant argument",
5486 name);
5487 *no_add_attrs = true;
5489 else if (compare_tree_int (cst, REGPARM_MAX) > 0)
5491 warning (OPT_Wattributes, "argument to %qE attribute larger than %d",
5492 name, REGPARM_MAX);
5493 *no_add_attrs = true;
5496 return NULL_TREE;
5499 if (TARGET_64BIT)
5501 /* Do not warn when emulating the MS ABI. */
5502 if ((TREE_CODE (*node) != FUNCTION_TYPE
5503 && TREE_CODE (*node) != METHOD_TYPE)
5504 || ix86_function_type_abi (*node) != MS_ABI)
5505 warning (OPT_Wattributes, "%qE attribute ignored",
5506 name);
5507 *no_add_attrs = true;
5508 return NULL_TREE;
5511 /* Can combine fastcall with stdcall (redundant) and sseregparm. */
5512 if (is_attribute_p ("fastcall", name))
5514 if (lookup_attribute ("cdecl", TYPE_ATTRIBUTES (*node)))
5516 error ("fastcall and cdecl attributes are not compatible");
5518 if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (*node)))
5520 error ("fastcall and stdcall attributes are not compatible");
5522 if (lookup_attribute ("regparm", TYPE_ATTRIBUTES (*node)))
5524 error ("fastcall and regparm attributes are not compatible");
5526 if (lookup_attribute ("thiscall", TYPE_ATTRIBUTES (*node)))
5528 error ("fastcall and thiscall attributes are not compatible");
5532 /* Can combine stdcall with fastcall (redundant), regparm and
5533 sseregparm. */
5534 else if (is_attribute_p ("stdcall", name))
5536 if (lookup_attribute ("cdecl", TYPE_ATTRIBUTES (*node)))
5538 error ("stdcall and cdecl attributes are not compatible");
5540 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node)))
5542 error ("stdcall and fastcall attributes are not compatible");
5544 if (lookup_attribute ("thiscall", TYPE_ATTRIBUTES (*node)))
5546 error ("stdcall and thiscall attributes are not compatible");
5550 /* Can combine cdecl with regparm and sseregparm. */
5551 else if (is_attribute_p ("cdecl", name))
5553 if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (*node)))
5555 error ("stdcall and cdecl attributes are not compatible");
5557 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node)))
5559 error ("fastcall and cdecl attributes are not compatible");
5561 if (lookup_attribute ("thiscall", TYPE_ATTRIBUTES (*node)))
5563 error ("cdecl and thiscall attributes are not compatible");
5566 else if (is_attribute_p ("thiscall", name))
5568 if (TREE_CODE (*node) != METHOD_TYPE && pedantic)
5569 warning (OPT_Wattributes, "%qE attribute is used for non-class method",
5570 name);
5571 if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (*node)))
5573 error ("stdcall and thiscall attributes are not compatible");
5575 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node)))
5577 error ("fastcall and thiscall attributes are not compatible");
5579 if (lookup_attribute ("cdecl", TYPE_ATTRIBUTES (*node)))
5581 error ("cdecl and thiscall attributes are not compatible");
5585 /* Can combine sseregparm with all attributes. */
5587 return NULL_TREE;
5590 /* The transactional memory builtins are implicitly regparm or fastcall
5591 depending on the ABI. Override the generic do-nothing attribute that
5592 these builtins were declared with, and replace it with one of the two
5593 attributes that we expect elsewhere. */
5595 static tree
5596 ix86_handle_tm_regparm_attribute (tree *node, tree, tree,
5597 int flags, bool *no_add_attrs)
5599 tree alt;
5601 /* In no case do we want to add the placeholder attribute. */
5602 *no_add_attrs = true;
5604 /* The 64-bit ABI is unchanged for transactional memory. */
5605 if (TARGET_64BIT)
5606 return NULL_TREE;
5608 /* ??? Is there a better way to validate 32-bit windows? We have
5609 cfun->machine->call_abi, but that seems to be set only for 64-bit. */
5610 if (CHECK_STACK_LIMIT > 0)
5611 alt = tree_cons (get_identifier ("fastcall"), NULL, NULL);
5612 else
5614 alt = tree_cons (NULL, build_int_cst (NULL, 2), NULL);
5615 alt = tree_cons (get_identifier ("regparm"), alt, NULL);
5617 decl_attributes (node, alt, flags);
5619 return NULL_TREE;
5622 /* This function determines from TYPE the calling-convention. */
5624 unsigned int
5625 ix86_get_callcvt (const_tree type)
5627 unsigned int ret = 0;
5628 bool is_stdarg;
5629 tree attrs;
5631 if (TARGET_64BIT)
5632 return IX86_CALLCVT_CDECL;
5634 attrs = TYPE_ATTRIBUTES (type);
5635 if (attrs != NULL_TREE)
5637 if (lookup_attribute ("cdecl", attrs))
5638 ret |= IX86_CALLCVT_CDECL;
5639 else if (lookup_attribute ("stdcall", attrs))
5640 ret |= IX86_CALLCVT_STDCALL;
5641 else if (lookup_attribute ("fastcall", attrs))
5642 ret |= IX86_CALLCVT_FASTCALL;
5643 else if (lookup_attribute ("thiscall", attrs))
5644 ret |= IX86_CALLCVT_THISCALL;
5646 /* Regparm isn't allowed for thiscall and fastcall. */
5647 if ((ret & (IX86_CALLCVT_THISCALL | IX86_CALLCVT_FASTCALL)) == 0)
5649 if (lookup_attribute ("regparm", attrs))
5650 ret |= IX86_CALLCVT_REGPARM;
5651 if (lookup_attribute ("sseregparm", attrs))
5652 ret |= IX86_CALLCVT_SSEREGPARM;
5655 if (IX86_BASE_CALLCVT(ret) != 0)
5656 return ret;
5659 is_stdarg = stdarg_p (type);
5660 if (TARGET_RTD && !is_stdarg)
5661 return IX86_CALLCVT_STDCALL | ret;
5663 if (ret != 0
5664 || is_stdarg
5665 || TREE_CODE (type) != METHOD_TYPE
5666 || ix86_function_type_abi (type) != MS_ABI)
5667 return IX86_CALLCVT_CDECL | ret;
5669 return IX86_CALLCVT_THISCALL;
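/* Editorial illustration (32-bit only; hypothetical declarations) of how
   the attributes map to IX86_CALLCVT_* bits:

     int __attribute__((cdecl))    f_cdecl    (int, int);  // caller pops args
     int __attribute__((stdcall))  f_stdcall  (int, int);  // callee pops args
     int __attribute__((fastcall)) f_fastcall (int, int);  // %ecx/%edx, callee pops
     int __attribute__((thiscall)) f_thiscall (int, int);  // %ecx, callee pops

   With -mrtd and no attribute, non-stdarg functions default to the stdcall
   convention, per the TARGET_RTD branch above.  */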
5672 /* Return 0 if the attributes for two types are incompatible, 1 if they
5673 are compatible, and 2 if they are nearly compatible (which causes a
5674 warning to be generated). */
5676 static int
5677 ix86_comp_type_attributes (const_tree type1, const_tree type2)
5679 unsigned int ccvt1, ccvt2;
5681 if (TREE_CODE (type1) != FUNCTION_TYPE
5682 && TREE_CODE (type1) != METHOD_TYPE)
5683 return 1;
5685 ccvt1 = ix86_get_callcvt (type1);
5686 ccvt2 = ix86_get_callcvt (type2);
5687 if (ccvt1 != ccvt2)
5688 return 0;
5689 if (ix86_function_regparm (type1, NULL)
5690 != ix86_function_regparm (type2, NULL))
5691 return 0;
5693 return 1;
5696 /* Return the regparm value for a function with the indicated TYPE and DECL.
5697 DECL may be NULL when calling function indirectly
5698 or considering a libcall. */
5700 static int
5701 ix86_function_regparm (const_tree type, const_tree decl)
5703 tree attr;
5704 int regparm;
5705 unsigned int ccvt;
5707 if (TARGET_64BIT)
5708 return (ix86_function_type_abi (type) == SYSV_ABI
5709 ? X86_64_REGPARM_MAX : X86_64_MS_REGPARM_MAX);
5710 ccvt = ix86_get_callcvt (type);
5711 regparm = ix86_regparm;
5713 if ((ccvt & IX86_CALLCVT_REGPARM) != 0)
5715 attr = lookup_attribute ("regparm", TYPE_ATTRIBUTES (type));
5716 if (attr)
5718 regparm = TREE_INT_CST_LOW (TREE_VALUE (TREE_VALUE (attr)));
5719 return regparm;
5722 else if ((ccvt & IX86_CALLCVT_FASTCALL) != 0)
5723 return 2;
5724 else if ((ccvt & IX86_CALLCVT_THISCALL) != 0)
5725 return 1;
5727 /* Use register calling convention for local functions when possible. */
5728 if (decl
5729 && TREE_CODE (decl) == FUNCTION_DECL
5730 /* Caller and callee must agree on the calling convention, so
5731 checking only the global optimize flag here would mean that with
5732 __attribute__((optimize (...))) the caller could use the regparm convention
5733 and the callee not, or vice versa. Instead look at whether the callee
5734 is optimized or not. */
5735 && opt_for_fn (decl, optimize)
5736 && !(profile_flag && !flag_fentry))
5738 /* FIXME: remove this CONST_CAST when cgraph.[ch] is constified. */
5739 cgraph_local_info *i = cgraph_node::local_info (CONST_CAST_TREE (decl));
5740 if (i && i->local && i->can_change_signature)
5742 int local_regparm, globals = 0, regno;
5744 /* Make sure no regparm register is taken by a
5745 fixed register variable. */
5746 for (local_regparm = 0; local_regparm < REGPARM_MAX; local_regparm++)
5747 if (fixed_regs[local_regparm])
5748 break;
5750 /* We don't want to use regparm(3) for nested functions as
5751 these use a static chain pointer in the third argument. */
5752 if (local_regparm == 3 && DECL_STATIC_CHAIN (decl))
5753 local_regparm = 2;
5755 /* In 32-bit mode save a register for the split stack. */
5756 if (!TARGET_64BIT && local_regparm == 3 && flag_split_stack)
5757 local_regparm = 2;
5759 /* Each fixed register usage increases register pressure,
5760 so fewer registers should be used for argument passing.
5761 This functionality can be overridden by an explicit
5762 regparm value. */
5763 for (regno = AX_REG; regno <= DI_REG; regno++)
5764 if (fixed_regs[regno])
5765 globals++;
5767 local_regparm
5768 = globals < local_regparm ? local_regparm - globals : 0;
5770 if (local_regparm > regparm)
5771 regparm = local_regparm;
5775 return regparm;
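/* Editorial illustration (32-bit; hypothetical functions):

     __attribute__((regparm (3)))
     int add3 (int a, int b, int c);   // a in %eax, b in %edx, c in %ecx

     static int helper (int a, int b) { return a + b; }

   The explicit regparm(3) attribute forces three register arguments.  For a
   local function like helper, the cgraph-based path above may choose a
   register convention automatically when optimizing, reduced by any fixed
   registers that are in use.  */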
5778 /* Return 1 or 2, if we can pass up to SSE_REGPARM_MAX SFmode (1) and
5779 DFmode (2) arguments in SSE registers for a function with the
5780 indicated TYPE and DECL. DECL may be NULL when calling function
5781 indirectly or considering a libcall. Otherwise return 0. */
5783 static int
5784 ix86_function_sseregparm (const_tree type, const_tree decl, bool warn)
5786 gcc_assert (!TARGET_64BIT);
5788 /* Use SSE registers to pass SFmode and DFmode arguments if requested
5789 by the sseregparm attribute. */
5790 if (TARGET_SSEREGPARM
5791 || (type && lookup_attribute ("sseregparm", TYPE_ATTRIBUTES (type))))
5793 if (!TARGET_SSE)
5795 if (warn)
5797 if (decl)
5798 error ("calling %qD with attribute sseregparm without "
5799 "SSE/SSE2 enabled", decl);
5800 else
5801 error ("calling %qT with attribute sseregparm without "
5802 "SSE/SSE2 enabled", type);
5804 return 0;
5807 return 2;
5810 /* For local functions, pass up to SSE_REGPARM_MAX SFmode
5811 (and DFmode for SSE2) arguments in SSE registers. */
5812 if (decl && TARGET_SSE_MATH && optimize
5813 && !(profile_flag && !flag_fentry))
5815 /* FIXME: remove this CONST_CAST when cgraph.[ch] is constified. */
5816 cgraph_local_info *i = cgraph_node::local_info (CONST_CAST_TREE(decl));
5817 if (i && i->local && i->can_change_signature)
5818 return TARGET_SSE2 ? 2 : 1;
5821 return 0;
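/* Editorial illustration (32-bit; hypothetical declaration):

     __attribute__((sseregparm))
     double scale (double x, double y);   // x in %xmm0, y in %xmm1

   Without SSE2 enabled (or SSE for float-only arguments), the error above
   is emitted; for local functions compiled with -mfpmath=sse and
   optimization, the same treatment may be applied implicitly.  */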
5824 /* Return true if EAX is live at the start of the function. Used by
5825 ix86_expand_prologue to determine if we need special help before
5826 calling allocate_stack_worker. */
5828 static bool
5829 ix86_eax_live_at_start_p (void)
5831 /* Cheat. Don't bother working forward from ix86_function_regparm
5832 to the function type to whether an actual argument is located in
5833 eax. Instead just look at cfg info, which is still close enough
5834 to correct at this point. This gives false positives for broken
5835 functions that might use uninitialized data that happens to be
5836 allocated in eax, but who cares? */
5837 return REGNO_REG_SET_P (df_get_live_out (ENTRY_BLOCK_PTR_FOR_FN (cfun)), 0);
5840 static bool
5841 ix86_keep_aggregate_return_pointer (tree fntype)
5843 tree attr;
5845 if (!TARGET_64BIT)
5847 attr = lookup_attribute ("callee_pop_aggregate_return",
5848 TYPE_ATTRIBUTES (fntype));
5849 if (attr)
5850 return (TREE_INT_CST_LOW (TREE_VALUE (TREE_VALUE (attr))) == 0);
5852 /* For 32-bit MS-ABI the default is to keep aggregate
5853 return pointer. */
5854 if (ix86_function_type_abi (fntype) == MS_ABI)
5855 return true;
5857 return KEEP_AGGREGATE_RETURN_POINTER != 0;
5860 /* Value is the number of bytes of arguments automatically
5861 popped when returning from a subroutine call.
5862 FUNDECL is the declaration node of the function (as a tree),
5863 FUNTYPE is the data type of the function (as a tree),
5864 or for a library call it is an identifier node for the subroutine name.
5865 SIZE is the number of bytes of arguments passed on the stack.
5867 On the 80386, the RTD insn may be used to pop them if the number
5868 of args is fixed, but if the number is variable then the caller
5869 must pop them all. RTD can't be used for library calls now
5870 because the library is compiled with the Unix compiler.
5871 Use of RTD is a selectable option, since it is incompatible with
5872 standard Unix calling sequences. If the option is not selected,
5873 the caller must always pop the args.
5875 The attribute stdcall is equivalent to RTD on a per module basis. */
5877 static int
5878 ix86_return_pops_args (tree fundecl, tree funtype, int size)
5880 unsigned int ccvt;
5882 /* None of the 64-bit ABIs pop arguments. */
5883 if (TARGET_64BIT)
5884 return 0;
5886 ccvt = ix86_get_callcvt (funtype);
5888 if ((ccvt & (IX86_CALLCVT_STDCALL | IX86_CALLCVT_FASTCALL
5889 | IX86_CALLCVT_THISCALL)) != 0
5890 && ! stdarg_p (funtype))
5891 return size;
5893 /* Lose any fake structure return argument if it is passed on the stack. */
5894 if (aggregate_value_p (TREE_TYPE (funtype), fundecl)
5895 && !ix86_keep_aggregate_return_pointer (funtype))
5897 int nregs = ix86_function_regparm (funtype, fundecl);
5898 if (nregs == 0)
5899 return GET_MODE_SIZE (Pmode);
5902 return 0;
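/* Editorial illustration (32-bit; hypothetical declarations):

     __attribute__((stdcall)) int f (int a, int b);   // callee pops 8 bytes
     int g (int a, int b);                            // default cdecl

   For f, this hook returns SIZE (8 here), so the function returns with
   "ret $8"; for g it returns 0 and the caller adjusts %esp after the
   call.  */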
5905 /* Implement the TARGET_LEGITIMATE_COMBINED_INSN hook. */
5907 static bool
5908 ix86_legitimate_combined_insn (rtx_insn *insn)
5910 /* Check operand constraints in case hard registers were propagated
5911 into insn pattern. This check prevents combine pass from
5912 generating insn patterns with invalid hard register operands.
5913 These invalid insns can eventually confuse reload to error out
5914 with a spill failure. See also PRs 46829 and 46843. */
5915 if ((INSN_CODE (insn) = recog (PATTERN (insn), insn, 0)) >= 0)
5917 int i;
5919 extract_insn (insn);
5920 preprocess_constraints (insn);
5922 int n_operands = recog_data.n_operands;
5923 int n_alternatives = recog_data.n_alternatives;
5924 for (i = 0; i < n_operands; i++)
5926 rtx op = recog_data.operand[i];
5927 machine_mode mode = GET_MODE (op);
5928 const operand_alternative *op_alt;
5929 int offset = 0;
5930 bool win;
5931 int j;
5933 /* For pre-AVX disallow unaligned loads/stores where the
5934 instructions don't support it. */
5935 if (!TARGET_AVX
5936 && VECTOR_MODE_P (GET_MODE (op))
5937 && misaligned_operand (op, GET_MODE (op)))
5939 int min_align = get_attr_ssememalign (insn);
5940 if (min_align == 0)
5941 return false;
5944 /* A unary operator may be accepted by the predicate, but it
5945 is irrelevant for matching constraints. */
5946 if (UNARY_P (op))
5947 op = XEXP (op, 0);
5949 if (GET_CODE (op) == SUBREG)
5951 if (REG_P (SUBREG_REG (op))
5952 && REGNO (SUBREG_REG (op)) < FIRST_PSEUDO_REGISTER)
5953 offset = subreg_regno_offset (REGNO (SUBREG_REG (op)),
5954 GET_MODE (SUBREG_REG (op)),
5955 SUBREG_BYTE (op),
5956 GET_MODE (op));
5957 op = SUBREG_REG (op);
5960 if (!(REG_P (op) && HARD_REGISTER_P (op)))
5961 continue;
5963 op_alt = recog_op_alt;
5965 /* Operand has no constraints, anything is OK. */
5966 win = !n_alternatives;
5968 alternative_mask preferred = get_preferred_alternatives (insn);
5969 for (j = 0; j < n_alternatives; j++, op_alt += n_operands)
5971 if (!TEST_BIT (preferred, j))
5972 continue;
5973 if (op_alt[i].anything_ok
5974 || (op_alt[i].matches != -1
5975 && operands_match_p
5976 (recog_data.operand[i],
5977 recog_data.operand[op_alt[i].matches]))
5978 || reg_fits_class_p (op, op_alt[i].cl, offset, mode))
5980 win = true;
5981 break;
5985 if (!win)
5986 return false;
5990 return true;
5993 /* Implement the TARGET_ASAN_SHADOW_OFFSET hook. */
5995 static unsigned HOST_WIDE_INT
5996 ix86_asan_shadow_offset (void)
5998 return TARGET_LP64 ? (TARGET_MACHO ? (HOST_WIDE_INT_1 << 44)
5999 : HOST_WIDE_INT_C (0x7fff8000))
6000 : (HOST_WIDE_INT_1 << 29);
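/* Editorial note: with the usual ASan shadow scale of 3, an address A is
   checked through the shadow byte at (A >> 3) + offset, so the constants
   above correspond to:

     64-bit Linux (LP64):  shadow (A) = (A >> 3) + 0x7fff8000
     64-bit Mach-O:        shadow (A) = (A >> 3) + (1ULL << 44)
     32-bit targets:       shadow (A) = (A >> 3) + 0x20000000  (1 << 29)  */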
6003 /* Argument support functions. */
6005 /* Return true when register may be used to pass function parameters. */
6006 bool
6007 ix86_function_arg_regno_p (int regno)
6009 int i;
6010 const int *parm_regs;
6012 if (!TARGET_64BIT)
6014 if (TARGET_MACHO)
6015 return (regno < REGPARM_MAX
6016 || (TARGET_SSE && SSE_REGNO_P (regno) && !fixed_regs[regno]));
6017 else
6018 return (regno < REGPARM_MAX
6019 || (TARGET_MMX && MMX_REGNO_P (regno)
6020 && (regno < FIRST_MMX_REG + MMX_REGPARM_MAX))
6021 || (TARGET_SSE && SSE_REGNO_P (regno)
6022 && (regno < FIRST_SSE_REG + SSE_REGPARM_MAX)));
6025 if (TARGET_SSE && SSE_REGNO_P (regno)
6026 && (regno < FIRST_SSE_REG + SSE_REGPARM_MAX))
6027 return true;
6029 /* TODO: The function should depend on current function ABI but
6030 builtins.c would need updating then. Therefore we use the
6031 default ABI. */
6033 /* RAX is used as hidden argument to va_arg functions. */
6034 if (ix86_abi == SYSV_ABI && regno == AX_REG)
6035 return true;
6037 if (ix86_abi == MS_ABI)
6038 parm_regs = x86_64_ms_abi_int_parameter_registers;
6039 else
6040 parm_regs = x86_64_int_parameter_registers;
6041 for (i = 0; i < (ix86_abi == MS_ABI
6042 ? X86_64_MS_REGPARM_MAX : X86_64_REGPARM_MAX); i++)
6043 if (regno == parm_regs[i])
6044 return true;
6045 return false;
6048 /* Return true if we do not know how to pass TYPE solely in registers. */
6050 static bool
6051 ix86_must_pass_in_stack (machine_mode mode, const_tree type)
6053 if (must_pass_in_stack_var_size_or_pad (mode, type))
6054 return true;
6056 /* For 32-bit, we want TImode aggregates to go on the stack. But watch out!
6057 The layout_type routine is crafty and tries to trick us into passing
6058 currently unsupported vector types on the stack by using TImode. */
6059 return (!TARGET_64BIT && mode == TImode
6060 && type && TREE_CODE (type) != VECTOR_TYPE);
6063 /* Return the size, in bytes, of the area reserved for arguments passed
6064 in registers for the function represented by FNDECL, depending on the
6065 ABI format used. */
6066 int
6067 ix86_reg_parm_stack_space (const_tree fndecl)
6069 enum calling_abi call_abi = SYSV_ABI;
6070 if (fndecl != NULL_TREE && TREE_CODE (fndecl) == FUNCTION_DECL)
6071 call_abi = ix86_function_abi (fndecl);
6072 else
6073 call_abi = ix86_function_type_abi (fndecl);
6074 if (TARGET_64BIT && call_abi == MS_ABI)
6075 return 32;
6076 return 0;
6079 /* Returns value SYSV_ABI, MS_ABI dependent on fntype, specifying the
6080 call abi used. */
6081 enum calling_abi
6082 ix86_function_type_abi (const_tree fntype)
6084 if (fntype != NULL_TREE && TYPE_ATTRIBUTES (fntype) != NULL_TREE)
6086 enum calling_abi abi = ix86_abi;
6087 if (abi == SYSV_ABI)
6089 if (lookup_attribute ("ms_abi", TYPE_ATTRIBUTES (fntype)))
6090 abi = MS_ABI;
6092 else if (lookup_attribute ("sysv_abi", TYPE_ATTRIBUTES (fntype)))
6093 abi = SYSV_ABI;
6094 return abi;
6096 return ix86_abi;
6099 /* We add this as a workaround in order to use libc_has_function
6100 hook in i386.md. */
6101 bool
6102 ix86_libc_has_function (enum function_class fn_class)
6104 return targetm.libc_has_function (fn_class);
6107 static bool
6108 ix86_function_ms_hook_prologue (const_tree fn)
6110 if (fn && lookup_attribute ("ms_hook_prologue", DECL_ATTRIBUTES (fn)))
6112 if (decl_function_context (fn) != NULL_TREE)
6113 error_at (DECL_SOURCE_LOCATION (fn),
6114 "ms_hook_prologue is not compatible with nested function");
6115 else
6116 return true;
6118 return false;
6121 static enum calling_abi
6122 ix86_function_abi (const_tree fndecl)
6124 if (! fndecl)
6125 return ix86_abi;
6126 return ix86_function_type_abi (TREE_TYPE (fndecl));
6129 /* Returns value SYSV_ABI, MS_ABI dependent on cfun, specifying the
6130 call abi used. */
6131 enum calling_abi
6132 ix86_cfun_abi (void)
6134 if (! cfun)
6135 return ix86_abi;
6136 return cfun->machine->call_abi;
6139 /* Write the extra assembler code needed to declare a function properly. */
6141 void
6142 ix86_asm_output_function_label (FILE *asm_out_file, const char *fname,
6143 tree decl)
6145 bool is_ms_hook = ix86_function_ms_hook_prologue (decl);
6147 if (is_ms_hook)
6149 int i, filler_count = (TARGET_64BIT ? 32 : 16);
6150 unsigned int filler_cc = 0xcccccccc;
6152 for (i = 0; i < filler_count; i += 4)
6153 fprintf (asm_out_file, ASM_LONG " %#x\n", filler_cc);
6156 #ifdef SUBTARGET_ASM_UNWIND_INIT
6157 SUBTARGET_ASM_UNWIND_INIT (asm_out_file);
6158 #endif
6160 ASM_OUTPUT_LABEL (asm_out_file, fname);
6162 /* Output magic byte marker, if hot-patch attribute is set. */
6163 if (is_ms_hook)
6165 if (TARGET_64BIT)
6167 /* leaq [%rsp + 0], %rsp */
6168 asm_fprintf (asm_out_file, ASM_BYTE
6169 "0x48, 0x8d, 0xa4, 0x24, 0x00, 0x00, 0x00, 0x00\n");
6171 else
6173 /* movl.s %edi, %edi
6174 push %ebp
6175 movl.s %esp, %ebp */
6176 asm_fprintf (asm_out_file, ASM_BYTE
6177 "0x8b, 0xff, 0x55, 0x8b, 0xec\n");
6182 /* regclass.c */
6183 extern void init_regs (void);
6185 /* Implementation of the call ABI switching target hook. The call-used
6186 register sets specific to FNDECL are selected. See also
6187 ix86_conditional_register_usage for more details. */
6188 void
6189 ix86_call_abi_override (const_tree fndecl)
6191 if (fndecl == NULL_TREE)
6192 cfun->machine->call_abi = ix86_abi;
6193 else
6194 cfun->machine->call_abi = ix86_function_type_abi (TREE_TYPE (fndecl));
6197 /* The 64-bit MS and SYSV ABIs have different sets of call-used registers. Avoid
6198 expensive re-initialization of init_regs each time we switch function context,
6199 since this is needed only during RTL expansion. */
6200 static void
6201 ix86_maybe_switch_abi (void)
6203 if (TARGET_64BIT &&
6204 call_used_regs[SI_REG] == (cfun->machine->call_abi == MS_ABI))
6205 reinit_regs ();
6208 /* Return 1 if pseudo register should be created and used to hold
6209 GOT address for PIC code. */
6210 static bool
6211 ix86_use_pseudo_pic_reg (void)
6213 if ((TARGET_64BIT
6214 && (ix86_cmodel == CM_SMALL_PIC
6215 || TARGET_PECOFF))
6216 || !flag_pic)
6217 return false;
6218 return true;
6221 /* Initialize large model PIC register. */
6223 static void
6224 ix86_init_large_pic_reg (unsigned int tmp_regno)
6226 rtx_code_label *label;
6227 rtx tmp_reg;
6229 gcc_assert (Pmode == DImode);
6230 label = gen_label_rtx ();
6231 emit_label (label);
6232 LABEL_PRESERVE_P (label) = 1;
6233 tmp_reg = gen_rtx_REG (Pmode, tmp_regno);
6234 gcc_assert (REGNO (pic_offset_table_rtx) != tmp_regno);
6235 emit_insn (gen_set_rip_rex64 (pic_offset_table_rtx,
6236 label));
6237 emit_insn (gen_set_got_offset_rex64 (tmp_reg, label));
6238 emit_insn (ix86_gen_add3 (pic_offset_table_rtx,
6239 pic_offset_table_rtx, tmp_reg));
6242 /* Create and initialize PIC register if required. */
6243 static void
6244 ix86_init_pic_reg (void)
6246 edge entry_edge;
6247 rtx_insn *seq;
6249 if (!ix86_use_pseudo_pic_reg ())
6250 return;
6252 start_sequence ();
6254 if (TARGET_64BIT)
6256 if (ix86_cmodel == CM_LARGE_PIC)
6257 ix86_init_large_pic_reg (R11_REG);
6258 else
6259 emit_insn (gen_set_got_rex64 (pic_offset_table_rtx));
6261 else
6263 /* If there is a future mcount call in the function, it is more profitable
6264 to emit SET_GOT into the ABI-defined REAL_PIC_OFFSET_TABLE_REGNUM. */
6265 rtx reg = crtl->profile
6266 ? gen_rtx_REG (Pmode, REAL_PIC_OFFSET_TABLE_REGNUM)
6267 : pic_offset_table_rtx;
6268 rtx insn = emit_insn (gen_set_got (reg));
6269 RTX_FRAME_RELATED_P (insn) = 1;
6270 if (crtl->profile)
6271 emit_move_insn (pic_offset_table_rtx, reg);
6272 add_reg_note (insn, REG_CFA_FLUSH_QUEUE, NULL_RTX);
6275 seq = get_insns ();
6276 end_sequence ();
6278 entry_edge = single_succ_edge (ENTRY_BLOCK_PTR_FOR_FN (cfun));
6279 insert_insn_on_edge (seq, entry_edge);
6280 commit_one_edge_insertion (entry_edge);
6283 /* Initialize a variable CUM of type CUMULATIVE_ARGS
6284 for a call to a function whose data type is FNTYPE.
6285 For a library call, FNTYPE is 0. */
6287 void
6288 init_cumulative_args (CUMULATIVE_ARGS *cum, /* Argument info to initialize */
6289 tree fntype, /* tree ptr for function decl */
6290 rtx libname, /* SYMBOL_REF of library name or 0 */
6291 tree fndecl,
6292 int caller)
6294 struct cgraph_local_info *i;
6296 memset (cum, 0, sizeof (*cum));
6298 if (fndecl)
6300 i = cgraph_node::local_info (fndecl);
6301 cum->call_abi = ix86_function_abi (fndecl);
6303 else
6305 i = NULL;
6306 cum->call_abi = ix86_function_type_abi (fntype);
6309 cum->caller = caller;
6311 /* Set up the number of registers to use for passing arguments. */
6312 cum->nregs = ix86_regparm;
6313 if (TARGET_64BIT)
6315 cum->nregs = (cum->call_abi == SYSV_ABI
6316 ? X86_64_REGPARM_MAX
6317 : X86_64_MS_REGPARM_MAX);
6319 if (TARGET_SSE)
6321 cum->sse_nregs = SSE_REGPARM_MAX;
6322 if (TARGET_64BIT)
6324 cum->sse_nregs = (cum->call_abi == SYSV_ABI
6325 ? X86_64_SSE_REGPARM_MAX
6326 : X86_64_MS_SSE_REGPARM_MAX);
6329 if (TARGET_MMX)
6330 cum->mmx_nregs = MMX_REGPARM_MAX;
6331 cum->warn_avx512f = true;
6332 cum->warn_avx = true;
6333 cum->warn_sse = true;
6334 cum->warn_mmx = true;
6336 /* Because the type might mismatch between caller and callee, we need to
6337 use the actual type of the function for local calls.
6338 FIXME: cgraph_analyze can be told to actually record whether the function
6339 uses va_start, so for local functions maybe_vaarg can be made more
6340 aggressive, helping K&R code.
6341 FIXME: once the type system is fixed, we won't need this code anymore. */
6342 if (i && i->local && i->can_change_signature)
6343 fntype = TREE_TYPE (fndecl);
6344 cum->stdarg = stdarg_p (fntype);
6345 cum->maybe_vaarg = (fntype
6346 ? (!prototype_p (fntype) || stdarg_p (fntype))
6347 : !libname);
6349 cum->bnd_regno = FIRST_BND_REG;
6350 cum->bnds_in_bt = 0;
6351 cum->force_bnd_pass = 0;
6353 if (!TARGET_64BIT)
6355 /* If there are variable arguments, then we won't pass anything
6356 in registers in 32-bit mode. */
6357 if (stdarg_p (fntype))
6359 cum->nregs = 0;
6360 cum->sse_nregs = 0;
6361 cum->mmx_nregs = 0;
6362 cum->warn_avx512f = false;
6363 cum->warn_avx = false;
6364 cum->warn_sse = false;
6365 cum->warn_mmx = false;
6366 return;
6369 /* Use ecx and edx registers if function has fastcall attribute,
6370 else look for regparm information. */
6371 if (fntype)
6373 unsigned int ccvt = ix86_get_callcvt (fntype);
6374 if ((ccvt & IX86_CALLCVT_THISCALL) != 0)
6376 cum->nregs = 1;
6377 cum->fastcall = 1; /* Same first register as in fastcall. */
6379 else if ((ccvt & IX86_CALLCVT_FASTCALL) != 0)
6381 cum->nregs = 2;
6382 cum->fastcall = 1;
6384 else
6385 cum->nregs = ix86_function_regparm (fntype, fndecl);
6388 /* Set up the number of SSE registers used for passing SFmode
6389 and DFmode arguments. Warn for mismatching ABI. */
6390 cum->float_in_sse = ix86_function_sseregparm (fntype, fndecl, true);
6394 /* Return the "natural" mode for TYPE. In most cases, this is just TYPE_MODE.
6395 But in the case of vector types, it is some vector mode.
6397 When we have only some of our vector isa extensions enabled, then there
6398 are some modes for which vector_mode_supported_p is false. For these
6399 modes, the generic vector support in gcc will choose some non-vector mode
6400 in order to implement the type. By computing the natural mode, we'll
6401 select the proper ABI location for the operand and not depend on whatever
6402 the middle-end decides to do with these vector types.
6404 The middle-end can't deal with vector types > 16 bytes. In this
6405 case, we return the original mode and warn about the ABI change if CUM
6406 isn't NULL.
6408 If IN_RETURN is true, warn about the ABI change if the vector mode isn't
6409 available for the function return value.
6411 static machine_mode
6412 type_natural_mode (const_tree type, const CUMULATIVE_ARGS *cum,
6413 bool in_return)
6415 machine_mode mode = TYPE_MODE (type);
6417 if (TREE_CODE (type) == VECTOR_TYPE && !VECTOR_MODE_P (mode))
6419 HOST_WIDE_INT size = int_size_in_bytes (type);
6420 if ((size == 8 || size == 16 || size == 32 || size == 64)
6421 /* ??? Generic code allows us to create width 1 vectors. Ignore. */
6422 && TYPE_VECTOR_SUBPARTS (type) > 1)
6424 machine_mode innermode = TYPE_MODE (TREE_TYPE (type));
6426 if (TREE_CODE (TREE_TYPE (type)) == REAL_TYPE)
6427 mode = MIN_MODE_VECTOR_FLOAT;
6428 else
6429 mode = MIN_MODE_VECTOR_INT;
6431 /* Get the mode which has this inner mode and number of units. */
6432 for (; mode != VOIDmode; mode = GET_MODE_WIDER_MODE (mode))
6433 if (GET_MODE_NUNITS (mode) == TYPE_VECTOR_SUBPARTS (type)
6434 && GET_MODE_INNER (mode) == innermode)
6436 if (size == 64 && !TARGET_AVX512F)
6438 static bool warnedavx512f;
6439 static bool warnedavx512f_ret;
6441 if (cum && cum->warn_avx512f && !warnedavx512f)
6443 if (warning (OPT_Wpsabi, "AVX512F vector argument "
6444 "without AVX512F enabled changes the ABI"))
6445 warnedavx512f = true;
6447 else if (in_return && !warnedavx512f_ret)
6449 if (warning (OPT_Wpsabi, "AVX512F vector return "
6450 "without AVX512F enabled changes the ABI"))
6451 warnedavx512f_ret = true;
6454 return TYPE_MODE (type);
6456 else if (size == 32 && !TARGET_AVX)
6458 static bool warnedavx;
6459 static bool warnedavx_ret;
6461 if (cum && cum->warn_avx && !warnedavx)
6463 if (warning (OPT_Wpsabi, "AVX vector argument "
6464 "without AVX enabled changes the ABI"))
6465 warnedavx = true;
6467 else if (in_return && !warnedavx_ret)
6469 if (warning (OPT_Wpsabi, "AVX vector return "
6470 "without AVX enabled changes the ABI"))
6471 warnedavx_ret = true;
6474 return TYPE_MODE (type);
6476 else if (((size == 8 && TARGET_64BIT) || size == 16)
6477 && !TARGET_SSE)
6479 static bool warnedsse;
6480 static bool warnedsse_ret;
6482 if (cum && cum->warn_sse && !warnedsse)
6484 if (warning (OPT_Wpsabi, "SSE vector argument "
6485 "without SSE enabled changes the ABI"))
6486 warnedsse = true;
6488 else if (!TARGET_64BIT && in_return && !warnedsse_ret)
6490 if (warning (OPT_Wpsabi, "SSE vector return "
6491 "without SSE enabled changes the ABI"))
6492 warnedsse_ret = true;
6495 else if ((size == 8 && !TARGET_64BIT) && !TARGET_MMX)
6497 static bool warnedmmx;
6498 static bool warnedmmx_ret;
6500 if (cum && cum->warn_mmx && !warnedmmx)
6502 if (warning (OPT_Wpsabi, "MMX vector argument "
6503 "without MMX enabled changes the ABI"))
6504 warnedmmx = true;
6506 else if (in_return && !warnedmmx_ret)
6508 if (warning (OPT_Wpsabi, "MMX vector return "
6509 "without MMX enabled changes the ABI"))
6510 warnedmmx_ret = true;
6513 return mode;
6516 gcc_unreachable ();
6520 return mode;
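/* Worked example (added for illustration): given
     typedef int v8si __attribute__ ((vector_size (32)));
   compiled without -mavx, TYPE_MODE of v8si is not a 256-bit vector mode,
   so the loop above recomputes V8SImode as the natural mode; since
   TARGET_AVX is false, a -Wpsabi warning may be emitted (once) and
   TYPE_MODE is returned, so the argument keeps the pre-AVX layout.  */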
6523 /* We want to pass a value in REGNO whose "natural" mode is MODE. However,
6524 this may not agree with the mode that the type system has chosen for the
6525 register, which is ORIG_MODE. If ORIG_MODE is not BLKmode, then we can
6526 go ahead and use it. Otherwise we have to build a PARALLEL instead. */
6528 static rtx
6529 gen_reg_or_parallel (machine_mode mode, machine_mode orig_mode,
6530 unsigned int regno)
6532 rtx tmp;
6534 if (orig_mode != BLKmode)
6535 tmp = gen_rtx_REG (orig_mode, regno);
6536 else
6538 tmp = gen_rtx_REG (mode, regno);
6539 tmp = gen_rtx_EXPR_LIST (VOIDmode, tmp, const0_rtx);
6540 tmp = gen_rtx_PARALLEL (orig_mode, gen_rtvec (1, tmp));
6543 return tmp;
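/* Sketch of the two shapes produced above (illustrative only): for a
   non-BLKmode ORIG_MODE the result is simply (reg:M N); for BLKmode it is
   wrapped as
     (parallel [(expr_list (reg:M N) (const_int 0))])
   so callers see the register together with its byte offset of 0.  */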
6546 /* x86-64 register passing implementation. See x86-64 ABI for details. Goal
6547 of this code is to classify each 8bytes of incoming argument by the register
6548 class and assign registers accordingly. */
6550 /* Return the union class of CLASS1 and CLASS2.
6551 See the x86-64 PS ABI for details. */
6553 static enum x86_64_reg_class
6554 merge_classes (enum x86_64_reg_class class1, enum x86_64_reg_class class2)
6556 /* Rule #1: If both classes are equal, this is the resulting class. */
6557 if (class1 == class2)
6558 return class1;
6560 /* Rule #2: If one of the classes is NO_CLASS, the resulting class is
6561 the other class. */
6562 if (class1 == X86_64_NO_CLASS)
6563 return class2;
6564 if (class2 == X86_64_NO_CLASS)
6565 return class1;
6567 /* Rule #3: If one of the classes is MEMORY, the result is MEMORY. */
6568 if (class1 == X86_64_MEMORY_CLASS || class2 == X86_64_MEMORY_CLASS)
6569 return X86_64_MEMORY_CLASS;
6571 /* Rule #4: If one of the classes is INTEGER, the result is INTEGER. */
6572 if ((class1 == X86_64_INTEGERSI_CLASS && class2 == X86_64_SSESF_CLASS)
6573 || (class2 == X86_64_INTEGERSI_CLASS && class1 == X86_64_SSESF_CLASS))
6574 return X86_64_INTEGERSI_CLASS;
6575 if (class1 == X86_64_INTEGER_CLASS || class1 == X86_64_INTEGERSI_CLASS
6576 || class2 == X86_64_INTEGER_CLASS || class2 == X86_64_INTEGERSI_CLASS)
6577 return X86_64_INTEGER_CLASS;
6579 /* Rule #5: If one of the classes is X87, X87UP, or COMPLEX_X87 class,
6580 MEMORY is used. */
6581 if (class1 == X86_64_X87_CLASS
6582 || class1 == X86_64_X87UP_CLASS
6583 || class1 == X86_64_COMPLEX_X87_CLASS
6584 || class2 == X86_64_X87_CLASS
6585 || class2 == X86_64_X87UP_CLASS
6586 || class2 == X86_64_COMPLEX_X87_CLASS)
6587 return X86_64_MEMORY_CLASS;
6589 /* Rule #6: Otherwise class SSE is used. */
6590 return X86_64_SSE_CLASS;
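/* Examples of the merge rules (added for clarity): INTEGERSI merged with
   SSESF yields INTEGERSI (rule #4); INTEGER merged with any SSE class
   yields INTEGER (rule #4); any X87 class forces MEMORY (rule #5); and
   SSESF merged with SSEDF falls through to plain SSE (rule #6).  */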
6593 /* Classify the argument of type TYPE and mode MODE.
6594 CLASSES will be filled by the register class used to pass each word
6595 of the operand. The number of words is returned. In case the parameter
6596 should be passed in memory, 0 is returned. As a special case for zero
6597 sized containers, classes[0] will be NO_CLASS and 1 is returned.
6599 BIT_OFFSET is used internally for handling records; it specifies the
6600 offset, in bits modulo 512, of the piece being classified, to avoid overflow.
6602 See the x86-64 PS ABI for details. */
6605 static int
6606 classify_argument (machine_mode mode, const_tree type,
6607 enum x86_64_reg_class classes[MAX_CLASSES], int bit_offset)
6609 HOST_WIDE_INT bytes =
6610 (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
6611 int words
6612 = (bytes + (bit_offset % 64) / 8 + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
6614 /* Variable sized entities are always passed/returned in memory. */
6615 if (bytes < 0)
6616 return 0;
6618 if (mode != VOIDmode
6619 && targetm.calls.must_pass_in_stack (mode, type))
6620 return 0;
6622 if (type && AGGREGATE_TYPE_P (type))
6624 int i;
6625 tree field;
6626 enum x86_64_reg_class subclasses[MAX_CLASSES];
6628 /* On x86-64 we pass structures larger than 64 bytes on the stack. */
6629 if (bytes > 64)
6630 return 0;
6632 for (i = 0; i < words; i++)
6633 classes[i] = X86_64_NO_CLASS;
6635 /* Zero sized arrays or structures are NO_CLASS. We return 0 to
6636 signal the memory class, so handle this as a special case. */
6637 if (!words)
6639 classes[0] = X86_64_NO_CLASS;
6640 return 1;
6643 /* Classify each field of record and merge classes. */
6644 switch (TREE_CODE (type))
6646 case RECORD_TYPE:
6647 /* And now merge the fields of structure. */
6648 for (field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
6650 if (TREE_CODE (field) == FIELD_DECL)
6652 int num;
6654 if (TREE_TYPE (field) == error_mark_node)
6655 continue;
6657 /* Bitfields are always classified as integer. Handle them
6658 early, since later code would consider them to be
6659 misaligned integers. */
6660 if (DECL_BIT_FIELD (field))
6662 for (i = (int_bit_position (field)
6663 + (bit_offset % 64)) / 8 / 8;
6664 i < ((int_bit_position (field) + (bit_offset % 64))
6665 + tree_to_shwi (DECL_SIZE (field))
6666 + 63) / 8 / 8; i++)
6667 classes[i] =
6668 merge_classes (X86_64_INTEGER_CLASS,
6669 classes[i]);
6671 else
6673 int pos;
6675 type = TREE_TYPE (field);
6677 /* Flexible array member is ignored. */
6678 if (TYPE_MODE (type) == BLKmode
6679 && TREE_CODE (type) == ARRAY_TYPE
6680 && TYPE_SIZE (type) == NULL_TREE
6681 && TYPE_DOMAIN (type) != NULL_TREE
6682 && (TYPE_MAX_VALUE (TYPE_DOMAIN (type))
6683 == NULL_TREE))
6685 static bool warned;
6687 if (!warned && warn_psabi)
6689 warned = true;
6690 inform (input_location,
6691 "the ABI of passing struct with"
6692 " a flexible array member has"
6693 " changed in GCC 4.4");
6695 continue;
6697 num = classify_argument (TYPE_MODE (type), type,
6698 subclasses,
6699 (int_bit_position (field)
6700 + bit_offset) % 512);
6701 if (!num)
6702 return 0;
6703 pos = (int_bit_position (field)
6704 + (bit_offset % 64)) / 8 / 8;
6705 for (i = 0; i < num && (i + pos) < words; i++)
6706 classes[i + pos] =
6707 merge_classes (subclasses[i], classes[i + pos]);
6711 break;
6713 case ARRAY_TYPE:
6714 /* Arrays are handled as small records. */
6716 int num;
6717 num = classify_argument (TYPE_MODE (TREE_TYPE (type)),
6718 TREE_TYPE (type), subclasses, bit_offset);
6719 if (!num)
6720 return 0;
6722 /* The partial classes are now full classes. */
6723 if (subclasses[0] == X86_64_SSESF_CLASS && bytes != 4)
6724 subclasses[0] = X86_64_SSE_CLASS;
6725 if (subclasses[0] == X86_64_INTEGERSI_CLASS
6726 && !((bit_offset % 64) == 0 && bytes == 4))
6727 subclasses[0] = X86_64_INTEGER_CLASS;
6729 for (i = 0; i < words; i++)
6730 classes[i] = subclasses[i % num];
6732 break;
6734 case UNION_TYPE:
6735 case QUAL_UNION_TYPE:
6736 /* Unions are similar to RECORD_TYPE but the offset is always 0. */
6738 for (field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
6740 if (TREE_CODE (field) == FIELD_DECL)
6742 int num;
6744 if (TREE_TYPE (field) == error_mark_node)
6745 continue;
6747 num = classify_argument (TYPE_MODE (TREE_TYPE (field)),
6748 TREE_TYPE (field), subclasses,
6749 bit_offset);
6750 if (!num)
6751 return 0;
6752 for (i = 0; i < num && i < words; i++)
6753 classes[i] = merge_classes (subclasses[i], classes[i]);
6756 break;
6758 default:
6759 gcc_unreachable ();
6762 if (words > 2)
6764 /* When the size exceeds 16 bytes, if the first eightbyte isn't
6765 X86_64_SSE_CLASS or any of the remaining ones isn't
6766 X86_64_SSEUP_CLASS, everything should be passed in
6767 memory. */
6768 if (classes[0] != X86_64_SSE_CLASS)
6769 return 0;
6771 for (i = 1; i < words; i++)
6772 if (classes[i] != X86_64_SSEUP_CLASS)
6773 return 0;
6776 /* Final merger cleanup. */
6777 for (i = 0; i < words; i++)
6779 /* If one class is MEMORY, everything should be passed in
6780 memory. */
6781 if (classes[i] == X86_64_MEMORY_CLASS)
6782 return 0;
6784 /* The X86_64_SSEUP_CLASS should be always preceded by
6785 X86_64_SSE_CLASS or X86_64_SSEUP_CLASS. */
6786 if (classes[i] == X86_64_SSEUP_CLASS
6787 && classes[i - 1] != X86_64_SSE_CLASS
6788 && classes[i - 1] != X86_64_SSEUP_CLASS)
6790 /* The first one should never be X86_64_SSEUP_CLASS. */
6791 gcc_assert (i != 0);
6792 classes[i] = X86_64_SSE_CLASS;
6795 /* If X86_64_X87UP_CLASS isn't preceded by X86_64_X87_CLASS,
6796 everything should be passed in memory. */
6797 if (classes[i] == X86_64_X87UP_CLASS
6798 && (classes[i - 1] != X86_64_X87_CLASS))
6800 static bool warned;
6802 /* The first one should never be X86_64_X87UP_CLASS. */
6803 gcc_assert (i != 0);
6804 if (!warned && warn_psabi)
6806 warned = true;
6807 inform (input_location,
6808 "the ABI of passing union with long double"
6809 " has changed in GCC 4.4");
6811 return 0;
6814 return words;
6817 /* Compute alignment needed. We align all types to natural boundaries with
6818 exception of XFmode that is aligned to 64bits. */
6819 if (mode != VOIDmode && mode != BLKmode)
6821 int mode_alignment = GET_MODE_BITSIZE (mode);
6823 if (mode == XFmode)
6824 mode_alignment = 128;
6825 else if (mode == XCmode)
6826 mode_alignment = 256;
6827 if (COMPLEX_MODE_P (mode))
6828 mode_alignment /= 2;
6829 /* Misaligned fields are always returned in memory. */
6830 if (bit_offset % mode_alignment)
6831 return 0;
6834 /* for V1xx modes, just use the base mode */
6835 if (VECTOR_MODE_P (mode) && mode != V1DImode && mode != V1TImode
6836 && GET_MODE_SIZE (GET_MODE_INNER (mode)) == bytes)
6837 mode = GET_MODE_INNER (mode);
6839 /* Classification of atomic types. */
6840 switch (mode)
6842 case SDmode:
6843 case DDmode:
6844 classes[0] = X86_64_SSE_CLASS;
6845 return 1;
6846 case TDmode:
6847 classes[0] = X86_64_SSE_CLASS;
6848 classes[1] = X86_64_SSEUP_CLASS;
6849 return 2;
6850 case DImode:
6851 case SImode:
6852 case HImode:
6853 case QImode:
6854 case CSImode:
6855 case CHImode:
6856 case CQImode:
6858 int size = bit_offset + (int) GET_MODE_BITSIZE (mode);
6860 /* Analyze last 128 bits only. */
6861 size = (size - 1) & 0x7f;
6863 if (size < 32)
6865 classes[0] = X86_64_INTEGERSI_CLASS;
6866 return 1;
6868 else if (size < 64)
6870 classes[0] = X86_64_INTEGER_CLASS;
6871 return 1;
6873 else if (size < 64+32)
6875 classes[0] = X86_64_INTEGER_CLASS;
6876 classes[1] = X86_64_INTEGERSI_CLASS;
6877 return 2;
6879 else if (size < 64+64)
6881 classes[0] = classes[1] = X86_64_INTEGER_CLASS;
6882 return 2;
6884 else
6885 gcc_unreachable ();
6887 case CDImode:
6888 case TImode:
6889 classes[0] = classes[1] = X86_64_INTEGER_CLASS;
6890 return 2;
6891 case COImode:
6892 case OImode:
6893 /* OImode shouldn't be used directly. */
6894 gcc_unreachable ();
6895 case CTImode:
6896 return 0;
6897 case SFmode:
6898 if (!(bit_offset % 64))
6899 classes[0] = X86_64_SSESF_CLASS;
6900 else
6901 classes[0] = X86_64_SSE_CLASS;
6902 return 1;
6903 case DFmode:
6904 classes[0] = X86_64_SSEDF_CLASS;
6905 return 1;
6906 case XFmode:
6907 classes[0] = X86_64_X87_CLASS;
6908 classes[1] = X86_64_X87UP_CLASS;
6909 return 2;
6910 case TFmode:
6911 classes[0] = X86_64_SSE_CLASS;
6912 classes[1] = X86_64_SSEUP_CLASS;
6913 return 2;
6914 case SCmode:
6915 classes[0] = X86_64_SSE_CLASS;
6916 if (!(bit_offset % 64))
6917 return 1;
6918 else
6920 static bool warned;
6922 if (!warned && warn_psabi)
6924 warned = true;
6925 inform (input_location,
6926 "the ABI of passing structure with complex float"
6927 " member has changed in GCC 4.4");
6929 classes[1] = X86_64_SSESF_CLASS;
6930 return 2;
6932 case DCmode:
6933 classes[0] = X86_64_SSEDF_CLASS;
6934 classes[1] = X86_64_SSEDF_CLASS;
6935 return 2;
6936 case XCmode:
6937 classes[0] = X86_64_COMPLEX_X87_CLASS;
6938 return 1;
6939 case TCmode:
6940 /* This mode is larger than 16 bytes. */
6941 return 0;
6942 case V8SFmode:
6943 case V8SImode:
6944 case V32QImode:
6945 case V16HImode:
6946 case V4DFmode:
6947 case V4DImode:
6948 classes[0] = X86_64_SSE_CLASS;
6949 classes[1] = X86_64_SSEUP_CLASS;
6950 classes[2] = X86_64_SSEUP_CLASS;
6951 classes[3] = X86_64_SSEUP_CLASS;
6952 return 4;
6953 case V8DFmode:
6954 case V16SFmode:
6955 case V8DImode:
6956 case V16SImode:
6957 case V32HImode:
6958 case V64QImode:
6959 classes[0] = X86_64_SSE_CLASS;
6960 classes[1] = X86_64_SSEUP_CLASS;
6961 classes[2] = X86_64_SSEUP_CLASS;
6962 classes[3] = X86_64_SSEUP_CLASS;
6963 classes[4] = X86_64_SSEUP_CLASS;
6964 classes[5] = X86_64_SSEUP_CLASS;
6965 classes[6] = X86_64_SSEUP_CLASS;
6966 classes[7] = X86_64_SSEUP_CLASS;
6967 return 8;
6968 case V4SFmode:
6969 case V4SImode:
6970 case V16QImode:
6971 case V8HImode:
6972 case V2DFmode:
6973 case V2DImode:
6974 classes[0] = X86_64_SSE_CLASS;
6975 classes[1] = X86_64_SSEUP_CLASS;
6976 return 2;
6977 case V1TImode:
6978 case V1DImode:
6979 case V2SFmode:
6980 case V2SImode:
6981 case V4HImode:
6982 case V8QImode:
6983 classes[0] = X86_64_SSE_CLASS;
6984 return 1;
6985 case BLKmode:
6986 case VOIDmode:
6987 return 0;
6988 default:
6989 gcc_assert (VECTOR_MODE_P (mode));
6991 if (bytes > 16)
6992 return 0;
6994 gcc_assert (GET_MODE_CLASS (GET_MODE_INNER (mode)) == MODE_INT);
6996 if (bit_offset + GET_MODE_BITSIZE (mode) <= 32)
6997 classes[0] = X86_64_INTEGERSI_CLASS;
6998 else
6999 classes[0] = X86_64_INTEGER_CLASS;
7000 classes[1] = X86_64_INTEGER_CLASS;
7001 return 1 + (bytes > 8);
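/* Worked examples (illustrative, following the SysV x86-64 psABI):
     struct { double d; int i; }  -> 16 bytes; the first eightbyte gets an
        SSE class and the second an integer class, so 2 is returned and the
        value travels in one XMM and one general register;
     struct { long a, b, c; }     -> 24 bytes; words > 2 and classes[0] is
        not X86_64_SSE_CLASS, so 0 is returned and the argument goes to
        memory;
     __m256 (V8SFmode)            -> SSE, SSEUP, SSEUP, SSEUP; 4 is
        returned and the value occupies a single YMM register.  */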
7005 /* Examine the argument and return set number of register required in each
7006 class. Return true iff parameter should be passed in memory. */
7008 static bool
7009 examine_argument (machine_mode mode, const_tree type, int in_return,
7010 int *int_nregs, int *sse_nregs)
7012 enum x86_64_reg_class regclass[MAX_CLASSES];
7013 int n = classify_argument (mode, type, regclass, 0);
7015 *int_nregs = 0;
7016 *sse_nregs = 0;
7018 if (!n)
7019 return true;
7020 for (n--; n >= 0; n--)
7021 switch (regclass[n])
7023 case X86_64_INTEGER_CLASS:
7024 case X86_64_INTEGERSI_CLASS:
7025 (*int_nregs)++;
7026 break;
7027 case X86_64_SSE_CLASS:
7028 case X86_64_SSESF_CLASS:
7029 case X86_64_SSEDF_CLASS:
7030 (*sse_nregs)++;
7031 break;
7032 case X86_64_NO_CLASS:
7033 case X86_64_SSEUP_CLASS:
7034 break;
7035 case X86_64_X87_CLASS:
7036 case X86_64_X87UP_CLASS:
7037 case X86_64_COMPLEX_X87_CLASS:
7038 if (!in_return)
7039 return true;
7040 break;
7041 case X86_64_MEMORY_CLASS:
7042 gcc_unreachable ();
7045 return false;
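/* Usage sketch (illustrative): for struct { double d; int i; } this sets
   *int_nregs = 1 and *sse_nregs = 1 and returns false (register passing);
   for a 24-byte all-integer struct classify_argument returns 0, so true is
   returned and the value lives in memory.  X87 classes are accepted only
   when IN_RETURN is nonzero.  */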
7048 /* Construct container for the argument used by GCC interface. See
7049 FUNCTION_ARG for the detailed description. */
7051 static rtx
7052 construct_container (machine_mode mode, machine_mode orig_mode,
7053 const_tree type, int in_return, int nintregs, int nsseregs,
7054 const int *intreg, int sse_regno)
7056 /* The following variables hold the static issued_error state. */
7057 static bool issued_sse_arg_error;
7058 static bool issued_sse_ret_error;
7059 static bool issued_x87_ret_error;
7061 machine_mode tmpmode;
7062 int bytes =
7063 (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
7064 enum x86_64_reg_class regclass[MAX_CLASSES];
7065 int n;
7066 int i;
7067 int nexps = 0;
7068 int needed_sseregs, needed_intregs;
7069 rtx exp[MAX_CLASSES];
7070 rtx ret;
7072 n = classify_argument (mode, type, regclass, 0);
7073 if (!n)
7074 return NULL;
7075 if (examine_argument (mode, type, in_return, &needed_intregs,
7076 &needed_sseregs))
7077 return NULL;
7078 if (needed_intregs > nintregs || needed_sseregs > nsseregs)
7079 return NULL;
7081 /* We allowed the user to turn off SSE for kernel mode. Don't crash if
7082 some less clueful developer tries to use floating-point anyway. */
7083 if (needed_sseregs && !TARGET_SSE)
7085 if (in_return)
7087 if (!issued_sse_ret_error)
7089 error ("SSE register return with SSE disabled");
7090 issued_sse_ret_error = true;
7093 else if (!issued_sse_arg_error)
7095 error ("SSE register argument with SSE disabled");
7096 issued_sse_arg_error = true;
7098 return NULL;
7101 /* Likewise, error if the ABI requires us to return values in the
7102 x87 registers and the user specified -mno-80387. */
7103 if (!TARGET_FLOAT_RETURNS_IN_80387 && in_return)
7104 for (i = 0; i < n; i++)
7105 if (regclass[i] == X86_64_X87_CLASS
7106 || regclass[i] == X86_64_X87UP_CLASS
7107 || regclass[i] == X86_64_COMPLEX_X87_CLASS)
7109 if (!issued_x87_ret_error)
7111 error ("x87 register return with x87 disabled");
7112 issued_x87_ret_error = true;
7114 return NULL;
7117 /* First construct simple cases. Avoid SCmode, since we want to use
7118 a single register to pass this type. */
7119 if (n == 1 && mode != SCmode)
7120 switch (regclass[0])
7122 case X86_64_INTEGER_CLASS:
7123 case X86_64_INTEGERSI_CLASS:
7124 return gen_rtx_REG (mode, intreg[0]);
7125 case X86_64_SSE_CLASS:
7126 case X86_64_SSESF_CLASS:
7127 case X86_64_SSEDF_CLASS:
7128 if (mode != BLKmode)
7129 return gen_reg_or_parallel (mode, orig_mode,
7130 SSE_REGNO (sse_regno));
7131 break;
7132 case X86_64_X87_CLASS:
7133 case X86_64_COMPLEX_X87_CLASS:
7134 return gen_rtx_REG (mode, FIRST_STACK_REG);
7135 case X86_64_NO_CLASS:
7136 /* Zero sized array, struct or class. */
7137 return NULL;
7138 default:
7139 gcc_unreachable ();
7141 if (n == 2
7142 && regclass[0] == X86_64_SSE_CLASS
7143 && regclass[1] == X86_64_SSEUP_CLASS
7144 && mode != BLKmode)
7145 return gen_reg_or_parallel (mode, orig_mode,
7146 SSE_REGNO (sse_regno));
7147 if (n == 4
7148 && regclass[0] == X86_64_SSE_CLASS
7149 && regclass[1] == X86_64_SSEUP_CLASS
7150 && regclass[2] == X86_64_SSEUP_CLASS
7151 && regclass[3] == X86_64_SSEUP_CLASS
7152 && mode != BLKmode)
7153 return gen_reg_or_parallel (mode, orig_mode,
7154 SSE_REGNO (sse_regno));
7155 if (n == 8
7156 && regclass[0] == X86_64_SSE_CLASS
7157 && regclass[1] == X86_64_SSEUP_CLASS
7158 && regclass[2] == X86_64_SSEUP_CLASS
7159 && regclass[3] == X86_64_SSEUP_CLASS
7160 && regclass[4] == X86_64_SSEUP_CLASS
7161 && regclass[5] == X86_64_SSEUP_CLASS
7162 && regclass[6] == X86_64_SSEUP_CLASS
7163 && regclass[7] == X86_64_SSEUP_CLASS
7164 && mode != BLKmode)
7165 return gen_reg_or_parallel (mode, orig_mode,
7166 SSE_REGNO (sse_regno));
7167 if (n == 2
7168 && regclass[0] == X86_64_X87_CLASS
7169 && regclass[1] == X86_64_X87UP_CLASS)
7170 return gen_rtx_REG (XFmode, FIRST_STACK_REG);
7172 if (n == 2
7173 && regclass[0] == X86_64_INTEGER_CLASS
7174 && regclass[1] == X86_64_INTEGER_CLASS
7175 && (mode == CDImode || mode == TImode)
7176 && intreg[0] + 1 == intreg[1])
7177 return gen_rtx_REG (mode, intreg[0]);
7179 /* Otherwise figure out the entries of the PARALLEL. */
7180 for (i = 0; i < n; i++)
7182 int pos;
7184 switch (regclass[i])
7186 case X86_64_NO_CLASS:
7187 break;
7188 case X86_64_INTEGER_CLASS:
7189 case X86_64_INTEGERSI_CLASS:
7190 /* Merge TImodes on aligned occasions here too. */
7191 if (i * 8 + 8 > bytes)
7192 tmpmode
7193 = mode_for_size ((bytes - i * 8) * BITS_PER_UNIT, MODE_INT, 0);
7194 else if (regclass[i] == X86_64_INTEGERSI_CLASS)
7195 tmpmode = SImode;
7196 else
7197 tmpmode = DImode;
7198 /* We've requested a size (such as 24 bits) for which
7199 no integer mode exists. Use DImode. */
7200 if (tmpmode == BLKmode)
7201 tmpmode = DImode;
7202 exp [nexps++]
7203 = gen_rtx_EXPR_LIST (VOIDmode,
7204 gen_rtx_REG (tmpmode, *intreg),
7205 GEN_INT (i*8));
7206 intreg++;
7207 break;
7208 case X86_64_SSESF_CLASS:
7209 exp [nexps++]
7210 = gen_rtx_EXPR_LIST (VOIDmode,
7211 gen_rtx_REG (SFmode,
7212 SSE_REGNO (sse_regno)),
7213 GEN_INT (i*8));
7214 sse_regno++;
7215 break;
7216 case X86_64_SSEDF_CLASS:
7217 exp [nexps++]
7218 = gen_rtx_EXPR_LIST (VOIDmode,
7219 gen_rtx_REG (DFmode,
7220 SSE_REGNO (sse_regno)),
7221 GEN_INT (i*8));
7222 sse_regno++;
7223 break;
7224 case X86_64_SSE_CLASS:
7225 pos = i;
7226 switch (n)
7228 case 1:
7229 tmpmode = DImode;
7230 break;
7231 case 2:
7232 if (i == 0 && regclass[1] == X86_64_SSEUP_CLASS)
7234 tmpmode = TImode;
7235 i++;
7237 else
7238 tmpmode = DImode;
7239 break;
7240 case 4:
7241 gcc_assert (i == 0
7242 && regclass[1] == X86_64_SSEUP_CLASS
7243 && regclass[2] == X86_64_SSEUP_CLASS
7244 && regclass[3] == X86_64_SSEUP_CLASS);
7245 tmpmode = OImode;
7246 i += 3;
7247 break;
7248 case 8:
7249 gcc_assert (i == 0
7250 && regclass[1] == X86_64_SSEUP_CLASS
7251 && regclass[2] == X86_64_SSEUP_CLASS
7252 && regclass[3] == X86_64_SSEUP_CLASS
7253 && regclass[4] == X86_64_SSEUP_CLASS
7254 && regclass[5] == X86_64_SSEUP_CLASS
7255 && regclass[6] == X86_64_SSEUP_CLASS
7256 && regclass[7] == X86_64_SSEUP_CLASS);
7257 tmpmode = XImode;
7258 i += 7;
7259 break;
7260 default:
7261 gcc_unreachable ();
7263 exp [nexps++]
7264 = gen_rtx_EXPR_LIST (VOIDmode,
7265 gen_rtx_REG (tmpmode,
7266 SSE_REGNO (sse_regno)),
7267 GEN_INT (pos*8));
7268 sse_regno++;
7269 break;
7270 default:
7271 gcc_unreachable ();
7275 /* Empty aligned struct, union or class. */
7276 if (nexps == 0)
7277 return NULL;
7279 ret = gen_rtx_PARALLEL (mode, rtvec_alloc (nexps));
7280 for (i = 0; i < nexps; i++)
7281 XVECEXP (ret, 0, i) = exp [i];
7282 return ret;
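/* Illustrative result (not from the original source): for an argument of
   type struct { double d; int i; } arriving in xmm0 and rdi, the container
   built above is roughly
     (parallel [(expr_list (reg:DF xmm0) (const_int 0))
                (expr_list (reg:DI di) (const_int 8))])
   i.e. each EXPR_LIST pairs a register with the byte offset of the
   eightbyte it carries.  */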
7285 /* Update the data in CUM to advance over an argument of mode MODE
7286 and data type TYPE. (TYPE is null for libcalls where that information
7287 may not be available.)
7289 Return the number of integer registers advanced over. */
7291 static int
7292 function_arg_advance_32 (CUMULATIVE_ARGS *cum, machine_mode mode,
7293 const_tree type, HOST_WIDE_INT bytes,
7294 HOST_WIDE_INT words)
7296 int res = 0;
7298 switch (mode)
7300 default:
7301 break;
7303 case BLKmode:
7304 if (bytes < 0)
7305 break;
7306 /* FALLTHRU */
7308 case DImode:
7309 case SImode:
7310 case HImode:
7311 case QImode:
7312 cum->words += words;
7313 cum->nregs -= words;
7314 cum->regno += words;
7315 if (cum->nregs >= 0)
7316 res = words;
7317 if (cum->nregs <= 0)
7319 cum->nregs = 0;
7320 cum->regno = 0;
7322 break;
7324 case OImode:
7325 /* OImode shouldn't be used directly. */
7326 gcc_unreachable ();
7328 case DFmode:
7329 if (cum->float_in_sse < 2)
7330 break;
7331 case SFmode:
7332 if (cum->float_in_sse < 1)
7333 break;
7334 /* FALLTHRU */
7336 case V8SFmode:
7337 case V8SImode:
7338 case V64QImode:
7339 case V32HImode:
7340 case V16SImode:
7341 case V8DImode:
7342 case V16SFmode:
7343 case V8DFmode:
7344 case V32QImode:
7345 case V16HImode:
7346 case V4DFmode:
7347 case V4DImode:
7348 case TImode:
7349 case V16QImode:
7350 case V8HImode:
7351 case V4SImode:
7352 case V2DImode:
7353 case V4SFmode:
7354 case V2DFmode:
7355 if (!type || !AGGREGATE_TYPE_P (type))
7357 cum->sse_words += words;
7358 cum->sse_nregs -= 1;
7359 cum->sse_regno += 1;
7360 if (cum->sse_nregs <= 0)
7362 cum->sse_nregs = 0;
7363 cum->sse_regno = 0;
7366 break;
7368 case V8QImode:
7369 case V4HImode:
7370 case V2SImode:
7371 case V2SFmode:
7372 case V1TImode:
7373 case V1DImode:
7374 if (!type || !AGGREGATE_TYPE_P (type))
7376 cum->mmx_words += words;
7377 cum->mmx_nregs -= 1;
7378 cum->mmx_regno += 1;
7379 if (cum->mmx_nregs <= 0)
7381 cum->mmx_nregs = 0;
7382 cum->mmx_regno = 0;
7385 break;
7388 return res;
7391 static int
7392 function_arg_advance_64 (CUMULATIVE_ARGS *cum, machine_mode mode,
7393 const_tree type, HOST_WIDE_INT words, bool named)
7395 int int_nregs, sse_nregs;
7397 /* Unnamed 512-bit and 256-bit vector mode parameters are passed on the stack. */
7398 if (!named && (VALID_AVX512F_REG_MODE (mode)
7399 || VALID_AVX256_REG_MODE (mode)))
7400 return 0;
7402 if (!examine_argument (mode, type, 0, &int_nregs, &sse_nregs)
7403 && sse_nregs <= cum->sse_nregs && int_nregs <= cum->nregs)
7405 cum->nregs -= int_nregs;
7406 cum->sse_nregs -= sse_nregs;
7407 cum->regno += int_nregs;
7408 cum->sse_regno += sse_nregs;
7409 return int_nregs;
7411 else
7413 int align = ix86_function_arg_boundary (mode, type) / BITS_PER_WORD;
7414 cum->words = (cum->words + align - 1) & ~(align - 1);
7415 cum->words += words;
7416 return 0;
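/* Illustrative trace (SysV x86-64, example assumption): advancing over the
   arguments (int, double, struct { long a, b, c; }) consumes one integer
   register for the int and one SSE register for the double; the 24-byte
   struct fails examine_argument, so only cum->words grows for it and the
   struct itself is passed on the stack.  */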
7420 static int
7421 function_arg_advance_ms_64 (CUMULATIVE_ARGS *cum, HOST_WIDE_INT bytes,
7422 HOST_WIDE_INT words)
7424 /* Otherwise, this should be passed indirect. */
7425 gcc_assert (bytes == 1 || bytes == 2 || bytes == 4 || bytes == 8);
7427 cum->words += words;
7428 if (cum->nregs > 0)
7430 cum->nregs -= 1;
7431 cum->regno += 1;
7432 return 1;
7434 return 0;
7437 /* Update the data in CUM to advance over an argument of mode MODE and
7438 data type TYPE. (TYPE is null for libcalls where that information
7439 may not be available.) */
7441 static void
7442 ix86_function_arg_advance (cumulative_args_t cum_v, machine_mode mode,
7443 const_tree type, bool named)
7445 CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
7446 HOST_WIDE_INT bytes, words;
7447 int nregs;
7449 if (mode == BLKmode)
7450 bytes = int_size_in_bytes (type);
7451 else
7452 bytes = GET_MODE_SIZE (mode);
7453 words = (bytes + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
7455 if (type)
7456 mode = type_natural_mode (type, NULL, false);
7458 if ((type && POINTER_BOUNDS_TYPE_P (type))
7459 || POINTER_BOUNDS_MODE_P (mode))
7461 /* If we pass bounds in BT then just update remained bounds count. */
7462 if (cum->bnds_in_bt)
7464 cum->bnds_in_bt--;
7465 return;
7468 /* Update remained number of bounds to force. */
7469 if (cum->force_bnd_pass)
7470 cum->force_bnd_pass--;
7472 cum->bnd_regno++;
7474 return;
7477 /* The first arg not going to Bounds Tables resets this counter. */
7478 cum->bnds_in_bt = 0;
7479 /* For unnamed args we always pass bounds to avoid bounds mess when
7480 passed and received types do not match. If bounds do not follow
7481 unnamed arg, still pretend required number of bounds were passed. */
7482 if (cum->force_bnd_pass)
7484 cum->bnd_regno += cum->force_bnd_pass;
7485 cum->force_bnd_pass = 0;
7488 if (TARGET_64BIT && (cum ? cum->call_abi : ix86_abi) == MS_ABI)
7489 nregs = function_arg_advance_ms_64 (cum, bytes, words);
7490 else if (TARGET_64BIT)
7491 nregs = function_arg_advance_64 (cum, mode, type, words, named);
7492 else
7493 nregs = function_arg_advance_32 (cum, mode, type, bytes, words);
7495 /* For stdarg we expect bounds to be passed for each value passed
7496 in register. */
7497 if (cum->stdarg)
7498 cum->force_bnd_pass = nregs;
7499 /* For pointers passed in memory we expect bounds passed in Bounds
7500 Table. */
7501 if (!nregs)
7502 cum->bnds_in_bt = chkp_type_bounds_count (type);
7505 /* Define where to put the arguments to a function.
7506 Value is zero to push the argument on the stack,
7507 or a hard register in which to store the argument.
7509 MODE is the argument's machine mode.
7510 TYPE is the data type of the argument (as a tree).
7511 This is null for libcalls where that information may
7512 not be available.
7513 CUM is a variable of type CUMULATIVE_ARGS which gives info about
7514 the preceding args and about the function being called.
7515 NAMED is nonzero if this argument is a named parameter
7516 (otherwise it is an extra parameter matching an ellipsis). */
7518 static rtx
7519 function_arg_32 (const CUMULATIVE_ARGS *cum, machine_mode mode,
7520 machine_mode orig_mode, const_tree type,
7521 HOST_WIDE_INT bytes, HOST_WIDE_INT words)
7523 /* Avoid the AL settings for the Unix64 ABI. */
7524 if (mode == VOIDmode)
7525 return constm1_rtx;
7527 switch (mode)
7529 default:
7530 break;
7532 case BLKmode:
7533 if (bytes < 0)
7534 break;
7535 /* FALLTHRU */
7536 case DImode:
7537 case SImode:
7538 case HImode:
7539 case QImode:
7540 if (words <= cum->nregs)
7542 int regno = cum->regno;
7544 /* Fastcall allocates the first two DWORD (SImode) or
7545 smaller arguments to ECX and EDX if it isn't an
7546 aggregate type. */
7547 if (cum->fastcall)
7549 if (mode == BLKmode
7550 || mode == DImode
7551 || (type && AGGREGATE_TYPE_P (type)))
7552 break;
7554 /* ECX, not EAX, is the first allocated register. */
7555 if (regno == AX_REG)
7556 regno = CX_REG;
7558 return gen_rtx_REG (mode, regno);
7560 break;
7562 case DFmode:
7563 if (cum->float_in_sse < 2)
7564 break;
7565 case SFmode:
7566 if (cum->float_in_sse < 1)
7567 break;
7568 /* FALLTHRU */
7569 case TImode:
7570 /* In 32bit, we pass TImode in xmm registers. */
7571 case V16QImode:
7572 case V8HImode:
7573 case V4SImode:
7574 case V2DImode:
7575 case V4SFmode:
7576 case V2DFmode:
7577 if (!type || !AGGREGATE_TYPE_P (type))
7579 if (cum->sse_nregs)
7580 return gen_reg_or_parallel (mode, orig_mode,
7581 cum->sse_regno + FIRST_SSE_REG);
7583 break;
7585 case OImode:
7586 case XImode:
7587 /* OImode and XImode shouldn't be used directly. */
7588 gcc_unreachable ();
7590 case V64QImode:
7591 case V32HImode:
7592 case V16SImode:
7593 case V8DImode:
7594 case V16SFmode:
7595 case V8DFmode:
7596 case V8SFmode:
7597 case V8SImode:
7598 case V32QImode:
7599 case V16HImode:
7600 case V4DFmode:
7601 case V4DImode:
7602 if (!type || !AGGREGATE_TYPE_P (type))
7604 if (cum->sse_nregs)
7605 return gen_reg_or_parallel (mode, orig_mode,
7606 cum->sse_regno + FIRST_SSE_REG);
7608 break;
7610 case V8QImode:
7611 case V4HImode:
7612 case V2SImode:
7613 case V2SFmode:
7614 case V1TImode:
7615 case V1DImode:
7616 if (!type || !AGGREGATE_TYPE_P (type))
7618 if (cum->mmx_nregs)
7619 return gen_reg_or_parallel (mode, orig_mode,
7620 cum->mmx_regno + FIRST_MMX_REG);
7622 break;
7625 return NULL_RTX;
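/* Example for the 32-bit path (added for illustration): with
     __attribute__ ((fastcall)) int f (int a, int b, int c);
   cum->fastcall is set, so A comes back as (reg:SI cx), B as (reg:SI dx),
   and C falls off the register list and is pushed on the stack.
   Aggregates and DImode values never use the fastcall registers (see the
   check above).  */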
7628 static rtx
7629 function_arg_64 (const CUMULATIVE_ARGS *cum, machine_mode mode,
7630 machine_mode orig_mode, const_tree type, bool named)
7632 /* Handle a hidden AL argument containing the number of SSE registers
7633 used by varargs x86-64 functions. */
7634 if (mode == VOIDmode)
7635 return GEN_INT (cum->maybe_vaarg
7636 ? (cum->sse_nregs < 0
7637 ? X86_64_SSE_REGPARM_MAX
7638 : cum->sse_regno)
7639 : -1);
7641 switch (mode)
7643 default:
7644 break;
7646 case V8SFmode:
7647 case V8SImode:
7648 case V32QImode:
7649 case V16HImode:
7650 case V4DFmode:
7651 case V4DImode:
7652 case V16SFmode:
7653 case V16SImode:
7654 case V64QImode:
7655 case V32HImode:
7656 case V8DFmode:
7657 case V8DImode:
7658 /* Unnamed 256-bit and 512-bit vector mode parameters are passed on the stack. */
7659 if (!named)
7660 return NULL;
7661 break;
7664 return construct_container (mode, orig_mode, type, 0, cum->nregs,
7665 cum->sse_nregs,
7666 &x86_64_int_parameter_registers [cum->regno],
7667 cum->sse_regno);
7670 static rtx
7671 function_arg_ms_64 (const CUMULATIVE_ARGS *cum, machine_mode mode,
7672 machine_mode orig_mode, bool named,
7673 HOST_WIDE_INT bytes)
7675 unsigned int regno;
7677 /* We need to add a clobber for MS_ABI->SYSV ABI calls in expand_call.
7678 We use the value -2 to specify that the current function call uses the MS ABI. */
7679 if (mode == VOIDmode)
7680 return GEN_INT (-2);
7682 /* If we've run out of registers, it goes on the stack. */
7683 if (cum->nregs == 0)
7684 return NULL_RTX;
7686 regno = x86_64_ms_abi_int_parameter_registers[cum->regno];
7688 /* Only floating point modes are passed in anything but integer regs. */
7689 if (TARGET_SSE && (mode == SFmode || mode == DFmode))
7691 if (named)
7692 regno = cum->regno + FIRST_SSE_REG;
7693 else
7695 rtx t1, t2;
7697 /* Unnamed floating parameters are passed in both the
7698 SSE and integer registers. */
7699 t1 = gen_rtx_REG (mode, cum->regno + FIRST_SSE_REG);
7700 t2 = gen_rtx_REG (mode, regno);
7701 t1 = gen_rtx_EXPR_LIST (VOIDmode, t1, const0_rtx);
7702 t2 = gen_rtx_EXPR_LIST (VOIDmode, t2, const0_rtx);
7703 return gen_rtx_PARALLEL (mode, gen_rtvec (2, t1, t2));
7706 /* Handle aggregate types passed in registers. */
7707 if (orig_mode == BLKmode)
7709 if (bytes > 0 && bytes <= 8)
7710 mode = (bytes > 4 ? DImode : SImode);
7711 if (mode == BLKmode)
7712 mode = DImode;
7715 return gen_reg_or_parallel (mode, orig_mode, regno);
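/* MS x64 example (illustrative): a named double in the second argument
   slot is returned as xmm1; the same double in the variadic part of a call
   comes back as a PARALLEL naming both xmm1 and rdx, because unnamed
   floating-point arguments must be duplicated into the matching integer
   register for the callee's va_arg machinery.  */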
7718 /* Return where to put the arguments to a function.
7719 Return zero to push the argument on the stack, or a hard register in which to store the argument.
7721 MODE is the argument's machine mode. TYPE is the data type of the
7722 argument. It is null for libcalls where that information may not be
7723 available. CUM gives information about the preceding args and about
7724 the function being called. NAMED is nonzero if this argument is a
7725 named parameter (otherwise it is an extra parameter matching an
7726 ellipsis). */
7728 static rtx
7729 ix86_function_arg (cumulative_args_t cum_v, machine_mode omode,
7730 const_tree type, bool named)
7732 CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
7733 machine_mode mode = omode;
7734 HOST_WIDE_INT bytes, words;
7735 rtx arg;
7737 /* All pointer bounds arguments are handled separately here. */
7738 if ((type && POINTER_BOUNDS_TYPE_P (type))
7739 || POINTER_BOUNDS_MODE_P (mode))
7741 /* Return NULL if bounds are forced to go in Bounds Table. */
7742 if (cum->bnds_in_bt)
7743 arg = NULL;
7744 /* Return the next available bound reg if any. */
7745 else if (cum->bnd_regno <= LAST_BND_REG)
7746 arg = gen_rtx_REG (BNDmode, cum->bnd_regno);
7747 /* Return the next special slot number otherwise. */
7748 else
7749 arg = GEN_INT (cum->bnd_regno - LAST_BND_REG - 1);
7751 return arg;
7754 if (mode == BLKmode)
7755 bytes = int_size_in_bytes (type);
7756 else
7757 bytes = GET_MODE_SIZE (mode);
7758 words = (bytes + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
7760 /* To simplify the code below, represent vector types with a vector mode
7761 even if MMX/SSE are not active. */
7762 if (type && TREE_CODE (type) == VECTOR_TYPE)
7763 mode = type_natural_mode (type, cum, false);
7765 if (TARGET_64BIT && (cum ? cum->call_abi : ix86_abi) == MS_ABI)
7766 arg = function_arg_ms_64 (cum, mode, omode, named, bytes);
7767 else if (TARGET_64BIT)
7768 arg = function_arg_64 (cum, mode, omode, type, named);
7769 else
7770 arg = function_arg_32 (cum, mode, omode, type, bytes, words);
7772 return arg;
7775 /* A C expression that indicates when an argument must be passed by
7776 reference. If nonzero for an argument, a copy of that argument is
7777 made in memory and a pointer to the argument is passed instead of
7778 the argument itself. The pointer is passed in whatever way is
7779 appropriate for passing a pointer to that type. */
7781 static bool
7782 ix86_pass_by_reference (cumulative_args_t cum_v, machine_mode mode,
7783 const_tree type, bool)
7785 CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
7787 /* See Windows x64 Software Convention. */
7788 if (TARGET_64BIT && (cum ? cum->call_abi : ix86_abi) == MS_ABI)
7790 int msize = (int) GET_MODE_SIZE (mode);
7791 if (type)
7793 /* Arrays are passed by reference. */
7794 if (TREE_CODE (type) == ARRAY_TYPE)
7795 return true;
7797 if (AGGREGATE_TYPE_P (type))
7799 /* Structs/unions of sizes other than 8, 16, 32, or 64 bits
7800 are passed by reference. */
7801 msize = int_size_in_bytes (type);
7805 /* __m128 is passed by reference. */
7806 switch (msize) {
7807 case 1: case 2: case 4: case 8:
7808 break;
7809 default:
7810 return true;
7813 else if (TARGET_64BIT && type && int_size_in_bytes (type) == -1)
7814 return 1;
7816 return 0;
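/* Examples (illustrative): under the MS x64 ABI a 12-byte struct or an
   __m128 argument is passed by reference because its size is not 1, 2, 4
   or 8; under the 64-bit SysV ABI only variable-sized types take this
   path, since large fixed-size aggregates are instead sent to memory by
   the classification code.  */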
7819 /* Return true when TYPE should be 128bit aligned for 32bit argument
7820 passing ABI. XXX: This function is obsolete and is only used for
7821 checking psABI compatibility with previous versions of GCC. */
7823 static bool
7824 ix86_compat_aligned_value_p (const_tree type)
7826 machine_mode mode = TYPE_MODE (type);
7827 if (((TARGET_SSE && SSE_REG_MODE_P (mode))
7828 || mode == TDmode
7829 || mode == TFmode
7830 || mode == TCmode)
7831 && (!TYPE_USER_ALIGN (type) || TYPE_ALIGN (type) > 128))
7832 return true;
7833 if (TYPE_ALIGN (type) < 128)
7834 return false;
7836 if (AGGREGATE_TYPE_P (type))
7838 /* Walk the aggregates recursively. */
7839 switch (TREE_CODE (type))
7841 case RECORD_TYPE:
7842 case UNION_TYPE:
7843 case QUAL_UNION_TYPE:
7845 tree field;
7847 /* Walk all the structure fields. */
7848 for (field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
7850 if (TREE_CODE (field) == FIELD_DECL
7851 && ix86_compat_aligned_value_p (TREE_TYPE (field)))
7852 return true;
7854 break;
7857 case ARRAY_TYPE:
7858 /* Just for use if some languages pass arrays by value. */
7859 if (ix86_compat_aligned_value_p (TREE_TYPE (type)))
7860 return true;
7861 break;
7863 default:
7864 gcc_unreachable ();
7867 return false;
7870 /* Return the alignment boundary for MODE and TYPE with alignment ALIGN.
7871 XXX: This function is obsolete and is only used for checking psABI
7872 compatibility with previous versions of GCC. */
7874 static unsigned int
7875 ix86_compat_function_arg_boundary (machine_mode mode,
7876 const_tree type, unsigned int align)
7878 /* In 32bit, only _Decimal128 and __float128 are aligned to their
7879 natural boundaries. */
7880 if (!TARGET_64BIT && mode != TDmode && mode != TFmode)
7882 /* i386 ABI defines all arguments to be 4 byte aligned. We have to
7883 make an exception for SSE modes since these require 128bit
7884 alignment.
7886 The handling here differs from field_alignment. ICC aligns MMX
7887 arguments to 4 byte boundaries, while structure fields are aligned
7888 to 8 byte boundaries. */
7889 if (!type)
7891 if (!(TARGET_SSE && SSE_REG_MODE_P (mode)))
7892 align = PARM_BOUNDARY;
7894 else
7896 if (!ix86_compat_aligned_value_p (type))
7897 align = PARM_BOUNDARY;
7900 if (align > BIGGEST_ALIGNMENT)
7901 align = BIGGEST_ALIGNMENT;
7902 return align;
7905 /* Return true when TYPE should be 128bit aligned for 32bit argument
7906 passing ABI. */
7908 static bool
7909 ix86_contains_aligned_value_p (const_tree type)
7911 machine_mode mode = TYPE_MODE (type);
7913 if (mode == XFmode || mode == XCmode)
7914 return false;
7916 if (TYPE_ALIGN (type) < 128)
7917 return false;
7919 if (AGGREGATE_TYPE_P (type))
7921 /* Walk the aggregates recursively. */
7922 switch (TREE_CODE (type))
7924 case RECORD_TYPE:
7925 case UNION_TYPE:
7926 case QUAL_UNION_TYPE:
7928 tree field;
7930 /* Walk all the structure fields. */
7931 for (field = TYPE_FIELDS (type);
7932 field;
7933 field = DECL_CHAIN (field))
7935 if (TREE_CODE (field) == FIELD_DECL
7936 && ix86_contains_aligned_value_p (TREE_TYPE (field)))
7937 return true;
7939 break;
7942 case ARRAY_TYPE:
7943 /* Just for use if some languages pass arrays by value. */
7944 if (ix86_contains_aligned_value_p (TREE_TYPE (type)))
7945 return true;
7946 break;
7948 default:
7949 gcc_unreachable ();
7952 else
7953 return TYPE_ALIGN (type) >= 128;
7955 return false;
7958 /* Gives the alignment boundary, in bits, of an argument with the
7959 specified mode and type. */
7961 static unsigned int
7962 ix86_function_arg_boundary (machine_mode mode, const_tree type)
7964 unsigned int align;
7965 if (type)
7967 /* Since the main variant of the type is what is used for the call,
7968 convert TYPE to its main variant. */
7969 type = TYPE_MAIN_VARIANT (type);
7970 align = TYPE_ALIGN (type);
7972 else
7973 align = GET_MODE_ALIGNMENT (mode);
7974 if (align < PARM_BOUNDARY)
7975 align = PARM_BOUNDARY;
7976 else
7978 static bool warned;
7979 unsigned int saved_align = align;
7981 if (!TARGET_64BIT)
7983 /* i386 ABI defines XFmode arguments to be 4 byte aligned. */
7984 if (!type)
7986 if (mode == XFmode || mode == XCmode)
7987 align = PARM_BOUNDARY;
7989 else if (!ix86_contains_aligned_value_p (type))
7990 align = PARM_BOUNDARY;
7992 if (align < 128)
7993 align = PARM_BOUNDARY;
7996 if (warn_psabi
7997 && !warned
7998 && align != ix86_compat_function_arg_boundary (mode, type,
7999 saved_align))
8001 warned = true;
8002 inform (input_location,
8003 "The ABI for passing parameters with %d-byte"
8004 " alignment has changed in GCC 4.6",
8005 align / BITS_PER_UNIT);
8009 return align;
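/* Example (illustrative): on i386 a plain int argument gets PARM_BOUNDARY
   (32 bits), while an __m128 argument keeps its natural 128-bit alignment;
   when the result differs from what ix86_compat_function_arg_boundary
   would have chosen, the -Wpsabi note above fires once per compilation.  */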
8012 /* Return true if N is a possible register number of function value. */
8014 static bool
8015 ix86_function_value_regno_p (const unsigned int regno)
8017 switch (regno)
8019 case AX_REG:
8020 return true;
8021 case DX_REG:
8022 return (!TARGET_64BIT || ix86_abi != MS_ABI);
8023 case DI_REG:
8024 case SI_REG:
8025 return TARGET_64BIT && ix86_abi != MS_ABI;
8027 case FIRST_BND_REG:
8028 return chkp_function_instrumented_p (current_function_decl);
8030 /* Complex values are returned in %st(0)/%st(1) pair. */
8031 case ST0_REG:
8032 case ST1_REG:
8033 /* TODO: The function should depend on current function ABI but
8034 builtins.c would need updating then. Therefore we use the
8035 default ABI. */
8036 if (TARGET_64BIT && ix86_abi == MS_ABI)
8037 return false;
8038 return TARGET_FLOAT_RETURNS_IN_80387;
8040 /* Complex values are returned in %xmm0/%xmm1 pair. */
8041 case XMM0_REG:
8042 case XMM1_REG:
8043 return TARGET_SSE;
8045 case MM0_REG:
8046 if (TARGET_MACHO || TARGET_64BIT)
8047 return false;
8048 return TARGET_MMX;
8051 return false;
8054 /* Define how to find the value returned by a function.
8055 VALTYPE is the data type of the value (as a tree).
8056 If the precise function being called is known, FUNC is its FUNCTION_DECL;
8057 otherwise, FUNC is 0. */
8059 static rtx
8060 function_value_32 (machine_mode orig_mode, machine_mode mode,
8061 const_tree fntype, const_tree fn)
8063 unsigned int regno;
8065 /* 8-byte vector modes in %mm0. See ix86_return_in_memory for where
8066 we normally prevent this case when mmx is not available. However
8067 some ABIs may require the result to be returned like DImode. */
8068 if (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 8)
8069 regno = FIRST_MMX_REG;
8071 /* 16-byte vector modes in %xmm0. See ix86_return_in_memory for where
8072 we prevent this case when sse is not available. However some ABIs
8073 may require the result to be returned like integer TImode. */
8074 else if (mode == TImode
8075 || (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 16))
8076 regno = FIRST_SSE_REG;
8078 /* 32-byte vector modes in %ymm0. */
8079 else if (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 32)
8080 regno = FIRST_SSE_REG;
8082 /* 64-byte vector modes in %zmm0. */
8083 else if (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 64)
8084 regno = FIRST_SSE_REG;
8086 /* Floating point return values in %st(0) (unless -mno-fp-ret-in-387). */
8087 else if (X87_FLOAT_MODE_P (mode) && TARGET_FLOAT_RETURNS_IN_80387)
8088 regno = FIRST_FLOAT_REG;
8089 else
8090 /* Most things go in %eax. */
8091 regno = AX_REG;
8093 /* Override FP return register with %xmm0 for local functions when
8094 SSE math is enabled or for functions with sseregparm attribute. */
8095 if ((fn || fntype) && (mode == SFmode || mode == DFmode))
8097 int sse_level = ix86_function_sseregparm (fntype, fn, false);
8098 if ((sse_level >= 1 && mode == SFmode)
8099 || (sse_level == 2 && mode == DFmode))
8100 regno = FIRST_SSE_REG;
8103 /* OImode shouldn't be used directly. */
8104 gcc_assert (mode != OImode);
8106 return gen_rtx_REG (orig_mode, regno);
8109 static rtx
8110 function_value_64 (machine_mode orig_mode, machine_mode mode,
8111 const_tree valtype)
8113 rtx ret;
8115 /* Handle libcalls, which don't provide a type node. */
8116 if (valtype == NULL)
8118 unsigned int regno;
8120 switch (mode)
8122 case SFmode:
8123 case SCmode:
8124 case DFmode:
8125 case DCmode:
8126 case TFmode:
8127 case SDmode:
8128 case DDmode:
8129 case TDmode:
8130 regno = FIRST_SSE_REG;
8131 break;
8132 case XFmode:
8133 case XCmode:
8134 regno = FIRST_FLOAT_REG;
8135 break;
8136 case TCmode:
8137 return NULL;
8138 default:
8139 regno = AX_REG;
8142 return gen_rtx_REG (mode, regno);
8144 else if (POINTER_TYPE_P (valtype))
8146 /* Pointers are always returned in word_mode. */
8147 mode = word_mode;
8150 ret = construct_container (mode, orig_mode, valtype, 1,
8151 X86_64_REGPARM_MAX, X86_64_SSE_REGPARM_MAX,
8152 x86_64_int_return_registers, 0);
8154 /* For zero sized structures, construct_container returns NULL, but we
8155 need to keep the rest of the compiler happy by returning a meaningful value. */
8156 if (!ret)
8157 ret = gen_rtx_REG (orig_mode, AX_REG);
8159 return ret;
8162 static rtx
8163 function_value_ms_64 (machine_mode orig_mode, machine_mode mode,
8164 const_tree valtype)
8166 unsigned int regno = AX_REG;
8168 if (TARGET_SSE)
8170 switch (GET_MODE_SIZE (mode))
8172 case 16:
8173 if (valtype != NULL_TREE
8174 && !VECTOR_INTEGER_TYPE_P (valtype)
8176 && !INTEGRAL_TYPE_P (valtype)
8177 && !VECTOR_FLOAT_TYPE_P (valtype))
8178 break;
8179 if ((SCALAR_INT_MODE_P (mode) || VECTOR_MODE_P (mode))
8180 && !COMPLEX_MODE_P (mode))
8181 regno = FIRST_SSE_REG;
8182 break;
8183 case 8:
8184 case 4:
8185 if (mode == SFmode || mode == DFmode)
8186 regno = FIRST_SSE_REG;
8187 break;
8188 default:
8189 break;
8192 return gen_rtx_REG (orig_mode, regno);
8195 static rtx
8196 ix86_function_value_1 (const_tree valtype, const_tree fntype_or_decl,
8197 machine_mode orig_mode, machine_mode mode)
8199 const_tree fn, fntype;
8201 fn = NULL_TREE;
8202 if (fntype_or_decl && DECL_P (fntype_or_decl))
8203 fn = fntype_or_decl;
8204 fntype = fn ? TREE_TYPE (fn) : fntype_or_decl;
8206 if ((valtype && POINTER_BOUNDS_TYPE_P (valtype))
8207 || POINTER_BOUNDS_MODE_P (mode))
8208 return gen_rtx_REG (BNDmode, FIRST_BND_REG);
8209 else if (TARGET_64BIT && ix86_function_type_abi (fntype) == MS_ABI)
8210 return function_value_ms_64 (orig_mode, mode, valtype);
8211 else if (TARGET_64BIT)
8212 return function_value_64 (orig_mode, mode, valtype);
8213 else
8214 return function_value_32 (orig_mode, mode, fntype, fn);
8217 static rtx
8218 ix86_function_value (const_tree valtype, const_tree fntype_or_decl, bool)
8220 machine_mode mode, orig_mode;
8222 orig_mode = TYPE_MODE (valtype);
8223 mode = type_natural_mode (valtype, NULL, true);
8224 return ix86_function_value_1 (valtype, fntype_or_decl, orig_mode, mode);
8227 /* Return an RTX representing a place where a function returns
8228 or receives pointer bounds, or NULL if no bounds are returned.
8230 VALTYPE is a data type of a value returned by the function.
8232 FN_DECL_OR_TYPE is a tree node representing FUNCTION_DECL
8233 or FUNCTION_TYPE of the function.
8235 If OUTGOING is false, return a place in which the caller will
8236 see the return value. Otherwise, return a place where a
8237 function returns a value. */
8239 static rtx
8240 ix86_function_value_bounds (const_tree valtype,
8241 const_tree fntype_or_decl ATTRIBUTE_UNUSED,
8242 bool outgoing ATTRIBUTE_UNUSED)
8244 rtx res = NULL_RTX;
8246 if (BOUNDED_TYPE_P (valtype))
8247 res = gen_rtx_REG (BNDmode, FIRST_BND_REG);
8248 else if (chkp_type_has_pointer (valtype))
8250 bitmap slots;
8251 rtx bounds[2];
8252 bitmap_iterator bi;
8253 unsigned i, bnd_no = 0;
8255 bitmap_obstack_initialize (NULL);
8256 slots = BITMAP_ALLOC (NULL);
8257 chkp_find_bound_slots (valtype, slots);
8259 EXECUTE_IF_SET_IN_BITMAP (slots, 0, i, bi)
8261 rtx reg = gen_rtx_REG (BNDmode, FIRST_BND_REG + bnd_no);
8262 rtx offs = GEN_INT (i * POINTER_SIZE / BITS_PER_UNIT);
8263 gcc_assert (bnd_no < 2);
8264 bounds[bnd_no++] = gen_rtx_EXPR_LIST (VOIDmode, reg, offs);
8267 res = gen_rtx_PARALLEL (VOIDmode, gen_rtvec_v (bnd_no, bounds));
8269 BITMAP_FREE (slots);
8270 bitmap_obstack_release (NULL);
8272 else
8273 res = NULL_RTX;
8275 return res;
8278 /* Pointer function arguments and return values are promoted to
8279 word_mode. */
8281 static machine_mode
8282 ix86_promote_function_mode (const_tree type, machine_mode mode,
8283 int *punsignedp, const_tree fntype,
8284 int for_return)
8286 if (type != NULL_TREE && POINTER_TYPE_P (type))
8288 *punsignedp = POINTERS_EXTEND_UNSIGNED;
8289 return word_mode;
8291 return default_promote_function_mode (type, mode, punsignedp, fntype,
8292 for_return);
8295 /* Return true if a structure, union or array with MODE containing FIELD
8296 should be accessed using BLKmode. */
8298 static bool
8299 ix86_member_type_forces_blk (const_tree field, machine_mode mode)
8301 /* Union with XFmode must be in BLKmode. */
8302 return (mode == XFmode
8303 && (TREE_CODE (DECL_FIELD_CONTEXT (field)) == UNION_TYPE
8304 || TREE_CODE (DECL_FIELD_CONTEXT (field)) == QUAL_UNION_TYPE));
8308 ix86_libcall_value (machine_mode mode)
8310 return ix86_function_value_1 (NULL, NULL, mode, mode);
8313 /* Return true iff type is returned in memory. */
8315 static bool
8316 ix86_return_in_memory (const_tree type, const_tree fntype ATTRIBUTE_UNUSED)
8318 #ifdef SUBTARGET_RETURN_IN_MEMORY
8319 return SUBTARGET_RETURN_IN_MEMORY (type, fntype);
8320 #else
8321 const machine_mode mode = type_natural_mode (type, NULL, true);
8322 HOST_WIDE_INT size;
8324 if (POINTER_BOUNDS_TYPE_P (type))
8325 return false;
8327 if (TARGET_64BIT)
8329 if (ix86_function_type_abi (fntype) == MS_ABI)
8331 size = int_size_in_bytes (type);
8333 /* __m128 is returned in xmm0. */
8334 if ((!type || VECTOR_INTEGER_TYPE_P (type)
8335 || INTEGRAL_TYPE_P (type)
8336 || VECTOR_FLOAT_TYPE_P (type))
8337 && (SCALAR_INT_MODE_P (mode) || VECTOR_MODE_P (mode))
8338 && !COMPLEX_MODE_P (mode)
8339 && (GET_MODE_SIZE (mode) == 16 || size == 16))
8340 return false;
8342 /* Otherwise, the size must be exactly 1, 2, 4 or 8. */
8343 return size != 1 && size != 2 && size != 4 && size != 8;
8345 else
8347 int needed_intregs, needed_sseregs;
8349 return examine_argument (mode, type, 1,
8350 &needed_intregs, &needed_sseregs);
8353 else
8355 if (mode == BLKmode)
8356 return true;
8358 size = int_size_in_bytes (type);
8360 if (MS_AGGREGATE_RETURN && AGGREGATE_TYPE_P (type) && size <= 8)
8361 return false;
8363 if (VECTOR_MODE_P (mode) || mode == TImode)
8365 /* User-created vectors small enough to fit in EAX. */
8366 if (size < 8)
8367 return false;
8369 /* Unless the ABI prescribes otherwise,
8370 MMX/3dNow values are returned in MM0 if available. */
8372 if (size == 8)
8373 return TARGET_VECT8_RETURNS || !TARGET_MMX;
8375 /* SSE values are returned in XMM0 if available. */
8376 if (size == 16)
8377 return !TARGET_SSE;
8379 /* AVX values are returned in YMM0 if available. */
8380 if (size == 32)
8381 return !TARGET_AVX;
8383 /* AVX512F values are returned in ZMM0 if available. */
8384 if (size == 64)
8385 return !TARGET_AVX512F;
8388 if (mode == XFmode)
8389 return false;
8391 if (size > 12)
8392 return true;
8394 /* OImode shouldn't be used directly. */
8395 gcc_assert (mode != OImode);
8397 return false;
8399 #endif
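/* Illustrative decisions (added note): under the 64-bit SysV ABI a 32-byte
   struct of ints is returned in memory (classify_argument yields 0) while
   __m128 is returned in %xmm0; under the MS x64 ABI __m128 also comes back
   in %xmm0 but other aggregates must be exactly 1, 2, 4 or 8 bytes to stay
   in RAX; in 32-bit mode a 16-byte vector is returned in %xmm0 only when
   SSE is enabled, otherwise in memory.  */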
8403 /* Create the va_list data type. */
8405 /* Returns the calling convention specific va_list data type.
8406 The argument ABI can be DEFAULT_ABI, MS_ABI, or SYSV_ABI. */
8408 static tree
8409 ix86_build_builtin_va_list_abi (enum calling_abi abi)
8411 tree f_gpr, f_fpr, f_ovf, f_sav, record, type_decl;
8413 /* For i386 we use plain pointer to argument area. */
8414 if (!TARGET_64BIT || abi == MS_ABI)
8415 return build_pointer_type (char_type_node);
8417 record = lang_hooks.types.make_type (RECORD_TYPE);
8418 type_decl = build_decl (BUILTINS_LOCATION,
8419 TYPE_DECL, get_identifier ("__va_list_tag"), record);
8421 f_gpr = build_decl (BUILTINS_LOCATION,
8422 FIELD_DECL, get_identifier ("gp_offset"),
8423 unsigned_type_node);
8424 f_fpr = build_decl (BUILTINS_LOCATION,
8425 FIELD_DECL, get_identifier ("fp_offset"),
8426 unsigned_type_node);
8427 f_ovf = build_decl (BUILTINS_LOCATION,
8428 FIELD_DECL, get_identifier ("overflow_arg_area"),
8429 ptr_type_node);
8430 f_sav = build_decl (BUILTINS_LOCATION,
8431 FIELD_DECL, get_identifier ("reg_save_area"),
8432 ptr_type_node);
8434 va_list_gpr_counter_field = f_gpr;
8435 va_list_fpr_counter_field = f_fpr;
8437 DECL_FIELD_CONTEXT (f_gpr) = record;
8438 DECL_FIELD_CONTEXT (f_fpr) = record;
8439 DECL_FIELD_CONTEXT (f_ovf) = record;
8440 DECL_FIELD_CONTEXT (f_sav) = record;
8442 TYPE_STUB_DECL (record) = type_decl;
8443 TYPE_NAME (record) = type_decl;
8444 TYPE_FIELDS (record) = f_gpr;
8445 DECL_CHAIN (f_gpr) = f_fpr;
8446 DECL_CHAIN (f_fpr) = f_ovf;
8447 DECL_CHAIN (f_ovf) = f_sav;
8449 layout_type (record);
8451 /* The correct type is an array type of one element. */
8452 return build_array_type (record, build_index_type (size_zero_node));
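/* For reference (mirrors the fields laid out above): the SysV x86-64
   va_list built here is equivalent to
     typedef struct __va_list_tag {
       unsigned int gp_offset;
       unsigned int fp_offset;
       void *overflow_arg_area;
       void *reg_save_area;
     } __builtin_va_list[1];
   whereas for i386 and the MS x64 ABI va_list is just a char pointer.  */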
8455 /* Setup the builtin va_list data type and for 64-bit the additional
8456 calling convention specific va_list data types. */
8458 static tree
8459 ix86_build_builtin_va_list (void)
8461 tree ret = ix86_build_builtin_va_list_abi (ix86_abi);
8463 /* Initialize abi specific va_list builtin types. */
8464 if (TARGET_64BIT)
8466 tree t;
8467 if (ix86_abi == MS_ABI)
8469 t = ix86_build_builtin_va_list_abi (SYSV_ABI);
8470 if (TREE_CODE (t) != RECORD_TYPE)
8471 t = build_variant_type_copy (t);
8472 sysv_va_list_type_node = t;
8474 else
8476 t = ret;
8477 if (TREE_CODE (t) != RECORD_TYPE)
8478 t = build_variant_type_copy (t);
8479 sysv_va_list_type_node = t;
8481 if (ix86_abi != MS_ABI)
8483 t = ix86_build_builtin_va_list_abi (MS_ABI);
8484 if (TREE_CODE (t) != RECORD_TYPE)
8485 t = build_variant_type_copy (t);
8486 ms_va_list_type_node = t;
8488 else
8490 t = ret;
8491 if (TREE_CODE (t) != RECORD_TYPE)
8492 t = build_variant_type_copy (t);
8493 ms_va_list_type_node = t;
8497 return ret;
8500 /* Worker function for TARGET_SETUP_INCOMING_VARARGS. */
8502 static void
8503 setup_incoming_varargs_64 (CUMULATIVE_ARGS *cum)
8505 rtx save_area, mem;
8506 alias_set_type set;
8507 int i, max;
8509 /* GPR size of varargs save area. */
8510 if (cfun->va_list_gpr_size)
8511 ix86_varargs_gpr_size = X86_64_REGPARM_MAX * UNITS_PER_WORD;
8512 else
8513 ix86_varargs_gpr_size = 0;
8515 /* FPR size of varargs save area. We don't need it if we don't pass
8516 anything in SSE registers. */
8517 if (TARGET_SSE && cfun->va_list_fpr_size)
8518 ix86_varargs_fpr_size = X86_64_SSE_REGPARM_MAX * 16;
8519 else
8520 ix86_varargs_fpr_size = 0;
8522 if (! ix86_varargs_gpr_size && ! ix86_varargs_fpr_size)
8523 return;
8525 save_area = frame_pointer_rtx;
8526 set = get_varargs_alias_set ();
8528 max = cum->regno + cfun->va_list_gpr_size / UNITS_PER_WORD;
8529 if (max > X86_64_REGPARM_MAX)
8530 max = X86_64_REGPARM_MAX;
8532 for (i = cum->regno; i < max; i++)
8534 mem = gen_rtx_MEM (word_mode,
8535 plus_constant (Pmode, save_area, i * UNITS_PER_WORD));
8536 MEM_NOTRAP_P (mem) = 1;
8537 set_mem_alias_set (mem, set);
8538 emit_move_insn (mem,
8539 gen_rtx_REG (word_mode,
8540 x86_64_int_parameter_registers[i]));
8543 if (ix86_varargs_fpr_size)
8545 machine_mode smode;
8546 rtx_code_label *label;
8547 rtx test;
8549 /* Now emit code to save SSE registers. The AX parameter contains number
8550 of SSE parameter registers used to call this function, though all we
8551 actually check here is the zero/non-zero status. */
8553 label = gen_label_rtx ();
8554 test = gen_rtx_EQ (VOIDmode, gen_rtx_REG (QImode, AX_REG), const0_rtx);
8555 emit_jump_insn (gen_cbranchqi4 (test, XEXP (test, 0), XEXP (test, 1),
8556 label));
8558 /* ??? If !TARGET_SSE_TYPELESS_STORES, would we perform better if
8559 we used movdqa (i.e. TImode) instead? Perhaps even better would
8560 be if we could determine the real mode of the data, via a hook
8561 into pass_stdarg. Ignore all that for now. */
8562 smode = V4SFmode;
8563 if (crtl->stack_alignment_needed < GET_MODE_ALIGNMENT (smode))
8564 crtl->stack_alignment_needed = GET_MODE_ALIGNMENT (smode);
8566 max = cum->sse_regno + cfun->va_list_fpr_size / 16;
8567 if (max > X86_64_SSE_REGPARM_MAX)
8568 max = X86_64_SSE_REGPARM_MAX;
8570 for (i = cum->sse_regno; i < max; ++i)
8572 mem = plus_constant (Pmode, save_area,
8573 i * 16 + ix86_varargs_gpr_size);
8574 mem = gen_rtx_MEM (smode, mem);
8575 MEM_NOTRAP_P (mem) = 1;
8576 set_mem_alias_set (mem, set);
8577 set_mem_align (mem, GET_MODE_ALIGNMENT (smode));
8579 emit_move_insn (mem, gen_rtx_REG (smode, SSE_REGNO (i)));
8582 emit_label (label);
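/* A sketch of the save area laid out by the code above, assuming the usual
   X86_64_REGPARM_MAX == 6 and X86_64_SSE_REGPARM_MAX == 8:

       save_area +   0 ... +  47   rdi, rsi, rdx, rcx, r8, r9   (8 bytes each)
       save_area +  48 ... + 175   xmm0 ... xmm7                (16 bytes each)

   The SSE block begins at ix86_varargs_gpr_size and is skipped entirely when
   AL is zero, via the branch to the label emitted above.  */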
8586 static void
8587 setup_incoming_varargs_ms_64 (CUMULATIVE_ARGS *cum)
8589 alias_set_type set = get_varargs_alias_set ();
8590 int i;
8592 /* Reset to zero, as there might have been a SysV va_arg used
8593 before. */
8594 ix86_varargs_gpr_size = 0;
8595 ix86_varargs_fpr_size = 0;
8597 for (i = cum->regno; i < X86_64_MS_REGPARM_MAX; i++)
8599 rtx reg, mem;
8601 mem = gen_rtx_MEM (Pmode,
8602 plus_constant (Pmode, virtual_incoming_args_rtx,
8603 i * UNITS_PER_WORD));
8604 MEM_NOTRAP_P (mem) = 1;
8605 set_mem_alias_set (mem, set);
8607 reg = gen_rtx_REG (Pmode, x86_64_ms_abi_int_parameter_registers[i]);
8608 emit_move_insn (mem, reg);
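/* In contrast to the SysV case above, the MS ABI variant simply spills the
   four integer argument registers (rcx, rdx, r8, r9) into the caller-allocated
   home area just above the return address, so a va_list can be a plain
   char * into that area and no gp/fp offsets or register save area are
   needed.  */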
8612 static void
8613 ix86_setup_incoming_varargs (cumulative_args_t cum_v, machine_mode mode,
8614 tree type, int *, int no_rtl)
8616 CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
8617 CUMULATIVE_ARGS next_cum;
8618 tree fntype;
8620 /* This argument doesn't appear to be used anymore, which is good,
8621 because the old code here didn't suppress rtl generation. */
8622 gcc_assert (!no_rtl);
8624 if (!TARGET_64BIT)
8625 return;
8627 fntype = TREE_TYPE (current_function_decl);
8629 /* For varargs, we do not want to skip the dummy va_dcl argument.
8630 For stdargs, we do want to skip the last named argument. */
8631 next_cum = *cum;
8632 if (stdarg_p (fntype))
8633 ix86_function_arg_advance (pack_cumulative_args (&next_cum), mode, type,
8634 true);
8636 if (cum->call_abi == MS_ABI)
8637 setup_incoming_varargs_ms_64 (&next_cum);
8638 else
8639 setup_incoming_varargs_64 (&next_cum);
8642 static void
8643 ix86_setup_incoming_vararg_bounds (cumulative_args_t cum_v,
8644 enum machine_mode mode,
8645 tree type,
8646 int *pretend_size ATTRIBUTE_UNUSED,
8647 int no_rtl)
8649 CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
8650 CUMULATIVE_ARGS next_cum;
8651 tree fntype;
8652 rtx save_area;
8653 int bnd_reg, i, max;
8655 gcc_assert (!no_rtl);
8657 /* Do nothing if we use a plain pointer to the argument area. */
8658 if (!TARGET_64BIT || cum->call_abi == MS_ABI)
8659 return;
8661 fntype = TREE_TYPE (current_function_decl);
8663 /* For varargs, we do not want to skip the dummy va_dcl argument.
8664 For stdargs, we do want to skip the last named argument. */
8665 next_cum = *cum;
8666 if (stdarg_p (fntype))
8667 ix86_function_arg_advance (pack_cumulative_args (&next_cum), mode, type,
8668 true);
8669 save_area = frame_pointer_rtx;
8671 max = cum->regno + cfun->va_list_gpr_size / UNITS_PER_WORD;
8672 if (max > X86_64_REGPARM_MAX)
8673 max = X86_64_REGPARM_MAX;
8675 bnd_reg = cum->bnd_regno + cum->force_bnd_pass;
8676 if (chkp_function_instrumented_p (current_function_decl))
8677 for (i = cum->regno; i < max; i++)
8679 rtx addr = plus_constant (Pmode, save_area, i * UNITS_PER_WORD);
8680 rtx reg = gen_rtx_REG (DImode,
8681 x86_64_int_parameter_registers[i]);
8682 rtx ptr = reg;
8683 rtx bounds;
8685 if (bnd_reg <= LAST_BND_REG)
8686 bounds = gen_rtx_REG (BNDmode, bnd_reg);
8687 else
8689 rtx ldx_addr =
8690 plus_constant (Pmode, arg_pointer_rtx,
8691 (LAST_BND_REG - bnd_reg) * GET_MODE_SIZE (Pmode));
8692 bounds = gen_reg_rtx (BNDmode);
8693 emit_insn (BNDmode == BND64mode
8694 ? gen_bnd64_ldx (bounds, ldx_addr, ptr)
8695 : gen_bnd32_ldx (bounds, ldx_addr, ptr));
8698 emit_insn (BNDmode == BND64mode
8699 ? gen_bnd64_stx (addr, ptr, bounds)
8700 : gen_bnd32_stx (addr, ptr, bounds));
8702 bnd_reg++;
8707 /* Return true if TYPE is a va_list of the plain char * kind. */
8709 static bool
8710 is_va_list_char_pointer (tree type)
8712 tree canonic;
8714 /* For 32-bit it is always true. */
8715 if (!TARGET_64BIT)
8716 return true;
8717 canonic = ix86_canonical_va_list_type (type);
8718 return (canonic == ms_va_list_type_node
8719 || (ix86_abi == MS_ABI && canonic == va_list_type_node));
8722 /* Implement va_start. */
8724 static void
8725 ix86_va_start (tree valist, rtx nextarg)
8727 HOST_WIDE_INT words, n_gpr, n_fpr;
8728 tree f_gpr, f_fpr, f_ovf, f_sav;
8729 tree gpr, fpr, ovf, sav, t;
8730 tree type;
8731 rtx ovf_rtx;
8733 if (flag_split_stack
8734 && cfun->machine->split_stack_varargs_pointer == NULL_RTX)
8736 unsigned int scratch_regno;
8738 /* When we are splitting the stack, we can't refer to the stack
8739 arguments using internal_arg_pointer, because they may be on
8740 the old stack. The split stack prologue will arrange to
8741 leave a pointer to the old stack arguments in a scratch
8742 register, which we here copy to a pseudo-register. The split
8743 stack prologue can't set the pseudo-register directly because
8744 it (the prologue) runs before any registers have been saved. */
8746 scratch_regno = split_stack_prologue_scratch_regno ();
8747 if (scratch_regno != INVALID_REGNUM)
8749 rtx reg;
8750 rtx_insn *seq;
8752 reg = gen_reg_rtx (Pmode);
8753 cfun->machine->split_stack_varargs_pointer = reg;
8755 start_sequence ();
8756 emit_move_insn (reg, gen_rtx_REG (Pmode, scratch_regno));
8757 seq = get_insns ();
8758 end_sequence ();
8760 push_topmost_sequence ();
8761 emit_insn_after (seq, entry_of_function ());
8762 pop_topmost_sequence ();
8766 /* Only the 64-bit target needs something special. */
8767 if (!TARGET_64BIT || is_va_list_char_pointer (TREE_TYPE (valist)))
8769 if (cfun->machine->split_stack_varargs_pointer == NULL_RTX)
8770 std_expand_builtin_va_start (valist, nextarg);
8771 else
8773 rtx va_r, next;
8775 va_r = expand_expr (valist, NULL_RTX, VOIDmode, EXPAND_WRITE);
8776 next = expand_binop (ptr_mode, add_optab,
8777 cfun->machine->split_stack_varargs_pointer,
8778 crtl->args.arg_offset_rtx,
8779 NULL_RTX, 0, OPTAB_LIB_WIDEN);
8780 convert_move (va_r, next, 0);
8782 /* Store zero bounds for va_list. */
8783 if (chkp_function_instrumented_p (current_function_decl))
8784 chkp_expand_bounds_reset_for_mem (valist,
8785 make_tree (TREE_TYPE (valist),
8786 next));
8789 return;
8792 f_gpr = TYPE_FIELDS (TREE_TYPE (sysv_va_list_type_node));
8793 f_fpr = DECL_CHAIN (f_gpr);
8794 f_ovf = DECL_CHAIN (f_fpr);
8795 f_sav = DECL_CHAIN (f_ovf);
8797 valist = build_simple_mem_ref (valist);
8798 TREE_TYPE (valist) = TREE_TYPE (sysv_va_list_type_node);
8799 /* The following should be folded into the MEM_REF offset. */
8800 gpr = build3 (COMPONENT_REF, TREE_TYPE (f_gpr), unshare_expr (valist),
8801 f_gpr, NULL_TREE);
8802 fpr = build3 (COMPONENT_REF, TREE_TYPE (f_fpr), unshare_expr (valist),
8803 f_fpr, NULL_TREE);
8804 ovf = build3 (COMPONENT_REF, TREE_TYPE (f_ovf), unshare_expr (valist),
8805 f_ovf, NULL_TREE);
8806 sav = build3 (COMPONENT_REF, TREE_TYPE (f_sav), unshare_expr (valist),
8807 f_sav, NULL_TREE);
8809 /* Count number of gp and fp argument registers used. */
8810 words = crtl->args.info.words;
8811 n_gpr = crtl->args.info.regno;
8812 n_fpr = crtl->args.info.sse_regno;
8814 if (cfun->va_list_gpr_size)
8816 type = TREE_TYPE (gpr);
8817 t = build2 (MODIFY_EXPR, type,
8818 gpr, build_int_cst (type, n_gpr * 8));
8819 TREE_SIDE_EFFECTS (t) = 1;
8820 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
8823 if (TARGET_SSE && cfun->va_list_fpr_size)
8825 type = TREE_TYPE (fpr);
8826 t = build2 (MODIFY_EXPR, type, fpr,
8827 build_int_cst (type, n_fpr * 16 + 8*X86_64_REGPARM_MAX));
8828 TREE_SIDE_EFFECTS (t) = 1;
8829 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
8832 /* Find the overflow area. */
8833 type = TREE_TYPE (ovf);
8834 if (cfun->machine->split_stack_varargs_pointer == NULL_RTX)
8835 ovf_rtx = crtl->args.internal_arg_pointer;
8836 else
8837 ovf_rtx = cfun->machine->split_stack_varargs_pointer;
8838 t = make_tree (type, ovf_rtx);
8839 if (words != 0)
8840 t = fold_build_pointer_plus_hwi (t, words * UNITS_PER_WORD);
8842 /* Store zero bounds for overflow area pointer. */
8843 if (chkp_function_instrumented_p (current_function_decl))
8844 chkp_expand_bounds_reset_for_mem (ovf, t);
8846 t = build2 (MODIFY_EXPR, type, ovf, t);
8847 TREE_SIDE_EFFECTS (t) = 1;
8848 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
8850 if (ix86_varargs_gpr_size || ix86_varargs_fpr_size)
8852 /* Find the register save area.
8853 The function prologue saves it right above the stack frame. */
8854 type = TREE_TYPE (sav);
8855 t = make_tree (type, frame_pointer_rtx);
8856 if (!ix86_varargs_gpr_size)
8857 t = fold_build_pointer_plus_hwi (t, -8 * X86_64_REGPARM_MAX);
8859 /* Store zero bounds for save area pointer. */
8860 if (chkp_function_instrumented_p (current_function_decl))
8861 chkp_expand_bounds_reset_for_mem (sav, t);
8863 t = build2 (MODIFY_EXPR, type, sav, t);
8864 TREE_SIDE_EFFECTS (t) = 1;
8865 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
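/* Putting the assignments above together: for a SysV AMD64 stdarg function,
   va_start expands to roughly the following (a C-like sketch of the trees
   built above, using the psABI field names):

       ap->gp_offset = n_gpr * 8;                        -- GPRs already consumed
       ap->fp_offset = 8 * X86_64_REGPARM_MAX + n_fpr * 16;
       ap->overflow_arg_area = arg_ptr + words * UNITS_PER_WORD;
       ap->reg_save_area = frame_pointer;                -- register save area
*/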
8869 /* Implement va_arg. */
8871 static tree
8872 ix86_gimplify_va_arg (tree valist, tree type, gimple_seq *pre_p,
8873 gimple_seq *post_p)
8875 static const int intreg[6] = { 0, 1, 2, 3, 4, 5 };
8876 tree f_gpr, f_fpr, f_ovf, f_sav;
8877 tree gpr, fpr, ovf, sav, t;
8878 int size, rsize;
8879 tree lab_false, lab_over = NULL_TREE;
8880 tree addr, t2;
8881 rtx container;
8882 int indirect_p = 0;
8883 tree ptrtype;
8884 machine_mode nat_mode;
8885 unsigned int arg_boundary;
8887 /* Only the 64-bit target needs something special. */
8888 if (!TARGET_64BIT || is_va_list_char_pointer (TREE_TYPE (valist)))
8889 return std_gimplify_va_arg_expr (valist, type, pre_p, post_p);
8891 f_gpr = TYPE_FIELDS (TREE_TYPE (sysv_va_list_type_node));
8892 f_fpr = DECL_CHAIN (f_gpr);
8893 f_ovf = DECL_CHAIN (f_fpr);
8894 f_sav = DECL_CHAIN (f_ovf);
8896 gpr = build3 (COMPONENT_REF, TREE_TYPE (f_gpr),
8897 build_va_arg_indirect_ref (valist), f_gpr, NULL_TREE);
8898 valist = build_va_arg_indirect_ref (valist);
8899 fpr = build3 (COMPONENT_REF, TREE_TYPE (f_fpr), valist, f_fpr, NULL_TREE);
8900 ovf = build3 (COMPONENT_REF, TREE_TYPE (f_ovf), valist, f_ovf, NULL_TREE);
8901 sav = build3 (COMPONENT_REF, TREE_TYPE (f_sav), valist, f_sav, NULL_TREE);
8903 indirect_p = pass_by_reference (NULL, TYPE_MODE (type), type, false);
8904 if (indirect_p)
8905 type = build_pointer_type (type);
8906 size = int_size_in_bytes (type);
8907 rsize = (size + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
8909 nat_mode = type_natural_mode (type, NULL, false);
8910 switch (nat_mode)
8912 case V8SFmode:
8913 case V8SImode:
8914 case V32QImode:
8915 case V16HImode:
8916 case V4DFmode:
8917 case V4DImode:
8918 case V16SFmode:
8919 case V16SImode:
8920 case V64QImode:
8921 case V32HImode:
8922 case V8DFmode:
8923 case V8DImode:
8924 /* Unnamed 256-bit and 512-bit vector mode parameters are passed on the stack. */
8925 if (!TARGET_64BIT_MS_ABI)
8927 container = NULL;
8928 break;
8931 default:
8932 container = construct_container (nat_mode, TYPE_MODE (type),
8933 type, 0, X86_64_REGPARM_MAX,
8934 X86_64_SSE_REGPARM_MAX, intreg,
8936 break;
8939 /* Pull the value out of the saved registers. */
8941 addr = create_tmp_var (ptr_type_node, "addr");
8943 if (container)
8945 int needed_intregs, needed_sseregs;
8946 bool need_temp;
8947 tree int_addr, sse_addr;
8949 lab_false = create_artificial_label (UNKNOWN_LOCATION);
8950 lab_over = create_artificial_label (UNKNOWN_LOCATION);
8952 examine_argument (nat_mode, type, 0, &needed_intregs, &needed_sseregs);
8954 need_temp = (!REG_P (container)
8955 && ((needed_intregs && TYPE_ALIGN (type) > 64)
8956 || TYPE_ALIGN (type) > 128));
8958 /* If we are passing a structure, verify that it occupies a consecutive block
8959 of the register save area.  If not, we need to do moves. */
8960 if (!need_temp && !REG_P (container))
8962 /* Verify that all registers are strictly consecutive.  */
8963 if (SSE_REGNO_P (REGNO (XEXP (XVECEXP (container, 0, 0), 0))))
8965 int i;
8967 for (i = 0; i < XVECLEN (container, 0) && !need_temp; i++)
8969 rtx slot = XVECEXP (container, 0, i);
8970 if (REGNO (XEXP (slot, 0)) != FIRST_SSE_REG + (unsigned int) i
8971 || INTVAL (XEXP (slot, 1)) != i * 16)
8972 need_temp = 1;
8975 else
8977 int i;
8979 for (i = 0; i < XVECLEN (container, 0) && !need_temp; i++)
8981 rtx slot = XVECEXP (container, 0, i);
8982 if (REGNO (XEXP (slot, 0)) != (unsigned int) i
8983 || INTVAL (XEXP (slot, 1)) != i * 8)
8984 need_temp = 1;
8988 if (!need_temp)
8990 int_addr = addr;
8991 sse_addr = addr;
8993 else
8995 int_addr = create_tmp_var (ptr_type_node, "int_addr");
8996 sse_addr = create_tmp_var (ptr_type_node, "sse_addr");
8999 /* First ensure that we fit completely in registers. */
9000 if (needed_intregs)
9002 t = build_int_cst (TREE_TYPE (gpr),
9003 (X86_64_REGPARM_MAX - needed_intregs + 1) * 8);
9004 t = build2 (GE_EXPR, boolean_type_node, gpr, t);
9005 t2 = build1 (GOTO_EXPR, void_type_node, lab_false);
9006 t = build3 (COND_EXPR, void_type_node, t, t2, NULL_TREE);
9007 gimplify_and_add (t, pre_p);
9009 if (needed_sseregs)
9011 t = build_int_cst (TREE_TYPE (fpr),
9012 (X86_64_SSE_REGPARM_MAX - needed_sseregs + 1) * 16
9013 + X86_64_REGPARM_MAX * 8);
9014 t = build2 (GE_EXPR, boolean_type_node, fpr, t);
9015 t2 = build1 (GOTO_EXPR, void_type_node, lab_false);
9016 t = build3 (COND_EXPR, void_type_node, t, t2, NULL_TREE);
9017 gimplify_and_add (t, pre_p);
9020 /* Compute index to start of area used for integer regs. */
9021 if (needed_intregs)
9023 /* int_addr = gpr + sav; */
9024 t = fold_build_pointer_plus (sav, gpr);
9025 gimplify_assign (int_addr, t, pre_p);
9027 if (needed_sseregs)
9029 /* sse_addr = fpr + sav; */
9030 t = fold_build_pointer_plus (sav, fpr);
9031 gimplify_assign (sse_addr, t, pre_p);
9033 if (need_temp)
9035 int i, prev_size = 0;
9036 tree temp = create_tmp_var (type, "va_arg_tmp");
9038 /* addr = &temp; */
9039 t = build1 (ADDR_EXPR, build_pointer_type (type), temp);
9040 gimplify_assign (addr, t, pre_p);
9042 for (i = 0; i < XVECLEN (container, 0); i++)
9044 rtx slot = XVECEXP (container, 0, i);
9045 rtx reg = XEXP (slot, 0);
9046 machine_mode mode = GET_MODE (reg);
9047 tree piece_type;
9048 tree addr_type;
9049 tree daddr_type;
9050 tree src_addr, src;
9051 int src_offset;
9052 tree dest_addr, dest;
9053 int cur_size = GET_MODE_SIZE (mode);
9055 gcc_assert (prev_size <= INTVAL (XEXP (slot, 1)));
9056 prev_size = INTVAL (XEXP (slot, 1));
9057 if (prev_size + cur_size > size)
9059 cur_size = size - prev_size;
9060 mode = mode_for_size (cur_size * BITS_PER_UNIT, MODE_INT, 1);
9061 if (mode == BLKmode)
9062 mode = QImode;
9064 piece_type = lang_hooks.types.type_for_mode (mode, 1);
9065 if (mode == GET_MODE (reg))
9066 addr_type = build_pointer_type (piece_type);
9067 else
9068 addr_type = build_pointer_type_for_mode (piece_type, ptr_mode,
9069 true);
9070 daddr_type = build_pointer_type_for_mode (piece_type, ptr_mode,
9071 true);
9073 if (SSE_REGNO_P (REGNO (reg)))
9075 src_addr = sse_addr;
9076 src_offset = (REGNO (reg) - FIRST_SSE_REG) * 16;
9078 else
9080 src_addr = int_addr;
9081 src_offset = REGNO (reg) * 8;
9083 src_addr = fold_convert (addr_type, src_addr);
9084 src_addr = fold_build_pointer_plus_hwi (src_addr, src_offset);
9086 dest_addr = fold_convert (daddr_type, addr);
9087 dest_addr = fold_build_pointer_plus_hwi (dest_addr, prev_size);
9088 if (cur_size == GET_MODE_SIZE (mode))
9090 src = build_va_arg_indirect_ref (src_addr);
9091 dest = build_va_arg_indirect_ref (dest_addr);
9093 gimplify_assign (dest, src, pre_p);
9095 else
9097 tree copy
9098 = build_call_expr (builtin_decl_implicit (BUILT_IN_MEMCPY),
9099 3, dest_addr, src_addr,
9100 size_int (cur_size));
9101 gimplify_and_add (copy, pre_p);
9103 prev_size += cur_size;
9107 if (needed_intregs)
9109 t = build2 (PLUS_EXPR, TREE_TYPE (gpr), gpr,
9110 build_int_cst (TREE_TYPE (gpr), needed_intregs * 8));
9111 gimplify_assign (gpr, t, pre_p);
9114 if (needed_sseregs)
9116 t = build2 (PLUS_EXPR, TREE_TYPE (fpr), fpr,
9117 build_int_cst (TREE_TYPE (fpr), needed_sseregs * 16));
9118 gimplify_assign (fpr, t, pre_p);
9121 gimple_seq_add_stmt (pre_p, gimple_build_goto (lab_over));
9123 gimple_seq_add_stmt (pre_p, gimple_build_label (lab_false));
9126 /* ... otherwise out of the overflow area. */
9128 /* When we align a parameter on the stack for the caller, if the parameter
9129 alignment is beyond MAX_SUPPORTED_STACK_ALIGNMENT, it will be
9130 aligned at MAX_SUPPORTED_STACK_ALIGNMENT.  Here we match the callee
9131 with the caller. */
9132 arg_boundary = ix86_function_arg_boundary (VOIDmode, type);
9133 if ((unsigned int) arg_boundary > MAX_SUPPORTED_STACK_ALIGNMENT)
9134 arg_boundary = MAX_SUPPORTED_STACK_ALIGNMENT;
9136 /* Care for on-stack alignment if needed. */
9137 if (arg_boundary <= 64 || size == 0)
9138 t = ovf;
9139 else
9141 HOST_WIDE_INT align = arg_boundary / 8;
9142 t = fold_build_pointer_plus_hwi (ovf, align - 1);
9143 t = build2 (BIT_AND_EXPR, TREE_TYPE (t), t,
9144 build_int_cst (TREE_TYPE (t), -align));
9147 gimplify_expr (&t, pre_p, NULL, is_gimple_val, fb_rvalue);
9148 gimplify_assign (addr, t, pre_p);
9150 t = fold_build_pointer_plus_hwi (t, rsize * UNITS_PER_WORD);
9151 gimplify_assign (unshare_expr (ovf), t, pre_p);
9153 if (container)
9154 gimple_seq_add_stmt (pre_p, gimple_build_label (lab_over));
9156 ptrtype = build_pointer_type_for_mode (type, ptr_mode, true);
9157 addr = fold_convert (ptrtype, addr);
9159 if (indirect_p)
9160 addr = build_va_arg_indirect_ref (addr);
9161 return build_va_arg_indirect_ref (addr);
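/* As an informal summary of the GIMPLE built above, va_arg of a plain int
   behaves roughly like this sketch (with 6 GPR slots of 8 bytes each):

       if (ap->gp_offset >= 6 * 8)
         {
           addr = ap->overflow_arg_area;
           ap->overflow_arg_area = addr + 8;
         }
       else
         {
           addr = ap->reg_save_area + ap->gp_offset;
           ap->gp_offset += 8;
         }
       result = *(int *) addr;
*/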
9164 /* Return true if OPNUM's MEM should be matched
9165 in movabs* patterns. */
9167 bool
9168 ix86_check_movabs (rtx insn, int opnum)
9170 rtx set, mem;
9172 set = PATTERN (insn);
9173 if (GET_CODE (set) == PARALLEL)
9174 set = XVECEXP (set, 0, 0);
9175 gcc_assert (GET_CODE (set) == SET);
9176 mem = XEXP (set, opnum);
9177 while (GET_CODE (mem) == SUBREG)
9178 mem = SUBREG_REG (mem);
9179 gcc_assert (MEM_P (mem));
9180 return volatile_ok || !MEM_VOLATILE_P (mem);
9183 /* Initialize the table of extra 80387 mathematical constants. */
9185 static void
9186 init_ext_80387_constants (void)
9188 static const char * cst[5] =
9190 "0.3010299956639811952256464283594894482", /* 0: fldlg2 */
9191 "0.6931471805599453094286904741849753009", /* 1: fldln2 */
9192 "1.4426950408889634073876517827983434472", /* 2: fldl2e */
9193 "3.3219280948873623478083405569094566090", /* 3: fldl2t */
9194 "3.1415926535897932385128089594061862044", /* 4: fldpi */
9196 int i;
9198 for (i = 0; i < 5; i++)
9200 real_from_string (&ext_80387_constants_table[i], cst[i]);
9201 /* Ensure each constant is rounded to XFmode precision. */
9202 real_convert (&ext_80387_constants_table[i],
9203 XFmode, &ext_80387_constants_table[i]);
9206 ext_80387_constants_init = 1;
9209 /* Return non-zero if the constant is something that
9210 can be loaded with a special instruction. */
9213 standard_80387_constant_p (rtx x)
9215 machine_mode mode = GET_MODE (x);
9217 REAL_VALUE_TYPE r;
9219 if (!(X87_FLOAT_MODE_P (mode) && (GET_CODE (x) == CONST_DOUBLE)))
9220 return -1;
9222 if (x == CONST0_RTX (mode))
9223 return 1;
9224 if (x == CONST1_RTX (mode))
9225 return 2;
9227 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
9229 /* For XFmode constants, try to find a special 80387 instruction when
9230 optimizing for size or on those CPUs that benefit from them. */
9231 if (mode == XFmode
9232 && (optimize_function_for_size_p (cfun) || TARGET_EXT_80387_CONSTANTS))
9234 int i;
9236 if (! ext_80387_constants_init)
9237 init_ext_80387_constants ();
9239 for (i = 0; i < 5; i++)
9240 if (real_identical (&r, &ext_80387_constants_table[i]))
9241 return i + 3;
9244 /* A load of the constant -0.0 or -1.0 will be split into an
9245 fldz;fchs or fld1;fchs sequence. */
9246 if (real_isnegzero (&r))
9247 return 8;
9248 if (real_identical (&r, &dconstm1))
9249 return 9;
9251 return 0;
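/* For reference, the return values here line up with the loads emitted by
   standard_80387_constant_opcode below: -1 = not an 80387 constant,
   0 = nothing special, 1 = fldz, 2 = fld1, 3..7 = fldlg2, fldln2, fldl2e,
   fldl2t, fldpi, 8 = -0.0 (fldz;fchs), 9 = -1.0 (fld1;fchs).  */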
9254 /* Return the opcode of the special instruction to be used to load
9255 the constant X. */
9257 const char *
9258 standard_80387_constant_opcode (rtx x)
9260 switch (standard_80387_constant_p (x))
9262 case 1:
9263 return "fldz";
9264 case 2:
9265 return "fld1";
9266 case 3:
9267 return "fldlg2";
9268 case 4:
9269 return "fldln2";
9270 case 5:
9271 return "fldl2e";
9272 case 6:
9273 return "fldl2t";
9274 case 7:
9275 return "fldpi";
9276 case 8:
9277 case 9:
9278 return "#";
9279 default:
9280 gcc_unreachable ();
9284 /* Return the CONST_DOUBLE representing the 80387 constant that is
9285 loaded by the specified special instruction. The argument IDX
9286 matches the return value from standard_80387_constant_p. */
9289 standard_80387_constant_rtx (int idx)
9291 int i;
9293 if (! ext_80387_constants_init)
9294 init_ext_80387_constants ();
9296 switch (idx)
9298 case 3:
9299 case 4:
9300 case 5:
9301 case 6:
9302 case 7:
9303 i = idx - 3;
9304 break;
9306 default:
9307 gcc_unreachable ();
9310 return CONST_DOUBLE_FROM_REAL_VALUE (ext_80387_constants_table[i],
9311 XFmode);
9314 /* Return 1 if X is all 0s and 2 if X is all 1s
9315 in a supported SSE/AVX vector mode. */
9318 standard_sse_constant_p (rtx x)
9320 machine_mode mode = GET_MODE (x);
9322 if (x == const0_rtx || x == CONST0_RTX (GET_MODE (x)))
9323 return 1;
9324 if (vector_all_ones_operand (x, mode))
9325 switch (mode)
9327 case V16QImode:
9328 case V8HImode:
9329 case V4SImode:
9330 case V2DImode:
9331 if (TARGET_SSE2)
9332 return 2;
9333 case V32QImode:
9334 case V16HImode:
9335 case V8SImode:
9336 case V4DImode:
9337 if (TARGET_AVX2)
9338 return 2;
9339 case V64QImode:
9340 case V32HImode:
9341 case V16SImode:
9342 case V8DImode:
9343 if (TARGET_AVX512F)
9344 return 2;
9345 default:
9346 break;
9349 return 0;
9352 /* Return the opcode of the special instruction to be used to load
9353 the constant X. */
9355 const char *
9356 standard_sse_constant_opcode (rtx_insn *insn, rtx x)
9358 switch (standard_sse_constant_p (x))
9360 case 1:
9361 switch (get_attr_mode (insn))
9363 case MODE_XI:
9364 return "vpxord\t%g0, %g0, %g0";
9365 case MODE_V16SF:
9366 return TARGET_AVX512DQ ? "vxorps\t%g0, %g0, %g0"
9367 : "vpxord\t%g0, %g0, %g0";
9368 case MODE_V8DF:
9369 return TARGET_AVX512DQ ? "vxorpd\t%g0, %g0, %g0"
9370 : "vpxorq\t%g0, %g0, %g0";
9371 case MODE_TI:
9372 return TARGET_AVX512VL ? "vpxord\t%t0, %t0, %t0"
9373 : "%vpxor\t%0, %d0";
9374 case MODE_V2DF:
9375 return "%vxorpd\t%0, %d0";
9376 case MODE_V4SF:
9377 return "%vxorps\t%0, %d0";
9379 case MODE_OI:
9380 return TARGET_AVX512VL ? "vpxord\t%x0, %x0, %x0"
9381 : "vpxor\t%x0, %x0, %x0";
9382 case MODE_V4DF:
9383 return "vxorpd\t%x0, %x0, %x0";
9384 case MODE_V8SF:
9385 return "vxorps\t%x0, %x0, %x0";
9387 default:
9388 break;
9391 case 2:
9392 if (TARGET_AVX512VL
9393 || get_attr_mode (insn) == MODE_XI
9394 || get_attr_mode (insn) == MODE_V8DF
9395 || get_attr_mode (insn) == MODE_V16SF)
9396 return "vpternlogd\t{$0xFF, %g0, %g0, %g0|%g0, %g0, %g0, 0xFF}";
9397 if (TARGET_AVX)
9398 return "vpcmpeqd\t%0, %0, %0";
9399 else
9400 return "pcmpeqd\t%0, %0";
9402 default:
9403 break;
9405 gcc_unreachable ();
9408 /* Return true if OP contains a symbol reference. */
9410 bool
9411 symbolic_reference_mentioned_p (rtx op)
9413 const char *fmt;
9414 int i;
9416 if (GET_CODE (op) == SYMBOL_REF || GET_CODE (op) == LABEL_REF)
9417 return true;
9419 fmt = GET_RTX_FORMAT (GET_CODE (op));
9420 for (i = GET_RTX_LENGTH (GET_CODE (op)) - 1; i >= 0; i--)
9422 if (fmt[i] == 'E')
9424 int j;
9426 for (j = XVECLEN (op, i) - 1; j >= 0; j--)
9427 if (symbolic_reference_mentioned_p (XVECEXP (op, i, j)))
9428 return true;
9431 else if (fmt[i] == 'e' && symbolic_reference_mentioned_p (XEXP (op, i)))
9432 return true;
9435 return false;
9438 /* Return true if it is appropriate to emit `ret' instructions in the
9439 body of a function. Do this only if the epilogue is simple, needing a
9440 couple of insns. Prior to reloading, we can't tell how many registers
9441 must be saved, so return false then. Return false if there is no frame
9442 marker to de-allocate. */
9444 bool
9445 ix86_can_use_return_insn_p (void)
9447 struct ix86_frame frame;
9449 if (! reload_completed || frame_pointer_needed)
9450 return 0;
9452 /* Don't allow more than 32k pop, since that's all we can do
9453 with one instruction. */
9454 if (crtl->args.pops_args && crtl->args.size >= 32768)
9455 return 0;
9457 ix86_compute_frame_layout (&frame);
9458 return (frame.stack_pointer_offset == UNITS_PER_WORD
9459 && (frame.nregs + frame.nsseregs) == 0);
9462 /* Value should be nonzero if functions must have frame pointers.
9463 Zero means the frame pointer need not be set up (and parms may
9464 be accessed via the stack pointer) in functions that seem suitable. */
9466 static bool
9467 ix86_frame_pointer_required (void)
9469 /* If we accessed previous frames, then the generated code expects
9470 to be able to access the saved ebp value in our frame. */
9471 if (cfun->machine->accesses_prev_frame)
9472 return true;
9474 /* Several x86 OSes need a frame pointer for other reasons,
9475 usually pertaining to setjmp. */
9476 if (SUBTARGET_FRAME_POINTER_REQUIRED)
9477 return true;
9479 /* For older 32-bit runtimes, setjmp requires a valid frame pointer. */
9480 if (TARGET_32BIT_MS_ABI && cfun->calls_setjmp)
9481 return true;
9483 /* For Win64 SEH, very large frames need a frame pointer, as the maximum
9484 stack allocation is 4GB. */
9485 if (TARGET_64BIT_MS_ABI && get_frame_size () > SEH_MAX_FRAME_SIZE)
9486 return true;
9488 /* In ix86_option_override_internal, TARGET_OMIT_LEAF_FRAME_POINTER
9489 turns off the frame pointer by default. Turn it back on now if
9490 we've not got a leaf function. */
9491 if (TARGET_OMIT_LEAF_FRAME_POINTER
9492 && (!crtl->is_leaf
9493 || ix86_current_function_calls_tls_descriptor))
9494 return true;
9496 if (crtl->profile && !flag_fentry)
9497 return true;
9499 return false;
9502 /* Record that the current function accesses previous call frames. */
9504 void
9505 ix86_setup_frame_addresses (void)
9507 cfun->machine->accesses_prev_frame = 1;
9510 #ifndef USE_HIDDEN_LINKONCE
9511 # if defined(HAVE_GAS_HIDDEN) && (SUPPORTS_ONE_ONLY - 0)
9512 # define USE_HIDDEN_LINKONCE 1
9513 # else
9514 # define USE_HIDDEN_LINKONCE 0
9515 # endif
9516 #endif
9518 static int pic_labels_used;
9520 /* Fills in the label name that should be used for a pc thunk for
9521 the given register. */
9523 static void
9524 get_pc_thunk_name (char name[32], unsigned int regno)
9526 gcc_assert (!TARGET_64BIT);
9528 if (USE_HIDDEN_LINKONCE)
9529 sprintf (name, "__x86.get_pc_thunk.%s", reg_names[regno]);
9530 else
9531 ASM_GENERATE_INTERNAL_LABEL (name, "LPR", regno);
9535 /* This function generates the pc thunks used for -fpic; each thunk loads
9536 the return address of the caller into its register and then returns. */
9538 static void
9539 ix86_code_end (void)
9541 rtx xops[2];
9542 int regno;
9544 for (regno = AX_REG; regno <= SP_REG; regno++)
9546 char name[32];
9547 tree decl;
9549 if (!(pic_labels_used & (1 << regno)))
9550 continue;
9552 get_pc_thunk_name (name, regno);
9554 decl = build_decl (BUILTINS_LOCATION, FUNCTION_DECL,
9555 get_identifier (name),
9556 build_function_type_list (void_type_node, NULL_TREE));
9557 DECL_RESULT (decl) = build_decl (BUILTINS_LOCATION, RESULT_DECL,
9558 NULL_TREE, void_type_node);
9559 TREE_PUBLIC (decl) = 1;
9560 TREE_STATIC (decl) = 1;
9561 DECL_IGNORED_P (decl) = 1;
9563 #if TARGET_MACHO
9564 if (TARGET_MACHO)
9566 switch_to_section (darwin_sections[text_coal_section]);
9567 fputs ("\t.weak_definition\t", asm_out_file);
9568 assemble_name (asm_out_file, name);
9569 fputs ("\n\t.private_extern\t", asm_out_file);
9570 assemble_name (asm_out_file, name);
9571 putc ('\n', asm_out_file);
9572 ASM_OUTPUT_LABEL (asm_out_file, name);
9573 DECL_WEAK (decl) = 1;
9575 else
9576 #endif
9577 if (USE_HIDDEN_LINKONCE)
9579 cgraph_node::create (decl)->set_comdat_group (DECL_ASSEMBLER_NAME (decl));
9581 targetm.asm_out.unique_section (decl, 0);
9582 switch_to_section (get_named_section (decl, NULL, 0));
9584 targetm.asm_out.globalize_label (asm_out_file, name);
9585 fputs ("\t.hidden\t", asm_out_file);
9586 assemble_name (asm_out_file, name);
9587 putc ('\n', asm_out_file);
9588 ASM_DECLARE_FUNCTION_NAME (asm_out_file, name, decl);
9590 else
9592 switch_to_section (text_section);
9593 ASM_OUTPUT_LABEL (asm_out_file, name);
9596 DECL_INITIAL (decl) = make_node (BLOCK);
9597 current_function_decl = decl;
9598 init_function_start (decl);
9599 first_function_block_is_cold = false;
9600 /* Make sure unwind info is emitted for the thunk if needed. */
9601 final_start_function (emit_barrier (), asm_out_file, 1);
9603 /* Pad stack IP move with 4 instructions (two NOPs count
9604 as one instruction). */
9605 if (TARGET_PAD_SHORT_FUNCTION)
9607 int i = 8;
9609 while (i--)
9610 fputs ("\tnop\n", asm_out_file);
9613 xops[0] = gen_rtx_REG (Pmode, regno);
9614 xops[1] = gen_rtx_MEM (Pmode, stack_pointer_rtx);
9615 output_asm_insn ("mov%z0\t{%1, %0|%0, %1}", xops);
9616 output_asm_insn ("%!ret", NULL);
9617 final_end_function ();
9618 init_insn_lengths ();
9619 free_after_compilation (cfun);
9620 set_cfun (NULL);
9621 current_function_decl = NULL;
9624 if (flag_split_stack)
9625 file_end_indicate_split_stack ();
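/* For a 32-bit PIC function that needs, say, %ebx as the PIC register, the
   thunk emitted by the loop above looks roughly like this assembly sketch:

       __x86.get_pc_thunk.bx:
               movl    (%esp), %ebx
               ret

   placed in its own hidden comdat section when USE_HIDDEN_LINKONCE holds,
   or in the plain text section otherwise.  */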
9628 /* Emit code for the SET_GOT patterns. */
9630 const char *
9631 output_set_got (rtx dest, rtx label)
9633 rtx xops[3];
9635 xops[0] = dest;
9637 if (TARGET_VXWORKS_RTP && flag_pic)
9639 /* Load (*VXWORKS_GOTT_BASE) into the PIC register. */
9640 xops[2] = gen_rtx_MEM (Pmode,
9641 gen_rtx_SYMBOL_REF (Pmode, VXWORKS_GOTT_BASE));
9642 output_asm_insn ("mov{l}\t{%2, %0|%0, %2}", xops);
9644 /* Load (*VXWORKS_GOTT_BASE)[VXWORKS_GOTT_INDEX] into the PIC register.
9645 Use %P and a local symbol in order to print VXWORKS_GOTT_INDEX as
9646 an unadorned address. */
9647 xops[2] = gen_rtx_SYMBOL_REF (Pmode, VXWORKS_GOTT_INDEX);
9648 SYMBOL_REF_FLAGS (xops[2]) |= SYMBOL_FLAG_LOCAL;
9649 output_asm_insn ("mov{l}\t{%P2(%0), %0|%0, DWORD PTR %P2[%0]}", xops);
9650 return "";
9653 xops[1] = gen_rtx_SYMBOL_REF (Pmode, GOT_SYMBOL_NAME);
9655 if (!flag_pic)
9657 if (TARGET_MACHO)
9658 /* We don't need a pic base, we're not producing pic. */
9659 gcc_unreachable ();
9661 xops[2] = gen_rtx_LABEL_REF (Pmode, label ? label : gen_label_rtx ());
9662 output_asm_insn ("mov%z0\t{%2, %0|%0, %2}", xops);
9663 targetm.asm_out.internal_label (asm_out_file, "L",
9664 CODE_LABEL_NUMBER (XEXP (xops[2], 0)));
9666 else
9668 char name[32];
9669 get_pc_thunk_name (name, REGNO (dest));
9670 pic_labels_used |= 1 << REGNO (dest);
9672 xops[2] = gen_rtx_SYMBOL_REF (Pmode, ggc_strdup (name));
9673 xops[2] = gen_rtx_MEM (QImode, xops[2]);
9674 output_asm_insn ("%!call\t%X2", xops);
9676 #if TARGET_MACHO
9677 /* Output the Mach-O "canonical" pic base label name ("Lxx$pb") here.
9678 This is what will be referenced by the Mach-O PIC subsystem. */
9679 if (machopic_should_output_picbase_label () || !label)
9680 ASM_OUTPUT_LABEL (asm_out_file, MACHOPIC_FUNCTION_BASE_NAME);
9682 /* When we are restoring the pic base at the site of a nonlocal label,
9683 and we decided to emit the pic base above, we will still output a
9684 local label used for calculating the correction offset (even though
9685 the offset will be 0 in that case). */
9686 if (label)
9687 targetm.asm_out.internal_label (asm_out_file, "L",
9688 CODE_LABEL_NUMBER (label));
9689 #endif
9692 if (!TARGET_MACHO)
9693 output_asm_insn ("add%z0\t{%1, %0|%0, %1}", xops);
9695 return "";
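/* On a typical ELF target with -fpic, the SET_GOT expansion above therefore
   prints a sequence along the lines of (a sketch, with %ebx as the PIC
   register):

       call    __x86.get_pc_thunk.bx
       addl    $_GLOBAL_OFFSET_TABLE_, %ebx
*/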
9698 /* Generate a "push" pattern for input ARG. */
9700 static rtx
9701 gen_push (rtx arg)
9703 struct machine_function *m = cfun->machine;
9705 if (m->fs.cfa_reg == stack_pointer_rtx)
9706 m->fs.cfa_offset += UNITS_PER_WORD;
9707 m->fs.sp_offset += UNITS_PER_WORD;
9709 if (REG_P (arg) && GET_MODE (arg) != word_mode)
9710 arg = gen_rtx_REG (word_mode, REGNO (arg));
9712 return gen_rtx_SET (VOIDmode,
9713 gen_rtx_MEM (word_mode,
9714 gen_rtx_PRE_DEC (Pmode,
9715 stack_pointer_rtx)),
9716 arg);
9719 /* Generate a "pop" pattern for input ARG. */
9721 static rtx
9722 gen_pop (rtx arg)
9724 if (REG_P (arg) && GET_MODE (arg) != word_mode)
9725 arg = gen_rtx_REG (word_mode, REGNO (arg));
9727 return gen_rtx_SET (VOIDmode,
9728 arg,
9729 gen_rtx_MEM (word_mode,
9730 gen_rtx_POST_INC (Pmode,
9731 stack_pointer_rtx)));
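/* For example, on x86-64 gen_push for %rdi yields roughly

       (set (mem:DI (pre_dec:DI (reg:DI sp))) (reg:DI di))

   and gen_pop produces the matching post_inc load; the m->fs.sp_offset and
   cfa_offset bookkeeping in gen_push keeps the frame-state tracking in sync
   with each emitted push.  */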
9734 /* Return >= 0 if there is an unused call-clobbered register available
9735 for the entire function. */
9737 static unsigned int
9738 ix86_select_alt_pic_regnum (void)
9740 if (ix86_use_pseudo_pic_reg ())
9741 return INVALID_REGNUM;
9743 if (crtl->is_leaf
9744 && !crtl->profile
9745 && !ix86_current_function_calls_tls_descriptor)
9747 int i, drap;
9748 /* Can't use the same register for both PIC and DRAP. */
9749 if (crtl->drap_reg)
9750 drap = REGNO (crtl->drap_reg);
9751 else
9752 drap = -1;
9753 for (i = 2; i >= 0; --i)
9754 if (i != drap && !df_regs_ever_live_p (i))
9755 return i;
9758 return INVALID_REGNUM;
9761 /* Return TRUE if we need to save REGNO. */
9763 static bool
9764 ix86_save_reg (unsigned int regno, bool maybe_eh_return)
9766 if (regno == REAL_PIC_OFFSET_TABLE_REGNUM
9767 && pic_offset_table_rtx)
9769 if (ix86_use_pseudo_pic_reg ())
9771 /* REAL_PIC_OFFSET_TABLE_REGNUM used by call to
9772 _mcount in prologue. */
9773 if (!TARGET_64BIT && flag_pic && crtl->profile)
9774 return true;
9776 else if (df_regs_ever_live_p (REAL_PIC_OFFSET_TABLE_REGNUM)
9777 || crtl->profile
9778 || crtl->calls_eh_return
9779 || crtl->uses_const_pool
9780 || cfun->has_nonlocal_label)
9781 return ix86_select_alt_pic_regnum () == INVALID_REGNUM;
9784 if (crtl->calls_eh_return && maybe_eh_return)
9786 unsigned i;
9787 for (i = 0; ; i++)
9789 unsigned test = EH_RETURN_DATA_REGNO (i);
9790 if (test == INVALID_REGNUM)
9791 break;
9792 if (test == regno)
9793 return true;
9797 if (crtl->drap_reg
9798 && regno == REGNO (crtl->drap_reg)
9799 && !cfun->machine->no_drap_save_restore)
9800 return true;
9802 return (df_regs_ever_live_p (regno)
9803 && !call_used_regs[regno]
9804 && !fixed_regs[regno]
9805 && (regno != HARD_FRAME_POINTER_REGNUM || !frame_pointer_needed));
9808 /* Return the number of saved general-purpose registers. */
9810 static int
9811 ix86_nsaved_regs (void)
9813 int nregs = 0;
9814 int regno;
9816 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
9817 if (!SSE_REGNO_P (regno) && ix86_save_reg (regno, true))
9818 nregs ++;
9819 return nregs;
9822 /* Return the number of saved SSE registers. */
9824 static int
9825 ix86_nsaved_sseregs (void)
9827 int nregs = 0;
9828 int regno;
9830 if (!TARGET_64BIT_MS_ABI)
9831 return 0;
9832 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
9833 if (SSE_REGNO_P (regno) && ix86_save_reg (regno, true))
9834 nregs ++;
9835 return nregs;
9838 /* Given FROM and TO register numbers, say whether this elimination is
9839 allowed. If stack alignment is needed, we can only replace argument
9840 pointer with hard frame pointer, or replace frame pointer with stack
9841 pointer. Otherwise, frame pointer elimination is automatically
9842 handled and all other eliminations are valid. */
9844 static bool
9845 ix86_can_eliminate (const int from, const int to)
9847 if (stack_realign_fp)
9848 return ((from == ARG_POINTER_REGNUM
9849 && to == HARD_FRAME_POINTER_REGNUM)
9850 || (from == FRAME_POINTER_REGNUM
9851 && to == STACK_POINTER_REGNUM));
9852 else
9853 return to == STACK_POINTER_REGNUM ? !frame_pointer_needed : true;
9856 /* Return the offset between two registers, one to be eliminated, and the other
9857 its replacement, at the start of a routine. */
9859 HOST_WIDE_INT
9860 ix86_initial_elimination_offset (int from, int to)
9862 struct ix86_frame frame;
9863 ix86_compute_frame_layout (&frame);
9865 if (from == ARG_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM)
9866 return frame.hard_frame_pointer_offset;
9867 else if (from == FRAME_POINTER_REGNUM
9868 && to == HARD_FRAME_POINTER_REGNUM)
9869 return frame.hard_frame_pointer_offset - frame.frame_pointer_offset;
9870 else
9872 gcc_assert (to == STACK_POINTER_REGNUM);
9874 if (from == ARG_POINTER_REGNUM)
9875 return frame.stack_pointer_offset;
9877 gcc_assert (from == FRAME_POINTER_REGNUM);
9878 return frame.stack_pointer_offset - frame.frame_pointer_offset;
9882 /* In a dynamically-aligned function, we can't know the offset from
9883 stack pointer to frame pointer, so we must ensure that setjmp
9884 eliminates fp against the hard fp (%ebp) rather than trying to
9885 index from %esp up to the top of the frame across a gap that is
9886 of unknown (at compile-time) size. */
9887 static rtx
9888 ix86_builtin_setjmp_frame_value (void)
9890 return stack_realign_fp ? hard_frame_pointer_rtx : virtual_stack_vars_rtx;
9893 /* When using -fsplit-stack, the allocation routines set a field in
9894 the TCB to the bottom of the stack plus this much space, measured
9895 in bytes. */
9897 #define SPLIT_STACK_AVAILABLE 256
9899 /* Fill the ix86_frame structure describing the frame of the currently compiled function. */
9901 static void
9902 ix86_compute_frame_layout (struct ix86_frame *frame)
9904 unsigned HOST_WIDE_INT stack_alignment_needed;
9905 HOST_WIDE_INT offset;
9906 unsigned HOST_WIDE_INT preferred_alignment;
9907 HOST_WIDE_INT size = get_frame_size ();
9908 HOST_WIDE_INT to_allocate;
9910 frame->nregs = ix86_nsaved_regs ();
9911 frame->nsseregs = ix86_nsaved_sseregs ();
9913 /* The 64-bit MS ABI seems to require the stack alignment to always be 16, except
9914 in function prologues and in leaf functions. */
9915 if ((TARGET_64BIT_MS_ABI && crtl->preferred_stack_boundary < 128)
9916 && (!crtl->is_leaf || cfun->calls_alloca != 0
9917 || ix86_current_function_calls_tls_descriptor))
9919 crtl->preferred_stack_boundary = 128;
9920 crtl->stack_alignment_needed = 128;
9922 /* preferred_stack_boundary is never updated for calls
9923 expanded from a TLS descriptor.  Update it here.  We don't update it in
9924 the expand stage because, according to the comments before
9925 ix86_current_function_calls_tls_descriptor, TLS calls may be optimized
9926 away. */
9927 else if (ix86_current_function_calls_tls_descriptor
9928 && crtl->preferred_stack_boundary < PREFERRED_STACK_BOUNDARY)
9930 crtl->preferred_stack_boundary = PREFERRED_STACK_BOUNDARY;
9931 if (crtl->stack_alignment_needed < PREFERRED_STACK_BOUNDARY)
9932 crtl->stack_alignment_needed = PREFERRED_STACK_BOUNDARY;
9935 stack_alignment_needed = crtl->stack_alignment_needed / BITS_PER_UNIT;
9936 preferred_alignment = crtl->preferred_stack_boundary / BITS_PER_UNIT;
9938 gcc_assert (!size || stack_alignment_needed);
9939 gcc_assert (preferred_alignment >= STACK_BOUNDARY / BITS_PER_UNIT);
9940 gcc_assert (preferred_alignment <= stack_alignment_needed);
9942 /* For SEH we have to limit the amount of code movement into the prologue.
9943 At present we do this via a BLOCKAGE, at which point there's very little
9944 scheduling that can be done, which means that there's very little point
9945 in doing anything except PUSHs. */
9946 if (TARGET_SEH)
9947 cfun->machine->use_fast_prologue_epilogue = false;
9949 /* During reload iterations the number of registers saved can change.
9950 Recompute the value as needed.  Do not recompute when the number of registers
9951 didn't change, as reload makes multiple calls to this function and does not
9952 expect the decision to change within a single iteration. */
9953 else if (!optimize_bb_for_size_p (ENTRY_BLOCK_PTR_FOR_FN (cfun))
9954 && cfun->machine->use_fast_prologue_epilogue_nregs != frame->nregs)
9956 int count = frame->nregs;
9957 struct cgraph_node *node = cgraph_node::get (current_function_decl);
9959 cfun->machine->use_fast_prologue_epilogue_nregs = count;
9961 /* The fast prologue uses moves instead of pushes to save registers.  This
9962 is significantly longer, but also executes faster, as modern hardware
9963 can execute the moves in parallel but can't do that for push/pop.
9965 Be careful about choosing which prologue to emit: when the function takes
9966 many instructions to execute, we may use the slow version, as well as when
9967 the function is known to be outside a hot spot (this is known only with
9968 feedback).  Weight the size of the function by the number of registers
9969 to save, as it is cheap to use one or two push instructions but very
9970 slow to use many of them. */
9971 if (count)
9972 count = (count - 1) * FAST_PROLOGUE_INSN_COUNT;
9973 if (node->frequency < NODE_FREQUENCY_NORMAL
9974 || (flag_branch_probabilities
9975 && node->frequency < NODE_FREQUENCY_HOT))
9976 cfun->machine->use_fast_prologue_epilogue = false;
9977 else
9978 cfun->machine->use_fast_prologue_epilogue
9979 = !expensive_function_p (count);
9982 frame->save_regs_using_mov
9983 = (TARGET_PROLOGUE_USING_MOVE && cfun->machine->use_fast_prologue_epilogue
9984 /* If static stack checking is enabled and done with probes,
9985 the registers need to be saved before allocating the frame. */
9986 && flag_stack_check != STATIC_BUILTIN_STACK_CHECK);
9988 /* Skip return address. */
9989 offset = UNITS_PER_WORD;
9991 /* Skip pushed static chain. */
9992 if (ix86_static_chain_on_stack)
9993 offset += UNITS_PER_WORD;
9995 /* Skip saved base pointer. */
9996 if (frame_pointer_needed)
9997 offset += UNITS_PER_WORD;
9998 frame->hfp_save_offset = offset;
10000 /* The traditional frame pointer location is at the top of the frame. */
10001 frame->hard_frame_pointer_offset = offset;
10003 /* Register save area */
10004 offset += frame->nregs * UNITS_PER_WORD;
10005 frame->reg_save_offset = offset;
10007 /* On SEH target, registers are pushed just before the frame pointer
10008 location. */
10009 if (TARGET_SEH)
10010 frame->hard_frame_pointer_offset = offset;
10012 /* Align and set SSE register save area. */
10013 if (frame->nsseregs)
10015 /* The only ABI that has saved SSE registers (Win64) also has a
10016 16-byte aligned default stack, and thus we don't need to be
10017 within the re-aligned local stack frame to save them. */
10018 gcc_assert (INCOMING_STACK_BOUNDARY >= 128);
10019 offset = (offset + 16 - 1) & -16;
10020 offset += frame->nsseregs * 16;
10022 frame->sse_reg_save_offset = offset;
10024 /* The re-aligned stack starts here. Values before this point are not
10025 directly comparable with values below this point. In order to make
10026 sure that no value happens to be the same before and after, force
10027 the alignment computation below to add a non-zero value. */
10028 if (stack_realign_fp)
10029 offset = (offset + stack_alignment_needed) & -stack_alignment_needed;
10031 /* Va-arg area */
10032 frame->va_arg_size = ix86_varargs_gpr_size + ix86_varargs_fpr_size;
10033 offset += frame->va_arg_size;
10035 /* Align start of frame for local function. */
10036 if (stack_realign_fp
10037 || offset != frame->sse_reg_save_offset
10038 || size != 0
10039 || !crtl->is_leaf
10040 || cfun->calls_alloca
10041 || ix86_current_function_calls_tls_descriptor)
10042 offset = (offset + stack_alignment_needed - 1) & -stack_alignment_needed;
10044 /* Frame pointer points here. */
10045 frame->frame_pointer_offset = offset;
10047 offset += size;
10049 /* Add the outgoing arguments area.  It can be skipped if we eliminated
10050 all the function calls as dead code.
10051 Skipping is, however, impossible when the function calls alloca; the alloca
10052 expander assumes that the last crtl->outgoing_args_size bytes
10053 of the stack frame are unused. */
10054 if (ACCUMULATE_OUTGOING_ARGS
10055 && (!crtl->is_leaf || cfun->calls_alloca
10056 || ix86_current_function_calls_tls_descriptor))
10058 offset += crtl->outgoing_args_size;
10059 frame->outgoing_arguments_size = crtl->outgoing_args_size;
10061 else
10062 frame->outgoing_arguments_size = 0;
10064 /* Align stack boundary. Only needed if we're calling another function
10065 or using alloca. */
10066 if (!crtl->is_leaf || cfun->calls_alloca
10067 || ix86_current_function_calls_tls_descriptor)
10068 offset = (offset + preferred_alignment - 1) & -preferred_alignment;
10070 /* We've reached end of stack frame. */
10071 frame->stack_pointer_offset = offset;
10073 /* Size prologue needs to allocate. */
10074 to_allocate = offset - frame->sse_reg_save_offset;
10076 if ((!to_allocate && frame->nregs <= 1)
10077 || (TARGET_64BIT && to_allocate >= (HOST_WIDE_INT) 0x80000000))
10078 frame->save_regs_using_mov = false;
10080 if (ix86_using_red_zone ()
10081 && crtl->sp_is_unchanging
10082 && crtl->is_leaf
10083 && !ix86_current_function_calls_tls_descriptor)
10085 frame->red_zone_size = to_allocate;
10086 if (frame->save_regs_using_mov)
10087 frame->red_zone_size += frame->nregs * UNITS_PER_WORD;
10088 if (frame->red_zone_size > RED_ZONE_SIZE - RED_ZONE_RESERVE)
10089 frame->red_zone_size = RED_ZONE_SIZE - RED_ZONE_RESERVE;
10091 else
10092 frame->red_zone_size = 0;
10093 frame->stack_pointer_offset -= frame->red_zone_size;
10095 /* The SEH frame pointer location is near the bottom of the frame.
10096 This is enforced by the fact that the difference between the
10097 stack pointer and the frame pointer is limited to 240 bytes in
10098 the unwind data structure. */
10099 if (TARGET_SEH)
10101 HOST_WIDE_INT diff;
10103 /* If we can leave the frame pointer where it is, do so. Also, returns
10104 the establisher frame for __builtin_frame_address (0). */
10105 diff = frame->stack_pointer_offset - frame->hard_frame_pointer_offset;
10106 if (diff <= SEH_MAX_FRAME_SIZE
10107 && (diff > 240 || (diff & 15) != 0)
10108 && !crtl->accesses_prior_frames)
10110 /* Ideally we'd determine what portion of the local stack frame
10111 (within the constraint of the lowest 240) is most heavily used.
10112 But without that complication, simply bias the frame pointer
10113 by 128 bytes so as to maximize the amount of the local stack
10114 frame that is addressable with 8-bit offsets. */
10115 frame->hard_frame_pointer_offset = frame->stack_pointer_offset - 128;
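/* A rough picture of the frame laid out above, from higher to lower
   addresses (the names are the struct ix86_frame offsets recorded as each
   region is accounted for; exact values depend on the alignment rules):

       [ return address ]
       [ pushed static chain, if any ]
       [ saved frame pointer, if needed ]    <- hard_frame_pointer_offset (non-SEH)
       [ GPR save area ]                     <- reg_save_offset
       [ SSE save area, Win64 only ]         <- sse_reg_save_offset
       [ realignment gap / va_arg save area ]
       [ local variables ]                   <- frame_pointer_offset
       [ outgoing argument area ]
                                             <- stack_pointer_offset (red zone subtracted last)
*/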
10120 /* This is semi-inlined memory_address_length, but simplified
10121 since we know that we're always dealing with reg+offset, and
10122 to avoid having to create and discard all that rtl. */
10124 static inline int
10125 choose_baseaddr_len (unsigned int regno, HOST_WIDE_INT offset)
10127 int len = 4;
10129 if (offset == 0)
10131 /* EBP and R13 cannot be encoded without an offset. */
10132 len = (regno == BP_REG || regno == R13_REG);
10134 else if (IN_RANGE (offset, -128, 127))
10135 len = 1;
10137 /* ESP and R12 must be encoded with a SIB byte. */
10138 if (regno == SP_REG || regno == R12_REG)
10139 len++;
10141 return len;
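/* A few worked examples of the length computed here: (%rax) -> 0,
   8(%rax) -> 1, 1024(%rax) -> 4; (%rbp) and (%r13) -> 1 because they need
   an explicit disp8 of zero; (%rsp) and (%r12) add one more byte for the
   mandatory SIB, so 8(%rsp) -> 2.  */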
10144 /* Return an RTX that points to CFA_OFFSET within the stack frame.
10145 The valid base registers are taken from CFUN->MACHINE->FS. */
10147 static rtx
10148 choose_baseaddr (HOST_WIDE_INT cfa_offset)
10150 const struct machine_function *m = cfun->machine;
10151 rtx base_reg = NULL;
10152 HOST_WIDE_INT base_offset = 0;
10154 if (m->use_fast_prologue_epilogue)
10156 /* Choose the base register most likely to allow the most scheduling
10157 opportunities. Generally FP is valid throughout the function,
10158 while DRAP must be reloaded within the epilogue. But choose either
10159 over the SP due to increased encoding size. */
10161 if (m->fs.fp_valid)
10163 base_reg = hard_frame_pointer_rtx;
10164 base_offset = m->fs.fp_offset - cfa_offset;
10166 else if (m->fs.drap_valid)
10168 base_reg = crtl->drap_reg;
10169 base_offset = 0 - cfa_offset;
10171 else if (m->fs.sp_valid)
10173 base_reg = stack_pointer_rtx;
10174 base_offset = m->fs.sp_offset - cfa_offset;
10177 else
10179 HOST_WIDE_INT toffset;
10180 int len = 16, tlen;
10182 /* Choose the base register with the smallest address encoding.
10183 With a tie, choose FP > DRAP > SP. */
10184 if (m->fs.sp_valid)
10186 base_reg = stack_pointer_rtx;
10187 base_offset = m->fs.sp_offset - cfa_offset;
10188 len = choose_baseaddr_len (STACK_POINTER_REGNUM, base_offset);
10190 if (m->fs.drap_valid)
10192 toffset = 0 - cfa_offset;
10193 tlen = choose_baseaddr_len (REGNO (crtl->drap_reg), toffset);
10194 if (tlen <= len)
10196 base_reg = crtl->drap_reg;
10197 base_offset = toffset;
10198 len = tlen;
10201 if (m->fs.fp_valid)
10203 toffset = m->fs.fp_offset - cfa_offset;
10204 tlen = choose_baseaddr_len (HARD_FRAME_POINTER_REGNUM, toffset);
10205 if (tlen <= len)
10207 base_reg = hard_frame_pointer_rtx;
10208 base_offset = toffset;
10209 len = tlen;
10213 gcc_assert (base_reg != NULL);
10215 return plus_constant (Pmode, base_reg, base_offset);
10218 /* Emit code to save registers in the prologue. */
10220 static void
10221 ix86_emit_save_regs (void)
10223 unsigned int regno;
10224 rtx insn;
10226 for (regno = FIRST_PSEUDO_REGISTER - 1; regno-- > 0; )
10227 if (!SSE_REGNO_P (regno) && ix86_save_reg (regno, true))
10229 insn = emit_insn (gen_push (gen_rtx_REG (word_mode, regno)));
10230 RTX_FRAME_RELATED_P (insn) = 1;
10234 /* Emit a single register save at CFA - CFA_OFFSET. */
10236 static void
10237 ix86_emit_save_reg_using_mov (machine_mode mode, unsigned int regno,
10238 HOST_WIDE_INT cfa_offset)
10240 struct machine_function *m = cfun->machine;
10241 rtx reg = gen_rtx_REG (mode, regno);
10242 rtx mem, addr, base, insn;
10244 addr = choose_baseaddr (cfa_offset);
10245 mem = gen_frame_mem (mode, addr);
10247 /* For SSE saves, we need to indicate the 128-bit alignment. */
10248 set_mem_align (mem, GET_MODE_ALIGNMENT (mode));
10250 insn = emit_move_insn (mem, reg);
10251 RTX_FRAME_RELATED_P (insn) = 1;
10253 base = addr;
10254 if (GET_CODE (base) == PLUS)
10255 base = XEXP (base, 0);
10256 gcc_checking_assert (REG_P (base));
10258 /* When saving registers into a re-aligned local stack frame, avoid
10259 any tricky guessing by dwarf2out. */
10260 if (m->fs.realigned)
10262 gcc_checking_assert (stack_realign_drap);
10264 if (regno == REGNO (crtl->drap_reg))
10266 /* A bit of a hack. We force the DRAP register to be saved in
10267 the re-aligned stack frame, which provides us with a copy
10268 of the CFA that will last past the prologue. Install it. */
10269 gcc_checking_assert (cfun->machine->fs.fp_valid);
10270 addr = plus_constant (Pmode, hard_frame_pointer_rtx,
10271 cfun->machine->fs.fp_offset - cfa_offset);
10272 mem = gen_rtx_MEM (mode, addr);
10273 add_reg_note (insn, REG_CFA_DEF_CFA, mem);
10275 else
10277 /* The frame pointer is a stable reference within the
10278 aligned frame. Use it. */
10279 gcc_checking_assert (cfun->machine->fs.fp_valid);
10280 addr = plus_constant (Pmode, hard_frame_pointer_rtx,
10281 cfun->machine->fs.fp_offset - cfa_offset);
10282 mem = gen_rtx_MEM (mode, addr);
10283 add_reg_note (insn, REG_CFA_EXPRESSION,
10284 gen_rtx_SET (VOIDmode, mem, reg));
10288 /* The memory may not be relative to the current CFA register,
10289 which means that we may need to generate a new pattern for
10290 use by the unwind info. */
10291 else if (base != m->fs.cfa_reg)
10293 addr = plus_constant (Pmode, m->fs.cfa_reg,
10294 m->fs.cfa_offset - cfa_offset);
10295 mem = gen_rtx_MEM (mode, addr);
10296 add_reg_note (insn, REG_CFA_OFFSET, gen_rtx_SET (VOIDmode, mem, reg));
10300 /* Emit code to save registers using MOV insns.
10301 First register is stored at CFA - CFA_OFFSET. */
10302 static void
10303 ix86_emit_save_regs_using_mov (HOST_WIDE_INT cfa_offset)
10305 unsigned int regno;
10307 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
10308 if (!SSE_REGNO_P (regno) && ix86_save_reg (regno, true))
10310 ix86_emit_save_reg_using_mov (word_mode, regno, cfa_offset);
10311 cfa_offset -= UNITS_PER_WORD;
10315 /* Emit code to save SSE registers using MOV insns.
10316 First register is stored at CFA - CFA_OFFSET. */
10317 static void
10318 ix86_emit_save_sse_regs_using_mov (HOST_WIDE_INT cfa_offset)
10320 unsigned int regno;
10322 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
10323 if (SSE_REGNO_P (regno) && ix86_save_reg (regno, true))
10325 ix86_emit_save_reg_using_mov (V4SFmode, regno, cfa_offset);
10326 cfa_offset -= 16;
10330 static GTY(()) rtx queued_cfa_restores;
10332 /* Add a REG_CFA_RESTORE REG note to INSN, or queue it until the next stack
10333 manipulation insn.  The value is on the stack at CFA - CFA_OFFSET.
10334 Don't add the note if the previously saved value will be left untouched
10335 within the stack red zone until return, as unwinders can find the same value
10336 in the register and on the stack. */
10338 static void
10339 ix86_add_cfa_restore_note (rtx insn, rtx reg, HOST_WIDE_INT cfa_offset)
10341 if (!crtl->shrink_wrapped
10342 && cfa_offset <= cfun->machine->fs.red_zone_offset)
10343 return;
10345 if (insn)
10347 add_reg_note (insn, REG_CFA_RESTORE, reg);
10348 RTX_FRAME_RELATED_P (insn) = 1;
10350 else
10351 queued_cfa_restores
10352 = alloc_reg_note (REG_CFA_RESTORE, reg, queued_cfa_restores);
10355 /* Add queued REG_CFA_RESTORE notes if any to INSN. */
10357 static void
10358 ix86_add_queued_cfa_restore_notes (rtx insn)
10360 rtx last;
10361 if (!queued_cfa_restores)
10362 return;
10363 for (last = queued_cfa_restores; XEXP (last, 1); last = XEXP (last, 1))
10365 XEXP (last, 1) = REG_NOTES (insn);
10366 REG_NOTES (insn) = queued_cfa_restores;
10367 queued_cfa_restores = NULL_RTX;
10368 RTX_FRAME_RELATED_P (insn) = 1;
10371 /* Expand a prologue or epilogue stack adjustment.
10372 The pattern exists to put a dependency on all ebp-based memory accesses.
10373 STYLE should be negative if instructions should be marked as frame related,
10374 zero if the %r11 register is live and cannot be freely used, and positive
10375 otherwise. */
10377 static void
10378 pro_epilogue_adjust_stack (rtx dest, rtx src, rtx offset,
10379 int style, bool set_cfa)
10381 struct machine_function *m = cfun->machine;
10382 rtx insn;
10383 bool add_frame_related_expr = false;
10385 if (Pmode == SImode)
10386 insn = gen_pro_epilogue_adjust_stack_si_add (dest, src, offset);
10387 else if (x86_64_immediate_operand (offset, DImode))
10388 insn = gen_pro_epilogue_adjust_stack_di_add (dest, src, offset);
10389 else
10391 rtx tmp;
10392 /* r11 is used by indirect sibcall return as well, set before the
10393 epilogue and used after the epilogue. */
10394 if (style)
10395 tmp = gen_rtx_REG (DImode, R11_REG);
10396 else
10398 gcc_assert (src != hard_frame_pointer_rtx
10399 && dest != hard_frame_pointer_rtx);
10400 tmp = hard_frame_pointer_rtx;
10402 insn = emit_insn (gen_rtx_SET (DImode, tmp, offset));
10403 if (style < 0)
10404 add_frame_related_expr = true;
10406 insn = gen_pro_epilogue_adjust_stack_di_add (dest, src, tmp);
10409 insn = emit_insn (insn);
10410 if (style >= 0)
10411 ix86_add_queued_cfa_restore_notes (insn);
10413 if (set_cfa)
10415 rtx r;
10417 gcc_assert (m->fs.cfa_reg == src);
10418 m->fs.cfa_offset += INTVAL (offset);
10419 m->fs.cfa_reg = dest;
10421 r = gen_rtx_PLUS (Pmode, src, offset);
10422 r = gen_rtx_SET (VOIDmode, dest, r);
10423 add_reg_note (insn, REG_CFA_ADJUST_CFA, r);
10424 RTX_FRAME_RELATED_P (insn) = 1;
10426 else if (style < 0)
10428 RTX_FRAME_RELATED_P (insn) = 1;
10429 if (add_frame_related_expr)
10431 rtx r = gen_rtx_PLUS (Pmode, src, offset);
10432 r = gen_rtx_SET (VOIDmode, dest, r);
10433 add_reg_note (insn, REG_FRAME_RELATED_EXPR, r);
10437 if (dest == stack_pointer_rtx)
10439 HOST_WIDE_INT ooffset = m->fs.sp_offset;
10440 bool valid = m->fs.sp_valid;
10442 if (src == hard_frame_pointer_rtx)
10444 valid = m->fs.fp_valid;
10445 ooffset = m->fs.fp_offset;
10447 else if (src == crtl->drap_reg)
10449 valid = m->fs.drap_valid;
10450 ooffset = 0;
10452 else
10454 /* Otherwise there are two possibilities: SP itself, which we set
10455 up as the default above, or EH_RETURN_STACKADJ_RTX, which is
10456 taken care of by hand along the eh_return path. */
10457 gcc_checking_assert (src == stack_pointer_rtx
10458 || offset == const0_rtx);
10461 m->fs.sp_offset = ooffset - INTVAL (offset);
10462 m->fs.sp_valid = valid;
10466 /* Find an available register to be used as the dynamic realign argument
10467 pointer register.  Such a register will be written in the prologue and
10468 used at the beginning of the body, so it must not be
10469 1. a parameter passing register.
10470 2. the GOT pointer.
10471 We reuse the static-chain register if it is available.  Otherwise, we
10472 use DI for i386 and R13 for x86-64.  We chose R13 since it has a
10473 shorter encoding.
10475 Return: the regno of the chosen register. */
10477 static unsigned int
10478 find_drap_reg (void)
10480 tree decl = cfun->decl;
10482 if (TARGET_64BIT)
10484 /* Use R13 for a nested function or a function that needs a static chain.
10485 Since a function with a tail call may use any caller-saved
10486 register in the epilogue, DRAP must not use a caller-saved
10487 register in such a case. */
10488 if (DECL_STATIC_CHAIN (decl) || crtl->tail_call_emit)
10489 return R13_REG;
10491 return R10_REG;
10493 else
10495 /* Use DI for a nested function or a function that needs a static chain.
10496 Since a function with a tail call may use any caller-saved
10497 register in the epilogue, DRAP must not use a caller-saved
10498 register in such a case. */
10499 if (DECL_STATIC_CHAIN (decl) || crtl->tail_call_emit)
10500 return DI_REG;
10502 /* Reuse static chain register if it isn't used for parameter
10503 passing. */
10504 if (ix86_function_regparm (TREE_TYPE (decl), decl) <= 2)
10506 unsigned int ccvt = ix86_get_callcvt (TREE_TYPE (decl));
10507 if ((ccvt & (IX86_CALLCVT_FASTCALL | IX86_CALLCVT_THISCALL)) == 0)
10508 return CX_REG;
10510 return DI_REG;
10514 /* Return minimum incoming stack alignment. */
10516 static unsigned int
10517 ix86_minimum_incoming_stack_boundary (bool sibcall)
10519 unsigned int incoming_stack_boundary;
10521 /* Prefer the one specified at command line. */
10522 if (ix86_user_incoming_stack_boundary)
10523 incoming_stack_boundary = ix86_user_incoming_stack_boundary;
10524 /* In 32-bit mode, use MIN_STACK_BOUNDARY for the incoming stack
10525 boundary when -mstackrealign is used and the estimated stack
10526 alignment is 128 bits; this is not done for the sibcall check. */
10527 else if (!sibcall
10528 && !TARGET_64BIT
10529 && ix86_force_align_arg_pointer
10530 && crtl->stack_alignment_estimated == 128)
10531 incoming_stack_boundary = MIN_STACK_BOUNDARY;
10532 else
10533 incoming_stack_boundary = ix86_default_incoming_stack_boundary;
10535 /* Incoming stack alignment can be changed on individual functions
10536 via force_align_arg_pointer attribute. We use the smallest
10537 incoming stack boundary. */
10538 if (incoming_stack_boundary > MIN_STACK_BOUNDARY
10539 && lookup_attribute (ix86_force_align_arg_pointer_string,
10540 TYPE_ATTRIBUTES (TREE_TYPE (current_function_decl))))
10541 incoming_stack_boundary = MIN_STACK_BOUNDARY;
10543 /* The incoming stack frame has to be aligned at least at
10544 parm_stack_boundary. */
10545 if (incoming_stack_boundary < crtl->parm_stack_boundary)
10546 incoming_stack_boundary = crtl->parm_stack_boundary;
10548 /* The stack at the entry of main is aligned by the runtime. We use
10549 the smallest incoming stack boundary. */
10550 if (incoming_stack_boundary > MAIN_STACK_BOUNDARY
10551 && DECL_NAME (current_function_decl)
10552 && MAIN_NAME_P (DECL_NAME (current_function_decl))
10553 && DECL_FILE_SCOPE_P (current_function_decl))
10554 incoming_stack_boundary = MAIN_STACK_BOUNDARY;
10556 return incoming_stack_boundary;
10559 /* Update incoming stack boundary and estimated stack alignment. */
10561 static void
10562 ix86_update_stack_boundary (void)
10564 ix86_incoming_stack_boundary
10565 = ix86_minimum_incoming_stack_boundary (false);
10567 /* x86_64 vararg needs 16byte stack alignment for register save
10568 area. */
10569 if (TARGET_64BIT
10570 && cfun->stdarg
10571 && crtl->stack_alignment_estimated < 128)
10572 crtl->stack_alignment_estimated = 128;
10575 /* Handle the TARGET_GET_DRAP_RTX hook. Return NULL if no DRAP is
10576 needed or an rtx for DRAP otherwise. */
10578 static rtx
10579 ix86_get_drap_rtx (void)
10581 if (ix86_force_drap || !ACCUMULATE_OUTGOING_ARGS)
10582 crtl->need_drap = true;
10584 if (stack_realign_drap)
10586 /* Assign DRAP to vDRAP and return vDRAP. */
10587 unsigned int regno = find_drap_reg ();
10588 rtx drap_vreg;
10589 rtx arg_ptr;
10590 rtx_insn *seq, *insn;
10592 arg_ptr = gen_rtx_REG (Pmode, regno);
10593 crtl->drap_reg = arg_ptr;
10595 start_sequence ();
10596 drap_vreg = copy_to_reg (arg_ptr);
10597 seq = get_insns ();
10598 end_sequence ();
10600 insn = emit_insn_before (seq, NEXT_INSN (entry_of_function ()));
10601 if (!optimize)
10603 add_reg_note (insn, REG_CFA_SET_VDRAP, drap_vreg);
10604 RTX_FRAME_RELATED_P (insn) = 1;
10606 return drap_vreg;
10608 else
10609 return NULL;
10612 /* Handle the TARGET_INTERNAL_ARG_POINTER hook. */
10614 static rtx
10615 ix86_internal_arg_pointer (void)
10617 return virtual_incoming_args_rtx;
10620 struct scratch_reg {
10621 rtx reg;
10622 bool saved;
10625 /* Return a short-lived scratch register for use on function entry.
10626 In 32-bit mode, it is valid only after the registers are saved
10627 in the prologue. This register must be released by means of
10628 release_scratch_register_on_entry once it is dead. */
10630 static void
10631 get_scratch_register_on_entry (struct scratch_reg *sr)
10633 int regno;
10635 sr->saved = false;
10637 if (TARGET_64BIT)
10639 /* We always use R11 in 64-bit mode. */
10640 regno = R11_REG;
10642 else
10644 tree decl = current_function_decl, fntype = TREE_TYPE (decl);
10645 bool fastcall_p
10646 = lookup_attribute ("fastcall", TYPE_ATTRIBUTES (fntype)) != NULL_TREE;
10647 bool thiscall_p
10648 = lookup_attribute ("thiscall", TYPE_ATTRIBUTES (fntype)) != NULL_TREE;
10649 bool static_chain_p = DECL_STATIC_CHAIN (decl);
10650 int regparm = ix86_function_regparm (fntype, decl);
10651 int drap_regno
10652 = crtl->drap_reg ? REGNO (crtl->drap_reg) : INVALID_REGNUM;
10654 /* 'fastcall' sets regparm to 2, uses ecx/edx for arguments and eax
10655 for the static chain register. */
10656 if ((regparm < 1 || (fastcall_p && !static_chain_p))
10657 && drap_regno != AX_REG)
10658 regno = AX_REG;
10659 /* 'thiscall' sets regparm to 1, uses ecx for arguments and edx
10660 for the static chain register. */
10661 else if (thiscall_p && !static_chain_p && drap_regno != AX_REG)
10662 regno = AX_REG;
10663 else if (regparm < 2 && !thiscall_p && drap_regno != DX_REG)
10664 regno = DX_REG;
10665 /* ecx is the static chain register. */
10666 else if (regparm < 3 && !fastcall_p && !thiscall_p
10667 && !static_chain_p
10668 && drap_regno != CX_REG)
10669 regno = CX_REG;
10670 else if (ix86_save_reg (BX_REG, true))
10671 regno = BX_REG;
10672 /* esi is the static chain register. */
10673 else if (!(regparm == 3 && static_chain_p)
10674 && ix86_save_reg (SI_REG, true))
10675 regno = SI_REG;
10676 else if (ix86_save_reg (DI_REG, true))
10677 regno = DI_REG;
10678 else
10680 regno = (drap_regno == AX_REG ? DX_REG : AX_REG);
10681 sr->saved = true;
10685 sr->reg = gen_rtx_REG (Pmode, regno);
10686 if (sr->saved)
10688 rtx insn = emit_insn (gen_push (sr->reg));
10689 RTX_FRAME_RELATED_P (insn) = 1;
10693 /* Release a scratch register obtained from the preceding function. */
10695 static void
10696 release_scratch_register_on_entry (struct scratch_reg *sr)
10698 if (sr->saved)
10700 struct machine_function *m = cfun->machine;
10701 rtx x, insn = emit_insn (gen_pop (sr->reg));
10703 /* The RTX_FRAME_RELATED_P mechanism doesn't know about pop. */
10704 RTX_FRAME_RELATED_P (insn) = 1;
10705 x = gen_rtx_PLUS (Pmode, stack_pointer_rtx, GEN_INT (UNITS_PER_WORD));
10706 x = gen_rtx_SET (VOIDmode, stack_pointer_rtx, x);
10707 add_reg_note (insn, REG_FRAME_RELATED_EXPR, x);
10708 m->fs.sp_offset -= UNITS_PER_WORD;
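/* A minimal usage sketch for the two helpers above (illustrative only;
   the real callers are the stack probing routines below):

     struct scratch_reg sr;
     get_scratch_register_on_entry (&sr);
     ... emit insns that clobber sr.reg ...
     release_scratch_register_on_entry (&sr);

   If no free call-clobbered register was found, the pair brackets the
   use with a push/pop of the chosen register and keeps the unwind
   notes and m->fs.sp_offset consistent, as shown above.  */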
10712 #define PROBE_INTERVAL (1 << STACK_CHECK_PROBE_INTERVAL_EXP)
10714 /* Emit code to adjust the stack pointer by SIZE bytes while probing it. */
10716 static void
10717 ix86_adjust_stack_and_probe (const HOST_WIDE_INT size)
10719 /* We skip the probe for the first interval + a small dope of 4 words and
10720 probe that many bytes past the specified size to maintain a protection
10721 area at the bottom of the stack. */
10722 const int dope = 4 * UNITS_PER_WORD;
10723 rtx size_rtx = GEN_INT (size), last;
10725 /* See if we have a constant small number of probes to generate. If so,
10726 that's the easy case. The run-time loop is made up of 11 insns in the
10727 generic case while the compile-time loop is made up of 3+2*(n-1) insns
10728 for n # of intervals. */
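/* For illustration, assuming the usual 4096-byte PROBE_INTERVAL
   (STACK_CHECK_PROBE_INTERVAL_EXP normally defaults to 12) and
   SIZE == 12288, the constant case below emits roughly:

     sub sp, 8192+dope   (first interval is skipped, hence the 2x)
     or  [sp], 0         (probe)
     sub sp, 4096
     or  [sp], 0         (probe)
     sub sp, 4096        (SIZE + PROBE_INTERVAL - i)
     or  [sp], 0         (probe)
     add sp, 4096+dope   (adjust back for the extra interval)

   which leaves SP lowered by exactly SIZE.  */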
10729 if (size <= 5 * PROBE_INTERVAL)
10731 HOST_WIDE_INT i, adjust;
10732 bool first_probe = true;
10734 /* Adjust SP and probe at PROBE_INTERVAL + N * PROBE_INTERVAL for
10735 values of N from 1 until it exceeds SIZE. If only one probe is
10736 needed, this will not generate any code. Then adjust and probe
10737 to PROBE_INTERVAL + SIZE. */
10738 for (i = PROBE_INTERVAL; i < size; i += PROBE_INTERVAL)
10740 if (first_probe)
10742 adjust = 2 * PROBE_INTERVAL + dope;
10743 first_probe = false;
10745 else
10746 adjust = PROBE_INTERVAL;
10748 emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx,
10749 plus_constant (Pmode, stack_pointer_rtx,
10750 -adjust)));
10751 emit_stack_probe (stack_pointer_rtx);
10754 if (first_probe)
10755 adjust = size + PROBE_INTERVAL + dope;
10756 else
10757 adjust = size + PROBE_INTERVAL - i;
10759 emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx,
10760 plus_constant (Pmode, stack_pointer_rtx,
10761 -adjust)));
10762 emit_stack_probe (stack_pointer_rtx);
10764 /* Adjust back to account for the additional first interval. */
10765 last = emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx,
10766 plus_constant (Pmode, stack_pointer_rtx,
10767 PROBE_INTERVAL + dope)));
10770 /* Otherwise, do the same as above, but in a loop. Note that we must be
10771 extra careful with variables wrapping around because we might be at
10772 the very top (or the very bottom) of the address space and we have
10773 to be able to handle this case properly; in particular, we use an
10774 equality test for the loop condition. */
10775 else
10777 HOST_WIDE_INT rounded_size;
10778 struct scratch_reg sr;
10780 get_scratch_register_on_entry (&sr);
10783 /* Step 1: round SIZE to the previous multiple of the interval. */
10785 rounded_size = size & -PROBE_INTERVAL;
10788 /* Step 2: compute initial and final value of the loop counter. */
10790 /* SP = SP_0 + PROBE_INTERVAL. */
10791 emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx,
10792 plus_constant (Pmode, stack_pointer_rtx,
10793 - (PROBE_INTERVAL + dope))));
10795 /* LAST_ADDR = SP_0 + PROBE_INTERVAL + ROUNDED_SIZE. */
10796 emit_move_insn (sr.reg, GEN_INT (-rounded_size));
10797 emit_insn (gen_rtx_SET (VOIDmode, sr.reg,
10798 gen_rtx_PLUS (Pmode, sr.reg,
10799 stack_pointer_rtx)));
10802 /* Step 3: the loop
10804 while (SP != LAST_ADDR)
10806 SP = SP + PROBE_INTERVAL
10807 probe at SP
10810 adjusts SP and probes to PROBE_INTERVAL + N * PROBE_INTERVAL for
10811 values of N from 1 until it is equal to ROUNDED_SIZE. */
10813 emit_insn (ix86_gen_adjust_stack_and_probe (sr.reg, sr.reg, size_rtx));
10816 /* Step 4: adjust SP and probe at PROBE_INTERVAL + SIZE if we cannot
10817 assert at compile-time that SIZE is equal to ROUNDED_SIZE. */
10819 if (size != rounded_size)
10821 emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx,
10822 plus_constant (Pmode, stack_pointer_rtx,
10823 rounded_size - size)));
10824 emit_stack_probe (stack_pointer_rtx);
10827 /* Adjust back to account for the additional first interval. */
10828 last = emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx,
10829 plus_constant (Pmode, stack_pointer_rtx,
10830 PROBE_INTERVAL + dope)));
10832 release_scratch_register_on_entry (&sr);
10835 gcc_assert (cfun->machine->fs.cfa_reg != stack_pointer_rtx);
10837 /* Even if the stack pointer isn't the CFA register, we need to correctly
10838 describe the adjustments made to it, in particular differentiate the
10839 frame-related ones from the frame-unrelated ones. */
10840 if (size > 0)
10842 rtx expr = gen_rtx_SEQUENCE (VOIDmode, rtvec_alloc (2));
10843 XVECEXP (expr, 0, 0)
10844 = gen_rtx_SET (VOIDmode, stack_pointer_rtx,
10845 plus_constant (Pmode, stack_pointer_rtx, -size));
10846 XVECEXP (expr, 0, 1)
10847 = gen_rtx_SET (VOIDmode, stack_pointer_rtx,
10848 plus_constant (Pmode, stack_pointer_rtx,
10849 PROBE_INTERVAL + dope + size));
10850 add_reg_note (last, REG_FRAME_RELATED_EXPR, expr);
10851 RTX_FRAME_RELATED_P (last) = 1;
10853 cfun->machine->fs.sp_offset += size;
10856 /* Make sure nothing is scheduled before we are done. */
10857 emit_insn (gen_blockage ());
10860 /* Adjust the stack pointer up to REG while probing it. */
10862 const char *
10863 output_adjust_stack_and_probe (rtx reg)
10865 static int labelno = 0;
10866 char loop_lab[32], end_lab[32];
10867 rtx xops[2];
10869 ASM_GENERATE_INTERNAL_LABEL (loop_lab, "LPSRL", labelno);
10870 ASM_GENERATE_INTERNAL_LABEL (end_lab, "LPSRE", labelno++);
10872 ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, loop_lab);
10874 /* Jump to END_LAB if SP == LAST_ADDR. */
10875 xops[0] = stack_pointer_rtx;
10876 xops[1] = reg;
10877 output_asm_insn ("cmp%z0\t{%1, %0|%0, %1}", xops);
10878 fputs ("\tje\t", asm_out_file);
10879 assemble_name_raw (asm_out_file, end_lab);
10880 fputc ('\n', asm_out_file);
10882 /* SP = SP + PROBE_INTERVAL. */
10883 xops[1] = GEN_INT (PROBE_INTERVAL);
10884 output_asm_insn ("sub%z0\t{%1, %0|%0, %1}", xops);
10886 /* Probe at SP. */
10887 xops[1] = const0_rtx;
10888 output_asm_insn ("or%z0\t{%1, (%0)|DWORD PTR [%0], %1}", xops);
10890 fprintf (asm_out_file, "\tjmp\t");
10891 assemble_name_raw (asm_out_file, loop_lab);
10892 fputc ('\n', asm_out_file);
10894 ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, end_lab);
10896 return "";
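/* On a 32-bit target the templates above expand to roughly the
   following AT&T-syntax loop (labels and the 4096-byte interval are
   illustrative; 64-bit targets use %rsp and the q suffix):

     .LPSRL0:  cmpl  %reg, %esp
               je    .LPSRE0
               subl  $4096, %esp
               orl   $0, (%esp)
               jmp   .LPSRL0
     .LPSRE0:  */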
10899 /* Emit code to probe a range of stack addresses from FIRST to FIRST+SIZE,
10900 inclusive. These are offsets from the current stack pointer. */
10902 static void
10903 ix86_emit_probe_stack_range (HOST_WIDE_INT first, HOST_WIDE_INT size)
10905 /* See if we have a constant small number of probes to generate. If so,
10906 that's the easy case. The run-time loop is made up of 7 insns in the
10907 generic case while the compile-time loop is made up of n insns for n #
10908 of intervals. */
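/* For illustration, with FIRST == 8192, SIZE == 10000 and a 4096-byte
   PROBE_INTERVAL, the constant case below probes at sp-12288, sp-16384
   and finally sp-18192 (i.e. sp - (FIRST + SIZE)), without ever moving
   the stack pointer.  */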
10909 if (size <= 7 * PROBE_INTERVAL)
10911 HOST_WIDE_INT i;
10913 /* Probe at FIRST + N * PROBE_INTERVAL for values of N from 1 until
10914 it exceeds SIZE. If only one probe is needed, this will not
10915 generate any code. Then probe at FIRST + SIZE. */
10916 for (i = PROBE_INTERVAL; i < size; i += PROBE_INTERVAL)
10917 emit_stack_probe (plus_constant (Pmode, stack_pointer_rtx,
10918 -(first + i)));
10920 emit_stack_probe (plus_constant (Pmode, stack_pointer_rtx,
10921 -(first + size)));
10924 /* Otherwise, do the same as above, but in a loop. Note that we must be
10925 extra careful with variables wrapping around because we might be at
10926 the very top (or the very bottom) of the address space and we have
10927 to be able to handle this case properly; in particular, we use an
10928 equality test for the loop condition. */
10929 else
10931 HOST_WIDE_INT rounded_size, last;
10932 struct scratch_reg sr;
10934 get_scratch_register_on_entry (&sr);
10937 /* Step 1: round SIZE to the previous multiple of the interval. */
10939 rounded_size = size & -PROBE_INTERVAL;
10942 /* Step 2: compute initial and final value of the loop counter. */
10944 /* TEST_OFFSET = FIRST. */
10945 emit_move_insn (sr.reg, GEN_INT (-first));
10947 /* LAST_OFFSET = FIRST + ROUNDED_SIZE. */
10948 last = first + rounded_size;
10951 /* Step 3: the loop
10953 while (TEST_ADDR != LAST_ADDR)
10955 TEST_ADDR = TEST_ADDR + PROBE_INTERVAL
10956 probe at TEST_ADDR
10959 probes at FIRST + N * PROBE_INTERVAL for values of N from 1
10960 until it is equal to ROUNDED_SIZE. */
10962 emit_insn (ix86_gen_probe_stack_range (sr.reg, sr.reg, GEN_INT (-last)));
10965 /* Step 4: probe at FIRST + SIZE if we cannot assert at compile-time
10966 that SIZE is equal to ROUNDED_SIZE. */
10968 if (size != rounded_size)
10969 emit_stack_probe (plus_constant (Pmode,
10970 gen_rtx_PLUS (Pmode,
10971 stack_pointer_rtx,
10972 sr.reg),
10973 rounded_size - size));
10975 release_scratch_register_on_entry (&sr);
10978 /* Make sure nothing is scheduled before we are done. */
10979 emit_insn (gen_blockage ());
10982 /* Probe a range of stack addresses from REG to END, inclusive. These are
10983 offsets from the current stack pointer. */
10985 const char *
10986 output_probe_stack_range (rtx reg, rtx end)
10988 static int labelno = 0;
10989 char loop_lab[32], end_lab[32];
10990 rtx xops[3];
10992 ASM_GENERATE_INTERNAL_LABEL (loop_lab, "LPSRL", labelno);
10993 ASM_GENERATE_INTERNAL_LABEL (end_lab, "LPSRE", labelno++);
10995 ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, loop_lab);
10997 /* Jump to END_LAB if TEST_ADDR == LAST_ADDR. */
10998 xops[0] = reg;
10999 xops[1] = end;
11000 output_asm_insn ("cmp%z0\t{%1, %0|%0, %1}", xops);
11001 fputs ("\tje\t", asm_out_file);
11002 assemble_name_raw (asm_out_file, end_lab);
11003 fputc ('\n', asm_out_file);
11005 /* TEST_ADDR = TEST_ADDR + PROBE_INTERVAL. */
11006 xops[1] = GEN_INT (PROBE_INTERVAL);
11007 output_asm_insn ("sub%z0\t{%1, %0|%0, %1}", xops);
11009 /* Probe at TEST_ADDR. */
11010 xops[0] = stack_pointer_rtx;
11011 xops[1] = reg;
11012 xops[2] = const0_rtx;
11013 output_asm_insn ("or%z0\t{%2, (%0,%1)|DWORD PTR [%0+%1], %2}", xops);
11015 fprintf (asm_out_file, "\tjmp\t");
11016 assemble_name_raw (asm_out_file, loop_lab);
11017 fputc ('\n', asm_out_file);
11019 ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, end_lab);
11021 return "";
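/* On a 32-bit target the templates above expand to roughly the
   following AT&T-syntax loop (labels and the 4096-byte interval are
   illustrative).  REG holds a negated offset that counts down towards
   -LAST, which is why the "TEST_ADDR += PROBE_INTERVAL" step of the
   algorithm is emitted as a sub:

     .LPSRL1:  cmpl  %end, %reg
               je    .LPSRE1
               subl  $4096, %reg
               orl   $0, (%esp,%reg)
               jmp   .LPSRL1
     .LPSRE1:  */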
11024 /* Finalize stack_realign_needed flag, which will guide prologue/epilogue
11025 to be generated in correct form. */
11026 static void
11027 ix86_finalize_stack_realign_flags (void)
11029 /* Check if stack realignment is really needed after reload, and
11030 store the result in cfun. */
11031 unsigned int incoming_stack_boundary
11032 = (crtl->parm_stack_boundary > ix86_incoming_stack_boundary
11033 ? crtl->parm_stack_boundary : ix86_incoming_stack_boundary);
11034 unsigned int stack_realign = (incoming_stack_boundary
11035 < (crtl->is_leaf
11036 ? crtl->max_used_stack_slot_alignment
11037 : crtl->stack_alignment_needed));
11039 if (crtl->stack_realign_finalized)
11041 /* After stack_realign_needed is finalized, we can no longer
11042 change it. */
11043 gcc_assert (crtl->stack_realign_needed == stack_realign);
11044 return;
11047 /* If the only reason for frame_pointer_needed is that we conservatively
11048 assumed stack realignment might be needed, but in the end nothing that
11049 needed the stack alignment had been spilled, clear frame_pointer_needed
11050 and say we don't need stack realignment. */
11051 if (stack_realign
11052 && frame_pointer_needed
11053 && crtl->is_leaf
11054 && flag_omit_frame_pointer
11055 && crtl->sp_is_unchanging
11056 && !ix86_current_function_calls_tls_descriptor
11057 && !crtl->accesses_prior_frames
11058 && !cfun->calls_alloca
11059 && !crtl->calls_eh_return
11060 && !(flag_stack_check && STACK_CHECK_MOVING_SP)
11061 && !ix86_frame_pointer_required ()
11062 && get_frame_size () == 0
11063 && ix86_nsaved_sseregs () == 0
11064 && ix86_varargs_gpr_size + ix86_varargs_fpr_size == 0)
11066 HARD_REG_SET set_up_by_prologue, prologue_used;
11067 basic_block bb;
11069 CLEAR_HARD_REG_SET (prologue_used);
11070 CLEAR_HARD_REG_SET (set_up_by_prologue);
11071 add_to_hard_reg_set (&set_up_by_prologue, Pmode, STACK_POINTER_REGNUM);
11072 add_to_hard_reg_set (&set_up_by_prologue, Pmode, ARG_POINTER_REGNUM);
11073 add_to_hard_reg_set (&set_up_by_prologue, Pmode,
11074 HARD_FRAME_POINTER_REGNUM);
11075 FOR_EACH_BB_FN (bb, cfun)
11077 rtx_insn *insn;
11078 FOR_BB_INSNS (bb, insn)
11079 if (NONDEBUG_INSN_P (insn)
11080 && requires_stack_frame_p (insn, prologue_used,
11081 set_up_by_prologue))
11083 crtl->stack_realign_needed = stack_realign;
11084 crtl->stack_realign_finalized = true;
11085 return;
11089 /* If drap has been set, but it actually isn't live at the start
11090 of the function, there is no reason to set it up. */
11091 if (crtl->drap_reg)
11093 basic_block bb = ENTRY_BLOCK_PTR_FOR_FN (cfun)->next_bb;
11094 if (! REGNO_REG_SET_P (DF_LR_IN (bb), REGNO (crtl->drap_reg)))
11096 crtl->drap_reg = NULL_RTX;
11097 crtl->need_drap = false;
11100 else
11101 cfun->machine->no_drap_save_restore = true;
11103 frame_pointer_needed = false;
11104 stack_realign = false;
11105 crtl->max_used_stack_slot_alignment = incoming_stack_boundary;
11106 crtl->stack_alignment_needed = incoming_stack_boundary;
11107 crtl->stack_alignment_estimated = incoming_stack_boundary;
11108 if (crtl->preferred_stack_boundary > incoming_stack_boundary)
11109 crtl->preferred_stack_boundary = incoming_stack_boundary;
11110 df_finish_pass (true);
11111 df_scan_alloc (NULL);
11112 df_scan_blocks ();
11113 df_compute_regs_ever_live (true);
11114 df_analyze ();
11117 crtl->stack_realign_needed = stack_realign;
11118 crtl->stack_realign_finalized = true;
11121 /* Delete SET_GOT right after entry block if it is allocated to reg. */
11123 static void
11124 ix86_elim_entry_set_got (rtx reg)
11126 basic_block bb = ENTRY_BLOCK_PTR_FOR_FN (cfun)->next_bb;
11127 rtx_insn *c_insn = BB_HEAD (bb);
11128 if (!NONDEBUG_INSN_P (c_insn))
11129 c_insn = next_nonnote_nondebug_insn (c_insn);
11130 if (c_insn && NONJUMP_INSN_P (c_insn))
11132 rtx pat = PATTERN (c_insn);
11133 if (GET_CODE (pat) == PARALLEL)
11135 rtx vec = XVECEXP (pat, 0, 0);
11136 if (GET_CODE (vec) == SET
11137 && XINT (XEXP (vec, 1), 1) == UNSPEC_SET_GOT
11138 && REGNO (XEXP (vec, 0)) == REGNO (reg))
11139 delete_insn (c_insn);
11144 /* Expand the prologue into a bunch of separate insns. */
11146 void
11147 ix86_expand_prologue (void)
11149 struct machine_function *m = cfun->machine;
11150 rtx insn, t;
11151 struct ix86_frame frame;
11152 HOST_WIDE_INT allocate;
11153 bool int_registers_saved;
11154 bool sse_registers_saved;
11156 ix86_finalize_stack_realign_flags ();
11158 /* DRAP should not coexist with stack_realign_fp */
11159 gcc_assert (!(crtl->drap_reg && stack_realign_fp));
11161 memset (&m->fs, 0, sizeof (m->fs));
11163 /* Initialize CFA state for before the prologue. */
11164 m->fs.cfa_reg = stack_pointer_rtx;
11165 m->fs.cfa_offset = INCOMING_FRAME_SP_OFFSET;
11167 /* Track SP offset to the CFA. We continue tracking this after we've
11168 swapped the CFA register away from SP. In the case of re-alignment
11169 this is fudged; we're interested in offsets within the local frame. */
11170 m->fs.sp_offset = INCOMING_FRAME_SP_OFFSET;
11171 m->fs.sp_valid = true;
11173 ix86_compute_frame_layout (&frame);
11175 if (!TARGET_64BIT && ix86_function_ms_hook_prologue (current_function_decl))
11177 /* We should have already generated an error for any use of
11178 ms_hook on a nested function. */
11179 gcc_checking_assert (!ix86_static_chain_on_stack);
11181 /* Check if profiling is active and we shall use the profiling before
11182 prologue variant. If so, sorry. */
11183 if (crtl->profile && flag_fentry != 0)
11184 sorry ("ms_hook_prologue attribute isn%'t compatible "
11185 "with -mfentry for 32-bit");
11187 /* In ix86_asm_output_function_label we emitted:
11188 8b ff movl.s %edi,%edi
11189 55 push %ebp
11190 8b ec movl.s %esp,%ebp
11192 This matches the hookable function prologue in Win32 API
11193 functions in Microsoft Windows XP Service Pack 2 and newer.
11194 Wine uses this to enable Windows apps to hook the Win32 API
11195 functions provided by Wine.
11197 What that means is that we've already set up the frame pointer. */
11199 if (frame_pointer_needed
11200 && !(crtl->drap_reg && crtl->stack_realign_needed))
11202 rtx push, mov;
11204 /* We've decided to use the frame pointer already set up.
11205 Describe this to the unwinder by pretending that both
11206 push and mov insns happen right here.
11208 Putting the unwind info here at the end of the ms_hook
11209 is done so that we can make absolutely certain we get
11210 the required byte sequence at the start of the function,
11211 rather than relying on an assembler that can produce
11212 the exact encoding required.
11214 However it does mean (in the unpatched case) that we have
11215 a 1 insn window where the asynchronous unwind info is
11216 incorrect. However, if we placed the unwind info at
11217 its correct location we would have incorrect unwind info
11218 in the patched case. Which is probably all moot since
11219 I don't expect Wine generates dwarf2 unwind info for the
11220 system libraries that use this feature. */
11222 insn = emit_insn (gen_blockage ());
11224 push = gen_push (hard_frame_pointer_rtx);
11225 mov = gen_rtx_SET (VOIDmode, hard_frame_pointer_rtx,
11226 stack_pointer_rtx);
11227 RTX_FRAME_RELATED_P (push) = 1;
11228 RTX_FRAME_RELATED_P (mov) = 1;
11230 RTX_FRAME_RELATED_P (insn) = 1;
11231 add_reg_note (insn, REG_FRAME_RELATED_EXPR,
11232 gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, push, mov)));
11234 /* Note that gen_push incremented m->fs.cfa_offset, even
11235 though we didn't emit the push insn here. */
11236 m->fs.cfa_reg = hard_frame_pointer_rtx;
11237 m->fs.fp_offset = m->fs.cfa_offset;
11238 m->fs.fp_valid = true;
11240 else
11242 /* The frame pointer is not needed so pop %ebp again.
11243 This leaves us with a pristine state. */
11244 emit_insn (gen_pop (hard_frame_pointer_rtx));
11248 /* The first insn of a function that accepts its static chain on the
11249 stack is to push the register that would be filled in by a direct
11250 call. This insn will be skipped by the trampoline. */
11251 else if (ix86_static_chain_on_stack)
11253 insn = emit_insn (gen_push (ix86_static_chain (cfun->decl, false)));
11254 emit_insn (gen_blockage ());
11256 /* We don't want to interpret this push insn as a register save,
11257 only as a stack adjustment. The real copy of the register as
11258 a save will be done later, if needed. */
11259 t = plus_constant (Pmode, stack_pointer_rtx, -UNITS_PER_WORD);
11260 t = gen_rtx_SET (VOIDmode, stack_pointer_rtx, t);
11261 add_reg_note (insn, REG_CFA_ADJUST_CFA, t);
11262 RTX_FRAME_RELATED_P (insn) = 1;
11265 /* Emit prologue code to adjust stack alignment and set up DRAP, in case
11266 DRAP is needed and stack realignment is really needed after reload. */
11267 if (stack_realign_drap)
11269 int align_bytes = crtl->stack_alignment_needed / BITS_PER_UNIT;
11271 /* Only need to push parameter pointer reg if it is caller saved. */
11272 if (!call_used_regs[REGNO (crtl->drap_reg)])
11274 /* Push arg pointer reg */
11275 insn = emit_insn (gen_push (crtl->drap_reg));
11276 RTX_FRAME_RELATED_P (insn) = 1;
11279 /* Grab the argument pointer. */
11280 t = plus_constant (Pmode, stack_pointer_rtx, m->fs.sp_offset);
11281 insn = emit_insn (gen_rtx_SET (VOIDmode, crtl->drap_reg, t));
11282 RTX_FRAME_RELATED_P (insn) = 1;
11283 m->fs.cfa_reg = crtl->drap_reg;
11284 m->fs.cfa_offset = 0;
11286 /* Align the stack. */
11287 insn = emit_insn (ix86_gen_andsp (stack_pointer_rtx,
11288 stack_pointer_rtx,
11289 GEN_INT (-align_bytes)));
11290 RTX_FRAME_RELATED_P (insn) = 1;
11292 /* Replicate the return address on the stack so that the return
11293 address can be reached via the (argp - 1) slot. This is needed
11294 to implement macro RETURN_ADDR_RTX and intrinsic function
11295 expand_builtin_return_addr etc. */
11296 t = plus_constant (Pmode, crtl->drap_reg, -UNITS_PER_WORD);
11297 t = gen_frame_mem (word_mode, t);
11298 insn = emit_insn (gen_push (t));
11299 RTX_FRAME_RELATED_P (insn) = 1;
11301 /* For the purposes of frame and register save area addressing,
11302 we've started over with a new frame. */
11303 m->fs.sp_offset = INCOMING_FRAME_SP_OFFSET;
11304 m->fs.realigned = true;
11307 int_registers_saved = (frame.nregs == 0);
11308 sse_registers_saved = (frame.nsseregs == 0);
11310 if (frame_pointer_needed && !m->fs.fp_valid)
11312 /* Note: AT&T enter does NOT have reversed args. Enter is probably
11313 slower on all targets. Also sdb doesn't like it. */
11314 insn = emit_insn (gen_push (hard_frame_pointer_rtx));
11315 RTX_FRAME_RELATED_P (insn) = 1;
11317 /* Push registers now, before setting the frame pointer
11318 on SEH target. */
11319 if (!int_registers_saved
11320 && TARGET_SEH
11321 && !frame.save_regs_using_mov)
11323 ix86_emit_save_regs ();
11324 int_registers_saved = true;
11325 gcc_assert (m->fs.sp_offset == frame.reg_save_offset);
11328 if (m->fs.sp_offset == frame.hard_frame_pointer_offset)
11330 insn = emit_move_insn (hard_frame_pointer_rtx, stack_pointer_rtx);
11331 RTX_FRAME_RELATED_P (insn) = 1;
11333 if (m->fs.cfa_reg == stack_pointer_rtx)
11334 m->fs.cfa_reg = hard_frame_pointer_rtx;
11335 m->fs.fp_offset = m->fs.sp_offset;
11336 m->fs.fp_valid = true;
11340 if (!int_registers_saved)
11342 /* If saving registers via PUSH, do so now. */
11343 if (!frame.save_regs_using_mov)
11345 ix86_emit_save_regs ();
11346 int_registers_saved = true;
11347 gcc_assert (m->fs.sp_offset == frame.reg_save_offset);
11350 /* When using the red zone we may start register saving before allocating
11351 the stack frame, saving one cycle of the prologue. However, avoid
11352 doing this if we have to probe the stack; at least on x86_64 the
11353 stack probe can turn into a call that clobbers a red zone location. */
11354 else if (ix86_using_red_zone ()
11355 && (! TARGET_STACK_PROBE
11356 || frame.stack_pointer_offset < CHECK_STACK_LIMIT))
11358 ix86_emit_save_regs_using_mov (frame.reg_save_offset);
11359 int_registers_saved = true;
11363 if (stack_realign_fp)
11365 int align_bytes = crtl->stack_alignment_needed / BITS_PER_UNIT;
11366 gcc_assert (align_bytes > MIN_STACK_BOUNDARY / BITS_PER_UNIT);
11368 /* The computation of the size of the re-aligned stack frame means
11369 that we must allocate the size of the register save area before
11370 performing the actual alignment. Otherwise we cannot guarantee
11371 that there's enough storage above the realignment point. */
11372 if (m->fs.sp_offset != frame.sse_reg_save_offset)
11373 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
11374 GEN_INT (m->fs.sp_offset
11375 - frame.sse_reg_save_offset),
11376 -1, false);
11378 /* Align the stack. */
11379 insn = emit_insn (ix86_gen_andsp (stack_pointer_rtx,
11380 stack_pointer_rtx,
11381 GEN_INT (-align_bytes)));
11383 /* For the purposes of register save area addressing, the stack
11384 pointer is no longer valid. As for the value of sp_offset,
11385 see ix86_compute_frame_layout, which we need to match in order
11386 to pass verification of stack_pointer_offset at the end. */
11387 m->fs.sp_offset = (m->fs.sp_offset + align_bytes) & -align_bytes;
11388 m->fs.sp_valid = false;
11391 allocate = frame.stack_pointer_offset - m->fs.sp_offset;
11393 if (flag_stack_usage_info)
11395 /* We start to count from ARG_POINTER. */
11396 HOST_WIDE_INT stack_size = frame.stack_pointer_offset;
11398 /* If it was realigned, take into account the fake frame. */
11399 if (stack_realign_drap)
11401 if (ix86_static_chain_on_stack)
11402 stack_size += UNITS_PER_WORD;
11404 if (!call_used_regs[REGNO (crtl->drap_reg)])
11405 stack_size += UNITS_PER_WORD;
11407 /* This over-estimates by 1 minimal-stack-alignment-unit but
11408 mitigates that by counting in the new return address slot. */
11409 current_function_dynamic_stack_size
11410 += crtl->stack_alignment_needed / BITS_PER_UNIT;
11413 current_function_static_stack_size = stack_size;
11416 /* On SEH target with very large frame size, allocate an area to save
11417 SSE registers (as the very large allocation won't be described). */
11418 if (TARGET_SEH
11419 && frame.stack_pointer_offset > SEH_MAX_FRAME_SIZE
11420 && !sse_registers_saved)
11422 HOST_WIDE_INT sse_size =
11423 frame.sse_reg_save_offset - frame.reg_save_offset;
11425 gcc_assert (int_registers_saved);
11427 /* No need to do stack checking as the area will be immediately
11428 written. */
11429 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
11430 GEN_INT (-sse_size), -1,
11431 m->fs.cfa_reg == stack_pointer_rtx);
11432 allocate -= sse_size;
11433 ix86_emit_save_sse_regs_using_mov (frame.sse_reg_save_offset);
11434 sse_registers_saved = true;
11437 /* The stack has already been decremented by the instruction calling us
11438 so probe if the size is non-negative to preserve the protection area. */
11439 if (allocate >= 0 && flag_stack_check == STATIC_BUILTIN_STACK_CHECK)
11441 /* We expect the registers to be saved when probes are used. */
11442 gcc_assert (int_registers_saved);
11444 if (STACK_CHECK_MOVING_SP)
11446 if (!(crtl->is_leaf && !cfun->calls_alloca
11447 && allocate <= PROBE_INTERVAL))
11449 ix86_adjust_stack_and_probe (allocate);
11450 allocate = 0;
11453 else
11455 HOST_WIDE_INT size = allocate;
11457 if (TARGET_64BIT && size >= (HOST_WIDE_INT) 0x80000000)
11458 size = 0x80000000 - STACK_CHECK_PROTECT - 1;
11460 if (TARGET_STACK_PROBE)
11462 if (crtl->is_leaf && !cfun->calls_alloca)
11464 if (size > PROBE_INTERVAL)
11465 ix86_emit_probe_stack_range (0, size);
11467 else
11468 ix86_emit_probe_stack_range (0, size + STACK_CHECK_PROTECT);
11470 else
11472 if (crtl->is_leaf && !cfun->calls_alloca)
11474 if (size > PROBE_INTERVAL && size > STACK_CHECK_PROTECT)
11475 ix86_emit_probe_stack_range (STACK_CHECK_PROTECT,
11476 size - STACK_CHECK_PROTECT);
11478 else
11479 ix86_emit_probe_stack_range (STACK_CHECK_PROTECT, size);
11484 if (allocate == 0)
11486 else if (!ix86_target_stack_probe ()
11487 || frame.stack_pointer_offset < CHECK_STACK_LIMIT)
11489 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
11490 GEN_INT (-allocate), -1,
11491 m->fs.cfa_reg == stack_pointer_rtx);
11493 else
11495 rtx eax = gen_rtx_REG (Pmode, AX_REG);
11496 rtx r10 = NULL;
11497 rtx (*adjust_stack_insn)(rtx, rtx, rtx);
11498 const bool sp_is_cfa_reg = (m->fs.cfa_reg == stack_pointer_rtx);
11499 bool eax_live = ix86_eax_live_at_start_p ();
11500 bool r10_live = false;
11502 if (TARGET_64BIT)
11503 r10_live = (DECL_STATIC_CHAIN (current_function_decl) != 0);
11505 if (eax_live)
11507 insn = emit_insn (gen_push (eax));
11508 allocate -= UNITS_PER_WORD;
11509 /* Note that SEH directives need to continue tracking the stack
11510 pointer even after the frame pointer has been set up. */
11511 if (sp_is_cfa_reg || TARGET_SEH)
11513 if (sp_is_cfa_reg)
11514 m->fs.cfa_offset += UNITS_PER_WORD;
11515 RTX_FRAME_RELATED_P (insn) = 1;
11519 if (r10_live)
11521 r10 = gen_rtx_REG (Pmode, R10_REG);
11522 insn = emit_insn (gen_push (r10));
11523 allocate -= UNITS_PER_WORD;
11524 if (sp_is_cfa_reg || TARGET_SEH)
11526 if (sp_is_cfa_reg)
11527 m->fs.cfa_offset += UNITS_PER_WORD;
11528 RTX_FRAME_RELATED_P (insn) = 1;
11532 emit_move_insn (eax, GEN_INT (allocate));
11533 emit_insn (ix86_gen_allocate_stack_worker (eax, eax));
11535 /* Use the fact that AX still contains ALLOCATE. */
11536 adjust_stack_insn = (Pmode == DImode
11537 ? gen_pro_epilogue_adjust_stack_di_sub
11538 : gen_pro_epilogue_adjust_stack_si_sub);
11540 insn = emit_insn (adjust_stack_insn (stack_pointer_rtx,
11541 stack_pointer_rtx, eax));
11543 if (sp_is_cfa_reg || TARGET_SEH)
11545 if (sp_is_cfa_reg)
11546 m->fs.cfa_offset += allocate;
11547 RTX_FRAME_RELATED_P (insn) = 1;
11548 add_reg_note (insn, REG_FRAME_RELATED_EXPR,
11549 gen_rtx_SET (VOIDmode, stack_pointer_rtx,
11550 plus_constant (Pmode, stack_pointer_rtx,
11551 -allocate)));
11553 m->fs.sp_offset += allocate;
11555 /* Use stack_pointer_rtx for relative addressing so that code
11556 works for realigned stack, too. */
11557 if (r10_live && eax_live)
11559 t = gen_rtx_PLUS (Pmode, stack_pointer_rtx, eax);
11560 emit_move_insn (gen_rtx_REG (word_mode, R10_REG),
11561 gen_frame_mem (word_mode, t));
11562 t = plus_constant (Pmode, t, UNITS_PER_WORD);
11563 emit_move_insn (gen_rtx_REG (word_mode, AX_REG),
11564 gen_frame_mem (word_mode, t));
11566 else if (eax_live || r10_live)
11568 t = gen_rtx_PLUS (Pmode, stack_pointer_rtx, eax);
11569 emit_move_insn (gen_rtx_REG (word_mode,
11570 (eax_live ? AX_REG : R10_REG)),
11571 gen_frame_mem (word_mode, t));
11574 gcc_assert (m->fs.sp_offset == frame.stack_pointer_offset);
11576 /* If we haven't already set up the frame pointer, do so now. */
11577 if (frame_pointer_needed && !m->fs.fp_valid)
11579 insn = ix86_gen_add3 (hard_frame_pointer_rtx, stack_pointer_rtx,
11580 GEN_INT (frame.stack_pointer_offset
11581 - frame.hard_frame_pointer_offset));
11582 insn = emit_insn (insn);
11583 RTX_FRAME_RELATED_P (insn) = 1;
11584 add_reg_note (insn, REG_CFA_ADJUST_CFA, NULL);
11586 if (m->fs.cfa_reg == stack_pointer_rtx)
11587 m->fs.cfa_reg = hard_frame_pointer_rtx;
11588 m->fs.fp_offset = frame.hard_frame_pointer_offset;
11589 m->fs.fp_valid = true;
11592 if (!int_registers_saved)
11593 ix86_emit_save_regs_using_mov (frame.reg_save_offset);
11594 if (!sse_registers_saved)
11595 ix86_emit_save_sse_regs_using_mov (frame.sse_reg_save_offset);
11597 /* For mcount profiling in 32-bit PIC mode we need to emit SET_GOT
11598 in the prologue. */
11599 if (!TARGET_64BIT && pic_offset_table_rtx && crtl->profile && !flag_fentry)
11601 rtx pic = gen_rtx_REG (Pmode, REAL_PIC_OFFSET_TABLE_REGNUM);
11602 insn = emit_insn (gen_set_got (pic));
11603 RTX_FRAME_RELATED_P (insn) = 1;
11604 add_reg_note (insn, REG_CFA_FLUSH_QUEUE, NULL_RTX);
11605 emit_insn (gen_prologue_use (pic));
11606 /* Delete an already emitted SET_GOT if it exists and is allocated to
11607 REAL_PIC_OFFSET_TABLE_REGNUM. */
11608 ix86_elim_entry_set_got (pic);
11611 if (crtl->drap_reg && !crtl->stack_realign_needed)
11613 /* vDRAP is set up, but after reload it turns out stack realignment
11614 isn't necessary; here we emit prologue code to set up DRAP
11615 without the stack realignment adjustment. */
11616 t = choose_baseaddr (0);
11617 emit_insn (gen_rtx_SET (VOIDmode, crtl->drap_reg, t));
11620 /* Prevent instructions from being scheduled into register save push
11621 sequence when access to the redzone area is done through frame pointer.
11622 The offset between the frame pointer and the stack pointer is calculated
11623 relative to the value of the stack pointer at the end of the function
11624 prologue, and moving instructions that access redzone area via frame
11625 pointer inside push sequence violates this assumption. */
11626 if (frame_pointer_needed && frame.red_zone_size)
11627 emit_insn (gen_memory_blockage ());
11629 /* Emit cld instruction if stringops are used in the function. */
11630 if (TARGET_CLD && ix86_current_function_needs_cld)
11631 emit_insn (gen_cld ());
11633 /* SEH requires that the prologue end within 256 bytes of the start of
11634 the function. Prevent instruction schedules that would extend that.
11635 Further, prevent alloca modifications to the stack pointer from being
11636 combined with prologue modifications. */
11637 if (TARGET_SEH)
11638 emit_insn (gen_prologue_use (stack_pointer_rtx));
11641 /* Emit code to restore REG using a POP insn. */
11643 static void
11644 ix86_emit_restore_reg_using_pop (rtx reg)
11646 struct machine_function *m = cfun->machine;
11647 rtx insn = emit_insn (gen_pop (reg));
11649 ix86_add_cfa_restore_note (insn, reg, m->fs.sp_offset);
11650 m->fs.sp_offset -= UNITS_PER_WORD;
11652 if (m->fs.cfa_reg == crtl->drap_reg
11653 && REGNO (reg) == REGNO (crtl->drap_reg))
11655 /* Previously we'd represented the CFA as an expression
11656 like *(%ebp - 8). We've just popped that value from
11657 the stack, which means we need to reset the CFA to
11658 the drap register. This will remain until we restore
11659 the stack pointer. */
11660 add_reg_note (insn, REG_CFA_DEF_CFA, reg);
11661 RTX_FRAME_RELATED_P (insn) = 1;
11663 /* This means that the DRAP register is valid for addressing too. */
11664 m->fs.drap_valid = true;
11665 return;
11668 if (m->fs.cfa_reg == stack_pointer_rtx)
11670 rtx x = plus_constant (Pmode, stack_pointer_rtx, UNITS_PER_WORD);
11671 x = gen_rtx_SET (VOIDmode, stack_pointer_rtx, x);
11672 add_reg_note (insn, REG_CFA_ADJUST_CFA, x);
11673 RTX_FRAME_RELATED_P (insn) = 1;
11675 m->fs.cfa_offset -= UNITS_PER_WORD;
11678 /* When the frame pointer is the CFA, and we pop it, we are
11679 swapping back to the stack pointer as the CFA. This happens
11680 for stack frames that don't allocate other data, so we assume
11681 the stack pointer is now pointing at the return address, i.e.
11682 the function entry state, which makes the offset one word. */
11683 if (reg == hard_frame_pointer_rtx)
11685 m->fs.fp_valid = false;
11686 if (m->fs.cfa_reg == hard_frame_pointer_rtx)
11688 m->fs.cfa_reg = stack_pointer_rtx;
11689 m->fs.cfa_offset -= UNITS_PER_WORD;
11691 add_reg_note (insn, REG_CFA_DEF_CFA,
11692 gen_rtx_PLUS (Pmode, stack_pointer_rtx,
11693 GEN_INT (m->fs.cfa_offset)));
11694 RTX_FRAME_RELATED_P (insn) = 1;
11699 /* Emit code to restore saved registers using POP insns. */
11701 static void
11702 ix86_emit_restore_regs_using_pop (void)
11704 unsigned int regno;
11706 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
11707 if (!SSE_REGNO_P (regno) && ix86_save_reg (regno, false))
11708 ix86_emit_restore_reg_using_pop (gen_rtx_REG (word_mode, regno));
11711 /* Emit code and notes for the LEAVE instruction. */
11713 static void
11714 ix86_emit_leave (void)
11716 struct machine_function *m = cfun->machine;
11717 rtx insn = emit_insn (ix86_gen_leave ());
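/* 'leave' is equivalent to 'mov %ebp, %esp; pop %ebp', so afterwards
   the stack pointer ends up one word above the slot the frame pointer
   addressed; the frame-state updates below track exactly that.  */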
11719 ix86_add_queued_cfa_restore_notes (insn);
11721 gcc_assert (m->fs.fp_valid);
11722 m->fs.sp_valid = true;
11723 m->fs.sp_offset = m->fs.fp_offset - UNITS_PER_WORD;
11724 m->fs.fp_valid = false;
11726 if (m->fs.cfa_reg == hard_frame_pointer_rtx)
11728 m->fs.cfa_reg = stack_pointer_rtx;
11729 m->fs.cfa_offset = m->fs.sp_offset;
11731 add_reg_note (insn, REG_CFA_DEF_CFA,
11732 plus_constant (Pmode, stack_pointer_rtx,
11733 m->fs.sp_offset));
11734 RTX_FRAME_RELATED_P (insn) = 1;
11736 ix86_add_cfa_restore_note (insn, hard_frame_pointer_rtx,
11737 m->fs.fp_offset);
11740 /* Emit code to restore saved registers using MOV insns.
11741 First register is restored from CFA - CFA_OFFSET. */
11742 static void
11743 ix86_emit_restore_regs_using_mov (HOST_WIDE_INT cfa_offset,
11744 bool maybe_eh_return)
11746 struct machine_function *m = cfun->machine;
11747 unsigned int regno;
11749 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
11750 if (!SSE_REGNO_P (regno) && ix86_save_reg (regno, maybe_eh_return))
11752 rtx reg = gen_rtx_REG (word_mode, regno);
11753 rtx insn, mem;
11755 mem = choose_baseaddr (cfa_offset);
11756 mem = gen_frame_mem (word_mode, mem);
11757 insn = emit_move_insn (reg, mem);
11759 if (m->fs.cfa_reg == crtl->drap_reg && regno == REGNO (crtl->drap_reg))
11761 /* Previously we'd represented the CFA as an expression
11762 like *(%ebp - 8). We've just loaded that value from
11763 the stack, which means we need to reset the CFA to
11764 the drap register. This will remain until we restore
11765 the stack pointer. */
11766 add_reg_note (insn, REG_CFA_DEF_CFA, reg);
11767 RTX_FRAME_RELATED_P (insn) = 1;
11769 /* This means that the DRAP register is valid for addressing. */
11770 m->fs.drap_valid = true;
11772 else
11773 ix86_add_cfa_restore_note (NULL_RTX, reg, cfa_offset);
11775 cfa_offset -= UNITS_PER_WORD;
11779 /* Emit code to restore saved SSE registers using MOV insns.
11780 First register is restored from CFA - CFA_OFFSET. */
11781 static void
11782 ix86_emit_restore_sse_regs_using_mov (HOST_WIDE_INT cfa_offset,
11783 bool maybe_eh_return)
11785 unsigned int regno;
11787 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
11788 if (SSE_REGNO_P (regno) && ix86_save_reg (regno, maybe_eh_return))
11790 rtx reg = gen_rtx_REG (V4SFmode, regno);
11791 rtx mem;
11793 mem = choose_baseaddr (cfa_offset);
11794 mem = gen_rtx_MEM (V4SFmode, mem);
11795 set_mem_align (mem, 128);
11796 emit_move_insn (reg, mem);
11798 ix86_add_cfa_restore_note (NULL_RTX, reg, cfa_offset);
11800 cfa_offset -= 16;
11804 /* Restore function stack, frame, and registers. */
11806 void
11807 ix86_expand_epilogue (int style)
11809 struct machine_function *m = cfun->machine;
11810 struct machine_frame_state frame_state_save = m->fs;
11811 struct ix86_frame frame;
11812 bool restore_regs_via_mov;
11813 bool using_drap;
11815 ix86_finalize_stack_realign_flags ();
11816 ix86_compute_frame_layout (&frame);
11818 m->fs.sp_valid = (!frame_pointer_needed
11819 || (crtl->sp_is_unchanging
11820 && !stack_realign_fp));
11821 gcc_assert (!m->fs.sp_valid
11822 || m->fs.sp_offset == frame.stack_pointer_offset);
11824 /* The FP must be valid if the frame pointer is present. */
11825 gcc_assert (frame_pointer_needed == m->fs.fp_valid);
11826 gcc_assert (!m->fs.fp_valid
11827 || m->fs.fp_offset == frame.hard_frame_pointer_offset);
11829 /* We must have *some* valid pointer to the stack frame. */
11830 gcc_assert (m->fs.sp_valid || m->fs.fp_valid);
11832 /* The DRAP is never valid at this point. */
11833 gcc_assert (!m->fs.drap_valid);
11835 /* See the comment about red zone and frame
11836 pointer usage in ix86_expand_prologue. */
11837 if (frame_pointer_needed && frame.red_zone_size)
11838 emit_insn (gen_memory_blockage ());
11840 using_drap = crtl->drap_reg && crtl->stack_realign_needed;
11841 gcc_assert (!using_drap || m->fs.cfa_reg == crtl->drap_reg);
11843 /* Determine the CFA offset of the end of the red-zone. */
11844 m->fs.red_zone_offset = 0;
11845 if (ix86_using_red_zone () && crtl->args.pops_args < 65536)
11847 /* The red-zone begins below the return address. */
11848 m->fs.red_zone_offset = RED_ZONE_SIZE + UNITS_PER_WORD;
11850 /* When the register save area is in the aligned portion of
11851 the stack, determine the maximum runtime displacement that
11852 matches up with the aligned frame. */
11853 if (stack_realign_drap)
11854 m->fs.red_zone_offset -= (crtl->stack_alignment_needed / BITS_PER_UNIT
11855 + UNITS_PER_WORD);
11858 /* Special care must be taken for the normal return case of a function
11859 using eh_return: the eax and edx registers are marked as saved, but
11860 not restored along this path. Adjust the save location to match. */
11861 if (crtl->calls_eh_return && style != 2)
11862 frame.reg_save_offset -= 2 * UNITS_PER_WORD;
11864 /* EH_RETURN requires the use of moves to function properly. */
11865 if (crtl->calls_eh_return)
11866 restore_regs_via_mov = true;
11867 /* SEH requires the use of pops to identify the epilogue. */
11868 else if (TARGET_SEH)
11869 restore_regs_via_mov = false;
11870 /* If we're only restoring one register and sp is not valid, then
11871 use a move instruction to restore the register, since it's
11872 less work than reloading sp and popping the register. */
11873 else if (!m->fs.sp_valid && frame.nregs <= 1)
11874 restore_regs_via_mov = true;
11875 else if (TARGET_EPILOGUE_USING_MOVE
11876 && cfun->machine->use_fast_prologue_epilogue
11877 && (frame.nregs > 1
11878 || m->fs.sp_offset != frame.reg_save_offset))
11879 restore_regs_via_mov = true;
11880 else if (frame_pointer_needed
11881 && !frame.nregs
11882 && m->fs.sp_offset != frame.reg_save_offset)
11883 restore_regs_via_mov = true;
11884 else if (frame_pointer_needed
11885 && TARGET_USE_LEAVE
11886 && cfun->machine->use_fast_prologue_epilogue
11887 && frame.nregs == 1)
11888 restore_regs_via_mov = true;
11889 else
11890 restore_regs_via_mov = false;
11892 if (restore_regs_via_mov || frame.nsseregs)
11894 /* Ensure that the entire register save area is addressable via
11895 the stack pointer, if we will restore via sp. */
11896 if (TARGET_64BIT
11897 && m->fs.sp_offset > 0x7fffffff
11898 && !(m->fs.fp_valid || m->fs.drap_valid)
11899 && (frame.nsseregs + frame.nregs) != 0)
11901 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
11902 GEN_INT (m->fs.sp_offset
11903 - frame.sse_reg_save_offset),
11904 style,
11905 m->fs.cfa_reg == stack_pointer_rtx);
11909 /* If there are any SSE registers to restore, then we have to do it
11910 via moves, since there's obviously no pop for SSE regs. */
11911 if (frame.nsseregs)
11912 ix86_emit_restore_sse_regs_using_mov (frame.sse_reg_save_offset,
11913 style == 2);
11915 if (restore_regs_via_mov)
11917 rtx t;
11919 if (frame.nregs)
11920 ix86_emit_restore_regs_using_mov (frame.reg_save_offset, style == 2);
11922 /* eh_return epilogues need %ecx added to the stack pointer. */
11923 if (style == 2)
11925 rtx insn, sa = EH_RETURN_STACKADJ_RTX;
11927 /* Stack align doesn't work with eh_return. */
11928 gcc_assert (!stack_realign_drap);
11929 /* Neither do regparm nested functions. */
11930 gcc_assert (!ix86_static_chain_on_stack);
11932 if (frame_pointer_needed)
11934 t = gen_rtx_PLUS (Pmode, hard_frame_pointer_rtx, sa);
11935 t = plus_constant (Pmode, t, m->fs.fp_offset - UNITS_PER_WORD);
11936 emit_insn (gen_rtx_SET (VOIDmode, sa, t));
11938 t = gen_frame_mem (Pmode, hard_frame_pointer_rtx);
11939 insn = emit_move_insn (hard_frame_pointer_rtx, t);
11941 /* Note that we use SA as a temporary CFA, as the return
11942 address is at the proper place relative to it. We
11943 pretend this happens at the FP restore insn because
11944 prior to this insn the FP would be stored at the wrong
11945 offset relative to SA, and after this insn we have no
11946 other reasonable register to use for the CFA. We don't
11947 bother resetting the CFA to the SP for the duration of
11948 the return insn. */
11949 add_reg_note (insn, REG_CFA_DEF_CFA,
11950 plus_constant (Pmode, sa, UNITS_PER_WORD));
11951 ix86_add_queued_cfa_restore_notes (insn);
11952 add_reg_note (insn, REG_CFA_RESTORE, hard_frame_pointer_rtx);
11953 RTX_FRAME_RELATED_P (insn) = 1;
11955 m->fs.cfa_reg = sa;
11956 m->fs.cfa_offset = UNITS_PER_WORD;
11957 m->fs.fp_valid = false;
11959 pro_epilogue_adjust_stack (stack_pointer_rtx, sa,
11960 const0_rtx, style, false);
11962 else
11964 t = gen_rtx_PLUS (Pmode, stack_pointer_rtx, sa);
11965 t = plus_constant (Pmode, t, m->fs.sp_offset - UNITS_PER_WORD);
11966 insn = emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx, t));
11967 ix86_add_queued_cfa_restore_notes (insn);
11969 gcc_assert (m->fs.cfa_reg == stack_pointer_rtx);
11970 if (m->fs.cfa_offset != UNITS_PER_WORD)
11972 m->fs.cfa_offset = UNITS_PER_WORD;
11973 add_reg_note (insn, REG_CFA_DEF_CFA,
11974 plus_constant (Pmode, stack_pointer_rtx,
11975 UNITS_PER_WORD));
11976 RTX_FRAME_RELATED_P (insn) = 1;
11979 m->fs.sp_offset = UNITS_PER_WORD;
11980 m->fs.sp_valid = true;
11983 else
11985 /* SEH requires that the function end with (1) a stack adjustment
11986 if necessary, (2) a sequence of pops, and (3) a return or
11987 jump instruction. Prevent insns from the function body from
11988 being scheduled into this sequence. */
11989 if (TARGET_SEH)
11991 /* Prevent a catch region from being adjacent to the standard
11992 epilogue sequence. Unfortunately, crtl->uses_eh_lsda and
11993 several other flags that would be interesting to test are
11994 not yet set up.
11995 if (flag_non_call_exceptions)
11996 emit_insn (gen_nops (const1_rtx));
11997 else
11998 emit_insn (gen_blockage ());
12001 /* First step is to deallocate the stack frame so that we can
12002 pop the registers. Also do it on SEH target for very large
12003 frame as the emitted instructions aren't allowed by the ABI in
12004 epilogues. */
12005 if (!m->fs.sp_valid
12006 || (TARGET_SEH
12007 && (m->fs.sp_offset - frame.reg_save_offset
12008 >= SEH_MAX_FRAME_SIZE)))
12010 pro_epilogue_adjust_stack (stack_pointer_rtx, hard_frame_pointer_rtx,
12011 GEN_INT (m->fs.fp_offset
12012 - frame.reg_save_offset),
12013 style, false);
12015 else if (m->fs.sp_offset != frame.reg_save_offset)
12017 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
12018 GEN_INT (m->fs.sp_offset
12019 - frame.reg_save_offset),
12020 style,
12021 m->fs.cfa_reg == stack_pointer_rtx);
12024 ix86_emit_restore_regs_using_pop ();
12027 /* If we used a frame pointer and haven't already got rid of it,
12028 then do so now. */
12029 if (m->fs.fp_valid)
12031 /* If the stack pointer is valid and pointing at the frame
12032 pointer store address, then we only need a pop. */
12033 if (m->fs.sp_valid && m->fs.sp_offset == frame.hfp_save_offset)
12034 ix86_emit_restore_reg_using_pop (hard_frame_pointer_rtx);
12035 /* Leave results in shorter dependency chains on CPUs that are
12036 able to grok it fast. */
12037 else if (TARGET_USE_LEAVE
12038 || optimize_bb_for_size_p (EXIT_BLOCK_PTR_FOR_FN (cfun))
12039 || !cfun->machine->use_fast_prologue_epilogue)
12040 ix86_emit_leave ();
12041 else
12043 pro_epilogue_adjust_stack (stack_pointer_rtx,
12044 hard_frame_pointer_rtx,
12045 const0_rtx, style, !using_drap);
12046 ix86_emit_restore_reg_using_pop (hard_frame_pointer_rtx);
12050 if (using_drap)
12052 int param_ptr_offset = UNITS_PER_WORD;
12053 rtx insn;
12055 gcc_assert (stack_realign_drap);
12057 if (ix86_static_chain_on_stack)
12058 param_ptr_offset += UNITS_PER_WORD;
12059 if (!call_used_regs[REGNO (crtl->drap_reg)])
12060 param_ptr_offset += UNITS_PER_WORD;
12062 insn = emit_insn (gen_rtx_SET
12063 (VOIDmode, stack_pointer_rtx,
12064 gen_rtx_PLUS (Pmode,
12065 crtl->drap_reg,
12066 GEN_INT (-param_ptr_offset))));
12067 m->fs.cfa_reg = stack_pointer_rtx;
12068 m->fs.cfa_offset = param_ptr_offset;
12069 m->fs.sp_offset = param_ptr_offset;
12070 m->fs.realigned = false;
12072 add_reg_note (insn, REG_CFA_DEF_CFA,
12073 gen_rtx_PLUS (Pmode, stack_pointer_rtx,
12074 GEN_INT (param_ptr_offset)));
12075 RTX_FRAME_RELATED_P (insn) = 1;
12077 if (!call_used_regs[REGNO (crtl->drap_reg)])
12078 ix86_emit_restore_reg_using_pop (crtl->drap_reg);
12081 /* At this point the stack pointer must be valid, and we must have
12082 restored all of the registers. We may not have deallocated the
12083 entire stack frame. We've delayed this until now because it may
12084 be possible to merge the local stack deallocation with the
12085 deallocation forced by ix86_static_chain_on_stack. */
12086 gcc_assert (m->fs.sp_valid);
12087 gcc_assert (!m->fs.fp_valid);
12088 gcc_assert (!m->fs.realigned);
12089 if (m->fs.sp_offset != UNITS_PER_WORD)
12091 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
12092 GEN_INT (m->fs.sp_offset - UNITS_PER_WORD),
12093 style, true);
12095 else
12096 ix86_add_queued_cfa_restore_notes (get_last_insn ());
12098 /* Sibcall epilogues don't want a return instruction. */
12099 if (style == 0)
12101 m->fs = frame_state_save;
12102 return;
12105 if (crtl->args.pops_args && crtl->args.size)
12107 rtx popc = GEN_INT (crtl->args.pops_args);
12109 /* i386 can only pop 64K bytes. If asked to pop more, pop the return
12110 address, do an explicit add, and jump indirectly to the caller. */
12112 if (crtl->args.pops_args >= 65536)
12114 rtx ecx = gen_rtx_REG (SImode, CX_REG);
12115 rtx insn;
12117 /* There is no "pascal" calling convention in any 64bit ABI. */
12118 gcc_assert (!TARGET_64BIT);
12120 insn = emit_insn (gen_pop (ecx));
12121 m->fs.cfa_offset -= UNITS_PER_WORD;
12122 m->fs.sp_offset -= UNITS_PER_WORD;
12124 rtx x = plus_constant (Pmode, stack_pointer_rtx, UNITS_PER_WORD);
12125 x = gen_rtx_SET (VOIDmode, stack_pointer_rtx, x);
12126 add_reg_note (insn, REG_CFA_ADJUST_CFA, x);
12127 add_reg_note (insn, REG_CFA_REGISTER,
12128 gen_rtx_SET (VOIDmode, ecx, pc_rtx));
12129 RTX_FRAME_RELATED_P (insn) = 1;
12131 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
12132 popc, -1, true);
12133 emit_jump_insn (gen_simple_return_indirect_internal (ecx));
12135 else
12136 emit_jump_insn (gen_simple_return_pop_internal (popc));
12138 else
12139 emit_jump_insn (gen_simple_return_internal ());
12141 /* Restore the state back to the state from the prologue,
12142 so that it's correct for the next epilogue. */
12143 m->fs = frame_state_save;
12146 /* Reset from the function's potential modifications. */
12148 static void
12149 ix86_output_function_epilogue (FILE *file ATTRIBUTE_UNUSED, HOST_WIDE_INT)
12151 if (pic_offset_table_rtx
12152 && !ix86_use_pseudo_pic_reg ())
12153 SET_REGNO (pic_offset_table_rtx, REAL_PIC_OFFSET_TABLE_REGNUM);
12154 #if TARGET_MACHO
12155 /* Mach-O doesn't support labels at the end of objects, so if
12156 it looks like we might want one, insert a NOP. */
12158 rtx_insn *insn = get_last_insn ();
12159 rtx_insn *deleted_debug_label = NULL;
12160 while (insn
12161 && NOTE_P (insn)
12162 && NOTE_KIND (insn) != NOTE_INSN_DELETED_LABEL)
12164 /* For NOTE_INSN_DELETED_DEBUG_LABEL notes only, don't insert
12165 a nop; instead set their CODE_LABEL_NUMBER to -1, otherwise
12166 there would be code generation differences
12167 between -g and -g0. */
12168 if (NOTE_P (insn) && NOTE_KIND (insn) == NOTE_INSN_DELETED_DEBUG_LABEL)
12169 deleted_debug_label = insn;
12170 insn = PREV_INSN (insn);
12172 if (insn
12173 && (LABEL_P (insn)
12174 || (NOTE_P (insn)
12175 && NOTE_KIND (insn) == NOTE_INSN_DELETED_LABEL)))
12176 fputs ("\tnop\n", file);
12177 else if (deleted_debug_label)
12178 for (insn = deleted_debug_label; insn; insn = NEXT_INSN (insn))
12179 if (NOTE_KIND (insn) == NOTE_INSN_DELETED_DEBUG_LABEL)
12180 CODE_LABEL_NUMBER (insn) = -1;
12182 #endif
12186 /* Return a scratch register to use in the split stack prologue. The
12187 split stack prologue is used for -fsplit-stack. It is the first
12188 instructions in the function, even before the regular prologue.
12189 The scratch register can be any caller-saved register which is not
12190 used for parameters or for the static chain. */
12192 static unsigned int
12193 split_stack_prologue_scratch_regno (void)
12195 if (TARGET_64BIT)
12196 return R11_REG;
12197 else
12199 bool is_fastcall, is_thiscall;
12200 int regparm;
12202 is_fastcall = (lookup_attribute ("fastcall",
12203 TYPE_ATTRIBUTES (TREE_TYPE (cfun->decl)))
12204 != NULL);
12205 is_thiscall = (lookup_attribute ("thiscall",
12206 TYPE_ATTRIBUTES (TREE_TYPE (cfun->decl)))
12207 != NULL);
12208 regparm = ix86_function_regparm (TREE_TYPE (cfun->decl), cfun->decl);
12210 if (is_fastcall)
12212 if (DECL_STATIC_CHAIN (cfun->decl))
12214 sorry ("-fsplit-stack does not support fastcall with "
12215 "nested function");
12216 return INVALID_REGNUM;
12218 return AX_REG;
12220 else if (is_thiscall)
12222 if (!DECL_STATIC_CHAIN (cfun->decl))
12223 return DX_REG;
12224 return AX_REG;
12226 else if (regparm < 3)
12228 if (!DECL_STATIC_CHAIN (cfun->decl))
12229 return CX_REG;
12230 else
12232 if (regparm >= 2)
12234 sorry ("-fsplit-stack does not support 2 register "
12235 "parameters for a nested function");
12236 return INVALID_REGNUM;
12238 return DX_REG;
12241 else
12243 /* FIXME: We could make this work by pushing a register
12244 around the addition and comparison. */
12245 sorry ("-fsplit-stack does not support 3 register parameters");
12246 return INVALID_REGNUM;
12251 /* A SYMBOL_REF for the function which allocates new stack space for
12252 -fsplit-stack. */
12254 static GTY(()) rtx split_stack_fn;
12256 /* A SYMBOL_REF for the function which allocates more stack space
12257 when using the large model. */
12259 static GTY(()) rtx split_stack_fn_large;
12261 /* Handle -fsplit-stack. These are the first instructions in the
12262 function, even before the regular prologue. */
12264 void
12265 ix86_expand_split_stack_prologue (void)
12267 struct ix86_frame frame;
12268 HOST_WIDE_INT allocate;
12269 unsigned HOST_WIDE_INT args_size;
12270 rtx_code_label *label;
12271 rtx limit, current, jump_insn, allocate_rtx, call_insn, call_fusage;
12272 rtx scratch_reg = NULL_RTX;
12273 rtx_code_label *varargs_label = NULL;
12274 rtx fn;
12276 gcc_assert (flag_split_stack && reload_completed);
12278 ix86_finalize_stack_realign_flags ();
12279 ix86_compute_frame_layout (&frame);
12280 allocate = frame.stack_pointer_offset - INCOMING_FRAME_SP_OFFSET;
12282 /* This is the label we will branch to if we have enough stack
12283 space. We expect the basic block reordering pass to reverse this
12284 branch if optimizing, so that we branch in the unlikely case. */
12285 label = gen_label_rtx ();
12287 /* We need to compare the stack pointer minus the frame size with
12288 the stack boundary in the TCB. The stack boundary always gives
12289 us SPLIT_STACK_AVAILABLE bytes, so if we need less than that we
12290 can compare directly. Otherwise we need to do an addition. */
12292 limit = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const0_rtx),
12293 UNSPEC_STACK_CHECK);
12294 limit = gen_rtx_CONST (Pmode, limit);
12295 limit = gen_rtx_MEM (Pmode, limit);
12296 if (allocate < SPLIT_STACK_AVAILABLE)
12297 current = stack_pointer_rtx;
12298 else
12300 unsigned int scratch_regno;
12301 rtx offset;
12303 /* We need a scratch register to hold the stack pointer minus
12304 the required frame size. Since this is the very start of the
12305 function, the scratch register can be any caller-saved
12306 register which is not used for parameters. */
12307 offset = GEN_INT (- allocate);
12308 scratch_regno = split_stack_prologue_scratch_regno ();
12309 if (scratch_regno == INVALID_REGNUM)
12310 return;
12311 scratch_reg = gen_rtx_REG (Pmode, scratch_regno);
12312 if (!TARGET_64BIT || x86_64_immediate_operand (offset, Pmode))
12314 /* We don't use ix86_gen_add3 in this case because it will
12315 want to split to lea, but when not optimizing the insn
12316 will not be split after this point. */
12317 emit_insn (gen_rtx_SET (VOIDmode, scratch_reg,
12318 gen_rtx_PLUS (Pmode, stack_pointer_rtx,
12319 offset)));
12321 else
12323 emit_move_insn (scratch_reg, offset);
12324 emit_insn (ix86_gen_add3 (scratch_reg, scratch_reg,
12325 stack_pointer_rtx));
12327 current = scratch_reg;
12330 ix86_expand_branch (GEU, current, limit, label);
12331 jump_insn = get_last_insn ();
12332 JUMP_LABEL (jump_insn) = label;
12334 /* Mark the jump as very likely to be taken. */
12335 add_int_reg_note (jump_insn, REG_BR_PROB,
12336 REG_BR_PROB_BASE - REG_BR_PROB_BASE / 100);
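/* REG_BR_PROB_BASE - REG_BR_PROB_BASE / 100 corresponds to a taken
   probability of 99%; the "enough stack" path is the common case.  */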
12338 if (split_stack_fn == NULL_RTX)
12340 split_stack_fn = gen_rtx_SYMBOL_REF (Pmode, "__morestack");
12341 SYMBOL_REF_FLAGS (split_stack_fn) |= SYMBOL_FLAG_LOCAL;
12343 fn = split_stack_fn;
12345 /* Get more stack space. We pass in the desired stack space and the
12346 size of the arguments to copy to the new stack. In 32-bit mode
12347 we push the parameters; __morestack will return on a new stack
12348 anyhow. In 64-bit mode we pass the parameters in r10 and
12349 r11. */
12350 allocate_rtx = GEN_INT (allocate);
12351 args_size = crtl->args.size >= 0 ? crtl->args.size : 0;
12352 call_fusage = NULL_RTX;
12353 if (TARGET_64BIT)
12355 rtx reg10, reg11;
12357 reg10 = gen_rtx_REG (Pmode, R10_REG);
12358 reg11 = gen_rtx_REG (Pmode, R11_REG);
12360 /* If this function uses a static chain, it will be in %r10.
12361 Preserve it across the call to __morestack. */
12362 if (DECL_STATIC_CHAIN (cfun->decl))
12364 rtx rax;
12366 rax = gen_rtx_REG (word_mode, AX_REG);
12367 emit_move_insn (rax, gen_rtx_REG (word_mode, R10_REG));
12368 use_reg (&call_fusage, rax);
12371 if ((ix86_cmodel == CM_LARGE || ix86_cmodel == CM_LARGE_PIC)
12372 && !TARGET_PECOFF)
12374 HOST_WIDE_INT argval;
12376 gcc_assert (Pmode == DImode);
12377 /* When using the large model we need to load the address
12378 into a register, and we've run out of registers. So we
12379 switch to a different calling convention, and we call a
12380 different function: __morestack_large. We pass the
12381 argument size in the upper 32 bits of r10 and pass the
12382 frame size in the lower 32 bits. */
12383 gcc_assert ((allocate & (HOST_WIDE_INT) 0xffffffff) == allocate);
12384 gcc_assert ((args_size & 0xffffffff) == args_size);
12386 if (split_stack_fn_large == NULL_RTX)
12388 split_stack_fn_large =
12389 gen_rtx_SYMBOL_REF (Pmode, "__morestack_large_model");
12390 SYMBOL_REF_FLAGS (split_stack_fn_large) |= SYMBOL_FLAG_LOCAL;
12392 if (ix86_cmodel == CM_LARGE_PIC)
12394 rtx_code_label *label;
12395 rtx x;
12397 label = gen_label_rtx ();
12398 emit_label (label);
12399 LABEL_PRESERVE_P (label) = 1;
12400 emit_insn (gen_set_rip_rex64 (reg10, label));
12401 emit_insn (gen_set_got_offset_rex64 (reg11, label));
12402 emit_insn (ix86_gen_add3 (reg10, reg10, reg11));
12403 x = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, split_stack_fn_large),
12404 UNSPEC_GOT);
12405 x = gen_rtx_CONST (Pmode, x);
12406 emit_move_insn (reg11, x);
12407 x = gen_rtx_PLUS (Pmode, reg10, reg11);
12408 x = gen_const_mem (Pmode, x);
12409 emit_move_insn (reg11, x);
12411 else
12412 emit_move_insn (reg11, split_stack_fn_large);
12414 fn = reg11;
12416 argval = ((args_size << 16) << 16) + allocate;
12417 emit_move_insn (reg10, GEN_INT (argval));
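/* For example, with a (hypothetical) args_size of 0x20 and allocate of
   0x1000, argval is 0x0000002000001000: the argument size lands in the
   upper 32 bits of %r10 and the frame size in the lower 32 bits, as
   described above.  */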
12419 else
12421 emit_move_insn (reg10, allocate_rtx);
12422 emit_move_insn (reg11, GEN_INT (args_size));
12423 use_reg (&call_fusage, reg11);
12426 use_reg (&call_fusage, reg10);
12428 else
12430 emit_insn (gen_push (GEN_INT (args_size)));
12431 emit_insn (gen_push (allocate_rtx));
12433 call_insn = ix86_expand_call (NULL_RTX, gen_rtx_MEM (QImode, fn),
12434 GEN_INT (UNITS_PER_WORD), constm1_rtx,
12435 NULL_RTX, false);
12436 add_function_usage_to (call_insn, call_fusage);
12438 /* In order to make call/return prediction work right, we now need
12439 to execute a return instruction. See
12440 libgcc/config/i386/morestack.S for the details on how this works.
12442 For flow purposes gcc must not see this as a return
12443 instruction--we need control flow to continue at the subsequent
12444 label. Therefore, we use an unspec. */
12445 gcc_assert (crtl->args.pops_args < 65536);
12446 emit_insn (gen_split_stack_return (GEN_INT (crtl->args.pops_args)));
12448 /* If we are in 64-bit mode and this function uses a static chain,
12449 we saved %r10 in %rax before calling __morestack. */
12450 if (TARGET_64BIT && DECL_STATIC_CHAIN (cfun->decl))
12451 emit_move_insn (gen_rtx_REG (word_mode, R10_REG),
12452 gen_rtx_REG (word_mode, AX_REG));
12454 /* If this function calls va_start, we need to store a pointer to
12455 the arguments on the old stack, because they may not all have been
12456 copied to the new stack. At this point the old stack can be
12457 found at the frame pointer value used by __morestack, because
12458 __morestack has set that up before calling back to us. Here we
12459 store that pointer in a scratch register, and in
12460 ix86_expand_prologue we store the scratch register in a stack
12461 slot. */
12462 if (cfun->machine->split_stack_varargs_pointer != NULL_RTX)
12464 unsigned int scratch_regno;
12465 rtx frame_reg;
12466 int words;
12468 scratch_regno = split_stack_prologue_scratch_regno ();
12469 scratch_reg = gen_rtx_REG (Pmode, scratch_regno);
12470 frame_reg = gen_rtx_REG (Pmode, BP_REG);
12472 /* 64-bit:
12473 fp -> old fp value
12474 return address within this function
12475 return address of caller of this function
12476 stack arguments
12477 So we add three words to get to the stack arguments.
12479 32-bit:
12480 fp -> old fp value
12481 return address within this function
12482 first argument to __morestack
12483 second argument to __morestack
12484 return address of caller of this function
12485 stack arguments
12486 So we add five words to get to the stack arguments.
12488 words = TARGET_64BIT ? 3 : 5;
12489 emit_insn (gen_rtx_SET (VOIDmode, scratch_reg,
12490 gen_rtx_PLUS (Pmode, frame_reg,
12491 GEN_INT (words * UNITS_PER_WORD))));
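/* With the layouts sketched above this computes frame_reg + 24 on
   64-bit targets (3 words of 8 bytes) and frame_reg + 20 on 32-bit
   targets (5 words of 4 bytes), i.e. the address of the first stack
   argument.  */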
12493 varargs_label = gen_label_rtx ();
12494 emit_jump_insn (gen_jump (varargs_label));
12495 JUMP_LABEL (get_last_insn ()) = varargs_label;
12497 emit_barrier ();
12500 emit_label (label);
12501 LABEL_NUSES (label) = 1;
12503 /* If this function calls va_start, we now have to set the scratch
12504 register for the case where we do not call __morestack. In this
12505 case we need to set it based on the stack pointer. */
12506 if (cfun->machine->split_stack_varargs_pointer != NULL_RTX)
12508 emit_insn (gen_rtx_SET (VOIDmode, scratch_reg,
12509 gen_rtx_PLUS (Pmode, stack_pointer_rtx,
12510 GEN_INT (UNITS_PER_WORD))));
12512 emit_label (varargs_label);
12513 LABEL_NUSES (varargs_label) = 1;
12517 /* We may have to tell the dataflow pass that the split stack prologue
12518 is initializing a scratch register. */
12520 static void
12521 ix86_live_on_entry (bitmap regs)
12523 if (cfun->machine->split_stack_varargs_pointer != NULL_RTX)
12525 gcc_assert (flag_split_stack);
12526 bitmap_set_bit (regs, split_stack_prologue_scratch_regno ());
12530 /* Extract the parts of an RTL expression that is a valid memory address
12531 for an instruction. Return 0 if the structure of the address is
12532 grossly off. Return -1 if the address contains ASHIFT, so it is not
12533 strictly valid, but is still used for computing the length of an lea instruction. */
12536 ix86_decompose_address (rtx addr, struct ix86_address *out)
12538 rtx base = NULL_RTX, index = NULL_RTX, disp = NULL_RTX;
12539 rtx base_reg, index_reg;
12540 HOST_WIDE_INT scale = 1;
12541 rtx scale_rtx = NULL_RTX;
12542 rtx tmp;
12543 int retval = 1;
12544 enum ix86_address_seg seg = SEG_DEFAULT;
12546 /* Allow zero-extended SImode addresses;
12547 they will be emitted with the addr32 prefix. */
12548 if (TARGET_64BIT && GET_MODE (addr) == DImode)
12550 if (GET_CODE (addr) == ZERO_EXTEND
12551 && GET_MODE (XEXP (addr, 0)) == SImode)
12553 addr = XEXP (addr, 0);
12554 if (CONST_INT_P (addr))
12555 return 0;
12557 else if (GET_CODE (addr) == AND
12558 && const_32bit_mask (XEXP (addr, 1), DImode))
12560 addr = simplify_gen_subreg (SImode, XEXP (addr, 0), DImode, 0);
12561 if (addr == NULL_RTX)
12562 return 0;
12564 if (CONST_INT_P (addr))
12565 return 0;
12569 /* Allow SImode subregs of DImode addresses;
12570 they will be emitted with the addr32 prefix. */
12571 if (TARGET_64BIT && GET_MODE (addr) == SImode)
12573 if (GET_CODE (addr) == SUBREG
12574 && GET_MODE (SUBREG_REG (addr)) == DImode)
12576 addr = SUBREG_REG (addr);
12577 if (CONST_INT_P (addr))
12578 return 0;
12582 if (REG_P (addr))
12583 base = addr;
12584 else if (GET_CODE (addr) == SUBREG)
12586 if (REG_P (SUBREG_REG (addr)))
12587 base = addr;
12588 else
12589 return 0;
12591 else if (GET_CODE (addr) == PLUS)
12593 rtx addends[4], op;
12594 int n = 0, i;
12596 op = addr;
12599 if (n >= 4)
12600 return 0;
12601 addends[n++] = XEXP (op, 1);
12602 op = XEXP (op, 0);
12604 while (GET_CODE (op) == PLUS);
12605 if (n >= 4)
12606 return 0;
12607 addends[n] = op;
12609 for (i = n; i >= 0; --i)
12611 op = addends[i];
12612 switch (GET_CODE (op))
12614 case MULT:
12615 if (index)
12616 return 0;
12617 index = XEXP (op, 0);
12618 scale_rtx = XEXP (op, 1);
12619 break;
12621 case ASHIFT:
12622 if (index)
12623 return 0;
12624 index = XEXP (op, 0);
12625 tmp = XEXP (op, 1);
12626 if (!CONST_INT_P (tmp))
12627 return 0;
12628 scale = INTVAL (tmp);
12629 if ((unsigned HOST_WIDE_INT) scale > 3)
12630 return 0;
12631 scale = 1 << scale;
12632 break;
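/* For example, an (ashift reg 3) term is handled here as index*8,
   exactly as a (mult reg 8) term would be.  */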
12634 case ZERO_EXTEND:
12635 op = XEXP (op, 0);
12636 if (GET_CODE (op) != UNSPEC)
12637 return 0;
12638 /* FALLTHRU */
12640 case UNSPEC:
12641 if (XINT (op, 1) == UNSPEC_TP
12642 && TARGET_TLS_DIRECT_SEG_REFS
12643 && seg == SEG_DEFAULT)
12644 seg = DEFAULT_TLS_SEG_REG;
12645 else
12646 return 0;
12647 break;
12649 case SUBREG:
12650 if (!REG_P (SUBREG_REG (op)))
12651 return 0;
12652 /* FALLTHRU */
12654 case REG:
12655 if (!base)
12656 base = op;
12657 else if (!index)
12658 index = op;
12659 else
12660 return 0;
12661 break;
12663 case CONST:
12664 case CONST_INT:
12665 case SYMBOL_REF:
12666 case LABEL_REF:
12667 if (disp)
12668 return 0;
12669 disp = op;
12670 break;
12672 default:
12673 return 0;
12677 else if (GET_CODE (addr) == MULT)
12679 index = XEXP (addr, 0); /* index*scale */
12680 scale_rtx = XEXP (addr, 1);
12682 else if (GET_CODE (addr) == ASHIFT)
12684 /* We're called for lea too, which implements ashift on occasion. */
12685 index = XEXP (addr, 0);
12686 tmp = XEXP (addr, 1);
12687 if (!CONST_INT_P (tmp))
12688 return 0;
12689 scale = INTVAL (tmp);
12690 if ((unsigned HOST_WIDE_INT) scale > 3)
12691 return 0;
12692 scale = 1 << scale;
12693 retval = -1;
12695 else
12696 disp = addr; /* displacement */
12698 if (index)
12700 if (REG_P (index))
12702 else if (GET_CODE (index) == SUBREG
12703 && REG_P (SUBREG_REG (index)))
12705 else
12706 return 0;
12709 /* Extract the integral value of scale. */
12710 if (scale_rtx)
12712 if (!CONST_INT_P (scale_rtx))
12713 return 0;
12714 scale = INTVAL (scale_rtx);
12717 base_reg = base && GET_CODE (base) == SUBREG ? SUBREG_REG (base) : base;
12718 index_reg = index && GET_CODE (index) == SUBREG ? SUBREG_REG (index) : index;
12720 /* Avoid useless 0 displacement. */
12721 if (disp == const0_rtx && (base || index))
12722 disp = NULL_RTX;
12724 /* Allow arg pointer and stack pointer as index if there is no scaling. */
12725 if (base_reg && index_reg && scale == 1
12726 && (index_reg == arg_pointer_rtx
12727 || index_reg == frame_pointer_rtx
12728 || (REG_P (index_reg) && REGNO (index_reg) == STACK_POINTER_REGNUM)))
12730 std::swap (base, index);
12731 std::swap (base_reg, index_reg);
12734 /* Special case: %ebp cannot be encoded as a base without a displacement.
12735 Similarly %r13. */
12736 if (!disp
12737 && base_reg
12738 && (base_reg == hard_frame_pointer_rtx
12739 || base_reg == frame_pointer_rtx
12740 || base_reg == arg_pointer_rtx
12741 || (REG_P (base_reg)
12742 && (REGNO (base_reg) == HARD_FRAME_POINTER_REGNUM
12743 || REGNO (base_reg) == R13_REG))))
12744 disp = const0_rtx;
12746 /* Special case: on K6, [%esi] forces the instruction to be vector decoded.
12747 Avoid this by transforming to [%esi+0].
12748 Reload calls address legitimization without cfun defined, so we need
12749 to test cfun for being non-NULL. */
12750 if (TARGET_K6 && cfun && optimize_function_for_speed_p (cfun)
12751 && base_reg && !index_reg && !disp
12752 && REG_P (base_reg) && REGNO (base_reg) == SI_REG)
12753 disp = const0_rtx;
12755 /* Special case: encode reg+reg instead of reg*2. */
12756 if (!base && index && scale == 2)
12757 base = index, base_reg = index_reg, scale = 1;
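/* E.g. an address of the form (,%eax,2) is encoded as (%eax,%eax)
   instead, which avoids the mandatory 32-bit displacement of a
   base-less SIB encoding.  */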
12759 /* Special case: scaling cannot be encoded without base or displacement. */
12760 if (!base && !disp && index && scale != 1)
12761 disp = const0_rtx;
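/* A bare scaled index such as (mult reg 4) thus gets an explicit zero
   displacement and is emitted as 0(,%reg,4), since the SIB byte has no
   encoding for a scaled index alone.  */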
12763 out->base = base;
12764 out->index = index;
12765 out->disp = disp;
12766 out->scale = scale;
12767 out->seg = seg;
12769 return retval;
12772 /* Return cost of the memory address x.
12773 For i386, it is better to use a complex address than let gcc copy
12774 the address into a reg and make a new pseudo. But not if the address
12775 requires two regs - that would mean more pseudos with longer
12776 lifetimes. */
12777 static int
12778 ix86_address_cost (rtx x, machine_mode, addr_space_t, bool)
12780 struct ix86_address parts;
12781 int cost = 1;
12782 int ok = ix86_decompose_address (x, &parts);
12784 gcc_assert (ok);
12786 if (parts.base && GET_CODE (parts.base) == SUBREG)
12787 parts.base = SUBREG_REG (parts.base);
12788 if (parts.index && GET_CODE (parts.index) == SUBREG)
12789 parts.index = SUBREG_REG (parts.index);
12791 /* Attempt to minimize number of registers in the address. */
12792 if ((parts.base
12793 && (!REG_P (parts.base) || REGNO (parts.base) >= FIRST_PSEUDO_REGISTER))
12794 || (parts.index
12795 && (!REG_P (parts.index)
12796 || REGNO (parts.index) >= FIRST_PSEUDO_REGISTER)))
12797 cost++;
12799 /* When the address base or index is "pic_offset_table_rtx" we don't increase
12800 the address cost. When a memop with "pic_offset_table_rtx" is not invariant
12801 itself, it most likely means that the base or index is not invariant.
12802 Therefore only "pic_offset_table_rtx" could be hoisted out, which is not
12803 profitable for x86. */
12804 if (parts.base
12805 && (!pic_offset_table_rtx
12806 || REGNO (pic_offset_table_rtx) != REGNO(parts.base))
12807 && (!REG_P (parts.base) || REGNO (parts.base) >= FIRST_PSEUDO_REGISTER)
12808 && parts.index
12809 && (!pic_offset_table_rtx
12810 || REGNO (pic_offset_table_rtx) != REGNO(parts.index))
12811 && (!REG_P (parts.index) || REGNO (parts.index) >= FIRST_PSEUDO_REGISTER)
12812 && parts.base != parts.index)
12813 cost++;
12815 /* The AMD-K6 doesn't like addresses with ModR/M set to 00_xxx_100b,
12816 since its predecode logic can't detect the length of instructions
12817 and it degenerates to vector decoding. Increase the cost of such
12818 addresses here. The penalty is at least 2 cycles. It may be worthwhile
12819 to split such addresses or even refuse such addresses at all.
12821 The following addressing modes are affected:
12822 [base+scale*index]
12823 [scale*index+disp]
12824 [base+index]
12826 The first and last case may be avoidable by explicitly coding the zero in
12827 the memory address, but I don't have an AMD-K6 machine handy to check this
12828 theory. */
12830 if (TARGET_K6
12831 && ((!parts.disp && parts.base && parts.index && parts.scale != 1)
12832 || (parts.disp && !parts.base && parts.index && parts.scale != 1)
12833 || (!parts.disp && parts.base && parts.index && parts.scale == 1)))
12834 cost += 10;
12836 return cost;
12839 /* Allow {LABEL | SYMBOL}_REF - SYMBOL_REF-FOR-PICBASE for Mach-O as
12840 this is used to form addresses to local data when -fPIC is in
12841 use. */
12843 static bool
12844 darwin_local_data_pic (rtx disp)
12846 return (GET_CODE (disp) == UNSPEC
12847 && XINT (disp, 1) == UNSPEC_MACHOPIC_OFFSET);
12850 /* Determine if a given RTX is a valid constant. We already know this
12851 satisfies CONSTANT_P. */
12853 static bool
12854 ix86_legitimate_constant_p (machine_mode, rtx x)
12856 /* Pointer bounds constants are not valid. */
12857 if (POINTER_BOUNDS_MODE_P (GET_MODE (x)))
12858 return false;
12860 switch (GET_CODE (x))
12862 case CONST:
12863 x = XEXP (x, 0);
12865 if (GET_CODE (x) == PLUS)
12867 if (!CONST_INT_P (XEXP (x, 1)))
12868 return false;
12869 x = XEXP (x, 0);
12872 if (TARGET_MACHO && darwin_local_data_pic (x))
12873 return true;
12875 /* Only some unspecs are valid as "constants". */
12876 if (GET_CODE (x) == UNSPEC)
12877 switch (XINT (x, 1))
12879 case UNSPEC_GOT:
12880 case UNSPEC_GOTOFF:
12881 case UNSPEC_PLTOFF:
12882 return TARGET_64BIT;
12883 case UNSPEC_TPOFF:
12884 case UNSPEC_NTPOFF:
12885 x = XVECEXP (x, 0, 0);
12886 return (GET_CODE (x) == SYMBOL_REF
12887 && SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_EXEC);
12888 case UNSPEC_DTPOFF:
12889 x = XVECEXP (x, 0, 0);
12890 return (GET_CODE (x) == SYMBOL_REF
12891 && SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_DYNAMIC);
12892 default:
12893 return false;
12896 /* We must have drilled down to a symbol. */
12897 if (GET_CODE (x) == LABEL_REF)
12898 return true;
12899 if (GET_CODE (x) != SYMBOL_REF)
12900 return false;
12901 /* FALLTHRU */
12903 case SYMBOL_REF:
12904 /* TLS symbols are never valid. */
12905 if (SYMBOL_REF_TLS_MODEL (x))
12906 return false;
12908 /* DLLIMPORT symbols are never valid. */
12909 if (TARGET_DLLIMPORT_DECL_ATTRIBUTES
12910 && SYMBOL_REF_DLLIMPORT_P (x))
12911 return false;
12913 #if TARGET_MACHO
12914 /* mdynamic-no-pic */
12915 if (MACHO_DYNAMIC_NO_PIC_P)
12916 return machopic_symbol_defined_p (x);
12917 #endif
12918 break;
12920 case CONST_DOUBLE:
12921 if (GET_MODE (x) == TImode
12922 && x != CONST0_RTX (TImode)
12923 && !TARGET_64BIT)
12924 return false;
12925 break;
12927 case CONST_VECTOR:
12928 if (!standard_sse_constant_p (x))
12929 return false;
12931 default:
12932 break;
12935 /* Otherwise we handle everything else in the move patterns. */
12936 return true;
12939 /* Determine if it's legal to put X into the constant pool. This
12940 is not possible for the address of thread-local symbols, which
12941 is checked above. */
12943 static bool
12944 ix86_cannot_force_const_mem (machine_mode mode, rtx x)
12946 /* We can always put integral constants and vectors in memory. */
12947 switch (GET_CODE (x))
12949 case CONST_INT:
12950 case CONST_DOUBLE:
12951 case CONST_VECTOR:
12952 return false;
12954 default:
12955 break;
12957 return !ix86_legitimate_constant_p (mode, x);
12960 /* Return true if the symbol is marked as dllimport, or as a
12961 stub-variable; otherwise return false. */
12963 static bool
12964 is_imported_p (rtx x)
12966 if (!TARGET_DLLIMPORT_DECL_ATTRIBUTES
12967 || GET_CODE (x) != SYMBOL_REF)
12968 return false;
12970 return SYMBOL_REF_DLLIMPORT_P (x) || SYMBOL_REF_STUBVAR_P (x);
12974 /* Nonzero if the constant value X is a legitimate general operand
12975 when generating PIC code. It is given that flag_pic is on and
12976 that X satisfies CONSTANT_P or is a CONST_DOUBLE. */
12978 bool
12979 legitimate_pic_operand_p (rtx x)
12981 rtx inner;
12983 switch (GET_CODE (x))
12985 case CONST:
12986 inner = XEXP (x, 0);
12987 if (GET_CODE (inner) == PLUS
12988 && CONST_INT_P (XEXP (inner, 1)))
12989 inner = XEXP (inner, 0);
12991 /* Only some unspecs are valid as "constants". */
12992 if (GET_CODE (inner) == UNSPEC)
12993 switch (XINT (inner, 1))
12995 case UNSPEC_GOT:
12996 case UNSPEC_GOTOFF:
12997 case UNSPEC_PLTOFF:
12998 return TARGET_64BIT;
12999 case UNSPEC_TPOFF:
13000 x = XVECEXP (inner, 0, 0);
13001 return (GET_CODE (x) == SYMBOL_REF
13002 && SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_EXEC);
13003 case UNSPEC_MACHOPIC_OFFSET:
13004 return legitimate_pic_address_disp_p (x);
13005 default:
13006 return false;
13008 /* FALLTHRU */
13010 case SYMBOL_REF:
13011 case LABEL_REF:
13012 return legitimate_pic_address_disp_p (x);
13014 default:
13015 return true;
13019 /* Determine if a given CONST RTX is a valid memory displacement
13020 in PIC mode. */
13022 bool
13023 legitimate_pic_address_disp_p (rtx disp)
13025 bool saw_plus;
13027 /* In 64bit mode we can allow direct addresses of symbols and labels
13028 when they are not dynamic symbols. */
13029 if (TARGET_64BIT)
13031 rtx op0 = disp, op1;
13033 switch (GET_CODE (disp))
13035 case LABEL_REF:
13036 return true;
13038 case CONST:
13039 if (GET_CODE (XEXP (disp, 0)) != PLUS)
13040 break;
13041 op0 = XEXP (XEXP (disp, 0), 0);
13042 op1 = XEXP (XEXP (disp, 0), 1);
13043 if (!CONST_INT_P (op1)
13044 || INTVAL (op1) >= 16*1024*1024
13045 || INTVAL (op1) < -16*1024*1024)
13046 break;
13047 if (GET_CODE (op0) == LABEL_REF)
13048 return true;
13049 if (GET_CODE (op0) == CONST
13050 && GET_CODE (XEXP (op0, 0)) == UNSPEC
13051 && XINT (XEXP (op0, 0), 1) == UNSPEC_PCREL)
13052 return true;
13053 if (GET_CODE (op0) == UNSPEC
13054 && XINT (op0, 1) == UNSPEC_PCREL)
13055 return true;
13056 if (GET_CODE (op0) != SYMBOL_REF)
13057 break;
13058 /* FALLTHRU */
13060 case SYMBOL_REF:
13061 /* TLS references should always be enclosed in UNSPEC.
13062 The dllimported symbol always needs to be resolved. */
13063 if (SYMBOL_REF_TLS_MODEL (op0)
13064 || (TARGET_DLLIMPORT_DECL_ATTRIBUTES && SYMBOL_REF_DLLIMPORT_P (op0)))
13065 return false;
13067 if (TARGET_PECOFF)
13069 if (is_imported_p (op0))
13070 return true;
13072 if (SYMBOL_REF_FAR_ADDR_P (op0)
13073 || !SYMBOL_REF_LOCAL_P (op0))
13074 break;
13076 /* Function symbols need to be resolved only for
13077 the large model.
13078 For the small model we don't need to resolve anything
13079 here. */
13080 if ((ix86_cmodel != CM_LARGE_PIC
13081 && SYMBOL_REF_FUNCTION_P (op0))
13082 || ix86_cmodel == CM_SMALL_PIC)
13083 return true;
13084 /* Non-external symbols don't need to be resolved for
13085 the large and medium models. */
13086 if ((ix86_cmodel == CM_LARGE_PIC
13087 || ix86_cmodel == CM_MEDIUM_PIC)
13088 && !SYMBOL_REF_EXTERNAL_P (op0))
13089 return true;
13091 else if (!SYMBOL_REF_FAR_ADDR_P (op0)
13092 && SYMBOL_REF_LOCAL_P (op0)
13093 && ix86_cmodel != CM_LARGE_PIC)
13094 return true;
13095 break;
13097 default:
13098 break;
13101 if (GET_CODE (disp) != CONST)
13102 return false;
13103 disp = XEXP (disp, 0);
13105 if (TARGET_64BIT)
13107 /* It is not safe to allow PLUS expressions. This limits the allowed
13108 distance of GOT table references. We should not need these anyway. */
13109 if (GET_CODE (disp) != UNSPEC
13110 || (XINT (disp, 1) != UNSPEC_GOTPCREL
13111 && XINT (disp, 1) != UNSPEC_GOTOFF
13112 && XINT (disp, 1) != UNSPEC_PCREL
13113 && XINT (disp, 1) != UNSPEC_PLTOFF))
13114 return false;
13116 if (GET_CODE (XVECEXP (disp, 0, 0)) != SYMBOL_REF
13117 && GET_CODE (XVECEXP (disp, 0, 0)) != LABEL_REF)
13118 return false;
13119 return true;
13122 saw_plus = false;
13123 if (GET_CODE (disp) == PLUS)
13125 if (!CONST_INT_P (XEXP (disp, 1)))
13126 return false;
13127 disp = XEXP (disp, 0);
13128 saw_plus = true;
13131 if (TARGET_MACHO && darwin_local_data_pic (disp))
13132 return true;
13134 if (GET_CODE (disp) != UNSPEC)
13135 return false;
13137 switch (XINT (disp, 1))
13139 case UNSPEC_GOT:
13140 if (saw_plus)
13141 return false;
13142 /* We need to check for both symbols and labels because VxWorks loads
13143 text labels with @GOT rather than @GOTOFF. See gotoff_operand for
13144 details. */
13145 return (GET_CODE (XVECEXP (disp, 0, 0)) == SYMBOL_REF
13146 || GET_CODE (XVECEXP (disp, 0, 0)) == LABEL_REF);
13147 case UNSPEC_GOTOFF:
13148 /* Refuse GOTOFF in 64bit mode since it is always 64bit when used.
13149 While the ABI also specifies a 32bit relocation, we don't produce
13150 it in the small PIC model at all. */
13151 if ((GET_CODE (XVECEXP (disp, 0, 0)) == SYMBOL_REF
13152 || GET_CODE (XVECEXP (disp, 0, 0)) == LABEL_REF)
13153 && !TARGET_64BIT)
13154 return !TARGET_PECOFF && gotoff_operand (XVECEXP (disp, 0, 0), Pmode);
13155 return false;
13156 case UNSPEC_GOTTPOFF:
13157 case UNSPEC_GOTNTPOFF:
13158 case UNSPEC_INDNTPOFF:
13159 if (saw_plus)
13160 return false;
13161 disp = XVECEXP (disp, 0, 0);
13162 return (GET_CODE (disp) == SYMBOL_REF
13163 && SYMBOL_REF_TLS_MODEL (disp) == TLS_MODEL_INITIAL_EXEC);
13164 case UNSPEC_NTPOFF:
13165 disp = XVECEXP (disp, 0, 0);
13166 return (GET_CODE (disp) == SYMBOL_REF
13167 && SYMBOL_REF_TLS_MODEL (disp) == TLS_MODEL_LOCAL_EXEC);
13168 case UNSPEC_DTPOFF:
13169 disp = XVECEXP (disp, 0, 0);
13170 return (GET_CODE (disp) == SYMBOL_REF
13171 && SYMBOL_REF_TLS_MODEL (disp) == TLS_MODEL_LOCAL_DYNAMIC);
13174 return false;
13177 /* Our implementation of LEGITIMIZE_RELOAD_ADDRESS. Reloads the invalid
13178 parts of the address X in place where possible. Returns true if the
13179 calling macro should goto WIN (i.e. something was reloaded), false
13180 if it should not. */
13182 bool
13183 ix86_legitimize_reload_address (rtx x, machine_mode, int opnum, int type,
13184 int)
13186 /* Reload can generate:
13188 (plus:DI (plus:DI (unspec:DI [(const_int 0 [0])] UNSPEC_TP)
13189 (reg:DI 97))
13190 (reg:DI 2 cx))
13192 This RTX is rejected by ix86_legitimate_address_p due to
13193 non-strictness of base register 97. Following this rejection,
13194 reload pushes all three components into separate registers,
13195 creating an invalid memory address RTX.
13197 The following code reloads only the invalid part of the
13198 memory address RTX. */
13200 if (GET_CODE (x) == PLUS
13201 && REG_P (XEXP (x, 1))
13202 && GET_CODE (XEXP (x, 0)) == PLUS
13203 && REG_P (XEXP (XEXP (x, 0), 1)))
13205 rtx base, index;
13206 bool something_reloaded = false;
13208 base = XEXP (XEXP (x, 0), 1);
13209 if (!REG_OK_FOR_BASE_STRICT_P (base))
13211 push_reload (base, NULL_RTX, &XEXP (XEXP (x, 0), 1), NULL,
13212 BASE_REG_CLASS, GET_MODE (x), VOIDmode, 0, 0,
13213 opnum, (enum reload_type) type);
13214 something_reloaded = true;
13217 index = XEXP (x, 1);
13218 if (!REG_OK_FOR_INDEX_STRICT_P (index))
13220 push_reload (index, NULL_RTX, &XEXP (x, 1), NULL,
13221 INDEX_REG_CLASS, GET_MODE (x), VOIDmode, 0, 0,
13222 opnum, (enum reload_type) type);
13223 something_reloaded = true;
13226 gcc_assert (something_reloaded);
13227 return true;
13230 return false;
13233 /* Determine if op is a suitable RTX for an address register.
13234 Return the naked register if a register or a register subreg is
13235 found, otherwise return NULL_RTX. */
13237 static rtx
13238 ix86_validate_address_register (rtx op)
13240 machine_mode mode = GET_MODE (op);
13242 /* Only SImode or DImode registers can form the address. */
13243 if (mode != SImode && mode != DImode)
13244 return NULL_RTX;
13246 if (REG_P (op))
13247 return op;
13248 else if (GET_CODE (op) == SUBREG)
13250 rtx reg = SUBREG_REG (op);
13252 if (!REG_P (reg))
13253 return NULL_RTX;
13255 mode = GET_MODE (reg);
13257 /* Don't allow SUBREGs that span more than a word. It can
13258 lead to spill failures when the register is one word out
13259 of a two word structure. */
13260 if (GET_MODE_SIZE (mode) > UNITS_PER_WORD)
13261 return NULL_RTX;
13263 /* Allow only SUBREGs of non-eliminable hard registers. */
13264 if (register_no_elim_operand (reg, mode))
13265 return reg;
13268 /* Op is not a register. */
13269 return NULL_RTX;
13272 /* Recognizes RTL expressions that are valid memory addresses for an
13273 instruction. The MODE argument is the machine mode for the MEM
13274 expression that wants to use this address.
13276 It only recognizes addresses in canonical form. LEGITIMIZE_ADDRESS should
13277 convert common non-canonical forms to canonical form so that they will
13278 be recognized. */
13280 static bool
13281 ix86_legitimate_address_p (machine_mode, rtx addr, bool strict)
13283 struct ix86_address parts;
13284 rtx base, index, disp;
13285 HOST_WIDE_INT scale;
13286 enum ix86_address_seg seg;
13288 if (ix86_decompose_address (addr, &parts) <= 0)
13289 /* Decomposition failed. */
13290 return false;
13292 base = parts.base;
13293 index = parts.index;
13294 disp = parts.disp;
13295 scale = parts.scale;
13296 seg = parts.seg;
13298 /* Validate base register. */
13299 if (base)
13301 rtx reg = ix86_validate_address_register (base);
13303 if (reg == NULL_RTX)
13304 return false;
13306 if ((strict && ! REG_OK_FOR_BASE_STRICT_P (reg))
13307 || (! strict && ! REG_OK_FOR_BASE_NONSTRICT_P (reg)))
13308 /* Base is not valid. */
13309 return false;
13312 /* Validate index register. */
13313 if (index)
13315 rtx reg = ix86_validate_address_register (index);
13317 if (reg == NULL_RTX)
13318 return false;
13320 if ((strict && ! REG_OK_FOR_INDEX_STRICT_P (reg))
13321 || (! strict && ! REG_OK_FOR_INDEX_NONSTRICT_P (reg)))
13322 /* Index is not valid. */
13323 return false;
13326 /* Index and base should have the same mode. */
13327 if (base && index
13328 && GET_MODE (base) != GET_MODE (index))
13329 return false;
13331 /* Address override works only on the (%reg) part of %fs:(%reg). */
13332 if (seg != SEG_DEFAULT
13333 && ((base && GET_MODE (base) != word_mode)
13334 || (index && GET_MODE (index) != word_mode)))
13335 return false;
13337 /* Validate scale factor. */
13338 if (scale != 1)
13340 if (!index)
13341 /* Scale without index. */
13342 return false;
13344 if (scale != 2 && scale != 4 && scale != 8)
13345 /* Scale is not a valid multiplier. */
13346 return false;
13349 /* Validate displacement. */
13350 if (disp)
13352 if (GET_CODE (disp) == CONST
13353 && GET_CODE (XEXP (disp, 0)) == UNSPEC
13354 && XINT (XEXP (disp, 0), 1) != UNSPEC_MACHOPIC_OFFSET)
13355 switch (XINT (XEXP (disp, 0), 1))
13357 /* Refuse GOTOFF and GOT in 64bit mode since it is always 64bit when
13358 used. While the ABI also specifies 32bit relocations, we don't produce
13359 them at all and use IP relative addressing instead. */
13360 case UNSPEC_GOT:
13361 case UNSPEC_GOTOFF:
13362 gcc_assert (flag_pic);
13363 if (!TARGET_64BIT)
13364 goto is_legitimate_pic;
13366 /* 64bit address unspec. */
13367 return false;
13369 case UNSPEC_GOTPCREL:
13370 case UNSPEC_PCREL:
13371 gcc_assert (flag_pic);
13372 goto is_legitimate_pic;
13374 case UNSPEC_GOTTPOFF:
13375 case UNSPEC_GOTNTPOFF:
13376 case UNSPEC_INDNTPOFF:
13377 case UNSPEC_NTPOFF:
13378 case UNSPEC_DTPOFF:
13379 break;
13381 case UNSPEC_STACK_CHECK:
13382 gcc_assert (flag_split_stack);
13383 break;
13385 default:
13386 /* Invalid address unspec. */
13387 return false;
13390 else if (SYMBOLIC_CONST (disp)
13391 && (flag_pic
13392 || (TARGET_MACHO
13393 #if TARGET_MACHO
13394 && MACHOPIC_INDIRECT
13395 && !machopic_operand_p (disp)
13396 #endif
13400 is_legitimate_pic:
13401 if (TARGET_64BIT && (index || base))
13403 /* foo@dtpoff(%rX) is ok. */
13404 if (GET_CODE (disp) != CONST
13405 || GET_CODE (XEXP (disp, 0)) != PLUS
13406 || GET_CODE (XEXP (XEXP (disp, 0), 0)) != UNSPEC
13407 || !CONST_INT_P (XEXP (XEXP (disp, 0), 1))
13408 || (XINT (XEXP (XEXP (disp, 0), 0), 1) != UNSPEC_DTPOFF
13409 && XINT (XEXP (XEXP (disp, 0), 0), 1) != UNSPEC_NTPOFF))
13410 /* Non-constant pic memory reference. */
13411 return false;
13413 else if ((!TARGET_MACHO || flag_pic)
13414 && ! legitimate_pic_address_disp_p (disp))
13415 /* Displacement is an invalid pic construct. */
13416 return false;
13417 #if TARGET_MACHO
13418 else if (MACHO_DYNAMIC_NO_PIC_P
13419 && !ix86_legitimate_constant_p (Pmode, disp))
13420 /* The displacement must be referenced via a non_lazy_pointer. */
13421 return false;
13422 #endif
13424 /* This code used to verify that a symbolic pic displacement
13425 includes the pic_offset_table_rtx register.
13427 While this is a good idea, unfortunately these constructs may
13428 be created by the "adds using lea" optimization for incorrect
13429 code like:
13431 int a;
13432 int foo(int i)
13434 return *(&a+i);
13437 This code is nonsensical, but results in addressing the
13438 GOT table with a pic_offset_table_rtx base. We can't
13439 just refuse it easily, since it gets matched by the
13440 "addsi3" pattern, which later gets split to lea in the
13441 case where the output register differs from the input. While
13442 this could be handled by a separate addsi pattern for this case
13443 that never results in lea, disabling this test seems to be the
13444 easier and correct fix for the crash. */
13446 else if (GET_CODE (disp) != LABEL_REF
13447 && !CONST_INT_P (disp)
13448 && (GET_CODE (disp) != CONST
13449 || !ix86_legitimate_constant_p (Pmode, disp))
13450 && (GET_CODE (disp) != SYMBOL_REF
13451 || !ix86_legitimate_constant_p (Pmode, disp)))
13452 /* Displacement is not constant. */
13453 return false;
13454 else if (TARGET_64BIT
13455 && !x86_64_immediate_operand (disp, VOIDmode))
13456 /* Displacement is out of range. */
13457 return false;
13458 /* In x32 mode, constant addresses are sign extended to 64bit, so
13459 we have to prevent addresses from 0x80000000 to 0xffffffff. */
13460 else if (TARGET_X32 && !(index || base)
13461 && CONST_INT_P (disp)
13462 && val_signbit_known_set_p (SImode, INTVAL (disp)))
13463 return false;
13466 /* Everything looks valid. */
13467 return true;
13470 /* Determine if a given RTX is a valid constant address. */
13472 bool
13473 constant_address_p (rtx x)
13475 return CONSTANT_P (x) && ix86_legitimate_address_p (Pmode, x, 1);
13478 /* Return a unique alias set for the GOT. */
13480 static alias_set_type
13481 ix86_GOT_alias_set (void)
13483 static alias_set_type set = -1;
13484 if (set == -1)
13485 set = new_alias_set ();
13486 return set;
13489 /* Set regs_ever_live for PIC base address register
13490 to true if required. */
13491 static void
13492 set_pic_reg_ever_live ()
13494 if (reload_in_progress)
13495 df_set_regs_ever_live (REGNO (pic_offset_table_rtx), true);
13498 /* Return a legitimate reference for ORIG (an address) using the
13499 register REG. If REG is 0, a new pseudo is generated.
13501 There are two types of references that must be handled:
13503 1. Global data references must load the address from the GOT, via
13504 the PIC reg. An insn is emitted to do this load, and the reg is
13505 returned.
13507 2. Static data references, constant pool addresses, and code labels
13508 compute the address as an offset from the GOT, whose base is in
13509 the PIC reg. Static data objects have SYMBOL_FLAG_LOCAL set to
13510 differentiate them from global data objects. The returned
13511 address is the PIC reg + an unspec constant.
13513 TARGET_LEGITIMATE_ADDRESS_P rejects symbolic references unless the PIC
13514 reg also appears in the address. */
13516 static rtx
13517 legitimize_pic_address (rtx orig, rtx reg)
13519 rtx addr = orig;
13520 rtx new_rtx = orig;
13522 #if TARGET_MACHO
13523 if (TARGET_MACHO && !TARGET_64BIT)
13525 if (reg == 0)
13526 reg = gen_reg_rtx (Pmode);
13527 /* Use the generic Mach-O PIC machinery. */
13528 return machopic_legitimize_pic_address (orig, GET_MODE (orig), reg);
13530 #endif
13532 if (TARGET_64BIT && TARGET_DLLIMPORT_DECL_ATTRIBUTES)
13534 rtx tmp = legitimize_pe_coff_symbol (addr, true);
13535 if (tmp)
13536 return tmp;
13539 if (TARGET_64BIT && legitimate_pic_address_disp_p (addr))
13540 new_rtx = addr;
13541 else if (TARGET_64BIT && !TARGET_PECOFF
13542 && ix86_cmodel != CM_SMALL_PIC && gotoff_operand (addr, Pmode))
13544 rtx tmpreg;
13545 /* This symbol may be referenced via a displacement from the PIC
13546 base address (@GOTOFF). */
13548 set_pic_reg_ever_live ();
13549 if (GET_CODE (addr) == CONST)
13550 addr = XEXP (addr, 0);
13551 if (GET_CODE (addr) == PLUS)
13553 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, XEXP (addr, 0)),
13554 UNSPEC_GOTOFF);
13555 new_rtx = gen_rtx_PLUS (Pmode, new_rtx, XEXP (addr, 1));
13557 else
13558 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTOFF);
13559 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
13560 if (!reg)
13561 tmpreg = gen_reg_rtx (Pmode);
13562 else
13563 tmpreg = reg;
13564 emit_move_insn (tmpreg, new_rtx);
13566 if (reg != 0)
13568 new_rtx = expand_simple_binop (Pmode, PLUS, reg, pic_offset_table_rtx,
13569 tmpreg, 1, OPTAB_DIRECT);
13570 new_rtx = reg;
13572 else
13573 new_rtx = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, tmpreg);
13575 else if (!TARGET_64BIT && !TARGET_PECOFF && gotoff_operand (addr, Pmode))
13577 /* This symbol may be referenced via a displacement from the PIC
13578 base address (@GOTOFF). */
13580 set_pic_reg_ever_live ();
13581 if (GET_CODE (addr) == CONST)
13582 addr = XEXP (addr, 0);
13583 if (GET_CODE (addr) == PLUS)
13585 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, XEXP (addr, 0)),
13586 UNSPEC_GOTOFF);
13587 new_rtx = gen_rtx_PLUS (Pmode, new_rtx, XEXP (addr, 1));
13589 else
13590 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTOFF);
13591 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
13592 new_rtx = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new_rtx);
13594 if (reg != 0)
13596 emit_move_insn (reg, new_rtx);
13597 new_rtx = reg;
13600 else if ((GET_CODE (addr) == SYMBOL_REF && SYMBOL_REF_TLS_MODEL (addr) == 0)
13601 /* We can't use @GOTOFF for text labels on VxWorks;
13602 see gotoff_operand. */
13603 || (TARGET_VXWORKS_RTP && GET_CODE (addr) == LABEL_REF))
13605 rtx tmp = legitimize_pe_coff_symbol (addr, true);
13606 if (tmp)
13607 return tmp;
13609 /* For x64 PE-COFF there is no GOT table, so we use the
13610 address directly. */
13611 if (TARGET_64BIT && TARGET_PECOFF)
13613 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_PCREL);
13614 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
13616 if (reg == 0)
13617 reg = gen_reg_rtx (Pmode);
13618 emit_move_insn (reg, new_rtx);
13619 new_rtx = reg;
13621 else if (TARGET_64BIT && ix86_cmodel != CM_LARGE_PIC)
13623 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTPCREL);
13624 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
13625 new_rtx = gen_const_mem (Pmode, new_rtx);
13626 set_mem_alias_set (new_rtx, ix86_GOT_alias_set ());
13628 if (reg == 0)
13629 reg = gen_reg_rtx (Pmode);
13630 /* Use gen_movsi directly, otherwise the address is loaded
13631 into a register for CSE. We don't want to CSE these addresses;
13632 instead we CSE addresses from the GOT table, so skip this. */
13633 emit_insn (gen_movsi (reg, new_rtx));
13634 new_rtx = reg;
13636 else
13638 /* This symbol must be referenced via a load from the
13639 Global Offset Table (@GOT). */
13641 set_pic_reg_ever_live ();
13642 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOT);
13643 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
13644 if (TARGET_64BIT)
13645 new_rtx = force_reg (Pmode, new_rtx);
13646 new_rtx = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new_rtx);
13647 new_rtx = gen_const_mem (Pmode, new_rtx);
13648 set_mem_alias_set (new_rtx, ix86_GOT_alias_set ());
13650 if (reg == 0)
13651 reg = gen_reg_rtx (Pmode);
13652 emit_move_insn (reg, new_rtx);
13653 new_rtx = reg;
13656 else
13658 if (CONST_INT_P (addr)
13659 && !x86_64_immediate_operand (addr, VOIDmode))
13661 if (reg)
13663 emit_move_insn (reg, addr);
13664 new_rtx = reg;
13666 else
13667 new_rtx = force_reg (Pmode, addr);
13669 else if (GET_CODE (addr) == CONST)
13671 addr = XEXP (addr, 0);
13673 /* We must match stuff we generate before. Assume the only
13674 unspecs that can get here are ours. Not that we could do
13675 anything with them anyway.... */
13676 if (GET_CODE (addr) == UNSPEC
13677 || (GET_CODE (addr) == PLUS
13678 && GET_CODE (XEXP (addr, 0)) == UNSPEC))
13679 return orig;
13680 gcc_assert (GET_CODE (addr) == PLUS);
13682 if (GET_CODE (addr) == PLUS)
13684 rtx op0 = XEXP (addr, 0), op1 = XEXP (addr, 1);
13686 /* Check first to see if this is a constant offset from a @GOTOFF
13687 symbol reference. */
13688 if (!TARGET_PECOFF && gotoff_operand (op0, Pmode)
13689 && CONST_INT_P (op1))
13691 if (!TARGET_64BIT)
13693 set_pic_reg_ever_live ();
13694 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, op0),
13695 UNSPEC_GOTOFF);
13696 new_rtx = gen_rtx_PLUS (Pmode, new_rtx, op1);
13697 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
13698 new_rtx = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new_rtx);
13700 if (reg != 0)
13702 emit_move_insn (reg, new_rtx);
13703 new_rtx = reg;
13706 else
13708 if (INTVAL (op1) < -16*1024*1024
13709 || INTVAL (op1) >= 16*1024*1024)
13711 if (!x86_64_immediate_operand (op1, Pmode))
13712 op1 = force_reg (Pmode, op1);
13713 new_rtx = gen_rtx_PLUS (Pmode, force_reg (Pmode, op0), op1);
13717 else
13719 rtx base = legitimize_pic_address (op0, reg);
13720 machine_mode mode = GET_MODE (base);
13721 new_rtx
13722 = legitimize_pic_address (op1, base == reg ? NULL_RTX : reg);
13724 if (CONST_INT_P (new_rtx))
13726 if (INTVAL (new_rtx) < -16*1024*1024
13727 || INTVAL (new_rtx) >= 16*1024*1024)
13729 if (!x86_64_immediate_operand (new_rtx, mode))
13730 new_rtx = force_reg (mode, new_rtx);
13731 new_rtx
13732 = gen_rtx_PLUS (mode, force_reg (mode, base), new_rtx);
13734 else
13735 new_rtx = plus_constant (mode, base, INTVAL (new_rtx));
13737 else
13739 if (GET_CODE (new_rtx) == PLUS
13740 && CONSTANT_P (XEXP (new_rtx, 1)))
13742 base = gen_rtx_PLUS (mode, base, XEXP (new_rtx, 0));
13743 new_rtx = XEXP (new_rtx, 1);
13745 new_rtx = gen_rtx_PLUS (mode, base, new_rtx);
13750 return new_rtx;
13753 /* Load the thread pointer. If TO_REG is true, force it into a register. */
13755 static rtx
13756 get_thread_pointer (machine_mode tp_mode, bool to_reg)
13758 rtx tp = gen_rtx_UNSPEC (ptr_mode, gen_rtvec (1, const0_rtx), UNSPEC_TP);
13760 if (GET_MODE (tp) != tp_mode)
13762 gcc_assert (GET_MODE (tp) == SImode);
13763 gcc_assert (tp_mode == DImode);
13765 tp = gen_rtx_ZERO_EXTEND (tp_mode, tp);
13768 if (to_reg)
13769 tp = copy_to_mode_reg (tp_mode, tp);
13771 return tp;
13774 /* Construct the SYMBOL_REF for the tls_get_addr function. */
13776 static GTY(()) rtx ix86_tls_symbol;
13778 static rtx
13779 ix86_tls_get_addr (void)
13781 if (!ix86_tls_symbol)
13783 const char *sym
13784 = ((TARGET_ANY_GNU_TLS && !TARGET_64BIT)
13785 ? "___tls_get_addr" : "__tls_get_addr");
13787 ix86_tls_symbol = gen_rtx_SYMBOL_REF (Pmode, sym);
13790 if (ix86_cmodel == CM_LARGE_PIC && !TARGET_PECOFF)
13792 rtx unspec = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, ix86_tls_symbol),
13793 UNSPEC_PLTOFF);
13794 return gen_rtx_PLUS (Pmode, pic_offset_table_rtx,
13795 gen_rtx_CONST (Pmode, unspec));
13798 return ix86_tls_symbol;
13801 /* Construct the SYMBOL_REF for the _TLS_MODULE_BASE_ symbol. */
13803 static GTY(()) rtx ix86_tls_module_base_symbol;
13806 ix86_tls_module_base (void)
13808 if (!ix86_tls_module_base_symbol)
13810 ix86_tls_module_base_symbol
13811 = gen_rtx_SYMBOL_REF (Pmode, "_TLS_MODULE_BASE_");
13813 SYMBOL_REF_FLAGS (ix86_tls_module_base_symbol)
13814 |= TLS_MODEL_GLOBAL_DYNAMIC << SYMBOL_FLAG_TLS_SHIFT;
13817 return ix86_tls_module_base_symbol;
13820 /* A subroutine of ix86_legitimize_address and ix86_expand_move. FOR_MOV is
13821 false if we expect this to be used for a memory address and true if
13822 we expect to load the address into a register. */
13824 static rtx
13825 legitimize_tls_address (rtx x, enum tls_model model, bool for_mov)
13827 rtx dest, base, off;
13828 rtx pic = NULL_RTX, tp = NULL_RTX;
13829 machine_mode tp_mode = Pmode;
13830 int type;
13832 /* Fall back to the global dynamic model if the tool chain cannot
13833 support local dynamic. */
13834 if (TARGET_SUN_TLS && !TARGET_64BIT
13835 && !HAVE_AS_IX86_TLSLDMPLT && !HAVE_AS_IX86_TLSLDM
13836 && model == TLS_MODEL_LOCAL_DYNAMIC)
13837 model = TLS_MODEL_GLOBAL_DYNAMIC;
13839 switch (model)
13841 case TLS_MODEL_GLOBAL_DYNAMIC:
13842 dest = gen_reg_rtx (Pmode);
13844 if (!TARGET_64BIT)
13846 if (flag_pic && !TARGET_PECOFF)
13847 pic = pic_offset_table_rtx;
13848 else
13850 pic = gen_reg_rtx (Pmode);
13851 emit_insn (gen_set_got (pic));
13855 if (TARGET_GNU2_TLS)
13857 if (TARGET_64BIT)
13858 emit_insn (gen_tls_dynamic_gnu2_64 (dest, x));
13859 else
13860 emit_insn (gen_tls_dynamic_gnu2_32 (dest, x, pic));
13862 tp = get_thread_pointer (Pmode, true);
13863 dest = force_reg (Pmode, gen_rtx_PLUS (Pmode, tp, dest));
13865 if (GET_MODE (x) != Pmode)
13866 x = gen_rtx_ZERO_EXTEND (Pmode, x);
13868 set_unique_reg_note (get_last_insn (), REG_EQUAL, x);
13870 else
13872 rtx caddr = ix86_tls_get_addr ();
13874 if (TARGET_64BIT)
13876 rtx rax = gen_rtx_REG (Pmode, AX_REG);
13877 rtx_insn *insns;
13879 start_sequence ();
13880 emit_call_insn
13881 (ix86_gen_tls_global_dynamic_64 (rax, x, caddr));
13882 insns = get_insns ();
13883 end_sequence ();
13885 if (GET_MODE (x) != Pmode)
13886 x = gen_rtx_ZERO_EXTEND (Pmode, x);
13888 RTL_CONST_CALL_P (insns) = 1;
13889 emit_libcall_block (insns, dest, rax, x);
13891 else
13892 emit_insn (gen_tls_global_dynamic_32 (dest, x, pic, caddr));
13894 break;
13896 case TLS_MODEL_LOCAL_DYNAMIC:
13897 base = gen_reg_rtx (Pmode);
13899 if (!TARGET_64BIT)
13901 if (flag_pic)
13902 pic = pic_offset_table_rtx;
13903 else
13905 pic = gen_reg_rtx (Pmode);
13906 emit_insn (gen_set_got (pic));
13910 if (TARGET_GNU2_TLS)
13912 rtx tmp = ix86_tls_module_base ();
13914 if (TARGET_64BIT)
13915 emit_insn (gen_tls_dynamic_gnu2_64 (base, tmp));
13916 else
13917 emit_insn (gen_tls_dynamic_gnu2_32 (base, tmp, pic));
13919 tp = get_thread_pointer (Pmode, true);
13920 set_unique_reg_note (get_last_insn (), REG_EQUAL,
13921 gen_rtx_MINUS (Pmode, tmp, tp));
13923 else
13925 rtx caddr = ix86_tls_get_addr ();
13927 if (TARGET_64BIT)
13929 rtx rax = gen_rtx_REG (Pmode, AX_REG);
13930 rtx_insn *insns;
13931 rtx eqv;
13933 start_sequence ();
13934 emit_call_insn
13935 (ix86_gen_tls_local_dynamic_base_64 (rax, caddr));
13936 insns = get_insns ();
13937 end_sequence ();
13939 /* Attach a unique REG_EQUAL, to allow the RTL optimizers to
13940 share the LD_BASE result with other LD model accesses. */
13941 eqv = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const0_rtx),
13942 UNSPEC_TLS_LD_BASE);
13944 RTL_CONST_CALL_P (insns) = 1;
13945 emit_libcall_block (insns, base, rax, eqv);
13947 else
13948 emit_insn (gen_tls_local_dynamic_base_32 (base, pic, caddr));
13951 off = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x), UNSPEC_DTPOFF);
13952 off = gen_rtx_CONST (Pmode, off);
13954 dest = force_reg (Pmode, gen_rtx_PLUS (Pmode, base, off));
13956 if (TARGET_GNU2_TLS)
13958 dest = force_reg (Pmode, gen_rtx_PLUS (Pmode, dest, tp));
13960 if (GET_MODE (x) != Pmode)
13961 x = gen_rtx_ZERO_EXTEND (Pmode, x);
13963 set_unique_reg_note (get_last_insn (), REG_EQUAL, x);
13965 break;
13967 case TLS_MODEL_INITIAL_EXEC:
13968 if (TARGET_64BIT)
13970 if (TARGET_SUN_TLS && !TARGET_X32)
13972 /* The Sun linker took the AMD64 TLS spec literally
13973 and can only handle %rax as the destination of the
13974 initial executable code sequence. */
13976 dest = gen_reg_rtx (DImode);
13977 emit_insn (gen_tls_initial_exec_64_sun (dest, x));
13978 return dest;
13981 /* Generate DImode references to avoid %fs:(%reg32)
13982 problems and the linker IE->LE relaxation bug. */
13983 tp_mode = DImode;
13984 pic = NULL;
13985 type = UNSPEC_GOTNTPOFF;
13987 else if (flag_pic)
13989 set_pic_reg_ever_live ();
13990 pic = pic_offset_table_rtx;
13991 type = TARGET_ANY_GNU_TLS ? UNSPEC_GOTNTPOFF : UNSPEC_GOTTPOFF;
13993 else if (!TARGET_ANY_GNU_TLS)
13995 pic = gen_reg_rtx (Pmode);
13996 emit_insn (gen_set_got (pic));
13997 type = UNSPEC_GOTTPOFF;
13999 else
14001 pic = NULL;
14002 type = UNSPEC_INDNTPOFF;
14005 off = gen_rtx_UNSPEC (tp_mode, gen_rtvec (1, x), type);
14006 off = gen_rtx_CONST (tp_mode, off);
14007 if (pic)
14008 off = gen_rtx_PLUS (tp_mode, pic, off);
14009 off = gen_const_mem (tp_mode, off);
14010 set_mem_alias_set (off, ix86_GOT_alias_set ());
14012 if (TARGET_64BIT || TARGET_ANY_GNU_TLS)
14014 base = get_thread_pointer (tp_mode,
14015 for_mov || !TARGET_TLS_DIRECT_SEG_REFS);
14016 off = force_reg (tp_mode, off);
14017 return gen_rtx_PLUS (tp_mode, base, off);
14019 else
14021 base = get_thread_pointer (Pmode, true);
14022 dest = gen_reg_rtx (Pmode);
14023 emit_insn (ix86_gen_sub3 (dest, base, off));
14025 break;
14027 case TLS_MODEL_LOCAL_EXEC:
14028 off = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x),
14029 (TARGET_64BIT || TARGET_ANY_GNU_TLS)
14030 ? UNSPEC_NTPOFF : UNSPEC_TPOFF);
14031 off = gen_rtx_CONST (Pmode, off);
14033 if (TARGET_64BIT || TARGET_ANY_GNU_TLS)
14035 base = get_thread_pointer (Pmode,
14036 for_mov || !TARGET_TLS_DIRECT_SEG_REFS);
14037 return gen_rtx_PLUS (Pmode, base, off);
14039 else
14041 base = get_thread_pointer (Pmode, true);
14042 dest = gen_reg_rtx (Pmode);
14043 emit_insn (ix86_gen_sub3 (dest, base, off));
14045 break;
14047 default:
14048 gcc_unreachable ();
14051 return dest;
14054 /* Create or return the unique __imp_DECL dllimport symbol corresponding
14055 to symbol DECL if BEIMPORT is true. Otherwise create or return the
14056 unique refptr-DECL symbol corresponding to symbol DECL. */
14058 struct dllimport_hasher : ggc_cache_hasher<tree_map *>
14060 static inline hashval_t hash (tree_map *m) { return m->hash; }
14061 static inline bool
14062 equal (tree_map *a, tree_map *b)
14064 return a->base.from == b->base.from;
14067 static void
14068 handle_cache_entry (tree_map *&m)
14070 extern void gt_ggc_mx (tree_map *&);
14071 if (m == HTAB_EMPTY_ENTRY || m == HTAB_DELETED_ENTRY)
14072 return;
14073 else if (ggc_marked_p (m->base.from))
14074 gt_ggc_mx (m);
14075 else
14076 m = static_cast<tree_map *> (HTAB_DELETED_ENTRY);
14080 static GTY((cache)) hash_table<dllimport_hasher> *dllimport_map;
14082 static tree
14083 get_dllimport_decl (tree decl, bool beimport)
14085 struct tree_map *h, in;
14086 const char *name;
14087 const char *prefix;
14088 size_t namelen, prefixlen;
14089 char *imp_name;
14090 tree to;
14091 rtx rtl;
14093 if (!dllimport_map)
14094 dllimport_map = hash_table<dllimport_hasher>::create_ggc (512);
14096 in.hash = htab_hash_pointer (decl);
14097 in.base.from = decl;
14098 tree_map **loc = dllimport_map->find_slot_with_hash (&in, in.hash, INSERT);
14099 h = *loc;
14100 if (h)
14101 return h->to;
14103 *loc = h = ggc_alloc<tree_map> ();
14104 h->hash = in.hash;
14105 h->base.from = decl;
14106 h->to = to = build_decl (DECL_SOURCE_LOCATION (decl),
14107 VAR_DECL, NULL, ptr_type_node);
14108 DECL_ARTIFICIAL (to) = 1;
14109 DECL_IGNORED_P (to) = 1;
14110 DECL_EXTERNAL (to) = 1;
14111 TREE_READONLY (to) = 1;
14113 name = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (decl));
14114 name = targetm.strip_name_encoding (name);
14115 if (beimport)
14116 prefix = name[0] == FASTCALL_PREFIX || user_label_prefix[0] == 0
14117 ? "*__imp_" : "*__imp__";
14118 else
14119 prefix = user_label_prefix[0] == 0 ? "*.refptr." : "*refptr.";
14120 namelen = strlen (name);
14121 prefixlen = strlen (prefix);
14122 imp_name = (char *) alloca (namelen + prefixlen + 1);
14123 memcpy (imp_name, prefix, prefixlen);
14124 memcpy (imp_name + prefixlen, name, namelen + 1);
14126 name = ggc_alloc_string (imp_name, namelen + prefixlen);
14127 rtl = gen_rtx_SYMBOL_REF (Pmode, name);
14128 SET_SYMBOL_REF_DECL (rtl, to);
14129 SYMBOL_REF_FLAGS (rtl) = SYMBOL_FLAG_LOCAL | SYMBOL_FLAG_STUBVAR;
14130 if (!beimport)
14132 SYMBOL_REF_FLAGS (rtl) |= SYMBOL_FLAG_EXTERNAL;
14133 #ifdef SUB_TARGET_RECORD_STUB
14134 SUB_TARGET_RECORD_STUB (name);
14135 #endif
14138 rtl = gen_const_mem (Pmode, rtl);
14139 set_mem_alias_set (rtl, ix86_GOT_alias_set ());
14141 SET_DECL_RTL (to, rtl);
14142 SET_DECL_ASSEMBLER_NAME (to, get_identifier (name));
14144 return to;
14147 /* Expand SYMBOL into its corresponding far-address symbol.
14148 WANT_REG is true if we require the result to be a register. */
14150 static rtx
14151 legitimize_pe_coff_extern_decl (rtx symbol, bool want_reg)
14153 tree imp_decl;
14154 rtx x;
14156 gcc_assert (SYMBOL_REF_DECL (symbol));
14157 imp_decl = get_dllimport_decl (SYMBOL_REF_DECL (symbol), false);
14159 x = DECL_RTL (imp_decl);
14160 if (want_reg)
14161 x = force_reg (Pmode, x);
14162 return x;
14165 /* Expand SYMBOL into its corresponding dllimport symbol. WANT_REG is
14166 true if we require the result to be a register. */
14168 static rtx
14169 legitimize_dllimport_symbol (rtx symbol, bool want_reg)
14171 tree imp_decl;
14172 rtx x;
14174 gcc_assert (SYMBOL_REF_DECL (symbol));
14175 imp_decl = get_dllimport_decl (SYMBOL_REF_DECL (symbol), true);
14177 x = DECL_RTL (imp_decl);
14178 if (want_reg)
14179 x = force_reg (Pmode, x);
14180 return x;
14183 /* Expand SYMBOL into its corresponding dllimport or refptr symbol. WANT_REG
14184 is true if we require the result to be a register. */
14186 static rtx
14187 legitimize_pe_coff_symbol (rtx addr, bool inreg)
14189 if (!TARGET_PECOFF)
14190 return NULL_RTX;
14192 if (TARGET_DLLIMPORT_DECL_ATTRIBUTES)
14194 if (GET_CODE (addr) == SYMBOL_REF && SYMBOL_REF_DLLIMPORT_P (addr))
14195 return legitimize_dllimport_symbol (addr, inreg);
14196 if (GET_CODE (addr) == CONST
14197 && GET_CODE (XEXP (addr, 0)) == PLUS
14198 && GET_CODE (XEXP (XEXP (addr, 0), 0)) == SYMBOL_REF
14199 && SYMBOL_REF_DLLIMPORT_P (XEXP (XEXP (addr, 0), 0)))
14201 rtx t = legitimize_dllimport_symbol (XEXP (XEXP (addr, 0), 0), inreg);
14202 return gen_rtx_PLUS (Pmode, t, XEXP (XEXP (addr, 0), 1));
14206 if (ix86_cmodel != CM_LARGE_PIC && ix86_cmodel != CM_MEDIUM_PIC)
14207 return NULL_RTX;
14208 if (GET_CODE (addr) == SYMBOL_REF
14209 && !is_imported_p (addr)
14210 && SYMBOL_REF_EXTERNAL_P (addr)
14211 && SYMBOL_REF_DECL (addr))
14212 return legitimize_pe_coff_extern_decl (addr, inreg);
14214 if (GET_CODE (addr) == CONST
14215 && GET_CODE (XEXP (addr, 0)) == PLUS
14216 && GET_CODE (XEXP (XEXP (addr, 0), 0)) == SYMBOL_REF
14217 && !is_imported_p (XEXP (XEXP (addr, 0), 0))
14218 && SYMBOL_REF_EXTERNAL_P (XEXP (XEXP (addr, 0), 0))
14219 && SYMBOL_REF_DECL (XEXP (XEXP (addr, 0), 0)))
14221 rtx t = legitimize_pe_coff_extern_decl (XEXP (XEXP (addr, 0), 0), inreg);
14222 return gen_rtx_PLUS (Pmode, t, XEXP (XEXP (addr, 0), 1));
14224 return NULL_RTX;
14227 /* Try machine-dependent ways of modifying an illegitimate address
14228 to be legitimate. If we find one, return the new, valid address.
14229 This macro is used in only one place: `memory_address' in explow.c.
14231 OLDX is the address as it was before break_out_memory_refs was called.
14232 In some cases it is useful to look at this to decide what needs to be done.
14234 It is always safe for this macro to do nothing. It exists to recognize
14235 opportunities to optimize the output.
14237 For the 80386, we handle X+REG by loading X into a register R and
14238 using R+REG. R will go in a general reg and indexing will be used.
14239 However, if REG is a broken-out memory address or multiplication,
14240 nothing needs to be done because REG can certainly go in a general reg.
14242 When -fpic is used, special handling is needed for symbolic references.
14243 See comments by legitimize_pic_address in i386.c for details. */
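/* Illustrative sketch of the rewriting done below (the register numbers
   are invented for the example): an address such as

     (plus (ashift (reg 100) (const_int 2)) (reg 101))

   is first canonicalized into

     (plus (mult (reg 100) (const_int 4)) (reg 101))

   which matches the base + index*scale form accepted by
   ix86_legitimate_address_p; with -fpic, a SYMBOLIC_CONST address is
   instead handed off to legitimize_pic_address.  */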
14245 static rtx
14246 ix86_legitimize_address (rtx x, rtx, machine_mode mode)
14248 int changed = 0;
14249 unsigned log;
14251 log = GET_CODE (x) == SYMBOL_REF ? SYMBOL_REF_TLS_MODEL (x) : 0;
14252 if (log)
14253 return legitimize_tls_address (x, (enum tls_model) log, false);
14254 if (GET_CODE (x) == CONST
14255 && GET_CODE (XEXP (x, 0)) == PLUS
14256 && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF
14257 && (log = SYMBOL_REF_TLS_MODEL (XEXP (XEXP (x, 0), 0))))
14259 rtx t = legitimize_tls_address (XEXP (XEXP (x, 0), 0),
14260 (enum tls_model) log, false);
14261 return gen_rtx_PLUS (Pmode, t, XEXP (XEXP (x, 0), 1));
14264 if (TARGET_DLLIMPORT_DECL_ATTRIBUTES)
14266 rtx tmp = legitimize_pe_coff_symbol (x, true);
14267 if (tmp)
14268 return tmp;
14271 if (flag_pic && SYMBOLIC_CONST (x))
14272 return legitimize_pic_address (x, 0);
14274 #if TARGET_MACHO
14275 if (MACHO_DYNAMIC_NO_PIC_P && SYMBOLIC_CONST (x))
14276 return machopic_indirect_data_reference (x, 0);
14277 #endif
14279 /* Canonicalize shifts by 0, 1, 2, 3 into multiply */
14280 if (GET_CODE (x) == ASHIFT
14281 && CONST_INT_P (XEXP (x, 1))
14282 && (unsigned HOST_WIDE_INT) INTVAL (XEXP (x, 1)) < 4)
14284 changed = 1;
14285 log = INTVAL (XEXP (x, 1));
14286 x = gen_rtx_MULT (Pmode, force_reg (Pmode, XEXP (x, 0)),
14287 GEN_INT (1 << log));
14290 if (GET_CODE (x) == PLUS)
14292 /* Canonicalize shifts by 0, 1, 2, 3 into multiply. */
14294 if (GET_CODE (XEXP (x, 0)) == ASHIFT
14295 && CONST_INT_P (XEXP (XEXP (x, 0), 1))
14296 && (unsigned HOST_WIDE_INT) INTVAL (XEXP (XEXP (x, 0), 1)) < 4)
14298 changed = 1;
14299 log = INTVAL (XEXP (XEXP (x, 0), 1));
14300 XEXP (x, 0) = gen_rtx_MULT (Pmode,
14301 force_reg (Pmode, XEXP (XEXP (x, 0), 0)),
14302 GEN_INT (1 << log));
14305 if (GET_CODE (XEXP (x, 1)) == ASHIFT
14306 && CONST_INT_P (XEXP (XEXP (x, 1), 1))
14307 && (unsigned HOST_WIDE_INT) INTVAL (XEXP (XEXP (x, 1), 1)) < 4)
14309 changed = 1;
14310 log = INTVAL (XEXP (XEXP (x, 1), 1));
14311 XEXP (x, 1) = gen_rtx_MULT (Pmode,
14312 force_reg (Pmode, XEXP (XEXP (x, 1), 0)),
14313 GEN_INT (1 << log));
14316 /* Put multiply first if it isn't already. */
14317 if (GET_CODE (XEXP (x, 1)) == MULT)
14319 rtx tmp = XEXP (x, 0);
14320 XEXP (x, 0) = XEXP (x, 1);
14321 XEXP (x, 1) = tmp;
14322 changed = 1;
14325 /* Canonicalize (plus (mult (reg) (const)) (plus (reg) (const)))
14326 into (plus (plus (mult (reg) (const)) (reg)) (const)). This can be
14327 created by virtual register instantiation, register elimination, and
14328 similar optimizations. */
14329 if (GET_CODE (XEXP (x, 0)) == MULT && GET_CODE (XEXP (x, 1)) == PLUS)
14331 changed = 1;
14332 x = gen_rtx_PLUS (Pmode,
14333 gen_rtx_PLUS (Pmode, XEXP (x, 0),
14334 XEXP (XEXP (x, 1), 0)),
14335 XEXP (XEXP (x, 1), 1));
14338 /* Canonicalize
14339 (plus (plus (mult (reg) (const)) (plus (reg) (const))) const)
14340 into (plus (plus (mult (reg) (const)) (reg)) (const)). */
14341 else if (GET_CODE (x) == PLUS && GET_CODE (XEXP (x, 0)) == PLUS
14342 && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
14343 && GET_CODE (XEXP (XEXP (x, 0), 1)) == PLUS
14344 && CONSTANT_P (XEXP (x, 1)))
14346 rtx constant;
14347 rtx other = NULL_RTX;
14349 if (CONST_INT_P (XEXP (x, 1)))
14351 constant = XEXP (x, 1);
14352 other = XEXP (XEXP (XEXP (x, 0), 1), 1);
14354 else if (CONST_INT_P (XEXP (XEXP (XEXP (x, 0), 1), 1)))
14356 constant = XEXP (XEXP (XEXP (x, 0), 1), 1);
14357 other = XEXP (x, 1);
14359 else
14360 constant = 0;
14362 if (constant)
14364 changed = 1;
14365 x = gen_rtx_PLUS (Pmode,
14366 gen_rtx_PLUS (Pmode, XEXP (XEXP (x, 0), 0),
14367 XEXP (XEXP (XEXP (x, 0), 1), 0)),
14368 plus_constant (Pmode, other,
14369 INTVAL (constant)));
14373 if (changed && ix86_legitimate_address_p (mode, x, false))
14374 return x;
14376 if (GET_CODE (XEXP (x, 0)) == MULT)
14378 changed = 1;
14379 XEXP (x, 0) = copy_addr_to_reg (XEXP (x, 0));
14382 if (GET_CODE (XEXP (x, 1)) == MULT)
14384 changed = 1;
14385 XEXP (x, 1) = copy_addr_to_reg (XEXP (x, 1));
14388 if (changed
14389 && REG_P (XEXP (x, 1))
14390 && REG_P (XEXP (x, 0)))
14391 return x;
14393 if (flag_pic && SYMBOLIC_CONST (XEXP (x, 1)))
14395 changed = 1;
14396 x = legitimize_pic_address (x, 0);
14399 if (changed && ix86_legitimate_address_p (mode, x, false))
14400 return x;
14402 if (REG_P (XEXP (x, 0)))
14404 rtx temp = gen_reg_rtx (Pmode);
14405 rtx val = force_operand (XEXP (x, 1), temp);
14406 if (val != temp)
14408 val = convert_to_mode (Pmode, val, 1);
14409 emit_move_insn (temp, val);
14412 XEXP (x, 1) = temp;
14413 return x;
14416 else if (REG_P (XEXP (x, 1)))
14418 rtx temp = gen_reg_rtx (Pmode);
14419 rtx val = force_operand (XEXP (x, 0), temp);
14420 if (val != temp)
14422 val = convert_to_mode (Pmode, val, 1);
14423 emit_move_insn (temp, val);
14426 XEXP (x, 0) = temp;
14427 return x;
14431 return x;
14434 /* Print an integer constant expression in assembler syntax. Addition
14435 and subtraction are the only arithmetic that may appear in these
14436 expressions. FILE is the stdio stream to write to, X is the rtx, and
14437 CODE is the operand print code from the output string. */
14439 static void
14440 output_pic_addr_const (FILE *file, rtx x, int code)
14442 char buf[256];
14444 switch (GET_CODE (x))
14446 case PC:
14447 gcc_assert (flag_pic);
14448 putc ('.', file);
14449 break;
14451 case SYMBOL_REF:
14452 if (TARGET_64BIT || ! TARGET_MACHO_BRANCH_ISLANDS)
14453 output_addr_const (file, x);
14454 else
14456 const char *name = XSTR (x, 0);
14458 /* Mark the decl as referenced so that cgraph will
14459 output the function. */
14460 if (SYMBOL_REF_DECL (x))
14461 mark_decl_referenced (SYMBOL_REF_DECL (x));
14463 #if TARGET_MACHO
14464 if (MACHOPIC_INDIRECT
14465 && machopic_classify_symbol (x) == MACHOPIC_UNDEFINED_FUNCTION)
14466 name = machopic_indirection_name (x, /*stub_p=*/true);
14467 #endif
14468 assemble_name (file, name);
14470 if (!TARGET_MACHO && !(TARGET_64BIT && TARGET_PECOFF)
14471 && code == 'P' && ! SYMBOL_REF_LOCAL_P (x))
14472 fputs ("@PLT", file);
14473 break;
14475 case LABEL_REF:
14476 x = XEXP (x, 0);
14477 /* FALLTHRU */
14478 case CODE_LABEL:
14479 ASM_GENERATE_INTERNAL_LABEL (buf, "L", CODE_LABEL_NUMBER (x));
14480 assemble_name (asm_out_file, buf);
14481 break;
14483 case CONST_INT:
14484 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
14485 break;
14487 case CONST:
14488 /* This used to output parentheses around the expression,
14489 but that does not work on the 386 (either ATT or BSD assembler). */
14490 output_pic_addr_const (file, XEXP (x, 0), code);
14491 break;
14493 case CONST_DOUBLE:
14494 if (GET_MODE (x) == VOIDmode)
14496 /* We can use %d if the number is <32 bits and positive. */
14497 if (CONST_DOUBLE_HIGH (x) || CONST_DOUBLE_LOW (x) < 0)
14498 fprintf (file, "0x%lx%08lx",
14499 (unsigned long) CONST_DOUBLE_HIGH (x),
14500 (unsigned long) CONST_DOUBLE_LOW (x));
14501 else
14502 fprintf (file, HOST_WIDE_INT_PRINT_DEC, CONST_DOUBLE_LOW (x));
14504 else
14505 /* We can't handle floating point constants;
14506 TARGET_PRINT_OPERAND must handle them. */
14507 output_operand_lossage ("floating constant misused");
14508 break;
14510 case PLUS:
14511 /* Some assemblers need integer constants to appear first. */
14512 if (CONST_INT_P (XEXP (x, 0)))
14514 output_pic_addr_const (file, XEXP (x, 0), code);
14515 putc ('+', file);
14516 output_pic_addr_const (file, XEXP (x, 1), code);
14518 else
14520 gcc_assert (CONST_INT_P (XEXP (x, 1)));
14521 output_pic_addr_const (file, XEXP (x, 1), code);
14522 putc ('+', file);
14523 output_pic_addr_const (file, XEXP (x, 0), code);
14525 break;
14527 case MINUS:
14528 if (!TARGET_MACHO)
14529 putc (ASSEMBLER_DIALECT == ASM_INTEL ? '(' : '[', file);
14530 output_pic_addr_const (file, XEXP (x, 0), code);
14531 putc ('-', file);
14532 output_pic_addr_const (file, XEXP (x, 1), code);
14533 if (!TARGET_MACHO)
14534 putc (ASSEMBLER_DIALECT == ASM_INTEL ? ')' : ']', file);
14535 break;
14537 case UNSPEC:
14538 if (XINT (x, 1) == UNSPEC_STACK_CHECK)
14540 bool f = i386_asm_output_addr_const_extra (file, x);
14541 gcc_assert (f);
14542 break;
14545 gcc_assert (XVECLEN (x, 0) == 1);
14546 output_pic_addr_const (file, XVECEXP (x, 0, 0), code);
14547 switch (XINT (x, 1))
14549 case UNSPEC_GOT:
14550 fputs ("@GOT", file);
14551 break;
14552 case UNSPEC_GOTOFF:
14553 fputs ("@GOTOFF", file);
14554 break;
14555 case UNSPEC_PLTOFF:
14556 fputs ("@PLTOFF", file);
14557 break;
14558 case UNSPEC_PCREL:
14559 fputs (ASSEMBLER_DIALECT == ASM_ATT ?
14560 "(%rip)" : "[rip]", file);
14561 break;
14562 case UNSPEC_GOTPCREL:
14563 fputs (ASSEMBLER_DIALECT == ASM_ATT ?
14564 "@GOTPCREL(%rip)" : "@GOTPCREL[rip]", file);
14565 break;
14566 case UNSPEC_GOTTPOFF:
14567 /* FIXME: This might be @TPOFF in Sun ld too. */
14568 fputs ("@gottpoff", file);
14569 break;
14570 case UNSPEC_TPOFF:
14571 fputs ("@tpoff", file);
14572 break;
14573 case UNSPEC_NTPOFF:
14574 if (TARGET_64BIT)
14575 fputs ("@tpoff", file);
14576 else
14577 fputs ("@ntpoff", file);
14578 break;
14579 case UNSPEC_DTPOFF:
14580 fputs ("@dtpoff", file);
14581 break;
14582 case UNSPEC_GOTNTPOFF:
14583 if (TARGET_64BIT)
14584 fputs (ASSEMBLER_DIALECT == ASM_ATT ?
14585 "@gottpoff(%rip)": "@gottpoff[rip]", file);
14586 else
14587 fputs ("@gotntpoff", file);
14588 break;
14589 case UNSPEC_INDNTPOFF:
14590 fputs ("@indntpoff", file);
14591 break;
14592 #if TARGET_MACHO
14593 case UNSPEC_MACHOPIC_OFFSET:
14594 putc ('-', file);
14595 machopic_output_function_base_name (file);
14596 break;
14597 #endif
14598 default:
14599 output_operand_lossage ("invalid UNSPEC as operand");
14600 break;
14602 break;
14604 default:
14605 output_operand_lossage ("invalid expression as operand");
14609 /* This is called from dwarf2out.c via TARGET_ASM_OUTPUT_DWARF_DTPREL.
14610 We need to emit DTP-relative relocations. */
14612 static void ATTRIBUTE_UNUSED
14613 i386_output_dwarf_dtprel (FILE *file, int size, rtx x)
14615 fputs (ASM_LONG, file);
14616 output_addr_const (file, x);
14617 fputs ("@dtpoff", file);
14618 switch (size)
14620 case 4:
14621 break;
14622 case 8:
14623 fputs (", 0", file);
14624 break;
14625 default:
14626 gcc_unreachable ();
14630 /* Return true if X is a representation of the PIC register. This copes
14631 with calls from ix86_find_base_term, where the register might have
14632 been replaced by a cselib value. */
14634 static bool
14635 ix86_pic_register_p (rtx x)
14637 if (GET_CODE (x) == VALUE && CSELIB_VAL_PTR (x))
14638 return (pic_offset_table_rtx
14639 && rtx_equal_for_cselib_p (x, pic_offset_table_rtx));
14640 else if (!REG_P (x))
14641 return false;
14642 else if (pic_offset_table_rtx)
14644 if (REGNO (x) == REGNO (pic_offset_table_rtx))
14645 return true;
14646 if (HARD_REGISTER_P (x)
14647 && !HARD_REGISTER_P (pic_offset_table_rtx)
14648 && ORIGINAL_REGNO (x) == REGNO (pic_offset_table_rtx))
14649 return true;
14650 return false;
14652 else
14653 return REGNO (x) == PIC_OFFSET_TABLE_REGNUM;
14656 /* Helper function for ix86_delegitimize_address.
14657 Attempt to delegitimize TLS local-exec accesses. */
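/* For example (a sketch, not taken from a real dump): a local-exec access
   that was legitimized into something like

     (plus (reg 102) (const (unspec [(symbol_ref "tls_var")] UNSPEC_NTPOFF)))

   addressed through the TLS segment register is rewritten back into an
   address based on (symbol_ref "tls_var"), so debug output and alias
   analysis see the variable itself again.  */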
14659 static rtx
14660 ix86_delegitimize_tls_address (rtx orig_x)
14662 rtx x = orig_x, unspec;
14663 struct ix86_address addr;
14665 if (!TARGET_TLS_DIRECT_SEG_REFS)
14666 return orig_x;
14667 if (MEM_P (x))
14668 x = XEXP (x, 0);
14669 if (GET_CODE (x) != PLUS || GET_MODE (x) != Pmode)
14670 return orig_x;
14671 if (ix86_decompose_address (x, &addr) == 0
14672 || addr.seg != DEFAULT_TLS_SEG_REG
14673 || addr.disp == NULL_RTX
14674 || GET_CODE (addr.disp) != CONST)
14675 return orig_x;
14676 unspec = XEXP (addr.disp, 0);
14677 if (GET_CODE (unspec) == PLUS && CONST_INT_P (XEXP (unspec, 1)))
14678 unspec = XEXP (unspec, 0);
14679 if (GET_CODE (unspec) != UNSPEC || XINT (unspec, 1) != UNSPEC_NTPOFF)
14680 return orig_x;
14681 x = XVECEXP (unspec, 0, 0);
14682 gcc_assert (GET_CODE (x) == SYMBOL_REF);
14683 if (unspec != XEXP (addr.disp, 0))
14684 x = gen_rtx_PLUS (Pmode, x, XEXP (XEXP (addr.disp, 0), 1));
14685 if (addr.index)
14687 rtx idx = addr.index;
14688 if (addr.scale != 1)
14689 idx = gen_rtx_MULT (Pmode, idx, GEN_INT (addr.scale));
14690 x = gen_rtx_PLUS (Pmode, idx, x);
14692 if (addr.base)
14693 x = gen_rtx_PLUS (Pmode, addr.base, x);
14694 if (MEM_P (orig_x))
14695 x = replace_equiv_address_nv (orig_x, x);
14696 return x;
14699 /* In the name of slightly smaller debug output, and to cater to
14700 general assembler lossage, recognize PIC+GOTOFF and turn it back
14701 into a direct symbol reference.
14703 On Darwin, this is necessary to avoid a crash, because Darwin
14704 has a different PIC label for each routine but the DWARF debugging
14705 information is not associated with any particular routine, so it's
14706 necessary to remove references to the PIC label from RTL stored by
14707 the DWARF output code. */
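/* A minimal -m32 sketch of what this undoes (register numbers invented):
   a PIC-legitimized address such as

     (plus (PIC register) (const (unspec [(symbol_ref "foo")] UNSPEC_GOTOFF)))

   appearing in debug RTL is turned back into plain (symbol_ref "foo").  */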
14709 static rtx
14710 ix86_delegitimize_address (rtx x)
14712 rtx orig_x = delegitimize_mem_from_attrs (x);
14713 /* addend is NULL or some rtx if x is something+GOTOFF where
14714 something doesn't include the PIC register. */
14715 rtx addend = NULL_RTX;
14716 /* reg_addend is NULL or a multiple of some register. */
14717 rtx reg_addend = NULL_RTX;
14718 /* const_addend is NULL or a const_int. */
14719 rtx const_addend = NULL_RTX;
14720 /* This is the result, or NULL. */
14721 rtx result = NULL_RTX;
14723 x = orig_x;
14725 if (MEM_P (x))
14726 x = XEXP (x, 0);
14728 if (TARGET_64BIT)
14730 if (GET_CODE (x) == CONST
14731 && GET_CODE (XEXP (x, 0)) == PLUS
14732 && GET_MODE (XEXP (x, 0)) == Pmode
14733 && CONST_INT_P (XEXP (XEXP (x, 0), 1))
14734 && GET_CODE (XEXP (XEXP (x, 0), 0)) == UNSPEC
14735 && XINT (XEXP (XEXP (x, 0), 0), 1) == UNSPEC_PCREL)
14737 rtx x2 = XVECEXP (XEXP (XEXP (x, 0), 0), 0, 0);
14738 x = gen_rtx_PLUS (Pmode, XEXP (XEXP (x, 0), 1), x2);
14739 if (MEM_P (orig_x))
14740 x = replace_equiv_address_nv (orig_x, x);
14741 return x;
14744 if (GET_CODE (x) == CONST
14745 && GET_CODE (XEXP (x, 0)) == UNSPEC
14746 && (XINT (XEXP (x, 0), 1) == UNSPEC_GOTPCREL
14747 || XINT (XEXP (x, 0), 1) == UNSPEC_PCREL)
14748 && (MEM_P (orig_x) || XINT (XEXP (x, 0), 1) == UNSPEC_PCREL))
14750 x = XVECEXP (XEXP (x, 0), 0, 0);
14751 if (GET_MODE (orig_x) != GET_MODE (x) && MEM_P (orig_x))
14753 x = simplify_gen_subreg (GET_MODE (orig_x), x,
14754 GET_MODE (x), 0);
14755 if (x == NULL_RTX)
14756 return orig_x;
14758 return x;
14761 if (ix86_cmodel != CM_MEDIUM_PIC && ix86_cmodel != CM_LARGE_PIC)
14762 return ix86_delegitimize_tls_address (orig_x);
14764 /* Fall thru into the code shared with -m32 for -mcmodel=large -fpic
14765 and -mcmodel=medium -fpic. */
14768 if (GET_CODE (x) != PLUS
14769 || GET_CODE (XEXP (x, 1)) != CONST)
14770 return ix86_delegitimize_tls_address (orig_x);
14772 if (ix86_pic_register_p (XEXP (x, 0)))
14773 /* %ebx + GOT/GOTOFF */
14775 else if (GET_CODE (XEXP (x, 0)) == PLUS)
14777 /* %ebx + %reg * scale + GOT/GOTOFF */
14778 reg_addend = XEXP (x, 0);
14779 if (ix86_pic_register_p (XEXP (reg_addend, 0)))
14780 reg_addend = XEXP (reg_addend, 1);
14781 else if (ix86_pic_register_p (XEXP (reg_addend, 1)))
14782 reg_addend = XEXP (reg_addend, 0);
14783 else
14785 reg_addend = NULL_RTX;
14786 addend = XEXP (x, 0);
14789 else
14790 addend = XEXP (x, 0);
14792 x = XEXP (XEXP (x, 1), 0);
14793 if (GET_CODE (x) == PLUS
14794 && CONST_INT_P (XEXP (x, 1)))
14796 const_addend = XEXP (x, 1);
14797 x = XEXP (x, 0);
14800 if (GET_CODE (x) == UNSPEC
14801 && ((XINT (x, 1) == UNSPEC_GOT && MEM_P (orig_x) && !addend)
14802 || (XINT (x, 1) == UNSPEC_GOTOFF && !MEM_P (orig_x))
14803 || (XINT (x, 1) == UNSPEC_PLTOFF && ix86_cmodel == CM_LARGE_PIC
14804 && !MEM_P (orig_x) && !addend)))
14805 result = XVECEXP (x, 0, 0);
14807 if (!TARGET_64BIT && TARGET_MACHO && darwin_local_data_pic (x)
14808 && !MEM_P (orig_x))
14809 result = XVECEXP (x, 0, 0);
14811 if (! result)
14812 return ix86_delegitimize_tls_address (orig_x);
14814 if (const_addend)
14815 result = gen_rtx_CONST (Pmode, gen_rtx_PLUS (Pmode, result, const_addend));
14816 if (reg_addend)
14817 result = gen_rtx_PLUS (Pmode, reg_addend, result);
14818 if (addend)
14820 /* If the rest of original X doesn't involve the PIC register, add
14821 addend and subtract pic_offset_table_rtx. This can happen e.g.
14822 for code like:
14823 leal (%ebx, %ecx, 4), %ecx
14825 movl foo@GOTOFF(%ecx), %edx
14826 in which case we return (%ecx - %ebx) + foo
14827 or (%ecx - _GLOBAL_OFFSET_TABLE_) + foo if pseudo_pic_reg
14828 and reload has completed. */
14829 if (pic_offset_table_rtx
14830 && (!reload_completed || !ix86_use_pseudo_pic_reg ()))
14831 result = gen_rtx_PLUS (Pmode, gen_rtx_MINUS (Pmode, copy_rtx (addend),
14832 pic_offset_table_rtx),
14833 result);
14834 else if (pic_offset_table_rtx && !TARGET_MACHO && !TARGET_VXWORKS_RTP)
14836 rtx tmp = gen_rtx_SYMBOL_REF (Pmode, GOT_SYMBOL_NAME);
14837 tmp = gen_rtx_MINUS (Pmode, copy_rtx (addend), tmp);
14838 result = gen_rtx_PLUS (Pmode, tmp, result);
14840 else
14841 return orig_x;
14843 if (GET_MODE (orig_x) != Pmode && MEM_P (orig_x))
14845 result = simplify_gen_subreg (GET_MODE (orig_x), result, Pmode, 0);
14846 if (result == NULL_RTX)
14847 return orig_x;
14849 return result;
14852 /* If X is a machine specific address (i.e. a symbol or label being
14853 referenced as a displacement from the GOT implemented using an
14854 UNSPEC), then return the base term. Otherwise return X. */
14857 ix86_find_base_term (rtx x)
14859 rtx term;
14861 if (TARGET_64BIT)
14863 if (GET_CODE (x) != CONST)
14864 return x;
14865 term = XEXP (x, 0);
14866 if (GET_CODE (term) == PLUS
14867 && (CONST_INT_P (XEXP (term, 1))
14868 || GET_CODE (XEXP (term, 1)) == CONST_DOUBLE))
14869 term = XEXP (term, 0);
14870 if (GET_CODE (term) != UNSPEC
14871 || (XINT (term, 1) != UNSPEC_GOTPCREL
14872 && XINT (term, 1) != UNSPEC_PCREL))
14873 return x;
14875 return XVECEXP (term, 0, 0);
14878 return ix86_delegitimize_address (x);
14881 static void
14882 put_condition_code (enum rtx_code code, machine_mode mode, bool reverse,
14883 bool fp, FILE *file)
14885 const char *suffix;
14887 if (mode == CCFPmode || mode == CCFPUmode)
14889 code = ix86_fp_compare_code_to_integer (code);
14890 mode = CCmode;
14892 if (reverse)
14893 code = reverse_condition (code);
14895 switch (code)
14897 case EQ:
14898 switch (mode)
14900 case CCAmode:
14901 suffix = "a";
14902 break;
14904 case CCCmode:
14905 suffix = "c";
14906 break;
14908 case CCOmode:
14909 suffix = "o";
14910 break;
14912 case CCSmode:
14913 suffix = "s";
14914 break;
14916 default:
14917 suffix = "e";
14919 break;
14920 case NE:
14921 switch (mode)
14923 case CCAmode:
14924 suffix = "na";
14925 break;
14927 case CCCmode:
14928 suffix = "nc";
14929 break;
14931 case CCOmode:
14932 suffix = "no";
14933 break;
14935 case CCSmode:
14936 suffix = "ns";
14937 break;
14939 default:
14940 suffix = "ne";
14942 break;
14943 case GT:
14944 gcc_assert (mode == CCmode || mode == CCNOmode || mode == CCGCmode);
14945 suffix = "g";
14946 break;
14947 case GTU:
14948 /* ??? Use "nbe" instead of "a" for fcmov lossage on some assemblers.
14949 Those same assemblers have the same but opposite lossage on cmov. */
14950 if (mode == CCmode)
14951 suffix = fp ? "nbe" : "a";
14952 else
14953 gcc_unreachable ();
14954 break;
14955 case LT:
14956 switch (mode)
14958 case CCNOmode:
14959 case CCGOCmode:
14960 suffix = "s";
14961 break;
14963 case CCmode:
14964 case CCGCmode:
14965 suffix = "l";
14966 break;
14968 default:
14969 gcc_unreachable ();
14971 break;
14972 case LTU:
14973 if (mode == CCmode)
14974 suffix = "b";
14975 else if (mode == CCCmode)
14976 suffix = fp ? "b" : "c";
14977 else
14978 gcc_unreachable ();
14979 break;
14980 case GE:
14981 switch (mode)
14983 case CCNOmode:
14984 case CCGOCmode:
14985 suffix = "ns";
14986 break;
14988 case CCmode:
14989 case CCGCmode:
14990 suffix = "ge";
14991 break;
14993 default:
14994 gcc_unreachable ();
14996 break;
14997 case GEU:
14998 if (mode == CCmode)
14999 suffix = "nb";
15000 else if (mode == CCCmode)
15001 suffix = fp ? "nb" : "nc";
15002 else
15003 gcc_unreachable ();
15004 break;
15005 case LE:
15006 gcc_assert (mode == CCmode || mode == CCGCmode || mode == CCNOmode);
15007 suffix = "le";
15008 break;
15009 case LEU:
15010 if (mode == CCmode)
15011 suffix = "be";
15012 else
15013 gcc_unreachable ();
15014 break;
15015 case UNORDERED:
15016 suffix = fp ? "u" : "p";
15017 break;
15018 case ORDERED:
15019 suffix = fp ? "nu" : "np";
15020 break;
15021 default:
15022 gcc_unreachable ();
15024 fputs (suffix, file);
15027 /* Print the name of register X to FILE based on its machine mode and number.
15028 If CODE is 'w', pretend the mode is HImode.
15029 If CODE is 'b', pretend the mode is QImode.
15030 If CODE is 'k', pretend the mode is SImode.
15031 If CODE is 'q', pretend the mode is DImode.
15032 If CODE is 'x', pretend the mode is V4SFmode.
15033 If CODE is 't', pretend the mode is V8SFmode.
15034 If CODE is 'g', pretend the mode is V16SFmode.
15035 If CODE is 'h', pretend the reg is the 'high' byte register.
15036 If CODE is 'y', print "st(0)" instead of "st", if the reg is a stack op.
15037 If CODE is 'd', duplicate the operand for an AVX instruction.
15040 void
15041 print_reg (rtx x, int code, FILE *file)
15043 const char *reg;
15044 unsigned int regno;
15045 bool duplicated = code == 'd' && TARGET_AVX;
15047 if (ASSEMBLER_DIALECT == ASM_ATT)
15048 putc ('%', file);
15050 if (x == pc_rtx)
15052 gcc_assert (TARGET_64BIT);
15053 fputs ("rip", file);
15054 return;
15057 regno = true_regnum (x);
15058 gcc_assert (regno != ARG_POINTER_REGNUM
15059 && regno != FRAME_POINTER_REGNUM
15060 && regno != FLAGS_REG
15061 && regno != FPSR_REG
15062 && regno != FPCR_REG);
15064 if (code == 'w' || MMX_REG_P (x))
15065 code = 2;
15066 else if (code == 'b')
15067 code = 1;
15068 else if (code == 'k')
15069 code = 4;
15070 else if (code == 'q')
15071 code = 8;
15072 else if (code == 'y')
15073 code = 3;
15074 else if (code == 'h')
15075 code = 0;
15076 else if (code == 'x')
15077 code = 16;
15078 else if (code == 't')
15079 code = 32;
15080 else if (code == 'g')
15081 code = 64;
15082 else
15083 code = GET_MODE_SIZE (GET_MODE (x));
15085 /* Irritatingly, AMD extended registers use a different naming convention
15086 from the normal registers: "r%d[bwd]" */
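/* For example (illustrative only): for hard register r8, the code below
   prints "r8b" when a byte is requested, "r8w" for a word, "r8d" for a
   doubleword, and plain "r8" for the full 64-bit register.  */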
15087 if (REX_INT_REGNO_P (regno))
15089 gcc_assert (TARGET_64BIT);
15090 putc ('r', file);
15091 fprint_ul (file, regno - FIRST_REX_INT_REG + 8);
15092 switch (code)
15094 case 0:
15095 error ("extended registers have no high halves");
15096 break;
15097 case 1:
15098 putc ('b', file);
15099 break;
15100 case 2:
15101 putc ('w', file);
15102 break;
15103 case 4:
15104 putc ('d', file);
15105 break;
15106 case 8:
15107 /* no suffix */
15108 break;
15109 default:
15110 error ("unsupported operand size for extended register");
15111 break;
15113 return;
15116 reg = NULL;
15117 switch (code)
15119 case 3:
15120 if (STACK_TOP_P (x))
15122 reg = "st(0)";
15123 break;
15125 /* FALLTHRU */
15126 case 8:
15127 case 4:
15128 case 12:
15129 if (! ANY_FP_REG_P (x) && ! ANY_MASK_REG_P (x) && ! ANY_BND_REG_P (x))
15130 putc (code == 8 && TARGET_64BIT ? 'r' : 'e', file);
15131 /* FALLTHRU */
15132 case 16:
15133 case 2:
15134 normal:
15135 reg = hi_reg_name[regno];
15136 break;
15137 case 1:
15138 if (regno >= ARRAY_SIZE (qi_reg_name))
15139 goto normal;
15140 reg = qi_reg_name[regno];
15141 break;
15142 case 0:
15143 if (regno >= ARRAY_SIZE (qi_high_reg_name))
15144 goto normal;
15145 reg = qi_high_reg_name[regno];
15146 break;
15147 case 32:
15148 if (SSE_REG_P (x))
15150 gcc_assert (!duplicated);
15151 putc ('y', file);
15152 fputs (hi_reg_name[regno] + 1, file);
15153 return;
15155 case 64:
15156 if (SSE_REG_P (x))
15158 gcc_assert (!duplicated);
15159 putc ('z', file);
15160 fputs (hi_reg_name[REGNO (x)] + 1, file);
15161 return;
15163 break;
15164 default:
15165 gcc_unreachable ();
15168 fputs (reg, file);
15169 if (duplicated)
15171 if (ASSEMBLER_DIALECT == ASM_ATT)
15172 fprintf (file, ", %%%s", reg);
15173 else
15174 fprintf (file, ", %s", reg);
15178 /* Meaning of CODE:
15179 L,W,B,Q,S,T -- print the opcode suffix for specified size of operand.
15180 C -- print opcode suffix for set/cmov insn.
15181 c -- like C, but print reversed condition
15182 F,f -- likewise, but for floating-point.
15183 O -- if HAVE_AS_IX86_CMOV_SUN_SYNTAX, expand to "w.", "l." or "q.",
15184 otherwise nothing
15185 R -- print embedded rounding and sae.
15186 r -- print only sae.
15187 z -- print the opcode suffix for the size of the current operand.
15188 Z -- likewise, with special suffixes for x87 instructions.
15189 * -- print a star (in certain assembler syntax)
15190 A -- print an absolute memory reference.
15191 E -- print address with DImode register names if TARGET_64BIT.
15192 w -- print the operand as if it's a "word" (HImode) even if it isn't.
15193 s -- print a shift double count, followed by the assembler's argument
15194 delimiter.
15195 b -- print the QImode name of the register for the indicated operand.
15196 %b0 would print %al if operands[0] is reg 0.
15197 w -- likewise, print the HImode name of the register.
15198 k -- likewise, print the SImode name of the register.
15199 q -- likewise, print the DImode name of the register.
15200 x -- likewise, print the V4SFmode name of the register.
15201 t -- likewise, print the V8SFmode name of the register.
15202 g -- likewise, print the V16SFmode name of the register.
15203 h -- print the QImode name for a "high" register, either ah, bh, ch or dh.
15204 y -- print "st(0)" instead of "st" as a register.
15205 d -- print duplicated register operand for AVX instruction.
15206 D -- print condition for SSE cmp instruction.
15207 P -- if PIC, print an @PLT suffix.
15208 p -- print raw symbol name.
15209 X -- don't print any sort of PIC '@' suffix for a symbol.
15210 & -- print some in-use local-dynamic symbol name.
15211 H -- print a memory address offset by 8; used for sse high-parts
15212 Y -- print condition for XOP pcom* instruction.
15213 + -- print a branch hint as 'cs' or 'ds' prefix
15214 ; -- print a semicolon (after prefixes due to a bug in older gas).
15215 ~ -- print "i" if TARGET_AVX2, "f" otherwise.
15216 @ -- print a segment register of thread base pointer load
15217 ^ -- print addr32 prefix if TARGET_64BIT and Pmode != word_mode
15218 ! -- print MPX prefix for jxx/call/ret instructions if required.
15221 void
15222 ix86_print_operand (FILE *file, rtx x, int code)
15224 if (code)
15226 switch (code)
15228 case 'A':
15229 switch (ASSEMBLER_DIALECT)
15231 case ASM_ATT:
15232 putc ('*', file);
15233 break;
15235 case ASM_INTEL:
15236 /* Intel syntax. For absolute addresses, registers should not
15237 be surrounded by brackets. */
15238 if (!REG_P (x))
15240 putc ('[', file);
15241 ix86_print_operand (file, x, 0);
15242 putc (']', file);
15243 return;
15245 break;
15247 default:
15248 gcc_unreachable ();
15251 ix86_print_operand (file, x, 0);
15252 return;
15254 case 'E':
15255 /* Wrap address in an UNSPEC to declare special handling. */
15256 if (TARGET_64BIT)
15257 x = gen_rtx_UNSPEC (DImode, gen_rtvec (1, x), UNSPEC_LEA_ADDR);
15259 output_address (x);
15260 return;
15262 case 'L':
15263 if (ASSEMBLER_DIALECT == ASM_ATT)
15264 putc ('l', file);
15265 return;
15267 case 'W':
15268 if (ASSEMBLER_DIALECT == ASM_ATT)
15269 putc ('w', file);
15270 return;
15272 case 'B':
15273 if (ASSEMBLER_DIALECT == ASM_ATT)
15274 putc ('b', file);
15275 return;
15277 case 'Q':
15278 if (ASSEMBLER_DIALECT == ASM_ATT)
15279 putc ('l', file);
15280 return;
15282 case 'S':
15283 if (ASSEMBLER_DIALECT == ASM_ATT)
15284 putc ('s', file);
15285 return;
15287 case 'T':
15288 if (ASSEMBLER_DIALECT == ASM_ATT)
15289 putc ('t', file);
15290 return;
15292 case 'O':
15293 #ifdef HAVE_AS_IX86_CMOV_SUN_SYNTAX
15294 if (ASSEMBLER_DIALECT != ASM_ATT)
15295 return;
15297 switch (GET_MODE_SIZE (GET_MODE (x)))
15299 case 2:
15300 putc ('w', file);
15301 break;
15303 case 4:
15304 putc ('l', file);
15305 break;
15307 case 8:
15308 putc ('q', file);
15309 break;
15311 default:
15312 output_operand_lossage
15313 ("invalid operand size for operand code 'O'");
15314 return;
15317 putc ('.', file);
15318 #endif
15319 return;
15321 case 'z':
15322 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_INT)
15324 /* Opcodes don't get size suffixes if using Intel syntax. */
15325 if (ASSEMBLER_DIALECT == ASM_INTEL)
15326 return;
15328 switch (GET_MODE_SIZE (GET_MODE (x)))
15330 case 1:
15331 putc ('b', file);
15332 return;
15334 case 2:
15335 putc ('w', file);
15336 return;
15338 case 4:
15339 putc ('l', file);
15340 return;
15342 case 8:
15343 putc ('q', file);
15344 return;
15346 default:
15347 output_operand_lossage
15348 ("invalid operand size for operand code 'z'");
15349 return;
15353 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_FLOAT)
15354 warning
15355 (0, "non-integer operand used with operand code 'z'");
15356 /* FALLTHRU */
15358 case 'Z':
15359 /* 387 opcodes don't get size suffixes if using Intel syntax. */
15360 if (ASSEMBLER_DIALECT == ASM_INTEL)
15361 return;
15363 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_INT)
15365 switch (GET_MODE_SIZE (GET_MODE (x)))
15367 case 2:
15368 #ifdef HAVE_AS_IX86_FILDS
15369 putc ('s', file);
15370 #endif
15371 return;
15373 case 4:
15374 putc ('l', file);
15375 return;
15377 case 8:
15378 #ifdef HAVE_AS_IX86_FILDQ
15379 putc ('q', file);
15380 #else
15381 fputs ("ll", file);
15382 #endif
15383 return;
15385 default:
15386 break;
15389 else if (GET_MODE_CLASS (GET_MODE (x)) == MODE_FLOAT)
15391 /* 387 opcodes don't get size suffixes
15392 if the operands are registers. */
15393 if (STACK_REG_P (x))
15394 return;
15396 switch (GET_MODE_SIZE (GET_MODE (x)))
15398 case 4:
15399 putc ('s', file);
15400 return;
15402 case 8:
15403 putc ('l', file);
15404 return;
15406 case 12:
15407 case 16:
15408 putc ('t', file);
15409 return;
15411 default:
15412 break;
15415 else
15417 output_operand_lossage
15418 ("invalid operand type used with operand code 'Z'");
15419 return;
15422 output_operand_lossage
15423 ("invalid operand size for operand code 'Z'");
15424 return;
15426 case 'd':
15427 case 'b':
15428 case 'w':
15429 case 'k':
15430 case 'q':
15431 case 'h':
15432 case 't':
15433 case 'g':
15434 case 'y':
15435 case 'x':
15436 case 'X':
15437 case 'P':
15438 case 'p':
15439 break;
15441 case 's':
15442 if (CONST_INT_P (x) || ! SHIFT_DOUBLE_OMITS_COUNT)
15444 ix86_print_operand (file, x, 0);
15445 fputs (", ", file);
15447 return;
15449 case 'Y':
15450 switch (GET_CODE (x))
15452 case NE:
15453 fputs ("neq", file);
15454 break;
15455 case EQ:
15456 fputs ("eq", file);
15457 break;
15458 case GE:
15459 case GEU:
15460 fputs (INTEGRAL_MODE_P (GET_MODE (x)) ? "ge" : "unlt", file);
15461 break;
15462 case GT:
15463 case GTU:
15464 fputs (INTEGRAL_MODE_P (GET_MODE (x)) ? "gt" : "unle", file);
15465 break;
15466 case LE:
15467 case LEU:
15468 fputs ("le", file);
15469 break;
15470 case LT:
15471 case LTU:
15472 fputs ("lt", file);
15473 break;
15474 case UNORDERED:
15475 fputs ("unord", file);
15476 break;
15477 case ORDERED:
15478 fputs ("ord", file);
15479 break;
15480 case UNEQ:
15481 fputs ("ueq", file);
15482 break;
15483 case UNGE:
15484 fputs ("nlt", file);
15485 break;
15486 case UNGT:
15487 fputs ("nle", file);
15488 break;
15489 case UNLE:
15490 fputs ("ule", file);
15491 break;
15492 case UNLT:
15493 fputs ("ult", file);
15494 break;
15495 case LTGT:
15496 fputs ("une", file);
15497 break;
15498 default:
15499 output_operand_lossage ("operand is not a condition code, "
15500 "invalid operand code 'Y'");
15501 return;
15503 return;
15505 case 'D':
15506 /* A little bit of brain damage here. The SSE compare instructions
15507 use completely different names for the comparisons than the
15508 fp conditional moves do. */
15509 switch (GET_CODE (x))
15511 case UNEQ:
15512 if (TARGET_AVX)
15514 fputs ("eq_us", file);
15515 break;
15517 case EQ:
15518 fputs ("eq", file);
15519 break;
15520 case UNLT:
15521 if (TARGET_AVX)
15523 fputs ("nge", file);
15524 break;
15526 case LT:
15527 fputs ("lt", file);
15528 break;
15529 case UNLE:
15530 if (TARGET_AVX)
15532 fputs ("ngt", file);
15533 break;
15535 case LE:
15536 fputs ("le", file);
15537 break;
15538 case UNORDERED:
15539 fputs ("unord", file);
15540 break;
15541 case LTGT:
15542 if (TARGET_AVX)
15544 fputs ("neq_oq", file);
15545 break;
15547 case NE:
15548 fputs ("neq", file);
15549 break;
15550 case GE:
15551 if (TARGET_AVX)
15553 fputs ("ge", file);
15554 break;
15556 case UNGE:
15557 fputs ("nlt", file);
15558 break;
15559 case GT:
15560 if (TARGET_AVX)
15562 fputs ("gt", file);
15563 break;
15565 case UNGT:
15566 fputs ("nle", file);
15567 break;
15568 case ORDERED:
15569 fputs ("ord", file);
15570 break;
15571 default:
15572 output_operand_lossage ("operand is not a condition code, "
15573 "invalid operand code 'D'");
15574 return;
15576 return;
15578 case 'F':
15579 case 'f':
15580 #ifdef HAVE_AS_IX86_CMOV_SUN_SYNTAX
15581 if (ASSEMBLER_DIALECT == ASM_ATT)
15582 putc ('.', file);
15583 #endif
15585 case 'C':
15586 case 'c':
15587 if (!COMPARISON_P (x))
15589 output_operand_lossage ("operand is not a condition code, "
15590 "invalid operand code '%c'", code);
15591 return;
15593 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)),
15594 code == 'c' || code == 'f',
15595 code == 'F' || code == 'f',
15596 file);
15597 return;
15599 case 'H':
15600 if (!offsettable_memref_p (x))
15602 output_operand_lossage ("operand is not an offsettable memory "
15603 "reference, invalid operand code 'H'");
15604 return;
15606 /* It doesn't actually matter what mode we use here, as we're
15607 only going to use this for printing. */
15608 x = adjust_address_nv (x, DImode, 8);
15609 /* Output 'qword ptr' for intel assembler dialect. */
15610 if (ASSEMBLER_DIALECT == ASM_INTEL)
15611 code = 'q';
15612 break;
15614 case 'K':
15615 gcc_assert (CONST_INT_P (x));
15617 if (INTVAL (x) & IX86_HLE_ACQUIRE)
15618 #ifdef HAVE_AS_IX86_HLE
15619 fputs ("xacquire ", file);
15620 #else
15621 fputs ("\n" ASM_BYTE "0xf2\n\t", file);
15622 #endif
15623 else if (INTVAL (x) & IX86_HLE_RELEASE)
15624 #ifdef HAVE_AS_IX86_HLE
15625 fputs ("xrelease ", file);
15626 #else
15627 fputs ("\n" ASM_BYTE "0xf3\n\t", file);
15628 #endif
15629 /* We do not want to print the value of the operand. */
15630 return;
15632 case 'N':
15633 if (x == const0_rtx || x == CONST0_RTX (GET_MODE (x)))
15634 fputs ("{z}", file);
15635 return;
15637 case 'r':
15638 gcc_assert (CONST_INT_P (x));
15639 gcc_assert (INTVAL (x) == ROUND_SAE);
15641 if (ASSEMBLER_DIALECT == ASM_INTEL)
15642 fputs (", ", file);
15644 fputs ("{sae}", file);
15646 if (ASSEMBLER_DIALECT == ASM_ATT)
15647 fputs (", ", file);
15649 return;
15651 case 'R':
15652 gcc_assert (CONST_INT_P (x));
15654 if (ASSEMBLER_DIALECT == ASM_INTEL)
15655 fputs (", ", file);
15657 switch (INTVAL (x))
15659 case ROUND_NEAREST_INT | ROUND_SAE:
15660 fputs ("{rn-sae}", file);
15661 break;
15662 case ROUND_NEG_INF | ROUND_SAE:
15663 fputs ("{rd-sae}", file);
15664 break;
15665 case ROUND_POS_INF | ROUND_SAE:
15666 fputs ("{ru-sae}", file);
15667 break;
15668 case ROUND_ZERO | ROUND_SAE:
15669 fputs ("{rz-sae}", file);
15670 break;
15671 default:
15672 gcc_unreachable ();
15675 if (ASSEMBLER_DIALECT == ASM_ATT)
15676 fputs (", ", file);
15678 return;
15680 case '*':
15681 if (ASSEMBLER_DIALECT == ASM_ATT)
15682 putc ('*', file);
15683 return;
15685 case '&':
15687 const char *name = get_some_local_dynamic_name ();
15688 if (name == NULL)
15689 output_operand_lossage ("'%%&' used without any "
15690 "local dynamic TLS references");
15691 else
15692 assemble_name (file, name);
15693 return;
15696 case '+':
15698 rtx x;
15700 if (!optimize
15701 || optimize_function_for_size_p (cfun)
15702 || !TARGET_BRANCH_PREDICTION_HINTS)
15703 return;
15705 x = find_reg_note (current_output_insn, REG_BR_PROB, 0);
15706 if (x)
15708 int pred_val = XINT (x, 0);
15710 if (pred_val < REG_BR_PROB_BASE * 45 / 100
15711 || pred_val > REG_BR_PROB_BASE * 55 / 100)
15713 bool taken = pred_val > REG_BR_PROB_BASE / 2;
15714 bool cputaken
15715 = final_forward_branch_p (current_output_insn) == 0;
15717 /* Emit hints only when the default branch prediction
15718 heuristics would fail. */
15719 if (taken != cputaken)
15721 /* We use 3e (DS) prefix for taken branches and
15722 2e (CS) prefix for not taken branches. */
15723 if (taken)
15724 fputs ("ds ; ", file);
15725 else
15726 fputs ("cs ; ", file);
15730 return;
15733 case ';':
15734 #ifndef HAVE_AS_IX86_REP_LOCK_PREFIX
15735 putc (';', file);
15736 #endif
15737 return;
15739 case '@':
15740 if (ASSEMBLER_DIALECT == ASM_ATT)
15741 putc ('%', file);
15743 /* The kernel uses a different segment register for performance
15744 reasons: this way a system call does not have to trash the
15745 userspace segment register, which would be expensive. */
15746 if (TARGET_64BIT && ix86_cmodel != CM_KERNEL)
15747 fputs ("fs", file);
15748 else
15749 fputs ("gs", file);
15750 return;
15752 case '~':
15753 putc (TARGET_AVX2 ? 'i' : 'f', file);
15754 return;
15756 case '^':
15757 if (TARGET_64BIT && Pmode != word_mode)
15758 fputs ("addr32 ", file);
15759 return;
15761 case '!':
15762 if (ix86_bnd_prefixed_insn_p (current_output_insn))
15763 fputs ("bnd ", file);
15764 return;
15766 default:
15767 output_operand_lossage ("invalid operand code '%c'", code);
15771 if (REG_P (x))
15772 print_reg (x, code, file);
15774 else if (MEM_P (x))
15776 /* No `byte ptr' prefix for call instructions or BLKmode operands. */
15777 if (ASSEMBLER_DIALECT == ASM_INTEL && code != 'X' && code != 'P'
15778 && GET_MODE (x) != BLKmode)
15780 const char * size;
15781 switch (GET_MODE_SIZE (GET_MODE (x)))
15783 case 1: size = "BYTE"; break;
15784 case 2: size = "WORD"; break;
15785 case 4: size = "DWORD"; break;
15786 case 8: size = "QWORD"; break;
15787 case 12: size = "TBYTE"; break;
15788 case 16:
15789 if (GET_MODE (x) == XFmode)
15790 size = "TBYTE";
15791 else
15792 size = "XMMWORD";
15793 break;
15794 case 32: size = "YMMWORD"; break;
15795 case 64: size = "ZMMWORD"; break;
15796 default:
15797 gcc_unreachable ();
15800 /* Check for explicit size override (codes 'b', 'w', 'k',
15801 'q' and 'x') */
15802 if (code == 'b')
15803 size = "BYTE";
15804 else if (code == 'w')
15805 size = "WORD";
15806 else if (code == 'k')
15807 size = "DWORD";
15808 else if (code == 'q')
15809 size = "QWORD";
15810 else if (code == 'x')
15811 size = "XMMWORD";
15813 fputs (size, file);
15814 fputs (" PTR ", file);
15817 x = XEXP (x, 0);
15818 /* Avoid (%rip) for call operands. */
15819 if (CONSTANT_ADDRESS_P (x) && code == 'P'
15820 && !CONST_INT_P (x))
15821 output_addr_const (file, x);
15822 else if (this_is_asm_operands && ! address_operand (x, VOIDmode))
15823 output_operand_lossage ("invalid constraints for operand");
15824 else
15825 output_address (x);
15828 else if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) == SFmode)
15830 REAL_VALUE_TYPE r;
15831 long l;
15833 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
15834 REAL_VALUE_TO_TARGET_SINGLE (r, l);
15836 if (ASSEMBLER_DIALECT == ASM_ATT)
15837 putc ('$', file);
15838 /* Sign extend 32bit SFmode immediate to 8 bytes. */
15839 if (code == 'q')
15840 fprintf (file, "0x%08" HOST_LONG_LONG_FORMAT "x",
15841 (unsigned long long) (int) l);
15842 else
15843 fprintf (file, "0x%08x", (unsigned int) l);
15846 else if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) == DFmode)
15848 REAL_VALUE_TYPE r;
15849 long l[2];
15851 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
15852 REAL_VALUE_TO_TARGET_DOUBLE (r, l);
15854 if (ASSEMBLER_DIALECT == ASM_ATT)
15855 putc ('$', file);
15856 fprintf (file, "0x%lx%08lx", l[1] & 0xffffffff, l[0] & 0xffffffff);
15859 /* These float cases don't actually occur as immediate operands. */
15860 else if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) == XFmode)
15862 char dstr[30];
15864 real_to_decimal (dstr, CONST_DOUBLE_REAL_VALUE (x), sizeof (dstr), 0, 1);
15865 fputs (dstr, file);
15868 else
15870 /* We have patterns that allow zero sets of memory, for instance.
15871 In 64-bit mode, we should probably support all 8-byte vectors,
15872 since we can in fact encode that into an immediate. */
15873 if (GET_CODE (x) == CONST_VECTOR)
15875 gcc_assert (x == CONST0_RTX (GET_MODE (x)));
15876 x = const0_rtx;
15879 if (code != 'P' && code != 'p')
15881 if (CONST_INT_P (x) || GET_CODE (x) == CONST_DOUBLE)
15883 if (ASSEMBLER_DIALECT == ASM_ATT)
15884 putc ('$', file);
15886 else if (GET_CODE (x) == CONST || GET_CODE (x) == SYMBOL_REF
15887 || GET_CODE (x) == LABEL_REF)
15889 if (ASSEMBLER_DIALECT == ASM_ATT)
15890 putc ('$', file);
15891 else
15892 fputs ("OFFSET FLAT:", file);
15895 if (CONST_INT_P (x))
15896 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
15897 else if (flag_pic || MACHOPIC_INDIRECT)
15898 output_pic_addr_const (file, x, code);
15899 else
15900 output_addr_const (file, x);
15904 static bool
15905 ix86_print_operand_punct_valid_p (unsigned char code)
15907 return (code == '@' || code == '*' || code == '+' || code == '&'
15908 || code == ';' || code == '~' || code == '^' || code == '!');
15911 /* Print a memory operand whose address is ADDR. */
15913 static void
15914 ix86_print_operand_address (FILE *file, rtx addr)
15916 struct ix86_address parts;
15917 rtx base, index, disp;
15918 int scale;
15919 int ok;
15920 bool vsib = false;
15921 int code = 0;
15923 if (GET_CODE (addr) == UNSPEC && XINT (addr, 1) == UNSPEC_VSIBADDR)
15925 ok = ix86_decompose_address (XVECEXP (addr, 0, 0), &parts);
15926 gcc_assert (parts.index == NULL_RTX);
15927 parts.index = XVECEXP (addr, 0, 1);
15928 parts.scale = INTVAL (XVECEXP (addr, 0, 2));
15929 addr = XVECEXP (addr, 0, 0);
15930 vsib = true;
15932 else if (GET_CODE (addr) == UNSPEC && XINT (addr, 1) == UNSPEC_LEA_ADDR)
15934 gcc_assert (TARGET_64BIT);
15935 ok = ix86_decompose_address (XVECEXP (addr, 0, 0), &parts);
15936 code = 'q';
15938 else if (GET_CODE (addr) == UNSPEC && XINT (addr, 1) == UNSPEC_BNDMK_ADDR)
15940 ok = ix86_decompose_address (XVECEXP (addr, 0, 1), &parts);
15941 gcc_assert (parts.base == NULL_RTX || parts.index == NULL_RTX);
15942 if (parts.base != NULL_RTX)
15944 parts.index = parts.base;
15945 parts.scale = 1;
15947 parts.base = XVECEXP (addr, 0, 0);
15948 addr = XVECEXP (addr, 0, 0);
15950 else if (GET_CODE (addr) == UNSPEC && XINT (addr, 1) == UNSPEC_BNDLDX_ADDR)
15952 ok = ix86_decompose_address (XVECEXP (addr, 0, 0), &parts);
15953 gcc_assert (parts.index == NULL_RTX);
15954 parts.index = XVECEXP (addr, 0, 1);
15955 addr = XVECEXP (addr, 0, 0);
15957 else
15958 ok = ix86_decompose_address (addr, &parts);
15960 gcc_assert (ok);
15962 base = parts.base;
15963 index = parts.index;
15964 disp = parts.disp;
15965 scale = parts.scale;
15967 switch (parts.seg)
15969 case SEG_DEFAULT:
15970 break;
15971 case SEG_FS:
15972 case SEG_GS:
15973 if (ASSEMBLER_DIALECT == ASM_ATT)
15974 putc ('%', file);
15975 fputs ((parts.seg == SEG_FS ? "fs:" : "gs:"), file);
15976 break;
15977 default:
15978 gcc_unreachable ();
15981 /* Use the one byte shorter RIP-relative addressing for 64-bit mode. */
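/* Rough example of the effect (not from a real dump): with neither base
   nor index, a non-TLS symbolic displacement gets pc_rtx as its base
   below, so AT&T output becomes "foo(%rip)" instead of an absolute "foo",
   which is one byte shorter to encode.  */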
15982 if (TARGET_64BIT && !base && !index)
15984 rtx symbol = disp;
15986 if (GET_CODE (disp) == CONST
15987 && GET_CODE (XEXP (disp, 0)) == PLUS
15988 && CONST_INT_P (XEXP (XEXP (disp, 0), 1)))
15989 symbol = XEXP (XEXP (disp, 0), 0);
15991 if (GET_CODE (symbol) == LABEL_REF
15992 || (GET_CODE (symbol) == SYMBOL_REF
15993 && SYMBOL_REF_TLS_MODEL (symbol) == 0))
15994 base = pc_rtx;
15996 if (!base && !index)
15998 /* A displacement-only address requires special attention. */
16000 if (CONST_INT_P (disp))
16002 if (ASSEMBLER_DIALECT == ASM_INTEL && parts.seg == SEG_DEFAULT)
16003 fputs ("ds:", file);
16004 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (disp));
16006 else if (flag_pic)
16007 output_pic_addr_const (file, disp, 0);
16008 else
16009 output_addr_const (file, disp);
16011 else
16013 /* Print SImode register names to force addr32 prefix. */
16014 if (SImode_address_operand (addr, VOIDmode))
16016 #ifdef ENABLE_CHECKING
16017 gcc_assert (TARGET_64BIT);
16018 switch (GET_CODE (addr))
16020 case SUBREG:
16021 gcc_assert (GET_MODE (addr) == SImode);
16022 gcc_assert (GET_MODE (SUBREG_REG (addr)) == DImode);
16023 break;
16024 case ZERO_EXTEND:
16025 case AND:
16026 gcc_assert (GET_MODE (addr) == DImode);
16027 break;
16028 default:
16029 gcc_unreachable ();
16031 #endif
16032 gcc_assert (!code);
16033 code = 'k';
16035 else if (code == 0
16036 && TARGET_X32
16037 && disp
16038 && CONST_INT_P (disp)
16039 && INTVAL (disp) < -16*1024*1024)
16041 /* X32 runs in 64-bit mode, where displacement, DISP, in
16042 address DISP(%r64), is encoded as 32-bit immediate sign-
16043 extended from 32-bit to 64-bit. For -0x40000300(%r64),
16044 address is %r64 + 0xffffffffbffffd00. When %r64 <
16045 0x40000300, like 0x37ffe064, address is 0xfffffffff7ffdd64,
16046 which is invalid for x32. The correct address is %r64
16047 - 0x40000300 == 0xf7ffdd64. To properly encode
16048 -0x40000300(%r64) for x32, we zero-extend negative
16049 displacement by forcing addr32 prefix which truncates
16050 0xfffffffff7ffdd64 to 0xf7ffdd64. In theory, we should
16051 zero-extend all negative displacements, including -1(%rsp).
16052 However, for small negative displacements, sign-extension
16053 won't cause overflow. We only zero-extend negative
16054 displacements if they are < -16*1024*1024, which is also the bound used
16055 to check legitimate address displacements for PIC. */
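/* A standalone check of the arithmetic above (illustrative only, not part
   of the compiler):

     #include <stdint.h>
     #include <stdio.h>

     int main (void)
     {
       uint64_t r64 = 0x37ffe064;               // example register value
       int64_t disp = -0x40000300;              // sign-extended displacement
       uint64_t full = r64 + (uint64_t) disp;   // 0xfffffffff7ffdd64, invalid for x32
       uint32_t wrapped = (uint32_t) full;      // 0xf7ffdd64 after addr32 truncation
       printf ("%#llx %#x\n", (unsigned long long) full, wrapped);
       return 0;
     }
*/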
16056 code = 'k';
16059 if (ASSEMBLER_DIALECT == ASM_ATT)
16061 if (disp)
16063 if (flag_pic)
16064 output_pic_addr_const (file, disp, 0);
16065 else if (GET_CODE (disp) == LABEL_REF)
16066 output_asm_label (disp);
16067 else
16068 output_addr_const (file, disp);
16071 putc ('(', file);
16072 if (base)
16073 print_reg (base, code, file);
16074 if (index)
16076 putc (',', file);
16077 print_reg (index, vsib ? 0 : code, file);
16078 if (scale != 1 || vsib)
16079 fprintf (file, ",%d", scale);
16081 putc (')', file);
16083 else
16085 rtx offset = NULL_RTX;
16087 if (disp)
16089 /* Pull out the offset of a symbol; print any symbol itself. */
16090 if (GET_CODE (disp) == CONST
16091 && GET_CODE (XEXP (disp, 0)) == PLUS
16092 && CONST_INT_P (XEXP (XEXP (disp, 0), 1)))
16094 offset = XEXP (XEXP (disp, 0), 1);
16095 disp = gen_rtx_CONST (VOIDmode,
16096 XEXP (XEXP (disp, 0), 0));
16099 if (flag_pic)
16100 output_pic_addr_const (file, disp, 0);
16101 else if (GET_CODE (disp) == LABEL_REF)
16102 output_asm_label (disp);
16103 else if (CONST_INT_P (disp))
16104 offset = disp;
16105 else
16106 output_addr_const (file, disp);
16109 putc ('[', file);
16110 if (base)
16112 print_reg (base, code, file);
16113 if (offset)
16115 if (INTVAL (offset) >= 0)
16116 putc ('+', file);
16117 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (offset));
16120 else if (offset)
16121 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (offset));
16122 else
16123 putc ('0', file);
16125 if (index)
16127 putc ('+', file);
16128 print_reg (index, vsib ? 0 : code, file);
16129 if (scale != 1 || vsib)
16130 fprintf (file, "*%d", scale);
16132 putc (']', file);
16137 /* Implementation of TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA. */
16139 static bool
16140 i386_asm_output_addr_const_extra (FILE *file, rtx x)
16142 rtx op;
16144 if (GET_CODE (x) != UNSPEC)
16145 return false;
16147 op = XVECEXP (x, 0, 0);
16148 switch (XINT (x, 1))
16150 case UNSPEC_GOTTPOFF:
16151 output_addr_const (file, op);
16152 /* FIXME: This might be @TPOFF in Sun ld. */
16153 fputs ("@gottpoff", file);
16154 break;
16155 case UNSPEC_TPOFF:
16156 output_addr_const (file, op);
16157 fputs ("@tpoff", file);
16158 break;
16159 case UNSPEC_NTPOFF:
16160 output_addr_const (file, op);
16161 if (TARGET_64BIT)
16162 fputs ("@tpoff", file);
16163 else
16164 fputs ("@ntpoff", file);
16165 break;
16166 case UNSPEC_DTPOFF:
16167 output_addr_const (file, op);
16168 fputs ("@dtpoff", file);
16169 break;
16170 case UNSPEC_GOTNTPOFF:
16171 output_addr_const (file, op);
16172 if (TARGET_64BIT)
16173 fputs (ASSEMBLER_DIALECT == ASM_ATT ?
16174 "@gottpoff(%rip)" : "@gottpoff[rip]", file);
16175 else
16176 fputs ("@gotntpoff", file);
16177 break;
16178 case UNSPEC_INDNTPOFF:
16179 output_addr_const (file, op);
16180 fputs ("@indntpoff", file);
16181 break;
16182 #if TARGET_MACHO
16183 case UNSPEC_MACHOPIC_OFFSET:
16184 output_addr_const (file, op);
16185 putc ('-', file);
16186 machopic_output_function_base_name (file);
16187 break;
16188 #endif
16190 case UNSPEC_STACK_CHECK:
16192 int offset;
16194 gcc_assert (flag_split_stack);
16196 #ifdef TARGET_THREAD_SPLIT_STACK_OFFSET
16197 offset = TARGET_THREAD_SPLIT_STACK_OFFSET;
16198 #else
16199 gcc_unreachable ();
16200 #endif
16202 fprintf (file, "%s:%d", TARGET_64BIT ? "%fs" : "%gs", offset);
16204 break;
16206 default:
16207 return false;
16210 return true;
16213 /* Split one or more double-mode RTL references into pairs of half-mode
16214 references. The RTL can be REG, offsettable MEM, integer constant, or
16215 CONST_DOUBLE. "operands" is a pointer to an array of double-mode RTLs to
16216 split and "num" is its length. lo_half and hi_half are output arrays
16217 that parallel "operands". */
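/* Usage sketch (illustrative, not from the original file): splitting a
   single DImode memory operand yields two SImode halves:

     rtx lo, hi;
     split_double_mode (DImode, &operands[0], 1, &lo, &hi);
     // lo is the MEM at offset 0, hi the MEM at offset 4 (GET_MODE_SIZE (SImode))

   For a REG or a constant, the halves are produced with
   simplify_gen_subreg instead.  */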
16219 void
16220 split_double_mode (machine_mode mode, rtx operands[],
16221 int num, rtx lo_half[], rtx hi_half[])
16223 machine_mode half_mode;
16224 unsigned int byte;
16226 switch (mode)
16228 case TImode:
16229 half_mode = DImode;
16230 break;
16231 case DImode:
16232 half_mode = SImode;
16233 break;
16234 default:
16235 gcc_unreachable ();
16238 byte = GET_MODE_SIZE (half_mode);
16240 while (num--)
16242 rtx op = operands[num];
16244 /* simplify_subreg refuses to split volatile memory addresses,
16245 but we still have to handle them. */
16246 if (MEM_P (op))
16248 lo_half[num] = adjust_address (op, half_mode, 0);
16249 hi_half[num] = adjust_address (op, half_mode, byte);
16251 else
16253 lo_half[num] = simplify_gen_subreg (half_mode, op,
16254 GET_MODE (op) == VOIDmode
16255 ? mode : GET_MODE (op), 0);
16256 hi_half[num] = simplify_gen_subreg (half_mode, op,
16257 GET_MODE (op) == VOIDmode
16258 ? mode : GET_MODE (op), byte);
16263 /* Output code to perform a 387 binary operation in INSN, one of PLUS,
16264 MINUS, MULT or DIV. OPERANDS are the insn operands, where operands[3]
16265 is the expression of the binary operation. The output may either be
16266 emitted here, or returned to the caller, like all output_* functions.
16268 There is no guarantee that the operands are the same mode, as they
16269 might be within FLOAT or FLOAT_EXTEND expressions. */
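/* Output sketch (illustrative only): for an SFmode addition with all
   operands in SSE registers, the code below returns
   "vaddss\t{%2, %1, %0|%0, %1, %2}" under AVX and
   "addss\t{%2, %0|%0, %2}" otherwise; the x87 paths instead build an
   "fadd"/"faddp" variant that depends on which operand is st(0) and
   which operands die.  */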
16271 #ifndef SYSV386_COMPAT
16272 /* Set to 1 for compatibility with brain-damaged assemblers. No-one
16273 wants to fix the assemblers because that causes incompatibility
16274 with gcc. No-one wants to fix gcc because that causes
16275 incompatibility with assemblers... You can use the option of
16276 -DSYSV386_COMPAT=0 if you recompile both gcc and gas this way. */
16277 #define SYSV386_COMPAT 1
16278 #endif
16280 const char *
16281 output_387_binary_op (rtx insn, rtx *operands)
16283 static char buf[40];
16284 const char *p;
16285 const char *ssep;
16286 int is_sse = SSE_REG_P (operands[0]) || SSE_REG_P (operands[1]) || SSE_REG_P (operands[2]);
16288 #ifdef ENABLE_CHECKING
16289 /* Even if we do not want to check the inputs, this documents input
16290 constraints, which helps in understanding the following code. */
16291 if (STACK_REG_P (operands[0])
16292 && ((REG_P (operands[1])
16293 && REGNO (operands[0]) == REGNO (operands[1])
16294 && (STACK_REG_P (operands[2]) || MEM_P (operands[2])))
16295 || (REG_P (operands[2])
16296 && REGNO (operands[0]) == REGNO (operands[2])
16297 && (STACK_REG_P (operands[1]) || MEM_P (operands[1]))))
16298 && (STACK_TOP_P (operands[1]) || STACK_TOP_P (operands[2])))
16299 ; /* ok */
16300 else
16301 gcc_assert (is_sse);
16302 #endif
16304 switch (GET_CODE (operands[3]))
16306 case PLUS:
16307 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
16308 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
16309 p = "fiadd";
16310 else
16311 p = "fadd";
16312 ssep = "vadd";
16313 break;
16315 case MINUS:
16316 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
16317 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
16318 p = "fisub";
16319 else
16320 p = "fsub";
16321 ssep = "vsub";
16322 break;
16324 case MULT:
16325 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
16326 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
16327 p = "fimul";
16328 else
16329 p = "fmul";
16330 ssep = "vmul";
16331 break;
16333 case DIV:
16334 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
16335 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
16336 p = "fidiv";
16337 else
16338 p = "fdiv";
16339 ssep = "vdiv";
16340 break;
16342 default:
16343 gcc_unreachable ();
16346 if (is_sse)
16348 if (TARGET_AVX)
16350 strcpy (buf, ssep);
16351 if (GET_MODE (operands[0]) == SFmode)
16352 strcat (buf, "ss\t{%2, %1, %0|%0, %1, %2}");
16353 else
16354 strcat (buf, "sd\t{%2, %1, %0|%0, %1, %2}");
16356 else
16358 strcpy (buf, ssep + 1);
16359 if (GET_MODE (operands[0]) == SFmode)
16360 strcat (buf, "ss\t{%2, %0|%0, %2}");
16361 else
16362 strcat (buf, "sd\t{%2, %0|%0, %2}");
16364 return buf;
16366 strcpy (buf, p);
16368 switch (GET_CODE (operands[3]))
16370 case MULT:
16371 case PLUS:
16372 if (REG_P (operands[2]) && REGNO (operands[0]) == REGNO (operands[2]))
16374 rtx temp = operands[2];
16375 operands[2] = operands[1];
16376 operands[1] = temp;
16379 /* We know operands[0] == operands[1]. */
16381 if (MEM_P (operands[2]))
16383 p = "%Z2\t%2";
16384 break;
16387 if (find_regno_note (insn, REG_DEAD, REGNO (operands[2])))
16389 if (STACK_TOP_P (operands[0]))
16390 /* How is it that we are storing to a dead operand[2]?
16391 Well, presumably operands[1] is dead too. We can't
16392 store the result to st(0) as st(0) gets popped on this
16393 instruction. Instead store to operands[2] (which I
16394 think has to be st(1)). st(1) will be popped later.
16395 gcc <= 2.8.1 didn't have this check and generated
16396 assembly code that the Unixware assembler rejected. */
16397 p = "p\t{%0, %2|%2, %0}"; /* st(1) = st(0) op st(1); pop */
16398 else
16399 p = "p\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0); pop */
16400 break;
16403 if (STACK_TOP_P (operands[0]))
16404 p = "\t{%y2, %0|%0, %y2}"; /* st(0) = st(0) op st(r2) */
16405 else
16406 p = "\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0) */
16407 break;
16409 case MINUS:
16410 case DIV:
16411 if (MEM_P (operands[1]))
16413 p = "r%Z1\t%1";
16414 break;
16417 if (MEM_P (operands[2]))
16419 p = "%Z2\t%2";
16420 break;
16423 if (find_regno_note (insn, REG_DEAD, REGNO (operands[2])))
16425 #if SYSV386_COMPAT
16426 /* The SystemV/386 SVR3.2 assembler, and probably all AT&T
16427 derived assemblers, confusingly reverse the direction of
16428 the operation for fsub{r} and fdiv{r} when the
16429 destination register is not st(0). The Intel assembler
16430 doesn't have this brain damage. Read !SYSV386_COMPAT to
16431 figure out what the hardware really does. */
16432 if (STACK_TOP_P (operands[0]))
16433 p = "{p\t%0, %2|rp\t%2, %0}";
16434 else
16435 p = "{rp\t%2, %0|p\t%0, %2}";
16436 #else
16437 if (STACK_TOP_P (operands[0]))
16438 /* As above for fmul/fadd, we can't store to st(0). */
16439 p = "rp\t{%0, %2|%2, %0}"; /* st(1) = st(0) op st(1); pop */
16440 else
16441 p = "p\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0); pop */
16442 #endif
16443 break;
16446 if (find_regno_note (insn, REG_DEAD, REGNO (operands[1])))
16448 #if SYSV386_COMPAT
16449 if (STACK_TOP_P (operands[0]))
16450 p = "{rp\t%0, %1|p\t%1, %0}";
16451 else
16452 p = "{p\t%1, %0|rp\t%0, %1}";
16453 #else
16454 if (STACK_TOP_P (operands[0]))
16455 p = "p\t{%0, %1|%1, %0}"; /* st(1) = st(1) op st(0); pop */
16456 else
16457 p = "rp\t{%1, %0|%0, %1}"; /* st(r2) = st(0) op st(r2); pop */
16458 #endif
16459 break;
16462 if (STACK_TOP_P (operands[0]))
16464 if (STACK_TOP_P (operands[1]))
16465 p = "\t{%y2, %0|%0, %y2}"; /* st(0) = st(0) op st(r2) */
16466 else
16467 p = "r\t{%y1, %0|%0, %y1}"; /* st(0) = st(r1) op st(0) */
16468 break;
16470 else if (STACK_TOP_P (operands[1]))
16472 #if SYSV386_COMPAT
16473 p = "{\t%1, %0|r\t%0, %1}";
16474 #else
16475 p = "r\t{%1, %0|%0, %1}"; /* st(r2) = st(0) op st(r2) */
16476 #endif
16478 else
16480 #if SYSV386_COMPAT
16481 p = "{r\t%2, %0|\t%0, %2}";
16482 #else
16483 p = "\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0) */
16484 #endif
16486 break;
16488 default:
16489 gcc_unreachable ();
16492 strcat (buf, p);
16493 return buf;
16496 /* Check if a 256bit AVX register is referenced inside of EXP. */
16498 static bool
16499 ix86_check_avx256_register (const_rtx exp)
16501 if (GET_CODE (exp) == SUBREG)
16502 exp = SUBREG_REG (exp);
16504 return (REG_P (exp)
16505 && VALID_AVX256_REG_OR_OI_MODE (GET_MODE (exp)));
16508 /* Return needed mode for entity in optimize_mode_switching pass. */
16510 static int
16511 ix86_avx_u128_mode_needed (rtx_insn *insn)
16513 if (CALL_P (insn))
16515 rtx link;
16517 /* Needed mode is set to AVX_U128_CLEAN if there are
16518 no 256bit modes used in function arguments. */
16519 for (link = CALL_INSN_FUNCTION_USAGE (insn);
16520 link;
16521 link = XEXP (link, 1))
16523 if (GET_CODE (XEXP (link, 0)) == USE)
16525 rtx arg = XEXP (XEXP (link, 0), 0);
16527 if (ix86_check_avx256_register (arg))
16528 return AVX_U128_DIRTY;
16532 return AVX_U128_CLEAN;
16535 /* Require DIRTY mode if a 256bit AVX register is referenced.  Hardware
16536 changes state only when a 256bit register is written to, but we need
16537 to prevent the compiler from moving the optimal insertion point above
16538 a possible read from a 256bit register.  */
16539 subrtx_iterator::array_type array;
16540 FOR_EACH_SUBRTX (iter, array, PATTERN (insn), NONCONST)
16541 if (ix86_check_avx256_register (*iter))
16542 return AVX_U128_DIRTY;
16544 return AVX_U128_ANY;
16547 /* Return mode that i387 must be switched into
16548 prior to the execution of insn. */
16550 static int
16551 ix86_i387_mode_needed (int entity, rtx_insn *insn)
16553 enum attr_i387_cw mode;
16555 /* The mode UNINITIALIZED is used to store the control word after a
16556 function call or an ASM pattern.  The mode ANY specifies that there
16557 are no requirements on the control word and that no changes are made
16558 to the bits we are interested in.  */
16560 if (CALL_P (insn)
16561 || (NONJUMP_INSN_P (insn)
16562 && (asm_noperands (PATTERN (insn)) >= 0
16563 || GET_CODE (PATTERN (insn)) == ASM_INPUT)))
16564 return I387_CW_UNINITIALIZED;
16566 if (recog_memoized (insn) < 0)
16567 return I387_CW_ANY;
16569 mode = get_attr_i387_cw (insn);
16571 switch (entity)
16573 case I387_TRUNC:
16574 if (mode == I387_CW_TRUNC)
16575 return mode;
16576 break;
16578 case I387_FLOOR:
16579 if (mode == I387_CW_FLOOR)
16580 return mode;
16581 break;
16583 case I387_CEIL:
16584 if (mode == I387_CW_CEIL)
16585 return mode;
16586 break;
16588 case I387_MASK_PM:
16589 if (mode == I387_CW_MASK_PM)
16590 return mode;
16591 break;
16593 default:
16594 gcc_unreachable ();
16597 return I387_CW_ANY;
16600 /* Return mode that entity must be switched into
16601 prior to the execution of insn. */
16603 static int
16604 ix86_mode_needed (int entity, rtx_insn *insn)
16606 switch (entity)
16608 case AVX_U128:
16609 return ix86_avx_u128_mode_needed (insn);
16610 case I387_TRUNC:
16611 case I387_FLOOR:
16612 case I387_CEIL:
16613 case I387_MASK_PM:
16614 return ix86_i387_mode_needed (entity, insn);
16615 default:
16616 gcc_unreachable ();
16618 return 0;
16621 /* Check if a 256bit AVX register is referenced in stores. */
16623 static void
16624 ix86_check_avx256_stores (rtx dest, const_rtx, void *data)
16626 if (ix86_check_avx256_register (dest))
16628 bool *used = (bool *) data;
16629 *used = true;
16633 /* Calculate the mode of the upper 128 bits of the AVX registers after INSN.  */
16635 static int
16636 ix86_avx_u128_mode_after (int mode, rtx_insn *insn)
16638 rtx pat = PATTERN (insn);
16640 if (vzeroupper_operation (pat, VOIDmode)
16641 || vzeroall_operation (pat, VOIDmode))
16642 return AVX_U128_CLEAN;
16644 /* We know that the state is clean after a CALL insn if no 256bit
16645 register is used for the function return value.  */
16646 if (CALL_P (insn))
16648 bool avx_reg256_found = false;
16649 note_stores (pat, ix86_check_avx256_stores, &avx_reg256_found);
16651 return avx_reg256_found ? AVX_U128_DIRTY : AVX_U128_CLEAN;
16654 /* Otherwise, return current mode. Remember that if insn
16655 references AVX 256bit registers, the mode was already changed
16656 to DIRTY from MODE_NEEDED. */
16657 return mode;
16660 /* Return the mode that an insn results in. */
16663 ix86_mode_after (int entity, int mode, rtx_insn *insn)
16665 switch (entity)
16667 case AVX_U128:
16668 return ix86_avx_u128_mode_after (mode, insn);
16669 case I387_TRUNC:
16670 case I387_FLOOR:
16671 case I387_CEIL:
16672 case I387_MASK_PM:
16673 return mode;
16674 default:
16675 gcc_unreachable ();
16679 static int
16680 ix86_avx_u128_mode_entry (void)
16682 tree arg;
16684 /* Entry mode is set to AVX_U128_DIRTY if there are
16685 256bit modes used in function arguments. */
16686 for (arg = DECL_ARGUMENTS (current_function_decl); arg;
16687 arg = TREE_CHAIN (arg))
16689 rtx incoming = DECL_INCOMING_RTL (arg);
16691 if (incoming && ix86_check_avx256_register (incoming))
16692 return AVX_U128_DIRTY;
16695 return AVX_U128_CLEAN;
16698 /* Return a mode that ENTITY is assumed to be
16699 switched to at function entry. */
16701 static int
16702 ix86_mode_entry (int entity)
16704 switch (entity)
16706 case AVX_U128:
16707 return ix86_avx_u128_mode_entry ();
16708 case I387_TRUNC:
16709 case I387_FLOOR:
16710 case I387_CEIL:
16711 case I387_MASK_PM:
16712 return I387_CW_ANY;
16713 default:
16714 gcc_unreachable ();
16718 static int
16719 ix86_avx_u128_mode_exit (void)
16721 rtx reg = crtl->return_rtx;
16723 /* Exit mode is set to AVX_U128_DIRTY if there are
16724 256bit modes used in the function return register. */
16725 if (reg && ix86_check_avx256_register (reg))
16726 return AVX_U128_DIRTY;
16728 return AVX_U128_CLEAN;
16731 /* Return a mode that ENTITY is assumed to be
16732 switched to at function exit. */
16734 static int
16735 ix86_mode_exit (int entity)
16737 switch (entity)
16739 case AVX_U128:
16740 return ix86_avx_u128_mode_exit ();
16741 case I387_TRUNC:
16742 case I387_FLOOR:
16743 case I387_CEIL:
16744 case I387_MASK_PM:
16745 return I387_CW_ANY;
16746 default:
16747 gcc_unreachable ();
16751 static int
16752 ix86_mode_priority (int, int n)
16754 return n;
16757 /* Output code to initialize the control word copies used by the trunc?f?i
16758 and rounding patterns.  The current control word is saved to a stack slot,
16759 and a copy with the bits for MODE set is stored to the slot for MODE.  */
16761 static void
16762 emit_i387_cw_initialization (int mode)
16764 rtx stored_mode = assign_386_stack_local (HImode, SLOT_CW_STORED);
16765 rtx new_mode;
16767 enum ix86_stack_slot slot;
16769 rtx reg = gen_reg_rtx (HImode);
16771 emit_insn (gen_x86_fnstcw_1 (stored_mode));
16772 emit_move_insn (reg, copy_rtx (stored_mode));
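/* The rounding-control field of the i387 control word is bits 11:10
(00 = to nearest, 01 = down, 10 = up, 11 = toward zero) and the
precision-exception mask PM is bit 5; the constants below set just
those bits.  */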
16774 if (TARGET_64BIT || TARGET_PARTIAL_REG_STALL
16775 || optimize_insn_for_size_p ())
16777 switch (mode)
16779 case I387_CW_TRUNC:
16780 /* round toward zero (truncate) */
16781 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0c00)));
16782 slot = SLOT_CW_TRUNC;
16783 break;
16785 case I387_CW_FLOOR:
16786 /* round down toward -oo */
16787 emit_insn (gen_andhi3 (reg, reg, GEN_INT (~0x0c00)));
16788 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0400)));
16789 slot = SLOT_CW_FLOOR;
16790 break;
16792 case I387_CW_CEIL:
16793 /* round up toward +oo */
16794 emit_insn (gen_andhi3 (reg, reg, GEN_INT (~0x0c00)));
16795 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0800)));
16796 slot = SLOT_CW_CEIL;
16797 break;
16799 case I387_CW_MASK_PM:
16800 /* mask precision exception for nearbyint() */
16801 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0020)));
16802 slot = SLOT_CW_MASK_PM;
16803 break;
16805 default:
16806 gcc_unreachable ();
16809 else
16811 switch (mode)
16813 case I387_CW_TRUNC:
16814 /* round toward zero (truncate) */
16815 emit_insn (gen_movsi_insv_1 (reg, GEN_INT (0xc)));
16816 slot = SLOT_CW_TRUNC;
16817 break;
16819 case I387_CW_FLOOR:
16820 /* round down toward -oo */
16821 emit_insn (gen_movsi_insv_1 (reg, GEN_INT (0x4)));
16822 slot = SLOT_CW_FLOOR;
16823 break;
16825 case I387_CW_CEIL:
16826 /* round up toward +oo */
16827 emit_insn (gen_movsi_insv_1 (reg, GEN_INT (0x8)));
16828 slot = SLOT_CW_CEIL;
16829 break;
16831 case I387_CW_MASK_PM:
16832 /* mask precision exception for nearbyint() */
16833 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0020)));
16834 slot = SLOT_CW_MASK_PM;
16835 break;
16837 default:
16838 gcc_unreachable ();
16842 gcc_assert (slot < MAX_386_STACK_LOCALS);
16844 new_mode = assign_386_stack_local (HImode, slot);
16845 emit_move_insn (new_mode, reg);
16848 /* Emit vzeroupper. */
16850 void
16851 ix86_avx_emit_vzeroupper (HARD_REG_SET regs_live)
16853 int i;
16855 /* Cancel automatic vzeroupper insertion if there are
16856 live call-saved SSE registers at the insertion point. */
16858 for (i = FIRST_SSE_REG; i <= LAST_SSE_REG; i++)
16859 if (TEST_HARD_REG_BIT (regs_live, i) && !call_used_regs[i])
16860 return;
16862 if (TARGET_64BIT)
16863 for (i = FIRST_REX_SSE_REG; i <= LAST_REX_SSE_REG; i++)
16864 if (TEST_HARD_REG_BIT (regs_live, i) && !call_used_regs[i])
16865 return;
16867 emit_insn (gen_avx_vzeroupper ());
16872 /* Generate one or more insns to set ENTITY to MODE.  REGS_LIVE
16873 is the set of hard registers live at the point where the insn(s)
16874 are to be inserted.  */
16876 static void
16877 ix86_emit_mode_set (int entity, int mode, int prev_mode ATTRIBUTE_UNUSED,
16878 HARD_REG_SET regs_live)
16880 switch (entity)
16882 case AVX_U128:
16883 if (mode == AVX_U128_CLEAN)
16884 ix86_avx_emit_vzeroupper (regs_live);
16885 break;
16886 case I387_TRUNC:
16887 case I387_FLOOR:
16888 case I387_CEIL:
16889 case I387_MASK_PM:
16890 if (mode != I387_CW_ANY
16891 && mode != I387_CW_UNINITIALIZED)
16892 emit_i387_cw_initialization (mode);
16893 break;
16894 default:
16895 gcc_unreachable ();
16899 /* Output code for INSN to convert a float to a signed int. OPERANDS
16900 are the insn operands. The output may be [HSD]Imode and the input
16901 operand may be [SDX]Fmode. */
16903 const char *
16904 output_fix_trunc (rtx_insn *insn, rtx *operands, bool fisttp)
16906 int stack_top_dies = find_regno_note (insn, REG_DEAD, FIRST_STACK_REG) != 0;
16907 int dimode_p = GET_MODE (operands[0]) == DImode;
16908 int round_mode = get_attr_i387_cw (insn);
16910 /* Jump through a hoop or two for DImode, since the hardware has no
16911 non-popping instruction. We used to do this a different way, but
16912 that was somewhat fragile and broke with post-reload splitters. */
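/* If the value in st(0) must survive this insn (the stack top does not
die), duplicate it first: the popping fistp/fisttp forms used below
consume st(0).  */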
16913 if ((dimode_p || fisttp) && !stack_top_dies)
16914 output_asm_insn ("fld\t%y1", operands);
16916 gcc_assert (STACK_TOP_P (operands[1]));
16917 gcc_assert (MEM_P (operands[0]));
16918 gcc_assert (GET_MODE (operands[1]) != TFmode);
16920 if (fisttp)
16921 output_asm_insn ("fisttp%Z0\t%0", operands);
16922 else
16924 if (round_mode != I387_CW_ANY)
16925 output_asm_insn ("fldcw\t%3", operands);
16926 if (stack_top_dies || dimode_p)
16927 output_asm_insn ("fistp%Z0\t%0", operands);
16928 else
16929 output_asm_insn ("fist%Z0\t%0", operands);
16930 if (round_mode != I387_CW_ANY)
16931 output_asm_insn ("fldcw\t%2", operands);
16934 return "";
16937 /* Output code for x87 ffreep insn. The OPNO argument, which may only
16938 have the values zero or one, indicates the ffreep insn's operand
16939 from the OPERANDS array. */
16941 static const char *
16942 output_387_ffreep (rtx *operands ATTRIBUTE_UNUSED, int opno)
16944 if (TARGET_USE_FFREEP)
16945 #ifdef HAVE_AS_IX86_FFREEP
16946 return opno ? "ffreep\t%y1" : "ffreep\t%y0";
16947 #else
16949 static char retval[32];
16950 int regno = REGNO (operands[opno]);
16952 gcc_assert (STACK_REGNO_P (regno));
16954 regno -= FIRST_STACK_REG;
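/* ffreep %st(regno) encodes as the bytes 0xdf 0xc0+regno; emit them as
a little-endian ASM_SHORT for assemblers that lack the mnemonic.  */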
16956 snprintf (retval, sizeof (retval), ASM_SHORT "0xc%ddf", regno);
16957 return retval;
16959 #endif
16961 return opno ? "fstp\t%y1" : "fstp\t%y0";
16965 /* Output code for INSN to compare OPERANDS. EFLAGS_P is 1 when fcomi
16966 should be used. UNORDERED_P is true when fucom should be used. */
16968 const char *
16969 output_fp_compare (rtx insn, rtx *operands, bool eflags_p, bool unordered_p)
16971 int stack_top_dies;
16972 rtx cmp_op0, cmp_op1;
16973 int is_sse = SSE_REG_P (operands[0]) || SSE_REG_P (operands[1]);
16975 if (eflags_p)
16977 cmp_op0 = operands[0];
16978 cmp_op1 = operands[1];
16980 else
16982 cmp_op0 = operands[1];
16983 cmp_op1 = operands[2];
16986 if (is_sse)
16988 if (GET_MODE (operands[0]) == SFmode)
16989 if (unordered_p)
16990 return "%vucomiss\t{%1, %0|%0, %1}";
16991 else
16992 return "%vcomiss\t{%1, %0|%0, %1}";
16993 else
16994 if (unordered_p)
16995 return "%vucomisd\t{%1, %0|%0, %1}";
16996 else
16997 return "%vcomisd\t{%1, %0|%0, %1}";
17000 gcc_assert (STACK_TOP_P (cmp_op0));
17002 stack_top_dies = find_regno_note (insn, REG_DEAD, FIRST_STACK_REG) != 0;
17004 if (cmp_op1 == CONST0_RTX (GET_MODE (cmp_op1)))
17006 if (stack_top_dies)
17008 output_asm_insn ("ftst\n\tfnstsw\t%0", operands);
17009 return output_387_ffreep (operands, 1);
17011 else
17012 return "ftst\n\tfnstsw\t%0";
17015 if (STACK_REG_P (cmp_op1)
17016 && stack_top_dies
17017 && find_regno_note (insn, REG_DEAD, REGNO (cmp_op1))
17018 && REGNO (cmp_op1) != FIRST_STACK_REG)
17020 /* If the top of the 387 stack dies, and the other operand
17021 is also a stack register that dies, then this must be an
17022 `fcompp' float compare.  */
17024 if (eflags_p)
17026 /* There is no double popping fcomi variant. Fortunately,
17027 eflags is immune from the fstp's cc clobbering. */
17028 if (unordered_p)
17029 output_asm_insn ("fucomip\t{%y1, %0|%0, %y1}", operands);
17030 else
17031 output_asm_insn ("fcomip\t{%y1, %0|%0, %y1}", operands);
17032 return output_387_ffreep (operands, 0);
17034 else
17036 if (unordered_p)
17037 return "fucompp\n\tfnstsw\t%0";
17038 else
17039 return "fcompp\n\tfnstsw\t%0";
17042 else
17044 /* Encoded here as eflags_p | intmode | unordered_p | stack_top_dies. */
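/* The NULL entries are combinations that cannot occur: there is no
unordered variant of the integer compares (ficom/ficomp), and the
fcomi forms cannot take an integer memory operand.  */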
17046 static const char * const alt[16] =
17048 "fcom%Z2\t%y2\n\tfnstsw\t%0",
17049 "fcomp%Z2\t%y2\n\tfnstsw\t%0",
17050 "fucom%Z2\t%y2\n\tfnstsw\t%0",
17051 "fucomp%Z2\t%y2\n\tfnstsw\t%0",
17053 "ficom%Z2\t%y2\n\tfnstsw\t%0",
17054 "ficomp%Z2\t%y2\n\tfnstsw\t%0",
17055 NULL,
17056 NULL,
17058 "fcomi\t{%y1, %0|%0, %y1}",
17059 "fcomip\t{%y1, %0|%0, %y1}",
17060 "fucomi\t{%y1, %0|%0, %y1}",
17061 "fucomip\t{%y1, %0|%0, %y1}",
17063 NULL,
17064 NULL,
17065 NULL,
17066 NULL
17069 int mask;
17070 const char *ret;
17072 mask = eflags_p << 3;
17073 mask |= (GET_MODE_CLASS (GET_MODE (cmp_op1)) == MODE_INT) << 2;
17074 mask |= unordered_p << 1;
17075 mask |= stack_top_dies;
17077 gcc_assert (mask < 16);
17078 ret = alt[mask];
17079 gcc_assert (ret);
17081 return ret;
17085 void
17086 ix86_output_addr_vec_elt (FILE *file, int value)
17088 const char *directive = ASM_LONG;
17090 #ifdef ASM_QUAD
17091 if (TARGET_LP64)
17092 directive = ASM_QUAD;
17093 #else
17094 gcc_assert (!TARGET_64BIT);
17095 #endif
17097 fprintf (file, "%s%s%d\n", directive, LPREFIX, value);
17100 void
17101 ix86_output_addr_diff_elt (FILE *file, int value, int rel)
17103 const char *directive = ASM_LONG;
17105 #ifdef ASM_QUAD
17106 if (TARGET_64BIT && CASE_VECTOR_MODE == DImode)
17107 directive = ASM_QUAD;
17108 #else
17109 gcc_assert (!TARGET_64BIT);
17110 #endif
17111 /* We can't use @GOTOFF for text labels on VxWorks; see gotoff_operand. */
17112 if (TARGET_64BIT || TARGET_VXWORKS_RTP)
17113 fprintf (file, "%s%s%d-%s%d\n",
17114 directive, LPREFIX, value, LPREFIX, rel);
17115 else if (HAVE_AS_GOTOFF_IN_DATA)
17116 fprintf (file, ASM_LONG "%s%d@GOTOFF\n", LPREFIX, value);
17117 #if TARGET_MACHO
17118 else if (TARGET_MACHO)
17120 fprintf (file, ASM_LONG "%s%d-", LPREFIX, value);
17121 machopic_output_function_base_name (file);
17122 putc ('\n', file);
17124 #endif
17125 else
17126 asm_fprintf (file, ASM_LONG "%U%s+[.-%s%d]\n",
17127 GOT_SYMBOL_NAME, LPREFIX, value);
17130 /* Generate either "mov $0, reg" or "xor reg, reg", as appropriate
17131 for the target. */
17133 void
17134 ix86_expand_clear (rtx dest)
17136 rtx tmp;
17138 /* We play register width games, which are only valid after reload. */
17139 gcc_assert (reload_completed);
17141 /* Avoid HImode and its attendant prefix byte. */
17142 if (GET_MODE_SIZE (GET_MODE (dest)) < 4)
17143 dest = gen_rtx_REG (SImode, REGNO (dest));
17144 tmp = gen_rtx_SET (VOIDmode, dest, const0_rtx);
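/* Unless the target prefers "mov $0, reg" and we are not optimizing for
size, emit the xor form; xor clobbers the flags, so wrap the SET in a
PARALLEL with a clobber of FLAGS_REG to match the insn pattern.  */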
17146 if (!TARGET_USE_MOV0 || optimize_insn_for_size_p ())
17148 rtx clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
17149 tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, tmp, clob));
17152 emit_insn (tmp);
17155 /* X is an unchanging MEM. If it is a constant pool reference, return
17156 the constant pool rtx, else NULL. */
17159 maybe_get_pool_constant (rtx x)
17161 x = ix86_delegitimize_address (XEXP (x, 0));
17163 if (GET_CODE (x) == SYMBOL_REF && CONSTANT_POOL_ADDRESS_P (x))
17164 return get_pool_constant (x);
17166 return NULL_RTX;
17169 void
17170 ix86_expand_move (machine_mode mode, rtx operands[])
17172 rtx op0, op1;
17173 enum tls_model model;
17175 op0 = operands[0];
17176 op1 = operands[1];
17178 if (GET_CODE (op1) == SYMBOL_REF)
17180 rtx tmp;
17182 model = SYMBOL_REF_TLS_MODEL (op1);
17183 if (model)
17185 op1 = legitimize_tls_address (op1, model, true);
17186 op1 = force_operand (op1, op0);
17187 if (op1 == op0)
17188 return;
17189 op1 = convert_to_mode (mode, op1, 1);
17191 else if ((tmp = legitimize_pe_coff_symbol (op1, false)) != NULL_RTX)
17192 op1 = tmp;
17194 else if (GET_CODE (op1) == CONST
17195 && GET_CODE (XEXP (op1, 0)) == PLUS
17196 && GET_CODE (XEXP (XEXP (op1, 0), 0)) == SYMBOL_REF)
17198 rtx addend = XEXP (XEXP (op1, 0), 1);
17199 rtx symbol = XEXP (XEXP (op1, 0), 0);
17200 rtx tmp;
17202 model = SYMBOL_REF_TLS_MODEL (symbol);
17203 if (model)
17204 tmp = legitimize_tls_address (symbol, model, true);
17205 else
17206 tmp = legitimize_pe_coff_symbol (symbol, true);
17208 if (tmp)
17210 tmp = force_operand (tmp, NULL);
17211 tmp = expand_simple_binop (Pmode, PLUS, tmp, addend,
17212 op0, 1, OPTAB_DIRECT);
17213 if (tmp == op0)
17214 return;
17215 op1 = convert_to_mode (mode, tmp, 1);
17219 if ((flag_pic || MACHOPIC_INDIRECT)
17220 && symbolic_operand (op1, mode))
17222 if (TARGET_MACHO && !TARGET_64BIT)
17224 #if TARGET_MACHO
17225 /* dynamic-no-pic */
17226 if (MACHOPIC_INDIRECT)
17228 rtx temp = ((reload_in_progress
17229 || ((op0 && REG_P (op0))
17230 && mode == Pmode))
17231 ? op0 : gen_reg_rtx (Pmode));
17232 op1 = machopic_indirect_data_reference (op1, temp);
17233 if (MACHOPIC_PURE)
17234 op1 = machopic_legitimize_pic_address (op1, mode,
17235 temp == op1 ? 0 : temp);
17237 if (op0 != op1 && GET_CODE (op0) != MEM)
17239 rtx insn = gen_rtx_SET (VOIDmode, op0, op1);
17240 emit_insn (insn);
17241 return;
17243 if (GET_CODE (op0) == MEM)
17244 op1 = force_reg (Pmode, op1);
17245 else
17247 rtx temp = op0;
17248 if (GET_CODE (temp) != REG)
17249 temp = gen_reg_rtx (Pmode);
17250 temp = legitimize_pic_address (op1, temp);
17251 if (temp == op0)
17252 return;
17253 op1 = temp;
17255 /* dynamic-no-pic */
17256 #endif
17258 else
17260 if (MEM_P (op0))
17261 op1 = force_reg (mode, op1);
17262 else if (!(TARGET_64BIT && x86_64_movabs_operand (op1, DImode)))
17264 rtx reg = can_create_pseudo_p () ? NULL_RTX : op0;
17265 op1 = legitimize_pic_address (op1, reg);
17266 if (op0 == op1)
17267 return;
17268 op1 = convert_to_mode (mode, op1, 1);
17272 else
17274 if (MEM_P (op0)
17275 && (PUSH_ROUNDING (GET_MODE_SIZE (mode)) != GET_MODE_SIZE (mode)
17276 || !push_operand (op0, mode))
17277 && MEM_P (op1))
17278 op1 = force_reg (mode, op1);
17280 if (push_operand (op0, mode)
17281 && ! general_no_elim_operand (op1, mode))
17282 op1 = copy_to_mode_reg (mode, op1);
17284 /* Force large constants in 64bit compilation into register
17285 to get them CSEed. */
17286 if (can_create_pseudo_p ()
17287 && (mode == DImode) && TARGET_64BIT
17288 && immediate_operand (op1, mode)
17289 && !x86_64_zext_immediate_operand (op1, VOIDmode)
17290 && !register_operand (op0, mode)
17291 && optimize)
17292 op1 = copy_to_mode_reg (mode, op1);
17294 if (can_create_pseudo_p ()
17295 && FLOAT_MODE_P (mode)
17296 && GET_CODE (op1) == CONST_DOUBLE)
17298 /* If we are loading a floating point constant to a register,
17299 force the value to memory now, since we'll get better code
17300 out of the back end.  */
17302 op1 = validize_mem (force_const_mem (mode, op1));
17303 if (!register_operand (op0, mode))
17305 rtx temp = gen_reg_rtx (mode);
17306 emit_insn (gen_rtx_SET (VOIDmode, temp, op1));
17307 emit_move_insn (op0, temp);
17308 return;
17313 emit_insn (gen_rtx_SET (VOIDmode, op0, op1));
17316 void
17317 ix86_expand_vector_move (machine_mode mode, rtx operands[])
17319 rtx op0 = operands[0], op1 = operands[1];
17320 unsigned int align = GET_MODE_ALIGNMENT (mode);
17322 if (push_operand (op0, VOIDmode))
17323 op0 = emit_move_resolve_push (mode, op0);
17325 /* Force constants other than zero into memory.  We do not know how
17326 the instructions used to build constants modify the upper 64 bits
17327 of the register; once we have that information we may be able
17328 to handle some of them more efficiently.  */
17329 if (can_create_pseudo_p ()
17330 && register_operand (op0, mode)
17331 && (CONSTANT_P (op1)
17332 || (GET_CODE (op1) == SUBREG
17333 && CONSTANT_P (SUBREG_REG (op1))))
17334 && !standard_sse_constant_p (op1))
17335 op1 = validize_mem (force_const_mem (mode, op1));
17337 /* We need to check memory alignment for SSE modes since an attribute
17338 can make operands unaligned.  */
17339 if (can_create_pseudo_p ()
17340 && SSE_REG_MODE_P (mode)
17341 && ((MEM_P (op0) && (MEM_ALIGN (op0) < align))
17342 || (MEM_P (op1) && (MEM_ALIGN (op1) < align))))
17344 rtx tmp[2];
17346 /* ix86_expand_vector_move_misalign() does not like constants ... */
17347 if (CONSTANT_P (op1)
17348 || (GET_CODE (op1) == SUBREG
17349 && CONSTANT_P (SUBREG_REG (op1))))
17350 op1 = validize_mem (force_const_mem (mode, op1));
17352 /* ... nor both arguments in memory. */
17353 if (!register_operand (op0, mode)
17354 && !register_operand (op1, mode))
17355 op1 = force_reg (mode, op1);
17357 tmp[0] = op0; tmp[1] = op1;
17358 ix86_expand_vector_move_misalign (mode, tmp);
17359 return;
17362 /* Make operand1 a register if it isn't already. */
17363 if (can_create_pseudo_p ()
17364 && !register_operand (op0, mode)
17365 && !register_operand (op1, mode))
17367 emit_move_insn (op0, force_reg (GET_MODE (op0), op1));
17368 return;
17371 emit_insn (gen_rtx_SET (VOIDmode, op0, op1));
17374 /* Split 32-byte AVX unaligned load and store if needed. */
17376 static void
17377 ix86_avx256_split_vector_move_misalign (rtx op0, rtx op1)
17379 rtx m;
17380 rtx (*extract) (rtx, rtx, rtx);
17381 rtx (*load_unaligned) (rtx, rtx);
17382 rtx (*store_unaligned) (rtx, rtx);
17383 machine_mode mode;
17385 switch (GET_MODE (op0))
17387 default:
17388 gcc_unreachable ();
17389 case V32QImode:
17390 extract = gen_avx_vextractf128v32qi;
17391 load_unaligned = gen_avx_loaddquv32qi;
17392 store_unaligned = gen_avx_storedquv32qi;
17393 mode = V16QImode;
17394 break;
17395 case V8SFmode:
17396 extract = gen_avx_vextractf128v8sf;
17397 load_unaligned = gen_avx_loadups256;
17398 store_unaligned = gen_avx_storeups256;
17399 mode = V4SFmode;
17400 break;
17401 case V4DFmode:
17402 extract = gen_avx_vextractf128v4df;
17403 load_unaligned = gen_avx_loadupd256;
17404 store_unaligned = gen_avx_storeupd256;
17405 mode = V2DFmode;
17406 break;
17409 if (MEM_P (op1))
17411 if (TARGET_AVX256_SPLIT_UNALIGNED_LOAD)
17413 rtx r = gen_reg_rtx (mode);
17414 m = adjust_address (op1, mode, 0);
17415 emit_move_insn (r, m);
17416 m = adjust_address (op1, mode, 16);
17417 r = gen_rtx_VEC_CONCAT (GET_MODE (op0), r, m);
17418 emit_move_insn (op0, r);
17420 /* Normal *mov<mode>_internal pattern will handle
17421 unaligned loads just fine if misaligned_operand
17422 is true, and without the UNSPEC it can be combined
17423 with arithmetic instructions. */
17424 else if (misaligned_operand (op1, GET_MODE (op1)))
17425 emit_insn (gen_rtx_SET (VOIDmode, op0, op1));
17426 else
17427 emit_insn (load_unaligned (op0, op1));
17429 else if (MEM_P (op0))
17431 if (TARGET_AVX256_SPLIT_UNALIGNED_STORE)
17433 m = adjust_address (op0, mode, 0);
17434 emit_insn (extract (m, op1, const0_rtx));
17435 m = adjust_address (op0, mode, 16);
17436 emit_insn (extract (m, op1, const1_rtx));
17438 else
17439 emit_insn (store_unaligned (op0, op1));
17441 else
17442 gcc_unreachable ();
17445 /* Implement the movmisalign patterns for SSE. Non-SSE modes go
17446 straight to ix86_expand_vector_move. */
17447 /* Code generation for scalar reg-reg moves of single and double precision data:
17448 if (x86_sse_partial_reg_dependency == true | x86_sse_split_regs == true)
17449 movaps reg, reg
17450 else
17451 movss reg, reg
17452 if (x86_sse_partial_reg_dependency == true)
17453 movapd reg, reg
17454 else
17455 movsd reg, reg
17457 Code generation for scalar loads of double precision data:
17458 if (x86_sse_split_regs == true)
17459 movlpd mem, reg (gas syntax)
17460 else
17461 movsd mem, reg
17463 Code generation for unaligned packed loads of single precision data
17464 (x86_sse_unaligned_move_optimal overrides x86_sse_partial_reg_dependency):
17465 if (x86_sse_unaligned_move_optimal)
17466 movups mem, reg
17468 if (x86_sse_partial_reg_dependency == true)
17470 xorps reg, reg
17471 movlps mem, reg
17472 movhps mem+8, reg
17474 else
17476 movlps mem, reg
17477 movhps mem+8, reg
17480 Code generation for unaligned packed loads of double precision data
17481 (x86_sse_unaligned_move_optimal overrides x86_sse_split_regs):
17482 if (x86_sse_unaligned_move_optimal)
17483 movupd mem, reg
17485 if (x86_sse_split_regs == true)
17487 movlpd mem, reg
17488 movhpd mem+8, reg
17490 else
17492 movsd mem, reg
17493 movhpd mem+8, reg
17497 void
17498 ix86_expand_vector_move_misalign (machine_mode mode, rtx operands[])
17500 rtx op0, op1, orig_op0 = NULL_RTX, m;
17501 rtx (*load_unaligned) (rtx, rtx);
17502 rtx (*store_unaligned) (rtx, rtx);
17504 op0 = operands[0];
17505 op1 = operands[1];
17507 if (GET_MODE_SIZE (mode) == 64)
17509 switch (GET_MODE_CLASS (mode))
17511 case MODE_VECTOR_INT:
17512 case MODE_INT:
17513 if (GET_MODE (op0) != V16SImode)
17515 if (!MEM_P (op0))
17517 orig_op0 = op0;
17518 op0 = gen_reg_rtx (V16SImode);
17520 else
17521 op0 = gen_lowpart (V16SImode, op0);
17523 op1 = gen_lowpart (V16SImode, op1);
17524 /* FALLTHRU */
17526 case MODE_VECTOR_FLOAT:
17527 switch (GET_MODE (op0))
17529 default:
17530 gcc_unreachable ();
17531 case V16SImode:
17532 load_unaligned = gen_avx512f_loaddquv16si;
17533 store_unaligned = gen_avx512f_storedquv16si;
17534 break;
17535 case V16SFmode:
17536 load_unaligned = gen_avx512f_loadups512;
17537 store_unaligned = gen_avx512f_storeups512;
17538 break;
17539 case V8DFmode:
17540 load_unaligned = gen_avx512f_loadupd512;
17541 store_unaligned = gen_avx512f_storeupd512;
17542 break;
17545 if (MEM_P (op1))
17546 emit_insn (load_unaligned (op0, op1));
17547 else if (MEM_P (op0))
17548 emit_insn (store_unaligned (op0, op1));
17549 else
17550 gcc_unreachable ();
17551 if (orig_op0)
17552 emit_move_insn (orig_op0, gen_lowpart (GET_MODE (orig_op0), op0));
17553 break;
17555 default:
17556 gcc_unreachable ();
17559 return;
17562 if (TARGET_AVX
17563 && GET_MODE_SIZE (mode) == 32)
17565 switch (GET_MODE_CLASS (mode))
17567 case MODE_VECTOR_INT:
17568 case MODE_INT:
17569 if (GET_MODE (op0) != V32QImode)
17571 if (!MEM_P (op0))
17573 orig_op0 = op0;
17574 op0 = gen_reg_rtx (V32QImode);
17576 else
17577 op0 = gen_lowpart (V32QImode, op0);
17579 op1 = gen_lowpart (V32QImode, op1);
17580 /* FALLTHRU */
17582 case MODE_VECTOR_FLOAT:
17583 ix86_avx256_split_vector_move_misalign (op0, op1);
17584 if (orig_op0)
17585 emit_move_insn (orig_op0, gen_lowpart (GET_MODE (orig_op0), op0));
17586 break;
17588 default:
17589 gcc_unreachable ();
17592 return;
17595 if (MEM_P (op1))
17597 /* Normal *mov<mode>_internal pattern will handle
17598 unaligned loads just fine if misaligned_operand
17599 is true, and without the UNSPEC it can be combined
17600 with arithmetic instructions. */
17601 if (TARGET_AVX
17602 && (GET_MODE_CLASS (mode) == MODE_VECTOR_INT
17603 || GET_MODE_CLASS (mode) == MODE_VECTOR_FLOAT)
17604 && misaligned_operand (op1, GET_MODE (op1)))
17605 emit_insn (gen_rtx_SET (VOIDmode, op0, op1));
17606 /* ??? If we have typed data, then it would appear that using
17607 movdqu is the only way to get unaligned data loaded with
17608 integer type. */
17609 else if (TARGET_SSE2 && GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
17611 if (GET_MODE (op0) != V16QImode)
17613 orig_op0 = op0;
17614 op0 = gen_reg_rtx (V16QImode);
17616 op1 = gen_lowpart (V16QImode, op1);
17617 /* We will eventually emit movups based on insn attributes. */
17618 emit_insn (gen_sse2_loaddquv16qi (op0, op1));
17619 if (orig_op0)
17620 emit_move_insn (orig_op0, gen_lowpart (GET_MODE (orig_op0), op0));
17622 else if (TARGET_SSE2 && mode == V2DFmode)
17624 rtx zero;
17626 if (TARGET_AVX
17627 || TARGET_SSE_UNALIGNED_LOAD_OPTIMAL
17628 || TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL
17629 || optimize_insn_for_size_p ())
17631 /* We will eventually emit movups based on insn attributes. */
17632 emit_insn (gen_sse2_loadupd (op0, op1));
17633 return;
17636 /* When SSE registers are split into halves, we can avoid
17637 writing to the top half twice. */
17638 if (TARGET_SSE_SPLIT_REGS)
17640 emit_clobber (op0);
17641 zero = op0;
17643 else
17645 /* ??? Not sure about the best option for the Intel chips.
17646 The following would seem to satisfy; the register is
17647 entirely cleared, breaking the dependency chain. We
17648 then store to the upper half, with a dependency depth
17649 of one. A rumor has it that Intel recommends two movsd
17650 followed by an unpacklpd, but this is unconfirmed. And
17651 given that the dependency depth of the unpacklpd would
17652 still be one, I'm not sure why this would be better. */
17653 zero = CONST0_RTX (V2DFmode);
17656 m = adjust_address (op1, DFmode, 0);
17657 emit_insn (gen_sse2_loadlpd (op0, zero, m));
17658 m = adjust_address (op1, DFmode, 8);
17659 emit_insn (gen_sse2_loadhpd (op0, op0, m));
17661 else
17663 rtx t;
17665 if (TARGET_AVX
17666 || TARGET_SSE_UNALIGNED_LOAD_OPTIMAL
17667 || TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL
17668 || optimize_insn_for_size_p ())
17670 if (GET_MODE (op0) != V4SFmode)
17672 orig_op0 = op0;
17673 op0 = gen_reg_rtx (V4SFmode);
17675 op1 = gen_lowpart (V4SFmode, op1);
17676 emit_insn (gen_sse_loadups (op0, op1));
17677 if (orig_op0)
17678 emit_move_insn (orig_op0,
17679 gen_lowpart (GET_MODE (orig_op0), op0));
17680 return;
17683 if (mode != V4SFmode)
17684 t = gen_reg_rtx (V4SFmode);
17685 else
17686 t = op0;
17688 if (TARGET_SSE_PARTIAL_REG_DEPENDENCY)
17689 emit_move_insn (t, CONST0_RTX (V4SFmode));
17690 else
17691 emit_clobber (t);
17693 m = adjust_address (op1, V2SFmode, 0);
17694 emit_insn (gen_sse_loadlps (t, t, m));
17695 m = adjust_address (op1, V2SFmode, 8);
17696 emit_insn (gen_sse_loadhps (t, t, m));
17697 if (mode != V4SFmode)
17698 emit_move_insn (op0, gen_lowpart (mode, t));
17701 else if (MEM_P (op0))
17703 if (TARGET_SSE2 && GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
17705 op0 = gen_lowpart (V16QImode, op0);
17706 op1 = gen_lowpart (V16QImode, op1);
17707 /* We will eventually emit movups based on insn attributes. */
17708 emit_insn (gen_sse2_storedquv16qi (op0, op1));
17710 else if (TARGET_SSE2 && mode == V2DFmode)
17712 if (TARGET_AVX
17713 || TARGET_SSE_UNALIGNED_STORE_OPTIMAL
17714 || TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL
17715 || optimize_insn_for_size_p ())
17716 /* We will eventually emit movups based on insn attributes. */
17717 emit_insn (gen_sse2_storeupd (op0, op1));
17718 else
17720 m = adjust_address (op0, DFmode, 0);
17721 emit_insn (gen_sse2_storelpd (m, op1));
17722 m = adjust_address (op0, DFmode, 8);
17723 emit_insn (gen_sse2_storehpd (m, op1));
17726 else
17728 if (mode != V4SFmode)
17729 op1 = gen_lowpart (V4SFmode, op1);
17731 if (TARGET_AVX
17732 || TARGET_SSE_UNALIGNED_STORE_OPTIMAL
17733 || TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL
17734 || optimize_insn_for_size_p ())
17736 op0 = gen_lowpart (V4SFmode, op0);
17737 emit_insn (gen_sse_storeups (op0, op1));
17739 else
17741 m = adjust_address (op0, V2SFmode, 0);
17742 emit_insn (gen_sse_storelps (m, op1));
17743 m = adjust_address (op0, V2SFmode, 8);
17744 emit_insn (gen_sse_storehps (m, op1));
17748 else
17749 gcc_unreachable ();
17752 /* Helper function of ix86_fixup_binary_operands to canonicalize
17753 operand order. Returns true if the operands should be swapped. */
17755 static bool
17756 ix86_swap_binary_operands_p (enum rtx_code code, machine_mode mode,
17757 rtx operands[])
17759 rtx dst = operands[0];
17760 rtx src1 = operands[1];
17761 rtx src2 = operands[2];
17763 /* If the operation is not commutative, we can't do anything. */
17764 if (GET_RTX_CLASS (code) != RTX_COMM_ARITH)
17765 return false;
17767 /* Highest priority is that src1 should match dst. */
17768 if (rtx_equal_p (dst, src1))
17769 return false;
17770 if (rtx_equal_p (dst, src2))
17771 return true;
17773 /* Next highest priority is that immediate constants come second. */
17774 if (immediate_operand (src2, mode))
17775 return false;
17776 if (immediate_operand (src1, mode))
17777 return true;
17779 /* Lowest priority is that memory references should come second. */
17780 if (MEM_P (src2))
17781 return false;
17782 if (MEM_P (src1))
17783 return true;
17785 return false;
17789 /* Fix up OPERANDS to satisfy ix86_binary_operator_ok. Return the
17790 destination to use for the operation. If different from the true
17791 destination in operands[0], a copy operation will be required. */
17794 ix86_fixup_binary_operands (enum rtx_code code, machine_mode mode,
17795 rtx operands[])
17797 rtx dst = operands[0];
17798 rtx src1 = operands[1];
17799 rtx src2 = operands[2];
17801 /* Canonicalize operand order. */
17802 if (ix86_swap_binary_operands_p (code, mode, operands))
17804 /* It is invalid to swap operands of different modes. */
17805 gcc_assert (GET_MODE (src1) == GET_MODE (src2));
17807 std::swap (src1, src2);
17810 /* Both source operands cannot be in memory. */
17811 if (MEM_P (src1) && MEM_P (src2))
17813 /* Optimization: Only read from memory once. */
17814 if (rtx_equal_p (src1, src2))
17816 src2 = force_reg (mode, src2);
17817 src1 = src2;
17819 else if (rtx_equal_p (dst, src1))
17820 src2 = force_reg (mode, src2);
17821 else
17822 src1 = force_reg (mode, src1);
17825 /* If the destination is memory, and we do not have matching source
17826 operands, do things in registers. */
17827 if (MEM_P (dst) && !rtx_equal_p (dst, src1))
17828 dst = gen_reg_rtx (mode);
17830 /* Source 1 cannot be a constant. */
17831 if (CONSTANT_P (src1))
17832 src1 = force_reg (mode, src1);
17834 /* Source 1 cannot be a non-matching memory. */
17835 if (MEM_P (src1) && !rtx_equal_p (dst, src1))
17836 src1 = force_reg (mode, src1);
17838 /* Improve address combine. */
17839 if (code == PLUS
17840 && GET_MODE_CLASS (mode) == MODE_INT
17841 && MEM_P (src2))
17842 src2 = force_reg (mode, src2);
17844 operands[1] = src1;
17845 operands[2] = src2;
17846 return dst;
17849 /* Similarly, but assume that the destination has already been
17850 set up properly. */
17852 void
17853 ix86_fixup_binary_operands_no_copy (enum rtx_code code,
17854 machine_mode mode, rtx operands[])
17856 rtx dst = ix86_fixup_binary_operands (code, mode, operands);
17857 gcc_assert (dst == operands[0]);
17860 /* Attempt to expand a binary operator.  Make the expansion closer to the
17861 actual machine than just general_operand, which would allow 3 separate
17862 memory references (one output, two input) in a single insn.  */
17864 void
17865 ix86_expand_binary_operator (enum rtx_code code, machine_mode mode,
17866 rtx operands[])
17868 rtx src1, src2, dst, op, clob;
17870 dst = ix86_fixup_binary_operands (code, mode, operands);
17871 src1 = operands[1];
17872 src2 = operands[2];
17874 /* Emit the instruction. */
17876 op = gen_rtx_SET (VOIDmode, dst, gen_rtx_fmt_ee (code, mode, src1, src2));
17877 if (reload_in_progress)
17879 /* Reload doesn't know about the flags register, and doesn't know that
17880 it doesn't want to clobber it. We can only do this with PLUS. */
17881 gcc_assert (code == PLUS);
17882 emit_insn (op);
17884 else if (reload_completed
17885 && code == PLUS
17886 && !rtx_equal_p (dst, src1))
17888 /* This is going to be an LEA; avoid splitting it later. */
17889 emit_insn (op);
17891 else
17893 clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
17894 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, op, clob)));
17897 /* Fix up the destination if needed. */
17898 if (dst != operands[0])
17899 emit_move_insn (operands[0], dst);
17902 /* Expand vector logical operation CODE (AND, IOR, XOR) in MODE with
17903 the given OPERANDS. */
17905 void
17906 ix86_expand_vector_logical_operator (enum rtx_code code, machine_mode mode,
17907 rtx operands[])
17909 rtx op1 = NULL_RTX, op2 = NULL_RTX;
17910 if (GET_CODE (operands[1]) == SUBREG)
17912 op1 = operands[1];
17913 op2 = operands[2];
17915 else if (GET_CODE (operands[2]) == SUBREG)
17917 op1 = operands[2];
17918 op2 = operands[1];
17920 /* Optimize (__m128i) d | (__m128i) e and similar code
17921 when d and e are float vectors into a float vector logical
17922 insn.  In C/C++ without using intrinsics there is no other way
17923 to express a vector logical operation on float vectors than
17924 to cast them temporarily to integer vectors.  */
17925 if (op1
17926 && !TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL
17927 && ((GET_CODE (op2) == SUBREG || GET_CODE (op2) == CONST_VECTOR))
17928 && GET_MODE_CLASS (GET_MODE (SUBREG_REG (op1))) == MODE_VECTOR_FLOAT
17929 && GET_MODE_SIZE (GET_MODE (SUBREG_REG (op1))) == GET_MODE_SIZE (mode)
17930 && SUBREG_BYTE (op1) == 0
17931 && (GET_CODE (op2) == CONST_VECTOR
17932 || (GET_MODE (SUBREG_REG (op1)) == GET_MODE (SUBREG_REG (op2))
17933 && SUBREG_BYTE (op2) == 0))
17934 && can_create_pseudo_p ())
17936 rtx dst;
17937 switch (GET_MODE (SUBREG_REG (op1)))
17939 case V4SFmode:
17940 case V8SFmode:
17941 case V16SFmode:
17942 case V2DFmode:
17943 case V4DFmode:
17944 case V8DFmode:
17945 dst = gen_reg_rtx (GET_MODE (SUBREG_REG (op1)));
17946 if (GET_CODE (op2) == CONST_VECTOR)
17948 op2 = gen_lowpart (GET_MODE (dst), op2);
17949 op2 = force_reg (GET_MODE (dst), op2);
17951 else
17953 op1 = operands[1];
17954 op2 = SUBREG_REG (operands[2]);
17955 if (!nonimmediate_operand (op2, GET_MODE (dst)))
17956 op2 = force_reg (GET_MODE (dst), op2);
17958 op1 = SUBREG_REG (op1);
17959 if (!nonimmediate_operand (op1, GET_MODE (dst)))
17960 op1 = force_reg (GET_MODE (dst), op1);
17961 emit_insn (gen_rtx_SET (VOIDmode, dst,
17962 gen_rtx_fmt_ee (code, GET_MODE (dst),
17963 op1, op2)));
17964 emit_move_insn (operands[0], gen_lowpart (mode, dst));
17965 return;
17966 default:
17967 break;
17970 if (!nonimmediate_operand (operands[1], mode))
17971 operands[1] = force_reg (mode, operands[1]);
17972 if (!nonimmediate_operand (operands[2], mode))
17973 operands[2] = force_reg (mode, operands[2]);
17974 ix86_fixup_binary_operands_no_copy (code, mode, operands);
17975 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
17976 gen_rtx_fmt_ee (code, mode, operands[1],
17977 operands[2])));
17980 /* Return TRUE or FALSE depending on whether the binary operator meets the
17981 appropriate constraints. */
17983 bool
17984 ix86_binary_operator_ok (enum rtx_code code, machine_mode mode,
17985 rtx operands[3])
17987 rtx dst = operands[0];
17988 rtx src1 = operands[1];
17989 rtx src2 = operands[2];
17991 /* Both source operands cannot be in memory. */
17992 if (MEM_P (src1) && MEM_P (src2))
17993 return false;
17995 /* Canonicalize operand order for commutative operators. */
17996 if (ix86_swap_binary_operands_p (code, mode, operands))
17997 std::swap (src1, src2);
17999 /* If the destination is memory, we must have a matching source operand. */
18000 if (MEM_P (dst) && !rtx_equal_p (dst, src1))
18001 return false;
18003 /* Source 1 cannot be a constant. */
18004 if (CONSTANT_P (src1))
18005 return false;
18007 /* Source 1 cannot be a non-matching memory. */
18008 if (MEM_P (src1) && !rtx_equal_p (dst, src1))
18009 /* Support "andhi/andsi/anddi" as a zero-extending move. */
18010 return (code == AND
18011 && (mode == HImode
18012 || mode == SImode
18013 || (TARGET_64BIT && mode == DImode))
18014 && satisfies_constraint_L (src2));
18016 return true;
18019 /* Attempt to expand a unary operator.  Make the expansion closer to the
18020 actual machine than just general_operand, which would allow 2 separate
18021 memory references (one output, one input) in a single insn.  */
18023 void
18024 ix86_expand_unary_operator (enum rtx_code code, machine_mode mode,
18025 rtx operands[])
18027 int matching_memory;
18028 rtx src, dst, op, clob;
18030 dst = operands[0];
18031 src = operands[1];
18033 /* If the destination is memory, and we do not have matching source
18034 operands, do things in registers. */
18035 matching_memory = 0;
18036 if (MEM_P (dst))
18038 if (rtx_equal_p (dst, src))
18039 matching_memory = 1;
18040 else
18041 dst = gen_reg_rtx (mode);
18044 /* When source operand is memory, destination must match. */
18045 if (MEM_P (src) && !matching_memory)
18046 src = force_reg (mode, src);
18048 /* Emit the instruction. */
18050 op = gen_rtx_SET (VOIDmode, dst, gen_rtx_fmt_e (code, mode, src));
18051 if (reload_in_progress || code == NOT)
18053 /* Reload doesn't know about the flags register, and doesn't know that
18054 it doesn't want to clobber it. */
18055 gcc_assert (code == NOT);
18056 emit_insn (op);
18058 else
18060 clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
18061 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, op, clob)));
18064 /* Fix up the destination if needed. */
18065 if (dst != operands[0])
18066 emit_move_insn (operands[0], dst);
18069 /* Split 32bit/64bit divmod with 8bit unsigned divmod if dividend and
18070 divisor are within the range [0-255]. */
18072 void
18073 ix86_split_idivmod (machine_mode mode, rtx operands[],
18074 bool signed_p)
18076 rtx_code_label *end_label, *qimode_label;
18077 rtx insn, div, mod;
18078 rtx scratch, tmp0, tmp1, tmp2;
18079 rtx (*gen_divmod4_1) (rtx, rtx, rtx, rtx);
18080 rtx (*gen_zero_extend) (rtx, rtx);
18081 rtx (*gen_test_ccno_1) (rtx, rtx);
18083 switch (mode)
18085 case SImode:
18086 gen_divmod4_1 = signed_p ? gen_divmodsi4_1 : gen_udivmodsi4_1;
18087 gen_test_ccno_1 = gen_testsi_ccno_1;
18088 gen_zero_extend = gen_zero_extendqisi2;
18089 break;
18090 case DImode:
18091 gen_divmod4_1 = signed_p ? gen_divmoddi4_1 : gen_udivmoddi4_1;
18092 gen_test_ccno_1 = gen_testdi_ccno_1;
18093 gen_zero_extend = gen_zero_extendqidi2;
18094 break;
18095 default:
18096 gcc_unreachable ();
18099 end_label = gen_label_rtx ();
18100 qimode_label = gen_label_rtx ();
18102 scratch = gen_reg_rtx (mode);
18104 /* Use 8bit unsigned divmod if dividend and divisor are within
18105 the range [0-255].  */
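/* ORing the dividend and the divisor and testing the result against
-0x100 (~0xFF) checks in one step that neither operand has a bit set
above bit 7, i.e. that both values fit in 8 bits.  */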
18106 emit_move_insn (scratch, operands[2]);
18107 scratch = expand_simple_binop (mode, IOR, scratch, operands[3],
18108 scratch, 1, OPTAB_DIRECT);
18109 emit_insn (gen_test_ccno_1 (scratch, GEN_INT (-0x100)));
18110 tmp0 = gen_rtx_REG (CCNOmode, FLAGS_REG);
18111 tmp0 = gen_rtx_EQ (VOIDmode, tmp0, const0_rtx);
18112 tmp0 = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp0,
18113 gen_rtx_LABEL_REF (VOIDmode, qimode_label),
18114 pc_rtx);
18115 insn = emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp0));
18116 predict_jump (REG_BR_PROB_BASE * 50 / 100);
18117 JUMP_LABEL (insn) = qimode_label;
18119 /* Generate the original signed/unsigned divmod.  */
18120 div = gen_divmod4_1 (operands[0], operands[1],
18121 operands[2], operands[3]);
18122 emit_insn (div);
18124 /* Branch to the end. */
18125 emit_jump_insn (gen_jump (end_label));
18126 emit_barrier ();
18128 /* Generate 8bit unsigned divide. */
18129 emit_label (qimode_label);
18130 /* Don't use operands[0] for result of 8bit divide since not all
18131 registers support QImode ZERO_EXTRACT. */
18132 tmp0 = simplify_gen_subreg (HImode, scratch, mode, 0);
18133 tmp1 = simplify_gen_subreg (HImode, operands[2], mode, 0);
18134 tmp2 = simplify_gen_subreg (QImode, operands[3], mode, 0);
18135 emit_insn (gen_udivmodhiqi3 (tmp0, tmp1, tmp2));
18137 if (signed_p)
18139 div = gen_rtx_DIV (SImode, operands[2], operands[3]);
18140 mod = gen_rtx_MOD (SImode, operands[2], operands[3]);
18142 else
18144 div = gen_rtx_UDIV (SImode, operands[2], operands[3]);
18145 mod = gen_rtx_UMOD (SImode, operands[2], operands[3]);
18148 /* Extract remainder from AH. */
18149 tmp1 = gen_rtx_ZERO_EXTRACT (mode, tmp0, GEN_INT (8), GEN_INT (8));
18150 if (REG_P (operands[1]))
18151 insn = emit_move_insn (operands[1], tmp1);
18152 else
18154 /* Need a new scratch register since the old one has result
18155 of 8bit divide. */
18156 scratch = gen_reg_rtx (mode);
18157 emit_move_insn (scratch, tmp1);
18158 insn = emit_move_insn (operands[1], scratch);
18160 set_unique_reg_note (insn, REG_EQUAL, mod);
18162 /* Zero extend quotient from AL. */
18163 tmp1 = gen_lowpart (QImode, tmp0);
18164 insn = emit_insn (gen_zero_extend (operands[0], tmp1));
18165 set_unique_reg_note (insn, REG_EQUAL, div);
18167 emit_label (end_label);
18170 #define LEA_MAX_STALL (3)
18171 #define LEA_SEARCH_THRESHOLD (LEA_MAX_STALL << 1)
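/* LEA_MAX_STALL is measured in cycles, while LEA_SEARCH_THRESHOLD is in
the half-cycle units used by the distance functions below, hence the
<< 1.  */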
18173 /* Increase the given DISTANCE in half-cycles according to
18174 dependencies between PREV and NEXT instructions.
18175 Add 1 half-cycle if there is no dependency and
18176 go to the next cycle if there is some dependency.  */
18178 static unsigned int
18179 increase_distance (rtx_insn *prev, rtx_insn *next, unsigned int distance)
18181 df_ref def, use;
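/* A missing neighbour or a register dependency pushes NEXT to the start
of the following cycle: round DISTANCE up to a whole cycle
((distance & 1)) and add one more cycle (+ 2).  An independent
instruction only costs one extra half-cycle (+ 1).  */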
18183 if (!prev || !next)
18184 return distance + (distance & 1) + 2;
18186 if (!DF_INSN_USES (next) || !DF_INSN_DEFS (prev))
18187 return distance + 1;
18189 FOR_EACH_INSN_USE (use, next)
18190 FOR_EACH_INSN_DEF (def, prev)
18191 if (!DF_REF_IS_ARTIFICIAL (def)
18192 && DF_REF_REGNO (use) == DF_REF_REGNO (def))
18193 return distance + (distance & 1) + 2;
18195 return distance + 1;
18198 /* Return true if instruction INSN defines register number
18199 REGNO1 or REGNO2.  */
18201 static bool
18202 insn_defines_reg (unsigned int regno1, unsigned int regno2,
18203 rtx insn)
18205 df_ref def;
18207 FOR_EACH_INSN_DEF (def, insn)
18208 if (DF_REF_REG_DEF_P (def)
18209 && !DF_REF_IS_ARTIFICIAL (def)
18210 && (regno1 == DF_REF_REGNO (def)
18211 || regno2 == DF_REF_REGNO (def)))
18212 return true;
18214 return false;
18217 /* Return true if instruction INSN uses register number
18218 REGNO as part of an address expression.  */
18220 static bool
18221 insn_uses_reg_mem (unsigned int regno, rtx insn)
18223 df_ref use;
18225 FOR_EACH_INSN_USE (use, insn)
18226 if (DF_REF_REG_MEM_P (use) && regno == DF_REF_REGNO (use))
18227 return true;
18229 return false;
18232 /* Search backward for a non-AGU definition of register number REGNO1
18233 or register number REGNO2 in the basic block, starting from instruction
18234 START and going up to the head of the basic block or to instruction INSN.
18236 Set *FOUND to true if a definition was found
18237 and to false otherwise.
18239 The distance in half-cycles between START and the found instruction
18240 (or the head of the BB) is added to DISTANCE and returned.  */
18242 static int
18243 distance_non_agu_define_in_bb (unsigned int regno1, unsigned int regno2,
18244 rtx_insn *insn, int distance,
18245 rtx_insn *start, bool *found)
18247 basic_block bb = start ? BLOCK_FOR_INSN (start) : NULL;
18248 rtx_insn *prev = start;
18249 rtx_insn *next = NULL;
18251 *found = false;
18253 while (prev
18254 && prev != insn
18255 && distance < LEA_SEARCH_THRESHOLD)
18257 if (NONDEBUG_INSN_P (prev) && NONJUMP_INSN_P (prev))
18259 distance = increase_distance (prev, next, distance);
18260 if (insn_defines_reg (regno1, regno2, prev))
18262 if (recog_memoized (prev) < 0
18263 || get_attr_type (prev) != TYPE_LEA)
18265 *found = true;
18266 return distance;
18270 next = prev;
18272 if (prev == BB_HEAD (bb))
18273 break;
18275 prev = PREV_INSN (prev);
18278 return distance;
18281 /* Search backward for a non-AGU definition of register number REGNO1
18282 or register number REGNO2 in INSN's basic block until
18283 1. LEA_SEARCH_THRESHOLD instructions have been passed, or
18284 2. the boundary of a neighbouring BB is reached, or
18285 3. an AGU definition is reached.
18286 Return the distance between the non-AGU definition point and INSN.
18287 If there is no definition point, return -1.  */
18289 static int
18290 distance_non_agu_define (unsigned int regno1, unsigned int regno2,
18291 rtx_insn *insn)
18293 basic_block bb = BLOCK_FOR_INSN (insn);
18294 int distance = 0;
18295 bool found = false;
18297 if (insn != BB_HEAD (bb))
18298 distance = distance_non_agu_define_in_bb (regno1, regno2, insn,
18299 distance, PREV_INSN (insn),
18300 &found);
18302 if (!found && distance < LEA_SEARCH_THRESHOLD)
18304 edge e;
18305 edge_iterator ei;
18306 bool simple_loop = false;
18308 FOR_EACH_EDGE (e, ei, bb->preds)
18309 if (e->src == bb)
18311 simple_loop = true;
18312 break;
18315 if (simple_loop)
18316 distance = distance_non_agu_define_in_bb (regno1, regno2,
18317 insn, distance,
18318 BB_END (bb), &found);
18319 else
18321 int shortest_dist = -1;
18322 bool found_in_bb = false;
18324 FOR_EACH_EDGE (e, ei, bb->preds)
18326 int bb_dist
18327 = distance_non_agu_define_in_bb (regno1, regno2,
18328 insn, distance,
18329 BB_END (e->src),
18330 &found_in_bb);
18331 if (found_in_bb)
18333 if (shortest_dist < 0)
18334 shortest_dist = bb_dist;
18335 else if (bb_dist > 0)
18336 shortest_dist = MIN (bb_dist, shortest_dist);
18338 found = true;
18342 distance = shortest_dist;
18346 /* get_attr_type may modify recog data. We want to make sure
18347 that recog data is valid for instruction INSN, on which
18348 distance_non_agu_define is called. INSN is unchanged here. */
18349 extract_insn_cached (insn);
18351 if (!found)
18352 return -1;
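/* DISTANCE was accumulated in half-cycles; return it in cycles.  */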
18354 return distance >> 1;
18357 /* Return the distance in half-cycles, added to DISTANCE, between INSN
18358 and the next insn that uses register number REGNO in a memory
18359 address.  Return -1 if REGNO is set.
18361 Set *FOUND to true if a register use was found and
18362 false otherwise.
18363 Set *REDEFINED to true if a register redefinition was found
18364 and false otherwise.  */
18366 static int
18367 distance_agu_use_in_bb (unsigned int regno,
18368 rtx_insn *insn, int distance, rtx_insn *start,
18369 bool *found, bool *redefined)
18371 basic_block bb = NULL;
18372 rtx_insn *next = start;
18373 rtx_insn *prev = NULL;
18375 *found = false;
18376 *redefined = false;
18378 if (start != NULL_RTX)
18380 bb = BLOCK_FOR_INSN (start);
18381 if (start != BB_HEAD (bb))
18382 /* If insn and start belong to the same bb, set prev to insn,
18383 so the call to increase_distance will increase the distance
18384 between insns by 1. */
18385 prev = insn;
18388 while (next
18389 && next != insn
18390 && distance < LEA_SEARCH_THRESHOLD)
18392 if (NONDEBUG_INSN_P (next) && NONJUMP_INSN_P (next))
18394 distance = increase_distance(prev, next, distance);
18395 if (insn_uses_reg_mem (regno, next))
18397 /* Return DISTANCE if REGNO is used in a memory
18398 address in NEXT.  */
18399 *found = true;
18400 return distance;
18403 if (insn_defines_reg (regno, INVALID_REGNUM, next))
18405 /* Return -1 if REGNO is set in NEXT.  */
18406 *redefined = true;
18407 return -1;
18410 prev = next;
18413 if (next == BB_END (bb))
18414 break;
18416 next = NEXT_INSN (next);
18419 return distance;
18422 /* Return the distance between INSN and the next insn that uses
18423 register number REGNO0 in a memory address.  Return -1 if no such
18424 use is found within LEA_SEARCH_THRESHOLD or if REGNO0 is set.  */
18426 static int
18427 distance_agu_use (unsigned int regno0, rtx_insn *insn)
18429 basic_block bb = BLOCK_FOR_INSN (insn);
18430 int distance = 0;
18431 bool found = false;
18432 bool redefined = false;
18434 if (insn != BB_END (bb))
18435 distance = distance_agu_use_in_bb (regno0, insn, distance,
18436 NEXT_INSN (insn),
18437 &found, &redefined);
18439 if (!found && !redefined && distance < LEA_SEARCH_THRESHOLD)
18441 edge e;
18442 edge_iterator ei;
18443 bool simple_loop = false;
18445 FOR_EACH_EDGE (e, ei, bb->succs)
18446 if (e->dest == bb)
18448 simple_loop = true;
18449 break;
18452 if (simple_loop)
18453 distance = distance_agu_use_in_bb (regno0, insn,
18454 distance, BB_HEAD (bb),
18455 &found, &redefined);
18456 else
18458 int shortest_dist = -1;
18459 bool found_in_bb = false;
18460 bool redefined_in_bb = false;
18462 FOR_EACH_EDGE (e, ei, bb->succs)
18464 int bb_dist
18465 = distance_agu_use_in_bb (regno0, insn,
18466 distance, BB_HEAD (e->dest),
18467 &found_in_bb, &redefined_in_bb);
18468 if (found_in_bb)
18470 if (shortest_dist < 0)
18471 shortest_dist = bb_dist;
18472 else if (bb_dist > 0)
18473 shortest_dist = MIN (bb_dist, shortest_dist);
18475 found = true;
18479 distance = shortest_dist;
18483 if (!found || redefined)
18484 return -1;
18486 return distance >> 1;
18489 /* Define this macro to tune the LEA priority vs ADD; it takes effect when
18490 there is a choice between LEA and ADD.
18491 Negative value: ADD is preferred over LEA
18492 Zero: neutral
18493 Positive value: LEA is preferred over ADD.  */
18494 #define IX86_LEA_PRIORITY 0
18496 /* Return true if using lea INSN has a performance advantage
18497 over a sequence of instructions.  The instruction sequence has
18498 SPLIT_COST cycles higher latency than the lea.  */
18500 static bool
18501 ix86_lea_outperforms (rtx_insn *insn, unsigned int regno0, unsigned int regno1,
18502 unsigned int regno2, int split_cost, bool has_scale)
18504 int dist_define, dist_use;
18506 /* For Silvermont, using an LEA is justified if it is a 2-source or
18507 3-source LEA used for a non-destructive destination, or if the
18508 ability to use a SCALE is wanted.  */
18509 if (TARGET_SILVERMONT || TARGET_INTEL)
18511 if (has_scale)
18512 return true;
18513 if (split_cost < 1)
18514 return false;
18515 if (regno0 == regno1 || regno0 == regno2)
18516 return false;
18517 return true;
18520 dist_define = distance_non_agu_define (regno1, regno2, insn);
18521 dist_use = distance_agu_use (regno0, insn);
18523 if (dist_define < 0 || dist_define >= LEA_MAX_STALL)
18525 /* If there is no non-AGU operand definition, no AGU
18526 operand use and the split cost is 0, then both the lea
18527 and non-lea variants have the same priority.  Currently
18528 we prefer lea for 64-bit code and non-lea for 32-bit
18529 code.  */
18530 if (dist_use < 0 && split_cost == 0)
18531 return TARGET_64BIT || IX86_LEA_PRIORITY;
18532 else
18533 return true;
18536 /* The longer the definition distance, the more preferable lea becomes.
18537 Here we adjust it to take the splitting cost and the
18538 lea priority into account.  */
18539 dist_define += split_cost + IX86_LEA_PRIORITY;
18541 /* If there is no use in a memory address then we just check
18542 that the split cost exceeds the AGU stall.  */
18543 if (dist_use < 0)
18544 return dist_define > LEA_MAX_STALL;
18546 /* If this insn has both backward non-agu dependence and forward
18547 agu dependence, the one with the shorter distance takes effect.  */
18548 return dist_define >= dist_use;
18551 /* Return true if it is legal to clobber flags by INSN and
18552 false otherwise. */
18554 static bool
18555 ix86_ok_to_clobber_flags (rtx_insn *insn)
18557 basic_block bb = BLOCK_FOR_INSN (insn);
18558 df_ref use;
18559 bitmap live;
18561 while (insn)
18563 if (NONDEBUG_INSN_P (insn))
18565 FOR_EACH_INSN_USE (use, insn)
18566 if (DF_REF_REG_USE_P (use) && DF_REF_REGNO (use) == FLAGS_REG)
18567 return false;
18569 if (insn_defines_reg (FLAGS_REG, INVALID_REGNUM, insn))
18570 return true;
18573 if (insn == BB_END (bb))
18574 break;
18576 insn = NEXT_INSN (insn);
18579 live = df_get_live_out(bb);
18580 return !REGNO_REG_SET_P (live, FLAGS_REG);
18583 /* Return true if we need to split op0 = op1 + op2 into a sequence of
18584 move and add to avoid AGU stalls. */
18586 bool
18587 ix86_avoid_lea_for_add (rtx_insn *insn, rtx operands[])
18589 unsigned int regno0, regno1, regno2;
18591 /* Check if we need to optimize. */
18592 if (!TARGET_OPT_AGU || optimize_function_for_size_p (cfun))
18593 return false;
18595 /* Check it is correct to split here. */
18596 if (!ix86_ok_to_clobber_flags(insn))
18597 return false;
18599 regno0 = true_regnum (operands[0]);
18600 regno1 = true_regnum (operands[1]);
18601 regno2 = true_regnum (operands[2]);
18603 /* We only need to split adds with a non-destructive
18604 destination operand.  */
18605 if (regno0 == regno1 || regno0 == regno2)
18606 return false;
18607 else
18608 return !ix86_lea_outperforms (insn, regno0, regno1, regno2, 1, false);
18611 /* Return true if we should emit lea instruction instead of mov
18612 instruction. */
18614 bool
18615 ix86_use_lea_for_mov (rtx_insn *insn, rtx operands[])
18617 unsigned int regno0, regno1;
18619 /* Check if we need to optimize. */
18620 if (!TARGET_OPT_AGU || optimize_function_for_size_p (cfun))
18621 return false;
18623 /* Use lea for reg to reg moves only. */
18624 if (!REG_P (operands[0]) || !REG_P (operands[1]))
18625 return false;
18627 regno0 = true_regnum (operands[0]);
18628 regno1 = true_regnum (operands[1]);
18630 return ix86_lea_outperforms (insn, regno0, regno1, INVALID_REGNUM, 0, false);
18633 /* Return true if we need to split lea into a sequence of
18634 instructions to avoid AGU stalls. */
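/* Rough example (illustrative registers): splitting
   "lea 0x8(%rbx,%rcx,4), %rax" needs a mov for the non-destructive
   destination, a shift plus an add for the scaled index and base,
   and an add for the displacement, minus the lea itself; that sum
   is the SPLIT_COST passed to ix86_lea_outperforms below.  */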
18636 bool
18637 ix86_avoid_lea_for_addr (rtx_insn *insn, rtx operands[])
18639 unsigned int regno0, regno1, regno2;
18640 int split_cost;
18641 struct ix86_address parts;
18642 int ok;
18644 /* Check we need to optimize. */
18645 if (!TARGET_AVOID_LEA_FOR_ADDR || optimize_function_for_size_p (cfun))
18646 return false;
18648 /* The "at least two components" test below might not catch simple
18649 move or zero extension insns if parts.base is non-NULL and parts.disp
18650 is const0_rtx as the only components in the address, e.g. if the
18651 register is %rbp or %r13. As this test is much cheaper and moves or
18652 zero extensions are the common case, do this check first. */
18653 if (REG_P (operands[1])
18654 || (SImode_address_operand (operands[1], VOIDmode)
18655 && REG_P (XEXP (operands[1], 0))))
18656 return false;
18658 /* Check if it is OK to split here. */
18659 if (!ix86_ok_to_clobber_flags (insn))
18660 return false;
18662 ok = ix86_decompose_address (operands[1], &parts);
18663 gcc_assert (ok);
18665 /* There should be at least two components in the address. */
18666 if ((parts.base != NULL_RTX) + (parts.index != NULL_RTX)
18667 + (parts.disp != NULL_RTX) + (parts.scale > 1) < 2)
18668 return false;
18670 /* We should not split into add if a non-legitimate pic
18671 operand is used as the displacement. */
18672 if (parts.disp && flag_pic && !LEGITIMATE_PIC_OPERAND_P (parts.disp))
18673 return false;
18675 regno0 = true_regnum (operands[0]);
18676 regno1 = INVALID_REGNUM;
18677 regno2 = INVALID_REGNUM;
18679 if (parts.base)
18680 regno1 = true_regnum (parts.base);
18681 if (parts.index)
18682 regno2 = true_regnum (parts.index);
18684 split_cost = 0;
18686 /* Compute how many cycles we will add to the execution time
18687 if we split the lea into a sequence of instructions. */
18688 if (parts.base || parts.index)
18690 /* Have to use a mov instruction if the non-destructive
18691 destination form is used. */
18692 if (regno1 != regno0 && regno2 != regno0)
18693 split_cost += 1;
18695 /* Have to add index to base if both exist. */
18696 if (parts.base && parts.index)
18697 split_cost += 1;
18699 /* Have to use shift and adds if scale is 2 or greater. */
18700 if (parts.scale > 1)
18702 if (regno0 != regno1)
18703 split_cost += 1;
18704 else if (regno2 == regno0)
18705 split_cost += 4;
18706 else
18707 split_cost += parts.scale;
18710 /* Have to use an add instruction with an immediate if
18711 disp is non-zero. */
18712 if (parts.disp && parts.disp != const0_rtx)
18713 split_cost += 1;
18715 /* Subtract the price of lea. */
18716 split_cost -= 1;
18719 return !ix86_lea_outperforms (insn, regno0, regno1, regno2, split_cost,
18720 parts.scale > 1);
18723 /* Emit x86 binary operand CODE in mode MODE, where the first operand
18724 matches destination. RTX includes clobber of FLAGS_REG. */
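/* For instance, ix86_emit_binop (PLUS, DImode, dst, src) emits
     (parallel [(set dst (plus:DI dst src))
                (clobber (reg:CC FLAGS_REG))])
   as a single insn.  */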
18726 static void
18727 ix86_emit_binop (enum rtx_code code, machine_mode mode,
18728 rtx dst, rtx src)
18730 rtx op, clob;
18732 op = gen_rtx_SET (VOIDmode, dst, gen_rtx_fmt_ee (code, mode, dst, src));
18733 clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
18735 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, op, clob)));
18740 /* Return true if the definition of regno1 is nearer to INSN than the definition of regno2. */
18740 static bool
18741 find_nearest_reg_def (rtx_insn *insn, int regno1, int regno2)
18743 rtx_insn *prev = insn;
18744 rtx_insn *start = BB_HEAD (BLOCK_FOR_INSN (insn));
18746 if (insn == start)
18747 return false;
18748 while (prev && prev != start)
18750 if (!INSN_P (prev) || !NONDEBUG_INSN_P (prev))
18752 prev = PREV_INSN (prev);
18753 continue;
18755 if (insn_defines_reg (regno1, INVALID_REGNUM, prev))
18756 return true;
18757 else if (insn_defines_reg (regno2, INVALID_REGNUM, prev))
18758 return false;
18759 prev = PREV_INSN (prev);
18762 /* None of the regs is defined in the bb. */
18763 return false;
18766 /* Split lea instructions into a sequence of instructions
18767 which are executed on ALU to avoid AGU stalls.
18768 It is assumed that it is allowed to clobber flags register
18769 at lea position. */
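/* Worked example (illustrative registers): "lea 0x4(%rbx,%rcx,2), %rax"
   with %rax distinct from both sources is emitted as
       mov %rcx, %rax
       shl $1, %rax        (shift count is exact_log2 (2))
       add %rbx, %rax
       add $0x4, %rax
   while "lea (%rax,%rcx,2), %rax" takes the repeated-add branch and
   emits "add %rcx, %rax" twice instead of a multiply.  */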
18771 void
18772 ix86_split_lea_for_addr (rtx_insn *insn, rtx operands[], machine_mode mode)
18774 unsigned int regno0, regno1, regno2;
18775 struct ix86_address parts;
18776 rtx target, tmp;
18777 int ok, adds;
18779 ok = ix86_decompose_address (operands[1], &parts);
18780 gcc_assert (ok);
18782 target = gen_lowpart (mode, operands[0]);
18784 regno0 = true_regnum (target);
18785 regno1 = INVALID_REGNUM;
18786 regno2 = INVALID_REGNUM;
18788 if (parts.base)
18790 parts.base = gen_lowpart (mode, parts.base);
18791 regno1 = true_regnum (parts.base);
18794 if (parts.index)
18796 parts.index = gen_lowpart (mode, parts.index);
18797 regno2 = true_regnum (parts.index);
18800 if (parts.disp)
18801 parts.disp = gen_lowpart (mode, parts.disp);
18803 if (parts.scale > 1)
18805 /* Case r1 = r1 + ... */
18806 if (regno1 == regno0)
18808 /* If we have the case r1 = r1 + C * r2 then we
18809 would have to use multiplication, which is very
18810 expensive. Assume the cost model is wrong if we
18811 get such a case here. */
18812 gcc_assert (regno2 != regno0);
18814 for (adds = parts.scale; adds > 0; adds--)
18815 ix86_emit_binop (PLUS, mode, target, parts.index);
18817 else
18819 /* r1 = r2 + r3 * C case. Need to move r3 into r1. */
18820 if (regno0 != regno2)
18821 emit_insn (gen_rtx_SET (VOIDmode, target, parts.index));
18823 /* Use shift for scaling. */
18824 ix86_emit_binop (ASHIFT, mode, target,
18825 GEN_INT (exact_log2 (parts.scale)));
18827 if (parts.base)
18828 ix86_emit_binop (PLUS, mode, target, parts.base);
18830 if (parts.disp && parts.disp != const0_rtx)
18831 ix86_emit_binop (PLUS, mode, target, parts.disp);
18834 else if (!parts.base && !parts.index)
18836 gcc_assert (parts.disp);
18837 emit_insn (gen_rtx_SET (VOIDmode, target, parts.disp));
18839 else
18841 if (!parts.base)
18843 if (regno0 != regno2)
18844 emit_insn (gen_rtx_SET (VOIDmode, target, parts.index));
18846 else if (!parts.index)
18848 if (regno0 != regno1)
18849 emit_insn (gen_rtx_SET (VOIDmode, target, parts.base));
18851 else
18853 if (regno0 == regno1)
18854 tmp = parts.index;
18855 else if (regno0 == regno2)
18856 tmp = parts.base;
18857 else
18859 rtx tmp1;
18861 /* Find better operand for SET instruction, depending
18862 on which definition is farther from the insn. */
18863 if (find_nearest_reg_def (insn, regno1, regno2))
18864 tmp = parts.index, tmp1 = parts.base;
18865 else
18866 tmp = parts.base, tmp1 = parts.index;
18868 emit_insn (gen_rtx_SET (VOIDmode, target, tmp));
18870 if (parts.disp && parts.disp != const0_rtx)
18871 ix86_emit_binop (PLUS, mode, target, parts.disp);
18873 ix86_emit_binop (PLUS, mode, target, tmp1);
18874 return;
18877 ix86_emit_binop (PLUS, mode, target, tmp);
18880 if (parts.disp && parts.disp != const0_rtx)
18881 ix86_emit_binop (PLUS, mode, target, parts.disp);
18885 /* Return true if it is ok to optimize an ADD operation to an LEA
18886 operation to avoid flag register consumption. For most processors,
18887 ADD is faster than LEA. For processors like BONNELL, if the
18888 destination register of the LEA holds an actual address which will be
18889 used soon, LEA is better; otherwise ADD is better. */
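/* E.g. a = b + c with a distinct from both b and c has no
   two-operand add encoding, so only the lea form, e.g.
   "lea (%rbx,%rcx), %rax" with illustrative registers, can express
   it in one instruction; that is the first test below.  */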
18891 bool
18892 ix86_lea_for_add_ok (rtx_insn *insn, rtx operands[])
18894 unsigned int regno0 = true_regnum (operands[0]);
18895 unsigned int regno1 = true_regnum (operands[1]);
18896 unsigned int regno2 = true_regnum (operands[2]);
18898 /* If a = b + c, (a!=b && a!=c), must use lea form. */
18899 if (regno0 != regno1 && regno0 != regno2)
18900 return true;
18902 if (!TARGET_OPT_AGU || optimize_function_for_size_p (cfun))
18903 return false;
18905 return ix86_lea_outperforms (insn, regno0, regno1, regno2, 0, false);
18908 /* Return true if destination reg of SET_BODY is shift count of
18909 USE_BODY. */
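/* For example, if SET_BODY sets the register that provides %cl and
   USE_BODY is a variable shift such as "sall %cl, %eax", then the
   shift count of the use is the destination of the set.  */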
18911 static bool
18912 ix86_dep_by_shift_count_body (const_rtx set_body, const_rtx use_body)
18914 rtx set_dest;
18915 rtx shift_rtx;
18916 int i;
18918 /* Retrieve destination of SET_BODY. */
18919 switch (GET_CODE (set_body))
18921 case SET:
18922 set_dest = SET_DEST (set_body);
18923 if (!set_dest || !REG_P (set_dest))
18924 return false;
18925 break;
18926 case PARALLEL:
18927 for (i = XVECLEN (set_body, 0) - 1; i >= 0; i--)
18928 if (ix86_dep_by_shift_count_body (XVECEXP (set_body, 0, i),
18929 use_body))
18930 return true;
18931 default:
18932 return false;
18933 break;
18936 /* Retrieve shift count of USE_BODY. */
18937 switch (GET_CODE (use_body))
18939 case SET:
18940 shift_rtx = XEXP (use_body, 1);
18941 break;
18942 case PARALLEL:
18943 for (i = XVECLEN (use_body, 0) - 1; i >= 0; i--)
18944 if (ix86_dep_by_shift_count_body (set_body,
18945 XVECEXP (use_body, 0, i)))
18946 return true;
18947 default:
18948 return false;
18949 break;
18952 if (shift_rtx
18953 && (GET_CODE (shift_rtx) == ASHIFT
18954 || GET_CODE (shift_rtx) == LSHIFTRT
18955 || GET_CODE (shift_rtx) == ASHIFTRT
18956 || GET_CODE (shift_rtx) == ROTATE
18957 || GET_CODE (shift_rtx) == ROTATERT))
18959 rtx shift_count = XEXP (shift_rtx, 1);
18961 /* Return true if shift count is dest of SET_BODY. */
18962 if (REG_P (shift_count))
18964 /* Add this check since it can be invoked before register
18965 allocation by the pre-reload scheduler. */
18966 if (reload_completed
18967 && true_regnum (set_dest) == true_regnum (shift_count))
18968 return true;
18969 else if (REGNO (set_dest) == REGNO (shift_count))
18970 return true;
18974 return false;
18977 /* Return true if destination reg of SET_INSN is shift count of
18978 USE_INSN. */
18980 bool
18981 ix86_dep_by_shift_count (const_rtx set_insn, const_rtx use_insn)
18983 return ix86_dep_by_shift_count_body (PATTERN (set_insn),
18984 PATTERN (use_insn));
18987 /* Return TRUE or FALSE depending on whether the unary operator meets the
18988 appropriate constraints. */
18990 bool
18991 ix86_unary_operator_ok (enum rtx_code,
18992 machine_mode,
18993 rtx operands[2])
18995 /* If one of operands is memory, source and destination must match. */
18996 if ((MEM_P (operands[0])
18997 || MEM_P (operands[1]))
18998 && ! rtx_equal_p (operands[0], operands[1]))
18999 return false;
19000 return true;
19003 /* Return TRUE if the operands to a vec_interleave_{high,low}v2df
19004 are ok, keeping in mind the possible movddup alternative. */
19006 bool
19007 ix86_vec_interleave_v2df_operator_ok (rtx operands[3], bool high)
19009 if (MEM_P (operands[0]))
19010 return rtx_equal_p (operands[0], operands[1 + high]);
19011 if (MEM_P (operands[1]) && MEM_P (operands[2]))
19012 return TARGET_SSE3 && rtx_equal_p (operands[1], operands[2]);
19013 return true;
19016 /* Post-reload splitter for converting an SF or DFmode value in an
19017 SSE register into an unsigned SImode. */
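/* In effect this computes, per element,
     x < 0x1p31 ? (unsigned) (int) x
                : (unsigned) (int) (x - 0x1p31) ^ 0x80000000
   so that only signed conversions are needed.  */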
19019 void
19020 ix86_split_convert_uns_si_sse (rtx operands[])
19022 machine_mode vecmode;
19023 rtx value, large, zero_or_two31, input, two31, x;
19025 large = operands[1];
19026 zero_or_two31 = operands[2];
19027 input = operands[3];
19028 two31 = operands[4];
19029 vecmode = GET_MODE (large);
19030 value = gen_rtx_REG (vecmode, REGNO (operands[0]));
19032 /* Load up the value into the low element. We must ensure that the other
19033 elements are valid floats -- zero is the easiest such value. */
19034 if (MEM_P (input))
19036 if (vecmode == V4SFmode)
19037 emit_insn (gen_vec_setv4sf_0 (value, CONST0_RTX (V4SFmode), input));
19038 else
19039 emit_insn (gen_sse2_loadlpd (value, CONST0_RTX (V2DFmode), input));
19041 else
19043 input = gen_rtx_REG (vecmode, REGNO (input));
19044 emit_move_insn (value, CONST0_RTX (vecmode));
19045 if (vecmode == V4SFmode)
19046 emit_insn (gen_sse_movss (value, value, input));
19047 else
19048 emit_insn (gen_sse2_movsd (value, value, input));
19051 emit_move_insn (large, two31);
19052 emit_move_insn (zero_or_two31, MEM_P (two31) ? large : two31);
19054 x = gen_rtx_fmt_ee (LE, vecmode, large, value);
19055 emit_insn (gen_rtx_SET (VOIDmode, large, x));
19057 x = gen_rtx_AND (vecmode, zero_or_two31, large);
19058 emit_insn (gen_rtx_SET (VOIDmode, zero_or_two31, x));
19060 x = gen_rtx_MINUS (vecmode, value, zero_or_two31);
19061 emit_insn (gen_rtx_SET (VOIDmode, value, x));
19063 large = gen_rtx_REG (V4SImode, REGNO (large));
19064 emit_insn (gen_ashlv4si3 (large, large, GEN_INT (31)));
19066 x = gen_rtx_REG (V4SImode, REGNO (value));
19067 if (vecmode == V4SFmode)
19068 emit_insn (gen_fix_truncv4sfv4si2 (x, value));
19069 else
19070 emit_insn (gen_sse2_cvttpd2dq (x, value));
19071 value = x;
19073 emit_insn (gen_xorv4si3 (value, value, large));
19076 /* Convert an unsigned DImode value into a DFmode, using only SSE.
19077 Expects the 64-bit DImode to be supplied in a pair of integral
19078 registers. Requires SSE2; will use SSE3 if available. For x86_32,
19079 -mfpmath=sse, !optimize_size only. */
19081 void
19082 ix86_expand_convert_uns_didf_sse (rtx target, rtx input)
19084 REAL_VALUE_TYPE bias_lo_rvt, bias_hi_rvt;
19085 rtx int_xmm, fp_xmm;
19086 rtx biases, exponents;
19087 rtx x;
19089 int_xmm = gen_reg_rtx (V4SImode);
19090 if (TARGET_INTER_UNIT_MOVES_TO_VEC)
19091 emit_insn (gen_movdi_to_sse (int_xmm, input));
19092 else if (TARGET_SSE_SPLIT_REGS)
19094 emit_clobber (int_xmm);
19095 emit_move_insn (gen_lowpart (DImode, int_xmm), input);
19097 else
19099 x = gen_reg_rtx (V2DImode);
19100 ix86_expand_vector_init_one_nonzero (false, V2DImode, x, input, 0);
19101 emit_move_insn (int_xmm, gen_lowpart (V4SImode, x));
19104 x = gen_rtx_CONST_VECTOR (V4SImode,
19105 gen_rtvec (4, GEN_INT (0x43300000UL),
19106 GEN_INT (0x45300000UL),
19107 const0_rtx, const0_rtx));
19108 exponents = validize_mem (force_const_mem (V4SImode, x));
19110 /* int_xmm = {0x45300000UL, fp_xmm/hi, 0x43300000, fp_xmm/lo } */
19111 emit_insn (gen_vec_interleave_lowv4si (int_xmm, int_xmm, exponents));
19113 /* Concatenating (juxtaposing) (0x43300000UL ## fp_value_low_xmm)
19114 yields a valid DF value equal to (0x1.0p52 + double(fp_value_lo_xmm)).
19115 Similarly (0x45300000UL ## fp_value_hi_xmm) yields
19116 (0x1.0p84 + double(fp_value_hi_xmm)).
19117 Note these exponents differ by 32. */
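/* Concretely, if fp_value_lo_xmm is the integer 7, the packed bits
   0x4330000000000007 read as a double are 2^52 + 7, so subtracting
   the 0x1.0p52 bias below leaves exactly 7.0.  */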
19119 fp_xmm = copy_to_mode_reg (V2DFmode, gen_lowpart (V2DFmode, int_xmm));
19121 /* Subtract off those 0x1.0p52 and 0x1.0p84 biases, to produce values
19122 in [0,2**32-1] and [0]+[2**32,2**64-1] respectively. */
19123 real_ldexp (&bias_lo_rvt, &dconst1, 52);
19124 real_ldexp (&bias_hi_rvt, &dconst1, 84);
19125 biases = const_double_from_real_value (bias_lo_rvt, DFmode);
19126 x = const_double_from_real_value (bias_hi_rvt, DFmode);
19127 biases = gen_rtx_CONST_VECTOR (V2DFmode, gen_rtvec (2, biases, x));
19128 biases = validize_mem (force_const_mem (V2DFmode, biases));
19129 emit_insn (gen_subv2df3 (fp_xmm, fp_xmm, biases));
19131 /* Add the upper and lower DFmode values together. */
19132 if (TARGET_SSE3)
19133 emit_insn (gen_sse3_haddv2df3 (fp_xmm, fp_xmm, fp_xmm));
19134 else
19136 x = copy_to_mode_reg (V2DFmode, fp_xmm);
19137 emit_insn (gen_vec_interleave_highv2df (fp_xmm, fp_xmm, fp_xmm));
19138 emit_insn (gen_addv2df3 (fp_xmm, fp_xmm, x));
19141 ix86_expand_vector_extract (false, target, fp_xmm, 0);
19144 /* Not used, but eases macroization of patterns. */
19145 void
19146 ix86_expand_convert_uns_sixf_sse (rtx, rtx)
19148 gcc_unreachable ();
19151 /* Convert an unsigned SImode value into a DFmode. Only currently used
19152 for SSE, but applicable anywhere. */
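/* The sequence below computes (double) (int) (x - 0x80000000) + 0x1.0p31;
   the biased value fits in a signed SImode and converts exactly, so
   the result equals (double) x for every 32-bit unsigned x.  */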
19154 void
19155 ix86_expand_convert_uns_sidf_sse (rtx target, rtx input)
19157 REAL_VALUE_TYPE TWO31r;
19158 rtx x, fp;
19160 x = expand_simple_binop (SImode, PLUS, input, GEN_INT (-2147483647 - 1),
19161 NULL, 1, OPTAB_DIRECT);
19163 fp = gen_reg_rtx (DFmode);
19164 emit_insn (gen_floatsidf2 (fp, x));
19166 real_ldexp (&TWO31r, &dconst1, 31);
19167 x = const_double_from_real_value (TWO31r, DFmode);
19169 x = expand_simple_binop (DFmode, PLUS, fp, x, target, 0, OPTAB_DIRECT);
19170 if (x != target)
19171 emit_move_insn (target, x);
19174 /* Convert a signed DImode value into a DFmode. Only used for SSE in
19175 32-bit mode; otherwise we have a direct convert instruction. */
19177 void
19178 ix86_expand_convert_sign_didf_sse (rtx target, rtx input)
19180 REAL_VALUE_TYPE TWO32r;
19181 rtx fp_lo, fp_hi, x;
19183 fp_lo = gen_reg_rtx (DFmode);
19184 fp_hi = gen_reg_rtx (DFmode);
19186 emit_insn (gen_floatsidf2 (fp_hi, gen_highpart (SImode, input)));
19188 real_ldexp (&TWO32r, &dconst1, 32);
19189 x = const_double_from_real_value (TWO32r, DFmode);
19190 fp_hi = expand_simple_binop (DFmode, MULT, fp_hi, x, fp_hi, 0, OPTAB_DIRECT);
19192 ix86_expand_convert_uns_sidf_sse (fp_lo, gen_lowpart (SImode, input));
19194 x = expand_simple_binop (DFmode, PLUS, fp_hi, fp_lo, target,
19195 0, OPTAB_DIRECT);
19196 if (x != target)
19197 emit_move_insn (target, x);
19200 /* Convert an unsigned SImode value into a SFmode, using only SSE.
19201 For x86_32, -mfpmath=sse, !optimize_size only. */
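/* Here x is split as (x >> 16) * 0x1.0p16 + (x & 0xffff); both 16-bit
   halves convert to SFmode exactly, and the final multiply and add
   reassemble the value with only the usual SFmode rounding.  */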
19202 void
19203 ix86_expand_convert_uns_sisf_sse (rtx target, rtx input)
19205 REAL_VALUE_TYPE ONE16r;
19206 rtx fp_hi, fp_lo, int_hi, int_lo, x;
19208 real_ldexp (&ONE16r, &dconst1, 16);
19209 x = const_double_from_real_value (ONE16r, SFmode);
19210 int_lo = expand_simple_binop (SImode, AND, input, GEN_INT (0xffff),
19211 NULL, 0, OPTAB_DIRECT);
19212 int_hi = expand_simple_binop (SImode, LSHIFTRT, input, GEN_INT (16),
19213 NULL, 0, OPTAB_DIRECT);
19214 fp_hi = gen_reg_rtx (SFmode);
19215 fp_lo = gen_reg_rtx (SFmode);
19216 emit_insn (gen_floatsisf2 (fp_hi, int_hi));
19217 emit_insn (gen_floatsisf2 (fp_lo, int_lo));
19218 fp_hi = expand_simple_binop (SFmode, MULT, fp_hi, x, fp_hi,
19219 0, OPTAB_DIRECT);
19220 fp_hi = expand_simple_binop (SFmode, PLUS, fp_hi, fp_lo, target,
19221 0, OPTAB_DIRECT);
19222 if (!rtx_equal_p (target, fp_hi))
19223 emit_move_insn (target, fp_hi);
19226 /* floatunsv{4,8}siv{4,8}sf2 expander. Expand code to convert
19227 a vector of unsigned ints VAL to vector of floats TARGET. */
19229 void
19230 ix86_expand_vector_convert_uns_vsivsf (rtx target, rtx val)
19232 rtx tmp[8];
19233 REAL_VALUE_TYPE TWO16r;
19234 machine_mode intmode = GET_MODE (val);
19235 machine_mode fltmode = GET_MODE (target);
19236 rtx (*cvt) (rtx, rtx);
19238 if (intmode == V4SImode)
19239 cvt = gen_floatv4siv4sf2;
19240 else
19241 cvt = gen_floatv8siv8sf2;
19242 tmp[0] = ix86_build_const_vector (intmode, 1, GEN_INT (0xffff));
19243 tmp[0] = force_reg (intmode, tmp[0]);
19244 tmp[1] = expand_simple_binop (intmode, AND, val, tmp[0], NULL_RTX, 1,
19245 OPTAB_DIRECT);
19246 tmp[2] = expand_simple_binop (intmode, LSHIFTRT, val, GEN_INT (16),
19247 NULL_RTX, 1, OPTAB_DIRECT);
19248 tmp[3] = gen_reg_rtx (fltmode);
19249 emit_insn (cvt (tmp[3], tmp[1]));
19250 tmp[4] = gen_reg_rtx (fltmode);
19251 emit_insn (cvt (tmp[4], tmp[2]));
19252 real_ldexp (&TWO16r, &dconst1, 16);
19253 tmp[5] = const_double_from_real_value (TWO16r, SFmode);
19254 tmp[5] = force_reg (fltmode, ix86_build_const_vector (fltmode, 1, tmp[5]));
19255 tmp[6] = expand_simple_binop (fltmode, MULT, tmp[4], tmp[5], NULL_RTX, 1,
19256 OPTAB_DIRECT);
19257 tmp[7] = expand_simple_binop (fltmode, PLUS, tmp[3], tmp[6], target, 1,
19258 OPTAB_DIRECT);
19259 if (tmp[7] != target)
19260 emit_move_insn (target, tmp[7]);
19263 /* Adjust a V*SFmode/V*DFmode value VAL so that *sfix_trunc* resp. fix_trunc*
19264 pattern can be used on it instead of *ufix_trunc* resp. fixuns_trunc*.
19265 This is done by doing just signed conversion if < 0x1p31, and otherwise by
19266 subtracting 0x1p31 first and xoring in 0x80000000 from *XORP afterwards. */
19269 ix86_expand_adjust_ufix_to_sfix_si (rtx val, rtx *xorp)
19271 REAL_VALUE_TYPE TWO31r;
19272 rtx two31r, tmp[4];
19273 machine_mode mode = GET_MODE (val);
19274 machine_mode scalarmode = GET_MODE_INNER (mode);
19275 machine_mode intmode = GET_MODE_SIZE (mode) == 32 ? V8SImode : V4SImode;
19276 rtx (*cmp) (rtx, rtx, rtx, rtx);
19277 int i;
19279 for (i = 0; i < 3; i++)
19280 tmp[i] = gen_reg_rtx (mode);
19281 real_ldexp (&TWO31r, &dconst1, 31);
19282 two31r = const_double_from_real_value (TWO31r, scalarmode);
19283 two31r = ix86_build_const_vector (mode, 1, two31r);
19284 two31r = force_reg (mode, two31r);
19285 switch (mode)
19287 case V8SFmode: cmp = gen_avx_maskcmpv8sf3; break;
19288 case V4SFmode: cmp = gen_sse_maskcmpv4sf3; break;
19289 case V4DFmode: cmp = gen_avx_maskcmpv4df3; break;
19290 case V2DFmode: cmp = gen_sse2_maskcmpv2df3; break;
19291 default: gcc_unreachable ();
19293 tmp[3] = gen_rtx_LE (mode, two31r, val);
19294 emit_insn (cmp (tmp[0], two31r, val, tmp[3]));
19295 tmp[1] = expand_simple_binop (mode, AND, tmp[0], two31r, tmp[1],
19296 0, OPTAB_DIRECT);
19297 if (intmode == V4SImode || TARGET_AVX2)
19298 *xorp = expand_simple_binop (intmode, ASHIFT,
19299 gen_lowpart (intmode, tmp[0]),
19300 GEN_INT (31), NULL_RTX, 0,
19301 OPTAB_DIRECT);
19302 else
19304 rtx two31 = GEN_INT ((unsigned HOST_WIDE_INT) 1 << 31);
19305 two31 = ix86_build_const_vector (intmode, 1, two31);
19306 *xorp = expand_simple_binop (intmode, AND,
19307 gen_lowpart (intmode, tmp[0]),
19308 two31, NULL_RTX, 0,
19309 OPTAB_DIRECT);
19311 return expand_simple_binop (mode, MINUS, val, tmp[1], tmp[2],
19312 0, OPTAB_DIRECT);
19315 /* A subroutine of ix86_build_signbit_mask. If VECT is true,
19316 then replicate the value for all elements of the vector
19317 register. */
19320 ix86_build_const_vector (machine_mode mode, bool vect, rtx value)
19322 int i, n_elt;
19323 rtvec v;
19324 machine_mode scalar_mode;
19326 switch (mode)
19328 case V64QImode:
19329 case V32QImode:
19330 case V16QImode:
19331 case V32HImode:
19332 case V16HImode:
19333 case V8HImode:
19334 case V16SImode:
19335 case V8SImode:
19336 case V4SImode:
19337 case V8DImode:
19338 case V4DImode:
19339 case V2DImode:
19340 gcc_assert (vect);
19341 case V16SFmode:
19342 case V8SFmode:
19343 case V4SFmode:
19344 case V8DFmode:
19345 case V4DFmode:
19346 case V2DFmode:
19347 n_elt = GET_MODE_NUNITS (mode);
19348 v = rtvec_alloc (n_elt);
19349 scalar_mode = GET_MODE_INNER (mode);
19351 RTVEC_ELT (v, 0) = value;
19353 for (i = 1; i < n_elt; ++i)
19354 RTVEC_ELT (v, i) = vect ? value : CONST0_RTX (scalar_mode);
19356 return gen_rtx_CONST_VECTOR (mode, v);
19358 default:
19359 gcc_unreachable ();
19363 /* A subroutine of ix86_expand_fp_absneg_operator, copysign expanders
19364 and ix86_expand_int_vcond. Create a mask for the sign bit in MODE
19365 for an SSE register. If VECT is true, then replicate the mask for
19366 all elements of the vector register. If INVERT is true, then create
19367 a mask excluding the sign bit. */
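/* For example, for V4SFmode with VECT set this yields
   { 0x80000000, 0x80000000, 0x80000000, 0x80000000 }, or its
   complement { 0x7fffffff, ... } when INVERT is true, i.e. just the
   IEEE sign bit of each SFmode element.  */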
19370 ix86_build_signbit_mask (machine_mode mode, bool vect, bool invert)
19372 machine_mode vec_mode, imode;
19373 HOST_WIDE_INT hi, lo;
19374 int shift = 63;
19375 rtx v;
19376 rtx mask;
19378 /* Find the sign bit, sign extended to 2*HWI. */
19379 switch (mode)
19381 case V16SImode:
19382 case V16SFmode:
19383 case V8SImode:
19384 case V4SImode:
19385 case V8SFmode:
19386 case V4SFmode:
19387 vec_mode = mode;
19388 mode = GET_MODE_INNER (mode);
19389 imode = SImode;
19390 lo = 0x80000000, hi = lo < 0;
19391 break;
19393 case V8DImode:
19394 case V4DImode:
19395 case V2DImode:
19396 case V8DFmode:
19397 case V4DFmode:
19398 case V2DFmode:
19399 vec_mode = mode;
19400 mode = GET_MODE_INNER (mode);
19401 imode = DImode;
19402 if (HOST_BITS_PER_WIDE_INT >= 64)
19403 lo = (HOST_WIDE_INT)1 << shift, hi = -1;
19404 else
19405 lo = 0, hi = (HOST_WIDE_INT)1 << (shift - HOST_BITS_PER_WIDE_INT);
19406 break;
19408 case TImode:
19409 case TFmode:
19410 vec_mode = VOIDmode;
19411 if (HOST_BITS_PER_WIDE_INT >= 64)
19413 imode = TImode;
19414 lo = 0, hi = (HOST_WIDE_INT)1 << shift;
19416 else
19418 rtvec vec;
19420 imode = DImode;
19421 lo = 0, hi = (HOST_WIDE_INT)1 << (shift - HOST_BITS_PER_WIDE_INT);
19423 if (invert)
19425 lo = ~lo, hi = ~hi;
19426 v = constm1_rtx;
19428 else
19429 v = const0_rtx;
19431 mask = immed_double_const (lo, hi, imode);
19433 vec = gen_rtvec (2, v, mask);
19434 v = gen_rtx_CONST_VECTOR (V2DImode, vec);
19435 v = copy_to_mode_reg (mode, gen_lowpart (mode, v));
19437 return v;
19439 break;
19441 default:
19442 gcc_unreachable ();
19445 if (invert)
19446 lo = ~lo, hi = ~hi;
19448 /* Force this value into the low part of a fp vector constant. */
19449 mask = immed_double_const (lo, hi, imode);
19450 mask = gen_lowpart (mode, mask);
19452 if (vec_mode == VOIDmode)
19453 return force_reg (mode, mask);
19455 v = ix86_build_const_vector (vec_mode, vect, mask);
19456 return force_reg (vec_mode, v);
19459 /* Generate code for floating point ABS or NEG. */
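/* With SSE these reduce to bitwise mask operations: NEG xors the
   sign-bit mask into the value, ABS ands it out (hence the inverted
   mask requested for ABS below).  */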
19461 void
19462 ix86_expand_fp_absneg_operator (enum rtx_code code, machine_mode mode,
19463 rtx operands[])
19465 rtx mask, set, dst, src;
19466 bool use_sse = false;
19467 bool vector_mode = VECTOR_MODE_P (mode);
19468 machine_mode vmode = mode;
19470 if (vector_mode)
19471 use_sse = true;
19472 else if (mode == TFmode)
19473 use_sse = true;
19474 else if (TARGET_SSE_MATH)
19476 use_sse = SSE_FLOAT_MODE_P (mode);
19477 if (mode == SFmode)
19478 vmode = V4SFmode;
19479 else if (mode == DFmode)
19480 vmode = V2DFmode;
19483 /* NEG and ABS performed with SSE use bitwise mask operations.
19484 Create the appropriate mask now. */
19485 if (use_sse)
19486 mask = ix86_build_signbit_mask (vmode, vector_mode, code == ABS);
19487 else
19488 mask = NULL_RTX;
19490 dst = operands[0];
19491 src = operands[1];
19493 set = gen_rtx_fmt_e (code, mode, src);
19494 set = gen_rtx_SET (VOIDmode, dst, set);
19496 if (mask)
19498 rtx use, clob;
19499 rtvec par;
19501 use = gen_rtx_USE (VOIDmode, mask);
19502 if (vector_mode)
19503 par = gen_rtvec (2, set, use);
19504 else
19506 clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
19507 par = gen_rtvec (3, set, use, clob);
19509 emit_insn (gen_rtx_PARALLEL (VOIDmode, par));
19511 else
19512 emit_insn (set);
19515 /* Expand a copysign operation. Special case operand 0 being a constant. */
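/* The expansion implements copysign (a, b) = (a & ~SIGN) | (b & SIGN)
   with SIGN the sign-bit mask; when a is a constant its magnitude is
   taken up front, so only the sign of b still needs masking in.  */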
19517 void
19518 ix86_expand_copysign (rtx operands[])
19520 machine_mode mode, vmode;
19521 rtx dest, op0, op1, mask, nmask;
19523 dest = operands[0];
19524 op0 = operands[1];
19525 op1 = operands[2];
19527 mode = GET_MODE (dest);
19529 if (mode == SFmode)
19530 vmode = V4SFmode;
19531 else if (mode == DFmode)
19532 vmode = V2DFmode;
19533 else
19534 vmode = mode;
19536 if (GET_CODE (op0) == CONST_DOUBLE)
19538 rtx (*copysign_insn)(rtx, rtx, rtx, rtx);
19540 if (real_isneg (CONST_DOUBLE_REAL_VALUE (op0)))
19541 op0 = simplify_unary_operation (ABS, mode, op0, mode);
19543 if (mode == SFmode || mode == DFmode)
19545 if (op0 == CONST0_RTX (mode))
19546 op0 = CONST0_RTX (vmode);
19547 else
19549 rtx v = ix86_build_const_vector (vmode, false, op0);
19551 op0 = force_reg (vmode, v);
19554 else if (op0 != CONST0_RTX (mode))
19555 op0 = force_reg (mode, op0);
19557 mask = ix86_build_signbit_mask (vmode, 0, 0);
19559 if (mode == SFmode)
19560 copysign_insn = gen_copysignsf3_const;
19561 else if (mode == DFmode)
19562 copysign_insn = gen_copysigndf3_const;
19563 else
19564 copysign_insn = gen_copysigntf3_const;
19566 emit_insn (copysign_insn (dest, op0, op1, mask));
19568 else
19570 rtx (*copysign_insn)(rtx, rtx, rtx, rtx, rtx, rtx);
19572 nmask = ix86_build_signbit_mask (vmode, 0, 1);
19573 mask = ix86_build_signbit_mask (vmode, 0, 0);
19575 if (mode == SFmode)
19576 copysign_insn = gen_copysignsf3_var;
19577 else if (mode == DFmode)
19578 copysign_insn = gen_copysigndf3_var;
19579 else
19580 copysign_insn = gen_copysigntf3_var;
19582 emit_insn (copysign_insn (dest, NULL_RTX, op0, op1, nmask, mask));
19586 /* Deconstruct a copysign operation into bit masks. Operand 0 is known to
19587 be a constant, and so has already been expanded into a vector constant. */
19589 void
19590 ix86_split_copysign_const (rtx operands[])
19592 machine_mode mode, vmode;
19593 rtx dest, op0, mask, x;
19595 dest = operands[0];
19596 op0 = operands[1];
19597 mask = operands[3];
19599 mode = GET_MODE (dest);
19600 vmode = GET_MODE (mask);
19602 dest = simplify_gen_subreg (vmode, dest, mode, 0);
19603 x = gen_rtx_AND (vmode, dest, mask);
19604 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
19606 if (op0 != CONST0_RTX (vmode))
19608 x = gen_rtx_IOR (vmode, dest, op0);
19609 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
19613 /* Deconstruct a copysign operation into bit masks. Operand 0 is variable,
19614 so we have to do two masks. */
19616 void
19617 ix86_split_copysign_var (rtx operands[])
19619 machine_mode mode, vmode;
19620 rtx dest, scratch, op0, op1, mask, nmask, x;
19622 dest = operands[0];
19623 scratch = operands[1];
19624 op0 = operands[2];
19625 op1 = operands[3];
19626 nmask = operands[4];
19627 mask = operands[5];
19629 mode = GET_MODE (dest);
19630 vmode = GET_MODE (mask);
19632 if (rtx_equal_p (op0, op1))
19634 /* Shouldn't happen often (it's useless, obviously), but when it does
19635 we'd generate incorrect code if we continue below. */
19636 emit_move_insn (dest, op0);
19637 return;
19640 if (REG_P (mask) && REGNO (dest) == REGNO (mask)) /* alternative 0 */
19642 gcc_assert (REGNO (op1) == REGNO (scratch));
19644 x = gen_rtx_AND (vmode, scratch, mask);
19645 emit_insn (gen_rtx_SET (VOIDmode, scratch, x));
19647 dest = mask;
19648 op0 = simplify_gen_subreg (vmode, op0, mode, 0);
19649 x = gen_rtx_NOT (vmode, dest);
19650 x = gen_rtx_AND (vmode, x, op0);
19651 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
19653 else
19655 if (REGNO (op1) == REGNO (scratch)) /* alternative 1,3 */
19657 x = gen_rtx_AND (vmode, scratch, mask);
19659 else /* alternative 2,4 */
19661 gcc_assert (REGNO (mask) == REGNO (scratch));
19662 op1 = simplify_gen_subreg (vmode, op1, mode, 0);
19663 x = gen_rtx_AND (vmode, scratch, op1);
19665 emit_insn (gen_rtx_SET (VOIDmode, scratch, x));
19667 if (REGNO (op0) == REGNO (dest)) /* alternative 1,2 */
19669 dest = simplify_gen_subreg (vmode, op0, mode, 0);
19670 x = gen_rtx_AND (vmode, dest, nmask);
19672 else /* alternative 3,4 */
19674 gcc_assert (REGNO (nmask) == REGNO (dest));
19675 dest = nmask;
19676 op0 = simplify_gen_subreg (vmode, op0, mode, 0);
19677 x = gen_rtx_AND (vmode, dest, op0);
19679 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
19682 x = gen_rtx_IOR (vmode, dest, scratch);
19683 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
19686 /* Return TRUE or FALSE depending on whether the first SET in INSN
19687 has source and destination with matching CC modes, and that the
19688 CC mode is at least as constrained as REQ_MODE. */
19690 bool
19691 ix86_match_ccmode (rtx insn, machine_mode req_mode)
19693 rtx set;
19694 machine_mode set_mode;
19696 set = PATTERN (insn);
19697 if (GET_CODE (set) == PARALLEL)
19698 set = XVECEXP (set, 0, 0);
19699 gcc_assert (GET_CODE (set) == SET);
19700 gcc_assert (GET_CODE (SET_SRC (set)) == COMPARE);
19702 set_mode = GET_MODE (SET_DEST (set));
19703 switch (set_mode)
19705 case CCNOmode:
19706 if (req_mode != CCNOmode
19707 && (req_mode != CCmode
19708 || XEXP (SET_SRC (set), 1) != const0_rtx))
19709 return false;
19710 break;
19711 case CCmode:
19712 if (req_mode == CCGCmode)
19713 return false;
19714 /* FALLTHRU */
19715 case CCGCmode:
19716 if (req_mode == CCGOCmode || req_mode == CCNOmode)
19717 return false;
19718 /* FALLTHRU */
19719 case CCGOCmode:
19720 if (req_mode == CCZmode)
19721 return false;
19722 /* FALLTHRU */
19723 case CCZmode:
19724 break;
19726 case CCAmode:
19727 case CCCmode:
19728 case CCOmode:
19729 case CCSmode:
19730 if (set_mode != req_mode)
19731 return false;
19732 break;
19734 default:
19735 gcc_unreachable ();
19738 return GET_MODE (SET_SRC (set)) == set_mode;
19741 /* Generate insn patterns to do an integer compare of OPERANDS. */
19743 static rtx
19744 ix86_expand_int_compare (enum rtx_code code, rtx op0, rtx op1)
19746 machine_mode cmpmode;
19747 rtx tmp, flags;
19749 cmpmode = SELECT_CC_MODE (code, op0, op1);
19750 flags = gen_rtx_REG (cmpmode, FLAGS_REG);
19752 /* This is very simple, but making the interface the same as in the
19753 FP case makes the rest of the code easier. */
19754 tmp = gen_rtx_COMPARE (cmpmode, op0, op1);
19755 emit_insn (gen_rtx_SET (VOIDmode, flags, tmp));
19757 /* Return the test that should be put into the flags user, i.e.
19758 the bcc, scc, or cmov instruction. */
19759 return gen_rtx_fmt_ee (code, VOIDmode, flags, const0_rtx);
19762 /* Figure out whether to use ordered or unordered fp comparisons.
19763 Return the appropriate mode to use. */
19765 machine_mode
19766 ix86_fp_compare_mode (enum rtx_code)
19768 /* ??? In order to make all comparisons reversible, we do all comparisons
19769 non-trapping when compiling for IEEE. Once gcc is able to distinguish
19770 all forms of trapping and nontrapping comparisons, we can make inequality
19771 comparisons trapping again, since it results in better code when using
19772 FCOM based compares. */
19773 return TARGET_IEEE_FP ? CCFPUmode : CCFPmode;
19776 machine_mode
19777 ix86_cc_mode (enum rtx_code code, rtx op0, rtx op1)
19779 machine_mode mode = GET_MODE (op0);
19781 if (SCALAR_FLOAT_MODE_P (mode))
19783 gcc_assert (!DECIMAL_FLOAT_MODE_P (mode));
19784 return ix86_fp_compare_mode (code);
19787 switch (code)
19789 /* Only zero flag is needed. */
19790 case EQ: /* ZF=0 */
19791 case NE: /* ZF!=0 */
19792 return CCZmode;
19793 /* Codes needing carry flag. */
19794 case GEU: /* CF=0 */
19795 case LTU: /* CF=1 */
19796 /* Detect overflow checks. They need just the carry flag. */
19797 if (GET_CODE (op0) == PLUS
19798 && rtx_equal_p (op1, XEXP (op0, 0)))
19799 return CCCmode;
19800 else
19801 return CCmode;
19802 case GTU: /* CF=0 & ZF=0 */
19803 case LEU: /* CF=1 | ZF=1 */
19804 return CCmode;
19805 /* Codes possibly doable only with sign flag when
19806 comparing against zero. */
19807 case GE: /* SF=OF or SF=0 */
19808 case LT: /* SF<>OF or SF=1 */
19809 if (op1 == const0_rtx)
19810 return CCGOCmode;
19811 else
19813 /* For other cases the carry flag is not required. */
19813 return CCGCmode;
19814 /* Codes doable only with the sign flag when comparing
19815 against zero, but we miss the jump instruction for it,
19816 so we need to use relational tests against overflow,
19817 which thus needs to be zero. */
19818 case GT: /* ZF=0 & SF=OF */
19819 case LE: /* ZF=1 | SF<>OF */
19820 if (op1 == const0_rtx)
19821 return CCNOmode;
19822 else
19823 return CCGCmode;
19824 /* The strcmp pattern does (use flags) and combine may ask us for a
19825 proper mode. */
19826 case USE:
19827 return CCmode;
19828 default:
19829 gcc_unreachable ();
19833 /* Return the fixed registers used for condition codes. */
19835 static bool
19836 ix86_fixed_condition_code_regs (unsigned int *p1, unsigned int *p2)
19838 *p1 = FLAGS_REG;
19839 *p2 = FPSR_REG;
19840 return true;
19843 /* If two condition code modes are compatible, return a condition code
19844 mode which is compatible with both. Otherwise, return
19845 VOIDmode. */
19847 static machine_mode
19848 ix86_cc_modes_compatible (machine_mode m1, machine_mode m2)
19850 if (m1 == m2)
19851 return m1;
19853 if (GET_MODE_CLASS (m1) != MODE_CC || GET_MODE_CLASS (m2) != MODE_CC)
19854 return VOIDmode;
19856 if ((m1 == CCGCmode && m2 == CCGOCmode)
19857 || (m1 == CCGOCmode && m2 == CCGCmode))
19858 return CCGCmode;
19860 if (m1 == CCZmode && (m2 == CCGCmode || m2 == CCGOCmode))
19861 return m2;
19862 else if (m2 == CCZmode && (m1 == CCGCmode || m1 == CCGOCmode))
19863 return m1;
19865 switch (m1)
19867 default:
19868 gcc_unreachable ();
19870 case CCmode:
19871 case CCGCmode:
19872 case CCGOCmode:
19873 case CCNOmode:
19874 case CCAmode:
19875 case CCCmode:
19876 case CCOmode:
19877 case CCSmode:
19878 case CCZmode:
19879 switch (m2)
19881 default:
19882 return VOIDmode;
19884 case CCmode:
19885 case CCGCmode:
19886 case CCGOCmode:
19887 case CCNOmode:
19888 case CCAmode:
19889 case CCCmode:
19890 case CCOmode:
19891 case CCSmode:
19892 case CCZmode:
19893 return CCmode;
19896 case CCFPmode:
19897 case CCFPUmode:
19898 /* These are only compatible with themselves, which we already
19899 checked above. */
19900 return VOIDmode;
19905 /* Return a comparison we can do and that it is equivalent to
19906 swap_condition (code) apart possibly from orderedness.
19907 But, never change orderedness if TARGET_IEEE_FP, returning
19908 UNKNOWN in that case if necessary. */
19910 static enum rtx_code
19911 ix86_fp_swap_condition (enum rtx_code code)
19913 switch (code)
19915 case GT: /* GTU - CF=0 & ZF=0 */
19916 return TARGET_IEEE_FP ? UNKNOWN : UNLT;
19917 case GE: /* GEU - CF=0 */
19918 return TARGET_IEEE_FP ? UNKNOWN : UNLE;
19919 case UNLT: /* LTU - CF=1 */
19920 return TARGET_IEEE_FP ? UNKNOWN : GT;
19921 case UNLE: /* LEU - CF=1 | ZF=1 */
19922 return TARGET_IEEE_FP ? UNKNOWN : GE;
19923 default:
19924 return swap_condition (code);
19928 /* Return the cost of comparison CODE using the best strategy for performance.
19929 All following functions use the number of instructions as the cost metric.
19930 In the future this should be tweaked to compute bytes for optimize_size and
19931 take into account performance of various instructions on various CPUs. */
19933 static int
19934 ix86_fp_comparison_cost (enum rtx_code code)
19936 int arith_cost;
19938 /* The cost of code using bit-twiddling on %ah. */
19939 switch (code)
19941 case UNLE:
19942 case UNLT:
19943 case LTGT:
19944 case GT:
19945 case GE:
19946 case UNORDERED:
19947 case ORDERED:
19948 case UNEQ:
19949 arith_cost = 4;
19950 break;
19951 case LT:
19952 case NE:
19953 case EQ:
19954 case UNGE:
19955 arith_cost = TARGET_IEEE_FP ? 5 : 4;
19956 break;
19957 case LE:
19958 case UNGT:
19959 arith_cost = TARGET_IEEE_FP ? 6 : 4;
19960 break;
19961 default:
19962 gcc_unreachable ();
19965 switch (ix86_fp_comparison_strategy (code))
19967 case IX86_FPCMP_COMI:
19968 return arith_cost > 4 ? 3 : 2;
19969 case IX86_FPCMP_SAHF:
19970 return arith_cost > 4 ? 4 : 3;
19971 default:
19972 return arith_cost;
19976 /* Return the strategy to use for floating-point comparisons. We assume that
19977 fcomi is always preferable where available, since that is also true when
19978 looking at size (2 bytes, vs. 3 for fnstsw+sahf and at least 5 for fnstsw+test). */
19980 enum ix86_fpcmp_strategy
19981 ix86_fp_comparison_strategy (enum rtx_code)
19983 /* Do fcomi/sahf based test when profitable. */
19985 if (TARGET_CMOVE)
19986 return IX86_FPCMP_COMI;
19988 if (TARGET_SAHF && (TARGET_USE_SAHF || optimize_insn_for_size_p ()))
19989 return IX86_FPCMP_SAHF;
19991 return IX86_FPCMP_ARITH;
19994 /* Swap, force into registers, or otherwise massage the two operands
19995 to a fp comparison. The operands are updated in place; the new
19996 comparison code is returned. */
19998 static enum rtx_code
19999 ix86_prepare_fp_compare_args (enum rtx_code code, rtx *pop0, rtx *pop1)
20001 machine_mode fpcmp_mode = ix86_fp_compare_mode (code);
20002 rtx op0 = *pop0, op1 = *pop1;
20003 machine_mode op_mode = GET_MODE (op0);
20004 int is_sse = TARGET_SSE_MATH && SSE_FLOAT_MODE_P (op_mode);
20006 /* All of the unordered compare instructions only work on registers.
20007 The same is true of the fcomi compare instructions. The XFmode
20008 compare instructions require registers except when comparing
20009 against zero or when converting operand 1 from fixed point to
20010 floating point. */
20012 if (!is_sse
20013 && (fpcmp_mode == CCFPUmode
20014 || (op_mode == XFmode
20015 && ! (standard_80387_constant_p (op0) == 1
20016 || standard_80387_constant_p (op1) == 1)
20017 && GET_CODE (op1) != FLOAT)
20018 || ix86_fp_comparison_strategy (code) == IX86_FPCMP_COMI))
20020 op0 = force_reg (op_mode, op0);
20021 op1 = force_reg (op_mode, op1);
20023 else
20025 /* %%% We only allow op1 in memory; op0 must be st(0). So swap
20026 things around if they appear profitable, otherwise force op0
20027 into a register. */
20029 if (standard_80387_constant_p (op0) == 0
20030 || (MEM_P (op0)
20031 && ! (standard_80387_constant_p (op1) == 0
20032 || MEM_P (op1))))
20034 enum rtx_code new_code = ix86_fp_swap_condition (code);
20035 if (new_code != UNKNOWN)
20037 std::swap (op0, op1);
20038 code = new_code;
20042 if (!REG_P (op0))
20043 op0 = force_reg (op_mode, op0);
20045 if (CONSTANT_P (op1))
20047 int tmp = standard_80387_constant_p (op1);
20048 if (tmp == 0)
20049 op1 = validize_mem (force_const_mem (op_mode, op1));
20050 else if (tmp == 1)
20052 if (TARGET_CMOVE)
20053 op1 = force_reg (op_mode, op1);
20055 else
20056 op1 = force_reg (op_mode, op1);
20060 /* Try to rearrange the comparison to make it cheaper. */
20061 if (ix86_fp_comparison_cost (code)
20062 > ix86_fp_comparison_cost (swap_condition (code))
20063 && (REG_P (op1) || can_create_pseudo_p ()))
20065 std::swap (op0, op1);
20066 code = swap_condition (code);
20067 if (!REG_P (op0))
20068 op0 = force_reg (op_mode, op0);
20071 *pop0 = op0;
20072 *pop1 = op1;
20073 return code;
20076 /* Convert comparison codes we use to represent FP comparison to integer
20077 code that will result in proper branch. Return UNKNOWN if no such code
20078 is available. */
20080 enum rtx_code
20081 ix86_fp_compare_code_to_integer (enum rtx_code code)
20083 switch (code)
20085 case GT:
20086 return GTU;
20087 case GE:
20088 return GEU;
20089 case ORDERED:
20090 case UNORDERED:
20091 return code;
20092 break;
20093 case UNEQ:
20094 return EQ;
20095 break;
20096 case UNLT:
20097 return LTU;
20098 break;
20099 case UNLE:
20100 return LEU;
20101 break;
20102 case LTGT:
20103 return NE;
20104 break;
20105 default:
20106 return UNKNOWN;
20110 /* Generate insn patterns to do a floating point compare of OPERANDS. */
20112 static rtx
20113 ix86_expand_fp_compare (enum rtx_code code, rtx op0, rtx op1, rtx scratch)
20115 machine_mode fpcmp_mode, intcmp_mode;
20116 rtx tmp, tmp2;
20118 fpcmp_mode = ix86_fp_compare_mode (code);
20119 code = ix86_prepare_fp_compare_args (code, &op0, &op1);
20121 /* Do fcomi/sahf based test when profitable. */
20122 switch (ix86_fp_comparison_strategy (code))
20124 case IX86_FPCMP_COMI:
20125 intcmp_mode = fpcmp_mode;
20126 tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
20127 tmp = gen_rtx_SET (VOIDmode, gen_rtx_REG (fpcmp_mode, FLAGS_REG),
20128 tmp);
20129 emit_insn (tmp);
20130 break;
20132 case IX86_FPCMP_SAHF:
20133 intcmp_mode = fpcmp_mode;
20134 tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
20135 tmp = gen_rtx_SET (VOIDmode, gen_rtx_REG (fpcmp_mode, FLAGS_REG),
20136 tmp);
20138 if (!scratch)
20139 scratch = gen_reg_rtx (HImode);
20140 tmp2 = gen_rtx_CLOBBER (VOIDmode, scratch);
20141 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, tmp, tmp2)));
20142 break;
20144 case IX86_FPCMP_ARITH:
20145 /* Sadness wrt reg-stack pops killing fpsr -- gotta get fnstsw first. */
20146 tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
20147 tmp2 = gen_rtx_UNSPEC (HImode, gen_rtvec (1, tmp), UNSPEC_FNSTSW);
20148 if (!scratch)
20149 scratch = gen_reg_rtx (HImode);
20150 emit_insn (gen_rtx_SET (VOIDmode, scratch, tmp2));
20152 /* In the unordered case, we have to check C2 for NaNs, which
20153 doesn't happen to work out to anything nice combination-wise.
20154 So do some bit twiddling on the value we've got in AH to come
20155 up with an appropriate set of condition codes. */
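/* After fnstsw the relevant x87 condition bits sit in AH as
   C0 = 0x01, C2 = 0x04 and C3 = 0x40, so e.g. the 0x45 masks below
   test C0|C2|C3 at once and 0x04 alone tests the unordered (C2)
   bit.  */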
20157 intcmp_mode = CCNOmode;
20158 switch (code)
20160 case GT:
20161 case UNGT:
20162 if (code == GT || !TARGET_IEEE_FP)
20164 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x45)));
20165 code = EQ;
20167 else
20169 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
20170 emit_insn (gen_addqi_ext_1 (scratch, scratch, constm1_rtx));
20171 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x44)));
20172 intcmp_mode = CCmode;
20173 code = GEU;
20175 break;
20176 case LT:
20177 case UNLT:
20178 if (code == LT && TARGET_IEEE_FP)
20180 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
20181 emit_insn (gen_cmpqi_ext_3 (scratch, const1_rtx));
20182 intcmp_mode = CCmode;
20183 code = EQ;
20185 else
20187 emit_insn (gen_testqi_ext_ccno_0 (scratch, const1_rtx));
20188 code = NE;
20190 break;
20191 case GE:
20192 case UNGE:
20193 if (code == GE || !TARGET_IEEE_FP)
20195 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x05)));
20196 code = EQ;
20198 else
20200 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
20201 emit_insn (gen_xorqi_cc_ext_1 (scratch, scratch, const1_rtx));
20202 code = NE;
20204 break;
20205 case LE:
20206 case UNLE:
20207 if (code == LE && TARGET_IEEE_FP)
20209 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
20210 emit_insn (gen_addqi_ext_1 (scratch, scratch, constm1_rtx));
20211 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x40)));
20212 intcmp_mode = CCmode;
20213 code = LTU;
20215 else
20217 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x45)));
20218 code = NE;
20220 break;
20221 case EQ:
20222 case UNEQ:
20223 if (code == EQ && TARGET_IEEE_FP)
20225 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
20226 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x40)));
20227 intcmp_mode = CCmode;
20228 code = EQ;
20230 else
20232 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x40)));
20233 code = NE;
20235 break;
20236 case NE:
20237 case LTGT:
20238 if (code == NE && TARGET_IEEE_FP)
20240 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
20241 emit_insn (gen_xorqi_cc_ext_1 (scratch, scratch,
20242 GEN_INT (0x40)));
20243 code = NE;
20245 else
20247 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x40)));
20248 code = EQ;
20250 break;
20252 case UNORDERED:
20253 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x04)));
20254 code = NE;
20255 break;
20256 case ORDERED:
20257 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x04)));
20258 code = EQ;
20259 break;
20261 default:
20262 gcc_unreachable ();
20264 break;
20266 default:
20267 gcc_unreachable ();
20270 /* Return the test that should be put into the flags user, i.e.
20271 the bcc, scc, or cmov instruction. */
20272 return gen_rtx_fmt_ee (code, VOIDmode,
20273 gen_rtx_REG (intcmp_mode, FLAGS_REG),
20274 const0_rtx);
20277 static rtx
20278 ix86_expand_compare (enum rtx_code code, rtx op0, rtx op1)
20280 rtx ret;
20282 if (GET_MODE_CLASS (GET_MODE (op0)) == MODE_CC)
20283 ret = gen_rtx_fmt_ee (code, VOIDmode, op0, op1);
20285 else if (SCALAR_FLOAT_MODE_P (GET_MODE (op0)))
20287 gcc_assert (!DECIMAL_FLOAT_MODE_P (GET_MODE (op0)));
20288 ret = ix86_expand_fp_compare (code, op0, op1, NULL_RTX);
20290 else
20291 ret = ix86_expand_int_compare (code, op0, op1);
20293 return ret;
20296 void
20297 ix86_expand_branch (enum rtx_code code, rtx op0, rtx op1, rtx label)
20299 machine_mode mode = GET_MODE (op0);
20300 rtx tmp;
20302 switch (mode)
20304 case SFmode:
20305 case DFmode:
20306 case XFmode:
20307 case QImode:
20308 case HImode:
20309 case SImode:
20310 simple:
20311 tmp = ix86_expand_compare (code, op0, op1);
20312 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
20313 gen_rtx_LABEL_REF (VOIDmode, label),
20314 pc_rtx);
20315 emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
20316 return;
20318 case DImode:
20319 if (TARGET_64BIT)
20320 goto simple;
20321 case TImode:
20322 /* Expand DImode branch into multiple compare+branch. */
20324 rtx lo[2], hi[2];
20325 rtx_code_label *label2;
20326 enum rtx_code code1, code2, code3;
20327 machine_mode submode;
20329 if (CONSTANT_P (op0) && !CONSTANT_P (op1))
20331 std::swap (op0, op1);
20332 code = swap_condition (code);
20335 split_double_mode (mode, &op0, 1, lo+0, hi+0);
20336 split_double_mode (mode, &op1, 1, lo+1, hi+1);
20338 submode = mode == DImode ? SImode : DImode;
20340 /* When comparing for equality, we can use (hi0^hi1)|(lo0^lo1) to
20341 avoid two branches. This costs one extra insn, so disable when
20342 optimizing for size. */
20344 if ((code == EQ || code == NE)
20345 && (!optimize_insn_for_size_p ()
20346 || hi[1] == const0_rtx || lo[1] == const0_rtx))
20348 rtx xor0, xor1;
20350 xor1 = hi[0];
20351 if (hi[1] != const0_rtx)
20352 xor1 = expand_binop (submode, xor_optab, xor1, hi[1],
20353 NULL_RTX, 0, OPTAB_WIDEN);
20355 xor0 = lo[0];
20356 if (lo[1] != const0_rtx)
20357 xor0 = expand_binop (submode, xor_optab, xor0, lo[1],
20358 NULL_RTX, 0, OPTAB_WIDEN);
20360 tmp = expand_binop (submode, ior_optab, xor1, xor0,
20361 NULL_RTX, 0, OPTAB_WIDEN);
20363 ix86_expand_branch (code, tmp, const0_rtx, label);
20364 return;
20367 /* Otherwise, if we are doing less-than or greater-or-equal-than,
20368 op1 is a constant and the low word is zero, then we can just
20369 examine the high word. Similarly for low word -1 and
20370 less-or-equal-than or greater-than. */
20372 if (CONST_INT_P (hi[1]))
20373 switch (code)
20375 case LT: case LTU: case GE: case GEU:
20376 if (lo[1] == const0_rtx)
20378 ix86_expand_branch (code, hi[0], hi[1], label);
20379 return;
20381 break;
20382 case LE: case LEU: case GT: case GTU:
20383 if (lo[1] == constm1_rtx)
20385 ix86_expand_branch (code, hi[0], hi[1], label);
20386 return;
20388 break;
20389 default:
20390 break;
20393 /* Otherwise, we need two or three jumps. */
20395 label2 = gen_label_rtx ();
20397 code1 = code;
20398 code2 = swap_condition (code);
20399 code3 = unsigned_condition (code);
20401 switch (code)
20403 case LT: case GT: case LTU: case GTU:
20404 break;
20406 case LE: code1 = LT; code2 = GT; break;
20407 case GE: code1 = GT; code2 = LT; break;
20408 case LEU: code1 = LTU; code2 = GTU; break;
20409 case GEU: code1 = GTU; code2 = LTU; break;
20411 case EQ: code1 = UNKNOWN; code2 = NE; break;
20412 case NE: code2 = UNKNOWN; break;
20414 default:
20415 gcc_unreachable ();
20419 * a < b =>
20420 * if (hi(a) < hi(b)) goto true;
20421 * if (hi(a) > hi(b)) goto false;
20422 * if (lo(a) < lo(b)) goto true;
20423 * false:
20426 if (code1 != UNKNOWN)
20427 ix86_expand_branch (code1, hi[0], hi[1], label);
20428 if (code2 != UNKNOWN)
20429 ix86_expand_branch (code2, hi[0], hi[1], label2);
20431 ix86_expand_branch (code3, lo[0], lo[1], label);
20433 if (code2 != UNKNOWN)
20434 emit_label (label2);
20435 return;
20438 default:
20439 gcc_assert (GET_MODE_CLASS (GET_MODE (op0)) == MODE_CC);
20440 goto simple;
20444 /* Split branch based on floating point condition. */
20445 void
20446 ix86_split_fp_branch (enum rtx_code code, rtx op1, rtx op2,
20447 rtx target1, rtx target2, rtx tmp)
20449 rtx condition;
20450 rtx i;
20452 if (target2 != pc_rtx)
20454 rtx tmp = target2;
20455 code = reverse_condition_maybe_unordered (code);
20456 target2 = target1;
20457 target1 = tmp;
20460 condition = ix86_expand_fp_compare (code, op1, op2,
20461 tmp);
20463 i = emit_jump_insn (gen_rtx_SET
20464 (VOIDmode, pc_rtx,
20465 gen_rtx_IF_THEN_ELSE (VOIDmode,
20466 condition, target1, target2)));
20467 if (split_branch_probability >= 0)
20468 add_int_reg_note (i, REG_BR_PROB, split_branch_probability);
20471 void
20472 ix86_expand_setcc (rtx dest, enum rtx_code code, rtx op0, rtx op1)
20474 rtx ret;
20476 gcc_assert (GET_MODE (dest) == QImode);
20478 ret = ix86_expand_compare (code, op0, op1);
20479 PUT_MODE (ret, QImode);
20480 emit_insn (gen_rtx_SET (VOIDmode, dest, ret));
20483 /* Expand comparison setting or clearing carry flag. Return true when
20484 successful and set pop for the operation. */
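/* E.g. an unsigned a < b maps directly onto the carry flag of a
   compare of a with b (CF set iff a < b), which the sbb based
   sequences in ix86_expand_int_movcc below can consume without a
   setcc.  */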
20485 static bool
20486 ix86_expand_carry_flag_compare (enum rtx_code code, rtx op0, rtx op1, rtx *pop)
20488 machine_mode mode =
20489 GET_MODE (op0) != VOIDmode ? GET_MODE (op0) : GET_MODE (op1);
20491 /* Do not handle double-mode compares that go through special path. */
20492 if (mode == (TARGET_64BIT ? TImode : DImode))
20493 return false;
20495 if (SCALAR_FLOAT_MODE_P (mode))
20497 rtx compare_op;
20498 rtx_insn *compare_seq;
20500 gcc_assert (!DECIMAL_FLOAT_MODE_P (mode));
20502 /* Shortcut: the following common codes never translate
20503 into carry flag compares. */
20504 if (code == EQ || code == NE || code == UNEQ || code == LTGT
20505 || code == ORDERED || code == UNORDERED)
20506 return false;
20508 /* These comparisons require the zero flag; swap the operands so they no longer do. */
20509 if ((code == GT || code == UNLE || code == LE || code == UNGT)
20510 && !TARGET_IEEE_FP)
20512 std::swap (op0, op1);
20513 code = swap_condition (code);
20516 /* Try to expand the comparison and verify that we end up with
20517 a carry flag based comparison. This fails to be true only when
20518 we decide to expand the comparison using arithmetic, which is
20519 not a common scenario. */
20520 start_sequence ();
20521 compare_op = ix86_expand_fp_compare (code, op0, op1, NULL_RTX);
20522 compare_seq = get_insns ();
20523 end_sequence ();
20525 if (GET_MODE (XEXP (compare_op, 0)) == CCFPmode
20526 || GET_MODE (XEXP (compare_op, 0)) == CCFPUmode)
20527 code = ix86_fp_compare_code_to_integer (GET_CODE (compare_op));
20528 else
20529 code = GET_CODE (compare_op);
20531 if (code != LTU && code != GEU)
20532 return false;
20534 emit_insn (compare_seq);
20535 *pop = compare_op;
20536 return true;
20539 if (!INTEGRAL_MODE_P (mode))
20540 return false;
20542 switch (code)
20544 case LTU:
20545 case GEU:
20546 break;
20548 /* Convert a==0 into (unsigned)a<1. */
20549 case EQ:
20550 case NE:
20551 if (op1 != const0_rtx)
20552 return false;
20553 op1 = const1_rtx;
20554 code = (code == EQ ? LTU : GEU);
20555 break;
20557 /* Convert a>b into b<a or a>=b-1. */
20558 case GTU:
20559 case LEU:
20560 if (CONST_INT_P (op1))
20562 op1 = gen_int_mode (INTVAL (op1) + 1, GET_MODE (op0));
20563 /* Bail out on overflow. We can still swap the operands, but that
20564 would force loading of the constant into a register. */
20565 if (op1 == const0_rtx
20566 || !x86_64_immediate_operand (op1, GET_MODE (op1)))
20567 return false;
20568 code = (code == GTU ? GEU : LTU);
20570 else
20572 std::swap (op1, op0);
20573 code = (code == GTU ? LTU : GEU);
20575 break;
20577 /* Convert a>=0 into (unsigned)a<0x80000000. */
20578 case LT:
20579 case GE:
20580 if (mode == DImode || op1 != const0_rtx)
20581 return false;
20582 op1 = gen_int_mode (1 << (GET_MODE_BITSIZE (mode) - 1), mode);
20583 code = (code == LT ? GEU : LTU);
20584 break;
20585 case LE:
20586 case GT:
20587 if (mode == DImode || op1 != constm1_rtx)
20588 return false;
20589 op1 = gen_int_mode (1 << (GET_MODE_BITSIZE (mode) - 1), mode);
20590 code = (code == LE ? GEU : LTU);
20591 break;
20593 default:
20594 return false;
20596 /* Swapping operands may cause a constant to appear as the first operand. */
20597 if (!nonimmediate_operand (op0, VOIDmode))
20599 if (!can_create_pseudo_p ())
20600 return false;
20601 op0 = force_reg (mode, op0);
20603 *pop = ix86_expand_compare (code, op0, op1);
20604 gcc_assert (GET_CODE (*pop) == LTU || GET_CODE (*pop) == GEU);
20605 return true;
20608 bool
20609 ix86_expand_int_movcc (rtx operands[])
20611 enum rtx_code code = GET_CODE (operands[1]), compare_code;
20612 rtx_insn *compare_seq;
20613 rtx compare_op;
20614 machine_mode mode = GET_MODE (operands[0]);
20615 bool sign_bit_compare_p = false;
20616 rtx op0 = XEXP (operands[1], 0);
20617 rtx op1 = XEXP (operands[1], 1);
20619 if (GET_MODE (op0) == TImode
20620 || (GET_MODE (op0) == DImode
20621 && !TARGET_64BIT))
20622 return false;
20624 start_sequence ();
20625 compare_op = ix86_expand_compare (code, op0, op1);
20626 compare_seq = get_insns ();
20627 end_sequence ();
20629 compare_code = GET_CODE (compare_op);
20631 if ((op1 == const0_rtx && (code == GE || code == LT))
20632 || (op1 == constm1_rtx && (code == GT || code == LE)))
20633 sign_bit_compare_p = true;
20635 /* Don't attempt mode expansion here -- if we had to expand 5 or 6
20636 HImode insns, we'd be swallowed in word prefix ops. */
20638 if ((mode != HImode || TARGET_FAST_PREFIX)
20639 && (mode != (TARGET_64BIT ? TImode : DImode))
20640 && CONST_INT_P (operands[2])
20641 && CONST_INT_P (operands[3]))
20643 rtx out = operands[0];
20644 HOST_WIDE_INT ct = INTVAL (operands[2]);
20645 HOST_WIDE_INT cf = INTVAL (operands[3]);
20646 HOST_WIDE_INT diff;
20648 diff = ct - cf;
20649 /* Sign bit compares are better done using shifts than by using
20650 sbb. */
20651 if (sign_bit_compare_p
20652 || ix86_expand_carry_flag_compare (code, op0, op1, &compare_op))
20654 /* Detect overlap between destination and compare sources. */
20655 rtx tmp = out;
20657 if (!sign_bit_compare_p)
20659 rtx flags;
20660 bool fpcmp = false;
20662 compare_code = GET_CODE (compare_op);
20664 flags = XEXP (compare_op, 0);
20666 if (GET_MODE (flags) == CCFPmode
20667 || GET_MODE (flags) == CCFPUmode)
20669 fpcmp = true;
20670 compare_code
20671 = ix86_fp_compare_code_to_integer (compare_code);
20674 /* To simplify rest of code, restrict to the GEU case. */
20675 if (compare_code == LTU)
20677 HOST_WIDE_INT tmp = ct;
20678 ct = cf;
20679 cf = tmp;
20680 compare_code = reverse_condition (compare_code);
20681 code = reverse_condition (code);
20683 else
20685 if (fpcmp)
20686 PUT_CODE (compare_op,
20687 reverse_condition_maybe_unordered
20688 (GET_CODE (compare_op)));
20689 else
20690 PUT_CODE (compare_op,
20691 reverse_condition (GET_CODE (compare_op)));
20693 diff = ct - cf;
20695 if (reg_overlap_mentioned_p (out, op0)
20696 || reg_overlap_mentioned_p (out, op1))
20697 tmp = gen_reg_rtx (mode);
20699 if (mode == DImode)
20700 emit_insn (gen_x86_movdicc_0_m1 (tmp, flags, compare_op));
20701 else
20702 emit_insn (gen_x86_movsicc_0_m1 (gen_lowpart (SImode, tmp),
20703 flags, compare_op));
20705 else
20707 if (code == GT || code == GE)
20708 code = reverse_condition (code);
20709 else
20711 HOST_WIDE_INT tmp = ct;
20712 ct = cf;
20713 cf = tmp;
20714 diff = ct - cf;
20716 tmp = emit_store_flag (tmp, code, op0, op1, VOIDmode, 0, -1);
20719 if (diff == 1)
20722 * cmpl op0,op1
20723 * sbbl dest,dest
20724 * [addl dest, ct]
20726 * Size 5 - 8.
20728 if (ct)
20729 tmp = expand_simple_binop (mode, PLUS,
20730 tmp, GEN_INT (ct),
20731 copy_rtx (tmp), 1, OPTAB_DIRECT);
20733 else if (cf == -1)
20736 * cmpl op0,op1
20737 * sbbl dest,dest
20738 * orl $ct, dest
20740 * Size 8.
20742 tmp = expand_simple_binop (mode, IOR,
20743 tmp, GEN_INT (ct),
20744 copy_rtx (tmp), 1, OPTAB_DIRECT);
20746 else if (diff == -1 && ct)
20749 * cmpl op0,op1
20750 * sbbl dest,dest
20751 * notl dest
20752 * [addl dest, cf]
20754 * Size 8 - 11.
20756 tmp = expand_simple_unop (mode, NOT, tmp, copy_rtx (tmp), 1);
20757 if (cf)
20758 tmp = expand_simple_binop (mode, PLUS,
20759 copy_rtx (tmp), GEN_INT (cf),
20760 copy_rtx (tmp), 1, OPTAB_DIRECT);
20762 else
20765 * cmpl op0,op1
20766 * sbbl dest,dest
20767 * [notl dest]
20768 * andl cf - ct, dest
20769 * [addl dest, ct]
20771 * Size 8 - 11.
20774 if (cf == 0)
20776 cf = ct;
20777 ct = 0;
20778 tmp = expand_simple_unop (mode, NOT, tmp, copy_rtx (tmp), 1);
20781 tmp = expand_simple_binop (mode, AND,
20782 copy_rtx (tmp),
20783 gen_int_mode (cf - ct, mode),
20784 copy_rtx (tmp), 1, OPTAB_DIRECT);
20785 if (ct)
20786 tmp = expand_simple_binop (mode, PLUS,
20787 copy_rtx (tmp), GEN_INT (ct),
20788 copy_rtx (tmp), 1, OPTAB_DIRECT);
20791 if (!rtx_equal_p (tmp, out))
20792 emit_move_insn (copy_rtx (out), copy_rtx (tmp));
20794 return true;
20797 if (diff < 0)
20799 machine_mode cmp_mode = GET_MODE (op0);
20801 std::swap (ct, cf);
20802 diff = -diff;
20804 if (SCALAR_FLOAT_MODE_P (cmp_mode))
20806 gcc_assert (!DECIMAL_FLOAT_MODE_P (cmp_mode));
20808 	      /* We may be reversing an unordered compare to a normal compare,
20809 		 which is not valid in general (we may convert a non-trapping
20810 		 condition to a trapping one); however, on i386 we currently
20811 		 emit all comparisons unordered.  */
20812 compare_code = reverse_condition_maybe_unordered (compare_code);
20813 code = reverse_condition_maybe_unordered (code);
20815 else
20817 compare_code = reverse_condition (compare_code);
20818 code = reverse_condition (code);
20822 compare_code = UNKNOWN;
20823 if (GET_MODE_CLASS (GET_MODE (op0)) == MODE_INT
20824 && CONST_INT_P (op1))
20826 if (op1 == const0_rtx
20827 && (code == LT || code == GE))
20828 compare_code = code;
20829 else if (op1 == constm1_rtx)
20831 if (code == LE)
20832 compare_code = LT;
20833 else if (code == GT)
20834 compare_code = GE;
20838 /* Optimize dest = (op0 < 0) ? -1 : cf. */
20839 if (compare_code != UNKNOWN
20840 && GET_MODE (op0) == GET_MODE (out)
20841 && (cf == -1 || ct == -1))
20843 /* If lea code below could be used, only optimize
20844 if it results in a 2 insn sequence. */
20846 if (! (diff == 1 || diff == 2 || diff == 4 || diff == 8
20847 || diff == 3 || diff == 5 || diff == 9)
20848 || (compare_code == LT && ct == -1)
20849 || (compare_code == GE && cf == -1))
20852 * notl op1 (if necessary)
20853 * sarl $31, op1
20854 * orl cf, op1
20856 if (ct != -1)
20858 cf = ct;
20859 ct = -1;
20860 code = reverse_condition (code);
20863 out = emit_store_flag (out, code, op0, op1, VOIDmode, 0, -1);
20865 out = expand_simple_binop (mode, IOR,
20866 out, GEN_INT (cf),
20867 out, 1, OPTAB_DIRECT);
20868 if (out != operands[0])
20869 emit_move_insn (operands[0], out);
20871 return true;
20876 if ((diff == 1 || diff == 2 || diff == 4 || diff == 8
20877 || diff == 3 || diff == 5 || diff == 9)
20878 && ((mode != QImode && mode != HImode) || !TARGET_PARTIAL_REG_STALL)
20879 && (mode != DImode
20880 || x86_64_immediate_operand (GEN_INT (cf), VOIDmode)))
20883 * xorl dest,dest
20884 * cmpl op1,op2
20885 * setcc dest
20886 * lea cf(dest*(ct-cf)),dest
20888 * Size 14.
20890 * This also catches the degenerate setcc-only case.
20893 rtx tmp;
20894 int nops;
20896 out = emit_store_flag (out, code, op0, op1, VOIDmode, 0, 1);
20898 nops = 0;
20899 	  /* On x86_64 the lea instruction operates on Pmode, so we need
20900 	     the arithmetic done in the proper mode to match.  */
20901 if (diff == 1)
20902 tmp = copy_rtx (out);
20903 else
20905 rtx out1;
20906 out1 = copy_rtx (out);
20907 tmp = gen_rtx_MULT (mode, out1, GEN_INT (diff & ~1));
20908 nops++;
20909 if (diff & 1)
20911 tmp = gen_rtx_PLUS (mode, tmp, out1);
20912 nops++;
20915 if (cf != 0)
20917 tmp = gen_rtx_PLUS (mode, tmp, GEN_INT (cf));
20918 nops++;
20920 if (!rtx_equal_p (tmp, out))
20922 if (nops == 1)
20923 out = force_operand (tmp, copy_rtx (out));
20924 else
20925 emit_insn (gen_rtx_SET (VOIDmode, copy_rtx (out), copy_rtx (tmp)));
20927 if (!rtx_equal_p (out, operands[0]))
20928 emit_move_insn (operands[0], copy_rtx (out));
20930 return true;
20934 * General case: Jumpful:
20935 * xorl dest,dest cmpl op1, op2
20936 * cmpl op1, op2 movl ct, dest
20937 * setcc dest jcc 1f
20938 * decl dest movl cf, dest
20939 * andl (cf-ct),dest 1:
20940 * addl ct,dest
20942 * Size 20. Size 14.
20944 * This is reasonably steep, but branch mispredict costs are
20945 * high on modern cpus, so consider failing only if optimizing
20946 * for space.
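	   * I.e. the jumpless sequence computes
	   *   dest = (((cond) - 1) & (cf - ct)) + ct
	   * where setcc leaves 0/1 in dest, decl turns that into -1/0, the AND
	   * picks cf - ct or 0, and the final add yields cf or ct.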
20949 if ((!TARGET_CMOVE || (mode == QImode && TARGET_PARTIAL_REG_STALL))
20950 && BRANCH_COST (optimize_insn_for_speed_p (),
20951 false) >= 2)
20953 if (cf == 0)
20955 machine_mode cmp_mode = GET_MODE (op0);
20957 cf = ct;
20958 ct = 0;
20960 if (SCALAR_FLOAT_MODE_P (cmp_mode))
20962 gcc_assert (!DECIMAL_FLOAT_MODE_P (cmp_mode));
20964 	      /* We may be reversing an unordered compare to a normal compare,
20965 		 which is not valid in general (we may convert a non-trapping
20966 		 condition to a trapping one); however, on i386 we currently
20967 		 emit all comparisons unordered.  */
20968 code = reverse_condition_maybe_unordered (code);
20970 else
20972 code = reverse_condition (code);
20973 if (compare_code != UNKNOWN)
20974 compare_code = reverse_condition (compare_code);
20978 if (compare_code != UNKNOWN)
20980 /* notl op1 (if needed)
20981 sarl $31, op1
20982 andl (cf-ct), op1
20983 addl ct, op1
20985 For x < 0 (resp. x <= -1) there will be no notl,
20986 so if possible swap the constants to get rid of the
20987 complement.
20988 True/false will be -1/0 while code below (store flag
20989 followed by decrement) is 0/-1, so the constants need
20990 to be exchanged once more. */
20992 if (compare_code == GE || !cf)
20994 code = reverse_condition (code);
20995 compare_code = LT;
20997 else
20998 std::swap (cf, ct);
21000 out = emit_store_flag (out, code, op0, op1, VOIDmode, 0, -1);
21002 else
21004 out = emit_store_flag (out, code, op0, op1, VOIDmode, 0, 1);
21006 out = expand_simple_binop (mode, PLUS, copy_rtx (out),
21007 constm1_rtx,
21008 copy_rtx (out), 1, OPTAB_DIRECT);
21011 out = expand_simple_binop (mode, AND, copy_rtx (out),
21012 gen_int_mode (cf - ct, mode),
21013 copy_rtx (out), 1, OPTAB_DIRECT);
21014 if (ct)
21015 out = expand_simple_binop (mode, PLUS, copy_rtx (out), GEN_INT (ct),
21016 copy_rtx (out), 1, OPTAB_DIRECT);
21017 if (!rtx_equal_p (out, operands[0]))
21018 emit_move_insn (operands[0], copy_rtx (out));
21020 return true;
21024 if (!TARGET_CMOVE || (mode == QImode && TARGET_PARTIAL_REG_STALL))
21026 /* Try a few things more with specific constants and a variable. */
21028 optab op;
21029 rtx var, orig_out, out, tmp;
21031 if (BRANCH_COST (optimize_insn_for_speed_p (), false) <= 2)
21032 return false;
21034 /* If one of the two operands is an interesting constant, load a
21035 constant with the above and mask it in with a logical operation. */
21037 if (CONST_INT_P (operands[2]))
21039 var = operands[3];
21040 if (INTVAL (operands[2]) == 0 && operands[3] != constm1_rtx)
21041 operands[3] = constm1_rtx, op = and_optab;
21042 else if (INTVAL (operands[2]) == -1 && operands[3] != const0_rtx)
21043 operands[3] = const0_rtx, op = ior_optab;
21044 else
21045 return false;
21047 else if (CONST_INT_P (operands[3]))
21049 var = operands[2];
21050 if (INTVAL (operands[3]) == 0 && operands[2] != constm1_rtx)
21051 operands[2] = constm1_rtx, op = and_optab;
21052 	  else if (INTVAL (operands[3]) == -1 && operands[2] != const0_rtx)
21053 operands[2] = const0_rtx, op = ior_optab;
21054 else
21055 return false;
21057 else
21058 return false;
21060 orig_out = operands[0];
21061 tmp = gen_reg_rtx (mode);
21062 operands[0] = tmp;
21064 /* Recurse to get the constant loaded. */
21065 if (ix86_expand_int_movcc (operands) == 0)
21066 return false;
21068 /* Mask in the interesting variable. */
21069 out = expand_binop (mode, op, var, tmp, orig_out, 0,
21070 OPTAB_WIDEN);
21071 if (!rtx_equal_p (out, orig_out))
21072 emit_move_insn (copy_rtx (orig_out), copy_rtx (out));
21074 return true;
21078 * For comparison with above,
21080 * movl cf,dest
21081 * movl ct,tmp
21082 * cmpl op1,op2
21083 * cmovcc tmp,dest
21085 * Size 15.
21088 if (! nonimmediate_operand (operands[2], mode))
21089 operands[2] = force_reg (mode, operands[2]);
21090 if (! nonimmediate_operand (operands[3], mode))
21091 operands[3] = force_reg (mode, operands[3]);
21093 if (! register_operand (operands[2], VOIDmode)
21094 && (mode == QImode
21095 || ! register_operand (operands[3], VOIDmode)))
21096 operands[2] = force_reg (mode, operands[2]);
21098 if (mode == QImode
21099 && ! register_operand (operands[3], VOIDmode))
21100 operands[3] = force_reg (mode, operands[3]);
21102 emit_insn (compare_seq);
21103 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
21104 gen_rtx_IF_THEN_ELSE (mode,
21105 compare_op, operands[2],
21106 operands[3])));
21107 return true;
21110 /* Swap, force into registers, or otherwise massage the two operands
21111 to an sse comparison with a mask result. Thus we differ a bit from
21112 ix86_prepare_fp_compare_args which expects to produce a flags result.
21114 The DEST operand exists to help determine whether to commute commutative
21115 operators. The POP0/POP1 operands are updated in place. The new
21116 comparison code is returned, or UNKNOWN if not implementable. */
21118 static enum rtx_code
21119 ix86_prepare_sse_fp_compare_args (rtx dest, enum rtx_code code,
21120 rtx *pop0, rtx *pop1)
21122 switch (code)
21124 case LTGT:
21125 case UNEQ:
21126 /* AVX supports all the needed comparisons. */
21127 if (TARGET_AVX)
21128 break;
21129 /* We have no LTGT as an operator. We could implement it with
21130 NE & ORDERED, but this requires an extra temporary. It's
21131 not clear that it's worth it. */
21132 return UNKNOWN;
21134 case LT:
21135 case LE:
21136 case UNGT:
21137 case UNGE:
21138 /* These are supported directly. */
21139 break;
21141 case EQ:
21142 case NE:
21143 case UNORDERED:
21144 case ORDERED:
21145 /* AVX has 3 operand comparisons, no need to swap anything. */
21146 if (TARGET_AVX)
21147 break;
21148 /* For commutative operators, try to canonicalize the destination
21149 operand to be first in the comparison - this helps reload to
21150 avoid extra moves. */
21151 if (!dest || !rtx_equal_p (dest, *pop1))
21152 break;
21153 /* FALLTHRU */
21155 case GE:
21156 case GT:
21157 case UNLE:
21158 case UNLT:
21159 /* These are not supported directly before AVX, and furthermore
21160 ix86_expand_sse_fp_minmax only optimizes LT/UNGE. Swap the
21161 comparison operands to transform into something that is
21162 supported. */
21163 std::swap (*pop0, *pop1);
21164 code = swap_condition (code);
21165 break;
21167 default:
21168 gcc_unreachable ();
21171 return code;
21174 /* Detect conditional moves that exactly match min/max operational
21175 semantics. Note that this is IEEE safe, as long as we don't
21176 interchange the operands.
21178 Returns FALSE if this conditional move doesn't match a MIN/MAX,
21179 and TRUE if the operation is successful and instructions are emitted. */
21181 static bool
21182 ix86_expand_sse_fp_minmax (rtx dest, enum rtx_code code, rtx cmp_op0,
21183 rtx cmp_op1, rtx if_true, rtx if_false)
21185 machine_mode mode;
21186 bool is_min;
21187 rtx tmp;
21189 if (code == LT)
21191 else if (code == UNGE)
21192 std::swap (if_true, if_false);
21193 else
21194 return false;
21196 if (rtx_equal_p (cmp_op0, if_true) && rtx_equal_p (cmp_op1, if_false))
21197 is_min = true;
21198 else if (rtx_equal_p (cmp_op1, if_true) && rtx_equal_p (cmp_op0, if_false))
21199 is_min = false;
21200 else
21201 return false;
21203 mode = GET_MODE (dest);
21205 /* We want to check HONOR_NANS and HONOR_SIGNED_ZEROS here,
21206 but MODE may be a vector mode and thus not appropriate. */
21207 if (!flag_finite_math_only || !flag_unsafe_math_optimizations)
21209 int u = is_min ? UNSPEC_IEEE_MIN : UNSPEC_IEEE_MAX;
21210 rtvec v;
21212 if_true = force_reg (mode, if_true);
21213 v = gen_rtvec (2, if_true, if_false);
21214 tmp = gen_rtx_UNSPEC (mode, v, u);
21216 else
21218 code = is_min ? SMIN : SMAX;
21219 tmp = gen_rtx_fmt_ee (code, mode, if_true, if_false);
21222 emit_insn (gen_rtx_SET (VOIDmode, dest, tmp));
21223 return true;
21226 /* Expand an sse vector comparison. Return the register with the result. */
21228 static rtx
21229 ix86_expand_sse_cmp (rtx dest, enum rtx_code code, rtx cmp_op0, rtx cmp_op1,
21230 rtx op_true, rtx op_false)
21232 machine_mode mode = GET_MODE (dest);
21233 machine_mode cmp_ops_mode = GET_MODE (cmp_op0);
21235   /* In the general case the result of the comparison can differ from the operands' type.  */
21236 machine_mode cmp_mode;
21238 /* In AVX512F the result of comparison is an integer mask. */
21239 bool maskcmp = false;
21240 rtx x;
21242 if (GET_MODE_SIZE (cmp_ops_mode) == 64)
21244 cmp_mode = mode_for_size (GET_MODE_NUNITS (cmp_ops_mode), MODE_INT, 0);
21245 gcc_assert (cmp_mode != BLKmode);
21247 maskcmp = true;
21249 else
21250 cmp_mode = cmp_ops_mode;
21253 cmp_op0 = force_reg (cmp_ops_mode, cmp_op0);
21254 if (!nonimmediate_operand (cmp_op1, cmp_ops_mode))
21255 cmp_op1 = force_reg (cmp_ops_mode, cmp_op1);
21257 if (optimize
21258 || reg_overlap_mentioned_p (dest, op_true)
21259 || reg_overlap_mentioned_p (dest, op_false))
21260 dest = gen_reg_rtx (maskcmp ? cmp_mode : mode);
21262 /* Compare patterns for int modes are unspec in AVX512F only. */
21263 if (maskcmp && (code == GT || code == EQ))
21265 rtx (*gen)(rtx, rtx, rtx);
21267 switch (cmp_ops_mode)
21269 case V16SImode:
21270 gen = code == GT ? gen_avx512f_gtv16si3 : gen_avx512f_eqv16si3_1;
21271 break;
21272 case V8DImode:
21273 gen = code == GT ? gen_avx512f_gtv8di3 : gen_avx512f_eqv8di3_1;
21274 break;
21275 default:
21276 gen = NULL;
21279 if (gen)
21281 emit_insn (gen (dest, cmp_op0, cmp_op1));
21282 return dest;
21285 x = gen_rtx_fmt_ee (code, cmp_mode, cmp_op0, cmp_op1);
21287 if (cmp_mode != mode && !maskcmp)
21289 x = force_reg (cmp_ops_mode, x);
21290 convert_move (dest, x, false);
21292 else
21293 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
21295 return dest;
21298 /* Expand DEST = CMP ? OP_TRUE : OP_FALSE into a sequence of logical
21299 operations. This is used for both scalar and vector conditional moves. */
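/* When no blend-style instruction applies, the fallback computes
   DEST = (CMP & OP_TRUE) | (~CMP & OP_FALSE); cheaper forms are used when
   one of the arms is all zeros or all ones.  */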
21301 static void
21302 ix86_expand_sse_movcc (rtx dest, rtx cmp, rtx op_true, rtx op_false)
21304 machine_mode mode = GET_MODE (dest);
21305 machine_mode cmpmode = GET_MODE (cmp);
21307 /* In AVX512F the result of comparison is an integer mask. */
21308 bool maskcmp = (mode != cmpmode && TARGET_AVX512F);
21310 rtx t2, t3, x;
21312 if (vector_all_ones_operand (op_true, mode)
21313 && rtx_equal_p (op_false, CONST0_RTX (mode))
21314 && !maskcmp)
21316 emit_insn (gen_rtx_SET (VOIDmode, dest, cmp));
21318 else if (op_false == CONST0_RTX (mode)
21319 && !maskcmp)
21321 op_true = force_reg (mode, op_true);
21322 x = gen_rtx_AND (mode, cmp, op_true);
21323 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
21325 else if (op_true == CONST0_RTX (mode)
21326 && !maskcmp)
21328 op_false = force_reg (mode, op_false);
21329 x = gen_rtx_NOT (mode, cmp);
21330 x = gen_rtx_AND (mode, x, op_false);
21331 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
21333 else if (INTEGRAL_MODE_P (mode) && op_true == CONSTM1_RTX (mode)
21334 && !maskcmp)
21336 op_false = force_reg (mode, op_false);
21337 x = gen_rtx_IOR (mode, cmp, op_false);
21338 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
21340 else if (TARGET_XOP
21341 && !maskcmp)
21343 op_true = force_reg (mode, op_true);
21345 if (!nonimmediate_operand (op_false, mode))
21346 op_false = force_reg (mode, op_false);
21348 emit_insn (gen_rtx_SET (mode, dest,
21349 gen_rtx_IF_THEN_ELSE (mode, cmp,
21350 op_true,
21351 op_false)));
21353 else
21355 rtx (*gen) (rtx, rtx, rtx, rtx) = NULL;
21356 rtx d = dest;
21358 if (!nonimmediate_operand (op_true, mode))
21359 op_true = force_reg (mode, op_true);
21361 op_false = force_reg (mode, op_false);
21363 switch (mode)
21365 case V4SFmode:
21366 if (TARGET_SSE4_1)
21367 gen = gen_sse4_1_blendvps;
21368 break;
21369 case V2DFmode:
21370 if (TARGET_SSE4_1)
21371 gen = gen_sse4_1_blendvpd;
21372 break;
21373 case V16QImode:
21374 case V8HImode:
21375 case V4SImode:
21376 case V2DImode:
21377 if (TARGET_SSE4_1)
21379 gen = gen_sse4_1_pblendvb;
21380 if (mode != V16QImode)
21381 d = gen_reg_rtx (V16QImode);
21382 op_false = gen_lowpart (V16QImode, op_false);
21383 op_true = gen_lowpart (V16QImode, op_true);
21384 cmp = gen_lowpart (V16QImode, cmp);
21386 break;
21387 case V8SFmode:
21388 if (TARGET_AVX)
21389 gen = gen_avx_blendvps256;
21390 break;
21391 case V4DFmode:
21392 if (TARGET_AVX)
21393 gen = gen_avx_blendvpd256;
21394 break;
21395 case V32QImode:
21396 case V16HImode:
21397 case V8SImode:
21398 case V4DImode:
21399 if (TARGET_AVX2)
21401 gen = gen_avx2_pblendvb;
21402 if (mode != V32QImode)
21403 d = gen_reg_rtx (V32QImode);
21404 op_false = gen_lowpart (V32QImode, op_false);
21405 op_true = gen_lowpart (V32QImode, op_true);
21406 cmp = gen_lowpart (V32QImode, cmp);
21408 break;
21410 case V64QImode:
21411 gen = gen_avx512bw_blendmv64qi;
21412 break;
21413 case V32HImode:
21414 gen = gen_avx512bw_blendmv32hi;
21415 break;
21416 case V16SImode:
21417 gen = gen_avx512f_blendmv16si;
21418 break;
21419 case V8DImode:
21420 gen = gen_avx512f_blendmv8di;
21421 break;
21422 case V8DFmode:
21423 gen = gen_avx512f_blendmv8df;
21424 break;
21425 case V16SFmode:
21426 gen = gen_avx512f_blendmv16sf;
21427 break;
21429 default:
21430 break;
21433 if (gen != NULL)
21435 emit_insn (gen (d, op_false, op_true, cmp));
21436 if (d != dest)
21437 emit_move_insn (dest, gen_lowpart (GET_MODE (dest), d));
21439 else
21441 op_true = force_reg (mode, op_true);
21443 t2 = gen_reg_rtx (mode);
21444 if (optimize)
21445 t3 = gen_reg_rtx (mode);
21446 else
21447 t3 = dest;
21449 x = gen_rtx_AND (mode, op_true, cmp);
21450 emit_insn (gen_rtx_SET (VOIDmode, t2, x));
21452 x = gen_rtx_NOT (mode, cmp);
21453 x = gen_rtx_AND (mode, x, op_false);
21454 emit_insn (gen_rtx_SET (VOIDmode, t3, x));
21456 x = gen_rtx_IOR (mode, t3, t2);
21457 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
21462 /* Expand a floating-point conditional move. Return true if successful. */
21464 bool
21465 ix86_expand_fp_movcc (rtx operands[])
21467 machine_mode mode = GET_MODE (operands[0]);
21468 enum rtx_code code = GET_CODE (operands[1]);
21469 rtx tmp, compare_op;
21470 rtx op0 = XEXP (operands[1], 0);
21471 rtx op1 = XEXP (operands[1], 1);
21473 if (TARGET_SSE_MATH && SSE_FLOAT_MODE_P (mode))
21475 machine_mode cmode;
21477 /* Since we've no cmove for sse registers, don't force bad register
21478 allocation just to gain access to it. Deny movcc when the
21479 comparison mode doesn't match the move mode. */
21480 cmode = GET_MODE (op0);
21481 if (cmode == VOIDmode)
21482 cmode = GET_MODE (op1);
21483 if (cmode != mode)
21484 return false;
21486 code = ix86_prepare_sse_fp_compare_args (operands[0], code, &op0, &op1);
21487 if (code == UNKNOWN)
21488 return false;
21490 if (ix86_expand_sse_fp_minmax (operands[0], code, op0, op1,
21491 operands[2], operands[3]))
21492 return true;
21494 tmp = ix86_expand_sse_cmp (operands[0], code, op0, op1,
21495 operands[2], operands[3]);
21496 ix86_expand_sse_movcc (operands[0], tmp, operands[2], operands[3]);
21497 return true;
21500 if (GET_MODE (op0) == TImode
21501 || (GET_MODE (op0) == DImode
21502 && !TARGET_64BIT))
21503 return false;
21505 /* The floating point conditional move instructions don't directly
21506 support conditions resulting from a signed integer comparison. */
21508 compare_op = ix86_expand_compare (code, op0, op1);
21509 if (!fcmov_comparison_operator (compare_op, VOIDmode))
21511 tmp = gen_reg_rtx (QImode);
21512 ix86_expand_setcc (tmp, code, op0, op1);
21514 compare_op = ix86_expand_compare (NE, tmp, const0_rtx);
21517 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
21518 gen_rtx_IF_THEN_ELSE (mode, compare_op,
21519 operands[2], operands[3])));
21521 return true;
21524 /* Expand a floating-point vector conditional move; a vcond operation
21525 rather than a movcc operation. */
21527 bool
21528 ix86_expand_fp_vcond (rtx operands[])
21530 enum rtx_code code = GET_CODE (operands[3]);
21531 rtx cmp;
21533 code = ix86_prepare_sse_fp_compare_args (operands[0], code,
21534 &operands[4], &operands[5]);
21535 if (code == UNKNOWN)
21537 rtx temp;
21538 switch (GET_CODE (operands[3]))
21540 case LTGT:
21541 temp = ix86_expand_sse_cmp (operands[0], ORDERED, operands[4],
21542 operands[5], operands[0], operands[0]);
21543 cmp = ix86_expand_sse_cmp (operands[0], NE, operands[4],
21544 operands[5], operands[1], operands[2]);
21545 code = AND;
21546 break;
21547 case UNEQ:
21548 temp = ix86_expand_sse_cmp (operands[0], UNORDERED, operands[4],
21549 operands[5], operands[0], operands[0]);
21550 cmp = ix86_expand_sse_cmp (operands[0], EQ, operands[4],
21551 operands[5], operands[1], operands[2]);
21552 code = IOR;
21553 break;
21554 default:
21555 gcc_unreachable ();
21557 cmp = expand_simple_binop (GET_MODE (cmp), code, temp, cmp, cmp, 1,
21558 OPTAB_DIRECT);
21559 ix86_expand_sse_movcc (operands[0], cmp, operands[1], operands[2]);
21560 return true;
21563 if (ix86_expand_sse_fp_minmax (operands[0], code, operands[4],
21564 operands[5], operands[1], operands[2]))
21565 return true;
21567 cmp = ix86_expand_sse_cmp (operands[0], code, operands[4], operands[5],
21568 operands[1], operands[2]);
21569 ix86_expand_sse_movcc (operands[0], cmp, operands[1], operands[2]);
21570 return true;
21573 /* Expand a signed/unsigned integral vector conditional move. */
21575 bool
21576 ix86_expand_int_vcond (rtx operands[])
21578 machine_mode data_mode = GET_MODE (operands[0]);
21579 machine_mode mode = GET_MODE (operands[4]);
21580 enum rtx_code code = GET_CODE (operands[3]);
21581 bool negate = false;
21582 rtx x, cop0, cop1;
21584 cop0 = operands[4];
21585 cop1 = operands[5];
21587 /* Try to optimize x < 0 ? -1 : 0 into (signed) x >> 31
21588 and x < 0 ? 1 : 0 into (unsigned) x >> 31. */
21589 if ((code == LT || code == GE)
21590 && data_mode == mode
21591 && cop1 == CONST0_RTX (mode)
21592 && operands[1 + (code == LT)] == CONST0_RTX (data_mode)
21593 && GET_MODE_SIZE (GET_MODE_INNER (data_mode)) > 1
21594 && GET_MODE_SIZE (GET_MODE_INNER (data_mode)) <= 8
21595 && (GET_MODE_SIZE (data_mode) == 16
21596 || (TARGET_AVX2 && GET_MODE_SIZE (data_mode) == 32)))
21598 rtx negop = operands[2 - (code == LT)];
21599 int shift = GET_MODE_BITSIZE (GET_MODE_INNER (data_mode)) - 1;
21600 if (negop == CONST1_RTX (data_mode))
21602 rtx res = expand_simple_binop (mode, LSHIFTRT, cop0, GEN_INT (shift),
21603 operands[0], 1, OPTAB_DIRECT);
21604 if (res != operands[0])
21605 emit_move_insn (operands[0], res);
21606 return true;
21608 else if (GET_MODE_INNER (data_mode) != DImode
21609 && vector_all_ones_operand (negop, data_mode))
21611 rtx res = expand_simple_binop (mode, ASHIFTRT, cop0, GEN_INT (shift),
21612 operands[0], 0, OPTAB_DIRECT);
21613 if (res != operands[0])
21614 emit_move_insn (operands[0], res);
21615 return true;
21619 if (!nonimmediate_operand (cop1, mode))
21620 cop1 = force_reg (mode, cop1);
21621 if (!general_operand (operands[1], data_mode))
21622 operands[1] = force_reg (data_mode, operands[1]);
21623 if (!general_operand (operands[2], data_mode))
21624 operands[2] = force_reg (data_mode, operands[2]);
21626 /* XOP supports all of the comparisons on all 128-bit vector int types. */
21627 if (TARGET_XOP
21628 && (mode == V16QImode || mode == V8HImode
21629 || mode == V4SImode || mode == V2DImode))
21631 else
21633 /* Canonicalize the comparison to EQ, GT, GTU. */
21634 switch (code)
21636 case EQ:
21637 case GT:
21638 case GTU:
21639 break;
21641 case NE:
21642 case LE:
21643 case LEU:
21644 code = reverse_condition (code);
21645 negate = true;
21646 break;
21648 case GE:
21649 case GEU:
21650 code = reverse_condition (code);
21651 negate = true;
21652 /* FALLTHRU */
21654 case LT:
21655 case LTU:
21656 std::swap (cop0, cop1);
21657 code = swap_condition (code);
21658 break;
21660 default:
21661 gcc_unreachable ();
21664 /* Only SSE4.1/SSE4.2 supports V2DImode. */
21665 if (mode == V2DImode)
21667 switch (code)
21669 case EQ:
21670 /* SSE4.1 supports EQ. */
21671 if (!TARGET_SSE4_1)
21672 return false;
21673 break;
21675 case GT:
21676 case GTU:
21677 /* SSE4.2 supports GT/GTU. */
21678 if (!TARGET_SSE4_2)
21679 return false;
21680 break;
21682 default:
21683 gcc_unreachable ();
21687 /* Unsigned parallel compare is not supported by the hardware.
21688 Play some tricks to turn this into a signed comparison
21689 against 0. */
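      /* For SImode/DImode elements the sign bit of both operands is flipped
	 (by subtracting the sign-bit mask), turning the unsigned GTU into a
	 signed GT; for QImode/HImode elements an unsigned saturating
	 subtraction followed by an equality test against zero is used
	 instead, with the sense of the selection inverted via NEGATE.  */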
21690 if (code == GTU)
21692 cop0 = force_reg (mode, cop0);
21694 switch (mode)
21696 case V16SImode:
21697 case V8DImode:
21698 case V8SImode:
21699 case V4DImode:
21700 case V4SImode:
21701 case V2DImode:
21703 rtx t1, t2, mask;
21704 rtx (*gen_sub3) (rtx, rtx, rtx);
21706 switch (mode)
21708 case V16SImode: gen_sub3 = gen_subv16si3; break;
21709 case V8DImode: gen_sub3 = gen_subv8di3; break;
21710 case V8SImode: gen_sub3 = gen_subv8si3; break;
21711 case V4DImode: gen_sub3 = gen_subv4di3; break;
21712 case V4SImode: gen_sub3 = gen_subv4si3; break;
21713 case V2DImode: gen_sub3 = gen_subv2di3; break;
21714 default:
21715 gcc_unreachable ();
21717 /* Subtract (-(INT MAX) - 1) from both operands to make
21718 them signed. */
21719 mask = ix86_build_signbit_mask (mode, true, false);
21720 t1 = gen_reg_rtx (mode);
21721 emit_insn (gen_sub3 (t1, cop0, mask));
21723 t2 = gen_reg_rtx (mode);
21724 emit_insn (gen_sub3 (t2, cop1, mask));
21726 cop0 = t1;
21727 cop1 = t2;
21728 code = GT;
21730 break;
21732 case V64QImode:
21733 case V32HImode:
21734 case V32QImode:
21735 case V16HImode:
21736 case V16QImode:
21737 case V8HImode:
21738 /* Perform a parallel unsigned saturating subtraction. */
21739 x = gen_reg_rtx (mode);
21740 emit_insn (gen_rtx_SET (VOIDmode, x,
21741 gen_rtx_US_MINUS (mode, cop0, cop1)));
21743 cop0 = x;
21744 cop1 = CONST0_RTX (mode);
21745 code = EQ;
21746 negate = !negate;
21747 break;
21749 default:
21750 gcc_unreachable ();
21755 /* Allow the comparison to be done in one mode, but the movcc to
21756 happen in another mode. */
21757 if (data_mode == mode)
21759 x = ix86_expand_sse_cmp (operands[0], code, cop0, cop1,
21760 operands[1+negate], operands[2-negate]);
21762 else
21764 gcc_assert (GET_MODE_SIZE (data_mode) == GET_MODE_SIZE (mode));
21765 x = ix86_expand_sse_cmp (gen_reg_rtx (mode), code, cop0, cop1,
21766 operands[1+negate], operands[2-negate]);
21767 if (GET_MODE (x) == mode)
21768 x = gen_lowpart (data_mode, x);
21771 ix86_expand_sse_movcc (operands[0], x, operands[1+negate],
21772 operands[2-negate]);
21773 return true;
21776 /* AVX512F does support 64-byte integer vector operations,
21777 thus the longest vector we are faced with is V64QImode. */
21778 #define MAX_VECT_LEN 64
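/* Describes a constant vector permutation to expand: TARGET receives the
   permutation of OP0/OP1 selected by the NELT entries of PERM, in vector
   mode VMODE.  ONE_OPERAND_P is set when both inputs are the same value,
   and TESTING_P when we only need to know whether the permutation can be
   expanded, without emitting any insns.  */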
21780 struct expand_vec_perm_d
21782 rtx target, op0, op1;
21783 unsigned char perm[MAX_VECT_LEN];
21784 machine_mode vmode;
21785 unsigned char nelt;
21786 bool one_operand_p;
21787 bool testing_p;
21790 static bool
21791 ix86_expand_vec_perm_vpermi2 (rtx target, rtx op0, rtx mask, rtx op1,
21792 struct expand_vec_perm_d *d)
21794   /* ix86_expand_vec_perm_vpermi2 is called from both the const and non-const
21795      expanders, so the arguments are either in D, or in OP0, OP1 etc.  */
21796 machine_mode mode = GET_MODE (d ? d->op0 : op0);
21797 machine_mode maskmode = mode;
21798 rtx (*gen) (rtx, rtx, rtx, rtx) = NULL;
21800 switch (mode)
21802 case V8HImode:
21803 if (TARGET_AVX512VL && TARGET_AVX512BW)
21804 gen = gen_avx512vl_vpermi2varv8hi3;
21805 break;
21806 case V16HImode:
21807 if (TARGET_AVX512VL && TARGET_AVX512BW)
21808 gen = gen_avx512vl_vpermi2varv16hi3;
21809 break;
21810 case V32HImode:
21811 if (TARGET_AVX512BW)
21812 gen = gen_avx512bw_vpermi2varv32hi3;
21813 break;
21814 case V4SImode:
21815 if (TARGET_AVX512VL)
21816 gen = gen_avx512vl_vpermi2varv4si3;
21817 break;
21818 case V8SImode:
21819 if (TARGET_AVX512VL)
21820 gen = gen_avx512vl_vpermi2varv8si3;
21821 break;
21822 case V16SImode:
21823 if (TARGET_AVX512F)
21824 gen = gen_avx512f_vpermi2varv16si3;
21825 break;
21826 case V4SFmode:
21827 if (TARGET_AVX512VL)
21829 gen = gen_avx512vl_vpermi2varv4sf3;
21830 maskmode = V4SImode;
21832 break;
21833 case V8SFmode:
21834 if (TARGET_AVX512VL)
21836 gen = gen_avx512vl_vpermi2varv8sf3;
21837 maskmode = V8SImode;
21839 break;
21840 case V16SFmode:
21841 if (TARGET_AVX512F)
21843 gen = gen_avx512f_vpermi2varv16sf3;
21844 maskmode = V16SImode;
21846 break;
21847 case V2DImode:
21848 if (TARGET_AVX512VL)
21849 gen = gen_avx512vl_vpermi2varv2di3;
21850 break;
21851 case V4DImode:
21852 if (TARGET_AVX512VL)
21853 gen = gen_avx512vl_vpermi2varv4di3;
21854 break;
21855 case V8DImode:
21856 if (TARGET_AVX512F)
21857 gen = gen_avx512f_vpermi2varv8di3;
21858 break;
21859 case V2DFmode:
21860 if (TARGET_AVX512VL)
21862 gen = gen_avx512vl_vpermi2varv2df3;
21863 maskmode = V2DImode;
21865 break;
21866 case V4DFmode:
21867 if (TARGET_AVX512VL)
21869 gen = gen_avx512vl_vpermi2varv4df3;
21870 maskmode = V4DImode;
21872 break;
21873 case V8DFmode:
21874 if (TARGET_AVX512F)
21876 gen = gen_avx512f_vpermi2varv8df3;
21877 maskmode = V8DImode;
21879 break;
21880 default:
21881 break;
21884 if (gen == NULL)
21885 return false;
21887   /* ix86_expand_vec_perm_vpermi2 is called from both the const and non-const
21888      expanders, so the arguments are either in D, or in OP0, OP1 etc.  */
21889 if (d)
21891 rtx vec[64];
21892 target = d->target;
21893 op0 = d->op0;
21894 op1 = d->op1;
21895 for (int i = 0; i < d->nelt; ++i)
21896 vec[i] = GEN_INT (d->perm[i]);
21897 mask = gen_rtx_CONST_VECTOR (maskmode, gen_rtvec_v (d->nelt, vec));
21900 emit_insn (gen (target, op0, force_reg (maskmode, mask), op1));
21901 return true;
21904 /* Expand a variable vector permutation. */
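/* operands[0] is the destination, operands[1] and operands[2] the two input
   vectors, and operands[3] the variable selector.  */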
21906 void
21907 ix86_expand_vec_perm (rtx operands[])
21909 rtx target = operands[0];
21910 rtx op0 = operands[1];
21911 rtx op1 = operands[2];
21912 rtx mask = operands[3];
21913 rtx t1, t2, t3, t4, t5, t6, t7, t8, vt, vt2, vec[32];
21914 machine_mode mode = GET_MODE (op0);
21915 machine_mode maskmode = GET_MODE (mask);
21916 int w, e, i;
21917 bool one_operand_shuffle = rtx_equal_p (op0, op1);
21919 /* Number of elements in the vector. */
21920 w = GET_MODE_NUNITS (mode);
21921 e = GET_MODE_UNIT_SIZE (mode);
21922 gcc_assert (w <= 64);
21924 if (ix86_expand_vec_perm_vpermi2 (target, op0, mask, op1, NULL))
21925 return;
21927 if (TARGET_AVX2)
21929 if (mode == V4DImode || mode == V4DFmode || mode == V16HImode)
21931 	  /* Unfortunately, the VPERMQ and VPERMPD instructions only support
21932 	     a constant shuffle operand.  With a tiny bit of effort we can
21933 	     use VPERMD instead.  A re-interpretation stall for V4DFmode is
21934 	     unfortunate but there's no avoiding it.
21935 	     Similarly for V16HImode we don't have instructions for variable
21936 	     shuffling, while for V32QImode we can, after preparing suitable
21937 	     masks, use vpshufb; vpshufb; vpermq; vpor.  */
21939 if (mode == V16HImode)
21941 maskmode = mode = V32QImode;
21942 w = 32;
21943 e = 1;
21945 else
21947 maskmode = mode = V8SImode;
21948 w = 8;
21949 e = 4;
21951 t1 = gen_reg_rtx (maskmode);
21953 /* Replicate the low bits of the V4DImode mask into V8SImode:
21954 mask = { A B C D }
21955 t1 = { A A B B C C D D }. */
21956 for (i = 0; i < w / 2; ++i)
21957 vec[i*2 + 1] = vec[i*2] = GEN_INT (i * 2);
21958 vt = gen_rtx_CONST_VECTOR (maskmode, gen_rtvec_v (w, vec));
21959 vt = force_reg (maskmode, vt);
21960 mask = gen_lowpart (maskmode, mask);
21961 if (maskmode == V8SImode)
21962 emit_insn (gen_avx2_permvarv8si (t1, mask, vt));
21963 else
21964 emit_insn (gen_avx2_pshufbv32qi3 (t1, mask, vt));
21966 	  /* Multiply the shuffle indices by two.  */
21967 t1 = expand_simple_binop (maskmode, PLUS, t1, t1, t1, 1,
21968 OPTAB_DIRECT);
21970 	  /* Add one to the odd shuffle indices:
21971 t1 = { A*2, A*2+1, B*2, B*2+1, ... }. */
21972 for (i = 0; i < w / 2; ++i)
21974 vec[i * 2] = const0_rtx;
21975 vec[i * 2 + 1] = const1_rtx;
21977 vt = gen_rtx_CONST_VECTOR (maskmode, gen_rtvec_v (w, vec));
21978 vt = validize_mem (force_const_mem (maskmode, vt));
21979 t1 = expand_simple_binop (maskmode, PLUS, t1, vt, t1, 1,
21980 OPTAB_DIRECT);
21982 /* Continue as if V8SImode (resp. V32QImode) was used initially. */
21983 operands[3] = mask = t1;
21984 target = gen_reg_rtx (mode);
21985 op0 = gen_lowpart (mode, op0);
21986 op1 = gen_lowpart (mode, op1);
21989 switch (mode)
21991 case V8SImode:
21992 /* The VPERMD and VPERMPS instructions already properly ignore
21993 the high bits of the shuffle elements. No need for us to
21994 perform an AND ourselves. */
21995 if (one_operand_shuffle)
21997 emit_insn (gen_avx2_permvarv8si (target, op0, mask));
21998 if (target != operands[0])
21999 emit_move_insn (operands[0],
22000 gen_lowpart (GET_MODE (operands[0]), target));
22002 else
22004 t1 = gen_reg_rtx (V8SImode);
22005 t2 = gen_reg_rtx (V8SImode);
22006 emit_insn (gen_avx2_permvarv8si (t1, op0, mask));
22007 emit_insn (gen_avx2_permvarv8si (t2, op1, mask));
22008 goto merge_two;
22010 return;
22012 case V8SFmode:
22013 mask = gen_lowpart (V8SImode, mask);
22014 if (one_operand_shuffle)
22015 emit_insn (gen_avx2_permvarv8sf (target, op0, mask));
22016 else
22018 t1 = gen_reg_rtx (V8SFmode);
22019 t2 = gen_reg_rtx (V8SFmode);
22020 emit_insn (gen_avx2_permvarv8sf (t1, op0, mask));
22021 emit_insn (gen_avx2_permvarv8sf (t2, op1, mask));
22022 goto merge_two;
22024 return;
22026 case V4SImode:
22027 /* By combining the two 128-bit input vectors into one 256-bit
22028 input vector, we can use VPERMD and VPERMPS for the full
22029 two-operand shuffle. */
22030 t1 = gen_reg_rtx (V8SImode);
22031 t2 = gen_reg_rtx (V8SImode);
22032 emit_insn (gen_avx_vec_concatv8si (t1, op0, op1));
22033 emit_insn (gen_avx_vec_concatv8si (t2, mask, mask));
22034 emit_insn (gen_avx2_permvarv8si (t1, t1, t2));
22035 emit_insn (gen_avx_vextractf128v8si (target, t1, const0_rtx));
22036 return;
22038 case V4SFmode:
22039 t1 = gen_reg_rtx (V8SFmode);
22040 t2 = gen_reg_rtx (V8SImode);
22041 mask = gen_lowpart (V4SImode, mask);
22042 emit_insn (gen_avx_vec_concatv8sf (t1, op0, op1));
22043 emit_insn (gen_avx_vec_concatv8si (t2, mask, mask));
22044 emit_insn (gen_avx2_permvarv8sf (t1, t1, t2));
22045 emit_insn (gen_avx_vextractf128v8sf (target, t1, const0_rtx));
22046 return;
22048 case V32QImode:
22049 t1 = gen_reg_rtx (V32QImode);
22050 t2 = gen_reg_rtx (V32QImode);
22051 t3 = gen_reg_rtx (V32QImode);
22052 vt2 = GEN_INT (-128);
22053 for (i = 0; i < 32; i++)
22054 vec[i] = vt2;
22055 vt = gen_rtx_CONST_VECTOR (V32QImode, gen_rtvec_v (32, vec));
22056 vt = force_reg (V32QImode, vt);
22057 for (i = 0; i < 32; i++)
22058 vec[i] = i < 16 ? vt2 : const0_rtx;
22059 vt2 = gen_rtx_CONST_VECTOR (V32QImode, gen_rtvec_v (32, vec));
22060 vt2 = force_reg (V32QImode, vt2);
22061 /* From mask create two adjusted masks, which contain the same
22062 bits as mask in the low 7 bits of each vector element.
22063 The first mask will have the most significant bit clear
22064 if it requests element from the same 128-bit lane
22065 and MSB set if it requests element from the other 128-bit lane.
22066 The second mask will have the opposite values of the MSB,
22067 and additionally will have its 128-bit lanes swapped.
22068 E.g. { 07 12 1e 09 ... | 17 19 05 1f ... } mask vector will have
22069 t1 { 07 92 9e 09 ... | 17 19 85 1f ... } and
22070 t3 { 97 99 05 9f ... | 87 12 1e 89 ... } where each ...
22071 stands for other 12 bytes. */
22072       /* The bit that says whether an element comes from the same lane or
22073 	 from the other lane is bit 4, so shift it up by 3 to the MSB position.  */
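      /* pshufb zeroes a result byte when the corresponding selector byte has
	 its MSB set, which is what makes the cross-lane bytes drop out in the
	 shuffles below.  */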
22074 t5 = gen_reg_rtx (V4DImode);
22075 emit_insn (gen_ashlv4di3 (t5, gen_lowpart (V4DImode, mask),
22076 GEN_INT (3)));
22077 /* Clear MSB bits from the mask just in case it had them set. */
22078 emit_insn (gen_avx2_andnotv32qi3 (t2, vt, mask));
22079 /* After this t1 will have MSB set for elements from other lane. */
22080 emit_insn (gen_xorv32qi3 (t1, gen_lowpart (V32QImode, t5), vt2));
22081 /* Clear bits other than MSB. */
22082 emit_insn (gen_andv32qi3 (t1, t1, vt));
22083 /* Or in the lower bits from mask into t3. */
22084 emit_insn (gen_iorv32qi3 (t3, t1, t2));
22085 /* And invert MSB bits in t1, so MSB is set for elements from the same
22086 lane. */
22087 emit_insn (gen_xorv32qi3 (t1, t1, vt));
22088 /* Swap 128-bit lanes in t3. */
22089 t6 = gen_reg_rtx (V4DImode);
22090 emit_insn (gen_avx2_permv4di_1 (t6, gen_lowpart (V4DImode, t3),
22091 const2_rtx, GEN_INT (3),
22092 const0_rtx, const1_rtx));
22093 /* And or in the lower bits from mask into t1. */
22094 emit_insn (gen_iorv32qi3 (t1, t1, t2));
22095 if (one_operand_shuffle)
22097 /* Each of these shuffles will put 0s in places where
22098 element from the other 128-bit lane is needed, otherwise
22099 will shuffle in the requested value. */
22100 emit_insn (gen_avx2_pshufbv32qi3 (t3, op0,
22101 gen_lowpart (V32QImode, t6)));
22102 emit_insn (gen_avx2_pshufbv32qi3 (t1, op0, t1));
22103 /* For t3 the 128-bit lanes are swapped again. */
22104 t7 = gen_reg_rtx (V4DImode);
22105 emit_insn (gen_avx2_permv4di_1 (t7, gen_lowpart (V4DImode, t3),
22106 const2_rtx, GEN_INT (3),
22107 const0_rtx, const1_rtx));
22108 /* And oring both together leads to the result. */
22109 emit_insn (gen_iorv32qi3 (target, t1,
22110 gen_lowpart (V32QImode, t7)));
22111 if (target != operands[0])
22112 emit_move_insn (operands[0],
22113 gen_lowpart (GET_MODE (operands[0]), target));
22114 return;
22117 t4 = gen_reg_rtx (V32QImode);
22118       /* Similar to the one_operand_shuffle code above, just repeated
22119 	 twice, once for each operand.  The merge_two: code below will
22120 	 merge the two results together.  */
22121 emit_insn (gen_avx2_pshufbv32qi3 (t4, op0,
22122 gen_lowpart (V32QImode, t6)));
22123 emit_insn (gen_avx2_pshufbv32qi3 (t3, op1,
22124 gen_lowpart (V32QImode, t6)));
22125 emit_insn (gen_avx2_pshufbv32qi3 (t2, op0, t1));
22126 emit_insn (gen_avx2_pshufbv32qi3 (t1, op1, t1));
22127 t7 = gen_reg_rtx (V4DImode);
22128 emit_insn (gen_avx2_permv4di_1 (t7, gen_lowpart (V4DImode, t4),
22129 const2_rtx, GEN_INT (3),
22130 const0_rtx, const1_rtx));
22131 t8 = gen_reg_rtx (V4DImode);
22132 emit_insn (gen_avx2_permv4di_1 (t8, gen_lowpart (V4DImode, t3),
22133 const2_rtx, GEN_INT (3),
22134 const0_rtx, const1_rtx));
22135 emit_insn (gen_iorv32qi3 (t4, t2, gen_lowpart (V32QImode, t7)));
22136 emit_insn (gen_iorv32qi3 (t3, t1, gen_lowpart (V32QImode, t8)));
22137 t1 = t4;
22138 t2 = t3;
22139 goto merge_two;
22141 default:
22142 gcc_assert (GET_MODE_SIZE (mode) <= 16);
22143 break;
22147 if (TARGET_XOP)
22149 /* The XOP VPPERM insn supports three inputs. By ignoring the
22150 one_operand_shuffle special case, we avoid creating another
22151 set of constant vectors in memory. */
22152 one_operand_shuffle = false;
22154 /* mask = mask & {2*w-1, ...} */
22155 vt = GEN_INT (2*w - 1);
22157 else
22159 /* mask = mask & {w-1, ...} */
22160 vt = GEN_INT (w - 1);
22163 for (i = 0; i < w; i++)
22164 vec[i] = vt;
22165 vt = gen_rtx_CONST_VECTOR (maskmode, gen_rtvec_v (w, vec));
22166 mask = expand_simple_binop (maskmode, AND, mask, vt,
22167 NULL_RTX, 0, OPTAB_DIRECT);
22169 /* For non-QImode operations, convert the word permutation control
22170 into a byte permutation control. */
22171 if (mode != V16QImode)
22173 mask = expand_simple_binop (maskmode, ASHIFT, mask,
22174 GEN_INT (exact_log2 (e)),
22175 NULL_RTX, 0, OPTAB_DIRECT);
22177 /* Convert mask to vector of chars. */
22178 mask = force_reg (V16QImode, gen_lowpart (V16QImode, mask));
22180 /* Replicate each of the input bytes into byte positions:
22181 (v2di) --> {0,0,0,0,0,0,0,0, 8,8,8,8,8,8,8,8}
22182 (v4si) --> {0,0,0,0, 4,4,4,4, 8,8,8,8, 12,12,12,12}
22183 (v8hi) --> {0,0, 2,2, 4,4, 6,6, ...}. */
22184 for (i = 0; i < 16; ++i)
22185 vec[i] = GEN_INT (i/e * e);
22186 vt = gen_rtx_CONST_VECTOR (V16QImode, gen_rtvec_v (16, vec));
22187 vt = validize_mem (force_const_mem (V16QImode, vt));
22188 if (TARGET_XOP)
22189 emit_insn (gen_xop_pperm (mask, mask, mask, vt));
22190 else
22191 emit_insn (gen_ssse3_pshufbv16qi3 (mask, mask, vt));
22193 /* Convert it into the byte positions by doing
22194 mask = mask + {0,1,..,16/w, 0,1,..,16/w, ...} */
22195 for (i = 0; i < 16; ++i)
22196 vec[i] = GEN_INT (i % e);
22197 vt = gen_rtx_CONST_VECTOR (V16QImode, gen_rtvec_v (16, vec));
22198 vt = validize_mem (force_const_mem (V16QImode, vt));
22199 emit_insn (gen_addv16qi3 (mask, mask, vt));
22202 /* The actual shuffle operations all operate on V16QImode. */
22203 op0 = gen_lowpart (V16QImode, op0);
22204 op1 = gen_lowpart (V16QImode, op1);
22206 if (TARGET_XOP)
22208 if (GET_MODE (target) != V16QImode)
22209 target = gen_reg_rtx (V16QImode);
22210 emit_insn (gen_xop_pperm (target, op0, op1, mask));
22211 if (target != operands[0])
22212 emit_move_insn (operands[0],
22213 gen_lowpart (GET_MODE (operands[0]), target));
22215 else if (one_operand_shuffle)
22217 if (GET_MODE (target) != V16QImode)
22218 target = gen_reg_rtx (V16QImode);
22219 emit_insn (gen_ssse3_pshufbv16qi3 (target, op0, mask));
22220 if (target != operands[0])
22221 emit_move_insn (operands[0],
22222 gen_lowpart (GET_MODE (operands[0]), target));
22224 else
22226 rtx xops[6];
22227 bool ok;
22229 /* Shuffle the two input vectors independently. */
22230 t1 = gen_reg_rtx (V16QImode);
22231 t2 = gen_reg_rtx (V16QImode);
22232 emit_insn (gen_ssse3_pshufbv16qi3 (t1, op0, mask));
22233 emit_insn (gen_ssse3_pshufbv16qi3 (t2, op1, mask));
22235 merge_two:
22236 /* Then merge them together. The key is whether any given control
22237 element contained a bit set that indicates the second word. */
22238 mask = operands[3];
22239 vt = GEN_INT (w);
22240 if (maskmode == V2DImode && !TARGET_SSE4_1)
22242 	  /* Without SSE4.1, we don't have V2DImode EQ.  Perform one
22243 	     more shuffle to convert the V2DI input mask into a V4SI
22244 	     input mask, at which point the masking that expand_int_vcond
22245 	     does will work as desired.  */
22246 rtx t3 = gen_reg_rtx (V4SImode);
22247 emit_insn (gen_sse2_pshufd_1 (t3, gen_lowpart (V4SImode, mask),
22248 const0_rtx, const0_rtx,
22249 const2_rtx, const2_rtx));
22250 mask = t3;
22251 maskmode = V4SImode;
22252 e = w = 4;
22255 for (i = 0; i < w; i++)
22256 vec[i] = vt;
22257 vt = gen_rtx_CONST_VECTOR (maskmode, gen_rtvec_v (w, vec));
22258 vt = force_reg (maskmode, vt);
22259 mask = expand_simple_binop (maskmode, AND, mask, vt,
22260 NULL_RTX, 0, OPTAB_DIRECT);
22262 if (GET_MODE (target) != mode)
22263 target = gen_reg_rtx (mode);
22264 xops[0] = target;
22265 xops[1] = gen_lowpart (mode, t2);
22266 xops[2] = gen_lowpart (mode, t1);
22267 xops[3] = gen_rtx_EQ (maskmode, mask, vt);
22268 xops[4] = mask;
22269 xops[5] = vt;
22270 ok = ix86_expand_int_vcond (xops);
22271 gcc_assert (ok);
22272 if (target != operands[0])
22273 emit_move_insn (operands[0],
22274 gen_lowpart (GET_MODE (operands[0]), target));
22278 /* Unpack SRC into DEST, the next wider integer vector type.  UNSIGNED_P is
22279    true if we should do zero extension, else sign extension.  HIGH_P is
22280    true if we want the N/2 high elements, else the low elements.  */
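/* E.g. unpacking a V8HImode SRC with HIGH_P set widens its four high
   halfword elements into a V4SImode DEST.  */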
22282 void
22283 ix86_expand_sse_unpack (rtx dest, rtx src, bool unsigned_p, bool high_p)
22285 machine_mode imode = GET_MODE (src);
22286 rtx tmp;
22288 if (TARGET_SSE4_1)
22290 rtx (*unpack)(rtx, rtx);
22291 rtx (*extract)(rtx, rtx) = NULL;
22292 machine_mode halfmode = BLKmode;
22294 switch (imode)
22296 case V64QImode:
22297 if (unsigned_p)
22298 unpack = gen_avx512bw_zero_extendv32qiv32hi2;
22299 else
22300 unpack = gen_avx512bw_sign_extendv32qiv32hi2;
22301 halfmode = V32QImode;
22302 extract
22303 = high_p ? gen_vec_extract_hi_v64qi : gen_vec_extract_lo_v64qi;
22304 break;
22305 case V32QImode:
22306 if (unsigned_p)
22307 unpack = gen_avx2_zero_extendv16qiv16hi2;
22308 else
22309 unpack = gen_avx2_sign_extendv16qiv16hi2;
22310 halfmode = V16QImode;
22311 extract
22312 = high_p ? gen_vec_extract_hi_v32qi : gen_vec_extract_lo_v32qi;
22313 break;
22314 case V32HImode:
22315 if (unsigned_p)
22316 unpack = gen_avx512f_zero_extendv16hiv16si2;
22317 else
22318 unpack = gen_avx512f_sign_extendv16hiv16si2;
22319 halfmode = V16HImode;
22320 extract
22321 = high_p ? gen_vec_extract_hi_v32hi : gen_vec_extract_lo_v32hi;
22322 break;
22323 case V16HImode:
22324 if (unsigned_p)
22325 unpack = gen_avx2_zero_extendv8hiv8si2;
22326 else
22327 unpack = gen_avx2_sign_extendv8hiv8si2;
22328 halfmode = V8HImode;
22329 extract
22330 = high_p ? gen_vec_extract_hi_v16hi : gen_vec_extract_lo_v16hi;
22331 break;
22332 case V16SImode:
22333 if (unsigned_p)
22334 unpack = gen_avx512f_zero_extendv8siv8di2;
22335 else
22336 unpack = gen_avx512f_sign_extendv8siv8di2;
22337 halfmode = V8SImode;
22338 extract
22339 = high_p ? gen_vec_extract_hi_v16si : gen_vec_extract_lo_v16si;
22340 break;
22341 case V8SImode:
22342 if (unsigned_p)
22343 unpack = gen_avx2_zero_extendv4siv4di2;
22344 else
22345 unpack = gen_avx2_sign_extendv4siv4di2;
22346 halfmode = V4SImode;
22347 extract
22348 = high_p ? gen_vec_extract_hi_v8si : gen_vec_extract_lo_v8si;
22349 break;
22350 case V16QImode:
22351 if (unsigned_p)
22352 unpack = gen_sse4_1_zero_extendv8qiv8hi2;
22353 else
22354 unpack = gen_sse4_1_sign_extendv8qiv8hi2;
22355 break;
22356 case V8HImode:
22357 if (unsigned_p)
22358 unpack = gen_sse4_1_zero_extendv4hiv4si2;
22359 else
22360 unpack = gen_sse4_1_sign_extendv4hiv4si2;
22361 break;
22362 case V4SImode:
22363 if (unsigned_p)
22364 unpack = gen_sse4_1_zero_extendv2siv2di2;
22365 else
22366 unpack = gen_sse4_1_sign_extendv2siv2di2;
22367 break;
22368 default:
22369 gcc_unreachable ();
22372 if (GET_MODE_SIZE (imode) >= 32)
22374 tmp = gen_reg_rtx (halfmode);
22375 emit_insn (extract (tmp, src));
22377 else if (high_p)
22379 /* Shift higher 8 bytes to lower 8 bytes. */
22380 tmp = gen_reg_rtx (V1TImode);
22381 emit_insn (gen_sse2_lshrv1ti3 (tmp, gen_lowpart (V1TImode, src),
22382 GEN_INT (64)));
22383 tmp = gen_lowpart (imode, tmp);
22385 else
22386 tmp = src;
22388 emit_insn (unpack (dest, tmp));
22390 else
22392 rtx (*unpack)(rtx, rtx, rtx);
22394 switch (imode)
22396 case V16QImode:
22397 if (high_p)
22398 unpack = gen_vec_interleave_highv16qi;
22399 else
22400 unpack = gen_vec_interleave_lowv16qi;
22401 break;
22402 case V8HImode:
22403 if (high_p)
22404 unpack = gen_vec_interleave_highv8hi;
22405 else
22406 unpack = gen_vec_interleave_lowv8hi;
22407 break;
22408 case V4SImode:
22409 if (high_p)
22410 unpack = gen_vec_interleave_highv4si;
22411 else
22412 unpack = gen_vec_interleave_lowv4si;
22413 break;
22414 default:
22415 gcc_unreachable ();
22418 if (unsigned_p)
22419 tmp = force_reg (imode, CONST0_RTX (imode));
22420 else
22421 tmp = ix86_expand_sse_cmp (gen_reg_rtx (imode), GT, CONST0_RTX (imode),
22422 src, pc_rtx, pc_rtx);
22424 rtx tmp2 = gen_reg_rtx (imode);
22425 emit_insn (unpack (tmp2, src, tmp));
22426 emit_move_insn (dest, gen_lowpart (GET_MODE (dest), tmp2));
22430 /* Expand conditional increment or decrement using adc/sbb instructions.
22431 The default case using setcc followed by the conditional move can be
22432 done by generic code. */
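/* operands[1] is the comparison and operands[3] must be +1 or -1; the
   increment or decrement is applied to operands[2] through an adc or sbb
   that consumes the carry produced by the comparison, and the result is
   stored in operands[0].  */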
22433 bool
22434 ix86_expand_int_addcc (rtx operands[])
22436 enum rtx_code code = GET_CODE (operands[1]);
22437 rtx flags;
22438 rtx (*insn)(rtx, rtx, rtx, rtx, rtx);
22439 rtx compare_op;
22440 rtx val = const0_rtx;
22441 bool fpcmp = false;
22442 machine_mode mode;
22443 rtx op0 = XEXP (operands[1], 0);
22444 rtx op1 = XEXP (operands[1], 1);
22446 if (operands[3] != const1_rtx
22447 && operands[3] != constm1_rtx)
22448 return false;
22449 if (!ix86_expand_carry_flag_compare (code, op0, op1, &compare_op))
22450 return false;
22451 code = GET_CODE (compare_op);
22453 flags = XEXP (compare_op, 0);
22455 if (GET_MODE (flags) == CCFPmode
22456 || GET_MODE (flags) == CCFPUmode)
22458 fpcmp = true;
22459 code = ix86_fp_compare_code_to_integer (code);
22462 if (code != LTU)
22464 val = constm1_rtx;
22465 if (fpcmp)
22466 PUT_CODE (compare_op,
22467 reverse_condition_maybe_unordered
22468 (GET_CODE (compare_op)));
22469 else
22470 PUT_CODE (compare_op, reverse_condition (GET_CODE (compare_op)));
22473 mode = GET_MODE (operands[0]);
22475 /* Construct either adc or sbb insn. */
22476 if ((code == LTU) == (operands[3] == constm1_rtx))
22478 switch (mode)
22480 case QImode:
22481 insn = gen_subqi3_carry;
22482 break;
22483 case HImode:
22484 insn = gen_subhi3_carry;
22485 break;
22486 case SImode:
22487 insn = gen_subsi3_carry;
22488 break;
22489 case DImode:
22490 insn = gen_subdi3_carry;
22491 break;
22492 default:
22493 gcc_unreachable ();
22496 else
22498 switch (mode)
22500 case QImode:
22501 insn = gen_addqi3_carry;
22502 break;
22503 case HImode:
22504 insn = gen_addhi3_carry;
22505 break;
22506 case SImode:
22507 insn = gen_addsi3_carry;
22508 break;
22509 case DImode:
22510 insn = gen_adddi3_carry;
22511 break;
22512 default:
22513 gcc_unreachable ();
22516 emit_insn (insn (operands[0], operands[2], val, flags, compare_op));
22518 return true;
22522 /* Split operands 0 and 1 into half-mode parts.  Similar to split_double_mode,
22523    but works for floating point parameters and non-offsettable memories.
22524    For pushes, it returns just stack offsets; the values will be saved
22525    in the right order.  At most four parts are generated.  */
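/* On 32-bit targets an XFmode value splits into three SImode parts and a
   TFmode value into four; on 64-bit targets XFmode and TFmode each split
   into two parts.  */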
22527 static int
22528 ix86_split_to_parts (rtx operand, rtx *parts, machine_mode mode)
22530 int size;
22532 if (!TARGET_64BIT)
22533 size = mode==XFmode ? 3 : GET_MODE_SIZE (mode) / 4;
22534 else
22535 size = (GET_MODE_SIZE (mode) + 4) / 8;
22537 gcc_assert (!REG_P (operand) || !MMX_REGNO_P (REGNO (operand)));
22538 gcc_assert (size >= 2 && size <= 4);
22540   /* Optimize constant pool references to immediates.  This is used by fp
22541      moves, which force all constants to memory to allow combining.  */
22542 if (MEM_P (operand) && MEM_READONLY_P (operand))
22544 rtx tmp = maybe_get_pool_constant (operand);
22545 if (tmp)
22546 operand = tmp;
22549 if (MEM_P (operand) && !offsettable_memref_p (operand))
22551       /* The only non-offsettable memories we handle are pushes.  */
22552 int ok = push_operand (operand, VOIDmode);
22554 gcc_assert (ok);
22556 operand = copy_rtx (operand);
22557 PUT_MODE (operand, word_mode);
22558 parts[0] = parts[1] = parts[2] = parts[3] = operand;
22559 return size;
22562 if (GET_CODE (operand) == CONST_VECTOR)
22564 machine_mode imode = int_mode_for_mode (mode);
22565 /* Caution: if we looked through a constant pool memory above,
22566 the operand may actually have a different mode now. That's
22567 ok, since we want to pun this all the way back to an integer. */
22568 operand = simplify_subreg (imode, operand, GET_MODE (operand), 0);
22569 gcc_assert (operand != NULL);
22570 mode = imode;
22573 if (!TARGET_64BIT)
22575 if (mode == DImode)
22576 split_double_mode (mode, &operand, 1, &parts[0], &parts[1]);
22577 else
22579 int i;
22581 if (REG_P (operand))
22583 gcc_assert (reload_completed);
22584 for (i = 0; i < size; i++)
22585 parts[i] = gen_rtx_REG (SImode, REGNO (operand) + i);
22587 else if (offsettable_memref_p (operand))
22589 operand = adjust_address (operand, SImode, 0);
22590 parts[0] = operand;
22591 for (i = 1; i < size; i++)
22592 parts[i] = adjust_address (operand, SImode, 4 * i);
22594 else if (GET_CODE (operand) == CONST_DOUBLE)
22596 REAL_VALUE_TYPE r;
22597 long l[4];
22599 REAL_VALUE_FROM_CONST_DOUBLE (r, operand);
22600 switch (mode)
22602 case TFmode:
22603 real_to_target (l, &r, mode);
22604 parts[3] = gen_int_mode (l[3], SImode);
22605 parts[2] = gen_int_mode (l[2], SImode);
22606 break;
22607 case XFmode:
22608 /* We can't use REAL_VALUE_TO_TARGET_LONG_DOUBLE since
22609 long double may not be 80-bit. */
22610 real_to_target (l, &r, mode);
22611 parts[2] = gen_int_mode (l[2], SImode);
22612 break;
22613 case DFmode:
22614 REAL_VALUE_TO_TARGET_DOUBLE (r, l);
22615 break;
22616 default:
22617 gcc_unreachable ();
22619 parts[1] = gen_int_mode (l[1], SImode);
22620 parts[0] = gen_int_mode (l[0], SImode);
22622 else
22623 gcc_unreachable ();
22626 else
22628 if (mode == TImode)
22629 split_double_mode (mode, &operand, 1, &parts[0], &parts[1]);
22630 if (mode == XFmode || mode == TFmode)
22632 machine_mode upper_mode = mode==XFmode ? SImode : DImode;
22633 if (REG_P (operand))
22635 gcc_assert (reload_completed);
22636 parts[0] = gen_rtx_REG (DImode, REGNO (operand) + 0);
22637 parts[1] = gen_rtx_REG (upper_mode, REGNO (operand) + 1);
22639 else if (offsettable_memref_p (operand))
22641 operand = adjust_address (operand, DImode, 0);
22642 parts[0] = operand;
22643 parts[1] = adjust_address (operand, upper_mode, 8);
22645 else if (GET_CODE (operand) == CONST_DOUBLE)
22647 REAL_VALUE_TYPE r;
22648 long l[4];
22650 REAL_VALUE_FROM_CONST_DOUBLE (r, operand);
22651 real_to_target (l, &r, mode);
22653 /* Do not use shift by 32 to avoid warning on 32bit systems. */
22654 if (HOST_BITS_PER_WIDE_INT >= 64)
22655 parts[0]
22656 = gen_int_mode
22657 ((l[0] & (((HOST_WIDE_INT) 2 << 31) - 1))
22658 + ((((HOST_WIDE_INT) l[1]) << 31) << 1),
22659 DImode);
22660 else
22661 parts[0] = immed_double_const (l[0], l[1], DImode);
22663 if (upper_mode == SImode)
22664 parts[1] = gen_int_mode (l[2], SImode);
22665 else if (HOST_BITS_PER_WIDE_INT >= 64)
22666 parts[1]
22667 = gen_int_mode
22668 ((l[2] & (((HOST_WIDE_INT) 2 << 31) - 1))
22669 + ((((HOST_WIDE_INT) l[3]) << 31) << 1),
22670 DImode);
22671 else
22672 parts[1] = immed_double_const (l[2], l[3], DImode);
22674 else
22675 gcc_unreachable ();
22679 return size;
22682 /* Emit insns to perform a move or push of DI, DF, XF, and TF values.
22683 Return false when normal moves are needed; true when all required
22684 insns have been emitted. Operands 2-4 contain the input values
22685    in the correct order; operands 5-7 contain the output values.  */
22687 void
22688 ix86_split_long_move (rtx operands[])
22690 rtx part[2][4];
22691 int nparts, i, j;
22692 int push = 0;
22693 int collisions = 0;
22694 machine_mode mode = GET_MODE (operands[0]);
22695 bool collisionparts[4];
22697   /* The DFmode expanders may ask us to move a double.
22698      For a 64-bit target this is a single move.  By hiding the fact
22699      here we simplify the i386.md splitters.  */
22700 if (TARGET_64BIT && GET_MODE_SIZE (GET_MODE (operands[0])) == 8)
22702       /* Optimize constant pool references to immediates.  This is used by
22703 	 fp moves, which force all constants to memory to allow combining.  */
22705 if (MEM_P (operands[1])
22706 && GET_CODE (XEXP (operands[1], 0)) == SYMBOL_REF
22707 && CONSTANT_POOL_ADDRESS_P (XEXP (operands[1], 0)))
22708 operands[1] = get_pool_constant (XEXP (operands[1], 0));
22709 if (push_operand (operands[0], VOIDmode))
22711 operands[0] = copy_rtx (operands[0]);
22712 PUT_MODE (operands[0], word_mode);
22714 else
22715 operands[0] = gen_lowpart (DImode, operands[0]);
22716 operands[1] = gen_lowpart (DImode, operands[1]);
22717 emit_move_insn (operands[0], operands[1]);
22718 return;
22721 /* The only non-offsettable memory we handle is push. */
22722 if (push_operand (operands[0], VOIDmode))
22723 push = 1;
22724 else
22725 gcc_assert (!MEM_P (operands[0])
22726 || offsettable_memref_p (operands[0]));
22728 nparts = ix86_split_to_parts (operands[1], part[1], GET_MODE (operands[0]));
22729 ix86_split_to_parts (operands[0], part[0], GET_MODE (operands[0]));
22731   /* When emitting a push, take care with source operands on the stack.  */
22732 if (push && MEM_P (operands[1])
22733 && reg_overlap_mentioned_p (stack_pointer_rtx, operands[1]))
22735 rtx src_base = XEXP (part[1][nparts - 1], 0);
22737 /* Compensate for the stack decrement by 4. */
22738 if (!TARGET_64BIT && nparts == 3
22739 && mode == XFmode && TARGET_128BIT_LONG_DOUBLE)
22740 src_base = plus_constant (Pmode, src_base, 4);
22742 /* src_base refers to the stack pointer and is
22743 automatically decremented by the emitted pushes. */
22744 for (i = 0; i < nparts; i++)
22745 part[1][i] = change_address (part[1][i],
22746 GET_MODE (part[1][i]), src_base);
22749 /* We need to do the copy in the right order in case an address register
22750 of the source overlaps the destination. */
22751 if (REG_P (part[0][0]) && MEM_P (part[1][0]))
22753 rtx tmp;
22755 for (i = 0; i < nparts; i++)
22757 collisionparts[i]
22758 = reg_overlap_mentioned_p (part[0][i], XEXP (part[1][0], 0));
22759 if (collisionparts[i])
22760 collisions++;
22763 /* Collision in the middle part can be handled by reordering. */
22764 if (collisions == 1 && nparts == 3 && collisionparts [1])
22766 std::swap (part[0][1], part[0][2]);
22767 std::swap (part[1][1], part[1][2]);
22769 else if (collisions == 1
22770 && nparts == 4
22771 && (collisionparts [1] || collisionparts [2]))
22773 if (collisionparts [1])
22775 std::swap (part[0][1], part[0][2]);
22776 std::swap (part[1][1], part[1][2]);
22778 else
22780 std::swap (part[0][2], part[0][3]);
22781 std::swap (part[1][2], part[1][3]);
22785 /* If there are more collisions, we can't handle it by reordering.
22786 Do an lea to the last part and use only one colliding move. */
22787 else if (collisions > 1)
22789 rtx base;
22791 collisions = 1;
22793 base = part[0][nparts - 1];
22795 /* Handle the case when the last part isn't valid for lea.
22796 Happens in 64-bit mode storing the 12-byte XFmode. */
22797 if (GET_MODE (base) != Pmode)
22798 base = gen_rtx_REG (Pmode, REGNO (base));
22800 emit_insn (gen_rtx_SET (VOIDmode, base, XEXP (part[1][0], 0)));
22801 part[1][0] = replace_equiv_address (part[1][0], base);
22802 for (i = 1; i < nparts; i++)
22804 tmp = plus_constant (Pmode, base, UNITS_PER_WORD * i);
22805 part[1][i] = replace_equiv_address (part[1][i], tmp);
22810 if (push)
22812 if (!TARGET_64BIT)
22814 if (nparts == 3)
22816 if (TARGET_128BIT_LONG_DOUBLE && mode == XFmode)
22817 emit_insn (ix86_gen_add3 (stack_pointer_rtx,
22818 stack_pointer_rtx, GEN_INT (-4)));
22819 emit_move_insn (part[0][2], part[1][2]);
22821 else if (nparts == 4)
22823 emit_move_insn (part[0][3], part[1][3]);
22824 emit_move_insn (part[0][2], part[1][2]);
22827 else
22829 /* In 64-bit mode we don't have a 32-bit push available. If this is a
22830 register, that is OK - we just use the larger counterpart. We also
22831 retype memory - this comes from an attempt to avoid a REX prefix on
22832 moving the second half of a TFmode value. */
22833 if (GET_MODE (part[1][1]) == SImode)
22835 switch (GET_CODE (part[1][1]))
22837 case MEM:
22838 part[1][1] = adjust_address (part[1][1], DImode, 0);
22839 break;
22841 case REG:
22842 part[1][1] = gen_rtx_REG (DImode, REGNO (part[1][1]));
22843 break;
22845 default:
22846 gcc_unreachable ();
22849 if (GET_MODE (part[1][0]) == SImode)
22850 part[1][0] = part[1][1];
22853 emit_move_insn (part[0][1], part[1][1]);
22854 emit_move_insn (part[0][0], part[1][0]);
22855 return;
22858 /* Choose the correct order so as not to overwrite the source before it is copied. */
22859 if ((REG_P (part[0][0])
22860 && REG_P (part[1][1])
22861 && (REGNO (part[0][0]) == REGNO (part[1][1])
22862 || (nparts == 3
22863 && REGNO (part[0][0]) == REGNO (part[1][2]))
22864 || (nparts == 4
22865 && REGNO (part[0][0]) == REGNO (part[1][3]))))
22866 || (collisions > 0
22867 && reg_overlap_mentioned_p (part[0][0], XEXP (part[1][0], 0))))
22869 for (i = 0, j = nparts - 1; i < nparts; i++, j--)
22871 operands[2 + i] = part[0][j];
22872 operands[6 + i] = part[1][j];
22875 else
22877 for (i = 0; i < nparts; i++)
22879 operands[2 + i] = part[0][i];
22880 operands[6 + i] = part[1][i];
22884 /* If optimizing for size, attempt to locally unCSE nonzero constants. */
22885 if (optimize_insn_for_size_p ())
22887 for (j = 0; j < nparts - 1; j++)
22888 if (CONST_INT_P (operands[6 + j])
22889 && operands[6 + j] != const0_rtx
22890 && REG_P (operands[2 + j]))
22891 for (i = j; i < nparts - 1; i++)
22892 if (CONST_INT_P (operands[7 + i])
22893 && INTVAL (operands[7 + i]) == INTVAL (operands[6 + j]))
22894 operands[7 + i] = operands[2 + j];
22897 for (i = 0; i < nparts; i++)
22898 emit_move_insn (operands[2 + i], operands[6 + i]);
22900 return;
22903 /* Helper function of ix86_split_ashl used to generate an SImode/DImode
22904 left shift by a constant, either using a single shift or
22905 a sequence of add instructions. */
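/* For example, a left shift of one half by 2 becomes two self-additions
   when twice the add cost does not exceed the constant-shift cost for the
   active cost table (and we are not optimizing for size).  */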
22907 static void
22908 ix86_expand_ashl_const (rtx operand, int count, machine_mode mode)
22910 rtx (*insn)(rtx, rtx, rtx);
22912 if (count == 1
22913 || (count * ix86_cost->add <= ix86_cost->shift_const
22914 && !optimize_insn_for_size_p ()))
22916 insn = mode == DImode ? gen_addsi3 : gen_adddi3;
22917 while (count-- > 0)
22918 emit_insn (insn (operand, operand, operand));
22920 else
22922 insn = mode == DImode ? gen_ashlsi3 : gen_ashldi3;
22923 emit_insn (insn (operand, operand, GEN_INT (count)));
22927 void
22928 ix86_split_ashl (rtx *operands, rtx scratch, machine_mode mode)
22930 rtx (*gen_ashl3)(rtx, rtx, rtx);
22931 rtx (*gen_shld)(rtx, rtx, rtx);
22932 int half_width = GET_MODE_BITSIZE (mode) >> 1;
22934 rtx low[2], high[2];
22935 int count;
22937 if (CONST_INT_P (operands[2]))
22939 split_double_mode (mode, operands, 2, low, high);
22940 count = INTVAL (operands[2]) & (GET_MODE_BITSIZE (mode) - 1);
22942 if (count >= half_width)
22944 emit_move_insn (high[0], low[1]);
22945 emit_move_insn (low[0], const0_rtx);
22947 if (count > half_width)
22948 ix86_expand_ashl_const (high[0], count - half_width, mode);
22950 else
22952 gen_shld = mode == DImode ? gen_x86_shld : gen_x86_64_shld;
22954 if (!rtx_equal_p (operands[0], operands[1]))
22955 emit_move_insn (operands[0], operands[1]);
22957 emit_insn (gen_shld (high[0], low[0], GEN_INT (count)));
22958 ix86_expand_ashl_const (low[0], count, mode);
22960 return;
22963 split_double_mode (mode, operands, 1, low, high);
22965 gen_ashl3 = mode == DImode ? gen_ashlsi3 : gen_ashldi3;
22967 if (operands[1] == const1_rtx)
22969 /* Assuming we've chosen QImode-capable registers, 1 << N
22970 can be done with two 32/64-bit shifts, no branches, no cmoves. */
22971 if (ANY_QI_REG_P (low[0]) && ANY_QI_REG_P (high[0]))
22973 rtx s, d, flags = gen_rtx_REG (CCZmode, FLAGS_REG);
22975 ix86_expand_clear (low[0]);
22976 ix86_expand_clear (high[0]);
22977 emit_insn (gen_testqi_ccz_1 (operands[2], GEN_INT (half_width)));
22979 d = gen_lowpart (QImode, low[0]);
22980 d = gen_rtx_STRICT_LOW_PART (VOIDmode, d);
22981 s = gen_rtx_EQ (QImode, flags, const0_rtx);
22982 emit_insn (gen_rtx_SET (VOIDmode, d, s));
22984 d = gen_lowpart (QImode, high[0]);
22985 d = gen_rtx_STRICT_LOW_PART (VOIDmode, d);
22986 s = gen_rtx_NE (QImode, flags, const0_rtx);
22987 emit_insn (gen_rtx_SET (VOIDmode, d, s));
22990 /* Otherwise, we can get the same results by manually performing
22991 a bit extract operation on bit 5/6, and then performing the two
22992 shifts. The two methods of getting 0/1 into low/high are exactly
22993 the same size. Avoiding the shift in the bit extract case helps
22994 pentium4 a bit; no one else seems to care much either way. */
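/* Concretely, for the DImode case bit 5 of the shift count decides which
   half receives the single set bit: high = (count >> 5) & 1, low = high ^ 1,
   and the final SImode shifts by COUNT (masked to 5 bits by the hardware)
   move that bit into position.  */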
22995 else
22997 machine_mode half_mode;
22998 rtx (*gen_lshr3)(rtx, rtx, rtx);
22999 rtx (*gen_and3)(rtx, rtx, rtx);
23000 rtx (*gen_xor3)(rtx, rtx, rtx);
23001 HOST_WIDE_INT bits;
23002 rtx x;
23004 if (mode == DImode)
23006 half_mode = SImode;
23007 gen_lshr3 = gen_lshrsi3;
23008 gen_and3 = gen_andsi3;
23009 gen_xor3 = gen_xorsi3;
23010 bits = 5;
23012 else
23014 half_mode = DImode;
23015 gen_lshr3 = gen_lshrdi3;
23016 gen_and3 = gen_anddi3;
23017 gen_xor3 = gen_xordi3;
23018 bits = 6;
23021 if (TARGET_PARTIAL_REG_STALL && !optimize_insn_for_size_p ())
23022 x = gen_rtx_ZERO_EXTEND (half_mode, operands[2]);
23023 else
23024 x = gen_lowpart (half_mode, operands[2]);
23025 emit_insn (gen_rtx_SET (VOIDmode, high[0], x));
23027 emit_insn (gen_lshr3 (high[0], high[0], GEN_INT (bits)));
23028 emit_insn (gen_and3 (high[0], high[0], const1_rtx));
23029 emit_move_insn (low[0], high[0]);
23030 emit_insn (gen_xor3 (low[0], low[0], const1_rtx));
23033 emit_insn (gen_ashl3 (low[0], low[0], operands[2]));
23034 emit_insn (gen_ashl3 (high[0], high[0], operands[2]));
23035 return;
23038 if (operands[1] == constm1_rtx)
23040 /* For -1 << N, we can avoid the shld instruction, because we
23041 know that we're shifting 0...31/63 ones into a -1. */
23042 emit_move_insn (low[0], constm1_rtx);
23043 if (optimize_insn_for_size_p ())
23044 emit_move_insn (high[0], low[0]);
23045 else
23046 emit_move_insn (high[0], constm1_rtx);
23048 else
23050 gen_shld = mode == DImode ? gen_x86_shld : gen_x86_64_shld;
23052 if (!rtx_equal_p (operands[0], operands[1]))
23053 emit_move_insn (operands[0], operands[1]);
23055 split_double_mode (mode, operands, 1, low, high);
23056 emit_insn (gen_shld (high[0], low[0], operands[2]));
23059 emit_insn (gen_ashl3 (low[0], low[0], operands[2]));
23061 if (TARGET_CMOVE && scratch)
23063 rtx (*gen_x86_shift_adj_1)(rtx, rtx, rtx, rtx)
23064 = mode == DImode ? gen_x86_shiftsi_adj_1 : gen_x86_shiftdi_adj_1;
23066 ix86_expand_clear (scratch);
23067 emit_insn (gen_x86_shift_adj_1 (high[0], low[0], operands[2], scratch));
23069 else
23071 rtx (*gen_x86_shift_adj_2)(rtx, rtx, rtx)
23072 = mode == DImode ? gen_x86_shiftsi_adj_2 : gen_x86_shiftdi_adj_2;
23074 emit_insn (gen_x86_shift_adj_2 (high[0], low[0], operands[2]));
23078 void
23079 ix86_split_ashr (rtx *operands, rtx scratch, machine_mode mode)
23081 rtx (*gen_ashr3)(rtx, rtx, rtx)
23082 = mode == DImode ? gen_ashrsi3 : gen_ashrdi3;
23083 rtx (*gen_shrd)(rtx, rtx, rtx);
23084 int half_width = GET_MODE_BITSIZE (mode) >> 1;
23086 rtx low[2], high[2];
23087 int count;
23089 if (CONST_INT_P (operands[2]))
23091 split_double_mode (mode, operands, 2, low, high);
23092 count = INTVAL (operands[2]) & (GET_MODE_BITSIZE (mode) - 1);
23094 if (count == GET_MODE_BITSIZE (mode) - 1)
23096 emit_move_insn (high[0], high[1]);
23097 emit_insn (gen_ashr3 (high[0], high[0],
23098 GEN_INT (half_width - 1)));
23099 emit_move_insn (low[0], high[0]);
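/* An arithmetic shift by the full width minus one leaves only copies of
   the sign bit, so both result halves above are just the original high
   half shifted right by half_width - 1.  */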
23102 else if (count >= half_width)
23104 emit_move_insn (low[0], high[1]);
23105 emit_move_insn (high[0], low[0]);
23106 emit_insn (gen_ashr3 (high[0], high[0],
23107 GEN_INT (half_width - 1)));
23109 if (count > half_width)
23110 emit_insn (gen_ashr3 (low[0], low[0],
23111 GEN_INT (count - half_width)));
23113 else
23115 gen_shrd = mode == DImode ? gen_x86_shrd : gen_x86_64_shrd;
23117 if (!rtx_equal_p (operands[0], operands[1]))
23118 emit_move_insn (operands[0], operands[1]);
23120 emit_insn (gen_shrd (low[0], high[0], GEN_INT (count)));
23121 emit_insn (gen_ashr3 (high[0], high[0], GEN_INT (count)));
23124 else
23126 gen_shrd = mode == DImode ? gen_x86_shrd : gen_x86_64_shrd;
23128 if (!rtx_equal_p (operands[0], operands[1]))
23129 emit_move_insn (operands[0], operands[1]);
23131 split_double_mode (mode, operands, 1, low, high);
23133 emit_insn (gen_shrd (low[0], high[0], operands[2]));
23134 emit_insn (gen_ashr3 (high[0], high[0], operands[2]));
23136 if (TARGET_CMOVE && scratch)
23138 rtx (*gen_x86_shift_adj_1)(rtx, rtx, rtx, rtx)
23139 = mode == DImode ? gen_x86_shiftsi_adj_1 : gen_x86_shiftdi_adj_1;
23141 emit_move_insn (scratch, high[0]);
23142 emit_insn (gen_ashr3 (scratch, scratch,
23143 GEN_INT (half_width - 1)));
23144 emit_insn (gen_x86_shift_adj_1 (low[0], high[0], operands[2],
23145 scratch));
23147 else
23149 rtx (*gen_x86_shift_adj_3)(rtx, rtx, rtx)
23150 = mode == DImode ? gen_x86_shiftsi_adj_3 : gen_x86_shiftdi_adj_3;
23152 emit_insn (gen_x86_shift_adj_3 (low[0], high[0], operands[2]));
23157 void
23158 ix86_split_lshr (rtx *operands, rtx scratch, machine_mode mode)
23160 rtx (*gen_lshr3)(rtx, rtx, rtx)
23161 = mode == DImode ? gen_lshrsi3 : gen_lshrdi3;
23162 rtx (*gen_shrd)(rtx, rtx, rtx);
23163 int half_width = GET_MODE_BITSIZE (mode) >> 1;
23165 rtx low[2], high[2];
23166 int count;
23168 if (CONST_INT_P (operands[2]))
23170 split_double_mode (mode, operands, 2, low, high);
23171 count = INTVAL (operands[2]) & (GET_MODE_BITSIZE (mode) - 1);
23173 if (count >= half_width)
23175 emit_move_insn (low[0], high[1]);
23176 ix86_expand_clear (high[0]);
23178 if (count > half_width)
23179 emit_insn (gen_lshr3 (low[0], low[0],
23180 GEN_INT (count - half_width)));
23182 else
23184 gen_shrd = mode == DImode ? gen_x86_shrd : gen_x86_64_shrd;
23186 if (!rtx_equal_p (operands[0], operands[1]))
23187 emit_move_insn (operands[0], operands[1]);
23189 emit_insn (gen_shrd (low[0], high[0], GEN_INT (count)));
23190 emit_insn (gen_lshr3 (high[0], high[0], GEN_INT (count)));
23193 else
23195 gen_shrd = mode == DImode ? gen_x86_shrd : gen_x86_64_shrd;
23197 if (!rtx_equal_p (operands[0], operands[1]))
23198 emit_move_insn (operands[0], operands[1]);
23200 split_double_mode (mode, operands, 1, low, high);
23202 emit_insn (gen_shrd (low[0], high[0], operands[2]));
23203 emit_insn (gen_lshr3 (high[0], high[0], operands[2]));
23205 if (TARGET_CMOVE && scratch)
23207 rtx (*gen_x86_shift_adj_1)(rtx, rtx, rtx, rtx)
23208 = mode == DImode ? gen_x86_shiftsi_adj_1 : gen_x86_shiftdi_adj_1;
23210 ix86_expand_clear (scratch);
23211 emit_insn (gen_x86_shift_adj_1 (low[0], high[0], operands[2],
23212 scratch));
23214 else
23216 rtx (*gen_x86_shift_adj_2)(rtx, rtx, rtx)
23217 = mode == DImode ? gen_x86_shiftsi_adj_2 : gen_x86_shiftdi_adj_2;
23219 emit_insn (gen_x86_shift_adj_2 (low[0], high[0], operands[2]));
23224 /* Predict the just-emitted jump instruction to be taken with probability PROB. */
23225 static void
23226 predict_jump (int prob)
23228 rtx insn = get_last_insn ();
23229 gcc_assert (JUMP_P (insn));
23230 add_int_reg_note (insn, REG_BR_PROB, prob);
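/* PROB is expressed relative to REG_BR_PROB_BASE (10000), so e.g.
   REG_BR_PROB_BASE * 90 / 100 marks the branch as taken about 90% of
   the time.  */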
23233 /* Helper function for the string operations below. Test VARIABLE for whether
23234 it is aligned to VALUE bytes. If so, jump to the returned label. */
23235 static rtx_code_label *
23236 ix86_expand_aligntest (rtx variable, int value, bool epilogue)
23238 rtx_code_label *label = gen_label_rtx ();
23239 rtx tmpcount = gen_reg_rtx (GET_MODE (variable));
23240 if (GET_MODE (variable) == DImode)
23241 emit_insn (gen_anddi3 (tmpcount, variable, GEN_INT (value)));
23242 else
23243 emit_insn (gen_andsi3 (tmpcount, variable, GEN_INT (value)));
23244 emit_cmp_and_jump_insns (tmpcount, const0_rtx, EQ, 0, GET_MODE (variable),
23245 1, label);
23246 if (epilogue)
23247 predict_jump (REG_BR_PROB_BASE * 50 / 100);
23248 else
23249 predict_jump (REG_BR_PROB_BASE * 90 / 100);
23250 return label;
23253 /* Decrease COUNTREG by VALUE. */
23254 static void
23255 ix86_adjust_counter (rtx countreg, HOST_WIDE_INT value)
23257 rtx (*gen_add)(rtx, rtx, rtx)
23258 = GET_MODE (countreg) == DImode ? gen_adddi3 : gen_addsi3;
23260 emit_insn (gen_add (countreg, countreg, GEN_INT (-value)));
23263 /* Zero extend possibly SImode EXP to Pmode register. */
23265 ix86_zero_extend_to_Pmode (rtx exp)
23267 return force_reg (Pmode, convert_to_mode (Pmode, exp, 1));
23270 /* Divide COUNTREG by SCALE. */
23271 static rtx
23272 scale_counter (rtx countreg, int scale)
23274 rtx sc;
23276 if (scale == 1)
23277 return countreg;
23278 if (CONST_INT_P (countreg))
23279 return GEN_INT (INTVAL (countreg) / scale);
23280 gcc_assert (REG_P (countreg));
23282 sc = expand_simple_binop (GET_MODE (countreg), LSHIFTRT, countreg,
23283 GEN_INT (exact_log2 (scale)),
23284 NULL, 1, OPTAB_DIRECT);
23285 return sc;
23288 /* Return mode for the memcpy/memset loop counter. Prefer SImode over
23289 DImode for constant loop counts. */
23291 static machine_mode
23292 counter_mode (rtx count_exp)
23294 if (GET_MODE (count_exp) != VOIDmode)
23295 return GET_MODE (count_exp);
23296 if (!CONST_INT_P (count_exp))
23297 return Pmode;
23298 if (TARGET_64BIT && (INTVAL (count_exp) & ~0xffffffff))
23299 return DImode;
23300 return SImode;
23303 /* Copy the address to a Pmode register. This is used for x32 to
23304 truncate DImode TLS address to a SImode register. */
23306 static rtx
23307 ix86_copy_addr_to_reg (rtx addr)
23309 if (GET_MODE (addr) == Pmode || GET_MODE (addr) == VOIDmode)
23310 return copy_addr_to_reg (addr);
23311 else
23313 gcc_assert (GET_MODE (addr) == DImode && Pmode == SImode);
23314 return gen_rtx_SUBREG (SImode, copy_to_mode_reg (DImode, addr), 0);
23318 /* When ISSETMEM is FALSE, output a simple loop to move memory pointed to by SRCPTR
23319 to DESTPTR via chunks of MODE unrolled UNROLL times; the overall size is COUNT,
23320 specified in bytes. When ISSETMEM is TRUE, output the equivalent loop to set
23321 memory with VALUE (supposed to be in MODE).
23323 The size is rounded down to a whole number of chunks moved at once.
23324 SRCMEM and DESTMEM provide MEM rtxes to feed proper aliasing info. */
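/* As a rough sketch (not literal RTL), the emitted sequence is:

     size = count & ~(GET_MODE_SIZE (mode) * unroll - 1);
     iter = 0;
     do
       {
         copy or set UNROLL chunks of MODE at dest + iter (and src + iter);
         iter += GET_MODE_SIZE (mode) * unroll;
       }
     while (iter < size);
     destptr += iter;   // and srcptr += iter when copying  */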
23327 static void
23328 expand_set_or_movmem_via_loop (rtx destmem, rtx srcmem,
23329 rtx destptr, rtx srcptr, rtx value,
23330 rtx count, machine_mode mode, int unroll,
23331 int expected_size, bool issetmem)
23333 rtx_code_label *out_label, *top_label;
23334 rtx iter, tmp;
23335 machine_mode iter_mode = counter_mode (count);
23336 int piece_size_n = GET_MODE_SIZE (mode) * unroll;
23337 rtx piece_size = GEN_INT (piece_size_n);
23338 rtx piece_size_mask = GEN_INT (~((GET_MODE_SIZE (mode) * unroll) - 1));
23339 rtx size;
23340 int i;
23342 top_label = gen_label_rtx ();
23343 out_label = gen_label_rtx ();
23344 iter = gen_reg_rtx (iter_mode);
23346 size = expand_simple_binop (iter_mode, AND, count, piece_size_mask,
23347 NULL, 1, OPTAB_DIRECT);
23348 /* Those two should combine. */
23349 if (piece_size == const1_rtx)
23351 emit_cmp_and_jump_insns (size, const0_rtx, EQ, NULL_RTX, iter_mode,
23352 true, out_label);
23353 predict_jump (REG_BR_PROB_BASE * 10 / 100);
23355 emit_move_insn (iter, const0_rtx);
23357 emit_label (top_label);
23359 tmp = convert_modes (Pmode, iter_mode, iter, true);
23361 /* This assert could be relaxed - in that case we'd need to compute
23362 the smallest power of two containing PIECE_SIZE_N and pass it to
23363 offset_address. */
23364 gcc_assert ((piece_size_n & (piece_size_n - 1)) == 0);
23365 destmem = offset_address (destmem, tmp, piece_size_n);
23366 destmem = adjust_address (destmem, mode, 0);
23368 if (!issetmem)
23370 srcmem = offset_address (srcmem, copy_rtx (tmp), piece_size_n);
23371 srcmem = adjust_address (srcmem, mode, 0);
23373 /* When unrolling for chips that reorder memory reads and writes,
23374 we can save registers by using a single temporary.
23375 Also, using 4 temporaries is overkill in 32-bit mode. */
23376 if (!TARGET_64BIT && 0)
23378 for (i = 0; i < unroll; i++)
23380 if (i)
23382 destmem =
23383 adjust_address (copy_rtx (destmem), mode, GET_MODE_SIZE (mode));
23384 srcmem =
23385 adjust_address (copy_rtx (srcmem), mode, GET_MODE_SIZE (mode));
23387 emit_move_insn (destmem, srcmem);
23390 else
23392 rtx tmpreg[4];
23393 gcc_assert (unroll <= 4);
23394 for (i = 0; i < unroll; i++)
23396 tmpreg[i] = gen_reg_rtx (mode);
23397 if (i)
23399 srcmem =
23400 adjust_address (copy_rtx (srcmem), mode, GET_MODE_SIZE (mode));
23402 emit_move_insn (tmpreg[i], srcmem);
23404 for (i = 0; i < unroll; i++)
23406 if (i)
23408 destmem =
23409 adjust_address (copy_rtx (destmem), mode, GET_MODE_SIZE (mode));
23411 emit_move_insn (destmem, tmpreg[i]);
23415 else
23416 for (i = 0; i < unroll; i++)
23418 if (i)
23419 destmem =
23420 adjust_address (copy_rtx (destmem), mode, GET_MODE_SIZE (mode));
23421 emit_move_insn (destmem, value);
23424 tmp = expand_simple_binop (iter_mode, PLUS, iter, piece_size, iter,
23425 true, OPTAB_LIB_WIDEN);
23426 if (tmp != iter)
23427 emit_move_insn (iter, tmp);
23429 emit_cmp_and_jump_insns (iter, size, LT, NULL_RTX, iter_mode,
23430 true, top_label);
23431 if (expected_size != -1)
23433 expected_size /= GET_MODE_SIZE (mode) * unroll;
23434 if (expected_size == 0)
23435 predict_jump (0);
23436 else if (expected_size > REG_BR_PROB_BASE)
23437 predict_jump (REG_BR_PROB_BASE - 1);
23438 else
23439 predict_jump (REG_BR_PROB_BASE - (REG_BR_PROB_BASE + expected_size / 2) / expected_size);
23441 else
23442 predict_jump (REG_BR_PROB_BASE * 80 / 100);
23443 iter = ix86_zero_extend_to_Pmode (iter);
23444 tmp = expand_simple_binop (Pmode, PLUS, destptr, iter, destptr,
23445 true, OPTAB_LIB_WIDEN);
23446 if (tmp != destptr)
23447 emit_move_insn (destptr, tmp);
23448 if (!issetmem)
23450 tmp = expand_simple_binop (Pmode, PLUS, srcptr, iter, srcptr,
23451 true, OPTAB_LIB_WIDEN);
23452 if (tmp != srcptr)
23453 emit_move_insn (srcptr, tmp);
23455 emit_label (out_label);
23458 /* Output "rep; mov" or "rep; stos" instruction depending on ISSETMEM argument.
23459 When ISSETMEM is true, arguments SRCMEM and SRCPTR are ignored.
23460 When ISSETMEM is false, arguments VALUE and ORIG_VALUE are ignored.
23461 For the setmem case, VALUE is ORIG_VALUE promoted to a wider size.
23462 ORIG_VALUE is the original value passed to memset to fill the memory with.
23463 Other arguments have the same meaning as for the previous function. */
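/* The DESTEXP (and SRCEXP) expressions built below describe the final
   pointer values, i.e. the pointer plus COUNTREG scaled by the chunk
   size; the rep_stos/rep_mov patterns use them to model the pointer
   updates performed by the string instruction.  */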
23465 static void
23466 expand_set_or_movmem_via_rep (rtx destmem, rtx srcmem,
23467 rtx destptr, rtx srcptr, rtx value, rtx orig_value,
23468 rtx count,
23469 machine_mode mode, bool issetmem)
23471 rtx destexp;
23472 rtx srcexp;
23473 rtx countreg;
23474 HOST_WIDE_INT rounded_count;
23476 /* If possible, it is shorter to use rep movs.
23477 TODO: Maybe it is better to move this logic to decide_alg. */
23478 if (mode == QImode && CONST_INT_P (count) && !(INTVAL (count) & 3)
23479 && (!issetmem || orig_value == const0_rtx))
23480 mode = SImode;
23482 if (destptr != XEXP (destmem, 0) || GET_MODE (destmem) != BLKmode)
23483 destmem = adjust_automodify_address_nv (destmem, BLKmode, destptr, 0);
23485 countreg = ix86_zero_extend_to_Pmode (scale_counter (count,
23486 GET_MODE_SIZE (mode)));
23487 if (mode != QImode)
23489 destexp = gen_rtx_ASHIFT (Pmode, countreg,
23490 GEN_INT (exact_log2 (GET_MODE_SIZE (mode))));
23491 destexp = gen_rtx_PLUS (Pmode, destexp, destptr);
23493 else
23494 destexp = gen_rtx_PLUS (Pmode, destptr, countreg);
23495 if ((!issetmem || orig_value == const0_rtx) && CONST_INT_P (count))
23497 rounded_count = (INTVAL (count)
23498 & ~((HOST_WIDE_INT) GET_MODE_SIZE (mode) - 1));
23499 destmem = shallow_copy_rtx (destmem);
23500 set_mem_size (destmem, rounded_count);
23502 else if (MEM_SIZE_KNOWN_P (destmem))
23503 clear_mem_size (destmem);
23505 if (issetmem)
23507 value = force_reg (mode, gen_lowpart (mode, value));
23508 emit_insn (gen_rep_stos (destptr, countreg, destmem, value, destexp));
23510 else
23512 if (srcptr != XEXP (srcmem, 0) || GET_MODE (srcmem) != BLKmode)
23513 srcmem = adjust_automodify_address_nv (srcmem, BLKmode, srcptr, 0);
23514 if (mode != QImode)
23516 srcexp = gen_rtx_ASHIFT (Pmode, countreg,
23517 GEN_INT (exact_log2 (GET_MODE_SIZE (mode))));
23518 srcexp = gen_rtx_PLUS (Pmode, srcexp, srcptr);
23520 else
23521 srcexp = gen_rtx_PLUS (Pmode, srcptr, countreg);
23522 if (CONST_INT_P (count))
23524 rounded_count = (INTVAL (count)
23525 & ~((HOST_WIDE_INT) GET_MODE_SIZE (mode) - 1));
23526 srcmem = shallow_copy_rtx (srcmem);
23527 set_mem_size (srcmem, rounded_count);
23529 else
23531 if (MEM_SIZE_KNOWN_P (srcmem))
23532 clear_mem_size (srcmem);
23534 emit_insn (gen_rep_mov (destptr, destmem, srcptr, srcmem, countreg,
23535 destexp, srcexp));
23539 /* This function emits moves to copy SIZE_TO_MOVE bytes from SRCMEM to
23540 DESTMEM.
23541 SRCMEM is passed by pointer so it can be updated on return.
23542 The return value is the updated DESTMEM. */
23543 static rtx
23544 emit_memmov (rtx destmem, rtx *srcmem, rtx destptr, rtx srcptr,
23545 HOST_WIDE_INT size_to_move)
23547 rtx dst = destmem, src = *srcmem, adjust, tempreg;
23548 enum insn_code code;
23549 machine_mode move_mode;
23550 int piece_size, i;
23552 /* Find the widest mode in which we could perform moves.
23553 Start with the biggest power of 2 no larger than SIZE_TO_MOVE and halve
23554 it until a move of that size is supported. */
23555 piece_size = 1 << floor_log2 (size_to_move);
23556 move_mode = mode_for_size (piece_size * BITS_PER_UNIT, MODE_INT, 0);
23557 code = optab_handler (mov_optab, move_mode);
23558 while (code == CODE_FOR_nothing && piece_size > 1)
23560 piece_size >>= 1;
23561 move_mode = mode_for_size (piece_size * BITS_PER_UNIT, MODE_INT, 0);
23562 code = optab_handler (mov_optab, move_mode);
23565 /* Find the corresponding vector mode with the same size as MOVE_MODE.
23566 MOVE_MODE is an integer mode at the moment (SI, DI, TI, etc.). */
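/* For example, a 16-byte TImode piece on a 64-bit target becomes V2DImode
   when the vector move pattern is available; otherwise we fall back to
   word-sized (DImode) moves.  */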
23567 if (GET_MODE_SIZE (move_mode) > GET_MODE_SIZE (word_mode))
23569 int nunits = GET_MODE_SIZE (move_mode) / GET_MODE_SIZE (word_mode);
23570 move_mode = mode_for_vector (word_mode, nunits);
23571 code = optab_handler (mov_optab, move_mode);
23572 if (code == CODE_FOR_nothing)
23574 move_mode = word_mode;
23575 piece_size = GET_MODE_SIZE (move_mode);
23576 code = optab_handler (mov_optab, move_mode);
23579 gcc_assert (code != CODE_FOR_nothing);
23581 dst = adjust_automodify_address_nv (dst, move_mode, destptr, 0);
23582 src = adjust_automodify_address_nv (src, move_mode, srcptr, 0);
23584 /* Emit moves. We'll need SIZE_TO_MOVE/PIECE_SIZE moves. */
23585 gcc_assert (size_to_move % piece_size == 0);
23586 adjust = GEN_INT (piece_size);
23587 for (i = 0; i < size_to_move; i += piece_size)
23589 /* We move from memory to memory, so we'll need to do it via
23590 a temporary register. */
23591 tempreg = gen_reg_rtx (move_mode);
23592 emit_insn (GEN_FCN (code) (tempreg, src));
23593 emit_insn (GEN_FCN (code) (dst, tempreg));
23595 emit_move_insn (destptr,
23596 gen_rtx_PLUS (Pmode, copy_rtx (destptr), adjust));
23597 emit_move_insn (srcptr,
23598 gen_rtx_PLUS (Pmode, copy_rtx (srcptr), adjust));
23600 dst = adjust_automodify_address_nv (dst, move_mode, destptr,
23601 piece_size);
23602 src = adjust_automodify_address_nv (src, move_mode, srcptr,
23603 piece_size);
23606 /* Update DST and SRC rtx. */
23607 *srcmem = src;
23608 return dst;
23611 /* Output code to copy at most count & (max_size - 1) bytes from SRC to DEST. */
23612 static void
23613 expand_movmem_epilogue (rtx destmem, rtx srcmem,
23614 rtx destptr, rtx srcptr, rtx count, int max_size)
23616 rtx src, dest;
23617 if (CONST_INT_P (count))
23619 HOST_WIDE_INT countval = INTVAL (count);
23620 HOST_WIDE_INT epilogue_size = countval % max_size;
23621 int i;
23623 /* For now MAX_SIZE should be a power of 2. This assert could be
23624 relaxed, but it'll require a bit more complicated epilogue
23625 expanding. */
23626 gcc_assert ((max_size & (max_size - 1)) == 0);
23627 for (i = max_size; i >= 1; i >>= 1)
23629 if (epilogue_size & i)
23630 destmem = emit_memmov (destmem, &srcmem, destptr, srcptr, i);
23632 return;
23634 if (max_size > 8)
23636 count = expand_simple_binop (GET_MODE (count), AND, count, GEN_INT (max_size - 1),
23637 count, 1, OPTAB_DIRECT);
23638 expand_set_or_movmem_via_loop (destmem, srcmem, destptr, srcptr, NULL,
23639 count, QImode, 1, 4, false);
23640 return;
23643 /* When single string operations are available, we can cheaply advance the
23644 dest and src pointers. Otherwise we save code size by maintaining an offset
23645 (zero is readily available from the preceding rep operation) and using x86 addressing modes. */
23647 if (TARGET_SINGLE_STRINGOP)
23649 if (max_size > 4)
23651 rtx_code_label *label = ix86_expand_aligntest (count, 4, true);
23652 src = change_address (srcmem, SImode, srcptr);
23653 dest = change_address (destmem, SImode, destptr);
23654 emit_insn (gen_strmov (destptr, dest, srcptr, src));
23655 emit_label (label);
23656 LABEL_NUSES (label) = 1;
23658 if (max_size > 2)
23660 rtx_code_label *label = ix86_expand_aligntest (count, 2, true);
23661 src = change_address (srcmem, HImode, srcptr);
23662 dest = change_address (destmem, HImode, destptr);
23663 emit_insn (gen_strmov (destptr, dest, srcptr, src));
23664 emit_label (label);
23665 LABEL_NUSES (label) = 1;
23667 if (max_size > 1)
23669 rtx_code_label *label = ix86_expand_aligntest (count, 1, true);
23670 src = change_address (srcmem, QImode, srcptr);
23671 dest = change_address (destmem, QImode, destptr);
23672 emit_insn (gen_strmov (destptr, dest, srcptr, src));
23673 emit_label (label);
23674 LABEL_NUSES (label) = 1;
23677 else
23679 rtx offset = force_reg (Pmode, const0_rtx);
23680 rtx tmp;
23682 if (max_size > 4)
23684 rtx_code_label *label = ix86_expand_aligntest (count, 4, true);
23685 src = change_address (srcmem, SImode, srcptr);
23686 dest = change_address (destmem, SImode, destptr);
23687 emit_move_insn (dest, src);
23688 tmp = expand_simple_binop (Pmode, PLUS, offset, GEN_INT (4), NULL,
23689 true, OPTAB_LIB_WIDEN);
23690 if (tmp != offset)
23691 emit_move_insn (offset, tmp);
23692 emit_label (label);
23693 LABEL_NUSES (label) = 1;
23695 if (max_size > 2)
23697 rtx_code_label *label = ix86_expand_aligntest (count, 2, true);
23698 tmp = gen_rtx_PLUS (Pmode, srcptr, offset);
23699 src = change_address (srcmem, HImode, tmp);
23700 tmp = gen_rtx_PLUS (Pmode, destptr, offset);
23701 dest = change_address (destmem, HImode, tmp);
23702 emit_move_insn (dest, src);
23703 tmp = expand_simple_binop (Pmode, PLUS, offset, GEN_INT (2), tmp,
23704 true, OPTAB_LIB_WIDEN);
23705 if (tmp != offset)
23706 emit_move_insn (offset, tmp);
23707 emit_label (label);
23708 LABEL_NUSES (label) = 1;
23710 if (max_size > 1)
23712 rtx_code_label *label = ix86_expand_aligntest (count, 1, true);
23713 tmp = gen_rtx_PLUS (Pmode, srcptr, offset);
23714 src = change_address (srcmem, QImode, tmp);
23715 tmp = gen_rtx_PLUS (Pmode, destptr, offset);
23716 dest = change_address (destmem, QImode, tmp);
23717 emit_move_insn (dest, src);
23718 emit_label (label);
23719 LABEL_NUSES (label) = 1;
23724 /* This function emits moves to fill SIZE_TO_MOVE bytes starting from DESTMEM
23725 with value PROMOTED_VAL.
23726 The return value is the updated DESTMEM. */
23728 static rtx
23729 emit_memset (rtx destmem, rtx destptr, rtx promoted_val,
23730 HOST_WIDE_INT size_to_move)
23732 rtx dst = destmem, adjust;
23733 enum insn_code code;
23734 machine_mode move_mode;
23735 int piece_size, i;
23737 /* Find the widest mode in which we could perform moves.
23738 Start with the biggest power of 2 no larger than SIZE_TO_MOVE and halve
23739 it until a move of that size is supported. */
23740 move_mode = GET_MODE (promoted_val);
23741 if (move_mode == VOIDmode)
23742 move_mode = QImode;
23743 if (size_to_move < GET_MODE_SIZE (move_mode))
23745 move_mode = mode_for_size (size_to_move * BITS_PER_UNIT, MODE_INT, 0);
23746 promoted_val = gen_lowpart (move_mode, promoted_val);
23748 piece_size = GET_MODE_SIZE (move_mode);
23749 code = optab_handler (mov_optab, move_mode);
23750 gcc_assert (code != CODE_FOR_nothing && promoted_val != NULL_RTX);
23752 dst = adjust_automodify_address_nv (dst, move_mode, destptr, 0);
23754 /* Emit moves. We'll need SIZE_TO_MOVE/PIECE_SIZE moves. */
23755 gcc_assert (size_to_move % piece_size == 0);
23756 adjust = GEN_INT (piece_size);
23757 for (i = 0; i < size_to_move; i += piece_size)
23759 if (piece_size <= GET_MODE_SIZE (word_mode))
23761 emit_insn (gen_strset (destptr, dst, promoted_val));
23762 dst = adjust_automodify_address_nv (dst, move_mode, destptr,
23763 piece_size);
23764 continue;
23767 emit_insn (GEN_FCN (code) (dst, promoted_val));
23769 emit_move_insn (destptr,
23770 gen_rtx_PLUS (Pmode, copy_rtx (destptr), adjust));
23772 dst = adjust_automodify_address_nv (dst, move_mode, destptr,
23773 piece_size);
23776 /* Update DST rtx. */
23777 return dst;
23779 /* Output code to set at most count & (max_size - 1) bytes starting at DEST. */
23780 static void
23781 expand_setmem_epilogue_via_loop (rtx destmem, rtx destptr, rtx value,
23782 rtx count, int max_size)
23784 count =
23785 expand_simple_binop (counter_mode (count), AND, count,
23786 GEN_INT (max_size - 1), count, 1, OPTAB_DIRECT);
23787 expand_set_or_movmem_via_loop (destmem, NULL, destptr, NULL,
23788 gen_lowpart (QImode, value), count, QImode,
23789 1, max_size / 2, true);
23792 /* Output code to set at most count & (max_size - 1) bytes starting at DEST. */
23793 static void
23794 expand_setmem_epilogue (rtx destmem, rtx destptr, rtx value, rtx vec_value,
23795 rtx count, int max_size)
23797 rtx dest;
23799 if (CONST_INT_P (count))
23801 HOST_WIDE_INT countval = INTVAL (count);
23802 HOST_WIDE_INT epilogue_size = countval % max_size;
23803 int i;
23805 /* For now MAX_SIZE should be a power of 2. This assert could be
23806 relaxed, but it'll require a bit more complicated epilogue
23807 expanding. */
23808 gcc_assert ((max_size & (max_size - 1)) == 0);
23809 for (i = max_size; i >= 1; i >>= 1)
23811 if (epilogue_size & i)
23813 if (vec_value && i > GET_MODE_SIZE (GET_MODE (value)))
23814 destmem = emit_memset (destmem, destptr, vec_value, i);
23815 else
23816 destmem = emit_memset (destmem, destptr, value, i);
23819 return;
23821 if (max_size > 32)
23823 expand_setmem_epilogue_via_loop (destmem, destptr, value, count, max_size);
23824 return;
23826 if (max_size > 16)
23828 rtx_code_label *label = ix86_expand_aligntest (count, 16, true);
23829 if (TARGET_64BIT)
23831 dest = change_address (destmem, DImode, destptr);
23832 emit_insn (gen_strset (destptr, dest, value));
23833 dest = adjust_automodify_address_nv (dest, DImode, destptr, 8);
23834 emit_insn (gen_strset (destptr, dest, value));
23836 else
23838 dest = change_address (destmem, SImode, destptr);
23839 emit_insn (gen_strset (destptr, dest, value));
23840 dest = adjust_automodify_address_nv (dest, SImode, destptr, 4);
23841 emit_insn (gen_strset (destptr, dest, value));
23842 dest = adjust_automodify_address_nv (dest, SImode, destptr, 8);
23843 emit_insn (gen_strset (destptr, dest, value));
23844 dest = adjust_automodify_address_nv (dest, SImode, destptr, 12);
23845 emit_insn (gen_strset (destptr, dest, value));
23847 emit_label (label);
23848 LABEL_NUSES (label) = 1;
23850 if (max_size > 8)
23852 rtx_code_label *label = ix86_expand_aligntest (count, 8, true);
23853 if (TARGET_64BIT)
23855 dest = change_address (destmem, DImode, destptr);
23856 emit_insn (gen_strset (destptr, dest, value));
23858 else
23860 dest = change_address (destmem, SImode, destptr);
23861 emit_insn (gen_strset (destptr, dest, value));
23862 dest = adjust_automodify_address_nv (dest, SImode, destptr, 4);
23863 emit_insn (gen_strset (destptr, dest, value));
23865 emit_label (label);
23866 LABEL_NUSES (label) = 1;
23868 if (max_size > 4)
23870 rtx_code_label *label = ix86_expand_aligntest (count, 4, true);
23871 dest = change_address (destmem, SImode, destptr);
23872 emit_insn (gen_strset (destptr, dest, gen_lowpart (SImode, value)));
23873 emit_label (label);
23874 LABEL_NUSES (label) = 1;
23876 if (max_size > 2)
23878 rtx_code_label *label = ix86_expand_aligntest (count, 2, true);
23879 dest = change_address (destmem, HImode, destptr);
23880 emit_insn (gen_strset (destptr, dest, gen_lowpart (HImode, value)));
23881 emit_label (label);
23882 LABEL_NUSES (label) = 1;
23884 if (max_size > 1)
23886 rtx_code_label *label = ix86_expand_aligntest (count, 1, true);
23887 dest = change_address (destmem, QImode, destptr);
23888 emit_insn (gen_strset (destptr, dest, gen_lowpart (QImode, value)));
23889 emit_label (label);
23890 LABEL_NUSES (label) = 1;
23894 /* Depending on ISSETMEM, copy enough from SRCMEM to DESTMEM or set enough to
23895 DESTMEM to align it to DESIRED_ALIGNMENT. Original alignment is ALIGN.
23896 Depending on ISSETMEM, either arguments SRCMEM/SRCPTR or VALUE/VEC_VALUE are
23897 ignored.
23898 Return value is updated DESTMEM. */
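/* For every power of two I below DESIRED_ALIGNMENT that ALIGN does not
   already guarantee, the loop below tests bit I of DESTPTR and, when it
   is set, copies or sets I bytes and decreases COUNT, so that DESTPTR
   ends up aligned to DESIRED_ALIGNMENT on fall-through.  */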
23899 static rtx
23900 expand_set_or_movmem_prologue (rtx destmem, rtx srcmem,
23901 rtx destptr, rtx srcptr, rtx value,
23902 rtx vec_value, rtx count, int align,
23903 int desired_alignment, bool issetmem)
23905 int i;
23906 for (i = 1; i < desired_alignment; i <<= 1)
23908 if (align <= i)
23910 rtx_code_label *label = ix86_expand_aligntest (destptr, i, false);
23911 if (issetmem)
23913 if (vec_value && i > GET_MODE_SIZE (GET_MODE (value)))
23914 destmem = emit_memset (destmem, destptr, vec_value, i);
23915 else
23916 destmem = emit_memset (destmem, destptr, value, i);
23918 else
23919 destmem = emit_memmov (destmem, &srcmem, destptr, srcptr, i);
23920 ix86_adjust_counter (count, i);
23921 emit_label (label);
23922 LABEL_NUSES (label) = 1;
23923 set_mem_align (destmem, i * 2 * BITS_PER_UNIT);
23926 return destmem;
23929 /* Test if COUNT&SIZE is nonzero and if so, expand a movmem
23930 or setmem sequence that is valid for SIZE..2*SIZE-1 bytes
23931 and jump to DONE_LABEL. */
23932 static void
23933 expand_small_movmem_or_setmem (rtx destmem, rtx srcmem,
23934 rtx destptr, rtx srcptr,
23935 rtx value, rtx vec_value,
23936 rtx count, int size,
23937 rtx done_label, bool issetmem)
23939 rtx_code_label *label = ix86_expand_aligntest (count, size, false);
23940 machine_mode mode = mode_for_size (size * BITS_PER_UNIT, MODE_INT, 1);
23941 rtx modesize;
23942 int n;
23944 /* If we do not have vector value to copy, we must reduce size. */
23945 if (issetmem)
23947 if (!vec_value)
23949 if (GET_MODE (value) == VOIDmode && size > 8)
23950 mode = Pmode;
23951 else if (GET_MODE_SIZE (mode) > GET_MODE_SIZE (GET_MODE (value)))
23952 mode = GET_MODE (value);
23954 else
23955 mode = GET_MODE (vec_value), value = vec_value;
23957 else
23959 /* Choose appropriate vector mode. */
23960 if (size >= 32)
23961 mode = TARGET_AVX ? V32QImode : TARGET_SSE ? V16QImode : DImode;
23962 else if (size >= 16)
23963 mode = TARGET_SSE ? V16QImode : DImode;
23964 srcmem = change_address (srcmem, mode, srcptr);
23966 destmem = change_address (destmem, mode, destptr);
23967 modesize = GEN_INT (GET_MODE_SIZE (mode));
23968 gcc_assert (GET_MODE_SIZE (mode) <= size);
23969 for (n = 0; n * GET_MODE_SIZE (mode) < size; n++)
23971 if (issetmem)
23972 emit_move_insn (destmem, gen_lowpart (mode, value));
23973 else
23975 emit_move_insn (destmem, srcmem);
23976 srcmem = offset_address (srcmem, modesize, GET_MODE_SIZE (mode));
23978 destmem = offset_address (destmem, modesize, GET_MODE_SIZE (mode));
23981 destmem = offset_address (destmem, count, 1);
23982 destmem = offset_address (destmem, GEN_INT (-2 * size),
23983 GET_MODE_SIZE (mode));
23984 if (!issetmem)
23986 srcmem = offset_address (srcmem, count, 1);
23987 srcmem = offset_address (srcmem, GEN_INT (-2 * size),
23988 GET_MODE_SIZE (mode));
23990 for (n = 0; n * GET_MODE_SIZE (mode) < size; n++)
23992 if (issetmem)
23993 emit_move_insn (destmem, gen_lowpart (mode, value));
23994 else
23996 emit_move_insn (destmem, srcmem);
23997 srcmem = offset_address (srcmem, modesize, GET_MODE_SIZE (mode));
23999 destmem = offset_address (destmem, modesize, GET_MODE_SIZE (mode));
24001 emit_jump_insn (gen_jump (done_label));
24002 emit_barrier ();
24004 emit_label (label);
24005 LABEL_NUSES (label) = 1;
24008 /* Handle small memcpy (up to SIZE, which is supposed to be a small power of 2)
24009 and get ready for the main memcpy loop by copying the initial DESIRED_ALIGN-ALIGN
24010 bytes and the last SIZE bytes, adjusting DESTPTR/SRCPTR/COUNT in a way that we can
24011 proceed with a loop copying SIZE bytes at once. Do moves in MODE.
24012 DONE_LABEL is a label after the whole copying sequence. The label is created
24013 on demand if *DONE_LABEL is NULL.
24014 MIN_SIZE is the minimal size of the block copied. This value gets adjusted for new
24015 bounds after the initial copies.
24017 DESTMEM/SRCMEM are memory expressions pointing to the copied block,
24018 DESTPTR/SRCPTR are pointers to the block. DYNAMIC_CHECK indicates whether
24019 we will dispatch to a library call for large blocks.
24021 In pseudocode we do:
24023 if (COUNT < SIZE)
24025 Assume that SIZE is 4. Bigger sizes are handled analogously
24026 if (COUNT & 4)
24028 copy 4 bytes from SRCPTR to DESTPTR
24029 copy 4 bytes from SRCPTR + COUNT - 4 to DESTPTR + COUNT - 4
24030 goto done_label
24032 if (!COUNT)
24033 goto done_label;
24034 copy 1 byte from SRCPTR to DESTPTR
24035 if (COUNT & 2)
24037 copy 2 bytes from SRCPTR to DESTPTR
24038 copy 2 bytes from SRCPTR + COUNT - 2 to DESTPTR + COUNT - 2
24041 else
24043 copy at least DESIRED_ALIGN-ALIGN bytes from SRCPTR to DESTPTR
24044 copy SIZE bytes from SRCPTR + COUNT - SIZE to DESTPTR + COUNT - SIZE
24046 OLD_DESTPTR = DESTPTR;
24047 Align DESTPTR up to DESIRED_ALIGN
24048 SRCPTR += DESTPTR - OLD_DESTPTR
24049 COUNT -= DESTPTR - OLD_DESTPTR
24050 if (DYNAMIC_CHECK)
24051 Round COUNT down to multiple of SIZE
24052 << optional caller supplied zero size guard is here >>
24053 << optional caller supplied dynamic check is here >>
24054 << caller supplied main copy loop is here >>
24056 done_label:  */
24058 static void
24059 expand_set_or_movmem_prologue_epilogue_by_misaligned_moves (rtx destmem, rtx srcmem,
24060 rtx *destptr, rtx *srcptr,
24061 machine_mode mode,
24062 rtx value, rtx vec_value,
24063 rtx *count,
24064 rtx_code_label **done_label,
24065 int size,
24066 int desired_align,
24067 int align,
24068 unsigned HOST_WIDE_INT *min_size,
24069 bool dynamic_check,
24070 bool issetmem)
24072 rtx_code_label *loop_label = NULL, *label;
24073 int n;
24074 rtx modesize;
24075 int prolog_size = 0;
24076 rtx mode_value;
24078 /* Choose the proper value to copy. */
24079 if (issetmem && VECTOR_MODE_P (mode))
24080 mode_value = vec_value;
24081 else
24082 mode_value = value;
24083 gcc_assert (GET_MODE_SIZE (mode) <= size);
24085 /* See if block is big or small, handle small blocks. */
24086 if (!CONST_INT_P (*count) && *min_size < (unsigned HOST_WIDE_INT)size)
24088 int size2 = size;
24089 loop_label = gen_label_rtx ();
24091 if (!*done_label)
24092 *done_label = gen_label_rtx ();
24094 emit_cmp_and_jump_insns (*count, GEN_INT (size2), GE, 0, GET_MODE (*count),
24095 1, loop_label);
24096 size2 >>= 1;
24098 /* Handle sizes > 3. */
24099 for (;size2 > 2; size2 >>= 1)
24100 expand_small_movmem_or_setmem (destmem, srcmem,
24101 *destptr, *srcptr,
24102 value, vec_value,
24103 *count,
24104 size2, *done_label, issetmem);
24105 /* Nothing to copy? Jump to DONE_LABEL if so */
24106 emit_cmp_and_jump_insns (*count, const0_rtx, EQ, 0, GET_MODE (*count),
24107 1, *done_label);
24109 /* Do a byte copy. */
24110 destmem = change_address (destmem, QImode, *destptr);
24111 if (issetmem)
24112 emit_move_insn (destmem, gen_lowpart (QImode, value));
24113 else
24115 srcmem = change_address (srcmem, QImode, *srcptr);
24116 emit_move_insn (destmem, srcmem);
24119 /* Handle sizes 2 and 3. */
24120 label = ix86_expand_aligntest (*count, 2, false);
24121 destmem = change_address (destmem, HImode, *destptr);
24122 destmem = offset_address (destmem, *count, 1);
24123 destmem = offset_address (destmem, GEN_INT (-2), 2);
24124 if (issetmem)
24125 emit_move_insn (destmem, gen_lowpart (HImode, value));
24126 else
24128 srcmem = change_address (srcmem, HImode, *srcptr);
24129 srcmem = offset_address (srcmem, *count, 1);
24130 srcmem = offset_address (srcmem, GEN_INT (-2), 2);
24131 emit_move_insn (destmem, srcmem);
24134 emit_label (label);
24135 LABEL_NUSES (label) = 1;
24136 emit_jump_insn (gen_jump (*done_label));
24137 emit_barrier ();
24139 else
24140 gcc_assert (*min_size >= (unsigned HOST_WIDE_INT)size
24141 || UINTVAL (*count) >= (unsigned HOST_WIDE_INT)size);
24143 /* Start memcpy for COUNT >= SIZE. */
24144 if (loop_label)
24146 emit_label (loop_label);
24147 LABEL_NUSES (loop_label) = 1;
24150 /* Copy first desired_align bytes. */
24151 if (!issetmem)
24152 srcmem = change_address (srcmem, mode, *srcptr);
24153 destmem = change_address (destmem, mode, *destptr);
24154 modesize = GEN_INT (GET_MODE_SIZE (mode));
24155 for (n = 0; prolog_size < desired_align - align; n++)
24157 if (issetmem)
24158 emit_move_insn (destmem, mode_value);
24159 else
24161 emit_move_insn (destmem, srcmem);
24162 srcmem = offset_address (srcmem, modesize, GET_MODE_SIZE (mode));
24164 destmem = offset_address (destmem, modesize, GET_MODE_SIZE (mode));
24165 prolog_size += GET_MODE_SIZE (mode);
24169 /* Copy last SIZE bytes. */
24170 destmem = offset_address (destmem, *count, 1);
24171 destmem = offset_address (destmem,
24172 GEN_INT (-size - prolog_size),
24174 if (issetmem)
24175 emit_move_insn (destmem, mode_value);
24176 else
24178 srcmem = offset_address (srcmem, *count, 1);
24179 srcmem = offset_address (srcmem,
24180 GEN_INT (-size - prolog_size),
24182 emit_move_insn (destmem, srcmem);
24184 for (n = 1; n * GET_MODE_SIZE (mode) < size; n++)
24186 destmem = offset_address (destmem, modesize, 1);
24187 if (issetmem)
24188 emit_move_insn (destmem, mode_value);
24189 else
24191 srcmem = offset_address (srcmem, modesize, 1);
24192 emit_move_insn (destmem, srcmem);
24196 /* Align destination. */
24197 if (desired_align > 1 && desired_align > align)
24199 rtx saveddest = *destptr;
24201 gcc_assert (desired_align <= size);
24202 /* Align destptr up, placing it in a new register. */
24203 *destptr = expand_simple_binop (GET_MODE (*destptr), PLUS, *destptr,
24204 GEN_INT (prolog_size),
24205 NULL_RTX, 1, OPTAB_DIRECT);
24206 *destptr = expand_simple_binop (GET_MODE (*destptr), AND, *destptr,
24207 GEN_INT (-desired_align),
24208 *destptr, 1, OPTAB_DIRECT);
24209 /* See how many bytes we skipped. */
24210 saveddest = expand_simple_binop (GET_MODE (*destptr), MINUS, saveddest,
24211 *destptr,
24212 saveddest, 1, OPTAB_DIRECT);
24213 /* Adjust srcptr and count. */
24214 if (!issetmem)
24215 *srcptr = expand_simple_binop (GET_MODE (*srcptr), MINUS, *srcptr, saveddest,
24216 *srcptr, 1, OPTAB_DIRECT);
24217 *count = expand_simple_binop (GET_MODE (*count), PLUS, *count,
24218 saveddest, *count, 1, OPTAB_DIRECT);
24219 /* We copied at most size + prolog_size. */
24220 if (*min_size > (unsigned HOST_WIDE_INT)(size + prolog_size))
24221 *min_size = (*min_size - size) & ~(unsigned HOST_WIDE_INT)(size - 1);
24222 else
24223 *min_size = 0;
24225 /* Our loops always round down the block size, but for dispatch to the library
24226 we need the precise value. */
24227 if (dynamic_check)
24228 *count = expand_simple_binop (GET_MODE (*count), AND, *count,
24229 GEN_INT (-size), *count, 1, OPTAB_DIRECT);
24231 else
24233 gcc_assert (prolog_size == 0);
24234 /* Decrease count, so we won't end up copying last word twice. */
24235 if (!CONST_INT_P (*count))
24236 *count = expand_simple_binop (GET_MODE (*count), PLUS, *count,
24237 constm1_rtx, *count, 1, OPTAB_DIRECT);
24238 else
24239 *count = GEN_INT ((UINTVAL (*count) - 1) & ~(unsigned HOST_WIDE_INT)(size - 1));
24240 if (*min_size)
24241 *min_size = (*min_size - 1) & ~(unsigned HOST_WIDE_INT)(size - 1);
24246 /* This function is like the previous one, except here we know how many bytes
24247 need to be copied. That allows us to update alignment not only of DST, which
24248 is returned, but also of SRC, which is passed as a pointer for that
24249 reason. */
24250 static rtx
24251 expand_set_or_movmem_constant_prologue (rtx dst, rtx *srcp, rtx destreg,
24252 rtx srcreg, rtx value, rtx vec_value,
24253 int desired_align, int align_bytes,
24254 bool issetmem)
24256 rtx src = NULL;
24257 rtx orig_dst = dst;
24258 rtx orig_src = NULL;
24259 int piece_size = 1;
24260 int copied_bytes = 0;
24262 if (!issetmem)
24264 gcc_assert (srcp != NULL);
24265 src = *srcp;
24266 orig_src = src;
24269 for (piece_size = 1;
24270 piece_size <= desired_align && copied_bytes < align_bytes;
24271 piece_size <<= 1)
24273 if (align_bytes & piece_size)
24275 if (issetmem)
24277 if (vec_value && piece_size > GET_MODE_SIZE (GET_MODE (value)))
24278 dst = emit_memset (dst, destreg, vec_value, piece_size);
24279 else
24280 dst = emit_memset (dst, destreg, value, piece_size);
24282 else
24283 dst = emit_memmov (dst, &src, destreg, srcreg, piece_size);
24284 copied_bytes += piece_size;
24287 if (MEM_ALIGN (dst) < (unsigned int) desired_align * BITS_PER_UNIT)
24288 set_mem_align (dst, desired_align * BITS_PER_UNIT);
24289 if (MEM_SIZE_KNOWN_P (orig_dst))
24290 set_mem_size (dst, MEM_SIZE (orig_dst) - align_bytes);
24292 if (!issetmem)
24294 int src_align_bytes = get_mem_align_offset (src, desired_align
24295 * BITS_PER_UNIT);
24296 if (src_align_bytes >= 0)
24297 src_align_bytes = desired_align - src_align_bytes;
24298 if (src_align_bytes >= 0)
24300 unsigned int src_align;
24301 for (src_align = desired_align; src_align >= 2; src_align >>= 1)
24303 if ((src_align_bytes & (src_align - 1))
24304 == (align_bytes & (src_align - 1)))
24305 break;
24307 if (src_align > (unsigned int) desired_align)
24308 src_align = desired_align;
24309 if (MEM_ALIGN (src) < src_align * BITS_PER_UNIT)
24310 set_mem_align (src, src_align * BITS_PER_UNIT);
24312 if (MEM_SIZE_KNOWN_P (orig_src))
24313 set_mem_size (src, MEM_SIZE (orig_src) - align_bytes);
24314 *srcp = src;
24317 return dst;
24320 /* Return true if ALG can be used in current context.
24321 Assume we expand memset if MEMSET is true. */
24322 static bool
24323 alg_usable_p (enum stringop_alg alg, bool memset)
24325 if (alg == no_stringop)
24326 return false;
24327 if (alg == vector_loop)
24328 return TARGET_SSE || TARGET_AVX;
24329 /* Algorithms using the rep prefix want at least edi and ecx;
24330 additionally, memset wants eax and memcpy wants esi. Don't
24331 consider such algorithms if the user has appropriated those
24332 registers for their own purposes. */
24333 if (alg == rep_prefix_1_byte
24334 || alg == rep_prefix_4_byte
24335 || alg == rep_prefix_8_byte)
24336 return !(fixed_regs[CX_REG] || fixed_regs[DI_REG]
24337 || (memset ? fixed_regs[AX_REG] : fixed_regs[SI_REG]));
24338 return true;
24341 /* Given COUNT and EXPECTED_SIZE, decide on codegen of string operation. */
24342 static enum stringop_alg
24343 decide_alg (HOST_WIDE_INT count, HOST_WIDE_INT expected_size,
24344 unsigned HOST_WIDE_INT min_size, unsigned HOST_WIDE_INT max_size,
24345 bool memset, bool zero_memset, int *dynamic_check, bool *noalign)
24347 const struct stringop_algs * algs;
24348 bool optimize_for_speed;
24349 int max = 0;
24350 const struct processor_costs *cost;
24351 int i;
24352 bool any_alg_usable_p = false;
24354 *noalign = false;
24355 *dynamic_check = -1;
24357 /* Even if the string operation call is cold, we still might spend a lot
24358 of time processing large blocks. */
24359 if (optimize_function_for_size_p (cfun)
24360 || (optimize_insn_for_size_p ()
24361 && (max_size < 256
24362 || (expected_size != -1 && expected_size < 256))))
24363 optimize_for_speed = false;
24364 else
24365 optimize_for_speed = true;
24367 cost = optimize_for_speed ? ix86_cost : &ix86_size_cost;
24368 if (memset)
24369 algs = &cost->memset[TARGET_64BIT != 0];
24370 else
24371 algs = &cost->memcpy[TARGET_64BIT != 0];
24373 /* See maximal size for user defined algorithm. */
24374 for (i = 0; i < MAX_STRINGOP_ALGS; i++)
24376 enum stringop_alg candidate = algs->size[i].alg;
24377 bool usable = alg_usable_p (candidate, memset);
24378 any_alg_usable_p |= usable;
24380 if (candidate != libcall && candidate && usable)
24381 max = algs->size[i].max;
24384 /* If the expected size is not known but the max size is small enough
24385 that the inline version is a win, set the expected size into
24386 the range. */
24387 if (((max > 1 && (unsigned HOST_WIDE_INT) max >= max_size) || max == -1)
24388 && expected_size == -1)
24389 expected_size = min_size / 2 + max_size / 2;
24391 /* If the user specified the algorithm, honor it if possible. */
24392 if (ix86_stringop_alg != no_stringop
24393 && alg_usable_p (ix86_stringop_alg, memset))
24394 return ix86_stringop_alg;
24395 /* rep; movq or rep; movl is the smallest variant. */
24396 else if (!optimize_for_speed)
24398 *noalign = true;
24399 if (!count || (count & 3) || (memset && !zero_memset))
24400 return alg_usable_p (rep_prefix_1_byte, memset)
24401 ? rep_prefix_1_byte : loop_1_byte;
24402 else
24403 return alg_usable_p (rep_prefix_4_byte, memset)
24404 ? rep_prefix_4_byte : loop;
24406 /* Very tiny blocks are best handled via the loop; REP is expensive to
24407 set up. */
24408 else if (expected_size != -1 && expected_size < 4)
24409 return loop_1_byte;
24410 else if (expected_size != -1)
24412 enum stringop_alg alg = libcall;
24413 bool alg_noalign = false;
24414 for (i = 0; i < MAX_STRINGOP_ALGS; i++)
24416 /* We get here if the algorithms that were not libcall-based
24417 were rep-prefix based and we are unable to use rep prefixes
24418 based on global register usage. Break out of the loop and
24419 use the heuristic below. */
24420 if (algs->size[i].max == 0)
24421 break;
24422 if (algs->size[i].max >= expected_size || algs->size[i].max == -1)
24424 enum stringop_alg candidate = algs->size[i].alg;
24426 if (candidate != libcall && alg_usable_p (candidate, memset))
24428 alg = candidate;
24429 alg_noalign = algs->size[i].noalign;
24431 /* Honor TARGET_INLINE_ALL_STRINGOPS by picking
24432 last non-libcall inline algorithm. */
24433 if (TARGET_INLINE_ALL_STRINGOPS)
24435 /* When the current size is best to be copied by a libcall,
24436 but we are still forced to inline, run the heuristic below
24437 that will pick code for medium sized blocks. */
24438 if (alg != libcall)
24440 *noalign = alg_noalign;
24441 return alg;
24443 break;
24445 else if (alg_usable_p (candidate, memset))
24447 *noalign = algs->size[i].noalign;
24448 return candidate;
24453 /* When asked to inline the call anyway, try to pick a meaningful choice.
24454 We look for the maximal size of a block that is faster to copy by hand and
24455 take blocks of at most that size, guessing that the average size will
24456 be roughly half of the maximum.
24458 If this turns out to be bad, we might simply specify the preferred
24459 choice in ix86_costs. */
24460 if ((TARGET_INLINE_ALL_STRINGOPS || TARGET_INLINE_STRINGOPS_DYNAMICALLY)
24461 && (algs->unknown_size == libcall
24462 || !alg_usable_p (algs->unknown_size, memset)))
24464 enum stringop_alg alg;
24466 /* If there aren't any usable algorithms, then recursing on
24467 smaller sizes isn't going to find anything. Just return the
24468 simple byte-at-a-time copy loop. */
24469 if (!any_alg_usable_p)
24471 /* Pick something reasonable. */
24472 if (TARGET_INLINE_STRINGOPS_DYNAMICALLY)
24473 *dynamic_check = 128;
24474 return loop_1_byte;
24476 if (max <= 0)
24477 max = 4096;
24478 alg = decide_alg (count, max / 2, min_size, max_size, memset,
24479 zero_memset, dynamic_check, noalign);
24480 gcc_assert (*dynamic_check == -1);
24481 gcc_assert (alg != libcall);
24482 if (TARGET_INLINE_STRINGOPS_DYNAMICALLY)
24483 *dynamic_check = max;
24484 return alg;
24486 return (alg_usable_p (algs->unknown_size, memset)
24487 ? algs->unknown_size : libcall);
24490 /* Decide on alignment. We know that the operand is already aligned to ALIGN
24491 (ALIGN can be based on profile feedback and thus it is not 100% guaranteed). */
24492 static int
24493 decide_alignment (int align,
24494 enum stringop_alg alg,
24495 int expected_size,
24496 machine_mode move_mode)
24498 int desired_align = 0;
24500 gcc_assert (alg != no_stringop);
24502 if (alg == libcall)
24503 return 0;
24504 if (move_mode == VOIDmode)
24505 return 0;
24507 desired_align = GET_MODE_SIZE (move_mode);
24508 /* PentiumPro has special logic triggering for 8-byte-aligned blocks,
24509 copying a whole cache line at once. */
24510 if (TARGET_PENTIUMPRO
24511 && (alg == rep_prefix_4_byte || alg == rep_prefix_1_byte))
24512 desired_align = 8;
24514 if (optimize_size)
24515 desired_align = 1;
24516 if (desired_align < align)
24517 desired_align = align;
24518 if (expected_size != -1 && expected_size < 4)
24519 desired_align = align;
24521 return desired_align;
24525 /* Helper function for memset. For a QImode value 0xXY produce
24526 0xXYXYXYXY of the width specified by MODE. This is essentially
24527 a multiply by 0x01010101, but we can do slightly better than
24528 synth_mult by unwinding the sequence by hand on CPUs with
24529 slow multiply. */
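/* For instance, VAL == 0x12 with MODE == SImode yields 0x12121212: the
   constant path below computes it directly, while the register path
   either multiplies by the promoted constant 0x01010101 or builds the
   value with the shift/or ladder, whichever the cost tables prefer.  */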
24530 static rtx
24531 promote_duplicated_reg (machine_mode mode, rtx val)
24533 machine_mode valmode = GET_MODE (val);
24534 rtx tmp;
24535 int nops = mode == DImode ? 3 : 2;
24537 gcc_assert (mode == SImode || mode == DImode || val == const0_rtx);
24538 if (val == const0_rtx)
24539 return copy_to_mode_reg (mode, CONST0_RTX (mode));
24540 if (CONST_INT_P (val))
24542 HOST_WIDE_INT v = INTVAL (val) & 255;
24544 v |= v << 8;
24545 v |= v << 16;
24546 if (mode == DImode)
24547 v |= (v << 16) << 16;
24548 return copy_to_mode_reg (mode, gen_int_mode (v, mode));
24551 if (valmode == VOIDmode)
24552 valmode = QImode;
24553 if (valmode != QImode)
24554 val = gen_lowpart (QImode, val);
24555 if (mode == QImode)
24556 return val;
24557 if (!TARGET_PARTIAL_REG_STALL)
24558 nops--;
24559 if (ix86_cost->mult_init[mode == DImode ? 3 : 2]
24560 + ix86_cost->mult_bit * (mode == DImode ? 8 : 4)
24561 <= (ix86_cost->shift_const + ix86_cost->add) * nops
24562 + (COSTS_N_INSNS (TARGET_PARTIAL_REG_STALL == 0)))
24564 rtx reg = convert_modes (mode, QImode, val, true);
24565 tmp = promote_duplicated_reg (mode, const1_rtx);
24566 return expand_simple_binop (mode, MULT, reg, tmp, NULL, 1,
24567 OPTAB_DIRECT);
24569 else
24571 rtx reg = convert_modes (mode, QImode, val, true);
24573 if (!TARGET_PARTIAL_REG_STALL)
24574 if (mode == SImode)
24575 emit_insn (gen_movsi_insv_1 (reg, reg));
24576 else
24577 emit_insn (gen_movdi_insv_1 (reg, reg));
24578 else
24580 tmp = expand_simple_binop (mode, ASHIFT, reg, GEN_INT (8),
24581 NULL, 1, OPTAB_DIRECT);
24582 reg =
24583 expand_simple_binop (mode, IOR, reg, tmp, reg, 1, OPTAB_DIRECT);
24585 tmp = expand_simple_binop (mode, ASHIFT, reg, GEN_INT (16),
24586 NULL, 1, OPTAB_DIRECT);
24587 reg = expand_simple_binop (mode, IOR, reg, tmp, reg, 1, OPTAB_DIRECT);
24588 if (mode == SImode)
24589 return reg;
24590 tmp = expand_simple_binop (mode, ASHIFT, reg, GEN_INT (32),
24591 NULL, 1, OPTAB_DIRECT);
24592 reg = expand_simple_binop (mode, IOR, reg, tmp, reg, 1, OPTAB_DIRECT);
24593 return reg;
24597 /* Duplicate value VAL using promote_duplicated_reg into the maximal size that
24598 will be needed by the main loop copying SIZE_NEEDED chunks and by the prologue
24599 getting alignment from ALIGN to DESIRED_ALIGN. */
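/* For instance, on a 64-bit target a SIZE_NEEDED of 16 promotes VAL to DImode,
   while a SIZE_NEEDED of 2 with no extra alignment work needs only HImode. */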
24600 static rtx
24601 promote_duplicated_reg_to_size (rtx val, int size_needed, int desired_align,
24602 int align)
24604 rtx promoted_val;
24606 if (TARGET_64BIT
24607 && (size_needed > 4 || (desired_align > align && desired_align > 4)))
24608 promoted_val = promote_duplicated_reg (DImode, val);
24609 else if (size_needed > 2 || (desired_align > align && desired_align > 2))
24610 promoted_val = promote_duplicated_reg (SImode, val);
24611 else if (size_needed > 1 || (desired_align > align && desired_align > 1))
24612 promoted_val = promote_duplicated_reg (HImode, val);
24613 else
24614 promoted_val = val;
24616 return promoted_val;
24619 /* Expand string move (memcpy) or store (memset) operation. Use i386 string
24620 operations when profitable. The code depends upon architecture, block size
24621 and alignment, but always has one of the following overall structures:
24623 Aligned move sequence:
24625 1) Prologue guard: Conditional that jumps up to epilogues for small
24626 blocks that can be handled by epilogue alone. This is faster
24627 but also needed for correctness, since the prologue assumes the block
24628 is larger than the desired alignment.
24630 Optional dynamic check for size and libcall for large
24631 blocks is emitted here too, with -minline-stringops-dynamically.
24633 2) Prologue: copy first few bytes in order to get destination
24634 aligned to DESIRED_ALIGN. It is emitted only when ALIGN is less
24635 than DESIRED_ALIGN and up to DESIRED_ALIGN - ALIGN bytes can be
24636 copied. We emit either a jump tree on power of two sized
24637 blocks, or a byte loop.
24639 3) Main body: the copying loop itself, copying in SIZE_NEEDED chunks
24640 with specified algorithm.
24642 4) Epilogue: code copying tail of the block that is too small to be
24643 handled by main body (or up to size guarded by prologue guard).
24645 Misaligned move sequence
24647 1) Misaligned move prologue/epilogue containing:
24648 a) Prologue handling small memory blocks and jumping to done_label
24649 (skipped if blocks are known to be large enough)
24650 b) Single move copying the first DESIRED_ALIGN-ALIGN bytes if alignment is
24651 needed by single possibly misaligned move
24652 (skipped if alignment is not needed)
24653 c) Copy of last SIZE_NEEDED bytes by possibly misaligned moves
24655 2) Zero size guard dispatching to done_label, if needed
24657 3) Dispatch to library call, if needed.
24659 4) Main body: the copying loop itself, copying in SIZE_NEEDED chunks
24660 with the specified algorithm. */
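/* Rough illustration of the aligned sequence (hypothetical parameters): for a
   memcpy of unknown size using rep_prefix_8_byte with DESIRED_ALIGN == 8 and
   ALIGN == 1, we emit a guard jumping to the epilogue for blocks shorter than
   8 bytes, a prologue copying up to 7 bytes to align the destination, a rep
   movsq main body, and an epilogue handling the remaining count % 8 bytes. */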
24661 bool
24662 ix86_expand_set_or_movmem (rtx dst, rtx src, rtx count_exp, rtx val_exp,
24663 rtx align_exp, rtx expected_align_exp,
24664 rtx expected_size_exp, rtx min_size_exp,
24665 rtx max_size_exp, rtx probable_max_size_exp,
24666 bool issetmem)
24668 rtx destreg;
24669 rtx srcreg = NULL;
24670 rtx_code_label *label = NULL;
24671 rtx tmp;
24672 rtx_code_label *jump_around_label = NULL;
24673 HOST_WIDE_INT align = 1;
24674 unsigned HOST_WIDE_INT count = 0;
24675 HOST_WIDE_INT expected_size = -1;
24676 int size_needed = 0, epilogue_size_needed;
24677 int desired_align = 0, align_bytes = 0;
24678 enum stringop_alg alg;
24679 rtx promoted_val = NULL;
24680 rtx vec_promoted_val = NULL;
24681 bool force_loopy_epilogue = false;
24682 int dynamic_check;
24683 bool need_zero_guard = false;
24684 bool noalign;
24685 machine_mode move_mode = VOIDmode;
24686 int unroll_factor = 1;
24687 /* TODO: Once value ranges are available, fill in proper data. */
24688 unsigned HOST_WIDE_INT min_size = 0;
24689 unsigned HOST_WIDE_INT max_size = -1;
24690 unsigned HOST_WIDE_INT probable_max_size = -1;
24691 bool misaligned_prologue_used = false;
24693 if (CONST_INT_P (align_exp))
24694 align = INTVAL (align_exp);
24695 /* i386 can do misaligned access at a reasonably increased cost. */
24696 if (CONST_INT_P (expected_align_exp)
24697 && INTVAL (expected_align_exp) > align)
24698 align = INTVAL (expected_align_exp);
24699 /* ALIGN is the minimum of destination and source alignment, but we care here
24700 just about destination alignment. */
24701 else if (!issetmem
24702 && MEM_ALIGN (dst) > (unsigned HOST_WIDE_INT) align * BITS_PER_UNIT)
24703 align = MEM_ALIGN (dst) / BITS_PER_UNIT;
24705 if (CONST_INT_P (count_exp))
24707 min_size = max_size = probable_max_size = count = expected_size
24708 = INTVAL (count_exp);
24709 /* When COUNT is 0, there is nothing to do. */
24710 if (!count)
24711 return true;
24713 else
24715 if (min_size_exp)
24716 min_size = INTVAL (min_size_exp);
24717 if (max_size_exp)
24718 max_size = INTVAL (max_size_exp);
24719 if (probable_max_size_exp)
24720 probable_max_size = INTVAL (probable_max_size_exp);
24721 if (CONST_INT_P (expected_size_exp))
24722 expected_size = INTVAL (expected_size_exp);
24725 /* Make sure we don't need to care about overflow later on. */
24726 if (count > ((unsigned HOST_WIDE_INT) 1 << 30))
24727 return false;
24729 /* Step 0: Decide on preferred algorithm, desired alignment and
24730 size of chunks to be copied by main loop. */
24731 alg = decide_alg (count, expected_size, min_size, probable_max_size,
24732 issetmem,
24733 issetmem && val_exp == const0_rtx,
24734 &dynamic_check, &noalign);
24735 if (alg == libcall)
24736 return false;
24737 gcc_assert (alg != no_stringop);
24739 /* For now the vector version of memset is generated only for memory zeroing, as
24740 creating the promoted vector value is very cheap in this case. */
24741 if (issetmem && alg == vector_loop && val_exp != const0_rtx)
24742 alg = unrolled_loop;
24744 if (!count)
24745 count_exp = copy_to_mode_reg (GET_MODE (count_exp), count_exp);
24746 destreg = ix86_copy_addr_to_reg (XEXP (dst, 0));
24747 if (!issetmem)
24748 srcreg = ix86_copy_addr_to_reg (XEXP (src, 0));
24750 unroll_factor = 1;
24751 move_mode = word_mode;
24752 switch (alg)
24754 case libcall:
24755 case no_stringop:
24756 case last_alg:
24757 gcc_unreachable ();
24758 case loop_1_byte:
24759 need_zero_guard = true;
24760 move_mode = QImode;
24761 break;
24762 case loop:
24763 need_zero_guard = true;
24764 break;
24765 case unrolled_loop:
24766 need_zero_guard = true;
24767 unroll_factor = (TARGET_64BIT ? 4 : 2);
24768 break;
24769 case vector_loop:
24770 need_zero_guard = true;
24771 unroll_factor = 4;
24772 /* Find the widest supported mode. */
24773 move_mode = word_mode;
24774 while (optab_handler (mov_optab, GET_MODE_WIDER_MODE (move_mode))
24775 != CODE_FOR_nothing)
24776 move_mode = GET_MODE_WIDER_MODE (move_mode);
24778 /* Find the corresponding vector mode with the same size as MOVE_MODE.
24779 MOVE_MODE is an integer mode at the moment (SI, DI, TI, etc.). */
24780 if (GET_MODE_SIZE (move_mode) > GET_MODE_SIZE (word_mode))
24782 int nunits = GET_MODE_SIZE (move_mode) / GET_MODE_SIZE (word_mode);
24783 move_mode = mode_for_vector (word_mode, nunits);
24784 if (optab_handler (mov_optab, move_mode) == CODE_FOR_nothing)
24785 move_mode = word_mode;
24787 gcc_assert (optab_handler (mov_optab, move_mode) != CODE_FOR_nothing);
24788 break;
24789 case rep_prefix_8_byte:
24790 move_mode = DImode;
24791 break;
24792 case rep_prefix_4_byte:
24793 move_mode = SImode;
24794 break;
24795 case rep_prefix_1_byte:
24796 move_mode = QImode;
24797 break;
24799 size_needed = GET_MODE_SIZE (move_mode) * unroll_factor;
24800 epilogue_size_needed = size_needed;
24802 desired_align = decide_alignment (align, alg, expected_size, move_mode);
24803 if (!TARGET_ALIGN_STRINGOPS || noalign)
24804 align = desired_align;
24806 /* Step 1: Prologue guard. */
24808 /* Alignment code needs count to be in register. */
24809 if (CONST_INT_P (count_exp) && desired_align > align)
24811 if (INTVAL (count_exp) > desired_align
24812 && INTVAL (count_exp) > size_needed)
24814 align_bytes
24815 = get_mem_align_offset (dst, desired_align * BITS_PER_UNIT);
24816 if (align_bytes <= 0)
24817 align_bytes = 0;
24818 else
24819 align_bytes = desired_align - align_bytes;
24821 if (align_bytes == 0)
24822 count_exp = force_reg (counter_mode (count_exp), count_exp);
24824 gcc_assert (desired_align >= 1 && align >= 1);
24826 /* Misaligned move sequences handle both prologue and epilogue at once.
24827 Default code generation results in smaller code for large alignments
24828 and also avoids redundant work when sizes are known precisely. */
24829 misaligned_prologue_used
24830 = (TARGET_MISALIGNED_MOVE_STRING_PRO_EPILOGUES
24831 && MAX (desired_align, epilogue_size_needed) <= 32
24832 && desired_align <= epilogue_size_needed
24833 && ((desired_align > align && !align_bytes)
24834 || (!count && epilogue_size_needed > 1)));
24836 /* Do the cheap promotion to allow better CSE across the
24837 main loop and epilogue (i.e. one load of the big constant in
24838 front of all code).
24839 For now the misaligned move sequences do not have a fast path
24840 without broadcasting. */
24841 if (issetmem && ((CONST_INT_P (val_exp) || misaligned_prologue_used)))
24843 if (alg == vector_loop)
24845 gcc_assert (val_exp == const0_rtx);
24846 vec_promoted_val = promote_duplicated_reg (move_mode, val_exp);
24847 promoted_val = promote_duplicated_reg_to_size (val_exp,
24848 GET_MODE_SIZE (word_mode),
24849 desired_align, align);
24851 else
24853 promoted_val = promote_duplicated_reg_to_size (val_exp, size_needed,
24854 desired_align, align);
24857 /* Misaligned move sequences handle both prologues and epilogues at once.
24858 Default code generation results in smaller code for large alignments and
24859 also avoids redundant work when sizes are known precisely. */
24860 if (misaligned_prologue_used)
24862 /* The misaligned move prologue handles small blocks by itself. */
24863 expand_set_or_movmem_prologue_epilogue_by_misaligned_moves
24864 (dst, src, &destreg, &srcreg,
24865 move_mode, promoted_val, vec_promoted_val,
24866 &count_exp,
24867 &jump_around_label,
24868 desired_align < align
24869 ? MAX (desired_align, epilogue_size_needed) : epilogue_size_needed,
24870 desired_align, align, &min_size, dynamic_check, issetmem);
24871 if (!issetmem)
24872 src = change_address (src, BLKmode, srcreg);
24873 dst = change_address (dst, BLKmode, destreg);
24874 set_mem_align (dst, desired_align * BITS_PER_UNIT);
24875 epilogue_size_needed = 0;
24876 if (need_zero_guard && !min_size)
24878 /* It is possible that we copied enough so the main loop will not
24879 execute. */
24880 gcc_assert (size_needed > 1);
24881 if (jump_around_label == NULL_RTX)
24882 jump_around_label = gen_label_rtx ();
24883 emit_cmp_and_jump_insns (count_exp,
24884 GEN_INT (size_needed),
24885 LTU, 0, counter_mode (count_exp), 1, jump_around_label);
24886 if (expected_size == -1
24887 || expected_size < (desired_align - align) / 2 + size_needed)
24888 predict_jump (REG_BR_PROB_BASE * 20 / 100);
24889 else
24890 predict_jump (REG_BR_PROB_BASE * 60 / 100);
24893 /* Ensure that alignment prologue won't copy past end of block. */
24894 else if (size_needed > 1 || (desired_align > 1 && desired_align > align))
24896 epilogue_size_needed = MAX (size_needed - 1, desired_align - align);
24897 /* Epilogue always copies COUNT_EXP & (EPILOGUE_SIZE_NEEDED - 1) bytes.
24898 Make sure it is a power of 2. */
24899 epilogue_size_needed = 1 << (floor_log2 (epilogue_size_needed) + 1);
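/* For example, a 16-byte SIZE_NEEDED leaves EPILOGUE_SIZE_NEEDED == 15 above
   (assuming DESIRED_ALIGN - ALIGN is at most 15), which rounds up here to 16. */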
24901 /* To improve performance of small blocks, we jump around the VAL
24902 promotion. This means that if the promoted VAL is not constant,
24903 we might not use it in the epilogue and have to use the byte
24904 loop variant. */
24905 if (issetmem && epilogue_size_needed > 2 && !promoted_val)
24906 force_loopy_epilogue = true;
24907 if ((count && count < (unsigned HOST_WIDE_INT) epilogue_size_needed)
24908 || max_size < (unsigned HOST_WIDE_INT) epilogue_size_needed)
24910 /* If main algorithm works on QImode, no epilogue is needed.
24911 For small sizes just don't align anything. */
24912 if (size_needed == 1)
24913 desired_align = align;
24914 else
24915 goto epilogue;
24917 else if (!count
24918 && min_size < (unsigned HOST_WIDE_INT) epilogue_size_needed)
24920 label = gen_label_rtx ();
24921 emit_cmp_and_jump_insns (count_exp,
24922 GEN_INT (epilogue_size_needed),
24923 LTU, 0, counter_mode (count_exp), 1, label);
24924 if (expected_size == -1 || expected_size < epilogue_size_needed)
24925 predict_jump (REG_BR_PROB_BASE * 60 / 100);
24926 else
24927 predict_jump (REG_BR_PROB_BASE * 20 / 100);
24931 /* Emit code to decide at runtime whether a library call or the inline code
24932 should be used. */
24933 if (dynamic_check != -1)
24935 if (!issetmem && CONST_INT_P (count_exp))
24937 if (UINTVAL (count_exp) >= (unsigned HOST_WIDE_INT)dynamic_check)
24939 emit_block_move_via_libcall (dst, src, count_exp, false);
24940 count_exp = const0_rtx;
24941 goto epilogue;
24944 else
24946 rtx_code_label *hot_label = gen_label_rtx ();
24947 if (jump_around_label == NULL_RTX)
24948 jump_around_label = gen_label_rtx ();
24949 emit_cmp_and_jump_insns (count_exp, GEN_INT (dynamic_check - 1),
24950 LEU, 0, counter_mode (count_exp),
24951 1, hot_label);
24952 predict_jump (REG_BR_PROB_BASE * 90 / 100);
24953 if (issetmem)
24954 set_storage_via_libcall (dst, count_exp, val_exp, false);
24955 else
24956 emit_block_move_via_libcall (dst, src, count_exp, false);
24957 emit_jump (jump_around_label);
24958 emit_label (hot_label);
24962 /* Step 2: Alignment prologue. */
24963 /* Do the expensive promotion once we branched off the small blocks. */
24964 if (issetmem && !promoted_val)
24965 promoted_val = promote_duplicated_reg_to_size (val_exp, size_needed,
24966 desired_align, align);
24968 if (desired_align > align && !misaligned_prologue_used)
24970 if (align_bytes == 0)
24972 /* Except for the first move in the prologue, we no longer know
24973 the constant offset in the aliasing info. It doesn't seem worth
24974 the pain to maintain it for the first move, so throw away
24975 the info early. */
24976 dst = change_address (dst, BLKmode, destreg);
24977 if (!issetmem)
24978 src = change_address (src, BLKmode, srcreg);
24979 dst = expand_set_or_movmem_prologue (dst, src, destreg, srcreg,
24980 promoted_val, vec_promoted_val,
24981 count_exp, align, desired_align,
24982 issetmem);
24983 /* At most desired_align - align bytes are copied. */
24984 if (min_size < (unsigned)(desired_align - align))
24985 min_size = 0;
24986 else
24987 min_size -= desired_align - align;
24989 else
24991 /* If we know how many bytes need to be stored before dst is
24992 sufficiently aligned, maintain aliasing info accurately. */
24993 dst = expand_set_or_movmem_constant_prologue (dst, &src, destreg,
24994 srcreg,
24995 promoted_val,
24996 vec_promoted_val,
24997 desired_align,
24998 align_bytes,
24999 issetmem);
25001 count_exp = plus_constant (counter_mode (count_exp),
25002 count_exp, -align_bytes);
25003 count -= align_bytes;
25004 min_size -= align_bytes;
25005 max_size -= align_bytes;
25007 if (need_zero_guard
25008 && !min_size
25009 && (count < (unsigned HOST_WIDE_INT) size_needed
25010 || (align_bytes == 0
25011 && count < ((unsigned HOST_WIDE_INT) size_needed
25012 + desired_align - align))))
25014 /* It is possible that we copied enough so the main loop will not
25015 execute. */
25016 gcc_assert (size_needed > 1);
25017 if (label == NULL_RTX)
25018 label = gen_label_rtx ();
25019 emit_cmp_and_jump_insns (count_exp,
25020 GEN_INT (size_needed),
25021 LTU, 0, counter_mode (count_exp), 1, label);
25022 if (expected_size == -1
25023 || expected_size < (desired_align - align) / 2 + size_needed)
25024 predict_jump (REG_BR_PROB_BASE * 20 / 100);
25025 else
25026 predict_jump (REG_BR_PROB_BASE * 60 / 100);
25029 if (label && size_needed == 1)
25031 emit_label (label);
25032 LABEL_NUSES (label) = 1;
25033 label = NULL;
25034 epilogue_size_needed = 1;
25035 if (issetmem)
25036 promoted_val = val_exp;
25038 else if (label == NULL_RTX && !misaligned_prologue_used)
25039 epilogue_size_needed = size_needed;
25041 /* Step 3: Main loop. */
25043 switch (alg)
25045 case libcall:
25046 case no_stringop:
25047 case last_alg:
25048 gcc_unreachable ();
25049 case loop_1_byte:
25050 case loop:
25051 case unrolled_loop:
25052 expand_set_or_movmem_via_loop (dst, src, destreg, srcreg, promoted_val,
25053 count_exp, move_mode, unroll_factor,
25054 expected_size, issetmem);
25055 break;
25056 case vector_loop:
25057 expand_set_or_movmem_via_loop (dst, src, destreg, srcreg,
25058 vec_promoted_val, count_exp, move_mode,
25059 unroll_factor, expected_size, issetmem);
25060 break;
25061 case rep_prefix_8_byte:
25062 case rep_prefix_4_byte:
25063 case rep_prefix_1_byte:
25064 expand_set_or_movmem_via_rep (dst, src, destreg, srcreg, promoted_val,
25065 val_exp, count_exp, move_mode, issetmem);
25066 break;
25068 /* Adjust properly the offset of src and dest memory for aliasing. */
25069 if (CONST_INT_P (count_exp))
25071 if (!issetmem)
25072 src = adjust_automodify_address_nv (src, BLKmode, srcreg,
25073 (count / size_needed) * size_needed);
25074 dst = adjust_automodify_address_nv (dst, BLKmode, destreg,
25075 (count / size_needed) * size_needed);
25077 else
25079 if (!issetmem)
25080 src = change_address (src, BLKmode, srcreg);
25081 dst = change_address (dst, BLKmode, destreg);
25084 /* Step 4: Epilogue to copy the remaining bytes. */
25085 epilogue:
25086 if (label)
25088 /* When the main loop is done, COUNT_EXP might hold the original count,
25089 while we want to copy only COUNT_EXP & (SIZE_NEEDED - 1) bytes.
25090 Epilogue code will actually copy COUNT_EXP & (EPILOGUE_SIZE_NEEDED - 1)
25091 bytes. Compensate if needed. */
25093 if (size_needed < epilogue_size_needed)
25095 tmp =
25096 expand_simple_binop (counter_mode (count_exp), AND, count_exp,
25097 GEN_INT (size_needed - 1), count_exp, 1,
25098 OPTAB_DIRECT);
25099 if (tmp != count_exp)
25100 emit_move_insn (count_exp, tmp);
25102 emit_label (label);
25103 LABEL_NUSES (label) = 1;
25106 if (count_exp != const0_rtx && epilogue_size_needed > 1)
25108 if (force_loopy_epilogue)
25109 expand_setmem_epilogue_via_loop (dst, destreg, val_exp, count_exp,
25110 epilogue_size_needed);
25111 else
25113 if (issetmem)
25114 expand_setmem_epilogue (dst, destreg, promoted_val,
25115 vec_promoted_val, count_exp,
25116 epilogue_size_needed);
25117 else
25118 expand_movmem_epilogue (dst, src, destreg, srcreg, count_exp,
25119 epilogue_size_needed);
25122 if (jump_around_label)
25123 emit_label (jump_around_label);
25124 return true;
25128 /* Expand the appropriate insns for doing strlen if not just doing
25129 repnz; scasb
25131 out = result, initialized with the start address
25132 align_rtx = alignment of the address.
25133 scratch = scratch register, initialized with the start address when
25134 not aligned, otherwise undefined
25136 This is just the body. It needs the initializations mentioned above and
25137 some address computation at the end. These things are done in i386.md. */
25139 static void
25140 ix86_expand_strlensi_unroll_1 (rtx out, rtx src, rtx align_rtx)
25142 int align;
25143 rtx tmp;
25144 rtx_code_label *align_2_label = NULL;
25145 rtx_code_label *align_3_label = NULL;
25146 rtx_code_label *align_4_label = gen_label_rtx ();
25147 rtx_code_label *end_0_label = gen_label_rtx ();
25148 rtx mem;
25149 rtx tmpreg = gen_reg_rtx (SImode);
25150 rtx scratch = gen_reg_rtx (SImode);
25151 rtx cmp;
25153 align = 0;
25154 if (CONST_INT_P (align_rtx))
25155 align = INTVAL (align_rtx);
25157 /* Loop to check 1..3 bytes for null to get an aligned pointer. */
25159 /* Is there a known alignment and is it less than 4? */
25160 if (align < 4)
25162 rtx scratch1 = gen_reg_rtx (Pmode);
25163 emit_move_insn (scratch1, out);
25164 /* Is there a known alignment and is it not 2? */
25165 if (align != 2)
25167 align_3_label = gen_label_rtx (); /* Label when aligned to 3-byte */
25168 align_2_label = gen_label_rtx (); /* Label when aligned to 2-byte */
25170 /* Leave just the 3 lower bits. */
25171 align_rtx = expand_binop (Pmode, and_optab, scratch1, GEN_INT (3),
25172 NULL_RTX, 0, OPTAB_WIDEN);
25174 emit_cmp_and_jump_insns (align_rtx, const0_rtx, EQ, NULL,
25175 Pmode, 1, align_4_label);
25176 emit_cmp_and_jump_insns (align_rtx, const2_rtx, EQ, NULL,
25177 Pmode, 1, align_2_label);
25178 emit_cmp_and_jump_insns (align_rtx, const2_rtx, GTU, NULL,
25179 Pmode, 1, align_3_label);
25181 else
25183 /* Since the alignment is 2, we have to check 2 or 0 bytes;
25184 check if it is aligned to 4 bytes. */
25186 align_rtx = expand_binop (Pmode, and_optab, scratch1, const2_rtx,
25187 NULL_RTX, 0, OPTAB_WIDEN);
25189 emit_cmp_and_jump_insns (align_rtx, const0_rtx, EQ, NULL,
25190 Pmode, 1, align_4_label);
25193 mem = change_address (src, QImode, out);
25195 /* Now compare the bytes. */
25197 /* Compare the first n unaligned bytes on a byte-by-byte basis. */
25198 emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL,
25199 QImode, 1, end_0_label);
25201 /* Increment the address. */
25202 emit_insn (ix86_gen_add3 (out, out, const1_rtx));
25204 /* Not needed with an alignment of 2 */
25205 if (align != 2)
25207 emit_label (align_2_label);
25209 emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL, QImode, 1,
25210 end_0_label);
25212 emit_insn (ix86_gen_add3 (out, out, const1_rtx));
25214 emit_label (align_3_label);
25217 emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL, QImode, 1,
25218 end_0_label);
25220 emit_insn (ix86_gen_add3 (out, out, const1_rtx));
25223 /* Generate a loop to check 4 bytes at a time. It is not a good idea to
25224 align this loop; it only makes the program larger and does not
25225 speed it up. */
25226 emit_label (align_4_label);
25228 mem = change_address (src, SImode, out);
25229 emit_move_insn (scratch, mem);
25230 emit_insn (ix86_gen_add3 (out, out, GEN_INT (4)));
25232 /* This formula yields a nonzero result iff one of the bytes is zero.
25233 This saves three branches inside the loop and many cycles. */
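/* Concretely, TMPREG = (SCRATCH - 0x01010101) & ~SCRATCH & 0x80808080.
   E.g. SCRATCH == 0x61006263 gives 0x00800000 (only the top bit of the zero
   byte survives), while SCRATCH == 0x61626364 gives 0. */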
25235 emit_insn (gen_addsi3 (tmpreg, scratch, GEN_INT (-0x01010101)));
25236 emit_insn (gen_one_cmplsi2 (scratch, scratch));
25237 emit_insn (gen_andsi3 (tmpreg, tmpreg, scratch));
25238 emit_insn (gen_andsi3 (tmpreg, tmpreg,
25239 gen_int_mode (0x80808080, SImode)));
25240 emit_cmp_and_jump_insns (tmpreg, const0_rtx, EQ, 0, SImode, 1,
25241 align_4_label);
25243 if (TARGET_CMOVE)
25245 rtx reg = gen_reg_rtx (SImode);
25246 rtx reg2 = gen_reg_rtx (Pmode);
25247 emit_move_insn (reg, tmpreg);
25248 emit_insn (gen_lshrsi3 (reg, reg, GEN_INT (16)));
25250 /* If zero is not in the first two bytes, move two bytes forward. */
25251 emit_insn (gen_testsi_ccno_1 (tmpreg, GEN_INT (0x8080)));
25252 tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
25253 tmp = gen_rtx_EQ (VOIDmode, tmp, const0_rtx);
25254 emit_insn (gen_rtx_SET (VOIDmode, tmpreg,
25255 gen_rtx_IF_THEN_ELSE (SImode, tmp,
25256 reg,
25257 tmpreg)));
25258 /* Emit lea manually to avoid clobbering of flags. */
25259 emit_insn (gen_rtx_SET (SImode, reg2,
25260 gen_rtx_PLUS (Pmode, out, const2_rtx)));
25262 tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
25263 tmp = gen_rtx_EQ (VOIDmode, tmp, const0_rtx);
25264 emit_insn (gen_rtx_SET (VOIDmode, out,
25265 gen_rtx_IF_THEN_ELSE (Pmode, tmp,
25266 reg2,
25267 out)));
25269 else
25271 rtx_code_label *end_2_label = gen_label_rtx ();
25272 /* Is zero in the first two bytes? */
25274 emit_insn (gen_testsi_ccno_1 (tmpreg, GEN_INT (0x8080)));
25275 tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
25276 tmp = gen_rtx_NE (VOIDmode, tmp, const0_rtx);
25277 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
25278 gen_rtx_LABEL_REF (VOIDmode, end_2_label),
25279 pc_rtx);
25280 tmp = emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
25281 JUMP_LABEL (tmp) = end_2_label;
25283 /* Not in the first two. Move two bytes forward. */
25284 emit_insn (gen_lshrsi3 (tmpreg, tmpreg, GEN_INT (16)));
25285 emit_insn (ix86_gen_add3 (out, out, const2_rtx));
25287 emit_label (end_2_label);
25291 /* Avoid branch in fixing the byte. */
25292 tmpreg = gen_lowpart (QImode, tmpreg);
25293 emit_insn (gen_addqi3_cc (tmpreg, tmpreg, tmpreg));
25294 tmp = gen_rtx_REG (CCmode, FLAGS_REG);
25295 cmp = gen_rtx_LTU (VOIDmode, tmp, const0_rtx);
25296 emit_insn (ix86_gen_sub3_carry (out, out, GEN_INT (3), tmp, cmp));
25298 emit_label (end_0_label);
25301 /* Expand strlen. */
25303 bool
25304 ix86_expand_strlen (rtx out, rtx src, rtx eoschar, rtx align)
25306 rtx addr, scratch1, scratch2, scratch3, scratch4;
25308 /* The generic case of the strlen expander is long. Avoid expanding
25309 it unless TARGET_INLINE_ALL_STRINGOPS. */
25311 if (TARGET_UNROLL_STRLEN && eoschar == const0_rtx && optimize > 1
25312 && !TARGET_INLINE_ALL_STRINGOPS
25313 && !optimize_insn_for_size_p ()
25314 && (!CONST_INT_P (align) || INTVAL (align) < 4))
25315 return false;
25317 addr = force_reg (Pmode, XEXP (src, 0));
25318 scratch1 = gen_reg_rtx (Pmode);
25320 if (TARGET_UNROLL_STRLEN && eoschar == const0_rtx && optimize > 1
25321 && !optimize_insn_for_size_p ())
25323 /* Well it seems that some optimizer does not combine a call like
25324 foo(strlen(bar), strlen(bar));
25325 when the move and the subtraction are done here. It does calculate
25326 the length just once when these instructions are done inside of
25327 output_strlen_unroll(). But I think since &bar[strlen(bar)] is
25328 often used and I use one fewer register for the lifetime of
25329 output_strlen_unroll() this is better. */
25331 emit_move_insn (out, addr);
25333 ix86_expand_strlensi_unroll_1 (out, src, align);
25335 /* strlensi_unroll_1 returns the address of the zero at the end of
25336 the string, like memchr(), so compute the length by subtracting
25337 the start address. */
25338 emit_insn (ix86_gen_sub3 (out, out, addr));
25340 else
25342 rtx unspec;
25344 /* Can't use this if the user has appropriated eax, ecx, or edi. */
25345 if (fixed_regs[AX_REG] || fixed_regs[CX_REG] || fixed_regs[DI_REG])
25346 return false;
25348 scratch2 = gen_reg_rtx (Pmode);
25349 scratch3 = gen_reg_rtx (Pmode);
25350 scratch4 = force_reg (Pmode, constm1_rtx);
25352 emit_move_insn (scratch3, addr);
25353 eoschar = force_reg (QImode, eoschar);
25355 src = replace_equiv_address_nv (src, scratch3);
25357 /* If .md starts supporting :P, this can be done in .md. */
25358 unspec = gen_rtx_UNSPEC (Pmode, gen_rtvec (4, src, eoschar, align,
25359 scratch4), UNSPEC_SCAS);
25360 emit_insn (gen_strlenqi_1 (scratch1, scratch3, unspec));
25361 emit_insn (ix86_gen_one_cmpl2 (scratch2, scratch1));
25362 emit_insn (ix86_gen_add3 (out, scratch2, constm1_rtx));
25364 return true;
25367 /* For a given symbol (function) construct code to compute the address of its
25368 PLT entry in the large x86-64 PIC model. */
25369 static rtx
25370 construct_plt_address (rtx symbol)
25372 rtx tmp, unspec;
25374 gcc_assert (GET_CODE (symbol) == SYMBOL_REF);
25375 gcc_assert (ix86_cmodel == CM_LARGE_PIC && !TARGET_PECOFF);
25376 gcc_assert (Pmode == DImode);
25378 tmp = gen_reg_rtx (Pmode);
25379 unspec = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, symbol), UNSPEC_PLTOFF);
25381 emit_move_insn (tmp, gen_rtx_CONST (Pmode, unspec));
25382 emit_insn (ix86_gen_add3 (tmp, tmp, pic_offset_table_rtx));
25383 return tmp;
25387 ix86_expand_call (rtx retval, rtx fnaddr, rtx callarg1,
25388 rtx callarg2,
25389 rtx pop, bool sibcall)
25391 rtx vec[3];
25392 rtx use = NULL, call;
25393 unsigned int vec_len = 0;
25395 if (pop == const0_rtx)
25396 pop = NULL;
25397 gcc_assert (!TARGET_64BIT || !pop);
25399 if (TARGET_MACHO && !TARGET_64BIT)
25401 #if TARGET_MACHO
25402 if (flag_pic && GET_CODE (XEXP (fnaddr, 0)) == SYMBOL_REF)
25403 fnaddr = machopic_indirect_call_target (fnaddr);
25404 #endif
25406 else
25408 /* Static functions and indirect calls don't need the pic register. */
25409 if (flag_pic
25410 && (!TARGET_64BIT
25411 || (ix86_cmodel == CM_LARGE_PIC
25412 && DEFAULT_ABI != MS_ABI))
25413 && GET_CODE (XEXP (fnaddr, 0)) == SYMBOL_REF
25414 && ! SYMBOL_REF_LOCAL_P (XEXP (fnaddr, 0)))
25416 use_reg (&use, gen_rtx_REG (Pmode, REAL_PIC_OFFSET_TABLE_REGNUM));
25417 if (ix86_use_pseudo_pic_reg ())
25418 emit_move_insn (gen_rtx_REG (Pmode, REAL_PIC_OFFSET_TABLE_REGNUM),
25419 pic_offset_table_rtx);
25423 if (TARGET_64BIT && INTVAL (callarg2) >= 0)
25425 rtx al = gen_rtx_REG (QImode, AX_REG);
25426 emit_move_insn (al, callarg2);
25427 use_reg (&use, al);
25430 if (ix86_cmodel == CM_LARGE_PIC
25431 && !TARGET_PECOFF
25432 && MEM_P (fnaddr)
25433 && GET_CODE (XEXP (fnaddr, 0)) == SYMBOL_REF
25434 && !local_symbolic_operand (XEXP (fnaddr, 0), VOIDmode))
25435 fnaddr = gen_rtx_MEM (QImode, construct_plt_address (XEXP (fnaddr, 0)));
25436 else if (sibcall
25437 ? !sibcall_insn_operand (XEXP (fnaddr, 0), word_mode)
25438 : !call_insn_operand (XEXP (fnaddr, 0), word_mode))
25440 fnaddr = convert_to_mode (word_mode, XEXP (fnaddr, 0), 1);
25441 fnaddr = gen_rtx_MEM (QImode, copy_to_mode_reg (word_mode, fnaddr));
25444 call = gen_rtx_CALL (VOIDmode, fnaddr, callarg1);
25446 if (retval)
25448 /* We should add bounds as destination register in case
25449 pointer with bounds may be returned. */
25450 if (TARGET_MPX && SCALAR_INT_MODE_P (GET_MODE (retval)))
25452 rtx b0 = gen_rtx_REG (BND64mode, FIRST_BND_REG);
25453 rtx b1 = gen_rtx_REG (BND64mode, FIRST_BND_REG + 1);
25454 retval = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (3, retval, b0, b1));
25455 chkp_put_regs_to_expr_list (retval);
25458 call = gen_rtx_SET (VOIDmode, retval, call);
25460 vec[vec_len++] = call;
25462 if (pop)
25464 pop = gen_rtx_PLUS (Pmode, stack_pointer_rtx, pop);
25465 pop = gen_rtx_SET (VOIDmode, stack_pointer_rtx, pop);
25466 vec[vec_len++] = pop;
25469 if (TARGET_64BIT_MS_ABI
25470 && (!callarg2 || INTVAL (callarg2) != -2))
25472 int const cregs_size
25473 = ARRAY_SIZE (x86_64_ms_sysv_extra_clobbered_registers);
25474 int i;
25476 for (i = 0; i < cregs_size; i++)
25478 int regno = x86_64_ms_sysv_extra_clobbered_registers[i];
25479 machine_mode mode = SSE_REGNO_P (regno) ? TImode : DImode;
25481 clobber_reg (&use, gen_rtx_REG (mode, regno));
25485 if (vec_len > 1)
25486 call = gen_rtx_PARALLEL (VOIDmode, gen_rtvec_v (vec_len, vec));
25487 call = emit_call_insn (call);
25488 if (use)
25489 CALL_INSN_FUNCTION_USAGE (call) = use;
25491 return call;
25494 /* Output the assembly for a call instruction. */
25496 const char *
25497 ix86_output_call_insn (rtx_insn *insn, rtx call_op)
25499 bool direct_p = constant_call_address_operand (call_op, VOIDmode);
25500 bool seh_nop_p = false;
25501 const char *xasm;
25503 if (SIBLING_CALL_P (insn))
25505 if (direct_p)
25506 xasm = "%!jmp\t%P0";
25507 /* SEH epilogue detection requires the indirect branch case
25508 to include REX.W. */
25509 else if (TARGET_SEH)
25510 xasm = "%!rex.W jmp %A0";
25511 else
25512 xasm = "%!jmp\t%A0";
25514 output_asm_insn (xasm, &call_op);
25515 return "";
25518 /* SEH unwinding can require an extra nop to be emitted in several
25519 circumstances. Determine if we have one of those. */
25520 if (TARGET_SEH)
25522 rtx_insn *i;
25524 for (i = NEXT_INSN (insn); i ; i = NEXT_INSN (i))
25526 /* If we get to another real insn, we don't need the nop. */
25527 if (INSN_P (i))
25528 break;
25530 /* If we get to the epilogue note, prevent a catch region from
25531 being adjacent to the standard epilogue sequence. If non-
25532 call-exceptions, we'll have done this during epilogue emission. */
25533 if (NOTE_P (i) && NOTE_KIND (i) == NOTE_INSN_EPILOGUE_BEG
25534 && !flag_non_call_exceptions
25535 && !can_throw_internal (insn))
25537 seh_nop_p = true;
25538 break;
25542 /* If we didn't find a real insn following the call, prevent the
25543 unwinder from looking into the next function. */
25544 if (i == NULL)
25545 seh_nop_p = true;
25548 if (direct_p)
25549 xasm = "%!call\t%P0";
25550 else
25551 xasm = "%!call\t%A0";
25553 output_asm_insn (xasm, &call_op);
25555 if (seh_nop_p)
25556 return "nop";
25558 return "";
25561 /* Clear stack slot assignments remembered from previous functions.
25562 This is called from INIT_EXPANDERS once before RTL is emitted for each
25563 function. */
25565 static struct machine_function *
25566 ix86_init_machine_status (void)
25568 struct machine_function *f;
25570 f = ggc_cleared_alloc<machine_function> ();
25571 f->use_fast_prologue_epilogue_nregs = -1;
25572 f->call_abi = ix86_abi;
25574 return f;
25577 /* Return a MEM corresponding to a stack slot with mode MODE.
25578 Allocate a new slot if necessary.
25580 The RTL for a function can have several slots available: N is
25581 which slot to use. */
25584 assign_386_stack_local (machine_mode mode, enum ix86_stack_slot n)
25586 struct stack_local_entry *s;
25588 gcc_assert (n < MAX_386_STACK_LOCALS);
25590 for (s = ix86_stack_locals; s; s = s->next)
25591 if (s->mode == mode && s->n == n)
25592 return validize_mem (copy_rtx (s->rtl));
25594 s = ggc_alloc<stack_local_entry> ();
25595 s->n = n;
25596 s->mode = mode;
25597 s->rtl = assign_stack_local (mode, GET_MODE_SIZE (mode), 0);
25599 s->next = ix86_stack_locals;
25600 ix86_stack_locals = s;
25601 return validize_mem (copy_rtx (s->rtl));
25604 static void
25605 ix86_instantiate_decls (void)
25607 struct stack_local_entry *s;
25609 for (s = ix86_stack_locals; s; s = s->next)
25610 if (s->rtl != NULL_RTX)
25611 instantiate_decl_rtl (s->rtl);
25614 /* Check whether x86 address PARTS is a pc-relative address. */
25616 static bool
25617 rip_relative_addr_p (struct ix86_address *parts)
25619 rtx base, index, disp;
25621 base = parts->base;
25622 index = parts->index;
25623 disp = parts->disp;
25625 if (disp && !base && !index)
25627 if (TARGET_64BIT)
25629 rtx symbol = disp;
25631 if (GET_CODE (disp) == CONST)
25632 symbol = XEXP (disp, 0);
25633 if (GET_CODE (symbol) == PLUS
25634 && CONST_INT_P (XEXP (symbol, 1)))
25635 symbol = XEXP (symbol, 0);
25637 if (GET_CODE (symbol) == LABEL_REF
25638 || (GET_CODE (symbol) == SYMBOL_REF
25639 && SYMBOL_REF_TLS_MODEL (symbol) == 0)
25640 || (GET_CODE (symbol) == UNSPEC
25641 && (XINT (symbol, 1) == UNSPEC_GOTPCREL
25642 || XINT (symbol, 1) == UNSPEC_PCREL
25643 || XINT (symbol, 1) == UNSPEC_GOTNTPOFF)))
25644 return true;
25647 return false;
25650 /* Calculate the length of the memory address in the instruction encoding.
25651 Includes addr32 prefix, does not include the one-byte modrm, opcode,
25652 or other prefixes. We never generate addr32 prefix for LEA insn. */
25655 memory_address_length (rtx addr, bool lea)
25657 struct ix86_address parts;
25658 rtx base, index, disp;
25659 int len;
25660 int ok;
25662 if (GET_CODE (addr) == PRE_DEC
25663 || GET_CODE (addr) == POST_INC
25664 || GET_CODE (addr) == PRE_MODIFY
25665 || GET_CODE (addr) == POST_MODIFY)
25666 return 0;
25668 ok = ix86_decompose_address (addr, &parts);
25669 gcc_assert (ok);
25671 len = (parts.seg == SEG_DEFAULT) ? 0 : 1;
25673 /* If this is not LEA instruction, add the length of addr32 prefix. */
25674 if (TARGET_64BIT && !lea
25675 && (SImode_address_operand (addr, VOIDmode)
25676 || (parts.base && GET_MODE (parts.base) == SImode)
25677 || (parts.index && GET_MODE (parts.index) == SImode)))
25678 len++;
25680 base = parts.base;
25681 index = parts.index;
25682 disp = parts.disp;
25684 if (base && GET_CODE (base) == SUBREG)
25685 base = SUBREG_REG (base);
25686 if (index && GET_CODE (index) == SUBREG)
25687 index = SUBREG_REG (index);
25689 gcc_assert (base == NULL_RTX || REG_P (base));
25690 gcc_assert (index == NULL_RTX || REG_P (index));
25692 /* Rule of thumb:
25693 - esp as the base always wants an index,
25694 - ebp as the base always wants a displacement,
25695 - r12 as the base always wants an index,
25696 - r13 as the base always wants a displacement. */
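/* For example, plain (%esp) cannot be encoded without a SIB byte, and plain
   (%ebp) must be encoded as 0(%ebp) with an explicit zero displacement;
   the same restrictions apply to r12 and r13 in 64-bit code. */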
25698 /* Register Indirect. */
25699 if (base && !index && !disp)
25701 /* esp (for its index) and ebp (for its displacement) need
25702 the two-byte modrm form. Similarly for r12 and r13 in 64-bit
25703 code. */
25704 if (base == arg_pointer_rtx
25705 || base == frame_pointer_rtx
25706 || REGNO (base) == SP_REG
25707 || REGNO (base) == BP_REG
25708 || REGNO (base) == R12_REG
25709 || REGNO (base) == R13_REG)
25710 len++;
25713 /* Direct Addressing. In 64-bit mode mod 00 r/m 5
25714 is not disp32, but disp32(%rip), so for disp32
25715 SIB byte is needed, unless print_operand_address
25716 optimizes it into disp32(%rip) or (%rip) is implied
25717 by UNSPEC. */
25718 else if (disp && !base && !index)
25720 len += 4;
25721 if (rip_relative_addr_p (&parts))
25722 len++;
25724 else
25726 /* Find the length of the displacement constant. */
25727 if (disp)
25729 if (base && satisfies_constraint_K (disp))
25730 len += 1;
25731 else
25732 len += 4;
25734 /* ebp always wants a displacement. Similarly r13. */
25735 else if (base && (REGNO (base) == BP_REG || REGNO (base) == R13_REG))
25736 len++;
25738 /* An index requires the two-byte modrm form.... */
25739 if (index
25740 /* ...like esp (or r12), which always wants an index. */
25741 || base == arg_pointer_rtx
25742 || base == frame_pointer_rtx
25743 || (base && (REGNO (base) == SP_REG || REGNO (base) == R12_REG)))
25744 len++;
25747 return len;
25750 /* Compute default value for "length_immediate" attribute. When SHORTFORM
25751 is set, expect that the insn has an 8-bit immediate alternative. */
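/* For instance, with SHORTFORM an SImode add of the constant 3 fits the
   sign-extended 8-bit immediate range checked below, so the immediate
   contributes 1 byte rather than 4. */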
25753 ix86_attr_length_immediate_default (rtx_insn *insn, bool shortform)
25755 int len = 0;
25756 int i;
25757 extract_insn_cached (insn);
25758 for (i = recog_data.n_operands - 1; i >= 0; --i)
25759 if (CONSTANT_P (recog_data.operand[i]))
25761 enum attr_mode mode = get_attr_mode (insn);
25763 gcc_assert (!len);
25764 if (shortform && CONST_INT_P (recog_data.operand[i]))
25766 HOST_WIDE_INT ival = INTVAL (recog_data.operand[i]);
25767 switch (mode)
25769 case MODE_QI:
25770 len = 1;
25771 continue;
25772 case MODE_HI:
25773 ival = trunc_int_for_mode (ival, HImode);
25774 break;
25775 case MODE_SI:
25776 ival = trunc_int_for_mode (ival, SImode);
25777 break;
25778 default:
25779 break;
25781 if (IN_RANGE (ival, -128, 127))
25783 len = 1;
25784 continue;
25787 switch (mode)
25789 case MODE_QI:
25790 len = 1;
25791 break;
25792 case MODE_HI:
25793 len = 2;
25794 break;
25795 case MODE_SI:
25796 len = 4;
25797 break;
25798 /* Immediates for DImode instructions are encoded
25799 as 32bit sign extended values. */
25800 case MODE_DI:
25801 len = 4;
25802 break;
25803 default:
25804 fatal_insn ("unknown insn mode", insn);
25807 return len;
25810 /* Compute default value for "length_address" attribute. */
25812 ix86_attr_length_address_default (rtx_insn *insn)
25814 int i;
25816 if (get_attr_type (insn) == TYPE_LEA)
25818 rtx set = PATTERN (insn), addr;
25820 if (GET_CODE (set) == PARALLEL)
25821 set = XVECEXP (set, 0, 0);
25823 gcc_assert (GET_CODE (set) == SET);
25825 addr = SET_SRC (set);
25827 return memory_address_length (addr, true);
25830 extract_insn_cached (insn);
25831 for (i = recog_data.n_operands - 1; i >= 0; --i)
25832 if (MEM_P (recog_data.operand[i]))
25834 constrain_operands_cached (insn, reload_completed);
25835 if (which_alternative != -1)
25837 const char *constraints = recog_data.constraints[i];
25838 int alt = which_alternative;
25840 while (*constraints == '=' || *constraints == '+')
25841 constraints++;
25842 while (alt-- > 0)
25843 while (*constraints++ != ',')
25845 /* Skip ignored operands. */
25846 if (*constraints == 'X')
25847 continue;
25849 return memory_address_length (XEXP (recog_data.operand[i], 0), false);
25851 return 0;
25854 /* Compute default value for "length_vex" attribute. It includes
25855 2 or 3 byte VEX prefix and 1 opcode byte. */
25858 ix86_attr_length_vex_default (rtx_insn *insn, bool has_0f_opcode,
25859 bool has_vex_w)
25861 int i;
25863 /* Only the 0f opcode can use the 2 byte VEX prefix, and the VEX.W bit
25864 requires the 3 byte VEX prefix. */
25865 if (!has_0f_opcode || has_vex_w)
25866 return 3 + 1;
25868 /* We can always use 2 byte VEX prefix in 32bit. */
25869 if (!TARGET_64BIT)
25870 return 2 + 1;
25872 extract_insn_cached (insn);
25874 for (i = recog_data.n_operands - 1; i >= 0; --i)
25875 if (REG_P (recog_data.operand[i]))
25877 /* REX.W bit uses 3 byte VEX prefix. */
25878 if (GET_MODE (recog_data.operand[i]) == DImode
25879 && GENERAL_REG_P (recog_data.operand[i]))
25880 return 3 + 1;
25882 else
25884 /* REX.X or REX.B bits use 3 byte VEX prefix. */
25885 if (MEM_P (recog_data.operand[i])
25886 && x86_extended_reg_mentioned_p (recog_data.operand[i]))
25887 return 3 + 1;
25890 return 2 + 1;
25893 /* Return the maximum number of instructions a cpu can issue. */
25895 static int
25896 ix86_issue_rate (void)
25898 switch (ix86_tune)
25900 case PROCESSOR_PENTIUM:
25901 case PROCESSOR_BONNELL:
25902 case PROCESSOR_SILVERMONT:
25903 case PROCESSOR_INTEL:
25904 case PROCESSOR_K6:
25905 case PROCESSOR_BTVER2:
25906 case PROCESSOR_PENTIUM4:
25907 case PROCESSOR_NOCONA:
25908 return 2;
25910 case PROCESSOR_PENTIUMPRO:
25911 case PROCESSOR_ATHLON:
25912 case PROCESSOR_K8:
25913 case PROCESSOR_AMDFAM10:
25914 case PROCESSOR_GENERIC:
25915 case PROCESSOR_BTVER1:
25916 return 3;
25918 case PROCESSOR_BDVER1:
25919 case PROCESSOR_BDVER2:
25920 case PROCESSOR_BDVER3:
25921 case PROCESSOR_BDVER4:
25922 case PROCESSOR_CORE2:
25923 case PROCESSOR_NEHALEM:
25924 case PROCESSOR_SANDYBRIDGE:
25925 case PROCESSOR_HASWELL:
25926 return 4;
25928 default:
25929 return 1;
25933 /* A subroutine of ix86_adjust_cost -- return TRUE iff INSN reads flags set
25934 by DEP_INSN and nothing else set by DEP_INSN. */
25936 static bool
25937 ix86_flags_dependent (rtx_insn *insn, rtx_insn *dep_insn, enum attr_type insn_type)
25939 rtx set, set2;
25941 /* Simplify the test for uninteresting insns. */
25942 if (insn_type != TYPE_SETCC
25943 && insn_type != TYPE_ICMOV
25944 && insn_type != TYPE_FCMOV
25945 && insn_type != TYPE_IBR)
25946 return false;
25948 if ((set = single_set (dep_insn)) != 0)
25950 set = SET_DEST (set);
25951 set2 = NULL_RTX;
25953 else if (GET_CODE (PATTERN (dep_insn)) == PARALLEL
25954 && XVECLEN (PATTERN (dep_insn), 0) == 2
25955 && GET_CODE (XVECEXP (PATTERN (dep_insn), 0, 0)) == SET
25956 && GET_CODE (XVECEXP (PATTERN (dep_insn), 0, 1)) == SET)
25958 set = SET_DEST (XVECEXP (PATTERN (dep_insn), 0, 0));
25959 set2 = SET_DEST (XVECEXP (PATTERN (dep_insn), 0, 1));
25961 else
25962 return false;
25964 if (!REG_P (set) || REGNO (set) != FLAGS_REG)
25965 return false;
25967 /* This test is true if the dependent insn reads the flags but
25968 not any other potentially set register. */
25969 if (!reg_overlap_mentioned_p (set, PATTERN (insn)))
25970 return false;
25972 if (set2 && reg_overlap_mentioned_p (set2, PATTERN (insn)))
25973 return false;
25975 return true;
25978 /* Return true iff USE_INSN has a memory address with operands set by
25979 SET_INSN. */
25981 bool
25982 ix86_agi_dependent (rtx_insn *set_insn, rtx_insn *use_insn)
25984 int i;
25985 extract_insn_cached (use_insn);
25986 for (i = recog_data.n_operands - 1; i >= 0; --i)
25987 if (MEM_P (recog_data.operand[i]))
25989 rtx addr = XEXP (recog_data.operand[i], 0);
25990 return modified_in_p (addr, set_insn) != 0;
25992 return false;
25995 /* Helper function for exact_store_load_dependency.
25996 Return true if addr is found in insn. */
25997 static bool
25998 exact_dependency_1 (rtx addr, rtx insn)
26000 enum rtx_code code;
26001 const char *format_ptr;
26002 int i, j;
26004 code = GET_CODE (insn);
26005 switch (code)
26007 case MEM:
26008 if (rtx_equal_p (addr, insn))
26009 return true;
26010 break;
26011 case REG:
26012 CASE_CONST_ANY:
26013 case SYMBOL_REF:
26014 case CODE_LABEL:
26015 case PC:
26016 case CC0:
26017 case EXPR_LIST:
26018 return false;
26019 default:
26020 break;
26023 format_ptr = GET_RTX_FORMAT (code);
26024 for (i = 0; i < GET_RTX_LENGTH (code); i++)
26026 switch (*format_ptr++)
26028 case 'e':
26029 if (exact_dependency_1 (addr, XEXP (insn, i)))
26030 return true;
26031 break;
26032 case 'E':
26033 for (j = 0; j < XVECLEN (insn, i); j++)
26034 if (exact_dependency_1 (addr, XVECEXP (insn, i, j)))
26035 return true;
26036 break;
26039 return false;
26042 /* Return true if there exists exact dependency for store & load, i.e.
26043 the same memory address is used in them. */
26044 static bool
26045 exact_store_load_dependency (rtx_insn *store, rtx_insn *load)
26047 rtx set1, set2;
26049 set1 = single_set (store);
26050 if (!set1)
26051 return false;
26052 if (!MEM_P (SET_DEST (set1)))
26053 return false;
26054 set2 = single_set (load);
26055 if (!set2)
26056 return false;
26057 if (exact_dependency_1 (SET_DEST (set1), SET_SRC (set2)))
26058 return true;
26059 return false;
26062 static int
26063 ix86_adjust_cost (rtx_insn *insn, rtx link, rtx_insn *dep_insn, int cost)
26065 enum attr_type insn_type, dep_insn_type;
26066 enum attr_memory memory;
26067 rtx set, set2;
26068 int dep_insn_code_number;
26070 /* Anti and output dependencies have zero cost on all CPUs. */
26071 if (REG_NOTE_KIND (link) != 0)
26072 return 0;
26074 dep_insn_code_number = recog_memoized (dep_insn);
26076 /* If we can't recognize the insns, we can't really do anything. */
26077 if (dep_insn_code_number < 0 || recog_memoized (insn) < 0)
26078 return cost;
26080 insn_type = get_attr_type (insn);
26081 dep_insn_type = get_attr_type (dep_insn);
26083 switch (ix86_tune)
26085 case PROCESSOR_PENTIUM:
26086 /* Address Generation Interlock adds a cycle of latency. */
26087 if (insn_type == TYPE_LEA)
26089 rtx addr = PATTERN (insn);
26091 if (GET_CODE (addr) == PARALLEL)
26092 addr = XVECEXP (addr, 0, 0);
26094 gcc_assert (GET_CODE (addr) == SET);
26096 addr = SET_SRC (addr);
26097 if (modified_in_p (addr, dep_insn))
26098 cost += 1;
26100 else if (ix86_agi_dependent (dep_insn, insn))
26101 cost += 1;
26103 /* ??? Compares pair with jump/setcc. */
26104 if (ix86_flags_dependent (insn, dep_insn, insn_type))
26105 cost = 0;
26107 /* Floating point stores require value to be ready one cycle earlier. */
26108 if (insn_type == TYPE_FMOV
26109 && get_attr_memory (insn) == MEMORY_STORE
26110 && !ix86_agi_dependent (dep_insn, insn))
26111 cost += 1;
26112 break;
26114 case PROCESSOR_PENTIUMPRO:
26115 /* INT->FP conversion is expensive. */
26116 if (get_attr_fp_int_src (dep_insn))
26117 cost += 5;
26119 /* There is one cycle extra latency between an FP op and a store. */
26120 if (insn_type == TYPE_FMOV
26121 && (set = single_set (dep_insn)) != NULL_RTX
26122 && (set2 = single_set (insn)) != NULL_RTX
26123 && rtx_equal_p (SET_DEST (set), SET_SRC (set2))
26124 && MEM_P (SET_DEST (set2)))
26125 cost += 1;
26127 memory = get_attr_memory (insn);
26129 /* Show ability of reorder buffer to hide latency of load by executing
26130 in parallel with previous instruction in case
26131 previous instruction is not needed to compute the address. */
26132 if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
26133 && !ix86_agi_dependent (dep_insn, insn))
26135 /* Claim moves to take one cycle, as the core can issue one load
26136 at a time and the next load can start a cycle later. */
26137 if (dep_insn_type == TYPE_IMOV
26138 || dep_insn_type == TYPE_FMOV)
26139 cost = 1;
26140 else if (cost > 1)
26141 cost--;
26143 break;
26145 case PROCESSOR_K6:
26146 /* The esp dependency is resolved before
26147 the instruction is really finished. */
26148 if ((insn_type == TYPE_PUSH || insn_type == TYPE_POP)
26149 && (dep_insn_type == TYPE_PUSH || dep_insn_type == TYPE_POP))
26150 return 1;
26152 /* INT->FP conversion is expensive. */
26153 if (get_attr_fp_int_src (dep_insn))
26154 cost += 5;
26156 memory = get_attr_memory (insn);
26158 /* Show ability of reorder buffer to hide latency of load by executing
26159 in parallel with previous instruction in case
26160 previous instruction is not needed to compute the address. */
26161 if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
26162 && !ix86_agi_dependent (dep_insn, insn))
26164 /* Claim moves to take one cycle, as the core can issue one load
26165 at a time and the next load can start a cycle later. */
26166 if (dep_insn_type == TYPE_IMOV
26167 || dep_insn_type == TYPE_FMOV)
26168 cost = 1;
26169 else if (cost > 2)
26170 cost -= 2;
26171 else
26172 cost = 1;
26174 break;
26176 case PROCESSOR_AMDFAM10:
26177 case PROCESSOR_BDVER1:
26178 case PROCESSOR_BDVER2:
26179 case PROCESSOR_BDVER3:
26180 case PROCESSOR_BDVER4:
26181 case PROCESSOR_BTVER1:
26182 case PROCESSOR_BTVER2:
26183 case PROCESSOR_GENERIC:
26184 /* The stack engine allows push&pop instructions to execute in parallel. */
26185 if ((insn_type == TYPE_PUSH || insn_type == TYPE_POP)
26186 && (dep_insn_type == TYPE_PUSH || dep_insn_type == TYPE_POP))
26187 return 0;
26188 /* FALLTHRU */
26190 case PROCESSOR_ATHLON:
26191 case PROCESSOR_K8:
26192 memory = get_attr_memory (insn);
26194 /* Show ability of reorder buffer to hide latency of load by executing
26195 in parallel with previous instruction in case
26196 previous instruction is not needed to compute the address. */
26197 if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
26198 && !ix86_agi_dependent (dep_insn, insn))
26200 enum attr_unit unit = get_attr_unit (insn);
26201 int loadcost = 3;
26203 /* Because of the difference between the length of integer and
26204 floating unit pipeline preparation stages, the memory operands
26205 for floating point are cheaper.
26207 ??? For Athlon the difference is most probably 2. */
26208 if (unit == UNIT_INTEGER || unit == UNIT_UNKNOWN)
26209 loadcost = 3;
26210 else
26211 loadcost = TARGET_ATHLON ? 2 : 0;
26213 if (cost >= loadcost)
26214 cost -= loadcost;
26215 else
26216 cost = 0;
26218 break;
26220 case PROCESSOR_CORE2:
26221 case PROCESSOR_NEHALEM:
26222 case PROCESSOR_SANDYBRIDGE:
26223 case PROCESSOR_HASWELL:
26224 /* The stack engine allows push&pop instructions to execute in parallel. */
26225 if ((insn_type == TYPE_PUSH || insn_type == TYPE_POP)
26226 && (dep_insn_type == TYPE_PUSH || dep_insn_type == TYPE_POP))
26227 return 0;
26229 memory = get_attr_memory (insn);
26231 /* Show ability of reorder buffer to hide latency of load by executing
26232 in parallel with previous instruction in case
26233 previous instruction is not needed to compute the address. */
26234 if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
26235 && !ix86_agi_dependent (dep_insn, insn))
26237 if (cost >= 4)
26238 cost -= 4;
26239 else
26240 cost = 0;
26242 break;
26244 case PROCESSOR_SILVERMONT:
26245 case PROCESSOR_INTEL:
26246 if (!reload_completed)
26247 return cost;
26249 /* Increase cost of integer loads. */
26250 memory = get_attr_memory (dep_insn);
26251 if (memory == MEMORY_LOAD || memory == MEMORY_BOTH)
26253 enum attr_unit unit = get_attr_unit (dep_insn);
26254 if (unit == UNIT_INTEGER && cost == 1)
26256 if (memory == MEMORY_LOAD)
26257 cost = 3;
26258 else
26260 /* Increase cost of ld/st for short int types only
26261 because of store forwarding issue. */
26262 rtx set = single_set (dep_insn);
26263 if (set && (GET_MODE (SET_DEST (set)) == QImode
26264 || GET_MODE (SET_DEST (set)) == HImode))
26266 /* Increase cost of store/load insn if exact
26267 dependence exists and it is load insn. */
26268 enum attr_memory insn_memory = get_attr_memory (insn);
26269 if (insn_memory == MEMORY_LOAD
26270 && exact_store_load_dependency (dep_insn, insn))
26271 cost = 3;
26277 default:
26278 break;
26281 return cost;
26284 /* How many alternative schedules to try. This should be as wide as the
26285 scheduling freedom in the DFA, but no wider. Making this value too
26286 large results in extra work for the scheduler. */
26288 static int
26289 ia32_multipass_dfa_lookahead (void)
26291 switch (ix86_tune)
26293 case PROCESSOR_PENTIUM:
26294 return 2;
26296 case PROCESSOR_PENTIUMPRO:
26297 case PROCESSOR_K6:
26298 return 1;
26300 case PROCESSOR_BDVER1:
26301 case PROCESSOR_BDVER2:
26302 case PROCESSOR_BDVER3:
26303 case PROCESSOR_BDVER4:
26304 /* We use lookahead value 4 for BD both before and after reload
26305 schedules. Plan is to have value 8 included for O3. */
26306 return 4;
26308 case PROCESSOR_CORE2:
26309 case PROCESSOR_NEHALEM:
26310 case PROCESSOR_SANDYBRIDGE:
26311 case PROCESSOR_HASWELL:
26312 case PROCESSOR_BONNELL:
26313 case PROCESSOR_SILVERMONT:
26314 case PROCESSOR_INTEL:
26315 /* Generally, we want haifa-sched:max_issue() to look ahead as far
26316 as the number of instructions that can be executed in a cycle, i.e.,
26317 issue_rate. I wonder why tuning for many CPUs does not do this. */
26318 if (reload_completed)
26319 return ix86_issue_rate ();
26320 /* Don't use lookahead for pre-reload schedule to save compile time. */
26321 return 0;
26323 default:
26324 return 0;
26328 /* Return true if target platform supports macro-fusion. */
26330 static bool
26331 ix86_macro_fusion_p ()
26333 return TARGET_FUSE_CMP_AND_BRANCH;
26336 /* Check whether the current microarchitecture supports macro fusion
26337 for insn pair "CONDGEN + CONDJMP". Refer to
26338 "Intel Architectures Optimization Reference Manual". */
26340 static bool
26341 ix86_macro_fusion_pair_p (rtx_insn *condgen, rtx_insn *condjmp)
26343 rtx src, dest;
26344 enum rtx_code ccode;
26345 rtx compare_set = NULL_RTX, test_if, cond;
26346 rtx alu_set = NULL_RTX, addr = NULL_RTX;
26348 if (!any_condjump_p (condjmp))
26349 return false;
26351 if (get_attr_type (condgen) != TYPE_TEST
26352 && get_attr_type (condgen) != TYPE_ICMP
26353 && get_attr_type (condgen) != TYPE_INCDEC
26354 && get_attr_type (condgen) != TYPE_ALU)
26355 return false;
26357 compare_set = single_set (condgen);
26358 if (compare_set == NULL_RTX
26359 && !TARGET_FUSE_ALU_AND_BRANCH)
26360 return false;
26362 if (compare_set == NULL_RTX)
26364 int i;
26365 rtx pat = PATTERN (condgen);
26366 for (i = 0; i < XVECLEN (pat, 0); i++)
26367 if (GET_CODE (XVECEXP (pat, 0, i)) == SET)
26369 rtx set_src = SET_SRC (XVECEXP (pat, 0, i));
26370 if (GET_CODE (set_src) == COMPARE)
26371 compare_set = XVECEXP (pat, 0, i);
26372 else
26373 alu_set = XVECEXP (pat, 0, i);
26376 if (compare_set == NULL_RTX)
26377 return false;
26378 src = SET_SRC (compare_set);
26379 if (GET_CODE (src) != COMPARE)
26380 return false;
26382 /* Macro-fusion for cmp/test MEM-IMM + conditional jmp is not
26383 supported. */
26384 if ((MEM_P (XEXP (src, 0))
26385 && CONST_INT_P (XEXP (src, 1)))
26386 || (MEM_P (XEXP (src, 1))
26387 && CONST_INT_P (XEXP (src, 0))))
26388 return false;
26390 /* No fusion for RIP-relative address. */
26391 if (MEM_P (XEXP (src, 0)))
26392 addr = XEXP (XEXP (src, 0), 0);
26393 else if (MEM_P (XEXP (src, 1)))
26394 addr = XEXP (XEXP (src, 1), 0);
26396 if (addr) {
26397 ix86_address parts;
26398 int ok = ix86_decompose_address (addr, &parts);
26399 gcc_assert (ok);
26401 if (rip_relative_addr_p (&parts))
26402 return false;
26405 test_if = SET_SRC (pc_set (condjmp));
26406 cond = XEXP (test_if, 0);
26407 ccode = GET_CODE (cond);
26408 /* Check whether the conditional jump uses the Sign or Overflow flags. */
26409 if (!TARGET_FUSE_CMP_AND_BRANCH_SOFLAGS
26410 && (ccode == GE
26411 || ccode == GT
26412 || ccode == LE
26413 || ccode == LT))
26414 return false;
26416 /* Return true for TYPE_TEST and TYPE_ICMP. */
26417 if (get_attr_type (condgen) == TYPE_TEST
26418 || get_attr_type (condgen) == TYPE_ICMP)
26419 return true;
26421 /* The following handles the case of macro-fusion for alu + jmp. */
26422 if (!TARGET_FUSE_ALU_AND_BRANCH || !alu_set)
26423 return false;
26425 /* No fusion for alu op with memory destination operand. */
26426 dest = SET_DEST (alu_set);
26427 if (MEM_P (dest))
26428 return false;
26430 /* Macro-fusion for inc/dec + unsigned conditional jump is not
26431 supported. */
26432 if (get_attr_type (condgen) == TYPE_INCDEC
26433 && (ccode == GEU
26434 || ccode == GTU
26435 || ccode == LEU
26436 || ccode == LTU))
26437 return false;
26439 return true;
26442 /* Try to reorder ready list to take advantage of Atom pipelined IMUL
26443 execution. It is applied if
26444 (1) An IMUL instruction is on the top of the list;
26445 (2) There exists only one producer of an independent IMUL instruction in
26446 the ready list.
26447 Return index of IMUL producer if it was found and -1 otherwise. */
26448 static int
26449 do_reorder_for_imul (rtx_insn **ready, int n_ready)
26451 rtx_insn *insn;
26452 rtx set, insn1, insn2;
26453 sd_iterator_def sd_it;
26454 dep_t dep;
26455 int index = -1;
26456 int i;
26458 if (!TARGET_BONNELL)
26459 return index;
26461 /* Check that IMUL instruction is on the top of ready list. */
26462 insn = ready[n_ready - 1];
26463 set = single_set (insn);
26464 if (!set)
26465 return index;
26466 if (!(GET_CODE (SET_SRC (set)) == MULT
26467 && GET_MODE (SET_SRC (set)) == SImode))
26468 return index;
26470 /* Search for producer of independent IMUL instruction. */
26471 for (i = n_ready - 2; i >= 0; i--)
26473 insn = ready[i];
26474 if (!NONDEBUG_INSN_P (insn))
26475 continue;
26476 /* Skip IMUL instruction. */
26477 insn2 = PATTERN (insn);
26478 if (GET_CODE (insn2) == PARALLEL)
26479 insn2 = XVECEXP (insn2, 0, 0);
26480 if (GET_CODE (insn2) == SET
26481 && GET_CODE (SET_SRC (insn2)) == MULT
26482 && GET_MODE (SET_SRC (insn2)) == SImode)
26483 continue;
26485 FOR_EACH_DEP (insn, SD_LIST_FORW, sd_it, dep)
26487 rtx con;
26488 con = DEP_CON (dep);
26489 if (!NONDEBUG_INSN_P (con))
26490 continue;
26491 insn1 = PATTERN (con);
26492 if (GET_CODE (insn1) == PARALLEL)
26493 insn1 = XVECEXP (insn1, 0, 0);
26495 if (GET_CODE (insn1) == SET
26496 && GET_CODE (SET_SRC (insn1)) == MULT
26497 && GET_MODE (SET_SRC (insn1)) == SImode)
26499 sd_iterator_def sd_it1;
26500 dep_t dep1;
26501 /* Check that INSN is the only instruction the IMUL depends on. */
26502 index = i;
26503 FOR_EACH_DEP (con, SD_LIST_BACK, sd_it1, dep1)
26505 rtx pro;
26506 pro = DEP_PRO (dep1);
26507 if (!NONDEBUG_INSN_P (pro))
26508 continue;
26509 if (pro != insn)
26510 index = -1;
26512 if (index >= 0)
26513 break;
26516 if (index >= 0)
26517 break;
26519 return index;
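/* Illustrative example: with the ready list ordered bottom-to-top as
   { A, P, IMUL1 }, where IMUL1 is about to issue and P is the sole producer
   of another independent SImode IMUL that is not yet ready, the caller
   (ix86_sched_reorder below) rotates P to the top so that the dependent IMUL
   can follow IMUL1 into the pipelined multiplier with as little delay as
   possible.  */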
26522 /* Try to find the best candidate for the top of the ready list if two insns
26523 have the same priority - the best candidate is the one whose producers were
26524 scheduled earlier. Applied for Silvermont (and -mtune=intel) only.
26525 Return true if the top 2 insns must be interchanged. */
26526 static bool
26527 swap_top_of_ready_list (rtx_insn **ready, int n_ready)
26529 rtx_insn *top = ready[n_ready - 1];
26530 rtx_insn *next = ready[n_ready - 2];
26531 rtx set;
26532 sd_iterator_def sd_it;
26533 dep_t dep;
26534 int clock1 = -1;
26535 int clock2 = -1;
26536 #define INSN_TICK(INSN) (HID (INSN)->tick)
26538 if (!TARGET_SILVERMONT && !TARGET_INTEL)
26539 return false;
26541 if (!NONDEBUG_INSN_P (top))
26542 return false;
26543 if (!NONJUMP_INSN_P (top))
26544 return false;
26545 if (!NONDEBUG_INSN_P (next))
26546 return false;
26547 if (!NONJUMP_INSN_P (next))
26548 return false;
26549 set = single_set (top);
26550 if (!set)
26551 return false;
26552 set = single_set (next);
26553 if (!set)
26554 return false;
26556 if (INSN_PRIORITY_KNOWN (top) && INSN_PRIORITY_KNOWN (next))
26558 if (INSN_PRIORITY (top) != INSN_PRIORITY (next))
26559 return false;
26560 /* Determine the winner more precisely. */
26561 FOR_EACH_DEP (top, SD_LIST_RES_BACK, sd_it, dep)
26563 rtx pro;
26564 pro = DEP_PRO (dep);
26565 if (!NONDEBUG_INSN_P (pro))
26566 continue;
26567 if (INSN_TICK (pro) > clock1)
26568 clock1 = INSN_TICK (pro);
26570 FOR_EACH_DEP (next, SD_LIST_RES_BACK, sd_it, dep)
26572 rtx pro;
26573 pro = DEP_PRO (dep);
26574 if (!NONDEBUG_INSN_P (pro))
26575 continue;
26576 if (INSN_TICK (pro) > clock2)
26577 clock2 = INSN_TICK (pro);
26580 if (clock1 == clock2)
26582 /* Determine winner - load must win. */
26583 enum attr_memory memory1, memory2;
26584 memory1 = get_attr_memory (top);
26585 memory2 = get_attr_memory (next);
26586 if (memory2 == MEMORY_LOAD && memory1 != MEMORY_LOAD)
26587 return true;
26589 return (bool) (clock2 < clock1);
26591 return false;
26592 #undef INSN_TICK
26595 /* Perform possible reordering of the ready list, for Atom/Silvermont only.
26596 Return the issue rate. */
26597 static int
26598 ix86_sched_reorder (FILE *dump, int sched_verbose, rtx_insn **ready,
26599 int *pn_ready, int clock_var)
26601 int issue_rate = -1;
26602 int n_ready = *pn_ready;
26603 int i;
26604 rtx_insn *insn;
26605 int index = -1;
26607 /* Set up issue rate. */
26608 issue_rate = ix86_issue_rate ();
26610 /* Do reordering for BONNELL/SILVERMONT only. */
26611 if (!TARGET_BONNELL && !TARGET_SILVERMONT && !TARGET_INTEL)
26612 return issue_rate;
26614 /* Nothing to do if ready list contains only 1 instruction. */
26615 if (n_ready <= 1)
26616 return issue_rate;
26618 /* Do reordering for the post-reload scheduler only. */
26619 if (!reload_completed)
26620 return issue_rate;
26622 if ((index = do_reorder_for_imul (ready, n_ready)) >= 0)
26624 if (sched_verbose > 1)
26625 fprintf (dump, ";;\tatom sched_reorder: put %d insn on top\n",
26626 INSN_UID (ready[index]));
26628 /* Put IMUL producer (ready[index]) at the top of ready list. */
26629 insn = ready[index];
26630 for (i = index; i < n_ready - 1; i++)
26631 ready[i] = ready[i + 1];
26632 ready[n_ready - 1] = insn;
26633 return issue_rate;
26635 if (clock_var != 0 && swap_top_of_ready_list (ready, n_ready))
26637 if (sched_verbose > 1)
26638 fprintf (dump, ";;\tslm sched_reorder: swap %d and %d insns\n",
26639 INSN_UID (ready[n_ready - 1]), INSN_UID (ready[n_ready - 2]));
26640 /* Swap 2 top elements of ready list. */
26641 insn = ready[n_ready - 1];
26642 ready[n_ready - 1] = ready[n_ready - 2];
26643 ready[n_ready - 2] = insn;
26645 return issue_rate;
26648 static bool
26649 ix86_class_likely_spilled_p (reg_class_t);
26651 /* Return true if the lhs of INSN is a HW function argument register; set
26652 IS_SPILLED to true if it is a likely-spilled HW register. */
26653 static bool
26654 insn_is_function_arg (rtx insn, bool* is_spilled)
26656 rtx dst;
26658 if (!NONDEBUG_INSN_P (insn))
26659 return false;
26660 /* Call instructions are not movable; ignore them. */
26661 if (CALL_P (insn))
26662 return false;
26663 insn = PATTERN (insn);
26664 if (GET_CODE (insn) == PARALLEL)
26665 insn = XVECEXP (insn, 0, 0);
26666 if (GET_CODE (insn) != SET)
26667 return false;
26668 dst = SET_DEST (insn);
26669 if (REG_P (dst) && HARD_REGISTER_P (dst)
26670 && ix86_function_arg_regno_p (REGNO (dst)))
26672 /* Is it a likely-spilled HW register? */
26673 if (!TEST_HARD_REG_BIT (fixed_reg_set, REGNO (dst))
26674 && ix86_class_likely_spilled_p (REGNO_REG_CLASS (REGNO (dst))))
26675 *is_spilled = true;
26676 return true;
26678 return false;
26681 /* Add output dependencies for a chain of adjacent function arguments, but
26682 only if there is a move to a likely-spilled HW register. Return the first
26683 argument if at least one dependence was added, or NULL otherwise. */
26684 static rtx_insn *
26685 add_parameter_dependencies (rtx_insn *call, rtx_insn *head)
26687 rtx_insn *insn;
26688 rtx_insn *last = call;
26689 rtx_insn *first_arg = NULL;
26690 bool is_spilled = false;
26692 head = PREV_INSN (head);
26694 /* Find the argument-passing instruction nearest to the call. */
26695 while (true)
26697 last = PREV_INSN (last);
26698 if (last == head)
26699 return NULL;
26700 if (!NONDEBUG_INSN_P (last))
26701 continue;
26702 if (insn_is_function_arg (last, &is_spilled))
26703 break;
26704 return NULL;
26707 first_arg = last;
26708 while (true)
26710 insn = PREV_INSN (last);
26711 if (!INSN_P (insn))
26712 break;
26713 if (insn == head)
26714 break;
26715 if (!NONDEBUG_INSN_P (insn))
26717 last = insn;
26718 continue;
26720 if (insn_is_function_arg (insn, &is_spilled))
26722 /* Add an output dependence between two function arguments if the chain
26723 of output arguments contains likely-spilled HW registers. */
26724 if (is_spilled)
26725 add_dependence (first_arg, insn, REG_DEP_OUTPUT);
26726 first_arg = last = insn;
26728 else
26729 break;
26731 if (!is_spilled)
26732 return NULL;
26733 return first_arg;
26736 /* Add output or anti dependency from insn to first_arg to restrict its code
26737 motion. */
26738 static void
26739 avoid_func_arg_motion (rtx_insn *first_arg, rtx_insn *insn)
26741 rtx set;
26742 rtx tmp;
26744 set = single_set (insn);
26745 if (!set)
26746 return;
26747 tmp = SET_DEST (set);
26748 if (REG_P (tmp))
26750 /* Add output dependency to the first function argument. */
26751 add_dependence (first_arg, insn, REG_DEP_OUTPUT);
26752 return;
26754 /* Add anti dependency. */
26755 add_dependence (first_arg, insn, REG_DEP_ANTI);
26758 /* Avoid cross-block motion of a function argument by adding a dependency
26759 from the first non-jump instruction in BB. */
26760 static void
26761 add_dependee_for_func_arg (rtx_insn *arg, basic_block bb)
26763 rtx_insn *insn = BB_END (bb);
26765 while (insn)
26767 if (NONDEBUG_INSN_P (insn) && NONJUMP_INSN_P (insn))
26769 rtx set = single_set (insn);
26770 if (set)
26772 avoid_func_arg_motion (arg, insn);
26773 return;
26776 if (insn == BB_HEAD (bb))
26777 return;
26778 insn = PREV_INSN (insn);
26782 /* Hook for pre-reload schedule - avoid motion of function arguments
26783 passed in likely spilled HW registers. */
26784 static void
26785 ix86_dependencies_evaluation_hook (rtx_insn *head, rtx_insn *tail)
26787 rtx_insn *insn;
26788 rtx_insn *first_arg = NULL;
26789 if (reload_completed)
26790 return;
26791 while (head != tail && DEBUG_INSN_P (head))
26792 head = NEXT_INSN (head);
26793 for (insn = tail; insn != head; insn = PREV_INSN (insn))
26794 if (INSN_P (insn) && CALL_P (insn))
26796 first_arg = add_parameter_dependencies (insn, head);
26797 if (first_arg)
26799 /* Add a dependee for the first argument in predecessor blocks, but only
26800 if the region contains more than one block. */
26801 basic_block bb = BLOCK_FOR_INSN (insn);
26802 int rgn = CONTAINING_RGN (bb->index);
26803 int nr_blks = RGN_NR_BLOCKS (rgn);
26804 /* Skip trivial regions and region head blocks that can have
26805 predecessors outside of region. */
26806 if (nr_blks > 1 && BLOCK_TO_BB (bb->index) != 0)
26808 edge e;
26809 edge_iterator ei;
26811 /* Regions are SCCs with the exception of selective
26812 scheduling with pipelining of outer blocks enabled.
26813 So also check that immediate predecessors of a non-head
26814 block are in the same region. */
26815 FOR_EACH_EDGE (e, ei, bb->preds)
26817 /* Avoid creating loop-carried dependencies by using
26818 the topological ordering of the region. */
26819 if (rgn == CONTAINING_RGN (e->src->index)
26820 && BLOCK_TO_BB (bb->index) > BLOCK_TO_BB (e->src->index))
26821 add_dependee_for_func_arg (first_arg, e->src);
26824 insn = first_arg;
26825 if (insn == head)
26826 break;
26829 else if (first_arg)
26830 avoid_func_arg_motion (first_arg, insn);
26833 /* Hook for pre-reload schedule - set priority of moves from likely spilled
26834 HW registers to the maximum, to schedule them as soon as possible. These are
26835 moves from function argument registers at the top of the function entry
26836 and moves from function return value registers after call. */
26837 static int
26838 ix86_adjust_priority (rtx_insn *insn, int priority)
26840 rtx set;
26842 if (reload_completed)
26843 return priority;
26845 if (!NONDEBUG_INSN_P (insn))
26846 return priority;
26848 set = single_set (insn);
26849 if (set)
26851 rtx tmp = SET_SRC (set);
26852 if (REG_P (tmp)
26853 && HARD_REGISTER_P (tmp)
26854 && !TEST_HARD_REG_BIT (fixed_reg_set, REGNO (tmp))
26855 && ix86_class_likely_spilled_p (REGNO_REG_CLASS (REGNO (tmp))))
26856 return current_sched_info->sched_max_insns_priority;
26859 return priority;
26862 /* Model decoder of Core 2/i7.
26863 The hooks below for multipass scheduling (see haifa-sched.c:max_issue)
26864 track the instruction fetch block boundaries and make sure that long
26865 (9+ bytes) instructions are assigned to D0. */
26867 /* Maximum length of an insn that can be handled by
26868 a secondary decoder unit. '8' for Core 2/i7. */
26869 static int core2i7_secondary_decoder_max_insn_size;
26871 /* Ifetch block size, i.e., number of bytes decoder reads per cycle.
26872 '16' for Core 2/i7. */
26873 static int core2i7_ifetch_block_size;
26875 /* Maximum number of instructions decoder can handle per cycle.
26876 '6' for Core 2/i7. */
26877 static int core2i7_ifetch_block_max_insns;
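/* Worked example (illustrative): with a 16-byte ifetch block and at most six
   decoded insns per cycle, candidates of encoded lengths 7, 7 and 5 bytes can
   issue the first two (7 + 7 = 14 <= 16), but the third would overflow the
   block (14 + 5 > 16) and is masked out until the next cycle; similarly, a
   9-byte insn exceeds the secondary-decoder limit of 8 bytes and is masked
   out unless it is the first insn issued in the cycle (decoder D0).  */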
26879 typedef struct ix86_first_cycle_multipass_data_ *
26880 ix86_first_cycle_multipass_data_t;
26881 typedef const struct ix86_first_cycle_multipass_data_ *
26882 const_ix86_first_cycle_multipass_data_t;
26884 /* A variable to store target state across calls to max_issue within
26885 one cycle. */
26886 static struct ix86_first_cycle_multipass_data_ _ix86_first_cycle_multipass_data,
26887 *ix86_first_cycle_multipass_data = &_ix86_first_cycle_multipass_data;
26889 /* Initialize DATA. */
26890 static void
26891 core2i7_first_cycle_multipass_init (void *_data)
26893 ix86_first_cycle_multipass_data_t data
26894 = (ix86_first_cycle_multipass_data_t) _data;
26896 data->ifetch_block_len = 0;
26897 data->ifetch_block_n_insns = 0;
26898 data->ready_try_change = NULL;
26899 data->ready_try_change_size = 0;
26902 /* Advancing the cycle; reset ifetch block counts. */
26903 static void
26904 core2i7_dfa_post_advance_cycle (void)
26906 ix86_first_cycle_multipass_data_t data = ix86_first_cycle_multipass_data;
26908 gcc_assert (data->ifetch_block_n_insns <= core2i7_ifetch_block_max_insns);
26910 data->ifetch_block_len = 0;
26911 data->ifetch_block_n_insns = 0;
26914 static int min_insn_size (rtx_insn *);
26916 /* Filter out insns from ready_try that the core will not be able to issue
26917 on current cycle due to decoder. */
26918 static void
26919 core2i7_first_cycle_multipass_filter_ready_try
26920 (const_ix86_first_cycle_multipass_data_t data,
26921 signed char *ready_try, int n_ready, bool first_cycle_insn_p)
26923 while (n_ready--)
26925 rtx_insn *insn;
26926 int insn_size;
26928 if (ready_try[n_ready])
26929 continue;
26931 insn = get_ready_element (n_ready);
26932 insn_size = min_insn_size (insn);
26934 if (/* If this insn is too long for a secondary decoder ... */
26935 (!first_cycle_insn_p
26936 && insn_size > core2i7_secondary_decoder_max_insn_size)
26937 /* ... or it would not fit into the ifetch block ... */
26938 || data->ifetch_block_len + insn_size > core2i7_ifetch_block_size
26939 /* ... or the decoder is full already ... */
26940 || data->ifetch_block_n_insns + 1 > core2i7_ifetch_block_max_insns)
26941 /* ... mask the insn out. */
26943 ready_try[n_ready] = 1;
26945 if (data->ready_try_change)
26946 bitmap_set_bit (data->ready_try_change, n_ready);
26951 /* Prepare for a new round of multipass lookahead scheduling. */
26952 static void
26953 core2i7_first_cycle_multipass_begin (void *_data,
26954 signed char *ready_try, int n_ready,
26955 bool first_cycle_insn_p)
26957 ix86_first_cycle_multipass_data_t data
26958 = (ix86_first_cycle_multipass_data_t) _data;
26959 const_ix86_first_cycle_multipass_data_t prev_data
26960 = ix86_first_cycle_multipass_data;
26962 /* Restore the state from the end of the previous round. */
26963 data->ifetch_block_len = prev_data->ifetch_block_len;
26964 data->ifetch_block_n_insns = prev_data->ifetch_block_n_insns;
26966 /* Filter instructions that cannot be issued on current cycle due to
26967 decoder restrictions. */
26968 core2i7_first_cycle_multipass_filter_ready_try (data, ready_try, n_ready,
26969 first_cycle_insn_p);
26972 /* INSN is being issued in current solution. Account for its impact on
26973 the decoder model. */
26974 static void
26975 core2i7_first_cycle_multipass_issue (void *_data,
26976 signed char *ready_try, int n_ready,
26977 rtx_insn *insn, const void *_prev_data)
26979 ix86_first_cycle_multipass_data_t data
26980 = (ix86_first_cycle_multipass_data_t) _data;
26981 const_ix86_first_cycle_multipass_data_t prev_data
26982 = (const_ix86_first_cycle_multipass_data_t) _prev_data;
26984 int insn_size = min_insn_size (insn);
26986 data->ifetch_block_len = prev_data->ifetch_block_len + insn_size;
26987 data->ifetch_block_n_insns = prev_data->ifetch_block_n_insns + 1;
26988 gcc_assert (data->ifetch_block_len <= core2i7_ifetch_block_size
26989 && data->ifetch_block_n_insns <= core2i7_ifetch_block_max_insns);
26991 /* Allocate or resize the bitmap for storing INSN's effect on ready_try. */
26992 if (!data->ready_try_change)
26994 data->ready_try_change = sbitmap_alloc (n_ready);
26995 data->ready_try_change_size = n_ready;
26997 else if (data->ready_try_change_size < n_ready)
26999 data->ready_try_change = sbitmap_resize (data->ready_try_change,
27000 n_ready, 0);
27001 data->ready_try_change_size = n_ready;
27003 bitmap_clear (data->ready_try_change);
27005 /* Filter out insns from ready_try that the core will not be able to issue
27006 on current cycle due to decoder. */
27007 core2i7_first_cycle_multipass_filter_ready_try (data, ready_try, n_ready,
27008 false);
27011 /* Revert the effect on ready_try. */
27012 static void
27013 core2i7_first_cycle_multipass_backtrack (const void *_data,
27014 signed char *ready_try,
27015 int n_ready ATTRIBUTE_UNUSED)
27017 const_ix86_first_cycle_multipass_data_t data
27018 = (const_ix86_first_cycle_multipass_data_t) _data;
27019 unsigned int i = 0;
27020 sbitmap_iterator sbi;
27022 gcc_assert (bitmap_last_set_bit (data->ready_try_change) < n_ready);
27023 EXECUTE_IF_SET_IN_BITMAP (data->ready_try_change, 0, i, sbi)
27025 ready_try[i] = 0;
27029 /* Save the result of multipass lookahead scheduling for the next round. */
27030 static void
27031 core2i7_first_cycle_multipass_end (const void *_data)
27033 const_ix86_first_cycle_multipass_data_t data
27034 = (const_ix86_first_cycle_multipass_data_t) _data;
27035 ix86_first_cycle_multipass_data_t next_data
27036 = ix86_first_cycle_multipass_data;
27038 if (data != NULL)
27040 next_data->ifetch_block_len = data->ifetch_block_len;
27041 next_data->ifetch_block_n_insns = data->ifetch_block_n_insns;
27045 /* Deallocate target data. */
27046 static void
27047 core2i7_first_cycle_multipass_fini (void *_data)
27049 ix86_first_cycle_multipass_data_t data
27050 = (ix86_first_cycle_multipass_data_t) _data;
27052 if (data->ready_try_change)
27054 sbitmap_free (data->ready_try_change);
27055 data->ready_try_change = NULL;
27056 data->ready_try_change_size = 0;
27060 /* Prepare for scheduling pass. */
27061 static void
27062 ix86_sched_init_global (FILE *, int, int)
27064 /* Install scheduling hooks for current CPU. Some of these hooks are used
27065 in time-critical parts of the scheduler, so we only set them up when
27066 they are actually used. */
27067 switch (ix86_tune)
27069 case PROCESSOR_CORE2:
27070 case PROCESSOR_NEHALEM:
27071 case PROCESSOR_SANDYBRIDGE:
27072 case PROCESSOR_HASWELL:
27073 /* Do not perform multipass scheduling for pre-reload schedule
27074 to save compile time. */
27075 if (reload_completed)
27077 targetm.sched.dfa_post_advance_cycle
27078 = core2i7_dfa_post_advance_cycle;
27079 targetm.sched.first_cycle_multipass_init
27080 = core2i7_first_cycle_multipass_init;
27081 targetm.sched.first_cycle_multipass_begin
27082 = core2i7_first_cycle_multipass_begin;
27083 targetm.sched.first_cycle_multipass_issue
27084 = core2i7_first_cycle_multipass_issue;
27085 targetm.sched.first_cycle_multipass_backtrack
27086 = core2i7_first_cycle_multipass_backtrack;
27087 targetm.sched.first_cycle_multipass_end
27088 = core2i7_first_cycle_multipass_end;
27089 targetm.sched.first_cycle_multipass_fini
27090 = core2i7_first_cycle_multipass_fini;
27092 /* Set decoder parameters. */
27093 core2i7_secondary_decoder_max_insn_size = 8;
27094 core2i7_ifetch_block_size = 16;
27095 core2i7_ifetch_block_max_insns = 6;
27096 break;
27098 /* ... Fall through ... */
27099 default:
27100 targetm.sched.dfa_post_advance_cycle = NULL;
27101 targetm.sched.first_cycle_multipass_init = NULL;
27102 targetm.sched.first_cycle_multipass_begin = NULL;
27103 targetm.sched.first_cycle_multipass_issue = NULL;
27104 targetm.sched.first_cycle_multipass_backtrack = NULL;
27105 targetm.sched.first_cycle_multipass_end = NULL;
27106 targetm.sched.first_cycle_multipass_fini = NULL;
27107 break;
27112 /* Compute the alignment given to a constant that is being placed in memory.
27113 EXP is the constant and ALIGN is the alignment that the object would
27114 ordinarily have.
27115 The value of this function is used instead of that alignment to align
27116 the object. */
27119 ix86_constant_alignment (tree exp, int align)
27121 if (TREE_CODE (exp) == REAL_CST || TREE_CODE (exp) == VECTOR_CST
27122 || TREE_CODE (exp) == INTEGER_CST)
27124 if (TYPE_MODE (TREE_TYPE (exp)) == DFmode && align < 64)
27125 return 64;
27126 else if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (exp))) && align < 128)
27127 return 128;
27129 else if (!optimize_size && TREE_CODE (exp) == STRING_CST
27130 && TREE_STRING_LENGTH (exp) >= 31 && align < BITS_PER_WORD)
27131 return BITS_PER_WORD;
27133 return align;
27136 /* Compute the alignment for a static variable.
27137 TYPE is the data type, and ALIGN is the alignment that
27138 the object would ordinarily have. The value of this function is used
27139 instead of that alignment to align the object. */
27142 ix86_data_alignment (tree type, int align, bool opt)
27144 /* GCC 4.8 and earlier used to incorrectly assume this alignment even
27145 for symbols from other compilation units or symbols that don't need
27146 to bind locally. In order to preserve some ABI compatibility with
27147 those compilers, ensure we don't decrease alignment from what we
27148 used to assume. */
27150 int max_align_compat
27151 = optimize_size ? BITS_PER_WORD : MIN (256, MAX_OFILE_ALIGNMENT);
27153 /* A data structure equal to or greater than the size of a cache line
27154 (64 bytes in the Pentium 4 and other recent Intel processors, including
27155 processors based on the Intel Core microarchitecture) should be aligned
27156 so that its base address is a multiple of the cache line size. */
27158 int max_align
27159 = MIN ((unsigned) ix86_tune_cost->prefetch_block * 8, MAX_OFILE_ALIGNMENT);
27161 if (max_align < BITS_PER_WORD)
27162 max_align = BITS_PER_WORD;
27164 if (opt
27165 && AGGREGATE_TYPE_P (type)
27166 && TYPE_SIZE (type)
27167 && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST)
27169 if (wi::geu_p (TYPE_SIZE (type), max_align_compat)
27170 && align < max_align_compat)
27171 align = max_align_compat;
27172 if (wi::geu_p (TYPE_SIZE (type), max_align)
27173 && align < max_align)
27174 align = max_align;
27177 /* The x86-64 ABI requires arrays of 16 bytes or more to be aligned
27178 to a 16-byte boundary. */
27179 if (TARGET_64BIT)
27181 if ((opt ? AGGREGATE_TYPE_P (type) : TREE_CODE (type) == ARRAY_TYPE)
27182 && TYPE_SIZE (type)
27183 && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
27184 && wi::geu_p (TYPE_SIZE (type), 128)
27185 && align < 128)
27186 return 128;
27189 if (!opt)
27190 return align;
27192 if (TREE_CODE (type) == ARRAY_TYPE)
27194 if (TYPE_MODE (TREE_TYPE (type)) == DFmode && align < 64)
27195 return 64;
27196 if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (type))) && align < 128)
27197 return 128;
27199 else if (TREE_CODE (type) == COMPLEX_TYPE)
27202 if (TYPE_MODE (type) == DCmode && align < 64)
27203 return 64;
27204 if ((TYPE_MODE (type) == XCmode
27205 || TYPE_MODE (type) == TCmode) && align < 128)
27206 return 128;
27208 else if ((TREE_CODE (type) == RECORD_TYPE
27209 || TREE_CODE (type) == UNION_TYPE
27210 || TREE_CODE (type) == QUAL_UNION_TYPE)
27211 && TYPE_FIELDS (type))
27213 if (DECL_MODE (TYPE_FIELDS (type)) == DFmode && align < 64)
27214 return 64;
27215 if (ALIGN_MODE_128 (DECL_MODE (TYPE_FIELDS (type))) && align < 128)
27216 return 128;
27218 else if (TREE_CODE (type) == REAL_TYPE || TREE_CODE (type) == VECTOR_TYPE
27219 || TREE_CODE (type) == INTEGER_TYPE)
27221 if (TYPE_MODE (type) == DFmode && align < 64)
27222 return 64;
27223 if (ALIGN_MODE_128 (TYPE_MODE (type)) && align < 128)
27224 return 128;
27227 return align;
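/* Worked example (assuming a tuning whose prefetch_block is 64 bytes, so
   max_align above is 512 bits, and a large MAX_OFILE_ALIGNMENT): with OPT
   set, a global "static char buf[100];" has an 800-bit TYPE_SIZE, which
   reaches both max_align_compat (256) and max_align (512), so it is given
   64-byte alignment; "static char buf[40];" (320 bits) only reaches
   max_align_compat and is given 32-byte alignment.  Either array already
   satisfies the x86-64 ABI minimum of 16-byte alignment for arrays of
   16 bytes or more.  */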
27230 /* Compute the alignment for a local variable or a stack slot. EXP is
27231 the data type or decl itself, MODE is the widest mode available and
27232 ALIGN is the alignment that the object would ordinarily have. The
27233 value of this function is used instead of that alignment to align the
27234 object. */
27236 unsigned int
27237 ix86_local_alignment (tree exp, machine_mode mode,
27238 unsigned int align)
27240 tree type, decl;
27242 if (exp && DECL_P (exp))
27244 type = TREE_TYPE (exp);
27245 decl = exp;
27247 else
27249 type = exp;
27250 decl = NULL;
27253 /* Don't do dynamic stack realignment for long long objects with
27254 -mpreferred-stack-boundary=2. */
27255 if (!TARGET_64BIT
27256 && align == 64
27257 && ix86_preferred_stack_boundary < 64
27258 && (mode == DImode || (type && TYPE_MODE (type) == DImode))
27259 && (!type || !TYPE_USER_ALIGN (type))
27260 && (!decl || !DECL_USER_ALIGN (decl)))
27261 align = 32;
27263 /* If TYPE is NULL, we are allocating a stack slot for a caller-saved
27264 register in MODE. We will return the larger of the XFmode and DFmode
27265 alignments. */
27266 if (!type)
27268 if (mode == XFmode && align < GET_MODE_ALIGNMENT (DFmode))
27269 align = GET_MODE_ALIGNMENT (DFmode);
27270 return align;
27273 /* The x86-64 ABI requires arrays of 16 bytes or more to be aligned
27274 to a 16-byte boundary. The exact wording is:
27276 An array uses the same alignment as its elements, except that a local or
27277 global array variable of length at least 16 bytes or
27278 a C99 variable-length array variable always has alignment of at least 16 bytes.
27280 This was added to allow use of aligned SSE instructions on arrays. The
27281 rule is meant for static storage (where the compiler cannot do the analysis
27282 by itself). We follow it for automatic variables only when convenient.
27283 We fully control everything in the function being compiled, and functions
27284 from other units cannot rely on the alignment.
27286 Exclude the va_list type. It is the common case of a local array where
27287 we cannot benefit from the alignment.
27289 TODO: Probably one should optimize for size only when the variable does not escape. */
27290 if (TARGET_64BIT && optimize_function_for_speed_p (cfun)
27291 && TARGET_SSE)
27293 if (AGGREGATE_TYPE_P (type)
27294 && (va_list_type_node == NULL_TREE
27295 || (TYPE_MAIN_VARIANT (type)
27296 != TYPE_MAIN_VARIANT (va_list_type_node)))
27297 && TYPE_SIZE (type)
27298 && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
27299 && wi::geu_p (TYPE_SIZE (type), 16)
27300 && align < 128)
27301 return 128;
27303 if (TREE_CODE (type) == ARRAY_TYPE)
27305 if (TYPE_MODE (TREE_TYPE (type)) == DFmode && align < 64)
27306 return 64;
27307 if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (type))) && align < 128)
27308 return 128;
27310 else if (TREE_CODE (type) == COMPLEX_TYPE)
27312 if (TYPE_MODE (type) == DCmode && align < 64)
27313 return 64;
27314 if ((TYPE_MODE (type) == XCmode
27315 || TYPE_MODE (type) == TCmode) && align < 128)
27316 return 128;
27318 else if ((TREE_CODE (type) == RECORD_TYPE
27319 || TREE_CODE (type) == UNION_TYPE
27320 || TREE_CODE (type) == QUAL_UNION_TYPE)
27321 && TYPE_FIELDS (type))
27323 if (DECL_MODE (TYPE_FIELDS (type)) == DFmode && align < 64)
27324 return 64;
27325 if (ALIGN_MODE_128 (DECL_MODE (TYPE_FIELDS (type))) && align < 128)
27326 return 128;
27328 else if (TREE_CODE (type) == REAL_TYPE || TREE_CODE (type) == VECTOR_TYPE
27329 || TREE_CODE (type) == INTEGER_TYPE)
27332 if (TYPE_MODE (type) == DFmode && align < 64)
27333 return 64;
27334 if (ALIGN_MODE_128 (TYPE_MODE (type)) && align < 128)
27335 return 128;
27337 return align;
27340 /* Compute the minimum required alignment for dynamic stack realignment
27341 purposes for a local variable, parameter or a stack slot. EXP is
27342 the data type or decl itself, MODE is its mode and ALIGN is the
27343 alignment that the object would ordinarily have. */
27345 unsigned int
27346 ix86_minimum_alignment (tree exp, machine_mode mode,
27347 unsigned int align)
27349 tree type, decl;
27351 if (exp && DECL_P (exp))
27353 type = TREE_TYPE (exp);
27354 decl = exp;
27356 else
27358 type = exp;
27359 decl = NULL;
27362 if (TARGET_64BIT || align != 64 || ix86_preferred_stack_boundary >= 64)
27363 return align;
27365 /* Don't do dynamic stack realignment for long long objects with
27366 -mpreferred-stack-boundary=2. */
27367 if ((mode == DImode || (type && TYPE_MODE (type) == DImode))
27368 && (!type || !TYPE_USER_ALIGN (type))
27369 && (!decl || !DECL_USER_ALIGN (decl)))
27370 return 32;
27372 return align;
27375 /* Find a location for the static chain incoming to a nested function.
27376 This is a register, unless all free registers are used by arguments. */
27378 static rtx
27379 ix86_static_chain (const_tree fndecl_or_type, bool incoming_p)
27381 unsigned regno;
27383 /* While this function won't be called by the middle-end when a static
27384 chain isn't needed, it's also used throughout the backend so it's
27385 easiest to keep this check centralized. */
27386 if (DECL_P (fndecl_or_type) && !DECL_STATIC_CHAIN (fndecl_or_type))
27387 return NULL;
27389 if (TARGET_64BIT)
27391 /* We always use R10 in 64-bit mode. */
27392 regno = R10_REG;
27394 else
27396 const_tree fntype, fndecl;
27397 unsigned int ccvt;
27399 /* By default in 32-bit mode we use ECX to pass the static chain. */
27400 regno = CX_REG;
27402 if (TREE_CODE (fndecl_or_type) == FUNCTION_DECL)
27404 fntype = TREE_TYPE (fndecl_or_type);
27405 fndecl = fndecl_or_type;
27407 else
27409 fntype = fndecl_or_type;
27410 fndecl = NULL;
27413 ccvt = ix86_get_callcvt (fntype);
27414 if ((ccvt & IX86_CALLCVT_FASTCALL) != 0)
27416 /* Fastcall functions use ecx/edx for arguments, which leaves
27417 us with EAX for the static chain.
27418 Thiscall functions use ecx for arguments, which also
27419 leaves us with EAX for the static chain. */
27420 regno = AX_REG;
27422 else if ((ccvt & IX86_CALLCVT_THISCALL) != 0)
27424 /* Thiscall functions use ecx for arguments, which leaves
27425 us with EAX and EDX for the static chain.
27426 We use EAX for ABI compatibility. */
27427 regno = AX_REG;
27429 else if (ix86_function_regparm (fntype, fndecl) == 3)
27431 /* For regparm 3, we have no free call-clobbered registers in
27432 which to store the static chain. In order to implement this,
27433 we have the trampoline push the static chain to the stack.
27434 However, we can't push a value below the return address when
27435 we call the nested function directly, so we have to use an
27436 alternate entry point. For this we use ESI, and have the
27437 alternate entry point push ESI, so that things appear the
27438 same once we're executing the nested function. */
27439 if (incoming_p)
27441 if (fndecl == current_function_decl)
27442 ix86_static_chain_on_stack = true;
27443 return gen_frame_mem (SImode,
27444 plus_constant (Pmode,
27445 arg_pointer_rtx, -8));
27447 regno = SI_REG;
27451 return gen_rtx_REG (Pmode, regno);
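/* Summary of the cases above: 64-bit code always passes the static chain in
   r10; 32-bit code uses ECX by default, EAX for fastcall and thiscall
   functions, and for regparm(3) functions the chain is handed to an
   alternate entry point in ESI, which pushes it so the nested function sees
   it on the stack.  */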
27454 /* Emit RTL insns to initialize the variable parts of a trampoline.
27455 FNDECL is the decl of the target address; M_TRAMP is a MEM for
27456 the trampoline, and CHAIN_VALUE is an RTX for the static chain
27457 to be passed to the target function. */
27459 static void
27460 ix86_trampoline_init (rtx m_tramp, tree fndecl, rtx chain_value)
27462 rtx mem, fnaddr;
27463 int opcode;
27464 int offset = 0;
27466 fnaddr = XEXP (DECL_RTL (fndecl), 0);
27468 if (TARGET_64BIT)
27470 int size;
27472 /* Load the function address into r11. Try to load the address using
27473 the shorter movl instead of movabs. We may want to support
27474 movq for kernel mode, but the kernel does not use trampolines at
27475 the moment. FNADDR is a 32-bit address and may not be in
27476 DImode when ptr_mode == SImode. Always use movl in this
27477 case. */
27478 if (ptr_mode == SImode
27479 || x86_64_zext_immediate_operand (fnaddr, VOIDmode))
27481 fnaddr = copy_addr_to_reg (fnaddr);
27483 mem = adjust_address (m_tramp, HImode, offset);
27484 emit_move_insn (mem, gen_int_mode (0xbb41, HImode));
27486 mem = adjust_address (m_tramp, SImode, offset + 2);
27487 emit_move_insn (mem, gen_lowpart (SImode, fnaddr));
27488 offset += 6;
27490 else
27492 mem = adjust_address (m_tramp, HImode, offset);
27493 emit_move_insn (mem, gen_int_mode (0xbb49, HImode));
27495 mem = adjust_address (m_tramp, DImode, offset + 2);
27496 emit_move_insn (mem, fnaddr);
27497 offset += 10;
27500 /* Load the static chain into r10 using movabs. Use the shorter movl
27501 instead of movabs when ptr_mode == SImode. */
27502 if (ptr_mode == SImode)
27504 opcode = 0xba41;
27505 size = 6;
27507 else
27509 opcode = 0xba49;
27510 size = 10;
27513 mem = adjust_address (m_tramp, HImode, offset);
27514 emit_move_insn (mem, gen_int_mode (opcode, HImode));
27516 mem = adjust_address (m_tramp, ptr_mode, offset + 2);
27517 emit_move_insn (mem, chain_value);
27518 offset += size;
27520 /* Jump to r11; the last (unused) byte is a nop, only there to
27521 pad the write out to a single 32-bit store. */
27522 mem = adjust_address (m_tramp, SImode, offset);
27523 emit_move_insn (mem, gen_int_mode (0x90e3ff49, SImode));
27524 offset += 4;
27526 else
27528 rtx disp, chain;
27530 /* Depending on the static chain location, either load a register
27531 with a constant, or push the constant to the stack. All of the
27532 instructions are the same size. */
27533 chain = ix86_static_chain (fndecl, true);
27534 if (REG_P (chain))
27536 switch (REGNO (chain))
27538 case AX_REG:
27539 opcode = 0xb8; break;
27540 case CX_REG:
27541 opcode = 0xb9; break;
27542 default:
27543 gcc_unreachable ();
27546 else
27547 opcode = 0x68;
27549 mem = adjust_address (m_tramp, QImode, offset);
27550 emit_move_insn (mem, gen_int_mode (opcode, QImode));
27552 mem = adjust_address (m_tramp, SImode, offset + 1);
27553 emit_move_insn (mem, chain_value);
27554 offset += 5;
27556 mem = adjust_address (m_tramp, QImode, offset);
27557 emit_move_insn (mem, gen_int_mode (0xe9, QImode));
27559 mem = adjust_address (m_tramp, SImode, offset + 1);
27561 /* Compute offset from the end of the jmp to the target function.
27562 In the case in which the trampoline stores the static chain on
27563 the stack, we need to skip the first insn which pushes the
27564 (call-saved) register static chain; this push is 1 byte. */
27565 offset += 5;
27566 disp = expand_binop (SImode, sub_optab, fnaddr,
27567 plus_constant (Pmode, XEXP (m_tramp, 0),
27568 offset - (MEM_P (chain) ? 1 : 0)),
27569 NULL_RTX, 1, OPTAB_DIRECT);
27570 emit_move_insn (mem, disp);
27573 gcc_assert (offset <= TRAMPOLINE_SIZE);
27575 #ifdef HAVE_ENABLE_EXECUTE_STACK
27576 #ifdef CHECK_EXECUTE_STACK_ENABLED
27577 if (CHECK_EXECUTE_STACK_ENABLED)
27578 #endif
27579 emit_library_call (gen_rtx_SYMBOL_REF (Pmode, "__enable_execute_stack"),
27580 LCT_NORMAL, VOIDmode, 1, XEXP (m_tramp, 0), Pmode);
27581 #endif
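/* A minimal host-side sketch, independent of GCC internals, of the byte
   pattern the 64-bit movabs path above emits; the helper name below is
   hypothetical and only meant to make the 24-byte layout easy to read back
   from a debugger (BUF must have room for all 24 bytes).

     #include <stdint.h>
     #include <string.h>

     static void
     sketch_trampoline_bytes (uint8_t *buf, uint64_t fnaddr, uint64_t chain)
     {
       // 49 bb imm64:  movabs $fnaddr, %r11
       buf[0] = 0x49; buf[1] = 0xbb; memcpy (buf + 2, &fnaddr, 8);
       // 49 ba imm64:  movabs $chain, %r10
       buf[10] = 0x49; buf[11] = 0xba; memcpy (buf + 12, &chain, 8);
       // 49 ff e3:  jmp *%r11;  90:  nop padding the final 32-bit store
       buf[20] = 0x49; buf[21] = 0xff; buf[22] = 0xe3; buf[23] = 0x90;
     }  */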
27584 /* The following file contains several enumerations and data structures
27585 built from the definitions in i386-builtin-types.def. */
27587 #include "i386-builtin-types.inc"
27589 /* Table for the ix86 builtin non-function types. */
27590 static GTY(()) tree ix86_builtin_type_tab[(int) IX86_BT_LAST_CPTR + 1];
27592 /* Retrieve an element from the above table, building some of
27593 the types lazily. */
27595 static tree
27596 ix86_get_builtin_type (enum ix86_builtin_type tcode)
27598 unsigned int index;
27599 tree type, itype;
27601 gcc_assert ((unsigned)tcode < ARRAY_SIZE(ix86_builtin_type_tab));
27603 type = ix86_builtin_type_tab[(int) tcode];
27604 if (type != NULL)
27605 return type;
27607 gcc_assert (tcode > IX86_BT_LAST_PRIM);
27608 if (tcode <= IX86_BT_LAST_VECT)
27610 machine_mode mode;
27612 index = tcode - IX86_BT_LAST_PRIM - 1;
27613 itype = ix86_get_builtin_type (ix86_builtin_type_vect_base[index]);
27614 mode = ix86_builtin_type_vect_mode[index];
27616 type = build_vector_type_for_mode (itype, mode);
27618 else
27620 int quals;
27622 index = tcode - IX86_BT_LAST_VECT - 1;
27623 if (tcode <= IX86_BT_LAST_PTR)
27624 quals = TYPE_UNQUALIFIED;
27625 else
27626 quals = TYPE_QUAL_CONST;
27628 itype = ix86_get_builtin_type (ix86_builtin_type_ptr_base[index]);
27629 if (quals != TYPE_UNQUALIFIED)
27630 itype = build_qualified_type (itype, quals);
27632 type = build_pointer_type (itype);
27635 ix86_builtin_type_tab[(int) tcode] = type;
27636 return type;
27639 /* Table for the ix86 builtin function types. */
27640 static GTY(()) tree ix86_builtin_func_type_tab[(int) IX86_BT_LAST_ALIAS + 1];
27642 /* Retrieve an element from the above table, building some of
27643 the types lazily. */
27645 static tree
27646 ix86_get_builtin_func_type (enum ix86_builtin_func_type tcode)
27648 tree type;
27650 gcc_assert ((unsigned)tcode < ARRAY_SIZE (ix86_builtin_func_type_tab));
27652 type = ix86_builtin_func_type_tab[(int) tcode];
27653 if (type != NULL)
27654 return type;
27656 if (tcode <= IX86_BT_LAST_FUNC)
27658 unsigned start = ix86_builtin_func_start[(int) tcode];
27659 unsigned after = ix86_builtin_func_start[(int) tcode + 1];
27660 tree rtype, atype, args = void_list_node;
27661 unsigned i;
27663 rtype = ix86_get_builtin_type (ix86_builtin_func_args[start]);
27664 for (i = after - 1; i > start; --i)
27666 atype = ix86_get_builtin_type (ix86_builtin_func_args[i]);
27667 args = tree_cons (NULL, atype, args);
27670 type = build_function_type (rtype, args);
27672 else
27674 unsigned index = tcode - IX86_BT_LAST_FUNC - 1;
27675 enum ix86_builtin_func_type icode;
27677 icode = ix86_builtin_func_alias_base[index];
27678 type = ix86_get_builtin_func_type (icode);
27681 ix86_builtin_func_type_tab[(int) tcode] = type;
27682 return type;
27686 /* Codes for all the SSE/MMX builtins. */
27687 enum ix86_builtins
27689 IX86_BUILTIN_ADDPS,
27690 IX86_BUILTIN_ADDSS,
27691 IX86_BUILTIN_DIVPS,
27692 IX86_BUILTIN_DIVSS,
27693 IX86_BUILTIN_MULPS,
27694 IX86_BUILTIN_MULSS,
27695 IX86_BUILTIN_SUBPS,
27696 IX86_BUILTIN_SUBSS,
27698 IX86_BUILTIN_CMPEQPS,
27699 IX86_BUILTIN_CMPLTPS,
27700 IX86_BUILTIN_CMPLEPS,
27701 IX86_BUILTIN_CMPGTPS,
27702 IX86_BUILTIN_CMPGEPS,
27703 IX86_BUILTIN_CMPNEQPS,
27704 IX86_BUILTIN_CMPNLTPS,
27705 IX86_BUILTIN_CMPNLEPS,
27706 IX86_BUILTIN_CMPNGTPS,
27707 IX86_BUILTIN_CMPNGEPS,
27708 IX86_BUILTIN_CMPORDPS,
27709 IX86_BUILTIN_CMPUNORDPS,
27710 IX86_BUILTIN_CMPEQSS,
27711 IX86_BUILTIN_CMPLTSS,
27712 IX86_BUILTIN_CMPLESS,
27713 IX86_BUILTIN_CMPNEQSS,
27714 IX86_BUILTIN_CMPNLTSS,
27715 IX86_BUILTIN_CMPNLESS,
27716 IX86_BUILTIN_CMPORDSS,
27717 IX86_BUILTIN_CMPUNORDSS,
27719 IX86_BUILTIN_COMIEQSS,
27720 IX86_BUILTIN_COMILTSS,
27721 IX86_BUILTIN_COMILESS,
27722 IX86_BUILTIN_COMIGTSS,
27723 IX86_BUILTIN_COMIGESS,
27724 IX86_BUILTIN_COMINEQSS,
27725 IX86_BUILTIN_UCOMIEQSS,
27726 IX86_BUILTIN_UCOMILTSS,
27727 IX86_BUILTIN_UCOMILESS,
27728 IX86_BUILTIN_UCOMIGTSS,
27729 IX86_BUILTIN_UCOMIGESS,
27730 IX86_BUILTIN_UCOMINEQSS,
27732 IX86_BUILTIN_CVTPI2PS,
27733 IX86_BUILTIN_CVTPS2PI,
27734 IX86_BUILTIN_CVTSI2SS,
27735 IX86_BUILTIN_CVTSI642SS,
27736 IX86_BUILTIN_CVTSS2SI,
27737 IX86_BUILTIN_CVTSS2SI64,
27738 IX86_BUILTIN_CVTTPS2PI,
27739 IX86_BUILTIN_CVTTSS2SI,
27740 IX86_BUILTIN_CVTTSS2SI64,
27742 IX86_BUILTIN_MAXPS,
27743 IX86_BUILTIN_MAXSS,
27744 IX86_BUILTIN_MINPS,
27745 IX86_BUILTIN_MINSS,
27747 IX86_BUILTIN_LOADUPS,
27748 IX86_BUILTIN_STOREUPS,
27749 IX86_BUILTIN_MOVSS,
27751 IX86_BUILTIN_MOVHLPS,
27752 IX86_BUILTIN_MOVLHPS,
27753 IX86_BUILTIN_LOADHPS,
27754 IX86_BUILTIN_LOADLPS,
27755 IX86_BUILTIN_STOREHPS,
27756 IX86_BUILTIN_STORELPS,
27758 IX86_BUILTIN_MASKMOVQ,
27759 IX86_BUILTIN_MOVMSKPS,
27760 IX86_BUILTIN_PMOVMSKB,
27762 IX86_BUILTIN_MOVNTPS,
27763 IX86_BUILTIN_MOVNTQ,
27765 IX86_BUILTIN_LOADDQU,
27766 IX86_BUILTIN_STOREDQU,
27768 IX86_BUILTIN_PACKSSWB,
27769 IX86_BUILTIN_PACKSSDW,
27770 IX86_BUILTIN_PACKUSWB,
27772 IX86_BUILTIN_PADDB,
27773 IX86_BUILTIN_PADDW,
27774 IX86_BUILTIN_PADDD,
27775 IX86_BUILTIN_PADDQ,
27776 IX86_BUILTIN_PADDSB,
27777 IX86_BUILTIN_PADDSW,
27778 IX86_BUILTIN_PADDUSB,
27779 IX86_BUILTIN_PADDUSW,
27780 IX86_BUILTIN_PSUBB,
27781 IX86_BUILTIN_PSUBW,
27782 IX86_BUILTIN_PSUBD,
27783 IX86_BUILTIN_PSUBQ,
27784 IX86_BUILTIN_PSUBSB,
27785 IX86_BUILTIN_PSUBSW,
27786 IX86_BUILTIN_PSUBUSB,
27787 IX86_BUILTIN_PSUBUSW,
27789 IX86_BUILTIN_PAND,
27790 IX86_BUILTIN_PANDN,
27791 IX86_BUILTIN_POR,
27792 IX86_BUILTIN_PXOR,
27794 IX86_BUILTIN_PAVGB,
27795 IX86_BUILTIN_PAVGW,
27797 IX86_BUILTIN_PCMPEQB,
27798 IX86_BUILTIN_PCMPEQW,
27799 IX86_BUILTIN_PCMPEQD,
27800 IX86_BUILTIN_PCMPGTB,
27801 IX86_BUILTIN_PCMPGTW,
27802 IX86_BUILTIN_PCMPGTD,
27804 IX86_BUILTIN_PMADDWD,
27806 IX86_BUILTIN_PMAXSW,
27807 IX86_BUILTIN_PMAXUB,
27808 IX86_BUILTIN_PMINSW,
27809 IX86_BUILTIN_PMINUB,
27811 IX86_BUILTIN_PMULHUW,
27812 IX86_BUILTIN_PMULHW,
27813 IX86_BUILTIN_PMULLW,
27815 IX86_BUILTIN_PSADBW,
27816 IX86_BUILTIN_PSHUFW,
27818 IX86_BUILTIN_PSLLW,
27819 IX86_BUILTIN_PSLLD,
27820 IX86_BUILTIN_PSLLQ,
27821 IX86_BUILTIN_PSRAW,
27822 IX86_BUILTIN_PSRAD,
27823 IX86_BUILTIN_PSRLW,
27824 IX86_BUILTIN_PSRLD,
27825 IX86_BUILTIN_PSRLQ,
27826 IX86_BUILTIN_PSLLWI,
27827 IX86_BUILTIN_PSLLDI,
27828 IX86_BUILTIN_PSLLQI,
27829 IX86_BUILTIN_PSRAWI,
27830 IX86_BUILTIN_PSRADI,
27831 IX86_BUILTIN_PSRLWI,
27832 IX86_BUILTIN_PSRLDI,
27833 IX86_BUILTIN_PSRLQI,
27835 IX86_BUILTIN_PUNPCKHBW,
27836 IX86_BUILTIN_PUNPCKHWD,
27837 IX86_BUILTIN_PUNPCKHDQ,
27838 IX86_BUILTIN_PUNPCKLBW,
27839 IX86_BUILTIN_PUNPCKLWD,
27840 IX86_BUILTIN_PUNPCKLDQ,
27842 IX86_BUILTIN_SHUFPS,
27844 IX86_BUILTIN_RCPPS,
27845 IX86_BUILTIN_RCPSS,
27846 IX86_BUILTIN_RSQRTPS,
27847 IX86_BUILTIN_RSQRTPS_NR,
27848 IX86_BUILTIN_RSQRTSS,
27849 IX86_BUILTIN_RSQRTF,
27850 IX86_BUILTIN_SQRTPS,
27851 IX86_BUILTIN_SQRTPS_NR,
27852 IX86_BUILTIN_SQRTSS,
27854 IX86_BUILTIN_UNPCKHPS,
27855 IX86_BUILTIN_UNPCKLPS,
27857 IX86_BUILTIN_ANDPS,
27858 IX86_BUILTIN_ANDNPS,
27859 IX86_BUILTIN_ORPS,
27860 IX86_BUILTIN_XORPS,
27862 IX86_BUILTIN_EMMS,
27863 IX86_BUILTIN_LDMXCSR,
27864 IX86_BUILTIN_STMXCSR,
27865 IX86_BUILTIN_SFENCE,
27867 IX86_BUILTIN_FXSAVE,
27868 IX86_BUILTIN_FXRSTOR,
27869 IX86_BUILTIN_FXSAVE64,
27870 IX86_BUILTIN_FXRSTOR64,
27872 IX86_BUILTIN_XSAVE,
27873 IX86_BUILTIN_XRSTOR,
27874 IX86_BUILTIN_XSAVE64,
27875 IX86_BUILTIN_XRSTOR64,
27877 IX86_BUILTIN_XSAVEOPT,
27878 IX86_BUILTIN_XSAVEOPT64,
27880 IX86_BUILTIN_XSAVEC,
27881 IX86_BUILTIN_XSAVEC64,
27883 IX86_BUILTIN_XSAVES,
27884 IX86_BUILTIN_XRSTORS,
27885 IX86_BUILTIN_XSAVES64,
27886 IX86_BUILTIN_XRSTORS64,
27888 /* 3DNow! Original */
27889 IX86_BUILTIN_FEMMS,
27890 IX86_BUILTIN_PAVGUSB,
27891 IX86_BUILTIN_PF2ID,
27892 IX86_BUILTIN_PFACC,
27893 IX86_BUILTIN_PFADD,
27894 IX86_BUILTIN_PFCMPEQ,
27895 IX86_BUILTIN_PFCMPGE,
27896 IX86_BUILTIN_PFCMPGT,
27897 IX86_BUILTIN_PFMAX,
27898 IX86_BUILTIN_PFMIN,
27899 IX86_BUILTIN_PFMUL,
27900 IX86_BUILTIN_PFRCP,
27901 IX86_BUILTIN_PFRCPIT1,
27902 IX86_BUILTIN_PFRCPIT2,
27903 IX86_BUILTIN_PFRSQIT1,
27904 IX86_BUILTIN_PFRSQRT,
27905 IX86_BUILTIN_PFSUB,
27906 IX86_BUILTIN_PFSUBR,
27907 IX86_BUILTIN_PI2FD,
27908 IX86_BUILTIN_PMULHRW,
27910 /* 3DNow! Athlon Extensions */
27911 IX86_BUILTIN_PF2IW,
27912 IX86_BUILTIN_PFNACC,
27913 IX86_BUILTIN_PFPNACC,
27914 IX86_BUILTIN_PI2FW,
27915 IX86_BUILTIN_PSWAPDSI,
27916 IX86_BUILTIN_PSWAPDSF,
27918 /* SSE2 */
27919 IX86_BUILTIN_ADDPD,
27920 IX86_BUILTIN_ADDSD,
27921 IX86_BUILTIN_DIVPD,
27922 IX86_BUILTIN_DIVSD,
27923 IX86_BUILTIN_MULPD,
27924 IX86_BUILTIN_MULSD,
27925 IX86_BUILTIN_SUBPD,
27926 IX86_BUILTIN_SUBSD,
27928 IX86_BUILTIN_CMPEQPD,
27929 IX86_BUILTIN_CMPLTPD,
27930 IX86_BUILTIN_CMPLEPD,
27931 IX86_BUILTIN_CMPGTPD,
27932 IX86_BUILTIN_CMPGEPD,
27933 IX86_BUILTIN_CMPNEQPD,
27934 IX86_BUILTIN_CMPNLTPD,
27935 IX86_BUILTIN_CMPNLEPD,
27936 IX86_BUILTIN_CMPNGTPD,
27937 IX86_BUILTIN_CMPNGEPD,
27938 IX86_BUILTIN_CMPORDPD,
27939 IX86_BUILTIN_CMPUNORDPD,
27940 IX86_BUILTIN_CMPEQSD,
27941 IX86_BUILTIN_CMPLTSD,
27942 IX86_BUILTIN_CMPLESD,
27943 IX86_BUILTIN_CMPNEQSD,
27944 IX86_BUILTIN_CMPNLTSD,
27945 IX86_BUILTIN_CMPNLESD,
27946 IX86_BUILTIN_CMPORDSD,
27947 IX86_BUILTIN_CMPUNORDSD,
27949 IX86_BUILTIN_COMIEQSD,
27950 IX86_BUILTIN_COMILTSD,
27951 IX86_BUILTIN_COMILESD,
27952 IX86_BUILTIN_COMIGTSD,
27953 IX86_BUILTIN_COMIGESD,
27954 IX86_BUILTIN_COMINEQSD,
27955 IX86_BUILTIN_UCOMIEQSD,
27956 IX86_BUILTIN_UCOMILTSD,
27957 IX86_BUILTIN_UCOMILESD,
27958 IX86_BUILTIN_UCOMIGTSD,
27959 IX86_BUILTIN_UCOMIGESD,
27960 IX86_BUILTIN_UCOMINEQSD,
27962 IX86_BUILTIN_MAXPD,
27963 IX86_BUILTIN_MAXSD,
27964 IX86_BUILTIN_MINPD,
27965 IX86_BUILTIN_MINSD,
27967 IX86_BUILTIN_ANDPD,
27968 IX86_BUILTIN_ANDNPD,
27969 IX86_BUILTIN_ORPD,
27970 IX86_BUILTIN_XORPD,
27972 IX86_BUILTIN_SQRTPD,
27973 IX86_BUILTIN_SQRTSD,
27975 IX86_BUILTIN_UNPCKHPD,
27976 IX86_BUILTIN_UNPCKLPD,
27978 IX86_BUILTIN_SHUFPD,
27980 IX86_BUILTIN_LOADUPD,
27981 IX86_BUILTIN_STOREUPD,
27982 IX86_BUILTIN_MOVSD,
27984 IX86_BUILTIN_LOADHPD,
27985 IX86_BUILTIN_LOADLPD,
27987 IX86_BUILTIN_CVTDQ2PD,
27988 IX86_BUILTIN_CVTDQ2PS,
27990 IX86_BUILTIN_CVTPD2DQ,
27991 IX86_BUILTIN_CVTPD2PI,
27992 IX86_BUILTIN_CVTPD2PS,
27993 IX86_BUILTIN_CVTTPD2DQ,
27994 IX86_BUILTIN_CVTTPD2PI,
27996 IX86_BUILTIN_CVTPI2PD,
27997 IX86_BUILTIN_CVTSI2SD,
27998 IX86_BUILTIN_CVTSI642SD,
28000 IX86_BUILTIN_CVTSD2SI,
28001 IX86_BUILTIN_CVTSD2SI64,
28002 IX86_BUILTIN_CVTSD2SS,
28003 IX86_BUILTIN_CVTSS2SD,
28004 IX86_BUILTIN_CVTTSD2SI,
28005 IX86_BUILTIN_CVTTSD2SI64,
28007 IX86_BUILTIN_CVTPS2DQ,
28008 IX86_BUILTIN_CVTPS2PD,
28009 IX86_BUILTIN_CVTTPS2DQ,
28011 IX86_BUILTIN_MOVNTI,
28012 IX86_BUILTIN_MOVNTI64,
28013 IX86_BUILTIN_MOVNTPD,
28014 IX86_BUILTIN_MOVNTDQ,
28016 IX86_BUILTIN_MOVQ128,
28018 /* SSE2 MMX */
28019 IX86_BUILTIN_MASKMOVDQU,
28020 IX86_BUILTIN_MOVMSKPD,
28021 IX86_BUILTIN_PMOVMSKB128,
28023 IX86_BUILTIN_PACKSSWB128,
28024 IX86_BUILTIN_PACKSSDW128,
28025 IX86_BUILTIN_PACKUSWB128,
28027 IX86_BUILTIN_PADDB128,
28028 IX86_BUILTIN_PADDW128,
28029 IX86_BUILTIN_PADDD128,
28030 IX86_BUILTIN_PADDQ128,
28031 IX86_BUILTIN_PADDSB128,
28032 IX86_BUILTIN_PADDSW128,
28033 IX86_BUILTIN_PADDUSB128,
28034 IX86_BUILTIN_PADDUSW128,
28035 IX86_BUILTIN_PSUBB128,
28036 IX86_BUILTIN_PSUBW128,
28037 IX86_BUILTIN_PSUBD128,
28038 IX86_BUILTIN_PSUBQ128,
28039 IX86_BUILTIN_PSUBSB128,
28040 IX86_BUILTIN_PSUBSW128,
28041 IX86_BUILTIN_PSUBUSB128,
28042 IX86_BUILTIN_PSUBUSW128,
28044 IX86_BUILTIN_PAND128,
28045 IX86_BUILTIN_PANDN128,
28046 IX86_BUILTIN_POR128,
28047 IX86_BUILTIN_PXOR128,
28049 IX86_BUILTIN_PAVGB128,
28050 IX86_BUILTIN_PAVGW128,
28052 IX86_BUILTIN_PCMPEQB128,
28053 IX86_BUILTIN_PCMPEQW128,
28054 IX86_BUILTIN_PCMPEQD128,
28055 IX86_BUILTIN_PCMPGTB128,
28056 IX86_BUILTIN_PCMPGTW128,
28057 IX86_BUILTIN_PCMPGTD128,
28059 IX86_BUILTIN_PMADDWD128,
28061 IX86_BUILTIN_PMAXSW128,
28062 IX86_BUILTIN_PMAXUB128,
28063 IX86_BUILTIN_PMINSW128,
28064 IX86_BUILTIN_PMINUB128,
28066 IX86_BUILTIN_PMULUDQ,
28067 IX86_BUILTIN_PMULUDQ128,
28068 IX86_BUILTIN_PMULHUW128,
28069 IX86_BUILTIN_PMULHW128,
28070 IX86_BUILTIN_PMULLW128,
28072 IX86_BUILTIN_PSADBW128,
28073 IX86_BUILTIN_PSHUFHW,
28074 IX86_BUILTIN_PSHUFLW,
28075 IX86_BUILTIN_PSHUFD,
28077 IX86_BUILTIN_PSLLDQI128,
28078 IX86_BUILTIN_PSLLWI128,
28079 IX86_BUILTIN_PSLLDI128,
28080 IX86_BUILTIN_PSLLQI128,
28081 IX86_BUILTIN_PSRAWI128,
28082 IX86_BUILTIN_PSRADI128,
28083 IX86_BUILTIN_PSRLDQI128,
28084 IX86_BUILTIN_PSRLWI128,
28085 IX86_BUILTIN_PSRLDI128,
28086 IX86_BUILTIN_PSRLQI128,
28088 IX86_BUILTIN_PSLLDQ128,
28089 IX86_BUILTIN_PSLLW128,
28090 IX86_BUILTIN_PSLLD128,
28091 IX86_BUILTIN_PSLLQ128,
28092 IX86_BUILTIN_PSRAW128,
28093 IX86_BUILTIN_PSRAD128,
28094 IX86_BUILTIN_PSRLW128,
28095 IX86_BUILTIN_PSRLD128,
28096 IX86_BUILTIN_PSRLQ128,
28098 IX86_BUILTIN_PUNPCKHBW128,
28099 IX86_BUILTIN_PUNPCKHWD128,
28100 IX86_BUILTIN_PUNPCKHDQ128,
28101 IX86_BUILTIN_PUNPCKHQDQ128,
28102 IX86_BUILTIN_PUNPCKLBW128,
28103 IX86_BUILTIN_PUNPCKLWD128,
28104 IX86_BUILTIN_PUNPCKLDQ128,
28105 IX86_BUILTIN_PUNPCKLQDQ128,
28107 IX86_BUILTIN_CLFLUSH,
28108 IX86_BUILTIN_MFENCE,
28109 IX86_BUILTIN_LFENCE,
28110 IX86_BUILTIN_PAUSE,
28112 IX86_BUILTIN_FNSTENV,
28113 IX86_BUILTIN_FLDENV,
28114 IX86_BUILTIN_FNSTSW,
28115 IX86_BUILTIN_FNCLEX,
28117 IX86_BUILTIN_BSRSI,
28118 IX86_BUILTIN_BSRDI,
28119 IX86_BUILTIN_RDPMC,
28120 IX86_BUILTIN_RDTSC,
28121 IX86_BUILTIN_RDTSCP,
28122 IX86_BUILTIN_ROLQI,
28123 IX86_BUILTIN_ROLHI,
28124 IX86_BUILTIN_RORQI,
28125 IX86_BUILTIN_RORHI,
28127 /* SSE3. */
28128 IX86_BUILTIN_ADDSUBPS,
28129 IX86_BUILTIN_HADDPS,
28130 IX86_BUILTIN_HSUBPS,
28131 IX86_BUILTIN_MOVSHDUP,
28132 IX86_BUILTIN_MOVSLDUP,
28133 IX86_BUILTIN_ADDSUBPD,
28134 IX86_BUILTIN_HADDPD,
28135 IX86_BUILTIN_HSUBPD,
28136 IX86_BUILTIN_LDDQU,
28138 IX86_BUILTIN_MONITOR,
28139 IX86_BUILTIN_MWAIT,
28141 /* SSSE3. */
28142 IX86_BUILTIN_PHADDW,
28143 IX86_BUILTIN_PHADDD,
28144 IX86_BUILTIN_PHADDSW,
28145 IX86_BUILTIN_PHSUBW,
28146 IX86_BUILTIN_PHSUBD,
28147 IX86_BUILTIN_PHSUBSW,
28148 IX86_BUILTIN_PMADDUBSW,
28149 IX86_BUILTIN_PMULHRSW,
28150 IX86_BUILTIN_PSHUFB,
28151 IX86_BUILTIN_PSIGNB,
28152 IX86_BUILTIN_PSIGNW,
28153 IX86_BUILTIN_PSIGND,
28154 IX86_BUILTIN_PALIGNR,
28155 IX86_BUILTIN_PABSB,
28156 IX86_BUILTIN_PABSW,
28157 IX86_BUILTIN_PABSD,
28159 IX86_BUILTIN_PHADDW128,
28160 IX86_BUILTIN_PHADDD128,
28161 IX86_BUILTIN_PHADDSW128,
28162 IX86_BUILTIN_PHSUBW128,
28163 IX86_BUILTIN_PHSUBD128,
28164 IX86_BUILTIN_PHSUBSW128,
28165 IX86_BUILTIN_PMADDUBSW128,
28166 IX86_BUILTIN_PMULHRSW128,
28167 IX86_BUILTIN_PSHUFB128,
28168 IX86_BUILTIN_PSIGNB128,
28169 IX86_BUILTIN_PSIGNW128,
28170 IX86_BUILTIN_PSIGND128,
28171 IX86_BUILTIN_PALIGNR128,
28172 IX86_BUILTIN_PABSB128,
28173 IX86_BUILTIN_PABSW128,
28174 IX86_BUILTIN_PABSD128,
28176 /* AMDFAM10 - SSE4A New Instructions. */
28177 IX86_BUILTIN_MOVNTSD,
28178 IX86_BUILTIN_MOVNTSS,
28179 IX86_BUILTIN_EXTRQI,
28180 IX86_BUILTIN_EXTRQ,
28181 IX86_BUILTIN_INSERTQI,
28182 IX86_BUILTIN_INSERTQ,
28184 /* SSE4.1. */
28185 IX86_BUILTIN_BLENDPD,
28186 IX86_BUILTIN_BLENDPS,
28187 IX86_BUILTIN_BLENDVPD,
28188 IX86_BUILTIN_BLENDVPS,
28189 IX86_BUILTIN_PBLENDVB128,
28190 IX86_BUILTIN_PBLENDW128,
28192 IX86_BUILTIN_DPPD,
28193 IX86_BUILTIN_DPPS,
28195 IX86_BUILTIN_INSERTPS128,
28197 IX86_BUILTIN_MOVNTDQA,
28198 IX86_BUILTIN_MPSADBW128,
28199 IX86_BUILTIN_PACKUSDW128,
28200 IX86_BUILTIN_PCMPEQQ,
28201 IX86_BUILTIN_PHMINPOSUW128,
28203 IX86_BUILTIN_PMAXSB128,
28204 IX86_BUILTIN_PMAXSD128,
28205 IX86_BUILTIN_PMAXUD128,
28206 IX86_BUILTIN_PMAXUW128,
28208 IX86_BUILTIN_PMINSB128,
28209 IX86_BUILTIN_PMINSD128,
28210 IX86_BUILTIN_PMINUD128,
28211 IX86_BUILTIN_PMINUW128,
28213 IX86_BUILTIN_PMOVSXBW128,
28214 IX86_BUILTIN_PMOVSXBD128,
28215 IX86_BUILTIN_PMOVSXBQ128,
28216 IX86_BUILTIN_PMOVSXWD128,
28217 IX86_BUILTIN_PMOVSXWQ128,
28218 IX86_BUILTIN_PMOVSXDQ128,
28220 IX86_BUILTIN_PMOVZXBW128,
28221 IX86_BUILTIN_PMOVZXBD128,
28222 IX86_BUILTIN_PMOVZXBQ128,
28223 IX86_BUILTIN_PMOVZXWD128,
28224 IX86_BUILTIN_PMOVZXWQ128,
28225 IX86_BUILTIN_PMOVZXDQ128,
28227 IX86_BUILTIN_PMULDQ128,
28228 IX86_BUILTIN_PMULLD128,
28230 IX86_BUILTIN_ROUNDSD,
28231 IX86_BUILTIN_ROUNDSS,
28233 IX86_BUILTIN_ROUNDPD,
28234 IX86_BUILTIN_ROUNDPS,
28236 IX86_BUILTIN_FLOORPD,
28237 IX86_BUILTIN_CEILPD,
28238 IX86_BUILTIN_TRUNCPD,
28239 IX86_BUILTIN_RINTPD,
28240 IX86_BUILTIN_ROUNDPD_AZ,
28242 IX86_BUILTIN_FLOORPD_VEC_PACK_SFIX,
28243 IX86_BUILTIN_CEILPD_VEC_PACK_SFIX,
28244 IX86_BUILTIN_ROUNDPD_AZ_VEC_PACK_SFIX,
28246 IX86_BUILTIN_FLOORPS,
28247 IX86_BUILTIN_CEILPS,
28248 IX86_BUILTIN_TRUNCPS,
28249 IX86_BUILTIN_RINTPS,
28250 IX86_BUILTIN_ROUNDPS_AZ,
28252 IX86_BUILTIN_FLOORPS_SFIX,
28253 IX86_BUILTIN_CEILPS_SFIX,
28254 IX86_BUILTIN_ROUNDPS_AZ_SFIX,
28256 IX86_BUILTIN_PTESTZ,
28257 IX86_BUILTIN_PTESTC,
28258 IX86_BUILTIN_PTESTNZC,
28260 IX86_BUILTIN_VEC_INIT_V2SI,
28261 IX86_BUILTIN_VEC_INIT_V4HI,
28262 IX86_BUILTIN_VEC_INIT_V8QI,
28263 IX86_BUILTIN_VEC_EXT_V2DF,
28264 IX86_BUILTIN_VEC_EXT_V2DI,
28265 IX86_BUILTIN_VEC_EXT_V4SF,
28266 IX86_BUILTIN_VEC_EXT_V4SI,
28267 IX86_BUILTIN_VEC_EXT_V8HI,
28268 IX86_BUILTIN_VEC_EXT_V2SI,
28269 IX86_BUILTIN_VEC_EXT_V4HI,
28270 IX86_BUILTIN_VEC_EXT_V16QI,
28271 IX86_BUILTIN_VEC_SET_V2DI,
28272 IX86_BUILTIN_VEC_SET_V4SF,
28273 IX86_BUILTIN_VEC_SET_V4SI,
28274 IX86_BUILTIN_VEC_SET_V8HI,
28275 IX86_BUILTIN_VEC_SET_V4HI,
28276 IX86_BUILTIN_VEC_SET_V16QI,
28278 IX86_BUILTIN_VEC_PACK_SFIX,
28279 IX86_BUILTIN_VEC_PACK_SFIX256,
28281 /* SSE4.2. */
28282 IX86_BUILTIN_CRC32QI,
28283 IX86_BUILTIN_CRC32HI,
28284 IX86_BUILTIN_CRC32SI,
28285 IX86_BUILTIN_CRC32DI,
28287 IX86_BUILTIN_PCMPESTRI128,
28288 IX86_BUILTIN_PCMPESTRM128,
28289 IX86_BUILTIN_PCMPESTRA128,
28290 IX86_BUILTIN_PCMPESTRC128,
28291 IX86_BUILTIN_PCMPESTRO128,
28292 IX86_BUILTIN_PCMPESTRS128,
28293 IX86_BUILTIN_PCMPESTRZ128,
28294 IX86_BUILTIN_PCMPISTRI128,
28295 IX86_BUILTIN_PCMPISTRM128,
28296 IX86_BUILTIN_PCMPISTRA128,
28297 IX86_BUILTIN_PCMPISTRC128,
28298 IX86_BUILTIN_PCMPISTRO128,
28299 IX86_BUILTIN_PCMPISTRS128,
28300 IX86_BUILTIN_PCMPISTRZ128,
28302 IX86_BUILTIN_PCMPGTQ,
28304 /* AES instructions */
28305 IX86_BUILTIN_AESENC128,
28306 IX86_BUILTIN_AESENCLAST128,
28307 IX86_BUILTIN_AESDEC128,
28308 IX86_BUILTIN_AESDECLAST128,
28309 IX86_BUILTIN_AESIMC128,
28310 IX86_BUILTIN_AESKEYGENASSIST128,
28312 /* PCLMUL instruction */
28313 IX86_BUILTIN_PCLMULQDQ128,
28315 /* AVX */
28316 IX86_BUILTIN_ADDPD256,
28317 IX86_BUILTIN_ADDPS256,
28318 IX86_BUILTIN_ADDSUBPD256,
28319 IX86_BUILTIN_ADDSUBPS256,
28320 IX86_BUILTIN_ANDPD256,
28321 IX86_BUILTIN_ANDPS256,
28322 IX86_BUILTIN_ANDNPD256,
28323 IX86_BUILTIN_ANDNPS256,
28324 IX86_BUILTIN_BLENDPD256,
28325 IX86_BUILTIN_BLENDPS256,
28326 IX86_BUILTIN_BLENDVPD256,
28327 IX86_BUILTIN_BLENDVPS256,
28328 IX86_BUILTIN_DIVPD256,
28329 IX86_BUILTIN_DIVPS256,
28330 IX86_BUILTIN_DPPS256,
28331 IX86_BUILTIN_HADDPD256,
28332 IX86_BUILTIN_HADDPS256,
28333 IX86_BUILTIN_HSUBPD256,
28334 IX86_BUILTIN_HSUBPS256,
28335 IX86_BUILTIN_MAXPD256,
28336 IX86_BUILTIN_MAXPS256,
28337 IX86_BUILTIN_MINPD256,
28338 IX86_BUILTIN_MINPS256,
28339 IX86_BUILTIN_MULPD256,
28340 IX86_BUILTIN_MULPS256,
28341 IX86_BUILTIN_ORPD256,
28342 IX86_BUILTIN_ORPS256,
28343 IX86_BUILTIN_SHUFPD256,
28344 IX86_BUILTIN_SHUFPS256,
28345 IX86_BUILTIN_SUBPD256,
28346 IX86_BUILTIN_SUBPS256,
28347 IX86_BUILTIN_XORPD256,
28348 IX86_BUILTIN_XORPS256,
28349 IX86_BUILTIN_CMPSD,
28350 IX86_BUILTIN_CMPSS,
28351 IX86_BUILTIN_CMPPD,
28352 IX86_BUILTIN_CMPPS,
28353 IX86_BUILTIN_CMPPD256,
28354 IX86_BUILTIN_CMPPS256,
28355 IX86_BUILTIN_CVTDQ2PD256,
28356 IX86_BUILTIN_CVTDQ2PS256,
28357 IX86_BUILTIN_CVTPD2PS256,
28358 IX86_BUILTIN_CVTPS2DQ256,
28359 IX86_BUILTIN_CVTPS2PD256,
28360 IX86_BUILTIN_CVTTPD2DQ256,
28361 IX86_BUILTIN_CVTPD2DQ256,
28362 IX86_BUILTIN_CVTTPS2DQ256,
28363 IX86_BUILTIN_EXTRACTF128PD256,
28364 IX86_BUILTIN_EXTRACTF128PS256,
28365 IX86_BUILTIN_EXTRACTF128SI256,
28366 IX86_BUILTIN_VZEROALL,
28367 IX86_BUILTIN_VZEROUPPER,
28368 IX86_BUILTIN_VPERMILVARPD,
28369 IX86_BUILTIN_VPERMILVARPS,
28370 IX86_BUILTIN_VPERMILVARPD256,
28371 IX86_BUILTIN_VPERMILVARPS256,
28372 IX86_BUILTIN_VPERMILPD,
28373 IX86_BUILTIN_VPERMILPS,
28374 IX86_BUILTIN_VPERMILPD256,
28375 IX86_BUILTIN_VPERMILPS256,
28376 IX86_BUILTIN_VPERMIL2PD,
28377 IX86_BUILTIN_VPERMIL2PS,
28378 IX86_BUILTIN_VPERMIL2PD256,
28379 IX86_BUILTIN_VPERMIL2PS256,
28380 IX86_BUILTIN_VPERM2F128PD256,
28381 IX86_BUILTIN_VPERM2F128PS256,
28382 IX86_BUILTIN_VPERM2F128SI256,
28383 IX86_BUILTIN_VBROADCASTSS,
28384 IX86_BUILTIN_VBROADCASTSD256,
28385 IX86_BUILTIN_VBROADCASTSS256,
28386 IX86_BUILTIN_VBROADCASTPD256,
28387 IX86_BUILTIN_VBROADCASTPS256,
28388 IX86_BUILTIN_VINSERTF128PD256,
28389 IX86_BUILTIN_VINSERTF128PS256,
28390 IX86_BUILTIN_VINSERTF128SI256,
28391 IX86_BUILTIN_LOADUPD256,
28392 IX86_BUILTIN_LOADUPS256,
28393 IX86_BUILTIN_STOREUPD256,
28394 IX86_BUILTIN_STOREUPS256,
28395 IX86_BUILTIN_LDDQU256,
28396 IX86_BUILTIN_MOVNTDQ256,
28397 IX86_BUILTIN_MOVNTPD256,
28398 IX86_BUILTIN_MOVNTPS256,
28399 IX86_BUILTIN_LOADDQU256,
28400 IX86_BUILTIN_STOREDQU256,
28401 IX86_BUILTIN_MASKLOADPD,
28402 IX86_BUILTIN_MASKLOADPS,
28403 IX86_BUILTIN_MASKSTOREPD,
28404 IX86_BUILTIN_MASKSTOREPS,
28405 IX86_BUILTIN_MASKLOADPD256,
28406 IX86_BUILTIN_MASKLOADPS256,
28407 IX86_BUILTIN_MASKSTOREPD256,
28408 IX86_BUILTIN_MASKSTOREPS256,
28409 IX86_BUILTIN_MOVSHDUP256,
28410 IX86_BUILTIN_MOVSLDUP256,
28411 IX86_BUILTIN_MOVDDUP256,
28413 IX86_BUILTIN_SQRTPD256,
28414 IX86_BUILTIN_SQRTPS256,
28415 IX86_BUILTIN_SQRTPS_NR256,
28416 IX86_BUILTIN_RSQRTPS256,
28417 IX86_BUILTIN_RSQRTPS_NR256,
28419 IX86_BUILTIN_RCPPS256,
28421 IX86_BUILTIN_ROUNDPD256,
28422 IX86_BUILTIN_ROUNDPS256,
28424 IX86_BUILTIN_FLOORPD256,
28425 IX86_BUILTIN_CEILPD256,
28426 IX86_BUILTIN_TRUNCPD256,
28427 IX86_BUILTIN_RINTPD256,
28428 IX86_BUILTIN_ROUNDPD_AZ256,
28430 IX86_BUILTIN_FLOORPD_VEC_PACK_SFIX256,
28431 IX86_BUILTIN_CEILPD_VEC_PACK_SFIX256,
28432 IX86_BUILTIN_ROUNDPD_AZ_VEC_PACK_SFIX256,
28434 IX86_BUILTIN_FLOORPS256,
28435 IX86_BUILTIN_CEILPS256,
28436 IX86_BUILTIN_TRUNCPS256,
28437 IX86_BUILTIN_RINTPS256,
28438 IX86_BUILTIN_ROUNDPS_AZ256,
28440 IX86_BUILTIN_FLOORPS_SFIX256,
28441 IX86_BUILTIN_CEILPS_SFIX256,
28442 IX86_BUILTIN_ROUNDPS_AZ_SFIX256,
28444 IX86_BUILTIN_UNPCKHPD256,
28445 IX86_BUILTIN_UNPCKLPD256,
28446 IX86_BUILTIN_UNPCKHPS256,
28447 IX86_BUILTIN_UNPCKLPS256,
28449 IX86_BUILTIN_SI256_SI,
28450 IX86_BUILTIN_PS256_PS,
28451 IX86_BUILTIN_PD256_PD,
28452 IX86_BUILTIN_SI_SI256,
28453 IX86_BUILTIN_PS_PS256,
28454 IX86_BUILTIN_PD_PD256,
28456 IX86_BUILTIN_VTESTZPD,
28457 IX86_BUILTIN_VTESTCPD,
28458 IX86_BUILTIN_VTESTNZCPD,
28459 IX86_BUILTIN_VTESTZPS,
28460 IX86_BUILTIN_VTESTCPS,
28461 IX86_BUILTIN_VTESTNZCPS,
28462 IX86_BUILTIN_VTESTZPD256,
28463 IX86_BUILTIN_VTESTCPD256,
28464 IX86_BUILTIN_VTESTNZCPD256,
28465 IX86_BUILTIN_VTESTZPS256,
28466 IX86_BUILTIN_VTESTCPS256,
28467 IX86_BUILTIN_VTESTNZCPS256,
28468 IX86_BUILTIN_PTESTZ256,
28469 IX86_BUILTIN_PTESTC256,
28470 IX86_BUILTIN_PTESTNZC256,
28472 IX86_BUILTIN_MOVMSKPD256,
28473 IX86_BUILTIN_MOVMSKPS256,
28475 /* AVX2 */
28476 IX86_BUILTIN_MPSADBW256,
28477 IX86_BUILTIN_PABSB256,
28478 IX86_BUILTIN_PABSW256,
28479 IX86_BUILTIN_PABSD256,
28480 IX86_BUILTIN_PACKSSDW256,
28481 IX86_BUILTIN_PACKSSWB256,
28482 IX86_BUILTIN_PACKUSDW256,
28483 IX86_BUILTIN_PACKUSWB256,
28484 IX86_BUILTIN_PADDB256,
28485 IX86_BUILTIN_PADDW256,
28486 IX86_BUILTIN_PADDD256,
28487 IX86_BUILTIN_PADDQ256,
28488 IX86_BUILTIN_PADDSB256,
28489 IX86_BUILTIN_PADDSW256,
28490 IX86_BUILTIN_PADDUSB256,
28491 IX86_BUILTIN_PADDUSW256,
28492 IX86_BUILTIN_PALIGNR256,
28493 IX86_BUILTIN_AND256I,
28494 IX86_BUILTIN_ANDNOT256I,
28495 IX86_BUILTIN_PAVGB256,
28496 IX86_BUILTIN_PAVGW256,
28497 IX86_BUILTIN_PBLENDVB256,
28498 IX86_BUILTIN_PBLENDVW256,
28499 IX86_BUILTIN_PCMPEQB256,
28500 IX86_BUILTIN_PCMPEQW256,
28501 IX86_BUILTIN_PCMPEQD256,
28502 IX86_BUILTIN_PCMPEQQ256,
28503 IX86_BUILTIN_PCMPGTB256,
28504 IX86_BUILTIN_PCMPGTW256,
28505 IX86_BUILTIN_PCMPGTD256,
28506 IX86_BUILTIN_PCMPGTQ256,
28507 IX86_BUILTIN_PHADDW256,
28508 IX86_BUILTIN_PHADDD256,
28509 IX86_BUILTIN_PHADDSW256,
28510 IX86_BUILTIN_PHSUBW256,
28511 IX86_BUILTIN_PHSUBD256,
28512 IX86_BUILTIN_PHSUBSW256,
28513 IX86_BUILTIN_PMADDUBSW256,
28514 IX86_BUILTIN_PMADDWD256,
28515 IX86_BUILTIN_PMAXSB256,
28516 IX86_BUILTIN_PMAXSW256,
28517 IX86_BUILTIN_PMAXSD256,
28518 IX86_BUILTIN_PMAXUB256,
28519 IX86_BUILTIN_PMAXUW256,
28520 IX86_BUILTIN_PMAXUD256,
28521 IX86_BUILTIN_PMINSB256,
28522 IX86_BUILTIN_PMINSW256,
28523 IX86_BUILTIN_PMINSD256,
28524 IX86_BUILTIN_PMINUB256,
28525 IX86_BUILTIN_PMINUW256,
28526 IX86_BUILTIN_PMINUD256,
28527 IX86_BUILTIN_PMOVMSKB256,
28528 IX86_BUILTIN_PMOVSXBW256,
28529 IX86_BUILTIN_PMOVSXBD256,
28530 IX86_BUILTIN_PMOVSXBQ256,
28531 IX86_BUILTIN_PMOVSXWD256,
28532 IX86_BUILTIN_PMOVSXWQ256,
28533 IX86_BUILTIN_PMOVSXDQ256,
28534 IX86_BUILTIN_PMOVZXBW256,
28535 IX86_BUILTIN_PMOVZXBD256,
28536 IX86_BUILTIN_PMOVZXBQ256,
28537 IX86_BUILTIN_PMOVZXWD256,
28538 IX86_BUILTIN_PMOVZXWQ256,
28539 IX86_BUILTIN_PMOVZXDQ256,
28540 IX86_BUILTIN_PMULDQ256,
28541 IX86_BUILTIN_PMULHRSW256,
28542 IX86_BUILTIN_PMULHUW256,
28543 IX86_BUILTIN_PMULHW256,
28544 IX86_BUILTIN_PMULLW256,
28545 IX86_BUILTIN_PMULLD256,
28546 IX86_BUILTIN_PMULUDQ256,
28547 IX86_BUILTIN_POR256,
28548 IX86_BUILTIN_PSADBW256,
28549 IX86_BUILTIN_PSHUFB256,
28550 IX86_BUILTIN_PSHUFD256,
28551 IX86_BUILTIN_PSHUFHW256,
28552 IX86_BUILTIN_PSHUFLW256,
28553 IX86_BUILTIN_PSIGNB256,
28554 IX86_BUILTIN_PSIGNW256,
28555 IX86_BUILTIN_PSIGND256,
28556 IX86_BUILTIN_PSLLDQI256,
28557 IX86_BUILTIN_PSLLWI256,
28558 IX86_BUILTIN_PSLLW256,
28559 IX86_BUILTIN_PSLLDI256,
28560 IX86_BUILTIN_PSLLD256,
28561 IX86_BUILTIN_PSLLQI256,
28562 IX86_BUILTIN_PSLLQ256,
28563 IX86_BUILTIN_PSRAWI256,
28564 IX86_BUILTIN_PSRAW256,
28565 IX86_BUILTIN_PSRADI256,
28566 IX86_BUILTIN_PSRAD256,
28567 IX86_BUILTIN_PSRLDQI256,
28568 IX86_BUILTIN_PSRLWI256,
28569 IX86_BUILTIN_PSRLW256,
28570 IX86_BUILTIN_PSRLDI256,
28571 IX86_BUILTIN_PSRLD256,
28572 IX86_BUILTIN_PSRLQI256,
28573 IX86_BUILTIN_PSRLQ256,
28574 IX86_BUILTIN_PSUBB256,
28575 IX86_BUILTIN_PSUBW256,
28576 IX86_BUILTIN_PSUBD256,
28577 IX86_BUILTIN_PSUBQ256,
28578 IX86_BUILTIN_PSUBSB256,
28579 IX86_BUILTIN_PSUBSW256,
28580 IX86_BUILTIN_PSUBUSB256,
28581 IX86_BUILTIN_PSUBUSW256,
28582 IX86_BUILTIN_PUNPCKHBW256,
28583 IX86_BUILTIN_PUNPCKHWD256,
28584 IX86_BUILTIN_PUNPCKHDQ256,
28585 IX86_BUILTIN_PUNPCKHQDQ256,
28586 IX86_BUILTIN_PUNPCKLBW256,
28587 IX86_BUILTIN_PUNPCKLWD256,
28588 IX86_BUILTIN_PUNPCKLDQ256,
28589 IX86_BUILTIN_PUNPCKLQDQ256,
28590 IX86_BUILTIN_PXOR256,
28591 IX86_BUILTIN_MOVNTDQA256,
28592 IX86_BUILTIN_VBROADCASTSS_PS,
28593 IX86_BUILTIN_VBROADCASTSS_PS256,
28594 IX86_BUILTIN_VBROADCASTSD_PD256,
28595 IX86_BUILTIN_VBROADCASTSI256,
28596 IX86_BUILTIN_PBLENDD256,
28597 IX86_BUILTIN_PBLENDD128,
28598 IX86_BUILTIN_PBROADCASTB256,
28599 IX86_BUILTIN_PBROADCASTW256,
28600 IX86_BUILTIN_PBROADCASTD256,
28601 IX86_BUILTIN_PBROADCASTQ256,
28602 IX86_BUILTIN_PBROADCASTB128,
28603 IX86_BUILTIN_PBROADCASTW128,
28604 IX86_BUILTIN_PBROADCASTD128,
28605 IX86_BUILTIN_PBROADCASTQ128,
28606 IX86_BUILTIN_VPERMVARSI256,
28607 IX86_BUILTIN_VPERMDF256,
28608 IX86_BUILTIN_VPERMVARSF256,
28609 IX86_BUILTIN_VPERMDI256,
28610 IX86_BUILTIN_VPERMTI256,
28611 IX86_BUILTIN_VEXTRACT128I256,
28612 IX86_BUILTIN_VINSERT128I256,
28613 IX86_BUILTIN_MASKLOADD,
28614 IX86_BUILTIN_MASKLOADQ,
28615 IX86_BUILTIN_MASKLOADD256,
28616 IX86_BUILTIN_MASKLOADQ256,
28617 IX86_BUILTIN_MASKSTORED,
28618 IX86_BUILTIN_MASKSTOREQ,
28619 IX86_BUILTIN_MASKSTORED256,
28620 IX86_BUILTIN_MASKSTOREQ256,
28621 IX86_BUILTIN_PSLLVV4DI,
28622 IX86_BUILTIN_PSLLVV2DI,
28623 IX86_BUILTIN_PSLLVV8SI,
28624 IX86_BUILTIN_PSLLVV4SI,
28625 IX86_BUILTIN_PSRAVV8SI,
28626 IX86_BUILTIN_PSRAVV4SI,
28627 IX86_BUILTIN_PSRLVV4DI,
28628 IX86_BUILTIN_PSRLVV2DI,
28629 IX86_BUILTIN_PSRLVV8SI,
28630 IX86_BUILTIN_PSRLVV4SI,
28632 IX86_BUILTIN_GATHERSIV2DF,
28633 IX86_BUILTIN_GATHERSIV4DF,
28634 IX86_BUILTIN_GATHERDIV2DF,
28635 IX86_BUILTIN_GATHERDIV4DF,
28636 IX86_BUILTIN_GATHERSIV4SF,
28637 IX86_BUILTIN_GATHERSIV8SF,
28638 IX86_BUILTIN_GATHERDIV4SF,
28639 IX86_BUILTIN_GATHERDIV8SF,
28640 IX86_BUILTIN_GATHERSIV2DI,
28641 IX86_BUILTIN_GATHERSIV4DI,
28642 IX86_BUILTIN_GATHERDIV2DI,
28643 IX86_BUILTIN_GATHERDIV4DI,
28644 IX86_BUILTIN_GATHERSIV4SI,
28645 IX86_BUILTIN_GATHERSIV8SI,
28646 IX86_BUILTIN_GATHERDIV4SI,
28647 IX86_BUILTIN_GATHERDIV8SI,
28649 /* AVX512F */
28650 IX86_BUILTIN_SI512_SI256,
28651 IX86_BUILTIN_PD512_PD256,
28652 IX86_BUILTIN_PS512_PS256,
28653 IX86_BUILTIN_SI512_SI,
28654 IX86_BUILTIN_PD512_PD,
28655 IX86_BUILTIN_PS512_PS,
28656 IX86_BUILTIN_ADDPD512,
28657 IX86_BUILTIN_ADDPS512,
28658 IX86_BUILTIN_ADDSD_ROUND,
28659 IX86_BUILTIN_ADDSS_ROUND,
28660 IX86_BUILTIN_ALIGND512,
28661 IX86_BUILTIN_ALIGNQ512,
28662 IX86_BUILTIN_BLENDMD512,
28663 IX86_BUILTIN_BLENDMPD512,
28664 IX86_BUILTIN_BLENDMPS512,
28665 IX86_BUILTIN_BLENDMQ512,
28666 IX86_BUILTIN_BROADCASTF32X4_512,
28667 IX86_BUILTIN_BROADCASTF64X4_512,
28668 IX86_BUILTIN_BROADCASTI32X4_512,
28669 IX86_BUILTIN_BROADCASTI64X4_512,
28670 IX86_BUILTIN_BROADCASTSD512,
28671 IX86_BUILTIN_BROADCASTSS512,
28672 IX86_BUILTIN_CMPD512,
28673 IX86_BUILTIN_CMPPD512,
28674 IX86_BUILTIN_CMPPS512,
28675 IX86_BUILTIN_CMPQ512,
28676 IX86_BUILTIN_CMPSD_MASK,
28677 IX86_BUILTIN_CMPSS_MASK,
28678 IX86_BUILTIN_COMIDF,
28679 IX86_BUILTIN_COMISF,
28680 IX86_BUILTIN_COMPRESSPD512,
28681 IX86_BUILTIN_COMPRESSPDSTORE512,
28682 IX86_BUILTIN_COMPRESSPS512,
28683 IX86_BUILTIN_COMPRESSPSSTORE512,
28684 IX86_BUILTIN_CVTDQ2PD512,
28685 IX86_BUILTIN_CVTDQ2PS512,
28686 IX86_BUILTIN_CVTPD2DQ512,
28687 IX86_BUILTIN_CVTPD2PS512,
28688 IX86_BUILTIN_CVTPD2UDQ512,
28689 IX86_BUILTIN_CVTPH2PS512,
28690 IX86_BUILTIN_CVTPS2DQ512,
28691 IX86_BUILTIN_CVTPS2PD512,
28692 IX86_BUILTIN_CVTPS2PH512,
28693 IX86_BUILTIN_CVTPS2UDQ512,
28694 IX86_BUILTIN_CVTSD2SS_ROUND,
28695 IX86_BUILTIN_CVTSI2SD64,
28696 IX86_BUILTIN_CVTSI2SS32,
28697 IX86_BUILTIN_CVTSI2SS64,
28698 IX86_BUILTIN_CVTSS2SD_ROUND,
28699 IX86_BUILTIN_CVTTPD2DQ512,
28700 IX86_BUILTIN_CVTTPD2UDQ512,
28701 IX86_BUILTIN_CVTTPS2DQ512,
28702 IX86_BUILTIN_CVTTPS2UDQ512,
28703 IX86_BUILTIN_CVTUDQ2PD512,
28704 IX86_BUILTIN_CVTUDQ2PS512,
28705 IX86_BUILTIN_CVTUSI2SD32,
28706 IX86_BUILTIN_CVTUSI2SD64,
28707 IX86_BUILTIN_CVTUSI2SS32,
28708 IX86_BUILTIN_CVTUSI2SS64,
28709 IX86_BUILTIN_DIVPD512,
28710 IX86_BUILTIN_DIVPS512,
28711 IX86_BUILTIN_DIVSD_ROUND,
28712 IX86_BUILTIN_DIVSS_ROUND,
28713 IX86_BUILTIN_EXPANDPD512,
28714 IX86_BUILTIN_EXPANDPD512Z,
28715 IX86_BUILTIN_EXPANDPDLOAD512,
28716 IX86_BUILTIN_EXPANDPDLOAD512Z,
28717 IX86_BUILTIN_EXPANDPS512,
28718 IX86_BUILTIN_EXPANDPS512Z,
28719 IX86_BUILTIN_EXPANDPSLOAD512,
28720 IX86_BUILTIN_EXPANDPSLOAD512Z,
28721 IX86_BUILTIN_EXTRACTF32X4,
28722 IX86_BUILTIN_EXTRACTF64X4,
28723 IX86_BUILTIN_EXTRACTI32X4,
28724 IX86_BUILTIN_EXTRACTI64X4,
28725 IX86_BUILTIN_FIXUPIMMPD512_MASK,
28726 IX86_BUILTIN_FIXUPIMMPD512_MASKZ,
28727 IX86_BUILTIN_FIXUPIMMPS512_MASK,
28728 IX86_BUILTIN_FIXUPIMMPS512_MASKZ,
28729 IX86_BUILTIN_FIXUPIMMSD128_MASK,
28730 IX86_BUILTIN_FIXUPIMMSD128_MASKZ,
28731 IX86_BUILTIN_FIXUPIMMSS128_MASK,
28732 IX86_BUILTIN_FIXUPIMMSS128_MASKZ,
28733 IX86_BUILTIN_GETEXPPD512,
28734 IX86_BUILTIN_GETEXPPS512,
28735 IX86_BUILTIN_GETEXPSD128,
28736 IX86_BUILTIN_GETEXPSS128,
28737 IX86_BUILTIN_GETMANTPD512,
28738 IX86_BUILTIN_GETMANTPS512,
28739 IX86_BUILTIN_GETMANTSD128,
28740 IX86_BUILTIN_GETMANTSS128,
28741 IX86_BUILTIN_INSERTF32X4,
28742 IX86_BUILTIN_INSERTF64X4,
28743 IX86_BUILTIN_INSERTI32X4,
28744 IX86_BUILTIN_INSERTI64X4,
28745 IX86_BUILTIN_LOADAPD512,
28746 IX86_BUILTIN_LOADAPS512,
28747 IX86_BUILTIN_LOADDQUDI512,
28748 IX86_BUILTIN_LOADDQUSI512,
28749 IX86_BUILTIN_LOADUPD512,
28750 IX86_BUILTIN_LOADUPS512,
28751 IX86_BUILTIN_MAXPD512,
28752 IX86_BUILTIN_MAXPS512,
28753 IX86_BUILTIN_MAXSD_ROUND,
28754 IX86_BUILTIN_MAXSS_ROUND,
28755 IX86_BUILTIN_MINPD512,
28756 IX86_BUILTIN_MINPS512,
28757 IX86_BUILTIN_MINSD_ROUND,
28758 IX86_BUILTIN_MINSS_ROUND,
28759 IX86_BUILTIN_MOVAPD512,
28760 IX86_BUILTIN_MOVAPS512,
28761 IX86_BUILTIN_MOVDDUP512,
28762 IX86_BUILTIN_MOVDQA32LOAD512,
28763 IX86_BUILTIN_MOVDQA32STORE512,
28764 IX86_BUILTIN_MOVDQA32_512,
28765 IX86_BUILTIN_MOVDQA64LOAD512,
28766 IX86_BUILTIN_MOVDQA64STORE512,
28767 IX86_BUILTIN_MOVDQA64_512,
28768 IX86_BUILTIN_MOVNTDQ512,
28769 IX86_BUILTIN_MOVNTDQA512,
28770 IX86_BUILTIN_MOVNTPD512,
28771 IX86_BUILTIN_MOVNTPS512,
28772 IX86_BUILTIN_MOVSHDUP512,
28773 IX86_BUILTIN_MOVSLDUP512,
28774 IX86_BUILTIN_MULPD512,
28775 IX86_BUILTIN_MULPS512,
28776 IX86_BUILTIN_MULSD_ROUND,
28777 IX86_BUILTIN_MULSS_ROUND,
28778 IX86_BUILTIN_PABSD512,
28779 IX86_BUILTIN_PABSQ512,
28780 IX86_BUILTIN_PADDD512,
28781 IX86_BUILTIN_PADDQ512,
28782 IX86_BUILTIN_PANDD512,
28783 IX86_BUILTIN_PANDND512,
28784 IX86_BUILTIN_PANDNQ512,
28785 IX86_BUILTIN_PANDQ512,
28786 IX86_BUILTIN_PBROADCASTD512,
28787 IX86_BUILTIN_PBROADCASTD512_GPR,
28788 IX86_BUILTIN_PBROADCASTMB512,
28789 IX86_BUILTIN_PBROADCASTMW512,
28790 IX86_BUILTIN_PBROADCASTQ512,
28791 IX86_BUILTIN_PBROADCASTQ512_GPR,
28792 IX86_BUILTIN_PBROADCASTQ512_MEM,
28793 IX86_BUILTIN_PCMPEQD512_MASK,
28794 IX86_BUILTIN_PCMPEQQ512_MASK,
28795 IX86_BUILTIN_PCMPGTD512_MASK,
28796 IX86_BUILTIN_PCMPGTQ512_MASK,
28797 IX86_BUILTIN_PCOMPRESSD512,
28798 IX86_BUILTIN_PCOMPRESSDSTORE512,
28799 IX86_BUILTIN_PCOMPRESSQ512,
28800 IX86_BUILTIN_PCOMPRESSQSTORE512,
28801 IX86_BUILTIN_PEXPANDD512,
28802 IX86_BUILTIN_PEXPANDD512Z,
28803 IX86_BUILTIN_PEXPANDDLOAD512,
28804 IX86_BUILTIN_PEXPANDDLOAD512Z,
28805 IX86_BUILTIN_PEXPANDQ512,
28806 IX86_BUILTIN_PEXPANDQ512Z,
28807 IX86_BUILTIN_PEXPANDQLOAD512,
28808 IX86_BUILTIN_PEXPANDQLOAD512Z,
28809 IX86_BUILTIN_PMAXSD512,
28810 IX86_BUILTIN_PMAXSQ512,
28811 IX86_BUILTIN_PMAXUD512,
28812 IX86_BUILTIN_PMAXUQ512,
28813 IX86_BUILTIN_PMINSD512,
28814 IX86_BUILTIN_PMINSQ512,
28815 IX86_BUILTIN_PMINUD512,
28816 IX86_BUILTIN_PMINUQ512,
28817 IX86_BUILTIN_PMOVDB512,
28818 IX86_BUILTIN_PMOVDB512_MEM,
28819 IX86_BUILTIN_PMOVDW512,
28820 IX86_BUILTIN_PMOVDW512_MEM,
28821 IX86_BUILTIN_PMOVQB512,
28822 IX86_BUILTIN_PMOVQB512_MEM,
28823 IX86_BUILTIN_PMOVQD512,
28824 IX86_BUILTIN_PMOVQD512_MEM,
28825 IX86_BUILTIN_PMOVQW512,
28826 IX86_BUILTIN_PMOVQW512_MEM,
28827 IX86_BUILTIN_PMOVSDB512,
28828 IX86_BUILTIN_PMOVSDB512_MEM,
28829 IX86_BUILTIN_PMOVSDW512,
28830 IX86_BUILTIN_PMOVSDW512_MEM,
28831 IX86_BUILTIN_PMOVSQB512,
28832 IX86_BUILTIN_PMOVSQB512_MEM,
28833 IX86_BUILTIN_PMOVSQD512,
28834 IX86_BUILTIN_PMOVSQD512_MEM,
28835 IX86_BUILTIN_PMOVSQW512,
28836 IX86_BUILTIN_PMOVSQW512_MEM,
28837 IX86_BUILTIN_PMOVSXBD512,
28838 IX86_BUILTIN_PMOVSXBQ512,
28839 IX86_BUILTIN_PMOVSXDQ512,
28840 IX86_BUILTIN_PMOVSXWD512,
28841 IX86_BUILTIN_PMOVSXWQ512,
28842 IX86_BUILTIN_PMOVUSDB512,
28843 IX86_BUILTIN_PMOVUSDB512_MEM,
28844 IX86_BUILTIN_PMOVUSDW512,
28845 IX86_BUILTIN_PMOVUSDW512_MEM,
28846 IX86_BUILTIN_PMOVUSQB512,
28847 IX86_BUILTIN_PMOVUSQB512_MEM,
28848 IX86_BUILTIN_PMOVUSQD512,
28849 IX86_BUILTIN_PMOVUSQD512_MEM,
28850 IX86_BUILTIN_PMOVUSQW512,
28851 IX86_BUILTIN_PMOVUSQW512_MEM,
28852 IX86_BUILTIN_PMOVZXBD512,
28853 IX86_BUILTIN_PMOVZXBQ512,
28854 IX86_BUILTIN_PMOVZXDQ512,
28855 IX86_BUILTIN_PMOVZXWD512,
28856 IX86_BUILTIN_PMOVZXWQ512,
28857 IX86_BUILTIN_PMULDQ512,
28858 IX86_BUILTIN_PMULLD512,
28859 IX86_BUILTIN_PMULUDQ512,
28860 IX86_BUILTIN_PORD512,
28861 IX86_BUILTIN_PORQ512,
28862 IX86_BUILTIN_PROLD512,
28863 IX86_BUILTIN_PROLQ512,
28864 IX86_BUILTIN_PROLVD512,
28865 IX86_BUILTIN_PROLVQ512,
28866 IX86_BUILTIN_PRORD512,
28867 IX86_BUILTIN_PRORQ512,
28868 IX86_BUILTIN_PRORVD512,
28869 IX86_BUILTIN_PRORVQ512,
28870 IX86_BUILTIN_PSHUFD512,
28871 IX86_BUILTIN_PSLLD512,
28872 IX86_BUILTIN_PSLLDI512,
28873 IX86_BUILTIN_PSLLQ512,
28874 IX86_BUILTIN_PSLLQI512,
28875 IX86_BUILTIN_PSLLVV16SI,
28876 IX86_BUILTIN_PSLLVV8DI,
28877 IX86_BUILTIN_PSRAD512,
28878 IX86_BUILTIN_PSRADI512,
28879 IX86_BUILTIN_PSRAQ512,
28880 IX86_BUILTIN_PSRAQI512,
28881 IX86_BUILTIN_PSRAVV16SI,
28882 IX86_BUILTIN_PSRAVV8DI,
28883 IX86_BUILTIN_PSRLD512,
28884 IX86_BUILTIN_PSRLDI512,
28885 IX86_BUILTIN_PSRLQ512,
28886 IX86_BUILTIN_PSRLQI512,
28887 IX86_BUILTIN_PSRLVV16SI,
28888 IX86_BUILTIN_PSRLVV8DI,
28889 IX86_BUILTIN_PSUBD512,
28890 IX86_BUILTIN_PSUBQ512,
28891 IX86_BUILTIN_PTESTMD512,
28892 IX86_BUILTIN_PTESTMQ512,
28893 IX86_BUILTIN_PTESTNMD512,
28894 IX86_BUILTIN_PTESTNMQ512,
28895 IX86_BUILTIN_PUNPCKHDQ512,
28896 IX86_BUILTIN_PUNPCKHQDQ512,
28897 IX86_BUILTIN_PUNPCKLDQ512,
28898 IX86_BUILTIN_PUNPCKLQDQ512,
28899 IX86_BUILTIN_PXORD512,
28900 IX86_BUILTIN_PXORQ512,
28901 IX86_BUILTIN_RCP14PD512,
28902 IX86_BUILTIN_RCP14PS512,
28903 IX86_BUILTIN_RCP14SD,
28904 IX86_BUILTIN_RCP14SS,
28905 IX86_BUILTIN_RNDSCALEPD,
28906 IX86_BUILTIN_RNDSCALEPS,
28907 IX86_BUILTIN_RNDSCALESD,
28908 IX86_BUILTIN_RNDSCALESS,
28909 IX86_BUILTIN_RSQRT14PD512,
28910 IX86_BUILTIN_RSQRT14PS512,
28911 IX86_BUILTIN_RSQRT14SD,
28912 IX86_BUILTIN_RSQRT14SS,
28913 IX86_BUILTIN_SCALEFPD512,
28914 IX86_BUILTIN_SCALEFPS512,
28915 IX86_BUILTIN_SCALEFSD,
28916 IX86_BUILTIN_SCALEFSS,
28917 IX86_BUILTIN_SHUFPD512,
28918 IX86_BUILTIN_SHUFPS512,
28919 IX86_BUILTIN_SHUF_F32x4,
28920 IX86_BUILTIN_SHUF_F64x2,
28921 IX86_BUILTIN_SHUF_I32x4,
28922 IX86_BUILTIN_SHUF_I64x2,
28923 IX86_BUILTIN_SQRTPD512,
28924 IX86_BUILTIN_SQRTPD512_MASK,
28925 IX86_BUILTIN_SQRTPS512_MASK,
28926 IX86_BUILTIN_SQRTPS_NR512,
28927 IX86_BUILTIN_SQRTSD_ROUND,
28928 IX86_BUILTIN_SQRTSS_ROUND,
28929 IX86_BUILTIN_STOREAPD512,
28930 IX86_BUILTIN_STOREAPS512,
28931 IX86_BUILTIN_STOREDQUDI512,
28932 IX86_BUILTIN_STOREDQUSI512,
28933 IX86_BUILTIN_STOREUPD512,
28934 IX86_BUILTIN_STOREUPS512,
28935 IX86_BUILTIN_SUBPD512,
28936 IX86_BUILTIN_SUBPS512,
28937 IX86_BUILTIN_SUBSD_ROUND,
28938 IX86_BUILTIN_SUBSS_ROUND,
28939 IX86_BUILTIN_UCMPD512,
28940 IX86_BUILTIN_UCMPQ512,
28941 IX86_BUILTIN_UNPCKHPD512,
28942 IX86_BUILTIN_UNPCKHPS512,
28943 IX86_BUILTIN_UNPCKLPD512,
28944 IX86_BUILTIN_UNPCKLPS512,
28945 IX86_BUILTIN_VCVTSD2SI32,
28946 IX86_BUILTIN_VCVTSD2SI64,
28947 IX86_BUILTIN_VCVTSD2USI32,
28948 IX86_BUILTIN_VCVTSD2USI64,
28949 IX86_BUILTIN_VCVTSS2SI32,
28950 IX86_BUILTIN_VCVTSS2SI64,
28951 IX86_BUILTIN_VCVTSS2USI32,
28952 IX86_BUILTIN_VCVTSS2USI64,
28953 IX86_BUILTIN_VCVTTSD2SI32,
28954 IX86_BUILTIN_VCVTTSD2SI64,
28955 IX86_BUILTIN_VCVTTSD2USI32,
28956 IX86_BUILTIN_VCVTTSD2USI64,
28957 IX86_BUILTIN_VCVTTSS2SI32,
28958 IX86_BUILTIN_VCVTTSS2SI64,
28959 IX86_BUILTIN_VCVTTSS2USI32,
28960 IX86_BUILTIN_VCVTTSS2USI64,
28961 IX86_BUILTIN_VFMADDPD512_MASK,
28962 IX86_BUILTIN_VFMADDPD512_MASK3,
28963 IX86_BUILTIN_VFMADDPD512_MASKZ,
28964 IX86_BUILTIN_VFMADDPS512_MASK,
28965 IX86_BUILTIN_VFMADDPS512_MASK3,
28966 IX86_BUILTIN_VFMADDPS512_MASKZ,
28967 IX86_BUILTIN_VFMADDSD3_ROUND,
28968 IX86_BUILTIN_VFMADDSS3_ROUND,
28969 IX86_BUILTIN_VFMADDSUBPD512_MASK,
28970 IX86_BUILTIN_VFMADDSUBPD512_MASK3,
28971 IX86_BUILTIN_VFMADDSUBPD512_MASKZ,
28972 IX86_BUILTIN_VFMADDSUBPS512_MASK,
28973 IX86_BUILTIN_VFMADDSUBPS512_MASK3,
28974 IX86_BUILTIN_VFMADDSUBPS512_MASKZ,
28975 IX86_BUILTIN_VFMSUBADDPD512_MASK3,
28976 IX86_BUILTIN_VFMSUBADDPS512_MASK3,
28977 IX86_BUILTIN_VFMSUBPD512_MASK3,
28978 IX86_BUILTIN_VFMSUBPS512_MASK3,
28979 IX86_BUILTIN_VFMSUBSD3_MASK3,
28980 IX86_BUILTIN_VFMSUBSS3_MASK3,
28981 IX86_BUILTIN_VFNMADDPD512_MASK,
28982 IX86_BUILTIN_VFNMADDPS512_MASK,
28983 IX86_BUILTIN_VFNMSUBPD512_MASK,
28984 IX86_BUILTIN_VFNMSUBPD512_MASK3,
28985 IX86_BUILTIN_VFNMSUBPS512_MASK,
28986 IX86_BUILTIN_VFNMSUBPS512_MASK3,
28987 IX86_BUILTIN_VPCLZCNTD512,
28988 IX86_BUILTIN_VPCLZCNTQ512,
28989 IX86_BUILTIN_VPCONFLICTD512,
28990 IX86_BUILTIN_VPCONFLICTQ512,
28991 IX86_BUILTIN_VPERMDF512,
28992 IX86_BUILTIN_VPERMDI512,
28993 IX86_BUILTIN_VPERMI2VARD512,
28994 IX86_BUILTIN_VPERMI2VARPD512,
28995 IX86_BUILTIN_VPERMI2VARPS512,
28996 IX86_BUILTIN_VPERMI2VARQ512,
28997 IX86_BUILTIN_VPERMILPD512,
28998 IX86_BUILTIN_VPERMILPS512,
28999 IX86_BUILTIN_VPERMILVARPD512,
29000 IX86_BUILTIN_VPERMILVARPS512,
29001 IX86_BUILTIN_VPERMT2VARD512,
29002 IX86_BUILTIN_VPERMT2VARD512_MASKZ,
29003 IX86_BUILTIN_VPERMT2VARPD512,
29004 IX86_BUILTIN_VPERMT2VARPD512_MASKZ,
29005 IX86_BUILTIN_VPERMT2VARPS512,
29006 IX86_BUILTIN_VPERMT2VARPS512_MASKZ,
29007 IX86_BUILTIN_VPERMT2VARQ512,
29008 IX86_BUILTIN_VPERMT2VARQ512_MASKZ,
29009 IX86_BUILTIN_VPERMVARDF512,
29010 IX86_BUILTIN_VPERMVARDI512,
29011 IX86_BUILTIN_VPERMVARSF512,
29012 IX86_BUILTIN_VPERMVARSI512,
29013 IX86_BUILTIN_VTERNLOGD512_MASK,
29014 IX86_BUILTIN_VTERNLOGD512_MASKZ,
29015 IX86_BUILTIN_VTERNLOGQ512_MASK,
29016 IX86_BUILTIN_VTERNLOGQ512_MASKZ,
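   /* Editorial note, not part of the upstream sources: a reading of the
      naming convention used in the AVX512 entries above and below.  The
      _MASK suffix appears to denote the merge-masked (write-masked) form
      of an operation and _MASKZ (or a trailing Z, as in the EXPAND...Z
      entries) the zero-masked form; _MEM appears to mark variants whose
      destination or source is a memory operand, and _GPR broadcasts whose
      source is a general-purpose register.  Treat this as an assumption
      drawn from the names, not a statement of the builtin signatures.  */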
29018 /* Mask arithmetic operations */
29019 IX86_BUILTIN_KAND16,
29020 IX86_BUILTIN_KANDN16,
29021 IX86_BUILTIN_KNOT16,
29022 IX86_BUILTIN_KOR16,
29023 IX86_BUILTIN_KORTESTC16,
29024 IX86_BUILTIN_KORTESTZ16,
29025 IX86_BUILTIN_KUNPCKBW,
29026 IX86_BUILTIN_KXNOR16,
29027 IX86_BUILTIN_KXOR16,
29028 IX86_BUILTIN_KMOV16,
29030 /* AVX512VL. */
29031 IX86_BUILTIN_PMOVUSQD256_MEM,
29032 IX86_BUILTIN_PMOVUSQD128_MEM,
29033 IX86_BUILTIN_PMOVSQD256_MEM,
29034 IX86_BUILTIN_PMOVSQD128_MEM,
29035 IX86_BUILTIN_PMOVQD256_MEM,
29036 IX86_BUILTIN_PMOVQD128_MEM,
29037 IX86_BUILTIN_PMOVUSQW256_MEM,
29038 IX86_BUILTIN_PMOVUSQW128_MEM,
29039 IX86_BUILTIN_PMOVSQW256_MEM,
29040 IX86_BUILTIN_PMOVSQW128_MEM,
29041 IX86_BUILTIN_PMOVQW256_MEM,
29042 IX86_BUILTIN_PMOVQW128_MEM,
29043 IX86_BUILTIN_PMOVUSQB256_MEM,
29044 IX86_BUILTIN_PMOVUSQB128_MEM,
29045 IX86_BUILTIN_PMOVSQB256_MEM,
29046 IX86_BUILTIN_PMOVSQB128_MEM,
29047 IX86_BUILTIN_PMOVQB256_MEM,
29048 IX86_BUILTIN_PMOVQB128_MEM,
29049 IX86_BUILTIN_PMOVUSDW256_MEM,
29050 IX86_BUILTIN_PMOVUSDW128_MEM,
29051 IX86_BUILTIN_PMOVSDW256_MEM,
29052 IX86_BUILTIN_PMOVSDW128_MEM,
29053 IX86_BUILTIN_PMOVDW256_MEM,
29054 IX86_BUILTIN_PMOVDW128_MEM,
29055 IX86_BUILTIN_PMOVUSDB256_MEM,
29056 IX86_BUILTIN_PMOVUSDB128_MEM,
29057 IX86_BUILTIN_PMOVSDB256_MEM,
29058 IX86_BUILTIN_PMOVSDB128_MEM,
29059 IX86_BUILTIN_PMOVDB256_MEM,
29060 IX86_BUILTIN_PMOVDB128_MEM,
29061 IX86_BUILTIN_MOVDQA64LOAD256_MASK,
29062 IX86_BUILTIN_MOVDQA64LOAD128_MASK,
29063 IX86_BUILTIN_MOVDQA32LOAD256_MASK,
29064 IX86_BUILTIN_MOVDQA32LOAD128_MASK,
29065 IX86_BUILTIN_MOVDQA64STORE256_MASK,
29066 IX86_BUILTIN_MOVDQA64STORE128_MASK,
29067 IX86_BUILTIN_MOVDQA32STORE256_MASK,
29068 IX86_BUILTIN_MOVDQA32STORE128_MASK,
29069 IX86_BUILTIN_LOADAPD256_MASK,
29070 IX86_BUILTIN_LOADAPD128_MASK,
29071 IX86_BUILTIN_LOADAPS256_MASK,
29072 IX86_BUILTIN_LOADAPS128_MASK,
29073 IX86_BUILTIN_STOREAPD256_MASK,
29074 IX86_BUILTIN_STOREAPD128_MASK,
29075 IX86_BUILTIN_STOREAPS256_MASK,
29076 IX86_BUILTIN_STOREAPS128_MASK,
29077 IX86_BUILTIN_LOADUPD256_MASK,
29078 IX86_BUILTIN_LOADUPD128_MASK,
29079 IX86_BUILTIN_LOADUPS256_MASK,
29080 IX86_BUILTIN_LOADUPS128_MASK,
29081 IX86_BUILTIN_STOREUPD256_MASK,
29082 IX86_BUILTIN_STOREUPD128_MASK,
29083 IX86_BUILTIN_STOREUPS256_MASK,
29084 IX86_BUILTIN_STOREUPS128_MASK,
29085 IX86_BUILTIN_LOADDQUDI256_MASK,
29086 IX86_BUILTIN_LOADDQUDI128_MASK,
29087 IX86_BUILTIN_LOADDQUSI256_MASK,
29088 IX86_BUILTIN_LOADDQUSI128_MASK,
29089 IX86_BUILTIN_LOADDQUHI256_MASK,
29090 IX86_BUILTIN_LOADDQUHI128_MASK,
29091 IX86_BUILTIN_LOADDQUQI256_MASK,
29092 IX86_BUILTIN_LOADDQUQI128_MASK,
29093 IX86_BUILTIN_STOREDQUDI256_MASK,
29094 IX86_BUILTIN_STOREDQUDI128_MASK,
29095 IX86_BUILTIN_STOREDQUSI256_MASK,
29096 IX86_BUILTIN_STOREDQUSI128_MASK,
29097 IX86_BUILTIN_STOREDQUHI256_MASK,
29098 IX86_BUILTIN_STOREDQUHI128_MASK,
29099 IX86_BUILTIN_STOREDQUQI256_MASK,
29100 IX86_BUILTIN_STOREDQUQI128_MASK,
29101 IX86_BUILTIN_COMPRESSPDSTORE256,
29102 IX86_BUILTIN_COMPRESSPDSTORE128,
29103 IX86_BUILTIN_COMPRESSPSSTORE256,
29104 IX86_BUILTIN_COMPRESSPSSTORE128,
29105 IX86_BUILTIN_PCOMPRESSQSTORE256,
29106 IX86_BUILTIN_PCOMPRESSQSTORE128,
29107 IX86_BUILTIN_PCOMPRESSDSTORE256,
29108 IX86_BUILTIN_PCOMPRESSDSTORE128,
29109 IX86_BUILTIN_EXPANDPDLOAD256,
29110 IX86_BUILTIN_EXPANDPDLOAD128,
29111 IX86_BUILTIN_EXPANDPSLOAD256,
29112 IX86_BUILTIN_EXPANDPSLOAD128,
29113 IX86_BUILTIN_PEXPANDQLOAD256,
29114 IX86_BUILTIN_PEXPANDQLOAD128,
29115 IX86_BUILTIN_PEXPANDDLOAD256,
29116 IX86_BUILTIN_PEXPANDDLOAD128,
29117 IX86_BUILTIN_EXPANDPDLOAD256Z,
29118 IX86_BUILTIN_EXPANDPDLOAD128Z,
29119 IX86_BUILTIN_EXPANDPSLOAD256Z,
29120 IX86_BUILTIN_EXPANDPSLOAD128Z,
29121 IX86_BUILTIN_PEXPANDQLOAD256Z,
29122 IX86_BUILTIN_PEXPANDQLOAD128Z,
29123 IX86_BUILTIN_PEXPANDDLOAD256Z,
29124 IX86_BUILTIN_PEXPANDDLOAD128Z,
29125 IX86_BUILTIN_PALIGNR256_MASK,
29126 IX86_BUILTIN_PALIGNR128_MASK,
29127 IX86_BUILTIN_MOVDQA64_256_MASK,
29128 IX86_BUILTIN_MOVDQA64_128_MASK,
29129 IX86_BUILTIN_MOVDQA32_256_MASK,
29130 IX86_BUILTIN_MOVDQA32_128_MASK,
29131 IX86_BUILTIN_MOVAPD256_MASK,
29132 IX86_BUILTIN_MOVAPD128_MASK,
29133 IX86_BUILTIN_MOVAPS256_MASK,
29134 IX86_BUILTIN_MOVAPS128_MASK,
29135 IX86_BUILTIN_MOVDQUHI256_MASK,
29136 IX86_BUILTIN_MOVDQUHI128_MASK,
29137 IX86_BUILTIN_MOVDQUQI256_MASK,
29138 IX86_BUILTIN_MOVDQUQI128_MASK,
29139 IX86_BUILTIN_MINPS128_MASK,
29140 IX86_BUILTIN_MAXPS128_MASK,
29141 IX86_BUILTIN_MINPD128_MASK,
29142 IX86_BUILTIN_MAXPD128_MASK,
29143 IX86_BUILTIN_MAXPD256_MASK,
29144 IX86_BUILTIN_MAXPS256_MASK,
29145 IX86_BUILTIN_MINPD256_MASK,
29146 IX86_BUILTIN_MINPS256_MASK,
29147 IX86_BUILTIN_MULPS128_MASK,
29148 IX86_BUILTIN_DIVPS128_MASK,
29149 IX86_BUILTIN_MULPD128_MASK,
29150 IX86_BUILTIN_DIVPD128_MASK,
29151 IX86_BUILTIN_DIVPD256_MASK,
29152 IX86_BUILTIN_DIVPS256_MASK,
29153 IX86_BUILTIN_MULPD256_MASK,
29154 IX86_BUILTIN_MULPS256_MASK,
29155 IX86_BUILTIN_ADDPD128_MASK,
29156 IX86_BUILTIN_ADDPD256_MASK,
29157 IX86_BUILTIN_ADDPS128_MASK,
29158 IX86_BUILTIN_ADDPS256_MASK,
29159 IX86_BUILTIN_SUBPD128_MASK,
29160 IX86_BUILTIN_SUBPD256_MASK,
29161 IX86_BUILTIN_SUBPS128_MASK,
29162 IX86_BUILTIN_SUBPS256_MASK,
29163 IX86_BUILTIN_XORPD256_MASK,
29164 IX86_BUILTIN_XORPD128_MASK,
29165 IX86_BUILTIN_XORPS256_MASK,
29166 IX86_BUILTIN_XORPS128_MASK,
29167 IX86_BUILTIN_ORPD256_MASK,
29168 IX86_BUILTIN_ORPD128_MASK,
29169 IX86_BUILTIN_ORPS256_MASK,
29170 IX86_BUILTIN_ORPS128_MASK,
29171 IX86_BUILTIN_BROADCASTF32x2_256,
29172 IX86_BUILTIN_BROADCASTI32x2_256,
29173 IX86_BUILTIN_BROADCASTI32x2_128,
29174 IX86_BUILTIN_BROADCASTF64X2_256,
29175 IX86_BUILTIN_BROADCASTI64X2_256,
29176 IX86_BUILTIN_BROADCASTF32X4_256,
29177 IX86_BUILTIN_BROADCASTI32X4_256,
29178 IX86_BUILTIN_EXTRACTF32X4_256,
29179 IX86_BUILTIN_EXTRACTI32X4_256,
29180 IX86_BUILTIN_DBPSADBW256,
29181 IX86_BUILTIN_DBPSADBW128,
29182 IX86_BUILTIN_CVTTPD2QQ256,
29183 IX86_BUILTIN_CVTTPD2QQ128,
29184 IX86_BUILTIN_CVTTPD2UQQ256,
29185 IX86_BUILTIN_CVTTPD2UQQ128,
29186 IX86_BUILTIN_CVTPD2QQ256,
29187 IX86_BUILTIN_CVTPD2QQ128,
29188 IX86_BUILTIN_CVTPD2UQQ256,
29189 IX86_BUILTIN_CVTPD2UQQ128,
29190 IX86_BUILTIN_CVTPD2UDQ256_MASK,
29191 IX86_BUILTIN_CVTPD2UDQ128_MASK,
29192 IX86_BUILTIN_CVTTPS2QQ256,
29193 IX86_BUILTIN_CVTTPS2QQ128,
29194 IX86_BUILTIN_CVTTPS2UQQ256,
29195 IX86_BUILTIN_CVTTPS2UQQ128,
29196 IX86_BUILTIN_CVTTPS2DQ256_MASK,
29197 IX86_BUILTIN_CVTTPS2DQ128_MASK,
29198 IX86_BUILTIN_CVTTPS2UDQ256,
29199 IX86_BUILTIN_CVTTPS2UDQ128,
29200 IX86_BUILTIN_CVTTPD2DQ256_MASK,
29201 IX86_BUILTIN_CVTTPD2DQ128_MASK,
29202 IX86_BUILTIN_CVTTPD2UDQ256_MASK,
29203 IX86_BUILTIN_CVTTPD2UDQ128_MASK,
29204 IX86_BUILTIN_CVTPD2DQ256_MASK,
29205 IX86_BUILTIN_CVTPD2DQ128_MASK,
29206 IX86_BUILTIN_CVTDQ2PD256_MASK,
29207 IX86_BUILTIN_CVTDQ2PD128_MASK,
29208 IX86_BUILTIN_CVTUDQ2PD256_MASK,
29209 IX86_BUILTIN_CVTUDQ2PD128_MASK,
29210 IX86_BUILTIN_CVTDQ2PS256_MASK,
29211 IX86_BUILTIN_CVTDQ2PS128_MASK,
29212 IX86_BUILTIN_CVTUDQ2PS256_MASK,
29213 IX86_BUILTIN_CVTUDQ2PS128_MASK,
29214 IX86_BUILTIN_CVTPS2PD256_MASK,
29215 IX86_BUILTIN_CVTPS2PD128_MASK,
29216 IX86_BUILTIN_PBROADCASTB256_MASK,
29217 IX86_BUILTIN_PBROADCASTB256_GPR_MASK,
29218 IX86_BUILTIN_PBROADCASTB128_MASK,
29219 IX86_BUILTIN_PBROADCASTB128_GPR_MASK,
29220 IX86_BUILTIN_PBROADCASTW256_MASK,
29221 IX86_BUILTIN_PBROADCASTW256_GPR_MASK,
29222 IX86_BUILTIN_PBROADCASTW128_MASK,
29223 IX86_BUILTIN_PBROADCASTW128_GPR_MASK,
29224 IX86_BUILTIN_PBROADCASTD256_MASK,
29225 IX86_BUILTIN_PBROADCASTD256_GPR_MASK,
29226 IX86_BUILTIN_PBROADCASTD128_MASK,
29227 IX86_BUILTIN_PBROADCASTD128_GPR_MASK,
29228 IX86_BUILTIN_PBROADCASTQ256_MASK,
29229 IX86_BUILTIN_PBROADCASTQ256_GPR_MASK,
29230 IX86_BUILTIN_PBROADCASTQ256_MEM_MASK,
29231 IX86_BUILTIN_PBROADCASTQ128_MASK,
29232 IX86_BUILTIN_PBROADCASTQ128_GPR_MASK,
29233 IX86_BUILTIN_PBROADCASTQ128_MEM_MASK,
29234 IX86_BUILTIN_BROADCASTSS256,
29235 IX86_BUILTIN_BROADCASTSS128,
29236 IX86_BUILTIN_BROADCASTSD256,
29237 IX86_BUILTIN_EXTRACTF64X2_256,
29238 IX86_BUILTIN_EXTRACTI64X2_256,
29239 IX86_BUILTIN_INSERTF32X4_256,
29240 IX86_BUILTIN_INSERTI32X4_256,
29241 IX86_BUILTIN_PMOVSXBW256_MASK,
29242 IX86_BUILTIN_PMOVSXBW128_MASK,
29243 IX86_BUILTIN_PMOVSXBD256_MASK,
29244 IX86_BUILTIN_PMOVSXBD128_MASK,
29245 IX86_BUILTIN_PMOVSXBQ256_MASK,
29246 IX86_BUILTIN_PMOVSXBQ128_MASK,
29247 IX86_BUILTIN_PMOVSXWD256_MASK,
29248 IX86_BUILTIN_PMOVSXWD128_MASK,
29249 IX86_BUILTIN_PMOVSXWQ256_MASK,
29250 IX86_BUILTIN_PMOVSXWQ128_MASK,
29251 IX86_BUILTIN_PMOVSXDQ256_MASK,
29252 IX86_BUILTIN_PMOVSXDQ128_MASK,
29253 IX86_BUILTIN_PMOVZXBW256_MASK,
29254 IX86_BUILTIN_PMOVZXBW128_MASK,
29255 IX86_BUILTIN_PMOVZXBD256_MASK,
29256 IX86_BUILTIN_PMOVZXBD128_MASK,
29257 IX86_BUILTIN_PMOVZXBQ256_MASK,
29258 IX86_BUILTIN_PMOVZXBQ128_MASK,
29259 IX86_BUILTIN_PMOVZXWD256_MASK,
29260 IX86_BUILTIN_PMOVZXWD128_MASK,
29261 IX86_BUILTIN_PMOVZXWQ256_MASK,
29262 IX86_BUILTIN_PMOVZXWQ128_MASK,
29263 IX86_BUILTIN_PMOVZXDQ256_MASK,
29264 IX86_BUILTIN_PMOVZXDQ128_MASK,
29265 IX86_BUILTIN_REDUCEPD256_MASK,
29266 IX86_BUILTIN_REDUCEPD128_MASK,
29267 IX86_BUILTIN_REDUCEPS256_MASK,
29268 IX86_BUILTIN_REDUCEPS128_MASK,
29269 IX86_BUILTIN_REDUCESD_MASK,
29270 IX86_BUILTIN_REDUCESS_MASK,
29271 IX86_BUILTIN_VPERMVARHI256_MASK,
29272 IX86_BUILTIN_VPERMVARHI128_MASK,
29273 IX86_BUILTIN_VPERMT2VARHI256,
29274 IX86_BUILTIN_VPERMT2VARHI256_MASKZ,
29275 IX86_BUILTIN_VPERMT2VARHI128,
29276 IX86_BUILTIN_VPERMT2VARHI128_MASKZ,
29277 IX86_BUILTIN_VPERMI2VARHI256,
29278 IX86_BUILTIN_VPERMI2VARHI128,
29279 IX86_BUILTIN_RCP14PD256,
29280 IX86_BUILTIN_RCP14PD128,
29281 IX86_BUILTIN_RCP14PS256,
29282 IX86_BUILTIN_RCP14PS128,
29283 IX86_BUILTIN_RSQRT14PD256_MASK,
29284 IX86_BUILTIN_RSQRT14PD128_MASK,
29285 IX86_BUILTIN_RSQRT14PS256_MASK,
29286 IX86_BUILTIN_RSQRT14PS128_MASK,
29287 IX86_BUILTIN_SQRTPD256_MASK,
29288 IX86_BUILTIN_SQRTPD128_MASK,
29289 IX86_BUILTIN_SQRTPS256_MASK,
29290 IX86_BUILTIN_SQRTPS128_MASK,
29291 IX86_BUILTIN_PADDB128_MASK,
29292 IX86_BUILTIN_PADDW128_MASK,
29293 IX86_BUILTIN_PADDD128_MASK,
29294 IX86_BUILTIN_PADDQ128_MASK,
29295 IX86_BUILTIN_PSUBB128_MASK,
29296 IX86_BUILTIN_PSUBW128_MASK,
29297 IX86_BUILTIN_PSUBD128_MASK,
29298 IX86_BUILTIN_PSUBQ128_MASK,
29299 IX86_BUILTIN_PADDSB128_MASK,
29300 IX86_BUILTIN_PADDSW128_MASK,
29301 IX86_BUILTIN_PSUBSB128_MASK,
29302 IX86_BUILTIN_PSUBSW128_MASK,
29303 IX86_BUILTIN_PADDUSB128_MASK,
29304 IX86_BUILTIN_PADDUSW128_MASK,
29305 IX86_BUILTIN_PSUBUSB128_MASK,
29306 IX86_BUILTIN_PSUBUSW128_MASK,
29307 IX86_BUILTIN_PADDB256_MASK,
29308 IX86_BUILTIN_PADDW256_MASK,
29309 IX86_BUILTIN_PADDD256_MASK,
29310 IX86_BUILTIN_PADDQ256_MASK,
29311 IX86_BUILTIN_PADDSB256_MASK,
29312 IX86_BUILTIN_PADDSW256_MASK,
29313 IX86_BUILTIN_PADDUSB256_MASK,
29314 IX86_BUILTIN_PADDUSW256_MASK,
29315 IX86_BUILTIN_PSUBB256_MASK,
29316 IX86_BUILTIN_PSUBW256_MASK,
29317 IX86_BUILTIN_PSUBD256_MASK,
29318 IX86_BUILTIN_PSUBQ256_MASK,
29319 IX86_BUILTIN_PSUBSB256_MASK,
29320 IX86_BUILTIN_PSUBSW256_MASK,
29321 IX86_BUILTIN_PSUBUSB256_MASK,
29322 IX86_BUILTIN_PSUBUSW256_MASK,
29323 IX86_BUILTIN_SHUF_F64x2_256,
29324 IX86_BUILTIN_SHUF_I64x2_256,
29325 IX86_BUILTIN_SHUF_I32x4_256,
29326 IX86_BUILTIN_SHUF_F32x4_256,
29327 IX86_BUILTIN_PMOVWB128,
29328 IX86_BUILTIN_PMOVWB256,
29329 IX86_BUILTIN_PMOVSWB128,
29330 IX86_BUILTIN_PMOVSWB256,
29331 IX86_BUILTIN_PMOVUSWB128,
29332 IX86_BUILTIN_PMOVUSWB256,
29333 IX86_BUILTIN_PMOVDB128,
29334 IX86_BUILTIN_PMOVDB256,
29335 IX86_BUILTIN_PMOVSDB128,
29336 IX86_BUILTIN_PMOVSDB256,
29337 IX86_BUILTIN_PMOVUSDB128,
29338 IX86_BUILTIN_PMOVUSDB256,
29339 IX86_BUILTIN_PMOVDW128,
29340 IX86_BUILTIN_PMOVDW256,
29341 IX86_BUILTIN_PMOVSDW128,
29342 IX86_BUILTIN_PMOVSDW256,
29343 IX86_BUILTIN_PMOVUSDW128,
29344 IX86_BUILTIN_PMOVUSDW256,
29345 IX86_BUILTIN_PMOVQB128,
29346 IX86_BUILTIN_PMOVQB256,
29347 IX86_BUILTIN_PMOVSQB128,
29348 IX86_BUILTIN_PMOVSQB256,
29349 IX86_BUILTIN_PMOVUSQB128,
29350 IX86_BUILTIN_PMOVUSQB256,
29351 IX86_BUILTIN_PMOVQW128,
29352 IX86_BUILTIN_PMOVQW256,
29353 IX86_BUILTIN_PMOVSQW128,
29354 IX86_BUILTIN_PMOVSQW256,
29355 IX86_BUILTIN_PMOVUSQW128,
29356 IX86_BUILTIN_PMOVUSQW256,
29357 IX86_BUILTIN_PMOVQD128,
29358 IX86_BUILTIN_PMOVQD256,
29359 IX86_BUILTIN_PMOVSQD128,
29360 IX86_BUILTIN_PMOVSQD256,
29361 IX86_BUILTIN_PMOVUSQD128,
29362 IX86_BUILTIN_PMOVUSQD256,
29363 IX86_BUILTIN_RANGEPD256,
29364 IX86_BUILTIN_RANGEPD128,
29365 IX86_BUILTIN_RANGEPS256,
29366 IX86_BUILTIN_RANGEPS128,
29367 IX86_BUILTIN_GETEXPPS256,
29368 IX86_BUILTIN_GETEXPPD256,
29369 IX86_BUILTIN_GETEXPPS128,
29370 IX86_BUILTIN_GETEXPPD128,
29371 IX86_BUILTIN_FIXUPIMMPD256_MASK,
29372 IX86_BUILTIN_FIXUPIMMPD256_MASKZ,
29373 IX86_BUILTIN_FIXUPIMMPS256_MASK,
29374 IX86_BUILTIN_FIXUPIMMPS256_MASKZ,
29375 IX86_BUILTIN_FIXUPIMMPD128_MASK,
29376 IX86_BUILTIN_FIXUPIMMPD128_MASKZ,
29377 IX86_BUILTIN_FIXUPIMMPS128_MASK,
29378 IX86_BUILTIN_FIXUPIMMPS128_MASKZ,
29379 IX86_BUILTIN_PABSQ256,
29380 IX86_BUILTIN_PABSQ128,
29381 IX86_BUILTIN_PABSD256_MASK,
29382 IX86_BUILTIN_PABSD128_MASK,
29383 IX86_BUILTIN_PMULHRSW256_MASK,
29384 IX86_BUILTIN_PMULHRSW128_MASK,
29385 IX86_BUILTIN_PMULHUW128_MASK,
29386 IX86_BUILTIN_PMULHUW256_MASK,
29387 IX86_BUILTIN_PMULHW256_MASK,
29388 IX86_BUILTIN_PMULHW128_MASK,
29389 IX86_BUILTIN_PMULLW256_MASK,
29390 IX86_BUILTIN_PMULLW128_MASK,
29391 IX86_BUILTIN_PMULLQ256,
29392 IX86_BUILTIN_PMULLQ128,
29393 IX86_BUILTIN_ANDPD256_MASK,
29394 IX86_BUILTIN_ANDPD128_MASK,
29395 IX86_BUILTIN_ANDPS256_MASK,
29396 IX86_BUILTIN_ANDPS128_MASK,
29397 IX86_BUILTIN_ANDNPD256_MASK,
29398 IX86_BUILTIN_ANDNPD128_MASK,
29399 IX86_BUILTIN_ANDNPS256_MASK,
29400 IX86_BUILTIN_ANDNPS128_MASK,
29401 IX86_BUILTIN_PSLLWI128_MASK,
29402 IX86_BUILTIN_PSLLDI128_MASK,
29403 IX86_BUILTIN_PSLLQI128_MASK,
29404 IX86_BUILTIN_PSLLW128_MASK,
29405 IX86_BUILTIN_PSLLD128_MASK,
29406 IX86_BUILTIN_PSLLQ128_MASK,
29407 IX86_BUILTIN_PSLLWI256_MASK,
29408 IX86_BUILTIN_PSLLW256_MASK,
29409 IX86_BUILTIN_PSLLDI256_MASK,
29410 IX86_BUILTIN_PSLLD256_MASK,
29411 IX86_BUILTIN_PSLLQI256_MASK,
29412 IX86_BUILTIN_PSLLQ256_MASK,
29413 IX86_BUILTIN_PSRADI128_MASK,
29414 IX86_BUILTIN_PSRAD128_MASK,
29415 IX86_BUILTIN_PSRADI256_MASK,
29416 IX86_BUILTIN_PSRAD256_MASK,
29417 IX86_BUILTIN_PSRAQI128_MASK,
29418 IX86_BUILTIN_PSRAQ128_MASK,
29419 IX86_BUILTIN_PSRAQI256_MASK,
29420 IX86_BUILTIN_PSRAQ256_MASK,
29421 IX86_BUILTIN_PANDD256,
29422 IX86_BUILTIN_PANDD128,
29423 IX86_BUILTIN_PSRLDI128_MASK,
29424 IX86_BUILTIN_PSRLD128_MASK,
29425 IX86_BUILTIN_PSRLDI256_MASK,
29426 IX86_BUILTIN_PSRLD256_MASK,
29427 IX86_BUILTIN_PSRLQI128_MASK,
29428 IX86_BUILTIN_PSRLQ128_MASK,
29429 IX86_BUILTIN_PSRLQI256_MASK,
29430 IX86_BUILTIN_PSRLQ256_MASK,
29431 IX86_BUILTIN_PANDQ256,
29432 IX86_BUILTIN_PANDQ128,
29433 IX86_BUILTIN_PANDND256,
29434 IX86_BUILTIN_PANDND128,
29435 IX86_BUILTIN_PANDNQ256,
29436 IX86_BUILTIN_PANDNQ128,
29437 IX86_BUILTIN_PORD256,
29438 IX86_BUILTIN_PORD128,
29439 IX86_BUILTIN_PORQ256,
29440 IX86_BUILTIN_PORQ128,
29441 IX86_BUILTIN_PXORD256,
29442 IX86_BUILTIN_PXORD128,
29443 IX86_BUILTIN_PXORQ256,
29444 IX86_BUILTIN_PXORQ128,
29445 IX86_BUILTIN_PACKSSWB256_MASK,
29446 IX86_BUILTIN_PACKSSWB128_MASK,
29447 IX86_BUILTIN_PACKUSWB256_MASK,
29448 IX86_BUILTIN_PACKUSWB128_MASK,
29449 IX86_BUILTIN_RNDSCALEPS256,
29450 IX86_BUILTIN_RNDSCALEPD256,
29451 IX86_BUILTIN_RNDSCALEPS128,
29452 IX86_BUILTIN_RNDSCALEPD128,
29453 IX86_BUILTIN_VTERNLOGQ256_MASK,
29454 IX86_BUILTIN_VTERNLOGQ256_MASKZ,
29455 IX86_BUILTIN_VTERNLOGD256_MASK,
29456 IX86_BUILTIN_VTERNLOGD256_MASKZ,
29457 IX86_BUILTIN_VTERNLOGQ128_MASK,
29458 IX86_BUILTIN_VTERNLOGQ128_MASKZ,
29459 IX86_BUILTIN_VTERNLOGD128_MASK,
29460 IX86_BUILTIN_VTERNLOGD128_MASKZ,
29461 IX86_BUILTIN_SCALEFPD256,
29462 IX86_BUILTIN_SCALEFPS256,
29463 IX86_BUILTIN_SCALEFPD128,
29464 IX86_BUILTIN_SCALEFPS128,
29465 IX86_BUILTIN_VFMADDPD256_MASK,
29466 IX86_BUILTIN_VFMADDPD256_MASK3,
29467 IX86_BUILTIN_VFMADDPD256_MASKZ,
29468 IX86_BUILTIN_VFMADDPD128_MASK,
29469 IX86_BUILTIN_VFMADDPD128_MASK3,
29470 IX86_BUILTIN_VFMADDPD128_MASKZ,
29471 IX86_BUILTIN_VFMADDPS256_MASK,
29472 IX86_BUILTIN_VFMADDPS256_MASK3,
29473 IX86_BUILTIN_VFMADDPS256_MASKZ,
29474 IX86_BUILTIN_VFMADDPS128_MASK,
29475 IX86_BUILTIN_VFMADDPS128_MASK3,
29476 IX86_BUILTIN_VFMADDPS128_MASKZ,
29477 IX86_BUILTIN_VFMSUBPD256_MASK3,
29478 IX86_BUILTIN_VFMSUBPD128_MASK3,
29479 IX86_BUILTIN_VFMSUBPS256_MASK3,
29480 IX86_BUILTIN_VFMSUBPS128_MASK3,
29481 IX86_BUILTIN_VFNMADDPD256_MASK,
29482 IX86_BUILTIN_VFNMADDPD128_MASK,
29483 IX86_BUILTIN_VFNMADDPS256_MASK,
29484 IX86_BUILTIN_VFNMADDPS128_MASK,
29485 IX86_BUILTIN_VFNMSUBPD256_MASK,
29486 IX86_BUILTIN_VFNMSUBPD256_MASK3,
29487 IX86_BUILTIN_VFNMSUBPD128_MASK,
29488 IX86_BUILTIN_VFNMSUBPD128_MASK3,
29489 IX86_BUILTIN_VFNMSUBPS256_MASK,
29490 IX86_BUILTIN_VFNMSUBPS256_MASK3,
29491 IX86_BUILTIN_VFNMSUBPS128_MASK,
29492 IX86_BUILTIN_VFNMSUBPS128_MASK3,
29493 IX86_BUILTIN_VFMADDSUBPD256_MASK,
29494 IX86_BUILTIN_VFMADDSUBPD256_MASK3,
29495 IX86_BUILTIN_VFMADDSUBPD256_MASKZ,
29496 IX86_BUILTIN_VFMADDSUBPD128_MASK,
29497 IX86_BUILTIN_VFMADDSUBPD128_MASK3,
29498 IX86_BUILTIN_VFMADDSUBPD128_MASKZ,
29499 IX86_BUILTIN_VFMADDSUBPS256_MASK,
29500 IX86_BUILTIN_VFMADDSUBPS256_MASK3,
29501 IX86_BUILTIN_VFMADDSUBPS256_MASKZ,
29502 IX86_BUILTIN_VFMADDSUBPS128_MASK,
29503 IX86_BUILTIN_VFMADDSUBPS128_MASK3,
29504 IX86_BUILTIN_VFMADDSUBPS128_MASKZ,
29505 IX86_BUILTIN_VFMSUBADDPD256_MASK3,
29506 IX86_BUILTIN_VFMSUBADDPD128_MASK3,
29507 IX86_BUILTIN_VFMSUBADDPS256_MASK3,
29508 IX86_BUILTIN_VFMSUBADDPS128_MASK3,
29509 IX86_BUILTIN_INSERTF64X2_256,
29510 IX86_BUILTIN_INSERTI64X2_256,
29511 IX86_BUILTIN_PSRAVV16HI,
29512 IX86_BUILTIN_PSRAVV8HI,
29513 IX86_BUILTIN_PMADDUBSW256_MASK,
29514 IX86_BUILTIN_PMADDUBSW128_MASK,
29515 IX86_BUILTIN_PMADDWD256_MASK,
29516 IX86_BUILTIN_PMADDWD128_MASK,
29517 IX86_BUILTIN_PSRLVV16HI,
29518 IX86_BUILTIN_PSRLVV8HI,
29519 IX86_BUILTIN_CVTPS2DQ256_MASK,
29520 IX86_BUILTIN_CVTPS2DQ128_MASK,
29521 IX86_BUILTIN_CVTPS2UDQ256,
29522 IX86_BUILTIN_CVTPS2UDQ128,
29523 IX86_BUILTIN_CVTPS2QQ256,
29524 IX86_BUILTIN_CVTPS2QQ128,
29525 IX86_BUILTIN_CVTPS2UQQ256,
29526 IX86_BUILTIN_CVTPS2UQQ128,
29527 IX86_BUILTIN_GETMANTPS256,
29528 IX86_BUILTIN_GETMANTPS128,
29529 IX86_BUILTIN_GETMANTPD256,
29530 IX86_BUILTIN_GETMANTPD128,
29531 IX86_BUILTIN_MOVDDUP256_MASK,
29532 IX86_BUILTIN_MOVDDUP128_MASK,
29533 IX86_BUILTIN_MOVSHDUP256_MASK,
29534 IX86_BUILTIN_MOVSHDUP128_MASK,
29535 IX86_BUILTIN_MOVSLDUP256_MASK,
29536 IX86_BUILTIN_MOVSLDUP128_MASK,
29537 IX86_BUILTIN_CVTQQ2PS256,
29538 IX86_BUILTIN_CVTQQ2PS128,
29539 IX86_BUILTIN_CVTUQQ2PS256,
29540 IX86_BUILTIN_CVTUQQ2PS128,
29541 IX86_BUILTIN_CVTQQ2PD256,
29542 IX86_BUILTIN_CVTQQ2PD128,
29543 IX86_BUILTIN_CVTUQQ2PD256,
29544 IX86_BUILTIN_CVTUQQ2PD128,
29545 IX86_BUILTIN_VPERMT2VARQ256,
29546 IX86_BUILTIN_VPERMT2VARQ256_MASKZ,
29547 IX86_BUILTIN_VPERMT2VARD256,
29548 IX86_BUILTIN_VPERMT2VARD256_MASKZ,
29549 IX86_BUILTIN_VPERMI2VARQ256,
29550 IX86_BUILTIN_VPERMI2VARD256,
29551 IX86_BUILTIN_VPERMT2VARPD256,
29552 IX86_BUILTIN_VPERMT2VARPD256_MASKZ,
29553 IX86_BUILTIN_VPERMT2VARPS256,
29554 IX86_BUILTIN_VPERMT2VARPS256_MASKZ,
29555 IX86_BUILTIN_VPERMI2VARPD256,
29556 IX86_BUILTIN_VPERMI2VARPS256,
29557 IX86_BUILTIN_VPERMT2VARQ128,
29558 IX86_BUILTIN_VPERMT2VARQ128_MASKZ,
29559 IX86_BUILTIN_VPERMT2VARD128,
29560 IX86_BUILTIN_VPERMT2VARD128_MASKZ,
29561 IX86_BUILTIN_VPERMI2VARQ128,
29562 IX86_BUILTIN_VPERMI2VARD128,
29563 IX86_BUILTIN_VPERMT2VARPD128,
29564 IX86_BUILTIN_VPERMT2VARPD128_MASKZ,
29565 IX86_BUILTIN_VPERMT2VARPS128,
29566 IX86_BUILTIN_VPERMT2VARPS128_MASKZ,
29567 IX86_BUILTIN_VPERMI2VARPD128,
29568 IX86_BUILTIN_VPERMI2VARPS128,
29569 IX86_BUILTIN_PSHUFB256_MASK,
29570 IX86_BUILTIN_PSHUFB128_MASK,
29571 IX86_BUILTIN_PSHUFHW256_MASK,
29572 IX86_BUILTIN_PSHUFHW128_MASK,
29573 IX86_BUILTIN_PSHUFLW256_MASK,
29574 IX86_BUILTIN_PSHUFLW128_MASK,
29575 IX86_BUILTIN_PSHUFD256_MASK,
29576 IX86_BUILTIN_PSHUFD128_MASK,
29577 IX86_BUILTIN_SHUFPD256_MASK,
29578 IX86_BUILTIN_SHUFPD128_MASK,
29579 IX86_BUILTIN_SHUFPS256_MASK,
29580 IX86_BUILTIN_SHUFPS128_MASK,
29581 IX86_BUILTIN_PROLVQ256,
29582 IX86_BUILTIN_PROLVQ128,
29583 IX86_BUILTIN_PROLQ256,
29584 IX86_BUILTIN_PROLQ128,
29585 IX86_BUILTIN_PRORVQ256,
29586 IX86_BUILTIN_PRORVQ128,
29587 IX86_BUILTIN_PRORQ256,
29588 IX86_BUILTIN_PRORQ128,
29589 IX86_BUILTIN_PSRAVQ128,
29590 IX86_BUILTIN_PSRAVQ256,
29591 IX86_BUILTIN_PSLLVV4DI_MASK,
29592 IX86_BUILTIN_PSLLVV2DI_MASK,
29593 IX86_BUILTIN_PSLLVV8SI_MASK,
29594 IX86_BUILTIN_PSLLVV4SI_MASK,
29595 IX86_BUILTIN_PSRAVV8SI_MASK,
29596 IX86_BUILTIN_PSRAVV4SI_MASK,
29597 IX86_BUILTIN_PSRLVV4DI_MASK,
29598 IX86_BUILTIN_PSRLVV2DI_MASK,
29599 IX86_BUILTIN_PSRLVV8SI_MASK,
29600 IX86_BUILTIN_PSRLVV4SI_MASK,
29601 IX86_BUILTIN_PSRAWI256_MASK,
29602 IX86_BUILTIN_PSRAW256_MASK,
29603 IX86_BUILTIN_PSRAWI128_MASK,
29604 IX86_BUILTIN_PSRAW128_MASK,
29605 IX86_BUILTIN_PSRLWI256_MASK,
29606 IX86_BUILTIN_PSRLW256_MASK,
29607 IX86_BUILTIN_PSRLWI128_MASK,
29608 IX86_BUILTIN_PSRLW128_MASK,
29609 IX86_BUILTIN_PRORVD256,
29610 IX86_BUILTIN_PROLVD256,
29611 IX86_BUILTIN_PRORD256,
29612 IX86_BUILTIN_PROLD256,
29613 IX86_BUILTIN_PRORVD128,
29614 IX86_BUILTIN_PROLVD128,
29615 IX86_BUILTIN_PRORD128,
29616 IX86_BUILTIN_PROLD128,
29617 IX86_BUILTIN_FPCLASSPD256,
29618 IX86_BUILTIN_FPCLASSPD128,
29619 IX86_BUILTIN_FPCLASSSD,
29620 IX86_BUILTIN_FPCLASSPS256,
29621 IX86_BUILTIN_FPCLASSPS128,
29622 IX86_BUILTIN_FPCLASSSS,
29623 IX86_BUILTIN_CVTB2MASK128,
29624 IX86_BUILTIN_CVTB2MASK256,
29625 IX86_BUILTIN_CVTW2MASK128,
29626 IX86_BUILTIN_CVTW2MASK256,
29627 IX86_BUILTIN_CVTD2MASK128,
29628 IX86_BUILTIN_CVTD2MASK256,
29629 IX86_BUILTIN_CVTQ2MASK128,
29630 IX86_BUILTIN_CVTQ2MASK256,
29631 IX86_BUILTIN_CVTMASK2B128,
29632 IX86_BUILTIN_CVTMASK2B256,
29633 IX86_BUILTIN_CVTMASK2W128,
29634 IX86_BUILTIN_CVTMASK2W256,
29635 IX86_BUILTIN_CVTMASK2D128,
29636 IX86_BUILTIN_CVTMASK2D256,
29637 IX86_BUILTIN_CVTMASK2Q128,
29638 IX86_BUILTIN_CVTMASK2Q256,
29639 IX86_BUILTIN_PCMPEQB128_MASK,
29640 IX86_BUILTIN_PCMPEQB256_MASK,
29641 IX86_BUILTIN_PCMPEQW128_MASK,
29642 IX86_BUILTIN_PCMPEQW256_MASK,
29643 IX86_BUILTIN_PCMPEQD128_MASK,
29644 IX86_BUILTIN_PCMPEQD256_MASK,
29645 IX86_BUILTIN_PCMPEQQ128_MASK,
29646 IX86_BUILTIN_PCMPEQQ256_MASK,
29647 IX86_BUILTIN_PCMPGTB128_MASK,
29648 IX86_BUILTIN_PCMPGTB256_MASK,
29649 IX86_BUILTIN_PCMPGTW128_MASK,
29650 IX86_BUILTIN_PCMPGTW256_MASK,
29651 IX86_BUILTIN_PCMPGTD128_MASK,
29652 IX86_BUILTIN_PCMPGTD256_MASK,
29653 IX86_BUILTIN_PCMPGTQ128_MASK,
29654 IX86_BUILTIN_PCMPGTQ256_MASK,
29655 IX86_BUILTIN_PTESTMB128,
29656 IX86_BUILTIN_PTESTMB256,
29657 IX86_BUILTIN_PTESTMW128,
29658 IX86_BUILTIN_PTESTMW256,
29659 IX86_BUILTIN_PTESTMD128,
29660 IX86_BUILTIN_PTESTMD256,
29661 IX86_BUILTIN_PTESTMQ128,
29662 IX86_BUILTIN_PTESTMQ256,
29663 IX86_BUILTIN_PTESTNMB128,
29664 IX86_BUILTIN_PTESTNMB256,
29665 IX86_BUILTIN_PTESTNMW128,
29666 IX86_BUILTIN_PTESTNMW256,
29667 IX86_BUILTIN_PTESTNMD128,
29668 IX86_BUILTIN_PTESTNMD256,
29669 IX86_BUILTIN_PTESTNMQ128,
29670 IX86_BUILTIN_PTESTNMQ256,
29671 IX86_BUILTIN_PBROADCASTMB128,
29672 IX86_BUILTIN_PBROADCASTMB256,
29673 IX86_BUILTIN_PBROADCASTMW128,
29674 IX86_BUILTIN_PBROADCASTMW256,
29675 IX86_BUILTIN_COMPRESSPD256,
29676 IX86_BUILTIN_COMPRESSPD128,
29677 IX86_BUILTIN_COMPRESSPS256,
29678 IX86_BUILTIN_COMPRESSPS128,
29679 IX86_BUILTIN_PCOMPRESSQ256,
29680 IX86_BUILTIN_PCOMPRESSQ128,
29681 IX86_BUILTIN_PCOMPRESSD256,
29682 IX86_BUILTIN_PCOMPRESSD128,
29683 IX86_BUILTIN_EXPANDPD256,
29684 IX86_BUILTIN_EXPANDPD128,
29685 IX86_BUILTIN_EXPANDPS256,
29686 IX86_BUILTIN_EXPANDPS128,
29687 IX86_BUILTIN_PEXPANDQ256,
29688 IX86_BUILTIN_PEXPANDQ128,
29689 IX86_BUILTIN_PEXPANDD256,
29690 IX86_BUILTIN_PEXPANDD128,
29691 IX86_BUILTIN_EXPANDPD256Z,
29692 IX86_BUILTIN_EXPANDPD128Z,
29693 IX86_BUILTIN_EXPANDPS256Z,
29694 IX86_BUILTIN_EXPANDPS128Z,
29695 IX86_BUILTIN_PEXPANDQ256Z,
29696 IX86_BUILTIN_PEXPANDQ128Z,
29697 IX86_BUILTIN_PEXPANDD256Z,
29698 IX86_BUILTIN_PEXPANDD128Z,
29699 IX86_BUILTIN_PMAXSD256_MASK,
29700 IX86_BUILTIN_PMINSD256_MASK,
29701 IX86_BUILTIN_PMAXUD256_MASK,
29702 IX86_BUILTIN_PMINUD256_MASK,
29703 IX86_BUILTIN_PMAXSD128_MASK,
29704 IX86_BUILTIN_PMINSD128_MASK,
29705 IX86_BUILTIN_PMAXUD128_MASK,
29706 IX86_BUILTIN_PMINUD128_MASK,
29707 IX86_BUILTIN_PMAXSQ256_MASK,
29708 IX86_BUILTIN_PMINSQ256_MASK,
29709 IX86_BUILTIN_PMAXUQ256_MASK,
29710 IX86_BUILTIN_PMINUQ256_MASK,
29711 IX86_BUILTIN_PMAXSQ128_MASK,
29712 IX86_BUILTIN_PMINSQ128_MASK,
29713 IX86_BUILTIN_PMAXUQ128_MASK,
29714 IX86_BUILTIN_PMINUQ128_MASK,
29715 IX86_BUILTIN_PMINSB256_MASK,
29716 IX86_BUILTIN_PMINUB256_MASK,
29717 IX86_BUILTIN_PMAXSB256_MASK,
29718 IX86_BUILTIN_PMAXUB256_MASK,
29719 IX86_BUILTIN_PMINSB128_MASK,
29720 IX86_BUILTIN_PMINUB128_MASK,
29721 IX86_BUILTIN_PMAXSB128_MASK,
29722 IX86_BUILTIN_PMAXUB128_MASK,
29723 IX86_BUILTIN_PMINSW256_MASK,
29724 IX86_BUILTIN_PMINUW256_MASK,
29725 IX86_BUILTIN_PMAXSW256_MASK,
29726 IX86_BUILTIN_PMAXUW256_MASK,
29727 IX86_BUILTIN_PMINSW128_MASK,
29728 IX86_BUILTIN_PMINUW128_MASK,
29729 IX86_BUILTIN_PMAXSW128_MASK,
29730 IX86_BUILTIN_PMAXUW128_MASK,
29731 IX86_BUILTIN_VPCONFLICTQ256,
29732 IX86_BUILTIN_VPCONFLICTD256,
29733 IX86_BUILTIN_VPCLZCNTQ256,
29734 IX86_BUILTIN_VPCLZCNTD256,
29735 IX86_BUILTIN_UNPCKHPD256_MASK,
29736 IX86_BUILTIN_UNPCKHPD128_MASK,
29737 IX86_BUILTIN_UNPCKHPS256_MASK,
29738 IX86_BUILTIN_UNPCKHPS128_MASK,
29739 IX86_BUILTIN_UNPCKLPD256_MASK,
29740 IX86_BUILTIN_UNPCKLPD128_MASK,
29741 IX86_BUILTIN_UNPCKLPS256_MASK,
29742 IX86_BUILTIN_VPCONFLICTQ128,
29743 IX86_BUILTIN_VPCONFLICTD128,
29744 IX86_BUILTIN_VPCLZCNTQ128,
29745 IX86_BUILTIN_VPCLZCNTD128,
29746 IX86_BUILTIN_UNPCKLPS128_MASK,
29747 IX86_BUILTIN_ALIGND256,
29748 IX86_BUILTIN_ALIGNQ256,
29749 IX86_BUILTIN_ALIGND128,
29750 IX86_BUILTIN_ALIGNQ128,
29751 IX86_BUILTIN_CVTPS2PH256_MASK,
29752 IX86_BUILTIN_CVTPS2PH_MASK,
29753 IX86_BUILTIN_CVTPH2PS_MASK,
29754 IX86_BUILTIN_CVTPH2PS256_MASK,
29755 IX86_BUILTIN_PUNPCKHDQ128_MASK,
29756 IX86_BUILTIN_PUNPCKHDQ256_MASK,
29757 IX86_BUILTIN_PUNPCKHQDQ128_MASK,
29758 IX86_BUILTIN_PUNPCKHQDQ256_MASK,
29759 IX86_BUILTIN_PUNPCKLDQ128_MASK,
29760 IX86_BUILTIN_PUNPCKLDQ256_MASK,
29761 IX86_BUILTIN_PUNPCKLQDQ128_MASK,
29762 IX86_BUILTIN_PUNPCKLQDQ256_MASK,
29763 IX86_BUILTIN_PUNPCKHBW128_MASK,
29764 IX86_BUILTIN_PUNPCKHBW256_MASK,
29765 IX86_BUILTIN_PUNPCKHWD128_MASK,
29766 IX86_BUILTIN_PUNPCKHWD256_MASK,
29767 IX86_BUILTIN_PUNPCKLBW128_MASK,
29768 IX86_BUILTIN_PUNPCKLBW256_MASK,
29769 IX86_BUILTIN_PUNPCKLWD128_MASK,
29770 IX86_BUILTIN_PUNPCKLWD256_MASK,
29771 IX86_BUILTIN_PSLLVV16HI,
29772 IX86_BUILTIN_PSLLVV8HI,
29773 IX86_BUILTIN_PACKSSDW256_MASK,
29774 IX86_BUILTIN_PACKSSDW128_MASK,
29775 IX86_BUILTIN_PACKUSDW256_MASK,
29776 IX86_BUILTIN_PACKUSDW128_MASK,
29777 IX86_BUILTIN_PAVGB256_MASK,
29778 IX86_BUILTIN_PAVGW256_MASK,
29779 IX86_BUILTIN_PAVGB128_MASK,
29780 IX86_BUILTIN_PAVGW128_MASK,
29781 IX86_BUILTIN_VPERMVARSF256_MASK,
29782 IX86_BUILTIN_VPERMVARDF256_MASK,
29783 IX86_BUILTIN_VPERMDF256_MASK,
29784 IX86_BUILTIN_PABSB256_MASK,
29785 IX86_BUILTIN_PABSB128_MASK,
29786 IX86_BUILTIN_PABSW256_MASK,
29787 IX86_BUILTIN_PABSW128_MASK,
29788 IX86_BUILTIN_VPERMILVARPD_MASK,
29789 IX86_BUILTIN_VPERMILVARPS_MASK,
29790 IX86_BUILTIN_VPERMILVARPD256_MASK,
29791 IX86_BUILTIN_VPERMILVARPS256_MASK,
29792 IX86_BUILTIN_VPERMILPD_MASK,
29793 IX86_BUILTIN_VPERMILPS_MASK,
29794 IX86_BUILTIN_VPERMILPD256_MASK,
29795 IX86_BUILTIN_VPERMILPS256_MASK,
29796 IX86_BUILTIN_BLENDMQ256,
29797 IX86_BUILTIN_BLENDMD256,
29798 IX86_BUILTIN_BLENDMPD256,
29799 IX86_BUILTIN_BLENDMPS256,
29800 IX86_BUILTIN_BLENDMQ128,
29801 IX86_BUILTIN_BLENDMD128,
29802 IX86_BUILTIN_BLENDMPD128,
29803 IX86_BUILTIN_BLENDMPS128,
29804 IX86_BUILTIN_BLENDMW256,
29805 IX86_BUILTIN_BLENDMB256,
29806 IX86_BUILTIN_BLENDMW128,
29807 IX86_BUILTIN_BLENDMB128,
29808 IX86_BUILTIN_PMULLD256_MASK,
29809 IX86_BUILTIN_PMULLD128_MASK,
29810 IX86_BUILTIN_PMULUDQ256_MASK,
29811 IX86_BUILTIN_PMULDQ256_MASK,
29812 IX86_BUILTIN_PMULDQ128_MASK,
29813 IX86_BUILTIN_PMULUDQ128_MASK,
29814 IX86_BUILTIN_CVTPD2PS256_MASK,
29815 IX86_BUILTIN_CVTPD2PS_MASK,
29816 IX86_BUILTIN_VPERMVARSI256_MASK,
29817 IX86_BUILTIN_VPERMVARDI256_MASK,
29818 IX86_BUILTIN_VPERMDI256_MASK,
29819 IX86_BUILTIN_CMPQ256,
29820 IX86_BUILTIN_CMPD256,
29821 IX86_BUILTIN_UCMPQ256,
29822 IX86_BUILTIN_UCMPD256,
29823 IX86_BUILTIN_CMPB256,
29824 IX86_BUILTIN_CMPW256,
29825 IX86_BUILTIN_UCMPB256,
29826 IX86_BUILTIN_UCMPW256,
29827 IX86_BUILTIN_CMPPD256_MASK,
29828 IX86_BUILTIN_CMPPS256_MASK,
29829 IX86_BUILTIN_CMPQ128,
29830 IX86_BUILTIN_CMPD128,
29831 IX86_BUILTIN_UCMPQ128,
29832 IX86_BUILTIN_UCMPD128,
29833 IX86_BUILTIN_CMPB128,
29834 IX86_BUILTIN_CMPW128,
29835 IX86_BUILTIN_UCMPB128,
29836 IX86_BUILTIN_UCMPW128,
29837 IX86_BUILTIN_CMPPD128_MASK,
29838 IX86_BUILTIN_CMPPS128_MASK,
29840 IX86_BUILTIN_GATHER3SIV8SF,
29841 IX86_BUILTIN_GATHER3SIV4SF,
29842 IX86_BUILTIN_GATHER3SIV4DF,
29843 IX86_BUILTIN_GATHER3SIV2DF,
29844 IX86_BUILTIN_GATHER3DIV8SF,
29845 IX86_BUILTIN_GATHER3DIV4SF,
29846 IX86_BUILTIN_GATHER3DIV4DF,
29847 IX86_BUILTIN_GATHER3DIV2DF,
29848 IX86_BUILTIN_GATHER3SIV8SI,
29849 IX86_BUILTIN_GATHER3SIV4SI,
29850 IX86_BUILTIN_GATHER3SIV4DI,
29851 IX86_BUILTIN_GATHER3SIV2DI,
29852 IX86_BUILTIN_GATHER3DIV8SI,
29853 IX86_BUILTIN_GATHER3DIV4SI,
29854 IX86_BUILTIN_GATHER3DIV4DI,
29855 IX86_BUILTIN_GATHER3DIV2DI,
29856 IX86_BUILTIN_SCATTERSIV8SF,
29857 IX86_BUILTIN_SCATTERSIV4SF,
29858 IX86_BUILTIN_SCATTERSIV4DF,
29859 IX86_BUILTIN_SCATTERSIV2DF,
29860 IX86_BUILTIN_SCATTERDIV8SF,
29861 IX86_BUILTIN_SCATTERDIV4SF,
29862 IX86_BUILTIN_SCATTERDIV4DF,
29863 IX86_BUILTIN_SCATTERDIV2DF,
29864 IX86_BUILTIN_SCATTERSIV8SI,
29865 IX86_BUILTIN_SCATTERSIV4SI,
29866 IX86_BUILTIN_SCATTERSIV4DI,
29867 IX86_BUILTIN_SCATTERSIV2DI,
29868 IX86_BUILTIN_SCATTERDIV8SI,
29869 IX86_BUILTIN_SCATTERDIV4SI,
29870 IX86_BUILTIN_SCATTERDIV4DI,
29871 IX86_BUILTIN_SCATTERDIV2DI,
29873 /* AVX512DQ. */
29874 IX86_BUILTIN_RANGESD128,
29875 IX86_BUILTIN_RANGESS128,
29876 IX86_BUILTIN_KUNPCKWD,
29877 IX86_BUILTIN_KUNPCKDQ,
29878 IX86_BUILTIN_BROADCASTF32x2_512,
29879 IX86_BUILTIN_BROADCASTI32x2_512,
29880 IX86_BUILTIN_BROADCASTF64X2_512,
29881 IX86_BUILTIN_BROADCASTI64X2_512,
29882 IX86_BUILTIN_BROADCASTF32X8_512,
29883 IX86_BUILTIN_BROADCASTI32X8_512,
29884 IX86_BUILTIN_EXTRACTF64X2_512,
29885 IX86_BUILTIN_EXTRACTF32X8,
29886 IX86_BUILTIN_EXTRACTI64X2_512,
29887 IX86_BUILTIN_EXTRACTI32X8,
29888 IX86_BUILTIN_REDUCEPD512_MASK,
29889 IX86_BUILTIN_REDUCEPS512_MASK,
29890 IX86_BUILTIN_PMULLQ512,
29891 IX86_BUILTIN_XORPD512,
29892 IX86_BUILTIN_XORPS512,
29893 IX86_BUILTIN_ORPD512,
29894 IX86_BUILTIN_ORPS512,
29895 IX86_BUILTIN_ANDPD512,
29896 IX86_BUILTIN_ANDPS512,
29897 IX86_BUILTIN_ANDNPD512,
29898 IX86_BUILTIN_ANDNPS512,
29899 IX86_BUILTIN_INSERTF32X8,
29900 IX86_BUILTIN_INSERTI32X8,
29901 IX86_BUILTIN_INSERTF64X2_512,
29902 IX86_BUILTIN_INSERTI64X2_512,
29903 IX86_BUILTIN_FPCLASSPD512,
29904 IX86_BUILTIN_FPCLASSPS512,
29905 IX86_BUILTIN_CVTD2MASK512,
29906 IX86_BUILTIN_CVTQ2MASK512,
29907 IX86_BUILTIN_CVTMASK2D512,
29908 IX86_BUILTIN_CVTMASK2Q512,
29909 IX86_BUILTIN_CVTPD2QQ512,
29910 IX86_BUILTIN_CVTPS2QQ512,
29911 IX86_BUILTIN_CVTPD2UQQ512,
29912 IX86_BUILTIN_CVTPS2UQQ512,
29913 IX86_BUILTIN_CVTQQ2PS512,
29914 IX86_BUILTIN_CVTUQQ2PS512,
29915 IX86_BUILTIN_CVTQQ2PD512,
29916 IX86_BUILTIN_CVTUQQ2PD512,
29917 IX86_BUILTIN_CVTTPS2QQ512,
29918 IX86_BUILTIN_CVTTPS2UQQ512,
29919 IX86_BUILTIN_CVTTPD2QQ512,
29920 IX86_BUILTIN_CVTTPD2UQQ512,
29921 IX86_BUILTIN_RANGEPS512,
29922 IX86_BUILTIN_RANGEPD512,
29924 /* AVX512BW. */
29925 IX86_BUILTIN_PACKUSDW512,
29926 IX86_BUILTIN_PACKSSDW512,
29927 IX86_BUILTIN_LOADDQUHI512_MASK,
29928 IX86_BUILTIN_LOADDQUQI512_MASK,
29929 IX86_BUILTIN_PSLLDQ512,
29930 IX86_BUILTIN_PSRLDQ512,
29931 IX86_BUILTIN_STOREDQUHI512_MASK,
29932 IX86_BUILTIN_STOREDQUQI512_MASK,
29933 IX86_BUILTIN_PALIGNR512,
29934 IX86_BUILTIN_PALIGNR512_MASK,
29935 IX86_BUILTIN_MOVDQUHI512_MASK,
29936 IX86_BUILTIN_MOVDQUQI512_MASK,
29937 IX86_BUILTIN_PSADBW512,
29938 IX86_BUILTIN_DBPSADBW512,
29939 IX86_BUILTIN_PBROADCASTB512,
29940 IX86_BUILTIN_PBROADCASTB512_GPR,
29941 IX86_BUILTIN_PBROADCASTW512,
29942 IX86_BUILTIN_PBROADCASTW512_GPR,
29943 IX86_BUILTIN_PMOVSXBW512_MASK,
29944 IX86_BUILTIN_PMOVZXBW512_MASK,
29945 IX86_BUILTIN_VPERMVARHI512_MASK,
29946 IX86_BUILTIN_VPERMT2VARHI512,
29947 IX86_BUILTIN_VPERMT2VARHI512_MASKZ,
29948 IX86_BUILTIN_VPERMI2VARHI512,
29949 IX86_BUILTIN_PAVGB512,
29950 IX86_BUILTIN_PAVGW512,
29951 IX86_BUILTIN_PADDB512,
29952 IX86_BUILTIN_PSUBB512,
29953 IX86_BUILTIN_PSUBSB512,
29954 IX86_BUILTIN_PADDSB512,
29955 IX86_BUILTIN_PSUBUSB512,
29956 IX86_BUILTIN_PADDUSB512,
29957 IX86_BUILTIN_PSUBW512,
29958 IX86_BUILTIN_PADDW512,
29959 IX86_BUILTIN_PSUBSW512,
29960 IX86_BUILTIN_PADDSW512,
29961 IX86_BUILTIN_PSUBUSW512,
29962 IX86_BUILTIN_PADDUSW512,
29963 IX86_BUILTIN_PMAXUW512,
29964 IX86_BUILTIN_PMAXSW512,
29965 IX86_BUILTIN_PMINUW512,
29966 IX86_BUILTIN_PMINSW512,
29967 IX86_BUILTIN_PMAXUB512,
29968 IX86_BUILTIN_PMAXSB512,
29969 IX86_BUILTIN_PMINUB512,
29970 IX86_BUILTIN_PMINSB512,
29971 IX86_BUILTIN_PMOVWB512,
29972 IX86_BUILTIN_PMOVSWB512,
29973 IX86_BUILTIN_PMOVUSWB512,
29974 IX86_BUILTIN_PMULHRSW512_MASK,
29975 IX86_BUILTIN_PMULHUW512_MASK,
29976 IX86_BUILTIN_PMULHW512_MASK,
29977 IX86_BUILTIN_PMULLW512_MASK,
29978 IX86_BUILTIN_PSLLWI512_MASK,
29979 IX86_BUILTIN_PSLLW512_MASK,
29980 IX86_BUILTIN_PACKSSWB512,
29981 IX86_BUILTIN_PACKUSWB512,
29982 IX86_BUILTIN_PSRAVV32HI,
29983 IX86_BUILTIN_PMADDUBSW512_MASK,
29984 IX86_BUILTIN_PMADDWD512_MASK,
29985 IX86_BUILTIN_PSRLVV32HI,
29986 IX86_BUILTIN_PUNPCKHBW512,
29987 IX86_BUILTIN_PUNPCKHWD512,
29988 IX86_BUILTIN_PUNPCKLBW512,
29989 IX86_BUILTIN_PUNPCKLWD512,
29990 IX86_BUILTIN_PSHUFB512,
29991 IX86_BUILTIN_PSHUFHW512,
29992 IX86_BUILTIN_PSHUFLW512,
29993 IX86_BUILTIN_PSRAWI512,
29994 IX86_BUILTIN_PSRAW512,
29995 IX86_BUILTIN_PSRLWI512,
29996 IX86_BUILTIN_PSRLW512,
29997 IX86_BUILTIN_CVTB2MASK512,
29998 IX86_BUILTIN_CVTW2MASK512,
29999 IX86_BUILTIN_CVTMASK2B512,
30000 IX86_BUILTIN_CVTMASK2W512,
30001 IX86_BUILTIN_PCMPEQB512_MASK,
30002 IX86_BUILTIN_PCMPEQW512_MASK,
30003 IX86_BUILTIN_PCMPGTB512_MASK,
30004 IX86_BUILTIN_PCMPGTW512_MASK,
30005 IX86_BUILTIN_PTESTMB512,
30006 IX86_BUILTIN_PTESTMW512,
30007 IX86_BUILTIN_PTESTNMB512,
30008 IX86_BUILTIN_PTESTNMW512,
30009 IX86_BUILTIN_PSLLVV32HI,
30010 IX86_BUILTIN_PABSB512,
30011 IX86_BUILTIN_PABSW512,
30012 IX86_BUILTIN_BLENDMW512,
30013 IX86_BUILTIN_BLENDMB512,
30014 IX86_BUILTIN_CMPB512,
30015 IX86_BUILTIN_CMPW512,
30016 IX86_BUILTIN_UCMPB512,
30017 IX86_BUILTIN_UCMPW512,
30019 /* Alternate 4- and 8-element gather/scatter for the vectorizer, where all operands
30020    are 32-byte or 64-byte wide respectively; an illustrative note follows this group.  */
30021 IX86_BUILTIN_GATHERALTSIV4DF,
30022 IX86_BUILTIN_GATHERALTDIV8SF,
30023 IX86_BUILTIN_GATHERALTSIV4DI,
30024 IX86_BUILTIN_GATHERALTDIV8SI,
30025 IX86_BUILTIN_GATHER3ALTDIV16SF,
30026 IX86_BUILTIN_GATHER3ALTDIV16SI,
30027 IX86_BUILTIN_GATHER3ALTSIV4DF,
30028 IX86_BUILTIN_GATHER3ALTDIV8SF,
30029 IX86_BUILTIN_GATHER3ALTSIV4DI,
30030 IX86_BUILTIN_GATHER3ALTDIV8SI,
30031 IX86_BUILTIN_GATHER3ALTSIV8DF,
30032 IX86_BUILTIN_GATHER3ALTSIV8DI,
30033 IX86_BUILTIN_GATHER3DIV16SF,
30034 IX86_BUILTIN_GATHER3DIV16SI,
30035 IX86_BUILTIN_GATHER3DIV8DF,
30036 IX86_BUILTIN_GATHER3DIV8DI,
30037 IX86_BUILTIN_GATHER3SIV16SF,
30038 IX86_BUILTIN_GATHER3SIV16SI,
30039 IX86_BUILTIN_GATHER3SIV8DF,
30040 IX86_BUILTIN_GATHER3SIV8DI,
30041 IX86_BUILTIN_SCATTERDIV16SF,
30042 IX86_BUILTIN_SCATTERDIV16SI,
30043 IX86_BUILTIN_SCATTERDIV8DF,
30044 IX86_BUILTIN_SCATTERDIV8DI,
30045 IX86_BUILTIN_SCATTERSIV16SF,
30046 IX86_BUILTIN_SCATTERSIV16SI,
30047 IX86_BUILTIN_SCATTERSIV8DF,
30048 IX86_BUILTIN_SCATTERSIV8DI,
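   /* Editorial note, not part of the upstream sources: the GATHERALT and
      GATHER3ALT entries above are the alternate forms the vectorizer is
      expected to request when the data vector and the index vector have
      different element counts, matching the comment that opens this group.
      A minimal sketch of that selection, assuming the usual SImode/DImode
      index distinction (the real dispatch lives elsewhere in this file and
      may differ in detail):

	 static enum ix86_builtins
	 pick_v4df_gather (bool si_index)
	 {
	   // V4DF data with a 32-bit index needs a V8SI index vector, of
	   // which only four lanes are used, hence the ALT builtin; a
	   // 64-bit index already has exactly four lanes, so the plain
	   // form suffices.
	   return si_index ? IX86_BUILTIN_GATHERALTSIV4DF
			   : IX86_BUILTIN_GATHERDIV4DF;
	 }

      pick_v4df_gather is a hypothetical helper name used only for this
      illustration.  */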
30050 /* AVX512PF */
30051 IX86_BUILTIN_GATHERPFQPD,
30052 IX86_BUILTIN_GATHERPFDPS,
30053 IX86_BUILTIN_GATHERPFDPD,
30054 IX86_BUILTIN_GATHERPFQPS,
30055 IX86_BUILTIN_SCATTERPFDPD,
30056 IX86_BUILTIN_SCATTERPFDPS,
30057 IX86_BUILTIN_SCATTERPFQPD,
30058 IX86_BUILTIN_SCATTERPFQPS,
30060 /* AVX512ER */
30061 IX86_BUILTIN_EXP2PD_MASK,
30062 IX86_BUILTIN_EXP2PS_MASK,
30063 IX86_BUILTIN_EXP2PS,
30064 IX86_BUILTIN_RCP28PD,
30065 IX86_BUILTIN_RCP28PS,
30066 IX86_BUILTIN_RCP28SD,
30067 IX86_BUILTIN_RCP28SS,
30068 IX86_BUILTIN_RSQRT28PD,
30069 IX86_BUILTIN_RSQRT28PS,
30070 IX86_BUILTIN_RSQRT28SD,
30071 IX86_BUILTIN_RSQRT28SS,
30073 /* SHA builtins. */
30074 IX86_BUILTIN_SHA1MSG1,
30075 IX86_BUILTIN_SHA1MSG2,
30076 IX86_BUILTIN_SHA1NEXTE,
30077 IX86_BUILTIN_SHA1RNDS4,
30078 IX86_BUILTIN_SHA256MSG1,
30079 IX86_BUILTIN_SHA256MSG2,
30080 IX86_BUILTIN_SHA256RNDS2,
30082 /* CLFLUSHOPT instructions. */
30083 IX86_BUILTIN_CLFLUSHOPT,
30085 /* TFmode support builtins. */
30086 IX86_BUILTIN_INFQ,
30087 IX86_BUILTIN_HUGE_VALQ,
30088 IX86_BUILTIN_FABSQ,
30089 IX86_BUILTIN_COPYSIGNQ,
30091 /* Vectorizer support builtins. */
30092 IX86_BUILTIN_CEILPD_VEC_PACK_SFIX512,
30093 IX86_BUILTIN_CPYSGNPS,
30094 IX86_BUILTIN_CPYSGNPD,
30095 IX86_BUILTIN_CPYSGNPS256,
30096 IX86_BUILTIN_CPYSGNPS512,
30097 IX86_BUILTIN_CPYSGNPD256,
30098 IX86_BUILTIN_CPYSGNPD512,
30099 IX86_BUILTIN_FLOORPD_VEC_PACK_SFIX512,
30100 IX86_BUILTIN_ROUNDPD_AZ_VEC_PACK_SFIX512,
30103 /* FMA4 instructions. */
30104 IX86_BUILTIN_VFMADDSS,
30105 IX86_BUILTIN_VFMADDSD,
30106 IX86_BUILTIN_VFMADDPS,
30107 IX86_BUILTIN_VFMADDPD,
30108 IX86_BUILTIN_VFMADDPS256,
30109 IX86_BUILTIN_VFMADDPD256,
30110 IX86_BUILTIN_VFMADDSUBPS,
30111 IX86_BUILTIN_VFMADDSUBPD,
30112 IX86_BUILTIN_VFMADDSUBPS256,
30113 IX86_BUILTIN_VFMADDSUBPD256,
30115 /* FMA3 instructions. */
30116 IX86_BUILTIN_VFMADDSS3,
30117 IX86_BUILTIN_VFMADDSD3,
30119 /* XOP instructions. */
30120 IX86_BUILTIN_VPCMOV,
30121 IX86_BUILTIN_VPCMOV_V2DI,
30122 IX86_BUILTIN_VPCMOV_V4SI,
30123 IX86_BUILTIN_VPCMOV_V8HI,
30124 IX86_BUILTIN_VPCMOV_V16QI,
30125 IX86_BUILTIN_VPCMOV_V4SF,
30126 IX86_BUILTIN_VPCMOV_V2DF,
30127 IX86_BUILTIN_VPCMOV256,
30128 IX86_BUILTIN_VPCMOV_V4DI256,
30129 IX86_BUILTIN_VPCMOV_V8SI256,
30130 IX86_BUILTIN_VPCMOV_V16HI256,
30131 IX86_BUILTIN_VPCMOV_V32QI256,
30132 IX86_BUILTIN_VPCMOV_V8SF256,
30133 IX86_BUILTIN_VPCMOV_V4DF256,
30135 IX86_BUILTIN_VPPERM,
30137 IX86_BUILTIN_VPMACSSWW,
30138 IX86_BUILTIN_VPMACSWW,
30139 IX86_BUILTIN_VPMACSSWD,
30140 IX86_BUILTIN_VPMACSWD,
30141 IX86_BUILTIN_VPMACSSDD,
30142 IX86_BUILTIN_VPMACSDD,
30143 IX86_BUILTIN_VPMACSSDQL,
30144 IX86_BUILTIN_VPMACSSDQH,
30145 IX86_BUILTIN_VPMACSDQL,
30146 IX86_BUILTIN_VPMACSDQH,
30147 IX86_BUILTIN_VPMADCSSWD,
30148 IX86_BUILTIN_VPMADCSWD,
30150 IX86_BUILTIN_VPHADDBW,
30151 IX86_BUILTIN_VPHADDBD,
30152 IX86_BUILTIN_VPHADDBQ,
30153 IX86_BUILTIN_VPHADDWD,
30154 IX86_BUILTIN_VPHADDWQ,
30155 IX86_BUILTIN_VPHADDDQ,
30156 IX86_BUILTIN_VPHADDUBW,
30157 IX86_BUILTIN_VPHADDUBD,
30158 IX86_BUILTIN_VPHADDUBQ,
30159 IX86_BUILTIN_VPHADDUWD,
30160 IX86_BUILTIN_VPHADDUWQ,
30161 IX86_BUILTIN_VPHADDUDQ,
30162 IX86_BUILTIN_VPHSUBBW,
30163 IX86_BUILTIN_VPHSUBWD,
30164 IX86_BUILTIN_VPHSUBDQ,
30166 IX86_BUILTIN_VPROTB,
30167 IX86_BUILTIN_VPROTW,
30168 IX86_BUILTIN_VPROTD,
30169 IX86_BUILTIN_VPROTQ,
30170 IX86_BUILTIN_VPROTB_IMM,
30171 IX86_BUILTIN_VPROTW_IMM,
30172 IX86_BUILTIN_VPROTD_IMM,
30173 IX86_BUILTIN_VPROTQ_IMM,
30175 IX86_BUILTIN_VPSHLB,
30176 IX86_BUILTIN_VPSHLW,
30177 IX86_BUILTIN_VPSHLD,
30178 IX86_BUILTIN_VPSHLQ,
30179 IX86_BUILTIN_VPSHAB,
30180 IX86_BUILTIN_VPSHAW,
30181 IX86_BUILTIN_VPSHAD,
30182 IX86_BUILTIN_VPSHAQ,
30184 IX86_BUILTIN_VFRCZSS,
30185 IX86_BUILTIN_VFRCZSD,
30186 IX86_BUILTIN_VFRCZPS,
30187 IX86_BUILTIN_VFRCZPD,
30188 IX86_BUILTIN_VFRCZPS256,
30189 IX86_BUILTIN_VFRCZPD256,
30191 IX86_BUILTIN_VPCOMEQUB,
30192 IX86_BUILTIN_VPCOMNEUB,
30193 IX86_BUILTIN_VPCOMLTUB,
30194 IX86_BUILTIN_VPCOMLEUB,
30195 IX86_BUILTIN_VPCOMGTUB,
30196 IX86_BUILTIN_VPCOMGEUB,
30197 IX86_BUILTIN_VPCOMFALSEUB,
30198 IX86_BUILTIN_VPCOMTRUEUB,
30200 IX86_BUILTIN_VPCOMEQUW,
30201 IX86_BUILTIN_VPCOMNEUW,
30202 IX86_BUILTIN_VPCOMLTUW,
30203 IX86_BUILTIN_VPCOMLEUW,
30204 IX86_BUILTIN_VPCOMGTUW,
30205 IX86_BUILTIN_VPCOMGEUW,
30206 IX86_BUILTIN_VPCOMFALSEUW,
30207 IX86_BUILTIN_VPCOMTRUEUW,
30209 IX86_BUILTIN_VPCOMEQUD,
30210 IX86_BUILTIN_VPCOMNEUD,
30211 IX86_BUILTIN_VPCOMLTUD,
30212 IX86_BUILTIN_VPCOMLEUD,
30213 IX86_BUILTIN_VPCOMGTUD,
30214 IX86_BUILTIN_VPCOMGEUD,
30215 IX86_BUILTIN_VPCOMFALSEUD,
30216 IX86_BUILTIN_VPCOMTRUEUD,
30218 IX86_BUILTIN_VPCOMEQUQ,
30219 IX86_BUILTIN_VPCOMNEUQ,
30220 IX86_BUILTIN_VPCOMLTUQ,
30221 IX86_BUILTIN_VPCOMLEUQ,
30222 IX86_BUILTIN_VPCOMGTUQ,
30223 IX86_BUILTIN_VPCOMGEUQ,
30224 IX86_BUILTIN_VPCOMFALSEUQ,
30225 IX86_BUILTIN_VPCOMTRUEUQ,
30227 IX86_BUILTIN_VPCOMEQB,
30228 IX86_BUILTIN_VPCOMNEB,
30229 IX86_BUILTIN_VPCOMLTB,
30230 IX86_BUILTIN_VPCOMLEB,
30231 IX86_BUILTIN_VPCOMGTB,
30232 IX86_BUILTIN_VPCOMGEB,
30233 IX86_BUILTIN_VPCOMFALSEB,
30234 IX86_BUILTIN_VPCOMTRUEB,
30236 IX86_BUILTIN_VPCOMEQW,
30237 IX86_BUILTIN_VPCOMNEW,
30238 IX86_BUILTIN_VPCOMLTW,
30239 IX86_BUILTIN_VPCOMLEW,
30240 IX86_BUILTIN_VPCOMGTW,
30241 IX86_BUILTIN_VPCOMGEW,
30242 IX86_BUILTIN_VPCOMFALSEW,
30243 IX86_BUILTIN_VPCOMTRUEW,
30245 IX86_BUILTIN_VPCOMEQD,
30246 IX86_BUILTIN_VPCOMNED,
30247 IX86_BUILTIN_VPCOMLTD,
30248 IX86_BUILTIN_VPCOMLED,
30249 IX86_BUILTIN_VPCOMGTD,
30250 IX86_BUILTIN_VPCOMGED,
30251 IX86_BUILTIN_VPCOMFALSED,
30252 IX86_BUILTIN_VPCOMTRUED,
30254 IX86_BUILTIN_VPCOMEQQ,
30255 IX86_BUILTIN_VPCOMNEQ,
30256 IX86_BUILTIN_VPCOMLTQ,
30257 IX86_BUILTIN_VPCOMLEQ,
30258 IX86_BUILTIN_VPCOMGTQ,
30259 IX86_BUILTIN_VPCOMGEQ,
30260 IX86_BUILTIN_VPCOMFALSEQ,
30261 IX86_BUILTIN_VPCOMTRUEQ,
30263 /* LWP instructions. */
30264 IX86_BUILTIN_LLWPCB,
30265 IX86_BUILTIN_SLWPCB,
30266 IX86_BUILTIN_LWPVAL32,
30267 IX86_BUILTIN_LWPVAL64,
30268 IX86_BUILTIN_LWPINS32,
30269 IX86_BUILTIN_LWPINS64,
30271 IX86_BUILTIN_CLZS,
30273 /* RTM */
30274 IX86_BUILTIN_XBEGIN,
30275 IX86_BUILTIN_XEND,
30276 IX86_BUILTIN_XABORT,
30277 IX86_BUILTIN_XTEST,
30279 /* MPX */
30280 IX86_BUILTIN_BNDMK,
30281 IX86_BUILTIN_BNDSTX,
30282 IX86_BUILTIN_BNDLDX,
30283 IX86_BUILTIN_BNDCL,
30284 IX86_BUILTIN_BNDCU,
30285 IX86_BUILTIN_BNDRET,
30286 IX86_BUILTIN_BNDNARROW,
30287 IX86_BUILTIN_BNDINT,
30288 IX86_BUILTIN_SIZEOF,
30289 IX86_BUILTIN_BNDLOWER,
30290 IX86_BUILTIN_BNDUPPER,
30292 /* BMI instructions. */
30293 IX86_BUILTIN_BEXTR32,
30294 IX86_BUILTIN_BEXTR64,
30295 IX86_BUILTIN_CTZS,
30297 /* TBM instructions. */
30298 IX86_BUILTIN_BEXTRI32,
30299 IX86_BUILTIN_BEXTRI64,
30301 /* BMI2 instructions. */
30302 IX86_BUILTIN_BZHI32,
30303 IX86_BUILTIN_BZHI64,
30304 IX86_BUILTIN_PDEP32,
30305 IX86_BUILTIN_PDEP64,
30306 IX86_BUILTIN_PEXT32,
30307 IX86_BUILTIN_PEXT64,
30309 /* ADX instructions. */
30310 IX86_BUILTIN_ADDCARRYX32,
30311 IX86_BUILTIN_ADDCARRYX64,
30313 /* SBB instructions. */
30314 IX86_BUILTIN_SBB32,
30315 IX86_BUILTIN_SBB64,
30317 /* FSGSBASE instructions. */
30318 IX86_BUILTIN_RDFSBASE32,
30319 IX86_BUILTIN_RDFSBASE64,
30320 IX86_BUILTIN_RDGSBASE32,
30321 IX86_BUILTIN_RDGSBASE64,
30322 IX86_BUILTIN_WRFSBASE32,
30323 IX86_BUILTIN_WRFSBASE64,
30324 IX86_BUILTIN_WRGSBASE32,
30325 IX86_BUILTIN_WRGSBASE64,
30327 /* RDRND instructions. */
30328 IX86_BUILTIN_RDRAND16_STEP,
30329 IX86_BUILTIN_RDRAND32_STEP,
30330 IX86_BUILTIN_RDRAND64_STEP,
30332 /* RDSEED instructions. */
30333 IX86_BUILTIN_RDSEED16_STEP,
30334 IX86_BUILTIN_RDSEED32_STEP,
30335 IX86_BUILTIN_RDSEED64_STEP,
30337 /* F16C instructions. */
30338 IX86_BUILTIN_CVTPH2PS,
30339 IX86_BUILTIN_CVTPH2PS256,
30340 IX86_BUILTIN_CVTPS2PH,
30341 IX86_BUILTIN_CVTPS2PH256,
30343 /* CFString built-in for darwin */
30344 IX86_BUILTIN_CFSTRING,
30346 /* Builtins to get CPU type and supported features. */
30347 IX86_BUILTIN_CPU_INIT,
30348 IX86_BUILTIN_CPU_IS,
30349 IX86_BUILTIN_CPU_SUPPORTS,
30351 /* Read/write FLAGS register built-ins. */
30352 IX86_BUILTIN_READ_FLAGS,
30353 IX86_BUILTIN_WRITE_FLAGS,
30355 IX86_BUILTIN_MAX
30356 };
30358 /* Table for the ix86 builtin decls. */
30359 static GTY(()) tree ix86_builtins[(int) IX86_BUILTIN_MAX];
30361 /* Table of all of the builtin functions that are possible with different ISAs
30362 but are waiting to be built until a function is declared to use that
30363 ISA. */
30364 struct builtin_isa {
30365 const char *name; /* function name */
30366 enum ix86_builtin_func_type tcode; /* type to use in the declaration */
30367 HOST_WIDE_INT isa; /* isa_flags this builtin is defined for */
30368 bool const_p; /* true if the declaration is constant */
30369 bool leaf_p; /* true if the declaration has leaf attribute */
30370 bool nothrow_p; /* true if the declaration has nothrow attribute */
30371 bool set_and_not_built_p; /* true if the builtin is recorded here but its decl has not been built yet */
30372 };
30374 static struct builtin_isa ix86_builtins_isa[(int) IX86_BUILTIN_MAX];
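/* A sketch of how the two tables cooperate (the enumerator and name here are
   illustrative, not real entries): while a builtin's ISA is still disabled,
   ix86_builtins[code] stays NULL_TREE and ix86_builtins_isa[code] remembers
   everything needed to build the decl later, e.g.

     ix86_builtins_isa[code].name == "__builtin_ia32_example";
     ix86_builtins_isa[code].isa  == OPTION_MASK_ISA_AVX2;
     ix86_builtins_isa[code].set_and_not_built_p == true;

   Once the ISA becomes available, ix86_add_new_builtins fills in
   ix86_builtins[code] and clears set_and_not_built_p.  */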
30377 /* Add an ix86 target builtin function with CODE, NAME and TYPE. Save the MASK
30378 of which isa_flags to use in the ix86_builtins_isa array. Stores the
30379 function decl in the ix86_builtins array. Returns the function decl or
30380 NULL_TREE, if the builtin was not added.
30382 If the front end has a special hook for builtin functions, delay adding
30383 builtin functions that aren't in the current ISA until the ISA is changed
30384 with function-specific optimization. Doing so can save about 300K for the
30385 default compiler. When the builtin is expanded, check at that time whether
30386 it is valid.
30388 If the front end doesn't have a special hook, record all builtins, even
30389 those whose instruction set isn't in the current ISA, in case the user uses
30390 function-specific options for a different ISA, so that we don't get scope
30391 errors if a builtin is added in the middle of a function scope. */
30393 static inline tree
30394 def_builtin (HOST_WIDE_INT mask, const char *name,
30395              enum ix86_builtin_func_type tcode,
30396              enum ix86_builtins code)
30397 {
30398   tree decl = NULL_TREE;
30400   if (!(mask & OPTION_MASK_ISA_64BIT) || TARGET_64BIT)
30401     {
30402       ix86_builtins_isa[(int) code].isa = mask;
30404       mask &= ~OPTION_MASK_ISA_64BIT;
30405       if (mask == 0
30406           || (mask & ix86_isa_flags) != 0
30407           || (lang_hooks.builtin_function
30408               == lang_hooks.builtin_function_ext_scope))
30410         {
30411           tree type = ix86_get_builtin_func_type (tcode);
30412           decl = add_builtin_function (name, type, code, BUILT_IN_MD,
30413                                        NULL, NULL_TREE);
30414           ix86_builtins[(int) code] = decl;
30415           ix86_builtins_isa[(int) code].set_and_not_built_p = false;
30416         }
30417       else
30418         {
30419           ix86_builtins[(int) code] = NULL_TREE;
30420           ix86_builtins_isa[(int) code].tcode = tcode;
30421           ix86_builtins_isa[(int) code].name = name;
30422           ix86_builtins_isa[(int) code].leaf_p = false;
30423           ix86_builtins_isa[(int) code].nothrow_p = false;
30424           ix86_builtins_isa[(int) code].const_p = false;
30425           ix86_builtins_isa[(int) code].set_and_not_built_p = true;
30426         }
30427     }
30429   return decl;
30430 }
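/* For example, registering a hypothetical SSE2 builtin (the name and
   enumerator below are illustrative only) would look like

     def_builtin (OPTION_MASK_ISA_SSE2, "__builtin_ia32_example",
                  VOID_FTYPE_VOID, IX86_BUILTIN_EXAMPLE);

   If SSE2 is already enabled (or the front end provides the ext_scope hook),
   the decl is created right away; otherwise only the ix86_builtins_isa
   bookkeeping is filled in and NULL_TREE is returned.  */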
30432 /* Like def_builtin, but also marks the function decl "const". */
30434 static inline tree
30435 def_builtin_const (HOST_WIDE_INT mask, const char *name,
30436                    enum ix86_builtin_func_type tcode, enum ix86_builtins code)
30437 {
30438   tree decl = def_builtin (mask, name, tcode, code);
30439   if (decl)
30440     TREE_READONLY (decl) = 1;
30441   else
30442     ix86_builtins_isa[(int) code].const_p = true;
30444   return decl;
30445 }
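/* Most of the arithmetic builtins described in the bdesc_args table below are
   typically registered through this wrapper, e.g. (illustrative call with a
   hypothetical name and enumerator):

     def_builtin_const (OPTION_MASK_ISA_MMX, "__builtin_ia32_example2",
                        V8QI_FTYPE_V8QI_V8QI, IX86_BUILTIN_EXAMPLE2);

   Setting TREE_READONLY marks the decl "const", so the middle end may CSE
   calls or delete them when the result is unused.  */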
30447 /* Add any new builtin functions for a given ISA that may not have been
30448 declared. This saves a bit of space compared to adding all of the
30449 declarations to the tree, even if we didn't use them. */
30451 static void
30452 ix86_add_new_builtins (HOST_WIDE_INT isa)
30453 {
30454   int i;
30456   for (i = 0; i < (int)IX86_BUILTIN_MAX; i++)
30457     {
30458       if ((ix86_builtins_isa[i].isa & isa) != 0
30459           && ix86_builtins_isa[i].set_and_not_built_p)
30460         {
30461           tree decl, type;
30463           /* Don't define the builtin again. */
30464           ix86_builtins_isa[i].set_and_not_built_p = false;
30466           type = ix86_get_builtin_func_type (ix86_builtins_isa[i].tcode);
30467           decl = add_builtin_function_ext_scope (ix86_builtins_isa[i].name,
30468                                                  type, i, BUILT_IN_MD, NULL,
30469                                                  NULL_TREE);
30471           ix86_builtins[i] = decl;
30472           if (ix86_builtins_isa[i].const_p)
30473             TREE_READONLY (decl) = 1;
30474           if (ix86_builtins_isa[i].leaf_p)
30475             DECL_ATTRIBUTES (decl) = build_tree_list (get_identifier ("leaf"),
30476                                                       NULL_TREE);
30477           if (ix86_builtins_isa[i].nothrow_p)
30478             TREE_NOTHROW (decl) = 1;
30479         }
30480     }
30481 }
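/* Sketch of the intended use (the actual call site is in the target
   attribute / pragma option handling elsewhere in this file): once extra ISA
   bits have been switched on for a function, a call along the lines of

     ix86_add_new_builtins (ix86_isa_flags);

   turns every matching deferred ix86_builtins_isa entry into a real decl,
   re-applying the const, leaf and nothrow flags recorded earlier.  */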
30483 /* Bits for builtin_description.flag. */
30485 /* Set when we don't support the comparison natively, and should
30486 swap_comparison in order to support it. */
30487 #define BUILTIN_DESC_SWAP_OPERANDS 1
30489 struct builtin_description
30490 {
30491 const HOST_WIDE_INT mask;          /* OPTION_MASK_ISA_* bits controlling availability */
30492 const enum insn_code icode;        /* insn pattern used to expand the builtin */
30493 const char *const name;            /* the __builtin_ia32_* name, or 0 if none */
30494 const enum ix86_builtins code;     /* the IX86_BUILTIN_* enumerator */
30495 const enum rtx_code comparison;    /* comparison code, for comi/ucomi-style builtins */
30496 const int flag;                    /* extra data: BUILTIN_DESC_* bits, a CC mode, or an ix86_builtin_func_type, cast to int */
30497 };
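/* Reading an entry: in the first bdesc_comi row below, __builtin_ia32_comieq
   is available under OPTION_MASK_ISA_SSE, expands through the
   CODE_FOR_sse_comi pattern, is identified as IX86_BUILTIN_COMIEQSS, uses an
   UNEQ comparison, and leaves the flag field at 0.  */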
30499 static const struct builtin_description bdesc_comi[] =
30500 {
30501 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comieq", IX86_BUILTIN_COMIEQSS, UNEQ, 0 },
30502 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comilt", IX86_BUILTIN_COMILTSS, UNLT, 0 },
30503 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comile", IX86_BUILTIN_COMILESS, UNLE, 0 },
30504 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comigt", IX86_BUILTIN_COMIGTSS, GT, 0 },
30505 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comige", IX86_BUILTIN_COMIGESS, GE, 0 },
30506 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comineq", IX86_BUILTIN_COMINEQSS, LTGT, 0 },
30507 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomieq", IX86_BUILTIN_UCOMIEQSS, UNEQ, 0 },
30508 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomilt", IX86_BUILTIN_UCOMILTSS, UNLT, 0 },
30509 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomile", IX86_BUILTIN_UCOMILESS, UNLE, 0 },
30510 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomigt", IX86_BUILTIN_UCOMIGTSS, GT, 0 },
30511 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomige", IX86_BUILTIN_UCOMIGESS, GE, 0 },
30512 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomineq", IX86_BUILTIN_UCOMINEQSS, LTGT, 0 },
30513 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdeq", IX86_BUILTIN_COMIEQSD, UNEQ, 0 },
30514 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdlt", IX86_BUILTIN_COMILTSD, UNLT, 0 },
30515 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdle", IX86_BUILTIN_COMILESD, UNLE, 0 },
30516 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdgt", IX86_BUILTIN_COMIGTSD, GT, 0 },
30517 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdge", IX86_BUILTIN_COMIGESD, GE, 0 },
30518 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdneq", IX86_BUILTIN_COMINEQSD, LTGT, 0 },
30519 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdeq", IX86_BUILTIN_UCOMIEQSD, UNEQ, 0 },
30520 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdlt", IX86_BUILTIN_UCOMILTSD, UNLT, 0 },
30521 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdle", IX86_BUILTIN_UCOMILESD, UNLE, 0 },
30522 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdgt", IX86_BUILTIN_UCOMIGTSD, GT, 0 },
30523 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdge", IX86_BUILTIN_UCOMIGESD, GE, 0 },
30524 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdneq", IX86_BUILTIN_UCOMINEQSD, LTGT, 0 },
30525 };
30527 static const struct builtin_description bdesc_pcmpestr[] =
30528 {
30529 /* SSE4.2 */
30530 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestri128", IX86_BUILTIN_PCMPESTRI128, UNKNOWN, 0 },
30531 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestrm128", IX86_BUILTIN_PCMPESTRM128, UNKNOWN, 0 },
30532 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestria128", IX86_BUILTIN_PCMPESTRA128, UNKNOWN, (int) CCAmode },
30533 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestric128", IX86_BUILTIN_PCMPESTRC128, UNKNOWN, (int) CCCmode },
30534 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestrio128", IX86_BUILTIN_PCMPESTRO128, UNKNOWN, (int) CCOmode },
30535 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestris128", IX86_BUILTIN_PCMPESTRS128, UNKNOWN, (int) CCSmode },
30536 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestriz128", IX86_BUILTIN_PCMPESTRZ128, UNKNOWN, (int) CCZmode },
30537 };
30539 static const struct builtin_description bdesc_pcmpistr[] =
30540 {
30541 /* SSE4.2 */
30542 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistri128", IX86_BUILTIN_PCMPISTRI128, UNKNOWN, 0 },
30543 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistrm128", IX86_BUILTIN_PCMPISTRM128, UNKNOWN, 0 },
30544 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistria128", IX86_BUILTIN_PCMPISTRA128, UNKNOWN, (int) CCAmode },
30545 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistric128", IX86_BUILTIN_PCMPISTRC128, UNKNOWN, (int) CCCmode },
30546 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistrio128", IX86_BUILTIN_PCMPISTRO128, UNKNOWN, (int) CCOmode },
30547 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistris128", IX86_BUILTIN_PCMPISTRS128, UNKNOWN, (int) CCSmode },
30548 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistriz128", IX86_BUILTIN_PCMPISTRZ128, UNKNOWN, (int) CCZmode },
30549 };
30551 /* Special builtins with variable number of arguments. */
30552 static const struct builtin_description bdesc_special_args[] =
30553 {
30554 { ~OPTION_MASK_ISA_64BIT, CODE_FOR_nothing, "__builtin_ia32_rdtsc", IX86_BUILTIN_RDTSC, UNKNOWN, (int) UINT64_FTYPE_VOID },
30555 { ~OPTION_MASK_ISA_64BIT, CODE_FOR_nothing, "__builtin_ia32_rdtscp", IX86_BUILTIN_RDTSCP, UNKNOWN, (int) UINT64_FTYPE_PUNSIGNED },
30556 { ~OPTION_MASK_ISA_64BIT, CODE_FOR_pause, "__builtin_ia32_pause", IX86_BUILTIN_PAUSE, UNKNOWN, (int) VOID_FTYPE_VOID },
30558 /* 80387 (for use internally for atomic compound assignment). */
30559 { 0, CODE_FOR_fnstenv, "__builtin_ia32_fnstenv", IX86_BUILTIN_FNSTENV, UNKNOWN, (int) VOID_FTYPE_PVOID },
30560 { 0, CODE_FOR_fldenv, "__builtin_ia32_fldenv", IX86_BUILTIN_FLDENV, UNKNOWN, (int) VOID_FTYPE_PCVOID },
30561 { 0, CODE_FOR_fnstsw, "__builtin_ia32_fnstsw", IX86_BUILTIN_FNSTSW, UNKNOWN, (int) USHORT_FTYPE_VOID },
30562 { 0, CODE_FOR_fnclex, "__builtin_ia32_fnclex", IX86_BUILTIN_FNCLEX, UNKNOWN, (int) VOID_FTYPE_VOID },
30564 /* MMX */
30565 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_emms, "__builtin_ia32_emms", IX86_BUILTIN_EMMS, UNKNOWN, (int) VOID_FTYPE_VOID },
30567 /* 3DNow! */
30568 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_femms, "__builtin_ia32_femms", IX86_BUILTIN_FEMMS, UNKNOWN, (int) VOID_FTYPE_VOID },
30570 /* FXSR, XSAVE, XSAVEOPT, XSAVEC and XSAVES. */
30571 { OPTION_MASK_ISA_FXSR, CODE_FOR_nothing, "__builtin_ia32_fxsave", IX86_BUILTIN_FXSAVE, UNKNOWN, (int) VOID_FTYPE_PVOID },
30572 { OPTION_MASK_ISA_FXSR, CODE_FOR_nothing, "__builtin_ia32_fxrstor", IX86_BUILTIN_FXRSTOR, UNKNOWN, (int) VOID_FTYPE_PVOID },
30573 { OPTION_MASK_ISA_XSAVE, CODE_FOR_nothing, "__builtin_ia32_xsave", IX86_BUILTIN_XSAVE, UNKNOWN, (int) VOID_FTYPE_PVOID_INT64 },
30574 { OPTION_MASK_ISA_XSAVE, CODE_FOR_nothing, "__builtin_ia32_xrstor", IX86_BUILTIN_XRSTOR, UNKNOWN, (int) VOID_FTYPE_PVOID_INT64 },
30575 { OPTION_MASK_ISA_XSAVEOPT, CODE_FOR_nothing, "__builtin_ia32_xsaveopt", IX86_BUILTIN_XSAVEOPT, UNKNOWN, (int) VOID_FTYPE_PVOID_INT64 },
30576 { OPTION_MASK_ISA_XSAVES, CODE_FOR_nothing, "__builtin_ia32_xsaves", IX86_BUILTIN_XSAVES, UNKNOWN, (int) VOID_FTYPE_PVOID_INT64 },
30577 { OPTION_MASK_ISA_XSAVES, CODE_FOR_nothing, "__builtin_ia32_xrstors", IX86_BUILTIN_XRSTORS, UNKNOWN, (int) VOID_FTYPE_PVOID_INT64 },
30578 { OPTION_MASK_ISA_XSAVEC, CODE_FOR_nothing, "__builtin_ia32_xsavec", IX86_BUILTIN_XSAVEC, UNKNOWN, (int) VOID_FTYPE_PVOID_INT64 },
30580 { OPTION_MASK_ISA_FXSR | OPTION_MASK_ISA_64BIT, CODE_FOR_nothing, "__builtin_ia32_fxsave64", IX86_BUILTIN_FXSAVE64, UNKNOWN, (int) VOID_FTYPE_PVOID },
30581 { OPTION_MASK_ISA_FXSR | OPTION_MASK_ISA_64BIT, CODE_FOR_nothing, "__builtin_ia32_fxrstor64", IX86_BUILTIN_FXRSTOR64, UNKNOWN, (int) VOID_FTYPE_PVOID },
30582 { OPTION_MASK_ISA_XSAVE | OPTION_MASK_ISA_64BIT, CODE_FOR_nothing, "__builtin_ia32_xsave64", IX86_BUILTIN_XSAVE64, UNKNOWN, (int) VOID_FTYPE_PVOID_INT64 },
30583 { OPTION_MASK_ISA_XSAVE | OPTION_MASK_ISA_64BIT, CODE_FOR_nothing, "__builtin_ia32_xrstor64", IX86_BUILTIN_XRSTOR64, UNKNOWN, (int) VOID_FTYPE_PVOID_INT64 },
30584 { OPTION_MASK_ISA_XSAVEOPT | OPTION_MASK_ISA_64BIT, CODE_FOR_nothing, "__builtin_ia32_xsaveopt64", IX86_BUILTIN_XSAVEOPT64, UNKNOWN, (int) VOID_FTYPE_PVOID_INT64 },
30585 { OPTION_MASK_ISA_XSAVES | OPTION_MASK_ISA_64BIT, CODE_FOR_nothing, "__builtin_ia32_xsaves64", IX86_BUILTIN_XSAVES64, UNKNOWN, (int) VOID_FTYPE_PVOID_INT64 },
30586 { OPTION_MASK_ISA_XSAVES | OPTION_MASK_ISA_64BIT, CODE_FOR_nothing, "__builtin_ia32_xrstors64", IX86_BUILTIN_XRSTORS64, UNKNOWN, (int) VOID_FTYPE_PVOID_INT64 },
30587 { OPTION_MASK_ISA_XSAVEC | OPTION_MASK_ISA_64BIT, CODE_FOR_nothing, "__builtin_ia32_xsavec64", IX86_BUILTIN_XSAVEC64, UNKNOWN, (int) VOID_FTYPE_PVOID_INT64 },
30589 /* SSE */
30590 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_storeups, "__builtin_ia32_storeups", IX86_BUILTIN_STOREUPS, UNKNOWN, (int) VOID_FTYPE_PFLOAT_V4SF },
30591 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_movntv4sf, "__builtin_ia32_movntps", IX86_BUILTIN_MOVNTPS, UNKNOWN, (int) VOID_FTYPE_PFLOAT_V4SF },
30592 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_loadups, "__builtin_ia32_loadups", IX86_BUILTIN_LOADUPS, UNKNOWN, (int) V4SF_FTYPE_PCFLOAT },
30594 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_loadhps_exp, "__builtin_ia32_loadhps", IX86_BUILTIN_LOADHPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_PCV2SF },
30595 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_loadlps_exp, "__builtin_ia32_loadlps", IX86_BUILTIN_LOADLPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_PCV2SF },
30596 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_storehps, "__builtin_ia32_storehps", IX86_BUILTIN_STOREHPS, UNKNOWN, (int) VOID_FTYPE_PV2SF_V4SF },
30597 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_storelps, "__builtin_ia32_storelps", IX86_BUILTIN_STORELPS, UNKNOWN, (int) VOID_FTYPE_PV2SF_V4SF },
30599 /* SSE or 3DNow!A */
30600 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_sse_sfence, "__builtin_ia32_sfence", IX86_BUILTIN_SFENCE, UNKNOWN, (int) VOID_FTYPE_VOID },
30601 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_sse_movntq, "__builtin_ia32_movntq", IX86_BUILTIN_MOVNTQ, UNKNOWN, (int) VOID_FTYPE_PULONGLONG_ULONGLONG },
30603 /* SSE2 */
30604 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_lfence, "__builtin_ia32_lfence", IX86_BUILTIN_LFENCE, UNKNOWN, (int) VOID_FTYPE_VOID },
30605 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_mfence, 0, IX86_BUILTIN_MFENCE, UNKNOWN, (int) VOID_FTYPE_VOID },
30606 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_storeupd, "__builtin_ia32_storeupd", IX86_BUILTIN_STOREUPD, UNKNOWN, (int) VOID_FTYPE_PDOUBLE_V2DF },
30607 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_storedquv16qi, "__builtin_ia32_storedqu", IX86_BUILTIN_STOREDQU, UNKNOWN, (int) VOID_FTYPE_PCHAR_V16QI },
30608 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movntv2df, "__builtin_ia32_movntpd", IX86_BUILTIN_MOVNTPD, UNKNOWN, (int) VOID_FTYPE_PDOUBLE_V2DF },
30609 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movntv2di, "__builtin_ia32_movntdq", IX86_BUILTIN_MOVNTDQ, UNKNOWN, (int) VOID_FTYPE_PV2DI_V2DI },
30610 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movntisi, "__builtin_ia32_movnti", IX86_BUILTIN_MOVNTI, UNKNOWN, (int) VOID_FTYPE_PINT_INT },
30611 { OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_64BIT, CODE_FOR_sse2_movntidi, "__builtin_ia32_movnti64", IX86_BUILTIN_MOVNTI64, UNKNOWN, (int) VOID_FTYPE_PLONGLONG_LONGLONG },
30612 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_loadupd, "__builtin_ia32_loadupd", IX86_BUILTIN_LOADUPD, UNKNOWN, (int) V2DF_FTYPE_PCDOUBLE },
30613 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_loaddquv16qi, "__builtin_ia32_loaddqu", IX86_BUILTIN_LOADDQU, UNKNOWN, (int) V16QI_FTYPE_PCCHAR },
30615 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_loadhpd_exp, "__builtin_ia32_loadhpd", IX86_BUILTIN_LOADHPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_PCDOUBLE },
30616 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_loadlpd_exp, "__builtin_ia32_loadlpd", IX86_BUILTIN_LOADLPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_PCDOUBLE },
30618 /* SSE3 */
30619 { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_lddqu, "__builtin_ia32_lddqu", IX86_BUILTIN_LDDQU, UNKNOWN, (int) V16QI_FTYPE_PCCHAR },
30621 /* SSE4.1 */
30622 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_movntdqa, "__builtin_ia32_movntdqa", IX86_BUILTIN_MOVNTDQA, UNKNOWN, (int) V2DI_FTYPE_PV2DI },
30624 /* SSE4A */
30625 { OPTION_MASK_ISA_SSE4A, CODE_FOR_sse4a_vmmovntv2df, "__builtin_ia32_movntsd", IX86_BUILTIN_MOVNTSD, UNKNOWN, (int) VOID_FTYPE_PDOUBLE_V2DF },
30626 { OPTION_MASK_ISA_SSE4A, CODE_FOR_sse4a_vmmovntv4sf, "__builtin_ia32_movntss", IX86_BUILTIN_MOVNTSS, UNKNOWN, (int) VOID_FTYPE_PFLOAT_V4SF },
30628 /* AVX */
30629 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vzeroall, "__builtin_ia32_vzeroall", IX86_BUILTIN_VZEROALL, UNKNOWN, (int) VOID_FTYPE_VOID },
30630 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vzeroupper, "__builtin_ia32_vzeroupper", IX86_BUILTIN_VZEROUPPER, UNKNOWN, (int) VOID_FTYPE_VOID },
30632 { OPTION_MASK_ISA_AVX, CODE_FOR_vec_dupv4sf, "__builtin_ia32_vbroadcastss", IX86_BUILTIN_VBROADCASTSS, UNKNOWN, (int) V4SF_FTYPE_PCFLOAT },
30633 { OPTION_MASK_ISA_AVX, CODE_FOR_vec_dupv4df, "__builtin_ia32_vbroadcastsd256", IX86_BUILTIN_VBROADCASTSD256, UNKNOWN, (int) V4DF_FTYPE_PCDOUBLE },
30634 { OPTION_MASK_ISA_AVX, CODE_FOR_vec_dupv8sf, "__builtin_ia32_vbroadcastss256", IX86_BUILTIN_VBROADCASTSS256, UNKNOWN, (int) V8SF_FTYPE_PCFLOAT },
30635 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vbroadcastf128_v4df, "__builtin_ia32_vbroadcastf128_pd256", IX86_BUILTIN_VBROADCASTPD256, UNKNOWN, (int) V4DF_FTYPE_PCV2DF },
30636 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vbroadcastf128_v8sf, "__builtin_ia32_vbroadcastf128_ps256", IX86_BUILTIN_VBROADCASTPS256, UNKNOWN, (int) V8SF_FTYPE_PCV4SF },
30638 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_loadupd256, "__builtin_ia32_loadupd256", IX86_BUILTIN_LOADUPD256, UNKNOWN, (int) V4DF_FTYPE_PCDOUBLE },
30639 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_loadups256, "__builtin_ia32_loadups256", IX86_BUILTIN_LOADUPS256, UNKNOWN, (int) V8SF_FTYPE_PCFLOAT },
30640 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_storeupd256, "__builtin_ia32_storeupd256", IX86_BUILTIN_STOREUPD256, UNKNOWN, (int) VOID_FTYPE_PDOUBLE_V4DF },
30641 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_storeups256, "__builtin_ia32_storeups256", IX86_BUILTIN_STOREUPS256, UNKNOWN, (int) VOID_FTYPE_PFLOAT_V8SF },
30642 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_loaddquv32qi, "__builtin_ia32_loaddqu256", IX86_BUILTIN_LOADDQU256, UNKNOWN, (int) V32QI_FTYPE_PCCHAR },
30643 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_storedquv32qi, "__builtin_ia32_storedqu256", IX86_BUILTIN_STOREDQU256, UNKNOWN, (int) VOID_FTYPE_PCHAR_V32QI },
30644 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_lddqu256, "__builtin_ia32_lddqu256", IX86_BUILTIN_LDDQU256, UNKNOWN, (int) V32QI_FTYPE_PCCHAR },
30646 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movntv4di, "__builtin_ia32_movntdq256", IX86_BUILTIN_MOVNTDQ256, UNKNOWN, (int) VOID_FTYPE_PV4DI_V4DI },
30647 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movntv4df, "__builtin_ia32_movntpd256", IX86_BUILTIN_MOVNTPD256, UNKNOWN, (int) VOID_FTYPE_PDOUBLE_V4DF },
30648 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movntv8sf, "__builtin_ia32_movntps256", IX86_BUILTIN_MOVNTPS256, UNKNOWN, (int) VOID_FTYPE_PFLOAT_V8SF },
30650 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskloadpd, "__builtin_ia32_maskloadpd", IX86_BUILTIN_MASKLOADPD, UNKNOWN, (int) V2DF_FTYPE_PCV2DF_V2DI },
30651 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskloadps, "__builtin_ia32_maskloadps", IX86_BUILTIN_MASKLOADPS, UNKNOWN, (int) V4SF_FTYPE_PCV4SF_V4SI },
30652 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskloadpd256, "__builtin_ia32_maskloadpd256", IX86_BUILTIN_MASKLOADPD256, UNKNOWN, (int) V4DF_FTYPE_PCV4DF_V4DI },
30653 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskloadps256, "__builtin_ia32_maskloadps256", IX86_BUILTIN_MASKLOADPS256, UNKNOWN, (int) V8SF_FTYPE_PCV8SF_V8SI },
30654 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskstorepd, "__builtin_ia32_maskstorepd", IX86_BUILTIN_MASKSTOREPD, UNKNOWN, (int) VOID_FTYPE_PV2DF_V2DI_V2DF },
30655 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskstoreps, "__builtin_ia32_maskstoreps", IX86_BUILTIN_MASKSTOREPS, UNKNOWN, (int) VOID_FTYPE_PV4SF_V4SI_V4SF },
30656 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskstorepd256, "__builtin_ia32_maskstorepd256", IX86_BUILTIN_MASKSTOREPD256, UNKNOWN, (int) VOID_FTYPE_PV4DF_V4DI_V4DF },
30657 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskstoreps256, "__builtin_ia32_maskstoreps256", IX86_BUILTIN_MASKSTOREPS256, UNKNOWN, (int) VOID_FTYPE_PV8SF_V8SI_V8SF },
30659 /* AVX2 */
30660 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_movntdqa, "__builtin_ia32_movntdqa256", IX86_BUILTIN_MOVNTDQA256, UNKNOWN, (int) V4DI_FTYPE_PV4DI },
30661 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_maskloadd, "__builtin_ia32_maskloadd", IX86_BUILTIN_MASKLOADD, UNKNOWN, (int) V4SI_FTYPE_PCV4SI_V4SI },
30662 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_maskloadq, "__builtin_ia32_maskloadq", IX86_BUILTIN_MASKLOADQ, UNKNOWN, (int) V2DI_FTYPE_PCV2DI_V2DI },
30663 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_maskloadd256, "__builtin_ia32_maskloadd256", IX86_BUILTIN_MASKLOADD256, UNKNOWN, (int) V8SI_FTYPE_PCV8SI_V8SI },
30664 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_maskloadq256, "__builtin_ia32_maskloadq256", IX86_BUILTIN_MASKLOADQ256, UNKNOWN, (int) V4DI_FTYPE_PCV4DI_V4DI },
30665 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_maskstored, "__builtin_ia32_maskstored", IX86_BUILTIN_MASKSTORED, UNKNOWN, (int) VOID_FTYPE_PV4SI_V4SI_V4SI },
30666 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_maskstoreq, "__builtin_ia32_maskstoreq", IX86_BUILTIN_MASKSTOREQ, UNKNOWN, (int) VOID_FTYPE_PV2DI_V2DI_V2DI },
30667 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_maskstored256, "__builtin_ia32_maskstored256", IX86_BUILTIN_MASKSTORED256, UNKNOWN, (int) VOID_FTYPE_PV8SI_V8SI_V8SI },
30668 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_maskstoreq256, "__builtin_ia32_maskstoreq256", IX86_BUILTIN_MASKSTOREQ256, UNKNOWN, (int) VOID_FTYPE_PV4DI_V4DI_V4DI },
30670 /* AVX512F */
30671 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_compressstorev16sf_mask, "__builtin_ia32_compressstoresf512_mask", IX86_BUILTIN_COMPRESSPSSTORE512, UNKNOWN, (int) VOID_FTYPE_PV16SF_V16SF_HI },
30672 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_compressstorev16si_mask, "__builtin_ia32_compressstoresi512_mask", IX86_BUILTIN_PCOMPRESSDSTORE512, UNKNOWN, (int) VOID_FTYPE_PV16SI_V16SI_HI },
30673 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_compressstorev8df_mask, "__builtin_ia32_compressstoredf512_mask", IX86_BUILTIN_COMPRESSPDSTORE512, UNKNOWN, (int) VOID_FTYPE_PV8DF_V8DF_QI },
30674 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_compressstorev8di_mask, "__builtin_ia32_compressstoredi512_mask", IX86_BUILTIN_PCOMPRESSQSTORE512, UNKNOWN, (int) VOID_FTYPE_PV8DI_V8DI_QI },
30675 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_expandv16sf_mask, "__builtin_ia32_expandloadsf512_mask", IX86_BUILTIN_EXPANDPSLOAD512, UNKNOWN, (int) V16SF_FTYPE_PCV16SF_V16SF_HI },
30676 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_expandv16sf_maskz, "__builtin_ia32_expandloadsf512_maskz", IX86_BUILTIN_EXPANDPSLOAD512Z, UNKNOWN, (int) V16SF_FTYPE_PCV16SF_V16SF_HI },
30677 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_expandv16si_mask, "__builtin_ia32_expandloadsi512_mask", IX86_BUILTIN_PEXPANDDLOAD512, UNKNOWN, (int) V16SI_FTYPE_PCV16SI_V16SI_HI },
30678 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_expandv16si_maskz, "__builtin_ia32_expandloadsi512_maskz", IX86_BUILTIN_PEXPANDDLOAD512Z, UNKNOWN, (int) V16SI_FTYPE_PCV16SI_V16SI_HI },
30679 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_expandv8df_mask, "__builtin_ia32_expandloaddf512_mask", IX86_BUILTIN_EXPANDPDLOAD512, UNKNOWN, (int) V8DF_FTYPE_PCV8DF_V8DF_QI },
30680 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_expandv8df_maskz, "__builtin_ia32_expandloaddf512_maskz", IX86_BUILTIN_EXPANDPDLOAD512Z, UNKNOWN, (int) V8DF_FTYPE_PCV8DF_V8DF_QI },
30681 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_expandv8di_mask, "__builtin_ia32_expandloaddi512_mask", IX86_BUILTIN_PEXPANDQLOAD512, UNKNOWN, (int) V8DI_FTYPE_PCV8DI_V8DI_QI },
30682 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_expandv8di_maskz, "__builtin_ia32_expandloaddi512_maskz", IX86_BUILTIN_PEXPANDQLOAD512Z, UNKNOWN, (int) V8DI_FTYPE_PCV8DI_V8DI_QI },
30683 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_loaddquv16si_mask, "__builtin_ia32_loaddqusi512_mask", IX86_BUILTIN_LOADDQUSI512, UNKNOWN, (int) V16SI_FTYPE_PCV16SI_V16SI_HI },
30684 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_loaddquv8di_mask, "__builtin_ia32_loaddqudi512_mask", IX86_BUILTIN_LOADDQUDI512, UNKNOWN, (int) V8DI_FTYPE_PCV8DI_V8DI_QI },
30685 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_loadupd512_mask, "__builtin_ia32_loadupd512_mask", IX86_BUILTIN_LOADUPD512, UNKNOWN, (int) V8DF_FTYPE_PCV8DF_V8DF_QI },
30686 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_loadups512_mask, "__builtin_ia32_loadups512_mask", IX86_BUILTIN_LOADUPS512, UNKNOWN, (int) V16SF_FTYPE_PCV16SF_V16SF_HI },
30687 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_loadv16sf_mask, "__builtin_ia32_loadaps512_mask", IX86_BUILTIN_LOADAPS512, UNKNOWN, (int) V16SF_FTYPE_PCV16SF_V16SF_HI },
30688 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_loadv16si_mask, "__builtin_ia32_movdqa32load512_mask", IX86_BUILTIN_MOVDQA32LOAD512, UNKNOWN, (int) V16SI_FTYPE_PCV16SI_V16SI_HI },
30689 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_loadv8df_mask, "__builtin_ia32_loadapd512_mask", IX86_BUILTIN_LOADAPD512, UNKNOWN, (int) V8DF_FTYPE_PCV8DF_V8DF_QI },
30690 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_loadv8di_mask, "__builtin_ia32_movdqa64load512_mask", IX86_BUILTIN_MOVDQA64LOAD512, UNKNOWN, (int) V8DI_FTYPE_PCV8DI_V8DI_QI },
30691 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_movntv16sf, "__builtin_ia32_movntps512", IX86_BUILTIN_MOVNTPS512, UNKNOWN, (int) VOID_FTYPE_PFLOAT_V16SF },
30692 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_movntv8df, "__builtin_ia32_movntpd512", IX86_BUILTIN_MOVNTPD512, UNKNOWN, (int) VOID_FTYPE_PDOUBLE_V8DF },
30693 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_movntv8di, "__builtin_ia32_movntdq512", IX86_BUILTIN_MOVNTDQ512, UNKNOWN, (int) VOID_FTYPE_PV8DI_V8DI },
30694 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_movntdqa, "__builtin_ia32_movntdqa512", IX86_BUILTIN_MOVNTDQA512, UNKNOWN, (int) V8DI_FTYPE_PV8DI },
30695 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_storedquv16si_mask, "__builtin_ia32_storedqusi512_mask", IX86_BUILTIN_STOREDQUSI512, UNKNOWN, (int) VOID_FTYPE_PV16SI_V16SI_HI },
30696 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_storedquv8di_mask, "__builtin_ia32_storedqudi512_mask", IX86_BUILTIN_STOREDQUDI512, UNKNOWN, (int) VOID_FTYPE_PV8DI_V8DI_QI },
30697 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_storeupd512_mask, "__builtin_ia32_storeupd512_mask", IX86_BUILTIN_STOREUPD512, UNKNOWN, (int) VOID_FTYPE_PV8DF_V8DF_QI },
30698 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_us_truncatev8div8si2_mask_store, "__builtin_ia32_pmovusqd512mem_mask", IX86_BUILTIN_PMOVUSQD512_MEM, UNKNOWN, (int) VOID_FTYPE_PV8SI_V8DI_QI },
30699 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_ss_truncatev8div8si2_mask_store, "__builtin_ia32_pmovsqd512mem_mask", IX86_BUILTIN_PMOVSQD512_MEM, UNKNOWN, (int) VOID_FTYPE_PV8SI_V8DI_QI },
30700 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_truncatev8div8si2_mask_store, "__builtin_ia32_pmovqd512mem_mask", IX86_BUILTIN_PMOVQD512_MEM, UNKNOWN, (int) VOID_FTYPE_PV8SI_V8DI_QI },
30701 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_us_truncatev8div8hi2_mask_store, "__builtin_ia32_pmovusqw512mem_mask", IX86_BUILTIN_PMOVUSQW512_MEM, UNKNOWN, (int) VOID_FTYPE_PV8HI_V8DI_QI },
30702 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_ss_truncatev8div8hi2_mask_store, "__builtin_ia32_pmovsqw512mem_mask", IX86_BUILTIN_PMOVSQW512_MEM, UNKNOWN, (int) VOID_FTYPE_PV8HI_V8DI_QI },
30703 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_truncatev8div8hi2_mask_store, "__builtin_ia32_pmovqw512mem_mask", IX86_BUILTIN_PMOVQW512_MEM, UNKNOWN, (int) VOID_FTYPE_PV8HI_V8DI_QI },
30704 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_us_truncatev16siv16hi2_mask_store, "__builtin_ia32_pmovusdw512mem_mask", IX86_BUILTIN_PMOVUSDW512_MEM, UNKNOWN, (int) VOID_FTYPE_PV16HI_V16SI_HI },
30705 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_ss_truncatev16siv16hi2_mask_store, "__builtin_ia32_pmovsdw512mem_mask", IX86_BUILTIN_PMOVSDW512_MEM, UNKNOWN, (int) VOID_FTYPE_PV16HI_V16SI_HI },
30706 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_truncatev16siv16hi2_mask_store, "__builtin_ia32_pmovdw512mem_mask", IX86_BUILTIN_PMOVDW512_MEM, UNKNOWN, (int) VOID_FTYPE_PV16HI_V16SI_HI },
30707 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_truncatev8div16qi2_mask_store, "__builtin_ia32_pmovqb512mem_mask", IX86_BUILTIN_PMOVQB512_MEM, UNKNOWN, (int) VOID_FTYPE_PV16QI_V8DI_QI },
30708 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_us_truncatev8div16qi2_mask_store, "__builtin_ia32_pmovusqb512mem_mask", IX86_BUILTIN_PMOVUSQB512_MEM, UNKNOWN, (int) VOID_FTYPE_PV16QI_V8DI_QI },
30709 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_ss_truncatev8div16qi2_mask_store, "__builtin_ia32_pmovsqb512mem_mask", IX86_BUILTIN_PMOVSQB512_MEM, UNKNOWN, (int) VOID_FTYPE_PV16QI_V8DI_QI },
30710 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_us_truncatev16siv16qi2_mask_store, "__builtin_ia32_pmovusdb512mem_mask", IX86_BUILTIN_PMOVUSDB512_MEM, UNKNOWN, (int) VOID_FTYPE_PV16QI_V16SI_HI },
30711 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_ss_truncatev16siv16qi2_mask_store, "__builtin_ia32_pmovsdb512mem_mask", IX86_BUILTIN_PMOVSDB512_MEM, UNKNOWN, (int) VOID_FTYPE_PV16QI_V16SI_HI },
30712 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_truncatev16siv16qi2_mask_store, "__builtin_ia32_pmovdb512mem_mask", IX86_BUILTIN_PMOVDB512_MEM, UNKNOWN, (int) VOID_FTYPE_PV16QI_V16SI_HI },
30713 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_storeups512_mask, "__builtin_ia32_storeups512_mask", IX86_BUILTIN_STOREUPS512, UNKNOWN, (int) VOID_FTYPE_PV16SF_V16SF_HI },
30714 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_storev16sf_mask, "__builtin_ia32_storeaps512_mask", IX86_BUILTIN_STOREAPS512, UNKNOWN, (int) VOID_FTYPE_PV16SF_V16SF_HI },
30715 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_storev16si_mask, "__builtin_ia32_movdqa32store512_mask", IX86_BUILTIN_MOVDQA32STORE512, UNKNOWN, (int) VOID_FTYPE_PV16SI_V16SI_HI },
30716 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_storev8df_mask, "__builtin_ia32_storeapd512_mask", IX86_BUILTIN_STOREAPD512, UNKNOWN, (int) VOID_FTYPE_PV8DF_V8DF_QI },
30717 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_storev8di_mask, "__builtin_ia32_movdqa64store512_mask", IX86_BUILTIN_MOVDQA64STORE512, UNKNOWN, (int) VOID_FTYPE_PV8DI_V8DI_QI },
30719 { OPTION_MASK_ISA_LWP, CODE_FOR_lwp_llwpcb, "__builtin_ia32_llwpcb", IX86_BUILTIN_LLWPCB, UNKNOWN, (int) VOID_FTYPE_PVOID },
30720 { OPTION_MASK_ISA_LWP, CODE_FOR_lwp_slwpcb, "__builtin_ia32_slwpcb", IX86_BUILTIN_SLWPCB, UNKNOWN, (int) PVOID_FTYPE_VOID },
30721 { OPTION_MASK_ISA_LWP, CODE_FOR_lwp_lwpvalsi3, "__builtin_ia32_lwpval32", IX86_BUILTIN_LWPVAL32, UNKNOWN, (int) VOID_FTYPE_UINT_UINT_UINT },
30722 { OPTION_MASK_ISA_LWP, CODE_FOR_lwp_lwpvaldi3, "__builtin_ia32_lwpval64", IX86_BUILTIN_LWPVAL64, UNKNOWN, (int) VOID_FTYPE_UINT64_UINT_UINT },
30723 { OPTION_MASK_ISA_LWP, CODE_FOR_lwp_lwpinssi3, "__builtin_ia32_lwpins32", IX86_BUILTIN_LWPINS32, UNKNOWN, (int) UCHAR_FTYPE_UINT_UINT_UINT },
30724 { OPTION_MASK_ISA_LWP, CODE_FOR_lwp_lwpinsdi3, "__builtin_ia32_lwpins64", IX86_BUILTIN_LWPINS64, UNKNOWN, (int) UCHAR_FTYPE_UINT64_UINT_UINT },
30726 /* FSGSBASE */
30727 { OPTION_MASK_ISA_FSGSBASE | OPTION_MASK_ISA_64BIT, CODE_FOR_rdfsbasesi, "__builtin_ia32_rdfsbase32", IX86_BUILTIN_RDFSBASE32, UNKNOWN, (int) UNSIGNED_FTYPE_VOID },
30728 { OPTION_MASK_ISA_FSGSBASE | OPTION_MASK_ISA_64BIT, CODE_FOR_rdfsbasedi, "__builtin_ia32_rdfsbase64", IX86_BUILTIN_RDFSBASE64, UNKNOWN, (int) UINT64_FTYPE_VOID },
30729 { OPTION_MASK_ISA_FSGSBASE | OPTION_MASK_ISA_64BIT, CODE_FOR_rdgsbasesi, "__builtin_ia32_rdgsbase32", IX86_BUILTIN_RDGSBASE32, UNKNOWN, (int) UNSIGNED_FTYPE_VOID },
30730 { OPTION_MASK_ISA_FSGSBASE | OPTION_MASK_ISA_64BIT, CODE_FOR_rdgsbasedi, "__builtin_ia32_rdgsbase64", IX86_BUILTIN_RDGSBASE64, UNKNOWN, (int) UINT64_FTYPE_VOID },
30731 { OPTION_MASK_ISA_FSGSBASE | OPTION_MASK_ISA_64BIT, CODE_FOR_wrfsbasesi, "__builtin_ia32_wrfsbase32", IX86_BUILTIN_WRFSBASE32, UNKNOWN, (int) VOID_FTYPE_UNSIGNED },
30732 { OPTION_MASK_ISA_FSGSBASE | OPTION_MASK_ISA_64BIT, CODE_FOR_wrfsbasedi, "__builtin_ia32_wrfsbase64", IX86_BUILTIN_WRFSBASE64, UNKNOWN, (int) VOID_FTYPE_UINT64 },
30733 { OPTION_MASK_ISA_FSGSBASE | OPTION_MASK_ISA_64BIT, CODE_FOR_wrgsbasesi, "__builtin_ia32_wrgsbase32", IX86_BUILTIN_WRGSBASE32, UNKNOWN, (int) VOID_FTYPE_UNSIGNED },
30734 { OPTION_MASK_ISA_FSGSBASE | OPTION_MASK_ISA_64BIT, CODE_FOR_wrgsbasedi, "__builtin_ia32_wrgsbase64", IX86_BUILTIN_WRGSBASE64, UNKNOWN, (int) VOID_FTYPE_UINT64 },
30736 /* RTM */
30737 { OPTION_MASK_ISA_RTM, CODE_FOR_xbegin, "__builtin_ia32_xbegin", IX86_BUILTIN_XBEGIN, UNKNOWN, (int) UNSIGNED_FTYPE_VOID },
30738 { OPTION_MASK_ISA_RTM, CODE_FOR_xend, "__builtin_ia32_xend", IX86_BUILTIN_XEND, UNKNOWN, (int) VOID_FTYPE_VOID },
30739 { OPTION_MASK_ISA_RTM, CODE_FOR_xtest, "__builtin_ia32_xtest", IX86_BUILTIN_XTEST, UNKNOWN, (int) INT_FTYPE_VOID },
30741 /* AVX512BW */
30742 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_loaddquv32hi_mask, "__builtin_ia32_loaddquhi512_mask", IX86_BUILTIN_LOADDQUHI512_MASK, UNKNOWN, (int) V32HI_FTYPE_PCV32HI_V32HI_SI },
30743 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512f_loaddquv64qi_mask, "__builtin_ia32_loaddquqi512_mask", IX86_BUILTIN_LOADDQUQI512_MASK, UNKNOWN, (int) V64QI_FTYPE_PCV64QI_V64QI_DI },
30744 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_storedquv32hi_mask, "__builtin_ia32_storedquhi512_mask", IX86_BUILTIN_STOREDQUHI512_MASK, UNKNOWN, (int) VOID_FTYPE_PV32HI_V32HI_SI },
30745 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_storedquv64qi_mask, "__builtin_ia32_storedquqi512_mask", IX86_BUILTIN_STOREDQUQI512_MASK, UNKNOWN, (int) VOID_FTYPE_PV64QI_V64QI_DI },
30747 /* AVX512VL */
30748 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_loaddquv16hi_mask, "__builtin_ia32_loaddquhi256_mask", IX86_BUILTIN_LOADDQUHI256_MASK, UNKNOWN, (int) V16HI_FTYPE_PCV16HI_V16HI_HI },
30749 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_loaddquv8hi_mask, "__builtin_ia32_loaddquhi128_mask", IX86_BUILTIN_LOADDQUHI128_MASK, UNKNOWN, (int) V8HI_FTYPE_PCV8HI_V8HI_QI },
30750 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_loaddquv32qi_mask, "__builtin_ia32_loaddquqi256_mask", IX86_BUILTIN_LOADDQUQI256_MASK, UNKNOWN, (int) V32QI_FTYPE_PCV32QI_V32QI_SI },
30751 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse2_loaddquv16qi_mask, "__builtin_ia32_loaddquqi128_mask", IX86_BUILTIN_LOADDQUQI128_MASK, UNKNOWN, (int) V16QI_FTYPE_PCV16QI_V16QI_HI },
30752 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_loadv4di_mask, "__builtin_ia32_movdqa64load256_mask", IX86_BUILTIN_MOVDQA64LOAD256_MASK, UNKNOWN, (int) V4DI_FTYPE_PCV4DI_V4DI_QI },
30753 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_loadv2di_mask, "__builtin_ia32_movdqa64load128_mask", IX86_BUILTIN_MOVDQA64LOAD128_MASK, UNKNOWN, (int) V2DI_FTYPE_PCV2DI_V2DI_QI },
30754 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_loadv8si_mask, "__builtin_ia32_movdqa32load256_mask", IX86_BUILTIN_MOVDQA32LOAD256_MASK, UNKNOWN, (int) V8SI_FTYPE_PCV8SI_V8SI_QI },
30755 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_loadv4si_mask, "__builtin_ia32_movdqa32load128_mask", IX86_BUILTIN_MOVDQA32LOAD128_MASK, UNKNOWN, (int) V4SI_FTYPE_PCV4SI_V4SI_QI },
30756 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_storev4di_mask, "__builtin_ia32_movdqa64store256_mask", IX86_BUILTIN_MOVDQA64STORE256_MASK, UNKNOWN, (int) VOID_FTYPE_PV4DI_V4DI_QI },
30757 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_storev2di_mask, "__builtin_ia32_movdqa64store128_mask", IX86_BUILTIN_MOVDQA64STORE128_MASK, UNKNOWN, (int) VOID_FTYPE_PV2DI_V2DI_QI },
30758 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_storev8si_mask, "__builtin_ia32_movdqa32store256_mask", IX86_BUILTIN_MOVDQA32STORE256_MASK, UNKNOWN, (int) VOID_FTYPE_PV8SI_V8SI_QI },
30759 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_storev4si_mask, "__builtin_ia32_movdqa32store128_mask", IX86_BUILTIN_MOVDQA32STORE128_MASK, UNKNOWN, (int) VOID_FTYPE_PV4SI_V4SI_QI },
30760 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_loadv4df_mask, "__builtin_ia32_loadapd256_mask", IX86_BUILTIN_LOADAPD256_MASK, UNKNOWN, (int) V4DF_FTYPE_PCV4DF_V4DF_QI },
30761 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_loadv2df_mask, "__builtin_ia32_loadapd128_mask", IX86_BUILTIN_LOADAPD128_MASK, UNKNOWN, (int) V2DF_FTYPE_PCV2DF_V2DF_QI },
30762 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_loadv8sf_mask, "__builtin_ia32_loadaps256_mask", IX86_BUILTIN_LOADAPS256_MASK, UNKNOWN, (int) V8SF_FTYPE_PCV8SF_V8SF_QI },
30763 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_loadv4sf_mask, "__builtin_ia32_loadaps128_mask", IX86_BUILTIN_LOADAPS128_MASK, UNKNOWN, (int) V4SF_FTYPE_PCV4SF_V4SF_QI },
30764 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_storev4df_mask, "__builtin_ia32_storeapd256_mask", IX86_BUILTIN_STOREAPD256_MASK, UNKNOWN, (int) VOID_FTYPE_PV4DF_V4DF_QI },
30765 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_storev2df_mask, "__builtin_ia32_storeapd128_mask", IX86_BUILTIN_STOREAPD128_MASK, UNKNOWN, (int) VOID_FTYPE_PV2DF_V2DF_QI },
30766 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_storev8sf_mask, "__builtin_ia32_storeaps256_mask", IX86_BUILTIN_STOREAPS256_MASK, UNKNOWN, (int) VOID_FTYPE_PV8SF_V8SF_QI },
30767 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_storev4sf_mask, "__builtin_ia32_storeaps128_mask", IX86_BUILTIN_STOREAPS128_MASK, UNKNOWN, (int) VOID_FTYPE_PV4SF_V4SF_QI },
30768 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_loadupd256_mask, "__builtin_ia32_loadupd256_mask", IX86_BUILTIN_LOADUPD256_MASK, UNKNOWN, (int) V4DF_FTYPE_PCV4DF_V4DF_QI },
30769 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse2_loadupd_mask, "__builtin_ia32_loadupd128_mask", IX86_BUILTIN_LOADUPD128_MASK, UNKNOWN, (int) V2DF_FTYPE_PCV2DF_V2DF_QI },
30770 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_loadups256_mask, "__builtin_ia32_loadups256_mask", IX86_BUILTIN_LOADUPS256_MASK, UNKNOWN, (int) V8SF_FTYPE_PCV8SF_V8SF_QI },
30771 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse_loadups_mask, "__builtin_ia32_loadups128_mask", IX86_BUILTIN_LOADUPS128_MASK, UNKNOWN, (int) V4SF_FTYPE_PCV4SF_V4SF_QI },
30772 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_storeupd256_mask, "__builtin_ia32_storeupd256_mask", IX86_BUILTIN_STOREUPD256_MASK, UNKNOWN, (int) VOID_FTYPE_PV4DF_V4DF_QI },
30773 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_storeupd_mask, "__builtin_ia32_storeupd128_mask", IX86_BUILTIN_STOREUPD128_MASK, UNKNOWN, (int) VOID_FTYPE_PV2DF_V2DF_QI },
30774 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_storeups256_mask, "__builtin_ia32_storeups256_mask", IX86_BUILTIN_STOREUPS256_MASK, UNKNOWN, (int) VOID_FTYPE_PV8SF_V8SF_QI },
30775 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_storeups_mask, "__builtin_ia32_storeups128_mask", IX86_BUILTIN_STOREUPS128_MASK, UNKNOWN, (int) VOID_FTYPE_PV4SF_V4SF_QI },
30776 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_loaddquv4di_mask, "__builtin_ia32_loaddqudi256_mask", IX86_BUILTIN_LOADDQUDI256_MASK, UNKNOWN, (int) V4DI_FTYPE_PCV4DI_V4DI_QI },
30777 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_loaddquv2di_mask, "__builtin_ia32_loaddqudi128_mask", IX86_BUILTIN_LOADDQUDI128_MASK, UNKNOWN, (int) V2DI_FTYPE_PCV2DI_V2DI_QI },
30778 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_loaddquv8si_mask, "__builtin_ia32_loaddqusi256_mask", IX86_BUILTIN_LOADDQUSI256_MASK, UNKNOWN, (int) V8SI_FTYPE_PCV8SI_V8SI_QI },
30779 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse2_loaddquv4si_mask, "__builtin_ia32_loaddqusi128_mask", IX86_BUILTIN_LOADDQUSI128_MASK, UNKNOWN, (int) V4SI_FTYPE_PCV4SI_V4SI_QI },
30780 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_storedquv4di_mask, "__builtin_ia32_storedqudi256_mask", IX86_BUILTIN_STOREDQUDI256_MASK, UNKNOWN, (int) VOID_FTYPE_PV4DI_V4DI_QI },
30781 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_storedquv2di_mask, "__builtin_ia32_storedqudi128_mask", IX86_BUILTIN_STOREDQUDI128_MASK, UNKNOWN, (int) VOID_FTYPE_PV2DI_V2DI_QI },
30782 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_storedquv8si_mask, "__builtin_ia32_storedqusi256_mask", IX86_BUILTIN_STOREDQUSI256_MASK, UNKNOWN, (int) VOID_FTYPE_PV8SI_V8SI_QI },
30783 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_storedquv4si_mask, "__builtin_ia32_storedqusi128_mask", IX86_BUILTIN_STOREDQUSI128_MASK, UNKNOWN, (int) VOID_FTYPE_PV4SI_V4SI_QI },
30784 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_storedquv16hi_mask, "__builtin_ia32_storedquhi256_mask", IX86_BUILTIN_STOREDQUHI256_MASK, UNKNOWN, (int) VOID_FTYPE_PV16HI_V16HI_HI },
30785 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_storedquv8hi_mask, "__builtin_ia32_storedquhi128_mask", IX86_BUILTIN_STOREDQUHI128_MASK, UNKNOWN, (int) VOID_FTYPE_PV8HI_V8HI_QI },
30786 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_storedquv32qi_mask, "__builtin_ia32_storedquqi256_mask", IX86_BUILTIN_STOREDQUQI256_MASK, UNKNOWN, (int) VOID_FTYPE_PV32QI_V32QI_SI },
30787 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_storedquv16qi_mask, "__builtin_ia32_storedquqi128_mask", IX86_BUILTIN_STOREDQUQI128_MASK, UNKNOWN, (int) VOID_FTYPE_PV16QI_V16QI_HI },
30788 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_compressstorev4df_mask, "__builtin_ia32_compressstoredf256_mask", IX86_BUILTIN_COMPRESSPDSTORE256, UNKNOWN, (int) VOID_FTYPE_PV4DF_V4DF_QI },
30789 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_compressstorev2df_mask, "__builtin_ia32_compressstoredf128_mask", IX86_BUILTIN_COMPRESSPDSTORE128, UNKNOWN, (int) VOID_FTYPE_PV2DF_V2DF_QI },
30790 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_compressstorev8sf_mask, "__builtin_ia32_compressstoresf256_mask", IX86_BUILTIN_COMPRESSPSSTORE256, UNKNOWN, (int) VOID_FTYPE_PV8SF_V8SF_QI },
30791 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_compressstorev4sf_mask, "__builtin_ia32_compressstoresf128_mask", IX86_BUILTIN_COMPRESSPSSTORE128, UNKNOWN, (int) VOID_FTYPE_PV4SF_V4SF_QI },
30792 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_compressstorev4di_mask, "__builtin_ia32_compressstoredi256_mask", IX86_BUILTIN_PCOMPRESSQSTORE256, UNKNOWN, (int) VOID_FTYPE_PV4DI_V4DI_QI },
30793 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_compressstorev2di_mask, "__builtin_ia32_compressstoredi128_mask", IX86_BUILTIN_PCOMPRESSQSTORE128, UNKNOWN, (int) VOID_FTYPE_PV2DI_V2DI_QI },
30794 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_compressstorev8si_mask, "__builtin_ia32_compressstoresi256_mask", IX86_BUILTIN_PCOMPRESSDSTORE256, UNKNOWN, (int) VOID_FTYPE_PV8SI_V8SI_QI },
30795 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_compressstorev4si_mask, "__builtin_ia32_compressstoresi128_mask", IX86_BUILTIN_PCOMPRESSDSTORE128, UNKNOWN, (int) VOID_FTYPE_PV4SI_V4SI_QI },
30796 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv4df_mask, "__builtin_ia32_expandloaddf256_mask", IX86_BUILTIN_EXPANDPDLOAD256, UNKNOWN, (int) V4DF_FTYPE_PCV4DF_V4DF_QI },
30797 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv2df_mask, "__builtin_ia32_expandloaddf128_mask", IX86_BUILTIN_EXPANDPDLOAD128, UNKNOWN, (int) V2DF_FTYPE_PCV2DF_V2DF_QI },
30798 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv8sf_mask, "__builtin_ia32_expandloadsf256_mask", IX86_BUILTIN_EXPANDPSLOAD256, UNKNOWN, (int) V8SF_FTYPE_PCV8SF_V8SF_QI },
30799 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv4sf_mask, "__builtin_ia32_expandloadsf128_mask", IX86_BUILTIN_EXPANDPSLOAD128, UNKNOWN, (int) V4SF_FTYPE_PCV4SF_V4SF_QI },
30800 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv4di_mask, "__builtin_ia32_expandloaddi256_mask", IX86_BUILTIN_PEXPANDQLOAD256, UNKNOWN, (int) V4DI_FTYPE_PCV4DI_V4DI_QI },
30801 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv2di_mask, "__builtin_ia32_expandloaddi128_mask", IX86_BUILTIN_PEXPANDQLOAD128, UNKNOWN, (int) V2DI_FTYPE_PCV2DI_V2DI_QI },
30802 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv8si_mask, "__builtin_ia32_expandloadsi256_mask", IX86_BUILTIN_PEXPANDDLOAD256, UNKNOWN, (int) V8SI_FTYPE_PCV8SI_V8SI_QI },
30803 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv4si_mask, "__builtin_ia32_expandloadsi128_mask", IX86_BUILTIN_PEXPANDDLOAD128, UNKNOWN, (int) V4SI_FTYPE_PCV4SI_V4SI_QI },
30804 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv4df_maskz, "__builtin_ia32_expandloaddf256_maskz", IX86_BUILTIN_EXPANDPDLOAD256Z, UNKNOWN, (int) V4DF_FTYPE_PCV4DF_V4DF_QI },
30805 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv2df_maskz, "__builtin_ia32_expandloaddf128_maskz", IX86_BUILTIN_EXPANDPDLOAD128Z, UNKNOWN, (int) V2DF_FTYPE_PCV2DF_V2DF_QI },
30806 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv8sf_maskz, "__builtin_ia32_expandloadsf256_maskz", IX86_BUILTIN_EXPANDPSLOAD256Z, UNKNOWN, (int) V8SF_FTYPE_PCV8SF_V8SF_QI },
30807 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv4sf_maskz, "__builtin_ia32_expandloadsf128_maskz", IX86_BUILTIN_EXPANDPSLOAD128Z, UNKNOWN, (int) V4SF_FTYPE_PCV4SF_V4SF_QI },
30808 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv4di_maskz, "__builtin_ia32_expandloaddi256_maskz", IX86_BUILTIN_PEXPANDQLOAD256Z, UNKNOWN, (int) V4DI_FTYPE_PCV4DI_V4DI_QI },
30809 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv2di_maskz, "__builtin_ia32_expandloaddi128_maskz", IX86_BUILTIN_PEXPANDQLOAD128Z, UNKNOWN, (int) V2DI_FTYPE_PCV2DI_V2DI_QI },
30810 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv8si_maskz, "__builtin_ia32_expandloadsi256_maskz", IX86_BUILTIN_PEXPANDDLOAD256Z, UNKNOWN, (int) V8SI_FTYPE_PCV8SI_V8SI_QI },
30811 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv4si_maskz, "__builtin_ia32_expandloadsi128_maskz", IX86_BUILTIN_PEXPANDDLOAD128Z, UNKNOWN, (int) V4SI_FTYPE_PCV4SI_V4SI_QI },
30812 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_truncatev4div4si2_mask_store, "__builtin_ia32_pmovqd256mem_mask", IX86_BUILTIN_PMOVQD256_MEM, UNKNOWN, (int) VOID_FTYPE_PV4SI_V4DI_QI },
30813 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_truncatev2div2si2_mask_store, "__builtin_ia32_pmovqd128mem_mask", IX86_BUILTIN_PMOVQD128_MEM, UNKNOWN, (int) VOID_FTYPE_PV4SI_V2DI_QI },
30814 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ss_truncatev4div4si2_mask_store, "__builtin_ia32_pmovsqd256mem_mask", IX86_BUILTIN_PMOVSQD256_MEM, UNKNOWN, (int) VOID_FTYPE_PV4SI_V4DI_QI },
30815 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ss_truncatev2div2si2_mask_store, "__builtin_ia32_pmovsqd128mem_mask", IX86_BUILTIN_PMOVSQD128_MEM, UNKNOWN, (int) VOID_FTYPE_PV4SI_V2DI_QI },
30816 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_us_truncatev4div4si2_mask_store, "__builtin_ia32_pmovusqd256mem_mask", IX86_BUILTIN_PMOVUSQD256_MEM, UNKNOWN, (int) VOID_FTYPE_PV4SI_V4DI_QI },
30817 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_us_truncatev2div2si2_mask_store, "__builtin_ia32_pmovusqd128mem_mask", IX86_BUILTIN_PMOVUSQD128_MEM, UNKNOWN, (int) VOID_FTYPE_PV4SI_V2DI_QI },
30818 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_truncatev4div4hi2_mask_store, "__builtin_ia32_pmovqw256mem_mask", IX86_BUILTIN_PMOVQW256_MEM, UNKNOWN, (int) VOID_FTYPE_PV8HI_V4DI_QI },
30819 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_truncatev2div2hi2_mask_store, "__builtin_ia32_pmovqw128mem_mask", IX86_BUILTIN_PMOVQW128_MEM, UNKNOWN, (int) VOID_FTYPE_PV8HI_V2DI_QI },
30820 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ss_truncatev4div4hi2_mask_store, "__builtin_ia32_pmovsqw256mem_mask", IX86_BUILTIN_PMOVSQW256_MEM, UNKNOWN, (int) VOID_FTYPE_PV8HI_V4DI_QI },
30821 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ss_truncatev2div2hi2_mask_store, "__builtin_ia32_pmovsqw128mem_mask", IX86_BUILTIN_PMOVSQW128_MEM, UNKNOWN, (int) VOID_FTYPE_PV8HI_V2DI_QI },
30822 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_us_truncatev4div4hi2_mask_store, "__builtin_ia32_pmovusqw256mem_mask", IX86_BUILTIN_PMOVUSQW256_MEM, UNKNOWN, (int) VOID_FTYPE_PV8HI_V4DI_QI },
30823 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_us_truncatev2div2hi2_mask_store, "__builtin_ia32_pmovusqw128mem_mask", IX86_BUILTIN_PMOVUSQW128_MEM, UNKNOWN, (int) VOID_FTYPE_PV8HI_V2DI_QI },
30824 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_truncatev4div4qi2_mask_store, "__builtin_ia32_pmovqb256mem_mask", IX86_BUILTIN_PMOVQB256_MEM, UNKNOWN, (int) VOID_FTYPE_PV16QI_V4DI_QI },
30825 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_truncatev2div2qi2_mask_store, "__builtin_ia32_pmovqb128mem_mask", IX86_BUILTIN_PMOVQB128_MEM, UNKNOWN, (int) VOID_FTYPE_PV16QI_V2DI_QI },
30826 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ss_truncatev4div4qi2_mask_store, "__builtin_ia32_pmovsqb256mem_mask", IX86_BUILTIN_PMOVSQB256_MEM, UNKNOWN, (int) VOID_FTYPE_PV16QI_V4DI_QI },
30827 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ss_truncatev2div2qi2_mask_store, "__builtin_ia32_pmovsqb128mem_mask", IX86_BUILTIN_PMOVSQB128_MEM, UNKNOWN, (int) VOID_FTYPE_PV16QI_V2DI_QI },
30828 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_us_truncatev4div4qi2_mask_store, "__builtin_ia32_pmovusqb256mem_mask", IX86_BUILTIN_PMOVUSQB256_MEM, UNKNOWN, (int) VOID_FTYPE_PV16QI_V4DI_QI },
30829 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_us_truncatev2div2qi2_mask_store, "__builtin_ia32_pmovusqb128mem_mask", IX86_BUILTIN_PMOVUSQB128_MEM, UNKNOWN, (int) VOID_FTYPE_PV16QI_V2DI_QI },
30830 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_truncatev8siv8qi2_mask_store, "__builtin_ia32_pmovdb256mem_mask", IX86_BUILTIN_PMOVDB256_MEM, UNKNOWN, (int) VOID_FTYPE_PV16QI_V8SI_QI },
30831 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_truncatev4siv4qi2_mask_store, "__builtin_ia32_pmovdb128mem_mask", IX86_BUILTIN_PMOVDB128_MEM, UNKNOWN, (int) VOID_FTYPE_PV16QI_V4SI_QI },
30832 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ss_truncatev8siv8qi2_mask_store, "__builtin_ia32_pmovsdb256mem_mask", IX86_BUILTIN_PMOVSDB256_MEM, UNKNOWN, (int) VOID_FTYPE_PV16QI_V8SI_QI },
30833 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ss_truncatev4siv4qi2_mask_store, "__builtin_ia32_pmovsdb128mem_mask", IX86_BUILTIN_PMOVSDB128_MEM, UNKNOWN, (int) VOID_FTYPE_PV16QI_V4SI_QI },
30834 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_us_truncatev8siv8qi2_mask_store, "__builtin_ia32_pmovusdb256mem_mask", IX86_BUILTIN_PMOVUSDB256_MEM, UNKNOWN, (int) VOID_FTYPE_PV16QI_V8SI_QI },
30835 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_us_truncatev4siv4qi2_mask_store, "__builtin_ia32_pmovusdb128mem_mask", IX86_BUILTIN_PMOVUSDB128_MEM, UNKNOWN, (int) VOID_FTYPE_PV16QI_V4SI_QI },
30836 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_truncatev8siv8hi2_mask_store, "__builtin_ia32_pmovdw256mem_mask", IX86_BUILTIN_PMOVDW256_MEM, UNKNOWN, (int) VOID_FTYPE_PV8HI_V8SI_QI },
30837 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_truncatev4siv4hi2_mask_store, "__builtin_ia32_pmovdw128mem_mask", IX86_BUILTIN_PMOVDW128_MEM, UNKNOWN, (int) VOID_FTYPE_PV8HI_V4SI_QI },
30838 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ss_truncatev8siv8hi2_mask_store, "__builtin_ia32_pmovsdw256mem_mask", IX86_BUILTIN_PMOVSDW256_MEM, UNKNOWN, (int) VOID_FTYPE_PV8HI_V8SI_QI },
30839 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ss_truncatev4siv4hi2_mask_store, "__builtin_ia32_pmovsdw128mem_mask", IX86_BUILTIN_PMOVSDW128_MEM, UNKNOWN, (int) VOID_FTYPE_PV8HI_V4SI_QI },
30840 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_us_truncatev8siv8hi2_mask_store, "__builtin_ia32_pmovusdw256mem_mask", IX86_BUILTIN_PMOVUSDW256_MEM, UNKNOWN, (int) VOID_FTYPE_PV8HI_V8SI_QI },
30841 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_us_truncatev4siv4hi2_mask_store, "__builtin_ia32_pmovusdw128mem_mask", IX86_BUILTIN_PMOVUSDW128_MEM, UNKNOWN, (int) VOID_FTYPE_PV8HI_V4SI_QI },
30842 };
30842 };
30844 /* Builtins with variable number of arguments. */
30845 static const struct builtin_description bdesc_args[] =
30846 {
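/* Entry layout, for reference: { ISA option mask, insn code, builtin name,
   IX86_BUILTIN_* enumerator, rtx comparison (or rounding) code with UNKNOWN
   when unused, and the *_FTYPE_* prototype code cast to int }.  These field
   names are descriptive only; the authoritative layout is the
   builtin_description struct defined earlier in this file.  */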
30847 { ~OPTION_MASK_ISA_64BIT, CODE_FOR_bsr, "__builtin_ia32_bsrsi", IX86_BUILTIN_BSRSI, UNKNOWN, (int) INT_FTYPE_INT },
30848 { OPTION_MASK_ISA_64BIT, CODE_FOR_bsr_rex64, "__builtin_ia32_bsrdi", IX86_BUILTIN_BSRDI, UNKNOWN, (int) INT64_FTYPE_INT64 },
30849 { ~OPTION_MASK_ISA_64BIT, CODE_FOR_nothing, "__builtin_ia32_rdpmc", IX86_BUILTIN_RDPMC, UNKNOWN, (int) UINT64_FTYPE_INT },
30850 { ~OPTION_MASK_ISA_64BIT, CODE_FOR_rotlqi3, "__builtin_ia32_rolqi", IX86_BUILTIN_ROLQI, UNKNOWN, (int) UINT8_FTYPE_UINT8_INT },
30851 { ~OPTION_MASK_ISA_64BIT, CODE_FOR_rotlhi3, "__builtin_ia32_rolhi", IX86_BUILTIN_ROLHI, UNKNOWN, (int) UINT16_FTYPE_UINT16_INT },
30852 { ~OPTION_MASK_ISA_64BIT, CODE_FOR_rotrqi3, "__builtin_ia32_rorqi", IX86_BUILTIN_RORQI, UNKNOWN, (int) UINT8_FTYPE_UINT8_INT },
30853 { ~OPTION_MASK_ISA_64BIT, CODE_FOR_rotrhi3, "__builtin_ia32_rorhi", IX86_BUILTIN_RORHI, UNKNOWN, (int) UINT16_FTYPE_UINT16_INT },
30855 /* MMX */
30856 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_addv8qi3, "__builtin_ia32_paddb", IX86_BUILTIN_PADDB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
30857 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_addv4hi3, "__builtin_ia32_paddw", IX86_BUILTIN_PADDW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
30858 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_addv2si3, "__builtin_ia32_paddd", IX86_BUILTIN_PADDD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
30859 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_subv8qi3, "__builtin_ia32_psubb", IX86_BUILTIN_PSUBB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
30860 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_subv4hi3, "__builtin_ia32_psubw", IX86_BUILTIN_PSUBW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
30861 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_subv2si3, "__builtin_ia32_psubd", IX86_BUILTIN_PSUBD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
30863 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ssaddv8qi3, "__builtin_ia32_paddsb", IX86_BUILTIN_PADDSB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
30864 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ssaddv4hi3, "__builtin_ia32_paddsw", IX86_BUILTIN_PADDSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
30865 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_sssubv8qi3, "__builtin_ia32_psubsb", IX86_BUILTIN_PSUBSB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
30866 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_sssubv4hi3, "__builtin_ia32_psubsw", IX86_BUILTIN_PSUBSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
30867 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_usaddv8qi3, "__builtin_ia32_paddusb", IX86_BUILTIN_PADDUSB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
30868 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_usaddv4hi3, "__builtin_ia32_paddusw", IX86_BUILTIN_PADDUSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
30869 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ussubv8qi3, "__builtin_ia32_psubusb", IX86_BUILTIN_PSUBUSB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
30870 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ussubv4hi3, "__builtin_ia32_psubusw", IX86_BUILTIN_PSUBUSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
30872 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_mulv4hi3, "__builtin_ia32_pmullw", IX86_BUILTIN_PMULLW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
30873 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_smulv4hi3_highpart, "__builtin_ia32_pmulhw", IX86_BUILTIN_PMULHW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
30875 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_andv2si3, "__builtin_ia32_pand", IX86_BUILTIN_PAND, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
30876 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_andnotv2si3, "__builtin_ia32_pandn", IX86_BUILTIN_PANDN, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
30877 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_iorv2si3, "__builtin_ia32_por", IX86_BUILTIN_POR, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
30878 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_xorv2si3, "__builtin_ia32_pxor", IX86_BUILTIN_PXOR, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
30880 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_eqv8qi3, "__builtin_ia32_pcmpeqb", IX86_BUILTIN_PCMPEQB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
30881 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_eqv4hi3, "__builtin_ia32_pcmpeqw", IX86_BUILTIN_PCMPEQW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
30882 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_eqv2si3, "__builtin_ia32_pcmpeqd", IX86_BUILTIN_PCMPEQD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
30883 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_gtv8qi3, "__builtin_ia32_pcmpgtb", IX86_BUILTIN_PCMPGTB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
30884 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_gtv4hi3, "__builtin_ia32_pcmpgtw", IX86_BUILTIN_PCMPGTW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
30885 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_gtv2si3, "__builtin_ia32_pcmpgtd", IX86_BUILTIN_PCMPGTD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
30887 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_punpckhbw, "__builtin_ia32_punpckhbw", IX86_BUILTIN_PUNPCKHBW, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
30888 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_punpckhwd, "__builtin_ia32_punpckhwd", IX86_BUILTIN_PUNPCKHWD, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
30889 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_punpckhdq, "__builtin_ia32_punpckhdq", IX86_BUILTIN_PUNPCKHDQ, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
30890 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_punpcklbw, "__builtin_ia32_punpcklbw", IX86_BUILTIN_PUNPCKLBW, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
30891 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_punpcklwd, "__builtin_ia32_punpcklwd", IX86_BUILTIN_PUNPCKLWD, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
30892 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_punpckldq, "__builtin_ia32_punpckldq", IX86_BUILTIN_PUNPCKLDQ, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
30894 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_packsswb, "__builtin_ia32_packsswb", IX86_BUILTIN_PACKSSWB, UNKNOWN, (int) V8QI_FTYPE_V4HI_V4HI },
30895 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_packssdw, "__builtin_ia32_packssdw", IX86_BUILTIN_PACKSSDW, UNKNOWN, (int) V4HI_FTYPE_V2SI_V2SI },
30896 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_packuswb, "__builtin_ia32_packuswb", IX86_BUILTIN_PACKUSWB, UNKNOWN, (int) V8QI_FTYPE_V4HI_V4HI },
30898 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_pmaddwd, "__builtin_ia32_pmaddwd", IX86_BUILTIN_PMADDWD, UNKNOWN, (int) V2SI_FTYPE_V4HI_V4HI },
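/* Each MMX shift below is listed twice: once with a *_SI_COUNT prototype
   (scalar/immediate count) and once with a vector *_COUNT prototype (count
   taken from an MMX register).  Both rows share the same shift insn pattern
   and differ only in how the count operand is supplied.  */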
30900 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashlv4hi3, "__builtin_ia32_psllwi", IX86_BUILTIN_PSLLWI, UNKNOWN, (int) V4HI_FTYPE_V4HI_SI_COUNT },
30901 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashlv2si3, "__builtin_ia32_pslldi", IX86_BUILTIN_PSLLDI, UNKNOWN, (int) V2SI_FTYPE_V2SI_SI_COUNT },
30902 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashlv1di3, "__builtin_ia32_psllqi", IX86_BUILTIN_PSLLQI, UNKNOWN, (int) V1DI_FTYPE_V1DI_SI_COUNT },
30903 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashlv4hi3, "__builtin_ia32_psllw", IX86_BUILTIN_PSLLW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI_COUNT },
30904 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashlv2si3, "__builtin_ia32_pslld", IX86_BUILTIN_PSLLD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI_COUNT },
30905 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashlv1di3, "__builtin_ia32_psllq", IX86_BUILTIN_PSLLQ, UNKNOWN, (int) V1DI_FTYPE_V1DI_V1DI_COUNT },
30907 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_lshrv4hi3, "__builtin_ia32_psrlwi", IX86_BUILTIN_PSRLWI, UNKNOWN, (int) V4HI_FTYPE_V4HI_SI_COUNT },
30908 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_lshrv2si3, "__builtin_ia32_psrldi", IX86_BUILTIN_PSRLDI, UNKNOWN, (int) V2SI_FTYPE_V2SI_SI_COUNT },
30909 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_lshrv1di3, "__builtin_ia32_psrlqi", IX86_BUILTIN_PSRLQI, UNKNOWN, (int) V1DI_FTYPE_V1DI_SI_COUNT },
30910 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_lshrv4hi3, "__builtin_ia32_psrlw", IX86_BUILTIN_PSRLW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI_COUNT },
30911 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_lshrv2si3, "__builtin_ia32_psrld", IX86_BUILTIN_PSRLD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI_COUNT },
30912 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_lshrv1di3, "__builtin_ia32_psrlq", IX86_BUILTIN_PSRLQ, UNKNOWN, (int) V1DI_FTYPE_V1DI_V1DI_COUNT },
30914 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashrv4hi3, "__builtin_ia32_psrawi", IX86_BUILTIN_PSRAWI, UNKNOWN, (int) V4HI_FTYPE_V4HI_SI_COUNT },
30915 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashrv2si3, "__builtin_ia32_psradi", IX86_BUILTIN_PSRADI, UNKNOWN, (int) V2SI_FTYPE_V2SI_SI_COUNT },
30916 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashrv4hi3, "__builtin_ia32_psraw", IX86_BUILTIN_PSRAW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI_COUNT },
30917 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashrv2si3, "__builtin_ia32_psrad", IX86_BUILTIN_PSRAD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI_COUNT },
30919 /* 3DNow! */
30920 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_pf2id, "__builtin_ia32_pf2id", IX86_BUILTIN_PF2ID, UNKNOWN, (int) V2SI_FTYPE_V2SF },
30921 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_floatv2si2, "__builtin_ia32_pi2fd", IX86_BUILTIN_PI2FD, UNKNOWN, (int) V2SF_FTYPE_V2SI },
30922 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_rcpv2sf2, "__builtin_ia32_pfrcp", IX86_BUILTIN_PFRCP, UNKNOWN, (int) V2SF_FTYPE_V2SF },
30923 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_rsqrtv2sf2, "__builtin_ia32_pfrsqrt", IX86_BUILTIN_PFRSQRT, UNKNOWN, (int) V2SF_FTYPE_V2SF },
30925 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_uavgv8qi3, "__builtin_ia32_pavgusb", IX86_BUILTIN_PAVGUSB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
30926 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_haddv2sf3, "__builtin_ia32_pfacc", IX86_BUILTIN_PFACC, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
30927 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_addv2sf3, "__builtin_ia32_pfadd", IX86_BUILTIN_PFADD, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
30928 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_eqv2sf3, "__builtin_ia32_pfcmpeq", IX86_BUILTIN_PFCMPEQ, UNKNOWN, (int) V2SI_FTYPE_V2SF_V2SF },
30929 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_gev2sf3, "__builtin_ia32_pfcmpge", IX86_BUILTIN_PFCMPGE, UNKNOWN, (int) V2SI_FTYPE_V2SF_V2SF },
30930 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_gtv2sf3, "__builtin_ia32_pfcmpgt", IX86_BUILTIN_PFCMPGT, UNKNOWN, (int) V2SI_FTYPE_V2SF_V2SF },
30931 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_smaxv2sf3, "__builtin_ia32_pfmax", IX86_BUILTIN_PFMAX, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
30932 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_sminv2sf3, "__builtin_ia32_pfmin", IX86_BUILTIN_PFMIN, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
30933 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_mulv2sf3, "__builtin_ia32_pfmul", IX86_BUILTIN_PFMUL, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
30934 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_rcpit1v2sf3, "__builtin_ia32_pfrcpit1", IX86_BUILTIN_PFRCPIT1, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
30935 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_rcpit2v2sf3, "__builtin_ia32_pfrcpit2", IX86_BUILTIN_PFRCPIT2, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
30936 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_rsqit1v2sf3, "__builtin_ia32_pfrsqit1", IX86_BUILTIN_PFRSQIT1, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
30937 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_subv2sf3, "__builtin_ia32_pfsub", IX86_BUILTIN_PFSUB, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
30938 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_subrv2sf3, "__builtin_ia32_pfsubr", IX86_BUILTIN_PFSUBR, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
30939 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_pmulhrwv4hi3, "__builtin_ia32_pmulhrw", IX86_BUILTIN_PMULHRW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
30941 /* 3DNow!A */
30942 { OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_pf2iw, "__builtin_ia32_pf2iw", IX86_BUILTIN_PF2IW, UNKNOWN, (int) V2SI_FTYPE_V2SF },
30943 { OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_pi2fw, "__builtin_ia32_pi2fw", IX86_BUILTIN_PI2FW, UNKNOWN, (int) V2SF_FTYPE_V2SI },
30944 { OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_pswapdv2si2, "__builtin_ia32_pswapdsi", IX86_BUILTIN_PSWAPDSI, UNKNOWN, (int) V2SI_FTYPE_V2SI },
30945 { OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_pswapdv2sf2, "__builtin_ia32_pswapdsf", IX86_BUILTIN_PSWAPDSF, UNKNOWN, (int) V2SF_FTYPE_V2SF },
30946 { OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_hsubv2sf3, "__builtin_ia32_pfnacc", IX86_BUILTIN_PFNACC, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
30947 { OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_addsubv2sf3, "__builtin_ia32_pfpnacc", IX86_BUILTIN_PFPNACC, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
30949 /* SSE */
30950 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_movmskps, "__builtin_ia32_movmskps", IX86_BUILTIN_MOVMSKPS, UNKNOWN, (int) INT_FTYPE_V4SF },
30951 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_sqrtv4sf2, "__builtin_ia32_sqrtps", IX86_BUILTIN_SQRTPS, UNKNOWN, (int) V4SF_FTYPE_V4SF },
30952 { OPTION_MASK_ISA_SSE, CODE_FOR_sqrtv4sf2, "__builtin_ia32_sqrtps_nr", IX86_BUILTIN_SQRTPS_NR, UNKNOWN, (int) V4SF_FTYPE_V4SF },
30953 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_rsqrtv4sf2, "__builtin_ia32_rsqrtps", IX86_BUILTIN_RSQRTPS, UNKNOWN, (int) V4SF_FTYPE_V4SF },
30954 { OPTION_MASK_ISA_SSE, CODE_FOR_rsqrtv4sf2, "__builtin_ia32_rsqrtps_nr", IX86_BUILTIN_RSQRTPS_NR, UNKNOWN, (int) V4SF_FTYPE_V4SF },
30955 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_rcpv4sf2, "__builtin_ia32_rcpps", IX86_BUILTIN_RCPPS, UNKNOWN, (int) V4SF_FTYPE_V4SF },
30956 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_cvtps2pi, "__builtin_ia32_cvtps2pi", IX86_BUILTIN_CVTPS2PI, UNKNOWN, (int) V2SI_FTYPE_V4SF },
30957 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_cvtss2si, "__builtin_ia32_cvtss2si", IX86_BUILTIN_CVTSS2SI, UNKNOWN, (int) INT_FTYPE_V4SF },
30958 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_64BIT, CODE_FOR_sse_cvtss2siq, "__builtin_ia32_cvtss2si64", IX86_BUILTIN_CVTSS2SI64, UNKNOWN, (int) INT64_FTYPE_V4SF },
30959 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_cvttps2pi, "__builtin_ia32_cvttps2pi", IX86_BUILTIN_CVTTPS2PI, UNKNOWN, (int) V2SI_FTYPE_V4SF },
30960 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_cvttss2si, "__builtin_ia32_cvttss2si", IX86_BUILTIN_CVTTSS2SI, UNKNOWN, (int) INT_FTYPE_V4SF },
30961 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_64BIT, CODE_FOR_sse_cvttss2siq, "__builtin_ia32_cvttss2si64", IX86_BUILTIN_CVTTSS2SI64, UNKNOWN, (int) INT64_FTYPE_V4SF },
30963 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_shufps, "__builtin_ia32_shufps", IX86_BUILTIN_SHUFPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
30965 { OPTION_MASK_ISA_SSE, CODE_FOR_addv4sf3, "__builtin_ia32_addps", IX86_BUILTIN_ADDPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
30966 { OPTION_MASK_ISA_SSE, CODE_FOR_subv4sf3, "__builtin_ia32_subps", IX86_BUILTIN_SUBPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
30967 { OPTION_MASK_ISA_SSE, CODE_FOR_mulv4sf3, "__builtin_ia32_mulps", IX86_BUILTIN_MULPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
30968 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_divv4sf3, "__builtin_ia32_divps", IX86_BUILTIN_DIVPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
30969 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmaddv4sf3, "__builtin_ia32_addss", IX86_BUILTIN_ADDSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
30970 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmsubv4sf3, "__builtin_ia32_subss", IX86_BUILTIN_SUBSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
30971 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmulv4sf3, "__builtin_ia32_mulss", IX86_BUILTIN_MULSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
30972 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmdivv4sf3, "__builtin_ia32_divss", IX86_BUILTIN_DIVSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
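/* In the comparison rows below, the "greater" variants reuse LT/LE with the
   *_SWAP prototype (operands exchanged at expansion time), and the negated
   variants map to the unordered codes UNGE/UNGT, matching the predicate
   encodings of CMPPS/CMPSS.  */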
30974 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpeqps", IX86_BUILTIN_CMPEQPS, EQ, (int) V4SF_FTYPE_V4SF_V4SF },
30975 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpltps", IX86_BUILTIN_CMPLTPS, LT, (int) V4SF_FTYPE_V4SF_V4SF },
30976 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpleps", IX86_BUILTIN_CMPLEPS, LE, (int) V4SF_FTYPE_V4SF_V4SF },
30977 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpgtps", IX86_BUILTIN_CMPGTPS, LT, (int) V4SF_FTYPE_V4SF_V4SF_SWAP },
30978 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpgeps", IX86_BUILTIN_CMPGEPS, LE, (int) V4SF_FTYPE_V4SF_V4SF_SWAP },
30979 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpunordps", IX86_BUILTIN_CMPUNORDPS, UNORDERED, (int) V4SF_FTYPE_V4SF_V4SF },
30980 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpneqps", IX86_BUILTIN_CMPNEQPS, NE, (int) V4SF_FTYPE_V4SF_V4SF },
30981 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpnltps", IX86_BUILTIN_CMPNLTPS, UNGE, (int) V4SF_FTYPE_V4SF_V4SF },
30982 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpnleps", IX86_BUILTIN_CMPNLEPS, UNGT, (int) V4SF_FTYPE_V4SF_V4SF },
30983 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpngtps", IX86_BUILTIN_CMPNGTPS, UNGE, (int) V4SF_FTYPE_V4SF_V4SF_SWAP },
30984 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpngeps", IX86_BUILTIN_CMPNGEPS, UNGT, (int) V4SF_FTYPE_V4SF_V4SF_SWAP },
30985 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpordps", IX86_BUILTIN_CMPORDPS, ORDERED, (int) V4SF_FTYPE_V4SF_V4SF },
30986 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpeqss", IX86_BUILTIN_CMPEQSS, EQ, (int) V4SF_FTYPE_V4SF_V4SF },
30987 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpltss", IX86_BUILTIN_CMPLTSS, LT, (int) V4SF_FTYPE_V4SF_V4SF },
30988 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpless", IX86_BUILTIN_CMPLESS, LE, (int) V4SF_FTYPE_V4SF_V4SF },
30989 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpunordss", IX86_BUILTIN_CMPUNORDSS, UNORDERED, (int) V4SF_FTYPE_V4SF_V4SF },
30990 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpneqss", IX86_BUILTIN_CMPNEQSS, NE, (int) V4SF_FTYPE_V4SF_V4SF },
30991 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpnltss", IX86_BUILTIN_CMPNLTSS, UNGE, (int) V4SF_FTYPE_V4SF_V4SF },
30992 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpnless", IX86_BUILTIN_CMPNLESS, UNGT, (int) V4SF_FTYPE_V4SF_V4SF },
30993 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpordss", IX86_BUILTIN_CMPORDSS, ORDERED, (int) V4SF_FTYPE_V4SF_V4SF },
30995 { OPTION_MASK_ISA_SSE, CODE_FOR_sminv4sf3, "__builtin_ia32_minps", IX86_BUILTIN_MINPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
30996 { OPTION_MASK_ISA_SSE, CODE_FOR_smaxv4sf3, "__builtin_ia32_maxps", IX86_BUILTIN_MAXPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
30997 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmsminv4sf3, "__builtin_ia32_minss", IX86_BUILTIN_MINSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
30998 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmsmaxv4sf3, "__builtin_ia32_maxss", IX86_BUILTIN_MAXSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
31000 { OPTION_MASK_ISA_SSE, CODE_FOR_andv4sf3, "__builtin_ia32_andps", IX86_BUILTIN_ANDPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
31001 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_andnotv4sf3, "__builtin_ia32_andnps", IX86_BUILTIN_ANDNPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
31002 { OPTION_MASK_ISA_SSE, CODE_FOR_iorv4sf3, "__builtin_ia32_orps", IX86_BUILTIN_ORPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
31003 { OPTION_MASK_ISA_SSE, CODE_FOR_xorv4sf3, "__builtin_ia32_xorps", IX86_BUILTIN_XORPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
31005 { OPTION_MASK_ISA_SSE, CODE_FOR_copysignv4sf3, "__builtin_ia32_copysignps", IX86_BUILTIN_CPYSGNPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
31007 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_movss, "__builtin_ia32_movss", IX86_BUILTIN_MOVSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
31008 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_movhlps_exp, "__builtin_ia32_movhlps", IX86_BUILTIN_MOVHLPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
31009 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_movlhps_exp, "__builtin_ia32_movlhps", IX86_BUILTIN_MOVLHPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
31010 { OPTION_MASK_ISA_SSE, CODE_FOR_vec_interleave_highv4sf, "__builtin_ia32_unpckhps", IX86_BUILTIN_UNPCKHPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
31011 { OPTION_MASK_ISA_SSE, CODE_FOR_vec_interleave_lowv4sf, "__builtin_ia32_unpcklps", IX86_BUILTIN_UNPCKLPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
31013 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_cvtpi2ps, "__builtin_ia32_cvtpi2ps", IX86_BUILTIN_CVTPI2PS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V2SI },
31014 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_cvtsi2ss, "__builtin_ia32_cvtsi2ss", IX86_BUILTIN_CVTSI2SS, UNKNOWN, (int) V4SF_FTYPE_V4SF_SI },
31015 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_64BIT, CODE_FOR_sse_cvtsi2ssq, "__builtin_ia32_cvtsi642ss", IX86_BUILTIN_CVTSI642SS, UNKNOWN, (int) V4SF_FTYPE_V4SF_DI },
31017 { OPTION_MASK_ISA_SSE, CODE_FOR_rsqrtsf2, "__builtin_ia32_rsqrtf", IX86_BUILTIN_RSQRTF, UNKNOWN, (int) FLOAT_FTYPE_FLOAT },
31019 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmsqrtv4sf2, "__builtin_ia32_sqrtss", IX86_BUILTIN_SQRTSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_VEC_MERGE },
31020 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmrsqrtv4sf2, "__builtin_ia32_rsqrtss", IX86_BUILTIN_RSQRTSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_VEC_MERGE },
31021 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmrcpv4sf2, "__builtin_ia32_rcpss", IX86_BUILTIN_RCPSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_VEC_MERGE },
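/* Rows with a null name field (such as the two TFmode entries below) still
   drive expansion through their IX86_BUILTIN_* code; their user-visible
   builtins are apparently registered separately elsewhere in this file.  */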
31023 { OPTION_MASK_ISA_SSE, CODE_FOR_abstf2, 0, IX86_BUILTIN_FABSQ, UNKNOWN, (int) FLOAT128_FTYPE_FLOAT128 },
31024 { OPTION_MASK_ISA_SSE, CODE_FOR_copysigntf3, 0, IX86_BUILTIN_COPYSIGNQ, UNKNOWN, (int) FLOAT128_FTYPE_FLOAT128_FLOAT128 },
31026 /* SSE MMX or 3DNow!A */
31027 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_uavgv8qi3, "__builtin_ia32_pavgb", IX86_BUILTIN_PAVGB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
31028 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_uavgv4hi3, "__builtin_ia32_pavgw", IX86_BUILTIN_PAVGW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
31029 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_umulv4hi3_highpart, "__builtin_ia32_pmulhuw", IX86_BUILTIN_PMULHUW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
31031 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_umaxv8qi3, "__builtin_ia32_pmaxub", IX86_BUILTIN_PMAXUB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
31032 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_smaxv4hi3, "__builtin_ia32_pmaxsw", IX86_BUILTIN_PMAXSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
31033 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_uminv8qi3, "__builtin_ia32_pminub", IX86_BUILTIN_PMINUB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
31034 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_sminv4hi3, "__builtin_ia32_pminsw", IX86_BUILTIN_PMINSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
31036 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_psadbw, "__builtin_ia32_psadbw", IX86_BUILTIN_PSADBW, UNKNOWN, (int) V1DI_FTYPE_V8QI_V8QI },
31037 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_pmovmskb, "__builtin_ia32_pmovmskb", IX86_BUILTIN_PMOVMSKB, UNKNOWN, (int) INT_FTYPE_V8QI },
31039 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_pshufw, "__builtin_ia32_pshufw", IX86_BUILTIN_PSHUFW, UNKNOWN, (int) V4HI_FTYPE_V4HI_INT },
31041 /* SSE2 */
31042 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_shufpd, "__builtin_ia32_shufpd", IX86_BUILTIN_SHUFPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
31044 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movmskpd, "__builtin_ia32_movmskpd", IX86_BUILTIN_MOVMSKPD, UNKNOWN, (int) INT_FTYPE_V2DF },
31045 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_pmovmskb, "__builtin_ia32_pmovmskb128", IX86_BUILTIN_PMOVMSKB128, UNKNOWN, (int) INT_FTYPE_V16QI },
31046 { OPTION_MASK_ISA_SSE2, CODE_FOR_sqrtv2df2, "__builtin_ia32_sqrtpd", IX86_BUILTIN_SQRTPD, UNKNOWN, (int) V2DF_FTYPE_V2DF },
31047 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtdq2pd, "__builtin_ia32_cvtdq2pd", IX86_BUILTIN_CVTDQ2PD, UNKNOWN, (int) V2DF_FTYPE_V4SI },
31048 { OPTION_MASK_ISA_SSE2, CODE_FOR_floatv4siv4sf2, "__builtin_ia32_cvtdq2ps", IX86_BUILTIN_CVTDQ2PS, UNKNOWN, (int) V4SF_FTYPE_V4SI },
31050 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtpd2dq, "__builtin_ia32_cvtpd2dq", IX86_BUILTIN_CVTPD2DQ, UNKNOWN, (int) V4SI_FTYPE_V2DF },
31051 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtpd2pi, "__builtin_ia32_cvtpd2pi", IX86_BUILTIN_CVTPD2PI, UNKNOWN, (int) V2SI_FTYPE_V2DF },
31052 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtpd2ps, "__builtin_ia32_cvtpd2ps", IX86_BUILTIN_CVTPD2PS, UNKNOWN, (int) V4SF_FTYPE_V2DF },
31053 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvttpd2dq, "__builtin_ia32_cvttpd2dq", IX86_BUILTIN_CVTTPD2DQ, UNKNOWN, (int) V4SI_FTYPE_V2DF },
31054 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvttpd2pi, "__builtin_ia32_cvttpd2pi", IX86_BUILTIN_CVTTPD2PI, UNKNOWN, (int) V2SI_FTYPE_V2DF },
31056 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtpi2pd, "__builtin_ia32_cvtpi2pd", IX86_BUILTIN_CVTPI2PD, UNKNOWN, (int) V2DF_FTYPE_V2SI },
31058 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtsd2si, "__builtin_ia32_cvtsd2si", IX86_BUILTIN_CVTSD2SI, UNKNOWN, (int) INT_FTYPE_V2DF },
31059 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvttsd2si, "__builtin_ia32_cvttsd2si", IX86_BUILTIN_CVTTSD2SI, UNKNOWN, (int) INT_FTYPE_V2DF },
31060 { OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_64BIT, CODE_FOR_sse2_cvtsd2siq, "__builtin_ia32_cvtsd2si64", IX86_BUILTIN_CVTSD2SI64, UNKNOWN, (int) INT64_FTYPE_V2DF },
31061 { OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_64BIT, CODE_FOR_sse2_cvttsd2siq, "__builtin_ia32_cvttsd2si64", IX86_BUILTIN_CVTTSD2SI64, UNKNOWN, (int) INT64_FTYPE_V2DF },
31063 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_fix_notruncv4sfv4si, "__builtin_ia32_cvtps2dq", IX86_BUILTIN_CVTPS2DQ, UNKNOWN, (int) V4SI_FTYPE_V4SF },
31064 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtps2pd, "__builtin_ia32_cvtps2pd", IX86_BUILTIN_CVTPS2PD, UNKNOWN, (int) V2DF_FTYPE_V4SF },
31065 { OPTION_MASK_ISA_SSE2, CODE_FOR_fix_truncv4sfv4si2, "__builtin_ia32_cvttps2dq", IX86_BUILTIN_CVTTPS2DQ, UNKNOWN, (int) V4SI_FTYPE_V4SF },
31067 { OPTION_MASK_ISA_SSE2, CODE_FOR_addv2df3, "__builtin_ia32_addpd", IX86_BUILTIN_ADDPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
31068 { OPTION_MASK_ISA_SSE2, CODE_FOR_subv2df3, "__builtin_ia32_subpd", IX86_BUILTIN_SUBPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
31069 { OPTION_MASK_ISA_SSE2, CODE_FOR_mulv2df3, "__builtin_ia32_mulpd", IX86_BUILTIN_MULPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
31070 { OPTION_MASK_ISA_SSE2, CODE_FOR_divv2df3, "__builtin_ia32_divpd", IX86_BUILTIN_DIVPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
31071 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmaddv2df3, "__builtin_ia32_addsd", IX86_BUILTIN_ADDSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
31072 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmsubv2df3, "__builtin_ia32_subsd", IX86_BUILTIN_SUBSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
31073 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmulv2df3, "__builtin_ia32_mulsd", IX86_BUILTIN_MULSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
31074 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmdivv2df3, "__builtin_ia32_divsd", IX86_BUILTIN_DIVSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
31076 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpeqpd", IX86_BUILTIN_CMPEQPD, EQ, (int) V2DF_FTYPE_V2DF_V2DF },
31077 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpltpd", IX86_BUILTIN_CMPLTPD, LT, (int) V2DF_FTYPE_V2DF_V2DF },
31078 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmplepd", IX86_BUILTIN_CMPLEPD, LE, (int) V2DF_FTYPE_V2DF_V2DF },
31079 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpgtpd", IX86_BUILTIN_CMPGTPD, LT, (int) V2DF_FTYPE_V2DF_V2DF_SWAP },
31080 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpgepd", IX86_BUILTIN_CMPGEPD, LE, (int) V2DF_FTYPE_V2DF_V2DF_SWAP },
31081 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpunordpd", IX86_BUILTIN_CMPUNORDPD, UNORDERED, (int) V2DF_FTYPE_V2DF_V2DF },
31082 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpneqpd", IX86_BUILTIN_CMPNEQPD, NE, (int) V2DF_FTYPE_V2DF_V2DF },
31083 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpnltpd", IX86_BUILTIN_CMPNLTPD, UNGE, (int) V2DF_FTYPE_V2DF_V2DF },
31084 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpnlepd", IX86_BUILTIN_CMPNLEPD, UNGT, (int) V2DF_FTYPE_V2DF_V2DF },
31085 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpngtpd", IX86_BUILTIN_CMPNGTPD, UNGE, (int) V2DF_FTYPE_V2DF_V2DF_SWAP },
31086 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpngepd", IX86_BUILTIN_CMPNGEPD, UNGT, (int) V2DF_FTYPE_V2DF_V2DF_SWAP },
31087 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpordpd", IX86_BUILTIN_CMPORDPD, ORDERED, (int) V2DF_FTYPE_V2DF_V2DF },
31088 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpeqsd", IX86_BUILTIN_CMPEQSD, EQ, (int) V2DF_FTYPE_V2DF_V2DF },
31089 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpltsd", IX86_BUILTIN_CMPLTSD, LT, (int) V2DF_FTYPE_V2DF_V2DF },
31090 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmplesd", IX86_BUILTIN_CMPLESD, LE, (int) V2DF_FTYPE_V2DF_V2DF },
31091 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpunordsd", IX86_BUILTIN_CMPUNORDSD, UNORDERED, (int) V2DF_FTYPE_V2DF_V2DF },
31092 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpneqsd", IX86_BUILTIN_CMPNEQSD, NE, (int) V2DF_FTYPE_V2DF_V2DF },
31093 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpnltsd", IX86_BUILTIN_CMPNLTSD, UNGE, (int) V2DF_FTYPE_V2DF_V2DF },
31094 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpnlesd", IX86_BUILTIN_CMPNLESD, UNGT, (int) V2DF_FTYPE_V2DF_V2DF },
31095 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpordsd", IX86_BUILTIN_CMPORDSD, ORDERED, (int) V2DF_FTYPE_V2DF_V2DF },
31097 { OPTION_MASK_ISA_SSE2, CODE_FOR_sminv2df3, "__builtin_ia32_minpd", IX86_BUILTIN_MINPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
31098 { OPTION_MASK_ISA_SSE2, CODE_FOR_smaxv2df3, "__builtin_ia32_maxpd", IX86_BUILTIN_MAXPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
31099 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmsminv2df3, "__builtin_ia32_minsd", IX86_BUILTIN_MINSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
31100 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmsmaxv2df3, "__builtin_ia32_maxsd", IX86_BUILTIN_MAXSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
31102 { OPTION_MASK_ISA_SSE2, CODE_FOR_andv2df3, "__builtin_ia32_andpd", IX86_BUILTIN_ANDPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
31103 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_andnotv2df3, "__builtin_ia32_andnpd", IX86_BUILTIN_ANDNPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
31104 { OPTION_MASK_ISA_SSE2, CODE_FOR_iorv2df3, "__builtin_ia32_orpd", IX86_BUILTIN_ORPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
31105 { OPTION_MASK_ISA_SSE2, CODE_FOR_xorv2df3, "__builtin_ia32_xorpd", IX86_BUILTIN_XORPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
31107 { OPTION_MASK_ISA_SSE2, CODE_FOR_copysignv2df3, "__builtin_ia32_copysignpd", IX86_BUILTIN_CPYSGNPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
31109 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movsd, "__builtin_ia32_movsd", IX86_BUILTIN_MOVSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
31110 { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_highv2df, "__builtin_ia32_unpckhpd", IX86_BUILTIN_UNPCKHPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
31111 { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_lowv2df, "__builtin_ia32_unpcklpd", IX86_BUILTIN_UNPCKLPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
31113 { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_pack_sfix_v2df, "__builtin_ia32_vec_pack_sfix", IX86_BUILTIN_VEC_PACK_SFIX, UNKNOWN, (int) V4SI_FTYPE_V2DF_V2DF },
31115 { OPTION_MASK_ISA_SSE2, CODE_FOR_addv16qi3, "__builtin_ia32_paddb128", IX86_BUILTIN_PADDB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
31116 { OPTION_MASK_ISA_SSE2, CODE_FOR_addv8hi3, "__builtin_ia32_paddw128", IX86_BUILTIN_PADDW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
31117 { OPTION_MASK_ISA_SSE2, CODE_FOR_addv4si3, "__builtin_ia32_paddd128", IX86_BUILTIN_PADDD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
31118 { OPTION_MASK_ISA_SSE2, CODE_FOR_addv2di3, "__builtin_ia32_paddq128", IX86_BUILTIN_PADDQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
31119 { OPTION_MASK_ISA_SSE2, CODE_FOR_subv16qi3, "__builtin_ia32_psubb128", IX86_BUILTIN_PSUBB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
31120 { OPTION_MASK_ISA_SSE2, CODE_FOR_subv8hi3, "__builtin_ia32_psubw128", IX86_BUILTIN_PSUBW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
31121 { OPTION_MASK_ISA_SSE2, CODE_FOR_subv4si3, "__builtin_ia32_psubd128", IX86_BUILTIN_PSUBD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
31122 { OPTION_MASK_ISA_SSE2, CODE_FOR_subv2di3, "__builtin_ia32_psubq128", IX86_BUILTIN_PSUBQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
31124 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ssaddv16qi3, "__builtin_ia32_paddsb128", IX86_BUILTIN_PADDSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
31125 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ssaddv8hi3, "__builtin_ia32_paddsw128", IX86_BUILTIN_PADDSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
31126 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_sssubv16qi3, "__builtin_ia32_psubsb128", IX86_BUILTIN_PSUBSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
31127 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_sssubv8hi3, "__builtin_ia32_psubsw128", IX86_BUILTIN_PSUBSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
31128 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_usaddv16qi3, "__builtin_ia32_paddusb128", IX86_BUILTIN_PADDUSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
31129 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_usaddv8hi3, "__builtin_ia32_paddusw128", IX86_BUILTIN_PADDUSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
31130 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ussubv16qi3, "__builtin_ia32_psubusb128", IX86_BUILTIN_PSUBUSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
31131 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ussubv8hi3, "__builtin_ia32_psubusw128", IX86_BUILTIN_PSUBUSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
31133 { OPTION_MASK_ISA_SSE2, CODE_FOR_mulv8hi3, "__builtin_ia32_pmullw128", IX86_BUILTIN_PMULLW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
31134 { OPTION_MASK_ISA_SSE2, CODE_FOR_smulv8hi3_highpart, "__builtin_ia32_pmulhw128", IX86_BUILTIN_PMULHW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
31136 { OPTION_MASK_ISA_SSE2, CODE_FOR_andv2di3, "__builtin_ia32_pand128", IX86_BUILTIN_PAND128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
31137 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_andnotv2di3, "__builtin_ia32_pandn128", IX86_BUILTIN_PANDN128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
31138 { OPTION_MASK_ISA_SSE2, CODE_FOR_iorv2di3, "__builtin_ia32_por128", IX86_BUILTIN_POR128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
31139 { OPTION_MASK_ISA_SSE2, CODE_FOR_xorv2di3, "__builtin_ia32_pxor128", IX86_BUILTIN_PXOR128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
31141 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_uavgv16qi3, "__builtin_ia32_pavgb128", IX86_BUILTIN_PAVGB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
31142 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_uavgv8hi3, "__builtin_ia32_pavgw128", IX86_BUILTIN_PAVGW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
31144 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_eqv16qi3, "__builtin_ia32_pcmpeqb128", IX86_BUILTIN_PCMPEQB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
31145 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_eqv8hi3, "__builtin_ia32_pcmpeqw128", IX86_BUILTIN_PCMPEQW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
31146 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_eqv4si3, "__builtin_ia32_pcmpeqd128", IX86_BUILTIN_PCMPEQD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
31147 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_gtv16qi3, "__builtin_ia32_pcmpgtb128", IX86_BUILTIN_PCMPGTB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
31148 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_gtv8hi3, "__builtin_ia32_pcmpgtw128", IX86_BUILTIN_PCMPGTW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
31149 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_gtv4si3, "__builtin_ia32_pcmpgtd128", IX86_BUILTIN_PCMPGTD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
31151 { OPTION_MASK_ISA_SSE2, CODE_FOR_umaxv16qi3, "__builtin_ia32_pmaxub128", IX86_BUILTIN_PMAXUB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
31152 { OPTION_MASK_ISA_SSE2, CODE_FOR_smaxv8hi3, "__builtin_ia32_pmaxsw128", IX86_BUILTIN_PMAXSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
31153 { OPTION_MASK_ISA_SSE2, CODE_FOR_uminv16qi3, "__builtin_ia32_pminub128", IX86_BUILTIN_PMINUB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
31154 { OPTION_MASK_ISA_SSE2, CODE_FOR_sminv8hi3, "__builtin_ia32_pminsw128", IX86_BUILTIN_PMINSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
31156 { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_highv16qi, "__builtin_ia32_punpckhbw128", IX86_BUILTIN_PUNPCKHBW128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
31157 { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_highv8hi, "__builtin_ia32_punpckhwd128", IX86_BUILTIN_PUNPCKHWD128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
31158 { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_highv4si, "__builtin_ia32_punpckhdq128", IX86_BUILTIN_PUNPCKHDQ128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
31159 { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_highv2di, "__builtin_ia32_punpckhqdq128", IX86_BUILTIN_PUNPCKHQDQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
31160 { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_lowv16qi, "__builtin_ia32_punpcklbw128", IX86_BUILTIN_PUNPCKLBW128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
31161 { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_lowv8hi, "__builtin_ia32_punpcklwd128", IX86_BUILTIN_PUNPCKLWD128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
31162 { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_lowv4si, "__builtin_ia32_punpckldq128", IX86_BUILTIN_PUNPCKLDQ128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
31163 { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_lowv2di, "__builtin_ia32_punpcklqdq128", IX86_BUILTIN_PUNPCKLQDQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
31165 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_packsswb, "__builtin_ia32_packsswb128", IX86_BUILTIN_PACKSSWB128, UNKNOWN, (int) V16QI_FTYPE_V8HI_V8HI },
31166 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_packssdw, "__builtin_ia32_packssdw128", IX86_BUILTIN_PACKSSDW128, UNKNOWN, (int) V8HI_FTYPE_V4SI_V4SI },
31167 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_packuswb, "__builtin_ia32_packuswb128", IX86_BUILTIN_PACKUSWB128, UNKNOWN, (int) V16QI_FTYPE_V8HI_V8HI },
31169 { OPTION_MASK_ISA_SSE2, CODE_FOR_umulv8hi3_highpart, "__builtin_ia32_pmulhuw128", IX86_BUILTIN_PMULHUW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
31170 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_psadbw, "__builtin_ia32_psadbw128", IX86_BUILTIN_PSADBW128, UNKNOWN, (int) V2DI_FTYPE_V16QI_V16QI },
31172 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_umulv1siv1di3, "__builtin_ia32_pmuludq", IX86_BUILTIN_PMULUDQ, UNKNOWN, (int) V1DI_FTYPE_V2SI_V2SI },
31173 { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_widen_umult_even_v4si, "__builtin_ia32_pmuludq128", IX86_BUILTIN_PMULUDQ128, UNKNOWN, (int) V2DI_FTYPE_V4SI_V4SI },
31175 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_pmaddwd, "__builtin_ia32_pmaddwd128", IX86_BUILTIN_PMADDWD128, UNKNOWN, (int) V4SI_FTYPE_V8HI_V8HI },
31177 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtsi2sd, "__builtin_ia32_cvtsi2sd", IX86_BUILTIN_CVTSI2SD, UNKNOWN, (int) V2DF_FTYPE_V2DF_SI },
31178 { OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_64BIT, CODE_FOR_sse2_cvtsi2sdq, "__builtin_ia32_cvtsi642sd", IX86_BUILTIN_CVTSI642SD, UNKNOWN, (int) V2DF_FTYPE_V2DF_DI },
31179 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtsd2ss, "__builtin_ia32_cvtsd2ss", IX86_BUILTIN_CVTSD2SS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V2DF },
31180 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtss2sd, "__builtin_ia32_cvtss2sd", IX86_BUILTIN_CVTSS2SD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V4SF },
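/* The *_INT_CONVERT prototypes used for pslldqi128/psrldqi128 indicate that
   the V2DI operand is reinterpreted as a single TImode value (note the
   v1ti shift patterns), so the whole 128-bit register is shifted; the count
   operand is in bits.  */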
31182 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ashlv1ti3, "__builtin_ia32_pslldqi128", IX86_BUILTIN_PSLLDQI128, UNKNOWN, (int) V2DI_FTYPE_V2DI_INT_CONVERT },
31183 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashlv8hi3, "__builtin_ia32_psllwi128", IX86_BUILTIN_PSLLWI128, UNKNOWN, (int) V8HI_FTYPE_V8HI_SI_COUNT },
31184 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashlv4si3, "__builtin_ia32_pslldi128", IX86_BUILTIN_PSLLDI128, UNKNOWN, (int) V4SI_FTYPE_V4SI_SI_COUNT },
31185 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashlv2di3, "__builtin_ia32_psllqi128", IX86_BUILTIN_PSLLQI128, UNKNOWN, (int) V2DI_FTYPE_V2DI_SI_COUNT },
31186 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashlv8hi3, "__builtin_ia32_psllw128", IX86_BUILTIN_PSLLW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_COUNT },
31187 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashlv4si3, "__builtin_ia32_pslld128", IX86_BUILTIN_PSLLD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_COUNT },
31188 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashlv2di3, "__builtin_ia32_psllq128", IX86_BUILTIN_PSLLQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_COUNT },
31190 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_lshrv1ti3, "__builtin_ia32_psrldqi128", IX86_BUILTIN_PSRLDQI128, UNKNOWN, (int) V2DI_FTYPE_V2DI_INT_CONVERT },
31191 { OPTION_MASK_ISA_SSE2, CODE_FOR_lshrv8hi3, "__builtin_ia32_psrlwi128", IX86_BUILTIN_PSRLWI128, UNKNOWN, (int) V8HI_FTYPE_V8HI_SI_COUNT },
31192 { OPTION_MASK_ISA_SSE2, CODE_FOR_lshrv4si3, "__builtin_ia32_psrldi128", IX86_BUILTIN_PSRLDI128, UNKNOWN, (int) V4SI_FTYPE_V4SI_SI_COUNT },
31193 { OPTION_MASK_ISA_SSE2, CODE_FOR_lshrv2di3, "__builtin_ia32_psrlqi128", IX86_BUILTIN_PSRLQI128, UNKNOWN, (int) V2DI_FTYPE_V2DI_SI_COUNT },
31194 { OPTION_MASK_ISA_SSE2, CODE_FOR_lshrv8hi3, "__builtin_ia32_psrlw128", IX86_BUILTIN_PSRLW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_COUNT },
31195 { OPTION_MASK_ISA_SSE2, CODE_FOR_lshrv4si3, "__builtin_ia32_psrld128", IX86_BUILTIN_PSRLD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_COUNT },
31196 { OPTION_MASK_ISA_SSE2, CODE_FOR_lshrv2di3, "__builtin_ia32_psrlq128", IX86_BUILTIN_PSRLQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_COUNT },
31198 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashrv8hi3, "__builtin_ia32_psrawi128", IX86_BUILTIN_PSRAWI128, UNKNOWN, (int) V8HI_FTYPE_V8HI_SI_COUNT },
31199 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashrv4si3, "__builtin_ia32_psradi128", IX86_BUILTIN_PSRADI128, UNKNOWN, (int) V4SI_FTYPE_V4SI_SI_COUNT },
31200 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashrv8hi3, "__builtin_ia32_psraw128", IX86_BUILTIN_PSRAW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_COUNT },
31201 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashrv4si3, "__builtin_ia32_psrad128", IX86_BUILTIN_PSRAD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_COUNT },
31203 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_pshufd, "__builtin_ia32_pshufd", IX86_BUILTIN_PSHUFD, UNKNOWN, (int) V4SI_FTYPE_V4SI_INT },
31204 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_pshuflw, "__builtin_ia32_pshuflw", IX86_BUILTIN_PSHUFLW, UNKNOWN, (int) V8HI_FTYPE_V8HI_INT },
31205 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_pshufhw, "__builtin_ia32_pshufhw", IX86_BUILTIN_PSHUFHW, UNKNOWN, (int) V8HI_FTYPE_V8HI_INT },
31207 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmsqrtv2df2, "__builtin_ia32_sqrtsd", IX86_BUILTIN_SQRTSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_VEC_MERGE },
31209 { OPTION_MASK_ISA_SSE, CODE_FOR_sse2_movq128, "__builtin_ia32_movq128", IX86_BUILTIN_MOVQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI },
31211 /* SSE2 MMX */
31212 { OPTION_MASK_ISA_SSE2, CODE_FOR_mmx_addv1di3, "__builtin_ia32_paddq", IX86_BUILTIN_PADDQ, UNKNOWN, (int) V1DI_FTYPE_V1DI_V1DI },
31213 { OPTION_MASK_ISA_SSE2, CODE_FOR_mmx_subv1di3, "__builtin_ia32_psubq", IX86_BUILTIN_PSUBQ, UNKNOWN, (int) V1DI_FTYPE_V1DI_V1DI },
31215 /* SSE3 */
31216 { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_movshdup, "__builtin_ia32_movshdup", IX86_BUILTIN_MOVSHDUP, UNKNOWN, (int) V4SF_FTYPE_V4SF },
31217 { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_movsldup, "__builtin_ia32_movsldup", IX86_BUILTIN_MOVSLDUP, UNKNOWN, (int) V4SF_FTYPE_V4SF },
31219 { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_addsubv4sf3, "__builtin_ia32_addsubps", IX86_BUILTIN_ADDSUBPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
31220 { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_addsubv2df3, "__builtin_ia32_addsubpd", IX86_BUILTIN_ADDSUBPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
31221 { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_haddv4sf3, "__builtin_ia32_haddps", IX86_BUILTIN_HADDPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
31222 { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_haddv2df3, "__builtin_ia32_haddpd", IX86_BUILTIN_HADDPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
31223 { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_hsubv4sf3, "__builtin_ia32_hsubps", IX86_BUILTIN_HSUBPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
31224 { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_hsubv2df3, "__builtin_ia32_hsubpd", IX86_BUILTIN_HSUBPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
31226 /* SSSE3 */
31227 { OPTION_MASK_ISA_SSSE3, CODE_FOR_absv16qi2, "__builtin_ia32_pabsb128", IX86_BUILTIN_PABSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI },
31228 { OPTION_MASK_ISA_SSSE3, CODE_FOR_absv8qi2, "__builtin_ia32_pabsb", IX86_BUILTIN_PABSB, UNKNOWN, (int) V8QI_FTYPE_V8QI },
31229 { OPTION_MASK_ISA_SSSE3, CODE_FOR_absv8hi2, "__builtin_ia32_pabsw128", IX86_BUILTIN_PABSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI },
31230 { OPTION_MASK_ISA_SSSE3, CODE_FOR_absv4hi2, "__builtin_ia32_pabsw", IX86_BUILTIN_PABSW, UNKNOWN, (int) V4HI_FTYPE_V4HI },
31231 { OPTION_MASK_ISA_SSSE3, CODE_FOR_absv4si2, "__builtin_ia32_pabsd128", IX86_BUILTIN_PABSD128, UNKNOWN, (int) V4SI_FTYPE_V4SI },
31232 { OPTION_MASK_ISA_SSSE3, CODE_FOR_absv2si2, "__builtin_ia32_pabsd", IX86_BUILTIN_PABSD, UNKNOWN, (int) V2SI_FTYPE_V2SI },
31234 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phaddwv8hi3, "__builtin_ia32_phaddw128", IX86_BUILTIN_PHADDW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
31235 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phaddwv4hi3, "__builtin_ia32_phaddw", IX86_BUILTIN_PHADDW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
31236 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phadddv4si3, "__builtin_ia32_phaddd128", IX86_BUILTIN_PHADDD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
31237 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phadddv2si3, "__builtin_ia32_phaddd", IX86_BUILTIN_PHADDD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
31238 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phaddswv8hi3, "__builtin_ia32_phaddsw128", IX86_BUILTIN_PHADDSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
31239 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phaddswv4hi3, "__builtin_ia32_phaddsw", IX86_BUILTIN_PHADDSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
31240 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phsubwv8hi3, "__builtin_ia32_phsubw128", IX86_BUILTIN_PHSUBW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
31241 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phsubwv4hi3, "__builtin_ia32_phsubw", IX86_BUILTIN_PHSUBW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
31242 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phsubdv4si3, "__builtin_ia32_phsubd128", IX86_BUILTIN_PHSUBD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
31243 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phsubdv2si3, "__builtin_ia32_phsubd", IX86_BUILTIN_PHSUBD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
31244 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phsubswv8hi3, "__builtin_ia32_phsubsw128", IX86_BUILTIN_PHSUBSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
31245 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phsubswv4hi3, "__builtin_ia32_phsubsw", IX86_BUILTIN_PHSUBSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
31246 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_pmaddubsw128, "__builtin_ia32_pmaddubsw128", IX86_BUILTIN_PMADDUBSW128, UNKNOWN, (int) V8HI_FTYPE_V16QI_V16QI },
31247 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_pmaddubsw, "__builtin_ia32_pmaddubsw", IX86_BUILTIN_PMADDUBSW, UNKNOWN, (int) V4HI_FTYPE_V8QI_V8QI },
31248 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_pmulhrswv8hi3, "__builtin_ia32_pmulhrsw128", IX86_BUILTIN_PMULHRSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
31249 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_pmulhrswv4hi3, "__builtin_ia32_pmulhrsw", IX86_BUILTIN_PMULHRSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
31250 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_pshufbv16qi3, "__builtin_ia32_pshufb128", IX86_BUILTIN_PSHUFB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
31251 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_pshufbv8qi3, "__builtin_ia32_pshufb", IX86_BUILTIN_PSHUFB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
31252 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_psignv16qi3, "__builtin_ia32_psignb128", IX86_BUILTIN_PSIGNB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
31253 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_psignv8qi3, "__builtin_ia32_psignb", IX86_BUILTIN_PSIGNB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
31254 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_psignv8hi3, "__builtin_ia32_psignw128", IX86_BUILTIN_PSIGNW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
31255 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_psignv4hi3, "__builtin_ia32_psignw", IX86_BUILTIN_PSIGNW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
31256 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_psignv4si3, "__builtin_ia32_psignd128", IX86_BUILTIN_PSIGND128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
31257 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_psignv2si3, "__builtin_ia32_psignd", IX86_BUILTIN_PSIGND, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
31259 /* SSSE3. */
31260 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_palignrti, "__builtin_ia32_palignr128", IX86_BUILTIN_PALIGNR128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_INT_CONVERT },
31261 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_palignrdi, "__builtin_ia32_palignr", IX86_BUILTIN_PALIGNR, UNKNOWN, (int) V1DI_FTYPE_V1DI_V1DI_INT_CONVERT },
31263 /* SSE4.1 */
31264 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_blendpd, "__builtin_ia32_blendpd", IX86_BUILTIN_BLENDPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
31265 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_blendps, "__builtin_ia32_blendps", IX86_BUILTIN_BLENDPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
31266 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_blendvpd, "__builtin_ia32_blendvpd", IX86_BUILTIN_BLENDVPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF },
31267 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_blendvps, "__builtin_ia32_blendvps", IX86_BUILTIN_BLENDVPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF },
31268 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_dppd, "__builtin_ia32_dppd", IX86_BUILTIN_DPPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
31269 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_dpps, "__builtin_ia32_dpps", IX86_BUILTIN_DPPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
31270 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_insertps, "__builtin_ia32_insertps128", IX86_BUILTIN_INSERTPS128, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
31271 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_mpsadbw, "__builtin_ia32_mpsadbw128", IX86_BUILTIN_MPSADBW128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_INT },
31272 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_pblendvb, "__builtin_ia32_pblendvb128", IX86_BUILTIN_PBLENDVB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_V16QI },
31273 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_pblendw, "__builtin_ia32_pblendw128", IX86_BUILTIN_PBLENDW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_INT },
31275 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_sign_extendv8qiv8hi2, "__builtin_ia32_pmovsxbw128", IX86_BUILTIN_PMOVSXBW128, UNKNOWN, (int) V8HI_FTYPE_V16QI },
31276 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_sign_extendv4qiv4si2, "__builtin_ia32_pmovsxbd128", IX86_BUILTIN_PMOVSXBD128, UNKNOWN, (int) V4SI_FTYPE_V16QI },
31277 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_sign_extendv2qiv2di2, "__builtin_ia32_pmovsxbq128", IX86_BUILTIN_PMOVSXBQ128, UNKNOWN, (int) V2DI_FTYPE_V16QI },
31278 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_sign_extendv4hiv4si2, "__builtin_ia32_pmovsxwd128", IX86_BUILTIN_PMOVSXWD128, UNKNOWN, (int) V4SI_FTYPE_V8HI },
31279 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_sign_extendv2hiv2di2, "__builtin_ia32_pmovsxwq128", IX86_BUILTIN_PMOVSXWQ128, UNKNOWN, (int) V2DI_FTYPE_V8HI },
31280 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_sign_extendv2siv2di2, "__builtin_ia32_pmovsxdq128", IX86_BUILTIN_PMOVSXDQ128, UNKNOWN, (int) V2DI_FTYPE_V4SI },
31281 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_zero_extendv8qiv8hi2, "__builtin_ia32_pmovzxbw128", IX86_BUILTIN_PMOVZXBW128, UNKNOWN, (int) V8HI_FTYPE_V16QI },
31282 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_zero_extendv4qiv4si2, "__builtin_ia32_pmovzxbd128", IX86_BUILTIN_PMOVZXBD128, UNKNOWN, (int) V4SI_FTYPE_V16QI },
31283 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_zero_extendv2qiv2di2, "__builtin_ia32_pmovzxbq128", IX86_BUILTIN_PMOVZXBQ128, UNKNOWN, (int) V2DI_FTYPE_V16QI },
31284 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_zero_extendv4hiv4si2, "__builtin_ia32_pmovzxwd128", IX86_BUILTIN_PMOVZXWD128, UNKNOWN, (int) V4SI_FTYPE_V8HI },
31285 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_zero_extendv2hiv2di2, "__builtin_ia32_pmovzxwq128", IX86_BUILTIN_PMOVZXWQ128, UNKNOWN, (int) V2DI_FTYPE_V8HI },
31286 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_zero_extendv2siv2di2, "__builtin_ia32_pmovzxdq128", IX86_BUILTIN_PMOVZXDQ128, UNKNOWN, (int) V2DI_FTYPE_V4SI },
31287 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_phminposuw, "__builtin_ia32_phminposuw128", IX86_BUILTIN_PHMINPOSUW128, UNKNOWN, (int) V8HI_FTYPE_V8HI },
31289 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_packusdw, "__builtin_ia32_packusdw128", IX86_BUILTIN_PACKUSDW128, UNKNOWN, (int) V8HI_FTYPE_V4SI_V4SI },
31290 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_eqv2di3, "__builtin_ia32_pcmpeqq", IX86_BUILTIN_PCMPEQQ, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
31291 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_smaxv16qi3, "__builtin_ia32_pmaxsb128", IX86_BUILTIN_PMAXSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
31292 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_smaxv4si3, "__builtin_ia32_pmaxsd128", IX86_BUILTIN_PMAXSD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
31293 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_umaxv4si3, "__builtin_ia32_pmaxud128", IX86_BUILTIN_PMAXUD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
31294 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_umaxv8hi3, "__builtin_ia32_pmaxuw128", IX86_BUILTIN_PMAXUW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
31295 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sminv16qi3, "__builtin_ia32_pminsb128", IX86_BUILTIN_PMINSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
31296 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sminv4si3, "__builtin_ia32_pminsd128", IX86_BUILTIN_PMINSD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
31297 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_uminv4si3, "__builtin_ia32_pminud128", IX86_BUILTIN_PMINUD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
31298 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_uminv8hi3, "__builtin_ia32_pminuw128", IX86_BUILTIN_PMINUW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
31299 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_mulv2siv2di3, "__builtin_ia32_pmuldq128", IX86_BUILTIN_PMULDQ128, UNKNOWN, (int) V2DI_FTYPE_V4SI_V4SI },
31300 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_mulv4si3, "__builtin_ia32_pmulld128", IX86_BUILTIN_PMULLD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
31302 /* SSE4.1 */
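/* For the rounding entries below, the rtx_code slot carries a ROUND_* mode
   (ROUND_FLOOR, ROUND_CEIL, ROUND_TRUNC, ROUND_MXCSR) instead of a
   comparison; the expander presumably turns it into the ROUNDPD/ROUNDPS
   immediate.  */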
31303 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundpd, "__builtin_ia32_roundpd", IX86_BUILTIN_ROUNDPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_INT },
31304 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundps, "__builtin_ia32_roundps", IX86_BUILTIN_ROUNDPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_INT },
31305 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundsd, "__builtin_ia32_roundsd", IX86_BUILTIN_ROUNDSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
31306 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundss, "__builtin_ia32_roundss", IX86_BUILTIN_ROUNDSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
31308 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundpd, "__builtin_ia32_floorpd", IX86_BUILTIN_FLOORPD, (enum rtx_code) ROUND_FLOOR, (int) V2DF_FTYPE_V2DF_ROUND },
31309 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundpd, "__builtin_ia32_ceilpd", IX86_BUILTIN_CEILPD, (enum rtx_code) ROUND_CEIL, (int) V2DF_FTYPE_V2DF_ROUND },
31310 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundpd, "__builtin_ia32_truncpd", IX86_BUILTIN_TRUNCPD, (enum rtx_code) ROUND_TRUNC, (int) V2DF_FTYPE_V2DF_ROUND },
31311 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundpd, "__builtin_ia32_rintpd", IX86_BUILTIN_RINTPD, (enum rtx_code) ROUND_MXCSR, (int) V2DF_FTYPE_V2DF_ROUND },
31313 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundpd_vec_pack_sfix, "__builtin_ia32_floorpd_vec_pack_sfix", IX86_BUILTIN_FLOORPD_VEC_PACK_SFIX, (enum rtx_code) ROUND_FLOOR, (int) V4SI_FTYPE_V2DF_V2DF_ROUND },
31314 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundpd_vec_pack_sfix, "__builtin_ia32_ceilpd_vec_pack_sfix", IX86_BUILTIN_CEILPD_VEC_PACK_SFIX, (enum rtx_code) ROUND_CEIL, (int) V4SI_FTYPE_V2DF_V2DF_ROUND },
31316 { OPTION_MASK_ISA_ROUND, CODE_FOR_roundv2df2, "__builtin_ia32_roundpd_az", IX86_BUILTIN_ROUNDPD_AZ, UNKNOWN, (int) V2DF_FTYPE_V2DF },
31317 { OPTION_MASK_ISA_ROUND, CODE_FOR_roundv2df2_vec_pack_sfix, "__builtin_ia32_roundpd_az_vec_pack_sfix", IX86_BUILTIN_ROUNDPD_AZ_VEC_PACK_SFIX, UNKNOWN, (int) V4SI_FTYPE_V2DF_V2DF },
31319 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundps, "__builtin_ia32_floorps", IX86_BUILTIN_FLOORPS, (enum rtx_code) ROUND_FLOOR, (int) V4SF_FTYPE_V4SF_ROUND },
31320 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundps, "__builtin_ia32_ceilps", IX86_BUILTIN_CEILPS, (enum rtx_code) ROUND_CEIL, (int) V4SF_FTYPE_V4SF_ROUND },
31321 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundps, "__builtin_ia32_truncps", IX86_BUILTIN_TRUNCPS, (enum rtx_code) ROUND_TRUNC, (int) V4SF_FTYPE_V4SF_ROUND },
31322 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundps, "__builtin_ia32_rintps", IX86_BUILTIN_RINTPS, (enum rtx_code) ROUND_MXCSR, (int) V4SF_FTYPE_V4SF_ROUND },
31324 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundps_sfix, "__builtin_ia32_floorps_sfix", IX86_BUILTIN_FLOORPS_SFIX, (enum rtx_code) ROUND_FLOOR, (int) V4SI_FTYPE_V4SF_ROUND },
31325 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundps_sfix, "__builtin_ia32_ceilps_sfix", IX86_BUILTIN_CEILPS_SFIX, (enum rtx_code) ROUND_CEIL, (int) V4SI_FTYPE_V4SF_ROUND },
31327 { OPTION_MASK_ISA_ROUND, CODE_FOR_roundv4sf2, "__builtin_ia32_roundps_az", IX86_BUILTIN_ROUNDPS_AZ, UNKNOWN, (int) V4SF_FTYPE_V4SF },
31328 { OPTION_MASK_ISA_ROUND, CODE_FOR_roundv4sf2_sfix, "__builtin_ia32_roundps_az_sfix", IX86_BUILTIN_ROUNDPS_AZ_SFIX, UNKNOWN, (int) V4SI_FTYPE_V4SF },
31330 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_ptest, "__builtin_ia32_ptestz128", IX86_BUILTIN_PTESTZ, EQ, (int) INT_FTYPE_V2DI_V2DI_PTEST },
31331 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_ptest, "__builtin_ia32_ptestc128", IX86_BUILTIN_PTESTC, LTU, (int) INT_FTYPE_V2DI_V2DI_PTEST },
31332 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_ptest, "__builtin_ia32_ptestnzc128", IX86_BUILTIN_PTESTNZC, GTU, (int) INT_FTYPE_V2DI_V2DI_PTEST },
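/* The three ptest entries above share one pattern; the EQ/LTU/GTU codes
   select which condition of the PTEST flags result is returned (ZF for
   ptestz, CF for ptestc, neither flag set for ptestnzc), presumably
   resolved when the builtin is expanded.  */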
31334 /* SSE4.2 */
31335 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_gtv2di3, "__builtin_ia32_pcmpgtq", IX86_BUILTIN_PCMPGTQ, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
31336 { OPTION_MASK_ISA_SSE4_2 | OPTION_MASK_ISA_CRC32, CODE_FOR_sse4_2_crc32qi, "__builtin_ia32_crc32qi", IX86_BUILTIN_CRC32QI, UNKNOWN, (int) UINT_FTYPE_UINT_UCHAR },
31337 { OPTION_MASK_ISA_SSE4_2 | OPTION_MASK_ISA_CRC32, CODE_FOR_sse4_2_crc32hi, "__builtin_ia32_crc32hi", IX86_BUILTIN_CRC32HI, UNKNOWN, (int) UINT_FTYPE_UINT_USHORT },
31338 { OPTION_MASK_ISA_SSE4_2 | OPTION_MASK_ISA_CRC32, CODE_FOR_sse4_2_crc32si, "__builtin_ia32_crc32si", IX86_BUILTIN_CRC32SI, UNKNOWN, (int) UINT_FTYPE_UINT_UINT },
31339 { OPTION_MASK_ISA_SSE4_2 | OPTION_MASK_ISA_CRC32 | OPTION_MASK_ISA_64BIT, CODE_FOR_sse4_2_crc32di, "__builtin_ia32_crc32di", IX86_BUILTIN_CRC32DI, UNKNOWN, (int) UINT64_FTYPE_UINT64_UINT64 },
31341 /* SSE4A */
31342 { OPTION_MASK_ISA_SSE4A, CODE_FOR_sse4a_extrqi, "__builtin_ia32_extrqi", IX86_BUILTIN_EXTRQI, UNKNOWN, (int) V2DI_FTYPE_V2DI_UINT_UINT },
31343 { OPTION_MASK_ISA_SSE4A, CODE_FOR_sse4a_extrq, "__builtin_ia32_extrq", IX86_BUILTIN_EXTRQ, UNKNOWN, (int) V2DI_FTYPE_V2DI_V16QI },
31344 { OPTION_MASK_ISA_SSE4A, CODE_FOR_sse4a_insertqi, "__builtin_ia32_insertqi", IX86_BUILTIN_INSERTQI, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_UINT_UINT },
31345 { OPTION_MASK_ISA_SSE4A, CODE_FOR_sse4a_insertq, "__builtin_ia32_insertq", IX86_BUILTIN_INSERTQ, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
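/* The AES and PCLMUL entries below carry a null name: the user-visible
   __builtin_ia32_aes* and __builtin_ia32_pclmulqdq128 names appear to be
   registered separately so they can be gated on their own ISA flags rather
   than on the SSE2 mask recorded here.  */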
31347 /* AES */
31348 { OPTION_MASK_ISA_SSE2, CODE_FOR_aeskeygenassist, 0, IX86_BUILTIN_AESKEYGENASSIST128, UNKNOWN, (int) V2DI_FTYPE_V2DI_INT },
31349 { OPTION_MASK_ISA_SSE2, CODE_FOR_aesimc, 0, IX86_BUILTIN_AESIMC128, UNKNOWN, (int) V2DI_FTYPE_V2DI },
31351 { OPTION_MASK_ISA_SSE2, CODE_FOR_aesenc, 0, IX86_BUILTIN_AESENC128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
31352 { OPTION_MASK_ISA_SSE2, CODE_FOR_aesenclast, 0, IX86_BUILTIN_AESENCLAST128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
31353 { OPTION_MASK_ISA_SSE2, CODE_FOR_aesdec, 0, IX86_BUILTIN_AESDEC128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
31354 { OPTION_MASK_ISA_SSE2, CODE_FOR_aesdeclast, 0, IX86_BUILTIN_AESDECLAST128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
31356 /* PCLMUL */
31357 { OPTION_MASK_ISA_SSE2, CODE_FOR_pclmulqdq, 0, IX86_BUILTIN_PCLMULQDQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_INT },
31359 /* AVX */
31360 { OPTION_MASK_ISA_AVX, CODE_FOR_addv4df3, "__builtin_ia32_addpd256", IX86_BUILTIN_ADDPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
31361 { OPTION_MASK_ISA_AVX, CODE_FOR_addv8sf3, "__builtin_ia32_addps256", IX86_BUILTIN_ADDPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
31362 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_addsubv4df3, "__builtin_ia32_addsubpd256", IX86_BUILTIN_ADDSUBPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
31363 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_addsubv8sf3, "__builtin_ia32_addsubps256", IX86_BUILTIN_ADDSUBPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
31364 { OPTION_MASK_ISA_AVX, CODE_FOR_andv4df3, "__builtin_ia32_andpd256", IX86_BUILTIN_ANDPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
31365 { OPTION_MASK_ISA_AVX, CODE_FOR_andv8sf3, "__builtin_ia32_andps256", IX86_BUILTIN_ANDPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
31366 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_andnotv4df3, "__builtin_ia32_andnpd256", IX86_BUILTIN_ANDNPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
31367 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_andnotv8sf3, "__builtin_ia32_andnps256", IX86_BUILTIN_ANDNPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
31368 { OPTION_MASK_ISA_AVX, CODE_FOR_divv4df3, "__builtin_ia32_divpd256", IX86_BUILTIN_DIVPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
31369 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_divv8sf3, "__builtin_ia32_divps256", IX86_BUILTIN_DIVPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
31370 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_haddv4df3, "__builtin_ia32_haddpd256", IX86_BUILTIN_HADDPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
31371 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_hsubv8sf3, "__builtin_ia32_hsubps256", IX86_BUILTIN_HSUBPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
31372 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_hsubv4df3, "__builtin_ia32_hsubpd256", IX86_BUILTIN_HSUBPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
31373 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_haddv8sf3, "__builtin_ia32_haddps256", IX86_BUILTIN_HADDPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
31374 { OPTION_MASK_ISA_AVX, CODE_FOR_smaxv4df3, "__builtin_ia32_maxpd256", IX86_BUILTIN_MAXPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
31375 { OPTION_MASK_ISA_AVX, CODE_FOR_smaxv8sf3, "__builtin_ia32_maxps256", IX86_BUILTIN_MAXPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
31376 { OPTION_MASK_ISA_AVX, CODE_FOR_sminv4df3, "__builtin_ia32_minpd256", IX86_BUILTIN_MINPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
31377 { OPTION_MASK_ISA_AVX, CODE_FOR_sminv8sf3, "__builtin_ia32_minps256", IX86_BUILTIN_MINPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
31378 { OPTION_MASK_ISA_AVX, CODE_FOR_mulv4df3, "__builtin_ia32_mulpd256", IX86_BUILTIN_MULPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
31379 { OPTION_MASK_ISA_AVX, CODE_FOR_mulv8sf3, "__builtin_ia32_mulps256", IX86_BUILTIN_MULPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
31380 { OPTION_MASK_ISA_AVX, CODE_FOR_iorv4df3, "__builtin_ia32_orpd256", IX86_BUILTIN_ORPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
31381 { OPTION_MASK_ISA_AVX, CODE_FOR_iorv8sf3, "__builtin_ia32_orps256", IX86_BUILTIN_ORPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
31382 { OPTION_MASK_ISA_AVX, CODE_FOR_subv4df3, "__builtin_ia32_subpd256", IX86_BUILTIN_SUBPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
31383 { OPTION_MASK_ISA_AVX, CODE_FOR_subv8sf3, "__builtin_ia32_subps256", IX86_BUILTIN_SUBPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
31384 { OPTION_MASK_ISA_AVX, CODE_FOR_xorv4df3, "__builtin_ia32_xorpd256", IX86_BUILTIN_XORPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
31385 { OPTION_MASK_ISA_AVX, CODE_FOR_xorv8sf3, "__builtin_ia32_xorps256", IX86_BUILTIN_XORPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
31387 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilvarv2df3, "__builtin_ia32_vpermilvarpd", IX86_BUILTIN_VPERMILVARPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DI },
31388 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilvarv4sf3, "__builtin_ia32_vpermilvarps", IX86_BUILTIN_VPERMILVARPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SI },
31389 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilvarv4df3, "__builtin_ia32_vpermilvarpd256", IX86_BUILTIN_VPERMILVARPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DI },
31390 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilvarv8sf3, "__builtin_ia32_vpermilvarps256", IX86_BUILTIN_VPERMILVARPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SI },
31392 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_blendpd256, "__builtin_ia32_blendpd256", IX86_BUILTIN_BLENDPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_INT },
31393 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_blendps256, "__builtin_ia32_blendps256", IX86_BUILTIN_BLENDPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_INT },
31394 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_blendvpd256, "__builtin_ia32_blendvpd256", IX86_BUILTIN_BLENDVPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DF },
31395 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_blendvps256, "__builtin_ia32_blendvps256", IX86_BUILTIN_BLENDVPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SF },
31396 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_dpps256, "__builtin_ia32_dpps256", IX86_BUILTIN_DPPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_INT },
31397 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_shufpd256, "__builtin_ia32_shufpd256", IX86_BUILTIN_SHUFPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_INT },
31398 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_shufps256, "__builtin_ia32_shufps256", IX86_BUILTIN_SHUFPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_INT },
31399 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vmcmpv2df3, "__builtin_ia32_cmpsd", IX86_BUILTIN_CMPSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
31400 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vmcmpv4sf3, "__builtin_ia32_cmpss", IX86_BUILTIN_CMPSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
31401 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cmpv2df3, "__builtin_ia32_cmppd", IX86_BUILTIN_CMPPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
31402 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cmpv4sf3, "__builtin_ia32_cmpps", IX86_BUILTIN_CMPPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
31403 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cmpv4df3, "__builtin_ia32_cmppd256", IX86_BUILTIN_CMPPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_INT },
31404 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cmpv8sf3, "__builtin_ia32_cmpps256", IX86_BUILTIN_CMPPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_INT },
31405 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vextractf128v4df, "__builtin_ia32_vextractf128_pd256", IX86_BUILTIN_EXTRACTF128PD256, UNKNOWN, (int) V2DF_FTYPE_V4DF_INT },
31406 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vextractf128v8sf, "__builtin_ia32_vextractf128_ps256", IX86_BUILTIN_EXTRACTF128PS256, UNKNOWN, (int) V4SF_FTYPE_V8SF_INT },
31407 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vextractf128v8si, "__builtin_ia32_vextractf128_si256", IX86_BUILTIN_EXTRACTF128SI256, UNKNOWN, (int) V4SI_FTYPE_V8SI_INT },
31408 { OPTION_MASK_ISA_AVX, CODE_FOR_floatv4siv4df2, "__builtin_ia32_cvtdq2pd256", IX86_BUILTIN_CVTDQ2PD256, UNKNOWN, (int) V4DF_FTYPE_V4SI },
31409 { OPTION_MASK_ISA_AVX, CODE_FOR_floatv8siv8sf2, "__builtin_ia32_cvtdq2ps256", IX86_BUILTIN_CVTDQ2PS256, UNKNOWN, (int) V8SF_FTYPE_V8SI },
31410 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cvtpd2ps256, "__builtin_ia32_cvtpd2ps256", IX86_BUILTIN_CVTPD2PS256, UNKNOWN, (int) V4SF_FTYPE_V4DF },
31411 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_fix_notruncv8sfv8si, "__builtin_ia32_cvtps2dq256", IX86_BUILTIN_CVTPS2DQ256, UNKNOWN, (int) V8SI_FTYPE_V8SF },
31412 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cvtps2pd256, "__builtin_ia32_cvtps2pd256", IX86_BUILTIN_CVTPS2PD256, UNKNOWN, (int) V4DF_FTYPE_V4SF },
31413 { OPTION_MASK_ISA_AVX, CODE_FOR_fix_truncv4dfv4si2, "__builtin_ia32_cvttpd2dq256", IX86_BUILTIN_CVTTPD2DQ256, UNKNOWN, (int) V4SI_FTYPE_V4DF },
31414 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cvtpd2dq256, "__builtin_ia32_cvtpd2dq256", IX86_BUILTIN_CVTPD2DQ256, UNKNOWN, (int) V4SI_FTYPE_V4DF },
31415 { OPTION_MASK_ISA_AVX, CODE_FOR_fix_truncv8sfv8si2, "__builtin_ia32_cvttps2dq256", IX86_BUILTIN_CVTTPS2DQ256, UNKNOWN, (int) V8SI_FTYPE_V8SF },
31416 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vperm2f128v4df3, "__builtin_ia32_vperm2f128_pd256", IX86_BUILTIN_VPERM2F128PD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_INT },
31417 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vperm2f128v8sf3, "__builtin_ia32_vperm2f128_ps256", IX86_BUILTIN_VPERM2F128PS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_INT },
31418 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vperm2f128v8si3, "__builtin_ia32_vperm2f128_si256", IX86_BUILTIN_VPERM2F128SI256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_INT },
31419 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilv2df, "__builtin_ia32_vpermilpd", IX86_BUILTIN_VPERMILPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_INT },
31420 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilv4sf, "__builtin_ia32_vpermilps", IX86_BUILTIN_VPERMILPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_INT },
31421 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilv4df, "__builtin_ia32_vpermilpd256", IX86_BUILTIN_VPERMILPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_INT },
31422 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilv8sf, "__builtin_ia32_vpermilps256", IX86_BUILTIN_VPERMILPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_INT },
31423 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vinsertf128v4df, "__builtin_ia32_vinsertf128_pd256", IX86_BUILTIN_VINSERTF128PD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V2DF_INT },
31424 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vinsertf128v8sf, "__builtin_ia32_vinsertf128_ps256", IX86_BUILTIN_VINSERTF128PS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V4SF_INT },
31425 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vinsertf128v8si, "__builtin_ia32_vinsertf128_si256", IX86_BUILTIN_VINSERTF128SI256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V4SI_INT },
31427 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movshdup256, "__builtin_ia32_movshdup256", IX86_BUILTIN_MOVSHDUP256, UNKNOWN, (int) V8SF_FTYPE_V8SF },
31428 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movsldup256, "__builtin_ia32_movsldup256", IX86_BUILTIN_MOVSLDUP256, UNKNOWN, (int) V8SF_FTYPE_V8SF },
31429 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movddup256, "__builtin_ia32_movddup256", IX86_BUILTIN_MOVDDUP256, UNKNOWN, (int) V4DF_FTYPE_V4DF },
31431 { OPTION_MASK_ISA_AVX, CODE_FOR_sqrtv4df2, "__builtin_ia32_sqrtpd256", IX86_BUILTIN_SQRTPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF },
31432 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_sqrtv8sf2, "__builtin_ia32_sqrtps256", IX86_BUILTIN_SQRTPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF },
31433 { OPTION_MASK_ISA_AVX, CODE_FOR_sqrtv8sf2, "__builtin_ia32_sqrtps_nr256", IX86_BUILTIN_SQRTPS_NR256, UNKNOWN, (int) V8SF_FTYPE_V8SF },
31434 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_rsqrtv8sf2, "__builtin_ia32_rsqrtps256", IX86_BUILTIN_RSQRTPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF },
31435 { OPTION_MASK_ISA_AVX, CODE_FOR_rsqrtv8sf2, "__builtin_ia32_rsqrtps_nr256", IX86_BUILTIN_RSQRTPS_NR256, UNKNOWN, (int) V8SF_FTYPE_V8SF },
31437 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_rcpv8sf2, "__builtin_ia32_rcpps256", IX86_BUILTIN_RCPPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF },
31439 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundpd256, "__builtin_ia32_roundpd256", IX86_BUILTIN_ROUNDPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_INT },
31440 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundps256, "__builtin_ia32_roundps256", IX86_BUILTIN_ROUNDPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_INT },
31442 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundpd256, "__builtin_ia32_floorpd256", IX86_BUILTIN_FLOORPD256, (enum rtx_code) ROUND_FLOOR, (int) V4DF_FTYPE_V4DF_ROUND },
31443 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundpd256, "__builtin_ia32_ceilpd256", IX86_BUILTIN_CEILPD256, (enum rtx_code) ROUND_CEIL, (int) V4DF_FTYPE_V4DF_ROUND },
31444 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundpd256, "__builtin_ia32_truncpd256", IX86_BUILTIN_TRUNCPD256, (enum rtx_code) ROUND_TRUNC, (int) V4DF_FTYPE_V4DF_ROUND },
31445 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundpd256, "__builtin_ia32_rintpd256", IX86_BUILTIN_RINTPD256, (enum rtx_code) ROUND_MXCSR, (int) V4DF_FTYPE_V4DF_ROUND },
31447 { OPTION_MASK_ISA_AVX, CODE_FOR_roundv4df2, "__builtin_ia32_roundpd_az256", IX86_BUILTIN_ROUNDPD_AZ256, UNKNOWN, (int) V4DF_FTYPE_V4DF },
31448 { OPTION_MASK_ISA_AVX, CODE_FOR_roundv4df2_vec_pack_sfix, "__builtin_ia32_roundpd_az_vec_pack_sfix256", IX86_BUILTIN_ROUNDPD_AZ_VEC_PACK_SFIX256, UNKNOWN, (int) V8SI_FTYPE_V4DF_V4DF },
31450 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundpd_vec_pack_sfix256, "__builtin_ia32_floorpd_vec_pack_sfix256", IX86_BUILTIN_FLOORPD_VEC_PACK_SFIX256, (enum rtx_code) ROUND_FLOOR, (int) V8SI_FTYPE_V4DF_V4DF_ROUND },
31451 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundpd_vec_pack_sfix256, "__builtin_ia32_ceilpd_vec_pack_sfix256", IX86_BUILTIN_CEILPD_VEC_PACK_SFIX256, (enum rtx_code) ROUND_CEIL, (int) V8SI_FTYPE_V4DF_V4DF_ROUND },
31453 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundps256, "__builtin_ia32_floorps256", IX86_BUILTIN_FLOORPS256, (enum rtx_code) ROUND_FLOOR, (int) V8SF_FTYPE_V8SF_ROUND },
31454 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundps256, "__builtin_ia32_ceilps256", IX86_BUILTIN_CEILPS256, (enum rtx_code) ROUND_CEIL, (int) V8SF_FTYPE_V8SF_ROUND },
31455 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundps256, "__builtin_ia32_truncps256", IX86_BUILTIN_TRUNCPS256, (enum rtx_code) ROUND_TRUNC, (int) V8SF_FTYPE_V8SF_ROUND },
31456 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundps256, "__builtin_ia32_rintps256", IX86_BUILTIN_RINTPS256, (enum rtx_code) ROUND_MXCSR, (int) V8SF_FTYPE_V8SF_ROUND },
31458 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundps_sfix256, "__builtin_ia32_floorps_sfix256", IX86_BUILTIN_FLOORPS_SFIX256, (enum rtx_code) ROUND_FLOOR, (int) V8SI_FTYPE_V8SF_ROUND },
31459 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundps_sfix256, "__builtin_ia32_ceilps_sfix256", IX86_BUILTIN_CEILPS_SFIX256, (enum rtx_code) ROUND_CEIL, (int) V8SI_FTYPE_V8SF_ROUND },
31461 { OPTION_MASK_ISA_AVX, CODE_FOR_roundv8sf2, "__builtin_ia32_roundps_az256", IX86_BUILTIN_ROUNDPS_AZ256, UNKNOWN, (int) V8SF_FTYPE_V8SF },
31462 { OPTION_MASK_ISA_AVX, CODE_FOR_roundv8sf2_sfix, "__builtin_ia32_roundps_az_sfix256", IX86_BUILTIN_ROUNDPS_AZ_SFIX256, UNKNOWN, (int) V8SI_FTYPE_V8SF },
31464 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_unpckhpd256, "__builtin_ia32_unpckhpd256", IX86_BUILTIN_UNPCKHPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
31465 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_unpcklpd256, "__builtin_ia32_unpcklpd256", IX86_BUILTIN_UNPCKLPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
31466 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_unpckhps256, "__builtin_ia32_unpckhps256", IX86_BUILTIN_UNPCKHPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
31467 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_unpcklps256, "__builtin_ia32_unpcklps256", IX86_BUILTIN_UNPCKLPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
31469 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_si256_si, "__builtin_ia32_si256_si", IX86_BUILTIN_SI256_SI, UNKNOWN, (int) V8SI_FTYPE_V4SI },
31470 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_ps256_ps, "__builtin_ia32_ps256_ps", IX86_BUILTIN_PS256_PS, UNKNOWN, (int) V8SF_FTYPE_V4SF },
31471 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_pd256_pd, "__builtin_ia32_pd256_pd", IX86_BUILTIN_PD256_PD, UNKNOWN, (int) V4DF_FTYPE_V2DF },
31472 { OPTION_MASK_ISA_AVX, CODE_FOR_vec_extract_lo_v8si, "__builtin_ia32_si_si256", IX86_BUILTIN_SI_SI256, UNKNOWN, (int) V4SI_FTYPE_V8SI },
31473 { OPTION_MASK_ISA_AVX, CODE_FOR_vec_extract_lo_v8sf, "__builtin_ia32_ps_ps256", IX86_BUILTIN_PS_PS256, UNKNOWN, (int) V4SF_FTYPE_V8SF },
31474 { OPTION_MASK_ISA_AVX, CODE_FOR_vec_extract_lo_v4df, "__builtin_ia32_pd_pd256", IX86_BUILTIN_PD_PD256, UNKNOWN, (int) V2DF_FTYPE_V4DF },
31476 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestpd, "__builtin_ia32_vtestzpd", IX86_BUILTIN_VTESTZPD, EQ, (int) INT_FTYPE_V2DF_V2DF_PTEST },
31477 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestpd, "__builtin_ia32_vtestcpd", IX86_BUILTIN_VTESTCPD, LTU, (int) INT_FTYPE_V2DF_V2DF_PTEST },
31478 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestpd, "__builtin_ia32_vtestnzcpd", IX86_BUILTIN_VTESTNZCPD, GTU, (int) INT_FTYPE_V2DF_V2DF_PTEST },
31479 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestps, "__builtin_ia32_vtestzps", IX86_BUILTIN_VTESTZPS, EQ, (int) INT_FTYPE_V4SF_V4SF_PTEST },
31480 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestps, "__builtin_ia32_vtestcps", IX86_BUILTIN_VTESTCPS, LTU, (int) INT_FTYPE_V4SF_V4SF_PTEST },
31481 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestps, "__builtin_ia32_vtestnzcps", IX86_BUILTIN_VTESTNZCPS, GTU, (int) INT_FTYPE_V4SF_V4SF_PTEST },
31482 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestpd256, "__builtin_ia32_vtestzpd256", IX86_BUILTIN_VTESTZPD256, EQ, (int) INT_FTYPE_V4DF_V4DF_PTEST },
31483 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestpd256, "__builtin_ia32_vtestcpd256", IX86_BUILTIN_VTESTCPD256, LTU, (int) INT_FTYPE_V4DF_V4DF_PTEST },
31484 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestpd256, "__builtin_ia32_vtestnzcpd256", IX86_BUILTIN_VTESTNZCPD256, GTU, (int) INT_FTYPE_V4DF_V4DF_PTEST },
31485 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestps256, "__builtin_ia32_vtestzps256", IX86_BUILTIN_VTESTZPS256, EQ, (int) INT_FTYPE_V8SF_V8SF_PTEST },
31486 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestps256, "__builtin_ia32_vtestcps256", IX86_BUILTIN_VTESTCPS256, LTU, (int) INT_FTYPE_V8SF_V8SF_PTEST },
31487 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestps256, "__builtin_ia32_vtestnzcps256", IX86_BUILTIN_VTESTNZCPS256, GTU, (int) INT_FTYPE_V8SF_V8SF_PTEST },
31488 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_ptest256, "__builtin_ia32_ptestz256", IX86_BUILTIN_PTESTZ256, EQ, (int) INT_FTYPE_V4DI_V4DI_PTEST },
31489 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_ptest256, "__builtin_ia32_ptestc256", IX86_BUILTIN_PTESTC256, LTU, (int) INT_FTYPE_V4DI_V4DI_PTEST },
31490 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_ptest256, "__builtin_ia32_ptestnzc256", IX86_BUILTIN_PTESTNZC256, GTU, (int) INT_FTYPE_V4DI_V4DI_PTEST },
31492 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movmskpd256, "__builtin_ia32_movmskpd256", IX86_BUILTIN_MOVMSKPD256, UNKNOWN, (int) INT_FTYPE_V4DF },
31493 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movmskps256, "__builtin_ia32_movmskps256", IX86_BUILTIN_MOVMSKPS256, UNKNOWN, (int) INT_FTYPE_V8SF },
31495 { OPTION_MASK_ISA_AVX, CODE_FOR_copysignv8sf3, "__builtin_ia32_copysignps256", IX86_BUILTIN_CPYSGNPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
31496 { OPTION_MASK_ISA_AVX, CODE_FOR_copysignv4df3, "__builtin_ia32_copysignpd256", IX86_BUILTIN_CPYSGNPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
31498 { OPTION_MASK_ISA_AVX, CODE_FOR_vec_pack_sfix_v4df, "__builtin_ia32_vec_pack_sfix256", IX86_BUILTIN_VEC_PACK_SFIX256, UNKNOWN, (int) V8SI_FTYPE_V4DF_V4DF },
31500 /* AVX2 */
31501 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_mpsadbw, "__builtin_ia32_mpsadbw256", IX86_BUILTIN_MPSADBW256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI_INT },
31502 { OPTION_MASK_ISA_AVX2, CODE_FOR_absv32qi2, "__builtin_ia32_pabsb256", IX86_BUILTIN_PABSB256, UNKNOWN, (int) V32QI_FTYPE_V32QI },
31503 { OPTION_MASK_ISA_AVX2, CODE_FOR_absv16hi2, "__builtin_ia32_pabsw256", IX86_BUILTIN_PABSW256, UNKNOWN, (int) V16HI_FTYPE_V16HI },
31504 { OPTION_MASK_ISA_AVX2, CODE_FOR_absv8si2, "__builtin_ia32_pabsd256", IX86_BUILTIN_PABSD256, UNKNOWN, (int) V8SI_FTYPE_V8SI },
31505 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_packssdw, "__builtin_ia32_packssdw256", IX86_BUILTIN_PACKSSDW256, UNKNOWN, (int) V16HI_FTYPE_V8SI_V8SI },
31506 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_packsswb, "__builtin_ia32_packsswb256", IX86_BUILTIN_PACKSSWB256, UNKNOWN, (int) V32QI_FTYPE_V16HI_V16HI },
31507 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_packusdw, "__builtin_ia32_packusdw256", IX86_BUILTIN_PACKUSDW256, UNKNOWN, (int) V16HI_FTYPE_V8SI_V8SI },
31508 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_packuswb, "__builtin_ia32_packuswb256", IX86_BUILTIN_PACKUSWB256, UNKNOWN, (int) V32QI_FTYPE_V16HI_V16HI },
31509 { OPTION_MASK_ISA_AVX2, CODE_FOR_addv32qi3, "__builtin_ia32_paddb256", IX86_BUILTIN_PADDB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
31510 { OPTION_MASK_ISA_AVX2, CODE_FOR_addv16hi3, "__builtin_ia32_paddw256", IX86_BUILTIN_PADDW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
31511 { OPTION_MASK_ISA_AVX2, CODE_FOR_addv8si3, "__builtin_ia32_paddd256", IX86_BUILTIN_PADDD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
31512 { OPTION_MASK_ISA_AVX2, CODE_FOR_addv4di3, "__builtin_ia32_paddq256", IX86_BUILTIN_PADDQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI },
31513 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_ssaddv32qi3, "__builtin_ia32_paddsb256", IX86_BUILTIN_PADDSB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
31514 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_ssaddv16hi3, "__builtin_ia32_paddsw256", IX86_BUILTIN_PADDSW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
31515 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_usaddv32qi3, "__builtin_ia32_paddusb256", IX86_BUILTIN_PADDUSB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
31516 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_usaddv16hi3, "__builtin_ia32_paddusw256", IX86_BUILTIN_PADDUSW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
31517 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_palignrv2ti, "__builtin_ia32_palignr256", IX86_BUILTIN_PALIGNR256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_INT_CONVERT },
31518 { OPTION_MASK_ISA_AVX2, CODE_FOR_andv4di3, "__builtin_ia32_andsi256", IX86_BUILTIN_AND256I, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI },
31519 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_andnotv4di3, "__builtin_ia32_andnotsi256", IX86_BUILTIN_ANDNOT256I, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI },
31520 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_uavgv32qi3, "__builtin_ia32_pavgb256", IX86_BUILTIN_PAVGB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
31521 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_uavgv16hi3, "__builtin_ia32_pavgw256", IX86_BUILTIN_PAVGW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
31522 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pblendvb, "__builtin_ia32_pblendvb256", IX86_BUILTIN_PBLENDVB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI_V32QI },
31523 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pblendw, "__builtin_ia32_pblendw256", IX86_BUILTIN_PBLENDVW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_INT },
31524 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_eqv32qi3, "__builtin_ia32_pcmpeqb256", IX86_BUILTIN_PCMPEQB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
31525 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_eqv16hi3, "__builtin_ia32_pcmpeqw256", IX86_BUILTIN_PCMPEQW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
31526 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_eqv8si3, "__builtin_ia32_pcmpeqd256", IX86_BUILTIN_PCMPEQD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
31527 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_eqv4di3, "__builtin_ia32_pcmpeqq256", IX86_BUILTIN_PCMPEQQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI },
31528 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_gtv32qi3, "__builtin_ia32_pcmpgtb256", IX86_BUILTIN_PCMPGTB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
31529 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_gtv16hi3, "__builtin_ia32_pcmpgtw256", IX86_BUILTIN_PCMPGTW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
31530 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_gtv8si3, "__builtin_ia32_pcmpgtd256", IX86_BUILTIN_PCMPGTD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
31531 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_gtv4di3, "__builtin_ia32_pcmpgtq256", IX86_BUILTIN_PCMPGTQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI },
31532 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_phaddwv16hi3, "__builtin_ia32_phaddw256", IX86_BUILTIN_PHADDW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
31533 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_phadddv8si3, "__builtin_ia32_phaddd256", IX86_BUILTIN_PHADDD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
31534 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_phaddswv16hi3, "__builtin_ia32_phaddsw256", IX86_BUILTIN_PHADDSW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
31535 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_phsubwv16hi3, "__builtin_ia32_phsubw256", IX86_BUILTIN_PHSUBW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
31536 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_phsubdv8si3, "__builtin_ia32_phsubd256", IX86_BUILTIN_PHSUBD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
31537 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_phsubswv16hi3, "__builtin_ia32_phsubsw256", IX86_BUILTIN_PHSUBSW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
31538 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pmaddubsw256, "__builtin_ia32_pmaddubsw256", IX86_BUILTIN_PMADDUBSW256, UNKNOWN, (int) V16HI_FTYPE_V32QI_V32QI },
31539 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pmaddwd, "__builtin_ia32_pmaddwd256", IX86_BUILTIN_PMADDWD256, UNKNOWN, (int) V8SI_FTYPE_V16HI_V16HI },
31540 { OPTION_MASK_ISA_AVX2, CODE_FOR_smaxv32qi3, "__builtin_ia32_pmaxsb256", IX86_BUILTIN_PMAXSB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
31541 { OPTION_MASK_ISA_AVX2, CODE_FOR_smaxv16hi3, "__builtin_ia32_pmaxsw256", IX86_BUILTIN_PMAXSW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
31542 { OPTION_MASK_ISA_AVX2, CODE_FOR_smaxv8si3 , "__builtin_ia32_pmaxsd256", IX86_BUILTIN_PMAXSD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
31543 { OPTION_MASK_ISA_AVX2, CODE_FOR_umaxv32qi3, "__builtin_ia32_pmaxub256", IX86_BUILTIN_PMAXUB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
31544 { OPTION_MASK_ISA_AVX2, CODE_FOR_umaxv16hi3, "__builtin_ia32_pmaxuw256", IX86_BUILTIN_PMAXUW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
31545 { OPTION_MASK_ISA_AVX2, CODE_FOR_umaxv8si3 , "__builtin_ia32_pmaxud256", IX86_BUILTIN_PMAXUD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
31546 { OPTION_MASK_ISA_AVX2, CODE_FOR_sminv32qi3, "__builtin_ia32_pminsb256", IX86_BUILTIN_PMINSB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
31547 { OPTION_MASK_ISA_AVX2, CODE_FOR_sminv16hi3, "__builtin_ia32_pminsw256", IX86_BUILTIN_PMINSW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
31548 { OPTION_MASK_ISA_AVX2, CODE_FOR_sminv8si3 , "__builtin_ia32_pminsd256", IX86_BUILTIN_PMINSD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
31549 { OPTION_MASK_ISA_AVX2, CODE_FOR_uminv32qi3, "__builtin_ia32_pminub256", IX86_BUILTIN_PMINUB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
31550 { OPTION_MASK_ISA_AVX2, CODE_FOR_uminv16hi3, "__builtin_ia32_pminuw256", IX86_BUILTIN_PMINUW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
31551 { OPTION_MASK_ISA_AVX2, CODE_FOR_uminv8si3 , "__builtin_ia32_pminud256", IX86_BUILTIN_PMINUD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
31552 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pmovmskb, "__builtin_ia32_pmovmskb256", IX86_BUILTIN_PMOVMSKB256, UNKNOWN, (int) INT_FTYPE_V32QI },
31553 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_sign_extendv16qiv16hi2, "__builtin_ia32_pmovsxbw256", IX86_BUILTIN_PMOVSXBW256, UNKNOWN, (int) V16HI_FTYPE_V16QI },
31554 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_sign_extendv8qiv8si2 , "__builtin_ia32_pmovsxbd256", IX86_BUILTIN_PMOVSXBD256, UNKNOWN, (int) V8SI_FTYPE_V16QI },
31555 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_sign_extendv4qiv4di2 , "__builtin_ia32_pmovsxbq256", IX86_BUILTIN_PMOVSXBQ256, UNKNOWN, (int) V4DI_FTYPE_V16QI },
31556 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_sign_extendv8hiv8si2 , "__builtin_ia32_pmovsxwd256", IX86_BUILTIN_PMOVSXWD256, UNKNOWN, (int) V8SI_FTYPE_V8HI },
31557 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_sign_extendv4hiv4di2 , "__builtin_ia32_pmovsxwq256", IX86_BUILTIN_PMOVSXWQ256, UNKNOWN, (int) V4DI_FTYPE_V8HI },
31558 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_sign_extendv4siv4di2 , "__builtin_ia32_pmovsxdq256", IX86_BUILTIN_PMOVSXDQ256, UNKNOWN, (int) V4DI_FTYPE_V4SI },
31559 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_zero_extendv16qiv16hi2, "__builtin_ia32_pmovzxbw256", IX86_BUILTIN_PMOVZXBW256, UNKNOWN, (int) V16HI_FTYPE_V16QI },
31560 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_zero_extendv8qiv8si2 , "__builtin_ia32_pmovzxbd256", IX86_BUILTIN_PMOVZXBD256, UNKNOWN, (int) V8SI_FTYPE_V16QI },
31561 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_zero_extendv4qiv4di2 , "__builtin_ia32_pmovzxbq256", IX86_BUILTIN_PMOVZXBQ256, UNKNOWN, (int) V4DI_FTYPE_V16QI },
31562 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_zero_extendv8hiv8si2 , "__builtin_ia32_pmovzxwd256", IX86_BUILTIN_PMOVZXWD256, UNKNOWN, (int) V8SI_FTYPE_V8HI },
31563 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_zero_extendv4hiv4di2 , "__builtin_ia32_pmovzxwq256", IX86_BUILTIN_PMOVZXWQ256, UNKNOWN, (int) V4DI_FTYPE_V8HI },
31564 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_zero_extendv4siv4di2 , "__builtin_ia32_pmovzxdq256", IX86_BUILTIN_PMOVZXDQ256, UNKNOWN, (int) V4DI_FTYPE_V4SI },
31565 { OPTION_MASK_ISA_AVX2, CODE_FOR_vec_widen_smult_even_v8si, "__builtin_ia32_pmuldq256", IX86_BUILTIN_PMULDQ256, UNKNOWN, (int) V4DI_FTYPE_V8SI_V8SI },
31566 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pmulhrswv16hi3 , "__builtin_ia32_pmulhrsw256", IX86_BUILTIN_PMULHRSW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
31567 { OPTION_MASK_ISA_AVX2, CODE_FOR_umulv16hi3_highpart, "__builtin_ia32_pmulhuw256" , IX86_BUILTIN_PMULHUW256 , UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
31568 { OPTION_MASK_ISA_AVX2, CODE_FOR_smulv16hi3_highpart, "__builtin_ia32_pmulhw256" , IX86_BUILTIN_PMULHW256 , UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
31569 { OPTION_MASK_ISA_AVX2, CODE_FOR_mulv16hi3, "__builtin_ia32_pmullw256" , IX86_BUILTIN_PMULLW256 , UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
31570 { OPTION_MASK_ISA_AVX2, CODE_FOR_mulv8si3, "__builtin_ia32_pmulld256" , IX86_BUILTIN_PMULLD256 , UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
31571 { OPTION_MASK_ISA_AVX2, CODE_FOR_vec_widen_umult_even_v8si, "__builtin_ia32_pmuludq256", IX86_BUILTIN_PMULUDQ256, UNKNOWN, (int) V4DI_FTYPE_V8SI_V8SI },
31572 { OPTION_MASK_ISA_AVX2, CODE_FOR_iorv4di3, "__builtin_ia32_por256", IX86_BUILTIN_POR256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI },
31573 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_psadbw, "__builtin_ia32_psadbw256", IX86_BUILTIN_PSADBW256, UNKNOWN, (int) V16HI_FTYPE_V32QI_V32QI },
31574 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pshufbv32qi3, "__builtin_ia32_pshufb256", IX86_BUILTIN_PSHUFB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
31575 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pshufdv3, "__builtin_ia32_pshufd256", IX86_BUILTIN_PSHUFD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_INT },
31576 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pshufhwv3, "__builtin_ia32_pshufhw256", IX86_BUILTIN_PSHUFHW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_INT },
31577 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pshuflwv3, "__builtin_ia32_pshuflw256", IX86_BUILTIN_PSHUFLW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_INT },
31578 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_psignv32qi3, "__builtin_ia32_psignb256", IX86_BUILTIN_PSIGNB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
31579 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_psignv16hi3, "__builtin_ia32_psignw256", IX86_BUILTIN_PSIGNW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
31580 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_psignv8si3 , "__builtin_ia32_psignd256", IX86_BUILTIN_PSIGND256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
31581 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_ashlv2ti3, "__builtin_ia32_pslldqi256", IX86_BUILTIN_PSLLDQI256, UNKNOWN, (int) V4DI_FTYPE_V4DI_INT_CONVERT },
31582 { OPTION_MASK_ISA_AVX2, CODE_FOR_ashlv16hi3, "__builtin_ia32_psllwi256", IX86_BUILTIN_PSLLWI256 , UNKNOWN, (int) V16HI_FTYPE_V16HI_SI_COUNT },
31583 { OPTION_MASK_ISA_AVX2, CODE_FOR_ashlv16hi3, "__builtin_ia32_psllw256", IX86_BUILTIN_PSLLW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V8HI_COUNT },
31584 { OPTION_MASK_ISA_AVX2, CODE_FOR_ashlv8si3, "__builtin_ia32_pslldi256", IX86_BUILTIN_PSLLDI256, UNKNOWN, (int) V8SI_FTYPE_V8SI_SI_COUNT },
31585 { OPTION_MASK_ISA_AVX2, CODE_FOR_ashlv8si3, "__builtin_ia32_pslld256", IX86_BUILTIN_PSLLD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V4SI_COUNT },
31586 { OPTION_MASK_ISA_AVX2, CODE_FOR_ashlv4di3, "__builtin_ia32_psllqi256", IX86_BUILTIN_PSLLQI256, UNKNOWN, (int) V4DI_FTYPE_V4DI_INT_COUNT },
31587 { OPTION_MASK_ISA_AVX2, CODE_FOR_ashlv4di3, "__builtin_ia32_psllq256", IX86_BUILTIN_PSLLQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V2DI_COUNT },
31588 { OPTION_MASK_ISA_AVX2, CODE_FOR_ashrv16hi3, "__builtin_ia32_psrawi256", IX86_BUILTIN_PSRAWI256, UNKNOWN, (int) V16HI_FTYPE_V16HI_SI_COUNT },
31589 { OPTION_MASK_ISA_AVX2, CODE_FOR_ashrv16hi3, "__builtin_ia32_psraw256", IX86_BUILTIN_PSRAW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V8HI_COUNT },
31590 { OPTION_MASK_ISA_AVX2, CODE_FOR_ashrv8si3, "__builtin_ia32_psradi256", IX86_BUILTIN_PSRADI256, UNKNOWN, (int) V8SI_FTYPE_V8SI_SI_COUNT },
31591 { OPTION_MASK_ISA_AVX2, CODE_FOR_ashrv8si3, "__builtin_ia32_psrad256", IX86_BUILTIN_PSRAD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V4SI_COUNT },
31592 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_lshrv2ti3, "__builtin_ia32_psrldqi256", IX86_BUILTIN_PSRLDQI256, UNKNOWN, (int) V4DI_FTYPE_V4DI_INT_CONVERT },
31593 { OPTION_MASK_ISA_AVX2, CODE_FOR_lshrv16hi3, "__builtin_ia32_psrlwi256", IX86_BUILTIN_PSRLWI256 , UNKNOWN, (int) V16HI_FTYPE_V16HI_SI_COUNT },
31594 { OPTION_MASK_ISA_AVX2, CODE_FOR_lshrv16hi3, "__builtin_ia32_psrlw256", IX86_BUILTIN_PSRLW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V8HI_COUNT },
31595 { OPTION_MASK_ISA_AVX2, CODE_FOR_lshrv8si3, "__builtin_ia32_psrldi256", IX86_BUILTIN_PSRLDI256, UNKNOWN, (int) V8SI_FTYPE_V8SI_SI_COUNT },
31596 { OPTION_MASK_ISA_AVX2, CODE_FOR_lshrv8si3, "__builtin_ia32_psrld256", IX86_BUILTIN_PSRLD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V4SI_COUNT },
31597 { OPTION_MASK_ISA_AVX2, CODE_FOR_lshrv4di3, "__builtin_ia32_psrlqi256", IX86_BUILTIN_PSRLQI256, UNKNOWN, (int) V4DI_FTYPE_V4DI_INT_COUNT },
31598 { OPTION_MASK_ISA_AVX2, CODE_FOR_lshrv4di3, "__builtin_ia32_psrlq256", IX86_BUILTIN_PSRLQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V2DI_COUNT },
31599 { OPTION_MASK_ISA_AVX2, CODE_FOR_subv32qi3, "__builtin_ia32_psubb256", IX86_BUILTIN_PSUBB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
31600 { OPTION_MASK_ISA_AVX2, CODE_FOR_subv16hi3, "__builtin_ia32_psubw256", IX86_BUILTIN_PSUBW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
31601 { OPTION_MASK_ISA_AVX2, CODE_FOR_subv8si3, "__builtin_ia32_psubd256", IX86_BUILTIN_PSUBD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
31602 { OPTION_MASK_ISA_AVX2, CODE_FOR_subv4di3, "__builtin_ia32_psubq256", IX86_BUILTIN_PSUBQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI },
31603 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_sssubv32qi3, "__builtin_ia32_psubsb256", IX86_BUILTIN_PSUBSB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
31604 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_sssubv16hi3, "__builtin_ia32_psubsw256", IX86_BUILTIN_PSUBSW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
31605 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_ussubv32qi3, "__builtin_ia32_psubusb256", IX86_BUILTIN_PSUBUSB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
31606 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_ussubv16hi3, "__builtin_ia32_psubusw256", IX86_BUILTIN_PSUBUSW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
31607 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_interleave_highv32qi, "__builtin_ia32_punpckhbw256", IX86_BUILTIN_PUNPCKHBW256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
31608 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_interleave_highv16hi, "__builtin_ia32_punpckhwd256", IX86_BUILTIN_PUNPCKHWD256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
31609 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_interleave_highv8si, "__builtin_ia32_punpckhdq256", IX86_BUILTIN_PUNPCKHDQ256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
31610 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_interleave_highv4di, "__builtin_ia32_punpckhqdq256", IX86_BUILTIN_PUNPCKHQDQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI },
31611 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_interleave_lowv32qi, "__builtin_ia32_punpcklbw256", IX86_BUILTIN_PUNPCKLBW256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
31612 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_interleave_lowv16hi, "__builtin_ia32_punpcklwd256", IX86_BUILTIN_PUNPCKLWD256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
31613 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_interleave_lowv8si, "__builtin_ia32_punpckldq256", IX86_BUILTIN_PUNPCKLDQ256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
31614 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_interleave_lowv4di, "__builtin_ia32_punpcklqdq256", IX86_BUILTIN_PUNPCKLQDQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI },
31615 { OPTION_MASK_ISA_AVX2, CODE_FOR_xorv4di3, "__builtin_ia32_pxor256", IX86_BUILTIN_PXOR256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI },
31616 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_vec_dupv4sf, "__builtin_ia32_vbroadcastss_ps", IX86_BUILTIN_VBROADCASTSS_PS, UNKNOWN, (int) V4SF_FTYPE_V4SF },
31617 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_vec_dupv8sf, "__builtin_ia32_vbroadcastss_ps256", IX86_BUILTIN_VBROADCASTSS_PS256, UNKNOWN, (int) V8SF_FTYPE_V4SF },
31618 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_vec_dupv4df, "__builtin_ia32_vbroadcastsd_pd256", IX86_BUILTIN_VBROADCASTSD_PD256, UNKNOWN, (int) V4DF_FTYPE_V2DF },
31619 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_vbroadcasti128_v4di, "__builtin_ia32_vbroadcastsi256", IX86_BUILTIN_VBROADCASTSI256, UNKNOWN, (int) V4DI_FTYPE_V2DI },
31620 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pblenddv4si, "__builtin_ia32_pblendd128", IX86_BUILTIN_PBLENDD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_INT },
31621 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pblenddv8si, "__builtin_ia32_pblendd256", IX86_BUILTIN_PBLENDD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_INT },
31622 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pbroadcastv32qi, "__builtin_ia32_pbroadcastb256", IX86_BUILTIN_PBROADCASTB256, UNKNOWN, (int) V32QI_FTYPE_V16QI },
31623 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pbroadcastv16hi, "__builtin_ia32_pbroadcastw256", IX86_BUILTIN_PBROADCASTW256, UNKNOWN, (int) V16HI_FTYPE_V8HI },
31624 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pbroadcastv8si, "__builtin_ia32_pbroadcastd256", IX86_BUILTIN_PBROADCASTD256, UNKNOWN, (int) V8SI_FTYPE_V4SI },
31625 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pbroadcastv4di, "__builtin_ia32_pbroadcastq256", IX86_BUILTIN_PBROADCASTQ256, UNKNOWN, (int) V4DI_FTYPE_V2DI },
31626 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pbroadcastv16qi, "__builtin_ia32_pbroadcastb128", IX86_BUILTIN_PBROADCASTB128, UNKNOWN, (int) V16QI_FTYPE_V16QI },
31627 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pbroadcastv8hi, "__builtin_ia32_pbroadcastw128", IX86_BUILTIN_PBROADCASTW128, UNKNOWN, (int) V8HI_FTYPE_V8HI },
31628 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pbroadcastv4si, "__builtin_ia32_pbroadcastd128", IX86_BUILTIN_PBROADCASTD128, UNKNOWN, (int) V4SI_FTYPE_V4SI },
31629 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pbroadcastv2di, "__builtin_ia32_pbroadcastq128", IX86_BUILTIN_PBROADCASTQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI },
31630 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_permvarv8si, "__builtin_ia32_permvarsi256", IX86_BUILTIN_VPERMVARSI256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
31631 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_permvarv8sf, "__builtin_ia32_permvarsf256", IX86_BUILTIN_VPERMVARSF256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SI },
31632 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_permv4df, "__builtin_ia32_permdf256", IX86_BUILTIN_VPERMDF256, UNKNOWN, (int) V4DF_FTYPE_V4DF_INT },
31633 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_permv4di, "__builtin_ia32_permdi256", IX86_BUILTIN_VPERMDI256, UNKNOWN, (int) V4DI_FTYPE_V4DI_INT },
31634 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_permv2ti, "__builtin_ia32_permti256", IX86_BUILTIN_VPERMTI256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_INT },
31635 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx_vextractf128v4di, "__builtin_ia32_extract128i256", IX86_BUILTIN_VEXTRACT128I256, UNKNOWN, (int) V2DI_FTYPE_V4DI_INT },
31636 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx_vinsertf128v4di, "__builtin_ia32_insert128i256", IX86_BUILTIN_VINSERT128I256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V2DI_INT },
31637 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_ashlvv4di, "__builtin_ia32_psllv4di", IX86_BUILTIN_PSLLVV4DI, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI },
31638 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_ashlvv2di, "__builtin_ia32_psllv2di", IX86_BUILTIN_PSLLVV2DI, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
31639 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_ashlvv8si, "__builtin_ia32_psllv8si", IX86_BUILTIN_PSLLVV8SI, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
31640 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_ashlvv4si, "__builtin_ia32_psllv4si", IX86_BUILTIN_PSLLVV4SI, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
31641 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_ashrvv8si, "__builtin_ia32_psrav8si", IX86_BUILTIN_PSRAVV8SI, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
31642 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_ashrvv4si, "__builtin_ia32_psrav4si", IX86_BUILTIN_PSRAVV4SI, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
31643 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_lshrvv4di, "__builtin_ia32_psrlv4di", IX86_BUILTIN_PSRLVV4DI, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI },
31644 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_lshrvv2di, "__builtin_ia32_psrlv2di", IX86_BUILTIN_PSRLVV2DI, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
31645 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_lshrvv8si, "__builtin_ia32_psrlv8si", IX86_BUILTIN_PSRLVV8SI, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
31646 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_lshrvv4si, "__builtin_ia32_psrlv4si", IX86_BUILTIN_PSRLVV4SI, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
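/* LZCNT */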
31648 { OPTION_MASK_ISA_LZCNT, CODE_FOR_clzhi2_lzcnt, "__builtin_clzs", IX86_BUILTIN_CLZS, UNKNOWN, (int) UINT16_FTYPE_UINT16 },
31650 /* BMI */
31651 { OPTION_MASK_ISA_BMI, CODE_FOR_bmi_bextr_si, "__builtin_ia32_bextr_u32", IX86_BUILTIN_BEXTR32, UNKNOWN, (int) UINT_FTYPE_UINT_UINT },
31652 { OPTION_MASK_ISA_BMI, CODE_FOR_bmi_bextr_di, "__builtin_ia32_bextr_u64", IX86_BUILTIN_BEXTR64, UNKNOWN, (int) UINT64_FTYPE_UINT64_UINT64 },
31653 { OPTION_MASK_ISA_BMI, CODE_FOR_ctzhi2, "__builtin_ctzs", IX86_BUILTIN_CTZS, UNKNOWN, (int) UINT16_FTYPE_UINT16 },
31655 /* TBM */
31656 { OPTION_MASK_ISA_TBM, CODE_FOR_tbm_bextri_si, "__builtin_ia32_bextri_u32", IX86_BUILTIN_BEXTRI32, UNKNOWN, (int) UINT_FTYPE_UINT_UINT },
31657 { OPTION_MASK_ISA_TBM, CODE_FOR_tbm_bextri_di, "__builtin_ia32_bextri_u64", IX86_BUILTIN_BEXTRI64, UNKNOWN, (int) UINT64_FTYPE_UINT64_UINT64 },
31659 /* F16C */
31660 { OPTION_MASK_ISA_F16C, CODE_FOR_vcvtph2ps, "__builtin_ia32_vcvtph2ps", IX86_BUILTIN_CVTPH2PS, UNKNOWN, (int) V4SF_FTYPE_V8HI },
31661 { OPTION_MASK_ISA_F16C, CODE_FOR_vcvtph2ps256, "__builtin_ia32_vcvtph2ps256", IX86_BUILTIN_CVTPH2PS256, UNKNOWN, (int) V8SF_FTYPE_V8HI },
31662 { OPTION_MASK_ISA_F16C, CODE_FOR_vcvtps2ph, "__builtin_ia32_vcvtps2ph", IX86_BUILTIN_CVTPS2PH, UNKNOWN, (int) V8HI_FTYPE_V4SF_INT },
31663 { OPTION_MASK_ISA_F16C, CODE_FOR_vcvtps2ph256, "__builtin_ia32_vcvtps2ph256", IX86_BUILTIN_CVTPS2PH256, UNKNOWN, (int) V8HI_FTYPE_V8SF_INT },
31665 /* BMI2 */
31666 { OPTION_MASK_ISA_BMI2, CODE_FOR_bmi2_bzhi_si3, "__builtin_ia32_bzhi_si", IX86_BUILTIN_BZHI32, UNKNOWN, (int) UINT_FTYPE_UINT_UINT },
31667 { OPTION_MASK_ISA_BMI2, CODE_FOR_bmi2_bzhi_di3, "__builtin_ia32_bzhi_di", IX86_BUILTIN_BZHI64, UNKNOWN, (int) UINT64_FTYPE_UINT64_UINT64 },
31668 { OPTION_MASK_ISA_BMI2, CODE_FOR_bmi2_pdep_si3, "__builtin_ia32_pdep_si", IX86_BUILTIN_PDEP32, UNKNOWN, (int) UINT_FTYPE_UINT_UINT },
31669 { OPTION_MASK_ISA_BMI2, CODE_FOR_bmi2_pdep_di3, "__builtin_ia32_pdep_di", IX86_BUILTIN_PDEP64, UNKNOWN, (int) UINT64_FTYPE_UINT64_UINT64 },
31670 { OPTION_MASK_ISA_BMI2, CODE_FOR_bmi2_pext_si3, "__builtin_ia32_pext_si", IX86_BUILTIN_PEXT32, UNKNOWN, (int) UINT_FTYPE_UINT_UINT },
31671 { OPTION_MASK_ISA_BMI2, CODE_FOR_bmi2_pext_di3, "__builtin_ia32_pext_di", IX86_BUILTIN_PEXT64, UNKNOWN, (int) UINT64_FTYPE_UINT64_UINT64 },
31673 /* AVX512F */
31674 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_si512_256si, "__builtin_ia32_si512_256si", IX86_BUILTIN_SI512_SI256, UNKNOWN, (int) V16SI_FTYPE_V8SI },
31675 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_ps512_256ps, "__builtin_ia32_ps512_256ps", IX86_BUILTIN_PS512_PS256, UNKNOWN, (int) V16SF_FTYPE_V8SF },
31676 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_pd512_256pd, "__builtin_ia32_pd512_256pd", IX86_BUILTIN_PD512_PD256, UNKNOWN, (int) V8DF_FTYPE_V4DF },
31677 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_si512_si, "__builtin_ia32_si512_si", IX86_BUILTIN_SI512_SI, UNKNOWN, (int) V16SI_FTYPE_V4SI },
31678 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_ps512_ps, "__builtin_ia32_ps512_ps", IX86_BUILTIN_PS512_PS, UNKNOWN, (int) V16SF_FTYPE_V4SF },
31679 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_pd512_pd, "__builtin_ia32_pd512_pd", IX86_BUILTIN_PD512_PD, UNKNOWN, (int) V8DF_FTYPE_V2DF },
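/* Most AVX512F entries below use "_mask" pattern variants: judging by the
   prototype enumerators (..._V16SI_HI, ..._V8DF_QI, etc.), each takes the
   vector operands plus a merge/pass-through vector and an HImode or QImode
   write mask as trailing arguments; the "_maskz" forms zero the unselected
   elements instead of merging.  */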
31680 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_alignv16si_mask, "__builtin_ia32_alignd512_mask", IX86_BUILTIN_ALIGND512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_INT_V16SI_HI },
31681 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_alignv8di_mask, "__builtin_ia32_alignq512_mask", IX86_BUILTIN_ALIGNQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_INT_V8DI_QI },
31682 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_blendmv16si, "__builtin_ia32_blendmd_512_mask", IX86_BUILTIN_BLENDMD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_HI },
31683 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_blendmv8df, "__builtin_ia32_blendmpd_512_mask", IX86_BUILTIN_BLENDMPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_QI },
31684 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_blendmv16sf, "__builtin_ia32_blendmps_512_mask", IX86_BUILTIN_BLENDMPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_HI },
31685 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_blendmv8di, "__builtin_ia32_blendmq_512_mask", IX86_BUILTIN_BLENDMQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_QI },
31686 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_broadcastv16sf_mask, "__builtin_ia32_broadcastf32x4_512", IX86_BUILTIN_BROADCASTF32X4_512, UNKNOWN, (int) V16SF_FTYPE_V4SF_V16SF_HI },
31687 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_broadcastv8df_mask, "__builtin_ia32_broadcastf64x4_512", IX86_BUILTIN_BROADCASTF64X4_512, UNKNOWN, (int) V8DF_FTYPE_V4DF_V8DF_QI },
31688 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_broadcastv16si_mask, "__builtin_ia32_broadcasti32x4_512", IX86_BUILTIN_BROADCASTI32X4_512, UNKNOWN, (int) V16SI_FTYPE_V4SI_V16SI_HI },
31689 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_broadcastv8di_mask, "__builtin_ia32_broadcasti64x4_512", IX86_BUILTIN_BROADCASTI64X4_512, UNKNOWN, (int) V8DI_FTYPE_V4DI_V8DI_QI },
31690 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vec_dupv8df_mask, "__builtin_ia32_broadcastsd512", IX86_BUILTIN_BROADCASTSD512, UNKNOWN, (int) V8DF_FTYPE_V2DF_V8DF_QI },
31691 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vec_dupv16sf_mask, "__builtin_ia32_broadcastss512", IX86_BUILTIN_BROADCASTSS512, UNKNOWN, (int) V16SF_FTYPE_V4SF_V16SF_HI },
31692 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_cmpv16si3_mask, "__builtin_ia32_cmpd512_mask", IX86_BUILTIN_CMPD512, UNKNOWN, (int) HI_FTYPE_V16SI_V16SI_INT_HI },
31693 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_cmpv8di3_mask, "__builtin_ia32_cmpq512_mask", IX86_BUILTIN_CMPQ512, UNKNOWN, (int) QI_FTYPE_V8DI_V8DI_INT_QI },
31694 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_compressv8df_mask, "__builtin_ia32_compressdf512_mask", IX86_BUILTIN_COMPRESSPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_QI },
31695 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_compressv16sf_mask, "__builtin_ia32_compresssf512_mask", IX86_BUILTIN_COMPRESSPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_HI },
31696 { OPTION_MASK_ISA_AVX512F, CODE_FOR_floatv8siv8df2_mask, "__builtin_ia32_cvtdq2pd512_mask", IX86_BUILTIN_CVTDQ2PD512, UNKNOWN, (int) V8DF_FTYPE_V8SI_V8DF_QI },
31697 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vcvtps2ph512_mask, "__builtin_ia32_vcvtps2ph512_mask", IX86_BUILTIN_CVTPS2PH512, UNKNOWN, (int) V16HI_FTYPE_V16SF_INT_V16HI_HI },
31698 { OPTION_MASK_ISA_AVX512F, CODE_FOR_ufloatv8siv8df2_mask, "__builtin_ia32_cvtudq2pd512_mask", IX86_BUILTIN_CVTUDQ2PD512, UNKNOWN, (int) V8DF_FTYPE_V8SI_V8DF_QI },
31699 { OPTION_MASK_ISA_AVX512F, CODE_FOR_cvtusi2sd32, "__builtin_ia32_cvtusi2sd32", IX86_BUILTIN_CVTUSI2SD32, UNKNOWN, (int) V2DF_FTYPE_V2DF_UINT },
31700 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_expandv8df_mask, "__builtin_ia32_expanddf512_mask", IX86_BUILTIN_EXPANDPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_QI },
31701 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_expandv8df_maskz, "__builtin_ia32_expanddf512_maskz", IX86_BUILTIN_EXPANDPD512Z, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_QI },
31702 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_expandv16sf_mask, "__builtin_ia32_expandsf512_mask", IX86_BUILTIN_EXPANDPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_HI },
31703 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_expandv16sf_maskz, "__builtin_ia32_expandsf512_maskz", IX86_BUILTIN_EXPANDPS512Z, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_HI },
31704 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vextractf32x4_mask, "__builtin_ia32_extractf32x4_mask", IX86_BUILTIN_EXTRACTF32X4, UNKNOWN, (int) V4SF_FTYPE_V16SF_INT_V4SF_QI },
31705 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vextractf64x4_mask, "__builtin_ia32_extractf64x4_mask", IX86_BUILTIN_EXTRACTF64X4, UNKNOWN, (int) V4DF_FTYPE_V8DF_INT_V4DF_QI },
31706 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vextracti32x4_mask, "__builtin_ia32_extracti32x4_mask", IX86_BUILTIN_EXTRACTI32X4, UNKNOWN, (int) V4SI_FTYPE_V16SI_INT_V4SI_QI },
31707 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vextracti64x4_mask, "__builtin_ia32_extracti64x4_mask", IX86_BUILTIN_EXTRACTI64X4, UNKNOWN, (int) V4DI_FTYPE_V8DI_INT_V4DI_QI },
31708 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vinsertf32x4_mask, "__builtin_ia32_insertf32x4_mask", IX86_BUILTIN_INSERTF32X4, UNKNOWN, (int) V16SF_FTYPE_V16SF_V4SF_INT_V16SF_HI },
31709 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vinsertf64x4_mask, "__builtin_ia32_insertf64x4_mask", IX86_BUILTIN_INSERTF64X4, UNKNOWN, (int) V8DF_FTYPE_V8DF_V4DF_INT_V8DF_QI },
31710 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vinserti32x4_mask, "__builtin_ia32_inserti32x4_mask", IX86_BUILTIN_INSERTI32X4, UNKNOWN, (int) V16SI_FTYPE_V16SI_V4SI_INT_V16SI_HI },
31711 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vinserti64x4_mask, "__builtin_ia32_inserti64x4_mask", IX86_BUILTIN_INSERTI64X4, UNKNOWN, (int) V8DI_FTYPE_V8DI_V4DI_INT_V8DI_QI },
31712 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_loadv8df_mask, "__builtin_ia32_movapd512_mask", IX86_BUILTIN_MOVAPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_QI },
31713 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_loadv16sf_mask, "__builtin_ia32_movaps512_mask", IX86_BUILTIN_MOVAPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_HI },
31714 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_movddup512_mask, "__builtin_ia32_movddup512_mask", IX86_BUILTIN_MOVDDUP512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_QI },
31715 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_loadv16si_mask, "__builtin_ia32_movdqa32_512_mask", IX86_BUILTIN_MOVDQA32_512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_HI },
31716 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_loadv8di_mask, "__builtin_ia32_movdqa64_512_mask", IX86_BUILTIN_MOVDQA64_512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_QI },
31717 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_movshdup512_mask, "__builtin_ia32_movshdup512_mask", IX86_BUILTIN_MOVSHDUP512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_HI },
31718 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_movsldup512_mask, "__builtin_ia32_movsldup512_mask", IX86_BUILTIN_MOVSLDUP512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_HI },
31719 { OPTION_MASK_ISA_AVX512F, CODE_FOR_absv16si2_mask, "__builtin_ia32_pabsd512_mask", IX86_BUILTIN_PABSD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_HI },
31720 { OPTION_MASK_ISA_AVX512F, CODE_FOR_absv8di2_mask, "__builtin_ia32_pabsq512_mask", IX86_BUILTIN_PABSQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_QI },
31721 { OPTION_MASK_ISA_AVX512F, CODE_FOR_addv16si3_mask, "__builtin_ia32_paddd512_mask", IX86_BUILTIN_PADDD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_HI },
31722 { OPTION_MASK_ISA_AVX512F, CODE_FOR_addv8di3_mask, "__builtin_ia32_paddq512_mask", IX86_BUILTIN_PADDQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
31723 { OPTION_MASK_ISA_AVX512F, CODE_FOR_andv16si3_mask, "__builtin_ia32_pandd512_mask", IX86_BUILTIN_PANDD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_HI },
31724 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_andnotv16si3_mask, "__builtin_ia32_pandnd512_mask", IX86_BUILTIN_PANDND512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_HI },
31725 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_andnotv8di3_mask, "__builtin_ia32_pandnq512_mask", IX86_BUILTIN_PANDNQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
31726 { OPTION_MASK_ISA_AVX512F, CODE_FOR_andv8di3_mask, "__builtin_ia32_pandq512_mask", IX86_BUILTIN_PANDQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
31727 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vec_dupv16si_mask, "__builtin_ia32_pbroadcastd512", IX86_BUILTIN_PBROADCASTD512, UNKNOWN, (int) V16SI_FTYPE_V4SI_V16SI_HI },
31728 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vec_dup_gprv16si_mask, "__builtin_ia32_pbroadcastd512_gpr_mask", IX86_BUILTIN_PBROADCASTD512_GPR, UNKNOWN, (int) V16SI_FTYPE_SI_V16SI_HI },
31729 { OPTION_MASK_ISA_AVX512CD, CODE_FOR_avx512cd_maskb_vec_dupv8di, "__builtin_ia32_broadcastmb512", IX86_BUILTIN_PBROADCASTMB512, UNKNOWN, (int) V8DI_FTYPE_QI },
31730 { OPTION_MASK_ISA_AVX512CD, CODE_FOR_avx512cd_maskw_vec_dupv16si, "__builtin_ia32_broadcastmw512", IX86_BUILTIN_PBROADCASTMW512, UNKNOWN, (int) V16SI_FTYPE_HI },
31731 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vec_dupv8di_mask, "__builtin_ia32_pbroadcastq512", IX86_BUILTIN_PBROADCASTQ512, UNKNOWN, (int) V8DI_FTYPE_V2DI_V8DI_QI },
31732 { OPTION_MASK_ISA_AVX512F | OPTION_MASK_ISA_64BIT, CODE_FOR_avx512f_vec_dup_gprv8di_mask, "__builtin_ia32_pbroadcastq512_gpr_mask", IX86_BUILTIN_PBROADCASTQ512_GPR, UNKNOWN, (int) V8DI_FTYPE_DI_V8DI_QI },
31733 { OPTION_MASK_ISA_AVX512F & ~OPTION_MASK_ISA_64BIT, CODE_FOR_avx512f_vec_dup_memv8di_mask, "__builtin_ia32_pbroadcastq512_mem_mask", IX86_BUILTIN_PBROADCASTQ512_MEM, UNKNOWN, (int) V8DI_FTYPE_DI_V8DI_QI },
31734 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_eqv16si3_mask, "__builtin_ia32_pcmpeqd512_mask", IX86_BUILTIN_PCMPEQD512_MASK, UNKNOWN, (int) HI_FTYPE_V16SI_V16SI_HI },
31735 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_eqv8di3_mask, "__builtin_ia32_pcmpeqq512_mask", IX86_BUILTIN_PCMPEQQ512_MASK, UNKNOWN, (int) QI_FTYPE_V8DI_V8DI_QI },
31736 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_gtv16si3_mask, "__builtin_ia32_pcmpgtd512_mask", IX86_BUILTIN_PCMPGTD512_MASK, UNKNOWN, (int) HI_FTYPE_V16SI_V16SI_HI },
31737 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_gtv8di3_mask, "__builtin_ia32_pcmpgtq512_mask", IX86_BUILTIN_PCMPGTQ512_MASK, UNKNOWN, (int) QI_FTYPE_V8DI_V8DI_QI },
31738 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_compressv16si_mask, "__builtin_ia32_compresssi512_mask", IX86_BUILTIN_PCOMPRESSD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_HI },
31739 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_compressv8di_mask, "__builtin_ia32_compressdi512_mask", IX86_BUILTIN_PCOMPRESSQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_QI },
31740 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_expandv16si_mask, "__builtin_ia32_expandsi512_mask", IX86_BUILTIN_PEXPANDD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_HI },
31741 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_expandv16si_maskz, "__builtin_ia32_expandsi512_maskz", IX86_BUILTIN_PEXPANDD512Z, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_HI },
31742 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_expandv8di_mask, "__builtin_ia32_expanddi512_mask", IX86_BUILTIN_PEXPANDQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_QI },
31743 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_expandv8di_maskz, "__builtin_ia32_expanddi512_maskz", IX86_BUILTIN_PEXPANDQ512Z, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_QI },
31744 { OPTION_MASK_ISA_AVX512F, CODE_FOR_smaxv16si3_mask, "__builtin_ia32_pmaxsd512_mask", IX86_BUILTIN_PMAXSD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_HI },
31745 { OPTION_MASK_ISA_AVX512F, CODE_FOR_smaxv8di3_mask, "__builtin_ia32_pmaxsq512_mask", IX86_BUILTIN_PMAXSQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
31746 { OPTION_MASK_ISA_AVX512F, CODE_FOR_umaxv16si3_mask, "__builtin_ia32_pmaxud512_mask", IX86_BUILTIN_PMAXUD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_HI },
31747 { OPTION_MASK_ISA_AVX512F, CODE_FOR_umaxv8di3_mask, "__builtin_ia32_pmaxuq512_mask", IX86_BUILTIN_PMAXUQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
31748 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sminv16si3_mask, "__builtin_ia32_pminsd512_mask", IX86_BUILTIN_PMINSD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_HI },
31749 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sminv8di3_mask, "__builtin_ia32_pminsq512_mask", IX86_BUILTIN_PMINSQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
31750 { OPTION_MASK_ISA_AVX512F, CODE_FOR_uminv16si3_mask, "__builtin_ia32_pminud512_mask", IX86_BUILTIN_PMINUD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_HI },
31751 { OPTION_MASK_ISA_AVX512F, CODE_FOR_uminv8di3_mask, "__builtin_ia32_pminuq512_mask", IX86_BUILTIN_PMINUQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
31752 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_truncatev16siv16qi2_mask, "__builtin_ia32_pmovdb512_mask", IX86_BUILTIN_PMOVDB512, UNKNOWN, (int) V16QI_FTYPE_V16SI_V16QI_HI },
31753 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_truncatev16siv16hi2_mask, "__builtin_ia32_pmovdw512_mask", IX86_BUILTIN_PMOVDW512, UNKNOWN, (int) V16HI_FTYPE_V16SI_V16HI_HI },
31754 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_truncatev8div16qi2_mask, "__builtin_ia32_pmovqb512_mask", IX86_BUILTIN_PMOVQB512, UNKNOWN, (int) V16QI_FTYPE_V8DI_V16QI_QI },
31755 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_truncatev8div8si2_mask, "__builtin_ia32_pmovqd512_mask", IX86_BUILTIN_PMOVQD512, UNKNOWN, (int) V8SI_FTYPE_V8DI_V8SI_QI },
31756 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_truncatev8div8hi2_mask, "__builtin_ia32_pmovqw512_mask", IX86_BUILTIN_PMOVQW512, UNKNOWN, (int) V8HI_FTYPE_V8DI_V8HI_QI },
31757 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_ss_truncatev16siv16qi2_mask, "__builtin_ia32_pmovsdb512_mask", IX86_BUILTIN_PMOVSDB512, UNKNOWN, (int) V16QI_FTYPE_V16SI_V16QI_HI },
31758 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_ss_truncatev16siv16hi2_mask, "__builtin_ia32_pmovsdw512_mask", IX86_BUILTIN_PMOVSDW512, UNKNOWN, (int) V16HI_FTYPE_V16SI_V16HI_HI },
31759 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_ss_truncatev8div16qi2_mask, "__builtin_ia32_pmovsqb512_mask", IX86_BUILTIN_PMOVSQB512, UNKNOWN, (int) V16QI_FTYPE_V8DI_V16QI_QI },
31760 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_ss_truncatev8div8si2_mask, "__builtin_ia32_pmovsqd512_mask", IX86_BUILTIN_PMOVSQD512, UNKNOWN, (int) V8SI_FTYPE_V8DI_V8SI_QI },
31761 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_ss_truncatev8div8hi2_mask, "__builtin_ia32_pmovsqw512_mask", IX86_BUILTIN_PMOVSQW512, UNKNOWN, (int) V8HI_FTYPE_V8DI_V8HI_QI },
31762 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_sign_extendv16qiv16si2_mask, "__builtin_ia32_pmovsxbd512_mask", IX86_BUILTIN_PMOVSXBD512, UNKNOWN, (int) V16SI_FTYPE_V16QI_V16SI_HI },
31763 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_sign_extendv8qiv8di2_mask, "__builtin_ia32_pmovsxbq512_mask", IX86_BUILTIN_PMOVSXBQ512, UNKNOWN, (int) V8DI_FTYPE_V16QI_V8DI_QI },
31764 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_sign_extendv8siv8di2_mask, "__builtin_ia32_pmovsxdq512_mask", IX86_BUILTIN_PMOVSXDQ512, UNKNOWN, (int) V8DI_FTYPE_V8SI_V8DI_QI },
31765 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_sign_extendv16hiv16si2_mask, "__builtin_ia32_pmovsxwd512_mask", IX86_BUILTIN_PMOVSXWD512, UNKNOWN, (int) V16SI_FTYPE_V16HI_V16SI_HI },
31766 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_sign_extendv8hiv8di2_mask, "__builtin_ia32_pmovsxwq512_mask", IX86_BUILTIN_PMOVSXWQ512, UNKNOWN, (int) V8DI_FTYPE_V8HI_V8DI_QI },
31767 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_us_truncatev16siv16qi2_mask, "__builtin_ia32_pmovusdb512_mask", IX86_BUILTIN_PMOVUSDB512, UNKNOWN, (int) V16QI_FTYPE_V16SI_V16QI_HI },
31768 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_us_truncatev16siv16hi2_mask, "__builtin_ia32_pmovusdw512_mask", IX86_BUILTIN_PMOVUSDW512, UNKNOWN, (int) V16HI_FTYPE_V16SI_V16HI_HI },
31769 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_us_truncatev8div16qi2_mask, "__builtin_ia32_pmovusqb512_mask", IX86_BUILTIN_PMOVUSQB512, UNKNOWN, (int) V16QI_FTYPE_V8DI_V16QI_QI },
31770 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_us_truncatev8div8si2_mask, "__builtin_ia32_pmovusqd512_mask", IX86_BUILTIN_PMOVUSQD512, UNKNOWN, (int) V8SI_FTYPE_V8DI_V8SI_QI },
31771 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_us_truncatev8div8hi2_mask, "__builtin_ia32_pmovusqw512_mask", IX86_BUILTIN_PMOVUSQW512, UNKNOWN, (int) V8HI_FTYPE_V8DI_V8HI_QI },
31772 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_zero_extendv16qiv16si2_mask, "__builtin_ia32_pmovzxbd512_mask", IX86_BUILTIN_PMOVZXBD512, UNKNOWN, (int) V16SI_FTYPE_V16QI_V16SI_HI },
31773 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_zero_extendv8qiv8di2_mask, "__builtin_ia32_pmovzxbq512_mask", IX86_BUILTIN_PMOVZXBQ512, UNKNOWN, (int) V8DI_FTYPE_V16QI_V8DI_QI },
31774 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_zero_extendv8siv8di2_mask, "__builtin_ia32_pmovzxdq512_mask", IX86_BUILTIN_PMOVZXDQ512, UNKNOWN, (int) V8DI_FTYPE_V8SI_V8DI_QI },
31775 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_zero_extendv16hiv16si2_mask, "__builtin_ia32_pmovzxwd512_mask", IX86_BUILTIN_PMOVZXWD512, UNKNOWN, (int) V16SI_FTYPE_V16HI_V16SI_HI },
31776 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_zero_extendv8hiv8di2_mask, "__builtin_ia32_pmovzxwq512_mask", IX86_BUILTIN_PMOVZXWQ512, UNKNOWN, (int) V8DI_FTYPE_V8HI_V8DI_QI },
31777 { OPTION_MASK_ISA_AVX512F, CODE_FOR_vec_widen_smult_even_v16si_mask, "__builtin_ia32_pmuldq512_mask", IX86_BUILTIN_PMULDQ512, UNKNOWN, (int) V8DI_FTYPE_V16SI_V16SI_V8DI_QI },
31778 { OPTION_MASK_ISA_AVX512F, CODE_FOR_mulv16si3_mask, "__builtin_ia32_pmulld512_mask", IX86_BUILTIN_PMULLD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_HI },
31779 { OPTION_MASK_ISA_AVX512F, CODE_FOR_vec_widen_umult_even_v16si_mask, "__builtin_ia32_pmuludq512_mask", IX86_BUILTIN_PMULUDQ512, UNKNOWN, (int) V8DI_FTYPE_V16SI_V16SI_V8DI_QI },
31780 { OPTION_MASK_ISA_AVX512F, CODE_FOR_iorv16si3_mask, "__builtin_ia32_pord512_mask", IX86_BUILTIN_PORD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_HI },
31781 { OPTION_MASK_ISA_AVX512F, CODE_FOR_iorv8di3_mask, "__builtin_ia32_porq512_mask", IX86_BUILTIN_PORQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
31782 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_rolv16si_mask, "__builtin_ia32_prold512_mask", IX86_BUILTIN_PROLD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_INT_V16SI_HI },
31783 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_rolv8di_mask, "__builtin_ia32_prolq512_mask", IX86_BUILTIN_PROLQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_INT_V8DI_QI },
31784 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_rolvv16si_mask, "__builtin_ia32_prolvd512_mask", IX86_BUILTIN_PROLVD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_HI },
31785 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_rolvv8di_mask, "__builtin_ia32_prolvq512_mask", IX86_BUILTIN_PROLVQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
31786 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_rorv16si_mask, "__builtin_ia32_prord512_mask", IX86_BUILTIN_PRORD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_INT_V16SI_HI },
31787 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_rorv8di_mask, "__builtin_ia32_prorq512_mask", IX86_BUILTIN_PRORQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_INT_V8DI_QI },
31788 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_rorvv16si_mask, "__builtin_ia32_prorvd512_mask", IX86_BUILTIN_PRORVD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_HI },
31789 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_rorvv8di_mask, "__builtin_ia32_prorvq512_mask", IX86_BUILTIN_PRORVQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
31790 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_pshufdv3_mask, "__builtin_ia32_pshufd512_mask", IX86_BUILTIN_PSHUFD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_INT_V16SI_HI },
31791 { OPTION_MASK_ISA_AVX512F, CODE_FOR_ashlv16si3_mask, "__builtin_ia32_pslld512_mask", IX86_BUILTIN_PSLLD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V4SI_V16SI_HI },
31792 { OPTION_MASK_ISA_AVX512F, CODE_FOR_ashlv16si3_mask, "__builtin_ia32_pslldi512_mask", IX86_BUILTIN_PSLLDI512, UNKNOWN, (int) V16SI_FTYPE_V16SI_INT_V16SI_HI },
31793 { OPTION_MASK_ISA_AVX512F, CODE_FOR_ashlv8di3_mask, "__builtin_ia32_psllq512_mask", IX86_BUILTIN_PSLLQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V2DI_V8DI_QI },
31794 { OPTION_MASK_ISA_AVX512F, CODE_FOR_ashlv8di3_mask, "__builtin_ia32_psllqi512_mask", IX86_BUILTIN_PSLLQI512, UNKNOWN, (int) V8DI_FTYPE_V8DI_INT_V8DI_QI },
31795 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_ashlvv16si_mask, "__builtin_ia32_psllv16si_mask", IX86_BUILTIN_PSLLVV16SI, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_HI },
31796 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_ashlvv8di_mask, "__builtin_ia32_psllv8di_mask", IX86_BUILTIN_PSLLVV8DI, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
31797 { OPTION_MASK_ISA_AVX512F, CODE_FOR_ashrv16si3_mask, "__builtin_ia32_psrad512_mask", IX86_BUILTIN_PSRAD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V4SI_V16SI_HI },
31798 { OPTION_MASK_ISA_AVX512F, CODE_FOR_ashrv16si3_mask, "__builtin_ia32_psradi512_mask", IX86_BUILTIN_PSRADI512, UNKNOWN, (int) V16SI_FTYPE_V16SI_INT_V16SI_HI },
31799 { OPTION_MASK_ISA_AVX512F, CODE_FOR_ashrv8di3_mask, "__builtin_ia32_psraq512_mask", IX86_BUILTIN_PSRAQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V2DI_V8DI_QI },
31800 { OPTION_MASK_ISA_AVX512F, CODE_FOR_ashrv8di3_mask, "__builtin_ia32_psraqi512_mask", IX86_BUILTIN_PSRAQI512, UNKNOWN, (int) V8DI_FTYPE_V8DI_INT_V8DI_QI },
31801 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_ashrvv16si_mask, "__builtin_ia32_psrav16si_mask", IX86_BUILTIN_PSRAVV16SI, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_HI },
31802 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_ashrvv8di_mask, "__builtin_ia32_psrav8di_mask", IX86_BUILTIN_PSRAVV8DI, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
31803 { OPTION_MASK_ISA_AVX512F, CODE_FOR_lshrv16si3_mask, "__builtin_ia32_psrld512_mask", IX86_BUILTIN_PSRLD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V4SI_V16SI_HI },
31804 { OPTION_MASK_ISA_AVX512F, CODE_FOR_lshrv16si3_mask, "__builtin_ia32_psrldi512_mask", IX86_BUILTIN_PSRLDI512, UNKNOWN, (int) V16SI_FTYPE_V16SI_INT_V16SI_HI },
31805 { OPTION_MASK_ISA_AVX512F, CODE_FOR_lshrv8di3_mask, "__builtin_ia32_psrlq512_mask", IX86_BUILTIN_PSRLQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V2DI_V8DI_QI },
31806 { OPTION_MASK_ISA_AVX512F, CODE_FOR_lshrv8di3_mask, "__builtin_ia32_psrlqi512_mask", IX86_BUILTIN_PSRLQI512, UNKNOWN, (int) V8DI_FTYPE_V8DI_INT_V8DI_QI },
31807 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_lshrvv16si_mask, "__builtin_ia32_psrlv16si_mask", IX86_BUILTIN_PSRLVV16SI, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_HI },
31808 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_lshrvv8di_mask, "__builtin_ia32_psrlv8di_mask", IX86_BUILTIN_PSRLVV8DI, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
31809 { OPTION_MASK_ISA_AVX512F, CODE_FOR_subv16si3_mask, "__builtin_ia32_psubd512_mask", IX86_BUILTIN_PSUBD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_HI },
31810 { OPTION_MASK_ISA_AVX512F, CODE_FOR_subv8di3_mask, "__builtin_ia32_psubq512_mask", IX86_BUILTIN_PSUBQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
31811 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_testmv16si3_mask, "__builtin_ia32_ptestmd512", IX86_BUILTIN_PTESTMD512, UNKNOWN, (int) HI_FTYPE_V16SI_V16SI_HI },
31812 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_testmv8di3_mask, "__builtin_ia32_ptestmq512", IX86_BUILTIN_PTESTMQ512, UNKNOWN, (int) QI_FTYPE_V8DI_V8DI_QI },
31813 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_testnmv16si3_mask, "__builtin_ia32_ptestnmd512", IX86_BUILTIN_PTESTNMD512, UNKNOWN, (int) HI_FTYPE_V16SI_V16SI_HI },
31814 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_testnmv8di3_mask, "__builtin_ia32_ptestnmq512", IX86_BUILTIN_PTESTNMQ512, UNKNOWN, (int) QI_FTYPE_V8DI_V8DI_QI },
31815 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_interleave_highv16si_mask, "__builtin_ia32_punpckhdq512_mask", IX86_BUILTIN_PUNPCKHDQ512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_HI },
31816 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_interleave_highv8di_mask, "__builtin_ia32_punpckhqdq512_mask", IX86_BUILTIN_PUNPCKHQDQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
31817 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_interleave_lowv16si_mask, "__builtin_ia32_punpckldq512_mask", IX86_BUILTIN_PUNPCKLDQ512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_HI },
31818 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_interleave_lowv8di_mask, "__builtin_ia32_punpcklqdq512_mask", IX86_BUILTIN_PUNPCKLQDQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
31819 { OPTION_MASK_ISA_AVX512F, CODE_FOR_xorv16si3_mask, "__builtin_ia32_pxord512_mask", IX86_BUILTIN_PXORD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_HI },
31820 { OPTION_MASK_ISA_AVX512F, CODE_FOR_xorv8di3_mask, "__builtin_ia32_pxorq512_mask", IX86_BUILTIN_PXORQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
31821 { OPTION_MASK_ISA_AVX512F, CODE_FOR_rcp14v8df_mask, "__builtin_ia32_rcp14pd512_mask", IX86_BUILTIN_RCP14PD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_QI },
31822 { OPTION_MASK_ISA_AVX512F, CODE_FOR_rcp14v16sf_mask, "__builtin_ia32_rcp14ps512_mask", IX86_BUILTIN_RCP14PS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_HI },
31823 { OPTION_MASK_ISA_AVX512F, CODE_FOR_srcp14v2df, "__builtin_ia32_rcp14sd", IX86_BUILTIN_RCP14SD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
31824 { OPTION_MASK_ISA_AVX512F, CODE_FOR_srcp14v4sf, "__builtin_ia32_rcp14ss", IX86_BUILTIN_RCP14SS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
31825 { OPTION_MASK_ISA_AVX512F, CODE_FOR_rsqrt14v8df_mask, "__builtin_ia32_rsqrt14pd512_mask", IX86_BUILTIN_RSQRT14PD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_QI },
31826 { OPTION_MASK_ISA_AVX512F, CODE_FOR_rsqrt14v16sf_mask, "__builtin_ia32_rsqrt14ps512_mask", IX86_BUILTIN_RSQRT14PS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_HI },
31827 { OPTION_MASK_ISA_AVX512F, CODE_FOR_rsqrt14v2df, "__builtin_ia32_rsqrt14sd", IX86_BUILTIN_RSQRT14SD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
31828 { OPTION_MASK_ISA_AVX512F, CODE_FOR_rsqrt14v4sf, "__builtin_ia32_rsqrt14ss", IX86_BUILTIN_RSQRT14SS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
31829 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_shufpd512_mask, "__builtin_ia32_shufpd512_mask", IX86_BUILTIN_SHUFPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_INT_V8DF_QI },
31830 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_shufps512_mask, "__builtin_ia32_shufps512_mask", IX86_BUILTIN_SHUFPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_INT_V16SF_HI },
31831 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_shuf_f32x4_mask, "__builtin_ia32_shuf_f32x4_mask", IX86_BUILTIN_SHUF_F32x4, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_INT_V16SF_HI },
31832 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_shuf_f64x2_mask, "__builtin_ia32_shuf_f64x2_mask", IX86_BUILTIN_SHUF_F64x2, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_INT_V8DF_QI },
31833 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_shuf_i32x4_mask, "__builtin_ia32_shuf_i32x4_mask", IX86_BUILTIN_SHUF_I32x4, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_INT_V16SI_HI },
31834 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_shuf_i64x2_mask, "__builtin_ia32_shuf_i64x2_mask", IX86_BUILTIN_SHUF_I64x2, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_INT_V8DI_QI },
31835 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_ucmpv16si3_mask, "__builtin_ia32_ucmpd512_mask", IX86_BUILTIN_UCMPD512, UNKNOWN, (int) HI_FTYPE_V16SI_V16SI_INT_HI },
31836 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_ucmpv8di3_mask, "__builtin_ia32_ucmpq512_mask", IX86_BUILTIN_UCMPQ512, UNKNOWN, (int) QI_FTYPE_V8DI_V8DI_INT_QI },
31837 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_unpckhpd512_mask, "__builtin_ia32_unpckhpd512_mask", IX86_BUILTIN_UNPCKHPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI },
31838 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_unpckhps512_mask, "__builtin_ia32_unpckhps512_mask", IX86_BUILTIN_UNPCKHPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI },
31839 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_unpcklpd512_mask, "__builtin_ia32_unpcklpd512_mask", IX86_BUILTIN_UNPCKLPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI },
31840 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_unpcklps512_mask, "__builtin_ia32_unpcklps512_mask", IX86_BUILTIN_UNPCKLPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI },
31841 { OPTION_MASK_ISA_AVX512CD, CODE_FOR_clzv16si2_mask, "__builtin_ia32_vplzcntd_512_mask", IX86_BUILTIN_VPCLZCNTD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_HI },
31842 { OPTION_MASK_ISA_AVX512CD, CODE_FOR_clzv8di2_mask, "__builtin_ia32_vplzcntq_512_mask", IX86_BUILTIN_VPCLZCNTQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_QI },
31843 { OPTION_MASK_ISA_AVX512CD, CODE_FOR_conflictv16si_mask, "__builtin_ia32_vpconflictsi_512_mask", IX86_BUILTIN_VPCONFLICTD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_HI },
31844 { OPTION_MASK_ISA_AVX512CD, CODE_FOR_conflictv8di_mask, "__builtin_ia32_vpconflictdi_512_mask", IX86_BUILTIN_VPCONFLICTQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_QI },
31845 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_permv8df_mask, "__builtin_ia32_permdf512_mask", IX86_BUILTIN_VPERMDF512, UNKNOWN, (int) V8DF_FTYPE_V8DF_INT_V8DF_QI },
31846 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_permv8di_mask, "__builtin_ia32_permdi512_mask", IX86_BUILTIN_VPERMDI512, UNKNOWN, (int) V8DI_FTYPE_V8DI_INT_V8DI_QI },
31847 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vpermi2varv16si3_mask, "__builtin_ia32_vpermi2vard512_mask", IX86_BUILTIN_VPERMI2VARD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_HI },
31848 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vpermi2varv8df3_mask, "__builtin_ia32_vpermi2varpd512_mask", IX86_BUILTIN_VPERMI2VARPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DI_V8DF_QI },
31849 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vpermi2varv16sf3_mask, "__builtin_ia32_vpermi2varps512_mask", IX86_BUILTIN_VPERMI2VARPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SI_V16SF_HI },
31850 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vpermi2varv8di3_mask, "__builtin_ia32_vpermi2varq512_mask", IX86_BUILTIN_VPERMI2VARQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
31851 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vpermilv8df_mask, "__builtin_ia32_vpermilpd512_mask", IX86_BUILTIN_VPERMILPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_INT_V8DF_QI },
31852 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vpermilv16sf_mask, "__builtin_ia32_vpermilps512_mask", IX86_BUILTIN_VPERMILPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_INT_V16SF_HI },
31853 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vpermilvarv8df3_mask, "__builtin_ia32_vpermilvarpd512_mask", IX86_BUILTIN_VPERMILVARPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DI_V8DF_QI },
31854 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vpermilvarv16sf3_mask, "__builtin_ia32_vpermilvarps512_mask", IX86_BUILTIN_VPERMILVARPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SI_V16SF_HI },
31855 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vpermt2varv16si3_mask, "__builtin_ia32_vpermt2vard512_mask", IX86_BUILTIN_VPERMT2VARD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_HI },
31856 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vpermt2varv16si3_maskz, "__builtin_ia32_vpermt2vard512_maskz", IX86_BUILTIN_VPERMT2VARD512_MASKZ, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_HI },
31857 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vpermt2varv8df3_mask, "__builtin_ia32_vpermt2varpd512_mask", IX86_BUILTIN_VPERMT2VARPD512, UNKNOWN, (int) V8DF_FTYPE_V8DI_V8DF_V8DF_QI },
31858 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vpermt2varv8df3_maskz, "__builtin_ia32_vpermt2varpd512_maskz", IX86_BUILTIN_VPERMT2VARPD512_MASKZ, UNKNOWN, (int) V8DF_FTYPE_V8DI_V8DF_V8DF_QI },
31859 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vpermt2varv16sf3_mask, "__builtin_ia32_vpermt2varps512_mask", IX86_BUILTIN_VPERMT2VARPS512, UNKNOWN, (int) V16SF_FTYPE_V16SI_V16SF_V16SF_HI },
31860 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vpermt2varv16sf3_maskz, "__builtin_ia32_vpermt2varps512_maskz", IX86_BUILTIN_VPERMT2VARPS512_MASKZ, UNKNOWN, (int) V16SF_FTYPE_V16SI_V16SF_V16SF_HI },
31861 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vpermt2varv8di3_mask, "__builtin_ia32_vpermt2varq512_mask", IX86_BUILTIN_VPERMT2VARQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
31862 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vpermt2varv8di3_maskz, "__builtin_ia32_vpermt2varq512_maskz", IX86_BUILTIN_VPERMT2VARQ512_MASKZ, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
31863 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_permvarv8df_mask, "__builtin_ia32_permvardf512_mask", IX86_BUILTIN_VPERMVARDF512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DI_V8DF_QI },
31864 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_permvarv8di_mask, "__builtin_ia32_permvardi512_mask", IX86_BUILTIN_VPERMVARDI512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
31865 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_permvarv16sf_mask, "__builtin_ia32_permvarsf512_mask", IX86_BUILTIN_VPERMVARSF512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SI_V16SF_HI },
31866 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_permvarv16si_mask, "__builtin_ia32_permvarsi512_mask", IX86_BUILTIN_VPERMVARSI512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_HI },
31867 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vternlogv16si_mask, "__builtin_ia32_pternlogd512_mask", IX86_BUILTIN_VTERNLOGD512_MASK, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_INT_HI },
31868 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vternlogv16si_maskz, "__builtin_ia32_pternlogd512_maskz", IX86_BUILTIN_VTERNLOGD512_MASKZ, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_INT_HI },
31869 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vternlogv8di_mask, "__builtin_ia32_pternlogq512_mask", IX86_BUILTIN_VTERNLOGQ512_MASK, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_INT_QI },
31870 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vternlogv8di_maskz, "__builtin_ia32_pternlogq512_maskz", IX86_BUILTIN_VTERNLOGQ512_MASKZ, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_INT_QI },
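/* A reading aid for the masked rows above, based on the FTYPE naming
   convention rather than anything spelled out in this table: in a
   signature such as V16SI_FTYPE_V16SI_V16SI_V16SI_HI the last vector
   operand is the merge (pass-through) source and the trailing
   QI/HI/SI operand is the write mask, so a row like paddd512_mask
   roughly computes, per element, dst = mask ? src1 + src2 : merge.  */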
31872 { OPTION_MASK_ISA_AVX512F, CODE_FOR_copysignv16sf3, "__builtin_ia32_copysignps512", IX86_BUILTIN_CPYSGNPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF },
31873 { OPTION_MASK_ISA_AVX512F, CODE_FOR_copysignv8df3, "__builtin_ia32_copysignpd512", IX86_BUILTIN_CPYSGNPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF },
31874 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_sqrtv8df2, "__builtin_ia32_sqrtpd512", IX86_BUILTIN_SQRTPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF },
31875 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sqrtv16sf2, "__builtin_ia32_sqrtps512", IX86_BUILTIN_SQRTPS_NR512, UNKNOWN, (int) V16SF_FTYPE_V16SF },
31876 { OPTION_MASK_ISA_AVX512ER, CODE_FOR_avx512er_exp2v16sf, "__builtin_ia32_exp2ps", IX86_BUILTIN_EXP2PS, UNKNOWN, (int) V16SF_FTYPE_V16SF },
31877 { OPTION_MASK_ISA_AVX512F, CODE_FOR_roundv8df2_vec_pack_sfix, "__builtin_ia32_roundpd_az_vec_pack_sfix512", IX86_BUILTIN_ROUNDPD_AZ_VEC_PACK_SFIX512, UNKNOWN, (int) V16SI_FTYPE_V8DF_V8DF },
31878 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_roundpd_vec_pack_sfix512, "__builtin_ia32_floorpd_vec_pack_sfix512", IX86_BUILTIN_FLOORPD_VEC_PACK_SFIX512, (enum rtx_code) ROUND_FLOOR, (int) V16SI_FTYPE_V8DF_V8DF_ROUND },
31879 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_roundpd_vec_pack_sfix512, "__builtin_ia32_ceilpd_vec_pack_sfix512", IX86_BUILTIN_CEILPD_VEC_PACK_SFIX512, (enum rtx_code) ROUND_CEIL, (int) V16SI_FTYPE_V8DF_V8DF_ROUND },
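/* Sketch of how the floor/ceil rows above differ from the plain
   rounding row (my reading of the table layout, not documented here):
   instead of UNKNOWN they reuse the rtx_code slot to carry
   ROUND_FLOOR or ROUND_CEIL, and the _ROUND suffix on the FTYPE is
   what tells the expander to pass that constant through as an extra
   immediate operand.  */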
31881 /* Mask arithmetic operations */
31882 { OPTION_MASK_ISA_AVX512F, CODE_FOR_andhi3, "__builtin_ia32_kandhi", IX86_BUILTIN_KAND16, UNKNOWN, (int) HI_FTYPE_HI_HI },
31883 { OPTION_MASK_ISA_AVX512F, CODE_FOR_kandnhi, "__builtin_ia32_kandnhi", IX86_BUILTIN_KANDN16, UNKNOWN, (int) HI_FTYPE_HI_HI },
31884 { OPTION_MASK_ISA_AVX512F, CODE_FOR_one_cmplhi2, "__builtin_ia32_knothi", IX86_BUILTIN_KNOT16, UNKNOWN, (int) HI_FTYPE_HI },
31885 { OPTION_MASK_ISA_AVX512F, CODE_FOR_iorhi3, "__builtin_ia32_korhi", IX86_BUILTIN_KOR16, UNKNOWN, (int) HI_FTYPE_HI_HI },
31886 { OPTION_MASK_ISA_AVX512F, CODE_FOR_kortestchi, "__builtin_ia32_kortestchi", IX86_BUILTIN_KORTESTC16, UNKNOWN, (int) HI_FTYPE_HI_HI },
31887 { OPTION_MASK_ISA_AVX512F, CODE_FOR_kortestzhi, "__builtin_ia32_kortestzhi", IX86_BUILTIN_KORTESTZ16, UNKNOWN, (int) HI_FTYPE_HI_HI },
31888 { OPTION_MASK_ISA_AVX512F, CODE_FOR_kunpckhi, "__builtin_ia32_kunpckhi", IX86_BUILTIN_KUNPCKBW, UNKNOWN, (int) HI_FTYPE_HI_HI },
31889 { OPTION_MASK_ISA_AVX512F, CODE_FOR_kxnorhi, "__builtin_ia32_kxnorhi", IX86_BUILTIN_KXNOR16, UNKNOWN, (int) HI_FTYPE_HI_HI },
31890 { OPTION_MASK_ISA_AVX512F, CODE_FOR_xorhi3, "__builtin_ia32_kxorhi", IX86_BUILTIN_KXOR16, UNKNOWN, (int) HI_FTYPE_HI_HI },
31891 { OPTION_MASK_ISA_AVX512F, CODE_FOR_kmovw, "__builtin_ia32_kmov16", IX86_BUILTIN_KMOV16, UNKNOWN, (int) HI_FTYPE_HI },
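/* Usage sketch for the mask-arithmetic rows above; the intrinsic-level
   name is an assumed mapping, not something recorded in this table:

     __mmask16 m = __builtin_ia32_kandhi (a, b);   // e.g. _mm512_kand

   HI_FTYPE_HI_HI reflects that 16-bit mask values are carried as
   HImode scalars at this level.  */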
31893 /* SHA */
31894 { OPTION_MASK_ISA_SSE2, CODE_FOR_sha1msg1, 0, IX86_BUILTIN_SHA1MSG1, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
31895 { OPTION_MASK_ISA_SSE2, CODE_FOR_sha1msg2, 0, IX86_BUILTIN_SHA1MSG2, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
31896 { OPTION_MASK_ISA_SSE2, CODE_FOR_sha1nexte, 0, IX86_BUILTIN_SHA1NEXTE, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
31897 { OPTION_MASK_ISA_SSE2, CODE_FOR_sha1rnds4, 0, IX86_BUILTIN_SHA1RNDS4, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_INT },
31898 { OPTION_MASK_ISA_SSE2, CODE_FOR_sha256msg1, 0, IX86_BUILTIN_SHA256MSG1, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
31899 { OPTION_MASK_ISA_SSE2, CODE_FOR_sha256msg2, 0, IX86_BUILTIN_SHA256MSG2, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
31900 { OPTION_MASK_ISA_SSE2, CODE_FOR_sha256rnds2, 0, IX86_BUILTIN_SHA256RNDS2, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI },
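/* Usage sketch for the SHA rows above; the intrinsic names are an
   assumption about the <shaintrin.h> wrappers, not taken from this
   table:

     __m128i r = _mm_sha1msg1_epu32 (a, b);   // backed by __builtin_ia32_sha1msg1

   Each row takes and returns V4SImode vectors, matching
   V4SI_FTYPE_V4SI_V4SI, and sha1rnds4 additionally takes an immediate
   function selector (the trailing INT).  */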
31902 /* AVX512VL. */
31903 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_palignrv32qi_mask, "__builtin_ia32_palignr256_mask", IX86_BUILTIN_PALIGNR256_MASK, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_INT_V4DI_SI_CONVERT },
31904 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_ssse3_palignrv16qi_mask, "__builtin_ia32_palignr128_mask", IX86_BUILTIN_PALIGNR128_MASK, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_INT_V2DI_HI_CONVERT },
31905 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_loadv4di_mask, "__builtin_ia32_movdqa64_256_mask", IX86_BUILTIN_MOVDQA64_256_MASK, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_QI },
31906 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_loadv2di_mask, "__builtin_ia32_movdqa64_128_mask", IX86_BUILTIN_MOVDQA64_128_MASK, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_QI },
31907 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_loadv8si_mask, "__builtin_ia32_movdqa32_256_mask", IX86_BUILTIN_MOVDQA32_256_MASK, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_QI },
31908 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_loadv4si_mask, "__builtin_ia32_movdqa32_128_mask", IX86_BUILTIN_MOVDQA32_128_MASK, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_QI },
31909 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_loadv4df_mask, "__builtin_ia32_movapd256_mask", IX86_BUILTIN_MOVAPD256_MASK, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_QI },
31910 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_loadv2df_mask, "__builtin_ia32_movapd128_mask", IX86_BUILTIN_MOVAPD128_MASK, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_QI },
31911 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_loadv8sf_mask, "__builtin_ia32_movaps256_mask", IX86_BUILTIN_MOVAPS256_MASK, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_QI },
31912 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_loadv4sf_mask, "__builtin_ia32_movaps128_mask", IX86_BUILTIN_MOVAPS128_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_QI },
31913 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_loaddquv16hi_mask, "__builtin_ia32_movdquhi256_mask", IX86_BUILTIN_MOVDQUHI256_MASK, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_HI },
31914 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_loaddquv8hi_mask, "__builtin_ia32_movdquhi128_mask", IX86_BUILTIN_MOVDQUHI128_MASK, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_QI },
31915 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_loaddquv32qi_mask, "__builtin_ia32_movdquqi256_mask", IX86_BUILTIN_MOVDQUQI256_MASK, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI_SI },
31916 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse2_loaddquv16qi_mask, "__builtin_ia32_movdquqi128_mask", IX86_BUILTIN_MOVDQUQI128_MASK, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_HI },
31917 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sminv4sf3_mask, "__builtin_ia32_minps_mask", IX86_BUILTIN_MINPS128_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF_QI },
31918 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_smaxv4sf3_mask, "__builtin_ia32_maxps_mask", IX86_BUILTIN_MAXPS128_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF_QI },
31919 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sminv2df3_mask, "__builtin_ia32_minpd_mask", IX86_BUILTIN_MINPD128_MASK, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF_QI },
31920 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_smaxv2df3_mask, "__builtin_ia32_maxpd_mask", IX86_BUILTIN_MAXPD128_MASK, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF_QI },
31921 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_smaxv4df3_mask, "__builtin_ia32_maxpd256_mask", IX86_BUILTIN_MAXPD256_MASK, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DF_QI },
31922 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_smaxv8sf3_mask, "__builtin_ia32_maxps256_mask", IX86_BUILTIN_MAXPS256_MASK, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SF_QI },
31923 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sminv4df3_mask, "__builtin_ia32_minpd256_mask", IX86_BUILTIN_MINPD256_MASK, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DF_QI },
31924 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sminv8sf3_mask, "__builtin_ia32_minps256_mask", IX86_BUILTIN_MINPS256_MASK, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SF_QI },
31925 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_mulv4sf3_mask, "__builtin_ia32_mulps_mask", IX86_BUILTIN_MULPS128_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF_QI },
31926 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse_divv4sf3_mask, "__builtin_ia32_divps_mask", IX86_BUILTIN_DIVPS128_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF_QI },
31927 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_mulv2df3_mask, "__builtin_ia32_mulpd_mask", IX86_BUILTIN_MULPD128_MASK, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF_QI },
31928 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse2_divv2df3_mask, "__builtin_ia32_divpd_mask", IX86_BUILTIN_DIVPD128_MASK, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF_QI },
31929 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_divv4df3_mask, "__builtin_ia32_divpd256_mask", IX86_BUILTIN_DIVPD256_MASK, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DF_QI },
31930 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_divv8sf3_mask, "__builtin_ia32_divps256_mask", IX86_BUILTIN_DIVPS256_MASK, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SF_QI },
31931 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_mulv4df3_mask, "__builtin_ia32_mulpd256_mask", IX86_BUILTIN_MULPD256_MASK, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DF_QI },
31932 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_mulv8sf3_mask, "__builtin_ia32_mulps256_mask", IX86_BUILTIN_MULPS256_MASK, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SF_QI },
31933 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_addv2df3_mask, "__builtin_ia32_addpd128_mask", IX86_BUILTIN_ADDPD128_MASK, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF_QI },
31934 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_addv4df3_mask, "__builtin_ia32_addpd256_mask", IX86_BUILTIN_ADDPD256_MASK, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DF_QI },
31935 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_addv4sf3_mask, "__builtin_ia32_addps128_mask", IX86_BUILTIN_ADDPS128_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF_QI },
31936 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_addv8sf3_mask, "__builtin_ia32_addps256_mask", IX86_BUILTIN_ADDPS256_MASK, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SF_QI },
31937 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_subv2df3_mask, "__builtin_ia32_subpd128_mask", IX86_BUILTIN_SUBPD128_MASK, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF_QI },
31938 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_subv4df3_mask, "__builtin_ia32_subpd256_mask", IX86_BUILTIN_SUBPD256_MASK, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DF_QI },
31939 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_subv4sf3_mask, "__builtin_ia32_subps128_mask", IX86_BUILTIN_SUBPS128_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF_QI },
31940 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_subv8sf3_mask, "__builtin_ia32_subps256_mask", IX86_BUILTIN_SUBPS256_MASK, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SF_QI },
31941 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_xorv4df3_mask, "__builtin_ia32_xorpd256_mask", IX86_BUILTIN_XORPD256_MASK, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DF_QI },
31942 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_xorv2df3_mask, "__builtin_ia32_xorpd128_mask", IX86_BUILTIN_XORPD128_MASK, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF_QI },
31943 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_xorv8sf3_mask, "__builtin_ia32_xorps256_mask", IX86_BUILTIN_XORPS256_MASK, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SF_QI },
31944 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_xorv4sf3_mask, "__builtin_ia32_xorps128_mask", IX86_BUILTIN_XORPS128_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF_QI },
31945 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_iorv4df3_mask, "__builtin_ia32_orpd256_mask", IX86_BUILTIN_ORPD256_MASK, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DF_QI },
31946 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_iorv2df3_mask, "__builtin_ia32_orpd128_mask", IX86_BUILTIN_ORPD128_MASK, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF_QI },
31947 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_iorv8sf3_mask, "__builtin_ia32_orps256_mask", IX86_BUILTIN_ORPS256_MASK, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SF_QI },
31948 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_iorv4sf3_mask, "__builtin_ia32_orps128_mask", IX86_BUILTIN_ORPS128_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF_QI },
31949 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512dq_broadcastv8sf_mask, "__builtin_ia32_broadcastf32x2_256_mask", IX86_BUILTIN_BROADCASTF32x2_256, UNKNOWN, (int) V8SF_FTYPE_V4SF_V8SF_QI },
31950 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512dq_broadcastv8si_mask, "__builtin_ia32_broadcasti32x2_256_mask", IX86_BUILTIN_BROADCASTI32x2_256, UNKNOWN, (int) V8SI_FTYPE_V4SI_V8SI_QI },
31951 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512dq_broadcastv4si_mask, "__builtin_ia32_broadcasti32x2_128_mask", IX86_BUILTIN_BROADCASTI32x2_128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_QI },
31952 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512dq_broadcastv4df_mask_1, "__builtin_ia32_broadcastf64x2_256_mask", IX86_BUILTIN_BROADCASTF64X2_256, UNKNOWN, (int) V4DF_FTYPE_V2DF_V4DF_QI },
31953 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512dq_broadcastv4di_mask_1, "__builtin_ia32_broadcasti64x2_256_mask", IX86_BUILTIN_BROADCASTI64X2_256, UNKNOWN, (int) V4DI_FTYPE_V2DI_V4DI_QI },
31954 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_broadcastv8sf_mask_1, "__builtin_ia32_broadcastf32x4_256_mask", IX86_BUILTIN_BROADCASTF32X4_256, UNKNOWN, (int) V8SF_FTYPE_V4SF_V8SF_QI },
31955 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_broadcastv8si_mask_1, "__builtin_ia32_broadcasti32x4_256_mask", IX86_BUILTIN_BROADCASTI32X4_256, UNKNOWN, (int) V8SI_FTYPE_V4SI_V8SI_QI },
31956 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vextractf128v8sf, "__builtin_ia32_extractf32x4_256_mask", IX86_BUILTIN_EXTRACTF32X4_256, UNKNOWN, (int) V4SF_FTYPE_V8SF_INT_V4SF_QI },
31957 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vextractf128v8si, "__builtin_ia32_extracti32x4_256_mask", IX86_BUILTIN_EXTRACTI32X4_256, UNKNOWN, (int) V4SI_FTYPE_V8SI_INT_V4SI_QI },
31958 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512bw_dbpsadbwv16hi_mask, "__builtin_ia32_dbpsadbw256_mask", IX86_BUILTIN_DBPSADBW256, UNKNOWN, (int) V16HI_FTYPE_V32QI_V32QI_INT_V16HI_HI },
31959 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512bw_dbpsadbwv8hi_mask, "__builtin_ia32_dbpsadbw128_mask", IX86_BUILTIN_DBPSADBW128, UNKNOWN, (int) V8HI_FTYPE_V16QI_V16QI_INT_V8HI_QI },
31960 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_fix_truncv4dfv4di2_mask, "__builtin_ia32_cvttpd2qq256_mask", IX86_BUILTIN_CVTTPD2QQ256, UNKNOWN, (int) V4DI_FTYPE_V4DF_V4DI_QI },
31961 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_fix_truncv2dfv2di2_mask, "__builtin_ia32_cvttpd2qq128_mask", IX86_BUILTIN_CVTTPD2QQ128, UNKNOWN, (int) V2DI_FTYPE_V2DF_V2DI_QI },
31962 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_ufix_truncv4dfv4di2_mask, "__builtin_ia32_cvttpd2uqq256_mask", IX86_BUILTIN_CVTTPD2UQQ256, UNKNOWN, (int) V4DI_FTYPE_V4DF_V4DI_QI },
31963 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_ufix_truncv2dfv2di2_mask, "__builtin_ia32_cvttpd2uqq128_mask", IX86_BUILTIN_CVTTPD2UQQ128, UNKNOWN, (int) V2DI_FTYPE_V2DF_V2DI_QI },
31964 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_fix_notruncv4dfv4di2_mask, "__builtin_ia32_cvtpd2qq256_mask", IX86_BUILTIN_CVTPD2QQ256, UNKNOWN, (int) V4DI_FTYPE_V4DF_V4DI_QI },
31965 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_fix_notruncv2dfv2di2_mask, "__builtin_ia32_cvtpd2qq128_mask", IX86_BUILTIN_CVTPD2QQ128, UNKNOWN, (int) V2DI_FTYPE_V2DF_V2DI_QI },
31966 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_ufix_notruncv4dfv4di2_mask, "__builtin_ia32_cvtpd2uqq256_mask", IX86_BUILTIN_CVTPD2UQQ256, UNKNOWN, (int) V4DI_FTYPE_V4DF_V4DI_QI },
31967 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_ufix_notruncv2dfv2di2_mask, "__builtin_ia32_cvtpd2uqq128_mask", IX86_BUILTIN_CVTPD2UQQ128, UNKNOWN, (int) V2DI_FTYPE_V2DF_V2DI_QI },
31968 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_ufix_notruncv4dfv4si2_mask, "__builtin_ia32_cvtpd2udq256_mask", IX86_BUILTIN_CVTPD2UDQ256_MASK, UNKNOWN, (int) V4SI_FTYPE_V4DF_V4SI_QI },
31969 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_ufix_notruncv2dfv2si2_mask, "__builtin_ia32_cvtpd2udq128_mask", IX86_BUILTIN_CVTPD2UDQ128_MASK, UNKNOWN, (int) V4SI_FTYPE_V2DF_V4SI_QI },
31970 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_fix_truncv4sfv4di2_mask, "__builtin_ia32_cvttps2qq256_mask", IX86_BUILTIN_CVTTPS2QQ256, UNKNOWN, (int) V4DI_FTYPE_V4SF_V4DI_QI },
31971 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_fix_truncv2sfv2di2_mask, "__builtin_ia32_cvttps2qq128_mask", IX86_BUILTIN_CVTTPS2QQ128, UNKNOWN, (int) V2DI_FTYPE_V4SF_V2DI_QI },
31972 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_ufix_truncv4sfv4di2_mask, "__builtin_ia32_cvttps2uqq256_mask", IX86_BUILTIN_CVTTPS2UQQ256, UNKNOWN, (int) V4DI_FTYPE_V4SF_V4DI_QI },
31973 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_ufix_truncv2sfv2di2_mask, "__builtin_ia32_cvttps2uqq128_mask", IX86_BUILTIN_CVTTPS2UQQ128, UNKNOWN, (int) V2DI_FTYPE_V4SF_V2DI_QI },
31974 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_fix_truncv8sfv8si2_mask, "__builtin_ia32_cvttps2dq256_mask", IX86_BUILTIN_CVTTPS2DQ256_MASK, UNKNOWN, (int) V8SI_FTYPE_V8SF_V8SI_QI },
31975 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_fix_truncv4sfv4si2_mask, "__builtin_ia32_cvttps2dq128_mask", IX86_BUILTIN_CVTTPS2DQ128_MASK, UNKNOWN, (int) V4SI_FTYPE_V4SF_V4SI_QI },
31976 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_ufix_truncv8sfv8si2_mask, "__builtin_ia32_cvttps2udq256_mask", IX86_BUILTIN_CVTTPS2UDQ256, UNKNOWN, (int) V8SI_FTYPE_V8SF_V8SI_QI },
31977 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_ufix_truncv4sfv4si2_mask, "__builtin_ia32_cvttps2udq128_mask", IX86_BUILTIN_CVTTPS2UDQ128, UNKNOWN, (int) V4SI_FTYPE_V4SF_V4SI_QI },
31978 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_fix_truncv4dfv4si2_mask, "__builtin_ia32_cvttpd2dq256_mask", IX86_BUILTIN_CVTTPD2DQ256_MASK, UNKNOWN, (int) V4SI_FTYPE_V4DF_V4SI_QI },
31979 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse2_cvttpd2dq_mask, "__builtin_ia32_cvttpd2dq128_mask", IX86_BUILTIN_CVTTPD2DQ128_MASK, UNKNOWN, (int) V4SI_FTYPE_V2DF_V4SI_QI },
31980 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_ufix_truncv4dfv4si2_mask, "__builtin_ia32_cvttpd2udq256_mask", IX86_BUILTIN_CVTTPD2UDQ256_MASK, UNKNOWN, (int) V4SI_FTYPE_V4DF_V4SI_QI },
31981 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_ufix_truncv2dfv2si2_mask, "__builtin_ia32_cvttpd2udq128_mask", IX86_BUILTIN_CVTTPD2UDQ128_MASK, UNKNOWN, (int) V4SI_FTYPE_V2DF_V4SI_QI },
31982 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_cvtpd2dq256_mask, "__builtin_ia32_cvtpd2dq256_mask", IX86_BUILTIN_CVTPD2DQ256_MASK, UNKNOWN, (int) V4SI_FTYPE_V4DF_V4SI_QI },
31983 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse2_cvtpd2dq_mask, "__builtin_ia32_cvtpd2dq128_mask", IX86_BUILTIN_CVTPD2DQ128_MASK, UNKNOWN, (int) V4SI_FTYPE_V2DF_V4SI_QI },
31984 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_floatv4siv4df2_mask, "__builtin_ia32_cvtdq2pd256_mask", IX86_BUILTIN_CVTDQ2PD256_MASK, UNKNOWN, (int) V4DF_FTYPE_V4SI_V4DF_QI },
31985 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse2_cvtdq2pd_mask, "__builtin_ia32_cvtdq2pd128_mask", IX86_BUILTIN_CVTDQ2PD128_MASK, UNKNOWN, (int) V2DF_FTYPE_V4SI_V2DF_QI },
31986 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_ufloatv4siv4df2_mask, "__builtin_ia32_cvtudq2pd256_mask", IX86_BUILTIN_CVTUDQ2PD256_MASK, UNKNOWN, (int) V4DF_FTYPE_V4SI_V4DF_QI },
31987 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_ufloatv2siv2df2_mask, "__builtin_ia32_cvtudq2pd128_mask", IX86_BUILTIN_CVTUDQ2PD128_MASK, UNKNOWN, (int) V2DF_FTYPE_V4SI_V2DF_QI },
31988 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_floatv8siv8sf2_mask, "__builtin_ia32_cvtdq2ps256_mask", IX86_BUILTIN_CVTDQ2PS256_MASK, UNKNOWN, (int) V8SF_FTYPE_V8SI_V8SF_QI },
31989 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_floatv4siv4sf2_mask, "__builtin_ia32_cvtdq2ps128_mask", IX86_BUILTIN_CVTDQ2PS128_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SI_V4SF_QI },
31990 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_ufloatv8siv8sf2_mask, "__builtin_ia32_cvtudq2ps256_mask", IX86_BUILTIN_CVTUDQ2PS256_MASK, UNKNOWN, (int) V8SF_FTYPE_V8SI_V8SF_QI },
31991 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_ufloatv4siv4sf2_mask, "__builtin_ia32_cvtudq2ps128_mask", IX86_BUILTIN_CVTUDQ2PS128_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SI_V4SF_QI },
31992 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_cvtps2pd256_mask, "__builtin_ia32_cvtps2pd256_mask", IX86_BUILTIN_CVTPS2PD256_MASK, UNKNOWN, (int) V4DF_FTYPE_V4SF_V4DF_QI },
31993 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse2_cvtps2pd_mask, "__builtin_ia32_cvtps2pd128_mask", IX86_BUILTIN_CVTPS2PD128_MASK, UNKNOWN, (int) V2DF_FTYPE_V4SF_V2DF_QI },
31994 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vec_dupv32qi_mask, "__builtin_ia32_pbroadcastb256_mask", IX86_BUILTIN_PBROADCASTB256_MASK, UNKNOWN, (int) V32QI_FTYPE_V16QI_V32QI_SI },
31995 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vec_dup_gprv32qi_mask, "__builtin_ia32_pbroadcastb256_gpr_mask", IX86_BUILTIN_PBROADCASTB256_GPR_MASK, UNKNOWN, (int) V32QI_FTYPE_QI_V32QI_SI },
31996 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vec_dupv16qi_mask, "__builtin_ia32_pbroadcastb128_mask", IX86_BUILTIN_PBROADCASTB128_MASK, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_HI },
31997 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vec_dup_gprv16qi_mask, "__builtin_ia32_pbroadcastb128_gpr_mask", IX86_BUILTIN_PBROADCASTB128_GPR_MASK, UNKNOWN, (int) V16QI_FTYPE_QI_V16QI_HI },
31998 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vec_dupv16hi_mask, "__builtin_ia32_pbroadcastw256_mask", IX86_BUILTIN_PBROADCASTW256_MASK, UNKNOWN, (int) V16HI_FTYPE_V8HI_V16HI_HI },
31999 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vec_dup_gprv16hi_mask, "__builtin_ia32_pbroadcastw256_gpr_mask", IX86_BUILTIN_PBROADCASTW256_GPR_MASK, UNKNOWN, (int) V16HI_FTYPE_HI_V16HI_HI },
32000 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vec_dupv8hi_mask, "__builtin_ia32_pbroadcastw128_mask", IX86_BUILTIN_PBROADCASTW128_MASK, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_QI },
32001 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vec_dup_gprv8hi_mask, "__builtin_ia32_pbroadcastw128_gpr_mask", IX86_BUILTIN_PBROADCASTW128_GPR_MASK, UNKNOWN, (int) V8HI_FTYPE_HI_V8HI_QI },
32002 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vec_dupv8si_mask, "__builtin_ia32_pbroadcastd256_mask", IX86_BUILTIN_PBROADCASTD256_MASK, UNKNOWN, (int) V8SI_FTYPE_V4SI_V8SI_QI },
32003 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vec_dup_gprv8si_mask, "__builtin_ia32_pbroadcastd256_gpr_mask", IX86_BUILTIN_PBROADCASTD256_GPR_MASK, UNKNOWN, (int) V8SI_FTYPE_SI_V8SI_QI },
32004 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vec_dupv4si_mask, "__builtin_ia32_pbroadcastd128_mask", IX86_BUILTIN_PBROADCASTD128_MASK, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_QI },
32005 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vec_dup_gprv4si_mask, "__builtin_ia32_pbroadcastd128_gpr_mask", IX86_BUILTIN_PBROADCASTD128_GPR_MASK, UNKNOWN, (int) V4SI_FTYPE_SI_V4SI_QI },
32006 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vec_dupv4di_mask, "__builtin_ia32_pbroadcastq256_mask", IX86_BUILTIN_PBROADCASTQ256_MASK, UNKNOWN, (int) V4DI_FTYPE_V2DI_V4DI_QI },
32007 { OPTION_MASK_ISA_AVX512VL | OPTION_MASK_ISA_64BIT, CODE_FOR_avx512vl_vec_dup_gprv4di_mask, "__builtin_ia32_pbroadcastq256_gpr_mask", IX86_BUILTIN_PBROADCASTQ256_GPR_MASK, UNKNOWN, (int) V4DI_FTYPE_DI_V4DI_QI },
32008 { OPTION_MASK_ISA_AVX512VL & ~OPTION_MASK_ISA_64BIT, CODE_FOR_avx512vl_vec_dup_memv4di_mask, "__builtin_ia32_pbroadcastq256_mem_mask", IX86_BUILTIN_PBROADCASTQ256_MEM_MASK, UNKNOWN, (int) V4DI_FTYPE_DI_V4DI_QI },
32009 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vec_dupv2di_mask, "__builtin_ia32_pbroadcastq128_mask", IX86_BUILTIN_PBROADCASTQ128_MASK, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_QI },
32010 { OPTION_MASK_ISA_AVX512VL | OPTION_MASK_ISA_64BIT, CODE_FOR_avx512vl_vec_dup_gprv2di_mask, "__builtin_ia32_pbroadcastq128_gpr_mask", IX86_BUILTIN_PBROADCASTQ128_GPR_MASK, UNKNOWN, (int) V2DI_FTYPE_DI_V2DI_QI },
32011 { OPTION_MASK_ISA_AVX512VL & ~OPTION_MASK_ISA_64BIT, CODE_FOR_avx512vl_vec_dup_memv2di_mask, "__builtin_ia32_pbroadcastq128_mem_mask", IX86_BUILTIN_PBROADCASTQ128_MEM_MASK, UNKNOWN, (int) V2DI_FTYPE_DI_V2DI_QI },
32012 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vec_dupv8sf_mask, "__builtin_ia32_broadcastss256_mask", IX86_BUILTIN_BROADCASTSS256, UNKNOWN, (int) V8SF_FTYPE_V4SF_V8SF_QI },
32013 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vec_dupv4sf_mask, "__builtin_ia32_broadcastss128_mask", IX86_BUILTIN_BROADCASTSS128, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_QI },
32014 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vec_dupv4df_mask, "__builtin_ia32_broadcastsd256_mask", IX86_BUILTIN_BROADCASTSD256, UNKNOWN, (int) V4DF_FTYPE_V2DF_V4DF_QI },
32015 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vextractf128v4df, "__builtin_ia32_extractf64x2_256_mask", IX86_BUILTIN_EXTRACTF64X2_256, UNKNOWN, (int) V2DF_FTYPE_V4DF_INT_V2DF_QI },
32016 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vextractf128v4di, "__builtin_ia32_extracti64x2_256_mask", IX86_BUILTIN_EXTRACTI64X2_256, UNKNOWN, (int) V2DI_FTYPE_V4DI_INT_V2DI_QI },
32017 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vinsertv8sf, "__builtin_ia32_insertf32x4_256_mask", IX86_BUILTIN_INSERTF32X4_256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V4SF_INT_V8SF_QI },
32018 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vinsertv8si, "__builtin_ia32_inserti32x4_256_mask", IX86_BUILTIN_INSERTI32X4_256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V4SI_INT_V8SI_QI },
32019 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_sign_extendv16qiv16hi2_mask, "__builtin_ia32_pmovsxbw256_mask", IX86_BUILTIN_PMOVSXBW256_MASK, UNKNOWN, (int) V16HI_FTYPE_V16QI_V16HI_HI },
32020 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse4_1_sign_extendv8qiv8hi2_mask, "__builtin_ia32_pmovsxbw128_mask", IX86_BUILTIN_PMOVSXBW128_MASK, UNKNOWN, (int) V8HI_FTYPE_V16QI_V8HI_QI },
32021 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_sign_extendv8qiv8si2_mask, "__builtin_ia32_pmovsxbd256_mask", IX86_BUILTIN_PMOVSXBD256_MASK, UNKNOWN, (int) V8SI_FTYPE_V16QI_V8SI_QI },
32022 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse4_1_sign_extendv4qiv4si2_mask, "__builtin_ia32_pmovsxbd128_mask", IX86_BUILTIN_PMOVSXBD128_MASK, UNKNOWN, (int) V4SI_FTYPE_V16QI_V4SI_QI },
32023 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_sign_extendv4qiv4di2_mask, "__builtin_ia32_pmovsxbq256_mask", IX86_BUILTIN_PMOVSXBQ256_MASK, UNKNOWN, (int) V4DI_FTYPE_V16QI_V4DI_QI },
32024 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse4_1_sign_extendv2qiv2di2_mask, "__builtin_ia32_pmovsxbq128_mask", IX86_BUILTIN_PMOVSXBQ128_MASK, UNKNOWN, (int) V2DI_FTYPE_V16QI_V2DI_QI },
32025 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_sign_extendv8hiv8si2_mask, "__builtin_ia32_pmovsxwd256_mask", IX86_BUILTIN_PMOVSXWD256_MASK, UNKNOWN, (int) V8SI_FTYPE_V8HI_V8SI_QI },
32026 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse4_1_sign_extendv4hiv4si2_mask, "__builtin_ia32_pmovsxwd128_mask", IX86_BUILTIN_PMOVSXWD128_MASK, UNKNOWN, (int) V4SI_FTYPE_V8HI_V4SI_QI },
32027 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_sign_extendv4hiv4di2_mask, "__builtin_ia32_pmovsxwq256_mask", IX86_BUILTIN_PMOVSXWQ256_MASK, UNKNOWN, (int) V4DI_FTYPE_V8HI_V4DI_QI },
32028 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse4_1_sign_extendv2hiv2di2_mask, "__builtin_ia32_pmovsxwq128_mask", IX86_BUILTIN_PMOVSXWQ128_MASK, UNKNOWN, (int) V2DI_FTYPE_V8HI_V2DI_QI },
32029 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_sign_extendv4siv4di2_mask, "__builtin_ia32_pmovsxdq256_mask", IX86_BUILTIN_PMOVSXDQ256_MASK, UNKNOWN, (int) V4DI_FTYPE_V4SI_V4DI_QI },
32030 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse4_1_sign_extendv2siv2di2_mask, "__builtin_ia32_pmovsxdq128_mask", IX86_BUILTIN_PMOVSXDQ128_MASK, UNKNOWN, (int) V2DI_FTYPE_V4SI_V2DI_QI },
32031 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_zero_extendv16qiv16hi2_mask, "__builtin_ia32_pmovzxbw256_mask", IX86_BUILTIN_PMOVZXBW256_MASK, UNKNOWN, (int) V16HI_FTYPE_V16QI_V16HI_HI },
32032 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse4_1_zero_extendv8qiv8hi2_mask, "__builtin_ia32_pmovzxbw128_mask", IX86_BUILTIN_PMOVZXBW128_MASK, UNKNOWN, (int) V8HI_FTYPE_V16QI_V8HI_QI },
32033 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_zero_extendv8qiv8si2_mask, "__builtin_ia32_pmovzxbd256_mask", IX86_BUILTIN_PMOVZXBD256_MASK, UNKNOWN, (int) V8SI_FTYPE_V16QI_V8SI_QI },
32034 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse4_1_zero_extendv4qiv4si2_mask, "__builtin_ia32_pmovzxbd128_mask", IX86_BUILTIN_PMOVZXBD128_MASK, UNKNOWN, (int) V4SI_FTYPE_V16QI_V4SI_QI },
32035 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_zero_extendv4qiv4di2_mask, "__builtin_ia32_pmovzxbq256_mask", IX86_BUILTIN_PMOVZXBQ256_MASK, UNKNOWN, (int) V4DI_FTYPE_V16QI_V4DI_QI },
32036 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse4_1_zero_extendv2qiv2di2_mask, "__builtin_ia32_pmovzxbq128_mask", IX86_BUILTIN_PMOVZXBQ128_MASK, UNKNOWN, (int) V2DI_FTYPE_V16QI_V2DI_QI },
32037 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_zero_extendv8hiv8si2_mask, "__builtin_ia32_pmovzxwd256_mask", IX86_BUILTIN_PMOVZXWD256_MASK, UNKNOWN, (int) V8SI_FTYPE_V8HI_V8SI_QI },
32038 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse4_1_zero_extendv4hiv4si2_mask, "__builtin_ia32_pmovzxwd128_mask", IX86_BUILTIN_PMOVZXWD128_MASK, UNKNOWN, (int) V4SI_FTYPE_V8HI_V4SI_QI },
32039 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_zero_extendv4hiv4di2_mask, "__builtin_ia32_pmovzxwq256_mask", IX86_BUILTIN_PMOVZXWQ256_MASK, UNKNOWN, (int) V4DI_FTYPE_V8HI_V4DI_QI },
32040 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse4_1_zero_extendv2hiv2di2_mask, "__builtin_ia32_pmovzxwq128_mask", IX86_BUILTIN_PMOVZXWQ128_MASK, UNKNOWN, (int) V2DI_FTYPE_V8HI_V2DI_QI },
32041 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_zero_extendv4siv4di2_mask, "__builtin_ia32_pmovzxdq256_mask", IX86_BUILTIN_PMOVZXDQ256_MASK, UNKNOWN, (int) V4DI_FTYPE_V4SI_V4DI_QI },
32042 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse4_1_zero_extendv2siv2di2_mask, "__builtin_ia32_pmovzxdq128_mask", IX86_BUILTIN_PMOVZXDQ128_MASK, UNKNOWN, (int) V2DI_FTYPE_V4SI_V2DI_QI },
32043 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_reducepv4df_mask, "__builtin_ia32_reducepd256_mask", IX86_BUILTIN_REDUCEPD256_MASK, UNKNOWN, (int) V4DF_FTYPE_V4DF_INT_V4DF_QI },
32044 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_reducepv2df_mask, "__builtin_ia32_reducepd128_mask", IX86_BUILTIN_REDUCEPD128_MASK, UNKNOWN, (int) V2DF_FTYPE_V2DF_INT_V2DF_QI },
32045 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_reducepv8sf_mask, "__builtin_ia32_reduceps256_mask", IX86_BUILTIN_REDUCEPS256_MASK, UNKNOWN, (int) V8SF_FTYPE_V8SF_INT_V8SF_QI },
32046 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_reducepv4sf_mask, "__builtin_ia32_reduceps128_mask", IX86_BUILTIN_REDUCEPS128_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SF_INT_V4SF_QI },
32047 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_reducesv2df, "__builtin_ia32_reducesd", IX86_BUILTIN_REDUCESD_MASK, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
32048 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_reducesv4sf, "__builtin_ia32_reducess", IX86_BUILTIN_REDUCESS_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
32049 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_permvarv16hi_mask, "__builtin_ia32_permvarhi256_mask", IX86_BUILTIN_VPERMVARHI256_MASK, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_V16HI_HI },
32050 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_permvarv8hi_mask, "__builtin_ia32_permvarhi128_mask", IX86_BUILTIN_VPERMVARHI128_MASK, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_V8HI_QI },
32051 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermt2varv16hi3_mask, "__builtin_ia32_vpermt2varhi256_mask", IX86_BUILTIN_VPERMT2VARHI256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_V16HI_HI },
32052 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermt2varv16hi3_maskz, "__builtin_ia32_vpermt2varhi256_maskz", IX86_BUILTIN_VPERMT2VARHI256_MASKZ, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_V16HI_HI },
32053 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermt2varv8hi3_mask, "__builtin_ia32_vpermt2varhi128_mask", IX86_BUILTIN_VPERMT2VARHI128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_V8HI_QI },
32054 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermt2varv8hi3_maskz, "__builtin_ia32_vpermt2varhi128_maskz", IX86_BUILTIN_VPERMT2VARHI128_MASKZ, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_V8HI_QI },
32055 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermi2varv16hi3_mask, "__builtin_ia32_vpermi2varhi256_mask", IX86_BUILTIN_VPERMI2VARHI256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_V16HI_HI },
32056 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermi2varv8hi3_mask, "__builtin_ia32_vpermi2varhi128_mask", IX86_BUILTIN_VPERMI2VARHI128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_V8HI_QI },
32057 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_rcp14v4df_mask, "__builtin_ia32_rcp14pd256_mask", IX86_BUILTIN_RCP14PD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_QI },
32058 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_rcp14v2df_mask, "__builtin_ia32_rcp14pd128_mask", IX86_BUILTIN_RCP14PD128, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_QI },
32059 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_rcp14v8sf_mask, "__builtin_ia32_rcp14ps256_mask", IX86_BUILTIN_RCP14PS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_QI },
32060 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_rcp14v4sf_mask, "__builtin_ia32_rcp14ps128_mask", IX86_BUILTIN_RCP14PS128, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_QI },
32061 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_rsqrt14v4df_mask, "__builtin_ia32_rsqrt14pd256_mask", IX86_BUILTIN_RSQRT14PD256_MASK, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_QI },
32062 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_rsqrt14v2df_mask, "__builtin_ia32_rsqrt14pd128_mask", IX86_BUILTIN_RSQRT14PD128_MASK, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_QI },
32063 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_rsqrt14v8sf_mask, "__builtin_ia32_rsqrt14ps256_mask", IX86_BUILTIN_RSQRT14PS256_MASK, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_QI },
32064 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_rsqrt14v4sf_mask, "__builtin_ia32_rsqrt14ps128_mask", IX86_BUILTIN_RSQRT14PS128_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_QI },
32065 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_sqrtv4df2_mask, "__builtin_ia32_sqrtpd256_mask", IX86_BUILTIN_SQRTPD256_MASK, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_QI },
32066 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse2_sqrtv2df2_mask, "__builtin_ia32_sqrtpd128_mask", IX86_BUILTIN_SQRTPD128_MASK, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_QI },
32067 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_sqrtv8sf2_mask, "__builtin_ia32_sqrtps256_mask", IX86_BUILTIN_SQRTPS256_MASK, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_QI },
32068 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse_sqrtv4sf2_mask, "__builtin_ia32_sqrtps128_mask", IX86_BUILTIN_SQRTPS128_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_QI },
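      /* Masked vector integer add/sub builtins, plain and saturating; the byte/word forms additionally require AVX512BW. */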
32069 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_addv16qi3_mask, "__builtin_ia32_paddb128_mask", IX86_BUILTIN_PADDB128_MASK, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_V16QI_HI },
32070 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_addv8hi3_mask, "__builtin_ia32_paddw128_mask", IX86_BUILTIN_PADDW128_MASK, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_V8HI_QI },
32071 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_addv4si3_mask, "__builtin_ia32_paddd128_mask", IX86_BUILTIN_PADDD128_MASK, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI_QI },
32072 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_addv2di3_mask, "__builtin_ia32_paddq128_mask", IX86_BUILTIN_PADDQ128_MASK, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI_QI },
32073 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_subv16qi3_mask, "__builtin_ia32_psubb128_mask", IX86_BUILTIN_PSUBB128_MASK, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_V16QI_HI },
32074 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_subv8hi3_mask, "__builtin_ia32_psubw128_mask", IX86_BUILTIN_PSUBW128_MASK, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_V8HI_QI },
32075 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_subv4si3_mask, "__builtin_ia32_psubd128_mask", IX86_BUILTIN_PSUBD128_MASK, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI_QI },
32076 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_subv2di3_mask, "__builtin_ia32_psubq128_mask", IX86_BUILTIN_PSUBQ128_MASK, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI_QI },
32077 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse2_ssaddv16qi3_mask, "__builtin_ia32_paddsb128_mask", IX86_BUILTIN_PADDSB128_MASK, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_V16QI_HI },
32078 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse2_ssaddv8hi3_mask, "__builtin_ia32_paddsw128_mask", IX86_BUILTIN_PADDSW128_MASK, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_V8HI_QI },
32079 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse2_sssubv16qi3_mask, "__builtin_ia32_psubsb128_mask", IX86_BUILTIN_PSUBSB128_MASK, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_V16QI_HI },
32080 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse2_sssubv8hi3_mask, "__builtin_ia32_psubsw128_mask", IX86_BUILTIN_PSUBSW128_MASK, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_V8HI_QI },
32081 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse2_usaddv16qi3_mask, "__builtin_ia32_paddusb128_mask", IX86_BUILTIN_PADDUSB128_MASK, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_V16QI_HI },
32082 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse2_usaddv8hi3_mask, "__builtin_ia32_paddusw128_mask", IX86_BUILTIN_PADDUSW128_MASK, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_V8HI_QI },
32083 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse2_ussubv16qi3_mask, "__builtin_ia32_psubusb128_mask", IX86_BUILTIN_PSUBUSB128_MASK, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_V16QI_HI },
32084 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse2_ussubv8hi3_mask, "__builtin_ia32_psubusw128_mask", IX86_BUILTIN_PSUBUSW128_MASK, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_V8HI_QI },
32085 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_addv32qi3_mask, "__builtin_ia32_paddb256_mask", IX86_BUILTIN_PADDB256_MASK, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI_V32QI_SI },
32086 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_addv16hi3_mask, "__builtin_ia32_paddw256_mask", IX86_BUILTIN_PADDW256_MASK, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_V16HI_HI },
32087 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_addv8si3_mask, "__builtin_ia32_paddd256_mask", IX86_BUILTIN_PADDD256_MASK, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_V8SI_QI },
32088 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_addv4di3_mask, "__builtin_ia32_paddq256_mask", IX86_BUILTIN_PADDQ256_MASK, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_V4DI_QI },
32089 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_ssaddv32qi3_mask, "__builtin_ia32_paddsb256_mask", IX86_BUILTIN_PADDSB256_MASK, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI_V32QI_SI },
32090 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_ssaddv16hi3_mask, "__builtin_ia32_paddsw256_mask", IX86_BUILTIN_PADDSW256_MASK, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_V16HI_HI },
32091 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_usaddv32qi3_mask, "__builtin_ia32_paddusb256_mask", IX86_BUILTIN_PADDUSB256_MASK, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI_V32QI_SI },
32092 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_usaddv16hi3_mask, "__builtin_ia32_paddusw256_mask", IX86_BUILTIN_PADDUSW256_MASK, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_V16HI_HI },
32093 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_subv32qi3_mask, "__builtin_ia32_psubb256_mask", IX86_BUILTIN_PSUBB256_MASK, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI_V32QI_SI },
32094 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_subv16hi3_mask, "__builtin_ia32_psubw256_mask", IX86_BUILTIN_PSUBW256_MASK, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_V16HI_HI },
32095 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_subv8si3_mask, "__builtin_ia32_psubd256_mask", IX86_BUILTIN_PSUBD256_MASK, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_V8SI_QI },
32096 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_subv4di3_mask, "__builtin_ia32_psubq256_mask", IX86_BUILTIN_PSUBQ256_MASK, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_V4DI_QI },
32097 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_sssubv32qi3_mask, "__builtin_ia32_psubsb256_mask", IX86_BUILTIN_PSUBSB256_MASK, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI_V32QI_SI },
32098 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_sssubv16hi3_mask, "__builtin_ia32_psubsw256_mask", IX86_BUILTIN_PSUBSW256_MASK, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_V16HI_HI },
32099 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_ussubv32qi3_mask, "__builtin_ia32_psubusb256_mask", IX86_BUILTIN_PSUBUSB256_MASK, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI_V32QI_SI },
32100 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_ussubv16hi3_mask, "__builtin_ia32_psubusw256_mask", IX86_BUILTIN_PSUBUSW256_MASK, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_V16HI_HI },
32101 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512dq_shuf_f64x2_mask, "__builtin_ia32_shuf_f64x2_256_mask", IX86_BUILTIN_SHUF_F64x2_256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_INT_V4DF_QI },
32102 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512dq_shuf_i64x2_mask, "__builtin_ia32_shuf_i64x2_256_mask", IX86_BUILTIN_SHUF_I64x2_256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_INT_V4DI_QI },
32103 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_shuf_i32x4_mask, "__builtin_ia32_shuf_i32x4_256_mask", IX86_BUILTIN_SHUF_I32x4_256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_INT_V8SI_QI },
32104 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_shuf_f32x4_mask, "__builtin_ia32_shuf_f32x4_256_mask", IX86_BUILTIN_SHUF_F32x4_256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_INT_V8SF_QI },
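      /* Masked truncating down-conversion (vpmov*) builtins: plain truncation plus signed-saturating (pmovs*) and unsigned-saturating (pmovus*) forms. */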
32105 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_truncatev8hiv8qi2_mask, "__builtin_ia32_pmovwb128_mask", IX86_BUILTIN_PMOVWB128, UNKNOWN, (int) V16QI_FTYPE_V8HI_V16QI_QI },
32106 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_truncatev16hiv16qi2_mask, "__builtin_ia32_pmovwb256_mask", IX86_BUILTIN_PMOVWB256, UNKNOWN, (int) V16QI_FTYPE_V16HI_V16QI_HI },
32107 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ss_truncatev8hiv8qi2_mask, "__builtin_ia32_pmovswb128_mask", IX86_BUILTIN_PMOVSWB128, UNKNOWN, (int) V16QI_FTYPE_V8HI_V16QI_QI },
32108 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ss_truncatev16hiv16qi2_mask, "__builtin_ia32_pmovswb256_mask", IX86_BUILTIN_PMOVSWB256, UNKNOWN, (int) V16QI_FTYPE_V16HI_V16QI_HI },
32109 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_us_truncatev8hiv8qi2_mask, "__builtin_ia32_pmovuswb128_mask", IX86_BUILTIN_PMOVUSWB128, UNKNOWN, (int) V16QI_FTYPE_V8HI_V16QI_QI },
32110 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_us_truncatev16hiv16qi2_mask, "__builtin_ia32_pmovuswb256_mask", IX86_BUILTIN_PMOVUSWB256, UNKNOWN, (int) V16QI_FTYPE_V16HI_V16QI_HI },
32111 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_truncatev4siv4qi2_mask, "__builtin_ia32_pmovdb128_mask", IX86_BUILTIN_PMOVDB128, UNKNOWN, (int) V16QI_FTYPE_V4SI_V16QI_QI },
32112 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_truncatev8siv8qi2_mask, "__builtin_ia32_pmovdb256_mask", IX86_BUILTIN_PMOVDB256, UNKNOWN, (int) V16QI_FTYPE_V8SI_V16QI_QI },
32113 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ss_truncatev4siv4qi2_mask, "__builtin_ia32_pmovsdb128_mask", IX86_BUILTIN_PMOVSDB128, UNKNOWN, (int) V16QI_FTYPE_V4SI_V16QI_QI },
32114 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ss_truncatev8siv8qi2_mask, "__builtin_ia32_pmovsdb256_mask", IX86_BUILTIN_PMOVSDB256, UNKNOWN, (int) V16QI_FTYPE_V8SI_V16QI_QI },
32115 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_us_truncatev4siv4qi2_mask, "__builtin_ia32_pmovusdb128_mask", IX86_BUILTIN_PMOVUSDB128, UNKNOWN, (int) V16QI_FTYPE_V4SI_V16QI_QI },
32116 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_us_truncatev8siv8qi2_mask, "__builtin_ia32_pmovusdb256_mask", IX86_BUILTIN_PMOVUSDB256, UNKNOWN, (int) V16QI_FTYPE_V8SI_V16QI_QI },
32117 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_truncatev4siv4hi2_mask, "__builtin_ia32_pmovdw128_mask", IX86_BUILTIN_PMOVDW128, UNKNOWN, (int) V8HI_FTYPE_V4SI_V8HI_QI },
32118 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_truncatev8siv8hi2_mask, "__builtin_ia32_pmovdw256_mask", IX86_BUILTIN_PMOVDW256, UNKNOWN, (int) V8HI_FTYPE_V8SI_V8HI_QI },
32119 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ss_truncatev4siv4hi2_mask, "__builtin_ia32_pmovsdw128_mask", IX86_BUILTIN_PMOVSDW128, UNKNOWN, (int) V8HI_FTYPE_V4SI_V8HI_QI },
32120 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ss_truncatev8siv8hi2_mask, "__builtin_ia32_pmovsdw256_mask", IX86_BUILTIN_PMOVSDW256, UNKNOWN, (int) V8HI_FTYPE_V8SI_V8HI_QI },
32121 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_us_truncatev4siv4hi2_mask, "__builtin_ia32_pmovusdw128_mask", IX86_BUILTIN_PMOVUSDW128, UNKNOWN, (int) V8HI_FTYPE_V4SI_V8HI_QI },
32122 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_us_truncatev8siv8hi2_mask, "__builtin_ia32_pmovusdw256_mask", IX86_BUILTIN_PMOVUSDW256, UNKNOWN, (int) V8HI_FTYPE_V8SI_V8HI_QI },
32123 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_truncatev2div2qi2_mask, "__builtin_ia32_pmovqb128_mask", IX86_BUILTIN_PMOVQB128, UNKNOWN, (int) V16QI_FTYPE_V2DI_V16QI_QI },
32124 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_truncatev4div4qi2_mask, "__builtin_ia32_pmovqb256_mask", IX86_BUILTIN_PMOVQB256, UNKNOWN, (int) V16QI_FTYPE_V4DI_V16QI_QI },
32125 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ss_truncatev2div2qi2_mask, "__builtin_ia32_pmovsqb128_mask", IX86_BUILTIN_PMOVSQB128, UNKNOWN, (int) V16QI_FTYPE_V2DI_V16QI_QI },
32126 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ss_truncatev4div4qi2_mask, "__builtin_ia32_pmovsqb256_mask", IX86_BUILTIN_PMOVSQB256, UNKNOWN, (int) V16QI_FTYPE_V4DI_V16QI_QI },
32127 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_us_truncatev2div2qi2_mask, "__builtin_ia32_pmovusqb128_mask", IX86_BUILTIN_PMOVUSQB128, UNKNOWN, (int) V16QI_FTYPE_V2DI_V16QI_QI },
32128 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_us_truncatev4div4qi2_mask, "__builtin_ia32_pmovusqb256_mask", IX86_BUILTIN_PMOVUSQB256, UNKNOWN, (int) V16QI_FTYPE_V4DI_V16QI_QI },
32129 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_truncatev2div2hi2_mask, "__builtin_ia32_pmovqw128_mask", IX86_BUILTIN_PMOVQW128, UNKNOWN, (int) V8HI_FTYPE_V2DI_V8HI_QI },
32130 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_truncatev4div4hi2_mask, "__builtin_ia32_pmovqw256_mask", IX86_BUILTIN_PMOVQW256, UNKNOWN, (int) V8HI_FTYPE_V4DI_V8HI_QI },
32131 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ss_truncatev2div2hi2_mask, "__builtin_ia32_pmovsqw128_mask", IX86_BUILTIN_PMOVSQW128, UNKNOWN, (int) V8HI_FTYPE_V2DI_V8HI_QI },
32132 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ss_truncatev4div4hi2_mask, "__builtin_ia32_pmovsqw256_mask", IX86_BUILTIN_PMOVSQW256, UNKNOWN, (int) V8HI_FTYPE_V4DI_V8HI_QI },
32133 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_us_truncatev2div2hi2_mask, "__builtin_ia32_pmovusqw128_mask", IX86_BUILTIN_PMOVUSQW128, UNKNOWN, (int) V8HI_FTYPE_V2DI_V8HI_QI },
32134 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_us_truncatev4div4hi2_mask, "__builtin_ia32_pmovusqw256_mask", IX86_BUILTIN_PMOVUSQW256, UNKNOWN, (int) V8HI_FTYPE_V4DI_V8HI_QI },
32135 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_truncatev2div2si2_mask, "__builtin_ia32_pmovqd128_mask", IX86_BUILTIN_PMOVQD128, UNKNOWN, (int) V4SI_FTYPE_V2DI_V4SI_QI },
32136 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_truncatev4div4si2_mask, "__builtin_ia32_pmovqd256_mask", IX86_BUILTIN_PMOVQD256, UNKNOWN, (int) V4SI_FTYPE_V4DI_V4SI_QI },
32137 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ss_truncatev2div2si2_mask, "__builtin_ia32_pmovsqd128_mask", IX86_BUILTIN_PMOVSQD128, UNKNOWN, (int) V4SI_FTYPE_V2DI_V4SI_QI },
32138 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ss_truncatev4div4si2_mask, "__builtin_ia32_pmovsqd256_mask", IX86_BUILTIN_PMOVSQD256, UNKNOWN, (int) V4SI_FTYPE_V4DI_V4SI_QI },
32139 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_us_truncatev2div2si2_mask, "__builtin_ia32_pmovusqd128_mask", IX86_BUILTIN_PMOVUSQD128, UNKNOWN, (int) V4SI_FTYPE_V2DI_V4SI_QI },
32140 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_us_truncatev4div4si2_mask, "__builtin_ia32_pmovusqd256_mask", IX86_BUILTIN_PMOVUSQD256, UNKNOWN, (int) V4SI_FTYPE_V4DI_V4SI_QI },
32141 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512dq_rangepv4df_mask, "__builtin_ia32_rangepd256_mask", IX86_BUILTIN_RANGEPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_INT_V4DF_QI },
32142 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512dq_rangepv2df_mask, "__builtin_ia32_rangepd128_mask", IX86_BUILTIN_RANGEPD128, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT_V2DF_QI },
32143 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512dq_rangepv8sf_mask, "__builtin_ia32_rangeps256_mask", IX86_BUILTIN_RANGEPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_INT_V8SF_QI },
32144 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512dq_rangepv4sf_mask, "__builtin_ia32_rangeps128_mask", IX86_BUILTIN_RANGEPS128, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT_V4SF_QI },
32145 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_getexpv8sf_mask, "__builtin_ia32_getexpps256_mask", IX86_BUILTIN_GETEXPPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_QI },
32146 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_getexpv4df_mask, "__builtin_ia32_getexppd256_mask", IX86_BUILTIN_GETEXPPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_QI },
32147 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_getexpv4sf_mask, "__builtin_ia32_getexpps128_mask", IX86_BUILTIN_GETEXPPS128, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_QI },
32148 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_getexpv2df_mask, "__builtin_ia32_getexppd128_mask", IX86_BUILTIN_GETEXPPD128, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_QI },
32149 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fixupimmv4df_mask, "__builtin_ia32_fixupimmpd256_mask", IX86_BUILTIN_FIXUPIMMPD256_MASK, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DI_INT_QI },
32150 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fixupimmv4df_maskz, "__builtin_ia32_fixupimmpd256_maskz", IX86_BUILTIN_FIXUPIMMPD256_MASKZ, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DI_INT_QI },
32151 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fixupimmv8sf_mask, "__builtin_ia32_fixupimmps256_mask", IX86_BUILTIN_FIXUPIMMPS256_MASK, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SI_INT_QI },
32152 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fixupimmv8sf_maskz, "__builtin_ia32_fixupimmps256_maskz", IX86_BUILTIN_FIXUPIMMPS256_MASKZ, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SI_INT_QI },
32153 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fixupimmv2df_mask, "__builtin_ia32_fixupimmpd128_mask", IX86_BUILTIN_FIXUPIMMPD128_MASK, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DI_INT_QI },
32154 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fixupimmv2df_maskz, "__builtin_ia32_fixupimmpd128_maskz", IX86_BUILTIN_FIXUPIMMPD128_MASKZ, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DI_INT_QI },
32155 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fixupimmv4sf_mask, "__builtin_ia32_fixupimmps128_mask", IX86_BUILTIN_FIXUPIMMPS128_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SI_INT_QI },
32156 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fixupimmv4sf_maskz, "__builtin_ia32_fixupimmps128_maskz", IX86_BUILTIN_FIXUPIMMPS128_MASKZ, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SI_INT_QI },
32157 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_absv4di2_mask, "__builtin_ia32_pabsq256_mask", IX86_BUILTIN_PABSQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_QI },
32158 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_absv2di2_mask, "__builtin_ia32_pabsq128_mask", IX86_BUILTIN_PABSQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_QI },
32159 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_absv8si2_mask, "__builtin_ia32_pabsd256_mask", IX86_BUILTIN_PABSD256_MASK, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_QI },
32160 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_absv4si2_mask, "__builtin_ia32_pabsd128_mask", IX86_BUILTIN_PABSD128_MASK, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_QI },
32161 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_pmulhrswv16hi3_mask, "__builtin_ia32_pmulhrsw256_mask", IX86_BUILTIN_PMULHRSW256_MASK, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_V16HI_HI },
32162 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_ssse3_pmulhrswv8hi3_mask, "__builtin_ia32_pmulhrsw128_mask", IX86_BUILTIN_PMULHRSW128_MASK, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_V8HI_QI },
32163 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_umulv8hi3_highpart_mask, "__builtin_ia32_pmulhuw128_mask", IX86_BUILTIN_PMULHUW128_MASK, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_V8HI_QI },
32164 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_umulv16hi3_highpart_mask, "__builtin_ia32_pmulhuw256_mask", IX86_BUILTIN_PMULHUW256_MASK, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_V16HI_HI },
32165 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_smulv16hi3_highpart_mask, "__builtin_ia32_pmulhw256_mask", IX86_BUILTIN_PMULHW256_MASK, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_V16HI_HI },
32166 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_smulv8hi3_highpart_mask, "__builtin_ia32_pmulhw128_mask", IX86_BUILTIN_PMULHW128_MASK, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_V8HI_QI },
32167 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_mulv16hi3_mask, "__builtin_ia32_pmullw256_mask", IX86_BUILTIN_PMULLW256_MASK, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_V16HI_HI },
32168 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_mulv8hi3_mask, "__builtin_ia32_pmullw128_mask", IX86_BUILTIN_PMULLW128_MASK, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_V8HI_QI },
32169 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512dq_mulv4di3_mask, "__builtin_ia32_pmullq256_mask", IX86_BUILTIN_PMULLQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_V4DI_QI },
32170 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512dq_mulv2di3_mask, "__builtin_ia32_pmullq128_mask", IX86_BUILTIN_PMULLQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI_QI },
32171 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_andv4df3_mask, "__builtin_ia32_andpd256_mask", IX86_BUILTIN_ANDPD256_MASK, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DF_QI },
32172 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_andv2df3_mask, "__builtin_ia32_andpd128_mask", IX86_BUILTIN_ANDPD128_MASK, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF_QI },
32173 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_andv8sf3_mask, "__builtin_ia32_andps256_mask", IX86_BUILTIN_ANDPS256_MASK, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SF_QI },
32174 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_andv4sf3_mask, "__builtin_ia32_andps128_mask", IX86_BUILTIN_ANDPS128_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF_QI },
32175 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_andnotv4df3_mask, "__builtin_ia32_andnpd256_mask", IX86_BUILTIN_ANDNPD256_MASK, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DF_QI },
32176 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse2_andnotv2df3_mask, "__builtin_ia32_andnpd128_mask", IX86_BUILTIN_ANDNPD128_MASK, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF_QI },
32177 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_andnotv8sf3_mask, "__builtin_ia32_andnps256_mask", IX86_BUILTIN_ANDNPS256_MASK, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SF_QI },
32178 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse_andnotv4sf3_mask, "__builtin_ia32_andnps128_mask", IX86_BUILTIN_ANDNPS128_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF_QI },
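      /* Masked shift builtins (the *i variants take an immediate count, the rest a vector count operand), interleaved with masked bitwise logic (pand/pandn/por/pxor). */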
32179 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_ashlv8hi3_mask, "__builtin_ia32_psllwi128_mask", IX86_BUILTIN_PSLLWI128_MASK, UNKNOWN, (int) V8HI_FTYPE_V8HI_INT_V8HI_QI },
32180 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_ashlv4si3_mask, "__builtin_ia32_pslldi128_mask", IX86_BUILTIN_PSLLDI128_MASK, UNKNOWN, (int) V4SI_FTYPE_V4SI_INT_V4SI_QI },
32181 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_ashlv2di3_mask, "__builtin_ia32_psllqi128_mask", IX86_BUILTIN_PSLLQI128_MASK, UNKNOWN, (int) V2DI_FTYPE_V2DI_INT_V2DI_QI },
32182 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_ashlv8hi3_mask, "__builtin_ia32_psllw128_mask", IX86_BUILTIN_PSLLW128_MASK, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_V8HI_QI },
32183 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_ashlv4si3_mask, "__builtin_ia32_pslld128_mask", IX86_BUILTIN_PSLLD128_MASK, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI_QI },
32184 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_ashlv2di3_mask, "__builtin_ia32_psllq128_mask", IX86_BUILTIN_PSLLQ128_MASK, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI_QI },
32185 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_ashlv16hi3_mask, "__builtin_ia32_psllwi256_mask", IX86_BUILTIN_PSLLWI256_MASK, UNKNOWN, (int) V16HI_FTYPE_V16HI_INT_V16HI_HI },
32186 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_ashlv16hi3_mask, "__builtin_ia32_psllw256_mask", IX86_BUILTIN_PSLLW256_MASK, UNKNOWN, (int) V16HI_FTYPE_V16HI_V8HI_V16HI_HI },
32187 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_ashlv8si3_mask, "__builtin_ia32_pslldi256_mask", IX86_BUILTIN_PSLLDI256_MASK, UNKNOWN, (int) V8SI_FTYPE_V8SI_INT_V8SI_QI },
32188 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_ashlv8si3_mask, "__builtin_ia32_pslld256_mask", IX86_BUILTIN_PSLLD256_MASK, UNKNOWN, (int) V8SI_FTYPE_V8SI_V4SI_V8SI_QI },
32189 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_ashlv4di3_mask, "__builtin_ia32_psllqi256_mask", IX86_BUILTIN_PSLLQI256_MASK, UNKNOWN, (int) V4DI_FTYPE_V4DI_INT_V4DI_QI },
32190 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_ashlv4di3_mask, "__builtin_ia32_psllq256_mask", IX86_BUILTIN_PSLLQ256_MASK, UNKNOWN, (int) V4DI_FTYPE_V4DI_V2DI_V4DI_QI },
32191 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_ashrv4si3_mask, "__builtin_ia32_psradi128_mask", IX86_BUILTIN_PSRADI128_MASK, UNKNOWN, (int) V4SI_FTYPE_V4SI_INT_V4SI_QI },
32192 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_ashrv4si3_mask, "__builtin_ia32_psrad128_mask", IX86_BUILTIN_PSRAD128_MASK, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI_QI },
32193 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_ashrv8si3_mask, "__builtin_ia32_psradi256_mask", IX86_BUILTIN_PSRADI256_MASK, UNKNOWN, (int) V8SI_FTYPE_V8SI_INT_V8SI_QI },
32194 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_ashrv8si3_mask, "__builtin_ia32_psrad256_mask", IX86_BUILTIN_PSRAD256_MASK, UNKNOWN, (int) V8SI_FTYPE_V8SI_V4SI_V8SI_QI },
32195 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_ashrv2di3_mask, "__builtin_ia32_psraqi128_mask", IX86_BUILTIN_PSRAQI128_MASK, UNKNOWN, (int) V2DI_FTYPE_V2DI_INT_V2DI_QI },
32196 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_ashrv2di3_mask, "__builtin_ia32_psraq128_mask", IX86_BUILTIN_PSRAQ128_MASK, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI_QI },
32197 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_ashrv4di3_mask, "__builtin_ia32_psraqi256_mask", IX86_BUILTIN_PSRAQI256_MASK, UNKNOWN, (int) V4DI_FTYPE_V4DI_INT_V4DI_QI },
32198 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_ashrv4di3_mask, "__builtin_ia32_psraq256_mask", IX86_BUILTIN_PSRAQ256_MASK, UNKNOWN, (int) V4DI_FTYPE_V4DI_V2DI_V4DI_QI },
32199 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_andv8si3_mask, "__builtin_ia32_pandd256_mask", IX86_BUILTIN_PANDD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_V8SI_QI },
32200 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_andv4si3_mask, "__builtin_ia32_pandd128_mask", IX86_BUILTIN_PANDD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI_QI },
32201 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_lshrv4si3_mask, "__builtin_ia32_psrldi128_mask", IX86_BUILTIN_PSRLDI128_MASK, UNKNOWN, (int) V4SI_FTYPE_V4SI_INT_V4SI_QI },
32202 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_lshrv4si3_mask, "__builtin_ia32_psrld128_mask", IX86_BUILTIN_PSRLD128_MASK, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI_QI },
32203 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_lshrv8si3_mask, "__builtin_ia32_psrldi256_mask", IX86_BUILTIN_PSRLDI256_MASK, UNKNOWN, (int) V8SI_FTYPE_V8SI_INT_V8SI_QI },
32204 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_lshrv8si3_mask, "__builtin_ia32_psrld256_mask", IX86_BUILTIN_PSRLD256_MASK, UNKNOWN, (int) V8SI_FTYPE_V8SI_V4SI_V8SI_QI },
32205 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_lshrv2di3_mask, "__builtin_ia32_psrlqi128_mask", IX86_BUILTIN_PSRLQI128_MASK, UNKNOWN, (int) V2DI_FTYPE_V2DI_INT_V2DI_QI },
32206 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_lshrv2di3_mask, "__builtin_ia32_psrlq128_mask", IX86_BUILTIN_PSRLQ128_MASK, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI_QI },
32207 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_lshrv4di3_mask, "__builtin_ia32_psrlqi256_mask", IX86_BUILTIN_PSRLQI256_MASK, UNKNOWN, (int) V4DI_FTYPE_V4DI_INT_V4DI_QI },
32208 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_lshrv4di3_mask, "__builtin_ia32_psrlq256_mask", IX86_BUILTIN_PSRLQ256_MASK, UNKNOWN, (int) V4DI_FTYPE_V4DI_V2DI_V4DI_QI },
32209 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_andv4di3_mask, "__builtin_ia32_pandq256_mask", IX86_BUILTIN_PANDQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_V4DI_QI },
32210 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_andv2di3_mask, "__builtin_ia32_pandq128_mask", IX86_BUILTIN_PANDQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI_QI },
32211 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_andnotv8si3_mask, "__builtin_ia32_pandnd256_mask", IX86_BUILTIN_PANDND256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_V8SI_QI },
32212 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse2_andnotv4si3_mask, "__builtin_ia32_pandnd128_mask", IX86_BUILTIN_PANDND128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI_QI },
32213 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_andnotv4di3_mask, "__builtin_ia32_pandnq256_mask", IX86_BUILTIN_PANDNQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_V4DI_QI },
32214 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse2_andnotv2di3_mask, "__builtin_ia32_pandnq128_mask", IX86_BUILTIN_PANDNQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI_QI },
32215 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_iorv8si3_mask, "__builtin_ia32_pord256_mask", IX86_BUILTIN_PORD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_V8SI_QI },
32216 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_iorv4si3_mask, "__builtin_ia32_pord128_mask", IX86_BUILTIN_PORD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI_QI },
32217 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_iorv4di3_mask, "__builtin_ia32_porq256_mask", IX86_BUILTIN_PORQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_V4DI_QI },
32218 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_iorv2di3_mask, "__builtin_ia32_porq128_mask", IX86_BUILTIN_PORQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI_QI },
32219 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_xorv8si3_mask, "__builtin_ia32_pxord256_mask", IX86_BUILTIN_PXORD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_V8SI_QI },
32220 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_xorv4si3_mask, "__builtin_ia32_pxord128_mask", IX86_BUILTIN_PXORD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI_QI },
32221 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_xorv4di3_mask, "__builtin_ia32_pxorq256_mask", IX86_BUILTIN_PXORQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_V4DI_QI },
32222 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_xorv2di3_mask, "__builtin_ia32_pxorq128_mask", IX86_BUILTIN_PXORQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI_QI },
32223 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_packsswb_mask, "__builtin_ia32_packsswb256_mask", IX86_BUILTIN_PACKSSWB256_MASK, UNKNOWN, (int) V32QI_FTYPE_V16HI_V16HI_V32QI_SI },
32224 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse2_packsswb_mask, "__builtin_ia32_packsswb128_mask", IX86_BUILTIN_PACKSSWB128_MASK, UNKNOWN, (int) V16QI_FTYPE_V8HI_V8HI_V16QI_HI },
32225 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_packuswb_mask, "__builtin_ia32_packuswb256_mask", IX86_BUILTIN_PACKUSWB256_MASK, UNKNOWN, (int) V32QI_FTYPE_V16HI_V16HI_V32QI_SI },
32226 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse2_packuswb_mask, "__builtin_ia32_packuswb128_mask", IX86_BUILTIN_PACKUSWB128_MASK, UNKNOWN, (int) V16QI_FTYPE_V8HI_V8HI_V16QI_HI },
32227 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_rndscalev8sf_mask, "__builtin_ia32_rndscaleps_256_mask", IX86_BUILTIN_RNDSCALEPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_INT_V8SF_QI },
32228 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_rndscalev4df_mask, "__builtin_ia32_rndscalepd_256_mask", IX86_BUILTIN_RNDSCALEPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_INT_V4DF_QI },
32229 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_rndscalev4sf_mask, "__builtin_ia32_rndscaleps_128_mask", IX86_BUILTIN_RNDSCALEPS128, UNKNOWN, (int) V4SF_FTYPE_V4SF_INT_V4SF_QI },
32230 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_rndscalev2df_mask, "__builtin_ia32_rndscalepd_128_mask", IX86_BUILTIN_RNDSCALEPD128, UNKNOWN, (int) V2DF_FTYPE_V2DF_INT_V2DF_QI },
32231 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vternlogv4di_mask, "__builtin_ia32_pternlogq256_mask", IX86_BUILTIN_VTERNLOGQ256_MASK, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_V4DI_INT_QI },
32232 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vternlogv4di_maskz, "__builtin_ia32_pternlogq256_maskz", IX86_BUILTIN_VTERNLOGQ256_MASKZ, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_V4DI_INT_QI },
32233 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vternlogv8si_mask, "__builtin_ia32_pternlogd256_mask", IX86_BUILTIN_VTERNLOGD256_MASK, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_V8SI_INT_QI },
32234 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vternlogv8si_maskz, "__builtin_ia32_pternlogd256_maskz", IX86_BUILTIN_VTERNLOGD256_MASKZ, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_V8SI_INT_QI },
32235 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vternlogv2di_mask, "__builtin_ia32_pternlogq128_mask", IX86_BUILTIN_VTERNLOGQ128_MASK, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI_INT_QI },
32236 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vternlogv2di_maskz, "__builtin_ia32_pternlogq128_maskz", IX86_BUILTIN_VTERNLOGQ128_MASKZ, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI_INT_QI },
32237 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vternlogv4si_mask, "__builtin_ia32_pternlogd128_mask", IX86_BUILTIN_VTERNLOGD128_MASK, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI_INT_QI },
32238 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vternlogv4si_maskz, "__builtin_ia32_pternlogd128_maskz", IX86_BUILTIN_VTERNLOGD128_MASKZ, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI_INT_QI },
32239 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_scalefv4df_mask, "__builtin_ia32_scalefpd256_mask", IX86_BUILTIN_SCALEFPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DF_QI },
32240 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_scalefv8sf_mask, "__builtin_ia32_scalefps256_mask", IX86_BUILTIN_SCALEFPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SF_QI },
32241 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_scalefv2df_mask, "__builtin_ia32_scalefpd128_mask", IX86_BUILTIN_SCALEFPD128, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF_QI },
32242 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_scalefv4sf_mask, "__builtin_ia32_scalefps128_mask", IX86_BUILTIN_SCALEFPS128, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF_QI },
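      /* Masked FMA builtins; the _mask, _mask3 and _maskz suffixes select which source operand masked elements are merged from, or zero-masking. */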
32243 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmadd_v4df_mask, "__builtin_ia32_vfmaddpd256_mask", IX86_BUILTIN_VFMADDPD256_MASK, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DF_QI },
32244 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmadd_v4df_mask3, "__builtin_ia32_vfmaddpd256_mask3", IX86_BUILTIN_VFMADDPD256_MASK3, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DF_QI },
32245 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmadd_v4df_maskz, "__builtin_ia32_vfmaddpd256_maskz", IX86_BUILTIN_VFMADDPD256_MASKZ, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DF_QI },
32246 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmadd_v2df_mask, "__builtin_ia32_vfmaddpd128_mask", IX86_BUILTIN_VFMADDPD128_MASK, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF_QI },
32247 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmadd_v2df_mask3, "__builtin_ia32_vfmaddpd128_mask3", IX86_BUILTIN_VFMADDPD128_MASK3, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF_QI },
32248 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmadd_v2df_maskz, "__builtin_ia32_vfmaddpd128_maskz", IX86_BUILTIN_VFMADDPD128_MASKZ, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF_QI },
32249 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmadd_v8sf_mask, "__builtin_ia32_vfmaddps256_mask", IX86_BUILTIN_VFMADDPS256_MASK, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SF_QI },
32250 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmadd_v8sf_mask3, "__builtin_ia32_vfmaddps256_mask3", IX86_BUILTIN_VFMADDPS256_MASK3, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SF_QI },
32251 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmadd_v8sf_maskz, "__builtin_ia32_vfmaddps256_maskz", IX86_BUILTIN_VFMADDPS256_MASKZ, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SF_QI },
32252 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmadd_v4sf_mask, "__builtin_ia32_vfmaddps128_mask", IX86_BUILTIN_VFMADDPS128_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF_QI },
32253 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmadd_v4sf_mask3, "__builtin_ia32_vfmaddps128_mask3", IX86_BUILTIN_VFMADDPS128_MASK3, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF_QI },
32254 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmadd_v4sf_maskz, "__builtin_ia32_vfmaddps128_maskz", IX86_BUILTIN_VFMADDPS128_MASKZ, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF_QI },
32255 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmsub_v4df_mask3, "__builtin_ia32_vfmsubpd256_mask3", IX86_BUILTIN_VFMSUBPD256_MASK3, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DF_QI },
32256 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmsub_v2df_mask3, "__builtin_ia32_vfmsubpd128_mask3", IX86_BUILTIN_VFMSUBPD128_MASK3, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF_QI },
32257 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmsub_v8sf_mask3, "__builtin_ia32_vfmsubps256_mask3", IX86_BUILTIN_VFMSUBPS256_MASK3, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SF_QI },
32258 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmsub_v4sf_mask3, "__builtin_ia32_vfmsubps128_mask3", IX86_BUILTIN_VFMSUBPS128_MASK3, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF_QI },
32259 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fnmadd_v4df_mask, "__builtin_ia32_vfnmaddpd256_mask", IX86_BUILTIN_VFNMADDPD256_MASK, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DF_QI },
32260 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fnmadd_v2df_mask, "__builtin_ia32_vfnmaddpd128_mask", IX86_BUILTIN_VFNMADDPD128_MASK, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF_QI },
32261 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fnmadd_v8sf_mask, "__builtin_ia32_vfnmaddps256_mask", IX86_BUILTIN_VFNMADDPS256_MASK, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SF_QI },
32262 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fnmadd_v4sf_mask, "__builtin_ia32_vfnmaddps128_mask", IX86_BUILTIN_VFNMADDPS128_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF_QI },
32263 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fnmsub_v4df_mask, "__builtin_ia32_vfnmsubpd256_mask", IX86_BUILTIN_VFNMSUBPD256_MASK, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DF_QI },
32264 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fnmsub_v4df_mask3, "__builtin_ia32_vfnmsubpd256_mask3", IX86_BUILTIN_VFNMSUBPD256_MASK3, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DF_QI },
32265 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fnmsub_v2df_mask, "__builtin_ia32_vfnmsubpd128_mask", IX86_BUILTIN_VFNMSUBPD128_MASK, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF_QI },
32266 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fnmsub_v2df_mask3, "__builtin_ia32_vfnmsubpd128_mask3", IX86_BUILTIN_VFNMSUBPD128_MASK3, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF_QI },
32267 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fnmsub_v8sf_mask, "__builtin_ia32_vfnmsubps256_mask", IX86_BUILTIN_VFNMSUBPS256_MASK, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SF_QI },
32268 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fnmsub_v8sf_mask3, "__builtin_ia32_vfnmsubps256_mask3", IX86_BUILTIN_VFNMSUBPS256_MASK3, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SF_QI },
32269 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fnmsub_v4sf_mask, "__builtin_ia32_vfnmsubps128_mask", IX86_BUILTIN_VFNMSUBPS128_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF_QI },
32270 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fnmsub_v4sf_mask3, "__builtin_ia32_vfnmsubps128_mask3", IX86_BUILTIN_VFNMSUBPS128_MASK3, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF_QI },
32271 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmaddsub_v4df_mask, "__builtin_ia32_vfmaddsubpd256_mask", IX86_BUILTIN_VFMADDSUBPD256_MASK, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DF_QI },
32272 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmaddsub_v4df_mask3, "__builtin_ia32_vfmaddsubpd256_mask3", IX86_BUILTIN_VFMADDSUBPD256_MASK3, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DF_QI },
32273 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmaddsub_v4df_maskz, "__builtin_ia32_vfmaddsubpd256_maskz", IX86_BUILTIN_VFMADDSUBPD256_MASKZ, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DF_QI },
32274 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmaddsub_v2df_mask, "__builtin_ia32_vfmaddsubpd128_mask", IX86_BUILTIN_VFMADDSUBPD128_MASK, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF_QI },
32275 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmaddsub_v2df_mask3, "__builtin_ia32_vfmaddsubpd128_mask3", IX86_BUILTIN_VFMADDSUBPD128_MASK3, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF_QI },
32276 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmaddsub_v2df_maskz, "__builtin_ia32_vfmaddsubpd128_maskz", IX86_BUILTIN_VFMADDSUBPD128_MASKZ, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF_QI },
32277 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmaddsub_v8sf_mask, "__builtin_ia32_vfmaddsubps256_mask", IX86_BUILTIN_VFMADDSUBPS256_MASK, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SF_QI },
32278 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmaddsub_v8sf_mask3, "__builtin_ia32_vfmaddsubps256_mask3", IX86_BUILTIN_VFMADDSUBPS256_MASK3, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SF_QI },
32279 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmaddsub_v8sf_maskz, "__builtin_ia32_vfmaddsubps256_maskz", IX86_BUILTIN_VFMADDSUBPS256_MASKZ, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SF_QI },
32280 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmaddsub_v4sf_mask, "__builtin_ia32_vfmaddsubps128_mask", IX86_BUILTIN_VFMADDSUBPS128_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF_QI },
32281 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmaddsub_v4sf_mask3, "__builtin_ia32_vfmaddsubps128_mask3", IX86_BUILTIN_VFMADDSUBPS128_MASK3, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF_QI },
32282 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmaddsub_v4sf_maskz, "__builtin_ia32_vfmaddsubps128_maskz", IX86_BUILTIN_VFMADDSUBPS128_MASKZ, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF_QI },
32283 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmsubadd_v4df_mask3, "__builtin_ia32_vfmsubaddpd256_mask3", IX86_BUILTIN_VFMSUBADDPD256_MASK3, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DF_QI },
32284 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmsubadd_v2df_mask3, "__builtin_ia32_vfmsubaddpd128_mask3", IX86_BUILTIN_VFMSUBADDPD128_MASK3, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF_QI },
32285 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmsubadd_v8sf_mask3, "__builtin_ia32_vfmsubaddps256_mask3", IX86_BUILTIN_VFMSUBADDPS256_MASK3, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SF_QI },
32286 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmsubadd_v4sf_mask3, "__builtin_ia32_vfmsubaddps128_mask3", IX86_BUILTIN_VFMSUBADDPS128_MASK3, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF_QI },
32287 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vinsertv4df, "__builtin_ia32_insertf64x2_256_mask", IX86_BUILTIN_INSERTF64X2_256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V2DF_INT_V4DF_QI },
32288 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vinsertv4di, "__builtin_ia32_inserti64x2_256_mask", IX86_BUILTIN_INSERTI64X2_256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V2DI_INT_V4DI_QI },
32289 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ashrvv16hi_mask, "__builtin_ia32_psrav16hi_mask", IX86_BUILTIN_PSRAVV16HI, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_V16HI_HI },
32290 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ashrvv8hi_mask, "__builtin_ia32_psrav8hi_mask", IX86_BUILTIN_PSRAVV8HI, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_V8HI_QI },
32291 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512bw_pmaddubsw512v16hi_mask, "__builtin_ia32_pmaddubsw256_mask", IX86_BUILTIN_PMADDUBSW256_MASK, UNKNOWN, (int) V16HI_FTYPE_V32QI_V32QI_V16HI_HI },
32292 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512bw_pmaddubsw512v8hi_mask, "__builtin_ia32_pmaddubsw128_mask", IX86_BUILTIN_PMADDUBSW128_MASK, UNKNOWN, (int) V8HI_FTYPE_V16QI_V16QI_V8HI_QI },
32293 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512bw_pmaddwd512v16hi_mask, "__builtin_ia32_pmaddwd256_mask", IX86_BUILTIN_PMADDWD256_MASK, UNKNOWN, (int) V8SI_FTYPE_V16HI_V16HI_V8SI_QI },
32294 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512bw_pmaddwd512v8hi_mask, "__builtin_ia32_pmaddwd128_mask", IX86_BUILTIN_PMADDWD128_MASK, UNKNOWN, (int) V4SI_FTYPE_V8HI_V8HI_V4SI_QI },
32295 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_lshrvv16hi_mask, "__builtin_ia32_psrlv16hi_mask", IX86_BUILTIN_PSRLVV16HI, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_V16HI_HI },
32296 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_lshrvv8hi_mask, "__builtin_ia32_psrlv8hi_mask", IX86_BUILTIN_PSRLVV8HI, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_V8HI_QI },
32297 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_fix_notruncv8sfv8si_mask, "__builtin_ia32_cvtps2dq256_mask", IX86_BUILTIN_CVTPS2DQ256_MASK, UNKNOWN, (int) V8SI_FTYPE_V8SF_V8SI_QI },
32298 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse2_fix_notruncv4sfv4si_mask, "__builtin_ia32_cvtps2dq128_mask", IX86_BUILTIN_CVTPS2DQ128_MASK, UNKNOWN, (int) V4SI_FTYPE_V4SF_V4SI_QI },
32299 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ufix_notruncv8sfv8si_mask, "__builtin_ia32_cvtps2udq256_mask", IX86_BUILTIN_CVTPS2UDQ256, UNKNOWN, (int) V8SI_FTYPE_V8SF_V8SI_QI },
32300 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ufix_notruncv4sfv4si_mask, "__builtin_ia32_cvtps2udq128_mask", IX86_BUILTIN_CVTPS2UDQ128, UNKNOWN, (int) V4SI_FTYPE_V4SF_V4SI_QI },
32301 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512dq_cvtps2qqv4di_mask, "__builtin_ia32_cvtps2qq256_mask", IX86_BUILTIN_CVTPS2QQ256, UNKNOWN, (int) V4DI_FTYPE_V4SF_V4DI_QI },
32302 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512dq_cvtps2qqv2di_mask, "__builtin_ia32_cvtps2qq128_mask", IX86_BUILTIN_CVTPS2QQ128, UNKNOWN, (int) V2DI_FTYPE_V4SF_V2DI_QI },
32303 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512dq_cvtps2uqqv4di_mask, "__builtin_ia32_cvtps2uqq256_mask", IX86_BUILTIN_CVTPS2UQQ256, UNKNOWN, (int) V4DI_FTYPE_V4SF_V4DI_QI },
32304 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512dq_cvtps2uqqv2di_mask, "__builtin_ia32_cvtps2uqq128_mask", IX86_BUILTIN_CVTPS2UQQ128, UNKNOWN, (int) V2DI_FTYPE_V4SF_V2DI_QI },
32305 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_getmantv8sf_mask, "__builtin_ia32_getmantps256_mask", IX86_BUILTIN_GETMANTPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_INT_V8SF_QI },
32306 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_getmantv4sf_mask, "__builtin_ia32_getmantps128_mask", IX86_BUILTIN_GETMANTPS128, UNKNOWN, (int) V4SF_FTYPE_V4SF_INT_V4SF_QI },
32307 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_getmantv4df_mask, "__builtin_ia32_getmantpd256_mask", IX86_BUILTIN_GETMANTPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_INT_V4DF_QI },
32308 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_getmantv2df_mask, "__builtin_ia32_getmantpd128_mask", IX86_BUILTIN_GETMANTPD128, UNKNOWN, (int) V2DF_FTYPE_V2DF_INT_V2DF_QI },
32309 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_movddup256_mask, "__builtin_ia32_movddup256_mask", IX86_BUILTIN_MOVDDUP256_MASK, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_QI },
32310 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_vec_dupv2df_mask, "__builtin_ia32_movddup128_mask", IX86_BUILTIN_MOVDDUP128_MASK, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_QI },
32311 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_movshdup256_mask, "__builtin_ia32_movshdup256_mask", IX86_BUILTIN_MOVSHDUP256_MASK, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_QI },
32312 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse3_movshdup_mask, "__builtin_ia32_movshdup128_mask", IX86_BUILTIN_MOVSHDUP128_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_QI },
32313 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_movsldup256_mask, "__builtin_ia32_movsldup256_mask", IX86_BUILTIN_MOVSLDUP256_MASK, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_QI },
32314 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse3_movsldup_mask, "__builtin_ia32_movsldup128_mask", IX86_BUILTIN_MOVSLDUP128_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_QI },
32315 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_floatv4div4sf2_mask, "__builtin_ia32_cvtqq2ps256_mask", IX86_BUILTIN_CVTQQ2PS256, UNKNOWN, (int) V4SF_FTYPE_V4DI_V4SF_QI },
32316 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_floatv2div2sf2_mask, "__builtin_ia32_cvtqq2ps128_mask", IX86_BUILTIN_CVTQQ2PS128, UNKNOWN, (int) V4SF_FTYPE_V2DI_V4SF_QI },
32317 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_ufloatv4div4sf2_mask, "__builtin_ia32_cvtuqq2ps256_mask", IX86_BUILTIN_CVTUQQ2PS256, UNKNOWN, (int) V4SF_FTYPE_V4DI_V4SF_QI },
32318 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_ufloatv2div2sf2_mask, "__builtin_ia32_cvtuqq2ps128_mask", IX86_BUILTIN_CVTUQQ2PS128, UNKNOWN, (int) V4SF_FTYPE_V2DI_V4SF_QI },
32319 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_floatv4div4df2_mask, "__builtin_ia32_cvtqq2pd256_mask", IX86_BUILTIN_CVTQQ2PD256, UNKNOWN, (int) V4DF_FTYPE_V4DI_V4DF_QI },
32320 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_floatv2div2df2_mask, "__builtin_ia32_cvtqq2pd128_mask", IX86_BUILTIN_CVTQQ2PD128, UNKNOWN, (int) V2DF_FTYPE_V2DI_V2DF_QI },
32321 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_ufloatv4div4df2_mask, "__builtin_ia32_cvtuqq2pd256_mask", IX86_BUILTIN_CVTUQQ2PD256, UNKNOWN, (int) V4DF_FTYPE_V4DI_V4DF_QI },
32322 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_ufloatv2div2df2_mask, "__builtin_ia32_cvtuqq2pd128_mask", IX86_BUILTIN_CVTUQQ2PD128, UNKNOWN, (int) V2DF_FTYPE_V2DI_V2DF_QI },
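      /* Two-source variable permute builtins (vpermt2var / vpermi2var); as the signatures show, the index vector is the first operand of the t2 forms and the second operand of the i2 forms. */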
32323 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermt2varv4di3_mask, "__builtin_ia32_vpermt2varq256_mask", IX86_BUILTIN_VPERMT2VARQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_V4DI_QI },
32324 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermt2varv4di3_maskz, "__builtin_ia32_vpermt2varq256_maskz", IX86_BUILTIN_VPERMT2VARQ256_MASKZ, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_V4DI_QI },
32325 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermt2varv8si3_mask, "__builtin_ia32_vpermt2vard256_mask", IX86_BUILTIN_VPERMT2VARD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_V8SI_QI },
32326 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermt2varv8si3_maskz, "__builtin_ia32_vpermt2vard256_maskz", IX86_BUILTIN_VPERMT2VARD256_MASKZ, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_V8SI_QI },
32327 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermi2varv4di3_mask, "__builtin_ia32_vpermi2varq256_mask", IX86_BUILTIN_VPERMI2VARQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_V4DI_QI },
32328 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermi2varv8si3_mask, "__builtin_ia32_vpermi2vard256_mask", IX86_BUILTIN_VPERMI2VARD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_V8SI_QI },
32329 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermt2varv4df3_mask, "__builtin_ia32_vpermt2varpd256_mask", IX86_BUILTIN_VPERMT2VARPD256, UNKNOWN, (int) V4DF_FTYPE_V4DI_V4DF_V4DF_QI },
32330 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermt2varv4df3_maskz, "__builtin_ia32_vpermt2varpd256_maskz", IX86_BUILTIN_VPERMT2VARPD256_MASKZ, UNKNOWN, (int) V4DF_FTYPE_V4DI_V4DF_V4DF_QI },
32331 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermt2varv8sf3_mask, "__builtin_ia32_vpermt2varps256_mask", IX86_BUILTIN_VPERMT2VARPS256, UNKNOWN, (int) V8SF_FTYPE_V8SI_V8SF_V8SF_QI },
32332 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermt2varv8sf3_maskz, "__builtin_ia32_vpermt2varps256_maskz", IX86_BUILTIN_VPERMT2VARPS256_MASKZ, UNKNOWN, (int) V8SF_FTYPE_V8SI_V8SF_V8SF_QI },
32333 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermi2varv4df3_mask, "__builtin_ia32_vpermi2varpd256_mask", IX86_BUILTIN_VPERMI2VARPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DI_V4DF_QI },
32334 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermi2varv8sf3_mask, "__builtin_ia32_vpermi2varps256_mask", IX86_BUILTIN_VPERMI2VARPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SI_V8SF_QI },
32335 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermt2varv2di3_mask, "__builtin_ia32_vpermt2varq128_mask", IX86_BUILTIN_VPERMT2VARQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI_QI },
32336 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermt2varv2di3_maskz, "__builtin_ia32_vpermt2varq128_maskz", IX86_BUILTIN_VPERMT2VARQ128_MASKZ, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI_QI },
32337 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermt2varv4si3_mask, "__builtin_ia32_vpermt2vard128_mask", IX86_BUILTIN_VPERMT2VARD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI_QI },
32338 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermt2varv4si3_maskz, "__builtin_ia32_vpermt2vard128_maskz", IX86_BUILTIN_VPERMT2VARD128_MASKZ, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI_QI },
32339 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermi2varv2di3_mask, "__builtin_ia32_vpermi2varq128_mask", IX86_BUILTIN_VPERMI2VARQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI_QI },
32340 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermi2varv4si3_mask, "__builtin_ia32_vpermi2vard128_mask", IX86_BUILTIN_VPERMI2VARD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI_QI },
32341 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermt2varv2df3_mask, "__builtin_ia32_vpermt2varpd128_mask", IX86_BUILTIN_VPERMT2VARPD128, UNKNOWN, (int) V2DF_FTYPE_V2DI_V2DF_V2DF_QI },
32342 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermt2varv2df3_maskz, "__builtin_ia32_vpermt2varpd128_maskz", IX86_BUILTIN_VPERMT2VARPD128_MASKZ, UNKNOWN, (int) V2DF_FTYPE_V2DI_V2DF_V2DF_QI },
32343 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermt2varv4sf3_mask, "__builtin_ia32_vpermt2varps128_mask", IX86_BUILTIN_VPERMT2VARPS128, UNKNOWN, (int) V4SF_FTYPE_V4SI_V4SF_V4SF_QI },
32344 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermt2varv4sf3_maskz, "__builtin_ia32_vpermt2varps128_maskz", IX86_BUILTIN_VPERMT2VARPS128_MASKZ, UNKNOWN, (int) V4SF_FTYPE_V4SI_V4SF_V4SF_QI },
32345 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermi2varv2df3_mask, "__builtin_ia32_vpermi2varpd128_mask", IX86_BUILTIN_VPERMI2VARPD128, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DI_V2DF_QI },
32346 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermi2varv4sf3_mask, "__builtin_ia32_vpermi2varps128_mask", IX86_BUILTIN_VPERMI2VARPS128, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SI_V4SF_QI },
32347 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_pshufbv32qi3_mask, "__builtin_ia32_pshufb256_mask", IX86_BUILTIN_PSHUFB256_MASK, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI_V32QI_SI },
32348 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_ssse3_pshufbv16qi3_mask, "__builtin_ia32_pshufb128_mask", IX86_BUILTIN_PSHUFB128_MASK, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_V16QI_HI },
32349 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_pshufhwv3_mask, "__builtin_ia32_pshufhw256_mask", IX86_BUILTIN_PSHUFHW256_MASK, UNKNOWN, (int) V16HI_FTYPE_V16HI_INT_V16HI_HI },
32350 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_pshufhw_mask, "__builtin_ia32_pshufhw128_mask", IX86_BUILTIN_PSHUFHW128_MASK, UNKNOWN, (int) V8HI_FTYPE_V8HI_INT_V8HI_QI },
32351 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_pshuflwv3_mask, "__builtin_ia32_pshuflw256_mask", IX86_BUILTIN_PSHUFLW256_MASK, UNKNOWN, (int) V16HI_FTYPE_V16HI_INT_V16HI_HI },
32352 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_pshuflw_mask, "__builtin_ia32_pshuflw128_mask", IX86_BUILTIN_PSHUFLW128_MASK, UNKNOWN, (int) V8HI_FTYPE_V8HI_INT_V8HI_QI },
32353 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_pshufdv3_mask, "__builtin_ia32_pshufd256_mask", IX86_BUILTIN_PSHUFD256_MASK, UNKNOWN, (int) V8SI_FTYPE_V8SI_INT_V8SI_QI },
32354 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_pshufd_mask, "__builtin_ia32_pshufd128_mask", IX86_BUILTIN_PSHUFD128_MASK, UNKNOWN, (int) V4SI_FTYPE_V4SI_INT_V4SI_QI },
32355 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_shufpd256_mask, "__builtin_ia32_shufpd256_mask", IX86_BUILTIN_SHUFPD256_MASK, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_INT_V4DF_QI },
32356 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse2_shufpd_mask, "__builtin_ia32_shufpd128_mask", IX86_BUILTIN_SHUFPD128_MASK, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT_V2DF_QI },
32357 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_shufps256_mask, "__builtin_ia32_shufps256_mask", IX86_BUILTIN_SHUFPS256_MASK, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_INT_V8SF_QI },
32358 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse_shufps_mask, "__builtin_ia32_shufps128_mask", IX86_BUILTIN_SHUFPS128_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT_V4SF_QI },
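      /* Masked rotate builtins (prol/pror), in variable-count (prolv/prorv) and immediate-count forms. */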
32359 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_rolvv4di_mask, "__builtin_ia32_prolvq256_mask", IX86_BUILTIN_PROLVQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_V4DI_QI },
32360 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_rolvv2di_mask, "__builtin_ia32_prolvq128_mask", IX86_BUILTIN_PROLVQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI_QI },
32361 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_rolv4di_mask, "__builtin_ia32_prolq256_mask", IX86_BUILTIN_PROLQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_INT_V4DI_QI },
32362 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_rolv2di_mask, "__builtin_ia32_prolq128_mask", IX86_BUILTIN_PROLQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_INT_V2DI_QI },
32363 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_rorvv4di_mask, "__builtin_ia32_prorvq256_mask", IX86_BUILTIN_PRORVQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_V4DI_QI },
32364 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_rorvv2di_mask, "__builtin_ia32_prorvq128_mask", IX86_BUILTIN_PRORVQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI_QI },
32365 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_rorv4di_mask, "__builtin_ia32_prorq256_mask", IX86_BUILTIN_PRORQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_INT_V4DI_QI },
32366 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_rorv2di_mask, "__builtin_ia32_prorq128_mask", IX86_BUILTIN_PRORQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_INT_V2DI_QI },
32367 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_ashrvv2di_mask, "__builtin_ia32_psravq128_mask", IX86_BUILTIN_PSRAVQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI_QI },
32368 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_ashrvv4di_mask, "__builtin_ia32_psravq256_mask", IX86_BUILTIN_PSRAVQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_V4DI_QI },
32369 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_ashlvv4di_mask, "__builtin_ia32_psllv4di_mask", IX86_BUILTIN_PSLLVV4DI_MASK, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_V4DI_QI },
32370 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_ashlvv2di_mask, "__builtin_ia32_psllv2di_mask", IX86_BUILTIN_PSLLVV2DI_MASK, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI_QI },
32371 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_ashlvv8si_mask, "__builtin_ia32_psllv8si_mask", IX86_BUILTIN_PSLLVV8SI_MASK, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_V8SI_QI },
32372 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_ashlvv4si_mask, "__builtin_ia32_psllv4si_mask", IX86_BUILTIN_PSLLVV4SI_MASK, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI_QI },
32373 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_ashrvv8si_mask, "__builtin_ia32_psrav8si_mask", IX86_BUILTIN_PSRAVV8SI_MASK, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_V8SI_QI },
32374 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_ashrvv4si_mask, "__builtin_ia32_psrav4si_mask", IX86_BUILTIN_PSRAVV4SI_MASK, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI_QI },
32375 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_lshrvv4di_mask, "__builtin_ia32_psrlv4di_mask", IX86_BUILTIN_PSRLVV4DI_MASK, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_V4DI_QI },
32376 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_lshrvv2di_mask, "__builtin_ia32_psrlv2di_mask", IX86_BUILTIN_PSRLVV2DI_MASK, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI_QI },
32377 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_lshrvv8si_mask, "__builtin_ia32_psrlv8si_mask", IX86_BUILTIN_PSRLVV8SI_MASK, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_V8SI_QI },
32378 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_lshrvv4si_mask, "__builtin_ia32_psrlv4si_mask", IX86_BUILTIN_PSRLVV4SI_MASK, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI_QI },
32379 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_ashrv16hi3_mask, "__builtin_ia32_psrawi256_mask", IX86_BUILTIN_PSRAWI256_MASK, UNKNOWN, (int) V16HI_FTYPE_V16HI_INT_V16HI_HI },
32380 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_ashrv16hi3_mask, "__builtin_ia32_psraw256_mask", IX86_BUILTIN_PSRAW256_MASK, UNKNOWN, (int) V16HI_FTYPE_V16HI_V8HI_V16HI_HI },
32381 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_ashrv8hi3_mask, "__builtin_ia32_psrawi128_mask", IX86_BUILTIN_PSRAWI128_MASK, UNKNOWN, (int) V8HI_FTYPE_V8HI_INT_V8HI_QI },
32382 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_ashrv8hi3_mask, "__builtin_ia32_psraw128_mask", IX86_BUILTIN_PSRAW128_MASK, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_V8HI_QI },
32383 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_lshrv16hi3_mask, "__builtin_ia32_psrlwi256_mask", IX86_BUILTIN_PSRLWI256_MASK, UNKNOWN, (int) V16HI_FTYPE_V16HI_INT_V16HI_HI },
32384 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_lshrv16hi3_mask, "__builtin_ia32_psrlw256_mask", IX86_BUILTIN_PSRLW256_MASK, UNKNOWN, (int) V16HI_FTYPE_V16HI_V8HI_V16HI_HI },
32385 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_lshrv8hi3_mask, "__builtin_ia32_psrlwi128_mask", IX86_BUILTIN_PSRLWI128_MASK, UNKNOWN, (int) V8HI_FTYPE_V8HI_INT_V8HI_QI },
32386 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_lshrv8hi3_mask, "__builtin_ia32_psrlw128_mask", IX86_BUILTIN_PSRLW128_MASK, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_V8HI_QI },
32387 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_rorvv8si_mask, "__builtin_ia32_prorvd256_mask", IX86_BUILTIN_PRORVD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_V8SI_QI },
32388 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_rolvv8si_mask, "__builtin_ia32_prolvd256_mask", IX86_BUILTIN_PROLVD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_V8SI_QI },
32389 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_rorv8si_mask, "__builtin_ia32_prord256_mask", IX86_BUILTIN_PRORD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_INT_V8SI_QI },
32390 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_rolv8si_mask, "__builtin_ia32_prold256_mask", IX86_BUILTIN_PROLD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_INT_V8SI_QI },
32391 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_rorvv4si_mask, "__builtin_ia32_prorvd128_mask", IX86_BUILTIN_PRORVD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI_QI },
32392 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_rolvv4si_mask, "__builtin_ia32_prolvd128_mask", IX86_BUILTIN_PROLVD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI_QI },
32393 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_rorv4si_mask, "__builtin_ia32_prord128_mask", IX86_BUILTIN_PRORD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_INT_V4SI_QI },
32394 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_rolv4si_mask, "__builtin_ia32_prold128_mask", IX86_BUILTIN_PROLD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_INT_V4SI_QI },
32395 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512dq_fpclassv4df_mask, "__builtin_ia32_fpclasspd256_mask", IX86_BUILTIN_FPCLASSPD256, UNKNOWN, (int) QI_FTYPE_V4DF_INT_QI },
32396 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512dq_fpclassv2df_mask, "__builtin_ia32_fpclasspd128_mask", IX86_BUILTIN_FPCLASSPD128, UNKNOWN, (int) QI_FTYPE_V2DF_INT_QI },
32397 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_avx512dq_vmfpclassv2df, "__builtin_ia32_fpclasssd", IX86_BUILTIN_FPCLASSSD, UNKNOWN, (int) QI_FTYPE_V2DF_INT },
32398 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512dq_fpclassv8sf_mask, "__builtin_ia32_fpclassps256_mask", IX86_BUILTIN_FPCLASSPS256, UNKNOWN, (int) QI_FTYPE_V8SF_INT_QI },
32399 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512dq_fpclassv4sf_mask, "__builtin_ia32_fpclassps128_mask", IX86_BUILTIN_FPCLASSPS128, UNKNOWN, (int) QI_FTYPE_V4SF_INT_QI },
32400 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_avx512dq_vmfpclassv4sf, "__builtin_ia32_fpclassss", IX86_BUILTIN_FPCLASSSS, UNKNOWN, (int) QI_FTYPE_V4SF_INT },
32401 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_cvtb2maskv16qi, "__builtin_ia32_cvtb2mask128", IX86_BUILTIN_CVTB2MASK128, UNKNOWN, (int) HI_FTYPE_V16QI },
32402 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_cvtb2maskv32qi, "__builtin_ia32_cvtb2mask256", IX86_BUILTIN_CVTB2MASK256, UNKNOWN, (int) SI_FTYPE_V32QI },
32403 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_cvtw2maskv8hi, "__builtin_ia32_cvtw2mask128", IX86_BUILTIN_CVTW2MASK128, UNKNOWN, (int) QI_FTYPE_V8HI },
32404 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_cvtw2maskv16hi, "__builtin_ia32_cvtw2mask256", IX86_BUILTIN_CVTW2MASK256, UNKNOWN, (int) HI_FTYPE_V16HI },
32405 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_cvtd2maskv4si, "__builtin_ia32_cvtd2mask128", IX86_BUILTIN_CVTD2MASK128, UNKNOWN, (int) QI_FTYPE_V4SI },
32406 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_cvtd2maskv8si, "__builtin_ia32_cvtd2mask256", IX86_BUILTIN_CVTD2MASK256, UNKNOWN, (int) QI_FTYPE_V8SI },
32407 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_cvtq2maskv2di, "__builtin_ia32_cvtq2mask128", IX86_BUILTIN_CVTQ2MASK128, UNKNOWN, (int) QI_FTYPE_V2DI },
32408 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_cvtq2maskv4di, "__builtin_ia32_cvtq2mask256", IX86_BUILTIN_CVTQ2MASK256, UNKNOWN, (int) QI_FTYPE_V4DI },
32409 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_cvtmask2bv16qi, "__builtin_ia32_cvtmask2b128", IX86_BUILTIN_CVTMASK2B128, UNKNOWN, (int) V16QI_FTYPE_HI },
32410 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_cvtmask2bv32qi, "__builtin_ia32_cvtmask2b256", IX86_BUILTIN_CVTMASK2B256, UNKNOWN, (int) V32QI_FTYPE_SI },
32411 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_cvtmask2wv8hi, "__builtin_ia32_cvtmask2w128", IX86_BUILTIN_CVTMASK2W128, UNKNOWN, (int) V8HI_FTYPE_QI },
32412 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_cvtmask2wv16hi, "__builtin_ia32_cvtmask2w256", IX86_BUILTIN_CVTMASK2W256, UNKNOWN, (int) V16HI_FTYPE_HI },
32413 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_cvtmask2dv4si, "__builtin_ia32_cvtmask2d128", IX86_BUILTIN_CVTMASK2D128, UNKNOWN, (int) V4SI_FTYPE_QI },
32414 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_cvtmask2dv8si, "__builtin_ia32_cvtmask2d256", IX86_BUILTIN_CVTMASK2D256, UNKNOWN, (int) V8SI_FTYPE_QI },
32415 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_cvtmask2qv2di, "__builtin_ia32_cvtmask2q128", IX86_BUILTIN_CVTMASK2Q128, UNKNOWN, (int) V2DI_FTYPE_QI },
32416 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_cvtmask2qv4di, "__builtin_ia32_cvtmask2q256", IX86_BUILTIN_CVTMASK2Q256, UNKNOWN, (int) V4DI_FTYPE_QI },
32417 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_eqv16qi3_mask, "__builtin_ia32_pcmpeqb128_mask", IX86_BUILTIN_PCMPEQB128_MASK, UNKNOWN, (int) HI_FTYPE_V16QI_V16QI_HI },
32418 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_eqv32qi3_mask, "__builtin_ia32_pcmpeqb256_mask", IX86_BUILTIN_PCMPEQB256_MASK, UNKNOWN, (int) SI_FTYPE_V32QI_V32QI_SI },
32419 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_eqv8hi3_mask, "__builtin_ia32_pcmpeqw128_mask", IX86_BUILTIN_PCMPEQW128_MASK, UNKNOWN, (int) QI_FTYPE_V8HI_V8HI_QI },
32420 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_eqv16hi3_mask, "__builtin_ia32_pcmpeqw256_mask", IX86_BUILTIN_PCMPEQW256_MASK, UNKNOWN, (int) HI_FTYPE_V16HI_V16HI_HI },
32421 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_eqv4si3_mask, "__builtin_ia32_pcmpeqd128_mask", IX86_BUILTIN_PCMPEQD128_MASK, UNKNOWN, (int) QI_FTYPE_V4SI_V4SI_QI },
32422 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_eqv8si3_mask, "__builtin_ia32_pcmpeqd256_mask", IX86_BUILTIN_PCMPEQD256_MASK, UNKNOWN, (int) QI_FTYPE_V8SI_V8SI_QI },
32423 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_eqv2di3_mask, "__builtin_ia32_pcmpeqq128_mask", IX86_BUILTIN_PCMPEQQ128_MASK, UNKNOWN, (int) QI_FTYPE_V2DI_V2DI_QI },
32424 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_eqv4di3_mask, "__builtin_ia32_pcmpeqq256_mask", IX86_BUILTIN_PCMPEQQ256_MASK, UNKNOWN, (int) QI_FTYPE_V4DI_V4DI_QI },
32425 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_gtv16qi3_mask, "__builtin_ia32_pcmpgtb128_mask", IX86_BUILTIN_PCMPGTB128_MASK, UNKNOWN, (int) HI_FTYPE_V16QI_V16QI_HI },
32426 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_gtv32qi3_mask, "__builtin_ia32_pcmpgtb256_mask", IX86_BUILTIN_PCMPGTB256_MASK, UNKNOWN, (int) SI_FTYPE_V32QI_V32QI_SI },
32427 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_gtv8hi3_mask, "__builtin_ia32_pcmpgtw128_mask", IX86_BUILTIN_PCMPGTW128_MASK, UNKNOWN, (int) QI_FTYPE_V8HI_V8HI_QI },
32428 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_gtv16hi3_mask, "__builtin_ia32_pcmpgtw256_mask", IX86_BUILTIN_PCMPGTW256_MASK, UNKNOWN, (int) HI_FTYPE_V16HI_V16HI_HI },
32429 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_gtv4si3_mask, "__builtin_ia32_pcmpgtd128_mask", IX86_BUILTIN_PCMPGTD128_MASK, UNKNOWN, (int) QI_FTYPE_V4SI_V4SI_QI },
32430 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_gtv8si3_mask, "__builtin_ia32_pcmpgtd256_mask", IX86_BUILTIN_PCMPGTD256_MASK, UNKNOWN, (int) QI_FTYPE_V8SI_V8SI_QI },
32431 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_gtv2di3_mask, "__builtin_ia32_pcmpgtq128_mask", IX86_BUILTIN_PCMPGTQ128_MASK, UNKNOWN, (int) QI_FTYPE_V2DI_V2DI_QI },
32432 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_gtv4di3_mask, "__builtin_ia32_pcmpgtq256_mask", IX86_BUILTIN_PCMPGTQ256_MASK, UNKNOWN, (int) QI_FTYPE_V4DI_V4DI_QI },
32433 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_testmv16qi3_mask, "__builtin_ia32_ptestmb128", IX86_BUILTIN_PTESTMB128, UNKNOWN, (int) HI_FTYPE_V16QI_V16QI_HI },
32434 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_testmv32qi3_mask, "__builtin_ia32_ptestmb256", IX86_BUILTIN_PTESTMB256, UNKNOWN, (int) SI_FTYPE_V32QI_V32QI_SI },
32435 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_testmv8hi3_mask, "__builtin_ia32_ptestmw128", IX86_BUILTIN_PTESTMW128, UNKNOWN, (int) QI_FTYPE_V8HI_V8HI_QI },
32436 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_testmv16hi3_mask, "__builtin_ia32_ptestmw256", IX86_BUILTIN_PTESTMW256, UNKNOWN, (int) HI_FTYPE_V16HI_V16HI_HI },
32437 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_testmv4si3_mask, "__builtin_ia32_ptestmd128", IX86_BUILTIN_PTESTMD128, UNKNOWN, (int) QI_FTYPE_V4SI_V4SI_QI },
32438 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_testmv8si3_mask, "__builtin_ia32_ptestmd256", IX86_BUILTIN_PTESTMD256, UNKNOWN, (int) QI_FTYPE_V8SI_V8SI_QI },
32439 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_testmv2di3_mask, "__builtin_ia32_ptestmq128", IX86_BUILTIN_PTESTMQ128, UNKNOWN, (int) QI_FTYPE_V2DI_V2DI_QI },
32440 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_testmv4di3_mask, "__builtin_ia32_ptestmq256", IX86_BUILTIN_PTESTMQ256, UNKNOWN, (int) QI_FTYPE_V4DI_V4DI_QI },
32441 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_testnmv16qi3_mask, "__builtin_ia32_ptestnmb128", IX86_BUILTIN_PTESTNMB128, UNKNOWN, (int) HI_FTYPE_V16QI_V16QI_HI },
32442 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_testnmv32qi3_mask, "__builtin_ia32_ptestnmb256", IX86_BUILTIN_PTESTNMB256, UNKNOWN, (int) SI_FTYPE_V32QI_V32QI_SI },
32443 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_testnmv8hi3_mask, "__builtin_ia32_ptestnmw128", IX86_BUILTIN_PTESTNMW128, UNKNOWN, (int) QI_FTYPE_V8HI_V8HI_QI },
32444 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_testnmv16hi3_mask, "__builtin_ia32_ptestnmw256", IX86_BUILTIN_PTESTNMW256, UNKNOWN, (int) HI_FTYPE_V16HI_V16HI_HI },
32445 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_testnmv4si3_mask, "__builtin_ia32_ptestnmd128", IX86_BUILTIN_PTESTNMD128, UNKNOWN, (int) QI_FTYPE_V4SI_V4SI_QI },
32446 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_testnmv8si3_mask, "__builtin_ia32_ptestnmd256", IX86_BUILTIN_PTESTNMD256, UNKNOWN, (int) QI_FTYPE_V8SI_V8SI_QI },
32447 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_testnmv2di3_mask, "__builtin_ia32_ptestnmq128", IX86_BUILTIN_PTESTNMQ128, UNKNOWN, (int) QI_FTYPE_V2DI_V2DI_QI },
32448 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_testnmv4di3_mask, "__builtin_ia32_ptestnmq256", IX86_BUILTIN_PTESTNMQ256, UNKNOWN, (int) QI_FTYPE_V4DI_V4DI_QI },
32449 { OPTION_MASK_ISA_AVX512VL | OPTION_MASK_ISA_AVX512CD, CODE_FOR_avx512cd_maskb_vec_dupv2di, "__builtin_ia32_broadcastmb128", IX86_BUILTIN_PBROADCASTMB128, UNKNOWN, (int) V2DI_FTYPE_QI },
32450 { OPTION_MASK_ISA_AVX512VL | OPTION_MASK_ISA_AVX512CD, CODE_FOR_avx512cd_maskb_vec_dupv4di, "__builtin_ia32_broadcastmb256", IX86_BUILTIN_PBROADCASTMB256, UNKNOWN, (int) V4DI_FTYPE_QI },
32451 { OPTION_MASK_ISA_AVX512VL | OPTION_MASK_ISA_AVX512CD, CODE_FOR_avx512cd_maskw_vec_dupv4si, "__builtin_ia32_broadcastmw128", IX86_BUILTIN_PBROADCASTMW128, UNKNOWN, (int) V4SI_FTYPE_HI },
32452 { OPTION_MASK_ISA_AVX512VL | OPTION_MASK_ISA_AVX512CD, CODE_FOR_avx512cd_maskw_vec_dupv8si, "__builtin_ia32_broadcastmw256", IX86_BUILTIN_PBROADCASTMW256, UNKNOWN, (int) V8SI_FTYPE_HI },
32453 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_compressv4df_mask, "__builtin_ia32_compressdf256_mask", IX86_BUILTIN_COMPRESSPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_QI },
32454 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_compressv2df_mask, "__builtin_ia32_compressdf128_mask", IX86_BUILTIN_COMPRESSPD128, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_QI },
32455 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_compressv8sf_mask, "__builtin_ia32_compresssf256_mask", IX86_BUILTIN_COMPRESSPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_QI },
32456 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_compressv4sf_mask, "__builtin_ia32_compresssf128_mask", IX86_BUILTIN_COMPRESSPS128, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_QI },
32457 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_compressv4di_mask, "__builtin_ia32_compressdi256_mask", IX86_BUILTIN_PCOMPRESSQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_QI },
32458 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_compressv2di_mask, "__builtin_ia32_compressdi128_mask", IX86_BUILTIN_PCOMPRESSQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_QI },
32459 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_compressv8si_mask, "__builtin_ia32_compresssi256_mask", IX86_BUILTIN_PCOMPRESSD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_QI },
32460 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_compressv4si_mask, "__builtin_ia32_compresssi128_mask", IX86_BUILTIN_PCOMPRESSD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_QI },
32461 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv4df_mask, "__builtin_ia32_expanddf256_mask", IX86_BUILTIN_EXPANDPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_QI },
32462 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv2df_mask, "__builtin_ia32_expanddf128_mask", IX86_BUILTIN_EXPANDPD128, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_QI },
32463 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv8sf_mask, "__builtin_ia32_expandsf256_mask", IX86_BUILTIN_EXPANDPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_QI },
32464 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv4sf_mask, "__builtin_ia32_expandsf128_mask", IX86_BUILTIN_EXPANDPS128, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_QI },
32465 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv4di_mask, "__builtin_ia32_expanddi256_mask", IX86_BUILTIN_PEXPANDQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_QI },
32466 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv2di_mask, "__builtin_ia32_expanddi128_mask", IX86_BUILTIN_PEXPANDQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_QI },
32467 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv8si_mask, "__builtin_ia32_expandsi256_mask", IX86_BUILTIN_PEXPANDD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_QI },
32468 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv4si_mask, "__builtin_ia32_expandsi128_mask", IX86_BUILTIN_PEXPANDD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_QI },
32469 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv4df_maskz, "__builtin_ia32_expanddf256_maskz", IX86_BUILTIN_EXPANDPD256Z, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_QI },
32470 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv2df_maskz, "__builtin_ia32_expanddf128_maskz", IX86_BUILTIN_EXPANDPD128Z, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_QI },
32471 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv8sf_maskz, "__builtin_ia32_expandsf256_maskz", IX86_BUILTIN_EXPANDPS256Z, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_QI },
32472 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv4sf_maskz, "__builtin_ia32_expandsf128_maskz", IX86_BUILTIN_EXPANDPS128Z, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_QI },
32473 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv4di_maskz, "__builtin_ia32_expanddi256_maskz", IX86_BUILTIN_PEXPANDQ256Z, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_QI },
32474 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv2di_maskz, "__builtin_ia32_expanddi128_maskz", IX86_BUILTIN_PEXPANDQ128Z, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_QI },
32475 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv8si_maskz, "__builtin_ia32_expandsi256_maskz", IX86_BUILTIN_PEXPANDD256Z, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_QI },
32476 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv4si_maskz, "__builtin_ia32_expandsi128_maskz", IX86_BUILTIN_PEXPANDD128Z, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_QI },
32477 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_smaxv8si3_mask, "__builtin_ia32_pmaxsd256_mask", IX86_BUILTIN_PMAXSD256_MASK, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_V8SI_QI },
32478 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sminv8si3_mask, "__builtin_ia32_pminsd256_mask", IX86_BUILTIN_PMINSD256_MASK, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_V8SI_QI },
32479 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_umaxv8si3_mask, "__builtin_ia32_pmaxud256_mask", IX86_BUILTIN_PMAXUD256_MASK, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_V8SI_QI },
32480 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_uminv8si3_mask, "__builtin_ia32_pminud256_mask", IX86_BUILTIN_PMINUD256_MASK, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_V8SI_QI },
32481 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_smaxv4si3_mask, "__builtin_ia32_pmaxsd128_mask", IX86_BUILTIN_PMAXSD128_MASK, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI_QI },
32482 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sminv4si3_mask, "__builtin_ia32_pminsd128_mask", IX86_BUILTIN_PMINSD128_MASK, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI_QI },
32483 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_umaxv4si3_mask, "__builtin_ia32_pmaxud128_mask", IX86_BUILTIN_PMAXUD128_MASK, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI_QI },
32484 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_uminv4si3_mask, "__builtin_ia32_pminud128_mask", IX86_BUILTIN_PMINUD128_MASK, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI_QI },
32485 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_smaxv4di3_mask, "__builtin_ia32_pmaxsq256_mask", IX86_BUILTIN_PMAXSQ256_MASK, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_V4DI_QI },
32486 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sminv4di3_mask, "__builtin_ia32_pminsq256_mask", IX86_BUILTIN_PMINSQ256_MASK, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_V4DI_QI },
32487 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_umaxv4di3_mask, "__builtin_ia32_pmaxuq256_mask", IX86_BUILTIN_PMAXUQ256_MASK, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_V4DI_QI },
32488 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_uminv4di3_mask, "__builtin_ia32_pminuq256_mask", IX86_BUILTIN_PMINUQ256_MASK, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_V4DI_QI },
32489 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_smaxv2di3_mask, "__builtin_ia32_pmaxsq128_mask", IX86_BUILTIN_PMAXSQ128_MASK, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI_QI },
32490 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sminv2di3_mask, "__builtin_ia32_pminsq128_mask", IX86_BUILTIN_PMINSQ128_MASK, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI_QI },
32491 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_umaxv2di3_mask, "__builtin_ia32_pmaxuq128_mask", IX86_BUILTIN_PMAXUQ128_MASK, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI_QI },
32492 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_uminv2di3_mask, "__builtin_ia32_pminuq128_mask", IX86_BUILTIN_PMINUQ128_MASK, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI_QI },
32493 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_sminv32qi3_mask, "__builtin_ia32_pminsb256_mask", IX86_BUILTIN_PMINSB256_MASK, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI_V32QI_SI },
32494 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_uminv32qi3_mask, "__builtin_ia32_pminub256_mask", IX86_BUILTIN_PMINUB256_MASK, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI_V32QI_SI },
32495 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_smaxv32qi3_mask, "__builtin_ia32_pmaxsb256_mask", IX86_BUILTIN_PMAXSB256_MASK, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI_V32QI_SI },
32496 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_umaxv32qi3_mask, "__builtin_ia32_pmaxub256_mask", IX86_BUILTIN_PMAXUB256_MASK, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI_V32QI_SI },
32497 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_sminv16qi3_mask, "__builtin_ia32_pminsb128_mask", IX86_BUILTIN_PMINSB128_MASK, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_V16QI_HI },
32498 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_uminv16qi3_mask, "__builtin_ia32_pminub128_mask", IX86_BUILTIN_PMINUB128_MASK, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_V16QI_HI },
32499 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_smaxv16qi3_mask, "__builtin_ia32_pmaxsb128_mask", IX86_BUILTIN_PMAXSB128_MASK, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_V16QI_HI },
32500 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_umaxv16qi3_mask, "__builtin_ia32_pmaxub128_mask", IX86_BUILTIN_PMAXUB128_MASK, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_V16QI_HI },
32501 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_sminv16hi3_mask, "__builtin_ia32_pminsw256_mask", IX86_BUILTIN_PMINSW256_MASK, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_V16HI_HI },
32502 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_uminv16hi3_mask, "__builtin_ia32_pminuw256_mask", IX86_BUILTIN_PMINUW256_MASK, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_V16HI_HI },
32503 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_smaxv16hi3_mask, "__builtin_ia32_pmaxsw256_mask", IX86_BUILTIN_PMAXSW256_MASK, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_V16HI_HI },
32504 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_umaxv16hi3_mask, "__builtin_ia32_pmaxuw256_mask", IX86_BUILTIN_PMAXUW256_MASK, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_V16HI_HI },
32505 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_sminv8hi3_mask, "__builtin_ia32_pminsw128_mask", IX86_BUILTIN_PMINSW128_MASK, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_V8HI_QI },
32506 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_uminv8hi3_mask, "__builtin_ia32_pminuw128_mask", IX86_BUILTIN_PMINUW128_MASK, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_V8HI_QI },
32507 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_smaxv8hi3_mask, "__builtin_ia32_pmaxsw128_mask", IX86_BUILTIN_PMAXSW128_MASK, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_V8HI_QI },
32508 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_umaxv8hi3_mask, "__builtin_ia32_pmaxuw128_mask", IX86_BUILTIN_PMAXUW128_MASK, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_V8HI_QI },
32509 { OPTION_MASK_ISA_AVX512CD | OPTION_MASK_ISA_AVX512VL, CODE_FOR_conflictv4di_mask, "__builtin_ia32_vpconflictdi_256_mask", IX86_BUILTIN_VPCONFLICTQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_QI },
32510 { OPTION_MASK_ISA_AVX512CD | OPTION_MASK_ISA_AVX512VL, CODE_FOR_conflictv8si_mask, "__builtin_ia32_vpconflictsi_256_mask", IX86_BUILTIN_VPCONFLICTD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_QI },
32511 { OPTION_MASK_ISA_AVX512CD | OPTION_MASK_ISA_AVX512VL, CODE_FOR_clzv4di2_mask, "__builtin_ia32_vplzcntq_256_mask", IX86_BUILTIN_VPCLZCNTQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_QI },
32512 { OPTION_MASK_ISA_AVX512CD | OPTION_MASK_ISA_AVX512VL, CODE_FOR_clzv8si2_mask, "__builtin_ia32_vplzcntd_256_mask", IX86_BUILTIN_VPCLZCNTD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_QI },
32513 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_unpckhpd256_mask, "__builtin_ia32_unpckhpd256_mask", IX86_BUILTIN_UNPCKHPD256_MASK, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DF_QI },
32514 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_unpckhpd128_mask, "__builtin_ia32_unpckhpd128_mask", IX86_BUILTIN_UNPCKHPD128_MASK, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF_QI },
32515 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_unpckhps256_mask, "__builtin_ia32_unpckhps256_mask", IX86_BUILTIN_UNPCKHPS256_MASK, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SF_QI },
32516 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_vec_interleave_highv4sf_mask, "__builtin_ia32_unpckhps128_mask", IX86_BUILTIN_UNPCKHPS128_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF_QI },
32517 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_unpcklpd256_mask, "__builtin_ia32_unpcklpd256_mask", IX86_BUILTIN_UNPCKLPD256_MASK, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DF_QI },
32518 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_unpcklpd128_mask, "__builtin_ia32_unpcklpd128_mask", IX86_BUILTIN_UNPCKLPD128_MASK, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF_QI },
32519 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_unpcklps256_mask, "__builtin_ia32_unpcklps256_mask", IX86_BUILTIN_UNPCKLPS256_MASK, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SF_QI },
32520 { OPTION_MASK_ISA_AVX512CD | OPTION_MASK_ISA_AVX512VL, CODE_FOR_conflictv2di_mask, "__builtin_ia32_vpconflictdi_128_mask", IX86_BUILTIN_VPCONFLICTQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_QI },
32521 { OPTION_MASK_ISA_AVX512CD | OPTION_MASK_ISA_AVX512VL, CODE_FOR_conflictv4si_mask, "__builtin_ia32_vpconflictsi_128_mask", IX86_BUILTIN_VPCONFLICTD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_QI },
32522 { OPTION_MASK_ISA_AVX512CD | OPTION_MASK_ISA_AVX512VL, CODE_FOR_clzv2di2_mask, "__builtin_ia32_vplzcntq_128_mask", IX86_BUILTIN_VPCLZCNTQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_QI },
32523 { OPTION_MASK_ISA_AVX512CD | OPTION_MASK_ISA_AVX512VL, CODE_FOR_clzv4si2_mask, "__builtin_ia32_vplzcntd_128_mask", IX86_BUILTIN_VPCLZCNTD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_QI },
32524 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_unpcklps128_mask, "__builtin_ia32_unpcklps128_mask", IX86_BUILTIN_UNPCKLPS128_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF_QI },
32525 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_alignv8si_mask, "__builtin_ia32_alignd256_mask", IX86_BUILTIN_ALIGND256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_INT_V8SI_QI },
32526 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_alignv4di_mask, "__builtin_ia32_alignq256_mask", IX86_BUILTIN_ALIGNQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_INT_V4DI_QI },
32527 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_alignv4si_mask, "__builtin_ia32_alignd128_mask", IX86_BUILTIN_ALIGND128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_INT_V4SI_QI },
32528 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_alignv2di_mask, "__builtin_ia32_alignq128_mask", IX86_BUILTIN_ALIGNQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_INT_V2DI_QI },
32529 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_vcvtps2ph256_mask, "__builtin_ia32_vcvtps2ph256_mask", IX86_BUILTIN_CVTPS2PH256_MASK, UNKNOWN, (int) V8HI_FTYPE_V8SF_INT_V8HI_QI },
32530 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_vcvtps2ph_mask, "__builtin_ia32_vcvtps2ph_mask", IX86_BUILTIN_CVTPS2PH_MASK, UNKNOWN, (int) V8HI_FTYPE_V4SF_INT_V8HI_QI },
32531 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_vcvtph2ps_mask, "__builtin_ia32_vcvtph2ps_mask", IX86_BUILTIN_CVTPH2PS_MASK, UNKNOWN, (int) V4SF_FTYPE_V8HI_V4SF_QI },
32532 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_vcvtph2ps256_mask, "__builtin_ia32_vcvtph2ps256_mask", IX86_BUILTIN_CVTPH2PS256_MASK, UNKNOWN, (int) V8SF_FTYPE_V8HI_V8SF_QI },
32533 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_vec_interleave_highv4si_mask, "__builtin_ia32_punpckhdq128_mask", IX86_BUILTIN_PUNPCKHDQ128_MASK, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI_QI },
32534 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_interleave_highv8si_mask, "__builtin_ia32_punpckhdq256_mask", IX86_BUILTIN_PUNPCKHDQ256_MASK, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_V8SI_QI },
32535 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_vec_interleave_highv2di_mask, "__builtin_ia32_punpckhqdq128_mask", IX86_BUILTIN_PUNPCKHQDQ128_MASK, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI_QI },
32536 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_interleave_highv4di_mask, "__builtin_ia32_punpckhqdq256_mask", IX86_BUILTIN_PUNPCKHQDQ256_MASK, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_V4DI_QI },
32537 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_vec_interleave_lowv4si_mask, "__builtin_ia32_punpckldq128_mask", IX86_BUILTIN_PUNPCKLDQ128_MASK, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI_QI },
32538 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_interleave_lowv8si_mask, "__builtin_ia32_punpckldq256_mask", IX86_BUILTIN_PUNPCKLDQ256_MASK, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_V8SI_QI },
32539 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_vec_interleave_lowv2di_mask, "__builtin_ia32_punpcklqdq128_mask", IX86_BUILTIN_PUNPCKLQDQ128_MASK, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI_QI },
32540 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_interleave_lowv4di_mask, "__builtin_ia32_punpcklqdq256_mask", IX86_BUILTIN_PUNPCKLQDQ256_MASK, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_V4DI_QI },
32541 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_vec_interleave_highv16qi_mask, "__builtin_ia32_punpckhbw128_mask", IX86_BUILTIN_PUNPCKHBW128_MASK, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_V16QI_HI },
32542 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_interleave_highv32qi_mask, "__builtin_ia32_punpckhbw256_mask", IX86_BUILTIN_PUNPCKHBW256_MASK, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI_V32QI_SI },
32543 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_vec_interleave_highv8hi_mask, "__builtin_ia32_punpckhwd128_mask", IX86_BUILTIN_PUNPCKHWD128_MASK, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_V8HI_QI },
32544 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_interleave_highv16hi_mask, "__builtin_ia32_punpckhwd256_mask", IX86_BUILTIN_PUNPCKHWD256_MASK, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_V16HI_HI },
32545 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_vec_interleave_lowv16qi_mask, "__builtin_ia32_punpcklbw128_mask", IX86_BUILTIN_PUNPCKLBW128_MASK, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_V16QI_HI },
32546 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_interleave_lowv32qi_mask, "__builtin_ia32_punpcklbw256_mask", IX86_BUILTIN_PUNPCKLBW256_MASK, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI_V32QI_SI },
32547 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_vec_interleave_lowv8hi_mask, "__builtin_ia32_punpcklwd128_mask", IX86_BUILTIN_PUNPCKLWD128_MASK, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_V8HI_QI },
32548 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_interleave_lowv16hi_mask, "__builtin_ia32_punpcklwd256_mask", IX86_BUILTIN_PUNPCKLWD256_MASK, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_V16HI_HI },
32549 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ashlvv16hi_mask, "__builtin_ia32_psllv16hi_mask", IX86_BUILTIN_PSLLVV16HI, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_V16HI_HI },
32550 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ashlvv8hi_mask, "__builtin_ia32_psllv8hi_mask", IX86_BUILTIN_PSLLVV8HI, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_V8HI_QI },
32551 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_packssdw_mask, "__builtin_ia32_packssdw256_mask", IX86_BUILTIN_PACKSSDW256_MASK, UNKNOWN, (int) V16HI_FTYPE_V8SI_V8SI_V16HI_HI },
32552 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse2_packssdw_mask, "__builtin_ia32_packssdw128_mask", IX86_BUILTIN_PACKSSDW128_MASK, UNKNOWN, (int) V8HI_FTYPE_V4SI_V4SI_V8HI_QI },
32553 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_packusdw_mask, "__builtin_ia32_packusdw256_mask", IX86_BUILTIN_PACKUSDW256_MASK, UNKNOWN, (int) V16HI_FTYPE_V8SI_V8SI_V16HI_HI },
32554 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse4_1_packusdw_mask, "__builtin_ia32_packusdw128_mask", IX86_BUILTIN_PACKUSDW128_MASK, UNKNOWN, (int) V8HI_FTYPE_V4SI_V4SI_V8HI_QI },
32555 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_uavgv32qi3_mask, "__builtin_ia32_pavgb256_mask", IX86_BUILTIN_PAVGB256_MASK, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI_V32QI_SI },
32556 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_uavgv16hi3_mask, "__builtin_ia32_pavgw256_mask", IX86_BUILTIN_PAVGW256_MASK, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_V16HI_HI },
32557 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse2_uavgv16qi3_mask, "__builtin_ia32_pavgb128_mask", IX86_BUILTIN_PAVGB128_MASK, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_V16QI_HI },
32558 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse2_uavgv8hi3_mask, "__builtin_ia32_pavgw128_mask", IX86_BUILTIN_PAVGW128_MASK, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_V8HI_QI },
32559 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_permvarv8sf_mask, "__builtin_ia32_permvarsf256_mask", IX86_BUILTIN_VPERMVARSF256_MASK, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SI_V8SF_QI },
32560 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_permvarv4df_mask, "__builtin_ia32_permvardf256_mask", IX86_BUILTIN_VPERMVARDF256_MASK, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DI_V4DF_QI },
32561 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_permv4df_mask, "__builtin_ia32_permdf256_mask", IX86_BUILTIN_VPERMDF256_MASK, UNKNOWN, (int) V4DF_FTYPE_V4DF_INT_V4DF_QI },
32562 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_absv32qi2_mask, "__builtin_ia32_pabsb256_mask", IX86_BUILTIN_PABSB256_MASK, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI_SI },
32563 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_absv16qi2_mask, "__builtin_ia32_pabsb128_mask", IX86_BUILTIN_PABSB128_MASK, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_HI },
32564 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_absv16hi2_mask, "__builtin_ia32_pabsw256_mask", IX86_BUILTIN_PABSW256_MASK, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_HI },
32565 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_absv8hi2_mask, "__builtin_ia32_pabsw128_mask", IX86_BUILTIN_PABSW128_MASK, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_QI },
32566 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_vpermilvarv2df3_mask, "__builtin_ia32_vpermilvarpd_mask", IX86_BUILTIN_VPERMILVARPD_MASK, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DI_V2DF_QI },
32567 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_vpermilvarv4sf3_mask, "__builtin_ia32_vpermilvarps_mask", IX86_BUILTIN_VPERMILVARPS_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SI_V4SF_QI },
32568 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_vpermilvarv4df3_mask, "__builtin_ia32_vpermilvarpd256_mask", IX86_BUILTIN_VPERMILVARPD256_MASK, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DI_V4DF_QI },
32569 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_vpermilvarv8sf3_mask, "__builtin_ia32_vpermilvarps256_mask", IX86_BUILTIN_VPERMILVARPS256_MASK, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SI_V8SF_QI },
32570 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_vpermilv2df_mask, "__builtin_ia32_vpermilpd_mask", IX86_BUILTIN_VPERMILPD_MASK, UNKNOWN, (int) V2DF_FTYPE_V2DF_INT_V2DF_QI },
32571 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_vpermilv4sf_mask, "__builtin_ia32_vpermilps_mask", IX86_BUILTIN_VPERMILPS_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SF_INT_V4SF_QI },
32572 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_vpermilv4df_mask, "__builtin_ia32_vpermilpd256_mask", IX86_BUILTIN_VPERMILPD256_MASK, UNKNOWN, (int) V4DF_FTYPE_V4DF_INT_V4DF_QI },
32573 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_vpermilv8sf_mask, "__builtin_ia32_vpermilps256_mask", IX86_BUILTIN_VPERMILPS256_MASK, UNKNOWN, (int) V8SF_FTYPE_V8SF_INT_V8SF_QI },
32574 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_blendmv4di, "__builtin_ia32_blendmq_256_mask", IX86_BUILTIN_BLENDMQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_QI },
32575 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_blendmv8si, "__builtin_ia32_blendmd_256_mask", IX86_BUILTIN_BLENDMD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_QI },
32576 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_blendmv4df, "__builtin_ia32_blendmpd_256_mask", IX86_BUILTIN_BLENDMPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_QI },
32577 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_blendmv8sf, "__builtin_ia32_blendmps_256_mask", IX86_BUILTIN_BLENDMPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_QI },
32578 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_blendmv2di, "__builtin_ia32_blendmq_128_mask", IX86_BUILTIN_BLENDMQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_QI },
32579 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_blendmv4si, "__builtin_ia32_blendmd_128_mask", IX86_BUILTIN_BLENDMD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_QI },
32580 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_blendmv2df, "__builtin_ia32_blendmpd_128_mask", IX86_BUILTIN_BLENDMPD128, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_QI },
32581 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_blendmv4sf, "__builtin_ia32_blendmps_128_mask", IX86_BUILTIN_BLENDMPS128, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_QI },
32582 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_blendmv16hi, "__builtin_ia32_blendmw_256_mask", IX86_BUILTIN_BLENDMW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_HI },
32583 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_blendmv32qi, "__builtin_ia32_blendmb_256_mask", IX86_BUILTIN_BLENDMB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI_SI },
32584 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_blendmv8hi, "__builtin_ia32_blendmw_128_mask", IX86_BUILTIN_BLENDMW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_QI },
32585 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_blendmv16qi, "__builtin_ia32_blendmb_128_mask", IX86_BUILTIN_BLENDMB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_HI },
32586 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_mulv8si3_mask, "__builtin_ia32_pmulld256_mask", IX86_BUILTIN_PMULLD256_MASK, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_V8SI_QI },
32587 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_mulv4si3_mask, "__builtin_ia32_pmulld128_mask", IX86_BUILTIN_PMULLD128_MASK, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI_QI },
32588 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_vec_widen_umult_even_v8si_mask, "__builtin_ia32_pmuludq256_mask", IX86_BUILTIN_PMULUDQ256_MASK, UNKNOWN, (int) V4DI_FTYPE_V8SI_V8SI_V4DI_QI },
32589 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_vec_widen_smult_even_v8si_mask, "__builtin_ia32_pmuldq256_mask", IX86_BUILTIN_PMULDQ256_MASK, UNKNOWN, (int) V4DI_FTYPE_V8SI_V8SI_V4DI_QI },
32590 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse4_1_mulv2siv2di3_mask, "__builtin_ia32_pmuldq128_mask", IX86_BUILTIN_PMULDQ128_MASK, UNKNOWN, (int) V2DI_FTYPE_V4SI_V4SI_V2DI_QI },
32591 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_vec_widen_umult_even_v4si_mask, "__builtin_ia32_pmuludq128_mask", IX86_BUILTIN_PMULUDQ128_MASK, UNKNOWN, (int) V2DI_FTYPE_V4SI_V4SI_V2DI_QI },
32592 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_cvtpd2ps256_mask, "__builtin_ia32_cvtpd2ps256_mask", IX86_BUILTIN_CVTPD2PS256_MASK, UNKNOWN, (int) V4SF_FTYPE_V4DF_V4SF_QI },
32593 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse2_cvtpd2ps_mask, "__builtin_ia32_cvtpd2ps_mask", IX86_BUILTIN_CVTPD2PS_MASK, UNKNOWN, (int) V4SF_FTYPE_V2DF_V4SF_QI },
32594 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_permvarv8si_mask, "__builtin_ia32_permvarsi256_mask", IX86_BUILTIN_VPERMVARSI256_MASK, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_V8SI_QI },
32595 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_permvarv4di_mask, "__builtin_ia32_permvardi256_mask", IX86_BUILTIN_VPERMVARDI256_MASK, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_V4DI_QI },
32596 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_permv4di_mask, "__builtin_ia32_permdi256_mask", IX86_BUILTIN_VPERMDI256_MASK, UNKNOWN, (int) V4DI_FTYPE_V4DI_INT_V4DI_QI },
32597 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_cmpv4di3_mask, "__builtin_ia32_cmpq256_mask", IX86_BUILTIN_CMPQ256, UNKNOWN, (int) QI_FTYPE_V4DI_V4DI_INT_QI },
32598 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_cmpv8si3_mask, "__builtin_ia32_cmpd256_mask", IX86_BUILTIN_CMPD256, UNKNOWN, (int) QI_FTYPE_V8SI_V8SI_INT_QI },
32599 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ucmpv4di3_mask, "__builtin_ia32_ucmpq256_mask", IX86_BUILTIN_UCMPQ256, UNKNOWN, (int) QI_FTYPE_V4DI_V4DI_INT_QI },
32600 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ucmpv8si3_mask, "__builtin_ia32_ucmpd256_mask", IX86_BUILTIN_UCMPD256, UNKNOWN, (int) QI_FTYPE_V8SI_V8SI_INT_QI },
32601 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_cmpv32qi3_mask, "__builtin_ia32_cmpb256_mask", IX86_BUILTIN_CMPB256, UNKNOWN, (int) SI_FTYPE_V32QI_V32QI_INT_SI },
32602 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_cmpv16hi3_mask, "__builtin_ia32_cmpw256_mask", IX86_BUILTIN_CMPW256, UNKNOWN, (int) HI_FTYPE_V16HI_V16HI_INT_HI },
32603 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ucmpv32qi3_mask, "__builtin_ia32_ucmpb256_mask", IX86_BUILTIN_UCMPB256, UNKNOWN, (int) SI_FTYPE_V32QI_V32QI_INT_SI },
32604 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ucmpv16hi3_mask, "__builtin_ia32_ucmpw256_mask", IX86_BUILTIN_UCMPW256, UNKNOWN, (int) HI_FTYPE_V16HI_V16HI_INT_HI },
32605 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_cmpv4df3_mask, "__builtin_ia32_cmppd256_mask", IX86_BUILTIN_CMPPD256_MASK, UNKNOWN, (int) QI_FTYPE_V4DF_V4DF_INT_QI },
32606 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_cmpv8sf3_mask, "__builtin_ia32_cmpps256_mask", IX86_BUILTIN_CMPPS256_MASK, UNKNOWN, (int) QI_FTYPE_V8SF_V8SF_INT_QI },
32607 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_cmpv2di3_mask, "__builtin_ia32_cmpq128_mask", IX86_BUILTIN_CMPQ128, UNKNOWN, (int) QI_FTYPE_V2DI_V2DI_INT_QI },
32608 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_cmpv4si3_mask, "__builtin_ia32_cmpd128_mask", IX86_BUILTIN_CMPD128, UNKNOWN, (int) QI_FTYPE_V4SI_V4SI_INT_QI },
32609 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ucmpv2di3_mask, "__builtin_ia32_ucmpq128_mask", IX86_BUILTIN_UCMPQ128, UNKNOWN, (int) QI_FTYPE_V2DI_V2DI_INT_QI },
32610 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ucmpv4si3_mask, "__builtin_ia32_ucmpd128_mask", IX86_BUILTIN_UCMPD128, UNKNOWN, (int) QI_FTYPE_V4SI_V4SI_INT_QI },
32611 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_cmpv16qi3_mask, "__builtin_ia32_cmpb128_mask", IX86_BUILTIN_CMPB128, UNKNOWN, (int) HI_FTYPE_V16QI_V16QI_INT_HI },
32612 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_cmpv8hi3_mask, "__builtin_ia32_cmpw128_mask", IX86_BUILTIN_CMPW128, UNKNOWN, (int) QI_FTYPE_V8HI_V8HI_INT_QI },
32613 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ucmpv16qi3_mask, "__builtin_ia32_ucmpb128_mask", IX86_BUILTIN_UCMPB128, UNKNOWN, (int) HI_FTYPE_V16QI_V16QI_INT_HI },
32614 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ucmpv8hi3_mask, "__builtin_ia32_ucmpw128_mask", IX86_BUILTIN_UCMPW128, UNKNOWN, (int) QI_FTYPE_V8HI_V8HI_INT_QI },
32615 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_cmpv2df3_mask, "__builtin_ia32_cmppd128_mask", IX86_BUILTIN_CMPPD128_MASK, UNKNOWN, (int) QI_FTYPE_V2DF_V2DF_INT_QI },
32616 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_cmpv4sf3_mask, "__builtin_ia32_cmpps128_mask", IX86_BUILTIN_CMPPS128_MASK, UNKNOWN, (int) QI_FTYPE_V4SF_V4SF_INT_QI },
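   /* Usage sketch (editorial, not part of the table): each entry above ties a
      masked builtin to an insn pattern and a prototype code.  For example, the
      QI_FTYPE_V4SI_V4SI_INT_QI entry for "__builtin_ia32_cmpd128_mask"
      declares a builtin that, with -mavx512vl, can be invoked as

	__v4si a = { 1, 2, 3, 4 }, b = { 4, 3, 2, 1 };
	unsigned char m = __builtin_ia32_cmpd128_mask (a, b, 1, (unsigned char) -1);

      i.e. two V4SI operands, an immediate comparison predicate (1 = less-than)
      and a QImode write mask, returning a QImode mask.  User code normally
      reaches these builtins through the wrappers in avx512vlintrin.h
      (presumably _mm_cmp_epi32_mask for this one); the wrapper name is an
      assumption here, not something this table records.  */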
32618 /* AVX512DQ. */
32619 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_avx512dq_broadcastv16sf_mask, "__builtin_ia32_broadcastf32x2_512_mask", IX86_BUILTIN_BROADCASTF32x2_512, UNKNOWN, (int) V16SF_FTYPE_V4SF_V16SF_HI },
32620 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_avx512dq_broadcastv16si_mask, "__builtin_ia32_broadcasti32x2_512_mask", IX86_BUILTIN_BROADCASTI32x2_512, UNKNOWN, (int) V16SI_FTYPE_V4SI_V16SI_HI },
32621 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_avx512dq_broadcastv8df_mask_1, "__builtin_ia32_broadcastf64x2_512_mask", IX86_BUILTIN_BROADCASTF64X2_512, UNKNOWN, (int) V8DF_FTYPE_V2DF_V8DF_QI },
32622 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_avx512dq_broadcastv8di_mask_1, "__builtin_ia32_broadcasti64x2_512_mask", IX86_BUILTIN_BROADCASTI64X2_512, UNKNOWN, (int) V8DI_FTYPE_V2DI_V8DI_QI },
32623 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_avx512dq_broadcastv16sf_mask_1, "__builtin_ia32_broadcastf32x8_512_mask", IX86_BUILTIN_BROADCASTF32X8_512, UNKNOWN, (int) V16SF_FTYPE_V8SF_V16SF_HI },
32624 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_avx512dq_broadcastv16si_mask_1, "__builtin_ia32_broadcasti32x8_512_mask", IX86_BUILTIN_BROADCASTI32X8_512, UNKNOWN, (int) V16SI_FTYPE_V8SI_V16SI_HI },
32625 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_avx512dq_vextractf64x2_mask, "__builtin_ia32_extractf64x2_512_mask", IX86_BUILTIN_EXTRACTF64X2_512, UNKNOWN, (int) V2DF_FTYPE_V8DF_INT_V2DF_QI },
32626 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_avx512dq_vextractf32x8_mask, "__builtin_ia32_extractf32x8_mask", IX86_BUILTIN_EXTRACTF32X8, UNKNOWN, (int) V8SF_FTYPE_V16SF_INT_V8SF_QI },
32627 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_avx512dq_vextracti64x2_mask, "__builtin_ia32_extracti64x2_512_mask", IX86_BUILTIN_EXTRACTI64X2_512, UNKNOWN, (int) V2DI_FTYPE_V8DI_INT_V2DI_QI },
32628 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_avx512dq_vextracti32x8_mask, "__builtin_ia32_extracti32x8_mask", IX86_BUILTIN_EXTRACTI32X8, UNKNOWN, (int) V8SI_FTYPE_V16SI_INT_V8SI_QI },
32629 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_reducepv8df_mask, "__builtin_ia32_reducepd512_mask", IX86_BUILTIN_REDUCEPD512_MASK, UNKNOWN, (int) V8DF_FTYPE_V8DF_INT_V8DF_QI },
32630 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_reducepv16sf_mask, "__builtin_ia32_reduceps512_mask", IX86_BUILTIN_REDUCEPS512_MASK, UNKNOWN, (int) V16SF_FTYPE_V16SF_INT_V16SF_HI },
32631 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_avx512dq_mulv8di3_mask, "__builtin_ia32_pmullq512_mask", IX86_BUILTIN_PMULLQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
32632 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_xorv8df3_mask, "__builtin_ia32_xorpd512_mask", IX86_BUILTIN_XORPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI },
32633 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_xorv16sf3_mask, "__builtin_ia32_xorps512_mask", IX86_BUILTIN_XORPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI },
32634 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_iorv8df3_mask, "__builtin_ia32_orpd512_mask", IX86_BUILTIN_ORPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI },
32635 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_iorv16sf3_mask, "__builtin_ia32_orps512_mask", IX86_BUILTIN_ORPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI },
32636 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_andv8df3_mask, "__builtin_ia32_andpd512_mask", IX86_BUILTIN_ANDPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI },
32637 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_andv16sf3_mask, "__builtin_ia32_andps512_mask", IX86_BUILTIN_ANDPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI },
32638 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_avx512f_andnotv8df3_mask, "__builtin_ia32_andnpd512_mask", IX86_BUILTIN_ANDNPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI },
32639 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_avx512f_andnotv16sf3_mask, "__builtin_ia32_andnps512_mask", IX86_BUILTIN_ANDNPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI },
32640 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_avx512dq_vinsertf32x8_mask, "__builtin_ia32_insertf32x8_mask", IX86_BUILTIN_INSERTF32X8, UNKNOWN, (int) V16SF_FTYPE_V16SF_V8SF_INT_V16SF_HI },
32641 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_avx512dq_vinserti32x8_mask, "__builtin_ia32_inserti32x8_mask", IX86_BUILTIN_INSERTI32X8, UNKNOWN, (int) V16SI_FTYPE_V16SI_V8SI_INT_V16SI_HI },
32642 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_avx512dq_vinsertf64x2_mask, "__builtin_ia32_insertf64x2_512_mask", IX86_BUILTIN_INSERTF64X2_512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V2DF_INT_V8DF_QI },
32643 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_avx512dq_vinserti64x2_mask, "__builtin_ia32_inserti64x2_512_mask", IX86_BUILTIN_INSERTI64X2_512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V2DI_INT_V8DI_QI },
32644 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_avx512dq_fpclassv8df_mask, "__builtin_ia32_fpclasspd512_mask", IX86_BUILTIN_FPCLASSPD512, UNKNOWN, (int) QI_FTYPE_V8DF_INT_QI },
32645 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_avx512dq_fpclassv16sf_mask, "__builtin_ia32_fpclassps512_mask", IX86_BUILTIN_FPCLASSPS512, UNKNOWN, (int) HI_FTYPE_V16SF_INT_HI },
32646 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_avx512f_cvtd2maskv16si, "__builtin_ia32_cvtd2mask512", IX86_BUILTIN_CVTD2MASK512, UNKNOWN, (int) HI_FTYPE_V16SI },
32647 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_avx512f_cvtq2maskv8di, "__builtin_ia32_cvtq2mask512", IX86_BUILTIN_CVTQ2MASK512, UNKNOWN, (int) QI_FTYPE_V8DI },
32648 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_avx512f_cvtmask2dv16si, "__builtin_ia32_cvtmask2d512", IX86_BUILTIN_CVTMASK2D512, UNKNOWN, (int) V16SI_FTYPE_HI },
32649 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_avx512f_cvtmask2qv8di, "__builtin_ia32_cvtmask2q512", IX86_BUILTIN_CVTMASK2Q512, UNKNOWN, (int) V8DI_FTYPE_QI },
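   /* Usage sketch (editorial, not part of the table): the mask<->vector
      conversion entries just above take or return a bare mask-mode value.
      For example, the V8DI_FTYPE_QI entry for "__builtin_ia32_cvtmask2q512"
      declares a builtin that, with -mavx512dq, expands an 8-bit mask into a
      vector of all-ones/all-zeros quadwords, roughly

	unsigned char k = 0xA5;
	__v8di v = __builtin_ia32_cvtmask2q512 (k);

      This is the operation exposed by the VPMOVM2Q instruction; the
      corresponding intrinsic wrapper (presumably _mm512_movm_epi64 in
      avx512dqintrin.h) is an assumption here, not recorded by this table.  */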
32651 /* AVX512BW. */
32652 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_kunpcksi, "__builtin_ia32_kunpcksi", IX86_BUILTIN_KUNPCKWD, UNKNOWN, (int) SI_FTYPE_SI_SI },
32653 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_kunpckdi, "__builtin_ia32_kunpckdi", IX86_BUILTIN_KUNPCKDQ, UNKNOWN, (int) DI_FTYPE_DI_DI },
32654 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_packusdw_mask, "__builtin_ia32_packusdw512_mask", IX86_BUILTIN_PACKUSDW512, UNKNOWN, (int) V32HI_FTYPE_V16SI_V16SI_V32HI_SI },
32655 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_ashlv4ti3, "__builtin_ia32_pslldq512", IX86_BUILTIN_PSLLDQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_INT_CONVERT },
32656 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_lshrv4ti3, "__builtin_ia32_psrldq512", IX86_BUILTIN_PSRLDQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_INT_CONVERT },
32657 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_packssdw_mask, "__builtin_ia32_packssdw512_mask", IX86_BUILTIN_PACKSSDW512, UNKNOWN, (int) V32HI_FTYPE_V16SI_V16SI_V32HI_SI },
32658 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_palignrv4ti, "__builtin_ia32_palignr512", IX86_BUILTIN_PALIGNR512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_INT_CONVERT },
32659 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_palignrv64qi_mask, "__builtin_ia32_palignr512_mask", IX86_BUILTIN_PALIGNR512_MASK, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_INT_V8DI_DI_CONVERT },
32660 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_loaddquv32hi_mask, "__builtin_ia32_movdquhi512_mask", IX86_BUILTIN_MOVDQUHI512_MASK, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_SI },
32661 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512f_loaddquv64qi_mask, "__builtin_ia32_movdquqi512_mask", IX86_BUILTIN_MOVDQUQI512_MASK, UNKNOWN, (int) V64QI_FTYPE_V64QI_V64QI_DI },
32662 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512f_psadbw, "__builtin_ia32_psadbw512", IX86_BUILTIN_PSADBW512, UNKNOWN, (int) V8DI_FTYPE_V64QI_V64QI },
32663 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_dbpsadbwv32hi_mask, "__builtin_ia32_dbpsadbw512_mask", IX86_BUILTIN_DBPSADBW512, UNKNOWN, (int) V32HI_FTYPE_V64QI_V64QI_INT_V32HI_SI },
32664 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_vec_dupv64qi_mask, "__builtin_ia32_pbroadcastb512_mask", IX86_BUILTIN_PBROADCASTB512, UNKNOWN, (int) V64QI_FTYPE_V16QI_V64QI_DI },
32665 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_vec_dup_gprv64qi_mask, "__builtin_ia32_pbroadcastb512_gpr_mask", IX86_BUILTIN_PBROADCASTB512_GPR, UNKNOWN, (int) V64QI_FTYPE_QI_V64QI_DI },
32666 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_vec_dupv32hi_mask, "__builtin_ia32_pbroadcastw512_mask", IX86_BUILTIN_PBROADCASTW512, UNKNOWN, (int) V32HI_FTYPE_V8HI_V32HI_SI },
32667 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_vec_dup_gprv32hi_mask, "__builtin_ia32_pbroadcastw512_gpr_mask", IX86_BUILTIN_PBROADCASTW512_GPR, UNKNOWN, (int) V32HI_FTYPE_HI_V32HI_SI },
32668 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_sign_extendv32qiv32hi2_mask, "__builtin_ia32_pmovsxbw512_mask", IX86_BUILTIN_PMOVSXBW512_MASK, UNKNOWN, (int) V32HI_FTYPE_V32QI_V32HI_SI },
32669 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_zero_extendv32qiv32hi2_mask, "__builtin_ia32_pmovzxbw512_mask", IX86_BUILTIN_PMOVZXBW512_MASK, UNKNOWN, (int) V32HI_FTYPE_V32QI_V32HI_SI },
32670 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_permvarv32hi_mask, "__builtin_ia32_permvarhi512_mask", IX86_BUILTIN_VPERMVARHI512_MASK, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI_SI },
32671 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_vpermt2varv32hi3_mask, "__builtin_ia32_vpermt2varhi512_mask", IX86_BUILTIN_VPERMT2VARHI512, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI_SI },
32672 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_vpermt2varv32hi3_maskz, "__builtin_ia32_vpermt2varhi512_maskz", IX86_BUILTIN_VPERMT2VARHI512_MASKZ, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI_SI },
32673 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_vpermi2varv32hi3_mask, "__builtin_ia32_vpermi2varhi512_mask", IX86_BUILTIN_VPERMI2VARHI512, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI_SI },
32674 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_uavgv64qi3_mask, "__builtin_ia32_pavgb512_mask", IX86_BUILTIN_PAVGB512, UNKNOWN, (int) V64QI_FTYPE_V64QI_V64QI_V64QI_DI },
32675 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_uavgv32hi3_mask, "__builtin_ia32_pavgw512_mask", IX86_BUILTIN_PAVGW512, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI_SI },
32676 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_addv64qi3_mask, "__builtin_ia32_paddb512_mask", IX86_BUILTIN_PADDB512, UNKNOWN, (int) V64QI_FTYPE_V64QI_V64QI_V64QI_DI },
32677 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_subv64qi3_mask, "__builtin_ia32_psubb512_mask", IX86_BUILTIN_PSUBB512, UNKNOWN, (int) V64QI_FTYPE_V64QI_V64QI_V64QI_DI },
32678 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_sssubv64qi3_mask, "__builtin_ia32_psubsb512_mask", IX86_BUILTIN_PSUBSB512, UNKNOWN, (int) V64QI_FTYPE_V64QI_V64QI_V64QI_DI },
32679 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_ssaddv64qi3_mask, "__builtin_ia32_paddsb512_mask", IX86_BUILTIN_PADDSB512, UNKNOWN, (int) V64QI_FTYPE_V64QI_V64QI_V64QI_DI },
32680 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_ussubv64qi3_mask, "__builtin_ia32_psubusb512_mask", IX86_BUILTIN_PSUBUSB512, UNKNOWN, (int) V64QI_FTYPE_V64QI_V64QI_V64QI_DI },
32681 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_usaddv64qi3_mask, "__builtin_ia32_paddusb512_mask", IX86_BUILTIN_PADDUSB512, UNKNOWN, (int) V64QI_FTYPE_V64QI_V64QI_V64QI_DI },
32682 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_subv32hi3_mask, "__builtin_ia32_psubw512_mask", IX86_BUILTIN_PSUBW512, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI_SI },
32683 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_addv32hi3_mask, "__builtin_ia32_paddw512_mask", IX86_BUILTIN_PADDW512, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI_SI },
32684 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_sssubv32hi3_mask, "__builtin_ia32_psubsw512_mask", IX86_BUILTIN_PSUBSW512, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI_SI },
32685 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_ssaddv32hi3_mask, "__builtin_ia32_paddsw512_mask", IX86_BUILTIN_PADDSW512, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI_SI },
32686 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_ussubv32hi3_mask, "__builtin_ia32_psubusw512_mask", IX86_BUILTIN_PSUBUSW512, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI_SI },
32687 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_usaddv32hi3_mask, "__builtin_ia32_paddusw512_mask", IX86_BUILTIN_PADDUSW512, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI_SI },
32688 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_umaxv32hi3_mask, "__builtin_ia32_pmaxuw512_mask", IX86_BUILTIN_PMAXUW512, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI_SI },
32689 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_smaxv32hi3_mask, "__builtin_ia32_pmaxsw512_mask", IX86_BUILTIN_PMAXSW512, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI_SI },
32690 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_uminv32hi3_mask, "__builtin_ia32_pminuw512_mask", IX86_BUILTIN_PMINUW512, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI_SI },
32691 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_sminv32hi3_mask, "__builtin_ia32_pminsw512_mask", IX86_BUILTIN_PMINSW512, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI_SI },
32692 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_umaxv64qi3_mask, "__builtin_ia32_pmaxub512_mask", IX86_BUILTIN_PMAXUB512, UNKNOWN, (int) V64QI_FTYPE_V64QI_V64QI_V64QI_DI },
32693 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_smaxv64qi3_mask, "__builtin_ia32_pmaxsb512_mask", IX86_BUILTIN_PMAXSB512, UNKNOWN, (int) V64QI_FTYPE_V64QI_V64QI_V64QI_DI },
32694 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_uminv64qi3_mask, "__builtin_ia32_pminub512_mask", IX86_BUILTIN_PMINUB512, UNKNOWN, (int) V64QI_FTYPE_V64QI_V64QI_V64QI_DI },
32695 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_sminv64qi3_mask, "__builtin_ia32_pminsb512_mask", IX86_BUILTIN_PMINSB512, UNKNOWN, (int) V64QI_FTYPE_V64QI_V64QI_V64QI_DI },
32696 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_truncatev32hiv32qi2_mask, "__builtin_ia32_pmovwb512_mask", IX86_BUILTIN_PMOVWB512, UNKNOWN, (int) V32QI_FTYPE_V32HI_V32QI_SI },
32697 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_ss_truncatev32hiv32qi2_mask, "__builtin_ia32_pmovswb512_mask", IX86_BUILTIN_PMOVSWB512, UNKNOWN, (int) V32QI_FTYPE_V32HI_V32QI_SI },
32698 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_us_truncatev32hiv32qi2_mask, "__builtin_ia32_pmovuswb512_mask", IX86_BUILTIN_PMOVUSWB512, UNKNOWN, (int) V32QI_FTYPE_V32HI_V32QI_SI },
32699 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_umulhrswv32hi3_mask, "__builtin_ia32_pmulhrsw512_mask", IX86_BUILTIN_PMULHRSW512_MASK, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI_SI },
32700 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_umulv32hi3_highpart_mask, "__builtin_ia32_pmulhuw512_mask" , IX86_BUILTIN_PMULHUW512_MASK, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI_SI },
32701 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_smulv32hi3_highpart_mask, "__builtin_ia32_pmulhw512_mask" , IX86_BUILTIN_PMULHW512_MASK, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI_SI },
32702 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_mulv32hi3_mask, "__builtin_ia32_pmullw512_mask", IX86_BUILTIN_PMULLW512_MASK, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI_SI },
32703 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_ashlv32hi3_mask, "__builtin_ia32_psllwi512_mask", IX86_BUILTIN_PSLLWI512_MASK, UNKNOWN, (int) V32HI_FTYPE_V32HI_INT_V32HI_SI },
32704 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_ashlv32hi3_mask, "__builtin_ia32_psllw512_mask", IX86_BUILTIN_PSLLW512_MASK, UNKNOWN, (int) V32HI_FTYPE_V32HI_V8HI_V32HI_SI },
32705 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_packsswb_mask, "__builtin_ia32_packsswb512_mask", IX86_BUILTIN_PACKSSWB512, UNKNOWN, (int) V64QI_FTYPE_V32HI_V32HI_V64QI_DI },
32706 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_packuswb_mask, "__builtin_ia32_packuswb512_mask", IX86_BUILTIN_PACKUSWB512, UNKNOWN, (int) V64QI_FTYPE_V32HI_V32HI_V64QI_DI },
32707 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_ashrvv32hi_mask, "__builtin_ia32_psrav32hi_mask", IX86_BUILTIN_PSRAVV32HI, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI_SI },
32708 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_pmaddubsw512v32hi_mask, "__builtin_ia32_pmaddubsw512_mask", IX86_BUILTIN_PMADDUBSW512_MASK, UNKNOWN, (int) V32HI_FTYPE_V64QI_V64QI_V32HI_SI },
32709 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_pmaddwd512v32hi_mask, "__builtin_ia32_pmaddwd512_mask", IX86_BUILTIN_PMADDWD512_MASK, UNKNOWN, (int) V16SI_FTYPE_V32HI_V32HI_V16SI_HI },
32710 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_lshrvv32hi_mask, "__builtin_ia32_psrlv32hi_mask", IX86_BUILTIN_PSRLVV32HI, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI_SI },
32711 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_interleave_highv64qi_mask, "__builtin_ia32_punpckhbw512_mask", IX86_BUILTIN_PUNPCKHBW512, UNKNOWN, (int) V64QI_FTYPE_V64QI_V64QI_V64QI_DI },
32712 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_interleave_highv32hi_mask, "__builtin_ia32_punpckhwd512_mask", IX86_BUILTIN_PUNPCKHWD512, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI_SI },
32713 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_interleave_lowv64qi_mask, "__builtin_ia32_punpcklbw512_mask", IX86_BUILTIN_PUNPCKLBW512, UNKNOWN, (int) V64QI_FTYPE_V64QI_V64QI_V64QI_DI },
32714 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_interleave_lowv32hi_mask, "__builtin_ia32_punpcklwd512_mask", IX86_BUILTIN_PUNPCKLWD512, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI_SI },
32715 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_pshufbv64qi3_mask, "__builtin_ia32_pshufb512_mask", IX86_BUILTIN_PSHUFB512, UNKNOWN, (int) V64QI_FTYPE_V64QI_V64QI_V64QI_DI },
32716 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_pshufhwv32hi_mask, "__builtin_ia32_pshufhw512_mask", IX86_BUILTIN_PSHUFHW512, UNKNOWN, (int) V32HI_FTYPE_V32HI_INT_V32HI_SI },
32717 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_pshuflwv32hi_mask, "__builtin_ia32_pshuflw512_mask", IX86_BUILTIN_PSHUFLW512, UNKNOWN, (int) V32HI_FTYPE_V32HI_INT_V32HI_SI },
32718 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_ashrv32hi3_mask, "__builtin_ia32_psrawi512_mask", IX86_BUILTIN_PSRAWI512, UNKNOWN, (int) V32HI_FTYPE_V32HI_INT_V32HI_SI },
32719 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_ashrv32hi3_mask, "__builtin_ia32_psraw512_mask", IX86_BUILTIN_PSRAW512, UNKNOWN, (int) V32HI_FTYPE_V32HI_V8HI_V32HI_SI },
32720 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_lshrv32hi3_mask, "__builtin_ia32_psrlwi512_mask", IX86_BUILTIN_PSRLWI512, UNKNOWN, (int) V32HI_FTYPE_V32HI_INT_V32HI_SI },
32721 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_lshrv32hi3_mask, "__builtin_ia32_psrlw512_mask", IX86_BUILTIN_PSRLW512, UNKNOWN, (int) V32HI_FTYPE_V32HI_V8HI_V32HI_SI },
32722 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_cvtb2maskv64qi, "__builtin_ia32_cvtb2mask512", IX86_BUILTIN_CVTB2MASK512, UNKNOWN, (int) DI_FTYPE_V64QI },
32723 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_cvtw2maskv32hi, "__builtin_ia32_cvtw2mask512", IX86_BUILTIN_CVTW2MASK512, UNKNOWN, (int) SI_FTYPE_V32HI },
32724 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_cvtmask2bv64qi, "__builtin_ia32_cvtmask2b512", IX86_BUILTIN_CVTMASK2B512, UNKNOWN, (int) V64QI_FTYPE_DI },
32725 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_cvtmask2wv32hi, "__builtin_ia32_cvtmask2w512", IX86_BUILTIN_CVTMASK2W512, UNKNOWN, (int) V32HI_FTYPE_SI },
32726 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_eqv64qi3_mask, "__builtin_ia32_pcmpeqb512_mask", IX86_BUILTIN_PCMPEQB512_MASK, UNKNOWN, (int) DI_FTYPE_V64QI_V64QI_DI },
32727 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_eqv32hi3_mask, "__builtin_ia32_pcmpeqw512_mask", IX86_BUILTIN_PCMPEQW512_MASK, UNKNOWN, (int) SI_FTYPE_V32HI_V32HI_SI },
32728 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_gtv64qi3_mask, "__builtin_ia32_pcmpgtb512_mask", IX86_BUILTIN_PCMPGTB512_MASK, UNKNOWN, (int) DI_FTYPE_V64QI_V64QI_DI },
32729 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_gtv32hi3_mask, "__builtin_ia32_pcmpgtw512_mask", IX86_BUILTIN_PCMPGTW512_MASK, UNKNOWN, (int) SI_FTYPE_V32HI_V32HI_SI },
32730 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_testmv64qi3_mask, "__builtin_ia32_ptestmb512", IX86_BUILTIN_PTESTMB512, UNKNOWN, (int) DI_FTYPE_V64QI_V64QI_DI },
32731 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_testmv32hi3_mask, "__builtin_ia32_ptestmw512", IX86_BUILTIN_PTESTMW512, UNKNOWN, (int) SI_FTYPE_V32HI_V32HI_SI },
32732 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_testnmv64qi3_mask, "__builtin_ia32_ptestnmb512", IX86_BUILTIN_PTESTNMB512, UNKNOWN, (int) DI_FTYPE_V64QI_V64QI_DI },
32733 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_testnmv32hi3_mask, "__builtin_ia32_ptestnmw512", IX86_BUILTIN_PTESTNMW512, UNKNOWN, (int) SI_FTYPE_V32HI_V32HI_SI },
32734 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_ashlvv32hi_mask, "__builtin_ia32_psllv32hi_mask", IX86_BUILTIN_PSLLVV32HI, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI_SI },
32735 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_absv64qi2_mask, "__builtin_ia32_pabsb512_mask", IX86_BUILTIN_PABSB512, UNKNOWN, (int) V64QI_FTYPE_V64QI_V64QI_DI },
32736 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_absv32hi2_mask, "__builtin_ia32_pabsw512_mask", IX86_BUILTIN_PABSW512, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_SI },
32737 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_blendmv32hi, "__builtin_ia32_blendmw_512_mask", IX86_BUILTIN_BLENDMW512, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_SI },
32738 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_blendmv64qi, "__builtin_ia32_blendmb_512_mask", IX86_BUILTIN_BLENDMB512, UNKNOWN, (int) V64QI_FTYPE_V64QI_V64QI_DI },
32739 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_cmpv64qi3_mask, "__builtin_ia32_cmpb512_mask", IX86_BUILTIN_CMPB512, UNKNOWN, (int) DI_FTYPE_V64QI_V64QI_INT_DI },
32740 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_cmpv32hi3_mask, "__builtin_ia32_cmpw512_mask", IX86_BUILTIN_CMPW512, UNKNOWN, (int) SI_FTYPE_V32HI_V32HI_INT_SI },
32741 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_ucmpv64qi3_mask, "__builtin_ia32_ucmpb512_mask", IX86_BUILTIN_UCMPB512, UNKNOWN, (int) DI_FTYPE_V64QI_V64QI_INT_DI },
32742 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_ucmpv32hi3_mask, "__builtin_ia32_ucmpw512_mask", IX86_BUILTIN_UCMPW512, UNKNOWN, (int) SI_FTYPE_V32HI_V32HI_INT_SI },
32743 };
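/* In the masked entries above, the FTYPE spells out the operand list:
   V32HI_FTYPE_V32HI_V32HI_V32HI_SI, for example, is (op1, op2,
   merge-source, __mmask32).  A minimal sketch of how one of these
   builtins is reached from user code -- the wrapper below assumes the
   usual <immintrin.h> intrinsic and is shown only for illustration,
   it is not defined in this file:

     #include <immintrin.h>

     __m512i
     masked_add_epi16 (__m512i src, __mmask32 k, __m512i a, __m512i b)
     {
       return _mm512_mask_add_epi16 (src, k, a, b);
     }

   which, built with -mavx512bw, is expected to expand through
   __builtin_ia32_paddw512_mask with SRC as the merge source and K as
   the writemask.  */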
32745 /* Builtins with rounding support. */
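/* Each FTYPE in this table ends in an extra INT: the embedded
   rounding / SAE immediate that the EVEX encoding allows for these
   operations.  A minimal sketch of how that immediate reaches one of
   these builtins -- the intrinsic and macro names are the usual
   <immintrin.h> spellings and are given here only for illustration:

     #include <immintrin.h>

     __m512d
     add_rz (__m512d a, __m512d b)
     {
       return _mm512_add_round_pd (a, b,
                                   _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC);
     }

   which, built with -mavx512f, should funnel into
   __builtin_ia32_addpd512_mask with the rounding constant as its last
   argument.  */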
32746 static const struct builtin_description bdesc_round_args[] =
32747 {
32748 /* AVX512F */
32749 { OPTION_MASK_ISA_AVX512F, CODE_FOR_addv8df3_mask_round, "__builtin_ia32_addpd512_mask", IX86_BUILTIN_ADDPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI_INT },
32750 { OPTION_MASK_ISA_AVX512F, CODE_FOR_addv16sf3_mask_round, "__builtin_ia32_addps512_mask", IX86_BUILTIN_ADDPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT },
32751 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse2_vmaddv2df3_round, "__builtin_ia32_addsd_round", IX86_BUILTIN_ADDSD_ROUND, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
32752 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse_vmaddv4sf3_round, "__builtin_ia32_addss_round", IX86_BUILTIN_ADDSS_ROUND, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
32753 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_cmpv8df3_mask_round, "__builtin_ia32_cmppd512_mask", IX86_BUILTIN_CMPPD512, UNKNOWN, (int) QI_FTYPE_V8DF_V8DF_INT_QI_INT },
32754 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_cmpv16sf3_mask_round, "__builtin_ia32_cmpps512_mask", IX86_BUILTIN_CMPPS512, UNKNOWN, (int) HI_FTYPE_V16SF_V16SF_INT_HI_INT },
32755 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vmcmpv2df3_mask_round, "__builtin_ia32_cmpsd_mask", IX86_BUILTIN_CMPSD_MASK, UNKNOWN, (int) QI_FTYPE_V2DF_V2DF_INT_QI_INT },
32756 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vmcmpv4sf3_mask_round, "__builtin_ia32_cmpss_mask", IX86_BUILTIN_CMPSS_MASK, UNKNOWN, (int) QI_FTYPE_V4SF_V4SF_INT_QI_INT },
32757 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse2_comi_round, "__builtin_ia32_vcomisd", IX86_BUILTIN_COMIDF, UNKNOWN, (int) INT_FTYPE_V2DF_V2DF_INT_INT },
32758 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse_comi_round, "__builtin_ia32_vcomiss", IX86_BUILTIN_COMISF, UNKNOWN, (int) INT_FTYPE_V4SF_V4SF_INT_INT },
32759 { OPTION_MASK_ISA_AVX512F, CODE_FOR_floatv16siv16sf2_mask_round, "__builtin_ia32_cvtdq2ps512_mask", IX86_BUILTIN_CVTDQ2PS512, UNKNOWN, (int) V16SF_FTYPE_V16SI_V16SF_HI_INT },
32760 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_cvtpd2dq512_mask_round, "__builtin_ia32_cvtpd2dq512_mask", IX86_BUILTIN_CVTPD2DQ512, UNKNOWN, (int) V8SI_FTYPE_V8DF_V8SI_QI_INT },
32761 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_cvtpd2ps512_mask_round, "__builtin_ia32_cvtpd2ps512_mask", IX86_BUILTIN_CVTPD2PS512, UNKNOWN, (int) V8SF_FTYPE_V8DF_V8SF_QI_INT },
32762 { OPTION_MASK_ISA_AVX512F, CODE_FOR_ufix_notruncv8dfv8si2_mask_round, "__builtin_ia32_cvtpd2udq512_mask", IX86_BUILTIN_CVTPD2UDQ512, UNKNOWN, (int) V8SI_FTYPE_V8DF_V8SI_QI_INT },
32763 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vcvtph2ps512_mask_round, "__builtin_ia32_vcvtph2ps512_mask", IX86_BUILTIN_CVTPH2PS512, UNKNOWN, (int) V16SF_FTYPE_V16HI_V16SF_HI_INT },
32764 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fix_notruncv16sfv16si_mask_round, "__builtin_ia32_cvtps2dq512_mask", IX86_BUILTIN_CVTPS2DQ512, UNKNOWN, (int) V16SI_FTYPE_V16SF_V16SI_HI_INT },
32765 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_cvtps2pd512_mask_round, "__builtin_ia32_cvtps2pd512_mask", IX86_BUILTIN_CVTPS2PD512, UNKNOWN, (int) V8DF_FTYPE_V8SF_V8DF_QI_INT },
32766 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_ufix_notruncv16sfv16si_mask_round, "__builtin_ia32_cvtps2udq512_mask", IX86_BUILTIN_CVTPS2UDQ512, UNKNOWN, (int) V16SI_FTYPE_V16SF_V16SI_HI_INT },
32767 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse2_cvtsd2ss_round, "__builtin_ia32_cvtsd2ss_round", IX86_BUILTIN_CVTSD2SS_ROUND, UNKNOWN, (int) V4SF_FTYPE_V4SF_V2DF_INT },
32768 { OPTION_MASK_ISA_AVX512F | OPTION_MASK_ISA_64BIT, CODE_FOR_sse2_cvtsi2sdq_round, "__builtin_ia32_cvtsi2sd64", IX86_BUILTIN_CVTSI2SD64, UNKNOWN, (int) V2DF_FTYPE_V2DF_INT64_INT },
32769 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse_cvtsi2ss_round, "__builtin_ia32_cvtsi2ss32", IX86_BUILTIN_CVTSI2SS32, UNKNOWN, (int) V4SF_FTYPE_V4SF_INT_INT },
32770 { OPTION_MASK_ISA_AVX512F | OPTION_MASK_ISA_64BIT, CODE_FOR_sse_cvtsi2ssq_round, "__builtin_ia32_cvtsi2ss64", IX86_BUILTIN_CVTSI2SS64, UNKNOWN, (int) V4SF_FTYPE_V4SF_INT64_INT },
32771 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse2_cvtss2sd_round, "__builtin_ia32_cvtss2sd_round", IX86_BUILTIN_CVTSS2SD_ROUND, UNKNOWN, (int) V2DF_FTYPE_V2DF_V4SF_INT },
32772 { OPTION_MASK_ISA_AVX512F, CODE_FOR_fix_truncv8dfv8si2_mask_round, "__builtin_ia32_cvttpd2dq512_mask", IX86_BUILTIN_CVTTPD2DQ512, UNKNOWN, (int) V8SI_FTYPE_V8DF_V8SI_QI_INT },
32773 { OPTION_MASK_ISA_AVX512F, CODE_FOR_ufix_truncv8dfv8si2_mask_round, "__builtin_ia32_cvttpd2udq512_mask", IX86_BUILTIN_CVTTPD2UDQ512, UNKNOWN, (int) V8SI_FTYPE_V8DF_V8SI_QI_INT },
32774 { OPTION_MASK_ISA_AVX512F, CODE_FOR_fix_truncv16sfv16si2_mask_round, "__builtin_ia32_cvttps2dq512_mask", IX86_BUILTIN_CVTTPS2DQ512, UNKNOWN, (int) V16SI_FTYPE_V16SF_V16SI_HI_INT },
32775 { OPTION_MASK_ISA_AVX512F, CODE_FOR_ufix_truncv16sfv16si2_mask_round, "__builtin_ia32_cvttps2udq512_mask", IX86_BUILTIN_CVTTPS2UDQ512, UNKNOWN, (int) V16SI_FTYPE_V16SF_V16SI_HI_INT },
32776 { OPTION_MASK_ISA_AVX512F, CODE_FOR_ufloatv16siv16sf2_mask_round, "__builtin_ia32_cvtudq2ps512_mask", IX86_BUILTIN_CVTUDQ2PS512, UNKNOWN, (int) V16SF_FTYPE_V16SI_V16SF_HI_INT },
32777 { OPTION_MASK_ISA_AVX512F | OPTION_MASK_ISA_64BIT, CODE_FOR_cvtusi2sd64_round, "__builtin_ia32_cvtusi2sd64", IX86_BUILTIN_CVTUSI2SD64, UNKNOWN, (int) V2DF_FTYPE_V2DF_UINT64_INT },
32778 { OPTION_MASK_ISA_AVX512F, CODE_FOR_cvtusi2ss32_round, "__builtin_ia32_cvtusi2ss32", IX86_BUILTIN_CVTUSI2SS32, UNKNOWN, (int) V4SF_FTYPE_V4SF_UINT_INT },
32779 { OPTION_MASK_ISA_AVX512F | OPTION_MASK_ISA_64BIT, CODE_FOR_cvtusi2ss64_round, "__builtin_ia32_cvtusi2ss64", IX86_BUILTIN_CVTUSI2SS64, UNKNOWN, (int) V4SF_FTYPE_V4SF_UINT64_INT },
32780 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_divv8df3_mask_round, "__builtin_ia32_divpd512_mask", IX86_BUILTIN_DIVPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI_INT },
32781 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_divv16sf3_mask_round, "__builtin_ia32_divps512_mask", IX86_BUILTIN_DIVPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT },
32782 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse2_vmdivv2df3_round, "__builtin_ia32_divsd_round", IX86_BUILTIN_DIVSD_ROUND, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
32783 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse_vmdivv4sf3_round, "__builtin_ia32_divss_round", IX86_BUILTIN_DIVSS_ROUND, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
32784 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fixupimmv8df_mask_round, "__builtin_ia32_fixupimmpd512_mask", IX86_BUILTIN_FIXUPIMMPD512_MASK, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DI_INT_QI_INT },
32785 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fixupimmv8df_maskz_round, "__builtin_ia32_fixupimmpd512_maskz", IX86_BUILTIN_FIXUPIMMPD512_MASKZ, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DI_INT_QI_INT },
32786 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fixupimmv16sf_mask_round, "__builtin_ia32_fixupimmps512_mask", IX86_BUILTIN_FIXUPIMMPS512_MASK, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SI_INT_HI_INT },
32787 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fixupimmv16sf_maskz_round, "__builtin_ia32_fixupimmps512_maskz", IX86_BUILTIN_FIXUPIMMPS512_MASKZ, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SI_INT_HI_INT },
32788 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_sfixupimmv2df_mask_round, "__builtin_ia32_fixupimmsd_mask", IX86_BUILTIN_FIXUPIMMSD128_MASK, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DI_INT_QI_INT },
32789 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_sfixupimmv2df_maskz_round, "__builtin_ia32_fixupimmsd_maskz", IX86_BUILTIN_FIXUPIMMSD128_MASKZ, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DI_INT_QI_INT },
32790 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_sfixupimmv4sf_mask_round, "__builtin_ia32_fixupimmss_mask", IX86_BUILTIN_FIXUPIMMSS128_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SI_INT_QI_INT },
32791 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_sfixupimmv4sf_maskz_round, "__builtin_ia32_fixupimmss_maskz", IX86_BUILTIN_FIXUPIMMSS128_MASKZ, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SI_INT_QI_INT },
32792 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_getexpv8df_mask_round, "__builtin_ia32_getexppd512_mask", IX86_BUILTIN_GETEXPPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_QI_INT },
32793 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_getexpv16sf_mask_round, "__builtin_ia32_getexpps512_mask", IX86_BUILTIN_GETEXPPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_HI_INT },
32794 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_sgetexpv2df_round, "__builtin_ia32_getexpsd128_round", IX86_BUILTIN_GETEXPSD128, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
32795 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_sgetexpv4sf_round, "__builtin_ia32_getexpss128_round", IX86_BUILTIN_GETEXPSS128, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
32796 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_getmantv8df_mask_round, "__builtin_ia32_getmantpd512_mask", IX86_BUILTIN_GETMANTPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_INT_V8DF_QI_INT },
32797 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_getmantv16sf_mask_round, "__builtin_ia32_getmantps512_mask", IX86_BUILTIN_GETMANTPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_INT_V16SF_HI_INT },
32798 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vgetmantv2df_round, "__builtin_ia32_getmantsd_round", IX86_BUILTIN_GETMANTSD128, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT_INT },
32799 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vgetmantv4sf_round, "__builtin_ia32_getmantss_round", IX86_BUILTIN_GETMANTSS128, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT_INT },
32800 { OPTION_MASK_ISA_AVX512F, CODE_FOR_smaxv8df3_mask_round, "__builtin_ia32_maxpd512_mask", IX86_BUILTIN_MAXPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI_INT },
32801 { OPTION_MASK_ISA_AVX512F, CODE_FOR_smaxv16sf3_mask_round, "__builtin_ia32_maxps512_mask", IX86_BUILTIN_MAXPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT },
32802 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse2_vmsmaxv2df3_round, "__builtin_ia32_maxsd_round", IX86_BUILTIN_MAXSD_ROUND, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
32803 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse_vmsmaxv4sf3_round, "__builtin_ia32_maxss_round", IX86_BUILTIN_MAXSS_ROUND, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
32804 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sminv8df3_mask_round, "__builtin_ia32_minpd512_mask", IX86_BUILTIN_MINPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI_INT },
32805 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sminv16sf3_mask_round, "__builtin_ia32_minps512_mask", IX86_BUILTIN_MINPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT },
32806 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse2_vmsminv2df3_round, "__builtin_ia32_minsd_round", IX86_BUILTIN_MINSD_ROUND, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
32807 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse_vmsminv4sf3_round, "__builtin_ia32_minss_round", IX86_BUILTIN_MINSS_ROUND, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
32808 { OPTION_MASK_ISA_AVX512F, CODE_FOR_mulv8df3_mask_round, "__builtin_ia32_mulpd512_mask", IX86_BUILTIN_MULPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI_INT },
32809 { OPTION_MASK_ISA_AVX512F, CODE_FOR_mulv16sf3_mask_round, "__builtin_ia32_mulps512_mask", IX86_BUILTIN_MULPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT },
32810 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse2_vmmulv2df3_round, "__builtin_ia32_mulsd_round", IX86_BUILTIN_MULSD_ROUND, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
32811 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse_vmmulv4sf3_round, "__builtin_ia32_mulss_round", IX86_BUILTIN_MULSS_ROUND, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
32812 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_rndscalev8df_mask_round, "__builtin_ia32_rndscalepd_mask", IX86_BUILTIN_RNDSCALEPD, UNKNOWN, (int) V8DF_FTYPE_V8DF_INT_V8DF_QI_INT },
32813 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_rndscalev16sf_mask_round, "__builtin_ia32_rndscaleps_mask", IX86_BUILTIN_RNDSCALEPS, UNKNOWN, (int) V16SF_FTYPE_V16SF_INT_V16SF_HI_INT },
32814 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_rndscalev2df_round, "__builtin_ia32_rndscalesd_round", IX86_BUILTIN_RNDSCALESD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT_INT },
32815 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_rndscalev4sf_round, "__builtin_ia32_rndscaless_round", IX86_BUILTIN_RNDSCALESS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT_INT },
32816 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_scalefv8df_mask_round, "__builtin_ia32_scalefpd512_mask", IX86_BUILTIN_SCALEFPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI_INT },
32817 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_scalefv16sf_mask_round, "__builtin_ia32_scalefps512_mask", IX86_BUILTIN_SCALEFPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT },
32818 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vmscalefv2df_round, "__builtin_ia32_scalefsd_round", IX86_BUILTIN_SCALEFSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
32819 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vmscalefv4sf_round, "__builtin_ia32_scalefss_round", IX86_BUILTIN_SCALEFSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
32820 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_sqrtv8df2_mask_round, "__builtin_ia32_sqrtpd512_mask", IX86_BUILTIN_SQRTPD512_MASK, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_QI_INT },
32821 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_sqrtv16sf2_mask_round, "__builtin_ia32_sqrtps512_mask", IX86_BUILTIN_SQRTPS512_MASK, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_HI_INT },
32822 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse2_vmsqrtv2df2_round, "__builtin_ia32_sqrtsd_round", IX86_BUILTIN_SQRTSD_ROUND, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
32823 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse_vmsqrtv4sf2_round, "__builtin_ia32_sqrtss_round", IX86_BUILTIN_SQRTSS_ROUND, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
32824 { OPTION_MASK_ISA_AVX512F, CODE_FOR_subv8df3_mask_round, "__builtin_ia32_subpd512_mask", IX86_BUILTIN_SUBPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI_INT },
32825 { OPTION_MASK_ISA_AVX512F, CODE_FOR_subv16sf3_mask_round, "__builtin_ia32_subps512_mask", IX86_BUILTIN_SUBPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT },
32826 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse2_vmsubv2df3_round, "__builtin_ia32_subsd_round", IX86_BUILTIN_SUBSD_ROUND, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
32827 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse_vmsubv4sf3_round, "__builtin_ia32_subss_round", IX86_BUILTIN_SUBSS_ROUND, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
32828 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse2_cvtsd2si_round, "__builtin_ia32_vcvtsd2si32", IX86_BUILTIN_VCVTSD2SI32, UNKNOWN, (int) INT_FTYPE_V2DF_INT },
32829 { OPTION_MASK_ISA_AVX512F | OPTION_MASK_ISA_64BIT, CODE_FOR_sse2_cvtsd2siq_round, "__builtin_ia32_vcvtsd2si64", IX86_BUILTIN_VCVTSD2SI64, UNKNOWN, (int) INT64_FTYPE_V2DF_INT },
32830 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vcvtsd2usi_round, "__builtin_ia32_vcvtsd2usi32", IX86_BUILTIN_VCVTSD2USI32, UNKNOWN, (int) UINT_FTYPE_V2DF_INT },
32831 { OPTION_MASK_ISA_AVX512F | OPTION_MASK_ISA_64BIT, CODE_FOR_avx512f_vcvtsd2usiq_round, "__builtin_ia32_vcvtsd2usi64", IX86_BUILTIN_VCVTSD2USI64, UNKNOWN, (int) UINT64_FTYPE_V2DF_INT },
32832 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse_cvtss2si_round, "__builtin_ia32_vcvtss2si32", IX86_BUILTIN_VCVTSS2SI32, UNKNOWN, (int) INT_FTYPE_V4SF_INT },
32833 { OPTION_MASK_ISA_AVX512F | OPTION_MASK_ISA_64BIT, CODE_FOR_sse_cvtss2siq_round, "__builtin_ia32_vcvtss2si64", IX86_BUILTIN_VCVTSS2SI64, UNKNOWN, (int) INT64_FTYPE_V4SF_INT },
32834 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vcvtss2usi_round, "__builtin_ia32_vcvtss2usi32", IX86_BUILTIN_VCVTSS2USI32, UNKNOWN, (int) UINT_FTYPE_V4SF_INT },
32835 { OPTION_MASK_ISA_AVX512F | OPTION_MASK_ISA_64BIT, CODE_FOR_avx512f_vcvtss2usiq_round, "__builtin_ia32_vcvtss2usi64", IX86_BUILTIN_VCVTSS2USI64, UNKNOWN, (int) UINT64_FTYPE_V4SF_INT },
32836 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse2_cvttsd2si_round, "__builtin_ia32_vcvttsd2si32", IX86_BUILTIN_VCVTTSD2SI32, UNKNOWN, (int) INT_FTYPE_V2DF_INT },
32837 { OPTION_MASK_ISA_AVX512F | OPTION_MASK_ISA_64BIT, CODE_FOR_sse2_cvttsd2siq_round, "__builtin_ia32_vcvttsd2si64", IX86_BUILTIN_VCVTTSD2SI64, UNKNOWN, (int) INT64_FTYPE_V2DF_INT },
32838 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vcvttsd2usi_round, "__builtin_ia32_vcvttsd2usi32", IX86_BUILTIN_VCVTTSD2USI32, UNKNOWN, (int) UINT_FTYPE_V2DF_INT },
32839 { OPTION_MASK_ISA_AVX512F | OPTION_MASK_ISA_64BIT, CODE_FOR_avx512f_vcvttsd2usiq_round, "__builtin_ia32_vcvttsd2usi64", IX86_BUILTIN_VCVTTSD2USI64, UNKNOWN, (int) UINT64_FTYPE_V2DF_INT },
32840 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse_cvttss2si_round, "__builtin_ia32_vcvttss2si32", IX86_BUILTIN_VCVTTSS2SI32, UNKNOWN, (int) INT_FTYPE_V4SF_INT },
32841 { OPTION_MASK_ISA_AVX512F | OPTION_MASK_ISA_64BIT, CODE_FOR_sse_cvttss2siq_round, "__builtin_ia32_vcvttss2si64", IX86_BUILTIN_VCVTTSS2SI64, UNKNOWN, (int) INT64_FTYPE_V4SF_INT },
32842 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vcvttss2usi_round, "__builtin_ia32_vcvttss2usi32", IX86_BUILTIN_VCVTTSS2USI32, UNKNOWN, (int) UINT_FTYPE_V4SF_INT },
32843 { OPTION_MASK_ISA_AVX512F | OPTION_MASK_ISA_64BIT, CODE_FOR_avx512f_vcvttss2usiq_round, "__builtin_ia32_vcvttss2usi64", IX86_BUILTIN_VCVTTSS2USI64, UNKNOWN, (int) UINT64_FTYPE_V4SF_INT },
32844 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fmadd_v8df_mask_round, "__builtin_ia32_vfmaddpd512_mask", IX86_BUILTIN_VFMADDPD512_MASK, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI_INT },
32845 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fmadd_v8df_mask3_round, "__builtin_ia32_vfmaddpd512_mask3", IX86_BUILTIN_VFMADDPD512_MASK3, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI_INT },
32846 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fmadd_v8df_maskz_round, "__builtin_ia32_vfmaddpd512_maskz", IX86_BUILTIN_VFMADDPD512_MASKZ, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI_INT },
32847 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fmadd_v16sf_mask_round, "__builtin_ia32_vfmaddps512_mask", IX86_BUILTIN_VFMADDPS512_MASK, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT },
32848 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fmadd_v16sf_mask3_round, "__builtin_ia32_vfmaddps512_mask3", IX86_BUILTIN_VFMADDPS512_MASK3, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT },
32849 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fmadd_v16sf_maskz_round, "__builtin_ia32_vfmaddps512_maskz", IX86_BUILTIN_VFMADDPS512_MASKZ, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT },
32850 { OPTION_MASK_ISA_AVX512F, CODE_FOR_fmai_vmfmadd_v2df_round, "__builtin_ia32_vfmaddsd3_round", IX86_BUILTIN_VFMADDSD3_ROUND, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF_INT },
32851 { OPTION_MASK_ISA_AVX512F, CODE_FOR_fmai_vmfmadd_v4sf_round, "__builtin_ia32_vfmaddss3_round", IX86_BUILTIN_VFMADDSS3_ROUND, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF_INT },
32852 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fmaddsub_v8df_mask_round, "__builtin_ia32_vfmaddsubpd512_mask", IX86_BUILTIN_VFMADDSUBPD512_MASK, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI_INT },
32853 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fmaddsub_v8df_mask3_round, "__builtin_ia32_vfmaddsubpd512_mask3", IX86_BUILTIN_VFMADDSUBPD512_MASK3, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI_INT },
32854 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fmaddsub_v8df_maskz_round, "__builtin_ia32_vfmaddsubpd512_maskz", IX86_BUILTIN_VFMADDSUBPD512_MASKZ, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI_INT },
32855 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fmaddsub_v16sf_mask_round, "__builtin_ia32_vfmaddsubps512_mask", IX86_BUILTIN_VFMADDSUBPS512_MASK, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT },
32856 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fmaddsub_v16sf_mask3_round, "__builtin_ia32_vfmaddsubps512_mask3", IX86_BUILTIN_VFMADDSUBPS512_MASK3, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT },
32857 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fmaddsub_v16sf_maskz_round, "__builtin_ia32_vfmaddsubps512_maskz", IX86_BUILTIN_VFMADDSUBPS512_MASKZ, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT },
32858 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fmsubadd_v8df_mask3_round, "__builtin_ia32_vfmsubaddpd512_mask3", IX86_BUILTIN_VFMSUBADDPD512_MASK3, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI_INT },
32859 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fmsubadd_v16sf_mask3_round, "__builtin_ia32_vfmsubaddps512_mask3", IX86_BUILTIN_VFMSUBADDPS512_MASK3, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT },
32860 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fmsub_v8df_mask3_round, "__builtin_ia32_vfmsubpd512_mask3", IX86_BUILTIN_VFMSUBPD512_MASK3, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI_INT },
32861 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fmsub_v16sf_mask3_round, "__builtin_ia32_vfmsubps512_mask3", IX86_BUILTIN_VFMSUBPS512_MASK3, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT },
32862 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fnmadd_v8df_mask_round, "__builtin_ia32_vfnmaddpd512_mask", IX86_BUILTIN_VFNMADDPD512_MASK, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI_INT },
32863 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fnmadd_v16sf_mask_round, "__builtin_ia32_vfnmaddps512_mask", IX86_BUILTIN_VFNMADDPS512_MASK, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT },
32864 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fnmsub_v8df_mask_round, "__builtin_ia32_vfnmsubpd512_mask", IX86_BUILTIN_VFNMSUBPD512_MASK, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI_INT },
32865 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fnmsub_v8df_mask3_round, "__builtin_ia32_vfnmsubpd512_mask3", IX86_BUILTIN_VFNMSUBPD512_MASK3, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI_INT },
32866 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fnmsub_v16sf_mask_round, "__builtin_ia32_vfnmsubps512_mask", IX86_BUILTIN_VFNMSUBPS512_MASK, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT },
32867 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fnmsub_v16sf_mask3_round, "__builtin_ia32_vfnmsubps512_mask3", IX86_BUILTIN_VFNMSUBPS512_MASK3, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT },
32869 /* AVX512ER */
32870 { OPTION_MASK_ISA_AVX512ER, CODE_FOR_avx512er_exp2v8df_mask_round, "__builtin_ia32_exp2pd_mask", IX86_BUILTIN_EXP2PD_MASK, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_QI_INT },
32871 { OPTION_MASK_ISA_AVX512ER, CODE_FOR_avx512er_exp2v16sf_mask_round, "__builtin_ia32_exp2ps_mask", IX86_BUILTIN_EXP2PS_MASK, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_HI_INT },
32872 { OPTION_MASK_ISA_AVX512ER, CODE_FOR_avx512er_rcp28v8df_mask_round, "__builtin_ia32_rcp28pd_mask", IX86_BUILTIN_RCP28PD, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_QI_INT },
32873 { OPTION_MASK_ISA_AVX512ER, CODE_FOR_avx512er_rcp28v16sf_mask_round, "__builtin_ia32_rcp28ps_mask", IX86_BUILTIN_RCP28PS, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_HI_INT },
32874 { OPTION_MASK_ISA_AVX512ER, CODE_FOR_avx512er_vmrcp28v2df_round, "__builtin_ia32_rcp28sd_round", IX86_BUILTIN_RCP28SD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
32875 { OPTION_MASK_ISA_AVX512ER, CODE_FOR_avx512er_vmrcp28v4sf_round, "__builtin_ia32_rcp28ss_round", IX86_BUILTIN_RCP28SS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
32876 { OPTION_MASK_ISA_AVX512ER, CODE_FOR_avx512er_rsqrt28v8df_mask_round, "__builtin_ia32_rsqrt28pd_mask", IX86_BUILTIN_RSQRT28PD, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_QI_INT },
32877 { OPTION_MASK_ISA_AVX512ER, CODE_FOR_avx512er_rsqrt28v16sf_mask_round, "__builtin_ia32_rsqrt28ps_mask", IX86_BUILTIN_RSQRT28PS, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_HI_INT },
32878 { OPTION_MASK_ISA_AVX512ER, CODE_FOR_avx512er_vmrsqrt28v2df_round, "__builtin_ia32_rsqrt28sd_round", IX86_BUILTIN_RSQRT28SD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
32879 { OPTION_MASK_ISA_AVX512ER, CODE_FOR_avx512er_vmrsqrt28v4sf_round, "__builtin_ia32_rsqrt28ss_round", IX86_BUILTIN_RSQRT28SS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
32881 /* AVX512DQ. */
32882 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_avx512dq_rangesv2df_round, "__builtin_ia32_rangesd128_round", IX86_BUILTIN_RANGESD128, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT_INT },
32883 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_avx512dq_rangesv4sf_round, "__builtin_ia32_rangess128_round", IX86_BUILTIN_RANGESS128, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT_INT },
32884 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_fix_notruncv8dfv8di2_mask_round, "__builtin_ia32_cvtpd2qq512_mask", IX86_BUILTIN_CVTPD2QQ512, UNKNOWN, (int) V8DI_FTYPE_V8DF_V8DI_QI_INT },
32885 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_avx512dq_cvtps2qqv8di_mask_round, "__builtin_ia32_cvtps2qq512_mask", IX86_BUILTIN_CVTPS2QQ512, UNKNOWN, (int) V8DI_FTYPE_V8SF_V8DI_QI_INT },
32886 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_ufix_notruncv8dfv8di2_mask_round, "__builtin_ia32_cvtpd2uqq512_mask", IX86_BUILTIN_CVTPD2UQQ512, UNKNOWN, (int) V8DI_FTYPE_V8DF_V8DI_QI_INT },
32887 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_avx512dq_cvtps2uqqv8di_mask_round, "__builtin_ia32_cvtps2uqq512_mask", IX86_BUILTIN_CVTPS2UQQ512, UNKNOWN, (int) V8DI_FTYPE_V8SF_V8DI_QI_INT },
32888 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_floatv8div8sf2_mask_round, "__builtin_ia32_cvtqq2ps512_mask", IX86_BUILTIN_CVTQQ2PS512, UNKNOWN, (int) V8SF_FTYPE_V8DI_V8SF_QI_INT },
32889 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_ufloatv8div8sf2_mask_round, "__builtin_ia32_cvtuqq2ps512_mask", IX86_BUILTIN_CVTUQQ2PS512, UNKNOWN, (int) V8SF_FTYPE_V8DI_V8SF_QI_INT },
32890 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_floatv8div8df2_mask_round, "__builtin_ia32_cvtqq2pd512_mask", IX86_BUILTIN_CVTQQ2PD512, UNKNOWN, (int) V8DF_FTYPE_V8DI_V8DF_QI_INT },
32891 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_ufloatv8div8df2_mask_round, "__builtin_ia32_cvtuqq2pd512_mask", IX86_BUILTIN_CVTUQQ2PD512, UNKNOWN, (int) V8DF_FTYPE_V8DI_V8DF_QI_INT },
32892 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_fix_truncv8sfv8di2_mask_round, "__builtin_ia32_cvttps2qq512_mask", IX86_BUILTIN_CVTTPS2QQ512, UNKNOWN, (int) V8DI_FTYPE_V8SF_V8DI_QI_INT },
32893 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_ufix_truncv8sfv8di2_mask_round, "__builtin_ia32_cvttps2uqq512_mask", IX86_BUILTIN_CVTTPS2UQQ512, UNKNOWN, (int) V8DI_FTYPE_V8SF_V8DI_QI_INT },
32894 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_fix_truncv8dfv8di2_mask_round, "__builtin_ia32_cvttpd2qq512_mask", IX86_BUILTIN_CVTTPD2QQ512, UNKNOWN, (int) V8DI_FTYPE_V8DF_V8DI_QI_INT },
32895 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_ufix_truncv8dfv8di2_mask_round, "__builtin_ia32_cvttpd2uqq512_mask", IX86_BUILTIN_CVTTPD2UQQ512, UNKNOWN, (int) V8DI_FTYPE_V8DF_V8DI_QI_INT },
32896 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_avx512dq_rangepv16sf_mask_round, "__builtin_ia32_rangeps512_mask", IX86_BUILTIN_RANGEPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_INT_V16SF_HI_INT },
32897 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_avx512dq_rangepv8df_mask_round, "__builtin_ia32_rangepd512_mask", IX86_BUILTIN_RANGEPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_INT_V8DF_QI_INT },
32898 };
32900 /* Builtins for MPX. */
32901 static const struct builtin_description bdesc_mpx[] =
32902 {
32903 { OPTION_MASK_ISA_MPX, (enum insn_code)0, "__builtin_ia32_bndstx", IX86_BUILTIN_BNDSTX, UNKNOWN, (int) VOID_FTYPE_PCVOID_BND_PCVOID },
32904 { OPTION_MASK_ISA_MPX, (enum insn_code)0, "__builtin_ia32_bndcl", IX86_BUILTIN_BNDCL, UNKNOWN, (int) VOID_FTYPE_PCVOID_BND },
32905 { OPTION_MASK_ISA_MPX, (enum insn_code)0, "__builtin_ia32_bndcu", IX86_BUILTIN_BNDCU, UNKNOWN, (int) VOID_FTYPE_PCVOID_BND },
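/* The insn_code field is zero in both MPX tables: these builtins have
   no named insn pattern of their own and are instead expanded by hand
   as part of the pointer-bounds-checking (chkp) support, so the zero
   is simply a placeholder.  */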
32908 /* Const builtins for MPX. */
32909 static const struct builtin_description bdesc_mpx_const[] =
32910 {
32911 { OPTION_MASK_ISA_MPX, (enum insn_code)0, "__builtin_ia32_bndmk", IX86_BUILTIN_BNDMK, UNKNOWN, (int) BND_FTYPE_PCVOID_ULONG },
32912 { OPTION_MASK_ISA_MPX, (enum insn_code)0, "__builtin_ia32_bndldx", IX86_BUILTIN_BNDLDX, UNKNOWN, (int) BND_FTYPE_PCVOID_PCVOID },
32913 { OPTION_MASK_ISA_MPX, (enum insn_code)0, "__builtin_ia32_narrow_bounds", IX86_BUILTIN_BNDNARROW, UNKNOWN, (int) PVOID_FTYPE_PCVOID_BND_ULONG },
32914 { OPTION_MASK_ISA_MPX, (enum insn_code)0, "__builtin_ia32_bndint", IX86_BUILTIN_BNDINT, UNKNOWN, (int) BND_FTYPE_BND_BND },
32915 { OPTION_MASK_ISA_MPX, (enum insn_code)0, "__builtin_ia32_sizeof", IX86_BUILTIN_SIZEOF, UNKNOWN, (int) ULONG_FTYPE_VOID },
32916 { OPTION_MASK_ISA_MPX, (enum insn_code)0, "__builtin_ia32_bndlower", IX86_BUILTIN_BNDLOWER, UNKNOWN, (int) PVOID_FTYPE_BND },
32917 { OPTION_MASK_ISA_MPX, (enum insn_code)0, "__builtin_ia32_bndupper", IX86_BUILTIN_BNDUPPER, UNKNOWN, (int) PVOID_FTYPE_BND },
32918 { OPTION_MASK_ISA_MPX, (enum insn_code)0, "__builtin_ia32_bndret", IX86_BUILTIN_BNDRET, UNKNOWN, (int) BND_FTYPE_PCVOID },
32921 /* FMA4 and XOP. */
32922 #define MULTI_ARG_4_DF2_DI_I V2DF_FTYPE_V2DF_V2DF_V2DI_INT
32923 #define MULTI_ARG_4_DF2_DI_I1 V4DF_FTYPE_V4DF_V4DF_V4DI_INT
32924 #define MULTI_ARG_4_SF2_SI_I V4SF_FTYPE_V4SF_V4SF_V4SI_INT
32925 #define MULTI_ARG_4_SF2_SI_I1 V8SF_FTYPE_V8SF_V8SF_V8SI_INT
32926 #define MULTI_ARG_3_SF V4SF_FTYPE_V4SF_V4SF_V4SF
32927 #define MULTI_ARG_3_DF V2DF_FTYPE_V2DF_V2DF_V2DF
32928 #define MULTI_ARG_3_SF2 V8SF_FTYPE_V8SF_V8SF_V8SF
32929 #define MULTI_ARG_3_DF2 V4DF_FTYPE_V4DF_V4DF_V4DF
32930 #define MULTI_ARG_3_DI V2DI_FTYPE_V2DI_V2DI_V2DI
32931 #define MULTI_ARG_3_SI V4SI_FTYPE_V4SI_V4SI_V4SI
32932 #define MULTI_ARG_3_SI_DI V4SI_FTYPE_V4SI_V4SI_V2DI
32933 #define MULTI_ARG_3_HI V8HI_FTYPE_V8HI_V8HI_V8HI
32934 #define MULTI_ARG_3_HI_SI V8HI_FTYPE_V8HI_V8HI_V4SI
32935 #define MULTI_ARG_3_QI V16QI_FTYPE_V16QI_V16QI_V16QI
32936 #define MULTI_ARG_3_DI2 V4DI_FTYPE_V4DI_V4DI_V4DI
32937 #define MULTI_ARG_3_SI2 V8SI_FTYPE_V8SI_V8SI_V8SI
32938 #define MULTI_ARG_3_HI2 V16HI_FTYPE_V16HI_V16HI_V16HI
32939 #define MULTI_ARG_3_QI2 V32QI_FTYPE_V32QI_V32QI_V32QI
32940 #define MULTI_ARG_2_SF V4SF_FTYPE_V4SF_V4SF
32941 #define MULTI_ARG_2_DF V2DF_FTYPE_V2DF_V2DF
32942 #define MULTI_ARG_2_DI V2DI_FTYPE_V2DI_V2DI
32943 #define MULTI_ARG_2_SI V4SI_FTYPE_V4SI_V4SI
32944 #define MULTI_ARG_2_HI V8HI_FTYPE_V8HI_V8HI
32945 #define MULTI_ARG_2_QI V16QI_FTYPE_V16QI_V16QI
32946 #define MULTI_ARG_2_DI_IMM V2DI_FTYPE_V2DI_SI
32947 #define MULTI_ARG_2_SI_IMM V4SI_FTYPE_V4SI_SI
32948 #define MULTI_ARG_2_HI_IMM V8HI_FTYPE_V8HI_SI
32949 #define MULTI_ARG_2_QI_IMM V16QI_FTYPE_V16QI_SI
32950 #define MULTI_ARG_2_DI_CMP V2DI_FTYPE_V2DI_V2DI_CMP
32951 #define MULTI_ARG_2_SI_CMP V4SI_FTYPE_V4SI_V4SI_CMP
32952 #define MULTI_ARG_2_HI_CMP V8HI_FTYPE_V8HI_V8HI_CMP
32953 #define MULTI_ARG_2_QI_CMP V16QI_FTYPE_V16QI_V16QI_CMP
32954 #define MULTI_ARG_2_SF_TF V4SF_FTYPE_V4SF_V4SF_TF
32955 #define MULTI_ARG_2_DF_TF V2DF_FTYPE_V2DF_V2DF_TF
32956 #define MULTI_ARG_2_DI_TF V2DI_FTYPE_V2DI_V2DI_TF
32957 #define MULTI_ARG_2_SI_TF V4SI_FTYPE_V4SI_V4SI_TF
32958 #define MULTI_ARG_2_HI_TF V8HI_FTYPE_V8HI_V8HI_TF
32959 #define MULTI_ARG_2_QI_TF V16QI_FTYPE_V16QI_V16QI_TF
32960 #define MULTI_ARG_1_SF V4SF_FTYPE_V4SF
32961 #define MULTI_ARG_1_DF V2DF_FTYPE_V2DF
32962 #define MULTI_ARG_1_SF2 V8SF_FTYPE_V8SF
32963 #define MULTI_ARG_1_DF2 V4DF_FTYPE_V4DF
32964 #define MULTI_ARG_1_DI V2DI_FTYPE_V2DI
32965 #define MULTI_ARG_1_SI V4SI_FTYPE_V4SI
32966 #define MULTI_ARG_1_HI V8HI_FTYPE_V8HI
32967 #define MULTI_ARG_1_QI V16QI_FTYPE_V16QI
32968 #define MULTI_ARG_1_SI_DI V2DI_FTYPE_V4SI
32969 #define MULTI_ARG_1_HI_DI V2DI_FTYPE_V8HI
32970 #define MULTI_ARG_1_HI_SI V4SI_FTYPE_V8HI
32971 #define MULTI_ARG_1_QI_DI V2DI_FTYPE_V16QI
32972 #define MULTI_ARG_1_QI_SI V4SI_FTYPE_V16QI
32973 #define MULTI_ARG_1_QI_HI V8HI_FTYPE_V16QI
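/* Naming scheme for the aliases above: MULTI_ARG_<nargs>_<modes>, where
   <nargs> is the operand count and the mode letters name the vector
   element modes involved (a trailing 2 marks the 256-bit forms).
   Suffixes flag an extra operand kind: _IMM an immediate count, _CMP a
   comparison whose rtx code is taken from the table entry, and _TF the
   trivially-false/true comparison forms.  MULTI_ARG_2_HI_CMP, for
   instance, is just V8HI_FTYPE_V8HI_V8HI_CMP.

   A minimal sketch of how one of the XOP entries below is reached from
   user code -- the intrinsic wrapper is the usual <x86intrin.h>
   spelling and is shown only for illustration:

     #include <x86intrin.h>

     __m128i
     cmov128 (__m128i a, __m128i b, __m128i mask)
     {
       return _mm_cmov_si128 (a, b, mask);
     }

   which, built with -mxop, is expected to expand through
   __builtin_ia32_vpcmov (MULTI_ARG_3_DI).  */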
32975 static const struct builtin_description bdesc_multi_arg[] =
32976 {
32977 { OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_vmfmadd_v4sf,
32978 "__builtin_ia32_vfmaddss", IX86_BUILTIN_VFMADDSS,
32979 UNKNOWN, (int)MULTI_ARG_3_SF },
32980 { OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_vmfmadd_v2df,
32981 "__builtin_ia32_vfmaddsd", IX86_BUILTIN_VFMADDSD,
32982 UNKNOWN, (int)MULTI_ARG_3_DF },
32984 { OPTION_MASK_ISA_FMA, CODE_FOR_fmai_vmfmadd_v4sf,
32985 "__builtin_ia32_vfmaddss3", IX86_BUILTIN_VFMADDSS3,
32986 UNKNOWN, (int)MULTI_ARG_3_SF },
32987 { OPTION_MASK_ISA_FMA, CODE_FOR_fmai_vmfmadd_v2df,
32988 "__builtin_ia32_vfmaddsd3", IX86_BUILTIN_VFMADDSD3,
32989 UNKNOWN, (int)MULTI_ARG_3_DF },
32991 { OPTION_MASK_ISA_FMA | OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_fmadd_v4sf,
32992 "__builtin_ia32_vfmaddps", IX86_BUILTIN_VFMADDPS,
32993 UNKNOWN, (int)MULTI_ARG_3_SF },
32994 { OPTION_MASK_ISA_FMA | OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_fmadd_v2df,
32995 "__builtin_ia32_vfmaddpd", IX86_BUILTIN_VFMADDPD,
32996 UNKNOWN, (int)MULTI_ARG_3_DF },
32997 { OPTION_MASK_ISA_FMA | OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_fmadd_v8sf,
32998 "__builtin_ia32_vfmaddps256", IX86_BUILTIN_VFMADDPS256,
32999 UNKNOWN, (int)MULTI_ARG_3_SF2 },
33000 { OPTION_MASK_ISA_FMA | OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_fmadd_v4df,
33001 "__builtin_ia32_vfmaddpd256", IX86_BUILTIN_VFMADDPD256,
33002 UNKNOWN, (int)MULTI_ARG_3_DF2 },
33004 { OPTION_MASK_ISA_FMA | OPTION_MASK_ISA_FMA4, CODE_FOR_fmaddsub_v4sf,
33005 "__builtin_ia32_vfmaddsubps", IX86_BUILTIN_VFMADDSUBPS,
33006 UNKNOWN, (int)MULTI_ARG_3_SF },
33007 { OPTION_MASK_ISA_FMA | OPTION_MASK_ISA_FMA4, CODE_FOR_fmaddsub_v2df,
33008 "__builtin_ia32_vfmaddsubpd", IX86_BUILTIN_VFMADDSUBPD,
33009 UNKNOWN, (int)MULTI_ARG_3_DF },
33010 { OPTION_MASK_ISA_FMA | OPTION_MASK_ISA_FMA4, CODE_FOR_fmaddsub_v8sf,
33011 "__builtin_ia32_vfmaddsubps256", IX86_BUILTIN_VFMADDSUBPS256,
33012 UNKNOWN, (int)MULTI_ARG_3_SF2 },
33013 { OPTION_MASK_ISA_FMA | OPTION_MASK_ISA_FMA4, CODE_FOR_fmaddsub_v4df,
33014 "__builtin_ia32_vfmaddsubpd256", IX86_BUILTIN_VFMADDSUBPD256,
33015 UNKNOWN, (int)MULTI_ARG_3_DF2 },
33017 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v2di, "__builtin_ia32_vpcmov", IX86_BUILTIN_VPCMOV, UNKNOWN, (int)MULTI_ARG_3_DI },
33018 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v2di, "__builtin_ia32_vpcmov_v2di", IX86_BUILTIN_VPCMOV_V2DI, UNKNOWN, (int)MULTI_ARG_3_DI },
33019 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v4si, "__builtin_ia32_vpcmov_v4si", IX86_BUILTIN_VPCMOV_V4SI, UNKNOWN, (int)MULTI_ARG_3_SI },
33020 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v8hi, "__builtin_ia32_vpcmov_v8hi", IX86_BUILTIN_VPCMOV_V8HI, UNKNOWN, (int)MULTI_ARG_3_HI },
33021 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v16qi, "__builtin_ia32_vpcmov_v16qi",IX86_BUILTIN_VPCMOV_V16QI,UNKNOWN, (int)MULTI_ARG_3_QI },
33022 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v2df, "__builtin_ia32_vpcmov_v2df", IX86_BUILTIN_VPCMOV_V2DF, UNKNOWN, (int)MULTI_ARG_3_DF },
33023 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v4sf, "__builtin_ia32_vpcmov_v4sf", IX86_BUILTIN_VPCMOV_V4SF, UNKNOWN, (int)MULTI_ARG_3_SF },
33025 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v4di256, "__builtin_ia32_vpcmov256", IX86_BUILTIN_VPCMOV256, UNKNOWN, (int)MULTI_ARG_3_DI2 },
33026 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v4di256, "__builtin_ia32_vpcmov_v4di256", IX86_BUILTIN_VPCMOV_V4DI256, UNKNOWN, (int)MULTI_ARG_3_DI2 },
33027 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v8si256, "__builtin_ia32_vpcmov_v8si256", IX86_BUILTIN_VPCMOV_V8SI256, UNKNOWN, (int)MULTI_ARG_3_SI2 },
33028 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v16hi256, "__builtin_ia32_vpcmov_v16hi256", IX86_BUILTIN_VPCMOV_V16HI256, UNKNOWN, (int)MULTI_ARG_3_HI2 },
33029 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v32qi256, "__builtin_ia32_vpcmov_v32qi256", IX86_BUILTIN_VPCMOV_V32QI256, UNKNOWN, (int)MULTI_ARG_3_QI2 },
33030 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v4df256, "__builtin_ia32_vpcmov_v4df256", IX86_BUILTIN_VPCMOV_V4DF256, UNKNOWN, (int)MULTI_ARG_3_DF2 },
33031 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v8sf256, "__builtin_ia32_vpcmov_v8sf256", IX86_BUILTIN_VPCMOV_V8SF256, UNKNOWN, (int)MULTI_ARG_3_SF2 },
33033 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pperm, "__builtin_ia32_vpperm", IX86_BUILTIN_VPPERM, UNKNOWN, (int)MULTI_ARG_3_QI },
33035 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacssww, "__builtin_ia32_vpmacssww", IX86_BUILTIN_VPMACSSWW, UNKNOWN, (int)MULTI_ARG_3_HI },
33036 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacsww, "__builtin_ia32_vpmacsww", IX86_BUILTIN_VPMACSWW, UNKNOWN, (int)MULTI_ARG_3_HI },
33037 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacsswd, "__builtin_ia32_vpmacsswd", IX86_BUILTIN_VPMACSSWD, UNKNOWN, (int)MULTI_ARG_3_HI_SI },
33038 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacswd, "__builtin_ia32_vpmacswd", IX86_BUILTIN_VPMACSWD, UNKNOWN, (int)MULTI_ARG_3_HI_SI },
33039 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacssdd, "__builtin_ia32_vpmacssdd", IX86_BUILTIN_VPMACSSDD, UNKNOWN, (int)MULTI_ARG_3_SI },
33040 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacsdd, "__builtin_ia32_vpmacsdd", IX86_BUILTIN_VPMACSDD, UNKNOWN, (int)MULTI_ARG_3_SI },
33041 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacssdql, "__builtin_ia32_vpmacssdql", IX86_BUILTIN_VPMACSSDQL, UNKNOWN, (int)MULTI_ARG_3_SI_DI },
33042 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacssdqh, "__builtin_ia32_vpmacssdqh", IX86_BUILTIN_VPMACSSDQH, UNKNOWN, (int)MULTI_ARG_3_SI_DI },
33043 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacsdql, "__builtin_ia32_vpmacsdql", IX86_BUILTIN_VPMACSDQL, UNKNOWN, (int)MULTI_ARG_3_SI_DI },
33044 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacsdqh, "__builtin_ia32_vpmacsdqh", IX86_BUILTIN_VPMACSDQH, UNKNOWN, (int)MULTI_ARG_3_SI_DI },
33045 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmadcsswd, "__builtin_ia32_vpmadcsswd", IX86_BUILTIN_VPMADCSSWD, UNKNOWN, (int)MULTI_ARG_3_HI_SI },
33046 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmadcswd, "__builtin_ia32_vpmadcswd", IX86_BUILTIN_VPMADCSWD, UNKNOWN, (int)MULTI_ARG_3_HI_SI },
33048 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vrotlv2di3, "__builtin_ia32_vprotq", IX86_BUILTIN_VPROTQ, UNKNOWN, (int)MULTI_ARG_2_DI },
33049 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vrotlv4si3, "__builtin_ia32_vprotd", IX86_BUILTIN_VPROTD, UNKNOWN, (int)MULTI_ARG_2_SI },
33050 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vrotlv8hi3, "__builtin_ia32_vprotw", IX86_BUILTIN_VPROTW, UNKNOWN, (int)MULTI_ARG_2_HI },
33051 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vrotlv16qi3, "__builtin_ia32_vprotb", IX86_BUILTIN_VPROTB, UNKNOWN, (int)MULTI_ARG_2_QI },
33052 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_rotlv2di3, "__builtin_ia32_vprotqi", IX86_BUILTIN_VPROTQ_IMM, UNKNOWN, (int)MULTI_ARG_2_DI_IMM },
33053 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_rotlv4si3, "__builtin_ia32_vprotdi", IX86_BUILTIN_VPROTD_IMM, UNKNOWN, (int)MULTI_ARG_2_SI_IMM },
33054 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_rotlv8hi3, "__builtin_ia32_vprotwi", IX86_BUILTIN_VPROTW_IMM, UNKNOWN, (int)MULTI_ARG_2_HI_IMM },
33055 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_rotlv16qi3, "__builtin_ia32_vprotbi", IX86_BUILTIN_VPROTB_IMM, UNKNOWN, (int)MULTI_ARG_2_QI_IMM },
33056 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_shav2di3, "__builtin_ia32_vpshaq", IX86_BUILTIN_VPSHAQ, UNKNOWN, (int)MULTI_ARG_2_DI },
33057 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_shav4si3, "__builtin_ia32_vpshad", IX86_BUILTIN_VPSHAD, UNKNOWN, (int)MULTI_ARG_2_SI },
33058 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_shav8hi3, "__builtin_ia32_vpshaw", IX86_BUILTIN_VPSHAW, UNKNOWN, (int)MULTI_ARG_2_HI },
33059 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_shav16qi3, "__builtin_ia32_vpshab", IX86_BUILTIN_VPSHAB, UNKNOWN, (int)MULTI_ARG_2_QI },
33060 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_shlv2di3, "__builtin_ia32_vpshlq", IX86_BUILTIN_VPSHLQ, UNKNOWN, (int)MULTI_ARG_2_DI },
33061 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_shlv4si3, "__builtin_ia32_vpshld", IX86_BUILTIN_VPSHLD, UNKNOWN, (int)MULTI_ARG_2_SI },
33062 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_shlv8hi3, "__builtin_ia32_vpshlw", IX86_BUILTIN_VPSHLW, UNKNOWN, (int)MULTI_ARG_2_HI },
33063 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_shlv16qi3, "__builtin_ia32_vpshlb", IX86_BUILTIN_VPSHLB, UNKNOWN, (int)MULTI_ARG_2_QI },
33065 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vmfrczv4sf2, "__builtin_ia32_vfrczss", IX86_BUILTIN_VFRCZSS, UNKNOWN, (int)MULTI_ARG_1_SF },
33066 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vmfrczv2df2, "__builtin_ia32_vfrczsd", IX86_BUILTIN_VFRCZSD, UNKNOWN, (int)MULTI_ARG_1_DF },
33067 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_frczv4sf2, "__builtin_ia32_vfrczps", IX86_BUILTIN_VFRCZPS, UNKNOWN, (int)MULTI_ARG_1_SF },
33068 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_frczv2df2, "__builtin_ia32_vfrczpd", IX86_BUILTIN_VFRCZPD, UNKNOWN, (int)MULTI_ARG_1_DF },
33069 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_frczv8sf2, "__builtin_ia32_vfrczps256", IX86_BUILTIN_VFRCZPS256, UNKNOWN, (int)MULTI_ARG_1_SF2 },
33070 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_frczv4df2, "__builtin_ia32_vfrczpd256", IX86_BUILTIN_VFRCZPD256, UNKNOWN, (int)MULTI_ARG_1_DF2 },
33072 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phaddbw, "__builtin_ia32_vphaddbw", IX86_BUILTIN_VPHADDBW, UNKNOWN, (int)MULTI_ARG_1_QI_HI },
33073 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phaddbd, "__builtin_ia32_vphaddbd", IX86_BUILTIN_VPHADDBD, UNKNOWN, (int)MULTI_ARG_1_QI_SI },
33074 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phaddbq, "__builtin_ia32_vphaddbq", IX86_BUILTIN_VPHADDBQ, UNKNOWN, (int)MULTI_ARG_1_QI_DI },
33075 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phaddwd, "__builtin_ia32_vphaddwd", IX86_BUILTIN_VPHADDWD, UNKNOWN, (int)MULTI_ARG_1_HI_SI },
33076 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phaddwq, "__builtin_ia32_vphaddwq", IX86_BUILTIN_VPHADDWQ, UNKNOWN, (int)MULTI_ARG_1_HI_DI },
33077 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phadddq, "__builtin_ia32_vphadddq", IX86_BUILTIN_VPHADDDQ, UNKNOWN, (int)MULTI_ARG_1_SI_DI },
33078 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phaddubw, "__builtin_ia32_vphaddubw", IX86_BUILTIN_VPHADDUBW, UNKNOWN, (int)MULTI_ARG_1_QI_HI },
33079 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phaddubd, "__builtin_ia32_vphaddubd", IX86_BUILTIN_VPHADDUBD, UNKNOWN, (int)MULTI_ARG_1_QI_SI },
33080 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phaddubq, "__builtin_ia32_vphaddubq", IX86_BUILTIN_VPHADDUBQ, UNKNOWN, (int)MULTI_ARG_1_QI_DI },
33081 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phadduwd, "__builtin_ia32_vphadduwd", IX86_BUILTIN_VPHADDUWD, UNKNOWN, (int)MULTI_ARG_1_HI_SI },
33082 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phadduwq, "__builtin_ia32_vphadduwq", IX86_BUILTIN_VPHADDUWQ, UNKNOWN, (int)MULTI_ARG_1_HI_DI },
33083 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phaddudq, "__builtin_ia32_vphaddudq", IX86_BUILTIN_VPHADDUDQ, UNKNOWN, (int)MULTI_ARG_1_SI_DI },
33084 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phsubbw, "__builtin_ia32_vphsubbw", IX86_BUILTIN_VPHSUBBW, UNKNOWN, (int)MULTI_ARG_1_QI_HI },
33085 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phsubwd, "__builtin_ia32_vphsubwd", IX86_BUILTIN_VPHSUBWD, UNKNOWN, (int)MULTI_ARG_1_HI_SI },
33086 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phsubdq, "__builtin_ia32_vphsubdq", IX86_BUILTIN_VPHSUBDQ, UNKNOWN, (int)MULTI_ARG_1_SI_DI },
33088 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv16qi3, "__builtin_ia32_vpcomeqb", IX86_BUILTIN_VPCOMEQB, EQ, (int)MULTI_ARG_2_QI_CMP },
33089 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv16qi3, "__builtin_ia32_vpcomneb", IX86_BUILTIN_VPCOMNEB, NE, (int)MULTI_ARG_2_QI_CMP },
33090 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv16qi3, "__builtin_ia32_vpcomneqb", IX86_BUILTIN_VPCOMNEB, NE, (int)MULTI_ARG_2_QI_CMP },
33091 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv16qi3, "__builtin_ia32_vpcomltb", IX86_BUILTIN_VPCOMLTB, LT, (int)MULTI_ARG_2_QI_CMP },
33092 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv16qi3, "__builtin_ia32_vpcomleb", IX86_BUILTIN_VPCOMLEB, LE, (int)MULTI_ARG_2_QI_CMP },
33093 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv16qi3, "__builtin_ia32_vpcomgtb", IX86_BUILTIN_VPCOMGTB, GT, (int)MULTI_ARG_2_QI_CMP },
33094 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv16qi3, "__builtin_ia32_vpcomgeb", IX86_BUILTIN_VPCOMGEB, GE, (int)MULTI_ARG_2_QI_CMP },
33096 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv8hi3, "__builtin_ia32_vpcomeqw", IX86_BUILTIN_VPCOMEQW, EQ, (int)MULTI_ARG_2_HI_CMP },
33097 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv8hi3, "__builtin_ia32_vpcomnew", IX86_BUILTIN_VPCOMNEW, NE, (int)MULTI_ARG_2_HI_CMP },
33098 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv8hi3, "__builtin_ia32_vpcomneqw", IX86_BUILTIN_VPCOMNEW, NE, (int)MULTI_ARG_2_HI_CMP },
33099 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv8hi3, "__builtin_ia32_vpcomltw", IX86_BUILTIN_VPCOMLTW, LT, (int)MULTI_ARG_2_HI_CMP },
33100 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv8hi3, "__builtin_ia32_vpcomlew", IX86_BUILTIN_VPCOMLEW, LE, (int)MULTI_ARG_2_HI_CMP },
33101 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv8hi3, "__builtin_ia32_vpcomgtw", IX86_BUILTIN_VPCOMGTW, GT, (int)MULTI_ARG_2_HI_CMP },
33102 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv8hi3, "__builtin_ia32_vpcomgew", IX86_BUILTIN_VPCOMGEW, GE, (int)MULTI_ARG_2_HI_CMP },
33104 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv4si3, "__builtin_ia32_vpcomeqd", IX86_BUILTIN_VPCOMEQD, EQ, (int)MULTI_ARG_2_SI_CMP },
33105 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv4si3, "__builtin_ia32_vpcomned", IX86_BUILTIN_VPCOMNED, NE, (int)MULTI_ARG_2_SI_CMP },
33106 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv4si3, "__builtin_ia32_vpcomneqd", IX86_BUILTIN_VPCOMNED, NE, (int)MULTI_ARG_2_SI_CMP },
33107 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv4si3, "__builtin_ia32_vpcomltd", IX86_BUILTIN_VPCOMLTD, LT, (int)MULTI_ARG_2_SI_CMP },
33108 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv4si3, "__builtin_ia32_vpcomled", IX86_BUILTIN_VPCOMLED, LE, (int)MULTI_ARG_2_SI_CMP },
33109 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv4si3, "__builtin_ia32_vpcomgtd", IX86_BUILTIN_VPCOMGTD, GT, (int)MULTI_ARG_2_SI_CMP },
33110 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv4si3, "__builtin_ia32_vpcomged", IX86_BUILTIN_VPCOMGED, GE, (int)MULTI_ARG_2_SI_CMP },
33112 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv2di3, "__builtin_ia32_vpcomeqq", IX86_BUILTIN_VPCOMEQQ, EQ, (int)MULTI_ARG_2_DI_CMP },
33113 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv2di3, "__builtin_ia32_vpcomneq", IX86_BUILTIN_VPCOMNEQ, NE, (int)MULTI_ARG_2_DI_CMP },
33114 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv2di3, "__builtin_ia32_vpcomneqq", IX86_BUILTIN_VPCOMNEQ, NE, (int)MULTI_ARG_2_DI_CMP },
33115 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv2di3, "__builtin_ia32_vpcomltq", IX86_BUILTIN_VPCOMLTQ, LT, (int)MULTI_ARG_2_DI_CMP },
33116 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv2di3, "__builtin_ia32_vpcomleq", IX86_BUILTIN_VPCOMLEQ, LE, (int)MULTI_ARG_2_DI_CMP },
33117 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv2di3, "__builtin_ia32_vpcomgtq", IX86_BUILTIN_VPCOMGTQ, GT, (int)MULTI_ARG_2_DI_CMP },
33118 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv2di3, "__builtin_ia32_vpcomgeq", IX86_BUILTIN_VPCOMGEQ, GE, (int)MULTI_ARG_2_DI_CMP },
33120 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v16qi3,"__builtin_ia32_vpcomequb", IX86_BUILTIN_VPCOMEQUB, EQ, (int)MULTI_ARG_2_QI_CMP },
33121 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v16qi3,"__builtin_ia32_vpcomneub", IX86_BUILTIN_VPCOMNEUB, NE, (int)MULTI_ARG_2_QI_CMP },
33122 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v16qi3,"__builtin_ia32_vpcomnequb", IX86_BUILTIN_VPCOMNEUB, NE, (int)MULTI_ARG_2_QI_CMP },
33123 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv16qi3, "__builtin_ia32_vpcomltub", IX86_BUILTIN_VPCOMLTUB, LTU, (int)MULTI_ARG_2_QI_CMP },
33124 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv16qi3, "__builtin_ia32_vpcomleub", IX86_BUILTIN_VPCOMLEUB, LEU, (int)MULTI_ARG_2_QI_CMP },
33125 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv16qi3, "__builtin_ia32_vpcomgtub", IX86_BUILTIN_VPCOMGTUB, GTU, (int)MULTI_ARG_2_QI_CMP },
33126 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv16qi3, "__builtin_ia32_vpcomgeub", IX86_BUILTIN_VPCOMGEUB, GEU, (int)MULTI_ARG_2_QI_CMP },
33128 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v8hi3, "__builtin_ia32_vpcomequw", IX86_BUILTIN_VPCOMEQUW, EQ, (int)MULTI_ARG_2_HI_CMP },
33129 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v8hi3, "__builtin_ia32_vpcomneuw", IX86_BUILTIN_VPCOMNEUW, NE, (int)MULTI_ARG_2_HI_CMP },
33130 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v8hi3, "__builtin_ia32_vpcomnequw", IX86_BUILTIN_VPCOMNEUW, NE, (int)MULTI_ARG_2_HI_CMP },
33131 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv8hi3, "__builtin_ia32_vpcomltuw", IX86_BUILTIN_VPCOMLTUW, LTU, (int)MULTI_ARG_2_HI_CMP },
33132 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv8hi3, "__builtin_ia32_vpcomleuw", IX86_BUILTIN_VPCOMLEUW, LEU, (int)MULTI_ARG_2_HI_CMP },
33133 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv8hi3, "__builtin_ia32_vpcomgtuw", IX86_BUILTIN_VPCOMGTUW, GTU, (int)MULTI_ARG_2_HI_CMP },
33134 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv8hi3, "__builtin_ia32_vpcomgeuw", IX86_BUILTIN_VPCOMGEUW, GEU, (int)MULTI_ARG_2_HI_CMP },
33136 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v4si3, "__builtin_ia32_vpcomequd", IX86_BUILTIN_VPCOMEQUD, EQ, (int)MULTI_ARG_2_SI_CMP },
33137 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v4si3, "__builtin_ia32_vpcomneud", IX86_BUILTIN_VPCOMNEUD, NE, (int)MULTI_ARG_2_SI_CMP },
33138 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v4si3, "__builtin_ia32_vpcomnequd", IX86_BUILTIN_VPCOMNEUD, NE, (int)MULTI_ARG_2_SI_CMP },
33139 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv4si3, "__builtin_ia32_vpcomltud", IX86_BUILTIN_VPCOMLTUD, LTU, (int)MULTI_ARG_2_SI_CMP },
33140 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv4si3, "__builtin_ia32_vpcomleud", IX86_BUILTIN_VPCOMLEUD, LEU, (int)MULTI_ARG_2_SI_CMP },
33141 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv4si3, "__builtin_ia32_vpcomgtud", IX86_BUILTIN_VPCOMGTUD, GTU, (int)MULTI_ARG_2_SI_CMP },
33142 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv4si3, "__builtin_ia32_vpcomgeud", IX86_BUILTIN_VPCOMGEUD, GEU, (int)MULTI_ARG_2_SI_CMP },
33144 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v2di3, "__builtin_ia32_vpcomequq", IX86_BUILTIN_VPCOMEQUQ, EQ, (int)MULTI_ARG_2_DI_CMP },
33145 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v2di3, "__builtin_ia32_vpcomneuq", IX86_BUILTIN_VPCOMNEUQ, NE, (int)MULTI_ARG_2_DI_CMP },
33146 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v2di3, "__builtin_ia32_vpcomnequq", IX86_BUILTIN_VPCOMNEUQ, NE, (int)MULTI_ARG_2_DI_CMP },
33147 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv2di3, "__builtin_ia32_vpcomltuq", IX86_BUILTIN_VPCOMLTUQ, LTU, (int)MULTI_ARG_2_DI_CMP },
33148 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv2di3, "__builtin_ia32_vpcomleuq", IX86_BUILTIN_VPCOMLEUQ, LEU, (int)MULTI_ARG_2_DI_CMP },
33149 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv2di3, "__builtin_ia32_vpcomgtuq", IX86_BUILTIN_VPCOMGTUQ, GTU, (int)MULTI_ARG_2_DI_CMP },
33150 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv2di3, "__builtin_ia32_vpcomgeuq", IX86_BUILTIN_VPCOMGEUQ, GEU, (int)MULTI_ARG_2_DI_CMP },
33152 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv16qi3, "__builtin_ia32_vpcomfalseb", IX86_BUILTIN_VPCOMFALSEB, (enum rtx_code) PCOM_FALSE, (int)MULTI_ARG_2_QI_TF },
33153 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv8hi3, "__builtin_ia32_vpcomfalsew", IX86_BUILTIN_VPCOMFALSEW, (enum rtx_code) PCOM_FALSE, (int)MULTI_ARG_2_HI_TF },
33154 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv4si3, "__builtin_ia32_vpcomfalsed", IX86_BUILTIN_VPCOMFALSED, (enum rtx_code) PCOM_FALSE, (int)MULTI_ARG_2_SI_TF },
33155 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv2di3, "__builtin_ia32_vpcomfalseq", IX86_BUILTIN_VPCOMFALSEQ, (enum rtx_code) PCOM_FALSE, (int)MULTI_ARG_2_DI_TF },
33156 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv16qi3, "__builtin_ia32_vpcomfalseub",IX86_BUILTIN_VPCOMFALSEUB,(enum rtx_code) PCOM_FALSE, (int)MULTI_ARG_2_QI_TF },
33157 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv8hi3, "__builtin_ia32_vpcomfalseuw",IX86_BUILTIN_VPCOMFALSEUW,(enum rtx_code) PCOM_FALSE, (int)MULTI_ARG_2_HI_TF },
33158 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv4si3, "__builtin_ia32_vpcomfalseud",IX86_BUILTIN_VPCOMFALSEUD,(enum rtx_code) PCOM_FALSE, (int)MULTI_ARG_2_SI_TF },
33159 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv2di3, "__builtin_ia32_vpcomfalseuq",IX86_BUILTIN_VPCOMFALSEUQ,(enum rtx_code) PCOM_FALSE, (int)MULTI_ARG_2_DI_TF },
33161 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv16qi3, "__builtin_ia32_vpcomtrueb", IX86_BUILTIN_VPCOMTRUEB, (enum rtx_code) PCOM_TRUE, (int)MULTI_ARG_2_QI_TF },
33162 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv8hi3, "__builtin_ia32_vpcomtruew", IX86_BUILTIN_VPCOMTRUEW, (enum rtx_code) PCOM_TRUE, (int)MULTI_ARG_2_HI_TF },
33163 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv4si3, "__builtin_ia32_vpcomtrued", IX86_BUILTIN_VPCOMTRUED, (enum rtx_code) PCOM_TRUE, (int)MULTI_ARG_2_SI_TF },
33164 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv2di3, "__builtin_ia32_vpcomtrueq", IX86_BUILTIN_VPCOMTRUEQ, (enum rtx_code) PCOM_TRUE, (int)MULTI_ARG_2_DI_TF },
33165 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv16qi3, "__builtin_ia32_vpcomtrueub", IX86_BUILTIN_VPCOMTRUEUB, (enum rtx_code) PCOM_TRUE, (int)MULTI_ARG_2_QI_TF },
33166 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv8hi3, "__builtin_ia32_vpcomtrueuw", IX86_BUILTIN_VPCOMTRUEUW, (enum rtx_code) PCOM_TRUE, (int)MULTI_ARG_2_HI_TF },
33167 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv4si3, "__builtin_ia32_vpcomtrueud", IX86_BUILTIN_VPCOMTRUEUD, (enum rtx_code) PCOM_TRUE, (int)MULTI_ARG_2_SI_TF },
33168 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv2di3, "__builtin_ia32_vpcomtrueuq", IX86_BUILTIN_VPCOMTRUEUQ, (enum rtx_code) PCOM_TRUE, (int)MULTI_ARG_2_DI_TF },
33170 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vpermil2v2df3, "__builtin_ia32_vpermil2pd", IX86_BUILTIN_VPERMIL2PD, UNKNOWN, (int)MULTI_ARG_4_DF2_DI_I },
33171 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vpermil2v4sf3, "__builtin_ia32_vpermil2ps", IX86_BUILTIN_VPERMIL2PS, UNKNOWN, (int)MULTI_ARG_4_SF2_SI_I },
33172 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vpermil2v4df3, "__builtin_ia32_vpermil2pd256", IX86_BUILTIN_VPERMIL2PD256, UNKNOWN, (int)MULTI_ARG_4_DF2_DI_I1 },
33173 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vpermil2v8sf3, "__builtin_ia32_vpermil2ps256", IX86_BUILTIN_VPERMIL2PS256, UNKNOWN, (int)MULTI_ARG_4_SF2_SI_I1 },
33177 /* TM vector builtins. */
33179 /* Reuse the existing x86-specific `struct builtin_description' because
33180 we're lazy. Add casts to make them fit. */
33181 static const struct builtin_description bdesc_tm[] =
33183 { OPTION_MASK_ISA_MMX, CODE_FOR_nothing, "__builtin__ITM_WM64", (enum ix86_builtins) BUILT_IN_TM_STORE_M64, UNKNOWN, VOID_FTYPE_PV2SI_V2SI },
33184 { OPTION_MASK_ISA_MMX, CODE_FOR_nothing, "__builtin__ITM_WaRM64", (enum ix86_builtins) BUILT_IN_TM_STORE_WAR_M64, UNKNOWN, VOID_FTYPE_PV2SI_V2SI },
33185 { OPTION_MASK_ISA_MMX, CODE_FOR_nothing, "__builtin__ITM_WaWM64", (enum ix86_builtins) BUILT_IN_TM_STORE_WAW_M64, UNKNOWN, VOID_FTYPE_PV2SI_V2SI },
33186 { OPTION_MASK_ISA_MMX, CODE_FOR_nothing, "__builtin__ITM_RM64", (enum ix86_builtins) BUILT_IN_TM_LOAD_M64, UNKNOWN, V2SI_FTYPE_PCV2SI },
33187 { OPTION_MASK_ISA_MMX, CODE_FOR_nothing, "__builtin__ITM_RaRM64", (enum ix86_builtins) BUILT_IN_TM_LOAD_RAR_M64, UNKNOWN, V2SI_FTYPE_PCV2SI },
33188 { OPTION_MASK_ISA_MMX, CODE_FOR_nothing, "__builtin__ITM_RaWM64", (enum ix86_builtins) BUILT_IN_TM_LOAD_RAW_M64, UNKNOWN, V2SI_FTYPE_PCV2SI },
33189 { OPTION_MASK_ISA_MMX, CODE_FOR_nothing, "__builtin__ITM_RfWM64", (enum ix86_builtins) BUILT_IN_TM_LOAD_RFW_M64, UNKNOWN, V2SI_FTYPE_PCV2SI },
33191 { OPTION_MASK_ISA_SSE, CODE_FOR_nothing, "__builtin__ITM_WM128", (enum ix86_builtins) BUILT_IN_TM_STORE_M128, UNKNOWN, VOID_FTYPE_PV4SF_V4SF },
33192 { OPTION_MASK_ISA_SSE, CODE_FOR_nothing, "__builtin__ITM_WaRM128", (enum ix86_builtins) BUILT_IN_TM_STORE_WAR_M128, UNKNOWN, VOID_FTYPE_PV4SF_V4SF },
33193 { OPTION_MASK_ISA_SSE, CODE_FOR_nothing, "__builtin__ITM_WaWM128", (enum ix86_builtins) BUILT_IN_TM_STORE_WAW_M128, UNKNOWN, VOID_FTYPE_PV4SF_V4SF },
33194 { OPTION_MASK_ISA_SSE, CODE_FOR_nothing, "__builtin__ITM_RM128", (enum ix86_builtins) BUILT_IN_TM_LOAD_M128, UNKNOWN, V4SF_FTYPE_PCV4SF },
33195 { OPTION_MASK_ISA_SSE, CODE_FOR_nothing, "__builtin__ITM_RaRM128", (enum ix86_builtins) BUILT_IN_TM_LOAD_RAR_M128, UNKNOWN, V4SF_FTYPE_PCV4SF },
33196 { OPTION_MASK_ISA_SSE, CODE_FOR_nothing, "__builtin__ITM_RaWM128", (enum ix86_builtins) BUILT_IN_TM_LOAD_RAW_M128, UNKNOWN, V4SF_FTYPE_PCV4SF },
33197 { OPTION_MASK_ISA_SSE, CODE_FOR_nothing, "__builtin__ITM_RfWM128", (enum ix86_builtins) BUILT_IN_TM_LOAD_RFW_M128, UNKNOWN, V4SF_FTYPE_PCV4SF },
33199 { OPTION_MASK_ISA_AVX, CODE_FOR_nothing, "__builtin__ITM_WM256", (enum ix86_builtins) BUILT_IN_TM_STORE_M256, UNKNOWN, VOID_FTYPE_PV8SF_V8SF },
33200 { OPTION_MASK_ISA_AVX, CODE_FOR_nothing, "__builtin__ITM_WaRM256", (enum ix86_builtins) BUILT_IN_TM_STORE_WAR_M256, UNKNOWN, VOID_FTYPE_PV8SF_V8SF },
33201 { OPTION_MASK_ISA_AVX, CODE_FOR_nothing, "__builtin__ITM_WaWM256", (enum ix86_builtins) BUILT_IN_TM_STORE_WAW_M256, UNKNOWN, VOID_FTYPE_PV8SF_V8SF },
33202 { OPTION_MASK_ISA_AVX, CODE_FOR_nothing, "__builtin__ITM_RM256", (enum ix86_builtins) BUILT_IN_TM_LOAD_M256, UNKNOWN, V8SF_FTYPE_PCV8SF },
33203 { OPTION_MASK_ISA_AVX, CODE_FOR_nothing, "__builtin__ITM_RaRM256", (enum ix86_builtins) BUILT_IN_TM_LOAD_RAR_M256, UNKNOWN, V8SF_FTYPE_PCV8SF },
33204 { OPTION_MASK_ISA_AVX, CODE_FOR_nothing, "__builtin__ITM_RaWM256", (enum ix86_builtins) BUILT_IN_TM_LOAD_RAW_M256, UNKNOWN, V8SF_FTYPE_PCV8SF },
33205 { OPTION_MASK_ISA_AVX, CODE_FOR_nothing, "__builtin__ITM_RfWM256", (enum ix86_builtins) BUILT_IN_TM_LOAD_RFW_M256, UNKNOWN, V8SF_FTYPE_PCV8SF },
33207 { OPTION_MASK_ISA_MMX, CODE_FOR_nothing, "__builtin__ITM_LM64", (enum ix86_builtins) BUILT_IN_TM_LOG_M64, UNKNOWN, VOID_FTYPE_PCVOID },
33208 { OPTION_MASK_ISA_SSE, CODE_FOR_nothing, "__builtin__ITM_LM128", (enum ix86_builtins) BUILT_IN_TM_LOG_M128, UNKNOWN, VOID_FTYPE_PCVOID },
33209 { OPTION_MASK_ISA_AVX, CODE_FOR_nothing, "__builtin__ITM_LM256", (enum ix86_builtins) BUILT_IN_TM_LOG_M256, UNKNOWN, VOID_FTYPE_PCVOID },
33212 /* TM callbacks. */
33214 /* Return the builtin decl needed to load a vector of TYPE. */
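/* For example, a 128-bit vector type such as V4SF maps to the decl for
   __builtin__ITM_RM128 (BUILT_IN_TM_LOAD_M128) registered above; 64-bit
   and 256-bit vectors map to the M64 and M256 variants. */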
33216 static tree
33217 ix86_builtin_tm_load (tree type)
33219 if (TREE_CODE (type) == VECTOR_TYPE)
33221 switch (tree_to_uhwi (TYPE_SIZE (type)))
33223 case 64:
33224 return builtin_decl_explicit (BUILT_IN_TM_LOAD_M64);
33225 case 128:
33226 return builtin_decl_explicit (BUILT_IN_TM_LOAD_M128);
33227 case 256:
33228 return builtin_decl_explicit (BUILT_IN_TM_LOAD_M256);
33231 return NULL_TREE;
33234 /* Return the builtin decl needed to store a vector of TYPE. */
33236 static tree
33237 ix86_builtin_tm_store (tree type)
33239 if (TREE_CODE (type) == VECTOR_TYPE)
33241 switch (tree_to_uhwi (TYPE_SIZE (type)))
33243 case 64:
33244 return builtin_decl_explicit (BUILT_IN_TM_STORE_M64);
33245 case 128:
33246 return builtin_decl_explicit (BUILT_IN_TM_STORE_M128);
33247 case 256:
33248 return builtin_decl_explicit (BUILT_IN_TM_STORE_M256);
33251 return NULL_TREE;
33254 /* Initialize the transactional memory vector load/store builtins. */
33256 static void
33257 ix86_init_tm_builtins (void)
33259 enum ix86_builtin_func_type ftype;
33260 const struct builtin_description *d;
33261 size_t i;
33262 tree decl;
33263 tree attrs_load, attrs_type_load, attrs_store, attrs_type_store;
33264 tree attrs_log, attrs_type_log;
33266 if (!flag_tm)
33267 return;
33269 /* If there are no builtins defined, we must be compiling in a
33270 language without trans-mem support. */
33271 if (!builtin_decl_explicit_p (BUILT_IN_TM_LOAD_1))
33272 return;
33274 /* Use whatever attributes a normal TM load has. */
33275 decl = builtin_decl_explicit (BUILT_IN_TM_LOAD_1);
33276 attrs_load = DECL_ATTRIBUTES (decl);
33277 attrs_type_load = TYPE_ATTRIBUTES (TREE_TYPE (decl));
33278 /* Use whatever attributes a normal TM store has. */
33279 decl = builtin_decl_explicit (BUILT_IN_TM_STORE_1);
33280 attrs_store = DECL_ATTRIBUTES (decl);
33281 attrs_type_store = TYPE_ATTRIBUTES (TREE_TYPE (decl));
33282 /* Use whatever attributes a normal TM log has. */
33283 decl = builtin_decl_explicit (BUILT_IN_TM_LOG);
33284 attrs_log = DECL_ATTRIBUTES (decl);
33285 attrs_type_log = TYPE_ATTRIBUTES (TREE_TYPE (decl));
33287 for (i = 0, d = bdesc_tm;
33288 i < ARRAY_SIZE (bdesc_tm);
33289 i++, d++)
33291 if ((d->mask & ix86_isa_flags) != 0
33292 || (lang_hooks.builtin_function
33293 == lang_hooks.builtin_function_ext_scope))
33295 tree type, attrs, attrs_type;
33296 enum built_in_function code = (enum built_in_function) d->code;
33298 ftype = (enum ix86_builtin_func_type) d->flag;
33299 type = ix86_get_builtin_func_type (ftype);
33301 if (BUILTIN_TM_LOAD_P (code))
33303 attrs = attrs_load;
33304 attrs_type = attrs_type_load;
33306 else if (BUILTIN_TM_STORE_P (code))
33308 attrs = attrs_store;
33309 attrs_type = attrs_type_store;
33311 else
33313 attrs = attrs_log;
33314 attrs_type = attrs_type_log;
33316 decl = add_builtin_function (d->name, type, code, BUILT_IN_NORMAL,
33317 /* The builtin without the prefix for
33318 calling it directly. */
33319 d->name + strlen ("__builtin_"),
33320 attrs);
33321 /* add_builtin_function() will set the DECL_ATTRIBUTES, now
33322 set the TYPE_ATTRIBUTES. */
33323 decl_attributes (&TREE_TYPE (decl), attrs_type, ATTR_FLAG_BUILT_IN);
33325 set_builtin_decl (code, decl, false);
33330 /* Set up all the MMX/SSE builtins, even builtins for instructions that are not
33331 in the current target ISA to allow the user to compile particular modules
33332 with different target specific options that differ from the command line
33333 options. */
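/* For instance, a function annotated with
   __attribute__ ((target ("avx2"))) can use the AVX2 builtins even when
   -mavx2 is not given on the command line. */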
33334 static void
33335 ix86_init_mmx_sse_builtins (void)
33337 const struct builtin_description * d;
33338 enum ix86_builtin_func_type ftype;
33339 size_t i;
33341 /* Add all special builtins with variable number of operands. */
33342 for (i = 0, d = bdesc_special_args;
33343 i < ARRAY_SIZE (bdesc_special_args);
33344 i++, d++)
33346 if (d->name == 0)
33347 continue;
33349 ftype = (enum ix86_builtin_func_type) d->flag;
33350 def_builtin (d->mask, d->name, ftype, d->code);
33353 /* Add all builtins with variable number of operands. */
33354 for (i = 0, d = bdesc_args;
33355 i < ARRAY_SIZE (bdesc_args);
33356 i++, d++)
33358 if (d->name == 0)
33359 continue;
33361 ftype = (enum ix86_builtin_func_type) d->flag;
33362 def_builtin_const (d->mask, d->name, ftype, d->code);
33365 /* Add all builtins with rounding. */
33366 for (i = 0, d = bdesc_round_args;
33367 i < ARRAY_SIZE (bdesc_round_args);
33368 i++, d++)
33370 if (d->name == 0)
33371 continue;
33373 ftype = (enum ix86_builtin_func_type) d->flag;
33374 def_builtin_const (d->mask, d->name, ftype, d->code);
33377 /* pcmpestr[im] insns. */
33378 for (i = 0, d = bdesc_pcmpestr;
33379 i < ARRAY_SIZE (bdesc_pcmpestr);
33380 i++, d++)
33382 if (d->code == IX86_BUILTIN_PCMPESTRM128)
33383 ftype = V16QI_FTYPE_V16QI_INT_V16QI_INT_INT;
33384 else
33385 ftype = INT_FTYPE_V16QI_INT_V16QI_INT_INT;
33386 def_builtin_const (d->mask, d->name, ftype, d->code);
33389 /* pcmpistr[im] insns. */
33390 for (i = 0, d = bdesc_pcmpistr;
33391 i < ARRAY_SIZE (bdesc_pcmpistr);
33392 i++, d++)
33394 if (d->code == IX86_BUILTIN_PCMPISTRM128)
33395 ftype = V16QI_FTYPE_V16QI_V16QI_INT;
33396 else
33397 ftype = INT_FTYPE_V16QI_V16QI_INT;
33398 def_builtin_const (d->mask, d->name, ftype, d->code);
33401 /* comi/ucomi insns. */
33402 for (i = 0, d = bdesc_comi; i < ARRAY_SIZE (bdesc_comi); i++, d++)
33404 if (d->mask == OPTION_MASK_ISA_SSE2)
33405 ftype = INT_FTYPE_V2DF_V2DF;
33406 else
33407 ftype = INT_FTYPE_V4SF_V4SF;
33408 def_builtin_const (d->mask, d->name, ftype, d->code);
33411 /* SSE */
33412 def_builtin (OPTION_MASK_ISA_SSE, "__builtin_ia32_ldmxcsr",
33413 VOID_FTYPE_UNSIGNED, IX86_BUILTIN_LDMXCSR);
33414 def_builtin (OPTION_MASK_ISA_SSE, "__builtin_ia32_stmxcsr",
33415 UNSIGNED_FTYPE_VOID, IX86_BUILTIN_STMXCSR);
33417 /* SSE or 3DNow!A */
33418 def_builtin (OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A,
33419 "__builtin_ia32_maskmovq", VOID_FTYPE_V8QI_V8QI_PCHAR,
33420 IX86_BUILTIN_MASKMOVQ);
33422 /* SSE2 */
33423 def_builtin (OPTION_MASK_ISA_SSE2, "__builtin_ia32_maskmovdqu",
33424 VOID_FTYPE_V16QI_V16QI_PCHAR, IX86_BUILTIN_MASKMOVDQU);
33426 def_builtin (OPTION_MASK_ISA_SSE2, "__builtin_ia32_clflush",
33427 VOID_FTYPE_PCVOID, IX86_BUILTIN_CLFLUSH);
33428 x86_mfence = def_builtin (OPTION_MASK_ISA_SSE2, "__builtin_ia32_mfence",
33429 VOID_FTYPE_VOID, IX86_BUILTIN_MFENCE);
33431 /* SSE3. */
33432 def_builtin (OPTION_MASK_ISA_SSE3, "__builtin_ia32_monitor",
33433 VOID_FTYPE_PCVOID_UNSIGNED_UNSIGNED, IX86_BUILTIN_MONITOR);
33434 def_builtin (OPTION_MASK_ISA_SSE3, "__builtin_ia32_mwait",
33435 VOID_FTYPE_UNSIGNED_UNSIGNED, IX86_BUILTIN_MWAIT);
33437 /* AES */
33438 def_builtin_const (OPTION_MASK_ISA_AES, "__builtin_ia32_aesenc128",
33439 V2DI_FTYPE_V2DI_V2DI, IX86_BUILTIN_AESENC128);
33440 def_builtin_const (OPTION_MASK_ISA_AES, "__builtin_ia32_aesenclast128",
33441 V2DI_FTYPE_V2DI_V2DI, IX86_BUILTIN_AESENCLAST128);
33442 def_builtin_const (OPTION_MASK_ISA_AES, "__builtin_ia32_aesdec128",
33443 V2DI_FTYPE_V2DI_V2DI, IX86_BUILTIN_AESDEC128);
33444 def_builtin_const (OPTION_MASK_ISA_AES, "__builtin_ia32_aesdeclast128",
33445 V2DI_FTYPE_V2DI_V2DI, IX86_BUILTIN_AESDECLAST128);
33446 def_builtin_const (OPTION_MASK_ISA_AES, "__builtin_ia32_aesimc128",
33447 V2DI_FTYPE_V2DI, IX86_BUILTIN_AESIMC128);
33448 def_builtin_const (OPTION_MASK_ISA_AES, "__builtin_ia32_aeskeygenassist128",
33449 V2DI_FTYPE_V2DI_INT, IX86_BUILTIN_AESKEYGENASSIST128);
33451 /* PCLMUL */
33452 def_builtin_const (OPTION_MASK_ISA_PCLMUL, "__builtin_ia32_pclmulqdq128",
33453 V2DI_FTYPE_V2DI_V2DI_INT, IX86_BUILTIN_PCLMULQDQ128);
33455 /* RDRND */
33456 def_builtin (OPTION_MASK_ISA_RDRND, "__builtin_ia32_rdrand16_step",
33457 INT_FTYPE_PUSHORT, IX86_BUILTIN_RDRAND16_STEP);
33458 def_builtin (OPTION_MASK_ISA_RDRND, "__builtin_ia32_rdrand32_step",
33459 INT_FTYPE_PUNSIGNED, IX86_BUILTIN_RDRAND32_STEP);
33460 def_builtin (OPTION_MASK_ISA_RDRND | OPTION_MASK_ISA_64BIT,
33461 "__builtin_ia32_rdrand64_step", INT_FTYPE_PULONGLONG,
33462 IX86_BUILTIN_RDRAND64_STEP);
33464 /* AVX2 */
33465 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gathersiv2df",
33466 V2DF_FTYPE_V2DF_PCDOUBLE_V4SI_V2DF_INT,
33467 IX86_BUILTIN_GATHERSIV2DF);
33469 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gathersiv4df",
33470 V4DF_FTYPE_V4DF_PCDOUBLE_V4SI_V4DF_INT,
33471 IX86_BUILTIN_GATHERSIV4DF);
33473 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gatherdiv2df",
33474 V2DF_FTYPE_V2DF_PCDOUBLE_V2DI_V2DF_INT,
33475 IX86_BUILTIN_GATHERDIV2DF);
33477 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gatherdiv4df",
33478 V4DF_FTYPE_V4DF_PCDOUBLE_V4DI_V4DF_INT,
33479 IX86_BUILTIN_GATHERDIV4DF);
33481 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gathersiv4sf",
33482 V4SF_FTYPE_V4SF_PCFLOAT_V4SI_V4SF_INT,
33483 IX86_BUILTIN_GATHERSIV4SF);
33485 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gathersiv8sf",
33486 V8SF_FTYPE_V8SF_PCFLOAT_V8SI_V8SF_INT,
33487 IX86_BUILTIN_GATHERSIV8SF);
33489 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gatherdiv4sf",
33490 V4SF_FTYPE_V4SF_PCFLOAT_V2DI_V4SF_INT,
33491 IX86_BUILTIN_GATHERDIV4SF);
33493 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gatherdiv4sf256",
33494 V4SF_FTYPE_V4SF_PCFLOAT_V4DI_V4SF_INT,
33495 IX86_BUILTIN_GATHERDIV8SF);
33497 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gathersiv2di",
33498 V2DI_FTYPE_V2DI_PCINT64_V4SI_V2DI_INT,
33499 IX86_BUILTIN_GATHERSIV2DI);
33501 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gathersiv4di",
33502 V4DI_FTYPE_V4DI_PCINT64_V4SI_V4DI_INT,
33503 IX86_BUILTIN_GATHERSIV4DI);
33505 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gatherdiv2di",
33506 V2DI_FTYPE_V2DI_PCINT64_V2DI_V2DI_INT,
33507 IX86_BUILTIN_GATHERDIV2DI);
33509 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gatherdiv4di",
33510 V4DI_FTYPE_V4DI_PCINT64_V4DI_V4DI_INT,
33511 IX86_BUILTIN_GATHERDIV4DI);
33513 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gathersiv4si",
33514 V4SI_FTYPE_V4SI_PCINT_V4SI_V4SI_INT,
33515 IX86_BUILTIN_GATHERSIV4SI);
33517 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gathersiv8si",
33518 V8SI_FTYPE_V8SI_PCINT_V8SI_V8SI_INT,
33519 IX86_BUILTIN_GATHERSIV8SI);
33521 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gatherdiv4si",
33522 V4SI_FTYPE_V4SI_PCINT_V2DI_V4SI_INT,
33523 IX86_BUILTIN_GATHERDIV4SI);
33525 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gatherdiv4si256",
33526 V4SI_FTYPE_V4SI_PCINT_V4DI_V4SI_INT,
33527 IX86_BUILTIN_GATHERDIV8SI);
33529 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gatheraltsiv4df ",
33530 V4DF_FTYPE_V4DF_PCDOUBLE_V8SI_V4DF_INT,
33531 IX86_BUILTIN_GATHERALTSIV4DF);
33533 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gatheraltdiv4sf256 ",
33534 V8SF_FTYPE_V8SF_PCFLOAT_V4DI_V8SF_INT,
33535 IX86_BUILTIN_GATHERALTDIV8SF);
33537 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gatheraltsiv4di ",
33538 V4DI_FTYPE_V4DI_PCINT64_V8SI_V4DI_INT,
33539 IX86_BUILTIN_GATHERALTSIV4DI);
33541 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gatheraltdiv4si256 ",
33542 V8SI_FTYPE_V8SI_PCINT_V4DI_V8SI_INT,
33543 IX86_BUILTIN_GATHERALTDIV8SI);
33545 /* AVX512F */
33546 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_gathersiv16sf",
33547 V16SF_FTYPE_V16SF_PCFLOAT_V16SI_HI_INT,
33548 IX86_BUILTIN_GATHER3SIV16SF);
33550 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_gathersiv8df",
33551 V8DF_FTYPE_V8DF_PCDOUBLE_V8SI_QI_INT,
33552 IX86_BUILTIN_GATHER3SIV8DF);
33554 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_gatherdiv16sf",
33555 V8SF_FTYPE_V8SF_PCFLOAT_V8DI_QI_INT,
33556 IX86_BUILTIN_GATHER3DIV16SF);
33558 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_gatherdiv8df",
33559 V8DF_FTYPE_V8DF_PCDOUBLE_V8DI_QI_INT,
33560 IX86_BUILTIN_GATHER3DIV8DF);
33562 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_gathersiv16si",
33563 V16SI_FTYPE_V16SI_PCINT_V16SI_HI_INT,
33564 IX86_BUILTIN_GATHER3SIV16SI);
33566 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_gathersiv8di",
33567 V8DI_FTYPE_V8DI_PCINT64_V8SI_QI_INT,
33568 IX86_BUILTIN_GATHER3SIV8DI);
33570 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_gatherdiv16si",
33571 V8SI_FTYPE_V8SI_PCINT_V8DI_QI_INT,
33572 IX86_BUILTIN_GATHER3DIV16SI);
33574 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_gatherdiv8di",
33575 V8DI_FTYPE_V8DI_PCINT64_V8DI_QI_INT,
33576 IX86_BUILTIN_GATHER3DIV8DI);
33578 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_gatheraltsiv8df ",
33579 V8DF_FTYPE_V8DF_PCDOUBLE_V16SI_QI_INT,
33580 IX86_BUILTIN_GATHER3ALTSIV8DF);
33582 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_gatheraltdiv8sf ",
33583 V16SF_FTYPE_V16SF_PCFLOAT_V8DI_HI_INT,
33584 IX86_BUILTIN_GATHER3ALTDIV16SF);
33586 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_gatheraltsiv8di ",
33587 V8DI_FTYPE_V8DI_PCINT64_V16SI_QI_INT,
33588 IX86_BUILTIN_GATHER3ALTSIV8DI);
33590 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_gatheraltdiv8si ",
33591 V16SI_FTYPE_V16SI_PCINT_V8DI_HI_INT,
33592 IX86_BUILTIN_GATHER3ALTDIV16SI);
33594 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_scattersiv16sf",
33595 VOID_FTYPE_PFLOAT_HI_V16SI_V16SF_INT,
33596 IX86_BUILTIN_SCATTERSIV16SF);
33598 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_scattersiv8df",
33599 VOID_FTYPE_PDOUBLE_QI_V8SI_V8DF_INT,
33600 IX86_BUILTIN_SCATTERSIV8DF);
33602 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_scatterdiv16sf",
33603 VOID_FTYPE_PFLOAT_QI_V8DI_V8SF_INT,
33604 IX86_BUILTIN_SCATTERDIV16SF);
33606 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_scatterdiv8df",
33607 VOID_FTYPE_PDOUBLE_QI_V8DI_V8DF_INT,
33608 IX86_BUILTIN_SCATTERDIV8DF);
33610 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_scattersiv16si",
33611 VOID_FTYPE_PINT_HI_V16SI_V16SI_INT,
33612 IX86_BUILTIN_SCATTERSIV16SI);
33614 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_scattersiv8di",
33615 VOID_FTYPE_PLONGLONG_QI_V8SI_V8DI_INT,
33616 IX86_BUILTIN_SCATTERSIV8DI);
33618 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_scatterdiv16si",
33619 VOID_FTYPE_PINT_QI_V8DI_V8SI_INT,
33620 IX86_BUILTIN_SCATTERDIV16SI);
33622 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_scatterdiv8di",
33623 VOID_FTYPE_PLONGLONG_QI_V8DI_V8DI_INT,
33624 IX86_BUILTIN_SCATTERDIV8DI);
33626 /* AVX512VL */
33627 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_gather3siv2df",
33628 V2DF_FTYPE_V2DF_PCDOUBLE_V4SI_QI_INT,
33629 IX86_BUILTIN_GATHER3SIV2DF);
33631 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_gather3siv4df",
33632 V4DF_FTYPE_V4DF_PCDOUBLE_V4SI_QI_INT,
33633 IX86_BUILTIN_GATHER3SIV4DF);
33635 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_gather3div2df",
33636 V2DF_FTYPE_V2DF_PCDOUBLE_V2DI_QI_INT,
33637 IX86_BUILTIN_GATHER3DIV2DF);
33639 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_gather3div4df",
33640 V4DF_FTYPE_V4DF_PCDOUBLE_V4DI_QI_INT,
33641 IX86_BUILTIN_GATHER3DIV4DF);
33643 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_gather3siv4sf",
33644 V4SF_FTYPE_V4SF_PCFLOAT_V4SI_QI_INT,
33645 IX86_BUILTIN_GATHER3SIV4SF);
33647 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_gather3siv8sf",
33648 V8SF_FTYPE_V8SF_PCFLOAT_V8SI_QI_INT,
33649 IX86_BUILTIN_GATHER3SIV8SF);
33651 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_gather3div4sf",
33652 V4SF_FTYPE_V4SF_PCFLOAT_V2DI_QI_INT,
33653 IX86_BUILTIN_GATHER3DIV4SF);
33655 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_gather3div8sf",
33656 V4SF_FTYPE_V4SF_PCFLOAT_V4DI_QI_INT,
33657 IX86_BUILTIN_GATHER3DIV8SF);
33659 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_gather3siv2di",
33660 V2DI_FTYPE_V2DI_PCINT64_V4SI_QI_INT,
33661 IX86_BUILTIN_GATHER3SIV2DI);
33663 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_gather3siv4di",
33664 V4DI_FTYPE_V4DI_PCINT64_V4SI_QI_INT,
33665 IX86_BUILTIN_GATHER3SIV4DI);
33667 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_gather3div2di",
33668 V2DI_FTYPE_V2DI_PCINT64_V2DI_QI_INT,
33669 IX86_BUILTIN_GATHER3DIV2DI);
33671 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_gather3div4di",
33672 V4DI_FTYPE_V4DI_PCINT64_V4DI_QI_INT,
33673 IX86_BUILTIN_GATHER3DIV4DI);
33675 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_gather3siv4si",
33676 V4SI_FTYPE_V4SI_PCINT_V4SI_QI_INT,
33677 IX86_BUILTIN_GATHER3SIV4SI);
33679 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_gather3siv8si",
33680 V8SI_FTYPE_V8SI_PCINT_V8SI_QI_INT,
33681 IX86_BUILTIN_GATHER3SIV8SI);
33683 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_gather3div4si",
33684 V4SI_FTYPE_V4SI_PCINT_V2DI_QI_INT,
33685 IX86_BUILTIN_GATHER3DIV4SI);
33687 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_gather3div8si",
33688 V4SI_FTYPE_V4SI_PCINT_V4DI_QI_INT,
33689 IX86_BUILTIN_GATHER3DIV8SI);
33691 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_gather3altsiv4df ",
33692 V4DF_FTYPE_V4DF_PCDOUBLE_V8SI_QI_INT,
33693 IX86_BUILTIN_GATHER3ALTSIV4DF);
33695 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_gather3altdiv8sf ",
33696 V8SF_FTYPE_V8SF_PCFLOAT_V4DI_QI_INT,
33697 IX86_BUILTIN_GATHER3ALTDIV8SF);
33699 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_gather3altsiv4di ",
33700 V4DI_FTYPE_V4DI_PCINT64_V8SI_QI_INT,
33701 IX86_BUILTIN_GATHER3ALTSIV4DI);
33703 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_gather3altdiv8si ",
33704 V8SI_FTYPE_V8SI_PCINT_V4DI_QI_INT,
33705 IX86_BUILTIN_GATHER3ALTDIV8SI);
33707 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_scattersiv8sf",
33708 VOID_FTYPE_PFLOAT_QI_V8SI_V8SF_INT,
33709 IX86_BUILTIN_SCATTERSIV8SF);
33711 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_scattersiv4sf",
33712 VOID_FTYPE_PFLOAT_QI_V4SI_V4SF_INT,
33713 IX86_BUILTIN_SCATTERSIV4SF);
33715 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_scattersiv4df",
33716 VOID_FTYPE_PDOUBLE_QI_V4SI_V4DF_INT,
33717 IX86_BUILTIN_SCATTERSIV4DF);
33719 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_scattersiv2df",
33720 VOID_FTYPE_PDOUBLE_QI_V4SI_V2DF_INT,
33721 IX86_BUILTIN_SCATTERSIV2DF);
33723 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_scatterdiv8sf",
33724 VOID_FTYPE_PFLOAT_QI_V4DI_V4SF_INT,
33725 IX86_BUILTIN_SCATTERDIV8SF);
33727 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_scatterdiv4sf",
33728 VOID_FTYPE_PFLOAT_QI_V2DI_V4SF_INT,
33729 IX86_BUILTIN_SCATTERDIV4SF);
33731 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_scatterdiv4df",
33732 VOID_FTYPE_PDOUBLE_QI_V4DI_V4DF_INT,
33733 IX86_BUILTIN_SCATTERDIV4DF);
33735 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_scatterdiv2df",
33736 VOID_FTYPE_PDOUBLE_QI_V2DI_V2DF_INT,
33737 IX86_BUILTIN_SCATTERDIV2DF);
33739 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_scattersiv8si",
33740 VOID_FTYPE_PINT_QI_V8SI_V8SI_INT,
33741 IX86_BUILTIN_SCATTERSIV8SI);
33743 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_scattersiv4si",
33744 VOID_FTYPE_PINT_QI_V4SI_V4SI_INT,
33745 IX86_BUILTIN_SCATTERSIV4SI);
33747 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_scattersiv4di",
33748 VOID_FTYPE_PLONGLONG_QI_V4SI_V4DI_INT,
33749 IX86_BUILTIN_SCATTERSIV4DI);
33751 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_scattersiv2di",
33752 VOID_FTYPE_PLONGLONG_QI_V4SI_V2DI_INT,
33753 IX86_BUILTIN_SCATTERSIV2DI);
33755 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_scatterdiv8si",
33756 VOID_FTYPE_PINT_QI_V4DI_V4SI_INT,
33757 IX86_BUILTIN_SCATTERDIV8SI);
33759 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_scatterdiv4si",
33760 VOID_FTYPE_PINT_QI_V2DI_V4SI_INT,
33761 IX86_BUILTIN_SCATTERDIV4SI);
33763 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_scatterdiv4di",
33764 VOID_FTYPE_PLONGLONG_QI_V4DI_V4DI_INT,
33765 IX86_BUILTIN_SCATTERDIV4DI);
33767 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_scatterdiv2di",
33768 VOID_FTYPE_PLONGLONG_QI_V2DI_V2DI_INT,
33769 IX86_BUILTIN_SCATTERDIV2DI);
33771 /* AVX512PF */
33772 def_builtin (OPTION_MASK_ISA_AVX512PF, "__builtin_ia32_gatherpfdpd",
33773 VOID_FTYPE_QI_V8SI_PCINT64_INT_INT,
33774 IX86_BUILTIN_GATHERPFDPD);
33775 def_builtin (OPTION_MASK_ISA_AVX512PF, "__builtin_ia32_gatherpfdps",
33776 VOID_FTYPE_HI_V16SI_PCINT_INT_INT,
33777 IX86_BUILTIN_GATHERPFDPS);
33778 def_builtin (OPTION_MASK_ISA_AVX512PF, "__builtin_ia32_gatherpfqpd",
33779 VOID_FTYPE_QI_V8DI_PCINT64_INT_INT,
33780 IX86_BUILTIN_GATHERPFQPD);
33781 def_builtin (OPTION_MASK_ISA_AVX512PF, "__builtin_ia32_gatherpfqps",
33782 VOID_FTYPE_QI_V8DI_PCINT_INT_INT,
33783 IX86_BUILTIN_GATHERPFQPS);
33784 def_builtin (OPTION_MASK_ISA_AVX512PF, "__builtin_ia32_scatterpfdpd",
33785 VOID_FTYPE_QI_V8SI_PCINT64_INT_INT,
33786 IX86_BUILTIN_SCATTERPFDPD);
33787 def_builtin (OPTION_MASK_ISA_AVX512PF, "__builtin_ia32_scatterpfdps",
33788 VOID_FTYPE_HI_V16SI_PCINT_INT_INT,
33789 IX86_BUILTIN_SCATTERPFDPS);
33790 def_builtin (OPTION_MASK_ISA_AVX512PF, "__builtin_ia32_scatterpfqpd",
33791 VOID_FTYPE_QI_V8DI_PCINT64_INT_INT,
33792 IX86_BUILTIN_SCATTERPFQPD);
33793 def_builtin (OPTION_MASK_ISA_AVX512PF, "__builtin_ia32_scatterpfqps",
33794 VOID_FTYPE_QI_V8DI_PCINT_INT_INT,
33795 IX86_BUILTIN_SCATTERPFQPS);
33797 /* SHA */
33798 def_builtin_const (OPTION_MASK_ISA_SHA, "__builtin_ia32_sha1msg1",
33799 V4SI_FTYPE_V4SI_V4SI, IX86_BUILTIN_SHA1MSG1);
33800 def_builtin_const (OPTION_MASK_ISA_SHA, "__builtin_ia32_sha1msg2",
33801 V4SI_FTYPE_V4SI_V4SI, IX86_BUILTIN_SHA1MSG2);
33802 def_builtin_const (OPTION_MASK_ISA_SHA, "__builtin_ia32_sha1nexte",
33803 V4SI_FTYPE_V4SI_V4SI, IX86_BUILTIN_SHA1NEXTE);
33804 def_builtin_const (OPTION_MASK_ISA_SHA, "__builtin_ia32_sha1rnds4",
33805 V4SI_FTYPE_V4SI_V4SI_INT, IX86_BUILTIN_SHA1RNDS4);
33806 def_builtin_const (OPTION_MASK_ISA_SHA, "__builtin_ia32_sha256msg1",
33807 V4SI_FTYPE_V4SI_V4SI, IX86_BUILTIN_SHA256MSG1);
33808 def_builtin_const (OPTION_MASK_ISA_SHA, "__builtin_ia32_sha256msg2",
33809 V4SI_FTYPE_V4SI_V4SI, IX86_BUILTIN_SHA256MSG2);
33810 def_builtin_const (OPTION_MASK_ISA_SHA, "__builtin_ia32_sha256rnds2",
33811 V4SI_FTYPE_V4SI_V4SI_V4SI, IX86_BUILTIN_SHA256RNDS2);
33813 /* RTM. */
33814 def_builtin (OPTION_MASK_ISA_RTM, "__builtin_ia32_xabort",
33815 VOID_FTYPE_UNSIGNED, IX86_BUILTIN_XABORT);
33817 /* MMX access to the vec_init patterns. */
33818 def_builtin_const (OPTION_MASK_ISA_MMX, "__builtin_ia32_vec_init_v2si",
33819 V2SI_FTYPE_INT_INT, IX86_BUILTIN_VEC_INIT_V2SI);
33821 def_builtin_const (OPTION_MASK_ISA_MMX, "__builtin_ia32_vec_init_v4hi",
33822 V4HI_FTYPE_HI_HI_HI_HI,
33823 IX86_BUILTIN_VEC_INIT_V4HI);
33825 def_builtin_const (OPTION_MASK_ISA_MMX, "__builtin_ia32_vec_init_v8qi",
33826 V8QI_FTYPE_QI_QI_QI_QI_QI_QI_QI_QI,
33827 IX86_BUILTIN_VEC_INIT_V8QI);
33829 /* Access to the vec_extract patterns. */
33830 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_ext_v2df",
33831 DOUBLE_FTYPE_V2DF_INT, IX86_BUILTIN_VEC_EXT_V2DF);
33832 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_ext_v2di",
33833 DI_FTYPE_V2DI_INT, IX86_BUILTIN_VEC_EXT_V2DI);
33834 def_builtin_const (OPTION_MASK_ISA_SSE, "__builtin_ia32_vec_ext_v4sf",
33835 FLOAT_FTYPE_V4SF_INT, IX86_BUILTIN_VEC_EXT_V4SF);
33836 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_ext_v4si",
33837 SI_FTYPE_V4SI_INT, IX86_BUILTIN_VEC_EXT_V4SI);
33838 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_ext_v8hi",
33839 HI_FTYPE_V8HI_INT, IX86_BUILTIN_VEC_EXT_V8HI);
33841 def_builtin_const (OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A,
33842 "__builtin_ia32_vec_ext_v4hi",
33843 HI_FTYPE_V4HI_INT, IX86_BUILTIN_VEC_EXT_V4HI);
33845 def_builtin_const (OPTION_MASK_ISA_MMX, "__builtin_ia32_vec_ext_v2si",
33846 SI_FTYPE_V2SI_INT, IX86_BUILTIN_VEC_EXT_V2SI);
33848 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_ext_v16qi",
33849 QI_FTYPE_V16QI_INT, IX86_BUILTIN_VEC_EXT_V16QI);
33851 /* Access to the vec_set patterns. */
33852 def_builtin_const (OPTION_MASK_ISA_SSE4_1 | OPTION_MASK_ISA_64BIT,
33853 "__builtin_ia32_vec_set_v2di",
33854 V2DI_FTYPE_V2DI_DI_INT, IX86_BUILTIN_VEC_SET_V2DI);
33856 def_builtin_const (OPTION_MASK_ISA_SSE4_1, "__builtin_ia32_vec_set_v4sf",
33857 V4SF_FTYPE_V4SF_FLOAT_INT, IX86_BUILTIN_VEC_SET_V4SF);
33859 def_builtin_const (OPTION_MASK_ISA_SSE4_1, "__builtin_ia32_vec_set_v4si",
33860 V4SI_FTYPE_V4SI_SI_INT, IX86_BUILTIN_VEC_SET_V4SI);
33862 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_set_v8hi",
33863 V8HI_FTYPE_V8HI_HI_INT, IX86_BUILTIN_VEC_SET_V8HI);
33865 def_builtin_const (OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A,
33866 "__builtin_ia32_vec_set_v4hi",
33867 V4HI_FTYPE_V4HI_HI_INT, IX86_BUILTIN_VEC_SET_V4HI);
33869 def_builtin_const (OPTION_MASK_ISA_SSE4_1, "__builtin_ia32_vec_set_v16qi",
33870 V16QI_FTYPE_V16QI_QI_INT, IX86_BUILTIN_VEC_SET_V16QI);
33872 /* RDSEED */
33873 def_builtin (OPTION_MASK_ISA_RDSEED, "__builtin_ia32_rdseed_hi_step",
33874 INT_FTYPE_PUSHORT, IX86_BUILTIN_RDSEED16_STEP);
33875 def_builtin (OPTION_MASK_ISA_RDSEED, "__builtin_ia32_rdseed_si_step",
33876 INT_FTYPE_PUNSIGNED, IX86_BUILTIN_RDSEED32_STEP);
33877 def_builtin (OPTION_MASK_ISA_RDSEED | OPTION_MASK_ISA_64BIT,
33878 "__builtin_ia32_rdseed_di_step",
33879 INT_FTYPE_PULONGLONG, IX86_BUILTIN_RDSEED64_STEP);
33881 /* ADCX */
33882 def_builtin (0, "__builtin_ia32_addcarryx_u32",
33883 UCHAR_FTYPE_UCHAR_UINT_UINT_PUNSIGNED, IX86_BUILTIN_ADDCARRYX32);
33884 def_builtin (OPTION_MASK_ISA_64BIT,
33885 "__builtin_ia32_addcarryx_u64",
33886 UCHAR_FTYPE_UCHAR_ULONGLONG_ULONGLONG_PULONGLONG,
33887 IX86_BUILTIN_ADDCARRYX64);
33889 /* SBB */
33890 def_builtin (0, "__builtin_ia32_sbb_u32",
33891 UCHAR_FTYPE_UCHAR_UINT_UINT_PUNSIGNED, IX86_BUILTIN_SBB32);
33892 def_builtin (OPTION_MASK_ISA_64BIT,
33893 "__builtin_ia32_sbb_u64",
33894 UCHAR_FTYPE_UCHAR_ULONGLONG_ULONGLONG_PULONGLONG,
33895 IX86_BUILTIN_SBB64);
33897 /* Read/write FLAGS. */
33898 def_builtin (~OPTION_MASK_ISA_64BIT, "__builtin_ia32_readeflags_u32",
33899 UNSIGNED_FTYPE_VOID, IX86_BUILTIN_READ_FLAGS);
33900 def_builtin (OPTION_MASK_ISA_64BIT, "__builtin_ia32_readeflags_u64",
33901 UINT64_FTYPE_VOID, IX86_BUILTIN_READ_FLAGS);
33902 def_builtin (~OPTION_MASK_ISA_64BIT, "__builtin_ia32_writeeflags_u32",
33903 VOID_FTYPE_UNSIGNED, IX86_BUILTIN_WRITE_FLAGS);
33904 def_builtin (OPTION_MASK_ISA_64BIT, "__builtin_ia32_writeeflags_u64",
33905 VOID_FTYPE_UINT64, IX86_BUILTIN_WRITE_FLAGS);
33907 /* CLFLUSHOPT. */
33908 def_builtin (OPTION_MASK_ISA_CLFLUSHOPT, "__builtin_ia32_clflushopt",
33909 VOID_FTYPE_PCVOID, IX86_BUILTIN_CLFLUSHOPT);
33911 /* Add FMA4 multi-arg argument instructions */
33912 for (i = 0, d = bdesc_multi_arg; i < ARRAY_SIZE (bdesc_multi_arg); i++, d++)
33914 if (d->name == 0)
33915 continue;
33917 ftype = (enum ix86_builtin_func_type) d->flag;
33918 def_builtin_const (d->mask, d->name, ftype, d->code);
33922 static void
33923 ix86_init_mpx_builtins ()
33925 const struct builtin_description * d;
33926 enum ix86_builtin_func_type ftype;
33927 tree decl;
33928 size_t i;
33930 for (i = 0, d = bdesc_mpx;
33931 i < ARRAY_SIZE (bdesc_mpx);
33932 i++, d++)
33934 if (d->name == 0)
33935 continue;
33937 ftype = (enum ix86_builtin_func_type) d->flag;
33938 decl = def_builtin (d->mask, d->name, ftype, d->code);
33940 /* Without the leaf and nothrow flags, abnormal edges may follow
33941 calls to MPX builtins when setjmp is present in the
33942 function. Since there may be a lot of MPX builtin calls,
33943 this causes lots of useless edges and enormous PHI
33944 nodes. To avoid this we mark MPX builtins as leaf
33945 and nothrow. */
33946 if (decl)
33948 DECL_ATTRIBUTES (decl) = build_tree_list (get_identifier ("leaf"),
33949 NULL_TREE);
33950 TREE_NOTHROW (decl) = 1;
33952 else
33954 ix86_builtins_isa[(int)d->code].leaf_p = true;
33955 ix86_builtins_isa[(int)d->code].nothrow_p = true;
33959 for (i = 0, d = bdesc_mpx_const;
33960 i < ARRAY_SIZE (bdesc_mpx_const);
33961 i++, d++)
33963 if (d->name == 0)
33964 continue;
33966 ftype = (enum ix86_builtin_func_type) d->flag;
33967 decl = def_builtin_const (d->mask, d->name, ftype, d->code);
33969 if (decl)
33971 DECL_ATTRIBUTES (decl) = build_tree_list (get_identifier ("leaf"),
33972 NULL_TREE);
33973 TREE_NOTHROW (decl) = 1;
33975 else
33977 ix86_builtins_isa[(int)d->code].leaf_p = true;
33978 ix86_builtins_isa[(int)d->code].nothrow_p = true;
33983 /* This adds a condition to the basic_block NEW_BB in function FUNCTION_DECL
33984 to return a pointer to VERSION_DECL if the outcome of the expression
33985 formed by PREDICATE_CHAIN is true. This function will be called during
33986 version dispatch to decide which function version to execute. It returns
33987 the basic block at the end, to which more conditions can be added. */
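/* Schematically, for one version the statements appended to NEW_BB are:

     cond_1 = predicate_1 (arg_1);
     ...
     and_expr = MIN_EXPR <cond_n, MIN_EXPR <..., cond_1>>;
     if (and_expr > 0)
       return (void *) &VERSION_DECL;

   and control otherwise falls through to the returned block, where the
   caller adds the test for the next version. This is an illustrative
   pseudo-GIMPLE sketch of what the code below builds. */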
33989 static basic_block
33990 add_condition_to_bb (tree function_decl, tree version_decl,
33991 tree predicate_chain, basic_block new_bb)
33993 gimple return_stmt;
33994 tree convert_expr, result_var;
33995 gimple convert_stmt;
33996 gimple call_cond_stmt;
33997 gimple if_else_stmt;
33999 basic_block bb1, bb2, bb3;
34000 edge e12, e23;
34002 tree cond_var, and_expr_var = NULL_TREE;
34003 gimple_seq gseq;
34005 tree predicate_decl, predicate_arg;
34007 push_cfun (DECL_STRUCT_FUNCTION (function_decl));
34009 gcc_assert (new_bb != NULL);
34010 gseq = bb_seq (new_bb);
34013 convert_expr = build1 (CONVERT_EXPR, ptr_type_node,
34014 build_fold_addr_expr (version_decl));
34015 result_var = create_tmp_var (ptr_type_node, NULL);
34016 convert_stmt = gimple_build_assign (result_var, convert_expr);
34017 return_stmt = gimple_build_return (result_var);
34019 if (predicate_chain == NULL_TREE)
34021 gimple_seq_add_stmt (&gseq, convert_stmt);
34022 gimple_seq_add_stmt (&gseq, return_stmt);
34023 set_bb_seq (new_bb, gseq);
34024 gimple_set_bb (convert_stmt, new_bb);
34025 gimple_set_bb (return_stmt, new_bb);
34026 pop_cfun ();
34027 return new_bb;
34030 while (predicate_chain != NULL)
34032 cond_var = create_tmp_var (integer_type_node, NULL);
34033 predicate_decl = TREE_PURPOSE (predicate_chain);
34034 predicate_arg = TREE_VALUE (predicate_chain);
34035 call_cond_stmt = gimple_build_call (predicate_decl, 1, predicate_arg);
34036 gimple_call_set_lhs (call_cond_stmt, cond_var);
34038 gimple_set_block (call_cond_stmt, DECL_INITIAL (function_decl));
34039 gimple_set_bb (call_cond_stmt, new_bb);
34040 gimple_seq_add_stmt (&gseq, call_cond_stmt);
34042 predicate_chain = TREE_CHAIN (predicate_chain);
34044 if (and_expr_var == NULL)
34045 and_expr_var = cond_var;
34046 else
34048 gimple assign_stmt;
34049 /* Use MIN_EXPR to check whether any condition value is zero:
34050 and_expr_var = MIN_EXPR <cond_var, and_expr_var>. */
34051 assign_stmt = gimple_build_assign (and_expr_var,
34052 build2 (MIN_EXPR, integer_type_node,
34053 cond_var, and_expr_var));
34055 gimple_set_block (assign_stmt, DECL_INITIAL (function_decl));
34056 gimple_set_bb (assign_stmt, new_bb);
34057 gimple_seq_add_stmt (&gseq, assign_stmt);
34061 if_else_stmt = gimple_build_cond (GT_EXPR, and_expr_var,
34062 integer_zero_node,
34063 NULL_TREE, NULL_TREE);
34064 gimple_set_block (if_else_stmt, DECL_INITIAL (function_decl));
34065 gimple_set_bb (if_else_stmt, new_bb);
34066 gimple_seq_add_stmt (&gseq, if_else_stmt);
34068 gimple_seq_add_stmt (&gseq, convert_stmt);
34069 gimple_seq_add_stmt (&gseq, return_stmt);
34070 set_bb_seq (new_bb, gseq);
34072 bb1 = new_bb;
34073 e12 = split_block (bb1, if_else_stmt);
34074 bb2 = e12->dest;
34075 e12->flags &= ~EDGE_FALLTHRU;
34076 e12->flags |= EDGE_TRUE_VALUE;
34078 e23 = split_block (bb2, return_stmt);
34080 gimple_set_bb (convert_stmt, bb2);
34081 gimple_set_bb (return_stmt, bb2);
34083 bb3 = e23->dest;
34084 make_edge (bb1, bb3, EDGE_FALSE_VALUE);
34086 remove_edge (e23);
34087 make_edge (bb2, EXIT_BLOCK_PTR_FOR_FN (cfun), 0);
34089 pop_cfun ();
34091 return bb3;
34094 /* This parses the attribute arguments to target in DECL and determines
34095 the right builtin to use to match the platform specification.
34096 It returns the priority value for this version decl. If PREDICATE_LIST
34097 is not NULL, it stores the list of cpu features that need to be checked
34098 before dispatching this function. */
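/* For example (illustrative), target ("arch=core2,avx") produces a
   predicate list that calls __builtin_cpu_is ("core2") and
   __builtin_cpu_supports ("avx"), and the returned priority is the
   highest one seen, here P_AVX. */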
34100 static unsigned int
34101 get_builtin_code_for_version (tree decl, tree *predicate_list)
34103 tree attrs;
34104 struct cl_target_option cur_target;
34105 tree target_node;
34106 struct cl_target_option *new_target;
34107 const char *arg_str = NULL;
34108 const char *attrs_str = NULL;
34109 char *tok_str = NULL;
34110 char *token;
34112 /* Priority of i386 features; a greater value means a higher priority. This is
34113 used to decide the order in which function dispatch must happen. For
34114 instance, a version specialized for SSE4.2 should be checked for dispatch
34115 before a version for SSE3, as SSE4.2 implies SSE3. */
34116 enum feature_priority
34118 P_ZERO = 0,
34119 P_MMX,
34120 P_SSE,
34121 P_SSE2,
34122 P_SSE3,
34123 P_SSSE3,
34124 P_PROC_SSSE3,
34125 P_SSE4_A,
34126 P_PROC_SSE4_A,
34127 P_SSE4_1,
34128 P_SSE4_2,
34129 P_PROC_SSE4_2,
34130 P_POPCNT,
34131 P_AVX,
34132 P_PROC_AVX,
34133 P_FMA4,
34134 P_XOP,
34135 P_PROC_XOP,
34136 P_FMA,
34137 P_PROC_FMA,
34138 P_AVX2,
34139 P_PROC_AVX2
34142 enum feature_priority priority = P_ZERO;
34144 /* These are the target attribute strings for which a dispatcher is
34145 available, from fold_builtin_cpu. */
34147 static struct _feature_list
34149 const char *const name;
34150 const enum feature_priority priority;
34152 const feature_list[] =
34154 {"mmx", P_MMX},
34155 {"sse", P_SSE},
34156 {"sse2", P_SSE2},
34157 {"sse3", P_SSE3},
34158 {"sse4a", P_SSE4_A},
34159 {"ssse3", P_SSSE3},
34160 {"sse4.1", P_SSE4_1},
34161 {"sse4.2", P_SSE4_2},
34162 {"popcnt", P_POPCNT},
34163 {"avx", P_AVX},
34164 {"fma4", P_FMA4},
34165 {"xop", P_XOP},
34166 {"fma", P_FMA},
34167 {"avx2", P_AVX2}
34171 static unsigned int NUM_FEATURES
34172 = sizeof (feature_list) / sizeof (struct _feature_list);
34174 unsigned int i;
34176 tree predicate_chain = NULL_TREE;
34177 tree predicate_decl, predicate_arg;
34179 attrs = lookup_attribute ("target", DECL_ATTRIBUTES (decl));
34180 gcc_assert (attrs != NULL);
34182 attrs = TREE_VALUE (TREE_VALUE (attrs));
34184 gcc_assert (TREE_CODE (attrs) == STRING_CST);
34185 attrs_str = TREE_STRING_POINTER (attrs);
34187 /* Return priority zero for default function. */
34188 if (strcmp (attrs_str, "default") == 0)
34189 return 0;
34191 /* Handle arch= if specified. For priority, set it to be 1 more than
34192 the best instruction set the processor can handle. For instance, if
34193 there is a version for atom and a version for ssse3 (the highest ISA
34194 priority for atom), the atom version must be checked for dispatch
34195 before the ssse3 version. */
34196 if (strstr (attrs_str, "arch=") != NULL)
34198 cl_target_option_save (&cur_target, &global_options);
34199 target_node = ix86_valid_target_attribute_tree (attrs, &global_options,
34200 &global_options_set);
34202 gcc_assert (target_node);
34203 new_target = TREE_TARGET_OPTION (target_node);
34204 gcc_assert (new_target);
34206 if (new_target->arch_specified && new_target->arch > 0)
34208 switch (new_target->arch)
34210 case PROCESSOR_CORE2:
34211 arg_str = "core2";
34212 priority = P_PROC_SSSE3;
34213 break;
34214 case PROCESSOR_NEHALEM:
34215 if (new_target->x_ix86_isa_flags & OPTION_MASK_ISA_AES)
34216 arg_str = "westmere";
34217 else
34218 /* We translate "arch=corei7" and "arch=nehalem" to
34219 "corei7" so that it is mapped to the M_INTEL_COREI7
34220 cpu type, which covers all M_INTEL_COREI7_XXX subtypes. */
34221 arg_str = "corei7";
34222 priority = P_PROC_SSE4_2;
34223 break;
34224 case PROCESSOR_SANDYBRIDGE:
34225 if (new_target->x_ix86_isa_flags & OPTION_MASK_ISA_F16C)
34226 arg_str = "ivybridge";
34227 else
34228 arg_str = "sandybridge";
34229 priority = P_PROC_AVX;
34230 break;
34231 case PROCESSOR_HASWELL:
34232 if (new_target->x_ix86_isa_flags & OPTION_MASK_ISA_ADX)
34233 arg_str = "broadwell";
34234 else
34235 arg_str = "haswell";
34236 priority = P_PROC_AVX2;
34237 break;
34238 case PROCESSOR_BONNELL:
34239 arg_str = "bonnell";
34240 priority = P_PROC_SSSE3;
34241 break;
34242 case PROCESSOR_SILVERMONT:
34243 arg_str = "silvermont";
34244 priority = P_PROC_SSE4_2;
34245 break;
34246 case PROCESSOR_AMDFAM10:
34247 arg_str = "amdfam10h";
34248 priority = P_PROC_SSE4_A;
34249 break;
34250 case PROCESSOR_BTVER1:
34251 arg_str = "btver1";
34252 priority = P_PROC_SSE4_A;
34253 break;
34254 case PROCESSOR_BTVER2:
34255 arg_str = "btver2";
34256 priority = P_PROC_AVX;
34257 break;
34258 case PROCESSOR_BDVER1:
34259 arg_str = "bdver1";
34260 priority = P_PROC_XOP;
34261 break;
34262 case PROCESSOR_BDVER2:
34263 arg_str = "bdver2";
34264 priority = P_PROC_FMA;
34265 break;
34266 case PROCESSOR_BDVER3:
34267 arg_str = "bdver3";
34268 priority = P_PROC_FMA;
34269 break;
34270 case PROCESSOR_BDVER4:
34271 arg_str = "bdver4";
34272 priority = P_PROC_AVX2;
34273 break;
34277 cl_target_option_restore (&global_options, &cur_target);
34279 if (predicate_list && arg_str == NULL)
34281 error_at (DECL_SOURCE_LOCATION (decl),
34282 "No dispatcher found for the versioning attributes");
34283 return 0;
34286 if (predicate_list)
34288 predicate_decl = ix86_builtins [(int) IX86_BUILTIN_CPU_IS];
34289 /* For a C string literal the length includes the trailing NULL. */
34290 predicate_arg = build_string_literal (strlen (arg_str) + 1, arg_str);
34291 predicate_chain = tree_cons (predicate_decl, predicate_arg,
34292 predicate_chain);
34296 /* Process feature name. */
34297 tok_str = (char *) xmalloc (strlen (attrs_str) + 1);
34298 strcpy (tok_str, attrs_str);
34299 token = strtok (tok_str, ",");
34300 predicate_decl = ix86_builtins [(int) IX86_BUILTIN_CPU_SUPPORTS];
34302 while (token != NULL)
34304 /* Do not process "arch=" */
34305 if (strncmp (token, "arch=", 5) == 0)
34307 token = strtok (NULL, ",");
34308 continue;
34310 for (i = 0; i < NUM_FEATURES; ++i)
34312 if (strcmp (token, feature_list[i].name) == 0)
34314 if (predicate_list)
34316 predicate_arg = build_string_literal (
34317 strlen (feature_list[i].name) + 1,
34318 feature_list[i].name);
34319 predicate_chain = tree_cons (predicate_decl, predicate_arg,
34320 predicate_chain);
34322 /* Find the maximum priority feature. */
34323 if (feature_list[i].priority > priority)
34324 priority = feature_list[i].priority;
34326 break;
34329 if (predicate_list && i == NUM_FEATURES)
34331 error_at (DECL_SOURCE_LOCATION (decl),
34332 "No dispatcher found for %s", token);
34333 return 0;
34335 token = strtok (NULL, ",");
34337 free (tok_str);
34339 if (predicate_list && predicate_chain == NULL_TREE)
34341 error_at (DECL_SOURCE_LOCATION (decl),
34342 "No dispatcher found for the versioning attributes : %s",
34343 attrs_str);
34344 return 0;
34346 else if (predicate_list)
34348 predicate_chain = nreverse (predicate_chain);
34349 *predicate_list = predicate_chain;
34352 return priority;
34355 /* This compares the priority of target features in function DECL1
34356 and DECL2. It returns positive value if DECL1 is higher priority,
34357 negative value if DECL2 is higher priority and 0 if they are the
34358 same. */
34360 static int
34361 ix86_compare_version_priority (tree decl1, tree decl2)
34363 unsigned int priority1 = get_builtin_code_for_version (decl1, NULL);
34364 unsigned int priority2 = get_builtin_code_for_version (decl2, NULL);
34366 return (int)priority1 - (int)priority2;
34369 /* V1 and V2 point to function versions with different priorities
34370 based on the target ISA. This function compares their priorities. */
34372 static int
34373 feature_compare (const void *v1, const void *v2)
34375 typedef struct _function_version_info
34377 tree version_decl;
34378 tree predicate_chain;
34379 unsigned int dispatch_priority;
34380 } function_version_info;
34382 const function_version_info c1 = *(const function_version_info *)v1;
34383 const function_version_info c2 = *(const function_version_info *)v2;
34384 return (c2.dispatch_priority - c1.dispatch_priority);
34387 /* This function generates the dispatch function for
34388 multi-versioned functions. DISPATCH_DECL is the function which will
34389 contain the dispatch logic. FNDECLS are the function choices for
34390 dispatch, and is a tree chain. EMPTY_BB is the basic block pointer
34391 in DISPATCH_DECL in which the dispatch code is generated. */
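/* As an illustrative sketch, for

     __attribute__ ((target ("default"))) int foo (void);
     __attribute__ ((target ("avx2")))    int foo (void);

   the resolver body built here first calls __builtin_cpu_init, then
   tests the higher-priority AVX2 version and finally falls back to the
   default version. */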
34393 static int
34394 dispatch_function_versions (tree dispatch_decl,
34395 void *fndecls_p,
34396 basic_block *empty_bb)
34398 tree default_decl;
34399 gimple ifunc_cpu_init_stmt;
34400 gimple_seq gseq;
34401 int ix;
34402 tree ele;
34403 vec<tree> *fndecls;
34404 unsigned int num_versions = 0;
34405 unsigned int actual_versions = 0;
34406 unsigned int i;
34408 struct _function_version_info
34410 tree version_decl;
34411 tree predicate_chain;
34412 unsigned int dispatch_priority;
34413 }*function_version_info;
34415 gcc_assert (dispatch_decl != NULL
34416 && fndecls_p != NULL
34417 && empty_bb != NULL);
34420 /* fndecls_p is actually a vector. */
34420 fndecls = static_cast<vec<tree> *> (fndecls_p);
34422 /* At least one more version other than the default. */
34423 num_versions = fndecls->length ();
34424 gcc_assert (num_versions >= 2);
34426 function_version_info = (struct _function_version_info *)
34427 XNEWVEC (struct _function_version_info, (num_versions - 1));
34429 /* The first version in the vector is the default decl. */
34430 default_decl = (*fndecls)[0];
34432 push_cfun (DECL_STRUCT_FUNCTION (dispatch_decl));
34434 gseq = bb_seq (*empty_bb);
34435 /* Function version dispatch is via IFUNC. IFUNC resolvers fire before
34436 constructors, so explicitly call __builtin_cpu_init here. */
34437 ifunc_cpu_init_stmt = gimple_build_call_vec (
34438 ix86_builtins [(int) IX86_BUILTIN_CPU_INIT], vNULL);
34439 gimple_seq_add_stmt (&gseq, ifunc_cpu_init_stmt);
34440 gimple_set_bb (ifunc_cpu_init_stmt, *empty_bb);
34441 set_bb_seq (*empty_bb, gseq);
34443 pop_cfun ();
34446 for (ix = 1; fndecls->iterate (ix, &ele); ++ix)
34448 tree version_decl = ele;
34449 tree predicate_chain = NULL_TREE;
34450 unsigned int priority;
34451 /* Get attribute string, parse it and find the right predicate decl.
34452 The predicate function could be a lengthy combination of many
34453 features, like arch-type and various isa-variants. */
34454 priority = get_builtin_code_for_version (version_decl,
34455 &predicate_chain);
34457 if (predicate_chain == NULL_TREE)
34458 continue;
34460 function_version_info [actual_versions].version_decl = version_decl;
34461 function_version_info [actual_versions].predicate_chain
34462 = predicate_chain;
34463 function_version_info [actual_versions].dispatch_priority = priority;
34464 actual_versions++;
34467 /* Sort the versions according to descending order of dispatch priority. The
34468 priority is based on the ISA. This is not a perfect solution. There
34469 could still be ambiguity. If more than one function version is suitable
34470 to execute, which one should be dispatched? In future, allow the user
34471 to specify a dispatch priority next to the version. */
34472 qsort (function_version_info, actual_versions,
34473 sizeof (struct _function_version_info), feature_compare);
34475 for (i = 0; i < actual_versions; ++i)
34476 *empty_bb = add_condition_to_bb (dispatch_decl,
34477 function_version_info[i].version_decl,
34478 function_version_info[i].predicate_chain,
34479 *empty_bb);
34481 /* Dispatch the default version at the end. */
34482 *empty_bb = add_condition_to_bb (dispatch_decl, default_decl,
34483 NULL, *empty_bb);
34485 free (function_version_info);
34486 return 0;
34489 /* Comparator function to be used in qsort routine to sort attribute
34490 specification strings to "target". */
34492 static int
34493 attr_strcmp (const void *v1, const void *v2)
34495 const char *c1 = *(char *const*)v1;
34496 const char *c2 = *(char *const*)v2;
34497 return strcmp (c1, c2);
34500 /* ARGLIST is the argument to target attribute. This function tokenizes
34501 the comma separated arguments, sorts them and returns a string which
34502 is a unique identifier for the comma separated arguments. It also
34503 replaces non-identifier characters "=,-" with "_". */
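/* For example, target ("avx,arch=core2") yields "arch_core2_avx":
   the arguments are split at ',', '=' and '-' become '_', and the
   tokens are sorted before being rejoined with '_'. */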
34505 static char *
34506 sorted_attr_string (tree arglist)
34508 tree arg;
34509 size_t str_len_sum = 0;
34510 char **args = NULL;
34511 char *attr_str, *ret_str;
34512 char *attr = NULL;
34513 unsigned int argnum = 1;
34514 unsigned int i;
34516 for (arg = arglist; arg; arg = TREE_CHAIN (arg))
34518 const char *str = TREE_STRING_POINTER (TREE_VALUE (arg));
34519 size_t len = strlen (str);
34520 str_len_sum += len + 1;
34521 if (arg != arglist)
34522 argnum++;
34523 for (i = 0; i < strlen (str); i++)
34524 if (str[i] == ',')
34525 argnum++;
34528 attr_str = XNEWVEC (char, str_len_sum);
34529 str_len_sum = 0;
34530 for (arg = arglist; arg; arg = TREE_CHAIN (arg))
34532 const char *str = TREE_STRING_POINTER (TREE_VALUE (arg));
34533 size_t len = strlen (str);
34534 memcpy (attr_str + str_len_sum, str, len);
34535 attr_str[str_len_sum + len] = TREE_CHAIN (arg) ? ',' : '\0';
34536 str_len_sum += len + 1;
34539 /* Replace "=" and "-" with "_"; the "," separators become "_" when the sorted tokens are rejoined below. */
34540 for (i = 0; i < strlen (attr_str); i++)
34541 if (attr_str[i] == '=' || attr_str[i]== '-')
34542 attr_str[i] = '_';
34544 if (argnum == 1)
34545 return attr_str;
34547 args = XNEWVEC (char *, argnum);
34549 i = 0;
34550 attr = strtok (attr_str, ",");
34551 while (attr != NULL)
34553 args[i] = attr;
34554 i++;
34555 attr = strtok (NULL, ",");
34558 qsort (args, argnum, sizeof (char *), attr_strcmp);
34560 ret_str = XNEWVEC (char, str_len_sum);
34561 str_len_sum = 0;
34562 for (i = 0; i < argnum; i++)
34564 size_t len = strlen (args[i]);
34565 memcpy (ret_str + str_len_sum, args[i], len);
34566 ret_str[str_len_sum + len] = i < argnum - 1 ? '_' : '\0';
34567 str_len_sum += len + 1;
34570 XDELETEVEC (args);
34571 XDELETEVEC (attr_str);
34572 return ret_str;
34575 /* This function changes the assembler name for functions that are
34576 versions. If DECL is a function version and has a "target"
34577 attribute, it appends the attribute string to its assembler name. */
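/* For example, a C function foo whose version carries
   __attribute__ ((target ("sse4.2"))) gets the assembler name
   "foo.sse4.2", and target ("arch=core2,avx") yields
   "foo.arch_core2_avx"; the "default" version keeps its original
   name. */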
34579 static tree
34580 ix86_mangle_function_version_assembler_name (tree decl, tree id)
34582 tree version_attr;
34583 const char *orig_name, *version_string;
34584 char *attr_str, *assembler_name;
34586 if (DECL_DECLARED_INLINE_P (decl)
34587 && lookup_attribute ("gnu_inline",
34588 DECL_ATTRIBUTES (decl)))
34589 error_at (DECL_SOURCE_LOCATION (decl),
34590 "Function versions cannot be marked as gnu_inline,"
34591 " bodies have to be generated");
34593 if (DECL_VIRTUAL_P (decl)
34594 || DECL_VINDEX (decl))
34595 sorry ("Virtual function multiversioning not supported");
34597 version_attr = lookup_attribute ("target", DECL_ATTRIBUTES (decl));
34599 /* target attribute string cannot be NULL. */
34600 gcc_assert (version_attr != NULL_TREE);
34602 orig_name = IDENTIFIER_POINTER (id);
34603 version_string
34604 = TREE_STRING_POINTER (TREE_VALUE (TREE_VALUE (version_attr)));
34606 if (strcmp (version_string, "default") == 0)
34607 return id;
34609 attr_str = sorted_attr_string (TREE_VALUE (version_attr));
34610 assembler_name = XNEWVEC (char, strlen (orig_name) + strlen (attr_str) + 2);
34612 sprintf (assembler_name, "%s.%s", orig_name, attr_str);
34614 /* Allow assembler name to be modified if already set. */
34615 if (DECL_ASSEMBLER_NAME_SET_P (decl))
34616 SET_DECL_RTL (decl, NULL);
34618 tree ret = get_identifier (assembler_name);
34619 XDELETEVEC (attr_str);
34620 XDELETEVEC (assembler_name);
34621 return ret;
34624 /* Return true if FN1 and FN2 are versions of the same function, that is,
34625 their "target" attribute strings differ. FN1 and FN2 are assumed to
34626 have the same signature. */
34628 static bool
34629 ix86_function_versions (tree fn1, tree fn2)
34631 tree attr1, attr2;
34632 char *target1, *target2;
34633 bool result;
34635 if (TREE_CODE (fn1) != FUNCTION_DECL
34636 || TREE_CODE (fn2) != FUNCTION_DECL)
34637 return false;
34639 attr1 = lookup_attribute ("target", DECL_ATTRIBUTES (fn1));
34640 attr2 = lookup_attribute ("target", DECL_ATTRIBUTES (fn2));
34642 /* At least one function decl should have the target attribute specified. */
34643 if (attr1 == NULL_TREE && attr2 == NULL_TREE)
34644 return false;
34646 /* Diagnose missing target attribute if one of the decls is already
34647 multi-versioned. */
34648 if (attr1 == NULL_TREE || attr2 == NULL_TREE)
34650 if (DECL_FUNCTION_VERSIONED (fn1) || DECL_FUNCTION_VERSIONED (fn2))
34652 if (attr2 != NULL_TREE)
34654 tree tem = fn1;
34655 fn1 = fn2;
34656 fn2 = tem;
34657 attr1 = attr2;
34659 error_at (DECL_SOURCE_LOCATION (fn2),
34660 "missing %<target%> attribute for multi-versioned %D",
34661 fn2);
34662 inform (DECL_SOURCE_LOCATION (fn1),
34663 "previous declaration of %D", fn1);
34664 /* Prevent diagnosing of the same error multiple times. */
34665 DECL_ATTRIBUTES (fn2)
34666 = tree_cons (get_identifier ("target"),
34667 copy_node (TREE_VALUE (attr1)),
34668 DECL_ATTRIBUTES (fn2));
34670 return false;
34673 target1 = sorted_attr_string (TREE_VALUE (attr1));
34674 target2 = sorted_attr_string (TREE_VALUE (attr2));
34676 /* The sorted target strings must be different for fn1 and fn2
34677 to be versions. */
34678 if (strcmp (target1, target2) == 0)
34679 result = false;
34680 else
34681 result = true;
34683 XDELETEVEC (target1);
34684 XDELETEVEC (target2);
34686 return result;
34689 static tree
34690 ix86_mangle_decl_assembler_name (tree decl, tree id)
34692 /* For function version, add the target suffix to the assembler name. */
34693 if (TREE_CODE (decl) == FUNCTION_DECL
34694 && DECL_FUNCTION_VERSIONED (decl))
34695 id = ix86_mangle_function_version_assembler_name (decl, id);
34696 #ifdef SUBTARGET_MANGLE_DECL_ASSEMBLER_NAME
34697 id = SUBTARGET_MANGLE_DECL_ASSEMBLER_NAME (decl, id);
34698 #endif
34700 return id;
34703 /* Return a new name by appending SUFFIX to the DECL name. If make_unique
34704 is true, append the full path name of the source file. */
34706 static char *
34707 make_name (tree decl, const char *suffix, bool make_unique)
34709 char *global_var_name;
34710 int name_len;
34711 const char *name;
34712 const char *unique_name = NULL;
34714 name = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (decl));
34716 /* Get a unique name that can be used globally without any chances
34717 of collision at link time. */
34718 if (make_unique)
34719 unique_name = IDENTIFIER_POINTER (get_file_function_name ("\0"));
34721 name_len = strlen (name) + strlen (suffix) + 2;
34723 if (make_unique)
34724 name_len += strlen (unique_name) + 1;
34725 global_var_name = XNEWVEC (char, name_len);
34727 /* Use '.' to concatenate names as it is demangler friendly. */
34728 if (make_unique)
34729 snprintf (global_var_name, name_len, "%s.%s.%s", name, unique_name,
34730 suffix);
34731 else
34732 snprintf (global_var_name, name_len, "%s.%s", name, suffix);
34734 return global_var_name;
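/* Illustrative example (editorial): make_name (decl-of-"foo", "resolver",
   false) yields "foo.resolver"; with MAKE_UNIQUE true, a file-based
   component obtained from get_file_function_name is spliced in, e.g.
   "foo.<unique>.resolver".  The "<unique>" part is a placeholder, not a
   literal value.  */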
34737 #if defined (ASM_OUTPUT_TYPE_DIRECTIVE)
34739 /* Make a dispatcher declaration for the multi-versioned function DECL.
34740 Calls to the function DECL will be replaced with calls to the dispatcher
34741 by the front-end. Return the decl created. */
34743 static tree
34744 make_dispatcher_decl (const tree decl)
34746 tree func_decl;
34747 char *func_name;
34748 tree fn_type, func_type;
34749 bool is_uniq = false;
34751 if (TREE_PUBLIC (decl) == 0)
34752 is_uniq = true;
34754 func_name = make_name (decl, "ifunc", is_uniq);
34756 fn_type = TREE_TYPE (decl);
34757 func_type = build_function_type (TREE_TYPE (fn_type),
34758 TYPE_ARG_TYPES (fn_type));
34760 func_decl = build_fn_decl (func_name, func_type);
34761 XDELETEVEC (func_name);
34762 TREE_USED (func_decl) = 1;
34763 DECL_CONTEXT (func_decl) = NULL_TREE;
34764 DECL_INITIAL (func_decl) = error_mark_node;
34765 DECL_ARTIFICIAL (func_decl) = 1;
34766 /* Mark this func as external; the resolver will flip it again if
34767 it gets generated. */
34768 DECL_EXTERNAL (func_decl) = 1;
34769 /* This will be of type IFUNC; IFUNCs have to be externally visible. */
34770 TREE_PUBLIC (func_decl) = 1;
34772 return func_decl;
34775 #endif
34777 /* Return true if DECL is multi-versioned and is the default function,
34778 that is, it is not tagged with a target-specific optimization. */
34780 static bool
34781 is_function_default_version (const tree decl)
34783 if (TREE_CODE (decl) != FUNCTION_DECL
34784 || !DECL_FUNCTION_VERSIONED (decl))
34785 return false;
34786 tree attr = lookup_attribute ("target", DECL_ATTRIBUTES (decl));
34787 gcc_assert (attr);
34788 attr = TREE_VALUE (TREE_VALUE (attr));
34789 return (TREE_CODE (attr) == STRING_CST
34790 && strcmp (TREE_STRING_POINTER (attr), "default") == 0);
34793 /* Make a dispatcher declaration for the multi-versioned function DECL.
34794 Calls to the function DECL will be replaced with calls to the dispatcher
34795 by the front-end. Returns the decl of the dispatcher function. */
34797 static tree
34798 ix86_get_function_versions_dispatcher (void *decl)
34800 tree fn = (tree) decl;
34801 struct cgraph_node *node = NULL;
34802 struct cgraph_node *default_node = NULL;
34803 struct cgraph_function_version_info *node_v = NULL;
34804 struct cgraph_function_version_info *first_v = NULL;
34806 tree dispatch_decl = NULL;
34808 struct cgraph_function_version_info *default_version_info = NULL;
34810 gcc_assert (fn != NULL && DECL_FUNCTION_VERSIONED (fn));
34812 node = cgraph_node::get (fn);
34813 gcc_assert (node != NULL);
34815 node_v = node->function_version ();
34816 gcc_assert (node_v != NULL);
34818 if (node_v->dispatcher_resolver != NULL)
34819 return node_v->dispatcher_resolver;
34821 /* Find the default version and make it the first node. */
34822 first_v = node_v;
34823 /* Go to the beginning of the chain. */
34824 while (first_v->prev != NULL)
34825 first_v = first_v->prev;
34826 default_version_info = first_v;
34827 while (default_version_info != NULL)
34829 if (is_function_default_version
34830 (default_version_info->this_node->decl))
34831 break;
34832 default_version_info = default_version_info->next;
34835 /* If there is no default node, just return NULL. */
34836 if (default_version_info == NULL)
34837 return NULL;
34839 /* Make default info the first node. */
34840 if (first_v != default_version_info)
34842 default_version_info->prev->next = default_version_info->next;
34843 if (default_version_info->next)
34844 default_version_info->next->prev = default_version_info->prev;
34845 first_v->prev = default_version_info;
34846 default_version_info->next = first_v;
34847 default_version_info->prev = NULL;
34850 default_node = default_version_info->this_node;
34852 #if defined (ASM_OUTPUT_TYPE_DIRECTIVE)
34853 if (targetm.has_ifunc_p ())
34855 struct cgraph_function_version_info *it_v = NULL;
34856 struct cgraph_node *dispatcher_node = NULL;
34857 struct cgraph_function_version_info *dispatcher_version_info = NULL;
34859 /* Right now, the dispatching is done via ifunc. */
34860 dispatch_decl = make_dispatcher_decl (default_node->decl);
34862 dispatcher_node = cgraph_node::get_create (dispatch_decl);
34863 gcc_assert (dispatcher_node != NULL);
34864 dispatcher_node->dispatcher_function = 1;
34865 dispatcher_version_info
34866 = dispatcher_node->insert_new_function_version ();
34867 dispatcher_version_info->next = default_version_info;
34868 dispatcher_node->definition = 1;
34870 /* Set the dispatcher for all the versions. */
34871 it_v = default_version_info;
34872 while (it_v != NULL)
34874 it_v->dispatcher_resolver = dispatch_decl;
34875 it_v = it_v->next;
34878 else
34879 #endif
34881 error_at (DECL_SOURCE_LOCATION (default_node->decl),
34882 "multiversioning needs ifunc which is not supported "
34883 "on this target");
34886 return dispatch_decl;
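/* Illustrative user-level example (editorial, assuming a C++ translation
   unit on an ifunc-capable target such as GNU/Linux): the dispatcher
   created above is what a call to foo () below actually reaches; the
   resolver picks one of the versions at load time.

     __attribute__ ((target ("default"))) int foo (void) { return 0; }
     __attribute__ ((target ("sse4.2"))) int foo (void) { return 1; }
     __attribute__ ((target ("avx2")))   int foo (void) { return 2; }

     int call_foo (void) { return foo (); }   // resolved via the dispatcher
*/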
34889 /* Makes a function attribute of the form NAME(ARG_NAME) and chains
34890 it to CHAIN. */
34892 static tree
34893 make_attribute (const char *name, const char *arg_name, tree chain)
34895 tree attr_name;
34896 tree attr_arg_name;
34897 tree attr_args;
34898 tree attr;
34900 attr_name = get_identifier (name);
34901 attr_arg_name = build_string (strlen (arg_name), arg_name);
34902 attr_args = tree_cons (NULL_TREE, attr_arg_name, NULL_TREE);
34903 attr = tree_cons (attr_name, attr_args, chain);
34904 return attr;
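/* Illustrative example (editorial): make_attribute ("ifunc", "foo.resolver",
   NULL_TREE) builds a tree equivalent to the source-level attribute
   __attribute__ ((ifunc ("foo.resolver"))); the names used here are
   hypothetical.  */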
34907 /* Make the resolver function decl to dispatch the versions of
34908 a multi-versioned function, DEFAULT_DECL. Create an
34909 empty basic block in the resolver and store the pointer in
34910 EMPTY_BB. Return the decl of the resolver function. */
34912 static tree
34913 make_resolver_func (const tree default_decl,
34914 const tree dispatch_decl,
34915 basic_block *empty_bb)
34917 char *resolver_name;
34918 tree decl, type, decl_name, t;
34919 bool is_uniq = false;
34921 /* IFUNC's have to be globally visible. So, if the default_decl is
34922 not, then the name of the IFUNC should be made unique. */
34923 if (TREE_PUBLIC (default_decl) == 0)
34924 is_uniq = true;
34926 /* Append the filename to the resolver function if the versions are
34927 not externally visible. This is because the resolver function has
34928 to be externally visible for the loader to find it. So, appending
34929 the filename will prevent conflicts with a resolver function from
34930 another module which is based on the same version name. */
34931 resolver_name = make_name (default_decl, "resolver", is_uniq);
34933 /* The resolver function should return a (void *). */
34934 type = build_function_type_list (ptr_type_node, NULL_TREE);
34936 decl = build_fn_decl (resolver_name, type);
34937 decl_name = get_identifier (resolver_name);
34938 SET_DECL_ASSEMBLER_NAME (decl, decl_name);
34940 DECL_NAME (decl) = decl_name;
34941 TREE_USED (decl) = 1;
34942 DECL_ARTIFICIAL (decl) = 1;
34943 DECL_IGNORED_P (decl) = 0;
34944 /* IFUNC resolvers have to be externally visible. */
34945 TREE_PUBLIC (decl) = 1;
34946 DECL_UNINLINABLE (decl) = 1;
34948 /* Resolver is not external; its body is generated. */
34949 DECL_EXTERNAL (decl) = 0;
34950 DECL_EXTERNAL (dispatch_decl) = 0;
34952 DECL_CONTEXT (decl) = NULL_TREE;
34953 DECL_INITIAL (decl) = make_node (BLOCK);
34954 DECL_STATIC_CONSTRUCTOR (decl) = 0;
34956 if (DECL_COMDAT_GROUP (default_decl)
34957 || TREE_PUBLIC (default_decl))
34959 /* In this case, each translation unit with a call to this
34960 versioned function will put out a resolver. Ensure it
34961 is comdat to keep just one copy. */
34962 DECL_COMDAT (decl) = 1;
34963 make_decl_one_only (decl, DECL_ASSEMBLER_NAME (decl));
34965 /* Build result decl and add to function_decl. */
34966 t = build_decl (UNKNOWN_LOCATION, RESULT_DECL, NULL_TREE, ptr_type_node);
34967 DECL_ARTIFICIAL (t) = 1;
34968 DECL_IGNORED_P (t) = 1;
34969 DECL_RESULT (decl) = t;
34971 gimplify_function_tree (decl);
34972 push_cfun (DECL_STRUCT_FUNCTION (decl));
34973 *empty_bb = init_lowered_empty_function (decl, false);
34975 cgraph_node::add_new_function (decl, true);
34976 symtab->call_cgraph_insertion_hooks (cgraph_node::get_create (decl));
34978 pop_cfun ();
34980 gcc_assert (dispatch_decl != NULL);
34981 /* Mark dispatch_decl as "ifunc" with resolver as resolver_name. */
34982 DECL_ATTRIBUTES (dispatch_decl)
34983 = make_attribute ("ifunc", resolver_name, DECL_ATTRIBUTES (dispatch_decl));
34985 /* Create the alias for dispatch to resolver here. */
34986 /*cgraph_create_function_alias (dispatch_decl, decl);*/
34987 cgraph_node::create_same_body_alias (dispatch_decl, decl);
34988 XDELETEVEC (resolver_name);
34989 return decl;
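/* Illustrative example (editorial): the dispatcher/resolver pair produced
   here corresponds roughly to the hand-written GNU C idiom

     static void *foo_resolver (void);
     int foo_dispatch (void) __attribute__ ((ifunc ("foo_resolver")));

   where foo_resolver returns the address of the selected version.  The
   identifiers are hypothetical; the real names are formed by make_name
   above.  */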
34992 /* Generate the dispatching code body to dispatch multi-versioned function
34993 DECL. The target hook is called to process the "target" attributes and
34994 provide the code to dispatch the right function at run-time. NODE points
34995 to the dispatcher decl whose body will be created. */
34997 static tree
34998 ix86_generate_version_dispatcher_body (void *node_p)
35000 tree resolver_decl;
35001 basic_block empty_bb;
35002 tree default_ver_decl;
35003 struct cgraph_node *versn;
35004 struct cgraph_node *node;
35006 struct cgraph_function_version_info *node_version_info = NULL;
35007 struct cgraph_function_version_info *versn_info = NULL;
35009 node = (cgraph_node *)node_p;
35011 node_version_info = node->function_version ();
35012 gcc_assert (node->dispatcher_function
35013 && node_version_info != NULL);
35015 if (node_version_info->dispatcher_resolver)
35016 return node_version_info->dispatcher_resolver;
35018 /* The first version in the chain corresponds to the default version. */
35019 default_ver_decl = node_version_info->next->this_node->decl;
35021 /* node is going to be an alias, so remove the finalized bit. */
35022 node->definition = false;
35024 resolver_decl = make_resolver_func (default_ver_decl,
35025 node->decl, &empty_bb);
35027 node_version_info->dispatcher_resolver = resolver_decl;
35029 push_cfun (DECL_STRUCT_FUNCTION (resolver_decl));
35031 auto_vec<tree, 2> fn_ver_vec;
35033 for (versn_info = node_version_info->next; versn_info;
35034 versn_info = versn_info->next)
35036 versn = versn_info->this_node;
35037 /* Check for virtual functions here again, as by this time it should
35038 have been determined if this function needs a vtable index or
35039 not. This happens for methods in derived classes that override
35040 virtual methods in base classes but are not explicitly marked as
35041 virtual. */
35042 if (DECL_VINDEX (versn->decl))
35043 sorry ("Virtual function multiversioning not supported");
35045 fn_ver_vec.safe_push (versn->decl);
35048 dispatch_function_versions (resolver_decl, &fn_ver_vec, &empty_bb);
35049 cgraph_edge::rebuild_edges ();
35050 pop_cfun ();
35051 return resolver_decl;
35053 /* This builds the processor_model struct type defined in
35054 libgcc/config/i386/cpuinfo.c */
35056 static tree
35057 build_processor_model_struct (void)
35059 const char *field_name[] = {"__cpu_vendor", "__cpu_type", "__cpu_subtype",
35060 "__cpu_features"};
35061 tree field = NULL_TREE, field_chain = NULL_TREE;
35062 int i;
35063 tree type = make_node (RECORD_TYPE);
35065 /* The first 3 fields are unsigned int. */
35066 for (i = 0; i < 3; ++i)
35068 field = build_decl (UNKNOWN_LOCATION, FIELD_DECL,
35069 get_identifier (field_name[i]), unsigned_type_node);
35070 if (field_chain != NULL_TREE)
35071 DECL_CHAIN (field) = field_chain;
35072 field_chain = field;
35075 /* The last field is an array of unsigned integers of size one. */
35076 field = build_decl (UNKNOWN_LOCATION, FIELD_DECL,
35077 get_identifier (field_name[3]),
35078 build_array_type (unsigned_type_node,
35079 build_index_type (size_one_node)));
35080 if (field_chain != NULL_TREE)
35081 DECL_CHAIN (field) = field_chain;
35082 field_chain = field;
35084 finish_builtin_struct (type, "__processor_model", field_chain, NULL_TREE);
35085 return type;
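/* For reference, the type built above mirrors the definition in
   libgcc/config/i386/cpuinfo.c:

     struct __processor_model
     {
       unsigned int __cpu_vendor;
       unsigned int __cpu_type;
       unsigned int __cpu_subtype;
       unsigned int __cpu_features[1];
     };
*/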
35088 /* Return an extern, comdat VAR_DECL of type TYPE and name NAME. */
35090 static tree
35091 make_var_decl (tree type, const char *name)
35093 tree new_decl;
35095 new_decl = build_decl (UNKNOWN_LOCATION,
35096 VAR_DECL,
35097 get_identifier(name),
35098 type);
35100 DECL_EXTERNAL (new_decl) = 1;
35101 TREE_STATIC (new_decl) = 1;
35102 TREE_PUBLIC (new_decl) = 1;
35103 DECL_INITIAL (new_decl) = 0;
35104 DECL_ARTIFICIAL (new_decl) = 0;
35105 DECL_PRESERVE_P (new_decl) = 1;
35107 make_decl_one_only (new_decl, DECL_ASSEMBLER_NAME (new_decl));
35108 assemble_variable (new_decl, 0, 0, 0);
35110 return new_decl;
35113 /* FNDECL is a __builtin_cpu_is or a __builtin_cpu_supports call that is folded
35114 into an integer defined in libgcc/config/i386/cpuinfo.c */
35116 static tree
35117 fold_builtin_cpu (tree fndecl, tree *args)
35119 unsigned int i;
35120 enum ix86_builtins fn_code = (enum ix86_builtins)
35121 DECL_FUNCTION_CODE (fndecl);
35122 tree param_string_cst = NULL;
35124 /* This is the order of bit-fields in __processor_features in cpuinfo.c */
35125 enum processor_features
35127 F_CMOV = 0,
35128 F_MMX,
35129 F_POPCNT,
35130 F_SSE,
35131 F_SSE2,
35132 F_SSE3,
35133 F_SSSE3,
35134 F_SSE4_1,
35135 F_SSE4_2,
35136 F_AVX,
35137 F_AVX2,
35138 F_SSE4_A,
35139 F_FMA4,
35140 F_XOP,
35141 F_FMA,
35142 F_MAX
35145 /* These are the values for vendor types and cpu types and subtypes
35146 in cpuinfo.c. Cpu types and subtypes should have the corresponding
35147 start value subtracted from them. */
35148 enum processor_model
35150 M_INTEL = 1,
35151 M_AMD,
35152 M_CPU_TYPE_START,
35153 M_INTEL_BONNELL,
35154 M_INTEL_CORE2,
35155 M_INTEL_COREI7,
35156 M_AMDFAM10H,
35157 M_AMDFAM15H,
35158 M_INTEL_SILVERMONT,
35159 M_AMD_BTVER1,
35160 M_AMD_BTVER2,
35161 M_CPU_SUBTYPE_START,
35162 M_INTEL_COREI7_NEHALEM,
35163 M_INTEL_COREI7_WESTMERE,
35164 M_INTEL_COREI7_SANDYBRIDGE,
35165 M_AMDFAM10H_BARCELONA,
35166 M_AMDFAM10H_SHANGHAI,
35167 M_AMDFAM10H_ISTANBUL,
35168 M_AMDFAM15H_BDVER1,
35169 M_AMDFAM15H_BDVER2,
35170 M_AMDFAM15H_BDVER3,
35171 M_AMDFAM15H_BDVER4,
35172 M_INTEL_COREI7_IVYBRIDGE,
35173 M_INTEL_COREI7_HASWELL
35176 static struct _arch_names_table
35178 const char *const name;
35179 const enum processor_model model;
35181 const arch_names_table[] =
35183 {"amd", M_AMD},
35184 {"intel", M_INTEL},
35185 {"atom", M_INTEL_BONNELL},
35186 {"slm", M_INTEL_SILVERMONT},
35187 {"core2", M_INTEL_CORE2},
35188 {"corei7", M_INTEL_COREI7},
35189 {"nehalem", M_INTEL_COREI7_NEHALEM},
35190 {"westmere", M_INTEL_COREI7_WESTMERE},
35191 {"sandybridge", M_INTEL_COREI7_SANDYBRIDGE},
35192 {"ivybridge", M_INTEL_COREI7_IVYBRIDGE},
35193 {"haswell", M_INTEL_COREI7_HASWELL},
35194 {"bonnell", M_INTEL_BONNELL},
35195 {"silvermont", M_INTEL_SILVERMONT},
35196 {"amdfam10h", M_AMDFAM10H},
35197 {"barcelona", M_AMDFAM10H_BARCELONA},
35198 {"shanghai", M_AMDFAM10H_SHANGHAI},
35199 {"istanbul", M_AMDFAM10H_ISTANBUL},
35200 {"btver1", M_AMD_BTVER1},
35201 {"amdfam15h", M_AMDFAM15H},
35202 {"bdver1", M_AMDFAM15H_BDVER1},
35203 {"bdver2", M_AMDFAM15H_BDVER2},
35204 {"bdver3", M_AMDFAM15H_BDVER3},
35205 {"bdver4", M_AMDFAM15H_BDVER4},
35206 {"btver2", M_AMD_BTVER2},
35209 static struct _isa_names_table
35211 const char *const name;
35212 const enum processor_features feature;
35214 const isa_names_table[] =
35216 {"cmov", F_CMOV},
35217 {"mmx", F_MMX},
35218 {"popcnt", F_POPCNT},
35219 {"sse", F_SSE},
35220 {"sse2", F_SSE2},
35221 {"sse3", F_SSE3},
35222 {"ssse3", F_SSSE3},
35223 {"sse4a", F_SSE4_A},
35224 {"sse4.1", F_SSE4_1},
35225 {"sse4.2", F_SSE4_2},
35226 {"avx", F_AVX},
35227 {"fma4", F_FMA4},
35228 {"xop", F_XOP},
35229 {"fma", F_FMA},
35230 {"avx2", F_AVX2}
35233 tree __processor_model_type = build_processor_model_struct ();
35234 tree __cpu_model_var = make_var_decl (__processor_model_type,
35235 "__cpu_model");
35238 varpool_node::add (__cpu_model_var);
35240 gcc_assert ((args != NULL) && (*args != NULL));
35242 param_string_cst = *args;
35243 while (param_string_cst
35244 && TREE_CODE (param_string_cst) != STRING_CST)
35246 /* *args must be an expr that can contain other EXPRs leading to a
35247 STRING_CST. */
35248 if (!EXPR_P (param_string_cst))
35250 error ("Parameter to builtin must be a string constant or literal");
35251 return integer_zero_node;
35253 param_string_cst = TREE_OPERAND (EXPR_CHECK (param_string_cst), 0);
35256 gcc_assert (param_string_cst);
35258 if (fn_code == IX86_BUILTIN_CPU_IS)
35260 tree ref;
35261 tree field;
35262 tree final;
35264 unsigned int field_val = 0;
35265 unsigned int NUM_ARCH_NAMES
35266 = sizeof (arch_names_table) / sizeof (struct _arch_names_table);
35268 for (i = 0; i < NUM_ARCH_NAMES; i++)
35269 if (strcmp (arch_names_table[i].name,
35270 TREE_STRING_POINTER (param_string_cst)) == 0)
35271 break;
35273 if (i == NUM_ARCH_NAMES)
35275 error ("Parameter to builtin not valid: %s",
35276 TREE_STRING_POINTER (param_string_cst));
35277 return integer_zero_node;
35280 field = TYPE_FIELDS (__processor_model_type);
35281 field_val = arch_names_table[i].model;
35283 /* CPU types are stored in the next field. */
35284 if (field_val > M_CPU_TYPE_START
35285 && field_val < M_CPU_SUBTYPE_START)
35287 field = DECL_CHAIN (field);
35288 field_val -= M_CPU_TYPE_START;
35291 /* CPU subtypes are stored in the next field. */
35292 if (field_val > M_CPU_SUBTYPE_START)
35294 field = DECL_CHAIN ( DECL_CHAIN (field));
35295 field_val -= M_CPU_SUBTYPE_START;
35298 /* Get the appropriate field in __cpu_model. */
35299 ref = build3 (COMPONENT_REF, TREE_TYPE (field), __cpu_model_var,
35300 field, NULL_TREE);
35302 /* Check the value. */
35303 final = build2 (EQ_EXPR, unsigned_type_node, ref,
35304 build_int_cstu (unsigned_type_node, field_val));
35305 return build1 (CONVERT_EXPR, integer_type_node, final);
35307 else if (fn_code == IX86_BUILTIN_CPU_SUPPORTS)
35309 tree ref;
35310 tree array_elt;
35311 tree field;
35312 tree final;
35314 unsigned int field_val = 0;
35315 unsigned int NUM_ISA_NAMES
35316 = sizeof (isa_names_table) / sizeof (struct _isa_names_table);
35318 for (i = 0; i < NUM_ISA_NAMES; i++)
35319 if (strcmp (isa_names_table[i].name,
35320 TREE_STRING_POINTER (param_string_cst)) == 0)
35321 break;
35323 if (i == NUM_ISA_NAMES)
35325 error ("Parameter to builtin not valid: %s",
35326 TREE_STRING_POINTER (param_string_cst));
35327 return integer_zero_node;
35330 field = TYPE_FIELDS (__processor_model_type);
35331 /* Get the last field, which is __cpu_features. */
35332 while (DECL_CHAIN (field))
35333 field = DECL_CHAIN (field);
35335 /* Get the appropriate field: __cpu_model.__cpu_features */
35336 ref = build3 (COMPONENT_REF, TREE_TYPE (field), __cpu_model_var,
35337 field, NULL_TREE);
35339 /* Access the 0th element of __cpu_features array. */
35340 array_elt = build4 (ARRAY_REF, unsigned_type_node, ref,
35341 integer_zero_node, NULL_TREE, NULL_TREE);
35343 field_val = (1 << isa_names_table[i].feature);
35344 /* Return __cpu_model.__cpu_features[0] & field_val */
35345 final = build2 (BIT_AND_EXPR, unsigned_type_node, array_elt,
35346 build_int_cstu (unsigned_type_node, field_val));
35347 return build1 (CONVERT_EXPR, integer_type_node, final);
35349 gcc_unreachable ();
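/* Roughly speaking (editorial note), the folding above turns

     __builtin_cpu_is ("amd")        into  (int) (__cpu_model.__cpu_vendor == M_AMD)
     __builtin_cpu_supports ("avx2") into  (int) (__cpu_model.__cpu_features[0]
                                                  & (1 << F_AVX2))

   with cpu type and subtype queries using the next fields and the adjusted
   field values as described in the code.  */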
35352 static tree
35353 ix86_fold_builtin (tree fndecl, int n_args,
35354 tree *args, bool ignore ATTRIBUTE_UNUSED)
35356 if (DECL_BUILT_IN_CLASS (fndecl) == BUILT_IN_MD)
35358 enum ix86_builtins fn_code = (enum ix86_builtins)
35359 DECL_FUNCTION_CODE (fndecl);
35360 if (fn_code == IX86_BUILTIN_CPU_IS
35361 || fn_code == IX86_BUILTIN_CPU_SUPPORTS)
35363 gcc_assert (n_args == 1);
35364 return fold_builtin_cpu (fndecl, args);
35368 #ifdef SUBTARGET_FOLD_BUILTIN
35369 return SUBTARGET_FOLD_BUILTIN (fndecl, n_args, args, ignore);
35370 #endif
35372 return NULL_TREE;
35375 /* Make builtins to detect cpu type and features supported. NAME is
35376 the builtin name, CODE is the builtin code, and FTYPE is the function
35377 type of the builtin. */
35379 static void
35380 make_cpu_type_builtin (const char* name, int code,
35381 enum ix86_builtin_func_type ftype, bool is_const)
35383 tree decl;
35384 tree type;
35386 type = ix86_get_builtin_func_type (ftype);
35387 decl = add_builtin_function (name, type, code, BUILT_IN_MD,
35388 NULL, NULL_TREE);
35389 gcc_assert (decl != NULL_TREE);
35390 ix86_builtins[(int) code] = decl;
35391 TREE_READONLY (decl) = is_const;
35394 /* Make builtins to get CPU type and features supported. The created
35395 builtins are:
35397 __builtin_cpu_init (), to detect cpu type and features,
35398 __builtin_cpu_is ("<CPUNAME>"), to check if cpu is of type <CPUNAME>,
35399 __builtin_cpu_supports ("<FEATURE>"), to check if cpu supports <FEATURE>
35402 static void
35403 ix86_init_platform_type_builtins (void)
35405 make_cpu_type_builtin ("__builtin_cpu_init", IX86_BUILTIN_CPU_INIT,
35406 INT_FTYPE_VOID, false);
35407 make_cpu_type_builtin ("__builtin_cpu_is", IX86_BUILTIN_CPU_IS,
35408 INT_FTYPE_PCCHAR, true);
35409 make_cpu_type_builtin ("__builtin_cpu_supports", IX86_BUILTIN_CPU_SUPPORTS,
35410 INT_FTYPE_PCCHAR, true);
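/* Illustrative user-level usage of the builtins created above (editorial):

     int
     pick_path (void)
     {
       __builtin_cpu_init ();
       if (__builtin_cpu_supports ("avx2"))
         return 2;
       if (__builtin_cpu_is ("intel"))
         return 1;
       return 0;
     }
*/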
35413 /* Internal method for ix86_init_builtins. */
35415 static void
35416 ix86_init_builtins_va_builtins_abi (void)
35418 tree ms_va_ref, sysv_va_ref;
35419 tree fnvoid_va_end_ms, fnvoid_va_end_sysv;
35420 tree fnvoid_va_start_ms, fnvoid_va_start_sysv;
35421 tree fnvoid_va_copy_ms, fnvoid_va_copy_sysv;
35422 tree fnattr_ms = NULL_TREE, fnattr_sysv = NULL_TREE;
35424 if (!TARGET_64BIT)
35425 return;
35426 fnattr_ms = build_tree_list (get_identifier ("ms_abi"), NULL_TREE);
35427 fnattr_sysv = build_tree_list (get_identifier ("sysv_abi"), NULL_TREE);
35428 ms_va_ref = build_reference_type (ms_va_list_type_node);
35429 sysv_va_ref =
35430 build_pointer_type (TREE_TYPE (sysv_va_list_type_node));
35432 fnvoid_va_end_ms =
35433 build_function_type_list (void_type_node, ms_va_ref, NULL_TREE);
35434 fnvoid_va_start_ms =
35435 build_varargs_function_type_list (void_type_node, ms_va_ref, NULL_TREE);
35436 fnvoid_va_end_sysv =
35437 build_function_type_list (void_type_node, sysv_va_ref, NULL_TREE);
35438 fnvoid_va_start_sysv =
35439 build_varargs_function_type_list (void_type_node, sysv_va_ref,
35440 NULL_TREE);
35441 fnvoid_va_copy_ms =
35442 build_function_type_list (void_type_node, ms_va_ref, ms_va_list_type_node,
35443 NULL_TREE);
35444 fnvoid_va_copy_sysv =
35445 build_function_type_list (void_type_node, sysv_va_ref,
35446 sysv_va_ref, NULL_TREE);
35448 add_builtin_function ("__builtin_ms_va_start", fnvoid_va_start_ms,
35449 BUILT_IN_VA_START, BUILT_IN_NORMAL, NULL, fnattr_ms);
35450 add_builtin_function ("__builtin_ms_va_end", fnvoid_va_end_ms,
35451 BUILT_IN_VA_END, BUILT_IN_NORMAL, NULL, fnattr_ms);
35452 add_builtin_function ("__builtin_ms_va_copy", fnvoid_va_copy_ms,
35453 BUILT_IN_VA_COPY, BUILT_IN_NORMAL, NULL, fnattr_ms);
35454 add_builtin_function ("__builtin_sysv_va_start", fnvoid_va_start_sysv,
35455 BUILT_IN_VA_START, BUILT_IN_NORMAL, NULL, fnattr_sysv);
35456 add_builtin_function ("__builtin_sysv_va_end", fnvoid_va_end_sysv,
35457 BUILT_IN_VA_END, BUILT_IN_NORMAL, NULL, fnattr_sysv);
35458 add_builtin_function ("__builtin_sysv_va_copy", fnvoid_va_copy_sysv,
35459 BUILT_IN_VA_COPY, BUILT_IN_NORMAL, NULL, fnattr_sysv);
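/* Editorial note: on x86-64 these builtins back va_start/va_end/va_copy in
   functions using the non-default calling convention, e.g. (hypothetical):

     void __attribute__ ((ms_abi)) poke (int n, ...)
     {
       __builtin_ms_va_list ap;
       __builtin_ms_va_start (ap, n);
       __builtin_ms_va_end (ap);
     }
*/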
35462 static void
35463 ix86_init_builtin_types (void)
35465 tree float128_type_node, float80_type_node;
35467 /* The __float80 type. */
35468 float80_type_node = long_double_type_node;
35469 if (TYPE_MODE (float80_type_node) != XFmode)
35471 /* The __float80 type. */
35472 float80_type_node = make_node (REAL_TYPE);
35474 TYPE_PRECISION (float80_type_node) = 80;
35475 layout_type (float80_type_node);
35477 lang_hooks.types.register_builtin_type (float80_type_node, "__float80");
35479 /* The __float128 type. */
35480 float128_type_node = make_node (REAL_TYPE);
35481 TYPE_PRECISION (float128_type_node) = 128;
35482 layout_type (float128_type_node);
35483 lang_hooks.types.register_builtin_type (float128_type_node, "__float128");
35485 /* This macro is built by i386-builtin-types.awk. */
35486 DEFINE_BUILTIN_PRIMITIVE_TYPES;
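/* Editorial example: after these registrations, user code on x86 can write

     __float128 q = 1.0Q;    (TFmode, quad precision)
     __float80  e = 1.0L;    (XFmode, the same representation as long double
                              on targets where long double is 80-bit)

   The 'Q' literal suffix is a GNU extension.  */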
35489 static void
35490 ix86_init_builtins (void)
35492 tree t;
35494 ix86_init_builtin_types ();
35496 /* Builtins to get CPU type and features. */
35497 ix86_init_platform_type_builtins ();
35499 /* TFmode support builtins. */
35500 def_builtin_const (0, "__builtin_infq",
35501 FLOAT128_FTYPE_VOID, IX86_BUILTIN_INFQ);
35502 def_builtin_const (0, "__builtin_huge_valq",
35503 FLOAT128_FTYPE_VOID, IX86_BUILTIN_HUGE_VALQ);
35505 /* We will expand them to a normal call if SSE isn't available, since
35506 they are used by libgcc. */
35507 t = ix86_get_builtin_func_type (FLOAT128_FTYPE_FLOAT128);
35508 t = add_builtin_function ("__builtin_fabsq", t, IX86_BUILTIN_FABSQ,
35509 BUILT_IN_MD, "__fabstf2", NULL_TREE);
35510 TREE_READONLY (t) = 1;
35511 ix86_builtins[(int) IX86_BUILTIN_FABSQ] = t;
35513 t = ix86_get_builtin_func_type (FLOAT128_FTYPE_FLOAT128_FLOAT128);
35514 t = add_builtin_function ("__builtin_copysignq", t, IX86_BUILTIN_COPYSIGNQ,
35515 BUILT_IN_MD, "__copysigntf3", NULL_TREE);
35516 TREE_READONLY (t) = 1;
35517 ix86_builtins[(int) IX86_BUILTIN_COPYSIGNQ] = t;
35519 ix86_init_tm_builtins ();
35520 ix86_init_mmx_sse_builtins ();
35521 ix86_init_mpx_builtins ();
35523 if (TARGET_LP64)
35524 ix86_init_builtins_va_builtins_abi ();
35526 #ifdef SUBTARGET_INIT_BUILTINS
35527 SUBTARGET_INIT_BUILTINS;
35528 #endif
35531 /* Return the ix86 builtin for CODE. */
35533 static tree
35534 ix86_builtin_decl (unsigned code, bool)
35536 if (code >= IX86_BUILTIN_MAX)
35537 return error_mark_node;
35539 return ix86_builtins[code];
35542 /* Errors in the source file can cause expand_expr to return const0_rtx
35543 where we expect a vector. To avoid crashing, use one of the vector
35544 clear instructions. */
35545 static rtx
35546 safe_vector_operand (rtx x, machine_mode mode)
35548 if (x == const0_rtx)
35549 x = CONST0_RTX (mode);
35550 return x;
35553 /* Subroutine of ix86_expand_builtin to take care of binop insns. */
35555 static rtx
35556 ix86_expand_binop_builtin (enum insn_code icode, tree exp, rtx target)
35558 rtx pat;
35559 tree arg0 = CALL_EXPR_ARG (exp, 0);
35560 tree arg1 = CALL_EXPR_ARG (exp, 1);
35561 rtx op0 = expand_normal (arg0);
35562 rtx op1 = expand_normal (arg1);
35563 machine_mode tmode = insn_data[icode].operand[0].mode;
35564 machine_mode mode0 = insn_data[icode].operand[1].mode;
35565 machine_mode mode1 = insn_data[icode].operand[2].mode;
35567 if (VECTOR_MODE_P (mode0))
35568 op0 = safe_vector_operand (op0, mode0);
35569 if (VECTOR_MODE_P (mode1))
35570 op1 = safe_vector_operand (op1, mode1);
35572 if (optimize || !target
35573 || GET_MODE (target) != tmode
35574 || !insn_data[icode].operand[0].predicate (target, tmode))
35575 target = gen_reg_rtx (tmode);
35577 if (GET_MODE (op1) == SImode && mode1 == TImode)
35579 rtx x = gen_reg_rtx (V4SImode);
35580 emit_insn (gen_sse2_loadd (x, op1));
35581 op1 = gen_lowpart (TImode, x);
35584 if (!insn_data[icode].operand[1].predicate (op0, mode0))
35585 op0 = copy_to_mode_reg (mode0, op0);
35586 if (!insn_data[icode].operand[2].predicate (op1, mode1))
35587 op1 = copy_to_mode_reg (mode1, op1);
35589 pat = GEN_FCN (icode) (target, op0, op1);
35590 if (! pat)
35591 return 0;
35593 emit_insn (pat);
35595 return target;
35598 /* Subroutine of ix86_expand_builtin to take care of 2-4 argument insns. */
35600 static rtx
35601 ix86_expand_multi_arg_builtin (enum insn_code icode, tree exp, rtx target,
35602 enum ix86_builtin_func_type m_type,
35603 enum rtx_code sub_code)
35605 rtx pat;
35606 int i;
35607 int nargs;
35608 bool comparison_p = false;
35609 bool tf_p = false;
35610 bool last_arg_constant = false;
35611 int num_memory = 0;
35612 struct {
35613 rtx op;
35614 machine_mode mode;
35615 } args[4];
35617 machine_mode tmode = insn_data[icode].operand[0].mode;
35619 switch (m_type)
35621 case MULTI_ARG_4_DF2_DI_I:
35622 case MULTI_ARG_4_DF2_DI_I1:
35623 case MULTI_ARG_4_SF2_SI_I:
35624 case MULTI_ARG_4_SF2_SI_I1:
35625 nargs = 4;
35626 last_arg_constant = true;
35627 break;
35629 case MULTI_ARG_3_SF:
35630 case MULTI_ARG_3_DF:
35631 case MULTI_ARG_3_SF2:
35632 case MULTI_ARG_3_DF2:
35633 case MULTI_ARG_3_DI:
35634 case MULTI_ARG_3_SI:
35635 case MULTI_ARG_3_SI_DI:
35636 case MULTI_ARG_3_HI:
35637 case MULTI_ARG_3_HI_SI:
35638 case MULTI_ARG_3_QI:
35639 case MULTI_ARG_3_DI2:
35640 case MULTI_ARG_3_SI2:
35641 case MULTI_ARG_3_HI2:
35642 case MULTI_ARG_3_QI2:
35643 nargs = 3;
35644 break;
35646 case MULTI_ARG_2_SF:
35647 case MULTI_ARG_2_DF:
35648 case MULTI_ARG_2_DI:
35649 case MULTI_ARG_2_SI:
35650 case MULTI_ARG_2_HI:
35651 case MULTI_ARG_2_QI:
35652 nargs = 2;
35653 break;
35655 case MULTI_ARG_2_DI_IMM:
35656 case MULTI_ARG_2_SI_IMM:
35657 case MULTI_ARG_2_HI_IMM:
35658 case MULTI_ARG_2_QI_IMM:
35659 nargs = 2;
35660 last_arg_constant = true;
35661 break;
35663 case MULTI_ARG_1_SF:
35664 case MULTI_ARG_1_DF:
35665 case MULTI_ARG_1_SF2:
35666 case MULTI_ARG_1_DF2:
35667 case MULTI_ARG_1_DI:
35668 case MULTI_ARG_1_SI:
35669 case MULTI_ARG_1_HI:
35670 case MULTI_ARG_1_QI:
35671 case MULTI_ARG_1_SI_DI:
35672 case MULTI_ARG_1_HI_DI:
35673 case MULTI_ARG_1_HI_SI:
35674 case MULTI_ARG_1_QI_DI:
35675 case MULTI_ARG_1_QI_SI:
35676 case MULTI_ARG_1_QI_HI:
35677 nargs = 1;
35678 break;
35680 case MULTI_ARG_2_DI_CMP:
35681 case MULTI_ARG_2_SI_CMP:
35682 case MULTI_ARG_2_HI_CMP:
35683 case MULTI_ARG_2_QI_CMP:
35684 nargs = 2;
35685 comparison_p = true;
35686 break;
35688 case MULTI_ARG_2_SF_TF:
35689 case MULTI_ARG_2_DF_TF:
35690 case MULTI_ARG_2_DI_TF:
35691 case MULTI_ARG_2_SI_TF:
35692 case MULTI_ARG_2_HI_TF:
35693 case MULTI_ARG_2_QI_TF:
35694 nargs = 2;
35695 tf_p = true;
35696 break;
35698 default:
35699 gcc_unreachable ();
35702 if (optimize || !target
35703 || GET_MODE (target) != tmode
35704 || !insn_data[icode].operand[0].predicate (target, tmode))
35705 target = gen_reg_rtx (tmode);
35707 gcc_assert (nargs <= 4);
35709 for (i = 0; i < nargs; i++)
35711 tree arg = CALL_EXPR_ARG (exp, i);
35712 rtx op = expand_normal (arg);
35713 int adjust = (comparison_p) ? 1 : 0;
35714 machine_mode mode = insn_data[icode].operand[i+adjust+1].mode;
35716 if (last_arg_constant && i == nargs - 1)
35718 if (!insn_data[icode].operand[i + 1].predicate (op, mode))
35720 enum insn_code new_icode = icode;
35721 switch (icode)
35723 case CODE_FOR_xop_vpermil2v2df3:
35724 case CODE_FOR_xop_vpermil2v4sf3:
35725 case CODE_FOR_xop_vpermil2v4df3:
35726 case CODE_FOR_xop_vpermil2v8sf3:
35727 error ("the last argument must be a 2-bit immediate");
35728 return gen_reg_rtx (tmode);
35729 case CODE_FOR_xop_rotlv2di3:
35730 new_icode = CODE_FOR_rotlv2di3;
35731 goto xop_rotl;
35732 case CODE_FOR_xop_rotlv4si3:
35733 new_icode = CODE_FOR_rotlv4si3;
35734 goto xop_rotl;
35735 case CODE_FOR_xop_rotlv8hi3:
35736 new_icode = CODE_FOR_rotlv8hi3;
35737 goto xop_rotl;
35738 case CODE_FOR_xop_rotlv16qi3:
35739 new_icode = CODE_FOR_rotlv16qi3;
35740 xop_rotl:
35741 if (CONST_INT_P (op))
35743 int mask = GET_MODE_BITSIZE (GET_MODE_INNER (tmode)) - 1;
35744 op = GEN_INT (INTVAL (op) & mask);
35745 gcc_checking_assert
35746 (insn_data[icode].operand[i + 1].predicate (op, mode));
35748 else
35750 gcc_checking_assert
35751 (nargs == 2
35752 && insn_data[new_icode].operand[0].mode == tmode
35753 && insn_data[new_icode].operand[1].mode == tmode
35754 && insn_data[new_icode].operand[2].mode == mode
35755 && insn_data[new_icode].operand[0].predicate
35756 == insn_data[icode].operand[0].predicate
35757 && insn_data[new_icode].operand[1].predicate
35758 == insn_data[icode].operand[1].predicate);
35759 icode = new_icode;
35760 goto non_constant;
35762 break;
35763 default:
35764 gcc_unreachable ();
35768 else
35770 non_constant:
35771 if (VECTOR_MODE_P (mode))
35772 op = safe_vector_operand (op, mode);
35774 /* If we aren't optimizing, only allow one memory operand to be
35775 generated. */
35776 if (memory_operand (op, mode))
35777 num_memory++;
35779 gcc_assert (GET_MODE (op) == mode || GET_MODE (op) == VOIDmode);
35781 if (optimize
35782 || !insn_data[icode].operand[i+adjust+1].predicate (op, mode)
35783 || num_memory > 1)
35784 op = force_reg (mode, op);
35787 args[i].op = op;
35788 args[i].mode = mode;
35791 switch (nargs)
35793 case 1:
35794 pat = GEN_FCN (icode) (target, args[0].op);
35795 break;
35797 case 2:
35798 if (tf_p)
35799 pat = GEN_FCN (icode) (target, args[0].op, args[1].op,
35800 GEN_INT ((int)sub_code));
35801 else if (! comparison_p)
35802 pat = GEN_FCN (icode) (target, args[0].op, args[1].op);
35803 else
35805 rtx cmp_op = gen_rtx_fmt_ee (sub_code, GET_MODE (target),
35806 args[0].op,
35807 args[1].op);
35809 pat = GEN_FCN (icode) (target, cmp_op, args[0].op, args[1].op);
35811 break;
35813 case 3:
35814 pat = GEN_FCN (icode) (target, args[0].op, args[1].op, args[2].op);
35815 break;
35817 case 4:
35818 pat = GEN_FCN (icode) (target, args[0].op, args[1].op, args[2].op, args[3].op);
35819 break;
35821 default:
35822 gcc_unreachable ();
35825 if (! pat)
35826 return 0;
35828 emit_insn (pat);
35829 return target;
35832 /* Subroutine of ix86_expand_args_builtin to take care of scalar unop
35833 insns with vec_merge. */
35835 static rtx
35836 ix86_expand_unop_vec_merge_builtin (enum insn_code icode, tree exp,
35837 rtx target)
35839 rtx pat;
35840 tree arg0 = CALL_EXPR_ARG (exp, 0);
35841 rtx op1, op0 = expand_normal (arg0);
35842 machine_mode tmode = insn_data[icode].operand[0].mode;
35843 machine_mode mode0 = insn_data[icode].operand[1].mode;
35845 if (optimize || !target
35846 || GET_MODE (target) != tmode
35847 || !insn_data[icode].operand[0].predicate (target, tmode))
35848 target = gen_reg_rtx (tmode);
35850 if (VECTOR_MODE_P (mode0))
35851 op0 = safe_vector_operand (op0, mode0);
35853 if ((optimize && !register_operand (op0, mode0))
35854 || !insn_data[icode].operand[1].predicate (op0, mode0))
35855 op0 = copy_to_mode_reg (mode0, op0);
35857 op1 = op0;
35858 if (!insn_data[icode].operand[2].predicate (op1, mode0))
35859 op1 = copy_to_mode_reg (mode0, op1);
35861 pat = GEN_FCN (icode) (target, op0, op1);
35862 if (! pat)
35863 return 0;
35864 emit_insn (pat);
35865 return target;
35868 /* Subroutine of ix86_expand_builtin to take care of comparison insns. */
35870 static rtx
35871 ix86_expand_sse_compare (const struct builtin_description *d,
35872 tree exp, rtx target, bool swap)
35874 rtx pat;
35875 tree arg0 = CALL_EXPR_ARG (exp, 0);
35876 tree arg1 = CALL_EXPR_ARG (exp, 1);
35877 rtx op0 = expand_normal (arg0);
35878 rtx op1 = expand_normal (arg1);
35879 rtx op2;
35880 machine_mode tmode = insn_data[d->icode].operand[0].mode;
35881 machine_mode mode0 = insn_data[d->icode].operand[1].mode;
35882 machine_mode mode1 = insn_data[d->icode].operand[2].mode;
35883 enum rtx_code comparison = d->comparison;
35885 if (VECTOR_MODE_P (mode0))
35886 op0 = safe_vector_operand (op0, mode0);
35887 if (VECTOR_MODE_P (mode1))
35888 op1 = safe_vector_operand (op1, mode1);
35890 /* Swap operands if we have a comparison that isn't available in
35891 hardware. */
35892 if (swap)
35894 rtx tmp = gen_reg_rtx (mode1);
35895 emit_move_insn (tmp, op1);
35896 op1 = op0;
35897 op0 = tmp;
35900 if (optimize || !target
35901 || GET_MODE (target) != tmode
35902 || !insn_data[d->icode].operand[0].predicate (target, tmode))
35903 target = gen_reg_rtx (tmode);
35905 if ((optimize && !register_operand (op0, mode0))
35906 || !insn_data[d->icode].operand[1].predicate (op0, mode0))
35907 op0 = copy_to_mode_reg (mode0, op0);
35908 if ((optimize && !register_operand (op1, mode1))
35909 || !insn_data[d->icode].operand[2].predicate (op1, mode1))
35910 op1 = copy_to_mode_reg (mode1, op1);
35912 op2 = gen_rtx_fmt_ee (comparison, mode0, op0, op1);
35913 pat = GEN_FCN (d->icode) (target, op0, op1, op2);
35914 if (! pat)
35915 return 0;
35916 emit_insn (pat);
35917 return target;
35920 /* Subroutine of ix86_expand_builtin to take care of comi insns. */
35922 static rtx
35923 ix86_expand_sse_comi (const struct builtin_description *d, tree exp,
35924 rtx target)
35926 rtx pat;
35927 tree arg0 = CALL_EXPR_ARG (exp, 0);
35928 tree arg1 = CALL_EXPR_ARG (exp, 1);
35929 rtx op0 = expand_normal (arg0);
35930 rtx op1 = expand_normal (arg1);
35931 machine_mode mode0 = insn_data[d->icode].operand[0].mode;
35932 machine_mode mode1 = insn_data[d->icode].operand[1].mode;
35933 enum rtx_code comparison = d->comparison;
35935 if (VECTOR_MODE_P (mode0))
35936 op0 = safe_vector_operand (op0, mode0);
35937 if (VECTOR_MODE_P (mode1))
35938 op1 = safe_vector_operand (op1, mode1);
35940 /* Swap operands if we have a comparison that isn't available in
35941 hardware. */
35942 if (d->flag & BUILTIN_DESC_SWAP_OPERANDS)
35943 std::swap (op1, op0);
35945 target = gen_reg_rtx (SImode);
35946 emit_move_insn (target, const0_rtx);
35947 target = gen_rtx_SUBREG (QImode, target, 0);
35949 if ((optimize && !register_operand (op0, mode0))
35950 || !insn_data[d->icode].operand[0].predicate (op0, mode0))
35951 op0 = copy_to_mode_reg (mode0, op0);
35952 if ((optimize && !register_operand (op1, mode1))
35953 || !insn_data[d->icode].operand[1].predicate (op1, mode1))
35954 op1 = copy_to_mode_reg (mode1, op1);
35956 pat = GEN_FCN (d->icode) (op0, op1);
35957 if (! pat)
35958 return 0;
35959 emit_insn (pat);
35960 emit_insn (gen_rtx_SET (VOIDmode,
35961 gen_rtx_STRICT_LOW_PART (VOIDmode, target),
35962 gen_rtx_fmt_ee (comparison, QImode,
35963 SET_DEST (pat),
35964 const0_rtx)));
35966 return SUBREG_REG (target);
35969 /* Subroutines of ix86_expand_args_builtin to take care of round insns. */
35971 static rtx
35972 ix86_expand_sse_round (const struct builtin_description *d, tree exp,
35973 rtx target)
35975 rtx pat;
35976 tree arg0 = CALL_EXPR_ARG (exp, 0);
35977 rtx op1, op0 = expand_normal (arg0);
35978 machine_mode tmode = insn_data[d->icode].operand[0].mode;
35979 machine_mode mode0 = insn_data[d->icode].operand[1].mode;
35981 if (optimize || target == 0
35982 || GET_MODE (target) != tmode
35983 || !insn_data[d->icode].operand[0].predicate (target, tmode))
35984 target = gen_reg_rtx (tmode);
35986 if (VECTOR_MODE_P (mode0))
35987 op0 = safe_vector_operand (op0, mode0);
35989 if ((optimize && !register_operand (op0, mode0))
35990 || !insn_data[d->icode].operand[0].predicate (op0, mode0))
35991 op0 = copy_to_mode_reg (mode0, op0);
35993 op1 = GEN_INT (d->comparison);
35995 pat = GEN_FCN (d->icode) (target, op0, op1);
35996 if (! pat)
35997 return 0;
35998 emit_insn (pat);
35999 return target;
36002 static rtx
36003 ix86_expand_sse_round_vec_pack_sfix (const struct builtin_description *d,
36004 tree exp, rtx target)
36006 rtx pat;
36007 tree arg0 = CALL_EXPR_ARG (exp, 0);
36008 tree arg1 = CALL_EXPR_ARG (exp, 1);
36009 rtx op0 = expand_normal (arg0);
36010 rtx op1 = expand_normal (arg1);
36011 rtx op2;
36012 machine_mode tmode = insn_data[d->icode].operand[0].mode;
36013 machine_mode mode0 = insn_data[d->icode].operand[1].mode;
36014 machine_mode mode1 = insn_data[d->icode].operand[2].mode;
36016 if (optimize || target == 0
36017 || GET_MODE (target) != tmode
36018 || !insn_data[d->icode].operand[0].predicate (target, tmode))
36019 target = gen_reg_rtx (tmode);
36021 op0 = safe_vector_operand (op0, mode0);
36022 op1 = safe_vector_operand (op1, mode1);
36024 if ((optimize && !register_operand (op0, mode0))
36025 || !insn_data[d->icode].operand[0].predicate (op0, mode0))
36026 op0 = copy_to_mode_reg (mode0, op0);
36027 if ((optimize && !register_operand (op1, mode1))
36028 || !insn_data[d->icode].operand[1].predicate (op1, mode1))
36029 op1 = copy_to_mode_reg (mode1, op1);
36031 op2 = GEN_INT (d->comparison);
36033 pat = GEN_FCN (d->icode) (target, op0, op1, op2);
36034 if (! pat)
36035 return 0;
36036 emit_insn (pat);
36037 return target;
36040 /* Subroutine of ix86_expand_builtin to take care of ptest insns. */
36042 static rtx
36043 ix86_expand_sse_ptest (const struct builtin_description *d, tree exp,
36044 rtx target)
36046 rtx pat;
36047 tree arg0 = CALL_EXPR_ARG (exp, 0);
36048 tree arg1 = CALL_EXPR_ARG (exp, 1);
36049 rtx op0 = expand_normal (arg0);
36050 rtx op1 = expand_normal (arg1);
36051 machine_mode mode0 = insn_data[d->icode].operand[0].mode;
36052 machine_mode mode1 = insn_data[d->icode].operand[1].mode;
36053 enum rtx_code comparison = d->comparison;
36055 if (VECTOR_MODE_P (mode0))
36056 op0 = safe_vector_operand (op0, mode0);
36057 if (VECTOR_MODE_P (mode1))
36058 op1 = safe_vector_operand (op1, mode1);
36060 target = gen_reg_rtx (SImode);
36061 emit_move_insn (target, const0_rtx);
36062 target = gen_rtx_SUBREG (QImode, target, 0);
36064 if ((optimize && !register_operand (op0, mode0))
36065 || !insn_data[d->icode].operand[0].predicate (op0, mode0))
36066 op0 = copy_to_mode_reg (mode0, op0);
36067 if ((optimize && !register_operand (op1, mode1))
36068 || !insn_data[d->icode].operand[1].predicate (op1, mode1))
36069 op1 = copy_to_mode_reg (mode1, op1);
36071 pat = GEN_FCN (d->icode) (op0, op1);
36072 if (! pat)
36073 return 0;
36074 emit_insn (pat);
36075 emit_insn (gen_rtx_SET (VOIDmode,
36076 gen_rtx_STRICT_LOW_PART (VOIDmode, target),
36077 gen_rtx_fmt_ee (comparison, QImode,
36078 SET_DEST (pat),
36079 const0_rtx)));
36081 return SUBREG_REG (target);
36084 /* Subroutine of ix86_expand_builtin to take care of pcmpestr[im] insns. */
36086 static rtx
36087 ix86_expand_sse_pcmpestr (const struct builtin_description *d,
36088 tree exp, rtx target)
36090 rtx pat;
36091 tree arg0 = CALL_EXPR_ARG (exp, 0);
36092 tree arg1 = CALL_EXPR_ARG (exp, 1);
36093 tree arg2 = CALL_EXPR_ARG (exp, 2);
36094 tree arg3 = CALL_EXPR_ARG (exp, 3);
36095 tree arg4 = CALL_EXPR_ARG (exp, 4);
36096 rtx scratch0, scratch1;
36097 rtx op0 = expand_normal (arg0);
36098 rtx op1 = expand_normal (arg1);
36099 rtx op2 = expand_normal (arg2);
36100 rtx op3 = expand_normal (arg3);
36101 rtx op4 = expand_normal (arg4);
36102 machine_mode tmode0, tmode1, modev2, modei3, modev4, modei5, modeimm;
36104 tmode0 = insn_data[d->icode].operand[0].mode;
36105 tmode1 = insn_data[d->icode].operand[1].mode;
36106 modev2 = insn_data[d->icode].operand[2].mode;
36107 modei3 = insn_data[d->icode].operand[3].mode;
36108 modev4 = insn_data[d->icode].operand[4].mode;
36109 modei5 = insn_data[d->icode].operand[5].mode;
36110 modeimm = insn_data[d->icode].operand[6].mode;
36112 if (VECTOR_MODE_P (modev2))
36113 op0 = safe_vector_operand (op0, modev2);
36114 if (VECTOR_MODE_P (modev4))
36115 op2 = safe_vector_operand (op2, modev4);
36117 if (!insn_data[d->icode].operand[2].predicate (op0, modev2))
36118 op0 = copy_to_mode_reg (modev2, op0);
36119 if (!insn_data[d->icode].operand[3].predicate (op1, modei3))
36120 op1 = copy_to_mode_reg (modei3, op1);
36121 if ((optimize && !register_operand (op2, modev4))
36122 || !insn_data[d->icode].operand[4].predicate (op2, modev4))
36123 op2 = copy_to_mode_reg (modev4, op2);
36124 if (!insn_data[d->icode].operand[5].predicate (op3, modei5))
36125 op3 = copy_to_mode_reg (modei5, op3);
36127 if (!insn_data[d->icode].operand[6].predicate (op4, modeimm))
36129 error ("the fifth argument must be an 8-bit immediate");
36130 return const0_rtx;
36133 if (d->code == IX86_BUILTIN_PCMPESTRI128)
36135 if (optimize || !target
36136 || GET_MODE (target) != tmode0
36137 || !insn_data[d->icode].operand[0].predicate (target, tmode0))
36138 target = gen_reg_rtx (tmode0);
36140 scratch1 = gen_reg_rtx (tmode1);
36142 pat = GEN_FCN (d->icode) (target, scratch1, op0, op1, op2, op3, op4);
36144 else if (d->code == IX86_BUILTIN_PCMPESTRM128)
36146 if (optimize || !target
36147 || GET_MODE (target) != tmode1
36148 || !insn_data[d->icode].operand[1].predicate (target, tmode1))
36149 target = gen_reg_rtx (tmode1);
36151 scratch0 = gen_reg_rtx (tmode0);
36153 pat = GEN_FCN (d->icode) (scratch0, target, op0, op1, op2, op3, op4);
36155 else
36157 gcc_assert (d->flag);
36159 scratch0 = gen_reg_rtx (tmode0);
36160 scratch1 = gen_reg_rtx (tmode1);
36162 pat = GEN_FCN (d->icode) (scratch0, scratch1, op0, op1, op2, op3, op4);
36165 if (! pat)
36166 return 0;
36168 emit_insn (pat);
36170 if (d->flag)
36172 target = gen_reg_rtx (SImode);
36173 emit_move_insn (target, const0_rtx);
36174 target = gen_rtx_SUBREG (QImode, target, 0);
36176 emit_insn
36177 (gen_rtx_SET (VOIDmode, gen_rtx_STRICT_LOW_PART (VOIDmode, target),
36178 gen_rtx_fmt_ee (EQ, QImode,
36179 gen_rtx_REG ((machine_mode) d->flag,
36180 FLAGS_REG),
36181 const0_rtx)));
36182 return SUBREG_REG (target);
36184 else
36185 return target;
36189 /* Subroutine of ix86_expand_builtin to take care of pcmpistr[im] insns. */
36191 static rtx
36192 ix86_expand_sse_pcmpistr (const struct builtin_description *d,
36193 tree exp, rtx target)
36195 rtx pat;
36196 tree arg0 = CALL_EXPR_ARG (exp, 0);
36197 tree arg1 = CALL_EXPR_ARG (exp, 1);
36198 tree arg2 = CALL_EXPR_ARG (exp, 2);
36199 rtx scratch0, scratch1;
36200 rtx op0 = expand_normal (arg0);
36201 rtx op1 = expand_normal (arg1);
36202 rtx op2 = expand_normal (arg2);
36203 machine_mode tmode0, tmode1, modev2, modev3, modeimm;
36205 tmode0 = insn_data[d->icode].operand[0].mode;
36206 tmode1 = insn_data[d->icode].operand[1].mode;
36207 modev2 = insn_data[d->icode].operand[2].mode;
36208 modev3 = insn_data[d->icode].operand[3].mode;
36209 modeimm = insn_data[d->icode].operand[4].mode;
36211 if (VECTOR_MODE_P (modev2))
36212 op0 = safe_vector_operand (op0, modev2);
36213 if (VECTOR_MODE_P (modev3))
36214 op1 = safe_vector_operand (op1, modev3);
36216 if (!insn_data[d->icode].operand[2].predicate (op0, modev2))
36217 op0 = copy_to_mode_reg (modev2, op0);
36218 if ((optimize && !register_operand (op1, modev3))
36219 || !insn_data[d->icode].operand[3].predicate (op1, modev3))
36220 op1 = copy_to_mode_reg (modev3, op1);
36222 if (!insn_data[d->icode].operand[4].predicate (op2, modeimm))
36224 error ("the third argument must be an 8-bit immediate");
36225 return const0_rtx;
36228 if (d->code == IX86_BUILTIN_PCMPISTRI128)
36230 if (optimize || !target
36231 || GET_MODE (target) != tmode0
36232 || !insn_data[d->icode].operand[0].predicate (target, tmode0))
36233 target = gen_reg_rtx (tmode0);
36235 scratch1 = gen_reg_rtx (tmode1);
36237 pat = GEN_FCN (d->icode) (target, scratch1, op0, op1, op2);
36239 else if (d->code == IX86_BUILTIN_PCMPISTRM128)
36241 if (optimize || !target
36242 || GET_MODE (target) != tmode1
36243 || !insn_data[d->icode].operand[1].predicate (target, tmode1))
36244 target = gen_reg_rtx (tmode1);
36246 scratch0 = gen_reg_rtx (tmode0);
36248 pat = GEN_FCN (d->icode) (scratch0, target, op0, op1, op2);
36250 else
36252 gcc_assert (d->flag);
36254 scratch0 = gen_reg_rtx (tmode0);
36255 scratch1 = gen_reg_rtx (tmode1);
36257 pat = GEN_FCN (d->icode) (scratch0, scratch1, op0, op1, op2);
36260 if (! pat)
36261 return 0;
36263 emit_insn (pat);
36265 if (d->flag)
36267 target = gen_reg_rtx (SImode);
36268 emit_move_insn (target, const0_rtx);
36269 target = gen_rtx_SUBREG (QImode, target, 0);
36271 emit_insn
36272 (gen_rtx_SET (VOIDmode, gen_rtx_STRICT_LOW_PART (VOIDmode, target),
36273 gen_rtx_fmt_ee (EQ, QImode,
36274 gen_rtx_REG ((machine_mode) d->flag,
36275 FLAGS_REG),
36276 const0_rtx)));
36277 return SUBREG_REG (target);
36279 else
36280 return target;
36283 /* Subroutine of ix86_expand_builtin to take care of insns with
36284 a variable number of operands. */
36286 static rtx
36287 ix86_expand_args_builtin (const struct builtin_description *d,
36288 tree exp, rtx target)
36290 rtx pat, real_target;
36291 unsigned int i, nargs;
36292 unsigned int nargs_constant = 0;
36293 unsigned int mask_pos = 0;
36294 int num_memory = 0;
36295 struct
36297 rtx op;
36298 machine_mode mode;
36299 } args[6];
36300 bool last_arg_count = false;
36301 enum insn_code icode = d->icode;
36302 const struct insn_data_d *insn_p = &insn_data[icode];
36303 machine_mode tmode = insn_p->operand[0].mode;
36304 machine_mode rmode = VOIDmode;
36305 bool swap = false;
36306 enum rtx_code comparison = d->comparison;
36308 switch ((enum ix86_builtin_func_type) d->flag)
36310 case V2DF_FTYPE_V2DF_ROUND:
36311 case V4DF_FTYPE_V4DF_ROUND:
36312 case V4SF_FTYPE_V4SF_ROUND:
36313 case V8SF_FTYPE_V8SF_ROUND:
36314 case V4SI_FTYPE_V4SF_ROUND:
36315 case V8SI_FTYPE_V8SF_ROUND:
36316 return ix86_expand_sse_round (d, exp, target);
36317 case V4SI_FTYPE_V2DF_V2DF_ROUND:
36318 case V8SI_FTYPE_V4DF_V4DF_ROUND:
36319 case V16SI_FTYPE_V8DF_V8DF_ROUND:
36320 return ix86_expand_sse_round_vec_pack_sfix (d, exp, target);
36321 case INT_FTYPE_V8SF_V8SF_PTEST:
36322 case INT_FTYPE_V4DI_V4DI_PTEST:
36323 case INT_FTYPE_V4DF_V4DF_PTEST:
36324 case INT_FTYPE_V4SF_V4SF_PTEST:
36325 case INT_FTYPE_V2DI_V2DI_PTEST:
36326 case INT_FTYPE_V2DF_V2DF_PTEST:
36327 return ix86_expand_sse_ptest (d, exp, target);
36328 case FLOAT128_FTYPE_FLOAT128:
36329 case FLOAT_FTYPE_FLOAT:
36330 case INT_FTYPE_INT:
36331 case UINT64_FTYPE_INT:
36332 case UINT16_FTYPE_UINT16:
36333 case INT64_FTYPE_INT64:
36334 case INT64_FTYPE_V4SF:
36335 case INT64_FTYPE_V2DF:
36336 case INT_FTYPE_V16QI:
36337 case INT_FTYPE_V8QI:
36338 case INT_FTYPE_V8SF:
36339 case INT_FTYPE_V4DF:
36340 case INT_FTYPE_V4SF:
36341 case INT_FTYPE_V2DF:
36342 case INT_FTYPE_V32QI:
36343 case V16QI_FTYPE_V16QI:
36344 case V8SI_FTYPE_V8SF:
36345 case V8SI_FTYPE_V4SI:
36346 case V8HI_FTYPE_V8HI:
36347 case V8HI_FTYPE_V16QI:
36348 case V8QI_FTYPE_V8QI:
36349 case V8SF_FTYPE_V8SF:
36350 case V8SF_FTYPE_V8SI:
36351 case V8SF_FTYPE_V4SF:
36352 case V8SF_FTYPE_V8HI:
36353 case V4SI_FTYPE_V4SI:
36354 case V4SI_FTYPE_V16QI:
36355 case V4SI_FTYPE_V4SF:
36356 case V4SI_FTYPE_V8SI:
36357 case V4SI_FTYPE_V8HI:
36358 case V4SI_FTYPE_V4DF:
36359 case V4SI_FTYPE_V2DF:
36360 case V4HI_FTYPE_V4HI:
36361 case V4DF_FTYPE_V4DF:
36362 case V4DF_FTYPE_V4SI:
36363 case V4DF_FTYPE_V4SF:
36364 case V4DF_FTYPE_V2DF:
36365 case V4SF_FTYPE_V4SF:
36366 case V4SF_FTYPE_V4SI:
36367 case V4SF_FTYPE_V8SF:
36368 case V4SF_FTYPE_V4DF:
36369 case V4SF_FTYPE_V8HI:
36370 case V4SF_FTYPE_V2DF:
36371 case V2DI_FTYPE_V2DI:
36372 case V2DI_FTYPE_V16QI:
36373 case V2DI_FTYPE_V8HI:
36374 case V2DI_FTYPE_V4SI:
36375 case V2DF_FTYPE_V2DF:
36376 case V2DF_FTYPE_V4SI:
36377 case V2DF_FTYPE_V4DF:
36378 case V2DF_FTYPE_V4SF:
36379 case V2DF_FTYPE_V2SI:
36380 case V2SI_FTYPE_V2SI:
36381 case V2SI_FTYPE_V4SF:
36382 case V2SI_FTYPE_V2SF:
36383 case V2SI_FTYPE_V2DF:
36384 case V2SF_FTYPE_V2SF:
36385 case V2SF_FTYPE_V2SI:
36386 case V32QI_FTYPE_V32QI:
36387 case V32QI_FTYPE_V16QI:
36388 case V16HI_FTYPE_V16HI:
36389 case V16HI_FTYPE_V8HI:
36390 case V8SI_FTYPE_V8SI:
36391 case V16HI_FTYPE_V16QI:
36392 case V8SI_FTYPE_V16QI:
36393 case V4DI_FTYPE_V16QI:
36394 case V8SI_FTYPE_V8HI:
36395 case V4DI_FTYPE_V8HI:
36396 case V4DI_FTYPE_V4SI:
36397 case V4DI_FTYPE_V2DI:
36398 case HI_FTYPE_HI:
36399 case HI_FTYPE_V16QI:
36400 case SI_FTYPE_V32QI:
36401 case DI_FTYPE_V64QI:
36402 case V16QI_FTYPE_HI:
36403 case V32QI_FTYPE_SI:
36404 case V64QI_FTYPE_DI:
36405 case V8HI_FTYPE_QI:
36406 case V16HI_FTYPE_HI:
36407 case V32HI_FTYPE_SI:
36408 case V4SI_FTYPE_QI:
36409 case V8SI_FTYPE_QI:
36410 case V4SI_FTYPE_HI:
36411 case V8SI_FTYPE_HI:
36412 case QI_FTYPE_V8HI:
36413 case HI_FTYPE_V16HI:
36414 case SI_FTYPE_V32HI:
36415 case QI_FTYPE_V4SI:
36416 case QI_FTYPE_V8SI:
36417 case HI_FTYPE_V16SI:
36418 case QI_FTYPE_V2DI:
36419 case QI_FTYPE_V4DI:
36420 case QI_FTYPE_V8DI:
36421 case UINT_FTYPE_V2DF:
36422 case UINT_FTYPE_V4SF:
36423 case UINT64_FTYPE_V2DF:
36424 case UINT64_FTYPE_V4SF:
36425 case V16QI_FTYPE_V8DI:
36426 case V16HI_FTYPE_V16SI:
36427 case V16SI_FTYPE_HI:
36428 case V2DI_FTYPE_QI:
36429 case V4DI_FTYPE_QI:
36430 case V16SI_FTYPE_V16SI:
36431 case V16SI_FTYPE_INT:
36432 case V16SF_FTYPE_FLOAT:
36433 case V16SF_FTYPE_V8SF:
36434 case V16SI_FTYPE_V8SI:
36435 case V16SF_FTYPE_V4SF:
36436 case V16SI_FTYPE_V4SI:
36437 case V16SF_FTYPE_V16SF:
36438 case V8HI_FTYPE_V8DI:
36439 case V8UHI_FTYPE_V8UHI:
36440 case V8SI_FTYPE_V8DI:
36441 case V8SF_FTYPE_V8DF:
36442 case V8DI_FTYPE_QI:
36443 case V8DI_FTYPE_INT64:
36444 case V8DI_FTYPE_V4DI:
36445 case V8DI_FTYPE_V8DI:
36446 case V8DF_FTYPE_DOUBLE:
36447 case V8DF_FTYPE_V4DF:
36448 case V8DF_FTYPE_V2DF:
36449 case V8DF_FTYPE_V8DF:
36450 case V8DF_FTYPE_V8SI:
36451 nargs = 1;
36452 break;
36453 case V4SF_FTYPE_V4SF_VEC_MERGE:
36454 case V2DF_FTYPE_V2DF_VEC_MERGE:
36455 return ix86_expand_unop_vec_merge_builtin (icode, exp, target);
36456 case FLOAT128_FTYPE_FLOAT128_FLOAT128:
36457 case V16QI_FTYPE_V16QI_V16QI:
36458 case V16QI_FTYPE_V8HI_V8HI:
36459 case V16SI_FTYPE_V16SI_V16SI:
36460 case V16SF_FTYPE_V16SF_V16SF:
36461 case V16SF_FTYPE_V16SF_V16SI:
36462 case V8QI_FTYPE_V8QI_V8QI:
36463 case V8QI_FTYPE_V4HI_V4HI:
36464 case V8HI_FTYPE_V8HI_V8HI:
36465 case V8HI_FTYPE_V16QI_V16QI:
36466 case V8HI_FTYPE_V4SI_V4SI:
36467 case V8SF_FTYPE_V8SF_V8SF:
36468 case V8SF_FTYPE_V8SF_V8SI:
36469 case V8DI_FTYPE_V8DI_V8DI:
36470 case V8DF_FTYPE_V8DF_V8DF:
36471 case V8DF_FTYPE_V8DF_V8DI:
36472 case V4SI_FTYPE_V4SI_V4SI:
36473 case V4SI_FTYPE_V8HI_V8HI:
36474 case V4SI_FTYPE_V4SF_V4SF:
36475 case V4SI_FTYPE_V2DF_V2DF:
36476 case V4HI_FTYPE_V4HI_V4HI:
36477 case V4HI_FTYPE_V8QI_V8QI:
36478 case V4HI_FTYPE_V2SI_V2SI:
36479 case V4DF_FTYPE_V4DF_V4DF:
36480 case V4DF_FTYPE_V4DF_V4DI:
36481 case V4SF_FTYPE_V4SF_V4SF:
36482 case V4SF_FTYPE_V4SF_V4SI:
36483 case V4SF_FTYPE_V4SF_V2SI:
36484 case V4SF_FTYPE_V4SF_V2DF:
36485 case V4SF_FTYPE_V4SF_UINT:
36486 case V4SF_FTYPE_V4SF_UINT64:
36487 case V4SF_FTYPE_V4SF_DI:
36488 case V4SF_FTYPE_V4SF_SI:
36489 case V2DI_FTYPE_V2DI_V2DI:
36490 case V2DI_FTYPE_V16QI_V16QI:
36491 case V2DI_FTYPE_V4SI_V4SI:
36492 case V2UDI_FTYPE_V4USI_V4USI:
36493 case V2DI_FTYPE_V2DI_V16QI:
36494 case V2DI_FTYPE_V2DF_V2DF:
36495 case V2SI_FTYPE_V2SI_V2SI:
36496 case V2SI_FTYPE_V4HI_V4HI:
36497 case V2SI_FTYPE_V2SF_V2SF:
36498 case V2DF_FTYPE_V2DF_V2DF:
36499 case V2DF_FTYPE_V2DF_V4SF:
36500 case V2DF_FTYPE_V2DF_V2DI:
36501 case V2DF_FTYPE_V2DF_DI:
36502 case V2DF_FTYPE_V2DF_SI:
36503 case V2DF_FTYPE_V2DF_UINT:
36504 case V2DF_FTYPE_V2DF_UINT64:
36505 case V2SF_FTYPE_V2SF_V2SF:
36506 case V1DI_FTYPE_V1DI_V1DI:
36507 case V1DI_FTYPE_V8QI_V8QI:
36508 case V1DI_FTYPE_V2SI_V2SI:
36509 case V32QI_FTYPE_V16HI_V16HI:
36510 case V16HI_FTYPE_V8SI_V8SI:
36511 case V32QI_FTYPE_V32QI_V32QI:
36512 case V16HI_FTYPE_V32QI_V32QI:
36513 case V16HI_FTYPE_V16HI_V16HI:
36514 case V8SI_FTYPE_V4DF_V4DF:
36515 case V8SI_FTYPE_V8SI_V8SI:
36516 case V8SI_FTYPE_V16HI_V16HI:
36517 case V4DI_FTYPE_V4DI_V4DI:
36518 case V4DI_FTYPE_V8SI_V8SI:
36519 case V4UDI_FTYPE_V8USI_V8USI:
36520 case QI_FTYPE_V8DI_V8DI:
36521 case V8DI_FTYPE_V64QI_V64QI:
36522 case HI_FTYPE_V16SI_V16SI:
36523 if (comparison == UNKNOWN)
36524 return ix86_expand_binop_builtin (icode, exp, target);
36525 nargs = 2;
36526 break;
36527 case V4SF_FTYPE_V4SF_V4SF_SWAP:
36528 case V2DF_FTYPE_V2DF_V2DF_SWAP:
36529 gcc_assert (comparison != UNKNOWN);
36530 nargs = 2;
36531 swap = true;
36532 break;
36533 case V16HI_FTYPE_V16HI_V8HI_COUNT:
36534 case V16HI_FTYPE_V16HI_SI_COUNT:
36535 case V8SI_FTYPE_V8SI_V4SI_COUNT:
36536 case V8SI_FTYPE_V8SI_SI_COUNT:
36537 case V4DI_FTYPE_V4DI_V2DI_COUNT:
36538 case V4DI_FTYPE_V4DI_INT_COUNT:
36539 case V8HI_FTYPE_V8HI_V8HI_COUNT:
36540 case V8HI_FTYPE_V8HI_SI_COUNT:
36541 case V4SI_FTYPE_V4SI_V4SI_COUNT:
36542 case V4SI_FTYPE_V4SI_SI_COUNT:
36543 case V4HI_FTYPE_V4HI_V4HI_COUNT:
36544 case V4HI_FTYPE_V4HI_SI_COUNT:
36545 case V2DI_FTYPE_V2DI_V2DI_COUNT:
36546 case V2DI_FTYPE_V2DI_SI_COUNT:
36547 case V2SI_FTYPE_V2SI_V2SI_COUNT:
36548 case V2SI_FTYPE_V2SI_SI_COUNT:
36549 case V1DI_FTYPE_V1DI_V1DI_COUNT:
36550 case V1DI_FTYPE_V1DI_SI_COUNT:
36551 nargs = 2;
36552 last_arg_count = true;
36553 break;
36554 case UINT64_FTYPE_UINT64_UINT64:
36555 case UINT_FTYPE_UINT_UINT:
36556 case UINT_FTYPE_UINT_USHORT:
36557 case UINT_FTYPE_UINT_UCHAR:
36558 case UINT16_FTYPE_UINT16_INT:
36559 case UINT8_FTYPE_UINT8_INT:
36560 case HI_FTYPE_HI_HI:
36561 case SI_FTYPE_SI_SI:
36562 case DI_FTYPE_DI_DI:
36563 case V16SI_FTYPE_V8DF_V8DF:
36564 nargs = 2;
36565 break;
36566 case V2DI_FTYPE_V2DI_INT_CONVERT:
36567 nargs = 2;
36568 rmode = V1TImode;
36569 nargs_constant = 1;
36570 break;
36571 case V4DI_FTYPE_V4DI_INT_CONVERT:
36572 nargs = 2;
36573 rmode = V2TImode;
36574 nargs_constant = 1;
36575 break;
36576 case V8DI_FTYPE_V8DI_INT_CONVERT:
36577 nargs = 2;
36578 rmode = V4TImode;
36579 nargs_constant = 1;
36580 break;
36581 case V8HI_FTYPE_V8HI_INT:
36582 case V8HI_FTYPE_V8SF_INT:
36583 case V16HI_FTYPE_V16SF_INT:
36584 case V8HI_FTYPE_V4SF_INT:
36585 case V8SF_FTYPE_V8SF_INT:
36586 case V4SF_FTYPE_V16SF_INT:
36587 case V16SF_FTYPE_V16SF_INT:
36588 case V4SI_FTYPE_V4SI_INT:
36589 case V4SI_FTYPE_V8SI_INT:
36590 case V4HI_FTYPE_V4HI_INT:
36591 case V4DF_FTYPE_V4DF_INT:
36592 case V4DF_FTYPE_V8DF_INT:
36593 case V4SF_FTYPE_V4SF_INT:
36594 case V4SF_FTYPE_V8SF_INT:
36595 case V2DI_FTYPE_V2DI_INT:
36596 case V2DF_FTYPE_V2DF_INT:
36597 case V2DF_FTYPE_V4DF_INT:
36598 case V16HI_FTYPE_V16HI_INT:
36599 case V8SI_FTYPE_V8SI_INT:
36600 case V16SI_FTYPE_V16SI_INT:
36601 case V4SI_FTYPE_V16SI_INT:
36602 case V4DI_FTYPE_V4DI_INT:
36603 case V2DI_FTYPE_V4DI_INT:
36604 case V4DI_FTYPE_V8DI_INT:
36605 case HI_FTYPE_HI_INT:
36606 case QI_FTYPE_V4SF_INT:
36607 case QI_FTYPE_V2DF_INT:
36608 nargs = 2;
36609 nargs_constant = 1;
36610 break;
36611 case V16QI_FTYPE_V16QI_V16QI_V16QI:
36612 case V8SF_FTYPE_V8SF_V8SF_V8SF:
36613 case V4DF_FTYPE_V4DF_V4DF_V4DF:
36614 case V4SF_FTYPE_V4SF_V4SF_V4SF:
36615 case V2DF_FTYPE_V2DF_V2DF_V2DF:
36616 case V32QI_FTYPE_V32QI_V32QI_V32QI:
36617 case HI_FTYPE_V16SI_V16SI_HI:
36618 case QI_FTYPE_V8DI_V8DI_QI:
36619 case V16HI_FTYPE_V16SI_V16HI_HI:
36620 case V16QI_FTYPE_V16SI_V16QI_HI:
36621 case V16QI_FTYPE_V8DI_V16QI_QI:
36622 case V16SF_FTYPE_V16SF_V16SF_HI:
36623 case V16SF_FTYPE_V16SF_V16SF_V16SF:
36624 case V16SF_FTYPE_V16SF_V16SI_V16SF:
36625 case V16SF_FTYPE_V16SI_V16SF_HI:
36626 case V16SF_FTYPE_V16SI_V16SF_V16SF:
36627 case V16SF_FTYPE_V4SF_V16SF_HI:
36628 case V16SI_FTYPE_SI_V16SI_HI:
36629 case V16SI_FTYPE_V16HI_V16SI_HI:
36630 case V16SI_FTYPE_V16QI_V16SI_HI:
36631 case V16SI_FTYPE_V16SF_V16SI_HI:
36632 case V8SF_FTYPE_V4SF_V8SF_QI:
36633 case V4DF_FTYPE_V2DF_V4DF_QI:
36634 case V8SI_FTYPE_V4SI_V8SI_QI:
36635 case V8SI_FTYPE_SI_V8SI_QI:
36636 case V4SI_FTYPE_V4SI_V4SI_QI:
36637 case V4SI_FTYPE_SI_V4SI_QI:
36638 case V4DI_FTYPE_V2DI_V4DI_QI:
36639 case V4DI_FTYPE_DI_V4DI_QI:
36640 case V2DI_FTYPE_V2DI_V2DI_QI:
36641 case V2DI_FTYPE_DI_V2DI_QI:
36642 case V64QI_FTYPE_V64QI_V64QI_DI:
36643 case V64QI_FTYPE_V16QI_V64QI_DI:
36644 case V64QI_FTYPE_QI_V64QI_DI:
36645 case V32QI_FTYPE_V32QI_V32QI_SI:
36646 case V32QI_FTYPE_V16QI_V32QI_SI:
36647 case V32QI_FTYPE_QI_V32QI_SI:
36648 case V16QI_FTYPE_V16QI_V16QI_HI:
36649 case V16QI_FTYPE_QI_V16QI_HI:
36650 case V32HI_FTYPE_V8HI_V32HI_SI:
36651 case V32HI_FTYPE_HI_V32HI_SI:
36652 case V16HI_FTYPE_V8HI_V16HI_HI:
36653 case V16HI_FTYPE_HI_V16HI_HI:
36654 case V8HI_FTYPE_V8HI_V8HI_QI:
36655 case V8HI_FTYPE_HI_V8HI_QI:
36656 case V8SF_FTYPE_V8HI_V8SF_QI:
36657 case V4SF_FTYPE_V8HI_V4SF_QI:
36658 case V8SI_FTYPE_V8SF_V8SI_QI:
36659 case V4SI_FTYPE_V4SF_V4SI_QI:
36660 case V8DI_FTYPE_V8SF_V8DI_QI:
36661 case V4DI_FTYPE_V4SF_V4DI_QI:
36662 case V2DI_FTYPE_V4SF_V2DI_QI:
36663 case V8SF_FTYPE_V8DI_V8SF_QI:
36664 case V4SF_FTYPE_V4DI_V4SF_QI:
36665 case V4SF_FTYPE_V2DI_V4SF_QI:
36666 case V8DF_FTYPE_V8DI_V8DF_QI:
36667 case V4DF_FTYPE_V4DI_V4DF_QI:
36668 case V2DF_FTYPE_V2DI_V2DF_QI:
36669 case V16QI_FTYPE_V8HI_V16QI_QI:
36670 case V16QI_FTYPE_V16HI_V16QI_HI:
36671 case V16QI_FTYPE_V4SI_V16QI_QI:
36672 case V16QI_FTYPE_V8SI_V16QI_QI:
36673 case V8HI_FTYPE_V4SI_V8HI_QI:
36674 case V8HI_FTYPE_V8SI_V8HI_QI:
36675 case V16QI_FTYPE_V2DI_V16QI_QI:
36676 case V16QI_FTYPE_V4DI_V16QI_QI:
36677 case V8HI_FTYPE_V2DI_V8HI_QI:
36678 case V8HI_FTYPE_V4DI_V8HI_QI:
36679 case V4SI_FTYPE_V2DI_V4SI_QI:
36680 case V4SI_FTYPE_V4DI_V4SI_QI:
36681 case V32QI_FTYPE_V32HI_V32QI_SI:
36682 case HI_FTYPE_V16QI_V16QI_HI:
36683 case SI_FTYPE_V32QI_V32QI_SI:
36684 case DI_FTYPE_V64QI_V64QI_DI:
36685 case QI_FTYPE_V8HI_V8HI_QI:
36686 case HI_FTYPE_V16HI_V16HI_HI:
36687 case SI_FTYPE_V32HI_V32HI_SI:
36688 case QI_FTYPE_V4SI_V4SI_QI:
36689 case QI_FTYPE_V8SI_V8SI_QI:
36690 case QI_FTYPE_V2DI_V2DI_QI:
36691 case QI_FTYPE_V4DI_V4DI_QI:
36692 case V4SF_FTYPE_V2DF_V4SF_QI:
36693 case V4SF_FTYPE_V4DF_V4SF_QI:
36694 case V16SI_FTYPE_V16SI_V16SI_HI:
36695 case V16SI_FTYPE_V16SI_V16SI_V16SI:
36696 case V16SI_FTYPE_V4SI_V16SI_HI:
36697 case V2DI_FTYPE_V2DI_V2DI_V2DI:
36698 case V2DI_FTYPE_V4SI_V2DI_QI:
36699 case V2DI_FTYPE_V8HI_V2DI_QI:
36700 case V2DI_FTYPE_V16QI_V2DI_QI:
36701 case V4DI_FTYPE_V4DI_V4DI_QI:
36702 case V4DI_FTYPE_V4SI_V4DI_QI:
36703 case V4DI_FTYPE_V8HI_V4DI_QI:
36704 case V4DI_FTYPE_V16QI_V4DI_QI:
36705 case V8DI_FTYPE_V8DF_V8DI_QI:
36706 case V4DI_FTYPE_V4DF_V4DI_QI:
36707 case V2DI_FTYPE_V2DF_V2DI_QI:
36708 case V4SI_FTYPE_V4DF_V4SI_QI:
36709 case V4SI_FTYPE_V2DF_V4SI_QI:
36710 case V4SI_FTYPE_V8HI_V4SI_QI:
36711 case V4SI_FTYPE_V16QI_V4SI_QI:
36712 case V8SI_FTYPE_V8SI_V8SI_V8SI:
36713 case V4DI_FTYPE_V4DI_V4DI_V4DI:
36714 case V8DF_FTYPE_V2DF_V8DF_QI:
36715 case V8DF_FTYPE_V4DF_V8DF_QI:
36716 case V8DF_FTYPE_V8DF_V8DF_QI:
36717 case V8DF_FTYPE_V8DF_V8DF_V8DF:
36718 case V8SF_FTYPE_V8SF_V8SF_QI:
36719 case V8SF_FTYPE_V8SI_V8SF_QI:
36720 case V4DF_FTYPE_V4DF_V4DF_QI:
36721 case V4SF_FTYPE_V4SF_V4SF_QI:
36722 case V2DF_FTYPE_V2DF_V2DF_QI:
36723 case V2DF_FTYPE_V4SF_V2DF_QI:
36724 case V2DF_FTYPE_V4SI_V2DF_QI:
36725 case V4SF_FTYPE_V4SI_V4SF_QI:
36726 case V4DF_FTYPE_V4SF_V4DF_QI:
36727 case V4DF_FTYPE_V4SI_V4DF_QI:
36728 case V8SI_FTYPE_V8SI_V8SI_QI:
36729 case V8SI_FTYPE_V8HI_V8SI_QI:
36730 case V8SI_FTYPE_V16QI_V8SI_QI:
36731 case V8DF_FTYPE_V8DF_V8DI_V8DF:
36732 case V8DF_FTYPE_V8DI_V8DF_V8DF:
36733 case V8DF_FTYPE_V8SF_V8DF_QI:
36734 case V8DF_FTYPE_V8SI_V8DF_QI:
36735 case V8DI_FTYPE_DI_V8DI_QI:
36736 case V16SF_FTYPE_V8SF_V16SF_HI:
36737 case V16SI_FTYPE_V8SI_V16SI_HI:
36738 case V16HI_FTYPE_V16HI_V16HI_HI:
36739 case V8HI_FTYPE_V16QI_V8HI_QI:
36740 case V16HI_FTYPE_V16QI_V16HI_HI:
36741 case V32HI_FTYPE_V32HI_V32HI_SI:
36742 case V32HI_FTYPE_V32QI_V32HI_SI:
36743 case V8DI_FTYPE_V16QI_V8DI_QI:
36744 case V8DI_FTYPE_V2DI_V8DI_QI:
36745 case V8DI_FTYPE_V4DI_V8DI_QI:
36746 case V8DI_FTYPE_V8DI_V8DI_QI:
36747 case V8DI_FTYPE_V8DI_V8DI_V8DI:
36748 case V8DI_FTYPE_V8HI_V8DI_QI:
36749 case V8DI_FTYPE_V8SI_V8DI_QI:
36750 case V8HI_FTYPE_V8DI_V8HI_QI:
36751 case V8SF_FTYPE_V8DF_V8SF_QI:
36752 case V8SI_FTYPE_V8DF_V8SI_QI:
36753 case V8SI_FTYPE_V8DI_V8SI_QI:
36754 case V4SI_FTYPE_V4SI_V4SI_V4SI:
36755 nargs = 3;
36756 break;
36757 case V32QI_FTYPE_V32QI_V32QI_INT:
36758 case V16HI_FTYPE_V16HI_V16HI_INT:
36759 case V16QI_FTYPE_V16QI_V16QI_INT:
36760 case V4DI_FTYPE_V4DI_V4DI_INT:
36761 case V8HI_FTYPE_V8HI_V8HI_INT:
36762 case V8SI_FTYPE_V8SI_V8SI_INT:
36763 case V8SI_FTYPE_V8SI_V4SI_INT:
36764 case V8SF_FTYPE_V8SF_V8SF_INT:
36765 case V8SF_FTYPE_V8SF_V4SF_INT:
36766 case V4SI_FTYPE_V4SI_V4SI_INT:
36767 case V4DF_FTYPE_V4DF_V4DF_INT:
36768 case V16SF_FTYPE_V16SF_V16SF_INT:
36769 case V16SF_FTYPE_V16SF_V4SF_INT:
36770 case V16SI_FTYPE_V16SI_V4SI_INT:
36771 case V4DF_FTYPE_V4DF_V2DF_INT:
36772 case V4SF_FTYPE_V4SF_V4SF_INT:
36773 case V2DI_FTYPE_V2DI_V2DI_INT:
36774 case V4DI_FTYPE_V4DI_V2DI_INT:
36775 case V2DF_FTYPE_V2DF_V2DF_INT:
36776 case QI_FTYPE_V8DI_V8DI_INT:
36777 case QI_FTYPE_V8DF_V8DF_INT:
36778 case QI_FTYPE_V2DF_V2DF_INT:
36779 case QI_FTYPE_V4SF_V4SF_INT:
36780 case HI_FTYPE_V16SI_V16SI_INT:
36781 case HI_FTYPE_V16SF_V16SF_INT:
36782 nargs = 3;
36783 nargs_constant = 1;
36784 break;
36785 case V4DI_FTYPE_V4DI_V4DI_INT_CONVERT:
36786 nargs = 3;
36787 rmode = V4DImode;
36788 nargs_constant = 1;
36789 break;
36790 case V2DI_FTYPE_V2DI_V2DI_INT_CONVERT:
36791 nargs = 3;
36792 rmode = V2DImode;
36793 nargs_constant = 1;
36794 break;
36795 case V1DI_FTYPE_V1DI_V1DI_INT_CONVERT:
36796 nargs = 3;
36797 rmode = DImode;
36798 nargs_constant = 1;
36799 break;
36800 case V2DI_FTYPE_V2DI_UINT_UINT:
36801 nargs = 3;
36802 nargs_constant = 2;
36803 break;
36804 case V8DI_FTYPE_V8DI_V8DI_INT_CONVERT:
36805 nargs = 3;
36806 rmode = V8DImode;
36807 nargs_constant = 1;
36808 break;
36809 case V8DI_FTYPE_V8DI_V8DI_INT_V8DI_DI_CONVERT:
36810 nargs = 5;
36811 rmode = V8DImode;
36812 mask_pos = 2;
36813 nargs_constant = 1;
36814 break;
36815 case QI_FTYPE_V8DF_INT_QI:
36816 case QI_FTYPE_V4DF_INT_QI:
36817 case QI_FTYPE_V2DF_INT_QI:
36818 case HI_FTYPE_V16SF_INT_HI:
36819 case QI_FTYPE_V8SF_INT_QI:
36820 case QI_FTYPE_V4SF_INT_QI:
36821 nargs = 3;
36822 mask_pos = 1;
36823 nargs_constant = 1;
36824 break;
36825 case V4DI_FTYPE_V4DI_V4DI_INT_V4DI_SI_CONVERT:
36826 nargs = 5;
36827 rmode = V4DImode;
36828 mask_pos = 2;
36829 nargs_constant = 1;
36830 break;
36831 case V2DI_FTYPE_V2DI_V2DI_INT_V2DI_HI_CONVERT:
36832 nargs = 5;
36833 rmode = V2DImode;
36834 mask_pos = 2;
36835 nargs_constant = 1;
36836 break;
36837 case V32QI_FTYPE_V32QI_V32QI_V32QI_SI:
36838 case V32HI_FTYPE_V32HI_V32HI_V32HI_SI:
36839 case V32HI_FTYPE_V64QI_V64QI_V32HI_SI:
36840 case V16SI_FTYPE_V32HI_V32HI_V16SI_HI:
36841 case V64QI_FTYPE_V64QI_V64QI_V64QI_DI:
36842 case V32HI_FTYPE_V32HI_V8HI_V32HI_SI:
36843 case V16HI_FTYPE_V16HI_V8HI_V16HI_HI:
36844 case V8SI_FTYPE_V8SI_V4SI_V8SI_QI:
36845 case V4DI_FTYPE_V4DI_V2DI_V4DI_QI:
36846 case V64QI_FTYPE_V32HI_V32HI_V64QI_DI:
36847 case V32QI_FTYPE_V16HI_V16HI_V32QI_SI:
36848 case V16QI_FTYPE_V8HI_V8HI_V16QI_HI:
36849 case V32HI_FTYPE_V16SI_V16SI_V32HI_SI:
36850 case V16HI_FTYPE_V8SI_V8SI_V16HI_HI:
36851 case V8HI_FTYPE_V4SI_V4SI_V8HI_QI:
36852 case V4DF_FTYPE_V4DF_V4DI_V4DF_QI:
36853 case V8SF_FTYPE_V8SF_V8SI_V8SF_QI:
36854 case V4SF_FTYPE_V4SF_V4SI_V4SF_QI:
36855 case V2DF_FTYPE_V2DF_V2DI_V2DF_QI:
36856 case V2DI_FTYPE_V4SI_V4SI_V2DI_QI:
36857 case V4DI_FTYPE_V8SI_V8SI_V4DI_QI:
36858 case V4DF_FTYPE_V4DI_V4DF_V4DF_QI:
36859 case V8SF_FTYPE_V8SI_V8SF_V8SF_QI:
36860 case V2DF_FTYPE_V2DI_V2DF_V2DF_QI:
36861 case V4SF_FTYPE_V4SI_V4SF_V4SF_QI:
36862 case V16SF_FTYPE_V16SF_V16SF_V16SF_HI:
36863 case V16SF_FTYPE_V16SF_V16SI_V16SF_HI:
36864 case V16SF_FTYPE_V16SI_V16SF_V16SF_HI:
36865 case V16SI_FTYPE_V16SI_V16SI_V16SI_HI:
36866 case V16SI_FTYPE_V16SI_V4SI_V16SI_HI:
36867 case V8HI_FTYPE_V8HI_V8HI_V8HI_QI:
36868 case V8SI_FTYPE_V8SI_V8SI_V8SI_QI:
36869 case V4SI_FTYPE_V4SI_V4SI_V4SI_QI:
36870 case V8SF_FTYPE_V8SF_V8SF_V8SF_QI:
36871 case V16QI_FTYPE_V16QI_V16QI_V16QI_HI:
36872 case V16HI_FTYPE_V16HI_V16HI_V16HI_HI:
36873 case V2DI_FTYPE_V2DI_V2DI_V2DI_QI:
36874 case V2DF_FTYPE_V2DF_V2DF_V2DF_QI:
36875 case V2DF_FTYPE_V2DF_V4SF_V2DF_QI:
36876 case V4DI_FTYPE_V4DI_V4DI_V4DI_QI:
36877 case V4DF_FTYPE_V4DF_V4DF_V4DF_QI:
36878 case V4SF_FTYPE_V4SF_V2DF_V4SF_QI:
36879 case V4SF_FTYPE_V4SF_V4SF_V4SF_QI:
36880 case V8DF_FTYPE_V8DF_V8DF_V8DF_QI:
36881 case V8DF_FTYPE_V8DF_V8DI_V8DF_QI:
36882 case V8DF_FTYPE_V8DI_V8DF_V8DF_QI:
36883 case V8DI_FTYPE_V16SI_V16SI_V8DI_QI:
36884 case V8DI_FTYPE_V8DI_SI_V8DI_V8DI:
36885 case V8DI_FTYPE_V8DI_V2DI_V8DI_QI:
36886 case V8DI_FTYPE_V8DI_V8DI_V8DI_QI:
36887 case V8HI_FTYPE_V16QI_V16QI_V8HI_QI:
36888 case V16HI_FTYPE_V32QI_V32QI_V16HI_HI:
36889 case V8SI_FTYPE_V16HI_V16HI_V8SI_QI:
36890 case V4SI_FTYPE_V8HI_V8HI_V4SI_QI:
36891 nargs = 4;
36892 break;
36893 case V2DF_FTYPE_V2DF_V2DF_V2DI_INT:
36894 case V4DF_FTYPE_V4DF_V4DF_V4DI_INT:
36895 case V4SF_FTYPE_V4SF_V4SF_V4SI_INT:
36896 case V8SF_FTYPE_V8SF_V8SF_V8SI_INT:
36897 case V16SF_FTYPE_V16SF_V16SF_V16SI_INT:
36898 nargs = 4;
36899 nargs_constant = 1;
36900 break;
36901 case QI_FTYPE_V4DI_V4DI_INT_QI:
36902 case QI_FTYPE_V8SI_V8SI_INT_QI:
36903 case QI_FTYPE_V4DF_V4DF_INT_QI:
36904 case QI_FTYPE_V8SF_V8SF_INT_QI:
36905 case QI_FTYPE_V2DI_V2DI_INT_QI:
36906 case QI_FTYPE_V4SI_V4SI_INT_QI:
36907 case QI_FTYPE_V2DF_V2DF_INT_QI:
36908 case QI_FTYPE_V4SF_V4SF_INT_QI:
36909 case DI_FTYPE_V64QI_V64QI_INT_DI:
36910 case SI_FTYPE_V32QI_V32QI_INT_SI:
36911 case HI_FTYPE_V16QI_V16QI_INT_HI:
36912 case SI_FTYPE_V32HI_V32HI_INT_SI:
36913 case HI_FTYPE_V16HI_V16HI_INT_HI:
36914 case QI_FTYPE_V8HI_V8HI_INT_QI:
36915 nargs = 4;
36916 mask_pos = 1;
36917 nargs_constant = 1;
36918 break;
36919 case V2DI_FTYPE_V2DI_V2DI_UINT_UINT:
36920 nargs = 4;
36921 nargs_constant = 2;
36922 break;
36923 case UCHAR_FTYPE_UCHAR_UINT_UINT_PUNSIGNED:
36924 case UCHAR_FTYPE_UCHAR_ULONGLONG_ULONGLONG_PULONGLONG:
36925 nargs = 4;
36926 break;
36927 case QI_FTYPE_V8DI_V8DI_INT_QI:
36928 case HI_FTYPE_V16SI_V16SI_INT_HI:
36929 case QI_FTYPE_V8DF_V8DF_INT_QI:
36930 case HI_FTYPE_V16SF_V16SF_INT_HI:
36931 mask_pos = 1;
36932 nargs = 4;
36933 nargs_constant = 1;
36934 break;
36935 case V8SF_FTYPE_V8SF_INT_V8SF_QI:
36936 case V4SF_FTYPE_V4SF_INT_V4SF_QI:
36937 case V2DF_FTYPE_V4DF_INT_V2DF_QI:
36938 case V2DI_FTYPE_V4DI_INT_V2DI_QI:
36939 case V8SF_FTYPE_V16SF_INT_V8SF_QI:
36940 case V8SI_FTYPE_V16SI_INT_V8SI_QI:
36941 case V2DF_FTYPE_V8DF_INT_V2DF_QI:
36942 case V2DI_FTYPE_V8DI_INT_V2DI_QI:
36943 case V4SF_FTYPE_V8SF_INT_V4SF_QI:
36944 case V4SI_FTYPE_V8SI_INT_V4SI_QI:
36945 case V8HI_FTYPE_V8SF_INT_V8HI_QI:
36946 case V8HI_FTYPE_V4SF_INT_V8HI_QI:
36947 case V32HI_FTYPE_V32HI_INT_V32HI_SI:
36948 case V16HI_FTYPE_V16HI_INT_V16HI_HI:
36949 case V8HI_FTYPE_V8HI_INT_V8HI_QI:
36950 case V4DI_FTYPE_V4DI_INT_V4DI_QI:
36951 case V2DI_FTYPE_V2DI_INT_V2DI_QI:
36952 case V8SI_FTYPE_V8SI_INT_V8SI_QI:
36953 case V4SI_FTYPE_V4SI_INT_V4SI_QI:
36954 case V4DF_FTYPE_V4DF_INT_V4DF_QI:
36955 case V2DF_FTYPE_V2DF_INT_V2DF_QI:
36956 case V8DF_FTYPE_V8DF_INT_V8DF_QI:
36957 case V16SF_FTYPE_V16SF_INT_V16SF_HI:
36958 case V16HI_FTYPE_V16SF_INT_V16HI_HI:
36959 case V16SI_FTYPE_V16SI_INT_V16SI_HI:
36960 case V4SI_FTYPE_V16SI_INT_V4SI_QI:
36961 case V4DI_FTYPE_V8DI_INT_V4DI_QI:
36962 case V4DF_FTYPE_V8DF_INT_V4DF_QI:
36963 case V4SF_FTYPE_V16SF_INT_V4SF_QI:
36964 case V8DI_FTYPE_V8DI_INT_V8DI_QI:
36965 nargs = 4;
36966 mask_pos = 2;
36967 nargs_constant = 1;
36968 break;
36969 case V16SF_FTYPE_V16SF_V4SF_INT_V16SF_HI:
36970 case V16SI_FTYPE_V16SI_V4SI_INT_V16SI_HI:
36971 case V8DF_FTYPE_V8DF_V8DF_INT_V8DF_QI:
36972 case V8DI_FTYPE_V8DI_V8DI_INT_V8DI_QI:
36973 case V16SF_FTYPE_V16SF_V16SF_INT_V16SF_HI:
36974 case V16SI_FTYPE_V16SI_V16SI_INT_V16SI_HI:
36975 case V4SF_FTYPE_V4SF_V4SF_INT_V4SF_QI:
36976 case V2DF_FTYPE_V2DF_V2DF_INT_V2DF_QI:
36977 case V8DF_FTYPE_V8DF_V4DF_INT_V8DF_QI:
36978 case V8DI_FTYPE_V8DI_V4DI_INT_V8DI_QI:
36979 case V4DF_FTYPE_V4DF_V4DF_INT_V4DF_QI:
36980 case V8SF_FTYPE_V8SF_V8SF_INT_V8SF_QI:
36981 case V8DF_FTYPE_V8DF_V2DF_INT_V8DF_QI:
36982 case V8DI_FTYPE_V8DI_V2DI_INT_V8DI_QI:
36983 case V8SI_FTYPE_V8SI_V8SI_INT_V8SI_QI:
36984 case V4DI_FTYPE_V4DI_V4DI_INT_V4DI_QI:
36985 case V4SI_FTYPE_V4SI_V4SI_INT_V4SI_QI:
36986 case V2DI_FTYPE_V2DI_V2DI_INT_V2DI_QI:
36987 case V32HI_FTYPE_V64QI_V64QI_INT_V32HI_SI:
36988 case V16HI_FTYPE_V32QI_V32QI_INT_V16HI_HI:
36989 case V8HI_FTYPE_V16QI_V16QI_INT_V8HI_QI:
36990 case V16SF_FTYPE_V16SF_V8SF_INT_V16SF_HI:
36991 case V16SI_FTYPE_V16SI_V8SI_INT_V16SI_HI:
36992 case V8SF_FTYPE_V8SF_V4SF_INT_V8SF_QI:
36993 case V8SI_FTYPE_V8SI_V4SI_INT_V8SI_QI:
36994 case V4DI_FTYPE_V4DI_V2DI_INT_V4DI_QI:
36995 case V4DF_FTYPE_V4DF_V2DF_INT_V4DF_QI:
36996 nargs = 5;
36997 mask_pos = 2;
36998 nargs_constant = 1;
36999 break;
37000 case V8DI_FTYPE_V8DI_V8DI_V8DI_INT_QI:
37001 case V16SF_FTYPE_V16SF_V16SF_V16SI_INT_HI:
37002 case V16SI_FTYPE_V16SI_V16SI_V16SI_INT_HI:
37003 case V2DF_FTYPE_V2DF_V2DF_V2DI_INT_QI:
37004 case V4SF_FTYPE_V4SF_V4SF_V4SI_INT_QI:
37005 case V8SF_FTYPE_V8SF_V8SF_V8SI_INT_QI:
37006 case V8SI_FTYPE_V8SI_V8SI_V8SI_INT_QI:
37007 case V4DF_FTYPE_V4DF_V4DF_V4DI_INT_QI:
37008 case V4DI_FTYPE_V4DI_V4DI_V4DI_INT_QI:
37009 case V4SI_FTYPE_V4SI_V4SI_V4SI_INT_QI:
37010 case V2DI_FTYPE_V2DI_V2DI_V2DI_INT_QI:
37011 nargs = 5;
37013 mask_pos = 1;
37014 nargs_constant = 1;
37015 break;
37017 default:
37018 gcc_unreachable ();
37021 gcc_assert (nargs <= ARRAY_SIZE (args));
37023 if (comparison != UNKNOWN)
37025 gcc_assert (nargs == 2);
37026 return ix86_expand_sse_compare (d, exp, target, swap);
37029 if (rmode == VOIDmode || rmode == tmode)
37031 if (optimize
37032 || target == 0
37033 || GET_MODE (target) != tmode
37034 || !insn_p->operand[0].predicate (target, tmode))
37035 target = gen_reg_rtx (tmode);
37036 real_target = target;
37038 else
37040 real_target = gen_reg_rtx (tmode);
37041 target = simplify_gen_subreg (rmode, real_target, tmode, 0);
37044 for (i = 0; i < nargs; i++)
37046 tree arg = CALL_EXPR_ARG (exp, i);
37047 rtx op = expand_normal (arg);
37048 machine_mode mode = insn_p->operand[i + 1].mode;
37049 bool match = insn_p->operand[i + 1].predicate (op, mode);
37051 if (last_arg_count && (i + 1) == nargs)
37053 /* SIMD shift insns take either an 8-bit immediate or a
37054 register as the count, but the builtin functions take an int
37055 as the count. If the count doesn't match, we put it in a register. */
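/* For example, the shift-by-immediate builtins behind intrinsics such as
_mm_slli_epi16 declare the count as int; when the value passed is not a
matching immediate it is forced into a register here. (The intrinsic name
is given only as an illustration.) */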
37056 if (!match)
37058 op = simplify_gen_subreg (SImode, op, GET_MODE (op), 0);
37059 if (!insn_p->operand[i + 1].predicate (op, mode))
37060 op = copy_to_reg (op);
37063 else if ((mask_pos && (nargs - i - mask_pos) == nargs_constant) ||
37064 (!mask_pos && (nargs - i) <= nargs_constant))
37066 if (!match)
37067 switch (icode)
37069 case CODE_FOR_avx_vinsertf128v4di:
37070 case CODE_FOR_avx_vextractf128v4di:
37071 error ("the last argument must be a 1-bit immediate");
37072 return const0_rtx;
37074 case CODE_FOR_avx512f_cmpv8di3_mask:
37075 case CODE_FOR_avx512f_cmpv16si3_mask:
37076 case CODE_FOR_avx512f_ucmpv8di3_mask:
37077 case CODE_FOR_avx512f_ucmpv16si3_mask:
37078 case CODE_FOR_avx512vl_cmpv4di3_mask:
37079 case CODE_FOR_avx512vl_cmpv8si3_mask:
37080 case CODE_FOR_avx512vl_ucmpv4di3_mask:
37081 case CODE_FOR_avx512vl_ucmpv8si3_mask:
37082 case CODE_FOR_avx512vl_cmpv2di3_mask:
37083 case CODE_FOR_avx512vl_cmpv4si3_mask:
37084 case CODE_FOR_avx512vl_ucmpv2di3_mask:
37085 case CODE_FOR_avx512vl_ucmpv4si3_mask:
37086 error ("the last argument must be a 3-bit immediate");
37087 return const0_rtx;
37089 case CODE_FOR_sse4_1_roundsd:
37090 case CODE_FOR_sse4_1_roundss:
37092 case CODE_FOR_sse4_1_roundpd:
37093 case CODE_FOR_sse4_1_roundps:
37094 case CODE_FOR_avx_roundpd256:
37095 case CODE_FOR_avx_roundps256:
37097 case CODE_FOR_sse4_1_roundpd_vec_pack_sfix:
37098 case CODE_FOR_sse4_1_roundps_sfix:
37099 case CODE_FOR_avx_roundpd_vec_pack_sfix256:
37100 case CODE_FOR_avx_roundps_sfix256:
37102 case CODE_FOR_sse4_1_blendps:
37103 case CODE_FOR_avx_blendpd256:
37104 case CODE_FOR_avx_vpermilv4df:
37105 case CODE_FOR_avx_vpermilv4df_mask:
37106 case CODE_FOR_avx512f_getmantv8df_mask:
37107 case CODE_FOR_avx512f_getmantv16sf_mask:
37108 case CODE_FOR_avx512vl_getmantv8sf_mask:
37109 case CODE_FOR_avx512vl_getmantv4df_mask:
37110 case CODE_FOR_avx512vl_getmantv4sf_mask:
37111 case CODE_FOR_avx512vl_getmantv2df_mask:
37112 case CODE_FOR_avx512dq_rangepv8df_mask_round:
37113 case CODE_FOR_avx512dq_rangepv16sf_mask_round:
37114 case CODE_FOR_avx512dq_rangepv4df_mask:
37115 case CODE_FOR_avx512dq_rangepv8sf_mask:
37116 case CODE_FOR_avx512dq_rangepv2df_mask:
37117 case CODE_FOR_avx512dq_rangepv4sf_mask:
37118 case CODE_FOR_avx_shufpd256_mask:
37119 error ("the last argument must be a 4-bit immediate");
37120 return const0_rtx;
37122 case CODE_FOR_sha1rnds4:
37123 case CODE_FOR_sse4_1_blendpd:
37124 case CODE_FOR_avx_vpermilv2df:
37125 case CODE_FOR_avx_vpermilv2df_mask:
37126 case CODE_FOR_xop_vpermil2v2df3:
37127 case CODE_FOR_xop_vpermil2v4sf3:
37128 case CODE_FOR_xop_vpermil2v4df3:
37129 case CODE_FOR_xop_vpermil2v8sf3:
37130 case CODE_FOR_avx512f_vinsertf32x4_mask:
37131 case CODE_FOR_avx512f_vinserti32x4_mask:
37132 case CODE_FOR_avx512f_vextractf32x4_mask:
37133 case CODE_FOR_avx512f_vextracti32x4_mask:
37134 case CODE_FOR_sse2_shufpd:
37135 case CODE_FOR_sse2_shufpd_mask:
37136 case CODE_FOR_avx512dq_shuf_f64x2_mask:
37137 case CODE_FOR_avx512dq_shuf_i64x2_mask:
37138 case CODE_FOR_avx512vl_shuf_i32x4_mask:
37139 case CODE_FOR_avx512vl_shuf_f32x4_mask:
37140 error ("the last argument must be a 2-bit immediate");
37141 return const0_rtx;
37143 case CODE_FOR_avx_vextractf128v4df:
37144 case CODE_FOR_avx_vextractf128v8sf:
37145 case CODE_FOR_avx_vextractf128v8si:
37146 case CODE_FOR_avx_vinsertf128v4df:
37147 case CODE_FOR_avx_vinsertf128v8sf:
37148 case CODE_FOR_avx_vinsertf128v8si:
37149 case CODE_FOR_avx512f_vinsertf64x4_mask:
37150 case CODE_FOR_avx512f_vinserti64x4_mask:
37151 case CODE_FOR_avx512f_vextractf64x4_mask:
37152 case CODE_FOR_avx512f_vextracti64x4_mask:
37153 case CODE_FOR_avx512dq_vinsertf32x8_mask:
37154 case CODE_FOR_avx512dq_vinserti32x8_mask:
37155 case CODE_FOR_avx512vl_vinsertv4df:
37156 case CODE_FOR_avx512vl_vinsertv4di:
37157 case CODE_FOR_avx512vl_vinsertv8sf:
37158 case CODE_FOR_avx512vl_vinsertv8si:
37159 error ("the last argument must be a 1-bit immediate");
37160 return const0_rtx;
37162 case CODE_FOR_avx_vmcmpv2df3:
37163 case CODE_FOR_avx_vmcmpv4sf3:
37164 case CODE_FOR_avx_cmpv2df3:
37165 case CODE_FOR_avx_cmpv4sf3:
37166 case CODE_FOR_avx_cmpv4df3:
37167 case CODE_FOR_avx_cmpv8sf3:
37168 case CODE_FOR_avx512f_cmpv8df3_mask:
37169 case CODE_FOR_avx512f_cmpv16sf3_mask:
37170 case CODE_FOR_avx512f_vmcmpv2df3_mask:
37171 case CODE_FOR_avx512f_vmcmpv4sf3_mask:
37172 error ("the last argument must be a 5-bit immediate");
37173 return const0_rtx;
37175 default:
37176 switch (nargs_constant)
37178 case 2:
37179 if ((mask_pos && (nargs - i - mask_pos) == nargs_constant) ||
37180 (!mask_pos && (nargs - i) == nargs_constant))
37182 error ("the next to last argument must be an 8-bit immediate");
37183 break;
37185 case 1:
37186 error ("the last argument must be an 8-bit immediate");
37187 break;
37188 default:
37189 gcc_unreachable ();
37191 return const0_rtx;
37194 else
37196 if (VECTOR_MODE_P (mode))
37197 op = safe_vector_operand (op, mode);
37199 /* If we aren't optimizing, only allow one memory operand to
37200 be generated. */
37201 if (memory_operand (op, mode))
37202 num_memory++;
37204 if (GET_MODE (op) == mode || GET_MODE (op) == VOIDmode)
37206 if (optimize || !match || num_memory > 1)
37207 op = copy_to_mode_reg (mode, op);
37209 else
37211 op = copy_to_reg (op);
37212 op = simplify_gen_subreg (mode, op, GET_MODE (op), 0);
37216 args[i].op = op;
37217 args[i].mode = mode;
37220 switch (nargs)
37222 case 1:
37223 pat = GEN_FCN (icode) (real_target, args[0].op);
37224 break;
37225 case 2:
37226 pat = GEN_FCN (icode) (real_target, args[0].op, args[1].op);
37227 break;
37228 case 3:
37229 pat = GEN_FCN (icode) (real_target, args[0].op, args[1].op,
37230 args[2].op);
37231 break;
37232 case 4:
37233 pat = GEN_FCN (icode) (real_target, args[0].op, args[1].op,
37234 args[2].op, args[3].op);
37235 break;
37236 case 5:
37237 pat = GEN_FCN (icode) (real_target, args[0].op, args[1].op,
37238 args[2].op, args[3].op, args[4].op);
break;
37239 case 6:
37240 pat = GEN_FCN (icode) (real_target, args[0].op, args[1].op,
37241 args[2].op, args[3].op, args[4].op,
37242 args[5].op);
37243 break;
37244 default:
37245 gcc_unreachable ();
37248 if (! pat)
37249 return 0;
37251 emit_insn (pat);
37252 return target;
37255 /* Transform a pattern of the following layout:
37256 (parallel [
37257 set (A B)
37258 (unspec [C] UNSPEC_EMBEDDED_ROUNDING)])
37260 into:
37261 (set (A B))
37264 (parallel [ A B
37266 (unspec [C] UNSPEC_EMBEDDED_ROUNDING)
37269 into:
37270 (parallel [ A B ... ]) */
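/* This is used once the rounding operand has turned out to be NO_ROUND,
so that the emitted insn matches the ordinary pattern that has no
embedded-rounding unspec. */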
37272 static rtx
37273 ix86_erase_embedded_rounding (rtx pat)
37275 if (GET_CODE (pat) == INSN)
37276 pat = PATTERN (pat);
37278 gcc_assert (GET_CODE (pat) == PARALLEL);
37280 if (XVECLEN (pat, 0) == 2)
37282 rtx p0 = XVECEXP (pat, 0, 0);
37283 rtx p1 = XVECEXP (pat, 0, 1);
37285 gcc_assert (GET_CODE (p0) == SET
37286 && GET_CODE (p1) == UNSPEC
37287 && XINT (p1, 1) == UNSPEC_EMBEDDED_ROUNDING);
37289 return p0;
37291 else
37293 rtx *res = XALLOCAVEC (rtx, XVECLEN (pat, 0));
37294 int i = 0;
37295 int j = 0;
37297 for (; i < XVECLEN (pat, 0); ++i)
37299 rtx elem = XVECEXP (pat, 0, i);
37300 if (GET_CODE (elem) != UNSPEC
37301 || XINT (elem, 1) != UNSPEC_EMBEDDED_ROUNDING)
37302 res [j++] = elem;
37305 /* No more than 1 occurrence was removed. */
37306 gcc_assert (j >= XVECLEN (pat, 0) - 1);
37308 return gen_rtx_PARALLEL (GET_MODE (pat), gen_rtvec_v (j, res));
37312 /* Subroutine of ix86_expand_round_builtin to take care of comi insns
37313 with rounding. */
37314 static rtx
37315 ix86_expand_sse_comi_round (const struct builtin_description *d,
37316 tree exp, rtx target)
37318 rtx pat, set_dst;
37319 tree arg0 = CALL_EXPR_ARG (exp, 0);
37320 tree arg1 = CALL_EXPR_ARG (exp, 1);
37321 tree arg2 = CALL_EXPR_ARG (exp, 2);
37322 tree arg3 = CALL_EXPR_ARG (exp, 3);
37323 rtx op0 = expand_normal (arg0);
37324 rtx op1 = expand_normal (arg1);
37325 rtx op2 = expand_normal (arg2);
37326 rtx op3 = expand_normal (arg3);
37327 enum insn_code icode = d->icode;
37328 const struct insn_data_d *insn_p = &insn_data[icode];
37329 machine_mode mode0 = insn_p->operand[0].mode;
37330 machine_mode mode1 = insn_p->operand[1].mode;
37331 enum rtx_code comparison = UNEQ;
37332 bool need_ucomi = false;
37334 /* See avxintrin.h for values. */
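/* For instance, index 0 (_CMP_EQ_OQ in avxintrin.h) selects UNEQ with an
unordered comi, while index 14 (_CMP_GT_OS) selects GT with an ordered
comi; the _CMP_* names are quoted here only as examples of the encoding. */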
37335 enum rtx_code comi_comparisons[32] =
37337 UNEQ, GT, GE, UNORDERED, LTGT, UNLE, UNLT, ORDERED, UNEQ, UNLT,
37338 UNLE, LT, LTGT, GE, GT, LT, UNEQ, GT, GE, UNORDERED, LTGT, UNLE,
37339 UNLT, ORDERED, UNEQ, UNLT, UNLE, LT, LTGT, GE, GT, LT
37341 bool need_ucomi_values[32] =
37343 true, false, false, true, true, false, false, true,
37344 true, false, false, true, true, false, false, true,
37345 false, true, true, false, false, true, true, false,
37346 false, true, true, false, false, true, true, false
37349 if (!CONST_INT_P (op2))
37351 error ("the third argument must be a comparison constant");
37352 return const0_rtx;
37354 if (INTVAL (op2) < 0 || INTVAL (op2) >= 32)
37356 error ("incorrect comparison mode");
37357 return const0_rtx;
37360 if (!insn_p->operand[2].predicate (op3, SImode))
37362 error ("incorrect rounding operand");
37363 return const0_rtx;
37366 comparison = comi_comparisons[INTVAL (op2)];
37367 need_ucomi = need_ucomi_values[INTVAL (op2)];
37369 if (VECTOR_MODE_P (mode0))
37370 op0 = safe_vector_operand (op0, mode0);
37371 if (VECTOR_MODE_P (mode1))
37372 op1 = safe_vector_operand (op1, mode1);
37374 target = gen_reg_rtx (SImode);
37375 emit_move_insn (target, const0_rtx);
37376 target = gen_rtx_SUBREG (QImode, target, 0);
37378 if ((optimize && !register_operand (op0, mode0))
37379 || !insn_p->operand[0].predicate (op0, mode0))
37380 op0 = copy_to_mode_reg (mode0, op0);
37381 if ((optimize && !register_operand (op1, mode1))
37382 || !insn_p->operand[1].predicate (op1, mode1))
37383 op1 = copy_to_mode_reg (mode1, op1);
37385 if (need_ucomi)
37386 icode = icode == CODE_FOR_sse_comi_round
37387 ? CODE_FOR_sse_ucomi_round
37388 : CODE_FOR_sse2_ucomi_round;
37390 pat = GEN_FCN (icode) (op0, op1, op3);
37391 if (! pat)
37392 return 0;
37394 /* Rounding operand can be either NO_ROUND or ROUND_SAE at this point. */
37395 if (INTVAL (op3) == NO_ROUND)
37397 pat = ix86_erase_embedded_rounding (pat);
37398 if (! pat)
37399 return 0;
37401 set_dst = SET_DEST (pat);
37403 else
37405 gcc_assert (GET_CODE (XVECEXP (pat, 0, 0)) == SET);
37406 set_dst = SET_DEST (XVECEXP (pat, 0, 0));
37409 emit_insn (pat);
37410 emit_insn (gen_rtx_SET (VOIDmode,
37411 gen_rtx_STRICT_LOW_PART (VOIDmode, target),
37412 gen_rtx_fmt_ee (comparison, QImode,
37413 set_dst,
37414 const0_rtx)));
37416 return SUBREG_REG (target);
37419 static rtx
37420 ix86_expand_round_builtin (const struct builtin_description *d,
37421 tree exp, rtx target)
37423 rtx pat;
37424 unsigned int i, nargs;
37425 struct
37427 rtx op;
37428 machine_mode mode;
37429 } args[6];
37430 enum insn_code icode = d->icode;
37431 const struct insn_data_d *insn_p = &insn_data[icode];
37432 machine_mode tmode = insn_p->operand[0].mode;
37433 unsigned int nargs_constant = 0;
37434 unsigned int redundant_embed_rnd = 0;
37436 switch ((enum ix86_builtin_func_type) d->flag)
37438 case UINT64_FTYPE_V2DF_INT:
37439 case UINT64_FTYPE_V4SF_INT:
37440 case UINT_FTYPE_V2DF_INT:
37441 case UINT_FTYPE_V4SF_INT:
37442 case INT64_FTYPE_V2DF_INT:
37443 case INT64_FTYPE_V4SF_INT:
37444 case INT_FTYPE_V2DF_INT:
37445 case INT_FTYPE_V4SF_INT:
37446 nargs = 2;
37447 break;
37448 case V4SF_FTYPE_V4SF_UINT_INT:
37449 case V4SF_FTYPE_V4SF_UINT64_INT:
37450 case V2DF_FTYPE_V2DF_UINT64_INT:
37451 case V4SF_FTYPE_V4SF_INT_INT:
37452 case V4SF_FTYPE_V4SF_INT64_INT:
37453 case V2DF_FTYPE_V2DF_INT64_INT:
37454 case V4SF_FTYPE_V4SF_V4SF_INT:
37455 case V2DF_FTYPE_V2DF_V2DF_INT:
37456 case V4SF_FTYPE_V4SF_V2DF_INT:
37457 case V2DF_FTYPE_V2DF_V4SF_INT:
37458 nargs = 3;
37459 break;
37460 case V8SF_FTYPE_V8DF_V8SF_QI_INT:
37461 case V8DF_FTYPE_V8DF_V8DF_QI_INT:
37462 case V8SI_FTYPE_V8DF_V8SI_QI_INT:
37463 case V8DI_FTYPE_V8DF_V8DI_QI_INT:
37464 case V8SF_FTYPE_V8DI_V8SF_QI_INT:
37465 case V8DF_FTYPE_V8DI_V8DF_QI_INT:
37466 case V16SF_FTYPE_V16SF_V16SF_HI_INT:
37467 case V8DI_FTYPE_V8SF_V8DI_QI_INT:
37468 case V16SF_FTYPE_V16SI_V16SF_HI_INT:
37469 case V16SI_FTYPE_V16SF_V16SI_HI_INT:
37470 case V8DF_FTYPE_V8SF_V8DF_QI_INT:
37471 case V16SF_FTYPE_V16HI_V16SF_HI_INT:
37472 case V2DF_FTYPE_V2DF_V2DF_V2DF_INT:
37473 case V4SF_FTYPE_V4SF_V4SF_V4SF_INT:
37474 nargs = 4;
37475 break;
37476 case V4SF_FTYPE_V4SF_V4SF_INT_INT:
37477 case V2DF_FTYPE_V2DF_V2DF_INT_INT:
37478 nargs_constant = 2;
37479 nargs = 4;
37480 break;
37481 case INT_FTYPE_V4SF_V4SF_INT_INT:
37482 case INT_FTYPE_V2DF_V2DF_INT_INT:
37483 return ix86_expand_sse_comi_round (d, exp, target);
37484 case V8DF_FTYPE_V8DF_V8DF_V8DF_QI_INT:
37485 case V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT:
37486 case V2DF_FTYPE_V2DF_V2DF_V2DF_QI_INT:
37487 case V2DF_FTYPE_V2DF_V4SF_V2DF_QI_INT:
37488 case V4SF_FTYPE_V4SF_V4SF_V4SF_QI_INT:
37489 case V4SF_FTYPE_V4SF_V2DF_V4SF_QI_INT:
37490 nargs = 5;
37491 break;
37492 case V16SF_FTYPE_V16SF_INT_V16SF_HI_INT:
37493 case V8DF_FTYPE_V8DF_INT_V8DF_QI_INT:
37494 nargs_constant = 4;
37495 nargs = 5;
37496 break;
37497 case QI_FTYPE_V8DF_V8DF_INT_QI_INT:
37498 case QI_FTYPE_V2DF_V2DF_INT_QI_INT:
37499 case HI_FTYPE_V16SF_V16SF_INT_HI_INT:
37500 case QI_FTYPE_V4SF_V4SF_INT_QI_INT:
37501 nargs_constant = 3;
37502 nargs = 5;
37503 break;
37504 case V16SF_FTYPE_V16SF_V16SF_INT_V16SF_HI_INT:
37505 case V8DF_FTYPE_V8DF_V8DF_INT_V8DF_QI_INT:
37506 case V4SF_FTYPE_V4SF_V4SF_INT_V4SF_QI_INT:
37507 case V2DF_FTYPE_V2DF_V2DF_INT_V2DF_QI_INT:
37508 nargs = 6;
37509 nargs_constant = 4;
37510 break;
37511 case V8DF_FTYPE_V8DF_V8DF_V8DI_INT_QI_INT:
37512 case V16SF_FTYPE_V16SF_V16SF_V16SI_INT_HI_INT:
37513 case V2DF_FTYPE_V2DF_V2DF_V2DI_INT_QI_INT:
37514 case V4SF_FTYPE_V4SF_V4SF_V4SI_INT_QI_INT:
37515 nargs = 6;
37516 nargs_constant = 3;
37517 break;
37518 default:
37519 gcc_unreachable ();
37521 gcc_assert (nargs <= ARRAY_SIZE (args));
37523 if (optimize
37524 || target == 0
37525 || GET_MODE (target) != tmode
37526 || !insn_p->operand[0].predicate (target, tmode))
37527 target = gen_reg_rtx (tmode);
37529 for (i = 0; i < nargs; i++)
37531 tree arg = CALL_EXPR_ARG (exp, i);
37532 rtx op = expand_normal (arg);
37533 machine_mode mode = insn_p->operand[i + 1].mode;
37534 bool match = insn_p->operand[i + 1].predicate (op, mode);
37536 if (i == nargs - nargs_constant)
37538 if (!match)
37540 switch (icode)
37542 case CODE_FOR_avx512f_getmantv8df_mask_round:
37543 case CODE_FOR_avx512f_getmantv16sf_mask_round:
37544 case CODE_FOR_avx512f_vgetmantv2df_round:
37545 case CODE_FOR_avx512f_vgetmantv4sf_round:
37546 error ("the immediate argument must be a 4-bit immediate");
37547 return const0_rtx;
37548 case CODE_FOR_avx512f_cmpv8df3_mask_round:
37549 case CODE_FOR_avx512f_cmpv16sf3_mask_round:
37550 case CODE_FOR_avx512f_vmcmpv2df3_mask_round:
37551 case CODE_FOR_avx512f_vmcmpv4sf3_mask_round:
37552 error ("the immediate argument must be a 5-bit immediate");
37553 return const0_rtx;
37554 default:
37555 error ("the immediate argument must be an 8-bit immediate");
37556 return const0_rtx;
37560 else if (i == nargs-1)
37562 if (!insn_p->operand[nargs].predicate (op, SImode))
37564 error ("incorrect rounding operand");
37565 return const0_rtx;
37568 /* If there is no rounding, use the normal version of the pattern. */
37569 if (INTVAL (op) == NO_ROUND)
37570 redundant_embed_rnd = 1;
37572 else
37574 if (VECTOR_MODE_P (mode))
37575 op = safe_vector_operand (op, mode);
37577 if (GET_MODE (op) == mode || GET_MODE (op) == VOIDmode)
37579 if (optimize || !match)
37580 op = copy_to_mode_reg (mode, op);
37582 else
37584 op = copy_to_reg (op);
37585 op = simplify_gen_subreg (mode, op, GET_MODE (op), 0);
37589 args[i].op = op;
37590 args[i].mode = mode;
37593 switch (nargs)
37595 case 1:
37596 pat = GEN_FCN (icode) (target, args[0].op);
37597 break;
37598 case 2:
37599 pat = GEN_FCN (icode) (target, args[0].op, args[1].op);
37600 break;
37601 case 3:
37602 pat = GEN_FCN (icode) (target, args[0].op, args[1].op,
37603 args[2].op);
37604 break;
37605 case 4:
37606 pat = GEN_FCN (icode) (target, args[0].op, args[1].op,
37607 args[2].op, args[3].op);
37608 break;
37609 case 5:
37610 pat = GEN_FCN (icode) (target, args[0].op, args[1].op,
37611 args[2].op, args[3].op, args[4].op);
break;
37612 case 6:
37613 pat = GEN_FCN (icode) (target, args[0].op, args[1].op,
37614 args[2].op, args[3].op, args[4].op,
37615 args[5].op);
37616 break;
37617 default:
37618 gcc_unreachable ();
37621 if (!pat)
37622 return 0;
37624 if (redundant_embed_rnd)
37625 pat = ix86_erase_embedded_rounding (pat);
37627 emit_insn (pat);
37628 return target;
37631 /* Subroutine of ix86_expand_builtin to take care of special insns
37632 with variable number of operands. */
37634 static rtx
37635 ix86_expand_special_args_builtin (const struct builtin_description *d,
37636 tree exp, rtx target)
37638 tree arg;
37639 rtx pat, op;
37640 unsigned int i, nargs, arg_adjust, memory;
37641 bool aligned_mem = false;
37642 struct
37644 rtx op;
37645 machine_mode mode;
37646 } args[3];
37647 enum insn_code icode = d->icode;
37648 bool last_arg_constant = false;
37649 const struct insn_data_d *insn_p = &insn_data[icode];
37650 machine_mode tmode = insn_p->operand[0].mode;
37651 enum { load, store } klass;
37653 switch ((enum ix86_builtin_func_type) d->flag)
37655 case VOID_FTYPE_VOID:
37656 emit_insn (GEN_FCN (icode) (target));
37657 return 0;
37658 case VOID_FTYPE_UINT64:
37659 case VOID_FTYPE_UNSIGNED:
37660 nargs = 0;
37661 klass = store;
37662 memory = 0;
37663 break;
37665 case INT_FTYPE_VOID:
37666 case USHORT_FTYPE_VOID:
37667 case UINT64_FTYPE_VOID:
37668 case UNSIGNED_FTYPE_VOID:
37669 nargs = 0;
37670 klass = load;
37671 memory = 0;
37672 break;
37673 case UINT64_FTYPE_PUNSIGNED:
37674 case V2DI_FTYPE_PV2DI:
37675 case V4DI_FTYPE_PV4DI:
37676 case V32QI_FTYPE_PCCHAR:
37677 case V16QI_FTYPE_PCCHAR:
37678 case V8SF_FTYPE_PCV4SF:
37679 case V8SF_FTYPE_PCFLOAT:
37680 case V4SF_FTYPE_PCFLOAT:
37681 case V4DF_FTYPE_PCV2DF:
37682 case V4DF_FTYPE_PCDOUBLE:
37683 case V2DF_FTYPE_PCDOUBLE:
37684 case VOID_FTYPE_PVOID:
37685 case V16SI_FTYPE_PV4SI:
37686 case V16SF_FTYPE_PV4SF:
37687 case V8DI_FTYPE_PV4DI:
37688 case V8DI_FTYPE_PV8DI:
37689 case V8DF_FTYPE_PV4DF:
37690 nargs = 1;
37691 klass = load;
37692 memory = 0;
37693 switch (icode)
37695 case CODE_FOR_sse4_1_movntdqa:
37696 case CODE_FOR_avx2_movntdqa:
37697 case CODE_FOR_avx512f_movntdqa:
37698 aligned_mem = true;
37699 break;
37700 default:
37701 break;
37703 break;
37704 case VOID_FTYPE_PV2SF_V4SF:
37705 case VOID_FTYPE_PV8DI_V8DI:
37706 case VOID_FTYPE_PV4DI_V4DI:
37707 case VOID_FTYPE_PV2DI_V2DI:
37708 case VOID_FTYPE_PCHAR_V32QI:
37709 case VOID_FTYPE_PCHAR_V16QI:
37710 case VOID_FTYPE_PFLOAT_V16SF:
37711 case VOID_FTYPE_PFLOAT_V8SF:
37712 case VOID_FTYPE_PFLOAT_V4SF:
37713 case VOID_FTYPE_PDOUBLE_V8DF:
37714 case VOID_FTYPE_PDOUBLE_V4DF:
37715 case VOID_FTYPE_PDOUBLE_V2DF:
37716 case VOID_FTYPE_PLONGLONG_LONGLONG:
37717 case VOID_FTYPE_PULONGLONG_ULONGLONG:
37718 case VOID_FTYPE_PINT_INT:
37719 nargs = 1;
37720 klass = store;
37721 /* Reserve memory operand for target. */
37722 memory = ARRAY_SIZE (args);
37723 switch (icode)
37725 /* These builtins and instructions require the memory
37726 to be properly aligned. */
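/* For example, vector non-temporal stores such as movntps and movntdq
fault on misaligned operands, so the MEM built for them must carry the
full mode alignment. */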
37727 case CODE_FOR_avx_movntv4di:
37728 case CODE_FOR_sse2_movntv2di:
37729 case CODE_FOR_avx_movntv8sf:
37730 case CODE_FOR_sse_movntv4sf:
37731 case CODE_FOR_sse4a_vmmovntv4sf:
37732 case CODE_FOR_avx_movntv4df:
37733 case CODE_FOR_sse2_movntv2df:
37734 case CODE_FOR_sse4a_vmmovntv2df:
37735 case CODE_FOR_sse2_movntidi:
37736 case CODE_FOR_sse_movntq:
37737 case CODE_FOR_sse2_movntisi:
37738 case CODE_FOR_avx512f_movntv16sf:
37739 case CODE_FOR_avx512f_movntv8df:
37740 case CODE_FOR_avx512f_movntv8di:
37741 aligned_mem = true;
37742 break;
37743 default:
37744 break;
37746 break;
37747 case V4SF_FTYPE_V4SF_PCV2SF:
37748 case V2DF_FTYPE_V2DF_PCDOUBLE:
37749 nargs = 2;
37750 klass = load;
37751 memory = 1;
37752 break;
37753 case V8SF_FTYPE_PCV8SF_V8SI:
37754 case V4DF_FTYPE_PCV4DF_V4DI:
37755 case V4SF_FTYPE_PCV4SF_V4SI:
37756 case V2DF_FTYPE_PCV2DF_V2DI:
37757 case V8SI_FTYPE_PCV8SI_V8SI:
37758 case V4DI_FTYPE_PCV4DI_V4DI:
37759 case V4SI_FTYPE_PCV4SI_V4SI:
37760 case V2DI_FTYPE_PCV2DI_V2DI:
37761 nargs = 2;
37762 klass = load;
37763 memory = 0;
37764 break;
37765 case VOID_FTYPE_PV8DF_V8DF_QI:
37766 case VOID_FTYPE_PV16SF_V16SF_HI:
37767 case VOID_FTYPE_PV8DI_V8DI_QI:
37768 case VOID_FTYPE_PV4DI_V4DI_QI:
37769 case VOID_FTYPE_PV2DI_V2DI_QI:
37770 case VOID_FTYPE_PV16SI_V16SI_HI:
37771 case VOID_FTYPE_PV8SI_V8SI_QI:
37772 case VOID_FTYPE_PV4SI_V4SI_QI:
37773 switch (icode)
37775 /* These builtins and instructions require the memory
37776 to be properly aligned. */
37777 case CODE_FOR_avx512f_storev16sf_mask:
37778 case CODE_FOR_avx512f_storev16si_mask:
37779 case CODE_FOR_avx512f_storev8df_mask:
37780 case CODE_FOR_avx512f_storev8di_mask:
37781 case CODE_FOR_avx512vl_storev8sf_mask:
37782 case CODE_FOR_avx512vl_storev8si_mask:
37783 case CODE_FOR_avx512vl_storev4df_mask:
37784 case CODE_FOR_avx512vl_storev4di_mask:
37785 case CODE_FOR_avx512vl_storev4sf_mask:
37786 case CODE_FOR_avx512vl_storev4si_mask:
37787 case CODE_FOR_avx512vl_storev2df_mask:
37788 case CODE_FOR_avx512vl_storev2di_mask:
37789 aligned_mem = true;
37790 break;
37791 default:
37792 break;
37794 /* FALLTHRU */
37795 case VOID_FTYPE_PV8SF_V8SI_V8SF:
37796 case VOID_FTYPE_PV4DF_V4DI_V4DF:
37797 case VOID_FTYPE_PV4SF_V4SI_V4SF:
37798 case VOID_FTYPE_PV2DF_V2DI_V2DF:
37799 case VOID_FTYPE_PV8SI_V8SI_V8SI:
37800 case VOID_FTYPE_PV4DI_V4DI_V4DI:
37801 case VOID_FTYPE_PV4SI_V4SI_V4SI:
37802 case VOID_FTYPE_PV2DI_V2DI_V2DI:
37803 case VOID_FTYPE_PDOUBLE_V2DF_QI:
37804 case VOID_FTYPE_PFLOAT_V4SF_QI:
37805 case VOID_FTYPE_PV8SI_V8DI_QI:
37806 case VOID_FTYPE_PV8HI_V8DI_QI:
37807 case VOID_FTYPE_PV16HI_V16SI_HI:
37808 case VOID_FTYPE_PV16QI_V8DI_QI:
37809 case VOID_FTYPE_PV16QI_V16SI_HI:
37810 case VOID_FTYPE_PV4SI_V4DI_QI:
37811 case VOID_FTYPE_PV4SI_V2DI_QI:
37812 case VOID_FTYPE_PV8HI_V4DI_QI:
37813 case VOID_FTYPE_PV8HI_V2DI_QI:
37814 case VOID_FTYPE_PV8HI_V8SI_QI:
37815 case VOID_FTYPE_PV8HI_V4SI_QI:
37816 case VOID_FTYPE_PV16QI_V4DI_QI:
37817 case VOID_FTYPE_PV16QI_V2DI_QI:
37818 case VOID_FTYPE_PV16QI_V8SI_QI:
37819 case VOID_FTYPE_PV16QI_V4SI_QI:
37820 case VOID_FTYPE_PV8HI_V8HI_QI:
37821 case VOID_FTYPE_PV16HI_V16HI_HI:
37822 case VOID_FTYPE_PV32HI_V32HI_SI:
37823 case VOID_FTYPE_PV16QI_V16QI_HI:
37824 case VOID_FTYPE_PV32QI_V32QI_SI:
37825 case VOID_FTYPE_PV64QI_V64QI_DI:
37826 case VOID_FTYPE_PV4DF_V4DF_QI:
37827 case VOID_FTYPE_PV2DF_V2DF_QI:
37828 case VOID_FTYPE_PV8SF_V8SF_QI:
37829 case VOID_FTYPE_PV4SF_V4SF_QI:
37830 nargs = 2;
37831 klass = store;
37832 /* Reserve memory operand for target. */
37833 memory = ARRAY_SIZE (args);
37834 break;
37835 case V4SF_FTYPE_PCV4SF_V4SF_QI:
37836 case V8SF_FTYPE_PCV8SF_V8SF_QI:
37837 case V16SF_FTYPE_PCV16SF_V16SF_HI:
37838 case V4SI_FTYPE_PCV4SI_V4SI_QI:
37839 case V8SI_FTYPE_PCV8SI_V8SI_QI:
37840 case V16SI_FTYPE_PCV16SI_V16SI_HI:
37841 case V2DF_FTYPE_PCV2DF_V2DF_QI:
37842 case V4DF_FTYPE_PCV4DF_V4DF_QI:
37843 case V8DF_FTYPE_PCV8DF_V8DF_QI:
37844 case V2DI_FTYPE_PCV2DI_V2DI_QI:
37845 case V4DI_FTYPE_PCV4DI_V4DI_QI:
37846 case V8DI_FTYPE_PCV8DI_V8DI_QI:
37847 case V2DF_FTYPE_PCDOUBLE_V2DF_QI:
37848 case V4SF_FTYPE_PCFLOAT_V4SF_QI:
37849 case V8HI_FTYPE_PCV8HI_V8HI_QI:
37850 case V16HI_FTYPE_PCV16HI_V16HI_HI:
37851 case V32HI_FTYPE_PCV32HI_V32HI_SI:
37852 case V16QI_FTYPE_PCV16QI_V16QI_HI:
37853 case V32QI_FTYPE_PCV32QI_V32QI_SI:
37854 case V64QI_FTYPE_PCV64QI_V64QI_DI:
37855 nargs = 3;
37856 klass = load;
37857 memory = 0;
37858 switch (icode)
37860 /* These builtins and instructions require the memory
37861 to be properly aligned. */
37862 case CODE_FOR_avx512f_loadv16sf_mask:
37863 case CODE_FOR_avx512f_loadv16si_mask:
37864 case CODE_FOR_avx512f_loadv8df_mask:
37865 case CODE_FOR_avx512f_loadv8di_mask:
37866 case CODE_FOR_avx512vl_loadv8sf_mask:
37867 case CODE_FOR_avx512vl_loadv8si_mask:
37868 case CODE_FOR_avx512vl_loadv4df_mask:
37869 case CODE_FOR_avx512vl_loadv4di_mask:
37870 case CODE_FOR_avx512vl_loadv4sf_mask:
37871 case CODE_FOR_avx512vl_loadv4si_mask:
37872 case CODE_FOR_avx512vl_loadv2df_mask:
37873 case CODE_FOR_avx512vl_loadv2di_mask:
37874 case CODE_FOR_avx512bw_loadv64qi_mask:
37875 case CODE_FOR_avx512vl_loadv32qi_mask:
37876 case CODE_FOR_avx512vl_loadv16qi_mask:
37877 case CODE_FOR_avx512bw_loadv32hi_mask:
37878 case CODE_FOR_avx512vl_loadv16hi_mask:
37879 case CODE_FOR_avx512vl_loadv8hi_mask:
37880 aligned_mem = true;
37881 break;
37882 default:
37883 break;
37885 break;
37886 case VOID_FTYPE_UINT_UINT_UINT:
37887 case VOID_FTYPE_UINT64_UINT_UINT:
37888 case UCHAR_FTYPE_UINT_UINT_UINT:
37889 case UCHAR_FTYPE_UINT64_UINT_UINT:
37890 nargs = 3;
37891 klass = load;
37892 memory = ARRAY_SIZE (args);
37893 last_arg_constant = true;
37894 break;
37895 default:
37896 gcc_unreachable ();
37899 gcc_assert (nargs <= ARRAY_SIZE (args));
37901 if (klass == store)
37903 arg = CALL_EXPR_ARG (exp, 0);
37904 op = expand_normal (arg);
37905 gcc_assert (target == 0);
37906 if (memory)
37908 op = ix86_zero_extend_to_Pmode (op);
37909 target = gen_rtx_MEM (tmode, op);
37910 /* target at this point has just BITS_PER_UNIT MEM_ALIGN
37911 on it. Try to improve it using get_pointer_alignment,
37912 and if the special builtin is one that requires strict
37913 mode alignment, also from its GET_MODE_ALIGNMENT.
37914 Failure to do so could lead to ix86_legitimate_combined_insn
37915 rejecting all changes to such insns. */
37916 unsigned int align = get_pointer_alignment (arg);
37917 if (aligned_mem && align < GET_MODE_ALIGNMENT (tmode))
37918 align = GET_MODE_ALIGNMENT (tmode);
37919 if (MEM_ALIGN (target) < align)
37920 set_mem_align (target, align);
37922 else
37923 target = force_reg (tmode, op);
37924 arg_adjust = 1;
37926 else
37928 arg_adjust = 0;
37929 if (optimize
37930 || target == 0
37931 || !register_operand (target, tmode)
37932 || GET_MODE (target) != tmode)
37933 target = gen_reg_rtx (tmode);
37936 for (i = 0; i < nargs; i++)
37938 machine_mode mode = insn_p->operand[i + 1].mode;
37939 bool match;
37941 arg = CALL_EXPR_ARG (exp, i + arg_adjust);
37942 op = expand_normal (arg);
37943 match = insn_p->operand[i + 1].predicate (op, mode);
37945 if (last_arg_constant && (i + 1) == nargs)
37947 if (!match)
37949 if (icode == CODE_FOR_lwp_lwpvalsi3
37950 || icode == CODE_FOR_lwp_lwpinssi3
37951 || icode == CODE_FOR_lwp_lwpvaldi3
37952 || icode == CODE_FOR_lwp_lwpinsdi3)
37953 error ("the last argument must be a 32-bit immediate");
37954 else
37955 error ("the last argument must be an 8-bit immediate");
37956 return const0_rtx;
37959 else
37961 if (i == memory)
37963 /* This must be the memory operand. */
37964 op = ix86_zero_extend_to_Pmode (op);
37965 op = gen_rtx_MEM (mode, op);
37966 /* op at this point has just BITS_PER_UNIT MEM_ALIGN
37967 on it. Try to improve it using get_pointer_alignment,
37968 and if the special builtin is one that requires strict
37969 mode alignment, also from its GET_MODE_ALIGNMENT.
37970 Failure to do so could lead to ix86_legitimate_combined_insn
37971 rejecting all changes to such insns. */
37972 unsigned int align = get_pointer_alignment (arg);
37973 if (aligned_mem && align < GET_MODE_ALIGNMENT (mode))
37974 align = GET_MODE_ALIGNMENT (mode);
37975 if (MEM_ALIGN (op) < align)
37976 set_mem_align (op, align);
37978 else
37980 /* This must be a register. */
37981 if (VECTOR_MODE_P (mode))
37982 op = safe_vector_operand (op, mode);
37984 if (GET_MODE (op) == mode || GET_MODE (op) == VOIDmode)
37985 op = copy_to_mode_reg (mode, op);
37986 else
37988 op = copy_to_reg (op);
37989 op = simplify_gen_subreg (mode, op, GET_MODE (op), 0);
37994 args[i].op = op;
37995 args[i].mode = mode;
37998 switch (nargs)
38000 case 0:
38001 pat = GEN_FCN (icode) (target);
38002 break;
38003 case 1:
38004 pat = GEN_FCN (icode) (target, args[0].op);
38005 break;
38006 case 2:
38007 pat = GEN_FCN (icode) (target, args[0].op, args[1].op);
38008 break;
38009 case 3:
38010 pat = GEN_FCN (icode) (target, args[0].op, args[1].op, args[2].op);
38011 break;
38012 default:
38013 gcc_unreachable ();
38016 if (! pat)
38017 return 0;
38018 emit_insn (pat);
38019 return klass == store ? 0 : target;
38022 /* Return the integer constant in ARG. Constrain it to be in the range
38023 of the subparts of VEC_TYPE; issue an error if not. */
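/* For example, for a V4SF vector type the valid selectors are 0..3;
anything outside that range is diagnosed and 0 is returned instead. */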
38025 static int
38026 get_element_number (tree vec_type, tree arg)
38028 unsigned HOST_WIDE_INT elt, max = TYPE_VECTOR_SUBPARTS (vec_type) - 1;
38030 if (!tree_fits_uhwi_p (arg)
38031 || (elt = tree_to_uhwi (arg), elt > max))
38033 error ("selector must be an integer constant in the range 0..%wi", max);
38034 return 0;
38037 return elt;
38040 /* A subroutine of ix86_expand_builtin. These builtins are a wrapper around
38041 ix86_expand_vector_init. We DO have language-level syntax for this, in
38042 the form of (type){ init-list }. Except that since we can't place emms
38043 instructions from inside the compiler, we can't allow the use of MMX
38044 registers unless the user explicitly asks for it. So we do *not* define
38045 vec_set/vec_extract/vec_init patterns for MMX modes in mmx.md. Instead
38046 we have builtins invoked by mmintrin.h that give us license to emit
38047 these sorts of instructions. */
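/* For instance, an MMX intrinsic such as _mm_set_pi16 from mmintrin.h is
expected to reach this expander through its vec_init builtin; the intrinsic
name is mentioned only as an illustration. */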
38049 static rtx
38050 ix86_expand_vec_init_builtin (tree type, tree exp, rtx target)
38052 machine_mode tmode = TYPE_MODE (type);
38053 machine_mode inner_mode = GET_MODE_INNER (tmode);
38054 int i, n_elt = GET_MODE_NUNITS (tmode);
38055 rtvec v = rtvec_alloc (n_elt);
38057 gcc_assert (VECTOR_MODE_P (tmode));
38058 gcc_assert (call_expr_nargs (exp) == n_elt);
38060 for (i = 0; i < n_elt; ++i)
38062 rtx x = expand_normal (CALL_EXPR_ARG (exp, i));
38063 RTVEC_ELT (v, i) = gen_lowpart (inner_mode, x);
38066 if (!target || !register_operand (target, tmode))
38067 target = gen_reg_rtx (tmode);
38069 ix86_expand_vector_init (true, target, gen_rtx_PARALLEL (tmode, v));
38070 return target;
38073 /* A subroutine of ix86_expand_builtin. These builtins are a wrapper around
38074 ix86_expand_vector_extract. They would be redundant (for non-MMX) if we
38075 had a language-level syntax for referencing vector elements. */
38077 static rtx
38078 ix86_expand_vec_ext_builtin (tree exp, rtx target)
38080 machine_mode tmode, mode0;
38081 tree arg0, arg1;
38082 int elt;
38083 rtx op0;
38085 arg0 = CALL_EXPR_ARG (exp, 0);
38086 arg1 = CALL_EXPR_ARG (exp, 1);
38088 op0 = expand_normal (arg0);
38089 elt = get_element_number (TREE_TYPE (arg0), arg1);
38091 tmode = TYPE_MODE (TREE_TYPE (TREE_TYPE (arg0)));
38092 mode0 = TYPE_MODE (TREE_TYPE (arg0));
38093 gcc_assert (VECTOR_MODE_P (mode0));
38095 op0 = force_reg (mode0, op0);
38097 if (optimize || !target || !register_operand (target, tmode))
38098 target = gen_reg_rtx (tmode);
38100 ix86_expand_vector_extract (true, target, op0, elt);
38102 return target;
38105 /* A subroutine of ix86_expand_builtin. These builtins are a wrapper around
38106 ix86_expand_vector_set. They would be redundant (for non-MMX) if we had
38107 a language-level syntax for referencing vector elements. */
38109 static rtx
38110 ix86_expand_vec_set_builtin (tree exp)
38112 machine_mode tmode, mode1;
38113 tree arg0, arg1, arg2;
38114 int elt;
38115 rtx op0, op1, target;
38117 arg0 = CALL_EXPR_ARG (exp, 0);
38118 arg1 = CALL_EXPR_ARG (exp, 1);
38119 arg2 = CALL_EXPR_ARG (exp, 2);
38121 tmode = TYPE_MODE (TREE_TYPE (arg0));
38122 mode1 = TYPE_MODE (TREE_TYPE (TREE_TYPE (arg0)));
38123 gcc_assert (VECTOR_MODE_P (tmode));
38125 op0 = expand_expr (arg0, NULL_RTX, tmode, EXPAND_NORMAL);
38126 op1 = expand_expr (arg1, NULL_RTX, mode1, EXPAND_NORMAL);
38127 elt = get_element_number (TREE_TYPE (arg0), arg2);
38129 if (GET_MODE (op1) != mode1 && GET_MODE (op1) != VOIDmode)
38130 op1 = convert_modes (mode1, GET_MODE (op1), op1, true);
38132 op0 = force_reg (tmode, op0);
38133 op1 = force_reg (mode1, op1);
38135 /* OP0 is the source of these builtin functions and shouldn't be
38136 modified. Create a copy, use it and return it as target. */
38137 target = gen_reg_rtx (tmode);
38138 emit_move_insn (target, op0);
38139 ix86_expand_vector_set (true, target, op1, elt);
38141 return target;
38144 /* Emit conditional move of SRC to DST with condition
38145 OP1 CODE OP2. */
38146 static void
38147 ix86_emit_cmove (rtx dst, rtx src, enum rtx_code code, rtx op1, rtx op2)
38149 rtx t;
38151 if (TARGET_CMOVE)
38153 t = ix86_expand_compare (code, op1, op2);
38154 emit_insn (gen_rtx_SET (VOIDmode, dst,
38155 gen_rtx_IF_THEN_ELSE (GET_MODE (dst), t,
38156 src, dst)));
38158 else
38160 rtx nomove = gen_label_rtx ();
38161 emit_cmp_and_jump_insns (op1, op2, reverse_condition (code),
38162 const0_rtx, GET_MODE (op1), 1, nomove);
38163 emit_move_insn (dst, src);
38164 emit_label (nomove);
38168 /* Choose the max of DST and SRC and store it in DST. */
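/* This is an unsigned maximum (LTU condition). It is used below both on
lower bounds and on one's-complement upper bounds, where the larger stored
value is always the narrower bound. */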
38169 static void
38170 ix86_emit_move_max (rtx dst, rtx src)
38172 ix86_emit_cmove (dst, src, LTU, dst, src);
38175 /* Expand an expression EXP that calls a built-in function,
38176 with result going to TARGET if that's convenient
38177 (and in mode MODE if that's convenient).
38178 SUBTARGET may be used as the target for computing one of EXP's operands.
38179 IGNORE is nonzero if the value is to be ignored. */
38181 static rtx
38182 ix86_expand_builtin (tree exp, rtx target, rtx subtarget,
38183 machine_mode mode, int ignore)
38185 const struct builtin_description *d;
38186 size_t i;
38187 enum insn_code icode;
38188 tree fndecl = TREE_OPERAND (CALL_EXPR_FN (exp), 0);
38189 tree arg0, arg1, arg2, arg3, arg4;
38190 rtx op0, op1, op2, op3, op4, pat, insn;
38191 machine_mode mode0, mode1, mode2, mode3, mode4;
38192 unsigned int fcode = DECL_FUNCTION_CODE (fndecl);
38194 /* For CPU builtins that can be folded, fold first and expand the fold. */
38195 switch (fcode)
38197 case IX86_BUILTIN_CPU_INIT:
38199 /* Make it call __cpu_indicator_init in libgcc. */
38200 tree call_expr, fndecl, type;
38201 type = build_function_type_list (integer_type_node, NULL_TREE);
38202 fndecl = build_fn_decl ("__cpu_indicator_init", type);
38203 call_expr = build_call_expr (fndecl, 0);
38204 return expand_expr (call_expr, target, mode, EXPAND_NORMAL);
38206 case IX86_BUILTIN_CPU_IS:
38207 case IX86_BUILTIN_CPU_SUPPORTS:
38209 tree arg0 = CALL_EXPR_ARG (exp, 0);
38210 tree fold_expr = fold_builtin_cpu (fndecl, &arg0);
38211 gcc_assert (fold_expr != NULL_TREE);
38212 return expand_expr (fold_expr, target, mode, EXPAND_NORMAL);
38216 /* Determine whether the builtin function is available under the current ISA.
38217 Originally the builtin was not created if it wasn't applicable to the
38218 current ISA based on the command line switches. With function specific
38219 options, we need to check in the context of the function making the call
38220 whether it is supported. */
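/* For example, calling an AVX-512 builtin from a function compiled without
the corresponding -mavx512* option is diagnosed here with a "needs isa
option" error instead of silently emitting an unsupported instruction. */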
38221 if (ix86_builtins_isa[fcode].isa
38222 && !(ix86_builtins_isa[fcode].isa & ix86_isa_flags))
38224 char *opts = ix86_target_string (ix86_builtins_isa[fcode].isa, 0, NULL,
38225 NULL, (enum fpmath_unit) 0, false);
38227 if (!opts)
38228 error ("%qE needs unknown isa option", fndecl);
38229 else
38231 gcc_assert (opts != NULL);
38232 error ("%qE needs isa option %s", fndecl, opts);
38233 free (opts);
38235 return const0_rtx;
38238 switch (fcode)
38240 case IX86_BUILTIN_BNDMK:
38241 if (!target
38242 || GET_MODE (target) != BNDmode
38243 || !register_operand (target, BNDmode))
38244 target = gen_reg_rtx (BNDmode);
38246 arg0 = CALL_EXPR_ARG (exp, 0);
38247 arg1 = CALL_EXPR_ARG (exp, 1);
38249 op0 = expand_normal (arg0);
38250 op1 = expand_normal (arg1);
38252 if (!register_operand (op0, Pmode))
38253 op0 = ix86_zero_extend_to_Pmode (op0);
38254 if (!register_operand (op1, Pmode))
38255 op1 = ix86_zero_extend_to_Pmode (op1);
38257 /* Builtin arg1 is the size of the block, but instruction op1 should
38258 be (size - 1). */
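/* E.g. bounds made for a 10-byte object are [lb, lb + 9]; the adjustment
is done by adding constm1_rtx rather than subtracting one. */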
38259 op1 = expand_simple_binop (Pmode, PLUS, op1, constm1_rtx,
38260 NULL_RTX, 1, OPTAB_DIRECT);
38262 emit_insn (BNDmode == BND64mode
38263 ? gen_bnd64_mk (target, op0, op1)
38264 : gen_bnd32_mk (target, op0, op1));
38265 return target;
38267 case IX86_BUILTIN_BNDSTX:
38268 arg0 = CALL_EXPR_ARG (exp, 0);
38269 arg1 = CALL_EXPR_ARG (exp, 1);
38270 arg2 = CALL_EXPR_ARG (exp, 2);
38272 op0 = expand_normal (arg0);
38273 op1 = expand_normal (arg1);
38274 op2 = expand_normal (arg2);
38276 if (!register_operand (op0, Pmode))
38277 op0 = ix86_zero_extend_to_Pmode (op0);
38278 if (!register_operand (op1, BNDmode))
38279 op1 = copy_to_mode_reg (BNDmode, op1);
38280 if (!register_operand (op2, Pmode))
38281 op2 = ix86_zero_extend_to_Pmode (op2);
38283 emit_insn (BNDmode == BND64mode
38284 ? gen_bnd64_stx (op2, op0, op1)
38285 : gen_bnd32_stx (op2, op0, op1));
38286 return 0;
38288 case IX86_BUILTIN_BNDLDX:
38289 if (!target
38290 || GET_MODE (target) != BNDmode
38291 || !register_operand (target, BNDmode))
38292 target = gen_reg_rtx (BNDmode);
38294 arg0 = CALL_EXPR_ARG (exp, 0);
38295 arg1 = CALL_EXPR_ARG (exp, 1);
38297 op0 = expand_normal (arg0);
38298 op1 = expand_normal (arg1);
38300 if (!register_operand (op0, Pmode))
38301 op0 = ix86_zero_extend_to_Pmode (op0);
38302 if (!register_operand (op1, Pmode))
38303 op1 = ix86_zero_extend_to_Pmode (op1);
38305 emit_insn (BNDmode == BND64mode
38306 ? gen_bnd64_ldx (target, op0, op1)
38307 : gen_bnd32_ldx (target, op0, op1));
38308 return target;
38310 case IX86_BUILTIN_BNDCL:
38311 arg0 = CALL_EXPR_ARG (exp, 0);
38312 arg1 = CALL_EXPR_ARG (exp, 1);
38314 op0 = expand_normal (arg0);
38315 op1 = expand_normal (arg1);
38317 if (!register_operand (op0, Pmode))
38318 op0 = ix86_zero_extend_to_Pmode (op0);
38319 if (!register_operand (op1, BNDmode))
38320 op1 = copy_to_mode_reg (BNDmode, op1);
38322 emit_insn (BNDmode == BND64mode
38323 ? gen_bnd64_cl (op1, op0)
38324 : gen_bnd32_cl (op1, op0));
38325 return 0;
38327 case IX86_BUILTIN_BNDCU:
38328 arg0 = CALL_EXPR_ARG (exp, 0);
38329 arg1 = CALL_EXPR_ARG (exp, 1);
38331 op0 = expand_normal (arg0);
38332 op1 = expand_normal (arg1);
38334 if (!register_operand (op0, Pmode))
38335 op0 = ix86_zero_extend_to_Pmode (op0);
38336 if (!register_operand (op1, BNDmode))
38337 op1 = copy_to_mode_reg (BNDmode, op1);
38339 emit_insn (BNDmode == BND64mode
38340 ? gen_bnd64_cu (op1, op0)
38341 : gen_bnd32_cu (op1, op0));
38342 return 0;
38344 case IX86_BUILTIN_BNDRET:
38345 arg0 = CALL_EXPR_ARG (exp, 0);
38346 gcc_assert (TREE_CODE (arg0) == SSA_NAME);
38347 target = chkp_get_rtl_bounds (arg0);
38349 /* If no bounds were specified for the returned value,
38350 then use INIT bounds. This usually happens when
38351 some built-in function is expanded. */
38352 if (!target)
38354 rtx t1 = gen_reg_rtx (Pmode);
38355 rtx t2 = gen_reg_rtx (Pmode);
38356 target = gen_reg_rtx (BNDmode);
38357 emit_move_insn (t1, const0_rtx);
38358 emit_move_insn (t2, constm1_rtx);
38359 emit_insn (BNDmode == BND64mode
38360 ? gen_bnd64_mk (target, t1, t2)
38361 : gen_bnd32_mk (target, t1, t2));
38364 gcc_assert (target && REG_P (target));
38365 return target;
38367 case IX86_BUILTIN_BNDNARROW:
38369 rtx m1, m1h1, m1h2, lb, ub, t1;
38371 /* Return value and lb. */
38372 arg0 = CALL_EXPR_ARG (exp, 0);
38373 /* Bounds. */
38374 arg1 = CALL_EXPR_ARG (exp, 1);
38375 /* Size. */
38376 arg2 = CALL_EXPR_ARG (exp, 2);
38378 lb = expand_normal (arg0);
38379 op1 = expand_normal (arg1);
38380 op2 = expand_normal (arg2);
38382 /* Size was passed but we need to use (size - 1) as for bndmk. */
38383 op2 = expand_simple_binop (Pmode, PLUS, op2, constm1_rtx,
38384 NULL_RTX, 1, OPTAB_DIRECT);
38386 /* Add LB to size and invert to get UB. */
38387 op2 = expand_simple_binop (Pmode, PLUS, op2, lb,
38388 op2, 1, OPTAB_DIRECT);
38389 ub = expand_simple_unop (Pmode, NOT, op2, op2, 1);
38391 if (!register_operand (lb, Pmode))
38392 lb = ix86_zero_extend_to_Pmode (lb);
38393 if (!register_operand (ub, Pmode))
38394 ub = ix86_zero_extend_to_Pmode (ub);
38396 /* We need to move bounds to memory before any computations. */
38397 if (MEM_P (op1))
38398 m1 = op1;
38399 else
38401 m1 = assign_386_stack_local (BNDmode, SLOT_TEMP);
38402 emit_move_insn (m1, op1);
38405 /* Generate mem expression to be used for access to LB and UB. */
38406 m1h1 = adjust_address (m1, Pmode, 0);
38407 m1h2 = adjust_address (m1, Pmode, GET_MODE_SIZE (Pmode));
38409 t1 = gen_reg_rtx (Pmode);
38411 /* Compute LB. */
38412 emit_move_insn (t1, m1h1);
38413 ix86_emit_move_max (t1, lb);
38414 emit_move_insn (m1h1, t1);
38416 /* Compute UB. UB is stored in 1's complement form. Therefore
38417 we also use max here. */
38418 emit_move_insn (t1, m1h2);
38419 ix86_emit_move_max (t1, ub);
38420 emit_move_insn (m1h2, t1);
38422 op2 = gen_reg_rtx (BNDmode);
38423 emit_move_insn (op2, m1);
38425 return chkp_join_splitted_slot (lb, op2);
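/* Reader's note (illustrative): bound narrowing clamps the new object
   range into the existing bounds, i.e. the result is
   [max (old_lb, lb), min (old_ub, lb + size - 1)].  Because the upper
   bound is kept in one's complement form, the min on UB becomes a max
   on the stored representation, which is why ix86_emit_move_max is
   used for both halves above.  */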
38428 case IX86_BUILTIN_BNDINT:
38430 rtx res, rh1, rh2, lb1, lb2, ub1, ub2;
38432 if (!target
38433 || GET_MODE (target) != BNDmode
38434 || !register_operand (target, BNDmode))
38435 target = gen_reg_rtx (BNDmode);
38437 arg0 = CALL_EXPR_ARG (exp, 0);
38438 arg1 = CALL_EXPR_ARG (exp, 1);
38440 op0 = expand_normal (arg0);
38441 op1 = expand_normal (arg1);
38443 res = assign_386_stack_local (BNDmode, SLOT_TEMP);
38444 rh1 = adjust_address (res, Pmode, 0);
38445 rh2 = adjust_address (res, Pmode, GET_MODE_SIZE (Pmode));
38447 /* Put the first bounds into temporaries. */
38448 lb1 = gen_reg_rtx (Pmode);
38449 ub1 = gen_reg_rtx (Pmode);
38450 if (MEM_P (op0))
38452 emit_move_insn (lb1, adjust_address (op0, Pmode, 0));
38453 emit_move_insn (ub1, adjust_address (op0, Pmode,
38454 GET_MODE_SIZE (Pmode)));
38456 else
38458 emit_move_insn (res, op0);
38459 emit_move_insn (lb1, rh1);
38460 emit_move_insn (ub1, rh2);
38463 /* Put the second bounds into temporaries. */
38464 lb2 = gen_reg_rtx (Pmode);
38465 ub2 = gen_reg_rtx (Pmode);
38466 if (MEM_P (op1))
38468 emit_move_insn (lb2, adjust_address (op1, Pmode, 0));
38469 emit_move_insn (ub2, adjust_address (op1, Pmode,
38470 GET_MODE_SIZE (Pmode)));
38472 else
38474 emit_move_insn (res, op1);
38475 emit_move_insn (lb2, rh1);
38476 emit_move_insn (ub2, rh2);
38479 /* Compute LB. */
38480 ix86_emit_move_max (lb1, lb2);
38481 emit_move_insn (rh1, lb1);
38483 /* Compute UB. UB is stored in 1's complement form. Therefore
38484 we also use max here. */
38485 ix86_emit_move_max (ub1, ub2);
38486 emit_move_insn (rh2, ub1);
38488 emit_move_insn (target, res);
38490 return target;
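/* Reader's note (illustrative): intersecting [lb1, ub1] with
   [lb2, ub2] yields [max (lb1, lb2), min (ub1, ub2)]; as above, the
   one's-complement encoding of UB turns the min into a max on the
   stored values.  */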
38493 case IX86_BUILTIN_SIZEOF:
38495 tree name;
38496 rtx symbol;
38498 if (!target
38499 || GET_MODE (target) != Pmode
38500 || !register_operand (target, Pmode))
38501 target = gen_reg_rtx (Pmode);
38503 arg0 = CALL_EXPR_ARG (exp, 0);
38504 gcc_assert (TREE_CODE (arg0) == VAR_DECL);
38506 name = DECL_ASSEMBLER_NAME (arg0);
38507 symbol = gen_rtx_SYMBOL_REF (Pmode, IDENTIFIER_POINTER (name));
38509 emit_insn (Pmode == SImode
38510 ? gen_move_size_reloc_si (target, symbol)
38511 : gen_move_size_reloc_di (target, symbol));
38513 return target;
38516 case IX86_BUILTIN_BNDLOWER:
38518 rtx mem, hmem;
38520 if (!target
38521 || GET_MODE (target) != Pmode
38522 || !register_operand (target, Pmode))
38523 target = gen_reg_rtx (Pmode);
38525 arg0 = CALL_EXPR_ARG (exp, 0);
38526 op0 = expand_normal (arg0);
38528 /* We need to move bounds to memory first. */
38529 if (MEM_P (op0))
38530 mem = op0;
38531 else
38533 mem = assign_386_stack_local (BNDmode, SLOT_TEMP);
38534 emit_move_insn (mem, op0);
38537 /* Generate mem expression to access LB and load it. */
38538 hmem = adjust_address (mem, Pmode, 0);
38539 emit_move_insn (target, hmem);
38541 return target;
38544 case IX86_BUILTIN_BNDUPPER:
38546 rtx mem, hmem, res;
38548 if (!target
38549 || GET_MODE (target) != Pmode
38550 || !register_operand (target, Pmode))
38551 target = gen_reg_rtx (Pmode);
38553 arg0 = CALL_EXPR_ARG (exp, 0);
38554 op0 = expand_normal (arg0);
38556 /* We need to move bounds to memory first. */
38557 if (MEM_P (op0))
38558 mem = op0;
38559 else
38561 mem = assign_386_stack_local (BNDmode, SLOT_TEMP);
38562 emit_move_insn (mem, op0);
38565 /* Generate mem expression to access UB. */
38566 hmem = adjust_address (mem, Pmode, GET_MODE_SIZE (Pmode));
38568 /* We need to invert all bits of UB. */
38569 res = expand_simple_unop (Pmode, NOT, hmem, target, 1);
38571 if (res != target)
38572 emit_move_insn (target, res);
38574 return target;
38577 case IX86_BUILTIN_MASKMOVQ:
38578 case IX86_BUILTIN_MASKMOVDQU:
38579 icode = (fcode == IX86_BUILTIN_MASKMOVQ
38580 ? CODE_FOR_mmx_maskmovq
38581 : CODE_FOR_sse2_maskmovdqu);
38582 /* Note the arg order is different from the operand order. */
38583 arg1 = CALL_EXPR_ARG (exp, 0);
38584 arg2 = CALL_EXPR_ARG (exp, 1);
38585 arg0 = CALL_EXPR_ARG (exp, 2);
38586 op0 = expand_normal (arg0);
38587 op1 = expand_normal (arg1);
38588 op2 = expand_normal (arg2);
38589 mode0 = insn_data[icode].operand[0].mode;
38590 mode1 = insn_data[icode].operand[1].mode;
38591 mode2 = insn_data[icode].operand[2].mode;
38593 op0 = ix86_zero_extend_to_Pmode (op0);
38594 op0 = gen_rtx_MEM (mode1, op0);
38596 if (!insn_data[icode].operand[0].predicate (op0, mode0))
38597 op0 = copy_to_mode_reg (mode0, op0);
38598 if (!insn_data[icode].operand[1].predicate (op1, mode1))
38599 op1 = copy_to_mode_reg (mode1, op1);
38600 if (!insn_data[icode].operand[2].predicate (op2, mode2))
38601 op2 = copy_to_mode_reg (mode2, op2);
38602 pat = GEN_FCN (icode) (op0, op1, op2);
38603 if (! pat)
38604 return 0;
38605 emit_insn (pat);
38606 return 0;
38608 case IX86_BUILTIN_LDMXCSR:
38609 op0 = expand_normal (CALL_EXPR_ARG (exp, 0));
38610 target = assign_386_stack_local (SImode, SLOT_TEMP);
38611 emit_move_insn (target, op0);
38612 emit_insn (gen_sse_ldmxcsr (target));
38613 return 0;
38615 case IX86_BUILTIN_STMXCSR:
38616 target = assign_386_stack_local (SImode, SLOT_TEMP);
38617 emit_insn (gen_sse_stmxcsr (target));
38618 return copy_to_mode_reg (SImode, target);
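/* Illustrative use (assuming the usual <xmmintrin.h> wrappers, which
   are not part of this file): the LDMXCSR/STMXCSR builtins back
   _mm_setcsr and _mm_getcsr, e.g.

     unsigned int csr = _mm_getcsr ();
     _mm_setcsr (csr | 0x8040);     example value: FTZ and DAZ bits
*/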
38620 case IX86_BUILTIN_CLFLUSH:
38621 arg0 = CALL_EXPR_ARG (exp, 0);
38622 op0 = expand_normal (arg0);
38623 icode = CODE_FOR_sse2_clflush;
38624 if (!insn_data[icode].operand[0].predicate (op0, Pmode))
38625 op0 = ix86_zero_extend_to_Pmode (op0);
38627 emit_insn (gen_sse2_clflush (op0));
38628 return 0;
38630 case IX86_BUILTIN_CLFLUSHOPT:
38631 arg0 = CALL_EXPR_ARG (exp, 0);
38632 op0 = expand_normal (arg0);
38633 icode = CODE_FOR_clflushopt;
38634 if (!insn_data[icode].operand[0].predicate (op0, Pmode))
38635 op0 = ix86_zero_extend_to_Pmode (op0);
38637 emit_insn (gen_clflushopt (op0));
38638 return 0;
38640 case IX86_BUILTIN_MONITOR:
38641 arg0 = CALL_EXPR_ARG (exp, 0);
38642 arg1 = CALL_EXPR_ARG (exp, 1);
38643 arg2 = CALL_EXPR_ARG (exp, 2);
38644 op0 = expand_normal (arg0);
38645 op1 = expand_normal (arg1);
38646 op2 = expand_normal (arg2);
38647 if (!REG_P (op0))
38648 op0 = ix86_zero_extend_to_Pmode (op0);
38649 if (!REG_P (op1))
38650 op1 = copy_to_mode_reg (SImode, op1);
38651 if (!REG_P (op2))
38652 op2 = copy_to_mode_reg (SImode, op2);
38653 emit_insn (ix86_gen_monitor (op0, op1, op2));
38654 return 0;
38656 case IX86_BUILTIN_MWAIT:
38657 arg0 = CALL_EXPR_ARG (exp, 0);
38658 arg1 = CALL_EXPR_ARG (exp, 1);
38659 op0 = expand_normal (arg0);
38660 op1 = expand_normal (arg1);
38661 if (!REG_P (op0))
38662 op0 = copy_to_mode_reg (SImode, op0);
38663 if (!REG_P (op1))
38664 op1 = copy_to_mode_reg (SImode, op1);
38665 emit_insn (gen_sse3_mwait (op0, op1));
38666 return 0;
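/* Illustrative use (assuming the <pmmintrin.h> wrappers): a typical
   monitor/mwait pairing looks like

     _mm_monitor (addr, 0, 0);     arm the monitor on ADDR's line
     _mm_mwait (0, 0);             wait until that line is written

   which expands through the two builtins handled above.  */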
38668 case IX86_BUILTIN_VEC_INIT_V2SI:
38669 case IX86_BUILTIN_VEC_INIT_V4HI:
38670 case IX86_BUILTIN_VEC_INIT_V8QI:
38671 return ix86_expand_vec_init_builtin (TREE_TYPE (exp), exp, target);
38673 case IX86_BUILTIN_VEC_EXT_V2DF:
38674 case IX86_BUILTIN_VEC_EXT_V2DI:
38675 case IX86_BUILTIN_VEC_EXT_V4SF:
38676 case IX86_BUILTIN_VEC_EXT_V4SI:
38677 case IX86_BUILTIN_VEC_EXT_V8HI:
38678 case IX86_BUILTIN_VEC_EXT_V2SI:
38679 case IX86_BUILTIN_VEC_EXT_V4HI:
38680 case IX86_BUILTIN_VEC_EXT_V16QI:
38681 return ix86_expand_vec_ext_builtin (exp, target);
38683 case IX86_BUILTIN_VEC_SET_V2DI:
38684 case IX86_BUILTIN_VEC_SET_V4SF:
38685 case IX86_BUILTIN_VEC_SET_V4SI:
38686 case IX86_BUILTIN_VEC_SET_V8HI:
38687 case IX86_BUILTIN_VEC_SET_V4HI:
38688 case IX86_BUILTIN_VEC_SET_V16QI:
38689 return ix86_expand_vec_set_builtin (exp);
38691 case IX86_BUILTIN_INFQ:
38692 case IX86_BUILTIN_HUGE_VALQ:
38694 REAL_VALUE_TYPE inf;
38695 rtx tmp;
38697 real_inf (&inf);
38698 tmp = CONST_DOUBLE_FROM_REAL_VALUE (inf, mode);
38700 tmp = validize_mem (force_const_mem (mode, tmp));
38702 if (target == 0)
38703 target = gen_reg_rtx (mode);
38705 emit_move_insn (target, tmp);
38706 return target;
38709 case IX86_BUILTIN_RDPMC:
38710 case IX86_BUILTIN_RDTSC:
38711 case IX86_BUILTIN_RDTSCP:
38713 op0 = gen_reg_rtx (DImode);
38714 op1 = gen_reg_rtx (DImode);
38716 if (fcode == IX86_BUILTIN_RDPMC)
38718 arg0 = CALL_EXPR_ARG (exp, 0);
38719 op2 = expand_normal (arg0);
38720 if (!register_operand (op2, SImode))
38721 op2 = copy_to_mode_reg (SImode, op2);
38723 insn = (TARGET_64BIT
38724 ? gen_rdpmc_rex64 (op0, op1, op2)
38725 : gen_rdpmc (op0, op2));
38726 emit_insn (insn);
38728 else if (fcode == IX86_BUILTIN_RDTSC)
38730 insn = (TARGET_64BIT
38731 ? gen_rdtsc_rex64 (op0, op1)
38732 : gen_rdtsc (op0));
38733 emit_insn (insn);
38735 else
38737 op2 = gen_reg_rtx (SImode);
38739 insn = (TARGET_64BIT
38740 ? gen_rdtscp_rex64 (op0, op1, op2)
38741 : gen_rdtscp (op0, op2));
38742 emit_insn (insn);
38744 arg0 = CALL_EXPR_ARG (exp, 0);
38745 op4 = expand_normal (arg0);
38746 if (!address_operand (op4, VOIDmode))
38748 op4 = convert_memory_address (Pmode, op4);
38749 op4 = copy_addr_to_reg (op4);
38751 emit_move_insn (gen_rtx_MEM (SImode, op4), op2);
38754 if (target == 0)
38756 /* mode is VOIDmode if __builtin_rd* has been called
38757 without lhs. */
38758 if (mode == VOIDmode)
38759 return target;
38760 target = gen_reg_rtx (mode);
38763 if (TARGET_64BIT)
38765 op1 = expand_simple_binop (DImode, ASHIFT, op1, GEN_INT (32),
38766 op1, 1, OPTAB_DIRECT);
38767 op0 = expand_simple_binop (DImode, IOR, op0, op1,
38768 op0, 1, OPTAB_DIRECT);
38771 emit_move_insn (target, op0);
38772 return target;
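/* Reader's note: on 64-bit targets the counter comes back split into
   EDX:EAX, so the shift/ior above reassembles the 64-bit value as
   (hi << 32) | lo.  A caller typically reaches this through a wrapper
   such as

     unsigned long long t = __rdtsc ();

   (illustrative only; the wrapper lives in the intrinsic headers,
   not in this file).  */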
38774 case IX86_BUILTIN_FXSAVE:
38775 case IX86_BUILTIN_FXRSTOR:
38776 case IX86_BUILTIN_FXSAVE64:
38777 case IX86_BUILTIN_FXRSTOR64:
38778 case IX86_BUILTIN_FNSTENV:
38779 case IX86_BUILTIN_FLDENV:
38780 mode0 = BLKmode;
38781 switch (fcode)
38783 case IX86_BUILTIN_FXSAVE:
38784 icode = CODE_FOR_fxsave;
38785 break;
38786 case IX86_BUILTIN_FXRSTOR:
38787 icode = CODE_FOR_fxrstor;
38788 break;
38789 case IX86_BUILTIN_FXSAVE64:
38790 icode = CODE_FOR_fxsave64;
38791 break;
38792 case IX86_BUILTIN_FXRSTOR64:
38793 icode = CODE_FOR_fxrstor64;
38794 break;
38795 case IX86_BUILTIN_FNSTENV:
38796 icode = CODE_FOR_fnstenv;
38797 break;
38798 case IX86_BUILTIN_FLDENV:
38799 icode = CODE_FOR_fldenv;
38800 break;
38801 default:
38802 gcc_unreachable ();
38805 arg0 = CALL_EXPR_ARG (exp, 0);
38806 op0 = expand_normal (arg0);
38808 if (!address_operand (op0, VOIDmode))
38810 op0 = convert_memory_address (Pmode, op0);
38811 op0 = copy_addr_to_reg (op0);
38813 op0 = gen_rtx_MEM (mode0, op0);
38815 pat = GEN_FCN (icode) (op0);
38816 if (pat)
38817 emit_insn (pat);
38818 return 0;
38820 case IX86_BUILTIN_XSAVE:
38821 case IX86_BUILTIN_XRSTOR:
38822 case IX86_BUILTIN_XSAVE64:
38823 case IX86_BUILTIN_XRSTOR64:
38824 case IX86_BUILTIN_XSAVEOPT:
38825 case IX86_BUILTIN_XSAVEOPT64:
38826 case IX86_BUILTIN_XSAVES:
38827 case IX86_BUILTIN_XRSTORS:
38828 case IX86_BUILTIN_XSAVES64:
38829 case IX86_BUILTIN_XRSTORS64:
38830 case IX86_BUILTIN_XSAVEC:
38831 case IX86_BUILTIN_XSAVEC64:
38832 arg0 = CALL_EXPR_ARG (exp, 0);
38833 arg1 = CALL_EXPR_ARG (exp, 1);
38834 op0 = expand_normal (arg0);
38835 op1 = expand_normal (arg1);
38837 if (!address_operand (op0, VOIDmode))
38839 op0 = convert_memory_address (Pmode, op0);
38840 op0 = copy_addr_to_reg (op0);
38842 op0 = gen_rtx_MEM (BLKmode, op0);
38844 op1 = force_reg (DImode, op1);
38846 if (TARGET_64BIT)
38848 op2 = expand_simple_binop (DImode, LSHIFTRT, op1, GEN_INT (32),
38849 NULL, 1, OPTAB_DIRECT);
38850 switch (fcode)
38852 case IX86_BUILTIN_XSAVE:
38853 icode = CODE_FOR_xsave_rex64;
38854 break;
38855 case IX86_BUILTIN_XRSTOR:
38856 icode = CODE_FOR_xrstor_rex64;
38857 break;
38858 case IX86_BUILTIN_XSAVE64:
38859 icode = CODE_FOR_xsave64;
38860 break;
38861 case IX86_BUILTIN_XRSTOR64:
38862 icode = CODE_FOR_xrstor64;
38863 break;
38864 case IX86_BUILTIN_XSAVEOPT:
38865 icode = CODE_FOR_xsaveopt_rex64;
38866 break;
38867 case IX86_BUILTIN_XSAVEOPT64:
38868 icode = CODE_FOR_xsaveopt64;
38869 break;
38870 case IX86_BUILTIN_XSAVES:
38871 icode = CODE_FOR_xsaves_rex64;
38872 break;
38873 case IX86_BUILTIN_XRSTORS:
38874 icode = CODE_FOR_xrstors_rex64;
38875 break;
38876 case IX86_BUILTIN_XSAVES64:
38877 icode = CODE_FOR_xsaves64;
38878 break;
38879 case IX86_BUILTIN_XRSTORS64:
38880 icode = CODE_FOR_xrstors64;
38881 break;
38882 case IX86_BUILTIN_XSAVEC:
38883 icode = CODE_FOR_xsavec_rex64;
38884 break;
38885 case IX86_BUILTIN_XSAVEC64:
38886 icode = CODE_FOR_xsavec64;
38887 break;
38888 default:
38889 gcc_unreachable ();
38892 op2 = gen_lowpart (SImode, op2);
38893 op1 = gen_lowpart (SImode, op1);
38894 pat = GEN_FCN (icode) (op0, op1, op2);
38896 else
38898 switch (fcode)
38900 case IX86_BUILTIN_XSAVE:
38901 icode = CODE_FOR_xsave;
38902 break;
38903 case IX86_BUILTIN_XRSTOR:
38904 icode = CODE_FOR_xrstor;
38905 break;
38906 case IX86_BUILTIN_XSAVEOPT:
38907 icode = CODE_FOR_xsaveopt;
38908 break;
38909 case IX86_BUILTIN_XSAVES:
38910 icode = CODE_FOR_xsaves;
38911 break;
38912 case IX86_BUILTIN_XRSTORS:
38913 icode = CODE_FOR_xrstors;
38914 break;
38915 case IX86_BUILTIN_XSAVEC:
38916 icode = CODE_FOR_xsavec;
38917 break;
38918 default:
38919 gcc_unreachable ();
38921 pat = GEN_FCN (icode) (op0, op1);
38924 if (pat)
38925 emit_insn (pat);
38926 return 0;
38928 case IX86_BUILTIN_LLWPCB:
38929 arg0 = CALL_EXPR_ARG (exp, 0);
38930 op0 = expand_normal (arg0);
38931 icode = CODE_FOR_lwp_llwpcb;
38932 if (!insn_data[icode].operand[0].predicate (op0, Pmode))
38933 op0 = ix86_zero_extend_to_Pmode (op0);
38934 emit_insn (gen_lwp_llwpcb (op0));
38935 return 0;
38937 case IX86_BUILTIN_SLWPCB:
38938 icode = CODE_FOR_lwp_slwpcb;
38939 if (!target
38940 || !insn_data[icode].operand[0].predicate (target, Pmode))
38941 target = gen_reg_rtx (Pmode);
38942 emit_insn (gen_lwp_slwpcb (target));
38943 return target;
38945 case IX86_BUILTIN_BEXTRI32:
38946 case IX86_BUILTIN_BEXTRI64:
38947 arg0 = CALL_EXPR_ARG (exp, 0);
38948 arg1 = CALL_EXPR_ARG (exp, 1);
38949 op0 = expand_normal (arg0);
38950 op1 = expand_normal (arg1);
38951 icode = (fcode == IX86_BUILTIN_BEXTRI32
38952 ? CODE_FOR_tbm_bextri_si
38953 : CODE_FOR_tbm_bextri_di);
38954 if (!CONST_INT_P (op1))
38956 error ("last argument must be an immediate");
38957 return const0_rtx;
38959 else
38961 unsigned char length = (INTVAL (op1) >> 8) & 0xFF;
38962 unsigned char lsb_index = INTVAL (op1) & 0xFF;
38963 op1 = GEN_INT (length);
38964 op2 = GEN_INT (lsb_index);
38965 pat = GEN_FCN (icode) (target, op0, op1, op2);
38966 if (pat)
38967 emit_insn (pat);
38968 return target;
38971 case IX86_BUILTIN_RDRAND16_STEP:
38972 icode = CODE_FOR_rdrandhi_1;
38973 mode0 = HImode;
38974 goto rdrand_step;
38976 case IX86_BUILTIN_RDRAND32_STEP:
38977 icode = CODE_FOR_rdrandsi_1;
38978 mode0 = SImode;
38979 goto rdrand_step;
38981 case IX86_BUILTIN_RDRAND64_STEP:
38982 icode = CODE_FOR_rdranddi_1;
38983 mode0 = DImode;
38985 rdrand_step:
38986 op0 = gen_reg_rtx (mode0);
38987 emit_insn (GEN_FCN (icode) (op0));
38989 arg0 = CALL_EXPR_ARG (exp, 0);
38990 op1 = expand_normal (arg0);
38991 if (!address_operand (op1, VOIDmode))
38993 op1 = convert_memory_address (Pmode, op1);
38994 op1 = copy_addr_to_reg (op1);
38996 emit_move_insn (gen_rtx_MEM (mode0, op1), op0);
38998 op1 = gen_reg_rtx (SImode);
38999 emit_move_insn (op1, CONST1_RTX (SImode));
39001 /* Emit SImode conditional move. */
39002 if (mode0 == HImode)
39004 op2 = gen_reg_rtx (SImode);
39005 emit_insn (gen_zero_extendhisi2 (op2, op0));
39007 else if (mode0 == SImode)
39008 op2 = op0;
39009 else
39010 op2 = gen_rtx_SUBREG (SImode, op0, 0);
39012 if (target == 0
39013 || !register_operand (target, SImode))
39014 target = gen_reg_rtx (SImode);
39016 pat = gen_rtx_GEU (VOIDmode, gen_rtx_REG (CCCmode, FLAGS_REG),
39017 const0_rtx);
39018 emit_insn (gen_rtx_SET (VOIDmode, target,
39019 gen_rtx_IF_THEN_ELSE (SImode, pat, op2, op1)));
39020 return target;
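/* Illustrative use (assuming the <immintrin.h> wrapper): the
   conditional move above is what makes the step intrinsic return a
   success flag, e.g.

     unsigned int r;
     if (_rdrand32_step (&r))
       consume (r);       carry set: R holds a fresh random value

   where consume is only a placeholder for user code.  */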
39022 case IX86_BUILTIN_RDSEED16_STEP:
39023 icode = CODE_FOR_rdseedhi_1;
39024 mode0 = HImode;
39025 goto rdseed_step;
39027 case IX86_BUILTIN_RDSEED32_STEP:
39028 icode = CODE_FOR_rdseedsi_1;
39029 mode0 = SImode;
39030 goto rdseed_step;
39032 case IX86_BUILTIN_RDSEED64_STEP:
39033 icode = CODE_FOR_rdseeddi_1;
39034 mode0 = DImode;
39036 rdseed_step:
39037 op0 = gen_reg_rtx (mode0);
39038 emit_insn (GEN_FCN (icode) (op0));
39040 arg0 = CALL_EXPR_ARG (exp, 0);
39041 op1 = expand_normal (arg0);
39042 if (!address_operand (op1, VOIDmode))
39044 op1 = convert_memory_address (Pmode, op1);
39045 op1 = copy_addr_to_reg (op1);
39047 emit_move_insn (gen_rtx_MEM (mode0, op1), op0);
39049 op2 = gen_reg_rtx (QImode);
39051 pat = gen_rtx_LTU (QImode, gen_rtx_REG (CCCmode, FLAGS_REG),
39052 const0_rtx);
39053 emit_insn (gen_rtx_SET (VOIDmode, op2, pat));
39055 if (target == 0
39056 || !register_operand (target, SImode))
39057 target = gen_reg_rtx (SImode);
39059 emit_insn (gen_zero_extendqisi2 (target, op2));
39060 return target;
39062 case IX86_BUILTIN_SBB32:
39063 icode = CODE_FOR_subsi3_carry;
39064 mode0 = SImode;
39065 goto addcarryx;
39067 case IX86_BUILTIN_SBB64:
39068 icode = CODE_FOR_subdi3_carry;
39069 mode0 = DImode;
39070 goto addcarryx;
39072 case IX86_BUILTIN_ADDCARRYX32:
39073 icode = TARGET_ADX ? CODE_FOR_adcxsi3 : CODE_FOR_addsi3_carry;
39074 mode0 = SImode;
39075 goto addcarryx;
39077 case IX86_BUILTIN_ADDCARRYX64:
39078 icode = TARGET_ADX ? CODE_FOR_adcxdi3 : CODE_FOR_adddi3_carry;
39079 mode0 = DImode;
39081 addcarryx:
39082 arg0 = CALL_EXPR_ARG (exp, 0); /* unsigned char c_in. */
39083 arg1 = CALL_EXPR_ARG (exp, 1); /* unsigned int src1. */
39084 arg2 = CALL_EXPR_ARG (exp, 2); /* unsigned int src2. */
39085 arg3 = CALL_EXPR_ARG (exp, 3); /* unsigned int *sum_out. */
39087 op0 = gen_reg_rtx (QImode);
39089 /* Generate CF from input operand. */
39090 op1 = expand_normal (arg0);
39091 op1 = copy_to_mode_reg (QImode, convert_to_mode (QImode, op1, 1));
39092 emit_insn (gen_addqi3_cc (op0, op1, constm1_rtx));
39094 /* Gen ADCX instruction to compute X+Y+CF. */
39095 op2 = expand_normal (arg1);
39096 op3 = expand_normal (arg2);
39098 if (!REG_P (op2))
39099 op2 = copy_to_mode_reg (mode0, op2);
39100 if (!REG_P (op3))
39101 op3 = copy_to_mode_reg (mode0, op3);
39103 op0 = gen_reg_rtx (mode0);
39105 op4 = gen_rtx_REG (CCCmode, FLAGS_REG);
39106 pat = gen_rtx_LTU (VOIDmode, op4, const0_rtx);
39107 emit_insn (GEN_FCN (icode) (op0, op2, op3, op4, pat));
39109 /* Store the result. */
39110 op4 = expand_normal (arg3);
39111 if (!address_operand (op4, VOIDmode))
39113 op4 = convert_memory_address (Pmode, op4);
39114 op4 = copy_addr_to_reg (op4);
39116 emit_move_insn (gen_rtx_MEM (mode0, op4), op0);
39118 /* Return current CF value. */
39119 if (target == 0)
39120 target = gen_reg_rtx (QImode);
39122 PUT_MODE (pat, QImode);
39123 emit_insn (gen_rtx_SET (VOIDmode, target, pat));
39124 return target;
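/* Illustrative use (assuming the <adxintrin.h> wrapper): the carry-in
   is materialized with the addqi3_cc trick above and the carry-out is
   returned, so multi-word additions chain naturally, e.g.

     unsigned char c = 0;
     c = _addcarryx_u32 (c, a[0], b[0], &r[0]);
     c = _addcarryx_u32 (c, a[1], b[1], &r[1]);

   On targets without ADX the expander falls back to the plain
   add-with-carry patterns selected above.  */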
39126 case IX86_BUILTIN_READ_FLAGS:
39127 emit_insn (gen_push (gen_rtx_REG (word_mode, FLAGS_REG)));
39129 if (optimize
39130 || target == NULL_RTX
39131 || !nonimmediate_operand (target, word_mode)
39132 || GET_MODE (target) != word_mode)
39133 target = gen_reg_rtx (word_mode);
39135 emit_insn (gen_pop (target));
39136 return target;
39138 case IX86_BUILTIN_WRITE_FLAGS:
39140 arg0 = CALL_EXPR_ARG (exp, 0);
39141 op0 = expand_normal (arg0);
39142 if (!general_no_elim_operand (op0, word_mode))
39143 op0 = copy_to_mode_reg (word_mode, op0);
39145 emit_insn (gen_push (op0));
39146 emit_insn (gen_pop (gen_rtx_REG (word_mode, FLAGS_REG)));
39147 return 0;
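/* Reader's note: READ_FLAGS and WRITE_FLAGS simply move the flags
   register through the stack with push/pop.  Illustrative pairing on
   a 64-bit target (assuming the __readeflags/__writeeflags wrappers):

     unsigned long long f = __readeflags ();
     __writeeflags (f | 0x100);      example: set the trap flag
*/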
39149 case IX86_BUILTIN_KORTESTC16:
39150 icode = CODE_FOR_kortestchi;
39151 mode0 = HImode;
39152 mode1 = CCCmode;
39153 goto kortest;
39155 case IX86_BUILTIN_KORTESTZ16:
39156 icode = CODE_FOR_kortestzhi;
39157 mode0 = HImode;
39158 mode1 = CCZmode;
39160 kortest:
39161 arg0 = CALL_EXPR_ARG (exp, 0); /* Mask reg src1. */
39162 arg1 = CALL_EXPR_ARG (exp, 1); /* Mask reg src2. */
39163 op0 = expand_normal (arg0);
39164 op1 = expand_normal (arg1);
39166 op0 = copy_to_reg (op0);
39167 op0 = simplify_gen_subreg (mode0, op0, GET_MODE (op0), 0);
39168 op1 = copy_to_reg (op1);
39169 op1 = simplify_gen_subreg (mode0, op1, GET_MODE (op1), 0);
39171 target = gen_reg_rtx (QImode);
39172 emit_insn (gen_rtx_SET (mode0, target, const0_rtx));
39174 /* Emit kortest. */
39175 emit_insn (GEN_FCN (icode) (op0, op1));
39176 /* And use setcc to return result from flags. */
39177 ix86_expand_setcc (target, EQ,
39178 gen_rtx_REG (mode1, FLAGS_REG), const0_rtx);
39179 return target;
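/* Reader's note: kortest ORs the two mask registers and sets CF when
   the result is all ones and ZF when it is zero.  KORTESTC therefore
   tests CF (CCCmode) and KORTESTZ tests ZF (CCZmode), and the setcc
   above turns the selected flag into the 0/1 return value.  */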
39181 case IX86_BUILTIN_GATHERSIV2DF:
39182 icode = CODE_FOR_avx2_gathersiv2df;
39183 goto gather_gen;
39184 case IX86_BUILTIN_GATHERSIV4DF:
39185 icode = CODE_FOR_avx2_gathersiv4df;
39186 goto gather_gen;
39187 case IX86_BUILTIN_GATHERDIV2DF:
39188 icode = CODE_FOR_avx2_gatherdiv2df;
39189 goto gather_gen;
39190 case IX86_BUILTIN_GATHERDIV4DF:
39191 icode = CODE_FOR_avx2_gatherdiv4df;
39192 goto gather_gen;
39193 case IX86_BUILTIN_GATHERSIV4SF:
39194 icode = CODE_FOR_avx2_gathersiv4sf;
39195 goto gather_gen;
39196 case IX86_BUILTIN_GATHERSIV8SF:
39197 icode = CODE_FOR_avx2_gathersiv8sf;
39198 goto gather_gen;
39199 case IX86_BUILTIN_GATHERDIV4SF:
39200 icode = CODE_FOR_avx2_gatherdiv4sf;
39201 goto gather_gen;
39202 case IX86_BUILTIN_GATHERDIV8SF:
39203 icode = CODE_FOR_avx2_gatherdiv8sf;
39204 goto gather_gen;
39205 case IX86_BUILTIN_GATHERSIV2DI:
39206 icode = CODE_FOR_avx2_gathersiv2di;
39207 goto gather_gen;
39208 case IX86_BUILTIN_GATHERSIV4DI:
39209 icode = CODE_FOR_avx2_gathersiv4di;
39210 goto gather_gen;
39211 case IX86_BUILTIN_GATHERDIV2DI:
39212 icode = CODE_FOR_avx2_gatherdiv2di;
39213 goto gather_gen;
39214 case IX86_BUILTIN_GATHERDIV4DI:
39215 icode = CODE_FOR_avx2_gatherdiv4di;
39216 goto gather_gen;
39217 case IX86_BUILTIN_GATHERSIV4SI:
39218 icode = CODE_FOR_avx2_gathersiv4si;
39219 goto gather_gen;
39220 case IX86_BUILTIN_GATHERSIV8SI:
39221 icode = CODE_FOR_avx2_gathersiv8si;
39222 goto gather_gen;
39223 case IX86_BUILTIN_GATHERDIV4SI:
39224 icode = CODE_FOR_avx2_gatherdiv4si;
39225 goto gather_gen;
39226 case IX86_BUILTIN_GATHERDIV8SI:
39227 icode = CODE_FOR_avx2_gatherdiv8si;
39228 goto gather_gen;
39229 case IX86_BUILTIN_GATHERALTSIV4DF:
39230 icode = CODE_FOR_avx2_gathersiv4df;
39231 goto gather_gen;
39232 case IX86_BUILTIN_GATHERALTDIV8SF:
39233 icode = CODE_FOR_avx2_gatherdiv8sf;
39234 goto gather_gen;
39235 case IX86_BUILTIN_GATHERALTSIV4DI:
39236 icode = CODE_FOR_avx2_gathersiv4di;
39237 goto gather_gen;
39238 case IX86_BUILTIN_GATHERALTDIV8SI:
39239 icode = CODE_FOR_avx2_gatherdiv8si;
39240 goto gather_gen;
39241 case IX86_BUILTIN_GATHER3SIV16SF:
39242 icode = CODE_FOR_avx512f_gathersiv16sf;
39243 goto gather_gen;
39244 case IX86_BUILTIN_GATHER3SIV8DF:
39245 icode = CODE_FOR_avx512f_gathersiv8df;
39246 goto gather_gen;
39247 case IX86_BUILTIN_GATHER3DIV16SF:
39248 icode = CODE_FOR_avx512f_gatherdiv16sf;
39249 goto gather_gen;
39250 case IX86_BUILTIN_GATHER3DIV8DF:
39251 icode = CODE_FOR_avx512f_gatherdiv8df;
39252 goto gather_gen;
39253 case IX86_BUILTIN_GATHER3SIV16SI:
39254 icode = CODE_FOR_avx512f_gathersiv16si;
39255 goto gather_gen;
39256 case IX86_BUILTIN_GATHER3SIV8DI:
39257 icode = CODE_FOR_avx512f_gathersiv8di;
39258 goto gather_gen;
39259 case IX86_BUILTIN_GATHER3DIV16SI:
39260 icode = CODE_FOR_avx512f_gatherdiv16si;
39261 goto gather_gen;
39262 case IX86_BUILTIN_GATHER3DIV8DI:
39263 icode = CODE_FOR_avx512f_gatherdiv8di;
39264 goto gather_gen;
39265 case IX86_BUILTIN_GATHER3ALTSIV8DF:
39266 icode = CODE_FOR_avx512f_gathersiv8df;
39267 goto gather_gen;
39268 case IX86_BUILTIN_GATHER3ALTDIV16SF:
39269 icode = CODE_FOR_avx512f_gatherdiv16sf;
39270 goto gather_gen;
39271 case IX86_BUILTIN_GATHER3ALTSIV8DI:
39272 icode = CODE_FOR_avx512f_gathersiv8di;
39273 goto gather_gen;
39274 case IX86_BUILTIN_GATHER3ALTDIV16SI:
39275 icode = CODE_FOR_avx512f_gatherdiv16si;
39276 goto gather_gen;
39277 case IX86_BUILTIN_GATHER3SIV2DF:
39278 icode = CODE_FOR_avx512vl_gathersiv2df;
39279 goto gather_gen;
39280 case IX86_BUILTIN_GATHER3SIV4DF:
39281 icode = CODE_FOR_avx512vl_gathersiv4df;
39282 goto gather_gen;
39283 case IX86_BUILTIN_GATHER3DIV2DF:
39284 icode = CODE_FOR_avx512vl_gatherdiv2df;
39285 goto gather_gen;
39286 case IX86_BUILTIN_GATHER3DIV4DF:
39287 icode = CODE_FOR_avx512vl_gatherdiv4df;
39288 goto gather_gen;
39289 case IX86_BUILTIN_GATHER3SIV4SF:
39290 icode = CODE_FOR_avx512vl_gathersiv4sf;
39291 goto gather_gen;
39292 case IX86_BUILTIN_GATHER3SIV8SF:
39293 icode = CODE_FOR_avx512vl_gathersiv8sf;
39294 goto gather_gen;
39295 case IX86_BUILTIN_GATHER3DIV4SF:
39296 icode = CODE_FOR_avx512vl_gatherdiv4sf;
39297 goto gather_gen;
39298 case IX86_BUILTIN_GATHER3DIV8SF:
39299 icode = CODE_FOR_avx512vl_gatherdiv8sf;
39300 goto gather_gen;
39301 case IX86_BUILTIN_GATHER3SIV2DI:
39302 icode = CODE_FOR_avx512vl_gathersiv2di;
39303 goto gather_gen;
39304 case IX86_BUILTIN_GATHER3SIV4DI:
39305 icode = CODE_FOR_avx512vl_gathersiv4di;
39306 goto gather_gen;
39307 case IX86_BUILTIN_GATHER3DIV2DI:
39308 icode = CODE_FOR_avx512vl_gatherdiv2di;
39309 goto gather_gen;
39310 case IX86_BUILTIN_GATHER3DIV4DI:
39311 icode = CODE_FOR_avx512vl_gatherdiv4di;
39312 goto gather_gen;
39313 case IX86_BUILTIN_GATHER3SIV4SI:
39314 icode = CODE_FOR_avx512vl_gathersiv4si;
39315 goto gather_gen;
39316 case IX86_BUILTIN_GATHER3SIV8SI:
39317 icode = CODE_FOR_avx512vl_gathersiv8si;
39318 goto gather_gen;
39319 case IX86_BUILTIN_GATHER3DIV4SI:
39320 icode = CODE_FOR_avx512vl_gatherdiv4si;
39321 goto gather_gen;
39322 case IX86_BUILTIN_GATHER3DIV8SI:
39323 icode = CODE_FOR_avx512vl_gatherdiv8si;
39324 goto gather_gen;
39325 case IX86_BUILTIN_GATHER3ALTSIV4DF:
39326 icode = CODE_FOR_avx512vl_gathersiv4df;
39327 goto gather_gen;
39328 case IX86_BUILTIN_GATHER3ALTDIV8SF:
39329 icode = CODE_FOR_avx512vl_gatherdiv8sf;
39330 goto gather_gen;
39331 case IX86_BUILTIN_GATHER3ALTSIV4DI:
39332 icode = CODE_FOR_avx512vl_gathersiv4di;
39333 goto gather_gen;
39334 case IX86_BUILTIN_GATHER3ALTDIV8SI:
39335 icode = CODE_FOR_avx512vl_gatherdiv8si;
39336 goto gather_gen;
39337 case IX86_BUILTIN_SCATTERSIV16SF:
39338 icode = CODE_FOR_avx512f_scattersiv16sf;
39339 goto scatter_gen;
39340 case IX86_BUILTIN_SCATTERSIV8DF:
39341 icode = CODE_FOR_avx512f_scattersiv8df;
39342 goto scatter_gen;
39343 case IX86_BUILTIN_SCATTERDIV16SF:
39344 icode = CODE_FOR_avx512f_scatterdiv16sf;
39345 goto scatter_gen;
39346 case IX86_BUILTIN_SCATTERDIV8DF:
39347 icode = CODE_FOR_avx512f_scatterdiv8df;
39348 goto scatter_gen;
39349 case IX86_BUILTIN_SCATTERSIV16SI:
39350 icode = CODE_FOR_avx512f_scattersiv16si;
39351 goto scatter_gen;
39352 case IX86_BUILTIN_SCATTERSIV8DI:
39353 icode = CODE_FOR_avx512f_scattersiv8di;
39354 goto scatter_gen;
39355 case IX86_BUILTIN_SCATTERDIV16SI:
39356 icode = CODE_FOR_avx512f_scatterdiv16si;
39357 goto scatter_gen;
39358 case IX86_BUILTIN_SCATTERDIV8DI:
39359 icode = CODE_FOR_avx512f_scatterdiv8di;
39360 goto scatter_gen;
39361 case IX86_BUILTIN_SCATTERSIV8SF:
39362 icode = CODE_FOR_avx512vl_scattersiv8sf;
39363 goto scatter_gen;
39364 case IX86_BUILTIN_SCATTERSIV4SF:
39365 icode = CODE_FOR_avx512vl_scattersiv4sf;
39366 goto scatter_gen;
39367 case IX86_BUILTIN_SCATTERSIV4DF:
39368 icode = CODE_FOR_avx512vl_scattersiv4df;
39369 goto scatter_gen;
39370 case IX86_BUILTIN_SCATTERSIV2DF:
39371 icode = CODE_FOR_avx512vl_scattersiv2df;
39372 goto scatter_gen;
39373 case IX86_BUILTIN_SCATTERDIV8SF:
39374 icode = CODE_FOR_avx512vl_scatterdiv8sf;
39375 goto scatter_gen;
39376 case IX86_BUILTIN_SCATTERDIV4SF:
39377 icode = CODE_FOR_avx512vl_scatterdiv4sf;
39378 goto scatter_gen;
39379 case IX86_BUILTIN_SCATTERDIV4DF:
39380 icode = CODE_FOR_avx512vl_scatterdiv4df;
39381 goto scatter_gen;
39382 case IX86_BUILTIN_SCATTERDIV2DF:
39383 icode = CODE_FOR_avx512vl_scatterdiv2df;
39384 goto scatter_gen;
39385 case IX86_BUILTIN_SCATTERSIV8SI:
39386 icode = CODE_FOR_avx512vl_scattersiv8si;
39387 goto scatter_gen;
39388 case IX86_BUILTIN_SCATTERSIV4SI:
39389 icode = CODE_FOR_avx512vl_scattersiv4si;
39390 goto scatter_gen;
39391 case IX86_BUILTIN_SCATTERSIV4DI:
39392 icode = CODE_FOR_avx512vl_scattersiv4di;
39393 goto scatter_gen;
39394 case IX86_BUILTIN_SCATTERSIV2DI:
39395 icode = CODE_FOR_avx512vl_scattersiv2di;
39396 goto scatter_gen;
39397 case IX86_BUILTIN_SCATTERDIV8SI:
39398 icode = CODE_FOR_avx512vl_scatterdiv8si;
39399 goto scatter_gen;
39400 case IX86_BUILTIN_SCATTERDIV4SI:
39401 icode = CODE_FOR_avx512vl_scatterdiv4si;
39402 goto scatter_gen;
39403 case IX86_BUILTIN_SCATTERDIV4DI:
39404 icode = CODE_FOR_avx512vl_scatterdiv4di;
39405 goto scatter_gen;
39406 case IX86_BUILTIN_SCATTERDIV2DI:
39407 icode = CODE_FOR_avx512vl_scatterdiv2di;
39408 goto scatter_gen;
39409 case IX86_BUILTIN_GATHERPFDPD:
39410 icode = CODE_FOR_avx512pf_gatherpfv8sidf;
39411 goto vec_prefetch_gen;
39412 case IX86_BUILTIN_GATHERPFDPS:
39413 icode = CODE_FOR_avx512pf_gatherpfv16sisf;
39414 goto vec_prefetch_gen;
39415 case IX86_BUILTIN_GATHERPFQPD:
39416 icode = CODE_FOR_avx512pf_gatherpfv8didf;
39417 goto vec_prefetch_gen;
39418 case IX86_BUILTIN_GATHERPFQPS:
39419 icode = CODE_FOR_avx512pf_gatherpfv8disf;
39420 goto vec_prefetch_gen;
39421 case IX86_BUILTIN_SCATTERPFDPD:
39422 icode = CODE_FOR_avx512pf_scatterpfv8sidf;
39423 goto vec_prefetch_gen;
39424 case IX86_BUILTIN_SCATTERPFDPS:
39425 icode = CODE_FOR_avx512pf_scatterpfv16sisf;
39426 goto vec_prefetch_gen;
39427 case IX86_BUILTIN_SCATTERPFQPD:
39428 icode = CODE_FOR_avx512pf_scatterpfv8didf;
39429 goto vec_prefetch_gen;
39430 case IX86_BUILTIN_SCATTERPFQPS:
39431 icode = CODE_FOR_avx512pf_scatterpfv8disf;
39432 goto vec_prefetch_gen;
39434 gather_gen:
39435 rtx half;
39436 rtx (*gen) (rtx, rtx);
39438 arg0 = CALL_EXPR_ARG (exp, 0);
39439 arg1 = CALL_EXPR_ARG (exp, 1);
39440 arg2 = CALL_EXPR_ARG (exp, 2);
39441 arg3 = CALL_EXPR_ARG (exp, 3);
39442 arg4 = CALL_EXPR_ARG (exp, 4);
39443 op0 = expand_normal (arg0);
39444 op1 = expand_normal (arg1);
39445 op2 = expand_normal (arg2);
39446 op3 = expand_normal (arg3);
39447 op4 = expand_normal (arg4);
39448 /* Note the arg order is different from the operand order. */
39449 mode0 = insn_data[icode].operand[1].mode;
39450 mode2 = insn_data[icode].operand[3].mode;
39451 mode3 = insn_data[icode].operand[4].mode;
39452 mode4 = insn_data[icode].operand[5].mode;
39454 if (target == NULL_RTX
39455 || GET_MODE (target) != insn_data[icode].operand[0].mode
39456 || !insn_data[icode].operand[0].predicate (target,
39457 GET_MODE (target)))
39458 subtarget = gen_reg_rtx (insn_data[icode].operand[0].mode);
39459 else
39460 subtarget = target;
39462 switch (fcode)
39464 case IX86_BUILTIN_GATHER3ALTSIV8DF:
39465 case IX86_BUILTIN_GATHER3ALTSIV8DI:
39466 half = gen_reg_rtx (V8SImode);
39467 if (!nonimmediate_operand (op2, V16SImode))
39468 op2 = copy_to_mode_reg (V16SImode, op2);
39469 emit_insn (gen_vec_extract_lo_v16si (half, op2));
39470 op2 = half;
39471 break;
39472 case IX86_BUILTIN_GATHER3ALTSIV4DF:
39473 case IX86_BUILTIN_GATHER3ALTSIV4DI:
39474 case IX86_BUILTIN_GATHERALTSIV4DF:
39475 case IX86_BUILTIN_GATHERALTSIV4DI:
39476 half = gen_reg_rtx (V4SImode);
39477 if (!nonimmediate_operand (op2, V8SImode))
39478 op2 = copy_to_mode_reg (V8SImode, op2);
39479 emit_insn (gen_vec_extract_lo_v8si (half, op2));
39480 op2 = half;
39481 break;
39482 case IX86_BUILTIN_GATHER3ALTDIV16SF:
39483 case IX86_BUILTIN_GATHER3ALTDIV16SI:
39484 half = gen_reg_rtx (mode0);
39485 if (mode0 == V8SFmode)
39486 gen = gen_vec_extract_lo_v16sf;
39487 else
39488 gen = gen_vec_extract_lo_v16si;
39489 if (!nonimmediate_operand (op0, GET_MODE (op0)))
39490 op0 = copy_to_mode_reg (GET_MODE (op0), op0);
39491 emit_insn (gen (half, op0));
39492 op0 = half;
39493 if (GET_MODE (op3) != VOIDmode)
39495 if (!nonimmediate_operand (op3, GET_MODE (op3)))
39496 op3 = copy_to_mode_reg (GET_MODE (op3), op3);
39497 emit_insn (gen (half, op3));
39498 op3 = half;
39500 break;
39501 case IX86_BUILTIN_GATHER3ALTDIV8SF:
39502 case IX86_BUILTIN_GATHER3ALTDIV8SI:
39503 case IX86_BUILTIN_GATHERALTDIV8SF:
39504 case IX86_BUILTIN_GATHERALTDIV8SI:
39505 half = gen_reg_rtx (mode0);
39506 if (mode0 == V4SFmode)
39507 gen = gen_vec_extract_lo_v8sf;
39508 else
39509 gen = gen_vec_extract_lo_v8si;
39510 if (!nonimmediate_operand (op0, GET_MODE (op0)))
39511 op0 = copy_to_mode_reg (GET_MODE (op0), op0);
39512 emit_insn (gen (half, op0));
39513 op0 = half;
39514 if (GET_MODE (op3) != VOIDmode)
39516 if (!nonimmediate_operand (op3, GET_MODE (op3)))
39517 op3 = copy_to_mode_reg (GET_MODE (op3), op3);
39518 emit_insn (gen (half, op3));
39519 op3 = half;
39521 break;
39522 default:
39523 break;
39526 /* Force the memory operand to use only a base register here. We
39527 don't want to do this for memory operands of other builtin
39528 functions. */
39529 op1 = ix86_zero_extend_to_Pmode (op1);
39531 if (!insn_data[icode].operand[1].predicate (op0, mode0))
39532 op0 = copy_to_mode_reg (mode0, op0);
39533 if (!insn_data[icode].operand[2].predicate (op1, Pmode))
39534 op1 = copy_to_mode_reg (Pmode, op1);
39535 if (!insn_data[icode].operand[3].predicate (op2, mode2))
39536 op2 = copy_to_mode_reg (mode2, op2);
39537 if (GET_MODE (op3) == mode3 || GET_MODE (op3) == VOIDmode)
39539 if (!insn_data[icode].operand[4].predicate (op3, mode3))
39540 op3 = copy_to_mode_reg (mode3, op3);
39542 else
39544 op3 = copy_to_reg (op3);
39545 op3 = simplify_gen_subreg (mode3, op3, GET_MODE (op3), 0);
39547 if (!insn_data[icode].operand[5].predicate (op4, mode4))
39549 error ("the last argument must be scale 1, 2, 4, 8");
39550 return const0_rtx;
39553 /* Optimize. If mask is known to have all high bits set,
39554 replace op0 with pc_rtx to signal that the instruction
39555 overwrites the whole destination and doesn't use its
39556 previous contents. */
39557 if (optimize)
39559 if (TREE_CODE (arg3) == INTEGER_CST)
39561 if (integer_all_onesp (arg3))
39562 op0 = pc_rtx;
39564 else if (TREE_CODE (arg3) == VECTOR_CST)
39566 unsigned int negative = 0;
39567 for (i = 0; i < VECTOR_CST_NELTS (arg3); ++i)
39569 tree cst = VECTOR_CST_ELT (arg3, i);
39570 if (TREE_CODE (cst) == INTEGER_CST
39571 && tree_int_cst_sign_bit (cst))
39572 negative++;
39573 else if (TREE_CODE (cst) == REAL_CST
39574 && REAL_VALUE_NEGATIVE (TREE_REAL_CST (cst)))
39575 negative++;
39577 if (negative == TYPE_VECTOR_SUBPARTS (TREE_TYPE (arg3)))
39578 op0 = pc_rtx;
39580 else if (TREE_CODE (arg3) == SSA_NAME
39581 && TREE_CODE (TREE_TYPE (arg3)) == VECTOR_TYPE)
39583 /* Recognize also when mask is like:
39584 __v2df src = _mm_setzero_pd ();
39585 __v2df mask = _mm_cmpeq_pd (src, src);
39587 __v8sf src = _mm256_setzero_ps ();
39588 __v8sf mask = _mm256_cmp_ps (src, src, _CMP_EQ_OQ);
39589 as that is a cheaper way to load all ones into
39590 a register than having to load a constant from
39591 memory. */
39592 gimple def_stmt = SSA_NAME_DEF_STMT (arg3);
39593 if (is_gimple_call (def_stmt))
39595 tree fndecl = gimple_call_fndecl (def_stmt);
39596 if (fndecl
39597 && DECL_BUILT_IN_CLASS (fndecl) == BUILT_IN_MD)
39598 switch ((unsigned int) DECL_FUNCTION_CODE (fndecl))
39600 case IX86_BUILTIN_CMPPD:
39601 case IX86_BUILTIN_CMPPS:
39602 case IX86_BUILTIN_CMPPD256:
39603 case IX86_BUILTIN_CMPPS256:
39604 if (!integer_zerop (gimple_call_arg (def_stmt, 2)))
39605 break;
39606 /* FALLTHRU */
39607 case IX86_BUILTIN_CMPEQPD:
39608 case IX86_BUILTIN_CMPEQPS:
39609 if (initializer_zerop (gimple_call_arg (def_stmt, 0))
39610 && initializer_zerop (gimple_call_arg (def_stmt,
39611 1)))
39612 op0 = pc_rtx;
39613 break;
39614 default:
39615 break;
39621 pat = GEN_FCN (icode) (subtarget, op0, op1, op2, op3, op4);
39622 if (! pat)
39623 return const0_rtx;
39624 emit_insn (pat);
39626 switch (fcode)
39628 case IX86_BUILTIN_GATHER3DIV16SF:
39629 if (target == NULL_RTX)
39630 target = gen_reg_rtx (V8SFmode);
39631 emit_insn (gen_vec_extract_lo_v16sf (target, subtarget));
39632 break;
39633 case IX86_BUILTIN_GATHER3DIV16SI:
39634 if (target == NULL_RTX)
39635 target = gen_reg_rtx (V8SImode);
39636 emit_insn (gen_vec_extract_lo_v16si (target, subtarget));
39637 break;
39638 case IX86_BUILTIN_GATHER3DIV8SF:
39639 case IX86_BUILTIN_GATHERDIV8SF:
39640 if (target == NULL_RTX)
39641 target = gen_reg_rtx (V4SFmode);
39642 emit_insn (gen_vec_extract_lo_v8sf (target, subtarget));
39643 break;
39644 case IX86_BUILTIN_GATHER3DIV8SI:
39645 case IX86_BUILTIN_GATHERDIV8SI:
39646 if (target == NULL_RTX)
39647 target = gen_reg_rtx (V4SImode);
39648 emit_insn (gen_vec_extract_lo_v8si (target, subtarget));
39649 break;
39650 default:
39651 target = subtarget;
39652 break;
39654 return target;
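/* Reader's note: for the 64-bit-index ("div") gathers whose result
   has only half as many elements as the instruction's destination
   mode, the switch above extracts the low half of SUBTARGET into the
   user-visible TARGET (e.g. a V16SF destination is narrowed back to
   the V8SF value the builtin actually returns).  */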
39656 scatter_gen:
39657 arg0 = CALL_EXPR_ARG (exp, 0);
39658 arg1 = CALL_EXPR_ARG (exp, 1);
39659 arg2 = CALL_EXPR_ARG (exp, 2);
39660 arg3 = CALL_EXPR_ARG (exp, 3);
39661 arg4 = CALL_EXPR_ARG (exp, 4);
39662 op0 = expand_normal (arg0);
39663 op1 = expand_normal (arg1);
39664 op2 = expand_normal (arg2);
39665 op3 = expand_normal (arg3);
39666 op4 = expand_normal (arg4);
39667 mode1 = insn_data[icode].operand[1].mode;
39668 mode2 = insn_data[icode].operand[2].mode;
39669 mode3 = insn_data[icode].operand[3].mode;
39670 mode4 = insn_data[icode].operand[4].mode;
39672 /* Force the memory operand to use only a base register here. We
39673 don't want to do this for memory operands of other builtin
39674 functions. */
39675 op0 = force_reg (Pmode, convert_to_mode (Pmode, op0, 1));
39677 if (!insn_data[icode].operand[0].predicate (op0, Pmode))
39678 op0 = copy_to_mode_reg (Pmode, op0);
39680 if (GET_MODE (op1) == mode1 || GET_MODE (op1) == VOIDmode)
39682 if (!insn_data[icode].operand[1].predicate (op1, mode1))
39683 op1 = copy_to_mode_reg (mode1, op1);
39685 else
39687 op1 = copy_to_reg (op1);
39688 op1 = simplify_gen_subreg (mode1, op1, GET_MODE (op1), 0);
39691 if (!insn_data[icode].operand[2].predicate (op2, mode2))
39692 op2 = copy_to_mode_reg (mode2, op2);
39694 if (!insn_data[icode].operand[3].predicate (op3, mode3))
39695 op3 = copy_to_mode_reg (mode3, op3);
39697 if (!insn_data[icode].operand[4].predicate (op4, mode4))
39699 error ("the last argument must be scale 1, 2, 4, 8");
39700 return const0_rtx;
39703 pat = GEN_FCN (icode) (op0, op1, op2, op3, op4);
39704 if (! pat)
39705 return const0_rtx;
39707 emit_insn (pat);
39708 return 0;
39710 vec_prefetch_gen:
39711 arg0 = CALL_EXPR_ARG (exp, 0);
39712 arg1 = CALL_EXPR_ARG (exp, 1);
39713 arg2 = CALL_EXPR_ARG (exp, 2);
39714 arg3 = CALL_EXPR_ARG (exp, 3);
39715 arg4 = CALL_EXPR_ARG (exp, 4);
39716 op0 = expand_normal (arg0);
39717 op1 = expand_normal (arg1);
39718 op2 = expand_normal (arg2);
39719 op3 = expand_normal (arg3);
39720 op4 = expand_normal (arg4);
39721 mode0 = insn_data[icode].operand[0].mode;
39722 mode1 = insn_data[icode].operand[1].mode;
39723 mode3 = insn_data[icode].operand[3].mode;
39724 mode4 = insn_data[icode].operand[4].mode;
39726 if (GET_MODE (op0) == mode0
39727 || (GET_MODE (op0) == VOIDmode && op0 != constm1_rtx))
39729 if (!insn_data[icode].operand[0].predicate (op0, mode0))
39730 op0 = copy_to_mode_reg (mode0, op0);
39732 else if (op0 != constm1_rtx)
39734 op0 = copy_to_reg (op0);
39735 op0 = simplify_gen_subreg (mode0, op0, GET_MODE (op0), 0);
39738 if (!insn_data[icode].operand[1].predicate (op1, mode1))
39739 op1 = copy_to_mode_reg (mode1, op1);
39741 /* Force the memory operand to use only a base register here. We
39742 don't want to do this for memory operands of other builtin
39743 functions. */
39744 op2 = force_reg (Pmode, convert_to_mode (Pmode, op2, 1));
39746 if (!insn_data[icode].operand[2].predicate (op2, Pmode))
39747 op2 = copy_to_mode_reg (Pmode, op2);
39749 if (!insn_data[icode].operand[3].predicate (op3, mode3))
39751 error ("the fourth argument must be scale 1, 2, 4, 8");
39752 return const0_rtx;
39755 if (!insn_data[icode].operand[4].predicate (op4, mode4))
39757 error ("incorrect hint operand");
39758 return const0_rtx;
39761 pat = GEN_FCN (icode) (op0, op1, op2, op3, op4);
39762 if (! pat)
39763 return const0_rtx;
39765 emit_insn (pat);
39767 return 0;
39769 case IX86_BUILTIN_XABORT:
39770 icode = CODE_FOR_xabort;
39771 arg0 = CALL_EXPR_ARG (exp, 0);
39772 op0 = expand_normal (arg0);
39773 mode0 = insn_data[icode].operand[0].mode;
39774 if (!insn_data[icode].operand[0].predicate (op0, mode0))
39776 error ("the xabort's argument must be an 8-bit immediate");
39777 return const0_rtx;
39779 emit_insn (gen_xabort (op0));
39780 return 0;
39782 default:
39783 break;
39786 for (i = 0, d = bdesc_special_args;
39787 i < ARRAY_SIZE (bdesc_special_args);
39788 i++, d++)
39789 if (d->code == fcode)
39790 return ix86_expand_special_args_builtin (d, exp, target);
39792 for (i = 0, d = bdesc_args;
39793 i < ARRAY_SIZE (bdesc_args);
39794 i++, d++)
39795 if (d->code == fcode)
39796 switch (fcode)
39798 case IX86_BUILTIN_FABSQ:
39799 case IX86_BUILTIN_COPYSIGNQ:
39800 if (!TARGET_SSE)
39801 /* Emit a normal call if SSE isn't available. */
39802 return expand_call (exp, target, ignore);
39803 default:
39804 return ix86_expand_args_builtin (d, exp, target);
39807 for (i = 0, d = bdesc_comi; i < ARRAY_SIZE (bdesc_comi); i++, d++)
39808 if (d->code == fcode)
39809 return ix86_expand_sse_comi (d, exp, target);
39811 for (i = 0, d = bdesc_round_args; i < ARRAY_SIZE (bdesc_round_args); i++, d++)
39812 if (d->code == fcode)
39813 return ix86_expand_round_builtin (d, exp, target);
39815 for (i = 0, d = bdesc_pcmpestr;
39816 i < ARRAY_SIZE (bdesc_pcmpestr);
39817 i++, d++)
39818 if (d->code == fcode)
39819 return ix86_expand_sse_pcmpestr (d, exp, target);
39821 for (i = 0, d = bdesc_pcmpistr;
39822 i < ARRAY_SIZE (bdesc_pcmpistr);
39823 i++, d++)
39824 if (d->code == fcode)
39825 return ix86_expand_sse_pcmpistr (d, exp, target);
39827 for (i = 0, d = bdesc_multi_arg; i < ARRAY_SIZE (bdesc_multi_arg); i++, d++)
39828 if (d->code == fcode)
39829 return ix86_expand_multi_arg_builtin (d->icode, exp, target,
39830 (enum ix86_builtin_func_type)
39831 d->flag, d->comparison);
39833 gcc_unreachable ();
39836 /* This returns the target-specific builtin with code CODE if
39837 current_function_decl has visibility on this builtin, which is checked
39838 using isa flags. Returns NULL_TREE otherwise. */
39840 static tree ix86_get_builtin (enum ix86_builtins code)
39842 struct cl_target_option *opts;
39843 tree target_tree = NULL_TREE;
39845 /* Determine the isa flags of current_function_decl. */
39847 if (current_function_decl)
39848 target_tree = DECL_FUNCTION_SPECIFIC_TARGET (current_function_decl);
39850 if (target_tree == NULL)
39851 target_tree = target_option_default_node;
39853 opts = TREE_TARGET_OPTION (target_tree);
39855 if (ix86_builtins_isa[(int) code].isa & opts->x_ix86_isa_flags)
39856 return ix86_builtin_decl (code, true);
39857 else
39858 return NULL_TREE;
39861 /* Return the function decl for the target-specific builtin
39862 corresponding to the MPX builtin passed in FCODE. */
39863 static tree
39864 ix86_builtin_mpx_function (unsigned fcode)
39866 switch (fcode)
39868 case BUILT_IN_CHKP_BNDMK:
39869 return ix86_builtins[IX86_BUILTIN_BNDMK];
39871 case BUILT_IN_CHKP_BNDSTX:
39872 return ix86_builtins[IX86_BUILTIN_BNDSTX];
39874 case BUILT_IN_CHKP_BNDLDX:
39875 return ix86_builtins[IX86_BUILTIN_BNDLDX];
39877 case BUILT_IN_CHKP_BNDCL:
39878 return ix86_builtins[IX86_BUILTIN_BNDCL];
39880 case BUILT_IN_CHKP_BNDCU:
39881 return ix86_builtins[IX86_BUILTIN_BNDCU];
39883 case BUILT_IN_CHKP_BNDRET:
39884 return ix86_builtins[IX86_BUILTIN_BNDRET];
39886 case BUILT_IN_CHKP_INTERSECT:
39887 return ix86_builtins[IX86_BUILTIN_BNDINT];
39889 case BUILT_IN_CHKP_NARROW:
39890 return ix86_builtins[IX86_BUILTIN_BNDNARROW];
39892 case BUILT_IN_CHKP_SIZEOF:
39893 return ix86_builtins[IX86_BUILTIN_SIZEOF];
39895 case BUILT_IN_CHKP_EXTRACT_LOWER:
39896 return ix86_builtins[IX86_BUILTIN_BNDLOWER];
39898 case BUILT_IN_CHKP_EXTRACT_UPPER:
39899 return ix86_builtins[IX86_BUILTIN_BNDUPPER];
39901 default:
39902 return NULL_TREE;
39905 gcc_unreachable ();
39908 /* Helper function for ix86_load_bounds and ix86_store_bounds.
39910 Return an address to be used to load/store bounds for pointer
39911 passed in SLOT.
39913 SLOT_NO is an integer constant holding number of a target
39914 dependent special slot to be used in case SLOT is not a memory.
39916 SPECIAL_BASE is a pointer to be used as a base of fake address
39917 to access special slots in Bounds Table. SPECIAL_BASE[-1],
39918 SPECIAL_BASE[-2] etc. will be used as fake pointer locations. */
39920 static rtx
39921 ix86_get_arg_address_for_bt (rtx slot, rtx slot_no, rtx special_base)
39923 rtx addr = NULL;
39925 /* A NULL slot means we pass bounds for a pointer that is not passed
39926 to the function at all. A register slot means we pass the pointer
39927 in a register. In both cases bounds are passed via the Bounds
39928 Table. Since we do not have an actual pointer stored in memory,
39929 we have to use fake addresses to access the Bounds Table. We
39930 start with (special_base - sizeof (void *)) and decrease this
39931 address by the pointer size to get addresses for other slots. */
39932 if (!slot || REG_P (slot))
39934 gcc_assert (CONST_INT_P (slot_no));
39935 addr = plus_constant (Pmode, special_base,
39936 -(INTVAL (slot_no) + 1) * GET_MODE_SIZE (Pmode));
39938 /* If the pointer is passed in memory then its address is used to
39939 access the Bounds Table. */
39940 else if (MEM_P (slot))
39942 addr = XEXP (slot, 0);
39943 if (!register_operand (addr, Pmode))
39944 addr = copy_addr_to_reg (addr);
39946 else
39947 gcc_unreachable ();
39949 return addr;
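/* Worked example (illustrative): with 64-bit pointers the fake
   addresses used for register/NULL slots are

     slot_no 0  ->  special_base - 8
     slot_no 1  ->  special_base - 16
     slot_no 2  ->  special_base - 24

   following the -(slot_no + 1) * GET_MODE_SIZE (Pmode) formula
   above.  */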
39952 /* Expand pass uses this hook to load bounds for function parameter
39953 PTR passed in SLOT in case its bounds are not passed in a register.
39955 If SLOT is a memory, then bounds are loaded as for regular pointer
39956 loaded from memory. PTR may be NULL in case SLOT is a memory.
39957 In that case the value of PTR (if required) may be loaded from SLOT.
39959 If SLOT is NULL or a register then SLOT_NO is an integer constant
39960 holding number of the target dependent special slot which should be
39961 used to obtain bounds.
39963 Return loaded bounds. */
39965 static rtx
39966 ix86_load_bounds (rtx slot, rtx ptr, rtx slot_no)
39968 rtx reg = gen_reg_rtx (BNDmode);
39969 rtx addr;
39971 /* Get address to be used to access Bounds Table. Special slots start
39972 at the location of return address of the current function. */
39973 addr = ix86_get_arg_address_for_bt (slot, slot_no, arg_pointer_rtx);
39975 /* Load pointer value from a memory if we don't have it. */
39976 if (!ptr)
39978 gcc_assert (MEM_P (slot));
39979 ptr = copy_addr_to_reg (slot);
39982 emit_insn (BNDmode == BND64mode
39983 ? gen_bnd64_ldx (reg, addr, ptr)
39984 : gen_bnd32_ldx (reg, addr, ptr));
39986 return reg;
39989 /* Expand pass uses this hook to store BOUNDS for call argument PTR
39990 passed in SLOT in case BOUNDS are not passed in a register.
39992 If SLOT is a memory, then BOUNDS are stored as for regular pointer
39993 stored in memory. PTR may be NULL in case SLOT is a memory.
39994 In that case the value of PTR (if required) may be loaded from SLOT.
39996 If SLOT is NULL or a register then SLOT_NO is an integer constant
39997 holding number of the target dependent special slot which should be
39998 used to store BOUNDS. */
40000 static void
40001 ix86_store_bounds (rtx ptr, rtx slot, rtx bounds, rtx slot_no)
40003 rtx addr;
40005 /* Get address to be used to access Bounds Table. Special slots start
40006 at the location of return address of a called function. */
40007 addr = ix86_get_arg_address_for_bt (slot, slot_no, stack_pointer_rtx);
40009 /* Load pointer value from a memory if we don't have it. */
40010 if (!ptr)
40012 gcc_assert (MEM_P (slot));
40013 ptr = copy_addr_to_reg (slot);
40016 gcc_assert (POINTER_BOUNDS_MODE_P (GET_MODE (bounds)));
40017 if (!register_operand (bounds, BNDmode))
40018 bounds = copy_to_mode_reg (BNDmode, bounds);
40020 emit_insn (BNDmode == BND64mode
40021 ? gen_bnd64_stx (addr, ptr, bounds)
40022 : gen_bnd32_stx (addr, ptr, bounds));
40025 /* Load and return bounds returned by function in SLOT. */
40027 static rtx
40028 ix86_load_returned_bounds (rtx slot)
40030 rtx res;
40032 gcc_assert (REG_P (slot));
40033 res = gen_reg_rtx (BNDmode);
40034 emit_move_insn (res, slot);
40036 return res;
40039 /* Store BOUNDS returned by function into SLOT. */
40041 static void
40042 ix86_store_returned_bounds (rtx slot, rtx bounds)
40044 gcc_assert (REG_P (slot));
40045 emit_move_insn (slot, bounds);
40048 /* Returns a function decl for a vectorized version of the builtin function
40049 with builtin function code FN and the result vector type TYPE, or NULL_TREE
40050 if it is not available. */
40052 static tree
40053 ix86_builtin_vectorized_function (tree fndecl, tree type_out,
40054 tree type_in)
40056 machine_mode in_mode, out_mode;
40057 int in_n, out_n;
40058 enum built_in_function fn = DECL_FUNCTION_CODE (fndecl);
40060 if (TREE_CODE (type_out) != VECTOR_TYPE
40061 || TREE_CODE (type_in) != VECTOR_TYPE
40062 || DECL_BUILT_IN_CLASS (fndecl) != BUILT_IN_NORMAL)
40063 return NULL_TREE;
40065 out_mode = TYPE_MODE (TREE_TYPE (type_out));
40066 out_n = TYPE_VECTOR_SUBPARTS (type_out);
40067 in_mode = TYPE_MODE (TREE_TYPE (type_in));
40068 in_n = TYPE_VECTOR_SUBPARTS (type_in);
40070 switch (fn)
40072 case BUILT_IN_SQRT:
40073 if (out_mode == DFmode && in_mode == DFmode)
40075 if (out_n == 2 && in_n == 2)
40076 return ix86_get_builtin (IX86_BUILTIN_SQRTPD);
40077 else if (out_n == 4 && in_n == 4)
40078 return ix86_get_builtin (IX86_BUILTIN_SQRTPD256);
40079 else if (out_n == 8 && in_n == 8)
40080 return ix86_get_builtin (IX86_BUILTIN_SQRTPD512);
40082 break;
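/* Illustrative example: with AVX enabled, a loop such as

     for (i = 0; i < n; i++)
       a[i] = sqrt (b[i]);

   can be vectorized four doubles at a time, in which case this hook
   returns the decl for IX86_BUILTIN_SQRTPD256 (the out_n == 4 case
   above).  */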
40084 case BUILT_IN_EXP2F:
40085 if (out_mode == SFmode && in_mode == SFmode)
40087 if (out_n == 16 && in_n == 16)
40088 return ix86_get_builtin (IX86_BUILTIN_EXP2PS);
40090 break;
40092 case BUILT_IN_SQRTF:
40093 if (out_mode == SFmode && in_mode == SFmode)
40095 if (out_n == 4 && in_n == 4)
40096 return ix86_get_builtin (IX86_BUILTIN_SQRTPS_NR);
40097 else if (out_n == 8 && in_n == 8)
40098 return ix86_get_builtin (IX86_BUILTIN_SQRTPS_NR256);
40099 else if (out_n == 16 && in_n == 16)
40100 return ix86_get_builtin (IX86_BUILTIN_SQRTPS_NR512);
40102 break;
40104 case BUILT_IN_IFLOOR:
40105 case BUILT_IN_LFLOOR:
40106 case BUILT_IN_LLFLOOR:
40107 /* The round insn does not trap on denormals. */
40108 if (flag_trapping_math || !TARGET_ROUND)
40109 break;
40111 if (out_mode == SImode && in_mode == DFmode)
40113 if (out_n == 4 && in_n == 2)
40114 return ix86_get_builtin (IX86_BUILTIN_FLOORPD_VEC_PACK_SFIX);
40115 else if (out_n == 8 && in_n == 4)
40116 return ix86_get_builtin (IX86_BUILTIN_FLOORPD_VEC_PACK_SFIX256);
40117 else if (out_n == 16 && in_n == 8)
40118 return ix86_get_builtin (IX86_BUILTIN_FLOORPD_VEC_PACK_SFIX512);
40120 break;
40122 case BUILT_IN_IFLOORF:
40123 case BUILT_IN_LFLOORF:
40124 case BUILT_IN_LLFLOORF:
40125 /* The round insn does not trap on denormals. */
40126 if (flag_trapping_math || !TARGET_ROUND)
40127 break;
40129 if (out_mode == SImode && in_mode == SFmode)
40131 if (out_n == 4 && in_n == 4)
40132 return ix86_get_builtin (IX86_BUILTIN_FLOORPS_SFIX);
40133 else if (out_n == 8 && in_n == 8)
40134 return ix86_get_builtin (IX86_BUILTIN_FLOORPS_SFIX256);
40136 break;
40138 case BUILT_IN_ICEIL:
40139 case BUILT_IN_LCEIL:
40140 case BUILT_IN_LLCEIL:
40141 /* The round insn does not trap on denormals. */
40142 if (flag_trapping_math || !TARGET_ROUND)
40143 break;
40145 if (out_mode == SImode && in_mode == DFmode)
40147 if (out_n == 4 && in_n == 2)
40148 return ix86_get_builtin (IX86_BUILTIN_CEILPD_VEC_PACK_SFIX);
40149 else if (out_n == 8 && in_n == 4)
40150 return ix86_get_builtin (IX86_BUILTIN_CEILPD_VEC_PACK_SFIX256);
40151 else if (out_n == 16 && in_n == 8)
40152 return ix86_get_builtin (IX86_BUILTIN_CEILPD_VEC_PACK_SFIX512);
40154 break;
40156 case BUILT_IN_ICEILF:
40157 case BUILT_IN_LCEILF:
40158 case BUILT_IN_LLCEILF:
40159 /* The round insn does not trap on denormals. */
40160 if (flag_trapping_math || !TARGET_ROUND)
40161 break;
40163 if (out_mode == SImode && in_mode == SFmode)
40165 if (out_n == 4 && in_n == 4)
40166 return ix86_get_builtin (IX86_BUILTIN_CEILPS_SFIX);
40167 else if (out_n == 8 && in_n == 8)
40168 return ix86_get_builtin (IX86_BUILTIN_CEILPS_SFIX256);
40170 break;
40172 case BUILT_IN_IRINT:
40173 case BUILT_IN_LRINT:
40174 case BUILT_IN_LLRINT:
40175 if (out_mode == SImode && in_mode == DFmode)
40177 if (out_n == 4 && in_n == 2)
40178 return ix86_get_builtin (IX86_BUILTIN_VEC_PACK_SFIX);
40179 else if (out_n == 8 && in_n == 4)
40180 return ix86_get_builtin (IX86_BUILTIN_VEC_PACK_SFIX256);
40182 break;
40184 case BUILT_IN_IRINTF:
40185 case BUILT_IN_LRINTF:
40186 case BUILT_IN_LLRINTF:
40187 if (out_mode == SImode && in_mode == SFmode)
40189 if (out_n == 4 && in_n == 4)
40190 return ix86_get_builtin (IX86_BUILTIN_CVTPS2DQ);
40191 else if (out_n == 8 && in_n == 8)
40192 return ix86_get_builtin (IX86_BUILTIN_CVTPS2DQ256);
40194 break;
40196 case BUILT_IN_IROUND:
40197 case BUILT_IN_LROUND:
40198 case BUILT_IN_LLROUND:
40199 /* The round insn does not trap on denormals. */
40200 if (flag_trapping_math || !TARGET_ROUND)
40201 break;
40203 if (out_mode == SImode && in_mode == DFmode)
40205 if (out_n == 4 && in_n == 2)
40206 return ix86_get_builtin (IX86_BUILTIN_ROUNDPD_AZ_VEC_PACK_SFIX);
40207 else if (out_n == 8 && in_n == 4)
40208 return ix86_get_builtin (IX86_BUILTIN_ROUNDPD_AZ_VEC_PACK_SFIX256);
40209 else if (out_n == 16 && in_n == 8)
40210 return ix86_get_builtin (IX86_BUILTIN_ROUNDPD_AZ_VEC_PACK_SFIX512);
40212 break;
40214 case BUILT_IN_IROUNDF:
40215 case BUILT_IN_LROUNDF:
40216 case BUILT_IN_LLROUNDF:
40217 /* The round insn does not trap on denormals. */
40218 if (flag_trapping_math || !TARGET_ROUND)
40219 break;
40221 if (out_mode == SImode && in_mode == SFmode)
40223 if (out_n == 4 && in_n == 4)
40224 return ix86_get_builtin (IX86_BUILTIN_ROUNDPS_AZ_SFIX);
40225 else if (out_n == 8 && in_n == 8)
40226 return ix86_get_builtin (IX86_BUILTIN_ROUNDPS_AZ_SFIX256);
40228 break;
40230 case BUILT_IN_COPYSIGN:
40231 if (out_mode == DFmode && in_mode == DFmode)
40233 if (out_n == 2 && in_n == 2)
40234 return ix86_get_builtin (IX86_BUILTIN_CPYSGNPD);
40235 else if (out_n == 4 && in_n == 4)
40236 return ix86_get_builtin (IX86_BUILTIN_CPYSGNPD256);
40237 else if (out_n == 8 && in_n == 8)
40238 return ix86_get_builtin (IX86_BUILTIN_CPYSGNPD512);
40240 break;
40242 case BUILT_IN_COPYSIGNF:
40243 if (out_mode == SFmode && in_mode == SFmode)
40245 if (out_n == 4 && in_n == 4)
40246 return ix86_get_builtin (IX86_BUILTIN_CPYSGNPS);
40247 else if (out_n == 8 && in_n == 8)
40248 return ix86_get_builtin (IX86_BUILTIN_CPYSGNPS256);
40249 else if (out_n == 16 && in_n == 16)
40250 return ix86_get_builtin (IX86_BUILTIN_CPYSGNPS512);
40252 break;
40254 case BUILT_IN_FLOOR:
40255 /* The round insn does not trap on denormals. */
40256 if (flag_trapping_math || !TARGET_ROUND)
40257 break;
40259 if (out_mode == DFmode && in_mode == DFmode)
40261 if (out_n == 2 && in_n == 2)
40262 return ix86_get_builtin (IX86_BUILTIN_FLOORPD);
40263 else if (out_n == 4 && in_n == 4)
40264 return ix86_get_builtin (IX86_BUILTIN_FLOORPD256);
40266 break;
40268 case BUILT_IN_FLOORF:
40269 /* The round insn does not trap on denormals. */
40270 if (flag_trapping_math || !TARGET_ROUND)
40271 break;
40273 if (out_mode == SFmode && in_mode == SFmode)
40275 if (out_n == 4 && in_n == 4)
40276 return ix86_get_builtin (IX86_BUILTIN_FLOORPS);
40277 else if (out_n == 8 && in_n == 8)
40278 return ix86_get_builtin (IX86_BUILTIN_FLOORPS256);
40280 break;
40282 case BUILT_IN_CEIL:
40283 /* The round insn does not trap on denormals. */
40284 if (flag_trapping_math || !TARGET_ROUND)
40285 break;
40287 if (out_mode == DFmode && in_mode == DFmode)
40289 if (out_n == 2 && in_n == 2)
40290 return ix86_get_builtin (IX86_BUILTIN_CEILPD);
40291 else if (out_n == 4 && in_n == 4)
40292 return ix86_get_builtin (IX86_BUILTIN_CEILPD256);
40294 break;
40296 case BUILT_IN_CEILF:
40297 /* The round insn does not trap on denormals. */
40298 if (flag_trapping_math || !TARGET_ROUND)
40299 break;
40301 if (out_mode == SFmode && in_mode == SFmode)
40303 if (out_n == 4 && in_n == 4)
40304 return ix86_get_builtin (IX86_BUILTIN_CEILPS);
40305 else if (out_n == 8 && in_n == 8)
40306 return ix86_get_builtin (IX86_BUILTIN_CEILPS256);
40308 break;
40310 case BUILT_IN_TRUNC:
40311 /* The round insn does not trap on denormals. */
40312 if (flag_trapping_math || !TARGET_ROUND)
40313 break;
40315 if (out_mode == DFmode && in_mode == DFmode)
40317 if (out_n == 2 && in_n == 2)
40318 return ix86_get_builtin (IX86_BUILTIN_TRUNCPD);
40319 else if (out_n == 4 && in_n == 4)
40320 return ix86_get_builtin (IX86_BUILTIN_TRUNCPD256);
40322 break;
40324 case BUILT_IN_TRUNCF:
40325 /* The round insn does not trap on denormals. */
40326 if (flag_trapping_math || !TARGET_ROUND)
40327 break;
40329 if (out_mode == SFmode && in_mode == SFmode)
40331 if (out_n == 4 && in_n == 4)
40332 return ix86_get_builtin (IX86_BUILTIN_TRUNCPS);
40333 else if (out_n == 8 && in_n == 8)
40334 return ix86_get_builtin (IX86_BUILTIN_TRUNCPS256);
40336 break;
40338 case BUILT_IN_RINT:
40339 /* The round insn does not trap on denormals. */
40340 if (flag_trapping_math || !TARGET_ROUND)
40341 break;
40343 if (out_mode == DFmode && in_mode == DFmode)
40345 if (out_n == 2 && in_n == 2)
40346 return ix86_get_builtin (IX86_BUILTIN_RINTPD);
40347 else if (out_n == 4 && in_n == 4)
40348 return ix86_get_builtin (IX86_BUILTIN_RINTPD256);
40350 break;
40352 case BUILT_IN_RINTF:
40353 /* The round insn does not trap on denormals. */
40354 if (flag_trapping_math || !TARGET_ROUND)
40355 break;
40357 if (out_mode == SFmode && in_mode == SFmode)
40359 if (out_n == 4 && in_n == 4)
40360 return ix86_get_builtin (IX86_BUILTIN_RINTPS);
40361 else if (out_n == 8 && in_n == 8)
40362 return ix86_get_builtin (IX86_BUILTIN_RINTPS256);
40364 break;
40366 case BUILT_IN_ROUND:
40367 /* The round insn does not trap on denormals. */
40368 if (flag_trapping_math || !TARGET_ROUND)
40369 break;
40371 if (out_mode == DFmode && in_mode == DFmode)
40373 if (out_n == 2 && in_n == 2)
40374 return ix86_get_builtin (IX86_BUILTIN_ROUNDPD_AZ);
40375 else if (out_n == 4 && in_n == 4)
40376 return ix86_get_builtin (IX86_BUILTIN_ROUNDPD_AZ256);
40378 break;
40380 case BUILT_IN_ROUNDF:
40381 /* The round insn does not trap on denormals. */
40382 if (flag_trapping_math || !TARGET_ROUND)
40383 break;
40385 if (out_mode == SFmode && in_mode == SFmode)
40387 if (out_n == 4 && in_n == 4)
40388 return ix86_get_builtin (IX86_BUILTIN_ROUNDPS_AZ);
40389 else if (out_n == 8 && in_n == 8)
40390 return ix86_get_builtin (IX86_BUILTIN_ROUNDPS_AZ256);
40392 break;
40394 case BUILT_IN_FMA:
40395 if (out_mode == DFmode && in_mode == DFmode)
40397 if (out_n == 2 && in_n == 2)
40398 return ix86_get_builtin (IX86_BUILTIN_VFMADDPD);
40399 if (out_n == 4 && in_n == 4)
40400 return ix86_get_builtin (IX86_BUILTIN_VFMADDPD256);
40402 break;
40404 case BUILT_IN_FMAF:
40405 if (out_mode == SFmode && in_mode == SFmode)
40407 if (out_n == 4 && in_n == 4)
40408 return ix86_get_builtin (IX86_BUILTIN_VFMADDPS);
40409 if (out_n == 8 && in_n == 8)
40410 return ix86_get_builtin (IX86_BUILTIN_VFMADDPS256);
40412 break;
40414 default:
40415 break;
40418 /* Dispatch to a handler for a vectorization library. */
40419 if (ix86_veclib_handler)
40420 return ix86_veclib_handler ((enum built_in_function) fn, type_out,
40421 type_in);
40423 return NULL_TREE;
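/* A worked example, not part of the original sources: with -mavx and
   -fno-trapping-math, a loop such as

     void floor_all (float *x, int n)
     {
       int i;
       for (i = 0; i < n; i++)
         x[i] = __builtin_floorf (x[i]);
     }

   (the function name is illustrative) may be auto-vectorized with
   8-element SFmode vectors.  The switch above then reaches
   BUILT_IN_FLOORF with out_n == in_n == 8 and returns the decl for
   IX86_BUILTIN_FLOORPS256, the 256-bit vroundps-based floor.  */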
40426 /* Handler for an SVML-style interface to
40427 a library with vectorized intrinsics. */
40429 static tree
40430 ix86_veclibabi_svml (enum built_in_function fn, tree type_out, tree type_in)
40432 char name[20];
40433 tree fntype, new_fndecl, args;
40434 unsigned arity;
40435 const char *bname;
40436 machine_mode el_mode, in_mode;
40437 int n, in_n;
40439 /* SVML is suitable for unsafe math only. */
40440 if (!flag_unsafe_math_optimizations)
40441 return NULL_TREE;
40443 el_mode = TYPE_MODE (TREE_TYPE (type_out));
40444 n = TYPE_VECTOR_SUBPARTS (type_out);
40445 in_mode = TYPE_MODE (TREE_TYPE (type_in));
40446 in_n = TYPE_VECTOR_SUBPARTS (type_in);
40447 if (el_mode != in_mode
40448 || n != in_n)
40449 return NULL_TREE;
40451 switch (fn)
40453 case BUILT_IN_EXP:
40454 case BUILT_IN_LOG:
40455 case BUILT_IN_LOG10:
40456 case BUILT_IN_POW:
40457 case BUILT_IN_TANH:
40458 case BUILT_IN_TAN:
40459 case BUILT_IN_ATAN:
40460 case BUILT_IN_ATAN2:
40461 case BUILT_IN_ATANH:
40462 case BUILT_IN_CBRT:
40463 case BUILT_IN_SINH:
40464 case BUILT_IN_SIN:
40465 case BUILT_IN_ASINH:
40466 case BUILT_IN_ASIN:
40467 case BUILT_IN_COSH:
40468 case BUILT_IN_COS:
40469 case BUILT_IN_ACOSH:
40470 case BUILT_IN_ACOS:
40471 if (el_mode != DFmode || n != 2)
40472 return NULL_TREE;
40473 break;
40475 case BUILT_IN_EXPF:
40476 case BUILT_IN_LOGF:
40477 case BUILT_IN_LOG10F:
40478 case BUILT_IN_POWF:
40479 case BUILT_IN_TANHF:
40480 case BUILT_IN_TANF:
40481 case BUILT_IN_ATANF:
40482 case BUILT_IN_ATAN2F:
40483 case BUILT_IN_ATANHF:
40484 case BUILT_IN_CBRTF:
40485 case BUILT_IN_SINHF:
40486 case BUILT_IN_SINF:
40487 case BUILT_IN_ASINHF:
40488 case BUILT_IN_ASINF:
40489 case BUILT_IN_COSHF:
40490 case BUILT_IN_COSF:
40491 case BUILT_IN_ACOSHF:
40492 case BUILT_IN_ACOSF:
40493 if (el_mode != SFmode || n != 4)
40494 return NULL_TREE;
40495 break;
40497 default:
40498 return NULL_TREE;
40501 bname = IDENTIFIER_POINTER (DECL_NAME (builtin_decl_implicit (fn)));
40503 if (fn == BUILT_IN_LOGF)
40504 strcpy (name, "vmlsLn4");
40505 else if (fn == BUILT_IN_LOG)
40506 strcpy (name, "vmldLn2");
40507 else if (n == 4)
40509 sprintf (name, "vmls%s", bname+10);
40510 name[strlen (name)-1] = '4';
40512 else
40513 sprintf (name, "vmld%s2", bname+10);
40515 /* Convert the first letter of the copied function name to uppercase. */
40516 name[4] &= ~0x20;
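/* A worked example of the mangling above (illustrative only): for
   BUILT_IN_SINF, DECL_NAME is "__builtin_sinf", so bname + 10 is "sinf"
   and, with n == 4,

     sprintf (name, "vmls%s", "sinf")     ->  "vmlssinf"
     name[strlen (name) - 1] = '4'        ->  "vmlssin4"
     name[4] &= ~0x20                     ->  "vmlsSin4"

   while BUILT_IN_SIN (DFmode, n == 2) yields "vmldSin2".  The log cases
   are special-cased above, presumably because the SVML entry points use
   "Ln" rather than "Log" in their names.  */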
40518 arity = 0;
40519 for (args = DECL_ARGUMENTS (builtin_decl_implicit (fn));
40520 args;
40521 args = TREE_CHAIN (args))
40522 arity++;
40524 if (arity == 1)
40525 fntype = build_function_type_list (type_out, type_in, NULL);
40526 else
40527 fntype = build_function_type_list (type_out, type_in, type_in, NULL);
40529 /* Build a function declaration for the vectorized function. */
40530 new_fndecl = build_decl (BUILTINS_LOCATION,
40531 FUNCTION_DECL, get_identifier (name), fntype);
40532 TREE_PUBLIC (new_fndecl) = 1;
40533 DECL_EXTERNAL (new_fndecl) = 1;
40534 DECL_IS_NOVOPS (new_fndecl) = 1;
40535 TREE_READONLY (new_fndecl) = 1;
40537 return new_fndecl;
40540 /* Handler for an ACML-style interface to
40541 a library with vectorized intrinsics. */
40543 static tree
40544 ix86_veclibabi_acml (enum built_in_function fn, tree type_out, tree type_in)
40546 char name[20] = "__vr.._";
40547 tree fntype, new_fndecl, args;
40548 unsigned arity;
40549 const char *bname;
40550 machine_mode el_mode, in_mode;
40551 int n, in_n;
40553 /* ACML is 64-bit only and suitable only for unsafe math, as it does
40554 not correctly support parts of IEEE arithmetic, such as denormals,
40555 with the required precision. */
40556 if (!TARGET_64BIT
40557 || !flag_unsafe_math_optimizations)
40558 return NULL_TREE;
40560 el_mode = TYPE_MODE (TREE_TYPE (type_out));
40561 n = TYPE_VECTOR_SUBPARTS (type_out);
40562 in_mode = TYPE_MODE (TREE_TYPE (type_in));
40563 in_n = TYPE_VECTOR_SUBPARTS (type_in);
40564 if (el_mode != in_mode
40565 || n != in_n)
40566 return NULL_TREE;
40568 switch (fn)
40570 case BUILT_IN_SIN:
40571 case BUILT_IN_COS:
40572 case BUILT_IN_EXP:
40573 case BUILT_IN_LOG:
40574 case BUILT_IN_LOG2:
40575 case BUILT_IN_LOG10:
40576 name[4] = 'd';
40577 name[5] = '2';
40578 if (el_mode != DFmode
40579 || n != 2)
40580 return NULL_TREE;
40581 break;
40583 case BUILT_IN_SINF:
40584 case BUILT_IN_COSF:
40585 case BUILT_IN_EXPF:
40586 case BUILT_IN_POWF:
40587 case BUILT_IN_LOGF:
40588 case BUILT_IN_LOG2F:
40589 case BUILT_IN_LOG10F:
40590 name[4] = 's';
40591 name[5] = '4';
40592 if (el_mode != SFmode
40593 || n != 4)
40594 return NULL_TREE;
40595 break;
40597 default:
40598 return NULL_TREE;
40601 bname = IDENTIFIER_POINTER (DECL_NAME (builtin_decl_implicit (fn)));
40602 sprintf (name + 7, "%s", bname+10);
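/* A worked example (illustrative only): NAME starts out as "__vr.._".
   The switch above patches it to "__vrd2_" for the DFmode cases or
   "__vrs4_" for the SFmode ones, and the sprintf appends the builtin
   name with its "__builtin_" prefix stripped, so BUILT_IN_SIN becomes
   "__vrd2_sin" and BUILT_IN_SINF becomes "__vrs4_sinf".  */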
40604 arity = 0;
40605 for (args = DECL_ARGUMENTS (builtin_decl_implicit (fn));
40606 args;
40607 args = TREE_CHAIN (args))
40608 arity++;
40610 if (arity == 1)
40611 fntype = build_function_type_list (type_out, type_in, NULL);
40612 else
40613 fntype = build_function_type_list (type_out, type_in, type_in, NULL);
40615 /* Build a function declaration for the vectorized function. */
40616 new_fndecl = build_decl (BUILTINS_LOCATION,
40617 FUNCTION_DECL, get_identifier (name), fntype);
40618 TREE_PUBLIC (new_fndecl) = 1;
40619 DECL_EXTERNAL (new_fndecl) = 1;
40620 DECL_IS_NOVOPS (new_fndecl) = 1;
40621 TREE_READONLY (new_fndecl) = 1;
40623 return new_fndecl;
40626 /* Returns a decl of a function that implements gather load with
40627 memory type MEM_VECTYPE and index type INDEX_VECTYPE and SCALE.
40628 Return NULL_TREE if it is not available. */
40630 static tree
40631 ix86_vectorize_builtin_gather (const_tree mem_vectype,
40632 const_tree index_type, int scale)
40634 bool si;
40635 enum ix86_builtins code;
40637 if (! TARGET_AVX2)
40638 return NULL_TREE;
40640 if ((TREE_CODE (index_type) != INTEGER_TYPE
40641 && !POINTER_TYPE_P (index_type))
40642 || (TYPE_MODE (index_type) != SImode
40643 && TYPE_MODE (index_type) != DImode))
40644 return NULL_TREE;
40646 if (TYPE_PRECISION (index_type) > POINTER_SIZE)
40647 return NULL_TREE;
40649 /* v*gather* insn sign extends index to pointer mode. */
40650 if (TYPE_PRECISION (index_type) < POINTER_SIZE
40651 && TYPE_UNSIGNED (index_type))
40652 return NULL_TREE;
40654 if (scale <= 0
40655 || scale > 8
40656 || (scale & (scale - 1)) != 0)
40657 return NULL_TREE;
40659 si = TYPE_MODE (index_type) == SImode;
40660 switch (TYPE_MODE (mem_vectype))
40662 case V2DFmode:
40663 if (TARGET_AVX512VL)
40664 code = si ? IX86_BUILTIN_GATHER3SIV2DF : IX86_BUILTIN_GATHER3DIV2DF;
40665 else
40666 code = si ? IX86_BUILTIN_GATHERSIV2DF : IX86_BUILTIN_GATHERDIV2DF;
40667 break;
40668 case V4DFmode:
40669 if (TARGET_AVX512VL)
40670 code = si ? IX86_BUILTIN_GATHER3ALTSIV4DF : IX86_BUILTIN_GATHER3DIV4DF;
40671 else
40672 code = si ? IX86_BUILTIN_GATHERALTSIV4DF : IX86_BUILTIN_GATHERDIV4DF;
40673 break;
40674 case V2DImode:
40675 if (TARGET_AVX512VL)
40676 code = si ? IX86_BUILTIN_GATHER3SIV2DI : IX86_BUILTIN_GATHER3DIV2DI;
40677 else
40678 code = si ? IX86_BUILTIN_GATHERSIV2DI : IX86_BUILTIN_GATHERDIV2DI;
40679 break;
40680 case V4DImode:
40681 if (TARGET_AVX512VL)
40682 code = si ? IX86_BUILTIN_GATHER3ALTSIV4DI : IX86_BUILTIN_GATHER3DIV4DI;
40683 else
40684 code = si ? IX86_BUILTIN_GATHERALTSIV4DI : IX86_BUILTIN_GATHERDIV4DI;
40685 break;
40686 case V4SFmode:
40687 if (TARGET_AVX512VL)
40688 code = si ? IX86_BUILTIN_GATHER3SIV4SF : IX86_BUILTIN_GATHER3DIV4SF;
40689 else
40690 code = si ? IX86_BUILTIN_GATHERSIV4SF : IX86_BUILTIN_GATHERDIV4SF;
40691 break;
40692 case V8SFmode:
40693 if (TARGET_AVX512VL)
40694 code = si ? IX86_BUILTIN_GATHER3SIV8SF : IX86_BUILTIN_GATHER3ALTDIV8SF;
40695 else
40696 code = si ? IX86_BUILTIN_GATHERSIV8SF : IX86_BUILTIN_GATHERALTDIV8SF;
40697 break;
40698 case V4SImode:
40699 if (TARGET_AVX512VL)
40700 code = si ? IX86_BUILTIN_GATHER3SIV4SI : IX86_BUILTIN_GATHER3DIV4SI;
40701 else
40702 code = si ? IX86_BUILTIN_GATHERSIV4SI : IX86_BUILTIN_GATHERDIV4SI;
40703 break;
40704 case V8SImode:
40705 if (TARGET_AVX512VL)
40706 code = si ? IX86_BUILTIN_GATHER3SIV8SI : IX86_BUILTIN_GATHER3ALTDIV8SI;
40707 else
40708 code = si ? IX86_BUILTIN_GATHERSIV8SI : IX86_BUILTIN_GATHERALTDIV8SI;
40709 break;
40710 case V8DFmode:
40711 if (TARGET_AVX512F)
40712 code = si ? IX86_BUILTIN_GATHER3ALTSIV8DF : IX86_BUILTIN_GATHER3DIV8DF;
40713 else
40714 return NULL_TREE;
40715 break;
40716 case V8DImode:
40717 if (TARGET_AVX512F)
40718 code = si ? IX86_BUILTIN_GATHER3ALTSIV8DI : IX86_BUILTIN_GATHER3DIV8DI;
40719 else
40720 return NULL_TREE;
40721 break;
40722 case V16SFmode:
40723 if (TARGET_AVX512F)
40724 code = si ? IX86_BUILTIN_GATHER3SIV16SF : IX86_BUILTIN_GATHER3ALTDIV16SF;
40725 else
40726 return NULL_TREE;
40727 break;
40728 case V16SImode:
40729 if (TARGET_AVX512F)
40730 code = si ? IX86_BUILTIN_GATHER3SIV16SI : IX86_BUILTIN_GATHER3ALTDIV16SI;
40731 else
40732 return NULL_TREE;
40733 break;
40734 default:
40735 return NULL_TREE;
40738 return ix86_get_builtin (code);
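/* A worked example, not part of the original sources: on an AVX2
   target, a loop such as

     void do_gather (double *d, const double *s, const int *idx, int n)
     {
       int i;
       for (i = 0; i < n; i++)
         d[i] = s[idx[i]];
     }

   (the function name is illustrative) can be vectorized with V4DFmode
   data, an SImode element index and scale 8.  Without AVX512VL the
   V4DFmode case above then selects IX86_BUILTIN_GATHERALTSIV4DF, a
   dword-index (vgatherdpd-style) variant.  */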
40741 /* Returns a code for a target-specific builtin that implements
40742 reciprocal of the function, or NULL_TREE if not available. */
40744 static tree
40745 ix86_builtin_reciprocal (unsigned int fn, bool md_fn, bool)
40747 if (! (TARGET_SSE_MATH && !optimize_insn_for_size_p ()
40748 && flag_finite_math_only && !flag_trapping_math
40749 && flag_unsafe_math_optimizations))
40750 return NULL_TREE;
40752 if (md_fn)
40753 /* Machine dependent builtins. */
40754 switch (fn)
40756 /* Vectorized version of sqrt to rsqrt conversion. */
40757 case IX86_BUILTIN_SQRTPS_NR:
40758 return ix86_get_builtin (IX86_BUILTIN_RSQRTPS_NR);
40760 case IX86_BUILTIN_SQRTPS_NR256:
40761 return ix86_get_builtin (IX86_BUILTIN_RSQRTPS_NR256);
40763 default:
40764 return NULL_TREE;
40766 else
40767 /* Normal builtins. */
40768 switch (fn)
40770 /* Sqrt to rsqrt conversion. */
40771 case BUILT_IN_SQRTF:
40772 return ix86_get_builtin (IX86_BUILTIN_RSQRTF);
40774 default:
40775 return NULL_TREE;
40779 /* Helper for avx_vpermilps256_operand et al. This is also used by
40780 the expansion functions to turn the parallel back into a mask.
40781 The return value is 0 for no match and the imm8+1 for a match. */
40783 int
40784 avx_vpermilp_parallel (rtx par, machine_mode mode)
40786 unsigned i, nelt = GET_MODE_NUNITS (mode);
40787 unsigned mask = 0;
40788 unsigned char ipar[16] = {}; /* Silence -Wuninitialized warning. */
40790 if (XVECLEN (par, 0) != (int) nelt)
40791 return 0;
40793 /* Validate that all of the elements are constants, and not totally
40794 out of range. Copy the data into an integral array to make the
40795 subsequent checks easier. */
40796 for (i = 0; i < nelt; ++i)
40798 rtx er = XVECEXP (par, 0, i);
40799 unsigned HOST_WIDE_INT ei;
40801 if (!CONST_INT_P (er))
40802 return 0;
40803 ei = INTVAL (er);
40804 if (ei >= nelt)
40805 return 0;
40806 ipar[i] = ei;
40809 switch (mode)
40811 case V8DFmode:
40812 /* In the 512-bit DFmode case, we can only move elements within
40813 a 128-bit lane. First fill the second part of the mask,
40814 then fallthru. */
40815 for (i = 4; i < 6; ++i)
40817 if (ipar[i] < 4 || ipar[i] >= 6)
40818 return 0;
40819 mask |= (ipar[i] - 4) << i;
40821 for (i = 6; i < 8; ++i)
40823 if (ipar[i] < 6)
40824 return 0;
40825 mask |= (ipar[i] - 6) << i;
40827 /* FALLTHRU */
40829 case V4DFmode:
40830 /* In the 256-bit DFmode case, we can only move elements within
40831 a 128-bit lane. */
40832 for (i = 0; i < 2; ++i)
40834 if (ipar[i] >= 2)
40835 return 0;
40836 mask |= ipar[i] << i;
40838 for (i = 2; i < 4; ++i)
40840 if (ipar[i] < 2)
40841 return 0;
40842 mask |= (ipar[i] - 2) << i;
40844 break;
40846 case V16SFmode:
40847 /* In 512 bit SFmode case, permutation in the upper 256 bits
40848 must mirror the permutation in the lower 256-bits. */
40849 for (i = 0; i < 8; ++i)
40850 if (ipar[i] + 8 != ipar[i + 8])
40851 return 0;
40852 /* FALLTHRU */
40854 case V8SFmode:
40855 /* In 256 bit SFmode case, we have full freedom of
40856 movement within the low 128-bit lane, but the high 128-bit
40857 lane must mirror the exact same pattern. */
40858 for (i = 0; i < 4; ++i)
40859 if (ipar[i] + 4 != ipar[i + 4])
40860 return 0;
40861 nelt = 4;
40862 /* FALLTHRU */
40864 case V2DFmode:
40865 case V4SFmode:
40866 /* In the 128-bit case, we've full freedom in the placement of
40867 the elements from the source operand. */
40868 for (i = 0; i < nelt; ++i)
40869 mask |= ipar[i] << (i * (nelt / 2));
40870 break;
40872 default:
40873 gcc_unreachable ();
40876 /* Make sure success has a non-zero value by adding one. */
40877 return mask + 1;
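/* A worked example (illustrative only): for V8SFmode, the parallel
   [1 0 3 2 5 4 7 6] swaps adjacent elements.  The high 128-bit lane
   mirrors the low one, so only ipar[0..3] = {1,0,3,2} are encoded at
   two bits per element:

     mask = 1<<0 | 0<<2 | 3<<4 | 2<<6 = 0xb1

   and the function returns 0xb1 + 1.  A parallel that crosses lanes,
   e.g. [4 5 6 7 0 1 2 3], fails the mirror check and returns 0.  */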
40880 /* Helper for avx_vperm2f128_v4df_operand et al. This is also used by
40881 the expansion functions to turn the parallel back into a mask.
40882 The return value is 0 for no match and the imm8+1 for a match. */
40884 int
40885 avx_vperm2f128_parallel (rtx par, machine_mode mode)
40887 unsigned i, nelt = GET_MODE_NUNITS (mode), nelt2 = nelt / 2;
40888 unsigned mask = 0;
40889 unsigned char ipar[8] = {}; /* Silence -Wuninitialized warning. */
40891 if (XVECLEN (par, 0) != (int) nelt)
40892 return 0;
40894 /* Validate that all of the elements are constants, and not totally
40895 out of range. Copy the data into an integral array to make the
40896 subsequent checks easier. */
40897 for (i = 0; i < nelt; ++i)
40899 rtx er = XVECEXP (par, 0, i);
40900 unsigned HOST_WIDE_INT ei;
40902 if (!CONST_INT_P (er))
40903 return 0;
40904 ei = INTVAL (er);
40905 if (ei >= 2 * nelt)
40906 return 0;
40907 ipar[i] = ei;
40910 /* Validate that the halves of the permute are halves. */
40911 for (i = 0; i < nelt2 - 1; ++i)
40912 if (ipar[i] + 1 != ipar[i + 1])
40913 return 0;
40914 for (i = nelt2; i < nelt - 1; ++i)
40915 if (ipar[i] + 1 != ipar[i + 1])
40916 return 0;
40918 /* Reconstruct the mask. */
40919 for (i = 0; i < 2; ++i)
40921 unsigned e = ipar[i * nelt2];
40922 if (e % nelt2)
40923 return 0;
40924 e /= nelt2;
40925 mask |= e << (i * 4);
40928 /* Make sure success has a non-zero value by adding one. */
40929 return mask + 1;
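/* A worked example (illustrative only): for V8SFmode (nelt == 8,
   nelt2 == 4), the parallel [8 9 10 11 0 1 2 3] selects the low lane of
   the second operand followed by the low lane of the first.  Both
   halves are consecutive runs, the lane selectors are 8/4 == 2 and
   0/4 == 0, so

     mask = 2 << 0 | 0 << 4 = 0x02

   and the function returns 0x02 + 1, i.e. a vperm2f128 imm8 of 0x02.  */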
40932 /* Return a register priority for hard reg REGNO. */
40933 static int
40934 ix86_register_priority (int hard_regno)
40936 /* ebp and r13 as the base always want a displacement, and r12 as the
40937 base always wants an index. So discourage their use in an
40938 address. */
40939 if (hard_regno == R12_REG || hard_regno == R13_REG)
40940 return 0;
40941 if (hard_regno == BP_REG)
40942 return 1;
40943 /* New x86-64 int registers result in bigger code size. Discourage
40944 them. */
40945 if (FIRST_REX_INT_REG <= hard_regno && hard_regno <= LAST_REX_INT_REG)
40946 return 2;
40947 /* New x86-64 SSE registers result in bigger code size. Discourage
40948 them. */
40949 if (FIRST_REX_SSE_REG <= hard_regno && hard_regno <= LAST_REX_SSE_REG)
40950 return 2;
40951 /* Usage of AX register results in smaller code. Prefer it. */
40952 if (hard_regno == 0)
40953 return 4;
40954 return 3;
40957 /* Implement TARGET_PREFERRED_RELOAD_CLASS.
40959 Put float CONST_DOUBLE in the constant pool instead of fp regs.
40960 QImode must go into class Q_REGS.
40961 Narrow ALL_REGS to GENERAL_REGS. This supports allowing movsf and
40962 movdf to do mem-to-mem moves through integer regs. */
40964 static reg_class_t
40965 ix86_preferred_reload_class (rtx x, reg_class_t regclass)
40967 machine_mode mode = GET_MODE (x);
40969 /* We're only allowed to return a subclass of CLASS. Many of the
40970 following checks fail for NO_REGS, so eliminate that early. */
40971 if (regclass == NO_REGS)
40972 return NO_REGS;
40974 /* All classes can load zeros. */
40975 if (x == CONST0_RTX (mode))
40976 return regclass;
40978 /* Force constants into memory if we are loading a (nonzero) constant into
40979 an MMX, SSE or MASK register. This is because there are no MMX/SSE/MASK
40980 instructions to load from a constant. */
40981 if (CONSTANT_P (x)
40982 && (MAYBE_MMX_CLASS_P (regclass)
40983 || MAYBE_SSE_CLASS_P (regclass)
40984 || MAYBE_MASK_CLASS_P (regclass)))
40985 return NO_REGS;
40987 /* Prefer SSE regs only, if we can use them for math. */
40988 if (TARGET_SSE_MATH && !TARGET_MIX_SSE_I387 && SSE_FLOAT_MODE_P (mode))
40989 return SSE_CLASS_P (regclass) ? regclass : NO_REGS;
40991 /* Floating-point constants need more complex checks. */
40992 if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) != VOIDmode)
40994 /* General regs can load everything. */
40995 if (reg_class_subset_p (regclass, GENERAL_REGS))
40996 return regclass;
40998 /* Floats can load 0 and 1 plus some others. Note that we eliminated
40999 zero above. We only want to wind up preferring 80387 registers if
41000 we plan on doing computation with them. */
41001 if (TARGET_80387
41002 && standard_80387_constant_p (x) > 0)
41004 /* Limit class to non-sse. */
41005 if (regclass == FLOAT_SSE_REGS)
41006 return FLOAT_REGS;
41007 if (regclass == FP_TOP_SSE_REGS)
41008 return FP_TOP_REG;
41009 if (regclass == FP_SECOND_SSE_REGS)
41010 return FP_SECOND_REG;
41011 if (regclass == FLOAT_INT_REGS || regclass == FLOAT_REGS)
41012 return regclass;
41015 return NO_REGS;
41018 /* Generally when we see PLUS here, it's the function invariant
41019 (plus soft-fp const_int). Which can only be computed into general
41020 regs. */
41021 if (GET_CODE (x) == PLUS)
41022 return reg_class_subset_p (regclass, GENERAL_REGS) ? regclass : NO_REGS;
41024 /* QImode constants are easy to load, but non-constant QImode data
41025 must go into Q_REGS. */
41026 if (GET_MODE (x) == QImode && !CONSTANT_P (x))
41028 if (reg_class_subset_p (regclass, Q_REGS))
41029 return regclass;
41030 if (reg_class_subset_p (Q_REGS, regclass))
41031 return Q_REGS;
41032 return NO_REGS;
41035 return regclass;
41038 /* Discourage putting floating-point values in SSE registers unless
41039 SSE math is being used, and likewise for the 387 registers. */
41040 static reg_class_t
41041 ix86_preferred_output_reload_class (rtx x, reg_class_t regclass)
41043 machine_mode mode = GET_MODE (x);
41045 /* Restrict the output reload class to the register bank that we are doing
41046 math on. If we would like not to return a subset of CLASS, reject this
41047 alternative: if reload cannot do this, it will still use its choice. */
41048 mode = GET_MODE (x);
41049 if (TARGET_SSE_MATH && SSE_FLOAT_MODE_P (mode))
41050 return MAYBE_SSE_CLASS_P (regclass) ? ALL_SSE_REGS : NO_REGS;
41052 if (X87_FLOAT_MODE_P (mode))
41054 if (regclass == FP_TOP_SSE_REGS)
41055 return FP_TOP_REG;
41056 else if (regclass == FP_SECOND_SSE_REGS)
41057 return FP_SECOND_REG;
41058 else
41059 return FLOAT_CLASS_P (regclass) ? regclass : NO_REGS;
41062 return regclass;
41065 static reg_class_t
41066 ix86_secondary_reload (bool in_p, rtx x, reg_class_t rclass,
41067 machine_mode mode, secondary_reload_info *sri)
41069 /* Double-word spills from general registers to non-offsettable memory
41070 references (zero-extended addresses) require special handling. */
41071 if (TARGET_64BIT
41072 && MEM_P (x)
41073 && GET_MODE_SIZE (mode) > UNITS_PER_WORD
41074 && INTEGER_CLASS_P (rclass)
41075 && !offsettable_memref_p (x))
41077 sri->icode = (in_p
41078 ? CODE_FOR_reload_noff_load
41079 : CODE_FOR_reload_noff_store);
41080 /* Add the cost of moving address to a temporary. */
41081 sri->extra_cost = 1;
41083 return NO_REGS;
41086 /* QImode spills from non-QI registers require
41087 intermediate register on 32bit targets. */
41088 if (mode == QImode
41089 && (MAYBE_MASK_CLASS_P (rclass)
41090 || (!TARGET_64BIT && !in_p
41091 && INTEGER_CLASS_P (rclass)
41092 && MAYBE_NON_Q_CLASS_P (rclass))))
41094 int regno;
41096 if (REG_P (x))
41097 regno = REGNO (x);
41098 else
41099 regno = -1;
41101 if (regno >= FIRST_PSEUDO_REGISTER || GET_CODE (x) == SUBREG)
41102 regno = true_regnum (x);
41104 /* Return Q_REGS if the operand is in memory. */
41105 if (regno == -1)
41106 return Q_REGS;
41109 /* This condition handles the corner case where an expression involving
41110 pointers gets vectorized. We're trying to use the address of a
41111 stack slot as a vector initializer.
41113 (set (reg:V2DI 74 [ vect_cst_.2 ])
41114 (vec_duplicate:V2DI (reg/f:DI 20 frame)))
41116 Eventually frame gets turned into sp+offset like this:
41118 (set (reg:V2DI 21 xmm0 [orig:74 vect_cst_.2 ] [74])
41119 (vec_duplicate:V2DI (plus:DI (reg/f:DI 7 sp)
41120 (const_int 392 [0x188]))))
41122 That later gets turned into:
41124 (set (reg:V2DI 21 xmm0 [orig:74 vect_cst_.2 ] [74])
41125 (vec_duplicate:V2DI (plus:DI (reg/f:DI 7 sp)
41126 (mem/u/c/i:DI (symbol_ref/u:DI ("*.LC0") [flags 0x2]) [0 S8 A64]))))
41128 We'll have the following reload recorded:
41130 Reload 0: reload_in (DI) =
41131 (plus:DI (reg/f:DI 7 sp)
41132 (mem/u/c/i:DI (symbol_ref/u:DI ("*.LC0") [flags 0x2]) [0 S8 A64]))
41133 reload_out (V2DI) = (reg:V2DI 21 xmm0 [orig:74 vect_cst_.2 ] [74])
41134 SSE_REGS, RELOAD_OTHER (opnum = 0), can't combine
41135 reload_in_reg: (plus:DI (reg/f:DI 7 sp) (const_int 392 [0x188]))
41136 reload_out_reg: (reg:V2DI 21 xmm0 [orig:74 vect_cst_.2 ] [74])
41137 reload_reg_rtx: (reg:V2DI 22 xmm1)
41139 Which isn't going to work since SSE instructions can't handle scalar
41140 additions. Returning GENERAL_REGS forces the addition into an integer
41141 register, and reload can handle subsequent reloads without problems. */
41143 if (in_p && GET_CODE (x) == PLUS
41144 && SSE_CLASS_P (rclass)
41145 && SCALAR_INT_MODE_P (mode))
41146 return GENERAL_REGS;
41148 return NO_REGS;
41151 /* Implement TARGET_CLASS_LIKELY_SPILLED_P. */
41153 static bool
41154 ix86_class_likely_spilled_p (reg_class_t rclass)
41156 switch (rclass)
41158 case AREG:
41159 case DREG:
41160 case CREG:
41161 case BREG:
41162 case AD_REGS:
41163 case SIREG:
41164 case DIREG:
41165 case SSE_FIRST_REG:
41166 case FP_TOP_REG:
41167 case FP_SECOND_REG:
41168 case BND_REGS:
41169 return true;
41171 default:
41172 break;
41175 return false;
41178 /* If we are copying between general and FP registers, we need a memory
41179 location. The same is true for SSE and MMX registers.
41181 To optimize register_move_cost performance, allow inline variant.
41183 The macro can't work reliably when one of the CLASSES is a class containing
41184 registers from multiple units (SSE, MMX, integer). We avoid this by never
41185 combining those units in a single alternative in the machine description.
41186 Ensure that this constraint holds to avoid unexpected surprises.
41188 When STRICT is false, we are being called from REGISTER_MOVE_COST, so do not
41189 enforce these sanity checks. */
41191 static inline bool
41192 inline_secondary_memory_needed (enum reg_class class1, enum reg_class class2,
41193 machine_mode mode, int strict)
41195 if (lra_in_progress && (class1 == NO_REGS || class2 == NO_REGS))
41196 return false;
41197 if (MAYBE_FLOAT_CLASS_P (class1) != FLOAT_CLASS_P (class1)
41198 || MAYBE_FLOAT_CLASS_P (class2) != FLOAT_CLASS_P (class2)
41199 || MAYBE_SSE_CLASS_P (class1) != SSE_CLASS_P (class1)
41200 || MAYBE_SSE_CLASS_P (class2) != SSE_CLASS_P (class2)
41201 || MAYBE_MMX_CLASS_P (class1) != MMX_CLASS_P (class1)
41202 || MAYBE_MMX_CLASS_P (class2) != MMX_CLASS_P (class2))
41204 gcc_assert (!strict || lra_in_progress);
41205 return true;
41208 if (FLOAT_CLASS_P (class1) != FLOAT_CLASS_P (class2))
41209 return true;
41211 /* Between mask and general, we have moves no larger than word size. */
41212 if ((MAYBE_MASK_CLASS_P (class1) != MAYBE_MASK_CLASS_P (class2))
41213 && (GET_MODE_SIZE (mode) > UNITS_PER_WORD))
41214 return true;
41216 /* ??? This is a lie. We do have moves between mmx/general, and for
41217 mmx/sse2. But by saying we need secondary memory we discourage the
41218 register allocator from using the mmx registers unless needed. */
41219 if (MMX_CLASS_P (class1) != MMX_CLASS_P (class2))
41220 return true;
41222 if (SSE_CLASS_P (class1) != SSE_CLASS_P (class2))
41224 /* SSE1 doesn't have any direct moves from other classes. */
41225 if (!TARGET_SSE2)
41226 return true;
41228 /* If the target says that inter-unit moves are more expensive
41229 than moving through memory, then don't generate them. */
41230 if ((SSE_CLASS_P (class1) && !TARGET_INTER_UNIT_MOVES_FROM_VEC)
41231 || (SSE_CLASS_P (class2) && !TARGET_INTER_UNIT_MOVES_TO_VEC))
41232 return true;
41234 /* Between SSE and general, we have moves no larger than word size. */
41235 if (GET_MODE_SIZE (mode) > UNITS_PER_WORD)
41236 return true;
41239 return false;
41242 bool
41243 ix86_secondary_memory_needed (enum reg_class class1, enum reg_class class2,
41244 machine_mode mode, int strict)
41246 return inline_secondary_memory_needed (class1, class2, mode, strict);
41249 /* Implement the TARGET_CLASS_MAX_NREGS hook.
41251 On the 80386, this is the size of MODE in words,
41252 except in the FP regs, where a single reg is always enough. */
41254 static unsigned char
41255 ix86_class_max_nregs (reg_class_t rclass, machine_mode mode)
41257 if (MAYBE_INTEGER_CLASS_P (rclass))
41259 if (mode == XFmode)
41260 return (TARGET_64BIT ? 2 : 3);
41261 else if (mode == XCmode)
41262 return (TARGET_64BIT ? 4 : 6);
41263 else
41264 return ((GET_MODE_SIZE (mode) + UNITS_PER_WORD - 1) / UNITS_PER_WORD);
41266 else
41268 if (COMPLEX_MODE_P (mode))
41269 return 2;
41270 else
41271 return 1;
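/* A worked example (illustrative only): on a 32-bit target, DImode in
   GENERAL_REGS needs (8 + 3) / 4 == 2 registers and XFmode needs 3,
   while in FLOAT_REGS a single 80387 register holds an XFmode value
   and XCmode, being complex, needs 2.  */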
41275 /* Return true if the registers in CLASS cannot represent the change from
41276 modes FROM to TO. */
41278 bool
41279 ix86_cannot_change_mode_class (machine_mode from, machine_mode to,
41280 enum reg_class regclass)
41282 if (from == to)
41283 return false;
41285 /* x87 registers can't do subreg at all, as all values are reformatted
41286 to extended precision. */
41287 if (MAYBE_FLOAT_CLASS_P (regclass))
41288 return true;
41290 if (MAYBE_SSE_CLASS_P (regclass) || MAYBE_MMX_CLASS_P (regclass))
41292 /* Vector registers do not support QI or HImode loads. If we don't
41293 disallow a change to these modes, reload will assume it's ok to
41294 drop the subreg from (subreg:SI (reg:HI 100) 0). This affects
41295 the vec_dupv4hi pattern. */
41296 if (GET_MODE_SIZE (from) < 4)
41297 return true;
41300 return false;
41303 /* Return the cost of moving data of mode M between a
41304 register and memory. A value of 2 is the default; this cost is
41305 relative to those in `REGISTER_MOVE_COST'.
41307 This function is used extensively by register_move_cost, which is used to
41308 build tables at startup. Make it inline in this case.
41309 When IN is 2, return the maximum of the in and out move costs.
41311 If moving between registers and memory is more expensive than
41312 between two registers, you should define this macro to express the
41313 relative cost.
41315 Also model the increased cost of moving QImode registers in
41316 non-Q_REGS classes. */
41318 static inline int
41319 inline_memory_move_cost (machine_mode mode, enum reg_class regclass,
41320 int in)
41322 int cost;
41323 if (FLOAT_CLASS_P (regclass))
41325 int index;
41326 switch (mode)
41328 case SFmode:
41329 index = 0;
41330 break;
41331 case DFmode:
41332 index = 1;
41333 break;
41334 case XFmode:
41335 index = 2;
41336 break;
41337 default:
41338 return 100;
41340 if (in == 2)
41341 return MAX (ix86_cost->fp_load [index], ix86_cost->fp_store [index]);
41342 return in ? ix86_cost->fp_load [index] : ix86_cost->fp_store [index];
41344 if (SSE_CLASS_P (regclass))
41346 int index;
41347 switch (GET_MODE_SIZE (mode))
41349 case 4:
41350 index = 0;
41351 break;
41352 case 8:
41353 index = 1;
41354 break;
41355 case 16:
41356 index = 2;
41357 break;
41358 default:
41359 return 100;
41361 if (in == 2)
41362 return MAX (ix86_cost->sse_load [index], ix86_cost->sse_store [index]);
41363 return in ? ix86_cost->sse_load [index] : ix86_cost->sse_store [index];
41365 if (MMX_CLASS_P (regclass))
41367 int index;
41368 switch (GET_MODE_SIZE (mode))
41370 case 4:
41371 index = 0;
41372 break;
41373 case 8:
41374 index = 1;
41375 break;
41376 default:
41377 return 100;
41379 if (in == 2)
41380 return MAX (ix86_cost->mmx_load [index], ix86_cost->mmx_store [index]);
41381 return in ? ix86_cost->mmx_load [index] : ix86_cost->mmx_store [index];
41383 switch (GET_MODE_SIZE (mode))
41385 case 1:
41386 if (Q_CLASS_P (regclass) || TARGET_64BIT)
41388 if (!in)
41389 return ix86_cost->int_store[0];
41390 if (TARGET_PARTIAL_REG_DEPENDENCY
41391 && optimize_function_for_speed_p (cfun))
41392 cost = ix86_cost->movzbl_load;
41393 else
41394 cost = ix86_cost->int_load[0];
41395 if (in == 2)
41396 return MAX (cost, ix86_cost->int_store[0]);
41397 return cost;
41399 else
41401 if (in == 2)
41402 return MAX (ix86_cost->movzbl_load, ix86_cost->int_store[0] + 4);
41403 if (in)
41404 return ix86_cost->movzbl_load;
41405 else
41406 return ix86_cost->int_store[0] + 4;
41408 break;
41409 case 2:
41410 if (in == 2)
41411 return MAX (ix86_cost->int_load[1], ix86_cost->int_store[1]);
41412 return in ? ix86_cost->int_load[1] : ix86_cost->int_store[1];
41413 default:
41414 /* Compute number of 32bit moves needed. TFmode is moved as XFmode. */
41415 if (mode == TFmode)
41416 mode = XFmode;
41417 if (in == 2)
41418 cost = MAX (ix86_cost->int_load[2] , ix86_cost->int_store[2]);
41419 else if (in)
41420 cost = ix86_cost->int_load[2];
41421 else
41422 cost = ix86_cost->int_store[2];
41423 return (cost * (((int) GET_MODE_SIZE (mode)
41424 + UNITS_PER_WORD - 1) / UNITS_PER_WORD));
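/* A worked example (illustrative only): spilling a DImode value from
   GENERAL_REGS on a 32-bit target takes the default case, is costed as
   two word-sized moves, and with IN == 2 yields
   2 * MAX (int_load[2], int_store[2]).  A QImode store from a non-Q
   register on a 32-bit target additionally pays the constant 4.  */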
41428 static int
41429 ix86_memory_move_cost (machine_mode mode, reg_class_t regclass,
41430 bool in)
41432 return inline_memory_move_cost (mode, (enum reg_class) regclass, in ? 1 : 0);
41436 /* Return the cost of moving data from a register in class CLASS1 to
41437 one in class CLASS2.
41439 It is not required that the cost always equal 2 when FROM is the same as TO;
41440 on some machines it is expensive to move between registers if they are not
41441 general registers. */
41443 static int
41444 ix86_register_move_cost (machine_mode mode, reg_class_t class1_i,
41445 reg_class_t class2_i)
41447 enum reg_class class1 = (enum reg_class) class1_i;
41448 enum reg_class class2 = (enum reg_class) class2_i;
41450 /* In case we require secondary memory, compute cost of the store followed
41451 by load. In order to avoid bad register allocation choices, we need
41452 for this to be *at least* as high as the symmetric MEMORY_MOVE_COST. */
41454 if (inline_secondary_memory_needed (class1, class2, mode, 0))
41456 int cost = 1;
41458 cost += inline_memory_move_cost (mode, class1, 2);
41459 cost += inline_memory_move_cost (mode, class2, 2);
41461 /* In the case of copying from a general purpose register we may emit
41462 multiple stores followed by a single load, causing a memory size
41463 mismatch stall. Count this as an arbitrarily high cost of 20. */
41464 if (targetm.class_max_nregs (class1, mode)
41465 > targetm.class_max_nregs (class2, mode))
41466 cost += 20;
41468 /* In the case of FP/MMX moves, the registers actually overlap, and we
41469 have to switch modes in order to treat them differently. */
41470 if ((MMX_CLASS_P (class1) && MAYBE_FLOAT_CLASS_P (class2))
41471 || (MMX_CLASS_P (class2) && MAYBE_FLOAT_CLASS_P (class1)))
41472 cost += 20;
41474 return cost;
41477 /* Moves between SSE/MMX and integer unit are expensive. */
41478 if (MMX_CLASS_P (class1) != MMX_CLASS_P (class2)
41479 || SSE_CLASS_P (class1) != SSE_CLASS_P (class2))
41481 /* ??? By keeping returned value relatively high, we limit the number
41482 of moves between integer and MMX/SSE registers for all targets.
41483 Additionally, a high value prevents a problem with x86_modes_tieable_p(),
41484 where integer modes in MMX/SSE registers are not tieable
41485 because of missing QImode and HImode moves to, from or between
41486 MMX/SSE registers. */
41487 return MAX (8, ix86_cost->mmxsse_to_integer);
41489 if (MAYBE_FLOAT_CLASS_P (class1))
41490 return ix86_cost->fp_move;
41491 if (MAYBE_SSE_CLASS_P (class1))
41492 return ix86_cost->sse_move;
41493 if (MAYBE_MMX_CLASS_P (class1))
41494 return ix86_cost->mmx_move;
41495 return 2;
41498 /* Return TRUE if hard register REGNO can hold a value of machine-mode
41499 MODE. */
41501 bool
41502 ix86_hard_regno_mode_ok (int regno, machine_mode mode)
41504 /* Only the flags register can hold CCmode values, and it can hold nothing else. */
41505 if (CC_REGNO_P (regno))
41506 return GET_MODE_CLASS (mode) == MODE_CC;
41507 if (GET_MODE_CLASS (mode) == MODE_CC
41508 || GET_MODE_CLASS (mode) == MODE_RANDOM
41509 || GET_MODE_CLASS (mode) == MODE_PARTIAL_INT)
41510 return false;
41511 if (STACK_REGNO_P (regno))
41512 return VALID_FP_MODE_P (mode);
41513 if (MASK_REGNO_P (regno))
41514 return (VALID_MASK_REG_MODE (mode)
41515 || (TARGET_AVX512BW && VALID_MASK_AVX512BW_MODE (mode)));
41516 if (BND_REGNO_P (regno))
41517 return VALID_BND_REG_MODE (mode);
41518 if (SSE_REGNO_P (regno))
41520 /* We implement the move patterns for all vector modes into and
41521 out of SSE registers, even when no operation instructions
41522 are available. */
41524 /* For AVX-512 we allow, regardless of regno:
41525 - XI mode
41526 - any 512-bit wide vector mode
41527 - any scalar mode. */
41528 if (TARGET_AVX512F
41529 && (mode == XImode
41530 || VALID_AVX512F_REG_MODE (mode)
41531 || VALID_AVX512F_SCALAR_MODE (mode)))
41532 return true;
41534 /* TODO check for QI/HI scalars. */
41535 /* AVX512VL allows SSE regs 16+ for 128/256-bit modes. */
41536 if (TARGET_AVX512VL
41537 && (mode == OImode
41538 || mode == TImode
41539 || VALID_AVX256_REG_MODE (mode)
41540 || VALID_AVX512VL_128_REG_MODE (mode)))
41541 return true;
41543 /* xmm16-xmm31 are only available for AVX-512. */
41544 if (EXT_REX_SSE_REGNO_P (regno))
41545 return false;
41547 /* OImode and AVX modes are available only when AVX is enabled. */
41548 return ((TARGET_AVX
41549 && VALID_AVX256_REG_OR_OI_MODE (mode))
41550 || VALID_SSE_REG_MODE (mode)
41551 || VALID_SSE2_REG_MODE (mode)
41552 || VALID_MMX_REG_MODE (mode)
41553 || VALID_MMX_REG_MODE_3DNOW (mode));
41555 if (MMX_REGNO_P (regno))
41557 /* We implement the move patterns for 3DNOW modes even in MMX mode,
41558 so if the register is available at all, then we can move data of
41559 the given mode into or out of it. */
41560 return (VALID_MMX_REG_MODE (mode)
41561 || VALID_MMX_REG_MODE_3DNOW (mode));
41564 if (mode == QImode)
41566 /* Take care for QImode values - they can be in non-QI regs,
41567 but then they do cause partial register stalls. */
41568 if (ANY_QI_REGNO_P (regno))
41569 return true;
41570 if (!TARGET_PARTIAL_REG_STALL)
41571 return true;
41572 /* LRA checks if the hard register is OK for the given mode.
41573 QImode values can live in non-QI regs, so we allow all
41574 registers here. */
41575 if (lra_in_progress)
41576 return true;
41577 return !can_create_pseudo_p ();
41579 /* We handle both integer and floats in the general purpose registers. */
41580 else if (VALID_INT_MODE_P (mode))
41581 return true;
41582 else if (VALID_FP_MODE_P (mode))
41583 return true;
41584 else if (VALID_DFP_MODE_P (mode))
41585 return true;
41586 /* Lots of MMX code casts 8 byte vector modes to DImode. If we then go
41587 on to use that value in smaller contexts, this can easily force a
41588 pseudo to be allocated to GENERAL_REGS. Since this is no worse than
41589 supporting DImode, allow it. */
41590 else if (VALID_MMX_REG_MODE_3DNOW (mode) || VALID_MMX_REG_MODE (mode))
41591 return true;
41593 return false;
41596 /* A subroutine of ix86_modes_tieable_p. Return true if MODE is a
41597 tieable integer mode. */
41599 static bool
41600 ix86_tieable_integer_mode_p (machine_mode mode)
41602 switch (mode)
41604 case HImode:
41605 case SImode:
41606 return true;
41608 case QImode:
41609 return TARGET_64BIT || !TARGET_PARTIAL_REG_STALL;
41611 case DImode:
41612 return TARGET_64BIT;
41614 default:
41615 return false;
41619 /* Return true if MODE1 is accessible in a register that can hold MODE2
41620 without copying. That is, all register classes that can hold MODE2
41621 can also hold MODE1. */
41623 bool
41624 ix86_modes_tieable_p (machine_mode mode1, machine_mode mode2)
41626 if (mode1 == mode2)
41627 return true;
41629 if (ix86_tieable_integer_mode_p (mode1)
41630 && ix86_tieable_integer_mode_p (mode2))
41631 return true;
41633 /* MODE2 being XFmode implies fp stack or general regs, which means we
41634 can tie any smaller floating point modes to it. Note that we do not
41635 tie this with TFmode. */
41636 if (mode2 == XFmode)
41637 return mode1 == SFmode || mode1 == DFmode;
41639 /* MODE2 being DFmode implies fp stack, general or sse regs, which means
41640 that we can tie it with SFmode. */
41641 if (mode2 == DFmode)
41642 return mode1 == SFmode;
41644 /* If MODE2 is only appropriate for an SSE register, then tie with
41645 any other mode acceptable to SSE registers. */
41646 if (GET_MODE_SIZE (mode2) == 32
41647 && ix86_hard_regno_mode_ok (FIRST_SSE_REG, mode2))
41648 return (GET_MODE_SIZE (mode1) == 32
41649 && ix86_hard_regno_mode_ok (FIRST_SSE_REG, mode1));
41650 if (GET_MODE_SIZE (mode2) == 16
41651 && ix86_hard_regno_mode_ok (FIRST_SSE_REG, mode2))
41652 return (GET_MODE_SIZE (mode1) == 16
41653 && ix86_hard_regno_mode_ok (FIRST_SSE_REG, mode1));
41655 /* If MODE2 is appropriate for an MMX register, then tie
41656 with any other mode acceptable to MMX registers. */
41657 if (GET_MODE_SIZE (mode2) == 8
41658 && ix86_hard_regno_mode_ok (FIRST_MMX_REG, mode2))
41659 return (GET_MODE_SIZE (mode1) == 8
41660 && ix86_hard_regno_mode_ok (FIRST_MMX_REG, mode1));
41662 return false;
41665 /* Return the cost of moving between two registers of mode MODE. */
41667 static int
41668 ix86_set_reg_reg_cost (machine_mode mode)
41670 unsigned int units = UNITS_PER_WORD;
41672 switch (GET_MODE_CLASS (mode))
41674 default:
41675 break;
41677 case MODE_CC:
41678 units = GET_MODE_SIZE (CCmode);
41679 break;
41681 case MODE_FLOAT:
41682 if ((TARGET_SSE && mode == TFmode)
41683 || (TARGET_80387 && mode == XFmode)
41684 || ((TARGET_80387 || TARGET_SSE2) && mode == DFmode)
41685 || ((TARGET_80387 || TARGET_SSE) && mode == SFmode))
41686 units = GET_MODE_SIZE (mode);
41687 break;
41689 case MODE_COMPLEX_FLOAT:
41690 if ((TARGET_SSE && mode == TCmode)
41691 || (TARGET_80387 && mode == XCmode)
41692 || ((TARGET_80387 || TARGET_SSE2) && mode == DCmode)
41693 || ((TARGET_80387 || TARGET_SSE) && mode == SCmode))
41694 units = GET_MODE_SIZE (mode);
41695 break;
41697 case MODE_VECTOR_INT:
41698 case MODE_VECTOR_FLOAT:
41699 if ((TARGET_AVX512F && VALID_AVX512F_REG_MODE (mode))
41700 || (TARGET_AVX && VALID_AVX256_REG_MODE (mode))
41701 || (TARGET_SSE2 && VALID_SSE2_REG_MODE (mode))
41702 || (TARGET_SSE && VALID_SSE_REG_MODE (mode))
41703 || (TARGET_MMX && VALID_MMX_REG_MODE (mode)))
41704 units = GET_MODE_SIZE (mode);
41707 /* Return the cost of moving between two registers of mode MODE,
41708 assuming that the move will be in pieces of at most UNITS bytes. */
41709 return COSTS_N_INSNS ((GET_MODE_SIZE (mode) + units - 1) / units);
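/* A worked example (illustrative only): a DImode register copy on a
   32-bit target moves 8 bytes in word-sized (4-byte) pieces, giving
   COSTS_N_INSNS (2); a V4SFmode copy with SSE enabled moves one
   16-byte piece and costs COSTS_N_INSNS (1).  */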
41712 /* Compute a (partial) cost for rtx X. Return true if the complete
41713 cost has been computed, and false if subexpressions should be
41714 scanned. In either case, *TOTAL contains the cost result. */
41716 static bool
41717 ix86_rtx_costs (rtx x, int code_i, int outer_code_i, int opno, int *total,
41718 bool speed)
41720 rtx mask;
41721 enum rtx_code code = (enum rtx_code) code_i;
41722 enum rtx_code outer_code = (enum rtx_code) outer_code_i;
41723 machine_mode mode = GET_MODE (x);
41724 const struct processor_costs *cost = speed ? ix86_cost : &ix86_size_cost;
41726 switch (code)
41728 case SET:
41729 if (register_operand (SET_DEST (x), VOIDmode)
41730 && reg_or_0_operand (SET_SRC (x), VOIDmode))
41732 *total = ix86_set_reg_reg_cost (GET_MODE (SET_DEST (x)));
41733 return true;
41735 return false;
41737 case CONST_INT:
41738 case CONST:
41739 case LABEL_REF:
41740 case SYMBOL_REF:
41741 if (TARGET_64BIT && !x86_64_immediate_operand (x, VOIDmode))
41742 *total = 3;
41743 else if (TARGET_64BIT && !x86_64_zext_immediate_operand (x, VOIDmode))
41744 *total = 2;
41745 else if (flag_pic && SYMBOLIC_CONST (x)
41746 && !(TARGET_64BIT
41747 && (GET_CODE (x) == LABEL_REF
41748 || (GET_CODE (x) == SYMBOL_REF
41749 && SYMBOL_REF_LOCAL_P (x)))))
41750 *total = 1;
41751 else
41752 *total = 0;
41753 return true;
41755 case CONST_DOUBLE:
41756 if (mode == VOIDmode)
41758 *total = 0;
41759 return true;
41761 switch (standard_80387_constant_p (x))
41763 case 1: /* 0.0 */
41764 *total = 1;
41765 return true;
41766 default: /* Other constants */
41767 *total = 2;
41768 return true;
41769 case 0:
41770 case -1:
41771 break;
41773 if (SSE_FLOAT_MODE_P (mode))
41775 case CONST_VECTOR:
41776 switch (standard_sse_constant_p (x))
41778 case 0:
41779 break;
41780 case 1: /* 0: xor eliminates false dependency */
41781 *total = 0;
41782 return true;
41783 default: /* -1: cmp contains false dependency */
41784 *total = 1;
41785 return true;
41788 /* Fall back to (MEM (SYMBOL_REF)), since that's where
41789 it'll probably end up. Add a penalty for size. */
41790 *total = (COSTS_N_INSNS (1)
41791 + (flag_pic != 0 && !TARGET_64BIT)
41792 + (mode == SFmode ? 0 : mode == DFmode ? 1 : 2));
41793 return true;
41795 case ZERO_EXTEND:
41796 /* The zero extension is often completely free on x86_64, so make
41797 it as cheap as possible. */
41798 if (TARGET_64BIT && mode == DImode
41799 && GET_MODE (XEXP (x, 0)) == SImode)
41800 *total = 1;
41801 else if (TARGET_ZERO_EXTEND_WITH_AND)
41802 *total = cost->add;
41803 else
41804 *total = cost->movzx;
41805 return false;
41807 case SIGN_EXTEND:
41808 *total = cost->movsx;
41809 return false;
41811 case ASHIFT:
41812 if (SCALAR_INT_MODE_P (mode)
41813 && GET_MODE_SIZE (mode) < UNITS_PER_WORD
41814 && CONST_INT_P (XEXP (x, 1)))
41816 HOST_WIDE_INT value = INTVAL (XEXP (x, 1));
41817 if (value == 1)
41819 *total = cost->add;
41820 return false;
41822 if ((value == 2 || value == 3)
41823 && cost->lea <= cost->shift_const)
41825 *total = cost->lea;
41826 return false;
41829 /* FALLTHRU */
41831 case ROTATE:
41832 case ASHIFTRT:
41833 case LSHIFTRT:
41834 case ROTATERT:
41835 if (GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
41837 /* ??? Should be SSE vector operation cost. */
41838 /* At least for published AMD latencies, this really is the same
41839 as the latency for a simple fpu operation like fabs. */
41840 /* V*QImode is emulated with 1-11 insns. */
41841 if (mode == V16QImode || mode == V32QImode)
41843 int count = 11;
41844 if (TARGET_XOP && mode == V16QImode)
41846 /* For XOP we use vpshab, which requires a broadcast of the
41847 value to the variable shift insn. For constants this
41848 means a V16Q const in mem; even when we can perform the
41849 shift with one insn set the cost to prefer paddb. */
41850 if (CONSTANT_P (XEXP (x, 1)))
41852 *total = (cost->fabs
41853 + rtx_cost (XEXP (x, 0), code, 0, speed)
41854 + (speed ? 2 : COSTS_N_BYTES (16)));
41855 return true;
41857 count = 3;
41859 else if (TARGET_SSSE3)
41860 count = 7;
41861 *total = cost->fabs * count;
41863 else
41864 *total = cost->fabs;
41866 else if (GET_MODE_SIZE (mode) > UNITS_PER_WORD)
41868 if (CONST_INT_P (XEXP (x, 1)))
41870 if (INTVAL (XEXP (x, 1)) > 32)
41871 *total = cost->shift_const + COSTS_N_INSNS (2);
41872 else
41873 *total = cost->shift_const * 2;
41875 else
41877 if (GET_CODE (XEXP (x, 1)) == AND)
41878 *total = cost->shift_var * 2;
41879 else
41880 *total = cost->shift_var * 6 + COSTS_N_INSNS (2);
41883 else
41885 if (CONST_INT_P (XEXP (x, 1)))
41886 *total = cost->shift_const;
41887 else if (GET_CODE (XEXP (x, 1)) == SUBREG
41888 && GET_CODE (XEXP (XEXP (x, 1), 0)) == AND)
41890 /* Return the cost after shift-and truncation. */
41891 *total = cost->shift_var;
41892 return true;
41894 else
41895 *total = cost->shift_var;
41897 return false;
41899 case FMA:
41901 rtx sub;
41903 gcc_assert (FLOAT_MODE_P (mode));
41904 gcc_assert (TARGET_FMA || TARGET_FMA4 || TARGET_AVX512F);
41906 /* ??? SSE scalar/vector cost should be used here. */
41907 /* ??? Bald assumption that fma has the same cost as fmul. */
41908 *total = cost->fmul;
41909 *total += rtx_cost (XEXP (x, 1), FMA, 1, speed);
41911 /* Negate in op0 or op2 is free: FMS, FNMA, FNMS. */
41912 sub = XEXP (x, 0);
41913 if (GET_CODE (sub) == NEG)
41914 sub = XEXP (sub, 0);
41915 *total += rtx_cost (sub, FMA, 0, speed);
41917 sub = XEXP (x, 2);
41918 if (GET_CODE (sub) == NEG)
41919 sub = XEXP (sub, 0);
41920 *total += rtx_cost (sub, FMA, 2, speed);
41921 return true;
41924 case MULT:
41925 if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
41927 /* ??? SSE scalar cost should be used here. */
41928 *total = cost->fmul;
41929 return false;
41931 else if (X87_FLOAT_MODE_P (mode))
41933 *total = cost->fmul;
41934 return false;
41936 else if (FLOAT_MODE_P (mode))
41938 /* ??? SSE vector cost should be used here. */
41939 *total = cost->fmul;
41940 return false;
41942 else if (GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
41944 /* V*QImode is emulated with 7-13 insns. */
41945 if (mode == V16QImode || mode == V32QImode)
41947 int extra = 11;
41948 if (TARGET_XOP && mode == V16QImode)
41949 extra = 5;
41950 else if (TARGET_SSSE3)
41951 extra = 6;
41952 *total = cost->fmul * 2 + cost->fabs * extra;
41954 /* V*DImode is emulated with 5-8 insns. */
41955 else if (mode == V2DImode || mode == V4DImode)
41957 if (TARGET_XOP && mode == V2DImode)
41958 *total = cost->fmul * 2 + cost->fabs * 3;
41959 else
41960 *total = cost->fmul * 3 + cost->fabs * 5;
41962 /* Without sse4.1, we don't have PMULLD; it's emulated with 7
41963 insns, including two PMULUDQ. */
41964 else if (mode == V4SImode && !(TARGET_SSE4_1 || TARGET_AVX))
41965 *total = cost->fmul * 2 + cost->fabs * 5;
41966 else
41967 *total = cost->fmul;
41968 return false;
41970 else
41972 rtx op0 = XEXP (x, 0);
41973 rtx op1 = XEXP (x, 1);
41974 int nbits;
41975 if (CONST_INT_P (XEXP (x, 1)))
41977 unsigned HOST_WIDE_INT value = INTVAL (XEXP (x, 1));
41978 for (nbits = 0; value != 0; value &= value - 1)
41979 nbits++;
41981 else
41982 /* This is arbitrary. */
41983 nbits = 7;
41985 /* Compute costs correctly for widening multiplication. */
41986 if ((GET_CODE (op0) == SIGN_EXTEND || GET_CODE (op0) == ZERO_EXTEND)
41987 && GET_MODE_SIZE (GET_MODE (XEXP (op0, 0))) * 2
41988 == GET_MODE_SIZE (mode))
41990 int is_mulwiden = 0;
41991 machine_mode inner_mode = GET_MODE (op0);
41993 if (GET_CODE (op0) == GET_CODE (op1))
41994 is_mulwiden = 1, op1 = XEXP (op1, 0);
41995 else if (CONST_INT_P (op1))
41997 if (GET_CODE (op0) == SIGN_EXTEND)
41998 is_mulwiden = trunc_int_for_mode (INTVAL (op1), inner_mode)
41999 == INTVAL (op1);
42000 else
42001 is_mulwiden = !(INTVAL (op1) & ~GET_MODE_MASK (inner_mode));
42004 if (is_mulwiden)
42005 op0 = XEXP (op0, 0), mode = GET_MODE (op0);
42008 *total = (cost->mult_init[MODE_INDEX (mode)]
42009 + nbits * cost->mult_bit
42010 + rtx_cost (op0, outer_code, opno, speed)
42011 + rtx_cost (op1, outer_code, opno, speed));
42013 return true;
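/* A worked example (illustrative only): for an SImode multiply by the
   constant 10 (binary 1010), the popcount loop above gives nbits == 2,
   so the cost is cost->mult_init[MODE_INDEX (SImode)] + 2 * cost->mult_bit
   plus the recursive costs of the operands; a multiply by a non-constant
   uses the arbitrary nbits == 7.  */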
42016 case DIV:
42017 case UDIV:
42018 case MOD:
42019 case UMOD:
42020 if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
42021 /* ??? SSE cost should be used here. */
42022 *total = cost->fdiv;
42023 else if (X87_FLOAT_MODE_P (mode))
42024 *total = cost->fdiv;
42025 else if (FLOAT_MODE_P (mode))
42026 /* ??? SSE vector cost should be used here. */
42027 *total = cost->fdiv;
42028 else
42029 *total = cost->divide[MODE_INDEX (mode)];
42030 return false;
42032 case PLUS:
42033 if (GET_MODE_CLASS (mode) == MODE_INT
42034 && GET_MODE_SIZE (mode) <= UNITS_PER_WORD)
42036 if (GET_CODE (XEXP (x, 0)) == PLUS
42037 && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
42038 && CONST_INT_P (XEXP (XEXP (XEXP (x, 0), 0), 1))
42039 && CONSTANT_P (XEXP (x, 1)))
42041 HOST_WIDE_INT val = INTVAL (XEXP (XEXP (XEXP (x, 0), 0), 1));
42042 if (val == 2 || val == 4 || val == 8)
42044 *total = cost->lea;
42045 *total += rtx_cost (XEXP (XEXP (x, 0), 1),
42046 outer_code, opno, speed);
42047 *total += rtx_cost (XEXP (XEXP (XEXP (x, 0), 0), 0),
42048 outer_code, opno, speed);
42049 *total += rtx_cost (XEXP (x, 1), outer_code, opno, speed);
42050 return true;
42053 else if (GET_CODE (XEXP (x, 0)) == MULT
42054 && CONST_INT_P (XEXP (XEXP (x, 0), 1)))
42056 HOST_WIDE_INT val = INTVAL (XEXP (XEXP (x, 0), 1));
42057 if (val == 2 || val == 4 || val == 8)
42059 *total = cost->lea;
42060 *total += rtx_cost (XEXP (XEXP (x, 0), 0),
42061 outer_code, opno, speed);
42062 *total += rtx_cost (XEXP (x, 1), outer_code, opno, speed);
42063 return true;
42066 else if (GET_CODE (XEXP (x, 0)) == PLUS)
42068 *total = cost->lea;
42069 *total += rtx_cost (XEXP (XEXP (x, 0), 0),
42070 outer_code, opno, speed);
42071 *total += rtx_cost (XEXP (XEXP (x, 0), 1),
42072 outer_code, opno, speed);
42073 *total += rtx_cost (XEXP (x, 1), outer_code, opno, speed);
42074 return true;
42077 /* FALLTHRU */
42079 case MINUS:
42080 if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
42082 /* ??? SSE cost should be used here. */
42083 *total = cost->fadd;
42084 return false;
42086 else if (X87_FLOAT_MODE_P (mode))
42088 *total = cost->fadd;
42089 return false;
42091 else if (FLOAT_MODE_P (mode))
42093 /* ??? SSE vector cost should be used here. */
42094 *total = cost->fadd;
42095 return false;
42097 /* FALLTHRU */
42099 case AND:
42100 case IOR:
42101 case XOR:
42102 if (GET_MODE_CLASS (mode) == MODE_INT
42103 && GET_MODE_SIZE (mode) > UNITS_PER_WORD)
42105 *total = (cost->add * 2
42106 + (rtx_cost (XEXP (x, 0), outer_code, opno, speed)
42107 << (GET_MODE (XEXP (x, 0)) != DImode))
42108 + (rtx_cost (XEXP (x, 1), outer_code, opno, speed)
42109 << (GET_MODE (XEXP (x, 1)) != DImode)));
42110 return true;
42112 /* FALLTHRU */
42114 case NEG:
42115 if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
42117 /* ??? SSE cost should be used here. */
42118 *total = cost->fchs;
42119 return false;
42121 else if (X87_FLOAT_MODE_P (mode))
42123 *total = cost->fchs;
42124 return false;
42126 else if (FLOAT_MODE_P (mode))
42128 /* ??? SSE vector cost should be used here. */
42129 *total = cost->fchs;
42130 return false;
42132 /* FALLTHRU */
42134 case NOT:
42135 if (GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
42137 /* ??? Should be SSE vector operation cost. */
42138 /* At least for published AMD latencies, this really is the same
42139 as the latency for a simple fpu operation like fabs. */
42140 *total = cost->fabs;
42142 else if (GET_MODE_SIZE (mode) > UNITS_PER_WORD)
42143 *total = cost->add * 2;
42144 else
42145 *total = cost->add;
42146 return false;
42148 case COMPARE:
42149 if (GET_CODE (XEXP (x, 0)) == ZERO_EXTRACT
42150 && XEXP (XEXP (x, 0), 1) == const1_rtx
42151 && CONST_INT_P (XEXP (XEXP (x, 0), 2))
42152 && XEXP (x, 1) == const0_rtx)
42154 /* This kind of construct is implemented using test[bwl].
42155 Treat it as if we had an AND. */
42156 *total = (cost->add
42157 + rtx_cost (XEXP (XEXP (x, 0), 0), outer_code, opno, speed)
42158 + rtx_cost (const1_rtx, outer_code, opno, speed));
42159 return true;
42161 return false;
42163 case FLOAT_EXTEND:
42164 if (!(SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH))
42165 *total = 0;
42166 return false;
42168 case ABS:
42169 if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
42170 /* ??? SSE cost should be used here. */
42171 *total = cost->fabs;
42172 else if (X87_FLOAT_MODE_P (mode))
42173 *total = cost->fabs;
42174 else if (FLOAT_MODE_P (mode))
42175 /* ??? SSE vector cost should be used here. */
42176 *total = cost->fabs;
42177 return false;
42179 case SQRT:
42180 if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
42181 /* ??? SSE cost should be used here. */
42182 *total = cost->fsqrt;
42183 else if (X87_FLOAT_MODE_P (mode))
42184 *total = cost->fsqrt;
42185 else if (FLOAT_MODE_P (mode))
42186 /* ??? SSE vector cost should be used here. */
42187 *total = cost->fsqrt;
42188 return false;
42190 case UNSPEC:
42191 if (XINT (x, 1) == UNSPEC_TP)
42192 *total = 0;
42193 return false;
42195 case VEC_SELECT:
42196 case VEC_CONCAT:
42197 case VEC_DUPLICATE:
42198 /* ??? Assume all of these vector manipulation patterns are
42199 recognizable. In which case they all pretty much have the
42200 same cost. */
42201 *total = cost->fabs;
42202 return true;
42203 case VEC_MERGE:
42204 mask = XEXP (x, 2);
42205 /* This is a masked instruction; assume the same cost
42206 as the non-masked variant. */
42207 if (TARGET_AVX512F && register_operand (mask, GET_MODE (mask)))
42208 *total = rtx_cost (XEXP (x, 0), outer_code, opno, speed);
42209 else
42210 *total = cost->fabs;
42211 return true;
42213 default:
42214 return false;
42218 #if TARGET_MACHO
42220 static int current_machopic_label_num;
42222 /* Given a symbol name and its associated stub, write out the
42223 definition of the stub. */
42225 void
42226 machopic_output_stub (FILE *file, const char *symb, const char *stub)
42228 unsigned int length;
42229 char *binder_name, *symbol_name, lazy_ptr_name[32];
42230 int label = ++current_machopic_label_num;
42232 /* For 64-bit we shouldn't get here. */
42233 gcc_assert (!TARGET_64BIT);
42235 /* Lose our funky encoding stuff so it doesn't contaminate the stub. */
42236 symb = targetm.strip_name_encoding (symb);
42238 length = strlen (stub);
42239 binder_name = XALLOCAVEC (char, length + 32);
42240 GEN_BINDER_NAME_FOR_STUB (binder_name, stub, length);
42242 length = strlen (symb);
42243 symbol_name = XALLOCAVEC (char, length + 32);
42244 GEN_SYMBOL_NAME_FOR_SYMBOL (symbol_name, symb, length);
42246 sprintf (lazy_ptr_name, "L%d$lz", label);
42248 if (MACHOPIC_ATT_STUB)
42249 switch_to_section (darwin_sections[machopic_picsymbol_stub3_section]);
42250 else if (MACHOPIC_PURE)
42251 switch_to_section (darwin_sections[machopic_picsymbol_stub2_section]);
42252 else
42253 switch_to_section (darwin_sections[machopic_symbol_stub_section]);
42255 fprintf (file, "%s:\n", stub);
42256 fprintf (file, "\t.indirect_symbol %s\n", symbol_name);
42258 if (MACHOPIC_ATT_STUB)
42260 fprintf (file, "\thlt ; hlt ; hlt ; hlt ; hlt\n");
42262 else if (MACHOPIC_PURE)
42264 /* PIC stub. */
42265 /* 25-byte PIC stub using "CALL get_pc_thunk". */
42266 rtx tmp = gen_rtx_REG (SImode, 2 /* ECX */);
42267 output_set_got (tmp, NULL_RTX); /* "CALL ___<cpu>.get_pc_thunk.cx". */
42268 fprintf (file, "LPC$%d:\tmovl\t%s-LPC$%d(%%ecx),%%ecx\n",
42269 label, lazy_ptr_name, label);
42270 fprintf (file, "\tjmp\t*%%ecx\n");
42272 else
42273 fprintf (file, "\tjmp\t*%s\n", lazy_ptr_name);
42275 /* The AT&T-style ("self-modifying") stub is not lazily bound, thus
42276 it needs no stub-binding-helper. */
42277 if (MACHOPIC_ATT_STUB)
42278 return;
42280 fprintf (file, "%s:\n", binder_name);
42282 if (MACHOPIC_PURE)
42284 fprintf (file, "\tlea\t%s-%s(%%ecx),%%ecx\n", lazy_ptr_name, binder_name);
42285 fprintf (file, "\tpushl\t%%ecx\n");
42287 else
42288 fprintf (file, "\tpushl\t$%s\n", lazy_ptr_name);
42290 fputs ("\tjmp\tdyld_stub_binding_helper\n", file);
42292 /* N.B. Keep the correspondence of these
42293 'symbol_ptr/symbol_ptr2/symbol_ptr3' sections consistent with the
42294 old-pic/new-pic/non-pic stubs; altering this will break
42295 compatibility with existing dylibs. */
42296 if (MACHOPIC_PURE)
42298 /* 25-byte PIC stub using "CALL get_pc_thunk". */
42299 switch_to_section (darwin_sections[machopic_lazy_symbol_ptr2_section]);
42301 else
42302 /* 16-byte -mdynamic-no-pic stub. */
42303 switch_to_section(darwin_sections[machopic_lazy_symbol_ptr3_section]);
42305 fprintf (file, "%s:\n", lazy_ptr_name);
42306 fprintf (file, "\t.indirect_symbol %s\n", symbol_name);
42307 fprintf (file, ASM_LONG "%s\n", binder_name);
42309 #endif /* TARGET_MACHO */
42311 /* Order the registers for the register allocator. */
42313 void
42314 x86_order_regs_for_local_alloc (void)
42316 int pos = 0;
42317 int i;
42319 /* First allocate the local general purpose registers. */
42320 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
42321 if (GENERAL_REGNO_P (i) && call_used_regs[i])
42322 reg_alloc_order [pos++] = i;
42324 /* Global general purpose registers. */
42325 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
42326 if (GENERAL_REGNO_P (i) && !call_used_regs[i])
42327 reg_alloc_order [pos++] = i;
42329 /* x87 registers come first in case we are doing FP math
42330 using them. */
42331 if (!TARGET_SSE_MATH)
42332 for (i = FIRST_STACK_REG; i <= LAST_STACK_REG; i++)
42333 reg_alloc_order [pos++] = i;
42335 /* SSE registers. */
42336 for (i = FIRST_SSE_REG; i <= LAST_SSE_REG; i++)
42337 reg_alloc_order [pos++] = i;
42338 for (i = FIRST_REX_SSE_REG; i <= LAST_REX_SSE_REG; i++)
42339 reg_alloc_order [pos++] = i;
42341 /* Extended REX SSE registers. */
42342 for (i = FIRST_EXT_REX_SSE_REG; i <= LAST_EXT_REX_SSE_REG; i++)
42343 reg_alloc_order [pos++] = i;
42345 /* Mask register. */
42346 for (i = FIRST_MASK_REG; i <= LAST_MASK_REG; i++)
42347 reg_alloc_order [pos++] = i;
42349 /* MPX bound registers. */
42350 for (i = FIRST_BND_REG; i <= LAST_BND_REG; i++)
42351 reg_alloc_order [pos++] = i;
42353 /* x87 registers. */
42354 if (TARGET_SSE_MATH)
42355 for (i = FIRST_STACK_REG; i <= LAST_STACK_REG; i++)
42356 reg_alloc_order [pos++] = i;
42358 for (i = FIRST_MMX_REG; i <= LAST_MMX_REG; i++)
42359 reg_alloc_order [pos++] = i;
42361 /* Initialize the rest of the array, as we do not allocate some
42362 registers at all. */
42363 while (pos < FIRST_PSEUDO_REGISTER)
42364 reg_alloc_order [pos++] = 0;
42367 /* Handle a "callee_pop_aggregate_return" attribute; arguments as
42368 in struct attribute_spec handler. */
42369 static tree
42370 ix86_handle_callee_pop_aggregate_return (tree *node, tree name,
42371 tree args,
42372 int,
42373 bool *no_add_attrs)
42375 if (TREE_CODE (*node) != FUNCTION_TYPE
42376 && TREE_CODE (*node) != METHOD_TYPE
42377 && TREE_CODE (*node) != FIELD_DECL
42378 && TREE_CODE (*node) != TYPE_DECL)
42380 warning (OPT_Wattributes, "%qE attribute only applies to functions",
42381 name);
42382 *no_add_attrs = true;
42383 return NULL_TREE;
42385 if (TARGET_64BIT)
42387 warning (OPT_Wattributes, "%qE attribute only available for 32-bit",
42388 name);
42389 *no_add_attrs = true;
42390 return NULL_TREE;
42392 if (is_attribute_p ("callee_pop_aggregate_return", name))
42394 tree cst;
42396 cst = TREE_VALUE (args);
42397 if (TREE_CODE (cst) != INTEGER_CST)
42399 warning (OPT_Wattributes,
42400 "%qE attribute requires an integer constant argument",
42401 name);
42402 *no_add_attrs = true;
42404 else if (compare_tree_int (cst, 0) != 0
42405 && compare_tree_int (cst, 1) != 0)
42407 warning (OPT_Wattributes,
42408 "argument to %qE attribute is neither zero, nor one",
42409 name);
42410 *no_add_attrs = true;
42413 return NULL_TREE;
42416 return NULL_TREE;
42419 /* Handle a "ms_abi" or "sysv" attribute; arguments as in
42420 struct attribute_spec.handler. */
42421 static tree
42422 ix86_handle_abi_attribute (tree *node, tree name, tree, int,
42423 bool *no_add_attrs)
42425 if (TREE_CODE (*node) != FUNCTION_TYPE
42426 && TREE_CODE (*node) != METHOD_TYPE
42427 && TREE_CODE (*node) != FIELD_DECL
42428 && TREE_CODE (*node) != TYPE_DECL)
42430 warning (OPT_Wattributes, "%qE attribute only applies to functions",
42431 name);
42432 *no_add_attrs = true;
42433 return NULL_TREE;
42436 /* Can combine regparm with all attributes but fastcall. */
42437 if (is_attribute_p ("ms_abi", name))
42439 if (lookup_attribute ("sysv_abi", TYPE_ATTRIBUTES (*node)))
42441 error ("ms_abi and sysv_abi attributes are not compatible");
42444 return NULL_TREE;
42446 else if (is_attribute_p ("sysv_abi", name))
42448 if (lookup_attribute ("ms_abi", TYPE_ATTRIBUTES (*node)))
42450 error ("ms_abi and sysv_abi attributes are not compatible");
42453 return NULL_TREE;
42456 return NULL_TREE;
42459 /* Handle a "ms_struct" or "gcc_struct" attribute; arguments as in
42460 struct attribute_spec.handler. */
42461 static tree
42462 ix86_handle_struct_attribute (tree *node, tree name, tree, int,
42463 bool *no_add_attrs)
42465 tree *type = NULL;
42466 if (DECL_P (*node))
42468 if (TREE_CODE (*node) == TYPE_DECL)
42469 type = &TREE_TYPE (*node);
42471 else
42472 type = node;
42474 if (!(type && RECORD_OR_UNION_TYPE_P (*type)))
42476 warning (OPT_Wattributes, "%qE attribute ignored",
42477 name);
42478 *no_add_attrs = true;
42481 else if ((is_attribute_p ("ms_struct", name)
42482 && lookup_attribute ("gcc_struct", TYPE_ATTRIBUTES (*type)))
42483 || ((is_attribute_p ("gcc_struct", name)
42484 && lookup_attribute ("ms_struct", TYPE_ATTRIBUTES (*type)))))
42486 warning (OPT_Wattributes, "%qE incompatible attribute ignored",
42487 name);
42488 *no_add_attrs = true;
42491 return NULL_TREE;
42494 static tree
42495 ix86_handle_fndecl_attribute (tree *node, tree name, tree, int,
42496 bool *no_add_attrs)
42498 if (TREE_CODE (*node) != FUNCTION_DECL)
42500 warning (OPT_Wattributes, "%qE attribute only applies to functions",
42501 name);
42502 *no_add_attrs = true;
42504 return NULL_TREE;
42507 static bool
42508 ix86_ms_bitfield_layout_p (const_tree record_type)
42510 return ((TARGET_MS_BITFIELD_LAYOUT
42511 && !lookup_attribute ("gcc_struct", TYPE_ATTRIBUTES (record_type)))
42512 || lookup_attribute ("ms_struct", TYPE_ATTRIBUTES (record_type)));
42515 /* Returns an expression indicating where the this parameter is
42516 located on entry to the FUNCTION. */
42518 static rtx
42519 x86_this_parameter (tree function)
42521 tree type = TREE_TYPE (function);
42522 bool aggr = aggregate_value_p (TREE_TYPE (type), type) != 0;
42523 int nregs;
42525 if (TARGET_64BIT)
42527 const int *parm_regs;
42529 if (ix86_function_type_abi (type) == MS_ABI)
42530 parm_regs = x86_64_ms_abi_int_parameter_registers;
42531 else
42532 parm_regs = x86_64_int_parameter_registers;
42533 return gen_rtx_REG (Pmode, parm_regs[aggr]);
42536 nregs = ix86_function_regparm (type, function);
42538 if (nregs > 0 && !stdarg_p (type))
42540 int regno;
42541 unsigned int ccvt = ix86_get_callcvt (type);
42543 if ((ccvt & IX86_CALLCVT_FASTCALL) != 0)
42544 regno = aggr ? DX_REG : CX_REG;
42545 else if ((ccvt & IX86_CALLCVT_THISCALL) != 0)
42547 regno = CX_REG;
42548 if (aggr)
42549 return gen_rtx_MEM (SImode,
42550 plus_constant (Pmode, stack_pointer_rtx, 4));
42552 else
42554 regno = AX_REG;
42555 if (aggr)
42557 regno = DX_REG;
42558 if (nregs == 1)
42559 return gen_rtx_MEM (SImode,
42560 plus_constant (Pmode,
42561 stack_pointer_rtx, 4));
42564 return gen_rtx_REG (SImode, regno);
42567 return gen_rtx_MEM (SImode, plus_constant (Pmode, stack_pointer_rtx,
42568 aggr ? 8 : 4));
42571 /* Determine whether x86_output_mi_thunk can succeed. */
42573 static bool
42574 x86_can_output_mi_thunk (const_tree, HOST_WIDE_INT, HOST_WIDE_INT vcall_offset,
42575 const_tree function)
42577 /* 64-bit can handle anything. */
42578 if (TARGET_64BIT)
42579 return true;
42581 /* For 32-bit, everything's fine if we have one free register. */
42582 if (ix86_function_regparm (TREE_TYPE (function), function) < 3)
42583 return true;
42585 /* Need a free register for vcall_offset. */
42586 if (vcall_offset)
42587 return false;
42589 /* Need a free register for GOT references. */
42590 if (flag_pic && !targetm.binds_local_p (function))
42591 return false;
42593 /* Otherwise ok. */
42594 return true;
42597 /* Output the assembler code for a thunk function. THUNK_DECL is the
42598 declaration for the thunk function itself, FUNCTION is the decl for
42599 the target function. DELTA is an immediate constant offset to be
42600 added to THIS. If VCALL_OFFSET is nonzero, the word at
42601 *(*this + vcall_offset) should be added to THIS. */
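/* In pseudo-code the emitted thunk body is:
     this += DELTA;
     if (VCALL_OFFSET)
       this += *(*this + VCALL_OFFSET);    (a word loaded via the vtable)
   followed by a tail call to FUNCTION with the adjusted this pointer.  */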
42603 static void
42604 x86_output_mi_thunk (FILE *file, tree, HOST_WIDE_INT delta,
42605 HOST_WIDE_INT vcall_offset, tree function)
42607 rtx this_param = x86_this_parameter (function);
42608 rtx this_reg, tmp, fnaddr;
42609 unsigned int tmp_regno;
42610 rtx_insn *insn;
42612 if (TARGET_64BIT)
42613 tmp_regno = R10_REG;
42614 else
42616 unsigned int ccvt = ix86_get_callcvt (TREE_TYPE (function));
42617 if ((ccvt & IX86_CALLCVT_FASTCALL) != 0)
42618 tmp_regno = AX_REG;
42619 else if ((ccvt & IX86_CALLCVT_THISCALL) != 0)
42620 tmp_regno = DX_REG;
42621 else
42622 tmp_regno = CX_REG;
42625 emit_note (NOTE_INSN_PROLOGUE_END);
42627 /* If VCALL_OFFSET, we'll need THIS in a register. Might as well
42628 pull it in now and let DELTA benefit. */
42629 if (REG_P (this_param))
42630 this_reg = this_param;
42631 else if (vcall_offset)
42633 /* Put the this parameter into %eax. */
42634 this_reg = gen_rtx_REG (Pmode, AX_REG);
42635 emit_move_insn (this_reg, this_param);
42637 else
42638 this_reg = NULL_RTX;
42640 /* Adjust the this parameter by a fixed constant. */
42641 if (delta)
42643 rtx delta_rtx = GEN_INT (delta);
42644 rtx delta_dst = this_reg ? this_reg : this_param;
42646 if (TARGET_64BIT)
42648 if (!x86_64_general_operand (delta_rtx, Pmode))
42650 tmp = gen_rtx_REG (Pmode, tmp_regno);
42651 emit_move_insn (tmp, delta_rtx);
42652 delta_rtx = tmp;
42656 ix86_emit_binop (PLUS, Pmode, delta_dst, delta_rtx);
42659 /* Adjust the this parameter by a value stored in the vtable. */
42660 if (vcall_offset)
42662 rtx vcall_addr, vcall_mem, this_mem;
42664 tmp = gen_rtx_REG (Pmode, tmp_regno);
42666 this_mem = gen_rtx_MEM (ptr_mode, this_reg);
42667 if (Pmode != ptr_mode)
42668 this_mem = gen_rtx_ZERO_EXTEND (Pmode, this_mem);
42669 emit_move_insn (tmp, this_mem);
42671 /* Adjust the this parameter. */
42672 vcall_addr = plus_constant (Pmode, tmp, vcall_offset);
42673 if (TARGET_64BIT
42674 && !ix86_legitimate_address_p (ptr_mode, vcall_addr, true))
42676 rtx tmp2 = gen_rtx_REG (Pmode, R11_REG);
42677 emit_move_insn (tmp2, GEN_INT (vcall_offset));
42678 vcall_addr = gen_rtx_PLUS (Pmode, tmp, tmp2);
42681 vcall_mem = gen_rtx_MEM (ptr_mode, vcall_addr);
42682 if (Pmode != ptr_mode)
42683 emit_insn (gen_addsi_1_zext (this_reg,
42684 gen_rtx_REG (ptr_mode,
42685 REGNO (this_reg)),
42686 vcall_mem));
42687 else
42688 ix86_emit_binop (PLUS, Pmode, this_reg, vcall_mem);
42691 /* If necessary, drop THIS back to its stack slot. */
42692 if (this_reg && this_reg != this_param)
42693 emit_move_insn (this_param, this_reg);
42695 fnaddr = XEXP (DECL_RTL (function), 0);
42696 if (TARGET_64BIT)
42698 if (!flag_pic || targetm.binds_local_p (function)
42699 || TARGET_PECOFF)
42701 else
42703 tmp = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, fnaddr), UNSPEC_GOTPCREL);
42704 tmp = gen_rtx_CONST (Pmode, tmp);
42705 fnaddr = gen_const_mem (Pmode, tmp);
42708 else
42710 if (!flag_pic || targetm.binds_local_p (function))
42712 #if TARGET_MACHO
42713 else if (TARGET_MACHO)
42715 fnaddr = machopic_indirect_call_target (DECL_RTL (function));
42716 fnaddr = XEXP (fnaddr, 0);
42718 #endif /* TARGET_MACHO */
42719 else
42721 tmp = gen_rtx_REG (Pmode, CX_REG);
42722 output_set_got (tmp, NULL_RTX);
42724 fnaddr = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, fnaddr), UNSPEC_GOT);
42725 fnaddr = gen_rtx_CONST (Pmode, fnaddr);
42726 fnaddr = gen_rtx_PLUS (Pmode, tmp, fnaddr);
42727 fnaddr = gen_const_mem (Pmode, fnaddr);
42731 /* Our sibling call patterns do not allow memories, because we have no
42732 predicate that can distinguish between frame and non-frame memory.
42733 For our purposes here, we can get away with (ab)using a jump pattern,
42734 because we're going to do no optimization. */
42735 if (MEM_P (fnaddr))
42737 if (sibcall_insn_operand (fnaddr, word_mode))
42739 fnaddr = XEXP (DECL_RTL (function), 0);
42740 tmp = gen_rtx_MEM (QImode, fnaddr);
42741 tmp = gen_rtx_CALL (VOIDmode, tmp, const0_rtx);
42742 tmp = emit_call_insn (tmp);
42743 SIBLING_CALL_P (tmp) = 1;
42745 else
42746 emit_jump_insn (gen_indirect_jump (fnaddr));
42748 else
42750 if (ix86_cmodel == CM_LARGE_PIC && SYMBOLIC_CONST (fnaddr))
42752 // CM_LARGE_PIC always uses pseudo PIC register which is
42753 // uninitialized. Since FUNCTION is local and calling it
42754 // doesn't go through PLT, we use scratch register %r11 as
42755 // PIC register and initialize it here.
42756 pic_offset_table_rtx = gen_rtx_REG (Pmode, R11_REG);
42757 ix86_init_large_pic_reg (tmp_regno);
42758 fnaddr = legitimize_pic_address (fnaddr,
42759 gen_rtx_REG (Pmode, tmp_regno));
42762 if (!sibcall_insn_operand (fnaddr, word_mode))
42764 tmp = gen_rtx_REG (word_mode, tmp_regno);
42765 if (GET_MODE (fnaddr) != word_mode)
42766 fnaddr = gen_rtx_ZERO_EXTEND (word_mode, fnaddr);
42767 emit_move_insn (tmp, fnaddr);
42768 fnaddr = tmp;
42771 tmp = gen_rtx_MEM (QImode, fnaddr);
42772 tmp = gen_rtx_CALL (VOIDmode, tmp, const0_rtx);
42773 tmp = emit_call_insn (tmp);
42774 SIBLING_CALL_P (tmp) = 1;
42776 emit_barrier ();
42778 /* Emit just enough of rest_of_compilation to get the insns emitted.
42779 Note that use_thunk calls assemble_start_function et al. */
42780 insn = get_insns ();
42781 shorten_branches (insn);
42782 final_start_function (insn, file, 1);
42783 final (insn, file, 1);
42784 final_end_function ();
42787 static void
42788 x86_file_start (void)
42790 default_file_start ();
42791 if (TARGET_16BIT)
42792 fputs ("\t.code16gcc\n", asm_out_file);
42793 #if TARGET_MACHO
42794 darwin_file_start ();
42795 #endif
42796 if (X86_FILE_START_VERSION_DIRECTIVE)
42797 fputs ("\t.version\t\"01.01\"\n", asm_out_file);
42798 if (X86_FILE_START_FLTUSED)
42799 fputs ("\t.global\t__fltused\n", asm_out_file);
42800 if (ix86_asm_dialect == ASM_INTEL)
42801 fputs ("\t.intel_syntax noprefix\n", asm_out_file);
42804 int
42805 x86_field_alignment (tree field, int computed)
42807 machine_mode mode;
42808 tree type = TREE_TYPE (field);
42810 if (TARGET_64BIT || TARGET_ALIGN_DOUBLE)
42811 return computed;
42812 mode = TYPE_MODE (strip_array_types (type));
42813 if (mode == DFmode || mode == DCmode
42814 || GET_MODE_CLASS (mode) == MODE_INT
42815 || GET_MODE_CLASS (mode) == MODE_COMPLEX_INT)
42816 return MIN (32, computed);
42817 return computed;
42820 /* Print call to TARGET to FILE. */
42822 static void
42823 x86_print_call_or_nop (FILE *file, const char *target)
42825 if (flag_nop_mcount)
42826 fprintf (file, "1:\tnopl 0x00(%%eax,%%eax,1)\n"); /* 5 byte nop. */
42827 else
42828 fprintf (file, "1:\tcall\t%s\n", target);
42831 /* Output assembler code to FILE to increment profiler label # LABELNO
42832 for profiling a function entry. */
42833 void
42834 x86_function_profiler (FILE *file, int labelno ATTRIBUTE_UNUSED)
42836 const char *mcount_name = (flag_fentry ? MCOUNT_NAME_BEFORE_PROLOGUE
42837 : MCOUNT_NAME);
42838 if (TARGET_64BIT)
42840 #ifndef NO_PROFILE_COUNTERS
42841 fprintf (file, "\tleaq\t%sP%d(%%rip),%%r11\n", LPREFIX, labelno);
42842 #endif
42844 if (!TARGET_PECOFF && flag_pic)
42845 fprintf (file, "1:\tcall\t*%s@GOTPCREL(%%rip)\n", mcount_name);
42846 else
42847 x86_print_call_or_nop (file, mcount_name);
42849 else if (flag_pic)
42851 #ifndef NO_PROFILE_COUNTERS
42852 fprintf (file, "\tleal\t%sP%d@GOTOFF(%%ebx),%%" PROFILE_COUNT_REGISTER "\n",
42853 LPREFIX, labelno);
42854 #endif
42855 fprintf (file, "1:\tcall\t*%s@GOT(%%ebx)\n", mcount_name);
42857 else
42859 #ifndef NO_PROFILE_COUNTERS
42860 fprintf (file, "\tmovl\t$%sP%d,%%" PROFILE_COUNT_REGISTER "\n",
42861 LPREFIX, labelno);
42862 #endif
42863 x86_print_call_or_nop (file, mcount_name);
42866 if (flag_record_mcount)
42868 fprintf (file, "\t.section __mcount_loc, \"a\",@progbits\n");
42869 fprintf (file, "\t.%s 1b\n", TARGET_64BIT ? "quad" : "long");
42870 fprintf (file, "\t.previous\n");
42874 /* We don't have exact information about the insn sizes, but we may assume
42875 quite safely that we are informed about all 1 byte insns and memory
42876 address sizes. This is enough to eliminate unnecessary padding in
42877 99% of cases. */
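/* Return a conservative lower bound, in bytes, on the encoded length of
   INSN; zero when nothing can safely be assumed (e.g. for inline asm).  */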
42879 static int
42880 min_insn_size (rtx_insn *insn)
42882 int l = 0, len;
42884 if (!INSN_P (insn) || !active_insn_p (insn))
42885 return 0;
42887 /* Discard alignments we've emitted and jump instructions. */
42888 if (GET_CODE (PATTERN (insn)) == UNSPEC_VOLATILE
42889 && XINT (PATTERN (insn), 1) == UNSPECV_ALIGN)
42890 return 0;
42892 /* Important case - calls are always 5 bytes.
42893 It is common to have many calls in a row. */
42894 if (CALL_P (insn)
42895 && symbolic_reference_mentioned_p (PATTERN (insn))
42896 && !SIBLING_CALL_P (insn))
42897 return 5;
42898 len = get_attr_length (insn);
42899 if (len <= 1)
42900 return 1;
42902 /* For normal instructions we rely on get_attr_length being exact,
42903 with a few exceptions. */
42904 if (!JUMP_P (insn))
42906 enum attr_type type = get_attr_type (insn);
42908 switch (type)
42910 case TYPE_MULTI:
42911 if (GET_CODE (PATTERN (insn)) == ASM_INPUT
42912 || asm_noperands (PATTERN (insn)) >= 0)
42913 return 0;
42914 break;
42915 case TYPE_OTHER:
42916 case TYPE_FCMP:
42917 break;
42918 default:
42919 /* Otherwise trust get_attr_length. */
42920 return len;
42923 l = get_attr_length_address (insn);
42924 if (l < 4 && symbolic_reference_mentioned_p (PATTERN (insn)))
42925 l = 4;
42927 if (l)
42928 return 1+l;
42929 else
42930 return 2;
42933 #ifdef ASM_OUTPUT_MAX_SKIP_PAD
42935 /* The AMD K8 core mispredicts jumps when there are more than 3 jumps in a
42936 16 byte window. */
42938 static void
42939 ix86_avoid_jump_mispredicts (void)
42941 rtx_insn *insn, *start = get_insns ();
42942 int nbytes = 0, njumps = 0;
42943 int isjump = 0;
42945 /* Look for all minimal intervals of instructions containing 4 jumps.
42946 The intervals are bounded by START and INSN. NBYTES is the total
42947 size of instructions in the interval including INSN and not including
42948 START. When the NBYTES is smaller than 16 bytes, it is possible
42949 that the end of START and INSN ends up in the same 16byte page.
42951 The smallest offset in the page INSN can start is the case where START
42952 ends on the offset 0. Offset of INSN is then NBYTES - sizeof (INSN).
42953 We add p2align to 16byte window with maxskip 15 - NBYTES + sizeof (INSN).
42955 Don't consider asm goto as jump, while it can contain a jump, it doesn't
42956 have to, control transfer to label(s) can be performed through other
42957 means, and also we estimate minimum length of all asm stmts as 0. */
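/* Example: with NBYTES == 13 and a 2 byte INSN, the code below emits a
   p2align asking for up to 15 - 13 + 2 = 4 padding bytes, enough to push
   INSN into the next 16 byte window when a fourth jump would otherwise
   land in the current one.  */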
42958 for (insn = start; insn; insn = NEXT_INSN (insn))
42960 int min_size;
42962 if (LABEL_P (insn))
42964 int align = label_to_alignment (insn);
42965 int max_skip = label_to_max_skip (insn);
42967 if (max_skip > 15)
42968 max_skip = 15;
42969 /* If align > 3, only up to 16 - max_skip - 1 bytes can be
42970 already in the current 16 byte page, because otherwise
42971 ASM_OUTPUT_MAX_SKIP_ALIGN could skip max_skip or fewer
42972 bytes to reach 16 byte boundary. */
42973 if (align <= 0
42974 || (align <= 3 && max_skip != (1 << align) - 1))
42975 max_skip = 0;
42976 if (dump_file)
42977 fprintf (dump_file, "Label %i with max_skip %i\n",
42978 INSN_UID (insn), max_skip);
42979 if (max_skip)
42981 while (nbytes + max_skip >= 16)
42983 start = NEXT_INSN (start);
42984 if ((JUMP_P (start) && asm_noperands (PATTERN (start)) < 0)
42985 || CALL_P (start))
42986 njumps--, isjump = 1;
42987 else
42988 isjump = 0;
42989 nbytes -= min_insn_size (start);
42992 continue;
42995 min_size = min_insn_size (insn);
42996 nbytes += min_size;
42997 if (dump_file)
42998 fprintf (dump_file, "Insn %i estimated to %i bytes\n",
42999 INSN_UID (insn), min_size);
43000 if ((JUMP_P (insn) && asm_noperands (PATTERN (insn)) < 0)
43001 || CALL_P (insn))
43002 njumps++;
43003 else
43004 continue;
43006 while (njumps > 3)
43008 start = NEXT_INSN (start);
43009 if ((JUMP_P (start) && asm_noperands (PATTERN (start)) < 0)
43010 || CALL_P (start))
43011 njumps--, isjump = 1;
43012 else
43013 isjump = 0;
43014 nbytes -= min_insn_size (start);
43016 gcc_assert (njumps >= 0);
43017 if (dump_file)
43018 fprintf (dump_file, "Interval %i to %i has %i bytes\n",
43019 INSN_UID (start), INSN_UID (insn), nbytes);
43021 if (njumps == 3 && isjump && nbytes < 16)
43023 int padsize = 15 - nbytes + min_insn_size (insn);
43025 if (dump_file)
43026 fprintf (dump_file, "Padding insn %i by %i bytes!\n",
43027 INSN_UID (insn), padsize);
43028 emit_insn_before (gen_pad (GEN_INT (padsize)), insn);
43032 #endif
43034 /* The AMD Athlon works faster
43035 when RET is not the destination of a conditional jump or directly preceded
43036 by another jump instruction. We avoid the penalty by inserting a NOP just
43037 before the RET instruction in such cases. */
43038 static void
43039 ix86_pad_returns (void)
43041 edge e;
43042 edge_iterator ei;
43044 FOR_EACH_EDGE (e, ei, EXIT_BLOCK_PTR_FOR_FN (cfun)->preds)
43046 basic_block bb = e->src;
43047 rtx_insn *ret = BB_END (bb);
43048 rtx_insn *prev;
43049 bool replace = false;
43051 if (!JUMP_P (ret) || !ANY_RETURN_P (PATTERN (ret))
43052 || optimize_bb_for_size_p (bb))
43053 continue;
43054 for (prev = PREV_INSN (ret); prev; prev = PREV_INSN (prev))
43055 if (active_insn_p (prev) || LABEL_P (prev))
43056 break;
43057 if (prev && LABEL_P (prev))
43059 edge e;
43060 edge_iterator ei;
43062 FOR_EACH_EDGE (e, ei, bb->preds)
43063 if (EDGE_FREQUENCY (e) && e->src->index >= 0
43064 && !(e->flags & EDGE_FALLTHRU))
43066 replace = true;
43067 break;
43070 if (!replace)
43072 prev = prev_active_insn (ret);
43073 if (prev
43074 && ((JUMP_P (prev) && any_condjump_p (prev))
43075 || CALL_P (prev)))
43076 replace = true;
43077 /* Empty functions get a branch mispredict even when
43078 the jump destination is not visible to us. */
43079 if (!prev && !optimize_function_for_size_p (cfun))
43080 replace = true;
43082 if (replace)
43084 emit_jump_insn_before (gen_simple_return_internal_long (), ret);
43085 delete_insn (ret);
43090 /* Count the minimum number of instructions in BB. Return 4 if the
43091 number of instructions >= 4. */
43093 static int
43094 ix86_count_insn_bb (basic_block bb)
43096 rtx_insn *insn;
43097 int insn_count = 0;
43099 /* Count number of instructions in this block. Return 4 if the number
43100 of instructions >= 4. */
43101 FOR_BB_INSNS (bb, insn)
43103 /* This only happens in exit blocks. */
43104 if (JUMP_P (insn)
43105 && ANY_RETURN_P (PATTERN (insn)))
43106 break;
43108 if (NONDEBUG_INSN_P (insn)
43109 && GET_CODE (PATTERN (insn)) != USE
43110 && GET_CODE (PATTERN (insn)) != CLOBBER)
43112 insn_count++;
43113 if (insn_count >= 4)
43114 return insn_count;
43118 return insn_count;
43122 /* Count the minimum number of instructions in code path in BB.
43123 Return 4 if the number of instructions >= 4. */
43125 static int
43126 ix86_count_insn (basic_block bb)
43128 edge e;
43129 edge_iterator ei;
43130 int min_prev_count;
43132 /* Only bother counting instructions along paths with no
43133 more than 2 basic blocks between entry and exit. Given
43134 that BB has an edge to exit, determine if a predecessor
43135 of BB has an edge from entry. If so, compute the number
43136 of instructions in the predecessor block. If there
43137 happen to be multiple such blocks, compute the minimum. */
43138 min_prev_count = 4;
43139 FOR_EACH_EDGE (e, ei, bb->preds)
43141 edge prev_e;
43142 edge_iterator prev_ei;
43144 if (e->src == ENTRY_BLOCK_PTR_FOR_FN (cfun))
43146 min_prev_count = 0;
43147 break;
43149 FOR_EACH_EDGE (prev_e, prev_ei, e->src->preds)
43151 if (prev_e->src == ENTRY_BLOCK_PTR_FOR_FN (cfun))
43153 int count = ix86_count_insn_bb (e->src);
43154 if (count < min_prev_count)
43155 min_prev_count = count;
43156 break;
43161 if (min_prev_count < 4)
43162 min_prev_count += ix86_count_insn_bb (bb);
43164 return min_prev_count;
43167 /* Pad short function to 4 instructions. */
43169 static void
43170 ix86_pad_short_function (void)
43172 edge e;
43173 edge_iterator ei;
43175 FOR_EACH_EDGE (e, ei, EXIT_BLOCK_PTR_FOR_FN (cfun)->preds)
43177 rtx_insn *ret = BB_END (e->src);
43178 if (JUMP_P (ret) && ANY_RETURN_P (PATTERN (ret)))
43180 int insn_count = ix86_count_insn (e->src);
43182 /* Pad short function. */
43183 if (insn_count < 4)
43185 rtx_insn *insn = ret;
43187 /* Find epilogue. */
43188 while (insn
43189 && (!NOTE_P (insn)
43190 || NOTE_KIND (insn) != NOTE_INSN_EPILOGUE_BEG))
43191 insn = PREV_INSN (insn);
43193 if (!insn)
43194 insn = ret;
43196 /* Two NOPs count as one instruction. */
43197 insn_count = 2 * (4 - insn_count);
43198 emit_insn_before (gen_nops (GEN_INT (insn_count)), insn);
43204 /* Fix up a Windows system unwinder issue. If an EH region falls through into
43205 the epilogue, the Windows system unwinder will apply epilogue logic and
43206 produce incorrect offsets. This can be avoided by adding a nop between
43207 the last insn that can throw and the first insn of the epilogue. */
43209 static void
43210 ix86_seh_fixup_eh_fallthru (void)
43212 edge e;
43213 edge_iterator ei;
43215 FOR_EACH_EDGE (e, ei, EXIT_BLOCK_PTR_FOR_FN (cfun)->preds)
43217 rtx_insn *insn, *next;
43219 /* Find the beginning of the epilogue. */
43220 for (insn = BB_END (e->src); insn != NULL; insn = PREV_INSN (insn))
43221 if (NOTE_P (insn) && NOTE_KIND (insn) == NOTE_INSN_EPILOGUE_BEG)
43222 break;
43223 if (insn == NULL)
43224 continue;
43226 /* We only care about preceding insns that can throw. */
43227 insn = prev_active_insn (insn);
43228 if (insn == NULL || !can_throw_internal (insn))
43229 continue;
43231 /* Do not separate calls from their debug information. */
43232 for (next = NEXT_INSN (insn); next != NULL; next = NEXT_INSN (next))
43233 if (NOTE_P (next)
43234 && (NOTE_KIND (next) == NOTE_INSN_VAR_LOCATION
43235 || NOTE_KIND (next) == NOTE_INSN_CALL_ARG_LOCATION))
43236 insn = next;
43237 else
43238 break;
43240 emit_insn_after (gen_nops (const1_rtx), insn);
43244 /* Implement machine specific optimizations. We implement padding of returns
43245 for K8 CPUs and a pass to avoid 4 jumps in a single 16 byte window. */
43246 static void
43247 ix86_reorg (void)
43249 /* We are freeing block_for_insn in the toplev to keep compatibility
43250 with old MDEP_REORGS that are not CFG based. Recompute it now. */
43251 compute_bb_for_insn ();
43253 if (TARGET_SEH && current_function_has_exception_handlers ())
43254 ix86_seh_fixup_eh_fallthru ();
43256 if (optimize && optimize_function_for_speed_p (cfun))
43258 if (TARGET_PAD_SHORT_FUNCTION)
43259 ix86_pad_short_function ();
43260 else if (TARGET_PAD_RETURNS)
43261 ix86_pad_returns ();
43262 #ifdef ASM_OUTPUT_MAX_SKIP_PAD
43263 if (TARGET_FOUR_JUMP_LIMIT)
43264 ix86_avoid_jump_mispredicts ();
43265 #endif
43269 /* Return nonzero when a QImode register that must be represented via a REX
43270 prefix is used. */
43271 bool
43272 x86_extended_QIreg_mentioned_p (rtx_insn *insn)
43274 int i;
43275 extract_insn_cached (insn);
43276 for (i = 0; i < recog_data.n_operands; i++)
43277 if (GENERAL_REG_P (recog_data.operand[i])
43278 && !QI_REGNO_P (REGNO (recog_data.operand[i])))
43279 return true;
43280 return false;
43283 /* Return true when INSN mentions a register that must be encoded using a
43284 REX prefix. */
43285 bool
43286 x86_extended_reg_mentioned_p (rtx insn)
43288 subrtx_iterator::array_type array;
43289 FOR_EACH_SUBRTX (iter, array, INSN_P (insn) ? PATTERN (insn) : insn, NONCONST)
43291 const_rtx x = *iter;
43292 if (REG_P (x)
43293 && (REX_INT_REGNO_P (REGNO (x)) || REX_SSE_REGNO_P (REGNO (x))))
43294 return true;
43296 return false;
43299 /* If profitable, negate (without causing overflow) integer constant
43300 of mode MODE at location LOC. Return true in this case. */
43301 bool
43302 x86_maybe_negate_const_int (rtx *loc, machine_mode mode)
43304 HOST_WIDE_INT val;
43306 if (!CONST_INT_P (*loc))
43307 return false;
43309 switch (mode)
43311 case DImode:
43312 /* DImode x86_64 constants must fit in 32 bits. */
43313 gcc_assert (x86_64_immediate_operand (*loc, mode));
43315 mode = SImode;
43316 break;
43318 case SImode:
43319 case HImode:
43320 case QImode:
43321 break;
43323 default:
43324 gcc_unreachable ();
43327 /* Avoid overflows. */
43328 if (mode_signbit_p (mode, *loc))
43329 return false;
43331 val = INTVAL (*loc);
43333 /* Make things pretty and `subl $4,%eax' rather than `addl $-4,%eax'.
43334 Exceptions: -128 encodes smaller than 128, so swap sign and op. */
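/* Example: "addl $128, %eax" needs a 32-bit immediate, while the rewritten
   "subl $-128, %eax" fits in a sign-extended 8-bit immediate; conversely a
   constant of -128 is left alone, since +128 would no longer fit.  */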
43335 if ((val < 0 && val != -128)
43336 || val == 128)
43338 *loc = GEN_INT (-val);
43339 return true;
43342 return false;
43345 /* Generate an unsigned DImode/SImode to FP conversion. This is the same code
43346 optabs would emit if we didn't have TFmode patterns. */
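/* A negative (i.e. large unsigned) input is halved with its low bit folded
   back in, i0 = (in >> 1) | (in & 1), converted as a signed value and then
   doubled, which keeps the final result correctly rounded; nonnegative
   inputs simply take the plain signed-conversion path.  */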
43348 void
43349 x86_emit_floatuns (rtx operands[2])
43351 rtx_code_label *neglab, *donelab;
43352 rtx i0, i1, f0, in, out;
43353 machine_mode mode, inmode;
43355 inmode = GET_MODE (operands[1]);
43356 gcc_assert (inmode == SImode || inmode == DImode);
43358 out = operands[0];
43359 in = force_reg (inmode, operands[1]);
43360 mode = GET_MODE (out);
43361 neglab = gen_label_rtx ();
43362 donelab = gen_label_rtx ();
43363 f0 = gen_reg_rtx (mode);
43365 emit_cmp_and_jump_insns (in, const0_rtx, LT, const0_rtx, inmode, 0, neglab);
43367 expand_float (out, in, 0);
43369 emit_jump_insn (gen_jump (donelab));
43370 emit_barrier ();
43372 emit_label (neglab);
43374 i0 = expand_simple_binop (inmode, LSHIFTRT, in, const1_rtx, NULL,
43375 1, OPTAB_DIRECT);
43376 i1 = expand_simple_binop (inmode, AND, in, const1_rtx, NULL,
43377 1, OPTAB_DIRECT);
43378 i0 = expand_simple_binop (inmode, IOR, i0, i1, i0, 1, OPTAB_DIRECT);
43380 expand_float (f0, i0, 0);
43382 emit_insn (gen_rtx_SET (VOIDmode, out, gen_rtx_PLUS (mode, f0, f0)));
43384 emit_label (donelab);
43387 static bool canonicalize_perm (struct expand_vec_perm_d *d);
43388 static bool expand_vec_perm_1 (struct expand_vec_perm_d *d);
43389 static bool expand_vec_perm_broadcast_1 (struct expand_vec_perm_d *d);
43390 static bool expand_vec_perm_palignr (struct expand_vec_perm_d *d, bool);
43392 /* Get a vector mode of the same size as the original but with elements
43393 twice as wide. This is only guaranteed to apply to integral vectors. */
43395 static inline machine_mode
43396 get_mode_wider_vector (machine_mode o)
43398 /* ??? Rely on the ordering that genmodes.c gives to vectors. */
43399 machine_mode n = GET_MODE_WIDER_MODE (o);
43400 gcc_assert (GET_MODE_NUNITS (o) == GET_MODE_NUNITS (n) * 2);
43401 gcc_assert (GET_MODE_SIZE (o) == GET_MODE_SIZE (n));
43402 return n;
43405 /* A subroutine of ix86_expand_vector_init_duplicate. Tries to
43406 fill target with val via vec_duplicate. */
43408 static bool
43409 ix86_vector_duplicate_value (machine_mode mode, rtx target, rtx val)
43411 bool ok;
43412 rtx_insn *insn;
43413 rtx dup;
43415 /* First attempt to recognize VAL as-is. */
43416 dup = gen_rtx_VEC_DUPLICATE (mode, val);
43417 insn = emit_insn (gen_rtx_SET (VOIDmode, target, dup));
43418 if (recog_memoized (insn) < 0)
43420 rtx_insn *seq;
43421 /* If that fails, force VAL into a register. */
43423 start_sequence ();
43424 XEXP (dup, 0) = force_reg (GET_MODE_INNER (mode), val);
43425 seq = get_insns ();
43426 end_sequence ();
43427 if (seq)
43428 emit_insn_before (seq, insn);
43430 ok = recog_memoized (insn) >= 0;
43431 gcc_assert (ok);
43433 return true;
43436 /* A subroutine of ix86_expand_vector_init. Store into TARGET a vector
43437 with all elements equal to VAR. Return true if successful. */
43439 static bool
43440 ix86_expand_vector_init_duplicate (bool mmx_ok, machine_mode mode,
43441 rtx target, rtx val)
43443 bool ok;
43445 switch (mode)
43447 case V2SImode:
43448 case V2SFmode:
43449 if (!mmx_ok)
43450 return false;
43451 /* FALLTHRU */
43453 case V4DFmode:
43454 case V4DImode:
43455 case V8SFmode:
43456 case V8SImode:
43457 case V2DFmode:
43458 case V2DImode:
43459 case V4SFmode:
43460 case V4SImode:
43461 case V16SImode:
43462 case V8DImode:
43463 case V16SFmode:
43464 case V8DFmode:
43465 return ix86_vector_duplicate_value (mode, target, val);
43467 case V4HImode:
43468 if (!mmx_ok)
43469 return false;
43470 if (TARGET_SSE || TARGET_3DNOW_A)
43472 rtx x;
43474 val = gen_lowpart (SImode, val);
43475 x = gen_rtx_TRUNCATE (HImode, val);
43476 x = gen_rtx_VEC_DUPLICATE (mode, x);
43477 emit_insn (gen_rtx_SET (VOIDmode, target, x));
43478 return true;
43480 goto widen;
43482 case V8QImode:
43483 if (!mmx_ok)
43484 return false;
43485 goto widen;
43487 case V8HImode:
43488 if (TARGET_AVX2)
43489 return ix86_vector_duplicate_value (mode, target, val);
43491 if (TARGET_SSE2)
43493 struct expand_vec_perm_d dperm;
43494 rtx tmp1, tmp2;
43496 permute:
43497 memset (&dperm, 0, sizeof (dperm));
43498 dperm.target = target;
43499 dperm.vmode = mode;
43500 dperm.nelt = GET_MODE_NUNITS (mode);
43501 dperm.op0 = dperm.op1 = gen_reg_rtx (mode);
43502 dperm.one_operand_p = true;
43504 /* Extend to SImode using a paradoxical SUBREG. */
43505 tmp1 = gen_reg_rtx (SImode);
43506 emit_move_insn (tmp1, gen_lowpart (SImode, val));
43508 /* Insert the SImode value as low element of a V4SImode vector. */
43509 tmp2 = gen_reg_rtx (V4SImode);
43510 emit_insn (gen_vec_setv4si_0 (tmp2, CONST0_RTX (V4SImode), tmp1));
43511 emit_move_insn (dperm.op0, gen_lowpart (mode, tmp2));
43513 ok = (expand_vec_perm_1 (&dperm)
43514 || expand_vec_perm_broadcast_1 (&dperm));
43515 gcc_assert (ok);
43516 return ok;
43518 goto widen;
43520 case V16QImode:
43521 if (TARGET_AVX2)
43522 return ix86_vector_duplicate_value (mode, target, val);
43524 if (TARGET_SSE2)
43525 goto permute;
43526 goto widen;
43528 widen:
43529 /* Replicate the value once into the next wider mode and recurse. */
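/* E.g. for V8QImode this builds (val << 8) | val in HImode and then
   broadcasts that value through V4HImode, halving the element count at
   each recursion step.  */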
43531 machine_mode smode, wsmode, wvmode;
43532 rtx x;
43534 smode = GET_MODE_INNER (mode);
43535 wvmode = get_mode_wider_vector (mode);
43536 wsmode = GET_MODE_INNER (wvmode);
43538 val = convert_modes (wsmode, smode, val, true);
43539 x = expand_simple_binop (wsmode, ASHIFT, val,
43540 GEN_INT (GET_MODE_BITSIZE (smode)),
43541 NULL_RTX, 1, OPTAB_LIB_WIDEN);
43542 val = expand_simple_binop (wsmode, IOR, val, x, x, 1, OPTAB_LIB_WIDEN);
43544 x = gen_reg_rtx (wvmode);
43545 ok = ix86_expand_vector_init_duplicate (mmx_ok, wvmode, x, val);
43546 gcc_assert (ok);
43547 emit_move_insn (target, gen_lowpart (GET_MODE (target), x));
43548 return ok;
43551 case V16HImode:
43552 case V32QImode:
43553 if (TARGET_AVX2)
43554 return ix86_vector_duplicate_value (mode, target, val);
43555 else
43557 machine_mode hvmode = (mode == V16HImode ? V8HImode : V16QImode);
43558 rtx x = gen_reg_rtx (hvmode);
43560 ok = ix86_expand_vector_init_duplicate (false, hvmode, x, val);
43561 gcc_assert (ok);
43563 x = gen_rtx_VEC_CONCAT (mode, x, x);
43564 emit_insn (gen_rtx_SET (VOIDmode, target, x));
43566 return true;
43568 case V64QImode:
43569 case V32HImode:
43570 if (TARGET_AVX512BW)
43571 return ix86_vector_duplicate_value (mode, target, val);
43572 else
43574 machine_mode hvmode = (mode == V32HImode ? V16HImode : V32QImode);
43575 rtx x = gen_reg_rtx (hvmode);
43577 ok = ix86_expand_vector_init_duplicate (false, hvmode, x, val);
43578 gcc_assert (ok);
43580 x = gen_rtx_VEC_CONCAT (mode, x, x);
43581 emit_insn (gen_rtx_SET (VOIDmode, target, x));
43583 return true;
43585 default:
43586 return false;
43590 /* A subroutine of ix86_expand_vector_init. Store into TARGET a vector
43591 whose ONE_VAR element is VAR, and other elements are zero. Return true
43592 if successful. */
43594 static bool
43595 ix86_expand_vector_init_one_nonzero (bool mmx_ok, machine_mode mode,
43596 rtx target, rtx var, int one_var)
43598 machine_mode vsimode;
43599 rtx new_target;
43600 rtx x, tmp;
43601 bool use_vector_set = false;
43603 switch (mode)
43605 case V2DImode:
43606 /* For SSE4.1, we normally use vector set. But if the second
43607 element is zero and inter-unit moves are OK, we use movq
43608 instead. */
43609 use_vector_set = (TARGET_64BIT && TARGET_SSE4_1
43610 && !(TARGET_INTER_UNIT_MOVES_TO_VEC
43611 && one_var == 0));
43612 break;
43613 case V16QImode:
43614 case V4SImode:
43615 case V4SFmode:
43616 use_vector_set = TARGET_SSE4_1;
43617 break;
43618 case V8HImode:
43619 use_vector_set = TARGET_SSE2;
43620 break;
43621 case V4HImode:
43622 use_vector_set = TARGET_SSE || TARGET_3DNOW_A;
43623 break;
43624 case V32QImode:
43625 case V16HImode:
43626 case V8SImode:
43627 case V8SFmode:
43628 case V4DFmode:
43629 use_vector_set = TARGET_AVX;
43630 break;
43631 case V4DImode:
43632 /* Use ix86_expand_vector_set in 64bit mode only. */
43633 use_vector_set = TARGET_AVX && TARGET_64BIT;
43634 break;
43635 default:
43636 break;
43639 if (use_vector_set)
43641 emit_insn (gen_rtx_SET (VOIDmode, target, CONST0_RTX (mode)));
43642 var = force_reg (GET_MODE_INNER (mode), var);
43643 ix86_expand_vector_set (mmx_ok, target, var, one_var);
43644 return true;
43647 switch (mode)
43649 case V2SFmode:
43650 case V2SImode:
43651 if (!mmx_ok)
43652 return false;
43653 /* FALLTHRU */
43655 case V2DFmode:
43656 case V2DImode:
43657 if (one_var != 0)
43658 return false;
43659 var = force_reg (GET_MODE_INNER (mode), var);
43660 x = gen_rtx_VEC_CONCAT (mode, var, CONST0_RTX (GET_MODE_INNER (mode)));
43661 emit_insn (gen_rtx_SET (VOIDmode, target, x));
43662 return true;
43664 case V4SFmode:
43665 case V4SImode:
43666 if (!REG_P (target) || REGNO (target) < FIRST_PSEUDO_REGISTER)
43667 new_target = gen_reg_rtx (mode);
43668 else
43669 new_target = target;
43670 var = force_reg (GET_MODE_INNER (mode), var);
43671 x = gen_rtx_VEC_DUPLICATE (mode, var);
43672 x = gen_rtx_VEC_MERGE (mode, x, CONST0_RTX (mode), const1_rtx);
43673 emit_insn (gen_rtx_SET (VOIDmode, new_target, x));
43674 if (one_var != 0)
43676 /* We need to shuffle the value to the correct position, so
43677 create a new pseudo to store the intermediate result. */
43679 /* With SSE2, we can use the integer shuffle insns. */
43680 if (mode != V4SFmode && TARGET_SSE2)
43682 emit_insn (gen_sse2_pshufd_1 (new_target, new_target,
43683 const1_rtx,
43684 GEN_INT (one_var == 1 ? 0 : 1),
43685 GEN_INT (one_var == 2 ? 0 : 1),
43686 GEN_INT (one_var == 3 ? 0 : 1)));
43687 if (target != new_target)
43688 emit_move_insn (target, new_target);
43689 return true;
43692 /* Otherwise convert the intermediate result to V4SFmode and
43693 use the SSE1 shuffle instructions. */
43694 if (mode != V4SFmode)
43696 tmp = gen_reg_rtx (V4SFmode);
43697 emit_move_insn (tmp, gen_lowpart (V4SFmode, new_target));
43699 else
43700 tmp = new_target;
43702 emit_insn (gen_sse_shufps_v4sf (tmp, tmp, tmp,
43703 const1_rtx,
43704 GEN_INT (one_var == 1 ? 0 : 1),
43705 GEN_INT (one_var == 2 ? 0+4 : 1+4),
43706 GEN_INT (one_var == 3 ? 0+4 : 1+4)));
43708 if (mode != V4SFmode)
43709 emit_move_insn (target, gen_lowpart (V4SImode, tmp));
43710 else if (tmp != target)
43711 emit_move_insn (target, tmp);
43713 else if (target != new_target)
43714 emit_move_insn (target, new_target);
43715 return true;
43717 case V8HImode:
43718 case V16QImode:
43719 vsimode = V4SImode;
43720 goto widen;
43721 case V4HImode:
43722 case V8QImode:
43723 if (!mmx_ok)
43724 return false;
43725 vsimode = V2SImode;
43726 goto widen;
43727 widen:
43728 if (one_var != 0)
43729 return false;
43731 /* Zero extend the variable element to SImode and recurse. */
43732 var = convert_modes (SImode, GET_MODE_INNER (mode), var, true);
43734 x = gen_reg_rtx (vsimode);
43735 if (!ix86_expand_vector_init_one_nonzero (mmx_ok, vsimode, x,
43736 var, one_var))
43737 gcc_unreachable ();
43739 emit_move_insn (target, gen_lowpart (mode, x));
43740 return true;
43742 default:
43743 return false;
43747 /* A subroutine of ix86_expand_vector_init. Store into TARGET a vector
43748 consisting of the values in VALS. It is known that all elements
43749 except ONE_VAR are constants. Return true if successful. */
43751 static bool
43752 ix86_expand_vector_init_one_var (bool mmx_ok, machine_mode mode,
43753 rtx target, rtx vals, int one_var)
43755 rtx var = XVECEXP (vals, 0, one_var);
43756 machine_mode wmode;
43757 rtx const_vec, x;
43759 const_vec = copy_rtx (vals);
43760 XVECEXP (const_vec, 0, one_var) = CONST0_RTX (GET_MODE_INNER (mode));
43761 const_vec = gen_rtx_CONST_VECTOR (mode, XVEC (const_vec, 0));
43763 switch (mode)
43765 case V2DFmode:
43766 case V2DImode:
43767 case V2SFmode:
43768 case V2SImode:
43769 /* For the two element vectors, it's just as easy to use
43770 the general case. */
43771 return false;
43773 case V4DImode:
43774 /* Use ix86_expand_vector_set in 64bit mode only. */
43775 if (!TARGET_64BIT)
43776 return false;
43777 case V4DFmode:
43778 case V8SFmode:
43779 case V8SImode:
43780 case V16HImode:
43781 case V32QImode:
43782 case V4SFmode:
43783 case V4SImode:
43784 case V8HImode:
43785 case V4HImode:
43786 break;
43788 case V16QImode:
43789 if (TARGET_SSE4_1)
43790 break;
43791 wmode = V8HImode;
43792 goto widen;
43793 case V8QImode:
43794 wmode = V4HImode;
43795 goto widen;
43796 widen:
43797 /* There's no way to set one QImode entry easily. Combine
43798 the variable value with its adjacent constant value, and
43799 promote to an HImode set. */
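/* E.g. setting only element 5 of a V16QImode vector becomes an HImode
   vec_set at index 2, with the variable byte shifted into the high half
   and OR-ed with the neighbouring constant byte.  */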
43800 x = XVECEXP (vals, 0, one_var ^ 1);
43801 if (one_var & 1)
43803 var = convert_modes (HImode, QImode, var, true);
43804 var = expand_simple_binop (HImode, ASHIFT, var, GEN_INT (8),
43805 NULL_RTX, 1, OPTAB_LIB_WIDEN);
43806 x = GEN_INT (INTVAL (x) & 0xff);
43808 else
43810 var = convert_modes (HImode, QImode, var, true);
43811 x = gen_int_mode (INTVAL (x) << 8, HImode);
43813 if (x != const0_rtx)
43814 var = expand_simple_binop (HImode, IOR, var, x, var,
43815 1, OPTAB_LIB_WIDEN);
43817 x = gen_reg_rtx (wmode);
43818 emit_move_insn (x, gen_lowpart (wmode, const_vec));
43819 ix86_expand_vector_set (mmx_ok, x, var, one_var >> 1);
43821 emit_move_insn (target, gen_lowpart (mode, x));
43822 return true;
43824 default:
43825 return false;
43828 emit_move_insn (target, const_vec);
43829 ix86_expand_vector_set (mmx_ok, target, var, one_var);
43830 return true;
43833 /* A subroutine of ix86_expand_vector_init_general. Use vector
43834 concatenate to handle the most general case: all values variable,
43835 and none identical. */
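/* The N input operands are concatenated pairwise with VEC_CONCAT, doubling
   the vector width at each of the log2(N) steps until TARGET's mode is
   reached.  */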
43837 static void
43838 ix86_expand_vector_init_concat (machine_mode mode,
43839 rtx target, rtx *ops, int n)
43841 machine_mode cmode, hmode = VOIDmode, gmode = VOIDmode;
43842 rtx first[16], second[8], third[4];
43843 rtvec v;
43844 int i, j;
43846 switch (n)
43848 case 2:
43849 switch (mode)
43851 case V16SImode:
43852 cmode = V8SImode;
43853 break;
43854 case V16SFmode:
43855 cmode = V8SFmode;
43856 break;
43857 case V8DImode:
43858 cmode = V4DImode;
43859 break;
43860 case V8DFmode:
43861 cmode = V4DFmode;
43862 break;
43863 case V8SImode:
43864 cmode = V4SImode;
43865 break;
43866 case V8SFmode:
43867 cmode = V4SFmode;
43868 break;
43869 case V4DImode:
43870 cmode = V2DImode;
43871 break;
43872 case V4DFmode:
43873 cmode = V2DFmode;
43874 break;
43875 case V4SImode:
43876 cmode = V2SImode;
43877 break;
43878 case V4SFmode:
43879 cmode = V2SFmode;
43880 break;
43881 case V2DImode:
43882 cmode = DImode;
43883 break;
43884 case V2SImode:
43885 cmode = SImode;
43886 break;
43887 case V2DFmode:
43888 cmode = DFmode;
43889 break;
43890 case V2SFmode:
43891 cmode = SFmode;
43892 break;
43893 default:
43894 gcc_unreachable ();
43897 if (!register_operand (ops[1], cmode))
43898 ops[1] = force_reg (cmode, ops[1]);
43899 if (!register_operand (ops[0], cmode))
43900 ops[0] = force_reg (cmode, ops[0]);
43901 emit_insn (gen_rtx_SET (VOIDmode, target,
43902 gen_rtx_VEC_CONCAT (mode, ops[0],
43903 ops[1])));
43904 break;
43906 case 4:
43907 switch (mode)
43909 case V4DImode:
43910 cmode = V2DImode;
43911 break;
43912 case V4DFmode:
43913 cmode = V2DFmode;
43914 break;
43915 case V4SImode:
43916 cmode = V2SImode;
43917 break;
43918 case V4SFmode:
43919 cmode = V2SFmode;
43920 break;
43921 default:
43922 gcc_unreachable ();
43924 goto half;
43926 case 8:
43927 switch (mode)
43929 case V8DImode:
43930 cmode = V2DImode;
43931 hmode = V4DImode;
43932 break;
43933 case V8DFmode:
43934 cmode = V2DFmode;
43935 hmode = V4DFmode;
43936 break;
43937 case V8SImode:
43938 cmode = V2SImode;
43939 hmode = V4SImode;
43940 break;
43941 case V8SFmode:
43942 cmode = V2SFmode;
43943 hmode = V4SFmode;
43944 break;
43945 default:
43946 gcc_unreachable ();
43948 goto half;
43950 case 16:
43951 switch (mode)
43953 case V16SImode:
43954 cmode = V2SImode;
43955 hmode = V4SImode;
43956 gmode = V8SImode;
43957 break;
43958 case V16SFmode:
43959 cmode = V2SFmode;
43960 hmode = V4SFmode;
43961 gmode = V8SFmode;
43962 break;
43963 default:
43964 gcc_unreachable ();
43966 goto half;
43968 half:
43969 /* FIXME: We process inputs backward to help RA. PR 36222. */
43970 i = n - 1;
43971 j = (n >> 1) - 1;
43972 for (; i > 0; i -= 2, j--)
43974 first[j] = gen_reg_rtx (cmode);
43975 v = gen_rtvec (2, ops[i - 1], ops[i]);
43976 ix86_expand_vector_init (false, first[j],
43977 gen_rtx_PARALLEL (cmode, v));
43980 n >>= 1;
43981 if (n > 4)
43983 gcc_assert (hmode != VOIDmode);
43984 gcc_assert (gmode != VOIDmode);
43985 for (i = j = 0; i < n; i += 2, j++)
43987 second[j] = gen_reg_rtx (hmode);
43988 ix86_expand_vector_init_concat (hmode, second [j],
43989 &first [i], 2);
43991 n >>= 1;
43992 for (i = j = 0; i < n; i += 2, j++)
43994 third[j] = gen_reg_rtx (gmode);
43995 ix86_expand_vector_init_concat (gmode, third[j],
43996 &second[i], 2);
43998 n >>= 1;
43999 ix86_expand_vector_init_concat (mode, target, third, n);
44001 else if (n > 2)
44003 gcc_assert (hmode != VOIDmode);
44004 for (i = j = 0; i < n; i += 2, j++)
44006 second[j] = gen_reg_rtx (hmode);
44007 ix86_expand_vector_init_concat (hmode, second [j],
44008 &first [i], 2);
44010 n >>= 1;
44011 ix86_expand_vector_init_concat (mode, target, second, n);
44013 else
44014 ix86_expand_vector_init_concat (mode, target, first, n);
44015 break;
44017 default:
44018 gcc_unreachable ();
44022 /* A subroutine of ix86_expand_vector_init_general. Use vector
44023 interleave to handle the most general case: all values variable,
44024 and none identical. */
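/* Strategy: each adjacent pair of scalar operands is packed into the low
   part of its own vector (the even-indexed one into element 0 via a merge,
   the odd-indexed one into element 1 via vec_set); the resulting vectors
   are then combined with successive "interleave low" (punpckl*) steps on
   progressively wider element types until one full-width vector remains.  */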
44026 static void
44027 ix86_expand_vector_init_interleave (machine_mode mode,
44028 rtx target, rtx *ops, int n)
44030 machine_mode first_imode, second_imode, third_imode, inner_mode;
44031 int i, j;
44032 rtx op0, op1;
44033 rtx (*gen_load_even) (rtx, rtx, rtx);
44034 rtx (*gen_interleave_first_low) (rtx, rtx, rtx);
44035 rtx (*gen_interleave_second_low) (rtx, rtx, rtx);
44037 switch (mode)
44039 case V8HImode:
44040 gen_load_even = gen_vec_setv8hi;
44041 gen_interleave_first_low = gen_vec_interleave_lowv4si;
44042 gen_interleave_second_low = gen_vec_interleave_lowv2di;
44043 inner_mode = HImode;
44044 first_imode = V4SImode;
44045 second_imode = V2DImode;
44046 third_imode = VOIDmode;
44047 break;
44048 case V16QImode:
44049 gen_load_even = gen_vec_setv16qi;
44050 gen_interleave_first_low = gen_vec_interleave_lowv8hi;
44051 gen_interleave_second_low = gen_vec_interleave_lowv4si;
44052 inner_mode = QImode;
44053 first_imode = V8HImode;
44054 second_imode = V4SImode;
44055 third_imode = V2DImode;
44056 break;
44057 default:
44058 gcc_unreachable ();
44061 for (i = 0; i < n; i++)
44063 /* Extend the odd element to SImode using a paradoxical SUBREG. */
44064 op0 = gen_reg_rtx (SImode);
44065 emit_move_insn (op0, gen_lowpart (SImode, ops [i + i]));
44067 /* Insert the SImode value as low element of V4SImode vector. */
44068 op1 = gen_reg_rtx (V4SImode);
44069 op0 = gen_rtx_VEC_MERGE (V4SImode,
44070 gen_rtx_VEC_DUPLICATE (V4SImode,
44071 op0),
44072 CONST0_RTX (V4SImode),
44073 const1_rtx);
44074 emit_insn (gen_rtx_SET (VOIDmode, op1, op0));
44076 /* Cast the V4SImode vector back to a vector in the original mode. */
44077 op0 = gen_reg_rtx (mode);
44078 emit_move_insn (op0, gen_lowpart (mode, op1));
44080 /* Load even elements into the second position. */
44081 emit_insn (gen_load_even (op0,
44082 force_reg (inner_mode,
44083 ops [i + i + 1]),
44084 const1_rtx));
44086 /* Cast vector to FIRST_IMODE vector. */
44087 ops[i] = gen_reg_rtx (first_imode);
44088 emit_move_insn (ops[i], gen_lowpart (first_imode, op0));
44091 /* Interleave low FIRST_IMODE vectors. */
44092 for (i = j = 0; i < n; i += 2, j++)
44094 op0 = gen_reg_rtx (first_imode);
44095 emit_insn (gen_interleave_first_low (op0, ops[i], ops[i + 1]));
44097 /* Cast FIRST_IMODE vector to SECOND_IMODE vector. */
44098 ops[j] = gen_reg_rtx (second_imode);
44099 emit_move_insn (ops[j], gen_lowpart (second_imode, op0));
44102 /* Interleave low SECOND_IMODE vectors. */
44103 switch (second_imode)
44105 case V4SImode:
44106 for (i = j = 0; i < n / 2; i += 2, j++)
44108 op0 = gen_reg_rtx (second_imode);
44109 emit_insn (gen_interleave_second_low (op0, ops[i],
44110 ops[i + 1]));
44112 /* Cast the SECOND_IMODE vector to the THIRD_IMODE
44113 vector. */
44114 ops[j] = gen_reg_rtx (third_imode);
44115 emit_move_insn (ops[j], gen_lowpart (third_imode, op0));
44117 second_imode = V2DImode;
44118 gen_interleave_second_low = gen_vec_interleave_lowv2di;
44119 /* FALLTHRU */
44121 case V2DImode:
44122 op0 = gen_reg_rtx (second_imode);
44123 emit_insn (gen_interleave_second_low (op0, ops[0],
44124 ops[1]));
44126 /* Cast the SECOND_IMODE vector back to a vector in the original
44127 mode. */
44128 emit_insn (gen_rtx_SET (VOIDmode, target,
44129 gen_lowpart (mode, op0)));
44130 break;
44132 default:
44133 gcc_unreachable ();
44137 /* A subroutine of ix86_expand_vector_init. Handle the most general case:
44138 all values variable, and none identical. */
44140 static void
44141 ix86_expand_vector_init_general (bool mmx_ok, machine_mode mode,
44142 rtx target, rtx vals)
44144 rtx ops[64], op0, op1, op2, op3, op4, op5;
44145 machine_mode half_mode = VOIDmode;
44146 machine_mode quarter_mode = VOIDmode;
44147 int n, i;
44149 switch (mode)
44151 case V2SFmode:
44152 case V2SImode:
44153 if (!mmx_ok && !TARGET_SSE)
44154 break;
44155 /* FALLTHRU */
44157 case V16SImode:
44158 case V16SFmode:
44159 case V8DFmode:
44160 case V8DImode:
44161 case V8SFmode:
44162 case V8SImode:
44163 case V4DFmode:
44164 case V4DImode:
44165 case V4SFmode:
44166 case V4SImode:
44167 case V2DFmode:
44168 case V2DImode:
44169 n = GET_MODE_NUNITS (mode);
44170 for (i = 0; i < n; i++)
44171 ops[i] = XVECEXP (vals, 0, i);
44172 ix86_expand_vector_init_concat (mode, target, ops, n);
44173 return;
44175 case V32QImode:
44176 half_mode = V16QImode;
44177 goto half;
44179 case V16HImode:
44180 half_mode = V8HImode;
44181 goto half;
44183 half:
44184 n = GET_MODE_NUNITS (mode);
44185 for (i = 0; i < n; i++)
44186 ops[i] = XVECEXP (vals, 0, i);
44187 op0 = gen_reg_rtx (half_mode);
44188 op1 = gen_reg_rtx (half_mode);
44189 ix86_expand_vector_init_interleave (half_mode, op0, ops,
44190 n >> 2);
44191 ix86_expand_vector_init_interleave (half_mode, op1,
44192 &ops [n >> 1], n >> 2);
44193 emit_insn (gen_rtx_SET (VOIDmode, target,
44194 gen_rtx_VEC_CONCAT (mode, op0, op1)));
44195 return;
44197 case V64QImode:
44198 quarter_mode = V16QImode;
44199 half_mode = V32QImode;
44200 goto quarter;
44202 case V32HImode:
44203 quarter_mode = V8HImode;
44204 half_mode = V16HImode;
44205 goto quarter;
44207 quarter:
44208 n = GET_MODE_NUNITS (mode);
44209 for (i = 0; i < n; i++)
44210 ops[i] = XVECEXP (vals, 0, i);
44211 op0 = gen_reg_rtx (quarter_mode);
44212 op1 = gen_reg_rtx (quarter_mode);
44213 op2 = gen_reg_rtx (quarter_mode);
44214 op3 = gen_reg_rtx (quarter_mode);
44215 op4 = gen_reg_rtx (half_mode);
44216 op5 = gen_reg_rtx (half_mode);
44217 ix86_expand_vector_init_interleave (quarter_mode, op0, ops,
44218 n >> 3);
44219 ix86_expand_vector_init_interleave (quarter_mode, op1,
44220 &ops [n >> 2], n >> 3);
44221 ix86_expand_vector_init_interleave (quarter_mode, op2,
44222 &ops [n >> 1], n >> 3);
44223 ix86_expand_vector_init_interleave (quarter_mode, op3,
44224 &ops [(n >> 1) | (n >> 2)], n >> 3);
44225 emit_insn (gen_rtx_SET (VOIDmode, op4,
44226 gen_rtx_VEC_CONCAT (half_mode, op0, op1)));
44227 emit_insn (gen_rtx_SET (VOIDmode, op5,
44228 gen_rtx_VEC_CONCAT (half_mode, op2, op3)));
44229 emit_insn (gen_rtx_SET (VOIDmode, target,
44230 gen_rtx_VEC_CONCAT (mode, op4, op5)));
44231 return;
44233 case V16QImode:
44234 if (!TARGET_SSE4_1)
44235 break;
44236 /* FALLTHRU */
44238 case V8HImode:
44239 if (!TARGET_SSE2)
44240 break;
44242 /* Don't use ix86_expand_vector_init_interleave if we can't
44243 move from GPR to SSE register directly. */
44244 if (!TARGET_INTER_UNIT_MOVES_TO_VEC)
44245 break;
44247 n = GET_MODE_NUNITS (mode);
44248 for (i = 0; i < n; i++)
44249 ops[i] = XVECEXP (vals, 0, i);
44250 ix86_expand_vector_init_interleave (mode, target, ops, n >> 1);
44251 return;
44253 case V4HImode:
44254 case V8QImode:
44255 break;
44257 default:
44258 gcc_unreachable ();
44262 int i, j, n_elts, n_words, n_elt_per_word;
44263 machine_mode inner_mode;
44264 rtx words[4], shift;
44266 inner_mode = GET_MODE_INNER (mode);
44267 n_elts = GET_MODE_NUNITS (mode);
44268 n_words = GET_MODE_SIZE (mode) / UNITS_PER_WORD;
44269 n_elt_per_word = n_elts / n_words;
44270 shift = GEN_INT (GET_MODE_BITSIZE (inner_mode));
44272 for (i = 0; i < n_words; ++i)
44274 rtx word = NULL_RTX;
44276 for (j = 0; j < n_elt_per_word; ++j)
44278 rtx elt = XVECEXP (vals, 0, (i+1)*n_elt_per_word - j - 1);
44279 elt = convert_modes (word_mode, inner_mode, elt, true);
44281 if (j == 0)
44282 word = elt;
44283 else
44285 word = expand_simple_binop (word_mode, ASHIFT, word, shift,
44286 word, 1, OPTAB_LIB_WIDEN);
44287 word = expand_simple_binop (word_mode, IOR, word, elt,
44288 word, 1, OPTAB_LIB_WIDEN);
44292 words[i] = word;
44295 if (n_words == 1)
44296 emit_move_insn (target, gen_lowpart (mode, words[0]));
44297 else if (n_words == 2)
44299 rtx tmp = gen_reg_rtx (mode);
44300 emit_clobber (tmp);
44301 emit_move_insn (gen_lowpart (word_mode, tmp), words[0]);
44302 emit_move_insn (gen_highpart (word_mode, tmp), words[1]);
44303 emit_move_insn (target, tmp);
44305 else if (n_words == 4)
44307 rtx tmp = gen_reg_rtx (V4SImode);
44308 gcc_assert (word_mode == SImode);
44309 vals = gen_rtx_PARALLEL (V4SImode, gen_rtvec_v (4, words));
44310 ix86_expand_vector_init_general (false, V4SImode, tmp, vals);
44311 emit_move_insn (target, gen_lowpart (mode, tmp));
44313 else
44314 gcc_unreachable ();
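/* The loop above packs N_ELT_PER_WORD vector elements into each word_mode
   register with shift/IOR, starting from the highest element so that element
   0 ends up in the low bits.  A standalone C sketch with four 16-bit elements
   per 64-bit word (the concrete sizes are illustrative), kept under #if 0
   because it only documents the algorithm.  */
#if 0
static unsigned long long
pack_word_sketch (const unsigned short e[4])
{
  unsigned long long word = e[3];		/* highest element first */
  for (int j = 1; j < 4; j++)
    word = (word << 16) | e[3 - j];		/* shift left, OR in next lower */
  return word;					/* e[0] sits in the low 16 bits */
}
#endif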
44318 /* Initialize vector TARGET via VALS. Suppress the use of MMX
44319 instructions unless MMX_OK is true. */
44321 void
44322 ix86_expand_vector_init (bool mmx_ok, rtx target, rtx vals)
44324 machine_mode mode = GET_MODE (target);
44325 machine_mode inner_mode = GET_MODE_INNER (mode);
44326 int n_elts = GET_MODE_NUNITS (mode);
44327 int n_var = 0, one_var = -1;
44328 bool all_same = true, all_const_zero = true;
44329 int i;
44330 rtx x;
44332 for (i = 0; i < n_elts; ++i)
44334 x = XVECEXP (vals, 0, i);
44335 if (!(CONST_INT_P (x)
44336 || GET_CODE (x) == CONST_DOUBLE
44337 || GET_CODE (x) == CONST_FIXED))
44338 n_var++, one_var = i;
44339 else if (x != CONST0_RTX (inner_mode))
44340 all_const_zero = false;
44341 if (i > 0 && !rtx_equal_p (x, XVECEXP (vals, 0, 0)))
44342 all_same = false;
44345 /* Constants are best loaded from the constant pool. */
44346 if (n_var == 0)
44348 emit_move_insn (target, gen_rtx_CONST_VECTOR (mode, XVEC (vals, 0)));
44349 return;
44352 /* If all values are identical, broadcast the value. */
44353 if (all_same
44354 && ix86_expand_vector_init_duplicate (mmx_ok, mode, target,
44355 XVECEXP (vals, 0, 0)))
44356 return;
44358 /* Values where only one field is non-constant are best loaded from
44359 the pool and overwritten via move later. */
44360 if (n_var == 1)
44362 if (all_const_zero
44363 && ix86_expand_vector_init_one_nonzero (mmx_ok, mode, target,
44364 XVECEXP (vals, 0, one_var),
44365 one_var))
44366 return;
44368 if (ix86_expand_vector_init_one_var (mmx_ok, mode, target, vals, one_var))
44369 return;
44372 ix86_expand_vector_init_general (mmx_ok, mode, target, vals);
44375 void
44376 ix86_expand_vector_set (bool mmx_ok, rtx target, rtx val, int elt)
44378 machine_mode mode = GET_MODE (target);
44379 machine_mode inner_mode = GET_MODE_INNER (mode);
44380 machine_mode half_mode;
44381 bool use_vec_merge = false;
44382 rtx tmp;
44383 static rtx (*gen_extract[6][2]) (rtx, rtx)
44385 { gen_vec_extract_lo_v32qi, gen_vec_extract_hi_v32qi },
44386 { gen_vec_extract_lo_v16hi, gen_vec_extract_hi_v16hi },
44387 { gen_vec_extract_lo_v8si, gen_vec_extract_hi_v8si },
44388 { gen_vec_extract_lo_v4di, gen_vec_extract_hi_v4di },
44389 { gen_vec_extract_lo_v8sf, gen_vec_extract_hi_v8sf },
44390 { gen_vec_extract_lo_v4df, gen_vec_extract_hi_v4df }
44392 static rtx (*gen_insert[6][2]) (rtx, rtx, rtx)
44394 { gen_vec_set_lo_v32qi, gen_vec_set_hi_v32qi },
44395 { gen_vec_set_lo_v16hi, gen_vec_set_hi_v16hi },
44396 { gen_vec_set_lo_v8si, gen_vec_set_hi_v8si },
44397 { gen_vec_set_lo_v4di, gen_vec_set_hi_v4di },
44398 { gen_vec_set_lo_v8sf, gen_vec_set_hi_v8sf },
44399 { gen_vec_set_lo_v4df, gen_vec_set_hi_v4df }
44401 int i, j, n;
44403 switch (mode)
44405 case V2SFmode:
44406 case V2SImode:
44407 if (mmx_ok)
44409 tmp = gen_reg_rtx (GET_MODE_INNER (mode));
44410 ix86_expand_vector_extract (true, tmp, target, 1 - elt);
44411 if (elt == 0)
44412 tmp = gen_rtx_VEC_CONCAT (mode, val, tmp);
44413 else
44414 tmp = gen_rtx_VEC_CONCAT (mode, tmp, val);
44415 emit_insn (gen_rtx_SET (VOIDmode, target, tmp));
44416 return;
44418 break;
44420 case V2DImode:
44421 use_vec_merge = TARGET_SSE4_1 && TARGET_64BIT;
44422 if (use_vec_merge)
44423 break;
44425 tmp = gen_reg_rtx (GET_MODE_INNER (mode));
44426 ix86_expand_vector_extract (false, tmp, target, 1 - elt);
44427 if (elt == 0)
44428 tmp = gen_rtx_VEC_CONCAT (mode, val, tmp);
44429 else
44430 tmp = gen_rtx_VEC_CONCAT (mode, tmp, val);
44431 emit_insn (gen_rtx_SET (VOIDmode, target, tmp));
44432 return;
44434 case V2DFmode:
44436 rtx op0, op1;
44438 /* For the two element vectors, we implement a VEC_CONCAT with
44439 the extraction of the other element. */
44441 tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (1, GEN_INT (1 - elt)));
44442 tmp = gen_rtx_VEC_SELECT (inner_mode, target, tmp);
44444 if (elt == 0)
44445 op0 = val, op1 = tmp;
44446 else
44447 op0 = tmp, op1 = val;
44449 tmp = gen_rtx_VEC_CONCAT (mode, op0, op1);
44450 emit_insn (gen_rtx_SET (VOIDmode, target, tmp));
44452 return;
44454 case V4SFmode:
44455 use_vec_merge = TARGET_SSE4_1;
44456 if (use_vec_merge)
44457 break;
44459 switch (elt)
44461 case 0:
44462 use_vec_merge = true;
44463 break;
44465 case 1:
44466 /* tmp = target = A B C D */
44467 tmp = copy_to_reg (target);
44468 /* target = A A B B */
44469 emit_insn (gen_vec_interleave_lowv4sf (target, target, target));
44470 /* target = X A B B */
44471 ix86_expand_vector_set (false, target, val, 0);
44472 /* target = A X C D */
44473 emit_insn (gen_sse_shufps_v4sf (target, target, tmp,
44474 const1_rtx, const0_rtx,
44475 GEN_INT (2+4), GEN_INT (3+4)));
44476 return;
44478 case 2:
44479 /* tmp = target = A B C D */
44480 tmp = copy_to_reg (target);
44481 /* tmp = X B C D */
44482 ix86_expand_vector_set (false, tmp, val, 0);
44483 /* target = A B X D */
44484 emit_insn (gen_sse_shufps_v4sf (target, target, tmp,
44485 const0_rtx, const1_rtx,
44486 GEN_INT (0+4), GEN_INT (3+4)));
44487 return;
44489 case 3:
44490 /* tmp = target = A B C D */
44491 tmp = copy_to_reg (target);
44492 /* tmp = X B C D */
44493 ix86_expand_vector_set (false, tmp, val, 0);
44494 	  /* target = A B C X */
44495 emit_insn (gen_sse_shufps_v4sf (target, target, tmp,
44496 const0_rtx, const1_rtx,
44497 GEN_INT (2+4), GEN_INT (0+4)));
44498 return;
44500 default:
44501 gcc_unreachable ();
44503 break;
44505 case V4SImode:
44506 use_vec_merge = TARGET_SSE4_1;
44507 if (use_vec_merge)
44508 break;
44510 /* Element 0 handled by vec_merge below. */
44511 if (elt == 0)
44513 use_vec_merge = true;
44514 break;
44517 if (TARGET_SSE2)
44519 /* With SSE2, use integer shuffles to swap element 0 and ELT,
44520 store into element 0, then shuffle them back. */
44522 rtx order[4];
44524 order[0] = GEN_INT (elt);
44525 order[1] = const1_rtx;
44526 order[2] = const2_rtx;
44527 order[3] = GEN_INT (3);
44528 order[elt] = const0_rtx;
44530 emit_insn (gen_sse2_pshufd_1 (target, target, order[0],
44531 order[1], order[2], order[3]));
44533 ix86_expand_vector_set (false, target, val, 0);
44535 emit_insn (gen_sse2_pshufd_1 (target, target, order[0],
44536 order[1], order[2], order[3]));
44538 else
44540 /* For SSE1, we have to reuse the V4SF code. */
44541 rtx t = gen_reg_rtx (V4SFmode);
44542 ix86_expand_vector_set (false, t, gen_lowpart (SFmode, val), elt);
44543 emit_move_insn (target, gen_lowpart (mode, t));
44545 return;
44547 case V8HImode:
44548 use_vec_merge = TARGET_SSE2;
44549 break;
44550 case V4HImode:
44551 use_vec_merge = mmx_ok && (TARGET_SSE || TARGET_3DNOW_A);
44552 break;
44554 case V16QImode:
44555 use_vec_merge = TARGET_SSE4_1;
44556 break;
44558 case V8QImode:
44559 break;
44561 case V32QImode:
44562 half_mode = V16QImode;
44563 j = 0;
44564 n = 16;
44565 goto half;
44567 case V16HImode:
44568 half_mode = V8HImode;
44569 j = 1;
44570 n = 8;
44571 goto half;
44573 case V8SImode:
44574 half_mode = V4SImode;
44575 j = 2;
44576 n = 4;
44577 goto half;
44579 case V4DImode:
44580 half_mode = V2DImode;
44581 j = 3;
44582 n = 2;
44583 goto half;
44585 case V8SFmode:
44586 half_mode = V4SFmode;
44587 j = 4;
44588 n = 4;
44589 goto half;
44591 case V4DFmode:
44592 half_mode = V2DFmode;
44593 j = 5;
44594 n = 2;
44595 goto half;
44597 half:
44598 /* Compute offset. */
44599 i = elt / n;
44600 elt %= n;
44602 gcc_assert (i <= 1);
44604 /* Extract the half. */
44605 tmp = gen_reg_rtx (half_mode);
44606 emit_insn (gen_extract[j][i] (tmp, target));
44608 /* Put val in tmp at elt. */
44609 ix86_expand_vector_set (false, tmp, val, elt);
44611 /* Put it back. */
44612 emit_insn (gen_insert[j][i] (target, target, tmp));
44613 return;
44615 case V8DFmode:
44616 if (TARGET_AVX512F)
44618 tmp = gen_reg_rtx (mode);
44619 emit_insn (gen_rtx_SET (VOIDmode, tmp,
44620 gen_rtx_VEC_DUPLICATE (mode, val)));
44621 emit_insn (gen_avx512f_blendmv8df (target, tmp, target,
44622 force_reg (QImode, GEN_INT (1 << elt))));
44623 return;
44625 else
44626 break;
44627 case V8DImode:
44628 if (TARGET_AVX512F)
44630 tmp = gen_reg_rtx (mode);
44631 emit_insn (gen_rtx_SET (VOIDmode, tmp,
44632 gen_rtx_VEC_DUPLICATE (mode, val)));
44633 emit_insn (gen_avx512f_blendmv8di (target, tmp, target,
44634 force_reg (QImode, GEN_INT (1 << elt))));
44635 return;
44637 else
44638 break;
44639 case V16SFmode:
44640 if (TARGET_AVX512F)
44642 tmp = gen_reg_rtx (mode);
44643 emit_insn (gen_rtx_SET (VOIDmode, tmp,
44644 gen_rtx_VEC_DUPLICATE (mode, val)));
44645 emit_insn (gen_avx512f_blendmv16sf (target, tmp, target,
44646 force_reg (HImode, GEN_INT (1 << elt))));
44647 return;
44649 else
44650 break;
44651 case V16SImode:
44652 if (TARGET_AVX512F)
44654 tmp = gen_reg_rtx (mode);
44655 emit_insn (gen_rtx_SET (VOIDmode, tmp,
44656 gen_rtx_VEC_DUPLICATE (mode, val)));
44657 emit_insn (gen_avx512f_blendmv16si (target, tmp, target,
44658 force_reg (HImode, GEN_INT (1 << elt))));
44659 return;
44661 else
44662 break;
44663 case V32HImode:
44664 if (TARGET_AVX512F && TARGET_AVX512BW)
44666 tmp = gen_reg_rtx (mode);
44667 emit_insn (gen_rtx_SET (VOIDmode, tmp,
44668 gen_rtx_VEC_DUPLICATE (mode, val)));
44669 emit_insn (gen_avx512bw_blendmv32hi (target, tmp, target,
44670 force_reg (SImode, GEN_INT (1 << elt))));
44671 return;
44673 else
44674 break;
44675 case V64QImode:
44676 if (TARGET_AVX512F && TARGET_AVX512BW)
44678 tmp = gen_reg_rtx (mode);
44679 emit_insn (gen_rtx_SET (VOIDmode, tmp,
44680 gen_rtx_VEC_DUPLICATE (mode, val)));
44681 emit_insn (gen_avx512bw_blendmv64qi (target, tmp, target,
44682 force_reg (DImode, GEN_INT (1 << elt))));
44683 return;
44685 else
44686 break;
44688 default:
44689 break;
44692 if (use_vec_merge)
44694 tmp = gen_rtx_VEC_DUPLICATE (mode, val);
44695 tmp = gen_rtx_VEC_MERGE (mode, tmp, target, GEN_INT (1 << elt));
44696 emit_insn (gen_rtx_SET (VOIDmode, target, tmp));
44698 else
44700 rtx mem = assign_stack_temp (mode, GET_MODE_SIZE (mode));
44702 emit_move_insn (mem, target);
44704 tmp = adjust_address (mem, inner_mode, elt*GET_MODE_SIZE (inner_mode));
44705 emit_move_insn (tmp, val);
44707 emit_move_insn (target, mem);
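/* The final "else" above is the generic fallback: spill the vector to a
   stack slot, overwrite the one element, and reload.  A plain C sketch of
   that path for a four-float vector (sizes illustrative), under #if 0.  */
#if 0
static void
set_lane_sketch (float vec[4], float val, int elt)
{
  float mem[4];
  __builtin_memcpy (mem, vec, sizeof mem);	/* store the whole vector */
  mem[elt] = val;				/* overwrite one lane */
  __builtin_memcpy (vec, mem, sizeof mem);	/* load it back */
}
#endif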
44711 void
44712 ix86_expand_vector_extract (bool mmx_ok, rtx target, rtx vec, int elt)
44714 machine_mode mode = GET_MODE (vec);
44715 machine_mode inner_mode = GET_MODE_INNER (mode);
44716 bool use_vec_extr = false;
44717 rtx tmp;
44719 switch (mode)
44721 case V2SImode:
44722 case V2SFmode:
44723 if (!mmx_ok)
44724 break;
44725 /* FALLTHRU */
44727 case V2DFmode:
44728 case V2DImode:
44729 use_vec_extr = true;
44730 break;
44732 case V4SFmode:
44733 use_vec_extr = TARGET_SSE4_1;
44734 if (use_vec_extr)
44735 break;
44737 switch (elt)
44739 case 0:
44740 tmp = vec;
44741 break;
44743 case 1:
44744 case 3:
44745 tmp = gen_reg_rtx (mode);
44746 emit_insn (gen_sse_shufps_v4sf (tmp, vec, vec,
44747 GEN_INT (elt), GEN_INT (elt),
44748 GEN_INT (elt+4), GEN_INT (elt+4)));
44749 break;
44751 case 2:
44752 tmp = gen_reg_rtx (mode);
44753 emit_insn (gen_vec_interleave_highv4sf (tmp, vec, vec));
44754 break;
44756 default:
44757 gcc_unreachable ();
44759 vec = tmp;
44760 use_vec_extr = true;
44761 elt = 0;
44762 break;
44764 case V4SImode:
44765 use_vec_extr = TARGET_SSE4_1;
44766 if (use_vec_extr)
44767 break;
44769 if (TARGET_SSE2)
44771 switch (elt)
44773 case 0:
44774 tmp = vec;
44775 break;
44777 case 1:
44778 case 3:
44779 tmp = gen_reg_rtx (mode);
44780 emit_insn (gen_sse2_pshufd_1 (tmp, vec,
44781 GEN_INT (elt), GEN_INT (elt),
44782 GEN_INT (elt), GEN_INT (elt)));
44783 break;
44785 case 2:
44786 tmp = gen_reg_rtx (mode);
44787 emit_insn (gen_vec_interleave_highv4si (tmp, vec, vec));
44788 break;
44790 default:
44791 gcc_unreachable ();
44793 vec = tmp;
44794 use_vec_extr = true;
44795 elt = 0;
44797 else
44799 /* For SSE1, we have to reuse the V4SF code. */
44800 ix86_expand_vector_extract (false, gen_lowpart (SFmode, target),
44801 gen_lowpart (V4SFmode, vec), elt);
44802 return;
44804 break;
44806 case V8HImode:
44807 use_vec_extr = TARGET_SSE2;
44808 break;
44809 case V4HImode:
44810 use_vec_extr = mmx_ok && (TARGET_SSE || TARGET_3DNOW_A);
44811 break;
44813 case V16QImode:
44814 use_vec_extr = TARGET_SSE4_1;
44815 break;
44817 case V8SFmode:
44818 if (TARGET_AVX)
44820 tmp = gen_reg_rtx (V4SFmode);
44821 if (elt < 4)
44822 emit_insn (gen_vec_extract_lo_v8sf (tmp, vec));
44823 else
44824 emit_insn (gen_vec_extract_hi_v8sf (tmp, vec));
44825 ix86_expand_vector_extract (false, target, tmp, elt & 3);
44826 return;
44828 break;
44830 case V4DFmode:
44831 if (TARGET_AVX)
44833 tmp = gen_reg_rtx (V2DFmode);
44834 if (elt < 2)
44835 emit_insn (gen_vec_extract_lo_v4df (tmp, vec));
44836 else
44837 emit_insn (gen_vec_extract_hi_v4df (tmp, vec));
44838 ix86_expand_vector_extract (false, target, tmp, elt & 1);
44839 return;
44841 break;
44843 case V32QImode:
44844 if (TARGET_AVX)
44846 tmp = gen_reg_rtx (V16QImode);
44847 if (elt < 16)
44848 emit_insn (gen_vec_extract_lo_v32qi (tmp, vec));
44849 else
44850 emit_insn (gen_vec_extract_hi_v32qi (tmp, vec));
44851 ix86_expand_vector_extract (false, target, tmp, elt & 15);
44852 return;
44854 break;
44856 case V16HImode:
44857 if (TARGET_AVX)
44859 tmp = gen_reg_rtx (V8HImode);
44860 if (elt < 8)
44861 emit_insn (gen_vec_extract_lo_v16hi (tmp, vec));
44862 else
44863 emit_insn (gen_vec_extract_hi_v16hi (tmp, vec));
44864 ix86_expand_vector_extract (false, target, tmp, elt & 7);
44865 return;
44867 break;
44869 case V8SImode:
44870 if (TARGET_AVX)
44872 tmp = gen_reg_rtx (V4SImode);
44873 if (elt < 4)
44874 emit_insn (gen_vec_extract_lo_v8si (tmp, vec));
44875 else
44876 emit_insn (gen_vec_extract_hi_v8si (tmp, vec));
44877 ix86_expand_vector_extract (false, target, tmp, elt & 3);
44878 return;
44880 break;
44882 case V4DImode:
44883 if (TARGET_AVX)
44885 tmp = gen_reg_rtx (V2DImode);
44886 if (elt < 2)
44887 emit_insn (gen_vec_extract_lo_v4di (tmp, vec));
44888 else
44889 emit_insn (gen_vec_extract_hi_v4di (tmp, vec));
44890 ix86_expand_vector_extract (false, target, tmp, elt & 1);
44891 return;
44893 break;
44895 case V32HImode:
44896 if (TARGET_AVX512BW)
44898 tmp = gen_reg_rtx (V16HImode);
44899 if (elt < 16)
44900 emit_insn (gen_vec_extract_lo_v32hi (tmp, vec));
44901 else
44902 emit_insn (gen_vec_extract_hi_v32hi (tmp, vec));
44903 ix86_expand_vector_extract (false, target, tmp, elt & 15);
44904 return;
44906 break;
44908 case V64QImode:
44909 if (TARGET_AVX512BW)
44911 tmp = gen_reg_rtx (V32QImode);
44912 if (elt < 32)
44913 emit_insn (gen_vec_extract_lo_v64qi (tmp, vec));
44914 else
44915 emit_insn (gen_vec_extract_hi_v64qi (tmp, vec));
44916 ix86_expand_vector_extract (false, target, tmp, elt & 31);
44917 return;
44919 break;
44921 case V16SFmode:
44922 tmp = gen_reg_rtx (V8SFmode);
44923 if (elt < 8)
44924 emit_insn (gen_vec_extract_lo_v16sf (tmp, vec));
44925 else
44926 emit_insn (gen_vec_extract_hi_v16sf (tmp, vec));
44927 ix86_expand_vector_extract (false, target, tmp, elt & 7);
44928 return;
44930 case V8DFmode:
44931 tmp = gen_reg_rtx (V4DFmode);
44932 if (elt < 4)
44933 emit_insn (gen_vec_extract_lo_v8df (tmp, vec));
44934 else
44935 emit_insn (gen_vec_extract_hi_v8df (tmp, vec));
44936 ix86_expand_vector_extract (false, target, tmp, elt & 3);
44937 return;
44939 case V16SImode:
44940 tmp = gen_reg_rtx (V8SImode);
44941 if (elt < 8)
44942 emit_insn (gen_vec_extract_lo_v16si (tmp, vec));
44943 else
44944 emit_insn (gen_vec_extract_hi_v16si (tmp, vec));
44945 ix86_expand_vector_extract (false, target, tmp, elt & 7);
44946 return;
44948 case V8DImode:
44949 tmp = gen_reg_rtx (V4DImode);
44950 if (elt < 4)
44951 emit_insn (gen_vec_extract_lo_v8di (tmp, vec));
44952 else
44953 emit_insn (gen_vec_extract_hi_v8di (tmp, vec));
44954 ix86_expand_vector_extract (false, target, tmp, elt & 3);
44955 return;
44957 case V8QImode:
44958 /* ??? Could extract the appropriate HImode element and shift. */
44959 default:
44960 break;
44963 if (use_vec_extr)
44965 tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (1, GEN_INT (elt)));
44966 tmp = gen_rtx_VEC_SELECT (inner_mode, vec, tmp);
44968 /* Let the rtl optimizers know about the zero extension performed. */
44969 if (inner_mode == QImode || inner_mode == HImode)
44971 tmp = gen_rtx_ZERO_EXTEND (SImode, tmp);
44972 target = gen_lowpart (SImode, target);
44975 emit_insn (gen_rtx_SET (VOIDmode, target, tmp));
44977 else
44979 rtx mem = assign_stack_temp (mode, GET_MODE_SIZE (mode));
44981 emit_move_insn (mem, vec);
44983 tmp = adjust_address (mem, inner_mode, elt*GET_MODE_SIZE (inner_mode));
44984 emit_move_insn (target, tmp);
44988 /* Generate code to copy vector bits i / 2 ... i - 1 from vector SRC
44989 to bits 0 ... i / 2 - 1 of vector DEST, which has the same mode.
44990 The upper bits of DEST are undefined, though they shouldn't cause
44991 exceptions (some bits from src or all zeros are ok). */
44993 static void
44994 emit_reduc_half (rtx dest, rtx src, int i)
44996 rtx tem, d = dest;
44997 switch (GET_MODE (src))
44999 case V4SFmode:
45000 if (i == 128)
45001 tem = gen_sse_movhlps (dest, src, src);
45002 else
45003 tem = gen_sse_shufps_v4sf (dest, src, src, const1_rtx, const1_rtx,
45004 GEN_INT (1 + 4), GEN_INT (1 + 4));
45005 break;
45006 case V2DFmode:
45007 tem = gen_vec_interleave_highv2df (dest, src, src);
45008 break;
45009 case V16QImode:
45010 case V8HImode:
45011 case V4SImode:
45012 case V2DImode:
45013 d = gen_reg_rtx (V1TImode);
45014 tem = gen_sse2_lshrv1ti3 (d, gen_lowpart (V1TImode, src),
45015 GEN_INT (i / 2));
45016 break;
45017 case V8SFmode:
45018 if (i == 256)
45019 tem = gen_avx_vperm2f128v8sf3 (dest, src, src, const1_rtx);
45020 else
45021 tem = gen_avx_shufps256 (dest, src, src,
45022 GEN_INT (i == 128 ? 2 + (3 << 2) : 1));
45023 break;
45024 case V4DFmode:
45025 if (i == 256)
45026 tem = gen_avx_vperm2f128v4df3 (dest, src, src, const1_rtx);
45027 else
45028 tem = gen_avx_shufpd256 (dest, src, src, const1_rtx);
45029 break;
45030 case V32QImode:
45031 case V16HImode:
45032 case V8SImode:
45033 case V4DImode:
45034 if (i == 256)
45036 if (GET_MODE (dest) != V4DImode)
45037 d = gen_reg_rtx (V4DImode);
45038 tem = gen_avx2_permv2ti (d, gen_lowpart (V4DImode, src),
45039 gen_lowpart (V4DImode, src),
45040 const1_rtx);
45042 else
45044 d = gen_reg_rtx (V2TImode);
45045 tem = gen_avx2_lshrv2ti3 (d, gen_lowpart (V2TImode, src),
45046 GEN_INT (i / 2));
45048 break;
45049 case V64QImode:
45050 case V32HImode:
45051 case V16SImode:
45052 case V16SFmode:
45053 case V8DImode:
45054 case V8DFmode:
45055 if (i > 128)
45056 tem = gen_avx512f_shuf_i32x4_1 (gen_lowpart (V16SImode, dest),
45057 gen_lowpart (V16SImode, src),
45058 gen_lowpart (V16SImode, src),
45059 GEN_INT (0x4 + (i == 512 ? 4 : 0)),
45060 GEN_INT (0x5 + (i == 512 ? 4 : 0)),
45061 GEN_INT (0x6 + (i == 512 ? 4 : 0)),
45062 GEN_INT (0x7 + (i == 512 ? 4 : 0)),
45063 GEN_INT (0xC), GEN_INT (0xD),
45064 GEN_INT (0xE), GEN_INT (0xF),
45065 GEN_INT (0x10), GEN_INT (0x11),
45066 GEN_INT (0x12), GEN_INT (0x13),
45067 GEN_INT (0x14), GEN_INT (0x15),
45068 GEN_INT (0x16), GEN_INT (0x17));
45069 else
45070 tem = gen_avx512f_pshufd_1 (gen_lowpart (V16SImode, dest),
45071 gen_lowpart (V16SImode, src),
45072 GEN_INT (i == 128 ? 0x2 : 0x1),
45073 GEN_INT (0x3),
45074 GEN_INT (0x3),
45075 GEN_INT (0x3),
45076 GEN_INT (i == 128 ? 0x6 : 0x5),
45077 GEN_INT (0x7),
45078 GEN_INT (0x7),
45079 GEN_INT (0x7),
45080 GEN_INT (i == 128 ? 0xA : 0x9),
45081 GEN_INT (0xB),
45082 GEN_INT (0xB),
45083 GEN_INT (0xB),
45084 GEN_INT (i == 128 ? 0xE : 0xD),
45085 GEN_INT (0xF),
45086 GEN_INT (0xF),
45087 GEN_INT (0xF));
45088 break;
45089 default:
45090 gcc_unreachable ();
45092 emit_insn (tem);
45093 if (d != dest)
45094 emit_move_insn (dest, gen_lowpart (GET_MODE (dest), d));
45097 /* Expand a vector reduction. FN is the binary pattern to reduce;
45098 DEST is the destination; IN is the input vector. */
45100 void
45101 ix86_expand_reduc (rtx (*fn) (rtx, rtx, rtx), rtx dest, rtx in)
45103 rtx half, dst, vec = in;
45104 machine_mode mode = GET_MODE (in);
45105 int i;
45107 /* SSE4 has a special instruction for V8HImode UMIN reduction. */
45108 if (TARGET_SSE4_1
45109 && mode == V8HImode
45110 && fn == gen_uminv8hi3)
45112 emit_insn (gen_sse4_1_phminposuw (dest, in));
45113 return;
45116 for (i = GET_MODE_BITSIZE (mode);
45117 i > GET_MODE_BITSIZE (GET_MODE_INNER (mode));
45118 i >>= 1)
45120 half = gen_reg_rtx (mode);
45121 emit_reduc_half (half, vec, i);
45122 if (i == GET_MODE_BITSIZE (GET_MODE_INNER (mode)) * 2)
45123 dst = dest;
45124 else
45125 dst = gen_reg_rtx (mode);
45126 emit_insn (fn (dst, half, vec));
45127 vec = dst;
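/* The loop above reduces a vector by repeatedly folding its upper half onto
   its lower half with FN until one element remains.  A scalar C model of the
   same shape, using max over eight floats as the (illustrative) operation,
   under #if 0.  */
#if 0
static float
reduc_max_sketch (const float in[8])
{
  float v[8];
  __builtin_memcpy (v, in, sizeof v);
  for (int n = 8; n > 1; n >>= 1)		/* halve the active width */
    for (int i = 0; i < n / 2; i++)
      v[i] = v[i] > v[i + n / 2] ? v[i] : v[i + n / 2];
  return v[0];
}
#endif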
45131 /* Target hook for scalar_mode_supported_p. */
45132 static bool
45133 ix86_scalar_mode_supported_p (machine_mode mode)
45135 if (DECIMAL_FLOAT_MODE_P (mode))
45136 return default_decimal_float_supported_p ();
45137 else if (mode == TFmode)
45138 return true;
45139 else
45140 return default_scalar_mode_supported_p (mode);
45143 /* Implements target hook vector_mode_supported_p. */
45144 static bool
45145 ix86_vector_mode_supported_p (machine_mode mode)
45147 if (TARGET_SSE && VALID_SSE_REG_MODE (mode))
45148 return true;
45149 if (TARGET_SSE2 && VALID_SSE2_REG_MODE (mode))
45150 return true;
45151 if (TARGET_AVX && VALID_AVX256_REG_MODE (mode))
45152 return true;
45153 if (TARGET_AVX512F && VALID_AVX512F_REG_MODE (mode))
45154 return true;
45155 if (TARGET_MMX && VALID_MMX_REG_MODE (mode))
45156 return true;
45157 if (TARGET_3DNOW && VALID_MMX_REG_MODE_3DNOW (mode))
45158 return true;
45159 return false;
45162 /* Implement target hook libgcc_floating_mode_supported_p. */
45163 static bool
45164 ix86_libgcc_floating_mode_supported_p (machine_mode mode)
45166 switch (mode)
45168 case SFmode:
45169 case DFmode:
45170 case XFmode:
45171 return true;
45173 case TFmode:
45174 #ifdef IX86_NO_LIBGCC_TFMODE
45175 return false;
45176 #elif defined IX86_MAYBE_NO_LIBGCC_TFMODE
45177 return TARGET_LONG_DOUBLE_128;
45178 #else
45179 return true;
45180 #endif
45182 default:
45183 return false;
45187 /* Target hook for c_mode_for_suffix. */
45188 static machine_mode
45189 ix86_c_mode_for_suffix (char suffix)
45191 if (suffix == 'q')
45192 return TFmode;
45193 if (suffix == 'w')
45194 return XFmode;
45196 return VOIDmode;
45199 /* Worker function for TARGET_MD_ASM_CLOBBERS.
45201 We do this in the new i386 backend to maintain source compatibility
45202 with the old cc0-based compiler. */
45204 static tree
45205 ix86_md_asm_clobbers (tree, tree, tree clobbers)
45207 clobbers = tree_cons (NULL_TREE, build_string (5, "flags"),
45208 clobbers);
45209 clobbers = tree_cons (NULL_TREE, build_string (4, "fpsr"),
45210 clobbers);
45211 return clobbers;
45214 /* Implements target vector targetm.asm.encode_section_info. */
45216 static void ATTRIBUTE_UNUSED
45217 ix86_encode_section_info (tree decl, rtx rtl, int first)
45219 default_encode_section_info (decl, rtl, first);
45221 if (ix86_in_large_data_p (decl))
45222 SYMBOL_REF_FLAGS (XEXP (rtl, 0)) |= SYMBOL_FLAG_FAR_ADDR;
45225 /* Worker function for REVERSE_CONDITION. */
45227 enum rtx_code
45228 ix86_reverse_condition (enum rtx_code code, machine_mode mode)
45230 return (mode != CCFPmode && mode != CCFPUmode
45231 ? reverse_condition (code)
45232 : reverse_condition_maybe_unordered (code));
45235 /* Output code to perform an x87 FP register move, from OPERANDS[1]
45236 to OPERANDS[0]. */
45238 const char *
45239 output_387_reg_move (rtx insn, rtx *operands)
45241 if (REG_P (operands[0]))
45243 if (REG_P (operands[1])
45244 && find_regno_note (insn, REG_DEAD, REGNO (operands[1])))
45246 if (REGNO (operands[0]) == FIRST_STACK_REG)
45247 return output_387_ffreep (operands, 0);
45248 return "fstp\t%y0";
45250 if (STACK_TOP_P (operands[0]))
45251 return "fld%Z1\t%y1";
45252 return "fst\t%y0";
45254 else if (MEM_P (operands[0]))
45256 gcc_assert (REG_P (operands[1]));
45257 if (find_regno_note (insn, REG_DEAD, REGNO (operands[1])))
45258 return "fstp%Z0\t%y0";
45259 else
45261 /* There is no non-popping store to memory for XFmode.
45262 So if we need one, follow the store with a load. */
45263 if (GET_MODE (operands[0]) == XFmode)
45264 return "fstp%Z0\t%y0\n\tfld%Z0\t%y0";
45265 else
45266 return "fst%Z0\t%y0";
45269 else
45270 gcc_unreachable();
45273 /* Output code to perform a conditional jump to LABEL, if C2 flag in
45274 FP status register is set. */
45276 void
45277 ix86_emit_fp_unordered_jump (rtx label)
45279 rtx reg = gen_reg_rtx (HImode);
45280 rtx temp;
45282 emit_insn (gen_x86_fnstsw_1 (reg));
45284 if (TARGET_SAHF && (TARGET_USE_SAHF || optimize_insn_for_size_p ()))
45286 emit_insn (gen_x86_sahf_1 (reg));
45288 temp = gen_rtx_REG (CCmode, FLAGS_REG);
45289 temp = gen_rtx_UNORDERED (VOIDmode, temp, const0_rtx);
45291 else
45293 emit_insn (gen_testqi_ext_ccno_0 (reg, GEN_INT (0x04)));
45295 temp = gen_rtx_REG (CCNOmode, FLAGS_REG);
45296 temp = gen_rtx_NE (VOIDmode, temp, const0_rtx);
45299 temp = gen_rtx_IF_THEN_ELSE (VOIDmode, temp,
45300 gen_rtx_LABEL_REF (VOIDmode, label),
45301 pc_rtx);
45302 temp = gen_rtx_SET (VOIDmode, pc_rtx, temp);
45304 emit_jump_insn (temp);
45305 predict_jump (REG_BR_PROB_BASE * 10 / 100);
45308 /* Output code to perform a log1p XFmode calculation. */
45310 void ix86_emit_i387_log1p (rtx op0, rtx op1)
45312 rtx_code_label *label1 = gen_label_rtx ();
45313 rtx_code_label *label2 = gen_label_rtx ();
45315 rtx tmp = gen_reg_rtx (XFmode);
45316 rtx tmp2 = gen_reg_rtx (XFmode);
45317 rtx test;
45319 emit_insn (gen_absxf2 (tmp, op1));
45320 test = gen_rtx_GE (VOIDmode, tmp,
45321 CONST_DOUBLE_FROM_REAL_VALUE (
45322 REAL_VALUE_ATOF ("0.29289321881345247561810596348408353", XFmode),
45323 XFmode));
45324 emit_jump_insn (gen_cbranchxf4 (test, XEXP (test, 0), XEXP (test, 1), label1));
45326 emit_move_insn (tmp2, standard_80387_constant_rtx (4)); /* fldln2 */
45327 emit_insn (gen_fyl2xp1xf3_i387 (op0, op1, tmp2));
45328 emit_jump (label2);
45330 emit_label (label1);
45331 emit_move_insn (tmp, CONST1_RTX (XFmode));
45332 emit_insn (gen_addxf3 (tmp, op1, tmp));
45333 emit_move_insn (tmp2, standard_80387_constant_rtx (4)); /* fldln2 */
45334 emit_insn (gen_fyl2xxf3_i387 (op0, tmp, tmp2));
45336 emit_label (label2);
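/* The sequence above picks between fyl2xp1 (accurate only for small |x|)
   and fyl2x on 1 + x, split at 1 - sqrt(2)/2.  A libm-based C sketch of the
   same decision; in plain C both branches compute log(1 + x), so the split
   only models the accuracy domains of the two x87 instructions.  Under
   #if 0, illustrative only.  */
#if 0
static double
log1p_sketch (double x)
{
  if (__builtin_fabs (x) < 0.29289321881345247561810596348408353)
    return __builtin_log1p (x);		/* fyl2xp1 path: ln2 * log2 (x + 1) */
  else
    return __builtin_log (1.0 + x);	/* fyl2x path: ln2 * log2 (1 + x) */
}
#endif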
45339 /* Emit code for round calculation. */
45340 void ix86_emit_i387_round (rtx op0, rtx op1)
45342 machine_mode inmode = GET_MODE (op1);
45343 machine_mode outmode = GET_MODE (op0);
45344 rtx e1, e2, res, tmp, tmp1, half;
45345 rtx scratch = gen_reg_rtx (HImode);
45346 rtx flags = gen_rtx_REG (CCNOmode, FLAGS_REG);
45347 rtx_code_label *jump_label = gen_label_rtx ();
45348 rtx insn;
45349 rtx (*gen_abs) (rtx, rtx);
45350 rtx (*gen_neg) (rtx, rtx);
45352 switch (inmode)
45354 case SFmode:
45355 gen_abs = gen_abssf2;
45356 break;
45357 case DFmode:
45358 gen_abs = gen_absdf2;
45359 break;
45360 case XFmode:
45361 gen_abs = gen_absxf2;
45362 break;
45363 default:
45364 gcc_unreachable ();
45367 switch (outmode)
45369 case SFmode:
45370 gen_neg = gen_negsf2;
45371 break;
45372 case DFmode:
45373 gen_neg = gen_negdf2;
45374 break;
45375 case XFmode:
45376 gen_neg = gen_negxf2;
45377 break;
45378 case HImode:
45379 gen_neg = gen_neghi2;
45380 break;
45381 case SImode:
45382 gen_neg = gen_negsi2;
45383 break;
45384 case DImode:
45385 gen_neg = gen_negdi2;
45386 break;
45387 default:
45388 gcc_unreachable ();
45391 e1 = gen_reg_rtx (inmode);
45392 e2 = gen_reg_rtx (inmode);
45393 res = gen_reg_rtx (outmode);
45395 half = CONST_DOUBLE_FROM_REAL_VALUE (dconsthalf, inmode);
45397 /* round(a) = sgn(a) * floor(fabs(a) + 0.5) */
45399 /* scratch = fxam(op1) */
45400 emit_insn (gen_rtx_SET (VOIDmode, scratch,
45401 gen_rtx_UNSPEC (HImode, gen_rtvec (1, op1),
45402 UNSPEC_FXAM)));
45403 /* e1 = fabs(op1) */
45404 emit_insn (gen_abs (e1, op1));
45406 /* e2 = e1 + 0.5 */
45407 half = force_reg (inmode, half);
45408 emit_insn (gen_rtx_SET (VOIDmode, e2,
45409 gen_rtx_PLUS (inmode, e1, half)));
45411 /* res = floor(e2) */
45412 if (inmode != XFmode)
45414 tmp1 = gen_reg_rtx (XFmode);
45416 emit_insn (gen_rtx_SET (VOIDmode, tmp1,
45417 gen_rtx_FLOAT_EXTEND (XFmode, e2)));
45419 else
45420 tmp1 = e2;
45422 switch (outmode)
45424 case SFmode:
45425 case DFmode:
45427 rtx tmp0 = gen_reg_rtx (XFmode);
45429 emit_insn (gen_frndintxf2_floor (tmp0, tmp1));
45431 emit_insn (gen_rtx_SET (VOIDmode, res,
45432 gen_rtx_UNSPEC (outmode, gen_rtvec (1, tmp0),
45433 UNSPEC_TRUNC_NOOP)));
45435 break;
45436 case XFmode:
45437 emit_insn (gen_frndintxf2_floor (res, tmp1));
45438 break;
45439 case HImode:
45440 emit_insn (gen_lfloorxfhi2 (res, tmp1));
45441 break;
45442 case SImode:
45443 emit_insn (gen_lfloorxfsi2 (res, tmp1));
45444 break;
45445 case DImode:
45446 emit_insn (gen_lfloorxfdi2 (res, tmp1));
45447 break;
45448 default:
45449 gcc_unreachable ();
45452 /* flags = signbit(a) */
45453 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x02)));
45455 /* if (flags) then res = -res */
45456 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode,
45457 gen_rtx_EQ (VOIDmode, flags, const0_rtx),
45458 gen_rtx_LABEL_REF (VOIDmode, jump_label),
45459 pc_rtx);
45460 insn = emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
45461 predict_jump (REG_BR_PROB_BASE * 50 / 100);
45462 JUMP_LABEL (insn) = jump_label;
45464 emit_insn (gen_neg (res, res));
45466 emit_label (jump_label);
45467 LABEL_NUSES (jump_label) = 1;
45469 emit_move_insn (op0, res);
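/* Scalar model of the sequence above: round (a) = sgn (a) * floor (|a| + 0.5),
   with the sign taken from fxam in the RTL and from signbit here.  Under
   #if 0, illustrative only.  */
#if 0
static double
i387_round_sketch (double a)
{
  double r = __builtin_floor (__builtin_fabs (a) + 0.5);
  return __builtin_signbit (a) ? -r : r;	/* also keeps -0.0 negative */
}
#endif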
45472 /* Output code to perform a Newton-Raphson approximation of a single precision
45473 floating point divide [http://en.wikipedia.org/wiki/N-th_root_algorithm]. */
45475 void ix86_emit_swdivsf (rtx res, rtx a, rtx b, machine_mode mode)
45477 rtx x0, x1, e0, e1;
45479 x0 = gen_reg_rtx (mode);
45480 e0 = gen_reg_rtx (mode);
45481 e1 = gen_reg_rtx (mode);
45482 x1 = gen_reg_rtx (mode);
45484 /* a / b = a * ((rcp(b) + rcp(b)) - (b * rcp(b) * rcp (b))) */
45486 b = force_reg (mode, b);
45488 /* x0 = rcp(b) estimate */
45489 if (mode == V16SFmode || mode == V8DFmode)
45490 emit_insn (gen_rtx_SET (VOIDmode, x0,
45491 gen_rtx_UNSPEC (mode, gen_rtvec (1, b),
45492 UNSPEC_RCP14)));
45493 else
45494 emit_insn (gen_rtx_SET (VOIDmode, x0,
45495 gen_rtx_UNSPEC (mode, gen_rtvec (1, b),
45496 UNSPEC_RCP)));
45498 /* e0 = x0 * b */
45499 emit_insn (gen_rtx_SET (VOIDmode, e0,
45500 gen_rtx_MULT (mode, x0, b)));
45502 /* e0 = x0 * e0 */
45503 emit_insn (gen_rtx_SET (VOIDmode, e0,
45504 gen_rtx_MULT (mode, x0, e0)));
45506 /* e1 = x0 + x0 */
45507 emit_insn (gen_rtx_SET (VOIDmode, e1,
45508 gen_rtx_PLUS (mode, x0, x0)));
45510 /* x1 = e1 - e0 */
45511 emit_insn (gen_rtx_SET (VOIDmode, x1,
45512 gen_rtx_MINUS (mode, e1, e0)));
45514 /* res = a * x1 */
45515 emit_insn (gen_rtx_SET (VOIDmode, res,
45516 gen_rtx_MULT (mode, a, x1)));
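/* The RTL above performs one Newton-Raphson step on a hardware reciprocal
   estimate and then multiplies by A.  A scalar sketch where X0 stands for
   the rcp/rcp14 estimate of 1/B (passed in, since plain C has no such
   instruction).  Under #if 0, illustrative only.  */
#if 0
static float
swdiv_sketch (float a, float b, float x0)
{
  float e0 = (x0 * b) * x0;	/* b * rcp(b) * rcp(b) */
  float x1 = (x0 + x0) - e0;	/* one refinement step on the reciprocal */
  return a * x1;		/* a / b with the refined reciprocal */
}
#endif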
45519 /* Output code to perform a Newton-Raphson approximation of a
45520 single precision floating point [reciprocal] square root. */
45522 void ix86_emit_swsqrtsf (rtx res, rtx a, machine_mode mode,
45523 bool recip)
45525 rtx x0, e0, e1, e2, e3, mthree, mhalf;
45526 REAL_VALUE_TYPE r;
45527 int unspec;
45529 x0 = gen_reg_rtx (mode);
45530 e0 = gen_reg_rtx (mode);
45531 e1 = gen_reg_rtx (mode);
45532 e2 = gen_reg_rtx (mode);
45533 e3 = gen_reg_rtx (mode);
45535 real_from_integer (&r, VOIDmode, -3, SIGNED);
45536 mthree = CONST_DOUBLE_FROM_REAL_VALUE (r, SFmode);
45538 real_arithmetic (&r, NEGATE_EXPR, &dconsthalf, NULL);
45539 mhalf = CONST_DOUBLE_FROM_REAL_VALUE (r, SFmode);
45540 unspec = UNSPEC_RSQRT;
45542 if (VECTOR_MODE_P (mode))
45544 mthree = ix86_build_const_vector (mode, true, mthree);
45545 mhalf = ix86_build_const_vector (mode, true, mhalf);
45546 /* There is no 512-bit rsqrt. There is however rsqrt14. */
45547 if (GET_MODE_SIZE (mode) == 64)
45548 unspec = UNSPEC_RSQRT14;
45551 /* sqrt(a) = -0.5 * a * rsqrtss(a) * (a * rsqrtss(a) * rsqrtss(a) - 3.0)
45552 rsqrt(a) = -0.5 * rsqrtss(a) * (a * rsqrtss(a) * rsqrtss(a) - 3.0) */
45554 a = force_reg (mode, a);
45556 /* x0 = rsqrt(a) estimate */
45557 emit_insn (gen_rtx_SET (VOIDmode, x0,
45558 gen_rtx_UNSPEC (mode, gen_rtvec (1, a),
45559 unspec)));
45561   /* If a == 0.0, filter the infinity out of the estimate to prevent NaN for sqrt(0.0).  */
45562 if (!recip)
45564 rtx zero, mask;
45566 zero = gen_reg_rtx (mode);
45567 mask = gen_reg_rtx (mode);
45569 zero = force_reg (mode, CONST0_RTX(mode));
45571 /* Handle masked compare. */
45572 if (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 64)
45574 mask = gen_reg_rtx (HImode);
45575 /* Imm value 0x4 corresponds to not-equal comparison. */
45576 emit_insn (gen_avx512f_cmpv16sf3 (mask, zero, a, GEN_INT (0x4)));
45577 emit_insn (gen_avx512f_blendmv16sf (x0, zero, x0, mask));
45579 else
45581 emit_insn (gen_rtx_SET (VOIDmode, mask,
45582 gen_rtx_NE (mode, zero, a)));
45584 emit_insn (gen_rtx_SET (VOIDmode, x0,
45585 gen_rtx_AND (mode, x0, mask)));
45589 /* e0 = x0 * a */
45590 emit_insn (gen_rtx_SET (VOIDmode, e0,
45591 gen_rtx_MULT (mode, x0, a)));
45592 /* e1 = e0 * x0 */
45593 emit_insn (gen_rtx_SET (VOIDmode, e1,
45594 gen_rtx_MULT (mode, e0, x0)));
45596 /* e2 = e1 - 3. */
45597 mthree = force_reg (mode, mthree);
45598 emit_insn (gen_rtx_SET (VOIDmode, e2,
45599 gen_rtx_PLUS (mode, e1, mthree)));
45601 mhalf = force_reg (mode, mhalf);
45602 if (recip)
45603 /* e3 = -.5 * x0 */
45604 emit_insn (gen_rtx_SET (VOIDmode, e3,
45605 gen_rtx_MULT (mode, x0, mhalf)));
45606 else
45607 /* e3 = -.5 * e0 */
45608 emit_insn (gen_rtx_SET (VOIDmode, e3,
45609 gen_rtx_MULT (mode, e0, mhalf)));
45610 /* ret = e2 * e3 */
45611 emit_insn (gen_rtx_SET (VOIDmode, res,
45612 gen_rtx_MULT (mode, e2, e3)));
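/* Scalar model of the rsqrt refinement above, with X0 standing for the
   rsqrt/rsqrt14 estimate of 1/sqrt(a).  The zero-input masking done in the
   RTL is omitted here.  Under #if 0, illustrative only.  */
#if 0
static float
swsqrt_sketch (float a, float x0, int recip)
{
  float e0 = x0 * a;
  float e1 = e0 * x0;			/* a * x0 * x0, close to 1 */
  float e2 = e1 - 3.0f;
  float e3 = (recip ? x0 : e0) * -0.5f;
  return e2 * e3;			/* rsqrt(a) if RECIP, else sqrt(a) */
}
#endif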
45615 #ifdef TARGET_SOLARIS
45616 /* Solaris implementation of TARGET_ASM_NAMED_SECTION. */
45618 static void
45619 i386_solaris_elf_named_section (const char *name, unsigned int flags,
45620 tree decl)
45622 /* With Binutils 2.15, the "@unwind" marker must be specified on
45623 every occurrence of the ".eh_frame" section, not just the first
45624 one. */
45625 if (TARGET_64BIT
45626 && strcmp (name, ".eh_frame") == 0)
45628 fprintf (asm_out_file, "\t.section\t%s,\"%s\",@unwind\n", name,
45629 flags & SECTION_WRITE ? "aw" : "a");
45630 return;
45633 #ifndef USE_GAS
45634 if (HAVE_COMDAT_GROUP && flags & SECTION_LINKONCE)
45636 solaris_elf_asm_comdat_section (name, flags, decl);
45637 return;
45639 #endif
45641 default_elf_asm_named_section (name, flags, decl);
45643 #endif /* TARGET_SOLARIS */
45645 /* Return the mangling of TYPE if it is an extended fundamental type. */
45647 static const char *
45648 ix86_mangle_type (const_tree type)
45650 type = TYPE_MAIN_VARIANT (type);
45652 if (TREE_CODE (type) != VOID_TYPE && TREE_CODE (type) != BOOLEAN_TYPE
45653 && TREE_CODE (type) != INTEGER_TYPE && TREE_CODE (type) != REAL_TYPE)
45654 return NULL;
45656 switch (TYPE_MODE (type))
45658 case TFmode:
45659 /* __float128 is "g". */
45660 return "g";
45661 case XFmode:
45662 /* "long double" or __float80 is "e". */
45663 return "e";
45664 default:
45665 return NULL;
45669 /* For 32-bit code we can save PIC register setup by using
45670 __stack_chk_fail_local hidden function instead of calling
45671 __stack_chk_fail directly. 64-bit code doesn't need to setup any PIC
45672 register, so it is better to call __stack_chk_fail directly. */
45674 static tree ATTRIBUTE_UNUSED
45675 ix86_stack_protect_fail (void)
45677 return TARGET_64BIT
45678 ? default_external_stack_protect_fail ()
45679 : default_hidden_stack_protect_fail ();
45682 /* Select a format to encode pointers in exception handling data. CODE
45683 is 0 for data, 1 for code labels, 2 for function pointers. GLOBAL is
45684 true if the symbol may be affected by dynamic relocations.
45686 ??? All x86 object file formats are capable of representing this.
45687 After all, the relocation needed is the same as for the call insn.
45688 Whether or not a particular assembler allows us to enter such, I
45689 guess we'll have to see. */
45691 asm_preferred_eh_data_format (int code, int global)
45693 if (flag_pic)
45695 int type = DW_EH_PE_sdata8;
45696 if (!TARGET_64BIT
45697 || ix86_cmodel == CM_SMALL_PIC
45698 || (ix86_cmodel == CM_MEDIUM_PIC && (global || code)))
45699 type = DW_EH_PE_sdata4;
45700 return (global ? DW_EH_PE_indirect : 0) | DW_EH_PE_pcrel | type;
45702 if (ix86_cmodel == CM_SMALL
45703 || (ix86_cmodel == CM_MEDIUM && code))
45704 return DW_EH_PE_udata4;
45705 return DW_EH_PE_absptr;
45708 /* Expand copysign from SIGN to the positive value ABS_VALUE
45709 storing in RESULT. If MASK is non-null, it shall be a mask to mask out
45710 the sign-bit. */
45711 static void
45712 ix86_sse_copysign_to_positive (rtx result, rtx abs_value, rtx sign, rtx mask)
45714 machine_mode mode = GET_MODE (sign);
45715 rtx sgn = gen_reg_rtx (mode);
45716 if (mask == NULL_RTX)
45718 machine_mode vmode;
45720 if (mode == SFmode)
45721 vmode = V4SFmode;
45722 else if (mode == DFmode)
45723 vmode = V2DFmode;
45724 else
45725 vmode = mode;
45727 mask = ix86_build_signbit_mask (vmode, VECTOR_MODE_P (mode), false);
45728 if (!VECTOR_MODE_P (mode))
45730 /* We need to generate a scalar mode mask in this case. */
45731 rtx tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (1, const0_rtx));
45732 tmp = gen_rtx_VEC_SELECT (mode, mask, tmp);
45733 mask = gen_reg_rtx (mode);
45734 emit_insn (gen_rtx_SET (VOIDmode, mask, tmp));
45737 else
45738 mask = gen_rtx_NOT (mode, mask);
45739 emit_insn (gen_rtx_SET (VOIDmode, sgn,
45740 gen_rtx_AND (mode, mask, sign)));
45741 emit_insn (gen_rtx_SET (VOIDmode, result,
45742 gen_rtx_IOR (mode, abs_value, sgn)));
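/* Bit-level model of the sequence above: take ABS_VALUE (assumed to have a
   clear sign bit), AND the sign bit out of SIGN, and OR it in.  Scalar
   double version, under #if 0, illustrative only.  */
#if 0
static double
copysign_to_positive_sketch (double abs_value, double sign)
{
  unsigned long long av, s;
  __builtin_memcpy (&av, &abs_value, sizeof av);
  __builtin_memcpy (&s, &sign, sizeof s);
  av |= s & 0x8000000000000000ULL;	/* OR in the sign bit of SIGN */
  __builtin_memcpy (&abs_value, &av, sizeof av);
  return abs_value;
}
#endif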
45745 /* Expand fabs (OP0) and return a new rtx that holds the result. The
45746 mask for masking out the sign-bit is stored in *SMASK, if that is
45747 non-null. */
45748 static rtx
45749 ix86_expand_sse_fabs (rtx op0, rtx *smask)
45751 machine_mode vmode, mode = GET_MODE (op0);
45752 rtx xa, mask;
45754 xa = gen_reg_rtx (mode);
45755 if (mode == SFmode)
45756 vmode = V4SFmode;
45757 else if (mode == DFmode)
45758 vmode = V2DFmode;
45759 else
45760 vmode = mode;
45761 mask = ix86_build_signbit_mask (vmode, VECTOR_MODE_P (mode), true);
45762 if (!VECTOR_MODE_P (mode))
45764 /* We need to generate a scalar mode mask in this case. */
45765 rtx tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (1, const0_rtx));
45766 tmp = gen_rtx_VEC_SELECT (mode, mask, tmp);
45767 mask = gen_reg_rtx (mode);
45768 emit_insn (gen_rtx_SET (VOIDmode, mask, tmp));
45770 emit_insn (gen_rtx_SET (VOIDmode, xa,
45771 gen_rtx_AND (mode, op0, mask)));
45773 if (smask)
45774 *smask = mask;
45776 return xa;
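/* The mask built above clears the sign bit with an AND.  Scalar double
   equivalent, under #if 0, illustrative only.  */
#if 0
static double
sse_fabs_sketch (double x)
{
  unsigned long long bits;
  __builtin_memcpy (&bits, &x, sizeof bits);
  bits &= ~0x8000000000000000ULL;	/* drop the sign bit */
  __builtin_memcpy (&x, &bits, sizeof bits);
  return x;
}
#endif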
45779 /* Expands a comparison of OP0 with OP1 using comparison code CODE,
45780 swapping the operands if SWAP_OPERANDS is true. The expanded
45781 code is a forward jump to a newly created label in case the
45782 comparison is true. The generated label rtx is returned. */
45783 static rtx_code_label *
45784 ix86_expand_sse_compare_and_jump (enum rtx_code code, rtx op0, rtx op1,
45785 bool swap_operands)
45787 machine_mode fpcmp_mode = ix86_fp_compare_mode (code);
45788 rtx_code_label *label;
45789 rtx tmp;
45791 if (swap_operands)
45792 std::swap (op0, op1);
45794 label = gen_label_rtx ();
45795 tmp = gen_rtx_REG (fpcmp_mode, FLAGS_REG);
45796 emit_insn (gen_rtx_SET (VOIDmode, tmp,
45797 gen_rtx_COMPARE (fpcmp_mode, op0, op1)));
45798 tmp = gen_rtx_fmt_ee (code, VOIDmode, tmp, const0_rtx);
45799 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
45800 gen_rtx_LABEL_REF (VOIDmode, label), pc_rtx);
45801 tmp = emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
45802 JUMP_LABEL (tmp) = label;
45804 return label;
45807 /* Expand a mask generating SSE comparison instruction comparing OP0 with OP1
45808 using comparison code CODE. Operands are swapped for the comparison if
45809 SWAP_OPERANDS is true. Returns a rtx for the generated mask. */
45810 static rtx
45811 ix86_expand_sse_compare_mask (enum rtx_code code, rtx op0, rtx op1,
45812 bool swap_operands)
45814 rtx (*insn)(rtx, rtx, rtx, rtx);
45815 machine_mode mode = GET_MODE (op0);
45816 rtx mask = gen_reg_rtx (mode);
45818 if (swap_operands)
45819 std::swap (op0, op1);
45821 insn = mode == DFmode ? gen_setcc_df_sse : gen_setcc_sf_sse;
45823 emit_insn (insn (mask, op0, op1,
45824 gen_rtx_fmt_ee (code, mode, op0, op1)));
45825 return mask;
45828 /* Generate and return a rtx of mode MODE for 2**n where n is the number
45829 of bits of the mantissa of MODE, which must be one of DFmode or SFmode. */
45830 static rtx
45831 ix86_gen_TWO52 (machine_mode mode)
45833 REAL_VALUE_TYPE TWO52r;
45834 rtx TWO52;
45836 real_ldexp (&TWO52r, &dconst1, mode == DFmode ? 52 : 23);
45837 TWO52 = const_double_from_real_value (TWO52r, mode);
45838 TWO52 = force_reg (mode, TWO52);
45840 return TWO52;
45843 /* Expand SSE sequence for computing lround from OP1 storing
45844 into OP0. */
45845 void
45846 ix86_expand_lround (rtx op0, rtx op1)
45848 /* C code for the stuff we're doing below:
45849 tmp = op1 + copysign (nextafter (0.5, 0.0), op1)
45850 return (long)tmp;
45852 machine_mode mode = GET_MODE (op1);
45853 const struct real_format *fmt;
45854 REAL_VALUE_TYPE pred_half, half_minus_pred_half;
45855 rtx adj;
45857 /* load nextafter (0.5, 0.0) */
45858 fmt = REAL_MODE_FORMAT (mode);
45859 real_2expN (&half_minus_pred_half, -(fmt->p) - 1, mode);
45860 REAL_ARITHMETIC (pred_half, MINUS_EXPR, dconsthalf, half_minus_pred_half);
45862 /* adj = copysign (0.5, op1) */
45863 adj = force_reg (mode, const_double_from_real_value (pred_half, mode));
45864 ix86_sse_copysign_to_positive (adj, adj, force_reg (mode, op1), NULL_RTX);
45866 /* adj = op1 + adj */
45867 adj = expand_simple_binop (mode, PLUS, adj, op1, NULL_RTX, 0, OPTAB_DIRECT);
45869 /* op0 = (imode)adj */
45870 expand_fix (op0, adj, 0);
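/* C model of the lround expansion above: add copysign (nextafter (0.5, 0.0),
   x) and truncate.  Using the predecessor of 0.5 keeps values just below
   0.5 from rounding away from zero.  Under #if 0, illustrative only.  */
#if 0
static long
lround_sketch (double x)
{
  double pred_half = __builtin_nextafter (0.5, 0.0);
  return (long) (x + __builtin_copysign (pred_half, x));  /* truncating cast */
}
#endif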
45873 /* Expand SSE2 sequence for computing lfloor or lceil from OPERAND1 storing
45874 into OPERAND0. */
45875 void
45876 ix86_expand_lfloorceil (rtx op0, rtx op1, bool do_floor)
45878 /* C code for the stuff we're doing below (for do_floor):
45879 xi = (long)op1;
45880 xi -= (double)xi > op1 ? 1 : 0;
45881 return xi;
45883 machine_mode fmode = GET_MODE (op1);
45884 machine_mode imode = GET_MODE (op0);
45885 rtx ireg, freg, tmp;
45886 rtx_code_label *label;
45888 /* reg = (long)op1 */
45889 ireg = gen_reg_rtx (imode);
45890 expand_fix (ireg, op1, 0);
45892 /* freg = (double)reg */
45893 freg = gen_reg_rtx (fmode);
45894 expand_float (freg, ireg, 0);
45896 /* ireg = (freg > op1) ? ireg - 1 : ireg */
45897 label = ix86_expand_sse_compare_and_jump (UNLE,
45898 freg, op1, !do_floor);
45899 tmp = expand_simple_binop (imode, do_floor ? MINUS : PLUS,
45900 ireg, const1_rtx, NULL_RTX, 0, OPTAB_DIRECT);
45901 emit_move_insn (ireg, tmp);
45903 emit_label (label);
45904 LABEL_NUSES (label) = 1;
45906 emit_move_insn (op0, ireg);
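/* C model of the lfloor path above: truncate toward zero, then subtract one
   if the conversion overshot the argument.  The lceil variant tests the
   opposite direction and adds one instead.  Under #if 0, illustrative only.  */
#if 0
static long
lfloor_sketch (double x)
{
  long xi = (long) x;		/* truncates toward zero */
  if ((double) xi > x)		/* overshot: x was negative with a fraction */
    xi -= 1;
  return xi;
}
#endif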
45909 /* Expand rint (IEEE round to nearest) rounding OPERAND1 and storing the
45910 result in OPERAND0. */
45911 void
45912 ix86_expand_rint (rtx operand0, rtx operand1)
45914 /* C code for the stuff we're doing below:
45915 xa = fabs (operand1);
45916 if (!isless (xa, 2**52))
45917 return operand1;
45918 xa = xa + 2**52 - 2**52;
45919 return copysign (xa, operand1);
45921 machine_mode mode = GET_MODE (operand0);
45922 rtx res, xa, TWO52, mask;
45923 rtx_code_label *label;
45925 res = gen_reg_rtx (mode);
45926 emit_move_insn (res, operand1);
45928 /* xa = abs (operand1) */
45929 xa = ix86_expand_sse_fabs (res, &mask);
45931 /* if (!isless (xa, TWO52)) goto label; */
45932 TWO52 = ix86_gen_TWO52 (mode);
45933 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
45935 xa = expand_simple_binop (mode, PLUS, xa, TWO52, NULL_RTX, 0, OPTAB_DIRECT);
45936 xa = expand_simple_binop (mode, MINUS, xa, TWO52, xa, 0, OPTAB_DIRECT);
45938 ix86_sse_copysign_to_positive (res, xa, res, mask);
45940 emit_label (label);
45941 LABEL_NUSES (label) = 1;
45943 emit_move_insn (operand0, res);
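/* C model of the rint expansion above: adding and subtracting 2**52 rounds
   any |x| < 2**52 to an integer in the current rounding mode; copysign then
   restores the sign (and -0.0).  The volatile temporary stands in for the
   "don't fold this" property the RTL sequence has by construction.  Under
   #if 0, illustrative only.  */
#if 0
static double
rint_sketch (double x)
{
  const double two52 = 4503599627370496.0;	/* 2**52 */
  double xa = __builtin_fabs (x);
  if (!(xa < two52))
    return x;					/* already integral, or NaN */
  volatile double t = xa + two52;
  xa = t - two52;
  return __builtin_copysign (xa, x);
}
#endif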
45946 /* Expand SSE2 sequence for computing floor or ceil from OPERAND1 storing
45947 into OPERAND0. */
45948 void
45949 ix86_expand_floorceildf_32 (rtx operand0, rtx operand1, bool do_floor)
45951 /* C code for the stuff we expand below.
45952 double xa = fabs (x), x2;
45953 if (!isless (xa, TWO52))
45954 return x;
45955 xa = xa + TWO52 - TWO52;
45956 x2 = copysign (xa, x);
45957 Compensate. Floor:
45958 if (x2 > x)
45959 x2 -= 1;
45960 Compensate. Ceil:
45961 if (x2 < x)
45962 x2 -= -1;
45963 return x2;
45965 machine_mode mode = GET_MODE (operand0);
45966 rtx xa, TWO52, tmp, one, res, mask;
45967 rtx_code_label *label;
45969 TWO52 = ix86_gen_TWO52 (mode);
45971 /* Temporary for holding the result, initialized to the input
45972 operand to ease control flow. */
45973 res = gen_reg_rtx (mode);
45974 emit_move_insn (res, operand1);
45976 /* xa = abs (operand1) */
45977 xa = ix86_expand_sse_fabs (res, &mask);
45979 /* if (!isless (xa, TWO52)) goto label; */
45980 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
45982 /* xa = xa + TWO52 - TWO52; */
45983 xa = expand_simple_binop (mode, PLUS, xa, TWO52, NULL_RTX, 0, OPTAB_DIRECT);
45984 xa = expand_simple_binop (mode, MINUS, xa, TWO52, xa, 0, OPTAB_DIRECT);
45986 /* xa = copysign (xa, operand1) */
45987 ix86_sse_copysign_to_positive (xa, xa, res, mask);
45989 /* generate 1.0 or -1.0 */
45990 one = force_reg (mode,
45991 const_double_from_real_value (do_floor
45992 ? dconst1 : dconstm1, mode));
45994 /* Compensate: xa = xa - (xa > operand1 ? 1 : 0) */
45995 tmp = ix86_expand_sse_compare_mask (UNGT, xa, res, !do_floor);
45996 emit_insn (gen_rtx_SET (VOIDmode, tmp,
45997 gen_rtx_AND (mode, one, tmp)));
45998 /* We always need to subtract here to preserve signed zero. */
45999 tmp = expand_simple_binop (mode, MINUS,
46000 xa, tmp, NULL_RTX, 0, OPTAB_DIRECT);
46001 emit_move_insn (res, tmp);
46003 emit_label (label);
46004 LABEL_NUSES (label) = 1;
46006 emit_move_insn (operand0, res);
46009 /* Expand SSE2 sequence for computing floor or ceil from OPERAND1 storing
46010 into OPERAND0. */
46011 void
46012 ix86_expand_floorceil (rtx operand0, rtx operand1, bool do_floor)
46014 /* C code for the stuff we expand below.
46015 double xa = fabs (x), x2;
46016 if (!isless (xa, TWO52))
46017 return x;
46018 x2 = (double)(long)x;
46019 Compensate. Floor:
46020 if (x2 > x)
46021 x2 -= 1;
46022 Compensate. Ceil:
46023 if (x2 < x)
46024 x2 += 1;
46025 if (HONOR_SIGNED_ZEROS (mode))
46026 return copysign (x2, x);
46027 return x2;
46029 machine_mode mode = GET_MODE (operand0);
46030 rtx xa, xi, TWO52, tmp, one, res, mask;
46031 rtx_code_label *label;
46033 TWO52 = ix86_gen_TWO52 (mode);
46035 /* Temporary for holding the result, initialized to the input
46036 operand to ease control flow. */
46037 res = gen_reg_rtx (mode);
46038 emit_move_insn (res, operand1);
46040 /* xa = abs (operand1) */
46041 xa = ix86_expand_sse_fabs (res, &mask);
46043 /* if (!isless (xa, TWO52)) goto label; */
46044 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
46046 /* xa = (double)(long)x */
46047 xi = gen_reg_rtx (mode == DFmode ? DImode : SImode);
46048 expand_fix (xi, res, 0);
46049 expand_float (xa, xi, 0);
46051 /* generate 1.0 */
46052 one = force_reg (mode, const_double_from_real_value (dconst1, mode));
46054 /* Compensate: xa = xa - (xa > operand1 ? 1 : 0) */
46055 tmp = ix86_expand_sse_compare_mask (UNGT, xa, res, !do_floor);
46056 emit_insn (gen_rtx_SET (VOIDmode, tmp,
46057 gen_rtx_AND (mode, one, tmp)));
46058 tmp = expand_simple_binop (mode, do_floor ? MINUS : PLUS,
46059 xa, tmp, NULL_RTX, 0, OPTAB_DIRECT);
46060 emit_move_insn (res, tmp);
46062 if (HONOR_SIGNED_ZEROS (mode))
46063 ix86_sse_copysign_to_positive (res, res, force_reg (mode, operand1), mask);
46065 emit_label (label);
46066 LABEL_NUSES (label) = 1;
46068 emit_move_insn (operand0, res);
46071 /* Expand SSE sequence for computing round from OPERAND1 storing
46072 into OPERAND0. Sequence that works without relying on DImode truncation
46073 via cvttsd2siq that is only available on 64bit targets. */
46074 void
46075 ix86_expand_rounddf_32 (rtx operand0, rtx operand1)
46077 /* C code for the stuff we expand below.
46078 double xa = fabs (x), xa2, x2;
46079 if (!isless (xa, TWO52))
46080 return x;
46081 Using the absolute value and copying back sign makes
46082 -0.0 -> -0.0 correct.
46083 xa2 = xa + TWO52 - TWO52;
46084 Compensate.
46085 dxa = xa2 - xa;
46086 if (dxa <= -0.5)
46087 xa2 += 1;
46088 else if (dxa > 0.5)
46089 xa2 -= 1;
46090 x2 = copysign (xa2, x);
46091 return x2;
46093 machine_mode mode = GET_MODE (operand0);
46094 rtx xa, xa2, dxa, TWO52, tmp, half, mhalf, one, res, mask;
46095 rtx_code_label *label;
46097 TWO52 = ix86_gen_TWO52 (mode);
46099 /* Temporary for holding the result, initialized to the input
46100 operand to ease control flow. */
46101 res = gen_reg_rtx (mode);
46102 emit_move_insn (res, operand1);
46104 /* xa = abs (operand1) */
46105 xa = ix86_expand_sse_fabs (res, &mask);
46107 /* if (!isless (xa, TWO52)) goto label; */
46108 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
46110 /* xa2 = xa + TWO52 - TWO52; */
46111 xa2 = expand_simple_binop (mode, PLUS, xa, TWO52, NULL_RTX, 0, OPTAB_DIRECT);
46112 xa2 = expand_simple_binop (mode, MINUS, xa2, TWO52, xa2, 0, OPTAB_DIRECT);
46114 /* dxa = xa2 - xa; */
46115 dxa = expand_simple_binop (mode, MINUS, xa2, xa, NULL_RTX, 0, OPTAB_DIRECT);
46117 /* generate 0.5, 1.0 and -0.5 */
46118 half = force_reg (mode, const_double_from_real_value (dconsthalf, mode));
46119 one = expand_simple_binop (mode, PLUS, half, half, NULL_RTX, 0, OPTAB_DIRECT);
46120 mhalf = expand_simple_binop (mode, MINUS, half, one, NULL_RTX,
46121 0, OPTAB_DIRECT);
46123 /* Compensate. */
46124 tmp = gen_reg_rtx (mode);
46125 /* xa2 = xa2 - (dxa > 0.5 ? 1 : 0) */
46126 tmp = ix86_expand_sse_compare_mask (UNGT, dxa, half, false);
46127 emit_insn (gen_rtx_SET (VOIDmode, tmp,
46128 gen_rtx_AND (mode, one, tmp)));
46129 xa2 = expand_simple_binop (mode, MINUS, xa2, tmp, NULL_RTX, 0, OPTAB_DIRECT);
46130 /* xa2 = xa2 + (dxa <= -0.5 ? 1 : 0) */
46131 tmp = ix86_expand_sse_compare_mask (UNGE, mhalf, dxa, false);
46132 emit_insn (gen_rtx_SET (VOIDmode, tmp,
46133 gen_rtx_AND (mode, one, tmp)));
46134 xa2 = expand_simple_binop (mode, PLUS, xa2, tmp, NULL_RTX, 0, OPTAB_DIRECT);
46136 /* res = copysign (xa2, operand1) */
46137 ix86_sse_copysign_to_positive (res, xa2, force_reg (mode, operand1), mask);
46139 emit_label (label);
46140 LABEL_NUSES (label) = 1;
46142 emit_move_insn (operand0, res);
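/* C model of the 32-bit-safe round expansion above: round |x| to the nearest
   integer with the 2**52 trick, then compensate ties so halves round away
   from zero, and copy the sign back.  Under #if 0, illustrative only.  */
#if 0
static double
rounddf_sketch (double x)
{
  const double two52 = 4503599627370496.0;	/* 2**52 */
  double xa = __builtin_fabs (x);
  if (!(xa < two52))
    return x;
  volatile double t = xa + two52;
  double xa2 = t - two52;			/* round to nearest even */
  double dxa = xa2 - xa;
  if (dxa <= -0.5)
    xa2 += 1.0;					/* tie rounded down: push up */
  else if (dxa > 0.5)
    xa2 -= 1.0;
  return __builtin_copysign (xa2, x);
}
#endif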
46145 /* Expand SSE sequence for computing trunc from OPERAND1 storing
46146 into OPERAND0. */
46147 void
46148 ix86_expand_trunc (rtx operand0, rtx operand1)
46150 /* C code for SSE variant we expand below.
46151 double xa = fabs (x), x2;
46152 if (!isless (xa, TWO52))
46153 return x;
46154 x2 = (double)(long)x;
46155 if (HONOR_SIGNED_ZEROS (mode))
46156 return copysign (x2, x);
46157 return x2;
46159 machine_mode mode = GET_MODE (operand0);
46160 rtx xa, xi, TWO52, res, mask;
46161 rtx_code_label *label;
46163 TWO52 = ix86_gen_TWO52 (mode);
46165 /* Temporary for holding the result, initialized to the input
46166 operand to ease control flow. */
46167 res = gen_reg_rtx (mode);
46168 emit_move_insn (res, operand1);
46170 /* xa = abs (operand1) */
46171 xa = ix86_expand_sse_fabs (res, &mask);
46173 /* if (!isless (xa, TWO52)) goto label; */
46174 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
46176 /* x = (double)(long)x */
46177 xi = gen_reg_rtx (mode == DFmode ? DImode : SImode);
46178 expand_fix (xi, res, 0);
46179 expand_float (res, xi, 0);
46181 if (HONOR_SIGNED_ZEROS (mode))
46182 ix86_sse_copysign_to_positive (res, res, force_reg (mode, operand1), mask);
46184 emit_label (label);
46185 LABEL_NUSES (label) = 1;
46187 emit_move_insn (operand0, res);
46190 /* Expand SSE sequence for computing trunc from OPERAND1 storing
46191 into OPERAND0. */
46192 void
46193 ix86_expand_truncdf_32 (rtx operand0, rtx operand1)
46195 machine_mode mode = GET_MODE (operand0);
46196 rtx xa, mask, TWO52, one, res, smask, tmp;
46197 rtx_code_label *label;
46199 /* C code for SSE variant we expand below.
46200 double xa = fabs (x), x2;
46201 if (!isless (xa, TWO52))
46202 return x;
46203 xa2 = xa + TWO52 - TWO52;
46204 Compensate:
46205 if (xa2 > xa)
46206 xa2 -= 1.0;
46207 x2 = copysign (xa2, x);
46208 return x2;
46211 TWO52 = ix86_gen_TWO52 (mode);
46213 /* Temporary for holding the result, initialized to the input
46214 operand to ease control flow. */
46215 res = gen_reg_rtx (mode);
46216 emit_move_insn (res, operand1);
46218 /* xa = abs (operand1) */
46219 xa = ix86_expand_sse_fabs (res, &smask);
46221 /* if (!isless (xa, TWO52)) goto label; */
46222 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
46224 /* res = xa + TWO52 - TWO52; */
46225 tmp = expand_simple_binop (mode, PLUS, xa, TWO52, NULL_RTX, 0, OPTAB_DIRECT);
46226 tmp = expand_simple_binop (mode, MINUS, tmp, TWO52, tmp, 0, OPTAB_DIRECT);
46227 emit_move_insn (res, tmp);
46229 /* generate 1.0 */
46230 one = force_reg (mode, const_double_from_real_value (dconst1, mode));
46232 /* Compensate: res = xa2 - (res > xa ? 1 : 0) */
46233 mask = ix86_expand_sse_compare_mask (UNGT, res, xa, false);
46234 emit_insn (gen_rtx_SET (VOIDmode, mask,
46235 gen_rtx_AND (mode, mask, one)));
46236 tmp = expand_simple_binop (mode, MINUS,
46237 res, mask, NULL_RTX, 0, OPTAB_DIRECT);
46238 emit_move_insn (res, tmp);
46240 /* res = copysign (res, operand1) */
46241 ix86_sse_copysign_to_positive (res, res, force_reg (mode, operand1), smask);
46243 emit_label (label);
46244 LABEL_NUSES (label) = 1;
46246 emit_move_insn (operand0, res);
46249 /* Expand SSE sequence for computing round from OPERAND1 storing
46250 into OPERAND0. */
46251 void
46252 ix86_expand_round (rtx operand0, rtx operand1)
46254 /* C code for the stuff we're doing below:
46255 double xa = fabs (x);
46256 if (!isless (xa, TWO52))
46257 return x;
46258 xa = (double)(long)(xa + nextafter (0.5, 0.0));
46259 return copysign (xa, x);
46261 machine_mode mode = GET_MODE (operand0);
46262 rtx res, TWO52, xa, xi, half, mask;
46263 rtx_code_label *label;
46264 const struct real_format *fmt;
46265 REAL_VALUE_TYPE pred_half, half_minus_pred_half;
46267 /* Temporary for holding the result, initialized to the input
46268 operand to ease control flow. */
46269 res = gen_reg_rtx (mode);
46270 emit_move_insn (res, operand1);
46272 TWO52 = ix86_gen_TWO52 (mode);
46273 xa = ix86_expand_sse_fabs (res, &mask);
46274 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
46276 /* load nextafter (0.5, 0.0) */
46277 fmt = REAL_MODE_FORMAT (mode);
46278 real_2expN (&half_minus_pred_half, -(fmt->p) - 1, mode);
46279 REAL_ARITHMETIC (pred_half, MINUS_EXPR, dconsthalf, half_minus_pred_half);
46281 /* xa = xa + 0.5 */
46282 half = force_reg (mode, const_double_from_real_value (pred_half, mode));
46283 xa = expand_simple_binop (mode, PLUS, xa, half, NULL_RTX, 0, OPTAB_DIRECT);
46285 /* xa = (double)(int64_t)xa */
46286 xi = gen_reg_rtx (mode == DFmode ? DImode : SImode);
46287 expand_fix (xi, xa, 0);
46288 expand_float (xa, xi, 0);
46290 /* res = copysign (xa, operand1) */
46291 ix86_sse_copysign_to_positive (res, xa, force_reg (mode, operand1), mask);
46293 emit_label (label);
46294 LABEL_NUSES (label) = 1;
46296 emit_move_insn (operand0, res);
46299 /* Expand SSE sequence for computing round
46300 from OP1 storing into OP0 using sse4 round insn. */
46301 void
46302 ix86_expand_round_sse4 (rtx op0, rtx op1)
46304 machine_mode mode = GET_MODE (op0);
46305 rtx e1, e2, res, half;
46306 const struct real_format *fmt;
46307 REAL_VALUE_TYPE pred_half, half_minus_pred_half;
46308 rtx (*gen_copysign) (rtx, rtx, rtx);
46309 rtx (*gen_round) (rtx, rtx, rtx);
46311 switch (mode)
46313 case SFmode:
46314 gen_copysign = gen_copysignsf3;
46315 gen_round = gen_sse4_1_roundsf2;
46316 break;
46317 case DFmode:
46318 gen_copysign = gen_copysigndf3;
46319 gen_round = gen_sse4_1_rounddf2;
46320 break;
46321 default:
46322 gcc_unreachable ();
46325 /* round (a) = trunc (a + copysign (0.5, a)) */
46327 /* load nextafter (0.5, 0.0) */
46328 fmt = REAL_MODE_FORMAT (mode);
46329 real_2expN (&half_minus_pred_half, -(fmt->p) - 1, mode);
46330 REAL_ARITHMETIC (pred_half, MINUS_EXPR, dconsthalf, half_minus_pred_half);
46331 half = const_double_from_real_value (pred_half, mode);
46333 /* e1 = copysign (0.5, op1) */
46334 e1 = gen_reg_rtx (mode);
46335 emit_insn (gen_copysign (e1, half, op1));
46337 /* e2 = op1 + e1 */
46338 e2 = expand_simple_binop (mode, PLUS, op1, e1, NULL_RTX, 0, OPTAB_DIRECT);
46340 /* res = trunc (e2) */
46341 res = gen_reg_rtx (mode);
46342 emit_insn (gen_round (res, e2, GEN_INT (ROUND_TRUNC)));
46344 emit_move_insn (op0, res);
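/* C model of the SSE4.1 expansion above: round (a) = trunc (a + copysign
   (0.5-, a)), where 0.5- is nextafter (0.5, 0.0) as loaded above.  Under
   #if 0, illustrative only.  */
#if 0
static double
round_sse4_sketch (double a)
{
  double pred_half = __builtin_nextafter (0.5, 0.0);
  return __builtin_trunc (a + __builtin_copysign (pred_half, a));
}
#endif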
46348 /* Table of valid machine attributes. */
46349 static const struct attribute_spec ix86_attribute_table[] =
46351 /* { name, min_len, max_len, decl_req, type_req, fn_type_req, handler,
46352 affects_type_identity } */
46353 /* Stdcall attribute says callee is responsible for popping arguments
46354 if they are not variable. */
46355 { "stdcall", 0, 0, false, true, true, ix86_handle_cconv_attribute,
46356 true },
46357 /* Fastcall attribute says callee is responsible for popping arguments
46358 if they are not variable. */
46359 { "fastcall", 0, 0, false, true, true, ix86_handle_cconv_attribute,
46360 true },
46361 /* Thiscall attribute says callee is responsible for popping arguments
46362 if they are not variable. */
46363 { "thiscall", 0, 0, false, true, true, ix86_handle_cconv_attribute,
46364 true },
46365 /* Cdecl attribute says the callee is a normal C declaration */
46366 { "cdecl", 0, 0, false, true, true, ix86_handle_cconv_attribute,
46367 true },
46368 /* Regparm attribute specifies how many integer arguments are to be
46369 passed in registers. */
46370 { "regparm", 1, 1, false, true, true, ix86_handle_cconv_attribute,
46371 true },
46372 /* Sseregparm attribute says we are using x86_64 calling conventions
46373 for FP arguments. */
46374 { "sseregparm", 0, 0, false, true, true, ix86_handle_cconv_attribute,
46375 true },
46376 /* The transactional memory builtins are implicitly regparm or fastcall
46377 depending on the ABI. Override the generic do-nothing attribute that
46378 these builtins were declared with. */
46379 { "*tm regparm", 0, 0, false, true, true, ix86_handle_tm_regparm_attribute,
46380 true },
46381 /* force_align_arg_pointer says this function realigns the stack at entry. */
46382 { (const char *)&ix86_force_align_arg_pointer_string, 0, 0,
46383 false, true, true, ix86_handle_cconv_attribute, false },
46384 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
46385 { "dllimport", 0, 0, false, false, false, handle_dll_attribute, false },
46386 { "dllexport", 0, 0, false, false, false, handle_dll_attribute, false },
46387 { "shared", 0, 0, true, false, false, ix86_handle_shared_attribute,
46388 false },
46389 #endif
46390 { "ms_struct", 0, 0, false, false, false, ix86_handle_struct_attribute,
46391 false },
46392 { "gcc_struct", 0, 0, false, false, false, ix86_handle_struct_attribute,
46393 false },
46394 #ifdef SUBTARGET_ATTRIBUTE_TABLE
46395 SUBTARGET_ATTRIBUTE_TABLE,
46396 #endif
46397 /* ms_abi and sysv_abi calling convention function attributes. */
46398 { "ms_abi", 0, 0, false, true, true, ix86_handle_abi_attribute, true },
46399 { "sysv_abi", 0, 0, false, true, true, ix86_handle_abi_attribute, true },
46400 { "ms_hook_prologue", 0, 0, true, false, false, ix86_handle_fndecl_attribute,
46401 false },
46402 { "callee_pop_aggregate_return", 1, 1, false, true, true,
46403 ix86_handle_callee_pop_aggregate_return, true },
46404 /* End element. */
46405 { NULL, 0, 0, false, false, false, NULL, false }
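/* Usage sketch (added for exposition, not part of the original table):
   the calling-convention entries above correspond to user-level
   declarations such as

     int  __attribute__ ((fastcall))    f (int a, int b);
     int  __attribute__ ((regparm (3))) g (int a, int b, int c);
     void __attribute__ ((ms_abi))      h (void);

   The handlers named in the table (e.g. ix86_handle_cconv_attribute)
   only validate the attribute; the actual argument-passing decisions
   are made later in the calling-convention code.  */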
46408 /* Implement targetm.vectorize.builtin_vectorization_cost. */
46409 static int
46410 ix86_builtin_vectorization_cost (enum vect_cost_for_stmt type_of_cost,
46411 tree vectype, int)
46413 unsigned elements;
46415 switch (type_of_cost)
46417 case scalar_stmt:
46418 return ix86_cost->scalar_stmt_cost;
46420 case scalar_load:
46421 return ix86_cost->scalar_load_cost;
46423 case scalar_store:
46424 return ix86_cost->scalar_store_cost;
46426 case vector_stmt:
46427 return ix86_cost->vec_stmt_cost;
46429 case vector_load:
46430 return ix86_cost->vec_align_load_cost;
46432 case vector_store:
46433 return ix86_cost->vec_store_cost;
46435 case vec_to_scalar:
46436 return ix86_cost->vec_to_scalar_cost;
46438 case scalar_to_vec:
46439 return ix86_cost->scalar_to_vec_cost;
46441 case unaligned_load:
46442 case unaligned_store:
46443 return ix86_cost->vec_unalign_load_cost;
46445 case cond_branch_taken:
46446 return ix86_cost->cond_taken_branch_cost;
46448 case cond_branch_not_taken:
46449 return ix86_cost->cond_not_taken_branch_cost;
46451 case vec_perm:
46452 case vec_promote_demote:
46453 return ix86_cost->vec_stmt_cost;
46455 case vec_construct:
46456 elements = TYPE_VECTOR_SUBPARTS (vectype);
46457 return elements / 2 + 1;
46459 default:
46460 gcc_unreachable ();
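/* Example (added for exposition): for a vec_construct of a V4SF vector,
   TYPE_VECTOR_SUBPARTS (vectype) == 4, so the cost reported above is
   4 / 2 + 1 == 3 regardless of the selected processor cost table; every
   other case simply forwards the corresponding ix86_cost field.  */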
46464 /* A cached (set (nil) (vselect (vconcat (nil) (nil)) (parallel [])))
46465 insn, so that expand_vselect{,_vconcat} doesn't have to create a fresh
46466 insn every time. */
46468 static GTY(()) rtx_insn *vselect_insn;
46470 /* Initialize vselect_insn. */
46472 static void
46473 init_vselect_insn (void)
46475 unsigned i;
46476 rtx x;
46478 x = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (MAX_VECT_LEN));
46479 for (i = 0; i < MAX_VECT_LEN; ++i)
46480 XVECEXP (x, 0, i) = const0_rtx;
46481 x = gen_rtx_VEC_SELECT (V2DFmode, gen_rtx_VEC_CONCAT (V4DFmode, const0_rtx,
46482 const0_rtx), x);
46483 x = gen_rtx_SET (VOIDmode, const0_rtx, x);
46484 start_sequence ();
46485 vselect_insn = emit_insn (x);
46486 end_sequence ();
46489 /* Construct (set target (vec_select op0 (parallel perm))) and
46490 return true if that's a valid instruction in the active ISA. */
46492 static bool
46493 expand_vselect (rtx target, rtx op0, const unsigned char *perm,
46494 unsigned nelt, bool testing_p)
46496 unsigned int i;
46497 rtx x, save_vconcat;
46498 int icode;
46500 if (vselect_insn == NULL_RTX)
46501 init_vselect_insn ();
46503 x = XEXP (SET_SRC (PATTERN (vselect_insn)), 1);
46504 PUT_NUM_ELEM (XVEC (x, 0), nelt);
46505 for (i = 0; i < nelt; ++i)
46506 XVECEXP (x, 0, i) = GEN_INT (perm[i]);
46507 save_vconcat = XEXP (SET_SRC (PATTERN (vselect_insn)), 0);
46508 XEXP (SET_SRC (PATTERN (vselect_insn)), 0) = op0;
46509 PUT_MODE (SET_SRC (PATTERN (vselect_insn)), GET_MODE (target));
46510 SET_DEST (PATTERN (vselect_insn)) = target;
46511 icode = recog_memoized (vselect_insn);
46513 if (icode >= 0 && !testing_p)
46514 emit_insn (copy_rtx (PATTERN (vselect_insn)));
46516 SET_DEST (PATTERN (vselect_insn)) = const0_rtx;
46517 XEXP (SET_SRC (PATTERN (vselect_insn)), 0) = save_vconcat;
46518 INSN_CODE (vselect_insn) = -1;
46520 return icode >= 0;
46523 /* Similar, but generate a vec_concat from op0 and op1 as well. */
46525 static bool
46526 expand_vselect_vconcat (rtx target, rtx op0, rtx op1,
46527 const unsigned char *perm, unsigned nelt,
46528 bool testing_p)
46530 machine_mode v2mode;
46531 rtx x;
46532 bool ok;
46534 if (vselect_insn == NULL_RTX)
46535 init_vselect_insn ();
46537 v2mode = GET_MODE_2XWIDER_MODE (GET_MODE (op0));
46538 x = XEXP (SET_SRC (PATTERN (vselect_insn)), 0);
46539 PUT_MODE (x, v2mode);
46540 XEXP (x, 0) = op0;
46541 XEXP (x, 1) = op1;
46542 ok = expand_vselect (target, x, perm, nelt, testing_p);
46543 XEXP (x, 0) = const0_rtx;
46544 XEXP (x, 1) = const0_rtx;
46545 return ok;
46548 /* A subroutine of ix86_expand_vec_perm_builtin_1. Try to implement D
46549 in terms of blendp[sd] / pblendw / pblendvb / vpblendd. */
46551 static bool
46552 expand_vec_perm_blend (struct expand_vec_perm_d *d)
46554 machine_mode vmode = d->vmode;
46555 unsigned i, mask, nelt = d->nelt;
46556 rtx target, op0, op1, x;
46557 rtx rperm[32], vperm;
46559 if (d->one_operand_p)
46560 return false;
46561 if (TARGET_AVX512F && GET_MODE_SIZE (vmode) == 64
46562 && GET_MODE_SIZE (GET_MODE_INNER (vmode)) >= 4)
46564 else if (TARGET_AVX2 && GET_MODE_SIZE (vmode) == 32)
46566 else if (TARGET_AVX && (vmode == V4DFmode || vmode == V8SFmode))
46568 else if (TARGET_SSE4_1 && GET_MODE_SIZE (vmode) == 16)
46570 else
46571 return false;
46573 /* This is a blend, not a permute. Elements must stay in their
46574 respective lanes. */
46575 for (i = 0; i < nelt; ++i)
46577 unsigned e = d->perm[i];
46578 if (!(e == i || e == i + nelt))
46579 return false;
46582 if (d->testing_p)
46583 return true;
46585 /* ??? Without SSE4.1, we could implement this with and/andn/or. This
46586 decision should be extracted elsewhere, so that we only try that
46587 sequence once all budget==3 options have been tried. */
46588 target = d->target;
46589 op0 = d->op0;
46590 op1 = d->op1;
46591 mask = 0;
46593 switch (vmode)
46595 case V8DFmode:
46596 case V16SFmode:
46597 case V4DFmode:
46598 case V8SFmode:
46599 case V2DFmode:
46600 case V4SFmode:
46601 case V8HImode:
46602 case V8SImode:
46603 case V32HImode:
46604 case V64QImode:
46605 case V16SImode:
46606 case V8DImode:
46607 for (i = 0; i < nelt; ++i)
46608 mask |= (d->perm[i] >= nelt) << i;
46609 break;
46611 case V2DImode:
46612 for (i = 0; i < 2; ++i)
46613 mask |= (d->perm[i] >= 2 ? 15 : 0) << (i * 4);
46614 vmode = V8HImode;
46615 goto do_subreg;
46617 case V4SImode:
46618 for (i = 0; i < 4; ++i)
46619 mask |= (d->perm[i] >= 4 ? 3 : 0) << (i * 2);
46620 vmode = V8HImode;
46621 goto do_subreg;
46623 case V16QImode:
46624 /* See if bytes move in pairs so we can use pblendw with
46625 an immediate argument, rather than pblendvb with a vector
46626 argument. */
46627 for (i = 0; i < 16; i += 2)
46628 if (d->perm[i] + 1 != d->perm[i + 1])
46630 use_pblendvb:
46631 for (i = 0; i < nelt; ++i)
46632 rperm[i] = (d->perm[i] < nelt ? const0_rtx : constm1_rtx);
46634 finish_pblendvb:
46635 vperm = gen_rtx_CONST_VECTOR (vmode, gen_rtvec_v (nelt, rperm));
46636 vperm = force_reg (vmode, vperm);
46638 if (GET_MODE_SIZE (vmode) == 16)
46639 emit_insn (gen_sse4_1_pblendvb (target, op0, op1, vperm));
46640 else
46641 emit_insn (gen_avx2_pblendvb (target, op0, op1, vperm));
46642 if (target != d->target)
46643 emit_move_insn (d->target, gen_lowpart (d->vmode, target));
46644 return true;
46647 for (i = 0; i < 8; ++i)
46648 mask |= (d->perm[i * 2] >= 16) << i;
46649 vmode = V8HImode;
46650 /* FALLTHRU */
46652 do_subreg:
46653 target = gen_reg_rtx (vmode);
46654 op0 = gen_lowpart (vmode, op0);
46655 op1 = gen_lowpart (vmode, op1);
46656 break;
46658 case V32QImode:
46659 /* See if bytes move in pairs. If not, vpblendvb must be used. */
46660 for (i = 0; i < 32; i += 2)
46661 if (d->perm[i] + 1 != d->perm[i + 1])
46662 goto use_pblendvb;
46663 /* See if bytes move in quadruplets. If yes, vpblendd
46664 with immediate can be used. */
46665 for (i = 0; i < 32; i += 4)
46666 if (d->perm[i] + 2 != d->perm[i + 2])
46667 break;
46668 if (i < 32)
46670 /* See if bytes move the same in both lanes. If yes,
46671 vpblendw with immediate can be used. */
46672 for (i = 0; i < 16; i += 2)
46673 if (d->perm[i] + 16 != d->perm[i + 16])
46674 goto use_pblendvb;
46676 /* Use vpblendw. */
46677 for (i = 0; i < 16; ++i)
46678 mask |= (d->perm[i * 2] >= 32) << i;
46679 vmode = V16HImode;
46680 goto do_subreg;
46683 /* Use vpblendd. */
46684 for (i = 0; i < 8; ++i)
46685 mask |= (d->perm[i * 4] >= 32) << i;
46686 vmode = V8SImode;
46687 goto do_subreg;
46689 case V16HImode:
46690 /* See if words move in pairs. If yes, vpblendd can be used. */
46691 for (i = 0; i < 16; i += 2)
46692 if (d->perm[i] + 1 != d->perm[i + 1])
46693 break;
46694 if (i < 16)
46696 /* See if words move the same in both lanes. If not,
46697 vpblendvb must be used. */
46698 for (i = 0; i < 8; i++)
46699 if (d->perm[i] + 8 != d->perm[i + 8])
46701 /* Use vpblendvb. */
46702 for (i = 0; i < 32; ++i)
46703 rperm[i] = (d->perm[i / 2] < 16 ? const0_rtx : constm1_rtx);
46705 vmode = V32QImode;
46706 nelt = 32;
46707 target = gen_reg_rtx (vmode);
46708 op0 = gen_lowpart (vmode, op0);
46709 op1 = gen_lowpart (vmode, op1);
46710 goto finish_pblendvb;
46713 /* Use vpblendw. */
46714 for (i = 0; i < 16; ++i)
46715 mask |= (d->perm[i] >= 16) << i;
46716 break;
46719 /* Use vpblendd. */
46720 for (i = 0; i < 8; ++i)
46721 mask |= (d->perm[i * 2] >= 16) << i;
46722 vmode = V8SImode;
46723 goto do_subreg;
46725 case V4DImode:
46726 /* Use vpblendd. */
46727 for (i = 0; i < 4; ++i)
46728 mask |= (d->perm[i] >= 4 ? 3 : 0) << (i * 2);
46729 vmode = V8SImode;
46730 goto do_subreg;
46732 default:
46733 gcc_unreachable ();
46736 /* This matches five different patterns for the different modes. */
46737 x = gen_rtx_VEC_MERGE (vmode, op1, op0, GEN_INT (mask));
46738 x = gen_rtx_SET (VOIDmode, target, x);
46739 emit_insn (x);
46740 if (target != d->target)
46741 emit_move_insn (d->target, gen_lowpart (d->vmode, target));
46743 return true;
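/* Worked example (added for exposition): for a V8HImode blend with
   d->perm = { 0, 9, 2, 11, 4, 13, 6, 15 }, elements 1, 3, 5 and 7 come
   from op1 (perm[i] >= nelt), so the loop above builds mask == 0xAA and
   a single pblendw with that immediate is emitted.  For V4SImode the
   perm { 0, 5, 2, 7 } is rewritten as a V8HImode blend: each dword that
   comes from op1 sets two adjacent mask bits, giving mask == 0xCC.  */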
46746 /* A subroutine of ix86_expand_vec_perm_builtin_1. Try to implement D
46747 in terms of the variable form of vpermilps.
46749 Note that we will have already failed the immediate input vpermilps,
46750 which requires that the high and low part shuffle be identical; the
46751 variable form doesn't require that. */
46753 static bool
46754 expand_vec_perm_vpermil (struct expand_vec_perm_d *d)
46756 rtx rperm[8], vperm;
46757 unsigned i;
46759 if (!TARGET_AVX || d->vmode != V8SFmode || !d->one_operand_p)
46760 return false;
46762 /* We can only permute within each 128-bit lane. */
46763 for (i = 0; i < 8; ++i)
46765 unsigned e = d->perm[i];
46766 if (i < 4 ? e >= 4 : e < 4)
46767 return false;
46770 if (d->testing_p)
46771 return true;
46773 for (i = 0; i < 8; ++i)
46775 unsigned e = d->perm[i];
46777 /* Within each 128-bit lane, the elements of op0 are numbered
46778 from 0 and the elements of op1 are numbered from 4. */
46779 if (e >= 8 + 4)
46780 e -= 8;
46781 else if (e >= 4)
46782 e -= 4;
46784 rperm[i] = GEN_INT (e);
46787 vperm = gen_rtx_CONST_VECTOR (V8SImode, gen_rtvec_v (8, rperm));
46788 vperm = force_reg (V8SImode, vperm);
46789 emit_insn (gen_avx_vpermilvarv8sf3 (d->target, d->op0, vperm));
46791 return true;
46794 /* Return true if permutation D can be performed as VMODE permutation
46795 instead. */
46797 static bool
46798 valid_perm_using_mode_p (machine_mode vmode, struct expand_vec_perm_d *d)
46800 unsigned int i, j, chunk;
46802 if (GET_MODE_CLASS (vmode) != MODE_VECTOR_INT
46803 || GET_MODE_CLASS (d->vmode) != MODE_VECTOR_INT
46804 || GET_MODE_SIZE (vmode) != GET_MODE_SIZE (d->vmode))
46805 return false;
46807 if (GET_MODE_NUNITS (vmode) >= d->nelt)
46808 return true;
46810 chunk = d->nelt / GET_MODE_NUNITS (vmode);
46811 for (i = 0; i < d->nelt; i += chunk)
46812 if (d->perm[i] & (chunk - 1))
46813 return false;
46814 else
46815 for (j = 1; j < chunk; ++j)
46816 if (d->perm[i] + j != d->perm[i + j])
46817 return false;
46819 return true;
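/* Example (added for exposition): a V16QImode permutation such as
   { 4,5,6,7, 0,1,2,3, 12,13,14,15, 8,9,10,11 } moves whole aligned
   4-byte chunks, so valid_perm_using_mode_p (V4SImode, d) is true and
   the permutation can instead be done as the V4SImode permutation
   { 1, 0, 3, 2 }.  A permutation like { 1,2,3,4, ... } fails the
   (d->perm[i] & (chunk - 1)) check and must stay byte-granular.  */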
46822 /* A subroutine of ix86_expand_vec_perm_builtin_1. Try to implement D
46823 in terms of pshufb, vpperm, vpermq, vpermd, vpermps or vperm2i128. */
46825 static bool
46826 expand_vec_perm_pshufb (struct expand_vec_perm_d *d)
46828 unsigned i, nelt, eltsz, mask;
46829 unsigned char perm[64];
46830 machine_mode vmode = V16QImode;
46831 rtx rperm[64], vperm, target, op0, op1;
46833 nelt = d->nelt;
46835 if (!d->one_operand_p)
46837 if (!TARGET_XOP || GET_MODE_SIZE (d->vmode) != 16)
46839 if (TARGET_AVX2
46840 && valid_perm_using_mode_p (V2TImode, d))
46842 if (d->testing_p)
46843 return true;
46845 /* Use vperm2i128 insn. The pattern uses
46846 V4DImode instead of V2TImode. */
46847 target = d->target;
46848 if (d->vmode != V4DImode)
46849 target = gen_reg_rtx (V4DImode);
46850 op0 = gen_lowpart (V4DImode, d->op0);
46851 op1 = gen_lowpart (V4DImode, d->op1);
46852 rperm[0]
46853 = GEN_INT ((d->perm[0] / (nelt / 2))
46854 | ((d->perm[nelt / 2] / (nelt / 2)) * 16));
46855 emit_insn (gen_avx2_permv2ti (target, op0, op1, rperm[0]));
46856 if (target != d->target)
46857 emit_move_insn (d->target, gen_lowpart (d->vmode, target));
46858 return true;
46860 return false;
46863 else
46865 if (GET_MODE_SIZE (d->vmode) == 16)
46867 if (!TARGET_SSSE3)
46868 return false;
46870 else if (GET_MODE_SIZE (d->vmode) == 32)
46872 if (!TARGET_AVX2)
46873 return false;
46875 /* V4DImode should already be handled through
46876 expand_vselect by the vpermq instruction. */
46877 gcc_assert (d->vmode != V4DImode);
46879 vmode = V32QImode;
46880 if (d->vmode == V8SImode
46881 || d->vmode == V16HImode
46882 || d->vmode == V32QImode)
46884 /* First see if vpermq can be used for
46885 V8SImode/V16HImode/V32QImode. */
46886 if (valid_perm_using_mode_p (V4DImode, d))
46888 for (i = 0; i < 4; i++)
46889 perm[i] = (d->perm[i * nelt / 4] * 4 / nelt) & 3;
46890 if (d->testing_p)
46891 return true;
46892 target = gen_reg_rtx (V4DImode);
46893 if (expand_vselect (target, gen_lowpart (V4DImode, d->op0),
46894 perm, 4, false))
46896 emit_move_insn (d->target,
46897 gen_lowpart (d->vmode, target));
46898 return true;
46900 return false;
46903 /* Next see if vpermd can be used. */
46904 if (valid_perm_using_mode_p (V8SImode, d))
46905 vmode = V8SImode;
46907 /* Or if vpermps can be used. */
46908 else if (d->vmode == V8SFmode)
46909 vmode = V8SImode;
46911 if (vmode == V32QImode)
46913 /* vpshufb only works intra lanes; it is not
46914 possible to shuffle bytes in between the lanes. */
46915 for (i = 0; i < nelt; ++i)
46916 if ((d->perm[i] ^ i) & (nelt / 2))
46917 return false;
46920 else if (GET_MODE_SIZE (d->vmode) == 64)
46922 if (!TARGET_AVX512BW)
46923 return false;
46925 /* If vpermq didn't work, vpshufb won't work either. */
46926 if (d->vmode == V8DFmode || d->vmode == V8DImode)
46927 return false;
46929 vmode = V64QImode;
46930 if (d->vmode == V16SImode
46931 || d->vmode == V32HImode
46932 || d->vmode == V64QImode)
46934 /* First see if vpermq can be used for
46935 V16SImode/V32HImode/V64QImode. */
46936 if (valid_perm_using_mode_p (V8DImode, d))
46938 for (i = 0; i < 8; i++)
46939 perm[i] = (d->perm[i * nelt / 8] * 8 / nelt) & 7;
46940 if (d->testing_p)
46941 return true;
46942 target = gen_reg_rtx (V8DImode);
46943 if (expand_vselect (target, gen_lowpart (V8DImode, d->op0),
46944 perm, 8, false))
46946 emit_move_insn (d->target,
46947 gen_lowpart (d->vmode, target));
46948 return true;
46950 return false;
46953 /* Next see if vpermd can be used. */
46954 if (valid_perm_using_mode_p (V16SImode, d))
46955 vmode = V16SImode;
46957 /* Or if vpermps can be used. */
46958 else if (d->vmode == V16SFmode)
46959 vmode = V16SImode;
46960 if (vmode == V64QImode)
46962 /* vpshufb only works intra lanes; it is not
46963 possible to shuffle bytes in between the lanes. */
46964 for (i = 0; i < nelt; ++i)
46965 if ((d->perm[i] ^ i) & (nelt / 4))
46966 return false;
46969 else
46970 return false;
46973 if (d->testing_p)
46974 return true;
46976 if (vmode == V8SImode)
46977 for (i = 0; i < 8; ++i)
46978 rperm[i] = GEN_INT ((d->perm[i * nelt / 8] * 8 / nelt) & 7);
46979 else if (vmode == V16SImode)
46980 for (i = 0; i < 16; ++i)
46981 rperm[i] = GEN_INT ((d->perm[i * nelt / 16] * 16 / nelt) & 15);
46982 else
46984 eltsz = GET_MODE_SIZE (GET_MODE_INNER (d->vmode));
46985 if (!d->one_operand_p)
46986 mask = 2 * nelt - 1;
46987 else if (vmode == V16QImode)
46988 mask = nelt - 1;
46989 else if (vmode == V64QImode)
46990 mask = nelt / 4 - 1;
46991 else
46992 mask = nelt / 2 - 1;
46994 for (i = 0; i < nelt; ++i)
46996 unsigned j, e = d->perm[i] & mask;
46997 for (j = 0; j < eltsz; ++j)
46998 rperm[i * eltsz + j] = GEN_INT (e * eltsz + j);
47002 vperm = gen_rtx_CONST_VECTOR (vmode,
47003 gen_rtvec_v (GET_MODE_NUNITS (vmode), rperm));
47004 vperm = force_reg (vmode, vperm);
47006 target = d->target;
47007 if (d->vmode != vmode)
47008 target = gen_reg_rtx (vmode);
47009 op0 = gen_lowpart (vmode, d->op0);
47010 if (d->one_operand_p)
47012 if (vmode == V16QImode)
47013 emit_insn (gen_ssse3_pshufbv16qi3 (target, op0, vperm));
47014 else if (vmode == V32QImode)
47015 emit_insn (gen_avx2_pshufbv32qi3 (target, op0, vperm));
47016 else if (vmode == V64QImode)
47017 emit_insn (gen_avx512bw_pshufbv64qi3 (target, op0, vperm));
47018 else if (vmode == V8SFmode)
47019 emit_insn (gen_avx2_permvarv8sf (target, op0, vperm));
47020 else if (vmode == V8SImode)
47021 emit_insn (gen_avx2_permvarv8si (target, op0, vperm));
47022 else if (vmode == V16SFmode)
47023 emit_insn (gen_avx512f_permvarv16sf (target, op0, vperm));
47024 else if (vmode == V16SImode)
47025 emit_insn (gen_avx512f_permvarv16si (target, op0, vperm));
47026 else
47027 gcc_unreachable ();
47029 else
47031 op1 = gen_lowpart (vmode, d->op1);
47032 emit_insn (gen_xop_pperm (target, op0, op1, vperm));
47034 if (target != d->target)
47035 emit_move_insn (d->target, gen_lowpart (d->vmode, target));
47037 return true;
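/* Worked example (added for exposition): for a one-operand V8HImode
   permutation { 1, 0, 3, 2, 5, 4, 7, 6 } handled via V16QImode pshufb,
   eltsz == 2 and mask == nelt - 1, so the byte selector built above is
   { 2,3, 0,1, 6,7, 4,5, 10,11, 8,9, 14,15, 12,13 }: each word index e
   expands to the byte indexes { 2*e, 2*e + 1 }.  */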
47040 /* A subroutine of ix86_expand_vec_perm_builtin_1. Try to instantiate D
47041 in a single instruction. */
47043 static bool
47044 expand_vec_perm_1 (struct expand_vec_perm_d *d)
47046 unsigned i, nelt = d->nelt;
47047 unsigned char perm2[MAX_VECT_LEN];
47049 /* Check plain VEC_SELECT first, because AVX has instructions that could
47050 match both SEL and SEL+CONCAT, but the plain SEL will allow a memory
47051 input where SEL+CONCAT may not. */
47052 if (d->one_operand_p)
47054 int mask = nelt - 1;
47055 bool identity_perm = true;
47056 bool broadcast_perm = true;
47058 for (i = 0; i < nelt; i++)
47060 perm2[i] = d->perm[i] & mask;
47061 if (perm2[i] != i)
47062 identity_perm = false;
47063 if (perm2[i])
47064 broadcast_perm = false;
47067 if (identity_perm)
47069 if (!d->testing_p)
47070 emit_move_insn (d->target, d->op0);
47071 return true;
47073 else if (broadcast_perm && TARGET_AVX2)
47075 /* Use vpbroadcast{b,w,d}. */
47076 rtx (*gen) (rtx, rtx) = NULL;
47077 switch (d->vmode)
47079 case V64QImode:
47080 if (TARGET_AVX512BW)
47081 gen = gen_avx512bw_vec_dupv64qi_1;
47082 break;
47083 case V32QImode:
47084 gen = gen_avx2_pbroadcastv32qi_1;
47085 break;
47086 case V32HImode:
47087 if (TARGET_AVX512BW)
47088 gen = gen_avx512bw_vec_dupv32hi_1;
47089 break;
47090 case V16HImode:
47091 gen = gen_avx2_pbroadcastv16hi_1;
47092 break;
47093 case V16SImode:
47094 if (TARGET_AVX512F)
47095 gen = gen_avx512f_vec_dupv16si_1;
47096 break;
47097 case V8SImode:
47098 gen = gen_avx2_pbroadcastv8si_1;
47099 break;
47100 case V16QImode:
47101 gen = gen_avx2_pbroadcastv16qi;
47102 break;
47103 case V8HImode:
47104 gen = gen_avx2_pbroadcastv8hi;
47105 break;
47106 case V16SFmode:
47107 if (TARGET_AVX512F)
47108 gen = gen_avx512f_vec_dupv16sf_1;
47109 break;
47110 case V8SFmode:
47111 gen = gen_avx2_vec_dupv8sf_1;
47112 break;
47113 case V8DFmode:
47114 if (TARGET_AVX512F)
47115 gen = gen_avx512f_vec_dupv8df_1;
47116 break;
47117 case V8DImode:
47118 if (TARGET_AVX512F)
47119 gen = gen_avx512f_vec_dupv8di_1;
47120 break;
47121 /* For other modes, prefer the other shuffles this function creates. */
47122 default: break;
47124 if (gen != NULL)
47126 if (!d->testing_p)
47127 emit_insn (gen (d->target, d->op0));
47128 return true;
47132 if (expand_vselect (d->target, d->op0, perm2, nelt, d->testing_p))
47133 return true;
47135 /* There are plenty of patterns in sse.md that are written for
47136 SEL+CONCAT and are not replicated for a single op. Perhaps
47137 that should be changed, to avoid the nastiness here. */
47139 /* Recognize interleave style patterns, which means incrementing
47140 every other permutation operand. */
47141 for (i = 0; i < nelt; i += 2)
47143 perm2[i] = d->perm[i] & mask;
47144 perm2[i + 1] = (d->perm[i + 1] & mask) + nelt;
47146 if (expand_vselect_vconcat (d->target, d->op0, d->op0, perm2, nelt,
47147 d->testing_p))
47148 return true;
47150 /* Recognize shufps, which means adding {0, 0, nelt, nelt}. */
47151 if (nelt >= 4)
47153 for (i = 0; i < nelt; i += 4)
47155 perm2[i + 0] = d->perm[i + 0] & mask;
47156 perm2[i + 1] = d->perm[i + 1] & mask;
47157 perm2[i + 2] = (d->perm[i + 2] & mask) + nelt;
47158 perm2[i + 3] = (d->perm[i + 3] & mask) + nelt;
47161 if (expand_vselect_vconcat (d->target, d->op0, d->op0, perm2, nelt,
47162 d->testing_p))
47163 return true;
47167 /* Finally, try the fully general two operand permute. */
47168 if (expand_vselect_vconcat (d->target, d->op0, d->op1, d->perm, nelt,
47169 d->testing_p))
47170 return true;
47172 /* Recognize interleave style patterns with reversed operands. */
47173 if (!d->one_operand_p)
47175 for (i = 0; i < nelt; ++i)
47177 unsigned e = d->perm[i];
47178 if (e >= nelt)
47179 e -= nelt;
47180 else
47181 e += nelt;
47182 perm2[i] = e;
47185 if (expand_vselect_vconcat (d->target, d->op1, d->op0, perm2, nelt,
47186 d->testing_p))
47187 return true;
47190 /* Try the SSE4.1 blend variable merge instructions. */
47191 if (expand_vec_perm_blend (d))
47192 return true;
47194 /* Try one of the AVX vpermil variable permutations. */
47195 if (expand_vec_perm_vpermil (d))
47196 return true;
47198 /* Try the SSSE3 pshufb or XOP vpperm or AVX2 vperm2i128,
47199 vpshufb, vpermd, vpermps or vpermq variable permutation. */
47200 if (expand_vec_perm_pshufb (d))
47201 return true;
47203 /* Try the AVX2 vpalignr instruction. */
47204 if (expand_vec_perm_palignr (d, true))
47205 return true;
47207 /* Try the AVX512F vpermi2 instructions. */
47208 if (ix86_expand_vec_perm_vpermi2 (NULL_RTX, NULL_RTX, NULL_RTX, NULL_RTX, d))
47209 return true;
47211 return false;
47214 /* A subroutine of ix86_expand_vec_perm_builtin_1. Try to implement D
47215 in terms of a pair of pshuflw + pshufhw instructions. */
47217 static bool
47218 expand_vec_perm_pshuflw_pshufhw (struct expand_vec_perm_d *d)
47220 unsigned char perm2[MAX_VECT_LEN];
47221 unsigned i;
47222 bool ok;
47224 if (d->vmode != V8HImode || !d->one_operand_p)
47225 return false;
47227 /* The two permutations only operate in 64-bit lanes. */
47228 for (i = 0; i < 4; ++i)
47229 if (d->perm[i] >= 4)
47230 return false;
47231 for (i = 4; i < 8; ++i)
47232 if (d->perm[i] < 4)
47233 return false;
47235 if (d->testing_p)
47236 return true;
47238 /* Emit the pshuflw. */
47239 memcpy (perm2, d->perm, 4);
47240 for (i = 4; i < 8; ++i)
47241 perm2[i] = i;
47242 ok = expand_vselect (d->target, d->op0, perm2, 8, d->testing_p);
47243 gcc_assert (ok);
47245 /* Emit the pshufhw. */
47246 memcpy (perm2 + 4, d->perm + 4, 4);
47247 for (i = 0; i < 4; ++i)
47248 perm2[i] = i;
47249 ok = expand_vselect (d->target, d->target, perm2, 8, d->testing_p);
47250 gcc_assert (ok);
47252 return true;
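/* Worked example (added for exposition): the V8HImode permutation
   { 2, 1, 3, 0, 7, 5, 6, 4 } keeps the low four and high four words in
   their own 64-bit halves, so it is split into
     pshuflw with { 2, 1, 3, 0, 4, 5, 6, 7 }  (high half untouched)
     pshufhw with { 0, 1, 2, 3, 7, 5, 6, 4 }  (low half untouched)
   each of which expand_vselect emits as a single insn.  */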
47255 /* A subroutine of ix86_expand_vec_perm_builtin_1. Try to simplify
47256 the permutation using the SSSE3 palignr instruction. This succeeds
47257 when all of the elements in PERM fit within one vector and we merely
47258 need to shift them down so that a single vector permutation has a
47259 chance to succeed. If SINGLE_INSN_ONLY_P, succeed only if
47260 the vpalignr instruction by itself can perform the requested permutation. */
47262 static bool
47263 expand_vec_perm_palignr (struct expand_vec_perm_d *d, bool single_insn_only_p)
47265 unsigned i, nelt = d->nelt;
47266 unsigned min, max, minswap, maxswap;
47267 bool in_order, ok, swap = false;
47268 rtx shift, target;
47269 struct expand_vec_perm_d dcopy;
47271 /* Even with AVX, palignr only operates on 128-bit vectors;
47272 in AVX2 palignr operates on both 128-bit lanes. */
47273 if ((!TARGET_SSSE3 || GET_MODE_SIZE (d->vmode) != 16)
47274 && (!TARGET_AVX2 || GET_MODE_SIZE (d->vmode) != 32))
47275 return false;
47277 min = 2 * nelt;
47278 max = 0;
47279 minswap = 2 * nelt;
47280 maxswap = 0;
47281 for (i = 0; i < nelt; ++i)
47283 unsigned e = d->perm[i];
47284 unsigned eswap = d->perm[i] ^ nelt;
47285 if (GET_MODE_SIZE (d->vmode) == 32)
47287 e = (e & ((nelt / 2) - 1)) | ((e & nelt) >> 1);
47288 eswap = e ^ (nelt / 2);
47290 if (e < min)
47291 min = e;
47292 if (e > max)
47293 max = e;
47294 if (eswap < minswap)
47295 minswap = eswap;
47296 if (eswap > maxswap)
47297 maxswap = eswap;
47299 if (min == 0
47300 || max - min >= (GET_MODE_SIZE (d->vmode) == 32 ? nelt / 2 : nelt))
47302 if (d->one_operand_p
47303 || minswap == 0
47304 || maxswap - minswap >= (GET_MODE_SIZE (d->vmode) == 32
47305 ? nelt / 2 : nelt))
47306 return false;
47307 swap = true;
47308 min = minswap;
47309 max = maxswap;
47312 /* Given that we have SSSE3, we know we'll be able to implement the
47313 single operand permutation after the palignr with pshufb for
47314 128-bit vectors. If SINGLE_INSN_ONLY_P, in_order has to be computed
47315 first. */
47316 if (d->testing_p && GET_MODE_SIZE (d->vmode) == 16 && !single_insn_only_p)
47317 return true;
47319 dcopy = *d;
47320 if (swap)
47322 dcopy.op0 = d->op1;
47323 dcopy.op1 = d->op0;
47324 for (i = 0; i < nelt; ++i)
47325 dcopy.perm[i] ^= nelt;
47328 in_order = true;
47329 for (i = 0; i < nelt; ++i)
47331 unsigned e = dcopy.perm[i];
47332 if (GET_MODE_SIZE (d->vmode) == 32
47333 && e >= nelt
47334 && (e & (nelt / 2 - 1)) < min)
47335 e = e - min - (nelt / 2);
47336 else
47337 e = e - min;
47338 if (e != i)
47339 in_order = false;
47340 dcopy.perm[i] = e;
47342 dcopy.one_operand_p = true;
47344 if (single_insn_only_p && !in_order)
47345 return false;
47347 /* For AVX2, test whether we can permute the result in one instruction. */
47348 if (d->testing_p)
47350 if (in_order)
47351 return true;
47352 dcopy.op1 = dcopy.op0;
47353 return expand_vec_perm_1 (&dcopy);
47356 shift = GEN_INT (min * GET_MODE_BITSIZE (GET_MODE_INNER (d->vmode)));
47357 if (GET_MODE_SIZE (d->vmode) == 16)
47359 target = gen_reg_rtx (TImode);
47360 emit_insn (gen_ssse3_palignrti (target, gen_lowpart (TImode, dcopy.op1),
47361 gen_lowpart (TImode, dcopy.op0), shift));
47363 else
47365 target = gen_reg_rtx (V2TImode);
47366 emit_insn (gen_avx2_palignrv2ti (target,
47367 gen_lowpart (V2TImode, dcopy.op1),
47368 gen_lowpart (V2TImode, dcopy.op0),
47369 shift));
47372 dcopy.op0 = dcopy.op1 = gen_lowpart (d->vmode, target);
47374 /* Test for the degenerate case where the alignment by itself
47375 produces the desired permutation. */
47376 if (in_order)
47378 emit_move_insn (d->target, dcopy.op0);
47379 return true;
47382 ok = expand_vec_perm_1 (&dcopy);
47383 gcc_assert (ok || GET_MODE_SIZE (d->vmode) == 32);
47385 return ok;
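/* Worked example (added for exposition): for a two-operand V16QImode
   permutation with d->perm[i] == i + 3 (bytes 3..18 of the concatenated
   operands), min == 3 and max == 18, so max - min < nelt.  A single
   palignr by 3 bytes realizes the whole permutation: the remaining
   dcopy permutation is the identity (in_order), so no further shuffle
   is needed.  */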
47388 /* A subroutine of ix86_expand_vec_perm_const_1. Try to simplify
47389 the permutation using the SSE4_1 pblendv instruction. Potentially
47390 reduces the permutation from 2 pshufb insns and an ior to 1 pshufb and a pblendv. */
47392 static bool
47393 expand_vec_perm_pblendv (struct expand_vec_perm_d *d)
47395 unsigned i, which, nelt = d->nelt;
47396 struct expand_vec_perm_d dcopy, dcopy1;
47397 machine_mode vmode = d->vmode;
47398 bool ok;
47400 /* Use the same checks as in expand_vec_perm_blend. */
47401 if (d->one_operand_p)
47402 return false;
47403 if (TARGET_AVX2 && GET_MODE_SIZE (vmode) == 32)
47405 else if (TARGET_AVX && (vmode == V4DFmode || vmode == V8SFmode))
47407 else if (TARGET_SSE4_1 && GET_MODE_SIZE (vmode) == 16)
47409 else
47410 return false;
47412 /* Figure out which permutation elements do not stay in their
47413 respective lanes. */
47414 for (i = 0, which = 0; i < nelt; ++i)
47416 unsigned e = d->perm[i];
47417 if (e != i)
47418 which |= (e < nelt ? 1 : 2);
47420 /* We can pblend the part where elements do not stay in their
47421 respective lanes only when these elements all come from one
47422 half of the permutation, i.e. from a single input operand.
47423 {0 1 8 3 4 5 9 7} is ok: 8 and 9 are not in their respective
47424 lanes, but both 8 and 9 >= 8 (both come from op1).
47425 {0 1 8 3 4 5 2 7} is not ok: 2 and 8 are not in their
47426 respective lanes, and 8 >= 8 but 2 is not. */
47427 if (which != 1 && which != 2)
47428 return false;
47429 if (d->testing_p && GET_MODE_SIZE (vmode) == 16)
47430 return true;
47432 /* First apply a one-operand permutation to the elements that
47433 do not stay in their respective lanes. */
47434 dcopy = *d;
47435 if (which == 2)
47436 dcopy.op0 = dcopy.op1 = d->op1;
47437 else
47438 dcopy.op0 = dcopy.op1 = d->op0;
47439 dcopy.one_operand_p = true;
47441 for (i = 0; i < nelt; ++i)
47442 dcopy.perm[i] = d->perm[i] & (nelt - 1);
47444 ok = expand_vec_perm_1 (&dcopy);
47445 if (GET_MODE_SIZE (vmode) != 16 && !ok)
47446 return false;
47447 else
47448 gcc_assert (ok);
47449 if (d->testing_p)
47450 return true;
47452 /* Next, blend the permuted elements into their final positions. */
47453 dcopy1 = *d;
47454 if (which == 2)
47455 dcopy1.op1 = dcopy.target;
47456 else
47457 dcopy1.op0 = dcopy.target;
47459 for (i = 0; i < nelt; ++i)
47460 dcopy1.perm[i] = ((d->perm[i] >= nelt) ? (nelt + i) : i);
47462 ok = expand_vec_perm_blend (&dcopy1);
47463 gcc_assert (ok);
47465 return true;
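/* Worked example (added for exposition): for an 8-element permutation
   { 0, 1, 8, 3, 4, 5, 9, 7 } only elements 8 and 9 are out of place and
   both come from op1, so which == 2.  dcopy permutes op1 alone with
   { 0, 1, 0, 3, 4, 5, 1, 7 }, placing op1's elements 0 and 1 at
   positions 2 and 6, and dcopy1 then blends with
   { 0, 1, 10, 3, 4, 5, 14, 7 }, taking those two positions from the
   permuted op1 and everything else from op0.  */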
47468 static bool expand_vec_perm_interleave3 (struct expand_vec_perm_d *d);
47470 /* A subroutine of ix86_expand_vec_perm_builtin_1. Try to simplify
47471 a two vector permutation into a single vector permutation by using
47472 an interleave operation to merge the vectors. */
47474 static bool
47475 expand_vec_perm_interleave2 (struct expand_vec_perm_d *d)
47477 struct expand_vec_perm_d dremap, dfinal;
47478 unsigned i, nelt = d->nelt, nelt2 = nelt / 2;
47479 unsigned HOST_WIDE_INT contents;
47480 unsigned char remap[2 * MAX_VECT_LEN];
47481 rtx_insn *seq;
47482 bool ok, same_halves = false;
47484 if (GET_MODE_SIZE (d->vmode) == 16)
47486 if (d->one_operand_p)
47487 return false;
47489 else if (GET_MODE_SIZE (d->vmode) == 32)
47491 if (!TARGET_AVX)
47492 return false;
47493 /* For 32-byte modes allow even d->one_operand_p.
47494 The lack of cross-lane shuffling in some instructions
47495 might prevent a single insn shuffle. */
47496 dfinal = *d;
47497 dfinal.testing_p = true;
47498 /* If expand_vec_perm_interleave3 can expand this into
47499 a 3 insn sequence, give up and let it be expanded as
47500 a 3 insn sequence instead. While that is one insn longer,
47501 it doesn't need a memory operand, and in the common
47502 case where the interleave low and interleave high permutations
47503 with the same operands are adjacent, it needs only 4 insns
47504 for both of them after CSE. */
47505 if (expand_vec_perm_interleave3 (&dfinal))
47506 return false;
47508 else
47509 return false;
47511 /* Examine from whence the elements come. */
47512 contents = 0;
47513 for (i = 0; i < nelt; ++i)
47514 contents |= ((unsigned HOST_WIDE_INT) 1) << d->perm[i];
47516 memset (remap, 0xff, sizeof (remap));
47517 dremap = *d;
47519 if (GET_MODE_SIZE (d->vmode) == 16)
47521 unsigned HOST_WIDE_INT h1, h2, h3, h4;
47523 /* Split the two input vectors into 4 halves. */
47524 h1 = (((unsigned HOST_WIDE_INT) 1) << nelt2) - 1;
47525 h2 = h1 << nelt2;
47526 h3 = h2 << nelt2;
47527 h4 = h3 << nelt2;
47529 /* If the elements are all from the low halves, use interleave low;
47530 similarly for interleave high. If the elements are from mis-matched
47531 halves, we can use shufps for V4SF/V4SI or do a DImode shuffle. */
47532 if ((contents & (h1 | h3)) == contents)
47534 /* punpckl* */
47535 for (i = 0; i < nelt2; ++i)
47537 remap[i] = i * 2;
47538 remap[i + nelt] = i * 2 + 1;
47539 dremap.perm[i * 2] = i;
47540 dremap.perm[i * 2 + 1] = i + nelt;
47542 if (!TARGET_SSE2 && d->vmode == V4SImode)
47543 dremap.vmode = V4SFmode;
47545 else if ((contents & (h2 | h4)) == contents)
47547 /* punpckh* */
47548 for (i = 0; i < nelt2; ++i)
47550 remap[i + nelt2] = i * 2;
47551 remap[i + nelt + nelt2] = i * 2 + 1;
47552 dremap.perm[i * 2] = i + nelt2;
47553 dremap.perm[i * 2 + 1] = i + nelt + nelt2;
47555 if (!TARGET_SSE2 && d->vmode == V4SImode)
47556 dremap.vmode = V4SFmode;
47558 else if ((contents & (h1 | h4)) == contents)
47560 /* shufps */
47561 for (i = 0; i < nelt2; ++i)
47563 remap[i] = i;
47564 remap[i + nelt + nelt2] = i + nelt2;
47565 dremap.perm[i] = i;
47566 dremap.perm[i + nelt2] = i + nelt + nelt2;
47568 if (nelt != 4)
47570 /* shufpd */
47571 dremap.vmode = V2DImode;
47572 dremap.nelt = 2;
47573 dremap.perm[0] = 0;
47574 dremap.perm[1] = 3;
47577 else if ((contents & (h2 | h3)) == contents)
47579 /* shufps */
47580 for (i = 0; i < nelt2; ++i)
47582 remap[i + nelt2] = i;
47583 remap[i + nelt] = i + nelt2;
47584 dremap.perm[i] = i + nelt2;
47585 dremap.perm[i + nelt2] = i + nelt;
47587 if (nelt != 4)
47589 /* shufpd */
47590 dremap.vmode = V2DImode;
47591 dremap.nelt = 2;
47592 dremap.perm[0] = 1;
47593 dremap.perm[1] = 2;
47596 else
47597 return false;
47599 else
47601 unsigned int nelt4 = nelt / 4, nzcnt = 0;
47602 unsigned HOST_WIDE_INT q[8];
47603 unsigned int nonzero_halves[4];
47605 /* Split the two input vectors into 8 quarters. */
47606 q[0] = (((unsigned HOST_WIDE_INT) 1) << nelt4) - 1;
47607 for (i = 1; i < 8; ++i)
47608 q[i] = q[0] << (nelt4 * i);
47609 for (i = 0; i < 4; ++i)
47610 if (((q[2 * i] | q[2 * i + 1]) & contents) != 0)
47612 nonzero_halves[nzcnt] = i;
47613 ++nzcnt;
47616 if (nzcnt == 1)
47618 gcc_assert (d->one_operand_p);
47619 nonzero_halves[1] = nonzero_halves[0];
47620 same_halves = true;
47622 else if (d->one_operand_p)
47624 gcc_assert (nonzero_halves[0] == 0);
47625 gcc_assert (nonzero_halves[1] == 1);
47628 if (nzcnt <= 2)
47630 if (d->perm[0] / nelt2 == nonzero_halves[1])
47632 /* Attempt to increase the likelihood that the dfinal
47633 shuffle will be intra-lane. */
47634 char tmph = nonzero_halves[0];
47635 nonzero_halves[0] = nonzero_halves[1];
47636 nonzero_halves[1] = tmph;
47639 /* vperm2f128 or vperm2i128. */
47640 for (i = 0; i < nelt2; ++i)
47642 remap[i + nonzero_halves[1] * nelt2] = i + nelt2;
47643 remap[i + nonzero_halves[0] * nelt2] = i;
47644 dremap.perm[i + nelt2] = i + nonzero_halves[1] * nelt2;
47645 dremap.perm[i] = i + nonzero_halves[0] * nelt2;
47648 if (d->vmode != V8SFmode
47649 && d->vmode != V4DFmode
47650 && d->vmode != V8SImode)
47652 dremap.vmode = V8SImode;
47653 dremap.nelt = 8;
47654 for (i = 0; i < 4; ++i)
47656 dremap.perm[i] = i + nonzero_halves[0] * 4;
47657 dremap.perm[i + 4] = i + nonzero_halves[1] * 4;
47661 else if (d->one_operand_p)
47662 return false;
47663 else if (TARGET_AVX2
47664 && (contents & (q[0] | q[2] | q[4] | q[6])) == contents)
47666 /* vpunpckl* */
47667 for (i = 0; i < nelt4; ++i)
47669 remap[i] = i * 2;
47670 remap[i + nelt] = i * 2 + 1;
47671 remap[i + nelt2] = i * 2 + nelt2;
47672 remap[i + nelt + nelt2] = i * 2 + nelt2 + 1;
47673 dremap.perm[i * 2] = i;
47674 dremap.perm[i * 2 + 1] = i + nelt;
47675 dremap.perm[i * 2 + nelt2] = i + nelt2;
47676 dremap.perm[i * 2 + nelt2 + 1] = i + nelt + nelt2;
47679 else if (TARGET_AVX2
47680 && (contents & (q[1] | q[3] | q[5] | q[7])) == contents)
47682 /* vpunpckh* */
47683 for (i = 0; i < nelt4; ++i)
47685 remap[i + nelt4] = i * 2;
47686 remap[i + nelt + nelt4] = i * 2 + 1;
47687 remap[i + nelt2 + nelt4] = i * 2 + nelt2;
47688 remap[i + nelt + nelt2 + nelt4] = i * 2 + nelt2 + 1;
47689 dremap.perm[i * 2] = i + nelt4;
47690 dremap.perm[i * 2 + 1] = i + nelt + nelt4;
47691 dremap.perm[i * 2 + nelt2] = i + nelt2 + nelt4;
47692 dremap.perm[i * 2 + nelt2 + 1] = i + nelt + nelt2 + nelt4;
47695 else
47696 return false;
47699 /* Use the remapping array set up above to move the elements from their
47700 swizzled locations into their final destinations. */
47701 dfinal = *d;
47702 for (i = 0; i < nelt; ++i)
47704 unsigned e = remap[d->perm[i]];
47705 gcc_assert (e < nelt);
47706 /* If same_halves is true, both halves of the remapped vector are the
47707 same. Avoid cross-lane accesses if possible. */
47708 if (same_halves && i >= nelt2)
47710 gcc_assert (e < nelt2);
47711 dfinal.perm[i] = e + nelt2;
47713 else
47714 dfinal.perm[i] = e;
47716 if (!d->testing_p)
47718 dremap.target = gen_reg_rtx (dremap.vmode);
47719 dfinal.op0 = gen_lowpart (dfinal.vmode, dremap.target);
47721 dfinal.op1 = dfinal.op0;
47722 dfinal.one_operand_p = true;
47724 /* Test if the final remap can be done with a single insn. For V4SFmode or
47725 V4SImode this *will* succeed. For V8HImode or V16QImode it may not. */
47726 start_sequence ();
47727 ok = expand_vec_perm_1 (&dfinal);
47728 seq = get_insns ();
47729 end_sequence ();
47731 if (!ok)
47732 return false;
47734 if (d->testing_p)
47735 return true;
47737 if (dremap.vmode != dfinal.vmode)
47739 dremap.op0 = gen_lowpart (dremap.vmode, dremap.op0);
47740 dremap.op1 = gen_lowpart (dremap.vmode, dremap.op1);
47743 ok = expand_vec_perm_1 (&dremap);
47744 gcc_assert (ok);
47746 emit_insn (seq);
47747 return true;
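/* Worked example (added for exposition): for the two-operand V4SImode
   permutation { 1, 5, 0, 4 } all elements come from the low halves
   (contents is covered by h1 | h3), so dremap becomes the interleave-low
   { 0, 4, 1, 5 } (punpckldq) and the remap array turns dfinal into the
   one-operand permutation { 2, 3, 0, 1 }, a single pshufd, for a total
   of two insns.  */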
47750 /* A subroutine of ix86_expand_vec_perm_builtin_1. Try to simplify
47751 a single vector cross-lane permutation into vpermq followed
47752 by any of the single insn permutations. */
47754 static bool
47755 expand_vec_perm_vpermq_perm_1 (struct expand_vec_perm_d *d)
47757 struct expand_vec_perm_d dremap, dfinal;
47758 unsigned i, j, nelt = d->nelt, nelt2 = nelt / 2, nelt4 = nelt / 4;
47759 unsigned contents[2];
47760 bool ok;
47762 if (!(TARGET_AVX2
47763 && (d->vmode == V32QImode || d->vmode == V16HImode)
47764 && d->one_operand_p))
47765 return false;
47767 contents[0] = 0;
47768 contents[1] = 0;
47769 for (i = 0; i < nelt2; ++i)
47771 contents[0] |= 1u << (d->perm[i] / nelt4);
47772 contents[1] |= 1u << (d->perm[i + nelt2] / nelt4);
47775 for (i = 0; i < 2; ++i)
47777 unsigned int cnt = 0;
47778 for (j = 0; j < 4; ++j)
47779 if ((contents[i] & (1u << j)) != 0 && ++cnt > 2)
47780 return false;
47783 if (d->testing_p)
47784 return true;
47786 dremap = *d;
47787 dremap.vmode = V4DImode;
47788 dremap.nelt = 4;
47789 dremap.target = gen_reg_rtx (V4DImode);
47790 dremap.op0 = gen_lowpart (V4DImode, d->op0);
47791 dremap.op1 = dremap.op0;
47792 dremap.one_operand_p = true;
47793 for (i = 0; i < 2; ++i)
47795 unsigned int cnt = 0;
47796 for (j = 0; j < 4; ++j)
47797 if ((contents[i] & (1u << j)) != 0)
47798 dremap.perm[2 * i + cnt++] = j;
47799 for (; cnt < 2; ++cnt)
47800 dremap.perm[2 * i + cnt] = 0;
47803 dfinal = *d;
47804 dfinal.op0 = gen_lowpart (dfinal.vmode, dremap.target);
47805 dfinal.op1 = dfinal.op0;
47806 dfinal.one_operand_p = true;
47807 for (i = 0, j = 0; i < nelt; ++i)
47809 if (i == nelt2)
47810 j = 2;
47811 dfinal.perm[i] = (d->perm[i] & (nelt4 - 1)) | (j ? nelt2 : 0);
47812 if ((d->perm[i] / nelt4) == dremap.perm[j])
47814 else if ((d->perm[i] / nelt4) == dremap.perm[j + 1])
47815 dfinal.perm[i] |= nelt4;
47816 else
47817 gcc_unreachable ();
47820 ok = expand_vec_perm_1 (&dremap);
47821 gcc_assert (ok);
47823 ok = expand_vec_perm_1 (&dfinal);
47824 gcc_assert (ok);
47826 return true;
47829 /* A subroutine of ix86_expand_vec_perm_builtin_1. Try to expand
47830 a vector permutation using two instructions, vperm2f128 (or
47831 vperm2i128) followed by any single in-lane permutation. */
47833 static bool
47834 expand_vec_perm_vperm2f128 (struct expand_vec_perm_d *d)
47836 struct expand_vec_perm_d dfirst, dsecond;
47837 unsigned i, j, nelt = d->nelt, nelt2 = nelt / 2, perm;
47838 bool ok;
47840 if (!TARGET_AVX
47841 || GET_MODE_SIZE (d->vmode) != 32
47842 || (d->vmode != V8SFmode && d->vmode != V4DFmode && !TARGET_AVX2))
47843 return false;
47845 dsecond = *d;
47846 dsecond.one_operand_p = false;
47847 dsecond.testing_p = true;
47849 /* ((perm << 2)|perm) & 0x33 is the vperm2[fi]128
47850 immediate. For perm < 16 the second permutation uses
47851 d->op0 as its first operand; for perm >= 16 it uses d->op1
47852 as its first operand. The second operand is the result of
47853 vperm2[fi]128. */
47854 for (perm = 0; perm < 32; perm++)
47856 /* Ignore permutations which do not move anything cross-lane. */
47857 if (perm < 16)
47859 /* The second shuffle for e.g. V4DFmode has
47860 0123 and ABCD operands.
47861 Ignore AB23, as 23 is already in the second lane
47862 of the first operand. */
47863 if ((perm & 0xc) == (1 << 2)) continue;
47864 /* And 01CD, as 01 is in the first lane of the first
47865 operand. */
47866 if ((perm & 3) == 0) continue;
47867 /* And 4567, as then the vperm2[fi]128 doesn't change
47868 anything on the original 4567 second operand. */
47869 if ((perm & 0xf) == ((3 << 2) | 2)) continue;
47871 else
47873 /* The second shuffle for e.g. V4DFmode has
47874 4567 and ABCD operands.
47875 Ignore AB67, as 67 is already in the second lane
47876 of the first operand. */
47877 if ((perm & 0xc) == (3 << 2)) continue;
47878 /* And 45CD, as 45 is in the first lane of the first
47879 operand. */
47880 if ((perm & 3) == 2) continue;
47881 /* And 0123, as then the vperm2[fi]128 doesn't change
47882 anything on the original 0123 first operand. */
47883 if ((perm & 0xf) == (1 << 2)) continue;
47886 for (i = 0; i < nelt; i++)
47888 j = d->perm[i] / nelt2;
47889 if (j == ((perm >> (2 * (i >= nelt2))) & 3))
47890 dsecond.perm[i] = nelt + (i & nelt2) + (d->perm[i] & (nelt2 - 1));
47891 else if (j == (unsigned) (i >= nelt2) + 2 * (perm >= 16))
47892 dsecond.perm[i] = d->perm[i] & (nelt - 1);
47893 else
47894 break;
47897 if (i == nelt)
47899 start_sequence ();
47900 ok = expand_vec_perm_1 (&dsecond);
47901 end_sequence ();
47903 else
47904 ok = false;
47906 if (ok)
47908 if (d->testing_p)
47909 return true;
47911 /* Found a usable second shuffle. dfirst will be
47912 vperm2f128 on d->op0 and d->op1. */
47913 dsecond.testing_p = false;
47914 dfirst = *d;
47915 dfirst.target = gen_reg_rtx (d->vmode);
47916 for (i = 0; i < nelt; i++)
47917 dfirst.perm[i] = (i & (nelt2 - 1))
47918 + ((perm >> (2 * (i >= nelt2))) & 3) * nelt2;
47920 canonicalize_perm (&dfirst);
47921 ok = expand_vec_perm_1 (&dfirst);
47922 gcc_assert (ok);
47924 /* And dsecond is some single insn shuffle, taking
47925 d->op0 and result of vperm2f128 (if perm < 16) or
47926 d->op1 and result of vperm2f128 (otherwise). */
47927 if (perm >= 16)
47928 dsecond.op0 = dsecond.op1;
47929 dsecond.op1 = dfirst.target;
47931 ok = expand_vec_perm_1 (&dsecond);
47932 gcc_assert (ok);
47934 return true;
47937 /* For one operand, the only useful vperm2f128 permutation is 0x01,
47938 i.e. a swap of the two lanes. */
47939 if (d->one_operand_p)
47940 return false;
47943 return false;
47946 /* A subroutine of ix86_expand_vec_perm_builtin_1. Try to simplify
47947 a two vector permutation using 2 intra-lane interleave insns
47948 and a cross-lane shuffle for 32-byte vectors. */
47950 static bool
47951 expand_vec_perm_interleave3 (struct expand_vec_perm_d *d)
47953 unsigned i, nelt;
47954 rtx (*gen) (rtx, rtx, rtx);
47956 if (d->one_operand_p)
47957 return false;
47958 if (TARGET_AVX2 && GET_MODE_SIZE (d->vmode) == 32)
47960 else if (TARGET_AVX && (d->vmode == V8SFmode || d->vmode == V4DFmode))
47962 else
47963 return false;
47965 nelt = d->nelt;
47966 if (d->perm[0] != 0 && d->perm[0] != nelt / 2)
47967 return false;
47968 for (i = 0; i < nelt; i += 2)
47969 if (d->perm[i] != d->perm[0] + i / 2
47970 || d->perm[i + 1] != d->perm[0] + i / 2 + nelt)
47971 return false;
47973 if (d->testing_p)
47974 return true;
47976 switch (d->vmode)
47978 case V32QImode:
47979 if (d->perm[0])
47980 gen = gen_vec_interleave_highv32qi;
47981 else
47982 gen = gen_vec_interleave_lowv32qi;
47983 break;
47984 case V16HImode:
47985 if (d->perm[0])
47986 gen = gen_vec_interleave_highv16hi;
47987 else
47988 gen = gen_vec_interleave_lowv16hi;
47989 break;
47990 case V8SImode:
47991 if (d->perm[0])
47992 gen = gen_vec_interleave_highv8si;
47993 else
47994 gen = gen_vec_interleave_lowv8si;
47995 break;
47996 case V4DImode:
47997 if (d->perm[0])
47998 gen = gen_vec_interleave_highv4di;
47999 else
48000 gen = gen_vec_interleave_lowv4di;
48001 break;
48002 case V8SFmode:
48003 if (d->perm[0])
48004 gen = gen_vec_interleave_highv8sf;
48005 else
48006 gen = gen_vec_interleave_lowv8sf;
48007 break;
48008 case V4DFmode:
48009 if (d->perm[0])
48010 gen = gen_vec_interleave_highv4df;
48011 else
48012 gen = gen_vec_interleave_lowv4df;
48013 break;
48014 default:
48015 gcc_unreachable ();
48018 emit_insn (gen (d->target, d->op0, d->op1));
48019 return true;
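/* Example (added for exposition): the only permutations accepted above
   are the full interleaves; e.g. for V8SImode d->perm must be either
   { 0, 8, 1, 9, 2, 10, 3, 11 } (interleave low) or
   { 4, 12, 5, 13, 6, 14, 7, 15 } (interleave high), which the
   vec_interleave_{low,high}v8si expanders then implement with intra-lane
   unpacks plus a cross-lane shuffle.  */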
48022 /* A subroutine of ix86_expand_vec_perm_builtin_1. Try to implement
48023 a single vector permutation using a single intra-lane vector
48024 permutation, a vperm2f128 swapping the lanes, and a vblend* insn blending
48025 the non-swapped and swapped vectors together. */
48027 static bool
48028 expand_vec_perm_vperm2f128_vblend (struct expand_vec_perm_d *d)
48030 struct expand_vec_perm_d dfirst, dsecond;
48031 unsigned i, j, msk, nelt = d->nelt, nelt2 = nelt / 2;
48032 rtx_insn *seq;
48033 bool ok;
48034 rtx (*blend) (rtx, rtx, rtx, rtx) = NULL;
48036 if (!TARGET_AVX
48037 || TARGET_AVX2
48038 || (d->vmode != V8SFmode && d->vmode != V4DFmode)
48039 || !d->one_operand_p)
48040 return false;
48042 dfirst = *d;
48043 for (i = 0; i < nelt; i++)
48044 dfirst.perm[i] = 0xff;
48045 for (i = 0, msk = 0; i < nelt; i++)
48047 j = (d->perm[i] & nelt2) ? i | nelt2 : i & ~nelt2;
48048 if (dfirst.perm[j] != 0xff && dfirst.perm[j] != d->perm[i])
48049 return false;
48050 dfirst.perm[j] = d->perm[i];
48051 if (j != i)
48052 msk |= (1 << i);
48054 for (i = 0; i < nelt; i++)
48055 if (dfirst.perm[i] == 0xff)
48056 dfirst.perm[i] = i;
48058 if (!d->testing_p)
48059 dfirst.target = gen_reg_rtx (dfirst.vmode);
48061 start_sequence ();
48062 ok = expand_vec_perm_1 (&dfirst);
48063 seq = get_insns ();
48064 end_sequence ();
48066 if (!ok)
48067 return false;
48069 if (d->testing_p)
48070 return true;
48072 emit_insn (seq);
48074 dsecond = *d;
48075 dsecond.op0 = dfirst.target;
48076 dsecond.op1 = dfirst.target;
48077 dsecond.one_operand_p = true;
48078 dsecond.target = gen_reg_rtx (dsecond.vmode);
48079 for (i = 0; i < nelt; i++)
48080 dsecond.perm[i] = i ^ nelt2;
48082 ok = expand_vec_perm_1 (&dsecond);
48083 gcc_assert (ok);
48085 blend = d->vmode == V8SFmode ? gen_avx_blendps256 : gen_avx_blendpd256;
48086 emit_insn (blend (d->target, dfirst.target, dsecond.target, GEN_INT (msk)));
48087 return true;
48090 /* A subroutine of ix86_expand_vec_perm_builtin_1. Implement a V4DF
48091 permutation using two vperm2f128, followed by a vshufpd insn blending
48092 the two vectors together. */
48094 static bool
48095 expand_vec_perm_2vperm2f128_vshuf (struct expand_vec_perm_d *d)
48097 struct expand_vec_perm_d dfirst, dsecond, dthird;
48098 bool ok;
48100 if (!TARGET_AVX || (d->vmode != V4DFmode))
48101 return false;
48103 if (d->testing_p)
48104 return true;
48106 dfirst = *d;
48107 dsecond = *d;
48108 dthird = *d;
48110 dfirst.perm[0] = (d->perm[0] & ~1);
48111 dfirst.perm[1] = (d->perm[0] & ~1) + 1;
48112 dfirst.perm[2] = (d->perm[2] & ~1);
48113 dfirst.perm[3] = (d->perm[2] & ~1) + 1;
48114 dsecond.perm[0] = (d->perm[1] & ~1);
48115 dsecond.perm[1] = (d->perm[1] & ~1) + 1;
48116 dsecond.perm[2] = (d->perm[3] & ~1);
48117 dsecond.perm[3] = (d->perm[3] & ~1) + 1;
48118 dthird.perm[0] = (d->perm[0] % 2);
48119 dthird.perm[1] = (d->perm[1] % 2) + 4;
48120 dthird.perm[2] = (d->perm[2] % 2) + 2;
48121 dthird.perm[3] = (d->perm[3] % 2) + 6;
48123 dfirst.target = gen_reg_rtx (dfirst.vmode);
48124 dsecond.target = gen_reg_rtx (dsecond.vmode);
48125 dthird.op0 = dfirst.target;
48126 dthird.op1 = dsecond.target;
48127 dthird.one_operand_p = false;
48129 canonicalize_perm (&dfirst);
48130 canonicalize_perm (&dsecond);
48132 ok = expand_vec_perm_1 (&dfirst)
48133 && expand_vec_perm_1 (&dsecond)
48134 && expand_vec_perm_1 (&dthird);
48136 gcc_assert (ok);
48138 return true;
48141 /* A subroutine of expand_vec_perm_even_odd_1. Implement the double-word
48142 permutation with two pshufb insns and an ior. We should have already
48143 failed all two instruction sequences. */
48145 static bool
48146 expand_vec_perm_pshufb2 (struct expand_vec_perm_d *d)
48148 rtx rperm[2][16], vperm, l, h, op, m128;
48149 unsigned int i, nelt, eltsz;
48151 if (!TARGET_SSSE3 || GET_MODE_SIZE (d->vmode) != 16)
48152 return false;
48153 gcc_assert (!d->one_operand_p);
48155 if (d->testing_p)
48156 return true;
48158 nelt = d->nelt;
48159 eltsz = GET_MODE_SIZE (GET_MODE_INNER (d->vmode));
48161 /* Generate two permutation masks. If the required element is within
48162 the given vector it is shuffled into the proper lane. If the required
48163 element is in the other vector, force a zero into the lane by setting
48164 bit 7 in the permutation mask. */
48165 m128 = GEN_INT (-128);
48166 for (i = 0; i < nelt; ++i)
48168 unsigned j, e = d->perm[i];
48169 unsigned which = (e >= nelt);
48170 if (e >= nelt)
48171 e -= nelt;
48173 for (j = 0; j < eltsz; ++j)
48175 rperm[which][i*eltsz + j] = GEN_INT (e*eltsz + j);
48176 rperm[1-which][i*eltsz + j] = m128;
48180 vperm = gen_rtx_CONST_VECTOR (V16QImode, gen_rtvec_v (16, rperm[0]));
48181 vperm = force_reg (V16QImode, vperm);
48183 l = gen_reg_rtx (V16QImode);
48184 op = gen_lowpart (V16QImode, d->op0);
48185 emit_insn (gen_ssse3_pshufbv16qi3 (l, op, vperm));
48187 vperm = gen_rtx_CONST_VECTOR (V16QImode, gen_rtvec_v (16, rperm[1]));
48188 vperm = force_reg (V16QImode, vperm);
48190 h = gen_reg_rtx (V16QImode);
48191 op = gen_lowpart (V16QImode, d->op1);
48192 emit_insn (gen_ssse3_pshufbv16qi3 (h, op, vperm));
48194 op = d->target;
48195 if (d->vmode != V16QImode)
48196 op = gen_reg_rtx (V16QImode);
48197 emit_insn (gen_iorv16qi3 (op, l, h));
48198 if (op != d->target)
48199 emit_move_insn (d->target, gen_lowpart (d->vmode, op));
48201 return true;
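/* Worked example (added for exposition): for a two-operand V16QImode
   permutation with d->perm[0] == 5 and d->perm[1] == 20, the masks built
   above get rperm[0][0] = 5, rperm[1][0] = -128 (byte 0 comes from op0)
   and rperm[0][1] = -128, rperm[1][1] = 4 (byte 1 is element 4 of op1).
   Bit 7 (-128) forces a zero into the lane, so the final por of the two
   pshufb results merges them without interference.  */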
48204 /* Implement an arbitrary permutation of one V32QImode or V16HImode operand
48205 with two vpshufb insns, a vpermq and a vpor. We should have already failed
48206 all two or three instruction sequences. */
48208 static bool
48209 expand_vec_perm_vpshufb2_vpermq (struct expand_vec_perm_d *d)
48211 rtx rperm[2][32], vperm, l, h, hp, op, m128;
48212 unsigned int i, nelt, eltsz;
48214 if (!TARGET_AVX2
48215 || !d->one_operand_p
48216 || (d->vmode != V32QImode && d->vmode != V16HImode))
48217 return false;
48219 if (d->testing_p)
48220 return true;
48222 nelt = d->nelt;
48223 eltsz = GET_MODE_SIZE (GET_MODE_INNER (d->vmode));
48225 /* Generate two permutation masks. If the required element is within
48226 the same lane, it is shuffled in. If the required element is from the
48227 other lane, force a zero by setting bit 7 in the permutation mask.
48228 The other mask has non-negative elements whenever an element
48229 is requested from the other lane, but it also moves that element to
48230 the opposite lane, so that the result of vpshufb can have its two
48231 V2TImode halves swapped. */
48232 m128 = GEN_INT (-128);
48233 for (i = 0; i < nelt; ++i)
48235 unsigned j, e = d->perm[i] & (nelt / 2 - 1);
48236 unsigned which = ((d->perm[i] ^ i) & (nelt / 2)) * eltsz;
48238 for (j = 0; j < eltsz; ++j)
48240 rperm[!!which][(i * eltsz + j) ^ which] = GEN_INT (e * eltsz + j);
48241 rperm[!which][(i * eltsz + j) ^ (which ^ 16)] = m128;
48245 vperm = gen_rtx_CONST_VECTOR (V32QImode, gen_rtvec_v (32, rperm[1]));
48246 vperm = force_reg (V32QImode, vperm);
48248 h = gen_reg_rtx (V32QImode);
48249 op = gen_lowpart (V32QImode, d->op0);
48250 emit_insn (gen_avx2_pshufbv32qi3 (h, op, vperm));
48252 /* Swap the 128-bit lanes of h into hp. */
48253 hp = gen_reg_rtx (V4DImode);
48254 op = gen_lowpart (V4DImode, h);
48255 emit_insn (gen_avx2_permv4di_1 (hp, op, const2_rtx, GEN_INT (3), const0_rtx,
48256 const1_rtx));
48258 vperm = gen_rtx_CONST_VECTOR (V32QImode, gen_rtvec_v (32, rperm[0]));
48259 vperm = force_reg (V32QImode, vperm);
48261 l = gen_reg_rtx (V32QImode);
48262 op = gen_lowpart (V32QImode, d->op0);
48263 emit_insn (gen_avx2_pshufbv32qi3 (l, op, vperm));
48265 op = d->target;
48266 if (d->vmode != V32QImode)
48267 op = gen_reg_rtx (V32QImode);
48268 emit_insn (gen_iorv32qi3 (op, l, gen_lowpart (V32QImode, hp)));
48269 if (op != d->target)
48270 emit_move_insn (d->target, gen_lowpart (d->vmode, op));
48272 return true;
48275 /* A subroutine of expand_vec_perm_even_odd_1. Implement extract-even
48276 and extract-odd permutations of two V32QImode or V16HImode operands
48277 with two vpshufb insns, a vpor and a vpermq. We should have already
48278 failed all two or three instruction sequences. */
48280 static bool
48281 expand_vec_perm_vpshufb2_vpermq_even_odd (struct expand_vec_perm_d *d)
48283 rtx rperm[2][32], vperm, l, h, ior, op, m128;
48284 unsigned int i, nelt, eltsz;
48286 if (!TARGET_AVX2
48287 || d->one_operand_p
48288 || (d->vmode != V32QImode && d->vmode != V16HImode))
48289 return false;
48291 for (i = 0; i < d->nelt; ++i)
48292 if ((d->perm[i] ^ (i * 2)) & (3 * d->nelt / 2))
48293 return false;
48295 if (d->testing_p)
48296 return true;
48298 nelt = d->nelt;
48299 eltsz = GET_MODE_SIZE (GET_MODE_INNER (d->vmode));
48301 /* Generate two permutation masks. In the first permutation mask
48302 the first quarter will contain indexes for the first half
48303 of the op0, the second quarter will contain bit 7 set, third quarter
48304 will contain indexes for the second half of the op0 and the
48305 last quarter bit 7 set. In the second permutation mask
48306 the first quarter will contain bit 7 set, the second quarter
48307 indexes for the first half of the op1, the third quarter bit 7 set
48308 and last quarter indexes for the second half of the op1.
48309 I.e. the first mask e.g. for V32QImode extract even will be:
48310 0, 2, ..., 0xe, -128, ..., -128, 0, 2, ..., 0xe, -128, ..., -128
48311 (all values masked with 0xf except for -128) and second mask
48312 for extract even will be
48313 -128, ..., -128, 0, 2, ..., 0xe, -128, ..., -128, 0, 2, ..., 0xe. */
48314 m128 = GEN_INT (-128);
48315 for (i = 0; i < nelt; ++i)
48317 unsigned j, e = d->perm[i] & (nelt / 2 - 1);
48318 unsigned which = d->perm[i] >= nelt;
48319 unsigned xorv = (i >= nelt / 4 && i < 3 * nelt / 4) ? 24 : 0;
48321 for (j = 0; j < eltsz; ++j)
48323 rperm[which][(i * eltsz + j) ^ xorv] = GEN_INT (e * eltsz + j);
48324 rperm[1 - which][(i * eltsz + j) ^ xorv] = m128;
48328 vperm = gen_rtx_CONST_VECTOR (V32QImode, gen_rtvec_v (32, rperm[0]));
48329 vperm = force_reg (V32QImode, vperm);
48331 l = gen_reg_rtx (V32QImode);
48332 op = gen_lowpart (V32QImode, d->op0);
48333 emit_insn (gen_avx2_pshufbv32qi3 (l, op, vperm));
48335 vperm = gen_rtx_CONST_VECTOR (V32QImode, gen_rtvec_v (32, rperm[1]));
48336 vperm = force_reg (V32QImode, vperm);
48338 h = gen_reg_rtx (V32QImode);
48339 op = gen_lowpart (V32QImode, d->op1);
48340 emit_insn (gen_avx2_pshufbv32qi3 (h, op, vperm));
48342 ior = gen_reg_rtx (V32QImode);
48343 emit_insn (gen_iorv32qi3 (ior, l, h));
48345 /* Permute the V4DImode quarters using { 0, 2, 1, 3 } permutation. */
48346 op = gen_reg_rtx (V4DImode);
48347 ior = gen_lowpart (V4DImode, ior);
48348 emit_insn (gen_avx2_permv4di_1 (op, ior, const0_rtx, const2_rtx,
48349 const1_rtx, GEN_INT (3)));
48350 emit_move_insn (d->target, gen_lowpart (d->vmode, op));
48352 return true;
48355 /* A subroutine of expand_vec_perm_even_odd_1. Implement extract-even
48356 and extract-odd permutations of two V16QI, V8HI, V16HI or V32QI operands
48357 with two "and" and "pack" or two "shift" and "pack" insns. We should
48358 have already failed all two instruction sequences. */
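/* Editorial sketch (not in the original source): for a V16QImode
   extract-even permutation the "and" + "pack" route below works as
   follows: each operand is viewed as V8HImode, ANDed with a vector of
   0x00ff so every 16-bit word keeps only its even (low) byte, and then
   packuswb packs the sixteen words of the two operands into the sixteen
   even bytes.  The extract-odd case instead shifts each word right by 8
   before packing.  */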
48360 static bool
48361 expand_vec_perm_even_odd_pack (struct expand_vec_perm_d *d)
48363 rtx op, dop0, dop1, t, rperm[16];
48364 unsigned i, odd, c, s, nelt = d->nelt;
48365 bool end_perm = false;
48366 machine_mode half_mode;
48367 rtx (*gen_and) (rtx, rtx, rtx);
48368 rtx (*gen_pack) (rtx, rtx, rtx);
48369 rtx (*gen_shift) (rtx, rtx, rtx);
48371 if (d->one_operand_p)
48372 return false;
48374 switch (d->vmode)
48376 case V8HImode:
48377 /* Required for "pack". */
48378 if (!TARGET_SSE4_1)
48379 return false;
48380 c = 0xffff;
48381 s = 16;
48382 half_mode = V4SImode;
48383 gen_and = gen_andv4si3;
48384 gen_pack = gen_sse4_1_packusdw;
48385 gen_shift = gen_lshrv4si3;
48386 break;
48387 case V16QImode:
48388 /* No check as all instructions are SSE2. */
48389 c = 0xff;
48390 s = 8;
48391 half_mode = V8HImode;
48392 gen_and = gen_andv8hi3;
48393 gen_pack = gen_sse2_packuswb;
48394 gen_shift = gen_lshrv8hi3;
48395 break;
48396 case V16HImode:
48397 if (!TARGET_AVX2)
48398 return false;
48399 c = 0xffff;
48400 s = 16;
48401 half_mode = V8SImode;
48402 gen_and = gen_andv8si3;
48403 gen_pack = gen_avx2_packusdw;
48404 gen_shift = gen_lshrv8si3;
48405 end_perm = true;
48406 break;
48407 case V32QImode:
48408 if (!TARGET_AVX2)
48409 return false;
48410 c = 0xff;
48411 s = 8;
48412 half_mode = V16HImode;
48413 gen_and = gen_andv16hi3;
48414 gen_pack = gen_avx2_packuswb;
48415 gen_shift = gen_lshrv16hi3;
48416 end_perm = true;
48417 break;
48418 default:
48419 /* Only for V8HI, V16QI, V16HI and V32QI modes is this more profitable
48420 than general shuffles. */
48421 return false;
48424 /* Check that permutation is even or odd. */
48425 odd = d->perm[0];
48426 if (odd > 1)
48427 return false;
48429 for (i = 1; i < nelt; ++i)
48430 if (d->perm[i] != 2 * i + odd)
48431 return false;
48433 if (d->testing_p)
48434 return true;
48436 dop0 = gen_reg_rtx (half_mode);
48437 dop1 = gen_reg_rtx (half_mode);
48438 if (odd == 0)
48440 for (i = 0; i < nelt / 2; i++)
48441 rperm[i] = GEN_INT (c);
48442 t = gen_rtx_CONST_VECTOR (half_mode, gen_rtvec_v (nelt / 2, rperm));
48443 t = force_reg (half_mode, t);
48444 emit_insn (gen_and (dop0, t, gen_lowpart (half_mode, d->op0)));
48445 emit_insn (gen_and (dop1, t, gen_lowpart (half_mode, d->op1)));
48447 else
48449 emit_insn (gen_shift (dop0,
48450 gen_lowpart (half_mode, d->op0),
48451 GEN_INT (s)));
48452 emit_insn (gen_shift (dop1,
48453 gen_lowpart (half_mode, d->op1),
48454 GEN_INT (s)));
48456 /* For the AVX2 256-bit case we need to permute the pack result. */
48457 if (TARGET_AVX2 && end_perm)
48459 op = gen_reg_rtx (d->vmode);
48460 t = gen_reg_rtx (V4DImode);
48461 emit_insn (gen_pack (op, dop0, dop1));
48462 emit_insn (gen_avx2_permv4di_1 (t,
48463 gen_lowpart (V4DImode, op),
48464 const0_rtx,
48465 const2_rtx,
48466 const1_rtx,
48467 GEN_INT (3)));
48468 emit_move_insn (d->target, gen_lowpart (d->vmode, t));
48470 else
48471 emit_insn (gen_pack (d->target, dop0, dop1));
48473 return true;
48476 /* A subroutine of ix86_expand_vec_perm_builtin_1. Implement extract-even
48477 and extract-odd permutations. */
48479 static bool
48480 expand_vec_perm_even_odd_1 (struct expand_vec_perm_d *d, unsigned odd)
48482 rtx t1, t2, t3, t4, t5;
48484 switch (d->vmode)
48486 case V4DFmode:
48487 if (d->testing_p)
48488 break;
48489 t1 = gen_reg_rtx (V4DFmode);
48490 t2 = gen_reg_rtx (V4DFmode);
48492 /* Shuffle the lanes around into { 0 1 4 5 } and { 2 3 6 7 }. */
48493 emit_insn (gen_avx_vperm2f128v4df3 (t1, d->op0, d->op1, GEN_INT (0x20)));
48494 emit_insn (gen_avx_vperm2f128v4df3 (t2, d->op0, d->op1, GEN_INT (0x31)));
48496 /* Now an unpck[lh]pd will produce the result required. */
48497 if (odd)
48498 t3 = gen_avx_unpckhpd256 (d->target, t1, t2);
48499 else
48500 t3 = gen_avx_unpcklpd256 (d->target, t1, t2);
48501 emit_insn (t3);
48502 break;
48504 case V8SFmode:
48506 int mask = odd ? 0xdd : 0x88;
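/* Editorial note (not in the original source): in the vshufps immediate
   each result element is a 2-bit selector, so 0x88 encodes { 0, 2, 0, 2 }:
   per 128-bit lane the low two result elements take elements 0 and 2 of
   the first source and the high two take elements 0 and 2 of the second
   source.  0xdd encodes { 1, 3, 1, 3 } and selects the odd elements in
   the same way.  */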
48508 if (d->testing_p)
48509 break;
48510 t1 = gen_reg_rtx (V8SFmode);
48511 t2 = gen_reg_rtx (V8SFmode);
48512 t3 = gen_reg_rtx (V8SFmode);
48514 /* Shuffle within the 128-bit lanes to produce:
48515 { 0 2 8 a 4 6 c e } | { 1 3 9 b 5 7 d f }. */
48516 emit_insn (gen_avx_shufps256 (t1, d->op0, d->op1,
48517 GEN_INT (mask)));
48519 /* Shuffle the lanes around to produce:
48520 { 4 6 c e 0 2 8 a } and { 5 7 d f 1 3 9 b }. */
48521 emit_insn (gen_avx_vperm2f128v8sf3 (t2, t1, t1,
48522 GEN_INT (0x3)));
48524 /* Shuffle within the 128-bit lanes to produce:
48525 { 0 2 4 6 4 6 0 2 } | { 1 3 5 7 5 7 1 3 }. */
48526 emit_insn (gen_avx_shufps256 (t3, t1, t2, GEN_INT (0x44)));
48528 /* Shuffle within the 128-bit lanes to produce:
48529 { 8 a c e c e 8 a } | { 9 b d f d f 9 b }. */
48530 emit_insn (gen_avx_shufps256 (t2, t1, t2, GEN_INT (0xee)));
48532 /* Shuffle the lanes around to produce:
48533 { 0 2 4 6 8 a c e } | { 1 3 5 7 9 b d f }. */
48534 emit_insn (gen_avx_vperm2f128v8sf3 (d->target, t3, t2,
48535 GEN_INT (0x20)));
48537 break;
48539 case V2DFmode:
48540 case V4SFmode:
48541 case V2DImode:
48542 case V4SImode:
48543 /* These are always directly implementable by expand_vec_perm_1. */
48544 gcc_unreachable ();
48546 case V8HImode:
48547 if (TARGET_SSE4_1)
48548 return expand_vec_perm_even_odd_pack (d);
48549 else if (TARGET_SSSE3 && !TARGET_SLOW_PSHUFB)
48550 return expand_vec_perm_pshufb2 (d);
48551 else
48553 if (d->testing_p)
48554 break;
48555 /* We need 2*log2(N)-1 operations to achieve odd/even
48556 with interleave. */
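/* Editorial note (not in the original source): for V8HImode, N == 8,
   so 2 * log2 (8) - 1 == 5, matching the five interleave insns emitted
   below.  */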
48557 t1 = gen_reg_rtx (V8HImode);
48558 t2 = gen_reg_rtx (V8HImode);
48559 emit_insn (gen_vec_interleave_highv8hi (t1, d->op0, d->op1));
48560 emit_insn (gen_vec_interleave_lowv8hi (d->target, d->op0, d->op1));
48561 emit_insn (gen_vec_interleave_highv8hi (t2, d->target, t1));
48562 emit_insn (gen_vec_interleave_lowv8hi (d->target, d->target, t1));
48563 if (odd)
48564 t3 = gen_vec_interleave_highv8hi (d->target, d->target, t2);
48565 else
48566 t3 = gen_vec_interleave_lowv8hi (d->target, d->target, t2);
48567 emit_insn (t3);
48569 break;
48571 case V16QImode:
48572 return expand_vec_perm_even_odd_pack (d);
48574 case V16HImode:
48575 case V32QImode:
48576 return expand_vec_perm_even_odd_pack (d);
48578 case V4DImode:
48579 if (!TARGET_AVX2)
48581 struct expand_vec_perm_d d_copy = *d;
48582 d_copy.vmode = V4DFmode;
48583 if (d->testing_p)
48584 d_copy.target = gen_lowpart (V4DFmode, d->target);
48585 else
48586 d_copy.target = gen_reg_rtx (V4DFmode);
48587 d_copy.op0 = gen_lowpart (V4DFmode, d->op0);
48588 d_copy.op1 = gen_lowpart (V4DFmode, d->op1);
48589 if (expand_vec_perm_even_odd_1 (&d_copy, odd))
48591 if (!d->testing_p)
48592 emit_move_insn (d->target,
48593 gen_lowpart (V4DImode, d_copy.target));
48594 return true;
48596 return false;
48599 if (d->testing_p)
48600 break;
48602 t1 = gen_reg_rtx (V4DImode);
48603 t2 = gen_reg_rtx (V4DImode);
48605 /* Shuffle the lanes around into { 0 1 4 5 } and { 2 3 6 7 }. */
48606 emit_insn (gen_avx2_permv2ti (t1, d->op0, d->op1, GEN_INT (0x20)));
48607 emit_insn (gen_avx2_permv2ti (t2, d->op0, d->op1, GEN_INT (0x31)));
48609 /* Now a vpunpck[lh]qdq will produce the result required. */
48610 if (odd)
48611 t3 = gen_avx2_interleave_highv4di (d->target, t1, t2);
48612 else
48613 t3 = gen_avx2_interleave_lowv4di (d->target, t1, t2);
48614 emit_insn (t3);
48615 break;
48617 case V8SImode:
48618 if (!TARGET_AVX2)
48620 struct expand_vec_perm_d d_copy = *d;
48621 d_copy.vmode = V8SFmode;
48622 if (d->testing_p)
48623 d_copy.target = gen_lowpart (V8SFmode, d->target);
48624 else
48625 d_copy.target = gen_reg_rtx (V8SFmode);
48626 d_copy.op0 = gen_lowpart (V8SFmode, d->op0);
48627 d_copy.op1 = gen_lowpart (V8SFmode, d->op1);
48628 if (expand_vec_perm_even_odd_1 (&d_copy, odd))
48630 if (!d->testing_p)
48631 emit_move_insn (d->target,
48632 gen_lowpart (V8SImode, d_copy.target));
48633 return true;
48635 return false;
48638 if (d->testing_p)
48639 break;
48641 t1 = gen_reg_rtx (V8SImode);
48642 t2 = gen_reg_rtx (V8SImode);
48643 t3 = gen_reg_rtx (V4DImode);
48644 t4 = gen_reg_rtx (V4DImode);
48645 t5 = gen_reg_rtx (V4DImode);
48647 /* Shuffle the lanes around into
48648 { 0 1 2 3 8 9 a b } and { 4 5 6 7 c d e f }. */
48649 emit_insn (gen_avx2_permv2ti (t3, gen_lowpart (V4DImode, d->op0),
48650 gen_lowpart (V4DImode, d->op1),
48651 GEN_INT (0x20)));
48652 emit_insn (gen_avx2_permv2ti (t4, gen_lowpart (V4DImode, d->op0),
48653 gen_lowpart (V4DImode, d->op1),
48654 GEN_INT (0x31)));
48656 /* Swap the 2nd and 3rd position in each lane into
48657 { 0 2 1 3 8 a 9 b } and { 4 6 5 7 c e d f }. */
48658 emit_insn (gen_avx2_pshufdv3 (t1, gen_lowpart (V8SImode, t3),
48659 GEN_INT (2 * 4 + 1 * 16 + 3 * 64)));
48660 emit_insn (gen_avx2_pshufdv3 (t2, gen_lowpart (V8SImode, t4),
48661 GEN_INT (2 * 4 + 1 * 16 + 3 * 64)));
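/* Editorial note (not in the original source): the pshufd immediate
   2 * 4 + 1 * 16 + 3 * 64 == 0xd8 packs the 2-bit selectors
   { 0, 2, 1, 3 } (element 0 from 0, element 1 from 2, element 2 from 1,
   element 3 from 3), i.e. it swaps the middle two dwords within each
   128-bit lane.  */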
48663 /* Now a vpunpck[lh]qdq will produce
48664 { 0 2 4 6 8 a c e } resp. { 1 3 5 7 9 b d f }. */
48665 if (odd)
48666 t3 = gen_avx2_interleave_highv4di (t5, gen_lowpart (V4DImode, t1),
48667 gen_lowpart (V4DImode, t2));
48668 else
48669 t3 = gen_avx2_interleave_lowv4di (t5, gen_lowpart (V4DImode, t1),
48670 gen_lowpart (V4DImode, t2));
48671 emit_insn (t3);
48672 emit_move_insn (d->target, gen_lowpart (V8SImode, t5));
48673 break;
48675 default:
48676 gcc_unreachable ();
48679 return true;
48682 /* A subroutine of ix86_expand_vec_perm_builtin_1. Pattern match
48683 extract-even and extract-odd permutations. */
48685 static bool
48686 expand_vec_perm_even_odd (struct expand_vec_perm_d *d)
48688 unsigned i, odd, nelt = d->nelt;
48690 odd = d->perm[0];
48691 if (odd != 0 && odd != 1)
48692 return false;
48694 for (i = 1; i < nelt; ++i)
48695 if (d->perm[i] != 2 * i + odd)
48696 return false;
48698 return expand_vec_perm_even_odd_1 (d, odd);
48701 /* A subroutine of ix86_expand_vec_perm_builtin_1. Implement broadcast
48702 permutations. We assume that expand_vec_perm_1 has already failed. */
48704 static bool
48705 expand_vec_perm_broadcast_1 (struct expand_vec_perm_d *d)
48707 unsigned elt = d->perm[0], nelt2 = d->nelt / 2;
48708 machine_mode vmode = d->vmode;
48709 unsigned char perm2[4];
48710 rtx op0 = d->op0, dest;
48711 bool ok;
48713 switch (vmode)
48715 case V4DFmode:
48716 case V8SFmode:
48717 /* These are special-cased in sse.md so that we can optionally
48718 use the vbroadcast instruction. They expand to two insns
48719 if the input happens to be in a register. */
48720 gcc_unreachable ();
48722 case V2DFmode:
48723 case V2DImode:
48724 case V4SFmode:
48725 case V4SImode:
48726 /* These are always implementable using standard shuffle patterns. */
48727 gcc_unreachable ();
48729 case V8HImode:
48730 case V16QImode:
48731 /* These can be implemented via interleave. We save one insn by
48732 stopping once we have promoted to V4SImode and then use pshufd. */
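/* Editorial worked example (not in the original source): to broadcast
   element 5 of a V8HImode vector, the loop below first emits an
   interleave-high of the operand with itself, giving { 4 4 5 5 6 6 7 7 }
   and reducing elt to 1; the mode is then widened to V4SImode, and the
   final pshufd replicates dword 1, i.e. the pair { 5 5 }, into all four
   dwords.  */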
48733 if (d->testing_p)
48734 return true;
48737 rtx dest;
48738 rtx (*gen) (rtx, rtx, rtx)
48739 = vmode == V16QImode ? gen_vec_interleave_lowv16qi
48740 : gen_vec_interleave_lowv8hi;
48742 if (elt >= nelt2)
48744 gen = vmode == V16QImode ? gen_vec_interleave_highv16qi
48745 : gen_vec_interleave_highv8hi;
48746 elt -= nelt2;
48748 nelt2 /= 2;
48750 dest = gen_reg_rtx (vmode);
48751 emit_insn (gen (dest, op0, op0));
48752 vmode = get_mode_wider_vector (vmode);
48753 op0 = gen_lowpart (vmode, dest);
48755 while (vmode != V4SImode);
48757 memset (perm2, elt, 4);
48758 dest = gen_reg_rtx (V4SImode);
48759 ok = expand_vselect (dest, op0, perm2, 4, d->testing_p);
48760 gcc_assert (ok);
48761 if (!d->testing_p)
48762 emit_move_insn (d->target, gen_lowpart (d->vmode, dest));
48763 return true;
48765 case V32QImode:
48766 case V16HImode:
48767 case V8SImode:
48768 case V4DImode:
48769 /* For AVX2, broadcasts of the first element should already have been
48770 handled by expand_vec_perm_1 using vpbroadcast* or vpermq. */
48771 gcc_assert (!TARGET_AVX2 || d->perm[0]);
48772 return false;
48774 default:
48775 gcc_unreachable ();
48779 /* A subroutine of ix86_expand_vec_perm_builtin_1. Pattern match
48780 broadcast permutations. */
48782 static bool
48783 expand_vec_perm_broadcast (struct expand_vec_perm_d *d)
48785 unsigned i, elt, nelt = d->nelt;
48787 if (!d->one_operand_p)
48788 return false;
48790 elt = d->perm[0];
48791 for (i = 1; i < nelt; ++i)
48792 if (d->perm[i] != elt)
48793 return false;
48795 return expand_vec_perm_broadcast_1 (d);
48798 /* Implement arbitrary permutation of two V32QImode and V16QImode operands
48799 with 4 vpshufb insns, 2 vpermq and 3 vpor. We should have already failed
48800 all the shorter instruction sequences. */
48802 static bool
48803 expand_vec_perm_vpshufb4_vpermq2 (struct expand_vec_perm_d *d)
48805 rtx rperm[4][32], vperm, l[2], h[2], op, m128;
48806 unsigned int i, nelt, eltsz;
48807 bool used[4];
48809 if (!TARGET_AVX2
48810 || d->one_operand_p
48811 || (d->vmode != V32QImode && d->vmode != V16HImode))
48812 return false;
48814 if (d->testing_p)
48815 return true;
48817 nelt = d->nelt;
48818 eltsz = GET_MODE_SIZE (GET_MODE_INNER (d->vmode));
48820 /* Generate 4 permutation masks. If the required element is within
48821 the same lane, it is shuffled in. If the required element is from the
48822 other lane, force a zero by setting bit 7 in the permutation mask.
48823 In the other mask the non-negative elements appear where an element
48824 is requested from the other lane, but moved to the other lane,
48825 so that the result of vpshufb can have the two V2TImode halves
48826 swapped. */
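/* Editorial note (not in the original source): "which" below indexes the
   four masks as 2 * (element comes from op1) + (element crosses a 128-bit
   lane), so rperm[0]/rperm[2] handle same-lane elements of op0/op1 and
   rperm[1]/rperm[3] handle cross-lane elements, whose vpshufb results are
   lane-swapped with vpermq further down.  */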
48827 m128 = GEN_INT (-128);
48828 for (i = 0; i < 32; ++i)
48830 rperm[0][i] = m128;
48831 rperm[1][i] = m128;
48832 rperm[2][i] = m128;
48833 rperm[3][i] = m128;
48835 used[0] = false;
48836 used[1] = false;
48837 used[2] = false;
48838 used[3] = false;
48839 for (i = 0; i < nelt; ++i)
48841 unsigned j, e = d->perm[i] & (nelt / 2 - 1);
48842 unsigned xlane = ((d->perm[i] ^ i) & (nelt / 2)) * eltsz;
48843 unsigned int which = ((d->perm[i] & nelt) ? 2 : 0) + (xlane ? 1 : 0);
48845 for (j = 0; j < eltsz; ++j)
48846 rperm[which][(i * eltsz + j) ^ xlane] = GEN_INT (e * eltsz + j);
48847 used[which] = true;
48850 for (i = 0; i < 2; ++i)
48852 if (!used[2 * i + 1])
48854 h[i] = NULL_RTX;
48855 continue;
48857 vperm = gen_rtx_CONST_VECTOR (V32QImode,
48858 gen_rtvec_v (32, rperm[2 * i + 1]));
48859 vperm = force_reg (V32QImode, vperm);
48860 h[i] = gen_reg_rtx (V32QImode);
48861 op = gen_lowpart (V32QImode, i ? d->op1 : d->op0);
48862 emit_insn (gen_avx2_pshufbv32qi3 (h[i], op, vperm));
48865 /* Swap the 128-bit lanes of h[X]. */
48866 for (i = 0; i < 2; ++i)
48868 if (h[i] == NULL_RTX)
48869 continue;
48870 op = gen_reg_rtx (V4DImode);
48871 emit_insn (gen_avx2_permv4di_1 (op, gen_lowpart (V4DImode, h[i]),
48872 const2_rtx, GEN_INT (3), const0_rtx,
48873 const1_rtx));
48874 h[i] = gen_lowpart (V32QImode, op);
48877 for (i = 0; i < 2; ++i)
48879 if (!used[2 * i])
48881 l[i] = NULL_RTX;
48882 continue;
48884 vperm = gen_rtx_CONST_VECTOR (V32QImode, gen_rtvec_v (32, rperm[2 * i]));
48885 vperm = force_reg (V32QImode, vperm);
48886 l[i] = gen_reg_rtx (V32QImode);
48887 op = gen_lowpart (V32QImode, i ? d->op1 : d->op0);
48888 emit_insn (gen_avx2_pshufbv32qi3 (l[i], op, vperm));
48891 for (i = 0; i < 2; ++i)
48893 if (h[i] && l[i])
48895 op = gen_reg_rtx (V32QImode);
48896 emit_insn (gen_iorv32qi3 (op, l[i], h[i]));
48897 l[i] = op;
48899 else if (h[i])
48900 l[i] = h[i];
48903 gcc_assert (l[0] && l[1]);
48904 op = d->target;
48905 if (d->vmode != V32QImode)
48906 op = gen_reg_rtx (V32QImode);
48907 emit_insn (gen_iorv32qi3 (op, l[0], l[1]));
48908 if (op != d->target)
48909 emit_move_insn (d->target, gen_lowpart (d->vmode, op));
48910 return true;
48913 /* The guts of ix86_expand_vec_perm_const, also used by the ok hook.
48914 With all of the interface bits taken care of, perform the expansion
48915 in D and return true on success. */
48917 static bool
48918 ix86_expand_vec_perm_const_1 (struct expand_vec_perm_d *d)
48920 /* Try a single instruction expansion. */
48921 if (expand_vec_perm_1 (d))
48922 return true;
48924 /* Try sequences of two instructions. */
48926 if (expand_vec_perm_pshuflw_pshufhw (d))
48927 return true;
48929 if (expand_vec_perm_palignr (d, false))
48930 return true;
48932 if (expand_vec_perm_interleave2 (d))
48933 return true;
48935 if (expand_vec_perm_broadcast (d))
48936 return true;
48938 if (expand_vec_perm_vpermq_perm_1 (d))
48939 return true;
48941 if (expand_vec_perm_vperm2f128 (d))
48942 return true;
48944 if (expand_vec_perm_pblendv (d))
48945 return true;
48947 /* Try sequences of three instructions. */
48949 if (expand_vec_perm_even_odd_pack (d))
48950 return true;
48952 if (expand_vec_perm_2vperm2f128_vshuf (d))
48953 return true;
48955 if (expand_vec_perm_pshufb2 (d))
48956 return true;
48958 if (expand_vec_perm_interleave3 (d))
48959 return true;
48961 if (expand_vec_perm_vperm2f128_vblend (d))
48962 return true;
48964 /* Try sequences of four instructions. */
48966 if (expand_vec_perm_vpshufb2_vpermq (d))
48967 return true;
48969 if (expand_vec_perm_vpshufb2_vpermq_even_odd (d))
48970 return true;
48972 /* ??? Look for narrow permutations whose element orderings would
48973 allow the promotion to a wider mode. */
48975 /* ??? Look for sequences of interleave or a wider permute that place
48976 the data into the correct lanes for a half-vector shuffle like
48977 pshuf[lh]w or vpermilps. */
48979 /* ??? Look for sequences of interleave that produce the desired results.
48980 The combinatorics of punpck[lh] get pretty ugly... */
48982 if (expand_vec_perm_even_odd (d))
48983 return true;
48985 /* Even longer sequences. */
48986 if (expand_vec_perm_vpshufb4_vpermq2 (d))
48987 return true;
48989 return false;
48992 /* If a permutation only uses one operand, make it clear. Returns true
48993 if the permutation references both operands. */
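/* Editorial note (not in the original source): below, bit 0 of WHICH is
   set if any selector element indexes op0 (index < nelt) and bit 1 if any
   indexes op1 (index >= nelt).  E.g. a V4SImode selector { 4, 5, 6, 7 }
   yields which == 2 and is folded to { 0, 1, 2, 3 } applied to the second
   operand's value.  */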
48995 static bool
48996 canonicalize_perm (struct expand_vec_perm_d *d)
48998 int i, which, nelt = d->nelt;
49000 for (i = which = 0; i < nelt; ++i)
49001 which |= (d->perm[i] < nelt ? 1 : 2);
49003 d->one_operand_p = true;
49004 switch (which)
49006 default:
49007 gcc_unreachable();
49009 case 3:
49010 if (!rtx_equal_p (d->op0, d->op1))
49012 d->one_operand_p = false;
49013 break;
49015 /* The elements of PERM do not suggest that only the first operand
49016 is used, but both operands are identical. Allow easier matching
49017 of the permutation by folding the permutation into the single
49018 input vector. */
49019 /* FALLTHRU */
49021 case 2:
49022 for (i = 0; i < nelt; ++i)
49023 d->perm[i] &= nelt - 1;
49024 d->op0 = d->op1;
49025 break;
49027 case 1:
49028 d->op1 = d->op0;
49029 break;
49032 return (which == 3);
49035 bool
49036 ix86_expand_vec_perm_const (rtx operands[4])
49038 struct expand_vec_perm_d d;
49039 unsigned char perm[MAX_VECT_LEN];
49040 int i, nelt;
49041 bool two_args;
49042 rtx sel;
49044 d.target = operands[0];
49045 d.op0 = operands[1];
49046 d.op1 = operands[2];
49047 sel = operands[3];
49049 d.vmode = GET_MODE (d.target);
49050 gcc_assert (VECTOR_MODE_P (d.vmode));
49051 d.nelt = nelt = GET_MODE_NUNITS (d.vmode);
49052 d.testing_p = false;
49054 gcc_assert (GET_CODE (sel) == CONST_VECTOR);
49055 gcc_assert (XVECLEN (sel, 0) == nelt);
49056 gcc_checking_assert (sizeof (d.perm) == sizeof (perm));
49058 for (i = 0; i < nelt; ++i)
49060 rtx e = XVECEXP (sel, 0, i);
49061 int ei = INTVAL (e) & (2 * nelt - 1);
49062 d.perm[i] = ei;
49063 perm[i] = ei;
49066 two_args = canonicalize_perm (&d);
49068 if (ix86_expand_vec_perm_const_1 (&d))
49069 return true;
49071 /* If the selector says both arguments are needed, but the operands are the
49072 same, the above tried to expand with one_operand_p and flattened selector.
49073 If that didn't work, retry without one_operand_p; we succeeded with that
49074 during testing. */
49075 if (two_args && d.one_operand_p)
49077 d.one_operand_p = false;
49078 memcpy (d.perm, perm, sizeof (perm));
49079 return ix86_expand_vec_perm_const_1 (&d);
49082 return false;
49085 /* Implement targetm.vectorize.vec_perm_const_ok. */
49087 static bool
49088 ix86_vectorize_vec_perm_const_ok (machine_mode vmode,
49089 const unsigned char *sel)
49091 struct expand_vec_perm_d d;
49092 unsigned int i, nelt, which;
49093 bool ret;
49095 d.vmode = vmode;
49096 d.nelt = nelt = GET_MODE_NUNITS (d.vmode);
49097 d.testing_p = true;
49099 /* Given sufficient ISA support we can just return true here
49100 for selected vector modes. */
49101 switch (d.vmode)
49103 case V16SFmode:
49104 case V16SImode:
49105 case V8DImode:
49106 case V8DFmode:
49107 if (TARGET_AVX512F)
49108 /* All implementable with a single vpermi2 insn. */
49109 return true;
49110 break;
49111 case V32HImode:
49112 if (TARGET_AVX512BW)
49113 /* All implementable with a single vpermi2 insn. */
49114 return true;
49115 break;
49116 case V8SImode:
49117 case V8SFmode:
49118 case V4DFmode:
49119 case V4DImode:
49120 if (TARGET_AVX512VL)
49121 /* All implementable with a single vpermi2 insn. */
49122 return true;
49123 break;
49124 case V16HImode:
49125 if (TARGET_AVX2)
49126 /* Implementable with 4 vpshufb insns, 2 vpermq and 3 vpor insns. */
49127 return true;
49128 break;
49129 case V32QImode:
49130 if (TARGET_AVX2)
49131 /* Implementable with 4 vpshufb insns, 2 vpermq and 3 vpor insns. */
49132 return true;
49133 break;
49134 case V4SImode:
49135 case V4SFmode:
49136 case V8HImode:
49137 case V16QImode:
49138 /* All implementable with a single vpperm insn. */
49139 if (TARGET_XOP)
49140 return true;
49141 /* All implementable with 2 pshufb + 1 ior. */
49142 if (TARGET_SSSE3)
49143 return true;
49144 break;
49145 case V2DImode:
49146 case V2DFmode:
49147 /* All implementable with shufpd or unpck[lh]pd. */
49148 return true;
49149 default:
49150 return false;
49153 /* Extract the values from the vector CST into the permutation
49154 array in D. */
49155 memcpy (d.perm, sel, nelt);
49156 for (i = which = 0; i < nelt; ++i)
49158 unsigned char e = d.perm[i];
49159 gcc_assert (e < 2 * nelt);
49160 which |= (e < nelt ? 1 : 2);
49163 /* For all elements from the second vector, fold the elements to the first. */
49164 if (which == 2)
49165 for (i = 0; i < nelt; ++i)
49166 d.perm[i] -= nelt;
49168 /* Check whether the mask can be applied to the vector type. */
49169 d.one_operand_p = (which != 3);
49171 /* Implementable with shufps or pshufd. */
49172 if (d.one_operand_p && (d.vmode == V4SFmode || d.vmode == V4SImode))
49173 return true;
49175 /* Otherwise we have to go through the motions and see if we can
49176 figure out how to generate the requested permutation. */
49177 d.target = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 1);
49178 d.op1 = d.op0 = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 2);
49179 if (!d.one_operand_p)
49180 d.op1 = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 3);
49182 start_sequence ();
49183 ret = ix86_expand_vec_perm_const_1 (&d);
49184 end_sequence ();
49186 return ret;
49189 void
49190 ix86_expand_vec_extract_even_odd (rtx targ, rtx op0, rtx op1, unsigned odd)
49192 struct expand_vec_perm_d d;
49193 unsigned i, nelt;
49195 d.target = targ;
49196 d.op0 = op0;
49197 d.op1 = op1;
49198 d.vmode = GET_MODE (targ);
49199 d.nelt = nelt = GET_MODE_NUNITS (d.vmode);
49200 d.one_operand_p = false;
49201 d.testing_p = false;
49203 for (i = 0; i < nelt; ++i)
49204 d.perm[i] = i * 2 + odd;
49206 /* We'll either be able to implement the permutation directly... */
49207 if (expand_vec_perm_1 (&d))
49208 return;
49210 /* ... or we use the special-case patterns. */
49211 expand_vec_perm_even_odd_1 (&d, odd);
49214 static void
49215 ix86_expand_vec_interleave (rtx targ, rtx op0, rtx op1, bool high_p)
49217 struct expand_vec_perm_d d;
49218 unsigned i, nelt, base;
49219 bool ok;
49221 d.target = targ;
49222 d.op0 = op0;
49223 d.op1 = op1;
49224 d.vmode = GET_MODE (targ);
49225 d.nelt = nelt = GET_MODE_NUNITS (d.vmode);
49226 d.one_operand_p = false;
49227 d.testing_p = false;
49229 base = high_p ? nelt / 2 : 0;
49230 for (i = 0; i < nelt / 2; ++i)
49232 d.perm[i * 2] = i + base;
49233 d.perm[i * 2 + 1] = i + base + nelt;
49236 /* Note that for AVX this isn't one instruction. */
49237 ok = ix86_expand_vec_perm_const_1 (&d);
49238 gcc_assert (ok);
49242 /* Expand a vector operation CODE for a V*QImode in terms of the
49243 same operation on V*HImode. */
49245 void
49246 ix86_expand_vecop_qihi (enum rtx_code code, rtx dest, rtx op1, rtx op2)
49248 machine_mode qimode = GET_MODE (dest);
49249 machine_mode himode;
49250 rtx (*gen_il) (rtx, rtx, rtx);
49251 rtx (*gen_ih) (rtx, rtx, rtx);
49252 rtx op1_l, op1_h, op2_l, op2_h, res_l, res_h;
49253 struct expand_vec_perm_d d;
49254 bool ok, full_interleave;
49255 bool uns_p = false;
49256 int i;
49258 switch (qimode)
49260 case V16QImode:
49261 himode = V8HImode;
49262 gen_il = gen_vec_interleave_lowv16qi;
49263 gen_ih = gen_vec_interleave_highv16qi;
49264 break;
49265 case V32QImode:
49266 himode = V16HImode;
49267 gen_il = gen_avx2_interleave_lowv32qi;
49268 gen_ih = gen_avx2_interleave_highv32qi;
49269 break;
49270 case V64QImode:
49271 himode = V32HImode;
49272 gen_il = gen_avx512bw_interleave_lowv64qi;
49273 gen_ih = gen_avx512bw_interleave_highv64qi;
49274 break;
49275 default:
49276 gcc_unreachable ();
49279 op2_l = op2_h = op2;
49280 switch (code)
49282 case MULT:
49283 /* Unpack data such that we've got a source byte in each low byte of
49284 each word. We don't care what goes into the high byte of each word.
49285 Rather than trying to get zero in there, it is most convenient to let
49286 it be a copy of the low byte. */
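/* Editorial worked example (not in the original source): after the
   self-interleave, each 16-bit word of op2_l holds the same source byte
   in both halves, and likewise for op1_l below.  The HImode multiply then
   leaves the desired byte product in the low byte of each word (the high
   byte is garbage), and the final even-byte extraction discards the
   garbage.  */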
49287 op2_l = gen_reg_rtx (qimode);
49288 op2_h = gen_reg_rtx (qimode);
49289 emit_insn (gen_il (op2_l, op2, op2));
49290 emit_insn (gen_ih (op2_h, op2, op2));
49291 /* FALLTHRU */
49293 op1_l = gen_reg_rtx (qimode);
49294 op1_h = gen_reg_rtx (qimode);
49295 emit_insn (gen_il (op1_l, op1, op1));
49296 emit_insn (gen_ih (op1_h, op1, op1));
49297 full_interleave = qimode == V16QImode;
49298 break;
49300 case ASHIFT:
49301 case LSHIFTRT:
49302 uns_p = true;
49303 /* FALLTHRU */
49304 case ASHIFTRT:
49305 op1_l = gen_reg_rtx (himode);
49306 op1_h = gen_reg_rtx (himode);
49307 ix86_expand_sse_unpack (op1_l, op1, uns_p, false);
49308 ix86_expand_sse_unpack (op1_h, op1, uns_p, true);
49309 full_interleave = true;
49310 break;
49311 default:
49312 gcc_unreachable ();
49315 /* Perform the operation. */
49316 res_l = expand_simple_binop (himode, code, op1_l, op2_l, NULL_RTX,
49317 1, OPTAB_DIRECT);
49318 res_h = expand_simple_binop (himode, code, op1_h, op2_h, NULL_RTX,
49319 1, OPTAB_DIRECT);
49320 gcc_assert (res_l && res_h);
49322 /* Merge the data back into the right place. */
49323 d.target = dest;
49324 d.op0 = gen_lowpart (qimode, res_l);
49325 d.op1 = gen_lowpart (qimode, res_h);
49326 d.vmode = qimode;
49327 d.nelt = GET_MODE_NUNITS (qimode);
49328 d.one_operand_p = false;
49329 d.testing_p = false;
49331 if (full_interleave)
49333 /* For SSE2, we used a full interleave, so the desired
49334 results are in the even elements. */
49335 for (i = 0; i < 64; ++i)
49336 d.perm[i] = i * 2;
49338 else
49340 /* For AVX, the interleave used above was not cross-lane. So the
49341 extraction is evens but with the second and third quarters swapped.
49342 Happily, that is even one insn shorter than even extraction. */
49343 for (i = 0; i < 64; ++i)
49344 d.perm[i] = i * 2 + ((i & 24) == 8 ? 16 : (i & 24) == 16 ? -16 : 0);
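/* Editorial note (not in the original source): the adjustment above swaps
   the index groups [8,16) and [16,24); e.g. i == 8 maps to selector index
   32 (byte 0 of res_h) and i == 16 maps to index 16 (byte 16 of res_l),
   because the per-lane interleave left the results for bytes 8..15 in
   res_h and those for bytes 16..23 in res_l.  */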
49347 ok = ix86_expand_vec_perm_const_1 (&d);
49348 gcc_assert (ok);
49350 set_unique_reg_note (get_last_insn (), REG_EQUAL,
49351 gen_rtx_fmt_ee (code, qimode, op1, op2));
49354 /* Helper function of ix86_expand_mul_widen_evenodd. Return true
49355 if op is CONST_VECTOR with all odd elements equal to their
49356 preceding element. */
49358 static bool
49359 const_vector_equal_evenodd_p (rtx op)
49361 machine_mode mode = GET_MODE (op);
49362 int i, nunits = GET_MODE_NUNITS (mode);
49363 if (GET_CODE (op) != CONST_VECTOR
49364 || nunits != CONST_VECTOR_NUNITS (op))
49365 return false;
49366 for (i = 0; i < nunits; i += 2)
49367 if (CONST_VECTOR_ELT (op, i) != CONST_VECTOR_ELT (op, i + 1))
49368 return false;
49369 return true;
49372 void
49373 ix86_expand_mul_widen_evenodd (rtx dest, rtx op1, rtx op2,
49374 bool uns_p, bool odd_p)
49376 machine_mode mode = GET_MODE (op1);
49377 machine_mode wmode = GET_MODE (dest);
49378 rtx x;
49379 rtx orig_op1 = op1, orig_op2 = op2;
49381 if (!nonimmediate_operand (op1, mode))
49382 op1 = force_reg (mode, op1);
49383 if (!nonimmediate_operand (op2, mode))
49384 op2 = force_reg (mode, op2);
49386 /* We only play even/odd games with vectors of SImode. */
49387 gcc_assert (mode == V4SImode || mode == V8SImode || mode == V16SImode);
49389 /* If we're looking for the odd results, shift those members down to
49390 the even slots. For some cpus this is faster than a PSHUFD. */
49391 if (odd_p)
49393 /* For XOP use vpmacsdqh, but only for smult, as it is only
49394 signed. */
49395 if (TARGET_XOP && mode == V4SImode && !uns_p)
49397 x = force_reg (wmode, CONST0_RTX (wmode));
49398 emit_insn (gen_xop_pmacsdqh (dest, op1, op2, x));
49399 return;
49402 x = GEN_INT (GET_MODE_UNIT_BITSIZE (mode));
49403 if (!const_vector_equal_evenodd_p (orig_op1))
49404 op1 = expand_binop (wmode, lshr_optab, gen_lowpart (wmode, op1),
49405 x, NULL, 1, OPTAB_DIRECT);
49406 if (!const_vector_equal_evenodd_p (orig_op2))
49407 op2 = expand_binop (wmode, lshr_optab, gen_lowpart (wmode, op2),
49408 x, NULL, 1, OPTAB_DIRECT);
49409 op1 = gen_lowpart (mode, op1);
49410 op2 = gen_lowpart (mode, op2);
49413 if (mode == V16SImode)
49415 if (uns_p)
49416 x = gen_vec_widen_umult_even_v16si (dest, op1, op2);
49417 else
49418 x = gen_vec_widen_smult_even_v16si (dest, op1, op2);
49420 else if (mode == V8SImode)
49422 if (uns_p)
49423 x = gen_vec_widen_umult_even_v8si (dest, op1, op2);
49424 else
49425 x = gen_vec_widen_smult_even_v8si (dest, op1, op2);
49427 else if (uns_p)
49428 x = gen_vec_widen_umult_even_v4si (dest, op1, op2);
49429 else if (TARGET_SSE4_1)
49430 x = gen_sse4_1_mulv2siv2di3 (dest, op1, op2);
49431 else
49433 rtx s1, s2, t0, t1, t2;
49435 /* The easiest way to implement this without PMULDQ is to go through
49436 the motions as if we are performing a full 64-bit multiply, except
49437 that we need to do less shuffling of the elements. */
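/* Editorial sketch (not in the original source): the scalar identity
   being vectorized below is, for 32-bit signed A and B with
   uint64_t sa = A < 0 ? 0xffffffff : 0 and sb likewise,

     (uint64_t) ((int64_t) A * B)
       == (uint64_t) (uint32_t) A * (uint32_t) B
	  + ((sa * (uint32_t) B + sb * (uint32_t) A) << 32);

   s1/s2 are the vector forms of sa/sb, t0/t1/t2 the three unsigned even
   multiplies, and the final add and shift combine them.  */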
49439 /* Compute the sign-extension, aka highparts, of the two operands. */
49440 s1 = ix86_expand_sse_cmp (gen_reg_rtx (mode), GT, CONST0_RTX (mode),
49441 op1, pc_rtx, pc_rtx);
49442 s2 = ix86_expand_sse_cmp (gen_reg_rtx (mode), GT, CONST0_RTX (mode),
49443 op2, pc_rtx, pc_rtx);
49445 /* Multiply LO(A) * HI(B), and vice-versa. */
49446 t1 = gen_reg_rtx (wmode);
49447 t2 = gen_reg_rtx (wmode);
49448 emit_insn (gen_vec_widen_umult_even_v4si (t1, s1, op2));
49449 emit_insn (gen_vec_widen_umult_even_v4si (t2, s2, op1));
49451 /* Multiply LO(A) * LO(B). */
49452 t0 = gen_reg_rtx (wmode);
49453 emit_insn (gen_vec_widen_umult_even_v4si (t0, op1, op2));
49455 /* Combine and shift the highparts into place. */
49456 t1 = expand_binop (wmode, add_optab, t1, t2, t1, 1, OPTAB_DIRECT);
49457 t1 = expand_binop (wmode, ashl_optab, t1, GEN_INT (32), t1,
49458 1, OPTAB_DIRECT);
49460 /* Combine high and low parts. */
49461 force_expand_binop (wmode, add_optab, t0, t1, dest, 1, OPTAB_DIRECT);
49462 return;
49464 emit_insn (x);
49467 void
49468 ix86_expand_mul_widen_hilo (rtx dest, rtx op1, rtx op2,
49469 bool uns_p, bool high_p)
49471 machine_mode wmode = GET_MODE (dest);
49472 machine_mode mode = GET_MODE (op1);
49473 rtx t1, t2, t3, t4, mask;
49475 switch (mode)
49477 case V4SImode:
49478 t1 = gen_reg_rtx (mode);
49479 t2 = gen_reg_rtx (mode);
49480 if (TARGET_XOP && !uns_p)
49482 /* With XOP, we have pmacsdqh, aka mul_widen_odd. In this case,
49483 shuffle the elements once so that all elements are in the right
49484 place for immediate use: { A C B D }. */
49485 emit_insn (gen_sse2_pshufd_1 (t1, op1, const0_rtx, const2_rtx,
49486 const1_rtx, GEN_INT (3)));
49487 emit_insn (gen_sse2_pshufd_1 (t2, op2, const0_rtx, const2_rtx,
49488 const1_rtx, GEN_INT (3)));
49490 else
49492 /* Put the elements into place for the multiply. */
49493 ix86_expand_vec_interleave (t1, op1, op1, high_p);
49494 ix86_expand_vec_interleave (t2, op2, op2, high_p);
49495 high_p = false;
49497 ix86_expand_mul_widen_evenodd (dest, t1, t2, uns_p, high_p);
49498 break;
49500 case V8SImode:
49501 /* Shuffle the elements between the lanes. After this we
49502 have { A B E F | C D G H } for each operand. */
49503 t1 = gen_reg_rtx (V4DImode);
49504 t2 = gen_reg_rtx (V4DImode);
49505 emit_insn (gen_avx2_permv4di_1 (t1, gen_lowpart (V4DImode, op1),
49506 const0_rtx, const2_rtx,
49507 const1_rtx, GEN_INT (3)));
49508 emit_insn (gen_avx2_permv4di_1 (t2, gen_lowpart (V4DImode, op2),
49509 const0_rtx, const2_rtx,
49510 const1_rtx, GEN_INT (3)));
49512 /* Shuffle the elements within the lanes. After this we
49513 have { A A B B | C C D D } or { E E F F | G G H H }. */
49514 t3 = gen_reg_rtx (V8SImode);
49515 t4 = gen_reg_rtx (V8SImode);
49516 mask = GEN_INT (high_p
49517 ? 2 + (2 << 2) + (3 << 4) + (3 << 6)
49518 : 0 + (0 << 2) + (1 << 4) + (1 << 6));
49519 emit_insn (gen_avx2_pshufdv3 (t3, gen_lowpart (V8SImode, t1), mask));
49520 emit_insn (gen_avx2_pshufdv3 (t4, gen_lowpart (V8SImode, t2), mask));
49522 ix86_expand_mul_widen_evenodd (dest, t3, t4, uns_p, false);
49523 break;
49525 case V8HImode:
49526 case V16HImode:
49527 t1 = expand_binop (mode, smul_optab, op1, op2, NULL_RTX,
49528 uns_p, OPTAB_DIRECT);
49529 t2 = expand_binop (mode,
49530 uns_p ? umul_highpart_optab : smul_highpart_optab,
49531 op1, op2, NULL_RTX, uns_p, OPTAB_DIRECT);
49532 gcc_assert (t1 && t2);
49534 t3 = gen_reg_rtx (mode);
49535 ix86_expand_vec_interleave (t3, t1, t2, high_p);
49536 emit_move_insn (dest, gen_lowpart (wmode, t3));
49537 break;
49539 case V16QImode:
49540 case V32QImode:
49541 case V32HImode:
49542 case V16SImode:
49543 case V64QImode:
49544 t1 = gen_reg_rtx (wmode);
49545 t2 = gen_reg_rtx (wmode);
49546 ix86_expand_sse_unpack (t1, op1, uns_p, high_p);
49547 ix86_expand_sse_unpack (t2, op2, uns_p, high_p);
49549 emit_insn (gen_rtx_SET (VOIDmode, dest, gen_rtx_MULT (wmode, t1, t2)));
49550 break;
49552 default:
49553 gcc_unreachable ();
49557 void
49558 ix86_expand_sse2_mulv4si3 (rtx op0, rtx op1, rtx op2)
49560 rtx res_1, res_2, res_3, res_4;
49562 res_1 = gen_reg_rtx (V4SImode);
49563 res_2 = gen_reg_rtx (V4SImode);
49564 res_3 = gen_reg_rtx (V2DImode);
49565 res_4 = gen_reg_rtx (V2DImode);
49566 ix86_expand_mul_widen_evenodd (res_3, op1, op2, true, false);
49567 ix86_expand_mul_widen_evenodd (res_4, op1, op2, true, true);
49569 /* Move the results in element 2 down to element 1; we don't care
49570 what goes in elements 2 and 3. Then we can merge the parts
49571 back together with an interleave.
49573 Note that two other sequences were tried:
49574 (1) Use interleaves at the start instead of psrldq, which allows
49575 us to use a single shufps to merge things back at the end.
49576 (2) Use shufps here to combine the two vectors, then pshufd to
49577 put the elements in the correct order.
49578 In both cases the cost of the reformatting stall was too high
49579 and the overall sequence slower. */
49581 emit_insn (gen_sse2_pshufd_1 (res_1, gen_lowpart (V4SImode, res_3),
49582 const0_rtx, const2_rtx,
49583 const0_rtx, const0_rtx));
49584 emit_insn (gen_sse2_pshufd_1 (res_2, gen_lowpart (V4SImode, res_4),
49585 const0_rtx, const2_rtx,
49586 const0_rtx, const0_rtx));
49587 res_1 = emit_insn (gen_vec_interleave_lowv4si (op0, res_1, res_2));
49589 set_unique_reg_note (res_1, REG_EQUAL, gen_rtx_MULT (V4SImode, op1, op2));
49592 void
49593 ix86_expand_sse2_mulvxdi3 (rtx op0, rtx op1, rtx op2)
49595 machine_mode mode = GET_MODE (op0);
49596 rtx t1, t2, t3, t4, t5, t6;
49598 if (TARGET_AVX512DQ && mode == V8DImode)
49599 emit_insn (gen_avx512dq_mulv8di3 (op0, op1, op2));
49600 else if (TARGET_AVX512DQ && TARGET_AVX512VL && mode == V4DImode)
49601 emit_insn (gen_avx512dq_mulv4di3 (op0, op1, op2));
49602 else if (TARGET_AVX512DQ && TARGET_AVX512VL && mode == V2DImode)
49603 emit_insn (gen_avx512dq_mulv2di3 (op0, op1, op2));
49604 else if (TARGET_XOP && mode == V2DImode)
49606 /* op1: A,B,C,D, op2: E,F,G,H */
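/* Editorial note (not in the original source): this path uses the usual
   decomposition of a 64-bit product into 32-bit pieces,
   x * y == lo(x) * lo(y) + ((lo(x) * hi(y) + hi(x) * lo(y)) << 32),
   where for the first element lo(x) = A, hi(x) = B, lo(y) = E and
   hi(y) = F.  Below, t1 holds op1 with each dword pair swapped, t2 the
   individual cross products, t3 their sums, t4 the shifted sum and t5
   the low-by-low widening multiply.  */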
49607 op1 = gen_lowpart (V4SImode, op1);
49608 op2 = gen_lowpart (V4SImode, op2);
49610 t1 = gen_reg_rtx (V4SImode);
49611 t2 = gen_reg_rtx (V4SImode);
49612 t3 = gen_reg_rtx (V2DImode);
49613 t4 = gen_reg_rtx (V2DImode);
49615 /* t1: B,A,D,C */
49616 emit_insn (gen_sse2_pshufd_1 (t1, op1,
49617 GEN_INT (1),
49618 GEN_INT (0),
49619 GEN_INT (3),
49620 GEN_INT (2)));
49622 /* t2: (B*E),(A*F),(D*G),(C*H) */
49623 emit_insn (gen_mulv4si3 (t2, t1, op2));
49625 /* t3: (B*E)+(A*F), (D*G)+(C*H) */
49626 emit_insn (gen_xop_phadddq (t3, t2));
49628 /* t4: ((B*E)+(A*F))<<32, ((D*G)+(C*H))<<32 */
49629 emit_insn (gen_ashlv2di3 (t4, t3, GEN_INT (32)));
49631 /* Multiply lower parts and add all */
49632 t5 = gen_reg_rtx (V2DImode);
49633 emit_insn (gen_vec_widen_umult_even_v4si (t5,
49634 gen_lowpart (V4SImode, op1),
49635 gen_lowpart (V4SImode, op2)));
49636 op0 = expand_binop (mode, add_optab, t5, t4, op0, 1, OPTAB_DIRECT);
49639 else
49641 machine_mode nmode;
49642 rtx (*umul) (rtx, rtx, rtx);
49644 if (mode == V2DImode)
49646 umul = gen_vec_widen_umult_even_v4si;
49647 nmode = V4SImode;
49649 else if (mode == V4DImode)
49651 umul = gen_vec_widen_umult_even_v8si;
49652 nmode = V8SImode;
49654 else if (mode == V8DImode)
49656 umul = gen_vec_widen_umult_even_v16si;
49657 nmode = V16SImode;
49659 else
49660 gcc_unreachable ();
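/* Editorial note (not in the original source): the steps below use the
   same decomposition, result = lo*lo + ((hi(x)*lo(y) + hi(y)*lo(x)) << 32),
   with the even-element widening unsigned multiply supplying each
   32x32->64 product.  */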
49663 /* Multiply low parts. */
49664 t1 = gen_reg_rtx (mode);
49665 emit_insn (umul (t1, gen_lowpart (nmode, op1), gen_lowpart (nmode, op2)));
49667 /* Shift input vectors right 32 bits so we can multiply high parts. */
49668 t6 = GEN_INT (32);
49669 t2 = expand_binop (mode, lshr_optab, op1, t6, NULL, 1, OPTAB_DIRECT);
49670 t3 = expand_binop (mode, lshr_optab, op2, t6, NULL, 1, OPTAB_DIRECT);
49672 /* Multiply high parts by low parts. */
49673 t4 = gen_reg_rtx (mode);
49674 t5 = gen_reg_rtx (mode);
49675 emit_insn (umul (t4, gen_lowpart (nmode, t2), gen_lowpart (nmode, op2)));
49676 emit_insn (umul (t5, gen_lowpart (nmode, t3), gen_lowpart (nmode, op1)));
49678 /* Combine and shift the highparts back. */
49679 t4 = expand_binop (mode, add_optab, t4, t5, t4, 1, OPTAB_DIRECT);
49680 t4 = expand_binop (mode, ashl_optab, t4, t6, t4, 1, OPTAB_DIRECT);
49682 /* Combine high and low parts. */
49683 force_expand_binop (mode, add_optab, t1, t4, op0, 1, OPTAB_DIRECT);
49686 set_unique_reg_note (get_last_insn (), REG_EQUAL,
49687 gen_rtx_MULT (mode, op1, op2));
49690 /* Return 1 if control transfer instruction INSN
49691 should be encoded with the bnd prefix.
49692 If insn is NULL then return 1 when control
49693 transfer instructions should be prefixed with
49694 bnd by default for the current function. */
49696 bool
49697 ix86_bnd_prefixed_insn_p (rtx insn)
49699 /* For call insns check special flag. */
49700 if (insn && CALL_P (insn))
49702 rtx call = get_call_rtx_from (insn);
49703 if (call)
49704 return CALL_EXPR_WITH_BOUNDS_P (call);
49707 /* All other insns are prefixed only if function is instrumented. */
49708 return chkp_function_instrumented_p (current_function_decl);
49711 /* Calculate integer abs() using only SSE2 instructions. */
49713 void
49714 ix86_expand_sse2_abs (rtx target, rtx input)
49716 machine_mode mode = GET_MODE (target);
49717 rtx tmp0, tmp1, x;
49719 switch (mode)
49721 /* For 32-bit signed integer X, the best way to calculate the absolute
49722 value of X is (((signed) X >> (W-1)) ^ X) - ((signed) X >> (W-1)). */
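/* Editorial worked example (not in the original source): for X == -5,
   X >> 31 == -1, (-1 ^ -5) == 4 and 4 - (-1) == 5; for X == 5 the shift
   gives 0 and the expression reduces to X itself.  */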
49723 case V4SImode:
49724 tmp0 = expand_simple_binop (mode, ASHIFTRT, input,
49725 GEN_INT (GET_MODE_BITSIZE
49726 (GET_MODE_INNER (mode)) - 1),
49727 NULL, 0, OPTAB_DIRECT);
49728 tmp1 = expand_simple_binop (mode, XOR, tmp0, input,
49729 NULL, 0, OPTAB_DIRECT);
49730 x = expand_simple_binop (mode, MINUS, tmp1, tmp0,
49731 target, 0, OPTAB_DIRECT);
49732 break;
49734 /* For 16-bit signed integer X, the best way to calculate the absolute
49735 value of X is max (X, -X), as SSE2 provides the PMAXSW insn. */
49736 case V8HImode:
49737 tmp0 = expand_unop (mode, neg_optab, input, NULL_RTX, 0);
49739 x = expand_simple_binop (mode, SMAX, tmp0, input,
49740 target, 0, OPTAB_DIRECT);
49741 break;
49743 /* For 8-bit signed integer X, the best way to calculate the absolute
49744 value of X is min ((unsigned char) X, (unsigned char) (-X)),
49745 as SSE2 provides the PMINUB insn. */
49746 case V16QImode:
49747 tmp0 = expand_unop (mode, neg_optab, input, NULL_RTX, 0);
49749 x = expand_simple_binop (V16QImode, UMIN, tmp0, input,
49750 target, 0, OPTAB_DIRECT);
49751 break;
49753 default:
49754 gcc_unreachable ();
49757 if (x != target)
49758 emit_move_insn (target, x);
49761 /* Expand an insert into a vector register through pinsr insn.
49762 Return true if successful. */
49764 bool
49765 ix86_expand_pinsr (rtx *operands)
49767 rtx dst = operands[0];
49768 rtx src = operands[3];
49770 unsigned int size = INTVAL (operands[1]);
49771 unsigned int pos = INTVAL (operands[2]);
49773 if (GET_CODE (dst) == SUBREG)
49775 pos += SUBREG_BYTE (dst) * BITS_PER_UNIT;
49776 dst = SUBREG_REG (dst);
49779 if (GET_CODE (src) == SUBREG)
49780 src = SUBREG_REG (src);
49782 switch (GET_MODE (dst))
49784 case V16QImode:
49785 case V8HImode:
49786 case V4SImode:
49787 case V2DImode:
49789 machine_mode srcmode, dstmode;
49790 rtx (*pinsr)(rtx, rtx, rtx, rtx);
49792 srcmode = mode_for_size (size, MODE_INT, 0);
49794 switch (srcmode)
49796 case QImode:
49797 if (!TARGET_SSE4_1)
49798 return false;
49799 dstmode = V16QImode;
49800 pinsr = gen_sse4_1_pinsrb;
49801 break;
49803 case HImode:
49804 if (!TARGET_SSE2)
49805 return false;
49806 dstmode = V8HImode;
49807 pinsr = gen_sse2_pinsrw;
49808 break;
49810 case SImode:
49811 if (!TARGET_SSE4_1)
49812 return false;
49813 dstmode = V4SImode;
49814 pinsr = gen_sse4_1_pinsrd;
49815 break;
49817 case DImode:
49818 gcc_assert (TARGET_64BIT);
49819 if (!TARGET_SSE4_1)
49820 return false;
49821 dstmode = V2DImode;
49822 pinsr = gen_sse4_1_pinsrq;
49823 break;
49825 default:
49826 return false;
49829 rtx d = dst;
49830 if (GET_MODE (dst) != dstmode)
49831 d = gen_reg_rtx (dstmode);
49832 src = gen_lowpart (srcmode, src);
49834 pos /= size;
49836 emit_insn (pinsr (d, gen_lowpart (dstmode, dst), src,
49837 GEN_INT (1 << pos)));
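/* Editorial worked example (not in the original source): inserting an
   HImode value at bit position 32 gives size == 16 and pos == 32, so
   pos /= size yields 2 and the operand GEN_INT (1 << 2) == 4 selects
   word 2 for the pinsrw pattern.  */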
49838 if (d != dst)
49839 emit_move_insn (dst, gen_lowpart (GET_MODE (dst), d));
49840 return true;
49843 default:
49844 return false;
49848 /* This function returns the calling-ABI-specific va_list type node,
49849 i.e. the va_list type specific to FNDECL. */
49851 static tree
49852 ix86_fn_abi_va_list (tree fndecl)
49854 if (!TARGET_64BIT)
49855 return va_list_type_node;
49856 gcc_assert (fndecl != NULL_TREE);
49858 if (ix86_function_abi ((const_tree) fndecl) == MS_ABI)
49859 return ms_va_list_type_node;
49860 else
49861 return sysv_va_list_type_node;
49864 /* Returns the canonical va_list type specified by TYPE. If there
49865 is no valid TYPE provided, it returns NULL_TREE. */
49867 static tree
49868 ix86_canonical_va_list_type (tree type)
49870 tree wtype, htype;
49872 /* Resolve references and pointers to va_list type. */
49873 if (TREE_CODE (type) == MEM_REF)
49874 type = TREE_TYPE (type);
49875 else if (POINTER_TYPE_P (type) && POINTER_TYPE_P (TREE_TYPE(type)))
49876 type = TREE_TYPE (type);
49877 else if (POINTER_TYPE_P (type) && TREE_CODE (TREE_TYPE (type)) == ARRAY_TYPE)
49878 type = TREE_TYPE (type);
49880 if (TARGET_64BIT && va_list_type_node != NULL_TREE)
49882 wtype = va_list_type_node;
49883 gcc_assert (wtype != NULL_TREE);
49884 htype = type;
49885 if (TREE_CODE (wtype) == ARRAY_TYPE)
49887 /* If va_list is an array type, the argument may have decayed
49888 to a pointer type, e.g. by being passed to another function.
49889 In that case, unwrap both types so that we can compare the
49890 underlying records. */
49891 if (TREE_CODE (htype) == ARRAY_TYPE
49892 || POINTER_TYPE_P (htype))
49894 wtype = TREE_TYPE (wtype);
49895 htype = TREE_TYPE (htype);
49898 if (TYPE_MAIN_VARIANT (wtype) == TYPE_MAIN_VARIANT (htype))
49899 return va_list_type_node;
49900 wtype = sysv_va_list_type_node;
49901 gcc_assert (wtype != NULL_TREE);
49902 htype = type;
49903 if (TREE_CODE (wtype) == ARRAY_TYPE)
49905 /* If va_list is an array type, the argument may have decayed
49906 to a pointer type, e.g. by being passed to another function.
49907 In that case, unwrap both types so that we can compare the
49908 underlying records. */
49909 if (TREE_CODE (htype) == ARRAY_TYPE
49910 || POINTER_TYPE_P (htype))
49912 wtype = TREE_TYPE (wtype);
49913 htype = TREE_TYPE (htype);
49916 if (TYPE_MAIN_VARIANT (wtype) == TYPE_MAIN_VARIANT (htype))
49917 return sysv_va_list_type_node;
49918 wtype = ms_va_list_type_node;
49919 gcc_assert (wtype != NULL_TREE);
49920 htype = type;
49921 if (TREE_CODE (wtype) == ARRAY_TYPE)
49923 /* If va_list is an array type, the argument may have decayed
49924 to a pointer type, e.g. by being passed to another function.
49925 In that case, unwrap both types so that we can compare the
49926 underlying records. */
49927 if (TREE_CODE (htype) == ARRAY_TYPE
49928 || POINTER_TYPE_P (htype))
49930 wtype = TREE_TYPE (wtype);
49931 htype = TREE_TYPE (htype);
49934 if (TYPE_MAIN_VARIANT (wtype) == TYPE_MAIN_VARIANT (htype))
49935 return ms_va_list_type_node;
49936 return NULL_TREE;
49938 return std_canonical_va_list_type (type);
49941 /* Iterate through the target-specific builtin types for va_list.
49942 IDX denotes the iterator, *PTREE is set to the result type of
49943 the va_list builtin, and *PNAME to its internal type.
49944 Returns zero if there is no element for this index, otherwise
49945 IDX should be increased upon the next call.
49946 Note, do not iterate a base builtin's name like __builtin_va_list.
49947 Used from c_common_nodes_and_builtins. */
49949 static int
49950 ix86_enum_va_list (int idx, const char **pname, tree *ptree)
49952 if (TARGET_64BIT)
49954 switch (idx)
49956 default:
49957 break;
49959 case 0:
49960 *ptree = ms_va_list_type_node;
49961 *pname = "__builtin_ms_va_list";
49962 return 1;
49964 case 1:
49965 *ptree = sysv_va_list_type_node;
49966 *pname = "__builtin_sysv_va_list";
49967 return 1;
49971 return 0;
49974 #undef TARGET_SCHED_DISPATCH
49975 #define TARGET_SCHED_DISPATCH has_dispatch
49976 #undef TARGET_SCHED_DISPATCH_DO
49977 #define TARGET_SCHED_DISPATCH_DO do_dispatch
49978 #undef TARGET_SCHED_REASSOCIATION_WIDTH
49979 #define TARGET_SCHED_REASSOCIATION_WIDTH ix86_reassociation_width
49980 #undef TARGET_SCHED_REORDER
49981 #define TARGET_SCHED_REORDER ix86_sched_reorder
49982 #undef TARGET_SCHED_ADJUST_PRIORITY
49983 #define TARGET_SCHED_ADJUST_PRIORITY ix86_adjust_priority
49984 #undef TARGET_SCHED_DEPENDENCIES_EVALUATION_HOOK
49985 #define TARGET_SCHED_DEPENDENCIES_EVALUATION_HOOK \
49986 ix86_dependencies_evaluation_hook
49988 /* The size of the dispatch window is the total number of bytes of
49989 object code allowed in a window. */
49990 #define DISPATCH_WINDOW_SIZE 16
49992 /* Number of dispatch windows considered for scheduling. */
49993 #define MAX_DISPATCH_WINDOWS 3
49995 /* Maximum number of instructions in a window. */
49996 #define MAX_INSN 4
49998 /* Maximum number of immediate operands in a window. */
49999 #define MAX_IMM 4
50001 /* Maximum number of immediate bits allowed in a window. */
50002 #define MAX_IMM_SIZE 128
50004 /* Maximum number of 32 bit immediates allowed in a window. */
50005 #define MAX_IMM_32 4
50007 /* Maximum number of 64 bit immediates allowed in a window. */
50008 #define MAX_IMM_64 2
50010 /* Maximum total of loads or prefetches allowed in a window. */
50011 #define MAX_LOAD 2
50013 /* Maximum total of stores allowed in a window. */
50014 #define MAX_STORE 1
50016 #undef BIG
50017 #define BIG 100
50020 /* Dispatch groups. Instructions that affect the mix in a dispatch window. */
50021 enum dispatch_group {
50022 disp_no_group = 0,
50023 disp_load,
50024 disp_store,
50025 disp_load_store,
50026 disp_prefetch,
50027 disp_imm,
50028 disp_imm_32,
50029 disp_imm_64,
50030 disp_branch,
50031 disp_cmp,
50032 disp_jcc,
50033 disp_last
50036 /* Number of allowable groups in a dispatch window. It is an array
50037 indexed by dispatch_group enum. 100 is used as a big number,
50038 because the number of these kinds of operations does not have any
50039 effect in the dispatch window, but we need them for other reasons in
50040 the table. */
50041 static unsigned int num_allowable_groups[disp_last] = {
50042 0, 2, 1, 1, 2, 4, 4, 2, 1, BIG, BIG
50045 char group_name[disp_last + 1][16] = {
50046 "disp_no_group", "disp_load", "disp_store", "disp_load_store",
50047 "disp_prefetch", "disp_imm", "disp_imm_32", "disp_imm_64",
50048 "disp_branch", "disp_cmp", "disp_jcc", "disp_last"
50051 /* Instruction path. */
50052 enum insn_path {
50053 no_path = 0,
50054 path_single, /* Single micro op. */
50055 path_double, /* Double micro op. */
50056 path_multi, /* Instructions with more than 2 micro ops. */
50057 last_path
50060 /* sched_insn_info defines a window to the instructions scheduled in
50061 the basic block. It contains a pointer to the insn_info table and
50062 the instruction scheduled.
50064 Windows are allocated for each basic block and are linked
50065 together. */
50066 typedef struct sched_insn_info_s {
50067 rtx insn;
50068 enum dispatch_group group;
50069 enum insn_path path;
50070 int byte_len;
50071 int imm_bytes;
50072 } sched_insn_info;
50074 /* Linked list of dispatch windows. This is a two-way list of
50075 dispatch windows of a basic block. It contains information about
50076 the number of uops in the window and the total number of
50077 instructions and of bytes in the object code for this dispatch
50078 window. */
50079 typedef struct dispatch_windows_s {
50080 int num_insn; /* Number of insn in the window. */
50081 int num_uops; /* Number of uops in the window. */
50082 int window_size; /* Number of bytes in the window. */
50083 int window_num; /* Window number, either 0 or 1. */
50084 int num_imm; /* Number of immediates in an insn. */
50085 int num_imm_32; /* Number of 32 bit immediates in an insn. */
50086 int num_imm_64; /* Number of 64 bit immediates in an insn. */
50087 int imm_size; /* Total immediates in the window. */
50088 int num_loads; /* Total memory loads in the window. */
50089 int num_stores; /* Total memory stores in the window. */
50090 int violation; /* Violation exists in window. */
50091 sched_insn_info *window; /* Pointer to the window. */
50092 struct dispatch_windows_s *next;
50093 struct dispatch_windows_s *prev;
50094 } dispatch_windows;
50096 /* Immediate values used in an insn. */
50097 typedef struct imm_info_s
50099 int imm;
50100 int imm32;
50101 int imm64;
50102 } imm_info;
50104 static dispatch_windows *dispatch_window_list;
50105 static dispatch_windows *dispatch_window_list1;
50107 /* Get dispatch group of insn. */
50109 static enum dispatch_group
50110 get_mem_group (rtx_insn *insn)
50112 enum attr_memory memory;
50114 if (INSN_CODE (insn) < 0)
50115 return disp_no_group;
50116 memory = get_attr_memory (insn);
50117 if (memory == MEMORY_STORE)
50118 return disp_store;
50120 if (memory == MEMORY_LOAD)
50121 return disp_load;
50123 if (memory == MEMORY_BOTH)
50124 return disp_load_store;
50126 return disp_no_group;
50129 /* Return true if insn is a compare instruction. */
50131 static bool
50132 is_cmp (rtx_insn *insn)
50134 enum attr_type type;
50136 type = get_attr_type (insn);
50137 return (type == TYPE_TEST
50138 || type == TYPE_ICMP
50139 || type == TYPE_FCMP
50140 || GET_CODE (PATTERN (insn)) == COMPARE);
50143 /* Return true if a dispatch violation was encountered. */
50145 static bool
50146 dispatch_violation (void)
50148 if (dispatch_window_list->next)
50149 return dispatch_window_list->next->violation;
50150 return dispatch_window_list->violation;
50153 /* Return true if insn is a branch instruction. */
50155 static bool
50156 is_branch (rtx insn)
50158 return (CALL_P (insn) || JUMP_P (insn));
50161 /* Return true if insn is a prefetch instruction. */
50163 static bool
50164 is_prefetch (rtx insn)
50166 return NONJUMP_INSN_P (insn) && GET_CODE (PATTERN (insn)) == PREFETCH;
50169 /* This function initializes a dispatch window and the list container holding a
50170 pointer to the window. */
50172 static void
50173 init_window (int window_num)
50175 int i;
50176 dispatch_windows *new_list;
50178 if (window_num == 0)
50179 new_list = dispatch_window_list;
50180 else
50181 new_list = dispatch_window_list1;
50183 new_list->num_insn = 0;
50184 new_list->num_uops = 0;
50185 new_list->window_size = 0;
50186 new_list->next = NULL;
50187 new_list->prev = NULL;
50188 new_list->window_num = window_num;
50189 new_list->num_imm = 0;
50190 new_list->num_imm_32 = 0;
50191 new_list->num_imm_64 = 0;
50192 new_list->imm_size = 0;
50193 new_list->num_loads = 0;
50194 new_list->num_stores = 0;
50195 new_list->violation = false;
50197 for (i = 0; i < MAX_INSN; i++)
50199 new_list->window[i].insn = NULL;
50200 new_list->window[i].group = disp_no_group;
50201 new_list->window[i].path = no_path;
50202 new_list->window[i].byte_len = 0;
50203 new_list->window[i].imm_bytes = 0;
50205 return;
50208 /* This function allocates and initializes a dispatch window and the
50209 list container holding a pointer to the window. */
50211 static dispatch_windows *
50212 allocate_window (void)
50214 dispatch_windows *new_list = XNEW (struct dispatch_windows_s);
50215 new_list->window = XNEWVEC (struct sched_insn_info_s, MAX_INSN + 1);
50217 return new_list;
50220 /* This routine initializes the dispatch scheduling information. It
50221 initiates building dispatch scheduler tables and constructs the
50222 first dispatch window. */
50224 static void
50225 init_dispatch_sched (void)
50227 /* Allocate a dispatch list and a window. */
50228 dispatch_window_list = allocate_window ();
50229 dispatch_window_list1 = allocate_window ();
50230 init_window (0);
50231 init_window (1);
50234 /* This function returns true if a branch is detected. End of a basic block
50235 does not have to be a branch, but here we assume only branches end a
50236 window. */
50238 static bool
50239 is_end_basic_block (enum dispatch_group group)
50241 return group == disp_branch;
50244 /* This function is called when the end of a window processing is reached. */
50246 static void
50247 process_end_window (void)
50249 gcc_assert (dispatch_window_list->num_insn <= MAX_INSN);
50250 if (dispatch_window_list->next)
50252 gcc_assert (dispatch_window_list1->num_insn <= MAX_INSN);
50253 gcc_assert (dispatch_window_list->window_size
50254 + dispatch_window_list1->window_size <= 48);
50255 init_window (1);
50257 init_window (0);
50260 /* Allocates a new dispatch window and adds it to WINDOW_LIST.
50261 WINDOW_NUM is either 0 or 1. A maximum of two windows are generated
50262 for 48 bytes of instructions. Note that these windows are not dispatch
50263 windows whose sizes are DISPATCH_WINDOW_SIZE. */
50265 static dispatch_windows *
50266 allocate_next_window (int window_num)
50268 if (window_num == 0)
50270 if (dispatch_window_list->next)
50271 init_window (1);
50272 init_window (0);
50273 return dispatch_window_list;
50276 dispatch_window_list->next = dispatch_window_list1;
50277 dispatch_window_list1->prev = dispatch_window_list;
50279 return dispatch_window_list1;
50282 /* Compute number of immediate operands of an instruction. */
50284 static void
50285 find_constant (rtx in_rtx, imm_info *imm_values)
50287 if (INSN_P (in_rtx))
50288 in_rtx = PATTERN (in_rtx);
50289 subrtx_iterator::array_type array;
50290 FOR_EACH_SUBRTX (iter, array, in_rtx, ALL)
50291 if (const_rtx x = *iter)
50292 switch (GET_CODE (x))
50294 case CONST:
50295 case SYMBOL_REF:
50296 case CONST_INT:
50297 (imm_values->imm)++;
50298 if (x86_64_immediate_operand (CONST_CAST_RTX (x), SImode))
50299 (imm_values->imm32)++;
50300 else
50301 (imm_values->imm64)++;
50302 break;
50304 case CONST_DOUBLE:
50305 (imm_values->imm)++;
50306 (imm_values->imm64)++;
50307 break;
50309 case CODE_LABEL:
50310 if (LABEL_KIND (x) == LABEL_NORMAL)
50312 (imm_values->imm)++;
50313 (imm_values->imm32)++;
50315 break;
50317 default:
50318 break;
50322 /* Return total size of immediate operands of an instruction along with number
50323    of corresponding immediate operands.  It initializes its parameters to zero
50324    before calling FIND_CONSTANT.
50325    INSN is the input instruction.  IMM is the total number of immediates.
50326 IMM32 is the number of 32 bit immediates. IMM64 is the number of 64
50327 bit immediates. */
50329 static int
50330 get_num_immediates (rtx insn, int *imm, int *imm32, int *imm64)
50332 imm_info imm_values = {0, 0, 0};
50334 find_constant (insn, &imm_values);
50335 *imm = imm_values.imm;
50336 *imm32 = imm_values.imm32;
50337 *imm64 = imm_values.imm64;
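/* Each 32-bit immediate occupies 4 bytes and each 64-bit immediate 8 bytes
   of encoding, so this is the total immediate size in bytes.  */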
50338 return imm_values.imm32 * 4 + imm_values.imm64 * 8;
50341 /* This function indicates whether an instruction has an immediate
50342    operand. */
50344 static bool
50345 has_immediate (rtx insn)
50347 int num_imm_operand;
50348 int num_imm32_operand;
50349 int num_imm64_operand;
50351 if (insn)
50352 return get_num_immediates (insn, &num_imm_operand, &num_imm32_operand,
50353 &num_imm64_operand);
50354 return false;
50357 /* Return single, double or multi decode path for an instruction. */
50359 static enum insn_path
50360 get_insn_path (rtx_insn *insn)
50362 enum attr_amdfam10_decode path = get_attr_amdfam10_decode (insn);
50364 if ((int)path == 0)
50365 return path_single;
50367 if ((int)path == 1)
50368 return path_double;
50370 return path_multi;
50373 /* Return insn dispatch group. */
50375 static enum dispatch_group
50376 get_insn_group (rtx_insn *insn)
50378 enum dispatch_group group = get_mem_group (insn);
50379 if (group)
50380 return group;
50382 if (is_branch (insn))
50383 return disp_branch;
50385 if (is_cmp (insn))
50386 return disp_cmp;
50388 if (has_immediate (insn))
50389 return disp_imm;
50391 if (is_prefetch (insn))
50392 return disp_prefetch;
50394 return disp_no_group;
50397 /* Count number of GROUP restricted instructions in a dispatch
50398 window WINDOW_LIST. */
50400 static int
50401 count_num_restricted (rtx_insn *insn, dispatch_windows *window_list)
50403 enum dispatch_group group = get_insn_group (insn);
50404 int imm_size;
50405 int num_imm_operand;
50406 int num_imm32_operand;
50407 int num_imm64_operand;
50409 if (group == disp_no_group)
50410 return 0;
50412 if (group == disp_imm)
50414 imm_size = get_num_immediates (insn, &num_imm_operand, &num_imm32_operand,
50415 &num_imm64_operand);
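/* Return BIG (a count large enough to fail the group-restriction check) if
   the immediates of this insn would overflow the window's limits on total
   immediate bytes, immediate count, or 32-bit/64-bit immediate slots.  */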
50416 if (window_list->imm_size + imm_size > MAX_IMM_SIZE
50417 || num_imm_operand + window_list->num_imm > MAX_IMM
50418 || (num_imm32_operand > 0
50419 && (window_list->num_imm_32 + num_imm32_operand > MAX_IMM_32
50420 || window_list->num_imm_64 * 2 + num_imm32_operand > MAX_IMM_32))
50421 || (num_imm64_operand > 0
50422 && (window_list->num_imm_64 + num_imm64_operand > MAX_IMM_64
50423 || window_list->num_imm_32 + num_imm64_operand * 2 > MAX_IMM_32))
50424 || (window_list->imm_size + imm_size == MAX_IMM_SIZE
50425 && num_imm64_operand > 0
50426 && ((window_list->num_imm_64 > 0
50427 && window_list->num_insn >= 2)
50428 || window_list->num_insn >= 3)))
50429 return BIG;
50431 return 1;
50434 if ((group == disp_load_store
50435 && (window_list->num_loads >= MAX_LOAD
50436 || window_list->num_stores >= MAX_STORE))
50437 || ((group == disp_load
50438 || group == disp_prefetch)
50439 && window_list->num_loads >= MAX_LOAD)
50440 || (group == disp_store
50441 && window_list->num_stores >= MAX_STORE))
50442 return BIG;
50444 return 1;
50447 /* This function returns true if INSN satisfies the dispatch rules of the
50448    last window scheduled. */
50450 static bool
50451 fits_dispatch_window (rtx_insn *insn)
50453 dispatch_windows *window_list = dispatch_window_list;
50454 dispatch_windows *window_list_next = dispatch_window_list->next;
50455 unsigned int num_restrict;
50456 enum dispatch_group group = get_insn_group (insn);
50457 enum insn_path path = get_insn_path (insn);
50458 int sum;
50460   /* Make disp_cmp and disp_jcc get scheduled last.  These
50461      instructions should be given the lowest priority in the Haifa
50462      scheduler's scheduling process to make sure they will be scheduled
50463      in the same dispatch window as the instruction that references them. */
50464 if (group == disp_jcc || group == disp_cmp)
50465 return false;
50467 /* Check nonrestricted. */
50468 if (group == disp_no_group || group == disp_branch)
50469 return true;
50471 /* Get last dispatch window. */
50472 if (window_list_next)
50473 window_list = window_list_next;
50475 if (window_list->window_num == 1)
50477 sum = window_list->prev->window_size + window_list->window_size;
50479 if (sum == 32
50480 || (min_insn_size (insn) + sum) >= 48)
50481 /* Window 1 is full. Go for next window. */
50482 return true;
50485 num_restrict = count_num_restricted (insn, window_list);
50487 if (num_restrict > num_allowable_groups[group])
50488 return false;
50490 /* See if it fits in the first window. */
50491 if (window_list->window_num == 0)
50493       /* The first window should have only single and double path
50494 uops. */
50495 if (path == path_double
50496 && (window_list->num_uops + 2) > MAX_INSN)
50497 return false;
50498 else if (path != path_single)
50499 return false;
50501 return true;
50504 /* Add an instruction INSN with NUM_UOPS micro-operations to the
50505 dispatch window WINDOW_LIST. */
50507 static void
50508 add_insn_window (rtx_insn *insn, dispatch_windows *window_list, int num_uops)
50510 int byte_len = min_insn_size (insn);
50511 int num_insn = window_list->num_insn;
50512 int imm_size;
50513 sched_insn_info *window = window_list->window;
50514 enum dispatch_group group = get_insn_group (insn);
50515 enum insn_path path = get_insn_path (insn);
50516 int num_imm_operand;
50517 int num_imm32_operand;
50518 int num_imm64_operand;
50520 if (!window_list->violation && group != disp_cmp
50521 && !fits_dispatch_window (insn))
50522 window_list->violation = true;
50524 imm_size = get_num_immediates (insn, &num_imm_operand, &num_imm32_operand,
50525 &num_imm64_operand);
50527 /* Initialize window with new instruction. */
50528 window[num_insn].insn = insn;
50529 window[num_insn].byte_len = byte_len;
50530 window[num_insn].group = group;
50531 window[num_insn].path = path;
50532 window[num_insn].imm_bytes = imm_size;
50534 window_list->window_size += byte_len;
50535 window_list->num_insn = num_insn + 1;
50536 window_list->num_uops = window_list->num_uops + num_uops;
50537 window_list->imm_size += imm_size;
50538 window_list->num_imm += num_imm_operand;
50539 window_list->num_imm_32 += num_imm32_operand;
50540 window_list->num_imm_64 += num_imm64_operand;
50542 if (group == disp_store)
50543 window_list->num_stores += 1;
50544 else if (group == disp_load
50545 || group == disp_prefetch)
50546 window_list->num_loads += 1;
50547 else if (group == disp_load_store)
50549 window_list->num_stores += 1;
50550 window_list->num_loads += 1;
50554 /* Adds a scheduled instruction, INSN, to the current dispatch window.
50555 If the total bytes of instructions or the number of instructions in
50556    the window exceeds the allowed maximum, it allocates a new window. */
50558 static void
50559 add_to_dispatch_window (rtx_insn *insn)
50561 int byte_len;
50562 dispatch_windows *window_list;
50563 dispatch_windows *next_list;
50564 dispatch_windows *window0_list;
50565 enum insn_path path;
50566 enum dispatch_group insn_group;
50567 bool insn_fits;
50568 int num_insn;
50569 int num_uops;
50570 int window_num;
50571 int insn_num_uops;
50572 int sum;
50574 if (INSN_CODE (insn) < 0)
50575 return;
50577 byte_len = min_insn_size (insn);
50578 window_list = dispatch_window_list;
50579 next_list = window_list->next;
50580 path = get_insn_path (insn);
50581 insn_group = get_insn_group (insn);
50583 /* Get the last dispatch window. */
50584 if (next_list)
50585 window_list = dispatch_window_list->next;
50587 if (path == path_single)
50588 insn_num_uops = 1;
50589 else if (path == path_double)
50590 insn_num_uops = 2;
50591 else
50592 insn_num_uops = (int) path;
50594   /* If the current window is full, get a new window.
50595      Window number zero is full if MAX_INSN uops are scheduled in it.
50596      Window number one is full if window zero's bytes plus window
50597      one's bytes equal 32, or if adding the bytes of the new instruction
50598      to the total makes it reach 48 or more, or if it already has MAX_INSN
50599      instructions in it.  */
50600 num_insn = window_list->num_insn;
50601 num_uops = window_list->num_uops;
50602 window_num = window_list->window_num;
50603 insn_fits = fits_dispatch_window (insn);
50605 if (num_insn >= MAX_INSN
50606 || num_uops + insn_num_uops > MAX_INSN
50607 || !(insn_fits))
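/* The current window cannot accept this insn (too many insns, too many
   uops, or it fails the dispatch rules); flip WINDOW_NUM between 0 and 1
   and switch to the other window.  */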
50609 window_num = ~window_num & 1;
50610 window_list = allocate_next_window (window_num);
50613 if (window_num == 0)
50615 add_insn_window (insn, window_list, insn_num_uops);
50616 if (window_list->num_insn >= MAX_INSN
50617 && insn_group == disp_branch)
50619 process_end_window ();
50620 return;
50623 else if (window_num == 1)
50625 window0_list = window_list->prev;
50626 sum = window0_list->window_size + window_list->window_size;
50627 if (sum == 32
50628 || (byte_len + sum) >= 48)
50630 process_end_window ();
50631 window_list = dispatch_window_list;
50634 add_insn_window (insn, window_list, insn_num_uops);
50636 else
50637 gcc_unreachable ();
50639 if (is_end_basic_block (insn_group))
50641       /* The end of the basic block is reached; do end-of-basic-block processing. */
50642 process_end_window ();
50643 return;
50647 /* Print the dispatch window, WINDOW_NUM, to FILE. */
50649 DEBUG_FUNCTION static void
50650 debug_dispatch_window_file (FILE *file, int window_num)
50652 dispatch_windows *list;
50653 int i;
50655 if (window_num == 0)
50656 list = dispatch_window_list;
50657 else
50658 list = dispatch_window_list1;
50660 fprintf (file, "Window #%d:\n", list->window_num);
50661 fprintf (file, " num_insn = %d, num_uops = %d, window_size = %d\n",
50662 list->num_insn, list->num_uops, list->window_size);
50663 fprintf (file, " num_imm = %d, num_imm_32 = %d, num_imm_64 = %d, imm_size = %d\n",
50664 list->num_imm, list->num_imm_32, list->num_imm_64, list->imm_size);
50666 fprintf (file, " num_loads = %d, num_stores = %d\n", list->num_loads,
50667 list->num_stores);
50668 fprintf (file, " insn info:\n");
50670 for (i = 0; i < MAX_INSN; i++)
50672 if (!list->window[i].insn)
50673 break;
50674 fprintf (file, " group[%d] = %s, insn[%d] = %p, path[%d] = %d byte_len[%d] = %d, imm_bytes[%d] = %d\n",
50675 i, group_name[list->window[i].group],
50676 i, (void *)list->window[i].insn,
50677 i, list->window[i].path,
50678 i, list->window[i].byte_len,
50679 i, list->window[i].imm_bytes);
50683 /* Print to stdout a dispatch window. */
50685 DEBUG_FUNCTION void
50686 debug_dispatch_window (int window_num)
50688 debug_dispatch_window_file (stdout, window_num);
50691 /* Print INSN dispatch information to FILE. */
50693 DEBUG_FUNCTION static void
50694 debug_insn_dispatch_info_file (FILE *file, rtx_insn *insn)
50696 int byte_len;
50697 enum insn_path path;
50698 enum dispatch_group group;
50699 int imm_size;
50700 int num_imm_operand;
50701 int num_imm32_operand;
50702 int num_imm64_operand;
50704 if (INSN_CODE (insn) < 0)
50705 return;
50707 byte_len = min_insn_size (insn);
50708 path = get_insn_path (insn);
50709 group = get_insn_group (insn);
50710 imm_size = get_num_immediates (insn, &num_imm_operand, &num_imm32_operand,
50711 &num_imm64_operand);
50713 fprintf (file, " insn info:\n");
50714 fprintf (file, " group = %s, path = %d, byte_len = %d\n",
50715 group_name[group], path, byte_len);
50716 fprintf (file, " num_imm = %d, num_imm_32 = %d, num_imm_64 = %d, imm_size = %d\n",
50717 num_imm_operand, num_imm32_operand, num_imm64_operand, imm_size);
50720 /* Print to STDOUT the status of the ready list with respect to
50721 dispatch windows. */
50723 DEBUG_FUNCTION void
50724 debug_ready_dispatch (void)
50726 int i;
50727 int no_ready = number_in_ready ();
50729 fprintf (stdout, "Number of ready: %d\n", no_ready);
50731 for (i = 0; i < no_ready; i++)
50732 debug_insn_dispatch_info_file (stdout, get_ready_element (i));
50735 /* This routine is the driver of the dispatch scheduler. */
50737 static void
50738 do_dispatch (rtx_insn *insn, int mode)
50740 if (mode == DISPATCH_INIT)
50741 init_dispatch_sched ();
50742 else if (mode == ADD_TO_DISPATCH_WINDOW)
50743 add_to_dispatch_window (insn);
50746 /* Return TRUE if Dispatch Scheduling is supported. */
50748 static bool
50749 has_dispatch (rtx_insn *insn, int action)
50751 if ((TARGET_BDVER1 || TARGET_BDVER2 || TARGET_BDVER3 || TARGET_BDVER4)
50752 && flag_dispatch_scheduler)
50753 switch (action)
50755 default:
50756 return false;
50758 case IS_DISPATCH_ON:
50759 return true;
50760 break;
50762 case IS_CMP:
50763 return is_cmp (insn);
50765 case DISPATCH_VIOLATION:
50766 return dispatch_violation ();
50768 case FITS_DISPATCH_WINDOW:
50769 return fits_dispatch_window (insn);
50772 return false;
50775 /* Implementation of reassociation_width target hook used by
50776 reassoc phase to identify parallelism level in reassociated
50777    tree.  The statement's tree_code is passed in OPC.  The argument's type
50778    is passed in MODE.
50780 Currently parallel reassociation is enabled for Atom
50781 processors only and we set reassociation width to be 2
50782 because Atom may issue up to 2 instructions per cycle.
50784 Return value should be fixed if parallel reassociation is
50785 enabled for other processors. */
50787 static int
50788 ix86_reassociation_width (unsigned int, machine_mode mode)
50790 int res = 1;
50792 /* Vector part. */
50793 if (VECTOR_MODE_P (mode))
50795 if (TARGET_VECTOR_PARALLEL_EXECUTION)
50796 return 2;
50797 else
50798 return 1;
50801 /* Scalar part. */
50802 if (INTEGRAL_MODE_P (mode) && TARGET_REASSOC_INT_TO_PARALLEL)
50803 res = 2;
50804 else if (FLOAT_MODE_P (mode) && TARGET_REASSOC_FP_TO_PARALLEL)
50805 res = 2;
50807 return res;
50810 /* ??? No autovectorization into MMX or 3DNOW until we can reliably
50811 place emms and femms instructions. */
50813 static machine_mode
50814 ix86_preferred_simd_mode (machine_mode mode)
50816 if (!TARGET_SSE)
50817 return word_mode;
50819 switch (mode)
50821 case QImode:
50822 return TARGET_AVX512BW ? V64QImode :
50823 (TARGET_AVX && !TARGET_PREFER_AVX128) ? V32QImode : V16QImode;
50824 case HImode:
50825 return TARGET_AVX512BW ? V32HImode :
50826 (TARGET_AVX && !TARGET_PREFER_AVX128) ? V16HImode : V8HImode;
50827 case SImode:
50828 return TARGET_AVX512F ? V16SImode :
50829 (TARGET_AVX && !TARGET_PREFER_AVX128) ? V8SImode : V4SImode;
50830 case DImode:
50831 return TARGET_AVX512F ? V8DImode :
50832 (TARGET_AVX && !TARGET_PREFER_AVX128) ? V4DImode : V2DImode;
50834 case SFmode:
50835 if (TARGET_AVX512F)
50836 return V16SFmode;
50837 else if (TARGET_AVX && !TARGET_PREFER_AVX128)
50838 return V8SFmode;
50839 else
50840 return V4SFmode;
50842 case DFmode:
50843 if (!TARGET_VECTORIZE_DOUBLE)
50844 return word_mode;
50845 else if (TARGET_AVX512F)
50846 return V8DFmode;
50847 else if (TARGET_AVX && !TARGET_PREFER_AVX128)
50848 return V4DFmode;
50849 else if (TARGET_SSE2)
50850 return V2DFmode;
50851 /* FALLTHRU */
50853 default:
50854 return word_mode;
50858 /* If AVX is enabled then try vectorizing with both 256bit and 128bit
50859 vectors. If AVX512F is enabled then try vectorizing with 512bit,
50860 256bit and 128bit vectors. */
50862 static unsigned int
50863 ix86_autovectorize_vector_sizes (void)
50865 return TARGET_AVX512F ? 64 | 32 | 16 :
50866 (TARGET_AVX && !TARGET_PREFER_AVX128) ? 32 | 16 : 0;
50871 /* Return class of registers which could be used for pseudo of MODE
50872 and of class RCLASS for spilling instead of memory. Return NO_REGS
50873    if it is not possible or not profitable. */
50874 static reg_class_t
50875 ix86_spill_class (reg_class_t rclass, machine_mode mode)
50877 if (TARGET_SSE && TARGET_GENERAL_REGS_SSE_SPILL && ! TARGET_MMX
50878 && (mode == SImode || (TARGET_64BIT && mode == DImode))
50879 && rclass != NO_REGS && INTEGER_CLASS_P (rclass))
50880 return ALL_SSE_REGS;
50881 return NO_REGS;
50884 /* Implement targetm.vectorize.init_cost. */
50886 static void *
50887 ix86_init_cost (struct loop *)
50889 unsigned *cost = XNEWVEC (unsigned, 3);
50890 cost[vect_prologue] = cost[vect_body] = cost[vect_epilogue] = 0;
50891 return cost;
50894 /* Implement targetm.vectorize.add_stmt_cost. */
50896 static unsigned
50897 ix86_add_stmt_cost (void *data, int count, enum vect_cost_for_stmt kind,
50898 struct _stmt_vec_info *stmt_info, int misalign,
50899 enum vect_cost_model_location where)
50901 unsigned *cost = (unsigned *) data;
50902 unsigned retval = 0;
50904 tree vectype = stmt_info ? stmt_vectype (stmt_info) : NULL_TREE;
50905 int stmt_cost = ix86_builtin_vectorization_cost (kind, vectype, misalign);
50907 /* Statements in an inner loop relative to the loop being
50908 vectorized are weighted more heavily. The value here is
50909 arbitrary and could potentially be improved with analysis. */
50910 if (where == vect_body && stmt_info && stmt_in_inner_loop_p (stmt_info))
50911 count *= 50; /* FIXME. */
50913 retval = (unsigned) (count * stmt_cost);
50915 /* We need to multiply all vector stmt cost by 1.7 (estimated cost)
50916      for Silvermont, as it has an out-of-order integer pipeline and can execute
50917      2 scalar instructions per tick, but has an in-order SIMD pipeline. */
50918 if (TARGET_SILVERMONT || TARGET_INTEL)
50919 if (stmt_info && stmt_info->stmt)
50921 tree lhs_op = gimple_get_lhs (stmt_info->stmt);
50922 if (lhs_op && TREE_CODE (TREE_TYPE (lhs_op)) == INTEGER_TYPE)
50923 retval = (retval * 17) / 10;
50926 cost[where] += retval;
50928 return retval;
50931 /* Implement targetm.vectorize.finish_cost. */
50933 static void
50934 ix86_finish_cost (void *data, unsigned *prologue_cost,
50935 unsigned *body_cost, unsigned *epilogue_cost)
50937 unsigned *cost = (unsigned *) data;
50938 *prologue_cost = cost[vect_prologue];
50939 *body_cost = cost[vect_body];
50940 *epilogue_cost = cost[vect_epilogue];
50943 /* Implement targetm.vectorize.destroy_cost_data. */
50945 static void
50946 ix86_destroy_cost_data (void *data)
50948 free (data);
50951 /* Validate target specific memory model bits in VAL. */
50953 static unsigned HOST_WIDE_INT
50954 ix86_memmodel_check (unsigned HOST_WIDE_INT val)
50956 unsigned HOST_WIDE_INT model = val & MEMMODEL_MASK;
50957 bool strong;
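/* Reject values that set bits outside the HLE prefixes and the memory
   model mask, or that set both HLE prefixes at once.  */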
50959 if (val & ~(unsigned HOST_WIDE_INT)(IX86_HLE_ACQUIRE|IX86_HLE_RELEASE
50960 |MEMMODEL_MASK)
50961 || ((val & IX86_HLE_ACQUIRE) && (val & IX86_HLE_RELEASE)))
50963 warning (OPT_Winvalid_memory_model,
50964 "Unknown architecture specific memory model");
50965 return MEMMODEL_SEQ_CST;
50967 strong = (model == MEMMODEL_ACQ_REL || model == MEMMODEL_SEQ_CST);
50968 if (val & IX86_HLE_ACQUIRE && !(model == MEMMODEL_ACQUIRE || strong))
50970 warning (OPT_Winvalid_memory_model,
50971 "HLE_ACQUIRE not used with ACQUIRE or stronger memory model");
50972 return MEMMODEL_SEQ_CST | IX86_HLE_ACQUIRE;
50974 if (val & IX86_HLE_RELEASE && !(model == MEMMODEL_RELEASE || strong))
50976 warning (OPT_Winvalid_memory_model,
50977 "HLE_RELEASE not used with RELEASE or stronger memory model");
50978 return MEMMODEL_SEQ_CST | IX86_HLE_RELEASE;
50980 return val;
50983 /* Set CLONEI->vecsize_mangle, CLONEI->vecsize_int,
50984 CLONEI->vecsize_float and if CLONEI->simdlen is 0, also
50985 CLONEI->simdlen. Return 0 if SIMD clones shouldn't be emitted,
50986 or number of vecsize_mangle variants that should be emitted. */
50988 static int
50989 ix86_simd_clone_compute_vecsize_and_simdlen (struct cgraph_node *node,
50990 struct cgraph_simd_clone *clonei,
50991 tree base_type, int num)
50993 int ret = 1;
50995 if (clonei->simdlen
50996 && (clonei->simdlen < 2
50997 || clonei->simdlen > 16
50998 || (clonei->simdlen & (clonei->simdlen - 1)) != 0))
51000 warning_at (DECL_SOURCE_LOCATION (node->decl), 0,
51001 "unsupported simdlen %d", clonei->simdlen);
51002 return 0;
51005 tree ret_type = TREE_TYPE (TREE_TYPE (node->decl));
51006 if (TREE_CODE (ret_type) != VOID_TYPE)
51007 switch (TYPE_MODE (ret_type))
51009 case QImode:
51010 case HImode:
51011 case SImode:
51012 case DImode:
51013 case SFmode:
51014 case DFmode:
51015 /* case SCmode: */
51016 /* case DCmode: */
51017 break;
51018 default:
51019 warning_at (DECL_SOURCE_LOCATION (node->decl), 0,
51020 "unsupported return type %qT for simd\n", ret_type);
51021 return 0;
51024 tree t;
51025 int i;
51027 for (t = DECL_ARGUMENTS (node->decl), i = 0; t; t = DECL_CHAIN (t), i++)
51028 /* FIXME: Shouldn't we allow such arguments if they are uniform? */
51029 switch (TYPE_MODE (TREE_TYPE (t)))
51031 case QImode:
51032 case HImode:
51033 case SImode:
51034 case DImode:
51035 case SFmode:
51036 case DFmode:
51037 /* case SCmode: */
51038 /* case DCmode: */
51039 break;
51040 default:
51041 warning_at (DECL_SOURCE_LOCATION (node->decl), 0,
51042 "unsupported argument type %qT for simd\n", TREE_TYPE (t));
51043 return 0;
51046 if (clonei->cilk_elemental)
51048       /* The processor clause should be parsed here.  If not present, default to 'b'. */
51049 clonei->vecsize_mangle = 'b';
51051 else if (!TREE_PUBLIC (node->decl))
51053 /* If the function isn't exported, we can pick up just one ISA
51054 for the clones. */
51055 if (TARGET_AVX2)
51056 clonei->vecsize_mangle = 'd';
51057 else if (TARGET_AVX)
51058 clonei->vecsize_mangle = 'c';
51059 else
51060 clonei->vecsize_mangle = 'b';
51061 ret = 1;
51063 else
51065 clonei->vecsize_mangle = "bcd"[num];
51066 ret = 3;
51068 switch (clonei->vecsize_mangle)
51070 case 'b':
51071 clonei->vecsize_int = 128;
51072 clonei->vecsize_float = 128;
51073 break;
51074 case 'c':
51075 clonei->vecsize_int = 128;
51076 clonei->vecsize_float = 256;
51077 break;
51078 case 'd':
51079 clonei->vecsize_int = 256;
51080 clonei->vecsize_float = 256;
51081 break;
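/* If the user did not specify a simdlen, derive it from the chosen vector
   size (integer or float, depending on the base type) divided by the base
   type's bit width, capping the result at 16.  */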
51083 if (clonei->simdlen == 0)
51085 if (SCALAR_INT_MODE_P (TYPE_MODE (base_type)))
51086 clonei->simdlen = clonei->vecsize_int;
51087 else
51088 clonei->simdlen = clonei->vecsize_float;
51089 clonei->simdlen /= GET_MODE_BITSIZE (TYPE_MODE (base_type));
51090 if (clonei->simdlen > 16)
51091 clonei->simdlen = 16;
51093 return ret;
51096 /* Add target attribute to SIMD clone NODE if needed. */
51098 static void
51099 ix86_simd_clone_adjust (struct cgraph_node *node)
51101 const char *str = NULL;
51102 gcc_assert (node->decl == cfun->decl);
51103 switch (node->simdclone->vecsize_mangle)
51105 case 'b':
51106 if (!TARGET_SSE2)
51107 str = "sse2";
51108 break;
51109 case 'c':
51110 if (!TARGET_AVX)
51111 str = "avx";
51112 break;
51113 case 'd':
51114 if (!TARGET_AVX2)
51115 str = "avx2";
51116 break;
51117 default:
51118 gcc_unreachable ();
51120 if (str == NULL)
51121 return;
51122 push_cfun (NULL);
51123 tree args = build_tree_list (NULL_TREE, build_string (strlen (str), str));
51124 bool ok = ix86_valid_target_attribute_p (node->decl, NULL, args, 0);
51125 gcc_assert (ok);
51126 pop_cfun ();
51127 ix86_reset_previous_fndecl ();
51128 ix86_set_current_function (node->decl);
51131 /* If SIMD clone NODE can't be used in a vectorized loop
51132    in the current function, return -1; otherwise return the badness of using it
51133 (0 if it is most desirable from vecsize_mangle point of view, 1
51134 slightly less desirable, etc.). */
51136 static int
51137 ix86_simd_clone_usable (struct cgraph_node *node)
51139 switch (node->simdclone->vecsize_mangle)
51141 case 'b':
51142 if (!TARGET_SSE2)
51143 return -1;
51144 if (!TARGET_AVX)
51145 return 0;
51146 return TARGET_AVX2 ? 2 : 1;
51147 case 'c':
51148 if (!TARGET_AVX)
51149 return -1;
51150 return TARGET_AVX2 ? 1 : 0;
51151 break;
51152 case 'd':
51153 if (!TARGET_AVX2)
51154 return -1;
51155 return 0;
51156 default:
51157 gcc_unreachable ();
51161 /* This function adjusts the unroll factor based on
51162    the hardware capabilities.  For example, bdver3 has
51163    a loop buffer which makes unrolling smaller
51164    loops less important.  This function decides the
51165    unroll factor using the number of memory references
51166    (a value of 32 is used) as a heuristic.  */
51168 static unsigned
51169 ix86_loop_unroll_adjust (unsigned nunroll, struct loop *loop)
51171 basic_block *bbs;
51172 rtx_insn *insn;
51173 unsigned i;
51174 unsigned mem_count = 0;
51176 if (!TARGET_ADJUST_UNROLL)
51177 return nunroll;
51179 /* Count the number of memory references within the loop body.
51180 This value determines the unrolling factor for bdver3 and bdver4
51181 architectures. */
51182 subrtx_iterator::array_type array;
51183 bbs = get_loop_body (loop);
51184 for (i = 0; i < loop->num_nodes; i++)
51185 FOR_BB_INSNS (bbs[i], insn)
51186 if (NONDEBUG_INSN_P (insn))
51187 FOR_EACH_SUBRTX (iter, array, insn, NONCONST)
51188 if (const_rtx x = *iter)
51189 if (MEM_P (x))
51191 machine_mode mode = GET_MODE (x);
51192 unsigned int n_words = GET_MODE_SIZE (mode) / UNITS_PER_WORD;
51193 if (n_words > 4)
51194 mem_count += 2;
51195 else
51196 mem_count += 1;
51198 free (bbs);
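/* The heuristic targets roughly 32 memory references in the unrolled body,
   so loops with more memory references get a smaller unroll factor.  */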
51200   if (mem_count && mem_count <= 32)
51201     return 32 / mem_count;
51203 return nunroll;
51207 /* Implement TARGET_FLOAT_EXCEPTIONS_ROUNDING_SUPPORTED_P. */
51209 static bool
51210 ix86_float_exceptions_rounding_supported_p (void)
51212 /* For x87 floating point with standard excess precision handling,
51213 there is no adddf3 pattern (since x87 floating point only has
51214 XFmode operations) so the default hook implementation gets this
51215 wrong. */
51216 return TARGET_80387 || TARGET_SSE_MATH;
51219 /* Implement TARGET_ATOMIC_ASSIGN_EXPAND_FENV. */
51221 static void
51222 ix86_atomic_assign_expand_fenv (tree *hold, tree *clear, tree *update)
51224 if (!TARGET_80387 && !TARGET_SSE_MATH)
51225 return;
51226 tree exceptions_var = create_tmp_var (integer_type_node, NULL);
51227 if (TARGET_80387)
51229 tree fenv_index_type = build_index_type (size_int (6));
51230 tree fenv_type = build_array_type (unsigned_type_node, fenv_index_type);
51231 tree fenv_var = create_tmp_var (fenv_type, NULL);
51232 mark_addressable (fenv_var);
51233 tree fenv_ptr = build_pointer_type (fenv_type);
51234 tree fenv_addr = build1 (ADDR_EXPR, fenv_ptr, fenv_var);
51235 fenv_addr = fold_convert (ptr_type_node, fenv_addr);
51236 tree fnstenv = ix86_builtins[IX86_BUILTIN_FNSTENV];
51237 tree fldenv = ix86_builtins[IX86_BUILTIN_FLDENV];
51238 tree fnstsw = ix86_builtins[IX86_BUILTIN_FNSTSW];
51239 tree fnclex = ix86_builtins[IX86_BUILTIN_FNCLEX];
51240 tree hold_fnstenv = build_call_expr (fnstenv, 1, fenv_addr);
51241 tree hold_fnclex = build_call_expr (fnclex, 0);
51242 *hold = build2 (COMPOUND_EXPR, void_type_node, hold_fnstenv,
51243 hold_fnclex);
51244 *clear = build_call_expr (fnclex, 0);
51245 tree sw_var = create_tmp_var (short_unsigned_type_node, NULL);
51246 tree fnstsw_call = build_call_expr (fnstsw, 0);
51247 tree sw_mod = build2 (MODIFY_EXPR, short_unsigned_type_node,
51248 sw_var, fnstsw_call);
51249 tree exceptions_x87 = fold_convert (integer_type_node, sw_var);
51250 tree update_mod = build2 (MODIFY_EXPR, integer_type_node,
51251 exceptions_var, exceptions_x87);
51252 *update = build2 (COMPOUND_EXPR, integer_type_node,
51253 sw_mod, update_mod);
51254 tree update_fldenv = build_call_expr (fldenv, 1, fenv_addr);
51255 *update = build2 (COMPOUND_EXPR, void_type_node, *update, update_fldenv);
51257 if (TARGET_SSE_MATH)
51259 tree mxcsr_orig_var = create_tmp_var (unsigned_type_node, NULL);
51260 tree mxcsr_mod_var = create_tmp_var (unsigned_type_node, NULL);
51261 tree stmxcsr = ix86_builtins[IX86_BUILTIN_STMXCSR];
51262 tree ldmxcsr = ix86_builtins[IX86_BUILTIN_LDMXCSR];
51263 tree stmxcsr_hold_call = build_call_expr (stmxcsr, 0);
51264 tree hold_assign_orig = build2 (MODIFY_EXPR, unsigned_type_node,
51265 mxcsr_orig_var, stmxcsr_hold_call);
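/* Build the MXCSR value used while the operation runs: set the exception
   mask bits (0x1f80) and clear the exception flag bits (the low six bits)
   in the saved copy.  */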
51266 tree hold_mod_val = build2 (BIT_IOR_EXPR, unsigned_type_node,
51267 mxcsr_orig_var,
51268 build_int_cst (unsigned_type_node, 0x1f80));
51269 hold_mod_val = build2 (BIT_AND_EXPR, unsigned_type_node, hold_mod_val,
51270 build_int_cst (unsigned_type_node, 0xffffffc0));
51271 tree hold_assign_mod = build2 (MODIFY_EXPR, unsigned_type_node,
51272 mxcsr_mod_var, hold_mod_val);
51273 tree ldmxcsr_hold_call = build_call_expr (ldmxcsr, 1, mxcsr_mod_var);
51274 tree hold_all = build2 (COMPOUND_EXPR, unsigned_type_node,
51275 hold_assign_orig, hold_assign_mod);
51276 hold_all = build2 (COMPOUND_EXPR, void_type_node, hold_all,
51277 ldmxcsr_hold_call);
51278 if (*hold)
51279 *hold = build2 (COMPOUND_EXPR, void_type_node, *hold, hold_all);
51280 else
51281 *hold = hold_all;
51282 tree ldmxcsr_clear_call = build_call_expr (ldmxcsr, 1, mxcsr_mod_var);
51283 if (*clear)
51284 *clear = build2 (COMPOUND_EXPR, void_type_node, *clear,
51285 ldmxcsr_clear_call);
51286 else
51287 *clear = ldmxcsr_clear_call;
51288 tree stxmcsr_update_call = build_call_expr (stmxcsr, 0);
51289 tree exceptions_sse = fold_convert (integer_type_node,
51290 stxmcsr_update_call);
51291 if (*update)
51293 tree exceptions_mod = build2 (BIT_IOR_EXPR, integer_type_node,
51294 exceptions_var, exceptions_sse);
51295 tree exceptions_assign = build2 (MODIFY_EXPR, integer_type_node,
51296 exceptions_var, exceptions_mod);
51297 *update = build2 (COMPOUND_EXPR, integer_type_node, *update,
51298 exceptions_assign);
51300 else
51301 *update = build2 (MODIFY_EXPR, integer_type_node,
51302 exceptions_var, exceptions_sse);
51303 tree ldmxcsr_update_call = build_call_expr (ldmxcsr, 1, mxcsr_orig_var);
51304 *update = build2 (COMPOUND_EXPR, void_type_node, *update,
51305 ldmxcsr_update_call);
51307 tree atomic_feraiseexcept
51308 = builtin_decl_implicit (BUILT_IN_ATOMIC_FERAISEEXCEPT);
51309 tree atomic_feraiseexcept_call = build_call_expr (atomic_feraiseexcept,
51310 1, exceptions_var);
51311 *update = build2 (COMPOUND_EXPR, void_type_node, *update,
51312 atomic_feraiseexcept_call);
51315 /* Return mode to be used for bounds or VOIDmode
51316 if bounds are not supported. */
51318 static enum machine_mode
51319 ix86_mpx_bound_mode ()
51321 /* Do not support pointer checker if MPX
51322 is not enabled. */
51323 if (!TARGET_MPX)
51325 if (flag_check_pointer_bounds)
51326 warning (0, "Pointer Checker requires MPX support on this target."
51327 " Use -mmpx options to enable MPX.");
51328 return VOIDmode;
51331 return BNDmode;
51334 /* Return constant used to statically initialize constant bounds.
51336 This function is used to create special bound values. For now
51337 only INIT bounds and NONE bounds are expected. More special
51338 values may be added later. */
51340 static tree
51341 ix86_make_bounds_constant (HOST_WIDE_INT lb, HOST_WIDE_INT ub)
51343 tree low = lb ? build_minus_one_cst (pointer_sized_int_node)
51344 : build_zero_cst (pointer_sized_int_node);
51345 tree high = ub ? build_zero_cst (pointer_sized_int_node)
51346 : build_minus_one_cst (pointer_sized_int_node);
51348 /* This function is supposed to be used to create INIT and
51349 NONE bounds only. */
51350 gcc_assert ((lb == 0 && ub == -1)
51351 || (lb == -1 && ub == 0));
51353 return build_complex (NULL, low, high);
51356 /* Generate a list of statements STMTS to initialize pointer bounds
51357 variable VAR with bounds LB and UB. Return the number of generated
51358 statements. */
51360 static int
51361 ix86_initialize_bounds (tree var, tree lb, tree ub, tree *stmts)
51363 tree bnd_ptr = build_pointer_type (pointer_sized_int_node);
51364 tree lhs, modify, var_p;
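/* The upper bound is stored in complemented form, hence the BIT_NOT_EXPR
   applied to UB below; LB is written to the first pointer-sized slot of VAR
   and ~UB to the second.  */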
51366 ub = build1 (BIT_NOT_EXPR, pointer_sized_int_node, ub);
51367 var_p = fold_convert (bnd_ptr, build_fold_addr_expr (var));
51369 lhs = build1 (INDIRECT_REF, pointer_sized_int_node, var_p);
51370 modify = build2 (MODIFY_EXPR, TREE_TYPE (lhs), lhs, lb);
51371 append_to_statement_list (modify, stmts);
51373 lhs = build1 (INDIRECT_REF, pointer_sized_int_node,
51374 build2 (POINTER_PLUS_EXPR, bnd_ptr, var_p,
51375 TYPE_SIZE_UNIT (pointer_sized_int_node)));
51376 modify = build2 (MODIFY_EXPR, TREE_TYPE (lhs), lhs, ub);
51377 append_to_statement_list (modify, stmts);
51379 return 2;
51382 /* Initialize the GCC target structure. */
51383 #undef TARGET_RETURN_IN_MEMORY
51384 #define TARGET_RETURN_IN_MEMORY ix86_return_in_memory
51386 #undef TARGET_LEGITIMIZE_ADDRESS
51387 #define TARGET_LEGITIMIZE_ADDRESS ix86_legitimize_address
51389 #undef TARGET_ATTRIBUTE_TABLE
51390 #define TARGET_ATTRIBUTE_TABLE ix86_attribute_table
51391 #undef TARGET_FUNCTION_ATTRIBUTE_INLINABLE_P
51392 #define TARGET_FUNCTION_ATTRIBUTE_INLINABLE_P hook_bool_const_tree_true
51393 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
51394 # undef TARGET_MERGE_DECL_ATTRIBUTES
51395 # define TARGET_MERGE_DECL_ATTRIBUTES merge_dllimport_decl_attributes
51396 #endif
51398 #undef TARGET_COMP_TYPE_ATTRIBUTES
51399 #define TARGET_COMP_TYPE_ATTRIBUTES ix86_comp_type_attributes
51401 #undef TARGET_INIT_BUILTINS
51402 #define TARGET_INIT_BUILTINS ix86_init_builtins
51403 #undef TARGET_BUILTIN_DECL
51404 #define TARGET_BUILTIN_DECL ix86_builtin_decl
51405 #undef TARGET_EXPAND_BUILTIN
51406 #define TARGET_EXPAND_BUILTIN ix86_expand_builtin
51408 #undef TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION
51409 #define TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION \
51410 ix86_builtin_vectorized_function
51412 #undef TARGET_VECTORIZE_BUILTIN_TM_LOAD
51413 #define TARGET_VECTORIZE_BUILTIN_TM_LOAD ix86_builtin_tm_load
51415 #undef TARGET_VECTORIZE_BUILTIN_TM_STORE
51416 #define TARGET_VECTORIZE_BUILTIN_TM_STORE ix86_builtin_tm_store
51418 #undef TARGET_VECTORIZE_BUILTIN_GATHER
51419 #define TARGET_VECTORIZE_BUILTIN_GATHER ix86_vectorize_builtin_gather
51421 #undef TARGET_BUILTIN_RECIPROCAL
51422 #define TARGET_BUILTIN_RECIPROCAL ix86_builtin_reciprocal
51424 #undef TARGET_ASM_FUNCTION_EPILOGUE
51425 #define TARGET_ASM_FUNCTION_EPILOGUE ix86_output_function_epilogue
51427 #undef TARGET_ENCODE_SECTION_INFO
51428 #ifndef SUBTARGET_ENCODE_SECTION_INFO
51429 #define TARGET_ENCODE_SECTION_INFO ix86_encode_section_info
51430 #else
51431 #define TARGET_ENCODE_SECTION_INFO SUBTARGET_ENCODE_SECTION_INFO
51432 #endif
51434 #undef TARGET_ASM_OPEN_PAREN
51435 #define TARGET_ASM_OPEN_PAREN ""
51436 #undef TARGET_ASM_CLOSE_PAREN
51437 #define TARGET_ASM_CLOSE_PAREN ""
51439 #undef TARGET_ASM_BYTE_OP
51440 #define TARGET_ASM_BYTE_OP ASM_BYTE
51442 #undef TARGET_ASM_ALIGNED_HI_OP
51443 #define TARGET_ASM_ALIGNED_HI_OP ASM_SHORT
51444 #undef TARGET_ASM_ALIGNED_SI_OP
51445 #define TARGET_ASM_ALIGNED_SI_OP ASM_LONG
51446 #ifdef ASM_QUAD
51447 #undef TARGET_ASM_ALIGNED_DI_OP
51448 #define TARGET_ASM_ALIGNED_DI_OP ASM_QUAD
51449 #endif
51451 #undef TARGET_PROFILE_BEFORE_PROLOGUE
51452 #define TARGET_PROFILE_BEFORE_PROLOGUE ix86_profile_before_prologue
51454 #undef TARGET_MANGLE_DECL_ASSEMBLER_NAME
51455 #define TARGET_MANGLE_DECL_ASSEMBLER_NAME ix86_mangle_decl_assembler_name
51457 #undef TARGET_ASM_UNALIGNED_HI_OP
51458 #define TARGET_ASM_UNALIGNED_HI_OP TARGET_ASM_ALIGNED_HI_OP
51459 #undef TARGET_ASM_UNALIGNED_SI_OP
51460 #define TARGET_ASM_UNALIGNED_SI_OP TARGET_ASM_ALIGNED_SI_OP
51461 #undef TARGET_ASM_UNALIGNED_DI_OP
51462 #define TARGET_ASM_UNALIGNED_DI_OP TARGET_ASM_ALIGNED_DI_OP
51464 #undef TARGET_PRINT_OPERAND
51465 #define TARGET_PRINT_OPERAND ix86_print_operand
51466 #undef TARGET_PRINT_OPERAND_ADDRESS
51467 #define TARGET_PRINT_OPERAND_ADDRESS ix86_print_operand_address
51468 #undef TARGET_PRINT_OPERAND_PUNCT_VALID_P
51469 #define TARGET_PRINT_OPERAND_PUNCT_VALID_P ix86_print_operand_punct_valid_p
51470 #undef TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA
51471 #define TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA i386_asm_output_addr_const_extra
51473 #undef TARGET_SCHED_INIT_GLOBAL
51474 #define TARGET_SCHED_INIT_GLOBAL ix86_sched_init_global
51475 #undef TARGET_SCHED_ADJUST_COST
51476 #define TARGET_SCHED_ADJUST_COST ix86_adjust_cost
51477 #undef TARGET_SCHED_ISSUE_RATE
51478 #define TARGET_SCHED_ISSUE_RATE ix86_issue_rate
51479 #undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD
51480 #define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD \
51481 ia32_multipass_dfa_lookahead
51482 #undef TARGET_SCHED_MACRO_FUSION_P
51483 #define TARGET_SCHED_MACRO_FUSION_P ix86_macro_fusion_p
51484 #undef TARGET_SCHED_MACRO_FUSION_PAIR_P
51485 #define TARGET_SCHED_MACRO_FUSION_PAIR_P ix86_macro_fusion_pair_p
51487 #undef TARGET_FUNCTION_OK_FOR_SIBCALL
51488 #define TARGET_FUNCTION_OK_FOR_SIBCALL ix86_function_ok_for_sibcall
51490 #undef TARGET_MEMMODEL_CHECK
51491 #define TARGET_MEMMODEL_CHECK ix86_memmodel_check
51493 #undef TARGET_ATOMIC_ASSIGN_EXPAND_FENV
51494 #define TARGET_ATOMIC_ASSIGN_EXPAND_FENV ix86_atomic_assign_expand_fenv
51496 #ifdef HAVE_AS_TLS
51497 #undef TARGET_HAVE_TLS
51498 #define TARGET_HAVE_TLS true
51499 #endif
51500 #undef TARGET_CANNOT_FORCE_CONST_MEM
51501 #define TARGET_CANNOT_FORCE_CONST_MEM ix86_cannot_force_const_mem
51502 #undef TARGET_USE_BLOCKS_FOR_CONSTANT_P
51503 #define TARGET_USE_BLOCKS_FOR_CONSTANT_P hook_bool_mode_const_rtx_true
51505 #undef TARGET_DELEGITIMIZE_ADDRESS
51506 #define TARGET_DELEGITIMIZE_ADDRESS ix86_delegitimize_address
51508 #undef TARGET_MS_BITFIELD_LAYOUT_P
51509 #define TARGET_MS_BITFIELD_LAYOUT_P ix86_ms_bitfield_layout_p
51511 #if TARGET_MACHO
51512 #undef TARGET_BINDS_LOCAL_P
51513 #define TARGET_BINDS_LOCAL_P darwin_binds_local_p
51514 #endif
51515 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
51516 #undef TARGET_BINDS_LOCAL_P
51517 #define TARGET_BINDS_LOCAL_P i386_pe_binds_local_p
51518 #endif
51520 #undef TARGET_ASM_OUTPUT_MI_THUNK
51521 #define TARGET_ASM_OUTPUT_MI_THUNK x86_output_mi_thunk
51522 #undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
51523 #define TARGET_ASM_CAN_OUTPUT_MI_THUNK x86_can_output_mi_thunk
51525 #undef TARGET_ASM_FILE_START
51526 #define TARGET_ASM_FILE_START x86_file_start
51528 #undef TARGET_OPTION_OVERRIDE
51529 #define TARGET_OPTION_OVERRIDE ix86_option_override
51531 #undef TARGET_REGISTER_MOVE_COST
51532 #define TARGET_REGISTER_MOVE_COST ix86_register_move_cost
51533 #undef TARGET_MEMORY_MOVE_COST
51534 #define TARGET_MEMORY_MOVE_COST ix86_memory_move_cost
51535 #undef TARGET_RTX_COSTS
51536 #define TARGET_RTX_COSTS ix86_rtx_costs
51537 #undef TARGET_ADDRESS_COST
51538 #define TARGET_ADDRESS_COST ix86_address_cost
51540 #undef TARGET_FIXED_CONDITION_CODE_REGS
51541 #define TARGET_FIXED_CONDITION_CODE_REGS ix86_fixed_condition_code_regs
51542 #undef TARGET_CC_MODES_COMPATIBLE
51543 #define TARGET_CC_MODES_COMPATIBLE ix86_cc_modes_compatible
51545 #undef TARGET_MACHINE_DEPENDENT_REORG
51546 #define TARGET_MACHINE_DEPENDENT_REORG ix86_reorg
51548 #undef TARGET_BUILTIN_SETJMP_FRAME_VALUE
51549 #define TARGET_BUILTIN_SETJMP_FRAME_VALUE ix86_builtin_setjmp_frame_value
51551 #undef TARGET_BUILD_BUILTIN_VA_LIST
51552 #define TARGET_BUILD_BUILTIN_VA_LIST ix86_build_builtin_va_list
51554 #undef TARGET_FOLD_BUILTIN
51555 #define TARGET_FOLD_BUILTIN ix86_fold_builtin
51557 #undef TARGET_COMPARE_VERSION_PRIORITY
51558 #define TARGET_COMPARE_VERSION_PRIORITY ix86_compare_version_priority
51560 #undef TARGET_GENERATE_VERSION_DISPATCHER_BODY
51561 #define TARGET_GENERATE_VERSION_DISPATCHER_BODY \
51562 ix86_generate_version_dispatcher_body
51564 #undef TARGET_GET_FUNCTION_VERSIONS_DISPATCHER
51565 #define TARGET_GET_FUNCTION_VERSIONS_DISPATCHER \
51566 ix86_get_function_versions_dispatcher
51568 #undef TARGET_ENUM_VA_LIST_P
51569 #define TARGET_ENUM_VA_LIST_P ix86_enum_va_list
51571 #undef TARGET_FN_ABI_VA_LIST
51572 #define TARGET_FN_ABI_VA_LIST ix86_fn_abi_va_list
51574 #undef TARGET_CANONICAL_VA_LIST_TYPE
51575 #define TARGET_CANONICAL_VA_LIST_TYPE ix86_canonical_va_list_type
51577 #undef TARGET_EXPAND_BUILTIN_VA_START
51578 #define TARGET_EXPAND_BUILTIN_VA_START ix86_va_start
51580 #undef TARGET_MD_ASM_CLOBBERS
51581 #define TARGET_MD_ASM_CLOBBERS ix86_md_asm_clobbers
51583 #undef TARGET_PROMOTE_PROTOTYPES
51584 #define TARGET_PROMOTE_PROTOTYPES hook_bool_const_tree_true
51585 #undef TARGET_SETUP_INCOMING_VARARGS
51586 #define TARGET_SETUP_INCOMING_VARARGS ix86_setup_incoming_varargs
51587 #undef TARGET_MUST_PASS_IN_STACK
51588 #define TARGET_MUST_PASS_IN_STACK ix86_must_pass_in_stack
51589 #undef TARGET_FUNCTION_ARG_ADVANCE
51590 #define TARGET_FUNCTION_ARG_ADVANCE ix86_function_arg_advance
51591 #undef TARGET_FUNCTION_ARG
51592 #define TARGET_FUNCTION_ARG ix86_function_arg
51593 #undef TARGET_INIT_PIC_REG
51594 #define TARGET_INIT_PIC_REG ix86_init_pic_reg
51595 #undef TARGET_USE_PSEUDO_PIC_REG
51596 #define TARGET_USE_PSEUDO_PIC_REG ix86_use_pseudo_pic_reg
51597 #undef TARGET_FUNCTION_ARG_BOUNDARY
51598 #define TARGET_FUNCTION_ARG_BOUNDARY ix86_function_arg_boundary
51599 #undef TARGET_PASS_BY_REFERENCE
51600 #define TARGET_PASS_BY_REFERENCE ix86_pass_by_reference
51601 #undef TARGET_INTERNAL_ARG_POINTER
51602 #define TARGET_INTERNAL_ARG_POINTER ix86_internal_arg_pointer
51603 #undef TARGET_UPDATE_STACK_BOUNDARY
51604 #define TARGET_UPDATE_STACK_BOUNDARY ix86_update_stack_boundary
51605 #undef TARGET_GET_DRAP_RTX
51606 #define TARGET_GET_DRAP_RTX ix86_get_drap_rtx
51607 #undef TARGET_STRICT_ARGUMENT_NAMING
51608 #define TARGET_STRICT_ARGUMENT_NAMING hook_bool_CUMULATIVE_ARGS_true
51609 #undef TARGET_STATIC_CHAIN
51610 #define TARGET_STATIC_CHAIN ix86_static_chain
51611 #undef TARGET_TRAMPOLINE_INIT
51612 #define TARGET_TRAMPOLINE_INIT ix86_trampoline_init
51613 #undef TARGET_RETURN_POPS_ARGS
51614 #define TARGET_RETURN_POPS_ARGS ix86_return_pops_args
51616 #undef TARGET_LEGITIMATE_COMBINED_INSN
51617 #define TARGET_LEGITIMATE_COMBINED_INSN ix86_legitimate_combined_insn
51619 #undef TARGET_ASAN_SHADOW_OFFSET
51620 #define TARGET_ASAN_SHADOW_OFFSET ix86_asan_shadow_offset
51622 #undef TARGET_GIMPLIFY_VA_ARG_EXPR
51623 #define TARGET_GIMPLIFY_VA_ARG_EXPR ix86_gimplify_va_arg
51625 #undef TARGET_SCALAR_MODE_SUPPORTED_P
51626 #define TARGET_SCALAR_MODE_SUPPORTED_P ix86_scalar_mode_supported_p
51628 #undef TARGET_VECTOR_MODE_SUPPORTED_P
51629 #define TARGET_VECTOR_MODE_SUPPORTED_P ix86_vector_mode_supported_p
51631 #undef TARGET_LIBGCC_FLOATING_MODE_SUPPORTED_P
51632 #define TARGET_LIBGCC_FLOATING_MODE_SUPPORTED_P \
51633 ix86_libgcc_floating_mode_supported_p
51635 #undef TARGET_C_MODE_FOR_SUFFIX
51636 #define TARGET_C_MODE_FOR_SUFFIX ix86_c_mode_for_suffix
51638 #ifdef HAVE_AS_TLS
51639 #undef TARGET_ASM_OUTPUT_DWARF_DTPREL
51640 #define TARGET_ASM_OUTPUT_DWARF_DTPREL i386_output_dwarf_dtprel
51641 #endif
51643 #ifdef SUBTARGET_INSERT_ATTRIBUTES
51644 #undef TARGET_INSERT_ATTRIBUTES
51645 #define TARGET_INSERT_ATTRIBUTES SUBTARGET_INSERT_ATTRIBUTES
51646 #endif
51648 #undef TARGET_MANGLE_TYPE
51649 #define TARGET_MANGLE_TYPE ix86_mangle_type
51651 #if !TARGET_MACHO
51652 #undef TARGET_STACK_PROTECT_FAIL
51653 #define TARGET_STACK_PROTECT_FAIL ix86_stack_protect_fail
51654 #endif
51656 #undef TARGET_FUNCTION_VALUE
51657 #define TARGET_FUNCTION_VALUE ix86_function_value
51659 #undef TARGET_FUNCTION_VALUE_REGNO_P
51660 #define TARGET_FUNCTION_VALUE_REGNO_P ix86_function_value_regno_p
51662 #undef TARGET_PROMOTE_FUNCTION_MODE
51663 #define TARGET_PROMOTE_FUNCTION_MODE ix86_promote_function_mode
51665 #undef TARGET_MEMBER_TYPE_FORCES_BLK
51666 #define TARGET_MEMBER_TYPE_FORCES_BLK ix86_member_type_forces_blk
51668 #undef TARGET_INSTANTIATE_DECLS
51669 #define TARGET_INSTANTIATE_DECLS ix86_instantiate_decls
51671 #undef TARGET_SECONDARY_RELOAD
51672 #define TARGET_SECONDARY_RELOAD ix86_secondary_reload
51674 #undef TARGET_CLASS_MAX_NREGS
51675 #define TARGET_CLASS_MAX_NREGS ix86_class_max_nregs
51677 #undef TARGET_PREFERRED_RELOAD_CLASS
51678 #define TARGET_PREFERRED_RELOAD_CLASS ix86_preferred_reload_class
51679 #undef TARGET_PREFERRED_OUTPUT_RELOAD_CLASS
51680 #define TARGET_PREFERRED_OUTPUT_RELOAD_CLASS ix86_preferred_output_reload_class
51681 #undef TARGET_CLASS_LIKELY_SPILLED_P
51682 #define TARGET_CLASS_LIKELY_SPILLED_P ix86_class_likely_spilled_p
51684 #undef TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST
51685 #define TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST \
51686 ix86_builtin_vectorization_cost
51687 #undef TARGET_VECTORIZE_VEC_PERM_CONST_OK
51688 #define TARGET_VECTORIZE_VEC_PERM_CONST_OK \
51689 ix86_vectorize_vec_perm_const_ok
51690 #undef TARGET_VECTORIZE_PREFERRED_SIMD_MODE
51691 #define TARGET_VECTORIZE_PREFERRED_SIMD_MODE \
51692 ix86_preferred_simd_mode
51693 #undef TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_SIZES
51694 #define TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_SIZES \
51695 ix86_autovectorize_vector_sizes
51696 #undef TARGET_VECTORIZE_INIT_COST
51697 #define TARGET_VECTORIZE_INIT_COST ix86_init_cost
51698 #undef TARGET_VECTORIZE_ADD_STMT_COST
51699 #define TARGET_VECTORIZE_ADD_STMT_COST ix86_add_stmt_cost
51700 #undef TARGET_VECTORIZE_FINISH_COST
51701 #define TARGET_VECTORIZE_FINISH_COST ix86_finish_cost
51702 #undef TARGET_VECTORIZE_DESTROY_COST_DATA
51703 #define TARGET_VECTORIZE_DESTROY_COST_DATA ix86_destroy_cost_data
51705 #undef TARGET_SET_CURRENT_FUNCTION
51706 #define TARGET_SET_CURRENT_FUNCTION ix86_set_current_function
51708 #undef TARGET_OPTION_VALID_ATTRIBUTE_P
51709 #define TARGET_OPTION_VALID_ATTRIBUTE_P ix86_valid_target_attribute_p
51711 #undef TARGET_OPTION_SAVE
51712 #define TARGET_OPTION_SAVE ix86_function_specific_save
51714 #undef TARGET_OPTION_RESTORE
51715 #define TARGET_OPTION_RESTORE ix86_function_specific_restore
51717 #undef TARGET_OPTION_PRINT
51718 #define TARGET_OPTION_PRINT ix86_function_specific_print
51720 #undef TARGET_OPTION_FUNCTION_VERSIONS
51721 #define TARGET_OPTION_FUNCTION_VERSIONS ix86_function_versions
51723 #undef TARGET_CAN_INLINE_P
51724 #define TARGET_CAN_INLINE_P ix86_can_inline_p
51726 #undef TARGET_EXPAND_TO_RTL_HOOK
51727 #define TARGET_EXPAND_TO_RTL_HOOK ix86_maybe_switch_abi
51729 #undef TARGET_LEGITIMATE_ADDRESS_P
51730 #define TARGET_LEGITIMATE_ADDRESS_P ix86_legitimate_address_p
51732 #undef TARGET_LRA_P
51733 #define TARGET_LRA_P hook_bool_void_true
51735 #undef TARGET_REGISTER_PRIORITY
51736 #define TARGET_REGISTER_PRIORITY ix86_register_priority
51738 #undef TARGET_REGISTER_USAGE_LEVELING_P
51739 #define TARGET_REGISTER_USAGE_LEVELING_P hook_bool_void_true
51741 #undef TARGET_LEGITIMATE_CONSTANT_P
51742 #define TARGET_LEGITIMATE_CONSTANT_P ix86_legitimate_constant_p
51744 #undef TARGET_FRAME_POINTER_REQUIRED
51745 #define TARGET_FRAME_POINTER_REQUIRED ix86_frame_pointer_required
51747 #undef TARGET_CAN_ELIMINATE
51748 #define TARGET_CAN_ELIMINATE ix86_can_eliminate
51750 #undef TARGET_EXTRA_LIVE_ON_ENTRY
51751 #define TARGET_EXTRA_LIVE_ON_ENTRY ix86_live_on_entry
51753 #undef TARGET_ASM_CODE_END
51754 #define TARGET_ASM_CODE_END ix86_code_end
51756 #undef TARGET_CONDITIONAL_REGISTER_USAGE
51757 #define TARGET_CONDITIONAL_REGISTER_USAGE ix86_conditional_register_usage
51759 #if TARGET_MACHO
51760 #undef TARGET_INIT_LIBFUNCS
51761 #define TARGET_INIT_LIBFUNCS darwin_rename_builtins
51762 #endif
51764 #undef TARGET_LOOP_UNROLL_ADJUST
51765 #define TARGET_LOOP_UNROLL_ADJUST ix86_loop_unroll_adjust
51767 #undef TARGET_SPILL_CLASS
51768 #define TARGET_SPILL_CLASS ix86_spill_class
51770 #undef TARGET_SIMD_CLONE_COMPUTE_VECSIZE_AND_SIMDLEN
51771 #define TARGET_SIMD_CLONE_COMPUTE_VECSIZE_AND_SIMDLEN \
51772 ix86_simd_clone_compute_vecsize_and_simdlen
51774 #undef TARGET_SIMD_CLONE_ADJUST
51775 #define TARGET_SIMD_CLONE_ADJUST \
51776 ix86_simd_clone_adjust
51778 #undef TARGET_SIMD_CLONE_USABLE
51779 #define TARGET_SIMD_CLONE_USABLE \
51780 ix86_simd_clone_usable
51782 #undef TARGET_FLOAT_EXCEPTIONS_ROUNDING_SUPPORTED_P
51783 #define TARGET_FLOAT_EXCEPTIONS_ROUNDING_SUPPORTED_P \
51784 ix86_float_exceptions_rounding_supported_p
51786 #undef TARGET_MODE_EMIT
51787 #define TARGET_MODE_EMIT ix86_emit_mode_set
51789 #undef TARGET_MODE_NEEDED
51790 #define TARGET_MODE_NEEDED ix86_mode_needed
51792 #undef TARGET_MODE_AFTER
51793 #define TARGET_MODE_AFTER ix86_mode_after
51795 #undef TARGET_MODE_ENTRY
51796 #define TARGET_MODE_ENTRY ix86_mode_entry
51798 #undef TARGET_MODE_EXIT
51799 #define TARGET_MODE_EXIT ix86_mode_exit
51801 #undef TARGET_MODE_PRIORITY
51802 #define TARGET_MODE_PRIORITY ix86_mode_priority
51804 #undef TARGET_CALL_FUSAGE_CONTAINS_NON_CALLEE_CLOBBERS
51805 #define TARGET_CALL_FUSAGE_CONTAINS_NON_CALLEE_CLOBBERS true
51807 #undef TARGET_LOAD_BOUNDS_FOR_ARG
51808 #define TARGET_LOAD_BOUNDS_FOR_ARG ix86_load_bounds
51810 #undef TARGET_STORE_BOUNDS_FOR_ARG
51811 #define TARGET_STORE_BOUNDS_FOR_ARG ix86_store_bounds
51813 #undef TARGET_LOAD_RETURNED_BOUNDS
51814 #define TARGET_LOAD_RETURNED_BOUNDS ix86_load_returned_bounds
51816 #undef TARGET_STORE_RETURNED_BOUNDS
51817 #define TARGET_STORE_RETURNED_BOUNDS ix86_store_returned_bounds
51819 #undef TARGET_CHKP_BOUND_MODE
51820 #define TARGET_CHKP_BOUND_MODE ix86_mpx_bound_mode
51822 #undef TARGET_BUILTIN_CHKP_FUNCTION
51823 #define TARGET_BUILTIN_CHKP_FUNCTION ix86_builtin_mpx_function
51825 #undef TARGET_CHKP_FUNCTION_VALUE_BOUNDS
51826 #define TARGET_CHKP_FUNCTION_VALUE_BOUNDS ix86_function_value_bounds
51828 #undef TARGET_CHKP_MAKE_BOUNDS_CONSTANT
51829 #define TARGET_CHKP_MAKE_BOUNDS_CONSTANT ix86_make_bounds_constant
51831 #undef TARGET_CHKP_INITIALIZE_BOUNDS
51832 #define TARGET_CHKP_INITIALIZE_BOUNDS ix86_initialize_bounds
51834 #undef TARGET_SETUP_INCOMING_VARARG_BOUNDS
51835 #define TARGET_SETUP_INCOMING_VARARG_BOUNDS ix86_setup_incoming_vararg_bounds
51837 #undef TARGET_OFFLOAD_OPTIONS
51838 #define TARGET_OFFLOAD_OPTIONS \
51839 ix86_offload_options
51841 struct gcc_target targetm = TARGET_INITIALIZER;
51843 #include "gt-i386.h"