1 /* Subroutines used for code generation on IA-32.
2 Copyright (C) 1988-2014 Free Software Foundation, Inc.
4 This file is part of GCC.
6 GCC is free software; you can redistribute it and/or modify
7 it under the terms of the GNU General Public License as published by
8 the Free Software Foundation; either version 3, or (at your option)
9 any later version.
11 GCC is distributed in the hope that it will be useful,
12 but WITHOUT ANY WARRANTY; without even the implied warranty of
13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 GNU General Public License for more details.
16 You should have received a copy of the GNU General Public License
17 along with GCC; see the file COPYING3. If not see
18 <http://www.gnu.org/licenses/>. */
20 #include "config.h"
21 #include "system.h"
22 #include "coretypes.h"
23 #include "tm.h"
24 #include "rtl.h"
25 #include "tree.h"
26 #include "stringpool.h"
27 #include "attribs.h"
28 #include "calls.h"
29 #include "stor-layout.h"
30 #include "varasm.h"
31 #include "tm_p.h"
32 #include "regs.h"
33 #include "hard-reg-set.h"
34 #include "insn-config.h"
35 #include "conditions.h"
36 #include "output.h"
37 #include "insn-codes.h"
38 #include "insn-attr.h"
39 #include "flags.h"
40 #include "except.h"
41 #include "hashtab.h"
42 #include "hash-set.h"
43 #include "vec.h"
44 #include "machmode.h"
45 #include "input.h"
46 #include "function.h"
47 #include "recog.h"
48 #include "expr.h"
49 #include "optabs.h"
50 #include "diagnostic-core.h"
51 #include "toplev.h"
52 #include "predict.h"
53 #include "dominance.h"
54 #include "cfg.h"
55 #include "cfgrtl.h"
56 #include "cfganal.h"
57 #include "lcm.h"
58 #include "cfgbuild.h"
59 #include "cfgcleanup.h"
60 #include "basic-block.h"
61 #include "ggc.h"
62 #include "target.h"
63 #include "target-def.h"
64 #include "common/common-target.h"
65 #include "langhooks.h"
66 #include "reload.h"
67 #include "hash-map.h"
68 #include "is-a.h"
69 #include "plugin-api.h"
70 #include "ipa-ref.h"
71 #include "cgraph.h"
72 #include "hash-table.h"
73 #include "tree-ssa-alias.h"
74 #include "internal-fn.h"
75 #include "gimple-fold.h"
76 #include "tree-eh.h"
77 #include "gimple-expr.h"
78 #include "gimple.h"
79 #include "gimplify.h"
80 #include "cfgloop.h"
81 #include "dwarf2.h"
82 #include "df.h"
83 #include "tm-constrs.h"
84 #include "params.h"
85 #include "cselib.h"
86 #include "debug.h"
87 #include "sched-int.h"
88 #include "sbitmap.h"
89 #include "fibheap.h"
90 #include "opts.h"
91 #include "diagnostic.h"
92 #include "dumpfile.h"
93 #include "tree-pass.h"
94 #include "wide-int.h"
95 #include "context.h"
96 #include "pass_manager.h"
97 #include "target-globals.h"
98 #include "tree-vectorizer.h"
99 #include "shrink-wrap.h"
100 #include "builtins.h"
101 #include "rtl-iter.h"
102 #include "tree-iterator.h"
103 #include "tree-chkp.h"
104 #include "rtl-chkp.h"
106 static rtx legitimize_dllimport_symbol (rtx, bool);
107 static rtx legitimize_pe_coff_extern_decl (rtx, bool);
108 static rtx legitimize_pe_coff_symbol (rtx, bool);
110 #ifndef CHECK_STACK_LIMIT
111 #define CHECK_STACK_LIMIT (-1)
112 #endif
114 /* Return index of given mode in mult and division cost tables. */
115 #define MODE_INDEX(mode) \
116 ((mode) == QImode ? 0 \
117 : (mode) == HImode ? 1 \
118 : (mode) == SImode ? 2 \
119 : (mode) == DImode ? 3 \
120 : 4)
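/* So, for example, MODE_INDEX (HImode) == 1, and any mode other than
   QImode/HImode/SImode/DImode (e.g. TImode) falls into the last slot,
   index 4, which the cost tables below label "other".  */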
122 /* Processor costs (relative to an add) */
123 /* We assume COSTS_N_INSNS is defined as (N)*4 and an addition is 2 bytes. */
124 #define COSTS_N_BYTES(N) ((N) * 2)
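/* A quick sanity check of that scaling, assuming COSTS_N_INSNS (N) really is
   (N) * 4 as noted above: a plain add costs COSTS_N_INSNS (1) == 4 in the
   speed-tuning tables, while the same 2-byte add costs COSTS_N_BYTES (2) == 4
   in the size-tuning table below, so both kinds of tables stay on a
   comparable scale.  */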
126 #define DUMMY_STRINGOP_ALGS {libcall, {{-1, libcall, false}}}
128 static stringop_algs ix86_size_memcpy[2] = {
129 {rep_prefix_1_byte, {{-1, rep_prefix_1_byte, false}}},
130 {rep_prefix_1_byte, {{-1, rep_prefix_1_byte, false}}}};
131 static stringop_algs ix86_size_memset[2] = {
132 {rep_prefix_1_byte, {{-1, rep_prefix_1_byte, false}}},
133 {rep_prefix_1_byte, {{-1, rep_prefix_1_byte, false}}}};
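/* A note on how these tables are read (a summary of the stringop_algs
   layout assumed from i386.h, not a definition): element [0] of each
   two-element array is used for 32-bit code and element [1] for 64-bit
   code.  Within an element, the leading field names the algorithm for
   blocks whose size is unknown at compile time; the following
   {max, alg, noalign} entries choose ALG for known sizes up to MAX bytes,
   with MAX == -1 covering any remaining size.  Thus the size-tuning
   tables above always pick rep_prefix_1_byte (rep movsb / rep stosb),
   whatever the block size or target.  */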
135 const
136 struct processor_costs ix86_size_cost = {/* costs for tuning for size */
137 COSTS_N_BYTES (2), /* cost of an add instruction */
138 COSTS_N_BYTES (3), /* cost of a lea instruction */
139 COSTS_N_BYTES (2), /* variable shift costs */
140 COSTS_N_BYTES (3), /* constant shift costs */
141 {COSTS_N_BYTES (3), /* cost of starting multiply for QI */
142 COSTS_N_BYTES (3), /* HI */
143 COSTS_N_BYTES (3), /* SI */
144 COSTS_N_BYTES (3), /* DI */
145 COSTS_N_BYTES (5)}, /* other */
146 0, /* cost of multiply per each bit set */
147 {COSTS_N_BYTES (3), /* cost of a divide/mod for QI */
148 COSTS_N_BYTES (3), /* HI */
149 COSTS_N_BYTES (3), /* SI */
150 COSTS_N_BYTES (3), /* DI */
151 COSTS_N_BYTES (5)}, /* other */
152 COSTS_N_BYTES (3), /* cost of movsx */
153 COSTS_N_BYTES (3), /* cost of movzx */
154 0, /* "large" insn */
155 2, /* MOVE_RATIO */
156 2, /* cost for loading QImode using movzbl */
157 {2, 2, 2}, /* cost of loading integer registers
158 in QImode, HImode and SImode.
159 Relative to reg-reg move (2). */
160 {2, 2, 2}, /* cost of storing integer registers */
161 2, /* cost of reg,reg fld/fst */
162 {2, 2, 2}, /* cost of loading fp registers
163 in SFmode, DFmode and XFmode */
164 {2, 2, 2}, /* cost of storing fp registers
165 in SFmode, DFmode and XFmode */
166 3, /* cost of moving MMX register */
167 {3, 3}, /* cost of loading MMX registers
168 in SImode and DImode */
169 {3, 3}, /* cost of storing MMX registers
170 in SImode and DImode */
171 3, /* cost of moving SSE register */
172 {3, 3, 3}, /* cost of loading SSE registers
173 in SImode, DImode and TImode */
174 {3, 3, 3}, /* cost of storing SSE registers
175 in SImode, DImode and TImode */
176 3, /* MMX or SSE register to integer */
177 0, /* size of l1 cache */
178 0, /* size of l2 cache */
179 0, /* size of prefetch block */
180 0, /* number of parallel prefetches */
181 2, /* Branch cost */
182 COSTS_N_BYTES (2), /* cost of FADD and FSUB insns. */
183 COSTS_N_BYTES (2), /* cost of FMUL instruction. */
184 COSTS_N_BYTES (2), /* cost of FDIV instruction. */
185 COSTS_N_BYTES (2), /* cost of FABS instruction. */
186 COSTS_N_BYTES (2), /* cost of FCHS instruction. */
187 COSTS_N_BYTES (2), /* cost of FSQRT instruction. */
188 ix86_size_memcpy,
189 ix86_size_memset,
190 1, /* scalar_stmt_cost. */
191 1, /* scalar load_cost. */
192 1, /* scalar_store_cost. */
193 1, /* vec_stmt_cost. */
194 1, /* vec_to_scalar_cost. */
195 1, /* scalar_to_vec_cost. */
196 1, /* vec_align_load_cost. */
197 1, /* vec_unalign_load_cost. */
198 1, /* vec_store_cost. */
199 1, /* cond_taken_branch_cost. */
 200 1,                                      /* cond_not_taken_branch_cost.  */
 201 };
203 /* Processor costs (relative to an add) */
204 static stringop_algs i386_memcpy[2] = {
205 {rep_prefix_1_byte, {{-1, rep_prefix_1_byte, false}}},
206 DUMMY_STRINGOP_ALGS};
207 static stringop_algs i386_memset[2] = {
208 {rep_prefix_1_byte, {{-1, rep_prefix_1_byte, false}}},
209 DUMMY_STRINGOP_ALGS};
211 static const
212 struct processor_costs i386_cost = { /* 386 specific costs */
213 COSTS_N_INSNS (1), /* cost of an add instruction */
214 COSTS_N_INSNS (1), /* cost of a lea instruction */
215 COSTS_N_INSNS (3), /* variable shift costs */
216 COSTS_N_INSNS (2), /* constant shift costs */
217 {COSTS_N_INSNS (6), /* cost of starting multiply for QI */
218 COSTS_N_INSNS (6), /* HI */
219 COSTS_N_INSNS (6), /* SI */
220 COSTS_N_INSNS (6), /* DI */
221 COSTS_N_INSNS (6)}, /* other */
222 COSTS_N_INSNS (1), /* cost of multiply per each bit set */
223 {COSTS_N_INSNS (23), /* cost of a divide/mod for QI */
224 COSTS_N_INSNS (23), /* HI */
225 COSTS_N_INSNS (23), /* SI */
226 COSTS_N_INSNS (23), /* DI */
227 COSTS_N_INSNS (23)}, /* other */
228 COSTS_N_INSNS (3), /* cost of movsx */
229 COSTS_N_INSNS (2), /* cost of movzx */
230 15, /* "large" insn */
231 3, /* MOVE_RATIO */
232 4, /* cost for loading QImode using movzbl */
233 {2, 4, 2}, /* cost of loading integer registers
234 in QImode, HImode and SImode.
235 Relative to reg-reg move (2). */
236 {2, 4, 2}, /* cost of storing integer registers */
237 2, /* cost of reg,reg fld/fst */
238 {8, 8, 8}, /* cost of loading fp registers
239 in SFmode, DFmode and XFmode */
240 {8, 8, 8}, /* cost of storing fp registers
241 in SFmode, DFmode and XFmode */
242 2, /* cost of moving MMX register */
243 {4, 8}, /* cost of loading MMX registers
244 in SImode and DImode */
245 {4, 8}, /* cost of storing MMX registers
246 in SImode and DImode */
247 2, /* cost of moving SSE register */
248 {4, 8, 16}, /* cost of loading SSE registers
249 in SImode, DImode and TImode */
250 {4, 8, 16}, /* cost of storing SSE registers
251 in SImode, DImode and TImode */
252 3, /* MMX or SSE register to integer */
253 0, /* size of l1 cache */
254 0, /* size of l2 cache */
255 0, /* size of prefetch block */
256 0, /* number of parallel prefetches */
257 1, /* Branch cost */
258 COSTS_N_INSNS (23), /* cost of FADD and FSUB insns. */
259 COSTS_N_INSNS (27), /* cost of FMUL instruction. */
260 COSTS_N_INSNS (88), /* cost of FDIV instruction. */
261 COSTS_N_INSNS (22), /* cost of FABS instruction. */
262 COSTS_N_INSNS (24), /* cost of FCHS instruction. */
263 COSTS_N_INSNS (122), /* cost of FSQRT instruction. */
264 i386_memcpy,
265 i386_memset,
266 1, /* scalar_stmt_cost. */
267 1, /* scalar load_cost. */
268 1, /* scalar_store_cost. */
269 1, /* vec_stmt_cost. */
270 1, /* vec_to_scalar_cost. */
271 1, /* scalar_to_vec_cost. */
272 1, /* vec_align_load_cost. */
273 2, /* vec_unalign_load_cost. */
274 1, /* vec_store_cost. */
275 3, /* cond_taken_branch_cost. */
 276 1,                                      /* cond_not_taken_branch_cost.  */
 277 };
279 static stringop_algs i486_memcpy[2] = {
280 {rep_prefix_4_byte, {{-1, rep_prefix_4_byte, false}}},
281 DUMMY_STRINGOP_ALGS};
282 static stringop_algs i486_memset[2] = {
283 {rep_prefix_4_byte, {{-1, rep_prefix_4_byte, false}}},
284 DUMMY_STRINGOP_ALGS};
286 static const
287 struct processor_costs i486_cost = { /* 486 specific costs */
288 COSTS_N_INSNS (1), /* cost of an add instruction */
289 COSTS_N_INSNS (1), /* cost of a lea instruction */
290 COSTS_N_INSNS (3), /* variable shift costs */
291 COSTS_N_INSNS (2), /* constant shift costs */
292 {COSTS_N_INSNS (12), /* cost of starting multiply for QI */
293 COSTS_N_INSNS (12), /* HI */
294 COSTS_N_INSNS (12), /* SI */
295 COSTS_N_INSNS (12), /* DI */
296 COSTS_N_INSNS (12)}, /* other */
297 1, /* cost of multiply per each bit set */
298 {COSTS_N_INSNS (40), /* cost of a divide/mod for QI */
299 COSTS_N_INSNS (40), /* HI */
300 COSTS_N_INSNS (40), /* SI */
301 COSTS_N_INSNS (40), /* DI */
302 COSTS_N_INSNS (40)}, /* other */
303 COSTS_N_INSNS (3), /* cost of movsx */
304 COSTS_N_INSNS (2), /* cost of movzx */
305 15, /* "large" insn */
306 3, /* MOVE_RATIO */
307 4, /* cost for loading QImode using movzbl */
308 {2, 4, 2}, /* cost of loading integer registers
309 in QImode, HImode and SImode.
310 Relative to reg-reg move (2). */
311 {2, 4, 2}, /* cost of storing integer registers */
312 2, /* cost of reg,reg fld/fst */
313 {8, 8, 8}, /* cost of loading fp registers
314 in SFmode, DFmode and XFmode */
315 {8, 8, 8}, /* cost of storing fp registers
316 in SFmode, DFmode and XFmode */
317 2, /* cost of moving MMX register */
318 {4, 8}, /* cost of loading MMX registers
319 in SImode and DImode */
320 {4, 8}, /* cost of storing MMX registers
321 in SImode and DImode */
322 2, /* cost of moving SSE register */
323 {4, 8, 16}, /* cost of loading SSE registers
324 in SImode, DImode and TImode */
325 {4, 8, 16}, /* cost of storing SSE registers
326 in SImode, DImode and TImode */
327 3, /* MMX or SSE register to integer */
328 4, /* size of l1 cache. 486 has 8kB cache
329 shared for code and data, so 4kB is
330 not really precise. */
331 4, /* size of l2 cache */
332 0, /* size of prefetch block */
333 0, /* number of parallel prefetches */
334 1, /* Branch cost */
335 COSTS_N_INSNS (8), /* cost of FADD and FSUB insns. */
336 COSTS_N_INSNS (16), /* cost of FMUL instruction. */
337 COSTS_N_INSNS (73), /* cost of FDIV instruction. */
338 COSTS_N_INSNS (3), /* cost of FABS instruction. */
339 COSTS_N_INSNS (3), /* cost of FCHS instruction. */
340 COSTS_N_INSNS (83), /* cost of FSQRT instruction. */
341 i486_memcpy,
342 i486_memset,
343 1, /* scalar_stmt_cost. */
344 1, /* scalar load_cost. */
345 1, /* scalar_store_cost. */
346 1, /* vec_stmt_cost. */
347 1, /* vec_to_scalar_cost. */
348 1, /* scalar_to_vec_cost. */
349 1, /* vec_align_load_cost. */
350 2, /* vec_unalign_load_cost. */
351 1, /* vec_store_cost. */
352 3, /* cond_taken_branch_cost. */
 353 1,                                      /* cond_not_taken_branch_cost.  */
 354 };
356 static stringop_algs pentium_memcpy[2] = {
357 {libcall, {{256, rep_prefix_4_byte, false}, {-1, libcall, false}}},
358 DUMMY_STRINGOP_ALGS};
359 static stringop_algs pentium_memset[2] = {
360 {libcall, {{-1, rep_prefix_4_byte, false}}},
361 DUMMY_STRINGOP_ALGS};
363 static const
364 struct processor_costs pentium_cost = {
365 COSTS_N_INSNS (1), /* cost of an add instruction */
366 COSTS_N_INSNS (1), /* cost of a lea instruction */
367 COSTS_N_INSNS (4), /* variable shift costs */
368 COSTS_N_INSNS (1), /* constant shift costs */
369 {COSTS_N_INSNS (11), /* cost of starting multiply for QI */
370 COSTS_N_INSNS (11), /* HI */
371 COSTS_N_INSNS (11), /* SI */
372 COSTS_N_INSNS (11), /* DI */
373 COSTS_N_INSNS (11)}, /* other */
374 0, /* cost of multiply per each bit set */
375 {COSTS_N_INSNS (25), /* cost of a divide/mod for QI */
376 COSTS_N_INSNS (25), /* HI */
377 COSTS_N_INSNS (25), /* SI */
378 COSTS_N_INSNS (25), /* DI */
379 COSTS_N_INSNS (25)}, /* other */
380 COSTS_N_INSNS (3), /* cost of movsx */
381 COSTS_N_INSNS (2), /* cost of movzx */
382 8, /* "large" insn */
383 6, /* MOVE_RATIO */
384 6, /* cost for loading QImode using movzbl */
385 {2, 4, 2}, /* cost of loading integer registers
386 in QImode, HImode and SImode.
387 Relative to reg-reg move (2). */
388 {2, 4, 2}, /* cost of storing integer registers */
389 2, /* cost of reg,reg fld/fst */
390 {2, 2, 6}, /* cost of loading fp registers
391 in SFmode, DFmode and XFmode */
392 {4, 4, 6}, /* cost of storing fp registers
393 in SFmode, DFmode and XFmode */
394 8, /* cost of moving MMX register */
395 {8, 8}, /* cost of loading MMX registers
396 in SImode and DImode */
397 {8, 8}, /* cost of storing MMX registers
398 in SImode and DImode */
399 2, /* cost of moving SSE register */
400 {4, 8, 16}, /* cost of loading SSE registers
401 in SImode, DImode and TImode */
402 {4, 8, 16}, /* cost of storing SSE registers
403 in SImode, DImode and TImode */
404 3, /* MMX or SSE register to integer */
405 8, /* size of l1 cache. */
406 8, /* size of l2 cache */
407 0, /* size of prefetch block */
408 0, /* number of parallel prefetches */
409 2, /* Branch cost */
410 COSTS_N_INSNS (3), /* cost of FADD and FSUB insns. */
411 COSTS_N_INSNS (3), /* cost of FMUL instruction. */
412 COSTS_N_INSNS (39), /* cost of FDIV instruction. */
413 COSTS_N_INSNS (1), /* cost of FABS instruction. */
414 COSTS_N_INSNS (1), /* cost of FCHS instruction. */
415 COSTS_N_INSNS (70), /* cost of FSQRT instruction. */
416 pentium_memcpy,
417 pentium_memset,
418 1, /* scalar_stmt_cost. */
419 1, /* scalar load_cost. */
420 1, /* scalar_store_cost. */
421 1, /* vec_stmt_cost. */
422 1, /* vec_to_scalar_cost. */
423 1, /* scalar_to_vec_cost. */
424 1, /* vec_align_load_cost. */
425 2, /* vec_unalign_load_cost. */
426 1, /* vec_store_cost. */
427 3, /* cond_taken_branch_cost. */
 428 1,                                      /* cond_not_taken_branch_cost.  */
 429 };
 431 /* PentiumPro has optimized rep instructions for blocks aligned by 8 bytes
 432 (we ensure the alignment).  For small blocks an inline loop is still a
 433 noticeable win; for bigger blocks either rep movsl or rep movsb is the
 434 way to go.  Rep movsb apparently has a more expensive startup time in the
 435 CPU, but after 4K the difference is down in the noise.  */
436 static stringop_algs pentiumpro_memcpy[2] = {
437 {rep_prefix_4_byte, {{128, loop, false}, {1024, unrolled_loop, false},
438 {8192, rep_prefix_4_byte, false},
439 {-1, rep_prefix_1_byte, false}}},
440 DUMMY_STRINGOP_ALGS};
441 static stringop_algs pentiumpro_memset[2] = {
442 {rep_prefix_4_byte, {{1024, unrolled_loop, false},
443 {8192, rep_prefix_4_byte, false},
444 {-1, libcall, false}}},
445 DUMMY_STRINGOP_ALGS};
446 static const
447 struct processor_costs pentiumpro_cost = {
448 COSTS_N_INSNS (1), /* cost of an add instruction */
449 COSTS_N_INSNS (1), /* cost of a lea instruction */
450 COSTS_N_INSNS (1), /* variable shift costs */
451 COSTS_N_INSNS (1), /* constant shift costs */
452 {COSTS_N_INSNS (4), /* cost of starting multiply for QI */
453 COSTS_N_INSNS (4), /* HI */
454 COSTS_N_INSNS (4), /* SI */
455 COSTS_N_INSNS (4), /* DI */
456 COSTS_N_INSNS (4)}, /* other */
457 0, /* cost of multiply per each bit set */
458 {COSTS_N_INSNS (17), /* cost of a divide/mod for QI */
459 COSTS_N_INSNS (17), /* HI */
460 COSTS_N_INSNS (17), /* SI */
461 COSTS_N_INSNS (17), /* DI */
462 COSTS_N_INSNS (17)}, /* other */
463 COSTS_N_INSNS (1), /* cost of movsx */
464 COSTS_N_INSNS (1), /* cost of movzx */
465 8, /* "large" insn */
466 6, /* MOVE_RATIO */
467 2, /* cost for loading QImode using movzbl */
468 {4, 4, 4}, /* cost of loading integer registers
469 in QImode, HImode and SImode.
470 Relative to reg-reg move (2). */
471 {2, 2, 2}, /* cost of storing integer registers */
472 2, /* cost of reg,reg fld/fst */
473 {2, 2, 6}, /* cost of loading fp registers
474 in SFmode, DFmode and XFmode */
475 {4, 4, 6}, /* cost of storing fp registers
476 in SFmode, DFmode and XFmode */
477 2, /* cost of moving MMX register */
478 {2, 2}, /* cost of loading MMX registers
479 in SImode and DImode */
480 {2, 2}, /* cost of storing MMX registers
481 in SImode and DImode */
482 2, /* cost of moving SSE register */
483 {2, 2, 8}, /* cost of loading SSE registers
484 in SImode, DImode and TImode */
485 {2, 2, 8}, /* cost of storing SSE registers
486 in SImode, DImode and TImode */
487 3, /* MMX or SSE register to integer */
488 8, /* size of l1 cache. */
489 256, /* size of l2 cache */
490 32, /* size of prefetch block */
491 6, /* number of parallel prefetches */
492 2, /* Branch cost */
493 COSTS_N_INSNS (3), /* cost of FADD and FSUB insns. */
494 COSTS_N_INSNS (5), /* cost of FMUL instruction. */
495 COSTS_N_INSNS (56), /* cost of FDIV instruction. */
496 COSTS_N_INSNS (2), /* cost of FABS instruction. */
497 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
498 COSTS_N_INSNS (56), /* cost of FSQRT instruction. */
499 pentiumpro_memcpy,
500 pentiumpro_memset,
501 1, /* scalar_stmt_cost. */
502 1, /* scalar load_cost. */
503 1, /* scalar_store_cost. */
504 1, /* vec_stmt_cost. */
505 1, /* vec_to_scalar_cost. */
506 1, /* scalar_to_vec_cost. */
507 1, /* vec_align_load_cost. */
508 2, /* vec_unalign_load_cost. */
509 1, /* vec_store_cost. */
510 3, /* cond_taken_branch_cost. */
 511 1,                                      /* cond_not_taken_branch_cost.  */
 512 };
514 static stringop_algs geode_memcpy[2] = {
515 {libcall, {{256, rep_prefix_4_byte, false}, {-1, libcall, false}}},
516 DUMMY_STRINGOP_ALGS};
517 static stringop_algs geode_memset[2] = {
518 {libcall, {{256, rep_prefix_4_byte, false}, {-1, libcall, false}}},
519 DUMMY_STRINGOP_ALGS};
520 static const
521 struct processor_costs geode_cost = {
522 COSTS_N_INSNS (1), /* cost of an add instruction */
523 COSTS_N_INSNS (1), /* cost of a lea instruction */
524 COSTS_N_INSNS (2), /* variable shift costs */
525 COSTS_N_INSNS (1), /* constant shift costs */
526 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
527 COSTS_N_INSNS (4), /* HI */
528 COSTS_N_INSNS (7), /* SI */
529 COSTS_N_INSNS (7), /* DI */
530 COSTS_N_INSNS (7)}, /* other */
531 0, /* cost of multiply per each bit set */
532 {COSTS_N_INSNS (15), /* cost of a divide/mod for QI */
533 COSTS_N_INSNS (23), /* HI */
534 COSTS_N_INSNS (39), /* SI */
535 COSTS_N_INSNS (39), /* DI */
536 COSTS_N_INSNS (39)}, /* other */
537 COSTS_N_INSNS (1), /* cost of movsx */
538 COSTS_N_INSNS (1), /* cost of movzx */
539 8, /* "large" insn */
540 4, /* MOVE_RATIO */
541 1, /* cost for loading QImode using movzbl */
542 {1, 1, 1}, /* cost of loading integer registers
543 in QImode, HImode and SImode.
544 Relative to reg-reg move (2). */
545 {1, 1, 1}, /* cost of storing integer registers */
546 1, /* cost of reg,reg fld/fst */
547 {1, 1, 1}, /* cost of loading fp registers
548 in SFmode, DFmode and XFmode */
549 {4, 6, 6}, /* cost of storing fp registers
550 in SFmode, DFmode and XFmode */
552 1, /* cost of moving MMX register */
553 {1, 1}, /* cost of loading MMX registers
554 in SImode and DImode */
555 {1, 1}, /* cost of storing MMX registers
556 in SImode and DImode */
557 1, /* cost of moving SSE register */
558 {1, 1, 1}, /* cost of loading SSE registers
559 in SImode, DImode and TImode */
560 {1, 1, 1}, /* cost of storing SSE registers
561 in SImode, DImode and TImode */
562 1, /* MMX or SSE register to integer */
563 64, /* size of l1 cache. */
564 128, /* size of l2 cache. */
565 32, /* size of prefetch block */
566 1, /* number of parallel prefetches */
567 1, /* Branch cost */
568 COSTS_N_INSNS (6), /* cost of FADD and FSUB insns. */
569 COSTS_N_INSNS (11), /* cost of FMUL instruction. */
570 COSTS_N_INSNS (47), /* cost of FDIV instruction. */
571 COSTS_N_INSNS (1), /* cost of FABS instruction. */
572 COSTS_N_INSNS (1), /* cost of FCHS instruction. */
573 COSTS_N_INSNS (54), /* cost of FSQRT instruction. */
574 geode_memcpy,
575 geode_memset,
576 1, /* scalar_stmt_cost. */
577 1, /* scalar load_cost. */
578 1, /* scalar_store_cost. */
579 1, /* vec_stmt_cost. */
580 1, /* vec_to_scalar_cost. */
581 1, /* scalar_to_vec_cost. */
582 1, /* vec_align_load_cost. */
583 2, /* vec_unalign_load_cost. */
584 1, /* vec_store_cost. */
585 3, /* cond_taken_branch_cost. */
 586 1,                                      /* cond_not_taken_branch_cost.  */
 587 };
589 static stringop_algs k6_memcpy[2] = {
590 {libcall, {{256, rep_prefix_4_byte, false}, {-1, libcall, false}}},
591 DUMMY_STRINGOP_ALGS};
592 static stringop_algs k6_memset[2] = {
593 {libcall, {{256, rep_prefix_4_byte, false}, {-1, libcall, false}}},
594 DUMMY_STRINGOP_ALGS};
595 static const
596 struct processor_costs k6_cost = {
597 COSTS_N_INSNS (1), /* cost of an add instruction */
598 COSTS_N_INSNS (2), /* cost of a lea instruction */
599 COSTS_N_INSNS (1), /* variable shift costs */
600 COSTS_N_INSNS (1), /* constant shift costs */
601 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
602 COSTS_N_INSNS (3), /* HI */
603 COSTS_N_INSNS (3), /* SI */
604 COSTS_N_INSNS (3), /* DI */
605 COSTS_N_INSNS (3)}, /* other */
606 0, /* cost of multiply per each bit set */
607 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
608 COSTS_N_INSNS (18), /* HI */
609 COSTS_N_INSNS (18), /* SI */
610 COSTS_N_INSNS (18), /* DI */
611 COSTS_N_INSNS (18)}, /* other */
612 COSTS_N_INSNS (2), /* cost of movsx */
613 COSTS_N_INSNS (2), /* cost of movzx */
614 8, /* "large" insn */
615 4, /* MOVE_RATIO */
616 3, /* cost for loading QImode using movzbl */
617 {4, 5, 4}, /* cost of loading integer registers
618 in QImode, HImode and SImode.
619 Relative to reg-reg move (2). */
620 {2, 3, 2}, /* cost of storing integer registers */
621 4, /* cost of reg,reg fld/fst */
622 {6, 6, 6}, /* cost of loading fp registers
623 in SFmode, DFmode and XFmode */
624 {4, 4, 4}, /* cost of storing fp registers
625 in SFmode, DFmode and XFmode */
626 2, /* cost of moving MMX register */
627 {2, 2}, /* cost of loading MMX registers
628 in SImode and DImode */
629 {2, 2}, /* cost of storing MMX registers
630 in SImode and DImode */
631 2, /* cost of moving SSE register */
632 {2, 2, 8}, /* cost of loading SSE registers
633 in SImode, DImode and TImode */
634 {2, 2, 8}, /* cost of storing SSE registers
635 in SImode, DImode and TImode */
636 6, /* MMX or SSE register to integer */
637 32, /* size of l1 cache. */
638 32, /* size of l2 cache. Some models
639 have integrated l2 cache, but
640 optimizing for k6 is not important
641 enough to worry about that. */
642 32, /* size of prefetch block */
643 1, /* number of parallel prefetches */
644 1, /* Branch cost */
645 COSTS_N_INSNS (2), /* cost of FADD and FSUB insns. */
646 COSTS_N_INSNS (2), /* cost of FMUL instruction. */
647 COSTS_N_INSNS (56), /* cost of FDIV instruction. */
648 COSTS_N_INSNS (2), /* cost of FABS instruction. */
649 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
650 COSTS_N_INSNS (56), /* cost of FSQRT instruction. */
651 k6_memcpy,
652 k6_memset,
653 1, /* scalar_stmt_cost. */
654 1, /* scalar load_cost. */
655 1, /* scalar_store_cost. */
656 1, /* vec_stmt_cost. */
657 1, /* vec_to_scalar_cost. */
658 1, /* scalar_to_vec_cost. */
659 1, /* vec_align_load_cost. */
660 2, /* vec_unalign_load_cost. */
661 1, /* vec_store_cost. */
662 3, /* cond_taken_branch_cost. */
 663 1,                                      /* cond_not_taken_branch_cost.  */
 664 };
 666 /* For some reason, Athlon deals better with the REP prefix (relative to
 667 loops) than K8 does.  Alignment becomes important after 8 bytes for memcpy
 668 and 128 bytes for memset.  */
669 static stringop_algs athlon_memcpy[2] = {
670 {libcall, {{2048, rep_prefix_4_byte, false}, {-1, libcall, false}}},
671 DUMMY_STRINGOP_ALGS};
672 static stringop_algs athlon_memset[2] = {
673 {libcall, {{2048, rep_prefix_4_byte, false}, {-1, libcall, false}}},
674 DUMMY_STRINGOP_ALGS};
675 static const
676 struct processor_costs athlon_cost = {
677 COSTS_N_INSNS (1), /* cost of an add instruction */
678 COSTS_N_INSNS (2), /* cost of a lea instruction */
679 COSTS_N_INSNS (1), /* variable shift costs */
680 COSTS_N_INSNS (1), /* constant shift costs */
681 {COSTS_N_INSNS (5), /* cost of starting multiply for QI */
682 COSTS_N_INSNS (5), /* HI */
683 COSTS_N_INSNS (5), /* SI */
684 COSTS_N_INSNS (5), /* DI */
685 COSTS_N_INSNS (5)}, /* other */
686 0, /* cost of multiply per each bit set */
687 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
688 COSTS_N_INSNS (26), /* HI */
689 COSTS_N_INSNS (42), /* SI */
690 COSTS_N_INSNS (74), /* DI */
691 COSTS_N_INSNS (74)}, /* other */
692 COSTS_N_INSNS (1), /* cost of movsx */
693 COSTS_N_INSNS (1), /* cost of movzx */
694 8, /* "large" insn */
695 9, /* MOVE_RATIO */
696 4, /* cost for loading QImode using movzbl */
697 {3, 4, 3}, /* cost of loading integer registers
698 in QImode, HImode and SImode.
699 Relative to reg-reg move (2). */
700 {3, 4, 3}, /* cost of storing integer registers */
701 4, /* cost of reg,reg fld/fst */
702 {4, 4, 12}, /* cost of loading fp registers
703 in SFmode, DFmode and XFmode */
704 {6, 6, 8}, /* cost of storing fp registers
705 in SFmode, DFmode and XFmode */
706 2, /* cost of moving MMX register */
707 {4, 4}, /* cost of loading MMX registers
708 in SImode and DImode */
709 {4, 4}, /* cost of storing MMX registers
710 in SImode and DImode */
711 2, /* cost of moving SSE register */
712 {4, 4, 6}, /* cost of loading SSE registers
713 in SImode, DImode and TImode */
714 {4, 4, 5}, /* cost of storing SSE registers
715 in SImode, DImode and TImode */
716 5, /* MMX or SSE register to integer */
717 64, /* size of l1 cache. */
718 256, /* size of l2 cache. */
719 64, /* size of prefetch block */
720 6, /* number of parallel prefetches */
721 5, /* Branch cost */
722 COSTS_N_INSNS (4), /* cost of FADD and FSUB insns. */
723 COSTS_N_INSNS (4), /* cost of FMUL instruction. */
724 COSTS_N_INSNS (24), /* cost of FDIV instruction. */
725 COSTS_N_INSNS (2), /* cost of FABS instruction. */
726 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
727 COSTS_N_INSNS (35), /* cost of FSQRT instruction. */
728 athlon_memcpy,
729 athlon_memset,
730 1, /* scalar_stmt_cost. */
731 1, /* scalar load_cost. */
732 1, /* scalar_store_cost. */
733 1, /* vec_stmt_cost. */
734 1, /* vec_to_scalar_cost. */
735 1, /* scalar_to_vec_cost. */
736 1, /* vec_align_load_cost. */
737 2, /* vec_unalign_load_cost. */
738 1, /* vec_store_cost. */
739 3, /* cond_taken_branch_cost. */
 740 1,                                      /* cond_not_taken_branch_cost.  */
 741 };
 743 /* K8 has an optimized REP instruction for medium-sized blocks, but for very
 744 small blocks it is better to use a loop.  For large blocks, a libcall can
 745 do non-temporal accesses and beat inline code considerably.  */
746 static stringop_algs k8_memcpy[2] = {
747 {libcall, {{6, loop, false}, {14, unrolled_loop, false},
748 {-1, rep_prefix_4_byte, false}}},
749 {libcall, {{16, loop, false}, {8192, rep_prefix_8_byte, false},
750 {-1, libcall, false}}}};
751 static stringop_algs k8_memset[2] = {
752 {libcall, {{8, loop, false}, {24, unrolled_loop, false},
753 {2048, rep_prefix_4_byte, false}, {-1, libcall, false}}},
754 {libcall, {{48, unrolled_loop, false},
755 {8192, rep_prefix_8_byte, false}, {-1, libcall, false}}}};
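/* As a worked example of the layout described above, the 64-bit k8_memcpy
   entry says: copies of at most 16 bytes use a simple loop, copies up to
   8192 bytes use rep_prefix_8_byte (rep movsq), and larger copies, or copies
   whose size is unknown at compile time, go through the memcpy libcall.  */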
756 static const
757 struct processor_costs k8_cost = {
758 COSTS_N_INSNS (1), /* cost of an add instruction */
759 COSTS_N_INSNS (2), /* cost of a lea instruction */
760 COSTS_N_INSNS (1), /* variable shift costs */
761 COSTS_N_INSNS (1), /* constant shift costs */
762 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
763 COSTS_N_INSNS (4), /* HI */
764 COSTS_N_INSNS (3), /* SI */
765 COSTS_N_INSNS (4), /* DI */
766 COSTS_N_INSNS (5)}, /* other */
767 0, /* cost of multiply per each bit set */
768 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
769 COSTS_N_INSNS (26), /* HI */
770 COSTS_N_INSNS (42), /* SI */
771 COSTS_N_INSNS (74), /* DI */
772 COSTS_N_INSNS (74)}, /* other */
773 COSTS_N_INSNS (1), /* cost of movsx */
774 COSTS_N_INSNS (1), /* cost of movzx */
775 8, /* "large" insn */
776 9, /* MOVE_RATIO */
777 4, /* cost for loading QImode using movzbl */
778 {3, 4, 3}, /* cost of loading integer registers
779 in QImode, HImode and SImode.
780 Relative to reg-reg move (2). */
781 {3, 4, 3}, /* cost of storing integer registers */
782 4, /* cost of reg,reg fld/fst */
783 {4, 4, 12}, /* cost of loading fp registers
784 in SFmode, DFmode and XFmode */
785 {6, 6, 8}, /* cost of storing fp registers
786 in SFmode, DFmode and XFmode */
787 2, /* cost of moving MMX register */
788 {3, 3}, /* cost of loading MMX registers
789 in SImode and DImode */
790 {4, 4}, /* cost of storing MMX registers
791 in SImode and DImode */
792 2, /* cost of moving SSE register */
793 {4, 3, 6}, /* cost of loading SSE registers
794 in SImode, DImode and TImode */
795 {4, 4, 5}, /* cost of storing SSE registers
796 in SImode, DImode and TImode */
797 5, /* MMX or SSE register to integer */
798 64, /* size of l1 cache. */
799 512, /* size of l2 cache. */
800 64, /* size of prefetch block */
 801 /* New AMD processors never drop prefetches; if they cannot be performed
 802 immediately, they are queued.  We set the number of simultaneous prefetches
 803 to a large constant to reflect this (it is probably not a good idea to leave
 804 the number of prefetches entirely unlimited, as their execution also takes
 805 some time).  */
806 100, /* number of parallel prefetches */
807 3, /* Branch cost */
808 COSTS_N_INSNS (4), /* cost of FADD and FSUB insns. */
809 COSTS_N_INSNS (4), /* cost of FMUL instruction. */
810 COSTS_N_INSNS (19), /* cost of FDIV instruction. */
811 COSTS_N_INSNS (2), /* cost of FABS instruction. */
812 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
813 COSTS_N_INSNS (35), /* cost of FSQRT instruction. */
815 k8_memcpy,
816 k8_memset,
817 4, /* scalar_stmt_cost. */
818 2, /* scalar load_cost. */
819 2, /* scalar_store_cost. */
820 5, /* vec_stmt_cost. */
821 0, /* vec_to_scalar_cost. */
822 2, /* scalar_to_vec_cost. */
823 2, /* vec_align_load_cost. */
824 3, /* vec_unalign_load_cost. */
825 3, /* vec_store_cost. */
826 3, /* cond_taken_branch_cost. */
 827 2,                                      /* cond_not_taken_branch_cost.  */
 828 };
 830 /* AMDFAM10 has an optimized REP instruction for medium-sized blocks, but for
 831 very small blocks it is better to use a loop.  For large blocks, a libcall
 832 can do non-temporal accesses and beat inline code considerably.  */
833 static stringop_algs amdfam10_memcpy[2] = {
834 {libcall, {{6, loop, false}, {14, unrolled_loop, false},
835 {-1, rep_prefix_4_byte, false}}},
836 {libcall, {{16, loop, false}, {8192, rep_prefix_8_byte, false},
837 {-1, libcall, false}}}};
838 static stringop_algs amdfam10_memset[2] = {
839 {libcall, {{8, loop, false}, {24, unrolled_loop, false},
840 {2048, rep_prefix_4_byte, false}, {-1, libcall, false}}},
841 {libcall, {{48, unrolled_loop, false}, {8192, rep_prefix_8_byte, false},
842 {-1, libcall, false}}}};
843 struct processor_costs amdfam10_cost = {
844 COSTS_N_INSNS (1), /* cost of an add instruction */
845 COSTS_N_INSNS (2), /* cost of a lea instruction */
846 COSTS_N_INSNS (1), /* variable shift costs */
847 COSTS_N_INSNS (1), /* constant shift costs */
848 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
849 COSTS_N_INSNS (4), /* HI */
850 COSTS_N_INSNS (3), /* SI */
851 COSTS_N_INSNS (4), /* DI */
852 COSTS_N_INSNS (5)}, /* other */
853 0, /* cost of multiply per each bit set */
854 {COSTS_N_INSNS (19), /* cost of a divide/mod for QI */
855 COSTS_N_INSNS (35), /* HI */
856 COSTS_N_INSNS (51), /* SI */
857 COSTS_N_INSNS (83), /* DI */
858 COSTS_N_INSNS (83)}, /* other */
859 COSTS_N_INSNS (1), /* cost of movsx */
860 COSTS_N_INSNS (1), /* cost of movzx */
861 8, /* "large" insn */
862 9, /* MOVE_RATIO */
863 4, /* cost for loading QImode using movzbl */
864 {3, 4, 3}, /* cost of loading integer registers
865 in QImode, HImode and SImode.
866 Relative to reg-reg move (2). */
867 {3, 4, 3}, /* cost of storing integer registers */
868 4, /* cost of reg,reg fld/fst */
869 {4, 4, 12}, /* cost of loading fp registers
870 in SFmode, DFmode and XFmode */
871 {6, 6, 8}, /* cost of storing fp registers
872 in SFmode, DFmode and XFmode */
873 2, /* cost of moving MMX register */
874 {3, 3}, /* cost of loading MMX registers
875 in SImode and DImode */
876 {4, 4}, /* cost of storing MMX registers
877 in SImode and DImode */
878 2, /* cost of moving SSE register */
879 {4, 4, 3}, /* cost of loading SSE registers
880 in SImode, DImode and TImode */
881 {4, 4, 5}, /* cost of storing SSE registers
882 in SImode, DImode and TImode */
883 3, /* MMX or SSE register to integer */
884 /* On K8:
885 MOVD reg64, xmmreg Double FSTORE 4
886 MOVD reg32, xmmreg Double FSTORE 4
887 On AMDFAM10:
888 MOVD reg64, xmmreg Double FADD 3
889 1/1 1/1
890 MOVD reg32, xmmreg Double FADD 3
891 1/1 1/1 */
892 64, /* size of l1 cache. */
893 512, /* size of l2 cache. */
894 64, /* size of prefetch block */
 895 /* New AMD processors never drop prefetches; if they cannot be performed
 896 immediately, they are queued.  We set the number of simultaneous prefetches
 897 to a large constant to reflect this (it is probably not a good idea to leave
 898 the number of prefetches entirely unlimited, as their execution also takes
 899 some time).  */
900 100, /* number of parallel prefetches */
901 2, /* Branch cost */
902 COSTS_N_INSNS (4), /* cost of FADD and FSUB insns. */
903 COSTS_N_INSNS (4), /* cost of FMUL instruction. */
904 COSTS_N_INSNS (19), /* cost of FDIV instruction. */
905 COSTS_N_INSNS (2), /* cost of FABS instruction. */
906 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
907 COSTS_N_INSNS (35), /* cost of FSQRT instruction. */
909 amdfam10_memcpy,
910 amdfam10_memset,
911 4, /* scalar_stmt_cost. */
912 2, /* scalar load_cost. */
913 2, /* scalar_store_cost. */
914 6, /* vec_stmt_cost. */
915 0, /* vec_to_scalar_cost. */
916 2, /* scalar_to_vec_cost. */
917 2, /* vec_align_load_cost. */
918 2, /* vec_unalign_load_cost. */
919 2, /* vec_store_cost. */
920 2, /* cond_taken_branch_cost. */
 921 1,                                      /* cond_not_taken_branch_cost.  */
 922 };
 924 /* BDVER1 has an optimized REP instruction for medium-sized blocks, but for
 925 very small blocks it is better to use a loop.  For large blocks, a libcall
 926 can do non-temporal accesses and beat inline code considerably.  */
927 static stringop_algs bdver1_memcpy[2] = {
928 {libcall, {{6, loop, false}, {14, unrolled_loop, false},
929 {-1, rep_prefix_4_byte, false}}},
930 {libcall, {{16, loop, false}, {8192, rep_prefix_8_byte, false},
931 {-1, libcall, false}}}};
932 static stringop_algs bdver1_memset[2] = {
933 {libcall, {{8, loop, false}, {24, unrolled_loop, false},
934 {2048, rep_prefix_4_byte, false}, {-1, libcall, false}}},
935 {libcall, {{48, unrolled_loop, false}, {8192, rep_prefix_8_byte, false},
936 {-1, libcall, false}}}};
938 const struct processor_costs bdver1_cost = {
939 COSTS_N_INSNS (1), /* cost of an add instruction */
940 COSTS_N_INSNS (1), /* cost of a lea instruction */
941 COSTS_N_INSNS (1), /* variable shift costs */
942 COSTS_N_INSNS (1), /* constant shift costs */
943 {COSTS_N_INSNS (4), /* cost of starting multiply for QI */
944 COSTS_N_INSNS (4), /* HI */
945 COSTS_N_INSNS (4), /* SI */
946 COSTS_N_INSNS (6), /* DI */
947 COSTS_N_INSNS (6)}, /* other */
948 0, /* cost of multiply per each bit set */
949 {COSTS_N_INSNS (19), /* cost of a divide/mod for QI */
950 COSTS_N_INSNS (35), /* HI */
951 COSTS_N_INSNS (51), /* SI */
952 COSTS_N_INSNS (83), /* DI */
953 COSTS_N_INSNS (83)}, /* other */
954 COSTS_N_INSNS (1), /* cost of movsx */
955 COSTS_N_INSNS (1), /* cost of movzx */
956 8, /* "large" insn */
957 9, /* MOVE_RATIO */
958 4, /* cost for loading QImode using movzbl */
959 {5, 5, 4}, /* cost of loading integer registers
960 in QImode, HImode and SImode.
961 Relative to reg-reg move (2). */
962 {4, 4, 4}, /* cost of storing integer registers */
963 2, /* cost of reg,reg fld/fst */
964 {5, 5, 12}, /* cost of loading fp registers
965 in SFmode, DFmode and XFmode */
966 {4, 4, 8}, /* cost of storing fp registers
967 in SFmode, DFmode and XFmode */
968 2, /* cost of moving MMX register */
969 {4, 4}, /* cost of loading MMX registers
970 in SImode and DImode */
971 {4, 4}, /* cost of storing MMX registers
972 in SImode and DImode */
973 2, /* cost of moving SSE register */
974 {4, 4, 4}, /* cost of loading SSE registers
975 in SImode, DImode and TImode */
976 {4, 4, 4}, /* cost of storing SSE registers
977 in SImode, DImode and TImode */
978 2, /* MMX or SSE register to integer */
979 /* On K8:
980 MOVD reg64, xmmreg Double FSTORE 4
981 MOVD reg32, xmmreg Double FSTORE 4
982 On AMDFAM10:
983 MOVD reg64, xmmreg Double FADD 3
984 1/1 1/1
985 MOVD reg32, xmmreg Double FADD 3
986 1/1 1/1 */
987 16, /* size of l1 cache. */
988 2048, /* size of l2 cache. */
989 64, /* size of prefetch block */
 990 /* New AMD processors never drop prefetches; if they cannot be performed
 991 immediately, they are queued.  We set the number of simultaneous prefetches
 992 to a large constant to reflect this (it is probably not a good idea to leave
 993 the number of prefetches entirely unlimited, as their execution also takes
 994 some time).  */
995 100, /* number of parallel prefetches */
996 2, /* Branch cost */
997 COSTS_N_INSNS (6), /* cost of FADD and FSUB insns. */
998 COSTS_N_INSNS (6), /* cost of FMUL instruction. */
999 COSTS_N_INSNS (42), /* cost of FDIV instruction. */
1000 COSTS_N_INSNS (2), /* cost of FABS instruction. */
1001 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
1002 COSTS_N_INSNS (52), /* cost of FSQRT instruction. */
1004 bdver1_memcpy,
1005 bdver1_memset,
1006 6, /* scalar_stmt_cost. */
1007 4, /* scalar load_cost. */
1008 4, /* scalar_store_cost. */
1009 6, /* vec_stmt_cost. */
1010 0, /* vec_to_scalar_cost. */
1011 2, /* scalar_to_vec_cost. */
1012 4, /* vec_align_load_cost. */
1013 4, /* vec_unalign_load_cost. */
1014 4, /* vec_store_cost. */
1015 2, /* cond_taken_branch_cost. */
 1016 1,                                     /* cond_not_taken_branch_cost.  */
 1017 };
 1019 /* BDVER2 has an optimized REP instruction for medium-sized blocks, but for
 1020 very small blocks it is better to use a loop.  For large blocks, a libcall
 1021 can do non-temporal accesses and beat inline code considerably.  */
1023 static stringop_algs bdver2_memcpy[2] = {
1024 {libcall, {{6, loop, false}, {14, unrolled_loop, false},
1025 {-1, rep_prefix_4_byte, false}}},
1026 {libcall, {{16, loop, false}, {8192, rep_prefix_8_byte, false},
1027 {-1, libcall, false}}}};
1028 static stringop_algs bdver2_memset[2] = {
1029 {libcall, {{8, loop, false}, {24, unrolled_loop, false},
1030 {2048, rep_prefix_4_byte, false}, {-1, libcall, false}}},
1031 {libcall, {{48, unrolled_loop, false}, {8192, rep_prefix_8_byte, false},
1032 {-1, libcall, false}}}};
1034 const struct processor_costs bdver2_cost = {
1035 COSTS_N_INSNS (1), /* cost of an add instruction */
1036 COSTS_N_INSNS (1), /* cost of a lea instruction */
1037 COSTS_N_INSNS (1), /* variable shift costs */
1038 COSTS_N_INSNS (1), /* constant shift costs */
1039 {COSTS_N_INSNS (4), /* cost of starting multiply for QI */
1040 COSTS_N_INSNS (4), /* HI */
1041 COSTS_N_INSNS (4), /* SI */
1042 COSTS_N_INSNS (6), /* DI */
1043 COSTS_N_INSNS (6)}, /* other */
1044 0, /* cost of multiply per each bit set */
1045 {COSTS_N_INSNS (19), /* cost of a divide/mod for QI */
1046 COSTS_N_INSNS (35), /* HI */
1047 COSTS_N_INSNS (51), /* SI */
1048 COSTS_N_INSNS (83), /* DI */
1049 COSTS_N_INSNS (83)}, /* other */
1050 COSTS_N_INSNS (1), /* cost of movsx */
1051 COSTS_N_INSNS (1), /* cost of movzx */
1052 8, /* "large" insn */
1053 9, /* MOVE_RATIO */
1054 4, /* cost for loading QImode using movzbl */
1055 {5, 5, 4}, /* cost of loading integer registers
1056 in QImode, HImode and SImode.
1057 Relative to reg-reg move (2). */
1058 {4, 4, 4}, /* cost of storing integer registers */
1059 2, /* cost of reg,reg fld/fst */
1060 {5, 5, 12}, /* cost of loading fp registers
1061 in SFmode, DFmode and XFmode */
1062 {4, 4, 8}, /* cost of storing fp registers
1063 in SFmode, DFmode and XFmode */
1064 2, /* cost of moving MMX register */
1065 {4, 4}, /* cost of loading MMX registers
1066 in SImode and DImode */
1067 {4, 4}, /* cost of storing MMX registers
1068 in SImode and DImode */
1069 2, /* cost of moving SSE register */
1070 {4, 4, 4}, /* cost of loading SSE registers
1071 in SImode, DImode and TImode */
1072 {4, 4, 4}, /* cost of storing SSE registers
1073 in SImode, DImode and TImode */
1074 2, /* MMX or SSE register to integer */
1075 /* On K8:
1076 MOVD reg64, xmmreg Double FSTORE 4
1077 MOVD reg32, xmmreg Double FSTORE 4
1078 On AMDFAM10:
1079 MOVD reg64, xmmreg Double FADD 3
1080 1/1 1/1
1081 MOVD reg32, xmmreg Double FADD 3
1082 1/1 1/1 */
1083 16, /* size of l1 cache. */
1084 2048, /* size of l2 cache. */
1085 64, /* size of prefetch block */
 1086 /* New AMD processors never drop prefetches; if they cannot be performed
 1087 immediately, they are queued.  We set the number of simultaneous prefetches
 1088 to a large constant to reflect this (it is probably not a good idea to leave
 1089 the number of prefetches entirely unlimited, as their execution also takes
 1090 some time).  */
1091 100, /* number of parallel prefetches */
1092 2, /* Branch cost */
1093 COSTS_N_INSNS (6), /* cost of FADD and FSUB insns. */
1094 COSTS_N_INSNS (6), /* cost of FMUL instruction. */
1095 COSTS_N_INSNS (42), /* cost of FDIV instruction. */
1096 COSTS_N_INSNS (2), /* cost of FABS instruction. */
1097 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
1098 COSTS_N_INSNS (52), /* cost of FSQRT instruction. */
1100 bdver2_memcpy,
1101 bdver2_memset,
1102 6, /* scalar_stmt_cost. */
1103 4, /* scalar load_cost. */
1104 4, /* scalar_store_cost. */
1105 6, /* vec_stmt_cost. */
1106 0, /* vec_to_scalar_cost. */
1107 2, /* scalar_to_vec_cost. */
1108 4, /* vec_align_load_cost. */
1109 4, /* vec_unalign_load_cost. */
1110 4, /* vec_store_cost. */
1111 2, /* cond_taken_branch_cost. */
 1112 1,                                     /* cond_not_taken_branch_cost.  */
 1113 };
 1116 /* BDVER3 has an optimized REP instruction for medium-sized blocks, but for
 1117 very small blocks it is better to use a loop.  For large blocks, a libcall
 1118 can do non-temporal accesses and beat inline code considerably.  */
1119 static stringop_algs bdver3_memcpy[2] = {
1120 {libcall, {{6, loop, false}, {14, unrolled_loop, false},
1121 {-1, rep_prefix_4_byte, false}}},
1122 {libcall, {{16, loop, false}, {8192, rep_prefix_8_byte, false},
1123 {-1, libcall, false}}}};
1124 static stringop_algs bdver3_memset[2] = {
1125 {libcall, {{8, loop, false}, {24, unrolled_loop, false},
1126 {2048, rep_prefix_4_byte, false}, {-1, libcall, false}}},
1127 {libcall, {{48, unrolled_loop, false}, {8192, rep_prefix_8_byte, false},
1128 {-1, libcall, false}}}};
1129 struct processor_costs bdver3_cost = {
1130 COSTS_N_INSNS (1), /* cost of an add instruction */
1131 COSTS_N_INSNS (1), /* cost of a lea instruction */
1132 COSTS_N_INSNS (1), /* variable shift costs */
1133 COSTS_N_INSNS (1), /* constant shift costs */
1134 {COSTS_N_INSNS (4), /* cost of starting multiply for QI */
1135 COSTS_N_INSNS (4), /* HI */
1136 COSTS_N_INSNS (4), /* SI */
1137 COSTS_N_INSNS (6), /* DI */
1138 COSTS_N_INSNS (6)}, /* other */
1139 0, /* cost of multiply per each bit set */
1140 {COSTS_N_INSNS (19), /* cost of a divide/mod for QI */
1141 COSTS_N_INSNS (35), /* HI */
1142 COSTS_N_INSNS (51), /* SI */
1143 COSTS_N_INSNS (83), /* DI */
1144 COSTS_N_INSNS (83)}, /* other */
1145 COSTS_N_INSNS (1), /* cost of movsx */
1146 COSTS_N_INSNS (1), /* cost of movzx */
1147 8, /* "large" insn */
1148 9, /* MOVE_RATIO */
1149 4, /* cost for loading QImode using movzbl */
1150 {5, 5, 4}, /* cost of loading integer registers
1151 in QImode, HImode and SImode.
1152 Relative to reg-reg move (2). */
1153 {4, 4, 4}, /* cost of storing integer registers */
1154 2, /* cost of reg,reg fld/fst */
1155 {5, 5, 12}, /* cost of loading fp registers
1156 in SFmode, DFmode and XFmode */
1157 {4, 4, 8}, /* cost of storing fp registers
1158 in SFmode, DFmode and XFmode */
1159 2, /* cost of moving MMX register */
1160 {4, 4}, /* cost of loading MMX registers
1161 in SImode and DImode */
1162 {4, 4}, /* cost of storing MMX registers
1163 in SImode and DImode */
1164 2, /* cost of moving SSE register */
1165 {4, 4, 4}, /* cost of loading SSE registers
1166 in SImode, DImode and TImode */
1167 {4, 4, 4}, /* cost of storing SSE registers
1168 in SImode, DImode and TImode */
1169 2, /* MMX or SSE register to integer */
1170 16, /* size of l1 cache. */
1171 2048, /* size of l2 cache. */
1172 64, /* size of prefetch block */
 1173 /* New AMD processors never drop prefetches; if they cannot be performed
 1174 immediately, they are queued.  We set the number of simultaneous prefetches
 1175 to a large constant to reflect this (it is probably not a good idea to leave
 1176 the number of prefetches entirely unlimited, as their execution also takes
 1177 some time).  */
1178 100, /* number of parallel prefetches */
1179 2, /* Branch cost */
1180 COSTS_N_INSNS (6), /* cost of FADD and FSUB insns. */
1181 COSTS_N_INSNS (6), /* cost of FMUL instruction. */
1182 COSTS_N_INSNS (42), /* cost of FDIV instruction. */
1183 COSTS_N_INSNS (2), /* cost of FABS instruction. */
1184 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
1185 COSTS_N_INSNS (52), /* cost of FSQRT instruction. */
1187 bdver3_memcpy,
1188 bdver3_memset,
1189 6, /* scalar_stmt_cost. */
1190 4, /* scalar load_cost. */
1191 4, /* scalar_store_cost. */
1192 6, /* vec_stmt_cost. */
1193 0, /* vec_to_scalar_cost. */
1194 2, /* scalar_to_vec_cost. */
1195 4, /* vec_align_load_cost. */
1196 4, /* vec_unalign_load_cost. */
1197 4, /* vec_store_cost. */
1198 2, /* cond_taken_branch_cost. */
 1199 1,                                     /* cond_not_taken_branch_cost.  */
 1200 };
 1202 /* BDVER4 has an optimized REP instruction for medium-sized blocks, but for
 1203 very small blocks it is better to use a loop.  For large blocks, a libcall
 1204 can do non-temporal accesses and beat inline code considerably.  */
1205 static stringop_algs bdver4_memcpy[2] = {
1206 {libcall, {{6, loop, false}, {14, unrolled_loop, false},
1207 {-1, rep_prefix_4_byte, false}}},
1208 {libcall, {{16, loop, false}, {8192, rep_prefix_8_byte, false},
1209 {-1, libcall, false}}}};
1210 static stringop_algs bdver4_memset[2] = {
1211 {libcall, {{8, loop, false}, {24, unrolled_loop, false},
1212 {2048, rep_prefix_4_byte, false}, {-1, libcall, false}}},
1213 {libcall, {{48, unrolled_loop, false}, {8192, rep_prefix_8_byte, false},
1214 {-1, libcall, false}}}};
1215 struct processor_costs bdver4_cost = {
1216 COSTS_N_INSNS (1), /* cost of an add instruction */
1217 COSTS_N_INSNS (1), /* cost of a lea instruction */
1218 COSTS_N_INSNS (1), /* variable shift costs */
1219 COSTS_N_INSNS (1), /* constant shift costs */
1220 {COSTS_N_INSNS (4), /* cost of starting multiply for QI */
1221 COSTS_N_INSNS (4), /* HI */
1222 COSTS_N_INSNS (4), /* SI */
1223 COSTS_N_INSNS (6), /* DI */
1224 COSTS_N_INSNS (6)}, /* other */
1225 0, /* cost of multiply per each bit set */
1226 {COSTS_N_INSNS (19), /* cost of a divide/mod for QI */
1227 COSTS_N_INSNS (35), /* HI */
1228 COSTS_N_INSNS (51), /* SI */
1229 COSTS_N_INSNS (83), /* DI */
1230 COSTS_N_INSNS (83)}, /* other */
1231 COSTS_N_INSNS (1), /* cost of movsx */
1232 COSTS_N_INSNS (1), /* cost of movzx */
1233 8, /* "large" insn */
1234 9, /* MOVE_RATIO */
1235 4, /* cost for loading QImode using movzbl */
1236 {5, 5, 4}, /* cost of loading integer registers
1237 in QImode, HImode and SImode.
1238 Relative to reg-reg move (2). */
1239 {4, 4, 4}, /* cost of storing integer registers */
1240 2, /* cost of reg,reg fld/fst */
1241 {5, 5, 12}, /* cost of loading fp registers
1242 in SFmode, DFmode and XFmode */
1243 {4, 4, 8}, /* cost of storing fp registers
1244 in SFmode, DFmode and XFmode */
1245 2, /* cost of moving MMX register */
1246 {4, 4}, /* cost of loading MMX registers
1247 in SImode and DImode */
1248 {4, 4}, /* cost of storing MMX registers
1249 in SImode and DImode */
1250 2, /* cost of moving SSE register */
1251 {4, 4, 4}, /* cost of loading SSE registers
1252 in SImode, DImode and TImode */
1253 {4, 4, 4}, /* cost of storing SSE registers
1254 in SImode, DImode and TImode */
1255 2, /* MMX or SSE register to integer */
1256 16, /* size of l1 cache. */
1257 2048, /* size of l2 cache. */
1258 64, /* size of prefetch block */
 1259 /* New AMD processors never drop prefetches; if they cannot be performed
 1260 immediately, they are queued.  We set the number of simultaneous prefetches
 1261 to a large constant to reflect this (it is probably not a good idea to leave
 1262 the number of prefetches entirely unlimited, as their execution also takes
 1263 some time).  */
1264 100, /* number of parallel prefetches */
1265 2, /* Branch cost */
1266 COSTS_N_INSNS (6), /* cost of FADD and FSUB insns. */
1267 COSTS_N_INSNS (6), /* cost of FMUL instruction. */
1268 COSTS_N_INSNS (42), /* cost of FDIV instruction. */
1269 COSTS_N_INSNS (2), /* cost of FABS instruction. */
1270 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
1271 COSTS_N_INSNS (52), /* cost of FSQRT instruction. */
1273 bdver4_memcpy,
1274 bdver4_memset,
1275 6, /* scalar_stmt_cost. */
1276 4, /* scalar load_cost. */
1277 4, /* scalar_store_cost. */
1278 6, /* vec_stmt_cost. */
1279 0, /* vec_to_scalar_cost. */
1280 2, /* scalar_to_vec_cost. */
1281 4, /* vec_align_load_cost. */
1282 4, /* vec_unalign_load_cost. */
1283 4, /* vec_store_cost. */
1284 2, /* cond_taken_branch_cost. */
 1285 1,                                     /* cond_not_taken_branch_cost.  */
 1286 };
 1288 /* BTVER1 has an optimized REP instruction for medium-sized blocks, but for
 1289 very small blocks it is better to use a loop.  For large blocks, a libcall
 1290 can do non-temporal accesses and beat inline code considerably.  */
1291 static stringop_algs btver1_memcpy[2] = {
1292 {libcall, {{6, loop, false}, {14, unrolled_loop, false},
1293 {-1, rep_prefix_4_byte, false}}},
1294 {libcall, {{16, loop, false}, {8192, rep_prefix_8_byte, false},
1295 {-1, libcall, false}}}};
1296 static stringop_algs btver1_memset[2] = {
1297 {libcall, {{8, loop, false}, {24, unrolled_loop, false},
1298 {2048, rep_prefix_4_byte, false}, {-1, libcall, false}}},
1299 {libcall, {{48, unrolled_loop, false}, {8192, rep_prefix_8_byte, false},
1300 {-1, libcall, false}}}};
1301 const struct processor_costs btver1_cost = {
1302 COSTS_N_INSNS (1), /* cost of an add instruction */
1303 COSTS_N_INSNS (2), /* cost of a lea instruction */
1304 COSTS_N_INSNS (1), /* variable shift costs */
1305 COSTS_N_INSNS (1), /* constant shift costs */
1306 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
1307 COSTS_N_INSNS (4), /* HI */
1308 COSTS_N_INSNS (3), /* SI */
1309 COSTS_N_INSNS (4), /* DI */
1310 COSTS_N_INSNS (5)}, /* other */
1311 0, /* cost of multiply per each bit set */
1312 {COSTS_N_INSNS (19), /* cost of a divide/mod for QI */
1313 COSTS_N_INSNS (35), /* HI */
1314 COSTS_N_INSNS (51), /* SI */
1315 COSTS_N_INSNS (83), /* DI */
1316 COSTS_N_INSNS (83)}, /* other */
1317 COSTS_N_INSNS (1), /* cost of movsx */
1318 COSTS_N_INSNS (1), /* cost of movzx */
1319 8, /* "large" insn */
1320 9, /* MOVE_RATIO */
1321 4, /* cost for loading QImode using movzbl */
1322 {3, 4, 3}, /* cost of loading integer registers
1323 in QImode, HImode and SImode.
1324 Relative to reg-reg move (2). */
1325 {3, 4, 3}, /* cost of storing integer registers */
1326 4, /* cost of reg,reg fld/fst */
1327 {4, 4, 12}, /* cost of loading fp registers
1328 in SFmode, DFmode and XFmode */
1329 {6, 6, 8}, /* cost of storing fp registers
1330 in SFmode, DFmode and XFmode */
1331 2, /* cost of moving MMX register */
1332 {3, 3}, /* cost of loading MMX registers
1333 in SImode and DImode */
1334 {4, 4}, /* cost of storing MMX registers
1335 in SImode and DImode */
1336 2, /* cost of moving SSE register */
1337 {4, 4, 3}, /* cost of loading SSE registers
1338 in SImode, DImode and TImode */
1339 {4, 4, 5}, /* cost of storing SSE registers
1340 in SImode, DImode and TImode */
1341 3, /* MMX or SSE register to integer */
1342 /* On K8:
1343 MOVD reg64, xmmreg Double FSTORE 4
1344 MOVD reg32, xmmreg Double FSTORE 4
1345 On AMDFAM10:
1346 MOVD reg64, xmmreg Double FADD 3
1347 1/1 1/1
1348 MOVD reg32, xmmreg Double FADD 3
1349 1/1 1/1 */
1350 32, /* size of l1 cache. */
1351 512, /* size of l2 cache. */
1352 64, /* size of prefetch block */
1353 100, /* number of parallel prefetches */
1354 2, /* Branch cost */
1355 COSTS_N_INSNS (4), /* cost of FADD and FSUB insns. */
1356 COSTS_N_INSNS (4), /* cost of FMUL instruction. */
1357 COSTS_N_INSNS (19), /* cost of FDIV instruction. */
1358 COSTS_N_INSNS (2), /* cost of FABS instruction. */
1359 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
1360 COSTS_N_INSNS (35), /* cost of FSQRT instruction. */
1362 btver1_memcpy,
1363 btver1_memset,
1364 4, /* scalar_stmt_cost. */
1365 2, /* scalar load_cost. */
1366 2, /* scalar_store_cost. */
1367 6, /* vec_stmt_cost. */
1368 0, /* vec_to_scalar_cost. */
1369 2, /* scalar_to_vec_cost. */
1370 2, /* vec_align_load_cost. */
1371 2, /* vec_unalign_load_cost. */
1372 2, /* vec_store_cost. */
1373 2, /* cond_taken_branch_cost. */
 1374 1,                                     /* cond_not_taken_branch_cost.  */
 1375 };
1377 static stringop_algs btver2_memcpy[2] = {
1378 {libcall, {{6, loop, false}, {14, unrolled_loop, false},
1379 {-1, rep_prefix_4_byte, false}}},
1380 {libcall, {{16, loop, false}, {8192, rep_prefix_8_byte, false},
1381 {-1, libcall, false}}}};
1382 static stringop_algs btver2_memset[2] = {
1383 {libcall, {{8, loop, false}, {24, unrolled_loop, false},
1384 {2048, rep_prefix_4_byte, false}, {-1, libcall, false}}},
1385 {libcall, {{48, unrolled_loop, false}, {8192, rep_prefix_8_byte, false},
1386 {-1, libcall, false}}}};
1387 const struct processor_costs btver2_cost = {
1388 COSTS_N_INSNS (1), /* cost of an add instruction */
1389 COSTS_N_INSNS (2), /* cost of a lea instruction */
1390 COSTS_N_INSNS (1), /* variable shift costs */
1391 COSTS_N_INSNS (1), /* constant shift costs */
1392 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
1393 COSTS_N_INSNS (4), /* HI */
1394 COSTS_N_INSNS (3), /* SI */
1395 COSTS_N_INSNS (4), /* DI */
1396 COSTS_N_INSNS (5)}, /* other */
1397 0, /* cost of multiply per each bit set */
1398 {COSTS_N_INSNS (19), /* cost of a divide/mod for QI */
1399 COSTS_N_INSNS (35), /* HI */
1400 COSTS_N_INSNS (51), /* SI */
1401 COSTS_N_INSNS (83), /* DI */
1402 COSTS_N_INSNS (83)}, /* other */
1403 COSTS_N_INSNS (1), /* cost of movsx */
1404 COSTS_N_INSNS (1), /* cost of movzx */
1405 8, /* "large" insn */
1406 9, /* MOVE_RATIO */
1407 4, /* cost for loading QImode using movzbl */
1408 {3, 4, 3}, /* cost of loading integer registers
1409 in QImode, HImode and SImode.
1410 Relative to reg-reg move (2). */
1411 {3, 4, 3}, /* cost of storing integer registers */
1412 4, /* cost of reg,reg fld/fst */
1413 {4, 4, 12}, /* cost of loading fp registers
1414 in SFmode, DFmode and XFmode */
1415 {6, 6, 8}, /* cost of storing fp registers
1416 in SFmode, DFmode and XFmode */
1417 2, /* cost of moving MMX register */
1418 {3, 3}, /* cost of loading MMX registers
1419 in SImode and DImode */
1420 {4, 4}, /* cost of storing MMX registers
1421 in SImode and DImode */
1422 2, /* cost of moving SSE register */
1423 {4, 4, 3}, /* cost of loading SSE registers
1424 in SImode, DImode and TImode */
1425 {4, 4, 5}, /* cost of storing SSE registers
1426 in SImode, DImode and TImode */
1427 3, /* MMX or SSE register to integer */
1428 /* On K8:
1429 MOVD reg64, xmmreg Double FSTORE 4
1430 MOVD reg32, xmmreg Double FSTORE 4
1431 On AMDFAM10:
1432 MOVD reg64, xmmreg Double FADD 3
1433 1/1 1/1
1434 MOVD reg32, xmmreg Double FADD 3
1435 1/1 1/1 */
1436 32, /* size of l1 cache. */
1437 2048, /* size of l2 cache. */
1438 64, /* size of prefetch block */
1439 100, /* number of parallel prefetches */
1440 2, /* Branch cost */
1441 COSTS_N_INSNS (4), /* cost of FADD and FSUB insns. */
1442 COSTS_N_INSNS (4), /* cost of FMUL instruction. */
1443 COSTS_N_INSNS (19), /* cost of FDIV instruction. */
1444 COSTS_N_INSNS (2), /* cost of FABS instruction. */
1445 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
1446 COSTS_N_INSNS (35), /* cost of FSQRT instruction. */
1447 btver2_memcpy,
1448 btver2_memset,
1449 4, /* scalar_stmt_cost. */
1450 2, /* scalar_load_cost. */
1451 2, /* scalar_store_cost. */
1452 6, /* vec_stmt_cost. */
1453 0, /* vec_to_scalar_cost. */
1454 2, /* scalar_to_vec_cost. */
1455 2, /* vec_align_load_cost. */
1456 2, /* vec_unalign_load_cost. */
1457 2, /* vec_store_cost. */
1458 2, /* cond_taken_branch_cost. */
1459 1, /* cond_not_taken_branch_cost. */
1462 static stringop_algs pentium4_memcpy[2] = {
1463 {libcall, {{12, loop_1_byte, false}, {-1, rep_prefix_4_byte, false}}},
1464 DUMMY_STRINGOP_ALGS};
1465 static stringop_algs pentium4_memset[2] = {
1466 {libcall, {{6, loop_1_byte, false}, {48, loop, false},
1467 {20480, rep_prefix_4_byte, false}, {-1, libcall, false}}},
1468 DUMMY_STRINGOP_ALGS};
1470 static const
1471 struct processor_costs pentium4_cost = {
1472 COSTS_N_INSNS (1), /* cost of an add instruction */
1473 COSTS_N_INSNS (3), /* cost of a lea instruction */
1474 COSTS_N_INSNS (4), /* variable shift costs */
1475 COSTS_N_INSNS (4), /* constant shift costs */
1476 {COSTS_N_INSNS (15), /* cost of starting multiply for QI */
1477 COSTS_N_INSNS (15), /* HI */
1478 COSTS_N_INSNS (15), /* SI */
1479 COSTS_N_INSNS (15), /* DI */
1480 COSTS_N_INSNS (15)}, /* other */
1481 0, /* cost of multiply per each bit set */
1482 {COSTS_N_INSNS (56), /* cost of a divide/mod for QI */
1483 COSTS_N_INSNS (56), /* HI */
1484 COSTS_N_INSNS (56), /* SI */
1485 COSTS_N_INSNS (56), /* DI */
1486 COSTS_N_INSNS (56)}, /* other */
1487 COSTS_N_INSNS (1), /* cost of movsx */
1488 COSTS_N_INSNS (1), /* cost of movzx */
1489 16, /* "large" insn */
1490 6, /* MOVE_RATIO */
1491 2, /* cost for loading QImode using movzbl */
1492 {4, 5, 4}, /* cost of loading integer registers
1493 in QImode, HImode and SImode.
1494 Relative to reg-reg move (2). */
1495 {2, 3, 2}, /* cost of storing integer registers */
1496 2, /* cost of reg,reg fld/fst */
1497 {2, 2, 6}, /* cost of loading fp registers
1498 in SFmode, DFmode and XFmode */
1499 {4, 4, 6}, /* cost of storing fp registers
1500 in SFmode, DFmode and XFmode */
1501 2, /* cost of moving MMX register */
1502 {2, 2}, /* cost of loading MMX registers
1503 in SImode and DImode */
1504 {2, 2}, /* cost of storing MMX registers
1505 in SImode and DImode */
1506 12, /* cost of moving SSE register */
1507 {12, 12, 12}, /* cost of loading SSE registers
1508 in SImode, DImode and TImode */
1509 {2, 2, 8}, /* cost of storing SSE registers
1510 in SImode, DImode and TImode */
1511 10, /* MMX or SSE register to integer */
1512 8, /* size of l1 cache. */
1513 256, /* size of l2 cache. */
1514 64, /* size of prefetch block */
1515 6, /* number of parallel prefetches */
1516 2, /* Branch cost */
1517 COSTS_N_INSNS (5), /* cost of FADD and FSUB insns. */
1518 COSTS_N_INSNS (7), /* cost of FMUL instruction. */
1519 COSTS_N_INSNS (43), /* cost of FDIV instruction. */
1520 COSTS_N_INSNS (2), /* cost of FABS instruction. */
1521 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
1522 COSTS_N_INSNS (43), /* cost of FSQRT instruction. */
1523 pentium4_memcpy,
1524 pentium4_memset,
1525 1, /* scalar_stmt_cost. */
1526 1, /* scalar_load_cost. */
1527 1, /* scalar_store_cost. */
1528 1, /* vec_stmt_cost. */
1529 1, /* vec_to_scalar_cost. */
1530 1, /* scalar_to_vec_cost. */
1531 1, /* vec_align_load_cost. */
1532 2, /* vec_unalign_load_cost. */
1533 1, /* vec_store_cost. */
1534 3, /* cond_taken_branch_cost. */
1535 1, /* cond_not_taken_branch_cost. */
1538 static stringop_algs nocona_memcpy[2] = {
1539 {libcall, {{12, loop_1_byte, false}, {-1, rep_prefix_4_byte, false}}},
1540 {libcall, {{32, loop, false}, {20000, rep_prefix_8_byte, false},
1541 {100000, unrolled_loop, false}, {-1, libcall, false}}}};
1543 static stringop_algs nocona_memset[2] = {
1544 {libcall, {{6, loop_1_byte, false}, {48, loop, false},
1545 {20480, rep_prefix_4_byte, false}, {-1, libcall, false}}},
1546 {libcall, {{24, loop, false}, {64, unrolled_loop, false},
1547 {8192, rep_prefix_8_byte, false}, {-1, libcall, false}}}};
1549 static const
1550 struct processor_costs nocona_cost = {
1551 COSTS_N_INSNS (1), /* cost of an add instruction */
1552 COSTS_N_INSNS (1), /* cost of a lea instruction */
1553 COSTS_N_INSNS (1), /* variable shift costs */
1554 COSTS_N_INSNS (1), /* constant shift costs */
1555 {COSTS_N_INSNS (10), /* cost of starting multiply for QI */
1556 COSTS_N_INSNS (10), /* HI */
1557 COSTS_N_INSNS (10), /* SI */
1558 COSTS_N_INSNS (10), /* DI */
1559 COSTS_N_INSNS (10)}, /* other */
1560 0, /* cost of multiply per each bit set */
1561 {COSTS_N_INSNS (66), /* cost of a divide/mod for QI */
1562 COSTS_N_INSNS (66), /* HI */
1563 COSTS_N_INSNS (66), /* SI */
1564 COSTS_N_INSNS (66), /* DI */
1565 COSTS_N_INSNS (66)}, /* other */
1566 COSTS_N_INSNS (1), /* cost of movsx */
1567 COSTS_N_INSNS (1), /* cost of movzx */
1568 16, /* "large" insn */
1569 17, /* MOVE_RATIO */
1570 4, /* cost for loading QImode using movzbl */
1571 {4, 4, 4}, /* cost of loading integer registers
1572 in QImode, HImode and SImode.
1573 Relative to reg-reg move (2). */
1574 {4, 4, 4}, /* cost of storing integer registers */
1575 3, /* cost of reg,reg fld/fst */
1576 {12, 12, 12}, /* cost of loading fp registers
1577 in SFmode, DFmode and XFmode */
1578 {4, 4, 4}, /* cost of storing fp registers
1579 in SFmode, DFmode and XFmode */
1580 6, /* cost of moving MMX register */
1581 {12, 12}, /* cost of loading MMX registers
1582 in SImode and DImode */
1583 {12, 12}, /* cost of storing MMX registers
1584 in SImode and DImode */
1585 6, /* cost of moving SSE register */
1586 {12, 12, 12}, /* cost of loading SSE registers
1587 in SImode, DImode and TImode */
1588 {12, 12, 12}, /* cost of storing SSE registers
1589 in SImode, DImode and TImode */
1590 8, /* MMX or SSE register to integer */
1591 8, /* size of l1 cache. */
1592 1024, /* size of l2 cache. */
1593 64, /* size of prefetch block */
1594 8, /* number of parallel prefetches */
1595 1, /* Branch cost */
1596 COSTS_N_INSNS (6), /* cost of FADD and FSUB insns. */
1597 COSTS_N_INSNS (8), /* cost of FMUL instruction. */
1598 COSTS_N_INSNS (40), /* cost of FDIV instruction. */
1599 COSTS_N_INSNS (3), /* cost of FABS instruction. */
1600 COSTS_N_INSNS (3), /* cost of FCHS instruction. */
1601 COSTS_N_INSNS (44), /* cost of FSQRT instruction. */
1602 nocona_memcpy,
1603 nocona_memset,
1604 1, /* scalar_stmt_cost. */
1605 1, /* scalar_load_cost. */
1606 1, /* scalar_store_cost. */
1607 1, /* vec_stmt_cost. */
1608 1, /* vec_to_scalar_cost. */
1609 1, /* scalar_to_vec_cost. */
1610 1, /* vec_align_load_cost. */
1611 2, /* vec_unalign_load_cost. */
1612 1, /* vec_store_cost. */
1613 3, /* cond_taken_branch_cost. */
1614 1, /* cond_not_taken_branch_cost. */
1617 static stringop_algs atom_memcpy[2] = {
1618 {libcall, {{11, loop, false}, {-1, rep_prefix_4_byte, false}}},
1619 {libcall, {{32, loop, false}, {64, rep_prefix_4_byte, false},
1620 {8192, rep_prefix_8_byte, false}, {-1, libcall, false}}}};
1621 static stringop_algs atom_memset[2] = {
1622 {libcall, {{8, loop, false}, {15, unrolled_loop, false},
1623 {2048, rep_prefix_4_byte, false}, {-1, libcall, false}}},
1624 {libcall, {{24, loop, false}, {32, unrolled_loop, false},
1625 {8192, rep_prefix_8_byte, false}, {-1, libcall, false}}}};
1626 static const
1627 struct processor_costs atom_cost = {
1628 COSTS_N_INSNS (1), /* cost of an add instruction */
1629 COSTS_N_INSNS (1) + 1, /* cost of a lea instruction */
1630 COSTS_N_INSNS (1), /* variable shift costs */
1631 COSTS_N_INSNS (1), /* constant shift costs */
1632 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
1633 COSTS_N_INSNS (4), /* HI */
1634 COSTS_N_INSNS (3), /* SI */
1635 COSTS_N_INSNS (4), /* DI */
1636 COSTS_N_INSNS (2)}, /* other */
1637 0, /* cost of multiply per each bit set */
1638 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
1639 COSTS_N_INSNS (26), /* HI */
1640 COSTS_N_INSNS (42), /* SI */
1641 COSTS_N_INSNS (74), /* DI */
1642 COSTS_N_INSNS (74)}, /* other */
1643 COSTS_N_INSNS (1), /* cost of movsx */
1644 COSTS_N_INSNS (1), /* cost of movzx */
1645 8, /* "large" insn */
1646 17, /* MOVE_RATIO */
1647 4, /* cost for loading QImode using movzbl */
1648 {4, 4, 4}, /* cost of loading integer registers
1649 in QImode, HImode and SImode.
1650 Relative to reg-reg move (2). */
1651 {4, 4, 4}, /* cost of storing integer registers */
1652 4, /* cost of reg,reg fld/fst */
1653 {12, 12, 12}, /* cost of loading fp registers
1654 in SFmode, DFmode and XFmode */
1655 {6, 6, 8}, /* cost of storing fp registers
1656 in SFmode, DFmode and XFmode */
1657 2, /* cost of moving MMX register */
1658 {8, 8}, /* cost of loading MMX registers
1659 in SImode and DImode */
1660 {8, 8}, /* cost of storing MMX registers
1661 in SImode and DImode */
1662 2, /* cost of moving SSE register */
1663 {8, 8, 8}, /* cost of loading SSE registers
1664 in SImode, DImode and TImode */
1665 {8, 8, 8}, /* cost of storing SSE registers
1666 in SImode, DImode and TImode */
1667 5, /* MMX or SSE register to integer */
1668 32, /* size of l1 cache. */
1669 256, /* size of l2 cache. */
1670 64, /* size of prefetch block */
1671 6, /* number of parallel prefetches */
1672 3, /* Branch cost */
1673 COSTS_N_INSNS (8), /* cost of FADD and FSUB insns. */
1674 COSTS_N_INSNS (8), /* cost of FMUL instruction. */
1675 COSTS_N_INSNS (20), /* cost of FDIV instruction. */
1676 COSTS_N_INSNS (8), /* cost of FABS instruction. */
1677 COSTS_N_INSNS (8), /* cost of FCHS instruction. */
1678 COSTS_N_INSNS (40), /* cost of FSQRT instruction. */
1679 atom_memcpy,
1680 atom_memset,
1681 1, /* scalar_stmt_cost. */
1682 1, /* scalar_load_cost. */
1683 1, /* scalar_store_cost. */
1684 1, /* vec_stmt_cost. */
1685 1, /* vec_to_scalar_cost. */
1686 1, /* scalar_to_vec_cost. */
1687 1, /* vec_align_load_cost. */
1688 2, /* vec_unalign_load_cost. */
1689 1, /* vec_store_cost. */
1690 3, /* cond_taken_branch_cost. */
1691 1, /* cond_not_taken_branch_cost. */
1694 static stringop_algs slm_memcpy[2] = {
1695 {libcall, {{11, loop, false}, {-1, rep_prefix_4_byte, false}}},
1696 {libcall, {{32, loop, false}, {64, rep_prefix_4_byte, false},
1697 {8192, rep_prefix_8_byte, false}, {-1, libcall, false}}}};
1698 static stringop_algs slm_memset[2] = {
1699 {libcall, {{8, loop, false}, {15, unrolled_loop, false},
1700 {2048, rep_prefix_4_byte, false}, {-1, libcall, false}}},
1701 {libcall, {{24, loop, false}, {32, unrolled_loop, false},
1702 {8192, rep_prefix_8_byte, false}, {-1, libcall, false}}}};
1703 static const
1704 struct processor_costs slm_cost = {
1705 COSTS_N_INSNS (1), /* cost of an add instruction */
1706 COSTS_N_INSNS (1) + 1, /* cost of a lea instruction */
1707 COSTS_N_INSNS (1), /* variable shift costs */
1708 COSTS_N_INSNS (1), /* constant shift costs */
1709 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
1710 COSTS_N_INSNS (3), /* HI */
1711 COSTS_N_INSNS (3), /* SI */
1712 COSTS_N_INSNS (4), /* DI */
1713 COSTS_N_INSNS (2)}, /* other */
1714 0, /* cost of multiply per each bit set */
1715 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
1716 COSTS_N_INSNS (26), /* HI */
1717 COSTS_N_INSNS (42), /* SI */
1718 COSTS_N_INSNS (74), /* DI */
1719 COSTS_N_INSNS (74)}, /* other */
1720 COSTS_N_INSNS (1), /* cost of movsx */
1721 COSTS_N_INSNS (1), /* cost of movzx */
1722 8, /* "large" insn */
1723 17, /* MOVE_RATIO */
1724 4, /* cost for loading QImode using movzbl */
1725 {4, 4, 4}, /* cost of loading integer registers
1726 in QImode, HImode and SImode.
1727 Relative to reg-reg move (2). */
1728 {4, 4, 4}, /* cost of storing integer registers */
1729 4, /* cost of reg,reg fld/fst */
1730 {12, 12, 12}, /* cost of loading fp registers
1731 in SFmode, DFmode and XFmode */
1732 {6, 6, 8}, /* cost of storing fp registers
1733 in SFmode, DFmode and XFmode */
1734 2, /* cost of moving MMX register */
1735 {8, 8}, /* cost of loading MMX registers
1736 in SImode and DImode */
1737 {8, 8}, /* cost of storing MMX registers
1738 in SImode and DImode */
1739 2, /* cost of moving SSE register */
1740 {8, 8, 8}, /* cost of loading SSE registers
1741 in SImode, DImode and TImode */
1742 {8, 8, 8}, /* cost of storing SSE registers
1743 in SImode, DImode and TImode */
1744 5, /* MMX or SSE register to integer */
1745 32, /* size of l1 cache. */
1746 256, /* size of l2 cache. */
1747 64, /* size of prefetch block */
1748 6, /* number of parallel prefetches */
1749 3, /* Branch cost */
1750 COSTS_N_INSNS (8), /* cost of FADD and FSUB insns. */
1751 COSTS_N_INSNS (8), /* cost of FMUL instruction. */
1752 COSTS_N_INSNS (20), /* cost of FDIV instruction. */
1753 COSTS_N_INSNS (8), /* cost of FABS instruction. */
1754 COSTS_N_INSNS (8), /* cost of FCHS instruction. */
1755 COSTS_N_INSNS (40), /* cost of FSQRT instruction. */
1756 slm_memcpy,
1757 slm_memset,
1758 1, /* scalar_stmt_cost. */
1759 1, /* scalar_load_cost. */
1760 1, /* scalar_store_cost. */
1761 1, /* vec_stmt_cost. */
1762 4, /* vec_to_scalar_cost. */
1763 1, /* scalar_to_vec_cost. */
1764 1, /* vec_align_load_cost. */
1765 2, /* vec_unalign_load_cost. */
1766 1, /* vec_store_cost. */
1767 3, /* cond_taken_branch_cost. */
1768 1, /* cond_not_taken_branch_cost. */
1771 static stringop_algs intel_memcpy[2] = {
1772 {libcall, {{11, loop, false}, {-1, rep_prefix_4_byte, false}}},
1773 {libcall, {{32, loop, false}, {64, rep_prefix_4_byte, false},
1774 {8192, rep_prefix_8_byte, false}, {-1, libcall, false}}}};
1775 static stringop_algs intel_memset[2] = {
1776 {libcall, {{8, loop, false}, {15, unrolled_loop, false},
1777 {2048, rep_prefix_4_byte, false}, {-1, libcall, false}}},
1778 {libcall, {{24, loop, false}, {32, unrolled_loop, false},
1779 {8192, rep_prefix_8_byte, false}, {-1, libcall, false}}}};
1780 static const
1781 struct processor_costs intel_cost = {
1782 COSTS_N_INSNS (1), /* cost of an add instruction */
1783 COSTS_N_INSNS (1) + 1, /* cost of a lea instruction */
1784 COSTS_N_INSNS (1), /* variable shift costs */
1785 COSTS_N_INSNS (1), /* constant shift costs */
1786 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
1787 COSTS_N_INSNS (3), /* HI */
1788 COSTS_N_INSNS (3), /* SI */
1789 COSTS_N_INSNS (4), /* DI */
1790 COSTS_N_INSNS (2)}, /* other */
1791 0, /* cost of multiply per each bit set */
1792 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
1793 COSTS_N_INSNS (26), /* HI */
1794 COSTS_N_INSNS (42), /* SI */
1795 COSTS_N_INSNS (74), /* DI */
1796 COSTS_N_INSNS (74)}, /* other */
1797 COSTS_N_INSNS (1), /* cost of movsx */
1798 COSTS_N_INSNS (1), /* cost of movzx */
1799 8, /* "large" insn */
1800 17, /* MOVE_RATIO */
1801 4, /* cost for loading QImode using movzbl */
1802 {4, 4, 4}, /* cost of loading integer registers
1803 in QImode, HImode and SImode.
1804 Relative to reg-reg move (2). */
1805 {4, 4, 4}, /* cost of storing integer registers */
1806 4, /* cost of reg,reg fld/fst */
1807 {12, 12, 12}, /* cost of loading fp registers
1808 in SFmode, DFmode and XFmode */
1809 {6, 6, 8}, /* cost of storing fp registers
1810 in SFmode, DFmode and XFmode */
1811 2, /* cost of moving MMX register */
1812 {8, 8}, /* cost of loading MMX registers
1813 in SImode and DImode */
1814 {8, 8}, /* cost of storing MMX registers
1815 in SImode and DImode */
1816 2, /* cost of moving SSE register */
1817 {8, 8, 8}, /* cost of loading SSE registers
1818 in SImode, DImode and TImode */
1819 {8, 8, 8}, /* cost of storing SSE registers
1820 in SImode, DImode and TImode */
1821 5, /* MMX or SSE register to integer */
1822 32, /* size of l1 cache. */
1823 256, /* size of l2 cache. */
1824 64, /* size of prefetch block */
1825 6, /* number of parallel prefetches */
1826 3, /* Branch cost */
1827 COSTS_N_INSNS (8), /* cost of FADD and FSUB insns. */
1828 COSTS_N_INSNS (8), /* cost of FMUL instruction. */
1829 COSTS_N_INSNS (20), /* cost of FDIV instruction. */
1830 COSTS_N_INSNS (8), /* cost of FABS instruction. */
1831 COSTS_N_INSNS (8), /* cost of FCHS instruction. */
1832 COSTS_N_INSNS (40), /* cost of FSQRT instruction. */
1833 intel_memcpy,
1834 intel_memset,
1835 1, /* scalar_stmt_cost. */
1836 1, /* scalar_load_cost. */
1837 1, /* scalar_store_cost. */
1838 1, /* vec_stmt_cost. */
1839 4, /* vec_to_scalar_cost. */
1840 1, /* scalar_to_vec_cost. */
1841 1, /* vec_align_load_cost. */
1842 2, /* vec_unalign_load_cost. */
1843 1, /* vec_store_cost. */
1844 3, /* cond_taken_branch_cost. */
1845 1, /* cond_not_taken_branch_cost. */
1848 /* Generic should produce code tuned for Core-i7 (and newer chips)
1849 and btver1 (and newer chips). */
1851 static stringop_algs generic_memcpy[2] = {
1852 {libcall, {{32, loop, false}, {8192, rep_prefix_4_byte, false},
1853 {-1, libcall, false}}},
1854 {libcall, {{32, loop, false}, {8192, rep_prefix_8_byte, false},
1855 {-1, libcall, false}}}};
1856 static stringop_algs generic_memset[2] = {
1857 {libcall, {{32, loop, false}, {8192, rep_prefix_4_byte, false},
1858 {-1, libcall, false}}},
1859 {libcall, {{32, loop, false}, {8192, rep_prefix_8_byte, false},
1860 {-1, libcall, false}}}};
1861 static const
1862 struct processor_costs generic_cost = {
1863 COSTS_N_INSNS (1), /* cost of an add instruction */
1864 /* On all chips taken into consideration, lea is 2 cycles or more. With
1865 this cost, however, our current implementation of synth_mult results in
1866 the use of unnecessary temporary registers, causing regressions on several
1867 SPECfp benchmarks. */
1868 COSTS_N_INSNS (1) + 1, /* cost of a lea instruction */
1869 COSTS_N_INSNS (1), /* variable shift costs */
1870 COSTS_N_INSNS (1), /* constant shift costs */
1871 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
1872 COSTS_N_INSNS (4), /* HI */
1873 COSTS_N_INSNS (3), /* SI */
1874 COSTS_N_INSNS (4), /* DI */
1875 COSTS_N_INSNS (2)}, /* other */
1876 0, /* cost of multiply per each bit set */
1877 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
1878 COSTS_N_INSNS (26), /* HI */
1879 COSTS_N_INSNS (42), /* SI */
1880 COSTS_N_INSNS (74), /* DI */
1881 COSTS_N_INSNS (74)}, /* other */
1882 COSTS_N_INSNS (1), /* cost of movsx */
1883 COSTS_N_INSNS (1), /* cost of movzx */
1884 8, /* "large" insn */
1885 17, /* MOVE_RATIO */
1886 4, /* cost for loading QImode using movzbl */
1887 {4, 4, 4}, /* cost of loading integer registers
1888 in QImode, HImode and SImode.
1889 Relative to reg-reg move (2). */
1890 {4, 4, 4}, /* cost of storing integer registers */
1891 4, /* cost of reg,reg fld/fst */
1892 {12, 12, 12}, /* cost of loading fp registers
1893 in SFmode, DFmode and XFmode */
1894 {6, 6, 8}, /* cost of storing fp registers
1895 in SFmode, DFmode and XFmode */
1896 2, /* cost of moving MMX register */
1897 {8, 8}, /* cost of loading MMX registers
1898 in SImode and DImode */
1899 {8, 8}, /* cost of storing MMX registers
1900 in SImode and DImode */
1901 2, /* cost of moving SSE register */
1902 {8, 8, 8}, /* cost of loading SSE registers
1903 in SImode, DImode and TImode */
1904 {8, 8, 8}, /* cost of storing SSE registers
1905 in SImode, DImode and TImode */
1906 5, /* MMX or SSE register to integer */
1907 32, /* size of l1 cache. */
1908 512, /* size of l2 cache. */
1909 64, /* size of prefetch block */
1910 6, /* number of parallel prefetches */
1911 /* Benchmarks show large regressions on the K8 sixtrack benchmark when this
1912 value is increased to the perhaps more appropriate value of 5. */
1913 3, /* Branch cost */
1914 COSTS_N_INSNS (8), /* cost of FADD and FSUB insns. */
1915 COSTS_N_INSNS (8), /* cost of FMUL instruction. */
1916 COSTS_N_INSNS (20), /* cost of FDIV instruction. */
1917 COSTS_N_INSNS (8), /* cost of FABS instruction. */
1918 COSTS_N_INSNS (8), /* cost of FCHS instruction. */
1919 COSTS_N_INSNS (40), /* cost of FSQRT instruction. */
1920 generic_memcpy,
1921 generic_memset,
1922 1, /* scalar_stmt_cost. */
1923 1, /* scalar_load_cost. */
1924 1, /* scalar_store_cost. */
1925 1, /* vec_stmt_cost. */
1926 1, /* vec_to_scalar_cost. */
1927 1, /* scalar_to_vec_cost. */
1928 1, /* vec_align_load_cost. */
1929 2, /* vec_unalign_load_cost. */
1930 1, /* vec_store_cost. */
1931 3, /* cond_taken_branch_cost. */
1932 1, /* cond_not_taken_branch_cost. */
1935 /* core_cost should produce code tuned for the Core family of CPUs. */
1936 static stringop_algs core_memcpy[2] = {
1937 {libcall, {{1024, rep_prefix_4_byte, true}, {-1, libcall, false}}},
1938 {libcall, {{24, loop, true}, {128, rep_prefix_8_byte, true},
1939 {-1, libcall, false}}}};
1940 static stringop_algs core_memset[2] = {
1941 {libcall, {{6, loop_1_byte, true},
1942 {24, loop, true},
1943 {8192, rep_prefix_4_byte, true},
1944 {-1, libcall, false}}},
1945 {libcall, {{24, loop, true}, {512, rep_prefix_8_byte, true},
1946 {-1, libcall, false}}}};
1948 static const
1949 struct processor_costs core_cost = {
1950 COSTS_N_INSNS (1), /* cost of an add instruction */
1951 /* On all chips taken into consideration, lea is 2 cycles or more. With
1952 this cost, however, our current implementation of synth_mult results in
1953 the use of unnecessary temporary registers, causing regressions on several
1954 SPECfp benchmarks. */
1955 COSTS_N_INSNS (1) + 1, /* cost of a lea instruction */
1956 COSTS_N_INSNS (1), /* variable shift costs */
1957 COSTS_N_INSNS (1), /* constant shift costs */
1958 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
1959 COSTS_N_INSNS (4), /* HI */
1960 COSTS_N_INSNS (3), /* SI */
1961 COSTS_N_INSNS (4), /* DI */
1962 COSTS_N_INSNS (2)}, /* other */
1963 0, /* cost of multiply per each bit set */
1964 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
1965 COSTS_N_INSNS (26), /* HI */
1966 COSTS_N_INSNS (42), /* SI */
1967 COSTS_N_INSNS (74), /* DI */
1968 COSTS_N_INSNS (74)}, /* other */
1969 COSTS_N_INSNS (1), /* cost of movsx */
1970 COSTS_N_INSNS (1), /* cost of movzx */
1971 8, /* "large" insn */
1972 17, /* MOVE_RATIO */
1973 4, /* cost for loading QImode using movzbl */
1974 {4, 4, 4}, /* cost of loading integer registers
1975 in QImode, HImode and SImode.
1976 Relative to reg-reg move (2). */
1977 {4, 4, 4}, /* cost of storing integer registers */
1978 4, /* cost of reg,reg fld/fst */
1979 {12, 12, 12}, /* cost of loading fp registers
1980 in SFmode, DFmode and XFmode */
1981 {6, 6, 8}, /* cost of storing fp registers
1982 in SFmode, DFmode and XFmode */
1983 2, /* cost of moving MMX register */
1984 {8, 8}, /* cost of loading MMX registers
1985 in SImode and DImode */
1986 {8, 8}, /* cost of storing MMX registers
1987 in SImode and DImode */
1988 2, /* cost of moving SSE register */
1989 {8, 8, 8}, /* cost of loading SSE registers
1990 in SImode, DImode and TImode */
1991 {8, 8, 8}, /* cost of storing SSE registers
1992 in SImode, DImode and TImode */
1993 5, /* MMX or SSE register to integer */
1994 64, /* size of l1 cache. */
1995 512, /* size of l2 cache. */
1996 64, /* size of prefetch block */
1997 6, /* number of parallel prefetches */
1998 /* FIXME: perhaps a more appropriate value is 5. */
1999 3, /* Branch cost */
2000 COSTS_N_INSNS (8), /* cost of FADD and FSUB insns. */
2001 COSTS_N_INSNS (8), /* cost of FMUL instruction. */
2002 COSTS_N_INSNS (20), /* cost of FDIV instruction. */
2003 COSTS_N_INSNS (8), /* cost of FABS instruction. */
2004 COSTS_N_INSNS (8), /* cost of FCHS instruction. */
2005 COSTS_N_INSNS (40), /* cost of FSQRT instruction. */
2006 core_memcpy,
2007 core_memset,
2008 1, /* scalar_stmt_cost. */
2009 1, /* scalar_load_cost. */
2010 1, /* scalar_store_cost. */
2011 1, /* vec_stmt_cost. */
2012 1, /* vec_to_scalar_cost. */
2013 1, /* scalar_to_vec_cost. */
2014 1, /* vec_align_load_cost. */
2015 2, /* vec_unalign_load_cost. */
2016 1, /* vec_store_cost. */
2017 3, /* cond_taken_branch_cost. */
2018 1, /* cond_not_taken_branch_cost. */
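/* The trailing scalar and vector cost fields of each processor_costs entry
   (scalar_stmt_cost through cond_not_taken_branch_cost) feed the
   vectorizer's cost model rather than the rtx cost hooks; roughly, the
   vectorizer weighs e.g. vec_stmt_cost against scalar_stmt_cost when
   deciding whether a loop is worth vectorizing (presumably via
   ix86_builtin_vectorization_cost later in this file).  */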
2022 /* Set by -mtune. */
2023 const struct processor_costs *ix86_tune_cost = &pentium_cost;
2025 /* Set by -mtune or -Os. */
2026 const struct processor_costs *ix86_cost = &pentium_cost;
2028 /* Processor feature/optimization bitmasks. */
2029 #define m_386 (1<<PROCESSOR_I386)
2030 #define m_486 (1<<PROCESSOR_I486)
2031 #define m_PENT (1<<PROCESSOR_PENTIUM)
2032 #define m_PPRO (1<<PROCESSOR_PENTIUMPRO)
2033 #define m_PENT4 (1<<PROCESSOR_PENTIUM4)
2034 #define m_NOCONA (1<<PROCESSOR_NOCONA)
2035 #define m_P4_NOCONA (m_PENT4 | m_NOCONA)
2036 #define m_CORE2 (1<<PROCESSOR_CORE2)
2037 #define m_NEHALEM (1<<PROCESSOR_NEHALEM)
2038 #define m_SANDYBRIDGE (1<<PROCESSOR_SANDYBRIDGE)
2039 #define m_HASWELL (1<<PROCESSOR_HASWELL)
2040 #define m_CORE_ALL (m_CORE2 | m_NEHALEM | m_SANDYBRIDGE | m_HASWELL)
2041 #define m_BONNELL (1<<PROCESSOR_BONNELL)
2042 #define m_SILVERMONT (1<<PROCESSOR_SILVERMONT)
2043 #define m_KNL (1<<PROCESSOR_KNL)
2044 #define m_INTEL (1<<PROCESSOR_INTEL)
2046 #define m_GEODE (1<<PROCESSOR_GEODE)
2047 #define m_K6 (1<<PROCESSOR_K6)
2048 #define m_K6_GEODE (m_K6 | m_GEODE)
2049 #define m_K8 (1<<PROCESSOR_K8)
2050 #define m_ATHLON (1<<PROCESSOR_ATHLON)
2051 #define m_ATHLON_K8 (m_K8 | m_ATHLON)
2052 #define m_AMDFAM10 (1<<PROCESSOR_AMDFAM10)
2053 #define m_BDVER1 (1<<PROCESSOR_BDVER1)
2054 #define m_BDVER2 (1<<PROCESSOR_BDVER2)
2055 #define m_BDVER3 (1<<PROCESSOR_BDVER3)
2056 #define m_BDVER4 (1<<PROCESSOR_BDVER4)
2057 #define m_BTVER1 (1<<PROCESSOR_BTVER1)
2058 #define m_BTVER2 (1<<PROCESSOR_BTVER2)
2059 #define m_BDVER (m_BDVER1 | m_BDVER2 | m_BDVER3 | m_BDVER4)
2060 #define m_BTVER (m_BTVER1 | m_BTVER2)
2061 #define m_AMD_MULTIPLE (m_ATHLON_K8 | m_AMDFAM10 | m_BDVER | m_BTVER)
2063 #define m_GENERIC (1<<PROCESSOR_GENERIC)
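/* These masks are the selectors used by x86-tune.def: each DEF_TUNE entry
   names the tunings for which a feature is enabled.  A made-up example of
   the shape such an entry takes (the real entries are in x86-tune.def):

     DEF_TUNE (X86_TUNE_EXAMPLE_FEATURE, "example_feature",
	       m_CORE_ALL | m_BTVER | m_GENERIC)

   which would set the corresponding bit of initial_ix86_tune_features below
   for the Core, btver and generic tunings.  */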
2065 const char* ix86_tune_feature_names[X86_TUNE_LAST] = {
2066 #undef DEF_TUNE
2067 #define DEF_TUNE(tune, name, selector) name,
2068 #include "x86-tune.def"
2069 #undef DEF_TUNE
2072 /* Feature tests against the various tunings. */
2073 unsigned char ix86_tune_features[X86_TUNE_LAST];
2075 /* Feature tests against the various tunings used to create ix86_tune_features
2076 based on the processor mask. */
2077 static unsigned int initial_ix86_tune_features[X86_TUNE_LAST] = {
2078 #undef DEF_TUNE
2079 #define DEF_TUNE(tune, name, selector) selector,
2080 #include "x86-tune.def"
2081 #undef DEF_TUNE
2084 /* Feature tests against the various architecture variations. */
2085 unsigned char ix86_arch_features[X86_ARCH_LAST];
2087 /* Feature tests against the various architecture variations, used to create
2088 ix86_arch_features based on the processor mask. */
2089 static unsigned int initial_ix86_arch_features[X86_ARCH_LAST] = {
2090 /* X86_ARCH_CMOV: Conditional move was added for pentiumpro. */
2091 ~(m_386 | m_486 | m_PENT | m_K6),
2093 /* X86_ARCH_CMPXCHG: Compare and exchange was added for 80486. */
2094 ~m_386,
2096 /* X86_ARCH_CMPXCHG8B: Compare and exchange 8 bytes was added for pentium. */
2097 ~(m_386 | m_486),
2099 /* X86_ARCH_XADD: Exchange and add was added for 80486. */
2100 ~m_386,
2102 /* X86_ARCH_BSWAP: Byteswap was added for 80486. */
2103 ~m_386,
2106 /* In case the average insn count for a single function invocation is
2107 lower than this constant, emit fast (but longer) prologue and
2108 epilogue code. */
2109 #define FAST_PROLOGUE_INSN_COUNT 20
2111 /* Names for 8 (low), 8 (high), and 16-bit registers, respectively. */
2112 static const char *const qi_reg_name[] = QI_REGISTER_NAMES;
2113 static const char *const qi_high_reg_name[] = QI_HIGH_REGISTER_NAMES;
2114 static const char *const hi_reg_name[] = HI_REGISTER_NAMES;
2116 /* Array of the smallest class containing reg number REGNO, indexed by
2117 REGNO. Used by REGNO_REG_CLASS in i386.h. */
2119 enum reg_class const regclass_map[FIRST_PSEUDO_REGISTER] =
2121 /* ax, dx, cx, bx */
2122 AREG, DREG, CREG, BREG,
2123 /* si, di, bp, sp */
2124 SIREG, DIREG, NON_Q_REGS, NON_Q_REGS,
2125 /* FP registers */
2126 FP_TOP_REG, FP_SECOND_REG, FLOAT_REGS, FLOAT_REGS,
2127 FLOAT_REGS, FLOAT_REGS, FLOAT_REGS, FLOAT_REGS,
2128 /* arg pointer */
2129 NON_Q_REGS,
2130 /* flags, fpsr, fpcr, frame */
2131 NO_REGS, NO_REGS, NO_REGS, NON_Q_REGS,
2132 /* SSE registers */
2133 SSE_FIRST_REG, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS,
2134 SSE_REGS, SSE_REGS,
2135 /* MMX registers */
2136 MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS,
2137 MMX_REGS, MMX_REGS,
2138 /* REX registers */
2139 NON_Q_REGS, NON_Q_REGS, NON_Q_REGS, NON_Q_REGS,
2140 NON_Q_REGS, NON_Q_REGS, NON_Q_REGS, NON_Q_REGS,
2141 /* SSE REX registers */
2142 SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS,
2143 SSE_REGS, SSE_REGS,
2144 /* AVX-512 SSE registers */
2145 EVEX_SSE_REGS, EVEX_SSE_REGS, EVEX_SSE_REGS, EVEX_SSE_REGS,
2146 EVEX_SSE_REGS, EVEX_SSE_REGS, EVEX_SSE_REGS, EVEX_SSE_REGS,
2147 EVEX_SSE_REGS, EVEX_SSE_REGS, EVEX_SSE_REGS, EVEX_SSE_REGS,
2148 EVEX_SSE_REGS, EVEX_SSE_REGS, EVEX_SSE_REGS, EVEX_SSE_REGS,
2149 /* Mask registers. */
2150 MASK_REGS, MASK_EVEX_REGS, MASK_EVEX_REGS, MASK_EVEX_REGS,
2151 MASK_EVEX_REGS, MASK_EVEX_REGS, MASK_EVEX_REGS, MASK_EVEX_REGS,
2152 /* MPX bound registers */
2153 BND_REGS, BND_REGS, BND_REGS, BND_REGS,
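/* For example, REGNO_REG_CLASS (AX_REG) looks up regclass_map[0] and yields
   AREG, the single-register class for %eax, while all the REX integer
   registers fall into NON_Q_REGS.  (Just restating the table above; the
   macro itself lives in i386.h.)  */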
2156 /* The "default" register map used in 32bit mode. */
2158 int const dbx_register_map[FIRST_PSEUDO_REGISTER] =
2160 0, 2, 1, 3, 6, 7, 4, 5, /* general regs */
2161 12, 13, 14, 15, 16, 17, 18, 19, /* fp regs */
2162 -1, -1, -1, -1, -1, /* arg, flags, fpsr, fpcr, frame */
2163 21, 22, 23, 24, 25, 26, 27, 28, /* SSE */
2164 29, 30, 31, 32, 33, 34, 35, 36, /* MMX */
2165 -1, -1, -1, -1, -1, -1, -1, -1, /* extended integer registers */
2166 -1, -1, -1, -1, -1, -1, -1, -1, /* extended SSE registers */
2167 -1, -1, -1, -1, -1, -1, -1, -1, /* AVX-512 registers 16-23*/
2168 -1, -1, -1, -1, -1, -1, -1, -1, /* AVX-512 registers 24-31*/
2169 93, 94, 95, 96, 97, 98, 99, 100, /* Mask registers */
2170 101, 102, 103, 104, /* bound registers */
2173 /* The "default" register map used in 64bit mode. */
2175 int const dbx64_register_map[FIRST_PSEUDO_REGISTER] =
2177 0, 1, 2, 3, 4, 5, 6, 7, /* general regs */
2178 33, 34, 35, 36, 37, 38, 39, 40, /* fp regs */
2179 -1, -1, -1, -1, -1, /* arg, flags, fpsr, fpcr, frame */
2180 17, 18, 19, 20, 21, 22, 23, 24, /* SSE */
2181 41, 42, 43, 44, 45, 46, 47, 48, /* MMX */
2182 8,9,10,11,12,13,14,15, /* extended integer registers */
2183 25, 26, 27, 28, 29, 30, 31, 32, /* extended SSE registers */
2184 67, 68, 69, 70, 71, 72, 73, 74, /* AVX-512 registers 16-23 */
2185 75, 76, 77, 78, 79, 80, 81, 82, /* AVX-512 registers 24-31 */
2186 118, 119, 120, 121, 122, 123, 124, 125, /* Mask registers */
2187 126, 127, 128, 129, /* bound registers */
2190 /* Define the register numbers to be used in Dwarf debugging information.
2191 The SVR4 reference port C compiler uses the following register numbers
2192 in its Dwarf output code:
2193 0 for %eax (gcc regno = 0)
2194 1 for %ecx (gcc regno = 2)
2195 2 for %edx (gcc regno = 1)
2196 3 for %ebx (gcc regno = 3)
2197 4 for %esp (gcc regno = 7)
2198 5 for %ebp (gcc regno = 6)
2199 6 for %esi (gcc regno = 4)
2200 7 for %edi (gcc regno = 5)
2201 The following three DWARF register numbers are never generated by
2202 the SVR4 C compiler or by the GNU compilers, but SDB on x86/svr4
2203 believes these numbers have these meanings.
2204 8 for %eip (no gcc equivalent)
2205 9 for %eflags (gcc regno = 17)
2206 10 for %trapno (no gcc equivalent)
2207 It is not at all clear how we should number the FP stack registers
2208 for the x86 architecture. If the version of SDB on x86/svr4 were
2209 a bit less brain dead with respect to floating-point then we would
2210 have a precedent to follow with respect to DWARF register numbers
2211 for x86 FP registers, but the SDB on x86/svr4 is so completely
2212 broken with respect to FP registers that it is hardly worth thinking
2213 of it as something to strive for compatibility with.
2214 The version of x86/svr4 SDB I have at the moment does (partially)
2215 seem to believe that DWARF register number 11 is associated with
2216 the x86 register %st(0), but that's about all. Higher DWARF
2217 register numbers don't seem to be associated with anything in
2218 particular, and even for DWARF regno 11, SDB only seems to under-
2219 stand that it should say that a variable lives in %st(0) (when
2220 asked via an `=' command) if we said it was in DWARF regno 11,
2221 but SDB still prints garbage when asked for the value of the
2222 variable in question (via a `/' command).
2223 (Also note that the labels SDB prints for various FP stack regs
2224 when doing an `x' command are all wrong.)
2225 Note that these problems generally don't affect the native SVR4
2226 C compiler because it doesn't allow the use of -O with -g and
2227 because when it is *not* optimizing, it allocates a memory
2228 location for each floating-point variable, and the memory
2229 location is what gets described in the DWARF AT_location
2230 attribute for the variable in question.
2231 Regardless of the severe mental illness of the x86/svr4 SDB, we
2232 do something sensible here and we use the following DWARF
2233 register numbers. Note that these are all stack-top-relative
2234 numbers.
2235 11 for %st(0) (gcc regno = 8)
2236 12 for %st(1) (gcc regno = 9)
2237 13 for %st(2) (gcc regno = 10)
2238 14 for %st(3) (gcc regno = 11)
2239 15 for %st(4) (gcc regno = 12)
2240 16 for %st(5) (gcc regno = 13)
2241 17 for %st(6) (gcc regno = 14)
2242 18 for %st(7) (gcc regno = 15)
2244 int const svr4_dbx_register_map[FIRST_PSEUDO_REGISTER] =
2246 0, 2, 1, 3, 6, 7, 5, 4, /* general regs */
2247 11, 12, 13, 14, 15, 16, 17, 18, /* fp regs */
2248 -1, 9, -1, -1, -1, /* arg, flags, fpsr, fpcr, frame */
2249 21, 22, 23, 24, 25, 26, 27, 28, /* SSE registers */
2250 29, 30, 31, 32, 33, 34, 35, 36, /* MMX registers */
2251 -1, -1, -1, -1, -1, -1, -1, -1, /* extended integer registers */
2252 -1, -1, -1, -1, -1, -1, -1, -1, /* extended SSE registers */
2253 -1, -1, -1, -1, -1, -1, -1, -1, /* AVX-512 registers 16-23*/
2254 -1, -1, -1, -1, -1, -1, -1, -1, /* AVX-512 registers 24-31*/
2255 93, 94, 95, 96, 97, 98, 99, 100, /* Mask registers */
2256 101, 102, 103, 104, /* bound registers */
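/* A concrete example of why there are several maps: %esp is gcc regno 7,
   and the tables above give it debug register number 5 in the 32-bit
   "default" map, 4 in the SVR4/DWARF map and 7 in the 64-bit map, so the
   right table has to be chosen for the target's debugging convention when
   emitting debug info.  */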
2259 /* Define parameter passing and return registers. */
2261 static int const x86_64_int_parameter_registers[6] =
2263 DI_REG, SI_REG, DX_REG, CX_REG, R8_REG, R9_REG
2266 static int const x86_64_ms_abi_int_parameter_registers[4] =
2268 CX_REG, DX_REG, R8_REG, R9_REG
2271 static int const x86_64_int_return_registers[4] =
2273 AX_REG, DX_REG, DI_REG, SI_REG
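/* Reading the parameter-register tables above: for a call such as
   f (a, b, c), the SysV ABI passes a, b and c in %rdi, %rsi and %rdx,
   whereas the MS ABI uses %rcx, %rdx and %r8; integer results come back in
   %rax (and %rdx for a second word).  (This just restates the arrays above
   in ABI terms.)  */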
2276 /* Additional registers that are clobbered by SYSV calls. */
2278 int const x86_64_ms_sysv_extra_clobbered_registers[12] =
2280 SI_REG, DI_REG,
2281 XMM6_REG, XMM7_REG,
2282 XMM8_REG, XMM9_REG, XMM10_REG, XMM11_REG,
2283 XMM12_REG, XMM13_REG, XMM14_REG, XMM15_REG
2286 /* Define the structure for the machine field in struct function. */
2288 struct GTY(()) stack_local_entry {
2289 unsigned short mode;
2290 unsigned short n;
2291 rtx rtl;
2292 struct stack_local_entry *next;
2295 /* Structure describing stack frame layout.
2296 Stack grows downward:
2298 [arguments]
2299 <- ARG_POINTER
2300 saved pc
2302 saved static chain if ix86_static_chain_on_stack
2304 saved frame pointer if frame_pointer_needed
2305 <- HARD_FRAME_POINTER
2306 [saved regs]
2307 <- regs_save_offset
2308 [padding0]
2310 [saved SSE regs]
2311 <- sse_regs_save_offset
2312 [padding1] |
2313 | <- FRAME_POINTER
2314 [va_arg registers] |
2316 [frame] |
2318 [padding2] | = to_allocate
2319 <- STACK_POINTER
2321 struct ix86_frame
2323 int nsseregs;
2324 int nregs;
2325 int va_arg_size;
2326 int red_zone_size;
2327 int outgoing_arguments_size;
2329 /* The offsets relative to ARG_POINTER. */
2330 HOST_WIDE_INT frame_pointer_offset;
2331 HOST_WIDE_INT hard_frame_pointer_offset;
2332 HOST_WIDE_INT stack_pointer_offset;
2333 HOST_WIDE_INT hfp_save_offset;
2334 HOST_WIDE_INT reg_save_offset;
2335 HOST_WIDE_INT sse_reg_save_offset;
2337 /* When save_regs_using_mov is set, emit prologue using
2338 move instead of push instructions. */
2339 bool save_regs_using_mov;
2342 /* Which cpu are we scheduling for. */
2343 enum attr_cpu ix86_schedule;
2345 /* Which cpu are we optimizing for. */
2346 enum processor_type ix86_tune;
2348 /* Which instruction set architecture to use. */
2349 enum processor_type ix86_arch;
2351 /* True if processor has SSE prefetch instruction. */
2352 unsigned char x86_prefetch_sse;
2354 /* -mstackrealign option */
2355 static const char ix86_force_align_arg_pointer_string[]
2356 = "force_align_arg_pointer";
2358 static rtx (*ix86_gen_leave) (void);
2359 static rtx (*ix86_gen_add3) (rtx, rtx, rtx);
2360 static rtx (*ix86_gen_sub3) (rtx, rtx, rtx);
2361 static rtx (*ix86_gen_sub3_carry) (rtx, rtx, rtx, rtx, rtx);
2362 static rtx (*ix86_gen_one_cmpl2) (rtx, rtx);
2363 static rtx (*ix86_gen_monitor) (rtx, rtx, rtx);
2364 static rtx (*ix86_gen_andsp) (rtx, rtx, rtx);
2365 static rtx (*ix86_gen_allocate_stack_worker) (rtx, rtx);
2366 static rtx (*ix86_gen_adjust_stack_and_probe) (rtx, rtx, rtx);
2367 static rtx (*ix86_gen_probe_stack_range) (rtx, rtx, rtx);
2368 static rtx (*ix86_gen_tls_global_dynamic_64) (rtx, rtx, rtx);
2369 static rtx (*ix86_gen_tls_local_dynamic_base_64) (rtx, rtx);
2371 /* Preferred alignment for stack boundary in bits. */
2372 unsigned int ix86_preferred_stack_boundary;
2374 /* Alignment for incoming stack boundary in bits specified at
2375 command line. */
2376 static unsigned int ix86_user_incoming_stack_boundary;
2378 /* Default alignment for incoming stack boundary in bits. */
2379 static unsigned int ix86_default_incoming_stack_boundary;
2381 /* Alignment for incoming stack boundary in bits. */
2382 unsigned int ix86_incoming_stack_boundary;
2384 /* Calling abi specific va_list type nodes. */
2385 static GTY(()) tree sysv_va_list_type_node;
2386 static GTY(()) tree ms_va_list_type_node;
2388 /* Prefix built by ASM_GENERATE_INTERNAL_LABEL. */
2389 char internal_label_prefix[16];
2390 int internal_label_prefix_len;
2392 /* Fence to use after loop using movnt. */
2393 tree x86_mfence;
2395 /* Register class used for passing a given 64-bit part of the argument.
2396 These represent classes as documented by the psABI, with the exception
2397 of the SSESF and SSEDF classes, which are basically the SSE class; gcc just
2398 uses SFmode or DFmode moves instead of DImode ones to avoid reformatting penalties.
2400 Similarly we play games with INTEGERSI_CLASS to use cheaper SImode moves
2401 whenever possible (the upper half contains padding). */
2402 enum x86_64_reg_class
2404 X86_64_NO_CLASS,
2405 X86_64_INTEGER_CLASS,
2406 X86_64_INTEGERSI_CLASS,
2407 X86_64_SSE_CLASS,
2408 X86_64_SSESF_CLASS,
2409 X86_64_SSEDF_CLASS,
2410 X86_64_SSEUP_CLASS,
2411 X86_64_X87_CLASS,
2412 X86_64_X87UP_CLASS,
2413 X86_64_COMPLEX_X87_CLASS,
2414 X86_64_MEMORY_CLASS
2417 #define MAX_CLASSES 8
2419 /* Table of constants used by fldpi, fldln2, etc.... */
2420 static REAL_VALUE_TYPE ext_80387_constants_table [5];
2421 static bool ext_80387_constants_init = 0;
2424 static struct machine_function * ix86_init_machine_status (void);
2425 static rtx ix86_function_value (const_tree, const_tree, bool);
2426 static bool ix86_function_value_regno_p (const unsigned int);
2427 static unsigned int ix86_function_arg_boundary (machine_mode,
2428 const_tree);
2429 static rtx ix86_static_chain (const_tree, bool);
2430 static int ix86_function_regparm (const_tree, const_tree);
2431 static void ix86_compute_frame_layout (struct ix86_frame *);
2432 static bool ix86_expand_vector_init_one_nonzero (bool, machine_mode,
2433 rtx, rtx, int);
2434 static void ix86_add_new_builtins (HOST_WIDE_INT);
2435 static tree ix86_canonical_va_list_type (tree);
2436 static void predict_jump (int);
2437 static unsigned int split_stack_prologue_scratch_regno (void);
2438 static bool i386_asm_output_addr_const_extra (FILE *, rtx);
2440 enum ix86_function_specific_strings
2442 IX86_FUNCTION_SPECIFIC_ARCH,
2443 IX86_FUNCTION_SPECIFIC_TUNE,
2444 IX86_FUNCTION_SPECIFIC_MAX
2447 static char *ix86_target_string (HOST_WIDE_INT, int, const char *,
2448 const char *, enum fpmath_unit, bool);
2449 static void ix86_function_specific_save (struct cl_target_option *,
2450 struct gcc_options *opts);
2451 static void ix86_function_specific_restore (struct gcc_options *opts,
2452 struct cl_target_option *);
2453 static void ix86_function_specific_print (FILE *, int,
2454 struct cl_target_option *);
2455 static bool ix86_valid_target_attribute_p (tree, tree, tree, int);
2456 static bool ix86_valid_target_attribute_inner_p (tree, char *[],
2457 struct gcc_options *,
2458 struct gcc_options *,
2459 struct gcc_options *);
2460 static bool ix86_can_inline_p (tree, tree);
2461 static void ix86_set_current_function (tree);
2462 static unsigned int ix86_minimum_incoming_stack_boundary (bool);
2464 static enum calling_abi ix86_function_abi (const_tree);
2467 #ifndef SUBTARGET32_DEFAULT_CPU
2468 #define SUBTARGET32_DEFAULT_CPU "i386"
2469 #endif
2471 /* Whether -mtune= or -march= were specified */
2472 static int ix86_tune_defaulted;
2473 static int ix86_arch_specified;
2475 /* Vectorization library interface and handlers. */
2476 static tree (*ix86_veclib_handler) (enum built_in_function, tree, tree);
2478 static tree ix86_veclibabi_svml (enum built_in_function, tree, tree);
2479 static tree ix86_veclibabi_acml (enum built_in_function, tree, tree);
2481 /* Processor target table, indexed by processor number */
2482 struct ptt
2484 const char *const name; /* processor name */
2485 const struct processor_costs *cost; /* Processor costs */
2486 const int align_loop; /* Default alignments. */
2487 const int align_loop_max_skip;
2488 const int align_jump;
2489 const int align_jump_max_skip;
2490 const int align_func;
2493 /* This table must be in sync with enum processor_type in i386.h. */
2494 static const struct ptt processor_target_table[PROCESSOR_max] =
2496 {"generic", &generic_cost, 16, 10, 16, 10, 16},
2497 {"i386", &i386_cost, 4, 3, 4, 3, 4},
2498 {"i486", &i486_cost, 16, 15, 16, 15, 16},
2499 {"pentium", &pentium_cost, 16, 7, 16, 7, 16},
2500 {"pentiumpro", &pentiumpro_cost, 16, 15, 16, 10, 16},
2501 {"pentium4", &pentium4_cost, 0, 0, 0, 0, 0},
2502 {"nocona", &nocona_cost, 0, 0, 0, 0, 0},
2503 {"core2", &core_cost, 16, 10, 16, 10, 16},
2504 {"nehalem", &core_cost, 16, 10, 16, 10, 16},
2505 {"sandybridge", &core_cost, 16, 10, 16, 10, 16},
2506 {"haswell", &core_cost, 16, 10, 16, 10, 16},
2507 {"bonnell", &atom_cost, 16, 15, 16, 7, 16},
2508 {"silvermont", &slm_cost, 16, 15, 16, 7, 16},
2509 {"knl", &slm_cost, 16, 15, 16, 7, 16},
2510 {"intel", &intel_cost, 16, 15, 16, 7, 16},
2511 {"geode", &geode_cost, 0, 0, 0, 0, 0},
2512 {"k6", &k6_cost, 32, 7, 32, 7, 32},
2513 {"athlon", &athlon_cost, 16, 7, 16, 7, 16},
2514 {"k8", &k8_cost, 16, 7, 16, 7, 16},
2515 {"amdfam10", &amdfam10_cost, 32, 24, 32, 7, 32},
2516 {"bdver1", &bdver1_cost, 16, 10, 16, 7, 11},
2517 {"bdver2", &bdver2_cost, 16, 10, 16, 7, 11},
2518 {"bdver3", &bdver3_cost, 16, 10, 16, 7, 11},
2519 {"bdver4", &bdver4_cost, 16, 10, 16, 7, 11},
2520 {"btver1", &btver1_cost, 16, 10, 16, 7, 11},
2521 {"btver2", &btver2_cost, 16, 10, 16, 7, 11}
2524 static unsigned int
2525 rest_of_handle_insert_vzeroupper (void)
2527 int i;
2529 /* vzeroupper instructions are inserted immediately after reload to
2530 account for possible spills from 256-bit registers. The pass
2531 reuses the mode-switching infrastructure by re-running the mode insertion
2532 pass, so disable entities that have already been processed. */
2533 for (i = 0; i < MAX_386_ENTITIES; i++)
2534 ix86_optimize_mode_switching[i] = 0;
2536 ix86_optimize_mode_switching[AVX_U128] = 1;
2538 /* Call optimize_mode_switching. */
2539 g->get_passes ()->execute_pass_mode_switching ();
2540 return 0;
2543 namespace {
2545 const pass_data pass_data_insert_vzeroupper =
2547 RTL_PASS, /* type */
2548 "vzeroupper", /* name */
2549 OPTGROUP_NONE, /* optinfo_flags */
2550 TV_NONE, /* tv_id */
2551 0, /* properties_required */
2552 0, /* properties_provided */
2553 0, /* properties_destroyed */
2554 0, /* todo_flags_start */
2555 TODO_df_finish, /* todo_flags_finish */
2558 class pass_insert_vzeroupper : public rtl_opt_pass
2560 public:
2561 pass_insert_vzeroupper(gcc::context *ctxt)
2562 : rtl_opt_pass(pass_data_insert_vzeroupper, ctxt)
2565 /* opt_pass methods: */
2566 virtual bool gate (function *)
2568 return TARGET_AVX && !TARGET_AVX512F && TARGET_VZEROUPPER;
2571 virtual unsigned int execute (function *)
2573 return rest_of_handle_insert_vzeroupper ();
2576 }; // class pass_insert_vzeroupper
2578 } // anon namespace
2580 rtl_opt_pass *
2581 make_pass_insert_vzeroupper (gcc::context *ctxt)
2583 return new pass_insert_vzeroupper (ctxt);
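/* The pass above is not in the regular pass list; it is meant to be
   instantiated through make_pass_insert_vzeroupper and, presumably,
   registered from the option-override code via a register_pass_info that
   places it after reload, which matches the "inserted immediately after
   reload" comment earlier.  */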
2586 /* Return true if a red-zone is in use. */
2588 static inline bool
2589 ix86_using_red_zone (void)
2591 return TARGET_RED_ZONE && !TARGET_64BIT_MS_ABI;
2594 /* Return a string that documents the current -m options. The caller is
2595 responsible for freeing the string. */
2597 static char *
2598 ix86_target_string (HOST_WIDE_INT isa, int flags, const char *arch,
2599 const char *tune, enum fpmath_unit fpmath,
2600 bool add_nl_p)
2602 struct ix86_target_opts
2604 const char *option; /* option string */
2605 HOST_WIDE_INT mask; /* isa mask options */
2608 /* This table is ordered so that options like -msse4.2, which imply
2609 the preceding options, are matched (and emitted) first. */
2610 static struct ix86_target_opts isa_opts[] =
2612 { "-mfma4", OPTION_MASK_ISA_FMA4 },
2613 { "-mfma", OPTION_MASK_ISA_FMA },
2614 { "-mxop", OPTION_MASK_ISA_XOP },
2615 { "-mlwp", OPTION_MASK_ISA_LWP },
2616 { "-mavx512f", OPTION_MASK_ISA_AVX512F },
2617 { "-mavx512er", OPTION_MASK_ISA_AVX512ER },
2618 { "-mavx512cd", OPTION_MASK_ISA_AVX512CD },
2619 { "-mavx512pf", OPTION_MASK_ISA_AVX512PF },
2620 { "-mavx512dq", OPTION_MASK_ISA_AVX512DQ },
2621 { "-mavx512bw", OPTION_MASK_ISA_AVX512BW },
2622 { "-mavx512vl", OPTION_MASK_ISA_AVX512VL },
2623 { "-mavx512ifma", OPTION_MASK_ISA_AVX512IFMA },
2624 { "-mavx512vbmi", OPTION_MASK_ISA_AVX512VBMI },
2625 { "-msse4a", OPTION_MASK_ISA_SSE4A },
2626 { "-msse4.2", OPTION_MASK_ISA_SSE4_2 },
2627 { "-msse4.1", OPTION_MASK_ISA_SSE4_1 },
2628 { "-mssse3", OPTION_MASK_ISA_SSSE3 },
2629 { "-msse3", OPTION_MASK_ISA_SSE3 },
2630 { "-msse2", OPTION_MASK_ISA_SSE2 },
2631 { "-msse", OPTION_MASK_ISA_SSE },
2632 { "-m3dnow", OPTION_MASK_ISA_3DNOW },
2633 { "-m3dnowa", OPTION_MASK_ISA_3DNOW_A },
2634 { "-mmmx", OPTION_MASK_ISA_MMX },
2635 { "-mabm", OPTION_MASK_ISA_ABM },
2636 { "-mbmi", OPTION_MASK_ISA_BMI },
2637 { "-mbmi2", OPTION_MASK_ISA_BMI2 },
2638 { "-mlzcnt", OPTION_MASK_ISA_LZCNT },
2639 { "-mhle", OPTION_MASK_ISA_HLE },
2640 { "-mfxsr", OPTION_MASK_ISA_FXSR },
2641 { "-mrdseed", OPTION_MASK_ISA_RDSEED },
2642 { "-mprfchw", OPTION_MASK_ISA_PRFCHW },
2643 { "-madx", OPTION_MASK_ISA_ADX },
2644 { "-mtbm", OPTION_MASK_ISA_TBM },
2645 { "-mpopcnt", OPTION_MASK_ISA_POPCNT },
2646 { "-mmovbe", OPTION_MASK_ISA_MOVBE },
2647 { "-mcrc32", OPTION_MASK_ISA_CRC32 },
2648 { "-maes", OPTION_MASK_ISA_AES },
2649 { "-msha", OPTION_MASK_ISA_SHA },
2650 { "-mpclmul", OPTION_MASK_ISA_PCLMUL },
2651 { "-mfsgsbase", OPTION_MASK_ISA_FSGSBASE },
2652 { "-mrdrnd", OPTION_MASK_ISA_RDRND },
2653 { "-mf16c", OPTION_MASK_ISA_F16C },
2654 { "-mrtm", OPTION_MASK_ISA_RTM },
2655 { "-mxsave", OPTION_MASK_ISA_XSAVE },
2656 { "-mxsaveopt", OPTION_MASK_ISA_XSAVEOPT },
2657 { "-mprefetchwt1", OPTION_MASK_ISA_PREFETCHWT1 },
2658 { "-mclflushopt", OPTION_MASK_ISA_CLFLUSHOPT },
2659 { "-mxsavec", OPTION_MASK_ISA_XSAVEC },
2660 { "-mxsaves", OPTION_MASK_ISA_XSAVES },
2661 { "-mmpx", OPTION_MASK_ISA_MPX },
2662 { "-mclwb", OPTION_MASK_ISA_CLWB },
2663 { "-mpcommit", OPTION_MASK_ISA_PCOMMIT },
2666 /* Flag options. */
2667 static struct ix86_target_opts flag_opts[] =
2669 { "-m128bit-long-double", MASK_128BIT_LONG_DOUBLE },
2670 { "-mlong-double-128", MASK_LONG_DOUBLE_128 },
2671 { "-mlong-double-64", MASK_LONG_DOUBLE_64 },
2672 { "-m80387", MASK_80387 },
2673 { "-maccumulate-outgoing-args", MASK_ACCUMULATE_OUTGOING_ARGS },
2674 { "-malign-double", MASK_ALIGN_DOUBLE },
2675 { "-mcld", MASK_CLD },
2676 { "-mfp-ret-in-387", MASK_FLOAT_RETURNS },
2677 { "-mieee-fp", MASK_IEEE_FP },
2678 { "-minline-all-stringops", MASK_INLINE_ALL_STRINGOPS },
2679 { "-minline-stringops-dynamically", MASK_INLINE_STRINGOPS_DYNAMICALLY },
2680 { "-mms-bitfields", MASK_MS_BITFIELD_LAYOUT },
2681 { "-mno-align-stringops", MASK_NO_ALIGN_STRINGOPS },
2682 { "-mno-fancy-math-387", MASK_NO_FANCY_MATH_387 },
2683 { "-mno-push-args", MASK_NO_PUSH_ARGS },
2684 { "-mno-red-zone", MASK_NO_RED_ZONE },
2685 { "-momit-leaf-frame-pointer", MASK_OMIT_LEAF_FRAME_POINTER },
2686 { "-mrecip", MASK_RECIP },
2687 { "-mrtd", MASK_RTD },
2688 { "-msseregparm", MASK_SSEREGPARM },
2689 { "-mstack-arg-probe", MASK_STACK_PROBE },
2690 { "-mtls-direct-seg-refs", MASK_TLS_DIRECT_SEG_REFS },
2691 { "-mvect8-ret-in-mem", MASK_VECT8_RETURNS },
2692 { "-m8bit-idiv", MASK_USE_8BIT_IDIV },
2693 { "-mvzeroupper", MASK_VZEROUPPER },
2694 { "-mavx256-split-unaligned-load", MASK_AVX256_SPLIT_UNALIGNED_LOAD},
2695 { "-mavx256-split-unaligned-store", MASK_AVX256_SPLIT_UNALIGNED_STORE},
2696 { "-mprefer-avx128", MASK_PREFER_AVX128},
2699 const char *opts[ARRAY_SIZE (isa_opts) + ARRAY_SIZE (flag_opts) + 6][2];
2701 char isa_other[40];
2702 char target_other[40];
2703 unsigned num = 0;
2704 unsigned i, j;
2705 char *ret;
2706 char *ptr;
2707 size_t len;
2708 size_t line_len;
2709 size_t sep_len;
2710 const char *abi;
2712 memset (opts, '\0', sizeof (opts));
2714 /* Add -march= option. */
2715 if (arch)
2717 opts[num][0] = "-march=";
2718 opts[num++][1] = arch;
2721 /* Add -mtune= option. */
2722 if (tune)
2724 opts[num][0] = "-mtune=";
2725 opts[num++][1] = tune;
2728 /* Add -m32/-m64/-mx32. */
2729 if ((isa & OPTION_MASK_ISA_64BIT) != 0)
2731 if ((isa & OPTION_MASK_ABI_64) != 0)
2732 abi = "-m64";
2733 else
2734 abi = "-mx32";
2735 isa &= ~ (OPTION_MASK_ISA_64BIT
2736 | OPTION_MASK_ABI_64
2737 | OPTION_MASK_ABI_X32);
2739 else
2740 abi = "-m32";
2741 opts[num++][0] = abi;
2743 /* Pick out the options in isa options. */
2744 for (i = 0; i < ARRAY_SIZE (isa_opts); i++)
2746 if ((isa & isa_opts[i].mask) != 0)
2748 opts[num++][0] = isa_opts[i].option;
2749 isa &= ~ isa_opts[i].mask;
2753 if (isa && add_nl_p)
2755 opts[num++][0] = isa_other;
2756 sprintf (isa_other, "(other isa: %#" HOST_WIDE_INT_PRINT "x)",
2757 isa);
2760 /* Add flag options. */
2761 for (i = 0; i < ARRAY_SIZE (flag_opts); i++)
2763 if ((flags & flag_opts[i].mask) != 0)
2765 opts[num++][0] = flag_opts[i].option;
2766 flags &= ~ flag_opts[i].mask;
2770 if (flags && add_nl_p)
2772 opts[num++][0] = target_other;
2773 sprintf (target_other, "(other flags: %#x)", flags);
2776 /* Add -fpmath= option. */
2777 if (fpmath)
2779 opts[num][0] = "-mfpmath=";
2780 switch ((int) fpmath)
2782 case FPMATH_387:
2783 opts[num++][1] = "387";
2784 break;
2786 case FPMATH_SSE:
2787 opts[num++][1] = "sse";
2788 break;
2790 case FPMATH_387 | FPMATH_SSE:
2791 opts[num++][1] = "sse+387";
2792 break;
2794 default:
2795 gcc_unreachable ();
2799 /* Any options? */
2800 if (num == 0)
2801 return NULL;
2803 gcc_assert (num < ARRAY_SIZE (opts));
2805 /* Size the string. */
2806 len = 0;
2807 sep_len = (add_nl_p) ? 3 : 1;
2808 for (i = 0; i < num; i++)
2810 len += sep_len;
2811 for (j = 0; j < 2; j++)
2812 if (opts[i][j])
2813 len += strlen (opts[i][j]);
2816 /* Build the string. */
2817 ret = ptr = (char *) xmalloc (len);
2818 line_len = 0;
2820 for (i = 0; i < num; i++)
2822 size_t len2[2];
2824 for (j = 0; j < 2; j++)
2825 len2[j] = (opts[i][j]) ? strlen (opts[i][j]) : 0;
2827 if (i != 0)
2829 *ptr++ = ' ';
2830 line_len++;
2832 if (add_nl_p && line_len + len2[0] + len2[1] > 70)
2834 *ptr++ = '\\';
2835 *ptr++ = '\n';
2836 line_len = 0;
2840 for (j = 0; j < 2; j++)
2841 if (opts[i][j])
2843 memcpy (ptr, opts[i][j], len2[j]);
2844 ptr += len2[j];
2845 line_len += len2[j];
2849 *ptr = '\0';
2850 gcc_assert (ret + len >= ptr);
2852 return ret;
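/* The string assembled above lists, in order, -march=/-mtune=, the ABI
   switch (-m32, -m64 or -mx32), the ISA options from isa_opts, the flag
   options from flag_opts and finally -mfpmath=; e.g. it might come out as
   something like "-march=btver2 -mtune=btver2 -m64 ... -mfpmath=sse"
   (hypothetical; the exact contents depend on the masks passed in).  When
   add_nl_p is set, a backslash-newline is emitted whenever a line would
   grow past 70 columns.  */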
2855 /* Return true if profiling code should be emitted before the
2856 prologue; otherwise return false.
2857 Note: For x86 with "hotfix" it is sorried. */
2858 static bool
2859 ix86_profile_before_prologue (void)
2861 return flag_fentry != 0;
2864 /* Function that is callable from the debugger to print the current
2865 options. */
2866 void ATTRIBUTE_UNUSED
2867 ix86_debug_options (void)
2869 char *opts = ix86_target_string (ix86_isa_flags, target_flags,
2870 ix86_arch_string, ix86_tune_string,
2871 ix86_fpmath, true);
2873 if (opts)
2875 fprintf (stderr, "%s\n\n", opts);
2876 free (opts);
2878 else
2879 fputs ("<no options>\n\n", stderr);
2881 return;
2884 static const char *stringop_alg_names[] = {
2885 #define DEF_ENUM
2886 #define DEF_ALG(alg, name) #name,
2887 #include "stringop.def"
2888 #undef DEF_ENUM
2889 #undef DEF_ALG
2892 /* Parse the parameter string passed to -mmemcpy-strategy= or -mmemset-strategy=.
2893 The string has the following form (or is a comma-separated list of such items):
2895 strategy_alg:max_size:[align|noalign]
2897 where the full size range for the strategy is either [0, max_size] or
2898 [min_size, max_size], where min_size is one greater than the max_size of
2899 the preceding range. The last size range must have max_size == -1.
2901 Examples:
2904 -mmemcpy-strategy=libcall:-1:noalign
2906 this is equivalent to (for known size memcpy) -mstringop-strategy=libcall
2910 -mmemset-strategy=rep_8byte:16:noalign,vector_loop:2048:align,libcall:-1:noalign
2912 This tells the compiler to use the following strategy for memset:
2913 1) when the expected size is between [1, 16], use rep_8byte strategy;
2914 2) when the size is between [17, 2048], use vector_loop;
2915 3) when the size is > 2048, use libcall. */
2917 struct stringop_size_range
2919 int max;
2920 stringop_alg alg;
2921 bool noalign;
2924 static void
2925 ix86_parse_stringop_strategy_string (char *strategy_str, bool is_memset)
2927 const struct stringop_algs *default_algs;
2928 stringop_size_range input_ranges[MAX_STRINGOP_ALGS];
2929 char *curr_range_str, *next_range_str;
2930 int i = 0, n = 0;
2932 if (is_memset)
2933 default_algs = &ix86_cost->memset[TARGET_64BIT != 0];
2934 else
2935 default_algs = &ix86_cost->memcpy[TARGET_64BIT != 0];
2937 curr_range_str = strategy_str;
2941 int maxs;
2942 char alg_name[128];
2943 char align[16];
2944 next_range_str = strchr (curr_range_str, ',');
2945 if (next_range_str)
2946 *next_range_str++ = '\0';
2948 if (3 != sscanf (curr_range_str, "%20[^:]:%d:%10s",
2949 alg_name, &maxs, align))
2951 error ("wrong arg %s to option %s", curr_range_str,
2952 is_memset ? "-mmemset_strategy=" : "-mmemcpy_strategy=");
2953 return;
2956 if (n > 0 && (maxs < (input_ranges[n - 1].max + 1) && maxs != -1))
2958 error ("size ranges of option %s should be increasing",
2959 is_memset ? "-mmemset_strategy=" : "-mmemcpy_strategy=");
2960 return;
2963 for (i = 0; i < last_alg; i++)
2964 if (!strcmp (alg_name, stringop_alg_names[i]))
2965 break;
2967 if (i == last_alg)
2969 error ("wrong stringop strategy name %s specified for option %s",
2970 alg_name,
2971 is_memset ? "-mmemset_strategy=" : "-mmemcpy_strategy=");
2972 return;
2975 input_ranges[n].max = maxs;
2976 input_ranges[n].alg = (stringop_alg) i;
2977 if (!strcmp (align, "align"))
2978 input_ranges[n].noalign = false;
2979 else if (!strcmp (align, "noalign"))
2980 input_ranges[n].noalign = true;
2981 else
2983 error ("unknown alignment %s specified for option %s",
2984 align, is_memset ? "-mmemset_strategy=" : "-mmemcpy_strategy=");
2985 return;
2987 n++;
2988 curr_range_str = next_range_str;
2990 while (curr_range_str);
2992 if (input_ranges[n - 1].max != -1)
2994 error ("the max value for the last size range should be -1"
2995 " for option %s",
2996 is_memset ? "-mmemset_strategy=" : "-mmemcpy_strategy=");
2997 return;
3000 if (n > MAX_STRINGOP_ALGS)
3002 error ("too many size ranges specified in option %s",
3003 is_memset ? "-mmemset_strategy=" : "-mmemcpy_strategy=");
3004 return;
3007 /* Now override the default algs array. */
3008 for (i = 0; i < n; i++)
3010 *const_cast<int *>(&default_algs->size[i].max) = input_ranges[i].max;
3011 *const_cast<stringop_alg *>(&default_algs->size[i].alg)
3012 = input_ranges[i].alg;
3013 *const_cast<int *>(&default_algs->size[i].noalign)
3014 = input_ranges[i].noalign;
3019 /* Parse the -mtune-ctrl= option. When DUMP is true,
3020 print the features that are explicitly set. */
3022 static void
3023 parse_mtune_ctrl_str (bool dump)
3025 if (!ix86_tune_ctrl_string)
3026 return;
3028 char *next_feature_string = NULL;
3029 char *curr_feature_string = xstrdup (ix86_tune_ctrl_string);
3030 char *orig = curr_feature_string;
3031 int i;
3034 bool clear = false;
3036 next_feature_string = strchr (curr_feature_string, ',');
3037 if (next_feature_string)
3038 *next_feature_string++ = '\0';
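/* A leading '^' asks for the named feature to be cleared rather than set.  */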
3039 if (*curr_feature_string == '^')
3041 curr_feature_string++;
3042 clear = true;
3044 for (i = 0; i < X86_TUNE_LAST; i++)
3046 if (!strcmp (curr_feature_string, ix86_tune_feature_names[i]))
3048 ix86_tune_features[i] = !clear;
3049 if (dump)
3050 fprintf (stderr, "Explicitly %s feature %s\n",
3051 clear ? "clear" : "set", ix86_tune_feature_names[i]);
3052 break;
3055 if (i == X86_TUNE_LAST)
3056 error ("Unknown parameter to option -mtune-ctrl: %s",
3057 clear ? curr_feature_string - 1 : curr_feature_string);
3058 curr_feature_string = next_feature_string;
3060 while (curr_feature_string);
3061 free (orig);
3064 /* Helper function to set ix86_tune_features. IX86_TUNE is the
3065 processor type. */
3067 static void
3068 set_ix86_tune_features (enum processor_type ix86_tune, bool dump)
3070 unsigned int ix86_tune_mask = 1u << ix86_tune;
3071 int i;
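/* Each entry of initial_ix86_tune_features[] is a bitmask of the processors
   that enable the corresponding tuning feature by default; test the bit for
   the selected tuning target.  */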
3073 for (i = 0; i < X86_TUNE_LAST; ++i)
3075 if (ix86_tune_no_default)
3076 ix86_tune_features[i] = 0;
3077 else
3078 ix86_tune_features[i] = !!(initial_ix86_tune_features[i] & ix86_tune_mask);
3081 if (dump)
3083 fprintf (stderr, "List of x86 specific tuning parameter names:\n");
3084 for (i = 0; i < X86_TUNE_LAST; i++)
3085 fprintf (stderr, "%s : %s\n", ix86_tune_feature_names[i],
3086 ix86_tune_features[i] ? "on" : "off");
3089 parse_mtune_ctrl_str (dump);
3093 /* Override various settings based on options. If MAIN_ARGS_P, the
3094 options are from the command line, otherwise they are from
3095 attributes. */
3097 static void
3098 ix86_option_override_internal (bool main_args_p,
3099 struct gcc_options *opts,
3100 struct gcc_options *opts_set)
3102 int i;
3103 unsigned int ix86_arch_mask;
3104 const bool ix86_tune_specified = (opts->x_ix86_tune_string != NULL);
3105 const char *prefix;
3106 const char *suffix;
3107 const char *sw;
3109 #define PTA_3DNOW (HOST_WIDE_INT_1 << 0)
3110 #define PTA_3DNOW_A (HOST_WIDE_INT_1 << 1)
3111 #define PTA_64BIT (HOST_WIDE_INT_1 << 2)
3112 #define PTA_ABM (HOST_WIDE_INT_1 << 3)
3113 #define PTA_AES (HOST_WIDE_INT_1 << 4)
3114 #define PTA_AVX (HOST_WIDE_INT_1 << 5)
3115 #define PTA_BMI (HOST_WIDE_INT_1 << 6)
3116 #define PTA_CX16 (HOST_WIDE_INT_1 << 7)
3117 #define PTA_F16C (HOST_WIDE_INT_1 << 8)
3118 #define PTA_FMA (HOST_WIDE_INT_1 << 9)
3119 #define PTA_FMA4 (HOST_WIDE_INT_1 << 10)
3120 #define PTA_FSGSBASE (HOST_WIDE_INT_1 << 11)
3121 #define PTA_LWP (HOST_WIDE_INT_1 << 12)
3122 #define PTA_LZCNT (HOST_WIDE_INT_1 << 13)
3123 #define PTA_MMX (HOST_WIDE_INT_1 << 14)
3124 #define PTA_MOVBE (HOST_WIDE_INT_1 << 15)
3125 #define PTA_NO_SAHF (HOST_WIDE_INT_1 << 16)
3126 #define PTA_PCLMUL (HOST_WIDE_INT_1 << 17)
3127 #define PTA_POPCNT (HOST_WIDE_INT_1 << 18)
3128 #define PTA_PREFETCH_SSE (HOST_WIDE_INT_1 << 19)
3129 #define PTA_RDRND (HOST_WIDE_INT_1 << 20)
3130 #define PTA_SSE (HOST_WIDE_INT_1 << 21)
3131 #define PTA_SSE2 (HOST_WIDE_INT_1 << 22)
3132 #define PTA_SSE3 (HOST_WIDE_INT_1 << 23)
3133 #define PTA_SSE4_1 (HOST_WIDE_INT_1 << 24)
3134 #define PTA_SSE4_2 (HOST_WIDE_INT_1 << 25)
3135 #define PTA_SSE4A (HOST_WIDE_INT_1 << 26)
3136 #define PTA_SSSE3 (HOST_WIDE_INT_1 << 27)
3137 #define PTA_TBM (HOST_WIDE_INT_1 << 28)
3138 #define PTA_XOP (HOST_WIDE_INT_1 << 29)
3139 #define PTA_AVX2 (HOST_WIDE_INT_1 << 30)
3140 #define PTA_BMI2 (HOST_WIDE_INT_1 << 31)
3141 #define PTA_RTM (HOST_WIDE_INT_1 << 32)
3142 #define PTA_HLE (HOST_WIDE_INT_1 << 33)
3143 #define PTA_PRFCHW (HOST_WIDE_INT_1 << 34)
3144 #define PTA_RDSEED (HOST_WIDE_INT_1 << 35)
3145 #define PTA_ADX (HOST_WIDE_INT_1 << 36)
3146 #define PTA_FXSR (HOST_WIDE_INT_1 << 37)
3147 #define PTA_XSAVE (HOST_WIDE_INT_1 << 38)
3148 #define PTA_XSAVEOPT (HOST_WIDE_INT_1 << 39)
3149 #define PTA_AVX512F (HOST_WIDE_INT_1 << 40)
3150 #define PTA_AVX512ER (HOST_WIDE_INT_1 << 41)
3151 #define PTA_AVX512PF (HOST_WIDE_INT_1 << 42)
3152 #define PTA_AVX512CD (HOST_WIDE_INT_1 << 43)
3153 #define PTA_MPX (HOST_WIDE_INT_1 << 44)
3154 #define PTA_SHA (HOST_WIDE_INT_1 << 45)
3155 #define PTA_PREFETCHWT1 (HOST_WIDE_INT_1 << 46)
3156 #define PTA_CLFLUSHOPT (HOST_WIDE_INT_1 << 47)
3157 #define PTA_XSAVEC (HOST_WIDE_INT_1 << 48)
3158 #define PTA_XSAVES (HOST_WIDE_INT_1 << 49)
3159 #define PTA_AVX512DQ (HOST_WIDE_INT_1 << 50)
3160 #define PTA_AVX512BW (HOST_WIDE_INT_1 << 51)
3161 #define PTA_AVX512VL (HOST_WIDE_INT_1 << 52)
3162 #define PTA_AVX512IFMA (HOST_WIDE_INT_1 << 53)
3163 #define PTA_AVX512VBMI (HOST_WIDE_INT_1 << 54)
3164 #define PTA_CLWB (HOST_WIDE_INT_1 << 55)
3165 #define PTA_PCOMMIT (HOST_WIDE_INT_1 << 56)
3167 #define PTA_CORE2 \
3168 (PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3 | PTA_SSSE3 \
3169 | PTA_CX16 | PTA_FXSR)
3170 #define PTA_NEHALEM \
3171 (PTA_CORE2 | PTA_SSE4_1 | PTA_SSE4_2 | PTA_POPCNT)
3172 #define PTA_WESTMERE \
3173 (PTA_NEHALEM | PTA_AES | PTA_PCLMUL)
3174 #define PTA_SANDYBRIDGE \
3175 (PTA_WESTMERE | PTA_AVX | PTA_XSAVE | PTA_XSAVEOPT)
3176 #define PTA_IVYBRIDGE \
3177 (PTA_SANDYBRIDGE | PTA_FSGSBASE | PTA_RDRND | PTA_F16C)
3178 #define PTA_HASWELL \
3179 (PTA_IVYBRIDGE | PTA_AVX2 | PTA_BMI | PTA_BMI2 | PTA_LZCNT \
3180 | PTA_FMA | PTA_MOVBE | PTA_HLE)
3181 #define PTA_BROADWELL \
3182 (PTA_HASWELL | PTA_ADX | PTA_PRFCHW | PTA_RDSEED)
3183 #define PTA_KNL \
3184 (PTA_BROADWELL | PTA_AVX512PF | PTA_AVX512ER | PTA_AVX512F | PTA_AVX512CD)
3185 #define PTA_BONNELL \
3186 (PTA_CORE2 | PTA_MOVBE)
3187 #define PTA_SILVERMONT \
3188 (PTA_WESTMERE | PTA_MOVBE)
3190 /* If this reaches 64, we need to widen the struct pta flags below.  */
3192 static struct pta
3194 const char *const name; /* processor name or nickname. */
3195 const enum processor_type processor;
3196 const enum attr_cpu schedule;
3197 const unsigned HOST_WIDE_INT flags;
3199 const processor_alias_table[] =
3201 {"i386", PROCESSOR_I386, CPU_NONE, 0},
3202 {"i486", PROCESSOR_I486, CPU_NONE, 0},
3203 {"i586", PROCESSOR_PENTIUM, CPU_PENTIUM, 0},
3204 {"pentium", PROCESSOR_PENTIUM, CPU_PENTIUM, 0},
3205 {"pentium-mmx", PROCESSOR_PENTIUM, CPU_PENTIUM, PTA_MMX},
3206 {"winchip-c6", PROCESSOR_I486, CPU_NONE, PTA_MMX},
3207 {"winchip2", PROCESSOR_I486, CPU_NONE, PTA_MMX | PTA_3DNOW | PTA_PRFCHW},
3208 {"c3", PROCESSOR_I486, CPU_NONE, PTA_MMX | PTA_3DNOW | PTA_PRFCHW},
3209 {"c3-2", PROCESSOR_PENTIUMPRO, CPU_PENTIUMPRO,
3210 PTA_MMX | PTA_SSE | PTA_FXSR},
3211 {"i686", PROCESSOR_PENTIUMPRO, CPU_PENTIUMPRO, 0},
3212 {"pentiumpro", PROCESSOR_PENTIUMPRO, CPU_PENTIUMPRO, 0},
3213 {"pentium2", PROCESSOR_PENTIUMPRO, CPU_PENTIUMPRO, PTA_MMX | PTA_FXSR},
3214 {"pentium3", PROCESSOR_PENTIUMPRO, CPU_PENTIUMPRO,
3215 PTA_MMX | PTA_SSE | PTA_FXSR},
3216 {"pentium3m", PROCESSOR_PENTIUMPRO, CPU_PENTIUMPRO,
3217 PTA_MMX | PTA_SSE | PTA_FXSR},
3218 {"pentium-m", PROCESSOR_PENTIUMPRO, CPU_PENTIUMPRO,
3219 PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_FXSR},
3220 {"pentium4", PROCESSOR_PENTIUM4, CPU_NONE,
3221 PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_FXSR},
3222 {"pentium4m", PROCESSOR_PENTIUM4, CPU_NONE,
3223 PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_FXSR},
3224 {"prescott", PROCESSOR_NOCONA, CPU_NONE,
3225 PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3 | PTA_FXSR},
3226 {"nocona", PROCESSOR_NOCONA, CPU_NONE,
3227 PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3
3228 | PTA_CX16 | PTA_NO_SAHF | PTA_FXSR},
3229 {"core2", PROCESSOR_CORE2, CPU_CORE2, PTA_CORE2},
3230 {"nehalem", PROCESSOR_NEHALEM, CPU_NEHALEM, PTA_NEHALEM},
3231 {"corei7", PROCESSOR_NEHALEM, CPU_NEHALEM, PTA_NEHALEM},
3232 {"westmere", PROCESSOR_NEHALEM, CPU_NEHALEM, PTA_WESTMERE},
3233 {"sandybridge", PROCESSOR_SANDYBRIDGE, CPU_NEHALEM,
3234 PTA_SANDYBRIDGE},
3235 {"corei7-avx", PROCESSOR_SANDYBRIDGE, CPU_NEHALEM,
3236 PTA_SANDYBRIDGE},
3237 {"ivybridge", PROCESSOR_SANDYBRIDGE, CPU_NEHALEM,
3238 PTA_IVYBRIDGE},
3239 {"core-avx-i", PROCESSOR_SANDYBRIDGE, CPU_NEHALEM,
3240 PTA_IVYBRIDGE},
3241 {"haswell", PROCESSOR_HASWELL, CPU_NEHALEM, PTA_HASWELL},
3242 {"core-avx2", PROCESSOR_HASWELL, CPU_NEHALEM, PTA_HASWELL},
3243 {"broadwell", PROCESSOR_HASWELL, CPU_NEHALEM, PTA_BROADWELL},
3244 {"bonnell", PROCESSOR_BONNELL, CPU_ATOM, PTA_BONNELL},
3245 {"atom", PROCESSOR_BONNELL, CPU_ATOM, PTA_BONNELL},
3246 {"silvermont", PROCESSOR_SILVERMONT, CPU_SLM, PTA_SILVERMONT},
3247 {"slm", PROCESSOR_SILVERMONT, CPU_SLM, PTA_SILVERMONT},
3248 {"knl", PROCESSOR_KNL, CPU_KNL, PTA_KNL},
3249 {"intel", PROCESSOR_INTEL, CPU_SLM, PTA_NEHALEM},
3250 {"geode", PROCESSOR_GEODE, CPU_GEODE,
3251 PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_PREFETCH_SSE | PTA_PRFCHW},
3252 {"k6", PROCESSOR_K6, CPU_K6, PTA_MMX},
3253 {"k6-2", PROCESSOR_K6, CPU_K6, PTA_MMX | PTA_3DNOW | PTA_PRFCHW},
3254 {"k6-3", PROCESSOR_K6, CPU_K6, PTA_MMX | PTA_3DNOW | PTA_PRFCHW},
3255 {"athlon", PROCESSOR_ATHLON, CPU_ATHLON,
3256 PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_PREFETCH_SSE | PTA_PRFCHW},
3257 {"athlon-tbird", PROCESSOR_ATHLON, CPU_ATHLON,
3258 PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_PREFETCH_SSE | PTA_PRFCHW},
3259 {"athlon-4", PROCESSOR_ATHLON, CPU_ATHLON,
3260 PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE | PTA_PRFCHW | PTA_FXSR},
3261 {"athlon-xp", PROCESSOR_ATHLON, CPU_ATHLON,
3262 PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE | PTA_PRFCHW | PTA_FXSR},
3263 {"athlon-mp", PROCESSOR_ATHLON, CPU_ATHLON,
3264 PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE | PTA_PRFCHW | PTA_FXSR},
3265 {"x86-64", PROCESSOR_K8, CPU_K8,
3266 PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_NO_SAHF | PTA_FXSR},
3267 {"k8", PROCESSOR_K8, CPU_K8,
3268 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
3269 | PTA_SSE2 | PTA_NO_SAHF | PTA_PRFCHW | PTA_FXSR},
3270 {"k8-sse3", PROCESSOR_K8, CPU_K8,
3271 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
3272 | PTA_SSE2 | PTA_SSE3 | PTA_NO_SAHF | PTA_PRFCHW | PTA_FXSR},
3273 {"opteron", PROCESSOR_K8, CPU_K8,
3274 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
3275 | PTA_SSE2 | PTA_NO_SAHF | PTA_PRFCHW | PTA_FXSR},
3276 {"opteron-sse3", PROCESSOR_K8, CPU_K8,
3277 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
3278 | PTA_SSE2 | PTA_SSE3 | PTA_NO_SAHF | PTA_PRFCHW | PTA_FXSR},
3279 {"athlon64", PROCESSOR_K8, CPU_K8,
3280 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
3281 | PTA_SSE2 | PTA_NO_SAHF | PTA_PRFCHW | PTA_FXSR},
3282 {"athlon64-sse3", PROCESSOR_K8, CPU_K8,
3283 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
3284 | PTA_SSE2 | PTA_SSE3 | PTA_NO_SAHF | PTA_PRFCHW | PTA_FXSR},
3285 {"athlon-fx", PROCESSOR_K8, CPU_K8,
3286 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
3287 | PTA_SSE2 | PTA_NO_SAHF | PTA_PRFCHW | PTA_FXSR},
3288 {"amdfam10", PROCESSOR_AMDFAM10, CPU_AMDFAM10,
3289 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE | PTA_SSE2
3290 | PTA_SSE3 | PTA_SSE4A | PTA_CX16 | PTA_ABM | PTA_PRFCHW | PTA_FXSR},
3291 {"barcelona", PROCESSOR_AMDFAM10, CPU_AMDFAM10,
3292 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE | PTA_SSE2
3293 | PTA_SSE3 | PTA_SSE4A | PTA_CX16 | PTA_ABM | PTA_PRFCHW | PTA_FXSR},
3294 {"bdver1", PROCESSOR_BDVER1, CPU_BDVER1,
3295 PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3
3296 | PTA_SSE4A | PTA_CX16 | PTA_ABM | PTA_SSSE3 | PTA_SSE4_1
3297 | PTA_SSE4_2 | PTA_AES | PTA_PCLMUL | PTA_AVX | PTA_FMA4
3298 | PTA_XOP | PTA_LWP | PTA_PRFCHW | PTA_FXSR | PTA_XSAVE},
3299 {"bdver2", PROCESSOR_BDVER2, CPU_BDVER2,
3300 PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3
3301 | PTA_SSE4A | PTA_CX16 | PTA_ABM | PTA_SSSE3 | PTA_SSE4_1
3302 | PTA_SSE4_2 | PTA_AES | PTA_PCLMUL | PTA_AVX | PTA_FMA4
3303 | PTA_XOP | PTA_LWP | PTA_BMI | PTA_TBM | PTA_F16C
3304 | PTA_FMA | PTA_PRFCHW | PTA_FXSR | PTA_XSAVE},
3305 {"bdver3", PROCESSOR_BDVER3, CPU_BDVER3,
3306 PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3
3307 | PTA_SSE4A | PTA_CX16 | PTA_ABM | PTA_SSSE3 | PTA_SSE4_1
3308 | PTA_SSE4_2 | PTA_AES | PTA_PCLMUL | PTA_AVX | PTA_FMA4
3309 | PTA_XOP | PTA_LWP | PTA_BMI | PTA_TBM | PTA_F16C
3310 | PTA_FMA | PTA_PRFCHW | PTA_FXSR | PTA_XSAVE
3311 | PTA_XSAVEOPT | PTA_FSGSBASE},
3312 {"bdver4", PROCESSOR_BDVER4, CPU_BDVER4,
3313 PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3
3314 | PTA_SSE4A | PTA_CX16 | PTA_ABM | PTA_SSSE3 | PTA_SSE4_1
3315 | PTA_SSE4_2 | PTA_AES | PTA_PCLMUL | PTA_AVX | PTA_AVX2
3316 | PTA_FMA4 | PTA_XOP | PTA_LWP | PTA_BMI | PTA_BMI2
3317 | PTA_TBM | PTA_F16C | PTA_FMA | PTA_PRFCHW | PTA_FXSR
3318 | PTA_XSAVE | PTA_XSAVEOPT | PTA_FSGSBASE | PTA_RDRND
3319 | PTA_MOVBE},
3320 {"btver1", PROCESSOR_BTVER1, CPU_GENERIC,
3321 PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3
3322 | PTA_SSSE3 | PTA_SSE4A | PTA_ABM | PTA_CX16 | PTA_PRFCHW
3323 | PTA_FXSR | PTA_XSAVE},
3324 {"btver2", PROCESSOR_BTVER2, CPU_BTVER2,
3325 PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3
3326 | PTA_SSSE3 | PTA_SSE4A | PTA_ABM | PTA_CX16 | PTA_SSE4_1
3327 | PTA_SSE4_2 | PTA_AES | PTA_PCLMUL | PTA_AVX
3328 | PTA_BMI | PTA_F16C | PTA_MOVBE | PTA_PRFCHW
3329 | PTA_FXSR | PTA_XSAVE | PTA_XSAVEOPT},
3331 {"generic", PROCESSOR_GENERIC, CPU_GENERIC,
3332 PTA_64BIT
3333 | PTA_HLE /* flags are only used for -march switch. */ },
3336 /* -mrecip options. */
3337 static struct
3339 const char *string; /* option name */
3340 unsigned int mask; /* mask bits to set */
3342 const recip_options[] =
3344 { "all", RECIP_MASK_ALL },
3345 { "none", RECIP_MASK_NONE },
3346 { "div", RECIP_MASK_DIV },
3347 { "sqrt", RECIP_MASK_SQRT },
3348 { "vec-div", RECIP_MASK_VEC_DIV },
3349 { "vec-sqrt", RECIP_MASK_VEC_SQRT },
3352 int const pta_size = ARRAY_SIZE (processor_alias_table);
3354 /* Set up prefix/suffix so the error messages refer to either the command
3355 line argument, or the attribute(target). */
3356 if (main_args_p)
3358 prefix = "-m";
3359 suffix = "";
3360 sw = "switch";
3362 else
3364 prefix = "option(\"";
3365 suffix = "\")";
3366 sw = "attribute";
3369 /* Turn off both OPTION_MASK_ABI_64 and OPTION_MASK_ABI_X32 if
3370 TARGET_64BIT_DEFAULT is true and TARGET_64BIT is false. */
3371 if (TARGET_64BIT_DEFAULT && !TARGET_64BIT_P (opts->x_ix86_isa_flags))
3372 opts->x_ix86_isa_flags &= ~(OPTION_MASK_ABI_64 | OPTION_MASK_ABI_X32);
3373 #ifdef TARGET_BI_ARCH
3374 else
3376 #if TARGET_BI_ARCH == 1
3377 /* When TARGET_BI_ARCH == 1, by default, OPTION_MASK_ABI_64
3378 is on and OPTION_MASK_ABI_X32 is off. We turn off
3379 OPTION_MASK_ABI_64 if OPTION_MASK_ABI_X32 is turned on by
3380 -mx32. */
3381 if (TARGET_X32_P (opts->x_ix86_isa_flags))
3382 opts->x_ix86_isa_flags &= ~OPTION_MASK_ABI_64;
3383 #else
3384 /* When TARGET_BI_ARCH == 2, by default, OPTION_MASK_ABI_X32 is
3385 on and OPTION_MASK_ABI_64 is off. We turn off
3386 OPTION_MASK_ABI_X32 if OPTION_MASK_ABI_64 is turned on by
3387 -m64 or OPTION_MASK_CODE16 is turned on by -m16. */
3388 if (TARGET_LP64_P (opts->x_ix86_isa_flags)
3389 || TARGET_16BIT_P (opts->x_ix86_isa_flags))
3390 opts->x_ix86_isa_flags &= ~OPTION_MASK_ABI_X32;
3391 #endif
3393 #endif
3395 if (TARGET_X32_P (opts->x_ix86_isa_flags))
3397 /* Always turn on OPTION_MASK_ISA_64BIT and turn off
3398 OPTION_MASK_ABI_64 for TARGET_X32. */
3399 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_64BIT;
3400 opts->x_ix86_isa_flags &= ~OPTION_MASK_ABI_64;
3402 else if (TARGET_16BIT_P (opts->x_ix86_isa_flags))
3403 opts->x_ix86_isa_flags &= ~(OPTION_MASK_ISA_64BIT
3404 | OPTION_MASK_ABI_X32
3405 | OPTION_MASK_ABI_64);
3406 else if (TARGET_LP64_P (opts->x_ix86_isa_flags))
3408 /* Always turn on OPTION_MASK_ISA_64BIT and turn off
3409 OPTION_MASK_ABI_X32 for TARGET_LP64. */
3410 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_64BIT;
3411 opts->x_ix86_isa_flags &= ~OPTION_MASK_ABI_X32;
3414 #ifdef SUBTARGET_OVERRIDE_OPTIONS
3415 SUBTARGET_OVERRIDE_OPTIONS;
3416 #endif
3418 #ifdef SUBSUBTARGET_OVERRIDE_OPTIONS
3419 SUBSUBTARGET_OVERRIDE_OPTIONS;
3420 #endif
3422 /* -fPIC is the default for x86_64. */
3423 if (TARGET_MACHO && TARGET_64BIT_P (opts->x_ix86_isa_flags))
3424 opts->x_flag_pic = 2;
3426 /* Need to check -mtune=generic first. */
3427 if (opts->x_ix86_tune_string)
3429 /* As special support for cross compilers we read -mtune=native
3430 as -mtune=generic. With native compilers we won't see the
3431 -mtune=native, as it was changed by the driver. */
3432 if (!strcmp (opts->x_ix86_tune_string, "native"))
3434 opts->x_ix86_tune_string = "generic";
3436 else if (!strcmp (opts->x_ix86_tune_string, "x86-64"))
3437 warning (OPT_Wdeprecated, "%stune=x86-64%s is deprecated; use "
3438 "%stune=k8%s or %stune=generic%s instead as appropriate",
3439 prefix, suffix, prefix, suffix, prefix, suffix);
3441 else
3443 if (opts->x_ix86_arch_string)
3444 opts->x_ix86_tune_string = opts->x_ix86_arch_string;
3445 if (!opts->x_ix86_tune_string)
3447 opts->x_ix86_tune_string
3448 = processor_target_table[TARGET_CPU_DEFAULT].name;
3449 ix86_tune_defaulted = 1;
3452 /* opts->x_ix86_tune_string is set to opts->x_ix86_arch_string
3453 or defaulted. We need to use a sensible tune option. */
3454 if (!strcmp (opts->x_ix86_tune_string, "x86-64"))
3456 opts->x_ix86_tune_string = "generic";
3460 if (opts->x_ix86_stringop_alg == rep_prefix_8_byte
3461 && !TARGET_64BIT_P (opts->x_ix86_isa_flags))
3463 /* rep; movq isn't available in 32-bit code. */
3464 error ("-mstringop-strategy=rep_8byte not supported for 32-bit code");
3465 opts->x_ix86_stringop_alg = no_stringop;
3468 if (!opts->x_ix86_arch_string)
3469 opts->x_ix86_arch_string
3470 = TARGET_64BIT_P (opts->x_ix86_isa_flags)
3471 ? "x86-64" : SUBTARGET32_DEFAULT_CPU;
3472 else
3473 ix86_arch_specified = 1;
3475 if (opts_set->x_ix86_pmode)
3477 if ((TARGET_LP64_P (opts->x_ix86_isa_flags)
3478 && opts->x_ix86_pmode == PMODE_SI)
3479 || (!TARGET_64BIT_P (opts->x_ix86_isa_flags)
3480 && opts->x_ix86_pmode == PMODE_DI))
3481 error ("address mode %qs not supported in the %s bit mode",
3482 TARGET_64BIT_P (opts->x_ix86_isa_flags) ? "short" : "long",
3483 TARGET_64BIT_P (opts->x_ix86_isa_flags) ? "64" : "32");
3485 else
3486 opts->x_ix86_pmode = TARGET_LP64_P (opts->x_ix86_isa_flags)
3487 ? PMODE_DI : PMODE_SI;
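/* Note that only LP64 defaults to DImode pointers above; x32 is a 64-bit
   target but keeps SImode pointers.  */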
3489 if (!opts_set->x_ix86_abi)
3490 opts->x_ix86_abi = DEFAULT_ABI;
3492 /* For targets using the MS ABI enable ms-extensions, if not
3493 explicitly turned off. For non-MS ABIs we turn this option
3494 off. */
3495 if (!opts_set->x_flag_ms_extensions)
3496 opts->x_flag_ms_extensions = (MS_ABI == DEFAULT_ABI);
3498 if (opts_set->x_ix86_cmodel)
3500 switch (opts->x_ix86_cmodel)
3502 case CM_SMALL:
3503 case CM_SMALL_PIC:
3504 if (opts->x_flag_pic)
3505 opts->x_ix86_cmodel = CM_SMALL_PIC;
3506 if (!TARGET_64BIT_P (opts->x_ix86_isa_flags))
3507 error ("code model %qs not supported in the %s bit mode",
3508 "small", "32");
3509 break;
3511 case CM_MEDIUM:
3512 case CM_MEDIUM_PIC:
3513 if (opts->x_flag_pic)
3514 opts->x_ix86_cmodel = CM_MEDIUM_PIC;
3515 if (!TARGET_64BIT_P (opts->x_ix86_isa_flags))
3516 error ("code model %qs not supported in the %s bit mode",
3517 "medium", "32");
3518 else if (TARGET_X32_P (opts->x_ix86_isa_flags))
3519 error ("code model %qs not supported in x32 mode",
3520 "medium");
3521 break;
3523 case CM_LARGE:
3524 case CM_LARGE_PIC:
3525 if (opts->x_flag_pic)
3526 opts->x_ix86_cmodel = CM_LARGE_PIC;
3527 if (!TARGET_64BIT_P (opts->x_ix86_isa_flags))
3528 error ("code model %qs not supported in the %s bit mode",
3529 "large", "32");
3530 else if (TARGET_X32_P (opts->x_ix86_isa_flags))
3531 error ("code model %qs not supported in x32 mode",
3532 "large");
3533 break;
3535 case CM_32:
3536 if (opts->x_flag_pic)
3537 error ("code model %s does not support PIC mode", "32");
3538 if (TARGET_64BIT_P (opts->x_ix86_isa_flags))
3539 error ("code model %qs not supported in the %s bit mode",
3540 "32", "64");
3541 break;
3543 case CM_KERNEL:
3544 if (opts->x_flag_pic)
3546 error ("code model %s does not support PIC mode", "kernel");
3547 opts->x_ix86_cmodel = CM_32;
3549 if (!TARGET_64BIT_P (opts->x_ix86_isa_flags))
3550 error ("code model %qs not supported in the %s bit mode",
3551 "kernel", "32");
3552 break;
3554 default:
3555 gcc_unreachable ();
3558 else
3560 /* For TARGET_64BIT and MS_ABI, force pic on, in order to enable the
3561 use of rip-relative addressing. This eliminates fixups that
3562 would otherwise be needed if this object is to be placed in a
3563 DLL, and is essentially just as efficient as direct addressing. */
3564 if (TARGET_64BIT_P (opts->x_ix86_isa_flags)
3565 && (TARGET_RDOS || TARGET_PECOFF))
3566 opts->x_ix86_cmodel = CM_MEDIUM_PIC, opts->x_flag_pic = 1;
3567 else if (TARGET_64BIT_P (opts->x_ix86_isa_flags))
3568 opts->x_ix86_cmodel = opts->x_flag_pic ? CM_SMALL_PIC : CM_SMALL;
3569 else
3570 opts->x_ix86_cmodel = CM_32;
3572 if (TARGET_MACHO && opts->x_ix86_asm_dialect == ASM_INTEL)
3574 error ("-masm=intel not supported in this configuration");
3575 opts->x_ix86_asm_dialect = ASM_ATT;
3577 if ((TARGET_64BIT_P (opts->x_ix86_isa_flags) != 0)
3578 != ((opts->x_ix86_isa_flags & OPTION_MASK_ISA_64BIT) != 0))
3579 sorry ("%i-bit mode not compiled in",
3580 (opts->x_ix86_isa_flags & OPTION_MASK_ISA_64BIT) ? 64 : 32);
3582 for (i = 0; i < pta_size; i++)
3583 if (! strcmp (opts->x_ix86_arch_string, processor_alias_table[i].name))
3585 ix86_schedule = processor_alias_table[i].schedule;
3586 ix86_arch = processor_alias_table[i].processor;
3587 /* Default cpu tuning to the architecture. */
3588 ix86_tune = ix86_arch;
3590 if (TARGET_64BIT_P (opts->x_ix86_isa_flags)
3591 && !(processor_alias_table[i].flags & PTA_64BIT))
3592 error ("CPU you selected does not support x86-64 "
3593 "instruction set");
3595 if (processor_alias_table[i].flags & PTA_MMX
3596 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_MMX))
3597 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_MMX;
3598 if (processor_alias_table[i].flags & PTA_3DNOW
3599 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_3DNOW))
3600 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_3DNOW;
3601 if (processor_alias_table[i].flags & PTA_3DNOW_A
3602 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_3DNOW_A))
3603 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_3DNOW_A;
3604 if (processor_alias_table[i].flags & PTA_SSE
3605 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE))
3606 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_SSE;
3607 if (processor_alias_table[i].flags & PTA_SSE2
3608 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE2))
3609 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_SSE2;
3610 if (processor_alias_table[i].flags & PTA_SSE3
3611 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE3))
3612 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_SSE3;
3613 if (processor_alias_table[i].flags & PTA_SSSE3
3614 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_SSSE3))
3615 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_SSSE3;
3616 if (processor_alias_table[i].flags & PTA_SSE4_1
3617 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE4_1))
3618 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_SSE4_1;
3619 if (processor_alias_table[i].flags & PTA_SSE4_2
3620 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE4_2))
3621 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_SSE4_2;
3622 if (processor_alias_table[i].flags & PTA_AVX
3623 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_AVX))
3624 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_AVX;
3625 if (processor_alias_table[i].flags & PTA_AVX2
3626 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_AVX2))
3627 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_AVX2;
3628 if (processor_alias_table[i].flags & PTA_FMA
3629 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_FMA))
3630 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_FMA;
3631 if (processor_alias_table[i].flags & PTA_SSE4A
3632 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE4A))
3633 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_SSE4A;
3634 if (processor_alias_table[i].flags & PTA_FMA4
3635 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_FMA4))
3636 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_FMA4;
3637 if (processor_alias_table[i].flags & PTA_XOP
3638 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_XOP))
3639 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_XOP;
3640 if (processor_alias_table[i].flags & PTA_LWP
3641 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_LWP))
3642 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_LWP;
3643 if (processor_alias_table[i].flags & PTA_ABM
3644 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_ABM))
3645 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_ABM;
3646 if (processor_alias_table[i].flags & PTA_BMI
3647 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_BMI))
3648 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_BMI;
3649 if (processor_alias_table[i].flags & (PTA_LZCNT | PTA_ABM)
3650 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_LZCNT))
3651 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_LZCNT;
3652 if (processor_alias_table[i].flags & PTA_TBM
3653 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_TBM))
3654 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_TBM;
3655 if (processor_alias_table[i].flags & PTA_BMI2
3656 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_BMI2))
3657 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_BMI2;
3658 if (processor_alias_table[i].flags & PTA_CX16
3659 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_CX16))
3660 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_CX16;
3661 if (processor_alias_table[i].flags & (PTA_POPCNT | PTA_ABM)
3662 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_POPCNT))
3663 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_POPCNT;
3664 if (!(TARGET_64BIT_P (opts->x_ix86_isa_flags)
3665 && (processor_alias_table[i].flags & PTA_NO_SAHF))
3666 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_SAHF))
3667 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_SAHF;
3668 if (processor_alias_table[i].flags & PTA_MOVBE
3669 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_MOVBE))
3670 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_MOVBE;
3671 if (processor_alias_table[i].flags & PTA_AES
3672 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_AES))
3673 ix86_isa_flags |= OPTION_MASK_ISA_AES;
3674 if (processor_alias_table[i].flags & PTA_SHA
3675 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_SHA))
3676 ix86_isa_flags |= OPTION_MASK_ISA_SHA;
3677 if (processor_alias_table[i].flags & PTA_PCLMUL
3678 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_PCLMUL))
3679 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_PCLMUL;
3680 if (processor_alias_table[i].flags & PTA_FSGSBASE
3681 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_FSGSBASE))
3682 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_FSGSBASE;
3683 if (processor_alias_table[i].flags & PTA_RDRND
3684 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_RDRND))
3685 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_RDRND;
3686 if (processor_alias_table[i].flags & PTA_F16C
3687 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_F16C))
3688 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_F16C;
3689 if (processor_alias_table[i].flags & PTA_RTM
3690 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_RTM))
3691 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_RTM;
3692 if (processor_alias_table[i].flags & PTA_HLE
3693 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_HLE))
3694 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_HLE;
3695 if (processor_alias_table[i].flags & PTA_PRFCHW
3696 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_PRFCHW))
3697 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_PRFCHW;
3698 if (processor_alias_table[i].flags & PTA_RDSEED
3699 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_RDSEED))
3700 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_RDSEED;
3701 if (processor_alias_table[i].flags & PTA_ADX
3702 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_ADX))
3703 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_ADX;
3704 if (processor_alias_table[i].flags & PTA_FXSR
3705 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_FXSR))
3706 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_FXSR;
3707 if (processor_alias_table[i].flags & PTA_XSAVE
3708 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_XSAVE))
3709 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_XSAVE;
3710 if (processor_alias_table[i].flags & PTA_XSAVEOPT
3711 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_XSAVEOPT))
3712 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_XSAVEOPT;
3713 if (processor_alias_table[i].flags & PTA_AVX512F
3714 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_AVX512F))
3715 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_AVX512F;
3716 if (processor_alias_table[i].flags & PTA_AVX512ER
3717 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_AVX512ER))
3718 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_AVX512ER;
3719 if (processor_alias_table[i].flags & PTA_AVX512PF
3720 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_AVX512PF))
3721 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_AVX512PF;
3722 if (processor_alias_table[i].flags & PTA_AVX512CD
3723 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_AVX512CD))
3724 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_AVX512CD;
3725 if (processor_alias_table[i].flags & PTA_PREFETCHWT1
3726 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_PREFETCHWT1))
3727 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_PREFETCHWT1;
3728 if (processor_alias_table[i].flags & PTA_PCOMMIT
3729 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_PCOMMIT))
3730 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_PCOMMIT;
3731 if (processor_alias_table[i].flags & PTA_CLWB
3732 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_CLWB))
3733 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_CLWB;
3734 if (processor_alias_table[i].flags & PTA_CLFLUSHOPT
3735 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_CLFLUSHOPT))
3736 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_CLFLUSHOPT;
3737 if (processor_alias_table[i].flags & PTA_XSAVEC
3738 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_XSAVEC))
3739 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_XSAVEC;
3740 if (processor_alias_table[i].flags & PTA_XSAVES
3741 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_XSAVES))
3742 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_XSAVES;
3743 if (processor_alias_table[i].flags & PTA_AVX512DQ
3744 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_AVX512DQ))
3745 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_AVX512DQ;
3746 if (processor_alias_table[i].flags & PTA_AVX512BW
3747 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_AVX512BW))
3748 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_AVX512BW;
3749 if (processor_alias_table[i].flags & PTA_AVX512VL
3750 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_AVX512VL))
3751 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_AVX512VL;
3752 if (processor_alias_table[i].flags & PTA_MPX
3753 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_MPX))
3754 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_MPX;
3755 if (processor_alias_table[i].flags & PTA_AVX512VBMI
3756 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_AVX512VBMI))
3757 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_AVX512VBMI;
3758 if (processor_alias_table[i].flags & PTA_AVX512IFMA
3759 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_AVX512IFMA))
3760 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_AVX512IFMA;
3761 if (processor_alias_table[i].flags & (PTA_PREFETCH_SSE | PTA_SSE))
3762 x86_prefetch_sse = true;
3764 break;
3767 if (TARGET_X32 && (opts->x_ix86_isa_flags & OPTION_MASK_ISA_MPX))
3768 error ("Intel MPX does not support x32");
3770 if (TARGET_X32 && (ix86_isa_flags & OPTION_MASK_ISA_MPX))
3771 error ("Intel MPX does not support x32");
3773 if (!strcmp (opts->x_ix86_arch_string, "generic"))
3774 error ("generic CPU can be used only for %stune=%s %s",
3775 prefix, suffix, sw);
3776 else if (!strcmp (opts->x_ix86_arch_string, "intel"))
3777 error ("intel CPU can be used only for %stune=%s %s",
3778 prefix, suffix, sw);
3779 else if (i == pta_size)
3780 error ("bad value (%s) for %sarch=%s %s",
3781 opts->x_ix86_arch_string, prefix, suffix, sw);
3783 ix86_arch_mask = 1u << ix86_arch;
3784 for (i = 0; i < X86_ARCH_LAST; ++i)
3785 ix86_arch_features[i] = !!(initial_ix86_arch_features[i] & ix86_arch_mask);
3787 for (i = 0; i < pta_size; i++)
3788 if (! strcmp (opts->x_ix86_tune_string, processor_alias_table[i].name))
3790 ix86_schedule = processor_alias_table[i].schedule;
3791 ix86_tune = processor_alias_table[i].processor;
3792 if (TARGET_64BIT_P (opts->x_ix86_isa_flags))
3794 if (!(processor_alias_table[i].flags & PTA_64BIT))
3796 if (ix86_tune_defaulted)
3798 opts->x_ix86_tune_string = "x86-64";
3799 for (i = 0; i < pta_size; i++)
3800 if (! strcmp (opts->x_ix86_tune_string,
3801 processor_alias_table[i].name))
3802 break;
3803 ix86_schedule = processor_alias_table[i].schedule;
3804 ix86_tune = processor_alias_table[i].processor;
3806 else
3807 error ("CPU you selected does not support x86-64 "
3808 "instruction set");
3811 /* Intel CPUs have always interpreted SSE prefetch instructions as
3812 NOPs; so, we can enable SSE prefetch instructions even when
3813 -mtune (rather than -march) points us to a processor that has them.
3814 However, the VIA C3 gives a SIGILL, so we only do that for i686 and
3815 higher processors. */
3816 if (TARGET_CMOV
3817 && (processor_alias_table[i].flags & (PTA_PREFETCH_SSE | PTA_SSE)))
3818 x86_prefetch_sse = true;
3819 break;
3822 if (ix86_tune_specified && i == pta_size)
3823 error ("bad value (%s) for %stune=%s %s",
3824 opts->x_ix86_tune_string, prefix, suffix, sw);
3826 set_ix86_tune_features (ix86_tune, opts->x_ix86_dump_tunes);
3828 #ifndef USE_IX86_FRAME_POINTER
3829 #define USE_IX86_FRAME_POINTER 0
3830 #endif
3832 #ifndef USE_X86_64_FRAME_POINTER
3833 #define USE_X86_64_FRAME_POINTER 0
3834 #endif
3836 /* Set the default values for switches whose default depends on TARGET_64BIT
3837 in case they weren't overwritten by command line options. */
3838 if (TARGET_64BIT_P (opts->x_ix86_isa_flags))
3840 if (opts->x_optimize >= 1 && !opts_set->x_flag_omit_frame_pointer)
3841 opts->x_flag_omit_frame_pointer = !USE_X86_64_FRAME_POINTER;
3842 if (opts->x_flag_asynchronous_unwind_tables
3843 && !opts_set->x_flag_unwind_tables
3844 && TARGET_64BIT_MS_ABI)
3845 opts->x_flag_unwind_tables = 1;
3846 if (opts->x_flag_asynchronous_unwind_tables == 2)
3847 opts->x_flag_unwind_tables
3848 = opts->x_flag_asynchronous_unwind_tables = 1;
3849 if (opts->x_flag_pcc_struct_return == 2)
3850 opts->x_flag_pcc_struct_return = 0;
3852 else
3854 if (opts->x_optimize >= 1 && !opts_set->x_flag_omit_frame_pointer)
3855 opts->x_flag_omit_frame_pointer
3856 = !(USE_IX86_FRAME_POINTER || opts->x_optimize_size);
3857 if (opts->x_flag_asynchronous_unwind_tables == 2)
3858 opts->x_flag_asynchronous_unwind_tables = !USE_IX86_FRAME_POINTER;
3859 if (opts->x_flag_pcc_struct_return == 2)
3860 opts->x_flag_pcc_struct_return = DEFAULT_PCC_STRUCT_RETURN;
3863 ix86_tune_cost = processor_target_table[ix86_tune].cost;
3864 if (opts->x_optimize_size)
3865 ix86_cost = &ix86_size_cost;
3866 else
3867 ix86_cost = ix86_tune_cost;
3869 /* Arrange to set up i386_stack_locals for all functions. */
3870 init_machine_status = ix86_init_machine_status;
3872 /* Validate -mregparm= value. */
3873 if (opts_set->x_ix86_regparm)
3875 if (TARGET_64BIT_P (opts->x_ix86_isa_flags))
3876 warning (0, "-mregparm is ignored in 64-bit mode");
3877 if (opts->x_ix86_regparm > REGPARM_MAX)
3879 error ("-mregparm=%d is not between 0 and %d",
3880 opts->x_ix86_regparm, REGPARM_MAX);
3881 opts->x_ix86_regparm = 0;
3884 if (TARGET_64BIT_P (opts->x_ix86_isa_flags))
3885 opts->x_ix86_regparm = REGPARM_MAX;
3887 /* Default align_* from the processor table. */
3888 if (opts->x_align_loops == 0)
3890 opts->x_align_loops = processor_target_table[ix86_tune].align_loop;
3891 align_loops_max_skip = processor_target_table[ix86_tune].align_loop_max_skip;
3893 if (opts->x_align_jumps == 0)
3895 opts->x_align_jumps = processor_target_table[ix86_tune].align_jump;
3896 align_jumps_max_skip = processor_target_table[ix86_tune].align_jump_max_skip;
3898 if (opts->x_align_functions == 0)
3900 opts->x_align_functions = processor_target_table[ix86_tune].align_func;
3903 /* Provide default for -mbranch-cost= value. */
3904 if (!opts_set->x_ix86_branch_cost)
3905 opts->x_ix86_branch_cost = ix86_cost->branch_cost;
3907 if (TARGET_64BIT_P (opts->x_ix86_isa_flags))
3909 opts->x_target_flags
3910 |= TARGET_SUBTARGET64_DEFAULT & ~opts_set->x_target_flags;
3912 /* Enable by default the SSE and MMX builtins. Do allow the user to
3913 explicitly disable any of these. In particular, disabling SSE and
3914 MMX for kernel code is extremely useful. */
3915 if (!ix86_arch_specified)
3916 opts->x_ix86_isa_flags
3917 |= ((OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_MMX
3918 | TARGET_SUBTARGET64_ISA_DEFAULT)
3919 & ~opts->x_ix86_isa_flags_explicit);
3921 if (TARGET_RTD_P (opts->x_target_flags))
3922 warning (0, "%srtd%s is ignored in 64-bit mode", prefix, suffix);
3924 else
3926 opts->x_target_flags
3927 |= TARGET_SUBTARGET32_DEFAULT & ~opts_set->x_target_flags;
3929 if (!ix86_arch_specified)
3930 opts->x_ix86_isa_flags
3931 |= TARGET_SUBTARGET32_ISA_DEFAULT & ~opts->x_ix86_isa_flags_explicit;
3933 /* The i386 ABI does not specify a red zone. It still makes sense to use one
3934 when the programmer takes care to keep the stack from being destroyed. */
3935 if (!(opts_set->x_target_flags & MASK_NO_RED_ZONE))
3936 opts->x_target_flags |= MASK_NO_RED_ZONE;
3939 /* Keep nonleaf frame pointers. */
3940 if (opts->x_flag_omit_frame_pointer)
3941 opts->x_target_flags &= ~MASK_OMIT_LEAF_FRAME_POINTER;
3942 else if (TARGET_OMIT_LEAF_FRAME_POINTER_P (opts->x_target_flags))
3943 opts->x_flag_omit_frame_pointer = 1;
3945 /* If we're doing fast math, we don't care about comparison order
3946 wrt NaNs. This lets us use a shorter comparison sequence. */
3947 if (opts->x_flag_finite_math_only)
3948 opts->x_target_flags &= ~MASK_IEEE_FP;
3950 /* If the architecture always has an FPU, turn off NO_FANCY_MATH_387,
3951 since the insns won't need emulation. */
3952 if (ix86_tune_features [X86_TUNE_ALWAYS_FANCY_MATH_387])
3953 opts->x_target_flags &= ~MASK_NO_FANCY_MATH_387;
3955 /* Likewise, if the target doesn't have a 387, or we've specified
3956 software floating point, don't use 387 inline intrinsics. */
3957 if (!TARGET_80387_P (opts->x_target_flags))
3958 opts->x_target_flags |= MASK_NO_FANCY_MATH_387;
3960 /* Turn on MMX builtins for -msse. */
3961 if (TARGET_SSE_P (opts->x_ix86_isa_flags))
3962 opts->x_ix86_isa_flags
3963 |= OPTION_MASK_ISA_MMX & ~opts->x_ix86_isa_flags_explicit;
3965 /* Enable SSE prefetch. */
3966 if (TARGET_SSE_P (opts->x_ix86_isa_flags)
3967 || (TARGET_PRFCHW && !TARGET_3DNOW_P (opts->x_ix86_isa_flags)))
3968 x86_prefetch_sse = true;
3970 /* Enable prefetch{,w} instructions for -m3dnow and -mprefetchwt1. */
3971 if (TARGET_3DNOW_P (opts->x_ix86_isa_flags)
3972 || TARGET_PREFETCHWT1_P (opts->x_ix86_isa_flags))
3973 opts->x_ix86_isa_flags
3974 |= OPTION_MASK_ISA_PRFCHW & ~opts->x_ix86_isa_flags_explicit;
3976 /* Enable popcnt instruction for -msse4.2 or -mabm. */
3977 if (TARGET_SSE4_2_P (opts->x_ix86_isa_flags)
3978 || TARGET_ABM_P (opts->x_ix86_isa_flags))
3979 opts->x_ix86_isa_flags
3980 |= OPTION_MASK_ISA_POPCNT & ~opts->x_ix86_isa_flags_explicit;
3982 /* Enable lzcnt instruction for -mabm. */
3983 if (TARGET_ABM_P (opts->x_ix86_isa_flags))
3984 opts->x_ix86_isa_flags
3985 |= OPTION_MASK_ISA_LZCNT & ~opts->x_ix86_isa_flags_explicit;
3987 /* Validate -mpreferred-stack-boundary= value or default it to
3988 PREFERRED_STACK_BOUNDARY_DEFAULT. */
3989 ix86_preferred_stack_boundary = PREFERRED_STACK_BOUNDARY_DEFAULT;
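/* The -mpreferred-stack-boundary= argument is log2 of the boundary in bytes;
   ix86_preferred_stack_boundary itself is kept in bits.  */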
3990 if (opts_set->x_ix86_preferred_stack_boundary_arg)
3992 int min = (TARGET_64BIT_P (opts->x_ix86_isa_flags)
3993 ? (TARGET_SSE_P (opts->x_ix86_isa_flags) ? 4 : 3) : 2);
3994 int max = (TARGET_SEH ? 4 : 12);
3996 if (opts->x_ix86_preferred_stack_boundary_arg < min
3997 || opts->x_ix86_preferred_stack_boundary_arg > max)
3999 if (min == max)
4000 error ("-mpreferred-stack-boundary is not supported "
4001 "for this target");
4002 else
4003 error ("-mpreferred-stack-boundary=%d is not between %d and %d",
4004 opts->x_ix86_preferred_stack_boundary_arg, min, max);
4006 else
4007 ix86_preferred_stack_boundary
4008 = (1 << opts->x_ix86_preferred_stack_boundary_arg) * BITS_PER_UNIT;
4011 /* Set the default value for -mstackrealign. */
4012 if (opts->x_ix86_force_align_arg_pointer == -1)
4013 opts->x_ix86_force_align_arg_pointer = STACK_REALIGN_DEFAULT;
4015 ix86_default_incoming_stack_boundary = PREFERRED_STACK_BOUNDARY;
4017 /* Validate -mincoming-stack-boundary= value or default it to
4018 MIN_STACK_BOUNDARY/PREFERRED_STACK_BOUNDARY. */
4019 ix86_incoming_stack_boundary = ix86_default_incoming_stack_boundary;
4020 if (opts_set->x_ix86_incoming_stack_boundary_arg)
4022 if (opts->x_ix86_incoming_stack_boundary_arg
4023 < (TARGET_64BIT_P (opts->x_ix86_isa_flags) ? 4 : 2)
4024 || opts->x_ix86_incoming_stack_boundary_arg > 12)
4025 error ("-mincoming-stack-boundary=%d is not between %d and 12",
4026 opts->x_ix86_incoming_stack_boundary_arg,
4027 TARGET_64BIT_P (opts->x_ix86_isa_flags) ? 4 : 2);
4028 else
4030 ix86_user_incoming_stack_boundary
4031 = (1 << opts->x_ix86_incoming_stack_boundary_arg) * BITS_PER_UNIT;
4032 ix86_incoming_stack_boundary
4033 = ix86_user_incoming_stack_boundary;
4037 #ifndef NO_PROFILE_COUNTERS
4038 if (flag_nop_mcount)
4039 error ("-mnop-mcount is not compatible with this target");
4040 #endif
4041 if (flag_nop_mcount && flag_pic)
4042 error ("-mnop-mcount is not implemented for -fPIC");
4044 /* Accept -msseregparm only if at least SSE support is enabled. */
4045 if (TARGET_SSEREGPARM_P (opts->x_target_flags)
4046 && ! TARGET_SSE_P (opts->x_ix86_isa_flags))
4047 error ("%ssseregparm%s used without SSE enabled", prefix, suffix);
4049 if (opts_set->x_ix86_fpmath)
4051 if (opts->x_ix86_fpmath & FPMATH_SSE)
4053 if (!TARGET_SSE_P (opts->x_ix86_isa_flags))
4055 warning (0, "SSE instruction set disabled, using 387 arithmetics");
4056 opts->x_ix86_fpmath = FPMATH_387;
4058 else if ((opts->x_ix86_fpmath & FPMATH_387)
4059 && !TARGET_80387_P (opts->x_target_flags))
4061 warning (0, "387 instruction set disabled, using SSE arithmetics");
4062 opts->x_ix86_fpmath = FPMATH_SSE;
4066 /* For all chips supporting SSE2, -mfpmath=sse performs better than
4067 fpmath=387. The latter is however the default on many targets, since the
4068 extra 80-bit precision of temporaries is considered to be part of the ABI.
4069 Overwrite the default at least for -ffast-math.
4070 TODO: -mfpmath=both seems to produce equally performing code with slightly
4071 smaller binaries. It is however not clear whether register allocation is
4072 ready for this setting.
4073 Also -mfpmath=387 is overall considerably more compact (about 4-5%) than SSE
4074 codegen. We may switch to 387 with -ffast-math for size-optimized
4075 functions. */
4076 else if (fast_math_flags_set_p (&global_options)
4077 && TARGET_SSE2_P (opts->x_ix86_isa_flags))
4078 opts->x_ix86_fpmath = FPMATH_SSE;
4079 else
4080 opts->x_ix86_fpmath = TARGET_FPMATH_DEFAULT_P (opts->x_ix86_isa_flags);
4082 /* If the i387 is disabled, then do not return values in it. */
4083 if (!TARGET_80387_P (opts->x_target_flags))
4084 opts->x_target_flags &= ~MASK_FLOAT_RETURNS;
4086 /* Use external vectorized library in vectorizing intrinsics. */
4087 if (opts_set->x_ix86_veclibabi_type)
4088 switch (opts->x_ix86_veclibabi_type)
4090 case ix86_veclibabi_type_svml:
4091 ix86_veclib_handler = ix86_veclibabi_svml;
4092 break;
4094 case ix86_veclibabi_type_acml:
4095 ix86_veclib_handler = ix86_veclibabi_acml;
4096 break;
4098 default:
4099 gcc_unreachable ();
4102 if (ix86_tune_features [X86_TUNE_ACCUMULATE_OUTGOING_ARGS]
4103 && !(opts_set->x_target_flags & MASK_ACCUMULATE_OUTGOING_ARGS)
4104 && !opts->x_optimize_size)
4105 opts->x_target_flags |= MASK_ACCUMULATE_OUTGOING_ARGS;
4107 /* If stack probes are required, the space used for large function
4108 arguments on the stack must also be probed, so enable
4109 -maccumulate-outgoing-args so this happens in the prologue. */
4110 if (TARGET_STACK_PROBE_P (opts->x_target_flags)
4111 && !(opts->x_target_flags & MASK_ACCUMULATE_OUTGOING_ARGS))
4113 if (opts_set->x_target_flags & MASK_ACCUMULATE_OUTGOING_ARGS)
4114 warning (0, "stack probing requires %saccumulate-outgoing-args%s "
4115 "for correctness", prefix, suffix);
4116 opts->x_target_flags |= MASK_ACCUMULATE_OUTGOING_ARGS;
4119 /* Figure out what ASM_GENERATE_INTERNAL_LABEL builds as a prefix. */
4121 char *p;
4122 ASM_GENERATE_INTERNAL_LABEL (internal_label_prefix, "LX", 0);
4123 p = strchr (internal_label_prefix, 'X');
4124 internal_label_prefix_len = p - internal_label_prefix;
4125 *p = '\0';
4128 /* When no scheduling description is available, disable the scheduler pass
4129 so it won't slow down compilation and make x87 code slower. */
4130 if (!TARGET_SCHEDULE)
4131 opts->x_flag_schedule_insns_after_reload = opts->x_flag_schedule_insns = 0;
4133 maybe_set_param_value (PARAM_SIMULTANEOUS_PREFETCHES,
4134 ix86_tune_cost->simultaneous_prefetches,
4135 opts->x_param_values,
4136 opts_set->x_param_values);
4137 maybe_set_param_value (PARAM_L1_CACHE_LINE_SIZE,
4138 ix86_tune_cost->prefetch_block,
4139 opts->x_param_values,
4140 opts_set->x_param_values);
4141 maybe_set_param_value (PARAM_L1_CACHE_SIZE,
4142 ix86_tune_cost->l1_cache_size,
4143 opts->x_param_values,
4144 opts_set->x_param_values);
4145 maybe_set_param_value (PARAM_L2_CACHE_SIZE,
4146 ix86_tune_cost->l2_cache_size,
4147 opts->x_param_values,
4148 opts_set->x_param_values);
4150 /* Enable software prefetching at -O3 for CPUs where prefetching is helpful. */
4151 if (opts->x_flag_prefetch_loop_arrays < 0
4152 && HAVE_prefetch
4153 && (opts->x_optimize >= 3 || opts->x_flag_profile_use)
4154 && TARGET_SOFTWARE_PREFETCHING_BENEFICIAL)
4155 opts->x_flag_prefetch_loop_arrays = 1;
4157 /* If using typedef char *va_list, signal that __builtin_va_start (&ap, 0)
4158 can be optimized to ap = __builtin_next_arg (0). */
4159 if (!TARGET_64BIT_P (opts->x_ix86_isa_flags) && !opts->x_flag_split_stack)
4160 targetm.expand_builtin_va_start = NULL;
4162 if (TARGET_64BIT_P (opts->x_ix86_isa_flags))
4164 ix86_gen_leave = gen_leave_rex64;
4165 if (Pmode == DImode)
4167 ix86_gen_tls_global_dynamic_64 = gen_tls_global_dynamic_64_di;
4168 ix86_gen_tls_local_dynamic_base_64
4169 = gen_tls_local_dynamic_base_64_di;
4171 else
4173 ix86_gen_tls_global_dynamic_64 = gen_tls_global_dynamic_64_si;
4174 ix86_gen_tls_local_dynamic_base_64
4175 = gen_tls_local_dynamic_base_64_si;
4178 else
4179 ix86_gen_leave = gen_leave;
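/* Pick the SImode or DImode variants of the remaining RTL generators
   according to the pointer mode.  */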
4181 if (Pmode == DImode)
4183 ix86_gen_add3 = gen_adddi3;
4184 ix86_gen_sub3 = gen_subdi3;
4185 ix86_gen_sub3_carry = gen_subdi3_carry;
4186 ix86_gen_one_cmpl2 = gen_one_cmpldi2;
4187 ix86_gen_andsp = gen_anddi3;
4188 ix86_gen_allocate_stack_worker = gen_allocate_stack_worker_probe_di;
4189 ix86_gen_adjust_stack_and_probe = gen_adjust_stack_and_probedi;
4190 ix86_gen_probe_stack_range = gen_probe_stack_rangedi;
4191 ix86_gen_monitor = gen_sse3_monitor_di;
4193 else
4195 ix86_gen_add3 = gen_addsi3;
4196 ix86_gen_sub3 = gen_subsi3;
4197 ix86_gen_sub3_carry = gen_subsi3_carry;
4198 ix86_gen_one_cmpl2 = gen_one_cmplsi2;
4199 ix86_gen_andsp = gen_andsi3;
4200 ix86_gen_allocate_stack_worker = gen_allocate_stack_worker_probe_si;
4201 ix86_gen_adjust_stack_and_probe = gen_adjust_stack_and_probesi;
4202 ix86_gen_probe_stack_range = gen_probe_stack_rangesi;
4203 ix86_gen_monitor = gen_sse3_monitor_si;
4206 #ifdef USE_IX86_CLD
4207 /* Use -mcld by default for 32-bit code if configured with --enable-cld. */
4208 if (!TARGET_64BIT_P (opts->x_ix86_isa_flags))
4209 opts->x_target_flags |= MASK_CLD & ~opts_set->x_target_flags;
4210 #endif
4212 if (!TARGET_64BIT_P (opts->x_ix86_isa_flags) && opts->x_flag_pic)
4214 if (opts->x_flag_fentry > 0)
4215 sorry ("-mfentry isn%'t supported for 32-bit in combination "
4216 "with -fpic");
4217 opts->x_flag_fentry = 0;
4219 else if (TARGET_SEH)
4221 if (opts->x_flag_fentry == 0)
4222 sorry ("-mno-fentry isn%'t compatible with SEH");
4223 opts->x_flag_fentry = 1;
4225 else if (opts->x_flag_fentry < 0)
4227 #if defined(PROFILE_BEFORE_PROLOGUE)
4228 opts->x_flag_fentry = 1;
4229 #else
4230 opts->x_flag_fentry = 0;
4231 #endif
4234 /* When not optimizing for size, enable vzeroupper optimization for
4235 TARGET_AVX with -fexpensive-optimizations and split 32-byte
4236 AVX unaligned load/store. */
4237 if (!opts->x_optimize_size)
4239 if (flag_expensive_optimizations
4240 && !(opts_set->x_target_flags & MASK_VZEROUPPER))
4241 opts->x_target_flags |= MASK_VZEROUPPER;
4242 if (!ix86_tune_features[X86_TUNE_AVX256_UNALIGNED_LOAD_OPTIMAL]
4243 && !(opts_set->x_target_flags & MASK_AVX256_SPLIT_UNALIGNED_LOAD))
4244 opts->x_target_flags |= MASK_AVX256_SPLIT_UNALIGNED_LOAD;
4245 if (!ix86_tune_features[X86_TUNE_AVX256_UNALIGNED_STORE_OPTIMAL]
4246 && !(opts_set->x_target_flags & MASK_AVX256_SPLIT_UNALIGNED_STORE))
4247 opts->x_target_flags |= MASK_AVX256_SPLIT_UNALIGNED_STORE;
4248 /* Enable 128-bit AVX instruction generation
4249 for the auto-vectorizer. */
4250 if (TARGET_AVX128_OPTIMAL
4251 && !(opts_set->x_target_flags & MASK_PREFER_AVX128))
4252 opts->x_target_flags |= MASK_PREFER_AVX128;
4255 if (opts->x_ix86_recip_name)
4257 char *p = ASTRDUP (opts->x_ix86_recip_name);
4258 char *q;
4259 unsigned int mask, i;
4260 bool invert;
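/* -mrecip= takes a comma-separated list of reciprocal-approximation domains;
   prefixing an item with '!' clears the corresponding mask bits instead of
   setting them.  */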
4262 while ((q = strtok (p, ",")) != NULL)
4264 p = NULL;
4265 if (*q == '!')
4267 invert = true;
4268 q++;
4270 else
4271 invert = false;
4273 if (!strcmp (q, "default"))
4274 mask = RECIP_MASK_ALL;
4275 else
4277 for (i = 0; i < ARRAY_SIZE (recip_options); i++)
4278 if (!strcmp (q, recip_options[i].string))
4280 mask = recip_options[i].mask;
4281 break;
4284 if (i == ARRAY_SIZE (recip_options))
4286 error ("unknown option for -mrecip=%s", q);
4287 invert = false;
4288 mask = RECIP_MASK_NONE;
4292 opts->x_recip_mask_explicit |= mask;
4293 if (invert)
4294 opts->x_recip_mask &= ~mask;
4295 else
4296 opts->x_recip_mask |= mask;
4300 if (TARGET_RECIP_P (opts->x_target_flags))
4301 opts->x_recip_mask |= RECIP_MASK_ALL & ~opts->x_recip_mask_explicit;
4302 else if (opts_set->x_target_flags & MASK_RECIP)
4303 opts->x_recip_mask &= ~(RECIP_MASK_ALL & ~opts->x_recip_mask_explicit);
4305 /* Default long double to 64-bit for 32-bit Bionic and to __float128
4306 for 64-bit Bionic. */
4307 if (TARGET_HAS_BIONIC
4308 && !(opts_set->x_target_flags
4309 & (MASK_LONG_DOUBLE_64 | MASK_LONG_DOUBLE_128)))
4310 opts->x_target_flags |= (TARGET_64BIT
4311 ? MASK_LONG_DOUBLE_128
4312 : MASK_LONG_DOUBLE_64);
4314 /* Only one of them can be active. */
4315 gcc_assert ((opts->x_target_flags & MASK_LONG_DOUBLE_64) == 0
4316 || (opts->x_target_flags & MASK_LONG_DOUBLE_128) == 0);
4318 /* Save the initial options in case the user uses function-specific
4319 options. */
4320 if (main_args_p)
4321 target_option_default_node = target_option_current_node
4322 = build_target_option_node (opts);
4324 /* Handle stack protector */
4325 if (!opts_set->x_ix86_stack_protector_guard)
4326 opts->x_ix86_stack_protector_guard
4327 = TARGET_HAS_BIONIC ? SSP_GLOBAL : SSP_TLS;
4329 /* Handle -mmemcpy-strategy= and -mmemset-strategy= */
4330 if (opts->x_ix86_tune_memcpy_strategy)
4332 char *str = xstrdup (opts->x_ix86_tune_memcpy_strategy);
4333 ix86_parse_stringop_strategy_string (str, false);
4334 free (str);
4337 if (opts->x_ix86_tune_memset_strategy)
4339 char *str = xstrdup (opts->x_ix86_tune_memset_strategy);
4340 ix86_parse_stringop_strategy_string (str, true);
4341 free (str);
4345 /* Implement the TARGET_OPTION_OVERRIDE hook. */
4347 static void
4348 ix86_option_override (void)
4350 opt_pass *pass_insert_vzeroupper = make_pass_insert_vzeroupper (g);
4351 struct register_pass_info insert_vzeroupper_info
4352 = { pass_insert_vzeroupper, "reload",
4353 1, PASS_POS_INSERT_AFTER
4356 ix86_option_override_internal (true, &global_options, &global_options_set);
4359 /* This needs to be done at start up. It's convenient to do it here. */
4360 register_pass (&insert_vzeroupper_info);
4363 /* Implement the TARGET_OFFLOAD_OPTIONS hook. */
4364 static char *
4365 ix86_offload_options (void)
4367 if (TARGET_LP64)
4368 return xstrdup ("-foffload-abi=lp64");
4369 return xstrdup ("-foffload-abi=ilp32");
4372 /* Update register usage after having seen the compiler flags. */
4374 static void
4375 ix86_conditional_register_usage (void)
4377 int i, c_mask;
4378 unsigned int j;
4380 /* The PIC register, if it exists, is fixed. */
4381 j = PIC_OFFSET_TABLE_REGNUM;
4382 if (j != INVALID_REGNUM)
4383 fixed_regs[j] = call_used_regs[j] = 1;
4385 /* For 32-bit targets, squash the REX registers. */
4386 if (! TARGET_64BIT)
4388 for (i = FIRST_REX_INT_REG; i <= LAST_REX_INT_REG; i++)
4389 fixed_regs[i] = call_used_regs[i] = 1, reg_names[i] = "";
4390 for (i = FIRST_REX_SSE_REG; i <= LAST_REX_SSE_REG; i++)
4391 fixed_regs[i] = call_used_regs[i] = 1, reg_names[i] = "";
4392 for (i = FIRST_EXT_REX_SSE_REG; i <= LAST_EXT_REX_SSE_REG; i++)
4393 fixed_regs[i] = call_used_regs[i] = 1, reg_names[i] = "";
4396 /* See the definition of CALL_USED_REGISTERS in i386.h. */
4397 c_mask = (TARGET_64BIT_MS_ABI ? (1 << 3)
4398 : TARGET_64BIT ? (1 << 2)
4399 : (1 << 1));
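/* Conditionally call-used registers have CALL_USED_REGISTERS entries greater
   than 1, with one bit per ABI; c_mask picks the bit for the ABI in use.  */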
4401 CLEAR_HARD_REG_SET (reg_class_contents[(int)CLOBBERED_REGS]);
4403 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
4405 /* Set/reset conditionally defined registers from
4406 CALL_USED_REGISTERS initializer. */
4407 if (call_used_regs[i] > 1)
4408 call_used_regs[i] = !!(call_used_regs[i] & c_mask);
4410 /* Calculate registers of CLOBBERED_REGS register set
4411 as call used registers from GENERAL_REGS register set. */
4412 if (TEST_HARD_REG_BIT (reg_class_contents[(int)GENERAL_REGS], i)
4413 && call_used_regs[i])
4414 SET_HARD_REG_BIT (reg_class_contents[(int)CLOBBERED_REGS], i);
4417 /* If MMX is disabled, squash the registers. */
4418 if (! TARGET_MMX)
4419 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
4420 if (TEST_HARD_REG_BIT (reg_class_contents[(int)MMX_REGS], i))
4421 fixed_regs[i] = call_used_regs[i] = 1, reg_names[i] = "";
4423 /* If SSE is disabled, squash the registers. */
4424 if (! TARGET_SSE)
4425 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
4426 if (TEST_HARD_REG_BIT (reg_class_contents[(int)SSE_REGS], i))
4427 fixed_regs[i] = call_used_regs[i] = 1, reg_names[i] = "";
4429 /* If the FPU is disabled, squash the registers. */
4430 if (! (TARGET_80387 || TARGET_FLOAT_RETURNS_IN_80387))
4431 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
4432 if (TEST_HARD_REG_BIT (reg_class_contents[(int)FLOAT_REGS], i))
4433 fixed_regs[i] = call_used_regs[i] = 1, reg_names[i] = "";
4435 /* If AVX512F is disabled, squash the registers. */
4436 if (! TARGET_AVX512F)
4438 for (i = FIRST_EXT_REX_SSE_REG; i <= LAST_EXT_REX_SSE_REG; i++)
4439 fixed_regs[i] = call_used_regs[i] = 1, reg_names[i] = "";
4441 for (i = FIRST_MASK_REG; i <= LAST_MASK_REG; i++)
4442 fixed_regs[i] = call_used_regs[i] = 1, reg_names[i] = "";
4445 /* If MPX is disabled, squash the registers. */
4446 if (! TARGET_MPX)
4447 for (i = FIRST_BND_REG; i <= LAST_BND_REG; i++)
4448 fixed_regs[i] = call_used_regs[i] = 1, reg_names[i] = "";
4452 /* Save the current options */
4454 static void
4455 ix86_function_specific_save (struct cl_target_option *ptr,
4456 struct gcc_options *opts)
4458 ptr->arch = ix86_arch;
4459 ptr->schedule = ix86_schedule;
4460 ptr->tune = ix86_tune;
4461 ptr->branch_cost = ix86_branch_cost;
4462 ptr->tune_defaulted = ix86_tune_defaulted;
4463 ptr->arch_specified = ix86_arch_specified;
4464 ptr->x_ix86_isa_flags_explicit = opts->x_ix86_isa_flags_explicit;
4465 ptr->x_ix86_target_flags_explicit = opts->x_ix86_target_flags_explicit;
4466 ptr->x_recip_mask_explicit = opts->x_recip_mask_explicit;
4467 ptr->x_ix86_arch_string = opts->x_ix86_arch_string;
4468 ptr->x_ix86_tune_string = opts->x_ix86_tune_string;
4469 ptr->x_ix86_cmodel = opts->x_ix86_cmodel;
4470 ptr->x_ix86_abi = opts->x_ix86_abi;
4471 ptr->x_ix86_asm_dialect = opts->x_ix86_asm_dialect;
4472 ptr->x_ix86_branch_cost = opts->x_ix86_branch_cost;
4473 ptr->x_ix86_dump_tunes = opts->x_ix86_dump_tunes;
4474 ptr->x_ix86_force_align_arg_pointer = opts->x_ix86_force_align_arg_pointer;
4475 ptr->x_ix86_force_drap = opts->x_ix86_force_drap;
4476 ptr->x_ix86_incoming_stack_boundary_arg = opts->x_ix86_incoming_stack_boundary_arg;
4477 ptr->x_ix86_pmode = opts->x_ix86_pmode;
4478 ptr->x_ix86_preferred_stack_boundary_arg = opts->x_ix86_preferred_stack_boundary_arg;
4479 ptr->x_ix86_recip_name = opts->x_ix86_recip_name;
4480 ptr->x_ix86_regparm = opts->x_ix86_regparm;
4481 ptr->x_ix86_section_threshold = opts->x_ix86_section_threshold;
4482 ptr->x_ix86_sse2avx = opts->x_ix86_sse2avx;
4483 ptr->x_ix86_stack_protector_guard = opts->x_ix86_stack_protector_guard;
4484 ptr->x_ix86_stringop_alg = opts->x_ix86_stringop_alg;
4485 ptr->x_ix86_tls_dialect = opts->x_ix86_tls_dialect;
4486 ptr->x_ix86_tune_ctrl_string = opts->x_ix86_tune_ctrl_string;
4487 ptr->x_ix86_tune_memcpy_strategy = opts->x_ix86_tune_memcpy_strategy;
4488 ptr->x_ix86_tune_memset_strategy = opts->x_ix86_tune_memset_strategy;
4489 ptr->x_ix86_tune_no_default = opts->x_ix86_tune_no_default;
4490 ptr->x_ix86_veclibabi_type = opts->x_ix86_veclibabi_type;
4492 /* The fields are char but the variables are not; make sure the
4493 values fit in the fields. */
4494 gcc_assert (ptr->arch == ix86_arch);
4495 gcc_assert (ptr->schedule == ix86_schedule);
4496 gcc_assert (ptr->tune == ix86_tune);
4497 gcc_assert (ptr->branch_cost == ix86_branch_cost);
4500 /* Restore the current options */
4502 static void
4503 ix86_function_specific_restore (struct gcc_options *opts,
4504 struct cl_target_option *ptr)
4506 enum processor_type old_tune = ix86_tune;
4507 enum processor_type old_arch = ix86_arch;
4508 unsigned int ix86_arch_mask;
4509 int i;
4511 /* We don't change -fPIC. */
4512 opts->x_flag_pic = flag_pic;
4514 ix86_arch = (enum processor_type) ptr->arch;
4515 ix86_schedule = (enum attr_cpu) ptr->schedule;
4516 ix86_tune = (enum processor_type) ptr->tune;
4517 opts->x_ix86_branch_cost = ptr->branch_cost;
4518 ix86_tune_defaulted = ptr->tune_defaulted;
4519 ix86_arch_specified = ptr->arch_specified;
4520 opts->x_ix86_isa_flags_explicit = ptr->x_ix86_isa_flags_explicit;
4521 opts->x_ix86_target_flags_explicit = ptr->x_ix86_target_flags_explicit;
4522 opts->x_recip_mask_explicit = ptr->x_recip_mask_explicit;
4523 opts->x_ix86_arch_string = ptr->x_ix86_arch_string;
4524 opts->x_ix86_tune_string = ptr->x_ix86_tune_string;
4525 opts->x_ix86_cmodel = ptr->x_ix86_cmodel;
4526 opts->x_ix86_abi = ptr->x_ix86_abi;
4527 opts->x_ix86_asm_dialect = ptr->x_ix86_asm_dialect;
4528 opts->x_ix86_branch_cost = ptr->x_ix86_branch_cost;
4529 opts->x_ix86_dump_tunes = ptr->x_ix86_dump_tunes;
4530 opts->x_ix86_force_align_arg_pointer = ptr->x_ix86_force_align_arg_pointer;
4531 opts->x_ix86_force_drap = ptr->x_ix86_force_drap;
4532 opts->x_ix86_incoming_stack_boundary_arg = ptr->x_ix86_incoming_stack_boundary_arg;
4533 opts->x_ix86_pmode = ptr->x_ix86_pmode;
4534 opts->x_ix86_preferred_stack_boundary_arg = ptr->x_ix86_preferred_stack_boundary_arg;
4535 opts->x_ix86_recip_name = ptr->x_ix86_recip_name;
4536 opts->x_ix86_regparm = ptr->x_ix86_regparm;
4537 opts->x_ix86_section_threshold = ptr->x_ix86_section_threshold;
4538 opts->x_ix86_sse2avx = ptr->x_ix86_sse2avx;
4539 opts->x_ix86_stack_protector_guard = ptr->x_ix86_stack_protector_guard;
4540 opts->x_ix86_stringop_alg = ptr->x_ix86_stringop_alg;
4541 opts->x_ix86_tls_dialect = ptr->x_ix86_tls_dialect;
4542 opts->x_ix86_tune_ctrl_string = ptr->x_ix86_tune_ctrl_string;
4543 opts->x_ix86_tune_memcpy_strategy = ptr->x_ix86_tune_memcpy_strategy;
4544 opts->x_ix86_tune_memset_strategy = ptr->x_ix86_tune_memset_strategy;
4545 opts->x_ix86_tune_no_default = ptr->x_ix86_tune_no_default;
4546 opts->x_ix86_veclibabi_type = ptr->x_ix86_veclibabi_type;
4548 /* Recreate the arch feature tests if the arch changed */
4549 if (old_arch != ix86_arch)
4551 ix86_arch_mask = 1u << ix86_arch;
4552 for (i = 0; i < X86_ARCH_LAST; ++i)
4553 ix86_arch_features[i]
4554 = !!(initial_ix86_arch_features[i] & ix86_arch_mask);
4557 /* Recreate the tune optimization tests */
4558 if (old_tune != ix86_tune)
4559 set_ix86_tune_features (ix86_tune, false);
4562 /* Print the current options */
4564 static void
4565 ix86_function_specific_print (FILE *file, int indent,
4566 struct cl_target_option *ptr)
4568 char *target_string
4569 = ix86_target_string (ptr->x_ix86_isa_flags, ptr->x_target_flags,
4570 NULL, NULL, ptr->x_ix86_fpmath, false);
4572 gcc_assert (ptr->arch < PROCESSOR_max);
4573 fprintf (file, "%*sarch = %d (%s)\n",
4574 indent, "",
4575 ptr->arch, processor_target_table[ptr->arch].name);
4577 gcc_assert (ptr->tune < PROCESSOR_max);
4578 fprintf (file, "%*stune = %d (%s)\n",
4579 indent, "",
4580 ptr->tune, processor_target_table[ptr->tune].name);
4582 fprintf (file, "%*sbranch_cost = %d\n", indent, "", ptr->branch_cost);
4584 if (target_string)
4586 fprintf (file, "%*s%s\n", indent, "", target_string);
4587 free (target_string);
4592 /* Inner function to process the attribute((target(...))), take an argument and
4593 set the current options from the argument. If we have a list, recursively go
4594 over the list. */
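/* Illustrative example (editor's sketch, not part of GCC): a user
   declaration handled by this parser might look like

     int dot (const float *a, const float *b, int n)
       __attribute__ ((target ("avx2,fma,no-sse4a")));

   Each comma-separated token is looked up in the attrs[] table below;
   a leading "no-" clears the option instead of setting it, and the
   "arch="/"tune=" entries take string arguments.  */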
4596 static bool
4597 ix86_valid_target_attribute_inner_p (tree args, char *p_strings[],
4598 struct gcc_options *opts,
4599 struct gcc_options *opts_set,
4600 struct gcc_options *enum_opts_set)
4602 char *next_optstr;
4603 bool ret = true;
4605 #define IX86_ATTR_ISA(S,O) { S, sizeof (S)-1, ix86_opt_isa, O, 0 }
4606 #define IX86_ATTR_STR(S,O) { S, sizeof (S)-1, ix86_opt_str, O, 0 }
4607 #define IX86_ATTR_ENUM(S,O) { S, sizeof (S)-1, ix86_opt_enum, O, 0 }
4608 #define IX86_ATTR_YES(S,O,M) { S, sizeof (S)-1, ix86_opt_yes, O, M }
4609 #define IX86_ATTR_NO(S,O,M) { S, sizeof (S)-1, ix86_opt_no, O, M }
4611 enum ix86_opt_type
4613 ix86_opt_unknown,
4614 ix86_opt_yes,
4615 ix86_opt_no,
4616 ix86_opt_str,
4617 ix86_opt_enum,
4618 ix86_opt_isa
4621 static const struct
4623 const char *string;
4624 size_t len;
4625 enum ix86_opt_type type;
4626 int opt;
4627 int mask;
4628 } attrs[] = {
4629 /* isa options */
4630 IX86_ATTR_ISA ("3dnow", OPT_m3dnow),
4631 IX86_ATTR_ISA ("abm", OPT_mabm),
4632 IX86_ATTR_ISA ("bmi", OPT_mbmi),
4633 IX86_ATTR_ISA ("bmi2", OPT_mbmi2),
4634 IX86_ATTR_ISA ("lzcnt", OPT_mlzcnt),
4635 IX86_ATTR_ISA ("tbm", OPT_mtbm),
4636 IX86_ATTR_ISA ("aes", OPT_maes),
4637 IX86_ATTR_ISA ("sha", OPT_msha),
4638 IX86_ATTR_ISA ("avx", OPT_mavx),
4639 IX86_ATTR_ISA ("avx2", OPT_mavx2),
4640 IX86_ATTR_ISA ("avx512f", OPT_mavx512f),
4641 IX86_ATTR_ISA ("avx512pf", OPT_mavx512pf),
4642 IX86_ATTR_ISA ("avx512er", OPT_mavx512er),
4643 IX86_ATTR_ISA ("avx512cd", OPT_mavx512cd),
4644 IX86_ATTR_ISA ("avx512dq", OPT_mavx512dq),
4645 IX86_ATTR_ISA ("avx512bw", OPT_mavx512bw),
4646 IX86_ATTR_ISA ("avx512vl", OPT_mavx512vl),
4647 IX86_ATTR_ISA ("mmx", OPT_mmmx),
4648 IX86_ATTR_ISA ("pclmul", OPT_mpclmul),
4649 IX86_ATTR_ISA ("popcnt", OPT_mpopcnt),
4650 IX86_ATTR_ISA ("sse", OPT_msse),
4651 IX86_ATTR_ISA ("sse2", OPT_msse2),
4652 IX86_ATTR_ISA ("sse3", OPT_msse3),
4653 IX86_ATTR_ISA ("sse4", OPT_msse4),
4654 IX86_ATTR_ISA ("sse4.1", OPT_msse4_1),
4655 IX86_ATTR_ISA ("sse4.2", OPT_msse4_2),
4656 IX86_ATTR_ISA ("sse4a", OPT_msse4a),
4657 IX86_ATTR_ISA ("ssse3", OPT_mssse3),
4658 IX86_ATTR_ISA ("fma4", OPT_mfma4),
4659 IX86_ATTR_ISA ("fma", OPT_mfma),
4660 IX86_ATTR_ISA ("xop", OPT_mxop),
4661 IX86_ATTR_ISA ("lwp", OPT_mlwp),
4662 IX86_ATTR_ISA ("fsgsbase", OPT_mfsgsbase),
4663 IX86_ATTR_ISA ("rdrnd", OPT_mrdrnd),
4664 IX86_ATTR_ISA ("f16c", OPT_mf16c),
4665 IX86_ATTR_ISA ("rtm", OPT_mrtm),
4666 IX86_ATTR_ISA ("hle", OPT_mhle),
4667 IX86_ATTR_ISA ("prfchw", OPT_mprfchw),
4668 IX86_ATTR_ISA ("rdseed", OPT_mrdseed),
4669 IX86_ATTR_ISA ("adx", OPT_madx),
4670 IX86_ATTR_ISA ("fxsr", OPT_mfxsr),
4671 IX86_ATTR_ISA ("xsave", OPT_mxsave),
4672 IX86_ATTR_ISA ("xsaveopt", OPT_mxsaveopt),
4673 IX86_ATTR_ISA ("prefetchwt1", OPT_mprefetchwt1),
4674 IX86_ATTR_ISA ("clflushopt", OPT_mclflushopt),
4675 IX86_ATTR_ISA ("xsavec", OPT_mxsavec),
4676 IX86_ATTR_ISA ("xsaves", OPT_mxsaves),
4677 IX86_ATTR_ISA ("avx512vbmi", OPT_mavx512vbmi),
4678 IX86_ATTR_ISA ("avx512ifma", OPT_mavx512ifma),
4679 IX86_ATTR_ISA ("clwb", OPT_mclwb),
4680 IX86_ATTR_ISA ("pcommit", OPT_mpcommit),
4682 /* enum options */
4683 IX86_ATTR_ENUM ("fpmath=", OPT_mfpmath_),
4685 /* string options */
4686 IX86_ATTR_STR ("arch=", IX86_FUNCTION_SPECIFIC_ARCH),
4687 IX86_ATTR_STR ("tune=", IX86_FUNCTION_SPECIFIC_TUNE),
4689 /* flag options */
4690 IX86_ATTR_YES ("cld",
4691 OPT_mcld,
4692 MASK_CLD),
4694 IX86_ATTR_NO ("fancy-math-387",
4695 OPT_mfancy_math_387,
4696 MASK_NO_FANCY_MATH_387),
4698 IX86_ATTR_YES ("ieee-fp",
4699 OPT_mieee_fp,
4700 MASK_IEEE_FP),
4702 IX86_ATTR_YES ("inline-all-stringops",
4703 OPT_minline_all_stringops,
4704 MASK_INLINE_ALL_STRINGOPS),
4706 IX86_ATTR_YES ("inline-stringops-dynamically",
4707 OPT_minline_stringops_dynamically,
4708 MASK_INLINE_STRINGOPS_DYNAMICALLY),
4710 IX86_ATTR_NO ("align-stringops",
4711 OPT_mno_align_stringops,
4712 MASK_NO_ALIGN_STRINGOPS),
4714 IX86_ATTR_YES ("recip",
4715 OPT_mrecip,
4716 MASK_RECIP),
4720 /* If this is a list, recurse to get the options. */
4721 if (TREE_CODE (args) == TREE_LIST)
4723 bool ret = true;
4725 for (; args; args = TREE_CHAIN (args))
4726 if (TREE_VALUE (args)
4727 && !ix86_valid_target_attribute_inner_p (TREE_VALUE (args),
4728 p_strings, opts, opts_set,
4729 enum_opts_set))
4730 ret = false;
4732 return ret;
4735 else if (TREE_CODE (args) != STRING_CST)
4737 error ("attribute %<target%> argument not a string");
4738 return false;
4741 /* Handle multiple arguments separated by commas. */
4742 next_optstr = ASTRDUP (TREE_STRING_POINTER (args));
4744 while (next_optstr && *next_optstr != '\0')
4746 char *p = next_optstr;
4747 char *orig_p = p;
4748 char *comma = strchr (next_optstr, ',');
4749 const char *opt_string;
4750 size_t len, opt_len;
4751 int opt;
4752 bool opt_set_p;
4753 char ch;
4754 unsigned i;
4755 enum ix86_opt_type type = ix86_opt_unknown;
4756 int mask = 0;
4758 if (comma)
4760 *comma = '\0';
4761 len = comma - next_optstr;
4762 next_optstr = comma + 1;
4764 else
4766 len = strlen (p);
4767 next_optstr = NULL;
4770 /* Recognize no-xxx. */
4771 if (len > 3 && p[0] == 'n' && p[1] == 'o' && p[2] == '-')
4773 opt_set_p = false;
4774 p += 3;
4775 len -= 3;
4777 else
4778 opt_set_p = true;
4780 /* Find the option. */
4781 ch = *p;
4782 opt = N_OPTS;
4783 for (i = 0; i < ARRAY_SIZE (attrs); i++)
4785 type = attrs[i].type;
4786 opt_len = attrs[i].len;
4787 if (ch == attrs[i].string[0]
4788 && ((type != ix86_opt_str && type != ix86_opt_enum)
4789 ? len == opt_len
4790 : len > opt_len)
4791 && memcmp (p, attrs[i].string, opt_len) == 0)
4793 opt = attrs[i].opt;
4794 mask = attrs[i].mask;
4795 opt_string = attrs[i].string;
4796 break;
4800 /* Process the option. */
4801 if (opt == N_OPTS)
4803 error ("attribute(target(\"%s\")) is unknown", orig_p);
4804 ret = false;
4807 else if (type == ix86_opt_isa)
4809 struct cl_decoded_option decoded;
4811 generate_option (opt, NULL, opt_set_p, CL_TARGET, &decoded);
4812 ix86_handle_option (opts, opts_set,
4813 &decoded, input_location);
4816 else if (type == ix86_opt_yes || type == ix86_opt_no)
4818 if (type == ix86_opt_no)
4819 opt_set_p = !opt_set_p;
4821 if (opt_set_p)
4822 opts->x_target_flags |= mask;
4823 else
4824 opts->x_target_flags &= ~mask;
4827 else if (type == ix86_opt_str)
4829 if (p_strings[opt])
4831 error ("option(\"%s\") was already specified", opt_string);
4832 ret = false;
4834 else
4835 p_strings[opt] = xstrdup (p + opt_len);
4838 else if (type == ix86_opt_enum)
4840 bool arg_ok;
4841 int value;
4843 arg_ok = opt_enum_arg_to_value (opt, p + opt_len, &value, CL_TARGET);
4844 if (arg_ok)
4845 set_option (opts, enum_opts_set, opt, value,
4846 p + opt_len, DK_UNSPECIFIED, input_location,
4847 global_dc);
4848 else
4850 error ("attribute(target(\"%s\")) is unknown", orig_p);
4851 ret = false;
4855 else
4856 gcc_unreachable ();
4859 return ret;
4862 /* Return a TARGET_OPTION_NODE tree of the target options listed or NULL. */
4864 tree
4865 ix86_valid_target_attribute_tree (tree args,
4866 struct gcc_options *opts,
4867 struct gcc_options *opts_set)
4869 const char *orig_arch_string = opts->x_ix86_arch_string;
4870 const char *orig_tune_string = opts->x_ix86_tune_string;
4871 enum fpmath_unit orig_fpmath_set = opts_set->x_ix86_fpmath;
4872 int orig_tune_defaulted = ix86_tune_defaulted;
4873 int orig_arch_specified = ix86_arch_specified;
4874 char *option_strings[IX86_FUNCTION_SPECIFIC_MAX] = { NULL, NULL };
4875 tree t = NULL_TREE;
4876 int i;
4877 struct cl_target_option *def
4878 = TREE_TARGET_OPTION (target_option_default_node);
4879 struct gcc_options enum_opts_set;
4881 memset (&enum_opts_set, 0, sizeof (enum_opts_set));
4883 /* Process each of the options on the chain. */
4884 if (! ix86_valid_target_attribute_inner_p (args, option_strings, opts,
4885 opts_set, &enum_opts_set))
4886 return error_mark_node;
4888 /* If the changed options are different from the default, rerun
4889 ix86_option_override_internal, and then save the options away.
4890 The string options are attribute options, and will be undone
4891 when we copy the save structure. */
4892 if (opts->x_ix86_isa_flags != def->x_ix86_isa_flags
4893 || opts->x_target_flags != def->x_target_flags
4894 || option_strings[IX86_FUNCTION_SPECIFIC_ARCH]
4895 || option_strings[IX86_FUNCTION_SPECIFIC_TUNE]
4896 || enum_opts_set.x_ix86_fpmath)
4898 /* If we are using the default tune= or arch=, undo the string assigned,
4899 and use the default. */
4900 if (option_strings[IX86_FUNCTION_SPECIFIC_ARCH])
4901 opts->x_ix86_arch_string = option_strings[IX86_FUNCTION_SPECIFIC_ARCH];
4902 else if (!orig_arch_specified)
4903 opts->x_ix86_arch_string = NULL;
4905 if (option_strings[IX86_FUNCTION_SPECIFIC_TUNE])
4906 opts->x_ix86_tune_string = option_strings[IX86_FUNCTION_SPECIFIC_TUNE];
4907 else if (orig_tune_defaulted)
4908 opts->x_ix86_tune_string = NULL;
4910 /* If fpmath= is not set, and we now have sse2 on 32-bit, use it. */
4911 if (enum_opts_set.x_ix86_fpmath)
4912 opts_set->x_ix86_fpmath = (enum fpmath_unit) 1;
4913 else if (!TARGET_64BIT_P (opts->x_ix86_isa_flags)
4914 && TARGET_SSE_P (opts->x_ix86_isa_flags))
4916 opts->x_ix86_fpmath = (enum fpmath_unit) (FPMATH_SSE | FPMATH_387);
4917 opts_set->x_ix86_fpmath = (enum fpmath_unit) 1;
4920 /* Do any overrides, such as arch=xxx, or tune=xxx support. */
4921 ix86_option_override_internal (false, opts, opts_set);
4923 /* Add any builtin functions with the new isa if any. */
4924 ix86_add_new_builtins (opts->x_ix86_isa_flags);
4926 /* Save the current options unless we are validating options for
4927 #pragma. */
4928 t = build_target_option_node (opts);
4930 opts->x_ix86_arch_string = orig_arch_string;
4931 opts->x_ix86_tune_string = orig_tune_string;
4932 opts_set->x_ix86_fpmath = orig_fpmath_set;
4934 /* Free up memory allocated to hold the strings */
4935 for (i = 0; i < IX86_FUNCTION_SPECIFIC_MAX; i++)
4936 free (option_strings[i]);
4939 return t;
4942 /* Hook to validate attribute((target("string"))). */
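/* Illustrative example (editor's sketch, not part of GCC): the special
   string "default" is used by function multi-versioning and is accepted
   below without changing any options:

     int f (void) __attribute__ ((target ("default")));
     int f (void) __attribute__ ((target ("avx2")));

   Any other string is parsed via ix86_valid_target_attribute_tree.  */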
4944 static bool
4945 ix86_valid_target_attribute_p (tree fndecl,
4946 tree ARG_UNUSED (name),
4947 tree args,
4948 int ARG_UNUSED (flags))
4950 struct gcc_options func_options;
4951 tree new_target, new_optimize;
4952 bool ret = true;
4954 /* attribute((target("default"))) does nothing, beyond
4955 affecting multi-versioning. */
4956 if (TREE_VALUE (args)
4957 && TREE_CODE (TREE_VALUE (args)) == STRING_CST
4958 && TREE_CHAIN (args) == NULL_TREE
4959 && strcmp (TREE_STRING_POINTER (TREE_VALUE (args)), "default") == 0)
4960 return true;
4962 tree old_optimize = build_optimization_node (&global_options);
4964 /* Get the optimization options of the current function. */
4965 tree func_optimize = DECL_FUNCTION_SPECIFIC_OPTIMIZATION (fndecl);
4967 if (!func_optimize)
4968 func_optimize = old_optimize;
4970 /* Init func_options. */
4971 memset (&func_options, 0, sizeof (func_options));
4972 init_options_struct (&func_options, NULL);
4973 lang_hooks.init_options_struct (&func_options);
4975 cl_optimization_restore (&func_options,
4976 TREE_OPTIMIZATION (func_optimize));
4978 /* Initialize func_options to the default before its target options can
4979 be set. */
4980 cl_target_option_restore (&func_options,
4981 TREE_TARGET_OPTION (target_option_default_node));
4983 new_target = ix86_valid_target_attribute_tree (args, &func_options,
4984 &global_options_set);
4986 new_optimize = build_optimization_node (&func_options);
4988 if (new_target == error_mark_node)
4989 ret = false;
4991 else if (fndecl && new_target)
4993 DECL_FUNCTION_SPECIFIC_TARGET (fndecl) = new_target;
4995 if (old_optimize != new_optimize)
4996 DECL_FUNCTION_SPECIFIC_OPTIMIZATION (fndecl) = new_optimize;
4999 return ret;
5003 /* Hook to determine if one function can safely inline another. */
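/* Illustrative example (editor's sketch, not part of GCC): given

     static int __attribute__ ((target ("sse2"))) lo (int x) { return x; }
     int __attribute__ ((target ("sse4.2"))) hi (int x) { return lo (x); }

   hi may inline lo because lo's ISA flags are a subset of hi's, but the
   reverse direction is rejected by the checks below.  */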
5005 static bool
5006 ix86_can_inline_p (tree caller, tree callee)
5008 bool ret = false;
5009 tree caller_tree = DECL_FUNCTION_SPECIFIC_TARGET (caller);
5010 tree callee_tree = DECL_FUNCTION_SPECIFIC_TARGET (callee);
5012 /* If callee has no option attributes, then it is ok to inline. */
5013 if (!callee_tree)
5014 ret = true;
5016 /* If caller has no option attributes, but callee does then it is not ok to
5017 inline. */
5018 else if (!caller_tree)
5019 ret = false;
5021 else
5023 struct cl_target_option *caller_opts = TREE_TARGET_OPTION (caller_tree);
5024 struct cl_target_option *callee_opts = TREE_TARGET_OPTION (callee_tree);
5026 /* Callee's isa options should be a subset of the caller's, i.e. an SSE4 function
5027 can inline an SSE2 function but an SSE2 function can't inline an SSE4
5028 function. */
5029 if ((caller_opts->x_ix86_isa_flags & callee_opts->x_ix86_isa_flags)
5030 != callee_opts->x_ix86_isa_flags)
5031 ret = false;
5033 /* See if we have the same non-isa options. */
5034 else if (caller_opts->x_target_flags != callee_opts->x_target_flags)
5035 ret = false;
5037 /* See if arch, tune, etc. are the same. */
5038 else if (caller_opts->arch != callee_opts->arch)
5039 ret = false;
5041 else if (caller_opts->tune != callee_opts->tune)
5042 ret = false;
5044 else if (caller_opts->x_ix86_fpmath != callee_opts->x_ix86_fpmath)
5045 ret = false;
5047 else if (caller_opts->branch_cost != callee_opts->branch_cost)
5048 ret = false;
5050 else
5051 ret = true;
5054 return ret;
5058 /* Remember the last target of ix86_set_current_function. */
5059 static GTY(()) tree ix86_previous_fndecl;
5061 /* Set target globals to default. */
5063 static void
5064 ix86_reset_to_default_globals (void)
5066 tree old_tree = (ix86_previous_fndecl
5067 ? DECL_FUNCTION_SPECIFIC_TARGET (ix86_previous_fndecl)
5068 : NULL_TREE);
5070 if (old_tree)
5072 tree new_tree = target_option_current_node;
5073 cl_target_option_restore (&global_options,
5074 TREE_TARGET_OPTION (new_tree));
5075 if (TREE_TARGET_GLOBALS (new_tree))
5076 restore_target_globals (TREE_TARGET_GLOBALS (new_tree));
5077 else if (new_tree == target_option_default_node)
5078 restore_target_globals (&default_target_globals);
5079 else
5080 TREE_TARGET_GLOBALS (new_tree)
5081 = save_target_globals_default_opts ();
5085 /* Invalidate ix86_previous_fndecl cache. */
5086 void
5087 ix86_reset_previous_fndecl (void)
5089 ix86_reset_to_default_globals ();
5090 ix86_previous_fndecl = NULL_TREE;
5093 /* Establish appropriate back-end context for processing the function
5094 FNDECL. The argument might be NULL to indicate processing at top
5095 level, outside of any function scope. */
5096 static void
5097 ix86_set_current_function (tree fndecl)
5099 /* Only change the context if the function changes. This hook is called
5100 several times in the course of compiling a function, and we don't want to
5101 slow things down too much or call target_reinit when it isn't safe. */
5102 if (fndecl && fndecl != ix86_previous_fndecl)
5104 tree old_tree = (ix86_previous_fndecl
5105 ? DECL_FUNCTION_SPECIFIC_TARGET (ix86_previous_fndecl)
5106 : NULL_TREE);
5108 tree new_tree = (fndecl
5109 ? DECL_FUNCTION_SPECIFIC_TARGET (fndecl)
5110 : NULL_TREE);
5112 if (old_tree == new_tree)
5115 else if (new_tree && new_tree != target_option_default_node)
5117 cl_target_option_restore (&global_options,
5118 TREE_TARGET_OPTION (new_tree));
5119 if (TREE_TARGET_GLOBALS (new_tree))
5120 restore_target_globals (TREE_TARGET_GLOBALS (new_tree));
5121 else
5122 TREE_TARGET_GLOBALS (new_tree)
5123 = save_target_globals_default_opts ();
5126 else if (old_tree && old_tree != target_option_default_node)
5127 ix86_reset_to_default_globals ();
5128 ix86_previous_fndecl = fndecl;
5133 /* Return true if this goes in large data/bss. */
5135 static bool
5136 ix86_in_large_data_p (tree exp)
5138 if (ix86_cmodel != CM_MEDIUM && ix86_cmodel != CM_MEDIUM_PIC)
5139 return false;
5141 /* Functions are never large data. */
5142 if (TREE_CODE (exp) == FUNCTION_DECL)
5143 return false;
5145 /* Automatic variables are never large data. */
5146 if (TREE_CODE (exp) == VAR_DECL && !is_global_var (exp))
5147 return false;
5149 if (TREE_CODE (exp) == VAR_DECL && DECL_SECTION_NAME (exp))
5151 const char *section = DECL_SECTION_NAME (exp);
5152 if (strcmp (section, ".ldata") == 0
5153 || strcmp (section, ".lbss") == 0)
5154 return true;
5155 return false;
5157 else
5159 HOST_WIDE_INT size = int_size_in_bytes (TREE_TYPE (exp));
5161 /* If this is an incomplete type with size 0, then we can't put it
5162 in data because it might be too big when completed. Also,
5163 int_size_in_bytes returns -1 if the size can vary or is larger than
5164 an integer, in which case it is also safer to assume that it goes in
5165 large data. */
5166 if (size <= 0 || size > ix86_section_threshold)
5167 return true;
5170 return false;
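/* Illustrative example (editor's sketch, not part of GCC): with
   -mcmodel=medium and the default -mlarge-data-threshold of 65536, a
   definition such as

     static char big_buffer[1 << 20];

   is treated as large data and ends up in .lbss (or .ldata when
   initialized), while smaller objects stay in the ordinary sections.  */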
5173 /* Switch to the appropriate section for output of DECL.
5174 DECL is either a `VAR_DECL' node or a constant of some sort.
5175 RELOC indicates whether forming the initial value of DECL requires
5176 link-time relocations. */
5178 ATTRIBUTE_UNUSED static section *
5179 x86_64_elf_select_section (tree decl, int reloc,
5180 unsigned HOST_WIDE_INT align)
5182 if (ix86_in_large_data_p (decl))
5184 const char *sname = NULL;
5185 unsigned int flags = SECTION_WRITE;
5186 switch (categorize_decl_for_section (decl, reloc))
5188 case SECCAT_DATA:
5189 sname = ".ldata";
5190 break;
5191 case SECCAT_DATA_REL:
5192 sname = ".ldata.rel";
5193 break;
5194 case SECCAT_DATA_REL_LOCAL:
5195 sname = ".ldata.rel.local";
5196 break;
5197 case SECCAT_DATA_REL_RO:
5198 sname = ".ldata.rel.ro";
5199 break;
5200 case SECCAT_DATA_REL_RO_LOCAL:
5201 sname = ".ldata.rel.ro.local";
5202 break;
5203 case SECCAT_BSS:
5204 sname = ".lbss";
5205 flags |= SECTION_BSS;
5206 break;
5207 case SECCAT_RODATA:
5208 case SECCAT_RODATA_MERGE_STR:
5209 case SECCAT_RODATA_MERGE_STR_INIT:
5210 case SECCAT_RODATA_MERGE_CONST:
5211 sname = ".lrodata";
5212 flags = 0;
5213 break;
5214 case SECCAT_SRODATA:
5215 case SECCAT_SDATA:
5216 case SECCAT_SBSS:
5217 gcc_unreachable ();
5218 case SECCAT_TEXT:
5219 case SECCAT_TDATA:
5220 case SECCAT_TBSS:
5221 /* We don't split these for the medium model. Place them into
5222 default sections and hope for the best. */
5223 break;
5225 if (sname)
5227 /* We might get called with string constants, but get_named_section
5228 doesn't like them as they are not DECLs. Also, we need to set
5229 flags in that case. */
5230 if (!DECL_P (decl))
5231 return get_section (sname, flags, NULL);
5232 return get_named_section (decl, sname, reloc);
5235 return default_elf_select_section (decl, reloc, align);
5238 /* Select a set of attributes for section NAME based on the properties
5239 of DECL and whether or not RELOC indicates that DECL's initializer
5240 might contain runtime relocations. */
5242 static unsigned int ATTRIBUTE_UNUSED
5243 x86_64_elf_section_type_flags (tree decl, const char *name, int reloc)
5245 unsigned int flags = default_section_type_flags (decl, name, reloc);
5247 if (decl == NULL_TREE
5248 && (strcmp (name, ".ldata.rel.ro") == 0
5249 || strcmp (name, ".ldata.rel.ro.local") == 0))
5250 flags |= SECTION_RELRO;
5252 if (strcmp (name, ".lbss") == 0
5253 || strncmp (name, ".lbss.", 5) == 0
5254 || strncmp (name, ".gnu.linkonce.lb.", 16) == 0)
5255 flags |= SECTION_BSS;
5257 return flags;
5260 /* Build up a unique section name, expressed as a
5261 STRING_CST node, and assign it to DECL_SECTION_NAME (decl).
5262 RELOC indicates whether the initial value of EXP requires
5263 link-time relocations. */
5265 static void ATTRIBUTE_UNUSED
5266 x86_64_elf_unique_section (tree decl, int reloc)
5268 if (ix86_in_large_data_p (decl))
5270 const char *prefix = NULL;
5271 /* We only need to use .gnu.linkonce if we don't have COMDAT groups. */
5272 bool one_only = DECL_COMDAT_GROUP (decl) && !HAVE_COMDAT_GROUP;
5274 switch (categorize_decl_for_section (decl, reloc))
5276 case SECCAT_DATA:
5277 case SECCAT_DATA_REL:
5278 case SECCAT_DATA_REL_LOCAL:
5279 case SECCAT_DATA_REL_RO:
5280 case SECCAT_DATA_REL_RO_LOCAL:
5281 prefix = one_only ? ".ld" : ".ldata";
5282 break;
5283 case SECCAT_BSS:
5284 prefix = one_only ? ".lb" : ".lbss";
5285 break;
5286 case SECCAT_RODATA:
5287 case SECCAT_RODATA_MERGE_STR:
5288 case SECCAT_RODATA_MERGE_STR_INIT:
5289 case SECCAT_RODATA_MERGE_CONST:
5290 prefix = one_only ? ".lr" : ".lrodata";
5291 break;
5292 case SECCAT_SRODATA:
5293 case SECCAT_SDATA:
5294 case SECCAT_SBSS:
5295 gcc_unreachable ();
5296 case SECCAT_TEXT:
5297 case SECCAT_TDATA:
5298 case SECCAT_TBSS:
5299 /* We don't split these for the medium model. Place them into
5300 default sections and hope for the best. */
5301 break;
5303 if (prefix)
5305 const char *name, *linkonce;
5306 char *string;
5308 name = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (decl));
5309 name = targetm.strip_name_encoding (name);
5311 /* If we're using one_only, then there needs to be a .gnu.linkonce
5312 prefix to the section name. */
5313 linkonce = one_only ? ".gnu.linkonce" : "";
5315 string = ACONCAT ((linkonce, prefix, ".", name, NULL));
5317 set_decl_section_name (decl, string);
5318 return;
5321 default_unique_section (decl, reloc);
5324 #ifdef COMMON_ASM_OP
5325 /* This says how to output assembler code to declare an
5326 uninitialized external linkage data object.
5328 For medium model x86-64 we need to use the .largecomm directive for
5329 large objects. */
5330 void
5331 x86_elf_aligned_common (FILE *file,
5332 const char *name, unsigned HOST_WIDE_INT size,
5333 int align)
5335 if ((ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_MEDIUM_PIC)
5336 && size > (unsigned int)ix86_section_threshold)
5337 fputs ("\t.largecomm\t", file);
5338 else
5339 fputs (COMMON_ASM_OP, file);
5340 assemble_name (file, name);
5341 fprintf (file, "," HOST_WIDE_INT_PRINT_UNSIGNED ",%u\n",
5342 size, align / BITS_PER_UNIT);
5344 #endif
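/* Illustrative example (editor's sketch, not part of GCC): for a common
   symbol that exceeds the large-data threshold under -mcmodel=medium,
   x86_elf_aligned_common above emits something like

     .largecomm  big_buffer,1048576,32

   whereas smaller objects keep the ordinary .comm directive.  */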
5346 /* Utility function for targets to use in implementing
5347 ASM_OUTPUT_ALIGNED_BSS. */
5349 void
5350 x86_output_aligned_bss (FILE *file, tree decl, const char *name,
5351 unsigned HOST_WIDE_INT size, int align)
5353 if ((ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_MEDIUM_PIC)
5354 && size > (unsigned int)ix86_section_threshold)
5355 switch_to_section (get_named_section (decl, ".lbss", 0));
5356 else
5357 switch_to_section (bss_section);
5358 ASM_OUTPUT_ALIGN (file, floor_log2 (align / BITS_PER_UNIT));
5359 #ifdef ASM_DECLARE_OBJECT_NAME
5360 last_assemble_variable_decl = decl;
5361 ASM_DECLARE_OBJECT_NAME (file, name, decl);
5362 #else
5363 /* Standard thing is just output label for the object. */
5364 ASM_OUTPUT_LABEL (file, name);
5365 #endif /* ASM_DECLARE_OBJECT_NAME */
5366 ASM_OUTPUT_SKIP (file, size ? size : 1);
5369 /* Decide whether we must probe the stack before any space allocation
5370 on this target. It's essentially TARGET_STACK_PROBE except when
5371 -fstack-check causes the stack to be already probed differently. */
5373 bool
5374 ix86_target_stack_probe (void)
5376 /* Do not probe the stack twice if static stack checking is enabled. */
5377 if (flag_stack_check == STATIC_BUILTIN_STACK_CHECK)
5378 return false;
5380 return TARGET_STACK_PROBE;
5383 /* Decide whether we can make a sibling call to a function. DECL is the
5384 declaration of the function being targeted by the call and EXP is the
5385 CALL_EXPR representing the call. */
5387 static bool
5388 ix86_function_ok_for_sibcall (tree decl, tree exp)
5390 tree type, decl_or_type;
5391 rtx a, b;
5393 /* If we are generating position-independent code, we cannot sibcall
5394 optimize any indirect call, or a direct call to a global function,
5395 as the PLT requires %ebx be live. (Darwin does not have a PLT.) */
5396 if (!TARGET_MACHO
5397 && !TARGET_64BIT
5398 && flag_pic
5399 && (!decl || !targetm.binds_local_p (decl)))
5400 return false;
5402 /* If we need to align the outgoing stack, then sibcalling would
5403 unalign the stack, which may break the called function. */
5404 if (ix86_minimum_incoming_stack_boundary (true)
5405 < PREFERRED_STACK_BOUNDARY)
5406 return false;
5408 if (decl)
5410 decl_or_type = decl;
5411 type = TREE_TYPE (decl);
5413 else
5415 /* We're looking at the CALL_EXPR, we need the type of the function. */
5416 type = CALL_EXPR_FN (exp); /* pointer expression */
5417 type = TREE_TYPE (type); /* pointer type */
5418 type = TREE_TYPE (type); /* function type */
5419 decl_or_type = type;
5422 /* Check that the return value locations are the same. Like
5423 if we are returning floats on the 80387 register stack, we cannot
5424 make a sibcall from a function that doesn't return a float to a
5425 function that does or, conversely, from a function that does return
5426 a float to a function that doesn't; the necessary stack adjustment
5427 would not be executed. This is also the place we notice
5428 differences in the return value ABI. Note that it is ok for one
5429 of the functions to have void return type as long as the return
5430 value of the other is passed in a register. */
5431 a = ix86_function_value (TREE_TYPE (exp), decl_or_type, false);
5432 b = ix86_function_value (TREE_TYPE (DECL_RESULT (cfun->decl)),
5433 cfun->decl, false);
5434 if (STACK_REG_P (a) || STACK_REG_P (b))
5436 if (!rtx_equal_p (a, b))
5437 return false;
5439 else if (VOID_TYPE_P (TREE_TYPE (DECL_RESULT (cfun->decl))))
5441 else if (!rtx_equal_p (a, b))
5442 return false;
5444 if (TARGET_64BIT)
5446 /* The SYSV ABI has more call-clobbered registers;
5447 disallow sibcalls from MS to SYSV. */
5448 if (cfun->machine->call_abi == MS_ABI
5449 && ix86_function_type_abi (type) == SYSV_ABI)
5450 return false;
5452 else
5454 /* If this call is indirect, we'll need to be able to use a
5455 call-clobbered register for the address of the target function.
5456 Make sure that all such registers are not used for passing
5457 parameters. Note that DLLIMPORT functions are indirect. */
5458 if (!decl
5459 || (TARGET_DLLIMPORT_DECL_ATTRIBUTES && DECL_DLLIMPORT_P (decl)))
5461 if (ix86_function_regparm (type, NULL) >= 3)
5463 /* ??? Need to count the actual number of registers to be used,
5464 not the possible number of registers. Fix later. */
5465 return false;
5470 /* Otherwise okay. That also includes certain types of indirect calls. */
5471 return true;
5474 /* Handle "cdecl", "stdcall", "fastcall", "regparm", "thiscall",
5475 and "sseregparm" calling convention attributes;
5476 arguments as in struct attribute_spec.handler. */
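/* Illustrative example (editor's sketch, not part of GCC): the checks
   below reject incompatible combinations such as

     void __attribute__ ((fastcall, regparm (3))) f (void);

   which produces "fastcall and regparm attributes are not compatible",
   while regparm combines freely with cdecl and sseregparm.  */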
5478 static tree
5479 ix86_handle_cconv_attribute (tree *node, tree name,
5480 tree args,
5481 int,
5482 bool *no_add_attrs)
5484 if (TREE_CODE (*node) != FUNCTION_TYPE
5485 && TREE_CODE (*node) != METHOD_TYPE)
5487 warning (OPT_Wattributes, "%qE attribute only applies to functions",
5488 name);
5489 *no_add_attrs = true;
5490 return NULL_TREE;
5493 /* Can combine regparm with all attributes but fastcall, and thiscall. */
5494 if (is_attribute_p ("regparm", name))
5496 tree cst;
5498 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node)))
5500 error ("fastcall and regparm attributes are not compatible");
5503 if (lookup_attribute ("thiscall", TYPE_ATTRIBUTES (*node)))
5505 error ("regparam and thiscall attributes are not compatible");
5508 cst = TREE_VALUE (args);
5509 if (TREE_CODE (cst) != INTEGER_CST)
5511 warning (OPT_Wattributes,
5512 "%qE attribute requires an integer constant argument",
5513 name);
5514 *no_add_attrs = true;
5516 else if (compare_tree_int (cst, REGPARM_MAX) > 0)
5518 warning (OPT_Wattributes, "argument to %qE attribute larger than %d",
5519 name, REGPARM_MAX);
5520 *no_add_attrs = true;
5523 return NULL_TREE;
5526 if (TARGET_64BIT)
5528 /* Do not warn when emulating the MS ABI. */
5529 if ((TREE_CODE (*node) != FUNCTION_TYPE
5530 && TREE_CODE (*node) != METHOD_TYPE)
5531 || ix86_function_type_abi (*node) != MS_ABI)
5532 warning (OPT_Wattributes, "%qE attribute ignored",
5533 name);
5534 *no_add_attrs = true;
5535 return NULL_TREE;
5538 /* Can combine fastcall with stdcall (redundant) and sseregparm. */
5539 if (is_attribute_p ("fastcall", name))
5541 if (lookup_attribute ("cdecl", TYPE_ATTRIBUTES (*node)))
5543 error ("fastcall and cdecl attributes are not compatible");
5545 if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (*node)))
5547 error ("fastcall and stdcall attributes are not compatible");
5549 if (lookup_attribute ("regparm", TYPE_ATTRIBUTES (*node)))
5551 error ("fastcall and regparm attributes are not compatible");
5553 if (lookup_attribute ("thiscall", TYPE_ATTRIBUTES (*node)))
5555 error ("fastcall and thiscall attributes are not compatible");
5559 /* Can combine stdcall with fastcall (redundant), regparm and
5560 sseregparm. */
5561 else if (is_attribute_p ("stdcall", name))
5563 if (lookup_attribute ("cdecl", TYPE_ATTRIBUTES (*node)))
5565 error ("stdcall and cdecl attributes are not compatible");
5567 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node)))
5569 error ("stdcall and fastcall attributes are not compatible");
5571 if (lookup_attribute ("thiscall", TYPE_ATTRIBUTES (*node)))
5573 error ("stdcall and thiscall attributes are not compatible");
5577 /* Can combine cdecl with regparm and sseregparm. */
5578 else if (is_attribute_p ("cdecl", name))
5580 if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (*node)))
5582 error ("stdcall and cdecl attributes are not compatible");
5584 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node)))
5586 error ("fastcall and cdecl attributes are not compatible");
5588 if (lookup_attribute ("thiscall", TYPE_ATTRIBUTES (*node)))
5590 error ("cdecl and thiscall attributes are not compatible");
5593 else if (is_attribute_p ("thiscall", name))
5595 if (TREE_CODE (*node) != METHOD_TYPE && pedantic)
5596 warning (OPT_Wattributes, "%qE attribute is used for non-class method",
5597 name);
5598 if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (*node)))
5600 error ("stdcall and thiscall attributes are not compatible");
5602 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node)))
5604 error ("fastcall and thiscall attributes are not compatible");
5606 if (lookup_attribute ("cdecl", TYPE_ATTRIBUTES (*node)))
5608 error ("cdecl and thiscall attributes are not compatible");
5612 /* Can combine sseregparm with all attributes. */
5614 return NULL_TREE;
5617 /* The transactional memory builtins are implicitly regparm or fastcall
5618 depending on the ABI. Override the generic do-nothing attribute that
5619 these builtins were declared with, and replace it with one of the two
5620 attributes that we expect elsewhere. */
5622 static tree
5623 ix86_handle_tm_regparm_attribute (tree *node, tree, tree,
5624 int flags, bool *no_add_attrs)
5626 tree alt;
5628 /* In no case do we want to add the placeholder attribute. */
5629 *no_add_attrs = true;
5631 /* The 64-bit ABI is unchanged for transactional memory. */
5632 if (TARGET_64BIT)
5633 return NULL_TREE;
5635 /* ??? Is there a better way to validate 32-bit windows? We have
5636 cfun->machine->call_abi, but that seems to be set only for 64-bit. */
5637 if (CHECK_STACK_LIMIT > 0)
5638 alt = tree_cons (get_identifier ("fastcall"), NULL, NULL);
5639 else
5641 alt = tree_cons (NULL, build_int_cst (NULL, 2), NULL);
5642 alt = tree_cons (get_identifier ("regparm"), alt, NULL);
5644 if (TYPE_P (*node))
5645 type_attributes (node, alt, flags);
5646 else
5647 decl_attributes (node, alt, flags);
5649 return NULL_TREE;
5652 /* This function determines from TYPE the calling-convention. */
5654 unsigned int
5655 ix86_get_callcvt (const_tree type)
5657 unsigned int ret = 0;
5658 bool is_stdarg;
5659 tree attrs;
5661 if (TARGET_64BIT)
5662 return IX86_CALLCVT_CDECL;
5664 attrs = TYPE_ATTRIBUTES (type);
5665 if (attrs != NULL_TREE)
5667 if (lookup_attribute ("cdecl", attrs))
5668 ret |= IX86_CALLCVT_CDECL;
5669 else if (lookup_attribute ("stdcall", attrs))
5670 ret |= IX86_CALLCVT_STDCALL;
5671 else if (lookup_attribute ("fastcall", attrs))
5672 ret |= IX86_CALLCVT_FASTCALL;
5673 else if (lookup_attribute ("thiscall", attrs))
5674 ret |= IX86_CALLCVT_THISCALL;
5676 /* Regparm isn't allowed for thiscall or fastcall. */
5677 if ((ret & (IX86_CALLCVT_THISCALL | IX86_CALLCVT_FASTCALL)) == 0)
5679 if (lookup_attribute ("regparm", attrs))
5680 ret |= IX86_CALLCVT_REGPARM;
5681 if (lookup_attribute ("sseregparm", attrs))
5682 ret |= IX86_CALLCVT_SSEREGPARM;
5685 if (IX86_BASE_CALLCVT(ret) != 0)
5686 return ret;
5689 is_stdarg = stdarg_p (type);
5690 if (TARGET_RTD && !is_stdarg)
5691 return IX86_CALLCVT_STDCALL | ret;
5693 if (ret != 0
5694 || is_stdarg
5695 || TREE_CODE (type) != METHOD_TYPE
5696 || ix86_function_type_abi (type) != MS_ABI)
5697 return IX86_CALLCVT_CDECL | ret;
5699 return IX86_CALLCVT_THISCALL;
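/* Illustrative example (editor's sketch, not part of GCC): for

     int __attribute__ ((stdcall, regparm (2))) f (int, int);

   ix86_get_callcvt above returns
   IX86_CALLCVT_STDCALL | IX86_CALLCVT_REGPARM, while on 64-bit targets
   every function type yields IX86_CALLCVT_CDECL.  */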
5702 /* Return 0 if the attributes for two types are incompatible, 1 if they
5703 are compatible, and 2 if they are nearly compatible (which causes a
5704 warning to be generated). */
5706 static int
5707 ix86_comp_type_attributes (const_tree type1, const_tree type2)
5709 unsigned int ccvt1, ccvt2;
5711 if (TREE_CODE (type1) != FUNCTION_TYPE
5712 && TREE_CODE (type1) != METHOD_TYPE)
5713 return 1;
5715 ccvt1 = ix86_get_callcvt (type1);
5716 ccvt2 = ix86_get_callcvt (type2);
5717 if (ccvt1 != ccvt2)
5718 return 0;
5719 if (ix86_function_regparm (type1, NULL)
5720 != ix86_function_regparm (type2, NULL))
5721 return 0;
5723 return 1;
5726 /* Return the regparm value for a function with the indicated TYPE and DECL.
5727 DECL may be NULL when calling function indirectly
5728 or considering a libcall. */
5730 static int
5731 ix86_function_regparm (const_tree type, const_tree decl)
5733 tree attr;
5734 int regparm;
5735 unsigned int ccvt;
5737 if (TARGET_64BIT)
5738 return (ix86_function_type_abi (type) == SYSV_ABI
5739 ? X86_64_REGPARM_MAX : X86_64_MS_REGPARM_MAX);
5740 ccvt = ix86_get_callcvt (type);
5741 regparm = ix86_regparm;
5743 if ((ccvt & IX86_CALLCVT_REGPARM) != 0)
5745 attr = lookup_attribute ("regparm", TYPE_ATTRIBUTES (type));
5746 if (attr)
5748 regparm = TREE_INT_CST_LOW (TREE_VALUE (TREE_VALUE (attr)));
5749 return regparm;
5752 else if ((ccvt & IX86_CALLCVT_FASTCALL) != 0)
5753 return 2;
5754 else if ((ccvt & IX86_CALLCVT_THISCALL) != 0)
5755 return 1;
5757 /* Use register calling convention for local functions when possible. */
5758 if (decl
5759 && TREE_CODE (decl) == FUNCTION_DECL
5760 /* Caller and callee must agree on the calling convention, so
5761 checking just the optimize flag here would mean that with
5762 __attribute__((optimize (...))) the caller could use the regparm
5763 convention and the callee not, or vice versa. Instead look at
5764 whether the callee itself is optimized. */
5765 && opt_for_fn (decl, optimize)
5766 && !(profile_flag && !flag_fentry))
5768 /* FIXME: remove this CONST_CAST when cgraph.[ch] is constified. */
5769 cgraph_local_info *i = cgraph_node::local_info (CONST_CAST_TREE (decl));
5770 if (i && i->local && i->can_change_signature)
5772 int local_regparm, globals = 0, regno;
5774 /* Make sure no regparm register is taken by a
5775 fixed register variable. */
5776 for (local_regparm = 0; local_regparm < REGPARM_MAX; local_regparm++)
5777 if (fixed_regs[local_regparm])
5778 break;
5780 /* We don't want to use regparm(3) for nested functions as
5781 these use a static chain pointer in the third argument. */
5782 if (local_regparm == 3 && DECL_STATIC_CHAIN (decl))
5783 local_regparm = 2;
5785 /* In 32-bit mode save a register for the split stack. */
5786 if (!TARGET_64BIT && local_regparm == 3 && flag_split_stack)
5787 local_regparm = 2;
5789 /* Each fixed register usage increases register pressure,
5790 so fewer registers should be used for argument passing.
5791 This functionality can be overridden by an explicit
5792 regparm value. */
5793 for (regno = AX_REG; regno <= DI_REG; regno++)
5794 if (fixed_regs[regno])
5795 globals++;
5797 local_regparm
5798 = globals < local_regparm ? local_regparm - globals : 0;
5800 if (local_regparm > regparm)
5801 regparm = local_regparm;
5805 return regparm;
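/* Illustrative example (editor's sketch, not part of GCC): with

     int __attribute__ ((regparm (3))) f (int a, int b, int c);

   the three integer arguments are passed in %eax, %edx and %ecx instead
   of on the stack; fastcall behaves like a two-register variant using
   %ecx/%edx, and thiscall passes only the first argument in %ecx.  */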
5808 /* Return 1 or 2, if we can pass up to SSE_REGPARM_MAX SFmode (1) and
5809 DFmode (2) arguments in SSE registers for a function with the
5810 indicated TYPE and DECL. DECL may be NULL when calling function
5811 indirectly or considering a libcall. Otherwise return 0. */
5813 static int
5814 ix86_function_sseregparm (const_tree type, const_tree decl, bool warn)
5816 gcc_assert (!TARGET_64BIT);
5818 /* Use SSE registers to pass SFmode and DFmode arguments if requested
5819 by the sseregparm attribute. */
5820 if (TARGET_SSEREGPARM
5821 || (type && lookup_attribute ("sseregparm", TYPE_ATTRIBUTES (type))))
5823 if (!TARGET_SSE)
5825 if (warn)
5827 if (decl)
5828 error ("calling %qD with attribute sseregparm without "
5829 "SSE/SSE2 enabled", decl);
5830 else
5831 error ("calling %qT with attribute sseregparm without "
5832 "SSE/SSE2 enabled", type);
5834 return 0;
5837 return 2;
5840 /* For local functions, pass up to SSE_REGPARM_MAX SFmode
5841 (and DFmode for SSE2) arguments in SSE registers. */
5842 if (decl && TARGET_SSE_MATH && optimize
5843 && !(profile_flag && !flag_fentry))
5845 /* FIXME: remove this CONST_CAST when cgraph.[ch] is constified. */
5846 cgraph_local_info *i = cgraph_node::local_info (CONST_CAST_TREE(decl));
5847 if (i && i->local && i->can_change_signature)
5848 return TARGET_SSE2 ? 2 : 1;
5851 return 0;
5854 /* Return true if EAX is live at the start of the function. Used by
5855 ix86_expand_prologue to determine if we need special help before
5856 calling allocate_stack_worker. */
5858 static bool
5859 ix86_eax_live_at_start_p (void)
5861 /* Cheat. Don't bother working forward from ix86_function_regparm
5862 to the function type to whether an actual argument is located in
5863 eax. Instead just look at cfg info, which is still close enough
5864 to correct at this point. This gives false positives for broken
5865 functions that might use uninitialized data that happens to be
5866 allocated in eax, but who cares? */
5867 return REGNO_REG_SET_P (df_get_live_out (ENTRY_BLOCK_PTR_FOR_FN (cfun)), 0);
5870 static bool
5871 ix86_keep_aggregate_return_pointer (tree fntype)
5873 tree attr;
5875 if (!TARGET_64BIT)
5877 attr = lookup_attribute ("callee_pop_aggregate_return",
5878 TYPE_ATTRIBUTES (fntype));
5879 if (attr)
5880 return (TREE_INT_CST_LOW (TREE_VALUE (TREE_VALUE (attr))) == 0);
5882 /* For 32-bit MS-ABI the default is to keep aggregate
5883 return pointer. */
5884 if (ix86_function_type_abi (fntype) == MS_ABI)
5885 return true;
5887 return KEEP_AGGREGATE_RETURN_POINTER != 0;
5890 /* Value is the number of bytes of arguments automatically
5891 popped when returning from a subroutine call.
5892 FUNDECL is the declaration node of the function (as a tree),
5893 FUNTYPE is the data type of the function (as a tree),
5894 or for a library call it is an identifier node for the subroutine name.
5895 SIZE is the number of bytes of arguments passed on the stack.
5897 On the 80386, the RTD insn may be used to pop them if the number
5898 of args is fixed, but if the number is variable then the caller
5899 must pop them all. RTD can't be used for library calls now
5900 because the library is compiled with the Unix compiler.
5901 Use of RTD is a selectable option, since it is incompatible with
5902 standard Unix calling sequences. If the option is not selected,
5903 the caller must always pop the args.
5905 The attribute stdcall is equivalent to RTD on a per module basis. */
5907 static int
5908 ix86_return_pops_args (tree fundecl, tree funtype, int size)
5910 unsigned int ccvt;
5912 /* None of the 64-bit ABIs pop arguments. */
5913 if (TARGET_64BIT)
5914 return 0;
5916 ccvt = ix86_get_callcvt (funtype);
5918 if ((ccvt & (IX86_CALLCVT_STDCALL | IX86_CALLCVT_FASTCALL
5919 | IX86_CALLCVT_THISCALL)) != 0
5920 && ! stdarg_p (funtype))
5921 return size;
5923 /* Lose any fake structure return argument if it is passed on the stack. */
5924 if (aggregate_value_p (TREE_TYPE (funtype), fundecl)
5925 && !ix86_keep_aggregate_return_pointer (funtype))
5927 int nregs = ix86_function_regparm (funtype, fundecl);
5928 if (nregs == 0)
5929 return GET_MODE_SIZE (Pmode);
5932 return 0;
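/* Illustrative example (editor's sketch, not part of GCC): a 32-bit
   stdcall function with a fixed argument list pops its own arguments,
   so

     int __attribute__ ((stdcall)) f (int a, int b, int c);

   returns with "ret $12" (three 4-byte stack slots), while the default
   cdecl convention uses a plain "ret" and leaves cleanup to the caller.  */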
5935 /* Implement the TARGET_LEGITIMATE_COMBINED_INSN hook. */
5937 static bool
5938 ix86_legitimate_combined_insn (rtx_insn *insn)
5940 /* Check operand constraints in case hard registers were propagated
5941 into insn pattern. This check prevents combine pass from
5942 generating insn patterns with invalid hard register operands.
5943 These invalid insns can eventually confuse reload to error out
5944 with a spill failure. See also PRs 46829 and 46843. */
5945 if ((INSN_CODE (insn) = recog (PATTERN (insn), insn, 0)) >= 0)
5947 int i;
5949 extract_insn (insn);
5950 preprocess_constraints (insn);
5952 int n_operands = recog_data.n_operands;
5953 int n_alternatives = recog_data.n_alternatives;
5954 for (i = 0; i < n_operands; i++)
5956 rtx op = recog_data.operand[i];
5957 machine_mode mode = GET_MODE (op);
5958 const operand_alternative *op_alt;
5959 int offset = 0;
5960 bool win;
5961 int j;
5963 /* For pre-AVX disallow unaligned loads/stores where the
5964 instructions don't support it. */
5965 if (!TARGET_AVX
5966 && VECTOR_MODE_P (GET_MODE (op))
5967 && misaligned_operand (op, GET_MODE (op)))
5969 int min_align = get_attr_ssememalign (insn);
5970 if (min_align == 0)
5971 return false;
5974 /* A unary operator may be accepted by the predicate, but it
5975 is irrelevant for matching constraints. */
5976 if (UNARY_P (op))
5977 op = XEXP (op, 0);
5979 if (GET_CODE (op) == SUBREG)
5981 if (REG_P (SUBREG_REG (op))
5982 && REGNO (SUBREG_REG (op)) < FIRST_PSEUDO_REGISTER)
5983 offset = subreg_regno_offset (REGNO (SUBREG_REG (op)),
5984 GET_MODE (SUBREG_REG (op)),
5985 SUBREG_BYTE (op),
5986 GET_MODE (op));
5987 op = SUBREG_REG (op);
5990 if (!(REG_P (op) && HARD_REGISTER_P (op)))
5991 continue;
5993 op_alt = recog_op_alt;
5995 /* Operand has no constraints, anything is OK. */
5996 win = !n_alternatives;
5998 alternative_mask preferred = get_preferred_alternatives (insn);
5999 for (j = 0; j < n_alternatives; j++, op_alt += n_operands)
6001 if (!TEST_BIT (preferred, j))
6002 continue;
6003 if (op_alt[i].anything_ok
6004 || (op_alt[i].matches != -1
6005 && operands_match_p
6006 (recog_data.operand[i],
6007 recog_data.operand[op_alt[i].matches]))
6008 || reg_fits_class_p (op, op_alt[i].cl, offset, mode))
6010 win = true;
6011 break;
6015 if (!win)
6016 return false;
6020 return true;
6023 /* Implement the TARGET_ASAN_SHADOW_OFFSET hook. */
6025 static unsigned HOST_WIDE_INT
6026 ix86_asan_shadow_offset (void)
6028 return TARGET_LP64 ? (TARGET_MACHO ? (HOST_WIDE_INT_1 << 44)
6029 : HOST_WIDE_INT_C (0x7fff8000))
6030 : (HOST_WIDE_INT_1 << 29);
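/* Illustrative note (editor's sketch, not part of GCC): with the LP64
   offset above, AddressSanitizer on Linux computes the shadow address
   of an access roughly as

     shadow = (addr >> 3) + 0x7fff8000

   so each shadow byte describes eight application bytes.  */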
6033 /* Argument support functions. */
6035 /* Return true when register may be used to pass function parameters. */
6036 bool
6037 ix86_function_arg_regno_p (int regno)
6039 int i;
6040 const int *parm_regs;
6042 if (!TARGET_64BIT)
6044 if (TARGET_MACHO)
6045 return (regno < REGPARM_MAX
6046 || (TARGET_SSE && SSE_REGNO_P (regno) && !fixed_regs[regno]));
6047 else
6048 return (regno < REGPARM_MAX
6049 || (TARGET_MMX && MMX_REGNO_P (regno)
6050 && (regno < FIRST_MMX_REG + MMX_REGPARM_MAX))
6051 || (TARGET_SSE && SSE_REGNO_P (regno)
6052 && (regno < FIRST_SSE_REG + SSE_REGPARM_MAX)));
6055 if (TARGET_SSE && SSE_REGNO_P (regno)
6056 && (regno < FIRST_SSE_REG + SSE_REGPARM_MAX))
6057 return true;
6059 /* TODO: The function should depend on current function ABI but
6060 builtins.c would need updating then. Therefore we use the
6061 default ABI. */
6063 /* RAX is used as hidden argument to va_arg functions. */
6064 if (ix86_abi == SYSV_ABI && regno == AX_REG)
6065 return true;
6067 if (ix86_abi == MS_ABI)
6068 parm_regs = x86_64_ms_abi_int_parameter_registers;
6069 else
6070 parm_regs = x86_64_int_parameter_registers;
6071 for (i = 0; i < (ix86_abi == MS_ABI
6072 ? X86_64_MS_REGPARM_MAX : X86_64_REGPARM_MAX); i++)
6073 if (regno == parm_regs[i])
6074 return true;
6075 return false;
6078 /* Return if we do not know how to pass TYPE solely in registers. */
6080 static bool
6081 ix86_must_pass_in_stack (machine_mode mode, const_tree type)
6083 if (must_pass_in_stack_var_size_or_pad (mode, type))
6084 return true;
6086 /* For 32-bit, we want TImode aggregates to go on the stack. But watch out!
6087 The layout_type routine is crafty and tries to trick us into passing
6088 currently unsupported vector types on the stack by using TImode. */
6089 return (!TARGET_64BIT && mode == TImode
6090 && type && TREE_CODE (type) != VECTOR_TYPE);
6093 /* Return the size, in bytes, of the area reserved for arguments passed
6094 in registers for the function represented by FNDECL, depending on the
6095 ABI used. */
6097 ix86_reg_parm_stack_space (const_tree fndecl)
6099 enum calling_abi call_abi = SYSV_ABI;
6100 if (fndecl != NULL_TREE && TREE_CODE (fndecl) == FUNCTION_DECL)
6101 call_abi = ix86_function_abi (fndecl);
6102 else
6103 call_abi = ix86_function_type_abi (fndecl);
6104 if (TARGET_64BIT && call_abi == MS_ABI)
6105 return 32;
6106 return 0;
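/* Illustrative note (editor's sketch, not part of GCC): in the 64-bit
   MS ABI the caller reserves a 32-byte "shadow space" above the return
   address, one 8-byte home slot for each of RCX, RDX, R8 and R9, which
   is why 32 is returned above; the SYSV ABI reserves nothing.  */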
6109 /* Return SYSV_ABI or MS_ABI, depending on FNTYPE, specifying the
6110 call ABI used. */
6111 enum calling_abi
6112 ix86_function_type_abi (const_tree fntype)
6114 if (fntype != NULL_TREE && TYPE_ATTRIBUTES (fntype) != NULL_TREE)
6116 enum calling_abi abi = ix86_abi;
6117 if (abi == SYSV_ABI)
6119 if (lookup_attribute ("ms_abi", TYPE_ATTRIBUTES (fntype)))
6120 abi = MS_ABI;
6122 else if (lookup_attribute ("sysv_abi", TYPE_ATTRIBUTES (fntype)))
6123 abi = SYSV_ABI;
6124 return abi;
6126 return ix86_abi;
6129 /* We add this as a workaround in order to use the libc_has_function
6130 hook in i386.md. */
6131 bool
6132 ix86_libc_has_function (enum function_class fn_class)
6134 return targetm.libc_has_function (fn_class);
6137 static bool
6138 ix86_function_ms_hook_prologue (const_tree fn)
6140 if (fn && lookup_attribute ("ms_hook_prologue", DECL_ATTRIBUTES (fn)))
6142 if (decl_function_context (fn) != NULL_TREE)
6143 error_at (DECL_SOURCE_LOCATION (fn),
6144 "ms_hook_prologue is not compatible with nested function");
6145 else
6146 return true;
6148 return false;
6151 static enum calling_abi
6152 ix86_function_abi (const_tree fndecl)
6154 if (! fndecl)
6155 return ix86_abi;
6156 return ix86_function_type_abi (TREE_TYPE (fndecl));
6159 /* Return SYSV_ABI or MS_ABI, depending on CFUN, specifying the
6160 call ABI used. */
6161 enum calling_abi
6162 ix86_cfun_abi (void)
6164 if (! cfun)
6165 return ix86_abi;
6166 return cfun->machine->call_abi;
6169 /* Write the extra assembler code needed to declare a function properly. */
6171 void
6172 ix86_asm_output_function_label (FILE *asm_out_file, const char *fname,
6173 tree decl)
6175 bool is_ms_hook = ix86_function_ms_hook_prologue (decl);
6177 if (is_ms_hook)
6179 int i, filler_count = (TARGET_64BIT ? 32 : 16);
6180 unsigned int filler_cc = 0xcccccccc;
6182 for (i = 0; i < filler_count; i += 4)
6183 fprintf (asm_out_file, ASM_LONG " %#x\n", filler_cc);
6186 #ifdef SUBTARGET_ASM_UNWIND_INIT
6187 SUBTARGET_ASM_UNWIND_INIT (asm_out_file);
6188 #endif
6190 ASM_OUTPUT_LABEL (asm_out_file, fname);
6192 /* Output magic byte marker, if hot-patch attribute is set. */
6193 if (is_ms_hook)
6195 if (TARGET_64BIT)
6197 /* leaq [%rsp + 0], %rsp */
6198 asm_fprintf (asm_out_file, ASM_BYTE
6199 "0x48, 0x8d, 0xa4, 0x24, 0x00, 0x00, 0x00, 0x00\n");
6201 else
6203 /* movl.s %edi, %edi
6204 push %ebp
6205 movl.s %esp, %ebp */
6206 asm_fprintf (asm_out_file, ASM_BYTE
6207 "0x8b, 0xff, 0x55, 0x8b, 0xec\n");
6212 /* regclass.c */
6213 extern void init_regs (void);
6215 /* Implementation of the call ABI switching target hook. The call
6216 register sets specific to FNDECL are selected. See also
6217 ix86_conditional_register_usage for more details. */
6218 void
6219 ix86_call_abi_override (const_tree fndecl)
6221 if (fndecl == NULL_TREE)
6222 cfun->machine->call_abi = ix86_abi;
6223 else
6224 cfun->machine->call_abi = ix86_function_type_abi (TREE_TYPE (fndecl));
6227 /* The 64-bit MS and SYSV ABIs have different sets of call-used registers.
6228 Avoid expensive re-initialization of init_regs each time we switch function
6229 context, since this is needed only during RTL expansion. */
6230 static void
6231 ix86_maybe_switch_abi (void)
6233 if (TARGET_64BIT &&
6234 call_used_regs[SI_REG] == (cfun->machine->call_abi == MS_ABI))
6235 reinit_regs ();
6238 /* Return true if a pseudo register should be created and used to hold
6239 the GOT address for PIC code. */
6240 static bool
6241 ix86_use_pseudo_pic_reg (void)
6243 if ((TARGET_64BIT
6244 && (ix86_cmodel == CM_SMALL_PIC
6245 || TARGET_PECOFF))
6246 || !flag_pic)
6247 return false;
6248 return true;
6251 /* Initialize large model PIC register. */
6253 static void
6254 ix86_init_large_pic_reg (unsigned int tmp_regno)
6256 rtx_code_label *label;
6257 rtx tmp_reg;
6259 gcc_assert (Pmode == DImode);
6260 label = gen_label_rtx ();
6261 emit_label (label);
6262 LABEL_PRESERVE_P (label) = 1;
6263 tmp_reg = gen_rtx_REG (Pmode, tmp_regno);
6264 gcc_assert (REGNO (pic_offset_table_rtx) != tmp_regno);
6265 emit_insn (gen_set_rip_rex64 (pic_offset_table_rtx,
6266 label));
6267 emit_insn (gen_set_got_offset_rex64 (tmp_reg, label));
6268 emit_insn (ix86_gen_add3 (pic_offset_table_rtx,
6269 pic_offset_table_rtx, tmp_reg));
6272 /* Create and initialize PIC register if required. */
6273 static void
6274 ix86_init_pic_reg (void)
6276 edge entry_edge;
6277 rtx_insn *seq;
6279 if (!ix86_use_pseudo_pic_reg ())
6280 return;
6282 start_sequence ();
6284 if (TARGET_64BIT)
6286 if (ix86_cmodel == CM_LARGE_PIC)
6287 ix86_init_large_pic_reg (R11_REG);
6288 else
6289 emit_insn (gen_set_got_rex64 (pic_offset_table_rtx));
6291 else
6293 /* If there is a future mcount call in the function, it is more profitable
6294 to emit SET_GOT into the ABI-defined REAL_PIC_OFFSET_TABLE_REGNUM. */
6295 rtx reg = crtl->profile
6296 ? gen_rtx_REG (Pmode, REAL_PIC_OFFSET_TABLE_REGNUM)
6297 : pic_offset_table_rtx;
6298 rtx insn = emit_insn (gen_set_got (reg));
6299 RTX_FRAME_RELATED_P (insn) = 1;
6300 if (crtl->profile)
6301 emit_move_insn (pic_offset_table_rtx, reg);
6302 add_reg_note (insn, REG_CFA_FLUSH_QUEUE, NULL_RTX);
6305 seq = get_insns ();
6306 end_sequence ();
6308 entry_edge = single_succ_edge (ENTRY_BLOCK_PTR_FOR_FN (cfun));
6309 insert_insn_on_edge (seq, entry_edge);
6310 commit_one_edge_insertion (entry_edge);
6313 /* Initialize a variable CUM of type CUMULATIVE_ARGS
6314 for a call to a function whose data type is FNTYPE.
6315 For a library call, FNTYPE is 0. */
6317 void
6318 init_cumulative_args (CUMULATIVE_ARGS *cum, /* Argument info to initialize */
6319 tree fntype, /* tree ptr for function decl */
6320 rtx libname, /* SYMBOL_REF of library name or 0 */
6321 tree fndecl,
6322 int caller)
6324 struct cgraph_local_info *i;
6326 memset (cum, 0, sizeof (*cum));
6328 if (fndecl)
6330 i = cgraph_node::local_info (fndecl);
6331 cum->call_abi = ix86_function_abi (fndecl);
6333 else
6335 i = NULL;
6336 cum->call_abi = ix86_function_type_abi (fntype);
6339 cum->caller = caller;
6341 /* Set up the number of registers to use for passing arguments. */
6342 cum->nregs = ix86_regparm;
6343 if (TARGET_64BIT)
6345 cum->nregs = (cum->call_abi == SYSV_ABI
6346 ? X86_64_REGPARM_MAX
6347 : X86_64_MS_REGPARM_MAX);
6349 if (TARGET_SSE)
6351 cum->sse_nregs = SSE_REGPARM_MAX;
6352 if (TARGET_64BIT)
6354 cum->sse_nregs = (cum->call_abi == SYSV_ABI
6355 ? X86_64_SSE_REGPARM_MAX
6356 : X86_64_MS_SSE_REGPARM_MAX);
6359 if (TARGET_MMX)
6360 cum->mmx_nregs = MMX_REGPARM_MAX;
6361 cum->warn_avx512f = true;
6362 cum->warn_avx = true;
6363 cum->warn_sse = true;
6364 cum->warn_mmx = true;
6366 /* Because the type might mismatch between caller and callee, we need to
6367 use the actual type of the function for local calls.
6368 FIXME: cgraph_analyze can be told to actually record if a function uses
6369 va_start, so for local functions maybe_vaarg can be made more aggressive,
6370 helping K&R code.
6371 FIXME: once the type system is fixed, we won't need this code anymore. */
6372 if (i && i->local && i->can_change_signature)
6373 fntype = TREE_TYPE (fndecl);
6374 cum->stdarg = stdarg_p (fntype);
6375 cum->maybe_vaarg = (fntype
6376 ? (!prototype_p (fntype) || stdarg_p (fntype))
6377 : !libname);
6379 cum->bnd_regno = FIRST_BND_REG;
6380 cum->bnds_in_bt = 0;
6381 cum->force_bnd_pass = 0;
6383 if (!TARGET_64BIT)
6385 /* If there are variable arguments, then we won't pass anything
6386 in registers in 32-bit mode. */
6387 if (stdarg_p (fntype))
6389 cum->nregs = 0;
6390 cum->sse_nregs = 0;
6391 cum->mmx_nregs = 0;
6392 cum->warn_avx512f = false;
6393 cum->warn_avx = false;
6394 cum->warn_sse = false;
6395 cum->warn_mmx = false;
6396 return;
6399 /* Use the ecx and edx registers if the function has the fastcall attribute,
6400 else look for regparm information. */
6401 if (fntype)
6403 unsigned int ccvt = ix86_get_callcvt (fntype);
6404 if ((ccvt & IX86_CALLCVT_THISCALL) != 0)
6406 cum->nregs = 1;
6407 cum->fastcall = 1; /* Same first register as in fastcall. */
6409 else if ((ccvt & IX86_CALLCVT_FASTCALL) != 0)
6411 cum->nregs = 2;
6412 cum->fastcall = 1;
6414 else
6415 cum->nregs = ix86_function_regparm (fntype, fndecl);
6418 /* Set up the number of SSE registers used for passing SFmode
6419 and DFmode arguments. Warn for mismatching ABI. */
6420 cum->float_in_sse = ix86_function_sseregparm (fntype, fndecl, true);
6424 /* Return the "natural" mode for TYPE. In most cases, this is just TYPE_MODE.
6425 But in the case of vector types, it is some vector mode.
6427 When we have only some of our vector isa extensions enabled, then there
6428 are some modes for which vector_mode_supported_p is false. For these
6429 modes, the generic vector support in gcc will choose some non-vector mode
6430 in order to implement the type. By computing the natural mode, we'll
6431 select the proper ABI location for the operand and not depend on whatever
6432 the middle-end decides to do with these vector types.
6434 The middle-end can't deal with vector types > 16 bytes. In this
6435 case, we return the original mode and warn about the ABI change if CUM isn't
6436 NULL.
6438 If IN_RETURN is true, warn about the ABI change if the vector mode isn't
6439 available for the function return value. */
6441 static machine_mode
6442 type_natural_mode (const_tree type, const CUMULATIVE_ARGS *cum,
6443 bool in_return)
6445 machine_mode mode = TYPE_MODE (type);
6447 if (TREE_CODE (type) == VECTOR_TYPE && !VECTOR_MODE_P (mode))
6449 HOST_WIDE_INT size = int_size_in_bytes (type);
6450 if ((size == 8 || size == 16 || size == 32 || size == 64)
6451 /* ??? Generic code allows us to create width 1 vectors. Ignore. */
6452 && TYPE_VECTOR_SUBPARTS (type) > 1)
6454 machine_mode innermode = TYPE_MODE (TREE_TYPE (type));
6456 if (TREE_CODE (TREE_TYPE (type)) == REAL_TYPE)
6457 mode = MIN_MODE_VECTOR_FLOAT;
6458 else
6459 mode = MIN_MODE_VECTOR_INT;
6461 /* Get the mode which has this inner mode and number of units. */
6462 for (; mode != VOIDmode; mode = GET_MODE_WIDER_MODE (mode))
6463 if (GET_MODE_NUNITS (mode) == TYPE_VECTOR_SUBPARTS (type)
6464 && GET_MODE_INNER (mode) == innermode)
6466 if (size == 64 && !TARGET_AVX512F)
6468 static bool warnedavx512f;
6469 static bool warnedavx512f_ret;
6471 if (cum && cum->warn_avx512f && !warnedavx512f)
6473 if (warning (OPT_Wpsabi, "AVX512F vector argument "
6474 "without AVX512F enabled changes the ABI"))
6475 warnedavx512f = true;
6477 else if (in_return && !warnedavx512f_ret)
6479 if (warning (OPT_Wpsabi, "AVX512F vector return "
6480 "without AVX512F enabled changes the ABI"))
6481 warnedavx512f_ret = true;
6484 return TYPE_MODE (type);
6486 else if (size == 32 && !TARGET_AVX)
6488 static bool warnedavx;
6489 static bool warnedavx_ret;
6491 if (cum && cum->warn_avx && !warnedavx)
6493 if (warning (OPT_Wpsabi, "AVX vector argument "
6494 "without AVX enabled changes the ABI"))
6495 warnedavx = true;
6497 else if (in_return && !warnedavx_ret)
6499 if (warning (OPT_Wpsabi, "AVX vector return "
6500 "without AVX enabled changes the ABI"))
6501 warnedavx_ret = true;
6504 return TYPE_MODE (type);
6506 else if (((size == 8 && TARGET_64BIT) || size == 16)
6507 && !TARGET_SSE)
6509 static bool warnedsse;
6510 static bool warnedsse_ret;
6512 if (cum && cum->warn_sse && !warnedsse)
6514 if (warning (OPT_Wpsabi, "SSE vector argument "
6515 "without SSE enabled changes the ABI"))
6516 warnedsse = true;
6518 else if (!TARGET_64BIT && in_return && !warnedsse_ret)
6520 if (warning (OPT_Wpsabi, "SSE vector return "
6521 "without SSE enabled changes the ABI"))
6522 warnedsse_ret = true;
6525 else if ((size == 8 && !TARGET_64BIT) && !TARGET_MMX)
6527 static bool warnedmmx;
6528 static bool warnedmmx_ret;
6530 if (cum && cum->warn_mmx && !warnedmmx)
6532 if (warning (OPT_Wpsabi, "MMX vector argument "
6533 "without MMX enabled changes the ABI"))
6534 warnedmmx = true;
6536 else if (in_return && !warnedmmx_ret)
6538 if (warning (OPT_Wpsabi, "MMX vector return "
6539 "without MMX enabled changes the ABI"))
6540 warnedmmx_ret = true;
6543 return mode;
6546 gcc_unreachable ();
6550 return mode;
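/* For illustration (this only restates the behavior coded above, using a
   hypothetical 16-byte GNU vector type):

     typedef int v4si __attribute__ ((vector_size (16)));

   type_natural_mode yields V4SImode for a v4si argument even when SSE is
   disabled, so the psABI register slot does not depend on the selected
   ISA; a -Wpsabi warning is issued instead.  For 32- and 64-byte vectors
   without AVX/AVX512F the original TYPE_MODE is returned after the
   warning, as the code above shows.  */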
6553 /* We want to pass a value in REGNO whose "natural" mode is MODE. However,
6554 this may not agree with the mode that the type system has chosen for the
6555 register, which is ORIG_MODE. If ORIG_MODE is not BLKmode, then we can
6556 go ahead and use it. Otherwise we have to build a PARALLEL instead. */
6558 static rtx
6559 gen_reg_or_parallel (machine_mode mode, machine_mode orig_mode,
6560 unsigned int regno)
6562 rtx tmp;
6564 if (orig_mode != BLKmode)
6565 tmp = gen_rtx_REG (orig_mode, regno);
6566 else
6568 tmp = gen_rtx_REG (mode, regno);
6569 tmp = gen_rtx_EXPR_LIST (VOIDmode, tmp, const0_rtx);
6570 tmp = gen_rtx_PARALLEL (orig_mode, gen_rtvec (1, tmp));
6573 return tmp;
6576 /* x86-64 register passing implementation. See the x86-64 ABI for details. The goal
6577 of this code is to classify each 8-byte chunk of an incoming argument by register
6578 class and assign registers accordingly. */
6580 /* Return the union class of CLASS1 and CLASS2.
6581 See the x86-64 PS ABI for details. */
6583 static enum x86_64_reg_class
6584 merge_classes (enum x86_64_reg_class class1, enum x86_64_reg_class class2)
6586 /* Rule #1: If both classes are equal, this is the resulting class. */
6587 if (class1 == class2)
6588 return class1;
6590 /* Rule #2: If one of the classes is NO_CLASS, the resulting class is
6591 the other class. */
6592 if (class1 == X86_64_NO_CLASS)
6593 return class2;
6594 if (class2 == X86_64_NO_CLASS)
6595 return class1;
6597 /* Rule #3: If one of the classes is MEMORY, the result is MEMORY. */
6598 if (class1 == X86_64_MEMORY_CLASS || class2 == X86_64_MEMORY_CLASS)
6599 return X86_64_MEMORY_CLASS;
6601 /* Rule #4: If one of the classes is INTEGER, the result is INTEGER. */
6602 if ((class1 == X86_64_INTEGERSI_CLASS && class2 == X86_64_SSESF_CLASS)
6603 || (class2 == X86_64_INTEGERSI_CLASS && class1 == X86_64_SSESF_CLASS))
6604 return X86_64_INTEGERSI_CLASS;
6605 if (class1 == X86_64_INTEGER_CLASS || class1 == X86_64_INTEGERSI_CLASS
6606 || class2 == X86_64_INTEGER_CLASS || class2 == X86_64_INTEGERSI_CLASS)
6607 return X86_64_INTEGER_CLASS;
6609 /* Rule #5: If one of the classes is X87, X87UP, or COMPLEX_X87 class,
6610 MEMORY is used. */
6611 if (class1 == X86_64_X87_CLASS
6612 || class1 == X86_64_X87UP_CLASS
6613 || class1 == X86_64_COMPLEX_X87_CLASS
6614 || class2 == X86_64_X87_CLASS
6615 || class2 == X86_64_X87UP_CLASS
6616 || class2 == X86_64_COMPLEX_X87_CLASS)
6617 return X86_64_MEMORY_CLASS;
6619 /* Rule #6: Otherwise class SSE is used. */
6620 return X86_64_SSE_CLASS;
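/* A few illustrative results of the rules above (these examples merely
   restate the code):

     merge_classes (X86_64_NO_CLASS, X86_64_SSEDF_CLASS)
       == X86_64_SSEDF_CLASS                                (rule #2)
     merge_classes (X86_64_INTEGER_CLASS, X86_64_SSESF_CLASS)
       == X86_64_INTEGER_CLASS                              (rule #4)
     merge_classes (X86_64_X87_CLASS, X86_64_SSE_CLASS)
       == X86_64_MEMORY_CLASS                               (rule #5)  */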
6623 /* Classify the argument of type TYPE and mode MODE.
6624 CLASSES will be filled by the register class used to pass each word
6625 of the operand. The number of words is returned. In case the parameter
6626 should be passed in memory, 0 is returned. As a special case for zero
6627 sized containers, classes[0] will be NO_CLASS and 1 is returned.
6629 BIT_OFFSET is used internally for handling records and specifies the
6630 offset in bits modulo 512, to avoid overflow cases.
6632 See the x86-64 PS ABI for details. */
6635 static int
6636 classify_argument (machine_mode mode, const_tree type,
6637 enum x86_64_reg_class classes[MAX_CLASSES], int bit_offset)
6639 HOST_WIDE_INT bytes =
6640 (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
6641 int words
6642 = (bytes + (bit_offset % 64) / 8 + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
6644 /* Variable sized entities are always passed/returned in memory. */
6645 if (bytes < 0)
6646 return 0;
6648 if (mode != VOIDmode
6649 && targetm.calls.must_pass_in_stack (mode, type))
6650 return 0;
6652 if (type && AGGREGATE_TYPE_P (type))
6654 int i;
6655 tree field;
6656 enum x86_64_reg_class subclasses[MAX_CLASSES];
6658 /* On x86-64 we pass structures larger than 64 bytes on the stack. */
6659 if (bytes > 64)
6660 return 0;
6662 for (i = 0; i < words; i++)
6663 classes[i] = X86_64_NO_CLASS;
6665 /* Zero-sized arrays or structures are NO_CLASS. We return 0 to
6666 signal the memory class, so handle this as a special case. */
6667 if (!words)
6669 classes[0] = X86_64_NO_CLASS;
6670 return 1;
6673 /* Classify each field of record and merge classes. */
6674 switch (TREE_CODE (type))
6676 case RECORD_TYPE:
6677 /* And now merge the fields of structure. */
6678 for (field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
6680 if (TREE_CODE (field) == FIELD_DECL)
6682 int num;
6684 if (TREE_TYPE (field) == error_mark_node)
6685 continue;
6687 /* Bitfields are always classified as integer. Handle them
6688 early, since later code would consider them to be
6689 misaligned integers. */
6690 if (DECL_BIT_FIELD (field))
6692 for (i = (int_bit_position (field)
6693 + (bit_offset % 64)) / 8 / 8;
6694 i < ((int_bit_position (field) + (bit_offset % 64))
6695 + tree_to_shwi (DECL_SIZE (field))
6696 + 63) / 8 / 8; i++)
6697 classes[i] =
6698 merge_classes (X86_64_INTEGER_CLASS,
6699 classes[i]);
6701 else
6703 int pos;
6705 type = TREE_TYPE (field);
6707 /* Flexible array member is ignored. */
6708 if (TYPE_MODE (type) == BLKmode
6709 && TREE_CODE (type) == ARRAY_TYPE
6710 && TYPE_SIZE (type) == NULL_TREE
6711 && TYPE_DOMAIN (type) != NULL_TREE
6712 && (TYPE_MAX_VALUE (TYPE_DOMAIN (type))
6713 == NULL_TREE))
6715 static bool warned;
6717 if (!warned && warn_psabi)
6719 warned = true;
6720 inform (input_location,
6721 "the ABI of passing struct with"
6722 " a flexible array member has"
6723 " changed in GCC 4.4");
6725 continue;
6727 num = classify_argument (TYPE_MODE (type), type,
6728 subclasses,
6729 (int_bit_position (field)
6730 + bit_offset) % 512);
6731 if (!num)
6732 return 0;
6733 pos = (int_bit_position (field)
6734 + (bit_offset % 64)) / 8 / 8;
6735 for (i = 0; i < num && (i + pos) < words; i++)
6736 classes[i + pos] =
6737 merge_classes (subclasses[i], classes[i + pos]);
6741 break;
6743 case ARRAY_TYPE:
6744 /* Arrays are handled as small records. */
6746 int num;
6747 num = classify_argument (TYPE_MODE (TREE_TYPE (type)),
6748 TREE_TYPE (type), subclasses, bit_offset);
6749 if (!num)
6750 return 0;
6752 /* The partial classes are now full classes. */
6753 if (subclasses[0] == X86_64_SSESF_CLASS && bytes != 4)
6754 subclasses[0] = X86_64_SSE_CLASS;
6755 if (subclasses[0] == X86_64_INTEGERSI_CLASS
6756 && !((bit_offset % 64) == 0 && bytes == 4))
6757 subclasses[0] = X86_64_INTEGER_CLASS;
6759 for (i = 0; i < words; i++)
6760 classes[i] = subclasses[i % num];
6762 break;
6764 case UNION_TYPE:
6765 case QUAL_UNION_TYPE:
6766 /* Unions are similar to RECORD_TYPE but the offset is always 0. */
6768 for (field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
6770 if (TREE_CODE (field) == FIELD_DECL)
6772 int num;
6774 if (TREE_TYPE (field) == error_mark_node)
6775 continue;
6777 num = classify_argument (TYPE_MODE (TREE_TYPE (field)),
6778 TREE_TYPE (field), subclasses,
6779 bit_offset);
6780 if (!num)
6781 return 0;
6782 for (i = 0; i < num && i < words; i++)
6783 classes[i] = merge_classes (subclasses[i], classes[i]);
6786 break;
6788 default:
6789 gcc_unreachable ();
6792 if (words > 2)
6794 /* When the size is > 16 bytes, if the first class isn't
6795 X86_64_SSE_CLASS or any of the others aren't
6796 X86_64_SSEUP_CLASS, everything should be passed in
6797 memory. */
6798 if (classes[0] != X86_64_SSE_CLASS)
6799 return 0;
6801 for (i = 1; i < words; i++)
6802 if (classes[i] != X86_64_SSEUP_CLASS)
6803 return 0;
6806 /* Final merger cleanup. */
6807 for (i = 0; i < words; i++)
6809 /* If one class is MEMORY, everything should be passed in
6810 memory. */
6811 if (classes[i] == X86_64_MEMORY_CLASS)
6812 return 0;
6814 /* The X86_64_SSEUP_CLASS should be always preceded by
6815 X86_64_SSE_CLASS or X86_64_SSEUP_CLASS. */
6816 if (classes[i] == X86_64_SSEUP_CLASS
6817 && classes[i - 1] != X86_64_SSE_CLASS
6818 && classes[i - 1] != X86_64_SSEUP_CLASS)
6820 /* The first one should never be X86_64_SSEUP_CLASS. */
6821 gcc_assert (i != 0);
6822 classes[i] = X86_64_SSE_CLASS;
6825 /* If X86_64_X87UP_CLASS isn't preceded by X86_64_X87_CLASS,
6826 everything should be passed in memory. */
6827 if (classes[i] == X86_64_X87UP_CLASS
6828 && (classes[i - 1] != X86_64_X87_CLASS))
6830 static bool warned;
6832 /* The first one should never be X86_64_X87UP_CLASS. */
6833 gcc_assert (i != 0);
6834 if (!warned && warn_psabi)
6836 warned = true;
6837 inform (input_location,
6838 "the ABI of passing union with long double"
6839 " has changed in GCC 4.4");
6841 return 0;
6844 return words;
6847 /* Compute the alignment needed. We align all types to their natural boundaries, with
6848 the exception of XFmode, which is aligned to 64 bits. */
6849 if (mode != VOIDmode && mode != BLKmode)
6851 int mode_alignment = GET_MODE_BITSIZE (mode);
6853 if (mode == XFmode)
6854 mode_alignment = 128;
6855 else if (mode == XCmode)
6856 mode_alignment = 256;
6857 if (COMPLEX_MODE_P (mode))
6858 mode_alignment /= 2;
6859 /* Misaligned fields are always returned in memory. */
6860 if (bit_offset % mode_alignment)
6861 return 0;
6864 /* for V1xx modes, just use the base mode */
6865 if (VECTOR_MODE_P (mode) && mode != V1DImode && mode != V1TImode
6866 && GET_MODE_SIZE (GET_MODE_INNER (mode)) == bytes)
6867 mode = GET_MODE_INNER (mode);
6869 /* Classification of atomic types. */
6870 switch (mode)
6872 case SDmode:
6873 case DDmode:
6874 classes[0] = X86_64_SSE_CLASS;
6875 return 1;
6876 case TDmode:
6877 classes[0] = X86_64_SSE_CLASS;
6878 classes[1] = X86_64_SSEUP_CLASS;
6879 return 2;
6880 case DImode:
6881 case SImode:
6882 case HImode:
6883 case QImode:
6884 case CSImode:
6885 case CHImode:
6886 case CQImode:
6888 int size = bit_offset + (int) GET_MODE_BITSIZE (mode);
6890 /* Analyze last 128 bits only. */
6891 size = (size - 1) & 0x7f;
6893 if (size < 32)
6895 classes[0] = X86_64_INTEGERSI_CLASS;
6896 return 1;
6898 else if (size < 64)
6900 classes[0] = X86_64_INTEGER_CLASS;
6901 return 1;
6903 else if (size < 64+32)
6905 classes[0] = X86_64_INTEGER_CLASS;
6906 classes[1] = X86_64_INTEGERSI_CLASS;
6907 return 2;
6909 else if (size < 64+64)
6911 classes[0] = classes[1] = X86_64_INTEGER_CLASS;
6912 return 2;
6914 else
6915 gcc_unreachable ();
6917 case CDImode:
6918 case TImode:
6919 classes[0] = classes[1] = X86_64_INTEGER_CLASS;
6920 return 2;
6921 case COImode:
6922 case OImode:
6923 /* OImode shouldn't be used directly. */
6924 gcc_unreachable ();
6925 case CTImode:
6926 return 0;
6927 case SFmode:
6928 if (!(bit_offset % 64))
6929 classes[0] = X86_64_SSESF_CLASS;
6930 else
6931 classes[0] = X86_64_SSE_CLASS;
6932 return 1;
6933 case DFmode:
6934 classes[0] = X86_64_SSEDF_CLASS;
6935 return 1;
6936 case XFmode:
6937 classes[0] = X86_64_X87_CLASS;
6938 classes[1] = X86_64_X87UP_CLASS;
6939 return 2;
6940 case TFmode:
6941 classes[0] = X86_64_SSE_CLASS;
6942 classes[1] = X86_64_SSEUP_CLASS;
6943 return 2;
6944 case SCmode:
6945 classes[0] = X86_64_SSE_CLASS;
6946 if (!(bit_offset % 64))
6947 return 1;
6948 else
6950 static bool warned;
6952 if (!warned && warn_psabi)
6954 warned = true;
6955 inform (input_location,
6956 "the ABI of passing structure with complex float"
6957 " member has changed in GCC 4.4");
6959 classes[1] = X86_64_SSESF_CLASS;
6960 return 2;
6962 case DCmode:
6963 classes[0] = X86_64_SSEDF_CLASS;
6964 classes[1] = X86_64_SSEDF_CLASS;
6965 return 2;
6966 case XCmode:
6967 classes[0] = X86_64_COMPLEX_X87_CLASS;
6968 return 1;
6969 case TCmode:
6970 /* This mode is larger than 16 bytes. */
6971 return 0;
6972 case V8SFmode:
6973 case V8SImode:
6974 case V32QImode:
6975 case V16HImode:
6976 case V4DFmode:
6977 case V4DImode:
6978 classes[0] = X86_64_SSE_CLASS;
6979 classes[1] = X86_64_SSEUP_CLASS;
6980 classes[2] = X86_64_SSEUP_CLASS;
6981 classes[3] = X86_64_SSEUP_CLASS;
6982 return 4;
6983 case V8DFmode:
6984 case V16SFmode:
6985 case V8DImode:
6986 case V16SImode:
6987 case V32HImode:
6988 case V64QImode:
6989 classes[0] = X86_64_SSE_CLASS;
6990 classes[1] = X86_64_SSEUP_CLASS;
6991 classes[2] = X86_64_SSEUP_CLASS;
6992 classes[3] = X86_64_SSEUP_CLASS;
6993 classes[4] = X86_64_SSEUP_CLASS;
6994 classes[5] = X86_64_SSEUP_CLASS;
6995 classes[6] = X86_64_SSEUP_CLASS;
6996 classes[7] = X86_64_SSEUP_CLASS;
6997 return 8;
6998 case V4SFmode:
6999 case V4SImode:
7000 case V16QImode:
7001 case V8HImode:
7002 case V2DFmode:
7003 case V2DImode:
7004 classes[0] = X86_64_SSE_CLASS;
7005 classes[1] = X86_64_SSEUP_CLASS;
7006 return 2;
7007 case V1TImode:
7008 case V1DImode:
7009 case V2SFmode:
7010 case V2SImode:
7011 case V4HImode:
7012 case V8QImode:
7013 classes[0] = X86_64_SSE_CLASS;
7014 return 1;
7015 case BLKmode:
7016 case VOIDmode:
7017 return 0;
7018 default:
7019 gcc_assert (VECTOR_MODE_P (mode));
7021 if (bytes > 16)
7022 return 0;
7024 gcc_assert (GET_MODE_CLASS (GET_MODE_INNER (mode)) == MODE_INT);
7026 if (bit_offset + GET_MODE_BITSIZE (mode) <= 32)
7027 classes[0] = X86_64_INTEGERSI_CLASS;
7028 else
7029 classes[0] = X86_64_INTEGER_CLASS;
7030 classes[1] = X86_64_INTEGER_CLASS;
7031 return 1 + (bytes > 8);
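/* Worked example for the classification above (the struct is hypothetical
   and only restates the code): for

     struct s { double d; long l; };   (16 bytes, two eightbytes)

   classify_argument returns 2 with classes[0] = X86_64_SSEDF_CLASS and
   classes[1] = X86_64_INTEGER_CLASS, i.e. the first eightbyte is eligible
   for an SSE register and the second for a general-purpose register.  */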
7035 /* Examine the argument and set the number of registers required in each
7036 class. Return true iff the parameter should be passed in memory. */
7038 static bool
7039 examine_argument (machine_mode mode, const_tree type, int in_return,
7040 int *int_nregs, int *sse_nregs)
7042 enum x86_64_reg_class regclass[MAX_CLASSES];
7043 int n = classify_argument (mode, type, regclass, 0);
7045 *int_nregs = 0;
7046 *sse_nregs = 0;
7048 if (!n)
7049 return true;
7050 for (n--; n >= 0; n--)
7051 switch (regclass[n])
7053 case X86_64_INTEGER_CLASS:
7054 case X86_64_INTEGERSI_CLASS:
7055 (*int_nregs)++;
7056 break;
7057 case X86_64_SSE_CLASS:
7058 case X86_64_SSESF_CLASS:
7059 case X86_64_SSEDF_CLASS:
7060 (*sse_nregs)++;
7061 break;
7062 case X86_64_NO_CLASS:
7063 case X86_64_SSEUP_CLASS:
7064 break;
7065 case X86_64_X87_CLASS:
7066 case X86_64_X87UP_CLASS:
7067 case X86_64_COMPLEX_X87_CLASS:
7068 if (!in_return)
7069 return true;
7070 break;
7071 case X86_64_MEMORY_CLASS:
7072 gcc_unreachable ();
7075 return false;
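/* Continuing the example above: for struct s { double d; long l; },
   examine_argument sets *int_nregs = 1 and *sse_nregs = 1 and returns
   false, so the aggregate goes in registers provided both a
   general-purpose and an SSE argument register are still available.  */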
7078 /* Construct container for the argument used by GCC interface. See
7079 FUNCTION_ARG for the detailed description. */
7081 static rtx
7082 construct_container (machine_mode mode, machine_mode orig_mode,
7083 const_tree type, int in_return, int nintregs, int nsseregs,
7084 const int *intreg, int sse_regno)
7086 /* The following variables hold the static issued_error state. */
7087 static bool issued_sse_arg_error;
7088 static bool issued_sse_ret_error;
7089 static bool issued_x87_ret_error;
7091 machine_mode tmpmode;
7092 int bytes =
7093 (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
7094 enum x86_64_reg_class regclass[MAX_CLASSES];
7095 int n;
7096 int i;
7097 int nexps = 0;
7098 int needed_sseregs, needed_intregs;
7099 rtx exp[MAX_CLASSES];
7100 rtx ret;
7102 n = classify_argument (mode, type, regclass, 0);
7103 if (!n)
7104 return NULL;
7105 if (examine_argument (mode, type, in_return, &needed_intregs,
7106 &needed_sseregs))
7107 return NULL;
7108 if (needed_intregs > nintregs || needed_sseregs > nsseregs)
7109 return NULL;
7111 /* We allowed the user to turn off SSE for kernel mode. Don't crash if
7112 some less clueful developer tries to use floating-point anyway. */
7113 if (needed_sseregs && !TARGET_SSE)
7115 if (in_return)
7117 if (!issued_sse_ret_error)
7119 error ("SSE register return with SSE disabled");
7120 issued_sse_ret_error = true;
7123 else if (!issued_sse_arg_error)
7125 error ("SSE register argument with SSE disabled");
7126 issued_sse_arg_error = true;
7128 return NULL;
7131 /* Likewise, error if the ABI requires us to return values in the
7132 x87 registers and the user specified -mno-80387. */
7133 if (!TARGET_FLOAT_RETURNS_IN_80387 && in_return)
7134 for (i = 0; i < n; i++)
7135 if (regclass[i] == X86_64_X87_CLASS
7136 || regclass[i] == X86_64_X87UP_CLASS
7137 || regclass[i] == X86_64_COMPLEX_X87_CLASS)
7139 if (!issued_x87_ret_error)
7141 error ("x87 register return with x87 disabled");
7142 issued_x87_ret_error = true;
7144 return NULL;
7147 /* First construct simple cases. Avoid SCmode, since we want to use a
7148 single register to pass this type. */
7149 if (n == 1 && mode != SCmode)
7150 switch (regclass[0])
7152 case X86_64_INTEGER_CLASS:
7153 case X86_64_INTEGERSI_CLASS:
7154 return gen_rtx_REG (mode, intreg[0]);
7155 case X86_64_SSE_CLASS:
7156 case X86_64_SSESF_CLASS:
7157 case X86_64_SSEDF_CLASS:
7158 if (mode != BLKmode)
7159 return gen_reg_or_parallel (mode, orig_mode,
7160 SSE_REGNO (sse_regno));
7161 break;
7162 case X86_64_X87_CLASS:
7163 case X86_64_COMPLEX_X87_CLASS:
7164 return gen_rtx_REG (mode, FIRST_STACK_REG);
7165 case X86_64_NO_CLASS:
7166 /* Zero sized array, struct or class. */
7167 return NULL;
7168 default:
7169 gcc_unreachable ();
7171 if (n == 2
7172 && regclass[0] == X86_64_SSE_CLASS
7173 && regclass[1] == X86_64_SSEUP_CLASS
7174 && mode != BLKmode)
7175 return gen_reg_or_parallel (mode, orig_mode,
7176 SSE_REGNO (sse_regno));
7177 if (n == 4
7178 && regclass[0] == X86_64_SSE_CLASS
7179 && regclass[1] == X86_64_SSEUP_CLASS
7180 && regclass[2] == X86_64_SSEUP_CLASS
7181 && regclass[3] == X86_64_SSEUP_CLASS
7182 && mode != BLKmode)
7183 return gen_reg_or_parallel (mode, orig_mode,
7184 SSE_REGNO (sse_regno));
7185 if (n == 8
7186 && regclass[0] == X86_64_SSE_CLASS
7187 && regclass[1] == X86_64_SSEUP_CLASS
7188 && regclass[2] == X86_64_SSEUP_CLASS
7189 && regclass[3] == X86_64_SSEUP_CLASS
7190 && regclass[4] == X86_64_SSEUP_CLASS
7191 && regclass[5] == X86_64_SSEUP_CLASS
7192 && regclass[6] == X86_64_SSEUP_CLASS
7193 && regclass[7] == X86_64_SSEUP_CLASS
7194 && mode != BLKmode)
7195 return gen_reg_or_parallel (mode, orig_mode,
7196 SSE_REGNO (sse_regno));
7197 if (n == 2
7198 && regclass[0] == X86_64_X87_CLASS
7199 && regclass[1] == X86_64_X87UP_CLASS)
7200 return gen_rtx_REG (XFmode, FIRST_STACK_REG);
7202 if (n == 2
7203 && regclass[0] == X86_64_INTEGER_CLASS
7204 && regclass[1] == X86_64_INTEGER_CLASS
7205 && (mode == CDImode || mode == TImode)
7206 && intreg[0] + 1 == intreg[1])
7207 return gen_rtx_REG (mode, intreg[0]);
7209 /* Otherwise figure out the entries of the PARALLEL. */
7210 for (i = 0; i < n; i++)
7212 int pos;
7214 switch (regclass[i])
7216 case X86_64_NO_CLASS:
7217 break;
7218 case X86_64_INTEGER_CLASS:
7219 case X86_64_INTEGERSI_CLASS:
7220 /* Merge TImodes on aligned occasions here too. */
7221 if (i * 8 + 8 > bytes)
7222 tmpmode
7223 = mode_for_size ((bytes - i * 8) * BITS_PER_UNIT, MODE_INT, 0);
7224 else if (regclass[i] == X86_64_INTEGERSI_CLASS)
7225 tmpmode = SImode;
7226 else
7227 tmpmode = DImode;
7228 /* We've requested 24 bytes for which we
7229 don't have a mode. Use DImode. */
7230 if (tmpmode == BLKmode)
7231 tmpmode = DImode;
7232 exp [nexps++]
7233 = gen_rtx_EXPR_LIST (VOIDmode,
7234 gen_rtx_REG (tmpmode, *intreg),
7235 GEN_INT (i*8));
7236 intreg++;
7237 break;
7238 case X86_64_SSESF_CLASS:
7239 exp [nexps++]
7240 = gen_rtx_EXPR_LIST (VOIDmode,
7241 gen_rtx_REG (SFmode,
7242 SSE_REGNO (sse_regno)),
7243 GEN_INT (i*8));
7244 sse_regno++;
7245 break;
7246 case X86_64_SSEDF_CLASS:
7247 exp [nexps++]
7248 = gen_rtx_EXPR_LIST (VOIDmode,
7249 gen_rtx_REG (DFmode,
7250 SSE_REGNO (sse_regno)),
7251 GEN_INT (i*8));
7252 sse_regno++;
7253 break;
7254 case X86_64_SSE_CLASS:
7255 pos = i;
7256 switch (n)
7258 case 1:
7259 tmpmode = DImode;
7260 break;
7261 case 2:
7262 if (i == 0 && regclass[1] == X86_64_SSEUP_CLASS)
7264 tmpmode = TImode;
7265 i++;
7267 else
7268 tmpmode = DImode;
7269 break;
7270 case 4:
7271 gcc_assert (i == 0
7272 && regclass[1] == X86_64_SSEUP_CLASS
7273 && regclass[2] == X86_64_SSEUP_CLASS
7274 && regclass[3] == X86_64_SSEUP_CLASS);
7275 tmpmode = OImode;
7276 i += 3;
7277 break;
7278 case 8:
7279 gcc_assert (i == 0
7280 && regclass[1] == X86_64_SSEUP_CLASS
7281 && regclass[2] == X86_64_SSEUP_CLASS
7282 && regclass[3] == X86_64_SSEUP_CLASS
7283 && regclass[4] == X86_64_SSEUP_CLASS
7284 && regclass[5] == X86_64_SSEUP_CLASS
7285 && regclass[6] == X86_64_SSEUP_CLASS
7286 && regclass[7] == X86_64_SSEUP_CLASS);
7287 tmpmode = XImode;
7288 i += 7;
7289 break;
7290 default:
7291 gcc_unreachable ();
7293 exp [nexps++]
7294 = gen_rtx_EXPR_LIST (VOIDmode,
7295 gen_rtx_REG (tmpmode,
7296 SSE_REGNO (sse_regno)),
7297 GEN_INT (pos*8));
7298 sse_regno++;
7299 break;
7300 default:
7301 gcc_unreachable ();
7305 /* Empty aligned struct, union or class. */
7306 if (nexps == 0)
7307 return NULL;
7309 ret = gen_rtx_PARALLEL (mode, rtvec_alloc (nexps));
7310 for (i = 0; i < nexps; i++)
7311 XVECEXP (ret, 0, i) = exp [i];
7312 return ret;
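/* Continuing the example: when both register pools still have room and
   struct s { double d; long l; } is the first argument,
   construct_container builds roughly

     (parallel [(expr_list (reg:DF xmm0) (const_int 0))
                (expr_list (reg:DI di)   (const_int 8))])

   so the double travels in %xmm0 and the long in %rdi.  For a return
   value the integer register would come from x86_64_int_return_registers
   instead, and the exact register numbers depend on how many earlier
   arguments already consumed registers.  */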
7315 /* Update the data in CUM to advance over an argument of mode MODE
7316 and data type TYPE. (TYPE is null for libcalls where that information
7317 may not be available.)
7319 Return the number of integer registers advanced over. */
7321 static int
7322 function_arg_advance_32 (CUMULATIVE_ARGS *cum, machine_mode mode,
7323 const_tree type, HOST_WIDE_INT bytes,
7324 HOST_WIDE_INT words)
7326 int res = 0;
7328 switch (mode)
7330 default:
7331 break;
7333 case BLKmode:
7334 if (bytes < 0)
7335 break;
7336 /* FALLTHRU */
7338 case DImode:
7339 case SImode:
7340 case HImode:
7341 case QImode:
7342 cum->words += words;
7343 cum->nregs -= words;
7344 cum->regno += words;
7345 if (cum->nregs >= 0)
7346 res = words;
7347 if (cum->nregs <= 0)
7349 cum->nregs = 0;
7350 cum->regno = 0;
7352 break;
7354 case OImode:
7355 /* OImode shouldn't be used directly. */
7356 gcc_unreachable ();
7358 case DFmode:
7359 if (cum->float_in_sse < 2)
7360 break;
7361 case SFmode:
7362 if (cum->float_in_sse < 1)
7363 break;
7364 /* FALLTHRU */
7366 case V8SFmode:
7367 case V8SImode:
7368 case V64QImode:
7369 case V32HImode:
7370 case V16SImode:
7371 case V8DImode:
7372 case V16SFmode:
7373 case V8DFmode:
7374 case V32QImode:
7375 case V16HImode:
7376 case V4DFmode:
7377 case V4DImode:
7378 case TImode:
7379 case V16QImode:
7380 case V8HImode:
7381 case V4SImode:
7382 case V2DImode:
7383 case V4SFmode:
7384 case V2DFmode:
7385 if (!type || !AGGREGATE_TYPE_P (type))
7387 cum->sse_words += words;
7388 cum->sse_nregs -= 1;
7389 cum->sse_regno += 1;
7390 if (cum->sse_nregs <= 0)
7392 cum->sse_nregs = 0;
7393 cum->sse_regno = 0;
7396 break;
7398 case V8QImode:
7399 case V4HImode:
7400 case V2SImode:
7401 case V2SFmode:
7402 case V1TImode:
7403 case V1DImode:
7404 if (!type || !AGGREGATE_TYPE_P (type))
7406 cum->mmx_words += words;
7407 cum->mmx_nregs -= 1;
7408 cum->mmx_regno += 1;
7409 if (cum->mmx_nregs <= 0)
7411 cum->mmx_nregs = 0;
7412 cum->mmx_regno = 0;
7415 break;
7418 return res;
7421 static int
7422 function_arg_advance_64 (CUMULATIVE_ARGS *cum, machine_mode mode,
7423 const_tree type, HOST_WIDE_INT words, bool named)
7425 int int_nregs, sse_nregs;
7427 /* Unnamed 512- and 256-bit vector mode parameters are passed on the stack. */
7428 if (!named && (VALID_AVX512F_REG_MODE (mode)
7429 || VALID_AVX256_REG_MODE (mode)))
7430 return 0;
7432 if (!examine_argument (mode, type, 0, &int_nregs, &sse_nregs)
7433 && sse_nregs <= cum->sse_nregs && int_nregs <= cum->nregs)
7435 cum->nregs -= int_nregs;
7436 cum->sse_nregs -= sse_nregs;
7437 cum->regno += int_nregs;
7438 cum->sse_regno += sse_nregs;
7439 return int_nregs;
7441 else
7443 int align = ix86_function_arg_boundary (mode, type) / BITS_PER_WORD;
7444 cum->words = (cum->words + align - 1) & ~(align - 1);
7445 cum->words += words;
7446 return 0;
7450 static int
7451 function_arg_advance_ms_64 (CUMULATIVE_ARGS *cum, HOST_WIDE_INT bytes,
7452 HOST_WIDE_INT words)
7454 /* Otherwise, this should have been passed indirectly. */
7455 gcc_assert (bytes == 1 || bytes == 2 || bytes == 4 || bytes == 8);
7457 cum->words += words;
7458 if (cum->nregs > 0)
7460 cum->nregs -= 1;
7461 cum->regno += 1;
7462 return 1;
7464 return 0;
7467 /* Update the data in CUM to advance over an argument of mode MODE and
7468 data type TYPE. (TYPE is null for libcalls where that information
7469 may not be available.) */
7471 static void
7472 ix86_function_arg_advance (cumulative_args_t cum_v, machine_mode mode,
7473 const_tree type, bool named)
7475 CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
7476 HOST_WIDE_INT bytes, words;
7477 int nregs;
7479 if (mode == BLKmode)
7480 bytes = int_size_in_bytes (type);
7481 else
7482 bytes = GET_MODE_SIZE (mode);
7483 words = (bytes + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
7485 if (type)
7486 mode = type_natural_mode (type, NULL, false);
7488 if ((type && POINTER_BOUNDS_TYPE_P (type))
7489 || POINTER_BOUNDS_MODE_P (mode))
7491 /* If we pass bounds in the BT then just update the remaining bounds count. */
7492 if (cum->bnds_in_bt)
7494 cum->bnds_in_bt--;
7495 return;
7499 /* Update the remaining number of bounds to force. */
7499 if (cum->force_bnd_pass)
7500 cum->force_bnd_pass--;
7502 cum->bnd_regno++;
7504 return;
7507 /* The first arg not going to Bounds Tables resets this counter. */
7508 cum->bnds_in_bt = 0;
7509 /* For unnamed args we always pass bounds, to avoid a bounds mess when
7510 the passed and received types do not match. If bounds do not follow an
7511 unnamed arg, still pretend the required number of bounds were passed. */
7512 if (cum->force_bnd_pass)
7514 cum->bnd_regno += cum->force_bnd_pass;
7515 cum->force_bnd_pass = 0;
7518 if (TARGET_64BIT && (cum ? cum->call_abi : ix86_abi) == MS_ABI)
7519 nregs = function_arg_advance_ms_64 (cum, bytes, words);
7520 else if (TARGET_64BIT)
7521 nregs = function_arg_advance_64 (cum, mode, type, words, named);
7522 else
7523 nregs = function_arg_advance_32 (cum, mode, type, bytes, words);
7525 /* For stdarg we expect bounds to be passed for each value passed
7526 in a register. */
7527 if (cum->stdarg)
7528 cum->force_bnd_pass = nregs;
7529 /* For pointers passed in memory we expect bounds passed in Bounds
7530 Table. */
7531 if (!nregs)
7532 cum->bnds_in_bt = chkp_type_bounds_count (type);
7535 /* Define where to put the arguments to a function.
7536 Value is zero to push the argument on the stack,
7537 or a hard register in which to store the argument.
7539 MODE is the argument's machine mode.
7540 TYPE is the data type of the argument (as a tree).
7541 This is null for libcalls where that information may
7542 not be available.
7543 CUM is a variable of type CUMULATIVE_ARGS which gives info about
7544 the preceding args and about the function being called.
7545 NAMED is nonzero if this argument is a named parameter
7546 (otherwise it is an extra parameter matching an ellipsis). */
7548 static rtx
7549 function_arg_32 (const CUMULATIVE_ARGS *cum, machine_mode mode,
7550 machine_mode orig_mode, const_tree type,
7551 HOST_WIDE_INT bytes, HOST_WIDE_INT words)
7553 /* Avoid the AL settings for the Unix64 ABI. */
7554 if (mode == VOIDmode)
7555 return constm1_rtx;
7557 switch (mode)
7559 default:
7560 break;
7562 case BLKmode:
7563 if (bytes < 0)
7564 break;
7565 /* FALLTHRU */
7566 case DImode:
7567 case SImode:
7568 case HImode:
7569 case QImode:
7570 if (words <= cum->nregs)
7572 int regno = cum->regno;
7574 /* Fastcall allocates the first two DWORD (SImode) or
7575 smaller arguments to ECX and EDX if the argument isn't an
7576 aggregate type. */
7577 if (cum->fastcall)
7579 if (mode == BLKmode
7580 || mode == DImode
7581 || (type && AGGREGATE_TYPE_P (type)))
7582 break;
7584 /* ECX, not EAX, is the first allocated register. */
7585 if (regno == AX_REG)
7586 regno = CX_REG;
7588 return gen_rtx_REG (mode, regno);
7590 break;
7592 case DFmode:
7593 if (cum->float_in_sse < 2)
7594 break;
7595 case SFmode:
7596 if (cum->float_in_sse < 1)
7597 break;
7598 /* FALLTHRU */
7599 case TImode:
7600 /* In 32bit, we pass TImode in xmm registers. */
7601 case V16QImode:
7602 case V8HImode:
7603 case V4SImode:
7604 case V2DImode:
7605 case V4SFmode:
7606 case V2DFmode:
7607 if (!type || !AGGREGATE_TYPE_P (type))
7609 if (cum->sse_nregs)
7610 return gen_reg_or_parallel (mode, orig_mode,
7611 cum->sse_regno + FIRST_SSE_REG);
7613 break;
7615 case OImode:
7616 case XImode:
7617 /* OImode and XImode shouldn't be used directly. */
7618 gcc_unreachable ();
7620 case V64QImode:
7621 case V32HImode:
7622 case V16SImode:
7623 case V8DImode:
7624 case V16SFmode:
7625 case V8DFmode:
7626 case V8SFmode:
7627 case V8SImode:
7628 case V32QImode:
7629 case V16HImode:
7630 case V4DFmode:
7631 case V4DImode:
7632 if (!type || !AGGREGATE_TYPE_P (type))
7634 if (cum->sse_nregs)
7635 return gen_reg_or_parallel (mode, orig_mode,
7636 cum->sse_regno + FIRST_SSE_REG);
7638 break;
7640 case V8QImode:
7641 case V4HImode:
7642 case V2SImode:
7643 case V2SFmode:
7644 case V1TImode:
7645 case V1DImode:
7646 if (!type || !AGGREGATE_TYPE_P (type))
7648 if (cum->mmx_nregs)
7649 return gen_reg_or_parallel (mode, orig_mode,
7650 cum->mmx_regno + FIRST_MMX_REG);
7652 break;
7655 return NULL_RTX;
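/* Illustrative ia32 cases for the code above (examples only): with
   regparm in effect, an `int' argument that still fits in the remaining
   register count comes back as (reg:SI ax), or (reg:SI cx) first under
   fastcall; a V4SFmode argument of non-aggregate type goes to the next
   free XMM register while cum->sse_nregs permits; everything else falls
   through, and the NULL_RTX result means "push it on the stack".  */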
7658 static rtx
7659 function_arg_64 (const CUMULATIVE_ARGS *cum, machine_mode mode,
7660 machine_mode orig_mode, const_tree type, bool named)
7662 /* Handle a hidden AL argument containing number of registers
7663 for varargs x86-64 functions. */
7664 if (mode == VOIDmode)
7665 return GEN_INT (cum->maybe_vaarg
7666 ? (cum->sse_nregs < 0
7667 ? X86_64_SSE_REGPARM_MAX
7668 : cum->sse_regno)
7669 : -1);
7671 switch (mode)
7673 default:
7674 break;
7676 case V8SFmode:
7677 case V8SImode:
7678 case V32QImode:
7679 case V16HImode:
7680 case V4DFmode:
7681 case V4DImode:
7682 case V16SFmode:
7683 case V16SImode:
7684 case V64QImode:
7685 case V32HImode:
7686 case V8DFmode:
7687 case V8DImode:
7688 /* Unnamed 256- and 512-bit vector mode parameters are passed on the stack. */
7689 if (!named)
7690 return NULL;
7691 break;
7694 return construct_container (mode, orig_mode, type, 0, cum->nregs,
7695 cum->sse_nregs,
7696 &x86_64_int_parameter_registers [cum->regno],
7697 cum->sse_regno);
7700 static rtx
7701 function_arg_ms_64 (const CUMULATIVE_ARGS *cum, machine_mode mode,
7702 machine_mode orig_mode, bool named,
7703 HOST_WIDE_INT bytes)
7705 unsigned int regno;
7707 /* We need to add a clobber for MS_ABI->SYSV ABI calls in expand_call.
7708 We use the value -2 to specify that the current function call is MSABI. */
7709 if (mode == VOIDmode)
7710 return GEN_INT (-2);
7712 /* If we've run out of registers, it goes on the stack. */
7713 if (cum->nregs == 0)
7714 return NULL_RTX;
7716 regno = x86_64_ms_abi_int_parameter_registers[cum->regno];
7718 /* Only floating point modes are passed in anything but integer regs. */
7719 if (TARGET_SSE && (mode == SFmode || mode == DFmode))
7721 if (named)
7722 regno = cum->regno + FIRST_SSE_REG;
7723 else
7725 rtx t1, t2;
7727 /* Unnamed floating parameters are passed in both the
7728 SSE and integer registers. */
7729 t1 = gen_rtx_REG (mode, cum->regno + FIRST_SSE_REG);
7730 t2 = gen_rtx_REG (mode, regno);
7731 t1 = gen_rtx_EXPR_LIST (VOIDmode, t1, const0_rtx);
7732 t2 = gen_rtx_EXPR_LIST (VOIDmode, t2, const0_rtx);
7733 return gen_rtx_PARALLEL (mode, gen_rtvec (2, t1, t2));
7736 /* Handle aggregate types passed in registers. */
7737 if (orig_mode == BLKmode)
7739 if (bytes > 0 && bytes <= 8)
7740 mode = (bytes > 4 ? DImode : SImode);
7741 if (mode == BLKmode)
7742 mode = DImode;
7745 return gen_reg_or_parallel (mode, orig_mode, regno);
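/* A sketch of the MS x64 cases handled above (illustrative only): the
   Nth argument slot maps to RCX/RDX/R8/R9 through
   x86_64_ms_abi_int_parameter_registers; a named float or double instead
   uses the XMM register of the same slot, while an unnamed one is
   described by the PARALLEL above so it lands in both register files;
   BLKmode aggregates of 1..8 bytes are passed in an SImode or DImode
   register.  */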
7748 /* Return where to put the arguments to a function.
7749 Return zero to push the argument on the stack, or a hard register in which to store the argument.
7751 MODE is the argument's machine mode. TYPE is the data type of the
7752 argument. It is null for libcalls where that information may not be
7753 available. CUM gives information about the preceding args and about
7754 the function being called. NAMED is nonzero if this argument is a
7755 named parameter (otherwise it is an extra parameter matching an
7756 ellipsis). */
7758 static rtx
7759 ix86_function_arg (cumulative_args_t cum_v, machine_mode omode,
7760 const_tree type, bool named)
7762 CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
7763 machine_mode mode = omode;
7764 HOST_WIDE_INT bytes, words;
7765 rtx arg;
7767 /* All pointer bounds arguments are handled separately here. */
7768 if ((type && POINTER_BOUNDS_TYPE_P (type))
7769 || POINTER_BOUNDS_MODE_P (mode))
7771 /* Return NULL if bounds are forced to go in Bounds Table. */
7772 if (cum->bnds_in_bt)
7773 arg = NULL;
7774 /* Return the next available bound reg if any. */
7775 else if (cum->bnd_regno <= LAST_BND_REG)
7776 arg = gen_rtx_REG (BNDmode, cum->bnd_regno);
7777 /* Return the next special slot number otherwise. */
7778 else
7779 arg = GEN_INT (cum->bnd_regno - LAST_BND_REG - 1);
7781 return arg;
7784 if (mode == BLKmode)
7785 bytes = int_size_in_bytes (type);
7786 else
7787 bytes = GET_MODE_SIZE (mode);
7788 words = (bytes + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
7790 /* To simplify the code below, represent vector types with a vector mode
7791 even if MMX/SSE are not active. */
7792 if (type && TREE_CODE (type) == VECTOR_TYPE)
7793 mode = type_natural_mode (type, cum, false);
7795 if (TARGET_64BIT && (cum ? cum->call_abi : ix86_abi) == MS_ABI)
7796 arg = function_arg_ms_64 (cum, mode, omode, named, bytes);
7797 else if (TARGET_64BIT)
7798 arg = function_arg_64 (cum, mode, omode, type, named);
7799 else
7800 arg = function_arg_32 (cum, mode, omode, type, bytes, words);
7802 return arg;
7805 /* A C expression that indicates when an argument must be passed by
7806 reference. If nonzero for an argument, a copy of that argument is
7807 made in memory and a pointer to the argument is passed instead of
7808 the argument itself. The pointer is passed in whatever way is
7809 appropriate for passing a pointer to that type. */
7811 static bool
7812 ix86_pass_by_reference (cumulative_args_t cum_v, machine_mode mode,
7813 const_tree type, bool)
7815 CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
7817 /* See Windows x64 Software Convention. */
7818 if (TARGET_64BIT && (cum ? cum->call_abi : ix86_abi) == MS_ABI)
7820 int msize = (int) GET_MODE_SIZE (mode);
7821 if (type)
7823 /* Arrays are passed by reference. */
7824 if (TREE_CODE (type) == ARRAY_TYPE)
7825 return true;
7827 if (AGGREGATE_TYPE_P (type))
7829 /* Structs/unions of sizes other than 8, 16, 32, or 64 bits
7830 are passed by reference. */
7831 msize = int_size_in_bytes (type);
7835 /* __m128 is passed by reference. */
7836 switch (msize) {
7837 case 1: case 2: case 4: case 8:
7838 break;
7839 default:
7840 return true;
7843 else if (TARGET_64BIT && type && int_size_in_bytes (type) == -1)
7844 return 1;
7846 return 0;
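/* Examples of the rules above: under the MS x64 ABI an array parameter,
   a __m128 value, or an aggregate whose size is not 1, 2, 4 or 8 bytes
   is replaced by a pointer to a caller-made copy; under the SysV x86-64
   ABI only variable-sized types (int_size_in_bytes == -1) are passed by
   reference here, everything else is decided by classify_argument.  */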
7849 /* Return true when TYPE should be 128bit aligned for 32bit argument
7850 passing ABI. XXX: This function is obsolete and is only used for
7851 checking psABI compatibility with previous versions of GCC. */
7853 static bool
7854 ix86_compat_aligned_value_p (const_tree type)
7856 machine_mode mode = TYPE_MODE (type);
7857 if (((TARGET_SSE && SSE_REG_MODE_P (mode))
7858 || mode == TDmode
7859 || mode == TFmode
7860 || mode == TCmode)
7861 && (!TYPE_USER_ALIGN (type) || TYPE_ALIGN (type) > 128))
7862 return true;
7863 if (TYPE_ALIGN (type) < 128)
7864 return false;
7866 if (AGGREGATE_TYPE_P (type))
7868 /* Walk the aggregates recursively. */
7869 switch (TREE_CODE (type))
7871 case RECORD_TYPE:
7872 case UNION_TYPE:
7873 case QUAL_UNION_TYPE:
7875 tree field;
7877 /* Walk all the structure fields. */
7878 for (field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
7880 if (TREE_CODE (field) == FIELD_DECL
7881 && ix86_compat_aligned_value_p (TREE_TYPE (field)))
7882 return true;
7884 break;
7887 case ARRAY_TYPE:
7888 /* Just in case some languages pass arrays by value. */
7889 if (ix86_compat_aligned_value_p (TREE_TYPE (type)))
7890 return true;
7891 break;
7893 default:
7894 gcc_unreachable ();
7897 return false;
7900 /* Return the alignment boundary for MODE and TYPE with alignment ALIGN.
7901 XXX: This function is obsolete and is only used for checking psABI
7902 compatibility with previous versions of GCC. */
7904 static unsigned int
7905 ix86_compat_function_arg_boundary (machine_mode mode,
7906 const_tree type, unsigned int align)
7908 /* In 32bit, only _Decimal128 and __float128 are aligned to their
7909 natural boundaries. */
7910 if (!TARGET_64BIT && mode != TDmode && mode != TFmode)
7912 /* i386 ABI defines all arguments to be 4 byte aligned. We have to
7913 make an exception for SSE modes since these require 128bit
7914 alignment.
7916 The handling here differs from field_alignment. ICC aligns MMX
7917 arguments to 4 byte boundaries, while structure fields are aligned
7918 to 8 byte boundaries. */
7919 if (!type)
7921 if (!(TARGET_SSE && SSE_REG_MODE_P (mode)))
7922 align = PARM_BOUNDARY;
7924 else
7926 if (!ix86_compat_aligned_value_p (type))
7927 align = PARM_BOUNDARY;
7930 if (align > BIGGEST_ALIGNMENT)
7931 align = BIGGEST_ALIGNMENT;
7932 return align;
7935 /* Return true when TYPE should be 128bit aligned for 32bit argument
7936 passing ABI. */
7938 static bool
7939 ix86_contains_aligned_value_p (const_tree type)
7941 machine_mode mode = TYPE_MODE (type);
7943 if (mode == XFmode || mode == XCmode)
7944 return false;
7946 if (TYPE_ALIGN (type) < 128)
7947 return false;
7949 if (AGGREGATE_TYPE_P (type))
7951 /* Walk the aggregates recursively. */
7952 switch (TREE_CODE (type))
7954 case RECORD_TYPE:
7955 case UNION_TYPE:
7956 case QUAL_UNION_TYPE:
7958 tree field;
7960 /* Walk all the structure fields. */
7961 for (field = TYPE_FIELDS (type);
7962 field;
7963 field = DECL_CHAIN (field))
7965 if (TREE_CODE (field) == FIELD_DECL
7966 && ix86_contains_aligned_value_p (TREE_TYPE (field)))
7967 return true;
7969 break;
7972 case ARRAY_TYPE:
7973 /* Just in case some languages pass arrays by value. */
7974 if (ix86_contains_aligned_value_p (TREE_TYPE (type)))
7975 return true;
7976 break;
7978 default:
7979 gcc_unreachable ();
7982 else
7983 return TYPE_ALIGN (type) >= 128;
7985 return false;
7988 /* Gives the alignment boundary, in bits, of an argument with the
7989 specified mode and type. */
7991 static unsigned int
7992 ix86_function_arg_boundary (machine_mode mode, const_tree type)
7994 unsigned int align;
7995 if (type)
7997 /* Since the main variant type is used for the call, convert the type
7998 to its main variant. */
7999 type = TYPE_MAIN_VARIANT (type);
8000 align = TYPE_ALIGN (type);
8002 else
8003 align = GET_MODE_ALIGNMENT (mode);
8004 if (align < PARM_BOUNDARY)
8005 align = PARM_BOUNDARY;
8006 else
8008 static bool warned;
8009 unsigned int saved_align = align;
8011 if (!TARGET_64BIT)
8013 /* i386 ABI defines XFmode arguments to be 4 byte aligned. */
8014 if (!type)
8016 if (mode == XFmode || mode == XCmode)
8017 align = PARM_BOUNDARY;
8019 else if (!ix86_contains_aligned_value_p (type))
8020 align = PARM_BOUNDARY;
8022 if (align < 128)
8023 align = PARM_BOUNDARY;
8026 if (warn_psabi
8027 && !warned
8028 && align != ix86_compat_function_arg_boundary (mode, type,
8029 saved_align))
8031 warned = true;
8032 inform (input_location,
8033 "The ABI for passing parameters with %d-byte"
8034 " alignment has changed in GCC 4.6",
8035 align / BITS_PER_UNIT);
8039 return align;
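/* For example, a plain `int' argument gets PARM_BOUNDARY (32 bits on
   ia32), while a 16-byte vector, or any type containing a 128-bit
   aligned value, keeps its 128-bit alignment; whenever the result
   differs from the pre-GCC 4.6 computation, the -Wpsabi note above is
   emitted once per compilation.  */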
8042 /* Return true if N is a possible register number of function value. */
8044 static bool
8045 ix86_function_value_regno_p (const unsigned int regno)
8047 switch (regno)
8049 case AX_REG:
8050 return true;
8051 case DX_REG:
8052 return (!TARGET_64BIT || ix86_abi != MS_ABI);
8053 case DI_REG:
8054 case SI_REG:
8055 return TARGET_64BIT && ix86_abi != MS_ABI;
8057 case FIRST_BND_REG:
8058 return chkp_function_instrumented_p (current_function_decl);
8060 /* Complex values are returned in %st(0)/%st(1) pair. */
8061 case ST0_REG:
8062 case ST1_REG:
8063 /* TODO: The function should depend on current function ABI but
8064 builtins.c would need updating then. Therefore we use the
8065 default ABI. */
8066 if (TARGET_64BIT && ix86_abi == MS_ABI)
8067 return false;
8068 return TARGET_FLOAT_RETURNS_IN_80387;
8070 /* Complex values are returned in %xmm0/%xmm1 pair. */
8071 case XMM0_REG:
8072 case XMM1_REG:
8073 return TARGET_SSE;
8075 case MM0_REG:
8076 if (TARGET_MACHO || TARGET_64BIT)
8077 return false;
8078 return TARGET_MMX;
8081 return false;
8084 /* Define how to find the value returned by a function.
8085 VALTYPE is the data type of the value (as a tree).
8086 If the precise function being called is known, FUNC is its FUNCTION_DECL;
8087 otherwise, FUNC is 0. */
8089 static rtx
8090 function_value_32 (machine_mode orig_mode, machine_mode mode,
8091 const_tree fntype, const_tree fn)
8093 unsigned int regno;
8095 /* 8-byte vector modes in %mm0. See ix86_return_in_memory for where
8096 we normally prevent this case when mmx is not available. However
8097 some ABIs may require the result to be returned like DImode. */
8098 if (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 8)
8099 regno = FIRST_MMX_REG;
8101 /* 16-byte vector modes in %xmm0. See ix86_return_in_memory for where
8102 we prevent this case when sse is not available. However some ABIs
8103 may require the result to be returned like integer TImode. */
8104 else if (mode == TImode
8105 || (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 16))
8106 regno = FIRST_SSE_REG;
8108 /* 32-byte vector modes in %ymm0. */
8109 else if (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 32)
8110 regno = FIRST_SSE_REG;
8112 /* 64-byte vector modes in %zmm0. */
8113 else if (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 64)
8114 regno = FIRST_SSE_REG;
8116 /* Floating point return values in %st(0) (unless -mno-fp-ret-in-387). */
8117 else if (X87_FLOAT_MODE_P (mode) && TARGET_FLOAT_RETURNS_IN_80387)
8118 regno = FIRST_FLOAT_REG;
8119 else
8120 /* Most things go in %eax. */
8121 regno = AX_REG;
8123 /* Override FP return register with %xmm0 for local functions when
8124 SSE math is enabled or for functions with sseregparm attribute. */
8125 if ((fn || fntype) && (mode == SFmode || mode == DFmode))
8127 int sse_level = ix86_function_sseregparm (fntype, fn, false);
8128 if ((sse_level >= 1 && mode == SFmode)
8129 || (sse_level == 2 && mode == DFmode))
8130 regno = FIRST_SSE_REG;
8133 /* OImode shouldn't be used directly. */
8134 gcc_assert (mode != OImode);
8136 return gen_rtx_REG (orig_mode, regno);
8139 static rtx
8140 function_value_64 (machine_mode orig_mode, machine_mode mode,
8141 const_tree valtype)
8143 rtx ret;
8145 /* Handle libcalls, which don't provide a type node. */
8146 if (valtype == NULL)
8148 unsigned int regno;
8150 switch (mode)
8152 case SFmode:
8153 case SCmode:
8154 case DFmode:
8155 case DCmode:
8156 case TFmode:
8157 case SDmode:
8158 case DDmode:
8159 case TDmode:
8160 regno = FIRST_SSE_REG;
8161 break;
8162 case XFmode:
8163 case XCmode:
8164 regno = FIRST_FLOAT_REG;
8165 break;
8166 case TCmode:
8167 return NULL;
8168 default:
8169 regno = AX_REG;
8172 return gen_rtx_REG (mode, regno);
8174 else if (POINTER_TYPE_P (valtype))
8176 /* Pointers are always returned in word_mode. */
8177 mode = word_mode;
8180 ret = construct_container (mode, orig_mode, valtype, 1,
8181 X86_64_REGPARM_MAX, X86_64_SSE_REGPARM_MAX,
8182 x86_64_int_return_registers, 0);
8184 /* For zero-sized structures, construct_container returns NULL, but we
8185 need to keep the rest of the compiler happy by returning a meaningful value. */
8186 if (!ret)
8187 ret = gen_rtx_REG (orig_mode, AX_REG);
8189 return ret;
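/* For example (SysV x86-64): an `int' result comes back in (reg:SI ax),
   a `double' in (reg:DF xmm0), a pointer is widened to word_mode first,
   and struct s { double d; long l; } is returned as a PARALLEL pairing
   %xmm0 with %rax, mirroring the argument-passing example above.  */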
8192 static rtx
8193 function_value_ms_64 (machine_mode orig_mode, machine_mode mode,
8194 const_tree valtype)
8196 unsigned int regno = AX_REG;
8198 if (TARGET_SSE)
8200 switch (GET_MODE_SIZE (mode))
8202 case 16:
8203 if (valtype != NULL_TREE
8204 && !VECTOR_INTEGER_TYPE_P (valtype)
8206 && !INTEGRAL_TYPE_P (valtype)
8207 && !VECTOR_FLOAT_TYPE_P (valtype))
8208 break;
8209 if ((SCALAR_INT_MODE_P (mode) || VECTOR_MODE_P (mode))
8210 && !COMPLEX_MODE_P (mode))
8211 regno = FIRST_SSE_REG;
8212 break;
8213 case 8:
8214 case 4:
8215 if (mode == SFmode || mode == DFmode)
8216 regno = FIRST_SSE_REG;
8217 break;
8218 default:
8219 break;
8222 return gen_rtx_REG (orig_mode, regno);
8225 static rtx
8226 ix86_function_value_1 (const_tree valtype, const_tree fntype_or_decl,
8227 machine_mode orig_mode, machine_mode mode)
8229 const_tree fn, fntype;
8231 fn = NULL_TREE;
8232 if (fntype_or_decl && DECL_P (fntype_or_decl))
8233 fn = fntype_or_decl;
8234 fntype = fn ? TREE_TYPE (fn) : fntype_or_decl;
8236 if ((valtype && POINTER_BOUNDS_TYPE_P (valtype))
8237 || POINTER_BOUNDS_MODE_P (mode))
8238 return gen_rtx_REG (BNDmode, FIRST_BND_REG);
8239 else if (TARGET_64BIT && ix86_function_type_abi (fntype) == MS_ABI)
8240 return function_value_ms_64 (orig_mode, mode, valtype);
8241 else if (TARGET_64BIT)
8242 return function_value_64 (orig_mode, mode, valtype);
8243 else
8244 return function_value_32 (orig_mode, mode, fntype, fn);
8247 static rtx
8248 ix86_function_value (const_tree valtype, const_tree fntype_or_decl, bool)
8250 machine_mode mode, orig_mode;
8252 orig_mode = TYPE_MODE (valtype);
8253 mode = type_natural_mode (valtype, NULL, true);
8254 return ix86_function_value_1 (valtype, fntype_or_decl, orig_mode, mode);
8257 /* Return an RTX representing a place where a function returns
8258 or receives pointer bounds, or NULL if no bounds are returned.
8260 VALTYPE is a data type of a value returned by the function.
8262 FN_DECL_OR_TYPE is a tree node representing FUNCTION_DECL
8263 or FUNCTION_TYPE of the function.
8265 If OUTGOING is false, return a place in which the caller will
8266 see the return value. Otherwise, return a place where a
8267 function returns a value. */
8269 static rtx
8270 ix86_function_value_bounds (const_tree valtype,
8271 const_tree fntype_or_decl ATTRIBUTE_UNUSED,
8272 bool outgoing ATTRIBUTE_UNUSED)
8274 rtx res = NULL_RTX;
8276 if (BOUNDED_TYPE_P (valtype))
8277 res = gen_rtx_REG (BNDmode, FIRST_BND_REG);
8278 else if (chkp_type_has_pointer (valtype))
8280 bitmap slots;
8281 rtx bounds[2];
8282 bitmap_iterator bi;
8283 unsigned i, bnd_no = 0;
8285 bitmap_obstack_initialize (NULL);
8286 slots = BITMAP_ALLOC (NULL);
8287 chkp_find_bound_slots (valtype, slots);
8289 EXECUTE_IF_SET_IN_BITMAP (slots, 0, i, bi)
8291 rtx reg = gen_rtx_REG (BNDmode, FIRST_BND_REG + bnd_no);
8292 rtx offs = GEN_INT (i * POINTER_SIZE / BITS_PER_UNIT);
8293 gcc_assert (bnd_no < 2);
8294 bounds[bnd_no++] = gen_rtx_EXPR_LIST (VOIDmode, reg, offs);
8297 res = gen_rtx_PARALLEL (VOIDmode, gen_rtvec_v (bnd_no, bounds));
8299 BITMAP_FREE (slots);
8300 bitmap_obstack_release (NULL);
8302 else
8303 res = NULL_RTX;
8305 return res;
8308 /* Pointer function arguments and return values are promoted to
8309 word_mode. */
8311 static machine_mode
8312 ix86_promote_function_mode (const_tree type, machine_mode mode,
8313 int *punsignedp, const_tree fntype,
8314 int for_return)
8316 if (type != NULL_TREE && POINTER_TYPE_P (type))
8318 *punsignedp = POINTERS_EXTEND_UNSIGNED;
8319 return word_mode;
8321 return default_promote_function_mode (type, mode, punsignedp, fntype,
8322 for_return);
8325 /* Return true if a structure, union or array with MODE containing FIELD
8326 should be accessed using BLKmode. */
8328 static bool
8329 ix86_member_type_forces_blk (const_tree field, machine_mode mode)
8331 /* Union with XFmode must be in BLKmode. */
8332 return (mode == XFmode
8333 && (TREE_CODE (DECL_FIELD_CONTEXT (field)) == UNION_TYPE
8334 || TREE_CODE (DECL_FIELD_CONTEXT (field)) == QUAL_UNION_TYPE));
8337 static rtx
8338 ix86_libcall_value (machine_mode mode)
8340 return ix86_function_value_1 (NULL, NULL, mode, mode);
8343 /* Return true iff type is returned in memory. */
8345 static bool
8346 ix86_return_in_memory (const_tree type, const_tree fntype ATTRIBUTE_UNUSED)
8348 #ifdef SUBTARGET_RETURN_IN_MEMORY
8349 return SUBTARGET_RETURN_IN_MEMORY (type, fntype);
8350 #else
8351 const machine_mode mode = type_natural_mode (type, NULL, true);
8352 HOST_WIDE_INT size;
8354 if (POINTER_BOUNDS_TYPE_P (type))
8355 return false;
8357 if (TARGET_64BIT)
8359 if (ix86_function_type_abi (fntype) == MS_ABI)
8361 size = int_size_in_bytes (type);
8363 /* __m128 is returned in xmm0. */
8364 if ((!type || VECTOR_INTEGER_TYPE_P (type)
8365 || INTEGRAL_TYPE_P (type)
8366 || VECTOR_FLOAT_TYPE_P (type))
8367 && (SCALAR_INT_MODE_P (mode) || VECTOR_MODE_P (mode))
8368 && !COMPLEX_MODE_P (mode)
8369 && (GET_MODE_SIZE (mode) == 16 || size == 16))
8370 return false;
8372 /* Otherwise, the size must be exactly in [1248]. */
8373 return size != 1 && size != 2 && size != 4 && size != 8;
8375 else
8377 int needed_intregs, needed_sseregs;
8379 return examine_argument (mode, type, 1,
8380 &needed_intregs, &needed_sseregs);
8383 else
8385 if (mode == BLKmode)
8386 return true;
8388 size = int_size_in_bytes (type);
8390 if (MS_AGGREGATE_RETURN && AGGREGATE_TYPE_P (type) && size <= 8)
8391 return false;
8393 if (VECTOR_MODE_P (mode) || mode == TImode)
8395 /* User-created vectors small enough to fit in EAX. */
8396 if (size < 8)
8397 return false;
8399 /* Unless the ABI prescribes otherwise,
8400 MMX/3dNow values are returned in MM0 if available. */
8402 if (size == 8)
8403 return TARGET_VECT8_RETURNS || !TARGET_MMX;
8405 /* SSE values are returned in XMM0 if available. */
8406 if (size == 16)
8407 return !TARGET_SSE;
8409 /* AVX values are returned in YMM0 if available. */
8410 if (size == 32)
8411 return !TARGET_AVX;
8413 /* AVX512F values are returned in ZMM0 if available. */
8414 if (size == 64)
8415 return !TARGET_AVX512F;
8418 if (mode == XFmode)
8419 return false;
8421 if (size > 12)
8422 return true;
8424 /* OImode shouldn't be used directly. */
8425 gcc_assert (mode != OImode);
8427 return false;
8429 #endif
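/* Illustrative consequences of the logic above: on ia32 a 16-byte
   BLKmode struct is always returned in memory, a 16-byte vector is
   returned in %xmm0 only when SSE is enabled, and small aggregates of at
   most 8 bytes with a scalar mode stay in registers when
   MS_AGGREGATE_RETURN holds; on SysV x86-64 the decision simply follows
   examine_argument, so anything classified as MEMORY is returned through
   a hidden pointer.  */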
8433 /* Create the va_list data type. */
8435 /* Returns the calling convention specific va_list data type.
8436 The argument ABI can be DEFAULT_ABI, MS_ABI, or SYSV_ABI. */
8438 static tree
8439 ix86_build_builtin_va_list_abi (enum calling_abi abi)
8441 tree f_gpr, f_fpr, f_ovf, f_sav, record, type_decl;
8443 /* For i386 we use plain pointer to argument area. */
8444 if (!TARGET_64BIT || abi == MS_ABI)
8445 return build_pointer_type (char_type_node);
8447 record = lang_hooks.types.make_type (RECORD_TYPE);
8448 type_decl = build_decl (BUILTINS_LOCATION,
8449 TYPE_DECL, get_identifier ("__va_list_tag"), record);
8451 f_gpr = build_decl (BUILTINS_LOCATION,
8452 FIELD_DECL, get_identifier ("gp_offset"),
8453 unsigned_type_node);
8454 f_fpr = build_decl (BUILTINS_LOCATION,
8455 FIELD_DECL, get_identifier ("fp_offset"),
8456 unsigned_type_node);
8457 f_ovf = build_decl (BUILTINS_LOCATION,
8458 FIELD_DECL, get_identifier ("overflow_arg_area"),
8459 ptr_type_node);
8460 f_sav = build_decl (BUILTINS_LOCATION,
8461 FIELD_DECL, get_identifier ("reg_save_area"),
8462 ptr_type_node);
8464 va_list_gpr_counter_field = f_gpr;
8465 va_list_fpr_counter_field = f_fpr;
8467 DECL_FIELD_CONTEXT (f_gpr) = record;
8468 DECL_FIELD_CONTEXT (f_fpr) = record;
8469 DECL_FIELD_CONTEXT (f_ovf) = record;
8470 DECL_FIELD_CONTEXT (f_sav) = record;
8472 TYPE_STUB_DECL (record) = type_decl;
8473 TYPE_NAME (record) = type_decl;
8474 TYPE_FIELDS (record) = f_gpr;
8475 DECL_CHAIN (f_gpr) = f_fpr;
8476 DECL_CHAIN (f_fpr) = f_ovf;
8477 DECL_CHAIN (f_ovf) = f_sav;
8479 layout_type (record);
8481 /* The correct type is an array type of one element. */
8482 return build_array_type (record, build_index_type (size_zero_node));
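/* The record built above corresponds to the familiar SysV x86-64 va_list
   layout, roughly equivalent to:

     typedef struct __va_list_tag {
       unsigned int gp_offset;
       unsigned int fp_offset;
       void *overflow_arg_area;
       void *reg_save_area;
     } __builtin_va_list[1];

   For ia32 and the MS ABI a plain `char *' is used instead, as the early
   return above shows.  */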
8485 /* Setup the builtin va_list data type and for 64-bit the additional
8486 calling convention specific va_list data types. */
8488 static tree
8489 ix86_build_builtin_va_list (void)
8491 tree ret = ix86_build_builtin_va_list_abi (ix86_abi);
8493 /* Initialize abi specific va_list builtin types. */
8494 if (TARGET_64BIT)
8496 tree t;
8497 if (ix86_abi == MS_ABI)
8499 t = ix86_build_builtin_va_list_abi (SYSV_ABI);
8500 if (TREE_CODE (t) != RECORD_TYPE)
8501 t = build_variant_type_copy (t);
8502 sysv_va_list_type_node = t;
8504 else
8506 t = ret;
8507 if (TREE_CODE (t) != RECORD_TYPE)
8508 t = build_variant_type_copy (t);
8509 sysv_va_list_type_node = t;
8511 if (ix86_abi != MS_ABI)
8513 t = ix86_build_builtin_va_list_abi (MS_ABI);
8514 if (TREE_CODE (t) != RECORD_TYPE)
8515 t = build_variant_type_copy (t);
8516 ms_va_list_type_node = t;
8518 else
8520 t = ret;
8521 if (TREE_CODE (t) != RECORD_TYPE)
8522 t = build_variant_type_copy (t);
8523 ms_va_list_type_node = t;
8527 return ret;
8530 /* Worker function for TARGET_SETUP_INCOMING_VARARGS. */
8532 static void
8533 setup_incoming_varargs_64 (CUMULATIVE_ARGS *cum)
8535 rtx save_area, mem;
8536 alias_set_type set;
8537 int i, max;
8539 /* GPR size of varargs save area. */
8540 if (cfun->va_list_gpr_size)
8541 ix86_varargs_gpr_size = X86_64_REGPARM_MAX * UNITS_PER_WORD;
8542 else
8543 ix86_varargs_gpr_size = 0;
8545 /* FPR size of varargs save area. We don't need it if we don't pass
8546 anything in SSE registers. */
8547 if (TARGET_SSE && cfun->va_list_fpr_size)
8548 ix86_varargs_fpr_size = X86_64_SSE_REGPARM_MAX * 16;
8549 else
8550 ix86_varargs_fpr_size = 0;
8552 if (! ix86_varargs_gpr_size && ! ix86_varargs_fpr_size)
8553 return;
8555 save_area = frame_pointer_rtx;
8556 set = get_varargs_alias_set ();
8558 max = cum->regno + cfun->va_list_gpr_size / UNITS_PER_WORD;
8559 if (max > X86_64_REGPARM_MAX)
8560 max = X86_64_REGPARM_MAX;
8562 for (i = cum->regno; i < max; i++)
8564 mem = gen_rtx_MEM (word_mode,
8565 plus_constant (Pmode, save_area, i * UNITS_PER_WORD));
8566 MEM_NOTRAP_P (mem) = 1;
8567 set_mem_alias_set (mem, set);
8568 emit_move_insn (mem,
8569 gen_rtx_REG (word_mode,
8570 x86_64_int_parameter_registers[i]));
8573 if (ix86_varargs_fpr_size)
8575 machine_mode smode;
8576 rtx_code_label *label;
8577 rtx test;
8579 /* Now emit code to save SSE registers. The AX parameter contains the number
8580 of SSE parameter registers used to call this function, though all we
8581 actually check here is the zero/non-zero status. */
8583 label = gen_label_rtx ();
8584 test = gen_rtx_EQ (VOIDmode, gen_rtx_REG (QImode, AX_REG), const0_rtx);
8585 emit_jump_insn (gen_cbranchqi4 (test, XEXP (test, 0), XEXP (test, 1),
8586 label));
8588 /* ??? If !TARGET_SSE_TYPELESS_STORES, would we perform better if
8589 we used movdqa (i.e. TImode) instead? Perhaps even better would
8590 be if we could determine the real mode of the data, via a hook
8591 into pass_stdarg. Ignore all that for now. */
8592 smode = V4SFmode;
8593 if (crtl->stack_alignment_needed < GET_MODE_ALIGNMENT (smode))
8594 crtl->stack_alignment_needed = GET_MODE_ALIGNMENT (smode);
8596 max = cum->sse_regno + cfun->va_list_fpr_size / 16;
8597 if (max > X86_64_SSE_REGPARM_MAX)
8598 max = X86_64_SSE_REGPARM_MAX;
8600 for (i = cum->sse_regno; i < max; ++i)
8602 mem = plus_constant (Pmode, save_area,
8603 i * 16 + ix86_varargs_gpr_size);
8604 mem = gen_rtx_MEM (smode, mem);
8605 MEM_NOTRAP_P (mem) = 1;
8606 set_mem_alias_set (mem, set);
8607 set_mem_align (mem, GET_MODE_ALIGNMENT (smode));
8609 emit_move_insn (mem, gen_rtx_REG (smode, SSE_REGNO (i)));
8612 emit_label (label);
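/* A sketch of the varargs register save area laid out above (illustrative
   only; the sizes follow directly from the code):

     save_area + 0                         word-sized GP slots,
       ...                                 X86_64_REGPARM_MAX of them
     save_area + ix86_varargs_gpr_size     16-byte SSE slots,
       ...                                 X86_64_SSE_REGPARM_MAX of them

   The gp_offset and fp_offset fields of the va_list index into this area.  */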
8616 static void
8617 setup_incoming_varargs_ms_64 (CUMULATIVE_ARGS *cum)
8619 alias_set_type set = get_varargs_alias_set ();
8620 int i;
8622 /* Reset to zero, as a SysV va_arg may have been used
8623 before. */
8624 ix86_varargs_gpr_size = 0;
8625 ix86_varargs_fpr_size = 0;
8627 for (i = cum->regno; i < X86_64_MS_REGPARM_MAX; i++)
8629 rtx reg, mem;
8631 mem = gen_rtx_MEM (Pmode,
8632 plus_constant (Pmode, virtual_incoming_args_rtx,
8633 i * UNITS_PER_WORD));
8634 MEM_NOTRAP_P (mem) = 1;
8635 set_mem_alias_set (mem, set);
8637 reg = gen_rtx_REG (Pmode, x86_64_ms_abi_int_parameter_registers[i]);
8638 emit_move_insn (mem, reg);
8642 static void
8643 ix86_setup_incoming_varargs (cumulative_args_t cum_v, machine_mode mode,
8644 tree type, int *, int no_rtl)
8646 CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
8647 CUMULATIVE_ARGS next_cum;
8648 tree fntype;
8650 /* This argument doesn't appear to be used anymore, which is good,
8651 because the old code here didn't suppress rtl generation. */
8652 gcc_assert (!no_rtl);
8654 if (!TARGET_64BIT)
8655 return;
8657 fntype = TREE_TYPE (current_function_decl);
8659 /* For varargs, we do not want to skip the dummy va_dcl argument.
8660 For stdargs, we do want to skip the last named argument. */
8661 next_cum = *cum;
8662 if (stdarg_p (fntype))
8663 ix86_function_arg_advance (pack_cumulative_args (&next_cum), mode, type,
8664 true);
8666 if (cum->call_abi == MS_ABI)
8667 setup_incoming_varargs_ms_64 (&next_cum);
8668 else
8669 setup_incoming_varargs_64 (&next_cum);
8672 static void
8673 ix86_setup_incoming_vararg_bounds (cumulative_args_t cum_v,
8674 enum machine_mode mode,
8675 tree type,
8676 int *pretend_size ATTRIBUTE_UNUSED,
8677 int no_rtl)
8679 CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
8680 CUMULATIVE_ARGS next_cum;
8681 tree fntype;
8682 rtx save_area;
8683 int bnd_reg, i, max;
8685 gcc_assert (!no_rtl);
8687 /* Do nothing if we use plain pointer to argument area. */
8688 if (!TARGET_64BIT || cum->call_abi == MS_ABI)
8689 return;
8691 fntype = TREE_TYPE (current_function_decl);
8693 /* For varargs, we do not want to skip the dummy va_dcl argument.
8694 For stdargs, we do want to skip the last named argument. */
8695 next_cum = *cum;
8696 if (stdarg_p (fntype))
8697 ix86_function_arg_advance (pack_cumulative_args (&next_cum), mode, type,
8698 true);
8699 save_area = frame_pointer_rtx;
8701 max = cum->regno + cfun->va_list_gpr_size / UNITS_PER_WORD;
8702 if (max > X86_64_REGPARM_MAX)
8703 max = X86_64_REGPARM_MAX;
8705 bnd_reg = cum->bnd_regno + cum->force_bnd_pass;
8706 if (chkp_function_instrumented_p (current_function_decl))
8707 for (i = cum->regno; i < max; i++)
8709 rtx addr = plus_constant (Pmode, save_area, i * UNITS_PER_WORD);
8710 rtx reg = gen_rtx_REG (DImode,
8711 x86_64_int_parameter_registers[i]);
8712 rtx ptr = reg;
8713 rtx bounds;
8715 if (bnd_reg <= LAST_BND_REG)
8716 bounds = gen_rtx_REG (BNDmode, bnd_reg);
8717 else
8719 rtx ldx_addr =
8720 plus_constant (Pmode, arg_pointer_rtx,
8721 (LAST_BND_REG - bnd_reg) * GET_MODE_SIZE (Pmode));
8722 bounds = gen_reg_rtx (BNDmode);
8723 emit_insn (BNDmode == BND64mode
8724 ? gen_bnd64_ldx (bounds, ldx_addr, ptr)
8725 : gen_bnd32_ldx (bounds, ldx_addr, ptr));
8728 emit_insn (BNDmode == BND64mode
8729 ? gen_bnd64_stx (addr, ptr, bounds)
8730 : gen_bnd32_stx (addr, ptr, bounds));
8732 bnd_reg++;
8737 /* Return true if TYPE is the plain char * flavor of va_list. */
8739 static bool
8740 is_va_list_char_pointer (tree type)
8742 tree canonic;
8744 /* For 32-bit it is always true. */
8745 if (!TARGET_64BIT)
8746 return true;
8747 canonic = ix86_canonical_va_list_type (type);
8748 return (canonic == ms_va_list_type_node
8749 || (ix86_abi == MS_ABI && canonic == va_list_type_node));
8752 /* Implement va_start. */
8754 static void
8755 ix86_va_start (tree valist, rtx nextarg)
8757 HOST_WIDE_INT words, n_gpr, n_fpr;
8758 tree f_gpr, f_fpr, f_ovf, f_sav;
8759 tree gpr, fpr, ovf, sav, t;
8760 tree type;
8761 rtx ovf_rtx;
8763 if (flag_split_stack
8764 && cfun->machine->split_stack_varargs_pointer == NULL_RTX)
8766 unsigned int scratch_regno;
8768 /* When we are splitting the stack, we can't refer to the stack
8769 arguments using internal_arg_pointer, because they may be on
8770 the old stack. The split stack prologue will arrange to
8771 leave a pointer to the old stack arguments in a scratch
8772 register, which we here copy to a pseudo-register. The split
8773 stack prologue can't set the pseudo-register directly because
8774 it (the prologue) runs before any registers have been saved. */
8776 scratch_regno = split_stack_prologue_scratch_regno ();
8777 if (scratch_regno != INVALID_REGNUM)
8779 rtx reg;
8780 rtx_insn *seq;
8782 reg = gen_reg_rtx (Pmode);
8783 cfun->machine->split_stack_varargs_pointer = reg;
8785 start_sequence ();
8786 emit_move_insn (reg, gen_rtx_REG (Pmode, scratch_regno));
8787 seq = get_insns ();
8788 end_sequence ();
8790 push_topmost_sequence ();
8791 emit_insn_after (seq, entry_of_function ());
8792 pop_topmost_sequence ();
8796 /* Only the 64-bit target needs something special. */
8797 if (!TARGET_64BIT || is_va_list_char_pointer (TREE_TYPE (valist)))
8799 if (cfun->machine->split_stack_varargs_pointer == NULL_RTX)
8800 std_expand_builtin_va_start (valist, nextarg);
8801 else
8803 rtx va_r, next;
8805 va_r = expand_expr (valist, NULL_RTX, VOIDmode, EXPAND_WRITE);
8806 next = expand_binop (ptr_mode, add_optab,
8807 cfun->machine->split_stack_varargs_pointer,
8808 crtl->args.arg_offset_rtx,
8809 NULL_RTX, 0, OPTAB_LIB_WIDEN);
8810 convert_move (va_r, next, 0);
8812 /* Store zero bounds for va_list. */
8813 if (chkp_function_instrumented_p (current_function_decl))
8814 chkp_expand_bounds_reset_for_mem (valist,
8815 make_tree (TREE_TYPE (valist),
8816 next));
8819 return;
8822 f_gpr = TYPE_FIELDS (TREE_TYPE (sysv_va_list_type_node));
8823 f_fpr = DECL_CHAIN (f_gpr);
8824 f_ovf = DECL_CHAIN (f_fpr);
8825 f_sav = DECL_CHAIN (f_ovf);
8827 valist = build_simple_mem_ref (valist);
8828 TREE_TYPE (valist) = TREE_TYPE (sysv_va_list_type_node);
8829 /* The following should be folded into the MEM_REF offset. */
8830 gpr = build3 (COMPONENT_REF, TREE_TYPE (f_gpr), unshare_expr (valist),
8831 f_gpr, NULL_TREE);
8832 fpr = build3 (COMPONENT_REF, TREE_TYPE (f_fpr), unshare_expr (valist),
8833 f_fpr, NULL_TREE);
8834 ovf = build3 (COMPONENT_REF, TREE_TYPE (f_ovf), unshare_expr (valist),
8835 f_ovf, NULL_TREE);
8836 sav = build3 (COMPONENT_REF, TREE_TYPE (f_sav), unshare_expr (valist),
8837 f_sav, NULL_TREE);
8839 /* Count number of gp and fp argument registers used. */
8840 words = crtl->args.info.words;
8841 n_gpr = crtl->args.info.regno;
8842 n_fpr = crtl->args.info.sse_regno;
8844 if (cfun->va_list_gpr_size)
8846 type = TREE_TYPE (gpr);
8847 t = build2 (MODIFY_EXPR, type,
8848 gpr, build_int_cst (type, n_gpr * 8));
8849 TREE_SIDE_EFFECTS (t) = 1;
8850 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
8853 if (TARGET_SSE && cfun->va_list_fpr_size)
8855 type = TREE_TYPE (fpr);
8856 t = build2 (MODIFY_EXPR, type, fpr,
8857 build_int_cst (type, n_fpr * 16 + 8*X86_64_REGPARM_MAX));
8858 TREE_SIDE_EFFECTS (t) = 1;
8859 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
8862 /* Find the overflow area. */
8863 type = TREE_TYPE (ovf);
8864 if (cfun->machine->split_stack_varargs_pointer == NULL_RTX)
8865 ovf_rtx = crtl->args.internal_arg_pointer;
8866 else
8867 ovf_rtx = cfun->machine->split_stack_varargs_pointer;
8868 t = make_tree (type, ovf_rtx);
8869 if (words != 0)
8870 t = fold_build_pointer_plus_hwi (t, words * UNITS_PER_WORD);
8872 /* Store zero bounds for overflow area pointer. */
8873 if (chkp_function_instrumented_p (current_function_decl))
8874 chkp_expand_bounds_reset_for_mem (ovf, t);
8876 t = build2 (MODIFY_EXPR, type, ovf, t);
8877 TREE_SIDE_EFFECTS (t) = 1;
8878 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
8880 if (ix86_varargs_gpr_size || ix86_varargs_fpr_size)
8882 /* Find the register save area.
8883 The function prologue saves it right above the stack frame. */
8884 type = TREE_TYPE (sav);
8885 t = make_tree (type, frame_pointer_rtx);
8886 if (!ix86_varargs_gpr_size)
8887 t = fold_build_pointer_plus_hwi (t, -8 * X86_64_REGPARM_MAX);
8889 /* Store zero bounds for save area pointer. */
8890 if (chkp_function_instrumented_p (current_function_decl))
8891 chkp_expand_bounds_reset_for_mem (sav, t);
8893 t = build2 (MODIFY_EXPR, type, sav, t);
8894 TREE_SIDE_EFFECTS (t) = 1;
8895 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
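/* For the SysV ABI, the expansion above behaves roughly like the C sketch
   below (illustrative only; the real work is done on trees, and the chkp
   bounds resets are omitted):

     ap->gp_offset = n_gpr * 8;
     ap->fp_offset = X86_64_REGPARM_MAX * 8 + n_fpr * 16;
     ap->overflow_arg_area = first_stack_arg;
     ap->reg_save_area = frame_pointer
                         - (ix86_varargs_gpr_size ? 0 : 8 * X86_64_REGPARM_MAX);

   where n_gpr, n_fpr and first_stack_arg stand for the values computed from
   crtl->args above.  */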
8899 /* Implement va_arg. */
8901 static tree
8902 ix86_gimplify_va_arg (tree valist, tree type, gimple_seq *pre_p,
8903 gimple_seq *post_p)
8905 static const int intreg[6] = { 0, 1, 2, 3, 4, 5 };
8906 tree f_gpr, f_fpr, f_ovf, f_sav;
8907 tree gpr, fpr, ovf, sav, t;
8908 int size, rsize;
8909 tree lab_false, lab_over = NULL_TREE;
8910 tree addr, t2;
8911 rtx container;
8912 int indirect_p = 0;
8913 tree ptrtype;
8914 machine_mode nat_mode;
8915 unsigned int arg_boundary;
8917 /* Only the 64-bit target needs something special. */
8918 if (!TARGET_64BIT || is_va_list_char_pointer (TREE_TYPE (valist)))
8919 return std_gimplify_va_arg_expr (valist, type, pre_p, post_p);
8921 f_gpr = TYPE_FIELDS (TREE_TYPE (sysv_va_list_type_node));
8922 f_fpr = DECL_CHAIN (f_gpr);
8923 f_ovf = DECL_CHAIN (f_fpr);
8924 f_sav = DECL_CHAIN (f_ovf);
8926 gpr = build3 (COMPONENT_REF, TREE_TYPE (f_gpr),
8927 build_va_arg_indirect_ref (valist), f_gpr, NULL_TREE);
8928 valist = build_va_arg_indirect_ref (valist);
8929 fpr = build3 (COMPONENT_REF, TREE_TYPE (f_fpr), valist, f_fpr, NULL_TREE);
8930 ovf = build3 (COMPONENT_REF, TREE_TYPE (f_ovf), valist, f_ovf, NULL_TREE);
8931 sav = build3 (COMPONENT_REF, TREE_TYPE (f_sav), valist, f_sav, NULL_TREE);
8933 indirect_p = pass_by_reference (NULL, TYPE_MODE (type), type, false);
8934 if (indirect_p)
8935 type = build_pointer_type (type);
8936 size = int_size_in_bytes (type);
8937 rsize = (size + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
8939 nat_mode = type_natural_mode (type, NULL, false);
8940 switch (nat_mode)
8942 case V8SFmode:
8943 case V8SImode:
8944 case V32QImode:
8945 case V16HImode:
8946 case V4DFmode:
8947 case V4DImode:
8948 case V16SFmode:
8949 case V16SImode:
8950 case V64QImode:
8951 case V32HImode:
8952 case V8DFmode:
8953 case V8DImode:
8954 /* Unnamed 256- and 512-bit vector mode parameters are passed on the stack. */
8955 if (!TARGET_64BIT_MS_ABI)
8957 container = NULL;
8958 break;
8961 default:
8962 container = construct_container (nat_mode, TYPE_MODE (type),
8963 type, 0, X86_64_REGPARM_MAX,
8964 X86_64_SSE_REGPARM_MAX, intreg,
8966 break;
8969 /* Pull the value out of the saved registers. */
8971 addr = create_tmp_var (ptr_type_node, "addr");
8973 if (container)
8975 int needed_intregs, needed_sseregs;
8976 bool need_temp;
8977 tree int_addr, sse_addr;
8979 lab_false = create_artificial_label (UNKNOWN_LOCATION);
8980 lab_over = create_artificial_label (UNKNOWN_LOCATION);
8982 examine_argument (nat_mode, type, 0, &needed_intregs, &needed_sseregs);
8984 need_temp = (!REG_P (container)
8985 && ((needed_intregs && TYPE_ALIGN (type) > 64)
8986 || TYPE_ALIGN (type) > 128));
8988 /* In case we are passing a structure, verify that it is a consecutive block
8989 on the register save area. If not, we need to do moves. */
8990 if (!need_temp && !REG_P (container))
8992 /* Verify that all registers are strictly consecutive. */
8993 if (SSE_REGNO_P (REGNO (XEXP (XVECEXP (container, 0, 0), 0))))
8995 int i;
8997 for (i = 0; i < XVECLEN (container, 0) && !need_temp; i++)
8999 rtx slot = XVECEXP (container, 0, i);
9000 if (REGNO (XEXP (slot, 0)) != FIRST_SSE_REG + (unsigned int) i
9001 || INTVAL (XEXP (slot, 1)) != i * 16)
9002 need_temp = 1;
9005 else
9007 int i;
9009 for (i = 0; i < XVECLEN (container, 0) && !need_temp; i++)
9011 rtx slot = XVECEXP (container, 0, i);
9012 if (REGNO (XEXP (slot, 0)) != (unsigned int) i
9013 || INTVAL (XEXP (slot, 1)) != i * 8)
9014 need_temp = 1;
9018 if (!need_temp)
9020 int_addr = addr;
9021 sse_addr = addr;
9023 else
9025 int_addr = create_tmp_var (ptr_type_node, "int_addr");
9026 sse_addr = create_tmp_var (ptr_type_node, "sse_addr");
9029 /* First ensure that we fit completely in registers. */
9030 if (needed_intregs)
9032 t = build_int_cst (TREE_TYPE (gpr),
9033 (X86_64_REGPARM_MAX - needed_intregs + 1) * 8);
9034 t = build2 (GE_EXPR, boolean_type_node, gpr, t);
9035 t2 = build1 (GOTO_EXPR, void_type_node, lab_false);
9036 t = build3 (COND_EXPR, void_type_node, t, t2, NULL_TREE);
9037 gimplify_and_add (t, pre_p);
9039 if (needed_sseregs)
9041 t = build_int_cst (TREE_TYPE (fpr),
9042 (X86_64_SSE_REGPARM_MAX - needed_sseregs + 1) * 16
9043 + X86_64_REGPARM_MAX * 8);
9044 t = build2 (GE_EXPR, boolean_type_node, fpr, t);
9045 t2 = build1 (GOTO_EXPR, void_type_node, lab_false);
9046 t = build3 (COND_EXPR, void_type_node, t, t2, NULL_TREE);
9047 gimplify_and_add (t, pre_p);
9050 /* Compute index to start of area used for integer regs. */
9051 if (needed_intregs)
9053 /* int_addr = gpr + sav; */
9054 t = fold_build_pointer_plus (sav, gpr);
9055 gimplify_assign (int_addr, t, pre_p);
9057 if (needed_sseregs)
9059 /* sse_addr = fpr + sav; */
9060 t = fold_build_pointer_plus (sav, fpr);
9061 gimplify_assign (sse_addr, t, pre_p);
9063 if (need_temp)
9065 int i, prev_size = 0;
9066 tree temp = create_tmp_var (type, "va_arg_tmp");
9068 /* addr = &temp; */
9069 t = build1 (ADDR_EXPR, build_pointer_type (type), temp);
9070 gimplify_assign (addr, t, pre_p);
9072 for (i = 0; i < XVECLEN (container, 0); i++)
9074 rtx slot = XVECEXP (container, 0, i);
9075 rtx reg = XEXP (slot, 0);
9076 machine_mode mode = GET_MODE (reg);
9077 tree piece_type;
9078 tree addr_type;
9079 tree daddr_type;
9080 tree src_addr, src;
9081 int src_offset;
9082 tree dest_addr, dest;
9083 int cur_size = GET_MODE_SIZE (mode);
9085 gcc_assert (prev_size <= INTVAL (XEXP (slot, 1)));
9086 prev_size = INTVAL (XEXP (slot, 1));
9087 if (prev_size + cur_size > size)
9089 cur_size = size - prev_size;
9090 mode = mode_for_size (cur_size * BITS_PER_UNIT, MODE_INT, 1);
9091 if (mode == BLKmode)
9092 mode = QImode;
9094 piece_type = lang_hooks.types.type_for_mode (mode, 1);
9095 if (mode == GET_MODE (reg))
9096 addr_type = build_pointer_type (piece_type);
9097 else
9098 addr_type = build_pointer_type_for_mode (piece_type, ptr_mode,
9099 true);
9100 daddr_type = build_pointer_type_for_mode (piece_type, ptr_mode,
9101 true);
9103 if (SSE_REGNO_P (REGNO (reg)))
9105 src_addr = sse_addr;
9106 src_offset = (REGNO (reg) - FIRST_SSE_REG) * 16;
9108 else
9110 src_addr = int_addr;
9111 src_offset = REGNO (reg) * 8;
9113 src_addr = fold_convert (addr_type, src_addr);
9114 src_addr = fold_build_pointer_plus_hwi (src_addr, src_offset);
9116 dest_addr = fold_convert (daddr_type, addr);
9117 dest_addr = fold_build_pointer_plus_hwi (dest_addr, prev_size);
9118 if (cur_size == GET_MODE_SIZE (mode))
9120 src = build_va_arg_indirect_ref (src_addr);
9121 dest = build_va_arg_indirect_ref (dest_addr);
9123 gimplify_assign (dest, src, pre_p);
9125 else
9127 tree copy
9128 = build_call_expr (builtin_decl_implicit (BUILT_IN_MEMCPY),
9129 3, dest_addr, src_addr,
9130 size_int (cur_size));
9131 gimplify_and_add (copy, pre_p);
9133 prev_size += cur_size;
9137 if (needed_intregs)
9139 t = build2 (PLUS_EXPR, TREE_TYPE (gpr), gpr,
9140 build_int_cst (TREE_TYPE (gpr), needed_intregs * 8));
9141 gimplify_assign (gpr, t, pre_p);
9144 if (needed_sseregs)
9146 t = build2 (PLUS_EXPR, TREE_TYPE (fpr), fpr,
9147 build_int_cst (TREE_TYPE (fpr), needed_sseregs * 16));
9148 gimplify_assign (fpr, t, pre_p);
9151 gimple_seq_add_stmt (pre_p, gimple_build_goto (lab_over));
9153 gimple_seq_add_stmt (pre_p, gimple_build_label (lab_false));
9156 /* ... otherwise out of the overflow area. */
9158 /* When we align a parameter on the stack for the caller, if the parameter
9159 alignment is beyond MAX_SUPPORTED_STACK_ALIGNMENT, it will be
9160 aligned at MAX_SUPPORTED_STACK_ALIGNMENT. Here we match the callee
9161 with the caller. */
9162 arg_boundary = ix86_function_arg_boundary (VOIDmode, type);
9163 if ((unsigned int) arg_boundary > MAX_SUPPORTED_STACK_ALIGNMENT)
9164 arg_boundary = MAX_SUPPORTED_STACK_ALIGNMENT;
9166 /* Care for on-stack alignment if needed. */
9167 if (arg_boundary <= 64 || size == 0)
9168 t = ovf;
9169 else
9171 HOST_WIDE_INT align = arg_boundary / 8;
9172 t = fold_build_pointer_plus_hwi (ovf, align - 1);
9173 t = build2 (BIT_AND_EXPR, TREE_TYPE (t), t,
9174 build_int_cst (TREE_TYPE (t), -align));
9177 gimplify_expr (&t, pre_p, NULL, is_gimple_val, fb_rvalue);
9178 gimplify_assign (addr, t, pre_p);
9180 t = fold_build_pointer_plus_hwi (t, rsize * UNITS_PER_WORD);
9181 gimplify_assign (unshare_expr (ovf), t, pre_p);
9183 if (container)
9184 gimple_seq_add_stmt (pre_p, gimple_build_label (lab_over));
9186 ptrtype = build_pointer_type_for_mode (type, ptr_mode, true);
9187 addr = fold_convert (ptrtype, addr);
9189 if (indirect_p)
9190 addr = build_va_arg_indirect_ref (addr);
9191 return build_va_arg_indirect_ref (addr);
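/* The gimple emitted above implements, for a simple integer argument, the
   usual SysV va_arg algorithm, roughly (illustrative sketch only):

     if (ap->gp_offset <= (X86_64_REGPARM_MAX - 1) * 8)
       {
         val = *(int *) ((char *) ap->reg_save_area + ap->gp_offset);
         ap->gp_offset += 8;
       }
     else
       {
         val = *(int *) ap->overflow_arg_area;
         ap->overflow_arg_area = (char *) ap->overflow_arg_area + 8;
       }                                                                  */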
9194 /* Return true if OPNUM's MEM should be matched
9195 in movabs* patterns. */
9197 bool
9198 ix86_check_movabs (rtx insn, int opnum)
9200 rtx set, mem;
9202 set = PATTERN (insn);
9203 if (GET_CODE (set) == PARALLEL)
9204 set = XVECEXP (set, 0, 0);
9205 gcc_assert (GET_CODE (set) == SET);
9206 mem = XEXP (set, opnum);
9207 while (GET_CODE (mem) == SUBREG)
9208 mem = SUBREG_REG (mem);
9209 gcc_assert (MEM_P (mem));
9210 return volatile_ok || !MEM_VOLATILE_P (mem);
9213 /* Initialize the table of extra 80387 mathematical constants. */
9215 static void
9216 init_ext_80387_constants (void)
9218 static const char * cst[5] =
9220 "0.3010299956639811952256464283594894482", /* 0: fldlg2 */
9221 "0.6931471805599453094286904741849753009", /* 1: fldln2 */
9222 "1.4426950408889634073876517827983434472", /* 2: fldl2e */
9223 "3.3219280948873623478083405569094566090", /* 3: fldl2t */
9224 "3.1415926535897932385128089594061862044", /* 4: fldpi */
9226 int i;
9228 for (i = 0; i < 5; i++)
9230 real_from_string (&ext_80387_constants_table[i], cst[i]);
9231 /* Ensure each constant is rounded to XFmode precision. */
9232 real_convert (&ext_80387_constants_table[i],
9233 XFmode, &ext_80387_constants_table[i]);
9236 ext_80387_constants_init = 1;
9239 /* Return non-zero if the constant is something that
9240 can be loaded with a special instruction. */
9243 standard_80387_constant_p (rtx x)
9245 machine_mode mode = GET_MODE (x);
9247 REAL_VALUE_TYPE r;
9249 if (!(X87_FLOAT_MODE_P (mode) && (GET_CODE (x) == CONST_DOUBLE)))
9250 return -1;
9252 if (x == CONST0_RTX (mode))
9253 return 1;
9254 if (x == CONST1_RTX (mode))
9255 return 2;
9257 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
9259 /* For XFmode constants, try to find a special 80387 instruction when
9260 optimizing for size or on those CPUs that benefit from them. */
9261 if (mode == XFmode
9262 && (optimize_function_for_size_p (cfun) || TARGET_EXT_80387_CONSTANTS))
9264 int i;
9266 if (! ext_80387_constants_init)
9267 init_ext_80387_constants ();
9269 for (i = 0; i < 5; i++)
9270 if (real_identical (&r, &ext_80387_constants_table[i]))
9271 return i + 3;
9274 /* Load of the constant -0.0 or -1.0 will be split as
9275 fldz;fchs or fld1;fchs sequence. */
9276 if (real_isnegzero (&r))
9277 return 8;
9278 if (real_identical (&r, &dconstm1))
9279 return 9;
9281 return 0;
9284 /* Return the opcode of the special instruction to be used to load
9285 the constant X. */
9287 const char *
9288 standard_80387_constant_opcode (rtx x)
9290 switch (standard_80387_constant_p (x))
9292 case 1:
9293 return "fldz";
9294 case 2:
9295 return "fld1";
9296 case 3:
9297 return "fldlg2";
9298 case 4:
9299 return "fldln2";
9300 case 5:
9301 return "fldl2e";
9302 case 6:
9303 return "fldl2t";
9304 case 7:
9305 return "fldpi";
9306 case 8:
9307 case 9:
9308 return "#";
9309 default:
9310 gcc_unreachable ();
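/* Taken together, standard_80387_constant_p and the opcode switch above are
   presumably used from the x87 move patterns roughly like this (hedged
   sketch, not the exact machine-description text):

     if (standard_80387_constant_p (operands[1]) > 0)
       return standard_80387_constant_opcode (operands[1]);

   so that, e.g., loading 1.0 emits "fld1" and loading an XFmode pi constant
   emits "fldpi" when the extended constants are enabled.  */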
9314 /* Return the CONST_DOUBLE representing the 80387 constant that is
9315 loaded by the specified special instruction. The argument IDX
9316 matches the return value from standard_80387_constant_p. */
9319 standard_80387_constant_rtx (int idx)
9321 int i;
9323 if (! ext_80387_constants_init)
9324 init_ext_80387_constants ();
9326 switch (idx)
9328 case 3:
9329 case 4:
9330 case 5:
9331 case 6:
9332 case 7:
9333 i = idx - 3;
9334 break;
9336 default:
9337 gcc_unreachable ();
9340 return CONST_DOUBLE_FROM_REAL_VALUE (ext_80387_constants_table[i],
9341 XFmode);
9344 /* Return 1 if X is all 0s and 2 if X is all 1s
9345 in a supported SSE/AVX vector mode. */
9348 standard_sse_constant_p (rtx x)
9350 machine_mode mode = GET_MODE (x);
9352 if (x == const0_rtx || x == CONST0_RTX (GET_MODE (x)))
9353 return 1;
9354 if (vector_all_ones_operand (x, mode))
9355 switch (mode)
9357 case V16QImode:
9358 case V8HImode:
9359 case V4SImode:
9360 case V2DImode:
9361 if (TARGET_SSE2)
9362 return 2;
9363 case V32QImode:
9364 case V16HImode:
9365 case V8SImode:
9366 case V4DImode:
9367 if (TARGET_AVX2)
9368 return 2;
9369 case V64QImode:
9370 case V32HImode:
9371 case V16SImode:
9372 case V8DImode:
9373 if (TARGET_AVX512F)
9374 return 2;
9375 default:
9376 break;
9379 return 0;
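/* Some illustrative results of the classification above (assuming the
   named ISA flags are enabled):

     CONST0_RTX (V4SFmode)                  -> 1  (all zeros, any SSE level)
     all-ones CONST_VECTOR in V4SImode      -> 2  (needs TARGET_SSE2)
     all-ones CONST_VECTOR in V8SImode      -> 2  (needs TARGET_AVX2)
     anything else                          -> 0                            */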
9382 /* Return the opcode of the special instruction to be used to load
9383 the constant X. */
9385 const char *
9386 standard_sse_constant_opcode (rtx_insn *insn, rtx x)
9388 switch (standard_sse_constant_p (x))
9390 case 1:
9391 switch (get_attr_mode (insn))
9393 case MODE_XI:
9394 return "vpxord\t%g0, %g0, %g0";
9395 case MODE_V16SF:
9396 return TARGET_AVX512DQ ? "vxorps\t%g0, %g0, %g0"
9397 : "vpxord\t%g0, %g0, %g0";
9398 case MODE_V8DF:
9399 return TARGET_AVX512DQ ? "vxorpd\t%g0, %g0, %g0"
9400 : "vpxorq\t%g0, %g0, %g0";
9401 case MODE_TI:
9402 return TARGET_AVX512VL ? "vpxord\t%t0, %t0, %t0"
9403 : "%vpxor\t%0, %d0";
9404 case MODE_V2DF:
9405 return "%vxorpd\t%0, %d0";
9406 case MODE_V4SF:
9407 return "%vxorps\t%0, %d0";
9409 case MODE_OI:
9410 return TARGET_AVX512VL ? "vpxord\t%x0, %x0, %x0"
9411 : "vpxor\t%x0, %x0, %x0";
9412 case MODE_V4DF:
9413 return "vxorpd\t%x0, %x0, %x0";
9414 case MODE_V8SF:
9415 return "vxorps\t%x0, %x0, %x0";
9417 default:
9418 break;
9421 case 2:
9422 if (TARGET_AVX512VL
9423 || get_attr_mode (insn) == MODE_XI
9424 || get_attr_mode (insn) == MODE_V8DF
9425 || get_attr_mode (insn) == MODE_V16SF)
9426 return "vpternlogd\t{$0xFF, %g0, %g0, %g0|%g0, %g0, %g0, 0xFF}";
9427 if (TARGET_AVX)
9428 return "vpcmpeqd\t%0, %0, %0";
9429 else
9430 return "pcmpeqd\t%0, %0";
9432 default:
9433 break;
9435 gcc_unreachable ();
9438 /* Return true if OP contains a symbol reference. */
9440 bool
9441 symbolic_reference_mentioned_p (rtx op)
9443 const char *fmt;
9444 int i;
9446 if (GET_CODE (op) == SYMBOL_REF || GET_CODE (op) == LABEL_REF)
9447 return true;
9449 fmt = GET_RTX_FORMAT (GET_CODE (op));
9450 for (i = GET_RTX_LENGTH (GET_CODE (op)) - 1; i >= 0; i--)
9452 if (fmt[i] == 'E')
9454 int j;
9456 for (j = XVECLEN (op, i) - 1; j >= 0; j--)
9457 if (symbolic_reference_mentioned_p (XVECEXP (op, i, j)))
9458 return true;
9461 else if (fmt[i] == 'e' && symbolic_reference_mentioned_p (XEXP (op, i)))
9462 return true;
9465 return false;
9468 /* Return true if it is appropriate to emit `ret' instructions in the
9469 body of a function. Do this only if the epilogue is simple, needing a
9470 couple of insns. Prior to reloading, we can't tell how many registers
9471 must be saved, so return false then. Return false if there is no frame
9472 marker to de-allocate. */
9474 bool
9475 ix86_can_use_return_insn_p (void)
9477 struct ix86_frame frame;
9479 if (! reload_completed || frame_pointer_needed)
9480 return 0;
9482 /* Don't allow more than 32k pop, since that's all we can do
9483 with one instruction. */
9484 if (crtl->args.pops_args && crtl->args.size >= 32768)
9485 return 0;
9487 ix86_compute_frame_layout (&frame);
9488 return (frame.stack_pointer_offset == UNITS_PER_WORD
9489 && (frame.nregs + frame.nsseregs) == 0);
9492 /* Value should be nonzero if functions must have frame pointers.
9493 Zero means the frame pointer need not be set up (and parms may
9494 be accessed via the stack pointer) in functions that seem suitable. */
9496 static bool
9497 ix86_frame_pointer_required (void)
9499 /* If we accessed previous frames, then the generated code expects
9500 to be able to access the saved ebp value in our frame. */
9501 if (cfun->machine->accesses_prev_frame)
9502 return true;
9504 /* Several x86 OSes need a frame pointer for other reasons,
9505 usually pertaining to setjmp. */
9506 if (SUBTARGET_FRAME_POINTER_REQUIRED)
9507 return true;
9509 /* For older 32-bit runtimes, setjmp requires a valid frame pointer. */
9510 if (TARGET_32BIT_MS_ABI && cfun->calls_setjmp)
9511 return true;
9513 /* For Win64 SEH, very large frames need a frame pointer, as the maximum
9514 stack allocation is 4GB. */
9515 if (TARGET_64BIT_MS_ABI && get_frame_size () > SEH_MAX_FRAME_SIZE)
9516 return true;
9518 /* In ix86_option_override_internal, TARGET_OMIT_LEAF_FRAME_POINTER
9519 turns off the frame pointer by default. Turn it back on now if
9520 we've not got a leaf function. */
9521 if (TARGET_OMIT_LEAF_FRAME_POINTER
9522 && (!crtl->is_leaf
9523 || ix86_current_function_calls_tls_descriptor))
9524 return true;
9526 if (crtl->profile && !flag_fentry)
9527 return true;
9529 return false;
9532 /* Record that the current function accesses previous call frames. */
9534 void
9535 ix86_setup_frame_addresses (void)
9537 cfun->machine->accesses_prev_frame = 1;
9540 #ifndef USE_HIDDEN_LINKONCE
9541 # if defined(HAVE_GAS_HIDDEN) && (SUPPORTS_ONE_ONLY - 0)
9542 # define USE_HIDDEN_LINKONCE 1
9543 # else
9544 # define USE_HIDDEN_LINKONCE 0
9545 # endif
9546 #endif
9548 static int pic_labels_used;
9550 /* Fills in the label name that should be used for a pc thunk for
9551 the given register. */
9553 static void
9554 get_pc_thunk_name (char name[32], unsigned int regno)
9556 gcc_assert (!TARGET_64BIT);
9558 if (USE_HIDDEN_LINKONCE)
9559 sprintf (name, "__x86.get_pc_thunk.%s", reg_names[regno]);
9560 else
9561 ASM_GENERATE_INTERNAL_LABEL (name, "LPR", regno);
9565 /* This function generates the -fpic pc thunks: each one loads its register
9566 with the return address of the caller and then returns. */
9568 static void
9569 ix86_code_end (void)
9571 rtx xops[2];
9572 int regno;
9574 for (regno = AX_REG; regno <= SP_REG; regno++)
9576 char name[32];
9577 tree decl;
9579 if (!(pic_labels_used & (1 << regno)))
9580 continue;
9582 get_pc_thunk_name (name, regno);
9584 decl = build_decl (BUILTINS_LOCATION, FUNCTION_DECL,
9585 get_identifier (name),
9586 build_function_type_list (void_type_node, NULL_TREE));
9587 DECL_RESULT (decl) = build_decl (BUILTINS_LOCATION, RESULT_DECL,
9588 NULL_TREE, void_type_node);
9589 TREE_PUBLIC (decl) = 1;
9590 TREE_STATIC (decl) = 1;
9591 DECL_IGNORED_P (decl) = 1;
9593 #if TARGET_MACHO
9594 if (TARGET_MACHO)
9596 switch_to_section (darwin_sections[text_coal_section]);
9597 fputs ("\t.weak_definition\t", asm_out_file);
9598 assemble_name (asm_out_file, name);
9599 fputs ("\n\t.private_extern\t", asm_out_file);
9600 assemble_name (asm_out_file, name);
9601 putc ('\n', asm_out_file);
9602 ASM_OUTPUT_LABEL (asm_out_file, name);
9603 DECL_WEAK (decl) = 1;
9605 else
9606 #endif
9607 if (USE_HIDDEN_LINKONCE)
9609 cgraph_node::create (decl)->set_comdat_group (DECL_ASSEMBLER_NAME (decl));
9611 targetm.asm_out.unique_section (decl, 0);
9612 switch_to_section (get_named_section (decl, NULL, 0));
9614 targetm.asm_out.globalize_label (asm_out_file, name);
9615 fputs ("\t.hidden\t", asm_out_file);
9616 assemble_name (asm_out_file, name);
9617 putc ('\n', asm_out_file);
9618 ASM_DECLARE_FUNCTION_NAME (asm_out_file, name, decl);
9620 else
9622 switch_to_section (text_section);
9623 ASM_OUTPUT_LABEL (asm_out_file, name);
9626 DECL_INITIAL (decl) = make_node (BLOCK);
9627 current_function_decl = decl;
9628 init_function_start (decl);
9629 first_function_block_is_cold = false;
9630 /* Make sure unwind info is emitted for the thunk if needed. */
9631 final_start_function (emit_barrier (), asm_out_file, 1);
9633 /* Pad stack IP move with 4 instructions (two NOPs count
9634 as one instruction). */
9635 if (TARGET_PAD_SHORT_FUNCTION)
9637 int i = 8;
9639 while (i--)
9640 fputs ("\tnop\n", asm_out_file);
9643 xops[0] = gen_rtx_REG (Pmode, regno);
9644 xops[1] = gen_rtx_MEM (Pmode, stack_pointer_rtx);
9645 output_asm_insn ("mov%z0\t{%1, %0|%0, %1}", xops);
9646 output_asm_insn ("%!ret", NULL);
9647 final_end_function ();
9648 init_insn_lengths ();
9649 free_after_compilation (cfun);
9650 set_cfun (NULL);
9651 current_function_decl = NULL;
9654 if (flag_split_stack)
9655 file_end_indicate_split_stack ();
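/* The thunk emitted above for, say, %ebx looks roughly like this in AT&T
   syntax (illustrative only; the exact section and visibility directives
   depend on USE_HIDDEN_LINKONCE and the target):

       __x86.get_pc_thunk.bx:
               mov     (%esp), %ebx
               ret                                                        */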
9658 /* Emit code for the SET_GOT patterns. */
9660 const char *
9661 output_set_got (rtx dest, rtx label)
9663 rtx xops[3];
9665 xops[0] = dest;
9667 if (TARGET_VXWORKS_RTP && flag_pic)
9669 /* Load (*VXWORKS_GOTT_BASE) into the PIC register. */
9670 xops[2] = gen_rtx_MEM (Pmode,
9671 gen_rtx_SYMBOL_REF (Pmode, VXWORKS_GOTT_BASE));
9672 output_asm_insn ("mov{l}\t{%2, %0|%0, %2}", xops);
9674 /* Load (*VXWORKS_GOTT_BASE)[VXWORKS_GOTT_INDEX] into the PIC register.
9675 Use %P and a local symbol in order to print VXWORKS_GOTT_INDEX as
9676 an unadorned address. */
9677 xops[2] = gen_rtx_SYMBOL_REF (Pmode, VXWORKS_GOTT_INDEX);
9678 SYMBOL_REF_FLAGS (xops[2]) |= SYMBOL_FLAG_LOCAL;
9679 output_asm_insn ("mov{l}\t{%P2(%0), %0|%0, DWORD PTR %P2[%0]}", xops);
9680 return "";
9683 xops[1] = gen_rtx_SYMBOL_REF (Pmode, GOT_SYMBOL_NAME);
9685 if (!flag_pic)
9687 if (TARGET_MACHO)
9688 /* We don't need a pic base, we're not producing pic. */
9689 gcc_unreachable ();
9691 xops[2] = gen_rtx_LABEL_REF (Pmode, label ? label : gen_label_rtx ());
9692 output_asm_insn ("mov%z0\t{%2, %0|%0, %2}", xops);
9693 targetm.asm_out.internal_label (asm_out_file, "L",
9694 CODE_LABEL_NUMBER (XEXP (xops[2], 0)));
9696 else
9698 char name[32];
9699 get_pc_thunk_name (name, REGNO (dest));
9700 pic_labels_used |= 1 << REGNO (dest);
9702 xops[2] = gen_rtx_SYMBOL_REF (Pmode, ggc_strdup (name));
9703 xops[2] = gen_rtx_MEM (QImode, xops[2]);
9704 output_asm_insn ("%!call\t%X2", xops);
9706 #if TARGET_MACHO
9707 /* Output the Mach-O "canonical" pic base label name ("Lxx$pb") here.
9708 This is what will be referenced by the Mach-O PIC subsystem. */
9709 if (machopic_should_output_picbase_label () || !label)
9710 ASM_OUTPUT_LABEL (asm_out_file, MACHOPIC_FUNCTION_BASE_NAME);
9712 /* When we are restoring the pic base at the site of a nonlocal label,
9713 and we decided to emit the pic base above, we will still output a
9714 local label used for calculating the correction offset (even though
9715 the offset will be 0 in that case). */
9716 if (label)
9717 targetm.asm_out.internal_label (asm_out_file, "L",
9718 CODE_LABEL_NUMBER (label));
9719 #endif
9722 if (!TARGET_MACHO)
9723 output_asm_insn ("add%z0\t{%1, %0|%0, %1}", xops);
9725 return "";
9728 /* Generate an "push" pattern for input ARG. */
9730 static rtx
9731 gen_push (rtx arg)
9733 struct machine_function *m = cfun->machine;
9735 if (m->fs.cfa_reg == stack_pointer_rtx)
9736 m->fs.cfa_offset += UNITS_PER_WORD;
9737 m->fs.sp_offset += UNITS_PER_WORD;
9739 if (REG_P (arg) && GET_MODE (arg) != word_mode)
9740 arg = gen_rtx_REG (word_mode, REGNO (arg));
9742 return gen_rtx_SET (VOIDmode,
9743 gen_rtx_MEM (word_mode,
9744 gen_rtx_PRE_DEC (Pmode,
9745 stack_pointer_rtx)),
9746 arg);
9749 /* Generate an "pop" pattern for input ARG. */
9751 static rtx
9752 gen_pop (rtx arg)
9754 if (REG_P (arg) && GET_MODE (arg) != word_mode)
9755 arg = gen_rtx_REG (word_mode, REGNO (arg));
9757 return gen_rtx_SET (VOIDmode,
9758 arg,
9759 gen_rtx_MEM (word_mode,
9760 gen_rtx_POST_INC (Pmode,
9761 stack_pointer_rtx)));
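/* For example, on a 64-bit target gen_push and gen_pop for %rdi build RTL
   of roughly this shape (illustrative only):

     (set (mem:DI (pre_dec:DI (reg:DI sp))) (reg:DI di))
     (set (reg:DI di) (mem:DI (post_inc:DI (reg:DI sp))))                 */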
9764 /* Return >= 0 if there is an unused call-clobbered register available
9765 for the entire function. */
9767 static unsigned int
9768 ix86_select_alt_pic_regnum (void)
9770 if (ix86_use_pseudo_pic_reg ())
9771 return INVALID_REGNUM;
9773 if (crtl->is_leaf
9774 && !crtl->profile
9775 && !ix86_current_function_calls_tls_descriptor)
9777 int i, drap;
9778 /* Can't use the same register for both PIC and DRAP. */
9779 if (crtl->drap_reg)
9780 drap = REGNO (crtl->drap_reg);
9781 else
9782 drap = -1;
9783 for (i = 2; i >= 0; --i)
9784 if (i != drap && !df_regs_ever_live_p (i))
9785 return i;
9788 return INVALID_REGNUM;
9791 /* Return TRUE if we need to save REGNO. */
9793 static bool
9794 ix86_save_reg (unsigned int regno, bool maybe_eh_return)
9796 if (regno == REAL_PIC_OFFSET_TABLE_REGNUM
9797 && pic_offset_table_rtx)
9799 if (ix86_use_pseudo_pic_reg ())
9801 /* REAL_PIC_OFFSET_TABLE_REGNUM is used by the call to
9802 _mcount in the prologue. */
9803 if (!TARGET_64BIT && flag_pic && crtl->profile)
9804 return true;
9806 else if (df_regs_ever_live_p (REAL_PIC_OFFSET_TABLE_REGNUM)
9807 || crtl->profile
9808 || crtl->calls_eh_return
9809 || crtl->uses_const_pool
9810 || cfun->has_nonlocal_label)
9811 return ix86_select_alt_pic_regnum () == INVALID_REGNUM;
9814 if (crtl->calls_eh_return && maybe_eh_return)
9816 unsigned i;
9817 for (i = 0; ; i++)
9819 unsigned test = EH_RETURN_DATA_REGNO (i);
9820 if (test == INVALID_REGNUM)
9821 break;
9822 if (test == regno)
9823 return true;
9827 if (crtl->drap_reg
9828 && regno == REGNO (crtl->drap_reg)
9829 && !cfun->machine->no_drap_save_restore)
9830 return true;
9832 return (df_regs_ever_live_p (regno)
9833 && !call_used_regs[regno]
9834 && !fixed_regs[regno]
9835 && (regno != HARD_FRAME_POINTER_REGNUM || !frame_pointer_needed));
9838 /* Return the number of saved general purpose registers. */
9840 static int
9841 ix86_nsaved_regs (void)
9843 int nregs = 0;
9844 int regno;
9846 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
9847 if (!SSE_REGNO_P (regno) && ix86_save_reg (regno, true))
9848 nregs ++;
9849 return nregs;
9852 /* Return the number of saved SSE registers. */
9854 static int
9855 ix86_nsaved_sseregs (void)
9857 int nregs = 0;
9858 int regno;
9860 if (!TARGET_64BIT_MS_ABI)
9861 return 0;
9862 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
9863 if (SSE_REGNO_P (regno) && ix86_save_reg (regno, true))
9864 nregs ++;
9865 return nregs;
9868 /* Given FROM and TO register numbers, say whether this elimination is
9869 allowed. If stack alignment is needed, we can only replace argument
9870 pointer with hard frame pointer, or replace frame pointer with stack
9871 pointer. Otherwise, frame pointer elimination is automatically
9872 handled and all other eliminations are valid. */
9874 static bool
9875 ix86_can_eliminate (const int from, const int to)
9877 if (stack_realign_fp)
9878 return ((from == ARG_POINTER_REGNUM
9879 && to == HARD_FRAME_POINTER_REGNUM)
9880 || (from == FRAME_POINTER_REGNUM
9881 && to == STACK_POINTER_REGNUM));
9882 else
9883 return to == STACK_POINTER_REGNUM ? !frame_pointer_needed : true;
9886 /* Return the offset between two registers, one to be eliminated, and the other
9887 its replacement, at the start of a routine. */
9889 HOST_WIDE_INT
9890 ix86_initial_elimination_offset (int from, int to)
9892 struct ix86_frame frame;
9893 ix86_compute_frame_layout (&frame);
9895 if (from == ARG_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM)
9896 return frame.hard_frame_pointer_offset;
9897 else if (from == FRAME_POINTER_REGNUM
9898 && to == HARD_FRAME_POINTER_REGNUM)
9899 return frame.hard_frame_pointer_offset - frame.frame_pointer_offset;
9900 else
9902 gcc_assert (to == STACK_POINTER_REGNUM);
9904 if (from == ARG_POINTER_REGNUM)
9905 return frame.stack_pointer_offset;
9907 gcc_assert (from == FRAME_POINTER_REGNUM);
9908 return frame.stack_pointer_offset - frame.frame_pointer_offset;
9912 /* In a dynamically-aligned function, we can't know the offset from
9913 stack pointer to frame pointer, so we must ensure that setjmp
9914 eliminates fp against the hard fp (%ebp) rather than trying to
9915 index from %esp up to the top of the frame across a gap that is
9916 of unknown (at compile-time) size. */
9917 static rtx
9918 ix86_builtin_setjmp_frame_value (void)
9920 return stack_realign_fp ? hard_frame_pointer_rtx : virtual_stack_vars_rtx;
9923 /* When using -fsplit-stack, the allocation routines set a field in
9924 the TCB to the bottom of the stack plus this much space, measured
9925 in bytes. */
9927 #define SPLIT_STACK_AVAILABLE 256
9929 /* Fill the ix86_frame structure describing the frame of the currently compiled function. */
9931 static void
9932 ix86_compute_frame_layout (struct ix86_frame *frame)
9934 unsigned HOST_WIDE_INT stack_alignment_needed;
9935 HOST_WIDE_INT offset;
9936 unsigned HOST_WIDE_INT preferred_alignment;
9937 HOST_WIDE_INT size = get_frame_size ();
9938 HOST_WIDE_INT to_allocate;
9940 frame->nregs = ix86_nsaved_regs ();
9941 frame->nsseregs = ix86_nsaved_sseregs ();
9943 /* The 64-bit MS ABI seems to require stack alignment to always be 16, except
9944 for function prologues and leaf functions. */
9945 if ((TARGET_64BIT_MS_ABI && crtl->preferred_stack_boundary < 128)
9946 && (!crtl->is_leaf || cfun->calls_alloca != 0
9947 || ix86_current_function_calls_tls_descriptor))
9949 crtl->preferred_stack_boundary = 128;
9950 crtl->stack_alignment_needed = 128;
9952 /* preferred_stack_boundary is never updated for a call
9953 expanded from a TLS descriptor. Update it here. We don't update it in the
9954 expand stage because, according to the comments before
9955 ix86_current_function_calls_tls_descriptor, TLS calls may be optimized
9956 away. */
9957 else if (ix86_current_function_calls_tls_descriptor
9958 && crtl->preferred_stack_boundary < PREFERRED_STACK_BOUNDARY)
9960 crtl->preferred_stack_boundary = PREFERRED_STACK_BOUNDARY;
9961 if (crtl->stack_alignment_needed < PREFERRED_STACK_BOUNDARY)
9962 crtl->stack_alignment_needed = PREFERRED_STACK_BOUNDARY;
9965 stack_alignment_needed = crtl->stack_alignment_needed / BITS_PER_UNIT;
9966 preferred_alignment = crtl->preferred_stack_boundary / BITS_PER_UNIT;
9968 gcc_assert (!size || stack_alignment_needed);
9969 gcc_assert (preferred_alignment >= STACK_BOUNDARY / BITS_PER_UNIT);
9970 gcc_assert (preferred_alignment <= stack_alignment_needed);
9972 /* For SEH we have to limit the amount of code movement into the prologue.
9973 At present we do this via a BLOCKAGE, at which point there's very little
9974 scheduling that can be done, which means that there's very little point
9975 in doing anything except PUSHs. */
9976 if (TARGET_SEH)
9977 cfun->machine->use_fast_prologue_epilogue = false;
9979 /* During reload iteration the number of registers saved can change.
9980 Recompute the value as needed. Do not recompute when the number of registers
9981 didn't change, as reload makes multiple calls to this function and does not
9982 expect the decision to change within a single iteration. */
9983 else if (!optimize_bb_for_size_p (ENTRY_BLOCK_PTR_FOR_FN (cfun))
9984 && cfun->machine->use_fast_prologue_epilogue_nregs != frame->nregs)
9986 int count = frame->nregs;
9987 struct cgraph_node *node = cgraph_node::get (current_function_decl);
9989 cfun->machine->use_fast_prologue_epilogue_nregs = count;
9991 /* The fast prologue uses move instead of push to save registers. This
9992 is significantly longer, but also executes faster, as modern hardware
9993 can execute the moves in parallel but can't do that for push/pop.
9995 Be careful about choosing which prologue to emit: when the function takes
9996 many instructions to execute, we may use the slow version, as well as
9997 when the function is known to be outside a hot spot (this is known with
9998 feedback only). Weight the size of the function by the number of registers
9999 to save, as it is cheap to use one or two push instructions but very
10000 slow to use many of them. */
10001 if (count)
10002 count = (count - 1) * FAST_PROLOGUE_INSN_COUNT;
10003 if (node->frequency < NODE_FREQUENCY_NORMAL
10004 || (flag_branch_probabilities
10005 && node->frequency < NODE_FREQUENCY_HOT))
10006 cfun->machine->use_fast_prologue_epilogue = false;
10007 else
10008 cfun->machine->use_fast_prologue_epilogue
10009 = !expensive_function_p (count);
10012 frame->save_regs_using_mov
10013 = (TARGET_PROLOGUE_USING_MOVE && cfun->machine->use_fast_prologue_epilogue
10014 /* If static stack checking is enabled and done with probes,
10015 the registers need to be saved before allocating the frame. */
10016 && flag_stack_check != STATIC_BUILTIN_STACK_CHECK);
10018 /* Skip return address. */
10019 offset = UNITS_PER_WORD;
10021 /* Skip pushed static chain. */
10022 if (ix86_static_chain_on_stack)
10023 offset += UNITS_PER_WORD;
10025 /* Skip saved base pointer. */
10026 if (frame_pointer_needed)
10027 offset += UNITS_PER_WORD;
10028 frame->hfp_save_offset = offset;
10030 /* The traditional frame pointer location is at the top of the frame. */
10031 frame->hard_frame_pointer_offset = offset;
10033 /* Register save area */
10034 offset += frame->nregs * UNITS_PER_WORD;
10035 frame->reg_save_offset = offset;
10037 /* On SEH target, registers are pushed just before the frame pointer
10038 location. */
10039 if (TARGET_SEH)
10040 frame->hard_frame_pointer_offset = offset;
10042 /* Align and set SSE register save area. */
10043 if (frame->nsseregs)
10045 /* The only ABI that has saved SSE registers (Win64) also has a
10046 16-byte aligned default stack, and thus we don't need to be
10047 within the re-aligned local stack frame to save them. */
10048 gcc_assert (INCOMING_STACK_BOUNDARY >= 128);
10049 offset = (offset + 16 - 1) & -16;
10050 offset += frame->nsseregs * 16;
10052 frame->sse_reg_save_offset = offset;
10054 /* The re-aligned stack starts here. Values before this point are not
10055 directly comparable with values below this point. In order to make
10056 sure that no value happens to be the same before and after, force
10057 the alignment computation below to add a non-zero value. */
10058 if (stack_realign_fp)
10059 offset = (offset + stack_alignment_needed) & -stack_alignment_needed;
10061 /* Va-arg area */
10062 frame->va_arg_size = ix86_varargs_gpr_size + ix86_varargs_fpr_size;
10063 offset += frame->va_arg_size;
10065 /* Align start of frame for local function. */
10066 if (stack_realign_fp
10067 || offset != frame->sse_reg_save_offset
10068 || size != 0
10069 || !crtl->is_leaf
10070 || cfun->calls_alloca
10071 || ix86_current_function_calls_tls_descriptor)
10072 offset = (offset + stack_alignment_needed - 1) & -stack_alignment_needed;
10074 /* Frame pointer points here. */
10075 frame->frame_pointer_offset = offset;
10077 offset += size;
10079 /* Add the outgoing arguments area. It can be skipped if we eliminated
10080 all the function calls as dead code.
10081 Skipping is, however, impossible when the function calls alloca. The alloca
10082 expander assumes that the last crtl->outgoing_args_size bytes
10083 of the stack frame are unused. */
10084 if (ACCUMULATE_OUTGOING_ARGS
10085 && (!crtl->is_leaf || cfun->calls_alloca
10086 || ix86_current_function_calls_tls_descriptor))
10088 offset += crtl->outgoing_args_size;
10089 frame->outgoing_arguments_size = crtl->outgoing_args_size;
10091 else
10092 frame->outgoing_arguments_size = 0;
10094 /* Align stack boundary. Only needed if we're calling another function
10095 or using alloca. */
10096 if (!crtl->is_leaf || cfun->calls_alloca
10097 || ix86_current_function_calls_tls_descriptor)
10098 offset = (offset + preferred_alignment - 1) & -preferred_alignment;
10100 /* We've reached end of stack frame. */
10101 frame->stack_pointer_offset = offset;
10103 /* Size prologue needs to allocate. */
10104 to_allocate = offset - frame->sse_reg_save_offset;
10106 if ((!to_allocate && frame->nregs <= 1)
10107 || (TARGET_64BIT && to_allocate >= (HOST_WIDE_INT) 0x80000000))
10108 frame->save_regs_using_mov = false;
10110 if (ix86_using_red_zone ()
10111 && crtl->sp_is_unchanging
10112 && crtl->is_leaf
10113 && !ix86_current_function_calls_tls_descriptor)
10115 frame->red_zone_size = to_allocate;
10116 if (frame->save_regs_using_mov)
10117 frame->red_zone_size += frame->nregs * UNITS_PER_WORD;
10118 if (frame->red_zone_size > RED_ZONE_SIZE - RED_ZONE_RESERVE)
10119 frame->red_zone_size = RED_ZONE_SIZE - RED_ZONE_RESERVE;
10121 else
10122 frame->red_zone_size = 0;
10123 frame->stack_pointer_offset -= frame->red_zone_size;
10125 /* The SEH frame pointer location is near the bottom of the frame.
10126 This is enforced by the fact that the difference between the
10127 stack pointer and the frame pointer is limited to 240 bytes in
10128 the unwind data structure. */
10129 if (TARGET_SEH)
10131 HOST_WIDE_INT diff;
10133 /* If we can leave the frame pointer where it is, do so. Also, returns
10134 the establisher frame for __builtin_frame_address (0). */
10135 diff = frame->stack_pointer_offset - frame->hard_frame_pointer_offset;
10136 if (diff <= SEH_MAX_FRAME_SIZE
10137 && (diff > 240 || (diff & 15) != 0)
10138 && !crtl->accesses_prior_frames)
10140 /* Ideally we'd determine what portion of the local stack frame
10141 (within the constraint of the lowest 240) is most heavily used.
10142 But without that complication, simply bias the frame pointer
10143 by 128 bytes so as to maximize the amount of the local stack
10144 frame that is addressable with 8-bit offsets. */
10145 frame->hard_frame_pointer_offset = frame->stack_pointer_offset - 128;
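/* A schematic of the frame layout computed above, from the CFA downward
   (illustrative only; bracketed areas are optional and SEH moves the hard
   frame pointer location):

     return address                        <- offset UNITS_PER_WORD
     [pushed static chain]
     [saved frame pointer]                 <- hard_frame_pointer_offset
     saved GP registers                    <- reg_save_offset
     [16-byte aligned SSE register saves]  <- sse_reg_save_offset
     [va_arg register save area]
     local variables                       <- frame_pointer_offset
     [outgoing arguments]
     end of frame                          <- stack_pointer_offset (less red zone)  */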
10150 /* This is semi-inlined memory_address_length, but simplified
10151 since we know that we're always dealing with reg+offset, and
10152 to avoid having to create and discard all that rtl. */
10154 static inline int
10155 choose_baseaddr_len (unsigned int regno, HOST_WIDE_INT offset)
10157 int len = 4;
10159 if (offset == 0)
10161 /* EBP and R13 cannot be encoded without an offset. */
10162 len = (regno == BP_REG || regno == R13_REG);
10164 else if (IN_RANGE (offset, -128, 127))
10165 len = 1;
10167 /* ESP and R12 must be encoded with a SIB byte. */
10168 if (regno == SP_REG || regno == R12_REG)
10169 len++;
10171 return len;
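/* Some example encoding lengths produced by the rules above (illustrative
   only):

     (EAX, 0)     -> 0   base register alone
     (EBP, 0)     -> 1   EBP always needs at least a disp8
     (ESP, 0)     -> 1   ESP always needs a SIB byte
     (EAX, 8)     -> 1   disp8
     (ESP, 8)     -> 2   SIB byte + disp8
     (EAX, 4096)  -> 4   disp32                                            */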
10174 /* Return an RTX that points to CFA_OFFSET within the stack frame.
10175 The valid base registers are taken from CFUN->MACHINE->FS. */
10177 static rtx
10178 choose_baseaddr (HOST_WIDE_INT cfa_offset)
10180 const struct machine_function *m = cfun->machine;
10181 rtx base_reg = NULL;
10182 HOST_WIDE_INT base_offset = 0;
10184 if (m->use_fast_prologue_epilogue)
10186 /* Choose the base register most likely to allow the most scheduling
10187 opportunities. Generally FP is valid throughout the function,
10188 while DRAP must be reloaded within the epilogue. But choose either
10189 over the SP due to increased encoding size. */
10191 if (m->fs.fp_valid)
10193 base_reg = hard_frame_pointer_rtx;
10194 base_offset = m->fs.fp_offset - cfa_offset;
10196 else if (m->fs.drap_valid)
10198 base_reg = crtl->drap_reg;
10199 base_offset = 0 - cfa_offset;
10201 else if (m->fs.sp_valid)
10203 base_reg = stack_pointer_rtx;
10204 base_offset = m->fs.sp_offset - cfa_offset;
10207 else
10209 HOST_WIDE_INT toffset;
10210 int len = 16, tlen;
10212 /* Choose the base register with the smallest address encoding.
10213 With a tie, choose FP > DRAP > SP. */
10214 if (m->fs.sp_valid)
10216 base_reg = stack_pointer_rtx;
10217 base_offset = m->fs.sp_offset - cfa_offset;
10218 len = choose_baseaddr_len (STACK_POINTER_REGNUM, base_offset);
10220 if (m->fs.drap_valid)
10222 toffset = 0 - cfa_offset;
10223 tlen = choose_baseaddr_len (REGNO (crtl->drap_reg), toffset);
10224 if (tlen <= len)
10226 base_reg = crtl->drap_reg;
10227 base_offset = toffset;
10228 len = tlen;
10231 if (m->fs.fp_valid)
10233 toffset = m->fs.fp_offset - cfa_offset;
10234 tlen = choose_baseaddr_len (HARD_FRAME_POINTER_REGNUM, toffset);
10235 if (tlen <= len)
10237 base_reg = hard_frame_pointer_rtx;
10238 base_offset = toffset;
10239 len = tlen;
10243 gcc_assert (base_reg != NULL);
10245 return plus_constant (Pmode, base_reg, base_offset);
10248 /* Emit code to save registers in the prologue. */
10250 static void
10251 ix86_emit_save_regs (void)
10253 unsigned int regno;
10254 rtx insn;
10256 for (regno = FIRST_PSEUDO_REGISTER - 1; regno-- > 0; )
10257 if (!SSE_REGNO_P (regno) && ix86_save_reg (regno, true))
10259 insn = emit_insn (gen_push (gen_rtx_REG (word_mode, regno)));
10260 RTX_FRAME_RELATED_P (insn) = 1;
10264 /* Emit a single register save at CFA - CFA_OFFSET. */
10266 static void
10267 ix86_emit_save_reg_using_mov (machine_mode mode, unsigned int regno,
10268 HOST_WIDE_INT cfa_offset)
10270 struct machine_function *m = cfun->machine;
10271 rtx reg = gen_rtx_REG (mode, regno);
10272 rtx mem, addr, base, insn;
10274 addr = choose_baseaddr (cfa_offset);
10275 mem = gen_frame_mem (mode, addr);
10277 /* For SSE saves, we need to indicate the 128-bit alignment. */
10278 set_mem_align (mem, GET_MODE_ALIGNMENT (mode));
10280 insn = emit_move_insn (mem, reg);
10281 RTX_FRAME_RELATED_P (insn) = 1;
10283 base = addr;
10284 if (GET_CODE (base) == PLUS)
10285 base = XEXP (base, 0);
10286 gcc_checking_assert (REG_P (base));
10288 /* When saving registers into a re-aligned local stack frame, avoid
10289 any tricky guessing by dwarf2out. */
10290 if (m->fs.realigned)
10292 gcc_checking_assert (stack_realign_drap);
10294 if (regno == REGNO (crtl->drap_reg))
10296 /* A bit of a hack. We force the DRAP register to be saved in
10297 the re-aligned stack frame, which provides us with a copy
10298 of the CFA that will last past the prologue. Install it. */
10299 gcc_checking_assert (cfun->machine->fs.fp_valid);
10300 addr = plus_constant (Pmode, hard_frame_pointer_rtx,
10301 cfun->machine->fs.fp_offset - cfa_offset);
10302 mem = gen_rtx_MEM (mode, addr);
10303 add_reg_note (insn, REG_CFA_DEF_CFA, mem);
10305 else
10307 /* The frame pointer is a stable reference within the
10308 aligned frame. Use it. */
10309 gcc_checking_assert (cfun->machine->fs.fp_valid);
10310 addr = plus_constant (Pmode, hard_frame_pointer_rtx,
10311 cfun->machine->fs.fp_offset - cfa_offset);
10312 mem = gen_rtx_MEM (mode, addr);
10313 add_reg_note (insn, REG_CFA_EXPRESSION,
10314 gen_rtx_SET (VOIDmode, mem, reg));
10318 /* The memory may not be relative to the current CFA register,
10319 which means that we may need to generate a new pattern for
10320 use by the unwind info. */
10321 else if (base != m->fs.cfa_reg)
10323 addr = plus_constant (Pmode, m->fs.cfa_reg,
10324 m->fs.cfa_offset - cfa_offset);
10325 mem = gen_rtx_MEM (mode, addr);
10326 add_reg_note (insn, REG_CFA_OFFSET, gen_rtx_SET (VOIDmode, mem, reg));
10330 /* Emit code to save registers using MOV insns.
10331 First register is stored at CFA - CFA_OFFSET. */
10332 static void
10333 ix86_emit_save_regs_using_mov (HOST_WIDE_INT cfa_offset)
10335 unsigned int regno;
10337 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
10338 if (!SSE_REGNO_P (regno) && ix86_save_reg (regno, true))
10340 ix86_emit_save_reg_using_mov (word_mode, regno, cfa_offset);
10341 cfa_offset -= UNITS_PER_WORD;
10345 /* Emit code to save SSE registers using MOV insns.
10346 First register is stored at CFA - CFA_OFFSET. */
10347 static void
10348 ix86_emit_save_sse_regs_using_mov (HOST_WIDE_INT cfa_offset)
10350 unsigned int regno;
10352 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
10353 if (SSE_REGNO_P (regno) && ix86_save_reg (regno, true))
10355 ix86_emit_save_reg_using_mov (V4SFmode, regno, cfa_offset);
10356 cfa_offset -= 16;
10360 static GTY(()) rtx queued_cfa_restores;
10362 /* Add a REG_CFA_RESTORE REG note to INSN, or queue it until the next stack
10363 manipulation insn. The value is on the stack at CFA - CFA_OFFSET.
10364 Don't add the note if the previously saved value will be left untouched
10365 within the stack red zone until return, as unwinders can find the same value
10366 in the register and on the stack. */
10368 static void
10369 ix86_add_cfa_restore_note (rtx insn, rtx reg, HOST_WIDE_INT cfa_offset)
10371 if (!crtl->shrink_wrapped
10372 && cfa_offset <= cfun->machine->fs.red_zone_offset)
10373 return;
10375 if (insn)
10377 add_reg_note (insn, REG_CFA_RESTORE, reg);
10378 RTX_FRAME_RELATED_P (insn) = 1;
10380 else
10381 queued_cfa_restores
10382 = alloc_reg_note (REG_CFA_RESTORE, reg, queued_cfa_restores);
10385 /* Add queued REG_CFA_RESTORE notes if any to INSN. */
10387 static void
10388 ix86_add_queued_cfa_restore_notes (rtx insn)
10390 rtx last;
10391 if (!queued_cfa_restores)
10392 return;
10393 for (last = queued_cfa_restores; XEXP (last, 1); last = XEXP (last, 1))
10395 XEXP (last, 1) = REG_NOTES (insn);
10396 REG_NOTES (insn) = queued_cfa_restores;
10397 queued_cfa_restores = NULL_RTX;
10398 RTX_FRAME_RELATED_P (insn) = 1;
10401 /* Expand a prologue or epilogue stack adjustment.
10402 The pattern exists to put a dependency on all ebp-based memory accesses.
10403 STYLE should be negative if instructions should be marked as frame related,
10404 zero if the %r11 register is live and cannot be freely used, and positive
10405 otherwise. */
10407 static void
10408 pro_epilogue_adjust_stack (rtx dest, rtx src, rtx offset,
10409 int style, bool set_cfa)
10411 struct machine_function *m = cfun->machine;
10412 rtx insn;
10413 bool add_frame_related_expr = false;
10415 if (Pmode == SImode)
10416 insn = gen_pro_epilogue_adjust_stack_si_add (dest, src, offset);
10417 else if (x86_64_immediate_operand (offset, DImode))
10418 insn = gen_pro_epilogue_adjust_stack_di_add (dest, src, offset);
10419 else
10421 rtx tmp;
10422 /* r11 is used by indirect sibcall return as well, set before the
10423 epilogue and used after the epilogue. */
10424 if (style)
10425 tmp = gen_rtx_REG (DImode, R11_REG);
10426 else
10428 gcc_assert (src != hard_frame_pointer_rtx
10429 && dest != hard_frame_pointer_rtx);
10430 tmp = hard_frame_pointer_rtx;
10432 insn = emit_insn (gen_rtx_SET (DImode, tmp, offset));
10433 if (style < 0)
10434 add_frame_related_expr = true;
10436 insn = gen_pro_epilogue_adjust_stack_di_add (dest, src, tmp);
10439 insn = emit_insn (insn);
10440 if (style >= 0)
10441 ix86_add_queued_cfa_restore_notes (insn);
10443 if (set_cfa)
10445 rtx r;
10447 gcc_assert (m->fs.cfa_reg == src);
10448 m->fs.cfa_offset += INTVAL (offset);
10449 m->fs.cfa_reg = dest;
10451 r = gen_rtx_PLUS (Pmode, src, offset);
10452 r = gen_rtx_SET (VOIDmode, dest, r);
10453 add_reg_note (insn, REG_CFA_ADJUST_CFA, r);
10454 RTX_FRAME_RELATED_P (insn) = 1;
10456 else if (style < 0)
10458 RTX_FRAME_RELATED_P (insn) = 1;
10459 if (add_frame_related_expr)
10461 rtx r = gen_rtx_PLUS (Pmode, src, offset);
10462 r = gen_rtx_SET (VOIDmode, dest, r);
10463 add_reg_note (insn, REG_FRAME_RELATED_EXPR, r);
10467 if (dest == stack_pointer_rtx)
10469 HOST_WIDE_INT ooffset = m->fs.sp_offset;
10470 bool valid = m->fs.sp_valid;
10472 if (src == hard_frame_pointer_rtx)
10474 valid = m->fs.fp_valid;
10475 ooffset = m->fs.fp_offset;
10477 else if (src == crtl->drap_reg)
10479 valid = m->fs.drap_valid;
10480 ooffset = 0;
10482 else
10484 /* Else there are two possibilities: SP itself, which we set
10485 up as the default above, or EH_RETURN_STACKADJ_RTX, which is
10486 taken care of by hand along the eh_return path. */
10487 gcc_checking_assert (src == stack_pointer_rtx
10488 || offset == const0_rtx);
10491 m->fs.sp_offset = ooffset - INTVAL (offset);
10492 m->fs.sp_valid = valid;
10496 /* Find an available register to be used as the dynamic realign argument
10497 pointer register.  Such a register will be written in the prologue and
10498 used at the beginning of the body, so it must not be
10499 1. a parameter passing register.
10500 2. the GOT pointer.
10501 We reuse the static-chain register if it is available.  Otherwise, we
10502 use DI for i386 and R13 for x86-64.  We chose R13 since it has a
10503 shorter encoding.
10505 Return: the regno of the chosen register. */
10507 static unsigned int
10508 find_drap_reg (void)
10510 tree decl = cfun->decl;
10512 if (TARGET_64BIT)
10514 /* Use R13 for a nested function or a function that needs a static chain.
10515 Since a function with a tail call may use any caller-saved
10516 register in the epilogue, DRAP must not use a caller-saved
10517 register in such a case. */
10518 if (DECL_STATIC_CHAIN (decl) || crtl->tail_call_emit)
10519 return R13_REG;
10521 return R10_REG;
10523 else
10525 /* Use DI for a nested function or a function that needs a static chain.
10526 Since a function with a tail call may use any caller-saved
10527 register in the epilogue, DRAP must not use a caller-saved
10528 register in such a case. */
10529 if (DECL_STATIC_CHAIN (decl) || crtl->tail_call_emit)
10530 return DI_REG;
10532 /* Reuse static chain register if it isn't used for parameter
10533 passing. */
10534 if (ix86_function_regparm (TREE_TYPE (decl), decl) <= 2)
10536 unsigned int ccvt = ix86_get_callcvt (TREE_TYPE (decl));
10537 if ((ccvt & (IX86_CALLCVT_FASTCALL | IX86_CALLCVT_THISCALL)) == 0)
10538 return CX_REG;
10540 return DI_REG;
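/* To summarize the choices made above: in 64-bit mode the DRAP register is
   R13 when the function has a static chain or emits tail calls and R10
   otherwise; in 32-bit mode it is DI in the static-chain/tail-call case,
   CX when at most two regparm arguments are used and the convention is
   neither fastcall nor thiscall, and DI otherwise.  */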
10544 /* Return minimum incoming stack alignment. */
10546 static unsigned int
10547 ix86_minimum_incoming_stack_boundary (bool sibcall)
10549 unsigned int incoming_stack_boundary;
10551 /* Prefer the one specified at command line. */
10552 if (ix86_user_incoming_stack_boundary)
10553 incoming_stack_boundary = ix86_user_incoming_stack_boundary;
10554 /* In 32-bit mode, use MIN_STACK_BOUNDARY for the incoming stack boundary
10555 when -mstackrealign is used, this is not a sibcall check, and the
10556 estimated stack alignment is 128 bits. */
10557 else if (!sibcall
10558 && !TARGET_64BIT
10559 && ix86_force_align_arg_pointer
10560 && crtl->stack_alignment_estimated == 128)
10561 incoming_stack_boundary = MIN_STACK_BOUNDARY;
10562 else
10563 incoming_stack_boundary = ix86_default_incoming_stack_boundary;
10565 /* Incoming stack alignment can be changed on individual functions
10566 via force_align_arg_pointer attribute. We use the smallest
10567 incoming stack boundary. */
10568 if (incoming_stack_boundary > MIN_STACK_BOUNDARY
10569 && lookup_attribute (ix86_force_align_arg_pointer_string,
10570 TYPE_ATTRIBUTES (TREE_TYPE (current_function_decl))))
10571 incoming_stack_boundary = MIN_STACK_BOUNDARY;
10573 /* The incoming stack frame has to be aligned at least at
10574 parm_stack_boundary. */
10575 if (incoming_stack_boundary < crtl->parm_stack_boundary)
10576 incoming_stack_boundary = crtl->parm_stack_boundary;
10578 /* The stack at the entry of main is aligned by the runtime.  We use the
10579 smallest incoming stack boundary. */
10580 if (incoming_stack_boundary > MAIN_STACK_BOUNDARY
10581 && DECL_NAME (current_function_decl)
10582 && MAIN_NAME_P (DECL_NAME (current_function_decl))
10583 && DECL_FILE_SCOPE_P (current_function_decl))
10584 incoming_stack_boundary = MAIN_STACK_BOUNDARY;
10586 return incoming_stack_boundary;
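/* The overall precedence implemented above: start from the user-specified
   or default boundary (with the 32-bit -mstackrealign special case),
   drop to MIN_STACK_BOUNDARY when the force_align_arg_pointer attribute
   is present, never go below crtl->parm_stack_boundary, and finally cap
   main's entry alignment at MAIN_STACK_BOUNDARY.  */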
10589 /* Update incoming stack boundary and estimated stack alignment. */
10591 static void
10592 ix86_update_stack_boundary (void)
10594 ix86_incoming_stack_boundary
10595 = ix86_minimum_incoming_stack_boundary (false);
10597 /* x86_64 varargs need 16-byte stack alignment for the register save
10598 area. */
10599 if (TARGET_64BIT
10600 && cfun->stdarg
10601 && crtl->stack_alignment_estimated < 128)
10602 crtl->stack_alignment_estimated = 128;
10605 /* Handle the TARGET_GET_DRAP_RTX hook. Return NULL if no DRAP is
10606 needed or an rtx for DRAP otherwise. */
10608 static rtx
10609 ix86_get_drap_rtx (void)
10611 if (ix86_force_drap || !ACCUMULATE_OUTGOING_ARGS)
10612 crtl->need_drap = true;
10614 if (stack_realign_drap)
10616 /* Assign DRAP to vDRAP and return vDRAP. */
10617 unsigned int regno = find_drap_reg ();
10618 rtx drap_vreg;
10619 rtx arg_ptr;
10620 rtx_insn *seq, *insn;
10622 arg_ptr = gen_rtx_REG (Pmode, regno);
10623 crtl->drap_reg = arg_ptr;
10625 start_sequence ();
10626 drap_vreg = copy_to_reg (arg_ptr);
10627 seq = get_insns ();
10628 end_sequence ();
10630 insn = emit_insn_before (seq, NEXT_INSN (entry_of_function ()));
10631 if (!optimize)
10633 add_reg_note (insn, REG_CFA_SET_VDRAP, drap_vreg);
10634 RTX_FRAME_RELATED_P (insn) = 1;
10636 return drap_vreg;
10638 else
10639 return NULL;
10642 /* Handle the TARGET_INTERNAL_ARG_POINTER hook. */
10644 static rtx
10645 ix86_internal_arg_pointer (void)
10647 return virtual_incoming_args_rtx;
10650 struct scratch_reg {
10651 rtx reg;
10652 bool saved;
10655 /* Return a short-lived scratch register for use on function entry.
10656 In 32-bit mode, it is valid only after the registers are saved
10657 in the prologue. This register must be released by means of
10658 release_scratch_register_on_entry once it is dead. */
10660 static void
10661 get_scratch_register_on_entry (struct scratch_reg *sr)
10663 int regno;
10665 sr->saved = false;
10667 if (TARGET_64BIT)
10669 /* We always use R11 in 64-bit mode. */
10670 regno = R11_REG;
10672 else
10674 tree decl = current_function_decl, fntype = TREE_TYPE (decl);
10675 bool fastcall_p
10676 = lookup_attribute ("fastcall", TYPE_ATTRIBUTES (fntype)) != NULL_TREE;
10677 bool thiscall_p
10678 = lookup_attribute ("thiscall", TYPE_ATTRIBUTES (fntype)) != NULL_TREE;
10679 bool static_chain_p = DECL_STATIC_CHAIN (decl);
10680 int regparm = ix86_function_regparm (fntype, decl);
10681 int drap_regno
10682 = crtl->drap_reg ? REGNO (crtl->drap_reg) : INVALID_REGNUM;
10684 /* 'fastcall' sets regparm to 2, uses ecx/edx for arguments and eax
10685 for the static chain register. */
10686 if ((regparm < 1 || (fastcall_p && !static_chain_p))
10687 && drap_regno != AX_REG)
10688 regno = AX_REG;
10689 /* 'thiscall' sets regparm to 1, uses ecx for arguments and edx
10690 for the static chain register. */
10691 else if (thiscall_p && !static_chain_p && drap_regno != AX_REG)
10692 regno = AX_REG;
10693 else if (regparm < 2 && !thiscall_p && drap_regno != DX_REG)
10694 regno = DX_REG;
10695 /* ecx is the static chain register. */
10696 else if (regparm < 3 && !fastcall_p && !thiscall_p
10697 && !static_chain_p
10698 && drap_regno != CX_REG)
10699 regno = CX_REG;
10700 else if (ix86_save_reg (BX_REG, true))
10701 regno = BX_REG;
10702 /* esi is the static chain register. */
10703 else if (!(regparm == 3 && static_chain_p)
10704 && ix86_save_reg (SI_REG, true))
10705 regno = SI_REG;
10706 else if (ix86_save_reg (DI_REG, true))
10707 regno = DI_REG;
10708 else
10710 regno = (drap_regno == AX_REG ? DX_REG : AX_REG);
10711 sr->saved = true;
10715 sr->reg = gen_rtx_REG (Pmode, regno);
10716 if (sr->saved)
10718 rtx insn = emit_insn (gen_push (sr->reg));
10719 RTX_FRAME_RELATED_P (insn) = 1;
10723 /* Release a scratch register obtained from the preceding function. */
10725 static void
10726 release_scratch_register_on_entry (struct scratch_reg *sr)
10728 if (sr->saved)
10730 struct machine_function *m = cfun->machine;
10731 rtx x, insn = emit_insn (gen_pop (sr->reg));
10733 /* The RTX_FRAME_RELATED_P mechanism doesn't know about pop. */
10734 RTX_FRAME_RELATED_P (insn) = 1;
10735 x = gen_rtx_PLUS (Pmode, stack_pointer_rtx, GEN_INT (UNITS_PER_WORD));
10736 x = gen_rtx_SET (VOIDmode, stack_pointer_rtx, x);
10737 add_reg_note (insn, REG_FRAME_RELATED_EXPR, x);
10738 m->fs.sp_offset -= UNITS_PER_WORD;
10742 #define PROBE_INTERVAL (1 << STACK_CHECK_PROBE_INTERVAL_EXP)
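/* A note on magnitude: PROBE_INTERVAL is 2**STACK_CHECK_PROBE_INTERVAL_EXP
   bytes.  Assuming the common default exponent of 12, that is 4096 bytes,
   i.e. one probe per page; the exact value is a target configuration
   detail, not something fixed here.  */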
10744 /* Emit code to adjust the stack pointer by SIZE bytes while probing it. */
10746 static void
10747 ix86_adjust_stack_and_probe (const HOST_WIDE_INT size)
10749 /* We skip the probe for the first interval + a small dope of 4 words and
10750 probe that many bytes past the specified size to maintain a protection
10751 area at the bottom of the stack. */
10752 const int dope = 4 * UNITS_PER_WORD;
10753 rtx size_rtx = GEN_INT (size), last;
10755 /* See if we have a constant small number of probes to generate. If so,
10756 that's the easy case. The run-time loop is made up of 11 insns in the
10757 generic case while the compile-time loop is made up of 3+2*(n-1) insns
10758 for n # of intervals. */
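/* A quick sanity check of that trade-off: for n = 3 intervals the unrolled
   sequence is 3 + 2*(3-1) = 7 insns, and at n = 5 it is 3 + 2*(5-1) = 11
   insns, matching the 11-insn run-time loop.  That is why the unrolled
   form is used only up to 5 * PROBE_INTERVAL below.  */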
10759 if (size <= 5 * PROBE_INTERVAL)
10761 HOST_WIDE_INT i, adjust;
10762 bool first_probe = true;
10764 /* Adjust SP and probe at PROBE_INTERVAL + N * PROBE_INTERVAL for
10765 values of N from 1 until it exceeds SIZE. If only one probe is
10766 needed, this will not generate any code. Then adjust and probe
10767 to PROBE_INTERVAL + SIZE. */
10768 for (i = PROBE_INTERVAL; i < size; i += PROBE_INTERVAL)
10770 if (first_probe)
10772 adjust = 2 * PROBE_INTERVAL + dope;
10773 first_probe = false;
10775 else
10776 adjust = PROBE_INTERVAL;
10778 emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx,
10779 plus_constant (Pmode, stack_pointer_rtx,
10780 -adjust)));
10781 emit_stack_probe (stack_pointer_rtx);
10784 if (first_probe)
10785 adjust = size + PROBE_INTERVAL + dope;
10786 else
10787 adjust = size + PROBE_INTERVAL - i;
10789 emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx,
10790 plus_constant (Pmode, stack_pointer_rtx,
10791 -adjust)));
10792 emit_stack_probe (stack_pointer_rtx);
10794 /* Adjust back to account for the additional first interval. */
10795 last = emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx,
10796 plus_constant (Pmode, stack_pointer_rtx,
10797 PROBE_INTERVAL + dope)));
10800 /* Otherwise, do the same as above, but in a loop. Note that we must be
10801 extra careful with variables wrapping around because we might be at
10802 the very top (or the very bottom) of the address space and we have
10803 to be able to handle this case properly; in particular, we use an
10804 equality test for the loop condition. */
10805 else
10807 HOST_WIDE_INT rounded_size;
10808 struct scratch_reg sr;
10810 get_scratch_register_on_entry (&sr);
10813 /* Step 1: round SIZE to the previous multiple of the interval. */
10815 rounded_size = size & -PROBE_INTERVAL;
10818 /* Step 2: compute initial and final value of the loop counter. */
10820 /* SP = SP_0 + PROBE_INTERVAL. */
10821 emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx,
10822 plus_constant (Pmode, stack_pointer_rtx,
10823 - (PROBE_INTERVAL + dope))));
10825 /* LAST_ADDR = SP_0 + PROBE_INTERVAL + ROUNDED_SIZE. */
10826 emit_move_insn (sr.reg, GEN_INT (-rounded_size));
10827 emit_insn (gen_rtx_SET (VOIDmode, sr.reg,
10828 gen_rtx_PLUS (Pmode, sr.reg,
10829 stack_pointer_rtx)));
10832 /* Step 3: the loop
10834 while (SP != LAST_ADDR)
10836 SP = SP + PROBE_INTERVAL
10837 probe at SP
10840 adjusts SP and probes to PROBE_INTERVAL + N * PROBE_INTERVAL for
10841 values of N from 1 until it is equal to ROUNDED_SIZE. */
10843 emit_insn (ix86_gen_adjust_stack_and_probe (sr.reg, sr.reg, size_rtx));
10846 /* Step 4: adjust SP and probe at PROBE_INTERVAL + SIZE if we cannot
10847 assert at compile-time that SIZE is equal to ROUNDED_SIZE. */
10849 if (size != rounded_size)
10851 emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx,
10852 plus_constant (Pmode, stack_pointer_rtx,
10853 rounded_size - size)));
10854 emit_stack_probe (stack_pointer_rtx);
10857 /* Adjust back to account for the additional first interval. */
10858 last = emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx,
10859 plus_constant (Pmode, stack_pointer_rtx,
10860 PROBE_INTERVAL + dope)));
10862 release_scratch_register_on_entry (&sr);
10865 gcc_assert (cfun->machine->fs.cfa_reg != stack_pointer_rtx);
10867 /* Even if the stack pointer isn't the CFA register, we need to correctly
10868 describe the adjustments made to it, in particular differentiate the
10869 frame-related ones from the frame-unrelated ones. */
10870 if (size > 0)
10872 rtx expr = gen_rtx_SEQUENCE (VOIDmode, rtvec_alloc (2));
10873 XVECEXP (expr, 0, 0)
10874 = gen_rtx_SET (VOIDmode, stack_pointer_rtx,
10875 plus_constant (Pmode, stack_pointer_rtx, -size));
10876 XVECEXP (expr, 0, 1)
10877 = gen_rtx_SET (VOIDmode, stack_pointer_rtx,
10878 plus_constant (Pmode, stack_pointer_rtx,
10879 PROBE_INTERVAL + dope + size));
10880 add_reg_note (last, REG_FRAME_RELATED_EXPR, expr);
10881 RTX_FRAME_RELATED_P (last) = 1;
10883 cfun->machine->fs.sp_offset += size;
10886 /* Make sure nothing is scheduled before we are done. */
10887 emit_insn (gen_blockage ());
10890 /* Adjust the stack pointer up to REG while probing it. */
10892 const char *
10893 output_adjust_stack_and_probe (rtx reg)
10895 static int labelno = 0;
10896 char loop_lab[32], end_lab[32];
10897 rtx xops[2];
10899 ASM_GENERATE_INTERNAL_LABEL (loop_lab, "LPSRL", labelno);
10900 ASM_GENERATE_INTERNAL_LABEL (end_lab, "LPSRE", labelno++);
10902 ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, loop_lab);
10904 /* Jump to END_LAB if SP == LAST_ADDR. */
10905 xops[0] = stack_pointer_rtx;
10906 xops[1] = reg;
10907 output_asm_insn ("cmp%z0\t{%1, %0|%0, %1}", xops);
10908 fputs ("\tje\t", asm_out_file);
10909 assemble_name_raw (asm_out_file, end_lab);
10910 fputc ('\n', asm_out_file);
10912 /* SP = SP + PROBE_INTERVAL. */
10913 xops[1] = GEN_INT (PROBE_INTERVAL);
10914 output_asm_insn ("sub%z0\t{%1, %0|%0, %1}", xops);
10916 /* Probe at SP. */
10917 xops[1] = const0_rtx;
10918 output_asm_insn ("or%z0\t{%1, (%0)|DWORD PTR [%0], %1}", xops);
10920 fprintf (asm_out_file, "\tjmp\t");
10921 assemble_name_raw (asm_out_file, loop_lab);
10922 fputc ('\n', asm_out_file);
10924 ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, end_lab);
10926 return "";
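/* For illustration only, the emitted loop looks roughly like this in
   64-bit AT&T syntax, assuming the R11 scratch register and a 4096-byte
   probe interval (both assumptions for this sketch, not guaranteed by the
   code above):

	.LPSRL0:
		cmpq	%r11, %rsp
		je	.LPSRE0
		subq	$4096, %rsp
		orq	$0, (%rsp)
		jmp	.LPSRL0
	.LPSRE0:
*/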
10929 /* Emit code to probe a range of stack addresses from FIRST to FIRST+SIZE,
10930 inclusive. These are offsets from the current stack pointer. */
10932 static void
10933 ix86_emit_probe_stack_range (HOST_WIDE_INT first, HOST_WIDE_INT size)
10935 /* See if we have a constant small number of probes to generate. If so,
10936 that's the easy case. The run-time loop is made up of 7 insns in the
10937 generic case while the compile-time loop is made up of n insns for n #
10938 of intervals. */
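/* As above, a small worked example: n = 4 intervals costs 4 unrolled probe
   insns versus the 7-insn run-time loop, and the break-even point is
   n = 7, which matches the size <= 7 * PROBE_INTERVAL test below.  */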
10939 if (size <= 7 * PROBE_INTERVAL)
10941 HOST_WIDE_INT i;
10943 /* Probe at FIRST + N * PROBE_INTERVAL for values of N from 1 until
10944 it exceeds SIZE. If only one probe is needed, this will not
10945 generate any code. Then probe at FIRST + SIZE. */
10946 for (i = PROBE_INTERVAL; i < size; i += PROBE_INTERVAL)
10947 emit_stack_probe (plus_constant (Pmode, stack_pointer_rtx,
10948 -(first + i)));
10950 emit_stack_probe (plus_constant (Pmode, stack_pointer_rtx,
10951 -(first + size)));
10954 /* Otherwise, do the same as above, but in a loop. Note that we must be
10955 extra careful with variables wrapping around because we might be at
10956 the very top (or the very bottom) of the address space and we have
10957 to be able to handle this case properly; in particular, we use an
10958 equality test for the loop condition. */
10959 else
10961 HOST_WIDE_INT rounded_size, last;
10962 struct scratch_reg sr;
10964 get_scratch_register_on_entry (&sr);
10967 /* Step 1: round SIZE to the previous multiple of the interval. */
10969 rounded_size = size & -PROBE_INTERVAL;
10972 /* Step 2: compute initial and final value of the loop counter. */
10974 /* TEST_OFFSET = FIRST. */
10975 emit_move_insn (sr.reg, GEN_INT (-first));
10977 /* LAST_OFFSET = FIRST + ROUNDED_SIZE. */
10978 last = first + rounded_size;
10981 /* Step 3: the loop
10983 while (TEST_ADDR != LAST_ADDR)
10985 TEST_ADDR = TEST_ADDR + PROBE_INTERVAL
10986 probe at TEST_ADDR
10989 probes at FIRST + N * PROBE_INTERVAL for values of N from 1
10990 until it is equal to ROUNDED_SIZE. */
10992 emit_insn (ix86_gen_probe_stack_range (sr.reg, sr.reg, GEN_INT (-last)));
10995 /* Step 4: probe at FIRST + SIZE if we cannot assert at compile-time
10996 that SIZE is equal to ROUNDED_SIZE. */
10998 if (size != rounded_size)
10999 emit_stack_probe (plus_constant (Pmode,
11000 gen_rtx_PLUS (Pmode,
11001 stack_pointer_rtx,
11002 sr.reg),
11003 rounded_size - size));
11005 release_scratch_register_on_entry (&sr);
11008 /* Make sure nothing is scheduled before we are done. */
11009 emit_insn (gen_blockage ());
11012 /* Probe a range of stack addresses from REG to END, inclusive. These are
11013 offsets from the current stack pointer. */
11015 const char *
11016 output_probe_stack_range (rtx reg, rtx end)
11018 static int labelno = 0;
11019 char loop_lab[32], end_lab[32];
11020 rtx xops[3];
11022 ASM_GENERATE_INTERNAL_LABEL (loop_lab, "LPSRL", labelno);
11023 ASM_GENERATE_INTERNAL_LABEL (end_lab, "LPSRE", labelno++);
11025 ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, loop_lab);
11027 /* Jump to END_LAB if TEST_ADDR == LAST_ADDR. */
11028 xops[0] = reg;
11029 xops[1] = end;
11030 output_asm_insn ("cmp%z0\t{%1, %0|%0, %1}", xops);
11031 fputs ("\tje\t", asm_out_file);
11032 assemble_name_raw (asm_out_file, end_lab);
11033 fputc ('\n', asm_out_file);
11035 /* TEST_ADDR = TEST_ADDR + PROBE_INTERVAL. */
11036 xops[1] = GEN_INT (PROBE_INTERVAL);
11037 output_asm_insn ("sub%z0\t{%1, %0|%0, %1}", xops);
11039 /* Probe at TEST_ADDR. */
11040 xops[0] = stack_pointer_rtx;
11041 xops[1] = reg;
11042 xops[2] = const0_rtx;
11043 output_asm_insn ("or%z0\t{%2, (%0,%1)|DWORD PTR [%0+%1], %2}", xops);
11045 fprintf (asm_out_file, "\tjmp\t");
11046 assemble_name_raw (asm_out_file, loop_lab);
11047 fputc ('\n', asm_out_file);
11049 ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, end_lab);
11051 return "";
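/* Note the difference from output_adjust_stack_and_probe above: this loop
   leaves the stack pointer untouched and instead steps the scratch offset
   register by PROBE_INTERVAL each iteration, so every probe writes to the
   address SP + TEST_OFFSET, i.e. (%esp,%reg) in AT&T syntax rather than
   (%esp) directly.  */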
11054 /* Finalize the stack_realign_needed flag, which guides the prologue/epilogue
11055 so that it is generated in the correct form. */
11056 static void
11057 ix86_finalize_stack_realign_flags (void)
11059 /* Check if stack realignment is really needed after reload, and
11060 store the result in cfun. */
11061 unsigned int incoming_stack_boundary
11062 = (crtl->parm_stack_boundary > ix86_incoming_stack_boundary
11063 ? crtl->parm_stack_boundary : ix86_incoming_stack_boundary);
11064 unsigned int stack_realign = (incoming_stack_boundary
11065 < (crtl->is_leaf
11066 ? crtl->max_used_stack_slot_alignment
11067 : crtl->stack_alignment_needed));
11069 if (crtl->stack_realign_finalized)
11071 /* After stack_realign_needed is finalized, we can no longer
11072 change it. */
11073 gcc_assert (crtl->stack_realign_needed == stack_realign);
11074 return;
11077 /* If the only reason for frame_pointer_needed is that we conservatively
11078 assumed stack realignment might be needed, but in the end nothing that
11079 needed the stack alignment had been spilled, clear frame_pointer_needed
11080 and say we don't need stack realignment. */
11081 if (stack_realign
11082 && frame_pointer_needed
11083 && crtl->is_leaf
11084 && flag_omit_frame_pointer
11085 && crtl->sp_is_unchanging
11086 && !ix86_current_function_calls_tls_descriptor
11087 && !crtl->accesses_prior_frames
11088 && !cfun->calls_alloca
11089 && !crtl->calls_eh_return
11090 && !(flag_stack_check && STACK_CHECK_MOVING_SP)
11091 && !ix86_frame_pointer_required ()
11092 && get_frame_size () == 0
11093 && ix86_nsaved_sseregs () == 0
11094 && ix86_varargs_gpr_size + ix86_varargs_fpr_size == 0)
11096 HARD_REG_SET set_up_by_prologue, prologue_used;
11097 basic_block bb;
11099 CLEAR_HARD_REG_SET (prologue_used);
11100 CLEAR_HARD_REG_SET (set_up_by_prologue);
11101 add_to_hard_reg_set (&set_up_by_prologue, Pmode, STACK_POINTER_REGNUM);
11102 add_to_hard_reg_set (&set_up_by_prologue, Pmode, ARG_POINTER_REGNUM);
11103 add_to_hard_reg_set (&set_up_by_prologue, Pmode,
11104 HARD_FRAME_POINTER_REGNUM);
11105 FOR_EACH_BB_FN (bb, cfun)
11107 rtx_insn *insn;
11108 FOR_BB_INSNS (bb, insn)
11109 if (NONDEBUG_INSN_P (insn)
11110 && requires_stack_frame_p (insn, prologue_used,
11111 set_up_by_prologue))
11113 crtl->stack_realign_needed = stack_realign;
11114 crtl->stack_realign_finalized = true;
11115 return;
11119 /* If drap has been set, but it actually isn't live at the start
11120 of the function, there is no reason to set it up. */
11121 if (crtl->drap_reg)
11123 basic_block bb = ENTRY_BLOCK_PTR_FOR_FN (cfun)->next_bb;
11124 if (! REGNO_REG_SET_P (DF_LR_IN (bb), REGNO (crtl->drap_reg)))
11126 crtl->drap_reg = NULL_RTX;
11127 crtl->need_drap = false;
11130 else
11131 cfun->machine->no_drap_save_restore = true;
11133 frame_pointer_needed = false;
11134 stack_realign = false;
11135 crtl->max_used_stack_slot_alignment = incoming_stack_boundary;
11136 crtl->stack_alignment_needed = incoming_stack_boundary;
11137 crtl->stack_alignment_estimated = incoming_stack_boundary;
11138 if (crtl->preferred_stack_boundary > incoming_stack_boundary)
11139 crtl->preferred_stack_boundary = incoming_stack_boundary;
11140 df_finish_pass (true);
11141 df_scan_alloc (NULL);
11142 df_scan_blocks ();
11143 df_compute_regs_ever_live (true);
11144 df_analyze ();
11147 crtl->stack_realign_needed = stack_realign;
11148 crtl->stack_realign_finalized = true;
11151 /* Delete SET_GOT right after the entry block if it is allocated to REG. */
11153 static void
11154 ix86_elim_entry_set_got (rtx reg)
11156 basic_block bb = ENTRY_BLOCK_PTR_FOR_FN (cfun)->next_bb;
11157 rtx_insn *c_insn = BB_HEAD (bb);
11158 if (!NONDEBUG_INSN_P (c_insn))
11159 c_insn = next_nonnote_nondebug_insn (c_insn);
11160 if (c_insn && NONJUMP_INSN_P (c_insn))
11162 rtx pat = PATTERN (c_insn);
11163 if (GET_CODE (pat) == PARALLEL)
11165 rtx vec = XVECEXP (pat, 0, 0);
11166 if (GET_CODE (vec) == SET
11167 && XINT (XEXP (vec, 1), 1) == UNSPEC_SET_GOT
11168 && REGNO (XEXP (vec, 0)) == REGNO (reg))
11169 delete_insn (c_insn);
11174 /* Expand the prologue into a bunch of separate insns. */
11176 void
11177 ix86_expand_prologue (void)
11179 struct machine_function *m = cfun->machine;
11180 rtx insn, t;
11181 struct ix86_frame frame;
11182 HOST_WIDE_INT allocate;
11183 bool int_registers_saved;
11184 bool sse_registers_saved;
11186 ix86_finalize_stack_realign_flags ();
11188 /* DRAP should not coexist with stack_realign_fp */
11189 gcc_assert (!(crtl->drap_reg && stack_realign_fp));
11191 memset (&m->fs, 0, sizeof (m->fs));
11193 /* Initialize CFA state for before the prologue. */
11194 m->fs.cfa_reg = stack_pointer_rtx;
11195 m->fs.cfa_offset = INCOMING_FRAME_SP_OFFSET;
11197 /* Track SP offset to the CFA. We continue tracking this after we've
11198 swapped the CFA register away from SP. In the case of re-alignment
11199 this is fudged; we're interested in offsets within the local frame. */
11200 m->fs.sp_offset = INCOMING_FRAME_SP_OFFSET;
11201 m->fs.sp_valid = true;
11203 ix86_compute_frame_layout (&frame);
11205 if (!TARGET_64BIT && ix86_function_ms_hook_prologue (current_function_decl))
11207 /* We should have already generated an error for any use of
11208 ms_hook on a nested function. */
11209 gcc_checking_assert (!ix86_static_chain_on_stack);
11211 /* Check if profiling is active and whether we shall use the
11212 profiling-before-prologue variant.  If so, sorry. */
11213 if (crtl->profile && flag_fentry != 0)
11214 sorry ("ms_hook_prologue attribute isn%'t compatible "
11215 "with -mfentry for 32-bit");
11217 /* In ix86_asm_output_function_label we emitted:
11218 8b ff movl.s %edi,%edi
11219 55 push %ebp
11220 8b ec movl.s %esp,%ebp
11222 This matches the hookable function prologue in Win32 API
11223 functions in Microsoft Windows XP Service Pack 2 and newer.
11224 Wine uses this to enable Windows apps to hook the Win32 API
11225 functions provided by Wine.
11227 What that means is that we've already set up the frame pointer. */
11229 if (frame_pointer_needed
11230 && !(crtl->drap_reg && crtl->stack_realign_needed))
11232 rtx push, mov;
11234 /* We've decided to use the frame pointer already set up.
11235 Describe this to the unwinder by pretending that both
11236 push and mov insns happen right here.
11238 Putting the unwind info here at the end of the ms_hook
11239 is done so that we can make absolutely certain we get
11240 the required byte sequence at the start of the function,
11241 rather than relying on an assembler that can produce
11242 the exact encoding required.
11244 However it does mean (in the unpatched case) that we have
11245 a 1 insn window where the asynchronous unwind info is
11246 incorrect. However, if we placed the unwind info at
11247 its correct location we would have incorrect unwind info
11248 in the patched case. Which is probably all moot since
11249 I don't expect Wine generates dwarf2 unwind info for the
11250 system libraries that use this feature. */
11252 insn = emit_insn (gen_blockage ());
11254 push = gen_push (hard_frame_pointer_rtx);
11255 mov = gen_rtx_SET (VOIDmode, hard_frame_pointer_rtx,
11256 stack_pointer_rtx);
11257 RTX_FRAME_RELATED_P (push) = 1;
11258 RTX_FRAME_RELATED_P (mov) = 1;
11260 RTX_FRAME_RELATED_P (insn) = 1;
11261 add_reg_note (insn, REG_FRAME_RELATED_EXPR,
11262 gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, push, mov)));
11264 /* Note that gen_push incremented m->fs.cfa_offset, even
11265 though we didn't emit the push insn here. */
11266 m->fs.cfa_reg = hard_frame_pointer_rtx;
11267 m->fs.fp_offset = m->fs.cfa_offset;
11268 m->fs.fp_valid = true;
11270 else
11272 /* The frame pointer is not needed so pop %ebp again.
11273 This leaves us with a pristine state. */
11274 emit_insn (gen_pop (hard_frame_pointer_rtx));
11278 /* The first insn of a function that accepts its static chain on the
11279 stack is to push the register that would be filled in by a direct
11280 call. This insn will be skipped by the trampoline. */
11281 else if (ix86_static_chain_on_stack)
11283 insn = emit_insn (gen_push (ix86_static_chain (cfun->decl, false)));
11284 emit_insn (gen_blockage ());
11286 /* We don't want to interpret this push insn as a register save,
11287 only as a stack adjustment. The real copy of the register as
11288 a save will be done later, if needed. */
11289 t = plus_constant (Pmode, stack_pointer_rtx, -UNITS_PER_WORD);
11290 t = gen_rtx_SET (VOIDmode, stack_pointer_rtx, t);
11291 add_reg_note (insn, REG_CFA_ADJUST_CFA, t);
11292 RTX_FRAME_RELATED_P (insn) = 1;
11295 /* Emit prologue code to adjust stack alignment and set up DRAP, in case
11296 DRAP is needed and stack realignment is really needed after reload. */
11297 if (stack_realign_drap)
11299 int align_bytes = crtl->stack_alignment_needed / BITS_PER_UNIT;
11301 /* Only need to push parameter pointer reg if it is caller saved. */
11302 if (!call_used_regs[REGNO (crtl->drap_reg)])
11304 /* Push the argument pointer register. */
11305 insn = emit_insn (gen_push (crtl->drap_reg));
11306 RTX_FRAME_RELATED_P (insn) = 1;
11309 /* Grab the argument pointer. */
11310 t = plus_constant (Pmode, stack_pointer_rtx, m->fs.sp_offset);
11311 insn = emit_insn (gen_rtx_SET (VOIDmode, crtl->drap_reg, t));
11312 RTX_FRAME_RELATED_P (insn) = 1;
11313 m->fs.cfa_reg = crtl->drap_reg;
11314 m->fs.cfa_offset = 0;
11316 /* Align the stack. */
11317 insn = emit_insn (ix86_gen_andsp (stack_pointer_rtx,
11318 stack_pointer_rtx,
11319 GEN_INT (-align_bytes)));
11320 RTX_FRAME_RELATED_P (insn) = 1;
11322 /* Replicate the return address on the stack so that the return
11323 address can be reached via the (argp - 1) slot.  This is needed
11324 to implement the macro RETURN_ADDR_RTX and the intrinsic function
11325 expand_builtin_return_addr, etc. */
11326 t = plus_constant (Pmode, crtl->drap_reg, -UNITS_PER_WORD);
11327 t = gen_frame_mem (word_mode, t);
11328 insn = emit_insn (gen_push (t));
11329 RTX_FRAME_RELATED_P (insn) = 1;
11331 /* For the purposes of frame and register save area addressing,
11332 we've started over with a new frame. */
11333 m->fs.sp_offset = INCOMING_FRAME_SP_OFFSET;
11334 m->fs.realigned = true;
11337 int_registers_saved = (frame.nregs == 0);
11338 sse_registers_saved = (frame.nsseregs == 0);
11340 if (frame_pointer_needed && !m->fs.fp_valid)
11342 /* Note: AT&T enter does NOT have reversed args. Enter is probably
11343 slower on all targets. Also sdb doesn't like it. */
11344 insn = emit_insn (gen_push (hard_frame_pointer_rtx));
11345 RTX_FRAME_RELATED_P (insn) = 1;
11347 /* Push registers now, before setting the frame pointer
11348 on SEH target. */
11349 if (!int_registers_saved
11350 && TARGET_SEH
11351 && !frame.save_regs_using_mov)
11353 ix86_emit_save_regs ();
11354 int_registers_saved = true;
11355 gcc_assert (m->fs.sp_offset == frame.reg_save_offset);
11358 if (m->fs.sp_offset == frame.hard_frame_pointer_offset)
11360 insn = emit_move_insn (hard_frame_pointer_rtx, stack_pointer_rtx);
11361 RTX_FRAME_RELATED_P (insn) = 1;
11363 if (m->fs.cfa_reg == stack_pointer_rtx)
11364 m->fs.cfa_reg = hard_frame_pointer_rtx;
11365 m->fs.fp_offset = m->fs.sp_offset;
11366 m->fs.fp_valid = true;
11370 if (!int_registers_saved)
11372 /* If saving registers via PUSH, do so now. */
11373 if (!frame.save_regs_using_mov)
11375 ix86_emit_save_regs ();
11376 int_registers_saved = true;
11377 gcc_assert (m->fs.sp_offset == frame.reg_save_offset);
11380 /* When using the red zone we may start saving registers before allocating
11381 the stack frame, saving one cycle of the prologue.  However, avoid
11382 doing this if we have to probe the stack; at least on x86_64 the
11383 stack probe can turn into a call that clobbers a red zone location. */
11384 else if (ix86_using_red_zone ()
11385 && (! TARGET_STACK_PROBE
11386 || frame.stack_pointer_offset < CHECK_STACK_LIMIT))
11388 ix86_emit_save_regs_using_mov (frame.reg_save_offset);
11389 int_registers_saved = true;
11393 if (stack_realign_fp)
11395 int align_bytes = crtl->stack_alignment_needed / BITS_PER_UNIT;
11396 gcc_assert (align_bytes > MIN_STACK_BOUNDARY / BITS_PER_UNIT);
11398 /* The computation of the size of the re-aligned stack frame means
11399 that we must allocate the size of the register save area before
11400 performing the actual alignment. Otherwise we cannot guarantee
11401 that there's enough storage above the realignment point. */
11402 if (m->fs.sp_offset != frame.sse_reg_save_offset)
11403 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
11404 GEN_INT (m->fs.sp_offset
11405 - frame.sse_reg_save_offset),
11406 -1, false);
11408 /* Align the stack. */
11409 insn = emit_insn (ix86_gen_andsp (stack_pointer_rtx,
11410 stack_pointer_rtx,
11411 GEN_INT (-align_bytes)));
11413 /* For the purposes of register save area addressing, the stack
11414 pointer is no longer valid. As for the value of sp_offset,
11415 see ix86_compute_frame_layout, which we need to match in order
11416 to pass verification of stack_pointer_offset at the end. */
11417 m->fs.sp_offset = (m->fs.sp_offset + align_bytes) & -align_bytes;
11418 m->fs.sp_valid = false;
11421 allocate = frame.stack_pointer_offset - m->fs.sp_offset;
11423 if (flag_stack_usage_info)
11425 /* We start to count from ARG_POINTER. */
11426 HOST_WIDE_INT stack_size = frame.stack_pointer_offset;
11428 /* If it was realigned, take into account the fake frame. */
11429 if (stack_realign_drap)
11431 if (ix86_static_chain_on_stack)
11432 stack_size += UNITS_PER_WORD;
11434 if (!call_used_regs[REGNO (crtl->drap_reg)])
11435 stack_size += UNITS_PER_WORD;
11437 /* This over-estimates by 1 minimal-stack-alignment-unit but
11438 mitigates that by counting in the new return address slot. */
11439 current_function_dynamic_stack_size
11440 += crtl->stack_alignment_needed / BITS_PER_UNIT;
11443 current_function_static_stack_size = stack_size;
11446 /* On SEH target with very large frame size, allocate an area to save
11447 SSE registers (as the very large allocation won't be described). */
11448 if (TARGET_SEH
11449 && frame.stack_pointer_offset > SEH_MAX_FRAME_SIZE
11450 && !sse_registers_saved)
11452 HOST_WIDE_INT sse_size =
11453 frame.sse_reg_save_offset - frame.reg_save_offset;
11455 gcc_assert (int_registers_saved);
11457 /* No need to do stack checking as the area will be immediately
11458 written. */
11459 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
11460 GEN_INT (-sse_size), -1,
11461 m->fs.cfa_reg == stack_pointer_rtx);
11462 allocate -= sse_size;
11463 ix86_emit_save_sse_regs_using_mov (frame.sse_reg_save_offset);
11464 sse_registers_saved = true;
11467 /* The stack has already been decremented by the instruction calling us
11468 so probe if the size is non-negative to preserve the protection area. */
11469 if (allocate >= 0 && flag_stack_check == STATIC_BUILTIN_STACK_CHECK)
11471 /* We expect the registers to be saved when probes are used. */
11472 gcc_assert (int_registers_saved);
11474 if (STACK_CHECK_MOVING_SP)
11476 if (!(crtl->is_leaf && !cfun->calls_alloca
11477 && allocate <= PROBE_INTERVAL))
11479 ix86_adjust_stack_and_probe (allocate);
11480 allocate = 0;
11483 else
11485 HOST_WIDE_INT size = allocate;
11487 if (TARGET_64BIT && size >= (HOST_WIDE_INT) 0x80000000)
11488 size = 0x80000000 - STACK_CHECK_PROTECT - 1;
11490 if (TARGET_STACK_PROBE)
11492 if (crtl->is_leaf && !cfun->calls_alloca)
11494 if (size > PROBE_INTERVAL)
11495 ix86_emit_probe_stack_range (0, size);
11497 else
11498 ix86_emit_probe_stack_range (0, size + STACK_CHECK_PROTECT);
11500 else
11502 if (crtl->is_leaf && !cfun->calls_alloca)
11504 if (size > PROBE_INTERVAL && size > STACK_CHECK_PROTECT)
11505 ix86_emit_probe_stack_range (STACK_CHECK_PROTECT,
11506 size - STACK_CHECK_PROTECT);
11508 else
11509 ix86_emit_probe_stack_range (STACK_CHECK_PROTECT, size);
11514 if (allocate == 0)
11516 else if (!ix86_target_stack_probe ()
11517 || frame.stack_pointer_offset < CHECK_STACK_LIMIT)
11519 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
11520 GEN_INT (-allocate), -1,
11521 m->fs.cfa_reg == stack_pointer_rtx);
11523 else
11525 rtx eax = gen_rtx_REG (Pmode, AX_REG);
11526 rtx r10 = NULL;
11527 rtx (*adjust_stack_insn)(rtx, rtx, rtx);
11528 const bool sp_is_cfa_reg = (m->fs.cfa_reg == stack_pointer_rtx);
11529 bool eax_live = ix86_eax_live_at_start_p ();
11530 bool r10_live = false;
11532 if (TARGET_64BIT)
11533 r10_live = (DECL_STATIC_CHAIN (current_function_decl) != 0);
11535 if (eax_live)
11537 insn = emit_insn (gen_push (eax));
11538 allocate -= UNITS_PER_WORD;
11539 /* Note that SEH directives need to continue tracking the stack
11540 pointer even after the frame pointer has been set up. */
11541 if (sp_is_cfa_reg || TARGET_SEH)
11543 if (sp_is_cfa_reg)
11544 m->fs.cfa_offset += UNITS_PER_WORD;
11545 RTX_FRAME_RELATED_P (insn) = 1;
11549 if (r10_live)
11551 r10 = gen_rtx_REG (Pmode, R10_REG);
11552 insn = emit_insn (gen_push (r10));
11553 allocate -= UNITS_PER_WORD;
11554 if (sp_is_cfa_reg || TARGET_SEH)
11556 if (sp_is_cfa_reg)
11557 m->fs.cfa_offset += UNITS_PER_WORD;
11558 RTX_FRAME_RELATED_P (insn) = 1;
11562 emit_move_insn (eax, GEN_INT (allocate));
11563 emit_insn (ix86_gen_allocate_stack_worker (eax, eax));
11565 /* Use the fact that AX still contains ALLOCATE. */
11566 adjust_stack_insn = (Pmode == DImode
11567 ? gen_pro_epilogue_adjust_stack_di_sub
11568 : gen_pro_epilogue_adjust_stack_si_sub);
11570 insn = emit_insn (adjust_stack_insn (stack_pointer_rtx,
11571 stack_pointer_rtx, eax));
11573 if (sp_is_cfa_reg || TARGET_SEH)
11575 if (sp_is_cfa_reg)
11576 m->fs.cfa_offset += allocate;
11577 RTX_FRAME_RELATED_P (insn) = 1;
11578 add_reg_note (insn, REG_FRAME_RELATED_EXPR,
11579 gen_rtx_SET (VOIDmode, stack_pointer_rtx,
11580 plus_constant (Pmode, stack_pointer_rtx,
11581 -allocate)));
11583 m->fs.sp_offset += allocate;
11585 /* Use stack_pointer_rtx for relative addressing so that code
11586 works for realigned stack, too. */
11587 if (r10_live && eax_live)
11589 t = gen_rtx_PLUS (Pmode, stack_pointer_rtx, eax);
11590 emit_move_insn (gen_rtx_REG (word_mode, R10_REG),
11591 gen_frame_mem (word_mode, t));
11592 t = plus_constant (Pmode, t, UNITS_PER_WORD);
11593 emit_move_insn (gen_rtx_REG (word_mode, AX_REG),
11594 gen_frame_mem (word_mode, t));
11596 else if (eax_live || r10_live)
11598 t = gen_rtx_PLUS (Pmode, stack_pointer_rtx, eax);
11599 emit_move_insn (gen_rtx_REG (word_mode,
11600 (eax_live ? AX_REG : R10_REG)),
11601 gen_frame_mem (word_mode, t));
11604 gcc_assert (m->fs.sp_offset == frame.stack_pointer_offset);
11606 /* If we haven't already set up the frame pointer, do so now. */
11607 if (frame_pointer_needed && !m->fs.fp_valid)
11609 insn = ix86_gen_add3 (hard_frame_pointer_rtx, stack_pointer_rtx,
11610 GEN_INT (frame.stack_pointer_offset
11611 - frame.hard_frame_pointer_offset));
11612 insn = emit_insn (insn);
11613 RTX_FRAME_RELATED_P (insn) = 1;
11614 add_reg_note (insn, REG_CFA_ADJUST_CFA, NULL);
11616 if (m->fs.cfa_reg == stack_pointer_rtx)
11617 m->fs.cfa_reg = hard_frame_pointer_rtx;
11618 m->fs.fp_offset = frame.hard_frame_pointer_offset;
11619 m->fs.fp_valid = true;
11622 if (!int_registers_saved)
11623 ix86_emit_save_regs_using_mov (frame.reg_save_offset);
11624 if (!sse_registers_saved)
11625 ix86_emit_save_sse_regs_using_mov (frame.sse_reg_save_offset);
11627 /* For mcount profiling in 32-bit PIC mode we need to emit SET_GOT
11628 in the prologue. */
11629 if (!TARGET_64BIT && pic_offset_table_rtx && crtl->profile && !flag_fentry)
11631 rtx pic = gen_rtx_REG (Pmode, REAL_PIC_OFFSET_TABLE_REGNUM);
11632 insn = emit_insn (gen_set_got (pic));
11633 RTX_FRAME_RELATED_P (insn) = 1;
11634 add_reg_note (insn, REG_CFA_FLUSH_QUEUE, NULL_RTX);
11635 emit_insn (gen_prologue_use (pic));
11636 /* Delete an already-emitted SET_GOT if it exists and is allocated to
11637 REAL_PIC_OFFSET_TABLE_REGNUM. */
11638 ix86_elim_entry_set_got (pic);
11641 if (crtl->drap_reg && !crtl->stack_realign_needed)
11643 /* vDRAP is set up, but after reload it turns out stack realignment
11644 isn't necessary; here we emit prologue code to set up DRAP
11645 without the stack realignment adjustment. */
11646 t = choose_baseaddr (0);
11647 emit_insn (gen_rtx_SET (VOIDmode, crtl->drap_reg, t));
11650 /* Prevent instructions from being scheduled into the register save push
11651 sequence when access to the red-zone area is done through the frame pointer.
11652 The offset between the frame pointer and the stack pointer is calculated
11653 relative to the value of the stack pointer at the end of the function
11654 prologue, and moving instructions that access the red-zone area via the frame
11655 pointer inside the push sequence violates this assumption. */
11656 if (frame_pointer_needed && frame.red_zone_size)
11657 emit_insn (gen_memory_blockage ());
11659 /* Emit cld instruction if stringops are used in the function. */
11660 if (TARGET_CLD && ix86_current_function_needs_cld)
11661 emit_insn (gen_cld ());
11663 /* SEH requires that the prologue end within 256 bytes of the start of
11664 the function. Prevent instruction schedules that would extend that.
11665 Further, prevent alloca modifications to the stack pointer from being
11666 combined with prologue modifications. */
11667 if (TARGET_SEH)
11668 emit_insn (gen_prologue_use (stack_pointer_rtx));
11671 /* Emit code to restore REG using a POP insn. */
11673 static void
11674 ix86_emit_restore_reg_using_pop (rtx reg)
11676 struct machine_function *m = cfun->machine;
11677 rtx insn = emit_insn (gen_pop (reg));
11679 ix86_add_cfa_restore_note (insn, reg, m->fs.sp_offset);
11680 m->fs.sp_offset -= UNITS_PER_WORD;
11682 if (m->fs.cfa_reg == crtl->drap_reg
11683 && REGNO (reg) == REGNO (crtl->drap_reg))
11685 /* Previously we'd represented the CFA as an expression
11686 like *(%ebp - 8). We've just popped that value from
11687 the stack, which means we need to reset the CFA to
11688 the drap register. This will remain until we restore
11689 the stack pointer. */
11690 add_reg_note (insn, REG_CFA_DEF_CFA, reg);
11691 RTX_FRAME_RELATED_P (insn) = 1;
11693 /* This means that the DRAP register is valid for addressing too. */
11694 m->fs.drap_valid = true;
11695 return;
11698 if (m->fs.cfa_reg == stack_pointer_rtx)
11700 rtx x = plus_constant (Pmode, stack_pointer_rtx, UNITS_PER_WORD);
11701 x = gen_rtx_SET (VOIDmode, stack_pointer_rtx, x);
11702 add_reg_note (insn, REG_CFA_ADJUST_CFA, x);
11703 RTX_FRAME_RELATED_P (insn) = 1;
11705 m->fs.cfa_offset -= UNITS_PER_WORD;
11708 /* When the frame pointer is the CFA, and we pop it, we are
11709 swapping back to the stack pointer as the CFA. This happens
11710 for stack frames that don't allocate other data, so we assume
11711 the stack pointer is now pointing at the return address, i.e.
11712 the function entry state, which makes the offset be 1 word. */
11713 if (reg == hard_frame_pointer_rtx)
11715 m->fs.fp_valid = false;
11716 if (m->fs.cfa_reg == hard_frame_pointer_rtx)
11718 m->fs.cfa_reg = stack_pointer_rtx;
11719 m->fs.cfa_offset -= UNITS_PER_WORD;
11721 add_reg_note (insn, REG_CFA_DEF_CFA,
11722 gen_rtx_PLUS (Pmode, stack_pointer_rtx,
11723 GEN_INT (m->fs.cfa_offset)));
11724 RTX_FRAME_RELATED_P (insn) = 1;
11729 /* Emit code to restore saved registers using POP insns. */
11731 static void
11732 ix86_emit_restore_regs_using_pop (void)
11734 unsigned int regno;
11736 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
11737 if (!SSE_REGNO_P (regno) && ix86_save_reg (regno, false))
11738 ix86_emit_restore_reg_using_pop (gen_rtx_REG (word_mode, regno));
11741 /* Emit code and notes for the LEAVE instruction. */
11743 static void
11744 ix86_emit_leave (void)
11746 struct machine_function *m = cfun->machine;
11747 rtx insn = emit_insn (ix86_gen_leave ());
11749 ix86_add_queued_cfa_restore_notes (insn);
11751 gcc_assert (m->fs.fp_valid);
11752 m->fs.sp_valid = true;
11753 m->fs.sp_offset = m->fs.fp_offset - UNITS_PER_WORD;
11754 m->fs.fp_valid = false;
11756 if (m->fs.cfa_reg == hard_frame_pointer_rtx)
11758 m->fs.cfa_reg = stack_pointer_rtx;
11759 m->fs.cfa_offset = m->fs.sp_offset;
11761 add_reg_note (insn, REG_CFA_DEF_CFA,
11762 plus_constant (Pmode, stack_pointer_rtx,
11763 m->fs.sp_offset));
11764 RTX_FRAME_RELATED_P (insn) = 1;
11766 ix86_add_cfa_restore_note (insn, hard_frame_pointer_rtx,
11767 m->fs.fp_offset);
11770 /* Emit code to restore saved registers using MOV insns.
11771 First register is restored from CFA - CFA_OFFSET. */
11772 static void
11773 ix86_emit_restore_regs_using_mov (HOST_WIDE_INT cfa_offset,
11774 bool maybe_eh_return)
11776 struct machine_function *m = cfun->machine;
11777 unsigned int regno;
11779 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
11780 if (!SSE_REGNO_P (regno) && ix86_save_reg (regno, maybe_eh_return))
11782 rtx reg = gen_rtx_REG (word_mode, regno);
11783 rtx insn, mem;
11785 mem = choose_baseaddr (cfa_offset);
11786 mem = gen_frame_mem (word_mode, mem);
11787 insn = emit_move_insn (reg, mem);
11789 if (m->fs.cfa_reg == crtl->drap_reg && regno == REGNO (crtl->drap_reg))
11791 /* Previously we'd represented the CFA as an expression
11792 like *(%ebp - 8).  We've just loaded that value from
11793 the stack, which means we need to reset the CFA to
11794 the drap register.  This will remain until we restore
11795 the stack pointer. */
11796 add_reg_note (insn, REG_CFA_DEF_CFA, reg);
11797 RTX_FRAME_RELATED_P (insn) = 1;
11799 /* This means that the DRAP register is valid for addressing. */
11800 m->fs.drap_valid = true;
11802 else
11803 ix86_add_cfa_restore_note (NULL_RTX, reg, cfa_offset);
11805 cfa_offset -= UNITS_PER_WORD;
11809 /* Emit code to restore saved registers using MOV insns.
11810 First register is restored from CFA - CFA_OFFSET. */
11811 static void
11812 ix86_emit_restore_sse_regs_using_mov (HOST_WIDE_INT cfa_offset,
11813 bool maybe_eh_return)
11815 unsigned int regno;
11817 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
11818 if (SSE_REGNO_P (regno) && ix86_save_reg (regno, maybe_eh_return))
11820 rtx reg = gen_rtx_REG (V4SFmode, regno);
11821 rtx mem;
11823 mem = choose_baseaddr (cfa_offset);
11824 mem = gen_rtx_MEM (V4SFmode, mem);
11825 set_mem_align (mem, 128);
11826 emit_move_insn (reg, mem);
11828 ix86_add_cfa_restore_note (NULL_RTX, reg, cfa_offset);
11830 cfa_offset -= 16;
11834 /* Restore function stack, frame, and registers. */
11836 void
11837 ix86_expand_epilogue (int style)
11839 struct machine_function *m = cfun->machine;
11840 struct machine_frame_state frame_state_save = m->fs;
11841 struct ix86_frame frame;
11842 bool restore_regs_via_mov;
11843 bool using_drap;
11845 ix86_finalize_stack_realign_flags ();
11846 ix86_compute_frame_layout (&frame);
11848 m->fs.sp_valid = (!frame_pointer_needed
11849 || (crtl->sp_is_unchanging
11850 && !stack_realign_fp));
11851 gcc_assert (!m->fs.sp_valid
11852 || m->fs.sp_offset == frame.stack_pointer_offset);
11854 /* The FP must be valid if the frame pointer is present. */
11855 gcc_assert (frame_pointer_needed == m->fs.fp_valid);
11856 gcc_assert (!m->fs.fp_valid
11857 || m->fs.fp_offset == frame.hard_frame_pointer_offset);
11859 /* We must have *some* valid pointer to the stack frame. */
11860 gcc_assert (m->fs.sp_valid || m->fs.fp_valid);
11862 /* The DRAP is never valid at this point. */
11863 gcc_assert (!m->fs.drap_valid);
11865 /* See the comment about red zone and frame
11866 pointer usage in ix86_expand_prologue. */
11867 if (frame_pointer_needed && frame.red_zone_size)
11868 emit_insn (gen_memory_blockage ());
11870 using_drap = crtl->drap_reg && crtl->stack_realign_needed;
11871 gcc_assert (!using_drap || m->fs.cfa_reg == crtl->drap_reg);
11873 /* Determine the CFA offset of the end of the red-zone. */
11874 m->fs.red_zone_offset = 0;
11875 if (ix86_using_red_zone () && crtl->args.pops_args < 65536)
11877 /* The red-zone begins below the return address. */
11878 m->fs.red_zone_offset = RED_ZONE_SIZE + UNITS_PER_WORD;
11880 /* When the register save area is in the aligned portion of
11881 the stack, determine the maximum runtime displacement that
11882 matches up with the aligned frame. */
11883 if (stack_realign_drap)
11884 m->fs.red_zone_offset -= (crtl->stack_alignment_needed / BITS_PER_UNIT
11885 + UNITS_PER_WORD);
11888 /* Special care must be taken for the normal return case of a function
11889 using eh_return: the eax and edx registers are marked as saved, but
11890 not restored along this path. Adjust the save location to match. */
11891 if (crtl->calls_eh_return && style != 2)
11892 frame.reg_save_offset -= 2 * UNITS_PER_WORD;
11894 /* EH_RETURN requires the use of moves to function properly. */
11895 if (crtl->calls_eh_return)
11896 restore_regs_via_mov = true;
11897 /* SEH requires the use of pops to identify the epilogue. */
11898 else if (TARGET_SEH)
11899 restore_regs_via_mov = false;
11900 /* If we're only restoring one register and sp is not valid, then
11901 use a move instruction to restore the register, since it's
11902 less work than reloading sp and popping the register. */
11903 else if (!m->fs.sp_valid && frame.nregs <= 1)
11904 restore_regs_via_mov = true;
11905 else if (TARGET_EPILOGUE_USING_MOVE
11906 && cfun->machine->use_fast_prologue_epilogue
11907 && (frame.nregs > 1
11908 || m->fs.sp_offset != frame.reg_save_offset))
11909 restore_regs_via_mov = true;
11910 else if (frame_pointer_needed
11911 && !frame.nregs
11912 && m->fs.sp_offset != frame.reg_save_offset)
11913 restore_regs_via_mov = true;
11914 else if (frame_pointer_needed
11915 && TARGET_USE_LEAVE
11916 && cfun->machine->use_fast_prologue_epilogue
11917 && frame.nregs == 1)
11918 restore_regs_via_mov = true;
11919 else
11920 restore_regs_via_mov = false;
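/* Roughly speaking: eh_return forces moves, SEH forces pops, and otherwise
   moves are preferred whenever the stack pointer is not directly usable
   for pops or extra stack adjustments would be needed before the pop
   sequence could reach the register save area.  */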
11922 if (restore_regs_via_mov || frame.nsseregs)
11924 /* Ensure that the entire register save area is addressable via
11925 the stack pointer, if we will restore via sp. */
11926 if (TARGET_64BIT
11927 && m->fs.sp_offset > 0x7fffffff
11928 && !(m->fs.fp_valid || m->fs.drap_valid)
11929 && (frame.nsseregs + frame.nregs) != 0)
11931 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
11932 GEN_INT (m->fs.sp_offset
11933 - frame.sse_reg_save_offset),
11934 style,
11935 m->fs.cfa_reg == stack_pointer_rtx);
11939 /* If there are any SSE registers to restore, then we have to do it
11940 via moves, since there's obviously no pop for SSE regs. */
11941 if (frame.nsseregs)
11942 ix86_emit_restore_sse_regs_using_mov (frame.sse_reg_save_offset,
11943 style == 2);
11945 if (restore_regs_via_mov)
11947 rtx t;
11949 if (frame.nregs)
11950 ix86_emit_restore_regs_using_mov (frame.reg_save_offset, style == 2);
11952 /* eh_return epilogues need %ecx added to the stack pointer. */
11953 if (style == 2)
11955 rtx insn, sa = EH_RETURN_STACKADJ_RTX;
11957 /* Stack align doesn't work with eh_return. */
11958 gcc_assert (!stack_realign_drap);
11959 /* Neither do regparm nested functions. */
11960 gcc_assert (!ix86_static_chain_on_stack);
11962 if (frame_pointer_needed)
11964 t = gen_rtx_PLUS (Pmode, hard_frame_pointer_rtx, sa);
11965 t = plus_constant (Pmode, t, m->fs.fp_offset - UNITS_PER_WORD);
11966 emit_insn (gen_rtx_SET (VOIDmode, sa, t));
11968 t = gen_frame_mem (Pmode, hard_frame_pointer_rtx);
11969 insn = emit_move_insn (hard_frame_pointer_rtx, t);
11971 /* Note that we use SA as a temporary CFA, as the return
11972 address is at the proper place relative to it. We
11973 pretend this happens at the FP restore insn because
11974 prior to this insn the FP would be stored at the wrong
11975 offset relative to SA, and after this insn we have no
11976 other reasonable register to use for the CFA. We don't
11977 bother resetting the CFA to the SP for the duration of
11978 the return insn. */
11979 add_reg_note (insn, REG_CFA_DEF_CFA,
11980 plus_constant (Pmode, sa, UNITS_PER_WORD));
11981 ix86_add_queued_cfa_restore_notes (insn);
11982 add_reg_note (insn, REG_CFA_RESTORE, hard_frame_pointer_rtx);
11983 RTX_FRAME_RELATED_P (insn) = 1;
11985 m->fs.cfa_reg = sa;
11986 m->fs.cfa_offset = UNITS_PER_WORD;
11987 m->fs.fp_valid = false;
11989 pro_epilogue_adjust_stack (stack_pointer_rtx, sa,
11990 const0_rtx, style, false);
11992 else
11994 t = gen_rtx_PLUS (Pmode, stack_pointer_rtx, sa);
11995 t = plus_constant (Pmode, t, m->fs.sp_offset - UNITS_PER_WORD);
11996 insn = emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx, t));
11997 ix86_add_queued_cfa_restore_notes (insn);
11999 gcc_assert (m->fs.cfa_reg == stack_pointer_rtx);
12000 if (m->fs.cfa_offset != UNITS_PER_WORD)
12002 m->fs.cfa_offset = UNITS_PER_WORD;
12003 add_reg_note (insn, REG_CFA_DEF_CFA,
12004 plus_constant (Pmode, stack_pointer_rtx,
12005 UNITS_PER_WORD));
12006 RTX_FRAME_RELATED_P (insn) = 1;
12009 m->fs.sp_offset = UNITS_PER_WORD;
12010 m->fs.sp_valid = true;
12013 else
12015 /* SEH requires that the function end with (1) a stack adjustment
12016 if necessary, (2) a sequence of pops, and (3) a return or
12017 jump instruction. Prevent insns from the function body from
12018 being scheduled into this sequence. */
12019 if (TARGET_SEH)
12021 /* Prevent a catch region from being adjacent to the standard
12022 epilogue sequence.  Unfortunately, neither crtl->uses_eh_lsda nor
12023 several other flags that would be interesting to test are
12024 set up yet. */
12025 if (flag_non_call_exceptions)
12026 emit_insn (gen_nops (const1_rtx));
12027 else
12028 emit_insn (gen_blockage ());
12031 /* First step is to deallocate the stack frame so that we can
12032 pop the registers. Also do it on SEH target for very large
12033 frame as the emitted instructions aren't allowed by the ABI in
12034 epilogues. */
12035 if (!m->fs.sp_valid
12036 || (TARGET_SEH
12037 && (m->fs.sp_offset - frame.reg_save_offset
12038 >= SEH_MAX_FRAME_SIZE)))
12040 pro_epilogue_adjust_stack (stack_pointer_rtx, hard_frame_pointer_rtx,
12041 GEN_INT (m->fs.fp_offset
12042 - frame.reg_save_offset),
12043 style, false);
12045 else if (m->fs.sp_offset != frame.reg_save_offset)
12047 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
12048 GEN_INT (m->fs.sp_offset
12049 - frame.reg_save_offset),
12050 style,
12051 m->fs.cfa_reg == stack_pointer_rtx);
12054 ix86_emit_restore_regs_using_pop ();
12057 /* If we used a frame pointer and haven't already got rid of it,
12058 then do so now. */
12059 if (m->fs.fp_valid)
12061 /* If the stack pointer is valid and pointing at the frame
12062 pointer store address, then we only need a pop. */
12063 if (m->fs.sp_valid && m->fs.sp_offset == frame.hfp_save_offset)
12064 ix86_emit_restore_reg_using_pop (hard_frame_pointer_rtx);
12065 /* Leave results in shorter dependency chains on CPUs that are
12066 able to grok it fast. */
12067 else if (TARGET_USE_LEAVE
12068 || optimize_bb_for_size_p (EXIT_BLOCK_PTR_FOR_FN (cfun))
12069 || !cfun->machine->use_fast_prologue_epilogue)
12070 ix86_emit_leave ();
12071 else
12073 pro_epilogue_adjust_stack (stack_pointer_rtx,
12074 hard_frame_pointer_rtx,
12075 const0_rtx, style, !using_drap);
12076 ix86_emit_restore_reg_using_pop (hard_frame_pointer_rtx);
12080 if (using_drap)
12082 int param_ptr_offset = UNITS_PER_WORD;
12083 rtx insn;
12085 gcc_assert (stack_realign_drap);
12087 if (ix86_static_chain_on_stack)
12088 param_ptr_offset += UNITS_PER_WORD;
12089 if (!call_used_regs[REGNO (crtl->drap_reg)])
12090 param_ptr_offset += UNITS_PER_WORD;
12092 insn = emit_insn (gen_rtx_SET
12093 (VOIDmode, stack_pointer_rtx,
12094 gen_rtx_PLUS (Pmode,
12095 crtl->drap_reg,
12096 GEN_INT (-param_ptr_offset))));
12097 m->fs.cfa_reg = stack_pointer_rtx;
12098 m->fs.cfa_offset = param_ptr_offset;
12099 m->fs.sp_offset = param_ptr_offset;
12100 m->fs.realigned = false;
12102 add_reg_note (insn, REG_CFA_DEF_CFA,
12103 gen_rtx_PLUS (Pmode, stack_pointer_rtx,
12104 GEN_INT (param_ptr_offset)));
12105 RTX_FRAME_RELATED_P (insn) = 1;
12107 if (!call_used_regs[REGNO (crtl->drap_reg)])
12108 ix86_emit_restore_reg_using_pop (crtl->drap_reg);
12111 /* At this point the stack pointer must be valid, and we must have
12112 restored all of the registers. We may not have deallocated the
12113 entire stack frame. We've delayed this until now because it may
12114 be possible to merge the local stack deallocation with the
12115 deallocation forced by ix86_static_chain_on_stack. */
12116 gcc_assert (m->fs.sp_valid);
12117 gcc_assert (!m->fs.fp_valid);
12118 gcc_assert (!m->fs.realigned);
12119 if (m->fs.sp_offset != UNITS_PER_WORD)
12121 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
12122 GEN_INT (m->fs.sp_offset - UNITS_PER_WORD),
12123 style, true);
12125 else
12126 ix86_add_queued_cfa_restore_notes (get_last_insn ());
12128 /* Sibcall epilogues don't want a return instruction. */
12129 if (style == 0)
12131 m->fs = frame_state_save;
12132 return;
12135 if (crtl->args.pops_args && crtl->args.size)
12137 rtx popc = GEN_INT (crtl->args.pops_args);
12139 /* i386 can only pop 64K bytes.  If asked to pop more, pop the return
12140 address, do an explicit add, and jump indirectly to the caller. */
12142 if (crtl->args.pops_args >= 65536)
12144 rtx ecx = gen_rtx_REG (SImode, CX_REG);
12145 rtx insn;
12147 /* There is no "pascal" calling convention in any 64bit ABI. */
12148 gcc_assert (!TARGET_64BIT);
12150 insn = emit_insn (gen_pop (ecx));
12151 m->fs.cfa_offset -= UNITS_PER_WORD;
12152 m->fs.sp_offset -= UNITS_PER_WORD;
12154 rtx x = plus_constant (Pmode, stack_pointer_rtx, UNITS_PER_WORD);
12155 x = gen_rtx_SET (VOIDmode, stack_pointer_rtx, x);
12156 add_reg_note (insn, REG_CFA_ADJUST_CFA, x);
12157 add_reg_note (insn, REG_CFA_REGISTER,
12158 gen_rtx_SET (VOIDmode, ecx, pc_rtx));
12159 RTX_FRAME_RELATED_P (insn) = 1;
12161 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
12162 popc, -1, true);
12163 emit_jump_insn (gen_simple_return_indirect_internal (ecx));
12165 else
12166 emit_jump_insn (gen_simple_return_pop_internal (popc));
12168 else
12169 emit_jump_insn (gen_simple_return_internal ());
12171 /* Restore the state back to the state from the prologue,
12172 so that it's correct for the next epilogue. */
12173 m->fs = frame_state_save;
12176 /* Reset from the function's potential modifications. */
12178 static void
12179 ix86_output_function_epilogue (FILE *file ATTRIBUTE_UNUSED, HOST_WIDE_INT)
12181 if (pic_offset_table_rtx
12182 && !ix86_use_pseudo_pic_reg ())
12183 SET_REGNO (pic_offset_table_rtx, REAL_PIC_OFFSET_TABLE_REGNUM);
12184 #if TARGET_MACHO
12185 /* Mach-O doesn't support labels at the end of objects, so if
12186 it looks like we might want one, insert a NOP. */
12188 rtx_insn *insn = get_last_insn ();
12189 rtx_insn *deleted_debug_label = NULL;
12190 while (insn
12191 && NOTE_P (insn)
12192 && NOTE_KIND (insn) != NOTE_INSN_DELETED_LABEL)
12194 /* For NOTE_INSN_DELETED_DEBUG_LABEL notes only, don't insert a nop;
12195 instead set their CODE_LABEL_NUMBER to -1,
12196 otherwise there would be code generation differences
12197 between -g and -g0. */
12198 if (NOTE_P (insn) && NOTE_KIND (insn) == NOTE_INSN_DELETED_DEBUG_LABEL)
12199 deleted_debug_label = insn;
12200 insn = PREV_INSN (insn);
12202 if (insn
12203 && (LABEL_P (insn)
12204 || (NOTE_P (insn)
12205 && NOTE_KIND (insn) == NOTE_INSN_DELETED_LABEL)))
12206 fputs ("\tnop\n", file);
12207 else if (deleted_debug_label)
12208 for (insn = deleted_debug_label; insn; insn = NEXT_INSN (insn))
12209 if (NOTE_KIND (insn) == NOTE_INSN_DELETED_DEBUG_LABEL)
12210 CODE_LABEL_NUMBER (insn) = -1;
12212 #endif
12216 /* Return a scratch register to use in the split stack prologue. The
12217 split stack prologue is used for -fsplit-stack. It consists of the
12218 first instructions in the function, even before the regular prologue.
12219 The scratch register can be any caller-saved register which is not
12220 used for parameters or for the static chain. */
12222 static unsigned int
12223 split_stack_prologue_scratch_regno (void)
12225 if (TARGET_64BIT)
12226 return R11_REG;
12227 else
12229 bool is_fastcall, is_thiscall;
12230 int regparm;
12232 is_fastcall = (lookup_attribute ("fastcall",
12233 TYPE_ATTRIBUTES (TREE_TYPE (cfun->decl)))
12234 != NULL);
12235 is_thiscall = (lookup_attribute ("thiscall",
12236 TYPE_ATTRIBUTES (TREE_TYPE (cfun->decl)))
12237 != NULL);
12238 regparm = ix86_function_regparm (TREE_TYPE (cfun->decl), cfun->decl);
12240 if (is_fastcall)
12242 if (DECL_STATIC_CHAIN (cfun->decl))
12244 sorry ("-fsplit-stack does not support fastcall with "
12245 "nested function");
12246 return INVALID_REGNUM;
12248 return AX_REG;
12250 else if (is_thiscall)
12252 if (!DECL_STATIC_CHAIN (cfun->decl))
12253 return DX_REG;
12254 return AX_REG;
12256 else if (regparm < 3)
12258 if (!DECL_STATIC_CHAIN (cfun->decl))
12259 return CX_REG;
12260 else
12262 if (regparm >= 2)
12264 sorry ("-fsplit-stack does not support 2 register "
12265 "parameters for a nested function");
12266 return INVALID_REGNUM;
12268 return DX_REG;
12271 else
12273 /* FIXME: We could make this work by pushing a register
12274 around the addition and comparison. */
12275 sorry ("-fsplit-stack does not support 3 register parameters");
12276 return INVALID_REGNUM;
12281 /* A SYMBOL_REF for the function which allocates new stack space for
12282 -fsplit-stack. */
12284 static GTY(()) rtx split_stack_fn;
12286 /* A SYMBOL_REF for the more-stack function to call when using the
12287 large code model. */
12289 static GTY(()) rtx split_stack_fn_large;
12291 /* Handle -fsplit-stack. These are the first instructions in the
12292 function, even before the regular prologue. */
12294 void
12295 ix86_expand_split_stack_prologue (void)
12297 struct ix86_frame frame;
12298 HOST_WIDE_INT allocate;
12299 unsigned HOST_WIDE_INT args_size;
12300 rtx_code_label *label;
12301 rtx limit, current, jump_insn, allocate_rtx, call_insn, call_fusage;
12302 rtx scratch_reg = NULL_RTX;
12303 rtx_code_label *varargs_label = NULL;
12304 rtx fn;
12306 gcc_assert (flag_split_stack && reload_completed);
12308 ix86_finalize_stack_realign_flags ();
12309 ix86_compute_frame_layout (&frame);
12310 allocate = frame.stack_pointer_offset - INCOMING_FRAME_SP_OFFSET;
12312 /* This is the label we will branch to if we have enough stack
12313 space. We expect the basic block reordering pass to reverse this
12314 branch if optimizing, so that we branch in the unlikely case. */
12315 label = gen_label_rtx ();
12317 /* We need to compare the stack pointer minus the frame size with
12318 the stack boundary in the TCB. The stack boundary always gives
12319 us SPLIT_STACK_AVAILABLE bytes, so if we need less than that we
12320 can compare directly. Otherwise we need to do an addition. */
12322 limit = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const0_rtx),
12323 UNSPEC_STACK_CHECK);
12324 limit = gen_rtx_CONST (Pmode, limit);
12325 limit = gen_rtx_MEM (Pmode, limit);
12326 if (allocate < SPLIT_STACK_AVAILABLE)
12327 current = stack_pointer_rtx;
12328 else
12330 unsigned int scratch_regno;
12331 rtx offset;
12333 /* We need a scratch register to hold the stack pointer minus
12334 the required frame size. Since this is the very start of the
12335 function, the scratch register can be any caller-saved
12336 register which is not used for parameters. */
12337 offset = GEN_INT (- allocate);
12338 scratch_regno = split_stack_prologue_scratch_regno ();
12339 if (scratch_regno == INVALID_REGNUM)
12340 return;
12341 scratch_reg = gen_rtx_REG (Pmode, scratch_regno);
12342 if (!TARGET_64BIT || x86_64_immediate_operand (offset, Pmode))
12344 /* We don't use ix86_gen_add3 in this case because it will
12345 want to split to lea, but when not optimizing the insn
12346 will not be split after this point. */
12347 emit_insn (gen_rtx_SET (VOIDmode, scratch_reg,
12348 gen_rtx_PLUS (Pmode, stack_pointer_rtx,
12349 offset)));
12351 else
12353 emit_move_insn (scratch_reg, offset);
12354 emit_insn (ix86_gen_add3 (scratch_reg, scratch_reg,
12355 stack_pointer_rtx));
12357 current = scratch_reg;
12360 ix86_expand_branch (GEU, current, limit, label);
12361 jump_insn = get_last_insn ();
12362 JUMP_LABEL (jump_insn) = label;
12364 /* Mark the jump as very likely to be taken. */
12365 add_int_reg_note (jump_insn, REG_BR_PROB,
12366 REG_BR_PROB_BASE - REG_BR_PROB_BASE / 100);
12368 if (split_stack_fn == NULL_RTX)
12370 split_stack_fn = gen_rtx_SYMBOL_REF (Pmode, "__morestack");
12371 SYMBOL_REF_FLAGS (split_stack_fn) |= SYMBOL_FLAG_LOCAL;
12373 fn = split_stack_fn;
12375 /* Get more stack space. We pass in the desired stack space and the
12376 size of the arguments to copy to the new stack. In 32-bit mode
12377 we push the parameters; __morestack will return on a new stack
12378 anyhow. In 64-bit mode we pass the parameters in r10 and
12379 r11. */
12380 allocate_rtx = GEN_INT (allocate);
12381 args_size = crtl->args.size >= 0 ? crtl->args.size : 0;
12382 call_fusage = NULL_RTX;
12383 if (TARGET_64BIT)
12385 rtx reg10, reg11;
12387 reg10 = gen_rtx_REG (Pmode, R10_REG);
12388 reg11 = gen_rtx_REG (Pmode, R11_REG);
12390 /* If this function uses a static chain, it will be in %r10.
12391 Preserve it across the call to __morestack. */
12392 if (DECL_STATIC_CHAIN (cfun->decl))
12394 rtx rax;
12396 rax = gen_rtx_REG (word_mode, AX_REG);
12397 emit_move_insn (rax, gen_rtx_REG (word_mode, R10_REG));
12398 use_reg (&call_fusage, rax);
12401 if ((ix86_cmodel == CM_LARGE || ix86_cmodel == CM_LARGE_PIC)
12402 && !TARGET_PECOFF)
12404 HOST_WIDE_INT argval;
12406 gcc_assert (Pmode == DImode);
12407 /* When using the large model we need to load the address
12408 into a register, and we've run out of registers. So we
12409 switch to a different calling convention, and we call a
12410 different function: __morestack_large_model. We pass the
12411 argument size in the upper 32 bits of r10 and pass the
12412 frame size in the lower 32 bits. */
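/* As a purely illustrative example (hypothetical values): with
args_size == 0x20 and allocate == 0x1000, the packing below yields
argval == ((0x20 << 16) << 16) + 0x1000 == 0x0000002000001000, i.e.
the argument size ends up in bits 32..63 of %r10 and the frame size
in bits 0..31.  */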
12413 gcc_assert ((allocate & (HOST_WIDE_INT) 0xffffffff) == allocate);
12414 gcc_assert ((args_size & 0xffffffff) == args_size);
12416 if (split_stack_fn_large == NULL_RTX)
12418 split_stack_fn_large =
12419 gen_rtx_SYMBOL_REF (Pmode, "__morestack_large_model");
12420 SYMBOL_REF_FLAGS (split_stack_fn_large) |= SYMBOL_FLAG_LOCAL;
12422 if (ix86_cmodel == CM_LARGE_PIC)
12424 rtx_code_label *label;
12425 rtx x;
12427 label = gen_label_rtx ();
12428 emit_label (label);
12429 LABEL_PRESERVE_P (label) = 1;
12430 emit_insn (gen_set_rip_rex64 (reg10, label));
12431 emit_insn (gen_set_got_offset_rex64 (reg11, label));
12432 emit_insn (ix86_gen_add3 (reg10, reg10, reg11));
12433 x = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, split_stack_fn_large),
12434 UNSPEC_GOT);
12435 x = gen_rtx_CONST (Pmode, x);
12436 emit_move_insn (reg11, x);
12437 x = gen_rtx_PLUS (Pmode, reg10, reg11);
12438 x = gen_const_mem (Pmode, x);
12439 emit_move_insn (reg11, x);
12441 else
12442 emit_move_insn (reg11, split_stack_fn_large);
12444 fn = reg11;
12446 argval = ((args_size << 16) << 16) + allocate;
12447 emit_move_insn (reg10, GEN_INT (argval));
12449 else
12451 emit_move_insn (reg10, allocate_rtx);
12452 emit_move_insn (reg11, GEN_INT (args_size));
12453 use_reg (&call_fusage, reg11);
12456 use_reg (&call_fusage, reg10);
12458 else
12460 emit_insn (gen_push (GEN_INT (args_size)));
12461 emit_insn (gen_push (allocate_rtx));
12463 call_insn = ix86_expand_call (NULL_RTX, gen_rtx_MEM (QImode, fn),
12464 GEN_INT (UNITS_PER_WORD), constm1_rtx,
12465 NULL_RTX, false);
12466 add_function_usage_to (call_insn, call_fusage);
12468 /* In order to make call/return prediction work right, we now need
12469 to execute a return instruction. See
12470 libgcc/config/i386/morestack.S for the details on how this works.
12472 For flow purposes gcc must not see this as a return
12473 instruction--we need control flow to continue at the subsequent
12474 label. Therefore, we use an unspec. */
12475 gcc_assert (crtl->args.pops_args < 65536);
12476 emit_insn (gen_split_stack_return (GEN_INT (crtl->args.pops_args)));
12478 /* If we are in 64-bit mode and this function uses a static chain,
12479 we saved %r10 in %rax before calling __morestack.
12480 if (TARGET_64BIT && DECL_STATIC_CHAIN (cfun->decl))
12481 emit_move_insn (gen_rtx_REG (word_mode, R10_REG),
12482 gen_rtx_REG (word_mode, AX_REG));
12484 /* If this function calls va_start, we need to store a pointer to
12485 the arguments on the old stack, because they may not have been
12486 all copied to the new stack. At this point the old stack can be
12487 found at the frame pointer value used by __morestack, because
12488 __morestack has set that up before calling back to us. Here we
12489 store that pointer in a scratch register, and in
12490 ix86_expand_prologue we store the scratch register in a stack
12491 slot. */
12492 if (cfun->machine->split_stack_varargs_pointer != NULL_RTX)
12494 unsigned int scratch_regno;
12495 rtx frame_reg;
12496 int words;
12498 scratch_regno = split_stack_prologue_scratch_regno ();
12499 scratch_reg = gen_rtx_REG (Pmode, scratch_regno);
12500 frame_reg = gen_rtx_REG (Pmode, BP_REG);
12502 /* 64-bit:
12503 fp -> old fp value
12504 return address within this function
12505 return address of caller of this function
12506 stack arguments
12507 So we add three words to get to the stack arguments.
12509 32-bit:
12510 fp -> old fp value
12511 return address within this function
12512 first argument to __morestack
12513 second argument to __morestack
12514 return address of caller of this function
12515 stack arguments
12516 So we add five words to get to the stack arguments.
12518 words = TARGET_64BIT ? 3 : 5;
12519 emit_insn (gen_rtx_SET (VOIDmode, scratch_reg,
12520 gen_rtx_PLUS (Pmode, frame_reg,
12521 GEN_INT (words * UNITS_PER_WORD))));
12523 varargs_label = gen_label_rtx ();
12524 emit_jump_insn (gen_jump (varargs_label));
12525 JUMP_LABEL (get_last_insn ()) = varargs_label;
12527 emit_barrier ();
12530 emit_label (label);
12531 LABEL_NUSES (label) = 1;
12533 /* If this function calls va_start, we now have to set the scratch
12534 register for the case where we do not call __morestack. In this
12535 case we need to set it based on the stack pointer. */
12536 if (cfun->machine->split_stack_varargs_pointer != NULL_RTX)
12538 emit_insn (gen_rtx_SET (VOIDmode, scratch_reg,
12539 gen_rtx_PLUS (Pmode, stack_pointer_rtx,
12540 GEN_INT (UNITS_PER_WORD))));
12542 emit_label (varargs_label);
12543 LABEL_NUSES (varargs_label) = 1;
12547 /* We may have to tell the dataflow pass that the split stack prologue
12548 is initializing a scratch register. */
12550 static void
12551 ix86_live_on_entry (bitmap regs)
12553 if (cfun->machine->split_stack_varargs_pointer != NULL_RTX)
12555 gcc_assert (flag_split_stack);
12556 bitmap_set_bit (regs, split_stack_prologue_scratch_regno ());
12560 /* Extract the parts of an RTL expression that is a valid memory address
12561 for an instruction. Return 0 if the structure of the address is
12562 grossly off. Return -1 if the address contains ASHIFT, so it is not
12563 strictly valid, but is still used for computing the length of a lea instruction. */
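/* As a hypothetical illustration of the decomposition performed below, the
address (plus (plus (mult (reg B) (const_int 4)) (reg A)) (const_int 8)),
i.e. 8(%eax,%ebx,4) in AT&T syntax when A is %eax and B is %ebx, would give
out->base == (reg A), out->index == (reg B), out->scale == 4 and
out->disp == (const_int 8).  */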
12566 ix86_decompose_address (rtx addr, struct ix86_address *out)
12568 rtx base = NULL_RTX, index = NULL_RTX, disp = NULL_RTX;
12569 rtx base_reg, index_reg;
12570 HOST_WIDE_INT scale = 1;
12571 rtx scale_rtx = NULL_RTX;
12572 rtx tmp;
12573 int retval = 1;
12574 enum ix86_address_seg seg = SEG_DEFAULT;
12576 /* Allow zero-extended SImode addresses,
12577 they will be emitted with addr32 prefix. */
12578 if (TARGET_64BIT && GET_MODE (addr) == DImode)
12580 if (GET_CODE (addr) == ZERO_EXTEND
12581 && GET_MODE (XEXP (addr, 0)) == SImode)
12583 addr = XEXP (addr, 0);
12584 if (CONST_INT_P (addr))
12585 return 0;
12587 else if (GET_CODE (addr) == AND
12588 && const_32bit_mask (XEXP (addr, 1), DImode))
12590 addr = simplify_gen_subreg (SImode, XEXP (addr, 0), DImode, 0);
12591 if (addr == NULL_RTX)
12592 return 0;
12594 if (CONST_INT_P (addr))
12595 return 0;
12599 /* Allow SImode subregs of DImode addresses,
12600 they will be emitted with addr32 prefix. */
12601 if (TARGET_64BIT && GET_MODE (addr) == SImode)
12603 if (GET_CODE (addr) == SUBREG
12604 && GET_MODE (SUBREG_REG (addr)) == DImode)
12606 addr = SUBREG_REG (addr);
12607 if (CONST_INT_P (addr))
12608 return 0;
12612 if (REG_P (addr))
12613 base = addr;
12614 else if (GET_CODE (addr) == SUBREG)
12616 if (REG_P (SUBREG_REG (addr)))
12617 base = addr;
12618 else
12619 return 0;
12621 else if (GET_CODE (addr) == PLUS)
12623 rtx addends[4], op;
12624 int n = 0, i;
12626 op = addr;
12629 if (n >= 4)
12630 return 0;
12631 addends[n++] = XEXP (op, 1);
12632 op = XEXP (op, 0);
12634 while (GET_CODE (op) == PLUS);
12635 if (n >= 4)
12636 return 0;
12637 addends[n] = op;
12639 for (i = n; i >= 0; --i)
12641 op = addends[i];
12642 switch (GET_CODE (op))
12644 case MULT:
12645 if (index)
12646 return 0;
12647 index = XEXP (op, 0);
12648 scale_rtx = XEXP (op, 1);
12649 break;
12651 case ASHIFT:
12652 if (index)
12653 return 0;
12654 index = XEXP (op, 0);
12655 tmp = XEXP (op, 1);
12656 if (!CONST_INT_P (tmp))
12657 return 0;
12658 scale = INTVAL (tmp);
12659 if ((unsigned HOST_WIDE_INT) scale > 3)
12660 return 0;
12661 scale = 1 << scale;
12662 break;
12664 case ZERO_EXTEND:
12665 op = XEXP (op, 0);
12666 if (GET_CODE (op) != UNSPEC)
12667 return 0;
12668 /* FALLTHRU */
12670 case UNSPEC:
12671 if (XINT (op, 1) == UNSPEC_TP
12672 && TARGET_TLS_DIRECT_SEG_REFS
12673 && seg == SEG_DEFAULT)
12674 seg = DEFAULT_TLS_SEG_REG;
12675 else
12676 return 0;
12677 break;
12679 case SUBREG:
12680 if (!REG_P (SUBREG_REG (op)))
12681 return 0;
12682 /* FALLTHRU */
12684 case REG:
12685 if (!base)
12686 base = op;
12687 else if (!index)
12688 index = op;
12689 else
12690 return 0;
12691 break;
12693 case CONST:
12694 case CONST_INT:
12695 case SYMBOL_REF:
12696 case LABEL_REF:
12697 if (disp)
12698 return 0;
12699 disp = op;
12700 break;
12702 default:
12703 return 0;
12707 else if (GET_CODE (addr) == MULT)
12709 index = XEXP (addr, 0); /* index*scale */
12710 scale_rtx = XEXP (addr, 1);
12712 else if (GET_CODE (addr) == ASHIFT)
12714 /* We're called for lea too, which implements ashift on occasion. */
12715 index = XEXP (addr, 0);
12716 tmp = XEXP (addr, 1);
12717 if (!CONST_INT_P (tmp))
12718 return 0;
12719 scale = INTVAL (tmp);
12720 if ((unsigned HOST_WIDE_INT) scale > 3)
12721 return 0;
12722 scale = 1 << scale;
12723 retval = -1;
12725 else
12726 disp = addr; /* displacement */
12728 if (index)
12730 if (REG_P (index))
12732 else if (GET_CODE (index) == SUBREG
12733 && REG_P (SUBREG_REG (index)))
12735 else
12736 return 0;
12739 /* Extract the integral value of scale. */
12740 if (scale_rtx)
12742 if (!CONST_INT_P (scale_rtx))
12743 return 0;
12744 scale = INTVAL (scale_rtx);
12747 base_reg = base && GET_CODE (base) == SUBREG ? SUBREG_REG (base) : base;
12748 index_reg = index && GET_CODE (index) == SUBREG ? SUBREG_REG (index) : index;
12750 /* Avoid useless 0 displacement. */
12751 if (disp == const0_rtx && (base || index))
12752 disp = NULL_RTX;
12754 /* Allow arg pointer and stack pointer as index if there is no scaling. */
12755 if (base_reg && index_reg && scale == 1
12756 && (index_reg == arg_pointer_rtx
12757 || index_reg == frame_pointer_rtx
12758 || (REG_P (index_reg) && REGNO (index_reg) == STACK_POINTER_REGNUM)))
12760 std::swap (base, index);
12761 std::swap (base_reg, index_reg);
12764 /* Special case: %ebp cannot be encoded as a base without a displacement.
12765 Similarly %r13. */
12766 if (!disp
12767 && base_reg
12768 && (base_reg == hard_frame_pointer_rtx
12769 || base_reg == frame_pointer_rtx
12770 || base_reg == arg_pointer_rtx
12771 || (REG_P (base_reg)
12772 && (REGNO (base_reg) == HARD_FRAME_POINTER_REGNUM
12773 || REGNO (base_reg) == R13_REG))))
12774 disp = const0_rtx;
12776 /* Special case: on K6, [%esi] forces the instruction to be vector decoded.
12777 Avoid this by transforming to [%esi+0].
12778 Reload calls address legitimization without cfun defined, so we need
12779 to test cfun for being non-NULL. */
12780 if (TARGET_K6 && cfun && optimize_function_for_speed_p (cfun)
12781 && base_reg && !index_reg && !disp
12782 && REG_P (base_reg) && REGNO (base_reg) == SI_REG)
12783 disp = const0_rtx;
12785 /* Special case: encode reg+reg instead of reg*2. */
12786 if (!base && index && scale == 2)
12787 base = index, base_reg = index_reg, scale = 1;
12789 /* Special case: scaling cannot be encoded without base or displacement. */
12790 if (!base && !disp && index && scale != 1)
12791 disp = const0_rtx;
12793 out->base = base;
12794 out->index = index;
12795 out->disp = disp;
12796 out->scale = scale;
12797 out->seg = seg;
12799 return retval;
12802 /* Return cost of the memory address x.
12803 For i386, it is better to use a complex address than let gcc copy
12804 the address into a reg and make a new pseudo. But not if the address
12805 requires two regs - that would mean more pseudos with longer
12806 lifetimes. */
12807 static int
12808 ix86_address_cost (rtx x, machine_mode, addr_space_t, bool)
12810 struct ix86_address parts;
12811 int cost = 1;
12812 int ok = ix86_decompose_address (x, &parts);
12814 gcc_assert (ok);
12816 if (parts.base && GET_CODE (parts.base) == SUBREG)
12817 parts.base = SUBREG_REG (parts.base);
12818 if (parts.index && GET_CODE (parts.index) == SUBREG)
12819 parts.index = SUBREG_REG (parts.index);
12821 /* Attempt to minimize number of registers in the address. */
12822 if ((parts.base
12823 && (!REG_P (parts.base) || REGNO (parts.base) >= FIRST_PSEUDO_REGISTER))
12824 || (parts.index
12825 && (!REG_P (parts.index)
12826 || REGNO (parts.index) >= FIRST_PSEUDO_REGISTER)))
12827 cost++;
12829 /* When the address base or index is "pic_offset_table_rtx" we don't
12830 increase the address cost. When a memory operand using
12831 "pic_offset_table_rtx" is not invariant itself, it most likely means
12832 that the base or index is not invariant. Therefore only
12833 "pic_offset_table_rtx" could be hoisted out, which is not profitable for x86. */
12834 if (parts.base
12835 && (current_pass->type == GIMPLE_PASS
12836 || (!pic_offset_table_rtx
12837 || REGNO (pic_offset_table_rtx) != REGNO(parts.base)))
12838 && (!REG_P (parts.base) || REGNO (parts.base) >= FIRST_PSEUDO_REGISTER)
12839 && parts.index
12840 && (current_pass->type == GIMPLE_PASS
12841 || (!pic_offset_table_rtx
12842 || REGNO (pic_offset_table_rtx) != REGNO(parts.index)))
12843 && (!REG_P (parts.index) || REGNO (parts.index) >= FIRST_PSEUDO_REGISTER)
12844 && parts.base != parts.index)
12845 cost++;
12847 /* The AMD K6 doesn't like addresses with the ModR/M byte set to 00_xxx_100b,
12848 since its predecode logic can't detect the length of such instructions
12849 and decoding degenerates to vector decoding. Increase the cost of such
12850 addresses here. The penalty is at least 2 cycles. It may be worthwhile
12851 to split such addresses or even refuse them at all.
12853 The following addressing modes are affected:
12854 [base+scale*index]
12855 [scale*index+disp]
12856 [base+index]
12858 The first and last cases may be avoidable by explicitly coding the zero
12859 into the memory address, but I don't have an AMD K6 machine handy to
12860 check this theory. */
12862 if (TARGET_K6
12863 && ((!parts.disp && parts.base && parts.index && parts.scale != 1)
12864 || (parts.disp && !parts.base && parts.index && parts.scale != 1)
12865 || (!parts.disp && parts.base && parts.index && parts.scale == 1)))
12866 cost += 10;
12868 return cost;
12871 /* Allow {LABEL | SYMBOL}_REF - SYMBOL_REF-FOR-PICBASE for Mach-O as
12872 this is used to form addresses to local data when -fPIC is in
12873 use. */
12875 static bool
12876 darwin_local_data_pic (rtx disp)
12878 return (GET_CODE (disp) == UNSPEC
12879 && XINT (disp, 1) == UNSPEC_MACHOPIC_OFFSET);
12882 /* Determine if a given RTX is a valid constant. We already know this
12883 satisfies CONSTANT_P. */
12885 static bool
12886 ix86_legitimate_constant_p (machine_mode, rtx x)
12888 /* Pointer bounds constants are not valid. */
12889 if (POINTER_BOUNDS_MODE_P (GET_MODE (x)))
12890 return false;
12892 switch (GET_CODE (x))
12894 case CONST:
12895 x = XEXP (x, 0);
12897 if (GET_CODE (x) == PLUS)
12899 if (!CONST_INT_P (XEXP (x, 1)))
12900 return false;
12901 x = XEXP (x, 0);
12904 if (TARGET_MACHO && darwin_local_data_pic (x))
12905 return true;
12907 /* Only some unspecs are valid as "constants". */
12908 if (GET_CODE (x) == UNSPEC)
12909 switch (XINT (x, 1))
12911 case UNSPEC_GOT:
12912 case UNSPEC_GOTOFF:
12913 case UNSPEC_PLTOFF:
12914 return TARGET_64BIT;
12915 case UNSPEC_TPOFF:
12916 case UNSPEC_NTPOFF:
12917 x = XVECEXP (x, 0, 0);
12918 return (GET_CODE (x) == SYMBOL_REF
12919 && SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_EXEC);
12920 case UNSPEC_DTPOFF:
12921 x = XVECEXP (x, 0, 0);
12922 return (GET_CODE (x) == SYMBOL_REF
12923 && SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_DYNAMIC);
12924 default:
12925 return false;
12928 /* We must have drilled down to a symbol. */
12929 if (GET_CODE (x) == LABEL_REF)
12930 return true;
12931 if (GET_CODE (x) != SYMBOL_REF)
12932 return false;
12933 /* FALLTHRU */
12935 case SYMBOL_REF:
12936 /* TLS symbols are never valid. */
12937 if (SYMBOL_REF_TLS_MODEL (x))
12938 return false;
12940 /* DLLIMPORT symbols are never valid. */
12941 if (TARGET_DLLIMPORT_DECL_ATTRIBUTES
12942 && SYMBOL_REF_DLLIMPORT_P (x))
12943 return false;
12945 #if TARGET_MACHO
12946 /* mdynamic-no-pic */
12947 if (MACHO_DYNAMIC_NO_PIC_P)
12948 return machopic_symbol_defined_p (x);
12949 #endif
12950 break;
12952 case CONST_DOUBLE:
12953 if (GET_MODE (x) == TImode
12954 && x != CONST0_RTX (TImode)
12955 && !TARGET_64BIT)
12956 return false;
12957 break;
12959 case CONST_VECTOR:
12960 if (!standard_sse_constant_p (x))
12961 return false;
12963 default:
12964 break;
12967 /* Otherwise we handle everything else in the move patterns. */
12968 return true;
12971 /* Determine if it's legal to put X into the constant pool. This
12972 is not possible for the address of thread-local symbols, which
12973 is checked above. */
12975 static bool
12976 ix86_cannot_force_const_mem (machine_mode mode, rtx x)
12978 /* We can always put integral constants and vectors in memory. */
12979 switch (GET_CODE (x))
12981 case CONST_INT:
12982 case CONST_DOUBLE:
12983 case CONST_VECTOR:
12984 return false;
12986 default:
12987 break;
12989 return !ix86_legitimate_constant_p (mode, x);
12992 /* Return true if the symbol is marked as dllimport, or as a stub-variable,
12993 otherwise return false. */
12995 static bool
12996 is_imported_p (rtx x)
12998 if (!TARGET_DLLIMPORT_DECL_ATTRIBUTES
12999 || GET_CODE (x) != SYMBOL_REF)
13000 return false;
13002 return SYMBOL_REF_DLLIMPORT_P (x) || SYMBOL_REF_STUBVAR_P (x);
13006 /* Nonzero if the constant value X is a legitimate general operand
13007 when generating PIC code. It is given that flag_pic is on and
13008 that X satisfies CONSTANT_P or is a CONST_DOUBLE. */
13010 bool
13011 legitimate_pic_operand_p (rtx x)
13013 rtx inner;
13015 switch (GET_CODE (x))
13017 case CONST:
13018 inner = XEXP (x, 0);
13019 if (GET_CODE (inner) == PLUS
13020 && CONST_INT_P (XEXP (inner, 1)))
13021 inner = XEXP (inner, 0);
13023 /* Only some unspecs are valid as "constants". */
13024 if (GET_CODE (inner) == UNSPEC)
13025 switch (XINT (inner, 1))
13027 case UNSPEC_GOT:
13028 case UNSPEC_GOTOFF:
13029 case UNSPEC_PLTOFF:
13030 return TARGET_64BIT;
13031 case UNSPEC_TPOFF:
13032 x = XVECEXP (inner, 0, 0);
13033 return (GET_CODE (x) == SYMBOL_REF
13034 && SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_EXEC);
13035 case UNSPEC_MACHOPIC_OFFSET:
13036 return legitimate_pic_address_disp_p (x);
13037 default:
13038 return false;
13040 /* FALLTHRU */
13042 case SYMBOL_REF:
13043 case LABEL_REF:
13044 return legitimate_pic_address_disp_p (x);
13046 default:
13047 return true;
13051 /* Determine if a given CONST RTX is a valid memory displacement
13052 in PIC mode. */
13054 bool
13055 legitimate_pic_address_disp_p (rtx disp)
13057 bool saw_plus;
13059 /* In 64bit mode we can allow direct addresses of symbols and labels
13060 when they are not dynamic symbols. */
13061 if (TARGET_64BIT)
13063 rtx op0 = disp, op1;
13065 switch (GET_CODE (disp))
13067 case LABEL_REF:
13068 return true;
13070 case CONST:
13071 if (GET_CODE (XEXP (disp, 0)) != PLUS)
13072 break;
13073 op0 = XEXP (XEXP (disp, 0), 0);
13074 op1 = XEXP (XEXP (disp, 0), 1);
13075 if (!CONST_INT_P (op1)
13076 || INTVAL (op1) >= 16*1024*1024
13077 || INTVAL (op1) < -16*1024*1024)
13078 break;
13079 if (GET_CODE (op0) == LABEL_REF)
13080 return true;
13081 if (GET_CODE (op0) == CONST
13082 && GET_CODE (XEXP (op0, 0)) == UNSPEC
13083 && XINT (XEXP (op0, 0), 1) == UNSPEC_PCREL)
13084 return true;
13085 if (GET_CODE (op0) == UNSPEC
13086 && XINT (op0, 1) == UNSPEC_PCREL)
13087 return true;
13088 if (GET_CODE (op0) != SYMBOL_REF)
13089 break;
13090 /* FALLTHRU */
13092 case SYMBOL_REF:
13093 /* TLS references should always be enclosed in UNSPEC.
13094 A dllimported symbol always needs to be resolved. */
13095 if (SYMBOL_REF_TLS_MODEL (op0)
13096 || (TARGET_DLLIMPORT_DECL_ATTRIBUTES && SYMBOL_REF_DLLIMPORT_P (op0)))
13097 return false;
13099 if (TARGET_PECOFF)
13101 if (is_imported_p (op0))
13102 return true;
13104 if (SYMBOL_REF_FAR_ADDR_P (op0)
13105 || !SYMBOL_REF_LOCAL_P (op0))
13106 break;
13108 /* Function symbols need to be resolved only for
13109 the large model.
13110 For the small model we don't need to resolve anything
13111 here. */
13112 if ((ix86_cmodel != CM_LARGE_PIC
13113 && SYMBOL_REF_FUNCTION_P (op0))
13114 || ix86_cmodel == CM_SMALL_PIC)
13115 return true;
13116 /* Non-external symbols don't need to be resolved for
13117 the large and medium models. */
13118 if ((ix86_cmodel == CM_LARGE_PIC
13119 || ix86_cmodel == CM_MEDIUM_PIC)
13120 && !SYMBOL_REF_EXTERNAL_P (op0))
13121 return true;
13123 else if (!SYMBOL_REF_FAR_ADDR_P (op0)
13124 && (SYMBOL_REF_LOCAL_P (op0)
13125 || (HAVE_LD_PIE_COPYRELOC
13126 && flag_pie
13127 && !SYMBOL_REF_WEAK (op0)
13128 && !SYMBOL_REF_FUNCTION_P (op0)))
13129 && ix86_cmodel != CM_LARGE_PIC)
13130 return true;
13131 break;
13133 default:
13134 break;
13137 if (GET_CODE (disp) != CONST)
13138 return false;
13139 disp = XEXP (disp, 0);
13141 if (TARGET_64BIT)
13143 /* It is not safe to allow PLUS expressions here; that would limit the
13144 allowed distance of GOT tables. We should not need these anyway. */
13145 if (GET_CODE (disp) != UNSPEC
13146 || (XINT (disp, 1) != UNSPEC_GOTPCREL
13147 && XINT (disp, 1) != UNSPEC_GOTOFF
13148 && XINT (disp, 1) != UNSPEC_PCREL
13149 && XINT (disp, 1) != UNSPEC_PLTOFF))
13150 return false;
13152 if (GET_CODE (XVECEXP (disp, 0, 0)) != SYMBOL_REF
13153 && GET_CODE (XVECEXP (disp, 0, 0)) != LABEL_REF)
13154 return false;
13155 return true;
13158 saw_plus = false;
13159 if (GET_CODE (disp) == PLUS)
13161 if (!CONST_INT_P (XEXP (disp, 1)))
13162 return false;
13163 disp = XEXP (disp, 0);
13164 saw_plus = true;
13167 if (TARGET_MACHO && darwin_local_data_pic (disp))
13168 return true;
13170 if (GET_CODE (disp) != UNSPEC)
13171 return false;
13173 switch (XINT (disp, 1))
13175 case UNSPEC_GOT:
13176 if (saw_plus)
13177 return false;
13178 /* We need to check for both symbols and labels because VxWorks loads
13179 text labels with @GOT rather than @GOTOFF. See gotoff_operand for
13180 details. */
13181 return (GET_CODE (XVECEXP (disp, 0, 0)) == SYMBOL_REF
13182 || GET_CODE (XVECEXP (disp, 0, 0)) == LABEL_REF);
13183 case UNSPEC_GOTOFF:
13184 /* Refuse GOTOFF in 64bit mode since it is always 64bit when used.
13185 While the ABI also specifies a 32bit relocation, we don't produce it
13186 in the small PIC model at all. */
13187 if ((GET_CODE (XVECEXP (disp, 0, 0)) == SYMBOL_REF
13188 || GET_CODE (XVECEXP (disp, 0, 0)) == LABEL_REF)
13189 && !TARGET_64BIT)
13190 return !TARGET_PECOFF && gotoff_operand (XVECEXP (disp, 0, 0), Pmode);
13191 return false;
13192 case UNSPEC_GOTTPOFF:
13193 case UNSPEC_GOTNTPOFF:
13194 case UNSPEC_INDNTPOFF:
13195 if (saw_plus)
13196 return false;
13197 disp = XVECEXP (disp, 0, 0);
13198 return (GET_CODE (disp) == SYMBOL_REF
13199 && SYMBOL_REF_TLS_MODEL (disp) == TLS_MODEL_INITIAL_EXEC);
13200 case UNSPEC_NTPOFF:
13201 disp = XVECEXP (disp, 0, 0);
13202 return (GET_CODE (disp) == SYMBOL_REF
13203 && SYMBOL_REF_TLS_MODEL (disp) == TLS_MODEL_LOCAL_EXEC);
13204 case UNSPEC_DTPOFF:
13205 disp = XVECEXP (disp, 0, 0);
13206 return (GET_CODE (disp) == SYMBOL_REF
13207 && SYMBOL_REF_TLS_MODEL (disp) == TLS_MODEL_LOCAL_DYNAMIC);
13210 return false;
13213 /* Our implementation of LEGITIMIZE_RELOAD_ADDRESS. Returns a value to
13214 replace the input X, or the original X if no replacement is called for.
13215 The return value is true if the calling macro should goto WIN,
13216 false if it should not. */
13218 bool
13219 ix86_legitimize_reload_address (rtx x, machine_mode, int opnum, int type,
13220 int)
13222 /* Reload can generate:
13224 (plus:DI (plus:DI (unspec:DI [(const_int 0 [0])] UNSPEC_TP)
13225 (reg:DI 97))
13226 (reg:DI 2 cx))
13228 This RTX is rejected from ix86_legitimate_address_p due to
13229 non-strictness of base register 97. Following this rejection,
13230 reload pushes all three components into separate registers,
13231 creating an invalid memory address RTX.
13233 The following code reloads only the invalid part of the
13234 memory address RTX. */
13236 if (GET_CODE (x) == PLUS
13237 && REG_P (XEXP (x, 1))
13238 && GET_CODE (XEXP (x, 0)) == PLUS
13239 && REG_P (XEXP (XEXP (x, 0), 1)))
13241 rtx base, index;
13242 bool something_reloaded = false;
13244 base = XEXP (XEXP (x, 0), 1);
13245 if (!REG_OK_FOR_BASE_STRICT_P (base))
13247 push_reload (base, NULL_RTX, &XEXP (XEXP (x, 0), 1), NULL,
13248 BASE_REG_CLASS, GET_MODE (x), VOIDmode, 0, 0,
13249 opnum, (enum reload_type) type);
13250 something_reloaded = true;
13253 index = XEXP (x, 1);
13254 if (!REG_OK_FOR_INDEX_STRICT_P (index))
13256 push_reload (index, NULL_RTX, &XEXP (x, 1), NULL,
13257 INDEX_REG_CLASS, GET_MODE (x), VOIDmode, 0, 0,
13258 opnum, (enum reload_type) type);
13259 something_reloaded = true;
13262 gcc_assert (something_reloaded);
13263 return true;
13266 return false;
13269 /* Determine if OP is a suitable RTX for an address register.
13270 Return the naked register if a register or a register subreg is
13271 found, otherwise return NULL_RTX. */
13273 static rtx
13274 ix86_validate_address_register (rtx op)
13276 machine_mode mode = GET_MODE (op);
13278 /* Only SImode or DImode registers can form the address. */
13279 if (mode != SImode && mode != DImode)
13280 return NULL_RTX;
13282 if (REG_P (op))
13283 return op;
13284 else if (GET_CODE (op) == SUBREG)
13286 rtx reg = SUBREG_REG (op);
13288 if (!REG_P (reg))
13289 return NULL_RTX;
13291 mode = GET_MODE (reg);
13293 /* Don't allow SUBREGs that span more than a word. They can
13294 lead to spill failures when the register is one word out
13295 of a two-word structure. */
13296 if (GET_MODE_SIZE (mode) > UNITS_PER_WORD)
13297 return NULL_RTX;
13299 /* Allow only SUBREGs of non-eliminable hard registers. */
13300 if (register_no_elim_operand (reg, mode))
13301 return reg;
13304 /* Op is not a register. */
13305 return NULL_RTX;
13308 /* Recognizes RTL expressions that are valid memory addresses for an
13309 instruction. The MODE argument is the machine mode for the MEM
13310 expression that wants to use this address.
13312 It only recognizes addresses in canonical form. LEGITIMIZE_ADDRESS should
13313 convert common non-canonical forms to canonical form so that they will
13314 be recognized. */
13316 static bool
13317 ix86_legitimate_address_p (machine_mode, rtx addr, bool strict)
13319 struct ix86_address parts;
13320 rtx base, index, disp;
13321 HOST_WIDE_INT scale;
13322 enum ix86_address_seg seg;
13324 if (ix86_decompose_address (addr, &parts) <= 0)
13325 /* Decomposition failed. */
13326 return false;
13328 base = parts.base;
13329 index = parts.index;
13330 disp = parts.disp;
13331 scale = parts.scale;
13332 seg = parts.seg;
13334 /* Validate base register. */
13335 if (base)
13337 rtx reg = ix86_validate_address_register (base);
13339 if (reg == NULL_RTX)
13340 return false;
13342 if ((strict && ! REG_OK_FOR_BASE_STRICT_P (reg))
13343 || (! strict && ! REG_OK_FOR_BASE_NONSTRICT_P (reg)))
13344 /* Base is not valid. */
13345 return false;
13348 /* Validate index register. */
13349 if (index)
13351 rtx reg = ix86_validate_address_register (index);
13353 if (reg == NULL_RTX)
13354 return false;
13356 if ((strict && ! REG_OK_FOR_INDEX_STRICT_P (reg))
13357 || (! strict && ! REG_OK_FOR_INDEX_NONSTRICT_P (reg)))
13358 /* Index is not valid. */
13359 return false;
13362 /* Index and base should have the same mode. */
13363 if (base && index
13364 && GET_MODE (base) != GET_MODE (index))
13365 return false;
13367 /* Address override works only on the (%reg) part of %fs:(%reg). */
13368 if (seg != SEG_DEFAULT
13369 && ((base && GET_MODE (base) != word_mode)
13370 || (index && GET_MODE (index) != word_mode)))
13371 return false;
13373 /* Validate scale factor. */
13374 if (scale != 1)
13376 if (!index)
13377 /* Scale without index. */
13378 return false;
13380 if (scale != 2 && scale != 4 && scale != 8)
13381 /* Scale is not a valid multiplier. */
13382 return false;
13385 /* Validate displacement. */
13386 if (disp)
13388 if (GET_CODE (disp) == CONST
13389 && GET_CODE (XEXP (disp, 0)) == UNSPEC
13390 && XINT (XEXP (disp, 0), 1) != UNSPEC_MACHOPIC_OFFSET)
13391 switch (XINT (XEXP (disp, 0), 1))
13393 /* Refuse GOTOFF and GOT in 64bit mode since it is always 64bit when
13394 used. While the ABI also specifies 32bit relocations, we don't produce
13395 them at all and use IP relative instead. */
13396 case UNSPEC_GOT:
13397 case UNSPEC_GOTOFF:
13398 gcc_assert (flag_pic);
13399 if (!TARGET_64BIT)
13400 goto is_legitimate_pic;
13402 /* 64bit address unspec. */
13403 return false;
13405 case UNSPEC_GOTPCREL:
13406 case UNSPEC_PCREL:
13407 gcc_assert (flag_pic);
13408 goto is_legitimate_pic;
13410 case UNSPEC_GOTTPOFF:
13411 case UNSPEC_GOTNTPOFF:
13412 case UNSPEC_INDNTPOFF:
13413 case UNSPEC_NTPOFF:
13414 case UNSPEC_DTPOFF:
13415 break;
13417 case UNSPEC_STACK_CHECK:
13418 gcc_assert (flag_split_stack);
13419 break;
13421 default:
13422 /* Invalid address unspec. */
13423 return false;
13426 else if (SYMBOLIC_CONST (disp)
13427 && (flag_pic
13428 || (TARGET_MACHO
13429 #if TARGET_MACHO
13430 && MACHOPIC_INDIRECT
13431 && !machopic_operand_p (disp)
13432 #endif
13436 is_legitimate_pic:
13437 if (TARGET_64BIT && (index || base))
13439 /* foo@dtpoff(%rX) is ok. */
13440 if (GET_CODE (disp) != CONST
13441 || GET_CODE (XEXP (disp, 0)) != PLUS
13442 || GET_CODE (XEXP (XEXP (disp, 0), 0)) != UNSPEC
13443 || !CONST_INT_P (XEXP (XEXP (disp, 0), 1))
13444 || (XINT (XEXP (XEXP (disp, 0), 0), 1) != UNSPEC_DTPOFF
13445 && XINT (XEXP (XEXP (disp, 0), 0), 1) != UNSPEC_NTPOFF))
13446 /* Non-constant pic memory reference. */
13447 return false;
13449 else if ((!TARGET_MACHO || flag_pic)
13450 && ! legitimate_pic_address_disp_p (disp))
13451 /* Displacement is an invalid pic construct. */
13452 return false;
13453 #if TARGET_MACHO
13454 else if (MACHO_DYNAMIC_NO_PIC_P
13455 && !ix86_legitimate_constant_p (Pmode, disp))
13456 /* Displacement must be referenced via non_lazy_pointer. */
13457 return false;
13458 #endif
13460 /* This code used to verify that a symbolic pic displacement
13461 includes the pic_offset_table_rtx register.
13463 While this is a good idea, unfortunately these constructs may
13464 be created by the "adds using lea" optimization for incorrect
13465 code like:
13467 int a;
13468 int foo(int i)
13470 return *(&a+i);
13473 This code is nonsensical, but results in addressing the
13474 GOT table with a pic_offset_table_rtx base. We can't
13475 just refuse it easily, since it gets matched by the
13476 "addsi3" pattern, which later gets split to lea in the
13477 case the output register differs from the input. While this
13478 could be handled by a separate addsi pattern for this case
13479 that never results in lea, disabling this test seems to be the
13480 easier and correct fix for the crash. */
13482 else if (GET_CODE (disp) != LABEL_REF
13483 && !CONST_INT_P (disp)
13484 && (GET_CODE (disp) != CONST
13485 || !ix86_legitimate_constant_p (Pmode, disp))
13486 && (GET_CODE (disp) != SYMBOL_REF
13487 || !ix86_legitimate_constant_p (Pmode, disp)))
13488 /* Displacement is not constant. */
13489 return false;
13490 else if (TARGET_64BIT
13491 && !x86_64_immediate_operand (disp, VOIDmode))
13492 /* Displacement is out of range. */
13493 return false;
13494 /* In x32 mode, constant addresses are sign extended to 64bit, so
13495 we have to prevent addresses from 0x80000000 to 0xffffffff. */
13496 else if (TARGET_X32 && !(index || base)
13497 && CONST_INT_P (disp)
13498 && val_signbit_known_set_p (SImode, INTVAL (disp)))
13499 return false;
13502 /* Everything looks valid. */
13503 return true;
13506 /* Determine if a given RTX is a valid constant address. */
13508 bool
13509 constant_address_p (rtx x)
13511 return CONSTANT_P (x) && ix86_legitimate_address_p (Pmode, x, 1);
13514 /* Return a unique alias set for the GOT. */
13516 static alias_set_type
13517 ix86_GOT_alias_set (void)
13519 static alias_set_type set = -1;
13520 if (set == -1)
13521 set = new_alias_set ();
13522 return set;
13525 /* Set regs_ever_live for PIC base address register
13526 to true if required. */
13527 static void
13528 set_pic_reg_ever_live ()
13530 if (reload_in_progress)
13531 df_set_regs_ever_live (REGNO (pic_offset_table_rtx), true);
13534 /* Return a legitimate reference for ORIG (an address) using the
13535 register REG. If REG is 0, a new pseudo is generated.
13537 There are two types of references that must be handled:
13539 1. Global data references must load the address from the GOT, via
13540 the PIC reg. An insn is emitted to do this load, and the reg is
13541 returned.
13543 2. Static data references, constant pool addresses, and code labels
13544 compute the address as an offset from the GOT, whose base is in
13545 the PIC reg. Static data objects have SYMBOL_FLAG_LOCAL set to
13546 differentiate them from global data objects. The returned
13547 address is the PIC reg + an unspec constant.
13549 TARGET_LEGITIMATE_ADDRESS_P rejects symbolic references unless the PIC
13550 reg also appears in the address. */
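/* For illustration only (not an exhaustive list), in 32-bit PIC the code
below builds forms along these lines:
global data: (mem (plus pic_offset_table_rtx
(const (unspec [symbol_ref] UNSPEC_GOT))))
local data: (plus pic_offset_table_rtx
(const (unspec [symbol_ref] UNSPEC_GOTOFF)))  */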
13552 static rtx
13553 legitimize_pic_address (rtx orig, rtx reg)
13555 rtx addr = orig;
13556 rtx new_rtx = orig;
13558 #if TARGET_MACHO
13559 if (TARGET_MACHO && !TARGET_64BIT)
13561 if (reg == 0)
13562 reg = gen_reg_rtx (Pmode);
13563 /* Use the generic Mach-O PIC machinery. */
13564 return machopic_legitimize_pic_address (orig, GET_MODE (orig), reg);
13566 #endif
13568 if (TARGET_64BIT && TARGET_DLLIMPORT_DECL_ATTRIBUTES)
13570 rtx tmp = legitimize_pe_coff_symbol (addr, true);
13571 if (tmp)
13572 return tmp;
13575 if (TARGET_64BIT && legitimate_pic_address_disp_p (addr))
13576 new_rtx = addr;
13577 else if (TARGET_64BIT && !TARGET_PECOFF
13578 && ix86_cmodel != CM_SMALL_PIC && gotoff_operand (addr, Pmode))
13580 rtx tmpreg;
13581 /* This symbol may be referenced via a displacement from the PIC
13582 base address (@GOTOFF). */
13584 set_pic_reg_ever_live ();
13585 if (GET_CODE (addr) == CONST)
13586 addr = XEXP (addr, 0);
13587 if (GET_CODE (addr) == PLUS)
13589 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, XEXP (addr, 0)),
13590 UNSPEC_GOTOFF);
13591 new_rtx = gen_rtx_PLUS (Pmode, new_rtx, XEXP (addr, 1));
13593 else
13594 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTOFF);
13595 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
13596 if (!reg)
13597 tmpreg = gen_reg_rtx (Pmode);
13598 else
13599 tmpreg = reg;
13600 emit_move_insn (tmpreg, new_rtx);
13602 if (reg != 0)
13604 new_rtx = expand_simple_binop (Pmode, PLUS, reg, pic_offset_table_rtx,
13605 tmpreg, 1, OPTAB_DIRECT);
13606 new_rtx = reg;
13608 else
13609 new_rtx = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, tmpreg);
13611 else if (!TARGET_64BIT && !TARGET_PECOFF && gotoff_operand (addr, Pmode))
13613 /* This symbol may be referenced via a displacement from the PIC
13614 base address (@GOTOFF). */
13616 set_pic_reg_ever_live ();
13617 if (GET_CODE (addr) == CONST)
13618 addr = XEXP (addr, 0);
13619 if (GET_CODE (addr) == PLUS)
13621 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, XEXP (addr, 0)),
13622 UNSPEC_GOTOFF);
13623 new_rtx = gen_rtx_PLUS (Pmode, new_rtx, XEXP (addr, 1));
13625 else
13626 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTOFF);
13627 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
13628 new_rtx = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new_rtx);
13630 if (reg != 0)
13632 emit_move_insn (reg, new_rtx);
13633 new_rtx = reg;
13636 else if ((GET_CODE (addr) == SYMBOL_REF && SYMBOL_REF_TLS_MODEL (addr) == 0)
13637 /* We can't use @GOTOFF for text labels on VxWorks;
13638 see gotoff_operand. */
13639 || (TARGET_VXWORKS_RTP && GET_CODE (addr) == LABEL_REF))
13641 rtx tmp = legitimize_pe_coff_symbol (addr, true);
13642 if (tmp)
13643 return tmp;
13645 /* For x64 PE-COFF there is no GOT table, so we use the address
13646 directly. */
13647 if (TARGET_64BIT && TARGET_PECOFF)
13649 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_PCREL);
13650 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
13652 if (reg == 0)
13653 reg = gen_reg_rtx (Pmode);
13654 emit_move_insn (reg, new_rtx);
13655 new_rtx = reg;
13657 else if (TARGET_64BIT && ix86_cmodel != CM_LARGE_PIC)
13659 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTPCREL);
13660 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
13661 new_rtx = gen_const_mem (Pmode, new_rtx);
13662 set_mem_alias_set (new_rtx, ix86_GOT_alias_set ());
13664 if (reg == 0)
13665 reg = gen_reg_rtx (Pmode);
13666 /* Use gen_movsi directly, otherwise the address is loaded
13667 into a register for CSE. We don't want to CSE these addresses;
13668 instead we CSE addresses from the GOT table, so skip this. */
13669 emit_insn (gen_movsi (reg, new_rtx));
13670 new_rtx = reg;
13672 else
13674 /* This symbol must be referenced via a load from the
13675 Global Offset Table (@GOT). */
13677 set_pic_reg_ever_live ();
13678 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOT);
13679 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
13680 if (TARGET_64BIT)
13681 new_rtx = force_reg (Pmode, new_rtx);
13682 new_rtx = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new_rtx);
13683 new_rtx = gen_const_mem (Pmode, new_rtx);
13684 set_mem_alias_set (new_rtx, ix86_GOT_alias_set ());
13686 if (reg == 0)
13687 reg = gen_reg_rtx (Pmode);
13688 emit_move_insn (reg, new_rtx);
13689 new_rtx = reg;
13692 else
13694 if (CONST_INT_P (addr)
13695 && !x86_64_immediate_operand (addr, VOIDmode))
13697 if (reg)
13699 emit_move_insn (reg, addr);
13700 new_rtx = reg;
13702 else
13703 new_rtx = force_reg (Pmode, addr);
13705 else if (GET_CODE (addr) == CONST)
13707 addr = XEXP (addr, 0);
13709 /* We must match stuff we generate before. Assume the only
13710 unspecs that can get here are ours. Not that we could do
13711 anything with them anyway.... */
13712 if (GET_CODE (addr) == UNSPEC
13713 || (GET_CODE (addr) == PLUS
13714 && GET_CODE (XEXP (addr, 0)) == UNSPEC))
13715 return orig;
13716 gcc_assert (GET_CODE (addr) == PLUS);
13718 if (GET_CODE (addr) == PLUS)
13720 rtx op0 = XEXP (addr, 0), op1 = XEXP (addr, 1);
13722 /* Check first to see if this is a constant offset from a @GOTOFF
13723 symbol reference. */
13724 if (!TARGET_PECOFF && gotoff_operand (op0, Pmode)
13725 && CONST_INT_P (op1))
13727 if (!TARGET_64BIT)
13729 set_pic_reg_ever_live ();
13730 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, op0),
13731 UNSPEC_GOTOFF);
13732 new_rtx = gen_rtx_PLUS (Pmode, new_rtx, op1);
13733 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
13734 new_rtx = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new_rtx);
13736 if (reg != 0)
13738 emit_move_insn (reg, new_rtx);
13739 new_rtx = reg;
13742 else
13744 if (INTVAL (op1) < -16*1024*1024
13745 || INTVAL (op1) >= 16*1024*1024)
13747 if (!x86_64_immediate_operand (op1, Pmode))
13748 op1 = force_reg (Pmode, op1);
13749 new_rtx = gen_rtx_PLUS (Pmode, force_reg (Pmode, op0), op1);
13753 else
13755 rtx base = legitimize_pic_address (op0, reg);
13756 machine_mode mode = GET_MODE (base);
13757 new_rtx
13758 = legitimize_pic_address (op1, base == reg ? NULL_RTX : reg);
13760 if (CONST_INT_P (new_rtx))
13762 if (INTVAL (new_rtx) < -16*1024*1024
13763 || INTVAL (new_rtx) >= 16*1024*1024)
13765 if (!x86_64_immediate_operand (new_rtx, mode))
13766 new_rtx = force_reg (mode, new_rtx);
13767 new_rtx
13768 = gen_rtx_PLUS (mode, force_reg (mode, base), new_rtx);
13770 else
13771 new_rtx = plus_constant (mode, base, INTVAL (new_rtx));
13773 else
13775 if (GET_CODE (new_rtx) == PLUS
13776 && CONSTANT_P (XEXP (new_rtx, 1)))
13778 base = gen_rtx_PLUS (mode, base, XEXP (new_rtx, 0));
13779 new_rtx = XEXP (new_rtx, 1);
13781 new_rtx = gen_rtx_PLUS (mode, base, new_rtx);
13786 return new_rtx;
13789 /* Load the thread pointer. If TO_REG is true, force it into a register. */
13791 static rtx
13792 get_thread_pointer (machine_mode tp_mode, bool to_reg)
13794 rtx tp = gen_rtx_UNSPEC (ptr_mode, gen_rtvec (1, const0_rtx), UNSPEC_TP);
13796 if (GET_MODE (tp) != tp_mode)
13798 gcc_assert (GET_MODE (tp) == SImode);
13799 gcc_assert (tp_mode == DImode);
13801 tp = gen_rtx_ZERO_EXTEND (tp_mode, tp);
13804 if (to_reg)
13805 tp = copy_to_mode_reg (tp_mode, tp);
13807 return tp;
13810 /* Construct the SYMBOL_REF for the tls_get_addr function. */
13812 static GTY(()) rtx ix86_tls_symbol;
13814 static rtx
13815 ix86_tls_get_addr (void)
13817 if (!ix86_tls_symbol)
13819 const char *sym
13820 = ((TARGET_ANY_GNU_TLS && !TARGET_64BIT)
13821 ? "___tls_get_addr" : "__tls_get_addr");
13823 ix86_tls_symbol = gen_rtx_SYMBOL_REF (Pmode, sym);
13826 if (ix86_cmodel == CM_LARGE_PIC && !TARGET_PECOFF)
13828 rtx unspec = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, ix86_tls_symbol),
13829 UNSPEC_PLTOFF);
13830 return gen_rtx_PLUS (Pmode, pic_offset_table_rtx,
13831 gen_rtx_CONST (Pmode, unspec));
13834 return ix86_tls_symbol;
13837 /* Construct the SYMBOL_REF for the _TLS_MODULE_BASE_ symbol. */
13839 static GTY(()) rtx ix86_tls_module_base_symbol;
13842 ix86_tls_module_base (void)
13844 if (!ix86_tls_module_base_symbol)
13846 ix86_tls_module_base_symbol
13847 = gen_rtx_SYMBOL_REF (Pmode, "_TLS_MODULE_BASE_");
13849 SYMBOL_REF_FLAGS (ix86_tls_module_base_symbol)
13850 |= TLS_MODEL_GLOBAL_DYNAMIC << SYMBOL_FLAG_TLS_SHIFT;
13853 return ix86_tls_module_base_symbol;
13856 /* A subroutine of ix86_legitimize_address and ix86_expand_move. FOR_MOV is
13857 false if we expect this to be used for a memory address and true if
13858 we expect to load the address into a register. */
13860 static rtx
13861 legitimize_tls_address (rtx x, enum tls_model model, bool for_mov)
13863 rtx dest, base, off;
13864 rtx pic = NULL_RTX, tp = NULL_RTX;
13865 machine_mode tp_mode = Pmode;
13866 int type;
13868 /* Fall back to the global dynamic model if the toolchain cannot
13869 support local dynamic. */
13870 if (TARGET_SUN_TLS && !TARGET_64BIT
13871 && !HAVE_AS_IX86_TLSLDMPLT && !HAVE_AS_IX86_TLSLDM
13872 && model == TLS_MODEL_LOCAL_DYNAMIC)
13873 model = TLS_MODEL_GLOBAL_DYNAMIC;
13875 switch (model)
13877 case TLS_MODEL_GLOBAL_DYNAMIC:
13878 dest = gen_reg_rtx (Pmode);
13880 if (!TARGET_64BIT)
13882 if (flag_pic && !TARGET_PECOFF)
13883 pic = pic_offset_table_rtx;
13884 else
13886 pic = gen_reg_rtx (Pmode);
13887 emit_insn (gen_set_got (pic));
13891 if (TARGET_GNU2_TLS)
13893 if (TARGET_64BIT)
13894 emit_insn (gen_tls_dynamic_gnu2_64 (dest, x));
13895 else
13896 emit_insn (gen_tls_dynamic_gnu2_32 (dest, x, pic));
13898 tp = get_thread_pointer (Pmode, true);
13899 dest = force_reg (Pmode, gen_rtx_PLUS (Pmode, tp, dest));
13901 if (GET_MODE (x) != Pmode)
13902 x = gen_rtx_ZERO_EXTEND (Pmode, x);
13904 set_unique_reg_note (get_last_insn (), REG_EQUAL, x);
13906 else
13908 rtx caddr = ix86_tls_get_addr ();
13910 if (TARGET_64BIT)
13912 rtx rax = gen_rtx_REG (Pmode, AX_REG);
13913 rtx_insn *insns;
13915 start_sequence ();
13916 emit_call_insn
13917 (ix86_gen_tls_global_dynamic_64 (rax, x, caddr));
13918 insns = get_insns ();
13919 end_sequence ();
13921 if (GET_MODE (x) != Pmode)
13922 x = gen_rtx_ZERO_EXTEND (Pmode, x);
13924 RTL_CONST_CALL_P (insns) = 1;
13925 emit_libcall_block (insns, dest, rax, x);
13927 else
13928 emit_insn (gen_tls_global_dynamic_32 (dest, x, pic, caddr));
13930 break;
13932 case TLS_MODEL_LOCAL_DYNAMIC:
13933 base = gen_reg_rtx (Pmode);
13935 if (!TARGET_64BIT)
13937 if (flag_pic)
13938 pic = pic_offset_table_rtx;
13939 else
13941 pic = gen_reg_rtx (Pmode);
13942 emit_insn (gen_set_got (pic));
13946 if (TARGET_GNU2_TLS)
13948 rtx tmp = ix86_tls_module_base ();
13950 if (TARGET_64BIT)
13951 emit_insn (gen_tls_dynamic_gnu2_64 (base, tmp));
13952 else
13953 emit_insn (gen_tls_dynamic_gnu2_32 (base, tmp, pic));
13955 tp = get_thread_pointer (Pmode, true);
13956 set_unique_reg_note (get_last_insn (), REG_EQUAL,
13957 gen_rtx_MINUS (Pmode, tmp, tp));
13959 else
13961 rtx caddr = ix86_tls_get_addr ();
13963 if (TARGET_64BIT)
13965 rtx rax = gen_rtx_REG (Pmode, AX_REG);
13966 rtx_insn *insns;
13967 rtx eqv;
13969 start_sequence ();
13970 emit_call_insn
13971 (ix86_gen_tls_local_dynamic_base_64 (rax, caddr));
13972 insns = get_insns ();
13973 end_sequence ();
13975 /* Attach a unique REG_EQUAL, to allow the RTL optimizers to
13976 share the LD_BASE result with other LD model accesses. */
13977 eqv = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const0_rtx),
13978 UNSPEC_TLS_LD_BASE);
13980 RTL_CONST_CALL_P (insns) = 1;
13981 emit_libcall_block (insns, base, rax, eqv);
13983 else
13984 emit_insn (gen_tls_local_dynamic_base_32 (base, pic, caddr));
13987 off = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x), UNSPEC_DTPOFF);
13988 off = gen_rtx_CONST (Pmode, off);
13990 dest = force_reg (Pmode, gen_rtx_PLUS (Pmode, base, off));
13992 if (TARGET_GNU2_TLS)
13994 dest = force_reg (Pmode, gen_rtx_PLUS (Pmode, dest, tp));
13996 if (GET_MODE (x) != Pmode)
13997 x = gen_rtx_ZERO_EXTEND (Pmode, x);
13999 set_unique_reg_note (get_last_insn (), REG_EQUAL, x);
14001 break;
14003 case TLS_MODEL_INITIAL_EXEC:
14004 if (TARGET_64BIT)
14006 if (TARGET_SUN_TLS && !TARGET_X32)
14008 /* The Sun linker took the AMD64 TLS spec literally
14009 and can only handle %rax as the destination of the
14010 initial-exec code sequence. */
14012 dest = gen_reg_rtx (DImode);
14013 emit_insn (gen_tls_initial_exec_64_sun (dest, x));
14014 return dest;
14017 /* Generate DImode references to avoid %fs:(%reg32)
14018 problems and linker IE->LE relaxation bug. */
14019 tp_mode = DImode;
14020 pic = NULL;
14021 type = UNSPEC_GOTNTPOFF;
14023 else if (flag_pic)
14025 set_pic_reg_ever_live ();
14026 pic = pic_offset_table_rtx;
14027 type = TARGET_ANY_GNU_TLS ? UNSPEC_GOTNTPOFF : UNSPEC_GOTTPOFF;
14029 else if (!TARGET_ANY_GNU_TLS)
14031 pic = gen_reg_rtx (Pmode);
14032 emit_insn (gen_set_got (pic));
14033 type = UNSPEC_GOTTPOFF;
14035 else
14037 pic = NULL;
14038 type = UNSPEC_INDNTPOFF;
14041 off = gen_rtx_UNSPEC (tp_mode, gen_rtvec (1, x), type);
14042 off = gen_rtx_CONST (tp_mode, off);
14043 if (pic)
14044 off = gen_rtx_PLUS (tp_mode, pic, off);
14045 off = gen_const_mem (tp_mode, off);
14046 set_mem_alias_set (off, ix86_GOT_alias_set ());
14048 if (TARGET_64BIT || TARGET_ANY_GNU_TLS)
14050 base = get_thread_pointer (tp_mode,
14051 for_mov || !TARGET_TLS_DIRECT_SEG_REFS);
14052 off = force_reg (tp_mode, off);
14053 return gen_rtx_PLUS (tp_mode, base, off);
14055 else
14057 base = get_thread_pointer (Pmode, true);
14058 dest = gen_reg_rtx (Pmode);
14059 emit_insn (ix86_gen_sub3 (dest, base, off));
14061 break;
14063 case TLS_MODEL_LOCAL_EXEC:
14064 off = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x),
14065 (TARGET_64BIT || TARGET_ANY_GNU_TLS)
14066 ? UNSPEC_NTPOFF : UNSPEC_TPOFF);
14067 off = gen_rtx_CONST (Pmode, off);
14069 if (TARGET_64BIT || TARGET_ANY_GNU_TLS)
14071 base = get_thread_pointer (Pmode,
14072 for_mov || !TARGET_TLS_DIRECT_SEG_REFS);
14073 return gen_rtx_PLUS (Pmode, base, off);
14075 else
14077 base = get_thread_pointer (Pmode, true);
14078 dest = gen_reg_rtx (Pmode);
14079 emit_insn (ix86_gen_sub3 (dest, base, off));
14081 break;
14083 default:
14084 gcc_unreachable ();
14087 return dest;
14090 /* Create or return the unique __imp_DECL dllimport symbol corresponding
14091 to symbol DECL if BEIMPORT is true. Otherwise create or return the
14092 unique refptr-DECL symbol corresponding to symbol DECL. */
14094 struct dllimport_hasher : ggc_cache_hasher<tree_map *>
14096 static inline hashval_t hash (tree_map *m) { return m->hash; }
14097 static inline bool
14098 equal (tree_map *a, tree_map *b)
14100 return a->base.from == b->base.from;
14103 static void
14104 handle_cache_entry (tree_map *&m)
14106 extern void gt_ggc_mx (tree_map *&);
14107 if (m == HTAB_EMPTY_ENTRY || m == HTAB_DELETED_ENTRY)
14108 return;
14109 else if (ggc_marked_p (m->base.from))
14110 gt_ggc_mx (m);
14111 else
14112 m = static_cast<tree_map *> (HTAB_DELETED_ENTRY);
14116 static GTY((cache)) hash_table<dllimport_hasher> *dllimport_map;
14118 static tree
14119 get_dllimport_decl (tree decl, bool beimport)
14121 struct tree_map *h, in;
14122 const char *name;
14123 const char *prefix;
14124 size_t namelen, prefixlen;
14125 char *imp_name;
14126 tree to;
14127 rtx rtl;
14129 if (!dllimport_map)
14130 dllimport_map = hash_table<dllimport_hasher>::create_ggc (512);
14132 in.hash = htab_hash_pointer (decl);
14133 in.base.from = decl;
14134 tree_map **loc = dllimport_map->find_slot_with_hash (&in, in.hash, INSERT);
14135 h = *loc;
14136 if (h)
14137 return h->to;
14139 *loc = h = ggc_alloc<tree_map> ();
14140 h->hash = in.hash;
14141 h->base.from = decl;
14142 h->to = to = build_decl (DECL_SOURCE_LOCATION (decl),
14143 VAR_DECL, NULL, ptr_type_node);
14144 DECL_ARTIFICIAL (to) = 1;
14145 DECL_IGNORED_P (to) = 1;
14146 DECL_EXTERNAL (to) = 1;
14147 TREE_READONLY (to) = 1;
14149 name = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (decl));
14150 name = targetm.strip_name_encoding (name);
14151 if (beimport)
14152 prefix = name[0] == FASTCALL_PREFIX || user_label_prefix[0] == 0
14153 ? "*__imp_" : "*__imp__";
14154 else
14155 prefix = user_label_prefix[0] == 0 ? "*.refptr." : "*refptr.";
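/* For illustration (hypothetical name): with an empty user_label_prefix,
importing "foo" yields the assembler name "*__imp_foo", while a refptr
stub yields "*.refptr.foo"; the leading '*' asks the name-encoding
machinery to emit the string verbatim, without prepending
user_label_prefix again.  */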
14156 namelen = strlen (name);
14157 prefixlen = strlen (prefix);
14158 imp_name = (char *) alloca (namelen + prefixlen + 1);
14159 memcpy (imp_name, prefix, prefixlen);
14160 memcpy (imp_name + prefixlen, name, namelen + 1);
14162 name = ggc_alloc_string (imp_name, namelen + prefixlen);
14163 rtl = gen_rtx_SYMBOL_REF (Pmode, name);
14164 SET_SYMBOL_REF_DECL (rtl, to);
14165 SYMBOL_REF_FLAGS (rtl) = SYMBOL_FLAG_LOCAL | SYMBOL_FLAG_STUBVAR;
14166 if (!beimport)
14168 SYMBOL_REF_FLAGS (rtl) |= SYMBOL_FLAG_EXTERNAL;
14169 #ifdef SUB_TARGET_RECORD_STUB
14170 SUB_TARGET_RECORD_STUB (name);
14171 #endif
14174 rtl = gen_const_mem (Pmode, rtl);
14175 set_mem_alias_set (rtl, ix86_GOT_alias_set ());
14177 SET_DECL_RTL (to, rtl);
14178 SET_DECL_ASSEMBLER_NAME (to, get_identifier (name));
14180 return to;
14183 /* Expand SYMBOL into its corresponding far-address symbol.
14184 WANT_REG is true if we require the result be a register. */
14186 static rtx
14187 legitimize_pe_coff_extern_decl (rtx symbol, bool want_reg)
14189 tree imp_decl;
14190 rtx x;
14192 gcc_assert (SYMBOL_REF_DECL (symbol));
14193 imp_decl = get_dllimport_decl (SYMBOL_REF_DECL (symbol), false);
14195 x = DECL_RTL (imp_decl);
14196 if (want_reg)
14197 x = force_reg (Pmode, x);
14198 return x;
14201 /* Expand SYMBOL into its corresponding dllimport symbol. WANT_REG is
14202 true if we require the result be a register. */
14204 static rtx
14205 legitimize_dllimport_symbol (rtx symbol, bool want_reg)
14207 tree imp_decl;
14208 rtx x;
14210 gcc_assert (SYMBOL_REF_DECL (symbol));
14211 imp_decl = get_dllimport_decl (SYMBOL_REF_DECL (symbol), true);
14213 x = DECL_RTL (imp_decl);
14214 if (want_reg)
14215 x = force_reg (Pmode, x);
14216 return x;
14219 /* Expand ADDR into its corresponding dllimport or refptr symbol. INREG
14220 is true if we require the result to be a register. */
14222 static rtx
14223 legitimize_pe_coff_symbol (rtx addr, bool inreg)
14225 if (!TARGET_PECOFF)
14226 return NULL_RTX;
14228 if (TARGET_DLLIMPORT_DECL_ATTRIBUTES)
14230 if (GET_CODE (addr) == SYMBOL_REF && SYMBOL_REF_DLLIMPORT_P (addr))
14231 return legitimize_dllimport_symbol (addr, inreg);
14232 if (GET_CODE (addr) == CONST
14233 && GET_CODE (XEXP (addr, 0)) == PLUS
14234 && GET_CODE (XEXP (XEXP (addr, 0), 0)) == SYMBOL_REF
14235 && SYMBOL_REF_DLLIMPORT_P (XEXP (XEXP (addr, 0), 0)))
14237 rtx t = legitimize_dllimport_symbol (XEXP (XEXP (addr, 0), 0), inreg);
14238 return gen_rtx_PLUS (Pmode, t, XEXP (XEXP (addr, 0), 1));
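/* Beyond dllimport handling, "refptr." indirection stubs are generated
   only for external data under the medium and large PIC code models.  */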
14242 if (ix86_cmodel != CM_LARGE_PIC && ix86_cmodel != CM_MEDIUM_PIC)
14243 return NULL_RTX;
14244 if (GET_CODE (addr) == SYMBOL_REF
14245 && !is_imported_p (addr)
14246 && SYMBOL_REF_EXTERNAL_P (addr)
14247 && SYMBOL_REF_DECL (addr))
14248 return legitimize_pe_coff_extern_decl (addr, inreg);
14250 if (GET_CODE (addr) == CONST
14251 && GET_CODE (XEXP (addr, 0)) == PLUS
14252 && GET_CODE (XEXP (XEXP (addr, 0), 0)) == SYMBOL_REF
14253 && !is_imported_p (XEXP (XEXP (addr, 0), 0))
14254 && SYMBOL_REF_EXTERNAL_P (XEXP (XEXP (addr, 0), 0))
14255 && SYMBOL_REF_DECL (XEXP (XEXP (addr, 0), 0)))
14257 rtx t = legitimize_pe_coff_extern_decl (XEXP (XEXP (addr, 0), 0), inreg);
14258 return gen_rtx_PLUS (Pmode, t, XEXP (XEXP (addr, 0), 1));
14260 return NULL_RTX;
14263 /* Try machine-dependent ways of modifying an illegitimate address
14264 to be legitimate. If we find one, return the new, valid address.
14265 This macro is used in only one place: `memory_address' in explow.c.
14267 OLDX is the address as it was before break_out_memory_refs was called.
14268 In some cases it is useful to look at this to decide what needs to be done.
14270 It is always safe for this macro to do nothing. It exists to recognize
14271 opportunities to optimize the output.
14273 For the 80386, we handle X+REG by loading X into a register R and
14274 using R+REG. R will go in a general reg and indexing will be used.
14275 However, if REG is a broken-out memory address or multiplication,
14276 nothing needs to be done because REG can certainly go in a general reg.
14278 When -fpic is used, special handling is needed for symbolic references.
14279 See comments by legitimize_pic_address in i386.c for details. */
14281 static rtx
14282 ix86_legitimize_address (rtx x, rtx, machine_mode mode)
14284 int changed = 0;
14285 unsigned log;
14287 log = GET_CODE (x) == SYMBOL_REF ? SYMBOL_REF_TLS_MODEL (x) : 0;
14288 if (log)
14289 return legitimize_tls_address (x, (enum tls_model) log, false);
14290 if (GET_CODE (x) == CONST
14291 && GET_CODE (XEXP (x, 0)) == PLUS
14292 && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF
14293 && (log = SYMBOL_REF_TLS_MODEL (XEXP (XEXP (x, 0), 0))))
14295 rtx t = legitimize_tls_address (XEXP (XEXP (x, 0), 0),
14296 (enum tls_model) log, false);
14297 return gen_rtx_PLUS (Pmode, t, XEXP (XEXP (x, 0), 1));
14300 if (TARGET_DLLIMPORT_DECL_ATTRIBUTES)
14302 rtx tmp = legitimize_pe_coff_symbol (x, true);
14303 if (tmp)
14304 return tmp;
14307 if (flag_pic && SYMBOLIC_CONST (x))
14308 return legitimize_pic_address (x, 0);
14310 #if TARGET_MACHO
14311 if (MACHO_DYNAMIC_NO_PIC_P && SYMBOLIC_CONST (x))
14312 return machopic_indirect_data_reference (x, 0);
14313 #endif
14315 /* Canonicalize shifts by 0, 1, 2, 3 into multiply */
14316 if (GET_CODE (x) == ASHIFT
14317 && CONST_INT_P (XEXP (x, 1))
14318 && (unsigned HOST_WIDE_INT) INTVAL (XEXP (x, 1)) < 4)
14320 changed = 1;
14321 log = INTVAL (XEXP (x, 1));
14322 x = gen_rtx_MULT (Pmode, force_reg (Pmode, XEXP (x, 0)),
14323 GEN_INT (1 << log));
14326 if (GET_CODE (x) == PLUS)
14328 /* Canonicalize shifts by 0, 1, 2, 3 into multiply. */
14330 if (GET_CODE (XEXP (x, 0)) == ASHIFT
14331 && CONST_INT_P (XEXP (XEXP (x, 0), 1))
14332 && (unsigned HOST_WIDE_INT) INTVAL (XEXP (XEXP (x, 0), 1)) < 4)
14334 changed = 1;
14335 log = INTVAL (XEXP (XEXP (x, 0), 1));
14336 XEXP (x, 0) = gen_rtx_MULT (Pmode,
14337 force_reg (Pmode, XEXP (XEXP (x, 0), 0)),
14338 GEN_INT (1 << log));
14341 if (GET_CODE (XEXP (x, 1)) == ASHIFT
14342 && CONST_INT_P (XEXP (XEXP (x, 1), 1))
14343 && (unsigned HOST_WIDE_INT) INTVAL (XEXP (XEXP (x, 1), 1)) < 4)
14345 changed = 1;
14346 log = INTVAL (XEXP (XEXP (x, 1), 1));
14347 XEXP (x, 1) = gen_rtx_MULT (Pmode,
14348 force_reg (Pmode, XEXP (XEXP (x, 1), 0)),
14349 GEN_INT (1 << log));
14352 /* Put multiply first if it isn't already. */
14353 if (GET_CODE (XEXP (x, 1)) == MULT)
14355 rtx tmp = XEXP (x, 0);
14356 XEXP (x, 0) = XEXP (x, 1);
14357 XEXP (x, 1) = tmp;
14358 changed = 1;
14361 /* Canonicalize (plus (mult (reg) (const)) (plus (reg) (const)))
14362 into (plus (plus (mult (reg) (const)) (reg)) (const)). This can be
14363 created by virtual register instantiation, register elimination, and
14364 similar optimizations. */
14365 if (GET_CODE (XEXP (x, 0)) == MULT && GET_CODE (XEXP (x, 1)) == PLUS)
14367 changed = 1;
14368 x = gen_rtx_PLUS (Pmode,
14369 gen_rtx_PLUS (Pmode, XEXP (x, 0),
14370 XEXP (XEXP (x, 1), 0)),
14371 XEXP (XEXP (x, 1), 1));
14374 /* Canonicalize
14375 (plus (plus (mult (reg) (const)) (plus (reg) (const))) const)
14376 into (plus (plus (mult (reg) (const)) (reg)) (const)). */
14377 else if (GET_CODE (x) == PLUS && GET_CODE (XEXP (x, 0)) == PLUS
14378 && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
14379 && GET_CODE (XEXP (XEXP (x, 0), 1)) == PLUS
14380 && CONSTANT_P (XEXP (x, 1)))
14382 rtx constant;
14383 rtx other = NULL_RTX;
14385 if (CONST_INT_P (XEXP (x, 1)))
14387 constant = XEXP (x, 1);
14388 other = XEXP (XEXP (XEXP (x, 0), 1), 1);
14390 else if (CONST_INT_P (XEXP (XEXP (XEXP (x, 0), 1), 1)))
14392 constant = XEXP (XEXP (XEXP (x, 0), 1), 1);
14393 other = XEXP (x, 1);
14395 else
14396 constant = 0;
14398 if (constant)
14400 changed = 1;
14401 x = gen_rtx_PLUS (Pmode,
14402 gen_rtx_PLUS (Pmode, XEXP (XEXP (x, 0), 0),
14403 XEXP (XEXP (XEXP (x, 0), 1), 0)),
14404 plus_constant (Pmode, other,
14405 INTVAL (constant)));
14409 if (changed && ix86_legitimate_address_p (mode, x, false))
14410 return x;
14412 if (GET_CODE (XEXP (x, 0)) == MULT)
14414 changed = 1;
14415 XEXP (x, 0) = copy_addr_to_reg (XEXP (x, 0));
14418 if (GET_CODE (XEXP (x, 1)) == MULT)
14420 changed = 1;
14421 XEXP (x, 1) = copy_addr_to_reg (XEXP (x, 1));
14424 if (changed
14425 && REG_P (XEXP (x, 1))
14426 && REG_P (XEXP (x, 0)))
14427 return x;
14429 if (flag_pic && SYMBOLIC_CONST (XEXP (x, 1)))
14431 changed = 1;
14432 x = legitimize_pic_address (x, 0);
14435 if (changed && ix86_legitimate_address_p (mode, x, false))
14436 return x;
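/* If one addend is already a plain register, compute the other one into a
   fresh Pmode register so that the sum becomes a simple base+index
   address.  */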
14438 if (REG_P (XEXP (x, 0)))
14440 rtx temp = gen_reg_rtx (Pmode);
14441 rtx val = force_operand (XEXP (x, 1), temp);
14442 if (val != temp)
14444 val = convert_to_mode (Pmode, val, 1);
14445 emit_move_insn (temp, val);
14448 XEXP (x, 1) = temp;
14449 return x;
14452 else if (REG_P (XEXP (x, 1)))
14454 rtx temp = gen_reg_rtx (Pmode);
14455 rtx val = force_operand (XEXP (x, 0), temp);
14456 if (val != temp)
14458 val = convert_to_mode (Pmode, val, 1);
14459 emit_move_insn (temp, val);
14462 XEXP (x, 0) = temp;
14463 return x;
14467 return x;
14470 /* Print an integer constant expression in assembler syntax. Addition
14471 and subtraction are the only arithmetic that may appear in these
14472 expressions. FILE is the stdio stream to write to, X is the rtx, and
14473 CODE is the operand print code from the output string. */
14475 static void
14476 output_pic_addr_const (FILE *file, rtx x, int code)
14478 char buf[256];
14480 switch (GET_CODE (x))
14482 case PC:
14483 gcc_assert (flag_pic);
14484 putc ('.', file);
14485 break;
14487 case SYMBOL_REF:
14488 if (TARGET_64BIT || ! TARGET_MACHO_BRANCH_ISLANDS)
14489 output_addr_const (file, x);
14490 else
14492 const char *name = XSTR (x, 0);
14494 /* Mark the decl as referenced so that cgraph will
14495 output the function. */
14496 if (SYMBOL_REF_DECL (x))
14497 mark_decl_referenced (SYMBOL_REF_DECL (x));
14499 #if TARGET_MACHO
14500 if (MACHOPIC_INDIRECT
14501 && machopic_classify_symbol (x) == MACHOPIC_UNDEFINED_FUNCTION)
14502 name = machopic_indirection_name (x, /*stub_p=*/true);
14503 #endif
14504 assemble_name (file, name);
14506 if (!TARGET_MACHO && !(TARGET_64BIT && TARGET_PECOFF)
14507 && code == 'P' && ! SYMBOL_REF_LOCAL_P (x))
14508 fputs ("@PLT", file);
14509 break;
14511 case LABEL_REF:
14512 x = XEXP (x, 0);
14513 /* FALLTHRU */
14514 case CODE_LABEL:
14515 ASM_GENERATE_INTERNAL_LABEL (buf, "L", CODE_LABEL_NUMBER (x));
14516 assemble_name (asm_out_file, buf);
14517 break;
14519 case CONST_INT:
14520 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
14521 break;
14523 case CONST:
14524 /* This used to output parentheses around the expression,
14525 but that does not work on the 386 (either ATT or BSD assembler). */
14526 output_pic_addr_const (file, XEXP (x, 0), code);
14527 break;
14529 case CONST_DOUBLE:
14530 if (GET_MODE (x) == VOIDmode)
14532 /* We can use %d if the number is <32 bits and positive. */
14533 if (CONST_DOUBLE_HIGH (x) || CONST_DOUBLE_LOW (x) < 0)
14534 fprintf (file, "0x%lx%08lx",
14535 (unsigned long) CONST_DOUBLE_HIGH (x),
14536 (unsigned long) CONST_DOUBLE_LOW (x));
14537 else
14538 fprintf (file, HOST_WIDE_INT_PRINT_DEC, CONST_DOUBLE_LOW (x));
14540 else
14541 /* We can't handle floating point constants;
14542 TARGET_PRINT_OPERAND must handle them. */
14543 output_operand_lossage ("floating constant misused");
14544 break;
14546 case PLUS:
14547 /* Some assemblers need integer constants to appear first. */
14548 if (CONST_INT_P (XEXP (x, 0)))
14550 output_pic_addr_const (file, XEXP (x, 0), code);
14551 putc ('+', file);
14552 output_pic_addr_const (file, XEXP (x, 1), code);
14554 else
14556 gcc_assert (CONST_INT_P (XEXP (x, 1)));
14557 output_pic_addr_const (file, XEXP (x, 1), code);
14558 putc ('+', file);
14559 output_pic_addr_const (file, XEXP (x, 0), code);
14561 break;
14563 case MINUS:
14564 if (!TARGET_MACHO)
14565 putc (ASSEMBLER_DIALECT == ASM_INTEL ? '(' : '[', file);
14566 output_pic_addr_const (file, XEXP (x, 0), code);
14567 putc ('-', file);
14568 output_pic_addr_const (file, XEXP (x, 1), code);
14569 if (!TARGET_MACHO)
14570 putc (ASSEMBLER_DIALECT == ASM_INTEL ? ')' : ']', file);
14571 break;
14573 case UNSPEC:
14574 if (XINT (x, 1) == UNSPEC_STACK_CHECK)
14576 bool f = i386_asm_output_addr_const_extra (file, x);
14577 gcc_assert (f);
14578 break;
14581 gcc_assert (XVECLEN (x, 0) == 1);
14582 output_pic_addr_const (file, XVECEXP (x, 0, 0), code);
14583 switch (XINT (x, 1))
14585 case UNSPEC_GOT:
14586 fputs ("@GOT", file);
14587 break;
14588 case UNSPEC_GOTOFF:
14589 fputs ("@GOTOFF", file);
14590 break;
14591 case UNSPEC_PLTOFF:
14592 fputs ("@PLTOFF", file);
14593 break;
14594 case UNSPEC_PCREL:
14595 fputs (ASSEMBLER_DIALECT == ASM_ATT ?
14596 "(%rip)" : "[rip]", file);
14597 break;
14598 case UNSPEC_GOTPCREL:
14599 fputs (ASSEMBLER_DIALECT == ASM_ATT ?
14600 "@GOTPCREL(%rip)" : "@GOTPCREL[rip]", file);
14601 break;
14602 case UNSPEC_GOTTPOFF:
14603 /* FIXME: This might be @TPOFF in Sun ld too. */
14604 fputs ("@gottpoff", file);
14605 break;
14606 case UNSPEC_TPOFF:
14607 fputs ("@tpoff", file);
14608 break;
14609 case UNSPEC_NTPOFF:
14610 if (TARGET_64BIT)
14611 fputs ("@tpoff", file);
14612 else
14613 fputs ("@ntpoff", file);
14614 break;
14615 case UNSPEC_DTPOFF:
14616 fputs ("@dtpoff", file);
14617 break;
14618 case UNSPEC_GOTNTPOFF:
14619 if (TARGET_64BIT)
14620 fputs (ASSEMBLER_DIALECT == ASM_ATT ?
14621 "@gottpoff(%rip)": "@gottpoff[rip]", file);
14622 else
14623 fputs ("@gotntpoff", file);
14624 break;
14625 case UNSPEC_INDNTPOFF:
14626 fputs ("@indntpoff", file);
14627 break;
14628 #if TARGET_MACHO
14629 case UNSPEC_MACHOPIC_OFFSET:
14630 putc ('-', file);
14631 machopic_output_function_base_name (file);
14632 break;
14633 #endif
14634 default:
14635 output_operand_lossage ("invalid UNSPEC as operand");
14636 break;
14638 break;
14640 default:
14641 output_operand_lossage ("invalid expression as operand");
14645 /* This is called from dwarf2out.c via TARGET_ASM_OUTPUT_DWARF_DTPREL.
14646 We need to emit DTP-relative relocations. */
14648 static void ATTRIBUTE_UNUSED
14649 i386_output_dwarf_dtprel (FILE *file, int size, rtx x)
14651 fputs (ASM_LONG, file);
14652 output_addr_const (file, x);
14653 fputs ("@dtpoff", file);
14654 switch (size)
14656 case 4:
14657 break;
14658 case 8:
14659 fputs (", 0", file);
14660 break;
14661 default:
14662 gcc_unreachable ();
14666 /* Return true if X is a representation of the PIC register. This copes
14667 with calls from ix86_find_base_term, where the register might have
14668 been replaced by a cselib value. */
14670 static bool
14671 ix86_pic_register_p (rtx x)
14673 if (GET_CODE (x) == VALUE && CSELIB_VAL_PTR (x))
14674 return (pic_offset_table_rtx
14675 && rtx_equal_for_cselib_p (x, pic_offset_table_rtx));
14676 else if (!REG_P (x))
14677 return false;
14678 else if (pic_offset_table_rtx)
14680 if (REGNO (x) == REGNO (pic_offset_table_rtx))
14681 return true;
14682 if (HARD_REGISTER_P (x)
14683 && !HARD_REGISTER_P (pic_offset_table_rtx)
14684 && ORIGINAL_REGNO (x) == REGNO (pic_offset_table_rtx))
14685 return true;
14686 return false;
14688 else
14689 return REGNO (x) == PIC_OFFSET_TABLE_REGNUM;
14692 /* Helper function for ix86_delegitimize_address.
14693 Attempt to delegitimize TLS local-exec accesses. */
14695 static rtx
14696 ix86_delegitimize_tls_address (rtx orig_x)
14698 rtx x = orig_x, unspec;
14699 struct ix86_address addr;
14701 if (!TARGET_TLS_DIRECT_SEG_REFS)
14702 return orig_x;
14703 if (MEM_P (x))
14704 x = XEXP (x, 0);
14705 if (GET_CODE (x) != PLUS || GET_MODE (x) != Pmode)
14706 return orig_x;
14707 if (ix86_decompose_address (x, &addr) == 0
14708 || addr.seg != DEFAULT_TLS_SEG_REG
14709 || addr.disp == NULL_RTX
14710 || GET_CODE (addr.disp) != CONST)
14711 return orig_x;
14712 unspec = XEXP (addr.disp, 0);
14713 if (GET_CODE (unspec) == PLUS && CONST_INT_P (XEXP (unspec, 1)))
14714 unspec = XEXP (unspec, 0);
14715 if (GET_CODE (unspec) != UNSPEC || XINT (unspec, 1) != UNSPEC_NTPOFF)
14716 return orig_x;
14717 x = XVECEXP (unspec, 0, 0);
14718 gcc_assert (GET_CODE (x) == SYMBOL_REF);
14719 if (unspec != XEXP (addr.disp, 0))
14720 x = gen_rtx_PLUS (Pmode, x, XEXP (XEXP (addr.disp, 0), 1));
14721 if (addr.index)
14723 rtx idx = addr.index;
14724 if (addr.scale != 1)
14725 idx = gen_rtx_MULT (Pmode, idx, GEN_INT (addr.scale));
14726 x = gen_rtx_PLUS (Pmode, idx, x);
14728 if (addr.base)
14729 x = gen_rtx_PLUS (Pmode, addr.base, x);
14730 if (MEM_P (orig_x))
14731 x = replace_equiv_address_nv (orig_x, x);
14732 return x;
14735 /* In the name of slightly smaller debug output, and to cater to
14736 general assembler lossage, recognize PIC+GOTOFF and turn it back
14737 into a direct symbol reference.
14739 On Darwin, this is necessary to avoid a crash, because Darwin
14740 has a different PIC label for each routine but the DWARF debugging
14741 information is not associated with any particular routine, so it's
14742 necessary to remove references to the PIC label from RTL stored by
14743 the DWARF output code. */
14745 static rtx
14746 ix86_delegitimize_address (rtx x)
14748 rtx orig_x = delegitimize_mem_from_attrs (x);
14749 /* addend is NULL or some rtx if x is something+GOTOFF where
14750 something doesn't include the PIC register. */
14751 rtx addend = NULL_RTX;
14752 /* reg_addend is NULL or a multiple of some register. */
14753 rtx reg_addend = NULL_RTX;
14754 /* const_addend is NULL or a const_int. */
14755 rtx const_addend = NULL_RTX;
14756 /* This is the result, or NULL. */
14757 rtx result = NULL_RTX;
14759 x = orig_x;
14761 if (MEM_P (x))
14762 x = XEXP (x, 0);
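/* On 64-bit targets, first undo the rip-relative UNSPEC_PCREL and
   UNSPEC_GOTPCREL wrappers.  */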
14764 if (TARGET_64BIT)
14766 if (GET_CODE (x) == CONST
14767 && GET_CODE (XEXP (x, 0)) == PLUS
14768 && GET_MODE (XEXP (x, 0)) == Pmode
14769 && CONST_INT_P (XEXP (XEXP (x, 0), 1))
14770 && GET_CODE (XEXP (XEXP (x, 0), 0)) == UNSPEC
14771 && XINT (XEXP (XEXP (x, 0), 0), 1) == UNSPEC_PCREL)
14773 rtx x2 = XVECEXP (XEXP (XEXP (x, 0), 0), 0, 0);
14774 x = gen_rtx_PLUS (Pmode, XEXP (XEXP (x, 0), 1), x2);
14775 if (MEM_P (orig_x))
14776 x = replace_equiv_address_nv (orig_x, x);
14777 return x;
14780 if (GET_CODE (x) == CONST
14781 && GET_CODE (XEXP (x, 0)) == UNSPEC
14782 && (XINT (XEXP (x, 0), 1) == UNSPEC_GOTPCREL
14783 || XINT (XEXP (x, 0), 1) == UNSPEC_PCREL)
14784 && (MEM_P (orig_x) || XINT (XEXP (x, 0), 1) == UNSPEC_PCREL))
14786 x = XVECEXP (XEXP (x, 0), 0, 0);
14787 if (GET_MODE (orig_x) != GET_MODE (x) && MEM_P (orig_x))
14789 x = simplify_gen_subreg (GET_MODE (orig_x), x,
14790 GET_MODE (x), 0);
14791 if (x == NULL_RTX)
14792 return orig_x;
14794 return x;
14797 if (ix86_cmodel != CM_MEDIUM_PIC && ix86_cmodel != CM_LARGE_PIC)
14798 return ix86_delegitimize_tls_address (orig_x);
14800 /* Fall thru into the code shared with -m32 for -mcmodel=large -fpic
14801 and -mcmodel=medium -fpic. */
14804 if (GET_CODE (x) != PLUS
14805 || GET_CODE (XEXP (x, 1)) != CONST)
14806 return ix86_delegitimize_tls_address (orig_x);
14808 if (ix86_pic_register_p (XEXP (x, 0)))
14809 /* %ebx + GOT/GOTOFF */
14811 else if (GET_CODE (XEXP (x, 0)) == PLUS)
14813 /* %ebx + %reg * scale + GOT/GOTOFF */
14814 reg_addend = XEXP (x, 0);
14815 if (ix86_pic_register_p (XEXP (reg_addend, 0)))
14816 reg_addend = XEXP (reg_addend, 1);
14817 else if (ix86_pic_register_p (XEXP (reg_addend, 1)))
14818 reg_addend = XEXP (reg_addend, 0);
14819 else
14821 reg_addend = NULL_RTX;
14822 addend = XEXP (x, 0);
14825 else
14826 addend = XEXP (x, 0);
14828 x = XEXP (XEXP (x, 1), 0);
14829 if (GET_CODE (x) == PLUS
14830 && CONST_INT_P (XEXP (x, 1)))
14832 const_addend = XEXP (x, 1);
14833 x = XEXP (x, 0);
14836 if (GET_CODE (x) == UNSPEC
14837 && ((XINT (x, 1) == UNSPEC_GOT && MEM_P (orig_x) && !addend)
14838 || (XINT (x, 1) == UNSPEC_GOTOFF && !MEM_P (orig_x))
14839 || (XINT (x, 1) == UNSPEC_PLTOFF && ix86_cmodel == CM_LARGE_PIC
14840 && !MEM_P (orig_x) && !addend)))
14841 result = XVECEXP (x, 0, 0);
14843 if (!TARGET_64BIT && TARGET_MACHO && darwin_local_data_pic (x)
14844 && !MEM_P (orig_x))
14845 result = XVECEXP (x, 0, 0);
14847 if (! result)
14848 return ix86_delegitimize_tls_address (orig_x);
14850 if (const_addend)
14851 result = gen_rtx_CONST (Pmode, gen_rtx_PLUS (Pmode, result, const_addend));
14852 if (reg_addend)
14853 result = gen_rtx_PLUS (Pmode, reg_addend, result);
14854 if (addend)
14856 /* If the rest of original X doesn't involve the PIC register, add
14857 addend and subtract pic_offset_table_rtx. This can happen e.g.
14858 for code like:
14859 leal (%ebx, %ecx, 4), %ecx
14861 movl foo@GOTOFF(%ecx), %edx
14862 in which case we return (%ecx - %ebx) + foo
14863 or (%ecx - _GLOBAL_OFFSET_TABLE_) + foo if pseudo_pic_reg
14864 and reload has completed. */
14865 if (pic_offset_table_rtx
14866 && (!reload_completed || !ix86_use_pseudo_pic_reg ()))
14867 result = gen_rtx_PLUS (Pmode, gen_rtx_MINUS (Pmode, copy_rtx (addend),
14868 pic_offset_table_rtx),
14869 result);
14870 else if (pic_offset_table_rtx && !TARGET_MACHO && !TARGET_VXWORKS_RTP)
14872 rtx tmp = gen_rtx_SYMBOL_REF (Pmode, GOT_SYMBOL_NAME);
14873 tmp = gen_rtx_MINUS (Pmode, copy_rtx (addend), tmp);
14874 result = gen_rtx_PLUS (Pmode, tmp, result);
14876 else
14877 return orig_x;
14879 if (GET_MODE (orig_x) != Pmode && MEM_P (orig_x))
14881 result = simplify_gen_subreg (GET_MODE (orig_x), result, Pmode, 0);
14882 if (result == NULL_RTX)
14883 return orig_x;
14885 return result;
14888 /* If X is a machine specific address (i.e. a symbol or label being
14889 referenced as a displacement from the GOT implemented using an
14890 UNSPEC), then return the base term. Otherwise return X. */
14893 ix86_find_base_term (rtx x)
14895 rtx term;
14897 if (TARGET_64BIT)
14899 if (GET_CODE (x) != CONST)
14900 return x;
14901 term = XEXP (x, 0);
14902 if (GET_CODE (term) == PLUS
14903 && (CONST_INT_P (XEXP (term, 1))
14904 || GET_CODE (XEXP (term, 1)) == CONST_DOUBLE))
14905 term = XEXP (term, 0);
14906 if (GET_CODE (term) != UNSPEC
14907 || (XINT (term, 1) != UNSPEC_GOTPCREL
14908 && XINT (term, 1) != UNSPEC_PCREL))
14909 return x;
14911 return XVECEXP (term, 0, 0);
14914 return ix86_delegitimize_address (x);
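/* Print to FILE the condition-code suffix (e, ne, g, b, ...) for comparison
   CODE in MODE.  REVERSE inverts the condition; FP selects the spelling used
   by the floating-point variants ('F'/'f' operand codes), e.g. "nbe"
   instead of "a".  */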
14917 static void
14918 put_condition_code (enum rtx_code code, machine_mode mode, bool reverse,
14919 bool fp, FILE *file)
14921 const char *suffix;
14923 if (mode == CCFPmode || mode == CCFPUmode)
14925 code = ix86_fp_compare_code_to_integer (code);
14926 mode = CCmode;
14928 if (reverse)
14929 code = reverse_condition (code);
14931 switch (code)
14933 case EQ:
14934 switch (mode)
14936 case CCAmode:
14937 suffix = "a";
14938 break;
14940 case CCCmode:
14941 suffix = "c";
14942 break;
14944 case CCOmode:
14945 suffix = "o";
14946 break;
14948 case CCSmode:
14949 suffix = "s";
14950 break;
14952 default:
14953 suffix = "e";
14955 break;
14956 case NE:
14957 switch (mode)
14959 case CCAmode:
14960 suffix = "na";
14961 break;
14963 case CCCmode:
14964 suffix = "nc";
14965 break;
14967 case CCOmode:
14968 suffix = "no";
14969 break;
14971 case CCSmode:
14972 suffix = "ns";
14973 break;
14975 default:
14976 suffix = "ne";
14978 break;
14979 case GT:
14980 gcc_assert (mode == CCmode || mode == CCNOmode || mode == CCGCmode);
14981 suffix = "g";
14982 break;
14983 case GTU:
14984 /* ??? Use "nbe" instead of "a" for fcmov lossage on some assemblers.
14985 Those same assemblers have the same but opposite lossage on cmov. */
14986 if (mode == CCmode)
14987 suffix = fp ? "nbe" : "a";
14988 else
14989 gcc_unreachable ();
14990 break;
14991 case LT:
14992 switch (mode)
14994 case CCNOmode:
14995 case CCGOCmode:
14996 suffix = "s";
14997 break;
14999 case CCmode:
15000 case CCGCmode:
15001 suffix = "l";
15002 break;
15004 default:
15005 gcc_unreachable ();
15007 break;
15008 case LTU:
15009 if (mode == CCmode)
15010 suffix = "b";
15011 else if (mode == CCCmode)
15012 suffix = fp ? "b" : "c";
15013 else
15014 gcc_unreachable ();
15015 break;
15016 case GE:
15017 switch (mode)
15019 case CCNOmode:
15020 case CCGOCmode:
15021 suffix = "ns";
15022 break;
15024 case CCmode:
15025 case CCGCmode:
15026 suffix = "ge";
15027 break;
15029 default:
15030 gcc_unreachable ();
15032 break;
15033 case GEU:
15034 if (mode == CCmode)
15035 suffix = "nb";
15036 else if (mode == CCCmode)
15037 suffix = fp ? "nb" : "nc";
15038 else
15039 gcc_unreachable ();
15040 break;
15041 case LE:
15042 gcc_assert (mode == CCmode || mode == CCGCmode || mode == CCNOmode);
15043 suffix = "le";
15044 break;
15045 case LEU:
15046 if (mode == CCmode)
15047 suffix = "be";
15048 else
15049 gcc_unreachable ();
15050 break;
15051 case UNORDERED:
15052 suffix = fp ? "u" : "p";
15053 break;
15054 case ORDERED:
15055 suffix = fp ? "nu" : "np";
15056 break;
15057 default:
15058 gcc_unreachable ();
15060 fputs (suffix, file);
15063 /* Print the name of register X to FILE based on its machine mode and number.
15064 If CODE is 'w', pretend the mode is HImode.
15065 If CODE is 'b', pretend the mode is QImode.
15066 If CODE is 'k', pretend the mode is SImode.
15067 If CODE is 'q', pretend the mode is DImode.
15068 If CODE is 'x', pretend the mode is V4SFmode.
15069 If CODE is 't', pretend the mode is V8SFmode.
15070 If CODE is 'g', pretend the mode is V16SFmode.
15071 If CODE is 'h', pretend the reg is the 'high' byte register.
15072 If CODE is 'y', print "st(0)" instead of "st", if the reg is stack op.
15073 If CODE is 'd', duplicate the operand for AVX instruction.
15076 void
15077 print_reg (rtx x, int code, FILE *file)
15079 const char *reg;
15080 unsigned int regno;
15081 bool duplicated = code == 'd' && TARGET_AVX;
15083 if (ASSEMBLER_DIALECT == ASM_ATT)
15084 putc ('%', file);
15086 if (x == pc_rtx)
15088 gcc_assert (TARGET_64BIT);
15089 fputs ("rip", file);
15090 return;
15093 regno = true_regnum (x);
15094 gcc_assert (regno != ARG_POINTER_REGNUM
15095 && regno != FRAME_POINTER_REGNUM
15096 && regno != FLAGS_REG
15097 && regno != FPSR_REG
15098 && regno != FPCR_REG);
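/* Map the operand-code letter onto an operand size in bytes; 0 stands for
   a high byte register and 3 for the x87 st(0) form.  */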
15100 if (code == 'w' || MMX_REG_P (x))
15101 code = 2;
15102 else if (code == 'b')
15103 code = 1;
15104 else if (code == 'k')
15105 code = 4;
15106 else if (code == 'q')
15107 code = 8;
15108 else if (code == 'y')
15109 code = 3;
15110 else if (code == 'h')
15111 code = 0;
15112 else if (code == 'x')
15113 code = 16;
15114 else if (code == 't')
15115 code = 32;
15116 else if (code == 'g')
15117 code = 64;
15118 else
15119 code = GET_MODE_SIZE (GET_MODE (x));
15121 /* Irritatingly, the AMD extended registers use a different naming
15122 convention from the normal registers: "r%d[bwd]".  */
15123 if (REX_INT_REGNO_P (regno))
15125 gcc_assert (TARGET_64BIT);
15126 putc ('r', file);
15127 fprint_ul (file, regno - FIRST_REX_INT_REG + 8);
15128 switch (code)
15130 case 0:
15131 error ("extended registers have no high halves");
15132 break;
15133 case 1:
15134 putc ('b', file);
15135 break;
15136 case 2:
15137 putc ('w', file);
15138 break;
15139 case 4:
15140 putc ('d', file);
15141 break;
15142 case 8:
15143 /* no suffix */
15144 break;
15145 default:
15146 error ("unsupported operand size for extended register");
15147 break;
15149 return;
15152 reg = NULL;
15153 switch (code)
15155 case 3:
15156 if (STACK_TOP_P (x))
15158 reg = "st(0)";
15159 break;
15161 /* FALLTHRU */
15162 case 8:
15163 case 4:
15164 case 12:
15165 if (! ANY_FP_REG_P (x) && ! ANY_MASK_REG_P (x) && ! ANY_BND_REG_P (x))
15166 putc (code == 8 && TARGET_64BIT ? 'r' : 'e', file);
15167 /* FALLTHRU */
15168 case 16:
15169 case 2:
15170 normal:
15171 reg = hi_reg_name[regno];
15172 break;
15173 case 1:
15174 if (regno >= ARRAY_SIZE (qi_reg_name))
15175 goto normal;
15176 reg = qi_reg_name[regno];
15177 break;
15178 case 0:
15179 if (regno >= ARRAY_SIZE (qi_high_reg_name))
15180 goto normal;
15181 reg = qi_high_reg_name[regno];
15182 break;
15183 case 32:
15184 if (SSE_REG_P (x))
15186 gcc_assert (!duplicated);
15187 putc ('y', file);
15188 fputs (hi_reg_name[regno] + 1, file);
15189 return;
15191 case 64:
15192 if (SSE_REG_P (x))
15194 gcc_assert (!duplicated);
15195 putc ('z', file);
15196 fputs (hi_reg_name[REGNO (x)] + 1, file);
15197 return;
15199 break;
15200 default:
15201 gcc_unreachable ();
15204 fputs (reg, file);
15205 if (duplicated)
15207 if (ASSEMBLER_DIALECT == ASM_ATT)
15208 fprintf (file, ", %%%s", reg);
15209 else
15210 fprintf (file, ", %s", reg);
15214 /* Meaning of CODE:
15215 L,W,B,Q,S,T -- print the opcode suffix for specified size of operand.
15216 C -- print opcode suffix for set/cmov insn.
15217 c -- like C, but print reversed condition
15218 F,f -- likewise, but for floating-point.
15219 O -- if HAVE_AS_IX86_CMOV_SUN_SYNTAX, expand to "w.", "l." or "q.",
15220 otherwise nothing
15221 R -- print embedded rounding and sae.
15222 r -- print only sae.
15223 z -- print the opcode suffix for the size of the current operand.
15224 Z -- likewise, with special suffixes for x87 instructions.
15225 * -- print a star (in certain assembler syntax)
15226 A -- print an absolute memory reference.
15227 E -- print address with DImode register names if TARGET_64BIT.
15228 w -- print the operand as if it's a "word" (HImode) even if it isn't.
15229 s -- print a shift double count, followed by the assembler's argument
15230 delimiter.
15231 b -- print the QImode name of the register for the indicated operand.
15232 %b0 would print %al if operands[0] is reg 0.
15233 w -- likewise, print the HImode name of the register.
15234 k -- likewise, print the SImode name of the register.
15235 q -- likewise, print the DImode name of the register.
15236 x -- likewise, print the V4SFmode name of the register.
15237 t -- likewise, print the V8SFmode name of the register.
15238 g -- likewise, print the V16SFmode name of the register.
15239 h -- print the QImode name for a "high" register, either ah, bh, ch or dh.
15240 y -- print "st(0)" instead of "st" as a register.
15241 d -- print duplicated register operand for AVX instruction.
15242 D -- print condition for SSE cmp instruction.
15243 P -- if PIC, print an @PLT suffix.
15244 p -- print raw symbol name.
15245 X -- don't print any sort of PIC '@' suffix for a symbol.
15246 & -- print some in-use local-dynamic symbol name.
15247 H -- print a memory address offset by 8; used for sse high-parts
15248 Y -- print condition for XOP pcom* instruction.
15249 + -- print a branch hint as 'cs' or 'ds' prefix
15250 ; -- print a semicolon (after prefixes due to bug in older gas).
15251 ~ -- print "i" if TARGET_AVX2, "f" otherwise.
15252 @ -- print a segment register of thread base pointer load
15253 ^ -- print addr32 prefix if TARGET_64BIT and Pmode != word_mode
15254 ! -- print MPX prefix for jxx/call/ret instructions if required.
15257 void
15258 ix86_print_operand (FILE *file, rtx x, int code)
15260 if (code)
15262 switch (code)
15264 case 'A':
15265 switch (ASSEMBLER_DIALECT)
15267 case ASM_ATT:
15268 putc ('*', file);
15269 break;
15271 case ASM_INTEL:
15272 /* Intel syntax. For absolute addresses, registers should not
15273 be surrounded by braces. */
15274 if (!REG_P (x))
15276 putc ('[', file);
15277 ix86_print_operand (file, x, 0);
15278 putc (']', file);
15279 return;
15281 break;
15283 default:
15284 gcc_unreachable ();
15287 ix86_print_operand (file, x, 0);
15288 return;
15290 case 'E':
15291 /* Wrap address in an UNSPEC to declare special handling. */
15292 if (TARGET_64BIT)
15293 x = gen_rtx_UNSPEC (DImode, gen_rtvec (1, x), UNSPEC_LEA_ADDR);
15295 output_address (x);
15296 return;
15298 case 'L':
15299 if (ASSEMBLER_DIALECT == ASM_ATT)
15300 putc ('l', file);
15301 return;
15303 case 'W':
15304 if (ASSEMBLER_DIALECT == ASM_ATT)
15305 putc ('w', file);
15306 return;
15308 case 'B':
15309 if (ASSEMBLER_DIALECT == ASM_ATT)
15310 putc ('b', file);
15311 return;
15313 case 'Q':
15314 if (ASSEMBLER_DIALECT == ASM_ATT)
15315 putc ('l', file);
15316 return;
15318 case 'S':
15319 if (ASSEMBLER_DIALECT == ASM_ATT)
15320 putc ('s', file);
15321 return;
15323 case 'T':
15324 if (ASSEMBLER_DIALECT == ASM_ATT)
15325 putc ('t', file);
15326 return;
15328 case 'O':
15329 #ifdef HAVE_AS_IX86_CMOV_SUN_SYNTAX
15330 if (ASSEMBLER_DIALECT != ASM_ATT)
15331 return;
15333 switch (GET_MODE_SIZE (GET_MODE (x)))
15335 case 2:
15336 putc ('w', file);
15337 break;
15339 case 4:
15340 putc ('l', file);
15341 break;
15343 case 8:
15344 putc ('q', file);
15345 break;
15347 default:
15348 output_operand_lossage
15349 ("invalid operand size for operand code 'O'");
15350 return;
15353 putc ('.', file);
15354 #endif
15355 return;
15357 case 'z':
15358 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_INT)
15360 /* Opcodes don't get size suffixes if using Intel opcodes. */
15361 if (ASSEMBLER_DIALECT == ASM_INTEL)
15362 return;
15364 switch (GET_MODE_SIZE (GET_MODE (x)))
15366 case 1:
15367 putc ('b', file);
15368 return;
15370 case 2:
15371 putc ('w', file);
15372 return;
15374 case 4:
15375 putc ('l', file);
15376 return;
15378 case 8:
15379 putc ('q', file);
15380 return;
15382 default:
15383 output_operand_lossage
15384 ("invalid operand size for operand code 'z'");
15385 return;
15389 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_FLOAT)
15390 warning
15391 (0, "non-integer operand used with operand code 'z'");
15392 /* FALLTHRU */
15394 case 'Z':
15395 /* 387 opcodes don't get size suffixes if using Intel opcodes. */
15396 if (ASSEMBLER_DIALECT == ASM_INTEL)
15397 return;
15399 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_INT)
15401 switch (GET_MODE_SIZE (GET_MODE (x)))
15403 case 2:
15404 #ifdef HAVE_AS_IX86_FILDS
15405 putc ('s', file);
15406 #endif
15407 return;
15409 case 4:
15410 putc ('l', file);
15411 return;
15413 case 8:
15414 #ifdef HAVE_AS_IX86_FILDQ
15415 putc ('q', file);
15416 #else
15417 fputs ("ll", file);
15418 #endif
15419 return;
15421 default:
15422 break;
15425 else if (GET_MODE_CLASS (GET_MODE (x)) == MODE_FLOAT)
15427 /* 387 opcodes don't get size suffixes
15428 if the operands are registers. */
15429 if (STACK_REG_P (x))
15430 return;
15432 switch (GET_MODE_SIZE (GET_MODE (x)))
15434 case 4:
15435 putc ('s', file);
15436 return;
15438 case 8:
15439 putc ('l', file);
15440 return;
15442 case 12:
15443 case 16:
15444 putc ('t', file);
15445 return;
15447 default:
15448 break;
15451 else
15453 output_operand_lossage
15454 ("invalid operand type used with operand code 'Z'");
15455 return;
15458 output_operand_lossage
15459 ("invalid operand size for operand code 'Z'");
15460 return;
15462 case 'd':
15463 case 'b':
15464 case 'w':
15465 case 'k':
15466 case 'q':
15467 case 'h':
15468 case 't':
15469 case 'g':
15470 case 'y':
15471 case 'x':
15472 case 'X':
15473 case 'P':
15474 case 'p':
15475 break;
15477 case 's':
15478 if (CONST_INT_P (x) || ! SHIFT_DOUBLE_OMITS_COUNT)
15480 ix86_print_operand (file, x, 0);
15481 fputs (", ", file);
15483 return;
15485 case 'Y':
15486 switch (GET_CODE (x))
15488 case NE:
15489 fputs ("neq", file);
15490 break;
15491 case EQ:
15492 fputs ("eq", file);
15493 break;
15494 case GE:
15495 case GEU:
15496 fputs (INTEGRAL_MODE_P (GET_MODE (x)) ? "ge" : "unlt", file);
15497 break;
15498 case GT:
15499 case GTU:
15500 fputs (INTEGRAL_MODE_P (GET_MODE (x)) ? "gt" : "unle", file);
15501 break;
15502 case LE:
15503 case LEU:
15504 fputs ("le", file);
15505 break;
15506 case LT:
15507 case LTU:
15508 fputs ("lt", file);
15509 break;
15510 case UNORDERED:
15511 fputs ("unord", file);
15512 break;
15513 case ORDERED:
15514 fputs ("ord", file);
15515 break;
15516 case UNEQ:
15517 fputs ("ueq", file);
15518 break;
15519 case UNGE:
15520 fputs ("nlt", file);
15521 break;
15522 case UNGT:
15523 fputs ("nle", file);
15524 break;
15525 case UNLE:
15526 fputs ("ule", file);
15527 break;
15528 case UNLT:
15529 fputs ("ult", file);
15530 break;
15531 case LTGT:
15532 fputs ("une", file);
15533 break;
15534 default:
15535 output_operand_lossage ("operand is not a condition code, "
15536 "invalid operand code 'Y'");
15537 return;
15539 return;
15541 case 'D':
15542 /* A little bit of brain damage here.  The SSE compare instructions
15543 use completely different names for the comparisons than the
15544 fp conditional moves do.  */
15545 switch (GET_CODE (x))
15547 case UNEQ:
15548 if (TARGET_AVX)
15550 fputs ("eq_us", file);
15551 break;
15553 case EQ:
15554 fputs ("eq", file);
15555 break;
15556 case UNLT:
15557 if (TARGET_AVX)
15559 fputs ("nge", file);
15560 break;
15562 case LT:
15563 fputs ("lt", file);
15564 break;
15565 case UNLE:
15566 if (TARGET_AVX)
15568 fputs ("ngt", file);
15569 break;
15571 case LE:
15572 fputs ("le", file);
15573 break;
15574 case UNORDERED:
15575 fputs ("unord", file);
15576 break;
15577 case LTGT:
15578 if (TARGET_AVX)
15580 fputs ("neq_oq", file);
15581 break;
15583 case NE:
15584 fputs ("neq", file);
15585 break;
15586 case GE:
15587 if (TARGET_AVX)
15589 fputs ("ge", file);
15590 break;
15592 case UNGE:
15593 fputs ("nlt", file);
15594 break;
15595 case GT:
15596 if (TARGET_AVX)
15598 fputs ("gt", file);
15599 break;
15601 case UNGT:
15602 fputs ("nle", file);
15603 break;
15604 case ORDERED:
15605 fputs ("ord", file);
15606 break;
15607 default:
15608 output_operand_lossage ("operand is not a condition code, "
15609 "invalid operand code 'D'");
15610 return;
15612 return;
15614 case 'F':
15615 case 'f':
15616 #ifdef HAVE_AS_IX86_CMOV_SUN_SYNTAX
15617 if (ASSEMBLER_DIALECT == ASM_ATT)
15618 putc ('.', file);
15619 #endif
15621 case 'C':
15622 case 'c':
15623 if (!COMPARISON_P (x))
15625 output_operand_lossage ("operand is not a condition code, "
15626 "invalid operand code '%c'", code);
15627 return;
15629 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)),
15630 code == 'c' || code == 'f',
15631 code == 'F' || code == 'f',
15632 file);
15633 return;
15635 case 'H':
15636 if (!offsettable_memref_p (x))
15638 output_operand_lossage ("operand is not an offsettable memory "
15639 "reference, invalid operand code 'H'");
15640 return;
15642 /* It doesn't actually matter what mode we use here, as we're
15643 only going to use this for printing. */
15644 x = adjust_address_nv (x, DImode, 8);
15645 /* Output 'qword ptr' for intel assembler dialect. */
15646 if (ASSEMBLER_DIALECT == ASM_INTEL)
15647 code = 'q';
15648 break;
15650 case 'K':
15651 gcc_assert (CONST_INT_P (x));
15653 if (INTVAL (x) & IX86_HLE_ACQUIRE)
15654 #ifdef HAVE_AS_IX86_HLE
15655 fputs ("xacquire ", file);
15656 #else
15657 fputs ("\n" ASM_BYTE "0xf2\n\t", file);
15658 #endif
15659 else if (INTVAL (x) & IX86_HLE_RELEASE)
15660 #ifdef HAVE_AS_IX86_HLE
15661 fputs ("xrelease ", file);
15662 #else
15663 fputs ("\n" ASM_BYTE "0xf3\n\t", file);
15664 #endif
15665 /* We do not want to print value of the operand. */
15666 return;
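/* '%N': print the AVX-512 zeroing-masking marker "{z}" when the merge
   operand is a zero constant.  */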
15668 case 'N':
15669 if (x == const0_rtx || x == CONST0_RTX (GET_MODE (x)))
15670 fputs ("{z}", file);
15671 return;
15673 case 'r':
15674 gcc_assert (CONST_INT_P (x));
15675 gcc_assert (INTVAL (x) == ROUND_SAE);
15677 if (ASSEMBLER_DIALECT == ASM_INTEL)
15678 fputs (", ", file);
15680 fputs ("{sae}", file);
15682 if (ASSEMBLER_DIALECT == ASM_ATT)
15683 fputs (", ", file);
15685 return;
15687 case 'R':
15688 gcc_assert (CONST_INT_P (x));
15690 if (ASSEMBLER_DIALECT == ASM_INTEL)
15691 fputs (", ", file);
15693 switch (INTVAL (x))
15695 case ROUND_NEAREST_INT | ROUND_SAE:
15696 fputs ("{rn-sae}", file);
15697 break;
15698 case ROUND_NEG_INF | ROUND_SAE:
15699 fputs ("{rd-sae}", file);
15700 break;
15701 case ROUND_POS_INF | ROUND_SAE:
15702 fputs ("{ru-sae}", file);
15703 break;
15704 case ROUND_ZERO | ROUND_SAE:
15705 fputs ("{rz-sae}", file);
15706 break;
15707 default:
15708 gcc_unreachable ();
15711 if (ASSEMBLER_DIALECT == ASM_ATT)
15712 fputs (", ", file);
15714 return;
15716 case '*':
15717 if (ASSEMBLER_DIALECT == ASM_ATT)
15718 putc ('*', file);
15719 return;
15721 case '&':
15723 const char *name = get_some_local_dynamic_name ();
15724 if (name == NULL)
15725 output_operand_lossage ("'%%&' used without any "
15726 "local dynamic TLS references");
15727 else
15728 assemble_name (file, name);
15729 return;
15732 case '+':
15734 rtx x;
15736 if (!optimize
15737 || optimize_function_for_size_p (cfun)
15738 || !TARGET_BRANCH_PREDICTION_HINTS)
15739 return;
15741 x = find_reg_note (current_output_insn, REG_BR_PROB, 0);
15742 if (x)
15744 int pred_val = XINT (x, 0);
15746 if (pred_val < REG_BR_PROB_BASE * 45 / 100
15747 || pred_val > REG_BR_PROB_BASE * 55 / 100)
15749 bool taken = pred_val > REG_BR_PROB_BASE / 2;
15750 bool cputaken
15751 = final_forward_branch_p (current_output_insn) == 0;
15753 /* Emit hints only in the cases where the default branch prediction
15754 heuristics would fail.  */
15755 if (taken != cputaken)
15757 /* We use 3e (DS) prefix for taken branches and
15758 2e (CS) prefix for not taken branches. */
15759 if (taken)
15760 fputs ("ds ; ", file);
15761 else
15762 fputs ("cs ; ", file);
15766 return;
15769 case ';':
15770 #ifndef HAVE_AS_IX86_REP_LOCK_PREFIX
15771 putc (';', file);
15772 #endif
15773 return;
15775 case '@':
15776 if (ASSEMBLER_DIALECT == ASM_ATT)
15777 putc ('%', file);
15779 /* The kernel uses a different segment register for performance
15780 reasons; this way a system call does not have to trash the userspace
15781 segment register, which would be expensive.  */
15782 if (TARGET_64BIT && ix86_cmodel != CM_KERNEL)
15783 fputs ("fs", file);
15784 else
15785 fputs ("gs", file);
15786 return;
15788 case '~':
15789 putc (TARGET_AVX2 ? 'i' : 'f', file);
15790 return;
15792 case '^':
15793 if (TARGET_64BIT && Pmode != word_mode)
15794 fputs ("addr32 ", file);
15795 return;
15797 case '!':
15798 if (ix86_bnd_prefixed_insn_p (current_output_insn))
15799 fputs ("bnd ", file);
15800 return;
15802 default:
15803 output_operand_lossage ("invalid operand code '%c'", code);
15807 if (REG_P (x))
15808 print_reg (x, code, file);
15810 else if (MEM_P (x))
15812 /* No `byte ptr' prefix for call instructions or BLKmode operands. */
15813 if (ASSEMBLER_DIALECT == ASM_INTEL && code != 'X' && code != 'P'
15814 && GET_MODE (x) != BLKmode)
15816 const char * size;
15817 switch (GET_MODE_SIZE (GET_MODE (x)))
15819 case 1: size = "BYTE"; break;
15820 case 2: size = "WORD"; break;
15821 case 4: size = "DWORD"; break;
15822 case 8: size = "QWORD"; break;
15823 case 12: size = "TBYTE"; break;
15824 case 16:
15825 if (GET_MODE (x) == XFmode)
15826 size = "TBYTE";
15827 else
15828 size = "XMMWORD";
15829 break;
15830 case 32: size = "YMMWORD"; break;
15831 case 64: size = "ZMMWORD"; break;
15832 default:
15833 gcc_unreachable ();
15836 /* Check for explicit size override (codes 'b', 'w', 'k',
15837 'q' and 'x') */
15838 if (code == 'b')
15839 size = "BYTE";
15840 else if (code == 'w')
15841 size = "WORD";
15842 else if (code == 'k')
15843 size = "DWORD";
15844 else if (code == 'q')
15845 size = "QWORD";
15846 else if (code == 'x')
15847 size = "XMMWORD";
15849 fputs (size, file);
15850 fputs (" PTR ", file);
15853 x = XEXP (x, 0);
15854 /* Avoid (%rip) for call operands. */
15855 if (CONSTANT_ADDRESS_P (x) && code == 'P'
15856 && !CONST_INT_P (x))
15857 output_addr_const (file, x);
15858 else if (this_is_asm_operands && ! address_operand (x, VOIDmode))
15859 output_operand_lossage ("invalid constraints for operand");
15860 else
15861 output_address (x);
15864 else if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) == SFmode)
15866 REAL_VALUE_TYPE r;
15867 long l;
15869 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
15870 REAL_VALUE_TO_TARGET_SINGLE (r, l);
15872 if (ASSEMBLER_DIALECT == ASM_ATT)
15873 putc ('$', file);
15874 /* Sign extend 32bit SFmode immediate to 8 bytes. */
15875 if (code == 'q')
15876 fprintf (file, "0x%08" HOST_LONG_LONG_FORMAT "x",
15877 (unsigned long long) (int) l);
15878 else
15879 fprintf (file, "0x%08x", (unsigned int) l);
15882 else if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) == DFmode)
15884 REAL_VALUE_TYPE r;
15885 long l[2];
15887 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
15888 REAL_VALUE_TO_TARGET_DOUBLE (r, l);
15890 if (ASSEMBLER_DIALECT == ASM_ATT)
15891 putc ('$', file);
15892 fprintf (file, "0x%lx%08lx", l[1] & 0xffffffff, l[0] & 0xffffffff);
15895 /* These float cases don't actually occur as immediate operands. */
15896 else if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) == XFmode)
15898 char dstr[30];
15900 real_to_decimal (dstr, CONST_DOUBLE_REAL_VALUE (x), sizeof (dstr), 0, 1);
15901 fputs (dstr, file);
15904 else
15906 /* We have patterns that allow zero sets of memory, for instance.
15907 In 64-bit mode, we should probably support all 8-byte vectors,
15908 since we can in fact encode that into an immediate. */
15909 if (GET_CODE (x) == CONST_VECTOR)
15911 gcc_assert (x == CONST0_RTX (GET_MODE (x)));
15912 x = const0_rtx;
15915 if (code != 'P' && code != 'p')
15917 if (CONST_INT_P (x) || GET_CODE (x) == CONST_DOUBLE)
15919 if (ASSEMBLER_DIALECT == ASM_ATT)
15920 putc ('$', file);
15922 else if (GET_CODE (x) == CONST || GET_CODE (x) == SYMBOL_REF
15923 || GET_CODE (x) == LABEL_REF)
15925 if (ASSEMBLER_DIALECT == ASM_ATT)
15926 putc ('$', file);
15927 else
15928 fputs ("OFFSET FLAT:", file);
15931 if (CONST_INT_P (x))
15932 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
15933 else if (flag_pic || MACHOPIC_INDIRECT)
15934 output_pic_addr_const (file, x, code);
15935 else
15936 output_addr_const (file, x);
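/* Implement TARGET_PRINT_OPERAND_PUNCT_VALID_P: these are the punctuation
   codes handled by ix86_print_operand above.  */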
15940 static bool
15941 ix86_print_operand_punct_valid_p (unsigned char code)
15943 return (code == '@' || code == '*' || code == '+' || code == '&'
15944 || code == ';' || code == '~' || code == '^' || code == '!');
15947 /* Print a memory operand whose address is ADDR. */
15949 static void
15950 ix86_print_operand_address (FILE *file, rtx addr)
15952 struct ix86_address parts;
15953 rtx base, index, disp;
15954 int scale;
15955 int ok;
15956 bool vsib = false;
15957 int code = 0;
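/* Peel off the UNSPEC wrappers first: VSIB addresses carry the vector index
   and scale as extra unspec operands, UNSPEC_LEA_ADDR forces 64-bit register
   names, and the MPX BNDMK/BNDLDX forms supply an extra base or index.  */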
15959 if (GET_CODE (addr) == UNSPEC && XINT (addr, 1) == UNSPEC_VSIBADDR)
15961 ok = ix86_decompose_address (XVECEXP (addr, 0, 0), &parts);
15962 gcc_assert (parts.index == NULL_RTX);
15963 parts.index = XVECEXP (addr, 0, 1);
15964 parts.scale = INTVAL (XVECEXP (addr, 0, 2));
15965 addr = XVECEXP (addr, 0, 0);
15966 vsib = true;
15968 else if (GET_CODE (addr) == UNSPEC && XINT (addr, 1) == UNSPEC_LEA_ADDR)
15970 gcc_assert (TARGET_64BIT);
15971 ok = ix86_decompose_address (XVECEXP (addr, 0, 0), &parts);
15972 code = 'q';
15974 else if (GET_CODE (addr) == UNSPEC && XINT (addr, 1) == UNSPEC_BNDMK_ADDR)
15976 ok = ix86_decompose_address (XVECEXP (addr, 0, 1), &parts);
15977 gcc_assert (parts.base == NULL_RTX || parts.index == NULL_RTX);
15978 if (parts.base != NULL_RTX)
15980 parts.index = parts.base;
15981 parts.scale = 1;
15983 parts.base = XVECEXP (addr, 0, 0);
15984 addr = XVECEXP (addr, 0, 0);
15986 else if (GET_CODE (addr) == UNSPEC && XINT (addr, 1) == UNSPEC_BNDLDX_ADDR)
15988 ok = ix86_decompose_address (XVECEXP (addr, 0, 0), &parts);
15989 gcc_assert (parts.index == NULL_RTX);
15990 parts.index = XVECEXP (addr, 0, 1);
15991 addr = XVECEXP (addr, 0, 0);
15993 else
15994 ok = ix86_decompose_address (addr, &parts);
15996 gcc_assert (ok);
15998 base = parts.base;
15999 index = parts.index;
16000 disp = parts.disp;
16001 scale = parts.scale;
16003 switch (parts.seg)
16005 case SEG_DEFAULT:
16006 break;
16007 case SEG_FS:
16008 case SEG_GS:
16009 if (ASSEMBLER_DIALECT == ASM_ATT)
16010 putc ('%', file);
16011 fputs ((parts.seg == SEG_FS ? "fs:" : "gs:"), file);
16012 break;
16013 default:
16014 gcc_unreachable ();
16017 /* Use one byte shorter RIP relative addressing for 64bit mode. */
16018 if (TARGET_64BIT && !base && !index)
16020 rtx symbol = disp;
16022 if (GET_CODE (disp) == CONST
16023 && GET_CODE (XEXP (disp, 0)) == PLUS
16024 && CONST_INT_P (XEXP (XEXP (disp, 0), 1)))
16025 symbol = XEXP (XEXP (disp, 0), 0);
16027 if (GET_CODE (symbol) == LABEL_REF
16028 || (GET_CODE (symbol) == SYMBOL_REF
16029 && SYMBOL_REF_TLS_MODEL (symbol) == 0))
16030 base = pc_rtx;
16032 if (!base && !index)
16034 /* Displacement only requires special attention. */
16036 if (CONST_INT_P (disp))
16038 if (ASSEMBLER_DIALECT == ASM_INTEL && parts.seg == SEG_DEFAULT)
16039 fputs ("ds:", file);
16040 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (disp));
16042 else if (flag_pic)
16043 output_pic_addr_const (file, disp, 0);
16044 else
16045 output_addr_const (file, disp);
16047 else
16049 /* Print SImode register names to force addr32 prefix. */
16050 if (SImode_address_operand (addr, VOIDmode))
16052 #ifdef ENABLE_CHECKING
16053 gcc_assert (TARGET_64BIT);
16054 switch (GET_CODE (addr))
16056 case SUBREG:
16057 gcc_assert (GET_MODE (addr) == SImode);
16058 gcc_assert (GET_MODE (SUBREG_REG (addr)) == DImode);
16059 break;
16060 case ZERO_EXTEND:
16061 case AND:
16062 gcc_assert (GET_MODE (addr) == DImode);
16063 break;
16064 default:
16065 gcc_unreachable ();
16067 #endif
16068 gcc_assert (!code);
16069 code = 'k';
16071 else if (code == 0
16072 && TARGET_X32
16073 && disp
16074 && CONST_INT_P (disp)
16075 && INTVAL (disp) < -16*1024*1024)
16077 /* X32 runs in 64-bit mode, where displacement, DISP, in
16078 address DISP(%r64), is encoded as 32-bit immediate sign-
16079 extended from 32-bit to 64-bit. For -0x40000300(%r64),
16080 address is %r64 + 0xffffffffbffffd00. When %r64 <
16081 0x40000300, like 0x37ffe064, address is 0xfffffffff7ffdd64,
16082 which is invalid for x32. The correct address is %r64
16083 - 0x40000300 == 0xf7ffdd64. To properly encode
16084 -0x40000300(%r64) for x32, we zero-extend negative
16085 displacement by forcing addr32 prefix which truncates
16086 0xfffffffff7ffdd64 to 0xf7ffdd64. In theory, we should
16087 zero-extend all negative displacements, including -1(%rsp).
16088 However, for small negative displacements, sign-extension
16089 won't cause overflow. We only zero-extend negative
16090 displacements if they are < -16*1024*1024, which is also used
16091 to check legitimate address displacements for PIC. */
16092 code = 'k';
16095 if (ASSEMBLER_DIALECT == ASM_ATT)
16097 if (disp)
16099 if (flag_pic)
16100 output_pic_addr_const (file, disp, 0);
16101 else if (GET_CODE (disp) == LABEL_REF)
16102 output_asm_label (disp);
16103 else
16104 output_addr_const (file, disp);
16107 putc ('(', file);
16108 if (base)
16109 print_reg (base, code, file);
16110 if (index)
16112 putc (',', file);
16113 print_reg (index, vsib ? 0 : code, file);
16114 if (scale != 1 || vsib)
16115 fprintf (file, ",%d", scale);
16117 putc (')', file);
16119 else
16121 rtx offset = NULL_RTX;
16123 if (disp)
16125 /* Pull out the offset of a symbol; print any symbol itself. */
16126 if (GET_CODE (disp) == CONST
16127 && GET_CODE (XEXP (disp, 0)) == PLUS
16128 && CONST_INT_P (XEXP (XEXP (disp, 0), 1)))
16130 offset = XEXP (XEXP (disp, 0), 1);
16131 disp = gen_rtx_CONST (VOIDmode,
16132 XEXP (XEXP (disp, 0), 0));
16135 if (flag_pic)
16136 output_pic_addr_const (file, disp, 0);
16137 else if (GET_CODE (disp) == LABEL_REF)
16138 output_asm_label (disp);
16139 else if (CONST_INT_P (disp))
16140 offset = disp;
16141 else
16142 output_addr_const (file, disp);
16145 putc ('[', file);
16146 if (base)
16148 print_reg (base, code, file);
16149 if (offset)
16151 if (INTVAL (offset) >= 0)
16152 putc ('+', file);
16153 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (offset));
16156 else if (offset)
16157 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (offset));
16158 else
16159 putc ('0', file);
16161 if (index)
16163 putc ('+', file);
16164 print_reg (index, vsib ? 0 : code, file);
16165 if (scale != 1 || vsib)
16166 fprintf (file, "*%d", scale);
16168 putc (']', file);
16173 /* Implementation of TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA. */
16175 static bool
16176 i386_asm_output_addr_const_extra (FILE *file, rtx x)
16178 rtx op;
16180 if (GET_CODE (x) != UNSPEC)
16181 return false;
16183 op = XVECEXP (x, 0, 0);
16184 switch (XINT (x, 1))
16186 case UNSPEC_GOTTPOFF:
16187 output_addr_const (file, op);
16188 /* FIXME: This might be @TPOFF in Sun ld. */
16189 fputs ("@gottpoff", file);
16190 break;
16191 case UNSPEC_TPOFF:
16192 output_addr_const (file, op);
16193 fputs ("@tpoff", file);
16194 break;
16195 case UNSPEC_NTPOFF:
16196 output_addr_const (file, op);
16197 if (TARGET_64BIT)
16198 fputs ("@tpoff", file);
16199 else
16200 fputs ("@ntpoff", file);
16201 break;
16202 case UNSPEC_DTPOFF:
16203 output_addr_const (file, op);
16204 fputs ("@dtpoff", file);
16205 break;
16206 case UNSPEC_GOTNTPOFF:
16207 output_addr_const (file, op);
16208 if (TARGET_64BIT)
16209 fputs (ASSEMBLER_DIALECT == ASM_ATT ?
16210 "@gottpoff(%rip)" : "@gottpoff[rip]", file);
16211 else
16212 fputs ("@gotntpoff", file);
16213 break;
16214 case UNSPEC_INDNTPOFF:
16215 output_addr_const (file, op);
16216 fputs ("@indntpoff", file);
16217 break;
16218 #if TARGET_MACHO
16219 case UNSPEC_MACHOPIC_OFFSET:
16220 output_addr_const (file, op);
16221 putc ('-', file);
16222 machopic_output_function_base_name (file);
16223 break;
16224 #endif
16226 case UNSPEC_STACK_CHECK:
16228 int offset;
16230 gcc_assert (flag_split_stack);
16232 #ifdef TARGET_THREAD_SPLIT_STACK_OFFSET
16233 offset = TARGET_THREAD_SPLIT_STACK_OFFSET;
16234 #else
16235 gcc_unreachable ();
16236 #endif
16238 fprintf (file, "%s:%d", TARGET_64BIT ? "%fs" : "%gs", offset);
16240 break;
16242 default:
16243 return false;
16246 return true;
16249 /* Split one or more double-mode RTL references into pairs of half-mode
16250 references. The RTL can be REG, offsettable MEM, integer constant, or
16251 CONST_DOUBLE. "operands" is a pointer to an array of double-mode RTLs to
16252 split and "num" is its length. lo_half and hi_half are output arrays
16253 that parallel "operands". */
16255 void
16256 split_double_mode (machine_mode mode, rtx operands[],
16257 int num, rtx lo_half[], rtx hi_half[])
16259 machine_mode half_mode;
16260 unsigned int byte;
16262 switch (mode)
16264 case TImode:
16265 half_mode = DImode;
16266 break;
16267 case DImode:
16268 half_mode = SImode;
16269 break;
16270 default:
16271 gcc_unreachable ();
16274 byte = GET_MODE_SIZE (half_mode);
16276 while (num--)
16278 rtx op = operands[num];
16280 /* simplify_subreg refuses to split volatile memory references,
16281 but we still have to handle them.  */
16282 if (MEM_P (op))
16284 lo_half[num] = adjust_address (op, half_mode, 0);
16285 hi_half[num] = adjust_address (op, half_mode, byte);
16287 else
16289 lo_half[num] = simplify_gen_subreg (half_mode, op,
16290 GET_MODE (op) == VOIDmode
16291 ? mode : GET_MODE (op), 0);
16292 hi_half[num] = simplify_gen_subreg (half_mode, op,
16293 GET_MODE (op) == VOIDmode
16294 ? mode : GET_MODE (op), byte);
16299 /* Output code to perform a 387 binary operation in INSN, one of PLUS,
16300 MINUS, MULT or DIV. OPERANDS are the insn operands, where operands[3]
16301 is the expression of the binary operation. The output may either be
16302 emitted here, or returned to the caller, like all output_* functions.
16304 There is no guarantee that the operands are the same mode, as they
16305 might be within FLOAT or FLOAT_EXTEND expressions. */
16307 #ifndef SYSV386_COMPAT
16308 /* Set to 1 for compatibility with brain-damaged assemblers. No-one
16309 wants to fix the assemblers because that causes incompatibility
16310 with gcc. No-one wants to fix gcc because that causes
16311 incompatibility with assemblers... You can use the option of
16312 -DSYSV386_COMPAT=0 if you recompile both gcc and gas this way. */
16313 #define SYSV386_COMPAT 1
16314 #endif
16316 const char *
16317 output_387_binary_op (rtx insn, rtx *operands)
16319 static char buf[40];
16320 const char *p;
16321 const char *ssep;
16322 int is_sse = SSE_REG_P (operands[0]) || SSE_REG_P (operands[1]) || SSE_REG_P (operands[2]);
16324 #ifdef ENABLE_CHECKING
16325 /* Even if we do not want to check the inputs, this documents the input
16326 constraints, which helps in understanding the following code.  */
16327 if (STACK_REG_P (operands[0])
16328 && ((REG_P (operands[1])
16329 && REGNO (operands[0]) == REGNO (operands[1])
16330 && (STACK_REG_P (operands[2]) || MEM_P (operands[2])))
16331 || (REG_P (operands[2])
16332 && REGNO (operands[0]) == REGNO (operands[2])
16333 && (STACK_REG_P (operands[1]) || MEM_P (operands[1]))))
16334 && (STACK_TOP_P (operands[1]) || STACK_TOP_P (operands[2])))
16335 ; /* ok */
16336 else
16337 gcc_assert (is_sse);
16338 #endif
16340 switch (GET_CODE (operands[3]))
16342 case PLUS:
16343 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
16344 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
16345 p = "fiadd";
16346 else
16347 p = "fadd";
16348 ssep = "vadd";
16349 break;
16351 case MINUS:
16352 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
16353 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
16354 p = "fisub";
16355 else
16356 p = "fsub";
16357 ssep = "vsub";
16358 break;
16360 case MULT:
16361 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
16362 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
16363 p = "fimul";
16364 else
16365 p = "fmul";
16366 ssep = "vmul";
16367 break;
16369 case DIV:
16370 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
16371 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
16372 p = "fidiv";
16373 else
16374 p = "fdiv";
16375 ssep = "vdiv";
16376 break;
16378 default:
16379 gcc_unreachable ();
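/* SSE/AVX operands are handled here and returned at once: AVX uses the
   three-operand v*ss/v*sd form, plain SSE the two-operand form.  The x87
   suffix juggling below never applies to them.  */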
16382 if (is_sse)
16384 if (TARGET_AVX)
16386 strcpy (buf, ssep);
16387 if (GET_MODE (operands[0]) == SFmode)
16388 strcat (buf, "ss\t{%2, %1, %0|%0, %1, %2}");
16389 else
16390 strcat (buf, "sd\t{%2, %1, %0|%0, %1, %2}");
16392 else
16394 strcpy (buf, ssep + 1);
16395 if (GET_MODE (operands[0]) == SFmode)
16396 strcat (buf, "ss\t{%2, %0|%0, %2}");
16397 else
16398 strcat (buf, "sd\t{%2, %0|%0, %2}");
16400 return buf;
16402 strcpy (buf, p);
16404 switch (GET_CODE (operands[3]))
16406 case MULT:
16407 case PLUS:
16408 if (REG_P (operands[2]) && REGNO (operands[0]) == REGNO (operands[2]))
16410 rtx temp = operands[2];
16411 operands[2] = operands[1];
16412 operands[1] = temp;
16415 /* We now know operands[0] == operands[1].  */
16417 if (MEM_P (operands[2]))
16419 p = "%Z2\t%2";
16420 break;
16423 if (find_regno_note (insn, REG_DEAD, REGNO (operands[2])))
16425 if (STACK_TOP_P (operands[0]))
16426 /* How is it that we are storing to a dead operand[2]?
16427 Well, presumably operands[1] is dead too. We can't
16428 store the result to st(0) as st(0) gets popped on this
16429 instruction. Instead store to operands[2] (which I
16430 think has to be st(1)). st(1) will be popped later.
16431 gcc <= 2.8.1 didn't have this check and generated
16432 assembly code that the Unixware assembler rejected. */
16433 p = "p\t{%0, %2|%2, %0}"; /* st(1) = st(0) op st(1); pop */
16434 else
16435 p = "p\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0); pop */
16436 break;
16439 if (STACK_TOP_P (operands[0]))
16440 p = "\t{%y2, %0|%0, %y2}"; /* st(0) = st(0) op st(r2) */
16441 else
16442 p = "\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0) */
16443 break;
16445 case MINUS:
16446 case DIV:
16447 if (MEM_P (operands[1]))
16449 p = "r%Z1\t%1";
16450 break;
16453 if (MEM_P (operands[2]))
16455 p = "%Z2\t%2";
16456 break;
16459 if (find_regno_note (insn, REG_DEAD, REGNO (operands[2])))
16461 #if SYSV386_COMPAT
16462 /* The SystemV/386 SVR3.2 assembler, and probably all AT&T
16463 derived assemblers, confusingly reverse the direction of
16464 the operation for fsub{r} and fdiv{r} when the
16465 destination register is not st(0). The Intel assembler
16466 doesn't have this brain damage. Read !SYSV386_COMPAT to
16467 figure out what the hardware really does. */
16468 if (STACK_TOP_P (operands[0]))
16469 p = "{p\t%0, %2|rp\t%2, %0}";
16470 else
16471 p = "{rp\t%2, %0|p\t%0, %2}";
16472 #else
16473 if (STACK_TOP_P (operands[0]))
16474 /* As above for fmul/fadd, we can't store to st(0). */
16475 p = "rp\t{%0, %2|%2, %0}"; /* st(1) = st(0) op st(1); pop */
16476 else
16477 p = "p\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0); pop */
16478 #endif
16479 break;
16482 if (find_regno_note (insn, REG_DEAD, REGNO (operands[1])))
16484 #if SYSV386_COMPAT
16485 if (STACK_TOP_P (operands[0]))
16486 p = "{rp\t%0, %1|p\t%1, %0}";
16487 else
16488 p = "{p\t%1, %0|rp\t%0, %1}";
16489 #else
16490 if (STACK_TOP_P (operands[0]))
16491 p = "p\t{%0, %1|%1, %0}"; /* st(1) = st(1) op st(0); pop */
16492 else
16493 p = "rp\t{%1, %0|%0, %1}"; /* st(r2) = st(0) op st(r2); pop */
16494 #endif
16495 break;
16498 if (STACK_TOP_P (operands[0]))
16500 if (STACK_TOP_P (operands[1]))
16501 p = "\t{%y2, %0|%0, %y2}"; /* st(0) = st(0) op st(r2) */
16502 else
16503 p = "r\t{%y1, %0|%0, %y1}"; /* st(0) = st(r1) op st(0) */
16504 break;
16506 else if (STACK_TOP_P (operands[1]))
16508 #if SYSV386_COMPAT
16509 p = "{\t%1, %0|r\t%0, %1}";
16510 #else
16511 p = "r\t{%1, %0|%0, %1}"; /* st(r2) = st(0) op st(r2) */
16512 #endif
16514 else
16516 #if SYSV386_COMPAT
16517 p = "{r\t%2, %0|\t%0, %2}";
16518 #else
16519 p = "\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0) */
16520 #endif
16522 break;
16524 default:
16525 gcc_unreachable ();
16528 strcat (buf, p);
16529 return buf;
16532 /* Check if a 256bit AVX register is referenced inside of EXP. */
16534 static bool
16535 ix86_check_avx256_register (const_rtx exp)
16537 if (GET_CODE (exp) == SUBREG)
16538 exp = SUBREG_REG (exp);
16540 return (REG_P (exp)
16541 && VALID_AVX256_REG_OR_OI_MODE (GET_MODE (exp)));
16544 /* Return needed mode for entity in optimize_mode_switching pass. */
16546 static int
16547 ix86_avx_u128_mode_needed (rtx_insn *insn)
16549 if (CALL_P (insn))
16551 rtx link;
16553 /* Needed mode is set to AVX_U128_CLEAN if there are
16554 no 256bit modes used in function arguments. */
16555 for (link = CALL_INSN_FUNCTION_USAGE (insn);
16556 link;
16557 link = XEXP (link, 1))
16559 if (GET_CODE (XEXP (link, 0)) == USE)
16561 rtx arg = XEXP (XEXP (link, 0), 0);
16563 if (ix86_check_avx256_register (arg))
16564 return AVX_U128_DIRTY;
16568 return AVX_U128_CLEAN;
16571 /* Require DIRTY mode if a 256bit AVX register is referenced. The
16572 hardware changes state only when a 256bit register is written to,
16573 but we need to prevent the compiler from moving the optimal insertion
16574 point above an eventual read from a 256bit register. */
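/* For example, if a vzeroupper (the CLEAN-mode transition) were placed
   between an insn writing %ymm0 and a later insn reading %ymm0, the
   upper halves would be zeroed before the read.  Treating any 256bit
   reference, including reads, as DIRTY keeps the mode-switching pass
   from inserting the transition inside such a def-use chain.  */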
16575 subrtx_iterator::array_type array;
16576 FOR_EACH_SUBRTX (iter, array, PATTERN (insn), NONCONST)
16577 if (ix86_check_avx256_register (*iter))
16578 return AVX_U128_DIRTY;
16580 return AVX_U128_ANY;
16583 /* Return mode that i387 must be switched into
16584 prior to the execution of insn. */
16586 static int
16587 ix86_i387_mode_needed (int entity, rtx_insn *insn)
16589 enum attr_i387_cw mode;
16591 /* The mode UNINITIALIZED is used to store the control word after a
16592 function call or ASM pattern. The mode ANY specifies that the function
16593 has no requirements on the control word and makes no changes in the
16594 bits we are interested in. */
16596 if (CALL_P (insn)
16597 || (NONJUMP_INSN_P (insn)
16598 && (asm_noperands (PATTERN (insn)) >= 0
16599 || GET_CODE (PATTERN (insn)) == ASM_INPUT)))
16600 return I387_CW_UNINITIALIZED;
16602 if (recog_memoized (insn) < 0)
16603 return I387_CW_ANY;
16605 mode = get_attr_i387_cw (insn);
16607 switch (entity)
16609 case I387_TRUNC:
16610 if (mode == I387_CW_TRUNC)
16611 return mode;
16612 break;
16614 case I387_FLOOR:
16615 if (mode == I387_CW_FLOOR)
16616 return mode;
16617 break;
16619 case I387_CEIL:
16620 if (mode == I387_CW_CEIL)
16621 return mode;
16622 break;
16624 case I387_MASK_PM:
16625 if (mode == I387_CW_MASK_PM)
16626 return mode;
16627 break;
16629 default:
16630 gcc_unreachable ();
16633 return I387_CW_ANY;
16636 /* Return mode that entity must be switched into
16637 prior to the execution of insn. */
16639 static int
16640 ix86_mode_needed (int entity, rtx_insn *insn)
16642 switch (entity)
16644 case AVX_U128:
16645 return ix86_avx_u128_mode_needed (insn);
16646 case I387_TRUNC:
16647 case I387_FLOOR:
16648 case I387_CEIL:
16649 case I387_MASK_PM:
16650 return ix86_i387_mode_needed (entity, insn);
16651 default:
16652 gcc_unreachable ();
16654 return 0;
16657 /* Check if a 256bit AVX register is referenced in stores. */
16659 static void
16660 ix86_check_avx256_stores (rtx dest, const_rtx, void *data)
16662 if (ix86_check_avx256_register (dest))
16664 bool *used = (bool *) data;
16665 *used = true;
16669 /* Calculate mode of upper 128bit AVX registers after the insn. */
16671 static int
16672 ix86_avx_u128_mode_after (int mode, rtx_insn *insn)
16674 rtx pat = PATTERN (insn);
16676 if (vzeroupper_operation (pat, VOIDmode)
16677 || vzeroall_operation (pat, VOIDmode))
16678 return AVX_U128_CLEAN;
16680 /* We know that the state is clean after a CALL insn if there are no
16681 256bit registers used in the function return register. */
16682 if (CALL_P (insn))
16684 bool avx_reg256_found = false;
16685 note_stores (pat, ix86_check_avx256_stores, &avx_reg256_found);
16687 return avx_reg256_found ? AVX_U128_DIRTY : AVX_U128_CLEAN;
16690 /* Otherwise, return current mode. Remember that if insn
16691 references AVX 256bit registers, the mode was already changed
16692 to DIRTY from MODE_NEEDED. */
16693 return mode;
16696 /* Return the mode that an insn results in. */
16699 ix86_mode_after (int entity, int mode, rtx_insn *insn)
16701 switch (entity)
16703 case AVX_U128:
16704 return ix86_avx_u128_mode_after (mode, insn);
16705 case I387_TRUNC:
16706 case I387_FLOOR:
16707 case I387_CEIL:
16708 case I387_MASK_PM:
16709 return mode;
16710 default:
16711 gcc_unreachable ();
16715 static int
16716 ix86_avx_u128_mode_entry (void)
16718 tree arg;
16720 /* Entry mode is set to AVX_U128_DIRTY if there are
16721 256bit modes used in function arguments. */
16722 for (arg = DECL_ARGUMENTS (current_function_decl); arg;
16723 arg = TREE_CHAIN (arg))
16725 rtx incoming = DECL_INCOMING_RTL (arg);
16727 if (incoming && ix86_check_avx256_register (incoming))
16728 return AVX_U128_DIRTY;
16731 return AVX_U128_CLEAN;
16734 /* Return a mode that ENTITY is assumed to be
16735 switched to at function entry. */
16737 static int
16738 ix86_mode_entry (int entity)
16740 switch (entity)
16742 case AVX_U128:
16743 return ix86_avx_u128_mode_entry ();
16744 case I387_TRUNC:
16745 case I387_FLOOR:
16746 case I387_CEIL:
16747 case I387_MASK_PM:
16748 return I387_CW_ANY;
16749 default:
16750 gcc_unreachable ();
16754 static int
16755 ix86_avx_u128_mode_exit (void)
16757 rtx reg = crtl->return_rtx;
16759 /* Exit mode is set to AVX_U128_DIRTY if there are
16760 256bit modes used in the function return register. */
16761 if (reg && ix86_check_avx256_register (reg))
16762 return AVX_U128_DIRTY;
16764 return AVX_U128_CLEAN;
16767 /* Return a mode that ENTITY is assumed to be
16768 switched to at function exit. */
16770 static int
16771 ix86_mode_exit (int entity)
16773 switch (entity)
16775 case AVX_U128:
16776 return ix86_avx_u128_mode_exit ();
16777 case I387_TRUNC:
16778 case I387_FLOOR:
16779 case I387_CEIL:
16780 case I387_MASK_PM:
16781 return I387_CW_ANY;
16782 default:
16783 gcc_unreachable ();
16787 static int
16788 ix86_mode_priority (int, int n)
16790 return n;
16793 /* Output code to initialize control word copies used by trunc?f?i and
16794 rounding patterns. The current control word is saved in STORED_MODE,
16795 while NEW_MODE receives the copy modified for MODE. */
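/* Reminder of the i387 control word layout relied on below (a property
   of the hardware, not of this function): bits 11:10 form the rounding
   control field -- 00 round-to-nearest, 01 round down, 10 round up,
   11 round toward zero -- and bit 5 is the precision exception mask.
   Hence the 0x0c00 / 0x0400 / 0x0800 / 0x0020 masks in the code below.  */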
16797 static void
16798 emit_i387_cw_initialization (int mode)
16800 rtx stored_mode = assign_386_stack_local (HImode, SLOT_CW_STORED);
16801 rtx new_mode;
16803 enum ix86_stack_slot slot;
16805 rtx reg = gen_reg_rtx (HImode);
16807 emit_insn (gen_x86_fnstcw_1 (stored_mode));
16808 emit_move_insn (reg, copy_rtx (stored_mode));
16810 if (TARGET_64BIT || TARGET_PARTIAL_REG_STALL
16811 || optimize_insn_for_size_p ())
16813 switch (mode)
16815 case I387_CW_TRUNC:
16816 /* round toward zero (truncate) */
16817 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0c00)));
16818 slot = SLOT_CW_TRUNC;
16819 break;
16821 case I387_CW_FLOOR:
16822 /* round down toward -oo */
16823 emit_insn (gen_andhi3 (reg, reg, GEN_INT (~0x0c00)));
16824 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0400)));
16825 slot = SLOT_CW_FLOOR;
16826 break;
16828 case I387_CW_CEIL:
16829 /* round up toward +oo */
16830 emit_insn (gen_andhi3 (reg, reg, GEN_INT (~0x0c00)));
16831 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0800)));
16832 slot = SLOT_CW_CEIL;
16833 break;
16835 case I387_CW_MASK_PM:
16836 /* mask precision exception for nearbyint() */
16837 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0020)));
16838 slot = SLOT_CW_MASK_PM;
16839 break;
16841 default:
16842 gcc_unreachable ();
16845 else
16847 switch (mode)
16849 case I387_CW_TRUNC:
16850 /* round toward zero (truncate) */
16851 emit_insn (gen_movsi_insv_1 (reg, GEN_INT (0xc)));
16852 slot = SLOT_CW_TRUNC;
16853 break;
16855 case I387_CW_FLOOR:
16856 /* round down toward -oo */
16857 emit_insn (gen_movsi_insv_1 (reg, GEN_INT (0x4)));
16858 slot = SLOT_CW_FLOOR;
16859 break;
16861 case I387_CW_CEIL:
16862 /* round up toward +oo */
16863 emit_insn (gen_movsi_insv_1 (reg, GEN_INT (0x8)));
16864 slot = SLOT_CW_CEIL;
16865 break;
16867 case I387_CW_MASK_PM:
16868 /* mask precision exception for nearbyint() */
16869 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0020)));
16870 slot = SLOT_CW_MASK_PM;
16871 break;
16873 default:
16874 gcc_unreachable ();
16878 gcc_assert (slot < MAX_386_STACK_LOCALS);
16880 new_mode = assign_386_stack_local (HImode, slot);
16881 emit_move_insn (new_mode, reg);
16884 /* Emit vzeroupper. */
16886 void
16887 ix86_avx_emit_vzeroupper (HARD_REG_SET regs_live)
16889 int i;
16891 /* Cancel automatic vzeroupper insertion if there are
16892 live call-saved SSE registers at the insertion point. */
16894 for (i = FIRST_SSE_REG; i <= LAST_SSE_REG; i++)
16895 if (TEST_HARD_REG_BIT (regs_live, i) && !call_used_regs[i])
16896 return;
16898 if (TARGET_64BIT)
16899 for (i = FIRST_REX_SSE_REG; i <= LAST_REX_SSE_REG; i++)
16900 if (TEST_HARD_REG_BIT (regs_live, i) && !call_used_regs[i])
16901 return;
16903 emit_insn (gen_avx_vzeroupper ());
16908 /* Generate one or more insns to set ENTITY to MODE. REGS_LIVE
16909 is the set of hard registers live at the point where the insn(s)
16910 are to be inserted. */
16912 static void
16913 ix86_emit_mode_set (int entity, int mode, int prev_mode ATTRIBUTE_UNUSED,
16914 HARD_REG_SET regs_live)
16916 switch (entity)
16918 case AVX_U128:
16919 if (mode == AVX_U128_CLEAN)
16920 ix86_avx_emit_vzeroupper (regs_live);
16921 break;
16922 case I387_TRUNC:
16923 case I387_FLOOR:
16924 case I387_CEIL:
16925 case I387_MASK_PM:
16926 if (mode != I387_CW_ANY
16927 && mode != I387_CW_UNINITIALIZED)
16928 emit_i387_cw_initialization (mode);
16929 break;
16930 default:
16931 gcc_unreachable ();
16935 /* Output code for INSN to convert a float to a signed int. OPERANDS
16936 are the insn operands. The output may be [HSD]Imode and the input
16937 operand may be [SDX]Fmode. */
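/* A sketch of what gets emitted below: when the rounding mode matters
   the sequence is roughly
       fldcw  truncating-cw      ; switch rounding control
       fistp  dest               ; store (and pop), or fist if st(0) survives
       fldcw  saved-cw           ; restore the original control word
   while SSE3's fisttp truncates regardless of the control word and needs
   no fldcw pair.  An extra fld duplicates st(0) first when a popping
   store is required but the value must stay live.  */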
16939 const char *
16940 output_fix_trunc (rtx_insn *insn, rtx *operands, bool fisttp)
16942 int stack_top_dies = find_regno_note (insn, REG_DEAD, FIRST_STACK_REG) != 0;
16943 int dimode_p = GET_MODE (operands[0]) == DImode;
16944 int round_mode = get_attr_i387_cw (insn);
16946 /* Jump through a hoop or two for DImode, since the hardware has no
16947 non-popping instruction. We used to do this a different way, but
16948 that was somewhat fragile and broke with post-reload splitters. */
16949 if ((dimode_p || fisttp) && !stack_top_dies)
16950 output_asm_insn ("fld\t%y1", operands);
16952 gcc_assert (STACK_TOP_P (operands[1]));
16953 gcc_assert (MEM_P (operands[0]));
16954 gcc_assert (GET_MODE (operands[1]) != TFmode);
16956 if (fisttp)
16957 output_asm_insn ("fisttp%Z0\t%0", operands);
16958 else
16960 if (round_mode != I387_CW_ANY)
16961 output_asm_insn ("fldcw\t%3", operands);
16962 if (stack_top_dies || dimode_p)
16963 output_asm_insn ("fistp%Z0\t%0", operands);
16964 else
16965 output_asm_insn ("fist%Z0\t%0", operands);
16966 if (round_mode != I387_CW_ANY)
16967 output_asm_insn ("fldcw\t%2", operands);
16970 return "";
16973 /* Output code for x87 ffreep insn. The OPNO argument, which may only
16974 have the values zero or one, indicates the ffreep insn's operand
16975 from the OPERANDS array. */
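/* When the assembler does not know the (undocumented) ffreep mnemonic,
   the insn is emitted as a literal word instead: ffreep %st(N) encodes
   as the bytes 0xdf 0xc0+N, and the "0xc%ddf" string below, written out
   with ASM_SHORT, produces exactly those two bytes in little-endian
   order.  */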
16977 static const char *
16978 output_387_ffreep (rtx *operands ATTRIBUTE_UNUSED, int opno)
16980 if (TARGET_USE_FFREEP)
16981 #ifdef HAVE_AS_IX86_FFREEP
16982 return opno ? "ffreep\t%y1" : "ffreep\t%y0";
16983 #else
16985 static char retval[32];
16986 int regno = REGNO (operands[opno]);
16988 gcc_assert (STACK_REGNO_P (regno));
16990 regno -= FIRST_STACK_REG;
16992 snprintf (retval, sizeof (retval), ASM_SHORT "0xc%ddf", regno);
16993 return retval;
16995 #endif
16997 return opno ? "fstp\t%y1" : "fstp\t%y0";
17001 /* Output code for INSN to compare OPERANDS. EFLAGS_P is 1 when fcomi
17002 should be used. UNORDERED_P is true when fucom should be used. */
17004 const char *
17005 output_fp_compare (rtx insn, rtx *operands, bool eflags_p, bool unordered_p)
17007 int stack_top_dies;
17008 rtx cmp_op0, cmp_op1;
17009 int is_sse = SSE_REG_P (operands[0]) || SSE_REG_P (operands[1]);
17011 if (eflags_p)
17013 cmp_op0 = operands[0];
17014 cmp_op1 = operands[1];
17016 else
17018 cmp_op0 = operands[1];
17019 cmp_op1 = operands[2];
17022 if (is_sse)
17024 if (GET_MODE (operands[0]) == SFmode)
17025 if (unordered_p)
17026 return "%vucomiss\t{%1, %0|%0, %1}";
17027 else
17028 return "%vcomiss\t{%1, %0|%0, %1}";
17029 else
17030 if (unordered_p)
17031 return "%vucomisd\t{%1, %0|%0, %1}";
17032 else
17033 return "%vcomisd\t{%1, %0|%0, %1}";
17036 gcc_assert (STACK_TOP_P (cmp_op0));
17038 stack_top_dies = find_regno_note (insn, REG_DEAD, FIRST_STACK_REG) != 0;
17040 if (cmp_op1 == CONST0_RTX (GET_MODE (cmp_op1)))
17042 if (stack_top_dies)
17044 output_asm_insn ("ftst\n\tfnstsw\t%0", operands);
17045 return output_387_ffreep (operands, 1);
17047 else
17048 return "ftst\n\tfnstsw\t%0";
17051 if (STACK_REG_P (cmp_op1)
17052 && stack_top_dies
17053 && find_regno_note (insn, REG_DEAD, REGNO (cmp_op1))
17054 && REGNO (cmp_op1) != FIRST_STACK_REG)
17056 /* If the top of the 387 stack dies, and the other operand
17057 is also a stack register that dies, then this must be a
17058 `fcompp' float compare. */
17060 if (eflags_p)
17062 /* There is no double popping fcomi variant. Fortunately,
17063 eflags is immune from the fstp's cc clobbering. */
17064 if (unordered_p)
17065 output_asm_insn ("fucomip\t{%y1, %0|%0, %y1}", operands);
17066 else
17067 output_asm_insn ("fcomip\t{%y1, %0|%0, %y1}", operands);
17068 return output_387_ffreep (operands, 0);
17070 else
17072 if (unordered_p)
17073 return "fucompp\n\tfnstsw\t%0";
17074 else
17075 return "fcompp\n\tfnstsw\t%0";
17078 else
17080 /* Encoded here as eflags_p | intmode | unordered_p | stack_top_dies. */
17082 static const char * const alt[16] =
17084 "fcom%Z2\t%y2\n\tfnstsw\t%0",
17085 "fcomp%Z2\t%y2\n\tfnstsw\t%0",
17086 "fucom%Z2\t%y2\n\tfnstsw\t%0",
17087 "fucomp%Z2\t%y2\n\tfnstsw\t%0",
17089 "ficom%Z2\t%y2\n\tfnstsw\t%0",
17090 "ficomp%Z2\t%y2\n\tfnstsw\t%0",
17091 NULL,
17092 NULL,
17094 "fcomi\t{%y1, %0|%0, %y1}",
17095 "fcomip\t{%y1, %0|%0, %y1}",
17096 "fucomi\t{%y1, %0|%0, %y1}",
17097 "fucomip\t{%y1, %0|%0, %y1}",
17099 NULL,
17100 NULL,
17101 NULL,
17102 NULL
17105 int mask;
17106 const char *ret;
17108 mask = eflags_p << 3;
17109 mask |= (GET_MODE_CLASS (GET_MODE (cmp_op1)) == MODE_INT) << 2;
17110 mask |= unordered_p << 1;
17111 mask |= stack_top_dies;
17113 gcc_assert (mask < 16);
17114 ret = alt[mask];
17115 gcc_assert (ret);
17117 return ret;
17121 void
17122 ix86_output_addr_vec_elt (FILE *file, int value)
17124 const char *directive = ASM_LONG;
17126 #ifdef ASM_QUAD
17127 if (TARGET_LP64)
17128 directive = ASM_QUAD;
17129 #else
17130 gcc_assert (!TARGET_64BIT);
17131 #endif
17133 fprintf (file, "%s%s%d\n", directive, LPREFIX, value);
17136 void
17137 ix86_output_addr_diff_elt (FILE *file, int value, int rel)
17139 const char *directive = ASM_LONG;
17141 #ifdef ASM_QUAD
17142 if (TARGET_64BIT && CASE_VECTOR_MODE == DImode)
17143 directive = ASM_QUAD;
17144 #else
17145 gcc_assert (!TARGET_64BIT);
17146 #endif
17147 /* We can't use @GOTOFF for text labels on VxWorks; see gotoff_operand. */
17148 if (TARGET_64BIT || TARGET_VXWORKS_RTP)
17149 fprintf (file, "%s%s%d-%s%d\n",
17150 directive, LPREFIX, value, LPREFIX, rel);
17151 else if (HAVE_AS_GOTOFF_IN_DATA)
17152 fprintf (file, ASM_LONG "%s%d@GOTOFF\n", LPREFIX, value);
17153 #if TARGET_MACHO
17154 else if (TARGET_MACHO)
17156 fprintf (file, ASM_LONG "%s%d-", LPREFIX, value);
17157 machopic_output_function_base_name (file);
17158 putc ('\n', file);
17160 #endif
17161 else
17162 asm_fprintf (file, ASM_LONG "%U%s+[.-%s%d]\n",
17163 GOT_SYMBOL_NAME, LPREFIX, value);
17166 /* Generate either "mov $0, reg" or "xor reg, reg", as appropriate
17167 for the target. */
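/* The xor form is smaller and breaks register dependencies, but it
   clobbers the flags, which is why the PARALLEL below attaches an
   explicit CLOBBER of FLAGS_REG.  The plain "mov $0, reg" form is kept
   for targets where TARGET_USE_MOV0 says the move is preferable and we
   are not optimizing this insn for size.  */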
17169 void
17170 ix86_expand_clear (rtx dest)
17172 rtx tmp;
17174 /* We play register width games, which are only valid after reload. */
17175 gcc_assert (reload_completed);
17177 /* Avoid HImode and its attendant prefix byte. */
17178 if (GET_MODE_SIZE (GET_MODE (dest)) < 4)
17179 dest = gen_rtx_REG (SImode, REGNO (dest));
17180 tmp = gen_rtx_SET (VOIDmode, dest, const0_rtx);
17182 if (!TARGET_USE_MOV0 || optimize_insn_for_size_p ())
17184 rtx clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
17185 tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, tmp, clob));
17188 emit_insn (tmp);
17191 /* X is an unchanging MEM. If it is a constant pool reference, return
17192 the constant pool rtx, else NULL. */
17195 maybe_get_pool_constant (rtx x)
17197 x = ix86_delegitimize_address (XEXP (x, 0));
17199 if (GET_CODE (x) == SYMBOL_REF && CONSTANT_POOL_ADDRESS_P (x))
17200 return get_pool_constant (x);
17202 return NULL_RTX;
17205 void
17206 ix86_expand_move (machine_mode mode, rtx operands[])
17208 rtx op0, op1;
17209 enum tls_model model;
17211 op0 = operands[0];
17212 op1 = operands[1];
17214 if (GET_CODE (op1) == SYMBOL_REF)
17216 rtx tmp;
17218 model = SYMBOL_REF_TLS_MODEL (op1);
17219 if (model)
17221 op1 = legitimize_tls_address (op1, model, true);
17222 op1 = force_operand (op1, op0);
17223 if (op1 == op0)
17224 return;
17225 op1 = convert_to_mode (mode, op1, 1);
17227 else if ((tmp = legitimize_pe_coff_symbol (op1, false)) != NULL_RTX)
17228 op1 = tmp;
17230 else if (GET_CODE (op1) == CONST
17231 && GET_CODE (XEXP (op1, 0)) == PLUS
17232 && GET_CODE (XEXP (XEXP (op1, 0), 0)) == SYMBOL_REF)
17234 rtx addend = XEXP (XEXP (op1, 0), 1);
17235 rtx symbol = XEXP (XEXP (op1, 0), 0);
17236 rtx tmp;
17238 model = SYMBOL_REF_TLS_MODEL (symbol);
17239 if (model)
17240 tmp = legitimize_tls_address (symbol, model, true);
17241 else
17242 tmp = legitimize_pe_coff_symbol (symbol, true);
17244 if (tmp)
17246 tmp = force_operand (tmp, NULL);
17247 tmp = expand_simple_binop (Pmode, PLUS, tmp, addend,
17248 op0, 1, OPTAB_DIRECT);
17249 if (tmp == op0)
17250 return;
17251 op1 = convert_to_mode (mode, tmp, 1);
17255 if ((flag_pic || MACHOPIC_INDIRECT)
17256 && symbolic_operand (op1, mode))
17258 if (TARGET_MACHO && !TARGET_64BIT)
17260 #if TARGET_MACHO
17261 /* dynamic-no-pic */
17262 if (MACHOPIC_INDIRECT)
17264 rtx temp = ((reload_in_progress
17265 || ((op0 && REG_P (op0))
17266 && mode == Pmode))
17267 ? op0 : gen_reg_rtx (Pmode));
17268 op1 = machopic_indirect_data_reference (op1, temp);
17269 if (MACHOPIC_PURE)
17270 op1 = machopic_legitimize_pic_address (op1, mode,
17271 temp == op1 ? 0 : temp);
17273 if (op0 != op1 && GET_CODE (op0) != MEM)
17275 rtx insn = gen_rtx_SET (VOIDmode, op0, op1);
17276 emit_insn (insn);
17277 return;
17279 if (GET_CODE (op0) == MEM)
17280 op1 = force_reg (Pmode, op1);
17281 else
17283 rtx temp = op0;
17284 if (GET_CODE (temp) != REG)
17285 temp = gen_reg_rtx (Pmode);
17286 temp = legitimize_pic_address (op1, temp);
17287 if (temp == op0)
17288 return;
17289 op1 = temp;
17291 /* dynamic-no-pic */
17292 #endif
17294 else
17296 if (MEM_P (op0))
17297 op1 = force_reg (mode, op1);
17298 else if (!(TARGET_64BIT && x86_64_movabs_operand (op1, DImode)))
17300 rtx reg = can_create_pseudo_p () ? NULL_RTX : op0;
17301 op1 = legitimize_pic_address (op1, reg);
17302 if (op0 == op1)
17303 return;
17304 op1 = convert_to_mode (mode, op1, 1);
17308 else
17310 if (MEM_P (op0)
17311 && (PUSH_ROUNDING (GET_MODE_SIZE (mode)) != GET_MODE_SIZE (mode)
17312 || !push_operand (op0, mode))
17313 && MEM_P (op1))
17314 op1 = force_reg (mode, op1);
17316 if (push_operand (op0, mode)
17317 && ! general_no_elim_operand (op1, mode))
17318 op1 = copy_to_mode_reg (mode, op1);
17320 /* Force large constants in 64bit compilation into registers
17321 to get them CSEed. */
17322 if (can_create_pseudo_p ()
17323 && (mode == DImode) && TARGET_64BIT
17324 && immediate_operand (op1, mode)
17325 && !x86_64_zext_immediate_operand (op1, VOIDmode)
17326 && !register_operand (op0, mode)
17327 && optimize)
17328 op1 = copy_to_mode_reg (mode, op1);
17330 if (can_create_pseudo_p ()
17331 && FLOAT_MODE_P (mode)
17332 && GET_CODE (op1) == CONST_DOUBLE)
17334 /* If we are loading a floating point constant to a register,
17335 force the value to memory now, since we'll get better code
17336 out of the back end. */
17338 op1 = validize_mem (force_const_mem (mode, op1));
17339 if (!register_operand (op0, mode))
17341 rtx temp = gen_reg_rtx (mode);
17342 emit_insn (gen_rtx_SET (VOIDmode, temp, op1));
17343 emit_move_insn (op0, temp);
17344 return;
17349 emit_insn (gen_rtx_SET (VOIDmode, op0, op1));
17352 void
17353 ix86_expand_vector_move (machine_mode mode, rtx operands[])
17355 rtx op0 = operands[0], op1 = operands[1];
17356 unsigned int align = GET_MODE_ALIGNMENT (mode);
17358 if (push_operand (op0, VOIDmode))
17359 op0 = emit_move_resolve_push (mode, op0);
17361 /* Force constants other than zero into memory. We do not know how
17362 the instructions used to build constants modify the upper 64 bits
17363 of the register; once we have that information we may be able
17364 to handle some of them more efficiently. */
17365 if (can_create_pseudo_p ()
17366 && register_operand (op0, mode)
17367 && (CONSTANT_P (op1)
17368 || (GET_CODE (op1) == SUBREG
17369 && CONSTANT_P (SUBREG_REG (op1))))
17370 && !standard_sse_constant_p (op1))
17371 op1 = validize_mem (force_const_mem (mode, op1));
17373 /* We need to check memory alignment for SSE mode since the attribute
17374 can make operands unaligned. */
17375 if (can_create_pseudo_p ()
17376 && SSE_REG_MODE_P (mode)
17377 && ((MEM_P (op0) && (MEM_ALIGN (op0) < align))
17378 || (MEM_P (op1) && (MEM_ALIGN (op1) < align))))
17380 rtx tmp[2];
17382 /* ix86_expand_vector_move_misalign() does not like constants ... */
17383 if (CONSTANT_P (op1)
17384 || (GET_CODE (op1) == SUBREG
17385 && CONSTANT_P (SUBREG_REG (op1))))
17386 op1 = validize_mem (force_const_mem (mode, op1));
17388 /* ... nor both arguments in memory. */
17389 if (!register_operand (op0, mode)
17390 && !register_operand (op1, mode))
17391 op1 = force_reg (mode, op1);
17393 tmp[0] = op0; tmp[1] = op1;
17394 ix86_expand_vector_move_misalign (mode, tmp);
17395 return;
17398 /* Make operand1 a register if it isn't already. */
17399 if (can_create_pseudo_p ()
17400 && !register_operand (op0, mode)
17401 && !register_operand (op1, mode))
17403 emit_move_insn (op0, force_reg (GET_MODE (op0), op1));
17404 return;
17407 emit_insn (gen_rtx_SET (VOIDmode, op0, op1));
17410 /* Split 32-byte AVX unaligned load and store if needed. */
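/* When the target prefers split accesses, an unaligned 256-bit load is
   done as a 128-bit load of the low half followed by a VEC_CONCAT with
   the high half (roughly a vmovup[sd]/vinsertf128 pair), and an
   unaligned store becomes two vextractf128 stores of the two halves.
   Otherwise a single unaligned 256-bit move is emitted.  */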
17412 static void
17413 ix86_avx256_split_vector_move_misalign (rtx op0, rtx op1)
17415 rtx m;
17416 rtx (*extract) (rtx, rtx, rtx);
17417 rtx (*load_unaligned) (rtx, rtx);
17418 rtx (*store_unaligned) (rtx, rtx);
17419 machine_mode mode;
17421 switch (GET_MODE (op0))
17423 default:
17424 gcc_unreachable ();
17425 case V32QImode:
17426 extract = gen_avx_vextractf128v32qi;
17427 load_unaligned = gen_avx_loaddquv32qi;
17428 store_unaligned = gen_avx_storedquv32qi;
17429 mode = V16QImode;
17430 break;
17431 case V8SFmode:
17432 extract = gen_avx_vextractf128v8sf;
17433 load_unaligned = gen_avx_loadups256;
17434 store_unaligned = gen_avx_storeups256;
17435 mode = V4SFmode;
17436 break;
17437 case V4DFmode:
17438 extract = gen_avx_vextractf128v4df;
17439 load_unaligned = gen_avx_loadupd256;
17440 store_unaligned = gen_avx_storeupd256;
17441 mode = V2DFmode;
17442 break;
17445 if (MEM_P (op1))
17447 if (TARGET_AVX256_SPLIT_UNALIGNED_LOAD)
17449 rtx r = gen_reg_rtx (mode);
17450 m = adjust_address (op1, mode, 0);
17451 emit_move_insn (r, m);
17452 m = adjust_address (op1, mode, 16);
17453 r = gen_rtx_VEC_CONCAT (GET_MODE (op0), r, m);
17454 emit_move_insn (op0, r);
17456 /* Normal *mov<mode>_internal pattern will handle
17457 unaligned loads just fine if misaligned_operand
17458 is true, and without the UNSPEC it can be combined
17459 with arithmetic instructions. */
17460 else if (misaligned_operand (op1, GET_MODE (op1)))
17461 emit_insn (gen_rtx_SET (VOIDmode, op0, op1));
17462 else
17463 emit_insn (load_unaligned (op0, op1));
17465 else if (MEM_P (op0))
17467 if (TARGET_AVX256_SPLIT_UNALIGNED_STORE)
17469 m = adjust_address (op0, mode, 0);
17470 emit_insn (extract (m, op1, const0_rtx));
17471 m = adjust_address (op0, mode, 16);
17472 emit_insn (extract (m, op1, const1_rtx));
17474 else
17475 emit_insn (store_unaligned (op0, op1));
17477 else
17478 gcc_unreachable ();
17481 /* Implement the movmisalign patterns for SSE. Non-SSE modes go
17482 straight to ix86_expand_vector_move. */
17483 /* Code generation for scalar reg-reg moves of single and double precision data:
17484 if (x86_sse_partial_reg_dependency == true | x86_sse_split_regs == true)
17485 movaps reg, reg
17486 else
17487 movss reg, reg
17488 if (x86_sse_partial_reg_dependency == true)
17489 movapd reg, reg
17490 else
17491 movsd reg, reg
17493 Code generation for scalar loads of double precision data:
17494 if (x86_sse_split_regs == true)
17495 movlpd mem, reg (gas syntax)
17496 else
17497 movsd mem, reg
17499 Code generation for unaligned packed loads of single precision data
17500 (x86_sse_unaligned_move_optimal overrides x86_sse_partial_reg_dependency):
17501 if (x86_sse_unaligned_move_optimal)
17502 movups mem, reg
17504 if (x86_sse_partial_reg_dependency == true)
17506 xorps reg, reg
17507 movlps mem, reg
17508 movhps mem+8, reg
17510 else
17512 movlps mem, reg
17513 movhps mem+8, reg
17516 Code generation for unaligned packed loads of double precision data
17517 (x86_sse_unaligned_move_optimal overrides x86_sse_split_regs):
17518 if (x86_sse_unaligned_move_optimal)
17519 movupd mem, reg
17521 if (x86_sse_split_regs == true)
17523 movlpd mem, reg
17524 movhpd mem+8, reg
17526 else
17528 movsd mem, reg
17529 movhpd mem+8, reg
17533 void
17534 ix86_expand_vector_move_misalign (machine_mode mode, rtx operands[])
17536 rtx op0, op1, orig_op0 = NULL_RTX, m;
17537 rtx (*load_unaligned) (rtx, rtx);
17538 rtx (*store_unaligned) (rtx, rtx);
17540 op0 = operands[0];
17541 op1 = operands[1];
17543 if (GET_MODE_SIZE (mode) == 64)
17545 switch (GET_MODE_CLASS (mode))
17547 case MODE_VECTOR_INT:
17548 case MODE_INT:
17549 if (GET_MODE (op0) != V16SImode)
17551 if (!MEM_P (op0))
17553 orig_op0 = op0;
17554 op0 = gen_reg_rtx (V16SImode);
17556 else
17557 op0 = gen_lowpart (V16SImode, op0);
17559 op1 = gen_lowpart (V16SImode, op1);
17560 /* FALLTHRU */
17562 case MODE_VECTOR_FLOAT:
17563 switch (GET_MODE (op0))
17565 default:
17566 gcc_unreachable ();
17567 case V16SImode:
17568 load_unaligned = gen_avx512f_loaddquv16si;
17569 store_unaligned = gen_avx512f_storedquv16si;
17570 break;
17571 case V16SFmode:
17572 load_unaligned = gen_avx512f_loadups512;
17573 store_unaligned = gen_avx512f_storeups512;
17574 break;
17575 case V8DFmode:
17576 load_unaligned = gen_avx512f_loadupd512;
17577 store_unaligned = gen_avx512f_storeupd512;
17578 break;
17581 if (MEM_P (op1))
17582 emit_insn (load_unaligned (op0, op1));
17583 else if (MEM_P (op0))
17584 emit_insn (store_unaligned (op0, op1));
17585 else
17586 gcc_unreachable ();
17587 if (orig_op0)
17588 emit_move_insn (orig_op0, gen_lowpart (GET_MODE (orig_op0), op0));
17589 break;
17591 default:
17592 gcc_unreachable ();
17595 return;
17598 if (TARGET_AVX
17599 && GET_MODE_SIZE (mode) == 32)
17601 switch (GET_MODE_CLASS (mode))
17603 case MODE_VECTOR_INT:
17604 case MODE_INT:
17605 if (GET_MODE (op0) != V32QImode)
17607 if (!MEM_P (op0))
17609 orig_op0 = op0;
17610 op0 = gen_reg_rtx (V32QImode);
17612 else
17613 op0 = gen_lowpart (V32QImode, op0);
17615 op1 = gen_lowpart (V32QImode, op1);
17616 /* FALLTHRU */
17618 case MODE_VECTOR_FLOAT:
17619 ix86_avx256_split_vector_move_misalign (op0, op1);
17620 if (orig_op0)
17621 emit_move_insn (orig_op0, gen_lowpart (GET_MODE (orig_op0), op0));
17622 break;
17624 default:
17625 gcc_unreachable ();
17628 return;
17631 if (MEM_P (op1))
17633 /* Normal *mov<mode>_internal pattern will handle
17634 unaligned loads just fine if misaligned_operand
17635 is true, and without the UNSPEC it can be combined
17636 with arithmetic instructions. */
17637 if (TARGET_AVX
17638 && (GET_MODE_CLASS (mode) == MODE_VECTOR_INT
17639 || GET_MODE_CLASS (mode) == MODE_VECTOR_FLOAT)
17640 && misaligned_operand (op1, GET_MODE (op1)))
17641 emit_insn (gen_rtx_SET (VOIDmode, op0, op1));
17642 /* ??? If we have typed data, then it would appear that using
17643 movdqu is the only way to get unaligned data loaded with
17644 integer type. */
17645 else if (TARGET_SSE2 && GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
17647 if (GET_MODE (op0) != V16QImode)
17649 orig_op0 = op0;
17650 op0 = gen_reg_rtx (V16QImode);
17652 op1 = gen_lowpart (V16QImode, op1);
17653 /* We will eventually emit movups based on insn attributes. */
17654 emit_insn (gen_sse2_loaddquv16qi (op0, op1));
17655 if (orig_op0)
17656 emit_move_insn (orig_op0, gen_lowpart (GET_MODE (orig_op0), op0));
17658 else if (TARGET_SSE2 && mode == V2DFmode)
17660 rtx zero;
17662 if (TARGET_AVX
17663 || TARGET_SSE_UNALIGNED_LOAD_OPTIMAL
17664 || TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL
17665 || optimize_insn_for_size_p ())
17667 /* We will eventually emit movups based on insn attributes. */
17668 emit_insn (gen_sse2_loadupd (op0, op1));
17669 return;
17672 /* When SSE registers are split into halves, we can avoid
17673 writing to the top half twice. */
17674 if (TARGET_SSE_SPLIT_REGS)
17676 emit_clobber (op0);
17677 zero = op0;
17679 else
17681 /* ??? Not sure about the best option for the Intel chips.
17682 The following would seem to satisfy; the register is
17683 entirely cleared, breaking the dependency chain. We
17684 then store to the upper half, with a dependency depth
17685 of one. A rumor has it that Intel recommends two movsd
17686 followed by an unpacklpd, but this is unconfirmed. And
17687 given that the dependency depth of the unpacklpd would
17688 still be one, I'm not sure why this would be better. */
17689 zero = CONST0_RTX (V2DFmode);
17692 m = adjust_address (op1, DFmode, 0);
17693 emit_insn (gen_sse2_loadlpd (op0, zero, m));
17694 m = adjust_address (op1, DFmode, 8);
17695 emit_insn (gen_sse2_loadhpd (op0, op0, m));
17697 else
17699 rtx t;
17701 if (TARGET_AVX
17702 || TARGET_SSE_UNALIGNED_LOAD_OPTIMAL
17703 || TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL
17704 || optimize_insn_for_size_p ())
17706 if (GET_MODE (op0) != V4SFmode)
17708 orig_op0 = op0;
17709 op0 = gen_reg_rtx (V4SFmode);
17711 op1 = gen_lowpart (V4SFmode, op1);
17712 emit_insn (gen_sse_loadups (op0, op1));
17713 if (orig_op0)
17714 emit_move_insn (orig_op0,
17715 gen_lowpart (GET_MODE (orig_op0), op0));
17716 return;
17719 if (mode != V4SFmode)
17720 t = gen_reg_rtx (V4SFmode);
17721 else
17722 t = op0;
17724 if (TARGET_SSE_PARTIAL_REG_DEPENDENCY)
17725 emit_move_insn (t, CONST0_RTX (V4SFmode));
17726 else
17727 emit_clobber (t);
17729 m = adjust_address (op1, V2SFmode, 0);
17730 emit_insn (gen_sse_loadlps (t, t, m));
17731 m = adjust_address (op1, V2SFmode, 8);
17732 emit_insn (gen_sse_loadhps (t, t, m));
17733 if (mode != V4SFmode)
17734 emit_move_insn (op0, gen_lowpart (mode, t));
17737 else if (MEM_P (op0))
17739 if (TARGET_SSE2 && GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
17741 op0 = gen_lowpart (V16QImode, op0);
17742 op1 = gen_lowpart (V16QImode, op1);
17743 /* We will eventually emit movups based on insn attributes. */
17744 emit_insn (gen_sse2_storedquv16qi (op0, op1));
17746 else if (TARGET_SSE2 && mode == V2DFmode)
17748 if (TARGET_AVX
17749 || TARGET_SSE_UNALIGNED_STORE_OPTIMAL
17750 || TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL
17751 || optimize_insn_for_size_p ())
17752 /* We will eventually emit movups based on insn attributes. */
17753 emit_insn (gen_sse2_storeupd (op0, op1));
17754 else
17756 m = adjust_address (op0, DFmode, 0);
17757 emit_insn (gen_sse2_storelpd (m, op1));
17758 m = adjust_address (op0, DFmode, 8);
17759 emit_insn (gen_sse2_storehpd (m, op1));
17762 else
17764 if (mode != V4SFmode)
17765 op1 = gen_lowpart (V4SFmode, op1);
17767 if (TARGET_AVX
17768 || TARGET_SSE_UNALIGNED_STORE_OPTIMAL
17769 || TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL
17770 || optimize_insn_for_size_p ())
17772 op0 = gen_lowpart (V4SFmode, op0);
17773 emit_insn (gen_sse_storeups (op0, op1));
17775 else
17777 m = adjust_address (op0, V2SFmode, 0);
17778 emit_insn (gen_sse_storelps (m, op1));
17779 m = adjust_address (op0, V2SFmode, 8);
17780 emit_insn (gen_sse_storehps (m, op1));
17784 else
17785 gcc_unreachable ();
17788 /* Helper function of ix86_fixup_binary_operands to canonicalize
17789 operand order. Returns true if the operands should be swapped. */
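/* For example, given "a = b + a" the operands are swapped so that the
   source matching the destination comes first ("a = a + b"), which fits
   the destructive two-address form of most integer instructions; an
   immediate or memory operand is likewise pushed into the second slot.  */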
17791 static bool
17792 ix86_swap_binary_operands_p (enum rtx_code code, machine_mode mode,
17793 rtx operands[])
17795 rtx dst = operands[0];
17796 rtx src1 = operands[1];
17797 rtx src2 = operands[2];
17799 /* If the operation is not commutative, we can't do anything. */
17800 if (GET_RTX_CLASS (code) != RTX_COMM_ARITH)
17801 return false;
17803 /* Highest priority is that src1 should match dst. */
17804 if (rtx_equal_p (dst, src1))
17805 return false;
17806 if (rtx_equal_p (dst, src2))
17807 return true;
17809 /* Next highest priority is that immediate constants come second. */
17810 if (immediate_operand (src2, mode))
17811 return false;
17812 if (immediate_operand (src1, mode))
17813 return true;
17815 /* Lowest priority is that memory references should come second. */
17816 if (MEM_P (src2))
17817 return false;
17818 if (MEM_P (src1))
17819 return true;
17821 return false;
17825 /* Fix up OPERANDS to satisfy ix86_binary_operator_ok. Return the
17826 destination to use for the operation. If different from the true
17827 destination in operands[0], a copy operation will be required. */
17830 ix86_fixup_binary_operands (enum rtx_code code, machine_mode mode,
17831 rtx operands[])
17833 rtx dst = operands[0];
17834 rtx src1 = operands[1];
17835 rtx src2 = operands[2];
17837 /* Canonicalize operand order. */
17838 if (ix86_swap_binary_operands_p (code, mode, operands))
17840 /* It is invalid to swap operands of different modes. */
17841 gcc_assert (GET_MODE (src1) == GET_MODE (src2));
17843 std::swap (src1, src2);
17846 /* Both source operands cannot be in memory. */
17847 if (MEM_P (src1) && MEM_P (src2))
17849 /* Optimization: Only read from memory once. */
17850 if (rtx_equal_p (src1, src2))
17852 src2 = force_reg (mode, src2);
17853 src1 = src2;
17855 else if (rtx_equal_p (dst, src1))
17856 src2 = force_reg (mode, src2);
17857 else
17858 src1 = force_reg (mode, src1);
17861 /* If the destination is memory, and we do not have matching source
17862 operands, do things in registers. */
17863 if (MEM_P (dst) && !rtx_equal_p (dst, src1))
17864 dst = gen_reg_rtx (mode);
17866 /* Source 1 cannot be a constant. */
17867 if (CONSTANT_P (src1))
17868 src1 = force_reg (mode, src1);
17870 /* Source 1 cannot be a non-matching memory. */
17871 if (MEM_P (src1) && !rtx_equal_p (dst, src1))
17872 src1 = force_reg (mode, src1);
17874 /* Improve address combine. */
17875 if (code == PLUS
17876 && GET_MODE_CLASS (mode) == MODE_INT
17877 && MEM_P (src2))
17878 src2 = force_reg (mode, src2);
17880 operands[1] = src1;
17881 operands[2] = src2;
17882 return dst;
17885 /* Similarly, but assume that the destination has already been
17886 set up properly. */
17888 void
17889 ix86_fixup_binary_operands_no_copy (enum rtx_code code,
17890 machine_mode mode, rtx operands[])
17892 rtx dst = ix86_fixup_binary_operands (code, mode, operands);
17893 gcc_assert (dst == operands[0]);
17896 /* Attempt to expand a binary operator. Make the expansion closer to the
17897 actual machine than just general_operand, which will allow 3 separate
17898 memory references (one output, two input) in a single insn. */
17900 void
17901 ix86_expand_binary_operator (enum rtx_code code, machine_mode mode,
17902 rtx operands[])
17904 rtx src1, src2, dst, op, clob;
17906 dst = ix86_fixup_binary_operands (code, mode, operands);
17907 src1 = operands[1];
17908 src2 = operands[2];
17910 /* Emit the instruction. */
17912 op = gen_rtx_SET (VOIDmode, dst, gen_rtx_fmt_ee (code, mode, src1, src2));
17913 if (reload_in_progress)
17915 /* Reload doesn't know about the flags register, and doesn't know that
17916 it doesn't want to clobber it. We can only do this with PLUS. */
17917 gcc_assert (code == PLUS);
17918 emit_insn (op);
17920 else if (reload_completed
17921 && code == PLUS
17922 && !rtx_equal_p (dst, src1))
17924 /* This is going to be an LEA; avoid splitting it later. */
17925 emit_insn (op);
17927 else
17929 clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
17930 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, op, clob)));
17933 /* Fix up the destination if needed. */
17934 if (dst != operands[0])
17935 emit_move_insn (operands[0], dst);
17938 /* Expand vector logical operation CODE (AND, IOR, XOR) in MODE with
17939 the given OPERANDS. */
17941 void
17942 ix86_expand_vector_logical_operator (enum rtx_code code, machine_mode mode,
17943 rtx operands[])
17945 rtx op1 = NULL_RTX, op2 = NULL_RTX;
17946 if (GET_CODE (operands[1]) == SUBREG)
17948 op1 = operands[1];
17949 op2 = operands[2];
17951 else if (GET_CODE (operands[2]) == SUBREG)
17953 op1 = operands[2];
17954 op2 = operands[1];
17956 /* Optimize (__m128i) d | (__m128i) e and similar code
17957 when d and e are float vectors into float vector logical
17958 insn. In C/C++ without using intrinsics there is no other way
17959 to express vector logical operation on float vectors than
17960 to cast them temporarily to integer vectors. */
17961 if (op1
17962 && !TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL
17963 && ((GET_CODE (op2) == SUBREG || GET_CODE (op2) == CONST_VECTOR))
17964 && GET_MODE_CLASS (GET_MODE (SUBREG_REG (op1))) == MODE_VECTOR_FLOAT
17965 && GET_MODE_SIZE (GET_MODE (SUBREG_REG (op1))) == GET_MODE_SIZE (mode)
17966 && SUBREG_BYTE (op1) == 0
17967 && (GET_CODE (op2) == CONST_VECTOR
17968 || (GET_MODE (SUBREG_REG (op1)) == GET_MODE (SUBREG_REG (op2))
17969 && SUBREG_BYTE (op2) == 0))
17970 && can_create_pseudo_p ())
17972 rtx dst;
17973 switch (GET_MODE (SUBREG_REG (op1)))
17975 case V4SFmode:
17976 case V8SFmode:
17977 case V16SFmode:
17978 case V2DFmode:
17979 case V4DFmode:
17980 case V8DFmode:
17981 dst = gen_reg_rtx (GET_MODE (SUBREG_REG (op1)));
17982 if (GET_CODE (op2) == CONST_VECTOR)
17984 op2 = gen_lowpart (GET_MODE (dst), op2);
17985 op2 = force_reg (GET_MODE (dst), op2);
17987 else
17989 op1 = operands[1];
17990 op2 = SUBREG_REG (operands[2]);
17991 if (!nonimmediate_operand (op2, GET_MODE (dst)))
17992 op2 = force_reg (GET_MODE (dst), op2);
17994 op1 = SUBREG_REG (op1);
17995 if (!nonimmediate_operand (op1, GET_MODE (dst)))
17996 op1 = force_reg (GET_MODE (dst), op1);
17997 emit_insn (gen_rtx_SET (VOIDmode, dst,
17998 gen_rtx_fmt_ee (code, GET_MODE (dst),
17999 op1, op2)));
18000 emit_move_insn (operands[0], gen_lowpart (mode, dst));
18001 return;
18002 default:
18003 break;
18006 if (!nonimmediate_operand (operands[1], mode))
18007 operands[1] = force_reg (mode, operands[1]);
18008 if (!nonimmediate_operand (operands[2], mode))
18009 operands[2] = force_reg (mode, operands[2]);
18010 ix86_fixup_binary_operands_no_copy (code, mode, operands);
18011 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
18012 gen_rtx_fmt_ee (code, mode, operands[1],
18013 operands[2])));
18016 /* Return TRUE or FALSE depending on whether the binary operator meets the
18017 appropriate constraints. */
18019 bool
18020 ix86_binary_operator_ok (enum rtx_code code, machine_mode mode,
18021 rtx operands[3])
18023 rtx dst = operands[0];
18024 rtx src1 = operands[1];
18025 rtx src2 = operands[2];
18027 /* Both source operands cannot be in memory. */
18028 if (MEM_P (src1) && MEM_P (src2))
18029 return false;
18031 /* Canonicalize operand order for commutative operators. */
18032 if (ix86_swap_binary_operands_p (code, mode, operands))
18033 std::swap (src1, src2);
18035 /* If the destination is memory, we must have a matching source operand. */
18036 if (MEM_P (dst) && !rtx_equal_p (dst, src1))
18037 return false;
18039 /* Source 1 cannot be a constant. */
18040 if (CONSTANT_P (src1))
18041 return false;
18043 /* Source 1 cannot be a non-matching memory. */
18044 if (MEM_P (src1) && !rtx_equal_p (dst, src1))
18045 /* Support "andhi/andsi/anddi" as a zero-extending move. */
18046 return (code == AND
18047 && (mode == HImode
18048 || mode == SImode
18049 || (TARGET_64BIT && mode == DImode))
18050 && satisfies_constraint_L (src2));
18052 return true;
18055 /* Attempt to expand a unary operator. Make the expansion closer to the
18056 actual machine than just general_operand, which will allow 2 separate
18057 memory references (one output, one input) in a single insn. */
18059 void
18060 ix86_expand_unary_operator (enum rtx_code code, machine_mode mode,
18061 rtx operands[])
18063 int matching_memory;
18064 rtx src, dst, op, clob;
18066 dst = operands[0];
18067 src = operands[1];
18069 /* If the destination is memory, and we do not have matching source
18070 operands, do things in registers. */
18071 matching_memory = 0;
18072 if (MEM_P (dst))
18074 if (rtx_equal_p (dst, src))
18075 matching_memory = 1;
18076 else
18077 dst = gen_reg_rtx (mode);
18080 /* When source operand is memory, destination must match. */
18081 if (MEM_P (src) && !matching_memory)
18082 src = force_reg (mode, src);
18084 /* Emit the instruction. */
18086 op = gen_rtx_SET (VOIDmode, dst, gen_rtx_fmt_e (code, mode, src));
18087 if (reload_in_progress || code == NOT)
18089 /* Reload doesn't know about the flags register, and doesn't know that
18090 it doesn't want to clobber it. */
18091 gcc_assert (code == NOT);
18092 emit_insn (op);
18094 else
18096 clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
18097 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, op, clob)));
18100 /* Fix up the destination if needed. */
18101 if (dst != operands[0])
18102 emit_move_insn (operands[0], dst);
18105 /* Split 32bit/64bit divmod with 8bit unsigned divmod if dividend and
18106 divisor are within the range [0-255]. */
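/* A sketch of the generated code: the two operands are OR-ed together
   and tested against ~0xff (the -0x100 constant below); if no bit above
   the low byte is set, control branches to the 8bit path, which uses a
   HImode-by-QImode divide (udivmodhiqi3, i.e. a byte DIV) and then picks
   the quotient out of AL and the remainder out of AH.  Otherwise the
   full-width signed or unsigned divmod is emitted on the fall-through
   path.  */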
18108 void
18109 ix86_split_idivmod (machine_mode mode, rtx operands[],
18110 bool signed_p)
18112 rtx_code_label *end_label, *qimode_label;
18113 rtx insn, div, mod;
18114 rtx scratch, tmp0, tmp1, tmp2;
18115 rtx (*gen_divmod4_1) (rtx, rtx, rtx, rtx);
18116 rtx (*gen_zero_extend) (rtx, rtx);
18117 rtx (*gen_test_ccno_1) (rtx, rtx);
18119 switch (mode)
18121 case SImode:
18122 gen_divmod4_1 = signed_p ? gen_divmodsi4_1 : gen_udivmodsi4_1;
18123 gen_test_ccno_1 = gen_testsi_ccno_1;
18124 gen_zero_extend = gen_zero_extendqisi2;
18125 break;
18126 case DImode:
18127 gen_divmod4_1 = signed_p ? gen_divmoddi4_1 : gen_udivmoddi4_1;
18128 gen_test_ccno_1 = gen_testdi_ccno_1;
18129 gen_zero_extend = gen_zero_extendqidi2;
18130 break;
18131 default:
18132 gcc_unreachable ();
18135 end_label = gen_label_rtx ();
18136 qimode_label = gen_label_rtx ();
18138 scratch = gen_reg_rtx (mode);
18140 /* Use 8bit unsigned divmod if dividend and divisor are within
18141 the range [0-255]. */
18142 emit_move_insn (scratch, operands[2]);
18143 scratch = expand_simple_binop (mode, IOR, scratch, operands[3],
18144 scratch, 1, OPTAB_DIRECT);
18145 emit_insn (gen_test_ccno_1 (scratch, GEN_INT (-0x100)));
18146 tmp0 = gen_rtx_REG (CCNOmode, FLAGS_REG);
18147 tmp0 = gen_rtx_EQ (VOIDmode, tmp0, const0_rtx);
18148 tmp0 = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp0,
18149 gen_rtx_LABEL_REF (VOIDmode, qimode_label),
18150 pc_rtx);
18151 insn = emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp0));
18152 predict_jump (REG_BR_PROB_BASE * 50 / 100);
18153 JUMP_LABEL (insn) = qimode_label;
18155 /* Generate original signed/unsigned divmod. */
18156 div = gen_divmod4_1 (operands[0], operands[1],
18157 operands[2], operands[3]);
18158 emit_insn (div);
18160 /* Branch to the end. */
18161 emit_jump_insn (gen_jump (end_label));
18162 emit_barrier ();
18164 /* Generate 8bit unsigned divide. */
18165 emit_label (qimode_label);
18166 /* Don't use operands[0] for result of 8bit divide since not all
18167 registers support QImode ZERO_EXTRACT. */
18168 tmp0 = simplify_gen_subreg (HImode, scratch, mode, 0);
18169 tmp1 = simplify_gen_subreg (HImode, operands[2], mode, 0);
18170 tmp2 = simplify_gen_subreg (QImode, operands[3], mode, 0);
18171 emit_insn (gen_udivmodhiqi3 (tmp0, tmp1, tmp2));
18173 if (signed_p)
18175 div = gen_rtx_DIV (SImode, operands[2], operands[3]);
18176 mod = gen_rtx_MOD (SImode, operands[2], operands[3]);
18178 else
18180 div = gen_rtx_UDIV (SImode, operands[2], operands[3]);
18181 mod = gen_rtx_UMOD (SImode, operands[2], operands[3]);
18184 /* Extract remainder from AH. */
18185 tmp1 = gen_rtx_ZERO_EXTRACT (mode, tmp0, GEN_INT (8), GEN_INT (8));
18186 if (REG_P (operands[1]))
18187 insn = emit_move_insn (operands[1], tmp1);
18188 else
18190 /* Need a new scratch register since the old one has result
18191 of 8bit divide. */
18192 scratch = gen_reg_rtx (mode);
18193 emit_move_insn (scratch, tmp1);
18194 insn = emit_move_insn (operands[1], scratch);
18196 set_unique_reg_note (insn, REG_EQUAL, mod);
18198 /* Zero extend quotient from AL. */
18199 tmp1 = gen_lowpart (QImode, tmp0);
18200 insn = emit_insn (gen_zero_extend (operands[0], tmp1));
18201 set_unique_reg_note (insn, REG_EQUAL, div);
18203 emit_label (end_label);
18206 #define LEA_MAX_STALL (3)
18207 #define LEA_SEARCH_THRESHOLD (LEA_MAX_STALL << 1)
18209 /* Increase given DISTANCE in half-cycles according to
18210 dependencies between PREV and NEXT instructions.
18211 Add 1 half-cycle if there is no dependency and
18212 go to the next cycle if there is some dependency. */
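/* The accounting is in half-cycles: an independent PREV/NEXT pair only
   advances the distance by one half-cycle, while a pair with a true
   register dependency (a def in PREV feeding a use in NEXT) rounds the
   distance up to the next full cycle, i.e. adds (distance & 1) + 2.  */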
18214 static unsigned int
18215 increase_distance (rtx_insn *prev, rtx_insn *next, unsigned int distance)
18217 df_ref def, use;
18219 if (!prev || !next)
18220 return distance + (distance & 1) + 2;
18222 if (!DF_INSN_USES (next) || !DF_INSN_DEFS (prev))
18223 return distance + 1;
18225 FOR_EACH_INSN_USE (use, next)
18226 FOR_EACH_INSN_DEF (def, prev)
18227 if (!DF_REF_IS_ARTIFICIAL (def)
18228 && DF_REF_REGNO (use) == DF_REF_REGNO (def))
18229 return distance + (distance & 1) + 2;
18231 return distance + 1;
18234 /* Function checks if instruction INSN defines register number
18235 REGNO1 or REGNO2. */
18237 static bool
18238 insn_defines_reg (unsigned int regno1, unsigned int regno2,
18239 rtx insn)
18241 df_ref def;
18243 FOR_EACH_INSN_DEF (def, insn)
18244 if (DF_REF_REG_DEF_P (def)
18245 && !DF_REF_IS_ARTIFICIAL (def)
18246 && (regno1 == DF_REF_REGNO (def)
18247 || regno2 == DF_REF_REGNO (def)))
18248 return true;
18250 return false;
18253 /* Function checks if instruction INSN uses register number
18254 REGNO as a part of address expression. */
18256 static bool
18257 insn_uses_reg_mem (unsigned int regno, rtx insn)
18259 df_ref use;
18261 FOR_EACH_INSN_USE (use, insn)
18262 if (DF_REF_REG_MEM_P (use) && regno == DF_REF_REGNO (use))
18263 return true;
18265 return false;
18268 /* Search backward for non-agu definition of register number REGNO1
18269 or register number REGNO2 in basic block starting from instruction
18270 START up to head of basic block or instruction INSN.
18272 Function puts true value into *FOUND var if definition was found
18273 and false otherwise.
18275 Distance in half-cycles between START and found instruction or head
18276 of BB is added to DISTANCE and returned. */
18278 static int
18279 distance_non_agu_define_in_bb (unsigned int regno1, unsigned int regno2,
18280 rtx_insn *insn, int distance,
18281 rtx_insn *start, bool *found)
18283 basic_block bb = start ? BLOCK_FOR_INSN (start) : NULL;
18284 rtx_insn *prev = start;
18285 rtx_insn *next = NULL;
18287 *found = false;
18289 while (prev
18290 && prev != insn
18291 && distance < LEA_SEARCH_THRESHOLD)
18293 if (NONDEBUG_INSN_P (prev) && NONJUMP_INSN_P (prev))
18295 distance = increase_distance (prev, next, distance);
18296 if (insn_defines_reg (regno1, regno2, prev))
18298 if (recog_memoized (prev) < 0
18299 || get_attr_type (prev) != TYPE_LEA)
18301 *found = true;
18302 return distance;
18306 next = prev;
18308 if (prev == BB_HEAD (bb))
18309 break;
18311 prev = PREV_INSN (prev);
18314 return distance;
18317 /* Search backward for non-agu definition of register number REGNO1
18318 or register number REGNO2 in INSN's basic block until
18319 1. Pass LEA_SEARCH_THRESHOLD instructions, or
18320 2. Reach neighbour BBs boundary, or
18321 3. Reach agu definition.
18322 Returns the distance between the non-agu definition point and INSN.
18323 If no definition point, returns -1. */
18325 static int
18326 distance_non_agu_define (unsigned int regno1, unsigned int regno2,
18327 rtx_insn *insn)
18329 basic_block bb = BLOCK_FOR_INSN (insn);
18330 int distance = 0;
18331 bool found = false;
18333 if (insn != BB_HEAD (bb))
18334 distance = distance_non_agu_define_in_bb (regno1, regno2, insn,
18335 distance, PREV_INSN (insn),
18336 &found);
18338 if (!found && distance < LEA_SEARCH_THRESHOLD)
18340 edge e;
18341 edge_iterator ei;
18342 bool simple_loop = false;
18344 FOR_EACH_EDGE (e, ei, bb->preds)
18345 if (e->src == bb)
18347 simple_loop = true;
18348 break;
18351 if (simple_loop)
18352 distance = distance_non_agu_define_in_bb (regno1, regno2,
18353 insn, distance,
18354 BB_END (bb), &found);
18355 else
18357 int shortest_dist = -1;
18358 bool found_in_bb = false;
18360 FOR_EACH_EDGE (e, ei, bb->preds)
18362 int bb_dist
18363 = distance_non_agu_define_in_bb (regno1, regno2,
18364 insn, distance,
18365 BB_END (e->src),
18366 &found_in_bb);
18367 if (found_in_bb)
18369 if (shortest_dist < 0)
18370 shortest_dist = bb_dist;
18371 else if (bb_dist > 0)
18372 shortest_dist = MIN (bb_dist, shortest_dist);
18374 found = true;
18378 distance = shortest_dist;
18382 /* get_attr_type may modify recog data. We want to make sure
18383 that recog data is valid for instruction INSN, on which
18384 distance_non_agu_define is called. INSN is unchanged here. */
18385 extract_insn_cached (insn);
18387 if (!found)
18388 return -1;
18390 return distance >> 1;
18393 /* Return the distance in half-cycles between INSN and the next
18394 insn that uses register number REGNO in memory address added
18395 to DISTANCE. Return -1 if REGNO is set.
18397 Put true value into *FOUND if register usage was found and
18398 false otherwise.
18399 Put true value into *REDEFINED if register redefinition was
18400 found and false otherwise. */
18402 static int
18403 distance_agu_use_in_bb (unsigned int regno,
18404 rtx_insn *insn, int distance, rtx_insn *start,
18405 bool *found, bool *redefined)
18407 basic_block bb = NULL;
18408 rtx_insn *next = start;
18409 rtx_insn *prev = NULL;
18411 *found = false;
18412 *redefined = false;
18414 if (start != NULL_RTX)
18416 bb = BLOCK_FOR_INSN (start);
18417 if (start != BB_HEAD (bb))
18418 /* If insn and start belong to the same bb, set prev to insn,
18419 so the call to increase_distance will increase the distance
18420 between insns by 1. */
18421 prev = insn;
18424 while (next
18425 && next != insn
18426 && distance < LEA_SEARCH_THRESHOLD)
18428 if (NONDEBUG_INSN_P (next) && NONJUMP_INSN_P (next))
18430 distance = increase_distance(prev, next, distance);
18431 if (insn_uses_reg_mem (regno, next))
18433 /* Return DISTANCE if OP0 is used in memory
18434 address in NEXT. */
18435 *found = true;
18436 return distance;
18439 if (insn_defines_reg (regno, INVALID_REGNUM, next))
18441 /* Return -1 if OP0 is set in NEXT. */
18442 *redefined = true;
18443 return -1;
18446 prev = next;
18449 if (next == BB_END (bb))
18450 break;
18452 next = NEXT_INSN (next);
18455 return distance;
18458 /* Return the distance between INSN and the next insn that uses
18459 register number REGNO0 in memory address. Return -1 if no such
18460 use is found within LEA_SEARCH_THRESHOLD or REGNO0 is set. */
18462 static int
18463 distance_agu_use (unsigned int regno0, rtx_insn *insn)
18465 basic_block bb = BLOCK_FOR_INSN (insn);
18466 int distance = 0;
18467 bool found = false;
18468 bool redefined = false;
18470 if (insn != BB_END (bb))
18471 distance = distance_agu_use_in_bb (regno0, insn, distance,
18472 NEXT_INSN (insn),
18473 &found, &redefined);
18475 if (!found && !redefined && distance < LEA_SEARCH_THRESHOLD)
18477 edge e;
18478 edge_iterator ei;
18479 bool simple_loop = false;
18481 FOR_EACH_EDGE (e, ei, bb->succs)
18482 if (e->dest == bb)
18484 simple_loop = true;
18485 break;
18488 if (simple_loop)
18489 distance = distance_agu_use_in_bb (regno0, insn,
18490 distance, BB_HEAD (bb),
18491 &found, &redefined);
18492 else
18494 int shortest_dist = -1;
18495 bool found_in_bb = false;
18496 bool redefined_in_bb = false;
18498 FOR_EACH_EDGE (e, ei, bb->succs)
18500 int bb_dist
18501 = distance_agu_use_in_bb (regno0, insn,
18502 distance, BB_HEAD (e->dest),
18503 &found_in_bb, &redefined_in_bb);
18504 if (found_in_bb)
18506 if (shortest_dist < 0)
18507 shortest_dist = bb_dist;
18508 else if (bb_dist > 0)
18509 shortest_dist = MIN (bb_dist, shortest_dist);
18511 found = true;
18515 distance = shortest_dist;
18519 if (!found || redefined)
18520 return -1;
18522 return distance >> 1;
18525 /* Define this macro to tune LEA priority vs ADD; it takes effect when
18526 there is a dilemma of choosing LEA or ADD.
18527 Negative value: ADD is preferred over LEA
18528 Zero: Neutral
18529 Positive value: LEA is preferred over ADD. */
18530 #define IX86_LEA_PRIORITY 0
18532 /* Return true if usage of lea INSN has performance advantage
18533 over a sequence of instructions. The instruction sequence has
18534 SPLIT_COST cycles higher latency than the lea latency. */
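/* Roughly, the heuristic below compares the backward distance to the
   closest non-AGU definition of the lea's source registers with the
   forward distance to the closest use of its result inside an address.
   When the defining instruction is far enough away the AGU stall is
   already hidden and lea wins; otherwise the comparison is biased by
   SPLIT_COST and IX86_LEA_PRIORITY.  */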
18536 static bool
18537 ix86_lea_outperforms (rtx_insn *insn, unsigned int regno0, unsigned int regno1,
18538 unsigned int regno2, int split_cost, bool has_scale)
18540 int dist_define, dist_use;
18542 /* For Silvermont, if using a 2-source or 3-source LEA for
18543 non-destructive destination purposes, or due to wanting the
18544 ability to use SCALE, the use of LEA is justified. */
18545 if (TARGET_SILVERMONT || TARGET_INTEL)
18547 if (has_scale)
18548 return true;
18549 if (split_cost < 1)
18550 return false;
18551 if (regno0 == regno1 || regno0 == regno2)
18552 return false;
18553 return true;
18556 dist_define = distance_non_agu_define (regno1, regno2, insn);
18557 dist_use = distance_agu_use (regno0, insn);
18559 if (dist_define < 0 || dist_define >= LEA_MAX_STALL)
18561 /* If there is no non-AGU operand definition, no AGU
18562 operand usage and split cost is 0 then both lea
18563 and non lea variants have the same priority. Currently
18564 we prefer lea for 64 bit code and non lea on 32 bit
18565 code. */
18566 if (dist_use < 0 && split_cost == 0)
18567 return TARGET_64BIT || IX86_LEA_PRIORITY;
18568 else
18569 return true;
18572 /* With a longer definition distance, lea is preferable.
18573 Here we change it to take into account splitting cost and
18574 lea priority. */
18575 dist_define += split_cost + IX86_LEA_PRIORITY;
18577 /* If there is no use in memory address then we just check
18578 that split cost exceeds AGU stall. */
18579 if (dist_use < 0)
18580 return dist_define > LEA_MAX_STALL;
18582 /* If this insn has both a backward non-AGU dependence and a forward
18583 AGU dependence, the one with the shorter distance takes effect. */
18584 return dist_define >= dist_use;
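/* A worked example of the heuristic above (illustrative only): suppose the
   lea's source registers are defined 1 insn before it and its result feeds
   a memory address 3 insns after it, with SPLIT_COST == 1 and
   IX86_LEA_PRIORITY == 0.  Assuming 1 is below LEA_MAX_STALL, the adjusted
   definition distance is 1 + 1 + 0 == 2, which is less than the use
   distance 3, so the function returns false and the lea gets split into
   plain ALU instructions.  */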
18587 /* Return true if it is legal for INSN to clobber the flags register,
18588 and false otherwise. */
18590 static bool
18591 ix86_ok_to_clobber_flags (rtx_insn *insn)
18593 basic_block bb = BLOCK_FOR_INSN (insn);
18594 df_ref use;
18595 bitmap live;
18597 while (insn)
18599 if (NONDEBUG_INSN_P (insn))
18601 FOR_EACH_INSN_USE (use, insn)
18602 if (DF_REF_REG_USE_P (use) && DF_REF_REGNO (use) == FLAGS_REG)
18603 return false;
18605 if (insn_defines_reg (FLAGS_REG, INVALID_REGNUM, insn))
18606 return true;
18609 if (insn == BB_END (bb))
18610 break;
18612 insn = NEXT_INSN (insn);
18615 live = df_get_live_out (bb);
18616 return !REGNO_REG_SET_P (live, FLAGS_REG);
18619 /* Return true if we need to split op0 = op1 + op2 into a sequence of
18620 move and add to avoid AGU stalls. */
18622 bool
18623 ix86_avoid_lea_for_add (rtx_insn *insn, rtx operands[])
18625 unsigned int regno0, regno1, regno2;
18627 /* Check if we need to optimize. */
18628 if (!TARGET_OPT_AGU || optimize_function_for_size_p (cfun))
18629 return false;
18631 /* Check it is correct to split here. */
18632 if (!ix86_ok_to_clobber_flags (insn))
18633 return false;
18635 regno0 = true_regnum (operands[0]);
18636 regno1 = true_regnum (operands[1]);
18637 regno2 = true_regnum (operands[2]);
18639 /* We need to split only adds with non destructive
18640 destination operand. */
18641 if (regno0 == regno1 || regno0 == regno2)
18642 return false;
18643 else
18644 return !ix86_lea_outperforms (insn, regno0, regno1, regno2, 1, false);
18647 /* Return true if we should emit lea instruction instead of mov
18648 instruction. */
18650 bool
18651 ix86_use_lea_for_mov (rtx_insn *insn, rtx operands[])
18653 unsigned int regno0, regno1;
18655 /* Check if we need to optimize. */
18656 if (!TARGET_OPT_AGU || optimize_function_for_size_p (cfun))
18657 return false;
18659 /* Use lea for reg to reg moves only. */
18660 if (!REG_P (operands[0]) || !REG_P (operands[1]))
18661 return false;
18663 regno0 = true_regnum (operands[0]);
18664 regno1 = true_regnum (operands[1]);
18666 return ix86_lea_outperforms (insn, regno0, regno1, INVALID_REGNUM, 0, false);
18669 /* Return true if we need to split lea into a sequence of
18670 instructions to avoid AGU stalls. */
18672 bool
18673 ix86_avoid_lea_for_addr (rtx_insn *insn, rtx operands[])
18675 unsigned int regno0, regno1, regno2;
18676 int split_cost;
18677 struct ix86_address parts;
18678 int ok;
18680 /* Check we need to optimize. */
18681 if (!TARGET_AVOID_LEA_FOR_ADDR || optimize_function_for_size_p (cfun))
18682 return false;
18684 /* The "at least two components" test below might not catch simple
18685 move or zero extension insns if parts.base is non-NULL and parts.disp
18686 is const0_rtx as the only components in the address, e.g. if the
18687 register is %rbp or %r13. As this test is much cheaper and moves or
18688 zero extensions are the common case, do this check first. */
18689 if (REG_P (operands[1])
18690 || (SImode_address_operand (operands[1], VOIDmode)
18691 && REG_P (XEXP (operands[1], 0))))
18692 return false;
18694 /* Check if it is OK to split here. */
18695 if (!ix86_ok_to_clobber_flags (insn))
18696 return false;
18698 ok = ix86_decompose_address (operands[1], &parts);
18699 gcc_assert (ok);
18701 /* There should be at least two components in the address. */
18702 if ((parts.base != NULL_RTX) + (parts.index != NULL_RTX)
18703 + (parts.disp != NULL_RTX) + (parts.scale > 1) < 2)
18704 return false;
18706 /* We should not split into an add if a non-legitimate PIC
18707 operand is used as the displacement. */
18708 if (parts.disp && flag_pic && !LEGITIMATE_PIC_OPERAND_P (parts.disp))
18709 return false;
18711 regno0 = true_regnum (operands[0]);
18712 regno1 = INVALID_REGNUM;
18713 regno2 = INVALID_REGNUM;
18715 if (parts.base)
18716 regno1 = true_regnum (parts.base);
18717 if (parts.index)
18718 regno2 = true_regnum (parts.index);
18720 split_cost = 0;
18722 /* Compute how many cycles we will add to the execution time
18723 if we split the lea into a sequence of instructions. */
18724 if (parts.base || parts.index)
18726 /* Have to use a mov instruction if the non-destructive
18727 destination form is used. */
18728 if (regno1 != regno0 && regno2 != regno0)
18729 split_cost += 1;
18731 /* Have to add index to base if both exist. */
18732 if (parts.base && parts.index)
18733 split_cost += 1;
18735 /* Have to use shift and adds if scale is 2 or greater. */
18736 if (parts.scale > 1)
18738 if (regno0 != regno1)
18739 split_cost += 1;
18740 else if (regno2 == regno0)
18741 split_cost += 4;
18742 else
18743 split_cost += parts.scale;
18746 /* Have to use an add instruction with an immediate if
18747 disp is nonzero. */
18748 if (parts.disp && parts.disp != const0_rtx)
18749 split_cost += 1;
18751 /* Subtract the price of lea. */
18752 split_cost -= 1;
18755 return !ix86_lea_outperforms (insn, regno0, regno1, regno2, split_cost,
18756 parts.scale > 1);
18759 /* Emit x86 binary operand CODE in mode MODE, where the first operand
18760 matches destination. RTX includes clobber of FLAGS_REG. */
18762 static void
18763 ix86_emit_binop (enum rtx_code code, machine_mode mode,
18764 rtx dst, rtx src)
18766 rtx op, clob;
18768 op = gen_rtx_SET (VOIDmode, dst, gen_rtx_fmt_ee (code, mode, dst, src));
18769 clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
18771 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, op, clob)));
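/* For example, ix86_emit_binop (PLUS, SImode, r0, r1) emits RTL of the shape
     (parallel [(set r0 (plus:SI r0 r1))
                (clobber (reg:CC FLAGS_REG))])
   which matches the regular two-operand add pattern together with its
   flags clobber.  */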
18774 /* Return true if the definition of regno1 is nearest to the insn. */
18776 static bool
18777 find_nearest_reg_def (rtx_insn *insn, int regno1, int regno2)
18779 rtx_insn *prev = insn;
18780 rtx_insn *start = BB_HEAD (BLOCK_FOR_INSN (insn));
18782 if (insn == start)
18783 return false;
18784 while (prev && prev != start)
18786 if (!INSN_P (prev) || !NONDEBUG_INSN_P (prev))
18788 prev = PREV_INSN (prev);
18789 continue;
18791 if (insn_defines_reg (regno1, INVALID_REGNUM, prev))
18792 return true;
18793 else if (insn_defines_reg (regno2, INVALID_REGNUM, prev))
18794 return false;
18795 prev = PREV_INSN (prev);
18798 /* None of the regs is defined in the bb. */
18799 return false;
18802 /* Split lea instructions into a sequence of instructions
18803 which are executed on ALU to avoid AGU stalls.
18804 It is assumed that it is allowed to clobber flags register
18805 at lea position. */
18807 void
18808 ix86_split_lea_for_addr (rtx_insn *insn, rtx operands[], machine_mode mode)
18810 unsigned int regno0, regno1, regno2;
18811 struct ix86_address parts;
18812 rtx target, tmp;
18813 int ok, adds;
18815 ok = ix86_decompose_address (operands[1], &parts);
18816 gcc_assert (ok);
18818 target = gen_lowpart (mode, operands[0]);
18820 regno0 = true_regnum (target);
18821 regno1 = INVALID_REGNUM;
18822 regno2 = INVALID_REGNUM;
18824 if (parts.base)
18826 parts.base = gen_lowpart (mode, parts.base);
18827 regno1 = true_regnum (parts.base);
18830 if (parts.index)
18832 parts.index = gen_lowpart (mode, parts.index);
18833 regno2 = true_regnum (parts.index);
18836 if (parts.disp)
18837 parts.disp = gen_lowpart (mode, parts.disp);
18839 if (parts.scale > 1)
18841 /* Case r1 = r1 + ... */
18842 if (regno1 == regno0)
18844 /* If we have the case r1 = r1 + C * r2 then we
18845 would have to use multiplication, which is very
18846 expensive. Assume the cost model is wrong if we
18847 have such a case here. */
18848 gcc_assert (regno2 != regno0);
18850 for (adds = parts.scale; adds > 0; adds--)
18851 ix86_emit_binop (PLUS, mode, target, parts.index);
18853 else
18855 /* r1 = r2 + r3 * C case. Need to move r3 into r1. */
18856 if (regno0 != regno2)
18857 emit_insn (gen_rtx_SET (VOIDmode, target, parts.index));
18859 /* Use shift for scaling. */
18860 ix86_emit_binop (ASHIFT, mode, target,
18861 GEN_INT (exact_log2 (parts.scale)));
18863 if (parts.base)
18864 ix86_emit_binop (PLUS, mode, target, parts.base);
18866 if (parts.disp && parts.disp != const0_rtx)
18867 ix86_emit_binop (PLUS, mode, target, parts.disp);
18870 else if (!parts.base && !parts.index)
18872 gcc_assert (parts.disp);
18873 emit_insn (gen_rtx_SET (VOIDmode, target, parts.disp));
18875 else
18877 if (!parts.base)
18879 if (regno0 != regno2)
18880 emit_insn (gen_rtx_SET (VOIDmode, target, parts.index));
18882 else if (!parts.index)
18884 if (regno0 != regno1)
18885 emit_insn (gen_rtx_SET (VOIDmode, target, parts.base));
18887 else
18889 if (regno0 == regno1)
18890 tmp = parts.index;
18891 else if (regno0 == regno2)
18892 tmp = parts.base;
18893 else
18895 rtx tmp1;
18897 /* Find the better operand for the SET instruction, depending
18898 on which definition is farther from the insn. */
18899 if (find_nearest_reg_def (insn, regno1, regno2))
18900 tmp = parts.index, tmp1 = parts.base;
18901 else
18902 tmp = parts.base, tmp1 = parts.index;
18904 emit_insn (gen_rtx_SET (VOIDmode, target, tmp));
18906 if (parts.disp && parts.disp != const0_rtx)
18907 ix86_emit_binop (PLUS, mode, target, parts.disp);
18909 ix86_emit_binop (PLUS, mode, target, tmp1);
18910 return;
18913 ix86_emit_binop (PLUS, mode, target, tmp);
18916 if (parts.disp && parts.disp != const0_rtx)
18917 ix86_emit_binop (PLUS, mode, target, parts.disp);
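/* A rough illustration of the splitting above (register names are just
   examples): "lea 0x8(%rbx,%rcx,4), %rax" can become
     mov  %rcx, %rax
     shl  $2,   %rax
     add  %rbx, %rax
     add  $0x8, %rax
   i.e. a move of the index, a shift by log2 of the scale, an add of the
   base and an add of the displacement, all executed on the ALU instead of
   the AGU.  */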
18921 /* Return true if it is ok to optimize an ADD operation to a LEA
18922 operation to avoid flag register consumption. For most processors,
18923 ADD is faster than LEA. For processors like BONNELL, if the
18924 destination register of the LEA holds an actual address which will be
18925 used soon, LEA is better, and otherwise ADD is better. */
18927 bool
18928 ix86_lea_for_add_ok (rtx_insn *insn, rtx operands[])
18930 unsigned int regno0 = true_regnum (operands[0]);
18931 unsigned int regno1 = true_regnum (operands[1]);
18932 unsigned int regno2 = true_regnum (operands[2]);
18934 /* If a = b + c, (a!=b && a!=c), must use lea form. */
18935 if (regno0 != regno1 && regno0 != regno2)
18936 return true;
18938 if (!TARGET_OPT_AGU || optimize_function_for_size_p (cfun))
18939 return false;
18941 return ix86_lea_outperforms (insn, regno0, regno1, regno2, 0, false);
18944 /* Return true if destination reg of SET_BODY is shift count of
18945 USE_BODY. */
18947 static bool
18948 ix86_dep_by_shift_count_body (const_rtx set_body, const_rtx use_body)
18950 rtx set_dest;
18951 rtx shift_rtx;
18952 int i;
18954 /* Retrieve destination of SET_BODY. */
18955 switch (GET_CODE (set_body))
18957 case SET:
18958 set_dest = SET_DEST (set_body);
18959 if (!set_dest || !REG_P (set_dest))
18960 return false;
18961 break;
18962 case PARALLEL:
18963 for (i = XVECLEN (set_body, 0) - 1; i >= 0; i--)
18964 if (ix86_dep_by_shift_count_body (XVECEXP (set_body, 0, i),
18965 use_body))
18966 return true;
18967 default:
18968 return false;
18969 break;
18972 /* Retrieve shift count of USE_BODY. */
18973 switch (GET_CODE (use_body))
18975 case SET:
18976 shift_rtx = XEXP (use_body, 1);
18977 break;
18978 case PARALLEL:
18979 for (i = XVECLEN (use_body, 0) - 1; i >= 0; i--)
18980 if (ix86_dep_by_shift_count_body (set_body,
18981 XVECEXP (use_body, 0, i)))
18982 return true;
18983 default:
18984 return false;
18985 break;
18988 if (shift_rtx
18989 && (GET_CODE (shift_rtx) == ASHIFT
18990 || GET_CODE (shift_rtx) == LSHIFTRT
18991 || GET_CODE (shift_rtx) == ASHIFTRT
18992 || GET_CODE (shift_rtx) == ROTATE
18993 || GET_CODE (shift_rtx) == ROTATERT))
18995 rtx shift_count = XEXP (shift_rtx, 1);
18997 /* Return true if shift count is dest of SET_BODY. */
18998 if (REG_P (shift_count))
19000 /* Add this check since this function can be invoked before
19001 register allocation by the pre-reload scheduler. */
19002 if (reload_completed
19003 && true_regnum (set_dest) == true_regnum (shift_count))
19004 return true;
19005 else if (REGNO (set_dest) == REGNO (shift_count))
19006 return true;
19010 return false;
19013 /* Return true if destination reg of SET_INSN is shift count of
19014 USE_INSN. */
19016 bool
19017 ix86_dep_by_shift_count (const_rtx set_insn, const_rtx use_insn)
19019 return ix86_dep_by_shift_count_body (PATTERN (set_insn),
19020 PATTERN (use_insn));
19023 /* Return TRUE or FALSE depending on whether the unary operator meets the
19024 appropriate constraints. */
19026 bool
19027 ix86_unary_operator_ok (enum rtx_code,
19028 machine_mode,
19029 rtx operands[2])
19031 /* If one of operands is memory, source and destination must match. */
19032 if ((MEM_P (operands[0])
19033 || MEM_P (operands[1]))
19034 && ! rtx_equal_p (operands[0], operands[1]))
19035 return false;
19036 return true;
19039 /* Return TRUE if the operands to a vec_interleave_{high,low}v2df
19040 are ok, keeping in mind the possible movddup alternative. */
19042 bool
19043 ix86_vec_interleave_v2df_operator_ok (rtx operands[3], bool high)
19045 if (MEM_P (operands[0]))
19046 return rtx_equal_p (operands[0], operands[1 + high]);
19047 if (MEM_P (operands[1]) && MEM_P (operands[2]))
19048 return TARGET_SSE3 && rtx_equal_p (operands[1], operands[2]);
19049 return true;
19052 /* Post-reload splitter for converting an SF or DFmode value in an
19053 SSE register into an unsigned SImode. */
19055 void
19056 ix86_split_convert_uns_si_sse (rtx operands[])
19058 machine_mode vecmode;
19059 rtx value, large, zero_or_two31, input, two31, x;
19061 large = operands[1];
19062 zero_or_two31 = operands[2];
19063 input = operands[3];
19064 two31 = operands[4];
19065 vecmode = GET_MODE (large);
19066 value = gen_rtx_REG (vecmode, REGNO (operands[0]));
19068 /* Load up the value into the low element. We must ensure that the other
19069 elements are valid floats -- zero is the easiest such value. */
19070 if (MEM_P (input))
19072 if (vecmode == V4SFmode)
19073 emit_insn (gen_vec_setv4sf_0 (value, CONST0_RTX (V4SFmode), input));
19074 else
19075 emit_insn (gen_sse2_loadlpd (value, CONST0_RTX (V2DFmode), input));
19077 else
19079 input = gen_rtx_REG (vecmode, REGNO (input));
19080 emit_move_insn (value, CONST0_RTX (vecmode));
19081 if (vecmode == V4SFmode)
19082 emit_insn (gen_sse_movss (value, value, input));
19083 else
19084 emit_insn (gen_sse2_movsd (value, value, input));
19087 emit_move_insn (large, two31);
19088 emit_move_insn (zero_or_two31, MEM_P (two31) ? large : two31);
19090 x = gen_rtx_fmt_ee (LE, vecmode, large, value);
19091 emit_insn (gen_rtx_SET (VOIDmode, large, x));
19093 x = gen_rtx_AND (vecmode, zero_or_two31, large);
19094 emit_insn (gen_rtx_SET (VOIDmode, zero_or_two31, x));
19096 x = gen_rtx_MINUS (vecmode, value, zero_or_two31);
19097 emit_insn (gen_rtx_SET (VOIDmode, value, x));
19099 large = gen_rtx_REG (V4SImode, REGNO (large));
19100 emit_insn (gen_ashlv4si3 (large, large, GEN_INT (31)));
19102 x = gen_rtx_REG (V4SImode, REGNO (value));
19103 if (vecmode == V4SFmode)
19104 emit_insn (gen_fix_truncv4sfv4si2 (x, value));
19105 else
19106 emit_insn (gen_sse2_cvttpd2dq (x, value));
19107 value = x;
19109 emit_insn (gen_xorv4si3 (value, value, large));
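/* A scalar sketch of the sequence above (illustrative only): for an input
   x in [0, 2^32) the code computes
     if (x >= 0x1p31)                    // the "large" mask is all ones
       result = (int) (x - 0x1p31) ^ 0x80000000;
     else
       result = (int) x;
   so values of 2^31 and above, which a signed truncating conversion cannot
   represent, are rebased below 2^31 and the sign bit is restored by the
   final xor.  */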
19112 /* Convert an unsigned DImode value into a DFmode, using only SSE.
19113 Expects the 64-bit DImode to be supplied in a pair of integral
19114 registers. Requires SSE2; will use SSE3 if available. For x86_32,
19115 -mfpmath=sse, !optimize_size only. */
19117 void
19118 ix86_expand_convert_uns_didf_sse (rtx target, rtx input)
19120 REAL_VALUE_TYPE bias_lo_rvt, bias_hi_rvt;
19121 rtx int_xmm, fp_xmm;
19122 rtx biases, exponents;
19123 rtx x;
19125 int_xmm = gen_reg_rtx (V4SImode);
19126 if (TARGET_INTER_UNIT_MOVES_TO_VEC)
19127 emit_insn (gen_movdi_to_sse (int_xmm, input));
19128 else if (TARGET_SSE_SPLIT_REGS)
19130 emit_clobber (int_xmm);
19131 emit_move_insn (gen_lowpart (DImode, int_xmm), input);
19133 else
19135 x = gen_reg_rtx (V2DImode);
19136 ix86_expand_vector_init_one_nonzero (false, V2DImode, x, input, 0);
19137 emit_move_insn (int_xmm, gen_lowpart (V4SImode, x));
19140 x = gen_rtx_CONST_VECTOR (V4SImode,
19141 gen_rtvec (4, GEN_INT (0x43300000UL),
19142 GEN_INT (0x45300000UL),
19143 const0_rtx, const0_rtx));
19144 exponents = validize_mem (force_const_mem (V4SImode, x));
19146 /* int_xmm = {0x45300000UL, fp_xmm/hi, 0x43300000, fp_xmm/lo } */
19147 emit_insn (gen_vec_interleave_lowv4si (int_xmm, int_xmm, exponents));
19149 /* Concatenating (juxtaposing) (0x43300000UL ## fp_value_low_xmm)
19150 yields a valid DF value equal to (0x1.0p52 + double(fp_value_lo_xmm)).
19151 Similarly (0x45300000UL ## fp_value_hi_xmm) yields
19152 (0x1.0p84 + double(fp_value_hi_xmm)).
19153 Note these exponents differ by 32. */
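/* A scalar sketch of the same bias trick, assuming IEEE doubles and a
   little-endian layout (illustrative only, not the code GCC emits):

     uint32_t lo = (uint32_t) u, hi = (uint32_t) (u >> 32);
     uint64_t lo_bits = 0x4330000000000000ULL | lo;   // == 0x1.0p52 + lo
     uint64_t hi_bits = 0x4530000000000000ULL | hi;   // == 0x1.0p84 + hi * 0x1.0p32
     double d_lo, d_hi;
     memcpy (&d_lo, &lo_bits, sizeof d_lo);            // reinterpret the bits
     memcpy (&d_hi, &hi_bits, sizeof d_hi);
     double result = (d_hi - 0x1.0p84) + (d_lo - 0x1.0p52);

   Both subtractions are exact, so the final addition rounds
   hi * 2^32 + lo, i.e. the original unsigned 64-bit value, exactly once.  */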
19155 fp_xmm = copy_to_mode_reg (V2DFmode, gen_lowpart (V2DFmode, int_xmm));
19157 /* Subtract off those 0x1.0p52 and 0x1.0p84 biases, to produce values
19158 in [0,2**32-1] and [0]+[2**32,2**64-1] respectively. */
19159 real_ldexp (&bias_lo_rvt, &dconst1, 52);
19160 real_ldexp (&bias_hi_rvt, &dconst1, 84);
19161 biases = const_double_from_real_value (bias_lo_rvt, DFmode);
19162 x = const_double_from_real_value (bias_hi_rvt, DFmode);
19163 biases = gen_rtx_CONST_VECTOR (V2DFmode, gen_rtvec (2, biases, x));
19164 biases = validize_mem (force_const_mem (V2DFmode, biases));
19165 emit_insn (gen_subv2df3 (fp_xmm, fp_xmm, biases));
19167 /* Add the upper and lower DFmode values together. */
19168 if (TARGET_SSE3)
19169 emit_insn (gen_sse3_haddv2df3 (fp_xmm, fp_xmm, fp_xmm));
19170 else
19172 x = copy_to_mode_reg (V2DFmode, fp_xmm);
19173 emit_insn (gen_vec_interleave_highv2df (fp_xmm, fp_xmm, fp_xmm));
19174 emit_insn (gen_addv2df3 (fp_xmm, fp_xmm, x));
19177 ix86_expand_vector_extract (false, target, fp_xmm, 0);
19180 /* Not used, but eases macroization of patterns. */
19181 void
19182 ix86_expand_convert_uns_sixf_sse (rtx, rtx)
19184 gcc_unreachable ();
19187 /* Convert an unsigned SImode value into a DFmode. Only currently used
19188 for SSE, but applicable anywhere. */
19190 void
19191 ix86_expand_convert_uns_sidf_sse (rtx target, rtx input)
19193 REAL_VALUE_TYPE TWO31r;
19194 rtx x, fp;
19196 x = expand_simple_binop (SImode, PLUS, input, GEN_INT (-2147483647 - 1),
19197 NULL, 1, OPTAB_DIRECT);
19199 fp = gen_reg_rtx (DFmode);
19200 emit_insn (gen_floatsidf2 (fp, x));
19202 real_ldexp (&TWO31r, &dconst1, 31);
19203 x = const_double_from_real_value (TWO31r, DFmode);
19205 x = expand_simple_binop (DFmode, PLUS, fp, x, target, 0, OPTAB_DIRECT);
19206 if (x != target)
19207 emit_move_insn (target, x);
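/* The expansion above relies on the scalar identity (illustrative):
     (double) u == (double) (int) (u - 0x80000000u) + 0x1.0p31
   for any 32-bit unsigned u: subtracting 2^31 recentres u into the signed
   range so the ordinary signed int-to-double conversion applies, and the
   bias is added back in double precision, where it is exact.  */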
19210 /* Convert a signed DImode value into a DFmode. Only used for SSE in
19211 32-bit mode; otherwise we have a direct convert instruction. */
19213 void
19214 ix86_expand_convert_sign_didf_sse (rtx target, rtx input)
19216 REAL_VALUE_TYPE TWO32r;
19217 rtx fp_lo, fp_hi, x;
19219 fp_lo = gen_reg_rtx (DFmode);
19220 fp_hi = gen_reg_rtx (DFmode);
19222 emit_insn (gen_floatsidf2 (fp_hi, gen_highpart (SImode, input)));
19224 real_ldexp (&TWO32r, &dconst1, 32);
19225 x = const_double_from_real_value (TWO32r, DFmode);
19226 fp_hi = expand_simple_binop (DFmode, MULT, fp_hi, x, fp_hi, 0, OPTAB_DIRECT);
19228 ix86_expand_convert_uns_sidf_sse (fp_lo, gen_lowpart (SImode, input));
19230 x = expand_simple_binop (DFmode, PLUS, fp_hi, fp_lo, target,
19231 0, OPTAB_DIRECT);
19232 if (x != target)
19233 emit_move_insn (target, x);
19236 /* Convert an unsigned SImode value into a SFmode, using only SSE.
19237 For x86_32, -mfpmath=sse, !optimize_size only. */
19238 void
19239 ix86_expand_convert_uns_sisf_sse (rtx target, rtx input)
19241 REAL_VALUE_TYPE ONE16r;
19242 rtx fp_hi, fp_lo, int_hi, int_lo, x;
19244 real_ldexp (&ONE16r, &dconst1, 16);
19245 x = const_double_from_real_value (ONE16r, SFmode);
19246 int_lo = expand_simple_binop (SImode, AND, input, GEN_INT(0xffff),
19247 NULL, 0, OPTAB_DIRECT);
19248 int_hi = expand_simple_binop (SImode, LSHIFTRT, input, GEN_INT(16),
19249 NULL, 0, OPTAB_DIRECT);
19250 fp_hi = gen_reg_rtx (SFmode);
19251 fp_lo = gen_reg_rtx (SFmode);
19252 emit_insn (gen_floatsisf2 (fp_hi, int_hi));
19253 emit_insn (gen_floatsisf2 (fp_lo, int_lo));
19254 fp_hi = expand_simple_binop (SFmode, MULT, fp_hi, x, fp_hi,
19255 0, OPTAB_DIRECT);
19256 fp_hi = expand_simple_binop (SFmode, PLUS, fp_hi, fp_lo, target,
19257 0, OPTAB_DIRECT);
19258 if (!rtx_equal_p (target, fp_hi))
19259 emit_move_insn (target, fp_hi);
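/* A scalar view of the expansion above (illustrative): splitting the input
   as u == (u >> 16) * 0x10000 + (u & 0xffff), both halves are exactly
   representable as floats, so
     (float) u == (float) (u >> 16) * 0x1.0p16f + (float) (u & 0xffff)
   with a single rounding in the final addition.  */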
19262 /* floatunsv{4,8}siv{4,8}sf2 expander. Expand code to convert
19263 a vector of unsigned ints VAL to vector of floats TARGET. */
19265 void
19266 ix86_expand_vector_convert_uns_vsivsf (rtx target, rtx val)
19268 rtx tmp[8];
19269 REAL_VALUE_TYPE TWO16r;
19270 machine_mode intmode = GET_MODE (val);
19271 machine_mode fltmode = GET_MODE (target);
19272 rtx (*cvt) (rtx, rtx);
19274 if (intmode == V4SImode)
19275 cvt = gen_floatv4siv4sf2;
19276 else
19277 cvt = gen_floatv8siv8sf2;
19278 tmp[0] = ix86_build_const_vector (intmode, 1, GEN_INT (0xffff));
19279 tmp[0] = force_reg (intmode, tmp[0]);
19280 tmp[1] = expand_simple_binop (intmode, AND, val, tmp[0], NULL_RTX, 1,
19281 OPTAB_DIRECT);
19282 tmp[2] = expand_simple_binop (intmode, LSHIFTRT, val, GEN_INT (16),
19283 NULL_RTX, 1, OPTAB_DIRECT);
19284 tmp[3] = gen_reg_rtx (fltmode);
19285 emit_insn (cvt (tmp[3], tmp[1]));
19286 tmp[4] = gen_reg_rtx (fltmode);
19287 emit_insn (cvt (tmp[4], tmp[2]));
19288 real_ldexp (&TWO16r, &dconst1, 16);
19289 tmp[5] = const_double_from_real_value (TWO16r, SFmode);
19290 tmp[5] = force_reg (fltmode, ix86_build_const_vector (fltmode, 1, tmp[5]));
19291 tmp[6] = expand_simple_binop (fltmode, MULT, tmp[4], tmp[5], NULL_RTX, 1,
19292 OPTAB_DIRECT);
19293 tmp[7] = expand_simple_binop (fltmode, PLUS, tmp[3], tmp[6], target, 1,
19294 OPTAB_DIRECT);
19295 if (tmp[7] != target)
19296 emit_move_insn (target, tmp[7]);
19299 /* Adjust a V*SFmode/V*DFmode value VAL so that *sfix_trunc* resp. fix_trunc*
19300 pattern can be used on it instead of *ufix_trunc* resp. fixuns_trunc*.
19301 This is done by doing just signed conversion if < 0x1p31, and otherwise by
19302 subtracting 0x1p31 first and xoring in 0x80000000 from *XORP afterwards. */
rtx
19305 ix86_expand_adjust_ufix_to_sfix_si (rtx val, rtx *xorp)
19307 REAL_VALUE_TYPE TWO31r;
19308 rtx two31r, tmp[4];
19309 machine_mode mode = GET_MODE (val);
19310 machine_mode scalarmode = GET_MODE_INNER (mode);
19311 machine_mode intmode = GET_MODE_SIZE (mode) == 32 ? V8SImode : V4SImode;
19312 rtx (*cmp) (rtx, rtx, rtx, rtx);
19313 int i;
19315 for (i = 0; i < 3; i++)
19316 tmp[i] = gen_reg_rtx (mode);
19317 real_ldexp (&TWO31r, &dconst1, 31);
19318 two31r = const_double_from_real_value (TWO31r, scalarmode);
19319 two31r = ix86_build_const_vector (mode, 1, two31r);
19320 two31r = force_reg (mode, two31r);
19321 switch (mode)
19323 case V8SFmode: cmp = gen_avx_maskcmpv8sf3; break;
19324 case V4SFmode: cmp = gen_sse_maskcmpv4sf3; break;
19325 case V4DFmode: cmp = gen_avx_maskcmpv4df3; break;
19326 case V2DFmode: cmp = gen_sse2_maskcmpv2df3; break;
19327 default: gcc_unreachable ();
19329 tmp[3] = gen_rtx_LE (mode, two31r, val);
19330 emit_insn (cmp (tmp[0], two31r, val, tmp[3]));
19331 tmp[1] = expand_simple_binop (mode, AND, tmp[0], two31r, tmp[1],
19332 0, OPTAB_DIRECT);
19333 if (intmode == V4SImode || TARGET_AVX2)
19334 *xorp = expand_simple_binop (intmode, ASHIFT,
19335 gen_lowpart (intmode, tmp[0]),
19336 GEN_INT (31), NULL_RTX, 0,
19337 OPTAB_DIRECT);
19338 else
19340 rtx two31 = GEN_INT ((unsigned HOST_WIDE_INT) 1 << 31);
19341 two31 = ix86_build_const_vector (intmode, 1, two31);
19342 *xorp = expand_simple_binop (intmode, AND,
19343 gen_lowpart (intmode, tmp[0]),
19344 two31, NULL_RTX, 0,
19345 OPTAB_DIRECT);
19347 return expand_simple_binop (mode, MINUS, val, tmp[1], tmp[2],
19348 0, OPTAB_DIRECT);
19351 /* A subroutine of ix86_build_signbit_mask. If VECT is true,
19352 then replicate the value for all elements of the vector
19353 register. */
rtx
19356 ix86_build_const_vector (machine_mode mode, bool vect, rtx value)
19358 int i, n_elt;
19359 rtvec v;
19360 machine_mode scalar_mode;
19362 switch (mode)
19364 case V64QImode:
19365 case V32QImode:
19366 case V16QImode:
19367 case V32HImode:
19368 case V16HImode:
19369 case V8HImode:
19370 case V16SImode:
19371 case V8SImode:
19372 case V4SImode:
19373 case V8DImode:
19374 case V4DImode:
19375 case V2DImode:
19376 gcc_assert (vect);
19377 case V16SFmode:
19378 case V8SFmode:
19379 case V4SFmode:
19380 case V8DFmode:
19381 case V4DFmode:
19382 case V2DFmode:
19383 n_elt = GET_MODE_NUNITS (mode);
19384 v = rtvec_alloc (n_elt);
19385 scalar_mode = GET_MODE_INNER (mode);
19387 RTVEC_ELT (v, 0) = value;
19389 for (i = 1; i < n_elt; ++i)
19390 RTVEC_ELT (v, i) = vect ? value : CONST0_RTX (scalar_mode);
19392 return gen_rtx_CONST_VECTOR (mode, v);
19394 default:
19395 gcc_unreachable ();
19399 /* A subroutine of ix86_expand_fp_absneg_operator, copysign expanders
19400 and ix86_expand_int_vcond. Create a mask for the sign bit in MODE
19401 for an SSE register. If VECT is true, then replicate the mask for
19402 all elements of the vector register. If INVERT is true, then create
19403 a mask excluding the sign bit. */
rtx
19406 ix86_build_signbit_mask (machine_mode mode, bool vect, bool invert)
19408 machine_mode vec_mode, imode;
19409 HOST_WIDE_INT hi, lo;
19410 int shift = 63;
19411 rtx v;
19412 rtx mask;
19414 /* Find the sign bit, sign extended to 2*HWI. */
19415 switch (mode)
19417 case V16SImode:
19418 case V16SFmode:
19419 case V8SImode:
19420 case V4SImode:
19421 case V8SFmode:
19422 case V4SFmode:
19423 vec_mode = mode;
19424 mode = GET_MODE_INNER (mode);
19425 imode = SImode;
19426 lo = 0x80000000, hi = lo < 0;
19427 break;
19429 case V8DImode:
19430 case V4DImode:
19431 case V2DImode:
19432 case V8DFmode:
19433 case V4DFmode:
19434 case V2DFmode:
19435 vec_mode = mode;
19436 mode = GET_MODE_INNER (mode);
19437 imode = DImode;
19438 if (HOST_BITS_PER_WIDE_INT >= 64)
19439 lo = (HOST_WIDE_INT)1 << shift, hi = -1;
19440 else
19441 lo = 0, hi = (HOST_WIDE_INT)1 << (shift - HOST_BITS_PER_WIDE_INT);
19442 break;
19444 case TImode:
19445 case TFmode:
19446 vec_mode = VOIDmode;
19447 if (HOST_BITS_PER_WIDE_INT >= 64)
19449 imode = TImode;
19450 lo = 0, hi = (HOST_WIDE_INT)1 << shift;
19452 else
19454 rtvec vec;
19456 imode = DImode;
19457 lo = 0, hi = (HOST_WIDE_INT)1 << (shift - HOST_BITS_PER_WIDE_INT);
19459 if (invert)
19461 lo = ~lo, hi = ~hi;
19462 v = constm1_rtx;
19464 else
19465 v = const0_rtx;
19467 mask = immed_double_const (lo, hi, imode);
19469 vec = gen_rtvec (2, v, mask);
19470 v = gen_rtx_CONST_VECTOR (V2DImode, vec);
19471 v = copy_to_mode_reg (mode, gen_lowpart (mode, v));
19473 return v;
19475 break;
19477 default:
19478 gcc_unreachable ();
19481 if (invert)
19482 lo = ~lo, hi = ~hi;
19484 /* Force this value into the low part of a fp vector constant. */
19485 mask = immed_double_const (lo, hi, imode);
19486 mask = gen_lowpart (mode, mask);
19488 if (vec_mode == VOIDmode)
19489 return force_reg (mode, mask);
19491 v = ix86_build_const_vector (vec_mode, vect, mask);
19492 return force_reg (vec_mode, v);
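/* For example (illustrative values): for V4SFmode with VECT set the function
   returns a register holding { 0x80000000, 0x80000000, 0x80000000,
   0x80000000 } reinterpreted as floats, and with INVERT set each element is
   0x7fffffff instead, i.e. everything except the sign bit.  */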
19495 /* Generate code for floating point ABS or NEG. */
19497 void
19498 ix86_expand_fp_absneg_operator (enum rtx_code code, machine_mode mode,
19499 rtx operands[])
19501 rtx mask, set, dst, src;
19502 bool use_sse = false;
19503 bool vector_mode = VECTOR_MODE_P (mode);
19504 machine_mode vmode = mode;
19506 if (vector_mode)
19507 use_sse = true;
19508 else if (mode == TFmode)
19509 use_sse = true;
19510 else if (TARGET_SSE_MATH)
19512 use_sse = SSE_FLOAT_MODE_P (mode);
19513 if (mode == SFmode)
19514 vmode = V4SFmode;
19515 else if (mode == DFmode)
19516 vmode = V2DFmode;
19519 /* NEG and ABS performed with SSE use bitwise mask operations.
19520 Create the appropriate mask now. */
19521 if (use_sse)
19522 mask = ix86_build_signbit_mask (vmode, vector_mode, code == ABS);
19523 else
19524 mask = NULL_RTX;
19526 dst = operands[0];
19527 src = operands[1];
19529 set = gen_rtx_fmt_e (code, mode, src);
19530 set = gen_rtx_SET (VOIDmode, dst, set);
19532 if (mask)
19534 rtx use, clob;
19535 rtvec par;
19537 use = gen_rtx_USE (VOIDmode, mask);
19538 if (vector_mode)
19539 par = gen_rtvec (2, set, use);
19540 else
19542 clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
19543 par = gen_rtvec (3, set, use, clob);
19545 emit_insn (gen_rtx_PARALLEL (VOIDmode, par));
19547 else
19548 emit_insn (set);
19551 /* Expand a copysign operation. Special case operand 0 being a constant. */
19553 void
19554 ix86_expand_copysign (rtx operands[])
19556 machine_mode mode, vmode;
19557 rtx dest, op0, op1, mask, nmask;
19559 dest = operands[0];
19560 op0 = operands[1];
19561 op1 = operands[2];
19563 mode = GET_MODE (dest);
19565 if (mode == SFmode)
19566 vmode = V4SFmode;
19567 else if (mode == DFmode)
19568 vmode = V2DFmode;
19569 else
19570 vmode = mode;
19572 if (GET_CODE (op0) == CONST_DOUBLE)
19574 rtx (*copysign_insn)(rtx, rtx, rtx, rtx);
19576 if (real_isneg (CONST_DOUBLE_REAL_VALUE (op0)))
19577 op0 = simplify_unary_operation (ABS, mode, op0, mode);
19579 if (mode == SFmode || mode == DFmode)
19581 if (op0 == CONST0_RTX (mode))
19582 op0 = CONST0_RTX (vmode);
19583 else
19585 rtx v = ix86_build_const_vector (vmode, false, op0);
19587 op0 = force_reg (vmode, v);
19590 else if (op0 != CONST0_RTX (mode))
19591 op0 = force_reg (mode, op0);
19593 mask = ix86_build_signbit_mask (vmode, 0, 0);
19595 if (mode == SFmode)
19596 copysign_insn = gen_copysignsf3_const;
19597 else if (mode == DFmode)
19598 copysign_insn = gen_copysigndf3_const;
19599 else
19600 copysign_insn = gen_copysigntf3_const;
19602 emit_insn (copysign_insn (dest, op0, op1, mask));
19604 else
19606 rtx (*copysign_insn)(rtx, rtx, rtx, rtx, rtx, rtx);
19608 nmask = ix86_build_signbit_mask (vmode, 0, 1);
19609 mask = ix86_build_signbit_mask (vmode, 0, 0);
19611 if (mode == SFmode)
19612 copysign_insn = gen_copysignsf3_var;
19613 else if (mode == DFmode)
19614 copysign_insn = gen_copysigndf3_var;
19615 else
19616 copysign_insn = gen_copysigntf3_var;
19618 emit_insn (copysign_insn (dest, NULL_RTX, op0, op1, nmask, mask));
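/* The bit-level identity behind both variants is (illustrative):
     copysign (x, y) == (x & ~SIGN_MASK) | (y & SIGN_MASK)
   where SIGN_MASK has only the floating-point sign bit set; when the
   magnitude operand is a constant its absolute value is known at compile
   time, so the x & ~SIGN_MASK half is folded away and only the OR with
   y & SIGN_MASK remains to be done at run time.  */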
19622 /* Deconstruct a copysign operation into bit masks. Operand 0 is known to
19623 be a constant, and so has already been expanded into a vector constant. */
19625 void
19626 ix86_split_copysign_const (rtx operands[])
19628 machine_mode mode, vmode;
19629 rtx dest, op0, mask, x;
19631 dest = operands[0];
19632 op0 = operands[1];
19633 mask = operands[3];
19635 mode = GET_MODE (dest);
19636 vmode = GET_MODE (mask);
19638 dest = simplify_gen_subreg (vmode, dest, mode, 0);
19639 x = gen_rtx_AND (vmode, dest, mask);
19640 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
19642 if (op0 != CONST0_RTX (vmode))
19644 x = gen_rtx_IOR (vmode, dest, op0);
19645 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
19649 /* Deconstruct a copysign operation into bit masks. Operand 0 is variable,
19650 so we have to do two masks. */
19652 void
19653 ix86_split_copysign_var (rtx operands[])
19655 machine_mode mode, vmode;
19656 rtx dest, scratch, op0, op1, mask, nmask, x;
19658 dest = operands[0];
19659 scratch = operands[1];
19660 op0 = operands[2];
19661 op1 = operands[3];
19662 nmask = operands[4];
19663 mask = operands[5];
19665 mode = GET_MODE (dest);
19666 vmode = GET_MODE (mask);
19668 if (rtx_equal_p (op0, op1))
19670 /* Shouldn't happen often (it's useless, obviously), but when it does
19671 we'd generate incorrect code if we continue below. */
19672 emit_move_insn (dest, op0);
19673 return;
19676 if (REG_P (mask) && REGNO (dest) == REGNO (mask)) /* alternative 0 */
19678 gcc_assert (REGNO (op1) == REGNO (scratch));
19680 x = gen_rtx_AND (vmode, scratch, mask);
19681 emit_insn (gen_rtx_SET (VOIDmode, scratch, x));
19683 dest = mask;
19684 op0 = simplify_gen_subreg (vmode, op0, mode, 0);
19685 x = gen_rtx_NOT (vmode, dest);
19686 x = gen_rtx_AND (vmode, x, op0);
19687 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
19689 else
19691 if (REGNO (op1) == REGNO (scratch)) /* alternative 1,3 */
19693 x = gen_rtx_AND (vmode, scratch, mask);
19695 else /* alternative 2,4 */
19697 gcc_assert (REGNO (mask) == REGNO (scratch));
19698 op1 = simplify_gen_subreg (vmode, op1, mode, 0);
19699 x = gen_rtx_AND (vmode, scratch, op1);
19701 emit_insn (gen_rtx_SET (VOIDmode, scratch, x));
19703 if (REGNO (op0) == REGNO (dest)) /* alternative 1,2 */
19705 dest = simplify_gen_subreg (vmode, op0, mode, 0);
19706 x = gen_rtx_AND (vmode, dest, nmask);
19708 else /* alternative 3,4 */
19710 gcc_assert (REGNO (nmask) == REGNO (dest));
19711 dest = nmask;
19712 op0 = simplify_gen_subreg (vmode, op0, mode, 0);
19713 x = gen_rtx_AND (vmode, dest, op0);
19715 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
19718 x = gen_rtx_IOR (vmode, dest, scratch);
19719 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
19722 /* Return TRUE or FALSE depending on whether the first SET in INSN
19723 has source and destination with matching CC modes and the
19724 CC mode is at least as constrained as REQ_MODE. */
19726 bool
19727 ix86_match_ccmode (rtx insn, machine_mode req_mode)
19729 rtx set;
19730 machine_mode set_mode;
19732 set = PATTERN (insn);
19733 if (GET_CODE (set) == PARALLEL)
19734 set = XVECEXP (set, 0, 0);
19735 gcc_assert (GET_CODE (set) == SET);
19736 gcc_assert (GET_CODE (SET_SRC (set)) == COMPARE);
19738 set_mode = GET_MODE (SET_DEST (set));
19739 switch (set_mode)
19741 case CCNOmode:
19742 if (req_mode != CCNOmode
19743 && (req_mode != CCmode
19744 || XEXP (SET_SRC (set), 1) != const0_rtx))
19745 return false;
19746 break;
19747 case CCmode:
19748 if (req_mode == CCGCmode)
19749 return false;
19750 /* FALLTHRU */
19751 case CCGCmode:
19752 if (req_mode == CCGOCmode || req_mode == CCNOmode)
19753 return false;
19754 /* FALLTHRU */
19755 case CCGOCmode:
19756 if (req_mode == CCZmode)
19757 return false;
19758 /* FALLTHRU */
19759 case CCZmode:
19760 break;
19762 case CCAmode:
19763 case CCCmode:
19764 case CCOmode:
19765 case CCSmode:
19766 if (set_mode != req_mode)
19767 return false;
19768 break;
19770 default:
19771 gcc_unreachable ();
19774 return GET_MODE (SET_SRC (set)) == set_mode;
19777 /* Generate insn patterns to do an integer compare of OPERANDS. */
19779 static rtx
19780 ix86_expand_int_compare (enum rtx_code code, rtx op0, rtx op1)
19782 machine_mode cmpmode;
19783 rtx tmp, flags;
19785 cmpmode = SELECT_CC_MODE (code, op0, op1);
19786 flags = gen_rtx_REG (cmpmode, FLAGS_REG);
19788 /* This is very simple, but making the interface the same as in the
19789 FP case makes the rest of the code easier. */
19790 tmp = gen_rtx_COMPARE (cmpmode, op0, op1);
19791 emit_insn (gen_rtx_SET (VOIDmode, flags, tmp));
19793 /* Return the test that should be put into the flags user, i.e.
19794 the bcc, scc, or cmov instruction. */
19795 return gen_rtx_fmt_ee (code, VOIDmode, flags, const0_rtx);
19798 /* Figure out whether to use ordered or unordered fp comparisons.
19799 Return the appropriate mode to use. */
19801 machine_mode
19802 ix86_fp_compare_mode (enum rtx_code)
19804 /* ??? In order to make all comparisons reversible, we do all comparisons
19805 non-trapping when compiling for IEEE. Once gcc is able to distinguish
19806 all forms of trapping and nontrapping comparisons, we can make inequality
19807 comparisons trapping again, since it results in better code when using
19808 FCOM based compares. */
19809 return TARGET_IEEE_FP ? CCFPUmode : CCFPmode;
19812 machine_mode
19813 ix86_cc_mode (enum rtx_code code, rtx op0, rtx op1)
19815 machine_mode mode = GET_MODE (op0);
19817 if (SCALAR_FLOAT_MODE_P (mode))
19819 gcc_assert (!DECIMAL_FLOAT_MODE_P (mode));
19820 return ix86_fp_compare_mode (code);
19823 switch (code)
19825 /* Only zero flag is needed. */
19826 case EQ: /* ZF=0 */
19827 case NE: /* ZF!=0 */
19828 return CCZmode;
19829 /* Codes needing carry flag. */
19830 case GEU: /* CF=0 */
19831 case LTU: /* CF=1 */
19832 /* Detect overflow checks. They need just the carry flag. */
19833 if (GET_CODE (op0) == PLUS
19834 && rtx_equal_p (op1, XEXP (op0, 0)))
19835 return CCCmode;
19836 else
19837 return CCmode;
19838 case GTU: /* CF=0 & ZF=0 */
19839 case LEU: /* CF=1 | ZF=1 */
19840 return CCmode;
19841 /* Codes possibly doable only with the sign flag when
19842 comparing against zero. */
19843 case GE: /* SF=OF or SF=0 */
19844 case LT: /* SF<>OF or SF=1 */
19845 if (op1 == const0_rtx)
19846 return CCGOCmode;
19847 else
19848 /* For other cases Carry flag is not required. */
19849 return CCGCmode;
19850 /* Codes doable only with the sign flag when comparing
19851 against zero, but we lack a jump instruction for them,
19852 so we need to use relational tests against overflow,
19853 which thus needs to be zero. */
19854 case GT: /* ZF=0 & SF=OF */
19855 case LE: /* ZF=1 | SF<>OF */
19856 if (op1 == const0_rtx)
19857 return CCNOmode;
19858 else
19859 return CCGCmode;
19860 /* The strcmp pattern does (use flags) and combine may ask us for the
19861 proper mode. */
19862 case USE:
19863 return CCmode;
19864 default:
19865 gcc_unreachable ();
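/* For instance, an unsigned-add overflow check such as "if (a + b < a)"
   reaches here as (ltu (plus a b) a); the rtx_equal_p test above recognizes
   that shape and selects CCCmode, since the carry flag alone decides it.  */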
19869 /* Return the fixed registers used for condition codes. */
19871 static bool
19872 ix86_fixed_condition_code_regs (unsigned int *p1, unsigned int *p2)
19874 *p1 = FLAGS_REG;
19875 *p2 = FPSR_REG;
19876 return true;
19879 /* If two condition code modes are compatible, return a condition code
19880 mode which is compatible with both. Otherwise, return
19881 VOIDmode. */
19883 static machine_mode
19884 ix86_cc_modes_compatible (machine_mode m1, machine_mode m2)
19886 if (m1 == m2)
19887 return m1;
19889 if (GET_MODE_CLASS (m1) != MODE_CC || GET_MODE_CLASS (m2) != MODE_CC)
19890 return VOIDmode;
19892 if ((m1 == CCGCmode && m2 == CCGOCmode)
19893 || (m1 == CCGOCmode && m2 == CCGCmode))
19894 return CCGCmode;
19896 if (m1 == CCZmode && (m2 == CCGCmode || m2 == CCGOCmode))
19897 return m2;
19898 else if (m2 == CCZmode && (m1 == CCGCmode || m1 == CCGOCmode))
19899 return m1;
19901 switch (m1)
19903 default:
19904 gcc_unreachable ();
19906 case CCmode:
19907 case CCGCmode:
19908 case CCGOCmode:
19909 case CCNOmode:
19910 case CCAmode:
19911 case CCCmode:
19912 case CCOmode:
19913 case CCSmode:
19914 case CCZmode:
19915 switch (m2)
19917 default:
19918 return VOIDmode;
19920 case CCmode:
19921 case CCGCmode:
19922 case CCGOCmode:
19923 case CCNOmode:
19924 case CCAmode:
19925 case CCCmode:
19926 case CCOmode:
19927 case CCSmode:
19928 case CCZmode:
19929 return CCmode;
19932 case CCFPmode:
19933 case CCFPUmode:
19934 /* These are only compatible with themselves, which we already
19935 checked above. */
19936 return VOIDmode;
19941 /* Return a comparison we can do that is equivalent to
19942 swap_condition (code), apart possibly from orderedness.
19943 But never change orderedness if TARGET_IEEE_FP, returning
19944 UNKNOWN in that case if necessary. */
19946 static enum rtx_code
19947 ix86_fp_swap_condition (enum rtx_code code)
19949 switch (code)
19951 case GT: /* GTU - CF=0 & ZF=0 */
19952 return TARGET_IEEE_FP ? UNKNOWN : UNLT;
19953 case GE: /* GEU - CF=0 */
19954 return TARGET_IEEE_FP ? UNKNOWN : UNLE;
19955 case UNLT: /* LTU - CF=1 */
19956 return TARGET_IEEE_FP ? UNKNOWN : GT;
19957 case UNLE: /* LEU - CF=1 | ZF=1 */
19958 return TARGET_IEEE_FP ? UNKNOWN : GE;
19959 default:
19960 return swap_condition (code);
19964 /* Return the cost of comparison CODE using the best strategy for performance.
19965 All following functions use the number of instructions as a cost metric.
19966 In the future this should be tweaked to compute bytes for optimize_size and
19967 take into account the performance of various instructions on various CPUs. */
19969 static int
19970 ix86_fp_comparison_cost (enum rtx_code code)
19972 int arith_cost;
19974 /* The cost of code using bit-twiddling on %ah. */
19975 switch (code)
19977 case UNLE:
19978 case UNLT:
19979 case LTGT:
19980 case GT:
19981 case GE:
19982 case UNORDERED:
19983 case ORDERED:
19984 case UNEQ:
19985 arith_cost = 4;
19986 break;
19987 case LT:
19988 case NE:
19989 case EQ:
19990 case UNGE:
19991 arith_cost = TARGET_IEEE_FP ? 5 : 4;
19992 break;
19993 case LE:
19994 case UNGT:
19995 arith_cost = TARGET_IEEE_FP ? 6 : 4;
19996 break;
19997 default:
19998 gcc_unreachable ();
20001 switch (ix86_fp_comparison_strategy (code))
20003 case IX86_FPCMP_COMI:
20004 return arith_cost > 4 ? 3 : 2;
20005 case IX86_FPCMP_SAHF:
20006 return arith_cost > 4 ? 4 : 3;
20007 default:
20008 return arith_cost;
20012 /* Return the strategy to use for a floating-point comparison. We assume
20013 that fcomi is always preferable where available, since that is also true
20014 when looking at size (2 bytes, vs. 3 for fnstsw+sahf and at least 5 for fnstsw+test). */
20016 enum ix86_fpcmp_strategy
20017 ix86_fp_comparison_strategy (enum rtx_code)
20019 /* Do fcomi/sahf based test when profitable. */
20021 if (TARGET_CMOVE)
20022 return IX86_FPCMP_COMI;
20024 if (TARGET_SAHF && (TARGET_USE_SAHF || optimize_insn_for_size_p ()))
20025 return IX86_FPCMP_SAHF;
20027 return IX86_FPCMP_ARITH;
20030 /* Swap, force into registers, or otherwise massage the two operands
20031 to a fp comparison. The operands are updated in place; the new
20032 comparison code is returned. */
20034 static enum rtx_code
20035 ix86_prepare_fp_compare_args (enum rtx_code code, rtx *pop0, rtx *pop1)
20037 machine_mode fpcmp_mode = ix86_fp_compare_mode (code);
20038 rtx op0 = *pop0, op1 = *pop1;
20039 machine_mode op_mode = GET_MODE (op0);
20040 int is_sse = TARGET_SSE_MATH && SSE_FLOAT_MODE_P (op_mode);
20042 /* All of the unordered compare instructions only work on registers.
20043 The same is true of the fcomi compare instructions. The XFmode
20044 compare instructions require registers except when comparing
20045 against zero or when converting operand 1 from fixed point to
20046 floating point. */
20048 if (!is_sse
20049 && (fpcmp_mode == CCFPUmode
20050 || (op_mode == XFmode
20051 && ! (standard_80387_constant_p (op0) == 1
20052 || standard_80387_constant_p (op1) == 1)
20053 && GET_CODE (op1) != FLOAT)
20054 || ix86_fp_comparison_strategy (code) == IX86_FPCMP_COMI))
20056 op0 = force_reg (op_mode, op0);
20057 op1 = force_reg (op_mode, op1);
20059 else
20061 /* %%% We only allow op1 in memory; op0 must be st(0). So swap
20062 things around if they appear profitable, otherwise force op0
20063 into a register. */
20065 if (standard_80387_constant_p (op0) == 0
20066 || (MEM_P (op0)
20067 && ! (standard_80387_constant_p (op1) == 0
20068 || MEM_P (op1))))
20070 enum rtx_code new_code = ix86_fp_swap_condition (code);
20071 if (new_code != UNKNOWN)
20073 std::swap (op0, op1);
20074 code = new_code;
20078 if (!REG_P (op0))
20079 op0 = force_reg (op_mode, op0);
20081 if (CONSTANT_P (op1))
20083 int tmp = standard_80387_constant_p (op1);
20084 if (tmp == 0)
20085 op1 = validize_mem (force_const_mem (op_mode, op1));
20086 else if (tmp == 1)
20088 if (TARGET_CMOVE)
20089 op1 = force_reg (op_mode, op1);
20091 else
20092 op1 = force_reg (op_mode, op1);
20096 /* Try to rearrange the comparison to make it cheaper. */
20097 if (ix86_fp_comparison_cost (code)
20098 > ix86_fp_comparison_cost (swap_condition (code))
20099 && (REG_P (op1) || can_create_pseudo_p ()))
20101 std::swap (op0, op1);
20102 code = swap_condition (code);
20103 if (!REG_P (op0))
20104 op0 = force_reg (op_mode, op0);
20107 *pop0 = op0;
20108 *pop1 = op1;
20109 return code;
20112 /* Convert the comparison codes we use to represent an FP comparison to the
20113 integer code that will result in a proper branch. Return UNKNOWN if no
20114 such code is available. */
20116 enum rtx_code
20117 ix86_fp_compare_code_to_integer (enum rtx_code code)
20119 switch (code)
20121 case GT:
20122 return GTU;
20123 case GE:
20124 return GEU;
20125 case ORDERED:
20126 case UNORDERED:
20127 return code;
20128 break;
20129 case UNEQ:
20130 return EQ;
20131 break;
20132 case UNLT:
20133 return LTU;
20134 break;
20135 case UNLE:
20136 return LEU;
20137 break;
20138 case LTGT:
20139 return NE;
20140 break;
20141 default:
20142 return UNKNOWN;
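/* The mapping above reflects how fcomi/fucomi and comiss/comisd set the
   flags: an FP compare leaves its result in CF and ZF just like an unsigned
   integer compare, so e.g. a floating-point GT is tested with the GTU ("ja")
   condition rather than the signed "jg".  */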
20146 /* Generate insn patterns to do a floating point compare of OPERANDS. */
20148 static rtx
20149 ix86_expand_fp_compare (enum rtx_code code, rtx op0, rtx op1, rtx scratch)
20151 machine_mode fpcmp_mode, intcmp_mode;
20152 rtx tmp, tmp2;
20154 fpcmp_mode = ix86_fp_compare_mode (code);
20155 code = ix86_prepare_fp_compare_args (code, &op0, &op1);
20157 /* Do fcomi/sahf based test when profitable. */
20158 switch (ix86_fp_comparison_strategy (code))
20160 case IX86_FPCMP_COMI:
20161 intcmp_mode = fpcmp_mode;
20162 tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
20163 tmp = gen_rtx_SET (VOIDmode, gen_rtx_REG (fpcmp_mode, FLAGS_REG),
20164 tmp);
20165 emit_insn (tmp);
20166 break;
20168 case IX86_FPCMP_SAHF:
20169 intcmp_mode = fpcmp_mode;
20170 tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
20171 tmp = gen_rtx_SET (VOIDmode, gen_rtx_REG (fpcmp_mode, FLAGS_REG),
20172 tmp);
20174 if (!scratch)
20175 scratch = gen_reg_rtx (HImode);
20176 tmp2 = gen_rtx_CLOBBER (VOIDmode, scratch);
20177 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, tmp, tmp2)));
20178 break;
20180 case IX86_FPCMP_ARITH:
20181 /* Sadness wrt reg-stack pops killing fpsr -- gotta get fnstsw first. */
20182 tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
20183 tmp2 = gen_rtx_UNSPEC (HImode, gen_rtvec (1, tmp), UNSPEC_FNSTSW);
20184 if (!scratch)
20185 scratch = gen_reg_rtx (HImode);
20186 emit_insn (gen_rtx_SET (VOIDmode, scratch, tmp2));
20188 /* In the unordered case, we have to check C2 for NaN's, which
20189 doesn't happen to work out to anything nice combination-wise.
20190 So do some bit twiddling on the value we've got in AH to come
20191 up with an appropriate set of condition codes. */
20193 intcmp_mode = CCNOmode;
20194 switch (code)
20196 case GT:
20197 case UNGT:
20198 if (code == GT || !TARGET_IEEE_FP)
20200 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x45)));
20201 code = EQ;
20203 else
20205 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
20206 emit_insn (gen_addqi_ext_1 (scratch, scratch, constm1_rtx));
20207 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x44)));
20208 intcmp_mode = CCmode;
20209 code = GEU;
20211 break;
20212 case LT:
20213 case UNLT:
20214 if (code == LT && TARGET_IEEE_FP)
20216 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
20217 emit_insn (gen_cmpqi_ext_3 (scratch, const1_rtx));
20218 intcmp_mode = CCmode;
20219 code = EQ;
20221 else
20223 emit_insn (gen_testqi_ext_ccno_0 (scratch, const1_rtx));
20224 code = NE;
20226 break;
20227 case GE:
20228 case UNGE:
20229 if (code == GE || !TARGET_IEEE_FP)
20231 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x05)));
20232 code = EQ;
20234 else
20236 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
20237 emit_insn (gen_xorqi_cc_ext_1 (scratch, scratch, const1_rtx));
20238 code = NE;
20240 break;
20241 case LE:
20242 case UNLE:
20243 if (code == LE && TARGET_IEEE_FP)
20245 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
20246 emit_insn (gen_addqi_ext_1 (scratch, scratch, constm1_rtx));
20247 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x40)));
20248 intcmp_mode = CCmode;
20249 code = LTU;
20251 else
20253 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x45)));
20254 code = NE;
20256 break;
20257 case EQ:
20258 case UNEQ:
20259 if (code == EQ && TARGET_IEEE_FP)
20261 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
20262 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x40)));
20263 intcmp_mode = CCmode;
20264 code = EQ;
20266 else
20268 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x40)));
20269 code = NE;
20271 break;
20272 case NE:
20273 case LTGT:
20274 if (code == NE && TARGET_IEEE_FP)
20276 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
20277 emit_insn (gen_xorqi_cc_ext_1 (scratch, scratch,
20278 GEN_INT (0x40)));
20279 code = NE;
20281 else
20283 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x40)));
20284 code = EQ;
20286 break;
20288 case UNORDERED:
20289 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x04)));
20290 code = NE;
20291 break;
20292 case ORDERED:
20293 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x04)));
20294 code = EQ;
20295 break;
20297 default:
20298 gcc_unreachable ();
20300 break;
20302 default:
20303 gcc_unreachable ();
20306 /* Return the test that should be put into the flags user, i.e.
20307 the bcc, scc, or cmov instruction. */
20308 return gen_rtx_fmt_ee (code, VOIDmode,
20309 gen_rtx_REG (intcmp_mode, FLAGS_REG),
20310 const0_rtx);
20313 static rtx
20314 ix86_expand_compare (enum rtx_code code, rtx op0, rtx op1)
20316 rtx ret;
20318 if (GET_MODE_CLASS (GET_MODE (op0)) == MODE_CC)
20319 ret = gen_rtx_fmt_ee (code, VOIDmode, op0, op1);
20321 else if (SCALAR_FLOAT_MODE_P (GET_MODE (op0)))
20323 gcc_assert (!DECIMAL_FLOAT_MODE_P (GET_MODE (op0)));
20324 ret = ix86_expand_fp_compare (code, op0, op1, NULL_RTX);
20326 else
20327 ret = ix86_expand_int_compare (code, op0, op1);
20329 return ret;
20332 void
20333 ix86_expand_branch (enum rtx_code code, rtx op0, rtx op1, rtx label)
20335 machine_mode mode = GET_MODE (op0);
20336 rtx tmp;
20338 switch (mode)
20340 case SFmode:
20341 case DFmode:
20342 case XFmode:
20343 case QImode:
20344 case HImode:
20345 case SImode:
20346 simple:
20347 tmp = ix86_expand_compare (code, op0, op1);
20348 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
20349 gen_rtx_LABEL_REF (VOIDmode, label),
20350 pc_rtx);
20351 emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
20352 return;
20354 case DImode:
20355 if (TARGET_64BIT)
20356 goto simple;
20357 case TImode:
20358 /* Expand DImode branch into multiple compare+branch. */
20360 rtx lo[2], hi[2];
20361 rtx_code_label *label2;
20362 enum rtx_code code1, code2, code3;
20363 machine_mode submode;
20365 if (CONSTANT_P (op0) && !CONSTANT_P (op1))
20367 std::swap (op0, op1);
20368 code = swap_condition (code);
20371 split_double_mode (mode, &op0, 1, lo+0, hi+0);
20372 split_double_mode (mode, &op1, 1, lo+1, hi+1);
20374 submode = mode == DImode ? SImode : DImode;
20376 /* When comparing for equality, we can use (hi0^hi1)|(lo0^lo1) to
20377 avoid two branches. This costs one extra insn, so disable when
20378 optimizing for size. */
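/* E.g. a 64-bit "a == b" on 32-bit x86 can be tested as (illustrative):
     t = (a.hi ^ b.hi) | (a.lo ^ b.lo);
     if (t == 0) goto label;
   using a single conditional branch instead of two.  */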
20380 if ((code == EQ || code == NE)
20381 && (!optimize_insn_for_size_p ()
20382 || hi[1] == const0_rtx || lo[1] == const0_rtx))
20384 rtx xor0, xor1;
20386 xor1 = hi[0];
20387 if (hi[1] != const0_rtx)
20388 xor1 = expand_binop (submode, xor_optab, xor1, hi[1],
20389 NULL_RTX, 0, OPTAB_WIDEN);
20391 xor0 = lo[0];
20392 if (lo[1] != const0_rtx)
20393 xor0 = expand_binop (submode, xor_optab, xor0, lo[1],
20394 NULL_RTX, 0, OPTAB_WIDEN);
20396 tmp = expand_binop (submode, ior_optab, xor1, xor0,
20397 NULL_RTX, 0, OPTAB_WIDEN);
20399 ix86_expand_branch (code, tmp, const0_rtx, label);
20400 return;
20403 /* Otherwise, if we are doing less-than or greater-or-equal-than,
20404 op1 is a constant and the low word is zero, then we can just
20405 examine the high word. Similarly for low word -1 and
20406 less-or-equal-than or greater-than. */
20408 if (CONST_INT_P (hi[1]))
20409 switch (code)
20411 case LT: case LTU: case GE: case GEU:
20412 if (lo[1] == const0_rtx)
20414 ix86_expand_branch (code, hi[0], hi[1], label);
20415 return;
20417 break;
20418 case LE: case LEU: case GT: case GTU:
20419 if (lo[1] == constm1_rtx)
20421 ix86_expand_branch (code, hi[0], hi[1], label);
20422 return;
20424 break;
20425 default:
20426 break;
20429 /* Otherwise, we need two or three jumps. */
20431 label2 = gen_label_rtx ();
20433 code1 = code;
20434 code2 = swap_condition (code);
20435 code3 = unsigned_condition (code);
20437 switch (code)
20439 case LT: case GT: case LTU: case GTU:
20440 break;
20442 case LE: code1 = LT; code2 = GT; break;
20443 case GE: code1 = GT; code2 = LT; break;
20444 case LEU: code1 = LTU; code2 = GTU; break;
20445 case GEU: code1 = GTU; code2 = LTU; break;
20447 case EQ: code1 = UNKNOWN; code2 = NE; break;
20448 case NE: code2 = UNKNOWN; break;
20450 default:
20451 gcc_unreachable ();
20455 * a < b =>
20456 * if (hi(a) < hi(b)) goto true;
20457 * if (hi(a) > hi(b)) goto false;
20458 * if (lo(a) < lo(b)) goto true;
20459 * false:
20462 if (code1 != UNKNOWN)
20463 ix86_expand_branch (code1, hi[0], hi[1], label);
20464 if (code2 != UNKNOWN)
20465 ix86_expand_branch (code2, hi[0], hi[1], label2);
20467 ix86_expand_branch (code3, lo[0], lo[1], label);
20469 if (code2 != UNKNOWN)
20470 emit_label (label2);
20471 return;
20474 default:
20475 gcc_assert (GET_MODE_CLASS (GET_MODE (op0)) == MODE_CC);
20476 goto simple;
20480 /* Split branch based on floating point condition. */
20481 void
20482 ix86_split_fp_branch (enum rtx_code code, rtx op1, rtx op2,
20483 rtx target1, rtx target2, rtx tmp)
20485 rtx condition;
20486 rtx i;
20488 if (target2 != pc_rtx)
20490 rtx tmp = target2;
20491 code = reverse_condition_maybe_unordered (code);
20492 target2 = target1;
20493 target1 = tmp;
20496 condition = ix86_expand_fp_compare (code, op1, op2,
20497 tmp);
20499 i = emit_jump_insn (gen_rtx_SET
20500 (VOIDmode, pc_rtx,
20501 gen_rtx_IF_THEN_ELSE (VOIDmode,
20502 condition, target1, target2)));
20503 if (split_branch_probability >= 0)
20504 add_int_reg_note (i, REG_BR_PROB, split_branch_probability);
20507 void
20508 ix86_expand_setcc (rtx dest, enum rtx_code code, rtx op0, rtx op1)
20510 rtx ret;
20512 gcc_assert (GET_MODE (dest) == QImode);
20514 ret = ix86_expand_compare (code, op0, op1);
20515 PUT_MODE (ret, QImode);
20516 emit_insn (gen_rtx_SET (VOIDmode, dest, ret));
20519 /* Expand a comparison setting or clearing the carry flag. Return true
20520 when successful and set *POP to the comparison operation. */
20521 static bool
20522 ix86_expand_carry_flag_compare (enum rtx_code code, rtx op0, rtx op1, rtx *pop)
20524 machine_mode mode =
20525 GET_MODE (op0) != VOIDmode ? GET_MODE (op0) : GET_MODE (op1);
20527 /* Do not handle double-mode compares that go through special path. */
20528 if (mode == (TARGET_64BIT ? TImode : DImode))
20529 return false;
20531 if (SCALAR_FLOAT_MODE_P (mode))
20533 rtx compare_op;
20534 rtx_insn *compare_seq;
20536 gcc_assert (!DECIMAL_FLOAT_MODE_P (mode));
20538 /* Shortcut: the following common codes never translate
20539 into carry flag compares. */
20540 if (code == EQ || code == NE || code == UNEQ || code == LTGT
20541 || code == ORDERED || code == UNORDERED)
20542 return false;
20544 /* These comparisons require the zero flag; swap operands so they won't. */
20545 if ((code == GT || code == UNLE || code == LE || code == UNGT)
20546 && !TARGET_IEEE_FP)
20548 std::swap (op0, op1);
20549 code = swap_condition (code);
20552 /* Try to expand the comparison and verify that we end up with a
20553 carry-flag-based comparison. This fails to be true only when
20554 we decide to expand the comparison using arithmetic, which is
20555 not a common scenario. */
20556 start_sequence ();
20557 compare_op = ix86_expand_fp_compare (code, op0, op1, NULL_RTX);
20558 compare_seq = get_insns ();
20559 end_sequence ();
20561 if (GET_MODE (XEXP (compare_op, 0)) == CCFPmode
20562 || GET_MODE (XEXP (compare_op, 0)) == CCFPUmode)
20563 code = ix86_fp_compare_code_to_integer (GET_CODE (compare_op));
20564 else
20565 code = GET_CODE (compare_op);
20567 if (code != LTU && code != GEU)
20568 return false;
20570 emit_insn (compare_seq);
20571 *pop = compare_op;
20572 return true;
20575 if (!INTEGRAL_MODE_P (mode))
20576 return false;
20578 switch (code)
20580 case LTU:
20581 case GEU:
20582 break;
20584 /* Convert a==0 into (unsigned)a<1. */
20585 case EQ:
20586 case NE:
20587 if (op1 != const0_rtx)
20588 return false;
20589 op1 = const1_rtx;
20590 code = (code == EQ ? LTU : GEU);
20591 break;
20593 /* Convert a>b into b<a or a>=b+1. */
20594 case GTU:
20595 case LEU:
20596 if (CONST_INT_P (op1))
20598 op1 = gen_int_mode (INTVAL (op1) + 1, GET_MODE (op0));
20599 /* Bail out on overflow. We could still swap the operands, but that
20600 would force loading the constant into a register. */
20601 if (op1 == const0_rtx
20602 || !x86_64_immediate_operand (op1, GET_MODE (op1)))
20603 return false;
20604 code = (code == GTU ? GEU : LTU);
20606 else
20608 std::swap (op1, op0);
20609 code = (code == GTU ? LTU : GEU);
20611 break;
20613 /* Convert a>=0 into (unsigned)a<0x80000000. */
20614 case LT:
20615 case GE:
20616 if (mode == DImode || op1 != const0_rtx)
20617 return false;
20618 op1 = gen_int_mode (1 << (GET_MODE_BITSIZE (mode) - 1), mode);
20619 code = (code == LT ? GEU : LTU);
20620 break;
20621 case LE:
20622 case GT:
20623 if (mode == DImode || op1 != constm1_rtx)
20624 return false;
20625 op1 = gen_int_mode (1 << (GET_MODE_BITSIZE (mode) - 1), mode);
20626 code = (code == LE ? GEU : LTU);
20627 break;
20629 default:
20630 return false;
20632 /* Swapping operands may cause a constant to appear as the first operand. */
20633 if (!nonimmediate_operand (op0, VOIDmode))
20635 if (!can_create_pseudo_p ())
20636 return false;
20637 op0 = force_reg (mode, op0);
20639 *pop = ix86_expand_compare (code, op0, op1);
20640 gcc_assert (GET_CODE (*pop) == LTU || GET_CODE (*pop) == GEU);
20641 return true;
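/* Summary of the integer rewrites above (illustrative; a is a register, C a
   constant, and the 0x80000000 forms are shown for 32-bit modes):
     a == 0   ->  a <u 1              a != 0   ->  a >=u 1
     a >u C   ->  a >=u C + 1         a <=u C  ->  a <u C + 1
     a >= 0   ->  a <u 0x80000000     a <  0   ->  a >=u 0x80000000
     a > -1   ->  a <u 0x80000000     a <= -1  ->  a >=u 0x80000000
   each of which is decided by the carry flag alone after a cmp.  */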
20644 bool
20645 ix86_expand_int_movcc (rtx operands[])
20647 enum rtx_code code = GET_CODE (operands[1]), compare_code;
20648 rtx_insn *compare_seq;
20649 rtx compare_op;
20650 machine_mode mode = GET_MODE (operands[0]);
20651 bool sign_bit_compare_p = false;
20652 rtx op0 = XEXP (operands[1], 0);
20653 rtx op1 = XEXP (operands[1], 1);
20655 if (GET_MODE (op0) == TImode
20656 || (GET_MODE (op0) == DImode
20657 && !TARGET_64BIT))
20658 return false;
20660 start_sequence ();
20661 compare_op = ix86_expand_compare (code, op0, op1);
20662 compare_seq = get_insns ();
20663 end_sequence ();
20665 compare_code = GET_CODE (compare_op);
20667 if ((op1 == const0_rtx && (code == GE || code == LT))
20668 || (op1 == constm1_rtx && (code == GT || code == LE)))
20669 sign_bit_compare_p = true;
20671 /* Don't attempt mode expansion here -- if we had to expand 5 or 6
20672 HImode insns, we'd be swallowed in word prefix ops. */
20674 if ((mode != HImode || TARGET_FAST_PREFIX)
20675 && (mode != (TARGET_64BIT ? TImode : DImode))
20676 && CONST_INT_P (operands[2])
20677 && CONST_INT_P (operands[3]))
20679 rtx out = operands[0];
20680 HOST_WIDE_INT ct = INTVAL (operands[2]);
20681 HOST_WIDE_INT cf = INTVAL (operands[3]);
20682 HOST_WIDE_INT diff;
20684 diff = ct - cf;
20685 /* Sign bit compares are better done using shifts than by using
20686 sbb. */
20687 if (sign_bit_compare_p
20688 || ix86_expand_carry_flag_compare (code, op0, op1, &compare_op))
20690 /* Detect overlap between destination and compare sources. */
20691 rtx tmp = out;
20693 if (!sign_bit_compare_p)
20695 rtx flags;
20696 bool fpcmp = false;
20698 compare_code = GET_CODE (compare_op);
20700 flags = XEXP (compare_op, 0);
20702 if (GET_MODE (flags) == CCFPmode
20703 || GET_MODE (flags) == CCFPUmode)
20705 fpcmp = true;
20706 compare_code
20707 = ix86_fp_compare_code_to_integer (compare_code);
20710 /* To simplify the rest of the code, restrict to the GEU case. */
20711 if (compare_code == LTU)
20713 HOST_WIDE_INT tmp = ct;
20714 ct = cf;
20715 cf = tmp;
20716 compare_code = reverse_condition (compare_code);
20717 code = reverse_condition (code);
20719 else
20721 if (fpcmp)
20722 PUT_CODE (compare_op,
20723 reverse_condition_maybe_unordered
20724 (GET_CODE (compare_op)));
20725 else
20726 PUT_CODE (compare_op,
20727 reverse_condition (GET_CODE (compare_op)));
20729 diff = ct - cf;
20731 if (reg_overlap_mentioned_p (out, op0)
20732 || reg_overlap_mentioned_p (out, op1))
20733 tmp = gen_reg_rtx (mode);
20735 if (mode == DImode)
20736 emit_insn (gen_x86_movdicc_0_m1 (tmp, flags, compare_op));
20737 else
20738 emit_insn (gen_x86_movsicc_0_m1 (gen_lowpart (SImode, tmp),
20739 flags, compare_op));
20741 else
20743 if (code == GT || code == GE)
20744 code = reverse_condition (code);
20745 else
20747 HOST_WIDE_INT tmp = ct;
20748 ct = cf;
20749 cf = tmp;
20750 diff = ct - cf;
20752 tmp = emit_store_flag (tmp, code, op0, op1, VOIDmode, 0, -1);
20755 if (diff == 1)
20758 * cmpl op0,op1
20759 * sbbl dest,dest
20760 * [addl dest, ct]
20762 * Size 5 - 8.
20764 if (ct)
20765 tmp = expand_simple_binop (mode, PLUS,
20766 tmp, GEN_INT (ct),
20767 copy_rtx (tmp), 1, OPTAB_DIRECT);
20769 else if (cf == -1)
20772 * cmpl op0,op1
20773 * sbbl dest,dest
20774 * orl $ct, dest
20776 * Size 8.
20778 tmp = expand_simple_binop (mode, IOR,
20779 tmp, GEN_INT (ct),
20780 copy_rtx (tmp), 1, OPTAB_DIRECT);
20782 else if (diff == -1 && ct)
20785 * cmpl op0,op1
20786 * sbbl dest,dest
20787 * notl dest
20788 * [addl dest, cf]
20790 * Size 8 - 11.
20792 tmp = expand_simple_unop (mode, NOT, tmp, copy_rtx (tmp), 1);
20793 if (cf)
20794 tmp = expand_simple_binop (mode, PLUS,
20795 copy_rtx (tmp), GEN_INT (cf),
20796 copy_rtx (tmp), 1, OPTAB_DIRECT);
20798 else
20801 * cmpl op0,op1
20802 * sbbl dest,dest
20803 * [notl dest]
20804 * andl cf - ct, dest
20805 * [addl dest, ct]
20807 * Size 8 - 11.
20810 if (cf == 0)
20812 cf = ct;
20813 ct = 0;
20814 tmp = expand_simple_unop (mode, NOT, tmp, copy_rtx (tmp), 1);
20817 tmp = expand_simple_binop (mode, AND,
20818 copy_rtx (tmp),
20819 gen_int_mode (cf - ct, mode),
20820 copy_rtx (tmp), 1, OPTAB_DIRECT);
20821 if (ct)
20822 tmp = expand_simple_binop (mode, PLUS,
20823 copy_rtx (tmp), GEN_INT (ct),
20824 copy_rtx (tmp), 1, OPTAB_DIRECT);
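/* A worked example of the sbb idiom, assuming SImode with ct = 7 and
   cf = 3: sbb leaves dest as -1 or 0, "andl $-4, dest" gives -4 or 0,
   and "addl $7, dest" then yields 3 or 7 as required. */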
20827 if (!rtx_equal_p (tmp, out))
20828 emit_move_insn (copy_rtx (out), copy_rtx (tmp));
20830 return true;
20833 if (diff < 0)
20835 machine_mode cmp_mode = GET_MODE (op0);
20837 std::swap (ct, cf);
20838 diff = -diff;
20840 if (SCALAR_FLOAT_MODE_P (cmp_mode))
20842 gcc_assert (!DECIMAL_FLOAT_MODE_P (cmp_mode));
20844 /* We may be reversing an unordered compare to a normal compare, which
20845 is not valid in general (we may convert a non-trapping condition
20846 to a trapping one); however, on i386 we currently emit all
20847 comparisons unordered. */
20848 compare_code = reverse_condition_maybe_unordered (compare_code);
20849 code = reverse_condition_maybe_unordered (code);
20851 else
20853 compare_code = reverse_condition (compare_code);
20854 code = reverse_condition (code);
20858 compare_code = UNKNOWN;
20859 if (GET_MODE_CLASS (GET_MODE (op0)) == MODE_INT
20860 && CONST_INT_P (op1))
20862 if (op1 == const0_rtx
20863 && (code == LT || code == GE))
20864 compare_code = code;
20865 else if (op1 == constm1_rtx)
20867 if (code == LE)
20868 compare_code = LT;
20869 else if (code == GT)
20870 compare_code = GE;
20874 /* Optimize dest = (op0 < 0) ? -1 : cf. */
20875 if (compare_code != UNKNOWN
20876 && GET_MODE (op0) == GET_MODE (out)
20877 && (cf == -1 || ct == -1))
20879 /* If the lea code below could be used, only optimize
20880 if it results in a 2-insn sequence. */
20882 if (! (diff == 1 || diff == 2 || diff == 4 || diff == 8
20883 || diff == 3 || diff == 5 || diff == 9)
20884 || (compare_code == LT && ct == -1)
20885 || (compare_code == GE && cf == -1))
20888 * notl op1 (if necessary)
20889 * sarl $31, op1
20890 * orl cf, op1
20892 if (ct != -1)
20894 cf = ct;
20895 ct = -1;
20896 code = reverse_condition (code);
20899 out = emit_store_flag (out, code, op0, op1, VOIDmode, 0, -1);
20901 out = expand_simple_binop (mode, IOR,
20902 out, GEN_INT (cf),
20903 out, 1, OPTAB_DIRECT);
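/* Sketch for dest = (x < 0) ? -1 : 5 in SImode: the store flag above
   typically becomes "sarl $31, x", giving -1 or 0, and "orl $5" then
   yields -1 or 5. */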
20904 if (out != operands[0])
20905 emit_move_insn (operands[0], out);
20907 return true;
20912 if ((diff == 1 || diff == 2 || diff == 4 || diff == 8
20913 || diff == 3 || diff == 5 || diff == 9)
20914 && ((mode != QImode && mode != HImode) || !TARGET_PARTIAL_REG_STALL)
20915 && (mode != DImode
20916 || x86_64_immediate_operand (GEN_INT (cf), VOIDmode)))
20919 * xorl dest,dest
20920 * cmpl op1,op2
20921 * setcc dest
20922 * lea cf(dest*(ct-cf)),dest
20924 * Size 14.
20926 * This also catches the degenerate setcc-only case.
20929 rtx tmp;
20930 int nops;
20932 out = emit_store_flag (out, code, op0, op1, VOIDmode, 0, 1);
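/* As a concrete instance, dest = (a != b) ? 13 : 10 has diff == 3:
   OUT is now 0 or 1, and the lea built below computes 10 + 3*OUT,
   i.e. 10 or 13, with no branch. */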
20934 nops = 0;
20935 /* On x86_64 the lea instruction operates on Pmode, so we need
20936 to get the arithmetic done in the proper mode to match. */
20937 if (diff == 1)
20938 tmp = copy_rtx (out);
20939 else
20941 rtx out1;
20942 out1 = copy_rtx (out);
20943 tmp = gen_rtx_MULT (mode, out1, GEN_INT (diff & ~1));
20944 nops++;
20945 if (diff & 1)
20947 tmp = gen_rtx_PLUS (mode, tmp, out1);
20948 nops++;
20951 if (cf != 0)
20953 tmp = gen_rtx_PLUS (mode, tmp, GEN_INT (cf));
20954 nops++;
20956 if (!rtx_equal_p (tmp, out))
20958 if (nops == 1)
20959 out = force_operand (tmp, copy_rtx (out));
20960 else
20961 emit_insn (gen_rtx_SET (VOIDmode, copy_rtx (out), copy_rtx (tmp)));
20963 if (!rtx_equal_p (out, operands[0]))
20964 emit_move_insn (operands[0], copy_rtx (out));
20966 return true;
20970 * General case: Jumpful:
20971 * xorl dest,dest cmpl op1, op2
20972 * cmpl op1, op2 movl ct, dest
20973 * setcc dest jcc 1f
20974 * decl dest movl cf, dest
20975 * andl (cf-ct),dest 1:
20976 * addl ct,dest
20978 * Size 20. Size 14.
20980 * This is reasonably steep, but branch mispredict costs are
20981 * high on modern cpus, so consider failing only if optimizing
20982 * for space.
20985 if ((!TARGET_CMOVE || (mode == QImode && TARGET_PARTIAL_REG_STALL))
20986 && BRANCH_COST (optimize_insn_for_speed_p (),
20987 false) >= 2)
20989 if (cf == 0)
20991 machine_mode cmp_mode = GET_MODE (op0);
20993 cf = ct;
20994 ct = 0;
20996 if (SCALAR_FLOAT_MODE_P (cmp_mode))
20998 gcc_assert (!DECIMAL_FLOAT_MODE_P (cmp_mode));
21000 /* We may be reversing an unordered compare to a normal compare,
21001 which is not valid in general (we may convert a non-trapping
21002 condition to a trapping one); however, on i386 we currently
21003 emit all comparisons unordered. */
21004 code = reverse_condition_maybe_unordered (code);
21006 else
21008 code = reverse_condition (code);
21009 if (compare_code != UNKNOWN)
21010 compare_code = reverse_condition (compare_code);
21014 if (compare_code != UNKNOWN)
21016 /* notl op1 (if needed)
21017 sarl $31, op1
21018 andl (cf-ct), op1
21019 addl ct, op1
21021 For x < 0 (resp. x <= -1) there will be no notl,
21022 so if possible swap the constants to get rid of the
21023 complement.
21024 True/false will be -1/0 while code below (store flag
21025 followed by decrement) is 0/-1, so the constants need
21026 to be exchanged once more. */
21028 if (compare_code == GE || !cf)
21030 code = reverse_condition (code);
21031 compare_code = LT;
21033 else
21034 std::swap (cf, ct);
21036 out = emit_store_flag (out, code, op0, op1, VOIDmode, 0, -1);
21038 else
21040 out = emit_store_flag (out, code, op0, op1, VOIDmode, 0, 1);
21042 out = expand_simple_binop (mode, PLUS, copy_rtx (out),
21043 constm1_rtx,
21044 copy_rtx (out), 1, OPTAB_DIRECT);
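/* In either branch OUT now holds -1 or 0; the AND with (cf - ct) and
   the ADD of ct below turn that into cf or ct respectively. */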
21047 out = expand_simple_binop (mode, AND, copy_rtx (out),
21048 gen_int_mode (cf - ct, mode),
21049 copy_rtx (out), 1, OPTAB_DIRECT);
21050 if (ct)
21051 out = expand_simple_binop (mode, PLUS, copy_rtx (out), GEN_INT (ct),
21052 copy_rtx (out), 1, OPTAB_DIRECT);
21053 if (!rtx_equal_p (out, operands[0]))
21054 emit_move_insn (operands[0], copy_rtx (out));
21056 return true;
21060 if (!TARGET_CMOVE || (mode == QImode && TARGET_PARTIAL_REG_STALL))
21062 /* Try a few things more with specific constants and a variable. */
21064 optab op;
21065 rtx var, orig_out, out, tmp;
21067 if (BRANCH_COST (optimize_insn_for_speed_p (), false) <= 2)
21068 return false;
21070 /* If one of the two operands is an interesting constant, load that
21071 constant via a recursive expansion and mask the variable in with a logical operation. */
21073 if (CONST_INT_P (operands[2]))
21075 var = operands[3];
21076 if (INTVAL (operands[2]) == 0 && operands[3] != constm1_rtx)
21077 operands[3] = constm1_rtx, op = and_optab;
21078 else if (INTVAL (operands[2]) == -1 && operands[3] != const0_rtx)
21079 operands[3] = const0_rtx, op = ior_optab;
21080 else
21081 return false;
21083 else if (CONST_INT_P (operands[3]))
21085 var = operands[2];
21086 if (INTVAL (operands[3]) == 0 && operands[2] != constm1_rtx)
21087 operands[2] = constm1_rtx, op = and_optab;
21088 else if (INTVAL (operands[3]) == -1 && operands[2] != const0_rtx)
21089 operands[2] = const0_rtx, op = ior_optab;
21090 else
21091 return false;
21093 else
21094 return false;
21096 orig_out = operands[0];
21097 tmp = gen_reg_rtx (mode);
21098 operands[0] = tmp;
21100 /* Recurse to get the constant loaded. */
21101 if (ix86_expand_int_movcc (operands) == 0)
21102 return false;
21104 /* Mask in the interesting variable. */
21105 out = expand_binop (mode, op, var, tmp, orig_out, 0,
21106 OPTAB_WIDEN);
21107 if (!rtx_equal_p (out, orig_out))
21108 emit_move_insn (copy_rtx (orig_out), copy_rtx (out));
21110 return true;
21114 * For comparison with above,
21116 * movl cf,dest
21117 * movl ct,tmp
21118 * cmpl op1,op2
21119 * cmovcc tmp,dest
21121 * Size 15.
21124 if (! nonimmediate_operand (operands[2], mode))
21125 operands[2] = force_reg (mode, operands[2]);
21126 if (! nonimmediate_operand (operands[3], mode))
21127 operands[3] = force_reg (mode, operands[3]);
21129 if (! register_operand (operands[2], VOIDmode)
21130 && (mode == QImode
21131 || ! register_operand (operands[3], VOIDmode)))
21132 operands[2] = force_reg (mode, operands[2]);
21134 if (mode == QImode
21135 && ! register_operand (operands[3], VOIDmode))
21136 operands[3] = force_reg (mode, operands[3]);
21138 emit_insn (compare_seq);
21139 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
21140 gen_rtx_IF_THEN_ELSE (mode,
21141 compare_op, operands[2],
21142 operands[3])));
21143 return true;
21146 /* Swap, force into registers, or otherwise massage the two operands
21147 to an sse comparison with a mask result. Thus we differ a bit from
21148 ix86_prepare_fp_compare_args which expects to produce a flags result.
21150 The DEST operand exists to help determine whether to commute commutative
21151 operators. The POP0/POP1 operands are updated in place. The new
21152 comparison code is returned, or UNKNOWN if not implementable. */
21154 static enum rtx_code
21155 ix86_prepare_sse_fp_compare_args (rtx dest, enum rtx_code code,
21156 rtx *pop0, rtx *pop1)
21158 switch (code)
21160 case LTGT:
21161 case UNEQ:
21162 /* AVX supports all the needed comparisons. */
21163 if (TARGET_AVX)
21164 break;
21165 /* We have no LTGT as an operator. We could implement it with
21166 NE & ORDERED, but this requires an extra temporary. It's
21167 not clear that it's worth it. */
21168 return UNKNOWN;
21170 case LT:
21171 case LE:
21172 case UNGT:
21173 case UNGE:
21174 /* These are supported directly. */
21175 break;
21177 case EQ:
21178 case NE:
21179 case UNORDERED:
21180 case ORDERED:
21181 /* AVX has 3 operand comparisons, no need to swap anything. */
21182 if (TARGET_AVX)
21183 break;
21184 /* For commutative operators, try to canonicalize the destination
21185 operand to be first in the comparison - this helps reload to
21186 avoid extra moves. */
21187 if (!dest || !rtx_equal_p (dest, *pop1))
21188 break;
21189 /* FALLTHRU */
21191 case GE:
21192 case GT:
21193 case UNLE:
21194 case UNLT:
21195 /* These are not supported directly before AVX, and furthermore
21196 ix86_expand_sse_fp_minmax only optimizes LT/UNGE. Swap the
21197 comparison operands to transform into something that is
21198 supported. */
21199 std::swap (*pop0, *pop1);
21200 code = swap_condition (code);
21201 break;
21203 default:
21204 gcc_unreachable ();
21207 return code;
21210 /* Detect conditional moves that exactly match min/max operational
21211 semantics. Note that this is IEEE safe, as long as we don't
21212 interchange the operands.
21214 Returns FALSE if this conditional move doesn't match a MIN/MAX,
21215 and TRUE if the operation is successful and instructions are emitted. */
21217 static bool
21218 ix86_expand_sse_fp_minmax (rtx dest, enum rtx_code code, rtx cmp_op0,
21219 rtx cmp_op1, rtx if_true, rtx if_false)
21221 machine_mode mode;
21222 bool is_min;
21223 rtx tmp;
21225 if (code == LT)
21227 else if (code == UNGE)
21228 std::swap (if_true, if_false);
21229 else
21230 return false;
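/* Only LT and UNGE are recognized: with the operand order preserved they
   map directly onto the SSE min/max semantics (the second operand is
   returned for NaN inputs), so the transformation stays IEEE-safe as
   noted above. */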
21232 if (rtx_equal_p (cmp_op0, if_true) && rtx_equal_p (cmp_op1, if_false))
21233 is_min = true;
21234 else if (rtx_equal_p (cmp_op1, if_true) && rtx_equal_p (cmp_op0, if_false))
21235 is_min = false;
21236 else
21237 return false;
21239 mode = GET_MODE (dest);
21241 /* We want to check HONOR_NANS and HONOR_SIGNED_ZEROS here,
21242 but MODE may be a vector mode and thus not appropriate. */
21243 if (!flag_finite_math_only || !flag_unsafe_math_optimizations)
21245 int u = is_min ? UNSPEC_IEEE_MIN : UNSPEC_IEEE_MAX;
21246 rtvec v;
21248 if_true = force_reg (mode, if_true);
21249 v = gen_rtvec (2, if_true, if_false);
21250 tmp = gen_rtx_UNSPEC (mode, v, u);
21252 else
21254 code = is_min ? SMIN : SMAX;
21255 tmp = gen_rtx_fmt_ee (code, mode, if_true, if_false);
21258 emit_insn (gen_rtx_SET (VOIDmode, dest, tmp));
21259 return true;
21262 /* Expand an sse vector comparison. Return the register with the result. */
21264 static rtx
21265 ix86_expand_sse_cmp (rtx dest, enum rtx_code code, rtx cmp_op0, rtx cmp_op1,
21266 rtx op_true, rtx op_false)
21268 machine_mode mode = GET_MODE (dest);
21269 machine_mode cmp_ops_mode = GET_MODE (cmp_op0);
21271 /* In the general case the result of the comparison can differ from the operands' type. */
21272 machine_mode cmp_mode;
21274 /* In AVX512F the result of comparison is an integer mask. */
21275 bool maskcmp = false;
21276 rtx x;
21278 if (GET_MODE_SIZE (cmp_ops_mode) == 64)
21280 cmp_mode = mode_for_size (GET_MODE_NUNITS (cmp_ops_mode), MODE_INT, 0);
21281 gcc_assert (cmp_mode != BLKmode);
21283 maskcmp = true;
21285 else
21286 cmp_mode = cmp_ops_mode;
21289 cmp_op0 = force_reg (cmp_ops_mode, cmp_op0);
21290 if (!nonimmediate_operand (cmp_op1, cmp_ops_mode))
21291 cmp_op1 = force_reg (cmp_ops_mode, cmp_op1);
21293 if (optimize
21294 || reg_overlap_mentioned_p (dest, op_true)
21295 || reg_overlap_mentioned_p (dest, op_false))
21296 dest = gen_reg_rtx (maskcmp ? cmp_mode : mode);
21298 /* Compare patterns for int modes are unspec in AVX512F only. */
21299 if (maskcmp && (code == GT || code == EQ))
21301 rtx (*gen)(rtx, rtx, rtx);
21303 switch (cmp_ops_mode)
21305 case V16SImode:
21306 gen = code == GT ? gen_avx512f_gtv16si3 : gen_avx512f_eqv16si3_1;
21307 break;
21308 case V8DImode:
21309 gen = code == GT ? gen_avx512f_gtv8di3 : gen_avx512f_eqv8di3_1;
21310 break;
21311 default:
21312 gen = NULL;
21315 if (gen)
21317 emit_insn (gen (dest, cmp_op0, cmp_op1));
21318 return dest;
21321 x = gen_rtx_fmt_ee (code, cmp_mode, cmp_op0, cmp_op1);
21323 if (cmp_mode != mode && !maskcmp)
21325 x = force_reg (cmp_ops_mode, x);
21326 convert_move (dest, x, false);
21328 else
21329 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
21331 return dest;
21334 /* Expand DEST = CMP ? OP_TRUE : OP_FALSE into a sequence of logical
21335 operations. This is used for both scalar and vector conditional moves. */
21337 static void
21338 ix86_expand_sse_movcc (rtx dest, rtx cmp, rtx op_true, rtx op_false)
21340 machine_mode mode = GET_MODE (dest);
21341 machine_mode cmpmode = GET_MODE (cmp);
21343 /* In AVX512F the result of comparison is an integer mask. */
21344 bool maskcmp = (mode != cmpmode && TARGET_AVX512F);
21346 rtx t2, t3, x;
21348 if (vector_all_ones_operand (op_true, mode)
21349 && rtx_equal_p (op_false, CONST0_RTX (mode))
21350 && !maskcmp)
21352 emit_insn (gen_rtx_SET (VOIDmode, dest, cmp));
21354 else if (op_false == CONST0_RTX (mode)
21355 && !maskcmp)
21357 op_true = force_reg (mode, op_true);
21358 x = gen_rtx_AND (mode, cmp, op_true);
21359 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
21361 else if (op_true == CONST0_RTX (mode)
21362 && !maskcmp)
21364 op_false = force_reg (mode, op_false);
21365 x = gen_rtx_NOT (mode, cmp);
21366 x = gen_rtx_AND (mode, x, op_false);
21367 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
21369 else if (INTEGRAL_MODE_P (mode) && op_true == CONSTM1_RTX (mode)
21370 && !maskcmp)
21372 op_false = force_reg (mode, op_false);
21373 x = gen_rtx_IOR (mode, cmp, op_false);
21374 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
21376 else if (TARGET_XOP
21377 && !maskcmp)
21379 op_true = force_reg (mode, op_true);
21381 if (!nonimmediate_operand (op_false, mode))
21382 op_false = force_reg (mode, op_false);
21384 emit_insn (gen_rtx_SET (mode, dest,
21385 gen_rtx_IF_THEN_ELSE (mode, cmp,
21386 op_true,
21387 op_false)));
21389 else
21391 rtx (*gen) (rtx, rtx, rtx, rtx) = NULL;
21392 rtx d = dest;
21394 if (!nonimmediate_operand (op_true, mode))
21395 op_true = force_reg (mode, op_true);
21397 op_false = force_reg (mode, op_false);
21399 switch (mode)
21401 case V4SFmode:
21402 if (TARGET_SSE4_1)
21403 gen = gen_sse4_1_blendvps;
21404 break;
21405 case V2DFmode:
21406 if (TARGET_SSE4_1)
21407 gen = gen_sse4_1_blendvpd;
21408 break;
21409 case V16QImode:
21410 case V8HImode:
21411 case V4SImode:
21412 case V2DImode:
21413 if (TARGET_SSE4_1)
21415 gen = gen_sse4_1_pblendvb;
21416 if (mode != V16QImode)
21417 d = gen_reg_rtx (V16QImode);
21418 op_false = gen_lowpart (V16QImode, op_false);
21419 op_true = gen_lowpart (V16QImode, op_true);
21420 cmp = gen_lowpart (V16QImode, cmp);
21422 break;
21423 case V8SFmode:
21424 if (TARGET_AVX)
21425 gen = gen_avx_blendvps256;
21426 break;
21427 case V4DFmode:
21428 if (TARGET_AVX)
21429 gen = gen_avx_blendvpd256;
21430 break;
21431 case V32QImode:
21432 case V16HImode:
21433 case V8SImode:
21434 case V4DImode:
21435 if (TARGET_AVX2)
21437 gen = gen_avx2_pblendvb;
21438 if (mode != V32QImode)
21439 d = gen_reg_rtx (V32QImode);
21440 op_false = gen_lowpart (V32QImode, op_false);
21441 op_true = gen_lowpart (V32QImode, op_true);
21442 cmp = gen_lowpart (V32QImode, cmp);
21444 break;
21446 case V64QImode:
21447 gen = gen_avx512bw_blendmv64qi;
21448 break;
21449 case V32HImode:
21450 gen = gen_avx512bw_blendmv32hi;
21451 break;
21452 case V16SImode:
21453 gen = gen_avx512f_blendmv16si;
21454 break;
21455 case V8DImode:
21456 gen = gen_avx512f_blendmv8di;
21457 break;
21458 case V8DFmode:
21459 gen = gen_avx512f_blendmv8df;
21460 break;
21461 case V16SFmode:
21462 gen = gen_avx512f_blendmv16sf;
21463 break;
21465 default:
21466 break;
21469 if (gen != NULL)
21471 emit_insn (gen (d, op_false, op_true, cmp));
21472 if (d != dest)
21473 emit_move_insn (dest, gen_lowpart (GET_MODE (dest), d));
21475 else
21477 op_true = force_reg (mode, op_true);
21479 t2 = gen_reg_rtx (mode);
21480 if (optimize)
21481 t3 = gen_reg_rtx (mode);
21482 else
21483 t3 = dest;
21485 x = gen_rtx_AND (mode, op_true, cmp);
21486 emit_insn (gen_rtx_SET (VOIDmode, t2, x));
21488 x = gen_rtx_NOT (mode, cmp);
21489 x = gen_rtx_AND (mode, x, op_false);
21490 emit_insn (gen_rtx_SET (VOIDmode, t3, x));
21492 x = gen_rtx_IOR (mode, t3, t2);
21493 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
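/* This is the classic SSE select idiom:
   dest = (cmp & op_true) | (~cmp & op_false),
   which relies on CMP being all-ones or all-zeros in each element. */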
21498 /* Expand a floating-point conditional move. Return true if successful. */
21500 bool
21501 ix86_expand_fp_movcc (rtx operands[])
21503 machine_mode mode = GET_MODE (operands[0]);
21504 enum rtx_code code = GET_CODE (operands[1]);
21505 rtx tmp, compare_op;
21506 rtx op0 = XEXP (operands[1], 0);
21507 rtx op1 = XEXP (operands[1], 1);
21509 if (TARGET_SSE_MATH && SSE_FLOAT_MODE_P (mode))
21511 machine_mode cmode;
21513 /* Since we have no cmove for SSE registers, don't force bad register
21514 allocation just to gain access to it. Deny movcc when the
21515 comparison mode doesn't match the move mode. */
21516 cmode = GET_MODE (op0);
21517 if (cmode == VOIDmode)
21518 cmode = GET_MODE (op1);
21519 if (cmode != mode)
21520 return false;
21522 code = ix86_prepare_sse_fp_compare_args (operands[0], code, &op0, &op1);
21523 if (code == UNKNOWN)
21524 return false;
21526 if (ix86_expand_sse_fp_minmax (operands[0], code, op0, op1,
21527 operands[2], operands[3]))
21528 return true;
21530 tmp = ix86_expand_sse_cmp (operands[0], code, op0, op1,
21531 operands[2], operands[3]);
21532 ix86_expand_sse_movcc (operands[0], tmp, operands[2], operands[3]);
21533 return true;
21536 if (GET_MODE (op0) == TImode
21537 || (GET_MODE (op0) == DImode
21538 && !TARGET_64BIT))
21539 return false;
21541 /* The floating point conditional move instructions don't directly
21542 support conditions resulting from a signed integer comparison. */
21544 compare_op = ix86_expand_compare (code, op0, op1);
21545 if (!fcmov_comparison_operator (compare_op, VOIDmode))
21547 tmp = gen_reg_rtx (QImode);
21548 ix86_expand_setcc (tmp, code, op0, op1);
21550 compare_op = ix86_expand_compare (NE, tmp, const0_rtx);
21553 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
21554 gen_rtx_IF_THEN_ELSE (mode, compare_op,
21555 operands[2], operands[3])));
21557 return true;
21560 /* Expand a floating-point vector conditional move; a vcond operation
21561 rather than a movcc operation. */
21563 bool
21564 ix86_expand_fp_vcond (rtx operands[])
21566 enum rtx_code code = GET_CODE (operands[3]);
21567 rtx cmp;
21569 code = ix86_prepare_sse_fp_compare_args (operands[0], code,
21570 &operands[4], &operands[5]);
21571 if (code == UNKNOWN)
21573 rtx temp;
21574 switch (GET_CODE (operands[3]))
21576 case LTGT:
21577 temp = ix86_expand_sse_cmp (operands[0], ORDERED, operands[4],
21578 operands[5], operands[0], operands[0]);
21579 cmp = ix86_expand_sse_cmp (operands[0], NE, operands[4],
21580 operands[5], operands[1], operands[2]);
21581 code = AND;
21582 break;
21583 case UNEQ:
21584 temp = ix86_expand_sse_cmp (operands[0], UNORDERED, operands[4],
21585 operands[5], operands[0], operands[0]);
21586 cmp = ix86_expand_sse_cmp (operands[0], EQ, operands[4],
21587 operands[5], operands[1], operands[2]);
21588 code = IOR;
21589 break;
21590 default:
21591 gcc_unreachable ();
21593 cmp = expand_simple_binop (GET_MODE (cmp), code, temp, cmp, cmp, 1,
21594 OPTAB_DIRECT);
21595 ix86_expand_sse_movcc (operands[0], cmp, operands[1], operands[2]);
21596 return true;
21599 if (ix86_expand_sse_fp_minmax (operands[0], code, operands[4],
21600 operands[5], operands[1], operands[2]))
21601 return true;
21603 cmp = ix86_expand_sse_cmp (operands[0], code, operands[4], operands[5],
21604 operands[1], operands[2]);
21605 ix86_expand_sse_movcc (operands[0], cmp, operands[1], operands[2]);
21606 return true;
21609 /* Expand a signed/unsigned integral vector conditional move. */
21611 bool
21612 ix86_expand_int_vcond (rtx operands[])
21614 machine_mode data_mode = GET_MODE (operands[0]);
21615 machine_mode mode = GET_MODE (operands[4]);
21616 enum rtx_code code = GET_CODE (operands[3]);
21617 bool negate = false;
21618 rtx x, cop0, cop1;
21620 cop0 = operands[4];
21621 cop1 = operands[5];
21623 /* Try to optimize x < 0 ? -1 : 0 into (signed) x >> 31
21624 and x < 0 ? 1 : 0 into (unsigned) x >> 31. */
21625 if ((code == LT || code == GE)
21626 && data_mode == mode
21627 && cop1 == CONST0_RTX (mode)
21628 && operands[1 + (code == LT)] == CONST0_RTX (data_mode)
21629 && GET_MODE_SIZE (GET_MODE_INNER (data_mode)) > 1
21630 && GET_MODE_SIZE (GET_MODE_INNER (data_mode)) <= 8
21631 && (GET_MODE_SIZE (data_mode) == 16
21632 || (TARGET_AVX2 && GET_MODE_SIZE (data_mode) == 32)))
21634 rtx negop = operands[2 - (code == LT)];
21635 int shift = GET_MODE_BITSIZE (GET_MODE_INNER (data_mode)) - 1;
21636 if (negop == CONST1_RTX (data_mode))
21638 rtx res = expand_simple_binop (mode, LSHIFTRT, cop0, GEN_INT (shift),
21639 operands[0], 1, OPTAB_DIRECT);
21640 if (res != operands[0])
21641 emit_move_insn (operands[0], res);
21642 return true;
21644 else if (GET_MODE_INNER (data_mode) != DImode
21645 && vector_all_ones_operand (negop, data_mode))
21647 rtx res = expand_simple_binop (mode, ASHIFTRT, cop0, GEN_INT (shift),
21648 operands[0], 0, OPTAB_DIRECT);
21649 if (res != operands[0])
21650 emit_move_insn (operands[0], res);
21651 return true;
21655 if (!nonimmediate_operand (cop1, mode))
21656 cop1 = force_reg (mode, cop1);
21657 if (!general_operand (operands[1], data_mode))
21658 operands[1] = force_reg (data_mode, operands[1]);
21659 if (!general_operand (operands[2], data_mode))
21660 operands[2] = force_reg (data_mode, operands[2]);
21662 /* XOP supports all of the comparisons on all 128-bit vector int types. */
21663 if (TARGET_XOP
21664 && (mode == V16QImode || mode == V8HImode
21665 || mode == V4SImode || mode == V2DImode))
21667 else
21669 /* Canonicalize the comparison to EQ, GT, GTU. */
21670 switch (code)
21672 case EQ:
21673 case GT:
21674 case GTU:
21675 break;
21677 case NE:
21678 case LE:
21679 case LEU:
21680 code = reverse_condition (code);
21681 negate = true;
21682 break;
21684 case GE:
21685 case GEU:
21686 code = reverse_condition (code);
21687 negate = true;
21688 /* FALLTHRU */
21690 case LT:
21691 case LTU:
21692 std::swap (cop0, cop1);
21693 code = swap_condition (code);
21694 break;
21696 default:
21697 gcc_unreachable ();
21700 /* Only SSE4.1/SSE4.2 supports V2DImode. */
21701 if (mode == V2DImode)
21703 switch (code)
21705 case EQ:
21706 /* SSE4.1 supports EQ. */
21707 if (!TARGET_SSE4_1)
21708 return false;
21709 break;
21711 case GT:
21712 case GTU:
21713 /* SSE4.2 supports GT/GTU. */
21714 if (!TARGET_SSE4_2)
21715 return false;
21716 break;
21718 default:
21719 gcc_unreachable ();
21723 /* Unsigned parallel compare is not supported by the hardware.
21724 Play some tricks to turn this into a signed comparison
21725 against 0. */
21726 if (code == GTU)
21728 cop0 = force_reg (mode, cop0);
21730 switch (mode)
21732 case V16SImode:
21733 case V8DImode:
21734 case V8SImode:
21735 case V4DImode:
21736 case V4SImode:
21737 case V2DImode:
21739 rtx t1, t2, mask;
21740 rtx (*gen_sub3) (rtx, rtx, rtx);
21742 switch (mode)
21744 case V16SImode: gen_sub3 = gen_subv16si3; break;
21745 case V8DImode: gen_sub3 = gen_subv8di3; break;
21746 case V8SImode: gen_sub3 = gen_subv8si3; break;
21747 case V4DImode: gen_sub3 = gen_subv4di3; break;
21748 case V4SImode: gen_sub3 = gen_subv4si3; break;
21749 case V2DImode: gen_sub3 = gen_subv2di3; break;
21750 default:
21751 gcc_unreachable ();
21753 /* Subtract (-(INT MAX) - 1) from both operands to make
21754 them signed. */
21755 mask = ix86_build_signbit_mask (mode, true, false);
21756 t1 = gen_reg_rtx (mode);
21757 emit_insn (gen_sub3 (t1, cop0, mask));
21759 t2 = gen_reg_rtx (mode);
21760 emit_insn (gen_sub3 (t2, cop1, mask));
21762 cop0 = t1;
21763 cop1 = t2;
21764 code = GT;
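/* Subtracting the sign-bit constant biases both operands, mapping
   unsigned order onto signed order, so the signed GT pattern now
   computes the unsigned result. */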
21766 break;
21768 case V64QImode:
21769 case V32HImode:
21770 case V32QImode:
21771 case V16HImode:
21772 case V16QImode:
21773 case V8HImode:
21774 /* Perform a parallel unsigned saturating subtraction. */
21775 x = gen_reg_rtx (mode);
21776 emit_insn (gen_rtx_SET (VOIDmode, x,
21777 gen_rtx_US_MINUS (mode, cop0, cop1)));
21779 cop0 = x;
21780 cop1 = CONST0_RTX (mode);
21781 code = EQ;
21782 negate = !negate;
21783 break;
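/* Here "a US_MINUS b" is zero exactly when a <= b (unsigned), so an EQ
   test against zero with NEGATE flipped implements GTU. */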
21785 default:
21786 gcc_unreachable ();
21791 /* Allow the comparison to be done in one mode, but the movcc to
21792 happen in another mode. */
21793 if (data_mode == mode)
21795 x = ix86_expand_sse_cmp (operands[0], code, cop0, cop1,
21796 operands[1+negate], operands[2-negate]);
21798 else
21800 gcc_assert (GET_MODE_SIZE (data_mode) == GET_MODE_SIZE (mode));
21801 x = ix86_expand_sse_cmp (gen_reg_rtx (mode), code, cop0, cop1,
21802 operands[1+negate], operands[2-negate]);
21803 if (GET_MODE (x) == mode)
21804 x = gen_lowpart (data_mode, x);
21807 ix86_expand_sse_movcc (operands[0], x, operands[1+negate],
21808 operands[2-negate]);
21809 return true;
21812 /* AVX512F does support 64-byte integer vector operations,
21813 thus the longest vector we are faced with is V64QImode. */
21814 #define MAX_VECT_LEN 64
21816 struct expand_vec_perm_d
21818 rtx target, op0, op1;
21819 unsigned char perm[MAX_VECT_LEN];
21820 machine_mode vmode;
21821 unsigned char nelt;
21822 bool one_operand_p;
21823 bool testing_p;
21826 static bool
21827 ix86_expand_vec_perm_vpermi2 (rtx target, rtx op0, rtx mask, rtx op1,
21828 struct expand_vec_perm_d *d)
21830 /* ix86_expand_vec_perm_vpermi2 is called from both the const and non-const
21831 expanders, so the args are either in d, or in op0, op1 etc. */
21832 machine_mode mode = GET_MODE (d ? d->op0 : op0);
21833 machine_mode maskmode = mode;
21834 rtx (*gen) (rtx, rtx, rtx, rtx) = NULL;
21836 switch (mode)
21838 case V8HImode:
21839 if (TARGET_AVX512VL && TARGET_AVX512BW)
21840 gen = gen_avx512vl_vpermi2varv8hi3;
21841 break;
21842 case V16HImode:
21843 if (TARGET_AVX512VL && TARGET_AVX512BW)
21844 gen = gen_avx512vl_vpermi2varv16hi3;
21845 break;
21846 case V64QImode:
21847 if (TARGET_AVX512VBMI)
21848 gen = gen_avx512bw_vpermi2varv64qi3;
21849 break;
21850 case V32HImode:
21851 if (TARGET_AVX512BW)
21852 gen = gen_avx512bw_vpermi2varv32hi3;
21853 break;
21854 case V4SImode:
21855 if (TARGET_AVX512VL)
21856 gen = gen_avx512vl_vpermi2varv4si3;
21857 break;
21858 case V8SImode:
21859 if (TARGET_AVX512VL)
21860 gen = gen_avx512vl_vpermi2varv8si3;
21861 break;
21862 case V16SImode:
21863 if (TARGET_AVX512F)
21864 gen = gen_avx512f_vpermi2varv16si3;
21865 break;
21866 case V4SFmode:
21867 if (TARGET_AVX512VL)
21869 gen = gen_avx512vl_vpermi2varv4sf3;
21870 maskmode = V4SImode;
21872 break;
21873 case V8SFmode:
21874 if (TARGET_AVX512VL)
21876 gen = gen_avx512vl_vpermi2varv8sf3;
21877 maskmode = V8SImode;
21879 break;
21880 case V16SFmode:
21881 if (TARGET_AVX512F)
21883 gen = gen_avx512f_vpermi2varv16sf3;
21884 maskmode = V16SImode;
21886 break;
21887 case V2DImode:
21888 if (TARGET_AVX512VL)
21889 gen = gen_avx512vl_vpermi2varv2di3;
21890 break;
21891 case V4DImode:
21892 if (TARGET_AVX512VL)
21893 gen = gen_avx512vl_vpermi2varv4di3;
21894 break;
21895 case V8DImode:
21896 if (TARGET_AVX512F)
21897 gen = gen_avx512f_vpermi2varv8di3;
21898 break;
21899 case V2DFmode:
21900 if (TARGET_AVX512VL)
21902 gen = gen_avx512vl_vpermi2varv2df3;
21903 maskmode = V2DImode;
21905 break;
21906 case V4DFmode:
21907 if (TARGET_AVX512VL)
21909 gen = gen_avx512vl_vpermi2varv4df3;
21910 maskmode = V4DImode;
21912 break;
21913 case V8DFmode:
21914 if (TARGET_AVX512F)
21916 gen = gen_avx512f_vpermi2varv8df3;
21917 maskmode = V8DImode;
21919 break;
21920 default:
21921 break;
21924 if (gen == NULL)
21925 return false;
21927 /* ix86_expand_vec_perm_vpermi2 is called from both the const and non-const
21928 expanders, so the args are either in d, or in op0, op1 etc. */
21929 if (d)
21931 rtx vec[64];
21932 target = d->target;
21933 op0 = d->op0;
21934 op1 = d->op1;
21935 for (int i = 0; i < d->nelt; ++i)
21936 vec[i] = GEN_INT (d->perm[i]);
21937 mask = gen_rtx_CONST_VECTOR (maskmode, gen_rtvec_v (d->nelt, vec));
21940 emit_insn (gen (target, op0, force_reg (maskmode, mask), op1));
21941 return true;
21944 /* Expand a variable vector permutation. */
21946 void
21947 ix86_expand_vec_perm (rtx operands[])
21949 rtx target = operands[0];
21950 rtx op0 = operands[1];
21951 rtx op1 = operands[2];
21952 rtx mask = operands[3];
21953 rtx t1, t2, t3, t4, t5, t6, t7, t8, vt, vt2, vec[32];
21954 machine_mode mode = GET_MODE (op0);
21955 machine_mode maskmode = GET_MODE (mask);
21956 int w, e, i;
21957 bool one_operand_shuffle = rtx_equal_p (op0, op1);
21959 /* Number of elements in the vector. */
21960 w = GET_MODE_NUNITS (mode);
21961 e = GET_MODE_UNIT_SIZE (mode);
21962 gcc_assert (w <= 64);
21964 if (ix86_expand_vec_perm_vpermi2 (target, op0, mask, op1, NULL))
21965 return;
21967 if (TARGET_AVX2)
21969 if (mode == V4DImode || mode == V4DFmode || mode == V16HImode)
21971 /* Unfortunately, the VPERMQ and VPERMPD instructions only support
21972 a constant shuffle operand. With a tiny bit of effort we can
21973 use VPERMD instead. A re-interpretation stall for V4DFmode is
21974 unfortunate but there's no avoiding it.
21975 Similarly for V16HImode we don't have instructions for variable
21976 shuffling, while for V32QImode we can, after preparing suitable
21977 masks, use vpshufb; vpshufb; vpermq; vpor. */
21979 if (mode == V16HImode)
21981 maskmode = mode = V32QImode;
21982 w = 32;
21983 e = 1;
21985 else
21987 maskmode = mode = V8SImode;
21988 w = 8;
21989 e = 4;
21991 t1 = gen_reg_rtx (maskmode);
21993 /* Replicate the low bits of the V4DImode mask into V8SImode:
21994 mask = { A B C D }
21995 t1 = { A A B B C C D D }. */
21996 for (i = 0; i < w / 2; ++i)
21997 vec[i*2 + 1] = vec[i*2] = GEN_INT (i * 2);
21998 vt = gen_rtx_CONST_VECTOR (maskmode, gen_rtvec_v (w, vec));
21999 vt = force_reg (maskmode, vt);
22000 mask = gen_lowpart (maskmode, mask);
22001 if (maskmode == V8SImode)
22002 emit_insn (gen_avx2_permvarv8si (t1, mask, vt));
22003 else
22004 emit_insn (gen_avx2_pshufbv32qi3 (t1, mask, vt));
22006 /* Multiply the shuffle indices by two. */
22007 t1 = expand_simple_binop (maskmode, PLUS, t1, t1, t1, 1,
22008 OPTAB_DIRECT);
22010 /* Add one to the odd shuffle indices:
22011 t1 = { A*2, A*2+1, B*2, B*2+1, ... }. */
22012 for (i = 0; i < w / 2; ++i)
22014 vec[i * 2] = const0_rtx;
22015 vec[i * 2 + 1] = const1_rtx;
22017 vt = gen_rtx_CONST_VECTOR (maskmode, gen_rtvec_v (w, vec));
22018 vt = validize_mem (force_const_mem (maskmode, vt));
22019 t1 = expand_simple_binop (maskmode, PLUS, t1, vt, t1, 1,
22020 OPTAB_DIRECT);
22022 /* Continue as if V8SImode (resp. V32QImode) was used initially. */
22023 operands[3] = mask = t1;
22024 target = gen_reg_rtx (mode);
22025 op0 = gen_lowpart (mode, op0);
22026 op1 = gen_lowpart (mode, op1);
22029 switch (mode)
22031 case V8SImode:
22032 /* The VPERMD and VPERMPS instructions already properly ignore
22033 the high bits of the shuffle elements. No need for us to
22034 perform an AND ourselves. */
22035 if (one_operand_shuffle)
22037 emit_insn (gen_avx2_permvarv8si (target, op0, mask));
22038 if (target != operands[0])
22039 emit_move_insn (operands[0],
22040 gen_lowpart (GET_MODE (operands[0]), target));
22042 else
22044 t1 = gen_reg_rtx (V8SImode);
22045 t2 = gen_reg_rtx (V8SImode);
22046 emit_insn (gen_avx2_permvarv8si (t1, op0, mask));
22047 emit_insn (gen_avx2_permvarv8si (t2, op1, mask));
22048 goto merge_two;
22050 return;
22052 case V8SFmode:
22053 mask = gen_lowpart (V8SImode, mask);
22054 if (one_operand_shuffle)
22055 emit_insn (gen_avx2_permvarv8sf (target, op0, mask));
22056 else
22058 t1 = gen_reg_rtx (V8SFmode);
22059 t2 = gen_reg_rtx (V8SFmode);
22060 emit_insn (gen_avx2_permvarv8sf (t1, op0, mask));
22061 emit_insn (gen_avx2_permvarv8sf (t2, op1, mask));
22062 goto merge_two;
22064 return;
22066 case V4SImode:
22067 /* By combining the two 128-bit input vectors into one 256-bit
22068 input vector, we can use VPERMD and VPERMPS for the full
22069 two-operand shuffle. */
22070 t1 = gen_reg_rtx (V8SImode);
22071 t2 = gen_reg_rtx (V8SImode);
22072 emit_insn (gen_avx_vec_concatv8si (t1, op0, op1));
22073 emit_insn (gen_avx_vec_concatv8si (t2, mask, mask));
22074 emit_insn (gen_avx2_permvarv8si (t1, t1, t2));
22075 emit_insn (gen_avx_vextractf128v8si (target, t1, const0_rtx));
22076 return;
22078 case V4SFmode:
22079 t1 = gen_reg_rtx (V8SFmode);
22080 t2 = gen_reg_rtx (V8SImode);
22081 mask = gen_lowpart (V4SImode, mask);
22082 emit_insn (gen_avx_vec_concatv8sf (t1, op0, op1));
22083 emit_insn (gen_avx_vec_concatv8si (t2, mask, mask));
22084 emit_insn (gen_avx2_permvarv8sf (t1, t1, t2));
22085 emit_insn (gen_avx_vextractf128v8sf (target, t1, const0_rtx));
22086 return;
22088 case V32QImode:
22089 t1 = gen_reg_rtx (V32QImode);
22090 t2 = gen_reg_rtx (V32QImode);
22091 t3 = gen_reg_rtx (V32QImode);
22092 vt2 = GEN_INT (-128);
22093 for (i = 0; i < 32; i++)
22094 vec[i] = vt2;
22095 vt = gen_rtx_CONST_VECTOR (V32QImode, gen_rtvec_v (32, vec));
22096 vt = force_reg (V32QImode, vt);
22097 for (i = 0; i < 32; i++)
22098 vec[i] = i < 16 ? vt2 : const0_rtx;
22099 vt2 = gen_rtx_CONST_VECTOR (V32QImode, gen_rtvec_v (32, vec));
22100 vt2 = force_reg (V32QImode, vt2);
22101 /* From mask create two adjusted masks, which contain the same
22102 bits as mask in the low 7 bits of each vector element.
22103 The first mask will have the most significant bit clear
22104 if it requests element from the same 128-bit lane
22105 and MSB set if it requests element from the other 128-bit lane.
22106 The second mask will have the opposite values of the MSB,
22107 and additionally will have its 128-bit lanes swapped.
22108 E.g. { 07 12 1e 09 ... | 17 19 05 1f ... } mask vector will have
22109 t1 { 07 92 9e 09 ... | 17 19 85 1f ... } and
22110 t3 { 97 99 05 9f ... | 87 12 1e 89 ... } where each ...
22111 stands for the other 12 bytes. */
22112 /* The bit telling whether an element comes from the same lane or the
22113 other lane is bit 4, so shift it up by 3 to the MSB position. */
22114 t5 = gen_reg_rtx (V4DImode);
22115 emit_insn (gen_ashlv4di3 (t5, gen_lowpart (V4DImode, mask),
22116 GEN_INT (3)));
22117 /* Clear MSB bits from the mask just in case it had them set. */
22118 emit_insn (gen_avx2_andnotv32qi3 (t2, vt, mask));
22119 /* After this t1 will have MSB set for elements from other lane. */
22120 emit_insn (gen_xorv32qi3 (t1, gen_lowpart (V32QImode, t5), vt2));
22121 /* Clear bits other than MSB. */
22122 emit_insn (gen_andv32qi3 (t1, t1, vt));
22123 /* Or in the lower bits from mask into t3. */
22124 emit_insn (gen_iorv32qi3 (t3, t1, t2));
22125 /* And invert MSB bits in t1, so MSB is set for elements from the same
22126 lane. */
22127 emit_insn (gen_xorv32qi3 (t1, t1, vt));
22128 /* Swap 128-bit lanes in t3. */
22129 t6 = gen_reg_rtx (V4DImode);
22130 emit_insn (gen_avx2_permv4di_1 (t6, gen_lowpart (V4DImode, t3),
22131 const2_rtx, GEN_INT (3),
22132 const0_rtx, const1_rtx));
22133 /* And or in the lower bits from mask into t1. */
22134 emit_insn (gen_iorv32qi3 (t1, t1, t2));
22135 if (one_operand_shuffle)
22137 /* Each of these shuffles will put 0s in places where an
22138 element from the other 128-bit lane is needed; otherwise
22139 it will shuffle in the requested value. */
22140 emit_insn (gen_avx2_pshufbv32qi3 (t3, op0,
22141 gen_lowpart (V32QImode, t6)));
22142 emit_insn (gen_avx2_pshufbv32qi3 (t1, op0, t1));
22143 /* For t3 the 128-bit lanes are swapped again. */
22144 t7 = gen_reg_rtx (V4DImode);
22145 emit_insn (gen_avx2_permv4di_1 (t7, gen_lowpart (V4DImode, t3),
22146 const2_rtx, GEN_INT (3),
22147 const0_rtx, const1_rtx));
22148 /* And oring both together leads to the result. */
22149 emit_insn (gen_iorv32qi3 (target, t1,
22150 gen_lowpart (V32QImode, t7)));
22151 if (target != operands[0])
22152 emit_move_insn (operands[0],
22153 gen_lowpart (GET_MODE (operands[0]), target));
22154 return;
22157 t4 = gen_reg_rtx (V32QImode);
22158 /* Similar to the one_operand_shuffle code above, just repeated
22159 twice, once for each operand. The code at merge_two:
22160 will merge the two results together. */
22161 emit_insn (gen_avx2_pshufbv32qi3 (t4, op0,
22162 gen_lowpart (V32QImode, t6)));
22163 emit_insn (gen_avx2_pshufbv32qi3 (t3, op1,
22164 gen_lowpart (V32QImode, t6)));
22165 emit_insn (gen_avx2_pshufbv32qi3 (t2, op0, t1));
22166 emit_insn (gen_avx2_pshufbv32qi3 (t1, op1, t1));
22167 t7 = gen_reg_rtx (V4DImode);
22168 emit_insn (gen_avx2_permv4di_1 (t7, gen_lowpart (V4DImode, t4),
22169 const2_rtx, GEN_INT (3),
22170 const0_rtx, const1_rtx));
22171 t8 = gen_reg_rtx (V4DImode);
22172 emit_insn (gen_avx2_permv4di_1 (t8, gen_lowpart (V4DImode, t3),
22173 const2_rtx, GEN_INT (3),
22174 const0_rtx, const1_rtx));
22175 emit_insn (gen_iorv32qi3 (t4, t2, gen_lowpart (V32QImode, t7)));
22176 emit_insn (gen_iorv32qi3 (t3, t1, gen_lowpart (V32QImode, t8)));
22177 t1 = t4;
22178 t2 = t3;
22179 goto merge_two;
22181 default:
22182 gcc_assert (GET_MODE_SIZE (mode) <= 16);
22183 break;
22187 if (TARGET_XOP)
22189 /* The XOP VPPERM insn supports three inputs. By ignoring the
22190 one_operand_shuffle special case, we avoid creating another
22191 set of constant vectors in memory. */
22192 one_operand_shuffle = false;
22194 /* mask = mask & {2*w-1, ...} */
22195 vt = GEN_INT (2*w - 1);
22197 else
22199 /* mask = mask & {w-1, ...} */
22200 vt = GEN_INT (w - 1);
22203 for (i = 0; i < w; i++)
22204 vec[i] = vt;
22205 vt = gen_rtx_CONST_VECTOR (maskmode, gen_rtvec_v (w, vec));
22206 mask = expand_simple_binop (maskmode, AND, mask, vt,
22207 NULL_RTX, 0, OPTAB_DIRECT);
22209 /* For non-QImode operations, convert the word permutation control
22210 into a byte permutation control. */
22211 if (mode != V16QImode)
22213 mask = expand_simple_binop (maskmode, ASHIFT, mask,
22214 GEN_INT (exact_log2 (e)),
22215 NULL_RTX, 0, OPTAB_DIRECT);
22217 /* Convert mask to vector of chars. */
22218 mask = force_reg (V16QImode, gen_lowpart (V16QImode, mask));
22220 /* Replicate each of the input bytes into byte positions:
22221 (v2di) --> {0,0,0,0,0,0,0,0, 8,8,8,8,8,8,8,8}
22222 (v4si) --> {0,0,0,0, 4,4,4,4, 8,8,8,8, 12,12,12,12}
22223 (v8hi) --> {0,0, 2,2, 4,4, 6,6, ...}. */
22224 for (i = 0; i < 16; ++i)
22225 vec[i] = GEN_INT (i/e * e);
22226 vt = gen_rtx_CONST_VECTOR (V16QImode, gen_rtvec_v (16, vec));
22227 vt = validize_mem (force_const_mem (V16QImode, vt));
22228 if (TARGET_XOP)
22229 emit_insn (gen_xop_pperm (mask, mask, mask, vt));
22230 else
22231 emit_insn (gen_ssse3_pshufbv16qi3 (mask, mask, vt));
22233 /* Convert it into the byte positions by doing
22234 mask = mask + {0,1,..,16/w, 0,1,..,16/w, ...} */
22235 for (i = 0; i < 16; ++i)
22236 vec[i] = GEN_INT (i % e);
22237 vt = gen_rtx_CONST_VECTOR (V16QImode, gen_rtvec_v (16, vec));
22238 vt = validize_mem (force_const_mem (V16QImode, vt));
22239 emit_insn (gen_addv16qi3 (mask, mask, vt));
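/* The mask now holds, for every destination byte, the index of the
   source byte to fetch, which is the control format that the pshufb /
   vpperm instructions used below expect. */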
22242 /* The actual shuffle operations all operate on V16QImode. */
22243 op0 = gen_lowpart (V16QImode, op0);
22244 op1 = gen_lowpart (V16QImode, op1);
22246 if (TARGET_XOP)
22248 if (GET_MODE (target) != V16QImode)
22249 target = gen_reg_rtx (V16QImode);
22250 emit_insn (gen_xop_pperm (target, op0, op1, mask));
22251 if (target != operands[0])
22252 emit_move_insn (operands[0],
22253 gen_lowpart (GET_MODE (operands[0]), target));
22255 else if (one_operand_shuffle)
22257 if (GET_MODE (target) != V16QImode)
22258 target = gen_reg_rtx (V16QImode);
22259 emit_insn (gen_ssse3_pshufbv16qi3 (target, op0, mask));
22260 if (target != operands[0])
22261 emit_move_insn (operands[0],
22262 gen_lowpart (GET_MODE (operands[0]), target));
22264 else
22266 rtx xops[6];
22267 bool ok;
22269 /* Shuffle the two input vectors independently. */
22270 t1 = gen_reg_rtx (V16QImode);
22271 t2 = gen_reg_rtx (V16QImode);
22272 emit_insn (gen_ssse3_pshufbv16qi3 (t1, op0, mask));
22273 emit_insn (gen_ssse3_pshufbv16qi3 (t2, op1, mask));
22275 merge_two:
22276 /* Then merge them together. The key is whether any given control
22277 element contained a bit set that indicates the second word. */
22278 mask = operands[3];
22279 vt = GEN_INT (w);
22280 if (maskmode == V2DImode && !TARGET_SSE4_1)
22282 /* Without SSE4.1, we don't have V2DImode EQ. Perform one
22283 more shuffle to convert the V2DI input mask into a V4SI
22284 input mask. At that point the masking that expand_int_vcond
22285 performs will work as desired. */
22286 rtx t3 = gen_reg_rtx (V4SImode);
22287 emit_insn (gen_sse2_pshufd_1 (t3, gen_lowpart (V4SImode, mask),
22288 const0_rtx, const0_rtx,
22289 const2_rtx, const2_rtx));
22290 mask = t3;
22291 maskmode = V4SImode;
22292 e = w = 4;
22295 for (i = 0; i < w; i++)
22296 vec[i] = vt;
22297 vt = gen_rtx_CONST_VECTOR (maskmode, gen_rtvec_v (w, vec));
22298 vt = force_reg (maskmode, vt);
22299 mask = expand_simple_binop (maskmode, AND, mask, vt,
22300 NULL_RTX, 0, OPTAB_DIRECT);
22302 if (GET_MODE (target) != mode)
22303 target = gen_reg_rtx (mode);
22304 xops[0] = target;
22305 xops[1] = gen_lowpart (mode, t2);
22306 xops[2] = gen_lowpart (mode, t1);
22307 xops[3] = gen_rtx_EQ (maskmode, mask, vt);
22308 xops[4] = mask;
22309 xops[5] = vt;
22310 ok = ix86_expand_int_vcond (xops);
22311 gcc_assert (ok);
22312 if (target != operands[0])
22313 emit_move_insn (operands[0],
22314 gen_lowpart (GET_MODE (operands[0]), target));
22318 /* Unpack SRC into the next wider integer vector type. UNSIGNED_P is
22319 true if we should do zero extension, else sign extension. HIGH_P is
22320 true if we want the N/2 high elements, else the low elements. */
22322 void
22323 ix86_expand_sse_unpack (rtx dest, rtx src, bool unsigned_p, bool high_p)
22325 machine_mode imode = GET_MODE (src);
22326 rtx tmp;
22328 if (TARGET_SSE4_1)
22330 rtx (*unpack)(rtx, rtx);
22331 rtx (*extract)(rtx, rtx) = NULL;
22332 machine_mode halfmode = BLKmode;
22334 switch (imode)
22336 case V64QImode:
22337 if (unsigned_p)
22338 unpack = gen_avx512bw_zero_extendv32qiv32hi2;
22339 else
22340 unpack = gen_avx512bw_sign_extendv32qiv32hi2;
22341 halfmode = V32QImode;
22342 extract
22343 = high_p ? gen_vec_extract_hi_v64qi : gen_vec_extract_lo_v64qi;
22344 break;
22345 case V32QImode:
22346 if (unsigned_p)
22347 unpack = gen_avx2_zero_extendv16qiv16hi2;
22348 else
22349 unpack = gen_avx2_sign_extendv16qiv16hi2;
22350 halfmode = V16QImode;
22351 extract
22352 = high_p ? gen_vec_extract_hi_v32qi : gen_vec_extract_lo_v32qi;
22353 break;
22354 case V32HImode:
22355 if (unsigned_p)
22356 unpack = gen_avx512f_zero_extendv16hiv16si2;
22357 else
22358 unpack = gen_avx512f_sign_extendv16hiv16si2;
22359 halfmode = V16HImode;
22360 extract
22361 = high_p ? gen_vec_extract_hi_v32hi : gen_vec_extract_lo_v32hi;
22362 break;
22363 case V16HImode:
22364 if (unsigned_p)
22365 unpack = gen_avx2_zero_extendv8hiv8si2;
22366 else
22367 unpack = gen_avx2_sign_extendv8hiv8si2;
22368 halfmode = V8HImode;
22369 extract
22370 = high_p ? gen_vec_extract_hi_v16hi : gen_vec_extract_lo_v16hi;
22371 break;
22372 case V16SImode:
22373 if (unsigned_p)
22374 unpack = gen_avx512f_zero_extendv8siv8di2;
22375 else
22376 unpack = gen_avx512f_sign_extendv8siv8di2;
22377 halfmode = V8SImode;
22378 extract
22379 = high_p ? gen_vec_extract_hi_v16si : gen_vec_extract_lo_v16si;
22380 break;
22381 case V8SImode:
22382 if (unsigned_p)
22383 unpack = gen_avx2_zero_extendv4siv4di2;
22384 else
22385 unpack = gen_avx2_sign_extendv4siv4di2;
22386 halfmode = V4SImode;
22387 extract
22388 = high_p ? gen_vec_extract_hi_v8si : gen_vec_extract_lo_v8si;
22389 break;
22390 case V16QImode:
22391 if (unsigned_p)
22392 unpack = gen_sse4_1_zero_extendv8qiv8hi2;
22393 else
22394 unpack = gen_sse4_1_sign_extendv8qiv8hi2;
22395 break;
22396 case V8HImode:
22397 if (unsigned_p)
22398 unpack = gen_sse4_1_zero_extendv4hiv4si2;
22399 else
22400 unpack = gen_sse4_1_sign_extendv4hiv4si2;
22401 break;
22402 case V4SImode:
22403 if (unsigned_p)
22404 unpack = gen_sse4_1_zero_extendv2siv2di2;
22405 else
22406 unpack = gen_sse4_1_sign_extendv2siv2di2;
22407 break;
22408 default:
22409 gcc_unreachable ();
22412 if (GET_MODE_SIZE (imode) >= 32)
22414 tmp = gen_reg_rtx (halfmode);
22415 emit_insn (extract (tmp, src));
22417 else if (high_p)
22419 /* Shift the higher 8 bytes to the lower 8 bytes. */
22420 tmp = gen_reg_rtx (V1TImode);
22421 emit_insn (gen_sse2_lshrv1ti3 (tmp, gen_lowpart (V1TImode, src),
22422 GEN_INT (64)));
22423 tmp = gen_lowpart (imode, tmp);
22425 else
22426 tmp = src;
22428 emit_insn (unpack (dest, tmp));
22430 else
22432 rtx (*unpack)(rtx, rtx, rtx);
22434 switch (imode)
22436 case V16QImode:
22437 if (high_p)
22438 unpack = gen_vec_interleave_highv16qi;
22439 else
22440 unpack = gen_vec_interleave_lowv16qi;
22441 break;
22442 case V8HImode:
22443 if (high_p)
22444 unpack = gen_vec_interleave_highv8hi;
22445 else
22446 unpack = gen_vec_interleave_lowv8hi;
22447 break;
22448 case V4SImode:
22449 if (high_p)
22450 unpack = gen_vec_interleave_highv4si;
22451 else
22452 unpack = gen_vec_interleave_lowv4si;
22453 break;
22454 default:
22455 gcc_unreachable ();
22458 if (unsigned_p)
22459 tmp = force_reg (imode, CONST0_RTX (imode));
22460 else
22461 tmp = ix86_expand_sse_cmp (gen_reg_rtx (imode), GT, CONST0_RTX (imode),
22462 src, pc_rtx, pc_rtx);
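/* Without SSE4.1 the extension is done by interleaving SRC with either
   zero (zero extension) or with a 0 / -1 mask computed as 0 > SRC,
   which replicates each element's sign bit (sign extension). */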
22464 rtx tmp2 = gen_reg_rtx (imode);
22465 emit_insn (unpack (tmp2, src, tmp));
22466 emit_move_insn (dest, gen_lowpart (GET_MODE (dest), tmp2));
22470 /* Expand conditional increment or decrement using adc/sbb instructions.
22471 The default case using setcc followed by a conditional move can be
22472 done by generic code. */
22473 bool
22474 ix86_expand_int_addcc (rtx operands[])
22476 enum rtx_code code = GET_CODE (operands[1]);
22477 rtx flags;
22478 rtx (*insn)(rtx, rtx, rtx, rtx, rtx);
22479 rtx compare_op;
22480 rtx val = const0_rtx;
22481 bool fpcmp = false;
22482 machine_mode mode;
22483 rtx op0 = XEXP (operands[1], 0);
22484 rtx op1 = XEXP (operands[1], 1);
22486 if (operands[3] != const1_rtx
22487 && operands[3] != constm1_rtx)
22488 return false;
22489 if (!ix86_expand_carry_flag_compare (code, op0, op1, &compare_op))
22490 return false;
22491 code = GET_CODE (compare_op);
22493 flags = XEXP (compare_op, 0);
22495 if (GET_MODE (flags) == CCFPmode
22496 || GET_MODE (flags) == CCFPUmode)
22498 fpcmp = true;
22499 code = ix86_fp_compare_code_to_integer (code);
22502 if (code != LTU)
22504 val = constm1_rtx;
22505 if (fpcmp)
22506 PUT_CODE (compare_op,
22507 reverse_condition_maybe_unordered
22508 (GET_CODE (compare_op)));
22509 else
22510 PUT_CODE (compare_op, reverse_condition (GET_CODE (compare_op)));
22513 mode = GET_MODE (operands[0]);
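/* For example, "x += (a < b)" with unsigned operands ends up as
   "cmpl b, a ; adcl $0, x": the carry produced by the compare is folded
   straight into the addition. */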
22515 /* Construct either adc or sbb insn. */
22516 if ((code == LTU) == (operands[3] == constm1_rtx))
22518 switch (mode)
22520 case QImode:
22521 insn = gen_subqi3_carry;
22522 break;
22523 case HImode:
22524 insn = gen_subhi3_carry;
22525 break;
22526 case SImode:
22527 insn = gen_subsi3_carry;
22528 break;
22529 case DImode:
22530 insn = gen_subdi3_carry;
22531 break;
22532 default:
22533 gcc_unreachable ();
22536 else
22538 switch (mode)
22540 case QImode:
22541 insn = gen_addqi3_carry;
22542 break;
22543 case HImode:
22544 insn = gen_addhi3_carry;
22545 break;
22546 case SImode:
22547 insn = gen_addsi3_carry;
22548 break;
22549 case DImode:
22550 insn = gen_adddi3_carry;
22551 break;
22552 default:
22553 gcc_unreachable ();
22556 emit_insn (insn (operands[0], operands[2], val, flags, compare_op));
22558 return true;
22562 /* Split operands 0 and 1 into half-mode parts. Similar to split_double_mode,
22563 but works for floating point parameters and non-offsettable memories.
22564 For pushes, it returns just stack offsets; the values will be saved
22565 in the right order. At most four parts are generated. */
22567 static int
22568 ix86_split_to_parts (rtx operand, rtx *parts, machine_mode mode)
22570 int size;
22572 if (!TARGET_64BIT)
22573 size = mode==XFmode ? 3 : GET_MODE_SIZE (mode) / 4;
22574 else
22575 size = (GET_MODE_SIZE (mode) + 4) / 8;
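/* E.g. on 32-bit targets DFmode splits into 2 SImode parts, XFmode
   into 3 and TFmode into 4; on 64-bit targets XFmode and TFmode split
   into two parts. */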
22577 gcc_assert (!REG_P (operand) || !MMX_REGNO_P (REGNO (operand)));
22578 gcc_assert (size >= 2 && size <= 4);
22580 /* Optimize constant pool references to immediates. This is used by fp
22581 moves, which force all constants to memory to allow combining. */
22582 if (MEM_P (operand) && MEM_READONLY_P (operand))
22584 rtx tmp = maybe_get_pool_constant (operand);
22585 if (tmp)
22586 operand = tmp;
22589 if (MEM_P (operand) && !offsettable_memref_p (operand))
22591 /* The only non-offsettable memories we handle are pushes. */
22592 int ok = push_operand (operand, VOIDmode);
22594 gcc_assert (ok);
22596 operand = copy_rtx (operand);
22597 PUT_MODE (operand, word_mode);
22598 parts[0] = parts[1] = parts[2] = parts[3] = operand;
22599 return size;
22602 if (GET_CODE (operand) == CONST_VECTOR)
22604 machine_mode imode = int_mode_for_mode (mode);
22605 /* Caution: if we looked through a constant pool memory above,
22606 the operand may actually have a different mode now. That's
22607 ok, since we want to pun this all the way back to an integer. */
22608 operand = simplify_subreg (imode, operand, GET_MODE (operand), 0);
22609 gcc_assert (operand != NULL);
22610 mode = imode;
22613 if (!TARGET_64BIT)
22615 if (mode == DImode)
22616 split_double_mode (mode, &operand, 1, &parts[0], &parts[1]);
22617 else
22619 int i;
22621 if (REG_P (operand))
22623 gcc_assert (reload_completed);
22624 for (i = 0; i < size; i++)
22625 parts[i] = gen_rtx_REG (SImode, REGNO (operand) + i);
22627 else if (offsettable_memref_p (operand))
22629 operand = adjust_address (operand, SImode, 0);
22630 parts[0] = operand;
22631 for (i = 1; i < size; i++)
22632 parts[i] = adjust_address (operand, SImode, 4 * i);
22634 else if (GET_CODE (operand) == CONST_DOUBLE)
22636 REAL_VALUE_TYPE r;
22637 long l[4];
22639 REAL_VALUE_FROM_CONST_DOUBLE (r, operand);
22640 switch (mode)
22642 case TFmode:
22643 real_to_target (l, &r, mode);
22644 parts[3] = gen_int_mode (l[3], SImode);
22645 parts[2] = gen_int_mode (l[2], SImode);
22646 break;
22647 case XFmode:
22648 /* We can't use REAL_VALUE_TO_TARGET_LONG_DOUBLE since
22649 long double may not be 80-bit. */
22650 real_to_target (l, &r, mode);
22651 parts[2] = gen_int_mode (l[2], SImode);
22652 break;
22653 case DFmode:
22654 REAL_VALUE_TO_TARGET_DOUBLE (r, l);
22655 break;
22656 default:
22657 gcc_unreachable ();
22659 parts[1] = gen_int_mode (l[1], SImode);
22660 parts[0] = gen_int_mode (l[0], SImode);
22662 else
22663 gcc_unreachable ();
22666 else
22668 if (mode == TImode)
22669 split_double_mode (mode, &operand, 1, &parts[0], &parts[1]);
22670 if (mode == XFmode || mode == TFmode)
22672 machine_mode upper_mode = mode==XFmode ? SImode : DImode;
22673 if (REG_P (operand))
22675 gcc_assert (reload_completed);
22676 parts[0] = gen_rtx_REG (DImode, REGNO (operand) + 0);
22677 parts[1] = gen_rtx_REG (upper_mode, REGNO (operand) + 1);
22679 else if (offsettable_memref_p (operand))
22681 operand = adjust_address (operand, DImode, 0);
22682 parts[0] = operand;
22683 parts[1] = adjust_address (operand, upper_mode, 8);
22685 else if (GET_CODE (operand) == CONST_DOUBLE)
22687 REAL_VALUE_TYPE r;
22688 long l[4];
22690 REAL_VALUE_FROM_CONST_DOUBLE (r, operand);
22691 real_to_target (l, &r, mode);
22693 /* Do not use shift by 32 to avoid warning on 32bit systems. */
22694 if (HOST_BITS_PER_WIDE_INT >= 64)
22695 parts[0]
22696 = gen_int_mode
22697 ((l[0] & (((HOST_WIDE_INT) 2 << 31) - 1))
22698 + ((((HOST_WIDE_INT) l[1]) << 31) << 1),
22699 DImode);
22700 else
22701 parts[0] = immed_double_const (l[0], l[1], DImode);
22703 if (upper_mode == SImode)
22704 parts[1] = gen_int_mode (l[2], SImode);
22705 else if (HOST_BITS_PER_WIDE_INT >= 64)
22706 parts[1]
22707 = gen_int_mode
22708 ((l[2] & (((HOST_WIDE_INT) 2 << 31) - 1))
22709 + ((((HOST_WIDE_INT) l[3]) << 31) << 1),
22710 DImode);
22711 else
22712 parts[1] = immed_double_const (l[2], l[3], DImode);
22714 else
22715 gcc_unreachable ();
22719 return size;
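/* Illustrative example: on ia32, splitting a DFmode constant such as 1.0
   yields two SImode immediates (the low and high words of the IEEE double
   image), while splitting an offsettable DFmode MEM yields two SImode
   memory references at offsets 0 and 4.  */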
22722 /* Emit insns to perform a move or push of DI, DF, XF, and TF values.
22723 All required insns are emitted here. The split destination parts are
22724 staged in operands[2..5] and the source parts in operands[6..9],
22725 already placed in the correct copy order. */
22727 void
22728 ix86_split_long_move (rtx operands[])
22730 rtx part[2][4];
22731 int nparts, i, j;
22732 int push = 0;
22733 int collisions = 0;
22734 machine_mode mode = GET_MODE (operands[0]);
22735 bool collisionparts[4];
22737 /* The DFmode expanders may ask us to move a double.
22738 For a 64-bit target this is a single move. By hiding that fact
22739 here we simplify the i386.md splitters. */
22740 if (TARGET_64BIT && GET_MODE_SIZE (GET_MODE (operands[0])) == 8)
22742 /* Optimize constant pool reference to immediates. This is used by
22743 fp moves, that force all constants to memory to allow combining. */
22745 if (MEM_P (operands[1])
22746 && GET_CODE (XEXP (operands[1], 0)) == SYMBOL_REF
22747 && CONSTANT_POOL_ADDRESS_P (XEXP (operands[1], 0)))
22748 operands[1] = get_pool_constant (XEXP (operands[1], 0));
22749 if (push_operand (operands[0], VOIDmode))
22751 operands[0] = copy_rtx (operands[0]);
22752 PUT_MODE (operands[0], word_mode);
22754 else
22755 operands[0] = gen_lowpart (DImode, operands[0]);
22756 operands[1] = gen_lowpart (DImode, operands[1]);
22757 emit_move_insn (operands[0], operands[1]);
22758 return;
22761 /* The only non-offsettable memory we handle is push. */
22762 if (push_operand (operands[0], VOIDmode))
22763 push = 1;
22764 else
22765 gcc_assert (!MEM_P (operands[0])
22766 || offsettable_memref_p (operands[0]));
22768 nparts = ix86_split_to_parts (operands[1], part[1], GET_MODE (operands[0]));
22769 ix86_split_to_parts (operands[0], part[0], GET_MODE (operands[0]));
22771 /* When emitting a push, take care of source operands on the stack. */
22772 if (push && MEM_P (operands[1])
22773 && reg_overlap_mentioned_p (stack_pointer_rtx, operands[1]))
22775 rtx src_base = XEXP (part[1][nparts - 1], 0);
22777 /* Compensate for the stack decrement by 4. */
22778 if (!TARGET_64BIT && nparts == 3
22779 && mode == XFmode && TARGET_128BIT_LONG_DOUBLE)
22780 src_base = plus_constant (Pmode, src_base, 4);
22782 /* src_base refers to the stack pointer and is
22783 automatically decreased by emitted push. */
22784 for (i = 0; i < nparts; i++)
22785 part[1][i] = change_address (part[1][i],
22786 GET_MODE (part[1][i]), src_base);
22789 /* We need to do the copy in the right order in case an address register
22790 of the source overlaps the destination. */
22791 if (REG_P (part[0][0]) && MEM_P (part[1][0]))
22793 rtx tmp;
22795 for (i = 0; i < nparts; i++)
22797 collisionparts[i]
22798 = reg_overlap_mentioned_p (part[0][i], XEXP (part[1][0], 0));
22799 if (collisionparts[i])
22800 collisions++;
22803 /* Collision in the middle part can be handled by reordering. */
22804 if (collisions == 1 && nparts == 3 && collisionparts [1])
22806 std::swap (part[0][1], part[0][2]);
22807 std::swap (part[1][1], part[1][2]);
22809 else if (collisions == 1
22810 && nparts == 4
22811 && (collisionparts [1] || collisionparts [2]))
22813 if (collisionparts [1])
22815 std::swap (part[0][1], part[0][2]);
22816 std::swap (part[1][1], part[1][2]);
22818 else
22820 std::swap (part[0][2], part[0][3]);
22821 std::swap (part[1][2], part[1][3]);
22825 /* If there are more collisions, we can't handle it by reordering.
22826 Do an lea to the last part and use only one colliding move. */
22827 else if (collisions > 1)
22829 rtx base;
22831 collisions = 1;
22833 base = part[0][nparts - 1];
22835 /* Handle the case when the last part isn't valid for lea.
22836 Happens in 64-bit mode storing the 12-byte XFmode. */
22837 if (GET_MODE (base) != Pmode)
22838 base = gen_rtx_REG (Pmode, REGNO (base));
22840 emit_insn (gen_rtx_SET (VOIDmode, base, XEXP (part[1][0], 0)));
22841 part[1][0] = replace_equiv_address (part[1][0], base);
22842 for (i = 1; i < nparts; i++)
22844 tmp = plus_constant (Pmode, base, UNITS_PER_WORD * i);
22845 part[1][i] = replace_equiv_address (part[1][i], tmp);
22850 if (push)
22852 if (!TARGET_64BIT)
22854 if (nparts == 3)
22856 if (TARGET_128BIT_LONG_DOUBLE && mode == XFmode)
22857 emit_insn (ix86_gen_add3 (stack_pointer_rtx,
22858 stack_pointer_rtx, GEN_INT (-4)));
22859 emit_move_insn (part[0][2], part[1][2]);
22861 else if (nparts == 4)
22863 emit_move_insn (part[0][3], part[1][3]);
22864 emit_move_insn (part[0][2], part[1][2]);
22867 else
22869 /* In 64-bit mode we don't have a 32-bit push available. If the operand
22870 is a register, that is OK - we just use the larger counterpart. We also
22871 retype memory - this comes from an attempt to avoid a REX prefix on
22872 the move of the second half of a TFmode value. */
22873 if (GET_MODE (part[1][1]) == SImode)
22875 switch (GET_CODE (part[1][1]))
22877 case MEM:
22878 part[1][1] = adjust_address (part[1][1], DImode, 0);
22879 break;
22881 case REG:
22882 part[1][1] = gen_rtx_REG (DImode, REGNO (part[1][1]));
22883 break;
22885 default:
22886 gcc_unreachable ();
22889 if (GET_MODE (part[1][0]) == SImode)
22890 part[1][0] = part[1][1];
22893 emit_move_insn (part[0][1], part[1][1]);
22894 emit_move_insn (part[0][0], part[1][0]);
22895 return;
22898 /* Choose correct order to not overwrite the source before it is copied. */
22899 if ((REG_P (part[0][0])
22900 && REG_P (part[1][1])
22901 && (REGNO (part[0][0]) == REGNO (part[1][1])
22902 || (nparts == 3
22903 && REGNO (part[0][0]) == REGNO (part[1][2]))
22904 || (nparts == 4
22905 && REGNO (part[0][0]) == REGNO (part[1][3]))))
22906 || (collisions > 0
22907 && reg_overlap_mentioned_p (part[0][0], XEXP (part[1][0], 0))))
22909 for (i = 0, j = nparts - 1; i < nparts; i++, j--)
22911 operands[2 + i] = part[0][j];
22912 operands[6 + i] = part[1][j];
22915 else
22917 for (i = 0; i < nparts; i++)
22919 operands[2 + i] = part[0][i];
22920 operands[6 + i] = part[1][i];
22924 /* If optimizing for size, attempt to locally unCSE nonzero constants. */
22925 if (optimize_insn_for_size_p ())
22927 for (j = 0; j < nparts - 1; j++)
22928 if (CONST_INT_P (operands[6 + j])
22929 && operands[6 + j] != const0_rtx
22930 && REG_P (operands[2 + j]))
22931 for (i = j; i < nparts - 1; i++)
22932 if (CONST_INT_P (operands[7 + i])
22933 && INTVAL (operands[7 + i]) == INTVAL (operands[6 + j]))
22934 operands[7 + i] = operands[2 + j];
22937 for (i = 0; i < nparts; i++)
22938 emit_move_insn (operands[2 + i], operands[6 + i]);
22940 return;
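/* Illustrative example: a DImode register-to-register move on ia32 is
   emitted as two SImode moves.  When the first destination word would
   clobber a register that still feeds a later source word, the moves are
   emitted in reverse order, as selected by the collision checks above.  */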
22943 /* Helper function of ix86_split_ashl used to generate an SImode/DImode
22944 left shift by a constant, either using a single shift or
22945 a sequence of add instructions. */
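/* For instance, with a shift count of 2 and a cheap add, "x <<= 2" is
   emitted as two additions (x = x + x; x = x + x) rather than a single
   shift, provided we are not optimizing for size.  */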
22947 static void
22948 ix86_expand_ashl_const (rtx operand, int count, machine_mode mode)
22950 rtx (*insn)(rtx, rtx, rtx);
22952 if (count == 1
22953 || (count * ix86_cost->add <= ix86_cost->shift_const
22954 && !optimize_insn_for_size_p ()))
22956 insn = mode == DImode ? gen_addsi3 : gen_adddi3;
22957 while (count-- > 0)
22958 emit_insn (insn (operand, operand, operand));
22960 else
22962 insn = mode == DImode ? gen_ashlsi3 : gen_ashldi3;
22963 emit_insn (insn (operand, operand, GEN_INT (count)));
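/* Split a double-word left shift (operands[0] = operands[1] << operands[2],
   MODE being the double-word mode) into operations on the two word-sized
   halves.  Constant counts use a move plus SHLD/SHL; the special inputs 1
   and -1 get cheaper sequences; variable counts use SHLD/SHL followed by a
   fixup on the high bit of the count, done with CMOV when SCRATCH is
   available and with a conditional jump otherwise.  */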
22967 void
22968 ix86_split_ashl (rtx *operands, rtx scratch, machine_mode mode)
22970 rtx (*gen_ashl3)(rtx, rtx, rtx);
22971 rtx (*gen_shld)(rtx, rtx, rtx);
22972 int half_width = GET_MODE_BITSIZE (mode) >> 1;
22974 rtx low[2], high[2];
22975 int count;
22977 if (CONST_INT_P (operands[2]))
22979 split_double_mode (mode, operands, 2, low, high);
22980 count = INTVAL (operands[2]) & (GET_MODE_BITSIZE (mode) - 1);
22982 if (count >= half_width)
22984 emit_move_insn (high[0], low[1]);
22985 emit_move_insn (low[0], const0_rtx);
22987 if (count > half_width)
22988 ix86_expand_ashl_const (high[0], count - half_width, mode);
22990 else
22992 gen_shld = mode == DImode ? gen_x86_shld : gen_x86_64_shld;
22994 if (!rtx_equal_p (operands[0], operands[1]))
22995 emit_move_insn (operands[0], operands[1]);
22997 emit_insn (gen_shld (high[0], low[0], GEN_INT (count)));
22998 ix86_expand_ashl_const (low[0], count, mode);
23000 return;
23003 split_double_mode (mode, operands, 1, low, high);
23005 gen_ashl3 = mode == DImode ? gen_ashlsi3 : gen_ashldi3;
23007 if (operands[1] == const1_rtx)
23009 /* Assuming we've chosen QImode-capable registers, 1 << N
23010 can be done with two 32/64-bit shifts, no branches, no cmoves. */
23011 if (ANY_QI_REG_P (low[0]) && ANY_QI_REG_P (high[0]))
23013 rtx s, d, flags = gen_rtx_REG (CCZmode, FLAGS_REG);
23015 ix86_expand_clear (low[0]);
23016 ix86_expand_clear (high[0]);
23017 emit_insn (gen_testqi_ccz_1 (operands[2], GEN_INT (half_width)));
23019 d = gen_lowpart (QImode, low[0]);
23020 d = gen_rtx_STRICT_LOW_PART (VOIDmode, d);
23021 s = gen_rtx_EQ (QImode, flags, const0_rtx);
23022 emit_insn (gen_rtx_SET (VOIDmode, d, s));
23024 d = gen_lowpart (QImode, high[0]);
23025 d = gen_rtx_STRICT_LOW_PART (VOIDmode, d);
23026 s = gen_rtx_NE (QImode, flags, const0_rtx);
23027 emit_insn (gen_rtx_SET (VOIDmode, d, s));
23030 /* Otherwise, we can get the same results by manually performing
23031 a bit extract operation on bit 5/6, and then performing the two
23032 shifts. The two methods of getting 0/1 into low/high are exactly
23033 the same size. Avoiding the shift in the bit extract case helps
23034 pentium4 a bit; no one else seems to care much either way. */
23035 else
23037 machine_mode half_mode;
23038 rtx (*gen_lshr3)(rtx, rtx, rtx);
23039 rtx (*gen_and3)(rtx, rtx, rtx);
23040 rtx (*gen_xor3)(rtx, rtx, rtx);
23041 HOST_WIDE_INT bits;
23042 rtx x;
23044 if (mode == DImode)
23046 half_mode = SImode;
23047 gen_lshr3 = gen_lshrsi3;
23048 gen_and3 = gen_andsi3;
23049 gen_xor3 = gen_xorsi3;
23050 bits = 5;
23052 else
23054 half_mode = DImode;
23055 gen_lshr3 = gen_lshrdi3;
23056 gen_and3 = gen_anddi3;
23057 gen_xor3 = gen_xordi3;
23058 bits = 6;
23061 if (TARGET_PARTIAL_REG_STALL && !optimize_insn_for_size_p ())
23062 x = gen_rtx_ZERO_EXTEND (half_mode, operands[2]);
23063 else
23064 x = gen_lowpart (half_mode, operands[2]);
23065 emit_insn (gen_rtx_SET (VOIDmode, high[0], x));
23067 emit_insn (gen_lshr3 (high[0], high[0], GEN_INT (bits)));
23068 emit_insn (gen_and3 (high[0], high[0], const1_rtx));
23069 emit_move_insn (low[0], high[0]);
23070 emit_insn (gen_xor3 (low[0], low[0], const1_rtx));
23073 emit_insn (gen_ashl3 (low[0], low[0], operands[2]));
23074 emit_insn (gen_ashl3 (high[0], high[0], operands[2]));
23075 return;
23078 if (operands[1] == constm1_rtx)
23080 /* For -1 << N, we can avoid the shld instruction, because we
23081 know that we're shifting 0...31/63 ones into a -1. */
23082 emit_move_insn (low[0], constm1_rtx);
23083 if (optimize_insn_for_size_p ())
23084 emit_move_insn (high[0], low[0]);
23085 else
23086 emit_move_insn (high[0], constm1_rtx);
23088 else
23090 gen_shld = mode == DImode ? gen_x86_shld : gen_x86_64_shld;
23092 if (!rtx_equal_p (operands[0], operands[1]))
23093 emit_move_insn (operands[0], operands[1]);
23095 split_double_mode (mode, operands, 1, low, high);
23096 emit_insn (gen_shld (high[0], low[0], operands[2]));
23099 emit_insn (gen_ashl3 (low[0], low[0], operands[2]));
23101 if (TARGET_CMOVE && scratch)
23103 rtx (*gen_x86_shift_adj_1)(rtx, rtx, rtx, rtx)
23104 = mode == DImode ? gen_x86_shiftsi_adj_1 : gen_x86_shiftdi_adj_1;
23106 ix86_expand_clear (scratch);
23107 emit_insn (gen_x86_shift_adj_1 (high[0], low[0], operands[2], scratch));
23109 else
23111 rtx (*gen_x86_shift_adj_2)(rtx, rtx, rtx)
23112 = mode == DImode ? gen_x86_shiftsi_adj_2 : gen_x86_shiftdi_adj_2;
23114 emit_insn (gen_x86_shift_adj_2 (high[0], low[0], operands[2]));
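/* Split a double-word arithmetic right shift into operations on the two
   word-sized halves.  Constant counts are handled directly with SAR and
   SHRD; variable counts use SHRD/SAR followed by a fixup for counts of
   half a word or more, where the high half is refilled with sign bits
   (via CMOV when SCRATCH is available, otherwise with a jump).  */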
23118 void
23119 ix86_split_ashr (rtx *operands, rtx scratch, machine_mode mode)
23121 rtx (*gen_ashr3)(rtx, rtx, rtx)
23122 = mode == DImode ? gen_ashrsi3 : gen_ashrdi3;
23123 rtx (*gen_shrd)(rtx, rtx, rtx);
23124 int half_width = GET_MODE_BITSIZE (mode) >> 1;
23126 rtx low[2], high[2];
23127 int count;
23129 if (CONST_INT_P (operands[2]))
23131 split_double_mode (mode, operands, 2, low, high);
23132 count = INTVAL (operands[2]) & (GET_MODE_BITSIZE (mode) - 1);
23134 if (count == GET_MODE_BITSIZE (mode) - 1)
23136 emit_move_insn (high[0], high[1]);
23137 emit_insn (gen_ashr3 (high[0], high[0],
23138 GEN_INT (half_width - 1)));
23139 emit_move_insn (low[0], high[0]);
23142 else if (count >= half_width)
23144 emit_move_insn (low[0], high[1]);
23145 emit_move_insn (high[0], low[0]);
23146 emit_insn (gen_ashr3 (high[0], high[0],
23147 GEN_INT (half_width - 1)));
23149 if (count > half_width)
23150 emit_insn (gen_ashr3 (low[0], low[0],
23151 GEN_INT (count - half_width)));
23153 else
23155 gen_shrd = mode == DImode ? gen_x86_shrd : gen_x86_64_shrd;
23157 if (!rtx_equal_p (operands[0], operands[1]))
23158 emit_move_insn (operands[0], operands[1]);
23160 emit_insn (gen_shrd (low[0], high[0], GEN_INT (count)));
23161 emit_insn (gen_ashr3 (high[0], high[0], GEN_INT (count)));
23164 else
23166 gen_shrd = mode == DImode ? gen_x86_shrd : gen_x86_64_shrd;
23168 if (!rtx_equal_p (operands[0], operands[1]))
23169 emit_move_insn (operands[0], operands[1]);
23171 split_double_mode (mode, operands, 1, low, high);
23173 emit_insn (gen_shrd (low[0], high[0], operands[2]));
23174 emit_insn (gen_ashr3 (high[0], high[0], operands[2]));
23176 if (TARGET_CMOVE && scratch)
23178 rtx (*gen_x86_shift_adj_1)(rtx, rtx, rtx, rtx)
23179 = mode == DImode ? gen_x86_shiftsi_adj_1 : gen_x86_shiftdi_adj_1;
23181 emit_move_insn (scratch, high[0]);
23182 emit_insn (gen_ashr3 (scratch, scratch,
23183 GEN_INT (half_width - 1)));
23184 emit_insn (gen_x86_shift_adj_1 (low[0], high[0], operands[2],
23185 scratch));
23187 else
23189 rtx (*gen_x86_shift_adj_3)(rtx, rtx, rtx)
23190 = mode == DImode ? gen_x86_shiftsi_adj_3 : gen_x86_shiftdi_adj_3;
23192 emit_insn (gen_x86_shift_adj_3 (low[0], high[0], operands[2]));
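/* Split a double-word logical right shift into operations on the two
   word-sized halves.  This mirrors ix86_split_ashr, except that the
   vacated high half is cleared instead of being filled with copies of
   the sign bit.  */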
23197 void
23198 ix86_split_lshr (rtx *operands, rtx scratch, machine_mode mode)
23200 rtx (*gen_lshr3)(rtx, rtx, rtx)
23201 = mode == DImode ? gen_lshrsi3 : gen_lshrdi3;
23202 rtx (*gen_shrd)(rtx, rtx, rtx);
23203 int half_width = GET_MODE_BITSIZE (mode) >> 1;
23205 rtx low[2], high[2];
23206 int count;
23208 if (CONST_INT_P (operands[2]))
23210 split_double_mode (mode, operands, 2, low, high);
23211 count = INTVAL (operands[2]) & (GET_MODE_BITSIZE (mode) - 1);
23213 if (count >= half_width)
23215 emit_move_insn (low[0], high[1]);
23216 ix86_expand_clear (high[0]);
23218 if (count > half_width)
23219 emit_insn (gen_lshr3 (low[0], low[0],
23220 GEN_INT (count - half_width)));
23222 else
23224 gen_shrd = mode == DImode ? gen_x86_shrd : gen_x86_64_shrd;
23226 if (!rtx_equal_p (operands[0], operands[1]))
23227 emit_move_insn (operands[0], operands[1]);
23229 emit_insn (gen_shrd (low[0], high[0], GEN_INT (count)));
23230 emit_insn (gen_lshr3 (high[0], high[0], GEN_INT (count)));
23233 else
23235 gen_shrd = mode == DImode ? gen_x86_shrd : gen_x86_64_shrd;
23237 if (!rtx_equal_p (operands[0], operands[1]))
23238 emit_move_insn (operands[0], operands[1]);
23240 split_double_mode (mode, operands, 1, low, high);
23242 emit_insn (gen_shrd (low[0], high[0], operands[2]));
23243 emit_insn (gen_lshr3 (high[0], high[0], operands[2]));
23245 if (TARGET_CMOVE && scratch)
23247 rtx (*gen_x86_shift_adj_1)(rtx, rtx, rtx, rtx)
23248 = mode == DImode ? gen_x86_shiftsi_adj_1 : gen_x86_shiftdi_adj_1;
23250 ix86_expand_clear (scratch);
23251 emit_insn (gen_x86_shift_adj_1 (low[0], high[0], operands[2],
23252 scratch));
23254 else
23256 rtx (*gen_x86_shift_adj_2)(rtx, rtx, rtx)
23257 = mode == DImode ? gen_x86_shiftsi_adj_2 : gen_x86_shiftdi_adj_2;
23259 emit_insn (gen_x86_shift_adj_2 (low[0], high[0], operands[2]));
23264 /* Predict the just emitted jump instruction to be taken with probability PROB. */
23265 static void
23266 predict_jump (int prob)
23268 rtx insn = get_last_insn ();
23269 gcc_assert (JUMP_P (insn));
23270 add_int_reg_note (insn, REG_BR_PROB, prob);
23273 /* Helper function for the string operations below. Test whether the bits of
23274 VALUE are clear in VARIABLE; if so, jump to the returned label. */
23275 static rtx_code_label *
23276 ix86_expand_aligntest (rtx variable, int value, bool epilogue)
23278 rtx_code_label *label = gen_label_rtx ();
23279 rtx tmpcount = gen_reg_rtx (GET_MODE (variable));
23280 if (GET_MODE (variable) == DImode)
23281 emit_insn (gen_anddi3 (tmpcount, variable, GEN_INT (value)));
23282 else
23283 emit_insn (gen_andsi3 (tmpcount, variable, GEN_INT (value)));
23284 emit_cmp_and_jump_insns (tmpcount, const0_rtx, EQ, 0, GET_MODE (variable),
23285 1, label);
23286 if (epilogue)
23287 predict_jump (REG_BR_PROB_BASE * 50 / 100);
23288 else
23289 predict_jump (REG_BR_PROB_BASE * 90 / 100);
23290 return label;
23293 /* Decrease COUNTREG by VALUE. */
23294 static void
23295 ix86_adjust_counter (rtx countreg, HOST_WIDE_INT value)
23297 rtx (*gen_add)(rtx, rtx, rtx)
23298 = GET_MODE (countreg) == DImode ? gen_adddi3 : gen_addsi3;
23300 emit_insn (gen_add (countreg, countreg, GEN_INT (-value)));
23303 /* Zero-extend EXP, which may be in SImode, to a Pmode register. */
23305 ix86_zero_extend_to_Pmode (rtx exp)
23307 return force_reg (Pmode, convert_to_mode (Pmode, exp, 1));
23310 /* Divide COUNTREG by SCALE. */
23311 static rtx
23312 scale_counter (rtx countreg, int scale)
23314 rtx sc;
23316 if (scale == 1)
23317 return countreg;
23318 if (CONST_INT_P (countreg))
23319 return GEN_INT (INTVAL (countreg) / scale);
23320 gcc_assert (REG_P (countreg));
23322 sc = expand_simple_binop (GET_MODE (countreg), LSHIFTRT, countreg,
23323 GEN_INT (exact_log2 (scale)),
23324 NULL, 1, OPTAB_DIRECT);
23325 return sc;
23328 /* Return mode for the memcpy/memset loop counter. Prefer SImode over
23329 DImode for constant loop counts. */
23331 static machine_mode
23332 counter_mode (rtx count_exp)
23334 if (GET_MODE (count_exp) != VOIDmode)
23335 return GET_MODE (count_exp);
23336 if (!CONST_INT_P (count_exp))
23337 return Pmode;
23338 if (TARGET_64BIT && (INTVAL (count_exp) & ~0xffffffff))
23339 return DImode;
23340 return SImode;
23343 /* Copy the address to a Pmode register. This is used for x32 to
23344 truncate DImode TLS address to a SImode register. */
23346 static rtx
23347 ix86_copy_addr_to_reg (rtx addr)
23349 if (GET_MODE (addr) == Pmode || GET_MODE (addr) == VOIDmode)
23350 return copy_addr_to_reg (addr);
23351 else
23353 gcc_assert (GET_MODE (addr) == DImode && Pmode == SImode);
23354 return gen_rtx_SUBREG (SImode, copy_to_mode_reg (DImode, addr), 0);
23358 /* When ISSETMEM is FALSE, output a simple loop to move the memory pointed to
23359 by SRCPTR to DESTPTR via chunks of MODE unrolled UNROLL times; the overall
23360 size is COUNT, specified in bytes. When ISSETMEM is TRUE, output the
23361 equivalent loop to set the memory to VALUE (supposed to be in MODE).
23363 The size is rounded down to a whole number of chunks moved at once.
23364 SRCMEM and DESTMEM provide MEM rtxes to feed proper aliasing info. */
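/* The emitted code has roughly this shape (illustrative only):

     size = count & -(chunk * unroll);
     iter = 0;
   top:
     UNROLL chunk-sized moves (or stores of VALUE) at DESTPTR + iter;
     iter += chunk * unroll;
     if (iter < size) goto top;
     DESTPTR += iter;  SRCPTR += iter;     -- for the caller's epilogue  */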
23367 static void
23368 expand_set_or_movmem_via_loop (rtx destmem, rtx srcmem,
23369 rtx destptr, rtx srcptr, rtx value,
23370 rtx count, machine_mode mode, int unroll,
23371 int expected_size, bool issetmem)
23373 rtx_code_label *out_label, *top_label;
23374 rtx iter, tmp;
23375 machine_mode iter_mode = counter_mode (count);
23376 int piece_size_n = GET_MODE_SIZE (mode) * unroll;
23377 rtx piece_size = GEN_INT (piece_size_n);
23378 rtx piece_size_mask = GEN_INT (~((GET_MODE_SIZE (mode) * unroll) - 1));
23379 rtx size;
23380 int i;
23382 top_label = gen_label_rtx ();
23383 out_label = gen_label_rtx ();
23384 iter = gen_reg_rtx (iter_mode);
23386 size = expand_simple_binop (iter_mode, AND, count, piece_size_mask,
23387 NULL, 1, OPTAB_DIRECT);
23388 /* Those two should combine. */
23389 if (piece_size == const1_rtx)
23391 emit_cmp_and_jump_insns (size, const0_rtx, EQ, NULL_RTX, iter_mode,
23392 true, out_label);
23393 predict_jump (REG_BR_PROB_BASE * 10 / 100);
23395 emit_move_insn (iter, const0_rtx);
23397 emit_label (top_label);
23399 tmp = convert_modes (Pmode, iter_mode, iter, true);
23401 /* This assert could be relaxed - in that case we'd need to compute
23402 the smallest power of two containing PIECE_SIZE_N and pass it to
23403 offset_address. */
23404 gcc_assert ((piece_size_n & (piece_size_n - 1)) == 0);
23405 destmem = offset_address (destmem, tmp, piece_size_n);
23406 destmem = adjust_address (destmem, mode, 0);
23408 if (!issetmem)
23410 srcmem = offset_address (srcmem, copy_rtx (tmp), piece_size_n);
23411 srcmem = adjust_address (srcmem, mode, 0);
23413 /* When unrolling for chips that reorder memory reads and writes,
23414 we can save registers by using a single temporary.
23415 Also, using 4 temporaries is overkill in 32-bit mode. */
23416 if (!TARGET_64BIT && 0)
23418 for (i = 0; i < unroll; i++)
23420 if (i)
23422 destmem =
23423 adjust_address (copy_rtx (destmem), mode, GET_MODE_SIZE (mode));
23424 srcmem =
23425 adjust_address (copy_rtx (srcmem), mode, GET_MODE_SIZE (mode));
23427 emit_move_insn (destmem, srcmem);
23430 else
23432 rtx tmpreg[4];
23433 gcc_assert (unroll <= 4);
23434 for (i = 0; i < unroll; i++)
23436 tmpreg[i] = gen_reg_rtx (mode);
23437 if (i)
23439 srcmem =
23440 adjust_address (copy_rtx (srcmem), mode, GET_MODE_SIZE (mode));
23442 emit_move_insn (tmpreg[i], srcmem);
23444 for (i = 0; i < unroll; i++)
23446 if (i)
23448 destmem =
23449 adjust_address (copy_rtx (destmem), mode, GET_MODE_SIZE (mode));
23451 emit_move_insn (destmem, tmpreg[i]);
23455 else
23456 for (i = 0; i < unroll; i++)
23458 if (i)
23459 destmem =
23460 adjust_address (copy_rtx (destmem), mode, GET_MODE_SIZE (mode));
23461 emit_move_insn (destmem, value);
23464 tmp = expand_simple_binop (iter_mode, PLUS, iter, piece_size, iter,
23465 true, OPTAB_LIB_WIDEN);
23466 if (tmp != iter)
23467 emit_move_insn (iter, tmp);
23469 emit_cmp_and_jump_insns (iter, size, LT, NULL_RTX, iter_mode,
23470 true, top_label);
23471 if (expected_size != -1)
23473 expected_size /= GET_MODE_SIZE (mode) * unroll;
23474 if (expected_size == 0)
23475 predict_jump (0);
23476 else if (expected_size > REG_BR_PROB_BASE)
23477 predict_jump (REG_BR_PROB_BASE - 1);
23478 else
23479 predict_jump (REG_BR_PROB_BASE - (REG_BR_PROB_BASE + expected_size / 2) / expected_size);
23481 else
23482 predict_jump (REG_BR_PROB_BASE * 80 / 100);
23483 iter = ix86_zero_extend_to_Pmode (iter);
23484 tmp = expand_simple_binop (Pmode, PLUS, destptr, iter, destptr,
23485 true, OPTAB_LIB_WIDEN);
23486 if (tmp != destptr)
23487 emit_move_insn (destptr, tmp);
23488 if (!issetmem)
23490 tmp = expand_simple_binop (Pmode, PLUS, srcptr, iter, srcptr,
23491 true, OPTAB_LIB_WIDEN);
23492 if (tmp != srcptr)
23493 emit_move_insn (srcptr, tmp);
23495 emit_label (out_label);
23498 /* Output a "rep; mov" or "rep; stos" instruction depending on the ISSETMEM argument.
23499 When ISSETMEM is true, arguments SRCMEM and SRCPTR are ignored.
23500 When ISSETMEM is false, arguments VALUE and ORIG_VALUE are ignored.
23501 For the setmem case, VALUE is ORIG_VALUE promoted to a wider size.
23502 ORIG_VALUE is the original value passed to memset to fill the memory with.
23503 Other arguments have the same meaning as for the previous function. */
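/* The emitted insn is the familiar "rep movs{b,l,q}" / "rep stos{b,l,q}":
   the count register holds the number of MODE-sized elements and the
   string registers advance automatically; DESTEXP/SRCEXP give the RTL
   expressions for the final pointer values.  */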
23505 static void
23506 expand_set_or_movmem_via_rep (rtx destmem, rtx srcmem,
23507 rtx destptr, rtx srcptr, rtx value, rtx orig_value,
23508 rtx count,
23509 machine_mode mode, bool issetmem)
23511 rtx destexp;
23512 rtx srcexp;
23513 rtx countreg;
23514 HOST_WIDE_INT rounded_count;
23516 /* If possible, it is shorter to use rep movs.
23517 TODO: Maybe it is better to move this logic to decide_alg. */
23518 if (mode == QImode && CONST_INT_P (count) && !(INTVAL (count) & 3)
23519 && (!issetmem || orig_value == const0_rtx))
23520 mode = SImode;
23522 if (destptr != XEXP (destmem, 0) || GET_MODE (destmem) != BLKmode)
23523 destmem = adjust_automodify_address_nv (destmem, BLKmode, destptr, 0);
23525 countreg = ix86_zero_extend_to_Pmode (scale_counter (count,
23526 GET_MODE_SIZE (mode)));
23527 if (mode != QImode)
23529 destexp = gen_rtx_ASHIFT (Pmode, countreg,
23530 GEN_INT (exact_log2 (GET_MODE_SIZE (mode))));
23531 destexp = gen_rtx_PLUS (Pmode, destexp, destptr);
23533 else
23534 destexp = gen_rtx_PLUS (Pmode, destptr, countreg);
23535 if ((!issetmem || orig_value == const0_rtx) && CONST_INT_P (count))
23537 rounded_count = (INTVAL (count)
23538 & ~((HOST_WIDE_INT) GET_MODE_SIZE (mode) - 1));
23539 destmem = shallow_copy_rtx (destmem);
23540 set_mem_size (destmem, rounded_count);
23542 else if (MEM_SIZE_KNOWN_P (destmem))
23543 clear_mem_size (destmem);
23545 if (issetmem)
23547 value = force_reg (mode, gen_lowpart (mode, value));
23548 emit_insn (gen_rep_stos (destptr, countreg, destmem, value, destexp));
23550 else
23552 if (srcptr != XEXP (srcmem, 0) || GET_MODE (srcmem) != BLKmode)
23553 srcmem = adjust_automodify_address_nv (srcmem, BLKmode, srcptr, 0);
23554 if (mode != QImode)
23556 srcexp = gen_rtx_ASHIFT (Pmode, countreg,
23557 GEN_INT (exact_log2 (GET_MODE_SIZE (mode))));
23558 srcexp = gen_rtx_PLUS (Pmode, srcexp, srcptr);
23560 else
23561 srcexp = gen_rtx_PLUS (Pmode, srcptr, countreg);
23562 if (CONST_INT_P (count))
23564 rounded_count = (INTVAL (count)
23565 & ~((HOST_WIDE_INT) GET_MODE_SIZE (mode) - 1));
23566 srcmem = shallow_copy_rtx (srcmem);
23567 set_mem_size (srcmem, rounded_count);
23569 else
23571 if (MEM_SIZE_KNOWN_P (srcmem))
23572 clear_mem_size (srcmem);
23574 emit_insn (gen_rep_mov (destptr, destmem, srcptr, srcmem, countreg,
23575 destexp, srcexp));
23579 /* This function emits moves to copy SIZE_TO_MOVE bytes from SRCMEM to
23580 DESTMEM.
23581 SRCMEM is passed by pointer so it can be updated on return.
23582 The return value is the updated DESTMEM. */
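/* Illustrative example: for SIZE_TO_MOVE == 16 with SSE enabled this emits
   a single 16-byte vector load/store pair through a temporary register;
   without a suitable vector move it falls back to word-sized pieces,
   advancing DESTPTR and SRCPTR after each piece.  */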
23583 static rtx
23584 emit_memmov (rtx destmem, rtx *srcmem, rtx destptr, rtx srcptr,
23585 HOST_WIDE_INT size_to_move)
23587 rtx dst = destmem, src = *srcmem, adjust, tempreg;
23588 enum insn_code code;
23589 machine_mode move_mode;
23590 int piece_size, i;
23592 /* Find the widest mode in which we could perform moves.
23593 Start with the biggest power of 2 not greater than SIZE_TO_MOVE and halve
23594 it until a move of that size is supported. */
23595 piece_size = 1 << floor_log2 (size_to_move);
23596 move_mode = mode_for_size (piece_size * BITS_PER_UNIT, MODE_INT, 0);
23597 code = optab_handler (mov_optab, move_mode);
23598 while (code == CODE_FOR_nothing && piece_size > 1)
23600 piece_size >>= 1;
23601 move_mode = mode_for_size (piece_size * BITS_PER_UNIT, MODE_INT, 0);
23602 code = optab_handler (mov_optab, move_mode);
23605 /* Find the corresponding vector mode with the same size as MOVE_MODE.
23606 MOVE_MODE is an integer mode at the moment (SI, DI, TI, etc.). */
23607 if (GET_MODE_SIZE (move_mode) > GET_MODE_SIZE (word_mode))
23609 int nunits = GET_MODE_SIZE (move_mode) / GET_MODE_SIZE (word_mode);
23610 move_mode = mode_for_vector (word_mode, nunits);
23611 code = optab_handler (mov_optab, move_mode);
23612 if (code == CODE_FOR_nothing)
23614 move_mode = word_mode;
23615 piece_size = GET_MODE_SIZE (move_mode);
23616 code = optab_handler (mov_optab, move_mode);
23619 gcc_assert (code != CODE_FOR_nothing);
23621 dst = adjust_automodify_address_nv (dst, move_mode, destptr, 0);
23622 src = adjust_automodify_address_nv (src, move_mode, srcptr, 0);
23624 /* Emit moves. We'll need SIZE_TO_MOVE/PIECE_SIZE moves. */
23625 gcc_assert (size_to_move % piece_size == 0);
23626 adjust = GEN_INT (piece_size);
23627 for (i = 0; i < size_to_move; i += piece_size)
23629 /* We move from memory to memory, so we'll need to do it via
23630 a temporary register. */
23631 tempreg = gen_reg_rtx (move_mode);
23632 emit_insn (GEN_FCN (code) (tempreg, src));
23633 emit_insn (GEN_FCN (code) (dst, tempreg));
23635 emit_move_insn (destptr,
23636 gen_rtx_PLUS (Pmode, copy_rtx (destptr), adjust));
23637 emit_move_insn (srcptr,
23638 gen_rtx_PLUS (Pmode, copy_rtx (srcptr), adjust));
23640 dst = adjust_automodify_address_nv (dst, move_mode, destptr,
23641 piece_size);
23642 src = adjust_automodify_address_nv (src, move_mode, srcptr,
23643 piece_size);
23646 /* Update DST and SRC rtx. */
23647 *srcmem = src;
23648 return dst;
23651 /* Output code to copy at most count & (max_size - 1) bytes from SRC to DEST. */
23652 static void
23653 expand_movmem_epilogue (rtx destmem, rtx srcmem,
23654 rtx destptr, rtx srcptr, rtx count, int max_size)
23656 rtx src, dest;
23657 if (CONST_INT_P (count))
23659 HOST_WIDE_INT countval = INTVAL (count);
23660 HOST_WIDE_INT epilogue_size = countval % max_size;
23661 int i;
23663 /* For now MAX_SIZE should be a power of 2. This assert could be
23664 relaxed, but it would require a slightly more complicated epilogue
23665 expansion. */
23666 gcc_assert ((max_size & (max_size - 1)) == 0);
23667 for (i = max_size; i >= 1; i >>= 1)
23669 if (epilogue_size & i)
23670 destmem = emit_memmov (destmem, &srcmem, destptr, srcptr, i);
23672 return;
23674 if (max_size > 8)
23676 count = expand_simple_binop (GET_MODE (count), AND, count, GEN_INT (max_size - 1),
23677 count, 1, OPTAB_DIRECT);
23678 expand_set_or_movmem_via_loop (destmem, srcmem, destptr, srcptr, NULL,
23679 count, QImode, 1, 4, false);
23680 return;
23683 /* When there are string ops, we can cheaply increase dest and src pointers.
23684 Otherwise we save code size by maintaining an offset (zero is readily
23685 available from the preceding rep operation) and using x86 addressing modes.
23687 if (TARGET_SINGLE_STRINGOP)
23689 if (max_size > 4)
23691 rtx_code_label *label = ix86_expand_aligntest (count, 4, true);
23692 src = change_address (srcmem, SImode, srcptr);
23693 dest = change_address (destmem, SImode, destptr);
23694 emit_insn (gen_strmov (destptr, dest, srcptr, src));
23695 emit_label (label);
23696 LABEL_NUSES (label) = 1;
23698 if (max_size > 2)
23700 rtx_code_label *label = ix86_expand_aligntest (count, 2, true);
23701 src = change_address (srcmem, HImode, srcptr);
23702 dest = change_address (destmem, HImode, destptr);
23703 emit_insn (gen_strmov (destptr, dest, srcptr, src));
23704 emit_label (label);
23705 LABEL_NUSES (label) = 1;
23707 if (max_size > 1)
23709 rtx_code_label *label = ix86_expand_aligntest (count, 1, true);
23710 src = change_address (srcmem, QImode, srcptr);
23711 dest = change_address (destmem, QImode, destptr);
23712 emit_insn (gen_strmov (destptr, dest, srcptr, src));
23713 emit_label (label);
23714 LABEL_NUSES (label) = 1;
23717 else
23719 rtx offset = force_reg (Pmode, const0_rtx);
23720 rtx tmp;
23722 if (max_size > 4)
23724 rtx_code_label *label = ix86_expand_aligntest (count, 4, true);
23725 src = change_address (srcmem, SImode, srcptr);
23726 dest = change_address (destmem, SImode, destptr);
23727 emit_move_insn (dest, src);
23728 tmp = expand_simple_binop (Pmode, PLUS, offset, GEN_INT (4), NULL,
23729 true, OPTAB_LIB_WIDEN);
23730 if (tmp != offset)
23731 emit_move_insn (offset, tmp);
23732 emit_label (label);
23733 LABEL_NUSES (label) = 1;
23735 if (max_size > 2)
23737 rtx_code_label *label = ix86_expand_aligntest (count, 2, true);
23738 tmp = gen_rtx_PLUS (Pmode, srcptr, offset);
23739 src = change_address (srcmem, HImode, tmp);
23740 tmp = gen_rtx_PLUS (Pmode, destptr, offset);
23741 dest = change_address (destmem, HImode, tmp);
23742 emit_move_insn (dest, src);
23743 tmp = expand_simple_binop (Pmode, PLUS, offset, GEN_INT (2), tmp,
23744 true, OPTAB_LIB_WIDEN);
23745 if (tmp != offset)
23746 emit_move_insn (offset, tmp);
23747 emit_label (label);
23748 LABEL_NUSES (label) = 1;
23750 if (max_size > 1)
23752 rtx_code_label *label = ix86_expand_aligntest (count, 1, true);
23753 tmp = gen_rtx_PLUS (Pmode, srcptr, offset);
23754 src = change_address (srcmem, QImode, tmp);
23755 tmp = gen_rtx_PLUS (Pmode, destptr, offset);
23756 dest = change_address (destmem, QImode, tmp);
23757 emit_move_insn (dest, src);
23758 emit_label (label);
23759 LABEL_NUSES (label) = 1;
23764 /* This function emits moves to fill SIZE_TO_MOVE bytes starting at DESTMEM
23765 with the value PROMOTED_VAL.
23766 (Unlike emit_memmov there is no source operand to update.)
23767 The return value is the updated DESTMEM. */
23768 static rtx
23769 emit_memset (rtx destmem, rtx destptr, rtx promoted_val,
23770 HOST_WIDE_INT size_to_move)
23772 rtx dst = destmem, adjust;
23773 enum insn_code code;
23774 machine_mode move_mode;
23775 int piece_size, i;
23777 /* Find the widest mode in which we could perform moves.
23778 Start from the mode of PROMOTED_VAL and narrow it when SIZE_TO_MOVE
23779 is smaller than that mode's size. */
23780 move_mode = GET_MODE (promoted_val);
23781 if (move_mode == VOIDmode)
23782 move_mode = QImode;
23783 if (size_to_move < GET_MODE_SIZE (move_mode))
23785 move_mode = mode_for_size (size_to_move * BITS_PER_UNIT, MODE_INT, 0);
23786 promoted_val = gen_lowpart (move_mode, promoted_val);
23788 piece_size = GET_MODE_SIZE (move_mode);
23789 code = optab_handler (mov_optab, move_mode);
23790 gcc_assert (code != CODE_FOR_nothing && promoted_val != NULL_RTX);
23792 dst = adjust_automodify_address_nv (dst, move_mode, destptr, 0);
23794 /* Emit moves. We'll need SIZE_TO_MOVE/PIECE_SIZE moves. */
23795 gcc_assert (size_to_move % piece_size == 0);
23796 adjust = GEN_INT (piece_size);
23797 for (i = 0; i < size_to_move; i += piece_size)
23799 if (piece_size <= GET_MODE_SIZE (word_mode))
23801 emit_insn (gen_strset (destptr, dst, promoted_val));
23802 dst = adjust_automodify_address_nv (dst, move_mode, destptr,
23803 piece_size);
23804 continue;
23807 emit_insn (GEN_FCN (code) (dst, promoted_val));
23809 emit_move_insn (destptr,
23810 gen_rtx_PLUS (Pmode, copy_rtx (destptr), adjust));
23812 dst = adjust_automodify_address_nv (dst, move_mode, destptr,
23813 piece_size);
23816 /* Update DST rtx. */
23817 return dst;
23819 /* Output code to set at most count & (max_size - 1) bytes starting at DEST. */
23820 static void
23821 expand_setmem_epilogue_via_loop (rtx destmem, rtx destptr, rtx value,
23822 rtx count, int max_size)
23824 count =
23825 expand_simple_binop (counter_mode (count), AND, count,
23826 GEN_INT (max_size - 1), count, 1, OPTAB_DIRECT);
23827 expand_set_or_movmem_via_loop (destmem, NULL, destptr, NULL,
23828 gen_lowpart (QImode, value), count, QImode,
23829 1, max_size / 2, true);
23832 /* Output code to set at most count & (max_size - 1) bytes starting at DEST. */
23833 static void
23834 expand_setmem_epilogue (rtx destmem, rtx destptr, rtx value, rtx vec_value,
23835 rtx count, int max_size)
23837 rtx dest;
23839 if (CONST_INT_P (count))
23841 HOST_WIDE_INT countval = INTVAL (count);
23842 HOST_WIDE_INT epilogue_size = countval % max_size;
23843 int i;
23845 /* For now MAX_SIZE should be a power of 2. This assert could be
23846 relaxed, but it would require a slightly more complicated epilogue
23847 expansion. */
23848 gcc_assert ((max_size & (max_size - 1)) == 0);
23849 for (i = max_size; i >= 1; i >>= 1)
23851 if (epilogue_size & i)
23853 if (vec_value && i > GET_MODE_SIZE (GET_MODE (value)))
23854 destmem = emit_memset (destmem, destptr, vec_value, i);
23855 else
23856 destmem = emit_memset (destmem, destptr, value, i);
23859 return;
23861 if (max_size > 32)
23863 expand_setmem_epilogue_via_loop (destmem, destptr, value, count, max_size);
23864 return;
23866 if (max_size > 16)
23868 rtx_code_label *label = ix86_expand_aligntest (count, 16, true);
23869 if (TARGET_64BIT)
23871 dest = change_address (destmem, DImode, destptr);
23872 emit_insn (gen_strset (destptr, dest, value));
23873 dest = adjust_automodify_address_nv (dest, DImode, destptr, 8);
23874 emit_insn (gen_strset (destptr, dest, value));
23876 else
23878 dest = change_address (destmem, SImode, destptr);
23879 emit_insn (gen_strset (destptr, dest, value));
23880 dest = adjust_automodify_address_nv (dest, SImode, destptr, 4);
23881 emit_insn (gen_strset (destptr, dest, value));
23882 dest = adjust_automodify_address_nv (dest, SImode, destptr, 8);
23883 emit_insn (gen_strset (destptr, dest, value));
23884 dest = adjust_automodify_address_nv (dest, SImode, destptr, 12);
23885 emit_insn (gen_strset (destptr, dest, value));
23887 emit_label (label);
23888 LABEL_NUSES (label) = 1;
23890 if (max_size > 8)
23892 rtx_code_label *label = ix86_expand_aligntest (count, 8, true);
23893 if (TARGET_64BIT)
23895 dest = change_address (destmem, DImode, destptr);
23896 emit_insn (gen_strset (destptr, dest, value));
23898 else
23900 dest = change_address (destmem, SImode, destptr);
23901 emit_insn (gen_strset (destptr, dest, value));
23902 dest = adjust_automodify_address_nv (dest, SImode, destptr, 4);
23903 emit_insn (gen_strset (destptr, dest, value));
23905 emit_label (label);
23906 LABEL_NUSES (label) = 1;
23908 if (max_size > 4)
23910 rtx_code_label *label = ix86_expand_aligntest (count, 4, true);
23911 dest = change_address (destmem, SImode, destptr);
23912 emit_insn (gen_strset (destptr, dest, gen_lowpart (SImode, value)));
23913 emit_label (label);
23914 LABEL_NUSES (label) = 1;
23916 if (max_size > 2)
23918 rtx_code_label *label = ix86_expand_aligntest (count, 2, true);
23919 dest = change_address (destmem, HImode, destptr);
23920 emit_insn (gen_strset (destptr, dest, gen_lowpart (HImode, value)));
23921 emit_label (label);
23922 LABEL_NUSES (label) = 1;
23924 if (max_size > 1)
23926 rtx_code_label *label = ix86_expand_aligntest (count, 1, true);
23927 dest = change_address (destmem, QImode, destptr);
23928 emit_insn (gen_strset (destptr, dest, gen_lowpart (QImode, value)));
23929 emit_label (label);
23930 LABEL_NUSES (label) = 1;
23934 /* Depending on ISSETMEM, copy enough bytes from SRCMEM to DESTMEM, or store
23935 enough bytes to DESTMEM, to align it to DESIRED_ALIGNMENT. The original
23936 alignment is ALIGN. Depending on ISSETMEM, either arguments SRCMEM/SRCPTR
23937 or VALUE/VEC_VALUE are ignored.
23938 The return value is the updated DESTMEM. */
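/* Illustrative example: with ALIGN == 1 and DESIRED_ALIGNMENT == 16 this
   emits four alignment tests that conditionally copy (or store) 1, 2, 4
   and 8 bytes and adjust COUNT accordingly, so that DESTPTR is 16-byte
   aligned when the main loop starts.  */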
23939 static rtx
23940 expand_set_or_movmem_prologue (rtx destmem, rtx srcmem,
23941 rtx destptr, rtx srcptr, rtx value,
23942 rtx vec_value, rtx count, int align,
23943 int desired_alignment, bool issetmem)
23945 int i;
23946 for (i = 1; i < desired_alignment; i <<= 1)
23948 if (align <= i)
23950 rtx_code_label *label = ix86_expand_aligntest (destptr, i, false);
23951 if (issetmem)
23953 if (vec_value && i > GET_MODE_SIZE (GET_MODE (value)))
23954 destmem = emit_memset (destmem, destptr, vec_value, i);
23955 else
23956 destmem = emit_memset (destmem, destptr, value, i);
23958 else
23959 destmem = emit_memmov (destmem, &srcmem, destptr, srcptr, i);
23960 ix86_adjust_counter (count, i);
23961 emit_label (label);
23962 LABEL_NUSES (label) = 1;
23963 set_mem_align (destmem, i * 2 * BITS_PER_UNIT);
23966 return destmem;
23969 /* Test if COUNT&SIZE is nonzero and if so, expand a movmem
23970 or setmem sequence that is valid for SIZE..2*SIZE-1 bytes
23971 and jump to DONE_LABEL. */
23972 static void
23973 expand_small_movmem_or_setmem (rtx destmem, rtx srcmem,
23974 rtx destptr, rtx srcptr,
23975 rtx value, rtx vec_value,
23976 rtx count, int size,
23977 rtx done_label, bool issetmem)
23979 rtx_code_label *label = ix86_expand_aligntest (count, size, false);
23980 machine_mode mode = mode_for_size (size * BITS_PER_UNIT, MODE_INT, 1);
23981 rtx modesize;
23982 int n;
23984 /* If we do not have a vector value to copy, we must reduce the size. */
23985 if (issetmem)
23987 if (!vec_value)
23989 if (GET_MODE (value) == VOIDmode && size > 8)
23990 mode = Pmode;
23991 else if (GET_MODE_SIZE (mode) > GET_MODE_SIZE (GET_MODE (value)))
23992 mode = GET_MODE (value);
23994 else
23995 mode = GET_MODE (vec_value), value = vec_value;
23997 else
23999 /* Choose appropriate vector mode. */
24000 if (size >= 32)
24001 mode = TARGET_AVX ? V32QImode : TARGET_SSE ? V16QImode : DImode;
24002 else if (size >= 16)
24003 mode = TARGET_SSE ? V16QImode : DImode;
24004 srcmem = change_address (srcmem, mode, srcptr);
24006 destmem = change_address (destmem, mode, destptr);
24007 modesize = GEN_INT (GET_MODE_SIZE (mode));
24008 gcc_assert (GET_MODE_SIZE (mode) <= size);
24009 for (n = 0; n * GET_MODE_SIZE (mode) < size; n++)
24011 if (issetmem)
24012 emit_move_insn (destmem, gen_lowpart (mode, value));
24013 else
24015 emit_move_insn (destmem, srcmem);
24016 srcmem = offset_address (srcmem, modesize, GET_MODE_SIZE (mode));
24018 destmem = offset_address (destmem, modesize, GET_MODE_SIZE (mode));
24021 destmem = offset_address (destmem, count, 1);
24022 destmem = offset_address (destmem, GEN_INT (-2 * size),
24023 GET_MODE_SIZE (mode));
24024 if (!issetmem)
24026 srcmem = offset_address (srcmem, count, 1);
24027 srcmem = offset_address (srcmem, GEN_INT (-2 * size),
24028 GET_MODE_SIZE (mode));
24030 for (n = 0; n * GET_MODE_SIZE (mode) < size; n++)
24032 if (issetmem)
24033 emit_move_insn (destmem, gen_lowpart (mode, value));
24034 else
24036 emit_move_insn (destmem, srcmem);
24037 srcmem = offset_address (srcmem, modesize, GET_MODE_SIZE (mode));
24039 destmem = offset_address (destmem, modesize, GET_MODE_SIZE (mode));
24041 emit_jump_insn (gen_jump (done_label));
24042 emit_barrier ();
24044 emit_label (label);
24045 LABEL_NUSES (label) = 1;
24048 /* Handle a small memcpy (up to SIZE, which is supposed to be a small power of 2)
24049 and get ready for the main memcpy loop by copying the initial DESIRED_ALIGN-ALIGN
24050 bytes and the last SIZE bytes, adjusting DESTPTR/SRCPTR/COUNT so that we can
24051 proceed with a loop copying SIZE bytes at once. Do moves in MODE.
24052 DONE_LABEL is a label after the whole copying sequence. The label is created
24053 on demand if *DONE_LABEL is NULL.
24054 MIN_SIZE is the minimal size of the block copied. This value gets adjusted
24055 for the new bounds after the initial copies.
24057 DESTMEM/SRCMEM are memory expressions pointing to the copied block,
24058 DESTPTR/SRCPTR are pointers to the block. DYNAMIC_CHECK indicates whether
24059 we will dispatch to a library call for large blocks.
24061 In pseudocode we do:
24063 if (COUNT < SIZE)
24065 Assume that SIZE is 4. Bigger sizes are handled analogously
24066 if (COUNT & 4)
24068 copy 4 bytes from SRCPTR to DESTPTR
24069 copy 4 bytes from SRCPTR + COUNT - 4 to DESTPTR + COUNT - 4
24070 goto done_label
24072 if (!COUNT)
24073 goto done_label;
24074 copy 1 byte from SRCPTR to DESTPTR
24075 if (COUNT & 2)
24077 copy 2 bytes from SRCPTR to DESTPTR
24078 copy 2 bytes from SRCPTR + COUNT - 2 to DESTPTR + COUNT - 2
24081 else
24083 copy at least DESIRED_ALIGN-ALIGN bytes from SRCPTR to DESTPTR
24084 copy SIZE bytes from SRCPTR + COUNT - SIZE to DESTPTR + COUNT -SIZE
24086 OLD_DESTPTR = DESTPTR;
24087 Align DESTPTR up to DESIRED_ALIGN
24088 SRCPTR += DESTPTR - OLD_DESTPTR
24089 COUNT -= DESTPTR - OLD_DESTPTR
24090 if (DYNAMIC_CHECK)
24091 Round COUNT down to multiple of SIZE
24092 << optional caller supplied zero size guard is here >>
24093 << optional caller supplied dynamic check is here >>
24094 << caller supplied main copy loop is here >>
24096 done_label:
24098 static void
24099 expand_set_or_movmem_prologue_epilogue_by_misaligned_moves (rtx destmem, rtx srcmem,
24100 rtx *destptr, rtx *srcptr,
24101 machine_mode mode,
24102 rtx value, rtx vec_value,
24103 rtx *count,
24104 rtx_code_label **done_label,
24105 int size,
24106 int desired_align,
24107 int align,
24108 unsigned HOST_WIDE_INT *min_size,
24109 bool dynamic_check,
24110 bool issetmem)
24112 rtx_code_label *loop_label = NULL, *label;
24113 int n;
24114 rtx modesize;
24115 int prolog_size = 0;
24116 rtx mode_value;
24118 /* Choose the proper value to copy. */
24119 if (issetmem && VECTOR_MODE_P (mode))
24120 mode_value = vec_value;
24121 else
24122 mode_value = value;
24123 gcc_assert (GET_MODE_SIZE (mode) <= size);
24125 /* See if block is big or small, handle small blocks. */
24126 if (!CONST_INT_P (*count) && *min_size < (unsigned HOST_WIDE_INT)size)
24128 int size2 = size;
24129 loop_label = gen_label_rtx ();
24131 if (!*done_label)
24132 *done_label = gen_label_rtx ();
24134 emit_cmp_and_jump_insns (*count, GEN_INT (size2), GE, 0, GET_MODE (*count),
24135 1, loop_label);
24136 size2 >>= 1;
24138 /* Handle sizes > 3. */
24139 for (;size2 > 2; size2 >>= 1)
24140 expand_small_movmem_or_setmem (destmem, srcmem,
24141 *destptr, *srcptr,
24142 value, vec_value,
24143 *count,
24144 size2, *done_label, issetmem);
24145 /* Nothing to copy? Jump to DONE_LABEL if so */
24146 emit_cmp_and_jump_insns (*count, const0_rtx, EQ, 0, GET_MODE (*count),
24147 1, *done_label);
24149 /* Do a byte copy. */
24150 destmem = change_address (destmem, QImode, *destptr);
24151 if (issetmem)
24152 emit_move_insn (destmem, gen_lowpart (QImode, value));
24153 else
24155 srcmem = change_address (srcmem, QImode, *srcptr);
24156 emit_move_insn (destmem, srcmem);
24159 /* Handle sizes 2 and 3. */
24160 label = ix86_expand_aligntest (*count, 2, false);
24161 destmem = change_address (destmem, HImode, *destptr);
24162 destmem = offset_address (destmem, *count, 1);
24163 destmem = offset_address (destmem, GEN_INT (-2), 2);
24164 if (issetmem)
24165 emit_move_insn (destmem, gen_lowpart (HImode, value));
24166 else
24168 srcmem = change_address (srcmem, HImode, *srcptr);
24169 srcmem = offset_address (srcmem, *count, 1);
24170 srcmem = offset_address (srcmem, GEN_INT (-2), 2);
24171 emit_move_insn (destmem, srcmem);
24174 emit_label (label);
24175 LABEL_NUSES (label) = 1;
24176 emit_jump_insn (gen_jump (*done_label));
24177 emit_barrier ();
24179 else
24180 gcc_assert (*min_size >= (unsigned HOST_WIDE_INT)size
24181 || UINTVAL (*count) >= (unsigned HOST_WIDE_INT)size);
24183 /* Start memcpy for COUNT >= SIZE. */
24184 if (loop_label)
24186 emit_label (loop_label);
24187 LABEL_NUSES (loop_label) = 1;
24190 /* Copy first desired_align bytes. */
24191 if (!issetmem)
24192 srcmem = change_address (srcmem, mode, *srcptr);
24193 destmem = change_address (destmem, mode, *destptr);
24194 modesize = GEN_INT (GET_MODE_SIZE (mode));
24195 for (n = 0; prolog_size < desired_align - align; n++)
24197 if (issetmem)
24198 emit_move_insn (destmem, mode_value);
24199 else
24201 emit_move_insn (destmem, srcmem);
24202 srcmem = offset_address (srcmem, modesize, GET_MODE_SIZE (mode));
24204 destmem = offset_address (destmem, modesize, GET_MODE_SIZE (mode));
24205 prolog_size += GET_MODE_SIZE (mode);
24209 /* Copy last SIZE bytes. */
24210 destmem = offset_address (destmem, *count, 1);
24211 destmem = offset_address (destmem,
24212 GEN_INT (-size - prolog_size),
24214 if (issetmem)
24215 emit_move_insn (destmem, mode_value);
24216 else
24218 srcmem = offset_address (srcmem, *count, 1);
24219 srcmem = offset_address (srcmem,
24220 GEN_INT (-size - prolog_size),
24222 emit_move_insn (destmem, srcmem);
24224 for (n = 1; n * GET_MODE_SIZE (mode) < size; n++)
24226 destmem = offset_address (destmem, modesize, 1);
24227 if (issetmem)
24228 emit_move_insn (destmem, mode_value);
24229 else
24231 srcmem = offset_address (srcmem, modesize, 1);
24232 emit_move_insn (destmem, srcmem);
24236 /* Align destination. */
24237 if (desired_align > 1 && desired_align > align)
24239 rtx saveddest = *destptr;
24241 gcc_assert (desired_align <= size);
24242 /* Align destptr up, place it to new register. */
24243 *destptr = expand_simple_binop (GET_MODE (*destptr), PLUS, *destptr,
24244 GEN_INT (prolog_size),
24245 NULL_RTX, 1, OPTAB_DIRECT);
24246 *destptr = expand_simple_binop (GET_MODE (*destptr), AND, *destptr,
24247 GEN_INT (-desired_align),
24248 *destptr, 1, OPTAB_DIRECT);
24249 /* See how many bytes we skipped. */
24250 saveddest = expand_simple_binop (GET_MODE (*destptr), MINUS, saveddest,
24251 *destptr,
24252 saveddest, 1, OPTAB_DIRECT);
24253 /* Adjust srcptr and count. */
24254 if (!issetmem)
24255 *srcptr = expand_simple_binop (GET_MODE (*srcptr), MINUS, *srcptr, saveddest,
24256 *srcptr, 1, OPTAB_DIRECT);
24257 *count = expand_simple_binop (GET_MODE (*count), PLUS, *count,
24258 saveddest, *count, 1, OPTAB_DIRECT);
24259 /* We copied at most size + prolog_size. */
24260 if (*min_size > (unsigned HOST_WIDE_INT)(size + prolog_size))
24261 *min_size = (*min_size - size) & ~(unsigned HOST_WIDE_INT)(size - 1);
24262 else
24263 *min_size = 0;
24265 /* Our loops always round down the block size, but for dispatch to a library
24266 call we need the precise value. */
24267 if (dynamic_check)
24268 *count = expand_simple_binop (GET_MODE (*count), AND, *count,
24269 GEN_INT (-size), *count, 1, OPTAB_DIRECT);
24271 else
24273 gcc_assert (prolog_size == 0);
24274 /* Decrease count, so we won't end up copying last word twice. */
24275 if (!CONST_INT_P (*count))
24276 *count = expand_simple_binop (GET_MODE (*count), PLUS, *count,
24277 constm1_rtx, *count, 1, OPTAB_DIRECT);
24278 else
24279 *count = GEN_INT ((UINTVAL (*count) - 1) & ~(unsigned HOST_WIDE_INT)(size - 1));
24280 if (*min_size)
24281 *min_size = (*min_size - 1) & ~(unsigned HOST_WIDE_INT)(size - 1);
24286 /* This function is like the previous one, except here we know how many bytes
24287 need to be copied. That allows us to update alignment not only of DST, which
24288 is returned, but also of SRC, which is passed as a pointer for that
24289 reason. */
24290 static rtx
24291 expand_set_or_movmem_constant_prologue (rtx dst, rtx *srcp, rtx destreg,
24292 rtx srcreg, rtx value, rtx vec_value,
24293 int desired_align, int align_bytes,
24294 bool issetmem)
24296 rtx src = NULL;
24297 rtx orig_dst = dst;
24298 rtx orig_src = NULL;
24299 int piece_size = 1;
24300 int copied_bytes = 0;
24302 if (!issetmem)
24304 gcc_assert (srcp != NULL);
24305 src = *srcp;
24306 orig_src = src;
24309 for (piece_size = 1;
24310 piece_size <= desired_align && copied_bytes < align_bytes;
24311 piece_size <<= 1)
24313 if (align_bytes & piece_size)
24315 if (issetmem)
24317 if (vec_value && piece_size > GET_MODE_SIZE (GET_MODE (value)))
24318 dst = emit_memset (dst, destreg, vec_value, piece_size);
24319 else
24320 dst = emit_memset (dst, destreg, value, piece_size);
24322 else
24323 dst = emit_memmov (dst, &src, destreg, srcreg, piece_size);
24324 copied_bytes += piece_size;
24327 if (MEM_ALIGN (dst) < (unsigned int) desired_align * BITS_PER_UNIT)
24328 set_mem_align (dst, desired_align * BITS_PER_UNIT);
24329 if (MEM_SIZE_KNOWN_P (orig_dst))
24330 set_mem_size (dst, MEM_SIZE (orig_dst) - align_bytes);
24332 if (!issetmem)
24334 int src_align_bytes = get_mem_align_offset (src, desired_align
24335 * BITS_PER_UNIT);
24336 if (src_align_bytes >= 0)
24337 src_align_bytes = desired_align - src_align_bytes;
24338 if (src_align_bytes >= 0)
24340 unsigned int src_align;
24341 for (src_align = desired_align; src_align >= 2; src_align >>= 1)
24343 if ((src_align_bytes & (src_align - 1))
24344 == (align_bytes & (src_align - 1)))
24345 break;
24347 if (src_align > (unsigned int) desired_align)
24348 src_align = desired_align;
24349 if (MEM_ALIGN (src) < src_align * BITS_PER_UNIT)
24350 set_mem_align (src, src_align * BITS_PER_UNIT);
24352 if (MEM_SIZE_KNOWN_P (orig_src))
24353 set_mem_size (src, MEM_SIZE (orig_src) - align_bytes);
24354 *srcp = src;
24357 return dst;
24360 /* Return true if ALG can be used in current context.
24361 Assume we expand memset if MEMSET is true. */
24362 static bool
24363 alg_usable_p (enum stringop_alg alg, bool memset)
24365 if (alg == no_stringop)
24366 return false;
24367 if (alg == vector_loop)
24368 return TARGET_SSE || TARGET_AVX;
24369 /* Algorithms using the rep prefix want at least edi and ecx;
24370 additionally, memset wants eax and memcpy wants esi. Don't
24371 consider such algorithms if the user has appropriated those
24372 registers for their own purposes. */
24373 if (alg == rep_prefix_1_byte
24374 || alg == rep_prefix_4_byte
24375 || alg == rep_prefix_8_byte)
24376 return !(fixed_regs[CX_REG] || fixed_regs[DI_REG]
24377 || (memset ? fixed_regs[AX_REG] : fixed_regs[SI_REG]));
24378 return true;
24381 /* Given COUNT and EXPECTED_SIZE, decide on codegen of string operation. */
24382 static enum stringop_alg
24383 decide_alg (HOST_WIDE_INT count, HOST_WIDE_INT expected_size,
24384 unsigned HOST_WIDE_INT min_size, unsigned HOST_WIDE_INT max_size,
24385 bool memset, bool zero_memset, int *dynamic_check, bool *noalign)
24387 const struct stringop_algs * algs;
24388 bool optimize_for_speed;
24389 int max = 0;
24390 const struct processor_costs *cost;
24391 int i;
24392 bool any_alg_usable_p = false;
24394 *noalign = false;
24395 *dynamic_check = -1;
24397 /* Even if the string operation call is cold, we still might spend a lot
24398 of time processing large blocks. */
24399 if (optimize_function_for_size_p (cfun)
24400 || (optimize_insn_for_size_p ()
24401 && (max_size < 256
24402 || (expected_size != -1 && expected_size < 256))))
24403 optimize_for_speed = false;
24404 else
24405 optimize_for_speed = true;
24407 cost = optimize_for_speed ? ix86_cost : &ix86_size_cost;
24408 if (memset)
24409 algs = &cost->memset[TARGET_64BIT != 0];
24410 else
24411 algs = &cost->memcpy[TARGET_64BIT != 0];
24413 /* See maximal size for user defined algorithm. */
24414 for (i = 0; i < MAX_STRINGOP_ALGS; i++)
24416 enum stringop_alg candidate = algs->size[i].alg;
24417 bool usable = alg_usable_p (candidate, memset);
24418 any_alg_usable_p |= usable;
24420 if (candidate != libcall && candidate && usable)
24421 max = algs->size[i].max;
24424 /* If the expected size is not known but the max size is small enough
24425 so that the inline version is a win, set the expected size into
24426 the range. */
24427 if (((max > 1 && (unsigned HOST_WIDE_INT) max >= max_size) || max == -1)
24428 && expected_size == -1)
24429 expected_size = min_size / 2 + max_size / 2;
24431 /* If the user specified the algorithm, honor it if possible. */
24432 if (ix86_stringop_alg != no_stringop
24433 && alg_usable_p (ix86_stringop_alg, memset))
24434 return ix86_stringop_alg;
24435 /* rep; movq or rep; movl is the smallest variant. */
24436 else if (!optimize_for_speed)
24438 *noalign = true;
24439 if (!count || (count & 3) || (memset && !zero_memset))
24440 return alg_usable_p (rep_prefix_1_byte, memset)
24441 ? rep_prefix_1_byte : loop_1_byte;
24442 else
24443 return alg_usable_p (rep_prefix_4_byte, memset)
24444 ? rep_prefix_4_byte : loop;
24446 /* Very tiny blocks are best handled via the loop; REP is expensive to
24447 set up. */
24448 else if (expected_size != -1 && expected_size < 4)
24449 return loop_1_byte;
24450 else if (expected_size != -1)
24452 enum stringop_alg alg = libcall;
24453 bool alg_noalign = false;
24454 for (i = 0; i < MAX_STRINGOP_ALGS; i++)
24456 /* We get here if the algorithms that were not libcall-based
24457 were rep-prefix based and we are unable to use rep prefixes
24458 based on global register usage. Break out of the loop and
24459 use the heuristic below. */
24460 if (algs->size[i].max == 0)
24461 break;
24462 if (algs->size[i].max >= expected_size || algs->size[i].max == -1)
24464 enum stringop_alg candidate = algs->size[i].alg;
24466 if (candidate != libcall && alg_usable_p (candidate, memset))
24468 alg = candidate;
24469 alg_noalign = algs->size[i].noalign;
24471 /* Honor TARGET_INLINE_ALL_STRINGOPS by picking
24472 last non-libcall inline algorithm. */
24473 if (TARGET_INLINE_ALL_STRINGOPS)
24475 /* When the current size is best to be copied by a libcall,
24476 but we are still forced to inline, run the heuristic below
24477 that will pick code for medium sized blocks. */
24478 if (alg != libcall)
24480 *noalign = alg_noalign;
24481 return alg;
24483 else if (!any_alg_usable_p)
24484 break;
24486 else if (alg_usable_p (candidate, memset))
24488 *noalign = algs->size[i].noalign;
24489 return candidate;
24494 /* When asked to inline the call anyway, try to pick a meaningful choice.
24495 We look for the maximal size of a block that is faster to copy by hand and
24496 handle blocks of at most that size, guessing that the average size will
24497 be roughly half of the block.
24499 If this turns out to be bad, we might simply specify the preferred
24500 choice in ix86_costs. */
24501 if ((TARGET_INLINE_ALL_STRINGOPS || TARGET_INLINE_STRINGOPS_DYNAMICALLY)
24502 && (algs->unknown_size == libcall
24503 || !alg_usable_p (algs->unknown_size, memset)))
24505 enum stringop_alg alg;
24507 /* If there aren't any usable algorithms, then recursing on
24508 smaller sizes isn't going to find anything. Just return the
24509 simple byte-at-a-time copy loop. */
24510 if (!any_alg_usable_p)
24512 /* Pick something reasonable. */
24513 if (TARGET_INLINE_STRINGOPS_DYNAMICALLY)
24514 *dynamic_check = 128;
24515 return loop_1_byte;
24517 if (max <= 0)
24518 max = 4096;
24519 alg = decide_alg (count, max / 2, min_size, max_size, memset,
24520 zero_memset, dynamic_check, noalign);
24521 gcc_assert (*dynamic_check == -1);
24522 if (TARGET_INLINE_STRINGOPS_DYNAMICALLY)
24523 *dynamic_check = max;
24524 else
24525 gcc_assert (alg != libcall);
24526 return alg;
24528 return (alg_usable_p (algs->unknown_size, memset)
24529 ? algs->unknown_size : libcall);
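/* Roughly, the decision above prefers, in order: a user-requested
   algorithm, the smallest rep-prefix variant when optimizing for size, a
   byte loop for tiny expected sizes, the per-size entry from the cost
   tables, and finally the unknown-size entry or a library call (with a
   recursive retry when inlining is forced).  */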
24532 /* Decide on alignment. We know that the operand is already aligned to ALIGN
24533 (ALIGN can be based on profile feedback and thus it is not 100% guaranteed). */
24534 static int
24535 decide_alignment (int align,
24536 enum stringop_alg alg,
24537 int expected_size,
24538 machine_mode move_mode)
24540 int desired_align = 0;
24542 gcc_assert (alg != no_stringop);
24544 if (alg == libcall)
24545 return 0;
24546 if (move_mode == VOIDmode)
24547 return 0;
24549 desired_align = GET_MODE_SIZE (move_mode);
24550   /* PentiumPro has special logic triggering for 8 byte aligned blocks,
24551      copying a whole cache line at once.  */
24552 if (TARGET_PENTIUMPRO
24553 && (alg == rep_prefix_4_byte || alg == rep_prefix_1_byte))
24554 desired_align = 8;
24556 if (optimize_size)
24557 desired_align = 1;
24558 if (desired_align < align)
24559 desired_align = align;
24560 if (expected_size != -1 && expected_size < 4)
24561 desired_align = align;
24563 return desired_align;
24567 /* Helper function for memset.  For QImode value 0xXY produce
24568    0xXYXYXYXY of the width specified by MODE.  This is essentially
24569    a * 0x01010101, but we can do slightly better than
24570 synth_mult by unwinding the sequence by hand on CPUs with
24571 slow multiply. */
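/* Worked example (the value is illustrative only): for VAL == 0x5A and
   MODE == SImode the shift/IOR sequence below computes
   0x5A -> 0x5A5A -> 0x5A5A5A5A, which equals 0x5A * 0x01010101;
   for DImode one more step yields 0x5A5A5A5A5A5A5A5A.  */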
24572 static rtx
24573 promote_duplicated_reg (machine_mode mode, rtx val)
24575 machine_mode valmode = GET_MODE (val);
24576 rtx tmp;
24577 int nops = mode == DImode ? 3 : 2;
24579 gcc_assert (mode == SImode || mode == DImode || val == const0_rtx);
24580 if (val == const0_rtx)
24581 return copy_to_mode_reg (mode, CONST0_RTX (mode));
24582 if (CONST_INT_P (val))
24584 HOST_WIDE_INT v = INTVAL (val) & 255;
24586 v |= v << 8;
24587 v |= v << 16;
24588 if (mode == DImode)
24589 v |= (v << 16) << 16;
24590 return copy_to_mode_reg (mode, gen_int_mode (v, mode));
24593 if (valmode == VOIDmode)
24594 valmode = QImode;
24595 if (valmode != QImode)
24596 val = gen_lowpart (QImode, val);
24597 if (mode == QImode)
24598 return val;
24599 if (!TARGET_PARTIAL_REG_STALL)
24600 nops--;
24601 if (ix86_cost->mult_init[mode == DImode ? 3 : 2]
24602 + ix86_cost->mult_bit * (mode == DImode ? 8 : 4)
24603 <= (ix86_cost->shift_const + ix86_cost->add) * nops
24604 + (COSTS_N_INSNS (TARGET_PARTIAL_REG_STALL == 0)))
24606 rtx reg = convert_modes (mode, QImode, val, true);
24607 tmp = promote_duplicated_reg (mode, const1_rtx);
24608 return expand_simple_binop (mode, MULT, reg, tmp, NULL, 1,
24609 OPTAB_DIRECT);
24611 else
24613 rtx reg = convert_modes (mode, QImode, val, true);
24615 if (!TARGET_PARTIAL_REG_STALL)
24616 if (mode == SImode)
24617 emit_insn (gen_movsi_insv_1 (reg, reg));
24618 else
24619 emit_insn (gen_movdi_insv_1 (reg, reg));
24620 else
24622 tmp = expand_simple_binop (mode, ASHIFT, reg, GEN_INT (8),
24623 NULL, 1, OPTAB_DIRECT);
24624 reg =
24625 expand_simple_binop (mode, IOR, reg, tmp, reg, 1, OPTAB_DIRECT);
24627 tmp = expand_simple_binop (mode, ASHIFT, reg, GEN_INT (16),
24628 NULL, 1, OPTAB_DIRECT);
24629 reg = expand_simple_binop (mode, IOR, reg, tmp, reg, 1, OPTAB_DIRECT);
24630 if (mode == SImode)
24631 return reg;
24632 tmp = expand_simple_binop (mode, ASHIFT, reg, GEN_INT (32),
24633 NULL, 1, OPTAB_DIRECT);
24634 reg = expand_simple_binop (mode, IOR, reg, tmp, reg, 1, OPTAB_DIRECT);
24635 return reg;
24639 /* Duplicate value VAL using promote_duplicated_reg into the maximal size that
24640    will be needed by the main loop copying SIZE_NEEDED chunks and by the
24641    prologue getting alignment from ALIGN to DESIRED_ALIGN.  */
24642 static rtx
24643 promote_duplicated_reg_to_size (rtx val, int size_needed, int desired_align,
24644 int align)
24646 rtx promoted_val;
24648 if (TARGET_64BIT
24649 && (size_needed > 4 || (desired_align > align && desired_align > 4)))
24650 promoted_val = promote_duplicated_reg (DImode, val);
24651 else if (size_needed > 2 || (desired_align > align && desired_align > 2))
24652 promoted_val = promote_duplicated_reg (SImode, val);
24653 else if (size_needed > 1 || (desired_align > align && desired_align > 1))
24654 promoted_val = promote_duplicated_reg (HImode, val);
24655 else
24656 promoted_val = val;
24658 return promoted_val;
24661 /* Expand string move (memcpy) or store (memset) operation.  Use i386 string
24662 operations when profitable. The code depends upon architecture, block size
24663 and alignment, but always has one of the following overall structures:
24665 Aligned move sequence:
24667 1) Prologue guard: Conditional that jumps up to epilogues for small
24668 blocks that can be handled by epilogue alone. This is faster
24669       but also needed for correctness, since the prologue assumes the block
24670 is larger than the desired alignment.
24672 Optional dynamic check for size and libcall for large
24673 blocks is emitted here too, with -minline-stringops-dynamically.
24675 2) Prologue: copy first few bytes in order to get destination
24676 aligned to DESIRED_ALIGN. It is emitted only when ALIGN is less
24677 than DESIRED_ALIGN and up to DESIRED_ALIGN - ALIGN bytes can be
24678 copied. We emit either a jump tree on power of two sized
24679 blocks, or a byte loop.
24681 3) Main body: the copying loop itself, copying in SIZE_NEEDED chunks
24682 with specified algorithm.
24684 4) Epilogue: code copying tail of the block that is too small to be
24685 handled by main body (or up to size guarded by prologue guard).
24687 Misaligned move sequence
24690    1) Misaligned move prologue/epilogue containing:
24690 a) Prologue handling small memory blocks and jumping to done_label
24691 (skipped if blocks are known to be large enough)
24692       b) Single move copying the first DESIRED_ALIGN-ALIGN bytes if alignment is
24693 needed by single possibly misaligned move
24694 (skipped if alignment is not needed)
24695 c) Copy of last SIZE_NEEDED bytes by possibly misaligned moves
24697 2) Zero size guard dispatching to done_label, if needed
24699    3) Dispatch to a library call, if needed,
24701    4) Main body: the copying loop itself, copying in SIZE_NEEDED chunks
24702 with specified algorithm. */
24703 bool
24704 ix86_expand_set_or_movmem (rtx dst, rtx src, rtx count_exp, rtx val_exp,
24705 rtx align_exp, rtx expected_align_exp,
24706 rtx expected_size_exp, rtx min_size_exp,
24707 rtx max_size_exp, rtx probable_max_size_exp,
24708 bool issetmem)
24710 rtx destreg;
24711 rtx srcreg = NULL;
24712 rtx_code_label *label = NULL;
24713 rtx tmp;
24714 rtx_code_label *jump_around_label = NULL;
24715 HOST_WIDE_INT align = 1;
24716 unsigned HOST_WIDE_INT count = 0;
24717 HOST_WIDE_INT expected_size = -1;
24718 int size_needed = 0, epilogue_size_needed;
24719 int desired_align = 0, align_bytes = 0;
24720 enum stringop_alg alg;
24721 rtx promoted_val = NULL;
24722 rtx vec_promoted_val = NULL;
24723 bool force_loopy_epilogue = false;
24724 int dynamic_check;
24725 bool need_zero_guard = false;
24726 bool noalign;
24727 machine_mode move_mode = VOIDmode;
24728 int unroll_factor = 1;
24729 /* TODO: Once value ranges are available, fill in proper data. */
24730 unsigned HOST_WIDE_INT min_size = 0;
24731 unsigned HOST_WIDE_INT max_size = -1;
24732 unsigned HOST_WIDE_INT probable_max_size = -1;
24733 bool misaligned_prologue_used = false;
24735 if (CONST_INT_P (align_exp))
24736 align = INTVAL (align_exp);
24737   /* i386 can do misaligned access at a reasonably small extra cost.  */
24738 if (CONST_INT_P (expected_align_exp)
24739 && INTVAL (expected_align_exp) > align)
24740 align = INTVAL (expected_align_exp);
24741 /* ALIGN is the minimum of destination and source alignment, but we care here
24742 just about destination alignment. */
24743 else if (!issetmem
24744 && MEM_ALIGN (dst) > (unsigned HOST_WIDE_INT) align * BITS_PER_UNIT)
24745 align = MEM_ALIGN (dst) / BITS_PER_UNIT;
24747 if (CONST_INT_P (count_exp))
24749 min_size = max_size = probable_max_size = count = expected_size
24750 = INTVAL (count_exp);
24751 /* When COUNT is 0, there is nothing to do. */
24752 if (!count)
24753 return true;
24755 else
24757 if (min_size_exp)
24758 min_size = INTVAL (min_size_exp);
24759 if (max_size_exp)
24760 max_size = INTVAL (max_size_exp);
24761 if (probable_max_size_exp)
24762 probable_max_size = INTVAL (probable_max_size_exp);
24763 if (CONST_INT_P (expected_size_exp))
24764 expected_size = INTVAL (expected_size_exp);
24767 /* Make sure we don't need to care about overflow later on. */
24768 if (count > ((unsigned HOST_WIDE_INT) 1 << 30))
24769 return false;
24771 /* Step 0: Decide on preferred algorithm, desired alignment and
24772 size of chunks to be copied by main loop. */
24773 alg = decide_alg (count, expected_size, min_size, probable_max_size,
24774 issetmem,
24775 issetmem && val_exp == const0_rtx,
24776 &dynamic_check, &noalign);
24777 if (alg == libcall)
24778 return false;
24779 gcc_assert (alg != no_stringop);
24781   /* For now the vector version of memset is generated only for memory zeroing, as
24782      creating the promoted vector value is very cheap in this case.  */
24783 if (issetmem && alg == vector_loop && val_exp != const0_rtx)
24784 alg = unrolled_loop;
24786 if (!count)
24787 count_exp = copy_to_mode_reg (GET_MODE (count_exp), count_exp);
24788 destreg = ix86_copy_addr_to_reg (XEXP (dst, 0));
24789 if (!issetmem)
24790 srcreg = ix86_copy_addr_to_reg (XEXP (src, 0));
24792 unroll_factor = 1;
24793 move_mode = word_mode;
24794 switch (alg)
24796 case libcall:
24797 case no_stringop:
24798 case last_alg:
24799 gcc_unreachable ();
24800 case loop_1_byte:
24801 need_zero_guard = true;
24802 move_mode = QImode;
24803 break;
24804 case loop:
24805 need_zero_guard = true;
24806 break;
24807 case unrolled_loop:
24808 need_zero_guard = true;
24809 unroll_factor = (TARGET_64BIT ? 4 : 2);
24810 break;
24811 case vector_loop:
24812 need_zero_guard = true;
24813 unroll_factor = 4;
24814 /* Find the widest supported mode. */
24815 move_mode = word_mode;
24816 while (optab_handler (mov_optab, GET_MODE_WIDER_MODE (move_mode))
24817 != CODE_FOR_nothing)
24818 move_mode = GET_MODE_WIDER_MODE (move_mode);
24820 /* Find the corresponding vector mode with the same size as MOVE_MODE.
24821 MOVE_MODE is an integer mode at the moment (SI, DI, TI, etc.). */
24822 if (GET_MODE_SIZE (move_mode) > GET_MODE_SIZE (word_mode))
24824 int nunits = GET_MODE_SIZE (move_mode) / GET_MODE_SIZE (word_mode);
24825 move_mode = mode_for_vector (word_mode, nunits);
24826 if (optab_handler (mov_optab, move_mode) == CODE_FOR_nothing)
24827 move_mode = word_mode;
24829 gcc_assert (optab_handler (mov_optab, move_mode) != CODE_FOR_nothing);
24830 break;
24831 case rep_prefix_8_byte:
24832 move_mode = DImode;
24833 break;
24834 case rep_prefix_4_byte:
24835 move_mode = SImode;
24836 break;
24837 case rep_prefix_1_byte:
24838 move_mode = QImode;
24839 break;
24841 size_needed = GET_MODE_SIZE (move_mode) * unroll_factor;
24842 epilogue_size_needed = size_needed;
24844 desired_align = decide_alignment (align, alg, expected_size, move_mode);
24845 if (!TARGET_ALIGN_STRINGOPS || noalign)
24846 align = desired_align;
24848 /* Step 1: Prologue guard. */
24850 /* Alignment code needs count to be in register. */
24851 if (CONST_INT_P (count_exp) && desired_align > align)
24853 if (INTVAL (count_exp) > desired_align
24854 && INTVAL (count_exp) > size_needed)
24856 align_bytes
24857 = get_mem_align_offset (dst, desired_align * BITS_PER_UNIT);
24858 if (align_bytes <= 0)
24859 align_bytes = 0;
24860 else
24861 align_bytes = desired_align - align_bytes;
24863 if (align_bytes == 0)
24864 count_exp = force_reg (counter_mode (count_exp), count_exp);
24866 gcc_assert (desired_align >= 1 && align >= 1);
24868 /* Misaligned move sequences handle both prologue and epilogue at once.
24869      Default code generation results in smaller code for large alignments
24870      and also avoids redundant work when sizes are known precisely.  */
24871 misaligned_prologue_used
24872 = (TARGET_MISALIGNED_MOVE_STRING_PRO_EPILOGUES
24873 && MAX (desired_align, epilogue_size_needed) <= 32
24874 && desired_align <= epilogue_size_needed
24875 && ((desired_align > align && !align_bytes)
24876 || (!count && epilogue_size_needed > 1)));
24878 /* Do the cheap promotion to allow better CSE across the
24879      main loop and epilogue (i.e. one load of the big constant in
24880      front of all code).
24881      For now the misaligned move sequences do not have a fast path
24882 without broadcasting. */
24883 if (issetmem && ((CONST_INT_P (val_exp) || misaligned_prologue_used)))
24885 if (alg == vector_loop)
24887 gcc_assert (val_exp == const0_rtx);
24888 vec_promoted_val = promote_duplicated_reg (move_mode, val_exp);
24889 promoted_val = promote_duplicated_reg_to_size (val_exp,
24890 GET_MODE_SIZE (word_mode),
24891 desired_align, align);
24893 else
24895 promoted_val = promote_duplicated_reg_to_size (val_exp, size_needed,
24896 desired_align, align);
24899   /* Misaligned move sequences handle both prologues and epilogues at once.
24900      Default code generation results in smaller code for large alignments and
24901      also avoids redundant work when sizes are known precisely.  */
24902 if (misaligned_prologue_used)
24904       /* The misaligned move prologue handles small blocks by itself.  */
24905 expand_set_or_movmem_prologue_epilogue_by_misaligned_moves
24906 (dst, src, &destreg, &srcreg,
24907 move_mode, promoted_val, vec_promoted_val,
24908 &count_exp,
24909 &jump_around_label,
24910 desired_align < align
24911 ? MAX (desired_align, epilogue_size_needed) : epilogue_size_needed,
24912 desired_align, align, &min_size, dynamic_check, issetmem);
24913 if (!issetmem)
24914 src = change_address (src, BLKmode, srcreg);
24915 dst = change_address (dst, BLKmode, destreg);
24916 set_mem_align (dst, desired_align * BITS_PER_UNIT);
24917 epilogue_size_needed = 0;
24918 if (need_zero_guard && !min_size)
24920 /* It is possible that we copied enough so the main loop will not
24921 execute. */
24922 gcc_assert (size_needed > 1);
24923 if (jump_around_label == NULL_RTX)
24924 jump_around_label = gen_label_rtx ();
24925 emit_cmp_and_jump_insns (count_exp,
24926 GEN_INT (size_needed),
24927 LTU, 0, counter_mode (count_exp), 1, jump_around_label);
24928 if (expected_size == -1
24929 || expected_size < (desired_align - align) / 2 + size_needed)
24930 predict_jump (REG_BR_PROB_BASE * 20 / 100);
24931 else
24932 predict_jump (REG_BR_PROB_BASE * 60 / 100);
24935 /* Ensure that alignment prologue won't copy past end of block. */
24936 else if (size_needed > 1 || (desired_align > 1 && desired_align > align))
24938 epilogue_size_needed = MAX (size_needed - 1, desired_align - align);
24939 /* Epilogue always copies COUNT_EXP & EPILOGUE_SIZE_NEEDED bytes.
24940 Make sure it is power of 2. */
24941 epilogue_size_needed = 1 << (floor_log2 (epilogue_size_needed) + 1);
24943 /* To improve performance of small blocks, we jump around the VAL
24944 	 promoting code.  This means that if the promoted VAL is not constant,
24945 	 we might not use it in the epilogue and have to use the byte
24946 loop variant. */
24947 if (issetmem && epilogue_size_needed > 2 && !promoted_val)
24948 force_loopy_epilogue = true;
24949 if ((count && count < (unsigned HOST_WIDE_INT) epilogue_size_needed)
24950 || max_size < (unsigned HOST_WIDE_INT) epilogue_size_needed)
24952 /* If main algorithm works on QImode, no epilogue is needed.
24953 For small sizes just don't align anything. */
24954 if (size_needed == 1)
24955 desired_align = align;
24956 else
24957 goto epilogue;
24959 else if (!count
24960 && min_size < (unsigned HOST_WIDE_INT) epilogue_size_needed)
24962 label = gen_label_rtx ();
24963 emit_cmp_and_jump_insns (count_exp,
24964 GEN_INT (epilogue_size_needed),
24965 LTU, 0, counter_mode (count_exp), 1, label);
24966 if (expected_size == -1 || expected_size < epilogue_size_needed)
24967 predict_jump (REG_BR_PROB_BASE * 60 / 100);
24968 else
24969 predict_jump (REG_BR_PROB_BASE * 20 / 100);
24973   /* Emit code to decide at runtime whether a library call or inline code should be
24974 used. */
24975 if (dynamic_check != -1)
24977 if (!issetmem && CONST_INT_P (count_exp))
24979 if (UINTVAL (count_exp) >= (unsigned HOST_WIDE_INT)dynamic_check)
24981 emit_block_move_via_libcall (dst, src, count_exp, false);
24982 count_exp = const0_rtx;
24983 goto epilogue;
24986 else
24988 rtx_code_label *hot_label = gen_label_rtx ();
24989 if (jump_around_label == NULL_RTX)
24990 jump_around_label = gen_label_rtx ();
24991 emit_cmp_and_jump_insns (count_exp, GEN_INT (dynamic_check - 1),
24992 LEU, 0, counter_mode (count_exp),
24993 1, hot_label);
24994 predict_jump (REG_BR_PROB_BASE * 90 / 100);
24995 if (issetmem)
24996 set_storage_via_libcall (dst, count_exp, val_exp, false);
24997 else
24998 emit_block_move_via_libcall (dst, src, count_exp, false);
24999 emit_jump (jump_around_label);
25000 emit_label (hot_label);
25004 /* Step 2: Alignment prologue. */
25005 /* Do the expensive promotion once we branched off the small blocks. */
25006 if (issetmem && !promoted_val)
25007 promoted_val = promote_duplicated_reg_to_size (val_exp, size_needed,
25008 desired_align, align);
25010 if (desired_align > align && !misaligned_prologue_used)
25012 if (align_bytes == 0)
25014 	  /* Except for the first move in the prologue, we no longer know
25015 	     the constant offset in aliasing info.  It doesn't seem worth
25016 the pain to maintain it for the first move, so throw away
25017 the info early. */
25018 dst = change_address (dst, BLKmode, destreg);
25019 if (!issetmem)
25020 src = change_address (src, BLKmode, srcreg);
25021 dst = expand_set_or_movmem_prologue (dst, src, destreg, srcreg,
25022 promoted_val, vec_promoted_val,
25023 count_exp, align, desired_align,
25024 issetmem);
25025 /* At most desired_align - align bytes are copied. */
25026 if (min_size < (unsigned)(desired_align - align))
25027 min_size = 0;
25028 else
25029 min_size -= desired_align - align;
25031 else
25033 /* If we know how many bytes need to be stored before dst is
25034 sufficiently aligned, maintain aliasing info accurately. */
25035 dst = expand_set_or_movmem_constant_prologue (dst, &src, destreg,
25036 srcreg,
25037 promoted_val,
25038 vec_promoted_val,
25039 desired_align,
25040 align_bytes,
25041 issetmem);
25043 count_exp = plus_constant (counter_mode (count_exp),
25044 count_exp, -align_bytes);
25045 count -= align_bytes;
25046 min_size -= align_bytes;
25047 max_size -= align_bytes;
25049 if (need_zero_guard
25050 && !min_size
25051 && (count < (unsigned HOST_WIDE_INT) size_needed
25052 || (align_bytes == 0
25053 && count < ((unsigned HOST_WIDE_INT) size_needed
25054 + desired_align - align))))
25056 /* It is possible that we copied enough so the main loop will not
25057 execute. */
25058 gcc_assert (size_needed > 1);
25059 if (label == NULL_RTX)
25060 label = gen_label_rtx ();
25061 emit_cmp_and_jump_insns (count_exp,
25062 GEN_INT (size_needed),
25063 LTU, 0, counter_mode (count_exp), 1, label);
25064 if (expected_size == -1
25065 || expected_size < (desired_align - align) / 2 + size_needed)
25066 predict_jump (REG_BR_PROB_BASE * 20 / 100);
25067 else
25068 predict_jump (REG_BR_PROB_BASE * 60 / 100);
25071 if (label && size_needed == 1)
25073 emit_label (label);
25074 LABEL_NUSES (label) = 1;
25075 label = NULL;
25076 epilogue_size_needed = 1;
25077 if (issetmem)
25078 promoted_val = val_exp;
25080 else if (label == NULL_RTX && !misaligned_prologue_used)
25081 epilogue_size_needed = size_needed;
25083 /* Step 3: Main loop. */
25085 switch (alg)
25087 case libcall:
25088 case no_stringop:
25089 case last_alg:
25090 gcc_unreachable ();
25091 case loop_1_byte:
25092 case loop:
25093 case unrolled_loop:
25094 expand_set_or_movmem_via_loop (dst, src, destreg, srcreg, promoted_val,
25095 count_exp, move_mode, unroll_factor,
25096 expected_size, issetmem);
25097 break;
25098 case vector_loop:
25099 expand_set_or_movmem_via_loop (dst, src, destreg, srcreg,
25100 vec_promoted_val, count_exp, move_mode,
25101 unroll_factor, expected_size, issetmem);
25102 break;
25103 case rep_prefix_8_byte:
25104 case rep_prefix_4_byte:
25105 case rep_prefix_1_byte:
25106 expand_set_or_movmem_via_rep (dst, src, destreg, srcreg, promoted_val,
25107 val_exp, count_exp, move_mode, issetmem);
25108 break;
25110   /* Properly adjust the offset of the src and dest memory for aliasing.  */
25111 if (CONST_INT_P (count_exp))
25113 if (!issetmem)
25114 src = adjust_automodify_address_nv (src, BLKmode, srcreg,
25115 (count / size_needed) * size_needed);
25116 dst = adjust_automodify_address_nv (dst, BLKmode, destreg,
25117 (count / size_needed) * size_needed);
25119 else
25121 if (!issetmem)
25122 src = change_address (src, BLKmode, srcreg);
25123 dst = change_address (dst, BLKmode, destreg);
25126 /* Step 4: Epilogue to copy the remaining bytes. */
25127 epilogue:
25128 if (label)
25130 /* When the main loop is done, COUNT_EXP might hold original count,
25131 while we want to copy only COUNT_EXP & SIZE_NEEDED bytes.
25132 Epilogue code will actually copy COUNT_EXP & EPILOGUE_SIZE_NEEDED
25133 bytes. Compensate if needed. */
25135 if (size_needed < epilogue_size_needed)
25137 tmp =
25138 expand_simple_binop (counter_mode (count_exp), AND, count_exp,
25139 GEN_INT (size_needed - 1), count_exp, 1,
25140 OPTAB_DIRECT);
25141 if (tmp != count_exp)
25142 emit_move_insn (count_exp, tmp);
25144 emit_label (label);
25145 LABEL_NUSES (label) = 1;
25148 if (count_exp != const0_rtx && epilogue_size_needed > 1)
25150 if (force_loopy_epilogue)
25151 expand_setmem_epilogue_via_loop (dst, destreg, val_exp, count_exp,
25152 epilogue_size_needed);
25153 else
25155 if (issetmem)
25156 expand_setmem_epilogue (dst, destreg, promoted_val,
25157 vec_promoted_val, count_exp,
25158 epilogue_size_needed);
25159 else
25160 expand_movmem_epilogue (dst, src, destreg, srcreg, count_exp,
25161 epilogue_size_needed);
25164 if (jump_around_label)
25165 emit_label (jump_around_label);
25166 return true;
25170 /* Expand the appropriate insns for doing strlen if not just doing
25171 repnz; scasb
25173 out = result, initialized with the start address
25174 align_rtx = alignment of the address.
25175    scratch = scratch register, initialized with the start address when
25176 not aligned, otherwise undefined
25178 This is just the body. It needs the initializations mentioned above and
25179 some address computing at the end. These things are done in i386.md. */
25181 static void
25182 ix86_expand_strlensi_unroll_1 (rtx out, rtx src, rtx align_rtx)
25184 int align;
25185 rtx tmp;
25186 rtx_code_label *align_2_label = NULL;
25187 rtx_code_label *align_3_label = NULL;
25188 rtx_code_label *align_4_label = gen_label_rtx ();
25189 rtx_code_label *end_0_label = gen_label_rtx ();
25190 rtx mem;
25191 rtx tmpreg = gen_reg_rtx (SImode);
25192 rtx scratch = gen_reg_rtx (SImode);
25193 rtx cmp;
25195 align = 0;
25196 if (CONST_INT_P (align_rtx))
25197 align = INTVAL (align_rtx);
25199 /* Loop to check 1..3 bytes for null to get an aligned pointer. */
25201 /* Is there a known alignment and is it less than 4? */
25202 if (align < 4)
25204 rtx scratch1 = gen_reg_rtx (Pmode);
25205 emit_move_insn (scratch1, out);
25206 /* Is there a known alignment and is it not 2? */
25207 if (align != 2)
25209 align_3_label = gen_label_rtx (); /* Label when aligned to 3-byte */
25210 align_2_label = gen_label_rtx (); /* Label when aligned to 2-byte */
25212 /* Leave just the 3 lower bits. */
25213 align_rtx = expand_binop (Pmode, and_optab, scratch1, GEN_INT (3),
25214 NULL_RTX, 0, OPTAB_WIDEN);
25216 emit_cmp_and_jump_insns (align_rtx, const0_rtx, EQ, NULL,
25217 Pmode, 1, align_4_label);
25218 emit_cmp_and_jump_insns (align_rtx, const2_rtx, EQ, NULL,
25219 Pmode, 1, align_2_label);
25220 emit_cmp_and_jump_insns (align_rtx, const2_rtx, GTU, NULL,
25221 Pmode, 1, align_3_label);
25223 else
25225 /* Since the alignment is 2, we have to check 2 or 0 bytes;
25226 	     check whether it is aligned to 4 bytes.  */
25228 align_rtx = expand_binop (Pmode, and_optab, scratch1, const2_rtx,
25229 NULL_RTX, 0, OPTAB_WIDEN);
25231 emit_cmp_and_jump_insns (align_rtx, const0_rtx, EQ, NULL,
25232 Pmode, 1, align_4_label);
25235 mem = change_address (src, QImode, out);
25237 /* Now compare the bytes. */
25239       /* Compare the first n unaligned bytes on a byte-by-byte basis.  */
25240 emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL,
25241 QImode, 1, end_0_label);
25243 /* Increment the address. */
25244 emit_insn (ix86_gen_add3 (out, out, const1_rtx));
25246 /* Not needed with an alignment of 2 */
25247 if (align != 2)
25249 emit_label (align_2_label);
25251 emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL, QImode, 1,
25252 end_0_label);
25254 emit_insn (ix86_gen_add3 (out, out, const1_rtx));
25256 emit_label (align_3_label);
25259 emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL, QImode, 1,
25260 end_0_label);
25262 emit_insn (ix86_gen_add3 (out, out, const1_rtx));
25265   /* Generate a loop to check 4 bytes at a time.  It is not a good idea to
25266      align this loop; it only makes the program larger and does not help to
25267      speed it up.  */
25268 emit_label (align_4_label);
25270 mem = change_address (src, SImode, out);
25271 emit_move_insn (scratch, mem);
25272 emit_insn (ix86_gen_add3 (out, out, GEN_INT (4)));
25274 /* This formula yields a nonzero result iff one of the bytes is zero.
25275      This saves three branches inside the loop and many cycles.  */
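  /* Worked example (the values are illustrative only): for
     scratch == 0x41004242 the emitted sequence computes
       (0x41004242 + 0xFEFEFEFF) & ~0x41004242 & 0x80808080
       == 0x3FFF4141 & 0xBEFFBDBD & 0x80808080 == 0x00800000,
     nonzero because the second-highest byte is zero, while for
     scratch == 0x41424344 (no zero byte) the result is 0.  */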
25277 emit_insn (gen_addsi3 (tmpreg, scratch, GEN_INT (-0x01010101)));
25278 emit_insn (gen_one_cmplsi2 (scratch, scratch));
25279 emit_insn (gen_andsi3 (tmpreg, tmpreg, scratch));
25280 emit_insn (gen_andsi3 (tmpreg, tmpreg,
25281 gen_int_mode (0x80808080, SImode)));
25282 emit_cmp_and_jump_insns (tmpreg, const0_rtx, EQ, 0, SImode, 1,
25283 align_4_label);
25285 if (TARGET_CMOVE)
25287 rtx reg = gen_reg_rtx (SImode);
25288 rtx reg2 = gen_reg_rtx (Pmode);
25289 emit_move_insn (reg, tmpreg);
25290 emit_insn (gen_lshrsi3 (reg, reg, GEN_INT (16)));
25292 /* If zero is not in the first two bytes, move two bytes forward. */
25293 emit_insn (gen_testsi_ccno_1 (tmpreg, GEN_INT (0x8080)));
25294 tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
25295 tmp = gen_rtx_EQ (VOIDmode, tmp, const0_rtx);
25296 emit_insn (gen_rtx_SET (VOIDmode, tmpreg,
25297 gen_rtx_IF_THEN_ELSE (SImode, tmp,
25298 reg,
25299 tmpreg)));
25300 /* Emit lea manually to avoid clobbering of flags. */
25301 emit_insn (gen_rtx_SET (SImode, reg2,
25302 gen_rtx_PLUS (Pmode, out, const2_rtx)));
25304 tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
25305 tmp = gen_rtx_EQ (VOIDmode, tmp, const0_rtx);
25306 emit_insn (gen_rtx_SET (VOIDmode, out,
25307 gen_rtx_IF_THEN_ELSE (Pmode, tmp,
25308 reg2,
25309 out)));
25311 else
25313 rtx_code_label *end_2_label = gen_label_rtx ();
25314 /* Is zero in the first two bytes? */
25316 emit_insn (gen_testsi_ccno_1 (tmpreg, GEN_INT (0x8080)));
25317 tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
25318 tmp = gen_rtx_NE (VOIDmode, tmp, const0_rtx);
25319 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
25320 gen_rtx_LABEL_REF (VOIDmode, end_2_label),
25321 pc_rtx);
25322 tmp = emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
25323 JUMP_LABEL (tmp) = end_2_label;
25325 /* Not in the first two. Move two bytes forward. */
25326 emit_insn (gen_lshrsi3 (tmpreg, tmpreg, GEN_INT (16)));
25327 emit_insn (ix86_gen_add3 (out, out, const2_rtx));
25329 emit_label (end_2_label);
25333 /* Avoid branch in fixing the byte. */
25334 tmpreg = gen_lowpart (QImode, tmpreg);
25335 emit_insn (gen_addqi3_cc (tmpreg, tmpreg, tmpreg));
25336 tmp = gen_rtx_REG (CCmode, FLAGS_REG);
25337 cmp = gen_rtx_LTU (VOIDmode, tmp, const0_rtx);
25338 emit_insn (ix86_gen_sub3_carry (out, out, GEN_INT (3), tmp, cmp));
25340 emit_label (end_0_label);
25343 /* Expand strlen. */
25345 bool
25346 ix86_expand_strlen (rtx out, rtx src, rtx eoschar, rtx align)
25348 rtx addr, scratch1, scratch2, scratch3, scratch4;
25350   /* The generic case of the strlen expander is long.  Avoid expanding it
25351      unless TARGET_INLINE_ALL_STRINGOPS.  */
25353 if (TARGET_UNROLL_STRLEN && eoschar == const0_rtx && optimize > 1
25354 && !TARGET_INLINE_ALL_STRINGOPS
25355 && !optimize_insn_for_size_p ()
25356 && (!CONST_INT_P (align) || INTVAL (align) < 4))
25357 return false;
25359 addr = force_reg (Pmode, XEXP (src, 0));
25360 scratch1 = gen_reg_rtx (Pmode);
25362 if (TARGET_UNROLL_STRLEN && eoschar == const0_rtx && optimize > 1
25363 && !optimize_insn_for_size_p ())
25365 /* Well it seems that some optimizer does not combine a call like
25366 foo(strlen(bar), strlen(bar));
25367 	 when the move and the subtraction are done here.  It does calculate
25368 the length just once when these instructions are done inside of
25369 output_strlen_unroll(). But I think since &bar[strlen(bar)] is
25370 often used and I use one fewer register for the lifetime of
25371 output_strlen_unroll() this is better. */
25373 emit_move_insn (out, addr);
25375 ix86_expand_strlensi_unroll_1 (out, src, align);
25377 /* strlensi_unroll_1 returns the address of the zero at the end of
25378 the string, like memchr(), so compute the length by subtracting
25379 the start address. */
25380 emit_insn (ix86_gen_sub3 (out, out, addr));
25382 else
25384 rtx unspec;
25386 /* Can't use this if the user has appropriated eax, ecx, or edi. */
25387 if (fixed_regs[AX_REG] || fixed_regs[CX_REG] || fixed_regs[DI_REG])
25388 return false;
25390 scratch2 = gen_reg_rtx (Pmode);
25391 scratch3 = gen_reg_rtx (Pmode);
25392 scratch4 = force_reg (Pmode, constm1_rtx);
25394 emit_move_insn (scratch3, addr);
25395 eoschar = force_reg (QImode, eoschar);
25397 src = replace_equiv_address_nv (src, scratch3);
25399 /* If .md starts supporting :P, this can be done in .md. */
25400 unspec = gen_rtx_UNSPEC (Pmode, gen_rtvec (4, src, eoschar, align,
25401 scratch4), UNSPEC_SCAS);
25402 emit_insn (gen_strlenqi_1 (scratch1, scratch3, unspec));
25403 emit_insn (ix86_gen_one_cmpl2 (scratch2, scratch1));
25404 emit_insn (ix86_gen_add3 (out, scratch2, constm1_rtx));
25406 return true;
25409 /* For a given symbol (function), construct code to compute the address of its PLT
25410    entry in the large x86-64 PIC model.  */
25411 static rtx
25412 construct_plt_address (rtx symbol)
25414 rtx tmp, unspec;
25416 gcc_assert (GET_CODE (symbol) == SYMBOL_REF);
25417 gcc_assert (ix86_cmodel == CM_LARGE_PIC && !TARGET_PECOFF);
25418 gcc_assert (Pmode == DImode);
25420 tmp = gen_reg_rtx (Pmode);
25421 unspec = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, symbol), UNSPEC_PLTOFF);
25423 emit_move_insn (tmp, gen_rtx_CONST (Pmode, unspec));
25424 emit_insn (ix86_gen_add3 (tmp, tmp, pic_offset_table_rtx));
25425 return tmp;
25429 ix86_expand_call (rtx retval, rtx fnaddr, rtx callarg1,
25430 rtx callarg2,
25431 rtx pop, bool sibcall)
25433 rtx vec[3];
25434 rtx use = NULL, call;
25435 unsigned int vec_len = 0;
25437 if (pop == const0_rtx)
25438 pop = NULL;
25439 gcc_assert (!TARGET_64BIT || !pop);
25441 if (TARGET_MACHO && !TARGET_64BIT)
25443 #if TARGET_MACHO
25444 if (flag_pic && GET_CODE (XEXP (fnaddr, 0)) == SYMBOL_REF)
25445 fnaddr = machopic_indirect_call_target (fnaddr);
25446 #endif
25448 else
25450 /* Static functions and indirect calls don't need the pic register. */
25451 if (flag_pic
25452 && (!TARGET_64BIT
25453 || (ix86_cmodel == CM_LARGE_PIC
25454 && DEFAULT_ABI != MS_ABI))
25455 && GET_CODE (XEXP (fnaddr, 0)) == SYMBOL_REF
25456 && ! SYMBOL_REF_LOCAL_P (XEXP (fnaddr, 0)))
25458 use_reg (&use, gen_rtx_REG (Pmode, REAL_PIC_OFFSET_TABLE_REGNUM));
25459 if (ix86_use_pseudo_pic_reg ())
25460 emit_move_insn (gen_rtx_REG (Pmode, REAL_PIC_OFFSET_TABLE_REGNUM),
25461 pic_offset_table_rtx);
25465 /* Skip setting up RAX register for -mskip-rax-setup when there are no
25466 parameters passed in vector registers. */
25467 if (TARGET_64BIT
25468 && (INTVAL (callarg2) > 0
25469 || (INTVAL (callarg2) == 0
25470 && (TARGET_SSE || !flag_skip_rax_setup))))
25472 rtx al = gen_rtx_REG (QImode, AX_REG);
25473 emit_move_insn (al, callarg2);
25474 use_reg (&use, al);
25477 if (ix86_cmodel == CM_LARGE_PIC
25478 && !TARGET_PECOFF
25479 && MEM_P (fnaddr)
25480 && GET_CODE (XEXP (fnaddr, 0)) == SYMBOL_REF
25481 && !local_symbolic_operand (XEXP (fnaddr, 0), VOIDmode))
25482 fnaddr = gen_rtx_MEM (QImode, construct_plt_address (XEXP (fnaddr, 0)));
25483 else if (sibcall
25484 ? !sibcall_insn_operand (XEXP (fnaddr, 0), word_mode)
25485 : !call_insn_operand (XEXP (fnaddr, 0), word_mode))
25487 fnaddr = convert_to_mode (word_mode, XEXP (fnaddr, 0), 1);
25488 fnaddr = gen_rtx_MEM (QImode, copy_to_mode_reg (word_mode, fnaddr));
25491 call = gen_rtx_CALL (VOIDmode, fnaddr, callarg1);
25493 if (retval)
25495       /* We should add bounds as a destination register in case
25496 	 a pointer with bounds may be returned.  */
25497 if (TARGET_MPX && SCALAR_INT_MODE_P (GET_MODE (retval)))
25499 rtx b0 = gen_rtx_REG (BND64mode, FIRST_BND_REG);
25500 rtx b1 = gen_rtx_REG (BND64mode, FIRST_BND_REG + 1);
25501 retval = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (3, retval, b0, b1));
25502 chkp_put_regs_to_expr_list (retval);
25505 call = gen_rtx_SET (VOIDmode, retval, call);
25507 vec[vec_len++] = call;
25509 if (pop)
25511 pop = gen_rtx_PLUS (Pmode, stack_pointer_rtx, pop);
25512 pop = gen_rtx_SET (VOIDmode, stack_pointer_rtx, pop);
25513 vec[vec_len++] = pop;
25516 if (TARGET_64BIT_MS_ABI
25517 && (!callarg2 || INTVAL (callarg2) != -2))
25519 int const cregs_size
25520 = ARRAY_SIZE (x86_64_ms_sysv_extra_clobbered_registers);
25521 int i;
25523 for (i = 0; i < cregs_size; i++)
25525 int regno = x86_64_ms_sysv_extra_clobbered_registers[i];
25526 machine_mode mode = SSE_REGNO_P (regno) ? TImode : DImode;
25528 clobber_reg (&use, gen_rtx_REG (mode, regno));
25532 if (vec_len > 1)
25533 call = gen_rtx_PARALLEL (VOIDmode, gen_rtvec_v (vec_len, vec));
25534 call = emit_call_insn (call);
25535 if (use)
25536 CALL_INSN_FUNCTION_USAGE (call) = use;
25538 return call;
25541 /* Output the assembly for a call instruction. */
25543 const char *
25544 ix86_output_call_insn (rtx_insn *insn, rtx call_op)
25546 bool direct_p = constant_call_address_operand (call_op, VOIDmode);
25547 bool seh_nop_p = false;
25548 const char *xasm;
25550 if (SIBLING_CALL_P (insn))
25552 if (direct_p)
25553 xasm = "%!jmp\t%P0";
25554 /* SEH epilogue detection requires the indirect branch case
25555 to include REX.W. */
25556 else if (TARGET_SEH)
25557 xasm = "%!rex.W jmp %A0";
25558 else
25559 xasm = "%!jmp\t%A0";
25561 output_asm_insn (xasm, &call_op);
25562 return "";
25565 /* SEH unwinding can require an extra nop to be emitted in several
25566 circumstances. Determine if we have one of those. */
25567 if (TARGET_SEH)
25569 rtx_insn *i;
25571 for (i = NEXT_INSN (insn); i ; i = NEXT_INSN (i))
25573 /* If we get to another real insn, we don't need the nop. */
25574 if (INSN_P (i))
25575 break;
25577 /* If we get to the epilogue note, prevent a catch region from
25578 	     being adjacent to the standard epilogue sequence.  With non-
25579 	     call exceptions, we'll have done this during epilogue emission.  */
25580 if (NOTE_P (i) && NOTE_KIND (i) == NOTE_INSN_EPILOGUE_BEG
25581 && !flag_non_call_exceptions
25582 && !can_throw_internal (insn))
25584 seh_nop_p = true;
25585 break;
25589 /* If we didn't find a real insn following the call, prevent the
25590 unwinder from looking into the next function. */
25591 if (i == NULL)
25592 seh_nop_p = true;
25595 if (direct_p)
25596 xasm = "%!call\t%P0";
25597 else
25598 xasm = "%!call\t%A0";
25600 output_asm_insn (xasm, &call_op);
25602 if (seh_nop_p)
25603 return "nop";
25605 return "";
25608 /* Clear stack slot assignments remembered from previous functions.
25609 This is called from INIT_EXPANDERS once before RTL is emitted for each
25610 function. */
25612 static struct machine_function *
25613 ix86_init_machine_status (void)
25615 struct machine_function *f;
25617 f = ggc_cleared_alloc<machine_function> ();
25618 f->use_fast_prologue_epilogue_nregs = -1;
25619 f->call_abi = ix86_abi;
25621 return f;
25624 /* Return a MEM corresponding to a stack slot with mode MODE.
25625 Allocate a new slot if necessary.
25627 The RTL for a function can have several slots available: N is
25628 which slot to use. */
25631 assign_386_stack_local (machine_mode mode, enum ix86_stack_slot n)
25633 struct stack_local_entry *s;
25635 gcc_assert (n < MAX_386_STACK_LOCALS);
25637 for (s = ix86_stack_locals; s; s = s->next)
25638 if (s->mode == mode && s->n == n)
25639 return validize_mem (copy_rtx (s->rtl));
25641 s = ggc_alloc<stack_local_entry> ();
25642 s->n = n;
25643 s->mode = mode;
25644 s->rtl = assign_stack_local (mode, GET_MODE_SIZE (mode), 0);
25646 s->next = ix86_stack_locals;
25647 ix86_stack_locals = s;
25648 return validize_mem (copy_rtx (s->rtl));
25651 static void
25652 ix86_instantiate_decls (void)
25654 struct stack_local_entry *s;
25656 for (s = ix86_stack_locals; s; s = s->next)
25657 if (s->rtl != NULL_RTX)
25658 instantiate_decl_rtl (s->rtl);
25661 /* Check whether x86 address PARTS is a pc-relative address. */
25663 static bool
25664 rip_relative_addr_p (struct ix86_address *parts)
25666 rtx base, index, disp;
25668 base = parts->base;
25669 index = parts->index;
25670 disp = parts->disp;
25672 if (disp && !base && !index)
25674 if (TARGET_64BIT)
25676 rtx symbol = disp;
25678 if (GET_CODE (disp) == CONST)
25679 symbol = XEXP (disp, 0);
25680 if (GET_CODE (symbol) == PLUS
25681 && CONST_INT_P (XEXP (symbol, 1)))
25682 symbol = XEXP (symbol, 0);
25684 if (GET_CODE (symbol) == LABEL_REF
25685 || (GET_CODE (symbol) == SYMBOL_REF
25686 && SYMBOL_REF_TLS_MODEL (symbol) == 0)
25687 || (GET_CODE (symbol) == UNSPEC
25688 && (XINT (symbol, 1) == UNSPEC_GOTPCREL
25689 || XINT (symbol, 1) == UNSPEC_PCREL
25690 || XINT (symbol, 1) == UNSPEC_GOTNTPOFF)))
25691 return true;
25694 return false;
25697 /* Calculate the length of the memory address in the instruction encoding.
25698 Includes addr32 prefix, does not include the one-byte modrm, opcode,
25699 or other prefixes. We never generate addr32 prefix for LEA insn. */
25702 memory_address_length (rtx addr, bool lea)
25704 struct ix86_address parts;
25705 rtx base, index, disp;
25706 int len;
25707 int ok;
25709 if (GET_CODE (addr) == PRE_DEC
25710 || GET_CODE (addr) == POST_INC
25711 || GET_CODE (addr) == PRE_MODIFY
25712 || GET_CODE (addr) == POST_MODIFY)
25713 return 0;
25715 ok = ix86_decompose_address (addr, &parts);
25716 gcc_assert (ok);
25718 len = (parts.seg == SEG_DEFAULT) ? 0 : 1;
25720 /* If this is not LEA instruction, add the length of addr32 prefix. */
25721 if (TARGET_64BIT && !lea
25722 && (SImode_address_operand (addr, VOIDmode)
25723 || (parts.base && GET_MODE (parts.base) == SImode)
25724 || (parts.index && GET_MODE (parts.index) == SImode)))
25725 len++;
25727 base = parts.base;
25728 index = parts.index;
25729 disp = parts.disp;
25731 if (base && GET_CODE (base) == SUBREG)
25732 base = SUBREG_REG (base);
25733 if (index && GET_CODE (index) == SUBREG)
25734 index = SUBREG_REG (index);
25736 gcc_assert (base == NULL_RTX || REG_P (base));
25737 gcc_assert (index == NULL_RTX || REG_P (index));
25739 /* Rule of thumb:
25740 - esp as the base always wants an index,
25741 - ebp as the base always wants a displacement,
25742 - r12 as the base always wants an index,
25743 - r13 as the base always wants a displacement. */
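  /* Illustrative examples (not exhaustive): a plain (%eax) operand adds no
     extra bytes here, (%esp) needs a SIB byte (len++ below), (%ebp) needs a
     one-byte displacement (len++ below), and 16(%eax,%ebx) needs both a SIB
     byte and a disp8 constant.  */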
25745 /* Register Indirect. */
25746 if (base && !index && !disp)
25748 /* esp (for its index) and ebp (for its displacement) need
25749 the two-byte modrm form. Similarly for r12 and r13 in 64-bit
25750 code. */
25751 if (base == arg_pointer_rtx
25752 || base == frame_pointer_rtx
25753 || REGNO (base) == SP_REG
25754 || REGNO (base) == BP_REG
25755 || REGNO (base) == R12_REG
25756 || REGNO (base) == R13_REG)
25757 len++;
25760 /* Direct Addressing. In 64-bit mode mod 00 r/m 5
25761 is not disp32, but disp32(%rip), so for disp32
25762 SIB byte is needed, unless print_operand_address
25763 optimizes it into disp32(%rip) or (%rip) is implied
25764 by UNSPEC. */
25765 else if (disp && !base && !index)
25767 len += 4;
25768 if (rip_relative_addr_p (&parts))
25769 len++;
25771 else
25773 /* Find the length of the displacement constant. */
25774 if (disp)
25776 if (base && satisfies_constraint_K (disp))
25777 len += 1;
25778 else
25779 len += 4;
25781 /* ebp always wants a displacement. Similarly r13. */
25782 else if (base && (REGNO (base) == BP_REG || REGNO (base) == R13_REG))
25783 len++;
25785 /* An index requires the two-byte modrm form.... */
25786 if (index
25787 /* ...like esp (or r12), which always wants an index. */
25788 || base == arg_pointer_rtx
25789 || base == frame_pointer_rtx
25790 || (base && (REGNO (base) == SP_REG || REGNO (base) == R12_REG)))
25791 len++;
25794 return len;
25797 /* Compute default value for "length_immediate" attribute. When SHORTFORM
25798    is set, expect that the insn has an 8-bit immediate alternative.  */
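/* For example (illustrative operands): with SHORTFORM set, "add $3, %eax"
   can use the sign-extended 8-bit immediate form, so the immediate
   contributes 1 byte, while "add $1000, %eax" needs the full 4-byte
   SImode immediate.  */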
25800 ix86_attr_length_immediate_default (rtx_insn *insn, bool shortform)
25802 int len = 0;
25803 int i;
25804 extract_insn_cached (insn);
25805 for (i = recog_data.n_operands - 1; i >= 0; --i)
25806 if (CONSTANT_P (recog_data.operand[i]))
25808 enum attr_mode mode = get_attr_mode (insn);
25810 gcc_assert (!len);
25811 if (shortform && CONST_INT_P (recog_data.operand[i]))
25813 HOST_WIDE_INT ival = INTVAL (recog_data.operand[i]);
25814 switch (mode)
25816 case MODE_QI:
25817 len = 1;
25818 continue;
25819 case MODE_HI:
25820 ival = trunc_int_for_mode (ival, HImode);
25821 break;
25822 case MODE_SI:
25823 ival = trunc_int_for_mode (ival, SImode);
25824 break;
25825 default:
25826 break;
25828 if (IN_RANGE (ival, -128, 127))
25830 len = 1;
25831 continue;
25834 switch (mode)
25836 case MODE_QI:
25837 len = 1;
25838 break;
25839 case MODE_HI:
25840 len = 2;
25841 break;
25842 case MODE_SI:
25843 len = 4;
25844 break;
25845 /* Immediates for DImode instructions are encoded
25846 	   as 32-bit sign-extended values.  */
25847 case MODE_DI:
25848 len = 4;
25849 break;
25850 default:
25851 fatal_insn ("unknown insn mode", insn);
25854 return len;
25857 /* Compute default value for "length_address" attribute. */
25859 ix86_attr_length_address_default (rtx_insn *insn)
25861 int i;
25863 if (get_attr_type (insn) == TYPE_LEA)
25865 rtx set = PATTERN (insn), addr;
25867 if (GET_CODE (set) == PARALLEL)
25868 set = XVECEXP (set, 0, 0);
25870 gcc_assert (GET_CODE (set) == SET);
25872 addr = SET_SRC (set);
25874 return memory_address_length (addr, true);
25877 extract_insn_cached (insn);
25878 for (i = recog_data.n_operands - 1; i >= 0; --i)
25879 if (MEM_P (recog_data.operand[i]))
25881 constrain_operands_cached (insn, reload_completed);
25882 if (which_alternative != -1)
25884 const char *constraints = recog_data.constraints[i];
25885 int alt = which_alternative;
25887 while (*constraints == '=' || *constraints == '+')
25888 constraints++;
25889 while (alt-- > 0)
25890 while (*constraints++ != ',')
25892 /* Skip ignored operands. */
25893 if (*constraints == 'X')
25894 continue;
25896 return memory_address_length (XEXP (recog_data.operand[i], 0), false);
25898 return 0;
25901 /* Compute default value for "length_vex" attribute. It includes
25902 2 or 3 byte VEX prefix and 1 opcode byte. */
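/* For example (illustrative cases): a VEX insn whose operands need neither
   VEX.W nor the extended REX.X/REX.B bits can use the 2-byte (C5) prefix,
   so this returns 2 + 1; anything requiring VEX.W, or an opcode outside the
   0f map, forces the 3-byte (C4) prefix and returns 3 + 1.  */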
25905 ix86_attr_length_vex_default (rtx_insn *insn, bool has_0f_opcode,
25906 bool has_vex_w)
25908 int i;
25910   /* Only the 0f opcode can use the 2-byte VEX prefix, and the VEX W bit
25911      requires the 3-byte VEX prefix.  */
25912 if (!has_0f_opcode || has_vex_w)
25913 return 3 + 1;
25915   /* We can always use the 2-byte VEX prefix in 32-bit mode.  */
25916 if (!TARGET_64BIT)
25917 return 2 + 1;
25919 extract_insn_cached (insn);
25921 for (i = recog_data.n_operands - 1; i >= 0; --i)
25922 if (REG_P (recog_data.operand[i]))
25924 /* REX.W bit uses 3 byte VEX prefix. */
25925 if (GET_MODE (recog_data.operand[i]) == DImode
25926 && GENERAL_REG_P (recog_data.operand[i]))
25927 return 3 + 1;
25929 else
25931 /* REX.X or REX.B bits use 3 byte VEX prefix. */
25932 if (MEM_P (recog_data.operand[i])
25933 && x86_extended_reg_mentioned_p (recog_data.operand[i]))
25934 return 3 + 1;
25937 return 2 + 1;
25940 /* Return the maximum number of instructions a cpu can issue. */
25942 static int
25943 ix86_issue_rate (void)
25945 switch (ix86_tune)
25947 case PROCESSOR_PENTIUM:
25948 case PROCESSOR_BONNELL:
25949 case PROCESSOR_SILVERMONT:
25950 case PROCESSOR_KNL:
25951 case PROCESSOR_INTEL:
25952 case PROCESSOR_K6:
25953 case PROCESSOR_BTVER2:
25954 case PROCESSOR_PENTIUM4:
25955 case PROCESSOR_NOCONA:
25956 return 2;
25958 case PROCESSOR_PENTIUMPRO:
25959 case PROCESSOR_ATHLON:
25960 case PROCESSOR_K8:
25961 case PROCESSOR_AMDFAM10:
25962 case PROCESSOR_GENERIC:
25963 case PROCESSOR_BTVER1:
25964 return 3;
25966 case PROCESSOR_BDVER1:
25967 case PROCESSOR_BDVER2:
25968 case PROCESSOR_BDVER3:
25969 case PROCESSOR_BDVER4:
25970 case PROCESSOR_CORE2:
25971 case PROCESSOR_NEHALEM:
25972 case PROCESSOR_SANDYBRIDGE:
25973 case PROCESSOR_HASWELL:
25974 return 4;
25976 default:
25977 return 1;
25981 /* A subroutine of ix86_adjust_cost -- return TRUE iff INSN reads flags set
25982    by DEP_INSN and nothing else set by DEP_INSN.  */
25984 static bool
25985 ix86_flags_dependent (rtx_insn *insn, rtx_insn *dep_insn, enum attr_type insn_type)
25987 rtx set, set2;
25989 /* Simplify the test for uninteresting insns. */
25990 if (insn_type != TYPE_SETCC
25991 && insn_type != TYPE_ICMOV
25992 && insn_type != TYPE_FCMOV
25993 && insn_type != TYPE_IBR)
25994 return false;
25996 if ((set = single_set (dep_insn)) != 0)
25998 set = SET_DEST (set);
25999 set2 = NULL_RTX;
26001 else if (GET_CODE (PATTERN (dep_insn)) == PARALLEL
26002 && XVECLEN (PATTERN (dep_insn), 0) == 2
26003 && GET_CODE (XVECEXP (PATTERN (dep_insn), 0, 0)) == SET
26004 && GET_CODE (XVECEXP (PATTERN (dep_insn), 0, 1)) == SET)
26006 set = SET_DEST (XVECEXP (PATTERN (dep_insn), 0, 0));
26007       set2 = SET_DEST (XVECEXP (PATTERN (dep_insn), 0, 1));
26009 else
26010 return false;
26012 if (!REG_P (set) || REGNO (set) != FLAGS_REG)
26013 return false;
26015 /* This test is true if the dependent insn reads the flags but
26016 not any other potentially set register. */
26017 if (!reg_overlap_mentioned_p (set, PATTERN (insn)))
26018 return false;
26020 if (set2 && reg_overlap_mentioned_p (set2, PATTERN (insn)))
26021 return false;
26023 return true;
26026 /* Return true iff USE_INSN has a memory address with operands set by
26027 SET_INSN. */
26029 bool
26030 ix86_agi_dependent (rtx_insn *set_insn, rtx_insn *use_insn)
26032 int i;
26033 extract_insn_cached (use_insn);
26034 for (i = recog_data.n_operands - 1; i >= 0; --i)
26035 if (MEM_P (recog_data.operand[i]))
26037 rtx addr = XEXP (recog_data.operand[i], 0);
26038 return modified_in_p (addr, set_insn) != 0;
26040 return false;
26043 /* Helper function for exact_store_load_dependency.
26044 Return true if addr is found in insn. */
26045 static bool
26046 exact_dependency_1 (rtx addr, rtx insn)
26048 enum rtx_code code;
26049 const char *format_ptr;
26050 int i, j;
26052 code = GET_CODE (insn);
26053 switch (code)
26055 case MEM:
26056 if (rtx_equal_p (addr, insn))
26057 return true;
26058 break;
26059 case REG:
26060 CASE_CONST_ANY:
26061 case SYMBOL_REF:
26062 case CODE_LABEL:
26063 case PC:
26064 case CC0:
26065 case EXPR_LIST:
26066 return false;
26067 default:
26068 break;
26071 format_ptr = GET_RTX_FORMAT (code);
26072 for (i = 0; i < GET_RTX_LENGTH (code); i++)
26074 switch (*format_ptr++)
26076 case 'e':
26077 if (exact_dependency_1 (addr, XEXP (insn, i)))
26078 return true;
26079 break;
26080 case 'E':
26081 for (j = 0; j < XVECLEN (insn, i); j++)
26082 if (exact_dependency_1 (addr, XVECEXP (insn, i, j)))
26083 return true;
26084 break;
26087 return false;
26090 /* Return true if there exists exact dependency for store & load, i.e.
26091 the same memory address is used in them. */
26092 static bool
26093 exact_store_load_dependency (rtx_insn *store, rtx_insn *load)
26095 rtx set1, set2;
26097 set1 = single_set (store);
26098 if (!set1)
26099 return false;
26100 if (!MEM_P (SET_DEST (set1)))
26101 return false;
26102 set2 = single_set (load);
26103 if (!set2)
26104 return false;
26105 if (exact_dependency_1 (SET_DEST (set1), SET_SRC (set2)))
26106 return true;
26107 return false;
26110 static int
26111 ix86_adjust_cost (rtx_insn *insn, rtx link, rtx_insn *dep_insn, int cost)
26113 enum attr_type insn_type, dep_insn_type;
26114 enum attr_memory memory;
26115 rtx set, set2;
26116 int dep_insn_code_number;
26118 /* Anti and output dependencies have zero cost on all CPUs. */
26119 if (REG_NOTE_KIND (link) != 0)
26120 return 0;
26122 dep_insn_code_number = recog_memoized (dep_insn);
26124 /* If we can't recognize the insns, we can't really do anything. */
26125 if (dep_insn_code_number < 0 || recog_memoized (insn) < 0)
26126 return cost;
26128 insn_type = get_attr_type (insn);
26129 dep_insn_type = get_attr_type (dep_insn);
26131 switch (ix86_tune)
26133 case PROCESSOR_PENTIUM:
26134 /* Address Generation Interlock adds a cycle of latency. */
26135 if (insn_type == TYPE_LEA)
26137 rtx addr = PATTERN (insn);
26139 if (GET_CODE (addr) == PARALLEL)
26140 addr = XVECEXP (addr, 0, 0);
26142 gcc_assert (GET_CODE (addr) == SET);
26144 addr = SET_SRC (addr);
26145 if (modified_in_p (addr, dep_insn))
26146 cost += 1;
26148 else if (ix86_agi_dependent (dep_insn, insn))
26149 cost += 1;
26151 /* ??? Compares pair with jump/setcc. */
26152 if (ix86_flags_dependent (insn, dep_insn, insn_type))
26153 cost = 0;
26155 /* Floating point stores require value to be ready one cycle earlier. */
26156 if (insn_type == TYPE_FMOV
26157 && get_attr_memory (insn) == MEMORY_STORE
26158 && !ix86_agi_dependent (dep_insn, insn))
26159 cost += 1;
26160 break;
26162 case PROCESSOR_PENTIUMPRO:
26163 /* INT->FP conversion is expensive. */
26164 if (get_attr_fp_int_src (dep_insn))
26165 cost += 5;
26167 /* There is one cycle extra latency between an FP op and a store. */
26168 if (insn_type == TYPE_FMOV
26169 && (set = single_set (dep_insn)) != NULL_RTX
26170 && (set2 = single_set (insn)) != NULL_RTX
26171 && rtx_equal_p (SET_DEST (set), SET_SRC (set2))
26172 && MEM_P (SET_DEST (set2)))
26173 cost += 1;
26175 memory = get_attr_memory (insn);
26177 /* Show ability of reorder buffer to hide latency of load by executing
26178 in parallel with previous instruction in case
26179 previous instruction is not needed to compute the address. */
26180 if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
26181 && !ix86_agi_dependent (dep_insn, insn))
26183 /* Claim moves to take one cycle, as core can issue one load
26184 at time and the next load can start cycle later. */
26185 if (dep_insn_type == TYPE_IMOV
26186 || dep_insn_type == TYPE_FMOV)
26187 cost = 1;
26188 else if (cost > 1)
26189 cost--;
26191 break;
26193 case PROCESSOR_K6:
26194 /* The esp dependency is resolved before
26195 the instruction is really finished. */
26196 if ((insn_type == TYPE_PUSH || insn_type == TYPE_POP)
26197 && (dep_insn_type == TYPE_PUSH || dep_insn_type == TYPE_POP))
26198 return 1;
26200 /* INT->FP conversion is expensive. */
26201 if (get_attr_fp_int_src (dep_insn))
26202 cost += 5;
26204 memory = get_attr_memory (insn);
26206 /* Show ability of reorder buffer to hide latency of load by executing
26207 in parallel with previous instruction in case
26208 previous instruction is not needed to compute the address. */
26209 if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
26210 && !ix86_agi_dependent (dep_insn, insn))
26212 /* Claim moves to take one cycle, as core can issue one load
26213 at time and the next load can start cycle later. */
26214 if (dep_insn_type == TYPE_IMOV
26215 || dep_insn_type == TYPE_FMOV)
26216 cost = 1;
26217 else if (cost > 2)
26218 cost -= 2;
26219 else
26220 cost = 1;
26222 break;
26224 case PROCESSOR_AMDFAM10:
26225 case PROCESSOR_BDVER1:
26226 case PROCESSOR_BDVER2:
26227 case PROCESSOR_BDVER3:
26228 case PROCESSOR_BDVER4:
26229 case PROCESSOR_BTVER1:
26230 case PROCESSOR_BTVER2:
26231 case PROCESSOR_GENERIC:
26232       /* The stack engine allows push&pop instructions to execute in parallel.  */
26233 if ((insn_type == TYPE_PUSH || insn_type == TYPE_POP)
26234 && (dep_insn_type == TYPE_PUSH || dep_insn_type == TYPE_POP))
26235 return 0;
26236 /* FALLTHRU */
26238 case PROCESSOR_ATHLON:
26239 case PROCESSOR_K8:
26240 memory = get_attr_memory (insn);
26242 /* Show ability of reorder buffer to hide latency of load by executing
26243 in parallel with previous instruction in case
26244 previous instruction is not needed to compute the address. */
26245 if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
26246 && !ix86_agi_dependent (dep_insn, insn))
26248 enum attr_unit unit = get_attr_unit (insn);
26249 int loadcost = 3;
26251 /* Because of the difference between the length of integer and
26252 floating unit pipeline preparation stages, the memory operands
26253 for floating point are cheaper.
26255 	     ??? For Athlon the difference is most probably 2.  */
26256 if (unit == UNIT_INTEGER || unit == UNIT_UNKNOWN)
26257 loadcost = 3;
26258 else
26259 loadcost = TARGET_ATHLON ? 2 : 0;
26261 if (cost >= loadcost)
26262 cost -= loadcost;
26263 else
26264 cost = 0;
26266 break;
26268 case PROCESSOR_CORE2:
26269 case PROCESSOR_NEHALEM:
26270 case PROCESSOR_SANDYBRIDGE:
26271 case PROCESSOR_HASWELL:
26272       /* The stack engine allows push&pop instructions to execute in parallel.  */
26273 if ((insn_type == TYPE_PUSH || insn_type == TYPE_POP)
26274 && (dep_insn_type == TYPE_PUSH || dep_insn_type == TYPE_POP))
26275 return 0;
26277 memory = get_attr_memory (insn);
26279 /* Show ability of reorder buffer to hide latency of load by executing
26280 in parallel with previous instruction in case
26281 previous instruction is not needed to compute the address. */
26282 if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
26283 && !ix86_agi_dependent (dep_insn, insn))
26285 if (cost >= 4)
26286 cost -= 4;
26287 else
26288 cost = 0;
26290 break;
26292 case PROCESSOR_SILVERMONT:
26293 case PROCESSOR_KNL:
26294 case PROCESSOR_INTEL:
26295 if (!reload_completed)
26296 return cost;
26298 /* Increase cost of integer loads. */
26299 memory = get_attr_memory (dep_insn);
26300 if (memory == MEMORY_LOAD || memory == MEMORY_BOTH)
26302 enum attr_unit unit = get_attr_unit (dep_insn);
26303 if (unit == UNIT_INTEGER && cost == 1)
26305 if (memory == MEMORY_LOAD)
26306 cost = 3;
26307 else
26309 /* Increase cost of ld/st for short int types only
26310 		 because of the store forwarding issue.  */
26311 rtx set = single_set (dep_insn);
26312 if (set && (GET_MODE (SET_DEST (set)) == QImode
26313 || GET_MODE (SET_DEST (set)) == HImode))
26315 		  /* Increase the cost of the store/load insn if an exact
26316 		     dependence exists and it is a load insn.  */
26317 enum attr_memory insn_memory = get_attr_memory (insn);
26318 if (insn_memory == MEMORY_LOAD
26319 && exact_store_load_dependency (dep_insn, insn))
26320 cost = 3;
26326 default:
26327 break;
26330 return cost;
26333 /* How many alternative schedules to try. This should be as wide as the
26334 scheduling freedom in the DFA, but no wider. Making this value too
26335    large results in extra work for the scheduler.  */
26337 static int
26338 ia32_multipass_dfa_lookahead (void)
26340 switch (ix86_tune)
26342 case PROCESSOR_PENTIUM:
26343 return 2;
26345 case PROCESSOR_PENTIUMPRO:
26346 case PROCESSOR_K6:
26347 return 1;
26349 case PROCESSOR_BDVER1:
26350 case PROCESSOR_BDVER2:
26351 case PROCESSOR_BDVER3:
26352 case PROCESSOR_BDVER4:
26353 /* We use lookahead value 4 for BD both before and after reload
26354 schedules. Plan is to have value 8 included for O3. */
26355 return 4;
26357 case PROCESSOR_CORE2:
26358 case PROCESSOR_NEHALEM:
26359 case PROCESSOR_SANDYBRIDGE:
26360 case PROCESSOR_HASWELL:
26361 case PROCESSOR_BONNELL:
26362 case PROCESSOR_SILVERMONT:
26363 case PROCESSOR_KNL:
26364 case PROCESSOR_INTEL:
26365 /* Generally, we want haifa-sched:max_issue() to look ahead as far
26366 	 as the number of instructions that can be executed in a cycle, i.e.,
26367 issue_rate. I wonder why tuning for many CPUs does not do this. */
26368 if (reload_completed)
26369 return ix86_issue_rate ();
26370 /* Don't use lookahead for pre-reload schedule to save compile time. */
26371 return 0;
26373 default:
26374 return 0;
26378 /* Return true if target platform supports macro-fusion. */
26380 static bool
26381 ix86_macro_fusion_p ()
26383 return TARGET_FUSE_CMP_AND_BRANCH;
26386 /* Check whether the current microarchitecture supports macro fusion
26387 for insn pair "CONDGEN + CONDJMP". Refer to
26388 "Intel Architectures Optimization Reference Manual". */
26390 static bool
26391 ix86_macro_fusion_pair_p (rtx_insn *condgen, rtx_insn *condjmp)
26393 rtx src, dest;
26394 enum rtx_code ccode;
26395 rtx compare_set = NULL_RTX, test_if, cond;
26396 rtx alu_set = NULL_RTX, addr = NULL_RTX;
26398 if (!any_condjump_p (condjmp))
26399 return false;
26401 if (get_attr_type (condgen) != TYPE_TEST
26402 && get_attr_type (condgen) != TYPE_ICMP
26403 && get_attr_type (condgen) != TYPE_INCDEC
26404 && get_attr_type (condgen) != TYPE_ALU)
26405 return false;
26407 compare_set = single_set (condgen);
26408 if (compare_set == NULL_RTX
26409 && !TARGET_FUSE_ALU_AND_BRANCH)
26410 return false;
26412 if (compare_set == NULL_RTX)
26414 int i;
26415 rtx pat = PATTERN (condgen);
26416 for (i = 0; i < XVECLEN (pat, 0); i++)
26417 if (GET_CODE (XVECEXP (pat, 0, i)) == SET)
26419 rtx set_src = SET_SRC (XVECEXP (pat, 0, i));
26420 if (GET_CODE (set_src) == COMPARE)
26421 compare_set = XVECEXP (pat, 0, i);
26422 else
26423 alu_set = XVECEXP (pat, 0, i);
26426 if (compare_set == NULL_RTX)
26427 return false;
26428 src = SET_SRC (compare_set);
26429 if (GET_CODE (src) != COMPARE)
26430 return false;
26432 /* Macro-fusion for cmp/test MEM-IMM + conditional jmp is not
26433 supported. */
26434 if ((MEM_P (XEXP (src, 0))
26435 && CONST_INT_P (XEXP (src, 1)))
26436 || (MEM_P (XEXP (src, 1))
26437 && CONST_INT_P (XEXP (src, 0))))
26438 return false;
26440 /* No fusion for RIP-relative address. */
26441 if (MEM_P (XEXP (src, 0)))
26442 addr = XEXP (XEXP (src, 0), 0);
26443 else if (MEM_P (XEXP (src, 1)))
26444 addr = XEXP (XEXP (src, 1), 0);
26446 if (addr) {
26447 ix86_address parts;
26448 int ok = ix86_decompose_address (addr, &parts);
26449 gcc_assert (ok);
26451 if (rip_relative_addr_p (&parts))
26452 return false;
26455 test_if = SET_SRC (pc_set (condjmp));
26456 cond = XEXP (test_if, 0);
26457 ccode = GET_CODE (cond);
26458 /* Check whether the conditional jump uses the Sign or Overflow flags. */
26459 if (!TARGET_FUSE_CMP_AND_BRANCH_SOFLAGS
26460 && (ccode == GE
26461 || ccode == GT
26462 || ccode == LE
26463 || ccode == LT))
26464 return false;
26466 /* Return true for TYPE_TEST and TYPE_ICMP. */
26467 if (get_attr_type (condgen) == TYPE_TEST
26468 || get_attr_type (condgen) == TYPE_ICMP)
26469 return true;
26471 /* The following handles the case of macro-fusion for ALU + jmp. */
26472 if (!TARGET_FUSE_ALU_AND_BRANCH || !alu_set)
26473 return false;
26475 /* No fusion for alu op with memory destination operand. */
26476 dest = SET_DEST (alu_set);
26477 if (MEM_P (dest))
26478 return false;
26480 /* Macro-fusion for inc/dec + unsigned conditional jump is not
26481 supported. */
26482 if (get_attr_type (condgen) == TYPE_INCDEC
26483 && (ccode == GEU
26484 || ccode == GTU
26485 || ccode == LEU
26486 || ccode == LTU))
26487 return false;
26489 return true;
26492 /* Try to reorder the ready list to take advantage of Atom's pipelined IMUL
26493 execution. It is applied if
26494 (1) an IMUL instruction is on top of the list;
26495 (2) there is exactly one producer of an independent IMUL instruction in
26496 the ready list.
26497 Return the index of the IMUL producer if it was found and -1 otherwise. */
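/* Illustrative scenario: if the ready list ends with an IMUL and also
   contains an instruction (say, a load) that is the sole producer of a
   second, independent IMUL, this function returns that producer's index so
   the caller can hoist it to the top and let the two IMULs overlap in
   Bonnell's pipelined multiplier.  The "load" here is only an example of
   such a producer.  */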
26498 static int
26499 do_reorder_for_imul (rtx_insn **ready, int n_ready)
26501 rtx_insn *insn;
26502 rtx set, insn1, insn2;
26503 sd_iterator_def sd_it;
26504 dep_t dep;
26505 int index = -1;
26506 int i;
26508 if (!TARGET_BONNELL)
26509 return index;
26511 /* Check that IMUL instruction is on the top of ready list. */
26512 insn = ready[n_ready - 1];
26513 set = single_set (insn);
26514 if (!set)
26515 return index;
26516 if (!(GET_CODE (SET_SRC (set)) == MULT
26517 && GET_MODE (SET_SRC (set)) == SImode))
26518 return index;
26520 /* Search for producer of independent IMUL instruction. */
26521 for (i = n_ready - 2; i >= 0; i--)
26523 insn = ready[i];
26524 if (!NONDEBUG_INSN_P (insn))
26525 continue;
26526 /* Skip IMUL instruction. */
26527 insn2 = PATTERN (insn);
26528 if (GET_CODE (insn2) == PARALLEL)
26529 insn2 = XVECEXP (insn2, 0, 0);
26530 if (GET_CODE (insn2) == SET
26531 && GET_CODE (SET_SRC (insn2)) == MULT
26532 && GET_MODE (SET_SRC (insn2)) == SImode)
26533 continue;
26535 FOR_EACH_DEP (insn, SD_LIST_FORW, sd_it, dep)
26537 rtx con;
26538 con = DEP_CON (dep);
26539 if (!NONDEBUG_INSN_P (con))
26540 continue;
26541 insn1 = PATTERN (con);
26542 if (GET_CODE (insn1) == PARALLEL)
26543 insn1 = XVECEXP (insn1, 0, 0);
26545 if (GET_CODE (insn1) == SET
26546 && GET_CODE (SET_SRC (insn1)) == MULT
26547 && GET_MODE (SET_SRC (insn1)) == SImode)
26549 sd_iterator_def sd_it1;
26550 dep_t dep1;
26551 /* Check that there is no other producer for the IMUL. */
26552 index = i;
26553 FOR_EACH_DEP (con, SD_LIST_BACK, sd_it1, dep1)
26555 rtx pro;
26556 pro = DEP_PRO (dep1);
26557 if (!NONDEBUG_INSN_P (pro))
26558 continue;
26559 if (pro != insn)
26560 index = -1;
26562 if (index >= 0)
26563 break;
26566 if (index >= 0)
26567 break;
26569 return index;
26572 /* Try to find the best candidate for the top of the ready list if two insns
26573 have the same priority - a candidate is best if its dependees were
26574 scheduled earlier. Applied for Silvermont/Intel only.
26575 Return true if the top 2 insns must be interchanged. */
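/* Illustrative tie-break: when both top insns have equal priority and
   their producers completed at the same tick, the code below prefers to
   put a load on top; otherwise the insn whose latest producer finished
   earlier wins.  */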
26576 static bool
26577 swap_top_of_ready_list (rtx_insn **ready, int n_ready)
26579 rtx_insn *top = ready[n_ready - 1];
26580 rtx_insn *next = ready[n_ready - 2];
26581 rtx set;
26582 sd_iterator_def sd_it;
26583 dep_t dep;
26584 int clock1 = -1;
26585 int clock2 = -1;
26586 #define INSN_TICK(INSN) (HID (INSN)->tick)
26588 if (!TARGET_SILVERMONT && !TARGET_INTEL)
26589 return false;
26591 if (!NONDEBUG_INSN_P (top))
26592 return false;
26593 if (!NONJUMP_INSN_P (top))
26594 return false;
26595 if (!NONDEBUG_INSN_P (next))
26596 return false;
26597 if (!NONJUMP_INSN_P (next))
26598 return false;
26599 set = single_set (top);
26600 if (!set)
26601 return false;
26602 set = single_set (next);
26603 if (!set)
26604 return false;
26606 if (INSN_PRIORITY_KNOWN (top) && INSN_PRIORITY_KNOWN (next))
26608 if (INSN_PRIORITY (top) != INSN_PRIORITY (next))
26609 return false;
26610 /* Determine the winner more precisely. */
26611 FOR_EACH_DEP (top, SD_LIST_RES_BACK, sd_it, dep)
26613 rtx pro;
26614 pro = DEP_PRO (dep);
26615 if (!NONDEBUG_INSN_P (pro))
26616 continue;
26617 if (INSN_TICK (pro) > clock1)
26618 clock1 = INSN_TICK (pro);
26620 FOR_EACH_DEP (next, SD_LIST_RES_BACK, sd_it, dep)
26622 rtx pro;
26623 pro = DEP_PRO (dep);
26624 if (!NONDEBUG_INSN_P (pro))
26625 continue;
26626 if (INSN_TICK (pro) > clock2)
26627 clock2 = INSN_TICK (pro);
26630 if (clock1 == clock2)
26632 /* Determine the winner - a load must win. */
26633 enum attr_memory memory1, memory2;
26634 memory1 = get_attr_memory (top);
26635 memory2 = get_attr_memory (next);
26636 if (memory2 == MEMORY_LOAD && memory1 != MEMORY_LOAD)
26637 return true;
26639 return (bool) (clock2 < clock1);
26641 return false;
26642 #undef INSN_TICK
26645 /* Perform possible reordering of the ready list for Atom/Silvermont only.
26646 Return the issue rate. */
26647 static int
26648 ix86_sched_reorder (FILE *dump, int sched_verbose, rtx_insn **ready,
26649 int *pn_ready, int clock_var)
26651 int issue_rate = -1;
26652 int n_ready = *pn_ready;
26653 int i;
26654 rtx_insn *insn;
26655 int index = -1;
26657 /* Set up issue rate. */
26658 issue_rate = ix86_issue_rate ();
26660 /* Do reordering for BONNELL/SILVERMONT/INTEL only. */
26661 if (!TARGET_BONNELL && !TARGET_SILVERMONT && !TARGET_INTEL)
26662 return issue_rate;
26664 /* Nothing to do if ready list contains only 1 instruction. */
26665 if (n_ready <= 1)
26666 return issue_rate;
26668 /* Do reordering for the post-reload scheduler only. */
26669 if (!reload_completed)
26670 return issue_rate;
26672 if ((index = do_reorder_for_imul (ready, n_ready)) >= 0)
26674 if (sched_verbose > 1)
26675 fprintf (dump, ";;\tatom sched_reorder: put %d insn on top\n",
26676 INSN_UID (ready[index]));
26678 /* Put IMUL producer (ready[index]) at the top of ready list. */
26679 insn = ready[index];
26680 for (i = index; i < n_ready - 1; i++)
26681 ready[i] = ready[i + 1];
26682 ready[n_ready - 1] = insn;
26683 return issue_rate;
26685 if (clock_var != 0 && swap_top_of_ready_list (ready, n_ready))
26687 if (sched_verbose > 1)
26688 fprintf (dump, ";;\tslm sched_reorder: swap %d and %d insns\n",
26689 INSN_UID (ready[n_ready - 1]), INSN_UID (ready[n_ready - 2]));
26690 /* Swap 2 top elements of ready list. */
26691 insn = ready[n_ready - 1];
26692 ready[n_ready - 1] = ready[n_ready - 2];
26693 ready[n_ready - 2] = insn;
26695 return issue_rate;
26698 static bool
26699 ix86_class_likely_spilled_p (reg_class_t);
26701 /* Return true if the lhs of INSN is a HW function argument register; set
26702 *IS_SPILLED to true if it is a likely-spilled HW register. */
26703 static bool
26704 insn_is_function_arg (rtx insn, bool* is_spilled)
26706 rtx dst;
26708 if (!NONDEBUG_INSN_P (insn))
26709 return false;
26710 /* Call instructions are not movable, so ignore them. */
26711 if (CALL_P (insn))
26712 return false;
26713 insn = PATTERN (insn);
26714 if (GET_CODE (insn) == PARALLEL)
26715 insn = XVECEXP (insn, 0, 0);
26716 if (GET_CODE (insn) != SET)
26717 return false;
26718 dst = SET_DEST (insn);
26719 if (REG_P (dst) && HARD_REGISTER_P (dst)
26720 && ix86_function_arg_regno_p (REGNO (dst)))
26722 /* Is it a likely-spilled HW register? */
26723 if (!TEST_HARD_REG_BIT (fixed_reg_set, REGNO (dst))
26724 && ix86_class_likely_spilled_p (REGNO_REG_CLASS (REGNO (dst))))
26725 *is_spilled = true;
26726 return true;
26728 return false;
26731 /* Add output dependencies for a chain of adjacent function arguments, but
26732 only if there is a move to a likely-spilled HW register. Return the first
26733 argument if at least one dependence was added, or NULL otherwise. */
26734 static rtx_insn *
26735 add_parameter_dependencies (rtx_insn *call, rtx_insn *head)
26737 rtx_insn *insn;
26738 rtx_insn *last = call;
26739 rtx_insn *first_arg = NULL;
26740 bool is_spilled = false;
26742 head = PREV_INSN (head);
26744 /* Find the argument-passing instruction nearest to the call. */
26745 while (true)
26747 last = PREV_INSN (last);
26748 if (last == head)
26749 return NULL;
26750 if (!NONDEBUG_INSN_P (last))
26751 continue;
26752 if (insn_is_function_arg (last, &is_spilled))
26753 break;
26754 return NULL;
26757 first_arg = last;
26758 while (true)
26760 insn = PREV_INSN (last);
26761 if (!INSN_P (insn))
26762 break;
26763 if (insn == head)
26764 break;
26765 if (!NONDEBUG_INSN_P (insn))
26767 last = insn;
26768 continue;
26770 if (insn_is_function_arg (insn, &is_spilled))
26772 /* Add an output dependence between two function arguments if the chain
26773 of output arguments contains likely-spilled HW registers. */
26774 if (is_spilled)
26775 add_dependence (first_arg, insn, REG_DEP_OUTPUT);
26776 first_arg = last = insn;
26778 else
26779 break;
26781 if (!is_spilled)
26782 return NULL;
26783 return first_arg;
26786 /* Add output or anti dependency from insn to first_arg to restrict its code
26787 motion. */
26788 static void
26789 avoid_func_arg_motion (rtx_insn *first_arg, rtx_insn *insn)
26791 rtx set;
26792 rtx tmp;
26794 set = single_set (insn);
26795 if (!set)
26796 return;
26797 tmp = SET_DEST (set);
26798 if (REG_P (tmp))
26800 /* Add output dependency to the first function argument. */
26801 add_dependence (first_arg, insn, REG_DEP_OUTPUT);
26802 return;
26804 /* Add anti dependency. */
26805 add_dependence (first_arg, insn, REG_DEP_ANTI);
26808 /* Avoid cross-block motion of a function argument by adding a dependency
26809 from the first non-jump instruction in BB. */
26810 static void
26811 add_dependee_for_func_arg (rtx_insn *arg, basic_block bb)
26813 rtx_insn *insn = BB_END (bb);
26815 while (insn)
26817 if (NONDEBUG_INSN_P (insn) && NONJUMP_INSN_P (insn))
26819 rtx set = single_set (insn);
26820 if (set)
26822 avoid_func_arg_motion (arg, insn);
26823 return;
26826 if (insn == BB_HEAD (bb))
26827 return;
26828 insn = PREV_INSN (insn);
26832 /* Hook for pre-reload schedule - avoid motion of function arguments
26833 passed in likely spilled HW registers. */
26834 static void
26835 ix86_dependencies_evaluation_hook (rtx_insn *head, rtx_insn *tail)
26837 rtx_insn *insn;
26838 rtx_insn *first_arg = NULL;
26839 if (reload_completed)
26840 return;
26841 while (head != tail && DEBUG_INSN_P (head))
26842 head = NEXT_INSN (head);
26843 for (insn = tail; insn != head; insn = PREV_INSN (insn))
26844 if (INSN_P (insn) && CALL_P (insn))
26846 first_arg = add_parameter_dependencies (insn, head);
26847 if (first_arg)
26849 /* Add a dependee for the first argument to predecessor blocks, but
26850 only if the region contains more than one block. */
26851 basic_block bb = BLOCK_FOR_INSN (insn);
26852 int rgn = CONTAINING_RGN (bb->index);
26853 int nr_blks = RGN_NR_BLOCKS (rgn);
26854 /* Skip trivial regions and region head blocks that can have
26855 predecessors outside of region. */
26856 if (nr_blks > 1 && BLOCK_TO_BB (bb->index) != 0)
26858 edge e;
26859 edge_iterator ei;
26861 /* Regions are SCCs with the exception of selective
26862 scheduling with pipelining of outer blocks enabled.
26863 So also check that immediate predecessors of a non-head
26864 block are in the same region. */
26865 FOR_EACH_EDGE (e, ei, bb->preds)
26867 /* Avoid creating loop-carried dependencies by using
26868 the topological ordering in the region. */
26869 if (rgn == CONTAINING_RGN (e->src->index)
26870 && BLOCK_TO_BB (bb->index) > BLOCK_TO_BB (e->src->index))
26871 add_dependee_for_func_arg (first_arg, e->src);
26874 insn = first_arg;
26875 if (insn == head)
26876 break;
26879 else if (first_arg)
26880 avoid_func_arg_motion (first_arg, insn);
26883 /* Hook for pre-reload schedule - set the priority of moves from likely-spilled
26884 HW registers to the maximum, to schedule them as soon as possible. These are
26885 moves from function argument registers at the top of the function entry
26886 and moves from function return value registers after a call. */
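/* For instance, in a 64-bit function the pre-reload move that copies the
   first integer argument out of %edi into a pseudo has a likely-spilled
   hard register as its source, so it receives the maximum priority here;
   the same holds for copies from return-value registers such as %eax
   after a call.  (Illustrative example of the rule stated above.)  */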
26887 static int
26888 ix86_adjust_priority (rtx_insn *insn, int priority)
26890 rtx set;
26892 if (reload_completed)
26893 return priority;
26895 if (!NONDEBUG_INSN_P (insn))
26896 return priority;
26898 set = single_set (insn);
26899 if (set)
26901 rtx tmp = SET_SRC (set);
26902 if (REG_P (tmp)
26903 && HARD_REGISTER_P (tmp)
26904 && !TEST_HARD_REG_BIT (fixed_reg_set, REGNO (tmp))
26905 && ix86_class_likely_spilled_p (REGNO_REG_CLASS (REGNO (tmp))))
26906 return current_sched_info->sched_max_insns_priority;
26909 return priority;
26912 /* Model the decoder of Core 2/i7.
26913 The hooks below for multipass scheduling (see haifa-sched.c:max_issue)
26914 track the instruction fetch block boundaries and make sure that long
26915 (9+ byte) instructions are assigned to D0. */
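/* Worked example of the model (using the parameters set in
   ix86_sched_init_global below: 8-byte secondary-decoder limit, 16-byte
   ifetch block, at most 6 insns per cycle): a 9-byte insn can only be
   taken as the first insn of a cycle, i.e. by decoder D0, and three
   7-byte insns cannot issue together because 21 bytes exceed the 16-byte
   fetch block.  */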
26917 /* Maximum length of an insn that can be handled by
26918 a secondary decoder unit. '8' for Core 2/i7. */
26919 static int core2i7_secondary_decoder_max_insn_size;
26921 /* Ifetch block size, i.e., number of bytes decoder reads per cycle.
26922 '16' for Core 2/i7. */
26923 static int core2i7_ifetch_block_size;
26925 /* Maximum number of instructions decoder can handle per cycle.
26926 '6' for Core 2/i7. */
26927 static int core2i7_ifetch_block_max_insns;
26929 typedef struct ix86_first_cycle_multipass_data_ *
26930 ix86_first_cycle_multipass_data_t;
26931 typedef const struct ix86_first_cycle_multipass_data_ *
26932 const_ix86_first_cycle_multipass_data_t;
26934 /* A variable to store target state across calls to max_issue within
26935 one cycle. */
26936 static struct ix86_first_cycle_multipass_data_ _ix86_first_cycle_multipass_data,
26937 *ix86_first_cycle_multipass_data = &_ix86_first_cycle_multipass_data;
26939 /* Initialize DATA. */
26940 static void
26941 core2i7_first_cycle_multipass_init (void *_data)
26943 ix86_first_cycle_multipass_data_t data
26944 = (ix86_first_cycle_multipass_data_t) _data;
26946 data->ifetch_block_len = 0;
26947 data->ifetch_block_n_insns = 0;
26948 data->ready_try_change = NULL;
26949 data->ready_try_change_size = 0;
26952 /* Advancing the cycle; reset ifetch block counts. */
26953 static void
26954 core2i7_dfa_post_advance_cycle (void)
26956 ix86_first_cycle_multipass_data_t data = ix86_first_cycle_multipass_data;
26958 gcc_assert (data->ifetch_block_n_insns <= core2i7_ifetch_block_max_insns);
26960 data->ifetch_block_len = 0;
26961 data->ifetch_block_n_insns = 0;
26964 static int min_insn_size (rtx_insn *);
26966 /* Filter out insns from ready_try that the core will not be able to issue
26967 on the current cycle due to decoder restrictions. */
26968 static void
26969 core2i7_first_cycle_multipass_filter_ready_try
26970 (const_ix86_first_cycle_multipass_data_t data,
26971 signed char *ready_try, int n_ready, bool first_cycle_insn_p)
26973 while (n_ready--)
26975 rtx_insn *insn;
26976 int insn_size;
26978 if (ready_try[n_ready])
26979 continue;
26981 insn = get_ready_element (n_ready);
26982 insn_size = min_insn_size (insn);
26984 if (/* If this is too long an insn for a secondary decoder ... */
26985 (!first_cycle_insn_p
26986 && insn_size > core2i7_secondary_decoder_max_insn_size)
26987 /* ... or it would not fit into the ifetch block ... */
26988 || data->ifetch_block_len + insn_size > core2i7_ifetch_block_size
26989 /* ... or the decoder is full already ... */
26990 || data->ifetch_block_n_insns + 1 > core2i7_ifetch_block_max_insns)
26991 /* ... mask the insn out. */
26993 ready_try[n_ready] = 1;
26995 if (data->ready_try_change)
26996 bitmap_set_bit (data->ready_try_change, n_ready);
27001 /* Prepare for a new round of multipass lookahead scheduling. */
27002 static void
27003 core2i7_first_cycle_multipass_begin (void *_data,
27004 signed char *ready_try, int n_ready,
27005 bool first_cycle_insn_p)
27007 ix86_first_cycle_multipass_data_t data
27008 = (ix86_first_cycle_multipass_data_t) _data;
27009 const_ix86_first_cycle_multipass_data_t prev_data
27010 = ix86_first_cycle_multipass_data;
27012 /* Restore the state from the end of the previous round. */
27013 data->ifetch_block_len = prev_data->ifetch_block_len;
27014 data->ifetch_block_n_insns = prev_data->ifetch_block_n_insns;
27016 /* Filter instructions that cannot be issued on current cycle due to
27017 decoder restrictions. */
27018 core2i7_first_cycle_multipass_filter_ready_try (data, ready_try, n_ready,
27019 first_cycle_insn_p);
27022 /* INSN is being issued in current solution. Account for its impact on
27023 the decoder model. */
27024 static void
27025 core2i7_first_cycle_multipass_issue (void *_data,
27026 signed char *ready_try, int n_ready,
27027 rtx_insn *insn, const void *_prev_data)
27029 ix86_first_cycle_multipass_data_t data
27030 = (ix86_first_cycle_multipass_data_t) _data;
27031 const_ix86_first_cycle_multipass_data_t prev_data
27032 = (const_ix86_first_cycle_multipass_data_t) _prev_data;
27034 int insn_size = min_insn_size (insn);
27036 data->ifetch_block_len = prev_data->ifetch_block_len + insn_size;
27037 data->ifetch_block_n_insns = prev_data->ifetch_block_n_insns + 1;
27038 gcc_assert (data->ifetch_block_len <= core2i7_ifetch_block_size
27039 && data->ifetch_block_n_insns <= core2i7_ifetch_block_max_insns);
27041 /* Allocate or resize the bitmap for storing INSN's effect on ready_try. */
27042 if (!data->ready_try_change)
27044 data->ready_try_change = sbitmap_alloc (n_ready);
27045 data->ready_try_change_size = n_ready;
27047 else if (data->ready_try_change_size < n_ready)
27049 data->ready_try_change = sbitmap_resize (data->ready_try_change,
27050 n_ready, 0);
27051 data->ready_try_change_size = n_ready;
27053 bitmap_clear (data->ready_try_change);
27055 /* Filter out insns from ready_try that the core will not be able to issue
27056 on the current cycle due to decoder restrictions. */
27057 core2i7_first_cycle_multipass_filter_ready_try (data, ready_try, n_ready,
27058 false);
27061 /* Revert the effect on ready_try. */
27062 static void
27063 core2i7_first_cycle_multipass_backtrack (const void *_data,
27064 signed char *ready_try,
27065 int n_ready ATTRIBUTE_UNUSED)
27067 const_ix86_first_cycle_multipass_data_t data
27068 = (const_ix86_first_cycle_multipass_data_t) _data;
27069 unsigned int i = 0;
27070 sbitmap_iterator sbi;
27072 gcc_assert (bitmap_last_set_bit (data->ready_try_change) < n_ready);
27073 EXECUTE_IF_SET_IN_BITMAP (data->ready_try_change, 0, i, sbi)
27075 ready_try[i] = 0;
27079 /* Save the result of multipass lookahead scheduling for the next round. */
27080 static void
27081 core2i7_first_cycle_multipass_end (const void *_data)
27083 const_ix86_first_cycle_multipass_data_t data
27084 = (const_ix86_first_cycle_multipass_data_t) _data;
27085 ix86_first_cycle_multipass_data_t next_data
27086 = ix86_first_cycle_multipass_data;
27088 if (data != NULL)
27090 next_data->ifetch_block_len = data->ifetch_block_len;
27091 next_data->ifetch_block_n_insns = data->ifetch_block_n_insns;
27095 /* Deallocate target data. */
27096 static void
27097 core2i7_first_cycle_multipass_fini (void *_data)
27099 ix86_first_cycle_multipass_data_t data
27100 = (ix86_first_cycle_multipass_data_t) _data;
27102 if (data->ready_try_change)
27104 sbitmap_free (data->ready_try_change);
27105 data->ready_try_change = NULL;
27106 data->ready_try_change_size = 0;
27110 /* Prepare for scheduling pass. */
27111 static void
27112 ix86_sched_init_global (FILE *, int, int)
27114 /* Install scheduling hooks for current CPU. Some of these hooks are used
27115 in time-critical parts of the scheduler, so we only set them up when
27116 they are actually used. */
27117 switch (ix86_tune)
27119 case PROCESSOR_CORE2:
27120 case PROCESSOR_NEHALEM:
27121 case PROCESSOR_SANDYBRIDGE:
27122 case PROCESSOR_HASWELL:
27123 /* Do not perform multipass scheduling for pre-reload schedule
27124 to save compile time. */
27125 if (reload_completed)
27127 targetm.sched.dfa_post_advance_cycle
27128 = core2i7_dfa_post_advance_cycle;
27129 targetm.sched.first_cycle_multipass_init
27130 = core2i7_first_cycle_multipass_init;
27131 targetm.sched.first_cycle_multipass_begin
27132 = core2i7_first_cycle_multipass_begin;
27133 targetm.sched.first_cycle_multipass_issue
27134 = core2i7_first_cycle_multipass_issue;
27135 targetm.sched.first_cycle_multipass_backtrack
27136 = core2i7_first_cycle_multipass_backtrack;
27137 targetm.sched.first_cycle_multipass_end
27138 = core2i7_first_cycle_multipass_end;
27139 targetm.sched.first_cycle_multipass_fini
27140 = core2i7_first_cycle_multipass_fini;
27142 /* Set decoder parameters. */
27143 core2i7_secondary_decoder_max_insn_size = 8;
27144 core2i7_ifetch_block_size = 16;
27145 core2i7_ifetch_block_max_insns = 6;
27146 break;
27148 /* ... Fall through ... */
27149 default:
27150 targetm.sched.dfa_post_advance_cycle = NULL;
27151 targetm.sched.first_cycle_multipass_init = NULL;
27152 targetm.sched.first_cycle_multipass_begin = NULL;
27153 targetm.sched.first_cycle_multipass_issue = NULL;
27154 targetm.sched.first_cycle_multipass_backtrack = NULL;
27155 targetm.sched.first_cycle_multipass_end = NULL;
27156 targetm.sched.first_cycle_multipass_fini = NULL;
27157 break;
27162 /* Compute the alignment given to a constant that is being placed in memory.
27163 EXP is the constant and ALIGN is the alignment that the object would
27164 ordinarily have.
27165 The value of this function is used instead of that alignment to align
27166 the object. */
27168 int
27169 ix86_constant_alignment (tree exp, int align)
27171 if (TREE_CODE (exp) == REAL_CST || TREE_CODE (exp) == VECTOR_CST
27172 || TREE_CODE (exp) == INTEGER_CST)
27174 if (TYPE_MODE (TREE_TYPE (exp)) == DFmode && align < 64)
27175 return 64;
27176 else if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (exp))) && align < 128)
27177 return 128;
27179 else if (!optimize_size && TREE_CODE (exp) == STRING_CST
27180 && TREE_STRING_LENGTH (exp) >= 31 && align < BITS_PER_WORD)
27181 return BITS_PER_WORD;
27183 return align;
27186 /* Compute the alignment for a static variable.
27187 TYPE is the data type, and ALIGN is the alignment that
27188 the object would ordinarily have. The value of this function is used
27189 instead of that alignment to align the object. */
27191 int
27192 ix86_data_alignment (tree type, int align, bool opt)
27194 /* GCC 4.8 and earlier used to incorrectly assume this alignment even
27195 for symbols from other compilation units or symbols that don't need
27196 to bind locally. In order to preserve some ABI compatibility with
27197 those compilers, ensure we don't decrease alignment from what we
27198 used to assume. */
27200 int max_align_compat = MIN (256, MAX_OFILE_ALIGNMENT);
27202 /* A data structure equal to or greater than the size of a cache line
27203 (64 bytes in the Pentium 4 and other recent Intel processors, including
27204 processors based on the Intel Core microarchitecture) should be aligned
27205 so that its base address is a multiple of the cache line size. */
27207 int max_align
27208 = MIN ((unsigned) ix86_tune_cost->prefetch_block * 8, MAX_OFILE_ALIGNMENT);
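  /* E.g., with a 64-byte prefetch block this evaluates to
     MIN (512, MAX_OFILE_ALIGNMENT) bits, i.e. cache-line (64-byte)
     alignment on typical targets.  (Illustrative arithmetic only.)  */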
27210 if (max_align < BITS_PER_WORD)
27211 max_align = BITS_PER_WORD;
27213 switch (ix86_align_data_type)
27215 case ix86_align_data_type_abi: opt = false; break;
27216 case ix86_align_data_type_compat: max_align = BITS_PER_WORD; break;
27217 case ix86_align_data_type_cacheline: break;
27220 if (opt
27221 && AGGREGATE_TYPE_P (type)
27222 && TYPE_SIZE (type)
27223 && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST)
27225 if (wi::geu_p (TYPE_SIZE (type), max_align_compat)
27226 && align < max_align_compat)
27227 align = max_align_compat;
27228 if (wi::geu_p (TYPE_SIZE (type), max_align)
27229 && align < max_align)
27230 align = max_align;
27233 /* The x86-64 ABI requires arrays greater than 16 bytes to be aligned
27234 to a 16-byte boundary. */
27235 if (TARGET_64BIT)
27237 if ((opt ? AGGREGATE_TYPE_P (type) : TREE_CODE (type) == ARRAY_TYPE)
27238 && TYPE_SIZE (type)
27239 && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
27240 && wi::geu_p (TYPE_SIZE (type), 128)
27241 && align < 128)
27242 return 128;
27245 if (!opt)
27246 return align;
27248 if (TREE_CODE (type) == ARRAY_TYPE)
27250 if (TYPE_MODE (TREE_TYPE (type)) == DFmode && align < 64)
27251 return 64;
27252 if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (type))) && align < 128)
27253 return 128;
27255 else if (TREE_CODE (type) == COMPLEX_TYPE)
27258 if (TYPE_MODE (type) == DCmode && align < 64)
27259 return 64;
27260 if ((TYPE_MODE (type) == XCmode
27261 || TYPE_MODE (type) == TCmode) && align < 128)
27262 return 128;
27264 else if ((TREE_CODE (type) == RECORD_TYPE
27265 || TREE_CODE (type) == UNION_TYPE
27266 || TREE_CODE (type) == QUAL_UNION_TYPE)
27267 && TYPE_FIELDS (type))
27269 if (DECL_MODE (TYPE_FIELDS (type)) == DFmode && align < 64)
27270 return 64;
27271 if (ALIGN_MODE_128 (DECL_MODE (TYPE_FIELDS (type))) && align < 128)
27272 return 128;
27274 else if (TREE_CODE (type) == REAL_TYPE || TREE_CODE (type) == VECTOR_TYPE
27275 || TREE_CODE (type) == INTEGER_TYPE)
27277 if (TYPE_MODE (type) == DFmode && align < 64)
27278 return 64;
27279 if (ALIGN_MODE_128 (TYPE_MODE (type)) && align < 128)
27280 return 128;
27283 return align;
27286 /* Compute the alignment for a local variable or a stack slot. EXP is
27287 the data type or decl itself, MODE is the widest mode available and
27288 ALIGN is the alignment that the object would ordinarily have. The
27289 value of this macro is used instead of that alignment to align the
27290 object. */
27292 unsigned int
27293 ix86_local_alignment (tree exp, machine_mode mode,
27294 unsigned int align)
27296 tree type, decl;
27298 if (exp && DECL_P (exp))
27300 type = TREE_TYPE (exp);
27301 decl = exp;
27303 else
27305 type = exp;
27306 decl = NULL;
27309 /* Don't do dynamic stack realignment for long long objects with
27310 -mpreferred-stack-boundary=2. */
27311 if (!TARGET_64BIT
27312 && align == 64
27313 && ix86_preferred_stack_boundary < 64
27314 && (mode == DImode || (type && TYPE_MODE (type) == DImode))
27315 && (!type || !TYPE_USER_ALIGN (type))
27316 && (!decl || !DECL_USER_ALIGN (decl)))
27317 align = 32;
27319 /* If TYPE is NULL, we are allocating a stack slot for a caller-save
27320 register in MODE. We will return the larger of the XFmode and
27321 DFmode alignments. */
27322 if (!type)
27324 if (mode == XFmode && align < GET_MODE_ALIGNMENT (DFmode))
27325 align = GET_MODE_ALIGNMENT (DFmode);
27326 return align;
27329 /* The x86-64 ABI requires arrays greater than 16 bytes to be aligned
27330 to a 16-byte boundary. The exact wording is:
27332 An array uses the same alignment as its elements, except that a local or
27333 global array variable of length at least 16 bytes or
27334 a C99 variable-length array variable always has alignment of at least 16 bytes.
27336 This was added to allow the use of aligned SSE instructions on arrays. The
27337 rule is meant for static storage (where the compiler cannot do the analysis
27338 by itself). We follow it for automatic variables only when convenient.
27339 We fully control everything in the function being compiled, and functions
27340 from other units cannot rely on the alignment.
27342 Exclude the va_list type. It is the common case of a local array where
27343 we cannot benefit from the alignment.
27345 TODO: Probably one should optimize for size only when the variable does not escape. */
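  /* For example (illustrative; assumes 64-bit code compiled for speed with
     SSE enabled): a local "double buf[4]" is 32 bytes, so the checks below
     raise its alignment to 128 bits, allowing aligned SSE accesses.  */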
27346 if (TARGET_64BIT && optimize_function_for_speed_p (cfun)
27347 && TARGET_SSE)
27349 if (AGGREGATE_TYPE_P (type)
27350 && (va_list_type_node == NULL_TREE
27351 || (TYPE_MAIN_VARIANT (type)
27352 != TYPE_MAIN_VARIANT (va_list_type_node)))
27353 && TYPE_SIZE (type)
27354 && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
27355 && wi::geu_p (TYPE_SIZE (type), 16)
27356 && align < 128)
27357 return 128;
27359 if (TREE_CODE (type) == ARRAY_TYPE)
27361 if (TYPE_MODE (TREE_TYPE (type)) == DFmode && align < 64)
27362 return 64;
27363 if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (type))) && align < 128)
27364 return 128;
27366 else if (TREE_CODE (type) == COMPLEX_TYPE)
27368 if (TYPE_MODE (type) == DCmode && align < 64)
27369 return 64;
27370 if ((TYPE_MODE (type) == XCmode
27371 || TYPE_MODE (type) == TCmode) && align < 128)
27372 return 128;
27374 else if ((TREE_CODE (type) == RECORD_TYPE
27375 || TREE_CODE (type) == UNION_TYPE
27376 || TREE_CODE (type) == QUAL_UNION_TYPE)
27377 && TYPE_FIELDS (type))
27379 if (DECL_MODE (TYPE_FIELDS (type)) == DFmode && align < 64)
27380 return 64;
27381 if (ALIGN_MODE_128 (DECL_MODE (TYPE_FIELDS (type))) && align < 128)
27382 return 128;
27384 else if (TREE_CODE (type) == REAL_TYPE || TREE_CODE (type) == VECTOR_TYPE
27385 || TREE_CODE (type) == INTEGER_TYPE)
27388 if (TYPE_MODE (type) == DFmode && align < 64)
27389 return 64;
27390 if (ALIGN_MODE_128 (TYPE_MODE (type)) && align < 128)
27391 return 128;
27393 return align;
27396 /* Compute the minimum required alignment for dynamic stack realignment
27397 purposes for a local variable, parameter or a stack slot. EXP is
27398 the data type or decl itself, MODE is its mode and ALIGN is the
27399 alignment that the object would ordinarily have. */
27401 unsigned int
27402 ix86_minimum_alignment (tree exp, machine_mode mode,
27403 unsigned int align)
27405 tree type, decl;
27407 if (exp && DECL_P (exp))
27409 type = TREE_TYPE (exp);
27410 decl = exp;
27412 else
27414 type = exp;
27415 decl = NULL;
27418 if (TARGET_64BIT || align != 64 || ix86_preferred_stack_boundary >= 64)
27419 return align;
27421 /* Don't do dynamic stack realignment for long long objects with
27422 -mpreferred-stack-boundary=2. */
27423 if ((mode == DImode || (type && TYPE_MODE (type) == DImode))
27424 && (!type || !TYPE_USER_ALIGN (type))
27425 && (!decl || !DECL_USER_ALIGN (decl)))
27426 return 32;
27428 return align;
27431 /* Find a location for the static chain incoming to a nested function.
27432 This is a register, unless all free registers are used by arguments. */
27434 static rtx
27435 ix86_static_chain (const_tree fndecl_or_type, bool incoming_p)
27437 unsigned regno;
27439 /* While this function won't be called by the middle-end when a static
27440 chain isn't needed, it's also used throughout the backend so it's
27441 easiest to keep this check centralized. */
27442 if (DECL_P (fndecl_or_type) && !DECL_STATIC_CHAIN (fndecl_or_type))
27443 return NULL;
27445 if (TARGET_64BIT)
27447 /* We always use R10 in 64-bit mode. */
27448 regno = R10_REG;
27450 else
27452 const_tree fntype, fndecl;
27453 unsigned int ccvt;
27455 /* By default in 32-bit mode we use ECX to pass the static chain. */
27456 regno = CX_REG;
27458 if (TREE_CODE (fndecl_or_type) == FUNCTION_DECL)
27460 fntype = TREE_TYPE (fndecl_or_type);
27461 fndecl = fndecl_or_type;
27463 else
27465 fntype = fndecl_or_type;
27466 fndecl = NULL;
27469 ccvt = ix86_get_callcvt (fntype);
27470 if ((ccvt & IX86_CALLCVT_FASTCALL) != 0)
27472 /* Fastcall functions use ecx/edx for arguments, which leaves
27473 us with EAX for the static chain.
27474 Thiscall functions use ecx for arguments, which also
27475 leaves us with EAX for the static chain. */
27476 regno = AX_REG;
27478 else if ((ccvt & IX86_CALLCVT_THISCALL) != 0)
27480 /* Thiscall functions use ecx for arguments, which leaves
27481 us with EAX and EDX for the static chain.
27482 We use EAX for ABI compatibility. */
27483 regno = AX_REG;
27485 else if (ix86_function_regparm (fntype, fndecl) == 3)
27487 /* For regparm 3, we have no free call-clobbered registers in
27488 which to store the static chain. In order to implement this,
27489 we have the trampoline push the static chain to the stack.
27490 However, we can't push a value below the return address when
27491 we call the nested function directly, so we have to use an
27492 alternate entry point. For this we use ESI, and have the
27493 alternate entry point push ESI, so that things appear the
27494 same once we're executing the nested function. */
27495 if (incoming_p)
27497 if (fndecl == current_function_decl)
27498 ix86_static_chain_on_stack = true;
27499 return gen_frame_mem (SImode,
27500 plus_constant (Pmode,
27501 arg_pointer_rtx, -8));
27503 regno = SI_REG;
27507 return gen_rtx_REG (Pmode, regno);
27510 /* Emit RTL insns to initialize the variable parts of a trampoline.
27511 FNDECL is the decl of the target address; M_TRAMP is a MEM for
27512 the trampoline, and CHAIN_VALUE is an RTX for the static chain
27513 to be passed to the target function. */
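/* Illustrative layout (derived from the byte constants emitted below, not
   part of the original comment): with a full 64-bit FNADDR the 64-bit
   trampoline is

	49 bb <fnaddr:8>	movabs $fnaddr, %r11
	49 ba <chain:8>		movabs $chain_value, %r10
	49 ff e3		jmp    *%r11
	90			nop

   while the 32-bit trampoline is a mov (or push) of the static chain
   followed by a relative jmp to the target.  */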
27515 static void
27516 ix86_trampoline_init (rtx m_tramp, tree fndecl, rtx chain_value)
27518 rtx mem, fnaddr;
27519 int opcode;
27520 int offset = 0;
27522 fnaddr = XEXP (DECL_RTL (fndecl), 0);
27524 if (TARGET_64BIT)
27526 int size;
27528 /* Load the function address into r11. Try to load the address using
27529 the shorter movl instead of movabs. We may want to support
27530 movq for kernel mode, but the kernel does not use trampolines at
27531 the moment. FNADDR is a 32-bit address and may not be in
27532 DImode when ptr_mode == SImode. Always use movl in this
27533 case. */
27534 if (ptr_mode == SImode
27535 || x86_64_zext_immediate_operand (fnaddr, VOIDmode))
27537 fnaddr = copy_addr_to_reg (fnaddr);
27539 mem = adjust_address (m_tramp, HImode, offset);
27540 emit_move_insn (mem, gen_int_mode (0xbb41, HImode));
27542 mem = adjust_address (m_tramp, SImode, offset + 2);
27543 emit_move_insn (mem, gen_lowpart (SImode, fnaddr));
27544 offset += 6;
27546 else
27548 mem = adjust_address (m_tramp, HImode, offset);
27549 emit_move_insn (mem, gen_int_mode (0xbb49, HImode));
27551 mem = adjust_address (m_tramp, DImode, offset + 2);
27552 emit_move_insn (mem, fnaddr);
27553 offset += 10;
27556 /* Load the static chain into r10 using movabs. Use the shorter movl
27557 instead of movabs when ptr_mode == SImode. */
27558 if (ptr_mode == SImode)
27560 opcode = 0xba41;
27561 size = 6;
27563 else
27565 opcode = 0xba49;
27566 size = 10;
27569 mem = adjust_address (m_tramp, HImode, offset);
27570 emit_move_insn (mem, gen_int_mode (opcode, HImode));
27572 mem = adjust_address (m_tramp, ptr_mode, offset + 2);
27573 emit_move_insn (mem, chain_value);
27574 offset += size;
27576 /* Jump to r11; the last (unused) byte is a nop, only there to
27577 pad the write out to a single 32-bit store. */
27578 mem = adjust_address (m_tramp, SImode, offset);
27579 emit_move_insn (mem, gen_int_mode (0x90e3ff49, SImode));
27580 offset += 4;
27582 else
27584 rtx disp, chain;
27586 /* Depending on the static chain location, either load a register
27587 with a constant, or push the constant to the stack. All of the
27588 instructions are the same size. */
27589 chain = ix86_static_chain (fndecl, true);
27590 if (REG_P (chain))
27592 switch (REGNO (chain))
27594 case AX_REG:
27595 opcode = 0xb8; break;
27596 case CX_REG:
27597 opcode = 0xb9; break;
27598 default:
27599 gcc_unreachable ();
27602 else
27603 opcode = 0x68;
27605 mem = adjust_address (m_tramp, QImode, offset);
27606 emit_move_insn (mem, gen_int_mode (opcode, QImode));
27608 mem = adjust_address (m_tramp, SImode, offset + 1);
27609 emit_move_insn (mem, chain_value);
27610 offset += 5;
27612 mem = adjust_address (m_tramp, QImode, offset);
27613 emit_move_insn (mem, gen_int_mode (0xe9, QImode));
27615 mem = adjust_address (m_tramp, SImode, offset + 1);
27617 /* Compute the offset from the end of the jmp to the target function.
27618 In the case in which the trampoline stores the static chain on
27619 the stack, we need to skip the first insn, which pushes the
27620 (call-saved) static chain register; this push is 1 byte. */
27621 offset += 5;
27622 disp = expand_binop (SImode, sub_optab, fnaddr,
27623 plus_constant (Pmode, XEXP (m_tramp, 0),
27624 offset - (MEM_P (chain) ? 1 : 0)),
27625 NULL_RTX, 1, OPTAB_DIRECT);
27626 emit_move_insn (mem, disp);
27629 gcc_assert (offset <= TRAMPOLINE_SIZE);
27631 #ifdef HAVE_ENABLE_EXECUTE_STACK
27632 #ifdef CHECK_EXECUTE_STACK_ENABLED
27633 if (CHECK_EXECUTE_STACK_ENABLED)
27634 #endif
27635 emit_library_call (gen_rtx_SYMBOL_REF (Pmode, "__enable_execute_stack"),
27636 LCT_NORMAL, VOIDmode, 1, XEXP (m_tramp, 0), Pmode);
27637 #endif
27640 /* The following file contains several enumerations and data structures
27641 built from the definitions in i386-builtin-types.def. */
27643 #include "i386-builtin-types.inc"
27645 /* Table for the ix86 builtin non-function types. */
27646 static GTY(()) tree ix86_builtin_type_tab[(int) IX86_BT_LAST_CPTR + 1];
27648 /* Retrieve an element from the above table, building some of
27649 the types lazily. */
27651 static tree
27652 ix86_get_builtin_type (enum ix86_builtin_type tcode)
27654 unsigned int index;
27655 tree type, itype;
27657 gcc_assert ((unsigned)tcode < ARRAY_SIZE(ix86_builtin_type_tab));
27659 type = ix86_builtin_type_tab[(int) tcode];
27660 if (type != NULL)
27661 return type;
27663 gcc_assert (tcode > IX86_BT_LAST_PRIM);
27664 if (tcode <= IX86_BT_LAST_VECT)
27666 machine_mode mode;
27668 index = tcode - IX86_BT_LAST_PRIM - 1;
27669 itype = ix86_get_builtin_type (ix86_builtin_type_vect_base[index]);
27670 mode = ix86_builtin_type_vect_mode[index];
27672 type = build_vector_type_for_mode (itype, mode);
27674 else
27676 int quals;
27678 index = tcode - IX86_BT_LAST_VECT - 1;
27679 if (tcode <= IX86_BT_LAST_PTR)
27680 quals = TYPE_UNQUALIFIED;
27681 else
27682 quals = TYPE_QUAL_CONST;
27684 itype = ix86_get_builtin_type (ix86_builtin_type_ptr_base[index]);
27685 if (quals != TYPE_UNQUALIFIED)
27686 itype = build_qualified_type (itype, quals);
27688 type = build_pointer_type (itype);
27691 ix86_builtin_type_tab[(int) tcode] = type;
27692 return type;
27695 /* Table for the ix86 builtin function types. */
27696 static GTY(()) tree ix86_builtin_func_type_tab[(int) IX86_BT_LAST_ALIAS + 1];
27698 /* Retrieve an element from the above table, building some of
27699 the types lazily. */
27701 static tree
27702 ix86_get_builtin_func_type (enum ix86_builtin_func_type tcode)
27704 tree type;
27706 gcc_assert ((unsigned)tcode < ARRAY_SIZE (ix86_builtin_func_type_tab));
27708 type = ix86_builtin_func_type_tab[(int) tcode];
27709 if (type != NULL)
27710 return type;
27712 if (tcode <= IX86_BT_LAST_FUNC)
27714 unsigned start = ix86_builtin_func_start[(int) tcode];
27715 unsigned after = ix86_builtin_func_start[(int) tcode + 1];
27716 tree rtype, atype, args = void_list_node;
27717 unsigned i;
27719 rtype = ix86_get_builtin_type (ix86_builtin_func_args[start]);
27720 for (i = after - 1; i > start; --i)
27722 atype = ix86_get_builtin_type (ix86_builtin_func_args[i]);
27723 args = tree_cons (NULL, atype, args);
27726 type = build_function_type (rtype, args);
27728 else
27730 unsigned index = tcode - IX86_BT_LAST_FUNC - 1;
27731 enum ix86_builtin_func_type icode;
27733 icode = ix86_builtin_func_alias_base[index];
27734 type = ix86_get_builtin_func_type (icode);
27737 ix86_builtin_func_type_tab[(int) tcode] = type;
27738 return type;
27742 /* Codes for all the SSE/MMX builtins. */
27743 enum ix86_builtins
27745 IX86_BUILTIN_ADDPS,
27746 IX86_BUILTIN_ADDSS,
27747 IX86_BUILTIN_DIVPS,
27748 IX86_BUILTIN_DIVSS,
27749 IX86_BUILTIN_MULPS,
27750 IX86_BUILTIN_MULSS,
27751 IX86_BUILTIN_SUBPS,
27752 IX86_BUILTIN_SUBSS,
27754 IX86_BUILTIN_CMPEQPS,
27755 IX86_BUILTIN_CMPLTPS,
27756 IX86_BUILTIN_CMPLEPS,
27757 IX86_BUILTIN_CMPGTPS,
27758 IX86_BUILTIN_CMPGEPS,
27759 IX86_BUILTIN_CMPNEQPS,
27760 IX86_BUILTIN_CMPNLTPS,
27761 IX86_BUILTIN_CMPNLEPS,
27762 IX86_BUILTIN_CMPNGTPS,
27763 IX86_BUILTIN_CMPNGEPS,
27764 IX86_BUILTIN_CMPORDPS,
27765 IX86_BUILTIN_CMPUNORDPS,
27766 IX86_BUILTIN_CMPEQSS,
27767 IX86_BUILTIN_CMPLTSS,
27768 IX86_BUILTIN_CMPLESS,
27769 IX86_BUILTIN_CMPNEQSS,
27770 IX86_BUILTIN_CMPNLTSS,
27771 IX86_BUILTIN_CMPNLESS,
27772 IX86_BUILTIN_CMPORDSS,
27773 IX86_BUILTIN_CMPUNORDSS,
27775 IX86_BUILTIN_COMIEQSS,
27776 IX86_BUILTIN_COMILTSS,
27777 IX86_BUILTIN_COMILESS,
27778 IX86_BUILTIN_COMIGTSS,
27779 IX86_BUILTIN_COMIGESS,
27780 IX86_BUILTIN_COMINEQSS,
27781 IX86_BUILTIN_UCOMIEQSS,
27782 IX86_BUILTIN_UCOMILTSS,
27783 IX86_BUILTIN_UCOMILESS,
27784 IX86_BUILTIN_UCOMIGTSS,
27785 IX86_BUILTIN_UCOMIGESS,
27786 IX86_BUILTIN_UCOMINEQSS,
27788 IX86_BUILTIN_CVTPI2PS,
27789 IX86_BUILTIN_CVTPS2PI,
27790 IX86_BUILTIN_CVTSI2SS,
27791 IX86_BUILTIN_CVTSI642SS,
27792 IX86_BUILTIN_CVTSS2SI,
27793 IX86_BUILTIN_CVTSS2SI64,
27794 IX86_BUILTIN_CVTTPS2PI,
27795 IX86_BUILTIN_CVTTSS2SI,
27796 IX86_BUILTIN_CVTTSS2SI64,
27798 IX86_BUILTIN_MAXPS,
27799 IX86_BUILTIN_MAXSS,
27800 IX86_BUILTIN_MINPS,
27801 IX86_BUILTIN_MINSS,
27803 IX86_BUILTIN_LOADUPS,
27804 IX86_BUILTIN_STOREUPS,
27805 IX86_BUILTIN_MOVSS,
27807 IX86_BUILTIN_MOVHLPS,
27808 IX86_BUILTIN_MOVLHPS,
27809 IX86_BUILTIN_LOADHPS,
27810 IX86_BUILTIN_LOADLPS,
27811 IX86_BUILTIN_STOREHPS,
27812 IX86_BUILTIN_STORELPS,
27814 IX86_BUILTIN_MASKMOVQ,
27815 IX86_BUILTIN_MOVMSKPS,
27816 IX86_BUILTIN_PMOVMSKB,
27818 IX86_BUILTIN_MOVNTPS,
27819 IX86_BUILTIN_MOVNTQ,
27821 IX86_BUILTIN_LOADDQU,
27822 IX86_BUILTIN_STOREDQU,
27824 IX86_BUILTIN_PACKSSWB,
27825 IX86_BUILTIN_PACKSSDW,
27826 IX86_BUILTIN_PACKUSWB,
27828 IX86_BUILTIN_PADDB,
27829 IX86_BUILTIN_PADDW,
27830 IX86_BUILTIN_PADDD,
27831 IX86_BUILTIN_PADDQ,
27832 IX86_BUILTIN_PADDSB,
27833 IX86_BUILTIN_PADDSW,
27834 IX86_BUILTIN_PADDUSB,
27835 IX86_BUILTIN_PADDUSW,
27836 IX86_BUILTIN_PSUBB,
27837 IX86_BUILTIN_PSUBW,
27838 IX86_BUILTIN_PSUBD,
27839 IX86_BUILTIN_PSUBQ,
27840 IX86_BUILTIN_PSUBSB,
27841 IX86_BUILTIN_PSUBSW,
27842 IX86_BUILTIN_PSUBUSB,
27843 IX86_BUILTIN_PSUBUSW,
27845 IX86_BUILTIN_PAND,
27846 IX86_BUILTIN_PANDN,
27847 IX86_BUILTIN_POR,
27848 IX86_BUILTIN_PXOR,
27850 IX86_BUILTIN_PAVGB,
27851 IX86_BUILTIN_PAVGW,
27853 IX86_BUILTIN_PCMPEQB,
27854 IX86_BUILTIN_PCMPEQW,
27855 IX86_BUILTIN_PCMPEQD,
27856 IX86_BUILTIN_PCMPGTB,
27857 IX86_BUILTIN_PCMPGTW,
27858 IX86_BUILTIN_PCMPGTD,
27860 IX86_BUILTIN_PMADDWD,
27862 IX86_BUILTIN_PMAXSW,
27863 IX86_BUILTIN_PMAXUB,
27864 IX86_BUILTIN_PMINSW,
27865 IX86_BUILTIN_PMINUB,
27867 IX86_BUILTIN_PMULHUW,
27868 IX86_BUILTIN_PMULHW,
27869 IX86_BUILTIN_PMULLW,
27871 IX86_BUILTIN_PSADBW,
27872 IX86_BUILTIN_PSHUFW,
27874 IX86_BUILTIN_PSLLW,
27875 IX86_BUILTIN_PSLLD,
27876 IX86_BUILTIN_PSLLQ,
27877 IX86_BUILTIN_PSRAW,
27878 IX86_BUILTIN_PSRAD,
27879 IX86_BUILTIN_PSRLW,
27880 IX86_BUILTIN_PSRLD,
27881 IX86_BUILTIN_PSRLQ,
27882 IX86_BUILTIN_PSLLWI,
27883 IX86_BUILTIN_PSLLDI,
27884 IX86_BUILTIN_PSLLQI,
27885 IX86_BUILTIN_PSRAWI,
27886 IX86_BUILTIN_PSRADI,
27887 IX86_BUILTIN_PSRLWI,
27888 IX86_BUILTIN_PSRLDI,
27889 IX86_BUILTIN_PSRLQI,
27891 IX86_BUILTIN_PUNPCKHBW,
27892 IX86_BUILTIN_PUNPCKHWD,
27893 IX86_BUILTIN_PUNPCKHDQ,
27894 IX86_BUILTIN_PUNPCKLBW,
27895 IX86_BUILTIN_PUNPCKLWD,
27896 IX86_BUILTIN_PUNPCKLDQ,
27898 IX86_BUILTIN_SHUFPS,
27900 IX86_BUILTIN_RCPPS,
27901 IX86_BUILTIN_RCPSS,
27902 IX86_BUILTIN_RSQRTPS,
27903 IX86_BUILTIN_RSQRTPS_NR,
27904 IX86_BUILTIN_RSQRTSS,
27905 IX86_BUILTIN_RSQRTF,
27906 IX86_BUILTIN_SQRTPS,
27907 IX86_BUILTIN_SQRTPS_NR,
27908 IX86_BUILTIN_SQRTSS,
27910 IX86_BUILTIN_UNPCKHPS,
27911 IX86_BUILTIN_UNPCKLPS,
27913 IX86_BUILTIN_ANDPS,
27914 IX86_BUILTIN_ANDNPS,
27915 IX86_BUILTIN_ORPS,
27916 IX86_BUILTIN_XORPS,
27918 IX86_BUILTIN_EMMS,
27919 IX86_BUILTIN_LDMXCSR,
27920 IX86_BUILTIN_STMXCSR,
27921 IX86_BUILTIN_SFENCE,
27923 IX86_BUILTIN_FXSAVE,
27924 IX86_BUILTIN_FXRSTOR,
27925 IX86_BUILTIN_FXSAVE64,
27926 IX86_BUILTIN_FXRSTOR64,
27928 IX86_BUILTIN_XSAVE,
27929 IX86_BUILTIN_XRSTOR,
27930 IX86_BUILTIN_XSAVE64,
27931 IX86_BUILTIN_XRSTOR64,
27933 IX86_BUILTIN_XSAVEOPT,
27934 IX86_BUILTIN_XSAVEOPT64,
27936 IX86_BUILTIN_XSAVEC,
27937 IX86_BUILTIN_XSAVEC64,
27939 IX86_BUILTIN_XSAVES,
27940 IX86_BUILTIN_XRSTORS,
27941 IX86_BUILTIN_XSAVES64,
27942 IX86_BUILTIN_XRSTORS64,
27944 /* 3DNow! Original */
27945 IX86_BUILTIN_FEMMS,
27946 IX86_BUILTIN_PAVGUSB,
27947 IX86_BUILTIN_PF2ID,
27948 IX86_BUILTIN_PFACC,
27949 IX86_BUILTIN_PFADD,
27950 IX86_BUILTIN_PFCMPEQ,
27951 IX86_BUILTIN_PFCMPGE,
27952 IX86_BUILTIN_PFCMPGT,
27953 IX86_BUILTIN_PFMAX,
27954 IX86_BUILTIN_PFMIN,
27955 IX86_BUILTIN_PFMUL,
27956 IX86_BUILTIN_PFRCP,
27957 IX86_BUILTIN_PFRCPIT1,
27958 IX86_BUILTIN_PFRCPIT2,
27959 IX86_BUILTIN_PFRSQIT1,
27960 IX86_BUILTIN_PFRSQRT,
27961 IX86_BUILTIN_PFSUB,
27962 IX86_BUILTIN_PFSUBR,
27963 IX86_BUILTIN_PI2FD,
27964 IX86_BUILTIN_PMULHRW,
27966 /* 3DNow! Athlon Extensions */
27967 IX86_BUILTIN_PF2IW,
27968 IX86_BUILTIN_PFNACC,
27969 IX86_BUILTIN_PFPNACC,
27970 IX86_BUILTIN_PI2FW,
27971 IX86_BUILTIN_PSWAPDSI,
27972 IX86_BUILTIN_PSWAPDSF,
27974 /* SSE2 */
27975 IX86_BUILTIN_ADDPD,
27976 IX86_BUILTIN_ADDSD,
27977 IX86_BUILTIN_DIVPD,
27978 IX86_BUILTIN_DIVSD,
27979 IX86_BUILTIN_MULPD,
27980 IX86_BUILTIN_MULSD,
27981 IX86_BUILTIN_SUBPD,
27982 IX86_BUILTIN_SUBSD,
27984 IX86_BUILTIN_CMPEQPD,
27985 IX86_BUILTIN_CMPLTPD,
27986 IX86_BUILTIN_CMPLEPD,
27987 IX86_BUILTIN_CMPGTPD,
27988 IX86_BUILTIN_CMPGEPD,
27989 IX86_BUILTIN_CMPNEQPD,
27990 IX86_BUILTIN_CMPNLTPD,
27991 IX86_BUILTIN_CMPNLEPD,
27992 IX86_BUILTIN_CMPNGTPD,
27993 IX86_BUILTIN_CMPNGEPD,
27994 IX86_BUILTIN_CMPORDPD,
27995 IX86_BUILTIN_CMPUNORDPD,
27996 IX86_BUILTIN_CMPEQSD,
27997 IX86_BUILTIN_CMPLTSD,
27998 IX86_BUILTIN_CMPLESD,
27999 IX86_BUILTIN_CMPNEQSD,
28000 IX86_BUILTIN_CMPNLTSD,
28001 IX86_BUILTIN_CMPNLESD,
28002 IX86_BUILTIN_CMPORDSD,
28003 IX86_BUILTIN_CMPUNORDSD,
28005 IX86_BUILTIN_COMIEQSD,
28006 IX86_BUILTIN_COMILTSD,
28007 IX86_BUILTIN_COMILESD,
28008 IX86_BUILTIN_COMIGTSD,
28009 IX86_BUILTIN_COMIGESD,
28010 IX86_BUILTIN_COMINEQSD,
28011 IX86_BUILTIN_UCOMIEQSD,
28012 IX86_BUILTIN_UCOMILTSD,
28013 IX86_BUILTIN_UCOMILESD,
28014 IX86_BUILTIN_UCOMIGTSD,
28015 IX86_BUILTIN_UCOMIGESD,
28016 IX86_BUILTIN_UCOMINEQSD,
28018 IX86_BUILTIN_MAXPD,
28019 IX86_BUILTIN_MAXSD,
28020 IX86_BUILTIN_MINPD,
28021 IX86_BUILTIN_MINSD,
28023 IX86_BUILTIN_ANDPD,
28024 IX86_BUILTIN_ANDNPD,
28025 IX86_BUILTIN_ORPD,
28026 IX86_BUILTIN_XORPD,
28028 IX86_BUILTIN_SQRTPD,
28029 IX86_BUILTIN_SQRTSD,
28031 IX86_BUILTIN_UNPCKHPD,
28032 IX86_BUILTIN_UNPCKLPD,
28034 IX86_BUILTIN_SHUFPD,
28036 IX86_BUILTIN_LOADUPD,
28037 IX86_BUILTIN_STOREUPD,
28038 IX86_BUILTIN_MOVSD,
28040 IX86_BUILTIN_LOADHPD,
28041 IX86_BUILTIN_LOADLPD,
28043 IX86_BUILTIN_CVTDQ2PD,
28044 IX86_BUILTIN_CVTDQ2PS,
28046 IX86_BUILTIN_CVTPD2DQ,
28047 IX86_BUILTIN_CVTPD2PI,
28048 IX86_BUILTIN_CVTPD2PS,
28049 IX86_BUILTIN_CVTTPD2DQ,
28050 IX86_BUILTIN_CVTTPD2PI,
28052 IX86_BUILTIN_CVTPI2PD,
28053 IX86_BUILTIN_CVTSI2SD,
28054 IX86_BUILTIN_CVTSI642SD,
28056 IX86_BUILTIN_CVTSD2SI,
28057 IX86_BUILTIN_CVTSD2SI64,
28058 IX86_BUILTIN_CVTSD2SS,
28059 IX86_BUILTIN_CVTSS2SD,
28060 IX86_BUILTIN_CVTTSD2SI,
28061 IX86_BUILTIN_CVTTSD2SI64,
28063 IX86_BUILTIN_CVTPS2DQ,
28064 IX86_BUILTIN_CVTPS2PD,
28065 IX86_BUILTIN_CVTTPS2DQ,
28067 IX86_BUILTIN_MOVNTI,
28068 IX86_BUILTIN_MOVNTI64,
28069 IX86_BUILTIN_MOVNTPD,
28070 IX86_BUILTIN_MOVNTDQ,
28072 IX86_BUILTIN_MOVQ128,
28074 /* SSE2 MMX */
28075 IX86_BUILTIN_MASKMOVDQU,
28076 IX86_BUILTIN_MOVMSKPD,
28077 IX86_BUILTIN_PMOVMSKB128,
28079 IX86_BUILTIN_PACKSSWB128,
28080 IX86_BUILTIN_PACKSSDW128,
28081 IX86_BUILTIN_PACKUSWB128,
28083 IX86_BUILTIN_PADDB128,
28084 IX86_BUILTIN_PADDW128,
28085 IX86_BUILTIN_PADDD128,
28086 IX86_BUILTIN_PADDQ128,
28087 IX86_BUILTIN_PADDSB128,
28088 IX86_BUILTIN_PADDSW128,
28089 IX86_BUILTIN_PADDUSB128,
28090 IX86_BUILTIN_PADDUSW128,
28091 IX86_BUILTIN_PSUBB128,
28092 IX86_BUILTIN_PSUBW128,
28093 IX86_BUILTIN_PSUBD128,
28094 IX86_BUILTIN_PSUBQ128,
28095 IX86_BUILTIN_PSUBSB128,
28096 IX86_BUILTIN_PSUBSW128,
28097 IX86_BUILTIN_PSUBUSB128,
28098 IX86_BUILTIN_PSUBUSW128,
28100 IX86_BUILTIN_PAND128,
28101 IX86_BUILTIN_PANDN128,
28102 IX86_BUILTIN_POR128,
28103 IX86_BUILTIN_PXOR128,
28105 IX86_BUILTIN_PAVGB128,
28106 IX86_BUILTIN_PAVGW128,
28108 IX86_BUILTIN_PCMPEQB128,
28109 IX86_BUILTIN_PCMPEQW128,
28110 IX86_BUILTIN_PCMPEQD128,
28111 IX86_BUILTIN_PCMPGTB128,
28112 IX86_BUILTIN_PCMPGTW128,
28113 IX86_BUILTIN_PCMPGTD128,
28115 IX86_BUILTIN_PMADDWD128,
28117 IX86_BUILTIN_PMAXSW128,
28118 IX86_BUILTIN_PMAXUB128,
28119 IX86_BUILTIN_PMINSW128,
28120 IX86_BUILTIN_PMINUB128,
28122 IX86_BUILTIN_PMULUDQ,
28123 IX86_BUILTIN_PMULUDQ128,
28124 IX86_BUILTIN_PMULHUW128,
28125 IX86_BUILTIN_PMULHW128,
28126 IX86_BUILTIN_PMULLW128,
28128 IX86_BUILTIN_PSADBW128,
28129 IX86_BUILTIN_PSHUFHW,
28130 IX86_BUILTIN_PSHUFLW,
28131 IX86_BUILTIN_PSHUFD,
28133 IX86_BUILTIN_PSLLDQI128,
28134 IX86_BUILTIN_PSLLWI128,
28135 IX86_BUILTIN_PSLLDI128,
28136 IX86_BUILTIN_PSLLQI128,
28137 IX86_BUILTIN_PSRAWI128,
28138 IX86_BUILTIN_PSRADI128,
28139 IX86_BUILTIN_PSRLDQI128,
28140 IX86_BUILTIN_PSRLWI128,
28141 IX86_BUILTIN_PSRLDI128,
28142 IX86_BUILTIN_PSRLQI128,
28144 IX86_BUILTIN_PSLLDQ128,
28145 IX86_BUILTIN_PSLLW128,
28146 IX86_BUILTIN_PSLLD128,
28147 IX86_BUILTIN_PSLLQ128,
28148 IX86_BUILTIN_PSRAW128,
28149 IX86_BUILTIN_PSRAD128,
28150 IX86_BUILTIN_PSRLW128,
28151 IX86_BUILTIN_PSRLD128,
28152 IX86_BUILTIN_PSRLQ128,
28154 IX86_BUILTIN_PUNPCKHBW128,
28155 IX86_BUILTIN_PUNPCKHWD128,
28156 IX86_BUILTIN_PUNPCKHDQ128,
28157 IX86_BUILTIN_PUNPCKHQDQ128,
28158 IX86_BUILTIN_PUNPCKLBW128,
28159 IX86_BUILTIN_PUNPCKLWD128,
28160 IX86_BUILTIN_PUNPCKLDQ128,
28161 IX86_BUILTIN_PUNPCKLQDQ128,
28163 IX86_BUILTIN_CLFLUSH,
28164 IX86_BUILTIN_MFENCE,
28165 IX86_BUILTIN_LFENCE,
28166 IX86_BUILTIN_PAUSE,
28168 IX86_BUILTIN_FNSTENV,
28169 IX86_BUILTIN_FLDENV,
28170 IX86_BUILTIN_FNSTSW,
28171 IX86_BUILTIN_FNCLEX,
28173 IX86_BUILTIN_BSRSI,
28174 IX86_BUILTIN_BSRDI,
28175 IX86_BUILTIN_RDPMC,
28176 IX86_BUILTIN_RDTSC,
28177 IX86_BUILTIN_RDTSCP,
28178 IX86_BUILTIN_ROLQI,
28179 IX86_BUILTIN_ROLHI,
28180 IX86_BUILTIN_RORQI,
28181 IX86_BUILTIN_RORHI,
28183 /* SSE3. */
28184 IX86_BUILTIN_ADDSUBPS,
28185 IX86_BUILTIN_HADDPS,
28186 IX86_BUILTIN_HSUBPS,
28187 IX86_BUILTIN_MOVSHDUP,
28188 IX86_BUILTIN_MOVSLDUP,
28189 IX86_BUILTIN_ADDSUBPD,
28190 IX86_BUILTIN_HADDPD,
28191 IX86_BUILTIN_HSUBPD,
28192 IX86_BUILTIN_LDDQU,
28194 IX86_BUILTIN_MONITOR,
28195 IX86_BUILTIN_MWAIT,
28197 /* SSSE3. */
28198 IX86_BUILTIN_PHADDW,
28199 IX86_BUILTIN_PHADDD,
28200 IX86_BUILTIN_PHADDSW,
28201 IX86_BUILTIN_PHSUBW,
28202 IX86_BUILTIN_PHSUBD,
28203 IX86_BUILTIN_PHSUBSW,
28204 IX86_BUILTIN_PMADDUBSW,
28205 IX86_BUILTIN_PMULHRSW,
28206 IX86_BUILTIN_PSHUFB,
28207 IX86_BUILTIN_PSIGNB,
28208 IX86_BUILTIN_PSIGNW,
28209 IX86_BUILTIN_PSIGND,
28210 IX86_BUILTIN_PALIGNR,
28211 IX86_BUILTIN_PABSB,
28212 IX86_BUILTIN_PABSW,
28213 IX86_BUILTIN_PABSD,
28215 IX86_BUILTIN_PHADDW128,
28216 IX86_BUILTIN_PHADDD128,
28217 IX86_BUILTIN_PHADDSW128,
28218 IX86_BUILTIN_PHSUBW128,
28219 IX86_BUILTIN_PHSUBD128,
28220 IX86_BUILTIN_PHSUBSW128,
28221 IX86_BUILTIN_PMADDUBSW128,
28222 IX86_BUILTIN_PMULHRSW128,
28223 IX86_BUILTIN_PSHUFB128,
28224 IX86_BUILTIN_PSIGNB128,
28225 IX86_BUILTIN_PSIGNW128,
28226 IX86_BUILTIN_PSIGND128,
28227 IX86_BUILTIN_PALIGNR128,
28228 IX86_BUILTIN_PABSB128,
28229 IX86_BUILTIN_PABSW128,
28230 IX86_BUILTIN_PABSD128,
28232 /* AMDFAM10 - SSE4A New Instructions. */
28233 IX86_BUILTIN_MOVNTSD,
28234 IX86_BUILTIN_MOVNTSS,
28235 IX86_BUILTIN_EXTRQI,
28236 IX86_BUILTIN_EXTRQ,
28237 IX86_BUILTIN_INSERTQI,
28238 IX86_BUILTIN_INSERTQ,
28240 /* SSE4.1. */
28241 IX86_BUILTIN_BLENDPD,
28242 IX86_BUILTIN_BLENDPS,
28243 IX86_BUILTIN_BLENDVPD,
28244 IX86_BUILTIN_BLENDVPS,
28245 IX86_BUILTIN_PBLENDVB128,
28246 IX86_BUILTIN_PBLENDW128,
28248 IX86_BUILTIN_DPPD,
28249 IX86_BUILTIN_DPPS,
28251 IX86_BUILTIN_INSERTPS128,
28253 IX86_BUILTIN_MOVNTDQA,
28254 IX86_BUILTIN_MPSADBW128,
28255 IX86_BUILTIN_PACKUSDW128,
28256 IX86_BUILTIN_PCMPEQQ,
28257 IX86_BUILTIN_PHMINPOSUW128,
28259 IX86_BUILTIN_PMAXSB128,
28260 IX86_BUILTIN_PMAXSD128,
28261 IX86_BUILTIN_PMAXUD128,
28262 IX86_BUILTIN_PMAXUW128,
28264 IX86_BUILTIN_PMINSB128,
28265 IX86_BUILTIN_PMINSD128,
28266 IX86_BUILTIN_PMINUD128,
28267 IX86_BUILTIN_PMINUW128,
28269 IX86_BUILTIN_PMOVSXBW128,
28270 IX86_BUILTIN_PMOVSXBD128,
28271 IX86_BUILTIN_PMOVSXBQ128,
28272 IX86_BUILTIN_PMOVSXWD128,
28273 IX86_BUILTIN_PMOVSXWQ128,
28274 IX86_BUILTIN_PMOVSXDQ128,
28276 IX86_BUILTIN_PMOVZXBW128,
28277 IX86_BUILTIN_PMOVZXBD128,
28278 IX86_BUILTIN_PMOVZXBQ128,
28279 IX86_BUILTIN_PMOVZXWD128,
28280 IX86_BUILTIN_PMOVZXWQ128,
28281 IX86_BUILTIN_PMOVZXDQ128,
28283 IX86_BUILTIN_PMULDQ128,
28284 IX86_BUILTIN_PMULLD128,
28286 IX86_BUILTIN_ROUNDSD,
28287 IX86_BUILTIN_ROUNDSS,
28289 IX86_BUILTIN_ROUNDPD,
28290 IX86_BUILTIN_ROUNDPS,
28292 IX86_BUILTIN_FLOORPD,
28293 IX86_BUILTIN_CEILPD,
28294 IX86_BUILTIN_TRUNCPD,
28295 IX86_BUILTIN_RINTPD,
28296 IX86_BUILTIN_ROUNDPD_AZ,
28298 IX86_BUILTIN_FLOORPD_VEC_PACK_SFIX,
28299 IX86_BUILTIN_CEILPD_VEC_PACK_SFIX,
28300 IX86_BUILTIN_ROUNDPD_AZ_VEC_PACK_SFIX,
28302 IX86_BUILTIN_FLOORPS,
28303 IX86_BUILTIN_CEILPS,
28304 IX86_BUILTIN_TRUNCPS,
28305 IX86_BUILTIN_RINTPS,
28306 IX86_BUILTIN_ROUNDPS_AZ,
28308 IX86_BUILTIN_FLOORPS_SFIX,
28309 IX86_BUILTIN_CEILPS_SFIX,
28310 IX86_BUILTIN_ROUNDPS_AZ_SFIX,
28312 IX86_BUILTIN_PTESTZ,
28313 IX86_BUILTIN_PTESTC,
28314 IX86_BUILTIN_PTESTNZC,
28316 IX86_BUILTIN_VEC_INIT_V2SI,
28317 IX86_BUILTIN_VEC_INIT_V4HI,
28318 IX86_BUILTIN_VEC_INIT_V8QI,
28319 IX86_BUILTIN_VEC_EXT_V2DF,
28320 IX86_BUILTIN_VEC_EXT_V2DI,
28321 IX86_BUILTIN_VEC_EXT_V4SF,
28322 IX86_BUILTIN_VEC_EXT_V4SI,
28323 IX86_BUILTIN_VEC_EXT_V8HI,
28324 IX86_BUILTIN_VEC_EXT_V2SI,
28325 IX86_BUILTIN_VEC_EXT_V4HI,
28326 IX86_BUILTIN_VEC_EXT_V16QI,
28327 IX86_BUILTIN_VEC_SET_V2DI,
28328 IX86_BUILTIN_VEC_SET_V4SF,
28329 IX86_BUILTIN_VEC_SET_V4SI,
28330 IX86_BUILTIN_VEC_SET_V8HI,
28331 IX86_BUILTIN_VEC_SET_V4HI,
28332 IX86_BUILTIN_VEC_SET_V16QI,
28334 IX86_BUILTIN_VEC_PACK_SFIX,
28335 IX86_BUILTIN_VEC_PACK_SFIX256,
28337 /* SSE4.2. */
28338 IX86_BUILTIN_CRC32QI,
28339 IX86_BUILTIN_CRC32HI,
28340 IX86_BUILTIN_CRC32SI,
28341 IX86_BUILTIN_CRC32DI,
28343 IX86_BUILTIN_PCMPESTRI128,
28344 IX86_BUILTIN_PCMPESTRM128,
28345 IX86_BUILTIN_PCMPESTRA128,
28346 IX86_BUILTIN_PCMPESTRC128,
28347 IX86_BUILTIN_PCMPESTRO128,
28348 IX86_BUILTIN_PCMPESTRS128,
28349 IX86_BUILTIN_PCMPESTRZ128,
28350 IX86_BUILTIN_PCMPISTRI128,
28351 IX86_BUILTIN_PCMPISTRM128,
28352 IX86_BUILTIN_PCMPISTRA128,
28353 IX86_BUILTIN_PCMPISTRC128,
28354 IX86_BUILTIN_PCMPISTRO128,
28355 IX86_BUILTIN_PCMPISTRS128,
28356 IX86_BUILTIN_PCMPISTRZ128,
28358 IX86_BUILTIN_PCMPGTQ,
28360 /* AES instructions */
28361 IX86_BUILTIN_AESENC128,
28362 IX86_BUILTIN_AESENCLAST128,
28363 IX86_BUILTIN_AESDEC128,
28364 IX86_BUILTIN_AESDECLAST128,
28365 IX86_BUILTIN_AESIMC128,
28366 IX86_BUILTIN_AESKEYGENASSIST128,
28368 /* PCLMUL instruction */
28369 IX86_BUILTIN_PCLMULQDQ128,
28371 /* AVX */
28372 IX86_BUILTIN_ADDPD256,
28373 IX86_BUILTIN_ADDPS256,
28374 IX86_BUILTIN_ADDSUBPD256,
28375 IX86_BUILTIN_ADDSUBPS256,
28376 IX86_BUILTIN_ANDPD256,
28377 IX86_BUILTIN_ANDPS256,
28378 IX86_BUILTIN_ANDNPD256,
28379 IX86_BUILTIN_ANDNPS256,
28380 IX86_BUILTIN_BLENDPD256,
28381 IX86_BUILTIN_BLENDPS256,
28382 IX86_BUILTIN_BLENDVPD256,
28383 IX86_BUILTIN_BLENDVPS256,
28384 IX86_BUILTIN_DIVPD256,
28385 IX86_BUILTIN_DIVPS256,
28386 IX86_BUILTIN_DPPS256,
28387 IX86_BUILTIN_HADDPD256,
28388 IX86_BUILTIN_HADDPS256,
28389 IX86_BUILTIN_HSUBPD256,
28390 IX86_BUILTIN_HSUBPS256,
28391 IX86_BUILTIN_MAXPD256,
28392 IX86_BUILTIN_MAXPS256,
28393 IX86_BUILTIN_MINPD256,
28394 IX86_BUILTIN_MINPS256,
28395 IX86_BUILTIN_MULPD256,
28396 IX86_BUILTIN_MULPS256,
28397 IX86_BUILTIN_ORPD256,
28398 IX86_BUILTIN_ORPS256,
28399 IX86_BUILTIN_SHUFPD256,
28400 IX86_BUILTIN_SHUFPS256,
28401 IX86_BUILTIN_SUBPD256,
28402 IX86_BUILTIN_SUBPS256,
28403 IX86_BUILTIN_XORPD256,
28404 IX86_BUILTIN_XORPS256,
28405 IX86_BUILTIN_CMPSD,
28406 IX86_BUILTIN_CMPSS,
28407 IX86_BUILTIN_CMPPD,
28408 IX86_BUILTIN_CMPPS,
28409 IX86_BUILTIN_CMPPD256,
28410 IX86_BUILTIN_CMPPS256,
28411 IX86_BUILTIN_CVTDQ2PD256,
28412 IX86_BUILTIN_CVTDQ2PS256,
28413 IX86_BUILTIN_CVTPD2PS256,
28414 IX86_BUILTIN_CVTPS2DQ256,
28415 IX86_BUILTIN_CVTPS2PD256,
28416 IX86_BUILTIN_CVTTPD2DQ256,
28417 IX86_BUILTIN_CVTPD2DQ256,
28418 IX86_BUILTIN_CVTTPS2DQ256,
28419 IX86_BUILTIN_EXTRACTF128PD256,
28420 IX86_BUILTIN_EXTRACTF128PS256,
28421 IX86_BUILTIN_EXTRACTF128SI256,
28422 IX86_BUILTIN_VZEROALL,
28423 IX86_BUILTIN_VZEROUPPER,
28424 IX86_BUILTIN_VPERMILVARPD,
28425 IX86_BUILTIN_VPERMILVARPS,
28426 IX86_BUILTIN_VPERMILVARPD256,
28427 IX86_BUILTIN_VPERMILVARPS256,
28428 IX86_BUILTIN_VPERMILPD,
28429 IX86_BUILTIN_VPERMILPS,
28430 IX86_BUILTIN_VPERMILPD256,
28431 IX86_BUILTIN_VPERMILPS256,
28432 IX86_BUILTIN_VPERMIL2PD,
28433 IX86_BUILTIN_VPERMIL2PS,
28434 IX86_BUILTIN_VPERMIL2PD256,
28435 IX86_BUILTIN_VPERMIL2PS256,
28436 IX86_BUILTIN_VPERM2F128PD256,
28437 IX86_BUILTIN_VPERM2F128PS256,
28438 IX86_BUILTIN_VPERM2F128SI256,
28439 IX86_BUILTIN_VBROADCASTSS,
28440 IX86_BUILTIN_VBROADCASTSD256,
28441 IX86_BUILTIN_VBROADCASTSS256,
28442 IX86_BUILTIN_VBROADCASTPD256,
28443 IX86_BUILTIN_VBROADCASTPS256,
28444 IX86_BUILTIN_VINSERTF128PD256,
28445 IX86_BUILTIN_VINSERTF128PS256,
28446 IX86_BUILTIN_VINSERTF128SI256,
28447 IX86_BUILTIN_LOADUPD256,
28448 IX86_BUILTIN_LOADUPS256,
28449 IX86_BUILTIN_STOREUPD256,
28450 IX86_BUILTIN_STOREUPS256,
28451 IX86_BUILTIN_LDDQU256,
28452 IX86_BUILTIN_MOVNTDQ256,
28453 IX86_BUILTIN_MOVNTPD256,
28454 IX86_BUILTIN_MOVNTPS256,
28455 IX86_BUILTIN_LOADDQU256,
28456 IX86_BUILTIN_STOREDQU256,
28457 IX86_BUILTIN_MASKLOADPD,
28458 IX86_BUILTIN_MASKLOADPS,
28459 IX86_BUILTIN_MASKSTOREPD,
28460 IX86_BUILTIN_MASKSTOREPS,
28461 IX86_BUILTIN_MASKLOADPD256,
28462 IX86_BUILTIN_MASKLOADPS256,
28463 IX86_BUILTIN_MASKSTOREPD256,
28464 IX86_BUILTIN_MASKSTOREPS256,
28465 IX86_BUILTIN_MOVSHDUP256,
28466 IX86_BUILTIN_MOVSLDUP256,
28467 IX86_BUILTIN_MOVDDUP256,
28469 IX86_BUILTIN_SQRTPD256,
28470 IX86_BUILTIN_SQRTPS256,
28471 IX86_BUILTIN_SQRTPS_NR256,
28472 IX86_BUILTIN_RSQRTPS256,
28473 IX86_BUILTIN_RSQRTPS_NR256,
28475 IX86_BUILTIN_RCPPS256,
28477 IX86_BUILTIN_ROUNDPD256,
28478 IX86_BUILTIN_ROUNDPS256,
28480 IX86_BUILTIN_FLOORPD256,
28481 IX86_BUILTIN_CEILPD256,
28482 IX86_BUILTIN_TRUNCPD256,
28483 IX86_BUILTIN_RINTPD256,
28484 IX86_BUILTIN_ROUNDPD_AZ256,
28486 IX86_BUILTIN_FLOORPD_VEC_PACK_SFIX256,
28487 IX86_BUILTIN_CEILPD_VEC_PACK_SFIX256,
28488 IX86_BUILTIN_ROUNDPD_AZ_VEC_PACK_SFIX256,
28490 IX86_BUILTIN_FLOORPS256,
28491 IX86_BUILTIN_CEILPS256,
28492 IX86_BUILTIN_TRUNCPS256,
28493 IX86_BUILTIN_RINTPS256,
28494 IX86_BUILTIN_ROUNDPS_AZ256,
28496 IX86_BUILTIN_FLOORPS_SFIX256,
28497 IX86_BUILTIN_CEILPS_SFIX256,
28498 IX86_BUILTIN_ROUNDPS_AZ_SFIX256,
28500 IX86_BUILTIN_UNPCKHPD256,
28501 IX86_BUILTIN_UNPCKLPD256,
28502 IX86_BUILTIN_UNPCKHPS256,
28503 IX86_BUILTIN_UNPCKLPS256,
28505 IX86_BUILTIN_SI256_SI,
28506 IX86_BUILTIN_PS256_PS,
28507 IX86_BUILTIN_PD256_PD,
28508 IX86_BUILTIN_SI_SI256,
28509 IX86_BUILTIN_PS_PS256,
28510 IX86_BUILTIN_PD_PD256,
28512 IX86_BUILTIN_VTESTZPD,
28513 IX86_BUILTIN_VTESTCPD,
28514 IX86_BUILTIN_VTESTNZCPD,
28515 IX86_BUILTIN_VTESTZPS,
28516 IX86_BUILTIN_VTESTCPS,
28517 IX86_BUILTIN_VTESTNZCPS,
28518 IX86_BUILTIN_VTESTZPD256,
28519 IX86_BUILTIN_VTESTCPD256,
28520 IX86_BUILTIN_VTESTNZCPD256,
28521 IX86_BUILTIN_VTESTZPS256,
28522 IX86_BUILTIN_VTESTCPS256,
28523 IX86_BUILTIN_VTESTNZCPS256,
28524 IX86_BUILTIN_PTESTZ256,
28525 IX86_BUILTIN_PTESTC256,
28526 IX86_BUILTIN_PTESTNZC256,
28528 IX86_BUILTIN_MOVMSKPD256,
28529 IX86_BUILTIN_MOVMSKPS256,
28531 /* AVX2 */
28532 IX86_BUILTIN_MPSADBW256,
28533 IX86_BUILTIN_PABSB256,
28534 IX86_BUILTIN_PABSW256,
28535 IX86_BUILTIN_PABSD256,
28536 IX86_BUILTIN_PACKSSDW256,
28537 IX86_BUILTIN_PACKSSWB256,
28538 IX86_BUILTIN_PACKUSDW256,
28539 IX86_BUILTIN_PACKUSWB256,
28540 IX86_BUILTIN_PADDB256,
28541 IX86_BUILTIN_PADDW256,
28542 IX86_BUILTIN_PADDD256,
28543 IX86_BUILTIN_PADDQ256,
28544 IX86_BUILTIN_PADDSB256,
28545 IX86_BUILTIN_PADDSW256,
28546 IX86_BUILTIN_PADDUSB256,
28547 IX86_BUILTIN_PADDUSW256,
28548 IX86_BUILTIN_PALIGNR256,
28549 IX86_BUILTIN_AND256I,
28550 IX86_BUILTIN_ANDNOT256I,
28551 IX86_BUILTIN_PAVGB256,
28552 IX86_BUILTIN_PAVGW256,
28553 IX86_BUILTIN_PBLENDVB256,
28554 IX86_BUILTIN_PBLENDVW256,
28555 IX86_BUILTIN_PCMPEQB256,
28556 IX86_BUILTIN_PCMPEQW256,
28557 IX86_BUILTIN_PCMPEQD256,
28558 IX86_BUILTIN_PCMPEQQ256,
28559 IX86_BUILTIN_PCMPGTB256,
28560 IX86_BUILTIN_PCMPGTW256,
28561 IX86_BUILTIN_PCMPGTD256,
28562 IX86_BUILTIN_PCMPGTQ256,
28563 IX86_BUILTIN_PHADDW256,
28564 IX86_BUILTIN_PHADDD256,
28565 IX86_BUILTIN_PHADDSW256,
28566 IX86_BUILTIN_PHSUBW256,
28567 IX86_BUILTIN_PHSUBD256,
28568 IX86_BUILTIN_PHSUBSW256,
28569 IX86_BUILTIN_PMADDUBSW256,
28570 IX86_BUILTIN_PMADDWD256,
28571 IX86_BUILTIN_PMAXSB256,
28572 IX86_BUILTIN_PMAXSW256,
28573 IX86_BUILTIN_PMAXSD256,
28574 IX86_BUILTIN_PMAXUB256,
28575 IX86_BUILTIN_PMAXUW256,
28576 IX86_BUILTIN_PMAXUD256,
28577 IX86_BUILTIN_PMINSB256,
28578 IX86_BUILTIN_PMINSW256,
28579 IX86_BUILTIN_PMINSD256,
28580 IX86_BUILTIN_PMINUB256,
28581 IX86_BUILTIN_PMINUW256,
28582 IX86_BUILTIN_PMINUD256,
28583 IX86_BUILTIN_PMOVMSKB256,
28584 IX86_BUILTIN_PMOVSXBW256,
28585 IX86_BUILTIN_PMOVSXBD256,
28586 IX86_BUILTIN_PMOVSXBQ256,
28587 IX86_BUILTIN_PMOVSXWD256,
28588 IX86_BUILTIN_PMOVSXWQ256,
28589 IX86_BUILTIN_PMOVSXDQ256,
28590 IX86_BUILTIN_PMOVZXBW256,
28591 IX86_BUILTIN_PMOVZXBD256,
28592 IX86_BUILTIN_PMOVZXBQ256,
28593 IX86_BUILTIN_PMOVZXWD256,
28594 IX86_BUILTIN_PMOVZXWQ256,
28595 IX86_BUILTIN_PMOVZXDQ256,
28596 IX86_BUILTIN_PMULDQ256,
28597 IX86_BUILTIN_PMULHRSW256,
28598 IX86_BUILTIN_PMULHUW256,
28599 IX86_BUILTIN_PMULHW256,
28600 IX86_BUILTIN_PMULLW256,
28601 IX86_BUILTIN_PMULLD256,
28602 IX86_BUILTIN_PMULUDQ256,
28603 IX86_BUILTIN_POR256,
28604 IX86_BUILTIN_PSADBW256,
28605 IX86_BUILTIN_PSHUFB256,
28606 IX86_BUILTIN_PSHUFD256,
28607 IX86_BUILTIN_PSHUFHW256,
28608 IX86_BUILTIN_PSHUFLW256,
28609 IX86_BUILTIN_PSIGNB256,
28610 IX86_BUILTIN_PSIGNW256,
28611 IX86_BUILTIN_PSIGND256,
28612 IX86_BUILTIN_PSLLDQI256,
28613 IX86_BUILTIN_PSLLWI256,
28614 IX86_BUILTIN_PSLLW256,
28615 IX86_BUILTIN_PSLLDI256,
28616 IX86_BUILTIN_PSLLD256,
28617 IX86_BUILTIN_PSLLQI256,
28618 IX86_BUILTIN_PSLLQ256,
28619 IX86_BUILTIN_PSRAWI256,
28620 IX86_BUILTIN_PSRAW256,
28621 IX86_BUILTIN_PSRADI256,
28622 IX86_BUILTIN_PSRAD256,
28623 IX86_BUILTIN_PSRLDQI256,
28624 IX86_BUILTIN_PSRLWI256,
28625 IX86_BUILTIN_PSRLW256,
28626 IX86_BUILTIN_PSRLDI256,
28627 IX86_BUILTIN_PSRLD256,
28628 IX86_BUILTIN_PSRLQI256,
28629 IX86_BUILTIN_PSRLQ256,
28630 IX86_BUILTIN_PSUBB256,
28631 IX86_BUILTIN_PSUBW256,
28632 IX86_BUILTIN_PSUBD256,
28633 IX86_BUILTIN_PSUBQ256,
28634 IX86_BUILTIN_PSUBSB256,
28635 IX86_BUILTIN_PSUBSW256,
28636 IX86_BUILTIN_PSUBUSB256,
28637 IX86_BUILTIN_PSUBUSW256,
28638 IX86_BUILTIN_PUNPCKHBW256,
28639 IX86_BUILTIN_PUNPCKHWD256,
28640 IX86_BUILTIN_PUNPCKHDQ256,
28641 IX86_BUILTIN_PUNPCKHQDQ256,
28642 IX86_BUILTIN_PUNPCKLBW256,
28643 IX86_BUILTIN_PUNPCKLWD256,
28644 IX86_BUILTIN_PUNPCKLDQ256,
28645 IX86_BUILTIN_PUNPCKLQDQ256,
28646 IX86_BUILTIN_PXOR256,
28647 IX86_BUILTIN_MOVNTDQA256,
28648 IX86_BUILTIN_VBROADCASTSS_PS,
28649 IX86_BUILTIN_VBROADCASTSS_PS256,
28650 IX86_BUILTIN_VBROADCASTSD_PD256,
28651 IX86_BUILTIN_VBROADCASTSI256,
28652 IX86_BUILTIN_PBLENDD256,
28653 IX86_BUILTIN_PBLENDD128,
28654 IX86_BUILTIN_PBROADCASTB256,
28655 IX86_BUILTIN_PBROADCASTW256,
28656 IX86_BUILTIN_PBROADCASTD256,
28657 IX86_BUILTIN_PBROADCASTQ256,
28658 IX86_BUILTIN_PBROADCASTB128,
28659 IX86_BUILTIN_PBROADCASTW128,
28660 IX86_BUILTIN_PBROADCASTD128,
28661 IX86_BUILTIN_PBROADCASTQ128,
28662 IX86_BUILTIN_VPERMVARSI256,
28663 IX86_BUILTIN_VPERMDF256,
28664 IX86_BUILTIN_VPERMVARSF256,
28665 IX86_BUILTIN_VPERMDI256,
28666 IX86_BUILTIN_VPERMTI256,
28667 IX86_BUILTIN_VEXTRACT128I256,
28668 IX86_BUILTIN_VINSERT128I256,
28669 IX86_BUILTIN_MASKLOADD,
28670 IX86_BUILTIN_MASKLOADQ,
28671 IX86_BUILTIN_MASKLOADD256,
28672 IX86_BUILTIN_MASKLOADQ256,
28673 IX86_BUILTIN_MASKSTORED,
28674 IX86_BUILTIN_MASKSTOREQ,
28675 IX86_BUILTIN_MASKSTORED256,
28676 IX86_BUILTIN_MASKSTOREQ256,
28677 IX86_BUILTIN_PSLLVV4DI,
28678 IX86_BUILTIN_PSLLVV2DI,
28679 IX86_BUILTIN_PSLLVV8SI,
28680 IX86_BUILTIN_PSLLVV4SI,
28681 IX86_BUILTIN_PSRAVV8SI,
28682 IX86_BUILTIN_PSRAVV4SI,
28683 IX86_BUILTIN_PSRLVV4DI,
28684 IX86_BUILTIN_PSRLVV2DI,
28685 IX86_BUILTIN_PSRLVV8SI,
28686 IX86_BUILTIN_PSRLVV4SI,
28688 IX86_BUILTIN_GATHERSIV2DF,
28689 IX86_BUILTIN_GATHERSIV4DF,
28690 IX86_BUILTIN_GATHERDIV2DF,
28691 IX86_BUILTIN_GATHERDIV4DF,
28692 IX86_BUILTIN_GATHERSIV4SF,
28693 IX86_BUILTIN_GATHERSIV8SF,
28694 IX86_BUILTIN_GATHERDIV4SF,
28695 IX86_BUILTIN_GATHERDIV8SF,
28696 IX86_BUILTIN_GATHERSIV2DI,
28697 IX86_BUILTIN_GATHERSIV4DI,
28698 IX86_BUILTIN_GATHERDIV2DI,
28699 IX86_BUILTIN_GATHERDIV4DI,
28700 IX86_BUILTIN_GATHERSIV4SI,
28701 IX86_BUILTIN_GATHERSIV8SI,
28702 IX86_BUILTIN_GATHERDIV4SI,
28703 IX86_BUILTIN_GATHERDIV8SI,
28705 /* AVX512F */
28706 IX86_BUILTIN_SI512_SI256,
28707 IX86_BUILTIN_PD512_PD256,
28708 IX86_BUILTIN_PS512_PS256,
28709 IX86_BUILTIN_SI512_SI,
28710 IX86_BUILTIN_PD512_PD,
28711 IX86_BUILTIN_PS512_PS,
28712 IX86_BUILTIN_ADDPD512,
28713 IX86_BUILTIN_ADDPS512,
28714 IX86_BUILTIN_ADDSD_ROUND,
28715 IX86_BUILTIN_ADDSS_ROUND,
28716 IX86_BUILTIN_ALIGND512,
28717 IX86_BUILTIN_ALIGNQ512,
28718 IX86_BUILTIN_BLENDMD512,
28719 IX86_BUILTIN_BLENDMPD512,
28720 IX86_BUILTIN_BLENDMPS512,
28721 IX86_BUILTIN_BLENDMQ512,
28722 IX86_BUILTIN_BROADCASTF32X4_512,
28723 IX86_BUILTIN_BROADCASTF64X4_512,
28724 IX86_BUILTIN_BROADCASTI32X4_512,
28725 IX86_BUILTIN_BROADCASTI64X4_512,
28726 IX86_BUILTIN_BROADCASTSD512,
28727 IX86_BUILTIN_BROADCASTSS512,
28728 IX86_BUILTIN_CMPD512,
28729 IX86_BUILTIN_CMPPD512,
28730 IX86_BUILTIN_CMPPS512,
28731 IX86_BUILTIN_CMPQ512,
28732 IX86_BUILTIN_CMPSD_MASK,
28733 IX86_BUILTIN_CMPSS_MASK,
28734 IX86_BUILTIN_COMIDF,
28735 IX86_BUILTIN_COMISF,
28736 IX86_BUILTIN_COMPRESSPD512,
28737 IX86_BUILTIN_COMPRESSPDSTORE512,
28738 IX86_BUILTIN_COMPRESSPS512,
28739 IX86_BUILTIN_COMPRESSPSSTORE512,
28740 IX86_BUILTIN_CVTDQ2PD512,
28741 IX86_BUILTIN_CVTDQ2PS512,
28742 IX86_BUILTIN_CVTPD2DQ512,
28743 IX86_BUILTIN_CVTPD2PS512,
28744 IX86_BUILTIN_CVTPD2UDQ512,
28745 IX86_BUILTIN_CVTPH2PS512,
28746 IX86_BUILTIN_CVTPS2DQ512,
28747 IX86_BUILTIN_CVTPS2PD512,
28748 IX86_BUILTIN_CVTPS2PH512,
28749 IX86_BUILTIN_CVTPS2UDQ512,
28750 IX86_BUILTIN_CVTSD2SS_ROUND,
28751 IX86_BUILTIN_CVTSI2SD64,
28752 IX86_BUILTIN_CVTSI2SS32,
28753 IX86_BUILTIN_CVTSI2SS64,
28754 IX86_BUILTIN_CVTSS2SD_ROUND,
28755 IX86_BUILTIN_CVTTPD2DQ512,
28756 IX86_BUILTIN_CVTTPD2UDQ512,
28757 IX86_BUILTIN_CVTTPS2DQ512,
28758 IX86_BUILTIN_CVTTPS2UDQ512,
28759 IX86_BUILTIN_CVTUDQ2PD512,
28760 IX86_BUILTIN_CVTUDQ2PS512,
28761 IX86_BUILTIN_CVTUSI2SD32,
28762 IX86_BUILTIN_CVTUSI2SD64,
28763 IX86_BUILTIN_CVTUSI2SS32,
28764 IX86_BUILTIN_CVTUSI2SS64,
28765 IX86_BUILTIN_DIVPD512,
28766 IX86_BUILTIN_DIVPS512,
28767 IX86_BUILTIN_DIVSD_ROUND,
28768 IX86_BUILTIN_DIVSS_ROUND,
28769 IX86_BUILTIN_EXPANDPD512,
28770 IX86_BUILTIN_EXPANDPD512Z,
28771 IX86_BUILTIN_EXPANDPDLOAD512,
28772 IX86_BUILTIN_EXPANDPDLOAD512Z,
28773 IX86_BUILTIN_EXPANDPS512,
28774 IX86_BUILTIN_EXPANDPS512Z,
28775 IX86_BUILTIN_EXPANDPSLOAD512,
28776 IX86_BUILTIN_EXPANDPSLOAD512Z,
28777 IX86_BUILTIN_EXTRACTF32X4,
28778 IX86_BUILTIN_EXTRACTF64X4,
28779 IX86_BUILTIN_EXTRACTI32X4,
28780 IX86_BUILTIN_EXTRACTI64X4,
28781 IX86_BUILTIN_FIXUPIMMPD512_MASK,
28782 IX86_BUILTIN_FIXUPIMMPD512_MASKZ,
28783 IX86_BUILTIN_FIXUPIMMPS512_MASK,
28784 IX86_BUILTIN_FIXUPIMMPS512_MASKZ,
28785 IX86_BUILTIN_FIXUPIMMSD128_MASK,
28786 IX86_BUILTIN_FIXUPIMMSD128_MASKZ,
28787 IX86_BUILTIN_FIXUPIMMSS128_MASK,
28788 IX86_BUILTIN_FIXUPIMMSS128_MASKZ,
28789 IX86_BUILTIN_GETEXPPD512,
28790 IX86_BUILTIN_GETEXPPS512,
28791 IX86_BUILTIN_GETEXPSD128,
28792 IX86_BUILTIN_GETEXPSS128,
28793 IX86_BUILTIN_GETMANTPD512,
28794 IX86_BUILTIN_GETMANTPS512,
28795 IX86_BUILTIN_GETMANTSD128,
28796 IX86_BUILTIN_GETMANTSS128,
28797 IX86_BUILTIN_INSERTF32X4,
28798 IX86_BUILTIN_INSERTF64X4,
28799 IX86_BUILTIN_INSERTI32X4,
28800 IX86_BUILTIN_INSERTI64X4,
28801 IX86_BUILTIN_LOADAPD512,
28802 IX86_BUILTIN_LOADAPS512,
28803 IX86_BUILTIN_LOADDQUDI512,
28804 IX86_BUILTIN_LOADDQUSI512,
28805 IX86_BUILTIN_LOADUPD512,
28806 IX86_BUILTIN_LOADUPS512,
28807 IX86_BUILTIN_MAXPD512,
28808 IX86_BUILTIN_MAXPS512,
28809 IX86_BUILTIN_MAXSD_ROUND,
28810 IX86_BUILTIN_MAXSS_ROUND,
28811 IX86_BUILTIN_MINPD512,
28812 IX86_BUILTIN_MINPS512,
28813 IX86_BUILTIN_MINSD_ROUND,
28814 IX86_BUILTIN_MINSS_ROUND,
28815 IX86_BUILTIN_MOVAPD512,
28816 IX86_BUILTIN_MOVAPS512,
28817 IX86_BUILTIN_MOVDDUP512,
28818 IX86_BUILTIN_MOVDQA32LOAD512,
28819 IX86_BUILTIN_MOVDQA32STORE512,
28820 IX86_BUILTIN_MOVDQA32_512,
28821 IX86_BUILTIN_MOVDQA64LOAD512,
28822 IX86_BUILTIN_MOVDQA64STORE512,
28823 IX86_BUILTIN_MOVDQA64_512,
28824 IX86_BUILTIN_MOVNTDQ512,
28825 IX86_BUILTIN_MOVNTDQA512,
28826 IX86_BUILTIN_MOVNTPD512,
28827 IX86_BUILTIN_MOVNTPS512,
28828 IX86_BUILTIN_MOVSHDUP512,
28829 IX86_BUILTIN_MOVSLDUP512,
28830 IX86_BUILTIN_MULPD512,
28831 IX86_BUILTIN_MULPS512,
28832 IX86_BUILTIN_MULSD_ROUND,
28833 IX86_BUILTIN_MULSS_ROUND,
28834 IX86_BUILTIN_PABSD512,
28835 IX86_BUILTIN_PABSQ512,
28836 IX86_BUILTIN_PADDD512,
28837 IX86_BUILTIN_PADDQ512,
28838 IX86_BUILTIN_PANDD512,
28839 IX86_BUILTIN_PANDND512,
28840 IX86_BUILTIN_PANDNQ512,
28841 IX86_BUILTIN_PANDQ512,
28842 IX86_BUILTIN_PBROADCASTD512,
28843 IX86_BUILTIN_PBROADCASTD512_GPR,
28844 IX86_BUILTIN_PBROADCASTMB512,
28845 IX86_BUILTIN_PBROADCASTMW512,
28846 IX86_BUILTIN_PBROADCASTQ512,
28847 IX86_BUILTIN_PBROADCASTQ512_GPR,
28848 IX86_BUILTIN_PCMPEQD512_MASK,
28849 IX86_BUILTIN_PCMPEQQ512_MASK,
28850 IX86_BUILTIN_PCMPGTD512_MASK,
28851 IX86_BUILTIN_PCMPGTQ512_MASK,
28852 IX86_BUILTIN_PCOMPRESSD512,
28853 IX86_BUILTIN_PCOMPRESSDSTORE512,
28854 IX86_BUILTIN_PCOMPRESSQ512,
28855 IX86_BUILTIN_PCOMPRESSQSTORE512,
28856 IX86_BUILTIN_PEXPANDD512,
28857 IX86_BUILTIN_PEXPANDD512Z,
28858 IX86_BUILTIN_PEXPANDDLOAD512,
28859 IX86_BUILTIN_PEXPANDDLOAD512Z,
28860 IX86_BUILTIN_PEXPANDQ512,
28861 IX86_BUILTIN_PEXPANDQ512Z,
28862 IX86_BUILTIN_PEXPANDQLOAD512,
28863 IX86_BUILTIN_PEXPANDQLOAD512Z,
28864 IX86_BUILTIN_PMAXSD512,
28865 IX86_BUILTIN_PMAXSQ512,
28866 IX86_BUILTIN_PMAXUD512,
28867 IX86_BUILTIN_PMAXUQ512,
28868 IX86_BUILTIN_PMINSD512,
28869 IX86_BUILTIN_PMINSQ512,
28870 IX86_BUILTIN_PMINUD512,
28871 IX86_BUILTIN_PMINUQ512,
28872 IX86_BUILTIN_PMOVDB512,
28873 IX86_BUILTIN_PMOVDB512_MEM,
28874 IX86_BUILTIN_PMOVDW512,
28875 IX86_BUILTIN_PMOVDW512_MEM,
28876 IX86_BUILTIN_PMOVQB512,
28877 IX86_BUILTIN_PMOVQB512_MEM,
28878 IX86_BUILTIN_PMOVQD512,
28879 IX86_BUILTIN_PMOVQD512_MEM,
28880 IX86_BUILTIN_PMOVQW512,
28881 IX86_BUILTIN_PMOVQW512_MEM,
28882 IX86_BUILTIN_PMOVSDB512,
28883 IX86_BUILTIN_PMOVSDB512_MEM,
28884 IX86_BUILTIN_PMOVSDW512,
28885 IX86_BUILTIN_PMOVSDW512_MEM,
28886 IX86_BUILTIN_PMOVSQB512,
28887 IX86_BUILTIN_PMOVSQB512_MEM,
28888 IX86_BUILTIN_PMOVSQD512,
28889 IX86_BUILTIN_PMOVSQD512_MEM,
28890 IX86_BUILTIN_PMOVSQW512,
28891 IX86_BUILTIN_PMOVSQW512_MEM,
28892 IX86_BUILTIN_PMOVSXBD512,
28893 IX86_BUILTIN_PMOVSXBQ512,
28894 IX86_BUILTIN_PMOVSXDQ512,
28895 IX86_BUILTIN_PMOVSXWD512,
28896 IX86_BUILTIN_PMOVSXWQ512,
28897 IX86_BUILTIN_PMOVUSDB512,
28898 IX86_BUILTIN_PMOVUSDB512_MEM,
28899 IX86_BUILTIN_PMOVUSDW512,
28900 IX86_BUILTIN_PMOVUSDW512_MEM,
28901 IX86_BUILTIN_PMOVUSQB512,
28902 IX86_BUILTIN_PMOVUSQB512_MEM,
28903 IX86_BUILTIN_PMOVUSQD512,
28904 IX86_BUILTIN_PMOVUSQD512_MEM,
28905 IX86_BUILTIN_PMOVUSQW512,
28906 IX86_BUILTIN_PMOVUSQW512_MEM,
28907 IX86_BUILTIN_PMOVZXBD512,
28908 IX86_BUILTIN_PMOVZXBQ512,
28909 IX86_BUILTIN_PMOVZXDQ512,
28910 IX86_BUILTIN_PMOVZXWD512,
28911 IX86_BUILTIN_PMOVZXWQ512,
28912 IX86_BUILTIN_PMULDQ512,
28913 IX86_BUILTIN_PMULLD512,
28914 IX86_BUILTIN_PMULUDQ512,
28915 IX86_BUILTIN_PORD512,
28916 IX86_BUILTIN_PORQ512,
28917 IX86_BUILTIN_PROLD512,
28918 IX86_BUILTIN_PROLQ512,
28919 IX86_BUILTIN_PROLVD512,
28920 IX86_BUILTIN_PROLVQ512,
28921 IX86_BUILTIN_PRORD512,
28922 IX86_BUILTIN_PRORQ512,
28923 IX86_BUILTIN_PRORVD512,
28924 IX86_BUILTIN_PRORVQ512,
28925 IX86_BUILTIN_PSHUFD512,
28926 IX86_BUILTIN_PSLLD512,
28927 IX86_BUILTIN_PSLLDI512,
28928 IX86_BUILTIN_PSLLQ512,
28929 IX86_BUILTIN_PSLLQI512,
28930 IX86_BUILTIN_PSLLVV16SI,
28931 IX86_BUILTIN_PSLLVV8DI,
28932 IX86_BUILTIN_PSRAD512,
28933 IX86_BUILTIN_PSRADI512,
28934 IX86_BUILTIN_PSRAQ512,
28935 IX86_BUILTIN_PSRAQI512,
28936 IX86_BUILTIN_PSRAVV16SI,
28937 IX86_BUILTIN_PSRAVV8DI,
28938 IX86_BUILTIN_PSRLD512,
28939 IX86_BUILTIN_PSRLDI512,
28940 IX86_BUILTIN_PSRLQ512,
28941 IX86_BUILTIN_PSRLQI512,
28942 IX86_BUILTIN_PSRLVV16SI,
28943 IX86_BUILTIN_PSRLVV8DI,
28944 IX86_BUILTIN_PSUBD512,
28945 IX86_BUILTIN_PSUBQ512,
28946 IX86_BUILTIN_PTESTMD512,
28947 IX86_BUILTIN_PTESTMQ512,
28948 IX86_BUILTIN_PTESTNMD512,
28949 IX86_BUILTIN_PTESTNMQ512,
28950 IX86_BUILTIN_PUNPCKHDQ512,
28951 IX86_BUILTIN_PUNPCKHQDQ512,
28952 IX86_BUILTIN_PUNPCKLDQ512,
28953 IX86_BUILTIN_PUNPCKLQDQ512,
28954 IX86_BUILTIN_PXORD512,
28955 IX86_BUILTIN_PXORQ512,
28956 IX86_BUILTIN_RCP14PD512,
28957 IX86_BUILTIN_RCP14PS512,
28958 IX86_BUILTIN_RCP14SD,
28959 IX86_BUILTIN_RCP14SS,
28960 IX86_BUILTIN_RNDSCALEPD,
28961 IX86_BUILTIN_RNDSCALEPS,
28962 IX86_BUILTIN_RNDSCALESD,
28963 IX86_BUILTIN_RNDSCALESS,
28964 IX86_BUILTIN_RSQRT14PD512,
28965 IX86_BUILTIN_RSQRT14PS512,
28966 IX86_BUILTIN_RSQRT14SD,
28967 IX86_BUILTIN_RSQRT14SS,
28968 IX86_BUILTIN_SCALEFPD512,
28969 IX86_BUILTIN_SCALEFPS512,
28970 IX86_BUILTIN_SCALEFSD,
28971 IX86_BUILTIN_SCALEFSS,
28972 IX86_BUILTIN_SHUFPD512,
28973 IX86_BUILTIN_SHUFPS512,
28974 IX86_BUILTIN_SHUF_F32x4,
28975 IX86_BUILTIN_SHUF_F64x2,
28976 IX86_BUILTIN_SHUF_I32x4,
28977 IX86_BUILTIN_SHUF_I64x2,
28978 IX86_BUILTIN_SQRTPD512,
28979 IX86_BUILTIN_SQRTPD512_MASK,
28980 IX86_BUILTIN_SQRTPS512_MASK,
28981 IX86_BUILTIN_SQRTPS_NR512,
28982 IX86_BUILTIN_SQRTSD_ROUND,
28983 IX86_BUILTIN_SQRTSS_ROUND,
28984 IX86_BUILTIN_STOREAPD512,
28985 IX86_BUILTIN_STOREAPS512,
28986 IX86_BUILTIN_STOREDQUDI512,
28987 IX86_BUILTIN_STOREDQUSI512,
28988 IX86_BUILTIN_STOREUPD512,
28989 IX86_BUILTIN_STOREUPS512,
28990 IX86_BUILTIN_SUBPD512,
28991 IX86_BUILTIN_SUBPS512,
28992 IX86_BUILTIN_SUBSD_ROUND,
28993 IX86_BUILTIN_SUBSS_ROUND,
28994 IX86_BUILTIN_UCMPD512,
28995 IX86_BUILTIN_UCMPQ512,
28996 IX86_BUILTIN_UNPCKHPD512,
28997 IX86_BUILTIN_UNPCKHPS512,
28998 IX86_BUILTIN_UNPCKLPD512,
28999 IX86_BUILTIN_UNPCKLPS512,
29000 IX86_BUILTIN_VCVTSD2SI32,
29001 IX86_BUILTIN_VCVTSD2SI64,
29002 IX86_BUILTIN_VCVTSD2USI32,
29003 IX86_BUILTIN_VCVTSD2USI64,
29004 IX86_BUILTIN_VCVTSS2SI32,
29005 IX86_BUILTIN_VCVTSS2SI64,
29006 IX86_BUILTIN_VCVTSS2USI32,
29007 IX86_BUILTIN_VCVTSS2USI64,
29008 IX86_BUILTIN_VCVTTSD2SI32,
29009 IX86_BUILTIN_VCVTTSD2SI64,
29010 IX86_BUILTIN_VCVTTSD2USI32,
29011 IX86_BUILTIN_VCVTTSD2USI64,
29012 IX86_BUILTIN_VCVTTSS2SI32,
29013 IX86_BUILTIN_VCVTTSS2SI64,
29014 IX86_BUILTIN_VCVTTSS2USI32,
29015 IX86_BUILTIN_VCVTTSS2USI64,
29016 IX86_BUILTIN_VFMADDPD512_MASK,
29017 IX86_BUILTIN_VFMADDPD512_MASK3,
29018 IX86_BUILTIN_VFMADDPD512_MASKZ,
29019 IX86_BUILTIN_VFMADDPS512_MASK,
29020 IX86_BUILTIN_VFMADDPS512_MASK3,
29021 IX86_BUILTIN_VFMADDPS512_MASKZ,
29022 IX86_BUILTIN_VFMADDSD3_ROUND,
29023 IX86_BUILTIN_VFMADDSS3_ROUND,
29024 IX86_BUILTIN_VFMADDSUBPD512_MASK,
29025 IX86_BUILTIN_VFMADDSUBPD512_MASK3,
29026 IX86_BUILTIN_VFMADDSUBPD512_MASKZ,
29027 IX86_BUILTIN_VFMADDSUBPS512_MASK,
29028 IX86_BUILTIN_VFMADDSUBPS512_MASK3,
29029 IX86_BUILTIN_VFMADDSUBPS512_MASKZ,
29030 IX86_BUILTIN_VFMSUBADDPD512_MASK3,
29031 IX86_BUILTIN_VFMSUBADDPS512_MASK3,
29032 IX86_BUILTIN_VFMSUBPD512_MASK3,
29033 IX86_BUILTIN_VFMSUBPS512_MASK3,
29034 IX86_BUILTIN_VFMSUBSD3_MASK3,
29035 IX86_BUILTIN_VFMSUBSS3_MASK3,
29036 IX86_BUILTIN_VFNMADDPD512_MASK,
29037 IX86_BUILTIN_VFNMADDPS512_MASK,
29038 IX86_BUILTIN_VFNMSUBPD512_MASK,
29039 IX86_BUILTIN_VFNMSUBPD512_MASK3,
29040 IX86_BUILTIN_VFNMSUBPS512_MASK,
29041 IX86_BUILTIN_VFNMSUBPS512_MASK3,
29042 IX86_BUILTIN_VPCLZCNTD512,
29043 IX86_BUILTIN_VPCLZCNTQ512,
29044 IX86_BUILTIN_VPCONFLICTD512,
29045 IX86_BUILTIN_VPCONFLICTQ512,
29046 IX86_BUILTIN_VPERMDF512,
29047 IX86_BUILTIN_VPERMDI512,
29048 IX86_BUILTIN_VPERMI2VARD512,
29049 IX86_BUILTIN_VPERMI2VARPD512,
29050 IX86_BUILTIN_VPERMI2VARPS512,
29051 IX86_BUILTIN_VPERMI2VARQ512,
29052 IX86_BUILTIN_VPERMILPD512,
29053 IX86_BUILTIN_VPERMILPS512,
29054 IX86_BUILTIN_VPERMILVARPD512,
29055 IX86_BUILTIN_VPERMILVARPS512,
29056 IX86_BUILTIN_VPERMT2VARD512,
29057 IX86_BUILTIN_VPERMT2VARD512_MASKZ,
29058 IX86_BUILTIN_VPERMT2VARPD512,
29059 IX86_BUILTIN_VPERMT2VARPD512_MASKZ,
29060 IX86_BUILTIN_VPERMT2VARPS512,
29061 IX86_BUILTIN_VPERMT2VARPS512_MASKZ,
29062 IX86_BUILTIN_VPERMT2VARQ512,
29063 IX86_BUILTIN_VPERMT2VARQ512_MASKZ,
29064 IX86_BUILTIN_VPERMVARDF512,
29065 IX86_BUILTIN_VPERMVARDI512,
29066 IX86_BUILTIN_VPERMVARSF512,
29067 IX86_BUILTIN_VPERMVARSI512,
29068 IX86_BUILTIN_VTERNLOGD512_MASK,
29069 IX86_BUILTIN_VTERNLOGD512_MASKZ,
29070 IX86_BUILTIN_VTERNLOGQ512_MASK,
29071 IX86_BUILTIN_VTERNLOGQ512_MASKZ,
29073 /* Mask arithmetic operations */
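/* These are the 16-bit (__mmask16) mask-register operations introduced with
AVX512F, corresponding to the kandw, korw, kxorw, knotw, kortestw, kunpckbw
and kmovw instructions.  */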
29074 IX86_BUILTIN_KAND16,
29075 IX86_BUILTIN_KANDN16,
29076 IX86_BUILTIN_KNOT16,
29077 IX86_BUILTIN_KOR16,
29078 IX86_BUILTIN_KORTESTC16,
29079 IX86_BUILTIN_KORTESTZ16,
29080 IX86_BUILTIN_KUNPCKBW,
29081 IX86_BUILTIN_KXNOR16,
29082 IX86_BUILTIN_KXOR16,
29083 IX86_BUILTIN_KMOV16,
29085 /* AVX512VL. */
29086 IX86_BUILTIN_PMOVUSQD256_MEM,
29087 IX86_BUILTIN_PMOVUSQD128_MEM,
29088 IX86_BUILTIN_PMOVSQD256_MEM,
29089 IX86_BUILTIN_PMOVSQD128_MEM,
29090 IX86_BUILTIN_PMOVQD256_MEM,
29091 IX86_BUILTIN_PMOVQD128_MEM,
29092 IX86_BUILTIN_PMOVUSQW256_MEM,
29093 IX86_BUILTIN_PMOVUSQW128_MEM,
29094 IX86_BUILTIN_PMOVSQW256_MEM,
29095 IX86_BUILTIN_PMOVSQW128_MEM,
29096 IX86_BUILTIN_PMOVQW256_MEM,
29097 IX86_BUILTIN_PMOVQW128_MEM,
29098 IX86_BUILTIN_PMOVUSQB256_MEM,
29099 IX86_BUILTIN_PMOVUSQB128_MEM,
29100 IX86_BUILTIN_PMOVSQB256_MEM,
29101 IX86_BUILTIN_PMOVSQB128_MEM,
29102 IX86_BUILTIN_PMOVQB256_MEM,
29103 IX86_BUILTIN_PMOVQB128_MEM,
29104 IX86_BUILTIN_PMOVUSDW256_MEM,
29105 IX86_BUILTIN_PMOVUSDW128_MEM,
29106 IX86_BUILTIN_PMOVSDW256_MEM,
29107 IX86_BUILTIN_PMOVSDW128_MEM,
29108 IX86_BUILTIN_PMOVDW256_MEM,
29109 IX86_BUILTIN_PMOVDW128_MEM,
29110 IX86_BUILTIN_PMOVUSDB256_MEM,
29111 IX86_BUILTIN_PMOVUSDB128_MEM,
29112 IX86_BUILTIN_PMOVSDB256_MEM,
29113 IX86_BUILTIN_PMOVSDB128_MEM,
29114 IX86_BUILTIN_PMOVDB256_MEM,
29115 IX86_BUILTIN_PMOVDB128_MEM,
29116 IX86_BUILTIN_MOVDQA64LOAD256_MASK,
29117 IX86_BUILTIN_MOVDQA64LOAD128_MASK,
29118 IX86_BUILTIN_MOVDQA32LOAD256_MASK,
29119 IX86_BUILTIN_MOVDQA32LOAD128_MASK,
29120 IX86_BUILTIN_MOVDQA64STORE256_MASK,
29121 IX86_BUILTIN_MOVDQA64STORE128_MASK,
29122 IX86_BUILTIN_MOVDQA32STORE256_MASK,
29123 IX86_BUILTIN_MOVDQA32STORE128_MASK,
29124 IX86_BUILTIN_LOADAPD256_MASK,
29125 IX86_BUILTIN_LOADAPD128_MASK,
29126 IX86_BUILTIN_LOADAPS256_MASK,
29127 IX86_BUILTIN_LOADAPS128_MASK,
29128 IX86_BUILTIN_STOREAPD256_MASK,
29129 IX86_BUILTIN_STOREAPD128_MASK,
29130 IX86_BUILTIN_STOREAPS256_MASK,
29131 IX86_BUILTIN_STOREAPS128_MASK,
29132 IX86_BUILTIN_LOADUPD256_MASK,
29133 IX86_BUILTIN_LOADUPD128_MASK,
29134 IX86_BUILTIN_LOADUPS256_MASK,
29135 IX86_BUILTIN_LOADUPS128_MASK,
29136 IX86_BUILTIN_STOREUPD256_MASK,
29137 IX86_BUILTIN_STOREUPD128_MASK,
29138 IX86_BUILTIN_STOREUPS256_MASK,
29139 IX86_BUILTIN_STOREUPS128_MASK,
29140 IX86_BUILTIN_LOADDQUDI256_MASK,
29141 IX86_BUILTIN_LOADDQUDI128_MASK,
29142 IX86_BUILTIN_LOADDQUSI256_MASK,
29143 IX86_BUILTIN_LOADDQUSI128_MASK,
29144 IX86_BUILTIN_LOADDQUHI256_MASK,
29145 IX86_BUILTIN_LOADDQUHI128_MASK,
29146 IX86_BUILTIN_LOADDQUQI256_MASK,
29147 IX86_BUILTIN_LOADDQUQI128_MASK,
29148 IX86_BUILTIN_STOREDQUDI256_MASK,
29149 IX86_BUILTIN_STOREDQUDI128_MASK,
29150 IX86_BUILTIN_STOREDQUSI256_MASK,
29151 IX86_BUILTIN_STOREDQUSI128_MASK,
29152 IX86_BUILTIN_STOREDQUHI256_MASK,
29153 IX86_BUILTIN_STOREDQUHI128_MASK,
29154 IX86_BUILTIN_STOREDQUQI256_MASK,
29155 IX86_BUILTIN_STOREDQUQI128_MASK,
29156 IX86_BUILTIN_COMPRESSPDSTORE256,
29157 IX86_BUILTIN_COMPRESSPDSTORE128,
29158 IX86_BUILTIN_COMPRESSPSSTORE256,
29159 IX86_BUILTIN_COMPRESSPSSTORE128,
29160 IX86_BUILTIN_PCOMPRESSQSTORE256,
29161 IX86_BUILTIN_PCOMPRESSQSTORE128,
29162 IX86_BUILTIN_PCOMPRESSDSTORE256,
29163 IX86_BUILTIN_PCOMPRESSDSTORE128,
29164 IX86_BUILTIN_EXPANDPDLOAD256,
29165 IX86_BUILTIN_EXPANDPDLOAD128,
29166 IX86_BUILTIN_EXPANDPSLOAD256,
29167 IX86_BUILTIN_EXPANDPSLOAD128,
29168 IX86_BUILTIN_PEXPANDQLOAD256,
29169 IX86_BUILTIN_PEXPANDQLOAD128,
29170 IX86_BUILTIN_PEXPANDDLOAD256,
29171 IX86_BUILTIN_PEXPANDDLOAD128,
29172 IX86_BUILTIN_EXPANDPDLOAD256Z,
29173 IX86_BUILTIN_EXPANDPDLOAD128Z,
29174 IX86_BUILTIN_EXPANDPSLOAD256Z,
29175 IX86_BUILTIN_EXPANDPSLOAD128Z,
29176 IX86_BUILTIN_PEXPANDQLOAD256Z,
29177 IX86_BUILTIN_PEXPANDQLOAD128Z,
29178 IX86_BUILTIN_PEXPANDDLOAD256Z,
29179 IX86_BUILTIN_PEXPANDDLOAD128Z,
29180 IX86_BUILTIN_PALIGNR256_MASK,
29181 IX86_BUILTIN_PALIGNR128_MASK,
29182 IX86_BUILTIN_MOVDQA64_256_MASK,
29183 IX86_BUILTIN_MOVDQA64_128_MASK,
29184 IX86_BUILTIN_MOVDQA32_256_MASK,
29185 IX86_BUILTIN_MOVDQA32_128_MASK,
29186 IX86_BUILTIN_MOVAPD256_MASK,
29187 IX86_BUILTIN_MOVAPD128_MASK,
29188 IX86_BUILTIN_MOVAPS256_MASK,
29189 IX86_BUILTIN_MOVAPS128_MASK,
29190 IX86_BUILTIN_MOVDQUHI256_MASK,
29191 IX86_BUILTIN_MOVDQUHI128_MASK,
29192 IX86_BUILTIN_MOVDQUQI256_MASK,
29193 IX86_BUILTIN_MOVDQUQI128_MASK,
29194 IX86_BUILTIN_MINPS128_MASK,
29195 IX86_BUILTIN_MAXPS128_MASK,
29196 IX86_BUILTIN_MINPD128_MASK,
29197 IX86_BUILTIN_MAXPD128_MASK,
29198 IX86_BUILTIN_MAXPD256_MASK,
29199 IX86_BUILTIN_MAXPS256_MASK,
29200 IX86_BUILTIN_MINPD256_MASK,
29201 IX86_BUILTIN_MINPS256_MASK,
29202 IX86_BUILTIN_MULPS128_MASK,
29203 IX86_BUILTIN_DIVPS128_MASK,
29204 IX86_BUILTIN_MULPD128_MASK,
29205 IX86_BUILTIN_DIVPD128_MASK,
29206 IX86_BUILTIN_DIVPD256_MASK,
29207 IX86_BUILTIN_DIVPS256_MASK,
29208 IX86_BUILTIN_MULPD256_MASK,
29209 IX86_BUILTIN_MULPS256_MASK,
29210 IX86_BUILTIN_ADDPD128_MASK,
29211 IX86_BUILTIN_ADDPD256_MASK,
29212 IX86_BUILTIN_ADDPS128_MASK,
29213 IX86_BUILTIN_ADDPS256_MASK,
29214 IX86_BUILTIN_SUBPD128_MASK,
29215 IX86_BUILTIN_SUBPD256_MASK,
29216 IX86_BUILTIN_SUBPS128_MASK,
29217 IX86_BUILTIN_SUBPS256_MASK,
29218 IX86_BUILTIN_XORPD256_MASK,
29219 IX86_BUILTIN_XORPD128_MASK,
29220 IX86_BUILTIN_XORPS256_MASK,
29221 IX86_BUILTIN_XORPS128_MASK,
29222 IX86_BUILTIN_ORPD256_MASK,
29223 IX86_BUILTIN_ORPD128_MASK,
29224 IX86_BUILTIN_ORPS256_MASK,
29225 IX86_BUILTIN_ORPS128_MASK,
29226 IX86_BUILTIN_BROADCASTF32x2_256,
29227 IX86_BUILTIN_BROADCASTI32x2_256,
29228 IX86_BUILTIN_BROADCASTI32x2_128,
29229 IX86_BUILTIN_BROADCASTF64X2_256,
29230 IX86_BUILTIN_BROADCASTI64X2_256,
29231 IX86_BUILTIN_BROADCASTF32X4_256,
29232 IX86_BUILTIN_BROADCASTI32X4_256,
29233 IX86_BUILTIN_EXTRACTF32X4_256,
29234 IX86_BUILTIN_EXTRACTI32X4_256,
29235 IX86_BUILTIN_DBPSADBW256,
29236 IX86_BUILTIN_DBPSADBW128,
29237 IX86_BUILTIN_CVTTPD2QQ256,
29238 IX86_BUILTIN_CVTTPD2QQ128,
29239 IX86_BUILTIN_CVTTPD2UQQ256,
29240 IX86_BUILTIN_CVTTPD2UQQ128,
29241 IX86_BUILTIN_CVTPD2QQ256,
29242 IX86_BUILTIN_CVTPD2QQ128,
29243 IX86_BUILTIN_CVTPD2UQQ256,
29244 IX86_BUILTIN_CVTPD2UQQ128,
29245 IX86_BUILTIN_CVTPD2UDQ256_MASK,
29246 IX86_BUILTIN_CVTPD2UDQ128_MASK,
29247 IX86_BUILTIN_CVTTPS2QQ256,
29248 IX86_BUILTIN_CVTTPS2QQ128,
29249 IX86_BUILTIN_CVTTPS2UQQ256,
29250 IX86_BUILTIN_CVTTPS2UQQ128,
29251 IX86_BUILTIN_CVTTPS2DQ256_MASK,
29252 IX86_BUILTIN_CVTTPS2DQ128_MASK,
29253 IX86_BUILTIN_CVTTPS2UDQ256,
29254 IX86_BUILTIN_CVTTPS2UDQ128,
29255 IX86_BUILTIN_CVTTPD2DQ256_MASK,
29256 IX86_BUILTIN_CVTTPD2DQ128_MASK,
29257 IX86_BUILTIN_CVTTPD2UDQ256_MASK,
29258 IX86_BUILTIN_CVTTPD2UDQ128_MASK,
29259 IX86_BUILTIN_CVTPD2DQ256_MASK,
29260 IX86_BUILTIN_CVTPD2DQ128_MASK,
29261 IX86_BUILTIN_CVTDQ2PD256_MASK,
29262 IX86_BUILTIN_CVTDQ2PD128_MASK,
29263 IX86_BUILTIN_CVTUDQ2PD256_MASK,
29264 IX86_BUILTIN_CVTUDQ2PD128_MASK,
29265 IX86_BUILTIN_CVTDQ2PS256_MASK,
29266 IX86_BUILTIN_CVTDQ2PS128_MASK,
29267 IX86_BUILTIN_CVTUDQ2PS256_MASK,
29268 IX86_BUILTIN_CVTUDQ2PS128_MASK,
29269 IX86_BUILTIN_CVTPS2PD256_MASK,
29270 IX86_BUILTIN_CVTPS2PD128_MASK,
29271 IX86_BUILTIN_PBROADCASTB256_MASK,
29272 IX86_BUILTIN_PBROADCASTB256_GPR_MASK,
29273 IX86_BUILTIN_PBROADCASTB128_MASK,
29274 IX86_BUILTIN_PBROADCASTB128_GPR_MASK,
29275 IX86_BUILTIN_PBROADCASTW256_MASK,
29276 IX86_BUILTIN_PBROADCASTW256_GPR_MASK,
29277 IX86_BUILTIN_PBROADCASTW128_MASK,
29278 IX86_BUILTIN_PBROADCASTW128_GPR_MASK,
29279 IX86_BUILTIN_PBROADCASTD256_MASK,
29280 IX86_BUILTIN_PBROADCASTD256_GPR_MASK,
29281 IX86_BUILTIN_PBROADCASTD128_MASK,
29282 IX86_BUILTIN_PBROADCASTD128_GPR_MASK,
29283 IX86_BUILTIN_PBROADCASTQ256_MASK,
29284 IX86_BUILTIN_PBROADCASTQ256_GPR_MASK,
29285 IX86_BUILTIN_PBROADCASTQ128_MASK,
29286 IX86_BUILTIN_PBROADCASTQ128_GPR_MASK,
29287 IX86_BUILTIN_BROADCASTSS256,
29288 IX86_BUILTIN_BROADCASTSS128,
29289 IX86_BUILTIN_BROADCASTSD256,
29290 IX86_BUILTIN_EXTRACTF64X2_256,
29291 IX86_BUILTIN_EXTRACTI64X2_256,
29292 IX86_BUILTIN_INSERTF32X4_256,
29293 IX86_BUILTIN_INSERTI32X4_256,
29294 IX86_BUILTIN_PMOVSXBW256_MASK,
29295 IX86_BUILTIN_PMOVSXBW128_MASK,
29296 IX86_BUILTIN_PMOVSXBD256_MASK,
29297 IX86_BUILTIN_PMOVSXBD128_MASK,
29298 IX86_BUILTIN_PMOVSXBQ256_MASK,
29299 IX86_BUILTIN_PMOVSXBQ128_MASK,
29300 IX86_BUILTIN_PMOVSXWD256_MASK,
29301 IX86_BUILTIN_PMOVSXWD128_MASK,
29302 IX86_BUILTIN_PMOVSXWQ256_MASK,
29303 IX86_BUILTIN_PMOVSXWQ128_MASK,
29304 IX86_BUILTIN_PMOVSXDQ256_MASK,
29305 IX86_BUILTIN_PMOVSXDQ128_MASK,
29306 IX86_BUILTIN_PMOVZXBW256_MASK,
29307 IX86_BUILTIN_PMOVZXBW128_MASK,
29308 IX86_BUILTIN_PMOVZXBD256_MASK,
29309 IX86_BUILTIN_PMOVZXBD128_MASK,
29310 IX86_BUILTIN_PMOVZXBQ256_MASK,
29311 IX86_BUILTIN_PMOVZXBQ128_MASK,
29312 IX86_BUILTIN_PMOVZXWD256_MASK,
29313 IX86_BUILTIN_PMOVZXWD128_MASK,
29314 IX86_BUILTIN_PMOVZXWQ256_MASK,
29315 IX86_BUILTIN_PMOVZXWQ128_MASK,
29316 IX86_BUILTIN_PMOVZXDQ256_MASK,
29317 IX86_BUILTIN_PMOVZXDQ128_MASK,
29318 IX86_BUILTIN_REDUCEPD256_MASK,
29319 IX86_BUILTIN_REDUCEPD128_MASK,
29320 IX86_BUILTIN_REDUCEPS256_MASK,
29321 IX86_BUILTIN_REDUCEPS128_MASK,
29322 IX86_BUILTIN_REDUCESD_MASK,
29323 IX86_BUILTIN_REDUCESS_MASK,
29324 IX86_BUILTIN_VPERMVARHI256_MASK,
29325 IX86_BUILTIN_VPERMVARHI128_MASK,
29326 IX86_BUILTIN_VPERMT2VARHI256,
29327 IX86_BUILTIN_VPERMT2VARHI256_MASKZ,
29328 IX86_BUILTIN_VPERMT2VARHI128,
29329 IX86_BUILTIN_VPERMT2VARHI128_MASKZ,
29330 IX86_BUILTIN_VPERMI2VARHI256,
29331 IX86_BUILTIN_VPERMI2VARHI128,
29332 IX86_BUILTIN_RCP14PD256,
29333 IX86_BUILTIN_RCP14PD128,
29334 IX86_BUILTIN_RCP14PS256,
29335 IX86_BUILTIN_RCP14PS128,
29336 IX86_BUILTIN_RSQRT14PD256_MASK,
29337 IX86_BUILTIN_RSQRT14PD128_MASK,
29338 IX86_BUILTIN_RSQRT14PS256_MASK,
29339 IX86_BUILTIN_RSQRT14PS128_MASK,
29340 IX86_BUILTIN_SQRTPD256_MASK,
29341 IX86_BUILTIN_SQRTPD128_MASK,
29342 IX86_BUILTIN_SQRTPS256_MASK,
29343 IX86_BUILTIN_SQRTPS128_MASK,
29344 IX86_BUILTIN_PADDB128_MASK,
29345 IX86_BUILTIN_PADDW128_MASK,
29346 IX86_BUILTIN_PADDD128_MASK,
29347 IX86_BUILTIN_PADDQ128_MASK,
29348 IX86_BUILTIN_PSUBB128_MASK,
29349 IX86_BUILTIN_PSUBW128_MASK,
29350 IX86_BUILTIN_PSUBD128_MASK,
29351 IX86_BUILTIN_PSUBQ128_MASK,
29352 IX86_BUILTIN_PADDSB128_MASK,
29353 IX86_BUILTIN_PADDSW128_MASK,
29354 IX86_BUILTIN_PSUBSB128_MASK,
29355 IX86_BUILTIN_PSUBSW128_MASK,
29356 IX86_BUILTIN_PADDUSB128_MASK,
29357 IX86_BUILTIN_PADDUSW128_MASK,
29358 IX86_BUILTIN_PSUBUSB128_MASK,
29359 IX86_BUILTIN_PSUBUSW128_MASK,
29360 IX86_BUILTIN_PADDB256_MASK,
29361 IX86_BUILTIN_PADDW256_MASK,
29362 IX86_BUILTIN_PADDD256_MASK,
29363 IX86_BUILTIN_PADDQ256_MASK,
29364 IX86_BUILTIN_PADDSB256_MASK,
29365 IX86_BUILTIN_PADDSW256_MASK,
29366 IX86_BUILTIN_PADDUSB256_MASK,
29367 IX86_BUILTIN_PADDUSW256_MASK,
29368 IX86_BUILTIN_PSUBB256_MASK,
29369 IX86_BUILTIN_PSUBW256_MASK,
29370 IX86_BUILTIN_PSUBD256_MASK,
29371 IX86_BUILTIN_PSUBQ256_MASK,
29372 IX86_BUILTIN_PSUBSB256_MASK,
29373 IX86_BUILTIN_PSUBSW256_MASK,
29374 IX86_BUILTIN_PSUBUSB256_MASK,
29375 IX86_BUILTIN_PSUBUSW256_MASK,
29376 IX86_BUILTIN_SHUF_F64x2_256,
29377 IX86_BUILTIN_SHUF_I64x2_256,
29378 IX86_BUILTIN_SHUF_I32x4_256,
29379 IX86_BUILTIN_SHUF_F32x4_256,
29380 IX86_BUILTIN_PMOVWB128,
29381 IX86_BUILTIN_PMOVWB256,
29382 IX86_BUILTIN_PMOVSWB128,
29383 IX86_BUILTIN_PMOVSWB256,
29384 IX86_BUILTIN_PMOVUSWB128,
29385 IX86_BUILTIN_PMOVUSWB256,
29386 IX86_BUILTIN_PMOVDB128,
29387 IX86_BUILTIN_PMOVDB256,
29388 IX86_BUILTIN_PMOVSDB128,
29389 IX86_BUILTIN_PMOVSDB256,
29390 IX86_BUILTIN_PMOVUSDB128,
29391 IX86_BUILTIN_PMOVUSDB256,
29392 IX86_BUILTIN_PMOVDW128,
29393 IX86_BUILTIN_PMOVDW256,
29394 IX86_BUILTIN_PMOVSDW128,
29395 IX86_BUILTIN_PMOVSDW256,
29396 IX86_BUILTIN_PMOVUSDW128,
29397 IX86_BUILTIN_PMOVUSDW256,
29398 IX86_BUILTIN_PMOVQB128,
29399 IX86_BUILTIN_PMOVQB256,
29400 IX86_BUILTIN_PMOVSQB128,
29401 IX86_BUILTIN_PMOVSQB256,
29402 IX86_BUILTIN_PMOVUSQB128,
29403 IX86_BUILTIN_PMOVUSQB256,
29404 IX86_BUILTIN_PMOVQW128,
29405 IX86_BUILTIN_PMOVQW256,
29406 IX86_BUILTIN_PMOVSQW128,
29407 IX86_BUILTIN_PMOVSQW256,
29408 IX86_BUILTIN_PMOVUSQW128,
29409 IX86_BUILTIN_PMOVUSQW256,
29410 IX86_BUILTIN_PMOVQD128,
29411 IX86_BUILTIN_PMOVQD256,
29412 IX86_BUILTIN_PMOVSQD128,
29413 IX86_BUILTIN_PMOVSQD256,
29414 IX86_BUILTIN_PMOVUSQD128,
29415 IX86_BUILTIN_PMOVUSQD256,
29416 IX86_BUILTIN_RANGEPD256,
29417 IX86_BUILTIN_RANGEPD128,
29418 IX86_BUILTIN_RANGEPS256,
29419 IX86_BUILTIN_RANGEPS128,
29420 IX86_BUILTIN_GETEXPPS256,
29421 IX86_BUILTIN_GETEXPPD256,
29422 IX86_BUILTIN_GETEXPPS128,
29423 IX86_BUILTIN_GETEXPPD128,
29424 IX86_BUILTIN_FIXUPIMMPD256_MASK,
29425 IX86_BUILTIN_FIXUPIMMPD256_MASKZ,
29426 IX86_BUILTIN_FIXUPIMMPS256_MASK,
29427 IX86_BUILTIN_FIXUPIMMPS256_MASKZ,
29428 IX86_BUILTIN_FIXUPIMMPD128_MASK,
29429 IX86_BUILTIN_FIXUPIMMPD128_MASKZ,
29430 IX86_BUILTIN_FIXUPIMMPS128_MASK,
29431 IX86_BUILTIN_FIXUPIMMPS128_MASKZ,
29432 IX86_BUILTIN_PABSQ256,
29433 IX86_BUILTIN_PABSQ128,
29434 IX86_BUILTIN_PABSD256_MASK,
29435 IX86_BUILTIN_PABSD128_MASK,
29436 IX86_BUILTIN_PMULHRSW256_MASK,
29437 IX86_BUILTIN_PMULHRSW128_MASK,
29438 IX86_BUILTIN_PMULHUW128_MASK,
29439 IX86_BUILTIN_PMULHUW256_MASK,
29440 IX86_BUILTIN_PMULHW256_MASK,
29441 IX86_BUILTIN_PMULHW128_MASK,
29442 IX86_BUILTIN_PMULLW256_MASK,
29443 IX86_BUILTIN_PMULLW128_MASK,
29444 IX86_BUILTIN_PMULLQ256,
29445 IX86_BUILTIN_PMULLQ128,
29446 IX86_BUILTIN_ANDPD256_MASK,
29447 IX86_BUILTIN_ANDPD128_MASK,
29448 IX86_BUILTIN_ANDPS256_MASK,
29449 IX86_BUILTIN_ANDPS128_MASK,
29450 IX86_BUILTIN_ANDNPD256_MASK,
29451 IX86_BUILTIN_ANDNPD128_MASK,
29452 IX86_BUILTIN_ANDNPS256_MASK,
29453 IX86_BUILTIN_ANDNPS128_MASK,
29454 IX86_BUILTIN_PSLLWI128_MASK,
29455 IX86_BUILTIN_PSLLDI128_MASK,
29456 IX86_BUILTIN_PSLLQI128_MASK,
29457 IX86_BUILTIN_PSLLW128_MASK,
29458 IX86_BUILTIN_PSLLD128_MASK,
29459 IX86_BUILTIN_PSLLQ128_MASK,
29460 IX86_BUILTIN_PSLLWI256_MASK,
29461 IX86_BUILTIN_PSLLW256_MASK,
29462 IX86_BUILTIN_PSLLDI256_MASK,
29463 IX86_BUILTIN_PSLLD256_MASK,
29464 IX86_BUILTIN_PSLLQI256_MASK,
29465 IX86_BUILTIN_PSLLQ256_MASK,
29466 IX86_BUILTIN_PSRADI128_MASK,
29467 IX86_BUILTIN_PSRAD128_MASK,
29468 IX86_BUILTIN_PSRADI256_MASK,
29469 IX86_BUILTIN_PSRAD256_MASK,
29470 IX86_BUILTIN_PSRAQI128_MASK,
29471 IX86_BUILTIN_PSRAQ128_MASK,
29472 IX86_BUILTIN_PSRAQI256_MASK,
29473 IX86_BUILTIN_PSRAQ256_MASK,
29474 IX86_BUILTIN_PANDD256,
29475 IX86_BUILTIN_PANDD128,
29476 IX86_BUILTIN_PSRLDI128_MASK,
29477 IX86_BUILTIN_PSRLD128_MASK,
29478 IX86_BUILTIN_PSRLDI256_MASK,
29479 IX86_BUILTIN_PSRLD256_MASK,
29480 IX86_BUILTIN_PSRLQI128_MASK,
29481 IX86_BUILTIN_PSRLQ128_MASK,
29482 IX86_BUILTIN_PSRLQI256_MASK,
29483 IX86_BUILTIN_PSRLQ256_MASK,
29484 IX86_BUILTIN_PANDQ256,
29485 IX86_BUILTIN_PANDQ128,
29486 IX86_BUILTIN_PANDND256,
29487 IX86_BUILTIN_PANDND128,
29488 IX86_BUILTIN_PANDNQ256,
29489 IX86_BUILTIN_PANDNQ128,
29490 IX86_BUILTIN_PORD256,
29491 IX86_BUILTIN_PORD128,
29492 IX86_BUILTIN_PORQ256,
29493 IX86_BUILTIN_PORQ128,
29494 IX86_BUILTIN_PXORD256,
29495 IX86_BUILTIN_PXORD128,
29496 IX86_BUILTIN_PXORQ256,
29497 IX86_BUILTIN_PXORQ128,
29498 IX86_BUILTIN_PACKSSWB256_MASK,
29499 IX86_BUILTIN_PACKSSWB128_MASK,
29500 IX86_BUILTIN_PACKUSWB256_MASK,
29501 IX86_BUILTIN_PACKUSWB128_MASK,
29502 IX86_BUILTIN_RNDSCALEPS256,
29503 IX86_BUILTIN_RNDSCALEPD256,
29504 IX86_BUILTIN_RNDSCALEPS128,
29505 IX86_BUILTIN_RNDSCALEPD128,
29506 IX86_BUILTIN_VTERNLOGQ256_MASK,
29507 IX86_BUILTIN_VTERNLOGQ256_MASKZ,
29508 IX86_BUILTIN_VTERNLOGD256_MASK,
29509 IX86_BUILTIN_VTERNLOGD256_MASKZ,
29510 IX86_BUILTIN_VTERNLOGQ128_MASK,
29511 IX86_BUILTIN_VTERNLOGQ128_MASKZ,
29512 IX86_BUILTIN_VTERNLOGD128_MASK,
29513 IX86_BUILTIN_VTERNLOGD128_MASKZ,
29514 IX86_BUILTIN_SCALEFPD256,
29515 IX86_BUILTIN_SCALEFPS256,
29516 IX86_BUILTIN_SCALEFPD128,
29517 IX86_BUILTIN_SCALEFPS128,
29518 IX86_BUILTIN_VFMADDPD256_MASK,
29519 IX86_BUILTIN_VFMADDPD256_MASK3,
29520 IX86_BUILTIN_VFMADDPD256_MASKZ,
29521 IX86_BUILTIN_VFMADDPD128_MASK,
29522 IX86_BUILTIN_VFMADDPD128_MASK3,
29523 IX86_BUILTIN_VFMADDPD128_MASKZ,
29524 IX86_BUILTIN_VFMADDPS256_MASK,
29525 IX86_BUILTIN_VFMADDPS256_MASK3,
29526 IX86_BUILTIN_VFMADDPS256_MASKZ,
29527 IX86_BUILTIN_VFMADDPS128_MASK,
29528 IX86_BUILTIN_VFMADDPS128_MASK3,
29529 IX86_BUILTIN_VFMADDPS128_MASKZ,
29530 IX86_BUILTIN_VFMSUBPD256_MASK3,
29531 IX86_BUILTIN_VFMSUBPD128_MASK3,
29532 IX86_BUILTIN_VFMSUBPS256_MASK3,
29533 IX86_BUILTIN_VFMSUBPS128_MASK3,
29534 IX86_BUILTIN_VFNMADDPD256_MASK,
29535 IX86_BUILTIN_VFNMADDPD128_MASK,
29536 IX86_BUILTIN_VFNMADDPS256_MASK,
29537 IX86_BUILTIN_VFNMADDPS128_MASK,
29538 IX86_BUILTIN_VFNMSUBPD256_MASK,
29539 IX86_BUILTIN_VFNMSUBPD256_MASK3,
29540 IX86_BUILTIN_VFNMSUBPD128_MASK,
29541 IX86_BUILTIN_VFNMSUBPD128_MASK3,
29542 IX86_BUILTIN_VFNMSUBPS256_MASK,
29543 IX86_BUILTIN_VFNMSUBPS256_MASK3,
29544 IX86_BUILTIN_VFNMSUBPS128_MASK,
29545 IX86_BUILTIN_VFNMSUBPS128_MASK3,
29546 IX86_BUILTIN_VFMADDSUBPD256_MASK,
29547 IX86_BUILTIN_VFMADDSUBPD256_MASK3,
29548 IX86_BUILTIN_VFMADDSUBPD256_MASKZ,
29549 IX86_BUILTIN_VFMADDSUBPD128_MASK,
29550 IX86_BUILTIN_VFMADDSUBPD128_MASK3,
29551 IX86_BUILTIN_VFMADDSUBPD128_MASKZ,
29552 IX86_BUILTIN_VFMADDSUBPS256_MASK,
29553 IX86_BUILTIN_VFMADDSUBPS256_MASK3,
29554 IX86_BUILTIN_VFMADDSUBPS256_MASKZ,
29555 IX86_BUILTIN_VFMADDSUBPS128_MASK,
29556 IX86_BUILTIN_VFMADDSUBPS128_MASK3,
29557 IX86_BUILTIN_VFMADDSUBPS128_MASKZ,
29558 IX86_BUILTIN_VFMSUBADDPD256_MASK3,
29559 IX86_BUILTIN_VFMSUBADDPD128_MASK3,
29560 IX86_BUILTIN_VFMSUBADDPS256_MASK3,
29561 IX86_BUILTIN_VFMSUBADDPS128_MASK3,
29562 IX86_BUILTIN_INSERTF64X2_256,
29563 IX86_BUILTIN_INSERTI64X2_256,
29564 IX86_BUILTIN_PSRAVV16HI,
29565 IX86_BUILTIN_PSRAVV8HI,
29566 IX86_BUILTIN_PMADDUBSW256_MASK,
29567 IX86_BUILTIN_PMADDUBSW128_MASK,
29568 IX86_BUILTIN_PMADDWD256_MASK,
29569 IX86_BUILTIN_PMADDWD128_MASK,
29570 IX86_BUILTIN_PSRLVV16HI,
29571 IX86_BUILTIN_PSRLVV8HI,
29572 IX86_BUILTIN_CVTPS2DQ256_MASK,
29573 IX86_BUILTIN_CVTPS2DQ128_MASK,
29574 IX86_BUILTIN_CVTPS2UDQ256,
29575 IX86_BUILTIN_CVTPS2UDQ128,
29576 IX86_BUILTIN_CVTPS2QQ256,
29577 IX86_BUILTIN_CVTPS2QQ128,
29578 IX86_BUILTIN_CVTPS2UQQ256,
29579 IX86_BUILTIN_CVTPS2UQQ128,
29580 IX86_BUILTIN_GETMANTPS256,
29581 IX86_BUILTIN_GETMANTPS128,
29582 IX86_BUILTIN_GETMANTPD256,
29583 IX86_BUILTIN_GETMANTPD128,
29584 IX86_BUILTIN_MOVDDUP256_MASK,
29585 IX86_BUILTIN_MOVDDUP128_MASK,
29586 IX86_BUILTIN_MOVSHDUP256_MASK,
29587 IX86_BUILTIN_MOVSHDUP128_MASK,
29588 IX86_BUILTIN_MOVSLDUP256_MASK,
29589 IX86_BUILTIN_MOVSLDUP128_MASK,
29590 IX86_BUILTIN_CVTQQ2PS256,
29591 IX86_BUILTIN_CVTQQ2PS128,
29592 IX86_BUILTIN_CVTUQQ2PS256,
29593 IX86_BUILTIN_CVTUQQ2PS128,
29594 IX86_BUILTIN_CVTQQ2PD256,
29595 IX86_BUILTIN_CVTQQ2PD128,
29596 IX86_BUILTIN_CVTUQQ2PD256,
29597 IX86_BUILTIN_CVTUQQ2PD128,
29598 IX86_BUILTIN_VPERMT2VARQ256,
29599 IX86_BUILTIN_VPERMT2VARQ256_MASKZ,
29600 IX86_BUILTIN_VPERMT2VARD256,
29601 IX86_BUILTIN_VPERMT2VARD256_MASKZ,
29602 IX86_BUILTIN_VPERMI2VARQ256,
29603 IX86_BUILTIN_VPERMI2VARD256,
29604 IX86_BUILTIN_VPERMT2VARPD256,
29605 IX86_BUILTIN_VPERMT2VARPD256_MASKZ,
29606 IX86_BUILTIN_VPERMT2VARPS256,
29607 IX86_BUILTIN_VPERMT2VARPS256_MASKZ,
29608 IX86_BUILTIN_VPERMI2VARPD256,
29609 IX86_BUILTIN_VPERMI2VARPS256,
29610 IX86_BUILTIN_VPERMT2VARQ128,
29611 IX86_BUILTIN_VPERMT2VARQ128_MASKZ,
29612 IX86_BUILTIN_VPERMT2VARD128,
29613 IX86_BUILTIN_VPERMT2VARD128_MASKZ,
29614 IX86_BUILTIN_VPERMI2VARQ128,
29615 IX86_BUILTIN_VPERMI2VARD128,
29616 IX86_BUILTIN_VPERMT2VARPD128,
29617 IX86_BUILTIN_VPERMT2VARPD128_MASKZ,
29618 IX86_BUILTIN_VPERMT2VARPS128,
29619 IX86_BUILTIN_VPERMT2VARPS128_MASKZ,
29620 IX86_BUILTIN_VPERMI2VARPD128,
29621 IX86_BUILTIN_VPERMI2VARPS128,
29622 IX86_BUILTIN_PSHUFB256_MASK,
29623 IX86_BUILTIN_PSHUFB128_MASK,
29624 IX86_BUILTIN_PSHUFHW256_MASK,
29625 IX86_BUILTIN_PSHUFHW128_MASK,
29626 IX86_BUILTIN_PSHUFLW256_MASK,
29627 IX86_BUILTIN_PSHUFLW128_MASK,
29628 IX86_BUILTIN_PSHUFD256_MASK,
29629 IX86_BUILTIN_PSHUFD128_MASK,
29630 IX86_BUILTIN_SHUFPD256_MASK,
29631 IX86_BUILTIN_SHUFPD128_MASK,
29632 IX86_BUILTIN_SHUFPS256_MASK,
29633 IX86_BUILTIN_SHUFPS128_MASK,
29634 IX86_BUILTIN_PROLVQ256,
29635 IX86_BUILTIN_PROLVQ128,
29636 IX86_BUILTIN_PROLQ256,
29637 IX86_BUILTIN_PROLQ128,
29638 IX86_BUILTIN_PRORVQ256,
29639 IX86_BUILTIN_PRORVQ128,
29640 IX86_BUILTIN_PRORQ256,
29641 IX86_BUILTIN_PRORQ128,
29642 IX86_BUILTIN_PSRAVQ128,
29643 IX86_BUILTIN_PSRAVQ256,
29644 IX86_BUILTIN_PSLLVV4DI_MASK,
29645 IX86_BUILTIN_PSLLVV2DI_MASK,
29646 IX86_BUILTIN_PSLLVV8SI_MASK,
29647 IX86_BUILTIN_PSLLVV4SI_MASK,
29648 IX86_BUILTIN_PSRAVV8SI_MASK,
29649 IX86_BUILTIN_PSRAVV4SI_MASK,
29650 IX86_BUILTIN_PSRLVV4DI_MASK,
29651 IX86_BUILTIN_PSRLVV2DI_MASK,
29652 IX86_BUILTIN_PSRLVV8SI_MASK,
29653 IX86_BUILTIN_PSRLVV4SI_MASK,
29654 IX86_BUILTIN_PSRAWI256_MASK,
29655 IX86_BUILTIN_PSRAW256_MASK,
29656 IX86_BUILTIN_PSRAWI128_MASK,
29657 IX86_BUILTIN_PSRAW128_MASK,
29658 IX86_BUILTIN_PSRLWI256_MASK,
29659 IX86_BUILTIN_PSRLW256_MASK,
29660 IX86_BUILTIN_PSRLWI128_MASK,
29661 IX86_BUILTIN_PSRLW128_MASK,
29662 IX86_BUILTIN_PRORVD256,
29663 IX86_BUILTIN_PROLVD256,
29664 IX86_BUILTIN_PRORD256,
29665 IX86_BUILTIN_PROLD256,
29666 IX86_BUILTIN_PRORVD128,
29667 IX86_BUILTIN_PROLVD128,
29668 IX86_BUILTIN_PRORD128,
29669 IX86_BUILTIN_PROLD128,
29670 IX86_BUILTIN_FPCLASSPD256,
29671 IX86_BUILTIN_FPCLASSPD128,
29672 IX86_BUILTIN_FPCLASSSD,
29673 IX86_BUILTIN_FPCLASSPS256,
29674 IX86_BUILTIN_FPCLASSPS128,
29675 IX86_BUILTIN_FPCLASSSS,
29676 IX86_BUILTIN_CVTB2MASK128,
29677 IX86_BUILTIN_CVTB2MASK256,
29678 IX86_BUILTIN_CVTW2MASK128,
29679 IX86_BUILTIN_CVTW2MASK256,
29680 IX86_BUILTIN_CVTD2MASK128,
29681 IX86_BUILTIN_CVTD2MASK256,
29682 IX86_BUILTIN_CVTQ2MASK128,
29683 IX86_BUILTIN_CVTQ2MASK256,
29684 IX86_BUILTIN_CVTMASK2B128,
29685 IX86_BUILTIN_CVTMASK2B256,
29686 IX86_BUILTIN_CVTMASK2W128,
29687 IX86_BUILTIN_CVTMASK2W256,
29688 IX86_BUILTIN_CVTMASK2D128,
29689 IX86_BUILTIN_CVTMASK2D256,
29690 IX86_BUILTIN_CVTMASK2Q128,
29691 IX86_BUILTIN_CVTMASK2Q256,
29692 IX86_BUILTIN_PCMPEQB128_MASK,
29693 IX86_BUILTIN_PCMPEQB256_MASK,
29694 IX86_BUILTIN_PCMPEQW128_MASK,
29695 IX86_BUILTIN_PCMPEQW256_MASK,
29696 IX86_BUILTIN_PCMPEQD128_MASK,
29697 IX86_BUILTIN_PCMPEQD256_MASK,
29698 IX86_BUILTIN_PCMPEQQ128_MASK,
29699 IX86_BUILTIN_PCMPEQQ256_MASK,
29700 IX86_BUILTIN_PCMPGTB128_MASK,
29701 IX86_BUILTIN_PCMPGTB256_MASK,
29702 IX86_BUILTIN_PCMPGTW128_MASK,
29703 IX86_BUILTIN_PCMPGTW256_MASK,
29704 IX86_BUILTIN_PCMPGTD128_MASK,
29705 IX86_BUILTIN_PCMPGTD256_MASK,
29706 IX86_BUILTIN_PCMPGTQ128_MASK,
29707 IX86_BUILTIN_PCMPGTQ256_MASK,
29708 IX86_BUILTIN_PTESTMB128,
29709 IX86_BUILTIN_PTESTMB256,
29710 IX86_BUILTIN_PTESTMW128,
29711 IX86_BUILTIN_PTESTMW256,
29712 IX86_BUILTIN_PTESTMD128,
29713 IX86_BUILTIN_PTESTMD256,
29714 IX86_BUILTIN_PTESTMQ128,
29715 IX86_BUILTIN_PTESTMQ256,
29716 IX86_BUILTIN_PTESTNMB128,
29717 IX86_BUILTIN_PTESTNMB256,
29718 IX86_BUILTIN_PTESTNMW128,
29719 IX86_BUILTIN_PTESTNMW256,
29720 IX86_BUILTIN_PTESTNMD128,
29721 IX86_BUILTIN_PTESTNMD256,
29722 IX86_BUILTIN_PTESTNMQ128,
29723 IX86_BUILTIN_PTESTNMQ256,
29724 IX86_BUILTIN_PBROADCASTMB128,
29725 IX86_BUILTIN_PBROADCASTMB256,
29726 IX86_BUILTIN_PBROADCASTMW128,
29727 IX86_BUILTIN_PBROADCASTMW256,
29728 IX86_BUILTIN_COMPRESSPD256,
29729 IX86_BUILTIN_COMPRESSPD128,
29730 IX86_BUILTIN_COMPRESSPS256,
29731 IX86_BUILTIN_COMPRESSPS128,
29732 IX86_BUILTIN_PCOMPRESSQ256,
29733 IX86_BUILTIN_PCOMPRESSQ128,
29734 IX86_BUILTIN_PCOMPRESSD256,
29735 IX86_BUILTIN_PCOMPRESSD128,
29736 IX86_BUILTIN_EXPANDPD256,
29737 IX86_BUILTIN_EXPANDPD128,
29738 IX86_BUILTIN_EXPANDPS256,
29739 IX86_BUILTIN_EXPANDPS128,
29740 IX86_BUILTIN_PEXPANDQ256,
29741 IX86_BUILTIN_PEXPANDQ128,
29742 IX86_BUILTIN_PEXPANDD256,
29743 IX86_BUILTIN_PEXPANDD128,
29744 IX86_BUILTIN_EXPANDPD256Z,
29745 IX86_BUILTIN_EXPANDPD128Z,
29746 IX86_BUILTIN_EXPANDPS256Z,
29747 IX86_BUILTIN_EXPANDPS128Z,
29748 IX86_BUILTIN_PEXPANDQ256Z,
29749 IX86_BUILTIN_PEXPANDQ128Z,
29750 IX86_BUILTIN_PEXPANDD256Z,
29751 IX86_BUILTIN_PEXPANDD128Z,
29752 IX86_BUILTIN_PMAXSD256_MASK,
29753 IX86_BUILTIN_PMINSD256_MASK,
29754 IX86_BUILTIN_PMAXUD256_MASK,
29755 IX86_BUILTIN_PMINUD256_MASK,
29756 IX86_BUILTIN_PMAXSD128_MASK,
29757 IX86_BUILTIN_PMINSD128_MASK,
29758 IX86_BUILTIN_PMAXUD128_MASK,
29759 IX86_BUILTIN_PMINUD128_MASK,
29760 IX86_BUILTIN_PMAXSQ256_MASK,
29761 IX86_BUILTIN_PMINSQ256_MASK,
29762 IX86_BUILTIN_PMAXUQ256_MASK,
29763 IX86_BUILTIN_PMINUQ256_MASK,
29764 IX86_BUILTIN_PMAXSQ128_MASK,
29765 IX86_BUILTIN_PMINSQ128_MASK,
29766 IX86_BUILTIN_PMAXUQ128_MASK,
29767 IX86_BUILTIN_PMINUQ128_MASK,
29768 IX86_BUILTIN_PMINSB256_MASK,
29769 IX86_BUILTIN_PMINUB256_MASK,
29770 IX86_BUILTIN_PMAXSB256_MASK,
29771 IX86_BUILTIN_PMAXUB256_MASK,
29772 IX86_BUILTIN_PMINSB128_MASK,
29773 IX86_BUILTIN_PMINUB128_MASK,
29774 IX86_BUILTIN_PMAXSB128_MASK,
29775 IX86_BUILTIN_PMAXUB128_MASK,
29776 IX86_BUILTIN_PMINSW256_MASK,
29777 IX86_BUILTIN_PMINUW256_MASK,
29778 IX86_BUILTIN_PMAXSW256_MASK,
29779 IX86_BUILTIN_PMAXUW256_MASK,
29780 IX86_BUILTIN_PMINSW128_MASK,
29781 IX86_BUILTIN_PMINUW128_MASK,
29782 IX86_BUILTIN_PMAXSW128_MASK,
29783 IX86_BUILTIN_PMAXUW128_MASK,
29784 IX86_BUILTIN_VPCONFLICTQ256,
29785 IX86_BUILTIN_VPCONFLICTD256,
29786 IX86_BUILTIN_VPCLZCNTQ256,
29787 IX86_BUILTIN_VPCLZCNTD256,
29788 IX86_BUILTIN_UNPCKHPD256_MASK,
29789 IX86_BUILTIN_UNPCKHPD128_MASK,
29790 IX86_BUILTIN_UNPCKHPS256_MASK,
29791 IX86_BUILTIN_UNPCKHPS128_MASK,
29792 IX86_BUILTIN_UNPCKLPD256_MASK,
29793 IX86_BUILTIN_UNPCKLPD128_MASK,
29794 IX86_BUILTIN_UNPCKLPS256_MASK,
29795 IX86_BUILTIN_VPCONFLICTQ128,
29796 IX86_BUILTIN_VPCONFLICTD128,
29797 IX86_BUILTIN_VPCLZCNTQ128,
29798 IX86_BUILTIN_VPCLZCNTD128,
29799 IX86_BUILTIN_UNPCKLPS128_MASK,
29800 IX86_BUILTIN_ALIGND256,
29801 IX86_BUILTIN_ALIGNQ256,
29802 IX86_BUILTIN_ALIGND128,
29803 IX86_BUILTIN_ALIGNQ128,
29804 IX86_BUILTIN_CVTPS2PH256_MASK,
29805 IX86_BUILTIN_CVTPS2PH_MASK,
29806 IX86_BUILTIN_CVTPH2PS_MASK,
29807 IX86_BUILTIN_CVTPH2PS256_MASK,
29808 IX86_BUILTIN_PUNPCKHDQ128_MASK,
29809 IX86_BUILTIN_PUNPCKHDQ256_MASK,
29810 IX86_BUILTIN_PUNPCKHQDQ128_MASK,
29811 IX86_BUILTIN_PUNPCKHQDQ256_MASK,
29812 IX86_BUILTIN_PUNPCKLDQ128_MASK,
29813 IX86_BUILTIN_PUNPCKLDQ256_MASK,
29814 IX86_BUILTIN_PUNPCKLQDQ128_MASK,
29815 IX86_BUILTIN_PUNPCKLQDQ256_MASK,
29816 IX86_BUILTIN_PUNPCKHBW128_MASK,
29817 IX86_BUILTIN_PUNPCKHBW256_MASK,
29818 IX86_BUILTIN_PUNPCKHWD128_MASK,
29819 IX86_BUILTIN_PUNPCKHWD256_MASK,
29820 IX86_BUILTIN_PUNPCKLBW128_MASK,
29821 IX86_BUILTIN_PUNPCKLBW256_MASK,
29822 IX86_BUILTIN_PUNPCKLWD128_MASK,
29823 IX86_BUILTIN_PUNPCKLWD256_MASK,
29824 IX86_BUILTIN_PSLLVV16HI,
29825 IX86_BUILTIN_PSLLVV8HI,
29826 IX86_BUILTIN_PACKSSDW256_MASK,
29827 IX86_BUILTIN_PACKSSDW128_MASK,
29828 IX86_BUILTIN_PACKUSDW256_MASK,
29829 IX86_BUILTIN_PACKUSDW128_MASK,
29830 IX86_BUILTIN_PAVGB256_MASK,
29831 IX86_BUILTIN_PAVGW256_MASK,
29832 IX86_BUILTIN_PAVGB128_MASK,
29833 IX86_BUILTIN_PAVGW128_MASK,
29834 IX86_BUILTIN_VPERMVARSF256_MASK,
29835 IX86_BUILTIN_VPERMVARDF256_MASK,
29836 IX86_BUILTIN_VPERMDF256_MASK,
29837 IX86_BUILTIN_PABSB256_MASK,
29838 IX86_BUILTIN_PABSB128_MASK,
29839 IX86_BUILTIN_PABSW256_MASK,
29840 IX86_BUILTIN_PABSW128_MASK,
29841 IX86_BUILTIN_VPERMILVARPD_MASK,
29842 IX86_BUILTIN_VPERMILVARPS_MASK,
29843 IX86_BUILTIN_VPERMILVARPD256_MASK,
29844 IX86_BUILTIN_VPERMILVARPS256_MASK,
29845 IX86_BUILTIN_VPERMILPD_MASK,
29846 IX86_BUILTIN_VPERMILPS_MASK,
29847 IX86_BUILTIN_VPERMILPD256_MASK,
29848 IX86_BUILTIN_VPERMILPS256_MASK,
29849 IX86_BUILTIN_BLENDMQ256,
29850 IX86_BUILTIN_BLENDMD256,
29851 IX86_BUILTIN_BLENDMPD256,
29852 IX86_BUILTIN_BLENDMPS256,
29853 IX86_BUILTIN_BLENDMQ128,
29854 IX86_BUILTIN_BLENDMD128,
29855 IX86_BUILTIN_BLENDMPD128,
29856 IX86_BUILTIN_BLENDMPS128,
29857 IX86_BUILTIN_BLENDMW256,
29858 IX86_BUILTIN_BLENDMB256,
29859 IX86_BUILTIN_BLENDMW128,
29860 IX86_BUILTIN_BLENDMB128,
29861 IX86_BUILTIN_PMULLD256_MASK,
29862 IX86_BUILTIN_PMULLD128_MASK,
29863 IX86_BUILTIN_PMULUDQ256_MASK,
29864 IX86_BUILTIN_PMULDQ256_MASK,
29865 IX86_BUILTIN_PMULDQ128_MASK,
29866 IX86_BUILTIN_PMULUDQ128_MASK,
29867 IX86_BUILTIN_CVTPD2PS256_MASK,
29868 IX86_BUILTIN_CVTPD2PS_MASK,
29869 IX86_BUILTIN_VPERMVARSI256_MASK,
29870 IX86_BUILTIN_VPERMVARDI256_MASK,
29871 IX86_BUILTIN_VPERMDI256_MASK,
29872 IX86_BUILTIN_CMPQ256,
29873 IX86_BUILTIN_CMPD256,
29874 IX86_BUILTIN_UCMPQ256,
29875 IX86_BUILTIN_UCMPD256,
29876 IX86_BUILTIN_CMPB256,
29877 IX86_BUILTIN_CMPW256,
29878 IX86_BUILTIN_UCMPB256,
29879 IX86_BUILTIN_UCMPW256,
29880 IX86_BUILTIN_CMPPD256_MASK,
29881 IX86_BUILTIN_CMPPS256_MASK,
29882 IX86_BUILTIN_CMPQ128,
29883 IX86_BUILTIN_CMPD128,
29884 IX86_BUILTIN_UCMPQ128,
29885 IX86_BUILTIN_UCMPD128,
29886 IX86_BUILTIN_CMPB128,
29887 IX86_BUILTIN_CMPW128,
29888 IX86_BUILTIN_UCMPB128,
29889 IX86_BUILTIN_UCMPW128,
29890 IX86_BUILTIN_CMPPD128_MASK,
29891 IX86_BUILTIN_CMPPS128_MASK,
29893 IX86_BUILTIN_GATHER3SIV8SF,
29894 IX86_BUILTIN_GATHER3SIV4SF,
29895 IX86_BUILTIN_GATHER3SIV4DF,
29896 IX86_BUILTIN_GATHER3SIV2DF,
29897 IX86_BUILTIN_GATHER3DIV8SF,
29898 IX86_BUILTIN_GATHER3DIV4SF,
29899 IX86_BUILTIN_GATHER3DIV4DF,
29900 IX86_BUILTIN_GATHER3DIV2DF,
29901 IX86_BUILTIN_GATHER3SIV8SI,
29902 IX86_BUILTIN_GATHER3SIV4SI,
29903 IX86_BUILTIN_GATHER3SIV4DI,
29904 IX86_BUILTIN_GATHER3SIV2DI,
29905 IX86_BUILTIN_GATHER3DIV8SI,
29906 IX86_BUILTIN_GATHER3DIV4SI,
29907 IX86_BUILTIN_GATHER3DIV4DI,
29908 IX86_BUILTIN_GATHER3DIV2DI,
29909 IX86_BUILTIN_SCATTERSIV8SF,
29910 IX86_BUILTIN_SCATTERSIV4SF,
29911 IX86_BUILTIN_SCATTERSIV4DF,
29912 IX86_BUILTIN_SCATTERSIV2DF,
29913 IX86_BUILTIN_SCATTERDIV8SF,
29914 IX86_BUILTIN_SCATTERDIV4SF,
29915 IX86_BUILTIN_SCATTERDIV4DF,
29916 IX86_BUILTIN_SCATTERDIV2DF,
29917 IX86_BUILTIN_SCATTERSIV8SI,
29918 IX86_BUILTIN_SCATTERSIV4SI,
29919 IX86_BUILTIN_SCATTERSIV4DI,
29920 IX86_BUILTIN_SCATTERSIV2DI,
29921 IX86_BUILTIN_SCATTERDIV8SI,
29922 IX86_BUILTIN_SCATTERDIV4SI,
29923 IX86_BUILTIN_SCATTERDIV4DI,
29924 IX86_BUILTIN_SCATTERDIV2DI,
29926 /* AVX512DQ. */
29927 IX86_BUILTIN_RANGESD128,
29928 IX86_BUILTIN_RANGESS128,
29929 IX86_BUILTIN_KUNPCKWD,
29930 IX86_BUILTIN_KUNPCKDQ,
29931 IX86_BUILTIN_BROADCASTF32x2_512,
29932 IX86_BUILTIN_BROADCASTI32x2_512,
29933 IX86_BUILTIN_BROADCASTF64X2_512,
29934 IX86_BUILTIN_BROADCASTI64X2_512,
29935 IX86_BUILTIN_BROADCASTF32X8_512,
29936 IX86_BUILTIN_BROADCASTI32X8_512,
29937 IX86_BUILTIN_EXTRACTF64X2_512,
29938 IX86_BUILTIN_EXTRACTF32X8,
29939 IX86_BUILTIN_EXTRACTI64X2_512,
29940 IX86_BUILTIN_EXTRACTI32X8,
29941 IX86_BUILTIN_REDUCEPD512_MASK,
29942 IX86_BUILTIN_REDUCEPS512_MASK,
29943 IX86_BUILTIN_PMULLQ512,
29944 IX86_BUILTIN_XORPD512,
29945 IX86_BUILTIN_XORPS512,
29946 IX86_BUILTIN_ORPD512,
29947 IX86_BUILTIN_ORPS512,
29948 IX86_BUILTIN_ANDPD512,
29949 IX86_BUILTIN_ANDPS512,
29950 IX86_BUILTIN_ANDNPD512,
29951 IX86_BUILTIN_ANDNPS512,
29952 IX86_BUILTIN_INSERTF32X8,
29953 IX86_BUILTIN_INSERTI32X8,
29954 IX86_BUILTIN_INSERTF64X2_512,
29955 IX86_BUILTIN_INSERTI64X2_512,
29956 IX86_BUILTIN_FPCLASSPD512,
29957 IX86_BUILTIN_FPCLASSPS512,
29958 IX86_BUILTIN_CVTD2MASK512,
29959 IX86_BUILTIN_CVTQ2MASK512,
29960 IX86_BUILTIN_CVTMASK2D512,
29961 IX86_BUILTIN_CVTMASK2Q512,
29962 IX86_BUILTIN_CVTPD2QQ512,
29963 IX86_BUILTIN_CVTPS2QQ512,
29964 IX86_BUILTIN_CVTPD2UQQ512,
29965 IX86_BUILTIN_CVTPS2UQQ512,
29966 IX86_BUILTIN_CVTQQ2PS512,
29967 IX86_BUILTIN_CVTUQQ2PS512,
29968 IX86_BUILTIN_CVTQQ2PD512,
29969 IX86_BUILTIN_CVTUQQ2PD512,
29970 IX86_BUILTIN_CVTTPS2QQ512,
29971 IX86_BUILTIN_CVTTPS2UQQ512,
29972 IX86_BUILTIN_CVTTPD2QQ512,
29973 IX86_BUILTIN_CVTTPD2UQQ512,
29974 IX86_BUILTIN_RANGEPS512,
29975 IX86_BUILTIN_RANGEPD512,
29977 /* AVX512BW. */
29978 IX86_BUILTIN_PACKUSDW512,
29979 IX86_BUILTIN_PACKSSDW512,
29980 IX86_BUILTIN_LOADDQUHI512_MASK,
29981 IX86_BUILTIN_LOADDQUQI512_MASK,
29982 IX86_BUILTIN_PSLLDQ512,
29983 IX86_BUILTIN_PSRLDQ512,
29984 IX86_BUILTIN_STOREDQUHI512_MASK,
29985 IX86_BUILTIN_STOREDQUQI512_MASK,
29986 IX86_BUILTIN_PALIGNR512,
29987 IX86_BUILTIN_PALIGNR512_MASK,
29988 IX86_BUILTIN_MOVDQUHI512_MASK,
29989 IX86_BUILTIN_MOVDQUQI512_MASK,
29990 IX86_BUILTIN_PSADBW512,
29991 IX86_BUILTIN_DBPSADBW512,
29992 IX86_BUILTIN_PBROADCASTB512,
29993 IX86_BUILTIN_PBROADCASTB512_GPR,
29994 IX86_BUILTIN_PBROADCASTW512,
29995 IX86_BUILTIN_PBROADCASTW512_GPR,
29996 IX86_BUILTIN_PMOVSXBW512_MASK,
29997 IX86_BUILTIN_PMOVZXBW512_MASK,
29998 IX86_BUILTIN_VPERMVARHI512_MASK,
29999 IX86_BUILTIN_VPERMT2VARHI512,
30000 IX86_BUILTIN_VPERMT2VARHI512_MASKZ,
30001 IX86_BUILTIN_VPERMI2VARHI512,
30002 IX86_BUILTIN_PAVGB512,
30003 IX86_BUILTIN_PAVGW512,
30004 IX86_BUILTIN_PADDB512,
30005 IX86_BUILTIN_PSUBB512,
30006 IX86_BUILTIN_PSUBSB512,
30007 IX86_BUILTIN_PADDSB512,
30008 IX86_BUILTIN_PSUBUSB512,
30009 IX86_BUILTIN_PADDUSB512,
30010 IX86_BUILTIN_PSUBW512,
30011 IX86_BUILTIN_PADDW512,
30012 IX86_BUILTIN_PSUBSW512,
30013 IX86_BUILTIN_PADDSW512,
30014 IX86_BUILTIN_PSUBUSW512,
30015 IX86_BUILTIN_PADDUSW512,
30016 IX86_BUILTIN_PMAXUW512,
30017 IX86_BUILTIN_PMAXSW512,
30018 IX86_BUILTIN_PMINUW512,
30019 IX86_BUILTIN_PMINSW512,
30020 IX86_BUILTIN_PMAXUB512,
30021 IX86_BUILTIN_PMAXSB512,
30022 IX86_BUILTIN_PMINUB512,
30023 IX86_BUILTIN_PMINSB512,
30024 IX86_BUILTIN_PMOVWB512,
30025 IX86_BUILTIN_PMOVSWB512,
30026 IX86_BUILTIN_PMOVUSWB512,
30027 IX86_BUILTIN_PMULHRSW512_MASK,
30028 IX86_BUILTIN_PMULHUW512_MASK,
30029 IX86_BUILTIN_PMULHW512_MASK,
30030 IX86_BUILTIN_PMULLW512_MASK,
30031 IX86_BUILTIN_PSLLWI512_MASK,
30032 IX86_BUILTIN_PSLLW512_MASK,
30033 IX86_BUILTIN_PACKSSWB512,
30034 IX86_BUILTIN_PACKUSWB512,
30035 IX86_BUILTIN_PSRAVV32HI,
30036 IX86_BUILTIN_PMADDUBSW512_MASK,
30037 IX86_BUILTIN_PMADDWD512_MASK,
30038 IX86_BUILTIN_PSRLVV32HI,
30039 IX86_BUILTIN_PUNPCKHBW512,
30040 IX86_BUILTIN_PUNPCKHWD512,
30041 IX86_BUILTIN_PUNPCKLBW512,
30042 IX86_BUILTIN_PUNPCKLWD512,
30043 IX86_BUILTIN_PSHUFB512,
30044 IX86_BUILTIN_PSHUFHW512,
30045 IX86_BUILTIN_PSHUFLW512,
30046 IX86_BUILTIN_PSRAWI512,
30047 IX86_BUILTIN_PSRAW512,
30048 IX86_BUILTIN_PSRLWI512,
30049 IX86_BUILTIN_PSRLW512,
30050 IX86_BUILTIN_CVTB2MASK512,
30051 IX86_BUILTIN_CVTW2MASK512,
30052 IX86_BUILTIN_CVTMASK2B512,
30053 IX86_BUILTIN_CVTMASK2W512,
30054 IX86_BUILTIN_PCMPEQB512_MASK,
30055 IX86_BUILTIN_PCMPEQW512_MASK,
30056 IX86_BUILTIN_PCMPGTB512_MASK,
30057 IX86_BUILTIN_PCMPGTW512_MASK,
30058 IX86_BUILTIN_PTESTMB512,
30059 IX86_BUILTIN_PTESTMW512,
30060 IX86_BUILTIN_PTESTNMB512,
30061 IX86_BUILTIN_PTESTNMW512,
30062 IX86_BUILTIN_PSLLVV32HI,
30063 IX86_BUILTIN_PABSB512,
30064 IX86_BUILTIN_PABSW512,
30065 IX86_BUILTIN_BLENDMW512,
30066 IX86_BUILTIN_BLENDMB512,
30067 IX86_BUILTIN_CMPB512,
30068 IX86_BUILTIN_CMPW512,
30069 IX86_BUILTIN_UCMPB512,
30070 IX86_BUILTIN_UCMPW512,
30072 /* Alternate 4- and 8-element gather/scatter for the vectorizer,
30073 where all operands are 32-byte or 64-byte wide, respectively. */
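/* As an illustration (assuming the usual prototypes registered later in this
file, not re-checked here): IX86_BUILTIN_GATHERSIV4DF gathers a V4DF result
from a V4SI index, while IX86_BUILTIN_GATHERALTSIV4DF takes a 32-byte V8SI
index of which only the low four elements are used, so the vectorizer can
emit a gather when the index and data vectors have different element
counts.  */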
30074 IX86_BUILTIN_GATHERALTSIV4DF,
30075 IX86_BUILTIN_GATHERALTDIV8SF,
30076 IX86_BUILTIN_GATHERALTSIV4DI,
30077 IX86_BUILTIN_GATHERALTDIV8SI,
30078 IX86_BUILTIN_GATHER3ALTDIV16SF,
30079 IX86_BUILTIN_GATHER3ALTDIV16SI,
30080 IX86_BUILTIN_GATHER3ALTSIV4DF,
30081 IX86_BUILTIN_GATHER3ALTDIV8SF,
30082 IX86_BUILTIN_GATHER3ALTSIV4DI,
30083 IX86_BUILTIN_GATHER3ALTDIV8SI,
30084 IX86_BUILTIN_GATHER3ALTSIV8DF,
30085 IX86_BUILTIN_GATHER3ALTSIV8DI,
30086 IX86_BUILTIN_GATHER3DIV16SF,
30087 IX86_BUILTIN_GATHER3DIV16SI,
30088 IX86_BUILTIN_GATHER3DIV8DF,
30089 IX86_BUILTIN_GATHER3DIV8DI,
30090 IX86_BUILTIN_GATHER3SIV16SF,
30091 IX86_BUILTIN_GATHER3SIV16SI,
30092 IX86_BUILTIN_GATHER3SIV8DF,
30093 IX86_BUILTIN_GATHER3SIV8DI,
30094 IX86_BUILTIN_SCATTERDIV16SF,
30095 IX86_BUILTIN_SCATTERDIV16SI,
30096 IX86_BUILTIN_SCATTERDIV8DF,
30097 IX86_BUILTIN_SCATTERDIV8DI,
30098 IX86_BUILTIN_SCATTERSIV16SF,
30099 IX86_BUILTIN_SCATTERSIV16SI,
30100 IX86_BUILTIN_SCATTERSIV8DF,
30101 IX86_BUILTIN_SCATTERSIV8DI,
30103 /* AVX512PF */
30104 IX86_BUILTIN_GATHERPFQPD,
30105 IX86_BUILTIN_GATHERPFDPS,
30106 IX86_BUILTIN_GATHERPFDPD,
30107 IX86_BUILTIN_GATHERPFQPS,
30108 IX86_BUILTIN_SCATTERPFDPD,
30109 IX86_BUILTIN_SCATTERPFDPS,
30110 IX86_BUILTIN_SCATTERPFQPD,
30111 IX86_BUILTIN_SCATTERPFQPS,
30113 /* AVX-512ER */
30114 IX86_BUILTIN_EXP2PD_MASK,
30115 IX86_BUILTIN_EXP2PS_MASK,
30116 IX86_BUILTIN_EXP2PS,
30117 IX86_BUILTIN_RCP28PD,
30118 IX86_BUILTIN_RCP28PS,
30119 IX86_BUILTIN_RCP28SD,
30120 IX86_BUILTIN_RCP28SS,
30121 IX86_BUILTIN_RSQRT28PD,
30122 IX86_BUILTIN_RSQRT28PS,
30123 IX86_BUILTIN_RSQRT28SD,
30124 IX86_BUILTIN_RSQRT28SS,
30126 /* AVX-512IFMA */
30127 IX86_BUILTIN_VPMADD52LUQ512,
30128 IX86_BUILTIN_VPMADD52HUQ512,
30129 IX86_BUILTIN_VPMADD52LUQ256,
30130 IX86_BUILTIN_VPMADD52HUQ256,
30131 IX86_BUILTIN_VPMADD52LUQ128,
30132 IX86_BUILTIN_VPMADD52HUQ128,
30133 IX86_BUILTIN_VPMADD52LUQ512_MASKZ,
30134 IX86_BUILTIN_VPMADD52HUQ512_MASKZ,
30135 IX86_BUILTIN_VPMADD52LUQ256_MASKZ,
30136 IX86_BUILTIN_VPMADD52HUQ256_MASKZ,
30137 IX86_BUILTIN_VPMADD52LUQ128_MASKZ,
30138 IX86_BUILTIN_VPMADD52HUQ128_MASKZ,
30140 /* AVX-512VBMI */
30141 IX86_BUILTIN_VPMULTISHIFTQB512,
30142 IX86_BUILTIN_VPMULTISHIFTQB256,
30143 IX86_BUILTIN_VPMULTISHIFTQB128,
30144 IX86_BUILTIN_VPERMVARQI512_MASK,
30145 IX86_BUILTIN_VPERMT2VARQI512,
30146 IX86_BUILTIN_VPERMT2VARQI512_MASKZ,
30147 IX86_BUILTIN_VPERMI2VARQI512,
30148 IX86_BUILTIN_VPERMVARQI256_MASK,
30149 IX86_BUILTIN_VPERMVARQI128_MASK,
30150 IX86_BUILTIN_VPERMT2VARQI256,
30151 IX86_BUILTIN_VPERMT2VARQI256_MASKZ,
30152 IX86_BUILTIN_VPERMT2VARQI128,
30153 IX86_BUILTIN_VPERMT2VARQI128_MASKZ,
30154 IX86_BUILTIN_VPERMI2VARQI256,
30155 IX86_BUILTIN_VPERMI2VARQI128,
30157 /* SHA builtins. */
30158 IX86_BUILTIN_SHA1MSG1,
30159 IX86_BUILTIN_SHA1MSG2,
30160 IX86_BUILTIN_SHA1NEXTE,
30161 IX86_BUILTIN_SHA1RNDS4,
30162 IX86_BUILTIN_SHA256MSG1,
30163 IX86_BUILTIN_SHA256MSG2,
30164 IX86_BUILTIN_SHA256RNDS2,
30166 /* CLWB instructions. */
30167 IX86_BUILTIN_CLWB,
30169 /* PCOMMIT instructions. */
30170 IX86_BUILTIN_PCOMMIT,
30172 /* CLFLUSHOPT instructions. */
30173 IX86_BUILTIN_CLFLUSHOPT,
30175 /* TFmode support builtins. */
30176 IX86_BUILTIN_INFQ,
30177 IX86_BUILTIN_HUGE_VALQ,
30178 IX86_BUILTIN_FABSQ,
30179 IX86_BUILTIN_COPYSIGNQ,
30181 /* Vectorizer support builtins. */
30182 IX86_BUILTIN_CEILPD_VEC_PACK_SFIX512,
30183 IX86_BUILTIN_CPYSGNPS,
30184 IX86_BUILTIN_CPYSGNPD,
30185 IX86_BUILTIN_CPYSGNPS256,
30186 IX86_BUILTIN_CPYSGNPS512,
30187 IX86_BUILTIN_CPYSGNPD256,
30188 IX86_BUILTIN_CPYSGNPD512,
30189 IX86_BUILTIN_FLOORPD_VEC_PACK_SFIX512,
30190 IX86_BUILTIN_ROUNDPD_AZ_VEC_PACK_SFIX512,
30193 /* FMA4 instructions. */
30194 IX86_BUILTIN_VFMADDSS,
30195 IX86_BUILTIN_VFMADDSD,
30196 IX86_BUILTIN_VFMADDPS,
30197 IX86_BUILTIN_VFMADDPD,
30198 IX86_BUILTIN_VFMADDPS256,
30199 IX86_BUILTIN_VFMADDPD256,
30200 IX86_BUILTIN_VFMADDSUBPS,
30201 IX86_BUILTIN_VFMADDSUBPD,
30202 IX86_BUILTIN_VFMADDSUBPS256,
30203 IX86_BUILTIN_VFMADDSUBPD256,
30205 /* FMA3 instructions. */
30206 IX86_BUILTIN_VFMADDSS3,
30207 IX86_BUILTIN_VFMADDSD3,
30209 /* XOP instructions. */
30210 IX86_BUILTIN_VPCMOV,
30211 IX86_BUILTIN_VPCMOV_V2DI,
30212 IX86_BUILTIN_VPCMOV_V4SI,
30213 IX86_BUILTIN_VPCMOV_V8HI,
30214 IX86_BUILTIN_VPCMOV_V16QI,
30215 IX86_BUILTIN_VPCMOV_V4SF,
30216 IX86_BUILTIN_VPCMOV_V2DF,
30217 IX86_BUILTIN_VPCMOV256,
30218 IX86_BUILTIN_VPCMOV_V4DI256,
30219 IX86_BUILTIN_VPCMOV_V8SI256,
30220 IX86_BUILTIN_VPCMOV_V16HI256,
30221 IX86_BUILTIN_VPCMOV_V32QI256,
30222 IX86_BUILTIN_VPCMOV_V8SF256,
30223 IX86_BUILTIN_VPCMOV_V4DF256,
30225 IX86_BUILTIN_VPPERM,
30227 IX86_BUILTIN_VPMACSSWW,
30228 IX86_BUILTIN_VPMACSWW,
30229 IX86_BUILTIN_VPMACSSWD,
30230 IX86_BUILTIN_VPMACSWD,
30231 IX86_BUILTIN_VPMACSSDD,
30232 IX86_BUILTIN_VPMACSDD,
30233 IX86_BUILTIN_VPMACSSDQL,
30234 IX86_BUILTIN_VPMACSSDQH,
30235 IX86_BUILTIN_VPMACSDQL,
30236 IX86_BUILTIN_VPMACSDQH,
30237 IX86_BUILTIN_VPMADCSSWD,
30238 IX86_BUILTIN_VPMADCSWD,
30240 IX86_BUILTIN_VPHADDBW,
30241 IX86_BUILTIN_VPHADDBD,
30242 IX86_BUILTIN_VPHADDBQ,
30243 IX86_BUILTIN_VPHADDWD,
30244 IX86_BUILTIN_VPHADDWQ,
30245 IX86_BUILTIN_VPHADDDQ,
30246 IX86_BUILTIN_VPHADDUBW,
30247 IX86_BUILTIN_VPHADDUBD,
30248 IX86_BUILTIN_VPHADDUBQ,
30249 IX86_BUILTIN_VPHADDUWD,
30250 IX86_BUILTIN_VPHADDUWQ,
30251 IX86_BUILTIN_VPHADDUDQ,
30252 IX86_BUILTIN_VPHSUBBW,
30253 IX86_BUILTIN_VPHSUBWD,
30254 IX86_BUILTIN_VPHSUBDQ,
30256 IX86_BUILTIN_VPROTB,
30257 IX86_BUILTIN_VPROTW,
30258 IX86_BUILTIN_VPROTD,
30259 IX86_BUILTIN_VPROTQ,
30260 IX86_BUILTIN_VPROTB_IMM,
30261 IX86_BUILTIN_VPROTW_IMM,
30262 IX86_BUILTIN_VPROTD_IMM,
30263 IX86_BUILTIN_VPROTQ_IMM,
30265 IX86_BUILTIN_VPSHLB,
30266 IX86_BUILTIN_VPSHLW,
30267 IX86_BUILTIN_VPSHLD,
30268 IX86_BUILTIN_VPSHLQ,
30269 IX86_BUILTIN_VPSHAB,
30270 IX86_BUILTIN_VPSHAW,
30271 IX86_BUILTIN_VPSHAD,
30272 IX86_BUILTIN_VPSHAQ,
30274 IX86_BUILTIN_VFRCZSS,
30275 IX86_BUILTIN_VFRCZSD,
30276 IX86_BUILTIN_VFRCZPS,
30277 IX86_BUILTIN_VFRCZPD,
30278 IX86_BUILTIN_VFRCZPS256,
30279 IX86_BUILTIN_VFRCZPD256,
30281 IX86_BUILTIN_VPCOMEQUB,
30282 IX86_BUILTIN_VPCOMNEUB,
30283 IX86_BUILTIN_VPCOMLTUB,
30284 IX86_BUILTIN_VPCOMLEUB,
30285 IX86_BUILTIN_VPCOMGTUB,
30286 IX86_BUILTIN_VPCOMGEUB,
30287 IX86_BUILTIN_VPCOMFALSEUB,
30288 IX86_BUILTIN_VPCOMTRUEUB,
30290 IX86_BUILTIN_VPCOMEQUW,
30291 IX86_BUILTIN_VPCOMNEUW,
30292 IX86_BUILTIN_VPCOMLTUW,
30293 IX86_BUILTIN_VPCOMLEUW,
30294 IX86_BUILTIN_VPCOMGTUW,
30295 IX86_BUILTIN_VPCOMGEUW,
30296 IX86_BUILTIN_VPCOMFALSEUW,
30297 IX86_BUILTIN_VPCOMTRUEUW,
30299 IX86_BUILTIN_VPCOMEQUD,
30300 IX86_BUILTIN_VPCOMNEUD,
30301 IX86_BUILTIN_VPCOMLTUD,
30302 IX86_BUILTIN_VPCOMLEUD,
30303 IX86_BUILTIN_VPCOMGTUD,
30304 IX86_BUILTIN_VPCOMGEUD,
30305 IX86_BUILTIN_VPCOMFALSEUD,
30306 IX86_BUILTIN_VPCOMTRUEUD,
30308 IX86_BUILTIN_VPCOMEQUQ,
30309 IX86_BUILTIN_VPCOMNEUQ,
30310 IX86_BUILTIN_VPCOMLTUQ,
30311 IX86_BUILTIN_VPCOMLEUQ,
30312 IX86_BUILTIN_VPCOMGTUQ,
30313 IX86_BUILTIN_VPCOMGEUQ,
30314 IX86_BUILTIN_VPCOMFALSEUQ,
30315 IX86_BUILTIN_VPCOMTRUEUQ,
30317 IX86_BUILTIN_VPCOMEQB,
30318 IX86_BUILTIN_VPCOMNEB,
30319 IX86_BUILTIN_VPCOMLTB,
30320 IX86_BUILTIN_VPCOMLEB,
30321 IX86_BUILTIN_VPCOMGTB,
30322 IX86_BUILTIN_VPCOMGEB,
30323 IX86_BUILTIN_VPCOMFALSEB,
30324 IX86_BUILTIN_VPCOMTRUEB,
30326 IX86_BUILTIN_VPCOMEQW,
30327 IX86_BUILTIN_VPCOMNEW,
30328 IX86_BUILTIN_VPCOMLTW,
30329 IX86_BUILTIN_VPCOMLEW,
30330 IX86_BUILTIN_VPCOMGTW,
30331 IX86_BUILTIN_VPCOMGEW,
30332 IX86_BUILTIN_VPCOMFALSEW,
30333 IX86_BUILTIN_VPCOMTRUEW,
30335 IX86_BUILTIN_VPCOMEQD,
30336 IX86_BUILTIN_VPCOMNED,
30337 IX86_BUILTIN_VPCOMLTD,
30338 IX86_BUILTIN_VPCOMLED,
30339 IX86_BUILTIN_VPCOMGTD,
30340 IX86_BUILTIN_VPCOMGED,
30341 IX86_BUILTIN_VPCOMFALSED,
30342 IX86_BUILTIN_VPCOMTRUED,
30344 IX86_BUILTIN_VPCOMEQQ,
30345 IX86_BUILTIN_VPCOMNEQ,
30346 IX86_BUILTIN_VPCOMLTQ,
30347 IX86_BUILTIN_VPCOMLEQ,
30348 IX86_BUILTIN_VPCOMGTQ,
30349 IX86_BUILTIN_VPCOMGEQ,
30350 IX86_BUILTIN_VPCOMFALSEQ,
30351 IX86_BUILTIN_VPCOMTRUEQ,
30353 /* LWP instructions. */
30354 IX86_BUILTIN_LLWPCB,
30355 IX86_BUILTIN_SLWPCB,
30356 IX86_BUILTIN_LWPVAL32,
30357 IX86_BUILTIN_LWPVAL64,
30358 IX86_BUILTIN_LWPINS32,
30359 IX86_BUILTIN_LWPINS64,
30361 IX86_BUILTIN_CLZS,
30363 /* RTM */
30364 IX86_BUILTIN_XBEGIN,
30365 IX86_BUILTIN_XEND,
30366 IX86_BUILTIN_XABORT,
30367 IX86_BUILTIN_XTEST,
30369 /* MPX */
30370 IX86_BUILTIN_BNDMK,
30371 IX86_BUILTIN_BNDSTX,
30372 IX86_BUILTIN_BNDLDX,
30373 IX86_BUILTIN_BNDCL,
30374 IX86_BUILTIN_BNDCU,
30375 IX86_BUILTIN_BNDRET,
30376 IX86_BUILTIN_BNDNARROW,
30377 IX86_BUILTIN_BNDINT,
30378 IX86_BUILTIN_SIZEOF,
30379 IX86_BUILTIN_BNDLOWER,
30380 IX86_BUILTIN_BNDUPPER,
30382 /* BMI instructions. */
30383 IX86_BUILTIN_BEXTR32,
30384 IX86_BUILTIN_BEXTR64,
30385 IX86_BUILTIN_CTZS,
30387 /* TBM instructions. */
30388 IX86_BUILTIN_BEXTRI32,
30389 IX86_BUILTIN_BEXTRI64,
30391 /* BMI2 instructions. */
30392 IX86_BUILTIN_BZHI32,
30393 IX86_BUILTIN_BZHI64,
30394 IX86_BUILTIN_PDEP32,
30395 IX86_BUILTIN_PDEP64,
30396 IX86_BUILTIN_PEXT32,
30397 IX86_BUILTIN_PEXT64,
30399 /* ADX instructions. */
30400 IX86_BUILTIN_ADDCARRYX32,
30401 IX86_BUILTIN_ADDCARRYX64,
30403 /* SBB instructions. */
30404 IX86_BUILTIN_SBB32,
30405 IX86_BUILTIN_SBB64,
30407 /* FSGSBASE instructions. */
30408 IX86_BUILTIN_RDFSBASE32,
30409 IX86_BUILTIN_RDFSBASE64,
30410 IX86_BUILTIN_RDGSBASE32,
30411 IX86_BUILTIN_RDGSBASE64,
30412 IX86_BUILTIN_WRFSBASE32,
30413 IX86_BUILTIN_WRFSBASE64,
30414 IX86_BUILTIN_WRGSBASE32,
30415 IX86_BUILTIN_WRGSBASE64,
30417 /* RDRND instructions. */
30418 IX86_BUILTIN_RDRAND16_STEP,
30419 IX86_BUILTIN_RDRAND32_STEP,
30420 IX86_BUILTIN_RDRAND64_STEP,
30422 /* RDSEED instructions. */
30423 IX86_BUILTIN_RDSEED16_STEP,
30424 IX86_BUILTIN_RDSEED32_STEP,
30425 IX86_BUILTIN_RDSEED64_STEP,
30427 /* F16C instructions. */
30428 IX86_BUILTIN_CVTPH2PS,
30429 IX86_BUILTIN_CVTPH2PS256,
30430 IX86_BUILTIN_CVTPS2PH,
30431 IX86_BUILTIN_CVTPS2PH256,
30433   /* CFString built-in for Darwin. */
30434 IX86_BUILTIN_CFSTRING,
30436 /* Builtins to get CPU type and supported features. */
30437 IX86_BUILTIN_CPU_INIT,
30438 IX86_BUILTIN_CPU_IS,
30439 IX86_BUILTIN_CPU_SUPPORTS,
30441 /* Read/write FLAGS register built-ins. */
30442 IX86_BUILTIN_READ_FLAGS,
30443 IX86_BUILTIN_WRITE_FLAGS,
30445 IX86_BUILTIN_MAX
30448 /* Table for the ix86 builtin decls. */
30449 static GTY(()) tree ix86_builtins[(int) IX86_BUILTIN_MAX];
30451 /* Table of all of the builtin functions that are possible with different ISAs
30452 but are waiting to be built until a function is declared to use that
30453 ISA. */
30454 struct builtin_isa {
30455 const char *name; /* function name */
30456 enum ix86_builtin_func_type tcode; /* type to use in the declaration */
30457 HOST_WIDE_INT isa; /* isa_flags this builtin is defined for */
30458 bool const_p; /* true if the declaration is constant */
30459 bool leaf_p; /* true if the declaration has leaf attribute */
30460 bool nothrow_p; /* true if the declaration has nothrow attribute */
30461   bool set_and_not_built_p;    /* true if the builtin was deferred and its
                                        decl has not been built yet */
30464 static struct builtin_isa ix86_builtins_isa[(int) IX86_BUILTIN_MAX];
30467 /* Add an ix86 target builtin function with CODE, NAME and TYPE.  Save MASK,
30468    the isa_flags the builtin requires, in the ix86_builtins_isa array.  Store
30469    the function decl in the ix86_builtins array.  Return the function decl, or
30470    NULL_TREE if the builtin was not added.
30472 If the front end has a special hook for builtin functions, delay adding
30473 builtin functions that aren't in the current ISA until the ISA is changed
30474    with function specific optimization.  Doing so can save about 300K for the
30475 default compiler. When the builtin is expanded, check at that time whether
30476 it is valid.
30478    If the front end doesn't have a special hook, record all builtins, even
30479    those whose ISA isn't part of the current ISA, in case the user uses
30480 function specific options for a different ISA, so that we don't get scope
30481 errors if a builtin is added in the middle of a function scope. */
30483 static inline tree
30484 def_builtin (HOST_WIDE_INT mask, const char *name,
30485 enum ix86_builtin_func_type tcode,
30486 enum ix86_builtins code)
30488 tree decl = NULL_TREE;
30490 if (!(mask & OPTION_MASK_ISA_64BIT) || TARGET_64BIT)
30492 ix86_builtins_isa[(int) code].isa = mask;
30494 mask &= ~OPTION_MASK_ISA_64BIT;
30495 if (mask == 0
30496 || (mask & ix86_isa_flags) != 0
30497 || (lang_hooks.builtin_function
30498 == lang_hooks.builtin_function_ext_scope))
30501 tree type = ix86_get_builtin_func_type (tcode);
30502 decl = add_builtin_function (name, type, code, BUILT_IN_MD,
30503 NULL, NULL_TREE);
30504 ix86_builtins[(int) code] = decl;
30505 ix86_builtins_isa[(int) code].set_and_not_built_p = false;
30507 else
30509 ix86_builtins[(int) code] = NULL_TREE;
30510 ix86_builtins_isa[(int) code].tcode = tcode;
30511 ix86_builtins_isa[(int) code].name = name;
30512 ix86_builtins_isa[(int) code].leaf_p = false;
30513 ix86_builtins_isa[(int) code].nothrow_p = false;
30514 ix86_builtins_isa[(int) code].const_p = false;
30515 ix86_builtins_isa[(int) code].set_and_not_built_p = true;
30519 return decl;
30522 /* Like def_builtin, but also marks the function decl "const". */
30524 static inline tree
30525 def_builtin_const (HOST_WIDE_INT mask, const char *name,
30526 enum ix86_builtin_func_type tcode, enum ix86_builtins code)
30528 tree decl = def_builtin (mask, name, tcode, code);
30529 if (decl)
30530 TREE_READONLY (decl) = 1;
30531 else
30532 ix86_builtins_isa[(int) code].const_p = true;
30534 return decl;
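/* Illustrative sketch, added for exposition and not part of the original
   file: def_builtin and def_builtin_const above are normally driven from
   the builtin-initialization routines later in this file with calls of
   roughly this shape (the builtin name and enum value here are
   hypothetical placeholders, not real entries):

       def_builtin_const (OPTION_MASK_ISA_SSE2,
                          "__builtin_ia32_example_addpd",
                          V2DF_FTYPE_V2DF_V2DF,
                          IX86_BUILTIN_EXAMPLE);

   If the requested ISA bits are not in ix86_isa_flags and the front end
   provides a separate ext-scope hook, the call only records the name, type
   and ISA in ix86_builtins_isa[] and leaves the actual declaration to
   ix86_add_new_builtins below.  */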
30537 /* Add any new builtin functions for a given ISA that may not have been
30538    declared yet.  This saves a bit of space compared to adding all of the
30539    declarations to the tree up front, whether or not they end up used. */
30541 static void
30542 ix86_add_new_builtins (HOST_WIDE_INT isa)
30544 int i;
30546 for (i = 0; i < (int)IX86_BUILTIN_MAX; i++)
30548 if ((ix86_builtins_isa[i].isa & isa) != 0
30549 && ix86_builtins_isa[i].set_and_not_built_p)
30551 tree decl, type;
30553 /* Don't define the builtin again. */
30554 ix86_builtins_isa[i].set_and_not_built_p = false;
30556 type = ix86_get_builtin_func_type (ix86_builtins_isa[i].tcode);
30557 decl = add_builtin_function_ext_scope (ix86_builtins_isa[i].name,
30558 type, i, BUILT_IN_MD, NULL,
30559 NULL_TREE);
30561 ix86_builtins[i] = decl;
30562 if (ix86_builtins_isa[i].const_p)
30563 TREE_READONLY (decl) = 1;
30564 if (ix86_builtins_isa[i].leaf_p)
30565 DECL_ATTRIBUTES (decl) = build_tree_list (get_identifier ("leaf"),
30566 NULL_TREE);
30567 if (ix86_builtins_isa[i].nothrow_p)
30568 TREE_NOTHROW (decl) = 1;
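/* Usage sketch, added for exposition (assumption, not taken from this
   file): builtins deferred by def_builtin become visible once their ISA
   is enabled through a function-specific option, e.g.

       __attribute__((target ("avx2")))
       void
       use_deferred_builtin (void)
       {
         ... a call to an AVX2 __builtin_ia32_* built lazily ...
       }

   When such a target attribute or pragma adds new bits to ix86_isa_flags,
   ix86_add_new_builtins is expected to run with those bits so the deferred
   decls exist before they are expanded.  */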
30573 /* Bits for builtin_description.flag. */
30575 /* Set when we don't support the comparison natively, and should
30576    swap the comparison operands in order to support it. */
30577 #define BUILTIN_DESC_SWAP_OPERANDS 1
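/* For example (illustrative note, added): a GT comparison a > b for which
   only the swapped form is available natively can be evaluated as b < a;
   table entries that need this treatment set BUILTIN_DESC_SWAP_OPERANDS in
   their flag field so the expander knows to exchange the two operands.  */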
30579 struct builtin_description
30581 const HOST_WIDE_INT mask;
30582 const enum insn_code icode;
30583 const char *const name;
30584 const enum ix86_builtins code;
30585 const enum rtx_code comparison;
30586 const int flag;
30589 static const struct builtin_description bdesc_comi[] =
30591 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comieq", IX86_BUILTIN_COMIEQSS, UNEQ, 0 },
30592 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comilt", IX86_BUILTIN_COMILTSS, UNLT, 0 },
30593 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comile", IX86_BUILTIN_COMILESS, UNLE, 0 },
30594 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comigt", IX86_BUILTIN_COMIGTSS, GT, 0 },
30595 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comige", IX86_BUILTIN_COMIGESS, GE, 0 },
30596 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comineq", IX86_BUILTIN_COMINEQSS, LTGT, 0 },
30597 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomieq", IX86_BUILTIN_UCOMIEQSS, UNEQ, 0 },
30598 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomilt", IX86_BUILTIN_UCOMILTSS, UNLT, 0 },
30599 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomile", IX86_BUILTIN_UCOMILESS, UNLE, 0 },
30600 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomigt", IX86_BUILTIN_UCOMIGTSS, GT, 0 },
30601 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomige", IX86_BUILTIN_UCOMIGESS, GE, 0 },
30602 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomineq", IX86_BUILTIN_UCOMINEQSS, LTGT, 0 },
30603 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdeq", IX86_BUILTIN_COMIEQSD, UNEQ, 0 },
30604 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdlt", IX86_BUILTIN_COMILTSD, UNLT, 0 },
30605 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdle", IX86_BUILTIN_COMILESD, UNLE, 0 },
30606 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdgt", IX86_BUILTIN_COMIGTSD, GT, 0 },
30607 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdge", IX86_BUILTIN_COMIGESD, GE, 0 },
30608 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdneq", IX86_BUILTIN_COMINEQSD, LTGT, 0 },
30609 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdeq", IX86_BUILTIN_UCOMIEQSD, UNEQ, 0 },
30610 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdlt", IX86_BUILTIN_UCOMILTSD, UNLT, 0 },
30611 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdle", IX86_BUILTIN_UCOMILESD, UNLE, 0 },
30612 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdgt", IX86_BUILTIN_UCOMIGTSD, GT, 0 },
30613 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdge", IX86_BUILTIN_UCOMIGESD, GE, 0 },
30614 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdneq", IX86_BUILTIN_UCOMINEQSD, LTGT, 0 },
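/* Illustrative mapping, added for exposition (assumption): an entry such
   as the __builtin_ia32_comieq one above is what ends up behind the
   user-level intrinsic _mm_comieq_ss from <xmmintrin.h>, roughly

       __m128 a, b;
       int eq = _mm_comieq_ss (a, b);

   and the rtx code recorded in the 'comparison' field (UNEQ for that
   entry) is the condition tested on the flags produced by the COMISS
   instruction.  */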
30617 static const struct builtin_description bdesc_pcmpestr[] =
30619 /* SSE4.2 */
30620 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestri128", IX86_BUILTIN_PCMPESTRI128, UNKNOWN, 0 },
30621 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestrm128", IX86_BUILTIN_PCMPESTRM128, UNKNOWN, 0 },
30622 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestria128", IX86_BUILTIN_PCMPESTRA128, UNKNOWN, (int) CCAmode },
30623 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestric128", IX86_BUILTIN_PCMPESTRC128, UNKNOWN, (int) CCCmode },
30624 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestrio128", IX86_BUILTIN_PCMPESTRO128, UNKNOWN, (int) CCOmode },
30625 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestris128", IX86_BUILTIN_PCMPESTRS128, UNKNOWN, (int) CCSmode },
30626 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestriz128", IX86_BUILTIN_PCMPESTRZ128, UNKNOWN, (int) CCZmode },
30629 static const struct builtin_description bdesc_pcmpistr[] =
30631 /* SSE4.2 */
30632 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistri128", IX86_BUILTIN_PCMPISTRI128, UNKNOWN, 0 },
30633 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistrm128", IX86_BUILTIN_PCMPISTRM128, UNKNOWN, 0 },
30634 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistria128", IX86_BUILTIN_PCMPISTRA128, UNKNOWN, (int) CCAmode },
30635 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistric128", IX86_BUILTIN_PCMPISTRC128, UNKNOWN, (int) CCCmode },
30636 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistrio128", IX86_BUILTIN_PCMPISTRO128, UNKNOWN, (int) CCOmode },
30637 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistris128", IX86_BUILTIN_PCMPISTRS128, UNKNOWN, (int) CCSmode },
30638 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistriz128", IX86_BUILTIN_PCMPISTRZ128, UNKNOWN, (int) CCZmode },
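/* Note, added and inferred from the surrounding tables: in bdesc_comi,
   bdesc_pcmpestr and bdesc_pcmpistr the trailing 'flag' slot holds either
   0 or a CC mode selecting which flags bit the builtin reads, whereas in
   the tables that follow (bdesc_special_args below and later ones) the
   same slot holds the ix86_builtin_func_type describing the builtin's
   prototype.  */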
30641 /* Special builtins with variable number of arguments. */
30642 static const struct builtin_description bdesc_special_args[] =
30644 { ~OPTION_MASK_ISA_64BIT, CODE_FOR_nothing, "__builtin_ia32_rdtsc", IX86_BUILTIN_RDTSC, UNKNOWN, (int) UINT64_FTYPE_VOID },
30645 { ~OPTION_MASK_ISA_64BIT, CODE_FOR_nothing, "__builtin_ia32_rdtscp", IX86_BUILTIN_RDTSCP, UNKNOWN, (int) UINT64_FTYPE_PUNSIGNED },
30646 { ~OPTION_MASK_ISA_64BIT, CODE_FOR_pause, "__builtin_ia32_pause", IX86_BUILTIN_PAUSE, UNKNOWN, (int) VOID_FTYPE_VOID },
30648 /* 80387 (for use internally for atomic compound assignment). */
30649 { 0, CODE_FOR_fnstenv, "__builtin_ia32_fnstenv", IX86_BUILTIN_FNSTENV, UNKNOWN, (int) VOID_FTYPE_PVOID },
30650 { 0, CODE_FOR_fldenv, "__builtin_ia32_fldenv", IX86_BUILTIN_FLDENV, UNKNOWN, (int) VOID_FTYPE_PCVOID },
30651 { 0, CODE_FOR_fnstsw, "__builtin_ia32_fnstsw", IX86_BUILTIN_FNSTSW, UNKNOWN, (int) USHORT_FTYPE_VOID },
30652 { 0, CODE_FOR_fnclex, "__builtin_ia32_fnclex", IX86_BUILTIN_FNCLEX, UNKNOWN, (int) VOID_FTYPE_VOID },
30654 /* MMX */
30655 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_emms, "__builtin_ia32_emms", IX86_BUILTIN_EMMS, UNKNOWN, (int) VOID_FTYPE_VOID },
30657 /* 3DNow! */
30658 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_femms, "__builtin_ia32_femms", IX86_BUILTIN_FEMMS, UNKNOWN, (int) VOID_FTYPE_VOID },
30660 /* FXSR, XSAVE, XSAVEOPT, XSAVEC and XSAVES. */
30661 { OPTION_MASK_ISA_FXSR, CODE_FOR_nothing, "__builtin_ia32_fxsave", IX86_BUILTIN_FXSAVE, UNKNOWN, (int) VOID_FTYPE_PVOID },
30662 { OPTION_MASK_ISA_FXSR, CODE_FOR_nothing, "__builtin_ia32_fxrstor", IX86_BUILTIN_FXRSTOR, UNKNOWN, (int) VOID_FTYPE_PVOID },
30663 { OPTION_MASK_ISA_XSAVE, CODE_FOR_nothing, "__builtin_ia32_xsave", IX86_BUILTIN_XSAVE, UNKNOWN, (int) VOID_FTYPE_PVOID_INT64 },
30664 { OPTION_MASK_ISA_XSAVE, CODE_FOR_nothing, "__builtin_ia32_xrstor", IX86_BUILTIN_XRSTOR, UNKNOWN, (int) VOID_FTYPE_PVOID_INT64 },
30665 { OPTION_MASK_ISA_XSAVEOPT, CODE_FOR_nothing, "__builtin_ia32_xsaveopt", IX86_BUILTIN_XSAVEOPT, UNKNOWN, (int) VOID_FTYPE_PVOID_INT64 },
30666 { OPTION_MASK_ISA_XSAVES, CODE_FOR_nothing, "__builtin_ia32_xsaves", IX86_BUILTIN_XSAVES, UNKNOWN, (int) VOID_FTYPE_PVOID_INT64 },
30667 { OPTION_MASK_ISA_XSAVES, CODE_FOR_nothing, "__builtin_ia32_xrstors", IX86_BUILTIN_XRSTORS, UNKNOWN, (int) VOID_FTYPE_PVOID_INT64 },
30668 { OPTION_MASK_ISA_XSAVEC, CODE_FOR_nothing, "__builtin_ia32_xsavec", IX86_BUILTIN_XSAVEC, UNKNOWN, (int) VOID_FTYPE_PVOID_INT64 },
30670 { OPTION_MASK_ISA_FXSR | OPTION_MASK_ISA_64BIT, CODE_FOR_nothing, "__builtin_ia32_fxsave64", IX86_BUILTIN_FXSAVE64, UNKNOWN, (int) VOID_FTYPE_PVOID },
30671 { OPTION_MASK_ISA_FXSR | OPTION_MASK_ISA_64BIT, CODE_FOR_nothing, "__builtin_ia32_fxrstor64", IX86_BUILTIN_FXRSTOR64, UNKNOWN, (int) VOID_FTYPE_PVOID },
30672 { OPTION_MASK_ISA_XSAVE | OPTION_MASK_ISA_64BIT, CODE_FOR_nothing, "__builtin_ia32_xsave64", IX86_BUILTIN_XSAVE64, UNKNOWN, (int) VOID_FTYPE_PVOID_INT64 },
30673 { OPTION_MASK_ISA_XSAVE | OPTION_MASK_ISA_64BIT, CODE_FOR_nothing, "__builtin_ia32_xrstor64", IX86_BUILTIN_XRSTOR64, UNKNOWN, (int) VOID_FTYPE_PVOID_INT64 },
30674 { OPTION_MASK_ISA_XSAVEOPT | OPTION_MASK_ISA_64BIT, CODE_FOR_nothing, "__builtin_ia32_xsaveopt64", IX86_BUILTIN_XSAVEOPT64, UNKNOWN, (int) VOID_FTYPE_PVOID_INT64 },
30675 { OPTION_MASK_ISA_XSAVES | OPTION_MASK_ISA_64BIT, CODE_FOR_nothing, "__builtin_ia32_xsaves64", IX86_BUILTIN_XSAVES64, UNKNOWN, (int) VOID_FTYPE_PVOID_INT64 },
30676 { OPTION_MASK_ISA_XSAVES | OPTION_MASK_ISA_64BIT, CODE_FOR_nothing, "__builtin_ia32_xrstors64", IX86_BUILTIN_XRSTORS64, UNKNOWN, (int) VOID_FTYPE_PVOID_INT64 },
30677 { OPTION_MASK_ISA_XSAVEC | OPTION_MASK_ISA_64BIT, CODE_FOR_nothing, "__builtin_ia32_xsavec64", IX86_BUILTIN_XSAVEC64, UNKNOWN, (int) VOID_FTYPE_PVOID_INT64 },
30679 /* SSE */
30680 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_storeups, "__builtin_ia32_storeups", IX86_BUILTIN_STOREUPS, UNKNOWN, (int) VOID_FTYPE_PFLOAT_V4SF },
30681 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_movntv4sf, "__builtin_ia32_movntps", IX86_BUILTIN_MOVNTPS, UNKNOWN, (int) VOID_FTYPE_PFLOAT_V4SF },
30682 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_loadups, "__builtin_ia32_loadups", IX86_BUILTIN_LOADUPS, UNKNOWN, (int) V4SF_FTYPE_PCFLOAT },
30684 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_loadhps_exp, "__builtin_ia32_loadhps", IX86_BUILTIN_LOADHPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_PCV2SF },
30685 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_loadlps_exp, "__builtin_ia32_loadlps", IX86_BUILTIN_LOADLPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_PCV2SF },
30686 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_storehps, "__builtin_ia32_storehps", IX86_BUILTIN_STOREHPS, UNKNOWN, (int) VOID_FTYPE_PV2SF_V4SF },
30687 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_storelps, "__builtin_ia32_storelps", IX86_BUILTIN_STORELPS, UNKNOWN, (int) VOID_FTYPE_PV2SF_V4SF },
30689 /* SSE or 3DNow!A */
30690 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_sse_sfence, "__builtin_ia32_sfence", IX86_BUILTIN_SFENCE, UNKNOWN, (int) VOID_FTYPE_VOID },
30691 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_sse_movntq, "__builtin_ia32_movntq", IX86_BUILTIN_MOVNTQ, UNKNOWN, (int) VOID_FTYPE_PULONGLONG_ULONGLONG },
30693 /* SSE2 */
30694 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_lfence, "__builtin_ia32_lfence", IX86_BUILTIN_LFENCE, UNKNOWN, (int) VOID_FTYPE_VOID },
30695 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_mfence, 0, IX86_BUILTIN_MFENCE, UNKNOWN, (int) VOID_FTYPE_VOID },
30696 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_storeupd, "__builtin_ia32_storeupd", IX86_BUILTIN_STOREUPD, UNKNOWN, (int) VOID_FTYPE_PDOUBLE_V2DF },
30697 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_storedquv16qi, "__builtin_ia32_storedqu", IX86_BUILTIN_STOREDQU, UNKNOWN, (int) VOID_FTYPE_PCHAR_V16QI },
30698 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movntv2df, "__builtin_ia32_movntpd", IX86_BUILTIN_MOVNTPD, UNKNOWN, (int) VOID_FTYPE_PDOUBLE_V2DF },
30699 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movntv2di, "__builtin_ia32_movntdq", IX86_BUILTIN_MOVNTDQ, UNKNOWN, (int) VOID_FTYPE_PV2DI_V2DI },
30700 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movntisi, "__builtin_ia32_movnti", IX86_BUILTIN_MOVNTI, UNKNOWN, (int) VOID_FTYPE_PINT_INT },
30701 { OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_64BIT, CODE_FOR_sse2_movntidi, "__builtin_ia32_movnti64", IX86_BUILTIN_MOVNTI64, UNKNOWN, (int) VOID_FTYPE_PLONGLONG_LONGLONG },
30702 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_loadupd, "__builtin_ia32_loadupd", IX86_BUILTIN_LOADUPD, UNKNOWN, (int) V2DF_FTYPE_PCDOUBLE },
30703 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_loaddquv16qi, "__builtin_ia32_loaddqu", IX86_BUILTIN_LOADDQU, UNKNOWN, (int) V16QI_FTYPE_PCCHAR },
30705 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_loadhpd_exp, "__builtin_ia32_loadhpd", IX86_BUILTIN_LOADHPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_PCDOUBLE },
30706 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_loadlpd_exp, "__builtin_ia32_loadlpd", IX86_BUILTIN_LOADLPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_PCDOUBLE },
30708 /* SSE3 */
30709 { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_lddqu, "__builtin_ia32_lddqu", IX86_BUILTIN_LDDQU, UNKNOWN, (int) V16QI_FTYPE_PCCHAR },
30711 /* SSE4.1 */
30712 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_movntdqa, "__builtin_ia32_movntdqa", IX86_BUILTIN_MOVNTDQA, UNKNOWN, (int) V2DI_FTYPE_PV2DI },
30714 /* SSE4A */
30715 { OPTION_MASK_ISA_SSE4A, CODE_FOR_sse4a_vmmovntv2df, "__builtin_ia32_movntsd", IX86_BUILTIN_MOVNTSD, UNKNOWN, (int) VOID_FTYPE_PDOUBLE_V2DF },
30716 { OPTION_MASK_ISA_SSE4A, CODE_FOR_sse4a_vmmovntv4sf, "__builtin_ia32_movntss", IX86_BUILTIN_MOVNTSS, UNKNOWN, (int) VOID_FTYPE_PFLOAT_V4SF },
30718 /* AVX */
30719 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vzeroall, "__builtin_ia32_vzeroall", IX86_BUILTIN_VZEROALL, UNKNOWN, (int) VOID_FTYPE_VOID },
30720 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vzeroupper, "__builtin_ia32_vzeroupper", IX86_BUILTIN_VZEROUPPER, UNKNOWN, (int) VOID_FTYPE_VOID },
30722 { OPTION_MASK_ISA_AVX, CODE_FOR_vec_dupv4sf, "__builtin_ia32_vbroadcastss", IX86_BUILTIN_VBROADCASTSS, UNKNOWN, (int) V4SF_FTYPE_PCFLOAT },
30723 { OPTION_MASK_ISA_AVX, CODE_FOR_vec_dupv4df, "__builtin_ia32_vbroadcastsd256", IX86_BUILTIN_VBROADCASTSD256, UNKNOWN, (int) V4DF_FTYPE_PCDOUBLE },
30724 { OPTION_MASK_ISA_AVX, CODE_FOR_vec_dupv8sf, "__builtin_ia32_vbroadcastss256", IX86_BUILTIN_VBROADCASTSS256, UNKNOWN, (int) V8SF_FTYPE_PCFLOAT },
30725 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vbroadcastf128_v4df, "__builtin_ia32_vbroadcastf128_pd256", IX86_BUILTIN_VBROADCASTPD256, UNKNOWN, (int) V4DF_FTYPE_PCV2DF },
30726 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vbroadcastf128_v8sf, "__builtin_ia32_vbroadcastf128_ps256", IX86_BUILTIN_VBROADCASTPS256, UNKNOWN, (int) V8SF_FTYPE_PCV4SF },
30728 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_loadupd256, "__builtin_ia32_loadupd256", IX86_BUILTIN_LOADUPD256, UNKNOWN, (int) V4DF_FTYPE_PCDOUBLE },
30729 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_loadups256, "__builtin_ia32_loadups256", IX86_BUILTIN_LOADUPS256, UNKNOWN, (int) V8SF_FTYPE_PCFLOAT },
30730 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_storeupd256, "__builtin_ia32_storeupd256", IX86_BUILTIN_STOREUPD256, UNKNOWN, (int) VOID_FTYPE_PDOUBLE_V4DF },
30731 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_storeups256, "__builtin_ia32_storeups256", IX86_BUILTIN_STOREUPS256, UNKNOWN, (int) VOID_FTYPE_PFLOAT_V8SF },
30732 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_loaddquv32qi, "__builtin_ia32_loaddqu256", IX86_BUILTIN_LOADDQU256, UNKNOWN, (int) V32QI_FTYPE_PCCHAR },
30733 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_storedquv32qi, "__builtin_ia32_storedqu256", IX86_BUILTIN_STOREDQU256, UNKNOWN, (int) VOID_FTYPE_PCHAR_V32QI },
30734 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_lddqu256, "__builtin_ia32_lddqu256", IX86_BUILTIN_LDDQU256, UNKNOWN, (int) V32QI_FTYPE_PCCHAR },
30736 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movntv4di, "__builtin_ia32_movntdq256", IX86_BUILTIN_MOVNTDQ256, UNKNOWN, (int) VOID_FTYPE_PV4DI_V4DI },
30737 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movntv4df, "__builtin_ia32_movntpd256", IX86_BUILTIN_MOVNTPD256, UNKNOWN, (int) VOID_FTYPE_PDOUBLE_V4DF },
30738 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movntv8sf, "__builtin_ia32_movntps256", IX86_BUILTIN_MOVNTPS256, UNKNOWN, (int) VOID_FTYPE_PFLOAT_V8SF },
30740 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskloadpd, "__builtin_ia32_maskloadpd", IX86_BUILTIN_MASKLOADPD, UNKNOWN, (int) V2DF_FTYPE_PCV2DF_V2DI },
30741 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskloadps, "__builtin_ia32_maskloadps", IX86_BUILTIN_MASKLOADPS, UNKNOWN, (int) V4SF_FTYPE_PCV4SF_V4SI },
30742 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskloadpd256, "__builtin_ia32_maskloadpd256", IX86_BUILTIN_MASKLOADPD256, UNKNOWN, (int) V4DF_FTYPE_PCV4DF_V4DI },
30743 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskloadps256, "__builtin_ia32_maskloadps256", IX86_BUILTIN_MASKLOADPS256, UNKNOWN, (int) V8SF_FTYPE_PCV8SF_V8SI },
30744 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskstorepd, "__builtin_ia32_maskstorepd", IX86_BUILTIN_MASKSTOREPD, UNKNOWN, (int) VOID_FTYPE_PV2DF_V2DI_V2DF },
30745 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskstoreps, "__builtin_ia32_maskstoreps", IX86_BUILTIN_MASKSTOREPS, UNKNOWN, (int) VOID_FTYPE_PV4SF_V4SI_V4SF },
30746 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskstorepd256, "__builtin_ia32_maskstorepd256", IX86_BUILTIN_MASKSTOREPD256, UNKNOWN, (int) VOID_FTYPE_PV4DF_V4DI_V4DF },
30747 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskstoreps256, "__builtin_ia32_maskstoreps256", IX86_BUILTIN_MASKSTOREPS256, UNKNOWN, (int) VOID_FTYPE_PV8SF_V8SI_V8SF },
30749 /* AVX2 */
30750 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_movntdqa, "__builtin_ia32_movntdqa256", IX86_BUILTIN_MOVNTDQA256, UNKNOWN, (int) V4DI_FTYPE_PV4DI },
30751 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_maskloadd, "__builtin_ia32_maskloadd", IX86_BUILTIN_MASKLOADD, UNKNOWN, (int) V4SI_FTYPE_PCV4SI_V4SI },
30752 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_maskloadq, "__builtin_ia32_maskloadq", IX86_BUILTIN_MASKLOADQ, UNKNOWN, (int) V2DI_FTYPE_PCV2DI_V2DI },
30753 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_maskloadd256, "__builtin_ia32_maskloadd256", IX86_BUILTIN_MASKLOADD256, UNKNOWN, (int) V8SI_FTYPE_PCV8SI_V8SI },
30754 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_maskloadq256, "__builtin_ia32_maskloadq256", IX86_BUILTIN_MASKLOADQ256, UNKNOWN, (int) V4DI_FTYPE_PCV4DI_V4DI },
30755 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_maskstored, "__builtin_ia32_maskstored", IX86_BUILTIN_MASKSTORED, UNKNOWN, (int) VOID_FTYPE_PV4SI_V4SI_V4SI },
30756 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_maskstoreq, "__builtin_ia32_maskstoreq", IX86_BUILTIN_MASKSTOREQ, UNKNOWN, (int) VOID_FTYPE_PV2DI_V2DI_V2DI },
30757 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_maskstored256, "__builtin_ia32_maskstored256", IX86_BUILTIN_MASKSTORED256, UNKNOWN, (int) VOID_FTYPE_PV8SI_V8SI_V8SI },
30758 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_maskstoreq256, "__builtin_ia32_maskstoreq256", IX86_BUILTIN_MASKSTOREQ256, UNKNOWN, (int) VOID_FTYPE_PV4DI_V4DI_V4DI },
30760 /* AVX512F */
30761 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_compressstorev16sf_mask, "__builtin_ia32_compressstoresf512_mask", IX86_BUILTIN_COMPRESSPSSTORE512, UNKNOWN, (int) VOID_FTYPE_PV16SF_V16SF_HI },
30762 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_compressstorev16si_mask, "__builtin_ia32_compressstoresi512_mask", IX86_BUILTIN_PCOMPRESSDSTORE512, UNKNOWN, (int) VOID_FTYPE_PV16SI_V16SI_HI },
30763 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_compressstorev8df_mask, "__builtin_ia32_compressstoredf512_mask", IX86_BUILTIN_COMPRESSPDSTORE512, UNKNOWN, (int) VOID_FTYPE_PV8DF_V8DF_QI },
30764 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_compressstorev8di_mask, "__builtin_ia32_compressstoredi512_mask", IX86_BUILTIN_PCOMPRESSQSTORE512, UNKNOWN, (int) VOID_FTYPE_PV8DI_V8DI_QI },
30765 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_expandv16sf_mask, "__builtin_ia32_expandloadsf512_mask", IX86_BUILTIN_EXPANDPSLOAD512, UNKNOWN, (int) V16SF_FTYPE_PCV16SF_V16SF_HI },
30766 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_expandv16sf_maskz, "__builtin_ia32_expandloadsf512_maskz", IX86_BUILTIN_EXPANDPSLOAD512Z, UNKNOWN, (int) V16SF_FTYPE_PCV16SF_V16SF_HI },
30767 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_expandv16si_mask, "__builtin_ia32_expandloadsi512_mask", IX86_BUILTIN_PEXPANDDLOAD512, UNKNOWN, (int) V16SI_FTYPE_PCV16SI_V16SI_HI },
30768 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_expandv16si_maskz, "__builtin_ia32_expandloadsi512_maskz", IX86_BUILTIN_PEXPANDDLOAD512Z, UNKNOWN, (int) V16SI_FTYPE_PCV16SI_V16SI_HI },
30769 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_expandv8df_mask, "__builtin_ia32_expandloaddf512_mask", IX86_BUILTIN_EXPANDPDLOAD512, UNKNOWN, (int) V8DF_FTYPE_PCV8DF_V8DF_QI },
30770 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_expandv8df_maskz, "__builtin_ia32_expandloaddf512_maskz", IX86_BUILTIN_EXPANDPDLOAD512Z, UNKNOWN, (int) V8DF_FTYPE_PCV8DF_V8DF_QI },
30771 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_expandv8di_mask, "__builtin_ia32_expandloaddi512_mask", IX86_BUILTIN_PEXPANDQLOAD512, UNKNOWN, (int) V8DI_FTYPE_PCV8DI_V8DI_QI },
30772 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_expandv8di_maskz, "__builtin_ia32_expandloaddi512_maskz", IX86_BUILTIN_PEXPANDQLOAD512Z, UNKNOWN, (int) V8DI_FTYPE_PCV8DI_V8DI_QI },
30773 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_loaddquv16si_mask, "__builtin_ia32_loaddqusi512_mask", IX86_BUILTIN_LOADDQUSI512, UNKNOWN, (int) V16SI_FTYPE_PCV16SI_V16SI_HI },
30774 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_loaddquv8di_mask, "__builtin_ia32_loaddqudi512_mask", IX86_BUILTIN_LOADDQUDI512, UNKNOWN, (int) V8DI_FTYPE_PCV8DI_V8DI_QI },
30775 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_loadupd512_mask, "__builtin_ia32_loadupd512_mask", IX86_BUILTIN_LOADUPD512, UNKNOWN, (int) V8DF_FTYPE_PCV8DF_V8DF_QI },
30776 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_loadups512_mask, "__builtin_ia32_loadups512_mask", IX86_BUILTIN_LOADUPS512, UNKNOWN, (int) V16SF_FTYPE_PCV16SF_V16SF_HI },
30777 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_loadv16sf_mask, "__builtin_ia32_loadaps512_mask", IX86_BUILTIN_LOADAPS512, UNKNOWN, (int) V16SF_FTYPE_PCV16SF_V16SF_HI },
30778 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_loadv16si_mask, "__builtin_ia32_movdqa32load512_mask", IX86_BUILTIN_MOVDQA32LOAD512, UNKNOWN, (int) V16SI_FTYPE_PCV16SI_V16SI_HI },
30779 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_loadv8df_mask, "__builtin_ia32_loadapd512_mask", IX86_BUILTIN_LOADAPD512, UNKNOWN, (int) V8DF_FTYPE_PCV8DF_V8DF_QI },
30780 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_loadv8di_mask, "__builtin_ia32_movdqa64load512_mask", IX86_BUILTIN_MOVDQA64LOAD512, UNKNOWN, (int) V8DI_FTYPE_PCV8DI_V8DI_QI },
30781 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_movntv16sf, "__builtin_ia32_movntps512", IX86_BUILTIN_MOVNTPS512, UNKNOWN, (int) VOID_FTYPE_PFLOAT_V16SF },
30782 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_movntv8df, "__builtin_ia32_movntpd512", IX86_BUILTIN_MOVNTPD512, UNKNOWN, (int) VOID_FTYPE_PDOUBLE_V8DF },
30783 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_movntv8di, "__builtin_ia32_movntdq512", IX86_BUILTIN_MOVNTDQ512, UNKNOWN, (int) VOID_FTYPE_PV8DI_V8DI },
30784 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_movntdqa, "__builtin_ia32_movntdqa512", IX86_BUILTIN_MOVNTDQA512, UNKNOWN, (int) V8DI_FTYPE_PV8DI },
30785 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_storedquv16si_mask, "__builtin_ia32_storedqusi512_mask", IX86_BUILTIN_STOREDQUSI512, UNKNOWN, (int) VOID_FTYPE_PV16SI_V16SI_HI },
30786 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_storedquv8di_mask, "__builtin_ia32_storedqudi512_mask", IX86_BUILTIN_STOREDQUDI512, UNKNOWN, (int) VOID_FTYPE_PV8DI_V8DI_QI },
30787 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_storeupd512_mask, "__builtin_ia32_storeupd512_mask", IX86_BUILTIN_STOREUPD512, UNKNOWN, (int) VOID_FTYPE_PV8DF_V8DF_QI },
30788 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_us_truncatev8div8si2_mask_store, "__builtin_ia32_pmovusqd512mem_mask", IX86_BUILTIN_PMOVUSQD512_MEM, UNKNOWN, (int) VOID_FTYPE_PV8SI_V8DI_QI },
30789 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_ss_truncatev8div8si2_mask_store, "__builtin_ia32_pmovsqd512mem_mask", IX86_BUILTIN_PMOVSQD512_MEM, UNKNOWN, (int) VOID_FTYPE_PV8SI_V8DI_QI },
30790 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_truncatev8div8si2_mask_store, "__builtin_ia32_pmovqd512mem_mask", IX86_BUILTIN_PMOVQD512_MEM, UNKNOWN, (int) VOID_FTYPE_PV8SI_V8DI_QI },
30791 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_us_truncatev8div8hi2_mask_store, "__builtin_ia32_pmovusqw512mem_mask", IX86_BUILTIN_PMOVUSQW512_MEM, UNKNOWN, (int) VOID_FTYPE_PV8HI_V8DI_QI },
30792 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_ss_truncatev8div8hi2_mask_store, "__builtin_ia32_pmovsqw512mem_mask", IX86_BUILTIN_PMOVSQW512_MEM, UNKNOWN, (int) VOID_FTYPE_PV8HI_V8DI_QI },
30793 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_truncatev8div8hi2_mask_store, "__builtin_ia32_pmovqw512mem_mask", IX86_BUILTIN_PMOVQW512_MEM, UNKNOWN, (int) VOID_FTYPE_PV8HI_V8DI_QI },
30794 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_us_truncatev16siv16hi2_mask_store, "__builtin_ia32_pmovusdw512mem_mask", IX86_BUILTIN_PMOVUSDW512_MEM, UNKNOWN, (int) VOID_FTYPE_PV16HI_V16SI_HI },
30795 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_ss_truncatev16siv16hi2_mask_store, "__builtin_ia32_pmovsdw512mem_mask", IX86_BUILTIN_PMOVSDW512_MEM, UNKNOWN, (int) VOID_FTYPE_PV16HI_V16SI_HI },
30796 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_truncatev16siv16hi2_mask_store, "__builtin_ia32_pmovdw512mem_mask", IX86_BUILTIN_PMOVDW512_MEM, UNKNOWN, (int) VOID_FTYPE_PV16HI_V16SI_HI },
30797 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_truncatev8div16qi2_mask_store, "__builtin_ia32_pmovqb512mem_mask", IX86_BUILTIN_PMOVQB512_MEM, UNKNOWN, (int) VOID_FTYPE_PV16QI_V8DI_QI },
30798 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_us_truncatev8div16qi2_mask_store, "__builtin_ia32_pmovusqb512mem_mask", IX86_BUILTIN_PMOVUSQB512_MEM, UNKNOWN, (int) VOID_FTYPE_PV16QI_V8DI_QI },
30799 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_ss_truncatev8div16qi2_mask_store, "__builtin_ia32_pmovsqb512mem_mask", IX86_BUILTIN_PMOVSQB512_MEM, UNKNOWN, (int) VOID_FTYPE_PV16QI_V8DI_QI },
30800 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_us_truncatev16siv16qi2_mask_store, "__builtin_ia32_pmovusdb512mem_mask", IX86_BUILTIN_PMOVUSDB512_MEM, UNKNOWN, (int) VOID_FTYPE_PV16QI_V16SI_HI },
30801 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_ss_truncatev16siv16qi2_mask_store, "__builtin_ia32_pmovsdb512mem_mask", IX86_BUILTIN_PMOVSDB512_MEM, UNKNOWN, (int) VOID_FTYPE_PV16QI_V16SI_HI },
30802 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_truncatev16siv16qi2_mask_store, "__builtin_ia32_pmovdb512mem_mask", IX86_BUILTIN_PMOVDB512_MEM, UNKNOWN, (int) VOID_FTYPE_PV16QI_V16SI_HI },
30803 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_storeups512_mask, "__builtin_ia32_storeups512_mask", IX86_BUILTIN_STOREUPS512, UNKNOWN, (int) VOID_FTYPE_PV16SF_V16SF_HI },
30804 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_storev16sf_mask, "__builtin_ia32_storeaps512_mask", IX86_BUILTIN_STOREAPS512, UNKNOWN, (int) VOID_FTYPE_PV16SF_V16SF_HI },
30805 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_storev16si_mask, "__builtin_ia32_movdqa32store512_mask", IX86_BUILTIN_MOVDQA32STORE512, UNKNOWN, (int) VOID_FTYPE_PV16SI_V16SI_HI },
30806 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_storev8df_mask, "__builtin_ia32_storeapd512_mask", IX86_BUILTIN_STOREAPD512, UNKNOWN, (int) VOID_FTYPE_PV8DF_V8DF_QI },
30807 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_storev8di_mask, "__builtin_ia32_movdqa64store512_mask", IX86_BUILTIN_MOVDQA64STORE512, UNKNOWN, (int) VOID_FTYPE_PV8DI_V8DI_QI },
30809 { OPTION_MASK_ISA_LWP, CODE_FOR_lwp_llwpcb, "__builtin_ia32_llwpcb", IX86_BUILTIN_LLWPCB, UNKNOWN, (int) VOID_FTYPE_PVOID },
30810 { OPTION_MASK_ISA_LWP, CODE_FOR_lwp_slwpcb, "__builtin_ia32_slwpcb", IX86_BUILTIN_SLWPCB, UNKNOWN, (int) PVOID_FTYPE_VOID },
30811 { OPTION_MASK_ISA_LWP, CODE_FOR_lwp_lwpvalsi3, "__builtin_ia32_lwpval32", IX86_BUILTIN_LWPVAL32, UNKNOWN, (int) VOID_FTYPE_UINT_UINT_UINT },
30812 { OPTION_MASK_ISA_LWP, CODE_FOR_lwp_lwpvaldi3, "__builtin_ia32_lwpval64", IX86_BUILTIN_LWPVAL64, UNKNOWN, (int) VOID_FTYPE_UINT64_UINT_UINT },
30813 { OPTION_MASK_ISA_LWP, CODE_FOR_lwp_lwpinssi3, "__builtin_ia32_lwpins32", IX86_BUILTIN_LWPINS32, UNKNOWN, (int) UCHAR_FTYPE_UINT_UINT_UINT },
30814 { OPTION_MASK_ISA_LWP, CODE_FOR_lwp_lwpinsdi3, "__builtin_ia32_lwpins64", IX86_BUILTIN_LWPINS64, UNKNOWN, (int) UCHAR_FTYPE_UINT64_UINT_UINT },
30816 /* FSGSBASE */
30817 { OPTION_MASK_ISA_FSGSBASE | OPTION_MASK_ISA_64BIT, CODE_FOR_rdfsbasesi, "__builtin_ia32_rdfsbase32", IX86_BUILTIN_RDFSBASE32, UNKNOWN, (int) UNSIGNED_FTYPE_VOID },
30818 { OPTION_MASK_ISA_FSGSBASE | OPTION_MASK_ISA_64BIT, CODE_FOR_rdfsbasedi, "__builtin_ia32_rdfsbase64", IX86_BUILTIN_RDFSBASE64, UNKNOWN, (int) UINT64_FTYPE_VOID },
30819 { OPTION_MASK_ISA_FSGSBASE | OPTION_MASK_ISA_64BIT, CODE_FOR_rdgsbasesi, "__builtin_ia32_rdgsbase32", IX86_BUILTIN_RDGSBASE32, UNKNOWN, (int) UNSIGNED_FTYPE_VOID },
30820 { OPTION_MASK_ISA_FSGSBASE | OPTION_MASK_ISA_64BIT, CODE_FOR_rdgsbasedi, "__builtin_ia32_rdgsbase64", IX86_BUILTIN_RDGSBASE64, UNKNOWN, (int) UINT64_FTYPE_VOID },
30821 { OPTION_MASK_ISA_FSGSBASE | OPTION_MASK_ISA_64BIT, CODE_FOR_wrfsbasesi, "__builtin_ia32_wrfsbase32", IX86_BUILTIN_WRFSBASE32, UNKNOWN, (int) VOID_FTYPE_UNSIGNED },
30822 { OPTION_MASK_ISA_FSGSBASE | OPTION_MASK_ISA_64BIT, CODE_FOR_wrfsbasedi, "__builtin_ia32_wrfsbase64", IX86_BUILTIN_WRFSBASE64, UNKNOWN, (int) VOID_FTYPE_UINT64 },
30823 { OPTION_MASK_ISA_FSGSBASE | OPTION_MASK_ISA_64BIT, CODE_FOR_wrgsbasesi, "__builtin_ia32_wrgsbase32", IX86_BUILTIN_WRGSBASE32, UNKNOWN, (int) VOID_FTYPE_UNSIGNED },
30824 { OPTION_MASK_ISA_FSGSBASE | OPTION_MASK_ISA_64BIT, CODE_FOR_wrgsbasedi, "__builtin_ia32_wrgsbase64", IX86_BUILTIN_WRGSBASE64, UNKNOWN, (int) VOID_FTYPE_UINT64 },
30826 /* RTM */
30827 { OPTION_MASK_ISA_RTM, CODE_FOR_xbegin, "__builtin_ia32_xbegin", IX86_BUILTIN_XBEGIN, UNKNOWN, (int) UNSIGNED_FTYPE_VOID },
30828 { OPTION_MASK_ISA_RTM, CODE_FOR_xend, "__builtin_ia32_xend", IX86_BUILTIN_XEND, UNKNOWN, (int) VOID_FTYPE_VOID },
30829 { OPTION_MASK_ISA_RTM, CODE_FOR_xtest, "__builtin_ia32_xtest", IX86_BUILTIN_XTEST, UNKNOWN, (int) INT_FTYPE_VOID },
30831 /* AVX512BW */
30832 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_loaddquv32hi_mask, "__builtin_ia32_loaddquhi512_mask", IX86_BUILTIN_LOADDQUHI512_MASK, UNKNOWN, (int) V32HI_FTYPE_PCV32HI_V32HI_SI },
30833 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512f_loaddquv64qi_mask, "__builtin_ia32_loaddquqi512_mask", IX86_BUILTIN_LOADDQUQI512_MASK, UNKNOWN, (int) V64QI_FTYPE_PCV64QI_V64QI_DI },
30834 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_storedquv32hi_mask, "__builtin_ia32_storedquhi512_mask", IX86_BUILTIN_STOREDQUHI512_MASK, UNKNOWN, (int) VOID_FTYPE_PV32HI_V32HI_SI },
30835 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_storedquv64qi_mask, "__builtin_ia32_storedquqi512_mask", IX86_BUILTIN_STOREDQUQI512_MASK, UNKNOWN, (int) VOID_FTYPE_PV64QI_V64QI_DI },
30837 /* AVX512VL */
30838 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_loaddquv16hi_mask, "__builtin_ia32_loaddquhi256_mask", IX86_BUILTIN_LOADDQUHI256_MASK, UNKNOWN, (int) V16HI_FTYPE_PCV16HI_V16HI_HI },
30839 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_loaddquv8hi_mask, "__builtin_ia32_loaddquhi128_mask", IX86_BUILTIN_LOADDQUHI128_MASK, UNKNOWN, (int) V8HI_FTYPE_PCV8HI_V8HI_QI },
30840 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_loaddquv32qi_mask, "__builtin_ia32_loaddquqi256_mask", IX86_BUILTIN_LOADDQUQI256_MASK, UNKNOWN, (int) V32QI_FTYPE_PCV32QI_V32QI_SI },
30841 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse2_loaddquv16qi_mask, "__builtin_ia32_loaddquqi128_mask", IX86_BUILTIN_LOADDQUQI128_MASK, UNKNOWN, (int) V16QI_FTYPE_PCV16QI_V16QI_HI },
30842 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_loadv4di_mask, "__builtin_ia32_movdqa64load256_mask", IX86_BUILTIN_MOVDQA64LOAD256_MASK, UNKNOWN, (int) V4DI_FTYPE_PCV4DI_V4DI_QI },
30843 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_loadv2di_mask, "__builtin_ia32_movdqa64load128_mask", IX86_BUILTIN_MOVDQA64LOAD128_MASK, UNKNOWN, (int) V2DI_FTYPE_PCV2DI_V2DI_QI },
30844 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_loadv8si_mask, "__builtin_ia32_movdqa32load256_mask", IX86_BUILTIN_MOVDQA32LOAD256_MASK, UNKNOWN, (int) V8SI_FTYPE_PCV8SI_V8SI_QI },
30845 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_loadv4si_mask, "__builtin_ia32_movdqa32load128_mask", IX86_BUILTIN_MOVDQA32LOAD128_MASK, UNKNOWN, (int) V4SI_FTYPE_PCV4SI_V4SI_QI },
30846 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_storev4di_mask, "__builtin_ia32_movdqa64store256_mask", IX86_BUILTIN_MOVDQA64STORE256_MASK, UNKNOWN, (int) VOID_FTYPE_PV4DI_V4DI_QI },
30847 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_storev2di_mask, "__builtin_ia32_movdqa64store128_mask", IX86_BUILTIN_MOVDQA64STORE128_MASK, UNKNOWN, (int) VOID_FTYPE_PV2DI_V2DI_QI },
30848 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_storev8si_mask, "__builtin_ia32_movdqa32store256_mask", IX86_BUILTIN_MOVDQA32STORE256_MASK, UNKNOWN, (int) VOID_FTYPE_PV8SI_V8SI_QI },
30849 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_storev4si_mask, "__builtin_ia32_movdqa32store128_mask", IX86_BUILTIN_MOVDQA32STORE128_MASK, UNKNOWN, (int) VOID_FTYPE_PV4SI_V4SI_QI },
30850 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_loadv4df_mask, "__builtin_ia32_loadapd256_mask", IX86_BUILTIN_LOADAPD256_MASK, UNKNOWN, (int) V4DF_FTYPE_PCV4DF_V4DF_QI },
30851 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_loadv2df_mask, "__builtin_ia32_loadapd128_mask", IX86_BUILTIN_LOADAPD128_MASK, UNKNOWN, (int) V2DF_FTYPE_PCV2DF_V2DF_QI },
30852 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_loadv8sf_mask, "__builtin_ia32_loadaps256_mask", IX86_BUILTIN_LOADAPS256_MASK, UNKNOWN, (int) V8SF_FTYPE_PCV8SF_V8SF_QI },
30853 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_loadv4sf_mask, "__builtin_ia32_loadaps128_mask", IX86_BUILTIN_LOADAPS128_MASK, UNKNOWN, (int) V4SF_FTYPE_PCV4SF_V4SF_QI },
30854 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_storev4df_mask, "__builtin_ia32_storeapd256_mask", IX86_BUILTIN_STOREAPD256_MASK, UNKNOWN, (int) VOID_FTYPE_PV4DF_V4DF_QI },
30855 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_storev2df_mask, "__builtin_ia32_storeapd128_mask", IX86_BUILTIN_STOREAPD128_MASK, UNKNOWN, (int) VOID_FTYPE_PV2DF_V2DF_QI },
30856 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_storev8sf_mask, "__builtin_ia32_storeaps256_mask", IX86_BUILTIN_STOREAPS256_MASK, UNKNOWN, (int) VOID_FTYPE_PV8SF_V8SF_QI },
30857 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_storev4sf_mask, "__builtin_ia32_storeaps128_mask", IX86_BUILTIN_STOREAPS128_MASK, UNKNOWN, (int) VOID_FTYPE_PV4SF_V4SF_QI },
30858 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_loadupd256_mask, "__builtin_ia32_loadupd256_mask", IX86_BUILTIN_LOADUPD256_MASK, UNKNOWN, (int) V4DF_FTYPE_PCV4DF_V4DF_QI },
30859 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse2_loadupd_mask, "__builtin_ia32_loadupd128_mask", IX86_BUILTIN_LOADUPD128_MASK, UNKNOWN, (int) V2DF_FTYPE_PCV2DF_V2DF_QI },
30860 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_loadups256_mask, "__builtin_ia32_loadups256_mask", IX86_BUILTIN_LOADUPS256_MASK, UNKNOWN, (int) V8SF_FTYPE_PCV8SF_V8SF_QI },
30861 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse_loadups_mask, "__builtin_ia32_loadups128_mask", IX86_BUILTIN_LOADUPS128_MASK, UNKNOWN, (int) V4SF_FTYPE_PCV4SF_V4SF_QI },
30862 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_storeupd256_mask, "__builtin_ia32_storeupd256_mask", IX86_BUILTIN_STOREUPD256_MASK, UNKNOWN, (int) VOID_FTYPE_PV4DF_V4DF_QI },
30863 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_storeupd_mask, "__builtin_ia32_storeupd128_mask", IX86_BUILTIN_STOREUPD128_MASK, UNKNOWN, (int) VOID_FTYPE_PV2DF_V2DF_QI },
30864 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_storeups256_mask, "__builtin_ia32_storeups256_mask", IX86_BUILTIN_STOREUPS256_MASK, UNKNOWN, (int) VOID_FTYPE_PV8SF_V8SF_QI },
30865 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_storeups_mask, "__builtin_ia32_storeups128_mask", IX86_BUILTIN_STOREUPS128_MASK, UNKNOWN, (int) VOID_FTYPE_PV4SF_V4SF_QI },
30866 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_loaddquv4di_mask, "__builtin_ia32_loaddqudi256_mask", IX86_BUILTIN_LOADDQUDI256_MASK, UNKNOWN, (int) V4DI_FTYPE_PCV4DI_V4DI_QI },
30867 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_loaddquv2di_mask, "__builtin_ia32_loaddqudi128_mask", IX86_BUILTIN_LOADDQUDI128_MASK, UNKNOWN, (int) V2DI_FTYPE_PCV2DI_V2DI_QI },
30868 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_loaddquv8si_mask, "__builtin_ia32_loaddqusi256_mask", IX86_BUILTIN_LOADDQUSI256_MASK, UNKNOWN, (int) V8SI_FTYPE_PCV8SI_V8SI_QI },
30869 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse2_loaddquv4si_mask, "__builtin_ia32_loaddqusi128_mask", IX86_BUILTIN_LOADDQUSI128_MASK, UNKNOWN, (int) V4SI_FTYPE_PCV4SI_V4SI_QI },
30870 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_storedquv4di_mask, "__builtin_ia32_storedqudi256_mask", IX86_BUILTIN_STOREDQUDI256_MASK, UNKNOWN, (int) VOID_FTYPE_PV4DI_V4DI_QI },
30871 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_storedquv2di_mask, "__builtin_ia32_storedqudi128_mask", IX86_BUILTIN_STOREDQUDI128_MASK, UNKNOWN, (int) VOID_FTYPE_PV2DI_V2DI_QI },
30872 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_storedquv8si_mask, "__builtin_ia32_storedqusi256_mask", IX86_BUILTIN_STOREDQUSI256_MASK, UNKNOWN, (int) VOID_FTYPE_PV8SI_V8SI_QI },
30873 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_storedquv4si_mask, "__builtin_ia32_storedqusi128_mask", IX86_BUILTIN_STOREDQUSI128_MASK, UNKNOWN, (int) VOID_FTYPE_PV4SI_V4SI_QI },
30874 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_storedquv16hi_mask, "__builtin_ia32_storedquhi256_mask", IX86_BUILTIN_STOREDQUHI256_MASK, UNKNOWN, (int) VOID_FTYPE_PV16HI_V16HI_HI },
30875 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_storedquv8hi_mask, "__builtin_ia32_storedquhi128_mask", IX86_BUILTIN_STOREDQUHI128_MASK, UNKNOWN, (int) VOID_FTYPE_PV8HI_V8HI_QI },
30876 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_storedquv32qi_mask, "__builtin_ia32_storedquqi256_mask", IX86_BUILTIN_STOREDQUQI256_MASK, UNKNOWN, (int) VOID_FTYPE_PV32QI_V32QI_SI },
30877 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_storedquv16qi_mask, "__builtin_ia32_storedquqi128_mask", IX86_BUILTIN_STOREDQUQI128_MASK, UNKNOWN, (int) VOID_FTYPE_PV16QI_V16QI_HI },
30878 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_compressstorev4df_mask, "__builtin_ia32_compressstoredf256_mask", IX86_BUILTIN_COMPRESSPDSTORE256, UNKNOWN, (int) VOID_FTYPE_PV4DF_V4DF_QI },
30879 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_compressstorev2df_mask, "__builtin_ia32_compressstoredf128_mask", IX86_BUILTIN_COMPRESSPDSTORE128, UNKNOWN, (int) VOID_FTYPE_PV2DF_V2DF_QI },
30880 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_compressstorev8sf_mask, "__builtin_ia32_compressstoresf256_mask", IX86_BUILTIN_COMPRESSPSSTORE256, UNKNOWN, (int) VOID_FTYPE_PV8SF_V8SF_QI },
30881 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_compressstorev4sf_mask, "__builtin_ia32_compressstoresf128_mask", IX86_BUILTIN_COMPRESSPSSTORE128, UNKNOWN, (int) VOID_FTYPE_PV4SF_V4SF_QI },
30882 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_compressstorev4di_mask, "__builtin_ia32_compressstoredi256_mask", IX86_BUILTIN_PCOMPRESSQSTORE256, UNKNOWN, (int) VOID_FTYPE_PV4DI_V4DI_QI },
30883 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_compressstorev2di_mask, "__builtin_ia32_compressstoredi128_mask", IX86_BUILTIN_PCOMPRESSQSTORE128, UNKNOWN, (int) VOID_FTYPE_PV2DI_V2DI_QI },
30884 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_compressstorev8si_mask, "__builtin_ia32_compressstoresi256_mask", IX86_BUILTIN_PCOMPRESSDSTORE256, UNKNOWN, (int) VOID_FTYPE_PV8SI_V8SI_QI },
30885 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_compressstorev4si_mask, "__builtin_ia32_compressstoresi128_mask", IX86_BUILTIN_PCOMPRESSDSTORE128, UNKNOWN, (int) VOID_FTYPE_PV4SI_V4SI_QI },
30886 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv4df_mask, "__builtin_ia32_expandloaddf256_mask", IX86_BUILTIN_EXPANDPDLOAD256, UNKNOWN, (int) V4DF_FTYPE_PCV4DF_V4DF_QI },
30887 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv2df_mask, "__builtin_ia32_expandloaddf128_mask", IX86_BUILTIN_EXPANDPDLOAD128, UNKNOWN, (int) V2DF_FTYPE_PCV2DF_V2DF_QI },
30888 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv8sf_mask, "__builtin_ia32_expandloadsf256_mask", IX86_BUILTIN_EXPANDPSLOAD256, UNKNOWN, (int) V8SF_FTYPE_PCV8SF_V8SF_QI },
30889 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv4sf_mask, "__builtin_ia32_expandloadsf128_mask", IX86_BUILTIN_EXPANDPSLOAD128, UNKNOWN, (int) V4SF_FTYPE_PCV4SF_V4SF_QI },
30890 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv4di_mask, "__builtin_ia32_expandloaddi256_mask", IX86_BUILTIN_PEXPANDQLOAD256, UNKNOWN, (int) V4DI_FTYPE_PCV4DI_V4DI_QI },
30891 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv2di_mask, "__builtin_ia32_expandloaddi128_mask", IX86_BUILTIN_PEXPANDQLOAD128, UNKNOWN, (int) V2DI_FTYPE_PCV2DI_V2DI_QI },
30892 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv8si_mask, "__builtin_ia32_expandloadsi256_mask", IX86_BUILTIN_PEXPANDDLOAD256, UNKNOWN, (int) V8SI_FTYPE_PCV8SI_V8SI_QI },
30893 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv4si_mask, "__builtin_ia32_expandloadsi128_mask", IX86_BUILTIN_PEXPANDDLOAD128, UNKNOWN, (int) V4SI_FTYPE_PCV4SI_V4SI_QI },
30894 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv4df_maskz, "__builtin_ia32_expandloaddf256_maskz", IX86_BUILTIN_EXPANDPDLOAD256Z, UNKNOWN, (int) V4DF_FTYPE_PCV4DF_V4DF_QI },
30895 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv2df_maskz, "__builtin_ia32_expandloaddf128_maskz", IX86_BUILTIN_EXPANDPDLOAD128Z, UNKNOWN, (int) V2DF_FTYPE_PCV2DF_V2DF_QI },
30896 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv8sf_maskz, "__builtin_ia32_expandloadsf256_maskz", IX86_BUILTIN_EXPANDPSLOAD256Z, UNKNOWN, (int) V8SF_FTYPE_PCV8SF_V8SF_QI },
30897 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv4sf_maskz, "__builtin_ia32_expandloadsf128_maskz", IX86_BUILTIN_EXPANDPSLOAD128Z, UNKNOWN, (int) V4SF_FTYPE_PCV4SF_V4SF_QI },
30898 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv4di_maskz, "__builtin_ia32_expandloaddi256_maskz", IX86_BUILTIN_PEXPANDQLOAD256Z, UNKNOWN, (int) V4DI_FTYPE_PCV4DI_V4DI_QI },
30899 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv2di_maskz, "__builtin_ia32_expandloaddi128_maskz", IX86_BUILTIN_PEXPANDQLOAD128Z, UNKNOWN, (int) V2DI_FTYPE_PCV2DI_V2DI_QI },
30900 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv8si_maskz, "__builtin_ia32_expandloadsi256_maskz", IX86_BUILTIN_PEXPANDDLOAD256Z, UNKNOWN, (int) V8SI_FTYPE_PCV8SI_V8SI_QI },
30901 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv4si_maskz, "__builtin_ia32_expandloadsi128_maskz", IX86_BUILTIN_PEXPANDDLOAD128Z, UNKNOWN, (int) V4SI_FTYPE_PCV4SI_V4SI_QI },
30902 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_truncatev4div4si2_mask_store, "__builtin_ia32_pmovqd256mem_mask", IX86_BUILTIN_PMOVQD256_MEM, UNKNOWN, (int) VOID_FTYPE_PV4SI_V4DI_QI },
30903 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_truncatev2div2si2_mask_store, "__builtin_ia32_pmovqd128mem_mask", IX86_BUILTIN_PMOVQD128_MEM, UNKNOWN, (int) VOID_FTYPE_PV4SI_V2DI_QI },
30904 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ss_truncatev4div4si2_mask_store, "__builtin_ia32_pmovsqd256mem_mask", IX86_BUILTIN_PMOVSQD256_MEM, UNKNOWN, (int) VOID_FTYPE_PV4SI_V4DI_QI },
30905 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ss_truncatev2div2si2_mask_store, "__builtin_ia32_pmovsqd128mem_mask", IX86_BUILTIN_PMOVSQD128_MEM, UNKNOWN, (int) VOID_FTYPE_PV4SI_V2DI_QI },
30906 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_us_truncatev4div4si2_mask_store, "__builtin_ia32_pmovusqd256mem_mask", IX86_BUILTIN_PMOVUSQD256_MEM, UNKNOWN, (int) VOID_FTYPE_PV4SI_V4DI_QI },
30907 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_us_truncatev2div2si2_mask_store, "__builtin_ia32_pmovusqd128mem_mask", IX86_BUILTIN_PMOVUSQD128_MEM, UNKNOWN, (int) VOID_FTYPE_PV4SI_V2DI_QI },
30908 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_truncatev4div4hi2_mask_store, "__builtin_ia32_pmovqw256mem_mask", IX86_BUILTIN_PMOVQW256_MEM, UNKNOWN, (int) VOID_FTYPE_PV8HI_V4DI_QI },
30909 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_truncatev2div2hi2_mask_store, "__builtin_ia32_pmovqw128mem_mask", IX86_BUILTIN_PMOVQW128_MEM, UNKNOWN, (int) VOID_FTYPE_PV8HI_V2DI_QI },
30910 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ss_truncatev4div4hi2_mask_store, "__builtin_ia32_pmovsqw256mem_mask", IX86_BUILTIN_PMOVSQW256_MEM, UNKNOWN, (int) VOID_FTYPE_PV8HI_V4DI_QI },
30911 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ss_truncatev2div2hi2_mask_store, "__builtin_ia32_pmovsqw128mem_mask", IX86_BUILTIN_PMOVSQW128_MEM, UNKNOWN, (int) VOID_FTYPE_PV8HI_V2DI_QI },
30912 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_us_truncatev4div4hi2_mask_store, "__builtin_ia32_pmovusqw256mem_mask", IX86_BUILTIN_PMOVUSQW256_MEM, UNKNOWN, (int) VOID_FTYPE_PV8HI_V4DI_QI },
30913 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_us_truncatev2div2hi2_mask_store, "__builtin_ia32_pmovusqw128mem_mask", IX86_BUILTIN_PMOVUSQW128_MEM, UNKNOWN, (int) VOID_FTYPE_PV8HI_V2DI_QI },
30914 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_truncatev4div4qi2_mask_store, "__builtin_ia32_pmovqb256mem_mask", IX86_BUILTIN_PMOVQB256_MEM, UNKNOWN, (int) VOID_FTYPE_PV16QI_V4DI_QI },
30915 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_truncatev2div2qi2_mask_store, "__builtin_ia32_pmovqb128mem_mask", IX86_BUILTIN_PMOVQB128_MEM, UNKNOWN, (int) VOID_FTYPE_PV16QI_V2DI_QI },
30916 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ss_truncatev4div4qi2_mask_store, "__builtin_ia32_pmovsqb256mem_mask", IX86_BUILTIN_PMOVSQB256_MEM, UNKNOWN, (int) VOID_FTYPE_PV16QI_V4DI_QI },
30917 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ss_truncatev2div2qi2_mask_store, "__builtin_ia32_pmovsqb128mem_mask", IX86_BUILTIN_PMOVSQB128_MEM, UNKNOWN, (int) VOID_FTYPE_PV16QI_V2DI_QI },
30918 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_us_truncatev4div4qi2_mask_store, "__builtin_ia32_pmovusqb256mem_mask", IX86_BUILTIN_PMOVUSQB256_MEM, UNKNOWN, (int) VOID_FTYPE_PV16QI_V4DI_QI },
30919 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_us_truncatev2div2qi2_mask_store, "__builtin_ia32_pmovusqb128mem_mask", IX86_BUILTIN_PMOVUSQB128_MEM, UNKNOWN, (int) VOID_FTYPE_PV16QI_V2DI_QI },
30920 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_truncatev8siv8qi2_mask_store, "__builtin_ia32_pmovdb256mem_mask", IX86_BUILTIN_PMOVDB256_MEM, UNKNOWN, (int) VOID_FTYPE_PV16QI_V8SI_QI },
30921 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_truncatev4siv4qi2_mask_store, "__builtin_ia32_pmovdb128mem_mask", IX86_BUILTIN_PMOVDB128_MEM, UNKNOWN, (int) VOID_FTYPE_PV16QI_V4SI_QI },
30922 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ss_truncatev8siv8qi2_mask_store, "__builtin_ia32_pmovsdb256mem_mask", IX86_BUILTIN_PMOVSDB256_MEM, UNKNOWN, (int) VOID_FTYPE_PV16QI_V8SI_QI },
30923 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ss_truncatev4siv4qi2_mask_store, "__builtin_ia32_pmovsdb128mem_mask", IX86_BUILTIN_PMOVSDB128_MEM, UNKNOWN, (int) VOID_FTYPE_PV16QI_V4SI_QI },
30924 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_us_truncatev8siv8qi2_mask_store, "__builtin_ia32_pmovusdb256mem_mask", IX86_BUILTIN_PMOVUSDB256_MEM, UNKNOWN, (int) VOID_FTYPE_PV16QI_V8SI_QI },
30925 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_us_truncatev4siv4qi2_mask_store, "__builtin_ia32_pmovusdb128mem_mask", IX86_BUILTIN_PMOVUSDB128_MEM, UNKNOWN, (int) VOID_FTYPE_PV16QI_V4SI_QI },
30926 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_truncatev8siv8hi2_mask_store, "__builtin_ia32_pmovdw256mem_mask", IX86_BUILTIN_PMOVDW256_MEM, UNKNOWN, (int) VOID_FTYPE_PV8HI_V8SI_QI },
30927 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_truncatev4siv4hi2_mask_store, "__builtin_ia32_pmovdw128mem_mask", IX86_BUILTIN_PMOVDW128_MEM, UNKNOWN, (int) VOID_FTYPE_PV8HI_V4SI_QI },
30928 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ss_truncatev8siv8hi2_mask_store, "__builtin_ia32_pmovsdw256mem_mask", IX86_BUILTIN_PMOVSDW256_MEM, UNKNOWN, (int) VOID_FTYPE_PV8HI_V8SI_QI },
30929 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ss_truncatev4siv4hi2_mask_store, "__builtin_ia32_pmovsdw128mem_mask", IX86_BUILTIN_PMOVSDW128_MEM, UNKNOWN, (int) VOID_FTYPE_PV8HI_V4SI_QI },
30930 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_us_truncatev8siv8hi2_mask_store, "__builtin_ia32_pmovusdw256mem_mask", IX86_BUILTIN_PMOVUSDW256_MEM, UNKNOWN, (int) VOID_FTYPE_PV8HI_V8SI_QI },
30931 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_us_truncatev4siv4hi2_mask_store, "__builtin_ia32_pmovusdw128mem_mask", IX86_BUILTIN_PMOVUSDW128_MEM, UNKNOWN, (int) VOID_FTYPE_PV8HI_V4SI_QI },
30933 /* PCOMMIT. */
30934 { OPTION_MASK_ISA_PCOMMIT, CODE_FOR_pcommit, "__builtin_ia32_pcommit", IX86_BUILTIN_PCOMMIT, UNKNOWN, (int) VOID_FTYPE_VOID },
30935 };
30937 /* Builtins with variable number of arguments. */
30938 static const struct builtin_description bdesc_args[] =
30939 {
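/* A rough sketch of how the rows in this table are read (the real
   registration loop lives later in this file): each entry is a
   builtin_description giving the ISA option mask, the insn_code of the
   expander, the "__builtin_ia32_*" name, the IX86_BUILTIN_* enum, an
   optional rtx comparison code (or UNKNOWN), and the prototype enum cast
   to int.  Approximately:

     for (i = 0, d = bdesc_args; i < ARRAY_SIZE (bdesc_args); i++, d++)
       def_builtin_const (d->mask, d->name,
                          (enum ix86_builtin_func_type) d->flag, d->code);

   The snippet above is only an illustration of how the fields are
   consumed, not the exact code.  */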
30940 { ~OPTION_MASK_ISA_64BIT, CODE_FOR_bsr, "__builtin_ia32_bsrsi", IX86_BUILTIN_BSRSI, UNKNOWN, (int) INT_FTYPE_INT },
30941 { OPTION_MASK_ISA_64BIT, CODE_FOR_bsr_rex64, "__builtin_ia32_bsrdi", IX86_BUILTIN_BSRDI, UNKNOWN, (int) INT64_FTYPE_INT64 },
30942 { ~OPTION_MASK_ISA_64BIT, CODE_FOR_nothing, "__builtin_ia32_rdpmc", IX86_BUILTIN_RDPMC, UNKNOWN, (int) UINT64_FTYPE_INT },
30943 { ~OPTION_MASK_ISA_64BIT, CODE_FOR_rotlqi3, "__builtin_ia32_rolqi", IX86_BUILTIN_ROLQI, UNKNOWN, (int) UINT8_FTYPE_UINT8_INT },
30944 { ~OPTION_MASK_ISA_64BIT, CODE_FOR_rotlhi3, "__builtin_ia32_rolhi", IX86_BUILTIN_ROLHI, UNKNOWN, (int) UINT16_FTYPE_UINT16_INT },
30945 { ~OPTION_MASK_ISA_64BIT, CODE_FOR_rotrqi3, "__builtin_ia32_rorqi", IX86_BUILTIN_RORQI, UNKNOWN, (int) UINT8_FTYPE_UINT8_INT },
30946 { ~OPTION_MASK_ISA_64BIT, CODE_FOR_rotrhi3, "__builtin_ia32_rorhi", IX86_BUILTIN_RORHI, UNKNOWN, (int) UINT16_FTYPE_UINT16_INT },
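/* A note on the ISA masks in the rows above (a rough description of the
   convention used by the registration code, not a definition): a row whose
   mask is OPTION_MASK_ISA_64BIT is only registered for 64-bit targets
   (e.g. __builtin_ia32_bsrdi), while the complemented form
   ~OPTION_MASK_ISA_64BIT is the idiom for "no particular ISA requirement",
   so those builtins are available for both -m32 and -m64.  */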
30948 /* MMX */
30949 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_addv8qi3, "__builtin_ia32_paddb", IX86_BUILTIN_PADDB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
30950 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_addv4hi3, "__builtin_ia32_paddw", IX86_BUILTIN_PADDW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
30951 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_addv2si3, "__builtin_ia32_paddd", IX86_BUILTIN_PADDD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
30952 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_subv8qi3, "__builtin_ia32_psubb", IX86_BUILTIN_PSUBB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
30953 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_subv4hi3, "__builtin_ia32_psubw", IX86_BUILTIN_PSUBW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
30954 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_subv2si3, "__builtin_ia32_psubd", IX86_BUILTIN_PSUBD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
30956 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ssaddv8qi3, "__builtin_ia32_paddsb", IX86_BUILTIN_PADDSB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
30957 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ssaddv4hi3, "__builtin_ia32_paddsw", IX86_BUILTIN_PADDSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
30958 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_sssubv8qi3, "__builtin_ia32_psubsb", IX86_BUILTIN_PSUBSB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
30959 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_sssubv4hi3, "__builtin_ia32_psubsw", IX86_BUILTIN_PSUBSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
30960 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_usaddv8qi3, "__builtin_ia32_paddusb", IX86_BUILTIN_PADDUSB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
30961 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_usaddv4hi3, "__builtin_ia32_paddusw", IX86_BUILTIN_PADDUSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
30962 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ussubv8qi3, "__builtin_ia32_psubusb", IX86_BUILTIN_PSUBUSB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
30963 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ussubv4hi3, "__builtin_ia32_psubusw", IX86_BUILTIN_PSUBUSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
30965 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_mulv4hi3, "__builtin_ia32_pmullw", IX86_BUILTIN_PMULLW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
30966 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_smulv4hi3_highpart, "__builtin_ia32_pmulhw", IX86_BUILTIN_PMULHW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
30968 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_andv2si3, "__builtin_ia32_pand", IX86_BUILTIN_PAND, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
30969 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_andnotv2si3, "__builtin_ia32_pandn", IX86_BUILTIN_PANDN, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
30970 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_iorv2si3, "__builtin_ia32_por", IX86_BUILTIN_POR, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
30971 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_xorv2si3, "__builtin_ia32_pxor", IX86_BUILTIN_PXOR, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
30973 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_eqv8qi3, "__builtin_ia32_pcmpeqb", IX86_BUILTIN_PCMPEQB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
30974 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_eqv4hi3, "__builtin_ia32_pcmpeqw", IX86_BUILTIN_PCMPEQW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
30975 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_eqv2si3, "__builtin_ia32_pcmpeqd", IX86_BUILTIN_PCMPEQD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
30976 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_gtv8qi3, "__builtin_ia32_pcmpgtb", IX86_BUILTIN_PCMPGTB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
30977 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_gtv4hi3, "__builtin_ia32_pcmpgtw", IX86_BUILTIN_PCMPGTW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
30978 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_gtv2si3, "__builtin_ia32_pcmpgtd", IX86_BUILTIN_PCMPGTD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
30980 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_punpckhbw, "__builtin_ia32_punpckhbw", IX86_BUILTIN_PUNPCKHBW, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
30981 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_punpckhwd, "__builtin_ia32_punpckhwd", IX86_BUILTIN_PUNPCKHWD, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
30982 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_punpckhdq, "__builtin_ia32_punpckhdq", IX86_BUILTIN_PUNPCKHDQ, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
30983 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_punpcklbw, "__builtin_ia32_punpcklbw", IX86_BUILTIN_PUNPCKLBW, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
30984 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_punpcklwd, "__builtin_ia32_punpcklwd", IX86_BUILTIN_PUNPCKLWD, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
30985 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_punpckldq, "__builtin_ia32_punpckldq", IX86_BUILTIN_PUNPCKLDQ, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
30987 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_packsswb, "__builtin_ia32_packsswb", IX86_BUILTIN_PACKSSWB, UNKNOWN, (int) V8QI_FTYPE_V4HI_V4HI },
30988 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_packssdw, "__builtin_ia32_packssdw", IX86_BUILTIN_PACKSSDW, UNKNOWN, (int) V4HI_FTYPE_V2SI_V2SI },
30989 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_packuswb, "__builtin_ia32_packuswb", IX86_BUILTIN_PACKUSWB, UNKNOWN, (int) V8QI_FTYPE_V4HI_V4HI },
30991 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_pmaddwd, "__builtin_ia32_pmaddwd", IX86_BUILTIN_PMADDWD, UNKNOWN, (int) V2SI_FTYPE_V4HI_V4HI },
30993 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashlv4hi3, "__builtin_ia32_psllwi", IX86_BUILTIN_PSLLWI, UNKNOWN, (int) V4HI_FTYPE_V4HI_SI_COUNT },
30994 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashlv2si3, "__builtin_ia32_pslldi", IX86_BUILTIN_PSLLDI, UNKNOWN, (int) V2SI_FTYPE_V2SI_SI_COUNT },
30995 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashlv1di3, "__builtin_ia32_psllqi", IX86_BUILTIN_PSLLQI, UNKNOWN, (int) V1DI_FTYPE_V1DI_SI_COUNT },
30996 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashlv4hi3, "__builtin_ia32_psllw", IX86_BUILTIN_PSLLW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI_COUNT },
30997 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashlv2si3, "__builtin_ia32_pslld", IX86_BUILTIN_PSLLD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI_COUNT },
30998 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashlv1di3, "__builtin_ia32_psllq", IX86_BUILTIN_PSLLQ, UNKNOWN, (int) V1DI_FTYPE_V1DI_V1DI_COUNT },
31000 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_lshrv4hi3, "__builtin_ia32_psrlwi", IX86_BUILTIN_PSRLWI, UNKNOWN, (int) V4HI_FTYPE_V4HI_SI_COUNT },
31001 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_lshrv2si3, "__builtin_ia32_psrldi", IX86_BUILTIN_PSRLDI, UNKNOWN, (int) V2SI_FTYPE_V2SI_SI_COUNT },
31002 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_lshrv1di3, "__builtin_ia32_psrlqi", IX86_BUILTIN_PSRLQI, UNKNOWN, (int) V1DI_FTYPE_V1DI_SI_COUNT },
31003 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_lshrv4hi3, "__builtin_ia32_psrlw", IX86_BUILTIN_PSRLW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI_COUNT },
31004 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_lshrv2si3, "__builtin_ia32_psrld", IX86_BUILTIN_PSRLD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI_COUNT },
31005 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_lshrv1di3, "__builtin_ia32_psrlq", IX86_BUILTIN_PSRLQ, UNKNOWN, (int) V1DI_FTYPE_V1DI_V1DI_COUNT },
31007 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashrv4hi3, "__builtin_ia32_psrawi", IX86_BUILTIN_PSRAWI, UNKNOWN, (int) V4HI_FTYPE_V4HI_SI_COUNT },
31008 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashrv2si3, "__builtin_ia32_psradi", IX86_BUILTIN_PSRADI, UNKNOWN, (int) V2SI_FTYPE_V2SI_SI_COUNT },
31009 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashrv4hi3, "__builtin_ia32_psraw", IX86_BUILTIN_PSRAW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI_COUNT },
31010 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashrv2si3, "__builtin_ia32_psrad", IX86_BUILTIN_PSRAD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI_COUNT },
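/* The paired shift rows above share one insn pattern per operation: the
   *_SI_COUNT prototypes take an integer shift count (the psllwi/pslldi/...
   immediate forms), while the *_V4HI_COUNT / *_V2SI_COUNT / *_V1DI_COUNT
   prototypes take the count in an MMX register.  For illustration only
   (mapping assumed from mmintrin.h): _mm_slli_pi16 (x, n) expands to
   __builtin_ia32_psllwi (x, n) and _mm_sll_pi16 (x, c) expands to
   __builtin_ia32_psllw (x, c).  */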
31012 /* 3DNow! */
31013 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_pf2id, "__builtin_ia32_pf2id", IX86_BUILTIN_PF2ID, UNKNOWN, (int) V2SI_FTYPE_V2SF },
31014 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_floatv2si2, "__builtin_ia32_pi2fd", IX86_BUILTIN_PI2FD, UNKNOWN, (int) V2SF_FTYPE_V2SI },
31015 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_rcpv2sf2, "__builtin_ia32_pfrcp", IX86_BUILTIN_PFRCP, UNKNOWN, (int) V2SF_FTYPE_V2SF },
31016 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_rsqrtv2sf2, "__builtin_ia32_pfrsqrt", IX86_BUILTIN_PFRSQRT, UNKNOWN, (int) V2SF_FTYPE_V2SF },
31018 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_uavgv8qi3, "__builtin_ia32_pavgusb", IX86_BUILTIN_PAVGUSB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
31019 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_haddv2sf3, "__builtin_ia32_pfacc", IX86_BUILTIN_PFACC, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
31020 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_addv2sf3, "__builtin_ia32_pfadd", IX86_BUILTIN_PFADD, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
31021 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_eqv2sf3, "__builtin_ia32_pfcmpeq", IX86_BUILTIN_PFCMPEQ, UNKNOWN, (int) V2SI_FTYPE_V2SF_V2SF },
31022 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_gev2sf3, "__builtin_ia32_pfcmpge", IX86_BUILTIN_PFCMPGE, UNKNOWN, (int) V2SI_FTYPE_V2SF_V2SF },
31023 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_gtv2sf3, "__builtin_ia32_pfcmpgt", IX86_BUILTIN_PFCMPGT, UNKNOWN, (int) V2SI_FTYPE_V2SF_V2SF },
31024 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_smaxv2sf3, "__builtin_ia32_pfmax", IX86_BUILTIN_PFMAX, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
31025 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_sminv2sf3, "__builtin_ia32_pfmin", IX86_BUILTIN_PFMIN, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
31026 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_mulv2sf3, "__builtin_ia32_pfmul", IX86_BUILTIN_PFMUL, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
31027 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_rcpit1v2sf3, "__builtin_ia32_pfrcpit1", IX86_BUILTIN_PFRCPIT1, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
31028 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_rcpit2v2sf3, "__builtin_ia32_pfrcpit2", IX86_BUILTIN_PFRCPIT2, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
31029 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_rsqit1v2sf3, "__builtin_ia32_pfrsqit1", IX86_BUILTIN_PFRSQIT1, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
31030 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_subv2sf3, "__builtin_ia32_pfsub", IX86_BUILTIN_PFSUB, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
31031 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_subrv2sf3, "__builtin_ia32_pfsubr", IX86_BUILTIN_PFSUBR, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
31032 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_pmulhrwv4hi3, "__builtin_ia32_pmulhrw", IX86_BUILTIN_PMULHRW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
31034 /* 3DNow!A */
31035 { OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_pf2iw, "__builtin_ia32_pf2iw", IX86_BUILTIN_PF2IW, UNKNOWN, (int) V2SI_FTYPE_V2SF },
31036 { OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_pi2fw, "__builtin_ia32_pi2fw", IX86_BUILTIN_PI2FW, UNKNOWN, (int) V2SF_FTYPE_V2SI },
31037 { OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_pswapdv2si2, "__builtin_ia32_pswapdsi", IX86_BUILTIN_PSWAPDSI, UNKNOWN, (int) V2SI_FTYPE_V2SI },
31038 { OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_pswapdv2sf2, "__builtin_ia32_pswapdsf", IX86_BUILTIN_PSWAPDSF, UNKNOWN, (int) V2SF_FTYPE_V2SF },
31039 { OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_hsubv2sf3, "__builtin_ia32_pfnacc", IX86_BUILTIN_PFNACC, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
31040 { OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_addsubv2sf3, "__builtin_ia32_pfpnacc", IX86_BUILTIN_PFPNACC, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
31042 /* SSE */
31043 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_movmskps, "__builtin_ia32_movmskps", IX86_BUILTIN_MOVMSKPS, UNKNOWN, (int) INT_FTYPE_V4SF },
31044 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_sqrtv4sf2, "__builtin_ia32_sqrtps", IX86_BUILTIN_SQRTPS, UNKNOWN, (int) V4SF_FTYPE_V4SF },
31045 { OPTION_MASK_ISA_SSE, CODE_FOR_sqrtv4sf2, "__builtin_ia32_sqrtps_nr", IX86_BUILTIN_SQRTPS_NR, UNKNOWN, (int) V4SF_FTYPE_V4SF },
31046 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_rsqrtv4sf2, "__builtin_ia32_rsqrtps", IX86_BUILTIN_RSQRTPS, UNKNOWN, (int) V4SF_FTYPE_V4SF },
31047 { OPTION_MASK_ISA_SSE, CODE_FOR_rsqrtv4sf2, "__builtin_ia32_rsqrtps_nr", IX86_BUILTIN_RSQRTPS_NR, UNKNOWN, (int) V4SF_FTYPE_V4SF },
31048 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_rcpv4sf2, "__builtin_ia32_rcpps", IX86_BUILTIN_RCPPS, UNKNOWN, (int) V4SF_FTYPE_V4SF },
31049 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_cvtps2pi, "__builtin_ia32_cvtps2pi", IX86_BUILTIN_CVTPS2PI, UNKNOWN, (int) V2SI_FTYPE_V4SF },
31050 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_cvtss2si, "__builtin_ia32_cvtss2si", IX86_BUILTIN_CVTSS2SI, UNKNOWN, (int) INT_FTYPE_V4SF },
31051 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_64BIT, CODE_FOR_sse_cvtss2siq, "__builtin_ia32_cvtss2si64", IX86_BUILTIN_CVTSS2SI64, UNKNOWN, (int) INT64_FTYPE_V4SF },
31052 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_cvttps2pi, "__builtin_ia32_cvttps2pi", IX86_BUILTIN_CVTTPS2PI, UNKNOWN, (int) V2SI_FTYPE_V4SF },
31053 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_cvttss2si, "__builtin_ia32_cvttss2si", IX86_BUILTIN_CVTTSS2SI, UNKNOWN, (int) INT_FTYPE_V4SF },
31054 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_64BIT, CODE_FOR_sse_cvttss2siq, "__builtin_ia32_cvttss2si64", IX86_BUILTIN_CVTTSS2SI64, UNKNOWN, (int) INT64_FTYPE_V4SF },
31056 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_shufps, "__builtin_ia32_shufps", IX86_BUILTIN_SHUFPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
31058 { OPTION_MASK_ISA_SSE, CODE_FOR_addv4sf3, "__builtin_ia32_addps", IX86_BUILTIN_ADDPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
31059 { OPTION_MASK_ISA_SSE, CODE_FOR_subv4sf3, "__builtin_ia32_subps", IX86_BUILTIN_SUBPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
31060 { OPTION_MASK_ISA_SSE, CODE_FOR_mulv4sf3, "__builtin_ia32_mulps", IX86_BUILTIN_MULPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
31061 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_divv4sf3, "__builtin_ia32_divps", IX86_BUILTIN_DIVPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
31062 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmaddv4sf3, "__builtin_ia32_addss", IX86_BUILTIN_ADDSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
31063 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmsubv4sf3, "__builtin_ia32_subss", IX86_BUILTIN_SUBSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
31064 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmulv4sf3, "__builtin_ia32_mulss", IX86_BUILTIN_MULSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
31065 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmdivv4sf3, "__builtin_ia32_divss", IX86_BUILTIN_DIVSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
31067 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpeqps", IX86_BUILTIN_CMPEQPS, EQ, (int) V4SF_FTYPE_V4SF_V4SF },
31068 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpltps", IX86_BUILTIN_CMPLTPS, LT, (int) V4SF_FTYPE_V4SF_V4SF },
31069 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpleps", IX86_BUILTIN_CMPLEPS, LE, (int) V4SF_FTYPE_V4SF_V4SF },
31070 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpgtps", IX86_BUILTIN_CMPGTPS, LT, (int) V4SF_FTYPE_V4SF_V4SF_SWAP },
31071 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpgeps", IX86_BUILTIN_CMPGEPS, LE, (int) V4SF_FTYPE_V4SF_V4SF_SWAP },
31072 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpunordps", IX86_BUILTIN_CMPUNORDPS, UNORDERED, (int) V4SF_FTYPE_V4SF_V4SF },
31073 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpneqps", IX86_BUILTIN_CMPNEQPS, NE, (int) V4SF_FTYPE_V4SF_V4SF },
31074 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpnltps", IX86_BUILTIN_CMPNLTPS, UNGE, (int) V4SF_FTYPE_V4SF_V4SF },
31075 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpnleps", IX86_BUILTIN_CMPNLEPS, UNGT, (int) V4SF_FTYPE_V4SF_V4SF },
31076 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpngtps", IX86_BUILTIN_CMPNGTPS, UNGE, (int) V4SF_FTYPE_V4SF_V4SF_SWAP },
31077 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpngeps", IX86_BUILTIN_CMPNGEPS, UNGT, (int) V4SF_FTYPE_V4SF_V4SF_SWAP },
31078 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpordps", IX86_BUILTIN_CMPORDPS, ORDERED, (int) V4SF_FTYPE_V4SF_V4SF },
31079 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpeqss", IX86_BUILTIN_CMPEQSS, EQ, (int) V4SF_FTYPE_V4SF_V4SF },
31080 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpltss", IX86_BUILTIN_CMPLTSS, LT, (int) V4SF_FTYPE_V4SF_V4SF },
31081 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpless", IX86_BUILTIN_CMPLESS, LE, (int) V4SF_FTYPE_V4SF_V4SF },
31082 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpunordss", IX86_BUILTIN_CMPUNORDSS, UNORDERED, (int) V4SF_FTYPE_V4SF_V4SF },
31083 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpneqss", IX86_BUILTIN_CMPNEQSS, NE, (int) V4SF_FTYPE_V4SF_V4SF },
31084 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpnltss", IX86_BUILTIN_CMPNLTSS, UNGE, (int) V4SF_FTYPE_V4SF_V4SF },
31085 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpnless", IX86_BUILTIN_CMPNLESS, UNGT, (int) V4SF_FTYPE_V4SF_V4SF },
31086 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpordss", IX86_BUILTIN_CMPORDSS, ORDERED, (int) V4SF_FTYPE_V4SF_V4SF },
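/* For the compare rows above, the rtx_code field selects the condition that
   CODE_FOR_sse_maskcmpv4sf3 / sse_vmmaskcmpv4sf3 expand.  The *_SWAP
   prototypes reuse LT/LE for cmpgt/cmpge by swapping the two operands, and
   the negated variants (cmpnlt, cmpnle, cmpngt, cmpnge) use the unordered
   codes UNGE/UNGT so that NaN operands behave as the SSE CMPNLTPS family
   requires.  */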
31088 { OPTION_MASK_ISA_SSE, CODE_FOR_sminv4sf3, "__builtin_ia32_minps", IX86_BUILTIN_MINPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
31089 { OPTION_MASK_ISA_SSE, CODE_FOR_smaxv4sf3, "__builtin_ia32_maxps", IX86_BUILTIN_MAXPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
31090 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmsminv4sf3, "__builtin_ia32_minss", IX86_BUILTIN_MINSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
31091 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmsmaxv4sf3, "__builtin_ia32_maxss", IX86_BUILTIN_MAXSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
31093 { OPTION_MASK_ISA_SSE, CODE_FOR_andv4sf3, "__builtin_ia32_andps", IX86_BUILTIN_ANDPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
31094 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_andnotv4sf3, "__builtin_ia32_andnps", IX86_BUILTIN_ANDNPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
31095 { OPTION_MASK_ISA_SSE, CODE_FOR_iorv4sf3, "__builtin_ia32_orps", IX86_BUILTIN_ORPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
31096 { OPTION_MASK_ISA_SSE, CODE_FOR_xorv4sf3, "__builtin_ia32_xorps", IX86_BUILTIN_XORPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
31098 { OPTION_MASK_ISA_SSE, CODE_FOR_copysignv4sf3, "__builtin_ia32_copysignps", IX86_BUILTIN_CPYSGNPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
31100 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_movss, "__builtin_ia32_movss", IX86_BUILTIN_MOVSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
31101 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_movhlps_exp, "__builtin_ia32_movhlps", IX86_BUILTIN_MOVHLPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
31102 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_movlhps_exp, "__builtin_ia32_movlhps", IX86_BUILTIN_MOVLHPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
31103 { OPTION_MASK_ISA_SSE, CODE_FOR_vec_interleave_highv4sf, "__builtin_ia32_unpckhps", IX86_BUILTIN_UNPCKHPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
31104 { OPTION_MASK_ISA_SSE, CODE_FOR_vec_interleave_lowv4sf, "__builtin_ia32_unpcklps", IX86_BUILTIN_UNPCKLPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
31106 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_cvtpi2ps, "__builtin_ia32_cvtpi2ps", IX86_BUILTIN_CVTPI2PS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V2SI },
31107 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_cvtsi2ss, "__builtin_ia32_cvtsi2ss", IX86_BUILTIN_CVTSI2SS, UNKNOWN, (int) V4SF_FTYPE_V4SF_SI },
31108 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_64BIT, CODE_FOR_sse_cvtsi2ssq, "__builtin_ia32_cvtsi642ss", IX86_BUILTIN_CVTSI642SS, UNKNOWN, (int) V4SF_FTYPE_V4SF_DI },
31110 { OPTION_MASK_ISA_SSE, CODE_FOR_rsqrtsf2, "__builtin_ia32_rsqrtf", IX86_BUILTIN_RSQRTF, UNKNOWN, (int) FLOAT_FTYPE_FLOAT },
31112 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmsqrtv4sf2, "__builtin_ia32_sqrtss", IX86_BUILTIN_SQRTSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_VEC_MERGE },
31113 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmrsqrtv4sf2, "__builtin_ia32_rsqrtss", IX86_BUILTIN_RSQRTSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_VEC_MERGE },
31114 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmrcpv4sf2, "__builtin_ia32_rcpss", IX86_BUILTIN_RCPSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_VEC_MERGE },
31116 { OPTION_MASK_ISA_SSE, CODE_FOR_abstf2, 0, IX86_BUILTIN_FABSQ, UNKNOWN, (int) FLOAT128_FTYPE_FLOAT128 },
31117 { OPTION_MASK_ISA_SSE, CODE_FOR_copysigntf3, 0, IX86_BUILTIN_COPYSIGNQ, UNKNOWN, (int) FLOAT128_FTYPE_FLOAT128_FLOAT128 },
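/* The two rows above have a null name: IX86_BUILTIN_FABSQ and
   IX86_BUILTIN_COPYSIGNQ are not exposed as __builtin_ia32_* functions from
   this table; presumably they back the __float128 fabs/copysign builtins
   that are registered by name elsewhere in this file.  */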
31120 /* SSE MMX or 3DNow!A */
31120 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_uavgv8qi3, "__builtin_ia32_pavgb", IX86_BUILTIN_PAVGB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
31121 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_uavgv4hi3, "__builtin_ia32_pavgw", IX86_BUILTIN_PAVGW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
31122 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_umulv4hi3_highpart, "__builtin_ia32_pmulhuw", IX86_BUILTIN_PMULHUW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
31124 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_umaxv8qi3, "__builtin_ia32_pmaxub", IX86_BUILTIN_PMAXUB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
31125 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_smaxv4hi3, "__builtin_ia32_pmaxsw", IX86_BUILTIN_PMAXSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
31126 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_uminv8qi3, "__builtin_ia32_pminub", IX86_BUILTIN_PMINUB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
31127 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_sminv4hi3, "__builtin_ia32_pminsw", IX86_BUILTIN_PMINSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
31129 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_psadbw, "__builtin_ia32_psadbw", IX86_BUILTIN_PSADBW, UNKNOWN, (int) V1DI_FTYPE_V8QI_V8QI },
31130 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_pmovmskb, "__builtin_ia32_pmovmskb", IX86_BUILTIN_PMOVMSKB, UNKNOWN, (int) INT_FTYPE_V8QI },
31132 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_pshufw, "__builtin_ia32_pshufw", IX86_BUILTIN_PSHUFW, UNKNOWN, (int) V4HI_FTYPE_V4HI_INT },
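/* These MMX-register builtins require SSE or 3DNow!A because the underlying
   instructions (pavgb, pmulhuw, pmaxub, psadbw, pmovmskb, pshufw, ...) were
   introduced with the SSE integer extensions.  For illustration only
   (mapping assumed from xmmintrin.h): _mm_avg_pu8 (a, b) expands to
   __builtin_ia32_pavgb (a, b).  */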
31134 /* SSE2 */
31135 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_shufpd, "__builtin_ia32_shufpd", IX86_BUILTIN_SHUFPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
31137 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movmskpd, "__builtin_ia32_movmskpd", IX86_BUILTIN_MOVMSKPD, UNKNOWN, (int) INT_FTYPE_V2DF },
31138 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_pmovmskb, "__builtin_ia32_pmovmskb128", IX86_BUILTIN_PMOVMSKB128, UNKNOWN, (int) INT_FTYPE_V16QI },
31139 { OPTION_MASK_ISA_SSE2, CODE_FOR_sqrtv2df2, "__builtin_ia32_sqrtpd", IX86_BUILTIN_SQRTPD, UNKNOWN, (int) V2DF_FTYPE_V2DF },
31140 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtdq2pd, "__builtin_ia32_cvtdq2pd", IX86_BUILTIN_CVTDQ2PD, UNKNOWN, (int) V2DF_FTYPE_V4SI },
31141 { OPTION_MASK_ISA_SSE2, CODE_FOR_floatv4siv4sf2, "__builtin_ia32_cvtdq2ps", IX86_BUILTIN_CVTDQ2PS, UNKNOWN, (int) V4SF_FTYPE_V4SI },
31143 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtpd2dq, "__builtin_ia32_cvtpd2dq", IX86_BUILTIN_CVTPD2DQ, UNKNOWN, (int) V4SI_FTYPE_V2DF },
31144 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtpd2pi, "__builtin_ia32_cvtpd2pi", IX86_BUILTIN_CVTPD2PI, UNKNOWN, (int) V2SI_FTYPE_V2DF },
31145 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtpd2ps, "__builtin_ia32_cvtpd2ps", IX86_BUILTIN_CVTPD2PS, UNKNOWN, (int) V4SF_FTYPE_V2DF },
31146 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvttpd2dq, "__builtin_ia32_cvttpd2dq", IX86_BUILTIN_CVTTPD2DQ, UNKNOWN, (int) V4SI_FTYPE_V2DF },
31147 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvttpd2pi, "__builtin_ia32_cvttpd2pi", IX86_BUILTIN_CVTTPD2PI, UNKNOWN, (int) V2SI_FTYPE_V2DF },
31149 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtpi2pd, "__builtin_ia32_cvtpi2pd", IX86_BUILTIN_CVTPI2PD, UNKNOWN, (int) V2DF_FTYPE_V2SI },
31151 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtsd2si, "__builtin_ia32_cvtsd2si", IX86_BUILTIN_CVTSD2SI, UNKNOWN, (int) INT_FTYPE_V2DF },
31152 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvttsd2si, "__builtin_ia32_cvttsd2si", IX86_BUILTIN_CVTTSD2SI, UNKNOWN, (int) INT_FTYPE_V2DF },
31153 { OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_64BIT, CODE_FOR_sse2_cvtsd2siq, "__builtin_ia32_cvtsd2si64", IX86_BUILTIN_CVTSD2SI64, UNKNOWN, (int) INT64_FTYPE_V2DF },
31154 { OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_64BIT, CODE_FOR_sse2_cvttsd2siq, "__builtin_ia32_cvttsd2si64", IX86_BUILTIN_CVTTSD2SI64, UNKNOWN, (int) INT64_FTYPE_V2DF },
31156 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_fix_notruncv4sfv4si, "__builtin_ia32_cvtps2dq", IX86_BUILTIN_CVTPS2DQ, UNKNOWN, (int) V4SI_FTYPE_V4SF },
31157 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtps2pd, "__builtin_ia32_cvtps2pd", IX86_BUILTIN_CVTPS2PD, UNKNOWN, (int) V2DF_FTYPE_V4SF },
31158 { OPTION_MASK_ISA_SSE2, CODE_FOR_fix_truncv4sfv4si2, "__builtin_ia32_cvttps2dq", IX86_BUILTIN_CVTTPS2DQ, UNKNOWN, (int) V4SI_FTYPE_V4SF },
31160 { OPTION_MASK_ISA_SSE2, CODE_FOR_addv2df3, "__builtin_ia32_addpd", IX86_BUILTIN_ADDPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
31161 { OPTION_MASK_ISA_SSE2, CODE_FOR_subv2df3, "__builtin_ia32_subpd", IX86_BUILTIN_SUBPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
31162 { OPTION_MASK_ISA_SSE2, CODE_FOR_mulv2df3, "__builtin_ia32_mulpd", IX86_BUILTIN_MULPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
31163 { OPTION_MASK_ISA_SSE2, CODE_FOR_divv2df3, "__builtin_ia32_divpd", IX86_BUILTIN_DIVPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
31164 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmaddv2df3, "__builtin_ia32_addsd", IX86_BUILTIN_ADDSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
31165 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmsubv2df3, "__builtin_ia32_subsd", IX86_BUILTIN_SUBSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
31166 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmulv2df3, "__builtin_ia32_mulsd", IX86_BUILTIN_MULSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
31167 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmdivv2df3, "__builtin_ia32_divsd", IX86_BUILTIN_DIVSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
31169 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpeqpd", IX86_BUILTIN_CMPEQPD, EQ, (int) V2DF_FTYPE_V2DF_V2DF },
31170 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpltpd", IX86_BUILTIN_CMPLTPD, LT, (int) V2DF_FTYPE_V2DF_V2DF },
31171 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmplepd", IX86_BUILTIN_CMPLEPD, LE, (int) V2DF_FTYPE_V2DF_V2DF },
31172 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpgtpd", IX86_BUILTIN_CMPGTPD, LT, (int) V2DF_FTYPE_V2DF_V2DF_SWAP },
31173 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpgepd", IX86_BUILTIN_CMPGEPD, LE, (int) V2DF_FTYPE_V2DF_V2DF_SWAP },
31174 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpunordpd", IX86_BUILTIN_CMPUNORDPD, UNORDERED, (int) V2DF_FTYPE_V2DF_V2DF },
31175 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpneqpd", IX86_BUILTIN_CMPNEQPD, NE, (int) V2DF_FTYPE_V2DF_V2DF },
31176 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpnltpd", IX86_BUILTIN_CMPNLTPD, UNGE, (int) V2DF_FTYPE_V2DF_V2DF },
31177 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpnlepd", IX86_BUILTIN_CMPNLEPD, UNGT, (int) V2DF_FTYPE_V2DF_V2DF },
31178 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpngtpd", IX86_BUILTIN_CMPNGTPD, UNGE, (int) V2DF_FTYPE_V2DF_V2DF_SWAP },
31179 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpngepd", IX86_BUILTIN_CMPNGEPD, UNGT, (int) V2DF_FTYPE_V2DF_V2DF_SWAP },
31180 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpordpd", IX86_BUILTIN_CMPORDPD, ORDERED, (int) V2DF_FTYPE_V2DF_V2DF },
31181 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpeqsd", IX86_BUILTIN_CMPEQSD, EQ, (int) V2DF_FTYPE_V2DF_V2DF },
31182 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpltsd", IX86_BUILTIN_CMPLTSD, LT, (int) V2DF_FTYPE_V2DF_V2DF },
31183 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmplesd", IX86_BUILTIN_CMPLESD, LE, (int) V2DF_FTYPE_V2DF_V2DF },
31184 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpunordsd", IX86_BUILTIN_CMPUNORDSD, UNORDERED, (int) V2DF_FTYPE_V2DF_V2DF },
31185 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpneqsd", IX86_BUILTIN_CMPNEQSD, NE, (int) V2DF_FTYPE_V2DF_V2DF },
31186 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpnltsd", IX86_BUILTIN_CMPNLTSD, UNGE, (int) V2DF_FTYPE_V2DF_V2DF },
31187 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpnlesd", IX86_BUILTIN_CMPNLESD, UNGT, (int) V2DF_FTYPE_V2DF_V2DF },
31188 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpordsd", IX86_BUILTIN_CMPORDSD, ORDERED, (int) V2DF_FTYPE_V2DF_V2DF },
31190 { OPTION_MASK_ISA_SSE2, CODE_FOR_sminv2df3, "__builtin_ia32_minpd", IX86_BUILTIN_MINPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
31191 { OPTION_MASK_ISA_SSE2, CODE_FOR_smaxv2df3, "__builtin_ia32_maxpd", IX86_BUILTIN_MAXPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
31192 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmsminv2df3, "__builtin_ia32_minsd", IX86_BUILTIN_MINSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
31193 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmsmaxv2df3, "__builtin_ia32_maxsd", IX86_BUILTIN_MAXSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
31195 { OPTION_MASK_ISA_SSE2, CODE_FOR_andv2df3, "__builtin_ia32_andpd", IX86_BUILTIN_ANDPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
31196 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_andnotv2df3, "__builtin_ia32_andnpd", IX86_BUILTIN_ANDNPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
31197 { OPTION_MASK_ISA_SSE2, CODE_FOR_iorv2df3, "__builtin_ia32_orpd", IX86_BUILTIN_ORPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
31198 { OPTION_MASK_ISA_SSE2, CODE_FOR_xorv2df3, "__builtin_ia32_xorpd", IX86_BUILTIN_XORPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
31200 { OPTION_MASK_ISA_SSE2, CODE_FOR_copysignv2df3, "__builtin_ia32_copysignpd", IX86_BUILTIN_CPYSGNPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
31202 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movsd, "__builtin_ia32_movsd", IX86_BUILTIN_MOVSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
31203 { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_highv2df, "__builtin_ia32_unpckhpd", IX86_BUILTIN_UNPCKHPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
31204 { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_lowv2df, "__builtin_ia32_unpcklpd", IX86_BUILTIN_UNPCKLPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
31206 { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_pack_sfix_v2df, "__builtin_ia32_vec_pack_sfix", IX86_BUILTIN_VEC_PACK_SFIX, UNKNOWN, (int) V4SI_FTYPE_V2DF_V2DF },
31208 { OPTION_MASK_ISA_SSE2, CODE_FOR_addv16qi3, "__builtin_ia32_paddb128", IX86_BUILTIN_PADDB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
31209 { OPTION_MASK_ISA_SSE2, CODE_FOR_addv8hi3, "__builtin_ia32_paddw128", IX86_BUILTIN_PADDW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
31210 { OPTION_MASK_ISA_SSE2, CODE_FOR_addv4si3, "__builtin_ia32_paddd128", IX86_BUILTIN_PADDD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
31211 { OPTION_MASK_ISA_SSE2, CODE_FOR_addv2di3, "__builtin_ia32_paddq128", IX86_BUILTIN_PADDQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
31212 { OPTION_MASK_ISA_SSE2, CODE_FOR_subv16qi3, "__builtin_ia32_psubb128", IX86_BUILTIN_PSUBB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
31213 { OPTION_MASK_ISA_SSE2, CODE_FOR_subv8hi3, "__builtin_ia32_psubw128", IX86_BUILTIN_PSUBW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
31214 { OPTION_MASK_ISA_SSE2, CODE_FOR_subv4si3, "__builtin_ia32_psubd128", IX86_BUILTIN_PSUBD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
31215 { OPTION_MASK_ISA_SSE2, CODE_FOR_subv2di3, "__builtin_ia32_psubq128", IX86_BUILTIN_PSUBQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
31217 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ssaddv16qi3, "__builtin_ia32_paddsb128", IX86_BUILTIN_PADDSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
31218 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ssaddv8hi3, "__builtin_ia32_paddsw128", IX86_BUILTIN_PADDSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
31219 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_sssubv16qi3, "__builtin_ia32_psubsb128", IX86_BUILTIN_PSUBSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
31220 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_sssubv8hi3, "__builtin_ia32_psubsw128", IX86_BUILTIN_PSUBSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
31221 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_usaddv16qi3, "__builtin_ia32_paddusb128", IX86_BUILTIN_PADDUSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
31222 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_usaddv8hi3, "__builtin_ia32_paddusw128", IX86_BUILTIN_PADDUSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
31223 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ussubv16qi3, "__builtin_ia32_psubusb128", IX86_BUILTIN_PSUBUSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
31224 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ussubv8hi3, "__builtin_ia32_psubusw128", IX86_BUILTIN_PSUBUSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
31226 { OPTION_MASK_ISA_SSE2, CODE_FOR_mulv8hi3, "__builtin_ia32_pmullw128", IX86_BUILTIN_PMULLW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
31227 { OPTION_MASK_ISA_SSE2, CODE_FOR_smulv8hi3_highpart, "__builtin_ia32_pmulhw128", IX86_BUILTIN_PMULHW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
31229 { OPTION_MASK_ISA_SSE2, CODE_FOR_andv2di3, "__builtin_ia32_pand128", IX86_BUILTIN_PAND128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
31230 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_andnotv2di3, "__builtin_ia32_pandn128", IX86_BUILTIN_PANDN128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
31231 { OPTION_MASK_ISA_SSE2, CODE_FOR_iorv2di3, "__builtin_ia32_por128", IX86_BUILTIN_POR128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
31232 { OPTION_MASK_ISA_SSE2, CODE_FOR_xorv2di3, "__builtin_ia32_pxor128", IX86_BUILTIN_PXOR128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
31234 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_uavgv16qi3, "__builtin_ia32_pavgb128", IX86_BUILTIN_PAVGB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
31235 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_uavgv8hi3, "__builtin_ia32_pavgw128", IX86_BUILTIN_PAVGW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
31237 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_eqv16qi3, "__builtin_ia32_pcmpeqb128", IX86_BUILTIN_PCMPEQB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
31238 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_eqv8hi3, "__builtin_ia32_pcmpeqw128", IX86_BUILTIN_PCMPEQW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
31239 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_eqv4si3, "__builtin_ia32_pcmpeqd128", IX86_BUILTIN_PCMPEQD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
31240 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_gtv16qi3, "__builtin_ia32_pcmpgtb128", IX86_BUILTIN_PCMPGTB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
31241 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_gtv8hi3, "__builtin_ia32_pcmpgtw128", IX86_BUILTIN_PCMPGTW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
31242 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_gtv4si3, "__builtin_ia32_pcmpgtd128", IX86_BUILTIN_PCMPGTD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
31244 { OPTION_MASK_ISA_SSE2, CODE_FOR_umaxv16qi3, "__builtin_ia32_pmaxub128", IX86_BUILTIN_PMAXUB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
31245 { OPTION_MASK_ISA_SSE2, CODE_FOR_smaxv8hi3, "__builtin_ia32_pmaxsw128", IX86_BUILTIN_PMAXSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
31246 { OPTION_MASK_ISA_SSE2, CODE_FOR_uminv16qi3, "__builtin_ia32_pminub128", IX86_BUILTIN_PMINUB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
31247 { OPTION_MASK_ISA_SSE2, CODE_FOR_sminv8hi3, "__builtin_ia32_pminsw128", IX86_BUILTIN_PMINSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
31249 { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_highv16qi, "__builtin_ia32_punpckhbw128", IX86_BUILTIN_PUNPCKHBW128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
31250 { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_highv8hi, "__builtin_ia32_punpckhwd128", IX86_BUILTIN_PUNPCKHWD128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
31251 { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_highv4si, "__builtin_ia32_punpckhdq128", IX86_BUILTIN_PUNPCKHDQ128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
31252 { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_highv2di, "__builtin_ia32_punpckhqdq128", IX86_BUILTIN_PUNPCKHQDQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
31253 { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_lowv16qi, "__builtin_ia32_punpcklbw128", IX86_BUILTIN_PUNPCKLBW128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
31254 { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_lowv8hi, "__builtin_ia32_punpcklwd128", IX86_BUILTIN_PUNPCKLWD128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
31255 { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_lowv4si, "__builtin_ia32_punpckldq128", IX86_BUILTIN_PUNPCKLDQ128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
31256 { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_lowv2di, "__builtin_ia32_punpcklqdq128", IX86_BUILTIN_PUNPCKLQDQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
31258 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_packsswb, "__builtin_ia32_packsswb128", IX86_BUILTIN_PACKSSWB128, UNKNOWN, (int) V16QI_FTYPE_V8HI_V8HI },
31259 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_packssdw, "__builtin_ia32_packssdw128", IX86_BUILTIN_PACKSSDW128, UNKNOWN, (int) V8HI_FTYPE_V4SI_V4SI },
31260 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_packuswb, "__builtin_ia32_packuswb128", IX86_BUILTIN_PACKUSWB128, UNKNOWN, (int) V16QI_FTYPE_V8HI_V8HI },
31262 { OPTION_MASK_ISA_SSE2, CODE_FOR_umulv8hi3_highpart, "__builtin_ia32_pmulhuw128", IX86_BUILTIN_PMULHUW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
31263 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_psadbw, "__builtin_ia32_psadbw128", IX86_BUILTIN_PSADBW128, UNKNOWN, (int) V2DI_FTYPE_V16QI_V16QI },
31265 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_umulv1siv1di3, "__builtin_ia32_pmuludq", IX86_BUILTIN_PMULUDQ, UNKNOWN, (int) V1DI_FTYPE_V2SI_V2SI },
31266 { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_widen_umult_even_v4si, "__builtin_ia32_pmuludq128", IX86_BUILTIN_PMULUDQ128, UNKNOWN, (int) V2DI_FTYPE_V4SI_V4SI },
31268 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_pmaddwd, "__builtin_ia32_pmaddwd128", IX86_BUILTIN_PMADDWD128, UNKNOWN, (int) V4SI_FTYPE_V8HI_V8HI },
31270 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtsi2sd, "__builtin_ia32_cvtsi2sd", IX86_BUILTIN_CVTSI2SD, UNKNOWN, (int) V2DF_FTYPE_V2DF_SI },
31271 { OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_64BIT, CODE_FOR_sse2_cvtsi2sdq, "__builtin_ia32_cvtsi642sd", IX86_BUILTIN_CVTSI642SD, UNKNOWN, (int) V2DF_FTYPE_V2DF_DI },
31272 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtsd2ss, "__builtin_ia32_cvtsd2ss", IX86_BUILTIN_CVTSD2SS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V2DF },
31273 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtss2sd, "__builtin_ia32_cvtss2sd", IX86_BUILTIN_CVTSS2SD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V4SF },
31275 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ashlv1ti3, "__builtin_ia32_pslldqi128", IX86_BUILTIN_PSLLDQI128, UNKNOWN, (int) V2DI_FTYPE_V2DI_INT_CONVERT },
31276 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashlv8hi3, "__builtin_ia32_psllwi128", IX86_BUILTIN_PSLLWI128, UNKNOWN, (int) V8HI_FTYPE_V8HI_SI_COUNT },
31277 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashlv4si3, "__builtin_ia32_pslldi128", IX86_BUILTIN_PSLLDI128, UNKNOWN, (int) V4SI_FTYPE_V4SI_SI_COUNT },
31278 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashlv2di3, "__builtin_ia32_psllqi128", IX86_BUILTIN_PSLLQI128, UNKNOWN, (int) V2DI_FTYPE_V2DI_SI_COUNT },
31279 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashlv8hi3, "__builtin_ia32_psllw128", IX86_BUILTIN_PSLLW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_COUNT },
31280 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashlv4si3, "__builtin_ia32_pslld128", IX86_BUILTIN_PSLLD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_COUNT },
31281 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashlv2di3, "__builtin_ia32_psllq128", IX86_BUILTIN_PSLLQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_COUNT },
31283 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_lshrv1ti3, "__builtin_ia32_psrldqi128", IX86_BUILTIN_PSRLDQI128, UNKNOWN, (int) V2DI_FTYPE_V2DI_INT_CONVERT },
31284 { OPTION_MASK_ISA_SSE2, CODE_FOR_lshrv8hi3, "__builtin_ia32_psrlwi128", IX86_BUILTIN_PSRLWI128, UNKNOWN, (int) V8HI_FTYPE_V8HI_SI_COUNT },
31285 { OPTION_MASK_ISA_SSE2, CODE_FOR_lshrv4si3, "__builtin_ia32_psrldi128", IX86_BUILTIN_PSRLDI128, UNKNOWN, (int) V4SI_FTYPE_V4SI_SI_COUNT },
31286 { OPTION_MASK_ISA_SSE2, CODE_FOR_lshrv2di3, "__builtin_ia32_psrlqi128", IX86_BUILTIN_PSRLQI128, UNKNOWN, (int) V2DI_FTYPE_V2DI_SI_COUNT },
31287 { OPTION_MASK_ISA_SSE2, CODE_FOR_lshrv8hi3, "__builtin_ia32_psrlw128", IX86_BUILTIN_PSRLW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_COUNT },
31288 { OPTION_MASK_ISA_SSE2, CODE_FOR_lshrv4si3, "__builtin_ia32_psrld128", IX86_BUILTIN_PSRLD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_COUNT },
31289 { OPTION_MASK_ISA_SSE2, CODE_FOR_lshrv2di3, "__builtin_ia32_psrlq128", IX86_BUILTIN_PSRLQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_COUNT },
31291 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashrv8hi3, "__builtin_ia32_psrawi128", IX86_BUILTIN_PSRAWI128, UNKNOWN, (int) V8HI_FTYPE_V8HI_SI_COUNT },
31292 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashrv4si3, "__builtin_ia32_psradi128", IX86_BUILTIN_PSRADI128, UNKNOWN, (int) V4SI_FTYPE_V4SI_SI_COUNT },
31293 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashrv8hi3, "__builtin_ia32_psraw128", IX86_BUILTIN_PSRAW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_COUNT },
31294 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashrv4si3, "__builtin_ia32_psrad128", IX86_BUILTIN_PSRAD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_COUNT },
31296 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_pshufd, "__builtin_ia32_pshufd", IX86_BUILTIN_PSHUFD, UNKNOWN, (int) V4SI_FTYPE_V4SI_INT },
31297 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_pshuflw, "__builtin_ia32_pshuflw", IX86_BUILTIN_PSHUFLW, UNKNOWN, (int) V8HI_FTYPE_V8HI_INT },
31298 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_pshufhw, "__builtin_ia32_pshufhw", IX86_BUILTIN_PSHUFHW, UNKNOWN, (int) V8HI_FTYPE_V8HI_INT },
31300 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmsqrtv2df2, "__builtin_ia32_sqrtsd", IX86_BUILTIN_SQRTSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_VEC_MERGE },
31302 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movq128, "__builtin_ia32_movq128", IX86_BUILTIN_MOVQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI },
31304 /* SSE2 MMX */
31305 { OPTION_MASK_ISA_SSE2, CODE_FOR_mmx_addv1di3, "__builtin_ia32_paddq", IX86_BUILTIN_PADDQ, UNKNOWN, (int) V1DI_FTYPE_V1DI_V1DI },
31306 { OPTION_MASK_ISA_SSE2, CODE_FOR_mmx_subv1di3, "__builtin_ia32_psubq", IX86_BUILTIN_PSUBQ, UNKNOWN, (int) V1DI_FTYPE_V1DI_V1DI },
31308 /* SSE3 */
31309 { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_movshdup, "__builtin_ia32_movshdup", IX86_BUILTIN_MOVSHDUP, UNKNOWN, (int) V4SF_FTYPE_V4SF },
31310 { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_movsldup, "__builtin_ia32_movsldup", IX86_BUILTIN_MOVSLDUP, UNKNOWN, (int) V4SF_FTYPE_V4SF },
31312 { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_addsubv4sf3, "__builtin_ia32_addsubps", IX86_BUILTIN_ADDSUBPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
31313 { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_addsubv2df3, "__builtin_ia32_addsubpd", IX86_BUILTIN_ADDSUBPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
31314 { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_haddv4sf3, "__builtin_ia32_haddps", IX86_BUILTIN_HADDPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
31315 { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_haddv2df3, "__builtin_ia32_haddpd", IX86_BUILTIN_HADDPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
31316 { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_hsubv4sf3, "__builtin_ia32_hsubps", IX86_BUILTIN_HSUBPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
31317 { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_hsubv2df3, "__builtin_ia32_hsubpd", IX86_BUILTIN_HSUBPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
31319 /* SSSE3 */
31320 { OPTION_MASK_ISA_SSSE3, CODE_FOR_absv16qi2, "__builtin_ia32_pabsb128", IX86_BUILTIN_PABSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI },
31321 { OPTION_MASK_ISA_SSSE3, CODE_FOR_absv8qi2, "__builtin_ia32_pabsb", IX86_BUILTIN_PABSB, UNKNOWN, (int) V8QI_FTYPE_V8QI },
31322 { OPTION_MASK_ISA_SSSE3, CODE_FOR_absv8hi2, "__builtin_ia32_pabsw128", IX86_BUILTIN_PABSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI },
31323 { OPTION_MASK_ISA_SSSE3, CODE_FOR_absv4hi2, "__builtin_ia32_pabsw", IX86_BUILTIN_PABSW, UNKNOWN, (int) V4HI_FTYPE_V4HI },
31324 { OPTION_MASK_ISA_SSSE3, CODE_FOR_absv4si2, "__builtin_ia32_pabsd128", IX86_BUILTIN_PABSD128, UNKNOWN, (int) V4SI_FTYPE_V4SI },
31325 { OPTION_MASK_ISA_SSSE3, CODE_FOR_absv2si2, "__builtin_ia32_pabsd", IX86_BUILTIN_PABSD, UNKNOWN, (int) V2SI_FTYPE_V2SI },
31327 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phaddwv8hi3, "__builtin_ia32_phaddw128", IX86_BUILTIN_PHADDW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
31328 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phaddwv4hi3, "__builtin_ia32_phaddw", IX86_BUILTIN_PHADDW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
31329 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phadddv4si3, "__builtin_ia32_phaddd128", IX86_BUILTIN_PHADDD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
31330 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phadddv2si3, "__builtin_ia32_phaddd", IX86_BUILTIN_PHADDD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
31331 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phaddswv8hi3, "__builtin_ia32_phaddsw128", IX86_BUILTIN_PHADDSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
31332 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phaddswv4hi3, "__builtin_ia32_phaddsw", IX86_BUILTIN_PHADDSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
31333 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phsubwv8hi3, "__builtin_ia32_phsubw128", IX86_BUILTIN_PHSUBW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
31334 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phsubwv4hi3, "__builtin_ia32_phsubw", IX86_BUILTIN_PHSUBW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
31335 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phsubdv4si3, "__builtin_ia32_phsubd128", IX86_BUILTIN_PHSUBD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
31336 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phsubdv2si3, "__builtin_ia32_phsubd", IX86_BUILTIN_PHSUBD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
31337 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phsubswv8hi3, "__builtin_ia32_phsubsw128", IX86_BUILTIN_PHSUBSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
31338 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phsubswv4hi3, "__builtin_ia32_phsubsw", IX86_BUILTIN_PHSUBSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
31339 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_pmaddubsw128, "__builtin_ia32_pmaddubsw128", IX86_BUILTIN_PMADDUBSW128, UNKNOWN, (int) V8HI_FTYPE_V16QI_V16QI },
31340 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_pmaddubsw, "__builtin_ia32_pmaddubsw", IX86_BUILTIN_PMADDUBSW, UNKNOWN, (int) V4HI_FTYPE_V8QI_V8QI },
31341 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_pmulhrswv8hi3, "__builtin_ia32_pmulhrsw128", IX86_BUILTIN_PMULHRSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
31342 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_pmulhrswv4hi3, "__builtin_ia32_pmulhrsw", IX86_BUILTIN_PMULHRSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
31343 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_pshufbv16qi3, "__builtin_ia32_pshufb128", IX86_BUILTIN_PSHUFB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
31344 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_pshufbv8qi3, "__builtin_ia32_pshufb", IX86_BUILTIN_PSHUFB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
31345 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_psignv16qi3, "__builtin_ia32_psignb128", IX86_BUILTIN_PSIGNB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
31346 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_psignv8qi3, "__builtin_ia32_psignb", IX86_BUILTIN_PSIGNB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
31347 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_psignv8hi3, "__builtin_ia32_psignw128", IX86_BUILTIN_PSIGNW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
31348 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_psignv4hi3, "__builtin_ia32_psignw", IX86_BUILTIN_PSIGNW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
31349 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_psignv4si3, "__builtin_ia32_psignd128", IX86_BUILTIN_PSIGND128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
31350 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_psignv2si3, "__builtin_ia32_psignd", IX86_BUILTIN_PSIGND, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
31352 /* SSSE3. */
31353 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_palignrti, "__builtin_ia32_palignr128", IX86_BUILTIN_PALIGNR128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_INT_CONVERT },
31354 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_palignrdi, "__builtin_ia32_palignr", IX86_BUILTIN_PALIGNR, UNKNOWN, (int) V1DI_FTYPE_V1DI_V1DI_INT_CONVERT },
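/* The *_INT_CONVERT prototypes above mark rows whose vector arguments are
   re-interpreted in the mode of the insn pattern during expansion: the
   V2DI/V1DI operands of __builtin_ia32_palignr128 and __builtin_ia32_palignr
   are handled as a single TImode/DImode value by CODE_FOR_ssse3_palignrti
   and CODE_FOR_ssse3_palignrdi (likewise for the pslldqi/psrldqi rows in the
   SSE2 section).  This is a rough description of the convention, not of the
   exact expansion code.  */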
31356 /* SSE4.1 */
31357 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_blendpd, "__builtin_ia32_blendpd", IX86_BUILTIN_BLENDPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
31358 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_blendps, "__builtin_ia32_blendps", IX86_BUILTIN_BLENDPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
31359 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_blendvpd, "__builtin_ia32_blendvpd", IX86_BUILTIN_BLENDVPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF },
31360 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_blendvps, "__builtin_ia32_blendvps", IX86_BUILTIN_BLENDVPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF },
31361 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_dppd, "__builtin_ia32_dppd", IX86_BUILTIN_DPPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
31362 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_dpps, "__builtin_ia32_dpps", IX86_BUILTIN_DPPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
31363 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_insertps, "__builtin_ia32_insertps128", IX86_BUILTIN_INSERTPS128, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
31364 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_mpsadbw, "__builtin_ia32_mpsadbw128", IX86_BUILTIN_MPSADBW128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_INT },
31365 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_pblendvb, "__builtin_ia32_pblendvb128", IX86_BUILTIN_PBLENDVB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_V16QI },
31366 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_pblendw, "__builtin_ia32_pblendw128", IX86_BUILTIN_PBLENDW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_INT },
31368 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_sign_extendv8qiv8hi2, "__builtin_ia32_pmovsxbw128", IX86_BUILTIN_PMOVSXBW128, UNKNOWN, (int) V8HI_FTYPE_V16QI },
31369 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_sign_extendv4qiv4si2, "__builtin_ia32_pmovsxbd128", IX86_BUILTIN_PMOVSXBD128, UNKNOWN, (int) V4SI_FTYPE_V16QI },
31370 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_sign_extendv2qiv2di2, "__builtin_ia32_pmovsxbq128", IX86_BUILTIN_PMOVSXBQ128, UNKNOWN, (int) V2DI_FTYPE_V16QI },
31371 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_sign_extendv4hiv4si2, "__builtin_ia32_pmovsxwd128", IX86_BUILTIN_PMOVSXWD128, UNKNOWN, (int) V4SI_FTYPE_V8HI },
31372 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_sign_extendv2hiv2di2, "__builtin_ia32_pmovsxwq128", IX86_BUILTIN_PMOVSXWQ128, UNKNOWN, (int) V2DI_FTYPE_V8HI },
31373 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_sign_extendv2siv2di2, "__builtin_ia32_pmovsxdq128", IX86_BUILTIN_PMOVSXDQ128, UNKNOWN, (int) V2DI_FTYPE_V4SI },
31374 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_zero_extendv8qiv8hi2, "__builtin_ia32_pmovzxbw128", IX86_BUILTIN_PMOVZXBW128, UNKNOWN, (int) V8HI_FTYPE_V16QI },
31375 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_zero_extendv4qiv4si2, "__builtin_ia32_pmovzxbd128", IX86_BUILTIN_PMOVZXBD128, UNKNOWN, (int) V4SI_FTYPE_V16QI },
31376 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_zero_extendv2qiv2di2, "__builtin_ia32_pmovzxbq128", IX86_BUILTIN_PMOVZXBQ128, UNKNOWN, (int) V2DI_FTYPE_V16QI },
31377 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_zero_extendv4hiv4si2, "__builtin_ia32_pmovzxwd128", IX86_BUILTIN_PMOVZXWD128, UNKNOWN, (int) V4SI_FTYPE_V8HI },
31378 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_zero_extendv2hiv2di2, "__builtin_ia32_pmovzxwq128", IX86_BUILTIN_PMOVZXWQ128, UNKNOWN, (int) V2DI_FTYPE_V8HI },
31379 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_zero_extendv2siv2di2, "__builtin_ia32_pmovzxdq128", IX86_BUILTIN_PMOVZXDQ128, UNKNOWN, (int) V2DI_FTYPE_V4SI },
31380 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_phminposuw, "__builtin_ia32_phminposuw128", IX86_BUILTIN_PHMINPOSUW128, UNKNOWN, (int) V8HI_FTYPE_V8HI },
31382 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_packusdw, "__builtin_ia32_packusdw128", IX86_BUILTIN_PACKUSDW128, UNKNOWN, (int) V8HI_FTYPE_V4SI_V4SI },
31383 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_eqv2di3, "__builtin_ia32_pcmpeqq", IX86_BUILTIN_PCMPEQQ, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
31384 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_smaxv16qi3, "__builtin_ia32_pmaxsb128", IX86_BUILTIN_PMAXSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
31385 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_smaxv4si3, "__builtin_ia32_pmaxsd128", IX86_BUILTIN_PMAXSD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
31386 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_umaxv4si3, "__builtin_ia32_pmaxud128", IX86_BUILTIN_PMAXUD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
31387 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_umaxv8hi3, "__builtin_ia32_pmaxuw128", IX86_BUILTIN_PMAXUW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
31388 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sminv16qi3, "__builtin_ia32_pminsb128", IX86_BUILTIN_PMINSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
31389 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sminv4si3, "__builtin_ia32_pminsd128", IX86_BUILTIN_PMINSD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
31390 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_uminv4si3, "__builtin_ia32_pminud128", IX86_BUILTIN_PMINUD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
31391 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_uminv8hi3, "__builtin_ia32_pminuw128", IX86_BUILTIN_PMINUW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
31392 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_mulv2siv2di3, "__builtin_ia32_pmuldq128", IX86_BUILTIN_PMULDQ128, UNKNOWN, (int) V2DI_FTYPE_V4SI_V4SI },
31393 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_mulv4si3, "__builtin_ia32_pmulld128", IX86_BUILTIN_PMULLD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
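/* Each entry in this table follows the builtin_description layout:
   { ISA mask, insn pattern, builtin name, builtin enum, sub-code
   (a comparison or rounding selector, else UNKNOWN), and the (int)-cast
   function prototype enum }.  The SSE4.1 block that ends here covers the
   variable blends, the packed sign/zero extensions (pmovsx*/pmovzx*),
   packusdw, the byte/dword min/max variants and pmuldq/pmulld.  */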
31395 /* SSE4.1 */
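/* SSE4.1 rounding forms.  The floor/ceil/trunc/rint variants reuse the
   shared sse4_1_roundpd/roundps patterns by storing ROUND_FLOOR,
   ROUND_CEIL, ROUND_TRUNC or ROUND_MXCSR in the sub-code field, which is
   then supplied as the rounding-control immediate when the builtin is
   expanded.  The *_sfix and *_vec_pack_sfix entries additionally convert
   the rounded results to packed 32-bit integers.  */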
31396 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundpd, "__builtin_ia32_roundpd", IX86_BUILTIN_ROUNDPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_INT },
31397 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundps, "__builtin_ia32_roundps", IX86_BUILTIN_ROUNDPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_INT },
31398 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundsd, "__builtin_ia32_roundsd", IX86_BUILTIN_ROUNDSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
31399 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundss, "__builtin_ia32_roundss", IX86_BUILTIN_ROUNDSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
31401 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundpd, "__builtin_ia32_floorpd", IX86_BUILTIN_FLOORPD, (enum rtx_code) ROUND_FLOOR, (int) V2DF_FTYPE_V2DF_ROUND },
31402 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundpd, "__builtin_ia32_ceilpd", IX86_BUILTIN_CEILPD, (enum rtx_code) ROUND_CEIL, (int) V2DF_FTYPE_V2DF_ROUND },
31403 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundpd, "__builtin_ia32_truncpd", IX86_BUILTIN_TRUNCPD, (enum rtx_code) ROUND_TRUNC, (int) V2DF_FTYPE_V2DF_ROUND },
31404 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundpd, "__builtin_ia32_rintpd", IX86_BUILTIN_RINTPD, (enum rtx_code) ROUND_MXCSR, (int) V2DF_FTYPE_V2DF_ROUND },
31406 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundpd_vec_pack_sfix, "__builtin_ia32_floorpd_vec_pack_sfix", IX86_BUILTIN_FLOORPD_VEC_PACK_SFIX, (enum rtx_code) ROUND_FLOOR, (int) V4SI_FTYPE_V2DF_V2DF_ROUND },
31407 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundpd_vec_pack_sfix, "__builtin_ia32_ceilpd_vec_pack_sfix", IX86_BUILTIN_CEILPD_VEC_PACK_SFIX, (enum rtx_code) ROUND_CEIL, (int) V4SI_FTYPE_V2DF_V2DF_ROUND },
31409 { OPTION_MASK_ISA_ROUND, CODE_FOR_roundv2df2, "__builtin_ia32_roundpd_az", IX86_BUILTIN_ROUNDPD_AZ, UNKNOWN, (int) V2DF_FTYPE_V2DF },
31410 { OPTION_MASK_ISA_ROUND, CODE_FOR_roundv2df2_vec_pack_sfix, "__builtin_ia32_roundpd_az_vec_pack_sfix", IX86_BUILTIN_ROUNDPD_AZ_VEC_PACK_SFIX, UNKNOWN, (int) V4SI_FTYPE_V2DF_V2DF },
31412 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundps, "__builtin_ia32_floorps", IX86_BUILTIN_FLOORPS, (enum rtx_code) ROUND_FLOOR, (int) V4SF_FTYPE_V4SF_ROUND },
31413 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundps, "__builtin_ia32_ceilps", IX86_BUILTIN_CEILPS, (enum rtx_code) ROUND_CEIL, (int) V4SF_FTYPE_V4SF_ROUND },
31414 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundps, "__builtin_ia32_truncps", IX86_BUILTIN_TRUNCPS, (enum rtx_code) ROUND_TRUNC, (int) V4SF_FTYPE_V4SF_ROUND },
31415 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundps, "__builtin_ia32_rintps", IX86_BUILTIN_RINTPS, (enum rtx_code) ROUND_MXCSR, (int) V4SF_FTYPE_V4SF_ROUND },
31417 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundps_sfix, "__builtin_ia32_floorps_sfix", IX86_BUILTIN_FLOORPS_SFIX, (enum rtx_code) ROUND_FLOOR, (int) V4SI_FTYPE_V4SF_ROUND },
31418 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundps_sfix, "__builtin_ia32_ceilps_sfix", IX86_BUILTIN_CEILPS_SFIX, (enum rtx_code) ROUND_CEIL, (int) V4SI_FTYPE_V4SF_ROUND },
31420 { OPTION_MASK_ISA_ROUND, CODE_FOR_roundv4sf2, "__builtin_ia32_roundps_az", IX86_BUILTIN_ROUNDPS_AZ, UNKNOWN, (int) V4SF_FTYPE_V4SF },
31421 { OPTION_MASK_ISA_ROUND, CODE_FOR_roundv4sf2_sfix, "__builtin_ia32_roundps_az_sfix", IX86_BUILTIN_ROUNDPS_AZ_SFIX, UNKNOWN, (int) V4SI_FTYPE_V4SF },
31423 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_ptest, "__builtin_ia32_ptestz128", IX86_BUILTIN_PTESTZ, EQ, (int) INT_FTYPE_V2DI_V2DI_PTEST },
31424 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_ptest, "__builtin_ia32_ptestc128", IX86_BUILTIN_PTESTC, LTU, (int) INT_FTYPE_V2DI_V2DI_PTEST },
31425 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_ptest, "__builtin_ia32_ptestnzc128", IX86_BUILTIN_PTESTNZC, GTU, (int) INT_FTYPE_V2DI_V2DI_PTEST },
31427 /* SSE4.2 */
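/* SSE4.2: the 64-bit packed greater-than compare (pcmpgtq) and the CRC32
   accumulator builtins.  The crc32 entries OR in OPTION_MASK_ISA_CRC32, so
   either -msse4.2 or -mcrc32 should make them available.  Illustrative use
   (one CRC-32C step over a 32-bit word):
     crc = __builtin_ia32_crc32si (crc, word);  */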
31428 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_gtv2di3, "__builtin_ia32_pcmpgtq", IX86_BUILTIN_PCMPGTQ, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
31429 { OPTION_MASK_ISA_SSE4_2 | OPTION_MASK_ISA_CRC32, CODE_FOR_sse4_2_crc32qi, "__builtin_ia32_crc32qi", IX86_BUILTIN_CRC32QI, UNKNOWN, (int) UINT_FTYPE_UINT_UCHAR },
31430 { OPTION_MASK_ISA_SSE4_2 | OPTION_MASK_ISA_CRC32, CODE_FOR_sse4_2_crc32hi, "__builtin_ia32_crc32hi", IX86_BUILTIN_CRC32HI, UNKNOWN, (int) UINT_FTYPE_UINT_USHORT },
31431 { OPTION_MASK_ISA_SSE4_2 | OPTION_MASK_ISA_CRC32, CODE_FOR_sse4_2_crc32si, "__builtin_ia32_crc32si", IX86_BUILTIN_CRC32SI, UNKNOWN, (int) UINT_FTYPE_UINT_UINT },
31432 { OPTION_MASK_ISA_SSE4_2 | OPTION_MASK_ISA_CRC32 | OPTION_MASK_ISA_64BIT, CODE_FOR_sse4_2_crc32di, "__builtin_ia32_crc32di", IX86_BUILTIN_CRC32DI, UNKNOWN, (int) UINT64_FTYPE_UINT64_UINT64 },
31434 /* SSE4A */
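/* SSE4A (AMD): extrq/insertq extract or insert a bit field within the low
   64 bits of an XMM register.  The *i forms take the field length and
   start position as separate immediates; the non-immediate forms take the
   control packed in a register operand.  */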
31435 { OPTION_MASK_ISA_SSE4A, CODE_FOR_sse4a_extrqi, "__builtin_ia32_extrqi", IX86_BUILTIN_EXTRQI, UNKNOWN, (int) V2DI_FTYPE_V2DI_UINT_UINT },
31436 { OPTION_MASK_ISA_SSE4A, CODE_FOR_sse4a_extrq, "__builtin_ia32_extrq", IX86_BUILTIN_EXTRQ, UNKNOWN, (int) V2DI_FTYPE_V2DI_V16QI },
31437 { OPTION_MASK_ISA_SSE4A, CODE_FOR_sse4a_insertqi, "__builtin_ia32_insertqi", IX86_BUILTIN_INSERTQI, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_UINT_UINT },
31438 { OPTION_MASK_ISA_SSE4A, CODE_FOR_sse4a_insertq, "__builtin_ia32_insertq", IX86_BUILTIN_INSERTQ, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
31440 /* AES */
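/* AES-NI round primitives.  The name field is 0 for these entries, so no
   builtin is registered from this table; the user-visible names
   (__builtin_ia32_aesenc128 and friends) appear to be registered
   separately, where the AES ISA flag rather than plain SSE2 is checked.  */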
31441 { OPTION_MASK_ISA_SSE2, CODE_FOR_aeskeygenassist, 0, IX86_BUILTIN_AESKEYGENASSIST128, UNKNOWN, (int) V2DI_FTYPE_V2DI_INT },
31442 { OPTION_MASK_ISA_SSE2, CODE_FOR_aesimc, 0, IX86_BUILTIN_AESIMC128, UNKNOWN, (int) V2DI_FTYPE_V2DI },
31444 { OPTION_MASK_ISA_SSE2, CODE_FOR_aesenc, 0, IX86_BUILTIN_AESENC128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
31445 { OPTION_MASK_ISA_SSE2, CODE_FOR_aesenclast, 0, IX86_BUILTIN_AESENCLAST128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
31446 { OPTION_MASK_ISA_SSE2, CODE_FOR_aesdec, 0, IX86_BUILTIN_AESDEC128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
31447 { OPTION_MASK_ISA_SSE2, CODE_FOR_aesdeclast, 0, IX86_BUILTIN_AESDECLAST128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
31449 /* PCLMUL */
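/* PCLMUL: carry-less (polynomial) multiplication of two 64-bit halves
   selected by the immediate operand.  As with AES above, the name field is
   0 here and registration is presumably handled where the PCLMUL flag is
   checked.  */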
31450 { OPTION_MASK_ISA_SSE2, CODE_FOR_pclmulqdq, 0, IX86_BUILTIN_PCLMULQDQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_INT },
31452 /* AVX */
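/* AVX: 256-bit floating-point arithmetic, logical, blend/shuffle/permute,
   conversion and test operations, plus the 128<->256 cast helpers
   (si256_si, ps_ps256, pd_pd256, ...).  The avxintrin.h intrinsics are
   thin wrappers over these builtins; e.g. _mm256_add_pd is implemented on
   top of __builtin_ia32_addpd256 (CODE_FOR_addv4df3,
   V4DF_FTYPE_V4DF_V4DF).  */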
31453 { OPTION_MASK_ISA_AVX, CODE_FOR_addv4df3, "__builtin_ia32_addpd256", IX86_BUILTIN_ADDPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
31454 { OPTION_MASK_ISA_AVX, CODE_FOR_addv8sf3, "__builtin_ia32_addps256", IX86_BUILTIN_ADDPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
31455 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_addsubv4df3, "__builtin_ia32_addsubpd256", IX86_BUILTIN_ADDSUBPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
31456 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_addsubv8sf3, "__builtin_ia32_addsubps256", IX86_BUILTIN_ADDSUBPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
31457 { OPTION_MASK_ISA_AVX, CODE_FOR_andv4df3, "__builtin_ia32_andpd256", IX86_BUILTIN_ANDPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
31458 { OPTION_MASK_ISA_AVX, CODE_FOR_andv8sf3, "__builtin_ia32_andps256", IX86_BUILTIN_ANDPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
31459 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_andnotv4df3, "__builtin_ia32_andnpd256", IX86_BUILTIN_ANDNPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
31460 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_andnotv8sf3, "__builtin_ia32_andnps256", IX86_BUILTIN_ANDNPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
31461 { OPTION_MASK_ISA_AVX, CODE_FOR_divv4df3, "__builtin_ia32_divpd256", IX86_BUILTIN_DIVPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
31462 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_divv8sf3, "__builtin_ia32_divps256", IX86_BUILTIN_DIVPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
31463 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_haddv4df3, "__builtin_ia32_haddpd256", IX86_BUILTIN_HADDPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
31464 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_hsubv8sf3, "__builtin_ia32_hsubps256", IX86_BUILTIN_HSUBPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
31465 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_hsubv4df3, "__builtin_ia32_hsubpd256", IX86_BUILTIN_HSUBPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
31466 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_haddv8sf3, "__builtin_ia32_haddps256", IX86_BUILTIN_HADDPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
31467 { OPTION_MASK_ISA_AVX, CODE_FOR_smaxv4df3, "__builtin_ia32_maxpd256", IX86_BUILTIN_MAXPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
31468 { OPTION_MASK_ISA_AVX, CODE_FOR_smaxv8sf3, "__builtin_ia32_maxps256", IX86_BUILTIN_MAXPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
31469 { OPTION_MASK_ISA_AVX, CODE_FOR_sminv4df3, "__builtin_ia32_minpd256", IX86_BUILTIN_MINPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
31470 { OPTION_MASK_ISA_AVX, CODE_FOR_sminv8sf3, "__builtin_ia32_minps256", IX86_BUILTIN_MINPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
31471 { OPTION_MASK_ISA_AVX, CODE_FOR_mulv4df3, "__builtin_ia32_mulpd256", IX86_BUILTIN_MULPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
31472 { OPTION_MASK_ISA_AVX, CODE_FOR_mulv8sf3, "__builtin_ia32_mulps256", IX86_BUILTIN_MULPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
31473 { OPTION_MASK_ISA_AVX, CODE_FOR_iorv4df3, "__builtin_ia32_orpd256", IX86_BUILTIN_ORPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
31474 { OPTION_MASK_ISA_AVX, CODE_FOR_iorv8sf3, "__builtin_ia32_orps256", IX86_BUILTIN_ORPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
31475 { OPTION_MASK_ISA_AVX, CODE_FOR_subv4df3, "__builtin_ia32_subpd256", IX86_BUILTIN_SUBPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
31476 { OPTION_MASK_ISA_AVX, CODE_FOR_subv8sf3, "__builtin_ia32_subps256", IX86_BUILTIN_SUBPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
31477 { OPTION_MASK_ISA_AVX, CODE_FOR_xorv4df3, "__builtin_ia32_xorpd256", IX86_BUILTIN_XORPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
31478 { OPTION_MASK_ISA_AVX, CODE_FOR_xorv8sf3, "__builtin_ia32_xorps256", IX86_BUILTIN_XORPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
31480 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilvarv2df3, "__builtin_ia32_vpermilvarpd", IX86_BUILTIN_VPERMILVARPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DI },
31481 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilvarv4sf3, "__builtin_ia32_vpermilvarps", IX86_BUILTIN_VPERMILVARPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SI },
31482 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilvarv4df3, "__builtin_ia32_vpermilvarpd256", IX86_BUILTIN_VPERMILVARPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DI },
31483 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilvarv8sf3, "__builtin_ia32_vpermilvarps256", IX86_BUILTIN_VPERMILVARPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SI },
31485 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_blendpd256, "__builtin_ia32_blendpd256", IX86_BUILTIN_BLENDPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_INT },
31486 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_blendps256, "__builtin_ia32_blendps256", IX86_BUILTIN_BLENDPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_INT },
31487 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_blendvpd256, "__builtin_ia32_blendvpd256", IX86_BUILTIN_BLENDVPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DF },
31488 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_blendvps256, "__builtin_ia32_blendvps256", IX86_BUILTIN_BLENDVPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SF },
31489 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_dpps256, "__builtin_ia32_dpps256", IX86_BUILTIN_DPPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_INT },
31490 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_shufpd256, "__builtin_ia32_shufpd256", IX86_BUILTIN_SHUFPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_INT },
31491 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_shufps256, "__builtin_ia32_shufps256", IX86_BUILTIN_SHUFPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_INT },
31492 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vmcmpv2df3, "__builtin_ia32_cmpsd", IX86_BUILTIN_CMPSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
31493 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vmcmpv4sf3, "__builtin_ia32_cmpss", IX86_BUILTIN_CMPSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
31494 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cmpv2df3, "__builtin_ia32_cmppd", IX86_BUILTIN_CMPPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
31495 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cmpv4sf3, "__builtin_ia32_cmpps", IX86_BUILTIN_CMPPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
31496 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cmpv4df3, "__builtin_ia32_cmppd256", IX86_BUILTIN_CMPPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_INT },
31497 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cmpv8sf3, "__builtin_ia32_cmpps256", IX86_BUILTIN_CMPPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_INT },
31498 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vextractf128v4df, "__builtin_ia32_vextractf128_pd256", IX86_BUILTIN_EXTRACTF128PD256, UNKNOWN, (int) V2DF_FTYPE_V4DF_INT },
31499 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vextractf128v8sf, "__builtin_ia32_vextractf128_ps256", IX86_BUILTIN_EXTRACTF128PS256, UNKNOWN, (int) V4SF_FTYPE_V8SF_INT },
31500 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vextractf128v8si, "__builtin_ia32_vextractf128_si256", IX86_BUILTIN_EXTRACTF128SI256, UNKNOWN, (int) V4SI_FTYPE_V8SI_INT },
31501 { OPTION_MASK_ISA_AVX, CODE_FOR_floatv4siv4df2, "__builtin_ia32_cvtdq2pd256", IX86_BUILTIN_CVTDQ2PD256, UNKNOWN, (int) V4DF_FTYPE_V4SI },
31502 { OPTION_MASK_ISA_AVX, CODE_FOR_floatv8siv8sf2, "__builtin_ia32_cvtdq2ps256", IX86_BUILTIN_CVTDQ2PS256, UNKNOWN, (int) V8SF_FTYPE_V8SI },
31503 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cvtpd2ps256, "__builtin_ia32_cvtpd2ps256", IX86_BUILTIN_CVTPD2PS256, UNKNOWN, (int) V4SF_FTYPE_V4DF },
31504 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_fix_notruncv8sfv8si, "__builtin_ia32_cvtps2dq256", IX86_BUILTIN_CVTPS2DQ256, UNKNOWN, (int) V8SI_FTYPE_V8SF },
31505 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cvtps2pd256, "__builtin_ia32_cvtps2pd256", IX86_BUILTIN_CVTPS2PD256, UNKNOWN, (int) V4DF_FTYPE_V4SF },
31506 { OPTION_MASK_ISA_AVX, CODE_FOR_fix_truncv4dfv4si2, "__builtin_ia32_cvttpd2dq256", IX86_BUILTIN_CVTTPD2DQ256, UNKNOWN, (int) V4SI_FTYPE_V4DF },
31507 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cvtpd2dq256, "__builtin_ia32_cvtpd2dq256", IX86_BUILTIN_CVTPD2DQ256, UNKNOWN, (int) V4SI_FTYPE_V4DF },
31508 { OPTION_MASK_ISA_AVX, CODE_FOR_fix_truncv8sfv8si2, "__builtin_ia32_cvttps2dq256", IX86_BUILTIN_CVTTPS2DQ256, UNKNOWN, (int) V8SI_FTYPE_V8SF },
31509 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vperm2f128v4df3, "__builtin_ia32_vperm2f128_pd256", IX86_BUILTIN_VPERM2F128PD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_INT },
31510 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vperm2f128v8sf3, "__builtin_ia32_vperm2f128_ps256", IX86_BUILTIN_VPERM2F128PS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_INT },
31511 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vperm2f128v8si3, "__builtin_ia32_vperm2f128_si256", IX86_BUILTIN_VPERM2F128SI256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_INT },
31512 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilv2df, "__builtin_ia32_vpermilpd", IX86_BUILTIN_VPERMILPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_INT },
31513 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilv4sf, "__builtin_ia32_vpermilps", IX86_BUILTIN_VPERMILPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_INT },
31514 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilv4df, "__builtin_ia32_vpermilpd256", IX86_BUILTIN_VPERMILPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_INT },
31515 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilv8sf, "__builtin_ia32_vpermilps256", IX86_BUILTIN_VPERMILPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_INT },
31516 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vinsertf128v4df, "__builtin_ia32_vinsertf128_pd256", IX86_BUILTIN_VINSERTF128PD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V2DF_INT },
31517 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vinsertf128v8sf, "__builtin_ia32_vinsertf128_ps256", IX86_BUILTIN_VINSERTF128PS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V4SF_INT },
31518 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vinsertf128v8si, "__builtin_ia32_vinsertf128_si256", IX86_BUILTIN_VINSERTF128SI256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V4SI_INT },
31520 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movshdup256, "__builtin_ia32_movshdup256", IX86_BUILTIN_MOVSHDUP256, UNKNOWN, (int) V8SF_FTYPE_V8SF },
31521 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movsldup256, "__builtin_ia32_movsldup256", IX86_BUILTIN_MOVSLDUP256, UNKNOWN, (int) V8SF_FTYPE_V8SF },
31522 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movddup256, "__builtin_ia32_movddup256", IX86_BUILTIN_MOVDDUP256, UNKNOWN, (int) V4DF_FTYPE_V4DF },
31524 { OPTION_MASK_ISA_AVX, CODE_FOR_sqrtv4df2, "__builtin_ia32_sqrtpd256", IX86_BUILTIN_SQRTPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF },
31525 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_sqrtv8sf2, "__builtin_ia32_sqrtps256", IX86_BUILTIN_SQRTPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF },
31526 { OPTION_MASK_ISA_AVX, CODE_FOR_sqrtv8sf2, "__builtin_ia32_sqrtps_nr256", IX86_BUILTIN_SQRTPS_NR256, UNKNOWN, (int) V8SF_FTYPE_V8SF },
31527 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_rsqrtv8sf2, "__builtin_ia32_rsqrtps256", IX86_BUILTIN_RSQRTPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF },
31528 { OPTION_MASK_ISA_AVX, CODE_FOR_rsqrtv8sf2, "__builtin_ia32_rsqrtps_nr256", IX86_BUILTIN_RSQRTPS_NR256, UNKNOWN, (int) V8SF_FTYPE_V8SF },
31530 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_rcpv8sf2, "__builtin_ia32_rcpps256", IX86_BUILTIN_RCPPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF },
31532 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundpd256, "__builtin_ia32_roundpd256", IX86_BUILTIN_ROUNDPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_INT },
31533 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundps256, "__builtin_ia32_roundps256", IX86_BUILTIN_ROUNDPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_INT },
31535 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundpd256, "__builtin_ia32_floorpd256", IX86_BUILTIN_FLOORPD256, (enum rtx_code) ROUND_FLOOR, (int) V4DF_FTYPE_V4DF_ROUND },
31536 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundpd256, "__builtin_ia32_ceilpd256", IX86_BUILTIN_CEILPD256, (enum rtx_code) ROUND_CEIL, (int) V4DF_FTYPE_V4DF_ROUND },
31537 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundpd256, "__builtin_ia32_truncpd256", IX86_BUILTIN_TRUNCPD256, (enum rtx_code) ROUND_TRUNC, (int) V4DF_FTYPE_V4DF_ROUND },
31538 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundpd256, "__builtin_ia32_rintpd256", IX86_BUILTIN_RINTPD256, (enum rtx_code) ROUND_MXCSR, (int) V4DF_FTYPE_V4DF_ROUND },
31540 { OPTION_MASK_ISA_AVX, CODE_FOR_roundv4df2, "__builtin_ia32_roundpd_az256", IX86_BUILTIN_ROUNDPD_AZ256, UNKNOWN, (int) V4DF_FTYPE_V4DF },
31541 { OPTION_MASK_ISA_AVX, CODE_FOR_roundv4df2_vec_pack_sfix, "__builtin_ia32_roundpd_az_vec_pack_sfix256", IX86_BUILTIN_ROUNDPD_AZ_VEC_PACK_SFIX256, UNKNOWN, (int) V8SI_FTYPE_V4DF_V4DF },
31543 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundpd_vec_pack_sfix256, "__builtin_ia32_floorpd_vec_pack_sfix256", IX86_BUILTIN_FLOORPD_VEC_PACK_SFIX256, (enum rtx_code) ROUND_FLOOR, (int) V8SI_FTYPE_V4DF_V4DF_ROUND },
31544 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundpd_vec_pack_sfix256, "__builtin_ia32_ceilpd_vec_pack_sfix256", IX86_BUILTIN_CEILPD_VEC_PACK_SFIX256, (enum rtx_code) ROUND_CEIL, (int) V8SI_FTYPE_V4DF_V4DF_ROUND },
31546 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundps256, "__builtin_ia32_floorps256", IX86_BUILTIN_FLOORPS256, (enum rtx_code) ROUND_FLOOR, (int) V8SF_FTYPE_V8SF_ROUND },
31547 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundps256, "__builtin_ia32_ceilps256", IX86_BUILTIN_CEILPS256, (enum rtx_code) ROUND_CEIL, (int) V8SF_FTYPE_V8SF_ROUND },
31548 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundps256, "__builtin_ia32_truncps256", IX86_BUILTIN_TRUNCPS256, (enum rtx_code) ROUND_TRUNC, (int) V8SF_FTYPE_V8SF_ROUND },
31549 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundps256, "__builtin_ia32_rintps256", IX86_BUILTIN_RINTPS256, (enum rtx_code) ROUND_MXCSR, (int) V8SF_FTYPE_V8SF_ROUND },
31551 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundps_sfix256, "__builtin_ia32_floorps_sfix256", IX86_BUILTIN_FLOORPS_SFIX256, (enum rtx_code) ROUND_FLOOR, (int) V8SI_FTYPE_V8SF_ROUND },
31552 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundps_sfix256, "__builtin_ia32_ceilps_sfix256", IX86_BUILTIN_CEILPS_SFIX256, (enum rtx_code) ROUND_CEIL, (int) V8SI_FTYPE_V8SF_ROUND },
31554 { OPTION_MASK_ISA_AVX, CODE_FOR_roundv8sf2, "__builtin_ia32_roundps_az256", IX86_BUILTIN_ROUNDPS_AZ256, UNKNOWN, (int) V8SF_FTYPE_V8SF },
31555 { OPTION_MASK_ISA_AVX, CODE_FOR_roundv8sf2_sfix, "__builtin_ia32_roundps_az_sfix256", IX86_BUILTIN_ROUNDPS_AZ_SFIX256, UNKNOWN, (int) V8SI_FTYPE_V8SF },
31557 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_unpckhpd256, "__builtin_ia32_unpckhpd256", IX86_BUILTIN_UNPCKHPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
31558 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_unpcklpd256, "__builtin_ia32_unpcklpd256", IX86_BUILTIN_UNPCKLPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
31559 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_unpckhps256, "__builtin_ia32_unpckhps256", IX86_BUILTIN_UNPCKHPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
31560 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_unpcklps256, "__builtin_ia32_unpcklps256", IX86_BUILTIN_UNPCKLPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
31562 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_si256_si, "__builtin_ia32_si256_si", IX86_BUILTIN_SI256_SI, UNKNOWN, (int) V8SI_FTYPE_V4SI },
31563 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_ps256_ps, "__builtin_ia32_ps256_ps", IX86_BUILTIN_PS256_PS, UNKNOWN, (int) V8SF_FTYPE_V4SF },
31564 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_pd256_pd, "__builtin_ia32_pd256_pd", IX86_BUILTIN_PD256_PD, UNKNOWN, (int) V4DF_FTYPE_V2DF },
31565 { OPTION_MASK_ISA_AVX, CODE_FOR_vec_extract_lo_v8si, "__builtin_ia32_si_si256", IX86_BUILTIN_SI_SI256, UNKNOWN, (int) V4SI_FTYPE_V8SI },
31566 { OPTION_MASK_ISA_AVX, CODE_FOR_vec_extract_lo_v8sf, "__builtin_ia32_ps_ps256", IX86_BUILTIN_PS_PS256, UNKNOWN, (int) V4SF_FTYPE_V8SF },
31567 { OPTION_MASK_ISA_AVX, CODE_FOR_vec_extract_lo_v4df, "__builtin_ia32_pd_pd256", IX86_BUILTIN_PD_PD256, UNKNOWN, (int) V2DF_FTYPE_V4DF },
31569 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestpd, "__builtin_ia32_vtestzpd", IX86_BUILTIN_VTESTZPD, EQ, (int) INT_FTYPE_V2DF_V2DF_PTEST },
31570 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestpd, "__builtin_ia32_vtestcpd", IX86_BUILTIN_VTESTCPD, LTU, (int) INT_FTYPE_V2DF_V2DF_PTEST },
31571 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestpd, "__builtin_ia32_vtestnzcpd", IX86_BUILTIN_VTESTNZCPD, GTU, (int) INT_FTYPE_V2DF_V2DF_PTEST },
31572 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestps, "__builtin_ia32_vtestzps", IX86_BUILTIN_VTESTZPS, EQ, (int) INT_FTYPE_V4SF_V4SF_PTEST },
31573 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestps, "__builtin_ia32_vtestcps", IX86_BUILTIN_VTESTCPS, LTU, (int) INT_FTYPE_V4SF_V4SF_PTEST },
31574 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestps, "__builtin_ia32_vtestnzcps", IX86_BUILTIN_VTESTNZCPS, GTU, (int) INT_FTYPE_V4SF_V4SF_PTEST },
31575 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestpd256, "__builtin_ia32_vtestzpd256", IX86_BUILTIN_VTESTZPD256, EQ, (int) INT_FTYPE_V4DF_V4DF_PTEST },
31576 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestpd256, "__builtin_ia32_vtestcpd256", IX86_BUILTIN_VTESTCPD256, LTU, (int) INT_FTYPE_V4DF_V4DF_PTEST },
31577 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestpd256, "__builtin_ia32_vtestnzcpd256", IX86_BUILTIN_VTESTNZCPD256, GTU, (int) INT_FTYPE_V4DF_V4DF_PTEST },
31578 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestps256, "__builtin_ia32_vtestzps256", IX86_BUILTIN_VTESTZPS256, EQ, (int) INT_FTYPE_V8SF_V8SF_PTEST },
31579 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestps256, "__builtin_ia32_vtestcps256", IX86_BUILTIN_VTESTCPS256, LTU, (int) INT_FTYPE_V8SF_V8SF_PTEST },
31580 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestps256, "__builtin_ia32_vtestnzcps256", IX86_BUILTIN_VTESTNZCPS256, GTU, (int) INT_FTYPE_V8SF_V8SF_PTEST },
31581 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_ptest256, "__builtin_ia32_ptestz256", IX86_BUILTIN_PTESTZ256, EQ, (int) INT_FTYPE_V4DI_V4DI_PTEST },
31582 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_ptest256, "__builtin_ia32_ptestc256", IX86_BUILTIN_PTESTC256, LTU, (int) INT_FTYPE_V4DI_V4DI_PTEST },
31583 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_ptest256, "__builtin_ia32_ptestnzc256", IX86_BUILTIN_PTESTNZC256, GTU, (int) INT_FTYPE_V4DI_V4DI_PTEST },
31585 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movmskpd256, "__builtin_ia32_movmskpd256", IX86_BUILTIN_MOVMSKPD256, UNKNOWN, (int) INT_FTYPE_V4DF },
31586 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movmskps256, "__builtin_ia32_movmskps256", IX86_BUILTIN_MOVMSKPS256, UNKNOWN, (int) INT_FTYPE_V8SF },
31588 { OPTION_MASK_ISA_AVX, CODE_FOR_copysignv8sf3, "__builtin_ia32_copysignps256", IX86_BUILTIN_CPYSGNPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
31589 { OPTION_MASK_ISA_AVX, CODE_FOR_copysignv4df3, "__builtin_ia32_copysignpd256", IX86_BUILTIN_CPYSGNPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
31591 { OPTION_MASK_ISA_AVX, CODE_FOR_vec_pack_sfix_v4df, "__builtin_ia32_vec_pack_sfix256", IX86_BUILTIN_VEC_PACK_SFIX256, UNKNOWN, (int) V8SI_FTYPE_V4DF_V4DF },
31593 /* AVX2 */
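/* AVX2: the 256-bit integer counterparts of the SSE2/SSSE3/SSE4.1 packed
   integer operations, plus broadcast, blend, permute and the per-element
   variable shifts (psllv/psrav/psrlv).  avx2intrin.h wraps these builtins;
   e.g. _mm256_add_epi32 expands to __builtin_ia32_paddd256
   (CODE_FOR_addv8si3, V8SI_FTYPE_V8SI_V8SI).  */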
31594 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_mpsadbw, "__builtin_ia32_mpsadbw256", IX86_BUILTIN_MPSADBW256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI_INT },
31595 { OPTION_MASK_ISA_AVX2, CODE_FOR_absv32qi2, "__builtin_ia32_pabsb256", IX86_BUILTIN_PABSB256, UNKNOWN, (int) V32QI_FTYPE_V32QI },
31596 { OPTION_MASK_ISA_AVX2, CODE_FOR_absv16hi2, "__builtin_ia32_pabsw256", IX86_BUILTIN_PABSW256, UNKNOWN, (int) V16HI_FTYPE_V16HI },
31597 { OPTION_MASK_ISA_AVX2, CODE_FOR_absv8si2, "__builtin_ia32_pabsd256", IX86_BUILTIN_PABSD256, UNKNOWN, (int) V8SI_FTYPE_V8SI },
31598 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_packssdw, "__builtin_ia32_packssdw256", IX86_BUILTIN_PACKSSDW256, UNKNOWN, (int) V16HI_FTYPE_V8SI_V8SI },
31599 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_packsswb, "__builtin_ia32_packsswb256", IX86_BUILTIN_PACKSSWB256, UNKNOWN, (int) V32QI_FTYPE_V16HI_V16HI },
31600 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_packusdw, "__builtin_ia32_packusdw256", IX86_BUILTIN_PACKUSDW256, UNKNOWN, (int) V16HI_FTYPE_V8SI_V8SI },
31601 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_packuswb, "__builtin_ia32_packuswb256", IX86_BUILTIN_PACKUSWB256, UNKNOWN, (int) V32QI_FTYPE_V16HI_V16HI },
31602 { OPTION_MASK_ISA_AVX2, CODE_FOR_addv32qi3, "__builtin_ia32_paddb256", IX86_BUILTIN_PADDB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
31603 { OPTION_MASK_ISA_AVX2, CODE_FOR_addv16hi3, "__builtin_ia32_paddw256", IX86_BUILTIN_PADDW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
31604 { OPTION_MASK_ISA_AVX2, CODE_FOR_addv8si3, "__builtin_ia32_paddd256", IX86_BUILTIN_PADDD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
31605 { OPTION_MASK_ISA_AVX2, CODE_FOR_addv4di3, "__builtin_ia32_paddq256", IX86_BUILTIN_PADDQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI },
31606 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_ssaddv32qi3, "__builtin_ia32_paddsb256", IX86_BUILTIN_PADDSB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
31607 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_ssaddv16hi3, "__builtin_ia32_paddsw256", IX86_BUILTIN_PADDSW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
31608 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_usaddv32qi3, "__builtin_ia32_paddusb256", IX86_BUILTIN_PADDUSB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
31609 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_usaddv16hi3, "__builtin_ia32_paddusw256", IX86_BUILTIN_PADDUSW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
31610 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_palignrv2ti, "__builtin_ia32_palignr256", IX86_BUILTIN_PALIGNR256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_INT_CONVERT },
31611 { OPTION_MASK_ISA_AVX2, CODE_FOR_andv4di3, "__builtin_ia32_andsi256", IX86_BUILTIN_AND256I, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI },
31612 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_andnotv4di3, "__builtin_ia32_andnotsi256", IX86_BUILTIN_ANDNOT256I, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI },
31613 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_uavgv32qi3, "__builtin_ia32_pavgb256", IX86_BUILTIN_PAVGB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
31614 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_uavgv16hi3, "__builtin_ia32_pavgw256", IX86_BUILTIN_PAVGW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
31615 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pblendvb, "__builtin_ia32_pblendvb256", IX86_BUILTIN_PBLENDVB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI_V32QI },
31616 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pblendw, "__builtin_ia32_pblendw256", IX86_BUILTIN_PBLENDVW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_INT },
31617 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_eqv32qi3, "__builtin_ia32_pcmpeqb256", IX86_BUILTIN_PCMPEQB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
31618 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_eqv16hi3, "__builtin_ia32_pcmpeqw256", IX86_BUILTIN_PCMPEQW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
31619 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_eqv8si3, "__builtin_ia32_pcmpeqd256", IX86_BUILTIN_PCMPEQD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
31620 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_eqv4di3, "__builtin_ia32_pcmpeqq256", IX86_BUILTIN_PCMPEQQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI },
31621 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_gtv32qi3, "__builtin_ia32_pcmpgtb256", IX86_BUILTIN_PCMPGTB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
31622 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_gtv16hi3, "__builtin_ia32_pcmpgtw256", IX86_BUILTIN_PCMPGTW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
31623 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_gtv8si3, "__builtin_ia32_pcmpgtd256", IX86_BUILTIN_PCMPGTD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
31624 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_gtv4di3, "__builtin_ia32_pcmpgtq256", IX86_BUILTIN_PCMPGTQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI },
31625 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_phaddwv16hi3, "__builtin_ia32_phaddw256", IX86_BUILTIN_PHADDW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
31626 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_phadddv8si3, "__builtin_ia32_phaddd256", IX86_BUILTIN_PHADDD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
31627 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_phaddswv16hi3, "__builtin_ia32_phaddsw256", IX86_BUILTIN_PHADDSW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
31628 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_phsubwv16hi3, "__builtin_ia32_phsubw256", IX86_BUILTIN_PHSUBW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
31629 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_phsubdv8si3, "__builtin_ia32_phsubd256", IX86_BUILTIN_PHSUBD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
31630 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_phsubswv16hi3, "__builtin_ia32_phsubsw256", IX86_BUILTIN_PHSUBSW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
31631 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pmaddubsw256, "__builtin_ia32_pmaddubsw256", IX86_BUILTIN_PMADDUBSW256, UNKNOWN, (int) V16HI_FTYPE_V32QI_V32QI },
31632 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pmaddwd, "__builtin_ia32_pmaddwd256", IX86_BUILTIN_PMADDWD256, UNKNOWN, (int) V8SI_FTYPE_V16HI_V16HI },
31633 { OPTION_MASK_ISA_AVX2, CODE_FOR_smaxv32qi3, "__builtin_ia32_pmaxsb256", IX86_BUILTIN_PMAXSB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
31634 { OPTION_MASK_ISA_AVX2, CODE_FOR_smaxv16hi3, "__builtin_ia32_pmaxsw256", IX86_BUILTIN_PMAXSW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
31635 { OPTION_MASK_ISA_AVX2, CODE_FOR_smaxv8si3 , "__builtin_ia32_pmaxsd256", IX86_BUILTIN_PMAXSD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
31636 { OPTION_MASK_ISA_AVX2, CODE_FOR_umaxv32qi3, "__builtin_ia32_pmaxub256", IX86_BUILTIN_PMAXUB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
31637 { OPTION_MASK_ISA_AVX2, CODE_FOR_umaxv16hi3, "__builtin_ia32_pmaxuw256", IX86_BUILTIN_PMAXUW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
31638 { OPTION_MASK_ISA_AVX2, CODE_FOR_umaxv8si3 , "__builtin_ia32_pmaxud256", IX86_BUILTIN_PMAXUD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
31639 { OPTION_MASK_ISA_AVX2, CODE_FOR_sminv32qi3, "__builtin_ia32_pminsb256", IX86_BUILTIN_PMINSB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
31640 { OPTION_MASK_ISA_AVX2, CODE_FOR_sminv16hi3, "__builtin_ia32_pminsw256", IX86_BUILTIN_PMINSW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
31641 { OPTION_MASK_ISA_AVX2, CODE_FOR_sminv8si3 , "__builtin_ia32_pminsd256", IX86_BUILTIN_PMINSD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
31642 { OPTION_MASK_ISA_AVX2, CODE_FOR_uminv32qi3, "__builtin_ia32_pminub256", IX86_BUILTIN_PMINUB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
31643 { OPTION_MASK_ISA_AVX2, CODE_FOR_uminv16hi3, "__builtin_ia32_pminuw256", IX86_BUILTIN_PMINUW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
31644 { OPTION_MASK_ISA_AVX2, CODE_FOR_uminv8si3 , "__builtin_ia32_pminud256", IX86_BUILTIN_PMINUD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
31645 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pmovmskb, "__builtin_ia32_pmovmskb256", IX86_BUILTIN_PMOVMSKB256, UNKNOWN, (int) INT_FTYPE_V32QI },
31646 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_sign_extendv16qiv16hi2, "__builtin_ia32_pmovsxbw256", IX86_BUILTIN_PMOVSXBW256, UNKNOWN, (int) V16HI_FTYPE_V16QI },
31647 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_sign_extendv8qiv8si2 , "__builtin_ia32_pmovsxbd256", IX86_BUILTIN_PMOVSXBD256, UNKNOWN, (int) V8SI_FTYPE_V16QI },
31648 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_sign_extendv4qiv4di2 , "__builtin_ia32_pmovsxbq256", IX86_BUILTIN_PMOVSXBQ256, UNKNOWN, (int) V4DI_FTYPE_V16QI },
31649 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_sign_extendv8hiv8si2 , "__builtin_ia32_pmovsxwd256", IX86_BUILTIN_PMOVSXWD256, UNKNOWN, (int) V8SI_FTYPE_V8HI },
31650 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_sign_extendv4hiv4di2 , "__builtin_ia32_pmovsxwq256", IX86_BUILTIN_PMOVSXWQ256, UNKNOWN, (int) V4DI_FTYPE_V8HI },
31651 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_sign_extendv4siv4di2 , "__builtin_ia32_pmovsxdq256", IX86_BUILTIN_PMOVSXDQ256, UNKNOWN, (int) V4DI_FTYPE_V4SI },
31652 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_zero_extendv16qiv16hi2, "__builtin_ia32_pmovzxbw256", IX86_BUILTIN_PMOVZXBW256, UNKNOWN, (int) V16HI_FTYPE_V16QI },
31653 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_zero_extendv8qiv8si2 , "__builtin_ia32_pmovzxbd256", IX86_BUILTIN_PMOVZXBD256, UNKNOWN, (int) V8SI_FTYPE_V16QI },
31654 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_zero_extendv4qiv4di2 , "__builtin_ia32_pmovzxbq256", IX86_BUILTIN_PMOVZXBQ256, UNKNOWN, (int) V4DI_FTYPE_V16QI },
31655 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_zero_extendv8hiv8si2 , "__builtin_ia32_pmovzxwd256", IX86_BUILTIN_PMOVZXWD256, UNKNOWN, (int) V8SI_FTYPE_V8HI },
31656 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_zero_extendv4hiv4di2 , "__builtin_ia32_pmovzxwq256", IX86_BUILTIN_PMOVZXWQ256, UNKNOWN, (int) V4DI_FTYPE_V8HI },
31657 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_zero_extendv4siv4di2 , "__builtin_ia32_pmovzxdq256", IX86_BUILTIN_PMOVZXDQ256, UNKNOWN, (int) V4DI_FTYPE_V4SI },
31658 { OPTION_MASK_ISA_AVX2, CODE_FOR_vec_widen_smult_even_v8si, "__builtin_ia32_pmuldq256", IX86_BUILTIN_PMULDQ256, UNKNOWN, (int) V4DI_FTYPE_V8SI_V8SI },
31659 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pmulhrswv16hi3 , "__builtin_ia32_pmulhrsw256", IX86_BUILTIN_PMULHRSW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
31660 { OPTION_MASK_ISA_AVX2, CODE_FOR_umulv16hi3_highpart, "__builtin_ia32_pmulhuw256" , IX86_BUILTIN_PMULHUW256 , UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
31661 { OPTION_MASK_ISA_AVX2, CODE_FOR_smulv16hi3_highpart, "__builtin_ia32_pmulhw256" , IX86_BUILTIN_PMULHW256 , UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
31662 { OPTION_MASK_ISA_AVX2, CODE_FOR_mulv16hi3, "__builtin_ia32_pmullw256" , IX86_BUILTIN_PMULLW256 , UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
31663 { OPTION_MASK_ISA_AVX2, CODE_FOR_mulv8si3, "__builtin_ia32_pmulld256" , IX86_BUILTIN_PMULLD256 , UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
31664 { OPTION_MASK_ISA_AVX2, CODE_FOR_vec_widen_umult_even_v8si, "__builtin_ia32_pmuludq256", IX86_BUILTIN_PMULUDQ256, UNKNOWN, (int) V4DI_FTYPE_V8SI_V8SI },
31665 { OPTION_MASK_ISA_AVX2, CODE_FOR_iorv4di3, "__builtin_ia32_por256", IX86_BUILTIN_POR256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI },
31666 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_psadbw, "__builtin_ia32_psadbw256", IX86_BUILTIN_PSADBW256, UNKNOWN, (int) V16HI_FTYPE_V32QI_V32QI },
31667 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pshufbv32qi3, "__builtin_ia32_pshufb256", IX86_BUILTIN_PSHUFB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
31668 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pshufdv3, "__builtin_ia32_pshufd256", IX86_BUILTIN_PSHUFD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_INT },
31669 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pshufhwv3, "__builtin_ia32_pshufhw256", IX86_BUILTIN_PSHUFHW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_INT },
31670 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pshuflwv3, "__builtin_ia32_pshuflw256", IX86_BUILTIN_PSHUFLW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_INT },
31671 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_psignv32qi3, "__builtin_ia32_psignb256", IX86_BUILTIN_PSIGNB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
31672 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_psignv16hi3, "__builtin_ia32_psignw256", IX86_BUILTIN_PSIGNW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
31673 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_psignv8si3 , "__builtin_ia32_psignd256", IX86_BUILTIN_PSIGND256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
31674 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_ashlv2ti3, "__builtin_ia32_pslldqi256", IX86_BUILTIN_PSLLDQI256, UNKNOWN, (int) V4DI_FTYPE_V4DI_INT_CONVERT },
31675 { OPTION_MASK_ISA_AVX2, CODE_FOR_ashlv16hi3, "__builtin_ia32_psllwi256", IX86_BUILTIN_PSLLWI256 , UNKNOWN, (int) V16HI_FTYPE_V16HI_SI_COUNT },
31676 { OPTION_MASK_ISA_AVX2, CODE_FOR_ashlv16hi3, "__builtin_ia32_psllw256", IX86_BUILTIN_PSLLW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V8HI_COUNT },
31677 { OPTION_MASK_ISA_AVX2, CODE_FOR_ashlv8si3, "__builtin_ia32_pslldi256", IX86_BUILTIN_PSLLDI256, UNKNOWN, (int) V8SI_FTYPE_V8SI_SI_COUNT },
31678 { OPTION_MASK_ISA_AVX2, CODE_FOR_ashlv8si3, "__builtin_ia32_pslld256", IX86_BUILTIN_PSLLD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V4SI_COUNT },
31679 { OPTION_MASK_ISA_AVX2, CODE_FOR_ashlv4di3, "__builtin_ia32_psllqi256", IX86_BUILTIN_PSLLQI256, UNKNOWN, (int) V4DI_FTYPE_V4DI_INT_COUNT },
31680 { OPTION_MASK_ISA_AVX2, CODE_FOR_ashlv4di3, "__builtin_ia32_psllq256", IX86_BUILTIN_PSLLQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V2DI_COUNT },
31681 { OPTION_MASK_ISA_AVX2, CODE_FOR_ashrv16hi3, "__builtin_ia32_psrawi256", IX86_BUILTIN_PSRAWI256, UNKNOWN, (int) V16HI_FTYPE_V16HI_SI_COUNT },
31682 { OPTION_MASK_ISA_AVX2, CODE_FOR_ashrv16hi3, "__builtin_ia32_psraw256", IX86_BUILTIN_PSRAW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V8HI_COUNT },
31683 { OPTION_MASK_ISA_AVX2, CODE_FOR_ashrv8si3, "__builtin_ia32_psradi256", IX86_BUILTIN_PSRADI256, UNKNOWN, (int) V8SI_FTYPE_V8SI_SI_COUNT },
31684 { OPTION_MASK_ISA_AVX2, CODE_FOR_ashrv8si3, "__builtin_ia32_psrad256", IX86_BUILTIN_PSRAD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V4SI_COUNT },
31685 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_lshrv2ti3, "__builtin_ia32_psrldqi256", IX86_BUILTIN_PSRLDQI256, UNKNOWN, (int) V4DI_FTYPE_V4DI_INT_CONVERT },
31686 { OPTION_MASK_ISA_AVX2, CODE_FOR_lshrv16hi3, "__builtin_ia32_psrlwi256", IX86_BUILTIN_PSRLWI256 , UNKNOWN, (int) V16HI_FTYPE_V16HI_SI_COUNT },
31687 { OPTION_MASK_ISA_AVX2, CODE_FOR_lshrv16hi3, "__builtin_ia32_psrlw256", IX86_BUILTIN_PSRLW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V8HI_COUNT },
31688 { OPTION_MASK_ISA_AVX2, CODE_FOR_lshrv8si3, "__builtin_ia32_psrldi256", IX86_BUILTIN_PSRLDI256, UNKNOWN, (int) V8SI_FTYPE_V8SI_SI_COUNT },
31689 { OPTION_MASK_ISA_AVX2, CODE_FOR_lshrv8si3, "__builtin_ia32_psrld256", IX86_BUILTIN_PSRLD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V4SI_COUNT },
31690 { OPTION_MASK_ISA_AVX2, CODE_FOR_lshrv4di3, "__builtin_ia32_psrlqi256", IX86_BUILTIN_PSRLQI256, UNKNOWN, (int) V4DI_FTYPE_V4DI_INT_COUNT },
31691 { OPTION_MASK_ISA_AVX2, CODE_FOR_lshrv4di3, "__builtin_ia32_psrlq256", IX86_BUILTIN_PSRLQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V2DI_COUNT },
31692 { OPTION_MASK_ISA_AVX2, CODE_FOR_subv32qi3, "__builtin_ia32_psubb256", IX86_BUILTIN_PSUBB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
31693 { OPTION_MASK_ISA_AVX2, CODE_FOR_subv16hi3, "__builtin_ia32_psubw256", IX86_BUILTIN_PSUBW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
31694 { OPTION_MASK_ISA_AVX2, CODE_FOR_subv8si3, "__builtin_ia32_psubd256", IX86_BUILTIN_PSUBD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
31695 { OPTION_MASK_ISA_AVX2, CODE_FOR_subv4di3, "__builtin_ia32_psubq256", IX86_BUILTIN_PSUBQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI },
31696 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_sssubv32qi3, "__builtin_ia32_psubsb256", IX86_BUILTIN_PSUBSB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
31697 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_sssubv16hi3, "__builtin_ia32_psubsw256", IX86_BUILTIN_PSUBSW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
31698 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_ussubv32qi3, "__builtin_ia32_psubusb256", IX86_BUILTIN_PSUBUSB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
31699 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_ussubv16hi3, "__builtin_ia32_psubusw256", IX86_BUILTIN_PSUBUSW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
31700 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_interleave_highv32qi, "__builtin_ia32_punpckhbw256", IX86_BUILTIN_PUNPCKHBW256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
31701 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_interleave_highv16hi, "__builtin_ia32_punpckhwd256", IX86_BUILTIN_PUNPCKHWD256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
31702 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_interleave_highv8si, "__builtin_ia32_punpckhdq256", IX86_BUILTIN_PUNPCKHDQ256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
31703 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_interleave_highv4di, "__builtin_ia32_punpckhqdq256", IX86_BUILTIN_PUNPCKHQDQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI },
31704 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_interleave_lowv32qi, "__builtin_ia32_punpcklbw256", IX86_BUILTIN_PUNPCKLBW256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
31705 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_interleave_lowv16hi, "__builtin_ia32_punpcklwd256", IX86_BUILTIN_PUNPCKLWD256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
31706 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_interleave_lowv8si, "__builtin_ia32_punpckldq256", IX86_BUILTIN_PUNPCKLDQ256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
31707 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_interleave_lowv4di, "__builtin_ia32_punpcklqdq256", IX86_BUILTIN_PUNPCKLQDQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI },
31708 { OPTION_MASK_ISA_AVX2, CODE_FOR_xorv4di3, "__builtin_ia32_pxor256", IX86_BUILTIN_PXOR256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI },
31709 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_vec_dupv4sf, "__builtin_ia32_vbroadcastss_ps", IX86_BUILTIN_VBROADCASTSS_PS, UNKNOWN, (int) V4SF_FTYPE_V4SF },
31710 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_vec_dupv8sf, "__builtin_ia32_vbroadcastss_ps256", IX86_BUILTIN_VBROADCASTSS_PS256, UNKNOWN, (int) V8SF_FTYPE_V4SF },
31711 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_vec_dupv4df, "__builtin_ia32_vbroadcastsd_pd256", IX86_BUILTIN_VBROADCASTSD_PD256, UNKNOWN, (int) V4DF_FTYPE_V2DF },
31712 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_vbroadcasti128_v4di, "__builtin_ia32_vbroadcastsi256", IX86_BUILTIN_VBROADCASTSI256, UNKNOWN, (int) V4DI_FTYPE_V2DI },
31713 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pblenddv4si, "__builtin_ia32_pblendd128", IX86_BUILTIN_PBLENDD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_INT },
31714 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pblenddv8si, "__builtin_ia32_pblendd256", IX86_BUILTIN_PBLENDD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_INT },
31715 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pbroadcastv32qi, "__builtin_ia32_pbroadcastb256", IX86_BUILTIN_PBROADCASTB256, UNKNOWN, (int) V32QI_FTYPE_V16QI },
31716 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pbroadcastv16hi, "__builtin_ia32_pbroadcastw256", IX86_BUILTIN_PBROADCASTW256, UNKNOWN, (int) V16HI_FTYPE_V8HI },
31717 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pbroadcastv8si, "__builtin_ia32_pbroadcastd256", IX86_BUILTIN_PBROADCASTD256, UNKNOWN, (int) V8SI_FTYPE_V4SI },
31718 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pbroadcastv4di, "__builtin_ia32_pbroadcastq256", IX86_BUILTIN_PBROADCASTQ256, UNKNOWN, (int) V4DI_FTYPE_V2DI },
31719 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pbroadcastv16qi, "__builtin_ia32_pbroadcastb128", IX86_BUILTIN_PBROADCASTB128, UNKNOWN, (int) V16QI_FTYPE_V16QI },
31720 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pbroadcastv8hi, "__builtin_ia32_pbroadcastw128", IX86_BUILTIN_PBROADCASTW128, UNKNOWN, (int) V8HI_FTYPE_V8HI },
31721 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pbroadcastv4si, "__builtin_ia32_pbroadcastd128", IX86_BUILTIN_PBROADCASTD128, UNKNOWN, (int) V4SI_FTYPE_V4SI },
31722 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pbroadcastv2di, "__builtin_ia32_pbroadcastq128", IX86_BUILTIN_PBROADCASTQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI },
31723 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_permvarv8si, "__builtin_ia32_permvarsi256", IX86_BUILTIN_VPERMVARSI256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
31724 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_permvarv8sf, "__builtin_ia32_permvarsf256", IX86_BUILTIN_VPERMVARSF256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SI },
31725 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_permv4df, "__builtin_ia32_permdf256", IX86_BUILTIN_VPERMDF256, UNKNOWN, (int) V4DF_FTYPE_V4DF_INT },
31726 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_permv4di, "__builtin_ia32_permdi256", IX86_BUILTIN_VPERMDI256, UNKNOWN, (int) V4DI_FTYPE_V4DI_INT },
31727 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_permv2ti, "__builtin_ia32_permti256", IX86_BUILTIN_VPERMTI256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_INT },
31728 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx_vextractf128v4di, "__builtin_ia32_extract128i256", IX86_BUILTIN_VEXTRACT128I256, UNKNOWN, (int) V2DI_FTYPE_V4DI_INT },
31729 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx_vinsertf128v4di, "__builtin_ia32_insert128i256", IX86_BUILTIN_VINSERT128I256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V2DI_INT },
31730 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_ashlvv4di, "__builtin_ia32_psllv4di", IX86_BUILTIN_PSLLVV4DI, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI },
31731 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_ashlvv2di, "__builtin_ia32_psllv2di", IX86_BUILTIN_PSLLVV2DI, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
31732 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_ashlvv8si, "__builtin_ia32_psllv8si", IX86_BUILTIN_PSLLVV8SI, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
31733 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_ashlvv4si, "__builtin_ia32_psllv4si", IX86_BUILTIN_PSLLVV4SI, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
31734 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_ashrvv8si, "__builtin_ia32_psrav8si", IX86_BUILTIN_PSRAVV8SI, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
31735 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_ashrvv4si, "__builtin_ia32_psrav4si", IX86_BUILTIN_PSRAVV4SI, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
31736 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_lshrvv4di, "__builtin_ia32_psrlv4di", IX86_BUILTIN_PSRLVV4DI, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI },
31737 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_lshrvv2di, "__builtin_ia32_psrlv2di", IX86_BUILTIN_PSRLVV2DI, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
31738 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_lshrvv8si, "__builtin_ia32_psrlv8si", IX86_BUILTIN_PSRLVV8SI, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
31739 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_lshrvv4si, "__builtin_ia32_psrlv4si", IX86_BUILTIN_PSRLVV4SI, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
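/* LZCNT: 16-bit leading-zero count.  __builtin_clzs maps onto the
   clzhi2_lzcnt pattern when -mlzcnt is enabled.  */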
31741 { OPTION_MASK_ISA_LZCNT, CODE_FOR_clzhi2_lzcnt, "__builtin_clzs", IX86_BUILTIN_CLZS, UNKNOWN, (int) UINT16_FTYPE_UINT16 },
31743 /* BMI */
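/* BMI: bit-field extract plus the 16-bit trailing-zero count.  For bextr
   the second operand is a control word in the BEXTR format, with the start
   bit in bits 7:0 and the field length in bits 15:8, e.g.
     field = __builtin_ia32_bextr_u32 (x, (len << 8) | start);  */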
31744 { OPTION_MASK_ISA_BMI, CODE_FOR_bmi_bextr_si, "__builtin_ia32_bextr_u32", IX86_BUILTIN_BEXTR32, UNKNOWN, (int) UINT_FTYPE_UINT_UINT },
31745 { OPTION_MASK_ISA_BMI, CODE_FOR_bmi_bextr_di, "__builtin_ia32_bextr_u64", IX86_BUILTIN_BEXTR64, UNKNOWN, (int) UINT64_FTYPE_UINT64_UINT64 },
31746 { OPTION_MASK_ISA_BMI, CODE_FOR_ctzhi2, "__builtin_ctzs", IX86_BUILTIN_CTZS, UNKNOWN, (int) UINT16_FTYPE_UINT16 },
31748 /* TBM */
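/* TBM (AMD): bextri, the immediate-control form of the BMI bextr builtins
   above, using the same start/length encoding.  */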
31749 { OPTION_MASK_ISA_TBM, CODE_FOR_tbm_bextri_si, "__builtin_ia32_bextri_u32", IX86_BUILTIN_BEXTRI32, UNKNOWN, (int) UINT_FTYPE_UINT_UINT },
31750 { OPTION_MASK_ISA_TBM, CODE_FOR_tbm_bextri_di, "__builtin_ia32_bextri_u64", IX86_BUILTIN_BEXTRI64, UNKNOWN, (int) UINT64_FTYPE_UINT64_UINT64 },
31752 /* F16C */
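/* F16C: half-precision <-> single-precision conversions.  The ps2ph
   directions take a rounding-control immediate as their last argument
   (V8HI_FTYPE_V4SF_INT / V8HI_FTYPE_V8SF_INT), mirroring VCVTPS2PH.  */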
31753 { OPTION_MASK_ISA_F16C, CODE_FOR_vcvtph2ps, "__builtin_ia32_vcvtph2ps", IX86_BUILTIN_CVTPH2PS, UNKNOWN, (int) V4SF_FTYPE_V8HI },
31754 { OPTION_MASK_ISA_F16C, CODE_FOR_vcvtph2ps256, "__builtin_ia32_vcvtph2ps256", IX86_BUILTIN_CVTPH2PS256, UNKNOWN, (int) V8SF_FTYPE_V8HI },
31755 { OPTION_MASK_ISA_F16C, CODE_FOR_vcvtps2ph, "__builtin_ia32_vcvtps2ph", IX86_BUILTIN_CVTPS2PH, UNKNOWN, (int) V8HI_FTYPE_V4SF_INT },
31756 { OPTION_MASK_ISA_F16C, CODE_FOR_vcvtps2ph256, "__builtin_ia32_vcvtps2ph256", IX86_BUILTIN_CVTPS2PH256, UNKNOWN, (int) V8HI_FTYPE_V8SF_INT },
31758 /* BMI2 */
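/* BMI2: zero-high-bits (bzhi) and the parallel bit deposit/extract pair.
   pdep scatters the low-order bits of its first operand into the bit
   positions set in the mask; pext gathers the masked bits down to the low
   end, e.g.
     packed = __builtin_ia32_pext_si (value, mask);  */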
31759 { OPTION_MASK_ISA_BMI2, CODE_FOR_bmi2_bzhi_si3, "__builtin_ia32_bzhi_si", IX86_BUILTIN_BZHI32, UNKNOWN, (int) UINT_FTYPE_UINT_UINT },
31760 { OPTION_MASK_ISA_BMI2, CODE_FOR_bmi2_bzhi_di3, "__builtin_ia32_bzhi_di", IX86_BUILTIN_BZHI64, UNKNOWN, (int) UINT64_FTYPE_UINT64_UINT64 },
31761 { OPTION_MASK_ISA_BMI2, CODE_FOR_bmi2_pdep_si3, "__builtin_ia32_pdep_si", IX86_BUILTIN_PDEP32, UNKNOWN, (int) UINT_FTYPE_UINT_UINT },
31762 { OPTION_MASK_ISA_BMI2, CODE_FOR_bmi2_pdep_di3, "__builtin_ia32_pdep_di", IX86_BUILTIN_PDEP64, UNKNOWN, (int) UINT64_FTYPE_UINT64_UINT64 },
31763 { OPTION_MASK_ISA_BMI2, CODE_FOR_bmi2_pext_si3, "__builtin_ia32_pext_si", IX86_BUILTIN_PEXT32, UNKNOWN, (int) UINT_FTYPE_UINT_UINT },
31764 { OPTION_MASK_ISA_BMI2, CODE_FOR_bmi2_pext_di3, "__builtin_ia32_pext_di", IX86_BUILTIN_PEXT64, UNKNOWN, (int) UINT64_FTYPE_UINT64_UINT64 },
31766 /* AVX512F */
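/* AVX512F: 512-bit operations, most in write-masked form.  The _mask
   variants carry two extra trailing arguments in their prototypes: a
   pass-through vector supplying the elements whose mask bit is clear, and
   the QImode/HImode write mask itself (e.g. V16SI_FTYPE_V16SI_V16SI_V16SI_HI
   for paddd512).  The _maskz forms zero the unselected elements instead.  */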
31767 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_si512_256si, "__builtin_ia32_si512_256si", IX86_BUILTIN_SI512_SI256, UNKNOWN, (int) V16SI_FTYPE_V8SI },
31768 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_ps512_256ps, "__builtin_ia32_ps512_256ps", IX86_BUILTIN_PS512_PS256, UNKNOWN, (int) V16SF_FTYPE_V8SF },
31769 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_pd512_256pd, "__builtin_ia32_pd512_256pd", IX86_BUILTIN_PD512_PD256, UNKNOWN, (int) V8DF_FTYPE_V4DF },
31770 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_si512_si, "__builtin_ia32_si512_si", IX86_BUILTIN_SI512_SI, UNKNOWN, (int) V16SI_FTYPE_V4SI },
31771 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_ps512_ps, "__builtin_ia32_ps512_ps", IX86_BUILTIN_PS512_PS, UNKNOWN, (int) V16SF_FTYPE_V4SF },
31772 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_pd512_pd, "__builtin_ia32_pd512_pd", IX86_BUILTIN_PD512_PD, UNKNOWN, (int) V8DF_FTYPE_V2DF },
31773 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_alignv16si_mask, "__builtin_ia32_alignd512_mask", IX86_BUILTIN_ALIGND512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_INT_V16SI_HI },
31774 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_alignv8di_mask, "__builtin_ia32_alignq512_mask", IX86_BUILTIN_ALIGNQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_INT_V8DI_QI },
31775 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_blendmv16si, "__builtin_ia32_blendmd_512_mask", IX86_BUILTIN_BLENDMD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_HI },
31776 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_blendmv8df, "__builtin_ia32_blendmpd_512_mask", IX86_BUILTIN_BLENDMPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_QI },
31777 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_blendmv16sf, "__builtin_ia32_blendmps_512_mask", IX86_BUILTIN_BLENDMPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_HI },
31778 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_blendmv8di, "__builtin_ia32_blendmq_512_mask", IX86_BUILTIN_BLENDMQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_QI },
31779 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_broadcastv16sf_mask, "__builtin_ia32_broadcastf32x4_512", IX86_BUILTIN_BROADCASTF32X4_512, UNKNOWN, (int) V16SF_FTYPE_V4SF_V16SF_HI },
31780 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_broadcastv8df_mask, "__builtin_ia32_broadcastf64x4_512", IX86_BUILTIN_BROADCASTF64X4_512, UNKNOWN, (int) V8DF_FTYPE_V4DF_V8DF_QI },
31781 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_broadcastv16si_mask, "__builtin_ia32_broadcasti32x4_512", IX86_BUILTIN_BROADCASTI32X4_512, UNKNOWN, (int) V16SI_FTYPE_V4SI_V16SI_HI },
31782 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_broadcastv8di_mask, "__builtin_ia32_broadcasti64x4_512", IX86_BUILTIN_BROADCASTI64X4_512, UNKNOWN, (int) V8DI_FTYPE_V4DI_V8DI_QI },
31783 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vec_dupv8df_mask, "__builtin_ia32_broadcastsd512", IX86_BUILTIN_BROADCASTSD512, UNKNOWN, (int) V8DF_FTYPE_V2DF_V8DF_QI },
31784 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vec_dupv16sf_mask, "__builtin_ia32_broadcastss512", IX86_BUILTIN_BROADCASTSS512, UNKNOWN, (int) V16SF_FTYPE_V4SF_V16SF_HI },
31785 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_cmpv16si3_mask, "__builtin_ia32_cmpd512_mask", IX86_BUILTIN_CMPD512, UNKNOWN, (int) HI_FTYPE_V16SI_V16SI_INT_HI },
31786 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_cmpv8di3_mask, "__builtin_ia32_cmpq512_mask", IX86_BUILTIN_CMPQ512, UNKNOWN, (int) QI_FTYPE_V8DI_V8DI_INT_QI },
31787 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_compressv8df_mask, "__builtin_ia32_compressdf512_mask", IX86_BUILTIN_COMPRESSPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_QI },
31788 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_compressv16sf_mask, "__builtin_ia32_compresssf512_mask", IX86_BUILTIN_COMPRESSPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_HI },
31789 { OPTION_MASK_ISA_AVX512F, CODE_FOR_floatv8siv8df2_mask, "__builtin_ia32_cvtdq2pd512_mask", IX86_BUILTIN_CVTDQ2PD512, UNKNOWN, (int) V8DF_FTYPE_V8SI_V8DF_QI },
31790 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vcvtps2ph512_mask, "__builtin_ia32_vcvtps2ph512_mask", IX86_BUILTIN_CVTPS2PH512, UNKNOWN, (int) V16HI_FTYPE_V16SF_INT_V16HI_HI },
31791 { OPTION_MASK_ISA_AVX512F, CODE_FOR_ufloatv8siv8df2_mask, "__builtin_ia32_cvtudq2pd512_mask", IX86_BUILTIN_CVTUDQ2PD512, UNKNOWN, (int) V8DF_FTYPE_V8SI_V8DF_QI },
31792 { OPTION_MASK_ISA_AVX512F, CODE_FOR_cvtusi2sd32, "__builtin_ia32_cvtusi2sd32", IX86_BUILTIN_CVTUSI2SD32, UNKNOWN, (int) V2DF_FTYPE_V2DF_UINT },
31793 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_expandv8df_mask, "__builtin_ia32_expanddf512_mask", IX86_BUILTIN_EXPANDPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_QI },
31794 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_expandv8df_maskz, "__builtin_ia32_expanddf512_maskz", IX86_BUILTIN_EXPANDPD512Z, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_QI },
31795 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_expandv16sf_mask, "__builtin_ia32_expandsf512_mask", IX86_BUILTIN_EXPANDPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_HI },
31796 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_expandv16sf_maskz, "__builtin_ia32_expandsf512_maskz", IX86_BUILTIN_EXPANDPS512Z, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_HI },
31797 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vextractf32x4_mask, "__builtin_ia32_extractf32x4_mask", IX86_BUILTIN_EXTRACTF32X4, UNKNOWN, (int) V4SF_FTYPE_V16SF_INT_V4SF_QI },
31798 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vextractf64x4_mask, "__builtin_ia32_extractf64x4_mask", IX86_BUILTIN_EXTRACTF64X4, UNKNOWN, (int) V4DF_FTYPE_V8DF_INT_V4DF_QI },
31799 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vextracti32x4_mask, "__builtin_ia32_extracti32x4_mask", IX86_BUILTIN_EXTRACTI32X4, UNKNOWN, (int) V4SI_FTYPE_V16SI_INT_V4SI_QI },
31800 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vextracti64x4_mask, "__builtin_ia32_extracti64x4_mask", IX86_BUILTIN_EXTRACTI64X4, UNKNOWN, (int) V4DI_FTYPE_V8DI_INT_V4DI_QI },
31801 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vinsertf32x4_mask, "__builtin_ia32_insertf32x4_mask", IX86_BUILTIN_INSERTF32X4, UNKNOWN, (int) V16SF_FTYPE_V16SF_V4SF_INT_V16SF_HI },
31802 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vinsertf64x4_mask, "__builtin_ia32_insertf64x4_mask", IX86_BUILTIN_INSERTF64X4, UNKNOWN, (int) V8DF_FTYPE_V8DF_V4DF_INT_V8DF_QI },
31803 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vinserti32x4_mask, "__builtin_ia32_inserti32x4_mask", IX86_BUILTIN_INSERTI32X4, UNKNOWN, (int) V16SI_FTYPE_V16SI_V4SI_INT_V16SI_HI },
31804 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vinserti64x4_mask, "__builtin_ia32_inserti64x4_mask", IX86_BUILTIN_INSERTI64X4, UNKNOWN, (int) V8DI_FTYPE_V8DI_V4DI_INT_V8DI_QI },
31805 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_loadv8df_mask, "__builtin_ia32_movapd512_mask", IX86_BUILTIN_MOVAPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_QI },
31806 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_loadv16sf_mask, "__builtin_ia32_movaps512_mask", IX86_BUILTIN_MOVAPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_HI },
31807 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_movddup512_mask, "__builtin_ia32_movddup512_mask", IX86_BUILTIN_MOVDDUP512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_QI },
31808 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_loadv16si_mask, "__builtin_ia32_movdqa32_512_mask", IX86_BUILTIN_MOVDQA32_512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_HI },
31809 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_loadv8di_mask, "__builtin_ia32_movdqa64_512_mask", IX86_BUILTIN_MOVDQA64_512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_QI },
31810 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_movshdup512_mask, "__builtin_ia32_movshdup512_mask", IX86_BUILTIN_MOVSHDUP512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_HI },
31811 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_movsldup512_mask, "__builtin_ia32_movsldup512_mask", IX86_BUILTIN_MOVSLDUP512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_HI },
31812 { OPTION_MASK_ISA_AVX512F, CODE_FOR_absv16si2_mask, "__builtin_ia32_pabsd512_mask", IX86_BUILTIN_PABSD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_HI },
31813 { OPTION_MASK_ISA_AVX512F, CODE_FOR_absv8di2_mask, "__builtin_ia32_pabsq512_mask", IX86_BUILTIN_PABSQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_QI },
31814 { OPTION_MASK_ISA_AVX512F, CODE_FOR_addv16si3_mask, "__builtin_ia32_paddd512_mask", IX86_BUILTIN_PADDD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_HI },
31815 { OPTION_MASK_ISA_AVX512F, CODE_FOR_addv8di3_mask, "__builtin_ia32_paddq512_mask", IX86_BUILTIN_PADDQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
31816 { OPTION_MASK_ISA_AVX512F, CODE_FOR_andv16si3_mask, "__builtin_ia32_pandd512_mask", IX86_BUILTIN_PANDD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_HI },
31817 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_andnotv16si3_mask, "__builtin_ia32_pandnd512_mask", IX86_BUILTIN_PANDND512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_HI },
31818 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_andnotv8di3_mask, "__builtin_ia32_pandnq512_mask", IX86_BUILTIN_PANDNQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
31819 { OPTION_MASK_ISA_AVX512F, CODE_FOR_andv8di3_mask, "__builtin_ia32_pandq512_mask", IX86_BUILTIN_PANDQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
31820 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vec_dupv16si_mask, "__builtin_ia32_pbroadcastd512", IX86_BUILTIN_PBROADCASTD512, UNKNOWN, (int) V16SI_FTYPE_V4SI_V16SI_HI },
31821 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vec_dup_gprv16si_mask, "__builtin_ia32_pbroadcastd512_gpr_mask", IX86_BUILTIN_PBROADCASTD512_GPR, UNKNOWN, (int) V16SI_FTYPE_SI_V16SI_HI },
31822 { OPTION_MASK_ISA_AVX512CD, CODE_FOR_avx512cd_maskb_vec_dupv8di, "__builtin_ia32_broadcastmb512", IX86_BUILTIN_PBROADCASTMB512, UNKNOWN, (int) V8DI_FTYPE_QI },
31823 { OPTION_MASK_ISA_AVX512CD, CODE_FOR_avx512cd_maskw_vec_dupv16si, "__builtin_ia32_broadcastmw512", IX86_BUILTIN_PBROADCASTMW512, UNKNOWN, (int) V16SI_FTYPE_HI },
31824 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vec_dupv8di_mask, "__builtin_ia32_pbroadcastq512", IX86_BUILTIN_PBROADCASTQ512, UNKNOWN, (int) V8DI_FTYPE_V2DI_V8DI_QI },
31825 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vec_dup_gprv8di_mask, "__builtin_ia32_pbroadcastq512_gpr_mask", IX86_BUILTIN_PBROADCASTQ512_GPR, UNKNOWN, (int) V8DI_FTYPE_DI_V8DI_QI },
31826 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_eqv16si3_mask, "__builtin_ia32_pcmpeqd512_mask", IX86_BUILTIN_PCMPEQD512_MASK, UNKNOWN, (int) HI_FTYPE_V16SI_V16SI_HI },
31827 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_eqv8di3_mask, "__builtin_ia32_pcmpeqq512_mask", IX86_BUILTIN_PCMPEQQ512_MASK, UNKNOWN, (int) QI_FTYPE_V8DI_V8DI_QI },
31828 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_gtv16si3_mask, "__builtin_ia32_pcmpgtd512_mask", IX86_BUILTIN_PCMPGTD512_MASK, UNKNOWN, (int) HI_FTYPE_V16SI_V16SI_HI },
31829 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_gtv8di3_mask, "__builtin_ia32_pcmpgtq512_mask", IX86_BUILTIN_PCMPGTQ512_MASK, UNKNOWN, (int) QI_FTYPE_V8DI_V8DI_QI },
31830 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_compressv16si_mask, "__builtin_ia32_compresssi512_mask", IX86_BUILTIN_PCOMPRESSD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_HI },
31831 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_compressv8di_mask, "__builtin_ia32_compressdi512_mask", IX86_BUILTIN_PCOMPRESSQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_QI },
31832 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_expandv16si_mask, "__builtin_ia32_expandsi512_mask", IX86_BUILTIN_PEXPANDD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_HI },
31833 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_expandv16si_maskz, "__builtin_ia32_expandsi512_maskz", IX86_BUILTIN_PEXPANDD512Z, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_HI },
31834 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_expandv8di_mask, "__builtin_ia32_expanddi512_mask", IX86_BUILTIN_PEXPANDQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_QI },
31835 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_expandv8di_maskz, "__builtin_ia32_expanddi512_maskz", IX86_BUILTIN_PEXPANDQ512Z, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_QI },
31836 { OPTION_MASK_ISA_AVX512F, CODE_FOR_smaxv16si3_mask, "__builtin_ia32_pmaxsd512_mask", IX86_BUILTIN_PMAXSD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_HI },
31837 { OPTION_MASK_ISA_AVX512F, CODE_FOR_smaxv8di3_mask, "__builtin_ia32_pmaxsq512_mask", IX86_BUILTIN_PMAXSQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
31838 { OPTION_MASK_ISA_AVX512F, CODE_FOR_umaxv16si3_mask, "__builtin_ia32_pmaxud512_mask", IX86_BUILTIN_PMAXUD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_HI },
31839 { OPTION_MASK_ISA_AVX512F, CODE_FOR_umaxv8di3_mask, "__builtin_ia32_pmaxuq512_mask", IX86_BUILTIN_PMAXUQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
31840 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sminv16si3_mask, "__builtin_ia32_pminsd512_mask", IX86_BUILTIN_PMINSD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_HI },
31841 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sminv8di3_mask, "__builtin_ia32_pminsq512_mask", IX86_BUILTIN_PMINSQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
31842 { OPTION_MASK_ISA_AVX512F, CODE_FOR_uminv16si3_mask, "__builtin_ia32_pminud512_mask", IX86_BUILTIN_PMINUD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_HI },
31843 { OPTION_MASK_ISA_AVX512F, CODE_FOR_uminv8di3_mask, "__builtin_ia32_pminuq512_mask", IX86_BUILTIN_PMINUQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
31844 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_truncatev16siv16qi2_mask, "__builtin_ia32_pmovdb512_mask", IX86_BUILTIN_PMOVDB512, UNKNOWN, (int) V16QI_FTYPE_V16SI_V16QI_HI },
31845 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_truncatev16siv16hi2_mask, "__builtin_ia32_pmovdw512_mask", IX86_BUILTIN_PMOVDW512, UNKNOWN, (int) V16HI_FTYPE_V16SI_V16HI_HI },
31846 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_truncatev8div16qi2_mask, "__builtin_ia32_pmovqb512_mask", IX86_BUILTIN_PMOVQB512, UNKNOWN, (int) V16QI_FTYPE_V8DI_V16QI_QI },
31847 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_truncatev8div8si2_mask, "__builtin_ia32_pmovqd512_mask", IX86_BUILTIN_PMOVQD512, UNKNOWN, (int) V8SI_FTYPE_V8DI_V8SI_QI },
31848 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_truncatev8div8hi2_mask, "__builtin_ia32_pmovqw512_mask", IX86_BUILTIN_PMOVQW512, UNKNOWN, (int) V8HI_FTYPE_V8DI_V8HI_QI },
31849 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_ss_truncatev16siv16qi2_mask, "__builtin_ia32_pmovsdb512_mask", IX86_BUILTIN_PMOVSDB512, UNKNOWN, (int) V16QI_FTYPE_V16SI_V16QI_HI },
31850 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_ss_truncatev16siv16hi2_mask, "__builtin_ia32_pmovsdw512_mask", IX86_BUILTIN_PMOVSDW512, UNKNOWN, (int) V16HI_FTYPE_V16SI_V16HI_HI },
31851 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_ss_truncatev8div16qi2_mask, "__builtin_ia32_pmovsqb512_mask", IX86_BUILTIN_PMOVSQB512, UNKNOWN, (int) V16QI_FTYPE_V8DI_V16QI_QI },
31852 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_ss_truncatev8div8si2_mask, "__builtin_ia32_pmovsqd512_mask", IX86_BUILTIN_PMOVSQD512, UNKNOWN, (int) V8SI_FTYPE_V8DI_V8SI_QI },
31853 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_ss_truncatev8div8hi2_mask, "__builtin_ia32_pmovsqw512_mask", IX86_BUILTIN_PMOVSQW512, UNKNOWN, (int) V8HI_FTYPE_V8DI_V8HI_QI },
31854 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_sign_extendv16qiv16si2_mask, "__builtin_ia32_pmovsxbd512_mask", IX86_BUILTIN_PMOVSXBD512, UNKNOWN, (int) V16SI_FTYPE_V16QI_V16SI_HI },
31855 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_sign_extendv8qiv8di2_mask, "__builtin_ia32_pmovsxbq512_mask", IX86_BUILTIN_PMOVSXBQ512, UNKNOWN, (int) V8DI_FTYPE_V16QI_V8DI_QI },
31856 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_sign_extendv8siv8di2_mask, "__builtin_ia32_pmovsxdq512_mask", IX86_BUILTIN_PMOVSXDQ512, UNKNOWN, (int) V8DI_FTYPE_V8SI_V8DI_QI },
31857 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_sign_extendv16hiv16si2_mask, "__builtin_ia32_pmovsxwd512_mask", IX86_BUILTIN_PMOVSXWD512, UNKNOWN, (int) V16SI_FTYPE_V16HI_V16SI_HI },
31858 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_sign_extendv8hiv8di2_mask, "__builtin_ia32_pmovsxwq512_mask", IX86_BUILTIN_PMOVSXWQ512, UNKNOWN, (int) V8DI_FTYPE_V8HI_V8DI_QI },
31859 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_us_truncatev16siv16qi2_mask, "__builtin_ia32_pmovusdb512_mask", IX86_BUILTIN_PMOVUSDB512, UNKNOWN, (int) V16QI_FTYPE_V16SI_V16QI_HI },
31860 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_us_truncatev16siv16hi2_mask, "__builtin_ia32_pmovusdw512_mask", IX86_BUILTIN_PMOVUSDW512, UNKNOWN, (int) V16HI_FTYPE_V16SI_V16HI_HI },
31861 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_us_truncatev8div16qi2_mask, "__builtin_ia32_pmovusqb512_mask", IX86_BUILTIN_PMOVUSQB512, UNKNOWN, (int) V16QI_FTYPE_V8DI_V16QI_QI },
31862 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_us_truncatev8div8si2_mask, "__builtin_ia32_pmovusqd512_mask", IX86_BUILTIN_PMOVUSQD512, UNKNOWN, (int) V8SI_FTYPE_V8DI_V8SI_QI },
31863 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_us_truncatev8div8hi2_mask, "__builtin_ia32_pmovusqw512_mask", IX86_BUILTIN_PMOVUSQW512, UNKNOWN, (int) V8HI_FTYPE_V8DI_V8HI_QI },
31864 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_zero_extendv16qiv16si2_mask, "__builtin_ia32_pmovzxbd512_mask", IX86_BUILTIN_PMOVZXBD512, UNKNOWN, (int) V16SI_FTYPE_V16QI_V16SI_HI },
31865 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_zero_extendv8qiv8di2_mask, "__builtin_ia32_pmovzxbq512_mask", IX86_BUILTIN_PMOVZXBQ512, UNKNOWN, (int) V8DI_FTYPE_V16QI_V8DI_QI },
31866 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_zero_extendv8siv8di2_mask, "__builtin_ia32_pmovzxdq512_mask", IX86_BUILTIN_PMOVZXDQ512, UNKNOWN, (int) V8DI_FTYPE_V8SI_V8DI_QI },
31867 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_zero_extendv16hiv16si2_mask, "__builtin_ia32_pmovzxwd512_mask", IX86_BUILTIN_PMOVZXWD512, UNKNOWN, (int) V16SI_FTYPE_V16HI_V16SI_HI },
31868 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_zero_extendv8hiv8di2_mask, "__builtin_ia32_pmovzxwq512_mask", IX86_BUILTIN_PMOVZXWQ512, UNKNOWN, (int) V8DI_FTYPE_V8HI_V8DI_QI },
31869 { OPTION_MASK_ISA_AVX512F, CODE_FOR_vec_widen_smult_even_v16si_mask, "__builtin_ia32_pmuldq512_mask", IX86_BUILTIN_PMULDQ512, UNKNOWN, (int) V8DI_FTYPE_V16SI_V16SI_V8DI_QI },
31870 { OPTION_MASK_ISA_AVX512F, CODE_FOR_mulv16si3_mask, "__builtin_ia32_pmulld512_mask", IX86_BUILTIN_PMULLD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_HI },
31871 { OPTION_MASK_ISA_AVX512F, CODE_FOR_vec_widen_umult_even_v16si_mask, "__builtin_ia32_pmuludq512_mask", IX86_BUILTIN_PMULUDQ512, UNKNOWN, (int) V8DI_FTYPE_V16SI_V16SI_V8DI_QI },
31872 { OPTION_MASK_ISA_AVX512F, CODE_FOR_iorv16si3_mask, "__builtin_ia32_pord512_mask", IX86_BUILTIN_PORD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_HI },
31873 { OPTION_MASK_ISA_AVX512F, CODE_FOR_iorv8di3_mask, "__builtin_ia32_porq512_mask", IX86_BUILTIN_PORQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
31874 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_rolv16si_mask, "__builtin_ia32_prold512_mask", IX86_BUILTIN_PROLD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_INT_V16SI_HI },
31875 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_rolv8di_mask, "__builtin_ia32_prolq512_mask", IX86_BUILTIN_PROLQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_INT_V8DI_QI },
31876 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_rolvv16si_mask, "__builtin_ia32_prolvd512_mask", IX86_BUILTIN_PROLVD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_HI },
31877 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_rolvv8di_mask, "__builtin_ia32_prolvq512_mask", IX86_BUILTIN_PROLVQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
31878 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_rorv16si_mask, "__builtin_ia32_prord512_mask", IX86_BUILTIN_PRORD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_INT_V16SI_HI },
31879 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_rorv8di_mask, "__builtin_ia32_prorq512_mask", IX86_BUILTIN_PRORQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_INT_V8DI_QI },
31880 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_rorvv16si_mask, "__builtin_ia32_prorvd512_mask", IX86_BUILTIN_PRORVD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_HI },
31881 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_rorvv8di_mask, "__builtin_ia32_prorvq512_mask", IX86_BUILTIN_PRORVQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
31882 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_pshufdv3_mask, "__builtin_ia32_pshufd512_mask", IX86_BUILTIN_PSHUFD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_INT_V16SI_HI },
31883 { OPTION_MASK_ISA_AVX512F, CODE_FOR_ashlv16si3_mask, "__builtin_ia32_pslld512_mask", IX86_BUILTIN_PSLLD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V4SI_V16SI_HI },
31884 { OPTION_MASK_ISA_AVX512F, CODE_FOR_ashlv16si3_mask, "__builtin_ia32_pslldi512_mask", IX86_BUILTIN_PSLLDI512, UNKNOWN, (int) V16SI_FTYPE_V16SI_INT_V16SI_HI },
31885 { OPTION_MASK_ISA_AVX512F, CODE_FOR_ashlv8di3_mask, "__builtin_ia32_psllq512_mask", IX86_BUILTIN_PSLLQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V2DI_V8DI_QI },
31886 { OPTION_MASK_ISA_AVX512F, CODE_FOR_ashlv8di3_mask, "__builtin_ia32_psllqi512_mask", IX86_BUILTIN_PSLLQI512, UNKNOWN, (int) V8DI_FTYPE_V8DI_INT_V8DI_QI },
31887 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_ashlvv16si_mask, "__builtin_ia32_psllv16si_mask", IX86_BUILTIN_PSLLVV16SI, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_HI },
31888 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_ashlvv8di_mask, "__builtin_ia32_psllv8di_mask", IX86_BUILTIN_PSLLVV8DI, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
31889 { OPTION_MASK_ISA_AVX512F, CODE_FOR_ashrv16si3_mask, "__builtin_ia32_psrad512_mask", IX86_BUILTIN_PSRAD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V4SI_V16SI_HI },
31890 { OPTION_MASK_ISA_AVX512F, CODE_FOR_ashrv16si3_mask, "__builtin_ia32_psradi512_mask", IX86_BUILTIN_PSRADI512, UNKNOWN, (int) V16SI_FTYPE_V16SI_INT_V16SI_HI },
31891 { OPTION_MASK_ISA_AVX512F, CODE_FOR_ashrv8di3_mask, "__builtin_ia32_psraq512_mask", IX86_BUILTIN_PSRAQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V2DI_V8DI_QI },
31892 { OPTION_MASK_ISA_AVX512F, CODE_FOR_ashrv8di3_mask, "__builtin_ia32_psraqi512_mask", IX86_BUILTIN_PSRAQI512, UNKNOWN, (int) V8DI_FTYPE_V8DI_INT_V8DI_QI },
31893 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_ashrvv16si_mask, "__builtin_ia32_psrav16si_mask", IX86_BUILTIN_PSRAVV16SI, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_HI },
31894 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_ashrvv8di_mask, "__builtin_ia32_psrav8di_mask", IX86_BUILTIN_PSRAVV8DI, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
31895 { OPTION_MASK_ISA_AVX512F, CODE_FOR_lshrv16si3_mask, "__builtin_ia32_psrld512_mask", IX86_BUILTIN_PSRLD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V4SI_V16SI_HI },
31896 { OPTION_MASK_ISA_AVX512F, CODE_FOR_lshrv16si3_mask, "__builtin_ia32_psrldi512_mask", IX86_BUILTIN_PSRLDI512, UNKNOWN, (int) V16SI_FTYPE_V16SI_INT_V16SI_HI },
31897 { OPTION_MASK_ISA_AVX512F, CODE_FOR_lshrv8di3_mask, "__builtin_ia32_psrlq512_mask", IX86_BUILTIN_PSRLQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V2DI_V8DI_QI },
31898 { OPTION_MASK_ISA_AVX512F, CODE_FOR_lshrv8di3_mask, "__builtin_ia32_psrlqi512_mask", IX86_BUILTIN_PSRLQI512, UNKNOWN, (int) V8DI_FTYPE_V8DI_INT_V8DI_QI },
31899 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_lshrvv16si_mask, "__builtin_ia32_psrlv16si_mask", IX86_BUILTIN_PSRLVV16SI, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_HI },
31900 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_lshrvv8di_mask, "__builtin_ia32_psrlv8di_mask", IX86_BUILTIN_PSRLVV8DI, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
31901 { OPTION_MASK_ISA_AVX512F, CODE_FOR_subv16si3_mask, "__builtin_ia32_psubd512_mask", IX86_BUILTIN_PSUBD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_HI },
31902 { OPTION_MASK_ISA_AVX512F, CODE_FOR_subv8di3_mask, "__builtin_ia32_psubq512_mask", IX86_BUILTIN_PSUBQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
31903 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_testmv16si3_mask, "__builtin_ia32_ptestmd512", IX86_BUILTIN_PTESTMD512, UNKNOWN, (int) HI_FTYPE_V16SI_V16SI_HI },
31904 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_testmv8di3_mask, "__builtin_ia32_ptestmq512", IX86_BUILTIN_PTESTMQ512, UNKNOWN, (int) QI_FTYPE_V8DI_V8DI_QI },
31905 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_testnmv16si3_mask, "__builtin_ia32_ptestnmd512", IX86_BUILTIN_PTESTNMD512, UNKNOWN, (int) HI_FTYPE_V16SI_V16SI_HI },
31906 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_testnmv8di3_mask, "__builtin_ia32_ptestnmq512", IX86_BUILTIN_PTESTNMQ512, UNKNOWN, (int) QI_FTYPE_V8DI_V8DI_QI },
31907 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_interleave_highv16si_mask, "__builtin_ia32_punpckhdq512_mask", IX86_BUILTIN_PUNPCKHDQ512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_HI },
31908 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_interleave_highv8di_mask, "__builtin_ia32_punpckhqdq512_mask", IX86_BUILTIN_PUNPCKHQDQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
31909 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_interleave_lowv16si_mask, "__builtin_ia32_punpckldq512_mask", IX86_BUILTIN_PUNPCKLDQ512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_HI },
31910 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_interleave_lowv8di_mask, "__builtin_ia32_punpcklqdq512_mask", IX86_BUILTIN_PUNPCKLQDQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
31911 { OPTION_MASK_ISA_AVX512F, CODE_FOR_xorv16si3_mask, "__builtin_ia32_pxord512_mask", IX86_BUILTIN_PXORD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_HI },
31912 { OPTION_MASK_ISA_AVX512F, CODE_FOR_xorv8di3_mask, "__builtin_ia32_pxorq512_mask", IX86_BUILTIN_PXORQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
31913 { OPTION_MASK_ISA_AVX512F, CODE_FOR_rcp14v8df_mask, "__builtin_ia32_rcp14pd512_mask", IX86_BUILTIN_RCP14PD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_QI },
31914 { OPTION_MASK_ISA_AVX512F, CODE_FOR_rcp14v16sf_mask, "__builtin_ia32_rcp14ps512_mask", IX86_BUILTIN_RCP14PS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_HI },
31915 { OPTION_MASK_ISA_AVX512F, CODE_FOR_srcp14v2df, "__builtin_ia32_rcp14sd", IX86_BUILTIN_RCP14SD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
31916 { OPTION_MASK_ISA_AVX512F, CODE_FOR_srcp14v4sf, "__builtin_ia32_rcp14ss", IX86_BUILTIN_RCP14SS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
31917 { OPTION_MASK_ISA_AVX512F, CODE_FOR_rsqrt14v8df_mask, "__builtin_ia32_rsqrt14pd512_mask", IX86_BUILTIN_RSQRT14PD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_QI },
31918 { OPTION_MASK_ISA_AVX512F, CODE_FOR_rsqrt14v16sf_mask, "__builtin_ia32_rsqrt14ps512_mask", IX86_BUILTIN_RSQRT14PS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_HI },
31919 { OPTION_MASK_ISA_AVX512F, CODE_FOR_rsqrt14v2df, "__builtin_ia32_rsqrt14sd", IX86_BUILTIN_RSQRT14SD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
31920 { OPTION_MASK_ISA_AVX512F, CODE_FOR_rsqrt14v4sf, "__builtin_ia32_rsqrt14ss", IX86_BUILTIN_RSQRT14SS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
31921 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_shufpd512_mask, "__builtin_ia32_shufpd512_mask", IX86_BUILTIN_SHUFPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_INT_V8DF_QI },
31922 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_shufps512_mask, "__builtin_ia32_shufps512_mask", IX86_BUILTIN_SHUFPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_INT_V16SF_HI },
31923 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_shuf_f32x4_mask, "__builtin_ia32_shuf_f32x4_mask", IX86_BUILTIN_SHUF_F32x4, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_INT_V16SF_HI },
31924 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_shuf_f64x2_mask, "__builtin_ia32_shuf_f64x2_mask", IX86_BUILTIN_SHUF_F64x2, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_INT_V8DF_QI },
31925 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_shuf_i32x4_mask, "__builtin_ia32_shuf_i32x4_mask", IX86_BUILTIN_SHUF_I32x4, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_INT_V16SI_HI },
31926 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_shuf_i64x2_mask, "__builtin_ia32_shuf_i64x2_mask", IX86_BUILTIN_SHUF_I64x2, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_INT_V8DI_QI },
31927 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_ucmpv16si3_mask, "__builtin_ia32_ucmpd512_mask", IX86_BUILTIN_UCMPD512, UNKNOWN, (int) HI_FTYPE_V16SI_V16SI_INT_HI },
31928 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_ucmpv8di3_mask, "__builtin_ia32_ucmpq512_mask", IX86_BUILTIN_UCMPQ512, UNKNOWN, (int) QI_FTYPE_V8DI_V8DI_INT_QI },
31929 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_unpckhpd512_mask, "__builtin_ia32_unpckhpd512_mask", IX86_BUILTIN_UNPCKHPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI },
31930 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_unpckhps512_mask, "__builtin_ia32_unpckhps512_mask", IX86_BUILTIN_UNPCKHPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI },
31931 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_unpcklpd512_mask, "__builtin_ia32_unpcklpd512_mask", IX86_BUILTIN_UNPCKLPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI },
31932 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_unpcklps512_mask, "__builtin_ia32_unpcklps512_mask", IX86_BUILTIN_UNPCKLPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI },
31933 { OPTION_MASK_ISA_AVX512CD, CODE_FOR_clzv16si2_mask, "__builtin_ia32_vplzcntd_512_mask", IX86_BUILTIN_VPCLZCNTD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_HI },
31934 { OPTION_MASK_ISA_AVX512CD, CODE_FOR_clzv8di2_mask, "__builtin_ia32_vplzcntq_512_mask", IX86_BUILTIN_VPCLZCNTQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_QI },
31935 { OPTION_MASK_ISA_AVX512CD, CODE_FOR_conflictv16si_mask, "__builtin_ia32_vpconflictsi_512_mask", IX86_BUILTIN_VPCONFLICTD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_HI },
31936 { OPTION_MASK_ISA_AVX512CD, CODE_FOR_conflictv8di_mask, "__builtin_ia32_vpconflictdi_512_mask", IX86_BUILTIN_VPCONFLICTQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_QI },
31937 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_permv8df_mask, "__builtin_ia32_permdf512_mask", IX86_BUILTIN_VPERMDF512, UNKNOWN, (int) V8DF_FTYPE_V8DF_INT_V8DF_QI },
31938 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_permv8di_mask, "__builtin_ia32_permdi512_mask", IX86_BUILTIN_VPERMDI512, UNKNOWN, (int) V8DI_FTYPE_V8DI_INT_V8DI_QI },
31939 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vpermi2varv16si3_mask, "__builtin_ia32_vpermi2vard512_mask", IX86_BUILTIN_VPERMI2VARD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_HI },
31940 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vpermi2varv8df3_mask, "__builtin_ia32_vpermi2varpd512_mask", IX86_BUILTIN_VPERMI2VARPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DI_V8DF_QI },
31941 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vpermi2varv16sf3_mask, "__builtin_ia32_vpermi2varps512_mask", IX86_BUILTIN_VPERMI2VARPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SI_V16SF_HI },
31942 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vpermi2varv8di3_mask, "__builtin_ia32_vpermi2varq512_mask", IX86_BUILTIN_VPERMI2VARQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
31943 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vpermilv8df_mask, "__builtin_ia32_vpermilpd512_mask", IX86_BUILTIN_VPERMILPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_INT_V8DF_QI },
31944 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vpermilv16sf_mask, "__builtin_ia32_vpermilps512_mask", IX86_BUILTIN_VPERMILPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_INT_V16SF_HI },
31945 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vpermilvarv8df3_mask, "__builtin_ia32_vpermilvarpd512_mask", IX86_BUILTIN_VPERMILVARPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DI_V8DF_QI },
31946 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vpermilvarv16sf3_mask, "__builtin_ia32_vpermilvarps512_mask", IX86_BUILTIN_VPERMILVARPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SI_V16SF_HI },
31947 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vpermt2varv16si3_mask, "__builtin_ia32_vpermt2vard512_mask", IX86_BUILTIN_VPERMT2VARD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_HI },
31948 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vpermt2varv16si3_maskz, "__builtin_ia32_vpermt2vard512_maskz", IX86_BUILTIN_VPERMT2VARD512_MASKZ, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_HI },
31949 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vpermt2varv8df3_mask, "__builtin_ia32_vpermt2varpd512_mask", IX86_BUILTIN_VPERMT2VARPD512, UNKNOWN, (int) V8DF_FTYPE_V8DI_V8DF_V8DF_QI },
31950 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vpermt2varv8df3_maskz, "__builtin_ia32_vpermt2varpd512_maskz", IX86_BUILTIN_VPERMT2VARPD512_MASKZ, UNKNOWN, (int) V8DF_FTYPE_V8DI_V8DF_V8DF_QI },
31951 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vpermt2varv16sf3_mask, "__builtin_ia32_vpermt2varps512_mask", IX86_BUILTIN_VPERMT2VARPS512, UNKNOWN, (int) V16SF_FTYPE_V16SI_V16SF_V16SF_HI },
31952 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vpermt2varv16sf3_maskz, "__builtin_ia32_vpermt2varps512_maskz", IX86_BUILTIN_VPERMT2VARPS512_MASKZ, UNKNOWN, (int) V16SF_FTYPE_V16SI_V16SF_V16SF_HI },
31953 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vpermt2varv8di3_mask, "__builtin_ia32_vpermt2varq512_mask", IX86_BUILTIN_VPERMT2VARQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
31954 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vpermt2varv8di3_maskz, "__builtin_ia32_vpermt2varq512_maskz", IX86_BUILTIN_VPERMT2VARQ512_MASKZ, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
31955 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_permvarv8df_mask, "__builtin_ia32_permvardf512_mask", IX86_BUILTIN_VPERMVARDF512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DI_V8DF_QI },
31956 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_permvarv8di_mask, "__builtin_ia32_permvardi512_mask", IX86_BUILTIN_VPERMVARDI512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
31957 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_permvarv16sf_mask, "__builtin_ia32_permvarsf512_mask", IX86_BUILTIN_VPERMVARSF512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SI_V16SF_HI },
31958 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_permvarv16si_mask, "__builtin_ia32_permvarsi512_mask", IX86_BUILTIN_VPERMVARSI512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_HI },
31959 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vternlogv16si_mask, "__builtin_ia32_pternlogd512_mask", IX86_BUILTIN_VTERNLOGD512_MASK, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_INT_HI },
31960 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vternlogv16si_maskz, "__builtin_ia32_pternlogd512_maskz", IX86_BUILTIN_VTERNLOGD512_MASKZ, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_INT_HI },
31961 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vternlogv8di_mask, "__builtin_ia32_pternlogq512_mask", IX86_BUILTIN_VTERNLOGQ512_MASK, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_INT_QI },
31962 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vternlogv8di_maskz, "__builtin_ia32_pternlogq512_maskz", IX86_BUILTIN_VTERNLOGQ512_MASKZ, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_INT_QI },
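  /* Note (added comment, not part of the original table): the "_mask"
     builtins above generally take a merge source followed by a mask
     operand, while the matching "_maskz" variants zero the unselected
     lanes instead.  For example, the expand builtins back the
     _mm512_mask_expand_pd / _mm512_maskz_expand_pd intrinsics in
     avx512fintrin.h; see that header for the exact wrappers.  */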
31964 { OPTION_MASK_ISA_AVX512F, CODE_FOR_copysignv16sf3, "__builtin_ia32_copysignps512", IX86_BUILTIN_CPYSGNPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF },
31965 { OPTION_MASK_ISA_AVX512F, CODE_FOR_copysignv8df3, "__builtin_ia32_copysignpd512", IX86_BUILTIN_CPYSGNPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF },
31966 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_sqrtv8df2, "__builtin_ia32_sqrtpd512", IX86_BUILTIN_SQRTPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF },
31967 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sqrtv16sf2, "__builtin_ia32_sqrtps512", IX86_BUILTIN_SQRTPS_NR512, UNKNOWN, (int) V16SF_FTYPE_V16SF },
31968 { OPTION_MASK_ISA_AVX512ER, CODE_FOR_avx512er_exp2v16sf, "__builtin_ia32_exp2ps", IX86_BUILTIN_EXP2PS, UNKNOWN, (int) V16SF_FTYPE_V16SF },
31969 { OPTION_MASK_ISA_AVX512F, CODE_FOR_roundv8df2_vec_pack_sfix, "__builtin_ia32_roundpd_az_vec_pack_sfix512", IX86_BUILTIN_ROUNDPD_AZ_VEC_PACK_SFIX512, UNKNOWN, (int) V16SI_FTYPE_V8DF_V8DF },
31970 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_roundpd_vec_pack_sfix512, "__builtin_ia32_floorpd_vec_pack_sfix512", IX86_BUILTIN_FLOORPD_VEC_PACK_SFIX512, (enum rtx_code) ROUND_FLOOR, (int) V16SI_FTYPE_V8DF_V8DF_ROUND },
31971 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_roundpd_vec_pack_sfix512, "__builtin_ia32_ceilpd_vec_pack_sfix512", IX86_BUILTIN_CEILPD_VEC_PACK_SFIX512, (enum rtx_code) ROUND_CEIL, (int) V16SI_FTYPE_V8DF_V8DF_ROUND },
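  /* Reminder (added comment): each entry below follows struct
     builtin_description, i.e. { ISA option mask, insn code, builtin
     name, builtin enum, comparison/rtx code (or UNKNOWN),
     prototype enum cast to int }.  */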
31973 /* Mask arithmetic operations */
31974 { OPTION_MASK_ISA_AVX512F, CODE_FOR_andhi3, "__builtin_ia32_kandhi", IX86_BUILTIN_KAND16, UNKNOWN, (int) HI_FTYPE_HI_HI },
31975 { OPTION_MASK_ISA_AVX512F, CODE_FOR_kandnhi, "__builtin_ia32_kandnhi", IX86_BUILTIN_KANDN16, UNKNOWN, (int) HI_FTYPE_HI_HI },
31976 { OPTION_MASK_ISA_AVX512F, CODE_FOR_one_cmplhi2, "__builtin_ia32_knothi", IX86_BUILTIN_KNOT16, UNKNOWN, (int) HI_FTYPE_HI },
31977 { OPTION_MASK_ISA_AVX512F, CODE_FOR_iorhi3, "__builtin_ia32_korhi", IX86_BUILTIN_KOR16, UNKNOWN, (int) HI_FTYPE_HI_HI },
31978 { OPTION_MASK_ISA_AVX512F, CODE_FOR_kortestchi, "__builtin_ia32_kortestchi", IX86_BUILTIN_KORTESTC16, UNKNOWN, (int) HI_FTYPE_HI_HI },
31979 { OPTION_MASK_ISA_AVX512F, CODE_FOR_kortestzhi, "__builtin_ia32_kortestzhi", IX86_BUILTIN_KORTESTZ16, UNKNOWN, (int) HI_FTYPE_HI_HI },
31980 { OPTION_MASK_ISA_AVX512F, CODE_FOR_kunpckhi, "__builtin_ia32_kunpckhi", IX86_BUILTIN_KUNPCKBW, UNKNOWN, (int) HI_FTYPE_HI_HI },
31981 { OPTION_MASK_ISA_AVX512F, CODE_FOR_kxnorhi, "__builtin_ia32_kxnorhi", IX86_BUILTIN_KXNOR16, UNKNOWN, (int) HI_FTYPE_HI_HI },
31982 { OPTION_MASK_ISA_AVX512F, CODE_FOR_xorhi3, "__builtin_ia32_kxorhi", IX86_BUILTIN_KXOR16, UNKNOWN, (int) HI_FTYPE_HI_HI },
31983 { OPTION_MASK_ISA_AVX512F, CODE_FOR_kmovw, "__builtin_ia32_kmov16", IX86_BUILTIN_KMOV16, UNKNOWN, (int) HI_FTYPE_HI },
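  /* The mask-arithmetic builtins above back the __mmask16 intrinsics in
     avx512fintrin.h.  Roughly (a sketch; see that header for the real
     definitions):

       extern __inline __mmask16
       __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
       _mm512_kand (__mmask16 __A, __mmask16 __B)
       {
         return (__mmask16) __builtin_ia32_kandhi ((__mmask16) __A,
                                                   (__mmask16) __B);
       }  */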
31985 /* SHA */
31986 { OPTION_MASK_ISA_SSE2, CODE_FOR_sha1msg1, 0, IX86_BUILTIN_SHA1MSG1, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
31987 { OPTION_MASK_ISA_SSE2, CODE_FOR_sha1msg2, 0, IX86_BUILTIN_SHA1MSG2, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
31988 { OPTION_MASK_ISA_SSE2, CODE_FOR_sha1nexte, 0, IX86_BUILTIN_SHA1NEXTE, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
31989 { OPTION_MASK_ISA_SSE2, CODE_FOR_sha1rnds4, 0, IX86_BUILTIN_SHA1RNDS4, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_INT },
31990 { OPTION_MASK_ISA_SSE2, CODE_FOR_sha256msg1, 0, IX86_BUILTIN_SHA256MSG1, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
31991 { OPTION_MASK_ISA_SSE2, CODE_FOR_sha256msg2, 0, IX86_BUILTIN_SHA256MSG2, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
31992 { OPTION_MASK_ISA_SSE2, CODE_FOR_sha256rnds2, 0, IX86_BUILTIN_SHA256RNDS2, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI },
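  /* The SHA entries leave the name slot 0; the __builtin_ia32_sha*
     builtins are presumably registered by name elsewhere in this file,
     and user code normally reaches them through the wrappers in
     shaintrin.h, e.g. (a sketch; see the header):

       extern __inline __m128i
       __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
       _mm_sha1msg1_epu32 (__m128i __A, __m128i __B)
       {
         return (__m128i) __builtin_ia32_sha1msg1 ((__v4si) __A,
                                                   (__v4si) __B);
       }  */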
31994 /* AVX512VL. */
31995 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_palignrv32qi_mask, "__builtin_ia32_palignr256_mask", IX86_BUILTIN_PALIGNR256_MASK, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_INT_V4DI_SI_CONVERT },
31996 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_ssse3_palignrv16qi_mask, "__builtin_ia32_palignr128_mask", IX86_BUILTIN_PALIGNR128_MASK, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_INT_V2DI_HI_CONVERT },
31997 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_loadv4di_mask, "__builtin_ia32_movdqa64_256_mask", IX86_BUILTIN_MOVDQA64_256_MASK, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_QI },
31998 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_loadv2di_mask, "__builtin_ia32_movdqa64_128_mask", IX86_BUILTIN_MOVDQA64_128_MASK, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_QI },
31999 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_loadv8si_mask, "__builtin_ia32_movdqa32_256_mask", IX86_BUILTIN_MOVDQA32_256_MASK, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_QI },
32000 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_loadv4si_mask, "__builtin_ia32_movdqa32_128_mask", IX86_BUILTIN_MOVDQA32_128_MASK, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_QI },
32001 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_loadv4df_mask, "__builtin_ia32_movapd256_mask", IX86_BUILTIN_MOVAPD256_MASK, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_QI },
32002 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_loadv2df_mask, "__builtin_ia32_movapd128_mask", IX86_BUILTIN_MOVAPD128_MASK, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_QI },
32003 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_loadv8sf_mask, "__builtin_ia32_movaps256_mask", IX86_BUILTIN_MOVAPS256_MASK, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_QI },
32004 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_loadv4sf_mask, "__builtin_ia32_movaps128_mask", IX86_BUILTIN_MOVAPS128_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_QI },
32005 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_loaddquv16hi_mask, "__builtin_ia32_movdquhi256_mask", IX86_BUILTIN_MOVDQUHI256_MASK, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_HI },
32006 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_loaddquv8hi_mask, "__builtin_ia32_movdquhi128_mask", IX86_BUILTIN_MOVDQUHI128_MASK, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_QI },
32007 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_loaddquv32qi_mask, "__builtin_ia32_movdquqi256_mask", IX86_BUILTIN_MOVDQUQI256_MASK, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI_SI },
32008 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse2_loaddquv16qi_mask, "__builtin_ia32_movdquqi128_mask", IX86_BUILTIN_MOVDQUQI128_MASK, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_HI },
32009 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sminv4sf3_mask, "__builtin_ia32_minps_mask", IX86_BUILTIN_MINPS128_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF_QI },
32010 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_smaxv4sf3_mask, "__builtin_ia32_maxps_mask", IX86_BUILTIN_MAXPS128_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF_QI },
32011 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sminv2df3_mask, "__builtin_ia32_minpd_mask", IX86_BUILTIN_MINPD128_MASK, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF_QI },
32012 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_smaxv2df3_mask, "__builtin_ia32_maxpd_mask", IX86_BUILTIN_MAXPD128_MASK, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF_QI },
32013 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_smaxv4df3_mask, "__builtin_ia32_maxpd256_mask", IX86_BUILTIN_MAXPD256_MASK, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DF_QI },
32014 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_smaxv8sf3_mask, "__builtin_ia32_maxps256_mask", IX86_BUILTIN_MAXPS256_MASK, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SF_QI },
32015 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sminv4df3_mask, "__builtin_ia32_minpd256_mask", IX86_BUILTIN_MINPD256_MASK, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DF_QI },
32016 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sminv8sf3_mask, "__builtin_ia32_minps256_mask", IX86_BUILTIN_MINPS256_MASK, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SF_QI },
32017 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_mulv4sf3_mask, "__builtin_ia32_mulps_mask", IX86_BUILTIN_MULPS128_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF_QI },
32018 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse_divv4sf3_mask, "__builtin_ia32_divps_mask", IX86_BUILTIN_DIVPS128_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF_QI },
32019 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_mulv2df3_mask, "__builtin_ia32_mulpd_mask", IX86_BUILTIN_MULPD128_MASK, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF_QI },
32020 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse2_divv2df3_mask, "__builtin_ia32_divpd_mask", IX86_BUILTIN_DIVPD128_MASK, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF_QI },
32021 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_divv4df3_mask, "__builtin_ia32_divpd256_mask", IX86_BUILTIN_DIVPD256_MASK, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DF_QI },
32022 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_divv8sf3_mask, "__builtin_ia32_divps256_mask", IX86_BUILTIN_DIVPS256_MASK, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SF_QI },
32023 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_mulv4df3_mask, "__builtin_ia32_mulpd256_mask", IX86_BUILTIN_MULPD256_MASK, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DF_QI },
32024 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_mulv8sf3_mask, "__builtin_ia32_mulps256_mask", IX86_BUILTIN_MULPS256_MASK, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SF_QI },
32025 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_addv2df3_mask, "__builtin_ia32_addpd128_mask", IX86_BUILTIN_ADDPD128_MASK, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF_QI },
32026 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_addv4df3_mask, "__builtin_ia32_addpd256_mask", IX86_BUILTIN_ADDPD256_MASK, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DF_QI },
32027 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_addv4sf3_mask, "__builtin_ia32_addps128_mask", IX86_BUILTIN_ADDPS128_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF_QI },
32028 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_addv8sf3_mask, "__builtin_ia32_addps256_mask", IX86_BUILTIN_ADDPS256_MASK, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SF_QI },
32029 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_subv2df3_mask, "__builtin_ia32_subpd128_mask", IX86_BUILTIN_SUBPD128_MASK, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF_QI },
32030 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_subv4df3_mask, "__builtin_ia32_subpd256_mask", IX86_BUILTIN_SUBPD256_MASK, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DF_QI },
32031 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_subv4sf3_mask, "__builtin_ia32_subps128_mask", IX86_BUILTIN_SUBPS128_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF_QI },
32032 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_subv8sf3_mask, "__builtin_ia32_subps256_mask", IX86_BUILTIN_SUBPS256_MASK, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SF_QI },
32033 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_xorv4df3_mask, "__builtin_ia32_xorpd256_mask", IX86_BUILTIN_XORPD256_MASK, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DF_QI },
32034 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_xorv2df3_mask, "__builtin_ia32_xorpd128_mask", IX86_BUILTIN_XORPD128_MASK, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF_QI },
32035 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_xorv8sf3_mask, "__builtin_ia32_xorps256_mask", IX86_BUILTIN_XORPS256_MASK, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SF_QI },
32036 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_xorv4sf3_mask, "__builtin_ia32_xorps128_mask", IX86_BUILTIN_XORPS128_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF_QI },
32037 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_iorv4df3_mask, "__builtin_ia32_orpd256_mask", IX86_BUILTIN_ORPD256_MASK, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DF_QI },
32038 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_iorv2df3_mask, "__builtin_ia32_orpd128_mask", IX86_BUILTIN_ORPD128_MASK, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF_QI },
32039 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_iorv8sf3_mask, "__builtin_ia32_orps256_mask", IX86_BUILTIN_ORPS256_MASK, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SF_QI },
32040 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_iorv4sf3_mask, "__builtin_ia32_orps128_mask", IX86_BUILTIN_ORPS128_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF_QI },
32041 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512dq_broadcastv8sf_mask, "__builtin_ia32_broadcastf32x2_256_mask", IX86_BUILTIN_BROADCASTF32x2_256, UNKNOWN, (int) V8SF_FTYPE_V4SF_V8SF_QI },
32042 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512dq_broadcastv8si_mask, "__builtin_ia32_broadcasti32x2_256_mask", IX86_BUILTIN_BROADCASTI32x2_256, UNKNOWN, (int) V8SI_FTYPE_V4SI_V8SI_QI },
32043 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512dq_broadcastv4si_mask, "__builtin_ia32_broadcasti32x2_128_mask", IX86_BUILTIN_BROADCASTI32x2_128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_QI },
32044 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512dq_broadcastv4df_mask_1, "__builtin_ia32_broadcastf64x2_256_mask", IX86_BUILTIN_BROADCASTF64X2_256, UNKNOWN, (int) V4DF_FTYPE_V2DF_V4DF_QI },
32045 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512dq_broadcastv4di_mask_1, "__builtin_ia32_broadcasti64x2_256_mask", IX86_BUILTIN_BROADCASTI64X2_256, UNKNOWN, (int) V4DI_FTYPE_V2DI_V4DI_QI },
32046 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_broadcastv8sf_mask_1, "__builtin_ia32_broadcastf32x4_256_mask", IX86_BUILTIN_BROADCASTF32X4_256, UNKNOWN, (int) V8SF_FTYPE_V4SF_V8SF_QI },
32047 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_broadcastv8si_mask_1, "__builtin_ia32_broadcasti32x4_256_mask", IX86_BUILTIN_BROADCASTI32X4_256, UNKNOWN, (int) V8SI_FTYPE_V4SI_V8SI_QI },
32048 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vextractf128v8sf, "__builtin_ia32_extractf32x4_256_mask", IX86_BUILTIN_EXTRACTF32X4_256, UNKNOWN, (int) V4SF_FTYPE_V8SF_INT_V4SF_QI },
32049 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vextractf128v8si, "__builtin_ia32_extracti32x4_256_mask", IX86_BUILTIN_EXTRACTI32X4_256, UNKNOWN, (int) V4SI_FTYPE_V8SI_INT_V4SI_QI },
32050 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512bw_dbpsadbwv16hi_mask, "__builtin_ia32_dbpsadbw256_mask", IX86_BUILTIN_DBPSADBW256, UNKNOWN, (int) V16HI_FTYPE_V32QI_V32QI_INT_V16HI_HI },
32051 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512bw_dbpsadbwv8hi_mask, "__builtin_ia32_dbpsadbw128_mask", IX86_BUILTIN_DBPSADBW128, UNKNOWN, (int) V8HI_FTYPE_V16QI_V16QI_INT_V8HI_QI },
32052 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_fix_truncv4dfv4di2_mask, "__builtin_ia32_cvttpd2qq256_mask", IX86_BUILTIN_CVTTPD2QQ256, UNKNOWN, (int) V4DI_FTYPE_V4DF_V4DI_QI },
32053 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_fix_truncv2dfv2di2_mask, "__builtin_ia32_cvttpd2qq128_mask", IX86_BUILTIN_CVTTPD2QQ128, UNKNOWN, (int) V2DI_FTYPE_V2DF_V2DI_QI },
32054 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_ufix_truncv4dfv4di2_mask, "__builtin_ia32_cvttpd2uqq256_mask", IX86_BUILTIN_CVTTPD2UQQ256, UNKNOWN, (int) V4DI_FTYPE_V4DF_V4DI_QI },
32055 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_ufix_truncv2dfv2di2_mask, "__builtin_ia32_cvttpd2uqq128_mask", IX86_BUILTIN_CVTTPD2UQQ128, UNKNOWN, (int) V2DI_FTYPE_V2DF_V2DI_QI },
32056 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_fix_notruncv4dfv4di2_mask, "__builtin_ia32_cvtpd2qq256_mask", IX86_BUILTIN_CVTPD2QQ256, UNKNOWN, (int) V4DI_FTYPE_V4DF_V4DI_QI },
32057 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_fix_notruncv2dfv2di2_mask, "__builtin_ia32_cvtpd2qq128_mask", IX86_BUILTIN_CVTPD2QQ128, UNKNOWN, (int) V2DI_FTYPE_V2DF_V2DI_QI },
32058 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_ufix_notruncv4dfv4di2_mask, "__builtin_ia32_cvtpd2uqq256_mask", IX86_BUILTIN_CVTPD2UQQ256, UNKNOWN, (int) V4DI_FTYPE_V4DF_V4DI_QI },
32059 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_ufix_notruncv2dfv2di2_mask, "__builtin_ia32_cvtpd2uqq128_mask", IX86_BUILTIN_CVTPD2UQQ128, UNKNOWN, (int) V2DI_FTYPE_V2DF_V2DI_QI },
32060 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_ufix_notruncv4dfv4si2_mask, "__builtin_ia32_cvtpd2udq256_mask", IX86_BUILTIN_CVTPD2UDQ256_MASK, UNKNOWN, (int) V4SI_FTYPE_V4DF_V4SI_QI },
32061 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_ufix_notruncv2dfv2si2_mask, "__builtin_ia32_cvtpd2udq128_mask", IX86_BUILTIN_CVTPD2UDQ128_MASK, UNKNOWN, (int) V4SI_FTYPE_V2DF_V4SI_QI },
32062 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_fix_truncv4sfv4di2_mask, "__builtin_ia32_cvttps2qq256_mask", IX86_BUILTIN_CVTTPS2QQ256, UNKNOWN, (int) V4DI_FTYPE_V4SF_V4DI_QI },
32063 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_fix_truncv2sfv2di2_mask, "__builtin_ia32_cvttps2qq128_mask", IX86_BUILTIN_CVTTPS2QQ128, UNKNOWN, (int) V2DI_FTYPE_V4SF_V2DI_QI },
32064 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_ufix_truncv4sfv4di2_mask, "__builtin_ia32_cvttps2uqq256_mask", IX86_BUILTIN_CVTTPS2UQQ256, UNKNOWN, (int) V4DI_FTYPE_V4SF_V4DI_QI },
32065 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_ufix_truncv2sfv2di2_mask, "__builtin_ia32_cvttps2uqq128_mask", IX86_BUILTIN_CVTTPS2UQQ128, UNKNOWN, (int) V2DI_FTYPE_V4SF_V2DI_QI },
32066 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_fix_truncv8sfv8si2_mask, "__builtin_ia32_cvttps2dq256_mask", IX86_BUILTIN_CVTTPS2DQ256_MASK, UNKNOWN, (int) V8SI_FTYPE_V8SF_V8SI_QI },
32067 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_fix_truncv4sfv4si2_mask, "__builtin_ia32_cvttps2dq128_mask", IX86_BUILTIN_CVTTPS2DQ128_MASK, UNKNOWN, (int) V4SI_FTYPE_V4SF_V4SI_QI },
32068 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_ufix_truncv8sfv8si2_mask, "__builtin_ia32_cvttps2udq256_mask", IX86_BUILTIN_CVTTPS2UDQ256, UNKNOWN, (int) V8SI_FTYPE_V8SF_V8SI_QI },
32069 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_ufix_truncv4sfv4si2_mask, "__builtin_ia32_cvttps2udq128_mask", IX86_BUILTIN_CVTTPS2UDQ128, UNKNOWN, (int) V4SI_FTYPE_V4SF_V4SI_QI },
32070 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_fix_truncv4dfv4si2_mask, "__builtin_ia32_cvttpd2dq256_mask", IX86_BUILTIN_CVTTPD2DQ256_MASK, UNKNOWN, (int) V4SI_FTYPE_V4DF_V4SI_QI },
32071 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse2_cvttpd2dq_mask, "__builtin_ia32_cvttpd2dq128_mask", IX86_BUILTIN_CVTTPD2DQ128_MASK, UNKNOWN, (int) V4SI_FTYPE_V2DF_V4SI_QI },
32072 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_ufix_truncv4dfv4si2_mask, "__builtin_ia32_cvttpd2udq256_mask", IX86_BUILTIN_CVTTPD2UDQ256_MASK, UNKNOWN, (int) V4SI_FTYPE_V4DF_V4SI_QI },
32073 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_ufix_truncv2dfv2si2_mask, "__builtin_ia32_cvttpd2udq128_mask", IX86_BUILTIN_CVTTPD2UDQ128_MASK, UNKNOWN, (int) V4SI_FTYPE_V2DF_V4SI_QI },
32074 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_cvtpd2dq256_mask, "__builtin_ia32_cvtpd2dq256_mask", IX86_BUILTIN_CVTPD2DQ256_MASK, UNKNOWN, (int) V4SI_FTYPE_V4DF_V4SI_QI },
32075 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse2_cvtpd2dq_mask, "__builtin_ia32_cvtpd2dq128_mask", IX86_BUILTIN_CVTPD2DQ128_MASK, UNKNOWN, (int) V4SI_FTYPE_V2DF_V4SI_QI },
32076 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_floatv4siv4df2_mask, "__builtin_ia32_cvtdq2pd256_mask", IX86_BUILTIN_CVTDQ2PD256_MASK, UNKNOWN, (int) V4DF_FTYPE_V4SI_V4DF_QI },
32077 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse2_cvtdq2pd_mask, "__builtin_ia32_cvtdq2pd128_mask", IX86_BUILTIN_CVTDQ2PD128_MASK, UNKNOWN, (int) V2DF_FTYPE_V4SI_V2DF_QI },
32078 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_ufloatv4siv4df2_mask, "__builtin_ia32_cvtudq2pd256_mask", IX86_BUILTIN_CVTUDQ2PD256_MASK, UNKNOWN, (int) V4DF_FTYPE_V4SI_V4DF_QI },
32079 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_ufloatv2siv2df2_mask, "__builtin_ia32_cvtudq2pd128_mask", IX86_BUILTIN_CVTUDQ2PD128_MASK, UNKNOWN, (int) V2DF_FTYPE_V4SI_V2DF_QI },
32080 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_floatv8siv8sf2_mask, "__builtin_ia32_cvtdq2ps256_mask", IX86_BUILTIN_CVTDQ2PS256_MASK, UNKNOWN, (int) V8SF_FTYPE_V8SI_V8SF_QI },
32081 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_floatv4siv4sf2_mask, "__builtin_ia32_cvtdq2ps128_mask", IX86_BUILTIN_CVTDQ2PS128_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SI_V4SF_QI },
32082 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_ufloatv8siv8sf2_mask, "__builtin_ia32_cvtudq2ps256_mask", IX86_BUILTIN_CVTUDQ2PS256_MASK, UNKNOWN, (int) V8SF_FTYPE_V8SI_V8SF_QI },
32083 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_ufloatv4siv4sf2_mask, "__builtin_ia32_cvtudq2ps128_mask", IX86_BUILTIN_CVTUDQ2PS128_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SI_V4SF_QI },
32084 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_cvtps2pd256_mask, "__builtin_ia32_cvtps2pd256_mask", IX86_BUILTIN_CVTPS2PD256_MASK, UNKNOWN, (int) V4DF_FTYPE_V4SF_V4DF_QI },
32085 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse2_cvtps2pd_mask, "__builtin_ia32_cvtps2pd128_mask", IX86_BUILTIN_CVTPS2PD128_MASK, UNKNOWN, (int) V2DF_FTYPE_V4SF_V2DF_QI },
32086 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vec_dupv32qi_mask, "__builtin_ia32_pbroadcastb256_mask", IX86_BUILTIN_PBROADCASTB256_MASK, UNKNOWN, (int) V32QI_FTYPE_V16QI_V32QI_SI },
32087 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vec_dup_gprv32qi_mask, "__builtin_ia32_pbroadcastb256_gpr_mask", IX86_BUILTIN_PBROADCASTB256_GPR_MASK, UNKNOWN, (int) V32QI_FTYPE_QI_V32QI_SI },
32088 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vec_dupv16qi_mask, "__builtin_ia32_pbroadcastb128_mask", IX86_BUILTIN_PBROADCASTB128_MASK, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_HI },
32089 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vec_dup_gprv16qi_mask, "__builtin_ia32_pbroadcastb128_gpr_mask", IX86_BUILTIN_PBROADCASTB128_GPR_MASK, UNKNOWN, (int) V16QI_FTYPE_QI_V16QI_HI },
32090 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vec_dupv16hi_mask, "__builtin_ia32_pbroadcastw256_mask", IX86_BUILTIN_PBROADCASTW256_MASK, UNKNOWN, (int) V16HI_FTYPE_V8HI_V16HI_HI },
32091 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vec_dup_gprv16hi_mask, "__builtin_ia32_pbroadcastw256_gpr_mask", IX86_BUILTIN_PBROADCASTW256_GPR_MASK, UNKNOWN, (int) V16HI_FTYPE_HI_V16HI_HI },
32092 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vec_dupv8hi_mask, "__builtin_ia32_pbroadcastw128_mask", IX86_BUILTIN_PBROADCASTW128_MASK, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_QI },
32093 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vec_dup_gprv8hi_mask, "__builtin_ia32_pbroadcastw128_gpr_mask", IX86_BUILTIN_PBROADCASTW128_GPR_MASK, UNKNOWN, (int) V8HI_FTYPE_HI_V8HI_QI },
32094 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vec_dupv8si_mask, "__builtin_ia32_pbroadcastd256_mask", IX86_BUILTIN_PBROADCASTD256_MASK, UNKNOWN, (int) V8SI_FTYPE_V4SI_V8SI_QI },
32095 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vec_dup_gprv8si_mask, "__builtin_ia32_pbroadcastd256_gpr_mask", IX86_BUILTIN_PBROADCASTD256_GPR_MASK, UNKNOWN, (int) V8SI_FTYPE_SI_V8SI_QI },
32096 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vec_dupv4si_mask, "__builtin_ia32_pbroadcastd128_mask", IX86_BUILTIN_PBROADCASTD128_MASK, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_QI },
32097 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vec_dup_gprv4si_mask, "__builtin_ia32_pbroadcastd128_gpr_mask", IX86_BUILTIN_PBROADCASTD128_GPR_MASK, UNKNOWN, (int) V4SI_FTYPE_SI_V4SI_QI },
32098 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vec_dupv4di_mask, "__builtin_ia32_pbroadcastq256_mask", IX86_BUILTIN_PBROADCASTQ256_MASK, UNKNOWN, (int) V4DI_FTYPE_V2DI_V4DI_QI },
32099 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vec_dup_gprv4di_mask, "__builtin_ia32_pbroadcastq256_gpr_mask", IX86_BUILTIN_PBROADCASTQ256_GPR_MASK, UNKNOWN, (int) V4DI_FTYPE_DI_V4DI_QI },
32100 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vec_dupv2di_mask, "__builtin_ia32_pbroadcastq128_mask", IX86_BUILTIN_PBROADCASTQ128_MASK, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_QI },
32101 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vec_dup_gprv2di_mask, "__builtin_ia32_pbroadcastq128_gpr_mask", IX86_BUILTIN_PBROADCASTQ128_GPR_MASK, UNKNOWN, (int) V2DI_FTYPE_DI_V2DI_QI },
32102 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vec_dupv8sf_mask, "__builtin_ia32_broadcastss256_mask", IX86_BUILTIN_BROADCASTSS256, UNKNOWN, (int) V8SF_FTYPE_V4SF_V8SF_QI },
32103 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vec_dupv4sf_mask, "__builtin_ia32_broadcastss128_mask", IX86_BUILTIN_BROADCASTSS128, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_QI },
32104 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vec_dupv4df_mask, "__builtin_ia32_broadcastsd256_mask", IX86_BUILTIN_BROADCASTSD256, UNKNOWN, (int) V4DF_FTYPE_V2DF_V4DF_QI },
32105 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vextractf128v4df, "__builtin_ia32_extractf64x2_256_mask", IX86_BUILTIN_EXTRACTF64X2_256, UNKNOWN, (int) V2DF_FTYPE_V4DF_INT_V2DF_QI },
32106 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vextractf128v4di, "__builtin_ia32_extracti64x2_256_mask", IX86_BUILTIN_EXTRACTI64X2_256, UNKNOWN, (int) V2DI_FTYPE_V4DI_INT_V2DI_QI },
32107 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vinsertv8sf, "__builtin_ia32_insertf32x4_256_mask", IX86_BUILTIN_INSERTF32X4_256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V4SF_INT_V8SF_QI },
32108 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vinsertv8si, "__builtin_ia32_inserti32x4_256_mask", IX86_BUILTIN_INSERTI32X4_256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V4SI_INT_V8SI_QI },
32109 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_sign_extendv16qiv16hi2_mask, "__builtin_ia32_pmovsxbw256_mask", IX86_BUILTIN_PMOVSXBW256_MASK, UNKNOWN, (int) V16HI_FTYPE_V16QI_V16HI_HI },
32110 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse4_1_sign_extendv8qiv8hi2_mask, "__builtin_ia32_pmovsxbw128_mask", IX86_BUILTIN_PMOVSXBW128_MASK, UNKNOWN, (int) V8HI_FTYPE_V16QI_V8HI_QI },
32111 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_sign_extendv8qiv8si2_mask, "__builtin_ia32_pmovsxbd256_mask", IX86_BUILTIN_PMOVSXBD256_MASK, UNKNOWN, (int) V8SI_FTYPE_V16QI_V8SI_QI },
32112 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse4_1_sign_extendv4qiv4si2_mask, "__builtin_ia32_pmovsxbd128_mask", IX86_BUILTIN_PMOVSXBD128_MASK, UNKNOWN, (int) V4SI_FTYPE_V16QI_V4SI_QI },
32113 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_sign_extendv4qiv4di2_mask, "__builtin_ia32_pmovsxbq256_mask", IX86_BUILTIN_PMOVSXBQ256_MASK, UNKNOWN, (int) V4DI_FTYPE_V16QI_V4DI_QI },
32114 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse4_1_sign_extendv2qiv2di2_mask, "__builtin_ia32_pmovsxbq128_mask", IX86_BUILTIN_PMOVSXBQ128_MASK, UNKNOWN, (int) V2DI_FTYPE_V16QI_V2DI_QI },
32115 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_sign_extendv8hiv8si2_mask, "__builtin_ia32_pmovsxwd256_mask", IX86_BUILTIN_PMOVSXWD256_MASK, UNKNOWN, (int) V8SI_FTYPE_V8HI_V8SI_QI },
32116 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse4_1_sign_extendv4hiv4si2_mask, "__builtin_ia32_pmovsxwd128_mask", IX86_BUILTIN_PMOVSXWD128_MASK, UNKNOWN, (int) V4SI_FTYPE_V8HI_V4SI_QI },
32117 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_sign_extendv4hiv4di2_mask, "__builtin_ia32_pmovsxwq256_mask", IX86_BUILTIN_PMOVSXWQ256_MASK, UNKNOWN, (int) V4DI_FTYPE_V8HI_V4DI_QI },
32118 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse4_1_sign_extendv2hiv2di2_mask, "__builtin_ia32_pmovsxwq128_mask", IX86_BUILTIN_PMOVSXWQ128_MASK, UNKNOWN, (int) V2DI_FTYPE_V8HI_V2DI_QI },
32119 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_sign_extendv4siv4di2_mask, "__builtin_ia32_pmovsxdq256_mask", IX86_BUILTIN_PMOVSXDQ256_MASK, UNKNOWN, (int) V4DI_FTYPE_V4SI_V4DI_QI },
32120 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse4_1_sign_extendv2siv2di2_mask, "__builtin_ia32_pmovsxdq128_mask", IX86_BUILTIN_PMOVSXDQ128_MASK, UNKNOWN, (int) V2DI_FTYPE_V4SI_V2DI_QI },
32121 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_zero_extendv16qiv16hi2_mask, "__builtin_ia32_pmovzxbw256_mask", IX86_BUILTIN_PMOVZXBW256_MASK, UNKNOWN, (int) V16HI_FTYPE_V16QI_V16HI_HI },
32122 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse4_1_zero_extendv8qiv8hi2_mask, "__builtin_ia32_pmovzxbw128_mask", IX86_BUILTIN_PMOVZXBW128_MASK, UNKNOWN, (int) V8HI_FTYPE_V16QI_V8HI_QI },
32123 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_zero_extendv8qiv8si2_mask, "__builtin_ia32_pmovzxbd256_mask", IX86_BUILTIN_PMOVZXBD256_MASK, UNKNOWN, (int) V8SI_FTYPE_V16QI_V8SI_QI },
32124 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse4_1_zero_extendv4qiv4si2_mask, "__builtin_ia32_pmovzxbd128_mask", IX86_BUILTIN_PMOVZXBD128_MASK, UNKNOWN, (int) V4SI_FTYPE_V16QI_V4SI_QI },
32125 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_zero_extendv4qiv4di2_mask, "__builtin_ia32_pmovzxbq256_mask", IX86_BUILTIN_PMOVZXBQ256_MASK, UNKNOWN, (int) V4DI_FTYPE_V16QI_V4DI_QI },
32126 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse4_1_zero_extendv2qiv2di2_mask, "__builtin_ia32_pmovzxbq128_mask", IX86_BUILTIN_PMOVZXBQ128_MASK, UNKNOWN, (int) V2DI_FTYPE_V16QI_V2DI_QI },
32127 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_zero_extendv8hiv8si2_mask, "__builtin_ia32_pmovzxwd256_mask", IX86_BUILTIN_PMOVZXWD256_MASK, UNKNOWN, (int) V8SI_FTYPE_V8HI_V8SI_QI },
32128 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse4_1_zero_extendv4hiv4si2_mask, "__builtin_ia32_pmovzxwd128_mask", IX86_BUILTIN_PMOVZXWD128_MASK, UNKNOWN, (int) V4SI_FTYPE_V8HI_V4SI_QI },
32129 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_zero_extendv4hiv4di2_mask, "__builtin_ia32_pmovzxwq256_mask", IX86_BUILTIN_PMOVZXWQ256_MASK, UNKNOWN, (int) V4DI_FTYPE_V8HI_V4DI_QI },
32130 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse4_1_zero_extendv2hiv2di2_mask, "__builtin_ia32_pmovzxwq128_mask", IX86_BUILTIN_PMOVZXWQ128_MASK, UNKNOWN, (int) V2DI_FTYPE_V8HI_V2DI_QI },
32131 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_zero_extendv4siv4di2_mask, "__builtin_ia32_pmovzxdq256_mask", IX86_BUILTIN_PMOVZXDQ256_MASK, UNKNOWN, (int) V4DI_FTYPE_V4SI_V4DI_QI },
32132 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse4_1_zero_extendv2siv2di2_mask, "__builtin_ia32_pmovzxdq128_mask", IX86_BUILTIN_PMOVZXDQ128_MASK, UNKNOWN, (int) V2DI_FTYPE_V4SI_V2DI_QI },
32133 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_reducepv4df_mask, "__builtin_ia32_reducepd256_mask", IX86_BUILTIN_REDUCEPD256_MASK, UNKNOWN, (int) V4DF_FTYPE_V4DF_INT_V4DF_QI },
32134 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_reducepv2df_mask, "__builtin_ia32_reducepd128_mask", IX86_BUILTIN_REDUCEPD128_MASK, UNKNOWN, (int) V2DF_FTYPE_V2DF_INT_V2DF_QI },
32135 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_reducepv8sf_mask, "__builtin_ia32_reduceps256_mask", IX86_BUILTIN_REDUCEPS256_MASK, UNKNOWN, (int) V8SF_FTYPE_V8SF_INT_V8SF_QI },
32136 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_reducepv4sf_mask, "__builtin_ia32_reduceps128_mask", IX86_BUILTIN_REDUCEPS128_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SF_INT_V4SF_QI },
32137 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_reducesv2df, "__builtin_ia32_reducesd", IX86_BUILTIN_REDUCESD_MASK, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
32138 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_reducesv4sf, "__builtin_ia32_reducess", IX86_BUILTIN_REDUCESS_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
32139 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_permvarv16hi_mask, "__builtin_ia32_permvarhi256_mask", IX86_BUILTIN_VPERMVARHI256_MASK, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_V16HI_HI },
32140 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_permvarv8hi_mask, "__builtin_ia32_permvarhi128_mask", IX86_BUILTIN_VPERMVARHI128_MASK, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_V8HI_QI },
32141 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermt2varv16hi3_mask, "__builtin_ia32_vpermt2varhi256_mask", IX86_BUILTIN_VPERMT2VARHI256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_V16HI_HI },
32142 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermt2varv16hi3_maskz, "__builtin_ia32_vpermt2varhi256_maskz", IX86_BUILTIN_VPERMT2VARHI256_MASKZ, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_V16HI_HI },
32143 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermt2varv8hi3_mask, "__builtin_ia32_vpermt2varhi128_mask", IX86_BUILTIN_VPERMT2VARHI128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_V8HI_QI },
32144 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermt2varv8hi3_maskz, "__builtin_ia32_vpermt2varhi128_maskz", IX86_BUILTIN_VPERMT2VARHI128_MASKZ, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_V8HI_QI },
32145 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermi2varv16hi3_mask, "__builtin_ia32_vpermi2varhi256_mask", IX86_BUILTIN_VPERMI2VARHI256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_V16HI_HI },
32146 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermi2varv8hi3_mask, "__builtin_ia32_vpermi2varhi128_mask", IX86_BUILTIN_VPERMI2VARHI128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_V8HI_QI },
32147 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_rcp14v4df_mask, "__builtin_ia32_rcp14pd256_mask", IX86_BUILTIN_RCP14PD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_QI },
32148 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_rcp14v2df_mask, "__builtin_ia32_rcp14pd128_mask", IX86_BUILTIN_RCP14PD128, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_QI },
32149 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_rcp14v8sf_mask, "__builtin_ia32_rcp14ps256_mask", IX86_BUILTIN_RCP14PS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_QI },
32150 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_rcp14v4sf_mask, "__builtin_ia32_rcp14ps128_mask", IX86_BUILTIN_RCP14PS128, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_QI },
32151 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_rsqrt14v4df_mask, "__builtin_ia32_rsqrt14pd256_mask", IX86_BUILTIN_RSQRT14PD256_MASK, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_QI },
32152 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_rsqrt14v2df_mask, "__builtin_ia32_rsqrt14pd128_mask", IX86_BUILTIN_RSQRT14PD128_MASK, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_QI },
32153 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_rsqrt14v8sf_mask, "__builtin_ia32_rsqrt14ps256_mask", IX86_BUILTIN_RSQRT14PS256_MASK, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_QI },
32154 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_rsqrt14v4sf_mask, "__builtin_ia32_rsqrt14ps128_mask", IX86_BUILTIN_RSQRT14PS128_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_QI },
32155 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_sqrtv4df2_mask, "__builtin_ia32_sqrtpd256_mask", IX86_BUILTIN_SQRTPD256_MASK, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_QI },
32156 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse2_sqrtv2df2_mask, "__builtin_ia32_sqrtpd128_mask", IX86_BUILTIN_SQRTPD128_MASK, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_QI },
32157 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_sqrtv8sf2_mask, "__builtin_ia32_sqrtps256_mask", IX86_BUILTIN_SQRTPS256_MASK, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_QI },
32158 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse_sqrtv4sf2_mask, "__builtin_ia32_sqrtps128_mask", IX86_BUILTIN_SQRTPS128_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_QI },
32159 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_addv16qi3_mask, "__builtin_ia32_paddb128_mask", IX86_BUILTIN_PADDB128_MASK, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_V16QI_HI },
32160 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_addv8hi3_mask, "__builtin_ia32_paddw128_mask", IX86_BUILTIN_PADDW128_MASK, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_V8HI_QI },
32161 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_addv4si3_mask, "__builtin_ia32_paddd128_mask", IX86_BUILTIN_PADDD128_MASK, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI_QI },
32162 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_addv2di3_mask, "__builtin_ia32_paddq128_mask", IX86_BUILTIN_PADDQ128_MASK, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI_QI },
32163 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_subv16qi3_mask, "__builtin_ia32_psubb128_mask", IX86_BUILTIN_PSUBB128_MASK, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_V16QI_HI },
32164 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_subv8hi3_mask, "__builtin_ia32_psubw128_mask", IX86_BUILTIN_PSUBW128_MASK, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_V8HI_QI },
32165 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_subv4si3_mask, "__builtin_ia32_psubd128_mask", IX86_BUILTIN_PSUBD128_MASK, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI_QI },
32166 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_subv2di3_mask, "__builtin_ia32_psubq128_mask", IX86_BUILTIN_PSUBQ128_MASK, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI_QI },
32167 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse2_ssaddv16qi3_mask, "__builtin_ia32_paddsb128_mask", IX86_BUILTIN_PADDSB128_MASK, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_V16QI_HI },
32168 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse2_ssaddv8hi3_mask, "__builtin_ia32_paddsw128_mask", IX86_BUILTIN_PADDSW128_MASK, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_V8HI_QI },
32169 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse2_sssubv16qi3_mask, "__builtin_ia32_psubsb128_mask", IX86_BUILTIN_PSUBSB128_MASK, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_V16QI_HI },
32170 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse2_sssubv8hi3_mask, "__builtin_ia32_psubsw128_mask", IX86_BUILTIN_PSUBSW128_MASK, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_V8HI_QI },
32171 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse2_usaddv16qi3_mask, "__builtin_ia32_paddusb128_mask", IX86_BUILTIN_PADDUSB128_MASK, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_V16QI_HI },
32172 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse2_usaddv8hi3_mask, "__builtin_ia32_paddusw128_mask", IX86_BUILTIN_PADDUSW128_MASK, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_V8HI_QI },
32173 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse2_ussubv16qi3_mask, "__builtin_ia32_psubusb128_mask", IX86_BUILTIN_PSUBUSB128_MASK, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_V16QI_HI },
32174 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse2_ussubv8hi3_mask, "__builtin_ia32_psubusw128_mask", IX86_BUILTIN_PSUBUSW128_MASK, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_V8HI_QI },
32175 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_addv32qi3_mask, "__builtin_ia32_paddb256_mask", IX86_BUILTIN_PADDB256_MASK, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI_V32QI_SI },
32176 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_addv16hi3_mask, "__builtin_ia32_paddw256_mask", IX86_BUILTIN_PADDW256_MASK, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_V16HI_HI },
32177 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_addv8si3_mask, "__builtin_ia32_paddd256_mask", IX86_BUILTIN_PADDD256_MASK, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_V8SI_QI },
32178 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_addv4di3_mask, "__builtin_ia32_paddq256_mask", IX86_BUILTIN_PADDQ256_MASK, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_V4DI_QI },
32179 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_ssaddv32qi3_mask, "__builtin_ia32_paddsb256_mask", IX86_BUILTIN_PADDSB256_MASK, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI_V32QI_SI },
32180 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_ssaddv16hi3_mask, "__builtin_ia32_paddsw256_mask", IX86_BUILTIN_PADDSW256_MASK, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_V16HI_HI },
32181 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_usaddv32qi3_mask, "__builtin_ia32_paddusb256_mask", IX86_BUILTIN_PADDUSB256_MASK, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI_V32QI_SI },
32182 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_usaddv16hi3_mask, "__builtin_ia32_paddusw256_mask", IX86_BUILTIN_PADDUSW256_MASK, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_V16HI_HI },
32183 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_subv32qi3_mask, "__builtin_ia32_psubb256_mask", IX86_BUILTIN_PSUBB256_MASK, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI_V32QI_SI },
32184 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_subv16hi3_mask, "__builtin_ia32_psubw256_mask", IX86_BUILTIN_PSUBW256_MASK, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_V16HI_HI },
32185 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_subv8si3_mask, "__builtin_ia32_psubd256_mask", IX86_BUILTIN_PSUBD256_MASK, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_V8SI_QI },
32186 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_subv4di3_mask, "__builtin_ia32_psubq256_mask", IX86_BUILTIN_PSUBQ256_MASK, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_V4DI_QI },
32187 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_sssubv32qi3_mask, "__builtin_ia32_psubsb256_mask", IX86_BUILTIN_PSUBSB256_MASK, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI_V32QI_SI },
32188 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_sssubv16hi3_mask, "__builtin_ia32_psubsw256_mask", IX86_BUILTIN_PSUBSW256_MASK, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_V16HI_HI },
32189 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_ussubv32qi3_mask, "__builtin_ia32_psubusb256_mask", IX86_BUILTIN_PSUBUSB256_MASK, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI_V32QI_SI },
32190 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_ussubv16hi3_mask, "__builtin_ia32_psubusw256_mask", IX86_BUILTIN_PSUBUSW256_MASK, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_V16HI_HI },
32191 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512dq_shuf_f64x2_mask, "__builtin_ia32_shuf_f64x2_256_mask", IX86_BUILTIN_SHUF_F64x2_256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_INT_V4DF_QI },
32192 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512dq_shuf_i64x2_mask, "__builtin_ia32_shuf_i64x2_256_mask", IX86_BUILTIN_SHUF_I64x2_256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_INT_V4DI_QI },
32193 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_shuf_i32x4_mask, "__builtin_ia32_shuf_i32x4_256_mask", IX86_BUILTIN_SHUF_I32x4_256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_INT_V8SI_QI },
32194 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_shuf_f32x4_mask, "__builtin_ia32_shuf_f32x4_256_mask", IX86_BUILTIN_SHUF_F32x4_256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_INT_V8SF_QI },
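/* Masked truncating vector down-conversions (vpmov, vpmovs, vpmovus).  */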
32195 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_truncatev8hiv8qi2_mask, "__builtin_ia32_pmovwb128_mask", IX86_BUILTIN_PMOVWB128, UNKNOWN, (int) V16QI_FTYPE_V8HI_V16QI_QI },
32196 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_truncatev16hiv16qi2_mask, "__builtin_ia32_pmovwb256_mask", IX86_BUILTIN_PMOVWB256, UNKNOWN, (int) V16QI_FTYPE_V16HI_V16QI_HI },
32197 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ss_truncatev8hiv8qi2_mask, "__builtin_ia32_pmovswb128_mask", IX86_BUILTIN_PMOVSWB128, UNKNOWN, (int) V16QI_FTYPE_V8HI_V16QI_QI },
32198 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ss_truncatev16hiv16qi2_mask, "__builtin_ia32_pmovswb256_mask", IX86_BUILTIN_PMOVSWB256, UNKNOWN, (int) V16QI_FTYPE_V16HI_V16QI_HI },
32199 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_us_truncatev8hiv8qi2_mask, "__builtin_ia32_pmovuswb128_mask", IX86_BUILTIN_PMOVUSWB128, UNKNOWN, (int) V16QI_FTYPE_V8HI_V16QI_QI },
32200 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_us_truncatev16hiv16qi2_mask, "__builtin_ia32_pmovuswb256_mask", IX86_BUILTIN_PMOVUSWB256, UNKNOWN, (int) V16QI_FTYPE_V16HI_V16QI_HI },
32201 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_truncatev4siv4qi2_mask, "__builtin_ia32_pmovdb128_mask", IX86_BUILTIN_PMOVDB128, UNKNOWN, (int) V16QI_FTYPE_V4SI_V16QI_QI },
32202 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_truncatev8siv8qi2_mask, "__builtin_ia32_pmovdb256_mask", IX86_BUILTIN_PMOVDB256, UNKNOWN, (int) V16QI_FTYPE_V8SI_V16QI_QI },
32203 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ss_truncatev4siv4qi2_mask, "__builtin_ia32_pmovsdb128_mask", IX86_BUILTIN_PMOVSDB128, UNKNOWN, (int) V16QI_FTYPE_V4SI_V16QI_QI },
32204 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ss_truncatev8siv8qi2_mask, "__builtin_ia32_pmovsdb256_mask", IX86_BUILTIN_PMOVSDB256, UNKNOWN, (int) V16QI_FTYPE_V8SI_V16QI_QI },
32205 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_us_truncatev4siv4qi2_mask, "__builtin_ia32_pmovusdb128_mask", IX86_BUILTIN_PMOVUSDB128, UNKNOWN, (int) V16QI_FTYPE_V4SI_V16QI_QI },
32206 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_us_truncatev8siv8qi2_mask, "__builtin_ia32_pmovusdb256_mask", IX86_BUILTIN_PMOVUSDB256, UNKNOWN, (int) V16QI_FTYPE_V8SI_V16QI_QI },
32207 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_truncatev4siv4hi2_mask, "__builtin_ia32_pmovdw128_mask", IX86_BUILTIN_PMOVDW128, UNKNOWN, (int) V8HI_FTYPE_V4SI_V8HI_QI },
32208 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_truncatev8siv8hi2_mask, "__builtin_ia32_pmovdw256_mask", IX86_BUILTIN_PMOVDW256, UNKNOWN, (int) V8HI_FTYPE_V8SI_V8HI_QI },
32209 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ss_truncatev4siv4hi2_mask, "__builtin_ia32_pmovsdw128_mask", IX86_BUILTIN_PMOVSDW128, UNKNOWN, (int) V8HI_FTYPE_V4SI_V8HI_QI },
32210 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ss_truncatev8siv8hi2_mask, "__builtin_ia32_pmovsdw256_mask", IX86_BUILTIN_PMOVSDW256, UNKNOWN, (int) V8HI_FTYPE_V8SI_V8HI_QI },
32211 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_us_truncatev4siv4hi2_mask, "__builtin_ia32_pmovusdw128_mask", IX86_BUILTIN_PMOVUSDW128, UNKNOWN, (int) V8HI_FTYPE_V4SI_V8HI_QI },
32212 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_us_truncatev8siv8hi2_mask, "__builtin_ia32_pmovusdw256_mask", IX86_BUILTIN_PMOVUSDW256, UNKNOWN, (int) V8HI_FTYPE_V8SI_V8HI_QI },
32213 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_truncatev2div2qi2_mask, "__builtin_ia32_pmovqb128_mask", IX86_BUILTIN_PMOVQB128, UNKNOWN, (int) V16QI_FTYPE_V2DI_V16QI_QI },
32214 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_truncatev4div4qi2_mask, "__builtin_ia32_pmovqb256_mask", IX86_BUILTIN_PMOVQB256, UNKNOWN, (int) V16QI_FTYPE_V4DI_V16QI_QI },
32215 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ss_truncatev2div2qi2_mask, "__builtin_ia32_pmovsqb128_mask", IX86_BUILTIN_PMOVSQB128, UNKNOWN, (int) V16QI_FTYPE_V2DI_V16QI_QI },
32216 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ss_truncatev4div4qi2_mask, "__builtin_ia32_pmovsqb256_mask", IX86_BUILTIN_PMOVSQB256, UNKNOWN, (int) V16QI_FTYPE_V4DI_V16QI_QI },
32217 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_us_truncatev2div2qi2_mask, "__builtin_ia32_pmovusqb128_mask", IX86_BUILTIN_PMOVUSQB128, UNKNOWN, (int) V16QI_FTYPE_V2DI_V16QI_QI },
32218 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_us_truncatev4div4qi2_mask, "__builtin_ia32_pmovusqb256_mask", IX86_BUILTIN_PMOVUSQB256, UNKNOWN, (int) V16QI_FTYPE_V4DI_V16QI_QI },
32219 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_truncatev2div2hi2_mask, "__builtin_ia32_pmovqw128_mask", IX86_BUILTIN_PMOVQW128, UNKNOWN, (int) V8HI_FTYPE_V2DI_V8HI_QI },
32220 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_truncatev4div4hi2_mask, "__builtin_ia32_pmovqw256_mask", IX86_BUILTIN_PMOVQW256, UNKNOWN, (int) V8HI_FTYPE_V4DI_V8HI_QI },
32221 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ss_truncatev2div2hi2_mask, "__builtin_ia32_pmovsqw128_mask", IX86_BUILTIN_PMOVSQW128, UNKNOWN, (int) V8HI_FTYPE_V2DI_V8HI_QI },
32222 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ss_truncatev4div4hi2_mask, "__builtin_ia32_pmovsqw256_mask", IX86_BUILTIN_PMOVSQW256, UNKNOWN, (int) V8HI_FTYPE_V4DI_V8HI_QI },
32223 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_us_truncatev2div2hi2_mask, "__builtin_ia32_pmovusqw128_mask", IX86_BUILTIN_PMOVUSQW128, UNKNOWN, (int) V8HI_FTYPE_V2DI_V8HI_QI },
32224 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_us_truncatev4div4hi2_mask, "__builtin_ia32_pmovusqw256_mask", IX86_BUILTIN_PMOVUSQW256, UNKNOWN, (int) V8HI_FTYPE_V4DI_V8HI_QI },
32225 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_truncatev2div2si2_mask, "__builtin_ia32_pmovqd128_mask", IX86_BUILTIN_PMOVQD128, UNKNOWN, (int) V4SI_FTYPE_V2DI_V4SI_QI },
32226 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_truncatev4div4si2_mask, "__builtin_ia32_pmovqd256_mask", IX86_BUILTIN_PMOVQD256, UNKNOWN, (int) V4SI_FTYPE_V4DI_V4SI_QI },
32227 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ss_truncatev2div2si2_mask, "__builtin_ia32_pmovsqd128_mask", IX86_BUILTIN_PMOVSQD128, UNKNOWN, (int) V4SI_FTYPE_V2DI_V4SI_QI },
32228 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ss_truncatev4div4si2_mask, "__builtin_ia32_pmovsqd256_mask", IX86_BUILTIN_PMOVSQD256, UNKNOWN, (int) V4SI_FTYPE_V4DI_V4SI_QI },
32229 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_us_truncatev2div2si2_mask, "__builtin_ia32_pmovusqd128_mask", IX86_BUILTIN_PMOVUSQD128, UNKNOWN, (int) V4SI_FTYPE_V2DI_V4SI_QI },
32230 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_us_truncatev4div4si2_mask, "__builtin_ia32_pmovusqd256_mask", IX86_BUILTIN_PMOVUSQD256, UNKNOWN, (int) V4SI_FTYPE_V4DI_V4SI_QI },
32231 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512dq_rangepv4df_mask, "__builtin_ia32_rangepd256_mask", IX86_BUILTIN_RANGEPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_INT_V4DF_QI },
32232 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512dq_rangepv2df_mask, "__builtin_ia32_rangepd128_mask", IX86_BUILTIN_RANGEPD128, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT_V2DF_QI },
32233 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512dq_rangepv8sf_mask, "__builtin_ia32_rangeps256_mask", IX86_BUILTIN_RANGEPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_INT_V8SF_QI },
32234 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512dq_rangepv4sf_mask, "__builtin_ia32_rangeps128_mask", IX86_BUILTIN_RANGEPS128, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT_V4SF_QI },
32235 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_getexpv8sf_mask, "__builtin_ia32_getexpps256_mask", IX86_BUILTIN_GETEXPPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_QI },
32236 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_getexpv4df_mask, "__builtin_ia32_getexppd256_mask", IX86_BUILTIN_GETEXPPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_QI },
32237 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_getexpv4sf_mask, "__builtin_ia32_getexpps128_mask", IX86_BUILTIN_GETEXPPS128, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_QI },
32238 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_getexpv2df_mask, "__builtin_ia32_getexppd128_mask", IX86_BUILTIN_GETEXPPD128, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_QI },
32239 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fixupimmv4df_mask, "__builtin_ia32_fixupimmpd256_mask", IX86_BUILTIN_FIXUPIMMPD256_MASK, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DI_INT_QI },
32240 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fixupimmv4df_maskz, "__builtin_ia32_fixupimmpd256_maskz", IX86_BUILTIN_FIXUPIMMPD256_MASKZ, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DI_INT_QI },
32241 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fixupimmv8sf_mask, "__builtin_ia32_fixupimmps256_mask", IX86_BUILTIN_FIXUPIMMPS256_MASK, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SI_INT_QI },
32242 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fixupimmv8sf_maskz, "__builtin_ia32_fixupimmps256_maskz", IX86_BUILTIN_FIXUPIMMPS256_MASKZ, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SI_INT_QI },
32243 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fixupimmv2df_mask, "__builtin_ia32_fixupimmpd128_mask", IX86_BUILTIN_FIXUPIMMPD128_MASK, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DI_INT_QI },
32244 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fixupimmv2df_maskz, "__builtin_ia32_fixupimmpd128_maskz", IX86_BUILTIN_FIXUPIMMPD128_MASKZ, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DI_INT_QI },
32245 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fixupimmv4sf_mask, "__builtin_ia32_fixupimmps128_mask", IX86_BUILTIN_FIXUPIMMPS128_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SI_INT_QI },
32246 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fixupimmv4sf_maskz, "__builtin_ia32_fixupimmps128_maskz", IX86_BUILTIN_FIXUPIMMPS128_MASKZ, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SI_INT_QI },
32247 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_absv4di2_mask, "__builtin_ia32_pabsq256_mask", IX86_BUILTIN_PABSQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_QI },
32248 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_absv2di2_mask, "__builtin_ia32_pabsq128_mask", IX86_BUILTIN_PABSQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_QI },
32249 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_absv8si2_mask, "__builtin_ia32_pabsd256_mask", IX86_BUILTIN_PABSD256_MASK, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_QI },
32250 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_absv4si2_mask, "__builtin_ia32_pabsd128_mask", IX86_BUILTIN_PABSD128_MASK, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_QI },
32251 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_pmulhrswv16hi3_mask, "__builtin_ia32_pmulhrsw256_mask", IX86_BUILTIN_PMULHRSW256_MASK, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_V16HI_HI },
32252 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_ssse3_pmulhrswv8hi3_mask, "__builtin_ia32_pmulhrsw128_mask", IX86_BUILTIN_PMULHRSW128_MASK, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_V8HI_QI },
32253 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_umulv8hi3_highpart_mask, "__builtin_ia32_pmulhuw128_mask", IX86_BUILTIN_PMULHUW128_MASK, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_V8HI_QI },
32254 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_umulv16hi3_highpart_mask, "__builtin_ia32_pmulhuw256_mask", IX86_BUILTIN_PMULHUW256_MASK, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_V16HI_HI },
32255 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_smulv16hi3_highpart_mask, "__builtin_ia32_pmulhw256_mask", IX86_BUILTIN_PMULHW256_MASK, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_V16HI_HI },
32256 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_smulv8hi3_highpart_mask, "__builtin_ia32_pmulhw128_mask", IX86_BUILTIN_PMULHW128_MASK, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_V8HI_QI },
32257 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_mulv16hi3_mask, "__builtin_ia32_pmullw256_mask", IX86_BUILTIN_PMULLW256_MASK, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_V16HI_HI },
32258 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_mulv8hi3_mask, "__builtin_ia32_pmullw128_mask", IX86_BUILTIN_PMULLW128_MASK, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_V8HI_QI },
32259 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512dq_mulv4di3_mask, "__builtin_ia32_pmullq256_mask", IX86_BUILTIN_PMULLQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_V4DI_QI },
32260 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512dq_mulv2di3_mask, "__builtin_ia32_pmullq128_mask", IX86_BUILTIN_PMULLQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI_QI },
32261 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_andv4df3_mask, "__builtin_ia32_andpd256_mask", IX86_BUILTIN_ANDPD256_MASK, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DF_QI },
32262 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_andv2df3_mask, "__builtin_ia32_andpd128_mask", IX86_BUILTIN_ANDPD128_MASK, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF_QI },
32263 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_andv8sf3_mask, "__builtin_ia32_andps256_mask", IX86_BUILTIN_ANDPS256_MASK, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SF_QI },
32264 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_andv4sf3_mask, "__builtin_ia32_andps128_mask", IX86_BUILTIN_ANDPS128_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF_QI },
32265 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_andnotv4df3_mask, "__builtin_ia32_andnpd256_mask", IX86_BUILTIN_ANDNPD256_MASK, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DF_QI },
32266 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse2_andnotv2df3_mask, "__builtin_ia32_andnpd128_mask", IX86_BUILTIN_ANDNPD128_MASK, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF_QI },
32267 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_andnotv8sf3_mask, "__builtin_ia32_andnps256_mask", IX86_BUILTIN_ANDNPS256_MASK, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SF_QI },
32268 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse_andnotv4sf3_mask, "__builtin_ia32_andnps128_mask", IX86_BUILTIN_ANDNPS128_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF_QI },
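/* Masked shift and bitwise logic builtins.  */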
32269 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_ashlv8hi3_mask, "__builtin_ia32_psllwi128_mask", IX86_BUILTIN_PSLLWI128_MASK, UNKNOWN, (int) V8HI_FTYPE_V8HI_INT_V8HI_QI },
32270 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_ashlv4si3_mask, "__builtin_ia32_pslldi128_mask", IX86_BUILTIN_PSLLDI128_MASK, UNKNOWN, (int) V4SI_FTYPE_V4SI_INT_V4SI_QI },
32271 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_ashlv2di3_mask, "__builtin_ia32_psllqi128_mask", IX86_BUILTIN_PSLLQI128_MASK, UNKNOWN, (int) V2DI_FTYPE_V2DI_INT_V2DI_QI },
32272 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_ashlv8hi3_mask, "__builtin_ia32_psllw128_mask", IX86_BUILTIN_PSLLW128_MASK, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_V8HI_QI },
32273 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_ashlv4si3_mask, "__builtin_ia32_pslld128_mask", IX86_BUILTIN_PSLLD128_MASK, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI_QI },
32274 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_ashlv2di3_mask, "__builtin_ia32_psllq128_mask", IX86_BUILTIN_PSLLQ128_MASK, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI_QI },
32275 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_ashlv16hi3_mask, "__builtin_ia32_psllwi256_mask", IX86_BUILTIN_PSLLWI256_MASK, UNKNOWN, (int) V16HI_FTYPE_V16HI_INT_V16HI_HI },
32276 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_ashlv16hi3_mask, "__builtin_ia32_psllw256_mask", IX86_BUILTIN_PSLLW256_MASK, UNKNOWN, (int) V16HI_FTYPE_V16HI_V8HI_V16HI_HI },
32277 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_ashlv8si3_mask, "__builtin_ia32_pslldi256_mask", IX86_BUILTIN_PSLLDI256_MASK, UNKNOWN, (int) V8SI_FTYPE_V8SI_INT_V8SI_QI },
32278 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_ashlv8si3_mask, "__builtin_ia32_pslld256_mask", IX86_BUILTIN_PSLLD256_MASK, UNKNOWN, (int) V8SI_FTYPE_V8SI_V4SI_V8SI_QI },
32279 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_ashlv4di3_mask, "__builtin_ia32_psllqi256_mask", IX86_BUILTIN_PSLLQI256_MASK, UNKNOWN, (int) V4DI_FTYPE_V4DI_INT_V4DI_QI },
32280 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_ashlv4di3_mask, "__builtin_ia32_psllq256_mask", IX86_BUILTIN_PSLLQ256_MASK, UNKNOWN, (int) V4DI_FTYPE_V4DI_V2DI_V4DI_QI },
32281 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_ashrv4si3_mask, "__builtin_ia32_psradi128_mask", IX86_BUILTIN_PSRADI128_MASK, UNKNOWN, (int) V4SI_FTYPE_V4SI_INT_V4SI_QI },
32282 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_ashrv4si3_mask, "__builtin_ia32_psrad128_mask", IX86_BUILTIN_PSRAD128_MASK, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI_QI },
32283 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_ashrv8si3_mask, "__builtin_ia32_psradi256_mask", IX86_BUILTIN_PSRADI256_MASK, UNKNOWN, (int) V8SI_FTYPE_V8SI_INT_V8SI_QI },
32284 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_ashrv8si3_mask, "__builtin_ia32_psrad256_mask", IX86_BUILTIN_PSRAD256_MASK, UNKNOWN, (int) V8SI_FTYPE_V8SI_V4SI_V8SI_QI },
32285 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_ashrv2di3_mask, "__builtin_ia32_psraqi128_mask", IX86_BUILTIN_PSRAQI128_MASK, UNKNOWN, (int) V2DI_FTYPE_V2DI_INT_V2DI_QI },
32286 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_ashrv2di3_mask, "__builtin_ia32_psraq128_mask", IX86_BUILTIN_PSRAQ128_MASK, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI_QI },
32287 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_ashrv4di3_mask, "__builtin_ia32_psraqi256_mask", IX86_BUILTIN_PSRAQI256_MASK, UNKNOWN, (int) V4DI_FTYPE_V4DI_INT_V4DI_QI },
32288 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_ashrv4di3_mask, "__builtin_ia32_psraq256_mask", IX86_BUILTIN_PSRAQ256_MASK, UNKNOWN, (int) V4DI_FTYPE_V4DI_V2DI_V4DI_QI },
32289 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_andv8si3_mask, "__builtin_ia32_pandd256_mask", IX86_BUILTIN_PANDD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_V8SI_QI },
32290 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_andv4si3_mask, "__builtin_ia32_pandd128_mask", IX86_BUILTIN_PANDD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI_QI },
32291 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_lshrv4si3_mask, "__builtin_ia32_psrldi128_mask", IX86_BUILTIN_PSRLDI128_MASK, UNKNOWN, (int) V4SI_FTYPE_V4SI_INT_V4SI_QI },
32292 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_lshrv4si3_mask, "__builtin_ia32_psrld128_mask", IX86_BUILTIN_PSRLD128_MASK, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI_QI },
32293 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_lshrv8si3_mask, "__builtin_ia32_psrldi256_mask", IX86_BUILTIN_PSRLDI256_MASK, UNKNOWN, (int) V8SI_FTYPE_V8SI_INT_V8SI_QI },
32294 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_lshrv8si3_mask, "__builtin_ia32_psrld256_mask", IX86_BUILTIN_PSRLD256_MASK, UNKNOWN, (int) V8SI_FTYPE_V8SI_V4SI_V8SI_QI },
32295 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_lshrv2di3_mask, "__builtin_ia32_psrlqi128_mask", IX86_BUILTIN_PSRLQI128_MASK, UNKNOWN, (int) V2DI_FTYPE_V2DI_INT_V2DI_QI },
32296 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_lshrv2di3_mask, "__builtin_ia32_psrlq128_mask", IX86_BUILTIN_PSRLQ128_MASK, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI_QI },
32297 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_lshrv4di3_mask, "__builtin_ia32_psrlqi256_mask", IX86_BUILTIN_PSRLQI256_MASK, UNKNOWN, (int) V4DI_FTYPE_V4DI_INT_V4DI_QI },
32298 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_lshrv4di3_mask, "__builtin_ia32_psrlq256_mask", IX86_BUILTIN_PSRLQ256_MASK, UNKNOWN, (int) V4DI_FTYPE_V4DI_V2DI_V4DI_QI },
32299 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_andv4di3_mask, "__builtin_ia32_pandq256_mask", IX86_BUILTIN_PANDQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_V4DI_QI },
32300 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_andv2di3_mask, "__builtin_ia32_pandq128_mask", IX86_BUILTIN_PANDQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI_QI },
32301 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_andnotv8si3_mask, "__builtin_ia32_pandnd256_mask", IX86_BUILTIN_PANDND256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_V8SI_QI },
32302 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse2_andnotv4si3_mask, "__builtin_ia32_pandnd128_mask", IX86_BUILTIN_PANDND128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI_QI },
32303 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_andnotv4di3_mask, "__builtin_ia32_pandnq256_mask", IX86_BUILTIN_PANDNQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_V4DI_QI },
32304 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse2_andnotv2di3_mask, "__builtin_ia32_pandnq128_mask", IX86_BUILTIN_PANDNQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI_QI },
32305 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_iorv8si3_mask, "__builtin_ia32_pord256_mask", IX86_BUILTIN_PORD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_V8SI_QI },
32306 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_iorv4si3_mask, "__builtin_ia32_pord128_mask", IX86_BUILTIN_PORD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI_QI },
32307 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_iorv4di3_mask, "__builtin_ia32_porq256_mask", IX86_BUILTIN_PORQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_V4DI_QI },
32308 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_iorv2di3_mask, "__builtin_ia32_porq128_mask", IX86_BUILTIN_PORQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI_QI },
32309 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_xorv8si3_mask, "__builtin_ia32_pxord256_mask", IX86_BUILTIN_PXORD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_V8SI_QI },
32310 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_xorv4si3_mask, "__builtin_ia32_pxord128_mask", IX86_BUILTIN_PXORD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI_QI },
32311 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_xorv4di3_mask, "__builtin_ia32_pxorq256_mask", IX86_BUILTIN_PXORQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_V4DI_QI },
32312 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_xorv2di3_mask, "__builtin_ia32_pxorq128_mask", IX86_BUILTIN_PXORQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI_QI },
32313 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_packsswb_mask, "__builtin_ia32_packsswb256_mask", IX86_BUILTIN_PACKSSWB256_MASK, UNKNOWN, (int) V32QI_FTYPE_V16HI_V16HI_V32QI_SI },
32314 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse2_packsswb_mask, "__builtin_ia32_packsswb128_mask", IX86_BUILTIN_PACKSSWB128_MASK, UNKNOWN, (int) V16QI_FTYPE_V8HI_V8HI_V16QI_HI },
32315 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_packuswb_mask, "__builtin_ia32_packuswb256_mask", IX86_BUILTIN_PACKUSWB256_MASK, UNKNOWN, (int) V32QI_FTYPE_V16HI_V16HI_V32QI_SI },
32316 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse2_packuswb_mask, "__builtin_ia32_packuswb128_mask", IX86_BUILTIN_PACKUSWB128_MASK, UNKNOWN, (int) V16QI_FTYPE_V8HI_V8HI_V16QI_HI },
32317 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_rndscalev8sf_mask, "__builtin_ia32_rndscaleps_256_mask", IX86_BUILTIN_RNDSCALEPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_INT_V8SF_QI },
32318 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_rndscalev4df_mask, "__builtin_ia32_rndscalepd_256_mask", IX86_BUILTIN_RNDSCALEPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_INT_V4DF_QI },
32319 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_rndscalev4sf_mask, "__builtin_ia32_rndscaleps_128_mask", IX86_BUILTIN_RNDSCALEPS128, UNKNOWN, (int) V4SF_FTYPE_V4SF_INT_V4SF_QI },
32320 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_rndscalev2df_mask, "__builtin_ia32_rndscalepd_128_mask", IX86_BUILTIN_RNDSCALEPD128, UNKNOWN, (int) V2DF_FTYPE_V2DF_INT_V2DF_QI },
32321 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vternlogv4di_mask, "__builtin_ia32_pternlogq256_mask", IX86_BUILTIN_VTERNLOGQ256_MASK, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_V4DI_INT_QI },
32322 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vternlogv4di_maskz, "__builtin_ia32_pternlogq256_maskz", IX86_BUILTIN_VTERNLOGQ256_MASKZ, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_V4DI_INT_QI },
32323 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vternlogv8si_mask, "__builtin_ia32_pternlogd256_mask", IX86_BUILTIN_VTERNLOGD256_MASK, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_V8SI_INT_QI },
32324 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vternlogv8si_maskz, "__builtin_ia32_pternlogd256_maskz", IX86_BUILTIN_VTERNLOGD256_MASKZ, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_V8SI_INT_QI },
32325 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vternlogv2di_mask, "__builtin_ia32_pternlogq128_mask", IX86_BUILTIN_VTERNLOGQ128_MASK, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI_INT_QI },
32326 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vternlogv2di_maskz, "__builtin_ia32_pternlogq128_maskz", IX86_BUILTIN_VTERNLOGQ128_MASKZ, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI_INT_QI },
32327 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vternlogv4si_mask, "__builtin_ia32_pternlogd128_mask", IX86_BUILTIN_VTERNLOGD128_MASK, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI_INT_QI },
32328 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vternlogv4si_maskz, "__builtin_ia32_pternlogd128_maskz", IX86_BUILTIN_VTERNLOGD128_MASKZ, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI_INT_QI },
32329 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_scalefv4df_mask, "__builtin_ia32_scalefpd256_mask", IX86_BUILTIN_SCALEFPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DF_QI },
32330 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_scalefv8sf_mask, "__builtin_ia32_scalefps256_mask", IX86_BUILTIN_SCALEFPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SF_QI },
32331 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_scalefv2df_mask, "__builtin_ia32_scalefpd128_mask", IX86_BUILTIN_SCALEFPD128, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF_QI },
32332 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_scalefv4sf_mask, "__builtin_ia32_scalefps128_mask", IX86_BUILTIN_SCALEFPS128, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF_QI },
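/* Masked fused multiply-add/subtract variants.  */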
32333 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmadd_v4df_mask, "__builtin_ia32_vfmaddpd256_mask", IX86_BUILTIN_VFMADDPD256_MASK, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DF_QI },
32334 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmadd_v4df_mask3, "__builtin_ia32_vfmaddpd256_mask3", IX86_BUILTIN_VFMADDPD256_MASK3, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DF_QI },
32335 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmadd_v4df_maskz, "__builtin_ia32_vfmaddpd256_maskz", IX86_BUILTIN_VFMADDPD256_MASKZ, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DF_QI },
32336 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmadd_v2df_mask, "__builtin_ia32_vfmaddpd128_mask", IX86_BUILTIN_VFMADDPD128_MASK, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF_QI },
32337 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmadd_v2df_mask3, "__builtin_ia32_vfmaddpd128_mask3", IX86_BUILTIN_VFMADDPD128_MASK3, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF_QI },
32338 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmadd_v2df_maskz, "__builtin_ia32_vfmaddpd128_maskz", IX86_BUILTIN_VFMADDPD128_MASKZ, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF_QI },
32339 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmadd_v8sf_mask, "__builtin_ia32_vfmaddps256_mask", IX86_BUILTIN_VFMADDPS256_MASK, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SF_QI },
32340 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmadd_v8sf_mask3, "__builtin_ia32_vfmaddps256_mask3", IX86_BUILTIN_VFMADDPS256_MASK3, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SF_QI },
32341 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmadd_v8sf_maskz, "__builtin_ia32_vfmaddps256_maskz", IX86_BUILTIN_VFMADDPS256_MASKZ, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SF_QI },
32342 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmadd_v4sf_mask, "__builtin_ia32_vfmaddps128_mask", IX86_BUILTIN_VFMADDPS128_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF_QI },
32343 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmadd_v4sf_mask3, "__builtin_ia32_vfmaddps128_mask3", IX86_BUILTIN_VFMADDPS128_MASK3, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF_QI },
32344 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmadd_v4sf_maskz, "__builtin_ia32_vfmaddps128_maskz", IX86_BUILTIN_VFMADDPS128_MASKZ, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF_QI },
32345 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmsub_v4df_mask3, "__builtin_ia32_vfmsubpd256_mask3", IX86_BUILTIN_VFMSUBPD256_MASK3, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DF_QI },
32346 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmsub_v2df_mask3, "__builtin_ia32_vfmsubpd128_mask3", IX86_BUILTIN_VFMSUBPD128_MASK3, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF_QI },
32347 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmsub_v8sf_mask3, "__builtin_ia32_vfmsubps256_mask3", IX86_BUILTIN_VFMSUBPS256_MASK3, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SF_QI },
32348 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmsub_v4sf_mask3, "__builtin_ia32_vfmsubps128_mask3", IX86_BUILTIN_VFMSUBPS128_MASK3, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF_QI },
32349 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fnmadd_v4df_mask, "__builtin_ia32_vfnmaddpd256_mask", IX86_BUILTIN_VFNMADDPD256_MASK, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DF_QI },
32350 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fnmadd_v2df_mask, "__builtin_ia32_vfnmaddpd128_mask", IX86_BUILTIN_VFNMADDPD128_MASK, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF_QI },
32351 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fnmadd_v8sf_mask, "__builtin_ia32_vfnmaddps256_mask", IX86_BUILTIN_VFNMADDPS256_MASK, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SF_QI },
32352 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fnmadd_v4sf_mask, "__builtin_ia32_vfnmaddps128_mask", IX86_BUILTIN_VFNMADDPS128_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF_QI },
32353 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fnmsub_v4df_mask, "__builtin_ia32_vfnmsubpd256_mask", IX86_BUILTIN_VFNMSUBPD256_MASK, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DF_QI },
32354 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fnmsub_v4df_mask3, "__builtin_ia32_vfnmsubpd256_mask3", IX86_BUILTIN_VFNMSUBPD256_MASK3, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DF_QI },
32355 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fnmsub_v2df_mask, "__builtin_ia32_vfnmsubpd128_mask", IX86_BUILTIN_VFNMSUBPD128_MASK, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF_QI },
32356 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fnmsub_v2df_mask3, "__builtin_ia32_vfnmsubpd128_mask3", IX86_BUILTIN_VFNMSUBPD128_MASK3, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF_QI },
32357 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fnmsub_v8sf_mask, "__builtin_ia32_vfnmsubps256_mask", IX86_BUILTIN_VFNMSUBPS256_MASK, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SF_QI },
32358 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fnmsub_v8sf_mask3, "__builtin_ia32_vfnmsubps256_mask3", IX86_BUILTIN_VFNMSUBPS256_MASK3, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SF_QI },
32359 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fnmsub_v4sf_mask, "__builtin_ia32_vfnmsubps128_mask", IX86_BUILTIN_VFNMSUBPS128_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF_QI },
32360 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fnmsub_v4sf_mask3, "__builtin_ia32_vfnmsubps128_mask3", IX86_BUILTIN_VFNMSUBPS128_MASK3, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF_QI },
32361 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmaddsub_v4df_mask, "__builtin_ia32_vfmaddsubpd256_mask", IX86_BUILTIN_VFMADDSUBPD256_MASK, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DF_QI },
32362 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmaddsub_v4df_mask3, "__builtin_ia32_vfmaddsubpd256_mask3", IX86_BUILTIN_VFMADDSUBPD256_MASK3, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DF_QI },
32363 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmaddsub_v4df_maskz, "__builtin_ia32_vfmaddsubpd256_maskz", IX86_BUILTIN_VFMADDSUBPD256_MASKZ, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DF_QI },
32364 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmaddsub_v2df_mask, "__builtin_ia32_vfmaddsubpd128_mask", IX86_BUILTIN_VFMADDSUBPD128_MASK, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF_QI },
32365 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmaddsub_v2df_mask3, "__builtin_ia32_vfmaddsubpd128_mask3", IX86_BUILTIN_VFMADDSUBPD128_MASK3, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF_QI },
32366 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmaddsub_v2df_maskz, "__builtin_ia32_vfmaddsubpd128_maskz", IX86_BUILTIN_VFMADDSUBPD128_MASKZ, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF_QI },
32367 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmaddsub_v8sf_mask, "__builtin_ia32_vfmaddsubps256_mask", IX86_BUILTIN_VFMADDSUBPS256_MASK, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SF_QI },
32368 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmaddsub_v8sf_mask3, "__builtin_ia32_vfmaddsubps256_mask3", IX86_BUILTIN_VFMADDSUBPS256_MASK3, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SF_QI },
32369 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmaddsub_v8sf_maskz, "__builtin_ia32_vfmaddsubps256_maskz", IX86_BUILTIN_VFMADDSUBPS256_MASKZ, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SF_QI },
32370 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmaddsub_v4sf_mask, "__builtin_ia32_vfmaddsubps128_mask", IX86_BUILTIN_VFMADDSUBPS128_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF_QI },
32371 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmaddsub_v4sf_mask3, "__builtin_ia32_vfmaddsubps128_mask3", IX86_BUILTIN_VFMADDSUBPS128_MASK3, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF_QI },
32372 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmaddsub_v4sf_maskz, "__builtin_ia32_vfmaddsubps128_maskz", IX86_BUILTIN_VFMADDSUBPS128_MASKZ, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF_QI },
32373 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmsubadd_v4df_mask3, "__builtin_ia32_vfmsubaddpd256_mask3", IX86_BUILTIN_VFMSUBADDPD256_MASK3, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DF_QI },
32374 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmsubadd_v2df_mask3, "__builtin_ia32_vfmsubaddpd128_mask3", IX86_BUILTIN_VFMSUBADDPD128_MASK3, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF_QI },
32375 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmsubadd_v8sf_mask3, "__builtin_ia32_vfmsubaddps256_mask3", IX86_BUILTIN_VFMSUBADDPS256_MASK3, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SF_QI },
32376 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmsubadd_v4sf_mask3, "__builtin_ia32_vfmsubaddps128_mask3", IX86_BUILTIN_VFMSUBADDPS128_MASK3, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF_QI },
32377 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vinsertv4df, "__builtin_ia32_insertf64x2_256_mask", IX86_BUILTIN_INSERTF64X2_256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V2DF_INT_V4DF_QI },
32378 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vinsertv4di, "__builtin_ia32_inserti64x2_256_mask", IX86_BUILTIN_INSERTI64X2_256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V2DI_INT_V4DI_QI },
32379 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ashrvv16hi_mask, "__builtin_ia32_psrav16hi_mask", IX86_BUILTIN_PSRAVV16HI, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_V16HI_HI },
32380 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ashrvv8hi_mask, "__builtin_ia32_psrav8hi_mask", IX86_BUILTIN_PSRAVV8HI, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_V8HI_QI },
32381 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512bw_pmaddubsw512v16hi_mask, "__builtin_ia32_pmaddubsw256_mask", IX86_BUILTIN_PMADDUBSW256_MASK, UNKNOWN, (int) V16HI_FTYPE_V32QI_V32QI_V16HI_HI },
32382 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512bw_pmaddubsw512v8hi_mask, "__builtin_ia32_pmaddubsw128_mask", IX86_BUILTIN_PMADDUBSW128_MASK, UNKNOWN, (int) V8HI_FTYPE_V16QI_V16QI_V8HI_QI },
32383 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512bw_pmaddwd512v16hi_mask, "__builtin_ia32_pmaddwd256_mask", IX86_BUILTIN_PMADDWD256_MASK, UNKNOWN, (int) V8SI_FTYPE_V16HI_V16HI_V8SI_QI },
32384 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512bw_pmaddwd512v8hi_mask, "__builtin_ia32_pmaddwd128_mask", IX86_BUILTIN_PMADDWD128_MASK, UNKNOWN, (int) V4SI_FTYPE_V8HI_V8HI_V4SI_QI },
32385 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_lshrvv16hi_mask, "__builtin_ia32_psrlv16hi_mask", IX86_BUILTIN_PSRLVV16HI, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_V16HI_HI },
32386 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_lshrvv8hi_mask, "__builtin_ia32_psrlv8hi_mask", IX86_BUILTIN_PSRLVV8HI, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_V8HI_QI },
32387 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_fix_notruncv8sfv8si_mask, "__builtin_ia32_cvtps2dq256_mask", IX86_BUILTIN_CVTPS2DQ256_MASK, UNKNOWN, (int) V8SI_FTYPE_V8SF_V8SI_QI },
32388 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse2_fix_notruncv4sfv4si_mask, "__builtin_ia32_cvtps2dq128_mask", IX86_BUILTIN_CVTPS2DQ128_MASK, UNKNOWN, (int) V4SI_FTYPE_V4SF_V4SI_QI },
32389 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ufix_notruncv8sfv8si_mask, "__builtin_ia32_cvtps2udq256_mask", IX86_BUILTIN_CVTPS2UDQ256, UNKNOWN, (int) V8SI_FTYPE_V8SF_V8SI_QI },
32390 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ufix_notruncv4sfv4si_mask, "__builtin_ia32_cvtps2udq128_mask", IX86_BUILTIN_CVTPS2UDQ128, UNKNOWN, (int) V4SI_FTYPE_V4SF_V4SI_QI },
32391 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512dq_cvtps2qqv4di_mask, "__builtin_ia32_cvtps2qq256_mask", IX86_BUILTIN_CVTPS2QQ256, UNKNOWN, (int) V4DI_FTYPE_V4SF_V4DI_QI },
32392 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512dq_cvtps2qqv2di_mask, "__builtin_ia32_cvtps2qq128_mask", IX86_BUILTIN_CVTPS2QQ128, UNKNOWN, (int) V2DI_FTYPE_V4SF_V2DI_QI },
32393 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512dq_cvtps2uqqv4di_mask, "__builtin_ia32_cvtps2uqq256_mask", IX86_BUILTIN_CVTPS2UQQ256, UNKNOWN, (int) V4DI_FTYPE_V4SF_V4DI_QI },
32394 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512dq_cvtps2uqqv2di_mask, "__builtin_ia32_cvtps2uqq128_mask", IX86_BUILTIN_CVTPS2UQQ128, UNKNOWN, (int) V2DI_FTYPE_V4SF_V2DI_QI },
32395 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_getmantv8sf_mask, "__builtin_ia32_getmantps256_mask", IX86_BUILTIN_GETMANTPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_INT_V8SF_QI },
32396 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_getmantv4sf_mask, "__builtin_ia32_getmantps128_mask", IX86_BUILTIN_GETMANTPS128, UNKNOWN, (int) V4SF_FTYPE_V4SF_INT_V4SF_QI },
32397 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_getmantv4df_mask, "__builtin_ia32_getmantpd256_mask", IX86_BUILTIN_GETMANTPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_INT_V4DF_QI },
32398 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_getmantv2df_mask, "__builtin_ia32_getmantpd128_mask", IX86_BUILTIN_GETMANTPD128, UNKNOWN, (int) V2DF_FTYPE_V2DF_INT_V2DF_QI },
32399 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_movddup256_mask, "__builtin_ia32_movddup256_mask", IX86_BUILTIN_MOVDDUP256_MASK, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_QI },
32400 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_vec_dupv2df_mask, "__builtin_ia32_movddup128_mask", IX86_BUILTIN_MOVDDUP128_MASK, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_QI },
32401 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_movshdup256_mask, "__builtin_ia32_movshdup256_mask", IX86_BUILTIN_MOVSHDUP256_MASK, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_QI },
32402 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse3_movshdup_mask, "__builtin_ia32_movshdup128_mask", IX86_BUILTIN_MOVSHDUP128_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_QI },
32403 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_movsldup256_mask, "__builtin_ia32_movsldup256_mask", IX86_BUILTIN_MOVSLDUP256_MASK, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_QI },
32404 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse3_movsldup_mask, "__builtin_ia32_movsldup128_mask", IX86_BUILTIN_MOVSLDUP128_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_QI },
32405 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_floatv4div4sf2_mask, "__builtin_ia32_cvtqq2ps256_mask", IX86_BUILTIN_CVTQQ2PS256, UNKNOWN, (int) V4SF_FTYPE_V4DI_V4SF_QI },
32406 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_floatv2div2sf2_mask, "__builtin_ia32_cvtqq2ps128_mask", IX86_BUILTIN_CVTQQ2PS128, UNKNOWN, (int) V4SF_FTYPE_V2DI_V4SF_QI },
32407 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_ufloatv4div4sf2_mask, "__builtin_ia32_cvtuqq2ps256_mask", IX86_BUILTIN_CVTUQQ2PS256, UNKNOWN, (int) V4SF_FTYPE_V4DI_V4SF_QI },
32408 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_ufloatv2div2sf2_mask, "__builtin_ia32_cvtuqq2ps128_mask", IX86_BUILTIN_CVTUQQ2PS128, UNKNOWN, (int) V4SF_FTYPE_V2DI_V4SF_QI },
32409 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_floatv4div4df2_mask, "__builtin_ia32_cvtqq2pd256_mask", IX86_BUILTIN_CVTQQ2PD256, UNKNOWN, (int) V4DF_FTYPE_V4DI_V4DF_QI },
32410 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_floatv2div2df2_mask, "__builtin_ia32_cvtqq2pd128_mask", IX86_BUILTIN_CVTQQ2PD128, UNKNOWN, (int) V2DF_FTYPE_V2DI_V2DF_QI },
32411 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_ufloatv4div4df2_mask, "__builtin_ia32_cvtuqq2pd256_mask", IX86_BUILTIN_CVTUQQ2PD256, UNKNOWN, (int) V4DF_FTYPE_V4DI_V4DF_QI },
32412 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_ufloatv2div2df2_mask, "__builtin_ia32_cvtuqq2pd128_mask", IX86_BUILTIN_CVTUQQ2PD128, UNKNOWN, (int) V2DF_FTYPE_V2DI_V2DF_QI },
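/* Masked two-source variable permutes (vpermt2var/vpermi2var).  */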
32413 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermt2varv4di3_mask, "__builtin_ia32_vpermt2varq256_mask", IX86_BUILTIN_VPERMT2VARQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_V4DI_QI },
32414 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermt2varv4di3_maskz, "__builtin_ia32_vpermt2varq256_maskz", IX86_BUILTIN_VPERMT2VARQ256_MASKZ, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_V4DI_QI },
32415 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermt2varv8si3_mask, "__builtin_ia32_vpermt2vard256_mask", IX86_BUILTIN_VPERMT2VARD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_V8SI_QI },
32416 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermt2varv8si3_maskz, "__builtin_ia32_vpermt2vard256_maskz", IX86_BUILTIN_VPERMT2VARD256_MASKZ, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_V8SI_QI },
32417 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermi2varv4di3_mask, "__builtin_ia32_vpermi2varq256_mask", IX86_BUILTIN_VPERMI2VARQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_V4DI_QI },
32418 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermi2varv8si3_mask, "__builtin_ia32_vpermi2vard256_mask", IX86_BUILTIN_VPERMI2VARD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_V8SI_QI },
32419 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermt2varv4df3_mask, "__builtin_ia32_vpermt2varpd256_mask", IX86_BUILTIN_VPERMT2VARPD256, UNKNOWN, (int) V4DF_FTYPE_V4DI_V4DF_V4DF_QI },
32420 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermt2varv4df3_maskz, "__builtin_ia32_vpermt2varpd256_maskz", IX86_BUILTIN_VPERMT2VARPD256_MASKZ, UNKNOWN, (int) V4DF_FTYPE_V4DI_V4DF_V4DF_QI },
32421 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermt2varv8sf3_mask, "__builtin_ia32_vpermt2varps256_mask", IX86_BUILTIN_VPERMT2VARPS256, UNKNOWN, (int) V8SF_FTYPE_V8SI_V8SF_V8SF_QI },
32422 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermt2varv8sf3_maskz, "__builtin_ia32_vpermt2varps256_maskz", IX86_BUILTIN_VPERMT2VARPS256_MASKZ, UNKNOWN, (int) V8SF_FTYPE_V8SI_V8SF_V8SF_QI },
32423 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermi2varv4df3_mask, "__builtin_ia32_vpermi2varpd256_mask", IX86_BUILTIN_VPERMI2VARPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DI_V4DF_QI },
32424 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermi2varv8sf3_mask, "__builtin_ia32_vpermi2varps256_mask", IX86_BUILTIN_VPERMI2VARPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SI_V8SF_QI },
32425 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermt2varv2di3_mask, "__builtin_ia32_vpermt2varq128_mask", IX86_BUILTIN_VPERMT2VARQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI_QI },
32426 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermt2varv2di3_maskz, "__builtin_ia32_vpermt2varq128_maskz", IX86_BUILTIN_VPERMT2VARQ128_MASKZ, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI_QI },
32427 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermt2varv4si3_mask, "__builtin_ia32_vpermt2vard128_mask", IX86_BUILTIN_VPERMT2VARD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI_QI },
32428 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermt2varv4si3_maskz, "__builtin_ia32_vpermt2vard128_maskz", IX86_BUILTIN_VPERMT2VARD128_MASKZ, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI_QI },
32429 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermi2varv2di3_mask, "__builtin_ia32_vpermi2varq128_mask", IX86_BUILTIN_VPERMI2VARQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI_QI },
32430 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermi2varv4si3_mask, "__builtin_ia32_vpermi2vard128_mask", IX86_BUILTIN_VPERMI2VARD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI_QI },
32431 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermt2varv2df3_mask, "__builtin_ia32_vpermt2varpd128_mask", IX86_BUILTIN_VPERMT2VARPD128, UNKNOWN, (int) V2DF_FTYPE_V2DI_V2DF_V2DF_QI },
32432 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermt2varv2df3_maskz, "__builtin_ia32_vpermt2varpd128_maskz", IX86_BUILTIN_VPERMT2VARPD128_MASKZ, UNKNOWN, (int) V2DF_FTYPE_V2DI_V2DF_V2DF_QI },
32433 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermt2varv4sf3_mask, "__builtin_ia32_vpermt2varps128_mask", IX86_BUILTIN_VPERMT2VARPS128, UNKNOWN, (int) V4SF_FTYPE_V4SI_V4SF_V4SF_QI },
32434 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermt2varv4sf3_maskz, "__builtin_ia32_vpermt2varps128_maskz", IX86_BUILTIN_VPERMT2VARPS128_MASKZ, UNKNOWN, (int) V4SF_FTYPE_V4SI_V4SF_V4SF_QI },
32435 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermi2varv2df3_mask, "__builtin_ia32_vpermi2varpd128_mask", IX86_BUILTIN_VPERMI2VARPD128, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DI_V2DF_QI },
32436 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermi2varv4sf3_mask, "__builtin_ia32_vpermi2varps128_mask", IX86_BUILTIN_VPERMI2VARPS128, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SI_V4SF_QI },
32437 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_pshufbv32qi3_mask, "__builtin_ia32_pshufb256_mask", IX86_BUILTIN_PSHUFB256_MASK, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI_V32QI_SI },
32438 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_ssse3_pshufbv16qi3_mask, "__builtin_ia32_pshufb128_mask", IX86_BUILTIN_PSHUFB128_MASK, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_V16QI_HI },
32439 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_pshufhwv3_mask, "__builtin_ia32_pshufhw256_mask", IX86_BUILTIN_PSHUFHW256_MASK, UNKNOWN, (int) V16HI_FTYPE_V16HI_INT_V16HI_HI },
32440 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_pshufhw_mask, "__builtin_ia32_pshufhw128_mask", IX86_BUILTIN_PSHUFHW128_MASK, UNKNOWN, (int) V8HI_FTYPE_V8HI_INT_V8HI_QI },
32441 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_pshuflwv3_mask, "__builtin_ia32_pshuflw256_mask", IX86_BUILTIN_PSHUFLW256_MASK, UNKNOWN, (int) V16HI_FTYPE_V16HI_INT_V16HI_HI },
32442 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_pshuflw_mask, "__builtin_ia32_pshuflw128_mask", IX86_BUILTIN_PSHUFLW128_MASK, UNKNOWN, (int) V8HI_FTYPE_V8HI_INT_V8HI_QI },
32443 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_pshufdv3_mask, "__builtin_ia32_pshufd256_mask", IX86_BUILTIN_PSHUFD256_MASK, UNKNOWN, (int) V8SI_FTYPE_V8SI_INT_V8SI_QI },
32444 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_pshufd_mask, "__builtin_ia32_pshufd128_mask", IX86_BUILTIN_PSHUFD128_MASK, UNKNOWN, (int) V4SI_FTYPE_V4SI_INT_V4SI_QI },
32445 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_shufpd256_mask, "__builtin_ia32_shufpd256_mask", IX86_BUILTIN_SHUFPD256_MASK, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_INT_V4DF_QI },
32446 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse2_shufpd_mask, "__builtin_ia32_shufpd128_mask", IX86_BUILTIN_SHUFPD128_MASK, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT_V2DF_QI },
32447 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_shufps256_mask, "__builtin_ia32_shufps256_mask", IX86_BUILTIN_SHUFPS256_MASK, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_INT_V8SF_QI },
32448 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse_shufps_mask, "__builtin_ia32_shufps128_mask", IX86_BUILTIN_SHUFPS128_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT_V4SF_QI },
32449 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_rolvv4di_mask, "__builtin_ia32_prolvq256_mask", IX86_BUILTIN_PROLVQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_V4DI_QI },
32450 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_rolvv2di_mask, "__builtin_ia32_prolvq128_mask", IX86_BUILTIN_PROLVQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI_QI },
32451 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_rolv4di_mask, "__builtin_ia32_prolq256_mask", IX86_BUILTIN_PROLQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_INT_V4DI_QI },
32452 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_rolv2di_mask, "__builtin_ia32_prolq128_mask", IX86_BUILTIN_PROLQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_INT_V2DI_QI },
32453 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_rorvv4di_mask, "__builtin_ia32_prorvq256_mask", IX86_BUILTIN_PRORVQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_V4DI_QI },
32454 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_rorvv2di_mask, "__builtin_ia32_prorvq128_mask", IX86_BUILTIN_PRORVQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI_QI },
32455 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_rorv4di_mask, "__builtin_ia32_prorq256_mask", IX86_BUILTIN_PRORQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_INT_V4DI_QI },
32456 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_rorv2di_mask, "__builtin_ia32_prorq128_mask", IX86_BUILTIN_PRORQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_INT_V2DI_QI },
32457 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_ashrvv2di_mask, "__builtin_ia32_psravq128_mask", IX86_BUILTIN_PSRAVQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI_QI },
32458 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_ashrvv4di_mask, "__builtin_ia32_psravq256_mask", IX86_BUILTIN_PSRAVQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_V4DI_QI },
32459 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_ashlvv4di_mask, "__builtin_ia32_psllv4di_mask", IX86_BUILTIN_PSLLVV4DI_MASK, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_V4DI_QI },
32460 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_ashlvv2di_mask, "__builtin_ia32_psllv2di_mask", IX86_BUILTIN_PSLLVV2DI_MASK, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI_QI },
32461 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_ashlvv8si_mask, "__builtin_ia32_psllv8si_mask", IX86_BUILTIN_PSLLVV8SI_MASK, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_V8SI_QI },
32462 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_ashlvv4si_mask, "__builtin_ia32_psllv4si_mask", IX86_BUILTIN_PSLLVV4SI_MASK, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI_QI },
32463 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_ashrvv8si_mask, "__builtin_ia32_psrav8si_mask", IX86_BUILTIN_PSRAVV8SI_MASK, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_V8SI_QI },
32464 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_ashrvv4si_mask, "__builtin_ia32_psrav4si_mask", IX86_BUILTIN_PSRAVV4SI_MASK, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI_QI },
32465 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_lshrvv4di_mask, "__builtin_ia32_psrlv4di_mask", IX86_BUILTIN_PSRLVV4DI_MASK, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_V4DI_QI },
32466 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_lshrvv2di_mask, "__builtin_ia32_psrlv2di_mask", IX86_BUILTIN_PSRLVV2DI_MASK, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI_QI },
32467 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_lshrvv8si_mask, "__builtin_ia32_psrlv8si_mask", IX86_BUILTIN_PSRLVV8SI_MASK, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_V8SI_QI },
32468 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_lshrvv4si_mask, "__builtin_ia32_psrlv4si_mask", IX86_BUILTIN_PSRLVV4SI_MASK, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI_QI },
32469 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_ashrv16hi3_mask, "__builtin_ia32_psrawi256_mask", IX86_BUILTIN_PSRAWI256_MASK, UNKNOWN, (int) V16HI_FTYPE_V16HI_INT_V16HI_HI },
32470 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_ashrv16hi3_mask, "__builtin_ia32_psraw256_mask", IX86_BUILTIN_PSRAW256_MASK, UNKNOWN, (int) V16HI_FTYPE_V16HI_V8HI_V16HI_HI },
32471 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_ashrv8hi3_mask, "__builtin_ia32_psrawi128_mask", IX86_BUILTIN_PSRAWI128_MASK, UNKNOWN, (int) V8HI_FTYPE_V8HI_INT_V8HI_QI },
32472 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_ashrv8hi3_mask, "__builtin_ia32_psraw128_mask", IX86_BUILTIN_PSRAW128_MASK, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_V8HI_QI },
32473 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_lshrv16hi3_mask, "__builtin_ia32_psrlwi256_mask", IX86_BUILTIN_PSRLWI256_MASK, UNKNOWN, (int) V16HI_FTYPE_V16HI_INT_V16HI_HI },
32474 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_lshrv16hi3_mask, "__builtin_ia32_psrlw256_mask", IX86_BUILTIN_PSRLW256_MASK, UNKNOWN, (int) V16HI_FTYPE_V16HI_V8HI_V16HI_HI },
32475 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_lshrv8hi3_mask, "__builtin_ia32_psrlwi128_mask", IX86_BUILTIN_PSRLWI128_MASK, UNKNOWN, (int) V8HI_FTYPE_V8HI_INT_V8HI_QI },
32476 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_lshrv8hi3_mask, "__builtin_ia32_psrlw128_mask", IX86_BUILTIN_PSRLW128_MASK, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_V8HI_QI },
32477 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_rorvv8si_mask, "__builtin_ia32_prorvd256_mask", IX86_BUILTIN_PRORVD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_V8SI_QI },
32478 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_rolvv8si_mask, "__builtin_ia32_prolvd256_mask", IX86_BUILTIN_PROLVD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_V8SI_QI },
32479 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_rorv8si_mask, "__builtin_ia32_prord256_mask", IX86_BUILTIN_PRORD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_INT_V8SI_QI },
32480 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_rolv8si_mask, "__builtin_ia32_prold256_mask", IX86_BUILTIN_PROLD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_INT_V8SI_QI },
32481 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_rorvv4si_mask, "__builtin_ia32_prorvd128_mask", IX86_BUILTIN_PRORVD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI_QI },
32482 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_rolvv4si_mask, "__builtin_ia32_prolvd128_mask", IX86_BUILTIN_PROLVD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI_QI },
32483 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_rorv4si_mask, "__builtin_ia32_prord128_mask", IX86_BUILTIN_PRORD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_INT_V4SI_QI },
32484 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_rolv4si_mask, "__builtin_ia32_prold128_mask", IX86_BUILTIN_PROLD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_INT_V4SI_QI },
32485 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512dq_fpclassv4df_mask, "__builtin_ia32_fpclasspd256_mask", IX86_BUILTIN_FPCLASSPD256, UNKNOWN, (int) QI_FTYPE_V4DF_INT_QI },
32486 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512dq_fpclassv2df_mask, "__builtin_ia32_fpclasspd128_mask", IX86_BUILTIN_FPCLASSPD128, UNKNOWN, (int) QI_FTYPE_V2DF_INT_QI },
32487 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_avx512dq_vmfpclassv2df, "__builtin_ia32_fpclasssd", IX86_BUILTIN_FPCLASSSD, UNKNOWN, (int) QI_FTYPE_V2DF_INT },
32488 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512dq_fpclassv8sf_mask, "__builtin_ia32_fpclassps256_mask", IX86_BUILTIN_FPCLASSPS256, UNKNOWN, (int) QI_FTYPE_V8SF_INT_QI },
32489 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512dq_fpclassv4sf_mask, "__builtin_ia32_fpclassps128_mask", IX86_BUILTIN_FPCLASSPS128, UNKNOWN, (int) QI_FTYPE_V4SF_INT_QI },
32490 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_avx512dq_vmfpclassv4sf, "__builtin_ia32_fpclassss", IX86_BUILTIN_FPCLASSSS, UNKNOWN, (int) QI_FTYPE_V4SF_INT },
32491 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_cvtb2maskv16qi, "__builtin_ia32_cvtb2mask128", IX86_BUILTIN_CVTB2MASK128, UNKNOWN, (int) HI_FTYPE_V16QI },
32492 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_cvtb2maskv32qi, "__builtin_ia32_cvtb2mask256", IX86_BUILTIN_CVTB2MASK256, UNKNOWN, (int) SI_FTYPE_V32QI },
32493 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_cvtw2maskv8hi, "__builtin_ia32_cvtw2mask128", IX86_BUILTIN_CVTW2MASK128, UNKNOWN, (int) QI_FTYPE_V8HI },
32494 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_cvtw2maskv16hi, "__builtin_ia32_cvtw2mask256", IX86_BUILTIN_CVTW2MASK256, UNKNOWN, (int) HI_FTYPE_V16HI },
32495 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_cvtd2maskv4si, "__builtin_ia32_cvtd2mask128", IX86_BUILTIN_CVTD2MASK128, UNKNOWN, (int) QI_FTYPE_V4SI },
32496 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_cvtd2maskv8si, "__builtin_ia32_cvtd2mask256", IX86_BUILTIN_CVTD2MASK256, UNKNOWN, (int) QI_FTYPE_V8SI },
32497 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_cvtq2maskv2di, "__builtin_ia32_cvtq2mask128", IX86_BUILTIN_CVTQ2MASK128, UNKNOWN, (int) QI_FTYPE_V2DI },
32498 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_cvtq2maskv4di, "__builtin_ia32_cvtq2mask256", IX86_BUILTIN_CVTQ2MASK256, UNKNOWN, (int) QI_FTYPE_V4DI },
32499 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_cvtmask2bv16qi, "__builtin_ia32_cvtmask2b128", IX86_BUILTIN_CVTMASK2B128, UNKNOWN, (int) V16QI_FTYPE_HI },
32500 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_cvtmask2bv32qi, "__builtin_ia32_cvtmask2b256", IX86_BUILTIN_CVTMASK2B256, UNKNOWN, (int) V32QI_FTYPE_SI },
32501 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_cvtmask2wv8hi, "__builtin_ia32_cvtmask2w128", IX86_BUILTIN_CVTMASK2W128, UNKNOWN, (int) V8HI_FTYPE_QI },
32502 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_cvtmask2wv16hi, "__builtin_ia32_cvtmask2w256", IX86_BUILTIN_CVTMASK2W256, UNKNOWN, (int) V16HI_FTYPE_HI },
32503 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_cvtmask2dv4si, "__builtin_ia32_cvtmask2d128", IX86_BUILTIN_CVTMASK2D128, UNKNOWN, (int) V4SI_FTYPE_QI },
32504 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_cvtmask2dv8si, "__builtin_ia32_cvtmask2d256", IX86_BUILTIN_CVTMASK2D256, UNKNOWN, (int) V8SI_FTYPE_QI },
32505 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_cvtmask2qv2di, "__builtin_ia32_cvtmask2q128", IX86_BUILTIN_CVTMASK2Q128, UNKNOWN, (int) V2DI_FTYPE_QI },
32506 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_cvtmask2qv4di, "__builtin_ia32_cvtmask2q256", IX86_BUILTIN_CVTMASK2Q256, UNKNOWN, (int) V4DI_FTYPE_QI },
32507 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_eqv16qi3_mask, "__builtin_ia32_pcmpeqb128_mask", IX86_BUILTIN_PCMPEQB128_MASK, UNKNOWN, (int) HI_FTYPE_V16QI_V16QI_HI },
32508 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_eqv32qi3_mask, "__builtin_ia32_pcmpeqb256_mask", IX86_BUILTIN_PCMPEQB256_MASK, UNKNOWN, (int) SI_FTYPE_V32QI_V32QI_SI },
32509 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_eqv8hi3_mask, "__builtin_ia32_pcmpeqw128_mask", IX86_BUILTIN_PCMPEQW128_MASK, UNKNOWN, (int) QI_FTYPE_V8HI_V8HI_QI },
32510 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_eqv16hi3_mask, "__builtin_ia32_pcmpeqw256_mask", IX86_BUILTIN_PCMPEQW256_MASK, UNKNOWN, (int) HI_FTYPE_V16HI_V16HI_HI },
32511 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_eqv4si3_mask, "__builtin_ia32_pcmpeqd128_mask", IX86_BUILTIN_PCMPEQD128_MASK, UNKNOWN, (int) QI_FTYPE_V4SI_V4SI_QI },
32512 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_eqv8si3_mask, "__builtin_ia32_pcmpeqd256_mask", IX86_BUILTIN_PCMPEQD256_MASK, UNKNOWN, (int) QI_FTYPE_V8SI_V8SI_QI },
32513 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_eqv2di3_mask, "__builtin_ia32_pcmpeqq128_mask", IX86_BUILTIN_PCMPEQQ128_MASK, UNKNOWN, (int) QI_FTYPE_V2DI_V2DI_QI },
32514 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_eqv4di3_mask, "__builtin_ia32_pcmpeqq256_mask", IX86_BUILTIN_PCMPEQQ256_MASK, UNKNOWN, (int) QI_FTYPE_V4DI_V4DI_QI },
32515 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_gtv16qi3_mask, "__builtin_ia32_pcmpgtb128_mask", IX86_BUILTIN_PCMPGTB128_MASK, UNKNOWN, (int) HI_FTYPE_V16QI_V16QI_HI },
32516 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_gtv32qi3_mask, "__builtin_ia32_pcmpgtb256_mask", IX86_BUILTIN_PCMPGTB256_MASK, UNKNOWN, (int) SI_FTYPE_V32QI_V32QI_SI },
32517 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_gtv8hi3_mask, "__builtin_ia32_pcmpgtw128_mask", IX86_BUILTIN_PCMPGTW128_MASK, UNKNOWN, (int) QI_FTYPE_V8HI_V8HI_QI },
32518 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_gtv16hi3_mask, "__builtin_ia32_pcmpgtw256_mask", IX86_BUILTIN_PCMPGTW256_MASK, UNKNOWN, (int) HI_FTYPE_V16HI_V16HI_HI },
32519 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_gtv4si3_mask, "__builtin_ia32_pcmpgtd128_mask", IX86_BUILTIN_PCMPGTD128_MASK, UNKNOWN, (int) QI_FTYPE_V4SI_V4SI_QI },
32520 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_gtv8si3_mask, "__builtin_ia32_pcmpgtd256_mask", IX86_BUILTIN_PCMPGTD256_MASK, UNKNOWN, (int) QI_FTYPE_V8SI_V8SI_QI },
32521 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_gtv2di3_mask, "__builtin_ia32_pcmpgtq128_mask", IX86_BUILTIN_PCMPGTQ128_MASK, UNKNOWN, (int) QI_FTYPE_V2DI_V2DI_QI },
32522 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_gtv4di3_mask, "__builtin_ia32_pcmpgtq256_mask", IX86_BUILTIN_PCMPGTQ256_MASK, UNKNOWN, (int) QI_FTYPE_V4DI_V4DI_QI },
32523 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_testmv16qi3_mask, "__builtin_ia32_ptestmb128", IX86_BUILTIN_PTESTMB128, UNKNOWN, (int) HI_FTYPE_V16QI_V16QI_HI },
32524 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_testmv32qi3_mask, "__builtin_ia32_ptestmb256", IX86_BUILTIN_PTESTMB256, UNKNOWN, (int) SI_FTYPE_V32QI_V32QI_SI },
32525 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_testmv8hi3_mask, "__builtin_ia32_ptestmw128", IX86_BUILTIN_PTESTMW128, UNKNOWN, (int) QI_FTYPE_V8HI_V8HI_QI },
32526 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_testmv16hi3_mask, "__builtin_ia32_ptestmw256", IX86_BUILTIN_PTESTMW256, UNKNOWN, (int) HI_FTYPE_V16HI_V16HI_HI },
32527 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_testmv4si3_mask, "__builtin_ia32_ptestmd128", IX86_BUILTIN_PTESTMD128, UNKNOWN, (int) QI_FTYPE_V4SI_V4SI_QI },
32528 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_testmv8si3_mask, "__builtin_ia32_ptestmd256", IX86_BUILTIN_PTESTMD256, UNKNOWN, (int) QI_FTYPE_V8SI_V8SI_QI },
32529 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_testmv2di3_mask, "__builtin_ia32_ptestmq128", IX86_BUILTIN_PTESTMQ128, UNKNOWN, (int) QI_FTYPE_V2DI_V2DI_QI },
32530 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_testmv4di3_mask, "__builtin_ia32_ptestmq256", IX86_BUILTIN_PTESTMQ256, UNKNOWN, (int) QI_FTYPE_V4DI_V4DI_QI },
32531 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_testnmv16qi3_mask, "__builtin_ia32_ptestnmb128", IX86_BUILTIN_PTESTNMB128, UNKNOWN, (int) HI_FTYPE_V16QI_V16QI_HI },
32532 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_testnmv32qi3_mask, "__builtin_ia32_ptestnmb256", IX86_BUILTIN_PTESTNMB256, UNKNOWN, (int) SI_FTYPE_V32QI_V32QI_SI },
32533 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_testnmv8hi3_mask, "__builtin_ia32_ptestnmw128", IX86_BUILTIN_PTESTNMW128, UNKNOWN, (int) QI_FTYPE_V8HI_V8HI_QI },
32534 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_testnmv16hi3_mask, "__builtin_ia32_ptestnmw256", IX86_BUILTIN_PTESTNMW256, UNKNOWN, (int) HI_FTYPE_V16HI_V16HI_HI },
32535 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_testnmv4si3_mask, "__builtin_ia32_ptestnmd128", IX86_BUILTIN_PTESTNMD128, UNKNOWN, (int) QI_FTYPE_V4SI_V4SI_QI },
32536 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_testnmv8si3_mask, "__builtin_ia32_ptestnmd256", IX86_BUILTIN_PTESTNMD256, UNKNOWN, (int) QI_FTYPE_V8SI_V8SI_QI },
32537 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_testnmv2di3_mask, "__builtin_ia32_ptestnmq128", IX86_BUILTIN_PTESTNMQ128, UNKNOWN, (int) QI_FTYPE_V2DI_V2DI_QI },
32538 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_testnmv4di3_mask, "__builtin_ia32_ptestnmq256", IX86_BUILTIN_PTESTNMQ256, UNKNOWN, (int) QI_FTYPE_V4DI_V4DI_QI },
32539 { OPTION_MASK_ISA_AVX512VL | OPTION_MASK_ISA_AVX512CD, CODE_FOR_avx512cd_maskb_vec_dupv2di, "__builtin_ia32_broadcastmb128", IX86_BUILTIN_PBROADCASTMB128, UNKNOWN, (int) V2DI_FTYPE_QI },
32540 { OPTION_MASK_ISA_AVX512VL | OPTION_MASK_ISA_AVX512CD, CODE_FOR_avx512cd_maskb_vec_dupv4di, "__builtin_ia32_broadcastmb256", IX86_BUILTIN_PBROADCASTMB256, UNKNOWN, (int) V4DI_FTYPE_QI },
32541 { OPTION_MASK_ISA_AVX512VL | OPTION_MASK_ISA_AVX512CD, CODE_FOR_avx512cd_maskw_vec_dupv4si, "__builtin_ia32_broadcastmw128", IX86_BUILTIN_PBROADCASTMW128, UNKNOWN, (int) V4SI_FTYPE_HI },
32542 { OPTION_MASK_ISA_AVX512VL | OPTION_MASK_ISA_AVX512CD, CODE_FOR_avx512cd_maskw_vec_dupv8si, "__builtin_ia32_broadcastmw256", IX86_BUILTIN_PBROADCASTMW256, UNKNOWN, (int) V8SI_FTYPE_HI },
32543 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_compressv4df_mask, "__builtin_ia32_compressdf256_mask", IX86_BUILTIN_COMPRESSPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_QI },
32544 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_compressv2df_mask, "__builtin_ia32_compressdf128_mask", IX86_BUILTIN_COMPRESSPD128, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_QI },
32545 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_compressv8sf_mask, "__builtin_ia32_compresssf256_mask", IX86_BUILTIN_COMPRESSPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_QI },
32546 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_compressv4sf_mask, "__builtin_ia32_compresssf128_mask", IX86_BUILTIN_COMPRESSPS128, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_QI },
32547 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_compressv4di_mask, "__builtin_ia32_compressdi256_mask", IX86_BUILTIN_PCOMPRESSQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_QI },
32548 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_compressv2di_mask, "__builtin_ia32_compressdi128_mask", IX86_BUILTIN_PCOMPRESSQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_QI },
32549 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_compressv8si_mask, "__builtin_ia32_compresssi256_mask", IX86_BUILTIN_PCOMPRESSD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_QI },
32550 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_compressv4si_mask, "__builtin_ia32_compresssi128_mask", IX86_BUILTIN_PCOMPRESSD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_QI },
32551 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv4df_mask, "__builtin_ia32_expanddf256_mask", IX86_BUILTIN_EXPANDPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_QI },
32552 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv2df_mask, "__builtin_ia32_expanddf128_mask", IX86_BUILTIN_EXPANDPD128, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_QI },
32553 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv8sf_mask, "__builtin_ia32_expandsf256_mask", IX86_BUILTIN_EXPANDPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_QI },
32554 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv4sf_mask, "__builtin_ia32_expandsf128_mask", IX86_BUILTIN_EXPANDPS128, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_QI },
32555 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv4di_mask, "__builtin_ia32_expanddi256_mask", IX86_BUILTIN_PEXPANDQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_QI },
32556 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv2di_mask, "__builtin_ia32_expanddi128_mask", IX86_BUILTIN_PEXPANDQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_QI },
32557 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv8si_mask, "__builtin_ia32_expandsi256_mask", IX86_BUILTIN_PEXPANDD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_QI },
32558 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv4si_mask, "__builtin_ia32_expandsi128_mask", IX86_BUILTIN_PEXPANDD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_QI },
32559 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv4df_maskz, "__builtin_ia32_expanddf256_maskz", IX86_BUILTIN_EXPANDPD256Z, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_QI },
32560 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv2df_maskz, "__builtin_ia32_expanddf128_maskz", IX86_BUILTIN_EXPANDPD128Z, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_QI },
32561 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv8sf_maskz, "__builtin_ia32_expandsf256_maskz", IX86_BUILTIN_EXPANDPS256Z, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_QI },
32562 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv4sf_maskz, "__builtin_ia32_expandsf128_maskz", IX86_BUILTIN_EXPANDPS128Z, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_QI },
32563 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv4di_maskz, "__builtin_ia32_expanddi256_maskz", IX86_BUILTIN_PEXPANDQ256Z, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_QI },
32564 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv2di_maskz, "__builtin_ia32_expanddi128_maskz", IX86_BUILTIN_PEXPANDQ128Z, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_QI },
32565 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv8si_maskz, "__builtin_ia32_expandsi256_maskz", IX86_BUILTIN_PEXPANDD256Z, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_QI },
32566 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv4si_maskz, "__builtin_ia32_expandsi128_maskz", IX86_BUILTIN_PEXPANDD128Z, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_QI },
32567 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_smaxv8si3_mask, "__builtin_ia32_pmaxsd256_mask", IX86_BUILTIN_PMAXSD256_MASK, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_V8SI_QI },
32568 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sminv8si3_mask, "__builtin_ia32_pminsd256_mask", IX86_BUILTIN_PMINSD256_MASK, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_V8SI_QI },
32569 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_umaxv8si3_mask, "__builtin_ia32_pmaxud256_mask", IX86_BUILTIN_PMAXUD256_MASK, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_V8SI_QI },
32570 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_uminv8si3_mask, "__builtin_ia32_pminud256_mask", IX86_BUILTIN_PMINUD256_MASK, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_V8SI_QI },
32571 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_smaxv4si3_mask, "__builtin_ia32_pmaxsd128_mask", IX86_BUILTIN_PMAXSD128_MASK, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI_QI },
32572 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sminv4si3_mask, "__builtin_ia32_pminsd128_mask", IX86_BUILTIN_PMINSD128_MASK, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI_QI },
32573 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_umaxv4si3_mask, "__builtin_ia32_pmaxud128_mask", IX86_BUILTIN_PMAXUD128_MASK, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI_QI },
32574 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_uminv4si3_mask, "__builtin_ia32_pminud128_mask", IX86_BUILTIN_PMINUD128_MASK, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI_QI },
32575 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_smaxv4di3_mask, "__builtin_ia32_pmaxsq256_mask", IX86_BUILTIN_PMAXSQ256_MASK, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_V4DI_QI },
32576 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sminv4di3_mask, "__builtin_ia32_pminsq256_mask", IX86_BUILTIN_PMINSQ256_MASK, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_V4DI_QI },
32577 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_umaxv4di3_mask, "__builtin_ia32_pmaxuq256_mask", IX86_BUILTIN_PMAXUQ256_MASK, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_V4DI_QI },
32578 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_uminv4di3_mask, "__builtin_ia32_pminuq256_mask", IX86_BUILTIN_PMINUQ256_MASK, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_V4DI_QI },
32579 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_smaxv2di3_mask, "__builtin_ia32_pmaxsq128_mask", IX86_BUILTIN_PMAXSQ128_MASK, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI_QI },
32580 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sminv2di3_mask, "__builtin_ia32_pminsq128_mask", IX86_BUILTIN_PMINSQ128_MASK, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI_QI },
32581 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_umaxv2di3_mask, "__builtin_ia32_pmaxuq128_mask", IX86_BUILTIN_PMAXUQ128_MASK, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI_QI },
32582 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_uminv2di3_mask, "__builtin_ia32_pminuq128_mask", IX86_BUILTIN_PMINUQ128_MASK, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI_QI },
32583 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_sminv32qi3_mask, "__builtin_ia32_pminsb256_mask", IX86_BUILTIN_PMINSB256_MASK, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI_V32QI_SI },
32584 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_uminv32qi3_mask, "__builtin_ia32_pminub256_mask", IX86_BUILTIN_PMINUB256_MASK, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI_V32QI_SI },
32585 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_smaxv32qi3_mask, "__builtin_ia32_pmaxsb256_mask", IX86_BUILTIN_PMAXSB256_MASK, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI_V32QI_SI },
32586 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_umaxv32qi3_mask, "__builtin_ia32_pmaxub256_mask", IX86_BUILTIN_PMAXUB256_MASK, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI_V32QI_SI },
32587 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_sminv16qi3_mask, "__builtin_ia32_pminsb128_mask", IX86_BUILTIN_PMINSB128_MASK, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_V16QI_HI },
32588 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_uminv16qi3_mask, "__builtin_ia32_pminub128_mask", IX86_BUILTIN_PMINUB128_MASK, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_V16QI_HI },
32589 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_smaxv16qi3_mask, "__builtin_ia32_pmaxsb128_mask", IX86_BUILTIN_PMAXSB128_MASK, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_V16QI_HI },
32590 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_umaxv16qi3_mask, "__builtin_ia32_pmaxub128_mask", IX86_BUILTIN_PMAXUB128_MASK, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_V16QI_HI },
32591 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_sminv16hi3_mask, "__builtin_ia32_pminsw256_mask", IX86_BUILTIN_PMINSW256_MASK, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_V16HI_HI },
32592 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_uminv16hi3_mask, "__builtin_ia32_pminuw256_mask", IX86_BUILTIN_PMINUW256_MASK, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_V16HI_HI },
32593 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_smaxv16hi3_mask, "__builtin_ia32_pmaxsw256_mask", IX86_BUILTIN_PMAXSW256_MASK, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_V16HI_HI },
32594 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_umaxv16hi3_mask, "__builtin_ia32_pmaxuw256_mask", IX86_BUILTIN_PMAXUW256_MASK, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_V16HI_HI },
32595 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_sminv8hi3_mask, "__builtin_ia32_pminsw128_mask", IX86_BUILTIN_PMINSW128_MASK, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_V8HI_QI },
32596 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_uminv8hi3_mask, "__builtin_ia32_pminuw128_mask", IX86_BUILTIN_PMINUW128_MASK, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_V8HI_QI },
32597 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_smaxv8hi3_mask, "__builtin_ia32_pmaxsw128_mask", IX86_BUILTIN_PMAXSW128_MASK, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_V8HI_QI },
32598 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_umaxv8hi3_mask, "__builtin_ia32_pmaxuw128_mask", IX86_BUILTIN_PMAXUW128_MASK, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_V8HI_QI },
32599 { OPTION_MASK_ISA_AVX512CD | OPTION_MASK_ISA_AVX512VL, CODE_FOR_conflictv4di_mask, "__builtin_ia32_vpconflictdi_256_mask", IX86_BUILTIN_VPCONFLICTQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_QI },
32600 { OPTION_MASK_ISA_AVX512CD | OPTION_MASK_ISA_AVX512VL, CODE_FOR_conflictv8si_mask, "__builtin_ia32_vpconflictsi_256_mask", IX86_BUILTIN_VPCONFLICTD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_QI },
32601 { OPTION_MASK_ISA_AVX512CD | OPTION_MASK_ISA_AVX512VL, CODE_FOR_clzv4di2_mask, "__builtin_ia32_vplzcntq_256_mask", IX86_BUILTIN_VPCLZCNTQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_QI },
32602 { OPTION_MASK_ISA_AVX512CD | OPTION_MASK_ISA_AVX512VL, CODE_FOR_clzv8si2_mask, "__builtin_ia32_vplzcntd_256_mask", IX86_BUILTIN_VPCLZCNTD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_QI },
32603 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_unpckhpd256_mask, "__builtin_ia32_unpckhpd256_mask", IX86_BUILTIN_UNPCKHPD256_MASK, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DF_QI },
32604 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_unpckhpd128_mask, "__builtin_ia32_unpckhpd128_mask", IX86_BUILTIN_UNPCKHPD128_MASK, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF_QI },
32605 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_unpckhps256_mask, "__builtin_ia32_unpckhps256_mask", IX86_BUILTIN_UNPCKHPS256_MASK, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SF_QI },
32606 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_vec_interleave_highv4sf_mask, "__builtin_ia32_unpckhps128_mask", IX86_BUILTIN_UNPCKHPS128_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF_QI },
32607 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_unpcklpd256_mask, "__builtin_ia32_unpcklpd256_mask", IX86_BUILTIN_UNPCKLPD256_MASK, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DF_QI },
32608 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_unpcklpd128_mask, "__builtin_ia32_unpcklpd128_mask", IX86_BUILTIN_UNPCKLPD128_MASK, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF_QI },
32609 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_unpcklps256_mask, "__builtin_ia32_unpcklps256_mask", IX86_BUILTIN_UNPCKLPS256_MASK, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SF_QI },
32610 { OPTION_MASK_ISA_AVX512CD | OPTION_MASK_ISA_AVX512VL, CODE_FOR_conflictv2di_mask, "__builtin_ia32_vpconflictdi_128_mask", IX86_BUILTIN_VPCONFLICTQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_QI },
32611 { OPTION_MASK_ISA_AVX512CD | OPTION_MASK_ISA_AVX512VL, CODE_FOR_conflictv4si_mask, "__builtin_ia32_vpconflictsi_128_mask", IX86_BUILTIN_VPCONFLICTD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_QI },
32612 { OPTION_MASK_ISA_AVX512CD | OPTION_MASK_ISA_AVX512VL, CODE_FOR_clzv2di2_mask, "__builtin_ia32_vplzcntq_128_mask", IX86_BUILTIN_VPCLZCNTQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_QI },
32613 { OPTION_MASK_ISA_AVX512CD | OPTION_MASK_ISA_AVX512VL, CODE_FOR_clzv4si2_mask, "__builtin_ia32_vplzcntd_128_mask", IX86_BUILTIN_VPCLZCNTD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_QI },
32614 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_unpcklps128_mask, "__builtin_ia32_unpcklps128_mask", IX86_BUILTIN_UNPCKLPS128_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF_QI },
32615 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_alignv8si_mask, "__builtin_ia32_alignd256_mask", IX86_BUILTIN_ALIGND256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_INT_V8SI_QI },
32616 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_alignv4di_mask, "__builtin_ia32_alignq256_mask", IX86_BUILTIN_ALIGNQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_INT_V4DI_QI },
32617 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_alignv4si_mask, "__builtin_ia32_alignd128_mask", IX86_BUILTIN_ALIGND128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_INT_V4SI_QI },
32618 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_alignv2di_mask, "__builtin_ia32_alignq128_mask", IX86_BUILTIN_ALIGNQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_INT_V2DI_QI },
32619 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_vcvtps2ph256_mask, "__builtin_ia32_vcvtps2ph256_mask", IX86_BUILTIN_CVTPS2PH256_MASK, UNKNOWN, (int) V8HI_FTYPE_V8SF_INT_V8HI_QI },
32620 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_vcvtps2ph_mask, "__builtin_ia32_vcvtps2ph_mask", IX86_BUILTIN_CVTPS2PH_MASK, UNKNOWN, (int) V8HI_FTYPE_V4SF_INT_V8HI_QI },
32621 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_vcvtph2ps_mask, "__builtin_ia32_vcvtph2ps_mask", IX86_BUILTIN_CVTPH2PS_MASK, UNKNOWN, (int) V4SF_FTYPE_V8HI_V4SF_QI },
32622 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_vcvtph2ps256_mask, "__builtin_ia32_vcvtph2ps256_mask", IX86_BUILTIN_CVTPH2PS256_MASK, UNKNOWN, (int) V8SF_FTYPE_V8HI_V8SF_QI },
32623 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_vec_interleave_highv4si_mask, "__builtin_ia32_punpckhdq128_mask", IX86_BUILTIN_PUNPCKHDQ128_MASK, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI_QI },
32624 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_interleave_highv8si_mask, "__builtin_ia32_punpckhdq256_mask", IX86_BUILTIN_PUNPCKHDQ256_MASK, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_V8SI_QI },
32625 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_vec_interleave_highv2di_mask, "__builtin_ia32_punpckhqdq128_mask", IX86_BUILTIN_PUNPCKHQDQ128_MASK, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI_QI },
32626 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_interleave_highv4di_mask, "__builtin_ia32_punpckhqdq256_mask", IX86_BUILTIN_PUNPCKHQDQ256_MASK, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_V4DI_QI },
32627 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_vec_interleave_lowv4si_mask, "__builtin_ia32_punpckldq128_mask", IX86_BUILTIN_PUNPCKLDQ128_MASK, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI_QI },
32628 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_interleave_lowv8si_mask, "__builtin_ia32_punpckldq256_mask", IX86_BUILTIN_PUNPCKLDQ256_MASK, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_V8SI_QI },
32629 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_vec_interleave_lowv2di_mask, "__builtin_ia32_punpcklqdq128_mask", IX86_BUILTIN_PUNPCKLQDQ128_MASK, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI_QI },
32630 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_interleave_lowv4di_mask, "__builtin_ia32_punpcklqdq256_mask", IX86_BUILTIN_PUNPCKLQDQ256_MASK, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_V4DI_QI },
32631 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_vec_interleave_highv16qi_mask, "__builtin_ia32_punpckhbw128_mask", IX86_BUILTIN_PUNPCKHBW128_MASK, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_V16QI_HI },
32632 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_interleave_highv32qi_mask, "__builtin_ia32_punpckhbw256_mask", IX86_BUILTIN_PUNPCKHBW256_MASK, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI_V32QI_SI },
32633 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_vec_interleave_highv8hi_mask, "__builtin_ia32_punpckhwd128_mask", IX86_BUILTIN_PUNPCKHWD128_MASK, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_V8HI_QI },
32634 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_interleave_highv16hi_mask, "__builtin_ia32_punpckhwd256_mask", IX86_BUILTIN_PUNPCKHWD256_MASK, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_V16HI_HI },
32635 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_vec_interleave_lowv16qi_mask, "__builtin_ia32_punpcklbw128_mask", IX86_BUILTIN_PUNPCKLBW128_MASK, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_V16QI_HI },
32636 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_interleave_lowv32qi_mask, "__builtin_ia32_punpcklbw256_mask", IX86_BUILTIN_PUNPCKLBW256_MASK, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI_V32QI_SI },
32637 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_vec_interleave_lowv8hi_mask, "__builtin_ia32_punpcklwd128_mask", IX86_BUILTIN_PUNPCKLWD128_MASK, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_V8HI_QI },
32638 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_interleave_lowv16hi_mask, "__builtin_ia32_punpcklwd256_mask", IX86_BUILTIN_PUNPCKLWD256_MASK, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_V16HI_HI },
32639 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ashlvv16hi_mask, "__builtin_ia32_psllv16hi_mask", IX86_BUILTIN_PSLLVV16HI, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_V16HI_HI },
32640 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ashlvv8hi_mask, "__builtin_ia32_psllv8hi_mask", IX86_BUILTIN_PSLLVV8HI, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_V8HI_QI },
32641 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_packssdw_mask, "__builtin_ia32_packssdw256_mask", IX86_BUILTIN_PACKSSDW256_MASK, UNKNOWN, (int) V16HI_FTYPE_V8SI_V8SI_V16HI_HI },
32642 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse2_packssdw_mask, "__builtin_ia32_packssdw128_mask", IX86_BUILTIN_PACKSSDW128_MASK, UNKNOWN, (int) V8HI_FTYPE_V4SI_V4SI_V8HI_QI },
32643 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_packusdw_mask, "__builtin_ia32_packusdw256_mask", IX86_BUILTIN_PACKUSDW256_MASK, UNKNOWN, (int) V16HI_FTYPE_V8SI_V8SI_V16HI_HI },
32644 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse4_1_packusdw_mask, "__builtin_ia32_packusdw128_mask", IX86_BUILTIN_PACKUSDW128_MASK, UNKNOWN, (int) V8HI_FTYPE_V4SI_V4SI_V8HI_QI },
32645 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_uavgv32qi3_mask, "__builtin_ia32_pavgb256_mask", IX86_BUILTIN_PAVGB256_MASK, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI_V32QI_SI },
32646 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_uavgv16hi3_mask, "__builtin_ia32_pavgw256_mask", IX86_BUILTIN_PAVGW256_MASK, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_V16HI_HI },
32647 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse2_uavgv16qi3_mask, "__builtin_ia32_pavgb128_mask", IX86_BUILTIN_PAVGB128_MASK, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_V16QI_HI },
32648 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse2_uavgv8hi3_mask, "__builtin_ia32_pavgw128_mask", IX86_BUILTIN_PAVGW128_MASK, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_V8HI_QI },
32649 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_permvarv8sf_mask, "__builtin_ia32_permvarsf256_mask", IX86_BUILTIN_VPERMVARSF256_MASK, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SI_V8SF_QI },
32650 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_permvarv4df_mask, "__builtin_ia32_permvardf256_mask", IX86_BUILTIN_VPERMVARDF256_MASK, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DI_V4DF_QI },
32651 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_permv4df_mask, "__builtin_ia32_permdf256_mask", IX86_BUILTIN_VPERMDF256_MASK, UNKNOWN, (int) V4DF_FTYPE_V4DF_INT_V4DF_QI },
32652 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_absv32qi2_mask, "__builtin_ia32_pabsb256_mask", IX86_BUILTIN_PABSB256_MASK, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI_SI },
32653 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_absv16qi2_mask, "__builtin_ia32_pabsb128_mask", IX86_BUILTIN_PABSB128_MASK, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_HI },
32654 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_absv16hi2_mask, "__builtin_ia32_pabsw256_mask", IX86_BUILTIN_PABSW256_MASK, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_HI },
32655 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_absv8hi2_mask, "__builtin_ia32_pabsw128_mask", IX86_BUILTIN_PABSW128_MASK, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_QI },
32656 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_vpermilvarv2df3_mask, "__builtin_ia32_vpermilvarpd_mask", IX86_BUILTIN_VPERMILVARPD_MASK, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DI_V2DF_QI },
32657 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_vpermilvarv4sf3_mask, "__builtin_ia32_vpermilvarps_mask", IX86_BUILTIN_VPERMILVARPS_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SI_V4SF_QI },
32658 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_vpermilvarv4df3_mask, "__builtin_ia32_vpermilvarpd256_mask", IX86_BUILTIN_VPERMILVARPD256_MASK, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DI_V4DF_QI },
32659 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_vpermilvarv8sf3_mask, "__builtin_ia32_vpermilvarps256_mask", IX86_BUILTIN_VPERMILVARPS256_MASK, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SI_V8SF_QI },
32660 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_vpermilv2df_mask, "__builtin_ia32_vpermilpd_mask", IX86_BUILTIN_VPERMILPD_MASK, UNKNOWN, (int) V2DF_FTYPE_V2DF_INT_V2DF_QI },
32661 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_vpermilv4sf_mask, "__builtin_ia32_vpermilps_mask", IX86_BUILTIN_VPERMILPS_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SF_INT_V4SF_QI },
32662 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_vpermilv4df_mask, "__builtin_ia32_vpermilpd256_mask", IX86_BUILTIN_VPERMILPD256_MASK, UNKNOWN, (int) V4DF_FTYPE_V4DF_INT_V4DF_QI },
32663 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_vpermilv8sf_mask, "__builtin_ia32_vpermilps256_mask", IX86_BUILTIN_VPERMILPS256_MASK, UNKNOWN, (int) V8SF_FTYPE_V8SF_INT_V8SF_QI },
32664 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_blendmv4di, "__builtin_ia32_blendmq_256_mask", IX86_BUILTIN_BLENDMQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_QI },
32665 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_blendmv8si, "__builtin_ia32_blendmd_256_mask", IX86_BUILTIN_BLENDMD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_QI },
32666 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_blendmv4df, "__builtin_ia32_blendmpd_256_mask", IX86_BUILTIN_BLENDMPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_QI },
32667 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_blendmv8sf, "__builtin_ia32_blendmps_256_mask", IX86_BUILTIN_BLENDMPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_QI },
32668 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_blendmv2di, "__builtin_ia32_blendmq_128_mask", IX86_BUILTIN_BLENDMQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_QI },
32669 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_blendmv4si, "__builtin_ia32_blendmd_128_mask", IX86_BUILTIN_BLENDMD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_QI },
32670 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_blendmv2df, "__builtin_ia32_blendmpd_128_mask", IX86_BUILTIN_BLENDMPD128, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_QI },
32671 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_blendmv4sf, "__builtin_ia32_blendmps_128_mask", IX86_BUILTIN_BLENDMPS128, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_QI },
32672 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_blendmv16hi, "__builtin_ia32_blendmw_256_mask", IX86_BUILTIN_BLENDMW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_HI },
32673 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_blendmv32qi, "__builtin_ia32_blendmb_256_mask", IX86_BUILTIN_BLENDMB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI_SI },
32674 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_blendmv8hi, "__builtin_ia32_blendmw_128_mask", IX86_BUILTIN_BLENDMW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_QI },
32675 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_blendmv16qi, "__builtin_ia32_blendmb_128_mask", IX86_BUILTIN_BLENDMB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_HI },
32676 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_mulv8si3_mask, "__builtin_ia32_pmulld256_mask", IX86_BUILTIN_PMULLD256_MASK, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_V8SI_QI },
32677 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_mulv4si3_mask, "__builtin_ia32_pmulld128_mask", IX86_BUILTIN_PMULLD128_MASK, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI_QI },
32678 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_vec_widen_umult_even_v8si_mask, "__builtin_ia32_pmuludq256_mask", IX86_BUILTIN_PMULUDQ256_MASK, UNKNOWN, (int) V4DI_FTYPE_V8SI_V8SI_V4DI_QI },
32679 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_vec_widen_smult_even_v8si_mask, "__builtin_ia32_pmuldq256_mask", IX86_BUILTIN_PMULDQ256_MASK, UNKNOWN, (int) V4DI_FTYPE_V8SI_V8SI_V4DI_QI },
32680 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse4_1_mulv2siv2di3_mask, "__builtin_ia32_pmuldq128_mask", IX86_BUILTIN_PMULDQ128_MASK, UNKNOWN, (int) V2DI_FTYPE_V4SI_V4SI_V2DI_QI },
32681 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_vec_widen_umult_even_v4si_mask, "__builtin_ia32_pmuludq128_mask", IX86_BUILTIN_PMULUDQ128_MASK, UNKNOWN, (int) V2DI_FTYPE_V4SI_V4SI_V2DI_QI },
32682 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_cvtpd2ps256_mask, "__builtin_ia32_cvtpd2ps256_mask", IX86_BUILTIN_CVTPD2PS256_MASK, UNKNOWN, (int) V4SF_FTYPE_V4DF_V4SF_QI },
32683 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse2_cvtpd2ps_mask, "__builtin_ia32_cvtpd2ps_mask", IX86_BUILTIN_CVTPD2PS_MASK, UNKNOWN, (int) V4SF_FTYPE_V2DF_V4SF_QI },
32684 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_permvarv8si_mask, "__builtin_ia32_permvarsi256_mask", IX86_BUILTIN_VPERMVARSI256_MASK, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_V8SI_QI },
32685 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_permvarv4di_mask, "__builtin_ia32_permvardi256_mask", IX86_BUILTIN_VPERMVARDI256_MASK, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_V4DI_QI },
32686 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_permv4di_mask, "__builtin_ia32_permdi256_mask", IX86_BUILTIN_VPERMDI256_MASK, UNKNOWN, (int) V4DI_FTYPE_V4DI_INT_V4DI_QI },
32687 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_cmpv4di3_mask, "__builtin_ia32_cmpq256_mask", IX86_BUILTIN_CMPQ256, UNKNOWN, (int) QI_FTYPE_V4DI_V4DI_INT_QI },
32688 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_cmpv8si3_mask, "__builtin_ia32_cmpd256_mask", IX86_BUILTIN_CMPD256, UNKNOWN, (int) QI_FTYPE_V8SI_V8SI_INT_QI },
32689 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ucmpv4di3_mask, "__builtin_ia32_ucmpq256_mask", IX86_BUILTIN_UCMPQ256, UNKNOWN, (int) QI_FTYPE_V4DI_V4DI_INT_QI },
32690 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ucmpv8si3_mask, "__builtin_ia32_ucmpd256_mask", IX86_BUILTIN_UCMPD256, UNKNOWN, (int) QI_FTYPE_V8SI_V8SI_INT_QI },
32691 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_cmpv32qi3_mask, "__builtin_ia32_cmpb256_mask", IX86_BUILTIN_CMPB256, UNKNOWN, (int) SI_FTYPE_V32QI_V32QI_INT_SI },
32692 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_cmpv16hi3_mask, "__builtin_ia32_cmpw256_mask", IX86_BUILTIN_CMPW256, UNKNOWN, (int) HI_FTYPE_V16HI_V16HI_INT_HI },
32693 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ucmpv32qi3_mask, "__builtin_ia32_ucmpb256_mask", IX86_BUILTIN_UCMPB256, UNKNOWN, (int) SI_FTYPE_V32QI_V32QI_INT_SI },
32694 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ucmpv16hi3_mask, "__builtin_ia32_ucmpw256_mask", IX86_BUILTIN_UCMPW256, UNKNOWN, (int) HI_FTYPE_V16HI_V16HI_INT_HI },
32695 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_cmpv4df3_mask, "__builtin_ia32_cmppd256_mask", IX86_BUILTIN_CMPPD256_MASK, UNKNOWN, (int) QI_FTYPE_V4DF_V4DF_INT_QI },
32696 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_cmpv8sf3_mask, "__builtin_ia32_cmpps256_mask", IX86_BUILTIN_CMPPS256_MASK, UNKNOWN, (int) QI_FTYPE_V8SF_V8SF_INT_QI },
32697 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_cmpv2di3_mask, "__builtin_ia32_cmpq128_mask", IX86_BUILTIN_CMPQ128, UNKNOWN, (int) QI_FTYPE_V2DI_V2DI_INT_QI },
32698 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_cmpv4si3_mask, "__builtin_ia32_cmpd128_mask", IX86_BUILTIN_CMPD128, UNKNOWN, (int) QI_FTYPE_V4SI_V4SI_INT_QI },
32699 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ucmpv2di3_mask, "__builtin_ia32_ucmpq128_mask", IX86_BUILTIN_UCMPQ128, UNKNOWN, (int) QI_FTYPE_V2DI_V2DI_INT_QI },
32700 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ucmpv4si3_mask, "__builtin_ia32_ucmpd128_mask", IX86_BUILTIN_UCMPD128, UNKNOWN, (int) QI_FTYPE_V4SI_V4SI_INT_QI },
32701 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_cmpv16qi3_mask, "__builtin_ia32_cmpb128_mask", IX86_BUILTIN_CMPB128, UNKNOWN, (int) HI_FTYPE_V16QI_V16QI_INT_HI },
32702 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_cmpv8hi3_mask, "__builtin_ia32_cmpw128_mask", IX86_BUILTIN_CMPW128, UNKNOWN, (int) QI_FTYPE_V8HI_V8HI_INT_QI },
32703 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ucmpv16qi3_mask, "__builtin_ia32_ucmpb128_mask", IX86_BUILTIN_UCMPB128, UNKNOWN, (int) HI_FTYPE_V16QI_V16QI_INT_HI },
32704 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ucmpv8hi3_mask, "__builtin_ia32_ucmpw128_mask", IX86_BUILTIN_UCMPW128, UNKNOWN, (int) QI_FTYPE_V8HI_V8HI_INT_QI },
32705 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_cmpv2df3_mask, "__builtin_ia32_cmppd128_mask", IX86_BUILTIN_CMPPD128_MASK, UNKNOWN, (int) QI_FTYPE_V2DF_V2DF_INT_QI },
32706 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_cmpv4sf3_mask, "__builtin_ia32_cmpps128_mask", IX86_BUILTIN_CMPPS128_MASK, UNKNOWN, (int) QI_FTYPE_V4SF_V4SF_INT_QI },
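/* Note: every row in this table follows the builtin_description layout:
   ISA option mask, insn code of the (usually masked) pattern, the
   __builtin_ia32_* name exposed to the front end, its IX86_BUILTIN_*
   enumerator, an rtx comparison code (UNKNOWN when unused), and the
   function-prototype enumerator cast to int.  As a purely illustrative
   sketch (the wrapper below is the usual avx512vlintrin.h shape, not
   something defined in this file), a masked 256-bit builtin such as
   __builtin_ia32_pmaxsd256_mask above is normally reached through an
   always-inline wrapper along the lines of:

     extern __inline __m256i
     _mm256_mask_max_epi32 (__m256i __W, __mmask8 __M,
                            __m256i __A, __m256i __B)
     {
       return (__m256i) __builtin_ia32_pmaxsd256_mask ((__v8si) __A,
                                                       (__v8si) __B,
                                                       (__v8si) __W,
                                                       (__mmask8) __M);
     }

   i.e. the two sources, the pass-through vector and a QImode mask,
   matching the V8SI_FTYPE_V8SI_V8SI_V8SI_QI prototype.  */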
32708 /* AVX512DQ. */
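/* The rows below register the 512-bit AVX512DQ additions: vfpclass tests,
   vreduce, the full 64-bit integer multiply (vpmullq), the packed-FP
   logical ops, the 32x2/32x8/64x2 broadcasts with their matching
   insert/extract forms, and the vector <-> mask-register conversions.
   As elsewhere in this table, a trailing _QI or _HI in the prototype is
   the write mask, one bit per lane.  */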
32709 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_avx512dq_broadcastv16sf_mask, "__builtin_ia32_broadcastf32x2_512_mask", IX86_BUILTIN_BROADCASTF32x2_512, UNKNOWN, (int) V16SF_FTYPE_V4SF_V16SF_HI },
32710 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_avx512dq_broadcastv16si_mask, "__builtin_ia32_broadcasti32x2_512_mask", IX86_BUILTIN_BROADCASTI32x2_512, UNKNOWN, (int) V16SI_FTYPE_V4SI_V16SI_HI },
32711 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_avx512dq_broadcastv8df_mask_1, "__builtin_ia32_broadcastf64x2_512_mask", IX86_BUILTIN_BROADCASTF64X2_512, UNKNOWN, (int) V8DF_FTYPE_V2DF_V8DF_QI },
32712 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_avx512dq_broadcastv8di_mask_1, "__builtin_ia32_broadcasti64x2_512_mask", IX86_BUILTIN_BROADCASTI64X2_512, UNKNOWN, (int) V8DI_FTYPE_V2DI_V8DI_QI },
32713 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_avx512dq_broadcastv16sf_mask_1, "__builtin_ia32_broadcastf32x8_512_mask", IX86_BUILTIN_BROADCASTF32X8_512, UNKNOWN, (int) V16SF_FTYPE_V8SF_V16SF_HI },
32714 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_avx512dq_broadcastv16si_mask_1, "__builtin_ia32_broadcasti32x8_512_mask", IX86_BUILTIN_BROADCASTI32X8_512, UNKNOWN, (int) V16SI_FTYPE_V8SI_V16SI_HI },
32715 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_avx512dq_vextractf64x2_mask, "__builtin_ia32_extractf64x2_512_mask", IX86_BUILTIN_EXTRACTF64X2_512, UNKNOWN, (int) V2DF_FTYPE_V8DF_INT_V2DF_QI },
32716 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_avx512dq_vextractf32x8_mask, "__builtin_ia32_extractf32x8_mask", IX86_BUILTIN_EXTRACTF32X8, UNKNOWN, (int) V8SF_FTYPE_V16SF_INT_V8SF_QI },
32717 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_avx512dq_vextracti64x2_mask, "__builtin_ia32_extracti64x2_512_mask", IX86_BUILTIN_EXTRACTI64X2_512, UNKNOWN, (int) V2DI_FTYPE_V8DI_INT_V2DI_QI },
32718 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_avx512dq_vextracti32x8_mask, "__builtin_ia32_extracti32x8_mask", IX86_BUILTIN_EXTRACTI32X8, UNKNOWN, (int) V8SI_FTYPE_V16SI_INT_V8SI_QI },
32719 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_reducepv8df_mask, "__builtin_ia32_reducepd512_mask", IX86_BUILTIN_REDUCEPD512_MASK, UNKNOWN, (int) V8DF_FTYPE_V8DF_INT_V8DF_QI },
32720 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_reducepv16sf_mask, "__builtin_ia32_reduceps512_mask", IX86_BUILTIN_REDUCEPS512_MASK, UNKNOWN, (int) V16SF_FTYPE_V16SF_INT_V16SF_HI },
32721 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_avx512dq_mulv8di3_mask, "__builtin_ia32_pmullq512_mask", IX86_BUILTIN_PMULLQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
32722 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_xorv8df3_mask, "__builtin_ia32_xorpd512_mask", IX86_BUILTIN_XORPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI },
32723 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_xorv16sf3_mask, "__builtin_ia32_xorps512_mask", IX86_BUILTIN_XORPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI },
32724 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_iorv8df3_mask, "__builtin_ia32_orpd512_mask", IX86_BUILTIN_ORPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI },
32725 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_iorv16sf3_mask, "__builtin_ia32_orps512_mask", IX86_BUILTIN_ORPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI },
32726 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_andv8df3_mask, "__builtin_ia32_andpd512_mask", IX86_BUILTIN_ANDPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI },
32727 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_andv16sf3_mask, "__builtin_ia32_andps512_mask", IX86_BUILTIN_ANDPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI },
32728 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_avx512f_andnotv8df3_mask, "__builtin_ia32_andnpd512_mask", IX86_BUILTIN_ANDNPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI },
32729 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_avx512f_andnotv16sf3_mask, "__builtin_ia32_andnps512_mask", IX86_BUILTIN_ANDNPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI },
32730 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_avx512dq_vinsertf32x8_mask, "__builtin_ia32_insertf32x8_mask", IX86_BUILTIN_INSERTF32X8, UNKNOWN, (int) V16SF_FTYPE_V16SF_V8SF_INT_V16SF_HI },
32731 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_avx512dq_vinserti32x8_mask, "__builtin_ia32_inserti32x8_mask", IX86_BUILTIN_INSERTI32X8, UNKNOWN, (int) V16SI_FTYPE_V16SI_V8SI_INT_V16SI_HI },
32732 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_avx512dq_vinsertf64x2_mask, "__builtin_ia32_insertf64x2_512_mask", IX86_BUILTIN_INSERTF64X2_512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V2DF_INT_V8DF_QI },
32733 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_avx512dq_vinserti64x2_mask, "__builtin_ia32_inserti64x2_512_mask", IX86_BUILTIN_INSERTI64X2_512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V2DI_INT_V8DI_QI },
32734 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_avx512dq_fpclassv8df_mask, "__builtin_ia32_fpclasspd512_mask", IX86_BUILTIN_FPCLASSPD512, UNKNOWN, (int) QI_FTYPE_V8DF_INT_QI },
32735 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_avx512dq_fpclassv16sf_mask, "__builtin_ia32_fpclassps512_mask", IX86_BUILTIN_FPCLASSPS512, UNKNOWN, (int) HI_FTYPE_V16SF_INT_HI },
32736 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_avx512f_cvtd2maskv16si, "__builtin_ia32_cvtd2mask512", IX86_BUILTIN_CVTD2MASK512, UNKNOWN, (int) HI_FTYPE_V16SI },
32737 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_avx512f_cvtq2maskv8di, "__builtin_ia32_cvtq2mask512", IX86_BUILTIN_CVTQ2MASK512, UNKNOWN, (int) QI_FTYPE_V8DI },
32738 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_avx512f_cvtmask2dv16si, "__builtin_ia32_cvtmask2d512", IX86_BUILTIN_CVTMASK2D512, UNKNOWN, (int) V16SI_FTYPE_HI },
32739 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_avx512f_cvtmask2qv8di, "__builtin_ia32_cvtmask2q512", IX86_BUILTIN_CVTMASK2Q512, UNKNOWN, (int) V8DI_FTYPE_QI },
32741 /* AVX512BW. */
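/* The AVX512BW rows below are the byte/word element forms, so the write
   mask widens accordingly: DImode for 64 byte lanes, SImode for 32 word
   lanes.  kunpcksi/kunpckdi act on the mask registers themselves, and
   prototypes carrying a _CONVERT suffix (pslldq, psrldq, palignr) are
   declared on V8DI but expand through a pattern in a different vector
   mode, so the operands are reinterpreted in the expander.  */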
32742 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_kunpcksi, "__builtin_ia32_kunpcksi", IX86_BUILTIN_KUNPCKWD, UNKNOWN, (int) SI_FTYPE_SI_SI },
32743 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_kunpckdi, "__builtin_ia32_kunpckdi", IX86_BUILTIN_KUNPCKDQ, UNKNOWN, (int) DI_FTYPE_DI_DI },
32744 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_packusdw_mask, "__builtin_ia32_packusdw512_mask", IX86_BUILTIN_PACKUSDW512, UNKNOWN, (int) V32HI_FTYPE_V16SI_V16SI_V32HI_SI },
32745 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_ashlv4ti3, "__builtin_ia32_pslldq512", IX86_BUILTIN_PSLLDQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_INT_CONVERT },
32746 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_lshrv4ti3, "__builtin_ia32_psrldq512", IX86_BUILTIN_PSRLDQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_INT_CONVERT },
32747 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_packssdw_mask, "__builtin_ia32_packssdw512_mask", IX86_BUILTIN_PACKSSDW512, UNKNOWN, (int) V32HI_FTYPE_V16SI_V16SI_V32HI_SI },
32748 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_palignrv4ti, "__builtin_ia32_palignr512", IX86_BUILTIN_PALIGNR512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_INT_CONVERT },
32749 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_palignrv64qi_mask, "__builtin_ia32_palignr512_mask", IX86_BUILTIN_PALIGNR512_MASK, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_INT_V8DI_DI_CONVERT },
32750 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_loaddquv32hi_mask, "__builtin_ia32_movdquhi512_mask", IX86_BUILTIN_MOVDQUHI512_MASK, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_SI },
32751 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512f_loaddquv64qi_mask, "__builtin_ia32_movdquqi512_mask", IX86_BUILTIN_MOVDQUQI512_MASK, UNKNOWN, (int) V64QI_FTYPE_V64QI_V64QI_DI },
32752 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512f_psadbw, "__builtin_ia32_psadbw512", IX86_BUILTIN_PSADBW512, UNKNOWN, (int) V8DI_FTYPE_V64QI_V64QI },
32753 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_dbpsadbwv32hi_mask, "__builtin_ia32_dbpsadbw512_mask", IX86_BUILTIN_DBPSADBW512, UNKNOWN, (int) V32HI_FTYPE_V64QI_V64QI_INT_V32HI_SI },
32754 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_vec_dupv64qi_mask, "__builtin_ia32_pbroadcastb512_mask", IX86_BUILTIN_PBROADCASTB512, UNKNOWN, (int) V64QI_FTYPE_V16QI_V64QI_DI },
32755 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_vec_dup_gprv64qi_mask, "__builtin_ia32_pbroadcastb512_gpr_mask", IX86_BUILTIN_PBROADCASTB512_GPR, UNKNOWN, (int) V64QI_FTYPE_QI_V64QI_DI },
32756 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_vec_dupv32hi_mask, "__builtin_ia32_pbroadcastw512_mask", IX86_BUILTIN_PBROADCASTW512, UNKNOWN, (int) V32HI_FTYPE_V8HI_V32HI_SI },
32757 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_vec_dup_gprv32hi_mask, "__builtin_ia32_pbroadcastw512_gpr_mask", IX86_BUILTIN_PBROADCASTW512_GPR, UNKNOWN, (int) V32HI_FTYPE_HI_V32HI_SI },
32758 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_sign_extendv32qiv32hi2_mask, "__builtin_ia32_pmovsxbw512_mask", IX86_BUILTIN_PMOVSXBW512_MASK, UNKNOWN, (int) V32HI_FTYPE_V32QI_V32HI_SI },
32759 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_zero_extendv32qiv32hi2_mask, "__builtin_ia32_pmovzxbw512_mask", IX86_BUILTIN_PMOVZXBW512_MASK, UNKNOWN, (int) V32HI_FTYPE_V32QI_V32HI_SI },
32760 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_permvarv32hi_mask, "__builtin_ia32_permvarhi512_mask", IX86_BUILTIN_VPERMVARHI512_MASK, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI_SI },
32761 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_vpermt2varv32hi3_mask, "__builtin_ia32_vpermt2varhi512_mask", IX86_BUILTIN_VPERMT2VARHI512, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI_SI },
32762 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_vpermt2varv32hi3_maskz, "__builtin_ia32_vpermt2varhi512_maskz", IX86_BUILTIN_VPERMT2VARHI512_MASKZ, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI_SI },
32763 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_vpermi2varv32hi3_mask, "__builtin_ia32_vpermi2varhi512_mask", IX86_BUILTIN_VPERMI2VARHI512, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI_SI },
32764 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_uavgv64qi3_mask, "__builtin_ia32_pavgb512_mask", IX86_BUILTIN_PAVGB512, UNKNOWN, (int) V64QI_FTYPE_V64QI_V64QI_V64QI_DI },
32765 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_uavgv32hi3_mask, "__builtin_ia32_pavgw512_mask", IX86_BUILTIN_PAVGW512, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI_SI },
32766 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_addv64qi3_mask, "__builtin_ia32_paddb512_mask", IX86_BUILTIN_PADDB512, UNKNOWN, (int) V64QI_FTYPE_V64QI_V64QI_V64QI_DI },
32767 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_subv64qi3_mask, "__builtin_ia32_psubb512_mask", IX86_BUILTIN_PSUBB512, UNKNOWN, (int) V64QI_FTYPE_V64QI_V64QI_V64QI_DI },
32768 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_sssubv64qi3_mask, "__builtin_ia32_psubsb512_mask", IX86_BUILTIN_PSUBSB512, UNKNOWN, (int) V64QI_FTYPE_V64QI_V64QI_V64QI_DI },
32769 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_ssaddv64qi3_mask, "__builtin_ia32_paddsb512_mask", IX86_BUILTIN_PADDSB512, UNKNOWN, (int) V64QI_FTYPE_V64QI_V64QI_V64QI_DI },
32770 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_ussubv64qi3_mask, "__builtin_ia32_psubusb512_mask", IX86_BUILTIN_PSUBUSB512, UNKNOWN, (int) V64QI_FTYPE_V64QI_V64QI_V64QI_DI },
32771 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_usaddv64qi3_mask, "__builtin_ia32_paddusb512_mask", IX86_BUILTIN_PADDUSB512, UNKNOWN, (int) V64QI_FTYPE_V64QI_V64QI_V64QI_DI },
32772 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_subv32hi3_mask, "__builtin_ia32_psubw512_mask", IX86_BUILTIN_PSUBW512, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI_SI },
32773 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_addv32hi3_mask, "__builtin_ia32_paddw512_mask", IX86_BUILTIN_PADDW512, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI_SI },
32774 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_sssubv32hi3_mask, "__builtin_ia32_psubsw512_mask", IX86_BUILTIN_PSUBSW512, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI_SI },
32775 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_ssaddv32hi3_mask, "__builtin_ia32_paddsw512_mask", IX86_BUILTIN_PADDSW512, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI_SI },
32776 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_ussubv32hi3_mask, "__builtin_ia32_psubusw512_mask", IX86_BUILTIN_PSUBUSW512, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI_SI },
32777 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_usaddv32hi3_mask, "__builtin_ia32_paddusw512_mask", IX86_BUILTIN_PADDUSW512, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI_SI },
32778 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_umaxv32hi3_mask, "__builtin_ia32_pmaxuw512_mask", IX86_BUILTIN_PMAXUW512, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI_SI },
32779 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_smaxv32hi3_mask, "__builtin_ia32_pmaxsw512_mask", IX86_BUILTIN_PMAXSW512, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI_SI },
32780 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_uminv32hi3_mask, "__builtin_ia32_pminuw512_mask", IX86_BUILTIN_PMINUW512, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI_SI },
32781 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_sminv32hi3_mask, "__builtin_ia32_pminsw512_mask", IX86_BUILTIN_PMINSW512, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI_SI },
32782 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_umaxv64qi3_mask, "__builtin_ia32_pmaxub512_mask", IX86_BUILTIN_PMAXUB512, UNKNOWN, (int) V64QI_FTYPE_V64QI_V64QI_V64QI_DI },
32783 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_smaxv64qi3_mask, "__builtin_ia32_pmaxsb512_mask", IX86_BUILTIN_PMAXSB512, UNKNOWN, (int) V64QI_FTYPE_V64QI_V64QI_V64QI_DI },
32784 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_uminv64qi3_mask, "__builtin_ia32_pminub512_mask", IX86_BUILTIN_PMINUB512, UNKNOWN, (int) V64QI_FTYPE_V64QI_V64QI_V64QI_DI },
32785 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_sminv64qi3_mask, "__builtin_ia32_pminsb512_mask", IX86_BUILTIN_PMINSB512, UNKNOWN, (int) V64QI_FTYPE_V64QI_V64QI_V64QI_DI },
32786 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_truncatev32hiv32qi2_mask, "__builtin_ia32_pmovwb512_mask", IX86_BUILTIN_PMOVWB512, UNKNOWN, (int) V32QI_FTYPE_V32HI_V32QI_SI },
32787 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_ss_truncatev32hiv32qi2_mask, "__builtin_ia32_pmovswb512_mask", IX86_BUILTIN_PMOVSWB512, UNKNOWN, (int) V32QI_FTYPE_V32HI_V32QI_SI },
32788 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_us_truncatev32hiv32qi2_mask, "__builtin_ia32_pmovuswb512_mask", IX86_BUILTIN_PMOVUSWB512, UNKNOWN, (int) V32QI_FTYPE_V32HI_V32QI_SI },
32789 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_umulhrswv32hi3_mask, "__builtin_ia32_pmulhrsw512_mask", IX86_BUILTIN_PMULHRSW512_MASK, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI_SI },
{ OPTION_MASK_ISA_AVX512BW, CODE_FOR_umulv32hi3_highpart_mask, "__builtin_ia32_pmulhuw512_mask", IX86_BUILTIN_PMULHUW512_MASK, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI_SI },
{ OPTION_MASK_ISA_AVX512BW, CODE_FOR_smulv32hi3_highpart_mask, "__builtin_ia32_pmulhw512_mask", IX86_BUILTIN_PMULHW512_MASK, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI_SI },
32792 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_mulv32hi3_mask, "__builtin_ia32_pmullw512_mask", IX86_BUILTIN_PMULLW512_MASK, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI_SI },
32793 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_ashlv32hi3_mask, "__builtin_ia32_psllwi512_mask", IX86_BUILTIN_PSLLWI512_MASK, UNKNOWN, (int) V32HI_FTYPE_V32HI_INT_V32HI_SI },
32794 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_ashlv32hi3_mask, "__builtin_ia32_psllw512_mask", IX86_BUILTIN_PSLLW512_MASK, UNKNOWN, (int) V32HI_FTYPE_V32HI_V8HI_V32HI_SI },
32795 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_packsswb_mask, "__builtin_ia32_packsswb512_mask", IX86_BUILTIN_PACKSSWB512, UNKNOWN, (int) V64QI_FTYPE_V32HI_V32HI_V64QI_DI },
32796 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_packuswb_mask, "__builtin_ia32_packuswb512_mask", IX86_BUILTIN_PACKUSWB512, UNKNOWN, (int) V64QI_FTYPE_V32HI_V32HI_V64QI_DI },
32797 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_ashrvv32hi_mask, "__builtin_ia32_psrav32hi_mask", IX86_BUILTIN_PSRAVV32HI, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI_SI },
32798 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_pmaddubsw512v32hi_mask, "__builtin_ia32_pmaddubsw512_mask", IX86_BUILTIN_PMADDUBSW512_MASK, UNKNOWN, (int) V32HI_FTYPE_V64QI_V64QI_V32HI_SI },
32799 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_pmaddwd512v32hi_mask, "__builtin_ia32_pmaddwd512_mask", IX86_BUILTIN_PMADDWD512_MASK, UNKNOWN, (int) V16SI_FTYPE_V32HI_V32HI_V16SI_HI },
32800 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_lshrvv32hi_mask, "__builtin_ia32_psrlv32hi_mask", IX86_BUILTIN_PSRLVV32HI, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI_SI },
32801 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_interleave_highv64qi_mask, "__builtin_ia32_punpckhbw512_mask", IX86_BUILTIN_PUNPCKHBW512, UNKNOWN, (int) V64QI_FTYPE_V64QI_V64QI_V64QI_DI },
32802 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_interleave_highv32hi_mask, "__builtin_ia32_punpckhwd512_mask", IX86_BUILTIN_PUNPCKHWD512, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI_SI },
32803 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_interleave_lowv64qi_mask, "__builtin_ia32_punpcklbw512_mask", IX86_BUILTIN_PUNPCKLBW512, UNKNOWN, (int) V64QI_FTYPE_V64QI_V64QI_V64QI_DI },
32804 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_interleave_lowv32hi_mask, "__builtin_ia32_punpcklwd512_mask", IX86_BUILTIN_PUNPCKLWD512, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI_SI },
32805 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_pshufbv64qi3_mask, "__builtin_ia32_pshufb512_mask", IX86_BUILTIN_PSHUFB512, UNKNOWN, (int) V64QI_FTYPE_V64QI_V64QI_V64QI_DI },
32806 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_pshufhwv32hi_mask, "__builtin_ia32_pshufhw512_mask", IX86_BUILTIN_PSHUFHW512, UNKNOWN, (int) V32HI_FTYPE_V32HI_INT_V32HI_SI },
32807 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_pshuflwv32hi_mask, "__builtin_ia32_pshuflw512_mask", IX86_BUILTIN_PSHUFLW512, UNKNOWN, (int) V32HI_FTYPE_V32HI_INT_V32HI_SI },
32808 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_ashrv32hi3_mask, "__builtin_ia32_psrawi512_mask", IX86_BUILTIN_PSRAWI512, UNKNOWN, (int) V32HI_FTYPE_V32HI_INT_V32HI_SI },
32809 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_ashrv32hi3_mask, "__builtin_ia32_psraw512_mask", IX86_BUILTIN_PSRAW512, UNKNOWN, (int) V32HI_FTYPE_V32HI_V8HI_V32HI_SI },
32810 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_lshrv32hi3_mask, "__builtin_ia32_psrlwi512_mask", IX86_BUILTIN_PSRLWI512, UNKNOWN, (int) V32HI_FTYPE_V32HI_INT_V32HI_SI },
32811 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_lshrv32hi3_mask, "__builtin_ia32_psrlw512_mask", IX86_BUILTIN_PSRLW512, UNKNOWN, (int) V32HI_FTYPE_V32HI_V8HI_V32HI_SI },
32812 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_cvtb2maskv64qi, "__builtin_ia32_cvtb2mask512", IX86_BUILTIN_CVTB2MASK512, UNKNOWN, (int) DI_FTYPE_V64QI },
32813 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_cvtw2maskv32hi, "__builtin_ia32_cvtw2mask512", IX86_BUILTIN_CVTW2MASK512, UNKNOWN, (int) SI_FTYPE_V32HI },
32814 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_cvtmask2bv64qi, "__builtin_ia32_cvtmask2b512", IX86_BUILTIN_CVTMASK2B512, UNKNOWN, (int) V64QI_FTYPE_DI },
32815 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_cvtmask2wv32hi, "__builtin_ia32_cvtmask2w512", IX86_BUILTIN_CVTMASK2W512, UNKNOWN, (int) V32HI_FTYPE_SI },
32816 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_eqv64qi3_mask, "__builtin_ia32_pcmpeqb512_mask", IX86_BUILTIN_PCMPEQB512_MASK, UNKNOWN, (int) DI_FTYPE_V64QI_V64QI_DI },
32817 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_eqv32hi3_mask, "__builtin_ia32_pcmpeqw512_mask", IX86_BUILTIN_PCMPEQW512_MASK, UNKNOWN, (int) SI_FTYPE_V32HI_V32HI_SI },
32818 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_gtv64qi3_mask, "__builtin_ia32_pcmpgtb512_mask", IX86_BUILTIN_PCMPGTB512_MASK, UNKNOWN, (int) DI_FTYPE_V64QI_V64QI_DI },
32819 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_gtv32hi3_mask, "__builtin_ia32_pcmpgtw512_mask", IX86_BUILTIN_PCMPGTW512_MASK, UNKNOWN, (int) SI_FTYPE_V32HI_V32HI_SI },
32820 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_testmv64qi3_mask, "__builtin_ia32_ptestmb512", IX86_BUILTIN_PTESTMB512, UNKNOWN, (int) DI_FTYPE_V64QI_V64QI_DI },
32821 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_testmv32hi3_mask, "__builtin_ia32_ptestmw512", IX86_BUILTIN_PTESTMW512, UNKNOWN, (int) SI_FTYPE_V32HI_V32HI_SI },
32822 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_testnmv64qi3_mask, "__builtin_ia32_ptestnmb512", IX86_BUILTIN_PTESTNMB512, UNKNOWN, (int) DI_FTYPE_V64QI_V64QI_DI },
32823 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_testnmv32hi3_mask, "__builtin_ia32_ptestnmw512", IX86_BUILTIN_PTESTNMW512, UNKNOWN, (int) SI_FTYPE_V32HI_V32HI_SI },
32824 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_ashlvv32hi_mask, "__builtin_ia32_psllv32hi_mask", IX86_BUILTIN_PSLLVV32HI, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI_SI },
32825 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_absv64qi2_mask, "__builtin_ia32_pabsb512_mask", IX86_BUILTIN_PABSB512, UNKNOWN, (int) V64QI_FTYPE_V64QI_V64QI_DI },
32826 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_absv32hi2_mask, "__builtin_ia32_pabsw512_mask", IX86_BUILTIN_PABSW512, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_SI },
32827 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_blendmv32hi, "__builtin_ia32_blendmw_512_mask", IX86_BUILTIN_BLENDMW512, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_SI },
32828 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_blendmv64qi, "__builtin_ia32_blendmb_512_mask", IX86_BUILTIN_BLENDMB512, UNKNOWN, (int) V64QI_FTYPE_V64QI_V64QI_DI },
32829 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_cmpv64qi3_mask, "__builtin_ia32_cmpb512_mask", IX86_BUILTIN_CMPB512, UNKNOWN, (int) DI_FTYPE_V64QI_V64QI_INT_DI },
32830 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_cmpv32hi3_mask, "__builtin_ia32_cmpw512_mask", IX86_BUILTIN_CMPW512, UNKNOWN, (int) SI_FTYPE_V32HI_V32HI_INT_SI },
32831 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_ucmpv64qi3_mask, "__builtin_ia32_ucmpb512_mask", IX86_BUILTIN_UCMPB512, UNKNOWN, (int) DI_FTYPE_V64QI_V64QI_INT_DI },
32832 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_ucmpv32hi3_mask, "__builtin_ia32_ucmpw512_mask", IX86_BUILTIN_UCMPW512, UNKNOWN, (int) SI_FTYPE_V32HI_V32HI_INT_SI },
32834 /* AVX512IFMA */
32835 { OPTION_MASK_ISA_AVX512IFMA, CODE_FOR_vpamdd52luqv8di_mask, "__builtin_ia32_vpmadd52luq512_mask", IX86_BUILTIN_VPMADD52LUQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
32836 { OPTION_MASK_ISA_AVX512IFMA, CODE_FOR_vpamdd52luqv8di_maskz, "__builtin_ia32_vpmadd52luq512_maskz", IX86_BUILTIN_VPMADD52LUQ512_MASKZ, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
32837 { OPTION_MASK_ISA_AVX512IFMA, CODE_FOR_vpamdd52huqv8di_mask, "__builtin_ia32_vpmadd52huq512_mask", IX86_BUILTIN_VPMADD52HUQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
32838 { OPTION_MASK_ISA_AVX512IFMA, CODE_FOR_vpamdd52huqv8di_maskz, "__builtin_ia32_vpmadd52huq512_maskz", IX86_BUILTIN_VPMADD52HUQ512_MASKZ, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
32839 { OPTION_MASK_ISA_AVX512IFMA | OPTION_MASK_ISA_AVX512VL, CODE_FOR_vpamdd52luqv4di_mask, "__builtin_ia32_vpmadd52luq256_mask", IX86_BUILTIN_VPMADD52LUQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_V4DI_QI },
32840 { OPTION_MASK_ISA_AVX512IFMA | OPTION_MASK_ISA_AVX512VL, CODE_FOR_vpamdd52luqv4di_maskz, "__builtin_ia32_vpmadd52luq256_maskz", IX86_BUILTIN_VPMADD52LUQ256_MASKZ, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_V4DI_QI },
32841 { OPTION_MASK_ISA_AVX512IFMA | OPTION_MASK_ISA_AVX512VL, CODE_FOR_vpamdd52huqv4di_mask, "__builtin_ia32_vpmadd52huq256_mask", IX86_BUILTIN_VPMADD52HUQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_V4DI_QI },
32842 { OPTION_MASK_ISA_AVX512IFMA | OPTION_MASK_ISA_AVX512VL, CODE_FOR_vpamdd52huqv4di_maskz, "__builtin_ia32_vpmadd52huq256_maskz", IX86_BUILTIN_VPMADD52HUQ256_MASKZ, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_V4DI_QI },
32843 { OPTION_MASK_ISA_AVX512IFMA | OPTION_MASK_ISA_AVX512VL, CODE_FOR_vpamdd52luqv2di_mask, "__builtin_ia32_vpmadd52luq128_mask", IX86_BUILTIN_VPMADD52LUQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI_QI },
32844 { OPTION_MASK_ISA_AVX512IFMA | OPTION_MASK_ISA_AVX512VL, CODE_FOR_vpamdd52luqv2di_maskz, "__builtin_ia32_vpmadd52luq128_maskz", IX86_BUILTIN_VPMADD52LUQ128_MASKZ, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI_QI },
32845 { OPTION_MASK_ISA_AVX512IFMA | OPTION_MASK_ISA_AVX512VL, CODE_FOR_vpamdd52huqv2di_mask, "__builtin_ia32_vpmadd52huq128_mask", IX86_BUILTIN_VPMADD52HUQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI_QI },
32846 { OPTION_MASK_ISA_AVX512IFMA | OPTION_MASK_ISA_AVX512VL, CODE_FOR_vpamdd52huqv2di_maskz, "__builtin_ia32_vpmadd52huq128_maskz", IX86_BUILTIN_VPMADD52HUQ128_MASKZ, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI_QI },
32848 /* AVX512VBMI */
32849 { OPTION_MASK_ISA_AVX512VBMI, CODE_FOR_vpmultishiftqbv64qi_mask, "__builtin_ia32_vpmultishiftqb512_mask", IX86_BUILTIN_VPMULTISHIFTQB512, UNKNOWN, (int) V64QI_FTYPE_V64QI_V64QI_V64QI_DI },
32850 { OPTION_MASK_ISA_AVX512VBMI | OPTION_MASK_ISA_AVX512VL, CODE_FOR_vpmultishiftqbv32qi_mask, "__builtin_ia32_vpmultishiftqb256_mask", IX86_BUILTIN_VPMULTISHIFTQB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI_V32QI_SI },
32851 { OPTION_MASK_ISA_AVX512VBMI | OPTION_MASK_ISA_AVX512VL, CODE_FOR_vpmultishiftqbv16qi_mask, "__builtin_ia32_vpmultishiftqb128_mask", IX86_BUILTIN_VPMULTISHIFTQB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_V16QI_HI },
32852 { OPTION_MASK_ISA_AVX512VBMI, CODE_FOR_avx512bw_permvarv64qi_mask, "__builtin_ia32_permvarqi512_mask", IX86_BUILTIN_VPERMVARQI512_MASK, UNKNOWN, (int) V64QI_FTYPE_V64QI_V64QI_V64QI_DI },
32853 { OPTION_MASK_ISA_AVX512VBMI, CODE_FOR_avx512bw_vpermt2varv64qi3_mask, "__builtin_ia32_vpermt2varqi512_mask", IX86_BUILTIN_VPERMT2VARQI512, UNKNOWN, (int) V64QI_FTYPE_V64QI_V64QI_V64QI_DI },
32854 { OPTION_MASK_ISA_AVX512VBMI, CODE_FOR_avx512bw_vpermt2varv64qi3_maskz, "__builtin_ia32_vpermt2varqi512_maskz", IX86_BUILTIN_VPERMT2VARQI512_MASKZ, UNKNOWN, (int) V64QI_FTYPE_V64QI_V64QI_V64QI_DI },
32855 { OPTION_MASK_ISA_AVX512VBMI, CODE_FOR_avx512bw_vpermi2varv64qi3_mask, "__builtin_ia32_vpermi2varqi512_mask", IX86_BUILTIN_VPERMI2VARQI512, UNKNOWN, (int) V64QI_FTYPE_V64QI_V64QI_V64QI_DI },
32856 { OPTION_MASK_ISA_AVX512VBMI | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_permvarv32qi_mask, "__builtin_ia32_permvarqi256_mask", IX86_BUILTIN_VPERMVARQI256_MASK, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI_V32QI_SI },
32857 { OPTION_MASK_ISA_AVX512VBMI | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_permvarv16qi_mask, "__builtin_ia32_permvarqi128_mask", IX86_BUILTIN_VPERMVARQI128_MASK, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_V16QI_HI },
32858 { OPTION_MASK_ISA_AVX512VBMI | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermt2varv32qi3_mask, "__builtin_ia32_vpermt2varqi256_mask", IX86_BUILTIN_VPERMT2VARQI256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI_V32QI_SI },
{ OPTION_MASK_ISA_AVX512VBMI | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermt2varv32qi3_maskz, "__builtin_ia32_vpermt2varqi256_maskz", IX86_BUILTIN_VPERMT2VARQI256_MASKZ, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI_V32QI_SI },
32860 { OPTION_MASK_ISA_AVX512VBMI | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermt2varv16qi3_mask, "__builtin_ia32_vpermt2varqi128_mask", IX86_BUILTIN_VPERMT2VARQI128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_V16QI_HI },
{ OPTION_MASK_ISA_AVX512VBMI | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermt2varv16qi3_maskz, "__builtin_ia32_vpermt2varqi128_maskz", IX86_BUILTIN_VPERMT2VARQI128_MASKZ, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_V16QI_HI },
32862 { OPTION_MASK_ISA_AVX512VBMI | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermi2varv32qi3_mask, "__builtin_ia32_vpermi2varqi256_mask", IX86_BUILTIN_VPERMI2VARQI256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI_V32QI_SI },
32863 { OPTION_MASK_ISA_AVX512VBMI | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermi2varv16qi3_mask, "__builtin_ia32_vpermi2varqi128_mask", IX86_BUILTIN_VPERMI2VARQI128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_V16QI_HI },
};
32866 /* Builtins with rounding support. */
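/* Each entry below gives the ISA option mask the builtin requires, the insn code used to expand it, the builtin's name, its IX86_BUILTIN_* code, an rtx comparison code (UNKNOWN where unused) and the (int)-cast prototype enumerator.  The trailing _INT in every prototype in this table is the extra rounding/SAE immediate these variants accept. */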
32867 static const struct builtin_description bdesc_round_args[] =
{
32869 /* AVX512F */
32870 { OPTION_MASK_ISA_AVX512F, CODE_FOR_addv8df3_mask_round, "__builtin_ia32_addpd512_mask", IX86_BUILTIN_ADDPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI_INT },
32871 { OPTION_MASK_ISA_AVX512F, CODE_FOR_addv16sf3_mask_round, "__builtin_ia32_addps512_mask", IX86_BUILTIN_ADDPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT },
32872 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse2_vmaddv2df3_round, "__builtin_ia32_addsd_round", IX86_BUILTIN_ADDSD_ROUND, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
32873 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse_vmaddv4sf3_round, "__builtin_ia32_addss_round", IX86_BUILTIN_ADDSS_ROUND, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
32874 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_cmpv8df3_mask_round, "__builtin_ia32_cmppd512_mask", IX86_BUILTIN_CMPPD512, UNKNOWN, (int) QI_FTYPE_V8DF_V8DF_INT_QI_INT },
32875 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_cmpv16sf3_mask_round, "__builtin_ia32_cmpps512_mask", IX86_BUILTIN_CMPPS512, UNKNOWN, (int) HI_FTYPE_V16SF_V16SF_INT_HI_INT },
32876 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vmcmpv2df3_mask_round, "__builtin_ia32_cmpsd_mask", IX86_BUILTIN_CMPSD_MASK, UNKNOWN, (int) QI_FTYPE_V2DF_V2DF_INT_QI_INT },
32877 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vmcmpv4sf3_mask_round, "__builtin_ia32_cmpss_mask", IX86_BUILTIN_CMPSS_MASK, UNKNOWN, (int) QI_FTYPE_V4SF_V4SF_INT_QI_INT },
32878 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse2_comi_round, "__builtin_ia32_vcomisd", IX86_BUILTIN_COMIDF, UNKNOWN, (int) INT_FTYPE_V2DF_V2DF_INT_INT },
32879 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse_comi_round, "__builtin_ia32_vcomiss", IX86_BUILTIN_COMISF, UNKNOWN, (int) INT_FTYPE_V4SF_V4SF_INT_INT },
32880 { OPTION_MASK_ISA_AVX512F, CODE_FOR_floatv16siv16sf2_mask_round, "__builtin_ia32_cvtdq2ps512_mask", IX86_BUILTIN_CVTDQ2PS512, UNKNOWN, (int) V16SF_FTYPE_V16SI_V16SF_HI_INT },
32881 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_cvtpd2dq512_mask_round, "__builtin_ia32_cvtpd2dq512_mask", IX86_BUILTIN_CVTPD2DQ512, UNKNOWN, (int) V8SI_FTYPE_V8DF_V8SI_QI_INT },
32882 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_cvtpd2ps512_mask_round, "__builtin_ia32_cvtpd2ps512_mask", IX86_BUILTIN_CVTPD2PS512, UNKNOWN, (int) V8SF_FTYPE_V8DF_V8SF_QI_INT },
32883 { OPTION_MASK_ISA_AVX512F, CODE_FOR_ufix_notruncv8dfv8si2_mask_round, "__builtin_ia32_cvtpd2udq512_mask", IX86_BUILTIN_CVTPD2UDQ512, UNKNOWN, (int) V8SI_FTYPE_V8DF_V8SI_QI_INT },
32884 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vcvtph2ps512_mask_round, "__builtin_ia32_vcvtph2ps512_mask", IX86_BUILTIN_CVTPH2PS512, UNKNOWN, (int) V16SF_FTYPE_V16HI_V16SF_HI_INT },
32885 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fix_notruncv16sfv16si_mask_round, "__builtin_ia32_cvtps2dq512_mask", IX86_BUILTIN_CVTPS2DQ512, UNKNOWN, (int) V16SI_FTYPE_V16SF_V16SI_HI_INT },
32886 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_cvtps2pd512_mask_round, "__builtin_ia32_cvtps2pd512_mask", IX86_BUILTIN_CVTPS2PD512, UNKNOWN, (int) V8DF_FTYPE_V8SF_V8DF_QI_INT },
32887 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_ufix_notruncv16sfv16si_mask_round, "__builtin_ia32_cvtps2udq512_mask", IX86_BUILTIN_CVTPS2UDQ512, UNKNOWN, (int) V16SI_FTYPE_V16SF_V16SI_HI_INT },
32888 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse2_cvtsd2ss_round, "__builtin_ia32_cvtsd2ss_round", IX86_BUILTIN_CVTSD2SS_ROUND, UNKNOWN, (int) V4SF_FTYPE_V4SF_V2DF_INT },
32889 { OPTION_MASK_ISA_AVX512F | OPTION_MASK_ISA_64BIT, CODE_FOR_sse2_cvtsi2sdq_round, "__builtin_ia32_cvtsi2sd64", IX86_BUILTIN_CVTSI2SD64, UNKNOWN, (int) V2DF_FTYPE_V2DF_INT64_INT },
32890 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse_cvtsi2ss_round, "__builtin_ia32_cvtsi2ss32", IX86_BUILTIN_CVTSI2SS32, UNKNOWN, (int) V4SF_FTYPE_V4SF_INT_INT },
32891 { OPTION_MASK_ISA_AVX512F | OPTION_MASK_ISA_64BIT, CODE_FOR_sse_cvtsi2ssq_round, "__builtin_ia32_cvtsi2ss64", IX86_BUILTIN_CVTSI2SS64, UNKNOWN, (int) V4SF_FTYPE_V4SF_INT64_INT },
32892 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse2_cvtss2sd_round, "__builtin_ia32_cvtss2sd_round", IX86_BUILTIN_CVTSS2SD_ROUND, UNKNOWN, (int) V2DF_FTYPE_V2DF_V4SF_INT },
32893 { OPTION_MASK_ISA_AVX512F, CODE_FOR_fix_truncv8dfv8si2_mask_round, "__builtin_ia32_cvttpd2dq512_mask", IX86_BUILTIN_CVTTPD2DQ512, UNKNOWN, (int) V8SI_FTYPE_V8DF_V8SI_QI_INT },
32894 { OPTION_MASK_ISA_AVX512F, CODE_FOR_ufix_truncv8dfv8si2_mask_round, "__builtin_ia32_cvttpd2udq512_mask", IX86_BUILTIN_CVTTPD2UDQ512, UNKNOWN, (int) V8SI_FTYPE_V8DF_V8SI_QI_INT },
32895 { OPTION_MASK_ISA_AVX512F, CODE_FOR_fix_truncv16sfv16si2_mask_round, "__builtin_ia32_cvttps2dq512_mask", IX86_BUILTIN_CVTTPS2DQ512, UNKNOWN, (int) V16SI_FTYPE_V16SF_V16SI_HI_INT },
32896 { OPTION_MASK_ISA_AVX512F, CODE_FOR_ufix_truncv16sfv16si2_mask_round, "__builtin_ia32_cvttps2udq512_mask", IX86_BUILTIN_CVTTPS2UDQ512, UNKNOWN, (int) V16SI_FTYPE_V16SF_V16SI_HI_INT },
32897 { OPTION_MASK_ISA_AVX512F, CODE_FOR_ufloatv16siv16sf2_mask_round, "__builtin_ia32_cvtudq2ps512_mask", IX86_BUILTIN_CVTUDQ2PS512, UNKNOWN, (int) V16SF_FTYPE_V16SI_V16SF_HI_INT },
32898 { OPTION_MASK_ISA_AVX512F | OPTION_MASK_ISA_64BIT, CODE_FOR_cvtusi2sd64_round, "__builtin_ia32_cvtusi2sd64", IX86_BUILTIN_CVTUSI2SD64, UNKNOWN, (int) V2DF_FTYPE_V2DF_UINT64_INT },
32899 { OPTION_MASK_ISA_AVX512F, CODE_FOR_cvtusi2ss32_round, "__builtin_ia32_cvtusi2ss32", IX86_BUILTIN_CVTUSI2SS32, UNKNOWN, (int) V4SF_FTYPE_V4SF_UINT_INT },
32900 { OPTION_MASK_ISA_AVX512F | OPTION_MASK_ISA_64BIT, CODE_FOR_cvtusi2ss64_round, "__builtin_ia32_cvtusi2ss64", IX86_BUILTIN_CVTUSI2SS64, UNKNOWN, (int) V4SF_FTYPE_V4SF_UINT64_INT },
32901 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_divv8df3_mask_round, "__builtin_ia32_divpd512_mask", IX86_BUILTIN_DIVPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI_INT },
32902 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_divv16sf3_mask_round, "__builtin_ia32_divps512_mask", IX86_BUILTIN_DIVPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT },
32903 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse2_vmdivv2df3_round, "__builtin_ia32_divsd_round", IX86_BUILTIN_DIVSD_ROUND, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
32904 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse_vmdivv4sf3_round, "__builtin_ia32_divss_round", IX86_BUILTIN_DIVSS_ROUND, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
32905 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fixupimmv8df_mask_round, "__builtin_ia32_fixupimmpd512_mask", IX86_BUILTIN_FIXUPIMMPD512_MASK, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DI_INT_QI_INT },
32906 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fixupimmv8df_maskz_round, "__builtin_ia32_fixupimmpd512_maskz", IX86_BUILTIN_FIXUPIMMPD512_MASKZ, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DI_INT_QI_INT },
32907 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fixupimmv16sf_mask_round, "__builtin_ia32_fixupimmps512_mask", IX86_BUILTIN_FIXUPIMMPS512_MASK, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SI_INT_HI_INT },
32908 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fixupimmv16sf_maskz_round, "__builtin_ia32_fixupimmps512_maskz", IX86_BUILTIN_FIXUPIMMPS512_MASKZ, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SI_INT_HI_INT },
32909 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_sfixupimmv2df_mask_round, "__builtin_ia32_fixupimmsd_mask", IX86_BUILTIN_FIXUPIMMSD128_MASK, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DI_INT_QI_INT },
32910 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_sfixupimmv2df_maskz_round, "__builtin_ia32_fixupimmsd_maskz", IX86_BUILTIN_FIXUPIMMSD128_MASKZ, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DI_INT_QI_INT },
32911 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_sfixupimmv4sf_mask_round, "__builtin_ia32_fixupimmss_mask", IX86_BUILTIN_FIXUPIMMSS128_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SI_INT_QI_INT },
32912 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_sfixupimmv4sf_maskz_round, "__builtin_ia32_fixupimmss_maskz", IX86_BUILTIN_FIXUPIMMSS128_MASKZ, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SI_INT_QI_INT },
32913 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_getexpv8df_mask_round, "__builtin_ia32_getexppd512_mask", IX86_BUILTIN_GETEXPPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_QI_INT },
32914 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_getexpv16sf_mask_round, "__builtin_ia32_getexpps512_mask", IX86_BUILTIN_GETEXPPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_HI_INT },
32915 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_sgetexpv2df_round, "__builtin_ia32_getexpsd128_round", IX86_BUILTIN_GETEXPSD128, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
32916 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_sgetexpv4sf_round, "__builtin_ia32_getexpss128_round", IX86_BUILTIN_GETEXPSS128, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
32917 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_getmantv8df_mask_round, "__builtin_ia32_getmantpd512_mask", IX86_BUILTIN_GETMANTPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_INT_V8DF_QI_INT },
32918 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_getmantv16sf_mask_round, "__builtin_ia32_getmantps512_mask", IX86_BUILTIN_GETMANTPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_INT_V16SF_HI_INT },
32919 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vgetmantv2df_round, "__builtin_ia32_getmantsd_round", IX86_BUILTIN_GETMANTSD128, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT_INT },
32920 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vgetmantv4sf_round, "__builtin_ia32_getmantss_round", IX86_BUILTIN_GETMANTSS128, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT_INT },
32921 { OPTION_MASK_ISA_AVX512F, CODE_FOR_smaxv8df3_mask_round, "__builtin_ia32_maxpd512_mask", IX86_BUILTIN_MAXPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI_INT },
32922 { OPTION_MASK_ISA_AVX512F, CODE_FOR_smaxv16sf3_mask_round, "__builtin_ia32_maxps512_mask", IX86_BUILTIN_MAXPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT },
32923 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse2_vmsmaxv2df3_round, "__builtin_ia32_maxsd_round", IX86_BUILTIN_MAXSD_ROUND, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
32924 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse_vmsmaxv4sf3_round, "__builtin_ia32_maxss_round", IX86_BUILTIN_MAXSS_ROUND, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
32925 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sminv8df3_mask_round, "__builtin_ia32_minpd512_mask", IX86_BUILTIN_MINPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI_INT },
32926 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sminv16sf3_mask_round, "__builtin_ia32_minps512_mask", IX86_BUILTIN_MINPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT },
32927 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse2_vmsminv2df3_round, "__builtin_ia32_minsd_round", IX86_BUILTIN_MINSD_ROUND, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
32928 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse_vmsminv4sf3_round, "__builtin_ia32_minss_round", IX86_BUILTIN_MINSS_ROUND, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
32929 { OPTION_MASK_ISA_AVX512F, CODE_FOR_mulv8df3_mask_round, "__builtin_ia32_mulpd512_mask", IX86_BUILTIN_MULPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI_INT },
32930 { OPTION_MASK_ISA_AVX512F, CODE_FOR_mulv16sf3_mask_round, "__builtin_ia32_mulps512_mask", IX86_BUILTIN_MULPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT },
32931 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse2_vmmulv2df3_round, "__builtin_ia32_mulsd_round", IX86_BUILTIN_MULSD_ROUND, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
32932 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse_vmmulv4sf3_round, "__builtin_ia32_mulss_round", IX86_BUILTIN_MULSS_ROUND, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
32933 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_rndscalev8df_mask_round, "__builtin_ia32_rndscalepd_mask", IX86_BUILTIN_RNDSCALEPD, UNKNOWN, (int) V8DF_FTYPE_V8DF_INT_V8DF_QI_INT },
32934 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_rndscalev16sf_mask_round, "__builtin_ia32_rndscaleps_mask", IX86_BUILTIN_RNDSCALEPS, UNKNOWN, (int) V16SF_FTYPE_V16SF_INT_V16SF_HI_INT },
32935 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_rndscalev2df_round, "__builtin_ia32_rndscalesd_round", IX86_BUILTIN_RNDSCALESD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT_INT },
32936 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_rndscalev4sf_round, "__builtin_ia32_rndscaless_round", IX86_BUILTIN_RNDSCALESS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT_INT },
32937 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_scalefv8df_mask_round, "__builtin_ia32_scalefpd512_mask", IX86_BUILTIN_SCALEFPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI_INT },
32938 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_scalefv16sf_mask_round, "__builtin_ia32_scalefps512_mask", IX86_BUILTIN_SCALEFPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT },
32939 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vmscalefv2df_round, "__builtin_ia32_scalefsd_round", IX86_BUILTIN_SCALEFSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
32940 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vmscalefv4sf_round, "__builtin_ia32_scalefss_round", IX86_BUILTIN_SCALEFSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
32941 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_sqrtv8df2_mask_round, "__builtin_ia32_sqrtpd512_mask", IX86_BUILTIN_SQRTPD512_MASK, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_QI_INT },
32942 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_sqrtv16sf2_mask_round, "__builtin_ia32_sqrtps512_mask", IX86_BUILTIN_SQRTPS512_MASK, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_HI_INT },
32943 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse2_vmsqrtv2df2_round, "__builtin_ia32_sqrtsd_round", IX86_BUILTIN_SQRTSD_ROUND, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
32944 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse_vmsqrtv4sf2_round, "__builtin_ia32_sqrtss_round", IX86_BUILTIN_SQRTSS_ROUND, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
32945 { OPTION_MASK_ISA_AVX512F, CODE_FOR_subv8df3_mask_round, "__builtin_ia32_subpd512_mask", IX86_BUILTIN_SUBPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI_INT },
32946 { OPTION_MASK_ISA_AVX512F, CODE_FOR_subv16sf3_mask_round, "__builtin_ia32_subps512_mask", IX86_BUILTIN_SUBPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT },
32947 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse2_vmsubv2df3_round, "__builtin_ia32_subsd_round", IX86_BUILTIN_SUBSD_ROUND, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
32948 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse_vmsubv4sf3_round, "__builtin_ia32_subss_round", IX86_BUILTIN_SUBSS_ROUND, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
32949 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse2_cvtsd2si_round, "__builtin_ia32_vcvtsd2si32", IX86_BUILTIN_VCVTSD2SI32, UNKNOWN, (int) INT_FTYPE_V2DF_INT },
32950 { OPTION_MASK_ISA_AVX512F | OPTION_MASK_ISA_64BIT, CODE_FOR_sse2_cvtsd2siq_round, "__builtin_ia32_vcvtsd2si64", IX86_BUILTIN_VCVTSD2SI64, UNKNOWN, (int) INT64_FTYPE_V2DF_INT },
32951 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vcvtsd2usi_round, "__builtin_ia32_vcvtsd2usi32", IX86_BUILTIN_VCVTSD2USI32, UNKNOWN, (int) UINT_FTYPE_V2DF_INT },
32952 { OPTION_MASK_ISA_AVX512F | OPTION_MASK_ISA_64BIT, CODE_FOR_avx512f_vcvtsd2usiq_round, "__builtin_ia32_vcvtsd2usi64", IX86_BUILTIN_VCVTSD2USI64, UNKNOWN, (int) UINT64_FTYPE_V2DF_INT },
32953 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse_cvtss2si_round, "__builtin_ia32_vcvtss2si32", IX86_BUILTIN_VCVTSS2SI32, UNKNOWN, (int) INT_FTYPE_V4SF_INT },
32954 { OPTION_MASK_ISA_AVX512F | OPTION_MASK_ISA_64BIT, CODE_FOR_sse_cvtss2siq_round, "__builtin_ia32_vcvtss2si64", IX86_BUILTIN_VCVTSS2SI64, UNKNOWN, (int) INT64_FTYPE_V4SF_INT },
32955 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vcvtss2usi_round, "__builtin_ia32_vcvtss2usi32", IX86_BUILTIN_VCVTSS2USI32, UNKNOWN, (int) UINT_FTYPE_V4SF_INT },
32956 { OPTION_MASK_ISA_AVX512F | OPTION_MASK_ISA_64BIT, CODE_FOR_avx512f_vcvtss2usiq_round, "__builtin_ia32_vcvtss2usi64", IX86_BUILTIN_VCVTSS2USI64, UNKNOWN, (int) UINT64_FTYPE_V4SF_INT },
32957 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse2_cvttsd2si_round, "__builtin_ia32_vcvttsd2si32", IX86_BUILTIN_VCVTTSD2SI32, UNKNOWN, (int) INT_FTYPE_V2DF_INT },
32958 { OPTION_MASK_ISA_AVX512F | OPTION_MASK_ISA_64BIT, CODE_FOR_sse2_cvttsd2siq_round, "__builtin_ia32_vcvttsd2si64", IX86_BUILTIN_VCVTTSD2SI64, UNKNOWN, (int) INT64_FTYPE_V2DF_INT },
32959 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vcvttsd2usi_round, "__builtin_ia32_vcvttsd2usi32", IX86_BUILTIN_VCVTTSD2USI32, UNKNOWN, (int) UINT_FTYPE_V2DF_INT },
32960 { OPTION_MASK_ISA_AVX512F | OPTION_MASK_ISA_64BIT, CODE_FOR_avx512f_vcvttsd2usiq_round, "__builtin_ia32_vcvttsd2usi64", IX86_BUILTIN_VCVTTSD2USI64, UNKNOWN, (int) UINT64_FTYPE_V2DF_INT },
32961 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse_cvttss2si_round, "__builtin_ia32_vcvttss2si32", IX86_BUILTIN_VCVTTSS2SI32, UNKNOWN, (int) INT_FTYPE_V4SF_INT },
32962 { OPTION_MASK_ISA_AVX512F | OPTION_MASK_ISA_64BIT, CODE_FOR_sse_cvttss2siq_round, "__builtin_ia32_vcvttss2si64", IX86_BUILTIN_VCVTTSS2SI64, UNKNOWN, (int) INT64_FTYPE_V4SF_INT },
32963 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vcvttss2usi_round, "__builtin_ia32_vcvttss2usi32", IX86_BUILTIN_VCVTTSS2USI32, UNKNOWN, (int) UINT_FTYPE_V4SF_INT },
32964 { OPTION_MASK_ISA_AVX512F | OPTION_MASK_ISA_64BIT, CODE_FOR_avx512f_vcvttss2usiq_round, "__builtin_ia32_vcvttss2usi64", IX86_BUILTIN_VCVTTSS2USI64, UNKNOWN, (int) UINT64_FTYPE_V4SF_INT },
32965 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fmadd_v8df_mask_round, "__builtin_ia32_vfmaddpd512_mask", IX86_BUILTIN_VFMADDPD512_MASK, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI_INT },
32966 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fmadd_v8df_mask3_round, "__builtin_ia32_vfmaddpd512_mask3", IX86_BUILTIN_VFMADDPD512_MASK3, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI_INT },
32967 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fmadd_v8df_maskz_round, "__builtin_ia32_vfmaddpd512_maskz", IX86_BUILTIN_VFMADDPD512_MASKZ, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI_INT },
32968 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fmadd_v16sf_mask_round, "__builtin_ia32_vfmaddps512_mask", IX86_BUILTIN_VFMADDPS512_MASK, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT },
32969 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fmadd_v16sf_mask3_round, "__builtin_ia32_vfmaddps512_mask3", IX86_BUILTIN_VFMADDPS512_MASK3, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT },
32970 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fmadd_v16sf_maskz_round, "__builtin_ia32_vfmaddps512_maskz", IX86_BUILTIN_VFMADDPS512_MASKZ, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT },
32971 { OPTION_MASK_ISA_AVX512F, CODE_FOR_fmai_vmfmadd_v2df_round, "__builtin_ia32_vfmaddsd3_round", IX86_BUILTIN_VFMADDSD3_ROUND, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF_INT },
32972 { OPTION_MASK_ISA_AVX512F, CODE_FOR_fmai_vmfmadd_v4sf_round, "__builtin_ia32_vfmaddss3_round", IX86_BUILTIN_VFMADDSS3_ROUND, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF_INT },
32973 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fmaddsub_v8df_mask_round, "__builtin_ia32_vfmaddsubpd512_mask", IX86_BUILTIN_VFMADDSUBPD512_MASK, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI_INT },
32974 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fmaddsub_v8df_mask3_round, "__builtin_ia32_vfmaddsubpd512_mask3", IX86_BUILTIN_VFMADDSUBPD512_MASK3, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI_INT },
32975 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fmaddsub_v8df_maskz_round, "__builtin_ia32_vfmaddsubpd512_maskz", IX86_BUILTIN_VFMADDSUBPD512_MASKZ, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI_INT },
32976 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fmaddsub_v16sf_mask_round, "__builtin_ia32_vfmaddsubps512_mask", IX86_BUILTIN_VFMADDSUBPS512_MASK, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT },
32977 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fmaddsub_v16sf_mask3_round, "__builtin_ia32_vfmaddsubps512_mask3", IX86_BUILTIN_VFMADDSUBPS512_MASK3, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT },
32978 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fmaddsub_v16sf_maskz_round, "__builtin_ia32_vfmaddsubps512_maskz", IX86_BUILTIN_VFMADDSUBPS512_MASKZ, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT },
32979 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fmsubadd_v8df_mask3_round, "__builtin_ia32_vfmsubaddpd512_mask3", IX86_BUILTIN_VFMSUBADDPD512_MASK3, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI_INT },
32980 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fmsubadd_v16sf_mask3_round, "__builtin_ia32_vfmsubaddps512_mask3", IX86_BUILTIN_VFMSUBADDPS512_MASK3, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT },
32981 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fmsub_v8df_mask3_round, "__builtin_ia32_vfmsubpd512_mask3", IX86_BUILTIN_VFMSUBPD512_MASK3, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI_INT },
32982 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fmsub_v16sf_mask3_round, "__builtin_ia32_vfmsubps512_mask3", IX86_BUILTIN_VFMSUBPS512_MASK3, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT },
32983 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fnmadd_v8df_mask_round, "__builtin_ia32_vfnmaddpd512_mask", IX86_BUILTIN_VFNMADDPD512_MASK, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI_INT },
32984 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fnmadd_v16sf_mask_round, "__builtin_ia32_vfnmaddps512_mask", IX86_BUILTIN_VFNMADDPS512_MASK, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT },
32985 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fnmsub_v8df_mask_round, "__builtin_ia32_vfnmsubpd512_mask", IX86_BUILTIN_VFNMSUBPD512_MASK, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI_INT },
32986 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fnmsub_v8df_mask3_round, "__builtin_ia32_vfnmsubpd512_mask3", IX86_BUILTIN_VFNMSUBPD512_MASK3, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI_INT },
32987 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fnmsub_v16sf_mask_round, "__builtin_ia32_vfnmsubps512_mask", IX86_BUILTIN_VFNMSUBPS512_MASK, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT },
32988 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fnmsub_v16sf_mask3_round, "__builtin_ia32_vfnmsubps512_mask3", IX86_BUILTIN_VFNMSUBPS512_MASK3, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT },
32990 /* AVX512ER */
32991 { OPTION_MASK_ISA_AVX512ER, CODE_FOR_avx512er_exp2v8df_mask_round, "__builtin_ia32_exp2pd_mask", IX86_BUILTIN_EXP2PD_MASK, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_QI_INT },
32992 { OPTION_MASK_ISA_AVX512ER, CODE_FOR_avx512er_exp2v16sf_mask_round, "__builtin_ia32_exp2ps_mask", IX86_BUILTIN_EXP2PS_MASK, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_HI_INT },
32993 { OPTION_MASK_ISA_AVX512ER, CODE_FOR_avx512er_rcp28v8df_mask_round, "__builtin_ia32_rcp28pd_mask", IX86_BUILTIN_RCP28PD, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_QI_INT },
32994 { OPTION_MASK_ISA_AVX512ER, CODE_FOR_avx512er_rcp28v16sf_mask_round, "__builtin_ia32_rcp28ps_mask", IX86_BUILTIN_RCP28PS, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_HI_INT },
32995 { OPTION_MASK_ISA_AVX512ER, CODE_FOR_avx512er_vmrcp28v2df_round, "__builtin_ia32_rcp28sd_round", IX86_BUILTIN_RCP28SD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
32996 { OPTION_MASK_ISA_AVX512ER, CODE_FOR_avx512er_vmrcp28v4sf_round, "__builtin_ia32_rcp28ss_round", IX86_BUILTIN_RCP28SS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
32997 { OPTION_MASK_ISA_AVX512ER, CODE_FOR_avx512er_rsqrt28v8df_mask_round, "__builtin_ia32_rsqrt28pd_mask", IX86_BUILTIN_RSQRT28PD, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_QI_INT },
32998 { OPTION_MASK_ISA_AVX512ER, CODE_FOR_avx512er_rsqrt28v16sf_mask_round, "__builtin_ia32_rsqrt28ps_mask", IX86_BUILTIN_RSQRT28PS, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_HI_INT },
32999 { OPTION_MASK_ISA_AVX512ER, CODE_FOR_avx512er_vmrsqrt28v2df_round, "__builtin_ia32_rsqrt28sd_round", IX86_BUILTIN_RSQRT28SD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
33000 { OPTION_MASK_ISA_AVX512ER, CODE_FOR_avx512er_vmrsqrt28v4sf_round, "__builtin_ia32_rsqrt28ss_round", IX86_BUILTIN_RSQRT28SS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
33002 /* AVX512DQ. */
33003 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_avx512dq_rangesv2df_round, "__builtin_ia32_rangesd128_round", IX86_BUILTIN_RANGESD128, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT_INT },
33004 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_avx512dq_rangesv4sf_round, "__builtin_ia32_rangess128_round", IX86_BUILTIN_RANGESS128, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT_INT },
33005 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_fix_notruncv8dfv8di2_mask_round, "__builtin_ia32_cvtpd2qq512_mask", IX86_BUILTIN_CVTPD2QQ512, UNKNOWN, (int) V8DI_FTYPE_V8DF_V8DI_QI_INT },
33006 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_avx512dq_cvtps2qqv8di_mask_round, "__builtin_ia32_cvtps2qq512_mask", IX86_BUILTIN_CVTPS2QQ512, UNKNOWN, (int) V8DI_FTYPE_V8SF_V8DI_QI_INT },
33007 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_ufix_notruncv8dfv8di2_mask_round, "__builtin_ia32_cvtpd2uqq512_mask", IX86_BUILTIN_CVTPD2UQQ512, UNKNOWN, (int) V8DI_FTYPE_V8DF_V8DI_QI_INT },
33008 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_avx512dq_cvtps2uqqv8di_mask_round, "__builtin_ia32_cvtps2uqq512_mask", IX86_BUILTIN_CVTPS2UQQ512, UNKNOWN, (int) V8DI_FTYPE_V8SF_V8DI_QI_INT },
33009 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_floatv8div8sf2_mask_round, "__builtin_ia32_cvtqq2ps512_mask", IX86_BUILTIN_CVTQQ2PS512, UNKNOWN, (int) V8SF_FTYPE_V8DI_V8SF_QI_INT },
33010 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_ufloatv8div8sf2_mask_round, "__builtin_ia32_cvtuqq2ps512_mask", IX86_BUILTIN_CVTUQQ2PS512, UNKNOWN, (int) V8SF_FTYPE_V8DI_V8SF_QI_INT },
33011 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_floatv8div8df2_mask_round, "__builtin_ia32_cvtqq2pd512_mask", IX86_BUILTIN_CVTQQ2PD512, UNKNOWN, (int) V8DF_FTYPE_V8DI_V8DF_QI_INT },
33012 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_ufloatv8div8df2_mask_round, "__builtin_ia32_cvtuqq2pd512_mask", IX86_BUILTIN_CVTUQQ2PD512, UNKNOWN, (int) V8DF_FTYPE_V8DI_V8DF_QI_INT },
33013 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_fix_truncv8sfv8di2_mask_round, "__builtin_ia32_cvttps2qq512_mask", IX86_BUILTIN_CVTTPS2QQ512, UNKNOWN, (int) V8DI_FTYPE_V8SF_V8DI_QI_INT },
33014 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_ufix_truncv8sfv8di2_mask_round, "__builtin_ia32_cvttps2uqq512_mask", IX86_BUILTIN_CVTTPS2UQQ512, UNKNOWN, (int) V8DI_FTYPE_V8SF_V8DI_QI_INT },
33015 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_fix_truncv8dfv8di2_mask_round, "__builtin_ia32_cvttpd2qq512_mask", IX86_BUILTIN_CVTTPD2QQ512, UNKNOWN, (int) V8DI_FTYPE_V8DF_V8DI_QI_INT },
33016 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_ufix_truncv8dfv8di2_mask_round, "__builtin_ia32_cvttpd2uqq512_mask", IX86_BUILTIN_CVTTPD2UQQ512, UNKNOWN, (int) V8DI_FTYPE_V8DF_V8DI_QI_INT },
33017 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_avx512dq_rangepv16sf_mask_round, "__builtin_ia32_rangeps512_mask", IX86_BUILTIN_RANGEPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_INT_V16SF_HI_INT },
33018 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_avx512dq_rangepv8df_mask_round, "__builtin_ia32_rangepd512_mask", IX86_BUILTIN_RANGEPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_INT_V8DF_QI_INT },
};
/* Builtins for MPX. */
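/* The MPX entries here and in bdesc_mpx_const below carry no insn pattern (the insn code is 0); these builtins are expanded by dedicated code rather than through a single named pattern. */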
33022 static const struct builtin_description bdesc_mpx[] =
{
33024 { OPTION_MASK_ISA_MPX, (enum insn_code)0, "__builtin_ia32_bndstx", IX86_BUILTIN_BNDSTX, UNKNOWN, (int) VOID_FTYPE_PCVOID_BND_PCVOID },
33025 { OPTION_MASK_ISA_MPX, (enum insn_code)0, "__builtin_ia32_bndcl", IX86_BUILTIN_BNDCL, UNKNOWN, (int) VOID_FTYPE_PCVOID_BND },
33026 { OPTION_MASK_ISA_MPX, (enum insn_code)0, "__builtin_ia32_bndcu", IX86_BUILTIN_BNDCU, UNKNOWN, (int) VOID_FTYPE_PCVOID_BND },
};
33029 /* Const builtins for MPX. */
33030 static const struct builtin_description bdesc_mpx_const[] =
{
33032 { OPTION_MASK_ISA_MPX, (enum insn_code)0, "__builtin_ia32_bndmk", IX86_BUILTIN_BNDMK, UNKNOWN, (int) BND_FTYPE_PCVOID_ULONG },
33033 { OPTION_MASK_ISA_MPX, (enum insn_code)0, "__builtin_ia32_bndldx", IX86_BUILTIN_BNDLDX, UNKNOWN, (int) BND_FTYPE_PCVOID_PCVOID },
33034 { OPTION_MASK_ISA_MPX, (enum insn_code)0, "__builtin_ia32_narrow_bounds", IX86_BUILTIN_BNDNARROW, UNKNOWN, (int) PVOID_FTYPE_PCVOID_BND_ULONG },
33035 { OPTION_MASK_ISA_MPX, (enum insn_code)0, "__builtin_ia32_bndint", IX86_BUILTIN_BNDINT, UNKNOWN, (int) BND_FTYPE_BND_BND },
33036 { OPTION_MASK_ISA_MPX, (enum insn_code)0, "__builtin_ia32_sizeof", IX86_BUILTIN_SIZEOF, UNKNOWN, (int) ULONG_FTYPE_VOID },
33037 { OPTION_MASK_ISA_MPX, (enum insn_code)0, "__builtin_ia32_bndlower", IX86_BUILTIN_BNDLOWER, UNKNOWN, (int) PVOID_FTYPE_BND },
33038 { OPTION_MASK_ISA_MPX, (enum insn_code)0, "__builtin_ia32_bndupper", IX86_BUILTIN_BNDUPPER, UNKNOWN, (int) PVOID_FTYPE_BND },
33039 { OPTION_MASK_ISA_MPX, (enum insn_code)0, "__builtin_ia32_bndret", IX86_BUILTIN_BNDRET, UNKNOWN, (int) BND_FTYPE_PCVOID },
};
33042 /* FMA4 and XOP. */
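/* The MULTI_ARG_* macros below are shorthand for the prototype enumerators used by the FMA4/XOP entries in bdesc_multi_arg. */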
33043 #define MULTI_ARG_4_DF2_DI_I V2DF_FTYPE_V2DF_V2DF_V2DI_INT
33044 #define MULTI_ARG_4_DF2_DI_I1 V4DF_FTYPE_V4DF_V4DF_V4DI_INT
33045 #define MULTI_ARG_4_SF2_SI_I V4SF_FTYPE_V4SF_V4SF_V4SI_INT
33046 #define MULTI_ARG_4_SF2_SI_I1 V8SF_FTYPE_V8SF_V8SF_V8SI_INT
33047 #define MULTI_ARG_3_SF V4SF_FTYPE_V4SF_V4SF_V4SF
33048 #define MULTI_ARG_3_DF V2DF_FTYPE_V2DF_V2DF_V2DF
33049 #define MULTI_ARG_3_SF2 V8SF_FTYPE_V8SF_V8SF_V8SF
33050 #define MULTI_ARG_3_DF2 V4DF_FTYPE_V4DF_V4DF_V4DF
33051 #define MULTI_ARG_3_DI V2DI_FTYPE_V2DI_V2DI_V2DI
33052 #define MULTI_ARG_3_SI V4SI_FTYPE_V4SI_V4SI_V4SI
33053 #define MULTI_ARG_3_SI_DI V4SI_FTYPE_V4SI_V4SI_V2DI
33054 #define MULTI_ARG_3_HI V8HI_FTYPE_V8HI_V8HI_V8HI
33055 #define MULTI_ARG_3_HI_SI V8HI_FTYPE_V8HI_V8HI_V4SI
33056 #define MULTI_ARG_3_QI V16QI_FTYPE_V16QI_V16QI_V16QI
33057 #define MULTI_ARG_3_DI2 V4DI_FTYPE_V4DI_V4DI_V4DI
33058 #define MULTI_ARG_3_SI2 V8SI_FTYPE_V8SI_V8SI_V8SI
33059 #define MULTI_ARG_3_HI2 V16HI_FTYPE_V16HI_V16HI_V16HI
33060 #define MULTI_ARG_3_QI2 V32QI_FTYPE_V32QI_V32QI_V32QI
33061 #define MULTI_ARG_2_SF V4SF_FTYPE_V4SF_V4SF
33062 #define MULTI_ARG_2_DF V2DF_FTYPE_V2DF_V2DF
33063 #define MULTI_ARG_2_DI V2DI_FTYPE_V2DI_V2DI
33064 #define MULTI_ARG_2_SI V4SI_FTYPE_V4SI_V4SI
33065 #define MULTI_ARG_2_HI V8HI_FTYPE_V8HI_V8HI
33066 #define MULTI_ARG_2_QI V16QI_FTYPE_V16QI_V16QI
33067 #define MULTI_ARG_2_DI_IMM V2DI_FTYPE_V2DI_SI
33068 #define MULTI_ARG_2_SI_IMM V4SI_FTYPE_V4SI_SI
33069 #define MULTI_ARG_2_HI_IMM V8HI_FTYPE_V8HI_SI
33070 #define MULTI_ARG_2_QI_IMM V16QI_FTYPE_V16QI_SI
33071 #define MULTI_ARG_2_DI_CMP V2DI_FTYPE_V2DI_V2DI_CMP
33072 #define MULTI_ARG_2_SI_CMP V4SI_FTYPE_V4SI_V4SI_CMP
33073 #define MULTI_ARG_2_HI_CMP V8HI_FTYPE_V8HI_V8HI_CMP
33074 #define MULTI_ARG_2_QI_CMP V16QI_FTYPE_V16QI_V16QI_CMP
33075 #define MULTI_ARG_2_SF_TF V4SF_FTYPE_V4SF_V4SF_TF
33076 #define MULTI_ARG_2_DF_TF V2DF_FTYPE_V2DF_V2DF_TF
33077 #define MULTI_ARG_2_DI_TF V2DI_FTYPE_V2DI_V2DI_TF
33078 #define MULTI_ARG_2_SI_TF V4SI_FTYPE_V4SI_V4SI_TF
33079 #define MULTI_ARG_2_HI_TF V8HI_FTYPE_V8HI_V8HI_TF
33080 #define MULTI_ARG_2_QI_TF V16QI_FTYPE_V16QI_V16QI_TF
33081 #define MULTI_ARG_1_SF V4SF_FTYPE_V4SF
33082 #define MULTI_ARG_1_DF V2DF_FTYPE_V2DF
33083 #define MULTI_ARG_1_SF2 V8SF_FTYPE_V8SF
33084 #define MULTI_ARG_1_DF2 V4DF_FTYPE_V4DF
33085 #define MULTI_ARG_1_DI V2DI_FTYPE_V2DI
33086 #define MULTI_ARG_1_SI V4SI_FTYPE_V4SI
33087 #define MULTI_ARG_1_HI V8HI_FTYPE_V8HI
33088 #define MULTI_ARG_1_QI V16QI_FTYPE_V16QI
33089 #define MULTI_ARG_1_SI_DI V2DI_FTYPE_V4SI
33090 #define MULTI_ARG_1_HI_DI V2DI_FTYPE_V8HI
33091 #define MULTI_ARG_1_HI_SI V4SI_FTYPE_V8HI
33092 #define MULTI_ARG_1_QI_DI V2DI_FTYPE_V16QI
33093 #define MULTI_ARG_1_QI_SI V4SI_FTYPE_V16QI
33094 #define MULTI_ARG_1_QI_HI V8HI_FTYPE_V16QI
33096 static const struct builtin_description bdesc_multi_arg[] =
{
33098 { OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_vmfmadd_v4sf,
33099 "__builtin_ia32_vfmaddss", IX86_BUILTIN_VFMADDSS,
33100 UNKNOWN, (int)MULTI_ARG_3_SF },
33101 { OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_vmfmadd_v2df,
33102 "__builtin_ia32_vfmaddsd", IX86_BUILTIN_VFMADDSD,
33103 UNKNOWN, (int)MULTI_ARG_3_DF },
33105 { OPTION_MASK_ISA_FMA, CODE_FOR_fmai_vmfmadd_v4sf,
33106 "__builtin_ia32_vfmaddss3", IX86_BUILTIN_VFMADDSS3,
33107 UNKNOWN, (int)MULTI_ARG_3_SF },
33108 { OPTION_MASK_ISA_FMA, CODE_FOR_fmai_vmfmadd_v2df,
33109 "__builtin_ia32_vfmaddsd3", IX86_BUILTIN_VFMADDSD3,
33110 UNKNOWN, (int)MULTI_ARG_3_DF },
33112 { OPTION_MASK_ISA_FMA | OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_fmadd_v4sf,
33113 "__builtin_ia32_vfmaddps", IX86_BUILTIN_VFMADDPS,
33114 UNKNOWN, (int)MULTI_ARG_3_SF },
33115 { OPTION_MASK_ISA_FMA | OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_fmadd_v2df,
33116 "__builtin_ia32_vfmaddpd", IX86_BUILTIN_VFMADDPD,
33117 UNKNOWN, (int)MULTI_ARG_3_DF },
33118 { OPTION_MASK_ISA_FMA | OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_fmadd_v8sf,
33119 "__builtin_ia32_vfmaddps256", IX86_BUILTIN_VFMADDPS256,
33120 UNKNOWN, (int)MULTI_ARG_3_SF2 },
33121 { OPTION_MASK_ISA_FMA | OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_fmadd_v4df,
33122 "__builtin_ia32_vfmaddpd256", IX86_BUILTIN_VFMADDPD256,
33123 UNKNOWN, (int)MULTI_ARG_3_DF2 },
33125 { OPTION_MASK_ISA_FMA | OPTION_MASK_ISA_FMA4, CODE_FOR_fmaddsub_v4sf,
33126 "__builtin_ia32_vfmaddsubps", IX86_BUILTIN_VFMADDSUBPS,
33127 UNKNOWN, (int)MULTI_ARG_3_SF },
33128 { OPTION_MASK_ISA_FMA | OPTION_MASK_ISA_FMA4, CODE_FOR_fmaddsub_v2df,
33129 "__builtin_ia32_vfmaddsubpd", IX86_BUILTIN_VFMADDSUBPD,
33130 UNKNOWN, (int)MULTI_ARG_3_DF },
33131 { OPTION_MASK_ISA_FMA | OPTION_MASK_ISA_FMA4, CODE_FOR_fmaddsub_v8sf,
33132 "__builtin_ia32_vfmaddsubps256", IX86_BUILTIN_VFMADDSUBPS256,
33133 UNKNOWN, (int)MULTI_ARG_3_SF2 },
33134 { OPTION_MASK_ISA_FMA | OPTION_MASK_ISA_FMA4, CODE_FOR_fmaddsub_v4df,
33135 "__builtin_ia32_vfmaddsubpd256", IX86_BUILTIN_VFMADDSUBPD256,
33136 UNKNOWN, (int)MULTI_ARG_3_DF2 },
33138 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v2di, "__builtin_ia32_vpcmov", IX86_BUILTIN_VPCMOV, UNKNOWN, (int)MULTI_ARG_3_DI },
33139 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v2di, "__builtin_ia32_vpcmov_v2di", IX86_BUILTIN_VPCMOV_V2DI, UNKNOWN, (int)MULTI_ARG_3_DI },
33140 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v4si, "__builtin_ia32_vpcmov_v4si", IX86_BUILTIN_VPCMOV_V4SI, UNKNOWN, (int)MULTI_ARG_3_SI },
33141 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v8hi, "__builtin_ia32_vpcmov_v8hi", IX86_BUILTIN_VPCMOV_V8HI, UNKNOWN, (int)MULTI_ARG_3_HI },
{ OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v16qi, "__builtin_ia32_vpcmov_v16qi", IX86_BUILTIN_VPCMOV_V16QI, UNKNOWN, (int)MULTI_ARG_3_QI },
33143 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v2df, "__builtin_ia32_vpcmov_v2df", IX86_BUILTIN_VPCMOV_V2DF, UNKNOWN, (int)MULTI_ARG_3_DF },
33144 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v4sf, "__builtin_ia32_vpcmov_v4sf", IX86_BUILTIN_VPCMOV_V4SF, UNKNOWN, (int)MULTI_ARG_3_SF },
33146 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v4di256, "__builtin_ia32_vpcmov256", IX86_BUILTIN_VPCMOV256, UNKNOWN, (int)MULTI_ARG_3_DI2 },
33147 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v4di256, "__builtin_ia32_vpcmov_v4di256", IX86_BUILTIN_VPCMOV_V4DI256, UNKNOWN, (int)MULTI_ARG_3_DI2 },
33148 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v8si256, "__builtin_ia32_vpcmov_v8si256", IX86_BUILTIN_VPCMOV_V8SI256, UNKNOWN, (int)MULTI_ARG_3_SI2 },
33149 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v16hi256, "__builtin_ia32_vpcmov_v16hi256", IX86_BUILTIN_VPCMOV_V16HI256, UNKNOWN, (int)MULTI_ARG_3_HI2 },
33150 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v32qi256, "__builtin_ia32_vpcmov_v32qi256", IX86_BUILTIN_VPCMOV_V32QI256, UNKNOWN, (int)MULTI_ARG_3_QI2 },
33151 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v4df256, "__builtin_ia32_vpcmov_v4df256", IX86_BUILTIN_VPCMOV_V4DF256, UNKNOWN, (int)MULTI_ARG_3_DF2 },
33152 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v8sf256, "__builtin_ia32_vpcmov_v8sf256", IX86_BUILTIN_VPCMOV_V8SF256, UNKNOWN, (int)MULTI_ARG_3_SF2 },
33154 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pperm, "__builtin_ia32_vpperm", IX86_BUILTIN_VPPERM, UNKNOWN, (int)MULTI_ARG_3_QI },
33156 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacssww, "__builtin_ia32_vpmacssww", IX86_BUILTIN_VPMACSSWW, UNKNOWN, (int)MULTI_ARG_3_HI },
33157 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacsww, "__builtin_ia32_vpmacsww", IX86_BUILTIN_VPMACSWW, UNKNOWN, (int)MULTI_ARG_3_HI },
33158 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacsswd, "__builtin_ia32_vpmacsswd", IX86_BUILTIN_VPMACSSWD, UNKNOWN, (int)MULTI_ARG_3_HI_SI },
33159 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacswd, "__builtin_ia32_vpmacswd", IX86_BUILTIN_VPMACSWD, UNKNOWN, (int)MULTI_ARG_3_HI_SI },
33160 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacssdd, "__builtin_ia32_vpmacssdd", IX86_BUILTIN_VPMACSSDD, UNKNOWN, (int)MULTI_ARG_3_SI },
33161 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacsdd, "__builtin_ia32_vpmacsdd", IX86_BUILTIN_VPMACSDD, UNKNOWN, (int)MULTI_ARG_3_SI },
33162 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacssdql, "__builtin_ia32_vpmacssdql", IX86_BUILTIN_VPMACSSDQL, UNKNOWN, (int)MULTI_ARG_3_SI_DI },
33163 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacssdqh, "__builtin_ia32_vpmacssdqh", IX86_BUILTIN_VPMACSSDQH, UNKNOWN, (int)MULTI_ARG_3_SI_DI },
33164 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacsdql, "__builtin_ia32_vpmacsdql", IX86_BUILTIN_VPMACSDQL, UNKNOWN, (int)MULTI_ARG_3_SI_DI },
33165 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacsdqh, "__builtin_ia32_vpmacsdqh", IX86_BUILTIN_VPMACSDQH, UNKNOWN, (int)MULTI_ARG_3_SI_DI },
33166 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmadcsswd, "__builtin_ia32_vpmadcsswd", IX86_BUILTIN_VPMADCSSWD, UNKNOWN, (int)MULTI_ARG_3_HI_SI },
33167 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmadcswd, "__builtin_ia32_vpmadcswd", IX86_BUILTIN_VPMADCSWD, UNKNOWN, (int)MULTI_ARG_3_HI_SI },
33169 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vrotlv2di3, "__builtin_ia32_vprotq", IX86_BUILTIN_VPROTQ, UNKNOWN, (int)MULTI_ARG_2_DI },
33170 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vrotlv4si3, "__builtin_ia32_vprotd", IX86_BUILTIN_VPROTD, UNKNOWN, (int)MULTI_ARG_2_SI },
33171 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vrotlv8hi3, "__builtin_ia32_vprotw", IX86_BUILTIN_VPROTW, UNKNOWN, (int)MULTI_ARG_2_HI },
33172 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vrotlv16qi3, "__builtin_ia32_vprotb", IX86_BUILTIN_VPROTB, UNKNOWN, (int)MULTI_ARG_2_QI },
33173 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_rotlv2di3, "__builtin_ia32_vprotqi", IX86_BUILTIN_VPROTQ_IMM, UNKNOWN, (int)MULTI_ARG_2_DI_IMM },
33174 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_rotlv4si3, "__builtin_ia32_vprotdi", IX86_BUILTIN_VPROTD_IMM, UNKNOWN, (int)MULTI_ARG_2_SI_IMM },
33175 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_rotlv8hi3, "__builtin_ia32_vprotwi", IX86_BUILTIN_VPROTW_IMM, UNKNOWN, (int)MULTI_ARG_2_HI_IMM },
33176 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_rotlv16qi3, "__builtin_ia32_vprotbi", IX86_BUILTIN_VPROTB_IMM, UNKNOWN, (int)MULTI_ARG_2_QI_IMM },
33177 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_shav2di3, "__builtin_ia32_vpshaq", IX86_BUILTIN_VPSHAQ, UNKNOWN, (int)MULTI_ARG_2_DI },
33178 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_shav4si3, "__builtin_ia32_vpshad", IX86_BUILTIN_VPSHAD, UNKNOWN, (int)MULTI_ARG_2_SI },
33179 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_shav8hi3, "__builtin_ia32_vpshaw", IX86_BUILTIN_VPSHAW, UNKNOWN, (int)MULTI_ARG_2_HI },
33180 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_shav16qi3, "__builtin_ia32_vpshab", IX86_BUILTIN_VPSHAB, UNKNOWN, (int)MULTI_ARG_2_QI },
33181 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_shlv2di3, "__builtin_ia32_vpshlq", IX86_BUILTIN_VPSHLQ, UNKNOWN, (int)MULTI_ARG_2_DI },
33182 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_shlv4si3, "__builtin_ia32_vpshld", IX86_BUILTIN_VPSHLD, UNKNOWN, (int)MULTI_ARG_2_SI },
33183 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_shlv8hi3, "__builtin_ia32_vpshlw", IX86_BUILTIN_VPSHLW, UNKNOWN, (int)MULTI_ARG_2_HI },
33184 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_shlv16qi3, "__builtin_ia32_vpshlb", IX86_BUILTIN_VPSHLB, UNKNOWN, (int)MULTI_ARG_2_QI },
33186 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vmfrczv4sf2, "__builtin_ia32_vfrczss", IX86_BUILTIN_VFRCZSS, UNKNOWN, (int)MULTI_ARG_1_SF },
33187 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vmfrczv2df2, "__builtin_ia32_vfrczsd", IX86_BUILTIN_VFRCZSD, UNKNOWN, (int)MULTI_ARG_1_DF },
33188 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_frczv4sf2, "__builtin_ia32_vfrczps", IX86_BUILTIN_VFRCZPS, UNKNOWN, (int)MULTI_ARG_1_SF },
33189 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_frczv2df2, "__builtin_ia32_vfrczpd", IX86_BUILTIN_VFRCZPD, UNKNOWN, (int)MULTI_ARG_1_DF },
33190 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_frczv8sf2, "__builtin_ia32_vfrczps256", IX86_BUILTIN_VFRCZPS256, UNKNOWN, (int)MULTI_ARG_1_SF2 },
33191 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_frczv4df2, "__builtin_ia32_vfrczpd256", IX86_BUILTIN_VFRCZPD256, UNKNOWN, (int)MULTI_ARG_1_DF2 },
33193 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phaddbw, "__builtin_ia32_vphaddbw", IX86_BUILTIN_VPHADDBW, UNKNOWN, (int)MULTI_ARG_1_QI_HI },
33194 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phaddbd, "__builtin_ia32_vphaddbd", IX86_BUILTIN_VPHADDBD, UNKNOWN, (int)MULTI_ARG_1_QI_SI },
33195 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phaddbq, "__builtin_ia32_vphaddbq", IX86_BUILTIN_VPHADDBQ, UNKNOWN, (int)MULTI_ARG_1_QI_DI },
33196 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phaddwd, "__builtin_ia32_vphaddwd", IX86_BUILTIN_VPHADDWD, UNKNOWN, (int)MULTI_ARG_1_HI_SI },
33197 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phaddwq, "__builtin_ia32_vphaddwq", IX86_BUILTIN_VPHADDWQ, UNKNOWN, (int)MULTI_ARG_1_HI_DI },
33198 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phadddq, "__builtin_ia32_vphadddq", IX86_BUILTIN_VPHADDDQ, UNKNOWN, (int)MULTI_ARG_1_SI_DI },
33199 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phaddubw, "__builtin_ia32_vphaddubw", IX86_BUILTIN_VPHADDUBW, UNKNOWN, (int)MULTI_ARG_1_QI_HI },
33200 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phaddubd, "__builtin_ia32_vphaddubd", IX86_BUILTIN_VPHADDUBD, UNKNOWN, (int)MULTI_ARG_1_QI_SI },
33201 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phaddubq, "__builtin_ia32_vphaddubq", IX86_BUILTIN_VPHADDUBQ, UNKNOWN, (int)MULTI_ARG_1_QI_DI },
33202 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phadduwd, "__builtin_ia32_vphadduwd", IX86_BUILTIN_VPHADDUWD, UNKNOWN, (int)MULTI_ARG_1_HI_SI },
33203 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phadduwq, "__builtin_ia32_vphadduwq", IX86_BUILTIN_VPHADDUWQ, UNKNOWN, (int)MULTI_ARG_1_HI_DI },
33204 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phaddudq, "__builtin_ia32_vphaddudq", IX86_BUILTIN_VPHADDUDQ, UNKNOWN, (int)MULTI_ARG_1_SI_DI },
33205 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phsubbw, "__builtin_ia32_vphsubbw", IX86_BUILTIN_VPHSUBBW, UNKNOWN, (int)MULTI_ARG_1_QI_HI },
33206 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phsubwd, "__builtin_ia32_vphsubwd", IX86_BUILTIN_VPHSUBWD, UNKNOWN, (int)MULTI_ARG_1_HI_SI },
33207 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phsubdq, "__builtin_ia32_vphsubdq", IX86_BUILTIN_VPHSUBDQ, UNKNOWN, (int)MULTI_ARG_1_SI_DI },
33209 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv16qi3, "__builtin_ia32_vpcomeqb", IX86_BUILTIN_VPCOMEQB, EQ, (int)MULTI_ARG_2_QI_CMP },
33210 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv16qi3, "__builtin_ia32_vpcomneb", IX86_BUILTIN_VPCOMNEB, NE, (int)MULTI_ARG_2_QI_CMP },
33211 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv16qi3, "__builtin_ia32_vpcomneqb", IX86_BUILTIN_VPCOMNEB, NE, (int)MULTI_ARG_2_QI_CMP },
33212 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv16qi3, "__builtin_ia32_vpcomltb", IX86_BUILTIN_VPCOMLTB, LT, (int)MULTI_ARG_2_QI_CMP },
33213 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv16qi3, "__builtin_ia32_vpcomleb", IX86_BUILTIN_VPCOMLEB, LE, (int)MULTI_ARG_2_QI_CMP },
33214 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv16qi3, "__builtin_ia32_vpcomgtb", IX86_BUILTIN_VPCOMGTB, GT, (int)MULTI_ARG_2_QI_CMP },
33215 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv16qi3, "__builtin_ia32_vpcomgeb", IX86_BUILTIN_VPCOMGEB, GE, (int)MULTI_ARG_2_QI_CMP },
33217 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv8hi3, "__builtin_ia32_vpcomeqw", IX86_BUILTIN_VPCOMEQW, EQ, (int)MULTI_ARG_2_HI_CMP },
33218 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv8hi3, "__builtin_ia32_vpcomnew", IX86_BUILTIN_VPCOMNEW, NE, (int)MULTI_ARG_2_HI_CMP },
33219 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv8hi3, "__builtin_ia32_vpcomneqw", IX86_BUILTIN_VPCOMNEW, NE, (int)MULTI_ARG_2_HI_CMP },
33220 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv8hi3, "__builtin_ia32_vpcomltw", IX86_BUILTIN_VPCOMLTW, LT, (int)MULTI_ARG_2_HI_CMP },
33221 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv8hi3, "__builtin_ia32_vpcomlew", IX86_BUILTIN_VPCOMLEW, LE, (int)MULTI_ARG_2_HI_CMP },
33222 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv8hi3, "__builtin_ia32_vpcomgtw", IX86_BUILTIN_VPCOMGTW, GT, (int)MULTI_ARG_2_HI_CMP },
33223 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv8hi3, "__builtin_ia32_vpcomgew", IX86_BUILTIN_VPCOMGEW, GE, (int)MULTI_ARG_2_HI_CMP },
33225 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv4si3, "__builtin_ia32_vpcomeqd", IX86_BUILTIN_VPCOMEQD, EQ, (int)MULTI_ARG_2_SI_CMP },
33226 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv4si3, "__builtin_ia32_vpcomned", IX86_BUILTIN_VPCOMNED, NE, (int)MULTI_ARG_2_SI_CMP },
33227 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv4si3, "__builtin_ia32_vpcomneqd", IX86_BUILTIN_VPCOMNED, NE, (int)MULTI_ARG_2_SI_CMP },
33228 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv4si3, "__builtin_ia32_vpcomltd", IX86_BUILTIN_VPCOMLTD, LT, (int)MULTI_ARG_2_SI_CMP },
33229 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv4si3, "__builtin_ia32_vpcomled", IX86_BUILTIN_VPCOMLED, LE, (int)MULTI_ARG_2_SI_CMP },
33230 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv4si3, "__builtin_ia32_vpcomgtd", IX86_BUILTIN_VPCOMGTD, GT, (int)MULTI_ARG_2_SI_CMP },
33231 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv4si3, "__builtin_ia32_vpcomged", IX86_BUILTIN_VPCOMGED, GE, (int)MULTI_ARG_2_SI_CMP },
33233 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv2di3, "__builtin_ia32_vpcomeqq", IX86_BUILTIN_VPCOMEQQ, EQ, (int)MULTI_ARG_2_DI_CMP },
33234 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv2di3, "__builtin_ia32_vpcomneq", IX86_BUILTIN_VPCOMNEQ, NE, (int)MULTI_ARG_2_DI_CMP },
33235 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv2di3, "__builtin_ia32_vpcomneqq", IX86_BUILTIN_VPCOMNEQ, NE, (int)MULTI_ARG_2_DI_CMP },
33236 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv2di3, "__builtin_ia32_vpcomltq", IX86_BUILTIN_VPCOMLTQ, LT, (int)MULTI_ARG_2_DI_CMP },
33237 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv2di3, "__builtin_ia32_vpcomleq", IX86_BUILTIN_VPCOMLEQ, LE, (int)MULTI_ARG_2_DI_CMP },
33238 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv2di3, "__builtin_ia32_vpcomgtq", IX86_BUILTIN_VPCOMGTQ, GT, (int)MULTI_ARG_2_DI_CMP },
33239 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv2di3, "__builtin_ia32_vpcomgeq", IX86_BUILTIN_VPCOMGEQ, GE, (int)MULTI_ARG_2_DI_CMP },
33241 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v16qi3,"__builtin_ia32_vpcomequb", IX86_BUILTIN_VPCOMEQUB, EQ, (int)MULTI_ARG_2_QI_CMP },
33242 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v16qi3,"__builtin_ia32_vpcomneub", IX86_BUILTIN_VPCOMNEUB, NE, (int)MULTI_ARG_2_QI_CMP },
33243 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v16qi3,"__builtin_ia32_vpcomnequb", IX86_BUILTIN_VPCOMNEUB, NE, (int)MULTI_ARG_2_QI_CMP },
33244 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv16qi3, "__builtin_ia32_vpcomltub", IX86_BUILTIN_VPCOMLTUB, LTU, (int)MULTI_ARG_2_QI_CMP },
33245 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv16qi3, "__builtin_ia32_vpcomleub", IX86_BUILTIN_VPCOMLEUB, LEU, (int)MULTI_ARG_2_QI_CMP },
33246 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv16qi3, "__builtin_ia32_vpcomgtub", IX86_BUILTIN_VPCOMGTUB, GTU, (int)MULTI_ARG_2_QI_CMP },
33247 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv16qi3, "__builtin_ia32_vpcomgeub", IX86_BUILTIN_VPCOMGEUB, GEU, (int)MULTI_ARG_2_QI_CMP },
33249 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v8hi3, "__builtin_ia32_vpcomequw", IX86_BUILTIN_VPCOMEQUW, EQ, (int)MULTI_ARG_2_HI_CMP },
33250 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v8hi3, "__builtin_ia32_vpcomneuw", IX86_BUILTIN_VPCOMNEUW, NE, (int)MULTI_ARG_2_HI_CMP },
33251 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v8hi3, "__builtin_ia32_vpcomnequw", IX86_BUILTIN_VPCOMNEUW, NE, (int)MULTI_ARG_2_HI_CMP },
33252 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv8hi3, "__builtin_ia32_vpcomltuw", IX86_BUILTIN_VPCOMLTUW, LTU, (int)MULTI_ARG_2_HI_CMP },
33253 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv8hi3, "__builtin_ia32_vpcomleuw", IX86_BUILTIN_VPCOMLEUW, LEU, (int)MULTI_ARG_2_HI_CMP },
33254 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv8hi3, "__builtin_ia32_vpcomgtuw", IX86_BUILTIN_VPCOMGTUW, GTU, (int)MULTI_ARG_2_HI_CMP },
33255 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv8hi3, "__builtin_ia32_vpcomgeuw", IX86_BUILTIN_VPCOMGEUW, GEU, (int)MULTI_ARG_2_HI_CMP },
33257 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v4si3, "__builtin_ia32_vpcomequd", IX86_BUILTIN_VPCOMEQUD, EQ, (int)MULTI_ARG_2_SI_CMP },
33258 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v4si3, "__builtin_ia32_vpcomneud", IX86_BUILTIN_VPCOMNEUD, NE, (int)MULTI_ARG_2_SI_CMP },
33259 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v4si3, "__builtin_ia32_vpcomnequd", IX86_BUILTIN_VPCOMNEUD, NE, (int)MULTI_ARG_2_SI_CMP },
33260 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv4si3, "__builtin_ia32_vpcomltud", IX86_BUILTIN_VPCOMLTUD, LTU, (int)MULTI_ARG_2_SI_CMP },
33261 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv4si3, "__builtin_ia32_vpcomleud", IX86_BUILTIN_VPCOMLEUD, LEU, (int)MULTI_ARG_2_SI_CMP },
33262 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv4si3, "__builtin_ia32_vpcomgtud", IX86_BUILTIN_VPCOMGTUD, GTU, (int)MULTI_ARG_2_SI_CMP },
33263 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv4si3, "__builtin_ia32_vpcomgeud", IX86_BUILTIN_VPCOMGEUD, GEU, (int)MULTI_ARG_2_SI_CMP },
33265 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v2di3, "__builtin_ia32_vpcomequq", IX86_BUILTIN_VPCOMEQUQ, EQ, (int)MULTI_ARG_2_DI_CMP },
33266 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v2di3, "__builtin_ia32_vpcomneuq", IX86_BUILTIN_VPCOMNEUQ, NE, (int)MULTI_ARG_2_DI_CMP },
33267 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v2di3, "__builtin_ia32_vpcomnequq", IX86_BUILTIN_VPCOMNEUQ, NE, (int)MULTI_ARG_2_DI_CMP },
33268 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv2di3, "__builtin_ia32_vpcomltuq", IX86_BUILTIN_VPCOMLTUQ, LTU, (int)MULTI_ARG_2_DI_CMP },
33269 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv2di3, "__builtin_ia32_vpcomleuq", IX86_BUILTIN_VPCOMLEUQ, LEU, (int)MULTI_ARG_2_DI_CMP },
33270 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv2di3, "__builtin_ia32_vpcomgtuq", IX86_BUILTIN_VPCOMGTUQ, GTU, (int)MULTI_ARG_2_DI_CMP },
33271 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv2di3, "__builtin_ia32_vpcomgeuq", IX86_BUILTIN_VPCOMGEUQ, GEU, (int)MULTI_ARG_2_DI_CMP },
33273 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv16qi3, "__builtin_ia32_vpcomfalseb", IX86_BUILTIN_VPCOMFALSEB, (enum rtx_code) PCOM_FALSE, (int)MULTI_ARG_2_QI_TF },
33274 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv8hi3, "__builtin_ia32_vpcomfalsew", IX86_BUILTIN_VPCOMFALSEW, (enum rtx_code) PCOM_FALSE, (int)MULTI_ARG_2_HI_TF },
33275 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv4si3, "__builtin_ia32_vpcomfalsed", IX86_BUILTIN_VPCOMFALSED, (enum rtx_code) PCOM_FALSE, (int)MULTI_ARG_2_SI_TF },
33276 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv2di3, "__builtin_ia32_vpcomfalseq", IX86_BUILTIN_VPCOMFALSEQ, (enum rtx_code) PCOM_FALSE, (int)MULTI_ARG_2_DI_TF },
33277 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv16qi3, "__builtin_ia32_vpcomfalseub",IX86_BUILTIN_VPCOMFALSEUB,(enum rtx_code) PCOM_FALSE, (int)MULTI_ARG_2_QI_TF },
33278 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv8hi3, "__builtin_ia32_vpcomfalseuw",IX86_BUILTIN_VPCOMFALSEUW,(enum rtx_code) PCOM_FALSE, (int)MULTI_ARG_2_HI_TF },
33279 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv4si3, "__builtin_ia32_vpcomfalseud",IX86_BUILTIN_VPCOMFALSEUD,(enum rtx_code) PCOM_FALSE, (int)MULTI_ARG_2_SI_TF },
33280 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv2di3, "__builtin_ia32_vpcomfalseuq",IX86_BUILTIN_VPCOMFALSEUQ,(enum rtx_code) PCOM_FALSE, (int)MULTI_ARG_2_DI_TF },
33282 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv16qi3, "__builtin_ia32_vpcomtrueb", IX86_BUILTIN_VPCOMTRUEB, (enum rtx_code) PCOM_TRUE, (int)MULTI_ARG_2_QI_TF },
33283 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv8hi3, "__builtin_ia32_vpcomtruew", IX86_BUILTIN_VPCOMTRUEW, (enum rtx_code) PCOM_TRUE, (int)MULTI_ARG_2_HI_TF },
33284 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv4si3, "__builtin_ia32_vpcomtrued", IX86_BUILTIN_VPCOMTRUED, (enum rtx_code) PCOM_TRUE, (int)MULTI_ARG_2_SI_TF },
33285 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv2di3, "__builtin_ia32_vpcomtrueq", IX86_BUILTIN_VPCOMTRUEQ, (enum rtx_code) PCOM_TRUE, (int)MULTI_ARG_2_DI_TF },
33286 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv16qi3, "__builtin_ia32_vpcomtrueub", IX86_BUILTIN_VPCOMTRUEUB, (enum rtx_code) PCOM_TRUE, (int)MULTI_ARG_2_QI_TF },
33287 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv8hi3, "__builtin_ia32_vpcomtrueuw", IX86_BUILTIN_VPCOMTRUEUW, (enum rtx_code) PCOM_TRUE, (int)MULTI_ARG_2_HI_TF },
33288 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv4si3, "__builtin_ia32_vpcomtrueud", IX86_BUILTIN_VPCOMTRUEUD, (enum rtx_code) PCOM_TRUE, (int)MULTI_ARG_2_SI_TF },
33289 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv2di3, "__builtin_ia32_vpcomtrueuq", IX86_BUILTIN_VPCOMTRUEUQ, (enum rtx_code) PCOM_TRUE, (int)MULTI_ARG_2_DI_TF },
33291 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vpermil2v2df3, "__builtin_ia32_vpermil2pd", IX86_BUILTIN_VPERMIL2PD, UNKNOWN, (int)MULTI_ARG_4_DF2_DI_I },
33292 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vpermil2v4sf3, "__builtin_ia32_vpermil2ps", IX86_BUILTIN_VPERMIL2PS, UNKNOWN, (int)MULTI_ARG_4_SF2_SI_I },
33293 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vpermil2v4df3, "__builtin_ia32_vpermil2pd256", IX86_BUILTIN_VPERMIL2PD256, UNKNOWN, (int)MULTI_ARG_4_DF2_DI_I1 },
33294 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vpermil2v8sf3, "__builtin_ia32_vpermil2ps256", IX86_BUILTIN_VPERMIL2PS256, UNKNOWN, (int)MULTI_ARG_4_SF2_SI_I1 },
33298 /* TM vector builtins. */
33300 /* Reuse the existing x86-specific `struct builtin_description' because
33301 we're lazy. Add casts to make them fit. */
33302 static const struct builtin_description bdesc_tm[] =
33304 { OPTION_MASK_ISA_MMX, CODE_FOR_nothing, "__builtin__ITM_WM64", (enum ix86_builtins) BUILT_IN_TM_STORE_M64, UNKNOWN, VOID_FTYPE_PV2SI_V2SI },
33305 { OPTION_MASK_ISA_MMX, CODE_FOR_nothing, "__builtin__ITM_WaRM64", (enum ix86_builtins) BUILT_IN_TM_STORE_WAR_M64, UNKNOWN, VOID_FTYPE_PV2SI_V2SI },
33306 { OPTION_MASK_ISA_MMX, CODE_FOR_nothing, "__builtin__ITM_WaWM64", (enum ix86_builtins) BUILT_IN_TM_STORE_WAW_M64, UNKNOWN, VOID_FTYPE_PV2SI_V2SI },
33307 { OPTION_MASK_ISA_MMX, CODE_FOR_nothing, "__builtin__ITM_RM64", (enum ix86_builtins) BUILT_IN_TM_LOAD_M64, UNKNOWN, V2SI_FTYPE_PCV2SI },
33308 { OPTION_MASK_ISA_MMX, CODE_FOR_nothing, "__builtin__ITM_RaRM64", (enum ix86_builtins) BUILT_IN_TM_LOAD_RAR_M64, UNKNOWN, V2SI_FTYPE_PCV2SI },
33309 { OPTION_MASK_ISA_MMX, CODE_FOR_nothing, "__builtin__ITM_RaWM64", (enum ix86_builtins) BUILT_IN_TM_LOAD_RAW_M64, UNKNOWN, V2SI_FTYPE_PCV2SI },
33310 { OPTION_MASK_ISA_MMX, CODE_FOR_nothing, "__builtin__ITM_RfWM64", (enum ix86_builtins) BUILT_IN_TM_LOAD_RFW_M64, UNKNOWN, V2SI_FTYPE_PCV2SI },
33312 { OPTION_MASK_ISA_SSE, CODE_FOR_nothing, "__builtin__ITM_WM128", (enum ix86_builtins) BUILT_IN_TM_STORE_M128, UNKNOWN, VOID_FTYPE_PV4SF_V4SF },
33313 { OPTION_MASK_ISA_SSE, CODE_FOR_nothing, "__builtin__ITM_WaRM128", (enum ix86_builtins) BUILT_IN_TM_STORE_WAR_M128, UNKNOWN, VOID_FTYPE_PV4SF_V4SF },
33314 { OPTION_MASK_ISA_SSE, CODE_FOR_nothing, "__builtin__ITM_WaWM128", (enum ix86_builtins) BUILT_IN_TM_STORE_WAW_M128, UNKNOWN, VOID_FTYPE_PV4SF_V4SF },
33315 { OPTION_MASK_ISA_SSE, CODE_FOR_nothing, "__builtin__ITM_RM128", (enum ix86_builtins) BUILT_IN_TM_LOAD_M128, UNKNOWN, V4SF_FTYPE_PCV4SF },
33316 { OPTION_MASK_ISA_SSE, CODE_FOR_nothing, "__builtin__ITM_RaRM128", (enum ix86_builtins) BUILT_IN_TM_LOAD_RAR_M128, UNKNOWN, V4SF_FTYPE_PCV4SF },
33317 { OPTION_MASK_ISA_SSE, CODE_FOR_nothing, "__builtin__ITM_RaWM128", (enum ix86_builtins) BUILT_IN_TM_LOAD_RAW_M128, UNKNOWN, V4SF_FTYPE_PCV4SF },
33318 { OPTION_MASK_ISA_SSE, CODE_FOR_nothing, "__builtin__ITM_RfWM128", (enum ix86_builtins) BUILT_IN_TM_LOAD_RFW_M128, UNKNOWN, V4SF_FTYPE_PCV4SF },
33320 { OPTION_MASK_ISA_AVX, CODE_FOR_nothing, "__builtin__ITM_WM256", (enum ix86_builtins) BUILT_IN_TM_STORE_M256, UNKNOWN, VOID_FTYPE_PV8SF_V8SF },
33321 { OPTION_MASK_ISA_AVX, CODE_FOR_nothing, "__builtin__ITM_WaRM256", (enum ix86_builtins) BUILT_IN_TM_STORE_WAR_M256, UNKNOWN, VOID_FTYPE_PV8SF_V8SF },
33322 { OPTION_MASK_ISA_AVX, CODE_FOR_nothing, "__builtin__ITM_WaWM256", (enum ix86_builtins) BUILT_IN_TM_STORE_WAW_M256, UNKNOWN, VOID_FTYPE_PV8SF_V8SF },
33323 { OPTION_MASK_ISA_AVX, CODE_FOR_nothing, "__builtin__ITM_RM256", (enum ix86_builtins) BUILT_IN_TM_LOAD_M256, UNKNOWN, V8SF_FTYPE_PCV8SF },
33324 { OPTION_MASK_ISA_AVX, CODE_FOR_nothing, "__builtin__ITM_RaRM256", (enum ix86_builtins) BUILT_IN_TM_LOAD_RAR_M256, UNKNOWN, V8SF_FTYPE_PCV8SF },
33325 { OPTION_MASK_ISA_AVX, CODE_FOR_nothing, "__builtin__ITM_RaWM256", (enum ix86_builtins) BUILT_IN_TM_LOAD_RAW_M256, UNKNOWN, V8SF_FTYPE_PCV8SF },
33326 { OPTION_MASK_ISA_AVX, CODE_FOR_nothing, "__builtin__ITM_RfWM256", (enum ix86_builtins) BUILT_IN_TM_LOAD_RFW_M256, UNKNOWN, V8SF_FTYPE_PCV8SF },
33328 { OPTION_MASK_ISA_MMX, CODE_FOR_nothing, "__builtin__ITM_LM64", (enum ix86_builtins) BUILT_IN_TM_LOG_M64, UNKNOWN, VOID_FTYPE_PCVOID },
33329 { OPTION_MASK_ISA_SSE, CODE_FOR_nothing, "__builtin__ITM_LM128", (enum ix86_builtins) BUILT_IN_TM_LOG_M128, UNKNOWN, VOID_FTYPE_PCVOID },
33330 { OPTION_MASK_ISA_AVX, CODE_FOR_nothing, "__builtin__ITM_LM256", (enum ix86_builtins) BUILT_IN_TM_LOG_M256, UNKNOWN, VOID_FTYPE_PCVOID },
33333 /* TM callbacks. */
33335 /* Return the builtin decl needed to load a vector of TYPE. */
33337 static tree
33338 ix86_builtin_tm_load (tree type)
33340 if (TREE_CODE (type) == VECTOR_TYPE)
33342 switch (tree_to_uhwi (TYPE_SIZE (type)))
33344 case 64:
33345 return builtin_decl_explicit (BUILT_IN_TM_LOAD_M64);
33346 case 128:
33347 return builtin_decl_explicit (BUILT_IN_TM_LOAD_M128);
33348 case 256:
33349 return builtin_decl_explicit (BUILT_IN_TM_LOAD_M256);
33352 return NULL_TREE;
33355 /* Return the builtin decl needed to store a vector of TYPE. */
33357 static tree
33358 ix86_builtin_tm_store (tree type)
33360 if (TREE_CODE (type) == VECTOR_TYPE)
33362 switch (tree_to_uhwi (TYPE_SIZE (type)))
33364 case 64:
33365 return builtin_decl_explicit (BUILT_IN_TM_STORE_M64);
33366 case 128:
33367 return builtin_decl_explicit (BUILT_IN_TM_STORE_M128);
33368 case 256:
33369 return builtin_decl_explicit (BUILT_IN_TM_STORE_M256);
33372 return NULL_TREE;
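/* Worked example: for a 32-byte vector type such as __m256, TYPE_SIZE is
   256 bits, so ix86_builtin_tm_store returns the decl recorded for
   BUILT_IN_TM_STORE_M256 (the __builtin__ITM_WM256 entry in bdesc_tm above);
   for a non-vector type, or a vector size not listed, it returns NULL_TREE
   and the caller falls back to the generic TM store handling.  */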
33375 /* Initialize the transactional memory vector load/store builtins. */
33377 static void
33378 ix86_init_tm_builtins (void)
33380 enum ix86_builtin_func_type ftype;
33381 const struct builtin_description *d;
33382 size_t i;
33383 tree decl;
33384 tree attrs_load, attrs_type_load, attrs_store, attrs_type_store;
33385 tree attrs_log, attrs_type_log;
33387 if (!flag_tm)
33388 return;
33390 /* If there are no builtins defined, we must be compiling in a
33391 language without trans-mem support. */
33392 if (!builtin_decl_explicit_p (BUILT_IN_TM_LOAD_1))
33393 return;
33395 /* Use whatever attributes a normal TM load has. */
33396 decl = builtin_decl_explicit (BUILT_IN_TM_LOAD_1);
33397 attrs_load = DECL_ATTRIBUTES (decl);
33398 attrs_type_load = TYPE_ATTRIBUTES (TREE_TYPE (decl));
33399 /* Use whatever attributes a normal TM store has. */
33400 decl = builtin_decl_explicit (BUILT_IN_TM_STORE_1);
33401 attrs_store = DECL_ATTRIBUTES (decl);
33402 attrs_type_store = TYPE_ATTRIBUTES (TREE_TYPE (decl));
33403 /* Use whatever attributes a normal TM log has. */
33404 decl = builtin_decl_explicit (BUILT_IN_TM_LOG);
33405 attrs_log = DECL_ATTRIBUTES (decl);
33406 attrs_type_log = TYPE_ATTRIBUTES (TREE_TYPE (decl));
33408 for (i = 0, d = bdesc_tm;
33409 i < ARRAY_SIZE (bdesc_tm);
33410 i++, d++)
33412 if ((d->mask & ix86_isa_flags) != 0
33413 || (lang_hooks.builtin_function
33414 == lang_hooks.builtin_function_ext_scope))
33416 tree type, attrs, attrs_type;
33417 enum built_in_function code = (enum built_in_function) d->code;
33419 ftype = (enum ix86_builtin_func_type) d->flag;
33420 type = ix86_get_builtin_func_type (ftype);
33422 if (BUILTIN_TM_LOAD_P (code))
33424 attrs = attrs_load;
33425 attrs_type = attrs_type_load;
33427 else if (BUILTIN_TM_STORE_P (code))
33429 attrs = attrs_store;
33430 attrs_type = attrs_type_store;
33432 else
33434 attrs = attrs_log;
33435 attrs_type = attrs_type_log;
33437 decl = add_builtin_function (d->name, type, code, BUILT_IN_NORMAL,
33438 /* The builtin without the prefix for
33439 calling it directly. */
33440 d->name + strlen ("__builtin_"),
33441 attrs);
33442 /* add_builtin_function() will set the DECL_ATTRIBUTES, now
33443 set the TYPE_ATTRIBUTES. */
33444 type_attributes (&TREE_TYPE (decl), attrs_type, ATTR_FLAG_BUILT_IN);
33446 set_builtin_decl (code, decl, false);
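/* Illustrative sketch (hypothetical prototype and placeholder names): the
   direct-call name passed above is d->name with "__builtin_" stripped, so
   the bdesc_tm entry "__builtin__ITM_WM128" (VOID_FTYPE_PV4SF_V4SF) is tied
   to the libitm-style symbol _ITM_WM128, i.e. roughly

     void _ITM_WM128 (void *ptr, __m128 value);   // hypothetical prototype
     _ITM_WM128 (dst, v);                         // 16-byte transactional store

   with dst and v standing in for whatever the TM lowering pass supplies.  */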
33451 /* Set up all the MMX/SSE builtins, even those for instructions that are not
33452 in the current target ISA, so that the user can compile particular modules
33453 with target-specific options that differ from the command-line
33454 options. */
33455 static void
33456 ix86_init_mmx_sse_builtins (void)
33458 const struct builtin_description * d;
33459 enum ix86_builtin_func_type ftype;
33460 size_t i;
33462 /* Add all special builtins with a variable number of operands. */
33463 for (i = 0, d = bdesc_special_args;
33464 i < ARRAY_SIZE (bdesc_special_args);
33465 i++, d++)
33467 if (d->name == 0)
33468 continue;
33470 ftype = (enum ix86_builtin_func_type) d->flag;
33471 def_builtin (d->mask, d->name, ftype, d->code);
33474 /* Add all builtins with a variable number of operands. */
33475 for (i = 0, d = bdesc_args;
33476 i < ARRAY_SIZE (bdesc_args);
33477 i++, d++)
33479 if (d->name == 0)
33480 continue;
33482 ftype = (enum ix86_builtin_func_type) d->flag;
33483 def_builtin_const (d->mask, d->name, ftype, d->code);
33486 /* Add all builtins with rounding. */
33487 for (i = 0, d = bdesc_round_args;
33488 i < ARRAY_SIZE (bdesc_round_args);
33489 i++, d++)
33491 if (d->name == 0)
33492 continue;
33494 ftype = (enum ix86_builtin_func_type) d->flag;
33495 def_builtin_const (d->mask, d->name, ftype, d->code);
33498 /* pcmpestr[im] insns. */
33499 for (i = 0, d = bdesc_pcmpestr;
33500 i < ARRAY_SIZE (bdesc_pcmpestr);
33501 i++, d++)
33503 if (d->code == IX86_BUILTIN_PCMPESTRM128)
33504 ftype = V16QI_FTYPE_V16QI_INT_V16QI_INT_INT;
33505 else
33506 ftype = INT_FTYPE_V16QI_INT_V16QI_INT_INT;
33507 def_builtin_const (d->mask, d->name, ftype, d->code);
33510 /* pcmpistr[im] insns. */
33511 for (i = 0, d = bdesc_pcmpistr;
33512 i < ARRAY_SIZE (bdesc_pcmpistr);
33513 i++, d++)
33515 if (d->code == IX86_BUILTIN_PCMPISTRM128)
33516 ftype = V16QI_FTYPE_V16QI_V16QI_INT;
33517 else
33518 ftype = INT_FTYPE_V16QI_V16QI_INT;
33519 def_builtin_const (d->mask, d->name, ftype, d->code);
33522 /* comi/ucomi insns. */
33523 for (i = 0, d = bdesc_comi; i < ARRAY_SIZE (bdesc_comi); i++, d++)
33525 if (d->mask == OPTION_MASK_ISA_SSE2)
33526 ftype = INT_FTYPE_V2DF_V2DF;
33527 else
33528 ftype = INT_FTYPE_V4SF_V4SF;
33529 def_builtin_const (d->mask, d->name, ftype, d->code);
33532 /* SSE */
33533 def_builtin (OPTION_MASK_ISA_SSE, "__builtin_ia32_ldmxcsr",
33534 VOID_FTYPE_UNSIGNED, IX86_BUILTIN_LDMXCSR);
33535 def_builtin (OPTION_MASK_ISA_SSE, "__builtin_ia32_stmxcsr",
33536 UNSIGNED_FTYPE_VOID, IX86_BUILTIN_STMXCSR);
33538 /* SSE or 3DNow!A */
33539 def_builtin (OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A,
33540 "__builtin_ia32_maskmovq", VOID_FTYPE_V8QI_V8QI_PCHAR,
33541 IX86_BUILTIN_MASKMOVQ);
33543 /* SSE2 */
33544 def_builtin (OPTION_MASK_ISA_SSE2, "__builtin_ia32_maskmovdqu",
33545 VOID_FTYPE_V16QI_V16QI_PCHAR, IX86_BUILTIN_MASKMOVDQU);
33547 def_builtin (OPTION_MASK_ISA_SSE2, "__builtin_ia32_clflush",
33548 VOID_FTYPE_PCVOID, IX86_BUILTIN_CLFLUSH);
33549 x86_mfence = def_builtin (OPTION_MASK_ISA_SSE2, "__builtin_ia32_mfence",
33550 VOID_FTYPE_VOID, IX86_BUILTIN_MFENCE);
33552 /* SSE3. */
33553 def_builtin (OPTION_MASK_ISA_SSE3, "__builtin_ia32_monitor",
33554 VOID_FTYPE_PCVOID_UNSIGNED_UNSIGNED, IX86_BUILTIN_MONITOR);
33555 def_builtin (OPTION_MASK_ISA_SSE3, "__builtin_ia32_mwait",
33556 VOID_FTYPE_UNSIGNED_UNSIGNED, IX86_BUILTIN_MWAIT);
33558 /* AES */
33559 def_builtin_const (OPTION_MASK_ISA_AES, "__builtin_ia32_aesenc128",
33560 V2DI_FTYPE_V2DI_V2DI, IX86_BUILTIN_AESENC128);
33561 def_builtin_const (OPTION_MASK_ISA_AES, "__builtin_ia32_aesenclast128",
33562 V2DI_FTYPE_V2DI_V2DI, IX86_BUILTIN_AESENCLAST128);
33563 def_builtin_const (OPTION_MASK_ISA_AES, "__builtin_ia32_aesdec128",
33564 V2DI_FTYPE_V2DI_V2DI, IX86_BUILTIN_AESDEC128);
33565 def_builtin_const (OPTION_MASK_ISA_AES, "__builtin_ia32_aesdeclast128",
33566 V2DI_FTYPE_V2DI_V2DI, IX86_BUILTIN_AESDECLAST128);
33567 def_builtin_const (OPTION_MASK_ISA_AES, "__builtin_ia32_aesimc128",
33568 V2DI_FTYPE_V2DI, IX86_BUILTIN_AESIMC128);
33569 def_builtin_const (OPTION_MASK_ISA_AES, "__builtin_ia32_aeskeygenassist128",
33570 V2DI_FTYPE_V2DI_INT, IX86_BUILTIN_AESKEYGENASSIST128);
33572 /* PCLMUL */
33573 def_builtin_const (OPTION_MASK_ISA_PCLMUL, "__builtin_ia32_pclmulqdq128",
33574 V2DI_FTYPE_V2DI_V2DI_INT, IX86_BUILTIN_PCLMULQDQ128);
33576 /* RDRND */
33577 def_builtin (OPTION_MASK_ISA_RDRND, "__builtin_ia32_rdrand16_step",
33578 INT_FTYPE_PUSHORT, IX86_BUILTIN_RDRAND16_STEP);
33579 def_builtin (OPTION_MASK_ISA_RDRND, "__builtin_ia32_rdrand32_step",
33580 INT_FTYPE_PUNSIGNED, IX86_BUILTIN_RDRAND32_STEP);
33581 def_builtin (OPTION_MASK_ISA_RDRND | OPTION_MASK_ISA_64BIT,
33582 "__builtin_ia32_rdrand64_step", INT_FTYPE_PULONGLONG,
33583 IX86_BUILTIN_RDRAND64_STEP);
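/* Usage sketch (placeholder variable names): the *_step builtins take a
   pointer for the random value and return the carry flag, so callers
   typically retry until the hardware reports success, e.g.

     unsigned int r;
     while (!__builtin_ia32_rdrand32_step (&r))
       ;   // RDRAND can transiently fail; loop until it succeeds

   The _rdrand32_step wrapper in immintrin.h forwards to this builtin and
   leaves the retry policy to the caller.  */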
33585 /* AVX2 */
33586 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gathersiv2df",
33587 V2DF_FTYPE_V2DF_PCDOUBLE_V4SI_V2DF_INT,
33588 IX86_BUILTIN_GATHERSIV2DF);
33590 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gathersiv4df",
33591 V4DF_FTYPE_V4DF_PCDOUBLE_V4SI_V4DF_INT,
33592 IX86_BUILTIN_GATHERSIV4DF);
33594 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gatherdiv2df",
33595 V2DF_FTYPE_V2DF_PCDOUBLE_V2DI_V2DF_INT,
33596 IX86_BUILTIN_GATHERDIV2DF);
33598 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gatherdiv4df",
33599 V4DF_FTYPE_V4DF_PCDOUBLE_V4DI_V4DF_INT,
33600 IX86_BUILTIN_GATHERDIV4DF);
33602 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gathersiv4sf",
33603 V4SF_FTYPE_V4SF_PCFLOAT_V4SI_V4SF_INT,
33604 IX86_BUILTIN_GATHERSIV4SF);
33606 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gathersiv8sf",
33607 V8SF_FTYPE_V8SF_PCFLOAT_V8SI_V8SF_INT,
33608 IX86_BUILTIN_GATHERSIV8SF);
33610 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gatherdiv4sf",
33611 V4SF_FTYPE_V4SF_PCFLOAT_V2DI_V4SF_INT,
33612 IX86_BUILTIN_GATHERDIV4SF);
33614 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gatherdiv4sf256",
33615 V4SF_FTYPE_V4SF_PCFLOAT_V4DI_V4SF_INT,
33616 IX86_BUILTIN_GATHERDIV8SF);
33618 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gathersiv2di",
33619 V2DI_FTYPE_V2DI_PCINT64_V4SI_V2DI_INT,
33620 IX86_BUILTIN_GATHERSIV2DI);
33622 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gathersiv4di",
33623 V4DI_FTYPE_V4DI_PCINT64_V4SI_V4DI_INT,
33624 IX86_BUILTIN_GATHERSIV4DI);
33626 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gatherdiv2di",
33627 V2DI_FTYPE_V2DI_PCINT64_V2DI_V2DI_INT,
33628 IX86_BUILTIN_GATHERDIV2DI);
33630 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gatherdiv4di",
33631 V4DI_FTYPE_V4DI_PCINT64_V4DI_V4DI_INT,
33632 IX86_BUILTIN_GATHERDIV4DI);
33634 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gathersiv4si",
33635 V4SI_FTYPE_V4SI_PCINT_V4SI_V4SI_INT,
33636 IX86_BUILTIN_GATHERSIV4SI);
33638 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gathersiv8si",
33639 V8SI_FTYPE_V8SI_PCINT_V8SI_V8SI_INT,
33640 IX86_BUILTIN_GATHERSIV8SI);
33642 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gatherdiv4si",
33643 V4SI_FTYPE_V4SI_PCINT_V2DI_V4SI_INT,
33644 IX86_BUILTIN_GATHERDIV4SI);
33646 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gatherdiv4si256",
33647 V4SI_FTYPE_V4SI_PCINT_V4DI_V4SI_INT,
33648 IX86_BUILTIN_GATHERDIV8SI);
33650 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gatheraltsiv4df",
33651 V4DF_FTYPE_V4DF_PCDOUBLE_V8SI_V4DF_INT,
33652 IX86_BUILTIN_GATHERALTSIV4DF);
33654 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gatheraltdiv4sf256",
33655 V8SF_FTYPE_V8SF_PCFLOAT_V4DI_V8SF_INT,
33656 IX86_BUILTIN_GATHERALTDIV8SF);
33658 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gatheraltsiv4di",
33659 V4DI_FTYPE_V4DI_PCINT64_V8SI_V4DI_INT,
33660 IX86_BUILTIN_GATHERALTSIV4DI);
33662 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gatheraltdiv4si256",
33663 V8SI_FTYPE_V8SI_PCINT_V4DI_V8SI_INT,
33664 IX86_BUILTIN_GATHERALTDIV8SI);
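/* Usage sketch (placeholder variable names): each gather builtin takes the
   source/merge vector, the base pointer, the index vector, the mask vector
   and an immediate scale, in that order, mirroring the FTYPE strings above.
   A masked 32-bit-index float gather, for instance, boils down to roughly

     __m128 r = (__m128) __builtin_ia32_gathersiv4sf ((__v4sf) src, base,
                                                      (__v4si) idx,
                                                      (__v4sf) mask, 4);

   where src, base, idx and mask are placeholders and the scale must be one
   of 1, 2, 4 or 8; the avx2intrin.h gather intrinsics wrap these builtins
   in essentially this form.  */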
33666 /* AVX512F */
33667 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_gathersiv16sf",
33668 V16SF_FTYPE_V16SF_PCFLOAT_V16SI_HI_INT,
33669 IX86_BUILTIN_GATHER3SIV16SF);
33671 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_gathersiv8df",
33672 V8DF_FTYPE_V8DF_PCDOUBLE_V8SI_QI_INT,
33673 IX86_BUILTIN_GATHER3SIV8DF);
33675 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_gatherdiv16sf",
33676 V8SF_FTYPE_V8SF_PCFLOAT_V8DI_QI_INT,
33677 IX86_BUILTIN_GATHER3DIV16SF);
33679 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_gatherdiv8df",
33680 V8DF_FTYPE_V8DF_PCDOUBLE_V8DI_QI_INT,
33681 IX86_BUILTIN_GATHER3DIV8DF);
33683 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_gathersiv16si",
33684 V16SI_FTYPE_V16SI_PCINT_V16SI_HI_INT,
33685 IX86_BUILTIN_GATHER3SIV16SI);
33687 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_gathersiv8di",
33688 V8DI_FTYPE_V8DI_PCINT64_V8SI_QI_INT,
33689 IX86_BUILTIN_GATHER3SIV8DI);
33691 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_gatherdiv16si",
33692 V8SI_FTYPE_V8SI_PCINT_V8DI_QI_INT,
33693 IX86_BUILTIN_GATHER3DIV16SI);
33695 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_gatherdiv8di",
33696 V8DI_FTYPE_V8DI_PCINT64_V8DI_QI_INT,
33697 IX86_BUILTIN_GATHER3DIV8DI);
33699 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_gatheraltsiv8df",
33700 V8DF_FTYPE_V8DF_PCDOUBLE_V16SI_QI_INT,
33701 IX86_BUILTIN_GATHER3ALTSIV8DF);
33703 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_gatheraltdiv8sf",
33704 V16SF_FTYPE_V16SF_PCFLOAT_V8DI_HI_INT,
33705 IX86_BUILTIN_GATHER3ALTDIV16SF);
33707 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_gatheraltsiv8di",
33708 V8DI_FTYPE_V8DI_PCINT64_V16SI_QI_INT,
33709 IX86_BUILTIN_GATHER3ALTSIV8DI);
33711 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_gatheraltdiv8si",
33712 V16SI_FTYPE_V16SI_PCINT_V8DI_HI_INT,
33713 IX86_BUILTIN_GATHER3ALTDIV16SI);
33715 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_scattersiv16sf",
33716 VOID_FTYPE_PFLOAT_HI_V16SI_V16SF_INT,
33717 IX86_BUILTIN_SCATTERSIV16SF);
33719 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_scattersiv8df",
33720 VOID_FTYPE_PDOUBLE_QI_V8SI_V8DF_INT,
33721 IX86_BUILTIN_SCATTERSIV8DF);
33723 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_scatterdiv16sf",
33724 VOID_FTYPE_PFLOAT_QI_V8DI_V8SF_INT,
33725 IX86_BUILTIN_SCATTERDIV16SF);
33727 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_scatterdiv8df",
33728 VOID_FTYPE_PDOUBLE_QI_V8DI_V8DF_INT,
33729 IX86_BUILTIN_SCATTERDIV8DF);
33731 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_scattersiv16si",
33732 VOID_FTYPE_PINT_HI_V16SI_V16SI_INT,
33733 IX86_BUILTIN_SCATTERSIV16SI);
33735 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_scattersiv8di",
33736 VOID_FTYPE_PLONGLONG_QI_V8SI_V8DI_INT,
33737 IX86_BUILTIN_SCATTERSIV8DI);
33739 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_scatterdiv16si",
33740 VOID_FTYPE_PINT_QI_V8DI_V8SI_INT,
33741 IX86_BUILTIN_SCATTERDIV16SI);
33743 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_scatterdiv8di",
33744 VOID_FTYPE_PLONGLONG_QI_V8DI_V8DI_INT,
33745 IX86_BUILTIN_SCATTERDIV8DI);
33747 /* AVX512VL */
33748 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_gather3siv2df",
33749 V2DF_FTYPE_V2DF_PCDOUBLE_V4SI_QI_INT,
33750 IX86_BUILTIN_GATHER3SIV2DF);
33752 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_gather3siv4df",
33753 V4DF_FTYPE_V4DF_PCDOUBLE_V4SI_QI_INT,
33754 IX86_BUILTIN_GATHER3SIV4DF);
33756 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_gather3div2df",
33757 V2DF_FTYPE_V2DF_PCDOUBLE_V2DI_QI_INT,
33758 IX86_BUILTIN_GATHER3DIV2DF);
33760 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_gather3div4df",
33761 V4DF_FTYPE_V4DF_PCDOUBLE_V4DI_QI_INT,
33762 IX86_BUILTIN_GATHER3DIV4DF);
33764 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_gather3siv4sf",
33765 V4SF_FTYPE_V4SF_PCFLOAT_V4SI_QI_INT,
33766 IX86_BUILTIN_GATHER3SIV4SF);
33768 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_gather3siv8sf",
33769 V8SF_FTYPE_V8SF_PCFLOAT_V8SI_QI_INT,
33770 IX86_BUILTIN_GATHER3SIV8SF);
33772 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_gather3div4sf",
33773 V4SF_FTYPE_V4SF_PCFLOAT_V2DI_QI_INT,
33774 IX86_BUILTIN_GATHER3DIV4SF);
33776 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_gather3div8sf",
33777 V4SF_FTYPE_V4SF_PCFLOAT_V4DI_QI_INT,
33778 IX86_BUILTIN_GATHER3DIV8SF);
33780 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_gather3siv2di",
33781 V2DI_FTYPE_V2DI_PCINT64_V4SI_QI_INT,
33782 IX86_BUILTIN_GATHER3SIV2DI);
33784 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_gather3siv4di",
33785 V4DI_FTYPE_V4DI_PCINT64_V4SI_QI_INT,
33786 IX86_BUILTIN_GATHER3SIV4DI);
33788 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_gather3div2di",
33789 V2DI_FTYPE_V2DI_PCINT64_V2DI_QI_INT,
33790 IX86_BUILTIN_GATHER3DIV2DI);
33792 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_gather3div4di",
33793 V4DI_FTYPE_V4DI_PCINT64_V4DI_QI_INT,
33794 IX86_BUILTIN_GATHER3DIV4DI);
33796 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_gather3siv4si",
33797 V4SI_FTYPE_V4SI_PCINT_V4SI_QI_INT,
33798 IX86_BUILTIN_GATHER3SIV4SI);
33800 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_gather3siv8si",
33801 V8SI_FTYPE_V8SI_PCINT_V8SI_QI_INT,
33802 IX86_BUILTIN_GATHER3SIV8SI);
33804 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_gather3div4si",
33805 V4SI_FTYPE_V4SI_PCINT_V2DI_QI_INT,
33806 IX86_BUILTIN_GATHER3DIV4SI);
33808 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_gather3div8si",
33809 V4SI_FTYPE_V4SI_PCINT_V4DI_QI_INT,
33810 IX86_BUILTIN_GATHER3DIV8SI);
33812 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_gather3altsiv4df",
33813 V4DF_FTYPE_V4DF_PCDOUBLE_V8SI_QI_INT,
33814 IX86_BUILTIN_GATHER3ALTSIV4DF);
33816 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_gather3altdiv8sf",
33817 V8SF_FTYPE_V8SF_PCFLOAT_V4DI_QI_INT,
33818 IX86_BUILTIN_GATHER3ALTDIV8SF);
33820 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_gather3altsiv4di",
33821 V4DI_FTYPE_V4DI_PCINT64_V8SI_QI_INT,
33822 IX86_BUILTIN_GATHER3ALTSIV4DI);
33824 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_gather3altdiv8si",
33825 V8SI_FTYPE_V8SI_PCINT_V4DI_QI_INT,
33826 IX86_BUILTIN_GATHER3ALTDIV8SI);
33828 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_scattersiv8sf",
33829 VOID_FTYPE_PFLOAT_QI_V8SI_V8SF_INT,
33830 IX86_BUILTIN_SCATTERSIV8SF);
33832 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_scattersiv4sf",
33833 VOID_FTYPE_PFLOAT_QI_V4SI_V4SF_INT,
33834 IX86_BUILTIN_SCATTERSIV4SF);
33836 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_scattersiv4df",
33837 VOID_FTYPE_PDOUBLE_QI_V4SI_V4DF_INT,
33838 IX86_BUILTIN_SCATTERSIV4DF);
33840 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_scattersiv2df",
33841 VOID_FTYPE_PDOUBLE_QI_V4SI_V2DF_INT,
33842 IX86_BUILTIN_SCATTERSIV2DF);
33844 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_scatterdiv8sf",
33845 VOID_FTYPE_PFLOAT_QI_V4DI_V4SF_INT,
33846 IX86_BUILTIN_SCATTERDIV8SF);
33848 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_scatterdiv4sf",
33849 VOID_FTYPE_PFLOAT_QI_V2DI_V4SF_INT,
33850 IX86_BUILTIN_SCATTERDIV4SF);
33852 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_scatterdiv4df",
33853 VOID_FTYPE_PDOUBLE_QI_V4DI_V4DF_INT,
33854 IX86_BUILTIN_SCATTERDIV4DF);
33856 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_scatterdiv2df",
33857 VOID_FTYPE_PDOUBLE_QI_V2DI_V2DF_INT,
33858 IX86_BUILTIN_SCATTERDIV2DF);
33860 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_scattersiv8si",
33861 VOID_FTYPE_PINT_QI_V8SI_V8SI_INT,
33862 IX86_BUILTIN_SCATTERSIV8SI);
33864 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_scattersiv4si",
33865 VOID_FTYPE_PINT_QI_V4SI_V4SI_INT,
33866 IX86_BUILTIN_SCATTERSIV4SI);
33868 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_scattersiv4di",
33869 VOID_FTYPE_PLONGLONG_QI_V4SI_V4DI_INT,
33870 IX86_BUILTIN_SCATTERSIV4DI);
33872 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_scattersiv2di",
33873 VOID_FTYPE_PLONGLONG_QI_V4SI_V2DI_INT,
33874 IX86_BUILTIN_SCATTERSIV2DI);
33876 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_scatterdiv8si",
33877 VOID_FTYPE_PINT_QI_V4DI_V4SI_INT,
33878 IX86_BUILTIN_SCATTERDIV8SI);
33880 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_scatterdiv4si",
33881 VOID_FTYPE_PINT_QI_V2DI_V4SI_INT,
33882 IX86_BUILTIN_SCATTERDIV4SI);
33884 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_scatterdiv4di",
33885 VOID_FTYPE_PLONGLONG_QI_V4DI_V4DI_INT,
33886 IX86_BUILTIN_SCATTERDIV4DI);
33888 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_scatterdiv2di",
33889 VOID_FTYPE_PLONGLONG_QI_V2DI_V2DI_INT,
33890 IX86_BUILTIN_SCATTERDIV2DI);
33892 /* AVX512PF */
33893 def_builtin (OPTION_MASK_ISA_AVX512PF, "__builtin_ia32_gatherpfdpd",
33894 VOID_FTYPE_QI_V8SI_PCINT64_INT_INT,
33895 IX86_BUILTIN_GATHERPFDPD);
33896 def_builtin (OPTION_MASK_ISA_AVX512PF, "__builtin_ia32_gatherpfdps",
33897 VOID_FTYPE_HI_V16SI_PCINT_INT_INT,
33898 IX86_BUILTIN_GATHERPFDPS);
33899 def_builtin (OPTION_MASK_ISA_AVX512PF, "__builtin_ia32_gatherpfqpd",
33900 VOID_FTYPE_QI_V8DI_PCINT64_INT_INT,
33901 IX86_BUILTIN_GATHERPFQPD);
33902 def_builtin (OPTION_MASK_ISA_AVX512PF, "__builtin_ia32_gatherpfqps",
33903 VOID_FTYPE_QI_V8DI_PCINT_INT_INT,
33904 IX86_BUILTIN_GATHERPFQPS);
33905 def_builtin (OPTION_MASK_ISA_AVX512PF, "__builtin_ia32_scatterpfdpd",
33906 VOID_FTYPE_QI_V8SI_PCINT64_INT_INT,
33907 IX86_BUILTIN_SCATTERPFDPD);
33908 def_builtin (OPTION_MASK_ISA_AVX512PF, "__builtin_ia32_scatterpfdps",
33909 VOID_FTYPE_HI_V16SI_PCINT_INT_INT,
33910 IX86_BUILTIN_SCATTERPFDPS);
33911 def_builtin (OPTION_MASK_ISA_AVX512PF, "__builtin_ia32_scatterpfqpd",
33912 VOID_FTYPE_QI_V8DI_PCINT64_INT_INT,
33913 IX86_BUILTIN_SCATTERPFQPD);
33914 def_builtin (OPTION_MASK_ISA_AVX512PF, "__builtin_ia32_scatterpfqps",
33915 VOID_FTYPE_QI_V8DI_PCINT_INT_INT,
33916 IX86_BUILTIN_SCATTERPFQPS);
33918 /* SHA */
33919 def_builtin_const (OPTION_MASK_ISA_SHA, "__builtin_ia32_sha1msg1",
33920 V4SI_FTYPE_V4SI_V4SI, IX86_BUILTIN_SHA1MSG1);
33921 def_builtin_const (OPTION_MASK_ISA_SHA, "__builtin_ia32_sha1msg2",
33922 V4SI_FTYPE_V4SI_V4SI, IX86_BUILTIN_SHA1MSG2);
33923 def_builtin_const (OPTION_MASK_ISA_SHA, "__builtin_ia32_sha1nexte",
33924 V4SI_FTYPE_V4SI_V4SI, IX86_BUILTIN_SHA1NEXTE);
33925 def_builtin_const (OPTION_MASK_ISA_SHA, "__builtin_ia32_sha1rnds4",
33926 V4SI_FTYPE_V4SI_V4SI_INT, IX86_BUILTIN_SHA1RNDS4);
33927 def_builtin_const (OPTION_MASK_ISA_SHA, "__builtin_ia32_sha256msg1",
33928 V4SI_FTYPE_V4SI_V4SI, IX86_BUILTIN_SHA256MSG1);
33929 def_builtin_const (OPTION_MASK_ISA_SHA, "__builtin_ia32_sha256msg2",
33930 V4SI_FTYPE_V4SI_V4SI, IX86_BUILTIN_SHA256MSG2);
33931 def_builtin_const (OPTION_MASK_ISA_SHA, "__builtin_ia32_sha256rnds2",
33932 V4SI_FTYPE_V4SI_V4SI_V4SI, IX86_BUILTIN_SHA256RNDS2);
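/* Usage sketch (placeholder variable names): these correspond one-to-one to
   the SHA-NI instructions; per V4SI_FTYPE_V4SI_V4SI_INT above, the
   four-round SHA-1 step looks roughly like

     __v4si abcd2 = __builtin_ia32_sha1rnds4 (abcd, msg, 0);
                    // immediate selects the round-constant group (0-3)

   with abcd and msg standing in for the state and message vectors; the
   shaintrin.h intrinsics such as _mm_sha1rnds4_epu32 are thin wrappers over
   these builtins.  */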
33934 /* RTM. */
33935 def_builtin (OPTION_MASK_ISA_RTM, "__builtin_ia32_xabort",
33936 VOID_FTYPE_UNSIGNED, IX86_BUILTIN_XABORT);
33938 /* MMX access to the vec_init patterns. */
33939 def_builtin_const (OPTION_MASK_ISA_MMX, "__builtin_ia32_vec_init_v2si",
33940 V2SI_FTYPE_INT_INT, IX86_BUILTIN_VEC_INIT_V2SI);
33942 def_builtin_const (OPTION_MASK_ISA_MMX, "__builtin_ia32_vec_init_v4hi",
33943 V4HI_FTYPE_HI_HI_HI_HI,
33944 IX86_BUILTIN_VEC_INIT_V4HI);
33946 def_builtin_const (OPTION_MASK_ISA_MMX, "__builtin_ia32_vec_init_v8qi",
33947 V8QI_FTYPE_QI_QI_QI_QI_QI_QI_QI_QI,
33948 IX86_BUILTIN_VEC_INIT_V8QI);
33950 /* Access to the vec_extract patterns. */
33951 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_ext_v2df",
33952 DOUBLE_FTYPE_V2DF_INT, IX86_BUILTIN_VEC_EXT_V2DF);
33953 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_ext_v2di",
33954 DI_FTYPE_V2DI_INT, IX86_BUILTIN_VEC_EXT_V2DI);
33955 def_builtin_const (OPTION_MASK_ISA_SSE, "__builtin_ia32_vec_ext_v4sf",
33956 FLOAT_FTYPE_V4SF_INT, IX86_BUILTIN_VEC_EXT_V4SF);
33957 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_ext_v4si",
33958 SI_FTYPE_V4SI_INT, IX86_BUILTIN_VEC_EXT_V4SI);
33959 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_ext_v8hi",
33960 HI_FTYPE_V8HI_INT, IX86_BUILTIN_VEC_EXT_V8HI);
33962 def_builtin_const (OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A,
33963 "__builtin_ia32_vec_ext_v4hi",
33964 HI_FTYPE_V4HI_INT, IX86_BUILTIN_VEC_EXT_V4HI);
33966 def_builtin_const (OPTION_MASK_ISA_MMX, "__builtin_ia32_vec_ext_v2si",
33967 SI_FTYPE_V2SI_INT, IX86_BUILTIN_VEC_EXT_V2SI);
33969 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_ext_v16qi",
33970 QI_FTYPE_V16QI_INT, IX86_BUILTIN_VEC_EXT_V16QI);
33972 /* Access to the vec_set patterns. */
33973 def_builtin_const (OPTION_MASK_ISA_SSE4_1 | OPTION_MASK_ISA_64BIT,
33974 "__builtin_ia32_vec_set_v2di",
33975 V2DI_FTYPE_V2DI_DI_INT, IX86_BUILTIN_VEC_SET_V2DI);
33977 def_builtin_const (OPTION_MASK_ISA_SSE4_1, "__builtin_ia32_vec_set_v4sf",
33978 V4SF_FTYPE_V4SF_FLOAT_INT, IX86_BUILTIN_VEC_SET_V4SF);
33980 def_builtin_const (OPTION_MASK_ISA_SSE4_1, "__builtin_ia32_vec_set_v4si",
33981 V4SI_FTYPE_V4SI_SI_INT, IX86_BUILTIN_VEC_SET_V4SI);
33983 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_set_v8hi",
33984 V8HI_FTYPE_V8HI_HI_INT, IX86_BUILTIN_VEC_SET_V8HI);
33986 def_builtin_const (OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A,
33987 "__builtin_ia32_vec_set_v4hi",
33988 V4HI_FTYPE_V4HI_HI_INT, IX86_BUILTIN_VEC_SET_V4HI);
33990 def_builtin_const (OPTION_MASK_ISA_SSE4_1, "__builtin_ia32_vec_set_v16qi",
33991 V16QI_FTYPE_V16QI_QI_INT, IX86_BUILTIN_VEC_SET_V16QI);
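/* Usage sketch (placeholder values): the vec_set builtins are what the
   scalar element-insert intrinsics reduce to; per V8HI_FTYPE_V8HI_HI_INT
   above, a 16-bit element insert looks roughly like

     __v8hi r = __builtin_ia32_vec_set_v8hi ((__v8hi) v, x, 3);
                // insert x into lane 3 of v

   which is essentially what _mm_insert_epi16 in emmintrin.h expands to.  */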
33993 /* RDSEED */
33994 def_builtin (OPTION_MASK_ISA_RDSEED, "__builtin_ia32_rdseed_hi_step",
33995 INT_FTYPE_PUSHORT, IX86_BUILTIN_RDSEED16_STEP);
33996 def_builtin (OPTION_MASK_ISA_RDSEED, "__builtin_ia32_rdseed_si_step",
33997 INT_FTYPE_PUNSIGNED, IX86_BUILTIN_RDSEED32_STEP);
33998 def_builtin (OPTION_MASK_ISA_RDSEED | OPTION_MASK_ISA_64BIT,
33999 "__builtin_ia32_rdseed_di_step",
34000 INT_FTYPE_PULONGLONG, IX86_BUILTIN_RDSEED64_STEP);
34002 /* ADCX */
34003 def_builtin (0, "__builtin_ia32_addcarryx_u32",
34004 UCHAR_FTYPE_UCHAR_UINT_UINT_PUNSIGNED, IX86_BUILTIN_ADDCARRYX32);
34005 def_builtin (OPTION_MASK_ISA_64BIT,
34006 "__builtin_ia32_addcarryx_u64",
34007 UCHAR_FTYPE_UCHAR_ULONGLONG_ULONGLONG_PULONGLONG,
34008 IX86_BUILTIN_ADDCARRYX64);
34010 /* SBB */
34011 def_builtin (0, "__builtin_ia32_sbb_u32",
34012 UCHAR_FTYPE_UCHAR_UINT_UINT_PUNSIGNED, IX86_BUILTIN_SBB32);
34013 def_builtin (OPTION_MASK_ISA_64BIT,
34014 "__builtin_ia32_sbb_u64",
34015 UCHAR_FTYPE_UCHAR_ULONGLONG_ULONGLONG_PULONGLONG,
34016 IX86_BUILTIN_SBB64);
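/* Usage sketch (placeholder operands): per
   UCHAR_FTYPE_UCHAR_UINT_UINT_PUNSIGNED above, the carry-chain builtins take
   the incoming carry, two operands and a result pointer, and return the
   outgoing carry, making multi-word addition straightforward:

     unsigned int lo, hi;
     unsigned char c;
     c = __builtin_ia32_addcarryx_u32 (0, a0, b0, &lo);   // low words
     c = __builtin_ia32_addcarryx_u32 (c, a1, b1, &hi);   // carry propagates

   The sbb builtins follow the same pattern for borrow propagation.  */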
34018 /* Read/write FLAGS. */
34019 def_builtin (~OPTION_MASK_ISA_64BIT, "__builtin_ia32_readeflags_u32",
34020 UNSIGNED_FTYPE_VOID, IX86_BUILTIN_READ_FLAGS);
34021 def_builtin (OPTION_MASK_ISA_64BIT, "__builtin_ia32_readeflags_u64",
34022 UINT64_FTYPE_VOID, IX86_BUILTIN_READ_FLAGS);
34023 def_builtin (~OPTION_MASK_ISA_64BIT, "__builtin_ia32_writeeflags_u32",
34024 VOID_FTYPE_UNSIGNED, IX86_BUILTIN_WRITE_FLAGS);
34025 def_builtin (OPTION_MASK_ISA_64BIT, "__builtin_ia32_writeeflags_u64",
34026 VOID_FTYPE_UINT64, IX86_BUILTIN_WRITE_FLAGS);
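/* Usage sketch: on a 64-bit target the flags builtins simply read and write
   the whole flags register,

     unsigned long long flags = __builtin_ia32_readeflags_u64 ();
     __builtin_ia32_writeeflags_u64 (flags);   // round-trips RFLAGS

   while 32-bit targets get only the _u32 variants defined just above.  */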
34028 /* CLFLUSHOPT. */
34029 def_builtin (OPTION_MASK_ISA_CLFLUSHOPT, "__builtin_ia32_clflushopt",
34030 VOID_FTYPE_PCVOID, IX86_BUILTIN_CLFLUSHOPT);
34032 /* CLWB. */
34033 def_builtin (OPTION_MASK_ISA_CLWB, "__builtin_ia32_clwb",
34034 VOID_FTYPE_PCVOID, IX86_BUILTIN_CLWB);
34036 /* Add FMA4 and XOP multi-argument builtins. */
34037 for (i = 0, d = bdesc_multi_arg; i < ARRAY_SIZE (bdesc_multi_arg); i++, d++)
34039 if (d->name == 0)
34040 continue;
34042 ftype = (enum ix86_builtin_func_type) d->flag;
34043 def_builtin_const (d->mask, d->name, ftype, d->code);
34047 static void
34048 ix86_init_mpx_builtins ()
34050 const struct builtin_description * d;
34051 enum ix86_builtin_func_type ftype;
34052 tree decl;
34053 size_t i;
34055 for (i = 0, d = bdesc_mpx;
34056 i < ARRAY_SIZE (bdesc_mpx);
34057 i++, d++)
34059 if (d->name == 0)
34060 continue;
34062 ftype = (enum ix86_builtin_func_type) d->flag;
34063 decl = def_builtin (d->mask, d->name, ftype, d->code);
34065 /* Without the leaf and nothrow flags, abnormal edges
34066 may follow calls to MPX builtins when setjmp is
34067 present in the function. Since there may be many
34068 MPX builtin calls, this creates lots of useless
34069 edges and enormous PHI nodes. To avoid this, mark
34070 MPX builtins as leaf and nothrow. */
34071 if (decl)
34073 DECL_ATTRIBUTES (decl) = build_tree_list (get_identifier ("leaf"),
34074 NULL_TREE);
34075 TREE_NOTHROW (decl) = 1;
34077 else
34079 ix86_builtins_isa[(int)d->code].leaf_p = true;
34080 ix86_builtins_isa[(int)d->code].nothrow_p = true;
34084 for (i = 0, d = bdesc_mpx_const;
34085 i < ARRAY_SIZE (bdesc_mpx_const);
34086 i++, d++)
34088 if (d->name == 0)
34089 continue;
34091 ftype = (enum ix86_builtin_func_type) d->flag;
34092 decl = def_builtin_const (d->mask, d->name, ftype, d->code);
34094 if (decl)
34096 DECL_ATTRIBUTES (decl) = build_tree_list (get_identifier ("leaf"),
34097 NULL_TREE);
34098 TREE_NOTHROW (decl) = 1;
34100 else
34102 ix86_builtins_isa[(int)d->code].leaf_p = true;
34103 ix86_builtins_isa[(int)d->code].nothrow_p = true;
34108 /* This adds a condition to the basic_block NEW_BB in function FUNCTION_DECL
34109 to return a pointer to VERSION_DECL if the outcome of the expression
34110 formed by PREDICATE_CHAIN is true. This function will be called during
34111 version dispatch to decide which function version to execute. It returns
34112 the basic block at the end, to which more conditions can be added. */
34114 static basic_block
34115 add_condition_to_bb (tree function_decl, tree version_decl,
34116 tree predicate_chain, basic_block new_bb)
34118 gimple return_stmt;
34119 tree convert_expr, result_var;
34120 gimple convert_stmt;
34121 gimple call_cond_stmt;
34122 gimple if_else_stmt;
34124 basic_block bb1, bb2, bb3;
34125 edge e12, e23;
34127 tree cond_var, and_expr_var = NULL_TREE;
34128 gimple_seq gseq;
34130 tree predicate_decl, predicate_arg;
34132 push_cfun (DECL_STRUCT_FUNCTION (function_decl));
34134 gcc_assert (new_bb != NULL);
34135 gseq = bb_seq (new_bb);
34138 convert_expr = build1 (CONVERT_EXPR, ptr_type_node,
34139 build_fold_addr_expr (version_decl));
34140 result_var = create_tmp_var (ptr_type_node);
34141 convert_stmt = gimple_build_assign (result_var, convert_expr);
34142 return_stmt = gimple_build_return (result_var);
34144 if (predicate_chain == NULL_TREE)
34146 gimple_seq_add_stmt (&gseq, convert_stmt);
34147 gimple_seq_add_stmt (&gseq, return_stmt);
34148 set_bb_seq (new_bb, gseq);
34149 gimple_set_bb (convert_stmt, new_bb);
34150 gimple_set_bb (return_stmt, new_bb);
34151 pop_cfun ();
34152 return new_bb;
34155 while (predicate_chain != NULL)
34157 cond_var = create_tmp_var (integer_type_node);
34158 predicate_decl = TREE_PURPOSE (predicate_chain);
34159 predicate_arg = TREE_VALUE (predicate_chain);
34160 call_cond_stmt = gimple_build_call (predicate_decl, 1, predicate_arg);
34161 gimple_call_set_lhs (call_cond_stmt, cond_var);
34163 gimple_set_block (call_cond_stmt, DECL_INITIAL (function_decl));
34164 gimple_set_bb (call_cond_stmt, new_bb);
34165 gimple_seq_add_stmt (&gseq, call_cond_stmt);
34167 predicate_chain = TREE_CHAIN (predicate_chain);
34169 if (and_expr_var == NULL)
34170 and_expr_var = cond_var;
34171 else
34173 gimple assign_stmt;
34174 /* Use MIN_EXPR to check whether any predicate result is zero:
34175 and_expr_var = MIN_EXPR <cond_var, and_expr_var>. */
34176 assign_stmt = gimple_build_assign (and_expr_var,
34177 build2 (MIN_EXPR, integer_type_node,
34178 cond_var, and_expr_var));
34180 gimple_set_block (assign_stmt, DECL_INITIAL (function_decl));
34181 gimple_set_bb (assign_stmt, new_bb);
34182 gimple_seq_add_stmt (&gseq, assign_stmt);
34186 if_else_stmt = gimple_build_cond (GT_EXPR, and_expr_var,
34187 integer_zero_node,
34188 NULL_TREE, NULL_TREE);
34189 gimple_set_block (if_else_stmt, DECL_INITIAL (function_decl));
34190 gimple_set_bb (if_else_stmt, new_bb);
34191 gimple_seq_add_stmt (&gseq, if_else_stmt);
34193 gimple_seq_add_stmt (&gseq, convert_stmt);
34194 gimple_seq_add_stmt (&gseq, return_stmt);
34195 set_bb_seq (new_bb, gseq);
34197 bb1 = new_bb;
34198 e12 = split_block (bb1, if_else_stmt);
34199 bb2 = e12->dest;
34200 e12->flags &= ~EDGE_FALLTHRU;
34201 e12->flags |= EDGE_TRUE_VALUE;
34203 e23 = split_block (bb2, return_stmt);
34205 gimple_set_bb (convert_stmt, bb2);
34206 gimple_set_bb (return_stmt, bb2);
34208 bb3 = e23->dest;
34209 make_edge (bb1, bb3, EDGE_FALSE_VALUE);
34211 remove_edge (e23);
34212 make_edge (bb2, EXIT_BLOCK_PTR_FOR_FN (cfun), 0);
34214 pop_cfun ();
34216 return bb3;
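/* Illustrative sketch (placeholder symbol names): the dispatcher body that
   add_condition_to_bb builds behaves, at the source level, roughly like

     if (__builtin_cpu_is ("haswell") && __builtin_cpu_supports ("avx2"))
       return (void *) &foo_haswell_avx2;   // this version's decl
     ...                                    // next condition block follows

   one block per function version, with the individual predicate results
   combined through the MIN_EXPR chain above and tested against zero by the
   final GT_EXPR condition.  */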
34219 /* This parses the attribute arguments to target in DECL and determines
34220 the right builtin to use to match the platform specification.
34221 It returns the priority value for this version decl. If PREDICATE_LIST
34222 is not NULL, it stores the list of cpu features that need to be checked
34223 before dispatching this function. */
34225 static unsigned int
34226 get_builtin_code_for_version (tree decl, tree *predicate_list)
34228 tree attrs;
34229 struct cl_target_option cur_target;
34230 tree target_node;
34231 struct cl_target_option *new_target;
34232 const char *arg_str = NULL;
34233 const char *attrs_str = NULL;
34234 char *tok_str = NULL;
34235 char *token;
34237 /* Priority of i386 features; a greater value means higher priority. This is
34238 used to decide the order in which function dispatch must happen. For
34239 instance, a version specialized for SSE4.2 should be checked for dispatch
34240 before a version for SSE3, as SSE4.2 implies SSE3. */
34241 enum feature_priority
34243 P_ZERO = 0,
34244 P_MMX,
34245 P_SSE,
34246 P_SSE2,
34247 P_SSE3,
34248 P_SSSE3,
34249 P_PROC_SSSE3,
34250 P_SSE4_A,
34251 P_PROC_SSE4_A,
34252 P_SSE4_1,
34253 P_SSE4_2,
34254 P_PROC_SSE4_2,
34255 P_POPCNT,
34256 P_AVX,
34257 P_PROC_AVX,
34258 P_FMA4,
34259 P_XOP,
34260 P_PROC_XOP,
34261 P_FMA,
34262 P_PROC_FMA,
34263 P_AVX2,
34264 P_PROC_AVX2,
34265 P_AVX512F,
34266 P_PROC_AVX512F
34269 enum feature_priority priority = P_ZERO;
34271 /* These are the target attribute strings for which a dispatcher is
34272 available, from fold_builtin_cpu. */
34274 static struct _feature_list
34276 const char *const name;
34277 const enum feature_priority priority;
34279 const feature_list[] =
34281 {"mmx", P_MMX},
34282 {"sse", P_SSE},
34283 {"sse2", P_SSE2},
34284 {"sse3", P_SSE3},
34285 {"sse4a", P_SSE4_A},
34286 {"ssse3", P_SSSE3},
34287 {"sse4.1", P_SSE4_1},
34288 {"sse4.2", P_SSE4_2},
34289 {"popcnt", P_POPCNT},
34290 {"avx", P_AVX},
34291 {"fma4", P_FMA4},
34292 {"xop", P_XOP},
34293 {"fma", P_FMA},
34294 {"avx2", P_AVX2},
34295 {"avx512f", P_AVX512F}
34299 static unsigned int NUM_FEATURES
34300 = sizeof (feature_list) / sizeof (struct _feature_list);
34302 unsigned int i;
34304 tree predicate_chain = NULL_TREE;
34305 tree predicate_decl, predicate_arg;
34307 attrs = lookup_attribute ("target", DECL_ATTRIBUTES (decl));
34308 gcc_assert (attrs != NULL);
34310 attrs = TREE_VALUE (TREE_VALUE (attrs));
34312 gcc_assert (TREE_CODE (attrs) == STRING_CST);
34313 attrs_str = TREE_STRING_POINTER (attrs);
34315 /* Return priority zero for default function. */
34316 if (strcmp (attrs_str, "default") == 0)
34317 return 0;
34319 /* Handle arch= if specified. For priority, set it to be 1 more than
34320 the best instruction set the processor can handle. For instance, if
34321 there is a version for atom and a version for ssse3 (the highest ISA
34322 priority for atom), the atom version must be checked for dispatch
34323 before the ssse3 version. */
34324 if (strstr (attrs_str, "arch=") != NULL)
34326 cl_target_option_save (&cur_target, &global_options);
34327 target_node = ix86_valid_target_attribute_tree (attrs, &global_options,
34328 &global_options_set);
34330 gcc_assert (target_node);
34331 new_target = TREE_TARGET_OPTION (target_node);
34332 gcc_assert (new_target);
34334 if (new_target->arch_specified && new_target->arch > 0)
34336 switch (new_target->arch)
34338 case PROCESSOR_CORE2:
34339 arg_str = "core2";
34340 priority = P_PROC_SSSE3;
34341 break;
34342 case PROCESSOR_NEHALEM:
34343 if (new_target->x_ix86_isa_flags & OPTION_MASK_ISA_AES)
34344 arg_str = "westmere";
34345 else
34346 /* We translate "arch=corei7" and "arch=nehalem" to
34347 "corei7" so that they are mapped to the M_INTEL_COREI7
34348 CPU type, covering all the M_INTEL_COREI7_XXX subtypes. */
34349 arg_str = "corei7";
34350 priority = P_PROC_SSE4_2;
34351 break;
34352 case PROCESSOR_SANDYBRIDGE:
34353 if (new_target->x_ix86_isa_flags & OPTION_MASK_ISA_F16C)
34354 arg_str = "ivybridge";
34355 else
34356 arg_str = "sandybridge";
34357 priority = P_PROC_AVX;
34358 break;
34359 case PROCESSOR_HASWELL:
34360 if (new_target->x_ix86_isa_flags & OPTION_MASK_ISA_ADX)
34361 arg_str = "broadwell";
34362 else
34363 arg_str = "haswell";
34364 priority = P_PROC_AVX2;
34365 break;
34366 case PROCESSOR_BONNELL:
34367 arg_str = "bonnell";
34368 priority = P_PROC_SSSE3;
34369 break;
34370 case PROCESSOR_KNL:
34371 arg_str = "knl";
34372 priority = P_PROC_AVX512F;
34373 break;
34374 case PROCESSOR_SILVERMONT:
34375 arg_str = "silvermont";
34376 priority = P_PROC_SSE4_2;
34377 break;
34378 case PROCESSOR_AMDFAM10:
34379 arg_str = "amdfam10h";
34380 priority = P_PROC_SSE4_A;
34381 break;
34382 case PROCESSOR_BTVER1:
34383 arg_str = "btver1";
34384 priority = P_PROC_SSE4_A;
34385 break;
34386 case PROCESSOR_BTVER2:
34387 arg_str = "btver2";
34388 priority = P_PROC_AVX;
34389 break;
34390 case PROCESSOR_BDVER1:
34391 arg_str = "bdver1";
34392 priority = P_PROC_XOP;
34393 break;
34394 case PROCESSOR_BDVER2:
34395 arg_str = "bdver2";
34396 priority = P_PROC_FMA;
34397 break;
34398 case PROCESSOR_BDVER3:
34399 arg_str = "bdver3";
34400 priority = P_PROC_FMA;
34401 break;
34402 case PROCESSOR_BDVER4:
34403 arg_str = "bdver4";
34404 priority = P_PROC_AVX2;
34405 break;
34409 cl_target_option_restore (&global_options, &cur_target);
34411 if (predicate_list && arg_str == NULL)
34413 error_at (DECL_SOURCE_LOCATION (decl),
34414 "No dispatcher found for the versioning attributes");
34415 return 0;
34418 if (predicate_list)
34420 predicate_decl = ix86_builtins [(int) IX86_BUILTIN_CPU_IS];
34421 /* For a C string literal the length includes the trailing NULL. */
34422 predicate_arg = build_string_literal (strlen (arg_str) + 1, arg_str);
34423 predicate_chain = tree_cons (predicate_decl, predicate_arg,
34424 predicate_chain);
34428 /* Process feature name. */
34429 tok_str = (char *) xmalloc (strlen (attrs_str) + 1);
34430 strcpy (tok_str, attrs_str);
34431 token = strtok (tok_str, ",");
34432 predicate_decl = ix86_builtins [(int) IX86_BUILTIN_CPU_SUPPORTS];
34434 while (token != NULL)
34436 /* Do not process "arch="; it was handled above. */
34437 if (strncmp (token, "arch=", 5) == 0)
34439 token = strtok (NULL, ",");
34440 continue;
34442 for (i = 0; i < NUM_FEATURES; ++i)
34444 if (strcmp (token, feature_list[i].name) == 0)
34446 if (predicate_list)
34448 predicate_arg = build_string_literal (
34449 strlen (feature_list[i].name) + 1,
34450 feature_list[i].name);
34451 predicate_chain = tree_cons (predicate_decl, predicate_arg,
34452 predicate_chain);
34454 /* Find the maximum priority feature. */
34455 if (feature_list[i].priority > priority)
34456 priority = feature_list[i].priority;
34458 break;
34461 if (predicate_list && i == NUM_FEATURES)
34463 error_at (DECL_SOURCE_LOCATION (decl),
34464 "No dispatcher found for %s", token);
34465 return 0;
34467 token = strtok (NULL, ",");
34469 free (tok_str);
34471 if (predicate_list && predicate_chain == NULL_TREE)
34473 error_at (DECL_SOURCE_LOCATION (decl),
34474 "No dispatcher found for the versioning attributes: %s",
34475 attrs_str);
34476 return 0;
34478 else if (predicate_list)
34480 predicate_chain = nreverse (predicate_chain);
34481 *predicate_list = predicate_chain;
34484 return priority;
34487 /* This compares the priority of target features in function DECL1
34488 and DECL2. It returns positive value if DECL1 is higher priority,
34489 negative value if DECL2 is higher priority and 0 if they are the
34490 same. */
34492 static int
34493 ix86_compare_version_priority (tree decl1, tree decl2)
34495 unsigned int priority1 = get_builtin_code_for_version (decl1, NULL);
34496 unsigned int priority2 = get_builtin_code_for_version (decl2, NULL);
34498 return (int)priority1 - (int)priority2;
34501 /* V1 and V2 point to function versions with different priorities
34502 based on the target ISA. This function compares their priorities. */
34504 static int
34505 feature_compare (const void *v1, const void *v2)
34507 typedef struct _function_version_info
34509 tree version_decl;
34510 tree predicate_chain;
34511 unsigned int dispatch_priority;
34512 } function_version_info;
34514 const function_version_info c1 = *(const function_version_info *)v1;
34515 const function_version_info c2 = *(const function_version_info *)v2;
34516 return (c2.dispatch_priority - c1.dispatch_priority);
34519 /* This function generates the dispatch function for
34520 multi-versioned functions. DISPATCH_DECL is the function which will
34521 contain the dispatch logic. FNDECLS are the function choices for
34522 dispatch, and is a tree chain. EMPTY_BB is the basic block pointer
34523 in DISPATCH_DECL in which the dispatch code is generated. */
34525 static int
34526 dispatch_function_versions (tree dispatch_decl,
34527 void *fndecls_p,
34528 basic_block *empty_bb)
34530 tree default_decl;
34531 gimple ifunc_cpu_init_stmt;
34532 gimple_seq gseq;
34533 int ix;
34534 tree ele;
34535 vec<tree> *fndecls;
34536 unsigned int num_versions = 0;
34537 unsigned int actual_versions = 0;
34538 unsigned int i;
34540 struct _function_version_info
34542 tree version_decl;
34543 tree predicate_chain;
34544 unsigned int dispatch_priority;
34545 }*function_version_info;
34547 gcc_assert (dispatch_decl != NULL
34548 && fndecls_p != NULL
34549 && empty_bb != NULL);
34551 /* fndecls_p is actually a vector. */
34552 fndecls = static_cast<vec<tree> *> (fndecls_p);
34554 /* At least one more version other than the default. */
34555 num_versions = fndecls->length ();
34556 gcc_assert (num_versions >= 2);
34558 function_version_info = (struct _function_version_info *)
34559 XNEWVEC (struct _function_version_info, (num_versions - 1));
34561 /* The first version in the vector is the default decl. */
34562 default_decl = (*fndecls)[0];
34564 push_cfun (DECL_STRUCT_FUNCTION (dispatch_decl));
34566 gseq = bb_seq (*empty_bb);
34567 /* Function version dispatch is via IFUNC. IFUNC resolvers fire before
34568 constructors, so explicitly call __builtin_cpu_init here. */
34569 ifunc_cpu_init_stmt = gimple_build_call_vec (
34570 ix86_builtins [(int) IX86_BUILTIN_CPU_INIT], vNULL);
34571 gimple_seq_add_stmt (&gseq, ifunc_cpu_init_stmt);
34572 gimple_set_bb (ifunc_cpu_init_stmt, *empty_bb);
34573 set_bb_seq (*empty_bb, gseq);
34575 pop_cfun ();
34578 for (ix = 1; fndecls->iterate (ix, &ele); ++ix)
34580 tree version_decl = ele;
34581 tree predicate_chain = NULL_TREE;
34582 unsigned int priority;
34583 /* Get attribute string, parse it and find the right predicate decl.
34584 The predicate function could be a lengthy combination of many
34585 features, like arch-type and various isa-variants. */
34586 priority = get_builtin_code_for_version (version_decl,
34587 &predicate_chain);
34589 if (predicate_chain == NULL_TREE)
34590 continue;
34592 function_version_info [actual_versions].version_decl = version_decl;
34593 function_version_info [actual_versions].predicate_chain
34594 = predicate_chain;
34595 function_version_info [actual_versions].dispatch_priority = priority;
34596 actual_versions++;
34599 /* Sort the versions according to descending order of dispatch priority. The
34600 priority is based on the ISA. This is not a perfect solution. There
34601 could still be ambiguity. If more than one function version is suitable
34602 to execute, which one should be dispatched? In future, allow the user
34603 to specify a dispatch priority next to the version. */
34604 qsort (function_version_info, actual_versions,
34605 sizeof (struct _function_version_info), feature_compare);
34607 for (i = 0; i < actual_versions; ++i)
34608 *empty_bb = add_condition_to_bb (dispatch_decl,
34609 function_version_info[i].version_decl,
34610 function_version_info[i].predicate_chain,
34611 *empty_bb);
34613 /* Dispatch the default version at the end. */

34614 *empty_bb = add_condition_to_bb (dispatch_decl, default_decl,
34615 NULL, *empty_bb);
34617 free (function_version_info);
34618 return 0;
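/* Conceptually (a hedged sketch, not the literal GIMPLE that is built), the
   resolver body assembled here behaves like:

       __builtin_cpu_init ();
       if (<predicates of the highest-priority version hold>)
         return &foo.arch_haswell_avx2;
       if (<predicates of the next version hold>)
         return &foo.avx;
       return &foo;            <-- default version, tested last

   with one guarded return per version, emitted by add_condition_to_bb in
   descending dispatch priority.  The version names follow the mangling done
   by ix86_mangle_function_version_assembler_name below.  */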
34621 /* Comparator function to be used in qsort routine to sort attribute
34622 specification strings to "target". */
34624 static int
34625 attr_strcmp (const void *v1, const void *v2)
34627 const char *c1 = *(char *const*)v1;
34628 const char *c2 = *(char *const*)v2;
34629 return strcmp (c1, c2);
34632 /* ARGLIST is the argument to target attribute. This function tokenizes
34633 the comma separated arguments, sorts them and returns a string which
34634 is a unique identifier for the comma separated arguments. It also
34635 replaces non-identifier characters "=,-" with "_". */
34637 static char *
34638 sorted_attr_string (tree arglist)
34640 tree arg;
34641 size_t str_len_sum = 0;
34642 char **args = NULL;
34643 char *attr_str, *ret_str;
34644 char *attr = NULL;
34645 unsigned int argnum = 1;
34646 unsigned int i;
34648 for (arg = arglist; arg; arg = TREE_CHAIN (arg))
34650 const char *str = TREE_STRING_POINTER (TREE_VALUE (arg));
34651 size_t len = strlen (str);
34652 str_len_sum += len + 1;
34653 if (arg != arglist)
34654 argnum++;
34655 for (i = 0; i < strlen (str); i++)
34656 if (str[i] == ',')
34657 argnum++;
34660 attr_str = XNEWVEC (char, str_len_sum);
34661 str_len_sum = 0;
34662 for (arg = arglist; arg; arg = TREE_CHAIN (arg))
34664 const char *str = TREE_STRING_POINTER (TREE_VALUE (arg));
34665 size_t len = strlen (str);
34666 memcpy (attr_str + str_len_sum, str, len);
34667 attr_str[str_len_sum + len] = TREE_CHAIN (arg) ? ',' : '\0';
34668 str_len_sum += len + 1;
34671 /* Replace "=,-" with "_". */
34672 for (i = 0; i < strlen (attr_str); i++)
34673 if (attr_str[i] == '=' || attr_str[i]== '-')
34674 attr_str[i] = '_';
34676 if (argnum == 1)
34677 return attr_str;
34679 args = XNEWVEC (char *, argnum);
34681 i = 0;
34682 attr = strtok (attr_str, ",");
34683 while (attr != NULL)
34685 args[i] = attr;
34686 i++;
34687 attr = strtok (NULL, ",");
34690 qsort (args, argnum, sizeof (char *), attr_strcmp);
34692 ret_str = XNEWVEC (char, str_len_sum);
34693 str_len_sum = 0;
34694 for (i = 0; i < argnum; i++)
34696 size_t len = strlen (args[i]);
34697 memcpy (ret_str + str_len_sum, args[i], len);
34698 ret_str[str_len_sum + len] = i < argnum - 1 ? '_' : '\0';
34699 str_len_sum += len + 1;
34702 XDELETEVEC (args);
34703 XDELETEVEC (attr_str);
34704 return ret_str;
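/* For example (derived from the code above; the attribute string is
   illustrative): the arguments "avx2,arch=haswell" are first rewritten to
   "avx2,arch_haswell", then split at the commas, sorted, and re-joined with
   '_', yielding the stable key "arch_haswell_avx2" regardless of the order
   in which the user listed the arguments.  */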
34707 /* This function changes the assembler name for functions that are
34708 versions. If DECL is a function version and has a "target"
34709 attribute, it appends the attribute string to its assembler name. */
34711 static tree
34712 ix86_mangle_function_version_assembler_name (tree decl, tree id)
34714 tree version_attr;
34715 const char *orig_name, *version_string;
34716 char *attr_str, *assembler_name;
34718 if (DECL_DECLARED_INLINE_P (decl)
34719 && lookup_attribute ("gnu_inline",
34720 DECL_ATTRIBUTES (decl)))
34721 error_at (DECL_SOURCE_LOCATION (decl),
34722 "Function versions cannot be marked as gnu_inline,"
34723 " bodies have to be generated");
34725 if (DECL_VIRTUAL_P (decl)
34726 || DECL_VINDEX (decl))
34727 sorry ("Virtual function multiversioning not supported");
34729 version_attr = lookup_attribute ("target", DECL_ATTRIBUTES (decl));
34731 /* target attribute string cannot be NULL. */
34732 gcc_assert (version_attr != NULL_TREE);
34734 orig_name = IDENTIFIER_POINTER (id);
34735 version_string
34736 = TREE_STRING_POINTER (TREE_VALUE (TREE_VALUE (version_attr)));
34738 if (strcmp (version_string, "default") == 0)
34739 return id;
34741 attr_str = sorted_attr_string (TREE_VALUE (version_attr));
34742 assembler_name = XNEWVEC (char, strlen (orig_name) + strlen (attr_str) + 2);
34744 sprintf (assembler_name, "%s.%s", orig_name, attr_str);
34746 /* Allow assembler name to be modified if already set. */
34747 if (DECL_ASSEMBLER_NAME_SET_P (decl))
34748 SET_DECL_RTL (decl, NULL);
34750 tree ret = get_identifier (assembler_name);
34751 XDELETEVEC (attr_str);
34752 XDELETEVEC (assembler_name);
34753 return ret;
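/* Example (the function name is assumed for illustration): a version of foo
   declared with __attribute__ ((target ("avx2"))) gets the assembler name
   "foo.avx2", while one declared with target ("arch=haswell,avx2") becomes
   "foo.arch_haswell_avx2".  The "default" version keeps the plain name.  */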
34756 /* This function returns true if FN1 and FN2 are versions of the same function,
34757 that is, the target strings of the function decls are different. This assumes
34758 that FN1 and FN2 have the same signature. */
34760 static bool
34761 ix86_function_versions (tree fn1, tree fn2)
34763 tree attr1, attr2;
34764 char *target1, *target2;
34765 bool result;
34767 if (TREE_CODE (fn1) != FUNCTION_DECL
34768 || TREE_CODE (fn2) != FUNCTION_DECL)
34769 return false;
34771 attr1 = lookup_attribute ("target", DECL_ATTRIBUTES (fn1));
34772 attr2 = lookup_attribute ("target", DECL_ATTRIBUTES (fn2));
34774 /* At least one function decl should have the target attribute specified. */
34775 if (attr1 == NULL_TREE && attr2 == NULL_TREE)
34776 return false;
34778 /* Diagnose missing target attribute if one of the decls is already
34779 multi-versioned. */
34780 if (attr1 == NULL_TREE || attr2 == NULL_TREE)
34782 if (DECL_FUNCTION_VERSIONED (fn1) || DECL_FUNCTION_VERSIONED (fn2))
34784 if (attr2 != NULL_TREE)
34786 tree tem = fn1;
34787 fn1 = fn2;
34788 fn2 = tem;
34789 attr1 = attr2;
34791 error_at (DECL_SOURCE_LOCATION (fn2),
34792 "missing %<target%> attribute for multi-versioned %D",
34793 fn2);
34794 inform (DECL_SOURCE_LOCATION (fn1),
34795 "previous declaration of %D", fn1);
34796 /* Prevent diagnosing of the same error multiple times. */
34797 DECL_ATTRIBUTES (fn2)
34798 = tree_cons (get_identifier ("target"),
34799 copy_node (TREE_VALUE (attr1)),
34800 DECL_ATTRIBUTES (fn2));
34802 return false;
34805 target1 = sorted_attr_string (TREE_VALUE (attr1));
34806 target2 = sorted_attr_string (TREE_VALUE (attr2));
34808 /* The sorted target strings must be different for fn1 and fn2
34809 to be versions. */
34810 if (strcmp (target1, target2) == 0)
34811 result = false;
34812 else
34813 result = true;
34815 XDELETEVEC (target1);
34816 XDELETEVEC (target2);
34818 return result;
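/* Example (declarations assumed for illustration): the pair

       __attribute__ ((target ("default"))) int foo (void);
       __attribute__ ((target ("avx2")))    int foo (void);

   is treated as two versions of the same function, because their sorted
   target strings ("default" vs. "avx2") differ.  */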
34821 static tree
34822 ix86_mangle_decl_assembler_name (tree decl, tree id)
34824 /* For function version, add the target suffix to the assembler name. */
34825 if (TREE_CODE (decl) == FUNCTION_DECL
34826 && DECL_FUNCTION_VERSIONED (decl))
34827 id = ix86_mangle_function_version_assembler_name (decl, id);
34828 #ifdef SUBTARGET_MANGLE_DECL_ASSEMBLER_NAME
34829 id = SUBTARGET_MANGLE_DECL_ASSEMBLER_NAME (decl, id);
34830 #endif
34832 return id;
34835 /* Return a new name by appending SUFFIX to the DECL name. If make_unique
34836 is true, append the full path name of the source file. */
34838 static char *
34839 make_name (tree decl, const char *suffix, bool make_unique)
34841 char *global_var_name;
34842 int name_len;
34843 const char *name;
34844 const char *unique_name = NULL;
34846 name = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (decl));
34848 /* Get a unique name that can be used globally without any chances
34849 of collision at link time. */
34850 if (make_unique)
34851 unique_name = IDENTIFIER_POINTER (get_file_function_name ("\0"));
34853 name_len = strlen (name) + strlen (suffix) + 2;
34855 if (make_unique)
34856 name_len += strlen (unique_name) + 1;
34857 global_var_name = XNEWVEC (char, name_len);
34859 /* Use '.' to concatenate names as it is demangler friendly. */
34860 if (make_unique)
34861 snprintf (global_var_name, name_len, "%s.%s.%s", name, unique_name,
34862 suffix);
34863 else
34864 snprintf (global_var_name, name_len, "%s.%s", name, suffix);
34866 return global_var_name;
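/* Examples (decl names assumed): for an externally visible foo,
   make_name (decl, "resolver", false) yields "foo.resolver"; when
   MAKE_UNIQUE is true, the per-file identifier produced by
   get_file_function_name is spliced in as well, roughly
   "foo.<file-id>.resolver", so local versions from different translation
   units cannot collide at link time.  */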
34869 #if defined (ASM_OUTPUT_TYPE_DIRECTIVE)
34871 /* Make a dispatcher declaration for the multi-versioned function DECL.
34872 Calls to DECL function will be replaced with calls to the dispatcher
34873 by the front-end. Return the decl created. */
34875 static tree
34876 make_dispatcher_decl (const tree decl)
34878 tree func_decl;
34879 char *func_name;
34880 tree fn_type, func_type;
34881 bool is_uniq = false;
34883 if (TREE_PUBLIC (decl) == 0)
34884 is_uniq = true;
34886 func_name = make_name (decl, "ifunc", is_uniq);
34888 fn_type = TREE_TYPE (decl);
34889 func_type = build_function_type (TREE_TYPE (fn_type),
34890 TYPE_ARG_TYPES (fn_type));
34892 func_decl = build_fn_decl (func_name, func_type);
34893 XDELETEVEC (func_name);
34894 TREE_USED (func_decl) = 1;
34895 DECL_CONTEXT (func_decl) = NULL_TREE;
34896 DECL_INITIAL (func_decl) = error_mark_node;
34897 DECL_ARTIFICIAL (func_decl) = 1;
34898 /* Mark this func as external, the resolver will flip it again if
34899 it gets generated. */
34900 DECL_EXTERNAL (func_decl) = 1;
34901 /* This will be of type IFUNC; IFUNCs have to be externally visible. */
34902 TREE_PUBLIC (func_decl) = 1;
34904 return func_decl;
34907 #endif
34909 /* Returns true if DECL is multi-versioned and is the default function,
34910 that is, it is not tagged with target-specific optimization. */
34912 static bool
34913 is_function_default_version (const tree decl)
34915 if (TREE_CODE (decl) != FUNCTION_DECL
34916 || !DECL_FUNCTION_VERSIONED (decl))
34917 return false;
34918 tree attr = lookup_attribute ("target", DECL_ATTRIBUTES (decl));
34919 gcc_assert (attr);
34920 attr = TREE_VALUE (TREE_VALUE (attr));
34921 return (TREE_CODE (attr) == STRING_CST
34922 && strcmp (TREE_STRING_POINTER (attr), "default") == 0);
34925 /* Make a dispatcher declaration for the multi-versioned function DECL.
34926 Calls to DECL function will be replaced with calls to the dispatcher
34927 by the front-end. Returns the decl of the dispatcher function. */
34929 static tree
34930 ix86_get_function_versions_dispatcher (void *decl)
34932 tree fn = (tree) decl;
34933 struct cgraph_node *node = NULL;
34934 struct cgraph_node *default_node = NULL;
34935 struct cgraph_function_version_info *node_v = NULL;
34936 struct cgraph_function_version_info *first_v = NULL;
34938 tree dispatch_decl = NULL;
34940 struct cgraph_function_version_info *default_version_info = NULL;
34942 gcc_assert (fn != NULL && DECL_FUNCTION_VERSIONED (fn));
34944 node = cgraph_node::get (fn);
34945 gcc_assert (node != NULL);
34947 node_v = node->function_version ();
34948 gcc_assert (node_v != NULL);
34950 if (node_v->dispatcher_resolver != NULL)
34951 return node_v->dispatcher_resolver;
34953 /* Find the default version and make it the first node. */
34954 first_v = node_v;
34955 /* Go to the beginning of the chain. */
34956 while (first_v->prev != NULL)
34957 first_v = first_v->prev;
34958 default_version_info = first_v;
34959 while (default_version_info != NULL)
34961 if (is_function_default_version
34962 (default_version_info->this_node->decl))
34963 break;
34964 default_version_info = default_version_info->next;
34967 /* If there is no default node, just return NULL. */
34968 if (default_version_info == NULL)
34969 return NULL;
34971 /* Make default info the first node. */
34972 if (first_v != default_version_info)
34974 default_version_info->prev->next = default_version_info->next;
34975 if (default_version_info->next)
34976 default_version_info->next->prev = default_version_info->prev;
34977 first_v->prev = default_version_info;
34978 default_version_info->next = first_v;
34979 default_version_info->prev = NULL;
34982 default_node = default_version_info->this_node;
34984 #if defined (ASM_OUTPUT_TYPE_DIRECTIVE)
34985 if (targetm.has_ifunc_p ())
34987 struct cgraph_function_version_info *it_v = NULL;
34988 struct cgraph_node *dispatcher_node = NULL;
34989 struct cgraph_function_version_info *dispatcher_version_info = NULL;
34991 /* Right now, the dispatching is done via ifunc. */
34992 dispatch_decl = make_dispatcher_decl (default_node->decl);
34994 dispatcher_node = cgraph_node::get_create (dispatch_decl);
34995 gcc_assert (dispatcher_node != NULL);
34996 dispatcher_node->dispatcher_function = 1;
34997 dispatcher_version_info
34998 = dispatcher_node->insert_new_function_version ();
34999 dispatcher_version_info->next = default_version_info;
35000 dispatcher_node->definition = 1;
35002 /* Set the dispatcher for all the versions. */
35003 it_v = default_version_info;
35004 while (it_v != NULL)
35006 it_v->dispatcher_resolver = dispatch_decl;
35007 it_v = it_v->next;
35010 else
35011 #endif
35013 error_at (DECL_SOURCE_LOCATION (default_node->decl),
35014 "multiversioning needs ifunc which is not supported "
35015 "on this target");
35018 return dispatch_decl;
35021 /* Makes a function attribute of the form NAME(ARG_NAME) and chains
35022 it to CHAIN. */
35024 static tree
35025 make_attribute (const char *name, const char *arg_name, tree chain)
35027 tree attr_name;
35028 tree attr_arg_name;
35029 tree attr_args;
35030 tree attr;
35032 attr_name = get_identifier (name);
35033 attr_arg_name = build_string (strlen (arg_name), arg_name);
35034 attr_args = tree_cons (NULL_TREE, attr_arg_name, NULL_TREE);
35035 attr = tree_cons (attr_name, attr_args, chain);
35036 return attr;
35039 /* Make the resolver function decl to dispatch the versions of
35040 a multi-versioned function, DEFAULT_DECL. Create an
35041 empty basic block in the resolver and store the pointer in
35042 EMPTY_BB. Return the decl of the resolver function. */
35044 static tree
35045 make_resolver_func (const tree default_decl,
35046 const tree dispatch_decl,
35047 basic_block *empty_bb)
35049 char *resolver_name;
35050 tree decl, type, decl_name, t;
35051 bool is_uniq = false;
35053 /* IFUNC's have to be globally visible. So, if the default_decl is
35054 not, then the name of the IFUNC should be made unique. */
35055 if (TREE_PUBLIC (default_decl) == 0)
35056 is_uniq = true;
35058 /* Append the filename to the resolver function if the versions are
35059 not externally visible. This is because the resolver function has
35060 to be externally visible for the loader to find it. So, appending
35061 the filename will prevent conflicts with a resolver function from
35062 another module which is based on the same version name. */
35063 resolver_name = make_name (default_decl, "resolver", is_uniq);
35065 /* The resolver function should return a (void *). */
35066 type = build_function_type_list (ptr_type_node, NULL_TREE);
35068 decl = build_fn_decl (resolver_name, type);
35069 decl_name = get_identifier (resolver_name);
35070 SET_DECL_ASSEMBLER_NAME (decl, decl_name);
35072 DECL_NAME (decl) = decl_name;
35073 TREE_USED (decl) = 1;
35074 DECL_ARTIFICIAL (decl) = 1;
35075 DECL_IGNORED_P (decl) = 0;
35076 /* IFUNC resolvers have to be externally visible. */
35077 TREE_PUBLIC (decl) = 1;
35078 DECL_UNINLINABLE (decl) = 1;
35080 /* Resolver is not external, body is generated. */
35081 DECL_EXTERNAL (decl) = 0;
35082 DECL_EXTERNAL (dispatch_decl) = 0;
35084 DECL_CONTEXT (decl) = NULL_TREE;
35085 DECL_INITIAL (decl) = make_node (BLOCK);
35086 DECL_STATIC_CONSTRUCTOR (decl) = 0;
35088 if (DECL_COMDAT_GROUP (default_decl)
35089 || TREE_PUBLIC (default_decl))
35091 /* In this case, each translation unit with a call to this
35092 versioned function will put out a resolver. Ensure it
35093 is comdat to keep just one copy. */
35094 DECL_COMDAT (decl) = 1;
35095 make_decl_one_only (decl, DECL_ASSEMBLER_NAME (decl));
35097 /* Build result decl and add to function_decl. */
35098 t = build_decl (UNKNOWN_LOCATION, RESULT_DECL, NULL_TREE, ptr_type_node);
35099 DECL_ARTIFICIAL (t) = 1;
35100 DECL_IGNORED_P (t) = 1;
35101 DECL_RESULT (decl) = t;
35103 gimplify_function_tree (decl);
35104 push_cfun (DECL_STRUCT_FUNCTION (decl));
35105 *empty_bb = init_lowered_empty_function (decl, false);
35107 cgraph_node::add_new_function (decl, true);
35108 symtab->call_cgraph_insertion_hooks (cgraph_node::get_create (decl));
35110 pop_cfun ();
35112 gcc_assert (dispatch_decl != NULL);
35113 /* Mark dispatch_decl as "ifunc" with resolver as resolver_name. */
35114 DECL_ATTRIBUTES (dispatch_decl)
35115 = make_attribute ("ifunc", resolver_name, DECL_ATTRIBUTES (dispatch_decl));
35117 /* Create the alias for dispatch to resolver here. */
35118 /*cgraph_create_function_alias (dispatch_decl, decl);*/
35119 cgraph_node::create_same_body_alias (dispatch_decl, decl);
35120 XDELETEVEC (resolver_name);
35121 return decl;
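/* Net effect (sketched; "foo" is an assumed externally visible function):
   the dispatcher decl created by make_dispatcher_decl ends up carrying

       __attribute__ ((ifunc ("foo.resolver")))

   i.e. it is emitted as a GNU indirect function whose resolver is the comdat
   function built above; the dynamic loader runs foo.resolver once and binds
   calls made through the dispatcher to the version it returns.  */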
35124 /* Generate the dispatching code body to dispatch multi-versioned function
35125 DECL. The target hook is called to process the "target" attributes and
35126 provide the code to dispatch the right function at run-time. NODE points
35127 to the dispatcher decl whose body will be created. */
35129 static tree
35130 ix86_generate_version_dispatcher_body (void *node_p)
35132 tree resolver_decl;
35133 basic_block empty_bb;
35134 tree default_ver_decl;
35135 struct cgraph_node *versn;
35136 struct cgraph_node *node;
35138 struct cgraph_function_version_info *node_version_info = NULL;
35139 struct cgraph_function_version_info *versn_info = NULL;
35141 node = (cgraph_node *)node_p;
35143 node_version_info = node->function_version ();
35144 gcc_assert (node->dispatcher_function
35145 && node_version_info != NULL);
35147 if (node_version_info->dispatcher_resolver)
35148 return node_version_info->dispatcher_resolver;
35150 /* The first version in the chain corresponds to the default version. */
35151 default_ver_decl = node_version_info->next->this_node->decl;
35153 /* node is going to be an alias, so remove the finalized bit. */
35154 node->definition = false;
35156 resolver_decl = make_resolver_func (default_ver_decl,
35157 node->decl, &empty_bb);
35159 node_version_info->dispatcher_resolver = resolver_decl;
35161 push_cfun (DECL_STRUCT_FUNCTION (resolver_decl));
35163 auto_vec<tree, 2> fn_ver_vec;
35165 for (versn_info = node_version_info->next; versn_info;
35166 versn_info = versn_info->next)
35168 versn = versn_info->this_node;
35169 /* Check for virtual functions here again, as by this time it should
35170 have been determined if this function needs a vtable index or
35171 not. This happens for methods in derived classes that override
35172 virtual methods in base classes but are not explicitly marked as
35173 virtual. */
35174 if (DECL_VINDEX (versn->decl))
35175 sorry ("Virtual function multiversioning not supported");
35177 fn_ver_vec.safe_push (versn->decl);
35180 dispatch_function_versions (resolver_decl, &fn_ver_vec, &empty_bb);
35181 cgraph_edge::rebuild_edges ();
35182 pop_cfun ();
35183 return resolver_decl;
35185 /* This builds the processor_model struct type defined in
35186 libgcc/config/i386/cpuinfo.c */
35188 static tree
35189 build_processor_model_struct (void)
35191 const char *field_name[] = {"__cpu_vendor", "__cpu_type", "__cpu_subtype",
35192 "__cpu_features"};
35193 tree field = NULL_TREE, field_chain = NULL_TREE;
35194 int i;
35195 tree type = make_node (RECORD_TYPE);
35197 /* The first 3 fields are unsigned int. */
35198 for (i = 0; i < 3; ++i)
35200 field = build_decl (UNKNOWN_LOCATION, FIELD_DECL,
35201 get_identifier (field_name[i]), unsigned_type_node);
35202 if (field_chain != NULL_TREE)
35203 DECL_CHAIN (field) = field_chain;
35204 field_chain = field;
35207 /* The last field is an array of unsigned integers of size one. */
35208 field = build_decl (UNKNOWN_LOCATION, FIELD_DECL,
35209 get_identifier (field_name[3]),
35210 build_array_type (unsigned_type_node,
35211 build_index_type (size_one_node)));
35212 if (field_chain != NULL_TREE)
35213 DECL_CHAIN (field) = field_chain;
35214 field_chain = field;
35216 finish_builtin_struct (type, "__processor_model", field_chain, NULL_TREE);
35217 return type;
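/* This mirrors (up to the exact bound used for the trailing array) the
   definition in libgcc/config/i386/cpuinfo.c:

       struct __processor_model
       {
         unsigned int __cpu_vendor;
         unsigned int __cpu_type;
         unsigned int __cpu_subtype;
         unsigned int __cpu_features[1];
       };

   fold_builtin_cpu below builds COMPONENT_REFs into the single extern
   __cpu_model variable that libgcc defines and initializes.  */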
35220 /* Returns an extern, comdat VAR_DECL of type TYPE and name NAME. */
35222 static tree
35223 make_var_decl (tree type, const char *name)
35225 tree new_decl;
35227 new_decl = build_decl (UNKNOWN_LOCATION,
35228 VAR_DECL,
35229 get_identifier(name),
35230 type);
35232 DECL_EXTERNAL (new_decl) = 1;
35233 TREE_STATIC (new_decl) = 1;
35234 TREE_PUBLIC (new_decl) = 1;
35235 DECL_INITIAL (new_decl) = 0;
35236 DECL_ARTIFICIAL (new_decl) = 0;
35237 DECL_PRESERVE_P (new_decl) = 1;
35239 make_decl_one_only (new_decl, DECL_ASSEMBLER_NAME (new_decl));
35240 assemble_variable (new_decl, 0, 0, 0);
35242 return new_decl;
35245 /* FNDECL is a __builtin_cpu_is or a __builtin_cpu_supports call that is folded
35246 into an integer defined in libgcc/config/i386/cpuinfo.c */
35248 static tree
35249 fold_builtin_cpu (tree fndecl, tree *args)
35251 unsigned int i;
35252 enum ix86_builtins fn_code = (enum ix86_builtins)
35253 DECL_FUNCTION_CODE (fndecl);
35254 tree param_string_cst = NULL;
35256 /* This is the order of bit-fields in __processor_features in cpuinfo.c */
35257 enum processor_features
35259 F_CMOV = 0,
35260 F_MMX,
35261 F_POPCNT,
35262 F_SSE,
35263 F_SSE2,
35264 F_SSE3,
35265 F_SSSE3,
35266 F_SSE4_1,
35267 F_SSE4_2,
35268 F_AVX,
35269 F_AVX2,
35270 F_SSE4_A,
35271 F_FMA4,
35272 F_XOP,
35273 F_FMA,
35274 F_AVX512F,
35275 F_MAX
35278 /* These are the values for vendor types, CPU types and CPU subtypes
35279 in cpuinfo.c. CPU types and subtypes are encoded relative to the
35280 corresponding start value, which must be subtracted first. */
35281 enum processor_model
35283 M_INTEL = 1,
35284 M_AMD,
35285 M_CPU_TYPE_START,
35286 M_INTEL_BONNELL,
35287 M_INTEL_CORE2,
35288 M_INTEL_COREI7,
35289 M_AMDFAM10H,
35290 M_AMDFAM15H,
35291 M_INTEL_SILVERMONT,
35292 M_INTEL_KNL,
35293 M_AMD_BTVER1,
35294 M_AMD_BTVER2,
35295 M_CPU_SUBTYPE_START,
35296 M_INTEL_COREI7_NEHALEM,
35297 M_INTEL_COREI7_WESTMERE,
35298 M_INTEL_COREI7_SANDYBRIDGE,
35299 M_AMDFAM10H_BARCELONA,
35300 M_AMDFAM10H_SHANGHAI,
35301 M_AMDFAM10H_ISTANBUL,
35302 M_AMDFAM15H_BDVER1,
35303 M_AMDFAM15H_BDVER2,
35304 M_AMDFAM15H_BDVER3,
35305 M_AMDFAM15H_BDVER4,
35306 M_INTEL_COREI7_IVYBRIDGE,
35307 M_INTEL_COREI7_HASWELL
35310 static struct _arch_names_table
35312 const char *const name;
35313 const enum processor_model model;
35315 const arch_names_table[] =
35317 {"amd", M_AMD},
35318 {"intel", M_INTEL},
35319 {"atom", M_INTEL_BONNELL},
35320 {"slm", M_INTEL_SILVERMONT},
35321 {"core2", M_INTEL_CORE2},
35322 {"corei7", M_INTEL_COREI7},
35323 {"nehalem", M_INTEL_COREI7_NEHALEM},
35324 {"westmere", M_INTEL_COREI7_WESTMERE},
35325 {"sandybridge", M_INTEL_COREI7_SANDYBRIDGE},
35326 {"ivybridge", M_INTEL_COREI7_IVYBRIDGE},
35327 {"haswell", M_INTEL_COREI7_HASWELL},
35328 {"bonnell", M_INTEL_BONNELL},
35329 {"silvermont", M_INTEL_SILVERMONT},
35330 {"knl", M_INTEL_KNL},
35331 {"amdfam10h", M_AMDFAM10H},
35332 {"barcelona", M_AMDFAM10H_BARCELONA},
35333 {"shanghai", M_AMDFAM10H_SHANGHAI},
35334 {"istanbul", M_AMDFAM10H_ISTANBUL},
35335 {"btver1", M_AMD_BTVER1},
35336 {"amdfam15h", M_AMDFAM15H},
35337 {"bdver1", M_AMDFAM15H_BDVER1},
35338 {"bdver2", M_AMDFAM15H_BDVER2},
35339 {"bdver3", M_AMDFAM15H_BDVER3},
35340 {"bdver4", M_AMDFAM15H_BDVER4},
35341 {"btver2", M_AMD_BTVER2},
35344 static struct _isa_names_table
35346 const char *const name;
35347 const enum processor_features feature;
35349 const isa_names_table[] =
35351 {"cmov", F_CMOV},
35352 {"mmx", F_MMX},
35353 {"popcnt", F_POPCNT},
35354 {"sse", F_SSE},
35355 {"sse2", F_SSE2},
35356 {"sse3", F_SSE3},
35357 {"ssse3", F_SSSE3},
35358 {"sse4a", F_SSE4_A},
35359 {"sse4.1", F_SSE4_1},
35360 {"sse4.2", F_SSE4_2},
35361 {"avx", F_AVX},
35362 {"fma4", F_FMA4},
35363 {"xop", F_XOP},
35364 {"fma", F_FMA},
35365 {"avx2", F_AVX2},
35366 {"avx512f",F_AVX512F}
35369 tree __processor_model_type = build_processor_model_struct ();
35370 tree __cpu_model_var = make_var_decl (__processor_model_type,
35371 "__cpu_model");
35374 varpool_node::add (__cpu_model_var);
35376 gcc_assert ((args != NULL) && (*args != NULL));
35378 param_string_cst = *args;
35379 while (param_string_cst
35380 && TREE_CODE (param_string_cst) != STRING_CST)
35382 /* *args must be an expr that can contain other EXPRs leading to a
35383 STRING_CST. */
35384 if (!EXPR_P (param_string_cst))
35386 error ("Parameter to builtin must be a string constant or literal");
35387 return integer_zero_node;
35389 param_string_cst = TREE_OPERAND (EXPR_CHECK (param_string_cst), 0);
35392 gcc_assert (param_string_cst);
35394 if (fn_code == IX86_BUILTIN_CPU_IS)
35396 tree ref;
35397 tree field;
35398 tree final;
35400 unsigned int field_val = 0;
35401 unsigned int NUM_ARCH_NAMES
35402 = sizeof (arch_names_table) / sizeof (struct _arch_names_table);
35404 for (i = 0; i < NUM_ARCH_NAMES; i++)
35405 if (strcmp (arch_names_table[i].name,
35406 TREE_STRING_POINTER (param_string_cst)) == 0)
35407 break;
35409 if (i == NUM_ARCH_NAMES)
35411 error ("Parameter to builtin not valid: %s",
35412 TREE_STRING_POINTER (param_string_cst));
35413 return integer_zero_node;
35416 field = TYPE_FIELDS (__processor_model_type);
35417 field_val = arch_names_table[i].model;
35419 /* CPU types are stored in the next field. */
35420 if (field_val > M_CPU_TYPE_START
35421 && field_val < M_CPU_SUBTYPE_START)
35423 field = DECL_CHAIN (field);
35424 field_val -= M_CPU_TYPE_START;
35427 /* CPU subtypes are stored in the next field. */
35428 if (field_val > M_CPU_SUBTYPE_START)
35430 field = DECL_CHAIN (DECL_CHAIN (field));
35431 field_val -= M_CPU_SUBTYPE_START;
35434 /* Get the appropriate field in __cpu_model. */
35435 ref = build3 (COMPONENT_REF, TREE_TYPE (field), __cpu_model_var,
35436 field, NULL_TREE);
35438 /* Check the value. */
35439 final = build2 (EQ_EXPR, unsigned_type_node, ref,
35440 build_int_cstu (unsigned_type_node, field_val));
35441 return build1 (CONVERT_EXPR, integer_type_node, final);
35443 else if (fn_code == IX86_BUILTIN_CPU_SUPPORTS)
35445 tree ref;
35446 tree array_elt;
35447 tree field;
35448 tree final;
35450 unsigned int field_val = 0;
35451 unsigned int NUM_ISA_NAMES
35452 = sizeof (isa_names_table) / sizeof (struct _isa_names_table);
35454 for (i = 0; i < NUM_ISA_NAMES; i++)
35455 if (strcmp (isa_names_table[i].name,
35456 TREE_STRING_POINTER (param_string_cst)) == 0)
35457 break;
35459 if (i == NUM_ISA_NAMES)
35461 error ("Parameter to builtin not valid: %s",
35462 TREE_STRING_POINTER (param_string_cst));
35463 return integer_zero_node;
35466 field = TYPE_FIELDS (__processor_model_type);
35467 /* Get the last field, which is __cpu_features. */
35468 while (DECL_CHAIN (field))
35469 field = DECL_CHAIN (field);
35471 /* Get the appropriate field: __cpu_model.__cpu_features */
35472 ref = build3 (COMPONENT_REF, TREE_TYPE (field), __cpu_model_var,
35473 field, NULL_TREE);
35475 /* Access the 0th element of __cpu_features array. */
35476 array_elt = build4 (ARRAY_REF, unsigned_type_node, ref,
35477 integer_zero_node, NULL_TREE, NULL_TREE);
35479 field_val = (1 << isa_names_table[i].feature);
35480 /* Return __cpu_model.__cpu_features[0] & field_val */
35481 final = build2 (BIT_AND_EXPR, unsigned_type_node, array_elt,
35482 build_int_cstu (unsigned_type_node, field_val));
35483 return build1 (CONVERT_EXPR, integer_type_node, final);
35485 gcc_unreachable ();
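/* Folding examples (derived from the code above; the enumerator values are
   symbolic):

       __builtin_cpu_is ("haswell")
         ==> (int) (__cpu_model.__cpu_subtype
                    == M_INTEL_COREI7_HASWELL - M_CPU_SUBTYPE_START)

       __builtin_cpu_supports ("avx2")
         ==> (int) (__cpu_model.__cpu_features[0] & (1 << F_AVX2))

   Both fold to plain loads and compares, so the checks are cheap enough to
   use in dispatch code.  */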
35488 static tree
35489 ix86_fold_builtin (tree fndecl, int n_args,
35490 tree *args, bool ignore ATTRIBUTE_UNUSED)
35492 if (DECL_BUILT_IN_CLASS (fndecl) == BUILT_IN_MD)
35494 enum ix86_builtins fn_code = (enum ix86_builtins)
35495 DECL_FUNCTION_CODE (fndecl);
35496 if (fn_code == IX86_BUILTIN_CPU_IS
35497 || fn_code == IX86_BUILTIN_CPU_SUPPORTS)
35499 gcc_assert (n_args == 1);
35500 return fold_builtin_cpu (fndecl, args);
35504 #ifdef SUBTARGET_FOLD_BUILTIN
35505 return SUBTARGET_FOLD_BUILTIN (fndecl, n_args, args, ignore);
35506 #endif
35508 return NULL_TREE;
35511 /* Make builtins to detect cpu type and features supported. NAME is
35512 the builtin name, CODE is the builtin code, and FTYPE is the function
35513 type of the builtin. */
35515 static void
35516 make_cpu_type_builtin (const char* name, int code,
35517 enum ix86_builtin_func_type ftype, bool is_const)
35519 tree decl;
35520 tree type;
35522 type = ix86_get_builtin_func_type (ftype);
35523 decl = add_builtin_function (name, type, code, BUILT_IN_MD,
35524 NULL, NULL_TREE);
35525 gcc_assert (decl != NULL_TREE);
35526 ix86_builtins[(int) code] = decl;
35527 TREE_READONLY (decl) = is_const;
35530 /* Make builtins to get CPU type and features supported. The created
35531 builtins are:
35533 __builtin_cpu_init (), to detect cpu type and features,
35534 __builtin_cpu_is ("<CPUNAME>"), to check if cpu is of type <CPUNAME>,
35535 __builtin_cpu_supports ("<FEATURE>"), to check if cpu supports <FEATURE>
35538 static void
35539 ix86_init_platform_type_builtins (void)
35541 make_cpu_type_builtin ("__builtin_cpu_init", IX86_BUILTIN_CPU_INIT,
35542 INT_FTYPE_VOID, false);
35543 make_cpu_type_builtin ("__builtin_cpu_is", IX86_BUILTIN_CPU_IS,
35544 INT_FTYPE_PCCHAR, true);
35545 make_cpu_type_builtin ("__builtin_cpu_supports", IX86_BUILTIN_CPU_SUPPORTS,
35546 INT_FTYPE_PCCHAR, true);
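/* Typical user-level use of these builtins (a sketch; the kernel functions
   named below are illustrative only):

       void
       pick_kernel (void)
       {
         __builtin_cpu_init ();
         if (__builtin_cpu_is ("haswell")
             || __builtin_cpu_supports ("avx2"))
           use_avx2_kernel ();
         else
           use_generic_kernel ();
       }

   __builtin_cpu_is and __builtin_cpu_supports are registered as const above,
   so repeated checks can be CSEd once __builtin_cpu_init has run.  */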
35549 /* Internal method for ix86_init_builtins. */
35551 static void
35552 ix86_init_builtins_va_builtins_abi (void)
35554 tree ms_va_ref, sysv_va_ref;
35555 tree fnvoid_va_end_ms, fnvoid_va_end_sysv;
35556 tree fnvoid_va_start_ms, fnvoid_va_start_sysv;
35557 tree fnvoid_va_copy_ms, fnvoid_va_copy_sysv;
35558 tree fnattr_ms = NULL_TREE, fnattr_sysv = NULL_TREE;
35560 if (!TARGET_64BIT)
35561 return;
35562 fnattr_ms = build_tree_list (get_identifier ("ms_abi"), NULL_TREE);
35563 fnattr_sysv = build_tree_list (get_identifier ("sysv_abi"), NULL_TREE);
35564 ms_va_ref = build_reference_type (ms_va_list_type_node);
35565 sysv_va_ref =
35566 build_pointer_type (TREE_TYPE (sysv_va_list_type_node));
35568 fnvoid_va_end_ms =
35569 build_function_type_list (void_type_node, ms_va_ref, NULL_TREE);
35570 fnvoid_va_start_ms =
35571 build_varargs_function_type_list (void_type_node, ms_va_ref, NULL_TREE);
35572 fnvoid_va_end_sysv =
35573 build_function_type_list (void_type_node, sysv_va_ref, NULL_TREE);
35574 fnvoid_va_start_sysv =
35575 build_varargs_function_type_list (void_type_node, sysv_va_ref,
35576 NULL_TREE);
35577 fnvoid_va_copy_ms =
35578 build_function_type_list (void_type_node, ms_va_ref, ms_va_list_type_node,
35579 NULL_TREE);
35580 fnvoid_va_copy_sysv =
35581 build_function_type_list (void_type_node, sysv_va_ref,
35582 sysv_va_ref, NULL_TREE);
35584 add_builtin_function ("__builtin_ms_va_start", fnvoid_va_start_ms,
35585 BUILT_IN_VA_START, BUILT_IN_NORMAL, NULL, fnattr_ms);
35586 add_builtin_function ("__builtin_ms_va_end", fnvoid_va_end_ms,
35587 BUILT_IN_VA_END, BUILT_IN_NORMAL, NULL, fnattr_ms);
35588 add_builtin_function ("__builtin_ms_va_copy", fnvoid_va_copy_ms,
35589 BUILT_IN_VA_COPY, BUILT_IN_NORMAL, NULL, fnattr_ms);
35590 add_builtin_function ("__builtin_sysv_va_start", fnvoid_va_start_sysv,
35591 BUILT_IN_VA_START, BUILT_IN_NORMAL, NULL, fnattr_sysv);
35592 add_builtin_function ("__builtin_sysv_va_end", fnvoid_va_end_sysv,
35593 BUILT_IN_VA_END, BUILT_IN_NORMAL, NULL, fnattr_sysv);
35594 add_builtin_function ("__builtin_sysv_va_copy", fnvoid_va_copy_sysv,
35595 BUILT_IN_VA_COPY, BUILT_IN_NORMAL, NULL, fnattr_sysv);
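/* Rough usage sketch (64-bit only; the user function below is an assumed
   example): these builtins let a function consume varargs in the ABI it was
   declared with, independent of the default calling convention, e.g.

       int __attribute__ ((ms_abi))
       sum_ms (int n, ...)
       {
         __builtin_ms_va_list ap;
         int i, s = 0;
         __builtin_ms_va_start (ap, n);
         for (i = 0; i < n; i++)
           s += __builtin_va_arg (ap, int);
         __builtin_ms_va_end (ap);
         return s;
       }
*/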
35598 static void
35599 ix86_init_builtin_types (void)
35601 tree float128_type_node, float80_type_node;
35603 /* The __float80 type. */
35604 float80_type_node = long_double_type_node;
35605 if (TYPE_MODE (float80_type_node) != XFmode)
35607 /* The __float80 type. */
35608 float80_type_node = make_node (REAL_TYPE);
35610 TYPE_PRECISION (float80_type_node) = 80;
35611 layout_type (float80_type_node);
35613 lang_hooks.types.register_builtin_type (float80_type_node, "__float80");
35615 /* The __float128 type. */
35616 float128_type_node = make_node (REAL_TYPE);
35617 TYPE_PRECISION (float128_type_node) = 128;
35618 layout_type (float128_type_node);
35619 lang_hooks.types.register_builtin_type (float128_type_node, "__float128");
35621 /* This macro is built by i386-builtin-types.awk. */
35622 DEFINE_BUILTIN_PRIMITIVE_TYPES;
35625 static void
35626 ix86_init_builtins (void)
35628 tree t;
35630 ix86_init_builtin_types ();
35632 /* Builtins to get CPU type and features. */
35633 ix86_init_platform_type_builtins ();
35635 /* TFmode support builtins. */
35636 def_builtin_const (0, "__builtin_infq",
35637 FLOAT128_FTYPE_VOID, IX86_BUILTIN_INFQ);
35638 def_builtin_const (0, "__builtin_huge_valq",
35639 FLOAT128_FTYPE_VOID, IX86_BUILTIN_HUGE_VALQ);
35641 /* We will expand them to normal call if SSE isn't available since
35642 they are used by libgcc. */
35643 t = ix86_get_builtin_func_type (FLOAT128_FTYPE_FLOAT128);
35644 t = add_builtin_function ("__builtin_fabsq", t, IX86_BUILTIN_FABSQ,
35645 BUILT_IN_MD, "__fabstf2", NULL_TREE);
35646 TREE_READONLY (t) = 1;
35647 ix86_builtins[(int) IX86_BUILTIN_FABSQ] = t;
35649 t = ix86_get_builtin_func_type (FLOAT128_FTYPE_FLOAT128_FLOAT128);
35650 t = add_builtin_function ("__builtin_copysignq", t, IX86_BUILTIN_COPYSIGNQ,
35651 BUILT_IN_MD, "__copysigntf3", NULL_TREE);
35652 TREE_READONLY (t) = 1;
35653 ix86_builtins[(int) IX86_BUILTIN_COPYSIGNQ] = t;
35655 ix86_init_tm_builtins ();
35656 ix86_init_mmx_sse_builtins ();
35657 ix86_init_mpx_builtins ();
35659 if (TARGET_LP64)
35660 ix86_init_builtins_va_builtins_abi ();
35662 #ifdef SUBTARGET_INIT_BUILTINS
35663 SUBTARGET_INIT_BUILTINS;
35664 #endif
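/* Example of the TFmode builtins registered above (a sketch; x is assumed to
   be some __float128 value):

       __float128 huge = __builtin_huge_valq ();
       __float128 mag  = __builtin_fabsq (x);
       __float128 sgn  = __builtin_copysignq ((__float128) 1.0, x);

   As noted above, when SSE is unavailable these are expanded to ordinary
   calls to the libgcc routines __fabstf2 and __copysigntf3.  */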
35667 /* Return the ix86 builtin for CODE. */
35669 static tree
35670 ix86_builtin_decl (unsigned code, bool)
35672 if (code >= IX86_BUILTIN_MAX)
35673 return error_mark_node;
35675 return ix86_builtins[code];
35678 /* Errors in the source file can cause expand_expr to return const0_rtx
35679 where we expect a vector. To avoid crashing, use one of the vector
35680 clear instructions. */
35681 static rtx
35682 safe_vector_operand (rtx x, machine_mode mode)
35684 if (x == const0_rtx)
35685 x = CONST0_RTX (mode);
35686 return x;
35689 /* Subroutine of ix86_expand_builtin to take care of binop insns. */
35691 static rtx
35692 ix86_expand_binop_builtin (enum insn_code icode, tree exp, rtx target)
35694 rtx pat;
35695 tree arg0 = CALL_EXPR_ARG (exp, 0);
35696 tree arg1 = CALL_EXPR_ARG (exp, 1);
35697 rtx op0 = expand_normal (arg0);
35698 rtx op1 = expand_normal (arg1);
35699 machine_mode tmode = insn_data[icode].operand[0].mode;
35700 machine_mode mode0 = insn_data[icode].operand[1].mode;
35701 machine_mode mode1 = insn_data[icode].operand[2].mode;
35703 if (VECTOR_MODE_P (mode0))
35704 op0 = safe_vector_operand (op0, mode0);
35705 if (VECTOR_MODE_P (mode1))
35706 op1 = safe_vector_operand (op1, mode1);
35708 if (optimize || !target
35709 || GET_MODE (target) != tmode
35710 || !insn_data[icode].operand[0].predicate (target, tmode))
35711 target = gen_reg_rtx (tmode);
35713 if (GET_MODE (op1) == SImode && mode1 == TImode)
35715 rtx x = gen_reg_rtx (V4SImode);
35716 emit_insn (gen_sse2_loadd (x, op1));
35717 op1 = gen_lowpart (TImode, x);
35720 if (!insn_data[icode].operand[1].predicate (op0, mode0))
35721 op0 = copy_to_mode_reg (mode0, op0);
35722 if (!insn_data[icode].operand[2].predicate (op1, mode1))
35723 op1 = copy_to_mode_reg (mode1, op1);
35725 pat = GEN_FCN (icode) (target, op0, op1);
35726 if (! pat)
35727 return 0;
35729 emit_insn (pat);
35731 return target;
35734 /* Subroutine of ix86_expand_builtin to take care of 2-4 argument insns. */
35736 static rtx
35737 ix86_expand_multi_arg_builtin (enum insn_code icode, tree exp, rtx target,
35738 enum ix86_builtin_func_type m_type,
35739 enum rtx_code sub_code)
35741 rtx pat;
35742 int i;
35743 int nargs;
35744 bool comparison_p = false;
35745 bool tf_p = false;
35746 bool last_arg_constant = false;
35747 int num_memory = 0;
35748 struct {
35749 rtx op;
35750 machine_mode mode;
35751 } args[4];
35753 machine_mode tmode = insn_data[icode].operand[0].mode;
35755 switch (m_type)
35757 case MULTI_ARG_4_DF2_DI_I:
35758 case MULTI_ARG_4_DF2_DI_I1:
35759 case MULTI_ARG_4_SF2_SI_I:
35760 case MULTI_ARG_4_SF2_SI_I1:
35761 nargs = 4;
35762 last_arg_constant = true;
35763 break;
35765 case MULTI_ARG_3_SF:
35766 case MULTI_ARG_3_DF:
35767 case MULTI_ARG_3_SF2:
35768 case MULTI_ARG_3_DF2:
35769 case MULTI_ARG_3_DI:
35770 case MULTI_ARG_3_SI:
35771 case MULTI_ARG_3_SI_DI:
35772 case MULTI_ARG_3_HI:
35773 case MULTI_ARG_3_HI_SI:
35774 case MULTI_ARG_3_QI:
35775 case MULTI_ARG_3_DI2:
35776 case MULTI_ARG_3_SI2:
35777 case MULTI_ARG_3_HI2:
35778 case MULTI_ARG_3_QI2:
35779 nargs = 3;
35780 break;
35782 case MULTI_ARG_2_SF:
35783 case MULTI_ARG_2_DF:
35784 case MULTI_ARG_2_DI:
35785 case MULTI_ARG_2_SI:
35786 case MULTI_ARG_2_HI:
35787 case MULTI_ARG_2_QI:
35788 nargs = 2;
35789 break;
35791 case MULTI_ARG_2_DI_IMM:
35792 case MULTI_ARG_2_SI_IMM:
35793 case MULTI_ARG_2_HI_IMM:
35794 case MULTI_ARG_2_QI_IMM:
35795 nargs = 2;
35796 last_arg_constant = true;
35797 break;
35799 case MULTI_ARG_1_SF:
35800 case MULTI_ARG_1_DF:
35801 case MULTI_ARG_1_SF2:
35802 case MULTI_ARG_1_DF2:
35803 case MULTI_ARG_1_DI:
35804 case MULTI_ARG_1_SI:
35805 case MULTI_ARG_1_HI:
35806 case MULTI_ARG_1_QI:
35807 case MULTI_ARG_1_SI_DI:
35808 case MULTI_ARG_1_HI_DI:
35809 case MULTI_ARG_1_HI_SI:
35810 case MULTI_ARG_1_QI_DI:
35811 case MULTI_ARG_1_QI_SI:
35812 case MULTI_ARG_1_QI_HI:
35813 nargs = 1;
35814 break;
35816 case MULTI_ARG_2_DI_CMP:
35817 case MULTI_ARG_2_SI_CMP:
35818 case MULTI_ARG_2_HI_CMP:
35819 case MULTI_ARG_2_QI_CMP:
35820 nargs = 2;
35821 comparison_p = true;
35822 break;
35824 case MULTI_ARG_2_SF_TF:
35825 case MULTI_ARG_2_DF_TF:
35826 case MULTI_ARG_2_DI_TF:
35827 case MULTI_ARG_2_SI_TF:
35828 case MULTI_ARG_2_HI_TF:
35829 case MULTI_ARG_2_QI_TF:
35830 nargs = 2;
35831 tf_p = true;
35832 break;
35834 default:
35835 gcc_unreachable ();
35838 if (optimize || !target
35839 || GET_MODE (target) != tmode
35840 || !insn_data[icode].operand[0].predicate (target, tmode))
35841 target = gen_reg_rtx (tmode);
35843 gcc_assert (nargs <= 4);
35845 for (i = 0; i < nargs; i++)
35847 tree arg = CALL_EXPR_ARG (exp, i);
35848 rtx op = expand_normal (arg);
35849 int adjust = (comparison_p) ? 1 : 0;
35850 machine_mode mode = insn_data[icode].operand[i+adjust+1].mode;
35852 if (last_arg_constant && i == nargs - 1)
35854 if (!insn_data[icode].operand[i + 1].predicate (op, mode))
35856 enum insn_code new_icode = icode;
35857 switch (icode)
35859 case CODE_FOR_xop_vpermil2v2df3:
35860 case CODE_FOR_xop_vpermil2v4sf3:
35861 case CODE_FOR_xop_vpermil2v4df3:
35862 case CODE_FOR_xop_vpermil2v8sf3:
35863 error ("the last argument must be a 2-bit immediate");
35864 return gen_reg_rtx (tmode);
35865 case CODE_FOR_xop_rotlv2di3:
35866 new_icode = CODE_FOR_rotlv2di3;
35867 goto xop_rotl;
35868 case CODE_FOR_xop_rotlv4si3:
35869 new_icode = CODE_FOR_rotlv4si3;
35870 goto xop_rotl;
35871 case CODE_FOR_xop_rotlv8hi3:
35872 new_icode = CODE_FOR_rotlv8hi3;
35873 goto xop_rotl;
35874 case CODE_FOR_xop_rotlv16qi3:
35875 new_icode = CODE_FOR_rotlv16qi3;
35876 xop_rotl:
35877 if (CONST_INT_P (op))
35879 int mask = GET_MODE_BITSIZE (GET_MODE_INNER (tmode)) - 1;
35880 op = GEN_INT (INTVAL (op) & mask);
35881 gcc_checking_assert
35882 (insn_data[icode].operand[i + 1].predicate (op, mode));
35884 else
35886 gcc_checking_assert
35887 (nargs == 2
35888 && insn_data[new_icode].operand[0].mode == tmode
35889 && insn_data[new_icode].operand[1].mode == tmode
35890 && insn_data[new_icode].operand[2].mode == mode
35891 && insn_data[new_icode].operand[0].predicate
35892 == insn_data[icode].operand[0].predicate
35893 && insn_data[new_icode].operand[1].predicate
35894 == insn_data[icode].operand[1].predicate);
35895 icode = new_icode;
35896 goto non_constant;
35898 break;
35899 default:
35900 gcc_unreachable ();
35904 else
35906 non_constant:
35907 if (VECTOR_MODE_P (mode))
35908 op = safe_vector_operand (op, mode);
35910 /* If we aren't optimizing, only allow one memory operand to be
35911 generated. */
35912 if (memory_operand (op, mode))
35913 num_memory++;
35915 gcc_assert (GET_MODE (op) == mode || GET_MODE (op) == VOIDmode);
35917 if (optimize
35918 || !insn_data[icode].operand[i+adjust+1].predicate (op, mode)
35919 || num_memory > 1)
35920 op = force_reg (mode, op);
35923 args[i].op = op;
35924 args[i].mode = mode;
35927 switch (nargs)
35929 case 1:
35930 pat = GEN_FCN (icode) (target, args[0].op);
35931 break;
35933 case 2:
35934 if (tf_p)
35935 pat = GEN_FCN (icode) (target, args[0].op, args[1].op,
35936 GEN_INT ((int)sub_code));
35937 else if (! comparison_p)
35938 pat = GEN_FCN (icode) (target, args[0].op, args[1].op);
35939 else
35941 rtx cmp_op = gen_rtx_fmt_ee (sub_code, GET_MODE (target),
35942 args[0].op,
35943 args[1].op);
35945 pat = GEN_FCN (icode) (target, cmp_op, args[0].op, args[1].op);
35947 break;
35949 case 3:
35950 pat = GEN_FCN (icode) (target, args[0].op, args[1].op, args[2].op);
35951 break;
35953 case 4:
35954 pat = GEN_FCN (icode) (target, args[0].op, args[1].op, args[2].op, args[3].op);
35955 break;
35957 default:
35958 gcc_unreachable ();
35961 if (! pat)
35962 return 0;
35964 emit_insn (pat);
35965 return target;
35968 /* Subroutine of ix86_expand_args_builtin to take care of scalar unop
35969 insns with vec_merge. */
35971 static rtx
35972 ix86_expand_unop_vec_merge_builtin (enum insn_code icode, tree exp,
35973 rtx target)
35975 rtx pat;
35976 tree arg0 = CALL_EXPR_ARG (exp, 0);
35977 rtx op1, op0 = expand_normal (arg0);
35978 machine_mode tmode = insn_data[icode].operand[0].mode;
35979 machine_mode mode0 = insn_data[icode].operand[1].mode;
35981 if (optimize || !target
35982 || GET_MODE (target) != tmode
35983 || !insn_data[icode].operand[0].predicate (target, tmode))
35984 target = gen_reg_rtx (tmode);
35986 if (VECTOR_MODE_P (mode0))
35987 op0 = safe_vector_operand (op0, mode0);
35989 if ((optimize && !register_operand (op0, mode0))
35990 || !insn_data[icode].operand[1].predicate (op0, mode0))
35991 op0 = copy_to_mode_reg (mode0, op0);
35993 op1 = op0;
35994 if (!insn_data[icode].operand[2].predicate (op1, mode0))
35995 op1 = copy_to_mode_reg (mode0, op1);
35997 pat = GEN_FCN (icode) (target, op0, op1);
35998 if (! pat)
35999 return 0;
36000 emit_insn (pat);
36001 return target;
36004 /* Subroutine of ix86_expand_builtin to take care of comparison insns. */
36006 static rtx
36007 ix86_expand_sse_compare (const struct builtin_description *d,
36008 tree exp, rtx target, bool swap)
36010 rtx pat;
36011 tree arg0 = CALL_EXPR_ARG (exp, 0);
36012 tree arg1 = CALL_EXPR_ARG (exp, 1);
36013 rtx op0 = expand_normal (arg0);
36014 rtx op1 = expand_normal (arg1);
36015 rtx op2;
36016 machine_mode tmode = insn_data[d->icode].operand[0].mode;
36017 machine_mode mode0 = insn_data[d->icode].operand[1].mode;
36018 machine_mode mode1 = insn_data[d->icode].operand[2].mode;
36019 enum rtx_code comparison = d->comparison;
36021 if (VECTOR_MODE_P (mode0))
36022 op0 = safe_vector_operand (op0, mode0);
36023 if (VECTOR_MODE_P (mode1))
36024 op1 = safe_vector_operand (op1, mode1);
36026 /* Swap operands if we have a comparison that isn't available in
36027 hardware. */
36028 if (swap)
36030 rtx tmp = gen_reg_rtx (mode1);
36031 emit_move_insn (tmp, op1);
36032 op1 = op0;
36033 op0 = tmp;
36036 if (optimize || !target
36037 || GET_MODE (target) != tmode
36038 || !insn_data[d->icode].operand[0].predicate (target, tmode))
36039 target = gen_reg_rtx (tmode);
36041 if ((optimize && !register_operand (op0, mode0))
36042 || !insn_data[d->icode].operand[1].predicate (op0, mode0))
36043 op0 = copy_to_mode_reg (mode0, op0);
36044 if ((optimize && !register_operand (op1, mode1))
36045 || !insn_data[d->icode].operand[2].predicate (op1, mode1))
36046 op1 = copy_to_mode_reg (mode1, op1);
36048 op2 = gen_rtx_fmt_ee (comparison, mode0, op0, op1);
36049 pat = GEN_FCN (d->icode) (target, op0, op1, op2);
36050 if (! pat)
36051 return 0;
36052 emit_insn (pat);
36053 return target;
36056 /* Subroutine of ix86_expand_builtin to take care of comi insns. */
36058 static rtx
36059 ix86_expand_sse_comi (const struct builtin_description *d, tree exp,
36060 rtx target)
36062 rtx pat;
36063 tree arg0 = CALL_EXPR_ARG (exp, 0);
36064 tree arg1 = CALL_EXPR_ARG (exp, 1);
36065 rtx op0 = expand_normal (arg0);
36066 rtx op1 = expand_normal (arg1);
36067 machine_mode mode0 = insn_data[d->icode].operand[0].mode;
36068 machine_mode mode1 = insn_data[d->icode].operand[1].mode;
36069 enum rtx_code comparison = d->comparison;
36071 if (VECTOR_MODE_P (mode0))
36072 op0 = safe_vector_operand (op0, mode0);
36073 if (VECTOR_MODE_P (mode1))
36074 op1 = safe_vector_operand (op1, mode1);
36076 /* Swap operands if we have a comparison that isn't available in
36077 hardware. */
36078 if (d->flag & BUILTIN_DESC_SWAP_OPERANDS)
36079 std::swap (op1, op0);
36081 target = gen_reg_rtx (SImode);
36082 emit_move_insn (target, const0_rtx);
36083 target = gen_rtx_SUBREG (QImode, target, 0);
36085 if ((optimize && !register_operand (op0, mode0))
36086 || !insn_data[d->icode].operand[0].predicate (op0, mode0))
36087 op0 = copy_to_mode_reg (mode0, op0);
36088 if ((optimize && !register_operand (op1, mode1))
36089 || !insn_data[d->icode].operand[1].predicate (op1, mode1))
36090 op1 = copy_to_mode_reg (mode1, op1);
36092 pat = GEN_FCN (d->icode) (op0, op1);
36093 if (! pat)
36094 return 0;
36095 emit_insn (pat);
36096 emit_insn (gen_rtx_SET (VOIDmode,
36097 gen_rtx_STRICT_LOW_PART (VOIDmode, target),
36098 gen_rtx_fmt_ee (comparison, QImode,
36099 SET_DEST (pat),
36100 const0_rtx)));
36102 return SUBREG_REG (target);
36105 /* Subroutines of ix86_expand_args_builtin to take care of round insns. */
36107 static rtx
36108 ix86_expand_sse_round (const struct builtin_description *d, tree exp,
36109 rtx target)
36111 rtx pat;
36112 tree arg0 = CALL_EXPR_ARG (exp, 0);
36113 rtx op1, op0 = expand_normal (arg0);
36114 machine_mode tmode = insn_data[d->icode].operand[0].mode;
36115 machine_mode mode0 = insn_data[d->icode].operand[1].mode;
36117 if (optimize || target == 0
36118 || GET_MODE (target) != tmode
36119 || !insn_data[d->icode].operand[0].predicate (target, tmode))
36120 target = gen_reg_rtx (tmode);
36122 if (VECTOR_MODE_P (mode0))
36123 op0 = safe_vector_operand (op0, mode0);
36125 if ((optimize && !register_operand (op0, mode0))
36126 || !insn_data[d->icode].operand[0].predicate (op0, mode0))
36127 op0 = copy_to_mode_reg (mode0, op0);
36129 op1 = GEN_INT (d->comparison);
36131 pat = GEN_FCN (d->icode) (target, op0, op1);
36132 if (! pat)
36133 return 0;
36134 emit_insn (pat);
36135 return target;
36138 static rtx
36139 ix86_expand_sse_round_vec_pack_sfix (const struct builtin_description *d,
36140 tree exp, rtx target)
36142 rtx pat;
36143 tree arg0 = CALL_EXPR_ARG (exp, 0);
36144 tree arg1 = CALL_EXPR_ARG (exp, 1);
36145 rtx op0 = expand_normal (arg0);
36146 rtx op1 = expand_normal (arg1);
36147 rtx op2;
36148 machine_mode tmode = insn_data[d->icode].operand[0].mode;
36149 machine_mode mode0 = insn_data[d->icode].operand[1].mode;
36150 machine_mode mode1 = insn_data[d->icode].operand[2].mode;
36152 if (optimize || target == 0
36153 || GET_MODE (target) != tmode
36154 || !insn_data[d->icode].operand[0].predicate (target, tmode))
36155 target = gen_reg_rtx (tmode);
36157 op0 = safe_vector_operand (op0, mode0);
36158 op1 = safe_vector_operand (op1, mode1);
36160 if ((optimize && !register_operand (op0, mode0))
36161 || !insn_data[d->icode].operand[0].predicate (op0, mode0))
36162 op0 = copy_to_mode_reg (mode0, op0);
36163 if ((optimize && !register_operand (op1, mode1))
36164 || !insn_data[d->icode].operand[1].predicate (op1, mode1))
36165 op1 = copy_to_mode_reg (mode1, op1);
36167 op2 = GEN_INT (d->comparison);
36169 pat = GEN_FCN (d->icode) (target, op0, op1, op2);
36170 if (! pat)
36171 return 0;
36172 emit_insn (pat);
36173 return target;
36176 /* Subroutine of ix86_expand_builtin to take care of ptest insns. */
36178 static rtx
36179 ix86_expand_sse_ptest (const struct builtin_description *d, tree exp,
36180 rtx target)
36182 rtx pat;
36183 tree arg0 = CALL_EXPR_ARG (exp, 0);
36184 tree arg1 = CALL_EXPR_ARG (exp, 1);
36185 rtx op0 = expand_normal (arg0);
36186 rtx op1 = expand_normal (arg1);
36187 machine_mode mode0 = insn_data[d->icode].operand[0].mode;
36188 machine_mode mode1 = insn_data[d->icode].operand[1].mode;
36189 enum rtx_code comparison = d->comparison;
36191 if (VECTOR_MODE_P (mode0))
36192 op0 = safe_vector_operand (op0, mode0);
36193 if (VECTOR_MODE_P (mode1))
36194 op1 = safe_vector_operand (op1, mode1);
36196 target = gen_reg_rtx (SImode);
36197 emit_move_insn (target, const0_rtx);
36198 target = gen_rtx_SUBREG (QImode, target, 0);
36200 if ((optimize && !register_operand (op0, mode0))
36201 || !insn_data[d->icode].operand[0].predicate (op0, mode0))
36202 op0 = copy_to_mode_reg (mode0, op0);
36203 if ((optimize && !register_operand (op1, mode1))
36204 || !insn_data[d->icode].operand[1].predicate (op1, mode1))
36205 op1 = copy_to_mode_reg (mode1, op1);
36207 pat = GEN_FCN (d->icode) (op0, op1);
36208 if (! pat)
36209 return 0;
36210 emit_insn (pat);
36211 emit_insn (gen_rtx_SET (VOIDmode,
36212 gen_rtx_STRICT_LOW_PART (VOIDmode, target),
36213 gen_rtx_fmt_ee (comparison, QImode,
36214 SET_DEST (pat),
36215 const0_rtx)));
36217 return SUBREG_REG (target);
36220 /* Subroutine of ix86_expand_builtin to take care of pcmpestr[im] insns. */
36222 static rtx
36223 ix86_expand_sse_pcmpestr (const struct builtin_description *d,
36224 tree exp, rtx target)
36226 rtx pat;
36227 tree arg0 = CALL_EXPR_ARG (exp, 0);
36228 tree arg1 = CALL_EXPR_ARG (exp, 1);
36229 tree arg2 = CALL_EXPR_ARG (exp, 2);
36230 tree arg3 = CALL_EXPR_ARG (exp, 3);
36231 tree arg4 = CALL_EXPR_ARG (exp, 4);
36232 rtx scratch0, scratch1;
36233 rtx op0 = expand_normal (arg0);
36234 rtx op1 = expand_normal (arg1);
36235 rtx op2 = expand_normal (arg2);
36236 rtx op3 = expand_normal (arg3);
36237 rtx op4 = expand_normal (arg4);
36238 machine_mode tmode0, tmode1, modev2, modei3, modev4, modei5, modeimm;
36240 tmode0 = insn_data[d->icode].operand[0].mode;
36241 tmode1 = insn_data[d->icode].operand[1].mode;
36242 modev2 = insn_data[d->icode].operand[2].mode;
36243 modei3 = insn_data[d->icode].operand[3].mode;
36244 modev4 = insn_data[d->icode].operand[4].mode;
36245 modei5 = insn_data[d->icode].operand[5].mode;
36246 modeimm = insn_data[d->icode].operand[6].mode;
36248 if (VECTOR_MODE_P (modev2))
36249 op0 = safe_vector_operand (op0, modev2);
36250 if (VECTOR_MODE_P (modev4))
36251 op2 = safe_vector_operand (op2, modev4);
36253 if (!insn_data[d->icode].operand[2].predicate (op0, modev2))
36254 op0 = copy_to_mode_reg (modev2, op0);
36255 if (!insn_data[d->icode].operand[3].predicate (op1, modei3))
36256 op1 = copy_to_mode_reg (modei3, op1);
36257 if ((optimize && !register_operand (op2, modev4))
36258 || !insn_data[d->icode].operand[4].predicate (op2, modev4))
36259 op2 = copy_to_mode_reg (modev4, op2);
36260 if (!insn_data[d->icode].operand[5].predicate (op3, modei5))
36261 op3 = copy_to_mode_reg (modei5, op3);
36263 if (!insn_data[d->icode].operand[6].predicate (op4, modeimm))
36265 error ("the fifth argument must be an 8-bit immediate");
36266 return const0_rtx;
36269 if (d->code == IX86_BUILTIN_PCMPESTRI128)
36271 if (optimize || !target
36272 || GET_MODE (target) != tmode0
36273 || !insn_data[d->icode].operand[0].predicate (target, tmode0))
36274 target = gen_reg_rtx (tmode0);
36276 scratch1 = gen_reg_rtx (tmode1);
36278 pat = GEN_FCN (d->icode) (target, scratch1, op0, op1, op2, op3, op4);
36280 else if (d->code == IX86_BUILTIN_PCMPESTRM128)
36282 if (optimize || !target
36283 || GET_MODE (target) != tmode1
36284 || !insn_data[d->icode].operand[1].predicate (target, tmode1))
36285 target = gen_reg_rtx (tmode1);
36287 scratch0 = gen_reg_rtx (tmode0);
36289 pat = GEN_FCN (d->icode) (scratch0, target, op0, op1, op2, op3, op4);
36291 else
36293 gcc_assert (d->flag);
36295 scratch0 = gen_reg_rtx (tmode0);
36296 scratch1 = gen_reg_rtx (tmode1);
36298 pat = GEN_FCN (d->icode) (scratch0, scratch1, op0, op1, op2, op3, op4);
36301 if (! pat)
36302 return 0;
36304 emit_insn (pat);
36306 if (d->flag)
36308 target = gen_reg_rtx (SImode);
36309 emit_move_insn (target, const0_rtx);
36310 target = gen_rtx_SUBREG (QImode, target, 0);
36312 emit_insn
36313 (gen_rtx_SET (VOIDmode, gen_rtx_STRICT_LOW_PART (VOIDmode, target),
36314 gen_rtx_fmt_ee (EQ, QImode,
36315 gen_rtx_REG ((machine_mode) d->flag,
36316 FLAGS_REG),
36317 const0_rtx)));
36318 return SUBREG_REG (target);
36320 else
36321 return target;
36325 /* Subroutine of ix86_expand_builtin to take care of pcmpistr[im] insns. */
36327 static rtx
36328 ix86_expand_sse_pcmpistr (const struct builtin_description *d,
36329 tree exp, rtx target)
36331 rtx pat;
36332 tree arg0 = CALL_EXPR_ARG (exp, 0);
36333 tree arg1 = CALL_EXPR_ARG (exp, 1);
36334 tree arg2 = CALL_EXPR_ARG (exp, 2);
36335 rtx scratch0, scratch1;
36336 rtx op0 = expand_normal (arg0);
36337 rtx op1 = expand_normal (arg1);
36338 rtx op2 = expand_normal (arg2);
36339 machine_mode tmode0, tmode1, modev2, modev3, modeimm;
36341 tmode0 = insn_data[d->icode].operand[0].mode;
36342 tmode1 = insn_data[d->icode].operand[1].mode;
36343 modev2 = insn_data[d->icode].operand[2].mode;
36344 modev3 = insn_data[d->icode].operand[3].mode;
36345 modeimm = insn_data[d->icode].operand[4].mode;
36347 if (VECTOR_MODE_P (modev2))
36348 op0 = safe_vector_operand (op0, modev2);
36349 if (VECTOR_MODE_P (modev3))
36350 op1 = safe_vector_operand (op1, modev3);
36352 if (!insn_data[d->icode].operand[2].predicate (op0, modev2))
36353 op0 = copy_to_mode_reg (modev2, op0);
36354 if ((optimize && !register_operand (op1, modev3))
36355 || !insn_data[d->icode].operand[3].predicate (op1, modev3))
36356 op1 = copy_to_mode_reg (modev3, op1);
36358 if (!insn_data[d->icode].operand[4].predicate (op2, modeimm))
36360 error ("the third argument must be an 8-bit immediate");
36361 return const0_rtx;
36364 if (d->code == IX86_BUILTIN_PCMPISTRI128)
36366 if (optimize || !target
36367 || GET_MODE (target) != tmode0
36368 || !insn_data[d->icode].operand[0].predicate (target, tmode0))
36369 target = gen_reg_rtx (tmode0);
36371 scratch1 = gen_reg_rtx (tmode1);
36373 pat = GEN_FCN (d->icode) (target, scratch1, op0, op1, op2);
36375 else if (d->code == IX86_BUILTIN_PCMPISTRM128)
36377 if (optimize || !target
36378 || GET_MODE (target) != tmode1
36379 || !insn_data[d->icode].operand[1].predicate (target, tmode1))
36380 target = gen_reg_rtx (tmode1);
36382 scratch0 = gen_reg_rtx (tmode0);
36384 pat = GEN_FCN (d->icode) (scratch0, target, op0, op1, op2);
36386 else
36388 gcc_assert (d->flag);
36390 scratch0 = gen_reg_rtx (tmode0);
36391 scratch1 = gen_reg_rtx (tmode1);
36393 pat = GEN_FCN (d->icode) (scratch0, scratch1, op0, op1, op2);
36396 if (! pat)
36397 return 0;
36399 emit_insn (pat);
36401 if (d->flag)
36403 target = gen_reg_rtx (SImode);
36404 emit_move_insn (target, const0_rtx);
36405 target = gen_rtx_SUBREG (QImode, target, 0);
36407 emit_insn
36408 (gen_rtx_SET (VOIDmode, gen_rtx_STRICT_LOW_PART (VOIDmode, target),
36409 gen_rtx_fmt_ee (EQ, QImode,
36410 gen_rtx_REG ((machine_mode) d->flag,
36411 FLAGS_REG),
36412 const0_rtx)));
36413 return SUBREG_REG (target);
36415 else
36416 return target;
36419 /* Subroutine of ix86_expand_builtin to take care of insns with
36420 variable number of operands. */
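/* Overview of the dispatch below: the ix86_builtin_func_type tag in d->flag
   determines the number of arguments (nargs), how many trailing arguments
   must be immediates (nargs_constant), whether a write-mask operand sits
   before the immediates (mask_pos), and whether the result mode differs from
   the insn's operand 0 mode (rmode).  The loop that follows legitimizes each
   operand accordingly before GEN_FCN emits the pattern.  */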
36422 static rtx
36423 ix86_expand_args_builtin (const struct builtin_description *d,
36424 tree exp, rtx target)
36426 rtx pat, real_target;
36427 unsigned int i, nargs;
36428 unsigned int nargs_constant = 0;
36429 unsigned int mask_pos = 0;
36430 int num_memory = 0;
36431 struct
36433 rtx op;
36434 machine_mode mode;
36435 } args[6];
36436 bool last_arg_count = false;
36437 enum insn_code icode = d->icode;
36438 const struct insn_data_d *insn_p = &insn_data[icode];
36439 machine_mode tmode = insn_p->operand[0].mode;
36440 machine_mode rmode = VOIDmode;
36441 bool swap = false;
36442 enum rtx_code comparison = d->comparison;
36444 switch ((enum ix86_builtin_func_type) d->flag)
36446 case V2DF_FTYPE_V2DF_ROUND:
36447 case V4DF_FTYPE_V4DF_ROUND:
36448 case V4SF_FTYPE_V4SF_ROUND:
36449 case V8SF_FTYPE_V8SF_ROUND:
36450 case V4SI_FTYPE_V4SF_ROUND:
36451 case V8SI_FTYPE_V8SF_ROUND:
36452 return ix86_expand_sse_round (d, exp, target);
36453 case V4SI_FTYPE_V2DF_V2DF_ROUND:
36454 case V8SI_FTYPE_V4DF_V4DF_ROUND:
36455 case V16SI_FTYPE_V8DF_V8DF_ROUND:
36456 return ix86_expand_sse_round_vec_pack_sfix (d, exp, target);
36457 case INT_FTYPE_V8SF_V8SF_PTEST:
36458 case INT_FTYPE_V4DI_V4DI_PTEST:
36459 case INT_FTYPE_V4DF_V4DF_PTEST:
36460 case INT_FTYPE_V4SF_V4SF_PTEST:
36461 case INT_FTYPE_V2DI_V2DI_PTEST:
36462 case INT_FTYPE_V2DF_V2DF_PTEST:
36463 return ix86_expand_sse_ptest (d, exp, target);
36464 case FLOAT128_FTYPE_FLOAT128:
36465 case FLOAT_FTYPE_FLOAT:
36466 case INT_FTYPE_INT:
36467 case UINT64_FTYPE_INT:
36468 case UINT16_FTYPE_UINT16:
36469 case INT64_FTYPE_INT64:
36470 case INT64_FTYPE_V4SF:
36471 case INT64_FTYPE_V2DF:
36472 case INT_FTYPE_V16QI:
36473 case INT_FTYPE_V8QI:
36474 case INT_FTYPE_V8SF:
36475 case INT_FTYPE_V4DF:
36476 case INT_FTYPE_V4SF:
36477 case INT_FTYPE_V2DF:
36478 case INT_FTYPE_V32QI:
36479 case V16QI_FTYPE_V16QI:
36480 case V8SI_FTYPE_V8SF:
36481 case V8SI_FTYPE_V4SI:
36482 case V8HI_FTYPE_V8HI:
36483 case V8HI_FTYPE_V16QI:
36484 case V8QI_FTYPE_V8QI:
36485 case V8SF_FTYPE_V8SF:
36486 case V8SF_FTYPE_V8SI:
36487 case V8SF_FTYPE_V4SF:
36488 case V8SF_FTYPE_V8HI:
36489 case V4SI_FTYPE_V4SI:
36490 case V4SI_FTYPE_V16QI:
36491 case V4SI_FTYPE_V4SF:
36492 case V4SI_FTYPE_V8SI:
36493 case V4SI_FTYPE_V8HI:
36494 case V4SI_FTYPE_V4DF:
36495 case V4SI_FTYPE_V2DF:
36496 case V4HI_FTYPE_V4HI:
36497 case V4DF_FTYPE_V4DF:
36498 case V4DF_FTYPE_V4SI:
36499 case V4DF_FTYPE_V4SF:
36500 case V4DF_FTYPE_V2DF:
36501 case V4SF_FTYPE_V4SF:
36502 case V4SF_FTYPE_V4SI:
36503 case V4SF_FTYPE_V8SF:
36504 case V4SF_FTYPE_V4DF:
36505 case V4SF_FTYPE_V8HI:
36506 case V4SF_FTYPE_V2DF:
36507 case V2DI_FTYPE_V2DI:
36508 case V2DI_FTYPE_V16QI:
36509 case V2DI_FTYPE_V8HI:
36510 case V2DI_FTYPE_V4SI:
36511 case V2DF_FTYPE_V2DF:
36512 case V2DF_FTYPE_V4SI:
36513 case V2DF_FTYPE_V4DF:
36514 case V2DF_FTYPE_V4SF:
36515 case V2DF_FTYPE_V2SI:
36516 case V2SI_FTYPE_V2SI:
36517 case V2SI_FTYPE_V4SF:
36518 case V2SI_FTYPE_V2SF:
36519 case V2SI_FTYPE_V2DF:
36520 case V2SF_FTYPE_V2SF:
36521 case V2SF_FTYPE_V2SI:
36522 case V32QI_FTYPE_V32QI:
36523 case V32QI_FTYPE_V16QI:
36524 case V16HI_FTYPE_V16HI:
36525 case V16HI_FTYPE_V8HI:
36526 case V8SI_FTYPE_V8SI:
36527 case V16HI_FTYPE_V16QI:
36528 case V8SI_FTYPE_V16QI:
36529 case V4DI_FTYPE_V16QI:
36530 case V8SI_FTYPE_V8HI:
36531 case V4DI_FTYPE_V8HI:
36532 case V4DI_FTYPE_V4SI:
36533 case V4DI_FTYPE_V2DI:
36534 case HI_FTYPE_HI:
36535 case HI_FTYPE_V16QI:
36536 case SI_FTYPE_V32QI:
36537 case DI_FTYPE_V64QI:
36538 case V16QI_FTYPE_HI:
36539 case V32QI_FTYPE_SI:
36540 case V64QI_FTYPE_DI:
36541 case V8HI_FTYPE_QI:
36542 case V16HI_FTYPE_HI:
36543 case V32HI_FTYPE_SI:
36544 case V4SI_FTYPE_QI:
36545 case V8SI_FTYPE_QI:
36546 case V4SI_FTYPE_HI:
36547 case V8SI_FTYPE_HI:
36548 case QI_FTYPE_V8HI:
36549 case HI_FTYPE_V16HI:
36550 case SI_FTYPE_V32HI:
36551 case QI_FTYPE_V4SI:
36552 case QI_FTYPE_V8SI:
36553 case HI_FTYPE_V16SI:
36554 case QI_FTYPE_V2DI:
36555 case QI_FTYPE_V4DI:
36556 case QI_FTYPE_V8DI:
36557 case UINT_FTYPE_V2DF:
36558 case UINT_FTYPE_V4SF:
36559 case UINT64_FTYPE_V2DF:
36560 case UINT64_FTYPE_V4SF:
36561 case V16QI_FTYPE_V8DI:
36562 case V16HI_FTYPE_V16SI:
36563 case V16SI_FTYPE_HI:
36564 case V2DI_FTYPE_QI:
36565 case V4DI_FTYPE_QI:
36566 case V16SI_FTYPE_V16SI:
36567 case V16SI_FTYPE_INT:
36568 case V16SF_FTYPE_FLOAT:
36569 case V16SF_FTYPE_V8SF:
36570 case V16SI_FTYPE_V8SI:
36571 case V16SF_FTYPE_V4SF:
36572 case V16SI_FTYPE_V4SI:
36573 case V16SF_FTYPE_V16SF:
36574 case V8HI_FTYPE_V8DI:
36575 case V8UHI_FTYPE_V8UHI:
36576 case V8SI_FTYPE_V8DI:
36577 case V8SF_FTYPE_V8DF:
36578 case V8DI_FTYPE_QI:
36579 case V8DI_FTYPE_INT64:
36580 case V8DI_FTYPE_V4DI:
36581 case V8DI_FTYPE_V8DI:
36582 case V8DF_FTYPE_DOUBLE:
36583 case V8DF_FTYPE_V4DF:
36584 case V8DF_FTYPE_V2DF:
36585 case V8DF_FTYPE_V8DF:
36586 case V8DF_FTYPE_V8SI:
36587 nargs = 1;
36588 break;
36589 case V4SF_FTYPE_V4SF_VEC_MERGE:
36590 case V2DF_FTYPE_V2DF_VEC_MERGE:
36591 return ix86_expand_unop_vec_merge_builtin (icode, exp, target);
36592 case FLOAT128_FTYPE_FLOAT128_FLOAT128:
36593 case V16QI_FTYPE_V16QI_V16QI:
36594 case V16QI_FTYPE_V8HI_V8HI:
36595 case V16SI_FTYPE_V16SI_V16SI:
36596 case V16SF_FTYPE_V16SF_V16SF:
36597 case V16SF_FTYPE_V16SF_V16SI:
36598 case V8QI_FTYPE_V8QI_V8QI:
36599 case V8QI_FTYPE_V4HI_V4HI:
36600 case V8HI_FTYPE_V8HI_V8HI:
36601 case V8HI_FTYPE_V16QI_V16QI:
36602 case V8HI_FTYPE_V4SI_V4SI:
36603 case V8SF_FTYPE_V8SF_V8SF:
36604 case V8SF_FTYPE_V8SF_V8SI:
36605 case V8DI_FTYPE_V8DI_V8DI:
36606 case V8DF_FTYPE_V8DF_V8DF:
36607 case V8DF_FTYPE_V8DF_V8DI:
36608 case V4SI_FTYPE_V4SI_V4SI:
36609 case V4SI_FTYPE_V8HI_V8HI:
36610 case V4SI_FTYPE_V4SF_V4SF:
36611 case V4SI_FTYPE_V2DF_V2DF:
36612 case V4HI_FTYPE_V4HI_V4HI:
36613 case V4HI_FTYPE_V8QI_V8QI:
36614 case V4HI_FTYPE_V2SI_V2SI:
36615 case V4DF_FTYPE_V4DF_V4DF:
36616 case V4DF_FTYPE_V4DF_V4DI:
36617 case V4SF_FTYPE_V4SF_V4SF:
36618 case V4SF_FTYPE_V4SF_V4SI:
36619 case V4SF_FTYPE_V4SF_V2SI:
36620 case V4SF_FTYPE_V4SF_V2DF:
36621 case V4SF_FTYPE_V4SF_UINT:
36622 case V4SF_FTYPE_V4SF_UINT64:
36623 case V4SF_FTYPE_V4SF_DI:
36624 case V4SF_FTYPE_V4SF_SI:
36625 case V2DI_FTYPE_V2DI_V2DI:
36626 case V2DI_FTYPE_V16QI_V16QI:
36627 case V2DI_FTYPE_V4SI_V4SI:
36628 case V2UDI_FTYPE_V4USI_V4USI:
36629 case V2DI_FTYPE_V2DI_V16QI:
36630 case V2DI_FTYPE_V2DF_V2DF:
36631 case V2SI_FTYPE_V2SI_V2SI:
36632 case V2SI_FTYPE_V4HI_V4HI:
36633 case V2SI_FTYPE_V2SF_V2SF:
36634 case V2DF_FTYPE_V2DF_V2DF:
36635 case V2DF_FTYPE_V2DF_V4SF:
36636 case V2DF_FTYPE_V2DF_V2DI:
36637 case V2DF_FTYPE_V2DF_DI:
36638 case V2DF_FTYPE_V2DF_SI:
36639 case V2DF_FTYPE_V2DF_UINT:
36640 case V2DF_FTYPE_V2DF_UINT64:
36641 case V2SF_FTYPE_V2SF_V2SF:
36642 case V1DI_FTYPE_V1DI_V1DI:
36643 case V1DI_FTYPE_V8QI_V8QI:
36644 case V1DI_FTYPE_V2SI_V2SI:
36645 case V32QI_FTYPE_V16HI_V16HI:
36646 case V16HI_FTYPE_V8SI_V8SI:
36647 case V32QI_FTYPE_V32QI_V32QI:
36648 case V16HI_FTYPE_V32QI_V32QI:
36649 case V16HI_FTYPE_V16HI_V16HI:
36650 case V8SI_FTYPE_V4DF_V4DF:
36651 case V8SI_FTYPE_V8SI_V8SI:
36652 case V8SI_FTYPE_V16HI_V16HI:
36653 case V4DI_FTYPE_V4DI_V4DI:
36654 case V4DI_FTYPE_V8SI_V8SI:
36655 case V4UDI_FTYPE_V8USI_V8USI:
36656 case QI_FTYPE_V8DI_V8DI:
36657 case V8DI_FTYPE_V64QI_V64QI:
36658 case HI_FTYPE_V16SI_V16SI:
36659 if (comparison == UNKNOWN)
36660 return ix86_expand_binop_builtin (icode, exp, target);
36661 nargs = 2;
36662 break;
36663 case V4SF_FTYPE_V4SF_V4SF_SWAP:
36664 case V2DF_FTYPE_V2DF_V2DF_SWAP:
36665 gcc_assert (comparison != UNKNOWN);
36666 nargs = 2;
36667 swap = true;
36668 break;
36669 case V16HI_FTYPE_V16HI_V8HI_COUNT:
36670 case V16HI_FTYPE_V16HI_SI_COUNT:
36671 case V8SI_FTYPE_V8SI_V4SI_COUNT:
36672 case V8SI_FTYPE_V8SI_SI_COUNT:
36673 case V4DI_FTYPE_V4DI_V2DI_COUNT:
36674 case V4DI_FTYPE_V4DI_INT_COUNT:
36675 case V8HI_FTYPE_V8HI_V8HI_COUNT:
36676 case V8HI_FTYPE_V8HI_SI_COUNT:
36677 case V4SI_FTYPE_V4SI_V4SI_COUNT:
36678 case V4SI_FTYPE_V4SI_SI_COUNT:
36679 case V4HI_FTYPE_V4HI_V4HI_COUNT:
36680 case V4HI_FTYPE_V4HI_SI_COUNT:
36681 case V2DI_FTYPE_V2DI_V2DI_COUNT:
36682 case V2DI_FTYPE_V2DI_SI_COUNT:
36683 case V2SI_FTYPE_V2SI_V2SI_COUNT:
36684 case V2SI_FTYPE_V2SI_SI_COUNT:
36685 case V1DI_FTYPE_V1DI_V1DI_COUNT:
36686 case V1DI_FTYPE_V1DI_SI_COUNT:
36687 nargs = 2;
36688 last_arg_count = true;
36689 break;
36690 case UINT64_FTYPE_UINT64_UINT64:
36691 case UINT_FTYPE_UINT_UINT:
36692 case UINT_FTYPE_UINT_USHORT:
36693 case UINT_FTYPE_UINT_UCHAR:
36694 case UINT16_FTYPE_UINT16_INT:
36695 case UINT8_FTYPE_UINT8_INT:
36696 case HI_FTYPE_HI_HI:
36697 case SI_FTYPE_SI_SI:
36698 case DI_FTYPE_DI_DI:
36699 case V16SI_FTYPE_V8DF_V8DF:
36700 nargs = 2;
36701 break;
36702 case V2DI_FTYPE_V2DI_INT_CONVERT:
36703 nargs = 2;
36704 rmode = V1TImode;
36705 nargs_constant = 1;
36706 break;
36707 case V4DI_FTYPE_V4DI_INT_CONVERT:
36708 nargs = 2;
36709 rmode = V2TImode;
36710 nargs_constant = 1;
36711 break;
36712 case V8DI_FTYPE_V8DI_INT_CONVERT:
36713 nargs = 2;
36714 rmode = V4TImode;
36715 nargs_constant = 1;
36716 break;
36717 case V8HI_FTYPE_V8HI_INT:
36718 case V8HI_FTYPE_V8SF_INT:
36719 case V16HI_FTYPE_V16SF_INT:
36720 case V8HI_FTYPE_V4SF_INT:
36721 case V8SF_FTYPE_V8SF_INT:
36722 case V4SF_FTYPE_V16SF_INT:
36723 case V16SF_FTYPE_V16SF_INT:
36724 case V4SI_FTYPE_V4SI_INT:
36725 case V4SI_FTYPE_V8SI_INT:
36726 case V4HI_FTYPE_V4HI_INT:
36727 case V4DF_FTYPE_V4DF_INT:
36728 case V4DF_FTYPE_V8DF_INT:
36729 case V4SF_FTYPE_V4SF_INT:
36730 case V4SF_FTYPE_V8SF_INT:
36731 case V2DI_FTYPE_V2DI_INT:
36732 case V2DF_FTYPE_V2DF_INT:
36733 case V2DF_FTYPE_V4DF_INT:
36734 case V16HI_FTYPE_V16HI_INT:
36735 case V8SI_FTYPE_V8SI_INT:
36736 case V16SI_FTYPE_V16SI_INT:
36737 case V4SI_FTYPE_V16SI_INT:
36738 case V4DI_FTYPE_V4DI_INT:
36739 case V2DI_FTYPE_V4DI_INT:
36740 case V4DI_FTYPE_V8DI_INT:
36741 case HI_FTYPE_HI_INT:
36742 case QI_FTYPE_V4SF_INT:
36743 case QI_FTYPE_V2DF_INT:
36744 nargs = 2;
36745 nargs_constant = 1;
36746 break;
36747 case V16QI_FTYPE_V16QI_V16QI_V16QI:
36748 case V8SF_FTYPE_V8SF_V8SF_V8SF:
36749 case V4DF_FTYPE_V4DF_V4DF_V4DF:
36750 case V4SF_FTYPE_V4SF_V4SF_V4SF:
36751 case V2DF_FTYPE_V2DF_V2DF_V2DF:
36752 case V32QI_FTYPE_V32QI_V32QI_V32QI:
36753 case HI_FTYPE_V16SI_V16SI_HI:
36754 case QI_FTYPE_V8DI_V8DI_QI:
36755 case V16HI_FTYPE_V16SI_V16HI_HI:
36756 case V16QI_FTYPE_V16SI_V16QI_HI:
36757 case V16QI_FTYPE_V8DI_V16QI_QI:
36758 case V16SF_FTYPE_V16SF_V16SF_HI:
36759 case V16SF_FTYPE_V16SF_V16SF_V16SF:
36760 case V16SF_FTYPE_V16SF_V16SI_V16SF:
36761 case V16SF_FTYPE_V16SI_V16SF_HI:
36762 case V16SF_FTYPE_V16SI_V16SF_V16SF:
36763 case V16SF_FTYPE_V4SF_V16SF_HI:
36764 case V16SI_FTYPE_SI_V16SI_HI:
36765 case V16SI_FTYPE_V16HI_V16SI_HI:
36766 case V16SI_FTYPE_V16QI_V16SI_HI:
36767 case V16SI_FTYPE_V16SF_V16SI_HI:
36768 case V8SF_FTYPE_V4SF_V8SF_QI:
36769 case V4DF_FTYPE_V2DF_V4DF_QI:
36770 case V8SI_FTYPE_V4SI_V8SI_QI:
36771 case V8SI_FTYPE_SI_V8SI_QI:
36772 case V4SI_FTYPE_V4SI_V4SI_QI:
36773 case V4SI_FTYPE_SI_V4SI_QI:
36774 case V4DI_FTYPE_V2DI_V4DI_QI:
36775 case V4DI_FTYPE_DI_V4DI_QI:
36776 case V2DI_FTYPE_V2DI_V2DI_QI:
36777 case V2DI_FTYPE_DI_V2DI_QI:
36778 case V64QI_FTYPE_V64QI_V64QI_DI:
36779 case V64QI_FTYPE_V16QI_V64QI_DI:
36780 case V64QI_FTYPE_QI_V64QI_DI:
36781 case V32QI_FTYPE_V32QI_V32QI_SI:
36782 case V32QI_FTYPE_V16QI_V32QI_SI:
36783 case V32QI_FTYPE_QI_V32QI_SI:
36784 case V16QI_FTYPE_V16QI_V16QI_HI:
36785 case V16QI_FTYPE_QI_V16QI_HI:
36786 case V32HI_FTYPE_V8HI_V32HI_SI:
36787 case V32HI_FTYPE_HI_V32HI_SI:
36788 case V16HI_FTYPE_V8HI_V16HI_HI:
36789 case V16HI_FTYPE_HI_V16HI_HI:
36790 case V8HI_FTYPE_V8HI_V8HI_QI:
36791 case V8HI_FTYPE_HI_V8HI_QI:
36792 case V8SF_FTYPE_V8HI_V8SF_QI:
36793 case V4SF_FTYPE_V8HI_V4SF_QI:
36794 case V8SI_FTYPE_V8SF_V8SI_QI:
36795 case V4SI_FTYPE_V4SF_V4SI_QI:
36796 case V8DI_FTYPE_V8SF_V8DI_QI:
36797 case V4DI_FTYPE_V4SF_V4DI_QI:
36798 case V2DI_FTYPE_V4SF_V2DI_QI:
36799 case V8SF_FTYPE_V8DI_V8SF_QI:
36800 case V4SF_FTYPE_V4DI_V4SF_QI:
36801 case V4SF_FTYPE_V2DI_V4SF_QI:
36802 case V8DF_FTYPE_V8DI_V8DF_QI:
36803 case V4DF_FTYPE_V4DI_V4DF_QI:
36804 case V2DF_FTYPE_V2DI_V2DF_QI:
36805 case V16QI_FTYPE_V8HI_V16QI_QI:
36806 case V16QI_FTYPE_V16HI_V16QI_HI:
36807 case V16QI_FTYPE_V4SI_V16QI_QI:
36808 case V16QI_FTYPE_V8SI_V16QI_QI:
36809 case V8HI_FTYPE_V4SI_V8HI_QI:
36810 case V8HI_FTYPE_V8SI_V8HI_QI:
36811 case V16QI_FTYPE_V2DI_V16QI_QI:
36812 case V16QI_FTYPE_V4DI_V16QI_QI:
36813 case V8HI_FTYPE_V2DI_V8HI_QI:
36814 case V8HI_FTYPE_V4DI_V8HI_QI:
36815 case V4SI_FTYPE_V2DI_V4SI_QI:
36816 case V4SI_FTYPE_V4DI_V4SI_QI:
36817 case V32QI_FTYPE_V32HI_V32QI_SI:
36818 case HI_FTYPE_V16QI_V16QI_HI:
36819 case SI_FTYPE_V32QI_V32QI_SI:
36820 case DI_FTYPE_V64QI_V64QI_DI:
36821 case QI_FTYPE_V8HI_V8HI_QI:
36822 case HI_FTYPE_V16HI_V16HI_HI:
36823 case SI_FTYPE_V32HI_V32HI_SI:
36824 case QI_FTYPE_V4SI_V4SI_QI:
36825 case QI_FTYPE_V8SI_V8SI_QI:
36826 case QI_FTYPE_V2DI_V2DI_QI:
36827 case QI_FTYPE_V4DI_V4DI_QI:
36828 case V4SF_FTYPE_V2DF_V4SF_QI:
36829 case V4SF_FTYPE_V4DF_V4SF_QI:
36830 case V16SI_FTYPE_V16SI_V16SI_HI:
36831 case V16SI_FTYPE_V16SI_V16SI_V16SI:
36832 case V16SI_FTYPE_V4SI_V16SI_HI:
36833 case V2DI_FTYPE_V2DI_V2DI_V2DI:
36834 case V2DI_FTYPE_V4SI_V2DI_QI:
36835 case V2DI_FTYPE_V8HI_V2DI_QI:
36836 case V2DI_FTYPE_V16QI_V2DI_QI:
36837 case V4DI_FTYPE_V4DI_V4DI_QI:
36838 case V4DI_FTYPE_V4SI_V4DI_QI:
36839 case V4DI_FTYPE_V8HI_V4DI_QI:
36840 case V4DI_FTYPE_V16QI_V4DI_QI:
36841 case V8DI_FTYPE_V8DF_V8DI_QI:
36842 case V4DI_FTYPE_V4DF_V4DI_QI:
36843 case V2DI_FTYPE_V2DF_V2DI_QI:
36844 case V4SI_FTYPE_V4DF_V4SI_QI:
36845 case V4SI_FTYPE_V2DF_V4SI_QI:
36846 case V4SI_FTYPE_V8HI_V4SI_QI:
36847 case V4SI_FTYPE_V16QI_V4SI_QI:
36848 case V8SI_FTYPE_V8SI_V8SI_V8SI:
36849 case V4DI_FTYPE_V4DI_V4DI_V4DI:
36850 case V8DF_FTYPE_V2DF_V8DF_QI:
36851 case V8DF_FTYPE_V4DF_V8DF_QI:
36852 case V8DF_FTYPE_V8DF_V8DF_QI:
36853 case V8DF_FTYPE_V8DF_V8DF_V8DF:
36854 case V8SF_FTYPE_V8SF_V8SF_QI:
36855 case V8SF_FTYPE_V8SI_V8SF_QI:
36856 case V4DF_FTYPE_V4DF_V4DF_QI:
36857 case V4SF_FTYPE_V4SF_V4SF_QI:
36858 case V2DF_FTYPE_V2DF_V2DF_QI:
36859 case V2DF_FTYPE_V4SF_V2DF_QI:
36860 case V2DF_FTYPE_V4SI_V2DF_QI:
36861 case V4SF_FTYPE_V4SI_V4SF_QI:
36862 case V4DF_FTYPE_V4SF_V4DF_QI:
36863 case V4DF_FTYPE_V4SI_V4DF_QI:
36864 case V8SI_FTYPE_V8SI_V8SI_QI:
36865 case V8SI_FTYPE_V8HI_V8SI_QI:
36866 case V8SI_FTYPE_V16QI_V8SI_QI:
36867 case V8DF_FTYPE_V8DF_V8DI_V8DF:
36868 case V8DF_FTYPE_V8DI_V8DF_V8DF:
36869 case V8DF_FTYPE_V8SF_V8DF_QI:
36870 case V8DF_FTYPE_V8SI_V8DF_QI:
36871 case V8DI_FTYPE_DI_V8DI_QI:
36872 case V16SF_FTYPE_V8SF_V16SF_HI:
36873 case V16SI_FTYPE_V8SI_V16SI_HI:
36874 case V16HI_FTYPE_V16HI_V16HI_HI:
36875 case V8HI_FTYPE_V16QI_V8HI_QI:
36876 case V16HI_FTYPE_V16QI_V16HI_HI:
36877 case V32HI_FTYPE_V32HI_V32HI_SI:
36878 case V32HI_FTYPE_V32QI_V32HI_SI:
36879 case V8DI_FTYPE_V16QI_V8DI_QI:
36880 case V8DI_FTYPE_V2DI_V8DI_QI:
36881 case V8DI_FTYPE_V4DI_V8DI_QI:
36882 case V8DI_FTYPE_V8DI_V8DI_QI:
36883 case V8DI_FTYPE_V8DI_V8DI_V8DI:
36884 case V8DI_FTYPE_V8HI_V8DI_QI:
36885 case V8DI_FTYPE_V8SI_V8DI_QI:
36886 case V8HI_FTYPE_V8DI_V8HI_QI:
36887 case V8SF_FTYPE_V8DF_V8SF_QI:
36888 case V8SI_FTYPE_V8DF_V8SI_QI:
36889 case V8SI_FTYPE_V8DI_V8SI_QI:
36890 case V4SI_FTYPE_V4SI_V4SI_V4SI:
36891 nargs = 3;
36892 break;
36893 case V32QI_FTYPE_V32QI_V32QI_INT:
36894 case V16HI_FTYPE_V16HI_V16HI_INT:
36895 case V16QI_FTYPE_V16QI_V16QI_INT:
36896 case V4DI_FTYPE_V4DI_V4DI_INT:
36897 case V8HI_FTYPE_V8HI_V8HI_INT:
36898 case V8SI_FTYPE_V8SI_V8SI_INT:
36899 case V8SI_FTYPE_V8SI_V4SI_INT:
36900 case V8SF_FTYPE_V8SF_V8SF_INT:
36901 case V8SF_FTYPE_V8SF_V4SF_INT:
36902 case V4SI_FTYPE_V4SI_V4SI_INT:
36903 case V4DF_FTYPE_V4DF_V4DF_INT:
36904 case V16SF_FTYPE_V16SF_V16SF_INT:
36905 case V16SF_FTYPE_V16SF_V4SF_INT:
36906 case V16SI_FTYPE_V16SI_V4SI_INT:
36907 case V4DF_FTYPE_V4DF_V2DF_INT:
36908 case V4SF_FTYPE_V4SF_V4SF_INT:
36909 case V2DI_FTYPE_V2DI_V2DI_INT:
36910 case V4DI_FTYPE_V4DI_V2DI_INT:
36911 case V2DF_FTYPE_V2DF_V2DF_INT:
36912 case QI_FTYPE_V8DI_V8DI_INT:
36913 case QI_FTYPE_V8DF_V8DF_INT:
36914 case QI_FTYPE_V2DF_V2DF_INT:
36915 case QI_FTYPE_V4SF_V4SF_INT:
36916 case HI_FTYPE_V16SI_V16SI_INT:
36917 case HI_FTYPE_V16SF_V16SF_INT:
36918 nargs = 3;
36919 nargs_constant = 1;
36920 break;
36921 case V4DI_FTYPE_V4DI_V4DI_INT_CONVERT:
36922 nargs = 3;
36923 rmode = V4DImode;
36924 nargs_constant = 1;
36925 break;
36926 case V2DI_FTYPE_V2DI_V2DI_INT_CONVERT:
36927 nargs = 3;
36928 rmode = V2DImode;
36929 nargs_constant = 1;
36930 break;
36931 case V1DI_FTYPE_V1DI_V1DI_INT_CONVERT:
36932 nargs = 3;
36933 rmode = DImode;
36934 nargs_constant = 1;
36935 break;
36936 case V2DI_FTYPE_V2DI_UINT_UINT:
36937 nargs = 3;
36938 nargs_constant = 2;
36939 break;
36940 case V8DI_FTYPE_V8DI_V8DI_INT_CONVERT:
36941 nargs = 3;
36942 rmode = V8DImode;
36943 nargs_constant = 1;
36944 break;
36945 case V8DI_FTYPE_V8DI_V8DI_INT_V8DI_DI_CONVERT:
36946 nargs = 5;
36947 rmode = V8DImode;
36948 mask_pos = 2;
36949 nargs_constant = 1;
36950 break;
36951 case QI_FTYPE_V8DF_INT_QI:
36952 case QI_FTYPE_V4DF_INT_QI:
36953 case QI_FTYPE_V2DF_INT_QI:
36954 case HI_FTYPE_V16SF_INT_HI:
36955 case QI_FTYPE_V8SF_INT_QI:
36956 case QI_FTYPE_V4SF_INT_QI:
36957 nargs = 3;
36958 mask_pos = 1;
36959 nargs_constant = 1;
36960 break;
36961 case V4DI_FTYPE_V4DI_V4DI_INT_V4DI_SI_CONVERT:
36962 nargs = 5;
36963 rmode = V4DImode;
36964 mask_pos = 2;
36965 nargs_constant = 1;
36966 break;
36967 case V2DI_FTYPE_V2DI_V2DI_INT_V2DI_HI_CONVERT:
36968 nargs = 5;
36969 rmode = V2DImode;
36970 mask_pos = 2;
36971 nargs_constant = 1;
36972 break;
36973 case V32QI_FTYPE_V32QI_V32QI_V32QI_SI:
36974 case V32HI_FTYPE_V32HI_V32HI_V32HI_SI:
36975 case V32HI_FTYPE_V64QI_V64QI_V32HI_SI:
36976 case V16SI_FTYPE_V32HI_V32HI_V16SI_HI:
36977 case V64QI_FTYPE_V64QI_V64QI_V64QI_DI:
36978 case V32HI_FTYPE_V32HI_V8HI_V32HI_SI:
36979 case V16HI_FTYPE_V16HI_V8HI_V16HI_HI:
36980 case V8SI_FTYPE_V8SI_V4SI_V8SI_QI:
36981 case V4DI_FTYPE_V4DI_V2DI_V4DI_QI:
36982 case V64QI_FTYPE_V32HI_V32HI_V64QI_DI:
36983 case V32QI_FTYPE_V16HI_V16HI_V32QI_SI:
36984 case V16QI_FTYPE_V8HI_V8HI_V16QI_HI:
36985 case V32HI_FTYPE_V16SI_V16SI_V32HI_SI:
36986 case V16HI_FTYPE_V8SI_V8SI_V16HI_HI:
36987 case V8HI_FTYPE_V4SI_V4SI_V8HI_QI:
36988 case V4DF_FTYPE_V4DF_V4DI_V4DF_QI:
36989 case V8SF_FTYPE_V8SF_V8SI_V8SF_QI:
36990 case V4SF_FTYPE_V4SF_V4SI_V4SF_QI:
36991 case V2DF_FTYPE_V2DF_V2DI_V2DF_QI:
36992 case V2DI_FTYPE_V4SI_V4SI_V2DI_QI:
36993 case V4DI_FTYPE_V8SI_V8SI_V4DI_QI:
36994 case V4DF_FTYPE_V4DI_V4DF_V4DF_QI:
36995 case V8SF_FTYPE_V8SI_V8SF_V8SF_QI:
36996 case V2DF_FTYPE_V2DI_V2DF_V2DF_QI:
36997 case V4SF_FTYPE_V4SI_V4SF_V4SF_QI:
36998 case V16SF_FTYPE_V16SF_V16SF_V16SF_HI:
36999 case V16SF_FTYPE_V16SF_V16SI_V16SF_HI:
37000 case V16SF_FTYPE_V16SI_V16SF_V16SF_HI:
37001 case V16SI_FTYPE_V16SI_V16SI_V16SI_HI:
37002 case V16SI_FTYPE_V16SI_V4SI_V16SI_HI:
37003 case V8HI_FTYPE_V8HI_V8HI_V8HI_QI:
37004 case V8SI_FTYPE_V8SI_V8SI_V8SI_QI:
37005 case V4SI_FTYPE_V4SI_V4SI_V4SI_QI:
37006 case V8SF_FTYPE_V8SF_V8SF_V8SF_QI:
37007 case V16QI_FTYPE_V16QI_V16QI_V16QI_HI:
37008 case V16HI_FTYPE_V16HI_V16HI_V16HI_HI:
37009 case V2DI_FTYPE_V2DI_V2DI_V2DI_QI:
37010 case V2DF_FTYPE_V2DF_V2DF_V2DF_QI:
37011 case V2DF_FTYPE_V2DF_V4SF_V2DF_QI:
37012 case V4DI_FTYPE_V4DI_V4DI_V4DI_QI:
37013 case V4DF_FTYPE_V4DF_V4DF_V4DF_QI:
37014 case V4SF_FTYPE_V4SF_V2DF_V4SF_QI:
37015 case V4SF_FTYPE_V4SF_V4SF_V4SF_QI:
37016 case V8DF_FTYPE_V8DF_V8DF_V8DF_QI:
37017 case V8DF_FTYPE_V8DF_V8DI_V8DF_QI:
37018 case V8DF_FTYPE_V8DI_V8DF_V8DF_QI:
37019 case V8DI_FTYPE_V16SI_V16SI_V8DI_QI:
37020 case V8DI_FTYPE_V8DI_SI_V8DI_V8DI:
37021 case V8DI_FTYPE_V8DI_V2DI_V8DI_QI:
37022 case V8DI_FTYPE_V8DI_V8DI_V8DI_QI:
37023 case V8HI_FTYPE_V16QI_V16QI_V8HI_QI:
37024 case V16HI_FTYPE_V32QI_V32QI_V16HI_HI:
37025 case V8SI_FTYPE_V16HI_V16HI_V8SI_QI:
37026 case V4SI_FTYPE_V8HI_V8HI_V4SI_QI:
37027 nargs = 4;
37028 break;
37029 case V2DF_FTYPE_V2DF_V2DF_V2DI_INT:
37030 case V4DF_FTYPE_V4DF_V4DF_V4DI_INT:
37031 case V4SF_FTYPE_V4SF_V4SF_V4SI_INT:
37032 case V8SF_FTYPE_V8SF_V8SF_V8SI_INT:
37033 case V16SF_FTYPE_V16SF_V16SF_V16SI_INT:
37034 nargs = 4;
37035 nargs_constant = 1;
37036 break;
37037 case QI_FTYPE_V4DI_V4DI_INT_QI:
37038 case QI_FTYPE_V8SI_V8SI_INT_QI:
37039 case QI_FTYPE_V4DF_V4DF_INT_QI:
37040 case QI_FTYPE_V8SF_V8SF_INT_QI:
37041 case QI_FTYPE_V2DI_V2DI_INT_QI:
37042 case QI_FTYPE_V4SI_V4SI_INT_QI:
37043 case QI_FTYPE_V2DF_V2DF_INT_QI:
37044 case QI_FTYPE_V4SF_V4SF_INT_QI:
37045 case DI_FTYPE_V64QI_V64QI_INT_DI:
37046 case SI_FTYPE_V32QI_V32QI_INT_SI:
37047 case HI_FTYPE_V16QI_V16QI_INT_HI:
37048 case SI_FTYPE_V32HI_V32HI_INT_SI:
37049 case HI_FTYPE_V16HI_V16HI_INT_HI:
37050 case QI_FTYPE_V8HI_V8HI_INT_QI:
37051 nargs = 4;
37052 mask_pos = 1;
37053 nargs_constant = 1;
37054 break;
37055 case V2DI_FTYPE_V2DI_V2DI_UINT_UINT:
37056 nargs = 4;
37057 nargs_constant = 2;
37058 break;
37059 case UCHAR_FTYPE_UCHAR_UINT_UINT_PUNSIGNED:
37060 case UCHAR_FTYPE_UCHAR_ULONGLONG_ULONGLONG_PULONGLONG:
37061 nargs = 4;
37062 break;
37063 case QI_FTYPE_V8DI_V8DI_INT_QI:
37064 case HI_FTYPE_V16SI_V16SI_INT_HI:
37065 case QI_FTYPE_V8DF_V8DF_INT_QI:
37066 case HI_FTYPE_V16SF_V16SF_INT_HI:
37067 mask_pos = 1;
37068 nargs = 4;
37069 nargs_constant = 1;
37070 break;
37071 case V8SF_FTYPE_V8SF_INT_V8SF_QI:
37072 case V4SF_FTYPE_V4SF_INT_V4SF_QI:
37073 case V2DF_FTYPE_V4DF_INT_V2DF_QI:
37074 case V2DI_FTYPE_V4DI_INT_V2DI_QI:
37075 case V8SF_FTYPE_V16SF_INT_V8SF_QI:
37076 case V8SI_FTYPE_V16SI_INT_V8SI_QI:
37077 case V2DF_FTYPE_V8DF_INT_V2DF_QI:
37078 case V2DI_FTYPE_V8DI_INT_V2DI_QI:
37079 case V4SF_FTYPE_V8SF_INT_V4SF_QI:
37080 case V4SI_FTYPE_V8SI_INT_V4SI_QI:
37081 case V8HI_FTYPE_V8SF_INT_V8HI_QI:
37082 case V8HI_FTYPE_V4SF_INT_V8HI_QI:
37083 case V32HI_FTYPE_V32HI_INT_V32HI_SI:
37084 case V16HI_FTYPE_V16HI_INT_V16HI_HI:
37085 case V8HI_FTYPE_V8HI_INT_V8HI_QI:
37086 case V4DI_FTYPE_V4DI_INT_V4DI_QI:
37087 case V2DI_FTYPE_V2DI_INT_V2DI_QI:
37088 case V8SI_FTYPE_V8SI_INT_V8SI_QI:
37089 case V4SI_FTYPE_V4SI_INT_V4SI_QI:
37090 case V4DF_FTYPE_V4DF_INT_V4DF_QI:
37091 case V2DF_FTYPE_V2DF_INT_V2DF_QI:
37092 case V8DF_FTYPE_V8DF_INT_V8DF_QI:
37093 case V16SF_FTYPE_V16SF_INT_V16SF_HI:
37094 case V16HI_FTYPE_V16SF_INT_V16HI_HI:
37095 case V16SI_FTYPE_V16SI_INT_V16SI_HI:
37096 case V4SI_FTYPE_V16SI_INT_V4SI_QI:
37097 case V4DI_FTYPE_V8DI_INT_V4DI_QI:
37098 case V4DF_FTYPE_V8DF_INT_V4DF_QI:
37099 case V4SF_FTYPE_V16SF_INT_V4SF_QI:
37100 case V8DI_FTYPE_V8DI_INT_V8DI_QI:
37101 nargs = 4;
37102 mask_pos = 2;
37103 nargs_constant = 1;
37104 break;
37105 case V16SF_FTYPE_V16SF_V4SF_INT_V16SF_HI:
37106 case V16SI_FTYPE_V16SI_V4SI_INT_V16SI_HI:
37107 case V8DF_FTYPE_V8DF_V8DF_INT_V8DF_QI:
37108 case V8DI_FTYPE_V8DI_V8DI_INT_V8DI_QI:
37109 case V16SF_FTYPE_V16SF_V16SF_INT_V16SF_HI:
37110 case V16SI_FTYPE_V16SI_V16SI_INT_V16SI_HI:
37111 case V4SF_FTYPE_V4SF_V4SF_INT_V4SF_QI:
37112 case V2DF_FTYPE_V2DF_V2DF_INT_V2DF_QI:
37113 case V8DF_FTYPE_V8DF_V4DF_INT_V8DF_QI:
37114 case V8DI_FTYPE_V8DI_V4DI_INT_V8DI_QI:
37115 case V4DF_FTYPE_V4DF_V4DF_INT_V4DF_QI:
37116 case V8SF_FTYPE_V8SF_V8SF_INT_V8SF_QI:
37117 case V8DF_FTYPE_V8DF_V2DF_INT_V8DF_QI:
37118 case V8DI_FTYPE_V8DI_V2DI_INT_V8DI_QI:
37119 case V8SI_FTYPE_V8SI_V8SI_INT_V8SI_QI:
37120 case V4DI_FTYPE_V4DI_V4DI_INT_V4DI_QI:
37121 case V4SI_FTYPE_V4SI_V4SI_INT_V4SI_QI:
37122 case V2DI_FTYPE_V2DI_V2DI_INT_V2DI_QI:
37123 case V32HI_FTYPE_V64QI_V64QI_INT_V32HI_SI:
37124 case V16HI_FTYPE_V32QI_V32QI_INT_V16HI_HI:
37125 case V8HI_FTYPE_V16QI_V16QI_INT_V8HI_QI:
37126 case V16SF_FTYPE_V16SF_V8SF_INT_V16SF_HI:
37127 case V16SI_FTYPE_V16SI_V8SI_INT_V16SI_HI:
37128 case V8SF_FTYPE_V8SF_V4SF_INT_V8SF_QI:
37129 case V8SI_FTYPE_V8SI_V4SI_INT_V8SI_QI:
37130 case V4DI_FTYPE_V4DI_V2DI_INT_V4DI_QI:
37131 case V4DF_FTYPE_V4DF_V2DF_INT_V4DF_QI:
37132 nargs = 5;
37133 mask_pos = 2;
37134 nargs_constant = 1;
37135 break;
37136 case V8DI_FTYPE_V8DI_V8DI_V8DI_INT_QI:
37137 case V16SF_FTYPE_V16SF_V16SF_V16SI_INT_HI:
37138 case V16SI_FTYPE_V16SI_V16SI_V16SI_INT_HI:
37139 case V2DF_FTYPE_V2DF_V2DF_V2DI_INT_QI:
37140 case V4SF_FTYPE_V4SF_V4SF_V4SI_INT_QI:
37141 case V8SF_FTYPE_V8SF_V8SF_V8SI_INT_QI:
37142 case V8SI_FTYPE_V8SI_V8SI_V8SI_INT_QI:
37143 case V4DF_FTYPE_V4DF_V4DF_V4DI_INT_QI:
37144 case V4DI_FTYPE_V4DI_V4DI_V4DI_INT_QI:
37145 case V4SI_FTYPE_V4SI_V4SI_V4SI_INT_QI:
37146 case V2DI_FTYPE_V2DI_V2DI_V2DI_INT_QI:
37147 nargs = 5;
37149 mask_pos = 1;
37150 nargs_constant = 1;
37151 break;
37153 default:
37154 gcc_unreachable ();
37157 gcc_assert (nargs <= ARRAY_SIZE (args));
37159 if (comparison != UNKNOWN)
37161 gcc_assert (nargs == 2);
37162 return ix86_expand_sse_compare (d, exp, target, swap);
37165 if (rmode == VOIDmode || rmode == tmode)
37167 if (optimize
37168 || target == 0
37169 || GET_MODE (target) != tmode
37170 || !insn_p->operand[0].predicate (target, tmode))
37171 target = gen_reg_rtx (tmode);
37172 real_target = target;
37174 else
37176 real_target = gen_reg_rtx (tmode);
37177 target = simplify_gen_subreg (rmode, real_target, tmode, 0);
37180 for (i = 0; i < nargs; i++)
37182 tree arg = CALL_EXPR_ARG (exp, i);
37183 rtx op = expand_normal (arg);
37184 machine_mode mode = insn_p->operand[i + 1].mode;
37185 bool match = insn_p->operand[i + 1].predicate (op, mode);
37187 if (last_arg_count && (i + 1) == nargs)
37189 /* SIMD shift insns take either an 8-bit immediate or a
37190    register as the count, but the builtin functions take an
37191    int.  If the count doesn't match, put it in a register. */
37192 if (!match)
37194 op = simplify_gen_subreg (SImode, op, GET_MODE (op), 0);
37195 if (!insn_p->operand[i + 1].predicate (op, mode))
37196 op = copy_to_reg (op);
37199 else if ((mask_pos && (nargs - i - mask_pos) == nargs_constant)
37200 	 || (!mask_pos && (nargs - i) <= nargs_constant))
37202 if (!match)
37203 switch (icode)
37205 case CODE_FOR_avx_vinsertf128v4di:
37206 case CODE_FOR_avx_vextractf128v4di:
37207 error ("the last argument must be a 1-bit immediate");
37208 return const0_rtx;
37210 case CODE_FOR_avx512f_cmpv8di3_mask:
37211 case CODE_FOR_avx512f_cmpv16si3_mask:
37212 case CODE_FOR_avx512f_ucmpv8di3_mask:
37213 case CODE_FOR_avx512f_ucmpv16si3_mask:
37214 case CODE_FOR_avx512vl_cmpv4di3_mask:
37215 case CODE_FOR_avx512vl_cmpv8si3_mask:
37216 case CODE_FOR_avx512vl_ucmpv4di3_mask:
37217 case CODE_FOR_avx512vl_ucmpv8si3_mask:
37218 case CODE_FOR_avx512vl_cmpv2di3_mask:
37219 case CODE_FOR_avx512vl_cmpv4si3_mask:
37220 case CODE_FOR_avx512vl_ucmpv2di3_mask:
37221 case CODE_FOR_avx512vl_ucmpv4si3_mask:
37222 error ("the last argument must be a 3-bit immediate");
37223 return const0_rtx;
37225 case CODE_FOR_sse4_1_roundsd:
37226 case CODE_FOR_sse4_1_roundss:
37228 case CODE_FOR_sse4_1_roundpd:
37229 case CODE_FOR_sse4_1_roundps:
37230 case CODE_FOR_avx_roundpd256:
37231 case CODE_FOR_avx_roundps256:
37233 case CODE_FOR_sse4_1_roundpd_vec_pack_sfix:
37234 case CODE_FOR_sse4_1_roundps_sfix:
37235 case CODE_FOR_avx_roundpd_vec_pack_sfix256:
37236 case CODE_FOR_avx_roundps_sfix256:
37238 case CODE_FOR_sse4_1_blendps:
37239 case CODE_FOR_avx_blendpd256:
37240 case CODE_FOR_avx_vpermilv4df:
37241 case CODE_FOR_avx_vpermilv4df_mask:
37242 case CODE_FOR_avx512f_getmantv8df_mask:
37243 case CODE_FOR_avx512f_getmantv16sf_mask:
37244 case CODE_FOR_avx512vl_getmantv8sf_mask:
37245 case CODE_FOR_avx512vl_getmantv4df_mask:
37246 case CODE_FOR_avx512vl_getmantv4sf_mask:
37247 case CODE_FOR_avx512vl_getmantv2df_mask:
37248 case CODE_FOR_avx512dq_rangepv8df_mask_round:
37249 case CODE_FOR_avx512dq_rangepv16sf_mask_round:
37250 case CODE_FOR_avx512dq_rangepv4df_mask:
37251 case CODE_FOR_avx512dq_rangepv8sf_mask:
37252 case CODE_FOR_avx512dq_rangepv2df_mask:
37253 case CODE_FOR_avx512dq_rangepv4sf_mask:
37254 case CODE_FOR_avx_shufpd256_mask:
37255 error ("the last argument must be a 4-bit immediate");
37256 return const0_rtx;
37258 case CODE_FOR_sha1rnds4:
37259 case CODE_FOR_sse4_1_blendpd:
37260 case CODE_FOR_avx_vpermilv2df:
37261 case CODE_FOR_avx_vpermilv2df_mask:
37262 case CODE_FOR_xop_vpermil2v2df3:
37263 case CODE_FOR_xop_vpermil2v4sf3:
37264 case CODE_FOR_xop_vpermil2v4df3:
37265 case CODE_FOR_xop_vpermil2v8sf3:
37266 case CODE_FOR_avx512f_vinsertf32x4_mask:
37267 case CODE_FOR_avx512f_vinserti32x4_mask:
37268 case CODE_FOR_avx512f_vextractf32x4_mask:
37269 case CODE_FOR_avx512f_vextracti32x4_mask:
37270 case CODE_FOR_sse2_shufpd:
37271 case CODE_FOR_sse2_shufpd_mask:
37272 case CODE_FOR_avx512dq_shuf_f64x2_mask:
37273 case CODE_FOR_avx512dq_shuf_i64x2_mask:
37274 case CODE_FOR_avx512vl_shuf_i32x4_mask:
37275 case CODE_FOR_avx512vl_shuf_f32x4_mask:
37276 error ("the last argument must be a 2-bit immediate");
37277 return const0_rtx;
37279 case CODE_FOR_avx_vextractf128v4df:
37280 case CODE_FOR_avx_vextractf128v8sf:
37281 case CODE_FOR_avx_vextractf128v8si:
37282 case CODE_FOR_avx_vinsertf128v4df:
37283 case CODE_FOR_avx_vinsertf128v8sf:
37284 case CODE_FOR_avx_vinsertf128v8si:
37285 case CODE_FOR_avx512f_vinsertf64x4_mask:
37286 case CODE_FOR_avx512f_vinserti64x4_mask:
37287 case CODE_FOR_avx512f_vextractf64x4_mask:
37288 case CODE_FOR_avx512f_vextracti64x4_mask:
37289 case CODE_FOR_avx512dq_vinsertf32x8_mask:
37290 case CODE_FOR_avx512dq_vinserti32x8_mask:
37291 case CODE_FOR_avx512vl_vinsertv4df:
37292 case CODE_FOR_avx512vl_vinsertv4di:
37293 case CODE_FOR_avx512vl_vinsertv8sf:
37294 case CODE_FOR_avx512vl_vinsertv8si:
37295 error ("the last argument must be a 1-bit immediate");
37296 return const0_rtx;
37298 case CODE_FOR_avx_vmcmpv2df3:
37299 case CODE_FOR_avx_vmcmpv4sf3:
37300 case CODE_FOR_avx_cmpv2df3:
37301 case CODE_FOR_avx_cmpv4sf3:
37302 case CODE_FOR_avx_cmpv4df3:
37303 case CODE_FOR_avx_cmpv8sf3:
37304 case CODE_FOR_avx512f_cmpv8df3_mask:
37305 case CODE_FOR_avx512f_cmpv16sf3_mask:
37306 case CODE_FOR_avx512f_vmcmpv2df3_mask:
37307 case CODE_FOR_avx512f_vmcmpv4sf3_mask:
37308 error ("the last argument must be a 5-bit immediate");
37309 return const0_rtx;
37311 default:
37312 switch (nargs_constant)
37314 case 2:
37315 if ((mask_pos && (nargs - i - mask_pos) == nargs_constant)
37316     || (!mask_pos && (nargs - i) == nargs_constant))
37318 error ("the next to last argument must be an 8-bit immediate");
37319 break;
37321 case 1:
37322 error ("the last argument must be an 8-bit immediate");
37323 break;
37324 default:
37325 gcc_unreachable ();
37327 return const0_rtx;
37330 else
37332 if (VECTOR_MODE_P (mode))
37333 op = safe_vector_operand (op, mode);
37335 /* If we aren't optimizing, only allow one memory operand to
37336 be generated. */
37337 if (memory_operand (op, mode))
37338 num_memory++;
37340 if (GET_MODE (op) == mode || GET_MODE (op) == VOIDmode)
37342 if (optimize || !match || num_memory > 1)
37343 op = copy_to_mode_reg (mode, op);
37345 else
37347 op = copy_to_reg (op);
37348 op = simplify_gen_subreg (mode, op, GET_MODE (op), 0);
37352 args[i].op = op;
37353 args[i].mode = mode;
37356 switch (nargs)
37358 case 1:
37359 pat = GEN_FCN (icode) (real_target, args[0].op);
37360 break;
37361 case 2:
37362 pat = GEN_FCN (icode) (real_target, args[0].op, args[1].op);
37363 break;
37364 case 3:
37365 pat = GEN_FCN (icode) (real_target, args[0].op, args[1].op,
37366 args[2].op);
37367 break;
37368 case 4:
37369 pat = GEN_FCN (icode) (real_target, args[0].op, args[1].op,
37370 args[2].op, args[3].op);
37371 break;
37372 case 5:
37373 pat = GEN_FCN (icode) (real_target, args[0].op, args[1].op,
37374 args[2].op, args[3].op, args[4].op);
      break;
37375 case 6:
37376 pat = GEN_FCN (icode) (real_target, args[0].op, args[1].op,
37377 args[2].op, args[3].op, args[4].op,
37378 args[5].op);
37379 break;
37380 default:
37381 gcc_unreachable ();
37384 if (! pat)
37385 return 0;
37387 emit_insn (pat);
37388 return target;
37391 /* Transform a pattern of the following layout:
37392      (parallel [
37393        set (A B)
37394        (unspec [C] UNSPEC_EMBEDDED_ROUNDING)])
37396    into:
37397      (set (A B))

   Or:
37400      (parallel [ A B
       ...
37402      (unspec [C] UNSPEC_EMBEDDED_ROUNDING)
       ...
       ])
37405    into:
37406      (parallel [ A B ... ])  */
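/* This helper is used by the rounding expanders below: when the rounding
   immediate is NO_ROUND the embedded-rounding unspec in the generated
   PARALLEL carries no information, so the plain pattern is emitted
   instead.  */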
37408 static rtx
37409 ix86_erase_embedded_rounding (rtx pat)
37411 if (GET_CODE (pat) == INSN)
37412 pat = PATTERN (pat);
37414 gcc_assert (GET_CODE (pat) == PARALLEL);
37416 if (XVECLEN (pat, 0) == 2)
37418 rtx p0 = XVECEXP (pat, 0, 0);
37419 rtx p1 = XVECEXP (pat, 0, 1);
37421 gcc_assert (GET_CODE (p0) == SET
37422 && GET_CODE (p1) == UNSPEC
37423 && XINT (p1, 1) == UNSPEC_EMBEDDED_ROUNDING);
37425 return p0;
37427 else
37429 rtx *res = XALLOCAVEC (rtx, XVECLEN (pat, 0));
37430 int i = 0;
37431 int j = 0;
37433 for (; i < XVECLEN (pat, 0); ++i)
37435 rtx elem = XVECEXP (pat, 0, i);
37436 if (GET_CODE (elem) != UNSPEC
37437 || XINT (elem, 1) != UNSPEC_EMBEDDED_ROUNDING)
37438 res [j++] = elem;
37441 /* No more than 1 occurrence was removed. */
37442 gcc_assert (j >= XVECLEN (pat, 0) - 1);
37444 return gen_rtx_PARALLEL (GET_MODE (pat), gen_rtvec_v (j, res));
37448 /* Subroutine of ix86_expand_round_builtin to take care of comi insns
37449 with rounding. */
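/* Expected calling convention (an assumption based on the argument handling
   below): the scalar compare-with-rounding intrinsics, e.g. an AVX-512
   _mm_comi_round_ss-style helper, pass (a, b, predicate, sae); arg2 is the
   comparison predicate in the range 0..31, decoded through
   comi_comparisons[], and arg3 is the rounding/SAE immediate.  */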
37450 static rtx
37451 ix86_expand_sse_comi_round (const struct builtin_description *d,
37452 tree exp, rtx target)
37454 rtx pat, set_dst;
37455 tree arg0 = CALL_EXPR_ARG (exp, 0);
37456 tree arg1 = CALL_EXPR_ARG (exp, 1);
37457 tree arg2 = CALL_EXPR_ARG (exp, 2);
37458 tree arg3 = CALL_EXPR_ARG (exp, 3);
37459 rtx op0 = expand_normal (arg0);
37460 rtx op1 = expand_normal (arg1);
37461 rtx op2 = expand_normal (arg2);
37462 rtx op3 = expand_normal (arg3);
37463 enum insn_code icode = d->icode;
37464 const struct insn_data_d *insn_p = &insn_data[icode];
37465 machine_mode mode0 = insn_p->operand[0].mode;
37466 machine_mode mode1 = insn_p->operand[1].mode;
37467 enum rtx_code comparison = UNEQ;
37468 bool need_ucomi = false;
37470 /* See avxintrin.h for values. */
37471 enum rtx_code comi_comparisons[32] =
37473 UNEQ, GT, GE, UNORDERED, LTGT, UNLE, UNLT, ORDERED, UNEQ, UNLT,
37474 UNLE, LT, LTGT, GE, GT, LT, UNEQ, GT, GE, UNORDERED, LTGT, UNLE,
37475 UNLT, ORDERED, UNEQ, UNLT, UNLE, LT, LTGT, GE, GT, LT
37477 bool need_ucomi_values[32] =
37479 true, false, false, true, true, false, false, true,
37480 true, false, false, true, true, false, false, true,
37481 false, true, true, false, false, true, true, false,
37482 false, true, true, false, false, true, true, false
37485 if (!CONST_INT_P (op2))
37487 error ("the third argument must be a comparison constant");
37488 return const0_rtx;
37490 if (INTVAL (op2) < 0 || INTVAL (op2) >= 32)
37492 error ("incorrect comparison mode");
37493 return const0_rtx;
37496 if (!insn_p->operand[2].predicate (op3, SImode))
37498 error ("incorrect rounding operand");
37499 return const0_rtx;
37502 comparison = comi_comparisons[INTVAL (op2)];
37503 need_ucomi = need_ucomi_values[INTVAL (op2)];
37505 if (VECTOR_MODE_P (mode0))
37506 op0 = safe_vector_operand (op0, mode0);
37507 if (VECTOR_MODE_P (mode1))
37508 op1 = safe_vector_operand (op1, mode1);
37510 target = gen_reg_rtx (SImode);
37511 emit_move_insn (target, const0_rtx);
37512 target = gen_rtx_SUBREG (QImode, target, 0);
37514 if ((optimize && !register_operand (op0, mode0))
37515 || !insn_p->operand[0].predicate (op0, mode0))
37516 op0 = copy_to_mode_reg (mode0, op0);
37517 if ((optimize && !register_operand (op1, mode1))
37518 || !insn_p->operand[1].predicate (op1, mode1))
37519 op1 = copy_to_mode_reg (mode1, op1);
37521 if (need_ucomi)
37522 icode = icode == CODE_FOR_sse_comi_round
37523 ? CODE_FOR_sse_ucomi_round
37524 : CODE_FOR_sse2_ucomi_round;
37526 pat = GEN_FCN (icode) (op0, op1, op3);
37527 if (! pat)
37528 return 0;
37530 /* Rounding operand can be either NO_ROUND or ROUND_SAE at this point. */
37531 if (INTVAL (op3) == NO_ROUND)
37533 pat = ix86_erase_embedded_rounding (pat);
37534 if (! pat)
37535 return 0;
37537 set_dst = SET_DEST (pat);
37539 else
37541 gcc_assert (GET_CODE (XVECEXP (pat, 0, 0)) == SET);
37542 set_dst = SET_DEST (XVECEXP (pat, 0, 0));
37545 emit_insn (pat);
37546 emit_insn (gen_rtx_SET (VOIDmode,
37547 gen_rtx_STRICT_LOW_PART (VOIDmode, target),
37548 gen_rtx_fmt_ee (comparison, QImode,
37549 set_dst,
37550 const0_rtx)));
37552 return SUBREG_REG (target);
37555 static rtx
37556 ix86_expand_round_builtin (const struct builtin_description *d,
37557 tree exp, rtx target)
37559 rtx pat;
37560 unsigned int i, nargs;
37561 struct
37563 rtx op;
37564 machine_mode mode;
37565 } args[6];
37566 enum insn_code icode = d->icode;
37567 const struct insn_data_d *insn_p = &insn_data[icode];
37568 machine_mode tmode = insn_p->operand[0].mode;
37569 unsigned int nargs_constant = 0;
37570 unsigned int redundant_embed_rnd = 0;
37572 switch ((enum ix86_builtin_func_type) d->flag)
37574 case UINT64_FTYPE_V2DF_INT:
37575 case UINT64_FTYPE_V4SF_INT:
37576 case UINT_FTYPE_V2DF_INT:
37577 case UINT_FTYPE_V4SF_INT:
37578 case INT64_FTYPE_V2DF_INT:
37579 case INT64_FTYPE_V4SF_INT:
37580 case INT_FTYPE_V2DF_INT:
37581 case INT_FTYPE_V4SF_INT:
37582 nargs = 2;
37583 break;
37584 case V4SF_FTYPE_V4SF_UINT_INT:
37585 case V4SF_FTYPE_V4SF_UINT64_INT:
37586 case V2DF_FTYPE_V2DF_UINT64_INT:
37587 case V4SF_FTYPE_V4SF_INT_INT:
37588 case V4SF_FTYPE_V4SF_INT64_INT:
37589 case V2DF_FTYPE_V2DF_INT64_INT:
37590 case V4SF_FTYPE_V4SF_V4SF_INT:
37591 case V2DF_FTYPE_V2DF_V2DF_INT:
37592 case V4SF_FTYPE_V4SF_V2DF_INT:
37593 case V2DF_FTYPE_V2DF_V4SF_INT:
37594 nargs = 3;
37595 break;
37596 case V8SF_FTYPE_V8DF_V8SF_QI_INT:
37597 case V8DF_FTYPE_V8DF_V8DF_QI_INT:
37598 case V8SI_FTYPE_V8DF_V8SI_QI_INT:
37599 case V8DI_FTYPE_V8DF_V8DI_QI_INT:
37600 case V8SF_FTYPE_V8DI_V8SF_QI_INT:
37601 case V8DF_FTYPE_V8DI_V8DF_QI_INT:
37602 case V16SF_FTYPE_V16SF_V16SF_HI_INT:
37603 case V8DI_FTYPE_V8SF_V8DI_QI_INT:
37604 case V16SF_FTYPE_V16SI_V16SF_HI_INT:
37605 case V16SI_FTYPE_V16SF_V16SI_HI_INT:
37606 case V8DF_FTYPE_V8SF_V8DF_QI_INT:
37607 case V16SF_FTYPE_V16HI_V16SF_HI_INT:
37608 case V2DF_FTYPE_V2DF_V2DF_V2DF_INT:
37609 case V4SF_FTYPE_V4SF_V4SF_V4SF_INT:
37610 nargs = 4;
37611 break;
37612 case V4SF_FTYPE_V4SF_V4SF_INT_INT:
37613 case V2DF_FTYPE_V2DF_V2DF_INT_INT:
37614 nargs_constant = 2;
37615 nargs = 4;
37616 break;
37617 case INT_FTYPE_V4SF_V4SF_INT_INT:
37618 case INT_FTYPE_V2DF_V2DF_INT_INT:
37619 return ix86_expand_sse_comi_round (d, exp, target);
37620 case V8DF_FTYPE_V8DF_V8DF_V8DF_QI_INT:
37621 case V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT:
37622 case V2DF_FTYPE_V2DF_V2DF_V2DF_QI_INT:
37623 case V2DF_FTYPE_V2DF_V4SF_V2DF_QI_INT:
37624 case V4SF_FTYPE_V4SF_V4SF_V4SF_QI_INT:
37625 case V4SF_FTYPE_V4SF_V2DF_V4SF_QI_INT:
37626 nargs = 5;
37627 break;
37628 case V16SF_FTYPE_V16SF_INT_V16SF_HI_INT:
37629 case V8DF_FTYPE_V8DF_INT_V8DF_QI_INT:
37630 nargs_constant = 4;
37631 nargs = 5;
37632 break;
37633 case QI_FTYPE_V8DF_V8DF_INT_QI_INT:
37634 case QI_FTYPE_V2DF_V2DF_INT_QI_INT:
37635 case HI_FTYPE_V16SF_V16SF_INT_HI_INT:
37636 case QI_FTYPE_V4SF_V4SF_INT_QI_INT:
37637 nargs_constant = 3;
37638 nargs = 5;
37639 break;
37640 case V16SF_FTYPE_V16SF_V16SF_INT_V16SF_HI_INT:
37641 case V8DF_FTYPE_V8DF_V8DF_INT_V8DF_QI_INT:
37642 case V4SF_FTYPE_V4SF_V4SF_INT_V4SF_QI_INT:
37643 case V2DF_FTYPE_V2DF_V2DF_INT_V2DF_QI_INT:
37644 nargs = 6;
37645 nargs_constant = 4;
37646 break;
37647 case V8DF_FTYPE_V8DF_V8DF_V8DI_INT_QI_INT:
37648 case V16SF_FTYPE_V16SF_V16SF_V16SI_INT_HI_INT:
37649 case V2DF_FTYPE_V2DF_V2DF_V2DI_INT_QI_INT:
37650 case V4SF_FTYPE_V4SF_V4SF_V4SI_INT_QI_INT:
37651 nargs = 6;
37652 nargs_constant = 3;
37653 break;
37654 default:
37655 gcc_unreachable ();
37657 gcc_assert (nargs <= ARRAY_SIZE (args));
37659 if (optimize
37660 || target == 0
37661 || GET_MODE (target) != tmode
37662 || !insn_p->operand[0].predicate (target, tmode))
37663 target = gen_reg_rtx (tmode);
37665 for (i = 0; i < nargs; i++)
37667 tree arg = CALL_EXPR_ARG (exp, i);
37668 rtx op = expand_normal (arg);
37669 machine_mode mode = insn_p->operand[i + 1].mode;
37670 bool match = insn_p->operand[i + 1].predicate (op, mode);
37672 if (i == nargs - nargs_constant)
37674 if (!match)
37676 switch (icode)
37678 case CODE_FOR_avx512f_getmantv8df_mask_round:
37679 case CODE_FOR_avx512f_getmantv16sf_mask_round:
37680 case CODE_FOR_avx512f_vgetmantv2df_round:
37681 case CODE_FOR_avx512f_vgetmantv4sf_round:
37682 error ("the immediate argument must be a 4-bit immediate");
37683 return const0_rtx;
37684 case CODE_FOR_avx512f_cmpv8df3_mask_round:
37685 case CODE_FOR_avx512f_cmpv16sf3_mask_round:
37686 case CODE_FOR_avx512f_vmcmpv2df3_mask_round:
37687 case CODE_FOR_avx512f_vmcmpv4sf3_mask_round:
37688 error ("the immediate argument must be a 5-bit immediate");
37689 return const0_rtx;
37690 default:
37691 error ("the immediate argument must be an 8-bit immediate");
37692 return const0_rtx;
37696 else if (i == nargs-1)
37698 if (!insn_p->operand[nargs].predicate (op, SImode))
37700 error ("incorrect rounding operand");
37701 return const0_rtx;
37704 /* If there is no rounding, use the normal version of the pattern. */
37705 if (INTVAL (op) == NO_ROUND)
37706 redundant_embed_rnd = 1;
37708 else
37710 if (VECTOR_MODE_P (mode))
37711 op = safe_vector_operand (op, mode);
37713 if (GET_MODE (op) == mode || GET_MODE (op) == VOIDmode)
37715 if (optimize || !match)
37716 op = copy_to_mode_reg (mode, op);
37718 else
37720 op = copy_to_reg (op);
37721 op = simplify_gen_subreg (mode, op, GET_MODE (op), 0);
37725 args[i].op = op;
37726 args[i].mode = mode;
37729 switch (nargs)
37731 case 1:
37732 pat = GEN_FCN (icode) (target, args[0].op);
37733 break;
37734 case 2:
37735 pat = GEN_FCN (icode) (target, args[0].op, args[1].op);
37736 break;
37737 case 3:
37738 pat = GEN_FCN (icode) (target, args[0].op, args[1].op,
37739 args[2].op);
37740 break;
37741 case 4:
37742 pat = GEN_FCN (icode) (target, args[0].op, args[1].op,
37743 args[2].op, args[3].op);
37744 break;
37745 case 5:
37746 pat = GEN_FCN (icode) (target, args[0].op, args[1].op,
37747 args[2].op, args[3].op, args[4].op);
      break;
37748 case 6:
37749 pat = GEN_FCN (icode) (target, args[0].op, args[1].op,
37750 args[2].op, args[3].op, args[4].op,
37751 args[5].op);
37752 break;
37753 default:
37754 gcc_unreachable ();
37757 if (!pat)
37758 return 0;
37760 if (redundant_embed_rnd)
37761 pat = ix86_erase_embedded_rounding (pat);
37763 emit_insn (pat);
37764 return target;
37767 /* Subroutine of ix86_expand_builtin to take care of special insns
37768 with variable number of operands. */
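/* Overview: each function-type case below classifies the builtin as a load
   or a store (klass), records which operand index is the memory operand, and
   flags the non-temporal and masked load/store icodes whose MEM must carry
   full mode alignment (aligned_mem) so that ix86_legitimate_combined_insn
   does not later reject the insn.  */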
37770 static rtx
37771 ix86_expand_special_args_builtin (const struct builtin_description *d,
37772 tree exp, rtx target)
37774 tree arg;
37775 rtx pat, op;
37776 unsigned int i, nargs, arg_adjust, memory;
37777 bool aligned_mem = false;
37778 struct
37780 rtx op;
37781 machine_mode mode;
37782 } args[3];
37783 enum insn_code icode = d->icode;
37784 bool last_arg_constant = false;
37785 const struct insn_data_d *insn_p = &insn_data[icode];
37786 machine_mode tmode = insn_p->operand[0].mode;
37787 enum { load, store } klass;
37789 switch ((enum ix86_builtin_func_type) d->flag)
37791 case VOID_FTYPE_VOID:
37792 emit_insn (GEN_FCN (icode) (target));
37793 return 0;
37794 case VOID_FTYPE_UINT64:
37795 case VOID_FTYPE_UNSIGNED:
37796 nargs = 0;
37797 klass = store;
37798 memory = 0;
37799 break;
37801 case INT_FTYPE_VOID:
37802 case USHORT_FTYPE_VOID:
37803 case UINT64_FTYPE_VOID:
37804 case UNSIGNED_FTYPE_VOID:
37805 nargs = 0;
37806 klass = load;
37807 memory = 0;
37808 break;
37809 case UINT64_FTYPE_PUNSIGNED:
37810 case V2DI_FTYPE_PV2DI:
37811 case V4DI_FTYPE_PV4DI:
37812 case V32QI_FTYPE_PCCHAR:
37813 case V16QI_FTYPE_PCCHAR:
37814 case V8SF_FTYPE_PCV4SF:
37815 case V8SF_FTYPE_PCFLOAT:
37816 case V4SF_FTYPE_PCFLOAT:
37817 case V4DF_FTYPE_PCV2DF:
37818 case V4DF_FTYPE_PCDOUBLE:
37819 case V2DF_FTYPE_PCDOUBLE:
37820 case VOID_FTYPE_PVOID:
37821 case V16SI_FTYPE_PV4SI:
37822 case V16SF_FTYPE_PV4SF:
37823 case V8DI_FTYPE_PV4DI:
37824 case V8DI_FTYPE_PV8DI:
37825 case V8DF_FTYPE_PV4DF:
37826 nargs = 1;
37827 klass = load;
37828 memory = 0;
37829 switch (icode)
37831 case CODE_FOR_sse4_1_movntdqa:
37832 case CODE_FOR_avx2_movntdqa:
37833 case CODE_FOR_avx512f_movntdqa:
37834 aligned_mem = true;
37835 break;
37836 default:
37837 break;
37839 break;
37840 case VOID_FTYPE_PV2SF_V4SF:
37841 case VOID_FTYPE_PV8DI_V8DI:
37842 case VOID_FTYPE_PV4DI_V4DI:
37843 case VOID_FTYPE_PV2DI_V2DI:
37844 case VOID_FTYPE_PCHAR_V32QI:
37845 case VOID_FTYPE_PCHAR_V16QI:
37846 case VOID_FTYPE_PFLOAT_V16SF:
37847 case VOID_FTYPE_PFLOAT_V8SF:
37848 case VOID_FTYPE_PFLOAT_V4SF:
37849 case VOID_FTYPE_PDOUBLE_V8DF:
37850 case VOID_FTYPE_PDOUBLE_V4DF:
37851 case VOID_FTYPE_PDOUBLE_V2DF:
37852 case VOID_FTYPE_PLONGLONG_LONGLONG:
37853 case VOID_FTYPE_PULONGLONG_ULONGLONG:
37854 case VOID_FTYPE_PINT_INT:
37855 nargs = 1;
37856 klass = store;
37857 /* Reserve memory operand for target. */
37858 memory = ARRAY_SIZE (args);
37859 switch (icode)
37861 /* These builtins and instructions require the memory
37862 to be properly aligned. */
37863 case CODE_FOR_avx_movntv4di:
37864 case CODE_FOR_sse2_movntv2di:
37865 case CODE_FOR_avx_movntv8sf:
37866 case CODE_FOR_sse_movntv4sf:
37867 case CODE_FOR_sse4a_vmmovntv4sf:
37868 case CODE_FOR_avx_movntv4df:
37869 case CODE_FOR_sse2_movntv2df:
37870 case CODE_FOR_sse4a_vmmovntv2df:
37871 case CODE_FOR_sse2_movntidi:
37872 case CODE_FOR_sse_movntq:
37873 case CODE_FOR_sse2_movntisi:
37874 case CODE_FOR_avx512f_movntv16sf:
37875 case CODE_FOR_avx512f_movntv8df:
37876 case CODE_FOR_avx512f_movntv8di:
37877 aligned_mem = true;
37878 break;
37879 default:
37880 break;
37882 break;
37883 case V4SF_FTYPE_V4SF_PCV2SF:
37884 case V2DF_FTYPE_V2DF_PCDOUBLE:
37885 nargs = 2;
37886 klass = load;
37887 memory = 1;
37888 break;
37889 case V8SF_FTYPE_PCV8SF_V8SI:
37890 case V4DF_FTYPE_PCV4DF_V4DI:
37891 case V4SF_FTYPE_PCV4SF_V4SI:
37892 case V2DF_FTYPE_PCV2DF_V2DI:
37893 case V8SI_FTYPE_PCV8SI_V8SI:
37894 case V4DI_FTYPE_PCV4DI_V4DI:
37895 case V4SI_FTYPE_PCV4SI_V4SI:
37896 case V2DI_FTYPE_PCV2DI_V2DI:
37897 nargs = 2;
37898 klass = load;
37899 memory = 0;
37900 break;
37901 case VOID_FTYPE_PV8DF_V8DF_QI:
37902 case VOID_FTYPE_PV16SF_V16SF_HI:
37903 case VOID_FTYPE_PV8DI_V8DI_QI:
37904 case VOID_FTYPE_PV4DI_V4DI_QI:
37905 case VOID_FTYPE_PV2DI_V2DI_QI:
37906 case VOID_FTYPE_PV16SI_V16SI_HI:
37907 case VOID_FTYPE_PV8SI_V8SI_QI:
37908 case VOID_FTYPE_PV4SI_V4SI_QI:
37909 switch (icode)
37911 /* These builtins and instructions require the memory
37912 to be properly aligned. */
37913 case CODE_FOR_avx512f_storev16sf_mask:
37914 case CODE_FOR_avx512f_storev16si_mask:
37915 case CODE_FOR_avx512f_storev8df_mask:
37916 case CODE_FOR_avx512f_storev8di_mask:
37917 case CODE_FOR_avx512vl_storev8sf_mask:
37918 case CODE_FOR_avx512vl_storev8si_mask:
37919 case CODE_FOR_avx512vl_storev4df_mask:
37920 case CODE_FOR_avx512vl_storev4di_mask:
37921 case CODE_FOR_avx512vl_storev4sf_mask:
37922 case CODE_FOR_avx512vl_storev4si_mask:
37923 case CODE_FOR_avx512vl_storev2df_mask:
37924 case CODE_FOR_avx512vl_storev2di_mask:
37925 aligned_mem = true;
37926 break;
37927 default:
37928 break;
37930 /* FALLTHRU */
37931 case VOID_FTYPE_PV8SF_V8SI_V8SF:
37932 case VOID_FTYPE_PV4DF_V4DI_V4DF:
37933 case VOID_FTYPE_PV4SF_V4SI_V4SF:
37934 case VOID_FTYPE_PV2DF_V2DI_V2DF:
37935 case VOID_FTYPE_PV8SI_V8SI_V8SI:
37936 case VOID_FTYPE_PV4DI_V4DI_V4DI:
37937 case VOID_FTYPE_PV4SI_V4SI_V4SI:
37938 case VOID_FTYPE_PV2DI_V2DI_V2DI:
37939 case VOID_FTYPE_PDOUBLE_V2DF_QI:
37940 case VOID_FTYPE_PFLOAT_V4SF_QI:
37941 case VOID_FTYPE_PV8SI_V8DI_QI:
37942 case VOID_FTYPE_PV8HI_V8DI_QI:
37943 case VOID_FTYPE_PV16HI_V16SI_HI:
37944 case VOID_FTYPE_PV16QI_V8DI_QI:
37945 case VOID_FTYPE_PV16QI_V16SI_HI:
37946 case VOID_FTYPE_PV4SI_V4DI_QI:
37947 case VOID_FTYPE_PV4SI_V2DI_QI:
37948 case VOID_FTYPE_PV8HI_V4DI_QI:
37949 case VOID_FTYPE_PV8HI_V2DI_QI:
37950 case VOID_FTYPE_PV8HI_V8SI_QI:
37951 case VOID_FTYPE_PV8HI_V4SI_QI:
37952 case VOID_FTYPE_PV16QI_V4DI_QI:
37953 case VOID_FTYPE_PV16QI_V2DI_QI:
37954 case VOID_FTYPE_PV16QI_V8SI_QI:
37955 case VOID_FTYPE_PV16QI_V4SI_QI:
37956 case VOID_FTYPE_PV8HI_V8HI_QI:
37957 case VOID_FTYPE_PV16HI_V16HI_HI:
37958 case VOID_FTYPE_PV32HI_V32HI_SI:
37959 case VOID_FTYPE_PV16QI_V16QI_HI:
37960 case VOID_FTYPE_PV32QI_V32QI_SI:
37961 case VOID_FTYPE_PV64QI_V64QI_DI:
37962 case VOID_FTYPE_PV4DF_V4DF_QI:
37963 case VOID_FTYPE_PV2DF_V2DF_QI:
37964 case VOID_FTYPE_PV8SF_V8SF_QI:
37965 case VOID_FTYPE_PV4SF_V4SF_QI:
37966 nargs = 2;
37967 klass = store;
37968 /* Reserve memory operand for target. */
37969 memory = ARRAY_SIZE (args);
37970 break;
37971 case V4SF_FTYPE_PCV4SF_V4SF_QI:
37972 case V8SF_FTYPE_PCV8SF_V8SF_QI:
37973 case V16SF_FTYPE_PCV16SF_V16SF_HI:
37974 case V4SI_FTYPE_PCV4SI_V4SI_QI:
37975 case V8SI_FTYPE_PCV8SI_V8SI_QI:
37976 case V16SI_FTYPE_PCV16SI_V16SI_HI:
37977 case V2DF_FTYPE_PCV2DF_V2DF_QI:
37978 case V4DF_FTYPE_PCV4DF_V4DF_QI:
37979 case V8DF_FTYPE_PCV8DF_V8DF_QI:
37980 case V2DI_FTYPE_PCV2DI_V2DI_QI:
37981 case V4DI_FTYPE_PCV4DI_V4DI_QI:
37982 case V8DI_FTYPE_PCV8DI_V8DI_QI:
37983 case V2DF_FTYPE_PCDOUBLE_V2DF_QI:
37984 case V4SF_FTYPE_PCFLOAT_V4SF_QI:
37985 case V8HI_FTYPE_PCV8HI_V8HI_QI:
37986 case V16HI_FTYPE_PCV16HI_V16HI_HI:
37987 case V32HI_FTYPE_PCV32HI_V32HI_SI:
37988 case V16QI_FTYPE_PCV16QI_V16QI_HI:
37989 case V32QI_FTYPE_PCV32QI_V32QI_SI:
37990 case V64QI_FTYPE_PCV64QI_V64QI_DI:
37991 nargs = 3;
37992 klass = load;
37993 memory = 0;
37994 switch (icode)
37996 /* These builtins and instructions require the memory
37997 to be properly aligned. */
37998 case CODE_FOR_avx512f_loadv16sf_mask:
37999 case CODE_FOR_avx512f_loadv16si_mask:
38000 case CODE_FOR_avx512f_loadv8df_mask:
38001 case CODE_FOR_avx512f_loadv8di_mask:
38002 case CODE_FOR_avx512vl_loadv8sf_mask:
38003 case CODE_FOR_avx512vl_loadv8si_mask:
38004 case CODE_FOR_avx512vl_loadv4df_mask:
38005 case CODE_FOR_avx512vl_loadv4di_mask:
38006 case CODE_FOR_avx512vl_loadv4sf_mask:
38007 case CODE_FOR_avx512vl_loadv4si_mask:
38008 case CODE_FOR_avx512vl_loadv2df_mask:
38009 case CODE_FOR_avx512vl_loadv2di_mask:
38010 case CODE_FOR_avx512bw_loadv64qi_mask:
38011 case CODE_FOR_avx512vl_loadv32qi_mask:
38012 case CODE_FOR_avx512vl_loadv16qi_mask:
38013 case CODE_FOR_avx512bw_loadv32hi_mask:
38014 case CODE_FOR_avx512vl_loadv16hi_mask:
38015 case CODE_FOR_avx512vl_loadv8hi_mask:
38016 aligned_mem = true;
38017 break;
38018 default:
38019 break;
38021 break;
38022 case VOID_FTYPE_UINT_UINT_UINT:
38023 case VOID_FTYPE_UINT64_UINT_UINT:
38024 case UCHAR_FTYPE_UINT_UINT_UINT:
38025 case UCHAR_FTYPE_UINT64_UINT_UINT:
38026 nargs = 3;
38027 klass = load;
38028 memory = ARRAY_SIZE (args);
38029 last_arg_constant = true;
38030 break;
38031 default:
38032 gcc_unreachable ();
38035 gcc_assert (nargs <= ARRAY_SIZE (args));
38037 if (klass == store)
38039 arg = CALL_EXPR_ARG (exp, 0);
38040 op = expand_normal (arg);
38041 gcc_assert (target == 0);
38042 if (memory)
38044 op = ix86_zero_extend_to_Pmode (op);
38045 target = gen_rtx_MEM (tmode, op);
38046 /* target at this point has just BITS_PER_UNIT MEM_ALIGN
38047 on it. Try to improve it using get_pointer_alignment,
38048 and if the special builtin is one that requires strict
38049 mode alignment, also from its GET_MODE_ALIGNMENT.
38050 Failure to do so could lead to ix86_legitimate_combined_insn
38051 rejecting all changes to such insns. */
38052 unsigned int align = get_pointer_alignment (arg);
38053 if (aligned_mem && align < GET_MODE_ALIGNMENT (tmode))
38054 align = GET_MODE_ALIGNMENT (tmode);
38055 if (MEM_ALIGN (target) < align)
38056 set_mem_align (target, align);
38058 else
38059 target = force_reg (tmode, op);
38060 arg_adjust = 1;
38062 else
38064 arg_adjust = 0;
38065 if (optimize
38066 || target == 0
38067 || !register_operand (target, tmode)
38068 || GET_MODE (target) != tmode)
38069 target = gen_reg_rtx (tmode);
38072 for (i = 0; i < nargs; i++)
38074 machine_mode mode = insn_p->operand[i + 1].mode;
38075 bool match;
38077 arg = CALL_EXPR_ARG (exp, i + arg_adjust);
38078 op = expand_normal (arg);
38079 match = insn_p->operand[i + 1].predicate (op, mode);
38081 if (last_arg_constant && (i + 1) == nargs)
38083 if (!match)
38085 if (icode == CODE_FOR_lwp_lwpvalsi3
38086 || icode == CODE_FOR_lwp_lwpinssi3
38087 || icode == CODE_FOR_lwp_lwpvaldi3
38088 || icode == CODE_FOR_lwp_lwpinsdi3)
38089 error ("the last argument must be a 32-bit immediate");
38090 else
38091 error ("the last argument must be an 8-bit immediate");
38092 return const0_rtx;
38095 else
38097 if (i == memory)
38099 /* This must be the memory operand. */
38100 op = ix86_zero_extend_to_Pmode (op);
38101 op = gen_rtx_MEM (mode, op);
38102 /* op at this point has just BITS_PER_UNIT MEM_ALIGN
38103 on it. Try to improve it using get_pointer_alignment,
38104 and if the special builtin is one that requires strict
38105 mode alignment, also from its GET_MODE_ALIGNMENT.
38106 Failure to do so could lead to ix86_legitimate_combined_insn
38107 rejecting all changes to such insns. */
38108 unsigned int align = get_pointer_alignment (arg);
38109 if (aligned_mem && align < GET_MODE_ALIGNMENT (mode))
38110 align = GET_MODE_ALIGNMENT (mode);
38111 if (MEM_ALIGN (op) < align)
38112 set_mem_align (op, align);
38114 else
38116 /* This must be a register operand. */
38117 if (VECTOR_MODE_P (mode))
38118 op = safe_vector_operand (op, mode);
38120 if (GET_MODE (op) == mode || GET_MODE (op) == VOIDmode)
38121 op = copy_to_mode_reg (mode, op);
38122 else
38124 op = copy_to_reg (op);
38125 op = simplify_gen_subreg (mode, op, GET_MODE (op), 0);
38130 args[i].op = op;
38131 args[i].mode = mode;
38134 switch (nargs)
38136 case 0:
38137 pat = GEN_FCN (icode) (target);
38138 break;
38139 case 1:
38140 pat = GEN_FCN (icode) (target, args[0].op);
38141 break;
38142 case 2:
38143 pat = GEN_FCN (icode) (target, args[0].op, args[1].op);
38144 break;
38145 case 3:
38146 pat = GEN_FCN (icode) (target, args[0].op, args[1].op, args[2].op);
38147 break;
38148 default:
38149 gcc_unreachable ();
38152 if (! pat)
38153 return 0;
38154 emit_insn (pat);
38155 return klass == store ? 0 : target;
38158 /* Return the integer constant in ARG. Constrain it to be in the range
38159 of the subparts of VEC_TYPE; issue an error if not. */
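/* For example, for a vector type with four subparts MAX is 3, so a selector
   of 4 is diagnosed and 0 is returned as a safe fallback.  */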
38161 static int
38162 get_element_number (tree vec_type, tree arg)
38164 unsigned HOST_WIDE_INT elt, max = TYPE_VECTOR_SUBPARTS (vec_type) - 1;
38166 if (!tree_fits_uhwi_p (arg)
38167 || (elt = tree_to_uhwi (arg), elt > max))
38169 error ("selector must be an integer constant in the range 0..%wi", max);
38170 return 0;
38173 return elt;
38176 /* A subroutine of ix86_expand_builtin. These builtins are a wrapper around
38177 ix86_expand_vector_init. We DO have language-level syntax for this, in
38178 the form of (type){ init-list }. Except that since we can't place emms
38179 instructions from inside the compiler, we can't allow the use of MMX
38180 registers unless the user explicitly asks for it. So we do *not* define
38181 vec_set/vec_extract/vec_init patterns for MMX modes in mmx.md. Instead
38182 we have builtins invoked by mmintrin.h that give us license to emit
38183 these sorts of instructions. */
38185 static rtx
38186 ix86_expand_vec_init_builtin (tree type, tree exp, rtx target)
38188 machine_mode tmode = TYPE_MODE (type);
38189 machine_mode inner_mode = GET_MODE_INNER (tmode);
38190 int i, n_elt = GET_MODE_NUNITS (tmode);
38191 rtvec v = rtvec_alloc (n_elt);
38193 gcc_assert (VECTOR_MODE_P (tmode));
38194 gcc_assert (call_expr_nargs (exp) == n_elt);
38196 for (i = 0; i < n_elt; ++i)
38198 rtx x = expand_normal (CALL_EXPR_ARG (exp, i));
38199 RTVEC_ELT (v, i) = gen_lowpart (inner_mode, x);
38202 if (!target || !register_operand (target, tmode))
38203 target = gen_reg_rtx (tmode);
38205 ix86_expand_vector_init (true, target, gen_rtx_PARALLEL (tmode, v));
38206 return target;
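/* Illustrative only: mmintrin.h reaches this expander through wrappers
   roughly like

     extern __inline __m64
     _mm_set_pi32 (int __i1, int __i0)
     {
       return (__m64) __builtin_ia32_vec_init_v2si (__i0, __i1);
     }

   so the use of MMX registers is explicit in the user's code rather than
   being introduced silently by the vectorizer.  */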
38209 /* A subroutine of ix86_expand_builtin. These builtins are a wrapper around
38210 ix86_expand_vector_extract. They would be redundant (for non-MMX) if we
38211 had a language-level syntax for referencing vector elements. */
38213 static rtx
38214 ix86_expand_vec_ext_builtin (tree exp, rtx target)
38216 machine_mode tmode, mode0;
38217 tree arg0, arg1;
38218 int elt;
38219 rtx op0;
38221 arg0 = CALL_EXPR_ARG (exp, 0);
38222 arg1 = CALL_EXPR_ARG (exp, 1);
38224 op0 = expand_normal (arg0);
38225 elt = get_element_number (TREE_TYPE (arg0), arg1);
38227 tmode = TYPE_MODE (TREE_TYPE (TREE_TYPE (arg0)));
38228 mode0 = TYPE_MODE (TREE_TYPE (arg0));
38229 gcc_assert (VECTOR_MODE_P (mode0));
38231 op0 = force_reg (mode0, op0);
38233 if (optimize || !target || !register_operand (target, tmode))
38234 target = gen_reg_rtx (tmode);
38236 ix86_expand_vector_extract (true, target, op0, elt);
38238 return target;
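/* Illustrative only: a typical route into this expander is the SSE2 wrapper
   in emmintrin.h, roughly

     extern __inline int
     _mm_extract_epi16 (__m128i __A, const int __N)
     {
       return (unsigned short) __builtin_ia32_vec_ext_v8hi ((__v8hi) __A, __N);
     }

   where __N must satisfy get_element_number (0..7 for a V8HI vector).  */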
38241 /* A subroutine of ix86_expand_builtin. These builtins are a wrapper around
38242 ix86_expand_vector_set. They would be redundant (for non-MMX) if we had
38243 a language-level syntax for referencing vector elements. */
38245 static rtx
38246 ix86_expand_vec_set_builtin (tree exp)
38248 machine_mode tmode, mode1;
38249 tree arg0, arg1, arg2;
38250 int elt;
38251 rtx op0, op1, target;
38253 arg0 = CALL_EXPR_ARG (exp, 0);
38254 arg1 = CALL_EXPR_ARG (exp, 1);
38255 arg2 = CALL_EXPR_ARG (exp, 2);
38257 tmode = TYPE_MODE (TREE_TYPE (arg0));
38258 mode1 = TYPE_MODE (TREE_TYPE (TREE_TYPE (arg0)));
38259 gcc_assert (VECTOR_MODE_P (tmode));
38261 op0 = expand_expr (arg0, NULL_RTX, tmode, EXPAND_NORMAL);
38262 op1 = expand_expr (arg1, NULL_RTX, mode1, EXPAND_NORMAL);
38263 elt = get_element_number (TREE_TYPE (arg0), arg2);
38265 if (GET_MODE (op1) != mode1 && GET_MODE (op1) != VOIDmode)
38266 op1 = convert_modes (mode1, GET_MODE (op1), op1, true);
38268 op0 = force_reg (tmode, op0);
38269 op1 = force_reg (mode1, op1);
38271 /* OP0 is the source of these builtin functions and shouldn't be
38272 modified. Create a copy, use it and return it as target. */
38273 target = gen_reg_rtx (tmode);
38274 emit_move_insn (target, op0);
38275 ix86_expand_vector_set (true, target, op1, elt);
38277 return target;
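/* Illustrative only: the matching insertion wrapper in emmintrin.h looks
   roughly like

     extern __inline __m128i
     _mm_insert_epi16 (__m128i __A, int __D, const int __N)
     {
       return (__m128i) __builtin_ia32_vec_set_v8hi ((__v8hi) __A, __D, __N);
     }

   and, per the comment above, the original __A is left unmodified; the
   builtin returns a fresh copy with element __N replaced.  */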
38280 /* Emit conditional move of SRC to DST with condition
38281 OP1 CODE OP2. */
38282 static void
38283 ix86_emit_cmove (rtx dst, rtx src, enum rtx_code code, rtx op1, rtx op2)
38285 rtx t;
38287 if (TARGET_CMOVE)
38289 t = ix86_expand_compare (code, op1, op2);
38290 emit_insn (gen_rtx_SET (VOIDmode, dst,
38291 gen_rtx_IF_THEN_ELSE (GET_MODE (dst), t,
38292 src, dst)));
38294 else
38296 rtx nomove = gen_label_rtx ();
38297 emit_cmp_and_jump_insns (op1, op2, reverse_condition (code),
38298 const0_rtx, GET_MODE (op1), 1, nomove);
38299 emit_move_insn (dst, src);
38300 emit_label (nomove);
38304 /* Choose the max of DST and SRC and put it in DST. */
38305 static void
38306 ix86_emit_move_max (rtx dst, rtx src)
38308 ix86_emit_cmove (dst, src, LTU, dst, src);
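/* Illustrative sketch of the emitted semantics, assuming unsigned
   pointer-sized operands:

     if (dst < src)      (unsigned compare, LTU)
       dst = src;        (so dst = MAX (dst, src))

   This unsigned maximum is reused below for upper bounds kept in one's
   complement form: taking the maximum of the complemented values selects
   the numerically smaller plain upper bound.  */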
38311 /* Expand an expression EXP that calls a built-in function,
38312 with result going to TARGET if that's convenient
38313 (and in mode MODE if that's convenient).
38314 SUBTARGET may be used as the target for computing one of EXP's operands.
38315 IGNORE is nonzero if the value is to be ignored. */
38317 static rtx
38318 ix86_expand_builtin (tree exp, rtx target, rtx subtarget,
38319 machine_mode mode, int ignore)
38321 const struct builtin_description *d;
38322 size_t i;
38323 enum insn_code icode;
38324 tree fndecl = TREE_OPERAND (CALL_EXPR_FN (exp), 0);
38325 tree arg0, arg1, arg2, arg3, arg4;
38326 rtx op0, op1, op2, op3, op4, pat, insn;
38327 machine_mode mode0, mode1, mode2, mode3, mode4;
38328 unsigned int fcode = DECL_FUNCTION_CODE (fndecl);
38330 /* For CPU builtins that can be folded, fold first and expand the fold. */
38331 switch (fcode)
38333 case IX86_BUILTIN_CPU_INIT:
38335 /* Make it call __cpu_indicator_init in libgcc. */
38336 tree call_expr, fndecl, type;
38337 type = build_function_type_list (integer_type_node, NULL_TREE);
38338 fndecl = build_fn_decl ("__cpu_indicator_init", type);
38339 call_expr = build_call_expr (fndecl, 0);
38340 return expand_expr (call_expr, target, mode, EXPAND_NORMAL);
38342 case IX86_BUILTIN_CPU_IS:
38343 case IX86_BUILTIN_CPU_SUPPORTS:
38345 tree arg0 = CALL_EXPR_ARG (exp, 0);
38346 tree fold_expr = fold_builtin_cpu (fndecl, &arg0);
38347 gcc_assert (fold_expr != NULL_TREE);
38348 return expand_expr (fold_expr, target, mode, EXPAND_NORMAL);
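/* Illustrative only: these cases implement the documented CPU dispatch
   builtins, for example

     __builtin_cpu_init ();
     int have_avx2 = __builtin_cpu_supports ("avx2");
     int on_intel  = __builtin_cpu_is ("intel");

   fold_builtin_cpu rewrites the IS/SUPPORTS checks into reads of the
   __cpu_model structure that __cpu_indicator_init fills in.  */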
38352 /* Determine whether the builtin function is available under the current ISA.
38353 Originally the builtin was not created if it wasn't applicable to the
38354 current ISA based on the command line switches. With function specific
38355 options, we need to check in the context of the function making the call
38356 whether it is supported. */
38357 if (ix86_builtins_isa[fcode].isa
38358 && !(ix86_builtins_isa[fcode].isa & ix86_isa_flags))
38360 char *opts = ix86_target_string (ix86_builtins_isa[fcode].isa, 0, NULL,
38361 NULL, (enum fpmath_unit) 0, false);
38363 if (!opts)
38364 error ("%qE needs unknown isa option", fndecl);
38365 else
38367 gcc_assert (opts != NULL);
38368 error ("%qE needs isa option %s", fndecl, opts);
38369 free (opts);
38371 return const0_rtx;
38374 switch (fcode)
38376 case IX86_BUILTIN_BNDMK:
38377 if (!target
38378 || GET_MODE (target) != BNDmode
38379 || !register_operand (target, BNDmode))
38380 target = gen_reg_rtx (BNDmode);
38382 arg0 = CALL_EXPR_ARG (exp, 0);
38383 arg1 = CALL_EXPR_ARG (exp, 1);
38385 op0 = expand_normal (arg0);
38386 op1 = expand_normal (arg1);
38388 if (!register_operand (op0, Pmode))
38389 op0 = ix86_zero_extend_to_Pmode (op0);
38390 if (!register_operand (op1, Pmode))
38391 op1 = ix86_zero_extend_to_Pmode (op1);
38393 /* Builtin arg1 is the size of the block, but instruction op1 should
38394 be (size - 1). */
38395 op1 = expand_simple_binop (Pmode, PLUS, op1, constm1_rtx,
38396 NULL_RTX, 1, OPTAB_DIRECT);
38398 emit_insn (BNDmode == BND64mode
38399 ? gen_bnd64_mk (target, op0, op1)
38400 : gen_bnd32_mk (target, op0, op1));
38401 return target;
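/* Worked example (illustrative only): for a pointer P and size 0x10, op1
   becomes 0x10 - 1 = 0xF, so the generated bndmk describes the closed
   range [P, P + 0xF], i.e. exactly the 0x10 addressable bytes.  */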
38403 case IX86_BUILTIN_BNDSTX:
38404 arg0 = CALL_EXPR_ARG (exp, 0);
38405 arg1 = CALL_EXPR_ARG (exp, 1);
38406 arg2 = CALL_EXPR_ARG (exp, 2);
38408 op0 = expand_normal (arg0);
38409 op1 = expand_normal (arg1);
38410 op2 = expand_normal (arg2);
38412 if (!register_operand (op0, Pmode))
38413 op0 = ix86_zero_extend_to_Pmode (op0);
38414 if (!register_operand (op1, BNDmode))
38415 op1 = copy_to_mode_reg (BNDmode, op1);
38416 if (!register_operand (op2, Pmode))
38417 op2 = ix86_zero_extend_to_Pmode (op2);
38419 emit_insn (BNDmode == BND64mode
38420 ? gen_bnd64_stx (op2, op0, op1)
38421 : gen_bnd32_stx (op2, op0, op1));
38422 return 0;
38424 case IX86_BUILTIN_BNDLDX:
38425 if (!target
38426 || GET_MODE (target) != BNDmode
38427 || !register_operand (target, BNDmode))
38428 target = gen_reg_rtx (BNDmode);
38430 arg0 = CALL_EXPR_ARG (exp, 0);
38431 arg1 = CALL_EXPR_ARG (exp, 1);
38433 op0 = expand_normal (arg0);
38434 op1 = expand_normal (arg1);
38436 if (!register_operand (op0, Pmode))
38437 op0 = ix86_zero_extend_to_Pmode (op0);
38438 if (!register_operand (op1, Pmode))
38439 op1 = ix86_zero_extend_to_Pmode (op1);
38441 emit_insn (BNDmode == BND64mode
38442 ? gen_bnd64_ldx (target, op0, op1)
38443 : gen_bnd32_ldx (target, op0, op1));
38444 return target;
38446 case IX86_BUILTIN_BNDCL:
38447 arg0 = CALL_EXPR_ARG (exp, 0);
38448 arg1 = CALL_EXPR_ARG (exp, 1);
38450 op0 = expand_normal (arg0);
38451 op1 = expand_normal (arg1);
38453 if (!register_operand (op0, Pmode))
38454 op0 = ix86_zero_extend_to_Pmode (op0);
38455 if (!register_operand (op1, BNDmode))
38456 op1 = copy_to_mode_reg (BNDmode, op1);
38458 emit_insn (BNDmode == BND64mode
38459 ? gen_bnd64_cl (op1, op0)
38460 : gen_bnd32_cl (op1, op0));
38461 return 0;
38463 case IX86_BUILTIN_BNDCU:
38464 arg0 = CALL_EXPR_ARG (exp, 0);
38465 arg1 = CALL_EXPR_ARG (exp, 1);
38467 op0 = expand_normal (arg0);
38468 op1 = expand_normal (arg1);
38470 if (!register_operand (op0, Pmode))
38471 op0 = ix86_zero_extend_to_Pmode (op0);
38472 if (!register_operand (op1, BNDmode))
38473 op1 = copy_to_mode_reg (BNDmode, op1);
38475 emit_insn (BNDmode == BND64mode
38476 ? gen_bnd64_cu (op1, op0)
38477 : gen_bnd32_cu (op1, op0));
38478 return 0;
38480 case IX86_BUILTIN_BNDRET:
38481 arg0 = CALL_EXPR_ARG (exp, 0);
38482 gcc_assert (TREE_CODE (arg0) == SSA_NAME);
38483 target = chkp_get_rtl_bounds (arg0);
38485 /* If no bounds were specified for the returned value,
38486 then use INIT bounds. This usually happens when
38487 some built-in function is expanded. */
38488 if (!target)
38490 rtx t1 = gen_reg_rtx (Pmode);
38491 rtx t2 = gen_reg_rtx (Pmode);
38492 target = gen_reg_rtx (BNDmode);
38493 emit_move_insn (t1, const0_rtx);
38494 emit_move_insn (t2, constm1_rtx);
38495 emit_insn (BNDmode == BND64mode
38496 ? gen_bnd64_mk (target, t1, t2)
38497 : gen_bnd32_mk (target, t1, t2));
38500 gcc_assert (target && REG_P (target));
38501 return target;
38503 case IX86_BUILTIN_BNDNARROW:
38505 rtx m1, m1h1, m1h2, lb, ub, t1;
38507 /* Return value and lb. */
38508 arg0 = CALL_EXPR_ARG (exp, 0);
38509 /* Bounds. */
38510 arg1 = CALL_EXPR_ARG (exp, 1);
38511 /* Size. */
38512 arg2 = CALL_EXPR_ARG (exp, 2);
38514 lb = expand_normal (arg0);
38515 op1 = expand_normal (arg1);
38516 op2 = expand_normal (arg2);
38518 /* Size was passed but we need to use (size - 1) as for bndmk. */
38519 op2 = expand_simple_binop (Pmode, PLUS, op2, constm1_rtx,
38520 NULL_RTX, 1, OPTAB_DIRECT);
38522 /* Add LB to size and invert to get UB. */
38523 op2 = expand_simple_binop (Pmode, PLUS, op2, lb,
38524 op2, 1, OPTAB_DIRECT);
38525 ub = expand_simple_unop (Pmode, NOT, op2, op2, 1);
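/* Worked example (illustrative only): with lb = 0x1000 and size = 0x10,
   op2 becomes 0xF, then 0x100F, and ub = ~0x100F, matching the one's
   complement representation used for upper bounds here.  */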
38527 if (!register_operand (lb, Pmode))
38528 lb = ix86_zero_extend_to_Pmode (lb);
38529 if (!register_operand (ub, Pmode))
38530 ub = ix86_zero_extend_to_Pmode (ub);
38532 /* We need to move bounds to memory before any computations. */
38533 if (MEM_P (op1))
38534 m1 = op1;
38535 else
38537 m1 = assign_386_stack_local (BNDmode, SLOT_TEMP);
38538 emit_move_insn (m1, op1);
38541 /* Generate mem expression to be used for access to LB and UB. */
38542 m1h1 = adjust_address (m1, Pmode, 0);
38543 m1h2 = adjust_address (m1, Pmode, GET_MODE_SIZE (Pmode));
38545 t1 = gen_reg_rtx (Pmode);
38547 /* Compute LB. */
38548 emit_move_insn (t1, m1h1);
38549 ix86_emit_move_max (t1, lb);
38550 emit_move_insn (m1h1, t1);
38552 /* Compute UB. UB is stored in 1's complement form. Therefore
38553 we also use max here. */
38554 emit_move_insn (t1, m1h2);
38555 ix86_emit_move_max (t1, ub);
38556 emit_move_insn (m1h2, t1);
38558 op2 = gen_reg_rtx (BNDmode);
38559 emit_move_insn (op2, m1);
38561 return chkp_join_splitted_slot (lb, op2);
38564 case IX86_BUILTIN_BNDINT:
38566 rtx res, rh1, rh2, lb1, lb2, ub1, ub2;
38568 if (!target
38569 || GET_MODE (target) != BNDmode
38570 || !register_operand (target, BNDmode))
38571 target = gen_reg_rtx (BNDmode);
38573 arg0 = CALL_EXPR_ARG (exp, 0);
38574 arg1 = CALL_EXPR_ARG (exp, 1);
38576 op0 = expand_normal (arg0);
38577 op1 = expand_normal (arg1);
38579 res = assign_386_stack_local (BNDmode, SLOT_TEMP);
38580 rh1 = adjust_address (res, Pmode, 0);
38581 rh2 = adjust_address (res, Pmode, GET_MODE_SIZE (Pmode));
38583 /* Put first bounds to temporaries. */
38584 lb1 = gen_reg_rtx (Pmode);
38585 ub1 = gen_reg_rtx (Pmode);
38586 if (MEM_P (op0))
38588 emit_move_insn (lb1, adjust_address (op0, Pmode, 0));
38589 emit_move_insn (ub1, adjust_address (op0, Pmode,
38590 GET_MODE_SIZE (Pmode)));
38592 else
38594 emit_move_insn (res, op0);
38595 emit_move_insn (lb1, rh1);
38596 emit_move_insn (ub1, rh2);
38599 /* Put second bounds to temporaries. */
38600 lb2 = gen_reg_rtx (Pmode);
38601 ub2 = gen_reg_rtx (Pmode);
38602 if (MEM_P (op1))
38604 emit_move_insn (lb2, adjust_address (op1, Pmode, 0));
38605 emit_move_insn (ub2, adjust_address (op1, Pmode,
38606 GET_MODE_SIZE (Pmode)));
38608 else
38610 emit_move_insn (res, op1);
38611 emit_move_insn (lb2, rh1);
38612 emit_move_insn (ub2, rh2);
38615 /* Compute LB. */
38616 ix86_emit_move_max (lb1, lb2);
38617 emit_move_insn (rh1, lb1);
38619 /* Compute UB. UB is stored in 1's complement form. Therefore
38620 we also use max here. */
38621 ix86_emit_move_max (ub1, ub2);
38622 emit_move_insn (rh2, ub1);
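/* Worked example (illustrative only): intersecting [0x1000, 0x1fff] with
   [0x1800, 0x2fff] should give [0x1800, 0x1fff].  LB = max (0x1000, 0x1800)
   = 0x1800; the UBs are kept complemented, and max (~0x1fff, ~0x2fff)
   = ~0x1fff, i.e. the smaller plain upper bound 0x1fff, as required.  */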
38624 emit_move_insn (target, res);
38626 return target;
38629 case IX86_BUILTIN_SIZEOF:
38631 tree name;
38632 rtx symbol;
38634 if (!target
38635 || GET_MODE (target) != Pmode
38636 || !register_operand (target, Pmode))
38637 target = gen_reg_rtx (Pmode);
38639 arg0 = CALL_EXPR_ARG (exp, 0);
38640 gcc_assert (TREE_CODE (arg0) == VAR_DECL);
38642 name = DECL_ASSEMBLER_NAME (arg0);
38643 symbol = gen_rtx_SYMBOL_REF (Pmode, IDENTIFIER_POINTER (name));
38645 emit_insn (Pmode == SImode
38646 ? gen_move_size_reloc_si (target, symbol)
38647 : gen_move_size_reloc_di (target, symbol));
38649 return target;
38652 case IX86_BUILTIN_BNDLOWER:
38654 rtx mem, hmem;
38656 if (!target
38657 || GET_MODE (target) != Pmode
38658 || !register_operand (target, Pmode))
38659 target = gen_reg_rtx (Pmode);
38661 arg0 = CALL_EXPR_ARG (exp, 0);
38662 op0 = expand_normal (arg0);
38664 /* We need to move bounds to memory first. */
38665 if (MEM_P (op0))
38666 mem = op0;
38667 else
38669 mem = assign_386_stack_local (BNDmode, SLOT_TEMP);
38670 emit_move_insn (mem, op0);
38673 /* Generate mem expression to access LB and load it. */
38674 hmem = adjust_address (mem, Pmode, 0);
38675 emit_move_insn (target, hmem);
38677 return target;
38680 case IX86_BUILTIN_BNDUPPER:
38682 rtx mem, hmem, res;
38684 if (!target
38685 || GET_MODE (target) != Pmode
38686 || !register_operand (target, Pmode))
38687 target = gen_reg_rtx (Pmode);
38689 arg0 = CALL_EXPR_ARG (exp, 0);
38690 op0 = expand_normal (arg0);
38692 /* We need to move bounds to memory first. */
38693 if (MEM_P (op0))
38694 mem = op0;
38695 else
38697 mem = assign_386_stack_local (BNDmode, SLOT_TEMP);
38698 emit_move_insn (mem, op0);
38701 /* Generate mem expression to access UB. */
38702 hmem = adjust_address (mem, Pmode, GET_MODE_SIZE (Pmode));
38704 /* We need to invert all bits of UB. */
38705 res = expand_simple_unop (Pmode, NOT, hmem, target, 1);
38707 if (res != target)
38708 emit_move_insn (target, res);
38710 return target;
38713 case IX86_BUILTIN_MASKMOVQ:
38714 case IX86_BUILTIN_MASKMOVDQU:
38715 icode = (fcode == IX86_BUILTIN_MASKMOVQ
38716 ? CODE_FOR_mmx_maskmovq
38717 : CODE_FOR_sse2_maskmovdqu);
38718 /* Note the arg order is different from the operand order. */
38719 arg1 = CALL_EXPR_ARG (exp, 0);
38720 arg2 = CALL_EXPR_ARG (exp, 1);
38721 arg0 = CALL_EXPR_ARG (exp, 2);
38722 op0 = expand_normal (arg0);
38723 op1 = expand_normal (arg1);
38724 op2 = expand_normal (arg2);
38725 mode0 = insn_data[icode].operand[0].mode;
38726 mode1 = insn_data[icode].operand[1].mode;
38727 mode2 = insn_data[icode].operand[2].mode;
38729 op0 = ix86_zero_extend_to_Pmode (op0);
38730 op0 = gen_rtx_MEM (mode1, op0);
38732 if (!insn_data[icode].operand[0].predicate (op0, mode0))
38733 op0 = copy_to_mode_reg (mode0, op0);
38734 if (!insn_data[icode].operand[1].predicate (op1, mode1))
38735 op1 = copy_to_mode_reg (mode1, op1);
38736 if (!insn_data[icode].operand[2].predicate (op2, mode2))
38737 op2 = copy_to_mode_reg (mode2, op2);
38738 pat = GEN_FCN (icode) (op0, op1, op2);
38739 if (! pat)
38740 return 0;
38741 emit_insn (pat);
38742 return 0;
38744 case IX86_BUILTIN_LDMXCSR:
38745 op0 = expand_normal (CALL_EXPR_ARG (exp, 0));
38746 target = assign_386_stack_local (SImode, SLOT_TEMP);
38747 emit_move_insn (target, op0);
38748 emit_insn (gen_sse_ldmxcsr (target));
38749 return 0;
38751 case IX86_BUILTIN_STMXCSR:
38752 target = assign_386_stack_local (SImode, SLOT_TEMP);
38753 emit_insn (gen_sse_stmxcsr (target));
38754 return copy_to_mode_reg (SImode, target);
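/* Illustrative only: these two cases implement the MXCSR accessors from
   xmmintrin.h, roughly

     void _mm_setcsr (unsigned int i) { __builtin_ia32_ldmxcsr (i); }
     unsigned int _mm_getcsr (void) { return __builtin_ia32_stmxcsr (); }

   The value is bounced through a stack slot because ldmxcsr/stmxcsr only
   accept memory operands.  */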
38756 case IX86_BUILTIN_CLFLUSH:
38757 arg0 = CALL_EXPR_ARG (exp, 0);
38758 op0 = expand_normal (arg0);
38759 icode = CODE_FOR_sse2_clflush;
38760 if (!insn_data[icode].operand[0].predicate (op0, Pmode))
38761 op0 = ix86_zero_extend_to_Pmode (op0);
38763 emit_insn (gen_sse2_clflush (op0));
38764 return 0;
38766 case IX86_BUILTIN_CLWB:
38767 arg0 = CALL_EXPR_ARG (exp, 0);
38768 op0 = expand_normal (arg0);
38769 icode = CODE_FOR_clwb;
38770 if (!insn_data[icode].operand[0].predicate (op0, Pmode))
38771 op0 = ix86_zero_extend_to_Pmode (op0);
38773 emit_insn (gen_clwb (op0));
38774 return 0;
38776 case IX86_BUILTIN_CLFLUSHOPT:
38777 arg0 = CALL_EXPR_ARG (exp, 0);
38778 op0 = expand_normal (arg0);
38779 icode = CODE_FOR_clflushopt;
38780 if (!insn_data[icode].operand[0].predicate (op0, Pmode))
38781 op0 = ix86_zero_extend_to_Pmode (op0);
38783 emit_insn (gen_clflushopt (op0));
38784 return 0;
38786 case IX86_BUILTIN_MONITOR:
38787 arg0 = CALL_EXPR_ARG (exp, 0);
38788 arg1 = CALL_EXPR_ARG (exp, 1);
38789 arg2 = CALL_EXPR_ARG (exp, 2);
38790 op0 = expand_normal (arg0);
38791 op1 = expand_normal (arg1);
38792 op2 = expand_normal (arg2);
38793 if (!REG_P (op0))
38794 op0 = ix86_zero_extend_to_Pmode (op0);
38795 if (!REG_P (op1))
38796 op1 = copy_to_mode_reg (SImode, op1);
38797 if (!REG_P (op2))
38798 op2 = copy_to_mode_reg (SImode, op2);
38799 emit_insn (ix86_gen_monitor (op0, op1, op2));
38800 return 0;
38802 case IX86_BUILTIN_MWAIT:
38803 arg0 = CALL_EXPR_ARG (exp, 0);
38804 arg1 = CALL_EXPR_ARG (exp, 1);
38805 op0 = expand_normal (arg0);
38806 op1 = expand_normal (arg1);
38807 if (!REG_P (op0))
38808 op0 = copy_to_mode_reg (SImode, op0);
38809 if (!REG_P (op1))
38810 op1 = copy_to_mode_reg (SImode, op1);
38811 emit_insn (gen_sse3_mwait (op0, op1));
38812 return 0;
38814 case IX86_BUILTIN_VEC_INIT_V2SI:
38815 case IX86_BUILTIN_VEC_INIT_V4HI:
38816 case IX86_BUILTIN_VEC_INIT_V8QI:
38817 return ix86_expand_vec_init_builtin (TREE_TYPE (exp), exp, target);
38819 case IX86_BUILTIN_VEC_EXT_V2DF:
38820 case IX86_BUILTIN_VEC_EXT_V2DI:
38821 case IX86_BUILTIN_VEC_EXT_V4SF:
38822 case IX86_BUILTIN_VEC_EXT_V4SI:
38823 case IX86_BUILTIN_VEC_EXT_V8HI:
38824 case IX86_BUILTIN_VEC_EXT_V2SI:
38825 case IX86_BUILTIN_VEC_EXT_V4HI:
38826 case IX86_BUILTIN_VEC_EXT_V16QI:
38827 return ix86_expand_vec_ext_builtin (exp, target);
38829 case IX86_BUILTIN_VEC_SET_V2DI:
38830 case IX86_BUILTIN_VEC_SET_V4SF:
38831 case IX86_BUILTIN_VEC_SET_V4SI:
38832 case IX86_BUILTIN_VEC_SET_V8HI:
38833 case IX86_BUILTIN_VEC_SET_V4HI:
38834 case IX86_BUILTIN_VEC_SET_V16QI:
38835 return ix86_expand_vec_set_builtin (exp);
38837 case IX86_BUILTIN_INFQ:
38838 case IX86_BUILTIN_HUGE_VALQ:
38840 REAL_VALUE_TYPE inf;
38841 rtx tmp;
38843 real_inf (&inf);
38844 tmp = CONST_DOUBLE_FROM_REAL_VALUE (inf, mode);
38846 tmp = validize_mem (force_const_mem (mode, tmp));
38848 if (target == 0)
38849 target = gen_reg_rtx (mode);
38851 emit_move_insn (target, tmp);
38852 return target;
38855 case IX86_BUILTIN_RDPMC:
38856 case IX86_BUILTIN_RDTSC:
38857 case IX86_BUILTIN_RDTSCP:
38859 op0 = gen_reg_rtx (DImode);
38860 op1 = gen_reg_rtx (DImode);
38862 if (fcode == IX86_BUILTIN_RDPMC)
38864 arg0 = CALL_EXPR_ARG (exp, 0);
38865 op2 = expand_normal (arg0);
38866 if (!register_operand (op2, SImode))
38867 op2 = copy_to_mode_reg (SImode, op2);
38869 insn = (TARGET_64BIT
38870 ? gen_rdpmc_rex64 (op0, op1, op2)
38871 : gen_rdpmc (op0, op2));
38872 emit_insn (insn);
38874 else if (fcode == IX86_BUILTIN_RDTSC)
38876 insn = (TARGET_64BIT
38877 ? gen_rdtsc_rex64 (op0, op1)
38878 : gen_rdtsc (op0));
38879 emit_insn (insn);
38881 else
38883 op2 = gen_reg_rtx (SImode);
38885 insn = (TARGET_64BIT
38886 ? gen_rdtscp_rex64 (op0, op1, op2)
38887 : gen_rdtscp (op0, op2));
38888 emit_insn (insn);
38890 arg0 = CALL_EXPR_ARG (exp, 0);
38891 op4 = expand_normal (arg0);
38892 if (!address_operand (op4, VOIDmode))
38894 op4 = convert_memory_address (Pmode, op4);
38895 op4 = copy_addr_to_reg (op4);
38897 emit_move_insn (gen_rtx_MEM (SImode, op4), op2);
38900 if (target == 0)
38902 /* mode is VOIDmode if __builtin_rd* has been called
38903 without an lhs. */
38904 if (mode == VOIDmode)
38905 return target;
38906 target = gen_reg_rtx (mode);
38909 if (TARGET_64BIT)
38911 op1 = expand_simple_binop (DImode, ASHIFT, op1, GEN_INT (32),
38912 op1, 1, OPTAB_DIRECT);
38913 op0 = expand_simple_binop (DImode, IOR, op0, op1,
38914 op0, 1, OPTAB_DIRECT);
38917 emit_move_insn (target, op0);
38918 return target;
38920 case IX86_BUILTIN_FXSAVE:
38921 case IX86_BUILTIN_FXRSTOR:
38922 case IX86_BUILTIN_FXSAVE64:
38923 case IX86_BUILTIN_FXRSTOR64:
38924 case IX86_BUILTIN_FNSTENV:
38925 case IX86_BUILTIN_FLDENV:
38926 mode0 = BLKmode;
38927 switch (fcode)
38929 case IX86_BUILTIN_FXSAVE:
38930 icode = CODE_FOR_fxsave;
38931 break;
38932 case IX86_BUILTIN_FXRSTOR:
38933 icode = CODE_FOR_fxrstor;
38934 break;
38935 case IX86_BUILTIN_FXSAVE64:
38936 icode = CODE_FOR_fxsave64;
38937 break;
38938 case IX86_BUILTIN_FXRSTOR64:
38939 icode = CODE_FOR_fxrstor64;
38940 break;
38941 case IX86_BUILTIN_FNSTENV:
38942 icode = CODE_FOR_fnstenv;
38943 break;
38944 case IX86_BUILTIN_FLDENV:
38945 icode = CODE_FOR_fldenv;
38946 break;
38947 default:
38948 gcc_unreachable ();
38951 arg0 = CALL_EXPR_ARG (exp, 0);
38952 op0 = expand_normal (arg0);
38954 if (!address_operand (op0, VOIDmode))
38956 op0 = convert_memory_address (Pmode, op0);
38957 op0 = copy_addr_to_reg (op0);
38959 op0 = gen_rtx_MEM (mode0, op0);
38961 pat = GEN_FCN (icode) (op0);
38962 if (pat)
38963 emit_insn (pat);
38964 return 0;
38966 case IX86_BUILTIN_XSAVE:
38967 case IX86_BUILTIN_XRSTOR:
38968 case IX86_BUILTIN_XSAVE64:
38969 case IX86_BUILTIN_XRSTOR64:
38970 case IX86_BUILTIN_XSAVEOPT:
38971 case IX86_BUILTIN_XSAVEOPT64:
38972 case IX86_BUILTIN_XSAVES:
38973 case IX86_BUILTIN_XRSTORS:
38974 case IX86_BUILTIN_XSAVES64:
38975 case IX86_BUILTIN_XRSTORS64:
38976 case IX86_BUILTIN_XSAVEC:
38977 case IX86_BUILTIN_XSAVEC64:
38978 arg0 = CALL_EXPR_ARG (exp, 0);
38979 arg1 = CALL_EXPR_ARG (exp, 1);
38980 op0 = expand_normal (arg0);
38981 op1 = expand_normal (arg1);
38983 if (!address_operand (op0, VOIDmode))
38985 op0 = convert_memory_address (Pmode, op0);
38986 op0 = copy_addr_to_reg (op0);
38988 op0 = gen_rtx_MEM (BLKmode, op0);
38990 op1 = force_reg (DImode, op1);
38992 if (TARGET_64BIT)
38994 op2 = expand_simple_binop (DImode, LSHIFTRT, op1, GEN_INT (32),
38995 NULL, 1, OPTAB_DIRECT);
38996 switch (fcode)
38998 case IX86_BUILTIN_XSAVE:
38999 icode = CODE_FOR_xsave_rex64;
39000 break;
39001 case IX86_BUILTIN_XRSTOR:
39002 icode = CODE_FOR_xrstor_rex64;
39003 break;
39004 case IX86_BUILTIN_XSAVE64:
39005 icode = CODE_FOR_xsave64;
39006 break;
39007 case IX86_BUILTIN_XRSTOR64:
39008 icode = CODE_FOR_xrstor64;
39009 break;
39010 case IX86_BUILTIN_XSAVEOPT:
39011 icode = CODE_FOR_xsaveopt_rex64;
39012 break;
39013 case IX86_BUILTIN_XSAVEOPT64:
39014 icode = CODE_FOR_xsaveopt64;
39015 break;
39016 case IX86_BUILTIN_XSAVES:
39017 icode = CODE_FOR_xsaves_rex64;
39018 break;
39019 case IX86_BUILTIN_XRSTORS:
39020 icode = CODE_FOR_xrstors_rex64;
39021 break;
39022 case IX86_BUILTIN_XSAVES64:
39023 icode = CODE_FOR_xsaves64;
39024 break;
39025 case IX86_BUILTIN_XRSTORS64:
39026 icode = CODE_FOR_xrstors64;
39027 break;
39028 case IX86_BUILTIN_XSAVEC:
39029 icode = CODE_FOR_xsavec_rex64;
39030 break;
39031 case IX86_BUILTIN_XSAVEC64:
39032 icode = CODE_FOR_xsavec64;
39033 break;
39034 default:
39035 gcc_unreachable ();
39038 op2 = gen_lowpart (SImode, op2);
39039 op1 = gen_lowpart (SImode, op1);
39040 pat = GEN_FCN (icode) (op0, op1, op2);
39042 else
39044 switch (fcode)
39046 case IX86_BUILTIN_XSAVE:
39047 icode = CODE_FOR_xsave;
39048 break;
39049 case IX86_BUILTIN_XRSTOR:
39050 icode = CODE_FOR_xrstor;
39051 break;
39052 case IX86_BUILTIN_XSAVEOPT:
39053 icode = CODE_FOR_xsaveopt;
39054 break;
39055 case IX86_BUILTIN_XSAVES:
39056 icode = CODE_FOR_xsaves;
39057 break;
39058 case IX86_BUILTIN_XRSTORS:
39059 icode = CODE_FOR_xrstors;
39060 break;
39061 case IX86_BUILTIN_XSAVEC:
39062 icode = CODE_FOR_xsavec;
39063 break;
39064 default:
39065 gcc_unreachable ();
39067 pat = GEN_FCN (icode) (op0, op1);
39070 if (pat)
39071 emit_insn (pat);
39072 return 0;
39074 case IX86_BUILTIN_LLWPCB:
39075 arg0 = CALL_EXPR_ARG (exp, 0);
39076 op0 = expand_normal (arg0);
39077 icode = CODE_FOR_lwp_llwpcb;
39078 if (!insn_data[icode].operand[0].predicate (op0, Pmode))
39079 op0 = ix86_zero_extend_to_Pmode (op0);
39080 emit_insn (gen_lwp_llwpcb (op0));
39081 return 0;
39083 case IX86_BUILTIN_SLWPCB:
39084 icode = CODE_FOR_lwp_slwpcb;
39085 if (!target
39086 || !insn_data[icode].operand[0].predicate (target, Pmode))
39087 target = gen_reg_rtx (Pmode);
39088 emit_insn (gen_lwp_slwpcb (target));
39089 return target;
39091 case IX86_BUILTIN_BEXTRI32:
39092 case IX86_BUILTIN_BEXTRI64:
39093 arg0 = CALL_EXPR_ARG (exp, 0);
39094 arg1 = CALL_EXPR_ARG (exp, 1);
39095 op0 = expand_normal (arg0);
39096 op1 = expand_normal (arg1);
39097 icode = (fcode == IX86_BUILTIN_BEXTRI32
39098 ? CODE_FOR_tbm_bextri_si
39099 : CODE_FOR_tbm_bextri_di);
39100 if (!CONST_INT_P (op1))
39102 error ("last argument must be an immediate");
39103 return const0_rtx;
39105 else
39107 unsigned char length = (INTVAL (op1) >> 8) & 0xFF;
39108 unsigned char lsb_index = INTVAL (op1) & 0xFF;
39109 op1 = GEN_INT (length);
39110 op2 = GEN_INT (lsb_index);
39111 pat = GEN_FCN (icode) (target, op0, op1, op2);
39112 if (pat)
39113 emit_insn (pat);
39114 return target;
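/* Illustrative only: the control word packs the field as
   (length << 8) | lsb_index, so, assuming the __builtin_ia32_bextri_u32
   spelling used by tbmintrin.h,

     unsigned int r = __builtin_ia32_bextri_u32 (x, 0x0804);

   extracts the 8 bits of x starting at bit 4, i.e. bits 4..11.  */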
39117 case IX86_BUILTIN_RDRAND16_STEP:
39118 icode = CODE_FOR_rdrandhi_1;
39119 mode0 = HImode;
39120 goto rdrand_step;
39122 case IX86_BUILTIN_RDRAND32_STEP:
39123 icode = CODE_FOR_rdrandsi_1;
39124 mode0 = SImode;
39125 goto rdrand_step;
39127 case IX86_BUILTIN_RDRAND64_STEP:
39128 icode = CODE_FOR_rdranddi_1;
39129 mode0 = DImode;
39131 rdrand_step:
39132 op0 = gen_reg_rtx (mode0);
39133 emit_insn (GEN_FCN (icode) (op0));
39135 arg0 = CALL_EXPR_ARG (exp, 0);
39136 op1 = expand_normal (arg0);
39137 if (!address_operand (op1, VOIDmode))
39139 op1 = convert_memory_address (Pmode, op1);
39140 op1 = copy_addr_to_reg (op1);
39142 emit_move_insn (gen_rtx_MEM (mode0, op1), op0);
39144 op1 = gen_reg_rtx (SImode);
39145 emit_move_insn (op1, CONST1_RTX (SImode));
39147 /* Emit SImode conditional move. */
39148 if (mode0 == HImode)
39150 op2 = gen_reg_rtx (SImode);
39151 emit_insn (gen_zero_extendhisi2 (op2, op0));
39153 else if (mode0 == SImode)
39154 op2 = op0;
39155 else
39156 op2 = gen_rtx_SUBREG (SImode, op0, 0);
39158 if (target == 0
39159 || !register_operand (target, SImode))
39160 target = gen_reg_rtx (SImode);
39162 pat = gen_rtx_GEU (VOIDmode, gen_rtx_REG (CCCmode, FLAGS_REG),
39163 const0_rtx);
39164 emit_insn (gen_rtx_SET (VOIDmode, target,
39165 gen_rtx_IF_THEN_ELSE (SImode, pat, op2, op1)));
39166 return target;
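/* Illustrative only: immintrin.h exposes this roughly as

     unsigned int v;
     int ok = _rdrand32_step (&v);

   On success (CF set by rdrand) the random value is stored through the
   pointer and the builtin returns 1; on failure the destination is 0 and
   so is the return value, which is what the conditional move above
   arranges.  */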
39168 case IX86_BUILTIN_RDSEED16_STEP:
39169 icode = CODE_FOR_rdseedhi_1;
39170 mode0 = HImode;
39171 goto rdseed_step;
39173 case IX86_BUILTIN_RDSEED32_STEP:
39174 icode = CODE_FOR_rdseedsi_1;
39175 mode0 = SImode;
39176 goto rdseed_step;
39178 case IX86_BUILTIN_RDSEED64_STEP:
39179 icode = CODE_FOR_rdseeddi_1;
39180 mode0 = DImode;
39182 rdseed_step:
39183 op0 = gen_reg_rtx (mode0);
39184 emit_insn (GEN_FCN (icode) (op0));
39186 arg0 = CALL_EXPR_ARG (exp, 0);
39187 op1 = expand_normal (arg0);
39188 if (!address_operand (op1, VOIDmode))
39190 op1 = convert_memory_address (Pmode, op1);
39191 op1 = copy_addr_to_reg (op1);
39193 emit_move_insn (gen_rtx_MEM (mode0, op1), op0);
39195 op2 = gen_reg_rtx (QImode);
39197 pat = gen_rtx_LTU (QImode, gen_rtx_REG (CCCmode, FLAGS_REG),
39198 const0_rtx);
39199 emit_insn (gen_rtx_SET (VOIDmode, op2, pat));
39201 if (target == 0
39202 || !register_operand (target, SImode))
39203 target = gen_reg_rtx (SImode);
39205 emit_insn (gen_zero_extendqisi2 (target, op2));
39206 return target;
39208 case IX86_BUILTIN_SBB32:
39209 icode = CODE_FOR_subsi3_carry;
39210 mode0 = SImode;
39211 goto addcarryx;
39213 case IX86_BUILTIN_SBB64:
39214 icode = CODE_FOR_subdi3_carry;
39215 mode0 = DImode;
39216 goto addcarryx;
39218 case IX86_BUILTIN_ADDCARRYX32:
39219 icode = TARGET_ADX ? CODE_FOR_adcxsi3 : CODE_FOR_addsi3_carry;
39220 mode0 = SImode;
39221 goto addcarryx;
39223 case IX86_BUILTIN_ADDCARRYX64:
39224 icode = TARGET_ADX ? CODE_FOR_adcxdi3 : CODE_FOR_adddi3_carry;
39225 mode0 = DImode;
39227 addcarryx:
39228 arg0 = CALL_EXPR_ARG (exp, 0); /* unsigned char c_in. */
39229 arg1 = CALL_EXPR_ARG (exp, 1); /* unsigned int src1. */
39230 arg2 = CALL_EXPR_ARG (exp, 2); /* unsigned int src2. */
39231 arg3 = CALL_EXPR_ARG (exp, 3); /* unsigned int *sum_out. */
39233 op0 = gen_reg_rtx (QImode);
39235 /* Generate CF from input operand. */
39236 op1 = expand_normal (arg0);
39237 op1 = copy_to_mode_reg (QImode, convert_to_mode (QImode, op1, 1));
39238 emit_insn (gen_addqi3_cc (op0, op1, constm1_rtx));
39240 /* Generate an ADCX instruction to compute X+Y+CF. */
39241 op2 = expand_normal (arg1);
39242 op3 = expand_normal (arg2);
39244 if (!REG_P (op2))
39245 op2 = copy_to_mode_reg (mode0, op2);
39246 if (!REG_P (op3))
39247 op3 = copy_to_mode_reg (mode0, op3);
39249 op0 = gen_reg_rtx (mode0);
39251 op4 = gen_rtx_REG (CCCmode, FLAGS_REG);
39252 pat = gen_rtx_LTU (VOIDmode, op4, const0_rtx);
39253 emit_insn (GEN_FCN (icode) (op0, op2, op3, op4, pat));
39255 /* Store the result. */
39256 op4 = expand_normal (arg3);
39257 if (!address_operand (op4, VOIDmode))
39259 op4 = convert_memory_address (Pmode, op4);
39260 op4 = copy_addr_to_reg (op4);
39262 emit_move_insn (gen_rtx_MEM (mode0, op4), op0);
39264 /* Return current CF value. */
39265 if (target == 0)
39266 target = gen_reg_rtx (QImode);
39268 PUT_MODE (pat, QImode);
39269 emit_insn (gen_rtx_SET (VOIDmode, target, pat));
39270 return target;
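/* Illustrative only: adxintrin.h wraps this roughly as

     unsigned int sum;
     unsigned char c_out = _addcarryx_u32 (c_in, a, b, &sum);

   i.e. sum receives the low bits of a + b + c_in and the carry out comes
   back as a 0/1 value, materialized from CF by the final SET above.  */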
39272 case IX86_BUILTIN_READ_FLAGS:
39273 emit_insn (gen_push (gen_rtx_REG (word_mode, FLAGS_REG)));
39275 if (optimize
39276 || target == NULL_RTX
39277 || !nonimmediate_operand (target, word_mode)
39278 || GET_MODE (target) != word_mode)
39279 target = gen_reg_rtx (word_mode);
39281 emit_insn (gen_pop (target));
39282 return target;
39284 case IX86_BUILTIN_WRITE_FLAGS:
39286 arg0 = CALL_EXPR_ARG (exp, 0);
39287 op0 = expand_normal (arg0);
39288 if (!general_no_elim_operand (op0, word_mode))
39289 op0 = copy_to_mode_reg (word_mode, op0);
39291 emit_insn (gen_push (op0));
39292 emit_insn (gen_pop (gen_rtx_REG (word_mode, FLAGS_REG)));
39293 return 0;
39295 case IX86_BUILTIN_KORTESTC16:
39296 icode = CODE_FOR_kortestchi;
39297 mode0 = HImode;
39298 mode1 = CCCmode;
39299 goto kortest;
39301 case IX86_BUILTIN_KORTESTZ16:
39302 icode = CODE_FOR_kortestzhi;
39303 mode0 = HImode;
39304 mode1 = CCZmode;
39306 kortest:
39307 arg0 = CALL_EXPR_ARG (exp, 0); /* Mask reg src1. */
39308 arg1 = CALL_EXPR_ARG (exp, 1); /* Mask reg src2. */
39309 op0 = expand_normal (arg0);
39310 op1 = expand_normal (arg1);
39312 op0 = copy_to_reg (op0);
39313 op0 = simplify_gen_subreg (mode0, op0, GET_MODE (op0), 0);
39314 op1 = copy_to_reg (op1);
39315 op1 = simplify_gen_subreg (mode0, op1, GET_MODE (op1), 0);
39317 target = gen_reg_rtx (QImode);
39318 emit_insn (gen_rtx_SET (mode0, target, const0_rtx));
39320 /* Emit kortest. */
39321 emit_insn (GEN_FCN (icode) (op0, op1));
39322 /* And use setcc to return result from flags. */
39323 ix86_expand_setcc (target, EQ,
39324 gen_rtx_REG (mode1, FLAGS_REG), const0_rtx);
39325 return target;
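/* Illustrative only: the AVX-512 mask intrinsics reach this roughly as

     __mmask16 m1 = 0xffff, m2 = 0x0000;
     int all_ones = _mm512_kortestc (m1, m2);
     int all_zero = _mm512_kortestz (m1, m2);

   kortest ORs the two masks, setting CF when the result is all ones and
   ZF when it is zero; the setcc above turns the relevant flag into the
   0/1 return value.  */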
39327 case IX86_BUILTIN_GATHERSIV2DF:
39328 icode = CODE_FOR_avx2_gathersiv2df;
39329 goto gather_gen;
39330 case IX86_BUILTIN_GATHERSIV4DF:
39331 icode = CODE_FOR_avx2_gathersiv4df;
39332 goto gather_gen;
39333 case IX86_BUILTIN_GATHERDIV2DF:
39334 icode = CODE_FOR_avx2_gatherdiv2df;
39335 goto gather_gen;
39336 case IX86_BUILTIN_GATHERDIV4DF:
39337 icode = CODE_FOR_avx2_gatherdiv4df;
39338 goto gather_gen;
39339 case IX86_BUILTIN_GATHERSIV4SF:
39340 icode = CODE_FOR_avx2_gathersiv4sf;
39341 goto gather_gen;
39342 case IX86_BUILTIN_GATHERSIV8SF:
39343 icode = CODE_FOR_avx2_gathersiv8sf;
39344 goto gather_gen;
39345 case IX86_BUILTIN_GATHERDIV4SF:
39346 icode = CODE_FOR_avx2_gatherdiv4sf;
39347 goto gather_gen;
39348 case IX86_BUILTIN_GATHERDIV8SF:
39349 icode = CODE_FOR_avx2_gatherdiv8sf;
39350 goto gather_gen;
39351 case IX86_BUILTIN_GATHERSIV2DI:
39352 icode = CODE_FOR_avx2_gathersiv2di;
39353 goto gather_gen;
39354 case IX86_BUILTIN_GATHERSIV4DI:
39355 icode = CODE_FOR_avx2_gathersiv4di;
39356 goto gather_gen;
39357 case IX86_BUILTIN_GATHERDIV2DI:
39358 icode = CODE_FOR_avx2_gatherdiv2di;
39359 goto gather_gen;
39360 case IX86_BUILTIN_GATHERDIV4DI:
39361 icode = CODE_FOR_avx2_gatherdiv4di;
39362 goto gather_gen;
39363 case IX86_BUILTIN_GATHERSIV4SI:
39364 icode = CODE_FOR_avx2_gathersiv4si;
39365 goto gather_gen;
39366 case IX86_BUILTIN_GATHERSIV8SI:
39367 icode = CODE_FOR_avx2_gathersiv8si;
39368 goto gather_gen;
39369 case IX86_BUILTIN_GATHERDIV4SI:
39370 icode = CODE_FOR_avx2_gatherdiv4si;
39371 goto gather_gen;
39372 case IX86_BUILTIN_GATHERDIV8SI:
39373 icode = CODE_FOR_avx2_gatherdiv8si;
39374 goto gather_gen;
39375 case IX86_BUILTIN_GATHERALTSIV4DF:
39376 icode = CODE_FOR_avx2_gathersiv4df;
39377 goto gather_gen;
39378 case IX86_BUILTIN_GATHERALTDIV8SF:
39379 icode = CODE_FOR_avx2_gatherdiv8sf;
39380 goto gather_gen;
39381 case IX86_BUILTIN_GATHERALTSIV4DI:
39382 icode = CODE_FOR_avx2_gathersiv4di;
39383 goto gather_gen;
39384 case IX86_BUILTIN_GATHERALTDIV8SI:
39385 icode = CODE_FOR_avx2_gatherdiv8si;
39386 goto gather_gen;
39387 case IX86_BUILTIN_GATHER3SIV16SF:
39388 icode = CODE_FOR_avx512f_gathersiv16sf;
39389 goto gather_gen;
39390 case IX86_BUILTIN_GATHER3SIV8DF:
39391 icode = CODE_FOR_avx512f_gathersiv8df;
39392 goto gather_gen;
39393 case IX86_BUILTIN_GATHER3DIV16SF:
39394 icode = CODE_FOR_avx512f_gatherdiv16sf;
39395 goto gather_gen;
39396 case IX86_BUILTIN_GATHER3DIV8DF:
39397 icode = CODE_FOR_avx512f_gatherdiv8df;
39398 goto gather_gen;
39399 case IX86_BUILTIN_GATHER3SIV16SI:
39400 icode = CODE_FOR_avx512f_gathersiv16si;
39401 goto gather_gen;
39402 case IX86_BUILTIN_GATHER3SIV8DI:
39403 icode = CODE_FOR_avx512f_gathersiv8di;
39404 goto gather_gen;
39405 case IX86_BUILTIN_GATHER3DIV16SI:
39406 icode = CODE_FOR_avx512f_gatherdiv16si;
39407 goto gather_gen;
39408 case IX86_BUILTIN_GATHER3DIV8DI:
39409 icode = CODE_FOR_avx512f_gatherdiv8di;
39410 goto gather_gen;
39411 case IX86_BUILTIN_GATHER3ALTSIV8DF:
39412 icode = CODE_FOR_avx512f_gathersiv8df;
39413 goto gather_gen;
39414 case IX86_BUILTIN_GATHER3ALTDIV16SF:
39415 icode = CODE_FOR_avx512f_gatherdiv16sf;
39416 goto gather_gen;
39417 case IX86_BUILTIN_GATHER3ALTSIV8DI:
39418 icode = CODE_FOR_avx512f_gathersiv8di;
39419 goto gather_gen;
39420 case IX86_BUILTIN_GATHER3ALTDIV16SI:
39421 icode = CODE_FOR_avx512f_gatherdiv16si;
39422 goto gather_gen;
39423 case IX86_BUILTIN_GATHER3SIV2DF:
39424 icode = CODE_FOR_avx512vl_gathersiv2df;
39425 goto gather_gen;
39426 case IX86_BUILTIN_GATHER3SIV4DF:
39427 icode = CODE_FOR_avx512vl_gathersiv4df;
39428 goto gather_gen;
39429 case IX86_BUILTIN_GATHER3DIV2DF:
39430 icode = CODE_FOR_avx512vl_gatherdiv2df;
39431 goto gather_gen;
39432 case IX86_BUILTIN_GATHER3DIV4DF:
39433 icode = CODE_FOR_avx512vl_gatherdiv4df;
39434 goto gather_gen;
39435 case IX86_BUILTIN_GATHER3SIV4SF:
39436 icode = CODE_FOR_avx512vl_gathersiv4sf;
39437 goto gather_gen;
39438 case IX86_BUILTIN_GATHER3SIV8SF:
39439 icode = CODE_FOR_avx512vl_gathersiv8sf;
39440 goto gather_gen;
39441 case IX86_BUILTIN_GATHER3DIV4SF:
39442 icode = CODE_FOR_avx512vl_gatherdiv4sf;
39443 goto gather_gen;
39444 case IX86_BUILTIN_GATHER3DIV8SF:
39445 icode = CODE_FOR_avx512vl_gatherdiv8sf;
39446 goto gather_gen;
39447 case IX86_BUILTIN_GATHER3SIV2DI:
39448 icode = CODE_FOR_avx512vl_gathersiv2di;
39449 goto gather_gen;
39450 case IX86_BUILTIN_GATHER3SIV4DI:
39451 icode = CODE_FOR_avx512vl_gathersiv4di;
39452 goto gather_gen;
39453 case IX86_BUILTIN_GATHER3DIV2DI:
39454 icode = CODE_FOR_avx512vl_gatherdiv2di;
39455 goto gather_gen;
39456 case IX86_BUILTIN_GATHER3DIV4DI:
39457 icode = CODE_FOR_avx512vl_gatherdiv4di;
39458 goto gather_gen;
39459 case IX86_BUILTIN_GATHER3SIV4SI:
39460 icode = CODE_FOR_avx512vl_gathersiv4si;
39461 goto gather_gen;
39462 case IX86_BUILTIN_GATHER3SIV8SI:
39463 icode = CODE_FOR_avx512vl_gathersiv8si;
39464 goto gather_gen;
39465 case IX86_BUILTIN_GATHER3DIV4SI:
39466 icode = CODE_FOR_avx512vl_gatherdiv4si;
39467 goto gather_gen;
39468 case IX86_BUILTIN_GATHER3DIV8SI:
39469 icode = CODE_FOR_avx512vl_gatherdiv8si;
39470 goto gather_gen;
39471 case IX86_BUILTIN_GATHER3ALTSIV4DF:
39472 icode = CODE_FOR_avx512vl_gathersiv4df;
39473 goto gather_gen;
39474 case IX86_BUILTIN_GATHER3ALTDIV8SF:
39475 icode = CODE_FOR_avx512vl_gatherdiv8sf;
39476 goto gather_gen;
39477 case IX86_BUILTIN_GATHER3ALTSIV4DI:
39478 icode = CODE_FOR_avx512vl_gathersiv4di;
39479 goto gather_gen;
39480 case IX86_BUILTIN_GATHER3ALTDIV8SI:
39481 icode = CODE_FOR_avx512vl_gatherdiv8si;
39482 goto gather_gen;
39483 case IX86_BUILTIN_SCATTERSIV16SF:
39484 icode = CODE_FOR_avx512f_scattersiv16sf;
39485 goto scatter_gen;
39486 case IX86_BUILTIN_SCATTERSIV8DF:
39487 icode = CODE_FOR_avx512f_scattersiv8df;
39488 goto scatter_gen;
39489 case IX86_BUILTIN_SCATTERDIV16SF:
39490 icode = CODE_FOR_avx512f_scatterdiv16sf;
39491 goto scatter_gen;
39492 case IX86_BUILTIN_SCATTERDIV8DF:
39493 icode = CODE_FOR_avx512f_scatterdiv8df;
39494 goto scatter_gen;
39495 case IX86_BUILTIN_SCATTERSIV16SI:
39496 icode = CODE_FOR_avx512f_scattersiv16si;
39497 goto scatter_gen;
39498 case IX86_BUILTIN_SCATTERSIV8DI:
39499 icode = CODE_FOR_avx512f_scattersiv8di;
39500 goto scatter_gen;
39501 case IX86_BUILTIN_SCATTERDIV16SI:
39502 icode = CODE_FOR_avx512f_scatterdiv16si;
39503 goto scatter_gen;
39504 case IX86_BUILTIN_SCATTERDIV8DI:
39505 icode = CODE_FOR_avx512f_scatterdiv8di;
39506 goto scatter_gen;
39507 case IX86_BUILTIN_SCATTERSIV8SF:
39508 icode = CODE_FOR_avx512vl_scattersiv8sf;
39509 goto scatter_gen;
39510 case IX86_BUILTIN_SCATTERSIV4SF:
39511 icode = CODE_FOR_avx512vl_scattersiv4sf;
39512 goto scatter_gen;
39513 case IX86_BUILTIN_SCATTERSIV4DF:
39514 icode = CODE_FOR_avx512vl_scattersiv4df;
39515 goto scatter_gen;
39516 case IX86_BUILTIN_SCATTERSIV2DF:
39517 icode = CODE_FOR_avx512vl_scattersiv2df;
39518 goto scatter_gen;
39519 case IX86_BUILTIN_SCATTERDIV8SF:
39520 icode = CODE_FOR_avx512vl_scatterdiv8sf;
39521 goto scatter_gen;
39522 case IX86_BUILTIN_SCATTERDIV4SF:
39523 icode = CODE_FOR_avx512vl_scatterdiv4sf;
39524 goto scatter_gen;
39525 case IX86_BUILTIN_SCATTERDIV4DF:
39526 icode = CODE_FOR_avx512vl_scatterdiv4df;
39527 goto scatter_gen;
39528 case IX86_BUILTIN_SCATTERDIV2DF:
39529 icode = CODE_FOR_avx512vl_scatterdiv2df;
39530 goto scatter_gen;
39531 case IX86_BUILTIN_SCATTERSIV8SI:
39532 icode = CODE_FOR_avx512vl_scattersiv8si;
39533 goto scatter_gen;
39534 case IX86_BUILTIN_SCATTERSIV4SI:
39535 icode = CODE_FOR_avx512vl_scattersiv4si;
39536 goto scatter_gen;
39537 case IX86_BUILTIN_SCATTERSIV4DI:
39538 icode = CODE_FOR_avx512vl_scattersiv4di;
39539 goto scatter_gen;
39540 case IX86_BUILTIN_SCATTERSIV2DI:
39541 icode = CODE_FOR_avx512vl_scattersiv2di;
39542 goto scatter_gen;
39543 case IX86_BUILTIN_SCATTERDIV8SI:
39544 icode = CODE_FOR_avx512vl_scatterdiv8si;
39545 goto scatter_gen;
39546 case IX86_BUILTIN_SCATTERDIV4SI:
39547 icode = CODE_FOR_avx512vl_scatterdiv4si;
39548 goto scatter_gen;
39549 case IX86_BUILTIN_SCATTERDIV4DI:
39550 icode = CODE_FOR_avx512vl_scatterdiv4di;
39551 goto scatter_gen;
39552 case IX86_BUILTIN_SCATTERDIV2DI:
39553 icode = CODE_FOR_avx512vl_scatterdiv2di;
39554 goto scatter_gen;
39555 case IX86_BUILTIN_GATHERPFDPD:
39556 icode = CODE_FOR_avx512pf_gatherpfv8sidf;
39557 goto vec_prefetch_gen;
39558 case IX86_BUILTIN_GATHERPFDPS:
39559 icode = CODE_FOR_avx512pf_gatherpfv16sisf;
39560 goto vec_prefetch_gen;
39561 case IX86_BUILTIN_GATHERPFQPD:
39562 icode = CODE_FOR_avx512pf_gatherpfv8didf;
39563 goto vec_prefetch_gen;
39564 case IX86_BUILTIN_GATHERPFQPS:
39565 icode = CODE_FOR_avx512pf_gatherpfv8disf;
39566 goto vec_prefetch_gen;
39567 case IX86_BUILTIN_SCATTERPFDPD:
39568 icode = CODE_FOR_avx512pf_scatterpfv8sidf;
39569 goto vec_prefetch_gen;
39570 case IX86_BUILTIN_SCATTERPFDPS:
39571 icode = CODE_FOR_avx512pf_scatterpfv16sisf;
39572 goto vec_prefetch_gen;
39573 case IX86_BUILTIN_SCATTERPFQPD:
39574 icode = CODE_FOR_avx512pf_scatterpfv8didf;
39575 goto vec_prefetch_gen;
39576 case IX86_BUILTIN_SCATTERPFQPS:
39577 icode = CODE_FOR_avx512pf_scatterpfv8disf;
39578 goto vec_prefetch_gen;
39580 gather_gen:
39581 rtx half;
39582 rtx (*gen) (rtx, rtx);
39584 arg0 = CALL_EXPR_ARG (exp, 0);
39585 arg1 = CALL_EXPR_ARG (exp, 1);
39586 arg2 = CALL_EXPR_ARG (exp, 2);
39587 arg3 = CALL_EXPR_ARG (exp, 3);
39588 arg4 = CALL_EXPR_ARG (exp, 4);
39589 op0 = expand_normal (arg0);
39590 op1 = expand_normal (arg1);
39591 op2 = expand_normal (arg2);
39592 op3 = expand_normal (arg3);
39593 op4 = expand_normal (arg4);
39594 /* Note the arg order is different from the operand order. */
39595 mode0 = insn_data[icode].operand[1].mode;
39596 mode2 = insn_data[icode].operand[3].mode;
39597 mode3 = insn_data[icode].operand[4].mode;
39598 mode4 = insn_data[icode].operand[5].mode;
39600 if (target == NULL_RTX
39601 || GET_MODE (target) != insn_data[icode].operand[0].mode
39602 || !insn_data[icode].operand[0].predicate (target,
39603 GET_MODE (target)))
39604 subtarget = gen_reg_rtx (insn_data[icode].operand[0].mode);
39605 else
39606 subtarget = target;
39608 switch (fcode)
39610 case IX86_BUILTIN_GATHER3ALTSIV8DF:
39611 case IX86_BUILTIN_GATHER3ALTSIV8DI:
39612 half = gen_reg_rtx (V8SImode);
39613 if (!nonimmediate_operand (op2, V16SImode))
39614 op2 = copy_to_mode_reg (V16SImode, op2);
39615 emit_insn (gen_vec_extract_lo_v16si (half, op2));
39616 op2 = half;
39617 break;
39618 case IX86_BUILTIN_GATHER3ALTSIV4DF:
39619 case IX86_BUILTIN_GATHER3ALTSIV4DI:
39620 case IX86_BUILTIN_GATHERALTSIV4DF:
39621 case IX86_BUILTIN_GATHERALTSIV4DI:
39622 half = gen_reg_rtx (V4SImode);
39623 if (!nonimmediate_operand (op2, V8SImode))
39624 op2 = copy_to_mode_reg (V8SImode, op2);
39625 emit_insn (gen_vec_extract_lo_v8si (half, op2));
39626 op2 = half;
39627 break;
39628 case IX86_BUILTIN_GATHER3ALTDIV16SF:
39629 case IX86_BUILTIN_GATHER3ALTDIV16SI:
39630 half = gen_reg_rtx (mode0);
39631 if (mode0 == V8SFmode)
39632 gen = gen_vec_extract_lo_v16sf;
39633 else
39634 gen = gen_vec_extract_lo_v16si;
39635 if (!nonimmediate_operand (op0, GET_MODE (op0)))
39636 op0 = copy_to_mode_reg (GET_MODE (op0), op0);
39637 emit_insn (gen (half, op0));
39638 op0 = half;
39639 if (GET_MODE (op3) != VOIDmode)
39641 if (!nonimmediate_operand (op3, GET_MODE (op3)))
39642 op3 = copy_to_mode_reg (GET_MODE (op3), op3);
39643 emit_insn (gen (half, op3));
39644 op3 = half;
39646 break;
39647 case IX86_BUILTIN_GATHER3ALTDIV8SF:
39648 case IX86_BUILTIN_GATHER3ALTDIV8SI:
39649 case IX86_BUILTIN_GATHERALTDIV8SF:
39650 case IX86_BUILTIN_GATHERALTDIV8SI:
39651 half = gen_reg_rtx (mode0);
39652 if (mode0 == V4SFmode)
39653 gen = gen_vec_extract_lo_v8sf;
39654 else
39655 gen = gen_vec_extract_lo_v8si;
39656 if (!nonimmediate_operand (op0, GET_MODE (op0)))
39657 op0 = copy_to_mode_reg (GET_MODE (op0), op0);
39658 emit_insn (gen (half, op0));
39659 op0 = half;
39660 if (GET_MODE (op3) != VOIDmode)
39662 if (!nonimmediate_operand (op3, GET_MODE (op3)))
39663 op3 = copy_to_mode_reg (GET_MODE (op3), op3);
39664 emit_insn (gen (half, op3));
39665 op3 = half;
39667 break;
39668 default:
39669 break;
39672 /* Force the memory operand to be addressed through a base register
39673 here. We don't want to do this to the memory operands of other
39674 builtin functions. */
39675 op1 = ix86_zero_extend_to_Pmode (op1);
39677 if (!insn_data[icode].operand[1].predicate (op0, mode0))
39678 op0 = copy_to_mode_reg (mode0, op0);
39679 if (!insn_data[icode].operand[2].predicate (op1, Pmode))
39680 op1 = copy_to_mode_reg (Pmode, op1);
39681 if (!insn_data[icode].operand[3].predicate (op2, mode2))
39682 op2 = copy_to_mode_reg (mode2, op2);
39683 if (GET_MODE (op3) == mode3 || GET_MODE (op3) == VOIDmode)
39685 if (!insn_data[icode].operand[4].predicate (op3, mode3))
39686 op3 = copy_to_mode_reg (mode3, op3);
39688 else
39690 op3 = copy_to_reg (op3);
39691 op3 = simplify_gen_subreg (mode3, op3, GET_MODE (op3), 0);
39693 if (!insn_data[icode].operand[5].predicate (op4, mode4))
39695 error ("the last argument must be scale 1, 2, 4, 8");
39696 return const0_rtx;
39699 /* Optimize. If mask is known to have all high bits set,
39700 replace op0 with pc_rtx to signal that the instruction
39701 overwrites the whole destination and doesn't use its
39702 previous contents. */
39703 if (optimize)
39705 if (TREE_CODE (arg3) == INTEGER_CST)
39707 if (integer_all_onesp (arg3))
39708 op0 = pc_rtx;
39710 else if (TREE_CODE (arg3) == VECTOR_CST)
39712 unsigned int negative = 0;
39713 for (i = 0; i < VECTOR_CST_NELTS (arg3); ++i)
39715 tree cst = VECTOR_CST_ELT (arg3, i);
39716 if (TREE_CODE (cst) == INTEGER_CST
39717 && tree_int_cst_sign_bit (cst))
39718 negative++;
39719 else if (TREE_CODE (cst) == REAL_CST
39720 && REAL_VALUE_NEGATIVE (TREE_REAL_CST (cst)))
39721 negative++;
39723 if (negative == TYPE_VECTOR_SUBPARTS (TREE_TYPE (arg3)))
39724 op0 = pc_rtx;
39726 else if (TREE_CODE (arg3) == SSA_NAME
39727 && TREE_CODE (TREE_TYPE (arg3)) == VECTOR_TYPE)
39729 /* Also recognize when the mask is like:
39730 __v2df src = _mm_setzero_pd ();
39731 __v2df mask = _mm_cmpeq_pd (src, src);
39733 __v8sf src = _mm256_setzero_ps ();
39734 __v8sf mask = _mm256_cmp_ps (src, src, _CMP_EQ_OQ);
39735 as that is a cheaper way to load all ones into
39736 a register than having to load a constant from
39737 memory. */
39738 gimple def_stmt = SSA_NAME_DEF_STMT (arg3);
39739 if (is_gimple_call (def_stmt))
39741 tree fndecl = gimple_call_fndecl (def_stmt);
39742 if (fndecl
39743 && DECL_BUILT_IN_CLASS (fndecl) == BUILT_IN_MD)
39744 switch ((unsigned int) DECL_FUNCTION_CODE (fndecl))
39746 case IX86_BUILTIN_CMPPD:
39747 case IX86_BUILTIN_CMPPS:
39748 case IX86_BUILTIN_CMPPD256:
39749 case IX86_BUILTIN_CMPPS256:
39750 if (!integer_zerop (gimple_call_arg (def_stmt, 2)))
39751 break;
39752 /* FALLTHRU */
39753 case IX86_BUILTIN_CMPEQPD:
39754 case IX86_BUILTIN_CMPEQPS:
39755 if (initializer_zerop (gimple_call_arg (def_stmt, 0))
39756 && initializer_zerop (gimple_call_arg (def_stmt,
39757 1)))
39758 op0 = pc_rtx;
39759 break;
39760 default:
39761 break;
39767 pat = GEN_FCN (icode) (subtarget, op0, op1, op2, op3, op4);
39768 if (! pat)
39769 return const0_rtx;
39770 emit_insn (pat);
39772 switch (fcode)
39774 case IX86_BUILTIN_GATHER3DIV16SF:
39775 if (target == NULL_RTX)
39776 target = gen_reg_rtx (V8SFmode);
39777 emit_insn (gen_vec_extract_lo_v16sf (target, subtarget));
39778 break;
39779 case IX86_BUILTIN_GATHER3DIV16SI:
39780 if (target == NULL_RTX)
39781 target = gen_reg_rtx (V8SImode);
39782 emit_insn (gen_vec_extract_lo_v16si (target, subtarget));
39783 break;
39784 case IX86_BUILTIN_GATHER3DIV8SF:
39785 case IX86_BUILTIN_GATHERDIV8SF:
39786 if (target == NULL_RTX)
39787 target = gen_reg_rtx (V4SFmode);
39788 emit_insn (gen_vec_extract_lo_v8sf (target, subtarget));
39789 break;
39790 case IX86_BUILTIN_GATHER3DIV8SI:
39791 case IX86_BUILTIN_GATHERDIV8SI:
39792 if (target == NULL_RTX)
39793 target = gen_reg_rtx (V4SImode);
39794 emit_insn (gen_vec_extract_lo_v8si (target, subtarget));
39795 break;
39796 default:
39797 target = subtarget;
39798 break;
39800 return target;
39802 scatter_gen:
39803 arg0 = CALL_EXPR_ARG (exp, 0);
39804 arg1 = CALL_EXPR_ARG (exp, 1);
39805 arg2 = CALL_EXPR_ARG (exp, 2);
39806 arg3 = CALL_EXPR_ARG (exp, 3);
39807 arg4 = CALL_EXPR_ARG (exp, 4);
39808 op0 = expand_normal (arg0);
39809 op1 = expand_normal (arg1);
39810 op2 = expand_normal (arg2);
39811 op3 = expand_normal (arg3);
39812 op4 = expand_normal (arg4);
39813 mode1 = insn_data[icode].operand[1].mode;
39814 mode2 = insn_data[icode].operand[2].mode;
39815 mode3 = insn_data[icode].operand[3].mode;
39816 mode4 = insn_data[icode].operand[4].mode;
39818 /* Force the memory operand to be addressed through a base register
39819 here. We don't want to do this to the memory operands of other
39820 builtin functions. */
39821 op0 = force_reg (Pmode, convert_to_mode (Pmode, op0, 1));
39823 if (!insn_data[icode].operand[0].predicate (op0, Pmode))
39824 op0 = copy_to_mode_reg (Pmode, op0);
39826 if (GET_MODE (op1) == mode1 || GET_MODE (op1) == VOIDmode)
39828 if (!insn_data[icode].operand[1].predicate (op1, mode1))
39829 op1 = copy_to_mode_reg (mode1, op1);
39831 else
39833 op1 = copy_to_reg (op1);
39834 op1 = simplify_gen_subreg (mode1, op1, GET_MODE (op1), 0);
39837 if (!insn_data[icode].operand[2].predicate (op2, mode2))
39838 op2 = copy_to_mode_reg (mode2, op2);
39840 if (!insn_data[icode].operand[3].predicate (op3, mode3))
39841 op3 = copy_to_mode_reg (mode3, op3);
39843 if (!insn_data[icode].operand[4].predicate (op4, mode4))
39845 error ("the last argument must be scale 1, 2, 4, 8");
39846 return const0_rtx;
39849 pat = GEN_FCN (icode) (op0, op1, op2, op3, op4);
39850 if (! pat)
39851 return const0_rtx;
39853 emit_insn (pat);
39854 return 0;
39856 vec_prefetch_gen:
39857 arg0 = CALL_EXPR_ARG (exp, 0);
39858 arg1 = CALL_EXPR_ARG (exp, 1);
39859 arg2 = CALL_EXPR_ARG (exp, 2);
39860 arg3 = CALL_EXPR_ARG (exp, 3);
39861 arg4 = CALL_EXPR_ARG (exp, 4);
39862 op0 = expand_normal (arg0);
39863 op1 = expand_normal (arg1);
39864 op2 = expand_normal (arg2);
39865 op3 = expand_normal (arg3);
39866 op4 = expand_normal (arg4);
39867 mode0 = insn_data[icode].operand[0].mode;
39868 mode1 = insn_data[icode].operand[1].mode;
39869 mode3 = insn_data[icode].operand[3].mode;
39870 mode4 = insn_data[icode].operand[4].mode;
39872 if (GET_MODE (op0) == mode0
39873 || (GET_MODE (op0) == VOIDmode && op0 != constm1_rtx))
39875 if (!insn_data[icode].operand[0].predicate (op0, mode0))
39876 op0 = copy_to_mode_reg (mode0, op0);
39878 else if (op0 != constm1_rtx)
39880 op0 = copy_to_reg (op0);
39881 op0 = simplify_gen_subreg (mode0, op0, GET_MODE (op0), 0);
39884 if (!insn_data[icode].operand[1].predicate (op1, mode1))
39885 op1 = copy_to_mode_reg (mode1, op1);
39887 /* Force the memory operand to be addressed through a base register
39888 here. We don't want to do this to the memory operands of other
39889 builtin functions. */
39890 op2 = force_reg (Pmode, convert_to_mode (Pmode, op2, 1));
39892 if (!insn_data[icode].operand[2].predicate (op2, Pmode))
39893 op2 = copy_to_mode_reg (Pmode, op2);
39895 if (!insn_data[icode].operand[3].predicate (op3, mode3))
39897 error ("the forth argument must be scale 1, 2, 4, 8");
39898 return const0_rtx;
39901 if (!insn_data[icode].operand[4].predicate (op4, mode4))
39903 error ("incorrect hint operand");
39904 return const0_rtx;
39907 pat = GEN_FCN (icode) (op0, op1, op2, op3, op4);
39908 if (! pat)
39909 return const0_rtx;
39911 emit_insn (pat);
39913 return 0;
39915 case IX86_BUILTIN_XABORT:
39916 icode = CODE_FOR_xabort;
39917 arg0 = CALL_EXPR_ARG (exp, 0);
39918 op0 = expand_normal (arg0);
39919 mode0 = insn_data[icode].operand[0].mode;
39920 if (!insn_data[icode].operand[0].predicate (op0, mode0))
39922 error ("the xabort's argument must be an 8-bit immediate");
39923 return const0_rtx;
39925 emit_insn (gen_xabort (op0));
39926 return 0;
39928 default:
39929 break;
39932 for (i = 0, d = bdesc_special_args;
39933 i < ARRAY_SIZE (bdesc_special_args);
39934 i++, d++)
39935 if (d->code == fcode)
39936 return ix86_expand_special_args_builtin (d, exp, target);
39938 for (i = 0, d = bdesc_args;
39939 i < ARRAY_SIZE (bdesc_args);
39940 i++, d++)
39941 if (d->code == fcode)
39942 switch (fcode)
39944 case IX86_BUILTIN_FABSQ:
39945 case IX86_BUILTIN_COPYSIGNQ:
39946 if (!TARGET_SSE)
39947 /* Emit a normal call if SSE isn't available. */
39948 return expand_call (exp, target, ignore);
39949 default:
39950 return ix86_expand_args_builtin (d, exp, target);
39953 for (i = 0, d = bdesc_comi; i < ARRAY_SIZE (bdesc_comi); i++, d++)
39954 if (d->code == fcode)
39955 return ix86_expand_sse_comi (d, exp, target);
39957 for (i = 0, d = bdesc_round_args; i < ARRAY_SIZE (bdesc_round_args); i++, d++)
39958 if (d->code == fcode)
39959 return ix86_expand_round_builtin (d, exp, target);
39961 for (i = 0, d = bdesc_pcmpestr;
39962 i < ARRAY_SIZE (bdesc_pcmpestr);
39963 i++, d++)
39964 if (d->code == fcode)
39965 return ix86_expand_sse_pcmpestr (d, exp, target);
39967 for (i = 0, d = bdesc_pcmpistr;
39968 i < ARRAY_SIZE (bdesc_pcmpistr);
39969 i++, d++)
39970 if (d->code == fcode)
39971 return ix86_expand_sse_pcmpistr (d, exp, target);
39973 for (i = 0, d = bdesc_multi_arg; i < ARRAY_SIZE (bdesc_multi_arg); i++, d++)
39974 if (d->code == fcode)
39975 return ix86_expand_multi_arg_builtin (d->icode, exp, target,
39976 (enum ix86_builtin_func_type)
39977 d->flag, d->comparison);
39979 gcc_unreachable ();
39982 /* This returns the target-specific builtin with code CODE if
39983 current_function_decl has visibility on this builtin, which is checked
39984 using isa flags. Returns NULL_TREE otherwise. */
39986 static tree ix86_get_builtin (enum ix86_builtins code)
39988 struct cl_target_option *opts;
39989 tree target_tree = NULL_TREE;
39991 /* Determine the isa flags of current_function_decl. */
39993 if (current_function_decl)
39994 target_tree = DECL_FUNCTION_SPECIFIC_TARGET (current_function_decl);
39996 if (target_tree == NULL)
39997 target_tree = target_option_default_node;
39999 opts = TREE_TARGET_OPTION (target_tree);
40001 if (ix86_builtins_isa[(int) code].isa & opts->x_ix86_isa_flags)
40002 return ix86_builtin_decl (code, true);
40003 else
40004 return NULL_TREE;
40007 /* Return the function decl of the target-specific builtin
40008 for the given MPX builtin passed in FCODE. */
40009 static tree
40010 ix86_builtin_mpx_function (unsigned fcode)
40012 switch (fcode)
40014 case BUILT_IN_CHKP_BNDMK:
40015 return ix86_builtins[IX86_BUILTIN_BNDMK];
40017 case BUILT_IN_CHKP_BNDSTX:
40018 return ix86_builtins[IX86_BUILTIN_BNDSTX];
40020 case BUILT_IN_CHKP_BNDLDX:
40021 return ix86_builtins[IX86_BUILTIN_BNDLDX];
40023 case BUILT_IN_CHKP_BNDCL:
40024 return ix86_builtins[IX86_BUILTIN_BNDCL];
40026 case BUILT_IN_CHKP_BNDCU:
40027 return ix86_builtins[IX86_BUILTIN_BNDCU];
40029 case BUILT_IN_CHKP_BNDRET:
40030 return ix86_builtins[IX86_BUILTIN_BNDRET];
40032 case BUILT_IN_CHKP_INTERSECT:
40033 return ix86_builtins[IX86_BUILTIN_BNDINT];
40035 case BUILT_IN_CHKP_NARROW:
40036 return ix86_builtins[IX86_BUILTIN_BNDNARROW];
40038 case BUILT_IN_CHKP_SIZEOF:
40039 return ix86_builtins[IX86_BUILTIN_SIZEOF];
40041 case BUILT_IN_CHKP_EXTRACT_LOWER:
40042 return ix86_builtins[IX86_BUILTIN_BNDLOWER];
40044 case BUILT_IN_CHKP_EXTRACT_UPPER:
40045 return ix86_builtins[IX86_BUILTIN_BNDUPPER];
40047 default:
40048 return NULL_TREE;
40051 gcc_unreachable ();
40054 /* Helper function for ix86_load_bounds and ix86_store_bounds.
40056 Return an address to be used to load/store bounds for pointer
40057 passed in SLOT.
40059 SLOT_NO is an integer constant holding the number of a
40060 target-dependent special slot to be used in case SLOT is not a memory.
40062 SPECIAL_BASE is a pointer to be used as a base of fake address
40063 to access special slots in Bounds Table. SPECIAL_BASE[-1],
40064 SPECIAL_BASE[-2] etc. will be used as fake pointer locations. */
40066 static rtx
40067 ix86_get_arg_address_for_bt (rtx slot, rtx slot_no, rtx special_base)
40069 rtx addr = NULL;
40071 /* A NULL slot means we pass bounds for a pointer that is not passed
40072 to the function at all. A register slot means we pass the pointer
40073 in a register. In both cases bounds are passed via the Bounds
40074 Table. Since we do not have the actual pointer stored in memory,
40075 we have to use fake addresses to access the Bounds Table. We
40076 start with (special_base - sizeof (void *)) and decrease this
40077 address by the pointer size to get addresses for other slots. */
40078 if (!slot || REG_P (slot))
40080 gcc_assert (CONST_INT_P (slot_no));
40081 addr = plus_constant (Pmode, special_base,
40082 -(INTVAL (slot_no) + 1) * GET_MODE_SIZE (Pmode));
40084 /* If the pointer is passed in memory then its address is used to
40085 access the Bounds Table. */
40086 else if (MEM_P (slot))
40088 addr = XEXP (slot, 0);
40089 if (!register_operand (addr, Pmode))
40090 addr = copy_addr_to_reg (addr);
40092 else
40093 gcc_unreachable ();
40095 return addr;
40098 /* Expand pass uses this hook to load bounds for function parameter
40099 PTR passed in SLOT in case its bounds are not passed in a register.
40101 If SLOT is a memory, then bounds are loaded as for regular pointer
40102 loaded from memory. PTR may be NULL in case SLOT is a memory.
40103 In such case value of PTR (if required) may be loaded from SLOT.
40105 If SLOT is NULL or a register then SLOT_NO is an integer constant
40106 holding number of the target dependent special slot which should be
40107 used to obtain bounds.
40109 Return loaded bounds. */
40111 static rtx
40112 ix86_load_bounds (rtx slot, rtx ptr, rtx slot_no)
40114 rtx reg = gen_reg_rtx (BNDmode);
40115 rtx addr;
40117 /* Get address to be used to access Bounds Table. Special slots start
40118 at the location of return address of the current function. */
40119 addr = ix86_get_arg_address_for_bt (slot, slot_no, arg_pointer_rtx);
40121 /* Load the pointer value from memory if we don't have it. */
40122 if (!ptr)
40124 gcc_assert (MEM_P (slot));
40125 ptr = copy_addr_to_reg (slot);
40128 emit_insn (BNDmode == BND64mode
40129 ? gen_bnd64_ldx (reg, addr, ptr)
40130 : gen_bnd32_ldx (reg, addr, ptr));
40132 return reg;
40135 /* Expand pass uses this hook to store BOUNDS for call argument PTR
40136 passed in SLOT in case BOUNDS are not passed in a register.
40138 If SLOT is a memory, then BOUNDS are stored as for regular pointer
40139 stored in memory. PTR may be NULL in case SLOT is a memory.
40140 In such case value of PTR (if required) may be loaded from SLOT.
40142 If SLOT is NULL or a register then SLOT_NO is an integer constant
40143 holding number of the target dependent special slot which should be
40144 used to store BOUNDS. */
40146 static void
40147 ix86_store_bounds (rtx ptr, rtx slot, rtx bounds, rtx slot_no)
40149 rtx addr;
40151 /* Get address to be used to access Bounds Table. Special slots start
40152 at the location of return address of a called function. */
40153 addr = ix86_get_arg_address_for_bt (slot, slot_no, stack_pointer_rtx);
40155 /* Load the pointer value from memory if we don't have it. */
40156 if (!ptr)
40158 gcc_assert (MEM_P (slot));
40159 ptr = copy_addr_to_reg (slot);
40162 gcc_assert (POINTER_BOUNDS_MODE_P (GET_MODE (bounds)));
40163 if (!register_operand (bounds, BNDmode))
40164 bounds = copy_to_mode_reg (BNDmode, bounds);
40166 emit_insn (BNDmode == BND64mode
40167 ? gen_bnd64_stx (addr, ptr, bounds)
40168 : gen_bnd32_stx (addr, ptr, bounds));
40171 /* Load and return bounds returned by function in SLOT. */
40173 static rtx
40174 ix86_load_returned_bounds (rtx slot)
40176 rtx res;
40178 gcc_assert (REG_P (slot));
40179 res = gen_reg_rtx (BNDmode);
40180 emit_move_insn (res, slot);
40182 return res;
40185 /* Store BOUNDS returned by function into SLOT. */
40187 static void
40188 ix86_store_returned_bounds (rtx slot, rtx bounds)
40190 gcc_assert (REG_P (slot));
40191 emit_move_insn (slot, bounds);
40194 /* Returns a function decl for a vectorized version of the builtin function
40195 with builtin function code FN and the result vector type TYPE, or NULL_TREE
40196 if it is not available. */
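/* For example, a request to vectorize sqrt with V2DF input and output
   yields the decl for IX86_BUILTIN_SQRTPD, provided ix86_get_builtin
   finds the required ISA enabled for the current function.  */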
40198 static tree
40199 ix86_builtin_vectorized_function (tree fndecl, tree type_out,
40200 tree type_in)
40202 machine_mode in_mode, out_mode;
40203 int in_n, out_n;
40204 enum built_in_function fn = DECL_FUNCTION_CODE (fndecl);
40206 if (TREE_CODE (type_out) != VECTOR_TYPE
40207 || TREE_CODE (type_in) != VECTOR_TYPE
40208 || DECL_BUILT_IN_CLASS (fndecl) != BUILT_IN_NORMAL)
40209 return NULL_TREE;
40211 out_mode = TYPE_MODE (TREE_TYPE (type_out));
40212 out_n = TYPE_VECTOR_SUBPARTS (type_out);
40213 in_mode = TYPE_MODE (TREE_TYPE (type_in));
40214 in_n = TYPE_VECTOR_SUBPARTS (type_in);
40216 switch (fn)
40218 case BUILT_IN_SQRT:
40219 if (out_mode == DFmode && in_mode == DFmode)
40221 if (out_n == 2 && in_n == 2)
40222 return ix86_get_builtin (IX86_BUILTIN_SQRTPD);
40223 else if (out_n == 4 && in_n == 4)
40224 return ix86_get_builtin (IX86_BUILTIN_SQRTPD256);
40225 else if (out_n == 8 && in_n == 8)
40226 return ix86_get_builtin (IX86_BUILTIN_SQRTPD512);
40228 break;
40230 case BUILT_IN_EXP2F:
40231 if (out_mode == SFmode && in_mode == SFmode)
40233 if (out_n == 16 && in_n == 16)
40234 return ix86_get_builtin (IX86_BUILTIN_EXP2PS);
40236 break;
40238 case BUILT_IN_SQRTF:
40239 if (out_mode == SFmode && in_mode == SFmode)
40241 if (out_n == 4 && in_n == 4)
40242 return ix86_get_builtin (IX86_BUILTIN_SQRTPS_NR);
40243 else if (out_n == 8 && in_n == 8)
40244 return ix86_get_builtin (IX86_BUILTIN_SQRTPS_NR256);
40245 else if (out_n == 16 && in_n == 16)
40246 return ix86_get_builtin (IX86_BUILTIN_SQRTPS_NR512);
40248 break;
40250 case BUILT_IN_IFLOOR:
40251 case BUILT_IN_LFLOOR:
40252 case BUILT_IN_LLFLOOR:
40253 /* The round insn does not trap on denormals. */
40254 if (flag_trapping_math || !TARGET_ROUND)
40255 break;
40257 if (out_mode == SImode && in_mode == DFmode)
40259 if (out_n == 4 && in_n == 2)
40260 return ix86_get_builtin (IX86_BUILTIN_FLOORPD_VEC_PACK_SFIX);
40261 else if (out_n == 8 && in_n == 4)
40262 return ix86_get_builtin (IX86_BUILTIN_FLOORPD_VEC_PACK_SFIX256);
40263 else if (out_n == 16 && in_n == 8)
40264 return ix86_get_builtin (IX86_BUILTIN_FLOORPD_VEC_PACK_SFIX512);
40266 break;
40268 case BUILT_IN_IFLOORF:
40269 case BUILT_IN_LFLOORF:
40270 case BUILT_IN_LLFLOORF:
40271 /* The round insn does not trap on denormals. */
40272 if (flag_trapping_math || !TARGET_ROUND)
40273 break;
40275 if (out_mode == SImode && in_mode == SFmode)
40277 if (out_n == 4 && in_n == 4)
40278 return ix86_get_builtin (IX86_BUILTIN_FLOORPS_SFIX);
40279 else if (out_n == 8 && in_n == 8)
40280 return ix86_get_builtin (IX86_BUILTIN_FLOORPS_SFIX256);
40282 break;
40284 case BUILT_IN_ICEIL:
40285 case BUILT_IN_LCEIL:
40286 case BUILT_IN_LLCEIL:
40287 /* The round insn does not trap on denormals. */
40288 if (flag_trapping_math || !TARGET_ROUND)
40289 break;
40291 if (out_mode == SImode && in_mode == DFmode)
40293 if (out_n == 4 && in_n == 2)
40294 return ix86_get_builtin (IX86_BUILTIN_CEILPD_VEC_PACK_SFIX);
40295 else if (out_n == 8 && in_n == 4)
40296 return ix86_get_builtin (IX86_BUILTIN_CEILPD_VEC_PACK_SFIX256);
40297 else if (out_n == 16 && in_n == 8)
40298 return ix86_get_builtin (IX86_BUILTIN_CEILPD_VEC_PACK_SFIX512);
40300 break;
40302 case BUILT_IN_ICEILF:
40303 case BUILT_IN_LCEILF:
40304 case BUILT_IN_LLCEILF:
40305 /* The round insn does not trap on denormals. */
40306 if (flag_trapping_math || !TARGET_ROUND)
40307 break;
40309 if (out_mode == SImode && in_mode == SFmode)
40311 if (out_n == 4 && in_n == 4)
40312 return ix86_get_builtin (IX86_BUILTIN_CEILPS_SFIX);
40313 else if (out_n == 8 && in_n == 8)
40314 return ix86_get_builtin (IX86_BUILTIN_CEILPS_SFIX256);
40316 break;
40318 case BUILT_IN_IRINT:
40319 case BUILT_IN_LRINT:
40320 case BUILT_IN_LLRINT:
40321 if (out_mode == SImode && in_mode == DFmode)
40323 if (out_n == 4 && in_n == 2)
40324 return ix86_get_builtin (IX86_BUILTIN_VEC_PACK_SFIX);
40325 else if (out_n == 8 && in_n == 4)
40326 return ix86_get_builtin (IX86_BUILTIN_VEC_PACK_SFIX256);
40328 break;
40330 case BUILT_IN_IRINTF:
40331 case BUILT_IN_LRINTF:
40332 case BUILT_IN_LLRINTF:
40333 if (out_mode == SImode && in_mode == SFmode)
40335 if (out_n == 4 && in_n == 4)
40336 return ix86_get_builtin (IX86_BUILTIN_CVTPS2DQ);
40337 else if (out_n == 8 && in_n == 8)
40338 return ix86_get_builtin (IX86_BUILTIN_CVTPS2DQ256);
40340 break;
40342 case BUILT_IN_IROUND:
40343 case BUILT_IN_LROUND:
40344 case BUILT_IN_LLROUND:
40345 /* The round insn does not trap on denormals. */
40346 if (flag_trapping_math || !TARGET_ROUND)
40347 break;
40349 if (out_mode == SImode && in_mode == DFmode)
40351 if (out_n == 4 && in_n == 2)
40352 return ix86_get_builtin (IX86_BUILTIN_ROUNDPD_AZ_VEC_PACK_SFIX);
40353 else if (out_n == 8 && in_n == 4)
40354 return ix86_get_builtin (IX86_BUILTIN_ROUNDPD_AZ_VEC_PACK_SFIX256);
40355 else if (out_n == 16 && in_n == 8)
40356 return ix86_get_builtin (IX86_BUILTIN_ROUNDPD_AZ_VEC_PACK_SFIX512);
40358 break;
40360 case BUILT_IN_IROUNDF:
40361 case BUILT_IN_LROUNDF:
40362 case BUILT_IN_LLROUNDF:
40363 /* The round insn does not trap on denormals. */
40364 if (flag_trapping_math || !TARGET_ROUND)
40365 break;
40367 if (out_mode == SImode && in_mode == SFmode)
40369 if (out_n == 4 && in_n == 4)
40370 return ix86_get_builtin (IX86_BUILTIN_ROUNDPS_AZ_SFIX);
40371 else if (out_n == 8 && in_n == 8)
40372 return ix86_get_builtin (IX86_BUILTIN_ROUNDPS_AZ_SFIX256);
40374 break;
40376 case BUILT_IN_COPYSIGN:
40377 if (out_mode == DFmode && in_mode == DFmode)
40379 if (out_n == 2 && in_n == 2)
40380 return ix86_get_builtin (IX86_BUILTIN_CPYSGNPD);
40381 else if (out_n == 4 && in_n == 4)
40382 return ix86_get_builtin (IX86_BUILTIN_CPYSGNPD256);
40383 else if (out_n == 8 && in_n == 8)
40384 return ix86_get_builtin (IX86_BUILTIN_CPYSGNPD512);
40386 break;
40388 case BUILT_IN_COPYSIGNF:
40389 if (out_mode == SFmode && in_mode == SFmode)
40391 if (out_n == 4 && in_n == 4)
40392 return ix86_get_builtin (IX86_BUILTIN_CPYSGNPS);
40393 else if (out_n == 8 && in_n == 8)
40394 return ix86_get_builtin (IX86_BUILTIN_CPYSGNPS256);
40395 else if (out_n == 16 && in_n == 16)
40396 return ix86_get_builtin (IX86_BUILTIN_CPYSGNPS512);
40398 break;
40400 case BUILT_IN_FLOOR:
40401 /* The round insn does not trap on denormals. */
40402 if (flag_trapping_math || !TARGET_ROUND)
40403 break;
40405 if (out_mode == DFmode && in_mode == DFmode)
40407 if (out_n == 2 && in_n == 2)
40408 return ix86_get_builtin (IX86_BUILTIN_FLOORPD);
40409 else if (out_n == 4 && in_n == 4)
40410 return ix86_get_builtin (IX86_BUILTIN_FLOORPD256);
40412 break;
40414 case BUILT_IN_FLOORF:
40415 /* The round insn does not trap on denormals. */
40416 if (flag_trapping_math || !TARGET_ROUND)
40417 break;
40419 if (out_mode == SFmode && in_mode == SFmode)
40421 if (out_n == 4 && in_n == 4)
40422 return ix86_get_builtin (IX86_BUILTIN_FLOORPS);
40423 else if (out_n == 8 && in_n == 8)
40424 return ix86_get_builtin (IX86_BUILTIN_FLOORPS256);
40426 break;
40428 case BUILT_IN_CEIL:
40429 /* The round insn does not trap on denormals. */
40430 if (flag_trapping_math || !TARGET_ROUND)
40431 break;
40433 if (out_mode == DFmode && in_mode == DFmode)
40435 if (out_n == 2 && in_n == 2)
40436 return ix86_get_builtin (IX86_BUILTIN_CEILPD);
40437 else if (out_n == 4 && in_n == 4)
40438 return ix86_get_builtin (IX86_BUILTIN_CEILPD256);
40440 break;
40442 case BUILT_IN_CEILF:
40443 /* The round insn does not trap on denormals. */
40444 if (flag_trapping_math || !TARGET_ROUND)
40445 break;
40447 if (out_mode == SFmode && in_mode == SFmode)
40449 if (out_n == 4 && in_n == 4)
40450 return ix86_get_builtin (IX86_BUILTIN_CEILPS);
40451 else if (out_n == 8 && in_n == 8)
40452 return ix86_get_builtin (IX86_BUILTIN_CEILPS256);
40454 break;
40456 case BUILT_IN_TRUNC:
40457 /* The round insn does not trap on denormals. */
40458 if (flag_trapping_math || !TARGET_ROUND)
40459 break;
40461 if (out_mode == DFmode && in_mode == DFmode)
40463 if (out_n == 2 && in_n == 2)
40464 return ix86_get_builtin (IX86_BUILTIN_TRUNCPD);
40465 else if (out_n == 4 && in_n == 4)
40466 return ix86_get_builtin (IX86_BUILTIN_TRUNCPD256);
40468 break;
40470 case BUILT_IN_TRUNCF:
40471 /* The round insn does not trap on denormals. */
40472 if (flag_trapping_math || !TARGET_ROUND)
40473 break;
40475 if (out_mode == SFmode && in_mode == SFmode)
40477 if (out_n == 4 && in_n == 4)
40478 return ix86_get_builtin (IX86_BUILTIN_TRUNCPS);
40479 else if (out_n == 8 && in_n == 8)
40480 return ix86_get_builtin (IX86_BUILTIN_TRUNCPS256);
40482 break;
40484 case BUILT_IN_RINT:
40485 /* The round insn does not trap on denormals. */
40486 if (flag_trapping_math || !TARGET_ROUND)
40487 break;
40489 if (out_mode == DFmode && in_mode == DFmode)
40491 if (out_n == 2 && in_n == 2)
40492 return ix86_get_builtin (IX86_BUILTIN_RINTPD);
40493 else if (out_n == 4 && in_n == 4)
40494 return ix86_get_builtin (IX86_BUILTIN_RINTPD256);
40496 break;
40498 case BUILT_IN_RINTF:
40499 /* The round insn does not trap on denormals. */
40500 if (flag_trapping_math || !TARGET_ROUND)
40501 break;
40503 if (out_mode == SFmode && in_mode == SFmode)
40505 if (out_n == 4 && in_n == 4)
40506 return ix86_get_builtin (IX86_BUILTIN_RINTPS);
40507 else if (out_n == 8 && in_n == 8)
40508 return ix86_get_builtin (IX86_BUILTIN_RINTPS256);
40510 break;
40512 case BUILT_IN_ROUND:
40513 /* The round insn does not trap on denormals. */
40514 if (flag_trapping_math || !TARGET_ROUND)
40515 break;
40517 if (out_mode == DFmode && in_mode == DFmode)
40519 if (out_n == 2 && in_n == 2)
40520 return ix86_get_builtin (IX86_BUILTIN_ROUNDPD_AZ);
40521 else if (out_n == 4 && in_n == 4)
40522 return ix86_get_builtin (IX86_BUILTIN_ROUNDPD_AZ256);
40524 break;
40526 case BUILT_IN_ROUNDF:
40527 /* The round insn does not trap on denormals. */
40528 if (flag_trapping_math || !TARGET_ROUND)
40529 break;
40531 if (out_mode == SFmode && in_mode == SFmode)
40533 if (out_n == 4 && in_n == 4)
40534 return ix86_get_builtin (IX86_BUILTIN_ROUNDPS_AZ);
40535 else if (out_n == 8 && in_n == 8)
40536 return ix86_get_builtin (IX86_BUILTIN_ROUNDPS_AZ256);
40538 break;
40540 case BUILT_IN_FMA:
40541 if (out_mode == DFmode && in_mode == DFmode)
40543 if (out_n == 2 && in_n == 2)
40544 return ix86_get_builtin (IX86_BUILTIN_VFMADDPD);
40545 if (out_n == 4 && in_n == 4)
40546 return ix86_get_builtin (IX86_BUILTIN_VFMADDPD256);
40548 break;
40550 case BUILT_IN_FMAF:
40551 if (out_mode == SFmode && in_mode == SFmode)
40553 if (out_n == 4 && in_n == 4)
40554 return ix86_get_builtin (IX86_BUILTIN_VFMADDPS);
40555 if (out_n == 8 && in_n == 8)
40556 return ix86_get_builtin (IX86_BUILTIN_VFMADDPS256);
40558 break;
40560 default:
40561 break;
40564 /* Dispatch to a handler for a vectorization library. */
40565 if (ix86_veclib_handler)
40566 return ix86_veclib_handler ((enum built_in_function) fn, type_out,
40567 type_in);
40569 return NULL_TREE;
40572 /* Handler for an SVML-style interface to
40573 a library with vectorized intrinsics. */
40575 static tree
40576 ix86_veclibabi_svml (enum built_in_function fn, tree type_out, tree type_in)
40578 char name[20];
40579 tree fntype, new_fndecl, args;
40580 unsigned arity;
40581 const char *bname;
40582 machine_mode el_mode, in_mode;
40583 int n, in_n;
40585 /* The SVML library is suitable for unsafe math only. */
40586 if (!flag_unsafe_math_optimizations)
40587 return NULL_TREE;
40589 el_mode = TYPE_MODE (TREE_TYPE (type_out));
40590 n = TYPE_VECTOR_SUBPARTS (type_out);
40591 in_mode = TYPE_MODE (TREE_TYPE (type_in));
40592 in_n = TYPE_VECTOR_SUBPARTS (type_in);
40593 if (el_mode != in_mode
40594 || n != in_n)
40595 return NULL_TREE;
40597 switch (fn)
40599 case BUILT_IN_EXP:
40600 case BUILT_IN_LOG:
40601 case BUILT_IN_LOG10:
40602 case BUILT_IN_POW:
40603 case BUILT_IN_TANH:
40604 case BUILT_IN_TAN:
40605 case BUILT_IN_ATAN:
40606 case BUILT_IN_ATAN2:
40607 case BUILT_IN_ATANH:
40608 case BUILT_IN_CBRT:
40609 case BUILT_IN_SINH:
40610 case BUILT_IN_SIN:
40611 case BUILT_IN_ASINH:
40612 case BUILT_IN_ASIN:
40613 case BUILT_IN_COSH:
40614 case BUILT_IN_COS:
40615 case BUILT_IN_ACOSH:
40616 case BUILT_IN_ACOS:
40617 if (el_mode != DFmode || n != 2)
40618 return NULL_TREE;
40619 break;
40621 case BUILT_IN_EXPF:
40622 case BUILT_IN_LOGF:
40623 case BUILT_IN_LOG10F:
40624 case BUILT_IN_POWF:
40625 case BUILT_IN_TANHF:
40626 case BUILT_IN_TANF:
40627 case BUILT_IN_ATANF:
40628 case BUILT_IN_ATAN2F:
40629 case BUILT_IN_ATANHF:
40630 case BUILT_IN_CBRTF:
40631 case BUILT_IN_SINHF:
40632 case BUILT_IN_SINF:
40633 case BUILT_IN_ASINHF:
40634 case BUILT_IN_ASINF:
40635 case BUILT_IN_COSHF:
40636 case BUILT_IN_COSF:
40637 case BUILT_IN_ACOSHF:
40638 case BUILT_IN_ACOSF:
40639 if (el_mode != SFmode || n != 4)
40640 return NULL_TREE;
40641 break;
40643 default:
40644 return NULL_TREE;
40647 bname = IDENTIFIER_POINTER (DECL_NAME (builtin_decl_implicit (fn)));
40649 if (fn == BUILT_IN_LOGF)
40650 strcpy (name, "vmlsLn4");
40651 else if (fn == BUILT_IN_LOG)
40652 strcpy (name, "vmldLn2");
40653 else if (n == 4)
40655 sprintf (name, "vmls%s", bname+10);
40656 name[strlen (name)-1] = '4';
40658 else
40659 sprintf (name, "vmld%s2", bname+10);
40661 /* Convert to uppercase. */
40662 name[4] &= ~0x20;
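/* The resulting names look like e.g. "vmlsSin4" for __builtin_sinf and
   "vmldSin2" for __builtin_sin: "__builtin_" is stripped via bname+10,
   the trailing character encodes the vector width, and the first letter
   of the math function name is upper-cased here.  */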
40664 arity = 0;
40665 for (args = DECL_ARGUMENTS (builtin_decl_implicit (fn));
40666 args;
40667 args = TREE_CHAIN (args))
40668 arity++;
40670 if (arity == 1)
40671 fntype = build_function_type_list (type_out, type_in, NULL);
40672 else
40673 fntype = build_function_type_list (type_out, type_in, type_in, NULL);
40675 /* Build a function declaration for the vectorized function. */
40676 new_fndecl = build_decl (BUILTINS_LOCATION,
40677 FUNCTION_DECL, get_identifier (name), fntype);
40678 TREE_PUBLIC (new_fndecl) = 1;
40679 DECL_EXTERNAL (new_fndecl) = 1;
40680 DECL_IS_NOVOPS (new_fndecl) = 1;
40681 TREE_READONLY (new_fndecl) = 1;
40683 return new_fndecl;
40686 /* Handler for an ACML-style interface to
40687 a library with vectorized intrinsics. */
40689 static tree
40690 ix86_veclibabi_acml (enum built_in_function fn, tree type_out, tree type_in)
40692 char name[20] = "__vr.._";
40693 tree fntype, new_fndecl, args;
40694 unsigned arity;
40695 const char *bname;
40696 machine_mode el_mode, in_mode;
40697 int n, in_n;
40699 /* The ACML library is 64-bit only and suitable only for unsafe math, as
40700 it does not correctly support parts of IEEE (such as denormals)
40701 with the required precision. */
40702 if (!TARGET_64BIT
40703 || !flag_unsafe_math_optimizations)
40704 return NULL_TREE;
40706 el_mode = TYPE_MODE (TREE_TYPE (type_out));
40707 n = TYPE_VECTOR_SUBPARTS (type_out);
40708 in_mode = TYPE_MODE (TREE_TYPE (type_in));
40709 in_n = TYPE_VECTOR_SUBPARTS (type_in);
40710 if (el_mode != in_mode
40711 || n != in_n)
40712 return NULL_TREE;
40714 switch (fn)
40716 case BUILT_IN_SIN:
40717 case BUILT_IN_COS:
40718 case BUILT_IN_EXP:
40719 case BUILT_IN_LOG:
40720 case BUILT_IN_LOG2:
40721 case BUILT_IN_LOG10:
40722 name[4] = 'd';
40723 name[5] = '2';
40724 if (el_mode != DFmode
40725 || n != 2)
40726 return NULL_TREE;
40727 break;
40729 case BUILT_IN_SINF:
40730 case BUILT_IN_COSF:
40731 case BUILT_IN_EXPF:
40732 case BUILT_IN_POWF:
40733 case BUILT_IN_LOGF:
40734 case BUILT_IN_LOG2F:
40735 case BUILT_IN_LOG10F:
40736 name[4] = 's';
40737 name[5] = '4';
40738 if (el_mode != SFmode
40739 || n != 4)
40740 return NULL_TREE;
40741 break;
40743 default:
40744 return NULL_TREE;
40747 bname = IDENTIFIER_POINTER (DECL_NAME (builtin_decl_implicit (fn)));
40748 sprintf (name + 7, "%s", bname+10);
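/* The resulting names look like e.g. "__vrd2_sin" for __builtin_sin and
   "__vrs4_sinf" for __builtin_sinf.  */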
40750 arity = 0;
40751 for (args = DECL_ARGUMENTS (builtin_decl_implicit (fn));
40752 args;
40753 args = TREE_CHAIN (args))
40754 arity++;
40756 if (arity == 1)
40757 fntype = build_function_type_list (type_out, type_in, NULL);
40758 else
40759 fntype = build_function_type_list (type_out, type_in, type_in, NULL);
40761 /* Build a function declaration for the vectorized function. */
40762 new_fndecl = build_decl (BUILTINS_LOCATION,
40763 FUNCTION_DECL, get_identifier (name), fntype);
40764 TREE_PUBLIC (new_fndecl) = 1;
40765 DECL_EXTERNAL (new_fndecl) = 1;
40766 DECL_IS_NOVOPS (new_fndecl) = 1;
40767 TREE_READONLY (new_fndecl) = 1;
40769 return new_fndecl;
40772 /* Returns a decl of a function that implements gather load with
40773 memory type MEM_VECTYPE and index type INDEX_VECTYPE and SCALE.
40774 Return NULL_TREE if it is not available. */
40776 static tree
40777 ix86_vectorize_builtin_gather (const_tree mem_vectype,
40778 const_tree index_type, int scale)
40780 bool si;
40781 enum ix86_builtins code;
40783 if (! TARGET_AVX2)
40784 return NULL_TREE;
40786 if ((TREE_CODE (index_type) != INTEGER_TYPE
40787 && !POINTER_TYPE_P (index_type))
40788 || (TYPE_MODE (index_type) != SImode
40789 && TYPE_MODE (index_type) != DImode))
40790 return NULL_TREE;
40792 if (TYPE_PRECISION (index_type) > POINTER_SIZE)
40793 return NULL_TREE;
40795 /* v*gather* insn sign extends index to pointer mode. */
40796 if (TYPE_PRECISION (index_type) < POINTER_SIZE
40797 && TYPE_UNSIGNED (index_type))
40798 return NULL_TREE;
40800 if (scale <= 0
40801 || scale > 8
40802 || (scale & (scale - 1)) != 0)
40803 return NULL_TREE;
40805 si = TYPE_MODE (index_type) == SImode;
40806 switch (TYPE_MODE (mem_vectype))
40808 case V2DFmode:
40809 if (TARGET_AVX512VL)
40810 code = si ? IX86_BUILTIN_GATHER3SIV2DF : IX86_BUILTIN_GATHER3DIV2DF;
40811 else
40812 code = si ? IX86_BUILTIN_GATHERSIV2DF : IX86_BUILTIN_GATHERDIV2DF;
40813 break;
40814 case V4DFmode:
40815 if (TARGET_AVX512VL)
40816 code = si ? IX86_BUILTIN_GATHER3ALTSIV4DF : IX86_BUILTIN_GATHER3DIV4DF;
40817 else
40818 code = si ? IX86_BUILTIN_GATHERALTSIV4DF : IX86_BUILTIN_GATHERDIV4DF;
40819 break;
40820 case V2DImode:
40821 if (TARGET_AVX512VL)
40822 code = si ? IX86_BUILTIN_GATHER3SIV2DI : IX86_BUILTIN_GATHER3DIV2DI;
40823 else
40824 code = si ? IX86_BUILTIN_GATHERSIV2DI : IX86_BUILTIN_GATHERDIV2DI;
40825 break;
40826 case V4DImode:
40827 if (TARGET_AVX512VL)
40828 code = si ? IX86_BUILTIN_GATHER3ALTSIV4DI : IX86_BUILTIN_GATHER3DIV4DI;
40829 else
40830 code = si ? IX86_BUILTIN_GATHERALTSIV4DI : IX86_BUILTIN_GATHERDIV4DI;
40831 break;
40832 case V4SFmode:
40833 if (TARGET_AVX512VL)
40834 code = si ? IX86_BUILTIN_GATHER3SIV4SF : IX86_BUILTIN_GATHER3DIV4SF;
40835 else
40836 code = si ? IX86_BUILTIN_GATHERSIV4SF : IX86_BUILTIN_GATHERDIV4SF;
40837 break;
40838 case V8SFmode:
40839 if (TARGET_AVX512VL)
40840 code = si ? IX86_BUILTIN_GATHER3SIV8SF : IX86_BUILTIN_GATHER3ALTDIV8SF;
40841 else
40842 code = si ? IX86_BUILTIN_GATHERSIV8SF : IX86_BUILTIN_GATHERALTDIV8SF;
40843 break;
40844 case V4SImode:
40845 if (TARGET_AVX512VL)
40846 code = si ? IX86_BUILTIN_GATHER3SIV4SI : IX86_BUILTIN_GATHER3DIV4SI;
40847 else
40848 code = si ? IX86_BUILTIN_GATHERSIV4SI : IX86_BUILTIN_GATHERDIV4SI;
40849 break;
40850 case V8SImode:
40851 if (TARGET_AVX512VL)
40852 code = si ? IX86_BUILTIN_GATHER3SIV8SI : IX86_BUILTIN_GATHER3ALTDIV8SI;
40853 else
40854 code = si ? IX86_BUILTIN_GATHERSIV8SI : IX86_BUILTIN_GATHERALTDIV8SI;
40855 break;
40856 case V8DFmode:
40857 if (TARGET_AVX512F)
40858 code = si ? IX86_BUILTIN_GATHER3ALTSIV8DF : IX86_BUILTIN_GATHER3DIV8DF;
40859 else
40860 return NULL_TREE;
40861 break;
40862 case V8DImode:
40863 if (TARGET_AVX512F)
40864 code = si ? IX86_BUILTIN_GATHER3ALTSIV8DI : IX86_BUILTIN_GATHER3DIV8DI;
40865 else
40866 return NULL_TREE;
40867 break;
40868 case V16SFmode:
40869 if (TARGET_AVX512F)
40870 code = si ? IX86_BUILTIN_GATHER3SIV16SF : IX86_BUILTIN_GATHER3ALTDIV16SF;
40871 else
40872 return NULL_TREE;
40873 break;
40874 case V16SImode:
40875 if (TARGET_AVX512F)
40876 code = si ? IX86_BUILTIN_GATHER3SIV16SI : IX86_BUILTIN_GATHER3ALTDIV16SI;
40877 else
40878 return NULL_TREE;
40879 break;
40880 default:
40881 return NULL_TREE;
40884 return ix86_get_builtin (code);
40887 /* Returns a code for a target-specific builtin that implements
40888 reciprocal of the function, or NULL_TREE if not available. */
40890 static tree
40891 ix86_builtin_reciprocal (unsigned int fn, bool md_fn, bool)
40893 if (! (TARGET_SSE_MATH && !optimize_insn_for_size_p ()
40894 && flag_finite_math_only && !flag_trapping_math
40895 && flag_unsafe_math_optimizations))
40896 return NULL_TREE;
40898 if (md_fn)
40899 /* Machine dependent builtins. */
40900 switch (fn)
40902 /* Vectorized version of sqrt to rsqrt conversion. */
40903 case IX86_BUILTIN_SQRTPS_NR:
40904 return ix86_get_builtin (IX86_BUILTIN_RSQRTPS_NR);
40906 case IX86_BUILTIN_SQRTPS_NR256:
40907 return ix86_get_builtin (IX86_BUILTIN_RSQRTPS_NR256);
40909 default:
40910 return NULL_TREE;
40912 else
40913 /* Normal builtins. */
40914 switch (fn)
40916 /* Sqrt to rsqrt conversion. */
40917 case BUILT_IN_SQRTF:
40918 return ix86_get_builtin (IX86_BUILTIN_RSQRTF);
40920 default:
40921 return NULL_TREE;
40925 /* Helper for avx_vpermilps256_operand et al. This is also used by
40926 the expansion functions to turn the parallel back into a mask.
40927 The return value is 0 for no match and the imm8+1 for a match. */
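/* For example, for V4SFmode (two mask bits per element) the parallel
   (1 0 3 2) reconstructs imm8 0xb1, so the function returns 0xb2.  */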
40929 int
40930 avx_vpermilp_parallel (rtx par, machine_mode mode)
40932 unsigned i, nelt = GET_MODE_NUNITS (mode);
40933 unsigned mask = 0;
40934 unsigned char ipar[16] = {}; /* Silence -Wuninitialized warning. */
40936 if (XVECLEN (par, 0) != (int) nelt)
40937 return 0;
40939 /* Validate that all of the elements are constants, and not totally
40940 out of range. Copy the data into an integral array to make the
40941 subsequent checks easier. */
40942 for (i = 0; i < nelt; ++i)
40944 rtx er = XVECEXP (par, 0, i);
40945 unsigned HOST_WIDE_INT ei;
40947 if (!CONST_INT_P (er))
40948 return 0;
40949 ei = INTVAL (er);
40950 if (ei >= nelt)
40951 return 0;
40952 ipar[i] = ei;
40955 switch (mode)
40957 case V8DFmode:
40958 /* In the 512-bit DFmode case, we can only move elements within
40959 a 128-bit lane. First fill the second part of the mask,
40960 then fallthru. */
40961 for (i = 4; i < 6; ++i)
40963 if (ipar[i] < 4 || ipar[i] >= 6)
40964 return 0;
40965 mask |= (ipar[i] - 4) << i;
40967 for (i = 6; i < 8; ++i)
40969 if (ipar[i] < 6)
40970 return 0;
40971 mask |= (ipar[i] - 6) << i;
40973 /* FALLTHRU */
40975 case V4DFmode:
40976 /* In the 256-bit DFmode case, we can only move elements within
40977 a 128-bit lane. */
40978 for (i = 0; i < 2; ++i)
40980 if (ipar[i] >= 2)
40981 return 0;
40982 mask |= ipar[i] << i;
40984 for (i = 2; i < 4; ++i)
40986 if (ipar[i] < 2)
40987 return 0;
40988 mask |= (ipar[i] - 2) << i;
40990 break;
40992 case V16SFmode:
40993 /* In the 512-bit SFmode case, the permutation in the upper 256 bits
40994 must mirror the permutation in the lower 256 bits. */
40995 for (i = 0; i < 8; ++i)
40996 if (ipar[i] + 8 != ipar[i + 8])
40997 return 0;
40998 /* FALLTHRU */
41000 case V8SFmode:
41001 /* In the 256-bit SFmode case, we have full freedom of
41002 movement within the low 128-bit lane, but the high 128-bit
41003 lane must mirror the exact same pattern. */
41004 for (i = 0; i < 4; ++i)
41005 if (ipar[i] + 4 != ipar[i + 4])
41006 return 0;
41007 nelt = 4;
41008 /* FALLTHRU */
41010 case V2DFmode:
41011 case V4SFmode:
41012 /* In the 128-bit case, we have full freedom in the placement of
41013 the elements from the source operand. */
41014 for (i = 0; i < nelt; ++i)
41015 mask |= ipar[i] << (i * (nelt / 2));
41016 break;
41018 default:
41019 gcc_unreachable ();
41022 /* Make sure success has a non-zero value by adding one. */
41023 return mask + 1;
41026 /* Helper for avx_vperm2f128_v4df_operand et al. This is also used by
41027 the expansion functions to turn the parallel back into a mask.
41028 The return value is 0 for no match and the imm8+1 for a match. */
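/* For example, for V4DFmode the parallel (2 3 0 1), which swaps the two
   128-bit halves of the first source, reconstructs imm8 0x01 and the
   function returns 0x02.  */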
41030 int
41031 avx_vperm2f128_parallel (rtx par, machine_mode mode)
41033 unsigned i, nelt = GET_MODE_NUNITS (mode), nelt2 = nelt / 2;
41034 unsigned mask = 0;
41035 unsigned char ipar[8] = {}; /* Silence -Wuninitialized warning. */
41037 if (XVECLEN (par, 0) != (int) nelt)
41038 return 0;
41040 /* Validate that all of the elements are constants, and not totally
41041 out of range. Copy the data into an integral array to make the
41042 subsequent checks easier. */
41043 for (i = 0; i < nelt; ++i)
41045 rtx er = XVECEXP (par, 0, i);
41046 unsigned HOST_WIDE_INT ei;
41048 if (!CONST_INT_P (er))
41049 return 0;
41050 ei = INTVAL (er);
41051 if (ei >= 2 * nelt)
41052 return 0;
41053 ipar[i] = ei;
41056 /* Validate that the halves of the permute are halves. */
41057 for (i = 0; i < nelt2 - 1; ++i)
41058 if (ipar[i] + 1 != ipar[i + 1])
41059 return 0;
41060 for (i = nelt2; i < nelt - 1; ++i)
41061 if (ipar[i] + 1 != ipar[i + 1])
41062 return 0;
41064 /* Reconstruct the mask. */
41065 for (i = 0; i < 2; ++i)
41067 unsigned e = ipar[i * nelt2];
41068 if (e % nelt2)
41069 return 0;
41070 e /= nelt2;
41071 mask |= e << (i * 4);
41074 /* Make sure success has a non-zero value by adding one. */
41075 return mask + 1;
41078 /* Return a register priority for hard reg REGNO. */
41079 static int
41080 ix86_register_priority (int hard_regno)
41082 /* ebp and r13 as the base always wants a displacement, r12 as the
41083 base always wants an index. So discourage their usage in an
41084 address. */
41085 if (hard_regno == R12_REG || hard_regno == R13_REG)
41086 return 0;
41087 if (hard_regno == BP_REG)
41088 return 1;
41089 /* New x86-64 int registers result in bigger code size. Discourage
41090 them. */
41091 if (FIRST_REX_INT_REG <= hard_regno && hard_regno <= LAST_REX_INT_REG)
41092 return 2;
41093 /* New x86-64 SSE registers result in bigger code size. Discourage
41094 them. */
41095 if (FIRST_REX_SSE_REG <= hard_regno && hard_regno <= LAST_REX_SSE_REG)
41096 return 2;
41097 /* Usage of AX register results in smaller code. Prefer it. */
41098 if (hard_regno == 0)
41099 return 4;
41100 return 3;
41103 /* Implement TARGET_PREFERRED_RELOAD_CLASS.
41105 Put float CONST_DOUBLE in the constant pool instead of fp regs.
41106 QImode must go into class Q_REGS.
41107 Narrow ALL_REGS to GENERAL_REGS. This allows movsf and
41108 movdf to do mem-to-mem moves through integer regs. */
41110 static reg_class_t
41111 ix86_preferred_reload_class (rtx x, reg_class_t regclass)
41113 machine_mode mode = GET_MODE (x);
41115 /* We're only allowed to return a subclass of CLASS. Many of the
41116 following checks fail for NO_REGS, so eliminate that early. */
41117 if (regclass == NO_REGS)
41118 return NO_REGS;
41120 /* All classes can load zeros. */
41121 if (x == CONST0_RTX (mode))
41122 return regclass;
41124 /* Force constants into memory if we are loading a (nonzero) constant into
41125 an MMX, SSE or MASK register. This is because there are no MMX/SSE/MASK
41126 instructions to load from a constant. */
41127 if (CONSTANT_P (x)
41128 && (MAYBE_MMX_CLASS_P (regclass)
41129 || MAYBE_SSE_CLASS_P (regclass)
41130 || MAYBE_MASK_CLASS_P (regclass)))
41131 return NO_REGS;
41133 /* Prefer SSE regs only, if we can use them for math. */
41134 if (TARGET_SSE_MATH && !TARGET_MIX_SSE_I387 && SSE_FLOAT_MODE_P (mode))
41135 return SSE_CLASS_P (regclass) ? regclass : NO_REGS;
41137 /* Floating-point constants need more complex checks. */
41138 if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) != VOIDmode)
41140 /* General regs can load everything. */
41141 if (reg_class_subset_p (regclass, GENERAL_REGS))
41142 return regclass;
41144 /* Floats can load 0 and 1 plus some others. Note that we eliminated
41145 zero above. We only want to wind up preferring 80387 registers if
41146 we plan on doing computation with them. */
41147 if (TARGET_80387
41148 && standard_80387_constant_p (x) > 0)
41150 /* Limit class to non-sse. */
41151 if (regclass == FLOAT_SSE_REGS)
41152 return FLOAT_REGS;
41153 if (regclass == FP_TOP_SSE_REGS)
41154 return FP_TOP_REG;
41155 if (regclass == FP_SECOND_SSE_REGS)
41156 return FP_SECOND_REG;
41157 if (regclass == FLOAT_INT_REGS || regclass == FLOAT_REGS)
41158 return regclass;
41161 return NO_REGS;
41164 /* Generally when we see PLUS here, it's the function invariant
41165 (plus soft-fp const_int). Which can only be computed into general
41166 regs. */
41167 if (GET_CODE (x) == PLUS)
41168 return reg_class_subset_p (regclass, GENERAL_REGS) ? regclass : NO_REGS;
41170 /* QImode constants are easy to load, but non-constant QImode data
41171 must go into Q_REGS. */
41172 if (GET_MODE (x) == QImode && !CONSTANT_P (x))
41174 if (reg_class_subset_p (regclass, Q_REGS))
41175 return regclass;
41176 if (reg_class_subset_p (Q_REGS, regclass))
41177 return Q_REGS;
41178 return NO_REGS;
41181 return regclass;
41184 /* Discourage putting floating-point values in SSE registers unless
41185 SSE math is being used, and likewise for the 387 registers. */
41186 static reg_class_t
41187 ix86_preferred_output_reload_class (rtx x, reg_class_t regclass)
41189 machine_mode mode = GET_MODE (x);
41191 /* Restrict the output reload class to the register bank that we are doing
41192 math on. If we would like not to return a subset of CLASS, reject this
41193 alternative: if reload cannot do this, it will still use its choice. */
41194 mode = GET_MODE (x);
41195 if (TARGET_SSE_MATH && SSE_FLOAT_MODE_P (mode))
41196 return MAYBE_SSE_CLASS_P (regclass) ? ALL_SSE_REGS : NO_REGS;
41198 if (X87_FLOAT_MODE_P (mode))
41200 if (regclass == FP_TOP_SSE_REGS)
41201 return FP_TOP_REG;
41202 else if (regclass == FP_SECOND_SSE_REGS)
41203 return FP_SECOND_REG;
41204 else
41205 return FLOAT_CLASS_P (regclass) ? regclass : NO_REGS;
41208 return regclass;
41211 static reg_class_t
41212 ix86_secondary_reload (bool in_p, rtx x, reg_class_t rclass,
41213 machine_mode mode, secondary_reload_info *sri)
41215 /* Double-word spills from general registers to non-offsettable memory
41216 references (zero-extended addresses) require special handling. */
41217 if (TARGET_64BIT
41218 && MEM_P (x)
41219 && GET_MODE_SIZE (mode) > UNITS_PER_WORD
41220 && INTEGER_CLASS_P (rclass)
41221 && !offsettable_memref_p (x))
41223 sri->icode = (in_p
41224 ? CODE_FOR_reload_noff_load
41225 : CODE_FOR_reload_noff_store);
41226 /* Add the cost of moving address to a temporary. */
41227 sri->extra_cost = 1;
41229 return NO_REGS;
41232 /* QImode spills from non-QI registers require
41233 intermediate register on 32bit targets. */
41234 if (mode == QImode
41235 && (MAYBE_MASK_CLASS_P (rclass)
41236 || (!TARGET_64BIT && !in_p
41237 && INTEGER_CLASS_P (rclass)
41238 && MAYBE_NON_Q_CLASS_P (rclass))))
41240 int regno;
41242 if (REG_P (x))
41243 regno = REGNO (x);
41244 else
41245 regno = -1;
41247 if (regno >= FIRST_PSEUDO_REGISTER || GET_CODE (x) == SUBREG)
41248 regno = true_regnum (x);
41250 /* Return Q_REGS if the operand is in memory. */
41251 if (regno == -1)
41252 return Q_REGS;
41255 /* This condition handles corner case where an expression involving
41256 pointers gets vectorized. We're trying to use the address of a
41257 stack slot as a vector initializer.
41259 (set (reg:V2DI 74 [ vect_cst_.2 ])
41260 (vec_duplicate:V2DI (reg/f:DI 20 frame)))
41262 Eventually frame gets turned into sp+offset like this:
41264 (set (reg:V2DI 21 xmm0 [orig:74 vect_cst_.2 ] [74])
41265 (vec_duplicate:V2DI (plus:DI (reg/f:DI 7 sp)
41266 (const_int 392 [0x188]))))
41268 That later gets turned into:
41270 (set (reg:V2DI 21 xmm0 [orig:74 vect_cst_.2 ] [74])
41271 (vec_duplicate:V2DI (plus:DI (reg/f:DI 7 sp)
41272 (mem/u/c/i:DI (symbol_ref/u:DI ("*.LC0") [flags 0x2]) [0 S8 A64]))))
41274 We'll have the following reload recorded:
41276 Reload 0: reload_in (DI) =
41277 (plus:DI (reg/f:DI 7 sp)
41278 (mem/u/c/i:DI (symbol_ref/u:DI ("*.LC0") [flags 0x2]) [0 S8 A64]))
41279 reload_out (V2DI) = (reg:V2DI 21 xmm0 [orig:74 vect_cst_.2 ] [74])
41280 SSE_REGS, RELOAD_OTHER (opnum = 0), can't combine
41281 reload_in_reg: (plus:DI (reg/f:DI 7 sp) (const_int 392 [0x188]))
41282 reload_out_reg: (reg:V2DI 21 xmm0 [orig:74 vect_cst_.2 ] [74])
41283 reload_reg_rtx: (reg:V2DI 22 xmm1)
41285 Which isn't going to work since SSE instructions can't handle scalar
41286 additions. Returning GENERAL_REGS forces the addition into integer
41287 register and reload can handle subsequent reloads without problems. */
41289 if (in_p && GET_CODE (x) == PLUS
41290 && SSE_CLASS_P (rclass)
41291 && SCALAR_INT_MODE_P (mode))
41292 return GENERAL_REGS;
41294 return NO_REGS;
41297 /* Implement TARGET_CLASS_LIKELY_SPILLED_P. */
41299 static bool
41300 ix86_class_likely_spilled_p (reg_class_t rclass)
41302 switch (rclass)
41304 case AREG:
41305 case DREG:
41306 case CREG:
41307 case BREG:
41308 case AD_REGS:
41309 case SIREG:
41310 case DIREG:
41311 case SSE_FIRST_REG:
41312 case FP_TOP_REG:
41313 case FP_SECOND_REG:
41314 case BND_REGS:
41315 return true;
41317 default:
41318 break;
41321 return false;
41324 /* If we are copying between general and FP registers, we need a memory
41325 location. The same is true for SSE and MMX registers.
41327 To optimize register_move_cost performance, allow inline variant.
41329 The macro can't work reliably when one of the CLASSES is class containing
41330 registers from multiple units (SSE, MMX, integer). We avoid this by never
41331 combining those units in single alternative in the machine description.
41332 Ensure that this constraint holds to avoid unexpected surprises.
41334 When STRICT is false, we are being called from REGISTER_MOVE_COST, so do not
41335 enforce these sanity checks. */
41337 static inline bool
41338 inline_secondary_memory_needed (enum reg_class class1, enum reg_class class2,
41339 machine_mode mode, int strict)
41341 if (lra_in_progress && (class1 == NO_REGS || class2 == NO_REGS))
41342 return false;
41343 if (MAYBE_FLOAT_CLASS_P (class1) != FLOAT_CLASS_P (class1)
41344 || MAYBE_FLOAT_CLASS_P (class2) != FLOAT_CLASS_P (class2)
41345 || MAYBE_SSE_CLASS_P (class1) != SSE_CLASS_P (class1)
41346 || MAYBE_SSE_CLASS_P (class2) != SSE_CLASS_P (class2)
41347 || MAYBE_MMX_CLASS_P (class1) != MMX_CLASS_P (class1)
41348 || MAYBE_MMX_CLASS_P (class2) != MMX_CLASS_P (class2))
41350 gcc_assert (!strict || lra_in_progress);
41351 return true;
41354 if (FLOAT_CLASS_P (class1) != FLOAT_CLASS_P (class2))
41355 return true;
41357 /* Between mask and general, we have moves no larger than word size. */
41358 if ((MAYBE_MASK_CLASS_P (class1) != MAYBE_MASK_CLASS_P (class2))
41359 && (GET_MODE_SIZE (mode) > UNITS_PER_WORD))
41360 return true;
41362 /* ??? This is a lie. We do have moves between mmx/general, and for
41363 mmx/sse2. But by saying we need secondary memory we discourage the
41364 register allocator from using the mmx registers unless needed. */
41365 if (MMX_CLASS_P (class1) != MMX_CLASS_P (class2))
41366 return true;
41368 if (SSE_CLASS_P (class1) != SSE_CLASS_P (class2))
41370 /* SSE1 doesn't have any direct moves from other classes. */
41371 if (!TARGET_SSE2)
41372 return true;
41374 /* If the target says that inter-unit moves are more expensive
41375 than moving through memory, then don't generate them. */
41376 if ((SSE_CLASS_P (class1) && !TARGET_INTER_UNIT_MOVES_FROM_VEC)
41377 || (SSE_CLASS_P (class2) && !TARGET_INTER_UNIT_MOVES_TO_VEC))
41378 return true;
41380 /* Between SSE and general, we have moves no larger than word size. */
41381 if (GET_MODE_SIZE (mode) > UNITS_PER_WORD)
41382 return true;
41385 return false;
41388 bool
41389 ix86_secondary_memory_needed (enum reg_class class1, enum reg_class class2,
41390 machine_mode mode, int strict)
41392 return inline_secondary_memory_needed (class1, class2, mode, strict);
41395 /* Implement the TARGET_CLASS_MAX_NREGS hook.
41397 On the 80386, this is the size of MODE in words,
41398 except in the FP regs, where a single reg is always enough. */
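/* For example, on ia32 an XFmode value occupies 3 registers in
   GENERAL_REGS (2 on 64-bit targets), while it needs only a single
   register in FLOAT_REGS; complex modes need 2 non-integer registers.  */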
41400 static unsigned char
41401 ix86_class_max_nregs (reg_class_t rclass, machine_mode mode)
41403 if (MAYBE_INTEGER_CLASS_P (rclass))
41405 if (mode == XFmode)
41406 return (TARGET_64BIT ? 2 : 3);
41407 else if (mode == XCmode)
41408 return (TARGET_64BIT ? 4 : 6);
41409 else
41410 return ((GET_MODE_SIZE (mode) + UNITS_PER_WORD - 1) / UNITS_PER_WORD);
41412 else
41414 if (COMPLEX_MODE_P (mode))
41415 return 2;
41416 else
41417 return 1;
41421 /* Return true if the registers in CLASS cannot represent the change from
41422 modes FROM to TO. */
41424 bool
41425 ix86_cannot_change_mode_class (machine_mode from, machine_mode to,
41426 enum reg_class regclass)
41428 if (from == to)
41429 return false;
41431 /* x87 registers can't do subreg at all, as all values are reformatted
41432 to extended precision. */
41433 if (MAYBE_FLOAT_CLASS_P (regclass))
41434 return true;
41436 if (MAYBE_SSE_CLASS_P (regclass) || MAYBE_MMX_CLASS_P (regclass))
41438 /* Vector registers do not support QI or HImode loads. If we don't
41439 disallow a change to these modes, reload will assume it's ok to
41440 drop the subreg from (subreg:SI (reg:HI 100) 0). This affects
41441 the vec_dupv4hi pattern. */
41442 if (GET_MODE_SIZE (from) < 4)
41443 return true;
41446 return false;
41449 /* Return the cost of moving data of mode M between a
41450 register and memory. A value of 2 is the default; this cost is
41451 relative to those in `REGISTER_MOVE_COST'.
41453 This function is used extensively by register_move_cost that is used to
41454 build tables at startup. Make it inline in this case.
41455 When IN is 2, return maximum of in and out move cost.
41457 If moving between registers and memory is more expensive than
41458 between two registers, you should define this macro to express the
41459 relative cost.
41461 Also model the increased cost of moving QImode registers in
41462 non-Q_REGS classes.  */
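/* For example, an SFmode value in FLOAT_REGS uses fp_load[0]/fp_store[0];
   with IN == 2 the maximum of the load and store costs is returned.  */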
41464 static inline int
41465 inline_memory_move_cost (machine_mode mode, enum reg_class regclass,
41466 int in)
41468 int cost;
41469 if (FLOAT_CLASS_P (regclass))
41471 int index;
41472 switch (mode)
41474 case SFmode:
41475 index = 0;
41476 break;
41477 case DFmode:
41478 index = 1;
41479 break;
41480 case XFmode:
41481 index = 2;
41482 break;
41483 default:
41484 return 100;
41486 if (in == 2)
41487 return MAX (ix86_cost->fp_load [index], ix86_cost->fp_store [index]);
41488 return in ? ix86_cost->fp_load [index] : ix86_cost->fp_store [index];
41490 if (SSE_CLASS_P (regclass))
41492 int index;
41493 switch (GET_MODE_SIZE (mode))
41495 case 4:
41496 index = 0;
41497 break;
41498 case 8:
41499 index = 1;
41500 break;
41501 case 16:
41502 index = 2;
41503 break;
41504 default:
41505 return 100;
41507 if (in == 2)
41508 return MAX (ix86_cost->sse_load [index], ix86_cost->sse_store [index]);
41509 return in ? ix86_cost->sse_load [index] : ix86_cost->sse_store [index];
41511 if (MMX_CLASS_P (regclass))
41513 int index;
41514 switch (GET_MODE_SIZE (mode))
41516 case 4:
41517 index = 0;
41518 break;
41519 case 8:
41520 index = 1;
41521 break;
41522 default:
41523 return 100;
41525 if (in == 2)
41526 return MAX (ix86_cost->mmx_load [index], ix86_cost->mmx_store [index]);
41527 return in ? ix86_cost->mmx_load [index] : ix86_cost->mmx_store [index];
41529 switch (GET_MODE_SIZE (mode))
41531 case 1:
41532 if (Q_CLASS_P (regclass) || TARGET_64BIT)
41534 if (!in)
41535 return ix86_cost->int_store[0];
41536 if (TARGET_PARTIAL_REG_DEPENDENCY
41537 && optimize_function_for_speed_p (cfun))
41538 cost = ix86_cost->movzbl_load;
41539 else
41540 cost = ix86_cost->int_load[0];
41541 if (in == 2)
41542 return MAX (cost, ix86_cost->int_store[0]);
41543 return cost;
41545 else
41547 if (in == 2)
41548 return MAX (ix86_cost->movzbl_load, ix86_cost->int_store[0] + 4);
41549 if (in)
41550 return ix86_cost->movzbl_load;
41551 else
41552 return ix86_cost->int_store[0] + 4;
41554 break;
41555 case 2:
41556 if (in == 2)
41557 return MAX (ix86_cost->int_load[1], ix86_cost->int_store[1]);
41558 return in ? ix86_cost->int_load[1] : ix86_cost->int_store[1];
41559 default:
41560 /* Compute number of 32bit moves needed. TFmode is moved as XFmode. */
41561 if (mode == TFmode)
41562 mode = XFmode;
41563 if (in == 2)
41564 cost = MAX (ix86_cost->int_load[2] , ix86_cost->int_store[2]);
41565 else if (in)
41566 cost = ix86_cost->int_load[2];
41567 else
41568 cost = ix86_cost->int_store[2];
41569 return (cost * (((int) GET_MODE_SIZE (mode)
41570 + UNITS_PER_WORD - 1) / UNITS_PER_WORD));
41574 static int
41575 ix86_memory_move_cost (machine_mode mode, reg_class_t regclass,
41576 bool in)
41578 return inline_memory_move_cost (mode, (enum reg_class) regclass, in ? 1 : 0);
41582 /* Return the cost of moving data from a register in class CLASS1 to
41583 one in class CLASS2.
41585 It is not required that the cost always equal 2 when FROM is the same as TO;
41586 on some machines it is expensive to move between registers if they are not
41587 general registers. */
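/* For example, a word-sized move between SSE_REGS and GENERAL_REGS on an
   SSE2 target that allows inter-unit moves needs no secondary memory and
   is costed as MAX (8, ix86_cost->mmxsse_to_integer) below.  */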
41589 static int
41590 ix86_register_move_cost (machine_mode mode, reg_class_t class1_i,
41591 reg_class_t class2_i)
41593 enum reg_class class1 = (enum reg_class) class1_i;
41594 enum reg_class class2 = (enum reg_class) class2_i;
41596 /* In case we require secondary memory, compute cost of the store followed
41597 by load. In order to avoid bad register allocation choices, we need
41598 for this to be *at least* as high as the symmetric MEMORY_MOVE_COST. */
41600 if (inline_secondary_memory_needed (class1, class2, mode, 0))
41602 int cost = 1;
41604 cost += inline_memory_move_cost (mode, class1, 2);
41605 cost += inline_memory_move_cost (mode, class2, 2);
41607 /* In case of copying from general_purpose_register we may emit multiple
41608 stores followed by single load causing memory size mismatch stall.
41609 Count this as arbitrarily high cost of 20. */
41610 if (targetm.class_max_nregs (class1, mode)
41611 > targetm.class_max_nregs (class2, mode))
41612 cost += 20;
41614 /* In the case of FP/MMX moves, the registers actually overlap, and we
41615 have to switch modes in order to treat them differently. */
41616 if ((MMX_CLASS_P (class1) && MAYBE_FLOAT_CLASS_P (class2))
41617 || (MMX_CLASS_P (class2) && MAYBE_FLOAT_CLASS_P (class1)))
41618 cost += 20;
41620 return cost;
41623 /* Moves between SSE/MMX and integer unit are expensive. */
41624 if (MMX_CLASS_P (class1) != MMX_CLASS_P (class2)
41625 || SSE_CLASS_P (class1) != SSE_CLASS_P (class2))
41627 /* ??? By keeping the returned value relatively high, we limit the number
41628 of moves between integer and MMX/SSE registers for all targets.
41629 Additionally, a high value prevents a problem with x86_modes_tieable_p(),
41630 where integer modes in MMX/SSE registers are not tieable
41631 because of missing QImode and HImode moves to, from or between
41632 MMX/SSE registers. */
41633 return MAX (8, ix86_cost->mmxsse_to_integer);
41635 if (MAYBE_FLOAT_CLASS_P (class1))
41636 return ix86_cost->fp_move;
41637 if (MAYBE_SSE_CLASS_P (class1))
41638 return ix86_cost->sse_move;
41639 if (MAYBE_MMX_CLASS_P (class1))
41640 return ix86_cost->mmx_move;
41641 return 2;
41644 /* Return TRUE if hard register REGNO can hold a value of machine-mode
41645 MODE. */
41647 bool
41648 ix86_hard_regno_mode_ok (int regno, machine_mode mode)
41650 /* Flags, and only flags, can hold CCmode values. */
41651 if (CC_REGNO_P (regno))
41652 return GET_MODE_CLASS (mode) == MODE_CC;
41653 if (GET_MODE_CLASS (mode) == MODE_CC
41654 || GET_MODE_CLASS (mode) == MODE_RANDOM
41655 || GET_MODE_CLASS (mode) == MODE_PARTIAL_INT)
41656 return false;
41657 if (STACK_REGNO_P (regno))
41658 return VALID_FP_MODE_P (mode);
41659 if (MASK_REGNO_P (regno))
41660 return (VALID_MASK_REG_MODE (mode)
41661 || ((TARGET_AVX512BW || TARGET_AVX512VBMI)
41662 && VALID_MASK_AVX512BW_MODE (mode)));
41663 if (BND_REGNO_P (regno))
41664 return VALID_BND_REG_MODE (mode);
41665 if (SSE_REGNO_P (regno))
41667 /* We implement the move patterns for all vector modes into and
41668 out of SSE registers, even when no operation instructions
41669 are available. */
41671 /* For AVX-512 we allow, regardless of regno:
41672 - XI mode
41673 - any 512-bit wide vector mode
41674 - any scalar mode. */
41675 if (TARGET_AVX512F
41676 && (mode == XImode
41677 || VALID_AVX512F_REG_MODE (mode)
41678 || VALID_AVX512F_SCALAR_MODE (mode)))
41679 return true;
41681 /* TODO check for QI/HI scalars. */
41682 /* AVX512VL allows SSE regs 16+ for 128/256-bit modes. */
41683 if (TARGET_AVX512VL
41684 && (mode == OImode
41685 || mode == TImode
41686 || VALID_AVX256_REG_MODE (mode)
41687 || VALID_AVX512VL_128_REG_MODE (mode)))
41688 return true;
41690 /* xmm16-xmm31 are only available for AVX-512. */
41691 if (EXT_REX_SSE_REGNO_P (regno))
41692 return false;
41694 /* OImode and AVX modes are available only when AVX is enabled. */
41695 return ((TARGET_AVX
41696 && VALID_AVX256_REG_OR_OI_MODE (mode))
41697 || VALID_SSE_REG_MODE (mode)
41698 || VALID_SSE2_REG_MODE (mode)
41699 || VALID_MMX_REG_MODE (mode)
41700 || VALID_MMX_REG_MODE_3DNOW (mode));
41702 if (MMX_REGNO_P (regno))
41704 /* We implement the move patterns for 3DNOW modes even in MMX mode,
41705 so if the register is available at all, then we can move data of
41706 the given mode into or out of it. */
41707 return (VALID_MMX_REG_MODE (mode)
41708 || VALID_MMX_REG_MODE_3DNOW (mode));
41711 if (mode == QImode)
41713 /* Take care for QImode values - they can be in non-QI regs,
41714 but then they do cause partial register stalls. */
41715 if (ANY_QI_REGNO_P (regno))
41716 return true;
41717 if (!TARGET_PARTIAL_REG_STALL)
41718 return true;
41719 /* LRA checks if the hard register is OK for the given mode.
41720 QImode values can live in non-QI regs, so we allow all
41721 registers here. */
41722 if (lra_in_progress)
41723 return true;
41724 return !can_create_pseudo_p ();
41726 /* We handle both integer and floats in the general purpose registers. */
41727 else if (VALID_INT_MODE_P (mode))
41728 return true;
41729 else if (VALID_FP_MODE_P (mode))
41730 return true;
41731 else if (VALID_DFP_MODE_P (mode))
41732 return true;
41733 /* Lots of MMX code casts 8 byte vector modes to DImode. If we then go
41734 on to use that value in smaller contexts, this can easily force a
41735 pseudo to be allocated to GENERAL_REGS. Since this is no worse than
41736 supporting DImode, allow it. */
41737 else if (VALID_MMX_REG_MODE_3DNOW (mode) || VALID_MMX_REG_MODE (mode))
41738 return true;
41740 return false;
41743 /* A subroutine of ix86_modes_tieable_p. Return true if MODE is a
41744 tieable integer mode. */
41746 static bool
41747 ix86_tieable_integer_mode_p (machine_mode mode)
41749 switch (mode)
41751 case HImode:
41752 case SImode:
41753 return true;
41755 case QImode:
41756 return TARGET_64BIT || !TARGET_PARTIAL_REG_STALL;
41758 case DImode:
41759 return TARGET_64BIT;
41761 default:
41762 return false;
41766 /* Return true if MODE1 is accessible in a register that can hold MODE2
41767 without copying. That is, all register classes that can hold MODE2
41768 can also hold MODE1. */
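/* For example, SFmode is tieable with DFmode or XFmode, while DImode and
   SImode are tieable only when DImode itself is a tieable integer mode,
   i.e. on 64-bit targets.  */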
41770 bool
41771 ix86_modes_tieable_p (machine_mode mode1, machine_mode mode2)
41773 if (mode1 == mode2)
41774 return true;
41776 if (ix86_tieable_integer_mode_p (mode1)
41777 && ix86_tieable_integer_mode_p (mode2))
41778 return true;
41780 /* MODE2 being XFmode implies fp stack or general regs, which means we
41781 can tie any smaller floating point modes to it. Note that we do not
41782 tie this with TFmode. */
41783 if (mode2 == XFmode)
41784 return mode1 == SFmode || mode1 == DFmode;
41786 /* MODE2 being DFmode implies fp stack, general or sse regs, which means
41787 that we can tie it with SFmode. */
41788 if (mode2 == DFmode)
41789 return mode1 == SFmode;
41791 /* If MODE2 is only appropriate for an SSE register, then tie with
41792 any other mode acceptable to SSE registers. */
41793 if (GET_MODE_SIZE (mode2) == 32
41794 && ix86_hard_regno_mode_ok (FIRST_SSE_REG, mode2))
41795 return (GET_MODE_SIZE (mode1) == 32
41796 && ix86_hard_regno_mode_ok (FIRST_SSE_REG, mode1));
41797 if (GET_MODE_SIZE (mode2) == 16
41798 && ix86_hard_regno_mode_ok (FIRST_SSE_REG, mode2))
41799 return (GET_MODE_SIZE (mode1) == 16
41800 && ix86_hard_regno_mode_ok (FIRST_SSE_REG, mode1));
41802 /* If MODE2 is appropriate for an MMX register, then tie
41803 with any other mode acceptable to MMX registers. */
41804 if (GET_MODE_SIZE (mode2) == 8
41805 && ix86_hard_regno_mode_ok (FIRST_MMX_REG, mode2))
41806 return (GET_MODE_SIZE (mode1) == 8
41807 && ix86_hard_regno_mode_ok (FIRST_MMX_REG, mode1));
41809 return false;
41812 /* Return the cost of moving between two registers of mode MODE. */
41814 static int
41815 ix86_set_reg_reg_cost (machine_mode mode)
41817 unsigned int units = UNITS_PER_WORD;
41819 switch (GET_MODE_CLASS (mode))
41821 default:
41822 break;
41824 case MODE_CC:
41825 units = GET_MODE_SIZE (CCmode);
41826 break;
41828 case MODE_FLOAT:
41829 if ((TARGET_SSE && mode == TFmode)
41830 || (TARGET_80387 && mode == XFmode)
41831 || ((TARGET_80387 || TARGET_SSE2) && mode == DFmode)
41832 || ((TARGET_80387 || TARGET_SSE) && mode == SFmode))
41833 units = GET_MODE_SIZE (mode);
41834 break;
41836 case MODE_COMPLEX_FLOAT:
41837 if ((TARGET_SSE && mode == TCmode)
41838 || (TARGET_80387 && mode == XCmode)
41839 || ((TARGET_80387 || TARGET_SSE2) && mode == DCmode)
41840 || ((TARGET_80387 || TARGET_SSE) && mode == SCmode))
41841 units = GET_MODE_SIZE (mode);
41842 break;
41844 case MODE_VECTOR_INT:
41845 case MODE_VECTOR_FLOAT:
41846 if ((TARGET_AVX512F && VALID_AVX512F_REG_MODE (mode))
41847 || (TARGET_AVX && VALID_AVX256_REG_MODE (mode))
41848 || (TARGET_SSE2 && VALID_SSE2_REG_MODE (mode))
41849 || (TARGET_SSE && VALID_SSE_REG_MODE (mode))
41850 || (TARGET_MMX && VALID_MMX_REG_MODE (mode)))
41851 units = GET_MODE_SIZE (mode);
41854 /* Return the cost of moving between two registers of mode MODE,
41855 assuming that the move will be in pieces of at most UNITS bytes. */
41856 return COSTS_N_INSNS ((GET_MODE_SIZE (mode) + units - 1) / units);
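/* For example, a DImode register move on ia32 (UNITS_PER_WORD == 4) is
   costed as COSTS_N_INSNS (2), while a 32-byte V8SFmode move on an AVX
   target moves in one piece and costs COSTS_N_INSNS (1).  */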
41859 /* Compute a (partial) cost for rtx X. Return true if the complete
41860 cost has been computed, and false if subexpressions should be
41861 scanned. In either case, *TOTAL contains the cost result. */
41863 static bool
41864 ix86_rtx_costs (rtx x, int code_i, int outer_code_i, int opno, int *total,
41865 bool speed)
41867 rtx mask;
41868 enum rtx_code code = (enum rtx_code) code_i;
41869 enum rtx_code outer_code = (enum rtx_code) outer_code_i;
41870 machine_mode mode = GET_MODE (x);
41871 const struct processor_costs *cost = speed ? ix86_cost : &ix86_size_cost;
41873 switch (code)
41875 case SET:
41876 if (register_operand (SET_DEST (x), VOIDmode)
41877 && reg_or_0_operand (SET_SRC (x), VOIDmode))
41879 *total = ix86_set_reg_reg_cost (GET_MODE (SET_DEST (x)));
41880 return true;
41882 return false;
41884 case CONST_INT:
41885 case CONST:
41886 case LABEL_REF:
41887 case SYMBOL_REF:
41888 if (TARGET_64BIT && !x86_64_immediate_operand (x, VOIDmode))
41889 *total = 3;
41890 else if (TARGET_64BIT && !x86_64_zext_immediate_operand (x, VOIDmode))
41891 *total = 2;
41892 else if (flag_pic && SYMBOLIC_CONST (x)
41893 && !(TARGET_64BIT
41894 && (GET_CODE (x) == LABEL_REF
41895 || (GET_CODE (x) == SYMBOL_REF
41896 && SYMBOL_REF_LOCAL_P (x)))))
41897 *total = 1;
41898 else
41899 *total = 0;
41900 return true;
41902 case CONST_DOUBLE:
41903 if (mode == VOIDmode)
41905 *total = 0;
41906 return true;
41908 switch (standard_80387_constant_p (x))
41910 case 1: /* 0.0 */
41911 *total = 1;
41912 return true;
41913 default: /* Other constants */
41914 *total = 2;
41915 return true;
41916 case 0:
41917 case -1:
41918 break;
41920 if (SSE_FLOAT_MODE_P (mode))
41922 case CONST_VECTOR:
41923 switch (standard_sse_constant_p (x))
41925 case 0:
41926 break;
41927 case 1: /* 0: xor eliminates false dependency */
41928 *total = 0;
41929 return true;
41930 default: /* -1: cmp contains false dependency */
41931 *total = 1;
41932 return true;
41935 /* Fall back to (MEM (SYMBOL_REF)), since that's where
41936 it'll probably end up. Add a penalty for size. */
41937 *total = (COSTS_N_INSNS (1)
41938 + (flag_pic != 0 && !TARGET_64BIT)
41939 + (mode == SFmode ? 0 : mode == DFmode ? 1 : 2));
41940 return true;
41942 case ZERO_EXTEND:
41943 /* The zero extension is often completely free on x86_64, so make
41944 it as cheap as possible. */
41945 if (TARGET_64BIT && mode == DImode
41946 && GET_MODE (XEXP (x, 0)) == SImode)
41947 *total = 1;
41948 else if (TARGET_ZERO_EXTEND_WITH_AND)
41949 *total = cost->add;
41950 else
41951 *total = cost->movzx;
41952 return false;
41954 case SIGN_EXTEND:
41955 *total = cost->movsx;
41956 return false;
41958 case ASHIFT:
41959 if (SCALAR_INT_MODE_P (mode)
41960 && GET_MODE_SIZE (mode) < UNITS_PER_WORD
41961 && CONST_INT_P (XEXP (x, 1)))
41963 HOST_WIDE_INT value = INTVAL (XEXP (x, 1));
41964 if (value == 1)
41966 *total = cost->add;
41967 return false;
41969 if ((value == 2 || value == 3)
41970 && cost->lea <= cost->shift_const)
41972 *total = cost->lea;
41973 return false;
41976 /* FALLTHRU */
41978 case ROTATE:
41979 case ASHIFTRT:
41980 case LSHIFTRT:
41981 case ROTATERT:
41982 if (GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
41984 /* ??? Should be SSE vector operation cost. */
41985 /* At least for published AMD latencies, this really is the same
41986 as the latency for a simple fpu operation like fabs. */
41987 /* V*QImode is emulated with 1-11 insns. */
41988 if (mode == V16QImode || mode == V32QImode)
41990 int count = 11;
41991 if (TARGET_XOP && mode == V16QImode)
41993 /* For XOP we use vpshab, which requires a broadcast of the
41994 value to the variable shift insn. For constants this
41995 means a V16Q const in mem; even when we can perform the
41996 shift with one insn set the cost to prefer paddb. */
41997 if (CONSTANT_P (XEXP (x, 1)))
41999 *total = (cost->fabs
42000 + rtx_cost (XEXP (x, 0), code, 0, speed)
42001 + (speed ? 2 : COSTS_N_BYTES (16)));
42002 return true;
42004 count = 3;
42006 else if (TARGET_SSSE3)
42007 count = 7;
42008 *total = cost->fabs * count;
42010 else
42011 *total = cost->fabs;
42013 else if (GET_MODE_SIZE (mode) > UNITS_PER_WORD)
42015 if (CONST_INT_P (XEXP (x, 1)))
42017 if (INTVAL (XEXP (x, 1)) > 32)
42018 *total = cost->shift_const + COSTS_N_INSNS (2);
42019 else
42020 *total = cost->shift_const * 2;
42022 else
42024 if (GET_CODE (XEXP (x, 1)) == AND)
42025 *total = cost->shift_var * 2;
42026 else
42027 *total = cost->shift_var * 6 + COSTS_N_INSNS (2);
42030 else
42032 if (CONST_INT_P (XEXP (x, 1)))
42033 *total = cost->shift_const;
42034 else if (GET_CODE (XEXP (x, 1)) == SUBREG
42035 && GET_CODE (XEXP (XEXP (x, 1), 0)) == AND)
42037 /* Return the cost after shift-and truncation. */
42038 *total = cost->shift_var;
42039 return true;
42041 else
42042 *total = cost->shift_var;
42044 return false;
42046 case FMA:
42048 rtx sub;
42050 gcc_assert (FLOAT_MODE_P (mode));
42051 gcc_assert (TARGET_FMA || TARGET_FMA4 || TARGET_AVX512F);
42053 /* ??? SSE scalar/vector cost should be used here. */
42054 /* ??? Bald assumption that fma has the same cost as fmul. */
42055 *total = cost->fmul;
42056 *total += rtx_cost (XEXP (x, 1), FMA, 1, speed);
42058 /* Negate in op0 or op2 is free: FMS, FNMA, FNMS. */
42059 sub = XEXP (x, 0);
42060 if (GET_CODE (sub) == NEG)
42061 sub = XEXP (sub, 0);
42062 *total += rtx_cost (sub, FMA, 0, speed);
42064 sub = XEXP (x, 2);
42065 if (GET_CODE (sub) == NEG)
42066 sub = XEXP (sub, 0);
42067 *total += rtx_cost (sub, FMA, 2, speed);
42068 return true;
42071 case MULT:
42072 if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
42074 /* ??? SSE scalar cost should be used here. */
42075 *total = cost->fmul;
42076 return false;
42078 else if (X87_FLOAT_MODE_P (mode))
42080 *total = cost->fmul;
42081 return false;
42083 else if (FLOAT_MODE_P (mode))
42085 /* ??? SSE vector cost should be used here. */
42086 *total = cost->fmul;
42087 return false;
42089 else if (GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
42091 /* V*QImode is emulated with 7-13 insns. */
42092 if (mode == V16QImode || mode == V32QImode)
42094 int extra = 11;
42095 if (TARGET_XOP && mode == V16QImode)
42096 extra = 5;
42097 else if (TARGET_SSSE3)
42098 extra = 6;
42099 *total = cost->fmul * 2 + cost->fabs * extra;
42101 /* V*DImode is emulated with 5-8 insns. */
42102 else if (mode == V2DImode || mode == V4DImode)
42104 if (TARGET_XOP && mode == V2DImode)
42105 *total = cost->fmul * 2 + cost->fabs * 3;
42106 else
42107 *total = cost->fmul * 3 + cost->fabs * 5;
42109 /* Without sse4.1, we don't have PMULLD; it's emulated with 7
42110 insns, including two PMULUDQ. */
42111 else if (mode == V4SImode && !(TARGET_SSE4_1 || TARGET_AVX))
42112 *total = cost->fmul * 2 + cost->fabs * 5;
42113 else
42114 *total = cost->fmul;
42115 return false;
42117 else
42119 rtx op0 = XEXP (x, 0);
42120 rtx op1 = XEXP (x, 1);
42121 int nbits;
42122 if (CONST_INT_P (XEXP (x, 1)))
42124 unsigned HOST_WIDE_INT value = INTVAL (XEXP (x, 1));
42125 for (nbits = 0; value != 0; value &= value - 1)
42126 nbits++;
42128 else
42129 /* This is arbitrary. */
42130 nbits = 7;
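/* For example, a multiply by the constant 10 (binary 1010) gives
   nbits = 2, so the cost computed below is roughly
   mult_init[MODE_INDEX (mode)] + 2 * mult_bit plus the costs of the
   two operands.  */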
42132 /* Compute costs correctly for widening multiplication. */
42133 if ((GET_CODE (op0) == SIGN_EXTEND || GET_CODE (op0) == ZERO_EXTEND)
42134 && GET_MODE_SIZE (GET_MODE (XEXP (op0, 0))) * 2
42135 == GET_MODE_SIZE (mode))
42137 int is_mulwiden = 0;
42138 machine_mode inner_mode = GET_MODE (op0);
42140 if (GET_CODE (op0) == GET_CODE (op1))
42141 is_mulwiden = 1, op1 = XEXP (op1, 0);
42142 else if (CONST_INT_P (op1))
42144 if (GET_CODE (op0) == SIGN_EXTEND)
42145 is_mulwiden = trunc_int_for_mode (INTVAL (op1), inner_mode)
42146 == INTVAL (op1);
42147 else
42148 is_mulwiden = !(INTVAL (op1) & ~GET_MODE_MASK (inner_mode));
42151 if (is_mulwiden)
42152 op0 = XEXP (op0, 0), mode = GET_MODE (op0);
42155 *total = (cost->mult_init[MODE_INDEX (mode)]
42156 + nbits * cost->mult_bit
42157 + rtx_cost (op0, outer_code, opno, speed)
42158 + rtx_cost (op1, outer_code, opno, speed));
42160 return true;
42163 case DIV:
42164 case UDIV:
42165 case MOD:
42166 case UMOD:
42167 if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
42168 /* ??? SSE cost should be used here. */
42169 *total = cost->fdiv;
42170 else if (X87_FLOAT_MODE_P (mode))
42171 *total = cost->fdiv;
42172 else if (FLOAT_MODE_P (mode))
42173 /* ??? SSE vector cost should be used here. */
42174 *total = cost->fdiv;
42175 else
42176 *total = cost->divide[MODE_INDEX (mode)];
42177 return false;
42179 case PLUS:
42180 if (GET_MODE_CLASS (mode) == MODE_INT
42181 && GET_MODE_SIZE (mode) <= UNITS_PER_WORD)
42183 if (GET_CODE (XEXP (x, 0)) == PLUS
42184 && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
42185 && CONST_INT_P (XEXP (XEXP (XEXP (x, 0), 0), 1))
42186 && CONSTANT_P (XEXP (x, 1)))
42188 HOST_WIDE_INT val = INTVAL (XEXP (XEXP (XEXP (x, 0), 0), 1));
42189 if (val == 2 || val == 4 || val == 8)
42191 *total = cost->lea;
42192 *total += rtx_cost (XEXP (XEXP (x, 0), 1),
42193 outer_code, opno, speed);
42194 *total += rtx_cost (XEXP (XEXP (XEXP (x, 0), 0), 0),
42195 outer_code, opno, speed);
42196 *total += rtx_cost (XEXP (x, 1), outer_code, opno, speed);
42197 return true;
42200 else if (GET_CODE (XEXP (x, 0)) == MULT
42201 && CONST_INT_P (XEXP (XEXP (x, 0), 1)))
42203 HOST_WIDE_INT val = INTVAL (XEXP (XEXP (x, 0), 1));
42204 if (val == 2 || val == 4 || val == 8)
42206 *total = cost->lea;
42207 *total += rtx_cost (XEXP (XEXP (x, 0), 0),
42208 outer_code, opno, speed);
42209 *total += rtx_cost (XEXP (x, 1), outer_code, opno, speed);
42210 return true;
42213 else if (GET_CODE (XEXP (x, 0)) == PLUS)
42215 *total = cost->lea;
42216 *total += rtx_cost (XEXP (XEXP (x, 0), 0),
42217 outer_code, opno, speed);
42218 *total += rtx_cost (XEXP (XEXP (x, 0), 1),
42219 outer_code, opno, speed);
42220 *total += rtx_cost (XEXP (x, 1), outer_code, opno, speed);
42221 return true;
42224 /* FALLTHRU */
42226 case MINUS:
42227 if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
42229 /* ??? SSE cost should be used here. */
42230 *total = cost->fadd;
42231 return false;
42233 else if (X87_FLOAT_MODE_P (mode))
42235 *total = cost->fadd;
42236 return false;
42238 else if (FLOAT_MODE_P (mode))
42240 /* ??? SSE vector cost should be used here. */
42241 *total = cost->fadd;
42242 return false;
42244 /* FALLTHRU */
42246 case AND:
42247 case IOR:
42248 case XOR:
42249 if (GET_MODE_CLASS (mode) == MODE_INT
42250 && GET_MODE_SIZE (mode) > UNITS_PER_WORD)
42252 *total = (cost->add * 2
42253 + (rtx_cost (XEXP (x, 0), outer_code, opno, speed)
42254 << (GET_MODE (XEXP (x, 0)) != DImode))
42255 + (rtx_cost (XEXP (x, 1), outer_code, opno, speed)
42256 << (GET_MODE (XEXP (x, 1)) != DImode)));
42257 return true;
42259 /* FALLTHRU */
42261 case NEG:
42262 if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
42264 /* ??? SSE cost should be used here. */
42265 *total = cost->fchs;
42266 return false;
42268 else if (X87_FLOAT_MODE_P (mode))
42270 *total = cost->fchs;
42271 return false;
42273 else if (FLOAT_MODE_P (mode))
42275 /* ??? SSE vector cost should be used here. */
42276 *total = cost->fchs;
42277 return false;
42279 /* FALLTHRU */
42281 case NOT:
42282 if (GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
42284 /* ??? Should be SSE vector operation cost. */
42285 /* At least for published AMD latencies, this really is the same
42286 as the latency for a simple fpu operation like fabs. */
42287 *total = cost->fabs;
42289 else if (GET_MODE_SIZE (mode) > UNITS_PER_WORD)
42290 *total = cost->add * 2;
42291 else
42292 *total = cost->add;
42293 return false;
42295 case COMPARE:
42296 if (GET_CODE (XEXP (x, 0)) == ZERO_EXTRACT
42297 && XEXP (XEXP (x, 0), 1) == const1_rtx
42298 && CONST_INT_P (XEXP (XEXP (x, 0), 2))
42299 && XEXP (x, 1) == const0_rtx)
42301 /* This kind of construct is implemented using test[bwl].
42302 Treat it as if we had an AND. */
42303 *total = (cost->add
42304 + rtx_cost (XEXP (XEXP (x, 0), 0), outer_code, opno, speed)
42305 + rtx_cost (const1_rtx, outer_code, opno, speed));
42306 return true;
42308 return false;
42310 case FLOAT_EXTEND:
42311 if (!(SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH))
42312 *total = 0;
42313 return false;
42315 case ABS:
42316 if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
42317 /* ??? SSE cost should be used here. */
42318 *total = cost->fabs;
42319 else if (X87_FLOAT_MODE_P (mode))
42320 *total = cost->fabs;
42321 else if (FLOAT_MODE_P (mode))
42322 /* ??? SSE vector cost should be used here. */
42323 *total = cost->fabs;
42324 return false;
42326 case SQRT:
42327 if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
42328 /* ??? SSE cost should be used here. */
42329 *total = cost->fsqrt;
42330 else if (X87_FLOAT_MODE_P (mode))
42331 *total = cost->fsqrt;
42332 else if (FLOAT_MODE_P (mode))
42333 /* ??? SSE vector cost should be used here. */
42334 *total = cost->fsqrt;
42335 return false;
42337 case UNSPEC:
42338 if (XINT (x, 1) == UNSPEC_TP)
42339 *total = 0;
42340 return false;
42342 case VEC_SELECT:
42343 case VEC_CONCAT:
42344 case VEC_DUPLICATE:
42345 /* ??? Assume all of these vector manipulation patterns are
42346 recognizable, in which case they all pretty much have the
42347 same cost. */
42348 *total = cost->fabs;
42349 return true;
42350 case VEC_MERGE:
42351 mask = XEXP (x, 2);
42352 /* This is a masked instruction; assume the same cost
42353 as the nonmasked variant. */
42354 if (TARGET_AVX512F && register_operand (mask, GET_MODE (mask)))
42355 *total = rtx_cost (XEXP (x, 0), outer_code, opno, speed);
42356 else
42357 *total = cost->fabs;
42358 return true;
42360 default:
42361 return false;
42365 #if TARGET_MACHO
42367 static int current_machopic_label_num;
42369 /* Given a symbol name and its associated stub, write out the
42370 definition of the stub. */
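/* Roughly, for the MACHOPIC_PURE case the emitted text looks like:

     <stub>:    .indirect_symbol <symbol>
                call   <get_pc_thunk.cx>      (emitted via output_set_got)
     LPC$N:     movl   LN$lz-LPC$N(%ecx),%ecx
                jmp    *%ecx
     <binder>:  lea    LN$lz-<binder>(%ecx),%ecx
                pushl  %ecx
                jmp    dyld_stub_binding_helper
     (lazy symbol pointer section)
     LN$lz:     .indirect_symbol <symbol>
                .long  <binder>

   The MACHOPIC_ATT_STUB variant is just a 5-byte hlt pattern with no
   binder or lazy pointer, and the non-PIC variant jumps through the lazy
   pointer directly.  */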
42372 void
42373 machopic_output_stub (FILE *file, const char *symb, const char *stub)
42375 unsigned int length;
42376 char *binder_name, *symbol_name, lazy_ptr_name[32];
42377 int label = ++current_machopic_label_num;
42379 /* For 64-bit we shouldn't get here. */
42380 gcc_assert (!TARGET_64BIT);
42382 /* Lose our funky encoding stuff so it doesn't contaminate the stub. */
42383 symb = targetm.strip_name_encoding (symb);
42385 length = strlen (stub);
42386 binder_name = XALLOCAVEC (char, length + 32);
42387 GEN_BINDER_NAME_FOR_STUB (binder_name, stub, length);
42389 length = strlen (symb);
42390 symbol_name = XALLOCAVEC (char, length + 32);
42391 GEN_SYMBOL_NAME_FOR_SYMBOL (symbol_name, symb, length);
42393 sprintf (lazy_ptr_name, "L%d$lz", label);
42395 if (MACHOPIC_ATT_STUB)
42396 switch_to_section (darwin_sections[machopic_picsymbol_stub3_section]);
42397 else if (MACHOPIC_PURE)
42398 switch_to_section (darwin_sections[machopic_picsymbol_stub2_section]);
42399 else
42400 switch_to_section (darwin_sections[machopic_symbol_stub_section]);
42402 fprintf (file, "%s:\n", stub);
42403 fprintf (file, "\t.indirect_symbol %s\n", symbol_name);
42405 if (MACHOPIC_ATT_STUB)
42407 fprintf (file, "\thlt ; hlt ; hlt ; hlt ; hlt\n");
42409 else if (MACHOPIC_PURE)
42411 /* PIC stub. */
42412 /* 25-byte PIC stub using "CALL get_pc_thunk". */
42413 rtx tmp = gen_rtx_REG (SImode, 2 /* ECX */);
42414 output_set_got (tmp, NULL_RTX); /* "CALL ___<cpu>.get_pc_thunk.cx". */
42415 fprintf (file, "LPC$%d:\tmovl\t%s-LPC$%d(%%ecx),%%ecx\n",
42416 label, lazy_ptr_name, label);
42417 fprintf (file, "\tjmp\t*%%ecx\n");
42419 else
42420 fprintf (file, "\tjmp\t*%s\n", lazy_ptr_name);
42422 /* The AT&T-style ("self-modifying") stub is not lazily bound, thus
42423 it needs no stub-binding-helper. */
42424 if (MACHOPIC_ATT_STUB)
42425 return;
42427 fprintf (file, "%s:\n", binder_name);
42429 if (MACHOPIC_PURE)
42431 fprintf (file, "\tlea\t%s-%s(%%ecx),%%ecx\n", lazy_ptr_name, binder_name);
42432 fprintf (file, "\tpushl\t%%ecx\n");
42434 else
42435 fprintf (file, "\tpushl\t$%s\n", lazy_ptr_name);
42437 fputs ("\tjmp\tdyld_stub_binding_helper\n", file);
42439 /* N.B. Keep the correspondence of these
42440 'symbol_ptr/symbol_ptr2/symbol_ptr3' sections consistent with the
42441 old-pic/new-pic/non-pic stubs; altering this will break
42442 compatibility with existing dylibs. */
42443 if (MACHOPIC_PURE)
42445 /* 25-byte PIC stub using "CALL get_pc_thunk". */
42446 switch_to_section (darwin_sections[machopic_lazy_symbol_ptr2_section]);
42448 else
42449 /* 16-byte -mdynamic-no-pic stub. */
42450 switch_to_section(darwin_sections[machopic_lazy_symbol_ptr3_section]);
42452 fprintf (file, "%s:\n", lazy_ptr_name);
42453 fprintf (file, "\t.indirect_symbol %s\n", symbol_name);
42454 fprintf (file, ASM_LONG "%s\n", binder_name);
42456 #endif /* TARGET_MACHO */
42458 /* Order the registers for the register allocator. */
42460 void
42461 x86_order_regs_for_local_alloc (void)
42463 int pos = 0;
42464 int i;
42466 /* First allocate the local general purpose registers. */
42467 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
42468 if (GENERAL_REGNO_P (i) && call_used_regs[i])
42469 reg_alloc_order [pos++] = i;
42471 /* Global general purpose registers. */
42472 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
42473 if (GENERAL_REGNO_P (i) && !call_used_regs[i])
42474 reg_alloc_order [pos++] = i;
42476 /* x87 registers come first in case we are doing FP math
42477 using them. */
42478 if (!TARGET_SSE_MATH)
42479 for (i = FIRST_STACK_REG; i <= LAST_STACK_REG; i++)
42480 reg_alloc_order [pos++] = i;
42482 /* SSE registers. */
42483 for (i = FIRST_SSE_REG; i <= LAST_SSE_REG; i++)
42484 reg_alloc_order [pos++] = i;
42485 for (i = FIRST_REX_SSE_REG; i <= LAST_REX_SSE_REG; i++)
42486 reg_alloc_order [pos++] = i;
42488 /* Extended REX SSE registers. */
42489 for (i = FIRST_EXT_REX_SSE_REG; i <= LAST_EXT_REX_SSE_REG; i++)
42490 reg_alloc_order [pos++] = i;
42492 /* Mask registers. */
42493 for (i = FIRST_MASK_REG; i <= LAST_MASK_REG; i++)
42494 reg_alloc_order [pos++] = i;
42496 /* MPX bound registers. */
42497 for (i = FIRST_BND_REG; i <= LAST_BND_REG; i++)
42498 reg_alloc_order [pos++] = i;
42500 /* x87 registers. */
42501 if (TARGET_SSE_MATH)
42502 for (i = FIRST_STACK_REG; i <= LAST_STACK_REG; i++)
42503 reg_alloc_order [pos++] = i;
42505 for (i = FIRST_MMX_REG; i <= LAST_MMX_REG; i++)
42506 reg_alloc_order [pos++] = i;
42508 /* Initialize the rest of the array, as we do not allocate some
42509 registers at all. */
42510 while (pos < FIRST_PSEUDO_REGISTER)
42511 reg_alloc_order [pos++] = 0;
42514 /* Handle a "callee_pop_aggregate_return" attribute; arguments as
42515 in struct attribute_spec.handler. */
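/* For example, on 32-bit targets
     struct S f (void) __attribute__ ((callee_pop_aggregate_return (1)));
   states that the callee pops the hidden pointer used to return the
   aggregate; an argument of 0 means the caller pops it.  The argument
   must be the integer constant 0 or 1, as checked below.  */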
42516 static tree
42517 ix86_handle_callee_pop_aggregate_return (tree *node, tree name,
42518 tree args,
42519 int,
42520 bool *no_add_attrs)
42522 if (TREE_CODE (*node) != FUNCTION_TYPE
42523 && TREE_CODE (*node) != METHOD_TYPE
42524 && TREE_CODE (*node) != FIELD_DECL
42525 && TREE_CODE (*node) != TYPE_DECL)
42527 warning (OPT_Wattributes, "%qE attribute only applies to functions",
42528 name);
42529 *no_add_attrs = true;
42530 return NULL_TREE;
42532 if (TARGET_64BIT)
42534 warning (OPT_Wattributes, "%qE attribute only available for 32-bit",
42535 name);
42536 *no_add_attrs = true;
42537 return NULL_TREE;
42539 if (is_attribute_p ("callee_pop_aggregate_return", name))
42541 tree cst;
42543 cst = TREE_VALUE (args);
42544 if (TREE_CODE (cst) != INTEGER_CST)
42546 warning (OPT_Wattributes,
42547 "%qE attribute requires an integer constant argument",
42548 name);
42549 *no_add_attrs = true;
42551 else if (compare_tree_int (cst, 0) != 0
42552 && compare_tree_int (cst, 1) != 0)
42554 warning (OPT_Wattributes,
42555 "argument to %qE attribute is neither zero, nor one",
42556 name);
42557 *no_add_attrs = true;
42560 return NULL_TREE;
42563 return NULL_TREE;
42566 /* Handle a "ms_abi" or "sysv_abi" attribute; arguments as in
42567 struct attribute_spec.handler. */
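/* For example, "int __attribute__ ((ms_abi)) f (int);" selects the
   Microsoft x64 calling convention for F, while sysv_abi selects the
   System V one; the two attributes are mutually exclusive, as checked
   below.  */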
42568 static tree
42569 ix86_handle_abi_attribute (tree *node, tree name, tree, int,
42570 bool *no_add_attrs)
42572 if (TREE_CODE (*node) != FUNCTION_TYPE
42573 && TREE_CODE (*node) != METHOD_TYPE
42574 && TREE_CODE (*node) != FIELD_DECL
42575 && TREE_CODE (*node) != TYPE_DECL)
42577 warning (OPT_Wattributes, "%qE attribute only applies to functions",
42578 name);
42579 *no_add_attrs = true;
42580 return NULL_TREE;
42583 /* Can combine regparm with all attributes but fastcall. */
42584 if (is_attribute_p ("ms_abi", name))
42586 if (lookup_attribute ("sysv_abi", TYPE_ATTRIBUTES (*node)))
42588 error ("ms_abi and sysv_abi attributes are not compatible");
42591 return NULL_TREE;
42593 else if (is_attribute_p ("sysv_abi", name))
42595 if (lookup_attribute ("ms_abi", TYPE_ATTRIBUTES (*node)))
42597 error ("ms_abi and sysv_abi attributes are not compatible");
42600 return NULL_TREE;
42603 return NULL_TREE;
42606 /* Handle a "ms_struct" or "gcc_struct" attribute; arguments as in
42607 struct attribute_spec.handler. */
42608 static tree
42609 ix86_handle_struct_type_attribute (tree *type, tree name, tree, int,
42610 bool *no_add_attrs)
42612 if (!(RECORD_OR_UNION_TYPE_P (*type)))
42614 warning (OPT_Wattributes, "%qE attribute ignored", name);
42615 *no_add_attrs = true;
42618 else if ((is_attribute_p ("ms_struct", name)
42619 && lookup_attribute ("gcc_struct", TYPE_ATTRIBUTES (*type)))
42620 || ((is_attribute_p ("gcc_struct", name)
42621 && lookup_attribute ("ms_struct", TYPE_ATTRIBUTES (*type)))))
42623 warning (OPT_Wattributes, "%qE incompatible attribute ignored",
42624 name);
42625 *no_add_attrs = true;
42628 return NULL_TREE;
42631 static tree
42632 ix86_handle_struct_decl_attribute (tree *node, tree name, tree arg, int flags,
42633 bool *no_add_attrs)
42635 if (TREE_CODE (*node) == TYPE_DECL)
42636 return ix86_handle_struct_type_attribute (&(TREE_TYPE (*node)), name, arg,
42637 flags, no_add_attrs);
42639 warning (OPT_Wattributes, "%qE attribute ignored", name);
42640 *no_add_attrs = true;
42642 return NULL_TREE;
42645 static tree
42646 ix86_handle_fndecl_attribute (tree *node, tree name, tree, int,
42647 bool *no_add_attrs)
42649 if (TREE_CODE (*node) != FUNCTION_DECL)
42651 warning (OPT_Wattributes, "%qE attribute only applies to functions",
42652 name);
42653 *no_add_attrs = true;
42655 return NULL_TREE;
42658 static bool
42659 ix86_ms_bitfield_layout_p (const_tree record_type)
42661 return ((TARGET_MS_BITFIELD_LAYOUT
42662 && !lookup_attribute ("gcc_struct", TYPE_ATTRIBUTES (record_type)))
42663 || lookup_attribute ("ms_struct", TYPE_ATTRIBUTES (record_type)));
42666 /* Returns an expression indicating where the this parameter is
42667 located on entry to the FUNCTION. */
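/* Roughly: in 64-bit mode THIS arrives in the first integer argument
   register (%rdi, or %rcx under the MS ABI), shifting to the second
   register when the value is returned through a hidden aggregate
   pointer.  In 32-bit mode it is in %ecx for fastcall/thiscall or %eax
   for regparm functions (again shifting, or spilling to the stack, when
   there is a hidden return pointer), and otherwise it lives on the
   stack at 4(%esp), or 8(%esp) past the hidden pointer.  */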
42669 static rtx
42670 x86_this_parameter (tree function)
42672 tree type = TREE_TYPE (function);
42673 bool aggr = aggregate_value_p (TREE_TYPE (type), type) != 0;
42674 int nregs;
42676 if (TARGET_64BIT)
42678 const int *parm_regs;
42680 if (ix86_function_type_abi (type) == MS_ABI)
42681 parm_regs = x86_64_ms_abi_int_parameter_registers;
42682 else
42683 parm_regs = x86_64_int_parameter_registers;
42684 return gen_rtx_REG (Pmode, parm_regs[aggr]);
42687 nregs = ix86_function_regparm (type, function);
42689 if (nregs > 0 && !stdarg_p (type))
42691 int regno;
42692 unsigned int ccvt = ix86_get_callcvt (type);
42694 if ((ccvt & IX86_CALLCVT_FASTCALL) != 0)
42695 regno = aggr ? DX_REG : CX_REG;
42696 else if ((ccvt & IX86_CALLCVT_THISCALL) != 0)
42698 regno = CX_REG;
42699 if (aggr)
42700 return gen_rtx_MEM (SImode,
42701 plus_constant (Pmode, stack_pointer_rtx, 4));
42703 else
42705 regno = AX_REG;
42706 if (aggr)
42708 regno = DX_REG;
42709 if (nregs == 1)
42710 return gen_rtx_MEM (SImode,
42711 plus_constant (Pmode,
42712 stack_pointer_rtx, 4));
42715 return gen_rtx_REG (SImode, regno);
42718 return gen_rtx_MEM (SImode, plus_constant (Pmode, stack_pointer_rtx,
42719 aggr ? 8 : 4));
42722 /* Determine whether x86_output_mi_thunk can succeed. */
42724 static bool
42725 x86_can_output_mi_thunk (const_tree, HOST_WIDE_INT, HOST_WIDE_INT vcall_offset,
42726 const_tree function)
42728 /* 64-bit can handle anything. */
42729 if (TARGET_64BIT)
42730 return true;
42732 /* For 32-bit, everything's fine if we have one free register. */
42733 if (ix86_function_regparm (TREE_TYPE (function), function) < 3)
42734 return true;
42736 /* Need a free register for vcall_offset. */
42737 if (vcall_offset)
42738 return false;
42740 /* Need a free register for GOT references. */
42741 if (flag_pic && !targetm.binds_local_p (function))
42742 return false;
42744 /* Otherwise ok. */
42745 return true;
42748 /* Output the assembler code for a thunk function. THUNK_DECL is the
42749 declaration for the thunk function itself, FUNCTION is the decl for
42750 the target function. DELTA is an immediate constant offset to be
42751 added to THIS. If VCALL_OFFSET is nonzero, the word at
42752 *(*this + vcall_offset) should be added to THIS. */
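/* In pseudo-code the emitted thunk does:
     this += DELTA;
     if (VCALL_OFFSET)
       this += *((*this) + VCALL_OFFSET);
     tail-call FUNCTION;
   with no stack frame of its own.  */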
42754 static void
42755 x86_output_mi_thunk (FILE *file, tree, HOST_WIDE_INT delta,
42756 HOST_WIDE_INT vcall_offset, tree function)
42758 rtx this_param = x86_this_parameter (function);
42759 rtx this_reg, tmp, fnaddr;
42760 unsigned int tmp_regno;
42761 rtx_insn *insn;
42763 if (TARGET_64BIT)
42764 tmp_regno = R10_REG;
42765 else
42767 unsigned int ccvt = ix86_get_callcvt (TREE_TYPE (function));
42768 if ((ccvt & IX86_CALLCVT_FASTCALL) != 0)
42769 tmp_regno = AX_REG;
42770 else if ((ccvt & IX86_CALLCVT_THISCALL) != 0)
42771 tmp_regno = DX_REG;
42772 else
42773 tmp_regno = CX_REG;
42776 emit_note (NOTE_INSN_PROLOGUE_END);
42778 /* If VCALL_OFFSET, we'll need THIS in a register. Might as well
42779 pull it in now and let DELTA benefit. */
42780 if (REG_P (this_param))
42781 this_reg = this_param;
42782 else if (vcall_offset)
42784 /* Put the this parameter into %eax. */
42785 this_reg = gen_rtx_REG (Pmode, AX_REG);
42786 emit_move_insn (this_reg, this_param);
42788 else
42789 this_reg = NULL_RTX;
42791 /* Adjust the this parameter by a fixed constant. */
42792 if (delta)
42794 rtx delta_rtx = GEN_INT (delta);
42795 rtx delta_dst = this_reg ? this_reg : this_param;
42797 if (TARGET_64BIT)
42799 if (!x86_64_general_operand (delta_rtx, Pmode))
42801 tmp = gen_rtx_REG (Pmode, tmp_regno);
42802 emit_move_insn (tmp, delta_rtx);
42803 delta_rtx = tmp;
42807 ix86_emit_binop (PLUS, Pmode, delta_dst, delta_rtx);
42810 /* Adjust the this parameter by a value stored in the vtable. */
42811 if (vcall_offset)
42813 rtx vcall_addr, vcall_mem, this_mem;
42815 tmp = gen_rtx_REG (Pmode, tmp_regno);
42817 this_mem = gen_rtx_MEM (ptr_mode, this_reg);
42818 if (Pmode != ptr_mode)
42819 this_mem = gen_rtx_ZERO_EXTEND (Pmode, this_mem);
42820 emit_move_insn (tmp, this_mem);
42822 /* Adjust the this parameter. */
42823 vcall_addr = plus_constant (Pmode, tmp, vcall_offset);
42824 if (TARGET_64BIT
42825 && !ix86_legitimate_address_p (ptr_mode, vcall_addr, true))
42827 rtx tmp2 = gen_rtx_REG (Pmode, R11_REG);
42828 emit_move_insn (tmp2, GEN_INT (vcall_offset));
42829 vcall_addr = gen_rtx_PLUS (Pmode, tmp, tmp2);
42832 vcall_mem = gen_rtx_MEM (ptr_mode, vcall_addr);
42833 if (Pmode != ptr_mode)
42834 emit_insn (gen_addsi_1_zext (this_reg,
42835 gen_rtx_REG (ptr_mode,
42836 REGNO (this_reg)),
42837 vcall_mem));
42838 else
42839 ix86_emit_binop (PLUS, Pmode, this_reg, vcall_mem);
42842 /* If necessary, drop THIS back to its stack slot. */
42843 if (this_reg && this_reg != this_param)
42844 emit_move_insn (this_param, this_reg);
42846 fnaddr = XEXP (DECL_RTL (function), 0);
42847 if (TARGET_64BIT)
42849 if (!flag_pic || targetm.binds_local_p (function)
42850 || TARGET_PECOFF)
42852 else
42854 tmp = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, fnaddr), UNSPEC_GOTPCREL);
42855 tmp = gen_rtx_CONST (Pmode, tmp);
42856 fnaddr = gen_const_mem (Pmode, tmp);
42859 else
42861 if (!flag_pic || targetm.binds_local_p (function))
42863 #if TARGET_MACHO
42864 else if (TARGET_MACHO)
42866 fnaddr = machopic_indirect_call_target (DECL_RTL (function));
42867 fnaddr = XEXP (fnaddr, 0);
42869 #endif /* TARGET_MACHO */
42870 else
42872 tmp = gen_rtx_REG (Pmode, CX_REG);
42873 output_set_got (tmp, NULL_RTX);
42875 fnaddr = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, fnaddr), UNSPEC_GOT);
42876 fnaddr = gen_rtx_CONST (Pmode, fnaddr);
42877 fnaddr = gen_rtx_PLUS (Pmode, tmp, fnaddr);
42878 fnaddr = gen_const_mem (Pmode, fnaddr);
42882 /* Our sibling call patterns do not allow memories, because we have no
42883 predicate that can distinguish between frame and non-frame memory.
42884 For our purposes here, we can get away with (ab)using a jump pattern,
42885 because we're going to do no optimization. */
42886 if (MEM_P (fnaddr))
42888 if (sibcall_insn_operand (fnaddr, word_mode))
42890 fnaddr = XEXP (DECL_RTL (function), 0);
42891 tmp = gen_rtx_MEM (QImode, fnaddr);
42892 tmp = gen_rtx_CALL (VOIDmode, tmp, const0_rtx);
42893 tmp = emit_call_insn (tmp);
42894 SIBLING_CALL_P (tmp) = 1;
42896 else
42897 emit_jump_insn (gen_indirect_jump (fnaddr));
42899 else
42901 if (ix86_cmodel == CM_LARGE_PIC && SYMBOLIC_CONST (fnaddr))
42903 /* CM_LARGE_PIC always uses a pseudo PIC register, which is
42904 uninitialized. Since FUNCTION is local and calling it
42905 doesn't go through the PLT, we use the scratch register %r11
42906 as the PIC register and initialize it here. */
42907 pic_offset_table_rtx = gen_rtx_REG (Pmode, R11_REG);
42908 ix86_init_large_pic_reg (tmp_regno);
42909 fnaddr = legitimize_pic_address (fnaddr,
42910 gen_rtx_REG (Pmode, tmp_regno));
42913 if (!sibcall_insn_operand (fnaddr, word_mode))
42915 tmp = gen_rtx_REG (word_mode, tmp_regno);
42916 if (GET_MODE (fnaddr) != word_mode)
42917 fnaddr = gen_rtx_ZERO_EXTEND (word_mode, fnaddr);
42918 emit_move_insn (tmp, fnaddr);
42919 fnaddr = tmp;
42922 tmp = gen_rtx_MEM (QImode, fnaddr);
42923 tmp = gen_rtx_CALL (VOIDmode, tmp, const0_rtx);
42924 tmp = emit_call_insn (tmp);
42925 SIBLING_CALL_P (tmp) = 1;
42927 emit_barrier ();
42929 /* Emit just enough of rest_of_compilation to get the insns emitted.
42930 Note that use_thunk calls assemble_start_function et al. */
42931 insn = get_insns ();
42932 shorten_branches (insn);
42933 final_start_function (insn, file, 1);
42934 final (insn, file, 1);
42935 final_end_function ();
42938 static void
42939 x86_file_start (void)
42941 default_file_start ();
42942 if (TARGET_16BIT)
42943 fputs ("\t.code16gcc\n", asm_out_file);
42944 #if TARGET_MACHO
42945 darwin_file_start ();
42946 #endif
42947 if (X86_FILE_START_VERSION_DIRECTIVE)
42948 fputs ("\t.version\t\"01.01\"\n", asm_out_file);
42949 if (X86_FILE_START_FLTUSED)
42950 fputs ("\t.global\t__fltused\n", asm_out_file);
42951 if (ix86_asm_dialect == ASM_INTEL)
42952 fputs ("\t.intel_syntax noprefix\n", asm_out_file);
42956 int x86_field_alignment (tree field, int computed)
42958 machine_mode mode;
42959 tree type = TREE_TYPE (field);
42961 if (TARGET_64BIT || TARGET_ALIGN_DOUBLE)
42962 return computed;
42963 mode = TYPE_MODE (strip_array_types (type));
42964 if (mode == DFmode || mode == DCmode
42965 || GET_MODE_CLASS (mode) == MODE_INT
42966 || GET_MODE_CLASS (mode) == MODE_COMPLEX_INT)
42967 return MIN (32, computed);
42968 return computed;
42971 /* Print call to TARGET to FILE. */
42973 static void
42974 x86_print_call_or_nop (FILE *file, const char *target)
42976 if (flag_nop_mcount)
42977 fprintf (file, "1:\tnopl 0x00(%%eax,%%eax,1)\n"); /* 5 byte nop. */
42978 else
42979 fprintf (file, "1:\tcall\t%s\n", target);
42982 /* Output assembler code to FILE to increment profiler label # LABELNO
42983 for profiling a function entry. */
42984 void
42985 x86_function_profiler (FILE *file, int labelno ATTRIBUTE_UNUSED)
42987 const char *mcount_name = (flag_fentry ? MCOUNT_NAME_BEFORE_PROLOGUE
42988 : MCOUNT_NAME);
42989 if (TARGET_64BIT)
42991 #ifndef NO_PROFILE_COUNTERS
42992 fprintf (file, "\tleaq\t%sP%d(%%rip),%%r11\n", LPREFIX, labelno);
42993 #endif
42995 if (!TARGET_PECOFF && flag_pic)
42996 fprintf (file, "1:\tcall\t*%s@GOTPCREL(%%rip)\n", mcount_name);
42997 else
42998 x86_print_call_or_nop (file, mcount_name);
43000 else if (flag_pic)
43002 #ifndef NO_PROFILE_COUNTERS
43003 fprintf (file, "\tleal\t%sP%d@GOTOFF(%%ebx),%%" PROFILE_COUNT_REGISTER "\n",
43004 LPREFIX, labelno);
43005 #endif
43006 fprintf (file, "1:\tcall\t*%s@GOT(%%ebx)\n", mcount_name);
43008 else
43010 #ifndef NO_PROFILE_COUNTERS
43011 fprintf (file, "\tmovl\t$%sP%d,%%" PROFILE_COUNT_REGISTER "\n",
43012 LPREFIX, labelno);
43013 #endif
43014 x86_print_call_or_nop (file, mcount_name);
43017 if (flag_record_mcount)
43019 fprintf (file, "\t.section __mcount_loc, \"a\",@progbits\n");
43020 fprintf (file, "\t.%s 1b\n", TARGET_64BIT ? "quad" : "long");
43021 fprintf (file, "\t.previous\n");
43025 /* We don't have exact information about the insn sizes, but we may assume
43026 quite safely that we are informed about all 1-byte insns and memory
43027 address sizes. This is enough to eliminate unnecessary padding in
43028 99% of cases. */
43030 static int
43031 min_insn_size (rtx_insn *insn)
43033 int l = 0, len;
43035 if (!INSN_P (insn) || !active_insn_p (insn))
43036 return 0;
43038 /* Discard alignments we've emitted and jump instructions. */
43039 if (GET_CODE (PATTERN (insn)) == UNSPEC_VOLATILE
43040 && XINT (PATTERN (insn), 1) == UNSPECV_ALIGN)
43041 return 0;
43043 /* Important case - calls are always 5 bytes.
43044 It is common to have many calls in a row. */
43045 if (CALL_P (insn)
43046 && symbolic_reference_mentioned_p (PATTERN (insn))
43047 && !SIBLING_CALL_P (insn))
43048 return 5;
43049 len = get_attr_length (insn);
43050 if (len <= 1)
43051 return 1;
43053 /* For normal instructions we rely on get_attr_length being exact,
43054 with a few exceptions. */
43055 if (!JUMP_P (insn))
43057 enum attr_type type = get_attr_type (insn);
43059 switch (type)
43061 case TYPE_MULTI:
43062 if (GET_CODE (PATTERN (insn)) == ASM_INPUT
43063 || asm_noperands (PATTERN (insn)) >= 0)
43064 return 0;
43065 break;
43066 case TYPE_OTHER:
43067 case TYPE_FCMP:
43068 break;
43069 default:
43070 /* Otherwise trust get_attr_length. */
43071 return len;
43074 l = get_attr_length_address (insn);
43075 if (l < 4 && symbolic_reference_mentioned_p (PATTERN (insn)))
43076 l = 4;
43078 if (l)
43079 return 1+l;
43080 else
43081 return 2;
43084 #ifdef ASM_OUTPUT_MAX_SKIP_PAD
43086 /* The AMD K8 core mispredicts jumps when there are more than 3 jumps in a
43087 16-byte window. */
43089 static void
43090 ix86_avoid_jump_mispredicts (void)
43092 rtx_insn *insn, *start = get_insns ();
43093 int nbytes = 0, njumps = 0;
43094 int isjump = 0;
43096 /* Look for all minimal intervals of instructions containing 4 jumps.
43097 The intervals are bounded by START and INSN. NBYTES is the total
43098 size of instructions in the interval including INSN and not including
43099 START. When NBYTES is smaller than 16 bytes, it is possible
43100 that START and INSN end up in the same 16-byte page.
43102 The smallest offset in the page at which INSN can start is the case where
43103 START ends at offset 0. The offset of INSN is then NBYTES - sizeof (INSN).
43104 We add a p2align to the 16-byte window with maxskip 15 - NBYTES + sizeof (INSN).
43106 Don't consider an asm goto as a jump: while it can contain a jump, it doesn't
43107 have to, since control transfer to its label(s) can be performed through other
43108 means; we also estimate the minimum length of all asm stmts as 0. */
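/* For example, once an interval holding four jumps/calls would still fit
   in 16 bytes (NBYTES < 16), a pad of 15 - NBYTES + min_insn_size (INSN)
   bytes is emitted before INSN; with NBYTES = 12 and a 2-byte INSN that
   is a 5-byte pad, pushing the fourth jump out of the shared 16-byte
   window.  */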
43109 for (insn = start; insn; insn = NEXT_INSN (insn))
43111 int min_size;
43113 if (LABEL_P (insn))
43115 int align = label_to_alignment (insn);
43116 int max_skip = label_to_max_skip (insn);
43118 if (max_skip > 15)
43119 max_skip = 15;
43120 /* If align > 3, only up to 16 - max_skip - 1 bytes can be
43121 already in the current 16 byte page, because otherwise
43122 ASM_OUTPUT_MAX_SKIP_ALIGN could skip max_skip or fewer
43123 bytes to reach 16 byte boundary. */
43124 if (align <= 0
43125 || (align <= 3 && max_skip != (1 << align) - 1))
43126 max_skip = 0;
43127 if (dump_file)
43128 fprintf (dump_file, "Label %i with max_skip %i\n",
43129 INSN_UID (insn), max_skip);
43130 if (max_skip)
43132 while (nbytes + max_skip >= 16)
43134 start = NEXT_INSN (start);
43135 if ((JUMP_P (start) && asm_noperands (PATTERN (start)) < 0)
43136 || CALL_P (start))
43137 njumps--, isjump = 1;
43138 else
43139 isjump = 0;
43140 nbytes -= min_insn_size (start);
43143 continue;
43146 min_size = min_insn_size (insn);
43147 nbytes += min_size;
43148 if (dump_file)
43149 fprintf (dump_file, "Insn %i estimated to %i bytes\n",
43150 INSN_UID (insn), min_size);
43151 if ((JUMP_P (insn) && asm_noperands (PATTERN (insn)) < 0)
43152 || CALL_P (insn))
43153 njumps++;
43154 else
43155 continue;
43157 while (njumps > 3)
43159 start = NEXT_INSN (start);
43160 if ((JUMP_P (start) && asm_noperands (PATTERN (start)) < 0)
43161 || CALL_P (start))
43162 njumps--, isjump = 1;
43163 else
43164 isjump = 0;
43165 nbytes -= min_insn_size (start);
43167 gcc_assert (njumps >= 0);
43168 if (dump_file)
43169 fprintf (dump_file, "Interval %i to %i has %i bytes\n",
43170 INSN_UID (start), INSN_UID (insn), nbytes);
43172 if (njumps == 3 && isjump && nbytes < 16)
43174 int padsize = 15 - nbytes + min_insn_size (insn);
43176 if (dump_file)
43177 fprintf (dump_file, "Padding insn %i by %i bytes!\n",
43178 INSN_UID (insn), padsize);
43179 emit_insn_before (gen_pad (GEN_INT (padsize)), insn);
43183 #endif
43185 /* AMD Athlon works faster
43186 when RET is not the destination of a conditional jump or directly preceded
43187 by another jump instruction. We avoid the penalty by inserting a NOP just
43188 before the RET instruction in such cases. */
43189 static void
43190 ix86_pad_returns (void)
43192 edge e;
43193 edge_iterator ei;
43195 FOR_EACH_EDGE (e, ei, EXIT_BLOCK_PTR_FOR_FN (cfun)->preds)
43197 basic_block bb = e->src;
43198 rtx_insn *ret = BB_END (bb);
43199 rtx_insn *prev;
43200 bool replace = false;
43202 if (!JUMP_P (ret) || !ANY_RETURN_P (PATTERN (ret))
43203 || optimize_bb_for_size_p (bb))
43204 continue;
43205 for (prev = PREV_INSN (ret); prev; prev = PREV_INSN (prev))
43206 if (active_insn_p (prev) || LABEL_P (prev))
43207 break;
43208 if (prev && LABEL_P (prev))
43210 edge e;
43211 edge_iterator ei;
43213 FOR_EACH_EDGE (e, ei, bb->preds)
43214 if (EDGE_FREQUENCY (e) && e->src->index >= 0
43215 && !(e->flags & EDGE_FALLTHRU))
43217 replace = true;
43218 break;
43221 if (!replace)
43223 prev = prev_active_insn (ret);
43224 if (prev
43225 && ((JUMP_P (prev) && any_condjump_p (prev))
43226 || CALL_P (prev)))
43227 replace = true;
43228 /* Empty functions get a branch mispredict even when
43229 the jump destination is not visible to us. */
43230 if (!prev && !optimize_function_for_size_p (cfun))
43231 replace = true;
43233 if (replace)
43235 emit_jump_insn_before (gen_simple_return_internal_long (), ret);
43236 delete_insn (ret);
43241 /* Count the minimum number of instructions in BB. Return 4 if the
43242 number of instructions >= 4. */
43244 static int
43245 ix86_count_insn_bb (basic_block bb)
43247 rtx_insn *insn;
43248 int insn_count = 0;
43250 /* Count number of instructions in this block. Return 4 if the number
43251 of instructions >= 4. */
43252 FOR_BB_INSNS (bb, insn)
43254 /* This only happens in exit blocks. */
43255 if (JUMP_P (insn)
43256 && ANY_RETURN_P (PATTERN (insn)))
43257 break;
43259 if (NONDEBUG_INSN_P (insn)
43260 && GET_CODE (PATTERN (insn)) != USE
43261 && GET_CODE (PATTERN (insn)) != CLOBBER)
43263 insn_count++;
43264 if (insn_count >= 4)
43265 return insn_count;
43269 return insn_count;
43273 /* Count the minimum number of instructions in code path in BB.
43274 Return 4 if the number of instructions >= 4. */
43276 static int
43277 ix86_count_insn (basic_block bb)
43279 edge e;
43280 edge_iterator ei;
43281 int min_prev_count;
43283 /* Only bother counting instructions along paths with no
43284 more than 2 basic blocks between entry and exit. Given
43285 that BB has an edge to exit, determine if a predecessor
43286 of BB has an edge from entry. If so, compute the number
43287 of instructions in the predecessor block. If there
43288 happen to be multiple such blocks, compute the minimum. */
43289 min_prev_count = 4;
43290 FOR_EACH_EDGE (e, ei, bb->preds)
43292 edge prev_e;
43293 edge_iterator prev_ei;
43295 if (e->src == ENTRY_BLOCK_PTR_FOR_FN (cfun))
43297 min_prev_count = 0;
43298 break;
43300 FOR_EACH_EDGE (prev_e, prev_ei, e->src->preds)
43302 if (prev_e->src == ENTRY_BLOCK_PTR_FOR_FN (cfun))
43304 int count = ix86_count_insn_bb (e->src);
43305 if (count < min_prev_count)
43306 min_prev_count = count;
43307 break;
43312 if (min_prev_count < 4)
43313 min_prev_count += ix86_count_insn_bb (bb);
43315 return min_prev_count;
43318 /* Pad short function to 4 instructions. */
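/* E.g. a function whose return path contains a single instruction
   receives 2 * (4 - 1) = 6 NOPs before its epilogue; the code below
   treats two NOPs as one instruction.  This tuning
   (TARGET_PAD_SHORT_FUNCTION) is enabled for Atom-class cores.  */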
43320 static void
43321 ix86_pad_short_function (void)
43323 edge e;
43324 edge_iterator ei;
43326 FOR_EACH_EDGE (e, ei, EXIT_BLOCK_PTR_FOR_FN (cfun)->preds)
43328 rtx_insn *ret = BB_END (e->src);
43329 if (JUMP_P (ret) && ANY_RETURN_P (PATTERN (ret)))
43331 int insn_count = ix86_count_insn (e->src);
43333 /* Pad short function. */
43334 if (insn_count < 4)
43336 rtx_insn *insn = ret;
43338 /* Find epilogue. */
43339 while (insn
43340 && (!NOTE_P (insn)
43341 || NOTE_KIND (insn) != NOTE_INSN_EPILOGUE_BEG))
43342 insn = PREV_INSN (insn);
43344 if (!insn)
43345 insn = ret;
43347 /* Two NOPs count as one instruction. */
43348 insn_count = 2 * (4 - insn_count);
43349 emit_insn_before (gen_nops (GEN_INT (insn_count)), insn);
43355 /* Fix up a Windows system unwinder issue. If an EH region falls through into
43356 the epilogue, the Windows system unwinder will apply epilogue logic and
43357 produce incorrect offsets. This can be avoided by adding a nop between
43358 the last insn that can throw and the first insn of the epilogue. */
43360 static void
43361 ix86_seh_fixup_eh_fallthru (void)
43363 edge e;
43364 edge_iterator ei;
43366 FOR_EACH_EDGE (e, ei, EXIT_BLOCK_PTR_FOR_FN (cfun)->preds)
43368 rtx_insn *insn, *next;
43370 /* Find the beginning of the epilogue. */
43371 for (insn = BB_END (e->src); insn != NULL; insn = PREV_INSN (insn))
43372 if (NOTE_P (insn) && NOTE_KIND (insn) == NOTE_INSN_EPILOGUE_BEG)
43373 break;
43374 if (insn == NULL)
43375 continue;
43377 /* We only care about preceding insns that can throw. */
43378 insn = prev_active_insn (insn);
43379 if (insn == NULL || !can_throw_internal (insn))
43380 continue;
43382 /* Do not separate calls from their debug information. */
43383 for (next = NEXT_INSN (insn); next != NULL; next = NEXT_INSN (next))
43384 if (NOTE_P (next)
43385 && (NOTE_KIND (next) == NOTE_INSN_VAR_LOCATION
43386 || NOTE_KIND (next) == NOTE_INSN_CALL_ARG_LOCATION))
43387 insn = next;
43388 else
43389 break;
43391 emit_insn_after (gen_nops (const1_rtx), insn);
43395 /* Implement machine specific optimizations. We implement padding of returns
43396 for K8 CPUs and a pass to avoid 4 jumps in a single 16-byte window. */
43397 static void
43398 ix86_reorg (void)
43400 /* We are freeing block_for_insn in the toplev to keep compatibility
43401 with old MDEP_REORGS that are not CFG based. Recompute it now. */
43402 compute_bb_for_insn ();
43404 if (TARGET_SEH && current_function_has_exception_handlers ())
43405 ix86_seh_fixup_eh_fallthru ();
43407 if (optimize && optimize_function_for_speed_p (cfun))
43409 if (TARGET_PAD_SHORT_FUNCTION)
43410 ix86_pad_short_function ();
43411 else if (TARGET_PAD_RETURNS)
43412 ix86_pad_returns ();
43413 #ifdef ASM_OUTPUT_MAX_SKIP_PAD
43414 if (TARGET_FOUR_JUMP_LIMIT)
43415 ix86_avoid_jump_mispredicts ();
43416 #endif
43420 /* Return nonzero when a QImode register that must be represented via a REX
43421 prefix is used. */
43422 bool
43423 x86_extended_QIreg_mentioned_p (rtx_insn *insn)
43425 int i;
43426 extract_insn_cached (insn);
43427 for (i = 0; i < recog_data.n_operands; i++)
43428 if (GENERAL_REG_P (recog_data.operand[i])
43429 && !QI_REGNO_P (REGNO (recog_data.operand[i])))
43430 return true;
43431 return false;
43434 /* Return true when INSN mentions a register that must be encoded using a
43435 REX prefix. */
43436 bool
43437 x86_extended_reg_mentioned_p (rtx insn)
43439 subrtx_iterator::array_type array;
43440 FOR_EACH_SUBRTX (iter, array, INSN_P (insn) ? PATTERN (insn) : insn, NONCONST)
43442 const_rtx x = *iter;
43443 if (REG_P (x)
43444 && (REX_INT_REGNO_P (REGNO (x)) || REX_SSE_REGNO_P (REGNO (x))))
43445 return true;
43447 return false;
43450 /* If profitable, negate (without causing overflow) the integer constant
43451 of mode MODE at location LOC. Return true in that case. */
43452 bool
43453 x86_maybe_negate_const_int (rtx *loc, machine_mode mode)
43455 HOST_WIDE_INT val;
43457 if (!CONST_INT_P (*loc))
43458 return false;
43460 switch (mode)
43462 case DImode:
43463 /* DImode x86_64 constants must fit in 32 bits. */
43464 gcc_assert (x86_64_immediate_operand (*loc, mode));
43466 mode = SImode;
43467 break;
43469 case SImode:
43470 case HImode:
43471 case QImode:
43472 break;
43474 default:
43475 gcc_unreachable ();
43478 /* Avoid overflows. */
43479 if (mode_signbit_p (mode, *loc))
43480 return false;
43482 val = INTVAL (*loc);
43484 /* Make things pretty by using `subl $4,%eax' rather than `addl $-4,%eax'.
43485 Exceptions: -128 encodes smaller than 128, so swap sign and op. */
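/* E.g. "addl $-4, %eax" becomes material for "subl $4, %eax": -4 is
   negated to 4 here and the caller emits the opposite operation.
   Likewise 128 is negated to -128, since -128 fits in a sign-extended
   8-bit immediate while +128 does not; -128 itself is left alone for
   the same reason.  */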
43486 if ((val < 0 && val != -128)
43487 || val == 128)
43489 *loc = GEN_INT (-val);
43490 return true;
43493 return false;
43496 /* Generate an unsigned DImode/SImode to FP conversion. This is the same code
43497 optabs would emit if we didn't have TFmode patterns. */
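/* Schematically, for an input IN whose sign bit is set (so the plain
   signed conversion cannot be used directly):
     i0  = (IN >> 1) | (IN & 1);
     f0  = (FP) i0;
     out = f0 + f0;
   ORing the low bit back in after the shift keeps the final rounding
   correct.  */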
43499 void
43500 x86_emit_floatuns (rtx operands[2])
43502 rtx_code_label *neglab, *donelab;
43503 rtx i0, i1, f0, in, out;
43504 machine_mode mode, inmode;
43506 inmode = GET_MODE (operands[1]);
43507 gcc_assert (inmode == SImode || inmode == DImode);
43509 out = operands[0];
43510 in = force_reg (inmode, operands[1]);
43511 mode = GET_MODE (out);
43512 neglab = gen_label_rtx ();
43513 donelab = gen_label_rtx ();
43514 f0 = gen_reg_rtx (mode);
43516 emit_cmp_and_jump_insns (in, const0_rtx, LT, const0_rtx, inmode, 0, neglab);
43518 expand_float (out, in, 0);
43520 emit_jump_insn (gen_jump (donelab));
43521 emit_barrier ();
43523 emit_label (neglab);
43525 i0 = expand_simple_binop (inmode, LSHIFTRT, in, const1_rtx, NULL,
43526 1, OPTAB_DIRECT);
43527 i1 = expand_simple_binop (inmode, AND, in, const1_rtx, NULL,
43528 1, OPTAB_DIRECT);
43529 i0 = expand_simple_binop (inmode, IOR, i0, i1, i0, 1, OPTAB_DIRECT);
43531 expand_float (f0, i0, 0);
43533 emit_insn (gen_rtx_SET (VOIDmode, out, gen_rtx_PLUS (mode, f0, f0)));
43535 emit_label (donelab);
43538 static bool canonicalize_perm (struct expand_vec_perm_d *d);
43539 static bool expand_vec_perm_1 (struct expand_vec_perm_d *d);
43540 static bool expand_vec_perm_broadcast_1 (struct expand_vec_perm_d *d);
43541 static bool expand_vec_perm_palignr (struct expand_vec_perm_d *d, bool);
43543 /* Get a vector mode of the same size as the original but with elements
43544 twice as wide. This is only guaranteed to apply to integral vectors. */
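/* E.g. V16QImode yields V8HImode, and V8HImode yields V4SImode: the same
   total size, with half as many elements that are each twice as wide.  */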
43546 static inline machine_mode
43547 get_mode_wider_vector (machine_mode o)
43549 /* ??? Rely on the ordering that genmodes.c gives to vectors. */
43550 machine_mode n = GET_MODE_WIDER_MODE (o);
43551 gcc_assert (GET_MODE_NUNITS (o) == GET_MODE_NUNITS (n) * 2);
43552 gcc_assert (GET_MODE_SIZE (o) == GET_MODE_SIZE (n));
43553 return n;
43556 /* A subroutine of ix86_expand_vector_init_duplicate. Tries to
43557 fill target with val via vec_duplicate. */
43559 static bool
43560 ix86_vector_duplicate_value (machine_mode mode, rtx target, rtx val)
43562 bool ok;
43563 rtx_insn *insn;
43564 rtx dup;
43566 /* First attempt to recognize VAL as-is. */
43567 dup = gen_rtx_VEC_DUPLICATE (mode, val);
43568 insn = emit_insn (gen_rtx_SET (VOIDmode, target, dup));
43569 if (recog_memoized (insn) < 0)
43571 rtx_insn *seq;
43572 /* If that fails, force VAL into a register. */
43574 start_sequence ();
43575 XEXP (dup, 0) = force_reg (GET_MODE_INNER (mode), val);
43576 seq = get_insns ();
43577 end_sequence ();
43578 if (seq)
43579 emit_insn_before (seq, insn);
43581 ok = recog_memoized (insn) >= 0;
43582 gcc_assert (ok);
43584 return true;
43587 /* A subroutine of ix86_expand_vector_init. Store into TARGET a vector
43588 with all elements equal to VAR. Return true if successful. */
43590 static bool
43591 ix86_expand_vector_init_duplicate (bool mmx_ok, machine_mode mode,
43592 rtx target, rtx val)
43594 bool ok;
43596 switch (mode)
43598 case V2SImode:
43599 case V2SFmode:
43600 if (!mmx_ok)
43601 return false;
43602 /* FALLTHRU */
43604 case V4DFmode:
43605 case V4DImode:
43606 case V8SFmode:
43607 case V8SImode:
43608 case V2DFmode:
43609 case V2DImode:
43610 case V4SFmode:
43611 case V4SImode:
43612 case V16SImode:
43613 case V8DImode:
43614 case V16SFmode:
43615 case V8DFmode:
43616 return ix86_vector_duplicate_value (mode, target, val);
43618 case V4HImode:
43619 if (!mmx_ok)
43620 return false;
43621 if (TARGET_SSE || TARGET_3DNOW_A)
43623 rtx x;
43625 val = gen_lowpart (SImode, val);
43626 x = gen_rtx_TRUNCATE (HImode, val);
43627 x = gen_rtx_VEC_DUPLICATE (mode, x);
43628 emit_insn (gen_rtx_SET (VOIDmode, target, x));
43629 return true;
43631 goto widen;
43633 case V8QImode:
43634 if (!mmx_ok)
43635 return false;
43636 goto widen;
43638 case V8HImode:
43639 if (TARGET_AVX2)
43640 return ix86_vector_duplicate_value (mode, target, val);
43642 if (TARGET_SSE2)
43644 struct expand_vec_perm_d dperm;
43645 rtx tmp1, tmp2;
43647 permute:
43648 memset (&dperm, 0, sizeof (dperm));
43649 dperm.target = target;
43650 dperm.vmode = mode;
43651 dperm.nelt = GET_MODE_NUNITS (mode);
43652 dperm.op0 = dperm.op1 = gen_reg_rtx (mode);
43653 dperm.one_operand_p = true;
43655 /* Extend to SImode using a paradoxical SUBREG. */
43656 tmp1 = gen_reg_rtx (SImode);
43657 emit_move_insn (tmp1, gen_lowpart (SImode, val));
43659 /* Insert the SImode value as low element of a V4SImode vector. */
43660 tmp2 = gen_reg_rtx (V4SImode);
43661 emit_insn (gen_vec_setv4si_0 (tmp2, CONST0_RTX (V4SImode), tmp1));
43662 emit_move_insn (dperm.op0, gen_lowpart (mode, tmp2));
43664 ok = (expand_vec_perm_1 (&dperm)
43665 || expand_vec_perm_broadcast_1 (&dperm));
43666 gcc_assert (ok);
43667 return ok;
43669 goto widen;
43671 case V16QImode:
43672 if (TARGET_AVX2)
43673 return ix86_vector_duplicate_value (mode, target, val);
43675 if (TARGET_SSE2)
43676 goto permute;
43677 goto widen;
43679 widen:
43680 /* Replicate the value once into the next wider mode and recurse. */
43682 machine_mode smode, wsmode, wvmode;
43683 rtx x;
43685 smode = GET_MODE_INNER (mode);
43686 wvmode = get_mode_wider_vector (mode);
43687 wsmode = GET_MODE_INNER (wvmode);
43689 val = convert_modes (wsmode, smode, val, true);
43690 x = expand_simple_binop (wsmode, ASHIFT, val,
43691 GEN_INT (GET_MODE_BITSIZE (smode)),
43692 NULL_RTX, 1, OPTAB_LIB_WIDEN);
43693 val = expand_simple_binop (wsmode, IOR, val, x, x, 1, OPTAB_LIB_WIDEN);
43695 x = gen_reg_rtx (wvmode);
43696 ok = ix86_expand_vector_init_duplicate (mmx_ok, wvmode, x, val);
43697 gcc_assert (ok);
43698 emit_move_insn (target, gen_lowpart (GET_MODE (target), x));
43699 return ok;
43702 case V16HImode:
43703 case V32QImode:
43704 if (TARGET_AVX2)
43705 return ix86_vector_duplicate_value (mode, target, val);
43706 else
43708 machine_mode hvmode = (mode == V16HImode ? V8HImode : V16QImode);
43709 rtx x = gen_reg_rtx (hvmode);
43711 ok = ix86_expand_vector_init_duplicate (false, hvmode, x, val);
43712 gcc_assert (ok);
43714 x = gen_rtx_VEC_CONCAT (mode, x, x);
43715 emit_insn (gen_rtx_SET (VOIDmode, target, x));
43717 return true;
43719 case V64QImode:
43720 case V32HImode:
43721 if (TARGET_AVX512BW)
43722 return ix86_vector_duplicate_value (mode, target, val);
43723 else
43725 machine_mode hvmode = (mode == V32HImode ? V16HImode : V32QImode);
43726 rtx x = gen_reg_rtx (hvmode);
43728 ok = ix86_expand_vector_init_duplicate (false, hvmode, x, val);
43729 gcc_assert (ok);
43731 x = gen_rtx_VEC_CONCAT (mode, x, x);
43732 emit_insn (gen_rtx_SET (VOIDmode, target, x));
43734 return true;
43736 default:
43737 return false;
43741 /* A subroutine of ix86_expand_vector_init. Store into TARGET a vector
43742 whose ONE_VAR element is VAR, and other elements are zero. Return true
43743 if successful. */
43745 static bool
43746 ix86_expand_vector_init_one_nonzero (bool mmx_ok, machine_mode mode,
43747 rtx target, rtx var, int one_var)
43749 machine_mode vsimode;
43750 rtx new_target;
43751 rtx x, tmp;
43752 bool use_vector_set = false;
43754 switch (mode)
43756 case V2DImode:
43757 /* For SSE4.1, we normally use vector set. But if the second
43758 element is zero and inter-unit moves are OK, we use movq
43759 instead. */
43760 use_vector_set = (TARGET_64BIT && TARGET_SSE4_1
43761 && !(TARGET_INTER_UNIT_MOVES_TO_VEC
43762 && one_var == 0));
43763 break;
43764 case V16QImode:
43765 case V4SImode:
43766 case V4SFmode:
43767 use_vector_set = TARGET_SSE4_1;
43768 break;
43769 case V8HImode:
43770 use_vector_set = TARGET_SSE2;
43771 break;
43772 case V4HImode:
43773 use_vector_set = TARGET_SSE || TARGET_3DNOW_A;
43774 break;
43775 case V32QImode:
43776 case V16HImode:
43777 case V8SImode:
43778 case V8SFmode:
43779 case V4DFmode:
43780 use_vector_set = TARGET_AVX;
43781 break;
43782 case V4DImode:
43783 /* Use ix86_expand_vector_set in 64bit mode only. */
43784 use_vector_set = TARGET_AVX && TARGET_64BIT;
43785 break;
43786 default:
43787 break;
43790 if (use_vector_set)
43792 emit_insn (gen_rtx_SET (VOIDmode, target, CONST0_RTX (mode)));
43793 var = force_reg (GET_MODE_INNER (mode), var);
43794 ix86_expand_vector_set (mmx_ok, target, var, one_var);
43795 return true;
43798 switch (mode)
43800 case V2SFmode:
43801 case V2SImode:
43802 if (!mmx_ok)
43803 return false;
43804 /* FALLTHRU */
43806 case V2DFmode:
43807 case V2DImode:
43808 if (one_var != 0)
43809 return false;
43810 var = force_reg (GET_MODE_INNER (mode), var);
43811 x = gen_rtx_VEC_CONCAT (mode, var, CONST0_RTX (GET_MODE_INNER (mode)));
43812 emit_insn (gen_rtx_SET (VOIDmode, target, x));
43813 return true;
43815 case V4SFmode:
43816 case V4SImode:
43817 if (!REG_P (target) || REGNO (target) < FIRST_PSEUDO_REGISTER)
43818 new_target = gen_reg_rtx (mode);
43819 else
43820 new_target = target;
43821 var = force_reg (GET_MODE_INNER (mode), var);
43822 x = gen_rtx_VEC_DUPLICATE (mode, var);
43823 x = gen_rtx_VEC_MERGE (mode, x, CONST0_RTX (mode), const1_rtx);
43824 emit_insn (gen_rtx_SET (VOIDmode, new_target, x));
43825 if (one_var != 0)
43827 /* We need to shuffle the value to the correct position, so
43828 create a new pseudo to store the intermediate result. */
43830 /* With SSE2, we can use the integer shuffle insns. */
43831 if (mode != V4SFmode && TARGET_SSE2)
43833 emit_insn (gen_sse2_pshufd_1 (new_target, new_target,
43834 const1_rtx,
43835 GEN_INT (one_var == 1 ? 0 : 1),
43836 GEN_INT (one_var == 2 ? 0 : 1),
43837 GEN_INT (one_var == 3 ? 0 : 1)));
43838 if (target != new_target)
43839 emit_move_insn (target, new_target);
43840 return true;
43843 /* Otherwise convert the intermediate result to V4SFmode and
43844 use the SSE1 shuffle instructions. */
43845 if (mode != V4SFmode)
43847 tmp = gen_reg_rtx (V4SFmode);
43848 emit_move_insn (tmp, gen_lowpart (V4SFmode, new_target));
43850 else
43851 tmp = new_target;
43853 emit_insn (gen_sse_shufps_v4sf (tmp, tmp, tmp,
43854 const1_rtx,
43855 GEN_INT (one_var == 1 ? 0 : 1),
43856 GEN_INT (one_var == 2 ? 0+4 : 1+4),
43857 GEN_INT (one_var == 3 ? 0+4 : 1+4)));
43859 if (mode != V4SFmode)
43860 emit_move_insn (target, gen_lowpart (V4SImode, tmp));
43861 else if (tmp != target)
43862 emit_move_insn (target, tmp);
43864 else if (target != new_target)
43865 emit_move_insn (target, new_target);
43866 return true;
43868 case V8HImode:
43869 case V16QImode:
43870 vsimode = V4SImode;
43871 goto widen;
43872 case V4HImode:
43873 case V8QImode:
43874 if (!mmx_ok)
43875 return false;
43876 vsimode = V2SImode;
43877 goto widen;
43878 widen:
43879 if (one_var != 0)
43880 return false;
43882 /* Zero extend the variable element to SImode and recurse. */
43883 var = convert_modes (SImode, GET_MODE_INNER (mode), var, true);
43885 x = gen_reg_rtx (vsimode);
43886 if (!ix86_expand_vector_init_one_nonzero (mmx_ok, vsimode, x,
43887 var, one_var))
43888 gcc_unreachable ();
43890 emit_move_insn (target, gen_lowpart (mode, x));
43891 return true;
43893 default:
43894 return false;
43898 /* A subroutine of ix86_expand_vector_init. Store into TARGET a vector
43899 consisting of the values in VALS. It is known that all elements
43900 except ONE_VAR are constants. Return true if successful. */
43902 static bool
43903 ix86_expand_vector_init_one_var (bool mmx_ok, machine_mode mode,
43904 rtx target, rtx vals, int one_var)
43906 rtx var = XVECEXP (vals, 0, one_var);
43907 machine_mode wmode;
43908 rtx const_vec, x;
43910 const_vec = copy_rtx (vals);
43911 XVECEXP (const_vec, 0, one_var) = CONST0_RTX (GET_MODE_INNER (mode));
43912 const_vec = gen_rtx_CONST_VECTOR (mode, XVEC (const_vec, 0));
43914 switch (mode)
43916 case V2DFmode:
43917 case V2DImode:
43918 case V2SFmode:
43919 case V2SImode:
43920 /* For the two element vectors, it's just as easy to use
43921 the general case. */
43922 return false;
43924 case V4DImode:
43925 /* Use ix86_expand_vector_set in 64bit mode only. */
43926 if (!TARGET_64BIT)
43927 return false;
43928 case V4DFmode:
43929 case V8SFmode:
43930 case V8SImode:
43931 case V16HImode:
43932 case V32QImode:
43933 case V4SFmode:
43934 case V4SImode:
43935 case V8HImode:
43936 case V4HImode:
43937 break;
43939 case V16QImode:
43940 if (TARGET_SSE4_1)
43941 break;
43942 wmode = V8HImode;
43943 goto widen;
43944 case V8QImode:
43945 wmode = V4HImode;
43946 goto widen;
43947 widen:
43948 /* There's no way to set one QImode entry easily. Combine
43949 the variable value with its adjacent constant value, and
43950 promote to an HImode set. */
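/* E.g. for a V16QImode vector whose only variable element is element 5,
   with element 4 being the constant 0x12, the code below forms the
   HImode value (VAR << 8) | 0x12 and sets element 2 (one_var >> 1) of
   the matching V8HImode vector.  */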
43951 x = XVECEXP (vals, 0, one_var ^ 1);
43952 if (one_var & 1)
43954 var = convert_modes (HImode, QImode, var, true);
43955 var = expand_simple_binop (HImode, ASHIFT, var, GEN_INT (8),
43956 NULL_RTX, 1, OPTAB_LIB_WIDEN);
43957 x = GEN_INT (INTVAL (x) & 0xff);
43959 else
43961 var = convert_modes (HImode, QImode, var, true);
43962 x = gen_int_mode (INTVAL (x) << 8, HImode);
43964 if (x != const0_rtx)
43965 var = expand_simple_binop (HImode, IOR, var, x, var,
43966 1, OPTAB_LIB_WIDEN);
43968 x = gen_reg_rtx (wmode);
43969 emit_move_insn (x, gen_lowpart (wmode, const_vec));
43970 ix86_expand_vector_set (mmx_ok, x, var, one_var >> 1);
43972 emit_move_insn (target, gen_lowpart (mode, x));
43973 return true;
43975 default:
43976 return false;
43979 emit_move_insn (target, const_vec);
43980 ix86_expand_vector_set (mmx_ok, target, var, one_var);
43981 return true;
43984 /* A subroutine of ix86_expand_vector_init_general. Use vector
43985 concatenate to handle the most general case: all values variable,
43986 and none identical. */
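/* E.g. building a V8SImode vector from eight scalar values proceeds
   pairwise: the scalars are first combined into four V2SImode vectors,
   those into two V4SImode halves, and finally the two halves are
   concatenated into the V8SImode target.  */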
43988 static void
43989 ix86_expand_vector_init_concat (machine_mode mode,
43990 rtx target, rtx *ops, int n)
43992 machine_mode cmode, hmode = VOIDmode, gmode = VOIDmode;
43993 rtx first[16], second[8], third[4];
43994 rtvec v;
43995 int i, j;
43997 switch (n)
43999 case 2:
44000 switch (mode)
44002 case V16SImode:
44003 cmode = V8SImode;
44004 break;
44005 case V16SFmode:
44006 cmode = V8SFmode;
44007 break;
44008 case V8DImode:
44009 cmode = V4DImode;
44010 break;
44011 case V8DFmode:
44012 cmode = V4DFmode;
44013 break;
44014 case V8SImode:
44015 cmode = V4SImode;
44016 break;
44017 case V8SFmode:
44018 cmode = V4SFmode;
44019 break;
44020 case V4DImode:
44021 cmode = V2DImode;
44022 break;
44023 case V4DFmode:
44024 cmode = V2DFmode;
44025 break;
44026 case V4SImode:
44027 cmode = V2SImode;
44028 break;
44029 case V4SFmode:
44030 cmode = V2SFmode;
44031 break;
44032 case V2DImode:
44033 cmode = DImode;
44034 break;
44035 case V2SImode:
44036 cmode = SImode;
44037 break;
44038 case V2DFmode:
44039 cmode = DFmode;
44040 break;
44041 case V2SFmode:
44042 cmode = SFmode;
44043 break;
44044 default:
44045 gcc_unreachable ();
44048 if (!register_operand (ops[1], cmode))
44049 ops[1] = force_reg (cmode, ops[1]);
44050 if (!register_operand (ops[0], cmode))
44051 ops[0] = force_reg (cmode, ops[0]);
44052 emit_insn (gen_rtx_SET (VOIDmode, target,
44053 gen_rtx_VEC_CONCAT (mode, ops[0],
44054 ops[1])));
44055 break;
44057 case 4:
44058 switch (mode)
44060 case V4DImode:
44061 cmode = V2DImode;
44062 break;
44063 case V4DFmode:
44064 cmode = V2DFmode;
44065 break;
44066 case V4SImode:
44067 cmode = V2SImode;
44068 break;
44069 case V4SFmode:
44070 cmode = V2SFmode;
44071 break;
44072 default:
44073 gcc_unreachable ();
44075 goto half;
44077 case 8:
44078 switch (mode)
44080 case V8DImode:
44081 cmode = V2DImode;
44082 hmode = V4DImode;
44083 break;
44084 case V8DFmode:
44085 cmode = V2DFmode;
44086 hmode = V4DFmode;
44087 break;
44088 case V8SImode:
44089 cmode = V2SImode;
44090 hmode = V4SImode;
44091 break;
44092 case V8SFmode:
44093 cmode = V2SFmode;
44094 hmode = V4SFmode;
44095 break;
44096 default:
44097 gcc_unreachable ();
44099 goto half;
44101 case 16:
44102 switch (mode)
44104 case V16SImode:
44105 cmode = V2SImode;
44106 hmode = V4SImode;
44107 gmode = V8SImode;
44108 break;
44109 case V16SFmode:
44110 cmode = V2SFmode;
44111 hmode = V4SFmode;
44112 gmode = V8SFmode;
44113 break;
44114 default:
44115 gcc_unreachable ();
44117 goto half;
44119 half:
44120 /* FIXME: We process inputs backward to help RA. PR 36222. */
44121 i = n - 1;
44122 j = (n >> 1) - 1;
44123 for (; i > 0; i -= 2, j--)
44125 first[j] = gen_reg_rtx (cmode);
44126 v = gen_rtvec (2, ops[i - 1], ops[i]);
44127 ix86_expand_vector_init (false, first[j],
44128 gen_rtx_PARALLEL (cmode, v));
44131 n >>= 1;
44132 if (n > 4)
44134 gcc_assert (hmode != VOIDmode);
44135 gcc_assert (gmode != VOIDmode);
44136 for (i = j = 0; i < n; i += 2, j++)
44138 second[j] = gen_reg_rtx (hmode);
44139 ix86_expand_vector_init_concat (hmode, second [j],
44140 &first [i], 2);
44142 n >>= 1;
44143 for (i = j = 0; i < n; i += 2, j++)
44145 third[j] = gen_reg_rtx (gmode);
44146 ix86_expand_vector_init_concat (gmode, third[j],
44147 &second[i], 2);
44149 n >>= 1;
44150 ix86_expand_vector_init_concat (mode, target, third, n);
44152 else if (n > 2)
44154 gcc_assert (hmode != VOIDmode);
44155 for (i = j = 0; i < n; i += 2, j++)
44157 second[j] = gen_reg_rtx (hmode);
44158 ix86_expand_vector_init_concat (hmode, second [j],
44159 &first [i], 2);
44161 n >>= 1;
44162 ix86_expand_vector_init_concat (mode, target, second, n);
44164 else
44165 ix86_expand_vector_init_concat (mode, target, first, n);
44166 break;
44168 default:
44169 gcc_unreachable ();
44173 /* A subroutine of ix86_expand_vector_init_general. Use vector
44174 interleave to handle the most general case: all values variable,
44175 and none identical. */
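/* Illustrative sketch, not part of the original code: for a V8HImode
   init of { e0, ..., e7 }, each pair (e2i, e2i+1) is first placed in
   the two lowest HImode lanes of a scratch vector; the four scratch
   vectors are then combined pairwise with a low-half interleave at
   V4SImode (punpckldq) and the two results with a low-half interleave
   at V2DImode (punpcklqdq), leaving e0 ... e7 in order in TARGET. */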
44177 static void
44178 ix86_expand_vector_init_interleave (machine_mode mode,
44179 rtx target, rtx *ops, int n)
44181 machine_mode first_imode, second_imode, third_imode, inner_mode;
44182 int i, j;
44183 rtx op0, op1;
44184 rtx (*gen_load_even) (rtx, rtx, rtx);
44185 rtx (*gen_interleave_first_low) (rtx, rtx, rtx);
44186 rtx (*gen_interleave_second_low) (rtx, rtx, rtx);
44188 switch (mode)
44190 case V8HImode:
44191 gen_load_even = gen_vec_setv8hi;
44192 gen_interleave_first_low = gen_vec_interleave_lowv4si;
44193 gen_interleave_second_low = gen_vec_interleave_lowv2di;
44194 inner_mode = HImode;
44195 first_imode = V4SImode;
44196 second_imode = V2DImode;
44197 third_imode = VOIDmode;
44198 break;
44199 case V16QImode:
44200 gen_load_even = gen_vec_setv16qi;
44201 gen_interleave_first_low = gen_vec_interleave_lowv8hi;
44202 gen_interleave_second_low = gen_vec_interleave_lowv4si;
44203 inner_mode = QImode;
44204 first_imode = V8HImode;
44205 second_imode = V4SImode;
44206 third_imode = V2DImode;
44207 break;
44208 default:
44209 gcc_unreachable ();
44212 for (i = 0; i < n; i++)
44214 /* Extend the odd element to SImode using a paradoxical SUBREG. */
44215 op0 = gen_reg_rtx (SImode);
44216 emit_move_insn (op0, gen_lowpart (SImode, ops [i + i]));
44218 /* Insert the SImode value as low element of V4SImode vector. */
44219 op1 = gen_reg_rtx (V4SImode);
44220 op0 = gen_rtx_VEC_MERGE (V4SImode,
44221 gen_rtx_VEC_DUPLICATE (V4SImode,
44222 op0),
44223 CONST0_RTX (V4SImode),
44224 const1_rtx);
44225 emit_insn (gen_rtx_SET (VOIDmode, op1, op0));
44227 /* Cast the V4SImode vector back to a vector in the original mode. */
44228 op0 = gen_reg_rtx (mode);
44229 emit_move_insn (op0, gen_lowpart (mode, op1));
44231 /* Load even elements into the second position. */
44232 emit_insn (gen_load_even (op0,
44233 force_reg (inner_mode,
44234 ops [i + i + 1]),
44235 const1_rtx));
44237 /* Cast vector to FIRST_IMODE vector. */
44238 ops[i] = gen_reg_rtx (first_imode);
44239 emit_move_insn (ops[i], gen_lowpart (first_imode, op0));
44242 /* Interleave low FIRST_IMODE vectors. */
44243 for (i = j = 0; i < n; i += 2, j++)
44245 op0 = gen_reg_rtx (first_imode);
44246 emit_insn (gen_interleave_first_low (op0, ops[i], ops[i + 1]));
44248 /* Cast FIRST_IMODE vector to SECOND_IMODE vector. */
44249 ops[j] = gen_reg_rtx (second_imode);
44250 emit_move_insn (ops[j], gen_lowpart (second_imode, op0));
44253 /* Interleave low SECOND_IMODE vectors. */
44254 switch (second_imode)
44256 case V4SImode:
44257 for (i = j = 0; i < n / 2; i += 2, j++)
44259 op0 = gen_reg_rtx (second_imode);
44260 emit_insn (gen_interleave_second_low (op0, ops[i],
44261 ops[i + 1]));
44263 /* Cast the SECOND_IMODE vector to the THIRD_IMODE
44264 vector. */
44265 ops[j] = gen_reg_rtx (third_imode);
44266 emit_move_insn (ops[j], gen_lowpart (third_imode, op0));
44268 second_imode = V2DImode;
44269 gen_interleave_second_low = gen_vec_interleave_lowv2di;
44270 /* FALLTHRU */
44272 case V2DImode:
44273 op0 = gen_reg_rtx (second_imode);
44274 emit_insn (gen_interleave_second_low (op0, ops[0],
44275 ops[1]));
44277 /* Cast the SECOND_IMODE vector back to a vector in the original
44278 mode. */
44279 emit_insn (gen_rtx_SET (VOIDmode, target,
44280 gen_lowpart (mode, op0)));
44281 break;
44283 default:
44284 gcc_unreachable ();
44288 /* A subroutine of ix86_expand_vector_init. Handle the most general case:
44289 all values variable, and none identical. */
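/* Illustrative sketch, not part of the original code: modes that cannot
   take the concat or interleave paths below (V4HImode and V8QImode)
   are assembled in integer registers instead. E.g. a V4HImode init of
   { a, b, c, d } with a 32-bit word size builds the two SImode words
   (b << 16) | a and (d << 16) | c and moves them into the low and high
   halves of TARGET. */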
44291 static void
44292 ix86_expand_vector_init_general (bool mmx_ok, machine_mode mode,
44293 rtx target, rtx vals)
44295 rtx ops[64], op0, op1, op2, op3, op4, op5;
44296 machine_mode half_mode = VOIDmode;
44297 machine_mode quarter_mode = VOIDmode;
44298 int n, i;
44300 switch (mode)
44302 case V2SFmode:
44303 case V2SImode:
44304 if (!mmx_ok && !TARGET_SSE)
44305 break;
44306 /* FALLTHRU */
44308 case V16SImode:
44309 case V16SFmode:
44310 case V8DFmode:
44311 case V8DImode:
44312 case V8SFmode:
44313 case V8SImode:
44314 case V4DFmode:
44315 case V4DImode:
44316 case V4SFmode:
44317 case V4SImode:
44318 case V2DFmode:
44319 case V2DImode:
44320 n = GET_MODE_NUNITS (mode);
44321 for (i = 0; i < n; i++)
44322 ops[i] = XVECEXP (vals, 0, i);
44323 ix86_expand_vector_init_concat (mode, target, ops, n);
44324 return;
44326 case V32QImode:
44327 half_mode = V16QImode;
44328 goto half;
44330 case V16HImode:
44331 half_mode = V8HImode;
44332 goto half;
44334 half:
44335 n = GET_MODE_NUNITS (mode);
44336 for (i = 0; i < n; i++)
44337 ops[i] = XVECEXP (vals, 0, i);
44338 op0 = gen_reg_rtx (half_mode);
44339 op1 = gen_reg_rtx (half_mode);
44340 ix86_expand_vector_init_interleave (half_mode, op0, ops,
44341 n >> 2);
44342 ix86_expand_vector_init_interleave (half_mode, op1,
44343 &ops [n >> 1], n >> 2);
44344 emit_insn (gen_rtx_SET (VOIDmode, target,
44345 gen_rtx_VEC_CONCAT (mode, op0, op1)));
44346 return;
44348 case V64QImode:
44349 quarter_mode = V16QImode;
44350 half_mode = V32QImode;
44351 goto quarter;
44353 case V32HImode:
44354 quarter_mode = V8HImode;
44355 half_mode = V16HImode;
44356 goto quarter;
44358 quarter:
44359 n = GET_MODE_NUNITS (mode);
44360 for (i = 0; i < n; i++)
44361 ops[i] = XVECEXP (vals, 0, i);
44362 op0 = gen_reg_rtx (quarter_mode);
44363 op1 = gen_reg_rtx (quarter_mode);
44364 op2 = gen_reg_rtx (quarter_mode);
44365 op3 = gen_reg_rtx (quarter_mode);
44366 op4 = gen_reg_rtx (half_mode);
44367 op5 = gen_reg_rtx (half_mode);
44368 ix86_expand_vector_init_interleave (quarter_mode, op0, ops,
44369 n >> 3);
44370 ix86_expand_vector_init_interleave (quarter_mode, op1,
44371 &ops [n >> 2], n >> 3);
44372 ix86_expand_vector_init_interleave (quarter_mode, op2,
44373 &ops [n >> 1], n >> 3);
44374 ix86_expand_vector_init_interleave (quarter_mode, op3,
44375 &ops [(n >> 1) | (n >> 2)], n >> 3);
44376 emit_insn (gen_rtx_SET (VOIDmode, op4,
44377 gen_rtx_VEC_CONCAT (half_mode, op0, op1)));
44378 emit_insn (gen_rtx_SET (VOIDmode, op5,
44379 gen_rtx_VEC_CONCAT (half_mode, op2, op3)));
44380 emit_insn (gen_rtx_SET (VOIDmode, target,
44381 gen_rtx_VEC_CONCAT (mode, op4, op5)));
44382 return;
44384 case V16QImode:
44385 if (!TARGET_SSE4_1)
44386 break;
44387 /* FALLTHRU */
44389 case V8HImode:
44390 if (!TARGET_SSE2)
44391 break;
44393 /* Don't use ix86_expand_vector_init_interleave if we can't
44394 move from GPR to SSE register directly. */
44395 if (!TARGET_INTER_UNIT_MOVES_TO_VEC)
44396 break;
44398 n = GET_MODE_NUNITS (mode);
44399 for (i = 0; i < n; i++)
44400 ops[i] = XVECEXP (vals, 0, i);
44401 ix86_expand_vector_init_interleave (mode, target, ops, n >> 1);
44402 return;
44404 case V4HImode:
44405 case V8QImode:
44406 break;
44408 default:
44409 gcc_unreachable ();
44413 int i, j, n_elts, n_words, n_elt_per_word;
44414 machine_mode inner_mode;
44415 rtx words[4], shift;
44417 inner_mode = GET_MODE_INNER (mode);
44418 n_elts = GET_MODE_NUNITS (mode);
44419 n_words = GET_MODE_SIZE (mode) / UNITS_PER_WORD;
44420 n_elt_per_word = n_elts / n_words;
44421 shift = GEN_INT (GET_MODE_BITSIZE (inner_mode));
44423 for (i = 0; i < n_words; ++i)
44425 rtx word = NULL_RTX;
44427 for (j = 0; j < n_elt_per_word; ++j)
44429 rtx elt = XVECEXP (vals, 0, (i+1)*n_elt_per_word - j - 1);
44430 elt = convert_modes (word_mode, inner_mode, elt, true);
44432 if (j == 0)
44433 word = elt;
44434 else
44436 word = expand_simple_binop (word_mode, ASHIFT, word, shift,
44437 word, 1, OPTAB_LIB_WIDEN);
44438 word = expand_simple_binop (word_mode, IOR, word, elt,
44439 word, 1, OPTAB_LIB_WIDEN);
44443 words[i] = word;
44446 if (n_words == 1)
44447 emit_move_insn (target, gen_lowpart (mode, words[0]));
44448 else if (n_words == 2)
44450 rtx tmp = gen_reg_rtx (mode);
44451 emit_clobber (tmp);
44452 emit_move_insn (gen_lowpart (word_mode, tmp), words[0]);
44453 emit_move_insn (gen_highpart (word_mode, tmp), words[1]);
44454 emit_move_insn (target, tmp);
44456 else if (n_words == 4)
44458 rtx tmp = gen_reg_rtx (V4SImode);
44459 gcc_assert (word_mode == SImode);
44460 vals = gen_rtx_PARALLEL (V4SImode, gen_rtvec_v (4, words));
44461 ix86_expand_vector_init_general (false, V4SImode, tmp, vals);
44462 emit_move_insn (target, gen_lowpart (mode, tmp));
44464 else
44465 gcc_unreachable ();
44469 /* Initialize vector TARGET via VALS. Suppress the use of MMX
44470 instructions unless MMX_OK is true. */
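/* Illustrative summary, not part of the original code, of the dispatch
   below: an all-constant VALS is loaded from the constant pool; if all
   elements are identical the value is broadcast; if exactly one element
   is variable the constant part is loaded and the single element
   inserted afterwards; otherwise the fully general expander above is
   used. */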
44472 void
44473 ix86_expand_vector_init (bool mmx_ok, rtx target, rtx vals)
44475 machine_mode mode = GET_MODE (target);
44476 machine_mode inner_mode = GET_MODE_INNER (mode);
44477 int n_elts = GET_MODE_NUNITS (mode);
44478 int n_var = 0, one_var = -1;
44479 bool all_same = true, all_const_zero = true;
44480 int i;
44481 rtx x;
44483 for (i = 0; i < n_elts; ++i)
44485 x = XVECEXP (vals, 0, i);
44486 if (!(CONST_INT_P (x)
44487 || GET_CODE (x) == CONST_DOUBLE
44488 || GET_CODE (x) == CONST_FIXED))
44489 n_var++, one_var = i;
44490 else if (x != CONST0_RTX (inner_mode))
44491 all_const_zero = false;
44492 if (i > 0 && !rtx_equal_p (x, XVECEXP (vals, 0, 0)))
44493 all_same = false;
44496 /* Constants are best loaded from the constant pool. */
44497 if (n_var == 0)
44499 emit_move_insn (target, gen_rtx_CONST_VECTOR (mode, XVEC (vals, 0)));
44500 return;
44503 /* If all values are identical, broadcast the value. */
44504 if (all_same
44505 && ix86_expand_vector_init_duplicate (mmx_ok, mode, target,
44506 XVECEXP (vals, 0, 0)))
44507 return;
44509 /* Values where only one field is non-constant are best loaded from
44510 the pool and overwritten via move later. */
44511 if (n_var == 1)
44513 if (all_const_zero
44514 && ix86_expand_vector_init_one_nonzero (mmx_ok, mode, target,
44515 XVECEXP (vals, 0, one_var),
44516 one_var))
44517 return;
44519 if (ix86_expand_vector_init_one_var (mmx_ok, mode, target, vals, one_var))
44520 return;
44523 ix86_expand_vector_init_general (mmx_ok, mode, target, vals);
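/* Store the scalar VAL into element ELT of vector TARGET, using MMX
   instructions only if MMX_OK. When no suitable insertion pattern is
   available, fall back to spilling TARGET to a stack temporary,
   rewriting the element in memory and reloading the whole vector. */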
44526 void
44527 ix86_expand_vector_set (bool mmx_ok, rtx target, rtx val, int elt)
44529 machine_mode mode = GET_MODE (target);
44530 machine_mode inner_mode = GET_MODE_INNER (mode);
44531 machine_mode half_mode;
44532 bool use_vec_merge = false;
44533 rtx tmp;
44534 static rtx (*gen_extract[6][2]) (rtx, rtx)
44536 { gen_vec_extract_lo_v32qi, gen_vec_extract_hi_v32qi },
44537 { gen_vec_extract_lo_v16hi, gen_vec_extract_hi_v16hi },
44538 { gen_vec_extract_lo_v8si, gen_vec_extract_hi_v8si },
44539 { gen_vec_extract_lo_v4di, gen_vec_extract_hi_v4di },
44540 { gen_vec_extract_lo_v8sf, gen_vec_extract_hi_v8sf },
44541 { gen_vec_extract_lo_v4df, gen_vec_extract_hi_v4df }
44543 static rtx (*gen_insert[6][2]) (rtx, rtx, rtx)
44545 { gen_vec_set_lo_v32qi, gen_vec_set_hi_v32qi },
44546 { gen_vec_set_lo_v16hi, gen_vec_set_hi_v16hi },
44547 { gen_vec_set_lo_v8si, gen_vec_set_hi_v8si },
44548 { gen_vec_set_lo_v4di, gen_vec_set_hi_v4di },
44549 { gen_vec_set_lo_v8sf, gen_vec_set_hi_v8sf },
44550 { gen_vec_set_lo_v4df, gen_vec_set_hi_v4df }
44552 int i, j, n;
44554 switch (mode)
44556 case V2SFmode:
44557 case V2SImode:
44558 if (mmx_ok)
44560 tmp = gen_reg_rtx (GET_MODE_INNER (mode));
44561 ix86_expand_vector_extract (true, tmp, target, 1 - elt);
44562 if (elt == 0)
44563 tmp = gen_rtx_VEC_CONCAT (mode, val, tmp);
44564 else
44565 tmp = gen_rtx_VEC_CONCAT (mode, tmp, val);
44566 emit_insn (gen_rtx_SET (VOIDmode, target, tmp));
44567 return;
44569 break;
44571 case V2DImode:
44572 use_vec_merge = TARGET_SSE4_1 && TARGET_64BIT;
44573 if (use_vec_merge)
44574 break;
44576 tmp = gen_reg_rtx (GET_MODE_INNER (mode));
44577 ix86_expand_vector_extract (false, tmp, target, 1 - elt);
44578 if (elt == 0)
44579 tmp = gen_rtx_VEC_CONCAT (mode, val, tmp);
44580 else
44581 tmp = gen_rtx_VEC_CONCAT (mode, tmp, val);
44582 emit_insn (gen_rtx_SET (VOIDmode, target, tmp));
44583 return;
44585 case V2DFmode:
44587 rtx op0, op1;
44589 /* For the two element vectors, we implement a VEC_CONCAT with
44590 the extraction of the other element. */
44592 tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (1, GEN_INT (1 - elt)));
44593 tmp = gen_rtx_VEC_SELECT (inner_mode, target, tmp);
44595 if (elt == 0)
44596 op0 = val, op1 = tmp;
44597 else
44598 op0 = tmp, op1 = val;
44600 tmp = gen_rtx_VEC_CONCAT (mode, op0, op1);
44601 emit_insn (gen_rtx_SET (VOIDmode, target, tmp));
44603 return;
44605 case V4SFmode:
44606 use_vec_merge = TARGET_SSE4_1;
44607 if (use_vec_merge)
44608 break;
44610 switch (elt)
44612 case 0:
44613 use_vec_merge = true;
44614 break;
44616 case 1:
44617 /* tmp = target = A B C D */
44618 tmp = copy_to_reg (target);
44619 /* target = A A B B */
44620 emit_insn (gen_vec_interleave_lowv4sf (target, target, target));
44621 /* target = X A B B */
44622 ix86_expand_vector_set (false, target, val, 0);
44623 /* target = A X C D */
44624 emit_insn (gen_sse_shufps_v4sf (target, target, tmp,
44625 const1_rtx, const0_rtx,
44626 GEN_INT (2+4), GEN_INT (3+4)));
44627 return;
44629 case 2:
44630 /* tmp = target = A B C D */
44631 tmp = copy_to_reg (target);
44632 /* tmp = X B C D */
44633 ix86_expand_vector_set (false, tmp, val, 0);
44634 /* target = A B X D */
44635 emit_insn (gen_sse_shufps_v4sf (target, target, tmp,
44636 const0_rtx, const1_rtx,
44637 GEN_INT (0+4), GEN_INT (3+4)));
44638 return;
44640 case 3:
44641 /* tmp = target = A B C D */
44642 tmp = copy_to_reg (target);
44643 /* tmp = X B C D */
44644 ix86_expand_vector_set (false, tmp, val, 0);
44645 /* target = A B C X */
44646 emit_insn (gen_sse_shufps_v4sf (target, target, tmp,
44647 const0_rtx, const1_rtx,
44648 GEN_INT (2+4), GEN_INT (0+4)));
44649 return;
44651 default:
44652 gcc_unreachable ();
44654 break;
44656 case V4SImode:
44657 use_vec_merge = TARGET_SSE4_1;
44658 if (use_vec_merge)
44659 break;
44661 /* Element 0 handled by vec_merge below. */
44662 if (elt == 0)
44664 use_vec_merge = true;
44665 break;
44668 if (TARGET_SSE2)
44670 /* With SSE2, use integer shuffles to swap element 0 and ELT,
44671 store into element 0, then shuffle them back. */
44673 rtx order[4];
44675 order[0] = GEN_INT (elt);
44676 order[1] = const1_rtx;
44677 order[2] = const2_rtx;
44678 order[3] = GEN_INT (3);
44679 order[elt] = const0_rtx;
44681 emit_insn (gen_sse2_pshufd_1 (target, target, order[0],
44682 order[1], order[2], order[3]));
44684 ix86_expand_vector_set (false, target, val, 0);
44686 emit_insn (gen_sse2_pshufd_1 (target, target, order[0],
44687 order[1], order[2], order[3]));
44689 else
44691 /* For SSE1, we have to reuse the V4SF code. */
44692 rtx t = gen_reg_rtx (V4SFmode);
44693 ix86_expand_vector_set (false, t, gen_lowpart (SFmode, val), elt);
44694 emit_move_insn (target, gen_lowpart (mode, t));
44696 return;
44698 case V8HImode:
44699 use_vec_merge = TARGET_SSE2;
44700 break;
44701 case V4HImode:
44702 use_vec_merge = mmx_ok && (TARGET_SSE || TARGET_3DNOW_A);
44703 break;
44705 case V16QImode:
44706 use_vec_merge = TARGET_SSE4_1;
44707 break;
44709 case V8QImode:
44710 break;
44712 case V32QImode:
44713 half_mode = V16QImode;
44714 j = 0;
44715 n = 16;
44716 goto half;
44718 case V16HImode:
44719 half_mode = V8HImode;
44720 j = 1;
44721 n = 8;
44722 goto half;
44724 case V8SImode:
44725 half_mode = V4SImode;
44726 j = 2;
44727 n = 4;
44728 goto half;
44730 case V4DImode:
44731 half_mode = V2DImode;
44732 j = 3;
44733 n = 2;
44734 goto half;
44736 case V8SFmode:
44737 half_mode = V4SFmode;
44738 j = 4;
44739 n = 4;
44740 goto half;
44742 case V4DFmode:
44743 half_mode = V2DFmode;
44744 j = 5;
44745 n = 2;
44746 goto half;
44748 half:
44749 /* Compute offset. */
44750 i = elt / n;
44751 elt %= n;
44753 gcc_assert (i <= 1);
44755 /* Extract the half. */
44756 tmp = gen_reg_rtx (half_mode);
44757 emit_insn (gen_extract[j][i] (tmp, target));
44759 /* Put val in tmp at elt. */
44760 ix86_expand_vector_set (false, tmp, val, elt);
44762 /* Put it back. */
44763 emit_insn (gen_insert[j][i] (target, target, tmp));
44764 return;
44766 case V8DFmode:
44767 if (TARGET_AVX512F)
44769 tmp = gen_reg_rtx (mode);
44770 emit_insn (gen_rtx_SET (VOIDmode, tmp,
44771 gen_rtx_VEC_DUPLICATE (mode, val)));
44772 emit_insn (gen_avx512f_blendmv8df (target, tmp, target,
44773 force_reg (QImode, GEN_INT (1 << elt))));
44774 return;
44776 else
44777 break;
44778 case V8DImode:
44779 if (TARGET_AVX512F)
44781 tmp = gen_reg_rtx (mode);
44782 emit_insn (gen_rtx_SET (VOIDmode, tmp,
44783 gen_rtx_VEC_DUPLICATE (mode, val)));
44784 emit_insn (gen_avx512f_blendmv8di (target, tmp, target,
44785 force_reg (QImode, GEN_INT (1 << elt))));
44786 return;
44788 else
44789 break;
44790 case V16SFmode:
44791 if (TARGET_AVX512F)
44793 tmp = gen_reg_rtx (mode);
44794 emit_insn (gen_rtx_SET (VOIDmode, tmp,
44795 gen_rtx_VEC_DUPLICATE (mode, val)));
44796 emit_insn (gen_avx512f_blendmv16sf (target, tmp, target,
44797 force_reg (HImode, GEN_INT (1 << elt))));
44798 return;
44800 else
44801 break;
44802 case V16SImode:
44803 if (TARGET_AVX512F)
44805 tmp = gen_reg_rtx (mode);
44806 emit_insn (gen_rtx_SET (VOIDmode, tmp,
44807 gen_rtx_VEC_DUPLICATE (mode, val)));
44808 emit_insn (gen_avx512f_blendmv16si (target, tmp, target,
44809 force_reg (HImode, GEN_INT (1 << elt))));
44810 return;
44812 else
44813 break;
44814 case V32HImode:
44815 if (TARGET_AVX512F && TARGET_AVX512BW)
44817 tmp = gen_reg_rtx (mode);
44818 emit_insn (gen_rtx_SET (VOIDmode, tmp,
44819 gen_rtx_VEC_DUPLICATE (mode, val)));
44820 emit_insn (gen_avx512bw_blendmv32hi (target, tmp, target,
44821 force_reg (SImode, GEN_INT (1 << elt))));
44822 return;
44824 else
44825 break;
44826 case V64QImode:
44827 if (TARGET_AVX512F && TARGET_AVX512BW)
44829 tmp = gen_reg_rtx (mode);
44830 emit_insn (gen_rtx_SET (VOIDmode, tmp,
44831 gen_rtx_VEC_DUPLICATE (mode, val)));
44832 emit_insn (gen_avx512bw_blendmv64qi (target, tmp, target,
44833 force_reg (DImode, GEN_INT (1 << elt))));
44834 return;
44836 else
44837 break;
44839 default:
44840 break;
44843 if (use_vec_merge)
44845 tmp = gen_rtx_VEC_DUPLICATE (mode, val);
44846 tmp = gen_rtx_VEC_MERGE (mode, tmp, target, GEN_INT (1 << elt));
44847 emit_insn (gen_rtx_SET (VOIDmode, target, tmp));
44849 else
44851 rtx mem = assign_stack_temp (mode, GET_MODE_SIZE (mode));
44853 emit_move_insn (mem, target);
44855 tmp = adjust_address (mem, inner_mode, elt*GET_MODE_SIZE (inner_mode));
44856 emit_move_insn (tmp, val);
44858 emit_move_insn (target, mem);
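/* Extract element ELT of vector VEC into the scalar TARGET, using MMX
   instructions only if MMX_OK. When no suitable extraction pattern is
   available, fall back to spilling VEC to a stack temporary and loading
   the element back from memory. */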
44862 void
44863 ix86_expand_vector_extract (bool mmx_ok, rtx target, rtx vec, int elt)
44865 machine_mode mode = GET_MODE (vec);
44866 machine_mode inner_mode = GET_MODE_INNER (mode);
44867 bool use_vec_extr = false;
44868 rtx tmp;
44870 switch (mode)
44872 case V2SImode:
44873 case V2SFmode:
44874 if (!mmx_ok)
44875 break;
44876 /* FALLTHRU */
44878 case V2DFmode:
44879 case V2DImode:
44880 use_vec_extr = true;
44881 break;
44883 case V4SFmode:
44884 use_vec_extr = TARGET_SSE4_1;
44885 if (use_vec_extr)
44886 break;
44888 switch (elt)
44890 case 0:
44891 tmp = vec;
44892 break;
44894 case 1:
44895 case 3:
44896 tmp = gen_reg_rtx (mode);
44897 emit_insn (gen_sse_shufps_v4sf (tmp, vec, vec,
44898 GEN_INT (elt), GEN_INT (elt),
44899 GEN_INT (elt+4), GEN_INT (elt+4)));
44900 break;
44902 case 2:
44903 tmp = gen_reg_rtx (mode);
44904 emit_insn (gen_vec_interleave_highv4sf (tmp, vec, vec));
44905 break;
44907 default:
44908 gcc_unreachable ();
44910 vec = tmp;
44911 use_vec_extr = true;
44912 elt = 0;
44913 break;
44915 case V4SImode:
44916 use_vec_extr = TARGET_SSE4_1;
44917 if (use_vec_extr)
44918 break;
44920 if (TARGET_SSE2)
44922 switch (elt)
44924 case 0:
44925 tmp = vec;
44926 break;
44928 case 1:
44929 case 3:
44930 tmp = gen_reg_rtx (mode);
44931 emit_insn (gen_sse2_pshufd_1 (tmp, vec,
44932 GEN_INT (elt), GEN_INT (elt),
44933 GEN_INT (elt), GEN_INT (elt)));
44934 break;
44936 case 2:
44937 tmp = gen_reg_rtx (mode);
44938 emit_insn (gen_vec_interleave_highv4si (tmp, vec, vec));
44939 break;
44941 default:
44942 gcc_unreachable ();
44944 vec = tmp;
44945 use_vec_extr = true;
44946 elt = 0;
44948 else
44950 /* For SSE1, we have to reuse the V4SF code. */
44951 ix86_expand_vector_extract (false, gen_lowpart (SFmode, target),
44952 gen_lowpart (V4SFmode, vec), elt);
44953 return;
44955 break;
44957 case V8HImode:
44958 use_vec_extr = TARGET_SSE2;
44959 break;
44960 case V4HImode:
44961 use_vec_extr = mmx_ok && (TARGET_SSE || TARGET_3DNOW_A);
44962 break;
44964 case V16QImode:
44965 use_vec_extr = TARGET_SSE4_1;
44966 break;
44968 case V8SFmode:
44969 if (TARGET_AVX)
44971 tmp = gen_reg_rtx (V4SFmode);
44972 if (elt < 4)
44973 emit_insn (gen_vec_extract_lo_v8sf (tmp, vec));
44974 else
44975 emit_insn (gen_vec_extract_hi_v8sf (tmp, vec));
44976 ix86_expand_vector_extract (false, target, tmp, elt & 3);
44977 return;
44979 break;
44981 case V4DFmode:
44982 if (TARGET_AVX)
44984 tmp = gen_reg_rtx (V2DFmode);
44985 if (elt < 2)
44986 emit_insn (gen_vec_extract_lo_v4df (tmp, vec));
44987 else
44988 emit_insn (gen_vec_extract_hi_v4df (tmp, vec));
44989 ix86_expand_vector_extract (false, target, tmp, elt & 1);
44990 return;
44992 break;
44994 case V32QImode:
44995 if (TARGET_AVX)
44997 tmp = gen_reg_rtx (V16QImode);
44998 if (elt < 16)
44999 emit_insn (gen_vec_extract_lo_v32qi (tmp, vec));
45000 else
45001 emit_insn (gen_vec_extract_hi_v32qi (tmp, vec));
45002 ix86_expand_vector_extract (false, target, tmp, elt & 15);
45003 return;
45005 break;
45007 case V16HImode:
45008 if (TARGET_AVX)
45010 tmp = gen_reg_rtx (V8HImode);
45011 if (elt < 8)
45012 emit_insn (gen_vec_extract_lo_v16hi (tmp, vec));
45013 else
45014 emit_insn (gen_vec_extract_hi_v16hi (tmp, vec));
45015 ix86_expand_vector_extract (false, target, tmp, elt & 7);
45016 return;
45018 break;
45020 case V8SImode:
45021 if (TARGET_AVX)
45023 tmp = gen_reg_rtx (V4SImode);
45024 if (elt < 4)
45025 emit_insn (gen_vec_extract_lo_v8si (tmp, vec));
45026 else
45027 emit_insn (gen_vec_extract_hi_v8si (tmp, vec));
45028 ix86_expand_vector_extract (false, target, tmp, elt & 3);
45029 return;
45031 break;
45033 case V4DImode:
45034 if (TARGET_AVX)
45036 tmp = gen_reg_rtx (V2DImode);
45037 if (elt < 2)
45038 emit_insn (gen_vec_extract_lo_v4di (tmp, vec));
45039 else
45040 emit_insn (gen_vec_extract_hi_v4di (tmp, vec));
45041 ix86_expand_vector_extract (false, target, tmp, elt & 1);
45042 return;
45044 break;
45046 case V32HImode:
45047 if (TARGET_AVX512BW)
45049 tmp = gen_reg_rtx (V16HImode);
45050 if (elt < 16)
45051 emit_insn (gen_vec_extract_lo_v32hi (tmp, vec));
45052 else
45053 emit_insn (gen_vec_extract_hi_v32hi (tmp, vec));
45054 ix86_expand_vector_extract (false, target, tmp, elt & 15);
45055 return;
45057 break;
45059 case V64QImode:
45060 if (TARGET_AVX512BW)
45062 tmp = gen_reg_rtx (V32QImode);
45063 if (elt < 32)
45064 emit_insn (gen_vec_extract_lo_v64qi (tmp, vec));
45065 else
45066 emit_insn (gen_vec_extract_hi_v64qi (tmp, vec));
45067 ix86_expand_vector_extract (false, target, tmp, elt & 31);
45068 return;
45070 break;
45072 case V16SFmode:
45073 tmp = gen_reg_rtx (V8SFmode);
45074 if (elt < 8)
45075 emit_insn (gen_vec_extract_lo_v16sf (tmp, vec));
45076 else
45077 emit_insn (gen_vec_extract_hi_v16sf (tmp, vec));
45078 ix86_expand_vector_extract (false, target, tmp, elt & 7);
45079 return;
45081 case V8DFmode:
45082 tmp = gen_reg_rtx (V4DFmode);
45083 if (elt < 4)
45084 emit_insn (gen_vec_extract_lo_v8df (tmp, vec));
45085 else
45086 emit_insn (gen_vec_extract_hi_v8df (tmp, vec));
45087 ix86_expand_vector_extract (false, target, tmp, elt & 3);
45088 return;
45090 case V16SImode:
45091 tmp = gen_reg_rtx (V8SImode);
45092 if (elt < 8)
45093 emit_insn (gen_vec_extract_lo_v16si (tmp, vec));
45094 else
45095 emit_insn (gen_vec_extract_hi_v16si (tmp, vec));
45096 ix86_expand_vector_extract (false, target, tmp, elt & 7);
45097 return;
45099 case V8DImode:
45100 tmp = gen_reg_rtx (V4DImode);
45101 if (elt < 4)
45102 emit_insn (gen_vec_extract_lo_v8di (tmp, vec));
45103 else
45104 emit_insn (gen_vec_extract_hi_v8di (tmp, vec));
45105 ix86_expand_vector_extract (false, target, tmp, elt & 3);
45106 return;
45108 case V8QImode:
45109 /* ??? Could extract the appropriate HImode element and shift. */
45110 default:
45111 break;
45114 if (use_vec_extr)
45116 tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (1, GEN_INT (elt)));
45117 tmp = gen_rtx_VEC_SELECT (inner_mode, vec, tmp);
45119 /* Let the rtl optimizers know about the zero extension performed. */
45120 if (inner_mode == QImode || inner_mode == HImode)
45122 tmp = gen_rtx_ZERO_EXTEND (SImode, tmp);
45123 target = gen_lowpart (SImode, target);
45126 emit_insn (gen_rtx_SET (VOIDmode, target, tmp));
45128 else
45130 rtx mem = assign_stack_temp (mode, GET_MODE_SIZE (mode));
45132 emit_move_insn (mem, vec);
45134 tmp = adjust_address (mem, inner_mode, elt*GET_MODE_SIZE (inner_mode));
45135 emit_move_insn (target, tmp);
45139 /* Generate code to copy vector bits i / 2 ... i - 1 from vector SRC
45140 to bits 0 ... i / 2 - 1 of vector DEST, which has the same mode.
45141 The upper bits of DEST are undefined, though they shouldn't cause
45142 exceptions (some bits from src or all zeros are ok). */
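/* Illustrative example, not part of the original code: for a V4SImode
   SRC { a, b, c, d } and I == 128, the V1TImode logical shift right by
   I / 2 == 64 bits below moves c and d into elements 0 and 1 of DEST,
   so a following binary operation on SRC and DEST combines a with c
   and b with d; the next step (I == 64) shifts by 32 bits to combine
   the two partial results. */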
45144 static void
45145 emit_reduc_half (rtx dest, rtx src, int i)
45147 rtx tem, d = dest;
45148 switch (GET_MODE (src))
45150 case V4SFmode:
45151 if (i == 128)
45152 tem = gen_sse_movhlps (dest, src, src);
45153 else
45154 tem = gen_sse_shufps_v4sf (dest, src, src, const1_rtx, const1_rtx,
45155 GEN_INT (1 + 4), GEN_INT (1 + 4));
45156 break;
45157 case V2DFmode:
45158 tem = gen_vec_interleave_highv2df (dest, src, src);
45159 break;
45160 case V16QImode:
45161 case V8HImode:
45162 case V4SImode:
45163 case V2DImode:
45164 d = gen_reg_rtx (V1TImode);
45165 tem = gen_sse2_lshrv1ti3 (d, gen_lowpart (V1TImode, src),
45166 GEN_INT (i / 2));
45167 break;
45168 case V8SFmode:
45169 if (i == 256)
45170 tem = gen_avx_vperm2f128v8sf3 (dest, src, src, const1_rtx);
45171 else
45172 tem = gen_avx_shufps256 (dest, src, src,
45173 GEN_INT (i == 128 ? 2 + (3 << 2) : 1));
45174 break;
45175 case V4DFmode:
45176 if (i == 256)
45177 tem = gen_avx_vperm2f128v4df3 (dest, src, src, const1_rtx);
45178 else
45179 tem = gen_avx_shufpd256 (dest, src, src, const1_rtx);
45180 break;
45181 case V32QImode:
45182 case V16HImode:
45183 case V8SImode:
45184 case V4DImode:
45185 if (i == 256)
45187 if (GET_MODE (dest) != V4DImode)
45188 d = gen_reg_rtx (V4DImode);
45189 tem = gen_avx2_permv2ti (d, gen_lowpart (V4DImode, src),
45190 gen_lowpart (V4DImode, src),
45191 const1_rtx);
45193 else
45195 d = gen_reg_rtx (V2TImode);
45196 tem = gen_avx2_lshrv2ti3 (d, gen_lowpart (V2TImode, src),
45197 GEN_INT (i / 2));
45199 break;
45200 case V64QImode:
45201 case V32HImode:
45202 case V16SImode:
45203 case V16SFmode:
45204 case V8DImode:
45205 case V8DFmode:
45206 if (i > 128)
45207 tem = gen_avx512f_shuf_i32x4_1 (gen_lowpart (V16SImode, dest),
45208 gen_lowpart (V16SImode, src),
45209 gen_lowpart (V16SImode, src),
45210 GEN_INT (0x4 + (i == 512 ? 4 : 0)),
45211 GEN_INT (0x5 + (i == 512 ? 4 : 0)),
45212 GEN_INT (0x6 + (i == 512 ? 4 : 0)),
45213 GEN_INT (0x7 + (i == 512 ? 4 : 0)),
45214 GEN_INT (0xC), GEN_INT (0xD),
45215 GEN_INT (0xE), GEN_INT (0xF),
45216 GEN_INT (0x10), GEN_INT (0x11),
45217 GEN_INT (0x12), GEN_INT (0x13),
45218 GEN_INT (0x14), GEN_INT (0x15),
45219 GEN_INT (0x16), GEN_INT (0x17));
45220 else
45221 tem = gen_avx512f_pshufd_1 (gen_lowpart (V16SImode, dest),
45222 gen_lowpart (V16SImode, src),
45223 GEN_INT (i == 128 ? 0x2 : 0x1),
45224 GEN_INT (0x3),
45225 GEN_INT (0x3),
45226 GEN_INT (0x3),
45227 GEN_INT (i == 128 ? 0x6 : 0x5),
45228 GEN_INT (0x7),
45229 GEN_INT (0x7),
45230 GEN_INT (0x7),
45231 GEN_INT (i == 128 ? 0xA : 0x9),
45232 GEN_INT (0xB),
45233 GEN_INT (0xB),
45234 GEN_INT (0xB),
45235 GEN_INT (i == 128 ? 0xE : 0xD),
45236 GEN_INT (0xF),
45237 GEN_INT (0xF),
45238 GEN_INT (0xF));
45239 break;
45240 default:
45241 gcc_unreachable ();
45243 emit_insn (tem);
45244 if (d != dest)
45245 emit_move_insn (dest, gen_lowpart (GET_MODE (dest), d));
45248 /* Expand a vector reduction. FN is the binary pattern to reduce;
45249 DEST is the destination; IN is the input vector. */
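/* Illustrative walk-through, not part of the original code, for a
   V4SImode maximum reduction of IN = { a, b, c, d }:
     half = { c, d, _, _ };  tmp  = { max(a,c), max(b,d), _, _ }
     half = { max(b,d), _, _, _ };
     dest = max of half and tmp elementwise,
   so element 0 of DEST holds max (a, b, c, d). */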
45251 void
45252 ix86_expand_reduc (rtx (*fn) (rtx, rtx, rtx), rtx dest, rtx in)
45254 rtx half, dst, vec = in;
45255 machine_mode mode = GET_MODE (in);
45256 int i;
45258 /* SSE4 has a special instruction for V8HImode UMIN reduction. */
45259 if (TARGET_SSE4_1
45260 && mode == V8HImode
45261 && fn == gen_uminv8hi3)
45263 emit_insn (gen_sse4_1_phminposuw (dest, in));
45264 return;
45267 for (i = GET_MODE_BITSIZE (mode);
45268 i > GET_MODE_BITSIZE (GET_MODE_INNER (mode));
45269 i >>= 1)
45271 half = gen_reg_rtx (mode);
45272 emit_reduc_half (half, vec, i);
45273 if (i == GET_MODE_BITSIZE (GET_MODE_INNER (mode)) * 2)
45274 dst = dest;
45275 else
45276 dst = gen_reg_rtx (mode);
45277 emit_insn (fn (dst, half, vec));
45278 vec = dst;
45282 /* Target hook for scalar_mode_supported_p. */
45283 static bool
45284 ix86_scalar_mode_supported_p (machine_mode mode)
45286 if (DECIMAL_FLOAT_MODE_P (mode))
45287 return default_decimal_float_supported_p ();
45288 else if (mode == TFmode)
45289 return true;
45290 else
45291 return default_scalar_mode_supported_p (mode);
45294 /* Implements target hook vector_mode_supported_p. */
45295 static bool
45296 ix86_vector_mode_supported_p (machine_mode mode)
45298 if (TARGET_SSE && VALID_SSE_REG_MODE (mode))
45299 return true;
45300 if (TARGET_SSE2 && VALID_SSE2_REG_MODE (mode))
45301 return true;
45302 if (TARGET_AVX && VALID_AVX256_REG_MODE (mode))
45303 return true;
45304 if (TARGET_AVX512F && VALID_AVX512F_REG_MODE (mode))
45305 return true;
45306 if (TARGET_MMX && VALID_MMX_REG_MODE (mode))
45307 return true;
45308 if (TARGET_3DNOW && VALID_MMX_REG_MODE_3DNOW (mode))
45309 return true;
45310 return false;
45313 /* Implement target hook libgcc_floating_mode_supported_p. */
45314 static bool
45315 ix86_libgcc_floating_mode_supported_p (machine_mode mode)
45317 switch (mode)
45319 case SFmode:
45320 case DFmode:
45321 case XFmode:
45322 return true;
45324 case TFmode:
45325 #ifdef IX86_NO_LIBGCC_TFMODE
45326 return false;
45327 #elif defined IX86_MAYBE_NO_LIBGCC_TFMODE
45328 return TARGET_LONG_DOUBLE_128;
45329 #else
45330 return true;
45331 #endif
45333 default:
45334 return false;
45338 /* Target hook for c_mode_for_suffix. */
45339 static machine_mode
45340 ix86_c_mode_for_suffix (char suffix)
45342 if (suffix == 'q')
45343 return TFmode;
45344 if (suffix == 'w')
45345 return XFmode;
45347 return VOIDmode;
45350 /* Worker function for TARGET_MD_ASM_CLOBBERS.
45352 We do this in the new i386 backend to maintain source compatibility
45353 with the old cc0-based compiler. */
45355 static tree
45356 ix86_md_asm_clobbers (tree, tree, tree clobbers)
45358 clobbers = tree_cons (NULL_TREE, build_string (5, "flags"),
45359 clobbers);
45360 clobbers = tree_cons (NULL_TREE, build_string (4, "fpsr"),
45361 clobbers);
45362 return clobbers;
45365 /* Implements the target hook targetm.encode_section_info. */
45367 static void ATTRIBUTE_UNUSED
45368 ix86_encode_section_info (tree decl, rtx rtl, int first)
45370 default_encode_section_info (decl, rtl, first);
45372 if (ix86_in_large_data_p (decl))
45373 SYMBOL_REF_FLAGS (XEXP (rtl, 0)) |= SYMBOL_FLAG_FAR_ADDR;
45376 /* Worker function for REVERSE_CONDITION. */
45378 enum rtx_code
45379 ix86_reverse_condition (enum rtx_code code, machine_mode mode)
45381 return (mode != CCFPmode && mode != CCFPUmode
45382 ? reverse_condition (code)
45383 : reverse_condition_maybe_unordered (code));
45386 /* Output code to perform an x87 FP register move, from OPERANDS[1]
45387 to OPERANDS[0]. */
45389 const char *
45390 output_387_reg_move (rtx insn, rtx *operands)
45392 if (REG_P (operands[0]))
45394 if (REG_P (operands[1])
45395 && find_regno_note (insn, REG_DEAD, REGNO (operands[1])))
45397 if (REGNO (operands[0]) == FIRST_STACK_REG)
45398 return output_387_ffreep (operands, 0);
45399 return "fstp\t%y0";
45401 if (STACK_TOP_P (operands[0]))
45402 return "fld%Z1\t%y1";
45403 return "fst\t%y0";
45405 else if (MEM_P (operands[0]))
45407 gcc_assert (REG_P (operands[1]));
45408 if (find_regno_note (insn, REG_DEAD, REGNO (operands[1])))
45409 return "fstp%Z0\t%y0";
45410 else
45412 /* There is no non-popping store to memory for XFmode.
45413 So if we need one, follow the store with a load. */
45414 if (GET_MODE (operands[0]) == XFmode)
45415 return "fstp%Z0\t%y0\n\tfld%Z0\t%y0";
45416 else
45417 return "fst%Z0\t%y0";
45420 else
45421 gcc_unreachable ();
45424 /* Output code to perform a conditional jump to LABEL, if C2 flag in
45425 FP status register is set. */
45427 void
45428 ix86_emit_fp_unordered_jump (rtx label)
45430 rtx reg = gen_reg_rtx (HImode);
45431 rtx temp;
45433 emit_insn (gen_x86_fnstsw_1 (reg));
45435 if (TARGET_SAHF && (TARGET_USE_SAHF || optimize_insn_for_size_p ()))
45437 emit_insn (gen_x86_sahf_1 (reg));
45439 temp = gen_rtx_REG (CCmode, FLAGS_REG);
45440 temp = gen_rtx_UNORDERED (VOIDmode, temp, const0_rtx);
45442 else
45444 emit_insn (gen_testqi_ext_ccno_0 (reg, GEN_INT (0x04)));
45446 temp = gen_rtx_REG (CCNOmode, FLAGS_REG);
45447 temp = gen_rtx_NE (VOIDmode, temp, const0_rtx);
45450 temp = gen_rtx_IF_THEN_ELSE (VOIDmode, temp,
45451 gen_rtx_LABEL_REF (VOIDmode, label),
45452 pc_rtx);
45453 temp = gen_rtx_SET (VOIDmode, pc_rtx, temp);
45455 emit_jump_insn (temp);
45456 predict_jump (REG_BR_PROB_BASE * 10 / 100);
45459 /* Output code to perform a log1p XFmode calculation. */
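/* Roughly, as illustrative C (not part of the original code):
     if (fabs (op1) < 1.0 - sqrt (0.5))    // 0.29289321881...
       op0 = fyl2xp1 (op1, ln2);           // ln2 * log2 (1 + op1)
     else
       op0 = fyl2x (1.0 + op1, ln2);       // ln2 * log2 (1 + op1)
   The threshold is 1 - sqrt(2)/2, which matches the documented argument
   range of the x87 fyl2xp1 instruction and keeps precision for small
   |op1|. */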
45461 void ix86_emit_i387_log1p (rtx op0, rtx op1)
45463 rtx_code_label *label1 = gen_label_rtx ();
45464 rtx_code_label *label2 = gen_label_rtx ();
45466 rtx tmp = gen_reg_rtx (XFmode);
45467 rtx tmp2 = gen_reg_rtx (XFmode);
45468 rtx test;
45470 emit_insn (gen_absxf2 (tmp, op1));
45471 test = gen_rtx_GE (VOIDmode, tmp,
45472 CONST_DOUBLE_FROM_REAL_VALUE (
45473 REAL_VALUE_ATOF ("0.29289321881345247561810596348408353", XFmode),
45474 XFmode));
45475 emit_jump_insn (gen_cbranchxf4 (test, XEXP (test, 0), XEXP (test, 1), label1));
45477 emit_move_insn (tmp2, standard_80387_constant_rtx (4)); /* fldln2 */
45478 emit_insn (gen_fyl2xp1xf3_i387 (op0, op1, tmp2));
45479 emit_jump (label2);
45481 emit_label (label1);
45482 emit_move_insn (tmp, CONST1_RTX (XFmode));
45483 emit_insn (gen_addxf3 (tmp, op1, tmp));
45484 emit_move_insn (tmp2, standard_80387_constant_rtx (4)); /* fldln2 */
45485 emit_insn (gen_fyl2xxf3_i387 (op0, tmp, tmp2));
45487 emit_label (label2);
45490 /* Emit code for round calculation. */
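/* Roughly, as illustrative C (not part of the original code):
     res = floor (fabs (op1) + 0.5);   // via frndint/lfloor below
     if (signbit (op1))                // sign bit read back via fxam
       res = -res;
     op0 = res;  */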
45491 void ix86_emit_i387_round (rtx op0, rtx op1)
45493 machine_mode inmode = GET_MODE (op1);
45494 machine_mode outmode = GET_MODE (op0);
45495 rtx e1, e2, res, tmp, tmp1, half;
45496 rtx scratch = gen_reg_rtx (HImode);
45497 rtx flags = gen_rtx_REG (CCNOmode, FLAGS_REG);
45498 rtx_code_label *jump_label = gen_label_rtx ();
45499 rtx insn;
45500 rtx (*gen_abs) (rtx, rtx);
45501 rtx (*gen_neg) (rtx, rtx);
45503 switch (inmode)
45505 case SFmode:
45506 gen_abs = gen_abssf2;
45507 break;
45508 case DFmode:
45509 gen_abs = gen_absdf2;
45510 break;
45511 case XFmode:
45512 gen_abs = gen_absxf2;
45513 break;
45514 default:
45515 gcc_unreachable ();
45518 switch (outmode)
45520 case SFmode:
45521 gen_neg = gen_negsf2;
45522 break;
45523 case DFmode:
45524 gen_neg = gen_negdf2;
45525 break;
45526 case XFmode:
45527 gen_neg = gen_negxf2;
45528 break;
45529 case HImode:
45530 gen_neg = gen_neghi2;
45531 break;
45532 case SImode:
45533 gen_neg = gen_negsi2;
45534 break;
45535 case DImode:
45536 gen_neg = gen_negdi2;
45537 break;
45538 default:
45539 gcc_unreachable ();
45542 e1 = gen_reg_rtx (inmode);
45543 e2 = gen_reg_rtx (inmode);
45544 res = gen_reg_rtx (outmode);
45546 half = CONST_DOUBLE_FROM_REAL_VALUE (dconsthalf, inmode);
45548 /* round(a) = sgn(a) * floor(fabs(a) + 0.5) */
45550 /* scratch = fxam(op1) */
45551 emit_insn (gen_rtx_SET (VOIDmode, scratch,
45552 gen_rtx_UNSPEC (HImode, gen_rtvec (1, op1),
45553 UNSPEC_FXAM)));
45554 /* e1 = fabs(op1) */
45555 emit_insn (gen_abs (e1, op1));
45557 /* e2 = e1 + 0.5 */
45558 half = force_reg (inmode, half);
45559 emit_insn (gen_rtx_SET (VOIDmode, e2,
45560 gen_rtx_PLUS (inmode, e1, half)));
45562 /* res = floor(e2) */
45563 if (inmode != XFmode)
45565 tmp1 = gen_reg_rtx (XFmode);
45567 emit_insn (gen_rtx_SET (VOIDmode, tmp1,
45568 gen_rtx_FLOAT_EXTEND (XFmode, e2)));
45570 else
45571 tmp1 = e2;
45573 switch (outmode)
45575 case SFmode:
45576 case DFmode:
45578 rtx tmp0 = gen_reg_rtx (XFmode);
45580 emit_insn (gen_frndintxf2_floor (tmp0, tmp1));
45582 emit_insn (gen_rtx_SET (VOIDmode, res,
45583 gen_rtx_UNSPEC (outmode, gen_rtvec (1, tmp0),
45584 UNSPEC_TRUNC_NOOP)));
45586 break;
45587 case XFmode:
45588 emit_insn (gen_frndintxf2_floor (res, tmp1));
45589 break;
45590 case HImode:
45591 emit_insn (gen_lfloorxfhi2 (res, tmp1));
45592 break;
45593 case SImode:
45594 emit_insn (gen_lfloorxfsi2 (res, tmp1));
45595 break;
45596 case DImode:
45597 emit_insn (gen_lfloorxfdi2 (res, tmp1));
45598 break;
45599 default:
45600 gcc_unreachable ();
45603 /* flags = signbit(a) */
45604 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x02)));
45606 /* if (flags) then res = -res */
45607 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode,
45608 gen_rtx_EQ (VOIDmode, flags, const0_rtx),
45609 gen_rtx_LABEL_REF (VOIDmode, jump_label),
45610 pc_rtx);
45611 insn = emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
45612 predict_jump (REG_BR_PROB_BASE * 50 / 100);
45613 JUMP_LABEL (insn) = jump_label;
45615 emit_insn (gen_neg (res, res));
45617 emit_label (jump_label);
45618 LABEL_NUSES (jump_label) = 1;
45620 emit_move_insn (op0, res);
45623 /* Output code to perform a Newton-Raphson approximation of a single precision
45624 floating point divide [http://en.wikipedia.org/wiki/N-th_root_algorithm]. */
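/* Roughly, as illustrative C (not part of the original code): one
   Newton-Raphson step refining the hardware reciprocal estimate
   x0 = rcp (b) before the final multiply:
     x1  = x0 + x0 - b * x0 * x0;   // == x0 * (2 - b * x0)
     res = a * x1;  */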
45626 void ix86_emit_swdivsf (rtx res, rtx a, rtx b, machine_mode mode)
45628 rtx x0, x1, e0, e1;
45630 x0 = gen_reg_rtx (mode);
45631 e0 = gen_reg_rtx (mode);
45632 e1 = gen_reg_rtx (mode);
45633 x1 = gen_reg_rtx (mode);
45635 /* a / b = a * ((rcp(b) + rcp(b)) - (b * rcp(b) * rcp (b))) */
45637 b = force_reg (mode, b);
45639 /* x0 = rcp(b) estimate */
45640 if (mode == V16SFmode || mode == V8DFmode)
45641 emit_insn (gen_rtx_SET (VOIDmode, x0,
45642 gen_rtx_UNSPEC (mode, gen_rtvec (1, b),
45643 UNSPEC_RCP14)));
45644 else
45645 emit_insn (gen_rtx_SET (VOIDmode, x0,
45646 gen_rtx_UNSPEC (mode, gen_rtvec (1, b),
45647 UNSPEC_RCP)));
45649 /* e0 = x0 * b */
45650 emit_insn (gen_rtx_SET (VOIDmode, e0,
45651 gen_rtx_MULT (mode, x0, b)));
45653 /* e0 = x0 * e0 */
45654 emit_insn (gen_rtx_SET (VOIDmode, e0,
45655 gen_rtx_MULT (mode, x0, e0)));
45657 /* e1 = x0 + x0 */
45658 emit_insn (gen_rtx_SET (VOIDmode, e1,
45659 gen_rtx_PLUS (mode, x0, x0)));
45661 /* x1 = e1 - e0 */
45662 emit_insn (gen_rtx_SET (VOIDmode, x1,
45663 gen_rtx_MINUS (mode, e1, e0)));
45665 /* res = a * x1 */
45666 emit_insn (gen_rtx_SET (VOIDmode, res,
45667 gen_rtx_MULT (mode, a, x1)));
45670 /* Output code to perform a Newton-Raphson approximation of a
45671 single precision floating point [reciprocal] square root. */
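/* Roughly, as illustrative C (not part of the original code): one
   Newton-Raphson step on the hardware estimate x0 = rsqrt (a):
     e2 = a * x0 * x0 - 3.0;
     rsqrt (a) ~ -0.5 * x0 * e2;        // recip == true
     sqrt (a)  ~ -0.5 * (a * x0) * e2;  // recip == false
   For sqrt, x0 is first zeroed out where a == 0.0 so that a * x0 does
   not produce 0 * inf = NaN. */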
45673 void ix86_emit_swsqrtsf (rtx res, rtx a, machine_mode mode,
45674 bool recip)
45676 rtx x0, e0, e1, e2, e3, mthree, mhalf;
45677 REAL_VALUE_TYPE r;
45678 int unspec;
45680 x0 = gen_reg_rtx (mode);
45681 e0 = gen_reg_rtx (mode);
45682 e1 = gen_reg_rtx (mode);
45683 e2 = gen_reg_rtx (mode);
45684 e3 = gen_reg_rtx (mode);
45686 real_from_integer (&r, VOIDmode, -3, SIGNED);
45687 mthree = CONST_DOUBLE_FROM_REAL_VALUE (r, SFmode);
45689 real_arithmetic (&r, NEGATE_EXPR, &dconsthalf, NULL);
45690 mhalf = CONST_DOUBLE_FROM_REAL_VALUE (r, SFmode);
45691 unspec = UNSPEC_RSQRT;
45693 if (VECTOR_MODE_P (mode))
45695 mthree = ix86_build_const_vector (mode, true, mthree);
45696 mhalf = ix86_build_const_vector (mode, true, mhalf);
45697 /* There is no 512-bit rsqrt. There is however rsqrt14. */
45698 if (GET_MODE_SIZE (mode) == 64)
45699 unspec = UNSPEC_RSQRT14;
45702 /* sqrt(a) = -0.5 * a * rsqrtss(a) * (a * rsqrtss(a) * rsqrtss(a) - 3.0)
45703 rsqrt(a) = -0.5 * rsqrtss(a) * (a * rsqrtss(a) * rsqrtss(a) - 3.0) */
45705 a = force_reg (mode, a);
45707 /* x0 = rsqrt(a) estimate */
45708 emit_insn (gen_rtx_SET (VOIDmode, x0,
45709 gen_rtx_UNSPEC (mode, gen_rtvec (1, a),
45710 unspec)));
45712 /* If a == 0.0, filter out the infinity to prevent NaN for sqrt (0.0). */
45713 if (!recip)
45715 rtx zero, mask;
45717 zero = gen_reg_rtx (mode);
45718 mask = gen_reg_rtx (mode);
45720 zero = force_reg (mode, CONST0_RTX(mode));
45722 /* Handle masked compare. */
45723 if (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 64)
45725 mask = gen_reg_rtx (HImode);
45726 /* Imm value 0x4 corresponds to not-equal comparison. */
45727 emit_insn (gen_avx512f_cmpv16sf3 (mask, zero, a, GEN_INT (0x4)));
45728 emit_insn (gen_avx512f_blendmv16sf (x0, zero, x0, mask));
45730 else
45732 emit_insn (gen_rtx_SET (VOIDmode, mask,
45733 gen_rtx_NE (mode, zero, a)));
45735 emit_insn (gen_rtx_SET (VOIDmode, x0,
45736 gen_rtx_AND (mode, x0, mask)));
45740 /* e0 = x0 * a */
45741 emit_insn (gen_rtx_SET (VOIDmode, e0,
45742 gen_rtx_MULT (mode, x0, a)));
45743 /* e1 = e0 * x0 */
45744 emit_insn (gen_rtx_SET (VOIDmode, e1,
45745 gen_rtx_MULT (mode, e0, x0)));
45747 /* e2 = e1 - 3. */
45748 mthree = force_reg (mode, mthree);
45749 emit_insn (gen_rtx_SET (VOIDmode, e2,
45750 gen_rtx_PLUS (mode, e1, mthree)));
45752 mhalf = force_reg (mode, mhalf);
45753 if (recip)
45754 /* e3 = -.5 * x0 */
45755 emit_insn (gen_rtx_SET (VOIDmode, e3,
45756 gen_rtx_MULT (mode, x0, mhalf)));
45757 else
45758 /* e3 = -.5 * e0 */
45759 emit_insn (gen_rtx_SET (VOIDmode, e3,
45760 gen_rtx_MULT (mode, e0, mhalf)));
45761 /* ret = e2 * e3 */
45762 emit_insn (gen_rtx_SET (VOIDmode, res,
45763 gen_rtx_MULT (mode, e2, e3)));
45766 #ifdef TARGET_SOLARIS
45767 /* Solaris implementation of TARGET_ASM_NAMED_SECTION. */
45769 static void
45770 i386_solaris_elf_named_section (const char *name, unsigned int flags,
45771 tree decl)
45773 /* With Binutils 2.15, the "@unwind" marker must be specified on
45774 every occurrence of the ".eh_frame" section, not just the first
45775 one. */
45776 if (TARGET_64BIT
45777 && strcmp (name, ".eh_frame") == 0)
45779 fprintf (asm_out_file, "\t.section\t%s,\"%s\",@unwind\n", name,
45780 flags & SECTION_WRITE ? "aw" : "a");
45781 return;
45784 #ifndef USE_GAS
45785 if (HAVE_COMDAT_GROUP && flags & SECTION_LINKONCE)
45787 solaris_elf_asm_comdat_section (name, flags, decl);
45788 return;
45790 #endif
45792 default_elf_asm_named_section (name, flags, decl);
45794 #endif /* TARGET_SOLARIS */
45796 /* Return the mangling of TYPE if it is an extended fundamental type. */
45798 static const char *
45799 ix86_mangle_type (const_tree type)
45801 type = TYPE_MAIN_VARIANT (type);
45803 if (TREE_CODE (type) != VOID_TYPE && TREE_CODE (type) != BOOLEAN_TYPE
45804 && TREE_CODE (type) != INTEGER_TYPE && TREE_CODE (type) != REAL_TYPE)
45805 return NULL;
45807 switch (TYPE_MODE (type))
45809 case TFmode:
45810 /* __float128 is "g". */
45811 return "g";
45812 case XFmode:
45813 /* "long double" or __float80 is "e". */
45814 return "e";
45815 default:
45816 return NULL;
45820 /* For 32-bit code we can save PIC register setup by using the
45821 hidden function __stack_chk_fail_local instead of calling
45822 __stack_chk_fail directly. 64-bit code doesn't need to set up any PIC
45823 register, so it is better to call __stack_chk_fail directly. */
45825 static tree ATTRIBUTE_UNUSED
45826 ix86_stack_protect_fail (void)
45828 return TARGET_64BIT
45829 ? default_external_stack_protect_fail ()
45830 : default_hidden_stack_protect_fail ();
45833 /* Select a format to encode pointers in exception handling data. CODE
45834 is 0 for data, 1 for code labels, 2 for function pointers. GLOBAL is
45835 true if the symbol may be affected by dynamic relocations.
45837 ??? All x86 object file formats are capable of representing this.
45838 After all, the relocation needed is the same as for the call insn.
45839 Whether or not a particular assembler allows us to enter such, I
45840 guess we'll have to see. */
45841 int
45842 asm_preferred_eh_data_format (int code, int global)
45844 if (flag_pic)
45846 int type = DW_EH_PE_sdata8;
45847 if (!TARGET_64BIT
45848 || ix86_cmodel == CM_SMALL_PIC
45849 || (ix86_cmodel == CM_MEDIUM_PIC && (global || code)))
45850 type = DW_EH_PE_sdata4;
45851 return (global ? DW_EH_PE_indirect : 0) | DW_EH_PE_pcrel | type;
45853 if (ix86_cmodel == CM_SMALL
45854 || (ix86_cmodel == CM_MEDIUM && code))
45855 return DW_EH_PE_udata4;
45856 return DW_EH_PE_absptr;
45859 /* Expand copysign from SIGN to the positive value ABS_VALUE
45860 storing in RESULT. If MASK is non-null, it shall be a mask to mask out
45861 the sign-bit. */
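/* Roughly, as illustrative C (not part of the original code):
     result = abs_value | (sign & SIGN_BIT);
   where SIGN_BIT is a mask with only the IEEE sign bit set, so RESULT
   takes its magnitude from ABS_VALUE and its sign from SIGN. */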
45862 static void
45863 ix86_sse_copysign_to_positive (rtx result, rtx abs_value, rtx sign, rtx mask)
45865 machine_mode mode = GET_MODE (sign);
45866 rtx sgn = gen_reg_rtx (mode);
45867 if (mask == NULL_RTX)
45869 machine_mode vmode;
45871 if (mode == SFmode)
45872 vmode = V4SFmode;
45873 else if (mode == DFmode)
45874 vmode = V2DFmode;
45875 else
45876 vmode = mode;
45878 mask = ix86_build_signbit_mask (vmode, VECTOR_MODE_P (mode), false);
45879 if (!VECTOR_MODE_P (mode))
45881 /* We need to generate a scalar mode mask in this case. */
45882 rtx tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (1, const0_rtx));
45883 tmp = gen_rtx_VEC_SELECT (mode, mask, tmp);
45884 mask = gen_reg_rtx (mode);
45885 emit_insn (gen_rtx_SET (VOIDmode, mask, tmp));
45888 else
45889 mask = gen_rtx_NOT (mode, mask);
45890 emit_insn (gen_rtx_SET (VOIDmode, sgn,
45891 gen_rtx_AND (mode, mask, sign)));
45892 emit_insn (gen_rtx_SET (VOIDmode, result,
45893 gen_rtx_IOR (mode, abs_value, sgn)));
45896 /* Expand fabs (OP0) and return a new rtx that holds the result. The
45897 mask for masking out the sign-bit is stored in *SMASK, if that is
45898 non-null. */
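/* Roughly, as illustrative C (not part of the original code):
     xa = op0 & ~SIGN_BIT;
   i.e. the IEEE sign bit is cleared; the inverted sign-bit mask used
   for the AND is also returned through *SMASK for a later copysign. */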
45899 static rtx
45900 ix86_expand_sse_fabs (rtx op0, rtx *smask)
45902 machine_mode vmode, mode = GET_MODE (op0);
45903 rtx xa, mask;
45905 xa = gen_reg_rtx (mode);
45906 if (mode == SFmode)
45907 vmode = V4SFmode;
45908 else if (mode == DFmode)
45909 vmode = V2DFmode;
45910 else
45911 vmode = mode;
45912 mask = ix86_build_signbit_mask (vmode, VECTOR_MODE_P (mode), true);
45913 if (!VECTOR_MODE_P (mode))
45915 /* We need to generate a scalar mode mask in this case. */
45916 rtx tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (1, const0_rtx));
45917 tmp = gen_rtx_VEC_SELECT (mode, mask, tmp);
45918 mask = gen_reg_rtx (mode);
45919 emit_insn (gen_rtx_SET (VOIDmode, mask, tmp));
45921 emit_insn (gen_rtx_SET (VOIDmode, xa,
45922 gen_rtx_AND (mode, op0, mask)));
45924 if (smask)
45925 *smask = mask;
45927 return xa;
45930 /* Expands a comparison of OP0 with OP1 using comparison code CODE,
45931 swapping the operands if SWAP_OPERANDS is true. The expanded
45932 code is a forward jump to a newly created label in case the
45933 comparison is true. The generated label rtx is returned. */
45934 static rtx_code_label *
45935 ix86_expand_sse_compare_and_jump (enum rtx_code code, rtx op0, rtx op1,
45936 bool swap_operands)
45938 machine_mode fpcmp_mode = ix86_fp_compare_mode (code);
45939 rtx_code_label *label;
45940 rtx tmp;
45942 if (swap_operands)
45943 std::swap (op0, op1);
45945 label = gen_label_rtx ();
45946 tmp = gen_rtx_REG (fpcmp_mode, FLAGS_REG);
45947 emit_insn (gen_rtx_SET (VOIDmode, tmp,
45948 gen_rtx_COMPARE (fpcmp_mode, op0, op1)));
45949 tmp = gen_rtx_fmt_ee (code, VOIDmode, tmp, const0_rtx);
45950 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
45951 gen_rtx_LABEL_REF (VOIDmode, label), pc_rtx);
45952 tmp = emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
45953 JUMP_LABEL (tmp) = label;
45955 return label;
45958 /* Expand a mask generating SSE comparison instruction comparing OP0 with OP1
45959 using comparison code CODE. Operands are swapped for the comparison if
45960 SWAP_OPERANDS is true. Returns a rtx for the generated mask. */
45961 static rtx
45962 ix86_expand_sse_compare_mask (enum rtx_code code, rtx op0, rtx op1,
45963 bool swap_operands)
45965 rtx (*insn)(rtx, rtx, rtx, rtx);
45966 machine_mode mode = GET_MODE (op0);
45967 rtx mask = gen_reg_rtx (mode);
45969 if (swap_operands)
45970 std::swap (op0, op1);
45972 insn = mode == DFmode ? gen_setcc_df_sse : gen_setcc_sf_sse;
45974 emit_insn (insn (mask, op0, op1,
45975 gen_rtx_fmt_ee (code, mode, op0, op1)));
45976 return mask;
45979 /* Generate and return a rtx of mode MODE for 2**n where n is the number
45980 of bits of the mantissa of MODE, which must be one of DFmode or SFmode. */
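/* Illustrative note, not part of the original code: for DFmode this is
   2**52 = 4503599627370496.0, the smallest power of two from which on
   every representable double is an integer. Adding and then subtracting
   it, as the expanders below do, therefore rounds a smaller nonnegative
   value to an integer in the current rounding mode. */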
45981 static rtx
45982 ix86_gen_TWO52 (machine_mode mode)
45984 REAL_VALUE_TYPE TWO52r;
45985 rtx TWO52;
45987 real_ldexp (&TWO52r, &dconst1, mode == DFmode ? 52 : 23);
45988 TWO52 = const_double_from_real_value (TWO52r, mode);
45989 TWO52 = force_reg (mode, TWO52);
45991 return TWO52;
45994 /* Expand SSE sequence for computing lround from OP1 storing
45995 into OP0. */
45996 void
45997 ix86_expand_lround (rtx op0, rtx op1)
45999 /* C code for the stuff we're doing below:
46000 tmp = op1 + copysign (nextafter (0.5, 0.0), op1)
46001 return (long)tmp;
46003 machine_mode mode = GET_MODE (op1);
46004 const struct real_format *fmt;
46005 REAL_VALUE_TYPE pred_half, half_minus_pred_half;
46006 rtx adj;
46008 /* load nextafter (0.5, 0.0) */
46009 fmt = REAL_MODE_FORMAT (mode);
46010 real_2expN (&half_minus_pred_half, -(fmt->p) - 1, mode);
46011 REAL_ARITHMETIC (pred_half, MINUS_EXPR, dconsthalf, half_minus_pred_half);
46013 /* adj = copysign (0.5, op1) */
46014 adj = force_reg (mode, const_double_from_real_value (pred_half, mode));
46015 ix86_sse_copysign_to_positive (adj, adj, force_reg (mode, op1), NULL_RTX);
46017 /* adj = op1 + adj */
46018 adj = expand_simple_binop (mode, PLUS, adj, op1, NULL_RTX, 0, OPTAB_DIRECT);
46020 /* op0 = (imode)adj */
46021 expand_fix (op0, adj, 0);
46024 /* Expand SSE2 sequence for computing lfloor or lceil from OPERAND1 storing
46025 into OPERAND0. */
46026 void
46027 ix86_expand_lfloorceil (rtx op0, rtx op1, bool do_floor)
46029 /* C code for the stuff we're doing below (for do_floor):
46030 xi = (long)op1;
46031 xi -= (double)xi > op1 ? 1 : 0;
46032 return xi;
46034 machine_mode fmode = GET_MODE (op1);
46035 machine_mode imode = GET_MODE (op0);
46036 rtx ireg, freg, tmp;
46037 rtx_code_label *label;
46039 /* reg = (long)op1 */
46040 ireg = gen_reg_rtx (imode);
46041 expand_fix (ireg, op1, 0);
46043 /* freg = (double)reg */
46044 freg = gen_reg_rtx (fmode);
46045 expand_float (freg, ireg, 0);
46047 /* ireg = (freg > op1) ? ireg - 1 : ireg */
46048 label = ix86_expand_sse_compare_and_jump (UNLE,
46049 freg, op1, !do_floor);
46050 tmp = expand_simple_binop (imode, do_floor ? MINUS : PLUS,
46051 ireg, const1_rtx, NULL_RTX, 0, OPTAB_DIRECT);
46052 emit_move_insn (ireg, tmp);
46054 emit_label (label);
46055 LABEL_NUSES (label) = 1;
46057 emit_move_insn (op0, ireg);
46060 /* Expand rint (IEEE round to nearest) rounding OPERAND1 and storing the
46061 result in OPERAND0. */
46062 void
46063 ix86_expand_rint (rtx operand0, rtx operand1)
46065 /* C code for the stuff we're doing below:
46066 xa = fabs (operand1);
46067 if (!isless (xa, 2**52))
46068 return operand1;
46069 xa = xa + 2**52 - 2**52;
46070 return copysign (xa, operand1);
46072 machine_mode mode = GET_MODE (operand0);
46073 rtx res, xa, TWO52, mask;
46074 rtx_code_label *label;
46076 res = gen_reg_rtx (mode);
46077 emit_move_insn (res, operand1);
46079 /* xa = abs (operand1) */
46080 xa = ix86_expand_sse_fabs (res, &mask);
46082 /* if (!isless (xa, TWO52)) goto label; */
46083 TWO52 = ix86_gen_TWO52 (mode);
46084 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
46086 xa = expand_simple_binop (mode, PLUS, xa, TWO52, NULL_RTX, 0, OPTAB_DIRECT);
46087 xa = expand_simple_binop (mode, MINUS, xa, TWO52, xa, 0, OPTAB_DIRECT);
46089 ix86_sse_copysign_to_positive (res, xa, res, mask);
46091 emit_label (label);
46092 LABEL_NUSES (label) = 1;
46094 emit_move_insn (operand0, res);
46097 /* Expand SSE2 sequence for computing floor or ceil from OPERAND1 storing
46098 into OPERAND0. */
46099 void
46100 ix86_expand_floorceildf_32 (rtx operand0, rtx operand1, bool do_floor)
46102 /* C code for the stuff we expand below.
46103 double xa = fabs (x), x2;
46104 if (!isless (xa, TWO52))
46105 return x;
46106 xa = xa + TWO52 - TWO52;
46107 x2 = copysign (xa, x);
46108 Compensate. Floor:
46109 if (x2 > x)
46110 x2 -= 1;
46111 Compensate. Ceil:
46112 if (x2 < x)
46113 x2 -= -1;
46114 return x2;
46116 machine_mode mode = GET_MODE (operand0);
46117 rtx xa, TWO52, tmp, one, res, mask;
46118 rtx_code_label *label;
46120 TWO52 = ix86_gen_TWO52 (mode);
46122 /* Temporary for holding the result, initialized to the input
46123 operand to ease control flow. */
46124 res = gen_reg_rtx (mode);
46125 emit_move_insn (res, operand1);
46127 /* xa = abs (operand1) */
46128 xa = ix86_expand_sse_fabs (res, &mask);
46130 /* if (!isless (xa, TWO52)) goto label; */
46131 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
46133 /* xa = xa + TWO52 - TWO52; */
46134 xa = expand_simple_binop (mode, PLUS, xa, TWO52, NULL_RTX, 0, OPTAB_DIRECT);
46135 xa = expand_simple_binop (mode, MINUS, xa, TWO52, xa, 0, OPTAB_DIRECT);
46137 /* xa = copysign (xa, operand1) */
46138 ix86_sse_copysign_to_positive (xa, xa, res, mask);
46140 /* generate 1.0 or -1.0 */
46141 one = force_reg (mode,
46142 const_double_from_real_value (do_floor
46143 ? dconst1 : dconstm1, mode));
46145 /* Compensate: xa = xa - (xa > operand1 ? 1 : 0) */
46146 tmp = ix86_expand_sse_compare_mask (UNGT, xa, res, !do_floor);
46147 emit_insn (gen_rtx_SET (VOIDmode, tmp,
46148 gen_rtx_AND (mode, one, tmp)));
46149 /* We always need to subtract here to preserve signed zero. */
46150 tmp = expand_simple_binop (mode, MINUS,
46151 xa, tmp, NULL_RTX, 0, OPTAB_DIRECT);
46152 emit_move_insn (res, tmp);
46154 emit_label (label);
46155 LABEL_NUSES (label) = 1;
46157 emit_move_insn (operand0, res);
46160 /* Expand SSE2 sequence for computing floor or ceil from OPERAND1 storing
46161 into OPERAND0. */
46162 void
46163 ix86_expand_floorceil (rtx operand0, rtx operand1, bool do_floor)
46165 /* C code for the stuff we expand below.
46166 double xa = fabs (x), x2;
46167 if (!isless (xa, TWO52))
46168 return x;
46169 x2 = (double)(long)x;
46170 Compensate. Floor:
46171 if (x2 > x)
46172 x2 -= 1;
46173 Compensate. Ceil:
46174 if (x2 < x)
46175 x2 += 1;
46176 if (HONOR_SIGNED_ZEROS (mode))
46177 return copysign (x2, x);
46178 return x2;
46180 machine_mode mode = GET_MODE (operand0);
46181 rtx xa, xi, TWO52, tmp, one, res, mask;
46182 rtx_code_label *label;
46184 TWO52 = ix86_gen_TWO52 (mode);
46186 /* Temporary for holding the result, initialized to the input
46187 operand to ease control flow. */
46188 res = gen_reg_rtx (mode);
46189 emit_move_insn (res, operand1);
46191 /* xa = abs (operand1) */
46192 xa = ix86_expand_sse_fabs (res, &mask);
46194 /* if (!isless (xa, TWO52)) goto label; */
46195 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
46197 /* xa = (double)(long)x */
46198 xi = gen_reg_rtx (mode == DFmode ? DImode : SImode);
46199 expand_fix (xi, res, 0);
46200 expand_float (xa, xi, 0);
46202 /* generate 1.0 */
46203 one = force_reg (mode, const_double_from_real_value (dconst1, mode));
46205 /* Compensate: xa = xa - (xa > operand1 ? 1 : 0) */
46206 tmp = ix86_expand_sse_compare_mask (UNGT, xa, res, !do_floor);
46207 emit_insn (gen_rtx_SET (VOIDmode, tmp,
46208 gen_rtx_AND (mode, one, tmp)));
46209 tmp = expand_simple_binop (mode, do_floor ? MINUS : PLUS,
46210 xa, tmp, NULL_RTX, 0, OPTAB_DIRECT);
46211 emit_move_insn (res, tmp);
46213 if (HONOR_SIGNED_ZEROS (mode))
46214 ix86_sse_copysign_to_positive (res, res, force_reg (mode, operand1), mask);
46216 emit_label (label);
46217 LABEL_NUSES (label) = 1;
46219 emit_move_insn (operand0, res);
46222 /* Expand SSE sequence for computing round from OPERAND1 storing
46223 into OPERAND0. A sequence that works without relying on DImode truncation
46224 via cvttsd2siq, which is only available on 64-bit targets. */
46225 void
46226 ix86_expand_rounddf_32 (rtx operand0, rtx operand1)
46228 /* C code for the stuff we expand below.
46229 double xa = fabs (x), xa2, x2;
46230 if (!isless (xa, TWO52))
46231 return x;
46232 Using the absolute value and copying back sign makes
46233 -0.0 -> -0.0 correct.
46234 xa2 = xa + TWO52 - TWO52;
46235 Compensate.
46236 dxa = xa2 - xa;
46237 if (dxa <= -0.5)
46238 xa2 += 1;
46239 else if (dxa > 0.5)
46240 xa2 -= 1;
46241 x2 = copysign (xa2, x);
46242 return x2;
46244 machine_mode mode = GET_MODE (operand0);
46245 rtx xa, xa2, dxa, TWO52, tmp, half, mhalf, one, res, mask;
46246 rtx_code_label *label;
46248 TWO52 = ix86_gen_TWO52 (mode);
46250 /* Temporary for holding the result, initialized to the input
46251 operand to ease control flow. */
46252 res = gen_reg_rtx (mode);
46253 emit_move_insn (res, operand1);
46255 /* xa = abs (operand1) */
46256 xa = ix86_expand_sse_fabs (res, &mask);
46258 /* if (!isless (xa, TWO52)) goto label; */
46259 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
46261 /* xa2 = xa + TWO52 - TWO52; */
46262 xa2 = expand_simple_binop (mode, PLUS, xa, TWO52, NULL_RTX, 0, OPTAB_DIRECT);
46263 xa2 = expand_simple_binop (mode, MINUS, xa2, TWO52, xa2, 0, OPTAB_DIRECT);
46265 /* dxa = xa2 - xa; */
46266 dxa = expand_simple_binop (mode, MINUS, xa2, xa, NULL_RTX, 0, OPTAB_DIRECT);
46268 /* generate 0.5, 1.0 and -0.5 */
46269 half = force_reg (mode, const_double_from_real_value (dconsthalf, mode));
46270 one = expand_simple_binop (mode, PLUS, half, half, NULL_RTX, 0, OPTAB_DIRECT);
46271 mhalf = expand_simple_binop (mode, MINUS, half, one, NULL_RTX,
46272 0, OPTAB_DIRECT);
46274 /* Compensate. */
46275 tmp = gen_reg_rtx (mode);
46276 /* xa2 = xa2 - (dxa > 0.5 ? 1 : 0) */
46277 tmp = ix86_expand_sse_compare_mask (UNGT, dxa, half, false);
46278 emit_insn (gen_rtx_SET (VOIDmode, tmp,
46279 gen_rtx_AND (mode, one, tmp)));
46280 xa2 = expand_simple_binop (mode, MINUS, xa2, tmp, NULL_RTX, 0, OPTAB_DIRECT);
46281 /* xa2 = xa2 + (dxa <= -0.5 ? 1 : 0) */
46282 tmp = ix86_expand_sse_compare_mask (UNGE, mhalf, dxa, false);
46283 emit_insn (gen_rtx_SET (VOIDmode, tmp,
46284 gen_rtx_AND (mode, one, tmp)));
46285 xa2 = expand_simple_binop (mode, PLUS, xa2, tmp, NULL_RTX, 0, OPTAB_DIRECT);
46287 /* res = copysign (xa2, operand1) */
46288 ix86_sse_copysign_to_positive (res, xa2, force_reg (mode, operand1), mask);
46290 emit_label (label);
46291 LABEL_NUSES (label) = 1;
46293 emit_move_insn (operand0, res);
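/* Editorial note, not part of the GCC sources: a concrete trace of the
   compensation above.  For x = 2.5, xa2 = 2.5 + 2**52 - 2**52 rounds to 2.0
   (ties to even), so dxa = 2.0 - 2.5 = -0.5; the dxa <= -0.5 test fires,
   xa2 becomes 3.0, and round (2.5) is 3.0 rather than the round-to-even
   result 2.0.  For x = 2.4, xa2 = 2.0 and dxa = -0.4, neither test fires,
   and the result stays 2.0.  */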
46296 /* Expand SSE sequence for computing trunc from OPERAND1 storing
46297 into OPERAND0. */
46298 void
46299 ix86_expand_trunc (rtx operand0, rtx operand1)
46301 /* C code for SSE variant we expand below.
46302 double xa = fabs (x), x2;
46303 if (!isless (xa, TWO52))
46304 return x;
46305 x2 = (double)(long)x;
46306 if (HONOR_SIGNED_ZEROS (mode))
46307 return copysign (x2, x);
46308 return x2;
46310 machine_mode mode = GET_MODE (operand0);
46311 rtx xa, xi, TWO52, res, mask;
46312 rtx_code_label *label;
46314 TWO52 = ix86_gen_TWO52 (mode);
46316 /* Temporary for holding the result, initialized to the input
46317 operand to ease control flow. */
46318 res = gen_reg_rtx (mode);
46319 emit_move_insn (res, operand1);
46321 /* xa = abs (operand1) */
46322 xa = ix86_expand_sse_fabs (res, &mask);
46324 /* if (!isless (xa, TWO52)) goto label; */
46325 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
46327 /* x = (double)(long)x */
46328 xi = gen_reg_rtx (mode == DFmode ? DImode : SImode);
46329 expand_fix (xi, res, 0);
46330 expand_float (res, xi, 0);
46332 if (HONOR_SIGNED_ZEROS (mode))
46333 ix86_sse_copysign_to_positive (res, res, force_reg (mode, operand1), mask);
46335 emit_label (label);
46336 LABEL_NUSES (label) = 1;
46338 emit_move_insn (operand0, res);
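/* Editorial sketch, not part of the GCC sources: the scalar equivalent of
   the expansion above; the helper name trunc_via_cvt is illustrative, and
   long long stands in for the DImode (or SImode for SFmode) integer
   temporary.  */
#if 0
#include <math.h>

static double
trunc_via_cvt (double x)
{
  double xa = fabs (x), x2;
  if (!isless (xa, 4503599627370496.0))	/* |x| >= 2**52: already integral */
    return x;
  x2 = (double) (long long) x;		/* cvttsd2siq truncates toward zero */
  return copysign (x2, x);		/* restores -0.0 for -1.0 < x <= -0.0 */
}
#endif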
46341 /* Expand SSE sequence for computing trunc from OPERAND1 storing
46342 into OPERAND0. */
46343 void
46344 ix86_expand_truncdf_32 (rtx operand0, rtx operand1)
46346 machine_mode mode = GET_MODE (operand0);
46347 rtx xa, mask, TWO52, one, res, smask, tmp;
46348 rtx_code_label *label;
46350 /* C code for SSE variant we expand below.
46351 double xa = fabs (x), xa2, x2;
46352 if (!isless (xa, TWO52))
46353 return x;
46354 xa2 = xa + TWO52 - TWO52;
46355 Compensate:
46356 if (xa2 > xa)
46357 xa2 -= 1.0;
46358 x2 = copysign (xa2, x);
46359 return x2;
46362 TWO52 = ix86_gen_TWO52 (mode);
46364 /* Temporary for holding the result, initialized to the input
46365 operand to ease control flow. */
46366 res = gen_reg_rtx (mode);
46367 emit_move_insn (res, operand1);
46369 /* xa = abs (operand1) */
46370 xa = ix86_expand_sse_fabs (res, &smask);
46372 /* if (!isless (xa, TWO52)) goto label; */
46373 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
46375 /* res = xa + TWO52 - TWO52; */
46376 tmp = expand_simple_binop (mode, PLUS, xa, TWO52, NULL_RTX, 0, OPTAB_DIRECT);
46377 tmp = expand_simple_binop (mode, MINUS, tmp, TWO52, tmp, 0, OPTAB_DIRECT);
46378 emit_move_insn (res, tmp);
46380 /* generate 1.0 */
46381 one = force_reg (mode, const_double_from_real_value (dconst1, mode));
46383 /* Compensate: res = xa2 - (res > xa ? 1 : 0) */
46384 mask = ix86_expand_sse_compare_mask (UNGT, res, xa, false);
46385 emit_insn (gen_rtx_SET (VOIDmode, mask,
46386 gen_rtx_AND (mode, mask, one)));
46387 tmp = expand_simple_binop (mode, MINUS,
46388 res, mask, NULL_RTX, 0, OPTAB_DIRECT);
46389 emit_move_insn (res, tmp);
46391 /* res = copysign (res, operand1) */
46392 ix86_sse_copysign_to_positive (res, res, force_reg (mode, operand1), smask);
46394 emit_label (label);
46395 LABEL_NUSES (label) = 1;
46397 emit_move_insn (operand0, res);
46400 /* Expand SSE sequence for computing round from OPERAND1 storing
46401 into OPERAND0. */
46402 void
46403 ix86_expand_round (rtx operand0, rtx operand1)
46405 /* C code for the stuff we're doing below:
46406 double xa = fabs (x);
46407 if (!isless (xa, TWO52))
46408 return x;
46409 xa = (double)(long)(xa + nextafter (0.5, 0.0));
46410 return copysign (xa, x);
46412 machine_mode mode = GET_MODE (operand0);
46413 rtx res, TWO52, xa, xi, half, mask;
46414 rtx_code_label *label;
46415 const struct real_format *fmt;
46416 REAL_VALUE_TYPE pred_half, half_minus_pred_half;
46418 /* Temporary for holding the result, initialized to the input
46419 operand to ease control flow. */
46420 res = gen_reg_rtx (mode);
46421 emit_move_insn (res, operand1);
46423 TWO52 = ix86_gen_TWO52 (mode);
46424 xa = ix86_expand_sse_fabs (res, &mask);
46425 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
46427 /* load nextafter (0.5, 0.0) */
46428 fmt = REAL_MODE_FORMAT (mode);
46429 real_2expN (&half_minus_pred_half, -(fmt->p) - 1, mode);
46430 REAL_ARITHMETIC (pred_half, MINUS_EXPR, dconsthalf, half_minus_pred_half);
46432 /* xa = xa + 0.5 */
46433 half = force_reg (mode, const_double_from_real_value (pred_half, mode));
46434 xa = expand_simple_binop (mode, PLUS, xa, half, NULL_RTX, 0, OPTAB_DIRECT);
46436 /* xa = (double)(int64_t)xa */
46437 xi = gen_reg_rtx (mode == DFmode ? DImode : SImode);
46438 expand_fix (xi, xa, 0);
46439 expand_float (xa, xi, 0);
46441 /* res = copysign (xa, operand1) */
46442 ix86_sse_copysign_to_positive (res, xa, force_reg (mode, operand1), mask);
46444 emit_label (label);
46445 LABEL_NUSES (label) = 1;
46447 emit_move_insn (operand0, res);
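/* Editorial note, not part of the GCC sources: the constant loaded above is
   pred_half = nextafter (0.5, 0.0), i.e. 0.5 minus one ulp (0.5 - 2**-54
   for double, since fmt->p is 53).  Adding exactly 0.5 would be wrong for
   the largest double below 0.5: 0.49999999999999994 + 0.5 rounds up to 1.0,
   so the truncation would yield round (0.49999999999999994) == 1.0 instead
   of 0.0.  With pred_half the sum stays below 1.0 and truncation gives the
   expected 0.0, while exact halves such as 2.5 still round away from
   zero.  */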
46450 /* Expand SSE sequence for computing round
46451 from OP1 storing into OP0 using sse4 round insn. */
46452 void
46453 ix86_expand_round_sse4 (rtx op0, rtx op1)
46455 machine_mode mode = GET_MODE (op0);
46456 rtx e1, e2, res, half;
46457 const struct real_format *fmt;
46458 REAL_VALUE_TYPE pred_half, half_minus_pred_half;
46459 rtx (*gen_copysign) (rtx, rtx, rtx);
46460 rtx (*gen_round) (rtx, rtx, rtx);
46462 switch (mode)
46464 case SFmode:
46465 gen_copysign = gen_copysignsf3;
46466 gen_round = gen_sse4_1_roundsf2;
46467 break;
46468 case DFmode:
46469 gen_copysign = gen_copysigndf3;
46470 gen_round = gen_sse4_1_rounddf2;
46471 break;
46472 default:
46473 gcc_unreachable ();
46476 /* round (a) = trunc (a + copysign (0.5, a)) */
46478 /* load nextafter (0.5, 0.0) */
46479 fmt = REAL_MODE_FORMAT (mode);
46480 real_2expN (&half_minus_pred_half, -(fmt->p) - 1, mode);
46481 REAL_ARITHMETIC (pred_half, MINUS_EXPR, dconsthalf, half_minus_pred_half);
46482 half = const_double_from_real_value (pred_half, mode);
46484 /* e1 = copysign (0.5, op1) */
46485 e1 = gen_reg_rtx (mode);
46486 emit_insn (gen_copysign (e1, half, op1));
46488 /* e2 = op1 + e1 */
46489 e2 = expand_simple_binop (mode, PLUS, op1, e1, NULL_RTX, 0, OPTAB_DIRECT);
46491 /* res = trunc (e2) */
46492 res = gen_reg_rtx (mode);
46493 emit_insn (gen_round (res, e2, GEN_INT (ROUND_TRUNC)));
46495 emit_move_insn (op0, res);
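/* Editorial sketch, not part of the GCC sources: the scalar identity the
   SSE4.1 expansion above implements; the helper name is illustrative and
   pred_half is the same 0.5-minus-one-ulp constant as above.  */
#if 0
#include <math.h>

static double
round_via_sse4_identity (double a)
{
  double pred_half = nextafter (0.5, 0.0);
  /* The roundsd with ROUND_TRUNC plays the part of trunc here.  */
  return trunc (a + copysign (pred_half, a));
}
#endif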
46499 /* Table of valid machine attributes. */
46500 static const struct attribute_spec ix86_attribute_table[] =
46502 /* { name, min_len, max_len, decl_req, type_req, fn_type_req, decl_handler,
46503 type_handler, affects_type_identity } */
46504 /* Stdcall attribute says callee is responsible for popping arguments
46505 if they are not variable. */
46506 { "stdcall", 0, 0, false, true, true, NULL, ix86_handle_cconv_attribute,
46507 true },
46508 /* Fastcall attribute says callee is responsible for popping arguments
46509 if they are not variable. */
46510 { "fastcall", 0, 0, false, true, true, NULL, ix86_handle_cconv_attribute,
46511 true },
46512 /* Thiscall attribute says callee is responsible for popping arguments
46513 if they are not variable. */
46514 { "thiscall", 0, 0, false, true, true, NULL, ix86_handle_cconv_attribute,
46515 true },
46516 /* Cdecl attribute says the callee is a normal C declaration */
46517 { "cdecl", 0, 0, false, true, true, NULL, ix86_handle_cconv_attribute,
46518 true },
46519 /* Regparm attribute specifies how many integer arguments are to be
46520 passed in registers. */
46521 { "regparm", 1, 1, false, true, true, NULL, ix86_handle_cconv_attribute,
46522 true },
46523 /* Sseregparm attribute says we are using x86_64 calling conventions
46524 for FP arguments. */
46525 { "sseregparm", 0, 0, false, true, true, NULL, ix86_handle_cconv_attribute,
46526 true },
46527 /* The transactional memory builtins are implicitly regparm or fastcall
46528 depending on the ABI. Override the generic do-nothing attribute that
46529 these builtins were declared with. */
46530 { "*tm regparm", 0, 0, false, true, true, NULL,
46531 ix86_handle_tm_regparm_attribute, true },
46532 /* force_align_arg_pointer says this function realigns the stack at entry. */
46533 { (const char *)&ix86_force_align_arg_pointer_string, 0, 0,
46534 false, true, true, NULL, ix86_handle_cconv_attribute, false },
46535 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
46536 { "dllimport", 0, 0, false, false, false, handle_dll_decl_attribute,
46537 handle_dll_type_attribute, false },
46538 { "dllexport", 0, 0, false, false, false, handle_dll_decl_attribute,
46539 handle_dll_type_attribute, false },
46540 { "shared", 0, 0, true, false, false, ix86_handle_shared_attribute, NULL,
46541 false },
46542 #endif
46543 { "ms_struct", 0, 0, false, false, false, ix86_handle_struct_decl_attribute,
46544 ix86_handle_struct_type_attribute, false },
46545 { "gcc_struct", 0, 0, false, false, false, ix86_handle_struct_decl_attribute,
46546 ix86_handle_struct_type_attribute, false },
46547 #ifdef SUBTARGET_ATTRIBUTE_TABLE
46548 SUBTARGET_ATTRIBUTE_TABLE,
46549 #endif
46550 /* ms_abi and sysv_abi calling convention function attributes. */
46551 { "ms_abi", 0, 0, false, true, true, NULL, ix86_handle_abi_attribute, true },
46552 { "sysv_abi", 0, 0, false, true, true, NULL, ix86_handle_abi_attribute,
46553 true },
46554 { "ms_hook_prologue", 0, 0, true, false, false, ix86_handle_fndecl_attribute,
46555 NULL, false },
46556 { "callee_pop_aggregate_return", 1, 1, false, true, true, NULL,
46557 ix86_handle_callee_pop_aggregate_return, true },
46558 /* End element. */
46559 { NULL, 0, 0, false, false, false, NULL, NULL, false }
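/* Editorial example, not part of the GCC sources: how a few of the
   attributes registered above look in user code.  The declaration names
   are hypothetical.  */
#if 0
/* Callee pops its (fixed) stack arguments.  */
int __attribute__ ((stdcall)) win_callback (int a, int b);

/* Pass the first three integer arguments in registers.  */
int __attribute__ ((regparm (3))) fast_add (int a, int b, int c);

/* Lay out this struct the way MSVC would (ms_struct).  */
struct __attribute__ ((ms_struct)) reg_layout { char c; int i; };
#endif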
46562 /* Implement targetm.vectorize.builtin_vectorization_cost. */
46563 static int
46564 ix86_builtin_vectorization_cost (enum vect_cost_for_stmt type_of_cost,
46565 tree vectype, int)
46567 unsigned elements;
46569 switch (type_of_cost)
46571 case scalar_stmt:
46572 return ix86_cost->scalar_stmt_cost;
46574 case scalar_load:
46575 return ix86_cost->scalar_load_cost;
46577 case scalar_store:
46578 return ix86_cost->scalar_store_cost;
46580 case vector_stmt:
46581 return ix86_cost->vec_stmt_cost;
46583 case vector_load:
46584 return ix86_cost->vec_align_load_cost;
46586 case vector_store:
46587 return ix86_cost->vec_store_cost;
46589 case vec_to_scalar:
46590 return ix86_cost->vec_to_scalar_cost;
46592 case scalar_to_vec:
46593 return ix86_cost->scalar_to_vec_cost;
46595 case unaligned_load:
46596 case unaligned_store:
46597 return ix86_cost->vec_unalign_load_cost;
46599 case cond_branch_taken:
46600 return ix86_cost->cond_taken_branch_cost;
46602 case cond_branch_not_taken:
46603 return ix86_cost->cond_not_taken_branch_cost;
46605 case vec_perm:
46606 case vec_promote_demote:
46607 return ix86_cost->vec_stmt_cost;
46609 case vec_construct:
46610 elements = TYPE_VECTOR_SUBPARTS (vectype);
46611 return elements / 2 + 1;
46613 default:
46614 gcc_unreachable ();
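/* Editorial note, not part of the GCC sources: most entries above come
   straight from the per-CPU cost table (ix86_cost); vec_construct instead
   returns elements / 2 + 1, e.g. building a V4SF from four scalars is
   costed at 4 / 2 + 1 = 3, roughly the number of insert/pack operations
   such a construction needs.  */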
46618 /* A cached (set (nil) (vselect (vconcat (nil) (nil)) (parallel [])))
46619 insn, so that expand_vselect{,_vconcat} doesn't have to create a fresh
46620 insn every time. */
46622 static GTY(()) rtx_insn *vselect_insn;
46624 /* Initialize vselect_insn. */
46626 static void
46627 init_vselect_insn (void)
46629 unsigned i;
46630 rtx x;
46632 x = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (MAX_VECT_LEN));
46633 for (i = 0; i < MAX_VECT_LEN; ++i)
46634 XVECEXP (x, 0, i) = const0_rtx;
46635 x = gen_rtx_VEC_SELECT (V2DFmode, gen_rtx_VEC_CONCAT (V4DFmode, const0_rtx,
46636 const0_rtx), x);
46637 x = gen_rtx_SET (VOIDmode, const0_rtx, x);
46638 start_sequence ();
46639 vselect_insn = emit_insn (x);
46640 end_sequence ();
46643 /* Construct (set target (vec_select op0 (parallel perm))) and
46644 return true if that's a valid instruction in the active ISA. */
46646 static bool
46647 expand_vselect (rtx target, rtx op0, const unsigned char *perm,
46648 unsigned nelt, bool testing_p)
46650 unsigned int i;
46651 rtx x, save_vconcat;
46652 int icode;
46654 if (vselect_insn == NULL_RTX)
46655 init_vselect_insn ();
46657 x = XEXP (SET_SRC (PATTERN (vselect_insn)), 1);
46658 PUT_NUM_ELEM (XVEC (x, 0), nelt);
46659 for (i = 0; i < nelt; ++i)
46660 XVECEXP (x, 0, i) = GEN_INT (perm[i]);
46661 save_vconcat = XEXP (SET_SRC (PATTERN (vselect_insn)), 0);
46662 XEXP (SET_SRC (PATTERN (vselect_insn)), 0) = op0;
46663 PUT_MODE (SET_SRC (PATTERN (vselect_insn)), GET_MODE (target));
46664 SET_DEST (PATTERN (vselect_insn)) = target;
46665 icode = recog_memoized (vselect_insn);
46667 if (icode >= 0 && !testing_p)
46668 emit_insn (copy_rtx (PATTERN (vselect_insn)));
46670 SET_DEST (PATTERN (vselect_insn)) = const0_rtx;
46671 XEXP (SET_SRC (PATTERN (vselect_insn)), 0) = save_vconcat;
46672 INSN_CODE (vselect_insn) = -1;
46674 return icode >= 0;
46677 /* Similar, but generate a vec_concat from op0 and op1 as well. */
46679 static bool
46680 expand_vselect_vconcat (rtx target, rtx op0, rtx op1,
46681 const unsigned char *perm, unsigned nelt,
46682 bool testing_p)
46684 machine_mode v2mode;
46685 rtx x;
46686 bool ok;
46688 if (vselect_insn == NULL_RTX)
46689 init_vselect_insn ();
46691 v2mode = GET_MODE_2XWIDER_MODE (GET_MODE (op0));
46692 x = XEXP (SET_SRC (PATTERN (vselect_insn)), 0);
46693 PUT_MODE (x, v2mode);
46694 XEXP (x, 0) = op0;
46695 XEXP (x, 1) = op1;
46696 ok = expand_vselect (target, x, perm, nelt, testing_p);
46697 XEXP (x, 0) = const0_rtx;
46698 XEXP (x, 1) = const0_rtx;
46699 return ok;
46702 /* A subroutine of ix86_expand_vec_perm_builtin_1. Try to implement D
46703 in terms of blendp[sd] / pblendw / pblendvb / vpblendd. */
46705 static bool
46706 expand_vec_perm_blend (struct expand_vec_perm_d *d)
46708 machine_mode vmode = d->vmode;
46709 unsigned i, mask, nelt = d->nelt;
46710 rtx target, op0, op1, x;
46711 rtx rperm[32], vperm;
46713 if (d->one_operand_p)
46714 return false;
46715 if (TARGET_AVX512F && GET_MODE_SIZE (vmode) == 64
46716 && GET_MODE_SIZE (GET_MODE_INNER (vmode)) >= 4)
46718 else if (TARGET_AVX2 && GET_MODE_SIZE (vmode) == 32)
46720 else if (TARGET_AVX && (vmode == V4DFmode || vmode == V8SFmode))
46722 else if (TARGET_SSE4_1 && GET_MODE_SIZE (vmode) == 16)
46724 else
46725 return false;
46727 /* This is a blend, not a permute. Elements must stay in their
46728 respective lanes. */
46729 for (i = 0; i < nelt; ++i)
46731 unsigned e = d->perm[i];
46732 if (!(e == i || e == i + nelt))
46733 return false;
46736 if (d->testing_p)
46737 return true;
46739 /* ??? Without SSE4.1, we could implement this with and/andn/or. This
46740 decision should be extracted elsewhere, so that we only try that
46741 sequence once all budget==3 options have been tried. */
46742 target = d->target;
46743 op0 = d->op0;
46744 op1 = d->op1;
46745 mask = 0;
46747 switch (vmode)
46749 case V8DFmode:
46750 case V16SFmode:
46751 case V4DFmode:
46752 case V8SFmode:
46753 case V2DFmode:
46754 case V4SFmode:
46755 case V8HImode:
46756 case V8SImode:
46757 case V32HImode:
46758 case V64QImode:
46759 case V16SImode:
46760 case V8DImode:
46761 for (i = 0; i < nelt; ++i)
46762 mask |= (d->perm[i] >= nelt) << i;
46763 break;
46765 case V2DImode:
46766 for (i = 0; i < 2; ++i)
46767 mask |= (d->perm[i] >= 2 ? 15 : 0) << (i * 4);
46768 vmode = V8HImode;
46769 goto do_subreg;
46771 case V4SImode:
46772 for (i = 0; i < 4; ++i)
46773 mask |= (d->perm[i] >= 4 ? 3 : 0) << (i * 2);
46774 vmode = V8HImode;
46775 goto do_subreg;
46777 case V16QImode:
46778 /* See if bytes move in pairs so we can use pblendw with
46779 an immediate argument, rather than pblendvb with a vector
46780 argument. */
46781 for (i = 0; i < 16; i += 2)
46782 if (d->perm[i] + 1 != d->perm[i + 1])
46784 use_pblendvb:
46785 for (i = 0; i < nelt; ++i)
46786 rperm[i] = (d->perm[i] < nelt ? const0_rtx : constm1_rtx);
46788 finish_pblendvb:
46789 vperm = gen_rtx_CONST_VECTOR (vmode, gen_rtvec_v (nelt, rperm));
46790 vperm = force_reg (vmode, vperm);
46792 if (GET_MODE_SIZE (vmode) == 16)
46793 emit_insn (gen_sse4_1_pblendvb (target, op0, op1, vperm));
46794 else
46795 emit_insn (gen_avx2_pblendvb (target, op0, op1, vperm));
46796 if (target != d->target)
46797 emit_move_insn (d->target, gen_lowpart (d->vmode, target));
46798 return true;
46801 for (i = 0; i < 8; ++i)
46802 mask |= (d->perm[i * 2] >= 16) << i;
46803 vmode = V8HImode;
46804 /* FALLTHRU */
46806 do_subreg:
46807 target = gen_reg_rtx (vmode);
46808 op0 = gen_lowpart (vmode, op0);
46809 op1 = gen_lowpart (vmode, op1);
46810 break;
46812 case V32QImode:
46813 /* See if bytes move in pairs. If not, vpblendvb must be used. */
46814 for (i = 0; i < 32; i += 2)
46815 if (d->perm[i] + 1 != d->perm[i + 1])
46816 goto use_pblendvb;
46817 /* See if bytes move in quadruplets. If yes, vpblendd
46818 with immediate can be used. */
46819 for (i = 0; i < 32; i += 4)
46820 if (d->perm[i] + 2 != d->perm[i + 2])
46821 break;
46822 if (i < 32)
46824 /* See if bytes move the same in both lanes. If yes,
46825 vpblendw with immediate can be used. */
46826 for (i = 0; i < 16; i += 2)
46827 if (d->perm[i] + 16 != d->perm[i + 16])
46828 goto use_pblendvb;
46830 /* Use vpblendw. */
46831 for (i = 0; i < 16; ++i)
46832 mask |= (d->perm[i * 2] >= 32) << i;
46833 vmode = V16HImode;
46834 goto do_subreg;
46837 /* Use vpblendd. */
46838 for (i = 0; i < 8; ++i)
46839 mask |= (d->perm[i * 4] >= 32) << i;
46840 vmode = V8SImode;
46841 goto do_subreg;
46843 case V16HImode:
46844 /* See if words move in pairs. If yes, vpblendd can be used. */
46845 for (i = 0; i < 16; i += 2)
46846 if (d->perm[i] + 1 != d->perm[i + 1])
46847 break;
46848 if (i < 16)
46850 /* See if words move the same in both lanes. If not,
46851 vpblendvb must be used. */
46852 for (i = 0; i < 8; i++)
46853 if (d->perm[i] + 8 != d->perm[i + 8])
46855 /* Use vpblendvb. */
46856 for (i = 0; i < 32; ++i)
46857 rperm[i] = (d->perm[i / 2] < 16 ? const0_rtx : constm1_rtx);
46859 vmode = V32QImode;
46860 nelt = 32;
46861 target = gen_reg_rtx (vmode);
46862 op0 = gen_lowpart (vmode, op0);
46863 op1 = gen_lowpart (vmode, op1);
46864 goto finish_pblendvb;
46867 /* Use vpblendw. */
46868 for (i = 0; i < 16; ++i)
46869 mask |= (d->perm[i] >= 16) << i;
46870 break;
46873 /* Use vpblendd. */
46874 for (i = 0; i < 8; ++i)
46875 mask |= (d->perm[i * 2] >= 16) << i;
46876 vmode = V8SImode;
46877 goto do_subreg;
46879 case V4DImode:
46880 /* Use vpblendd. */
46881 for (i = 0; i < 4; ++i)
46882 mask |= (d->perm[i] >= 4 ? 3 : 0) << (i * 2);
46883 vmode = V8SImode;
46884 goto do_subreg;
46886 default:
46887 gcc_unreachable ();
46890 /* This matches five different patterns with the different modes. */
46891 x = gen_rtx_VEC_MERGE (vmode, op1, op0, GEN_INT (mask));
46892 x = gen_rtx_SET (VOIDmode, target, x);
46893 emit_insn (x);
46894 if (target != d->target)
46895 emit_move_insn (d->target, gen_lowpart (d->vmode, target));
46897 return true;
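/* Editorial example, not part of the GCC sources: a worked immediate
   computation for the V8SImode arm of the switch above, written as
   stand-alone C.  */
#if 0
#include <stdio.h>

int
main (void)
{
  /* V8SImode blend: element i is taken from op1 when perm[i] >= 8.  */
  static const unsigned char perm[8] = { 0, 9, 2, 11, 4, 13, 6, 15 };
  unsigned mask = 0, i;

  for (i = 0; i < 8; ++i)
    mask |= (perm[i] >= 8) << i;
  printf ("vpblendd immediate: 0x%x\n", mask);	/* prints 0xaa */
  return 0;
}
#endif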
46900 /* A subroutine of ix86_expand_vec_perm_builtin_1. Try to implement D
46901 in terms of the variable form of vpermilps.
46903 Note that we will have already failed the immediate input vpermilps,
46904 which requires that the high and low part shuffle be identical; the
46905 variable form doesn't require that. */
46907 static bool
46908 expand_vec_perm_vpermil (struct expand_vec_perm_d *d)
46910 rtx rperm[8], vperm;
46911 unsigned i;
46913 if (!TARGET_AVX || d->vmode != V8SFmode || !d->one_operand_p)
46914 return false;
46916 /* We can only permute within the 128-bit lane. */
46917 for (i = 0; i < 8; ++i)
46919 unsigned e = d->perm[i];
46920 if (i < 4 ? e >= 4 : e < 4)
46921 return false;
46924 if (d->testing_p)
46925 return true;
46927 for (i = 0; i < 8; ++i)
46929 unsigned e = d->perm[i];
46931 /* Within each 128-bit lane, the elements of op0 are numbered
46932 from 0 and the elements of op1 are numbered from 4. */
46933 if (e >= 8 + 4)
46934 e -= 8;
46935 else if (e >= 4)
46936 e -= 4;
46938 rperm[i] = GEN_INT (e);
46941 vperm = gen_rtx_CONST_VECTOR (V8SImode, gen_rtvec_v (8, rperm));
46942 vperm = force_reg (V8SImode, vperm);
46943 emit_insn (gen_avx_vpermilvarv8sf3 (d->target, d->op0, vperm));
46945 return true;
46948 /* Return true if permutation D can be performed as VMODE permutation
46949 instead. */
46951 static bool
46952 valid_perm_using_mode_p (machine_mode vmode, struct expand_vec_perm_d *d)
46954 unsigned int i, j, chunk;
46956 if (GET_MODE_CLASS (vmode) != MODE_VECTOR_INT
46957 || GET_MODE_CLASS (d->vmode) != MODE_VECTOR_INT
46958 || GET_MODE_SIZE (vmode) != GET_MODE_SIZE (d->vmode))
46959 return false;
46961 if (GET_MODE_NUNITS (vmode) >= d->nelt)
46962 return true;
46964 chunk = d->nelt / GET_MODE_NUNITS (vmode);
46965 for (i = 0; i < d->nelt; i += chunk)
46966 if (d->perm[i] & (chunk - 1))
46967 return false;
46968 else
46969 for (j = 1; j < chunk; ++j)
46970 if (d->perm[i] + j != d->perm[i + j])
46971 return false;
46973 return true;
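/* Editorial example, not part of the GCC sources: with d->vmode ==
   V16QImode and vmode == V4SImode, chunk == 4, so the byte permutation
   {4 5 6 7 0 1 2 3 12 13 14 15 8 9 10 11} passes (each group of four starts
   on a multiple of 4 and is consecutive) and is really the dword
   permutation {1 0 3 2}, while a permutation starting {1 2 3 4 ...} fails
   on its first element.  */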
46976 /* A subroutine of ix86_expand_vec_perm_builtin_1. Try to implement D
46977 in terms of pshufb, vpperm, vpermq, vpermd, vpermps or vperm2i128. */
46979 static bool
46980 expand_vec_perm_pshufb (struct expand_vec_perm_d *d)
46982 unsigned i, nelt, eltsz, mask;
46983 unsigned char perm[64];
46984 machine_mode vmode = V16QImode;
46985 rtx rperm[64], vperm, target, op0, op1;
46987 nelt = d->nelt;
46989 if (!d->one_operand_p)
46991 if (!TARGET_XOP || GET_MODE_SIZE (d->vmode) != 16)
46993 if (TARGET_AVX2
46994 && valid_perm_using_mode_p (V2TImode, d))
46996 if (d->testing_p)
46997 return true;
46999 /* Use vperm2i128 insn. The pattern uses
47000 V4DImode instead of V2TImode. */
47001 target = d->target;
47002 if (d->vmode != V4DImode)
47003 target = gen_reg_rtx (V4DImode);
47004 op0 = gen_lowpart (V4DImode, d->op0);
47005 op1 = gen_lowpart (V4DImode, d->op1);
47006 rperm[0]
47007 = GEN_INT ((d->perm[0] / (nelt / 2))
47008 | ((d->perm[nelt / 2] / (nelt / 2)) * 16));
47009 emit_insn (gen_avx2_permv2ti (target, op0, op1, rperm[0]));
47010 if (target != d->target)
47011 emit_move_insn (d->target, gen_lowpart (d->vmode, target));
47012 return true;
47014 return false;
47017 else
47019 if (GET_MODE_SIZE (d->vmode) == 16)
47021 if (!TARGET_SSSE3)
47022 return false;
47024 else if (GET_MODE_SIZE (d->vmode) == 32)
47026 if (!TARGET_AVX2)
47027 return false;
47029 /* V4DImode should be already handled through
47030 expand_vselect by vpermq instruction. */
47031 gcc_assert (d->vmode != V4DImode);
47033 vmode = V32QImode;
47034 if (d->vmode == V8SImode
47035 || d->vmode == V16HImode
47036 || d->vmode == V32QImode)
47038 /* First see if vpermq can be used for
47039 V8SImode/V16HImode/V32QImode. */
47040 if (valid_perm_using_mode_p (V4DImode, d))
47042 for (i = 0; i < 4; i++)
47043 perm[i] = (d->perm[i * nelt / 4] * 4 / nelt) & 3;
47044 if (d->testing_p)
47045 return true;
47046 target = gen_reg_rtx (V4DImode);
47047 if (expand_vselect (target, gen_lowpart (V4DImode, d->op0),
47048 perm, 4, false))
47050 emit_move_insn (d->target,
47051 gen_lowpart (d->vmode, target));
47052 return true;
47054 return false;
47057 /* Next see if vpermd can be used. */
47058 if (valid_perm_using_mode_p (V8SImode, d))
47059 vmode = V8SImode;
47061 /* Or if vpermps can be used. */
47062 else if (d->vmode == V8SFmode)
47063 vmode = V8SImode;
47065 if (vmode == V32QImode)
47067 /* vpshufb only works within 128-bit lanes; it is not
47068 possible to shuffle bytes across lanes. */
47069 for (i = 0; i < nelt; ++i)
47070 if ((d->perm[i] ^ i) & (nelt / 2))
47071 return false;
47074 else if (GET_MODE_SIZE (d->vmode) == 64)
47076 if (!TARGET_AVX512BW)
47077 return false;
47079 /* If vpermq didn't work, vpshufb won't work either. */
47080 if (d->vmode == V8DFmode || d->vmode == V8DImode)
47081 return false;
47083 vmode = V64QImode;
47084 if (d->vmode == V16SImode
47085 || d->vmode == V32HImode
47086 || d->vmode == V64QImode)
47088 /* First see if vpermq can be used for
47089 V16SImode/V32HImode/V64QImode. */
47090 if (valid_perm_using_mode_p (V8DImode, d))
47092 for (i = 0; i < 8; i++)
47093 perm[i] = (d->perm[i * nelt / 8] * 8 / nelt) & 7;
47094 if (d->testing_p)
47095 return true;
47096 target = gen_reg_rtx (V8DImode);
47097 if (expand_vselect (target, gen_lowpart (V8DImode, d->op0),
47098 perm, 8, false))
47100 emit_move_insn (d->target,
47101 gen_lowpart (d->vmode, target));
47102 return true;
47104 return false;
47107 /* Next see if vpermd can be used. */
47108 if (valid_perm_using_mode_p (V16SImode, d))
47109 vmode = V16SImode;
47111 /* Or if vpermps can be used. */
47112 else if (d->vmode == V16SFmode)
47113 vmode = V16SImode;
47114 if (vmode == V64QImode)
47116 /* vpshufb only works within 128-bit lanes; it is not
47117 possible to shuffle bytes across lanes. */
47118 for (i = 0; i < nelt; ++i)
47119 if ((d->perm[i] ^ i) & (nelt / 4))
47120 return false;
47123 else
47124 return false;
47127 if (d->testing_p)
47128 return true;
47130 if (vmode == V8SImode)
47131 for (i = 0; i < 8; ++i)
47132 rperm[i] = GEN_INT ((d->perm[i * nelt / 8] * 8 / nelt) & 7);
47133 else if (vmode == V16SImode)
47134 for (i = 0; i < 16; ++i)
47135 rperm[i] = GEN_INT ((d->perm[i * nelt / 16] * 16 / nelt) & 15);
47136 else
47138 eltsz = GET_MODE_SIZE (GET_MODE_INNER (d->vmode));
47139 if (!d->one_operand_p)
47140 mask = 2 * nelt - 1;
47141 else if (vmode == V16QImode)
47142 mask = nelt - 1;
47143 else if (vmode == V64QImode)
47144 mask = nelt / 4 - 1;
47145 else
47146 mask = nelt / 2 - 1;
47148 for (i = 0; i < nelt; ++i)
47150 unsigned j, e = d->perm[i] & mask;
47151 for (j = 0; j < eltsz; ++j)
47152 rperm[i * eltsz + j] = GEN_INT (e * eltsz + j);
47156 vperm = gen_rtx_CONST_VECTOR (vmode,
47157 gen_rtvec_v (GET_MODE_NUNITS (vmode), rperm));
47158 vperm = force_reg (vmode, vperm);
47160 target = d->target;
47161 if (d->vmode != vmode)
47162 target = gen_reg_rtx (vmode);
47163 op0 = gen_lowpart (vmode, d->op0);
47164 if (d->one_operand_p)
47166 if (vmode == V16QImode)
47167 emit_insn (gen_ssse3_pshufbv16qi3 (target, op0, vperm));
47168 else if (vmode == V32QImode)
47169 emit_insn (gen_avx2_pshufbv32qi3 (target, op0, vperm));
47170 else if (vmode == V64QImode)
47171 emit_insn (gen_avx512bw_pshufbv64qi3 (target, op0, vperm));
47172 else if (vmode == V8SFmode)
47173 emit_insn (gen_avx2_permvarv8sf (target, op0, vperm));
47174 else if (vmode == V8SImode)
47175 emit_insn (gen_avx2_permvarv8si (target, op0, vperm));
47176 else if (vmode == V16SFmode)
47177 emit_insn (gen_avx512f_permvarv16sf (target, op0, vperm));
47178 else if (vmode == V16SImode)
47179 emit_insn (gen_avx512f_permvarv16si (target, op0, vperm));
47180 else
47181 gcc_unreachable ();
47183 else
47185 op1 = gen_lowpart (vmode, d->op1);
47186 emit_insn (gen_xop_pperm (target, op0, op1, vperm));
47188 if (target != d->target)
47189 emit_move_insn (d->target, gen_lowpart (d->vmode, target));
47191 return true;
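/* Editorial note, not part of the GCC sources: the final loop above widens
   an element permutation into a byte selector.  For illustration, with
   eltsz == 4 the permutation {2 3 0 1} becomes the byte mask
   {8 9 10 11 12 13 14 15 0 1 2 3 4 5 6 7}: element index e turns into the
   four consecutive byte indices 4*e .. 4*e + 3 that pshufb/vpperm
   consume.  */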
47194 /* A subroutine of ix86_expand_vec_perm_builtin_1. Try to instantiate D
47195 in a single instruction. */
47197 static bool
47198 expand_vec_perm_1 (struct expand_vec_perm_d *d)
47200 unsigned i, nelt = d->nelt;
47201 unsigned char perm2[MAX_VECT_LEN];
47203 /* Check plain VEC_SELECT first, because AVX has instructions that could
47204 match both SEL and SEL+CONCAT, but the plain SEL will allow a memory
47205 input where SEL+CONCAT may not. */
47206 if (d->one_operand_p)
47208 int mask = nelt - 1;
47209 bool identity_perm = true;
47210 bool broadcast_perm = true;
47212 for (i = 0; i < nelt; i++)
47214 perm2[i] = d->perm[i] & mask;
47215 if (perm2[i] != i)
47216 identity_perm = false;
47217 if (perm2[i])
47218 broadcast_perm = false;
47221 if (identity_perm)
47223 if (!d->testing_p)
47224 emit_move_insn (d->target, d->op0);
47225 return true;
47227 else if (broadcast_perm && TARGET_AVX2)
47229 /* Use vpbroadcast{b,w,d}. */
47230 rtx (*gen) (rtx, rtx) = NULL;
47231 switch (d->vmode)
47233 case V64QImode:
47234 if (TARGET_AVX512BW)
47235 gen = gen_avx512bw_vec_dupv64qi_1;
47236 break;
47237 case V32QImode:
47238 gen = gen_avx2_pbroadcastv32qi_1;
47239 break;
47240 case V32HImode:
47241 if (TARGET_AVX512BW)
47242 gen = gen_avx512bw_vec_dupv32hi_1;
47243 break;
47244 case V16HImode:
47245 gen = gen_avx2_pbroadcastv16hi_1;
47246 break;
47247 case V16SImode:
47248 if (TARGET_AVX512F)
47249 gen = gen_avx512f_vec_dupv16si_1;
47250 break;
47251 case V8SImode:
47252 gen = gen_avx2_pbroadcastv8si_1;
47253 break;
47254 case V16QImode:
47255 gen = gen_avx2_pbroadcastv16qi;
47256 break;
47257 case V8HImode:
47258 gen = gen_avx2_pbroadcastv8hi;
47259 break;
47260 case V16SFmode:
47261 if (TARGET_AVX512F)
47262 gen = gen_avx512f_vec_dupv16sf_1;
47263 break;
47264 case V8SFmode:
47265 gen = gen_avx2_vec_dupv8sf_1;
47266 break;
47267 case V8DFmode:
47268 if (TARGET_AVX512F)
47269 gen = gen_avx512f_vec_dupv8df_1;
47270 break;
47271 case V8DImode:
47272 if (TARGET_AVX512F)
47273 gen = gen_avx512f_vec_dupv8di_1;
47274 break;
47275 /* For other modes prefer other shuffles this function creates. */
47276 default: break;
47278 if (gen != NULL)
47280 if (!d->testing_p)
47281 emit_insn (gen (d->target, d->op0));
47282 return true;
47286 if (expand_vselect (d->target, d->op0, perm2, nelt, d->testing_p))
47287 return true;
47289 /* There are plenty of patterns in sse.md that are written for
47290 SEL+CONCAT and are not replicated for a single op. Perhaps
47291 that should be changed, to avoid the nastiness here. */
47293 /* Recognize interleave style patterns, which means incrementing
47294 every other permutation operand. */
47295 for (i = 0; i < nelt; i += 2)
47297 perm2[i] = d->perm[i] & mask;
47298 perm2[i + 1] = (d->perm[i + 1] & mask) + nelt;
47300 if (expand_vselect_vconcat (d->target, d->op0, d->op0, perm2, nelt,
47301 d->testing_p))
47302 return true;
47304 /* Recognize shufps, which means adding {0, 0, nelt, nelt}. */
47305 if (nelt >= 4)
47307 for (i = 0; i < nelt; i += 4)
47309 perm2[i + 0] = d->perm[i + 0] & mask;
47310 perm2[i + 1] = d->perm[i + 1] & mask;
47311 perm2[i + 2] = (d->perm[i + 2] & mask) + nelt;
47312 perm2[i + 3] = (d->perm[i + 3] & mask) + nelt;
47315 if (expand_vselect_vconcat (d->target, d->op0, d->op0, perm2, nelt,
47316 d->testing_p))
47317 return true;
47321 /* Finally, try the fully general two operand permute. */
47322 if (expand_vselect_vconcat (d->target, d->op0, d->op1, d->perm, nelt,
47323 d->testing_p))
47324 return true;
47326 /* Recognize interleave style patterns with reversed operands. */
47327 if (!d->one_operand_p)
47329 for (i = 0; i < nelt; ++i)
47331 unsigned e = d->perm[i];
47332 if (e >= nelt)
47333 e -= nelt;
47334 else
47335 e += nelt;
47336 perm2[i] = e;
47339 if (expand_vselect_vconcat (d->target, d->op1, d->op0, perm2, nelt,
47340 d->testing_p))
47341 return true;
47344 /* Try the SSE4.1 blend variable merge instructions. */
47345 if (expand_vec_perm_blend (d))
47346 return true;
47348 /* Try one of the AVX vpermil variable permutations. */
47349 if (expand_vec_perm_vpermil (d))
47350 return true;
47352 /* Try the SSSE3 pshufb or XOP vpperm or AVX2 vperm2i128,
47353 vpshufb, vpermd, vpermps or vpermq variable permutation. */
47354 if (expand_vec_perm_pshufb (d))
47355 return true;
47357 /* Try the AVX2 vpalignr instruction. */
47358 if (expand_vec_perm_palignr (d, true))
47359 return true;
47361 /* Try the AVX512F vpermi2 instructions. */
47362 if (ix86_expand_vec_perm_vpermi2 (NULL_RTX, NULL_RTX, NULL_RTX, NULL_RTX, d))
47363 return true;
47365 return false;
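/* Editorial note, not part of the GCC sources: examples of what the
   single-insn matcher above recognizes for V4SImode.  {0 1 2 3} is the
   identity and becomes a plain move, {0 0 0 0} is matched by expand_vselect
   as a pshufd broadcast, and the two-operand {0 4 1 5} matches the
   SEL+CONCAT form of punpckldq.  */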
47368 /* A subroutine of ix86_expand_vec_perm_builtin_1. Try to implement D
47369 in terms of a pair of pshuflw + pshufhw instructions. */
47371 static bool
47372 expand_vec_perm_pshuflw_pshufhw (struct expand_vec_perm_d *d)
47374 unsigned char perm2[MAX_VECT_LEN];
47375 unsigned i;
47376 bool ok;
47378 if (d->vmode != V8HImode || !d->one_operand_p)
47379 return false;
47381 /* The two permutations only operate in 64-bit lanes. */
47382 for (i = 0; i < 4; ++i)
47383 if (d->perm[i] >= 4)
47384 return false;
47385 for (i = 4; i < 8; ++i)
47386 if (d->perm[i] < 4)
47387 return false;
47389 if (d->testing_p)
47390 return true;
47392 /* Emit the pshuflw. */
47393 memcpy (perm2, d->perm, 4);
47394 for (i = 4; i < 8; ++i)
47395 perm2[i] = i;
47396 ok = expand_vselect (d->target, d->op0, perm2, 8, d->testing_p);
47397 gcc_assert (ok);
47399 /* Emit the pshufhw. */
47400 memcpy (perm2 + 4, d->perm + 4, 4);
47401 for (i = 0; i < 4; ++i)
47402 perm2[i] = i;
47403 ok = expand_vselect (d->target, d->target, perm2, 8, d->testing_p);
47404 gcc_assert (ok);
47406 return true;
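/* Editorial example, not part of the GCC sources: for the V8HImode
   permutation {3 1 2 0 7 5 6 4} the code above first emits pshuflw with
   selector {3 1 2 0 | 4 5 6 7} (high quadword untouched) and then pshufhw
   with selector {0 1 2 3 | 7 5 6 4}, each instruction reordering words only
   within its own 64-bit half.  */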
47409 /* A subroutine of ix86_expand_vec_perm_builtin_1. Try to simplify
47410 the permutation using the SSSE3 palignr instruction. This succeeds
47411 when all of the elements in PERM fit within one vector and we merely
47412 need to shift them down so that a single vector permutation has a
47413 chance to succeed. If SINGLE_INSN_ONLY_P, succeed if only
47414 the vpalignr instruction itself can perform the requested permutation. */
47416 static bool
47417 expand_vec_perm_palignr (struct expand_vec_perm_d *d, bool single_insn_only_p)
47419 unsigned i, nelt = d->nelt;
47420 unsigned min, max, minswap, maxswap;
47421 bool in_order, ok, swap = false;
47422 rtx shift, target;
47423 struct expand_vec_perm_d dcopy;
47425 /* Even with AVX, palignr only operates on 128-bit vectors;
47426 with AVX2, palignr operates on both 128-bit lanes. */
47427 if ((!TARGET_SSSE3 || GET_MODE_SIZE (d->vmode) != 16)
47428 && (!TARGET_AVX2 || GET_MODE_SIZE (d->vmode) != 32))
47429 return false;
47431 min = 2 * nelt;
47432 max = 0;
47433 minswap = 2 * nelt;
47434 maxswap = 0;
47435 for (i = 0; i < nelt; ++i)
47437 unsigned e = d->perm[i];
47438 unsigned eswap = d->perm[i] ^ nelt;
47439 if (GET_MODE_SIZE (d->vmode) == 32)
47441 e = (e & ((nelt / 2) - 1)) | ((e & nelt) >> 1);
47442 eswap = e ^ (nelt / 2);
47444 if (e < min)
47445 min = e;
47446 if (e > max)
47447 max = e;
47448 if (eswap < minswap)
47449 minswap = eswap;
47450 if (eswap > maxswap)
47451 maxswap = eswap;
47453 if (min == 0
47454 || max - min >= (GET_MODE_SIZE (d->vmode) == 32 ? nelt / 2 : nelt))
47456 if (d->one_operand_p
47457 || minswap == 0
47458 || maxswap - minswap >= (GET_MODE_SIZE (d->vmode) == 32
47459 ? nelt / 2 : nelt))
47460 return false;
47461 swap = true;
47462 min = minswap;
47463 max = maxswap;
47466 /* Given that we have SSSE3, we know we'll be able to implement the
47467 single operand permutation after the palignr with pshufb for
47468 128-bit vectors. If SINGLE_INSN_ONLY_P, in_order has to be computed
47469 first. */
47470 if (d->testing_p && GET_MODE_SIZE (d->vmode) == 16 && !single_insn_only_p)
47471 return true;
47473 dcopy = *d;
47474 if (swap)
47476 dcopy.op0 = d->op1;
47477 dcopy.op1 = d->op0;
47478 for (i = 0; i < nelt; ++i)
47479 dcopy.perm[i] ^= nelt;
47482 in_order = true;
47483 for (i = 0; i < nelt; ++i)
47485 unsigned e = dcopy.perm[i];
47486 if (GET_MODE_SIZE (d->vmode) == 32
47487 && e >= nelt
47488 && (e & (nelt / 2 - 1)) < min)
47489 e = e - min - (nelt / 2);
47490 else
47491 e = e - min;
47492 if (e != i)
47493 in_order = false;
47494 dcopy.perm[i] = e;
47496 dcopy.one_operand_p = true;
47498 if (single_insn_only_p && !in_order)
47499 return false;
47501 /* For AVX2, test whether we can permute the result in one instruction. */
47502 if (d->testing_p)
47504 if (in_order)
47505 return true;
47506 dcopy.op1 = dcopy.op0;
47507 return expand_vec_perm_1 (&dcopy);
47510 shift = GEN_INT (min * GET_MODE_BITSIZE (GET_MODE_INNER (d->vmode)));
47511 if (GET_MODE_SIZE (d->vmode) == 16)
47513 target = gen_reg_rtx (TImode);
47514 emit_insn (gen_ssse3_palignrti (target, gen_lowpart (TImode, dcopy.op1),
47515 gen_lowpart (TImode, dcopy.op0), shift));
47517 else
47519 target = gen_reg_rtx (V2TImode);
47520 emit_insn (gen_avx2_palignrv2ti (target,
47521 gen_lowpart (V2TImode, dcopy.op1),
47522 gen_lowpart (V2TImode, dcopy.op0),
47523 shift));
47526 dcopy.op0 = dcopy.op1 = gen_lowpart (d->vmode, target);
47528 /* Test for the degenerate case where the alignment by itself
47529 produces the desired permutation. */
47530 if (in_order)
47532 emit_move_insn (d->target, dcopy.op0);
47533 return true;
47536 ok = expand_vec_perm_1 (&dcopy);
47537 gcc_assert (ok || GET_MODE_SIZE (d->vmode) == 32);
47539 return ok;
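/* Editorial example, not part of the GCC sources: for a two-operand
   V16QImode permutation selecting bytes {3 4 ... 18} of the concatenation
   op1:op0, min == 3, so a single palignr by 3 bytes already produces the
   result (in_order is true) and no follow-up pshufb is needed.  Had the
   selected window started at byte 0 or spanned more than 16 bytes, the
   routine would give up or retry with the operand-swapped window.  */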
47542 /* A subroutine of ix86_expand_vec_perm_const_1. Try to simplify
47543 the permutation using the SSE4_1 pblendv instruction. Potentially
47544 reduces 2 pshufb insns and an ior to 1 pshufb and a pblendv. */
47546 static bool
47547 expand_vec_perm_pblendv (struct expand_vec_perm_d *d)
47549 unsigned i, which, nelt = d->nelt;
47550 struct expand_vec_perm_d dcopy, dcopy1;
47551 machine_mode vmode = d->vmode;
47552 bool ok;
47554 /* Use the same checks as in expand_vec_perm_blend. */
47555 if (d->one_operand_p)
47556 return false;
47557 if (TARGET_AVX2 && GET_MODE_SIZE (vmode) == 32)
47559 else if (TARGET_AVX && (vmode == V4DFmode || vmode == V8SFmode))
47561 else if (TARGET_SSE4_1 && GET_MODE_SIZE (vmode) == 16)
47563 else
47564 return false;
47566 /* Figure out which permutation elements do not stay in their
47567 respective positions. */
47568 for (i = 0, which = 0; i < nelt; ++i)
47570 unsigned e = d->perm[i];
47571 if (e != i)
47572 which |= (e < nelt ? 1 : 2);
47574 /* We can pblend the elements that do not stay in their
47575 respective positions only when all of those elements come
47576 from one half of the permutation.
47577 {0 1 8 3 4 5 9 7} is ok: 8 and 9 are not at their respective
47578 positions, but both 8 and 9 >= 8.
47579 {0 1 8 3 4 5 2 7} is not ok: 2 and 8 are not at their
47580 respective positions, and 8 >= 8 but 2 is not. */
47581 if (which != 1 && which != 2)
47582 return false;
47583 if (d->testing_p && GET_MODE_SIZE (vmode) == 16)
47584 return true;
47586 /* First apply a one-operand permutation to the elements that
47587 do not stay in their respective positions. */
47588 dcopy = *d;
47589 if (which == 2)
47590 dcopy.op0 = dcopy.op1 = d->op1;
47591 else
47592 dcopy.op0 = dcopy.op1 = d->op0;
47593 if (!d->testing_p)
47594 dcopy.target = gen_reg_rtx (vmode);
47595 dcopy.one_operand_p = true;
47597 for (i = 0; i < nelt; ++i)
47598 dcopy.perm[i] = d->perm[i] & (nelt - 1);
47600 ok = expand_vec_perm_1 (&dcopy);
47601 if (GET_MODE_SIZE (vmode) != 16 && !ok)
47602 return false;
47603 else
47604 gcc_assert (ok);
47605 if (d->testing_p)
47606 return true;
47608 /* Next we put permuted elements into their positions. */
47609 dcopy1 = *d;
47610 if (which == 2)
47611 dcopy1.op1 = dcopy.target;
47612 else
47613 dcopy1.op0 = dcopy.target;
47615 for (i = 0; i < nelt; ++i)
47616 dcopy1.perm[i] = ((d->perm[i] >= nelt) ? (nelt + i) : i);
47618 ok = expand_vec_perm_blend (&dcopy1);
47619 gcc_assert (ok);
47621 return true;
47624 static bool expand_vec_perm_interleave3 (struct expand_vec_perm_d *d);
47626 /* A subroutine of ix86_expand_vec_perm_builtin_1. Try to simplify
47627 a two vector permutation into a single vector permutation by using
47628 an interleave operation to merge the vectors. */
47630 static bool
47631 expand_vec_perm_interleave2 (struct expand_vec_perm_d *d)
47633 struct expand_vec_perm_d dremap, dfinal;
47634 unsigned i, nelt = d->nelt, nelt2 = nelt / 2;
47635 unsigned HOST_WIDE_INT contents;
47636 unsigned char remap[2 * MAX_VECT_LEN];
47637 rtx_insn *seq;
47638 bool ok, same_halves = false;
47640 if (GET_MODE_SIZE (d->vmode) == 16)
47642 if (d->one_operand_p)
47643 return false;
47645 else if (GET_MODE_SIZE (d->vmode) == 32)
47647 if (!TARGET_AVX)
47648 return false;
47649 /* For 32-byte modes allow even d->one_operand_p.
47650 The lack of cross-lane shuffling in some instructions
47651 might prevent a single insn shuffle. */
47652 dfinal = *d;
47653 dfinal.testing_p = true;
47654 /* If expand_vec_perm_interleave3 can expand this into
47655 a 3-insn sequence, give up and let it be expanded that
47656 way. While that is one insn longer, it doesn't need a
47657 memory operand, and in the common case where the
47658 interleave-low and interleave-high permutations with the
47659 same operands are adjacent, it needs only 4 insns for
47660 both after CSE. */
47661 if (expand_vec_perm_interleave3 (&dfinal))
47662 return false;
47664 else
47665 return false;
47667 /* Examine from whence the elements come. */
47668 contents = 0;
47669 for (i = 0; i < nelt; ++i)
47670 contents |= ((unsigned HOST_WIDE_INT) 1) << d->perm[i];
47672 memset (remap, 0xff, sizeof (remap));
47673 dremap = *d;
47675 if (GET_MODE_SIZE (d->vmode) == 16)
47677 unsigned HOST_WIDE_INT h1, h2, h3, h4;
47679 /* Split the two input vectors into 4 halves. */
47680 h1 = (((unsigned HOST_WIDE_INT) 1) << nelt2) - 1;
47681 h2 = h1 << nelt2;
47682 h3 = h2 << nelt2;
47683 h4 = h3 << nelt2;
47685 /* If the elements come from the low halves, use interleave low;
47686 similarly for interleave high. If the elements come from mismatched
47687 halves, we can use shufps for V4SF/V4SI or do a DImode shuffle. */
47688 if ((contents & (h1 | h3)) == contents)
47690 /* punpckl* */
47691 for (i = 0; i < nelt2; ++i)
47693 remap[i] = i * 2;
47694 remap[i + nelt] = i * 2 + 1;
47695 dremap.perm[i * 2] = i;
47696 dremap.perm[i * 2 + 1] = i + nelt;
47698 if (!TARGET_SSE2 && d->vmode == V4SImode)
47699 dremap.vmode = V4SFmode;
47701 else if ((contents & (h2 | h4)) == contents)
47703 /* punpckh* */
47704 for (i = 0; i < nelt2; ++i)
47706 remap[i + nelt2] = i * 2;
47707 remap[i + nelt + nelt2] = i * 2 + 1;
47708 dremap.perm[i * 2] = i + nelt2;
47709 dremap.perm[i * 2 + 1] = i + nelt + nelt2;
47711 if (!TARGET_SSE2 && d->vmode == V4SImode)
47712 dremap.vmode = V4SFmode;
47714 else if ((contents & (h1 | h4)) == contents)
47716 /* shufps */
47717 for (i = 0; i < nelt2; ++i)
47719 remap[i] = i;
47720 remap[i + nelt + nelt2] = i + nelt2;
47721 dremap.perm[i] = i;
47722 dremap.perm[i + nelt2] = i + nelt + nelt2;
47724 if (nelt != 4)
47726 /* shufpd */
47727 dremap.vmode = V2DImode;
47728 dremap.nelt = 2;
47729 dremap.perm[0] = 0;
47730 dremap.perm[1] = 3;
47733 else if ((contents & (h2 | h3)) == contents)
47735 /* shufps */
47736 for (i = 0; i < nelt2; ++i)
47738 remap[i + nelt2] = i;
47739 remap[i + nelt] = i + nelt2;
47740 dremap.perm[i] = i + nelt2;
47741 dremap.perm[i + nelt2] = i + nelt;
47743 if (nelt != 4)
47745 /* shufpd */
47746 dremap.vmode = V2DImode;
47747 dremap.nelt = 2;
47748 dremap.perm[0] = 1;
47749 dremap.perm[1] = 2;
47752 else
47753 return false;
47755 else
47757 unsigned int nelt4 = nelt / 4, nzcnt = 0;
47758 unsigned HOST_WIDE_INT q[8];
47759 unsigned int nonzero_halves[4];
47761 /* Split the two input vectors into 8 quarters. */
47762 q[0] = (((unsigned HOST_WIDE_INT) 1) << nelt4) - 1;
47763 for (i = 1; i < 8; ++i)
47764 q[i] = q[0] << (nelt4 * i);
47765 for (i = 0; i < 4; ++i)
47766 if (((q[2 * i] | q[2 * i + 1]) & contents) != 0)
47768 nonzero_halves[nzcnt] = i;
47769 ++nzcnt;
47772 if (nzcnt == 1)
47774 gcc_assert (d->one_operand_p);
47775 nonzero_halves[1] = nonzero_halves[0];
47776 same_halves = true;
47778 else if (d->one_operand_p)
47780 gcc_assert (nonzero_halves[0] == 0);
47781 gcc_assert (nonzero_halves[1] == 1);
47784 if (nzcnt <= 2)
47786 if (d->perm[0] / nelt2 == nonzero_halves[1])
47788 /* Attempt to increase the likelihood that dfinal
47789 shuffle will be intra-lane. */
47790 char tmph = nonzero_halves[0];
47791 nonzero_halves[0] = nonzero_halves[1];
47792 nonzero_halves[1] = tmph;
47795 /* vperm2f128 or vperm2i128. */
47796 for (i = 0; i < nelt2; ++i)
47798 remap[i + nonzero_halves[1] * nelt2] = i + nelt2;
47799 remap[i + nonzero_halves[0] * nelt2] = i;
47800 dremap.perm[i + nelt2] = i + nonzero_halves[1] * nelt2;
47801 dremap.perm[i] = i + nonzero_halves[0] * nelt2;
47804 if (d->vmode != V8SFmode
47805 && d->vmode != V4DFmode
47806 && d->vmode != V8SImode)
47808 dremap.vmode = V8SImode;
47809 dremap.nelt = 8;
47810 for (i = 0; i < 4; ++i)
47812 dremap.perm[i] = i + nonzero_halves[0] * 4;
47813 dremap.perm[i + 4] = i + nonzero_halves[1] * 4;
47817 else if (d->one_operand_p)
47818 return false;
47819 else if (TARGET_AVX2
47820 && (contents & (q[0] | q[2] | q[4] | q[6])) == contents)
47822 /* vpunpckl* */
47823 for (i = 0; i < nelt4; ++i)
47825 remap[i] = i * 2;
47826 remap[i + nelt] = i * 2 + 1;
47827 remap[i + nelt2] = i * 2 + nelt2;
47828 remap[i + nelt + nelt2] = i * 2 + nelt2 + 1;
47829 dremap.perm[i * 2] = i;
47830 dremap.perm[i * 2 + 1] = i + nelt;
47831 dremap.perm[i * 2 + nelt2] = i + nelt2;
47832 dremap.perm[i * 2 + nelt2 + 1] = i + nelt + nelt2;
47835 else if (TARGET_AVX2
47836 && (contents & (q[1] | q[3] | q[5] | q[7])) == contents)
47838 /* vpunpckh* */
47839 for (i = 0; i < nelt4; ++i)
47841 remap[i + nelt4] = i * 2;
47842 remap[i + nelt + nelt4] = i * 2 + 1;
47843 remap[i + nelt2 + nelt4] = i * 2 + nelt2;
47844 remap[i + nelt + nelt2 + nelt4] = i * 2 + nelt2 + 1;
47845 dremap.perm[i * 2] = i + nelt4;
47846 dremap.perm[i * 2 + 1] = i + nelt + nelt4;
47847 dremap.perm[i * 2 + nelt2] = i + nelt2 + nelt4;
47848 dremap.perm[i * 2 + nelt2 + 1] = i + nelt + nelt2 + nelt4;
47851 else
47852 return false;
47855 /* Use the remapping array set up above to move the elements from their
47856 swizzled locations into their final destinations. */
47857 dfinal = *d;
47858 for (i = 0; i < nelt; ++i)
47860 unsigned e = remap[d->perm[i]];
47861 gcc_assert (e < nelt);
47862 /* If same_halves is true, both halves of the remapped vector are the
47863 same. Avoid cross-lane accesses if possible. */
47864 if (same_halves && i >= nelt2)
47866 gcc_assert (e < nelt2);
47867 dfinal.perm[i] = e + nelt2;
47869 else
47870 dfinal.perm[i] = e;
47872 if (!d->testing_p)
47874 dremap.target = gen_reg_rtx (dremap.vmode);
47875 dfinal.op0 = gen_lowpart (dfinal.vmode, dremap.target);
47877 dfinal.op1 = dfinal.op0;
47878 dfinal.one_operand_p = true;
47880 /* Test if the final remap can be done with a single insn. For V4SFmode or
47881 V4SImode this *will* succeed. For V8HImode or V16QImode it may not. */
47882 start_sequence ();
47883 ok = expand_vec_perm_1 (&dfinal);
47884 seq = get_insns ();
47885 end_sequence ();
47887 if (!ok)
47888 return false;
47890 if (d->testing_p)
47891 return true;
47893 if (dremap.vmode != dfinal.vmode)
47895 dremap.op0 = gen_lowpart (dremap.vmode, dremap.op0);
47896 dremap.op1 = gen_lowpart (dremap.vmode, dremap.op1);
47899 ok = expand_vec_perm_1 (&dremap);
47900 gcc_assert (ok);
47902 emit_insn (seq);
47903 return true;
47906 /* A subroutine of ix86_expand_vec_perm_builtin_1. Try to simplify
47907 a single vector cross-lane permutation into vpermq followed
47908 by any of the single insn permutations. */
47910 static bool
47911 expand_vec_perm_vpermq_perm_1 (struct expand_vec_perm_d *d)
47913 struct expand_vec_perm_d dremap, dfinal;
47914 unsigned i, j, nelt = d->nelt, nelt2 = nelt / 2, nelt4 = nelt / 4;
47915 unsigned contents[2];
47916 bool ok;
47918 if (!(TARGET_AVX2
47919 && (d->vmode == V32QImode || d->vmode == V16HImode)
47920 && d->one_operand_p))
47921 return false;
47923 contents[0] = 0;
47924 contents[1] = 0;
47925 for (i = 0; i < nelt2; ++i)
47927 contents[0] |= 1u << (d->perm[i] / nelt4);
47928 contents[1] |= 1u << (d->perm[i + nelt2] / nelt4);
47931 for (i = 0; i < 2; ++i)
47933 unsigned int cnt = 0;
47934 for (j = 0; j < 4; ++j)
47935 if ((contents[i] & (1u << j)) != 0 && ++cnt > 2)
47936 return false;
47939 if (d->testing_p)
47940 return true;
47942 dremap = *d;
47943 dremap.vmode = V4DImode;
47944 dremap.nelt = 4;
47945 dremap.target = gen_reg_rtx (V4DImode);
47946 dremap.op0 = gen_lowpart (V4DImode, d->op0);
47947 dremap.op1 = dremap.op0;
47948 dremap.one_operand_p = true;
47949 for (i = 0; i < 2; ++i)
47951 unsigned int cnt = 0;
47952 for (j = 0; j < 4; ++j)
47953 if ((contents[i] & (1u << j)) != 0)
47954 dremap.perm[2 * i + cnt++] = j;
47955 for (; cnt < 2; ++cnt)
47956 dremap.perm[2 * i + cnt] = 0;
47959 dfinal = *d;
47960 dfinal.op0 = gen_lowpart (dfinal.vmode, dremap.target);
47961 dfinal.op1 = dfinal.op0;
47962 dfinal.one_operand_p = true;
47963 for (i = 0, j = 0; i < nelt; ++i)
47965 if (i == nelt2)
47966 j = 2;
47967 dfinal.perm[i] = (d->perm[i] & (nelt4 - 1)) | (j ? nelt2 : 0);
47968 if ((d->perm[i] / nelt4) == dremap.perm[j])
47970 else if ((d->perm[i] / nelt4) == dremap.perm[j + 1])
47971 dfinal.perm[i] |= nelt4;
47972 else
47973 gcc_unreachable ();
47976 ok = expand_vec_perm_1 (&dremap);
47977 gcc_assert (ok);
47979 ok = expand_vec_perm_1 (&dfinal);
47980 gcc_assert (ok);
47982 return true;
47985 /* A subroutine of ix86_expand_vec_perm_builtin_1. Try to expand
47986 a vector permutation using two instructions, vperm2f128 resp.
47987 vperm2i128 followed by any single in-lane permutation. */
47989 static bool
47990 expand_vec_perm_vperm2f128 (struct expand_vec_perm_d *d)
47992 struct expand_vec_perm_d dfirst, dsecond;
47993 unsigned i, j, nelt = d->nelt, nelt2 = nelt / 2, perm;
47994 bool ok;
47996 if (!TARGET_AVX
47997 || GET_MODE_SIZE (d->vmode) != 32
47998 || (d->vmode != V8SFmode && d->vmode != V4DFmode && !TARGET_AVX2))
47999 return false;
48001 dsecond = *d;
48002 dsecond.one_operand_p = false;
48003 dsecond.testing_p = true;
48005 /* ((perm << 2)|perm) & 0x33 is the vperm2[fi]128
48006 immediate. For perm < 16 the second permutation uses
48007 d->op0 as first operand, for perm >= 16 it uses d->op1
48008 as first operand. The second operand is the result of
48009 vperm2[fi]128. */
48010 for (perm = 0; perm < 32; perm++)
48012 /* Ignore permutations which do not move anything cross-lane. */
48013 if (perm < 16)
48015 /* The second shuffle for e.g. V4DFmode has
48016 0123 and ABCD operands.
48017 Ignore AB23, as 23 is already in the second lane
48018 of the first operand. */
48019 if ((perm & 0xc) == (1 << 2)) continue;
48020 /* And 01CD, as 01 is in the first lane of the first
48021 operand. */
48022 if ((perm & 3) == 0) continue;
48023 /* And 4567, as then the vperm2[fi]128 doesn't change
48024 anything on the original 4567 second operand. */
48025 if ((perm & 0xf) == ((3 << 2) | 2)) continue;
48027 else
48029 /* The second shuffle for e.g. V4DFmode has
48030 4567 and ABCD operands.
48031 Ignore AB67, as 67 is already in the second lane
48032 of the first operand. */
48033 if ((perm & 0xc) == (3 << 2)) continue;
48034 /* And 45CD, as 45 is in the first lane of the first
48035 operand. */
48036 if ((perm & 3) == 2) continue;
48037 /* And 0123, as then the vperm2[fi]128 doesn't change
48038 anything on the original 0123 first operand. */
48039 if ((perm & 0xf) == (1 << 2)) continue;
48042 for (i = 0; i < nelt; i++)
48044 j = d->perm[i] / nelt2;
48045 if (j == ((perm >> (2 * (i >= nelt2))) & 3))
48046 dsecond.perm[i] = nelt + (i & nelt2) + (d->perm[i] & (nelt2 - 1));
48047 else if (j == (unsigned) (i >= nelt2) + 2 * (perm >= 16))
48048 dsecond.perm[i] = d->perm[i] & (nelt - 1);
48049 else
48050 break;
48053 if (i == nelt)
48055 start_sequence ();
48056 ok = expand_vec_perm_1 (&dsecond);
48057 end_sequence ();
48059 else
48060 ok = false;
48062 if (ok)
48064 if (d->testing_p)
48065 return true;
48067 /* Found a usable second shuffle. dfirst will be
48068 vperm2f128 on d->op0 and d->op1. */
48069 dsecond.testing_p = false;
48070 dfirst = *d;
48071 dfirst.target = gen_reg_rtx (d->vmode);
48072 for (i = 0; i < nelt; i++)
48073 dfirst.perm[i] = (i & (nelt2 - 1))
48074 + ((perm >> (2 * (i >= nelt2))) & 3) * nelt2;
48076 canonicalize_perm (&dfirst);
48077 ok = expand_vec_perm_1 (&dfirst);
48078 gcc_assert (ok);
48080 /* And dsecond is some single insn shuffle, taking
48081 d->op0 and result of vperm2f128 (if perm < 16) or
48082 d->op1 and result of vperm2f128 (otherwise). */
48083 if (perm >= 16)
48084 dsecond.op0 = dsecond.op1;
48085 dsecond.op1 = dfirst.target;
48087 ok = expand_vec_perm_1 (&dsecond);
48088 gcc_assert (ok);
48090 return true;
48093 /* For one operand, the only useful vperm2f128 permutation is 0x01
48094 aka lanes swap. */
48095 if (d->one_operand_p)
48096 return false;
48099 return false;
48102 /* A subroutine of ix86_expand_vec_perm_builtin_1. Try to simplify
48103 a two vector permutation using 2 intra-lane interleave insns
48104 and cross-lane shuffle for 32-byte vectors. */
48106 static bool
48107 expand_vec_perm_interleave3 (struct expand_vec_perm_d *d)
48109 unsigned i, nelt;
48110 rtx (*gen) (rtx, rtx, rtx);
48112 if (d->one_operand_p)
48113 return false;
48114 if (TARGET_AVX2 && GET_MODE_SIZE (d->vmode) == 32)
48116 else if (TARGET_AVX && (d->vmode == V8SFmode || d->vmode == V4DFmode))
48118 else
48119 return false;
48121 nelt = d->nelt;
48122 if (d->perm[0] != 0 && d->perm[0] != nelt / 2)
48123 return false;
48124 for (i = 0; i < nelt; i += 2)
48125 if (d->perm[i] != d->perm[0] + i / 2
48126 || d->perm[i + 1] != d->perm[0] + i / 2 + nelt)
48127 return false;
48129 if (d->testing_p)
48130 return true;
48132 switch (d->vmode)
48134 case V32QImode:
48135 if (d->perm[0])
48136 gen = gen_vec_interleave_highv32qi;
48137 else
48138 gen = gen_vec_interleave_lowv32qi;
48139 break;
48140 case V16HImode:
48141 if (d->perm[0])
48142 gen = gen_vec_interleave_highv16hi;
48143 else
48144 gen = gen_vec_interleave_lowv16hi;
48145 break;
48146 case V8SImode:
48147 if (d->perm[0])
48148 gen = gen_vec_interleave_highv8si;
48149 else
48150 gen = gen_vec_interleave_lowv8si;
48151 break;
48152 case V4DImode:
48153 if (d->perm[0])
48154 gen = gen_vec_interleave_highv4di;
48155 else
48156 gen = gen_vec_interleave_lowv4di;
48157 break;
48158 case V8SFmode:
48159 if (d->perm[0])
48160 gen = gen_vec_interleave_highv8sf;
48161 else
48162 gen = gen_vec_interleave_lowv8sf;
48163 break;
48164 case V4DFmode:
48165 if (d->perm[0])
48166 gen = gen_vec_interleave_highv4df;
48167 else
48168 gen = gen_vec_interleave_lowv4df;
48169 break;
48170 default:
48171 gcc_unreachable ();
48174 emit_insn (gen (d->target, d->op0, d->op1));
48175 return true;
48178 /* A subroutine of ix86_expand_vec_perm_builtin_1. Try to implement
48179 a single vector permutation using a single intra-lane vector
48180 permutation, vperm2f128 swapping the lanes and vblend* insn blending
48181 the non-swapped and swapped vectors together. */
48183 static bool
48184 expand_vec_perm_vperm2f128_vblend (struct expand_vec_perm_d *d)
48186 struct expand_vec_perm_d dfirst, dsecond;
48187 unsigned i, j, msk, nelt = d->nelt, nelt2 = nelt / 2;
48188 rtx_insn *seq;
48189 bool ok;
48190 rtx (*blend) (rtx, rtx, rtx, rtx) = NULL;
48192 if (!TARGET_AVX
48193 || TARGET_AVX2
48194 || (d->vmode != V8SFmode && d->vmode != V4DFmode)
48195 || !d->one_operand_p)
48196 return false;
48198 dfirst = *d;
48199 for (i = 0; i < nelt; i++)
48200 dfirst.perm[i] = 0xff;
48201 for (i = 0, msk = 0; i < nelt; i++)
48203 j = (d->perm[i] & nelt2) ? i | nelt2 : i & ~nelt2;
48204 if (dfirst.perm[j] != 0xff && dfirst.perm[j] != d->perm[i])
48205 return false;
48206 dfirst.perm[j] = d->perm[i];
48207 if (j != i)
48208 msk |= (1 << i);
48210 for (i = 0; i < nelt; i++)
48211 if (dfirst.perm[i] == 0xff)
48212 dfirst.perm[i] = i;
48214 if (!d->testing_p)
48215 dfirst.target = gen_reg_rtx (dfirst.vmode);
48217 start_sequence ();
48218 ok = expand_vec_perm_1 (&dfirst);
48219 seq = get_insns ();
48220 end_sequence ();
48222 if (!ok)
48223 return false;
48225 if (d->testing_p)
48226 return true;
48228 emit_insn (seq);
48230 dsecond = *d;
48231 dsecond.op0 = dfirst.target;
48232 dsecond.op1 = dfirst.target;
48233 dsecond.one_operand_p = true;
48234 dsecond.target = gen_reg_rtx (dsecond.vmode);
48235 for (i = 0; i < nelt; i++)
48236 dsecond.perm[i] = i ^ nelt2;
48238 ok = expand_vec_perm_1 (&dsecond);
48239 gcc_assert (ok);
48241 blend = d->vmode == V8SFmode ? gen_avx_blendps256 : gen_avx_blendpd256;
48242 emit_insn (blend (d->target, dfirst.target, dsecond.target, GEN_INT (msk)));
48243 return true;
48246 /* A subroutine of ix86_expand_vec_perm_builtin_1. Implement a V4DF
48247 permutation using two vperm2f128, followed by a vshufpd insn blending
48248 the two vectors together. */
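/* For example, the two-operand V4DFmode permutation { 2, 6, 1, 5 } becomes a
   vperm2f128 producing { op0[2], op0[3], op0[0], op0[1] }, another producing
   { op1[2], op1[3], op1[0], op1[1] }, and a final vshufpd selecting
   { 0, 4, 3, 7 } from those two intermediate results.  */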
48250 static bool
48251 expand_vec_perm_2vperm2f128_vshuf (struct expand_vec_perm_d *d)
48253 struct expand_vec_perm_d dfirst, dsecond, dthird;
48254 bool ok;
48256 if (!TARGET_AVX || (d->vmode != V4DFmode))
48257 return false;
48259 if (d->testing_p)
48260 return true;
48262 dfirst = *d;
48263 dsecond = *d;
48264 dthird = *d;
48266 dfirst.perm[0] = (d->perm[0] & ~1);
48267 dfirst.perm[1] = (d->perm[0] & ~1) + 1;
48268 dfirst.perm[2] = (d->perm[2] & ~1);
48269 dfirst.perm[3] = (d->perm[2] & ~1) + 1;
48270 dsecond.perm[0] = (d->perm[1] & ~1);
48271 dsecond.perm[1] = (d->perm[1] & ~1) + 1;
48272 dsecond.perm[2] = (d->perm[3] & ~1);
48273 dsecond.perm[3] = (d->perm[3] & ~1) + 1;
48274 dthird.perm[0] = (d->perm[0] % 2);
48275 dthird.perm[1] = (d->perm[1] % 2) + 4;
48276 dthird.perm[2] = (d->perm[2] % 2) + 2;
48277 dthird.perm[3] = (d->perm[3] % 2) + 6;
48279 dfirst.target = gen_reg_rtx (dfirst.vmode);
48280 dsecond.target = gen_reg_rtx (dsecond.vmode);
48281 dthird.op0 = dfirst.target;
48282 dthird.op1 = dsecond.target;
48283 dthird.one_operand_p = false;
48285 canonicalize_perm (&dfirst);
48286 canonicalize_perm (&dsecond);
48288 ok = expand_vec_perm_1 (&dfirst)
48289 && expand_vec_perm_1 (&dsecond)
48290 && expand_vec_perm_1 (&dthird);
48292 gcc_assert (ok);
48294 return true;
48297 /* A subroutine of expand_vec_perm_even_odd_1. Implement the double-word
48298 permutation with two pshufb insns and an ior. We should have already
48299 failed all two instruction sequences. */
48301 static bool
48302 expand_vec_perm_pshufb2 (struct expand_vec_perm_d *d)
48304 rtx rperm[2][16], vperm, l, h, op, m128;
48305 unsigned int i, nelt, eltsz;
48307 if (!TARGET_SSSE3 || GET_MODE_SIZE (d->vmode) != 16)
48308 return false;
48309 gcc_assert (!d->one_operand_p);
48311 if (d->testing_p)
48312 return true;
48314 nelt = d->nelt;
48315 eltsz = GET_MODE_SIZE (GET_MODE_INNER (d->vmode));
48317 /* Generate two permutation masks.  If the required element is within
48318 the given vector, it is shuffled into the proper lane.  If the required
48319 element is in the other vector, force a zero into the lane by setting
48320 bit 7 in the permutation mask.  */
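/* For instance, interleaving the low halves of two V16QImode operands,
   i.e. perm = { 0, 16, 1, 17, ... }, uses the masks { 0, -128, 1, -128, ... }
   and { -128, 0, -128, 1, ... }; each pshufb then contributes only its own
   operand's bytes and the final por combines the two results.  */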
48321 m128 = GEN_INT (-128);
48322 for (i = 0; i < nelt; ++i)
48324 unsigned j, e = d->perm[i];
48325 unsigned which = (e >= nelt);
48326 if (e >= nelt)
48327 e -= nelt;
48329 for (j = 0; j < eltsz; ++j)
48331 rperm[which][i*eltsz + j] = GEN_INT (e*eltsz + j);
48332 rperm[1-which][i*eltsz + j] = m128;
48336 vperm = gen_rtx_CONST_VECTOR (V16QImode, gen_rtvec_v (16, rperm[0]));
48337 vperm = force_reg (V16QImode, vperm);
48339 l = gen_reg_rtx (V16QImode);
48340 op = gen_lowpart (V16QImode, d->op0);
48341 emit_insn (gen_ssse3_pshufbv16qi3 (l, op, vperm));
48343 vperm = gen_rtx_CONST_VECTOR (V16QImode, gen_rtvec_v (16, rperm[1]));
48344 vperm = force_reg (V16QImode, vperm);
48346 h = gen_reg_rtx (V16QImode);
48347 op = gen_lowpart (V16QImode, d->op1);
48348 emit_insn (gen_ssse3_pshufbv16qi3 (h, op, vperm));
48350 op = d->target;
48351 if (d->vmode != V16QImode)
48352 op = gen_reg_rtx (V16QImode);
48353 emit_insn (gen_iorv16qi3 (op, l, h));
48354 if (op != d->target)
48355 emit_move_insn (d->target, gen_lowpart (d->vmode, op));
48357 return true;
48360 /* Implement arbitrary permutation of one V32QImode and V16QImode operand
48361 with two vpshufb insns, vpermq and vpor. We should have already failed
48362 all two or three instruction sequences. */
48364 static bool
48365 expand_vec_perm_vpshufb2_vpermq (struct expand_vec_perm_d *d)
48367 rtx rperm[2][32], vperm, l, h, hp, op, m128;
48368 unsigned int i, nelt, eltsz;
48370 if (!TARGET_AVX2
48371 || !d->one_operand_p
48372 || (d->vmode != V32QImode && d->vmode != V16HImode))
48373 return false;
48375 if (d->testing_p)
48376 return true;
48378 nelt = d->nelt;
48379 eltsz = GET_MODE_SIZE (GET_MODE_INNER (d->vmode));
48381 /* Generate two permutation masks.  If the required element is within
48382 the same lane, it is shuffled in.  If the required element is from the
48383 other lane, force a zero by setting bit 7 in the permutation mask.
48384 The other mask has non-negative elements where an element is requested
48385 from the other lane; those elements are also moved to the other lane,
48386 so that the result of vpshufb can have its two V2TImode halves
48387 swapped.  */
48388 m128 = GEN_INT (-128);
48389 for (i = 0; i < nelt; ++i)
48391 unsigned j, e = d->perm[i] & (nelt / 2 - 1);
48392 unsigned which = ((d->perm[i] ^ i) & (nelt / 2)) * eltsz;
48394 for (j = 0; j < eltsz; ++j)
48396 rperm[!!which][(i * eltsz + j) ^ which] = GEN_INT (e * eltsz + j);
48397 rperm[!which][(i * eltsz + j) ^ (which ^ 16)] = m128;
48401 vperm = gen_rtx_CONST_VECTOR (V32QImode, gen_rtvec_v (32, rperm[1]));
48402 vperm = force_reg (V32QImode, vperm);
48404 h = gen_reg_rtx (V32QImode);
48405 op = gen_lowpart (V32QImode, d->op0);
48406 emit_insn (gen_avx2_pshufbv32qi3 (h, op, vperm));
48408 /* Swap the 128-bit lanes of h into hp. */
48409 hp = gen_reg_rtx (V4DImode);
48410 op = gen_lowpart (V4DImode, h);
48411 emit_insn (gen_avx2_permv4di_1 (hp, op, const2_rtx, GEN_INT (3), const0_rtx,
48412 const1_rtx));
48414 vperm = gen_rtx_CONST_VECTOR (V32QImode, gen_rtvec_v (32, rperm[0]));
48415 vperm = force_reg (V32QImode, vperm);
48417 l = gen_reg_rtx (V32QImode);
48418 op = gen_lowpart (V32QImode, d->op0);
48419 emit_insn (gen_avx2_pshufbv32qi3 (l, op, vperm));
48421 op = d->target;
48422 if (d->vmode != V32QImode)
48423 op = gen_reg_rtx (V32QImode);
48424 emit_insn (gen_iorv32qi3 (op, l, gen_lowpart (V32QImode, hp)));
48425 if (op != d->target)
48426 emit_move_insn (d->target, gen_lowpart (d->vmode, op));
48428 return true;
48431 /* A subroutine of expand_vec_perm_even_odd_1. Implement extract-even
48432 and extract-odd permutations of two V32QImode and V16QImode operands
48433 with two vpshufb insns, vpor and vpermq. We should have already
48434 failed all two or three instruction sequences. */
48436 static bool
48437 expand_vec_perm_vpshufb2_vpermq_even_odd (struct expand_vec_perm_d *d)
48439 rtx rperm[2][32], vperm, l, h, ior, op, m128;
48440 unsigned int i, nelt, eltsz;
48442 if (!TARGET_AVX2
48443 || d->one_operand_p
48444 || (d->vmode != V32QImode && d->vmode != V16HImode))
48445 return false;
48447 for (i = 0; i < d->nelt; ++i)
48448 if ((d->perm[i] ^ (i * 2)) & (3 * d->nelt / 2))
48449 return false;
48451 if (d->testing_p)
48452 return true;
48454 nelt = d->nelt;
48455 eltsz = GET_MODE_SIZE (GET_MODE_INNER (d->vmode));
48457 /* Generate two permutation masks. In the first permutation mask
48458 the first quarter will contain indexes for the first half
48459 of op0, the second quarter will contain bit 7 set, the third quarter
48460 will contain indexes for the second half of op0 and the
48461 last quarter bit 7 set.  In the second permutation mask
48462 the first quarter will contain bit 7 set, the second quarter
48463 indexes for the first half of op1, the third quarter bit 7 set
48464 and the last quarter indexes for the second half of op1.
48465 I.e. the first mask e.g. for V32QImode extract even will be:
48466 0, 2, ..., 0xe, -128, ..., -128, 0, 2, ..., 0xe, -128, ..., -128
48467 (all values masked with 0xf except for -128) and second mask
48468 for extract even will be
48469 -128, ..., -128, 0, 2, ..., 0xe, -128, ..., -128, 0, 2, ..., 0xe. */
48470 m128 = GEN_INT (-128);
48471 for (i = 0; i < nelt; ++i)
48473 unsigned j, e = d->perm[i] & (nelt / 2 - 1);
48474 unsigned which = d->perm[i] >= nelt;
48475 unsigned xorv = (i >= nelt / 4 && i < 3 * nelt / 4) ? 24 : 0;
48477 for (j = 0; j < eltsz; ++j)
48479 rperm[which][(i * eltsz + j) ^ xorv] = GEN_INT (e * eltsz + j);
48480 rperm[1 - which][(i * eltsz + j) ^ xorv] = m128;
48484 vperm = gen_rtx_CONST_VECTOR (V32QImode, gen_rtvec_v (32, rperm[0]));
48485 vperm = force_reg (V32QImode, vperm);
48487 l = gen_reg_rtx (V32QImode);
48488 op = gen_lowpart (V32QImode, d->op0);
48489 emit_insn (gen_avx2_pshufbv32qi3 (l, op, vperm));
48491 vperm = gen_rtx_CONST_VECTOR (V32QImode, gen_rtvec_v (32, rperm[1]));
48492 vperm = force_reg (V32QImode, vperm);
48494 h = gen_reg_rtx (V32QImode);
48495 op = gen_lowpart (V32QImode, d->op1);
48496 emit_insn (gen_avx2_pshufbv32qi3 (h, op, vperm));
48498 ior = gen_reg_rtx (V32QImode);
48499 emit_insn (gen_iorv32qi3 (ior, l, h));
48501 /* Permute the V4DImode quarters using { 0, 2, 1, 3 } permutation. */
48502 op = gen_reg_rtx (V4DImode);
48503 ior = gen_lowpart (V4DImode, ior);
48504 emit_insn (gen_avx2_permv4di_1 (op, ior, const0_rtx, const2_rtx,
48505 const1_rtx, GEN_INT (3)));
48506 emit_move_insn (d->target, gen_lowpart (d->vmode, op));
48508 return true;
48511 /* A subroutine of expand_vec_perm_even_odd_1. Implement extract-even
48512 and extract-odd permutations of two V16QI, V8HI, V16HI or V32QI operands
48513 with two "and" and "pack" or two "shift" and "pack" insns. We should
48514 have already failed all two instruction sequences. */
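/* E.g. for V16QImode extract-even, each operand is ANDed with 0x00ff in
   every word and the two results are narrowed with packuswb; for
   extract-odd, the odd bytes are instead shifted down into the low byte of
   each word with a logical right shift before the same pack.  */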
48516 static bool
48517 expand_vec_perm_even_odd_pack (struct expand_vec_perm_d *d)
48519 rtx op, dop0, dop1, t, rperm[16];
48520 unsigned i, odd, c, s, nelt = d->nelt;
48521 bool end_perm = false;
48522 machine_mode half_mode;
48523 rtx (*gen_and) (rtx, rtx, rtx);
48524 rtx (*gen_pack) (rtx, rtx, rtx);
48525 rtx (*gen_shift) (rtx, rtx, rtx);
48527 if (d->one_operand_p)
48528 return false;
48530 switch (d->vmode)
48532 case V8HImode:
48533 /* Required for "pack". */
48534 if (!TARGET_SSE4_1)
48535 return false;
48536 c = 0xffff;
48537 s = 16;
48538 half_mode = V4SImode;
48539 gen_and = gen_andv4si3;
48540 gen_pack = gen_sse4_1_packusdw;
48541 gen_shift = gen_lshrv4si3;
48542 break;
48543 case V16QImode:
48544 /* No check as all instructions are SSE2. */
48545 c = 0xff;
48546 s = 8;
48547 half_mode = V8HImode;
48548 gen_and = gen_andv8hi3;
48549 gen_pack = gen_sse2_packuswb;
48550 gen_shift = gen_lshrv8hi3;
48551 break;
48552 case V16HImode:
48553 if (!TARGET_AVX2)
48554 return false;
48555 c = 0xffff;
48556 s = 16;
48557 half_mode = V8SImode;
48558 gen_and = gen_andv8si3;
48559 gen_pack = gen_avx2_packusdw;
48560 gen_shift = gen_lshrv8si3;
48561 end_perm = true;
48562 break;
48563 case V32QImode:
48564 if (!TARGET_AVX2)
48565 return false;
48566 c = 0xff;
48567 s = 8;
48568 half_mode = V16HImode;
48569 gen_and = gen_andv16hi3;
48570 gen_pack = gen_avx2_packuswb;
48571 gen_shift = gen_lshrv16hi3;
48572 end_perm = true;
48573 break;
48574 default:
48575 /* Only V8HI, V16QI, V16HI and V32QI modes are more profitable than
48576 general shuffles. */
48577 return false;
48580 /* Check that permutation is even or odd. */
48581 odd = d->perm[0];
48582 if (odd > 1)
48583 return false;
48585 for (i = 1; i < nelt; ++i)
48586 if (d->perm[i] != 2 * i + odd)
48587 return false;
48589 if (d->testing_p)
48590 return true;
48592 dop0 = gen_reg_rtx (half_mode);
48593 dop1 = gen_reg_rtx (half_mode);
48594 if (odd == 0)
48596 for (i = 0; i < nelt / 2; i++)
48597 rperm[i] = GEN_INT (c);
48598 t = gen_rtx_CONST_VECTOR (half_mode, gen_rtvec_v (nelt / 2, rperm));
48599 t = force_reg (half_mode, t);
48600 emit_insn (gen_and (dop0, t, gen_lowpart (half_mode, d->op0)));
48601 emit_insn (gen_and (dop1, t, gen_lowpart (half_mode, d->op1)));
48603 else
48605 emit_insn (gen_shift (dop0,
48606 gen_lowpart (half_mode, d->op0),
48607 GEN_INT (s)));
48608 emit_insn (gen_shift (dop1,
48609 gen_lowpart (half_mode, d->op1),
48610 GEN_INT (s)));
48612 /* In the AVX2 256-bit case we need to permute the pack result. */
48613 if (TARGET_AVX2 && end_perm)
48615 op = gen_reg_rtx (d->vmode);
48616 t = gen_reg_rtx (V4DImode);
48617 emit_insn (gen_pack (op, dop0, dop1));
48618 emit_insn (gen_avx2_permv4di_1 (t,
48619 gen_lowpart (V4DImode, op),
48620 const0_rtx,
48621 const2_rtx,
48622 const1_rtx,
48623 GEN_INT (3)));
48624 emit_move_insn (d->target, gen_lowpart (d->vmode, t));
48626 else
48627 emit_insn (gen_pack (d->target, dop0, dop1));
48629 return true;
48632 /* A subroutine of ix86_expand_vec_perm_builtin_1. Implement extract-even
48633 and extract-odd permutations. */
48635 static bool
48636 expand_vec_perm_even_odd_1 (struct expand_vec_perm_d *d, unsigned odd)
48638 rtx t1, t2, t3, t4, t5;
48640 switch (d->vmode)
48642 case V4DFmode:
48643 if (d->testing_p)
48644 break;
48645 t1 = gen_reg_rtx (V4DFmode);
48646 t2 = gen_reg_rtx (V4DFmode);
48648 /* Shuffle the lanes around into { 0 1 4 5 } and { 2 3 6 7 }. */
48649 emit_insn (gen_avx_vperm2f128v4df3 (t1, d->op0, d->op1, GEN_INT (0x20)));
48650 emit_insn (gen_avx_vperm2f128v4df3 (t2, d->op0, d->op1, GEN_INT (0x31)));
48652 /* Now an unpck[lh]pd will produce the result required. */
48653 if (odd)
48654 t3 = gen_avx_unpckhpd256 (d->target, t1, t2);
48655 else
48656 t3 = gen_avx_unpcklpd256 (d->target, t1, t2);
48657 emit_insn (t3);
48658 break;
48660 case V8SFmode:
48662 int mask = odd ? 0xdd : 0x88;
48664 if (d->testing_p)
48665 break;
48666 t1 = gen_reg_rtx (V8SFmode);
48667 t2 = gen_reg_rtx (V8SFmode);
48668 t3 = gen_reg_rtx (V8SFmode);
48670 /* Shuffle within the 128-bit lanes to produce:
48671 { 0 2 8 a 4 6 c e } | { 1 3 9 b 5 7 d f }. */
48672 emit_insn (gen_avx_shufps256 (t1, d->op0, d->op1,
48673 GEN_INT (mask)));
48675 /* Shuffle the lanes around to produce:
48676 { 4 6 c e 0 2 8 a } and { 5 7 d f 1 3 9 b }. */
48677 emit_insn (gen_avx_vperm2f128v8sf3 (t2, t1, t1,
48678 GEN_INT (0x3)));
48680 /* Shuffle within the 128-bit lanes to produce:
48681 { 0 2 4 6 4 6 0 2 } | { 1 3 5 7 5 7 1 3 }. */
48682 emit_insn (gen_avx_shufps256 (t3, t1, t2, GEN_INT (0x44)));
48684 /* Shuffle within the 128-bit lanes to produce:
48685 { 8 a c e c e 8 a } | { 9 b d f d f 9 b }. */
48686 emit_insn (gen_avx_shufps256 (t2, t1, t2, GEN_INT (0xee)));
48688 /* Shuffle the lanes around to produce:
48689 { 0 2 4 6 8 a c e } | { 1 3 5 7 9 b d f }. */
48690 emit_insn (gen_avx_vperm2f128v8sf3 (d->target, t3, t2,
48691 GEN_INT (0x20)));
48693 break;
48695 case V2DFmode:
48696 case V4SFmode:
48697 case V2DImode:
48698 case V4SImode:
48699 /* These are always directly implementable by expand_vec_perm_1. */
48700 gcc_unreachable ();
48702 case V8HImode:
48703 if (TARGET_SSE4_1)
48704 return expand_vec_perm_even_odd_pack (d);
48705 else if (TARGET_SSSE3 && !TARGET_SLOW_PSHUFB)
48706 return expand_vec_perm_pshufb2 (d);
48707 else
48709 if (d->testing_p)
48710 break;
48711 /* We need 2*log2(N)-1 operations to achieve odd/even
48712 with interleave. */
48713 t1 = gen_reg_rtx (V8HImode);
48714 t2 = gen_reg_rtx (V8HImode);
48715 emit_insn (gen_vec_interleave_highv8hi (t1, d->op0, d->op1));
48716 emit_insn (gen_vec_interleave_lowv8hi (d->target, d->op0, d->op1));
48717 emit_insn (gen_vec_interleave_highv8hi (t2, d->target, t1));
48718 emit_insn (gen_vec_interleave_lowv8hi (d->target, d->target, t1));
48719 if (odd)
48720 t3 = gen_vec_interleave_highv8hi (d->target, d->target, t2);
48721 else
48722 t3 = gen_vec_interleave_lowv8hi (d->target, d->target, t2);
48723 emit_insn (t3);
48725 break;
48727 case V16QImode:
48728 return expand_vec_perm_even_odd_pack (d);
48730 case V16HImode:
48731 case V32QImode:
48732 return expand_vec_perm_even_odd_pack (d);
48734 case V4DImode:
48735 if (!TARGET_AVX2)
48737 struct expand_vec_perm_d d_copy = *d;
48738 d_copy.vmode = V4DFmode;
48739 if (d->testing_p)
48740 d_copy.target = gen_lowpart (V4DFmode, d->target);
48741 else
48742 d_copy.target = gen_reg_rtx (V4DFmode);
48743 d_copy.op0 = gen_lowpart (V4DFmode, d->op0);
48744 d_copy.op1 = gen_lowpart (V4DFmode, d->op1);
48745 if (expand_vec_perm_even_odd_1 (&d_copy, odd))
48747 if (!d->testing_p)
48748 emit_move_insn (d->target,
48749 gen_lowpart (V4DImode, d_copy.target));
48750 return true;
48752 return false;
48755 if (d->testing_p)
48756 break;
48758 t1 = gen_reg_rtx (V4DImode);
48759 t2 = gen_reg_rtx (V4DImode);
48761 /* Shuffle the lanes around into { 0 1 4 5 } and { 2 3 6 7 }. */
48762 emit_insn (gen_avx2_permv2ti (t1, d->op0, d->op1, GEN_INT (0x20)));
48763 emit_insn (gen_avx2_permv2ti (t2, d->op0, d->op1, GEN_INT (0x31)));
48765 /* Now a vpunpck[lh]qdq will produce the result required. */
48766 if (odd)
48767 t3 = gen_avx2_interleave_highv4di (d->target, t1, t2);
48768 else
48769 t3 = gen_avx2_interleave_lowv4di (d->target, t1, t2);
48770 emit_insn (t3);
48771 break;
48773 case V8SImode:
48774 if (!TARGET_AVX2)
48776 struct expand_vec_perm_d d_copy = *d;
48777 d_copy.vmode = V8SFmode;
48778 if (d->testing_p)
48779 d_copy.target = gen_lowpart (V8SFmode, d->target);
48780 else
48781 d_copy.target = gen_reg_rtx (V8SFmode);
48782 d_copy.op0 = gen_lowpart (V8SFmode, d->op0);
48783 d_copy.op1 = gen_lowpart (V8SFmode, d->op1);
48784 if (expand_vec_perm_even_odd_1 (&d_copy, odd))
48786 if (!d->testing_p)
48787 emit_move_insn (d->target,
48788 gen_lowpart (V8SImode, d_copy.target));
48789 return true;
48791 return false;
48794 if (d->testing_p)
48795 break;
48797 t1 = gen_reg_rtx (V8SImode);
48798 t2 = gen_reg_rtx (V8SImode);
48799 t3 = gen_reg_rtx (V4DImode);
48800 t4 = gen_reg_rtx (V4DImode);
48801 t5 = gen_reg_rtx (V4DImode);
48803 /* Shuffle the lanes around into
48804 { 0 1 2 3 8 9 a b } and { 4 5 6 7 c d e f }. */
48805 emit_insn (gen_avx2_permv2ti (t3, gen_lowpart (V4DImode, d->op0),
48806 gen_lowpart (V4DImode, d->op1),
48807 GEN_INT (0x20)));
48808 emit_insn (gen_avx2_permv2ti (t4, gen_lowpart (V4DImode, d->op0),
48809 gen_lowpart (V4DImode, d->op1),
48810 GEN_INT (0x31)));
48812 /* Swap the 2nd and 3rd position in each lane into
48813 { 0 2 1 3 8 a 9 b } and { 4 6 5 7 c e d f }. */
48814 emit_insn (gen_avx2_pshufdv3 (t1, gen_lowpart (V8SImode, t3),
48815 GEN_INT (2 * 4 + 1 * 16 + 3 * 64)));
48816 emit_insn (gen_avx2_pshufdv3 (t2, gen_lowpart (V8SImode, t4),
48817 GEN_INT (2 * 4 + 1 * 16 + 3 * 64)));
48819 /* Now a vpunpck[lh]qdq will produce
48820 { 0 2 4 6 8 a c e } resp. { 1 3 5 7 9 b d f }. */
48821 if (odd)
48822 t3 = gen_avx2_interleave_highv4di (t5, gen_lowpart (V4DImode, t1),
48823 gen_lowpart (V4DImode, t2));
48824 else
48825 t3 = gen_avx2_interleave_lowv4di (t5, gen_lowpart (V4DImode, t1),
48826 gen_lowpart (V4DImode, t2));
48827 emit_insn (t3);
48828 emit_move_insn (d->target, gen_lowpart (V8SImode, t5));
48829 break;
48831 default:
48832 gcc_unreachable ();
48835 return true;
48838 /* A subroutine of ix86_expand_vec_perm_builtin_1. Pattern match
48839 extract-even and extract-odd permutations. */
48841 static bool
48842 expand_vec_perm_even_odd (struct expand_vec_perm_d *d)
48844 unsigned i, odd, nelt = d->nelt;
48846 odd = d->perm[0];
48847 if (odd != 0 && odd != 1)
48848 return false;
48850 for (i = 1; i < nelt; ++i)
48851 if (d->perm[i] != 2 * i + odd)
48852 return false;
48854 return expand_vec_perm_even_odd_1 (d, odd);
48857 /* A subroutine of ix86_expand_vec_perm_builtin_1. Implement broadcast
48858 permutations. We assume that expand_vec_perm_1 has already failed. */
48860 static bool
48861 expand_vec_perm_broadcast_1 (struct expand_vec_perm_d *d)
48863 unsigned elt = d->perm[0], nelt2 = d->nelt / 2;
48864 machine_mode vmode = d->vmode;
48865 unsigned char perm2[4];
48866 rtx op0 = d->op0, dest;
48867 bool ok;
48869 switch (vmode)
48871 case V4DFmode:
48872 case V8SFmode:
48873 /* These are special-cased in sse.md so that we can optionally
48874 use the vbroadcast instruction. They expand to two insns
48875 if the input happens to be in a register. */
48876 gcc_unreachable ();
48878 case V2DFmode:
48879 case V2DImode:
48880 case V4SFmode:
48881 case V4SImode:
48882 /* These are always implementable using standard shuffle patterns. */
48883 gcc_unreachable ();
48885 case V8HImode:
48886 case V16QImode:
48887 /* These can be implemented via interleave. We save one insn by
48888 stopping once we have promoted to V4SImode and then using pshufd. */
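/* For example, broadcasting byte 5 of a V16QImode vector uses an
   interleave-low at the QImode step, an interleave-high at the HImode step,
   and then a pshufd that splats SImode element 1.  */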
48889 if (d->testing_p)
48890 return true;
48893 rtx dest;
48894 rtx (*gen) (rtx, rtx, rtx)
48895 = vmode == V16QImode ? gen_vec_interleave_lowv16qi
48896 : gen_vec_interleave_lowv8hi;
48898 if (elt >= nelt2)
48900 gen = vmode == V16QImode ? gen_vec_interleave_highv16qi
48901 : gen_vec_interleave_highv8hi;
48902 elt -= nelt2;
48904 nelt2 /= 2;
48906 dest = gen_reg_rtx (vmode);
48907 emit_insn (gen (dest, op0, op0));
48908 vmode = get_mode_wider_vector (vmode);
48909 op0 = gen_lowpart (vmode, dest);
48911 while (vmode != V4SImode);
48913 memset (perm2, elt, 4);
48914 dest = gen_reg_rtx (V4SImode);
48915 ok = expand_vselect (dest, op0, perm2, 4, d->testing_p);
48916 gcc_assert (ok);
48917 if (!d->testing_p)
48918 emit_move_insn (d->target, gen_lowpart (d->vmode, dest));
48919 return true;
48921 case V64QImode:
48922 case V32QImode:
48923 case V16HImode:
48924 case V8SImode:
48925 case V4DImode:
48926 /* For AVX2 broadcasts of the first element vpbroadcast* or
48927 vpermq should be used by expand_vec_perm_1. */
48928 gcc_assert (!TARGET_AVX2 || d->perm[0]);
48929 return false;
48931 default:
48932 gcc_unreachable ();
48936 /* A subroutine of ix86_expand_vec_perm_builtin_1. Pattern match
48937 broadcast permutations. */
48939 static bool
48940 expand_vec_perm_broadcast (struct expand_vec_perm_d *d)
48942 unsigned i, elt, nelt = d->nelt;
48944 if (!d->one_operand_p)
48945 return false;
48947 elt = d->perm[0];
48948 for (i = 1; i < nelt; ++i)
48949 if (d->perm[i] != elt)
48950 return false;
48952 return expand_vec_perm_broadcast_1 (d);
48955 /* Implement arbitrary permutations of two V64QImode operands
48956 with 2 vpermi2w, 2 vpshufb and one vpor instruction. */
48957 static bool
48958 expand_vec_perm_vpermi2_vpshub2 (struct expand_vec_perm_d *d)
48960 if (!TARGET_AVX512BW || !(d->vmode == V64QImode))
48961 return false;
48963 if (d->testing_p)
48964 return true;
48966 struct expand_vec_perm_d ds[2];
48967 rtx rperm[128], vperm, target0, target1;
48968 unsigned int i, nelt;
48969 machine_mode vmode;
48971 nelt = d->nelt;
48972 vmode = V64QImode;
48974 for (i = 0; i < 2; i++)
48976 ds[i] = *d;
48977 ds[i].vmode = V32HImode;
48978 ds[i].nelt = 32;
48979 ds[i].target = gen_reg_rtx (V32HImode);
48980 ds[i].op0 = gen_lowpart (V32HImode, d->op0);
48981 ds[i].op1 = gen_lowpart (V32HImode, d->op1);
48984 /* Prepare permutations such that the first one takes care of
48985 putting the even bytes into the right positions or one
48986 position higher (ds[0]) and the second one takes care of
48987 putting the odd bytes into the right positions or one
48988 position lower (ds[1]).  */
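/* Only the low bits of each vpshufb index matter here because vpshufb
   shuffles within 128-bit lanes; bit 7 (the -1 entries below) forces the
   corresponding byte to zero so that the final vpor can merge the two
   results.  */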
48990 for (i = 0; i < nelt; i++)
48992 ds[i & 1].perm[i / 2] = d->perm[i] / 2;
48993 if (i & 1)
48995 rperm[i] = constm1_rtx;
48996 rperm[i + 64] = GEN_INT ((i & 14) + (d->perm[i] & 1));
48998 else
49000 rperm[i] = GEN_INT ((i & 14) + (d->perm[i] & 1));
49001 rperm[i + 64] = constm1_rtx;
49005 bool ok = expand_vec_perm_1 (&ds[0]);
49006 gcc_assert (ok);
49007 ds[0].target = gen_lowpart (V64QImode, ds[0].target);
49009 ok = expand_vec_perm_1 (&ds[1]);
49010 gcc_assert (ok);
49011 ds[1].target = gen_lowpart (V64QImode, ds[1].target);
49013 vperm = gen_rtx_CONST_VECTOR (V64QImode, gen_rtvec_v (64, rperm));
49014 vperm = force_reg (vmode, vperm);
49015 target0 = gen_reg_rtx (V64QImode);
49016 emit_insn (gen_avx512bw_pshufbv64qi3 (target0, ds[0].target, vperm));
49018 vperm = gen_rtx_CONST_VECTOR (V64QImode, gen_rtvec_v (64, rperm + 64));
49019 vperm = force_reg (vmode, vperm);
49020 target1 = gen_reg_rtx (V64QImode);
49021 emit_insn (gen_avx512bw_pshufbv64qi3 (target1, ds[1].target, vperm));
49023 emit_insn (gen_iorv64qi3 (d->target, target0, target1));
49024 return true;
49027 /* Implement arbitrary permutation of two V32QImode and V16QImode operands
49028 with 4 vpshufb insns, 2 vpermq and 3 vpor. We should have already failed
49029 all the shorter instruction sequences. */
49031 static bool
49032 expand_vec_perm_vpshufb4_vpermq2 (struct expand_vec_perm_d *d)
49034 rtx rperm[4][32], vperm, l[2], h[2], op, m128;
49035 unsigned int i, nelt, eltsz;
49036 bool used[4];
49038 if (!TARGET_AVX2
49039 || d->one_operand_p
49040 || (d->vmode != V32QImode && d->vmode != V16HImode))
49041 return false;
49043 if (d->testing_p)
49044 return true;
49046 nelt = d->nelt;
49047 eltsz = GET_MODE_SIZE (GET_MODE_INNER (d->vmode));
49049 /* Generate 4 permutation masks.  If the required element is within
49050 the same lane, it is shuffled in.  If the required element is from the
49051 other lane, force a zero by setting bit 7 in the permutation mask.
49052 The other masks have non-negative elements where an element is
49053 requested from the other lane; those elements are also moved to the
49054 other lane, so that the result of vpshufb can have the two V2TImode
49055 halves swapped.  */
49056 m128 = GEN_INT (-128);
49057 for (i = 0; i < 32; ++i)
49059 rperm[0][i] = m128;
49060 rperm[1][i] = m128;
49061 rperm[2][i] = m128;
49062 rperm[3][i] = m128;
49064 used[0] = false;
49065 used[1] = false;
49066 used[2] = false;
49067 used[3] = false;
49068 for (i = 0; i < nelt; ++i)
49070 unsigned j, e = d->perm[i] & (nelt / 2 - 1);
49071 unsigned xlane = ((d->perm[i] ^ i) & (nelt / 2)) * eltsz;
49072 unsigned int which = ((d->perm[i] & nelt) ? 2 : 0) + (xlane ? 1 : 0);
49074 for (j = 0; j < eltsz; ++j)
49075 rperm[which][(i * eltsz + j) ^ xlane] = GEN_INT (e * eltsz + j);
49076 used[which] = true;
49079 for (i = 0; i < 2; ++i)
49081 if (!used[2 * i + 1])
49083 h[i] = NULL_RTX;
49084 continue;
49086 vperm = gen_rtx_CONST_VECTOR (V32QImode,
49087 gen_rtvec_v (32, rperm[2 * i + 1]));
49088 vperm = force_reg (V32QImode, vperm);
49089 h[i] = gen_reg_rtx (V32QImode);
49090 op = gen_lowpart (V32QImode, i ? d->op1 : d->op0);
49091 emit_insn (gen_avx2_pshufbv32qi3 (h[i], op, vperm));
49094 /* Swap the 128-bit lanes of h[X]. */
49095 for (i = 0; i < 2; ++i)
49097 if (h[i] == NULL_RTX)
49098 continue;
49099 op = gen_reg_rtx (V4DImode);
49100 emit_insn (gen_avx2_permv4di_1 (op, gen_lowpart (V4DImode, h[i]),
49101 const2_rtx, GEN_INT (3), const0_rtx,
49102 const1_rtx));
49103 h[i] = gen_lowpart (V32QImode, op);
49106 for (i = 0; i < 2; ++i)
49108 if (!used[2 * i])
49110 l[i] = NULL_RTX;
49111 continue;
49113 vperm = gen_rtx_CONST_VECTOR (V32QImode, gen_rtvec_v (32, rperm[2 * i]));
49114 vperm = force_reg (V32QImode, vperm);
49115 l[i] = gen_reg_rtx (V32QImode);
49116 op = gen_lowpart (V32QImode, i ? d->op1 : d->op0);
49117 emit_insn (gen_avx2_pshufbv32qi3 (l[i], op, vperm));
49120 for (i = 0; i < 2; ++i)
49122 if (h[i] && l[i])
49124 op = gen_reg_rtx (V32QImode);
49125 emit_insn (gen_iorv32qi3 (op, l[i], h[i]));
49126 l[i] = op;
49128 else if (h[i])
49129 l[i] = h[i];
49132 gcc_assert (l[0] && l[1]);
49133 op = d->target;
49134 if (d->vmode != V32QImode)
49135 op = gen_reg_rtx (V32QImode);
49136 emit_insn (gen_iorv32qi3 (op, l[0], l[1]));
49137 if (op != d->target)
49138 emit_move_insn (d->target, gen_lowpart (d->vmode, op));
49139 return true;
49142 /* The guts of ix86_expand_vec_perm_const, also used by the ok hook.
49143 With all of the interface bits taken care of, perform the expansion
49144 in D and return true on success. */
49146 static bool
49147 ix86_expand_vec_perm_const_1 (struct expand_vec_perm_d *d)
49149 /* Try a single instruction expansion. */
49150 if (expand_vec_perm_1 (d))
49151 return true;
49153 /* Try sequences of two instructions. */
49155 if (expand_vec_perm_pshuflw_pshufhw (d))
49156 return true;
49158 if (expand_vec_perm_palignr (d, false))
49159 return true;
49161 if (expand_vec_perm_interleave2 (d))
49162 return true;
49164 if (expand_vec_perm_broadcast (d))
49165 return true;
49167 if (expand_vec_perm_vpermq_perm_1 (d))
49168 return true;
49170 if (expand_vec_perm_vperm2f128 (d))
49171 return true;
49173 if (expand_vec_perm_pblendv (d))
49174 return true;
49176 /* Try sequences of three instructions. */
49178 if (expand_vec_perm_even_odd_pack (d))
49179 return true;
49181 if (expand_vec_perm_2vperm2f128_vshuf (d))
49182 return true;
49184 if (expand_vec_perm_pshufb2 (d))
49185 return true;
49187 if (expand_vec_perm_interleave3 (d))
49188 return true;
49190 if (expand_vec_perm_vperm2f128_vblend (d))
49191 return true;
49193 /* Try sequences of four instructions. */
49195 if (expand_vec_perm_vpshufb2_vpermq (d))
49196 return true;
49198 if (expand_vec_perm_vpshufb2_vpermq_even_odd (d))
49199 return true;
49201 if (expand_vec_perm_vpermi2_vpshub2 (d))
49202 return true;
49204 /* ??? Look for narrow permutations whose element orderings would
49205 allow the promotion to a wider mode. */
49207 /* ??? Look for sequences of interleave or a wider permute that place
49208 the data into the correct lanes for a half-vector shuffle like
49209 pshuf[lh]w or vpermilps. */
49211 /* ??? Look for sequences of interleave that produce the desired results.
49212 The combinatorics of punpck[lh] get pretty ugly... */
49214 if (expand_vec_perm_even_odd (d))
49215 return true;
49217 /* Even longer sequences. */
49218 if (expand_vec_perm_vpshufb4_vpermq2 (d))
49219 return true;
49221 return false;
49224 /* If a permutation only uses one operand, make it clear. Returns true
49225 if the permutation references both operands. */
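/* For example, with nelt == 4 the selector { 4, 5, 6, 7 } only references
   the second operand, so it is folded to { 0, 1, 2, 3 } on op0 = op1 and
   one_operand_p is set.  */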
49227 static bool
49228 canonicalize_perm (struct expand_vec_perm_d *d)
49230 int i, which, nelt = d->nelt;
49232 for (i = which = 0; i < nelt; ++i)
49233 which |= (d->perm[i] < nelt ? 1 : 2);
49235 d->one_operand_p = true;
49236 switch (which)
49238 default:
49239 gcc_unreachable();
49241 case 3:
49242 if (!rtx_equal_p (d->op0, d->op1))
49244 d->one_operand_p = false;
49245 break;
49247 /* The elements of PERM do not suggest that only the first operand
49248 is used, but both operands are identical. Allow easier matching
49249 of the permutation by folding the permutation into the single
49250 input vector. */
49251 /* FALLTHRU */
49253 case 2:
49254 for (i = 0; i < nelt; ++i)
49255 d->perm[i] &= nelt - 1;
49256 d->op0 = d->op1;
49257 break;
49259 case 1:
49260 d->op1 = d->op0;
49261 break;
49264 return (which == 3);
49267 bool
49268 ix86_expand_vec_perm_const (rtx operands[4])
49270 struct expand_vec_perm_d d;
49271 unsigned char perm[MAX_VECT_LEN];
49272 int i, nelt;
49273 bool two_args;
49274 rtx sel;
49276 d.target = operands[0];
49277 d.op0 = operands[1];
49278 d.op1 = operands[2];
49279 sel = operands[3];
49281 d.vmode = GET_MODE (d.target);
49282 gcc_assert (VECTOR_MODE_P (d.vmode));
49283 d.nelt = nelt = GET_MODE_NUNITS (d.vmode);
49284 d.testing_p = false;
49286 gcc_assert (GET_CODE (sel) == CONST_VECTOR);
49287 gcc_assert (XVECLEN (sel, 0) == nelt);
49288 gcc_checking_assert (sizeof (d.perm) == sizeof (perm));
49290 for (i = 0; i < nelt; ++i)
49292 rtx e = XVECEXP (sel, 0, i);
49293 int ei = INTVAL (e) & (2 * nelt - 1);
49294 d.perm[i] = ei;
49295 perm[i] = ei;
49298 two_args = canonicalize_perm (&d);
49300 if (ix86_expand_vec_perm_const_1 (&d))
49301 return true;
49303 /* If the selector says both arguments are needed, but the operands are the
49304 same, the above tried to expand with one_operand_p and a flattened
49305 selector.  If that didn't work, retry without one_operand_p; we
49306 succeeded with that during testing.  */
49307 if (two_args && d.one_operand_p)
49309 d.one_operand_p = false;
49310 memcpy (d.perm, perm, sizeof (perm));
49311 return ix86_expand_vec_perm_const_1 (&d);
49314 return false;
49317 /* Implement targetm.vectorize.vec_perm_const_ok. */
49319 static bool
49320 ix86_vectorize_vec_perm_const_ok (machine_mode vmode,
49321 const unsigned char *sel)
49323 struct expand_vec_perm_d d;
49324 unsigned int i, nelt, which;
49325 bool ret;
49327 d.vmode = vmode;
49328 d.nelt = nelt = GET_MODE_NUNITS (d.vmode);
49329 d.testing_p = true;
49331 /* Given sufficient ISA support we can just return true here
49332 for selected vector modes. */
49333 switch (d.vmode)
49335 case V16SFmode:
49336 case V16SImode:
49337 case V8DImode:
49338 case V8DFmode:
49339 if (TARGET_AVX512F)
49340 /* All implementable with a single vpermi2 insn. */
49341 return true;
49342 break;
49343 case V32HImode:
49344 if (TARGET_AVX512BW)
49345 /* All implementable with a single vpermi2 insn. */
49346 return true;
49347 break;
49348 case V64QImode:
49349 if (TARGET_AVX512BW)
49350 /* Implementable with 2 vpermi2, 2 vpshufb and 1 or insn. */
49351 return true;
49352 break;
49353 case V8SImode:
49354 case V8SFmode:
49355 case V4DFmode:
49356 case V4DImode:
49357 if (TARGET_AVX512VL)
49358 /* All implementable with a single vpermi2 insn. */
49359 return true;
49360 break;
49361 case V16HImode:
49362 if (TARGET_AVX2)
49363 /* Implementable with 4 vpshufb insns, 2 vpermq and 3 vpor insns. */
49364 return true;
49365 break;
49366 case V32QImode:
49367 if (TARGET_AVX2)
49368 /* Implementable with 4 vpshufb insns, 2 vpermq and 3 vpor insns. */
49369 return true;
49370 break;
49371 case V4SImode:
49372 case V4SFmode:
49373 case V8HImode:
49374 case V16QImode:
49375 /* All implementable with a single vpperm insn. */
49376 if (TARGET_XOP)
49377 return true;
49378 /* All implementable with 2 pshufb + 1 ior. */
49379 if (TARGET_SSSE3)
49380 return true;
49381 break;
49382 case V2DImode:
49383 case V2DFmode:
49384 /* All implementable with shufpd or unpck[lh]pd. */
49385 return true;
49386 default:
49387 return false;
49390 /* Extract the values from the vector CST into the permutation
49391 array in D. */
49392 memcpy (d.perm, sel, nelt);
49393 for (i = which = 0; i < nelt; ++i)
49395 unsigned char e = d.perm[i];
49396 gcc_assert (e < 2 * nelt);
49397 which |= (e < nelt ? 1 : 2);
49400 /* For all elements from the second vector, fold the elements to the first. */
49401 if (which == 2)
49402 for (i = 0; i < nelt; ++i)
49403 d.perm[i] -= nelt;
49405 /* Check whether the mask can be applied to the vector type. */
49406 d.one_operand_p = (which != 3);
49408 /* Implementable with shufps or pshufd. */
49409 if (d.one_operand_p && (d.vmode == V4SFmode || d.vmode == V4SImode))
49410 return true;
49412 /* Otherwise we have to go through the motions and see if we can
49413 figure out how to generate the requested permutation. */
49414 d.target = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 1);
49415 d.op1 = d.op0 = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 2);
49416 if (!d.one_operand_p)
49417 d.op1 = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 3);
49419 start_sequence ();
49420 ret = ix86_expand_vec_perm_const_1 (&d);
49421 end_sequence ();
49423 return ret;
49426 void
49427 ix86_expand_vec_extract_even_odd (rtx targ, rtx op0, rtx op1, unsigned odd)
49429 struct expand_vec_perm_d d;
49430 unsigned i, nelt;
49432 d.target = targ;
49433 d.op0 = op0;
49434 d.op1 = op1;
49435 d.vmode = GET_MODE (targ);
49436 d.nelt = nelt = GET_MODE_NUNITS (d.vmode);
49437 d.one_operand_p = false;
49438 d.testing_p = false;
49440 for (i = 0; i < nelt; ++i)
49441 d.perm[i] = i * 2 + odd;
49443 /* We'll either be able to implement the permutation directly... */
49444 if (expand_vec_perm_1 (&d))
49445 return;
49447 /* ... or we use the special-case patterns. */
49448 expand_vec_perm_even_odd_1 (&d, odd);
49451 static void
49452 ix86_expand_vec_interleave (rtx targ, rtx op0, rtx op1, bool high_p)
49454 struct expand_vec_perm_d d;
49455 unsigned i, nelt, base;
49456 bool ok;
49458 d.target = targ;
49459 d.op0 = op0;
49460 d.op1 = op1;
49461 d.vmode = GET_MODE (targ);
49462 d.nelt = nelt = GET_MODE_NUNITS (d.vmode);
49463 d.one_operand_p = false;
49464 d.testing_p = false;
49466 base = high_p ? nelt / 2 : 0;
49467 for (i = 0; i < nelt / 2; ++i)
49469 d.perm[i * 2] = i + base;
49470 d.perm[i * 2 + 1] = i + base + nelt;
49473 /* Note that for AVX this isn't one instruction. */
49474 ok = ix86_expand_vec_perm_const_1 (&d);
49475 gcc_assert (ok);
49479 /* Expand a vector operation CODE for a V*QImode in terms of the
49480 same operation on V*HImode. */
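/* For example, a V16QImode multiply interleaves each operand with itself so
   that every word has a source byte in its low byte, performs two V8HImode
   multiplies, and then permutes the even bytes of the two results back into
   a single V16QImode vector.  */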
49482 void
49483 ix86_expand_vecop_qihi (enum rtx_code code, rtx dest, rtx op1, rtx op2)
49485 machine_mode qimode = GET_MODE (dest);
49486 machine_mode himode;
49487 rtx (*gen_il) (rtx, rtx, rtx);
49488 rtx (*gen_ih) (rtx, rtx, rtx);
49489 rtx op1_l, op1_h, op2_l, op2_h, res_l, res_h;
49490 struct expand_vec_perm_d d;
49491 bool ok, full_interleave;
49492 bool uns_p = false;
49493 int i;
49495 switch (qimode)
49497 case V16QImode:
49498 himode = V8HImode;
49499 gen_il = gen_vec_interleave_lowv16qi;
49500 gen_ih = gen_vec_interleave_highv16qi;
49501 break;
49502 case V32QImode:
49503 himode = V16HImode;
49504 gen_il = gen_avx2_interleave_lowv32qi;
49505 gen_ih = gen_avx2_interleave_highv32qi;
49506 break;
49507 case V64QImode:
49508 himode = V32HImode;
49509 gen_il = gen_avx512bw_interleave_lowv64qi;
49510 gen_ih = gen_avx512bw_interleave_highv64qi;
49511 break;
49512 default:
49513 gcc_unreachable ();
49516 op2_l = op2_h = op2;
49517 switch (code)
49519 case MULT:
49520 /* Unpack data such that we've got a source byte in each low byte of
49521 each word. We don't care what goes into the high byte of each word.
49522 Rather than trying to get zero in there, it is most convenient to let
49523 it be a copy of the low byte. */
49524 op2_l = gen_reg_rtx (qimode);
49525 op2_h = gen_reg_rtx (qimode);
49526 emit_insn (gen_il (op2_l, op2, op2));
49527 emit_insn (gen_ih (op2_h, op2, op2));
49528 /* FALLTHRU */
49530 op1_l = gen_reg_rtx (qimode);
49531 op1_h = gen_reg_rtx (qimode);
49532 emit_insn (gen_il (op1_l, op1, op1));
49533 emit_insn (gen_ih (op1_h, op1, op1));
49534 full_interleave = qimode == V16QImode;
49535 break;
49537 case ASHIFT:
49538 case LSHIFTRT:
49539 uns_p = true;
49540 /* FALLTHRU */
49541 case ASHIFTRT:
49542 op1_l = gen_reg_rtx (himode);
49543 op1_h = gen_reg_rtx (himode);
49544 ix86_expand_sse_unpack (op1_l, op1, uns_p, false);
49545 ix86_expand_sse_unpack (op1_h, op1, uns_p, true);
49546 full_interleave = true;
49547 break;
49548 default:
49549 gcc_unreachable ();
49552 /* Perform the operation. */
49553 res_l = expand_simple_binop (himode, code, op1_l, op2_l, NULL_RTX,
49554 1, OPTAB_DIRECT);
49555 res_h = expand_simple_binop (himode, code, op1_h, op2_h, NULL_RTX,
49556 1, OPTAB_DIRECT);
49557 gcc_assert (res_l && res_h);
49559 /* Merge the data back into the right place. */
49560 d.target = dest;
49561 d.op0 = gen_lowpart (qimode, res_l);
49562 d.op1 = gen_lowpart (qimode, res_h);
49563 d.vmode = qimode;
49564 d.nelt = GET_MODE_NUNITS (qimode);
49565 d.one_operand_p = false;
49566 d.testing_p = false;
49568 if (full_interleave)
49570 /* For SSE2, we used a full interleave, so the desired
49571 results are in the even elements. */
49572 for (i = 0; i < 64; ++i)
49573 d.perm[i] = i * 2;
49575 else
49577 /* For AVX, the interleave used above was not cross-lane.  So the extraction
49578 is of the even elements, but with the second and third quarters swapped.
49579 Happily, that is even one insn shorter than a plain even extraction.  */
49580 for (i = 0; i < 64; ++i)
49581 d.perm[i] = i * 2 + ((i & 24) == 8 ? 16 : (i & 24) == 16 ? -16 : 0);
49584 ok = ix86_expand_vec_perm_const_1 (&d);
49585 gcc_assert (ok);
49587 set_unique_reg_note (get_last_insn (), REG_EQUAL,
49588 gen_rtx_fmt_ee (code, qimode, op1, op2));
49591 /* Helper function of ix86_expand_mul_widen_evenodd. Return true
49592 if op is CONST_VECTOR with all odd elements equal to their
49593 preceding element. */
49595 static bool
49596 const_vector_equal_evenodd_p (rtx op)
49598 machine_mode mode = GET_MODE (op);
49599 int i, nunits = GET_MODE_NUNITS (mode);
49600 if (GET_CODE (op) != CONST_VECTOR
49601 || nunits != CONST_VECTOR_NUNITS (op))
49602 return false;
49603 for (i = 0; i < nunits; i += 2)
49604 if (CONST_VECTOR_ELT (op, i) != CONST_VECTOR_ELT (op, i + 1))
49605 return false;
49606 return true;
49609 void
49610 ix86_expand_mul_widen_evenodd (rtx dest, rtx op1, rtx op2,
49611 bool uns_p, bool odd_p)
49613 machine_mode mode = GET_MODE (op1);
49614 machine_mode wmode = GET_MODE (dest);
49615 rtx x;
49616 rtx orig_op1 = op1, orig_op2 = op2;
49618 if (!nonimmediate_operand (op1, mode))
49619 op1 = force_reg (mode, op1);
49620 if (!nonimmediate_operand (op2, mode))
49621 op2 = force_reg (mode, op2);
49623 /* We only play even/odd games with vectors of SImode. */
49624 gcc_assert (mode == V4SImode || mode == V8SImode || mode == V16SImode);
49626 /* If we're looking for the odd results, shift those members down to
49627 the even slots. For some cpus this is faster than a PSHUFD. */
49628 if (odd_p)
49630 /* For XOP use vpmacsdqh, but only for smult, as it is only
49631 signed. */
49632 if (TARGET_XOP && mode == V4SImode && !uns_p)
49634 x = force_reg (wmode, CONST0_RTX (wmode));
49635 emit_insn (gen_xop_pmacsdqh (dest, op1, op2, x));
49636 return;
49639 x = GEN_INT (GET_MODE_UNIT_BITSIZE (mode));
49640 if (!const_vector_equal_evenodd_p (orig_op1))
49641 op1 = expand_binop (wmode, lshr_optab, gen_lowpart (wmode, op1),
49642 x, NULL, 1, OPTAB_DIRECT);
49643 if (!const_vector_equal_evenodd_p (orig_op2))
49644 op2 = expand_binop (wmode, lshr_optab, gen_lowpart (wmode, op2),
49645 x, NULL, 1, OPTAB_DIRECT);
49646 op1 = gen_lowpart (mode, op1);
49647 op2 = gen_lowpart (mode, op2);
49650 if (mode == V16SImode)
49652 if (uns_p)
49653 x = gen_vec_widen_umult_even_v16si (dest, op1, op2);
49654 else
49655 x = gen_vec_widen_smult_even_v16si (dest, op1, op2);
49657 else if (mode == V8SImode)
49659 if (uns_p)
49660 x = gen_vec_widen_umult_even_v8si (dest, op1, op2);
49661 else
49662 x = gen_vec_widen_smult_even_v8si (dest, op1, op2);
49664 else if (uns_p)
49665 x = gen_vec_widen_umult_even_v4si (dest, op1, op2);
49666 else if (TARGET_SSE4_1)
49667 x = gen_sse4_1_mulv2siv2di3 (dest, op1, op2);
49668 else
49670 rtx s1, s2, t0, t1, t2;
49672 /* The easiest way to implement this without PMULDQ is to go through
49673 the motions as if we are performing a full 64-bit multiply, except
49674 that we need to do less shuffling of the elements.  */
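/* Concretely, for 32-bit signed A and B the 64-bit signed product equals the
   unsigned product minus (B << 32) when A is negative and minus (A << 32)
   when B is negative; the unsigned multiplies of the sign masks below
   produce exactly those correction terms (modulo 2^64).  */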
49676 /* Compute the sign-extension, aka highparts, of the two operands. */
49677 s1 = ix86_expand_sse_cmp (gen_reg_rtx (mode), GT, CONST0_RTX (mode),
49678 op1, pc_rtx, pc_rtx);
49679 s2 = ix86_expand_sse_cmp (gen_reg_rtx (mode), GT, CONST0_RTX (mode),
49680 op2, pc_rtx, pc_rtx);
49682 /* Multiply LO(A) * HI(B), and vice-versa. */
49683 t1 = gen_reg_rtx (wmode);
49684 t2 = gen_reg_rtx (wmode);
49685 emit_insn (gen_vec_widen_umult_even_v4si (t1, s1, op2));
49686 emit_insn (gen_vec_widen_umult_even_v4si (t2, s2, op1));
49688 /* Multiply LO(A) * LO(B). */
49689 t0 = gen_reg_rtx (wmode);
49690 emit_insn (gen_vec_widen_umult_even_v4si (t0, op1, op2));
49692 /* Combine and shift the highparts into place. */
49693 t1 = expand_binop (wmode, add_optab, t1, t2, t1, 1, OPTAB_DIRECT);
49694 t1 = expand_binop (wmode, ashl_optab, t1, GEN_INT (32), t1,
49695 1, OPTAB_DIRECT);
49697 /* Combine high and low parts. */
49698 force_expand_binop (wmode, add_optab, t0, t1, dest, 1, OPTAB_DIRECT);
49699 return;
49701 emit_insn (x);
49704 void
49705 ix86_expand_mul_widen_hilo (rtx dest, rtx op1, rtx op2,
49706 bool uns_p, bool high_p)
49708 machine_mode wmode = GET_MODE (dest);
49709 machine_mode mode = GET_MODE (op1);
49710 rtx t1, t2, t3, t4, mask;
49712 switch (mode)
49714 case V4SImode:
49715 t1 = gen_reg_rtx (mode);
49716 t2 = gen_reg_rtx (mode);
49717 if (TARGET_XOP && !uns_p)
49719 /* With XOP, we have pmacsdqh, aka mul_widen_odd. In this case,
49720 shuffle the elements once so that all elements are in the right
49721 place for immediate use: { A C B D }. */
49722 emit_insn (gen_sse2_pshufd_1 (t1, op1, const0_rtx, const2_rtx,
49723 const1_rtx, GEN_INT (3)));
49724 emit_insn (gen_sse2_pshufd_1 (t2, op2, const0_rtx, const2_rtx,
49725 const1_rtx, GEN_INT (3)));
49727 else
49729 /* Put the elements into place for the multiply. */
49730 ix86_expand_vec_interleave (t1, op1, op1, high_p);
49731 ix86_expand_vec_interleave (t2, op2, op2, high_p);
49732 high_p = false;
49734 ix86_expand_mul_widen_evenodd (dest, t1, t2, uns_p, high_p);
49735 break;
49737 case V8SImode:
49738 /* Shuffle the elements between the lanes. After this we
49739 have { A B E F | C D G H } for each operand. */
49740 t1 = gen_reg_rtx (V4DImode);
49741 t2 = gen_reg_rtx (V4DImode);
49742 emit_insn (gen_avx2_permv4di_1 (t1, gen_lowpart (V4DImode, op1),
49743 const0_rtx, const2_rtx,
49744 const1_rtx, GEN_INT (3)));
49745 emit_insn (gen_avx2_permv4di_1 (t2, gen_lowpart (V4DImode, op2),
49746 const0_rtx, const2_rtx,
49747 const1_rtx, GEN_INT (3)));
49749 /* Shuffle the elements within the lanes. After this we
49750 have { A A B B | C C D D } or { E E F F | G G H H }. */
49751 t3 = gen_reg_rtx (V8SImode);
49752 t4 = gen_reg_rtx (V8SImode);
49753 mask = GEN_INT (high_p
49754 ? 2 + (2 << 2) + (3 << 4) + (3 << 6)
49755 : 0 + (0 << 2) + (1 << 4) + (1 << 6));
49756 emit_insn (gen_avx2_pshufdv3 (t3, gen_lowpart (V8SImode, t1), mask));
49757 emit_insn (gen_avx2_pshufdv3 (t4, gen_lowpart (V8SImode, t2), mask));
49759 ix86_expand_mul_widen_evenodd (dest, t3, t4, uns_p, false);
49760 break;
49762 case V8HImode:
49763 case V16HImode:
49764 t1 = expand_binop (mode, smul_optab, op1, op2, NULL_RTX,
49765 uns_p, OPTAB_DIRECT);
49766 t2 = expand_binop (mode,
49767 uns_p ? umul_highpart_optab : smul_highpart_optab,
49768 op1, op2, NULL_RTX, uns_p, OPTAB_DIRECT);
49769 gcc_assert (t1 && t2);
49771 t3 = gen_reg_rtx (mode);
49772 ix86_expand_vec_interleave (t3, t1, t2, high_p);
49773 emit_move_insn (dest, gen_lowpart (wmode, t3));
49774 break;
49776 case V16QImode:
49777 case V32QImode:
49778 case V32HImode:
49779 case V16SImode:
49780 case V64QImode:
49781 t1 = gen_reg_rtx (wmode);
49782 t2 = gen_reg_rtx (wmode);
49783 ix86_expand_sse_unpack (t1, op1, uns_p, high_p);
49784 ix86_expand_sse_unpack (t2, op2, uns_p, high_p);
49786 emit_insn (gen_rtx_SET (VOIDmode, dest, gen_rtx_MULT (wmode, t1, t2)));
49787 break;
49789 default:
49790 gcc_unreachable ();
49794 void
49795 ix86_expand_sse2_mulv4si3 (rtx op0, rtx op1, rtx op2)
49797 rtx res_1, res_2, res_3, res_4;
49799 res_1 = gen_reg_rtx (V4SImode);
49800 res_2 = gen_reg_rtx (V4SImode);
49801 res_3 = gen_reg_rtx (V2DImode);
49802 res_4 = gen_reg_rtx (V2DImode);
49803 ix86_expand_mul_widen_evenodd (res_3, op1, op2, true, false);
49804 ix86_expand_mul_widen_evenodd (res_4, op1, op2, true, true);
49806 /* Move the results in element 2 down to element 1; we don't care
49807 what goes in elements 2 and 3. Then we can merge the parts
49808 back together with an interleave.
49810 Note that two other sequences were tried:
49811 (1) Use interleaves at the start instead of psrldq, which allows
49812 us to use a single shufps to merge things back at the end.
49813 (2) Use shufps here to combine the two vectors, then pshufd to
49814 put the elements in the correct order.
49815 In both cases the cost of the reformatting stall was too high
49816 and the overall sequence slower. */
49818 emit_insn (gen_sse2_pshufd_1 (res_1, gen_lowpart (V4SImode, res_3),
49819 const0_rtx, const2_rtx,
49820 const0_rtx, const0_rtx));
49821 emit_insn (gen_sse2_pshufd_1 (res_2, gen_lowpart (V4SImode, res_4),
49822 const0_rtx, const2_rtx,
49823 const0_rtx, const0_rtx));
49824 res_1 = emit_insn (gen_vec_interleave_lowv4si (op0, res_1, res_2));
49826 set_unique_reg_note (res_1, REG_EQUAL, gen_rtx_MULT (V4SImode, op1, op2));
49829 void
49830 ix86_expand_sse2_mulvxdi3 (rtx op0, rtx op1, rtx op2)
49832 machine_mode mode = GET_MODE (op0);
49833 rtx t1, t2, t3, t4, t5, t6;
49835 if (TARGET_AVX512DQ && mode == V8DImode)
49836 emit_insn (gen_avx512dq_mulv8di3 (op0, op1, op2));
49837 else if (TARGET_AVX512DQ && TARGET_AVX512VL && mode == V4DImode)
49838 emit_insn (gen_avx512dq_mulv4di3 (op0, op1, op2));
49839 else if (TARGET_AVX512DQ && TARGET_AVX512VL && mode == V2DImode)
49840 emit_insn (gen_avx512dq_mulv2di3 (op0, op1, op2));
49841 else if (TARGET_XOP && mode == V2DImode)
49843 /* op1: A,B,C,D, op2: E,F,G,H */
49844 op1 = gen_lowpart (V4SImode, op1);
49845 op2 = gen_lowpart (V4SImode, op2);
49847 t1 = gen_reg_rtx (V4SImode);
49848 t2 = gen_reg_rtx (V4SImode);
49849 t3 = gen_reg_rtx (V2DImode);
49850 t4 = gen_reg_rtx (V2DImode);
49852 /* t1: B,A,D,C */
49853 emit_insn (gen_sse2_pshufd_1 (t1, op1,
49854 GEN_INT (1),
49855 GEN_INT (0),
49856 GEN_INT (3),
49857 GEN_INT (2)));
49859 /* t2: (B*E),(A*F),(D*G),(C*H) */
49860 emit_insn (gen_mulv4si3 (t2, t1, op2));
49862 /* t3: (B*E)+(A*F), (D*G)+(C*H) */
49863 emit_insn (gen_xop_phadddq (t3, t2));
49865 /* t4: ((B*E)+(A*F))<<32, ((D*G)+(C*H))<<32 */
49866 emit_insn (gen_ashlv2di3 (t4, t3, GEN_INT (32)));
49868 /* Multiply the lower parts and add everything together. */
49869 t5 = gen_reg_rtx (V2DImode);
49870 emit_insn (gen_vec_widen_umult_even_v4si (t5,
49871 gen_lowpart (V4SImode, op1),
49872 gen_lowpart (V4SImode, op2)));
49873 op0 = expand_binop (mode, add_optab, t5, t4, op0, 1, OPTAB_DIRECT);
49876 else
49878 machine_mode nmode;
49879 rtx (*umul) (rtx, rtx, rtx);
49881 if (mode == V2DImode)
49883 umul = gen_vec_widen_umult_even_v4si;
49884 nmode = V4SImode;
49886 else if (mode == V4DImode)
49888 umul = gen_vec_widen_umult_even_v8si;
49889 nmode = V8SImode;
49891 else if (mode == V8DImode)
49893 umul = gen_vec_widen_umult_even_v16si;
49894 nmode = V16SImode;
49896 else
49897 gcc_unreachable ();
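/* The code below computes each 64-bit element's product from its 32-bit
   halves:
   (hi1 * 2^32 + lo1) * (hi2 * 2^32 + lo2)
   == lo1 * lo2 + ((hi1 * lo2 + hi2 * lo1) << 32)   (mod 2^64).  */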
49900 /* Multiply low parts. */
49901 t1 = gen_reg_rtx (mode);
49902 emit_insn (umul (t1, gen_lowpart (nmode, op1), gen_lowpart (nmode, op2)));
49904 /* Shift input vectors right 32 bits so we can multiply high parts. */
49905 t6 = GEN_INT (32);
49906 t2 = expand_binop (mode, lshr_optab, op1, t6, NULL, 1, OPTAB_DIRECT);
49907 t3 = expand_binop (mode, lshr_optab, op2, t6, NULL, 1, OPTAB_DIRECT);
49909 /* Multiply high parts by low parts. */
49910 t4 = gen_reg_rtx (mode);
49911 t5 = gen_reg_rtx (mode);
49912 emit_insn (umul (t4, gen_lowpart (nmode, t2), gen_lowpart (nmode, op2)));
49913 emit_insn (umul (t5, gen_lowpart (nmode, t3), gen_lowpart (nmode, op1)));
49915 /* Combine and shift the highparts back. */
49916 t4 = expand_binop (mode, add_optab, t4, t5, t4, 1, OPTAB_DIRECT);
49917 t4 = expand_binop (mode, ashl_optab, t4, t6, t4, 1, OPTAB_DIRECT);
49919 /* Combine high and low parts. */
49920 force_expand_binop (mode, add_optab, t1, t4, op0, 1, OPTAB_DIRECT);
49923 set_unique_reg_note (get_last_insn (), REG_EQUAL,
49924 gen_rtx_MULT (mode, op1, op2));
49927 /* Return 1 if control transfer instruction INSN
49928 should be encoded with the bnd prefix.
49929 If insn is NULL then return 1 when control
49930 transfer instructions should be prefixed with
49931 bnd by default for the current function. */
49933 bool
49934 ix86_bnd_prefixed_insn_p (rtx insn)
49936 /* For call insns check special flag. */
49937 if (insn && CALL_P (insn))
49939 rtx call = get_call_rtx_from (insn);
49940 if (call)
49941 return CALL_EXPR_WITH_BOUNDS_P (call);
49944 /* All other insns are prefixed only if function is instrumented. */
49945 return chkp_function_instrumented_p (current_function_decl);
49948 /* Calculate integer abs() using only SSE2 instructions. */
49950 void
49951 ix86_expand_sse2_abs (rtx target, rtx input)
49953 machine_mode mode = GET_MODE (target);
49954 rtx tmp0, tmp1, x;
49956 switch (mode)
49958 /* For 32-bit signed integer X, the best way to calculate the absolute
49959 value of X is (((signed) X >> (W-1)) ^ X) - ((signed) X >> (W-1)). */
49960 case V4SImode:
49961 tmp0 = expand_simple_binop (mode, ASHIFTRT, input,
49962 GEN_INT (GET_MODE_BITSIZE
49963 (GET_MODE_INNER (mode)) - 1),
49964 NULL, 0, OPTAB_DIRECT);
49965 tmp1 = expand_simple_binop (mode, XOR, tmp0, input,
49966 NULL, 0, OPTAB_DIRECT);
49967 x = expand_simple_binop (mode, MINUS, tmp1, tmp0,
49968 target, 0, OPTAB_DIRECT);
49969 break;
49971 /* For 16-bit signed integer X, the best way to calculate the absolute
49972 value of X is max (X, -X), as SSE2 provides the PMAXSW insn. */
49973 case V8HImode:
49974 tmp0 = expand_unop (mode, neg_optab, input, NULL_RTX, 0);
49976 x = expand_simple_binop (mode, SMAX, tmp0, input,
49977 target, 0, OPTAB_DIRECT);
49978 break;
49980 /* For 8-bit signed integer X, the best way to calculate the absolute
49981 value of X is min ((unsigned char) X, (unsigned char) (-X)),
49982 as SSE2 provides the PMINUB insn. */
49983 case V16QImode:
49984 tmp0 = expand_unop (mode, neg_optab, input, NULL_RTX, 0);
49986 x = expand_simple_binop (V16QImode, UMIN, tmp0, input,
49987 target, 0, OPTAB_DIRECT);
49988 break;
49990 default:
49991 gcc_unreachable ();
49994 if (x != target)
49995 emit_move_insn (target, x);
49998 /* Expand an insert into a vector register through pinsr insn.
49999 Return true if successful. */
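/* For example, inserting an HImode value at bit position 32 of a V8HImode
   destination becomes a single pinsrw into element 2 (pos / size below).  */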
50001 bool
50002 ix86_expand_pinsr (rtx *operands)
50004 rtx dst = operands[0];
50005 rtx src = operands[3];
50007 unsigned int size = INTVAL (operands[1]);
50008 unsigned int pos = INTVAL (operands[2]);
50010 if (GET_CODE (dst) == SUBREG)
50012 pos += SUBREG_BYTE (dst) * BITS_PER_UNIT;
50013 dst = SUBREG_REG (dst);
50016 if (GET_CODE (src) == SUBREG)
50017 src = SUBREG_REG (src);
50019 switch (GET_MODE (dst))
50021 case V16QImode:
50022 case V8HImode:
50023 case V4SImode:
50024 case V2DImode:
50026 machine_mode srcmode, dstmode;
50027 rtx (*pinsr)(rtx, rtx, rtx, rtx);
50029 srcmode = mode_for_size (size, MODE_INT, 0);
50031 switch (srcmode)
50033 case QImode:
50034 if (!TARGET_SSE4_1)
50035 return false;
50036 dstmode = V16QImode;
50037 pinsr = gen_sse4_1_pinsrb;
50038 break;
50040 case HImode:
50041 if (!TARGET_SSE2)
50042 return false;
50043 dstmode = V8HImode;
50044 pinsr = gen_sse2_pinsrw;
50045 break;
50047 case SImode:
50048 if (!TARGET_SSE4_1)
50049 return false;
50050 dstmode = V4SImode;
50051 pinsr = gen_sse4_1_pinsrd;
50052 break;
50054 case DImode:
50055 gcc_assert (TARGET_64BIT);
50056 if (!TARGET_SSE4_1)
50057 return false;
50058 dstmode = V2DImode;
50059 pinsr = gen_sse4_1_pinsrq;
50060 break;
50062 default:
50063 return false;
50066 rtx d = dst;
50067 if (GET_MODE (dst) != dstmode)
50068 d = gen_reg_rtx (dstmode);
50069 src = gen_lowpart (srcmode, src);
50071 pos /= size;
50073 emit_insn (pinsr (d, gen_lowpart (dstmode, dst), src,
50074 GEN_INT (1 << pos)));
50075 if (d != dst)
50076 emit_move_insn (dst, gen_lowpart (GET_MODE (dst), d));
50077 return true;
50080 default:
50081 return false;
50085 /* This function returns the calling-ABI-specific va_list type node.
50086 It returns the FNDECL-specific va_list type. */
50088 static tree
50089 ix86_fn_abi_va_list (tree fndecl)
50091 if (!TARGET_64BIT)
50092 return va_list_type_node;
50093 gcc_assert (fndecl != NULL_TREE);
50095 if (ix86_function_abi ((const_tree) fndecl) == MS_ABI)
50096 return ms_va_list_type_node;
50097 else
50098 return sysv_va_list_type_node;
50101 /* Returns the canonical va_list type specified by TYPE. If there
50102 is no valid TYPE provided, it returns NULL_TREE. */
50104 static tree
50105 ix86_canonical_va_list_type (tree type)
50107 tree wtype, htype;
50109 /* Resolve references and pointers to va_list type. */
50110 if (TREE_CODE (type) == MEM_REF)
50111 type = TREE_TYPE (type);
50112 else if (POINTER_TYPE_P (type) && POINTER_TYPE_P (TREE_TYPE(type)))
50113 type = TREE_TYPE (type);
50114 else if (POINTER_TYPE_P (type) && TREE_CODE (TREE_TYPE (type)) == ARRAY_TYPE)
50115 type = TREE_TYPE (type);
50117 if (TARGET_64BIT && va_list_type_node != NULL_TREE)
50119 wtype = va_list_type_node;
50120 gcc_assert (wtype != NULL_TREE);
50121 htype = type;
50122 if (TREE_CODE (wtype) == ARRAY_TYPE)
50124 /* If va_list is an array type, the argument may have decayed
50125 to a pointer type, e.g. by being passed to another function.
50126 In that case, unwrap both types so that we can compare the
50127 underlying records. */
50128 if (TREE_CODE (htype) == ARRAY_TYPE
50129 || POINTER_TYPE_P (htype))
50131 wtype = TREE_TYPE (wtype);
50132 htype = TREE_TYPE (htype);
50135 if (TYPE_MAIN_VARIANT (wtype) == TYPE_MAIN_VARIANT (htype))
50136 return va_list_type_node;
50137 wtype = sysv_va_list_type_node;
50138 gcc_assert (wtype != NULL_TREE);
50139 htype = type;
50140 if (TREE_CODE (wtype) == ARRAY_TYPE)
50142 /* If va_list is an array type, the argument may have decayed
50143 to a pointer type, e.g. by being passed to another function.
50144 In that case, unwrap both types so that we can compare the
50145 underlying records. */
50146 if (TREE_CODE (htype) == ARRAY_TYPE
50147 || POINTER_TYPE_P (htype))
50149 wtype = TREE_TYPE (wtype);
50150 htype = TREE_TYPE (htype);
50153 if (TYPE_MAIN_VARIANT (wtype) == TYPE_MAIN_VARIANT (htype))
50154 return sysv_va_list_type_node;
50155 wtype = ms_va_list_type_node;
50156 gcc_assert (wtype != NULL_TREE);
50157 htype = type;
50158 if (TREE_CODE (wtype) == ARRAY_TYPE)
50160 /* If va_list is an array type, the argument may have decayed
50161 to a pointer type, e.g. by being passed to another function.
50162 In that case, unwrap both types so that we can compare the
50163 underlying records. */
50164 if (TREE_CODE (htype) == ARRAY_TYPE
50165 || POINTER_TYPE_P (htype))
50167 wtype = TREE_TYPE (wtype);
50168 htype = TREE_TYPE (htype);
50171 if (TYPE_MAIN_VARIANT (wtype) == TYPE_MAIN_VARIANT (htype))
50172 return ms_va_list_type_node;
50173 return NULL_TREE;
50175 return std_canonical_va_list_type (type);
50178 /* Iterate through the target-specific builtin types for va_list.
50179 IDX denotes the iterator, *PTREE is set to the result type of
50180 the va_list builtin, and *PNAME to its internal type.
50181 Returns zero if there is no element for this index, otherwise
50182 IDX should be increased upon the next call.
50183 Note, do not iterate a base builtin's name like __builtin_va_list.
50184 Used from c_common_nodes_and_builtins. */
50186 static int
50187 ix86_enum_va_list (int idx, const char **pname, tree *ptree)
50189 if (TARGET_64BIT)
50191 switch (idx)
50193 default:
50194 break;
50196 case 0:
50197 *ptree = ms_va_list_type_node;
50198 *pname = "__builtin_ms_va_list";
50199 return 1;
50201 case 1:
50202 *ptree = sysv_va_list_type_node;
50203 *pname = "__builtin_sysv_va_list";
50204 return 1;
50208 return 0;
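/* Sketch of the iteration protocol expected from the caller
   (c_common_nodes_and_builtins); informal pseudo-code, not actual
   front-end source:

     idx = 0;
     while (ix86_enum_va_list (idx, &name, &type) != 0)
       { register builtin type NAME with tree TYPE; idx++; }

   On 64-bit targets this exposes __builtin_ms_va_list and
   __builtin_sysv_va_list; on 32-bit targets the hook returns 0
   immediately and nothing extra is registered.  */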
50211 #undef TARGET_SCHED_DISPATCH
50212 #define TARGET_SCHED_DISPATCH has_dispatch
50213 #undef TARGET_SCHED_DISPATCH_DO
50214 #define TARGET_SCHED_DISPATCH_DO do_dispatch
50215 #undef TARGET_SCHED_REASSOCIATION_WIDTH
50216 #define TARGET_SCHED_REASSOCIATION_WIDTH ix86_reassociation_width
50217 #undef TARGET_SCHED_REORDER
50218 #define TARGET_SCHED_REORDER ix86_sched_reorder
50219 #undef TARGET_SCHED_ADJUST_PRIORITY
50220 #define TARGET_SCHED_ADJUST_PRIORITY ix86_adjust_priority
50221 #undef TARGET_SCHED_DEPENDENCIES_EVALUATION_HOOK
50222 #define TARGET_SCHED_DEPENDENCIES_EVALUATION_HOOK \
50223 ix86_dependencies_evaluation_hook
50225 /* The size of the dispatch window is the total number of bytes of
50226 object code allowed in a window. */
50227 #define DISPATCH_WINDOW_SIZE 16
50229 /* Number of dispatch windows considered for scheduling. */
50230 #define MAX_DISPATCH_WINDOWS 3
50232 /* Maximum number of instructions in a window. */
50233 #define MAX_INSN 4
50235 /* Maximum number of immediate operands in a window. */
50236 #define MAX_IMM 4
50238 /* Maximum number of immediate bits allowed in a window. */
50239 #define MAX_IMM_SIZE 128
50241 /* Maximum number of 32 bit immediates allowed in a window. */
50242 #define MAX_IMM_32 4
50244 /* Maximum number of 64 bit immediates allowed in a window. */
50245 #define MAX_IMM_64 2
50247 /* Maximum total of loads or prefetches allowed in a window. */
50248 #define MAX_LOAD 2
50250 /* Maximum total of stores allowed in a window. */
50251 #define MAX_STORE 1
50253 #undef BIG
50254 #define BIG 100
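/* Worked example of how the limits above interact (illustrative):
   with MAX_IMM_64 == 2 and MAX_IMM_32 == 4, a window already holding
   two 64-bit immediates cannot accept another 64-bit immediate, and
   those two also consume all four 32-bit slots (each 64-bit immediate
   counts twice against MAX_IMM_32), so a further 32-bit immediate does
   not fit either; see count_num_restricted below.  */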
50257 /* Dispatch groups. Instructions that affect the mix in a dispatch window. */
50258 enum dispatch_group {
50259 disp_no_group = 0,
50260 disp_load,
50261 disp_store,
50262 disp_load_store,
50263 disp_prefetch,
50264 disp_imm,
50265 disp_imm_32,
50266 disp_imm_64,
50267 disp_branch,
50268 disp_cmp,
50269 disp_jcc,
50270 disp_last
50273 /* Number of allowable groups in a dispatch window. It is an array
50274 indexed by dispatch_group enum. 100 is used as a big number,
50275 because the number of these kinds of operations does not have any
50276 effect in a dispatch window, but we need them for other reasons in
50277 the table. */
50278 static unsigned int num_allowable_groups[disp_last] = {
50279 0, 2, 1, 1, 2, 4, 4, 2, 1, BIG, BIG
50282 char group_name[disp_last + 1][16] = {
50283 "disp_no_group", "disp_load", "disp_store", "disp_load_store",
50284 "disp_prefetch", "disp_imm", "disp_imm_32", "disp_imm_64",
50285 "disp_branch", "disp_cmp", "disp_jcc", "disp_last"
50288 /* Instruction path. */
50289 enum insn_path {
50290 no_path = 0,
50291 path_single, /* Single micro op. */
50292 path_double, /* Double micro op. */
50293 path_multi, /* Instructions with more than 2 micro ops. */
50294 last_path
50297 /* sched_insn_info describes one entry of a dispatch window: the
50298 instruction scheduled in the basic block together with its dispatch
50299 group, micro-op path and size information.
50301 Windows are allocated for each basic block and are linked
50302 together. */
50303 typedef struct sched_insn_info_s {
50304 rtx insn;
50305 enum dispatch_group group;
50306 enum insn_path path;
50307 int byte_len;
50308 int imm_bytes;
50309 } sched_insn_info;
50311 /* Linked list of dispatch windows. This is a doubly linked list of
50312 dispatch windows of a basic block. It contains information about
50313 the number of uops in the window and the total number of
50314 instructions and of bytes in the object code for this dispatch
50315 window. */
50316 typedef struct dispatch_windows_s {
50317 int num_insn; /* Number of insn in the window. */
50318 int num_uops; /* Number of uops in the window. */
50319 int window_size; /* Number of bytes in the window. */
50320 int window_num; /* Window number, either 0 or 1. */
50321 int num_imm; /* Total number of immediates in the window. */
50322 int num_imm_32; /* Number of 32 bit immediates in the window. */
50323 int num_imm_64; /* Number of 64 bit immediates in the window. */
50324 int imm_size; /* Total size in bytes of immediates in the window. */
50325 int num_loads; /* Total memory loads in the window. */
50326 int num_stores; /* Total memory stores in the window. */
50327 int violation; /* Violation exists in window. */
50328 sched_insn_info *window; /* Pointer to the window. */
50329 struct dispatch_windows_s *next;
50330 struct dispatch_windows_s *prev;
50331 } dispatch_windows;
50333 /* Immediate values used in an insn. */
50334 typedef struct imm_info_s
50336 int imm;
50337 int imm32;
50338 int imm64;
50339 } imm_info;
50341 static dispatch_windows *dispatch_window_list;
50342 static dispatch_windows *dispatch_window_list1;
50344 /* Get dispatch group of insn. */
50346 static enum dispatch_group
50347 get_mem_group (rtx_insn *insn)
50349 enum attr_memory memory;
50351 if (INSN_CODE (insn) < 0)
50352 return disp_no_group;
50353 memory = get_attr_memory (insn);
50354 if (memory == MEMORY_STORE)
50355 return disp_store;
50357 if (memory == MEMORY_LOAD)
50358 return disp_load;
50360 if (memory == MEMORY_BOTH)
50361 return disp_load_store;
50363 return disp_no_group;
50366 /* Return true if insn is a compare instruction. */
50368 static bool
50369 is_cmp (rtx_insn *insn)
50371 enum attr_type type;
50373 type = get_attr_type (insn);
50374 return (type == TYPE_TEST
50375 || type == TYPE_ICMP
50376 || type == TYPE_FCMP
50377 || GET_CODE (PATTERN (insn)) == COMPARE);
50380 /* Return true if a dispatch violation was encountered. */
50382 static bool
50383 dispatch_violation (void)
50385 if (dispatch_window_list->next)
50386 return dispatch_window_list->next->violation;
50387 return dispatch_window_list->violation;
50390 /* Return true if insn is a branch instruction. */
50392 static bool
50393 is_branch (rtx insn)
50395 return (CALL_P (insn) || JUMP_P (insn));
50398 /* Return true if insn is a prefetch instruction. */
50400 static bool
50401 is_prefetch (rtx insn)
50403 return NONJUMP_INSN_P (insn) && GET_CODE (PATTERN (insn)) == PREFETCH;
50406 /* This function initializes a dispatch window and the list container holding a
50407 pointer to the window. */
50409 static void
50410 init_window (int window_num)
50412 int i;
50413 dispatch_windows *new_list;
50415 if (window_num == 0)
50416 new_list = dispatch_window_list;
50417 else
50418 new_list = dispatch_window_list1;
50420 new_list->num_insn = 0;
50421 new_list->num_uops = 0;
50422 new_list->window_size = 0;
50423 new_list->next = NULL;
50424 new_list->prev = NULL;
50425 new_list->window_num = window_num;
50426 new_list->num_imm = 0;
50427 new_list->num_imm_32 = 0;
50428 new_list->num_imm_64 = 0;
50429 new_list->imm_size = 0;
50430 new_list->num_loads = 0;
50431 new_list->num_stores = 0;
50432 new_list->violation = false;
50434 for (i = 0; i < MAX_INSN; i++)
50436 new_list->window[i].insn = NULL;
50437 new_list->window[i].group = disp_no_group;
50438 new_list->window[i].path = no_path;
50439 new_list->window[i].byte_len = 0;
50440 new_list->window[i].imm_bytes = 0;
50442 return;
50445 /* This function allocates and initializes a dispatch window and the
50446 list container holding a pointer to the window. */
50448 static dispatch_windows *
50449 allocate_window (void)
50451 dispatch_windows *new_list = XNEW (struct dispatch_windows_s);
50452 new_list->window = XNEWVEC (struct sched_insn_info_s, MAX_INSN + 1);
50454 return new_list;
50457 /* This routine initializes the dispatch scheduling information. It
50458 initiates building dispatch scheduler tables and constructs the
50459 first dispatch window. */
50461 static void
50462 init_dispatch_sched (void)
50464 /* Allocate a dispatch list and a window. */
50465 dispatch_window_list = allocate_window ();
50466 dispatch_window_list1 = allocate_window ();
50467 init_window (0);
50468 init_window (1);
50471 /* This function returns true if a branch is detected. End of a basic block
50472 does not have to be a branch, but here we assume only branches end a
50473 window. */
50475 static bool
50476 is_end_basic_block (enum dispatch_group group)
50478 return group == disp_branch;
50481 /* This function is called when the end of a window processing is reached. */
50483 static void
50484 process_end_window (void)
50486 gcc_assert (dispatch_window_list->num_insn <= MAX_INSN);
50487 if (dispatch_window_list->next)
50489 gcc_assert (dispatch_window_list1->num_insn <= MAX_INSN);
50490 gcc_assert (dispatch_window_list->window_size
50491 + dispatch_window_list1->window_size <= 48);
50492 init_window (1);
50494 init_window (0);
50497 /* Allocates a new dispatch window and adds it to WINDOW_LIST.
50498 WINDOW_NUM is either 0 or 1. A maximum of two windows are generated
50499 for 48 bytes of instructions. Note that these windows are not dispatch
50500 windows whose size is DISPATCH_WINDOW_SIZE. */
50502 static dispatch_windows *
50503 allocate_next_window (int window_num)
50505 if (window_num == 0)
50507 if (dispatch_window_list->next)
50508 init_window (1);
50509 init_window (0);
50510 return dispatch_window_list;
50513 dispatch_window_list->next = dispatch_window_list1;
50514 dispatch_window_list1->prev = dispatch_window_list;
50516 return dispatch_window_list1;
50519 /* Compute number of immediate operands of an instruction. */
50521 static void
50522 find_constant (rtx in_rtx, imm_info *imm_values)
50524 if (INSN_P (in_rtx))
50525 in_rtx = PATTERN (in_rtx);
50526 subrtx_iterator::array_type array;
50527 FOR_EACH_SUBRTX (iter, array, in_rtx, ALL)
50528 if (const_rtx x = *iter)
50529 switch (GET_CODE (x))
50531 case CONST:
50532 case SYMBOL_REF:
50533 case CONST_INT:
50534 (imm_values->imm)++;
50535 if (x86_64_immediate_operand (CONST_CAST_RTX (x), SImode))
50536 (imm_values->imm32)++;
50537 else
50538 (imm_values->imm64)++;
50539 break;
50541 case CONST_DOUBLE:
50542 (imm_values->imm)++;
50543 (imm_values->imm64)++;
50544 break;
50546 case CODE_LABEL:
50547 if (LABEL_KIND (x) == LABEL_NORMAL)
50549 (imm_values->imm)++;
50550 (imm_values->imm32)++;
50552 break;
50554 default:
50555 break;
50559 /* Return total size of immediate operands of an instruction along with number
50560 of corresponding immediate-operands. It initializes its parameters to zero
50561 before calling FIND_CONSTANT.
50562 INSN is the input instruction. IMM is the total of immediates.
50563 IMM32 is the number of 32 bit immediates. IMM64 is the number of 64
50564 bit immediates. */
50566 static int
50567 get_num_immediates (rtx insn, int *imm, int *imm32, int *imm64)
50569 imm_info imm_values = {0, 0, 0};
50571 find_constant (insn, &imm_values);
50572 *imm = imm_values.imm;
50573 *imm32 = imm_values.imm32;
50574 *imm64 = imm_values.imm64;
50575 return imm_values.imm32 * 4 + imm_values.imm64 * 8;
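/* For example (illustrative numbers): an insn carrying one 32-bit and
   one 64-bit immediate sets *IMM to 2, *IMM32 to 1 and *IMM64 to 1,
   and the function returns 1 * 4 + 1 * 8 == 12 bytes.  */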
50578 /* This function indicates whether the instruction INSN has any
50579 immediate operands. */
50581 static bool
50582 has_immediate (rtx insn)
50584 int num_imm_operand;
50585 int num_imm32_operand;
50586 int num_imm64_operand;
50588 if (insn)
50589 return get_num_immediates (insn, &num_imm_operand, &num_imm32_operand,
50590 &num_imm64_operand);
50591 return false;
50594 /* Return single or double path for instructions. */
50596 static enum insn_path
50597 get_insn_path (rtx_insn *insn)
50599 enum attr_amdfam10_decode path = get_attr_amdfam10_decode (insn);
50601 if ((int)path == 0)
50602 return path_single;
50604 if ((int)path == 1)
50605 return path_double;
50607 return path_multi;
50610 /* Return insn dispatch group. */
50612 static enum dispatch_group
50613 get_insn_group (rtx_insn *insn)
50615 enum dispatch_group group = get_mem_group (insn);
50616 if (group)
50617 return group;
50619 if (is_branch (insn))
50620 return disp_branch;
50622 if (is_cmp (insn))
50623 return disp_cmp;
50625 if (has_immediate (insn))
50626 return disp_imm;
50628 if (is_prefetch (insn))
50629 return disp_prefetch;
50631 return disp_no_group;
50634 /* Count number of GROUP restricted instructions in a dispatch
50635 window WINDOW_LIST. */
50637 static int
50638 count_num_restricted (rtx_insn *insn, dispatch_windows *window_list)
50640 enum dispatch_group group = get_insn_group (insn);
50641 int imm_size;
50642 int num_imm_operand;
50643 int num_imm32_operand;
50644 int num_imm64_operand;
50646 if (group == disp_no_group)
50647 return 0;
50649 if (group == disp_imm)
50651 imm_size = get_num_immediates (insn, &num_imm_operand, &num_imm32_operand,
50652 &num_imm64_operand);
50653 if (window_list->imm_size + imm_size > MAX_IMM_SIZE
50654 || num_imm_operand + window_list->num_imm > MAX_IMM
50655 || (num_imm32_operand > 0
50656 && (window_list->num_imm_32 + num_imm32_operand > MAX_IMM_32
50657 || window_list->num_imm_64 * 2 + num_imm32_operand > MAX_IMM_32))
50658 || (num_imm64_operand > 0
50659 && (window_list->num_imm_64 + num_imm64_operand > MAX_IMM_64
50660 || window_list->num_imm_32 + num_imm64_operand * 2 > MAX_IMM_32))
50661 || (window_list->imm_size + imm_size == MAX_IMM_SIZE
50662 && num_imm64_operand > 0
50663 && ((window_list->num_imm_64 > 0
50664 && window_list->num_insn >= 2)
50665 || window_list->num_insn >= 3)))
50666 return BIG;
50668 return 1;
50671 if ((group == disp_load_store
50672 && (window_list->num_loads >= MAX_LOAD
50673 || window_list->num_stores >= MAX_STORE))
50674 || ((group == disp_load
50675 || group == disp_prefetch)
50676 && window_list->num_loads >= MAX_LOAD)
50677 || (group == disp_store
50678 && window_list->num_stores >= MAX_STORE))
50679 return BIG;
50681 return 1;
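/* For instance (illustrative): if GROUP is disp_load and the window
   already holds MAX_LOAD loads, BIG is returned; fits_dispatch_window
   then compares BIG against num_allowable_groups[disp_load] == 2 and
   rejects the insn for this window.  */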
50684 /* This function returns true if insn satisfies dispatch rules on the
50685 last window scheduled. */
50687 static bool
50688 fits_dispatch_window (rtx_insn *insn)
50690 dispatch_windows *window_list = dispatch_window_list;
50691 dispatch_windows *window_list_next = dispatch_window_list->next;
50692 unsigned int num_restrict;
50693 enum dispatch_group group = get_insn_group (insn);
50694 enum insn_path path = get_insn_path (insn);
50695 int sum;
50697 /* Make disp_cmp and disp_jcc get scheduled at the latest. These
50698 instructions should be given the lowest priority in the
50699 scheduling process in the Haifa scheduler to make sure they will be
50700 scheduled in the same dispatch window as the reference to them. */
50701 if (group == disp_jcc || group == disp_cmp)
50702 return false;
50704 /* Check nonrestricted. */
50705 if (group == disp_no_group || group == disp_branch)
50706 return true;
50708 /* Get last dispatch window. */
50709 if (window_list_next)
50710 window_list = window_list_next;
50712 if (window_list->window_num == 1)
50714 sum = window_list->prev->window_size + window_list->window_size;
50716 if (sum == 32
50717 || (min_insn_size (insn) + sum) >= 48)
50718 /* Window 1 is full. Go for next window. */
50719 return true;
50722 num_restrict = count_num_restricted (insn, window_list);
50724 if (num_restrict > num_allowable_groups[group])
50725 return false;
50727 /* See if it fits in the first window. */
50728 if (window_list->window_num == 0)
50730 /* The first window should have only single and double path
50731 uops. */
50732 if (path == path_double
50733 && (window_list->num_uops + 2) > MAX_INSN)
50734 return false;
50735 else if (path != path_single)
50736 return false;
50738 return true;
50741 /* Add an instruction INSN with NUM_UOPS micro-operations to the
50742 dispatch window WINDOW_LIST. */
50744 static void
50745 add_insn_window (rtx_insn *insn, dispatch_windows *window_list, int num_uops)
50747 int byte_len = min_insn_size (insn);
50748 int num_insn = window_list->num_insn;
50749 int imm_size;
50750 sched_insn_info *window = window_list->window;
50751 enum dispatch_group group = get_insn_group (insn);
50752 enum insn_path path = get_insn_path (insn);
50753 int num_imm_operand;
50754 int num_imm32_operand;
50755 int num_imm64_operand;
50757 if (!window_list->violation && group != disp_cmp
50758 && !fits_dispatch_window (insn))
50759 window_list->violation = true;
50761 imm_size = get_num_immediates (insn, &num_imm_operand, &num_imm32_operand,
50762 &num_imm64_operand);
50764 /* Initialize window with new instruction. */
50765 window[num_insn].insn = insn;
50766 window[num_insn].byte_len = byte_len;
50767 window[num_insn].group = group;
50768 window[num_insn].path = path;
50769 window[num_insn].imm_bytes = imm_size;
50771 window_list->window_size += byte_len;
50772 window_list->num_insn = num_insn + 1;
50773 window_list->num_uops = window_list->num_uops + num_uops;
50774 window_list->imm_size += imm_size;
50775 window_list->num_imm += num_imm_operand;
50776 window_list->num_imm_32 += num_imm32_operand;
50777 window_list->num_imm_64 += num_imm64_operand;
50779 if (group == disp_store)
50780 window_list->num_stores += 1;
50781 else if (group == disp_load
50782 || group == disp_prefetch)
50783 window_list->num_loads += 1;
50784 else if (group == disp_load_store)
50786 window_list->num_stores += 1;
50787 window_list->num_loads += 1;
50791 /* Adds a scheduled instruction, INSN, to the current dispatch window.
50792 If the total bytes of instructions or the number of instructions in
50793 the window exceed the allowable limits, it allocates a new window. */
50795 static void
50796 add_to_dispatch_window (rtx_insn *insn)
50798 int byte_len;
50799 dispatch_windows *window_list;
50800 dispatch_windows *next_list;
50801 dispatch_windows *window0_list;
50802 enum insn_path path;
50803 enum dispatch_group insn_group;
50804 bool insn_fits;
50805 int num_insn;
50806 int num_uops;
50807 int window_num;
50808 int insn_num_uops;
50809 int sum;
50811 if (INSN_CODE (insn) < 0)
50812 return;
50814 byte_len = min_insn_size (insn);
50815 window_list = dispatch_window_list;
50816 next_list = window_list->next;
50817 path = get_insn_path (insn);
50818 insn_group = get_insn_group (insn);
50820 /* Get the last dispatch window. */
50821 if (next_list)
50822 window_list = dispatch_window_list->next;
50824 if (path == path_single)
50825 insn_num_uops = 1;
50826 else if (path == path_double)
50827 insn_num_uops = 2;
50828 else
50829 insn_num_uops = (int) path;
50831 /* If the current window is full, get a new window.
50832 Window number zero is full if MAX_INSN uops are scheduled in it.
50833 Window number one is full if window zero's bytes plus window
50834 one's bytes equal 32, or if adding the bytes of the new instruction
50835 to the total makes it greater than 48, or if it already has MAX_INSN
50836 instructions in it. */
50837 num_insn = window_list->num_insn;
50838 num_uops = window_list->num_uops;
50839 window_num = window_list->window_num;
50840 insn_fits = fits_dispatch_window (insn);
50842 if (num_insn >= MAX_INSN
50843 || num_uops + insn_num_uops > MAX_INSN
50844 || !(insn_fits))
50846 window_num = ~window_num & 1;
50847 window_list = allocate_next_window (window_num);
50850 if (window_num == 0)
50852 add_insn_window (insn, window_list, insn_num_uops);
50853 if (window_list->num_insn >= MAX_INSN
50854 && insn_group == disp_branch)
50856 process_end_window ();
50857 return;
50860 else if (window_num == 1)
50862 window0_list = window_list->prev;
50863 sum = window0_list->window_size + window_list->window_size;
50864 if (sum == 32
50865 || (byte_len + sum) >= 48)
50867 process_end_window ();
50868 window_list = dispatch_window_list;
50871 add_insn_window (insn, window_list, insn_num_uops);
50873 else
50874 gcc_unreachable ();
50876 if (is_end_basic_block (insn_group))
50878 /* The end of the basic block is reached; do end-of-basic-block processing. */
50879 process_end_window ();
50880 return;
50884 /* Print the dispatch window, WINDOW_NUM, to FILE. */
50886 DEBUG_FUNCTION static void
50887 debug_dispatch_window_file (FILE *file, int window_num)
50889 dispatch_windows *list;
50890 int i;
50892 if (window_num == 0)
50893 list = dispatch_window_list;
50894 else
50895 list = dispatch_window_list1;
50897 fprintf (file, "Window #%d:\n", list->window_num);
50898 fprintf (file, " num_insn = %d, num_uops = %d, window_size = %d\n",
50899 list->num_insn, list->num_uops, list->window_size);
50900 fprintf (file, " num_imm = %d, num_imm_32 = %d, num_imm_64 = %d, imm_size = %d\n",
50901 list->num_imm, list->num_imm_32, list->num_imm_64, list->imm_size);
50903 fprintf (file, " num_loads = %d, num_stores = %d\n", list->num_loads,
50904 list->num_stores);
50905 fprintf (file, " insn info:\n");
50907 for (i = 0; i < MAX_INSN; i++)
50909 if (!list->window[i].insn)
50910 break;
50911 fprintf (file, " group[%d] = %s, insn[%d] = %p, path[%d] = %d byte_len[%d] = %d, imm_bytes[%d] = %d\n",
50912 i, group_name[list->window[i].group],
50913 i, (void *)list->window[i].insn,
50914 i, list->window[i].path,
50915 i, list->window[i].byte_len,
50916 i, list->window[i].imm_bytes);
50920 /* Print to stdout a dispatch window. */
50922 DEBUG_FUNCTION void
50923 debug_dispatch_window (int window_num)
50925 debug_dispatch_window_file (stdout, window_num);
50928 /* Print INSN dispatch information to FILE. */
50930 DEBUG_FUNCTION static void
50931 debug_insn_dispatch_info_file (FILE *file, rtx_insn *insn)
50933 int byte_len;
50934 enum insn_path path;
50935 enum dispatch_group group;
50936 int imm_size;
50937 int num_imm_operand;
50938 int num_imm32_operand;
50939 int num_imm64_operand;
50941 if (INSN_CODE (insn) < 0)
50942 return;
50944 byte_len = min_insn_size (insn);
50945 path = get_insn_path (insn);
50946 group = get_insn_group (insn);
50947 imm_size = get_num_immediates (insn, &num_imm_operand, &num_imm32_operand,
50948 &num_imm64_operand);
50950 fprintf (file, " insn info:\n");
50951 fprintf (file, " group = %s, path = %d, byte_len = %d\n",
50952 group_name[group], path, byte_len);
50953 fprintf (file, " num_imm = %d, num_imm_32 = %d, num_imm_64 = %d, imm_size = %d\n",
50954 num_imm_operand, num_imm32_operand, num_imm64_operand, imm_size);
50957 /* Print to stdout the status of the ready list with respect to
50958 dispatch windows. */
50960 DEBUG_FUNCTION void
50961 debug_ready_dispatch (void)
50963 int i;
50964 int no_ready = number_in_ready ();
50966 fprintf (stdout, "Number of ready: %d\n", no_ready);
50968 for (i = 0; i < no_ready; i++)
50969 debug_insn_dispatch_info_file (stdout, get_ready_element (i));
50972 /* This routine is the driver of the dispatch scheduler. */
50974 static void
50975 do_dispatch (rtx_insn *insn, int mode)
50977 if (mode == DISPATCH_INIT)
50978 init_dispatch_sched ();
50979 else if (mode == ADD_TO_DISPATCH_WINDOW)
50980 add_to_dispatch_window (insn);
50983 /* Return TRUE if Dispatch Scheduling is supported. */
50985 static bool
50986 has_dispatch (rtx_insn *insn, int action)
50988 if ((TARGET_BDVER1 || TARGET_BDVER2 || TARGET_BDVER3 || TARGET_BDVER4)
50989 && flag_dispatch_scheduler)
50990 switch (action)
50992 default:
50993 return false;
50995 case IS_DISPATCH_ON:
50996 return true;
50997 break;
50999 case IS_CMP:
51000 return is_cmp (insn);
51002 case DISPATCH_VIOLATION:
51003 return dispatch_violation ();
51005 case FITS_DISPATCH_WINDOW:
51006 return fits_dispatch_window (insn);
51009 return false;
51012 /* Implementation of reassociation_width target hook used by
51013 the reassoc phase to identify the parallelism level in a reassociated
51014 tree. The statement's tree_code is passed in OPC. The arguments'
51015 type is passed in MODE.
51017 Currently parallel reassociation is enabled for Atom
51018 processors only and we set reassociation width to be 2
51019 because Atom may issue up to 2 instructions per cycle.
51021 Return value should be fixed if parallel reassociation is
51022 enabled for other processors. */
51024 static int
51025 ix86_reassociation_width (unsigned int, machine_mode mode)
51027 int res = 1;
51029 /* Vector part. */
51030 if (VECTOR_MODE_P (mode))
51032 if (TARGET_VECTOR_PARALLEL_EXECUTION)
51033 return 2;
51034 else
51035 return 1;
51038 /* Scalar part. */
51039 if (INTEGRAL_MODE_P (mode) && TARGET_REASSOC_INT_TO_PARALLEL)
51040 res = 2;
51041 else if (FLOAT_MODE_P (mode) && TARGET_REASSOC_FP_TO_PARALLEL)
51042 res = 2;
51044 return res;
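/* A width of 2 means the reassociation pass may rewrite a chain such
   as a + b + c + d into (a + b) + (c + d), exposing two independent
   operations per step instead of one serial chain (informal
   illustration of what the returned width permits).  */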
51047 /* ??? No autovectorization into MMX or 3DNOW until we can reliably
51048 place emms and femms instructions. */
51050 static machine_mode
51051 ix86_preferred_simd_mode (machine_mode mode)
51053 if (!TARGET_SSE)
51054 return word_mode;
51056 switch (mode)
51058 case QImode:
51059 return TARGET_AVX512BW ? V64QImode :
51060 (TARGET_AVX && !TARGET_PREFER_AVX128) ? V32QImode : V16QImode;
51061 case HImode:
51062 return TARGET_AVX512BW ? V32HImode :
51063 (TARGET_AVX && !TARGET_PREFER_AVX128) ? V16HImode : V8HImode;
51064 case SImode:
51065 return TARGET_AVX512F ? V16SImode :
51066 (TARGET_AVX && !TARGET_PREFER_AVX128) ? V8SImode : V4SImode;
51067 case DImode:
51068 return TARGET_AVX512F ? V8DImode :
51069 (TARGET_AVX && !TARGET_PREFER_AVX128) ? V4DImode : V2DImode;
51071 case SFmode:
51072 if (TARGET_AVX512F)
51073 return V16SFmode;
51074 else if (TARGET_AVX && !TARGET_PREFER_AVX128)
51075 return V8SFmode;
51076 else
51077 return V4SFmode;
51079 case DFmode:
51080 if (!TARGET_VECTORIZE_DOUBLE)
51081 return word_mode;
51082 else if (TARGET_AVX512F)
51083 return V8DFmode;
51084 else if (TARGET_AVX && !TARGET_PREFER_AVX128)
51085 return V4DFmode;
51086 else if (TARGET_SSE2)
51087 return V2DFmode;
51088 /* FALLTHRU */
51090 default:
51091 return word_mode;
51095 /* If AVX is enabled then try vectorizing with both 256bit and 128bit
51096 vectors. If AVX512F is enabled then try vectorizing with 512bit,
51097 256bit and 128bit vectors. */
51099 static unsigned int
51100 ix86_autovectorize_vector_sizes (void)
51102 return TARGET_AVX512F ? 64 | 32 | 16 :
51103 (TARGET_AVX && !TARGET_PREFER_AVX128) ? 32 | 16 : 0;
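/* E.g. (illustrative): on an AVX2 target without -mprefer-avx128 this
   returns 32 | 16 == 48, telling the vectorizer it may try both
   32-byte and 16-byte vectors; a return value of 0 leaves only the
   preferred SIMD mode.  */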
51108 /* Return class of registers which could be used for pseudo of MODE
51109 and of class RCLASS for spilling instead of memory. Return NO_REGS
51110 if it is not possible or not profitable. */
51111 static reg_class_t
51112 ix86_spill_class (reg_class_t rclass, machine_mode mode)
51114 if (TARGET_SSE && TARGET_GENERAL_REGS_SSE_SPILL && ! TARGET_MMX
51115 && (mode == SImode || (TARGET_64BIT && mode == DImode))
51116 && rclass != NO_REGS && INTEGER_CLASS_P (rclass))
51117 return ALL_SSE_REGS;
51118 return NO_REGS;
51121 /* Implement targetm.vectorize.init_cost. */
51123 static void *
51124 ix86_init_cost (struct loop *)
51126 unsigned *cost = XNEWVEC (unsigned, 3);
51127 cost[vect_prologue] = cost[vect_body] = cost[vect_epilogue] = 0;
51128 return cost;
51131 /* Implement targetm.vectorize.add_stmt_cost. */
51133 static unsigned
51134 ix86_add_stmt_cost (void *data, int count, enum vect_cost_for_stmt kind,
51135 struct _stmt_vec_info *stmt_info, int misalign,
51136 enum vect_cost_model_location where)
51138 unsigned *cost = (unsigned *) data;
51139 unsigned retval = 0;
51141 tree vectype = stmt_info ? stmt_vectype (stmt_info) : NULL_TREE;
51142 int stmt_cost = ix86_builtin_vectorization_cost (kind, vectype, misalign);
51144 /* Statements in an inner loop relative to the loop being
51145 vectorized are weighted more heavily. The value here is
51146 arbitrary and could potentially be improved with analysis. */
51147 if (where == vect_body && stmt_info && stmt_in_inner_loop_p (stmt_info))
51148 count *= 50; /* FIXME. */
51150 retval = (unsigned) (count * stmt_cost);
51152 /* We need to multiply the cost of all vector stmts by 1.7 (estimated cost)
51153 for Silvermont, as it has an out-of-order integer pipeline and can execute
51154 2 scalar instructions per tick, but has an in-order SIMD pipeline. */
51155 if (TARGET_SILVERMONT || TARGET_INTEL)
51156 if (stmt_info && stmt_info->stmt)
51158 tree lhs_op = gimple_get_lhs (stmt_info->stmt);
51159 if (lhs_op && TREE_CODE (TREE_TYPE (lhs_op)) == INTEGER_TYPE)
51160 retval = (retval * 17) / 10;
51163 cost[where] += retval;
51165 return retval;
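/* Rough arithmetic for the adjustments above (illustrative numbers):
   with COUNT == 1 and a base stmt_cost of 4, a statement producing an
   integer result on Silvermont is charged (1 * 4 * 17) / 10 == 6; if
   the statement also sits in an inner loop relative to the loop being
   vectorized, COUNT is first scaled by 50.  */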
51168 /* Implement targetm.vectorize.finish_cost. */
51170 static void
51171 ix86_finish_cost (void *data, unsigned *prologue_cost,
51172 unsigned *body_cost, unsigned *epilogue_cost)
51174 unsigned *cost = (unsigned *) data;
51175 *prologue_cost = cost[vect_prologue];
51176 *body_cost = cost[vect_body];
51177 *epilogue_cost = cost[vect_epilogue];
51180 /* Implement targetm.vectorize.destroy_cost_data. */
51182 static void
51183 ix86_destroy_cost_data (void *data)
51185 free (data);
51188 /* Validate target specific memory model bits in VAL. */
51190 static unsigned HOST_WIDE_INT
51191 ix86_memmodel_check (unsigned HOST_WIDE_INT val)
51193 unsigned HOST_WIDE_INT model = val & MEMMODEL_MASK;
51194 bool strong;
51196 if (val & ~(unsigned HOST_WIDE_INT)(IX86_HLE_ACQUIRE|IX86_HLE_RELEASE
51197 |MEMMODEL_MASK)
51198 || ((val & IX86_HLE_ACQUIRE) && (val & IX86_HLE_RELEASE)))
51200 warning (OPT_Winvalid_memory_model,
51201 "Unknown architecture specific memory model");
51202 return MEMMODEL_SEQ_CST;
51204 strong = (model == MEMMODEL_ACQ_REL || model == MEMMODEL_SEQ_CST);
51205 if (val & IX86_HLE_ACQUIRE && !(model == MEMMODEL_ACQUIRE || strong))
51207 warning (OPT_Winvalid_memory_model,
51208 "HLE_ACQUIRE not used with ACQUIRE or stronger memory model");
51209 return MEMMODEL_SEQ_CST | IX86_HLE_ACQUIRE;
51211 if (val & IX86_HLE_RELEASE && !(model == MEMMODEL_RELEASE || strong))
51213 warning (OPT_Winvalid_memory_model,
51214 "HLE_RELEASE not used with RELEASE or stronger memory model");
51215 return MEMMODEL_SEQ_CST | IX86_HLE_RELEASE;
51217 return val;
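/* Usage sketch, assuming the user-level __ATOMIC_HLE_* macros that map
   onto the IX86_HLE_* bits checked above:

     __atomic_store_n (&lock, 0, __ATOMIC_RELEASE | __ATOMIC_HLE_RELEASE);

   passes the check unchanged, whereas combining IX86_HLE_ACQUIRE with a
   plain MEMMODEL_RELEASE model triggers the warning and is rewritten to
   MEMMODEL_SEQ_CST | IX86_HLE_ACQUIRE.  */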
51220 /* Set CLONEI->vecsize_mangle, CLONEI->vecsize_int,
51221 CLONEI->vecsize_float and if CLONEI->simdlen is 0, also
51222 CLONEI->simdlen. Return 0 if SIMD clones shouldn't be emitted,
51223 or number of vecsize_mangle variants that should be emitted. */
51225 static int
51226 ix86_simd_clone_compute_vecsize_and_simdlen (struct cgraph_node *node,
51227 struct cgraph_simd_clone *clonei,
51228 tree base_type, int num)
51230 int ret = 1;
51232 if (clonei->simdlen
51233 && (clonei->simdlen < 2
51234 || clonei->simdlen > 16
51235 || (clonei->simdlen & (clonei->simdlen - 1)) != 0))
51237 warning_at (DECL_SOURCE_LOCATION (node->decl), 0,
51238 "unsupported simdlen %d", clonei->simdlen);
51239 return 0;
51242 tree ret_type = TREE_TYPE (TREE_TYPE (node->decl));
51243 if (TREE_CODE (ret_type) != VOID_TYPE)
51244 switch (TYPE_MODE (ret_type))
51246 case QImode:
51247 case HImode:
51248 case SImode:
51249 case DImode:
51250 case SFmode:
51251 case DFmode:
51252 /* case SCmode: */
51253 /* case DCmode: */
51254 break;
51255 default:
51256 warning_at (DECL_SOURCE_LOCATION (node->decl), 0,
51257 "unsupported return type %qT for simd\n", ret_type);
51258 return 0;
51261 tree t;
51262 int i;
51264 for (t = DECL_ARGUMENTS (node->decl), i = 0; t; t = DECL_CHAIN (t), i++)
51265 /* FIXME: Shouldn't we allow such arguments if they are uniform? */
51266 switch (TYPE_MODE (TREE_TYPE (t)))
51268 case QImode:
51269 case HImode:
51270 case SImode:
51271 case DImode:
51272 case SFmode:
51273 case DFmode:
51274 /* case SCmode: */
51275 /* case DCmode: */
51276 break;
51277 default:
51278 warning_at (DECL_SOURCE_LOCATION (node->decl), 0,
51279 "unsupported argument type %qT for simd\n", TREE_TYPE (t));
51280 return 0;
51283 if (clonei->cilk_elemental)
51285 /* Parse the processor clause here. If not present, default to 'b'. */
51286 clonei->vecsize_mangle = 'b';
51288 else if (!TREE_PUBLIC (node->decl))
51290 /* If the function isn't exported, we can pick up just one ISA
51291 for the clones. */
51292 if (TARGET_AVX2)
51293 clonei->vecsize_mangle = 'd';
51294 else if (TARGET_AVX)
51295 clonei->vecsize_mangle = 'c';
51296 else
51297 clonei->vecsize_mangle = 'b';
51298 ret = 1;
51300 else
51302 clonei->vecsize_mangle = "bcd"[num];
51303 ret = 3;
51305 switch (clonei->vecsize_mangle)
51307 case 'b':
51308 clonei->vecsize_int = 128;
51309 clonei->vecsize_float = 128;
51310 break;
51311 case 'c':
51312 clonei->vecsize_int = 128;
51313 clonei->vecsize_float = 256;
51314 break;
51315 case 'd':
51316 clonei->vecsize_int = 256;
51317 clonei->vecsize_float = 256;
51318 break;
51320 if (clonei->simdlen == 0)
51322 if (SCALAR_INT_MODE_P (TYPE_MODE (base_type)))
51323 clonei->simdlen = clonei->vecsize_int;
51324 else
51325 clonei->simdlen = clonei->vecsize_float;
51326 clonei->simdlen /= GET_MODE_BITSIZE (TYPE_MODE (base_type));
51327 if (clonei->simdlen > 16)
51328 clonei->simdlen = 16;
51330 return ret;
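/* Worked example (illustrative): for an exported simd-clone candidate
   with a double base type and no explicit simdlen, three variants 'b',
   'c' and 'd' are produced; for the 'c' (AVX) variant vecsize_float is
   256, so simdlen becomes 256 / 64 == 4.  */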
51333 /* Add target attribute to SIMD clone NODE if needed. */
51335 static void
51336 ix86_simd_clone_adjust (struct cgraph_node *node)
51338 const char *str = NULL;
51339 gcc_assert (node->decl == cfun->decl);
51340 switch (node->simdclone->vecsize_mangle)
51342 case 'b':
51343 if (!TARGET_SSE2)
51344 str = "sse2";
51345 break;
51346 case 'c':
51347 if (!TARGET_AVX)
51348 str = "avx";
51349 break;
51350 case 'd':
51351 if (!TARGET_AVX2)
51352 str = "avx2";
51353 break;
51354 default:
51355 gcc_unreachable ();
51357 if (str == NULL)
51358 return;
51359 push_cfun (NULL);
51360 tree args = build_tree_list (NULL_TREE, build_string (strlen (str), str));
51361 bool ok = ix86_valid_target_attribute_p (node->decl, NULL, args, 0);
51362 gcc_assert (ok);
51363 pop_cfun ();
51364 ix86_reset_previous_fndecl ();
51365 ix86_set_current_function (node->decl);
51368 /* If SIMD clone NODE can't be used in a vectorized loop
51369 in the current function, return -1, otherwise return the badness of using it
51370 (0 if it is the most desirable from the vecsize_mangle point of view, 1
51371 slightly less desirable, etc.). */
51373 static int
51374 ix86_simd_clone_usable (struct cgraph_node *node)
51376 switch (node->simdclone->vecsize_mangle)
51378 case 'b':
51379 if (!TARGET_SSE2)
51380 return -1;
51381 if (!TARGET_AVX)
51382 return 0;
51383 return TARGET_AVX2 ? 2 : 1;
51384 case 'c':
51385 if (!TARGET_AVX)
51386 return -1;
51387 return TARGET_AVX2 ? 1 : 0;
51388 break;
51389 case 'd':
51390 if (!TARGET_AVX2)
51391 return -1;
51392 return 0;
51393 default:
51394 gcc_unreachable ();
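/* E.g. (illustrative): when compiling with AVX2 enabled, a 'd' (AVX2)
   clone gets badness 0, a 'c' (AVX) clone badness 1 and a 'b' (SSE2)
   clone badness 2, so the widest usable clone is preferred.  */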
51398 /* This function adjusts the unroll factor based on
51399 the hardware capabilities. For example, bdver3 has
51400 a loop buffer which makes unrolling of smaller
51401 loops less important. This function decides the
51402 unroll factor using the number of memory references
51403 (value 32 is used) as a heuristic. */
51405 static unsigned
51406 ix86_loop_unroll_adjust (unsigned nunroll, struct loop *loop)
51408 basic_block *bbs;
51409 rtx_insn *insn;
51410 unsigned i;
51411 unsigned mem_count = 0;
51413 if (!TARGET_ADJUST_UNROLL)
51414 return nunroll;
51416 /* Count the number of memory references within the loop body.
51417 This value determines the unrolling factor for bdver3 and bdver4
51418 architectures. */
51419 subrtx_iterator::array_type array;
51420 bbs = get_loop_body (loop);
51421 for (i = 0; i < loop->num_nodes; i++)
51422 FOR_BB_INSNS (bbs[i], insn)
51423 if (NONDEBUG_INSN_P (insn))
51424 FOR_EACH_SUBRTX (iter, array, insn, NONCONST)
51425 if (const_rtx x = *iter)
51426 if (MEM_P (x))
51428 machine_mode mode = GET_MODE (x);
51429 unsigned int n_words = GET_MODE_SIZE (mode) / UNITS_PER_WORD;
51430 if (n_words > 4)
51431 mem_count += 2;
51432 else
51433 mem_count += 1;
51435 free (bbs);
51437 if (mem_count && mem_count <= 32)
51438 return 32 / mem_count;
51440 return nunroll;
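/* Worked example (illustrative): a loop body with 8 ordinary memory
   references gets an unroll factor of 32 / 8 == 4; references wider
   than 4 words count twice, and once the weighted count exceeds 32 the
   factor chosen by the generic heuristics is kept unchanged.  */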
51444 /* Implement TARGET_FLOAT_EXCEPTIONS_ROUNDING_SUPPORTED_P. */
51446 static bool
51447 ix86_float_exceptions_rounding_supported_p (void)
51449 /* For x87 floating point with standard excess precision handling,
51450 there is no adddf3 pattern (since x87 floating point only has
51451 XFmode operations) so the default hook implementation gets this
51452 wrong. */
51453 return TARGET_80387 || TARGET_SSE_MATH;
51456 /* Implement TARGET_ATOMIC_ASSIGN_EXPAND_FENV. */
51458 static void
51459 ix86_atomic_assign_expand_fenv (tree *hold, tree *clear, tree *update)
51461 if (!TARGET_80387 && !TARGET_SSE_MATH)
51462 return;
51463 tree exceptions_var = create_tmp_var (integer_type_node);
51464 if (TARGET_80387)
51466 tree fenv_index_type = build_index_type (size_int (6));
51467 tree fenv_type = build_array_type (unsigned_type_node, fenv_index_type);
51468 tree fenv_var = create_tmp_var (fenv_type);
51469 mark_addressable (fenv_var);
51470 tree fenv_ptr = build_pointer_type (fenv_type);
51471 tree fenv_addr = build1 (ADDR_EXPR, fenv_ptr, fenv_var);
51472 fenv_addr = fold_convert (ptr_type_node, fenv_addr);
51473 tree fnstenv = ix86_builtins[IX86_BUILTIN_FNSTENV];
51474 tree fldenv = ix86_builtins[IX86_BUILTIN_FLDENV];
51475 tree fnstsw = ix86_builtins[IX86_BUILTIN_FNSTSW];
51476 tree fnclex = ix86_builtins[IX86_BUILTIN_FNCLEX];
51477 tree hold_fnstenv = build_call_expr (fnstenv, 1, fenv_addr);
51478 tree hold_fnclex = build_call_expr (fnclex, 0);
51479 *hold = build2 (COMPOUND_EXPR, void_type_node, hold_fnstenv,
51480 hold_fnclex);
51481 *clear = build_call_expr (fnclex, 0);
51482 tree sw_var = create_tmp_var (short_unsigned_type_node);
51483 tree fnstsw_call = build_call_expr (fnstsw, 0);
51484 tree sw_mod = build2 (MODIFY_EXPR, short_unsigned_type_node,
51485 sw_var, fnstsw_call);
51486 tree exceptions_x87 = fold_convert (integer_type_node, sw_var);
51487 tree update_mod = build2 (MODIFY_EXPR, integer_type_node,
51488 exceptions_var, exceptions_x87);
51489 *update = build2 (COMPOUND_EXPR, integer_type_node,
51490 sw_mod, update_mod);
51491 tree update_fldenv = build_call_expr (fldenv, 1, fenv_addr);
51492 *update = build2 (COMPOUND_EXPR, void_type_node, *update, update_fldenv);
51494 if (TARGET_SSE_MATH)
51496 tree mxcsr_orig_var = create_tmp_var (unsigned_type_node);
51497 tree mxcsr_mod_var = create_tmp_var (unsigned_type_node);
51498 tree stmxcsr = ix86_builtins[IX86_BUILTIN_STMXCSR];
51499 tree ldmxcsr = ix86_builtins[IX86_BUILTIN_LDMXCSR];
51500 tree stmxcsr_hold_call = build_call_expr (stmxcsr, 0);
51501 tree hold_assign_orig = build2 (MODIFY_EXPR, unsigned_type_node,
51502 mxcsr_orig_var, stmxcsr_hold_call);
51503 tree hold_mod_val = build2 (BIT_IOR_EXPR, unsigned_type_node,
51504 mxcsr_orig_var,
51505 build_int_cst (unsigned_type_node, 0x1f80));
51506 hold_mod_val = build2 (BIT_AND_EXPR, unsigned_type_node, hold_mod_val,
51507 build_int_cst (unsigned_type_node, 0xffffffc0));
51508 tree hold_assign_mod = build2 (MODIFY_EXPR, unsigned_type_node,
51509 mxcsr_mod_var, hold_mod_val);
51510 tree ldmxcsr_hold_call = build_call_expr (ldmxcsr, 1, mxcsr_mod_var);
51511 tree hold_all = build2 (COMPOUND_EXPR, unsigned_type_node,
51512 hold_assign_orig, hold_assign_mod);
51513 hold_all = build2 (COMPOUND_EXPR, void_type_node, hold_all,
51514 ldmxcsr_hold_call);
51515 if (*hold)
51516 *hold = build2 (COMPOUND_EXPR, void_type_node, *hold, hold_all);
51517 else
51518 *hold = hold_all;
51519 tree ldmxcsr_clear_call = build_call_expr (ldmxcsr, 1, mxcsr_mod_var);
51520 if (*clear)
51521 *clear = build2 (COMPOUND_EXPR, void_type_node, *clear,
51522 ldmxcsr_clear_call);
51523 else
51524 *clear = ldmxcsr_clear_call;
51525 tree stmxcsr_update_call = build_call_expr (stmxcsr, 0);
51526 tree exceptions_sse = fold_convert (integer_type_node,
51527 stmxcsr_update_call);
51528 if (*update)
51530 tree exceptions_mod = build2 (BIT_IOR_EXPR, integer_type_node,
51531 exceptions_var, exceptions_sse);
51532 tree exceptions_assign = build2 (MODIFY_EXPR, integer_type_node,
51533 exceptions_var, exceptions_mod);
51534 *update = build2 (COMPOUND_EXPR, integer_type_node, *update,
51535 exceptions_assign);
51537 else
51538 *update = build2 (MODIFY_EXPR, integer_type_node,
51539 exceptions_var, exceptions_sse);
51540 tree ldmxcsr_update_call = build_call_expr (ldmxcsr, 1, mxcsr_orig_var);
51541 *update = build2 (COMPOUND_EXPR, void_type_node, *update,
51542 ldmxcsr_update_call);
51544 tree atomic_feraiseexcept
51545 = builtin_decl_implicit (BUILT_IN_ATOMIC_FERAISEEXCEPT);
51546 tree atomic_feraiseexcept_call = build_call_expr (atomic_feraiseexcept,
51547 1, exceptions_var);
51548 *update = build2 (COMPOUND_EXPR, void_type_node, *update,
51549 atomic_feraiseexcept_call);
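/* Schematic of the trees built above, written as informal pseudo-C
   (names are descriptive, not the actual temporaries):

     hold:   fnstenv (&env); fnclex ();                       // x87
             mxcsr = stmxcsr ();
             ldmxcsr ((mxcsr | 0x1f80) & 0xffffffc0);         // SSE
     clear:  fnclex (); ldmxcsr (mxcsr_masked);
     update: exc = fnstsw (); fldenv (&env);                  // x87
             exc |= stmxcsr (); ldmxcsr (mxcsr);              // SSE
             __atomic_feraiseexcept (exc);

   i.e. HOLD saves the environment and masks exceptions, CLEAR discards
   flags raised in the meantime, and UPDATE re-raises, in the restored
   environment, the exceptions recorded during the atomic operation.  */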
51552 /* Return mode to be used for bounds or VOIDmode
51553 if bounds are not supported. */
51555 static enum machine_mode
51556 ix86_mpx_bound_mode ()
51558 /* Do not support pointer checker if MPX
51559 is not enabled. */
51560 if (!TARGET_MPX)
51562 if (flag_check_pointer_bounds)
51563 warning (0, "Pointer Checker requires MPX support on this target."
51564 " Use the -mmpx option to enable MPX.");
51565 return VOIDmode;
51568 return BNDmode;
51571 /* Return constant used to statically initialize constant bounds.
51573 This function is used to create special bound values. For now
51574 only INIT bounds and NONE bounds are expected. More special
51575 values may be added later. */
51577 static tree
51578 ix86_make_bounds_constant (HOST_WIDE_INT lb, HOST_WIDE_INT ub)
51580 tree low = lb ? build_minus_one_cst (pointer_sized_int_node)
51581 : build_zero_cst (pointer_sized_int_node);
51582 tree high = ub ? build_zero_cst (pointer_sized_int_node)
51583 : build_minus_one_cst (pointer_sized_int_node);
51585 /* This function is supposed to be used to create INIT and
51586 NONE bounds only. */
51587 gcc_assert ((lb == 0 && ub == -1)
51588 || (lb == -1 && ub == 0));
51590 return build_complex (NULL, low, high);
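/* Note that the upper bound is kept in one's-complement form (compare
   ix86_initialize_bounds below, which stores ~UB), so the accepted
   argument pairs LB/UB == 0/-1 and -1/0 yield the complex constants
   {0, 0} and {-1, -1} respectively.  */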
51593 /* Generate a list of statements STMTS to initialize pointer bounds
51594 variable VAR with bounds LB and UB. Return the number of generated
51595 statements. */
51597 static int
51598 ix86_initialize_bounds (tree var, tree lb, tree ub, tree *stmts)
51600 tree bnd_ptr = build_pointer_type (pointer_sized_int_node);
51601 tree lhs, modify, var_p;
51603 ub = build1 (BIT_NOT_EXPR, pointer_sized_int_node, ub);
51604 var_p = fold_convert (bnd_ptr, build_fold_addr_expr (var));
51606 lhs = build1 (INDIRECT_REF, pointer_sized_int_node, var_p);
51607 modify = build2 (MODIFY_EXPR, TREE_TYPE (lhs), lhs, lb);
51608 append_to_statement_list (modify, stmts);
51610 lhs = build1 (INDIRECT_REF, pointer_sized_int_node,
51611 build2 (POINTER_PLUS_EXPR, bnd_ptr, var_p,
51612 TYPE_SIZE_UNIT (pointer_sized_int_node)));
51613 modify = build2 (MODIFY_EXPR, TREE_TYPE (lhs), lhs, ub);
51614 append_to_statement_list (modify, stmts);
51616 return 2;
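/* For a variable VAR and bounds LB/UB the two statements amount to,
   roughly (informal C rendering, with pointer_sized_int_node playing
   the role of uintptr_t):

     *(uintptr_t *) &var       = lb;
     *((uintptr_t *) &var + 1) = ~ub;

   and the return value 2 reports the number of generated statements.  */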
51619 /* Initialize the GCC target structure. */
51620 #undef TARGET_RETURN_IN_MEMORY
51621 #define TARGET_RETURN_IN_MEMORY ix86_return_in_memory
51623 #undef TARGET_LEGITIMIZE_ADDRESS
51624 #define TARGET_LEGITIMIZE_ADDRESS ix86_legitimize_address
51626 #undef TARGET_ATTRIBUTE_TABLE
51627 #define TARGET_ATTRIBUTE_TABLE ix86_attribute_table
51628 #undef TARGET_FUNCTION_ATTRIBUTE_INLINABLE_P
51629 #define TARGET_FUNCTION_ATTRIBUTE_INLINABLE_P hook_bool_const_tree_true
51630 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
51631 # undef TARGET_MERGE_DECL_ATTRIBUTES
51632 # define TARGET_MERGE_DECL_ATTRIBUTES merge_dllimport_decl_attributes
51633 #endif
51635 #undef TARGET_COMP_TYPE_ATTRIBUTES
51636 #define TARGET_COMP_TYPE_ATTRIBUTES ix86_comp_type_attributes
51638 #undef TARGET_INIT_BUILTINS
51639 #define TARGET_INIT_BUILTINS ix86_init_builtins
51640 #undef TARGET_BUILTIN_DECL
51641 #define TARGET_BUILTIN_DECL ix86_builtin_decl
51642 #undef TARGET_EXPAND_BUILTIN
51643 #define TARGET_EXPAND_BUILTIN ix86_expand_builtin
51645 #undef TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION
51646 #define TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION \
51647 ix86_builtin_vectorized_function
51649 #undef TARGET_VECTORIZE_BUILTIN_TM_LOAD
51650 #define TARGET_VECTORIZE_BUILTIN_TM_LOAD ix86_builtin_tm_load
51652 #undef TARGET_VECTORIZE_BUILTIN_TM_STORE
51653 #define TARGET_VECTORIZE_BUILTIN_TM_STORE ix86_builtin_tm_store
51655 #undef TARGET_VECTORIZE_BUILTIN_GATHER
51656 #define TARGET_VECTORIZE_BUILTIN_GATHER ix86_vectorize_builtin_gather
51658 #undef TARGET_BUILTIN_RECIPROCAL
51659 #define TARGET_BUILTIN_RECIPROCAL ix86_builtin_reciprocal
51661 #undef TARGET_ASM_FUNCTION_EPILOGUE
51662 #define TARGET_ASM_FUNCTION_EPILOGUE ix86_output_function_epilogue
51664 #undef TARGET_ENCODE_SECTION_INFO
51665 #ifndef SUBTARGET_ENCODE_SECTION_INFO
51666 #define TARGET_ENCODE_SECTION_INFO ix86_encode_section_info
51667 #else
51668 #define TARGET_ENCODE_SECTION_INFO SUBTARGET_ENCODE_SECTION_INFO
51669 #endif
51671 #undef TARGET_ASM_OPEN_PAREN
51672 #define TARGET_ASM_OPEN_PAREN ""
51673 #undef TARGET_ASM_CLOSE_PAREN
51674 #define TARGET_ASM_CLOSE_PAREN ""
51676 #undef TARGET_ASM_BYTE_OP
51677 #define TARGET_ASM_BYTE_OP ASM_BYTE
51679 #undef TARGET_ASM_ALIGNED_HI_OP
51680 #define TARGET_ASM_ALIGNED_HI_OP ASM_SHORT
51681 #undef TARGET_ASM_ALIGNED_SI_OP
51682 #define TARGET_ASM_ALIGNED_SI_OP ASM_LONG
51683 #ifdef ASM_QUAD
51684 #undef TARGET_ASM_ALIGNED_DI_OP
51685 #define TARGET_ASM_ALIGNED_DI_OP ASM_QUAD
51686 #endif
51688 #undef TARGET_PROFILE_BEFORE_PROLOGUE
51689 #define TARGET_PROFILE_BEFORE_PROLOGUE ix86_profile_before_prologue
51691 #undef TARGET_MANGLE_DECL_ASSEMBLER_NAME
51692 #define TARGET_MANGLE_DECL_ASSEMBLER_NAME ix86_mangle_decl_assembler_name
51694 #undef TARGET_ASM_UNALIGNED_HI_OP
51695 #define TARGET_ASM_UNALIGNED_HI_OP TARGET_ASM_ALIGNED_HI_OP
51696 #undef TARGET_ASM_UNALIGNED_SI_OP
51697 #define TARGET_ASM_UNALIGNED_SI_OP TARGET_ASM_ALIGNED_SI_OP
51698 #undef TARGET_ASM_UNALIGNED_DI_OP
51699 #define TARGET_ASM_UNALIGNED_DI_OP TARGET_ASM_ALIGNED_DI_OP
51701 #undef TARGET_PRINT_OPERAND
51702 #define TARGET_PRINT_OPERAND ix86_print_operand
51703 #undef TARGET_PRINT_OPERAND_ADDRESS
51704 #define TARGET_PRINT_OPERAND_ADDRESS ix86_print_operand_address
51705 #undef TARGET_PRINT_OPERAND_PUNCT_VALID_P
51706 #define TARGET_PRINT_OPERAND_PUNCT_VALID_P ix86_print_operand_punct_valid_p
51707 #undef TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA
51708 #define TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA i386_asm_output_addr_const_extra
51710 #undef TARGET_SCHED_INIT_GLOBAL
51711 #define TARGET_SCHED_INIT_GLOBAL ix86_sched_init_global
51712 #undef TARGET_SCHED_ADJUST_COST
51713 #define TARGET_SCHED_ADJUST_COST ix86_adjust_cost
51714 #undef TARGET_SCHED_ISSUE_RATE
51715 #define TARGET_SCHED_ISSUE_RATE ix86_issue_rate
51716 #undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD
51717 #define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD \
51718 ia32_multipass_dfa_lookahead
51719 #undef TARGET_SCHED_MACRO_FUSION_P
51720 #define TARGET_SCHED_MACRO_FUSION_P ix86_macro_fusion_p
51721 #undef TARGET_SCHED_MACRO_FUSION_PAIR_P
51722 #define TARGET_SCHED_MACRO_FUSION_PAIR_P ix86_macro_fusion_pair_p
51724 #undef TARGET_FUNCTION_OK_FOR_SIBCALL
51725 #define TARGET_FUNCTION_OK_FOR_SIBCALL ix86_function_ok_for_sibcall
51727 #undef TARGET_MEMMODEL_CHECK
51728 #define TARGET_MEMMODEL_CHECK ix86_memmodel_check
51730 #undef TARGET_ATOMIC_ASSIGN_EXPAND_FENV
51731 #define TARGET_ATOMIC_ASSIGN_EXPAND_FENV ix86_atomic_assign_expand_fenv
51733 #ifdef HAVE_AS_TLS
51734 #undef TARGET_HAVE_TLS
51735 #define TARGET_HAVE_TLS true
51736 #endif
51737 #undef TARGET_CANNOT_FORCE_CONST_MEM
51738 #define TARGET_CANNOT_FORCE_CONST_MEM ix86_cannot_force_const_mem
51739 #undef TARGET_USE_BLOCKS_FOR_CONSTANT_P
51740 #define TARGET_USE_BLOCKS_FOR_CONSTANT_P hook_bool_mode_const_rtx_true
51742 #undef TARGET_DELEGITIMIZE_ADDRESS
51743 #define TARGET_DELEGITIMIZE_ADDRESS ix86_delegitimize_address
51745 #undef TARGET_MS_BITFIELD_LAYOUT_P
51746 #define TARGET_MS_BITFIELD_LAYOUT_P ix86_ms_bitfield_layout_p
51748 #if TARGET_MACHO
51749 #undef TARGET_BINDS_LOCAL_P
51750 #define TARGET_BINDS_LOCAL_P darwin_binds_local_p
51751 #endif
51752 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
51753 #undef TARGET_BINDS_LOCAL_P
51754 #define TARGET_BINDS_LOCAL_P i386_pe_binds_local_p
51755 #endif
51757 #undef TARGET_ASM_OUTPUT_MI_THUNK
51758 #define TARGET_ASM_OUTPUT_MI_THUNK x86_output_mi_thunk
51759 #undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
51760 #define TARGET_ASM_CAN_OUTPUT_MI_THUNK x86_can_output_mi_thunk
51762 #undef TARGET_ASM_FILE_START
51763 #define TARGET_ASM_FILE_START x86_file_start
51765 #undef TARGET_OPTION_OVERRIDE
51766 #define TARGET_OPTION_OVERRIDE ix86_option_override
51768 #undef TARGET_REGISTER_MOVE_COST
51769 #define TARGET_REGISTER_MOVE_COST ix86_register_move_cost
51770 #undef TARGET_MEMORY_MOVE_COST
51771 #define TARGET_MEMORY_MOVE_COST ix86_memory_move_cost
51772 #undef TARGET_RTX_COSTS
51773 #define TARGET_RTX_COSTS ix86_rtx_costs
51774 #undef TARGET_ADDRESS_COST
51775 #define TARGET_ADDRESS_COST ix86_address_cost
51777 #undef TARGET_FIXED_CONDITION_CODE_REGS
51778 #define TARGET_FIXED_CONDITION_CODE_REGS ix86_fixed_condition_code_regs
51779 #undef TARGET_CC_MODES_COMPATIBLE
51780 #define TARGET_CC_MODES_COMPATIBLE ix86_cc_modes_compatible
51782 #undef TARGET_MACHINE_DEPENDENT_REORG
51783 #define TARGET_MACHINE_DEPENDENT_REORG ix86_reorg
51785 #undef TARGET_BUILTIN_SETJMP_FRAME_VALUE
51786 #define TARGET_BUILTIN_SETJMP_FRAME_VALUE ix86_builtin_setjmp_frame_value
51788 #undef TARGET_BUILD_BUILTIN_VA_LIST
51789 #define TARGET_BUILD_BUILTIN_VA_LIST ix86_build_builtin_va_list
51791 #undef TARGET_FOLD_BUILTIN
51792 #define TARGET_FOLD_BUILTIN ix86_fold_builtin
51794 #undef TARGET_COMPARE_VERSION_PRIORITY
51795 #define TARGET_COMPARE_VERSION_PRIORITY ix86_compare_version_priority
51797 #undef TARGET_GENERATE_VERSION_DISPATCHER_BODY
51798 #define TARGET_GENERATE_VERSION_DISPATCHER_BODY \
51799 ix86_generate_version_dispatcher_body
51801 #undef TARGET_GET_FUNCTION_VERSIONS_DISPATCHER
51802 #define TARGET_GET_FUNCTION_VERSIONS_DISPATCHER \
51803 ix86_get_function_versions_dispatcher
51805 #undef TARGET_ENUM_VA_LIST_P
51806 #define TARGET_ENUM_VA_LIST_P ix86_enum_va_list
51808 #undef TARGET_FN_ABI_VA_LIST
51809 #define TARGET_FN_ABI_VA_LIST ix86_fn_abi_va_list
51811 #undef TARGET_CANONICAL_VA_LIST_TYPE
51812 #define TARGET_CANONICAL_VA_LIST_TYPE ix86_canonical_va_list_type
51814 #undef TARGET_EXPAND_BUILTIN_VA_START
51815 #define TARGET_EXPAND_BUILTIN_VA_START ix86_va_start
51817 #undef TARGET_MD_ASM_CLOBBERS
51818 #define TARGET_MD_ASM_CLOBBERS ix86_md_asm_clobbers
51820 #undef TARGET_PROMOTE_PROTOTYPES
51821 #define TARGET_PROMOTE_PROTOTYPES hook_bool_const_tree_true
51822 #undef TARGET_SETUP_INCOMING_VARARGS
51823 #define TARGET_SETUP_INCOMING_VARARGS ix86_setup_incoming_varargs
51824 #undef TARGET_MUST_PASS_IN_STACK
51825 #define TARGET_MUST_PASS_IN_STACK ix86_must_pass_in_stack
51826 #undef TARGET_FUNCTION_ARG_ADVANCE
51827 #define TARGET_FUNCTION_ARG_ADVANCE ix86_function_arg_advance
51828 #undef TARGET_FUNCTION_ARG
51829 #define TARGET_FUNCTION_ARG ix86_function_arg
51830 #undef TARGET_INIT_PIC_REG
51831 #define TARGET_INIT_PIC_REG ix86_init_pic_reg
51832 #undef TARGET_USE_PSEUDO_PIC_REG
51833 #define TARGET_USE_PSEUDO_PIC_REG ix86_use_pseudo_pic_reg
51834 #undef TARGET_FUNCTION_ARG_BOUNDARY
51835 #define TARGET_FUNCTION_ARG_BOUNDARY ix86_function_arg_boundary
51836 #undef TARGET_PASS_BY_REFERENCE
51837 #define TARGET_PASS_BY_REFERENCE ix86_pass_by_reference
51838 #undef TARGET_INTERNAL_ARG_POINTER
51839 #define TARGET_INTERNAL_ARG_POINTER ix86_internal_arg_pointer
51840 #undef TARGET_UPDATE_STACK_BOUNDARY
51841 #define TARGET_UPDATE_STACK_BOUNDARY ix86_update_stack_boundary
51842 #undef TARGET_GET_DRAP_RTX
51843 #define TARGET_GET_DRAP_RTX ix86_get_drap_rtx
51844 #undef TARGET_STRICT_ARGUMENT_NAMING
51845 #define TARGET_STRICT_ARGUMENT_NAMING hook_bool_CUMULATIVE_ARGS_true
51846 #undef TARGET_STATIC_CHAIN
51847 #define TARGET_STATIC_CHAIN ix86_static_chain
51848 #undef TARGET_TRAMPOLINE_INIT
51849 #define TARGET_TRAMPOLINE_INIT ix86_trampoline_init
51850 #undef TARGET_RETURN_POPS_ARGS
51851 #define TARGET_RETURN_POPS_ARGS ix86_return_pops_args
51853 #undef TARGET_LEGITIMATE_COMBINED_INSN
51854 #define TARGET_LEGITIMATE_COMBINED_INSN ix86_legitimate_combined_insn
51856 #undef TARGET_ASAN_SHADOW_OFFSET
51857 #define TARGET_ASAN_SHADOW_OFFSET ix86_asan_shadow_offset
51859 #undef TARGET_GIMPLIFY_VA_ARG_EXPR
51860 #define TARGET_GIMPLIFY_VA_ARG_EXPR ix86_gimplify_va_arg
51862 #undef TARGET_SCALAR_MODE_SUPPORTED_P
51863 #define TARGET_SCALAR_MODE_SUPPORTED_P ix86_scalar_mode_supported_p
51865 #undef TARGET_VECTOR_MODE_SUPPORTED_P
51866 #define TARGET_VECTOR_MODE_SUPPORTED_P ix86_vector_mode_supported_p
51868 #undef TARGET_LIBGCC_FLOATING_MODE_SUPPORTED_P
51869 #define TARGET_LIBGCC_FLOATING_MODE_SUPPORTED_P \
51870 ix86_libgcc_floating_mode_supported_p
51872 #undef TARGET_C_MODE_FOR_SUFFIX
51873 #define TARGET_C_MODE_FOR_SUFFIX ix86_c_mode_for_suffix
51875 #ifdef HAVE_AS_TLS
51876 #undef TARGET_ASM_OUTPUT_DWARF_DTPREL
51877 #define TARGET_ASM_OUTPUT_DWARF_DTPREL i386_output_dwarf_dtprel
51878 #endif
51880 #ifdef SUBTARGET_INSERT_ATTRIBUTES
51881 #undef TARGET_INSERT_ATTRIBUTES
51882 #define TARGET_INSERT_ATTRIBUTES SUBTARGET_INSERT_ATTRIBUTES
51883 #endif
51885 #undef TARGET_MANGLE_TYPE
51886 #define TARGET_MANGLE_TYPE ix86_mangle_type
51888 #if !TARGET_MACHO
51889 #undef TARGET_STACK_PROTECT_FAIL
51890 #define TARGET_STACK_PROTECT_FAIL ix86_stack_protect_fail
51891 #endif
51893 #undef TARGET_FUNCTION_VALUE
51894 #define TARGET_FUNCTION_VALUE ix86_function_value
51896 #undef TARGET_FUNCTION_VALUE_REGNO_P
51897 #define TARGET_FUNCTION_VALUE_REGNO_P ix86_function_value_regno_p
51899 #undef TARGET_PROMOTE_FUNCTION_MODE
51900 #define TARGET_PROMOTE_FUNCTION_MODE ix86_promote_function_mode
51902 #undef TARGET_MEMBER_TYPE_FORCES_BLK
51903 #define TARGET_MEMBER_TYPE_FORCES_BLK ix86_member_type_forces_blk
51905 #undef TARGET_INSTANTIATE_DECLS
51906 #define TARGET_INSTANTIATE_DECLS ix86_instantiate_decls
51908 #undef TARGET_SECONDARY_RELOAD
51909 #define TARGET_SECONDARY_RELOAD ix86_secondary_reload
51911 #undef TARGET_CLASS_MAX_NREGS
51912 #define TARGET_CLASS_MAX_NREGS ix86_class_max_nregs
51914 #undef TARGET_PREFERRED_RELOAD_CLASS
51915 #define TARGET_PREFERRED_RELOAD_CLASS ix86_preferred_reload_class
51916 #undef TARGET_PREFERRED_OUTPUT_RELOAD_CLASS
51917 #define TARGET_PREFERRED_OUTPUT_RELOAD_CLASS ix86_preferred_output_reload_class
51918 #undef TARGET_CLASS_LIKELY_SPILLED_P
51919 #define TARGET_CLASS_LIKELY_SPILLED_P ix86_class_likely_spilled_p
51921 #undef TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST
51922 #define TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST \
51923 ix86_builtin_vectorization_cost
51924 #undef TARGET_VECTORIZE_VEC_PERM_CONST_OK
51925 #define TARGET_VECTORIZE_VEC_PERM_CONST_OK \
51926 ix86_vectorize_vec_perm_const_ok
51927 #undef TARGET_VECTORIZE_PREFERRED_SIMD_MODE
51928 #define TARGET_VECTORIZE_PREFERRED_SIMD_MODE \
51929 ix86_preferred_simd_mode
51930 #undef TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_SIZES
51931 #define TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_SIZES \
51932 ix86_autovectorize_vector_sizes
51933 #undef TARGET_VECTORIZE_INIT_COST
51934 #define TARGET_VECTORIZE_INIT_COST ix86_init_cost
51935 #undef TARGET_VECTORIZE_ADD_STMT_COST
51936 #define TARGET_VECTORIZE_ADD_STMT_COST ix86_add_stmt_cost
51937 #undef TARGET_VECTORIZE_FINISH_COST
51938 #define TARGET_VECTORIZE_FINISH_COST ix86_finish_cost
51939 #undef TARGET_VECTORIZE_DESTROY_COST_DATA
51940 #define TARGET_VECTORIZE_DESTROY_COST_DATA ix86_destroy_cost_data
51942 #undef TARGET_SET_CURRENT_FUNCTION
51943 #define TARGET_SET_CURRENT_FUNCTION ix86_set_current_function
#undef TARGET_OPTION_VALID_ATTRIBUTE_P
#define TARGET_OPTION_VALID_ATTRIBUTE_P ix86_valid_target_attribute_p

#undef TARGET_OPTION_SAVE
#define TARGET_OPTION_SAVE ix86_function_specific_save

#undef TARGET_OPTION_RESTORE
#define TARGET_OPTION_RESTORE ix86_function_specific_restore

#undef TARGET_OPTION_PRINT
#define TARGET_OPTION_PRINT ix86_function_specific_print

#undef TARGET_OPTION_FUNCTION_VERSIONS
#define TARGET_OPTION_FUNCTION_VERSIONS ix86_function_versions

#undef TARGET_CAN_INLINE_P
#define TARGET_CAN_INLINE_P ix86_can_inline_p

#undef TARGET_EXPAND_TO_RTL_HOOK
#define TARGET_EXPAND_TO_RTL_HOOK ix86_maybe_switch_abi

#undef TARGET_LEGITIMATE_ADDRESS_P
#define TARGET_LEGITIMATE_ADDRESS_P ix86_legitimate_address_p

#undef TARGET_LRA_P
#define TARGET_LRA_P hook_bool_void_true

#undef TARGET_REGISTER_PRIORITY
#define TARGET_REGISTER_PRIORITY ix86_register_priority

#undef TARGET_REGISTER_USAGE_LEVELING_P
#define TARGET_REGISTER_USAGE_LEVELING_P hook_bool_void_true

#undef TARGET_LEGITIMATE_CONSTANT_P
#define TARGET_LEGITIMATE_CONSTANT_P ix86_legitimate_constant_p

#undef TARGET_FRAME_POINTER_REQUIRED
#define TARGET_FRAME_POINTER_REQUIRED ix86_frame_pointer_required

#undef TARGET_CAN_ELIMINATE
#define TARGET_CAN_ELIMINATE ix86_can_eliminate

#undef TARGET_EXTRA_LIVE_ON_ENTRY
#define TARGET_EXTRA_LIVE_ON_ENTRY ix86_live_on_entry

#undef TARGET_ASM_CODE_END
#define TARGET_ASM_CODE_END ix86_code_end

#undef TARGET_CONDITIONAL_REGISTER_USAGE
#define TARGET_CONDITIONAL_REGISTER_USAGE ix86_conditional_register_usage

#if TARGET_MACHO
#undef TARGET_INIT_LIBFUNCS
#define TARGET_INIT_LIBFUNCS darwin_rename_builtins
#endif

#undef TARGET_LOOP_UNROLL_ADJUST
#define TARGET_LOOP_UNROLL_ADJUST ix86_loop_unroll_adjust

#undef TARGET_SPILL_CLASS
#define TARGET_SPILL_CLASS ix86_spill_class
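
/* SIMD clone ("declare simd") hooks.  */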
#undef TARGET_SIMD_CLONE_COMPUTE_VECSIZE_AND_SIMDLEN
#define TARGET_SIMD_CLONE_COMPUTE_VECSIZE_AND_SIMDLEN \
  ix86_simd_clone_compute_vecsize_and_simdlen

#undef TARGET_SIMD_CLONE_ADJUST
#define TARGET_SIMD_CLONE_ADJUST \
  ix86_simd_clone_adjust

#undef TARGET_SIMD_CLONE_USABLE
#define TARGET_SIMD_CLONE_USABLE \
  ix86_simd_clone_usable

#undef TARGET_FLOAT_EXCEPTIONS_ROUNDING_SUPPORTED_P
#define TARGET_FLOAT_EXCEPTIONS_ROUNDING_SUPPORTED_P \
  ix86_float_exceptions_rounding_supported_p
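
/* Mode-switching hooks used by the optimize_mode_switching pass.  */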
#undef TARGET_MODE_EMIT
#define TARGET_MODE_EMIT ix86_emit_mode_set

#undef TARGET_MODE_NEEDED
#define TARGET_MODE_NEEDED ix86_mode_needed

#undef TARGET_MODE_AFTER
#define TARGET_MODE_AFTER ix86_mode_after

#undef TARGET_MODE_ENTRY
#define TARGET_MODE_ENTRY ix86_mode_entry

#undef TARGET_MODE_EXIT
#define TARGET_MODE_EXIT ix86_mode_exit

#undef TARGET_MODE_PRIORITY
#define TARGET_MODE_PRIORITY ix86_mode_priority

#undef TARGET_CALL_FUSAGE_CONTAINS_NON_CALLEE_CLOBBERS
#define TARGET_CALL_FUSAGE_CONTAINS_NON_CALLEE_CLOBBERS true
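
/* Pointer Bounds Checker (Intel MPX) hooks for passing and returning
   bounds information.  */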
#undef TARGET_LOAD_BOUNDS_FOR_ARG
#define TARGET_LOAD_BOUNDS_FOR_ARG ix86_load_bounds

#undef TARGET_STORE_BOUNDS_FOR_ARG
#define TARGET_STORE_BOUNDS_FOR_ARG ix86_store_bounds

#undef TARGET_LOAD_RETURNED_BOUNDS
#define TARGET_LOAD_RETURNED_BOUNDS ix86_load_returned_bounds

#undef TARGET_STORE_RETURNED_BOUNDS
#define TARGET_STORE_RETURNED_BOUNDS ix86_store_returned_bounds

#undef TARGET_CHKP_BOUND_MODE
#define TARGET_CHKP_BOUND_MODE ix86_mpx_bound_mode

#undef TARGET_BUILTIN_CHKP_FUNCTION
#define TARGET_BUILTIN_CHKP_FUNCTION ix86_builtin_mpx_function

#undef TARGET_CHKP_FUNCTION_VALUE_BOUNDS
#define TARGET_CHKP_FUNCTION_VALUE_BOUNDS ix86_function_value_bounds

#undef TARGET_CHKP_MAKE_BOUNDS_CONSTANT
#define TARGET_CHKP_MAKE_BOUNDS_CONSTANT ix86_make_bounds_constant

#undef TARGET_CHKP_INITIALIZE_BOUNDS
#define TARGET_CHKP_INITIALIZE_BOUNDS ix86_initialize_bounds

#undef TARGET_SETUP_INCOMING_VARARG_BOUNDS
#define TARGET_SETUP_INCOMING_VARARG_BOUNDS ix86_setup_incoming_vararg_bounds

#undef TARGET_OFFLOAD_OPTIONS
#define TARGET_OFFLOAD_OPTIONS \
  ix86_offload_options
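
/* Build the complete target hook vector from the TARGET_* macros
   defined above.  */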
struct gcc_target targetm = TARGET_INITIALIZER;

#include "gt-i386.h"